Data Structures | Python Tutorial

Lists, tuples, dicts, sets, and the specialized containers in collections. Pick the right one and the rest of your code gets simpler.

Lists

Ordered, mutable sequences. The default container when you just need "a bunch of things in order":

# Creating
items = [1, 2, 3]
mixed = [1, "hello", 3.14, True]
empty = []
from_range = list(range(5))  # [0, 1, 2, 3, 4]

Accessing Elements

items = ['a', 'b', 'c', 'd', 'e']

items[0]      # 'a' (first)
items[-1]     # 'e' (last)
items[1:3]    # ['b', 'c'] (slice)
items[::2]    # ['a', 'c', 'e'] (every 2nd)
items[::-1]   # ['e', 'd', 'c', 'b', 'a'] (reversed)

Modifying Lists

items = [1, 2, 3]

# Add elements
items.append(4)           # [1, 2, 3, 4]
items.insert(0, 0)        # [0, 1, 2, 3, 4]
items.extend([5, 6])      # [0, 1, 2, 3, 4, 5, 6]
items += [7, 8]           # [0, 1, 2, 3, 4, 5, 6, 7, 8]

# Remove elements
items.pop()               # Removes and returns last (8)
items.pop(0)              # Removes and returns first (0)
items.remove(3)           # Removes first occurrence of 3
del items[0]              # Delete by index
items.clear()             # Remove all

# Modify
items = [1, 2, 3]
items[0] = 10             # [10, 2, 3]
items[1:3] = [20, 30]     # [10, 20, 30]

List Operations

items = [3, 1, 4, 1, 5]

len(items)                # 5
min(items)                # 1
max(items)                # 5
sum(items)                # 14

items.count(1)            # 2
items.index(4)            # 2 (first occurrence)
1 in items                # True

# Sorting
items.sort()              # In-place: [1, 1, 3, 4, 5]
items.sort(reverse=True)  # In-place descending
sorted_copy = sorted(items)  # Returns new list

# Sort by key
words = ["apple", "Banana", "cherry"]
words.sort(key=str.lower)
words.sort(key=len)

# Reverse
items.reverse()           # In-place
reversed_copy = items[::-1]  # New list

# Copy
shallow_copy = items.copy()
shallow_copy = items[:]
shallow_copy = list(items)

import copy
deep_copy = copy.deepcopy(nested_list)

Tuples

Ordered, immutable sequences:

# Creating
point = (3, 4)
single = (42,)            # Note the comma
empty = ()
from_list = tuple([1, 2, 3])

# Unpacking
x, y = point
first, *rest = (1, 2, 3, 4)  # first=1, rest=[2, 3, 4]
first, *middle, last = (1, 2, 3, 4, 5)

When to Use Tuples

# Return multiple values
def get_point():
    return (3, 4)  # or just: return 3, 4

# Heterogeneous data (like a record)
person = ("Alice", 30, "Engineer")

# Dictionary keys (must be immutable)
locations = {(40.7, -74.0): "NYC", (34.0, -118.2): "LA"}

# Swap values
a, b = b, a

Named Tuples

Tuples with named fields:

from collections import namedtuple

Point = namedtuple('Point', ['x', 'y'])
p = Point(3, 4)
print(p.x, p.y)     # 3 4
print(p[0], p[1])   # 3 4 (still works as tuple)

# Or use typing.NamedTuple for type hints
from typing import NamedTuple

class Point(NamedTuple):
    x: float
    y: float
    label: str = "origin"

Dictionaries

Key-value mappings:

# Creating
person = {"name": "Alice", "age": 30}
empty = {}
from_pairs = dict([("a", 1), ("b", 2)])
from_keys = dict.fromkeys(["a", "b", "c"], 0)  # All values = 0

Accessing Values

d = {"name": "Alice", "age": 30}

d["name"]             # "Alice"
d["missing"]          # KeyError!
d.get("missing")      # None
d.get("missing", 0)   # 0 (default)

# Keys and values
d.keys()              # dict_keys(['name', 'age'])
d.values()            # dict_values(['Alice', 30])
d.items()             # dict_items([('name', 'Alice'), ('age', 30)])

# Check membership
"name" in d           # True
"missing" in d        # False

Modifying Dictionaries

d = {"a": 1, "b": 2}

# Add/update
d["c"] = 3            # Add new key
d["a"] = 10           # Update existing
d.update({"d": 4, "e": 5})  # Merge

# Remove
del d["a"]            # KeyError if missing
d.pop("b")            # Returns value, KeyError if missing
d.pop("x", None)      # Returns None if missing
d.popitem()           # Remove and return last item
d.clear()             # Remove all

# Get with default and set
d.setdefault("key", []).append(1)  # Creates key if missing

Dictionary Operations

d = {"a": 1, "b": 2, "c": 3}

len(d)                # 3

# Iteration
for key in d:
    print(key)

for key, value in d.items():
    print(f"{key}: {value}")

# Merge (3.9+)
merged = d | {"d": 4}
d |= {"e": 5}         # In-place merge

# Copy
shallow = d.copy()
shallow = dict(d)

Default Dict

from collections import defaultdict

# Default value for missing keys
counts = defaultdict(int)     # Default: 0
counts["a"] += 1              # No KeyError

groups = defaultdict(list)    # Default: []
groups["team1"].append("Alice")

nested = defaultdict(lambda: defaultdict(int))
nested["row"]["col"] = 1

Counter

from collections import Counter

# Count occurrences
counts = Counter("abracadabra")
# Counter({'a': 5, 'b': 2, 'r': 2, 'c': 1, 'd': 1})

counts.most_common(2)     # [('a', 5), ('b', 2)]
counts["a"]               # 5
counts["z"]               # 0 (missing = 0)

# Operations
Counter("abc") + Counter("bcd")  # Add counts
Counter("abc") - Counter("bcd")  # Subtract

Sets

Unordered, unique elements:

# Creating
s = {1, 2, 3}
empty = set()             # NOT {} (that's a dict)
from_list = set([1, 2, 2, 3])  # {1, 2, 3}

Set Operations

a = {1, 2, 3}
b = {2, 3, 4}

# Membership
2 in a                    # True

# Set operations
a | b                     # Union: {1, 2, 3, 4}
a & b                     # Intersection: {2, 3}
a - b                     # Difference: {1}
a ^ b                     # Symmetric difference: {1, 4}

# Comparisons
{1, 2} <= {1, 2, 3}       # Subset
{1, 2, 3} >= {1, 2}       # Superset
{1, 2}.isdisjoint({3, 4}) # No common elements

Modifying Sets

s = {1, 2, 3}

s.add(4)                  # Add element
s.update([5, 6])          # Add multiple
s.remove(1)               # Remove (KeyError if missing)
s.discard(1)              # Remove (no error if missing)
s.pop()                   # Remove and return arbitrary element
s.clear()                 # Remove all

Frozen Sets

Immutable sets (can be dict keys):

fs = frozenset([1, 2, 3])
# fs.add(4)  # Error - immutable

Comprehensions

Concise syntax for creating collections:

List Comprehension

# [expression for item in iterable if condition]

squares = [x**2 for x in range(10)]
evens = [x for x in range(20) if x % 2 == 0]
pairs = [(x, y) for x in range(3) for y in range(3)]

# Transform
words = ["hello", "world"]
upper = [w.upper() for w in words]

# Filter and transform
positive_squares = [x**2 for x in numbers if x > 0]

Dictionary Comprehension

# {key: value for item in iterable if condition}

squares = {x: x**2 for x in range(5)}
# {0: 0, 1: 1, 2: 4, 3: 9, 4: 16}

# Invert a dictionary
original = {"a": 1, "b": 2}
inverted = {v: k for k, v in original.items()}
# {1: 'a', 2: 'b'}

# Filter
filtered = {k: v for k, v in d.items() if v > 0}

Set Comprehension

# {expression for item in iterable if condition}

unique_lengths = {len(word) for word in words}

Generator Expression

# (expression for item in iterable if condition)
# Lazy - doesn't create list in memory

squares = (x**2 for x in range(1000000))
sum(x**2 for x in range(1000000))  # Memory efficient

Choosing the Right Structure

Need	Use
Ordered, mutable	`list`
Ordered, immutable	`tuple`
Key-value pairs	`dict`
Unique values	`set`
Count occurrences	`Counter`
Default values	`defaultdict`
Named fields	`namedtuple` or dataclass
FIFO queue	`collections.deque`
Priority queue	`heapq`

Deque

Double-ended queue - efficient append/pop from both ends:

from collections import deque

d = deque([1, 2, 3])
d.append(4)           # Right: [1, 2, 3, 4]
d.appendleft(0)       # Left: [0, 1, 2, 3, 4]
d.pop()               # Right: 4
d.popleft()           # Left: 0
d.rotate(1)           # Rotate right
d.rotate(-1)          # Rotate left

# Fixed size (oldest items dropped)
recent = deque(maxlen=3)
recent.extend([1, 2, 3, 4, 5])  # deque([3, 4, 5])

Practice

# 1. Group items by property
words = ["apple", "banana", "cherry", "apricot", "blueberry"]
by_first_letter = defaultdict(list)
for word in words:
    by_first_letter[word[0]].append(word)

# 2. Find duplicates
items = [1, 2, 2, 3, 3, 3, 4]
duplicates = [x for x in set(items) if items.count(x) > 1]
# Or with Counter
duplicates = [x for x, count in Counter(items).items() if count > 1]

# 3. Merge dicts with conflict resolution
d1 = {"a": 1, "b": 2}
d2 = {"b": 3, "c": 4}
merged = {**d1, **d2}  # d2 wins conflicts

# 4. Flatten nested list
nested = [[1, 2], [3, 4], [5, 6]]
flat = [x for sublist in nested for x in sublist]

# 5. Remove duplicates preserving order
items = [3, 1, 4, 1, 5, 9, 2, 6, 5]
unique = list(dict.fromkeys(items))  # [3, 1, 4, 5, 9, 2, 6]

Next Steps

Continue to 06-oop.md to wrap state and behaviour into classes, dunder methods, and dataclasses.