Data Structures
Lists, tuples, dicts, sets, and the specialized containers in collections. Pick the right one and the rest of your code gets simpler.
Lists
Ordered, mutable sequences. The default container when you just need "a bunch of things in order":
# Creating
items = [1, 2, 3]
mixed = [1, "hello", 3.14, True]
empty = []
from_range = list(range(5)) # [0, 1, 2, 3, 4]
Accessing Elements
items = ['a', 'b', 'c', 'd', 'e']
items[0] # 'a' (first)
items[-1] # 'e' (last)
items[1:3] # ['b', 'c'] (slice)
items[::2] # ['a', 'c', 'e'] (every 2nd)
items[::-1] # ['e', 'd', 'c', 'b', 'a'] (reversed)
Modifying Lists
items = [1, 2, 3]
# Add elements
items.append(4) # [1, 2, 3, 4]
items.insert(0, 0) # [0, 1, 2, 3, 4]
items.extend([5, 6]) # [0, 1, 2, 3, 4, 5, 6]
items += [7, 8] # [0, 1, 2, 3, 4, 5, 6, 7, 8]
# Remove elements
items.pop() # Removes and returns last (8)
items.pop(0) # Removes and returns first (0)
items.remove(3) # Removes first occurrence of 3
del items[0] # Delete by index
items.clear() # Remove all
# Modify
items = [1, 2, 3]
items[0] = 10 # [10, 2, 3]
items[1:3] = [20, 30] # [10, 20, 30]
List Operations
items = [3, 1, 4, 1, 5]
len(items) # 5
min(items) # 1
max(items) # 5
sum(items) # 14
items.count(1) # 2
items.index(4) # 2 (first occurrence)
1 in items # True
# Sorting
items.sort() # In-place: [1, 1, 3, 4, 5]
items.sort(reverse=True) # In-place descending
sorted_copy = sorted(items) # Returns new list
# Sort by key
words = ["apple", "Banana", "cherry"]
words.sort(key=str.lower)
words.sort(key=len)
# Reverse
items.reverse() # In-place
reversed_copy = items[::-1] # New list
# Copy
shallow_copy = items.copy()
shallow_copy = items[:]
shallow_copy = list(items)
import copy
deep_copy = copy.deepcopy(nested_list)
Tuples
Ordered, immutable sequences:
# Creating
point = (3, 4)
single = (42,) # Note the comma
empty = ()
from_list = tuple([1, 2, 3])
# Unpacking
x, y = point
first, *rest = (1, 2, 3, 4) # first=1, rest=[2, 3, 4]
first, *middle, last = (1, 2, 3, 4, 5)
When to Use Tuples
# Return multiple values
def get_point():
return (3, 4) # or just: return 3, 4
# Heterogeneous data (like a record)
person = ("Alice", 30, "Engineer")
# Dictionary keys (must be immutable)
locations = {(40.7, -74.0): "NYC", (34.0, -118.2): "LA"}
# Swap values
a, b = b, a
Named Tuples
Tuples with named fields:
from collections import namedtuple
Point = namedtuple('Point', ['x', 'y'])
p = Point(3, 4)
print(p.x, p.y) # 3 4
print(p[0], p[1]) # 3 4 (still works as tuple)
# Or use typing.NamedTuple for type hints
from typing import NamedTuple
class Point(NamedTuple):
x: float
y: float
label: str = "origin"
Dictionaries
Key-value mappings:
# Creating
person = {"name": "Alice", "age": 30}
empty = {}
from_pairs = dict([("a", 1), ("b", 2)])
from_keys = dict.fromkeys(["a", "b", "c"], 0) # All values = 0
Accessing Values
d = {"name": "Alice", "age": 30}
d["name"] # "Alice"
d["missing"] # KeyError!
d.get("missing") # None
d.get("missing", 0) # 0 (default)
# Keys and values
d.keys() # dict_keys(['name', 'age'])
d.values() # dict_values(['Alice', 30])
d.items() # dict_items([('name', 'Alice'), ('age', 30)])
# Check membership
"name" in d # True
"missing" in d # False
Modifying Dictionaries
d = {"a": 1, "b": 2}
# Add/update
d["c"] = 3 # Add new key
d["a"] = 10 # Update existing
d.update({"d": 4, "e": 5}) # Merge
# Remove
del d["a"] # KeyError if missing
d.pop("b") # Returns value, KeyError if missing
d.pop("x", None) # Returns None if missing
d.popitem() # Remove and return last item
d.clear() # Remove all
# Get with default and set
d.setdefault("key", []).append(1) # Creates key if missing
Dictionary Operations
d = {"a": 1, "b": 2, "c": 3}
len(d) # 3
# Iteration
for key in d:
print(key)
for key, value in d.items():
print(f"{key}: {value}")
# Merge (3.9+)
merged = d | {"d": 4}
d |= {"e": 5} # In-place merge
# Copy
shallow = d.copy()
shallow = dict(d)
Default Dict
from collections import defaultdict
# Default value for missing keys
counts = defaultdict(int) # Default: 0
counts["a"] += 1 # No KeyError
groups = defaultdict(list) # Default: []
groups["team1"].append("Alice")
nested = defaultdict(lambda: defaultdict(int))
nested["row"]["col"] = 1
Counter
from collections import Counter
# Count occurrences
counts = Counter("abracadabra")
# Counter({'a': 5, 'b': 2, 'r': 2, 'c': 1, 'd': 1})
counts.most_common(2) # [('a', 5), ('b', 2)]
counts["a"] # 5
counts["z"] # 0 (missing = 0)
# Operations
Counter("abc") + Counter("bcd") # Add counts
Counter("abc") - Counter("bcd") # Subtract
Sets
Unordered, unique elements:
# Creating
s = {1, 2, 3}
empty = set() # NOT {} (that's a dict)
from_list = set([1, 2, 2, 3]) # {1, 2, 3}
Set Operations
a = {1, 2, 3}
b = {2, 3, 4}
# Membership
2 in a # True
# Set operations
a | b # Union: {1, 2, 3, 4}
a & b # Intersection: {2, 3}
a - b # Difference: {1}
a ^ b # Symmetric difference: {1, 4}
# Comparisons
{1, 2} <= {1, 2, 3} # Subset
{1, 2, 3} >= {1, 2} # Superset
{1, 2}.isdisjoint({3, 4}) # No common elements
Modifying Sets
s = {1, 2, 3}
s.add(4) # Add element
s.update([5, 6]) # Add multiple
s.remove(1) # Remove (KeyError if missing)
s.discard(1) # Remove (no error if missing)
s.pop() # Remove and return arbitrary element
s.clear() # Remove all
Frozen Sets
Immutable sets (can be dict keys):
fs = frozenset([1, 2, 3])
# fs.add(4) # Error - immutable
Comprehensions
Concise syntax for creating collections:
List Comprehension
# [expression for item in iterable if condition]
squares = [x**2 for x in range(10)]
evens = [x for x in range(20) if x % 2 == 0]
pairs = [(x, y) for x in range(3) for y in range(3)]
# Transform
words = ["hello", "world"]
upper = [w.upper() for w in words]
# Filter and transform
positive_squares = [x**2 for x in numbers if x > 0]
Dictionary Comprehension
# {key: value for item in iterable if condition}
squares = {x: x**2 for x in range(5)}
# {0: 0, 1: 1, 2: 4, 3: 9, 4: 16}
# Invert a dictionary
original = {"a": 1, "b": 2}
inverted = {v: k for k, v in original.items()}
# {1: 'a', 2: 'b'}
# Filter
filtered = {k: v for k, v in d.items() if v > 0}
Set Comprehension
# {expression for item in iterable if condition}
unique_lengths = {len(word) for word in words}
Generator Expression
# (expression for item in iterable if condition)
# Lazy - doesn't create list in memory
squares = (x**2 for x in range(1000000))
sum(x**2 for x in range(1000000)) # Memory efficient
Choosing the Right Structure
| Need | Use |
|---|---|
| Ordered, mutable | list |
| Ordered, immutable | tuple |
| Key-value pairs | dict |
| Unique values | set |
| Count occurrences | Counter |
| Default values | defaultdict |
| Named fields | namedtuple or dataclass |
| FIFO queue | collections.deque |
| Priority queue | heapq |
Deque
Double-ended queue - efficient append/pop from both ends:
from collections import deque
d = deque([1, 2, 3])
d.append(4) # Right: [1, 2, 3, 4]
d.appendleft(0) # Left: [0, 1, 2, 3, 4]
d.pop() # Right: 4
d.popleft() # Left: 0
d.rotate(1) # Rotate right
d.rotate(-1) # Rotate left
# Fixed size (oldest items dropped)
recent = deque(maxlen=3)
recent.extend([1, 2, 3, 4, 5]) # deque([3, 4, 5])
Practice
# 1. Group items by property
words = ["apple", "banana", "cherry", "apricot", "blueberry"]
by_first_letter = defaultdict(list)
for word in words:
by_first_letter[word[0]].append(word)
# 2. Find duplicates
items = [1, 2, 2, 3, 3, 3, 4]
duplicates = [x for x in set(items) if items.count(x) > 1]
# Or with Counter
duplicates = [x for x, count in Counter(items).items() if count > 1]
# 3. Merge dicts with conflict resolution
d1 = {"a": 1, "b": 2}
d2 = {"b": 3, "c": 4}
merged = {**d1, **d2} # d2 wins conflicts
# 4. Flatten nested list
nested = [[1, 2], [3, 4], [5, 6]]
flat = [x for sublist in nested for x in sublist]
# 5. Remove duplicates preserving order
items = [3, 1, 4, 1, 5, 9, 2, 6, 5]
unique = list(dict.fromkeys(items)) # [3, 1, 4, 5, 9, 2, 6]
Next Steps
Continue to 06-oop.md to wrap state and behaviour into classes, dunder methods, and dataclasses.