Data Structures
Python's built-in collection types.
Lists
Ordered, mutable sequences:
# Creating
items = [1, 2, 3]
mixed = [1, "hello", 3.14, True]
empty = []
from_range = list(range(5)) # [0, 1, 2, 3, 4]
Accessing Elements
items = ['a', 'b', 'c', 'd', 'e']
items[0] # 'a' (first)
items[-1] # 'e' (last)
items[1:3] # ['b', 'c'] (slice)
items[::2] # ['a', 'c', 'e'] (every 2nd)
items[::-1] # ['e', 'd', 'c', 'b', 'a'] (reversed)
Modifying Lists
items = [1, 2, 3]
# Add elements
items.append(4) # [1, 2, 3, 4]
items.insert(0, 0) # [0, 1, 2, 3, 4]
items.extend([5, 6]) # [0, 1, 2, 3, 4, 5, 6]
items += [7, 8] # [0, 1, 2, 3, 4, 5, 6, 7, 8]
# Remove elements
items.pop() # Removes and returns last (8)
items.pop(0) # Removes and returns first (0)
items.remove(3) # Removes first occurrence of 3
del items[0] # Delete by index
items.clear() # Remove all
# Modify
items = [1, 2, 3]
items[0] = 10 # [10, 2, 3]
items[1:3] = [20, 30] # [10, 20, 30]
List Operations
items = [3, 1, 4, 1, 5]
len(items) # 5
min(items) # 1
max(items) # 5
sum(items) # 14
items.count(1) # 2
items.index(4) # 2 (first occurrence)
1 in items # True
# Sorting
items.sort() # In-place: [1, 1, 3, 4, 5]
items.sort(reverse=True) # In-place descending
sorted_copy = sorted(items) # Returns new list
# Sort by key
words = ["apple", "Banana", "cherry"]
words.sort(key=str.lower)
words.sort(key=len)
# Reverse
items.reverse() # In-place
reversed_copy = items[::-1] # New list
# Copy
shallow_copy = items.copy()
shallow_copy = items[:]
shallow_copy = list(items)
import copy
deep_copy = copy.deepcopy(nested_list)
Tuples
Ordered, immutable sequences:
# Creating
point = (3, 4)
single = (42,) # Note the comma
empty = ()
from_list = tuple([1, 2, 3])
# Unpacking
x, y = point
first, *rest = (1, 2, 3, 4) # first=1, rest=[2, 3, 4]
first, *middle, last = (1, 2, 3, 4, 5)
When to Use Tuples
# Return multiple values
def get_point():
return (3, 4) # or just: return 3, 4
# Heterogeneous data (like a record)
person = ("Alice", 30, "Engineer")
# Dictionary keys (must be immutable)
locations = {(40.7, -74.0): "NYC", (34.0, -118.2): "LA"}
# Swap values
a, b = b, a
Named Tuples
Tuples with named fields:
from collections import namedtuple
Point = namedtuple('Point', ['x', 'y'])
p = Point(3, 4)
print(p.x, p.y) # 3 4
print(p[0], p[1]) # 3 4 (still works as tuple)
# Or use typing.NamedTuple for type hints
from typing import NamedTuple
class Point(NamedTuple):
x: float
y: float
label: str = "origin"
Dictionaries
Key-value mappings:
# Creating
person = {"name": "Alice", "age": 30}
empty = {}
from_pairs = dict([("a", 1), ("b", 2)])
from_keys = dict.fromkeys(["a", "b", "c"], 0) # All values = 0
Accessing Values
d = {"name": "Alice", "age": 30}
d["name"] # "Alice"
d["missing"] # KeyError!
d.get("missing") # None
d.get("missing", 0) # 0 (default)
# Keys and values
d.keys() # dict_keys(['name', 'age'])
d.values() # dict_values(['Alice', 30])
d.items() # dict_items([('name', 'Alice'), ('age', 30)])
# Check membership
"name" in d # True
"missing" in d # False
Modifying Dictionaries
d = {"a": 1, "b": 2}
# Add/update
d["c"] = 3 # Add new key
d["a"] = 10 # Update existing
d.update({"d": 4, "e": 5}) # Merge
# Remove
del d["a"] # KeyError if missing
d.pop("b") # Returns value, KeyError if missing
d.pop("x", None) # Returns None if missing
d.popitem() # Remove and return last item
d.clear() # Remove all
# Get with default and set
d.setdefault("key", []).append(1) # Creates key if missing
Dictionary Operations
d = {"a": 1, "b": 2, "c": 3}
len(d) # 3
# Iteration
for key in d:
print(key)
for key, value in d.items():
print(f"{key}: {value}")
# Merge (3.9+)
merged = d | {"d": 4}
d |= {"e": 5} # In-place merge
# Copy
shallow = d.copy()
shallow = dict(d)
Default Dict
from collections import defaultdict
# Default value for missing keys
counts = defaultdict(int) # Default: 0
counts["a"] += 1 # No KeyError
groups = defaultdict(list) # Default: []
groups["team1"].append("Alice")
nested = defaultdict(lambda: defaultdict(int))
nested["row"]["col"] = 1
Counter
from collections import Counter
# Count occurrences
counts = Counter("abracadabra")
# Counter({'a': 5, 'b': 2, 'r': 2, 'c': 1, 'd': 1})
counts.most_common(2) # [('a', 5), ('b', 2)]
counts["a"] # 5
counts["z"] # 0 (missing = 0)
# Operations
Counter("abc") + Counter("bcd") # Add counts
Counter("abc") - Counter("bcd") # Subtract
Sets
Unordered, unique elements:
# Creating
s = {1, 2, 3}
empty = set() # NOT {} (that's a dict)
from_list = set([1, 2, 2, 3]) # {1, 2, 3}
Set Operations
a = {1, 2, 3}
b = {2, 3, 4}
# Membership
2 in a # True
# Set operations
a | b # Union: {1, 2, 3, 4}
a & b # Intersection: {2, 3}
a - b # Difference: {1}
a ^ b # Symmetric difference: {1, 4}
# Comparisons
{1, 2} <= {1, 2, 3} # Subset
{1, 2, 3} >= {1, 2} # Superset
{1, 2}.isdisjoint({3, 4}) # No common elements
Modifying Sets
s = {1, 2, 3}
s.add(4) # Add element
s.update([5, 6]) # Add multiple
s.remove(1) # Remove (KeyError if missing)
s.discard(1) # Remove (no error if missing)
s.pop() # Remove and return arbitrary element
s.clear() # Remove all
Frozen Sets
Immutable sets (can be dict keys):
fs = frozenset([1, 2, 3])
# fs.add(4) # Error - immutable
Comprehensions
Concise syntax for creating collections:
List Comprehension
# [expression for item in iterable if condition]
squares = [x**2 for x in range(10)]
evens = [x for x in range(20) if x % 2 == 0]
pairs = [(x, y) for x in range(3) for y in range(3)]
# Transform
words = ["hello", "world"]
upper = [w.upper() for w in words]
# Filter and transform
positive_squares = [x**2 for x in numbers if x > 0]
Dictionary Comprehension
# {key: value for item in iterable if condition}
squares = {x: x**2 for x in range(5)}
# {0: 0, 1: 1, 2: 4, 3: 9, 4: 16}
# Invert a dictionary
original = {"a": 1, "b": 2}
inverted = {v: k for k, v in original.items()}
# {1: 'a', 2: 'b'}
# Filter
filtered = {k: v for k, v in d.items() if v > 0}
Set Comprehension
# {expression for item in iterable if condition}
unique_lengths = {len(word) for word in words}
Generator Expression
# (expression for item in iterable if condition)
# Lazy - doesn't create list in memory
squares = (x**2 for x in range(1000000))
sum(x**2 for x in range(1000000)) # Memory efficient
Choosing the Right Structure
| Need | Use |
|---|---|
| Ordered, mutable | list |
| Ordered, immutable | tuple |
| Key-value pairs | dict |
| Unique values | set |
| Count occurrences | Counter |
| Default values | defaultdict |
| Named fields | namedtuple or dataclass |
| FIFO queue | collections.deque |
| Priority queue | heapq |
Deque
Double-ended queue - efficient append/pop from both ends:
from collections import deque
d = deque([1, 2, 3])
d.append(4) # Right: [1, 2, 3, 4]
d.appendleft(0) # Left: [0, 1, 2, 3, 4]
d.pop() # Right: 4
d.popleft() # Left: 0
d.rotate(1) # Rotate right
d.rotate(-1) # Rotate left
# Fixed size (oldest items dropped)
recent = deque(maxlen=3)
recent.extend([1, 2, 3, 4, 5]) # deque([3, 4, 5])
Practice
# 1. Group items by property
words = ["apple", "banana", "cherry", "apricot", "blueberry"]
by_first_letter = defaultdict(list)
for word in words:
by_first_letter[word[0]].append(word)
# 2. Find duplicates
items = [1, 2, 2, 3, 3, 3, 4]
duplicates = [x for x in set(items) if items.count(x) > 1]
# Or with Counter
duplicates = [x for x, count in Counter(items).items() if count > 1]
# 3. Merge dicts with conflict resolution
d1 = {"a": 1, "b": 2}
d2 = {"b": 3, "c": 4}
merged = {**d1, **d2} # d2 wins conflicts
# 4. Flatten nested list
nested = [[1, 2], [3, 4], [5, 6]]
flat = [x for sublist in nested for x in sublist]
# 5. Remove duplicates preserving order
items = [3, 1, 4, 1, 5, 9, 2, 6, 5]
unique = list(dict.fromkeys(items)) # [3, 1, 4, 5, 9, 2, 6]