File Handling

Reading, writing, and working with files.

Basic File Operations

Opening Files

# Basic pattern
file = open("filename.txt", "r")
content = file.read()
file.close()

# Better - context manager (auto-closes)
with open("filename.txt", "r") as file:
    content = file.read()
# File automatically closed here

File Modes

Mode | Description
r    | Read (default)
w    | Write (truncates)
a    | Append
x    | Exclusive create (fails if exists)
r+   | Read and write
w+   | Write and read (truncates)
a+   | Append and read
b    | Binary mode (add to others: rb, wb)
t    | Text mode (default)

Reading Files

# Read entire file
with open("file.txt") as f:
    content = f.read()

# Read lines into list - two alternatives. Use one per open file: a second
# read on the same handle starts at end-of-file and returns nothing.
with open("file.txt") as f:
    lines = f.readlines()          # Includes \n

with open("file.txt") as f:
    lines = f.read().splitlines()  # Without \n

# Read line by line (memory efficient)
with open("file.txt") as f:
    for line in f:
        print(line.strip())

# Read specific amount
with open("file.txt") as f:
    chunk = f.read(100)    # First 100 characters
    line = f.readline()    # Next line

Writing Files

# Write string
with open("file.txt", "w") as f:
    f.write("Hello, World!\n")

# Write multiple lines
lines = ["Line 1", "Line 2", "Line 3"]
with open("file.txt", "w") as f:
    f.writelines(line + "\n" for line in lines)

# Append to file
with open("file.txt", "a") as f:
    f.write("New line\n")

# Write with print
with open("file.txt", "w") as f:
    print("Hello", file=f)
    print("World", file=f)

File Position

# Binary mode: positions are plain byte offsets and seeking relative to the
# end is allowed. Text-mode files only support seek(0), seek(0, 2) and
# offsets previously returned by tell(); f.seek(-10, 2) on a text file
# raises io.UnsupportedOperation.
with open("file.txt", "rb+") as f:
    f.read(10)           # Read 10 bytes
    pos = f.tell()       # Current position (byte offset)
    f.seek(0)            # Go to beginning
    f.seek(0, 2)         # Go to end (whence=2 means "relative to end")
    f.seek(-10, 2)       # 10 bytes before end (OSError if file is shorter)

Pathlib (Modern Path Handling)

from pathlib import Path

# Create path objects
p = Path("folder/file.txt")
p = Path.cwd()               # Current directory
p = Path.home()              # Home directory
p = Path(__file__).parent    # Script's directory

# Path components
p = Path("/home/user/file.txt")
p.name          # "file.txt"
p.stem          # "file"
p.suffix        # ".txt"
p.parent        # Path("/home/user")
p.parts         # ('/', 'home', 'user', 'file.txt')

# Join paths
new_path = p / "subdir" / "file.txt"
new_path = p.joinpath("subdir", "file.txt")

# Check existence
p.exists()
p.is_file()
p.is_dir()

# File info
p.stat().st_size    # Size in bytes
p.stat().st_mtime   # Modification time

Reading/Writing with Pathlib

from pathlib import Path

p = Path("file.txt")

# Read
content = p.read_text()
data = p.read_bytes()

# Write
p.write_text("Hello, World!")
p.write_bytes(b"Binary data")

# Read lines
lines = p.read_text().splitlines()

Directory Operations

from pathlib import Path

p = Path("mydir")

# Create directory
p.mkdir()                      # Error if exists
p.mkdir(exist_ok=True)         # No error if exists
p.mkdir(parents=True)          # Create parent dirs too

# List contents
list(p.iterdir())              # All items
list(p.glob("*.txt"))          # Matching pattern
list(p.glob("**/*.py"))        # Recursive
list(p.rglob("*.py"))          # Same as above

# Remove
p.unlink()                     # Delete file
p.rmdir()                      # Delete empty directory

# Rename/move
p.rename("newname.txt")
p.replace("destination.txt")   # Overwrites if exists

Common File Operations

Copy, Move, Delete

import shutil
from pathlib import Path

# Copy file
shutil.copy("source.txt", "dest.txt")
shutil.copy2("source.txt", "dest.txt")  # Preserves metadata

# Copy directory
shutil.copytree("source_dir", "dest_dir")

# Move
shutil.move("source.txt", "dest.txt")

# Delete directory (including contents)
shutil.rmtree("directory")

Temporary Files

import tempfile

# Temporary file (auto-deleted when closed at the end of the with-block)
with tempfile.NamedTemporaryFile(mode='w') as f:
    f.write("temp data")

# Temporary file that outlives the with-block: delete=False turns off
# auto-deletion, so the file stays on disk until you remove it yourself
with tempfile.NamedTemporaryFile(mode='w', delete=False) as f:
    f.write("temp data")
    temp_path = f.name
# ... use temp_path, then clean up with os.remove(temp_path)

# Temporary directory
with tempfile.TemporaryDirectory() as tmpdir:
    # Use tmpdir
    pass  # Deleted when context exits

Check File Properties

import os
from pathlib import Path

p = Path("file.txt")

# Size
os.path.getsize("file.txt")
p.stat().st_size

# Modification time
import datetime
mtime = p.stat().st_mtime
dt = datetime.datetime.fromtimestamp(mtime)

# Permissions
os.access("file.txt", os.R_OK)  # Readable?
os.access("file.txt", os.W_OK)  # Writable?
os.access("file.txt", os.X_OK)  # Executable?

Working with Text Encodings

# Specify encoding (always do this)
with open("file.txt", "r", encoding="utf-8") as f:
    content = f.read()

# Common encodings
"utf-8"       # Unicode (default on most systems)
"latin-1"     # Western European
"cp1252"      # Windows Western European
"ascii"       # Basic ASCII

# Handle encoding errors
with open("file.txt", "r", encoding="utf-8", errors="ignore") as f:
    content = f.read()

# errors options: 'strict', 'ignore', 'replace', 'backslashreplace'

Binary Files

# Read binary
with open("image.png", "rb") as f:
    data = f.read()

# Write binary
with open("output.bin", "wb") as f:
    f.write(b"\x00\x01\x02\x03")

# Copy binary file
with open("source.png", "rb") as src:
    with open("dest.png", "wb") as dst:
        dst.write(src.read())

# Read in chunks (memory efficient)
with open("large_file.bin", "rb") as f:
    while chunk := f.read(8192):
        process(chunk)

CSV Files

import csv

# Read CSV
with open("data.csv", newline="") as f:
    reader = csv.reader(f)
    header = next(reader)      # First row
    for row in reader:
        print(row)             # List of values

# Read as dictionaries
with open("data.csv", newline="") as f:
    reader = csv.DictReader(f)
    for row in reader:
        print(row["name"])     # Access by column name

# Write CSV
with open("output.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["name", "age"])
    writer.writerow(["Alice", 30])
    writer.writerows([["Bob", 25], ["Charlie", 35]])

# Write from dictionaries
with open("output.csv", "w", newline="") as f:
    fieldnames = ["name", "age"]
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerow({"name": "Alice", "age": 30})

JSON Files

import json

# Read JSON
with open("data.json") as f:
    data = json.load(f)

# Write JSON
with open("output.json", "w") as f:
    json.dump(data, f, indent=2)

# String conversion
json_string = json.dumps(data)
data = json.loads(json_string)

# Handle non-serializable types
import datetime  # required by custom_encoder below


def custom_encoder(obj):
    """Fallback serializer for json.dump()/json.dumps().

    Pass it as ``default=``; json calls it for every object it cannot
    serialize on its own.

    Args:
        obj: The object json could not serialize.

    Returns:
        The ISO 8601 string for a ``datetime.datetime`` instance.

    Raises:
        TypeError: For any other type, so json reports unsupported
            objects instead of silently dropping them.
    """
    if isinstance(obj, datetime.datetime):
        return obj.isoformat()
    raise TypeError(f"Not serializable: {type(obj)}")

json.dumps(data, default=custom_encoder)

Configuration Files

INI Files

import configparser

config = configparser.ConfigParser()
config.read("config.ini")

# Access values
value = config["section"]["key"]
value = config.get("section", "key", fallback="default")

# Write
config["section"] = {"key": "value"}
with open("config.ini", "w") as f:
    config.write(f)

YAML Files

import yaml  # pip install pyyaml

# Read
with open("config.yaml") as f:
    data = yaml.safe_load(f)

# Write
with open("config.yaml", "w") as f:
    yaml.dump(data, f)

TOML Files (3.11+)

import tomllib  # Built-in for reading (3.11+)

with open("config.toml", "rb") as f:
    data = tomllib.load(f)

# For writing, use tomli-w package

Compressed Files

Gzip

import gzip

# Read
with gzip.open("file.txt.gz", "rt") as f:
    content = f.read()

# Write
with gzip.open("file.txt.gz", "wt") as f:
    f.write("Compressed content")

Zip Archives

import zipfile

# Create zip
with zipfile.ZipFile("archive.zip", "w") as zf:
    zf.write("file1.txt")
    zf.write("file2.txt")

# Extract
with zipfile.ZipFile("archive.zip", "r") as zf:
    zf.extractall("output_dir")
    zf.extract("file1.txt", "output_dir")

# List contents
with zipfile.ZipFile("archive.zip", "r") as zf:
    print(zf.namelist())

Best Practices

Always Use Context Managers

# Good
with open("file.txt") as f:
    content = f.read()

# Bad - may not close on error
f = open("file.txt")
content = f.read()
f.close()

Always Specify Encoding

# Good
with open("file.txt", encoding="utf-8") as f:
    content = f.read()

# Bad - uses system default
with open("file.txt") as f:
    content = f.read()

Use Pathlib

# Good
from pathlib import Path
path = Path("dir") / "file.txt"

# Less good
import os
path = os.path.join("dir", "file.txt")

Practice

# 1. Read and process log file
from pathlib import Path

log_file = Path("app.log")
# Explicit encoding so the result does not depend on the system default
log_lines = log_file.read_text(encoding="utf-8").splitlines()
errors = [line for line in log_lines if "ERROR" in line]

# 2. Batch rename files
from pathlib import Path

folder = Path("images")
# Snapshot and sort the matches before renaming: glob() scans lazily, so
# renaming inside the same directory mid-iteration can skip or revisit
# files, and sorting makes the numbering deterministic.
# NOTE: on POSIX, rename() silently replaces an existing target file.
for i, file in enumerate(sorted(folder.glob("*.jpg"))):
    file.rename(folder / f"image_{i:03d}.jpg")

# 3. Merge CSV files
import csv
from pathlib import Path

output = []  # One dict per data row, across all files
for csv_file in Path(".").glob("*.csv"):
    # Explicit encoding so parsing does not depend on the system default;
    # newline="" lets the csv module handle line endings itself
    with open(csv_file, newline="", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        output.extend(reader)

# 4. Watch file for changes
import time
from pathlib import Path

file = Path("data.txt")
last_mtime = file.stat().st_mtime
while True:
    time.sleep(1)
    current_mtime = file.stat().st_mtime
    if current_mtime != last_mtime:
        print("File changed!")
        last_mtime = current_mtime