File Handling
Reading, writing, and working with files.
Basic File Operations
Opening Files
# Basic pattern
file = open("filename.txt", "r")
content = file.read()
file.close()
# Better - context manager (auto-closes)
with open("filename.txt", "r") as file:
content = file.read()
# File automatically closed here
File Modes
| Mode | Description |
|---|---|
| r | Read (default) |
| w | Write (truncates) |
| a | Append |
| x | Exclusive create (fails if exists) |
| r+ | Read and write |
| w+ | Write and read (truncates) |
| a+ | Append and read |
| b | Binary mode (add to others: rb, wb) |
| t | Text mode (default) |
Reading Files
# Read entire file
with open("file.txt") as f:
content = f.read()
# Read lines into list
with open("file.txt") as f:
    lines = f.readlines()  # Each line keeps its trailing \n

# Alternative: lines without the trailing \n.
# This needs its own open(): once readlines() has consumed the file,
# a following f.read() on the same handle returns "".
with open("file.txt") as f:
    lines = f.read().splitlines()
# Read line by line (memory efficient)
with open("file.txt") as f:
for line in f:
print(line.strip())
# Read specific amount
with open("file.txt") as f:
chunk = f.read(100) # First 100 characters
line = f.readline() # Next line
Writing Files
# Write string
with open("file.txt", "w") as f:
f.write("Hello, World!\n")
# Write multiple lines
lines = ["Line 1", "Line 2", "Line 3"]
with open("file.txt", "w") as f:
f.writelines(line + "\n" for line in lines)
# Append to file
with open("file.txt", "a") as f:
f.write("New line\n")
# Write with print
with open("file.txt", "w") as f:
print("Hello", file=f)
print("World", file=f)
File Position
# Binary mode: positions are plain byte offsets and seeking relative to the
# end is allowed. In text mode only seek(0), seek(0, 2) and seek(<value
# returned by tell()>) are supported; f.seek(-10, 2) raises
# io.UnsupportedOperation there.
with open("file.txt", "rb+") as f:
    f.read(10)  # Read 10 bytes
    pos = f.tell()  # Current position (byte offset)
    f.seek(0)  # Go to beginning
    f.seek(0, 2)  # Go to end (whence=2 is os.SEEK_END)
    f.seek(-10, 2)  # 10 bytes before end (OSError if the file is shorter)
Pathlib (Modern Path Handling)
from pathlib import Path
# Create path objects
p = Path("folder/file.txt")
p = Path.cwd() # Current directory
p = Path.home() # Home directory
p = Path(__file__).parent # Script's directory
# Path components
p = Path("/home/user/file.txt")
p.name # "file.txt"
p.stem # "file"
p.suffix # ".txt"
p.parent # Path("/home/user")
p.parts # ('/', 'home', 'user', 'file.txt')
# Join paths
new_path = p / "subdir" / "file.txt"
new_path = p.joinpath("subdir", "file.txt")
# Check existence
p.exists()
p.is_file()
p.is_dir()
# File info
p.stat().st_size # Size in bytes
p.stat().st_mtime # Modification time
Reading/Writing with Pathlib
from pathlib import Path
p = Path("file.txt")
# Read
content = p.read_text()
data = p.read_bytes()
# Write
p.write_text("Hello, World!")
p.write_bytes(b"Binary data")
# Read lines
lines = p.read_text().splitlines()
Directory Operations
from pathlib import Path
p = Path("mydir")
# Create directory
p.mkdir() # Error if exists
p.mkdir(exist_ok=True) # No error if exists
p.mkdir(parents=True) # Create parent dirs too
# List contents
list(p.iterdir()) # All items
list(p.glob("*.txt")) # Matching pattern
list(p.glob("**/*.py")) # Recursive
list(p.rglob("*.py")) # Same as above
# Remove
p.unlink() # Delete file
p.rmdir() # Delete empty directory
# Rename/move
p.rename("newname.txt")
p.replace("destination.txt") # Overwrites if exists
Common File Operations
Copy, Move, Delete
import shutil
from pathlib import Path
# Copy file
shutil.copy("source.txt", "dest.txt")
shutil.copy2("source.txt", "dest.txt") # Preserves metadata
# Copy directory
shutil.copytree("source_dir", "dest_dir")
# Move
shutil.move("source.txt", "dest.txt")
# Delete directory (including contents)
shutil.rmtree("directory")
Temporary Files
import os
import tempfile

# Temporary file (auto-deleted when the with-block closes it)
with tempfile.NamedTemporaryFile(mode="w") as f:
    f.write("temp data")

# Temporary file that outlives the with-block: delete=False turns off
# auto-deletion, so the path stays usable after close and has to be
# removed manually.
with tempfile.NamedTemporaryFile(mode='w', delete=False) as f:
    f.write("temp data")
    temp_path = f.name
try:
    pass  # Use temp_path (e.g. reopen it or pass it to another program)
finally:
    os.unlink(temp_path)

# Temporary directory
with tempfile.TemporaryDirectory() as tmpdir:
    # Use tmpdir
    pass  # Deleted when context exits
Check File Properties
import os
from pathlib import Path
p = Path("file.txt")
# Size
os.path.getsize("file.txt")
p.stat().st_size
# Modification time
import datetime
mtime = p.stat().st_mtime
dt = datetime.datetime.fromtimestamp(mtime)
# Permissions
os.access("file.txt", os.R_OK) # Readable?
os.access("file.txt", os.W_OK) # Writable?
os.access("file.txt", os.X_OK) # Executable?
Working with Text Encodings
# Specify encoding (always do this)
with open("file.txt", "r", encoding="utf-8") as f:
content = f.read()
# Common encodings
"utf-8" # Unicode (default on most systems)
"latin-1" # Western European
"cp1252" # Windows Western European
"ascii" # Basic ASCII
# Handle encoding errors
with open("file.txt", "r", encoding="utf-8", errors="ignore") as f:
content = f.read()
# errors options: 'strict', 'ignore', 'replace', 'backslashreplace'
Binary Files
# Read binary
with open("image.png", "rb") as f:
data = f.read()
# Write binary
with open("output.bin", "wb") as f:
f.write(b"\x00\x01\x02\x03")
# Copy binary file
with open("source.png", "rb") as src:
with open("dest.png", "wb") as dst:
dst.write(src.read())
# Read in chunks (memory efficient)
with open("large_file.bin", "rb") as f:
while chunk := f.read(8192):
process(chunk)
CSV Files
import csv
# Read CSV
with open("data.csv", newline="") as f:
reader = csv.reader(f)
header = next(reader) # First row
for row in reader:
print(row) # List of values
# Read as dictionaries
with open("data.csv", newline="") as f:
reader = csv.DictReader(f)
for row in reader:
print(row["name"]) # Access by column name
# Write CSV
with open("output.csv", "w", newline="") as f:
writer = csv.writer(f)
writer.writerow(["name", "age"])
writer.writerow(["Alice", 30])
writer.writerows([["Bob", 25], ["Charlie", 35]])
# Write from dictionaries
with open("output.csv", "w", newline="") as f:
fieldnames = ["name", "age"]
writer = csv.DictWriter(f, fieldnames=fieldnames)
writer.writeheader()
writer.writerow({"name": "Alice", "age": 30})
JSON Files
import json
# Read JSON
with open("data.json") as f:
data = json.load(f)
# Write JSON
with open("output.json", "w") as f:
json.dump(data, f, indent=2)
# String conversion
json_string = json.dumps(data)
data = json.loads(json_string)
# Handle non-serializable types
def custom_encoder(obj):
    """Serialize date/time values for ``json.dumps(..., default=custom_encoder)``.

    Args:
        obj: The value the JSON encoder could not serialize on its own.

    Returns:
        The ISO 8601 string for ``datetime.datetime``, ``datetime.date`` and
        ``datetime.time`` instances.

    Raises:
        TypeError: For any other type, so the JSON encoder reports the
            unsupported value instead of silently emitting something wrong.
    """
    # Local import keeps this snippet self-contained.
    import datetime

    # datetime.datetime is a subclass of datetime.date, so it is covered too.
    if isinstance(obj, (datetime.date, datetime.time)):
        return obj.isoformat()
    raise TypeError(f"Not serializable: {type(obj)}")
json.dumps(data, default=custom_encoder)
Configuration Files
INI Files
import configparser
config = configparser.ConfigParser()
config.read("config.ini")
# Access values
value = config["section"]["key"]
value = config.get("section", "key", fallback="default")
# Write
config["section"] = {"key": "value"}
with open("config.ini", "w") as f:
config.write(f)
YAML Files
import yaml # pip install pyyaml
# Read
with open("config.yaml") as f:
data = yaml.safe_load(f)
# Write
with open("config.yaml", "w") as f:
yaml.dump(data, f)
TOML Files (3.11+)
import tomllib # Built-in for reading (3.11+)
with open("config.toml", "rb") as f:
data = tomllib.load(f)
# For writing, use tomli-w package
Compressed Files
Gzip
import gzip
# Read
with gzip.open("file.txt.gz", "rt") as f:
content = f.read()
# Write
with gzip.open("file.txt.gz", "wt") as f:
f.write("Compressed content")
Zip Archives
import zipfile
# Create zip
with zipfile.ZipFile("archive.zip", "w") as zf:
zf.write("file1.txt")
zf.write("file2.txt")
# Extract
with zipfile.ZipFile("archive.zip", "r") as zf:
zf.extractall("output_dir")
zf.extract("file1.txt", "output_dir")
# List contents
with zipfile.ZipFile("archive.zip", "r") as zf:
print(zf.namelist())
Best Practices
Always Use Context Managers
# Good
with open("file.txt") as f:
content = f.read()
# Bad - may not close on error
f = open("file.txt")
content = f.read()
f.close()
Always Specify Encoding
# Good
with open("file.txt", encoding="utf-8") as f:
content = f.read()
# Bad - uses system default
with open("file.txt") as f:
content = f.read()
Use Pathlib
# Good
from pathlib import Path
path = Path("dir") / "file.txt"
# Less good
import os
path = os.path.join("dir", "file.txt")
Practice
# 1. Read and process log file
from pathlib import Path
log_file = Path("app.log")
errors = [line for line in log_file.read_text().splitlines()
if "ERROR" in line]
# 2. Batch rename files
from pathlib import Path

folder = Path("images")
# glob() yields entries in arbitrary order, so sort a snapshot of the matches
# first: the numbering becomes deterministic and the listing is complete
# before any file is renamed.
for i, file in enumerate(sorted(folder.glob("*.jpg"))):
    target = folder / f"image_{i:03d}.jpg"
    if target == file:
        continue  # Already has the right name
    if target.exists():
        # Path.rename() silently replaces an existing file on POSIX;
        # stop instead of losing data.
        raise FileExistsError(f"Refusing to overwrite {target}")
    file.rename(target)
# 3. Merge CSV files
import csv
from pathlib import Path
output = []
for csv_file in Path(".").glob("*.csv"):
with open(csv_file, newline="") as f:
reader = csv.DictReader(f)
output.extend(reader)
# 4. Watch file for changes
import time
from pathlib import Path
file = Path("data.txt")
last_mtime = file.stat().st_mtime
while True:
time.sleep(1)
current_mtime = file.stat().st_mtime
if current_mtime != last_mtime:
print("File changed!")
last_mtime = current_mtime