Common Python pitfalls and patterns that TAs should watch for in student code. These are language-specific issues that can cause subtle bugs or make code harder to maintain.
❌ The classic Python trap
def add_item(item, target_list=[]):
"""Add item to a list - BROKEN!"""
# The [] default is a single list object; every call that omits
# target_list appends to that same object.
target_list.append(item)
return target_list
# This breaks because the same list is reused across calls
items1 = add_item("apple") # ["apple"]
items2 = add_item("banana") # ["apple", "banana"] - Oops!
✅ Use None as default, create new objects inside function
def add_item(item, target_list=None):
    """Append ``item`` to ``target_list`` and return the list.

    When the caller passes no list, a fresh one is created for this call,
    so separate calls never share state. A list that is passed in is
    modified in place and returned.
    """
    destination = [] if target_list is None else target_list
    destination.append(item)
    return destination
# Alternative: copy the list if you do NOT want to modify the caller's list
def add_item(item, target_list=None):
    """Return a new list containing ``target_list``'s items plus ``item``.

    The caller's list is never touched: the function works on a copy, or
    on a brand-new list when no list was supplied.
    """
    # Don't modify caller's list
    combined = [] if target_list is None else target_list.copy()
    combined.append(item)
    return combined
❌ Late binding closures
# This doesn't work as expected
functions = []
for i in range(3):
functions.append(lambda: print(i))
# All functions print 2 (the final value of i)
for func in functions:
func() # Prints: 2, 2, 2
✅ Capture variables explicitly
functions = []
for i in range(3):
functions.append(lambda x=i: print(x)) # Capture i's current value
# Now each function prints its expected value
for func in functions:
func() # Prints: 0, 1, 2
Variable shadowing in comprehensions
# ❌ Can be confusing
items = ["a", "b", "c"]
result = [item.upper() for item in items if len(item) > 0]
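# What's the value of 'item' here? In Python 3 the loop variable is local to the comprehension, so 'item' is not defined afterwards (in Python 2 it leaked into the enclosing scope)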
# ✅ Use different variable names to avoid confusion
items = ["a", "b", "c"]
result = [x.upper() for x in items if len(x) > 0]
# 'items' is still clearly the original list
✅ Good import practices
# Standard library first, then third-party, then local
import os
import sys
from pathlib import Path
import pandas as pd
import numpy as np
from your_package.utils import helper_function
❌ Common import mistakes
# Avoid star imports - makes it unclear where things come from
from pandas import * # What functions are available?
# Avoid importing inside functions unless necessary
def process_data():
# Counter-example: the import sits inside the function body, so the
# dependency is not visible among the module-level imports.
import pandas as pd # Usually should be at module level
return pd.DataFrame()
# Don't override builtin names, and avoid redundant aliases
import json as json # Redundant alias - identical to plain `import json`
from datetime import datetime as datetime # Confusing
Relative imports in packages
# In a package structure:
# my_package/
# __init__.py
# analysis.py
# utils.py
# In analysis.py:
from .utils import helper_function # ✅ Explicit relative import
from my_package.utils import helper_function # ✅ Also fine
# Avoid
import utils # ❌ Might not find the right module
✅ Specific exception handling
def load_config(path: str) -> dict:
    """Load configuration file with specific error handling.

    Args:
        path: Location of the JSON configuration file.

    Returns:
        The parsed JSON content.

    Raises:
        ValueError: If the file does not exist or does not contain valid
            JSON. The original exception is attached as ``__cause__``.
    """
    try:
        with open(path) as f:
            return json.load(f)
    except FileNotFoundError as e:
        # "from e" keeps the low-level error in the traceback as the cause.
        raise ValueError(f"Config file not found: {path}") from e
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON in config file: {e}") from e
# Don't catch Exception - too broad
❌ Common error handling mistakes
# Too broad exception catching
try:
result = risky_operation()
except: # ❌ Catches everything, including KeyboardInterrupt
print("Something went wrong")
# Silencing errors
try:
result = risky_operation()
except Exception:
pass # ❌ Error disappears, hard to debug
# Not re-raising when you should
try:
result = risky_operation()
except ValueError:
print("Error occurred") # ❌ Should probably re-raise
return None
EAFP vs LBYL (Easier to Ask Forgiveness vs Look Before You Leap)
# ✅ Pythonic: EAFP
try:
value = my_dict[key]
except KeyError:
value = default_value
# Less Pythonic: LBYL
if key in my_dict:
value = my_dict[key]
else:
value = default_value
String building
# ❌ Inefficient for many concatenations
result = ""
for item in large_list:
result += str(item) + ", "
# ✅ Use join for multiple concatenations
result = ", ".join(str(item) for item in large_list)
# ✅ Use f-strings for formatting
name = "Alice"
age = 30
message = f"Hello {name}, you are {age} years old" # Clear and fast
Dictionary and list operations
# ✅ Use dict.get() with defaults
value = config.get("timeout", 30) # Returns 30 if "timeout" not in config
# ✅ Use collections.defaultdict for accumulating
from collections import defaultdict
counts = defaultdict(int)
for item in items:
counts[item] += 1 # No need to check if key exists
# ✅ Use enumerate when you need both index and value
for i, item in enumerate(items):
print(f"{i}: {item}")
# ❌ Don't do this
for i in range(len(items)):
item = items[i]
print(f"{i}: {item}")
Generator expressions vs list comprehensions
# ✅ Use generators for large datasets
def process_large_file(filename):
    """Count the lines of ``filename`` that start with "ERROR".

    Leading and trailing whitespace is stripped from each line before the
    prefix check. Lines are streamed one at a time, and the file is closed
    when the count is finished (or if an error occurs).
    """
    with open(filename) as f:
        # Generator - memory efficient
        lines = (line.strip() for line in f)
        # Consume the generator inside the with-block, while the file is open.
        return sum(1 for line in lines if line.startswith("ERROR"))
# ❌ List comprehension loads everything into memory
def process_large_file_bad(filename):
    """Count lines starting with "ERROR" - counter-example.

    Returns the same count as the generator version, but builds a list of
    every stripped line first. The file is opened with a context manager so
    that the list is the only flaw this example demonstrates.
    """
    with open(filename) as f:
        lines = [line.strip() for line in f] # Could be huge!
    return sum(1 for line in lines if line.startswith("ERROR"))
Avoid repeated expensive operations
# ❌ Repeated computation in loop
for item in items:
if expensive_function() > threshold: # Called every iteration!
process(item)
# ✅ Compute once
expensive_result = expensive_function()
for item in items:
if expensive_result > threshold:
process(item)
✅ Always use context managers for resources
# File handling
with open("data.txt") as f:
content = f.read()
# File automatically closed
# Database connections
with get_database_connection() as conn:
result = conn.execute(query)
# Connection automatically closed
Custom context managers when needed
from contextlib import contextmanager
import time
@contextmanager
def timer(description):
    """Time a block of code.

    Args:
        description: Label printed in front of the elapsed time.

    Prints "<description> took <seconds> seconds" when the block exits,
    including when the block raises; the exception still propagates.
    """
    # perf_counter is a monotonic, high-resolution clock; time.time() can
    # jump when the system clock is adjusted.
    start = time.perf_counter()
    try:
        yield
    finally:
        elapsed = time.perf_counter() - start
        print(f"{description} took {elapsed:.2f} seconds")
# Usage
with timer("Data processing"):
process_large_dataset()
✅ Good practices to encourage:
Using None defaults instead of mutable defaults
❌ Common issues to flag:
Mutable default arguments (def func(items=[]):)
Bare except: clauses that catch everything
String concatenation with += in loops
Shadowing builtin names (list = [1,2,3])
Review questions to ask:
Progression from problematic to Pythonic:
# Show them the mutable default argument problem
def broken_function(items=[]):
# Deliberately broken demo: the [] default is one list object shared by
# every call that omits items, so appended values accumulate.
items.append("new")
return items
print(broken_function()) # ["new"]
print(broken_function()) # ["new", "new"] - Surprise!
def fixed_function(items=None):
    """Append "new" to ``items`` and return the list.

    A fresh list is created on each call that omits ``items``, so results
    do not accumulate between calls. A list passed in is modified in place.
    """
    result = [] if items is None else items
    result.append("new")
    return result
# This pattern shows up in data processing
def add_features(df, new_columns=None):
    """Illustrative skeleton of the None-default pattern in data processing.

    Only normalises ``new_columns`` to a fresh list when it is omitted;
    ``df`` is not used and nothing is returned.
    """
    new_columns = [] if new_columns is None else new_columns
    # Now safe to modify new_columns
Teaching approach: