---
name: csv-processing
description: Read, write, and transform CSV files with proper handling of encoding and formatting. Use when loading CSV data files, exporting results to CSV, handling large CSV files, or converting between CSV and other data formats.
---
# CSV Processing
Provides patterns for CSV file handling in ETL pipelines.
## Reading CSV Files
```python
import csv
from typing import Dict, List


def read_csv(filepath: str, encoding: str = 'utf-8') -> List[Dict]:
    """Read a CSV file as a list of dictionaries."""
    with open(filepath, newline='', encoding=encoding) as f:
        reader = csv.DictReader(f)
        return list(reader)


def read_csv_with_types(filepath: str, type_map: Dict[str, type]) -> List[Dict]:
    """Read a CSV file and convert the given columns to the specified types."""
    records = read_csv(filepath)
    for record in records:
        for field, field_type in type_map.items():
            # Skip missing columns and empty strings; coerce everything else.
            if field in record and record[field]:
                try:
                    record[field] = field_type(record[field])
                except (ValueError, TypeError):
                    # Unparseable values become None instead of aborting the load.
                    record[field] = None
    return records
```
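As a usage sketch (the file path and column names below are hypothetical), a typed load might look like this:

```python
# Hypothetical usage: coerce id to int and amount to float while loading.
orders = read_csv_with_types('data/orders.csv', {'id': int, 'amount': float})
```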
## Writing CSV Files
```python
import csv
from pathlib import Path
from typing import Dict, List


def write_csv(filepath: str, data: List[Dict], fieldnames: List[str] = None):
    """Write a list of dictionaries to a CSV file."""
    if not data:
        return
    # Create parent directories if they do not already exist.
    Path(filepath).parent.mkdir(parents=True, exist_ok=True)
    fieldnames = fieldnames or list(data[0].keys())
    with open(filepath, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)


def append_csv(filepath: str, data: List[Dict]):
    """Append records to a CSV file, writing a header only if the file is new."""
    if not data:
        return
    fieldnames = list(data[0].keys())
    # Check existence before opening in append mode, which creates the file if missing.
    file_exists = Path(filepath).exists()
    with open(filepath, 'a', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        if not file_exists:
            writer.writeheader()
        writer.writerows(data)
```
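A minimal usage sketch, assuming a hypothetical `output/daily_totals.csv` export:

```python
# Hypothetical usage: write an initial export, then append a later batch.
write_csv('output/daily_totals.csv', [{'date': '2024-01-01', 'total': 42}])
append_csv('output/daily_totals.csv', [{'date': '2024-01-02', 'total': 57}])
```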
## Streaming Large Files
```python
import csv
from typing import Callable, Dict, List


def process_csv_streaming(filepath: str,
                          processor: Callable[[List[Dict]], None],
                          batch_size: int = 1000):
    """Process a large CSV file in batches without loading it all into memory."""
    with open(filepath, newline='', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        batch = []
        for row in reader:
            batch.append(row)
            if len(batch) >= batch_size:
                processor(batch)
                batch = []
        # Flush any remaining rows that did not fill a complete batch.
        if batch:
            processor(batch)
```
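As a usage sketch, the processor callback can aggregate each batch as it streams past; the `data/events.csv` path and `status` column below are hypothetical:

```python
from collections import Counter

status_counts = Counter()

def count_statuses(batch):
    # Tally the hypothetical 'status' column one batch at a time.
    status_counts.update(row.get('status', '') for row in batch)

process_csv_streaming('data/events.csv', count_statuses, batch_size=5000)
```

Because only one batch is held in memory at a time, peak memory usage scales with `batch_size` rather than with the size of the file.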