| name | data-validation |
| description | Validate data fields against schemas, rules, and constraints. Use when validating invoice data integrity, checking required fields, enforcing business rules, verifying data types and formats, or ensuring data quality before processing. |
Data Validation
Provides patterns for validating invoice data quality and integrity.
Field Validation
import math
import re
from datetime import datetime
from typing import Any, Callable, Dict, List
def validate_required(value: Any, field_name: str) -> tuple[bool, str]:
    """Report whether a mandatory field carries a value.

    Only ``None`` and the empty string count as missing; other falsy values
    such as ``0`` or ``False`` are treated as present.

    Args:
        value: The field value to inspect.
        field_name: Name used in the error message.

    Returns:
        ``(True, "")`` when a value is present, otherwise
        ``(False, "<field_name> is required")``.
    """
    is_missing = value is None or value == ""
    return (False, f"{field_name} is required") if is_missing else (True, "")
def validate_type(value: Any, expected_type: type, field_name: str) -> tuple[bool, str]:
    """Check that a value is an instance of the expected type.

    ``None`` is always accepted; presence is a separate concern handled by
    ``validate_required``.

    Args:
        value: The field value to inspect.
        expected_type: A type, or a tuple of types, as accepted by
            ``isinstance``.
        field_name: Name used in the error message.

    Returns:
        ``(True, "")`` when the value is ``None`` or matches, otherwise
        ``(False, "<field_name> must be <type names>")``.
    """
    if value is None or isinstance(value, expected_type):
        return True, ""

    # isinstance() accepts a tuple of types, but a tuple has no __name__.
    # Build the message from each member so a mismatch reports an error
    # instead of raising AttributeError.
    candidates = expected_type if isinstance(expected_type, tuple) else (expected_type,)
    type_names = " or ".join(getattr(t, "__name__", repr(t)) for t in candidates)
    return False, f"{field_name} must be {type_names}"
def validate_range(value: float, min_val: float, max_val: float, field_name: str) -> tuple[bool, str]:
    """Check that a numeric value lies within an inclusive range.

    ``None`` is accepted (presence is checked elsewhere). NaN is rejected,
    and a value that cannot be compared with the bounds is reported as a
    validation failure rather than raising.

    Args:
        value: The number to check, or ``None``.
        min_val: Inclusive lower bound.
        max_val: Inclusive upper bound.
        field_name: Name used in the error message.

    Returns:
        ``(True, "")`` when the value is ``None`` or within bounds, otherwise
        ``(False, message)``.
    """
    if value is None:
        return True, ""
    try:
        # Written as a positive containment test so NaN, for which every
        # comparison is False, is treated as out of range.
        in_range = min_val <= value <= max_val
    except TypeError:
        return False, f"{field_name} must be a number"
    if not in_range:
        return False, f"{field_name} must be between {min_val} and {max_val}"
    return True, ""
def validate_pattern(value: str, pattern: str, field_name: str, description: str = "format") -> tuple[bool, str]:
    """Check a string against a regular expression.

    Matching uses ``re.match``, so the pattern is anchored at the start of
    the string only. Falsy values (``None``, ``""``) are skipped and pass.

    Args:
        value: The string to test.
        pattern: Regular expression source.
        field_name: Name used in the error message.
        description: Noun describing what is being checked, used in the
            error message.

    Returns:
        ``(True, "")`` when the value is empty or matches, otherwise
        ``(False, "<field_name> has invalid <description>")``.
    """
    # Empty values are not this validator's concern.
    if not value:
        return True, ""
    if re.match(pattern, value) is None:
        return False, f"{field_name} has invalid {description}"
    return True, ""
Common Invoice Validators
def validate_invoice_number(value: str) -> tuple[bool, str]:
    """Validate an invoice number of the form ``XX-0000``.

    The accepted shape is two or three upper-case ASCII letters, a hyphen,
    then four to ten ASCII digits.

    Args:
        value: The invoice number to check.

    Returns:
        ``(True, "")`` when valid, otherwise ``(False, message)`` where the
        message says the number is either required or malformed.
    """
    if not value:
        return False, "Invoice number is required"
    # fullmatch() is used instead of match() with "$" because "$" also
    # matches just before a trailing newline, which would accept "AB-1234\n".
    # [0-9] is used instead of \d so non-ASCII digits are rejected.
    if not isinstance(value, str) or re.fullmatch(r"[A-Z]{2,3}-[0-9]{4,10}", value) is None:
        return False, "Invoice number must match format: XX-0000"
    return True, ""
def validate_amount(value: Any) -> tuple[bool, str]:
    """Validate a monetary amount.

    The value must be convertible with ``float()``, finite, and
    non-negative.

    Args:
        value: A number or numeric string.

    Returns:
        ``(True, "")`` when valid, otherwise ``(False, message)``.
    """
    try:
        amount = float(value)
    except (ValueError, TypeError, OverflowError):
        # OverflowError covers integers too large to represent as a float.
        return False, "Amount must be a valid number"
    # float() happily parses "nan" and "inf"; neither is a usable amount,
    # and NaN would also slip past the sign check below.
    if not math.isfinite(amount):
        return False, "Amount must be a valid number"
    if amount < 0:
        return False, "Amount cannot be negative"
    return True, ""
def validate_date(value: str, format: str = "%Y-%m-%d") -> tuple[bool, str]:
    """Validate that a string parses as a date in the given format.

    Args:
        value: The date string to check.
        format: A ``datetime.strptime`` format string.

    Returns:
        ``(True, "")`` when the value parses, otherwise ``(False, message)``
        where the message says the date is either required or malformed.
    """
    if not value:
        return False, "Date is required"
    try:
        datetime.strptime(value, format)
    except (ValueError, TypeError):
        # TypeError is raised for non-string input (e.g. an int such as
        # 20240101); report it as a format failure rather than crashing.
        return False, f"Date must be in format: {format}"
    return True, ""
def validate_email(value: str) -> tuple[bool, str]:
    """Validate an optional email address.

    The field is optional: ``None`` and ``""`` pass. Any other value must be
    a string of the form ``local@domain.tld``.

    Args:
        value: The email address to check, or an empty value.

    Returns:
        ``(True, "")`` when the value is empty or well-formed, otherwise
        ``(False, "Invalid email address")``.
    """
    if not value:
        return True, ""  # Optional
    pattern = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
    # fullmatch() is used instead of match() with "$" because "$" also
    # matches just before a trailing newline ("a@b.com\n" would pass).
    if not isinstance(value, str) or re.fullmatch(pattern, value) is None:
        return False, "Invalid email address"
    return True, ""
Record Validation
def validate_record(record: dict, rules: Dict[str, List[Callable]]) -> tuple[bool, List[str]]:
    """
    Run every validator registered for each field of a record.

    Each validator is called with the field's value (``None`` when the key
    is absent) and must return an ``(is_valid, error_message)`` pair. All
    validators run; failures are collected in rule order.

    Usage:
        rules = {
            "invoice_number": [validate_invoice_number],
            "amount": [validate_amount],
            "date": [lambda v: validate_date(v)]
        }
        is_valid, errors = validate_record(invoice, rules)

    Returns:
        Tuple of (is_valid, error_messages); the list is empty when valid.
    """
    failures: List[str] = []
    for field_name, checks in rules.items():
        field_value = record.get(field_name)
        outcomes = (check(field_value) for check in checks)
        failures.extend(message for passed, message in outcomes if not passed)
    return not failures, failures
def validate_batch(records: List[dict], rules: Dict[str, List[Callable]]) -> tuple[List[dict], List[dict]]:
    """
    Split a batch of records into those that pass and those that fail.

    Each record is checked with ``validate_record``. Passing records are
    returned as-is. Failing records are returned as shallow copies carrying
    an extra ``"_validation_errors"`` key with the list of messages, so the
    input records are not modified.

    Returns:
        Tuple of (valid_records, invalid_records_with_errors)
    """
    accepted: List[dict] = []
    rejected: List[dict] = []
    for entry in records:
        passed, problems = validate_record(entry, rules)
        if not passed:
            annotated = entry.copy()
            annotated["_validation_errors"] = problems
            rejected.append(annotated)
            continue
        accepted.append(entry)
    return accepted, rejected
Cross-Field Validation
def validate_totals(invoice: dict) -> tuple[bool, str]:
    """Validate that the line item amounts add up to the invoice total.

    Reads ``invoice["line_items"]`` (each item's ``"amount"``, defaulting to
    0 when absent) and ``invoice["total"]`` (defaulting to 0). A missing or
    ``None`` ``line_items`` is treated as no items. Totals may differ by up
    to 0.01 to allow for rounding.

    Args:
        invoice: Invoice mapping with optional ``line_items`` and ``total``.

    Returns:
        ``(True, "")`` when the totals agree within tolerance, otherwise
        ``(False, message)``. Amounts that cannot be converted to numbers
        produce a failure message instead of an exception.
    """
    line_items = invoice.get("line_items") or []
    try:
        # fsum() avoids the accumulated rounding error of naive float
        # summation (e.g. 0.1 + 0.2 -> 0.30000000000000004).
        calculated_total = math.fsum(float(item.get("amount", 0)) for item in line_items)
        invoice_total = float(invoice.get("total", 0))
    except (TypeError, ValueError, AttributeError, OverflowError):
        return False, "Line item amounts and invoice total must be valid numbers"

    difference = abs(calculated_total - invoice_total)
    # Written as "not <=" so a NaN difference (every comparison False)
    # counts as a mismatch instead of passing silently.
    if not difference <= 0.01:
        return False, f"Line items total ({calculated_total}) doesn't match invoice total ({invoice_total})"
    return True, ""
def validate_date_range(invoice: dict) -> tuple[bool, str]:
    """Validate that the invoice date does not fall after the due date.

    Reads ``invoice["invoice_date"]`` and ``invoice["due_date"]`` and
    compares them directly with ``>``; equal dates are accepted. If either
    value is missing or falsy the check is skipped and passes.

    Returns:
        ``(True, "")`` when the check passes or is skipped, otherwise
        ``(False, "Invoice date cannot be after due date")``.
    """
    issued = invoice.get("invoice_date")
    due = invoice.get("due_date")
    # Nothing to compare unless both dates are present.
    if not (issued and due):
        return True, ""
    if issued > due:
        return False, "Invoice date cannot be after due date"
    return True, ""
Schema Validation
def validate_schema(record: dict, schema: dict) -> tuple[bool, List[str]]:
    """
    Validate record against schema definition.

    For each field the checks run in this order: required, type, pattern,
    range. A missing required field stops further checks for that field; a
    ``None`` value on an optional field is skipped. The other checks are
    independent, so one field can produce several errors.

    Schema format:
        {
            "field_name": {
                "type": str,          # a type or a tuple of types
                "required": True,
                "pattern": r"...",    # applied with re.match to str values
                "min": 0,             # applied to int/float values
                "max": 100
            }
        }

    Returns:
        Tuple of (is_valid, error_messages); the list is empty when valid.
    """
    errors: List[str] = []
    for field, rules in schema.items():
        value = record.get(field)

        # Required check
        if rules.get("required") and (value is None or value == ""):
            errors.append(f"{field} is required")
            continue
        if value is None:
            continue

        # Type check
        expected_type = rules.get("type")
        if expected_type and not isinstance(value, expected_type):
            # "type" may be a tuple such as (int, float); a tuple has no
            # __name__, so describe each member instead of crashing.
            candidates = expected_type if isinstance(expected_type, tuple) else (expected_type,)
            type_names = " or ".join(getattr(t, "__name__", repr(t)) for t in candidates)
            errors.append(f"{field} must be {type_names}")

        # Pattern check
        pattern = rules.get("pattern")
        if pattern and isinstance(value, str) and not re.match(pattern, value):
            errors.append(f"{field} has invalid format")

        # Range check
        if isinstance(value, (int, float)):
            if "min" in rules and value < rules["min"]:
                errors.append(f"{field} must be at least {rules['min']}")
            if "max" in rules and value > rules["max"]:
                errors.append(f"{field} must be at most {rules['max']}")

    return len(errors) == 0, errors
Helper Script
Use helper.py for the DataValidator class with comprehensive validation features.