| name | json-processing |
| description | Parse, transform, validate, and generate JSON data structures. Use when reading JSON configuration files, processing API responses, extracting nested data, or converting between JSON and other formats. |
JSON Processing
Provides patterns for working with JSON data in report generation.
Reading and Writing JSON
import json
from pathlib import Path
def read_json(filepath: str) -> dict:
    """
    Read and parse a JSON file.

    The file is handed to ``json.loads`` as raw bytes so that the decoder
    detects the encoding itself (UTF-8, UTF-16 or UTF-32, with or without a
    byte-order mark) instead of depending on the platform's locale encoding.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The decoded top-level JSON value.

    Raises:
        OSError: If the file cannot be opened or read.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Bytes in, not text: read_text() would decode with the locale encoding
    # and would leave a UTF-8 BOM in place, which json.loads rejects.
    return json.loads(Path(filepath).read_bytes())
def write_json(data: dict, filepath: str, indent: int = 2):
    """
    Serialize ``data`` as indented JSON and store it at ``filepath``.

    Values the JSON encoder does not know how to serialize are converted
    with ``str`` (``default=str``). The file is written with ``Path.write_text``,
    replacing any previous content.
    """
    destination = Path(filepath)
    serialized = json.dumps(data, indent=indent, default=str)
    destination.write_text(serialized)
def parse_json(json_string: str) -> dict:
    """Decode a JSON document held in a string and return the resulting value."""
    parsed = json.loads(json_string)
    return parsed
def to_json(data: dict, indent: int = 2) -> str:
    """
    Render ``data`` as a formatted JSON string.

    ``indent`` is forwarded to ``json.dumps``; values the encoder cannot
    serialize natively are converted with ``str``.
    """
    dump_options = {"indent": indent, "default": str}
    return json.dumps(data, **dump_options)
Extracting Nested Data
from typing import Any, List
def get_nested(data: dict, path: str, default: Any = None) -> Any:
    """
    Get value from nested dictionary using dot notation.

    Each dot-separated segment of ``path`` is looked up as a key when the
    current value is a dict, or as a zero-based index when the current value
    is a list. As soon as a segment cannot be resolved (missing key,
    non-numeric or out-of-range index, or a scalar in the way) ``default``
    is returned.

    Usage:
        data = {"user": {"profile": {"name": "John"}, "tags": ["a", "b"]}}
        name = get_nested(data, "user.profile.name")     # "John"
        tag = get_nested(data, "user.tags.1")            # "b"
        missing = get_nested(data, "user.email", "N/A")  # "N/A"
    """
    value = data
    for key in path.split("."):
        if isinstance(value, dict) and key in value:
            value = value[key]
        elif (
            isinstance(value, list)
            # isdecimal(), not isdigit(): isdigit() also accepts characters
            # such as superscript digits that int() refuses to parse, which
            # would raise ValueError instead of falling back to the default.
            and key.isdecimal()
            and int(key) < len(value)
        ):
            value = value[int(key)]
        else:
            return default
    return value
def set_nested(data: dict, path: str, value: Any) -> dict:
    """
    Set value in nested dictionary using dot notation.

    Missing intermediate keys are created as empty dicts. When the path runs
    through an existing list, the corresponding segment is treated as a
    zero-based index into that list (the same addressing ``get_nested``
    uses for reading); list elements are never created, only replaced or
    descended into.

    ``data`` is modified in place and also returned.

    Raises:
        TypeError: If a segment cannot be applied to the value found at that
            position: a scalar (string, number, ``None``) is in the way, or
            the segment is not a valid in-range index for a list.

    Usage:
        data = {}
        set_nested(data, "user.profile.name", "John")
        # Result: {"user": {"profile": {"name": "John"}}}

        data = {"items": [{"id": 1}, {"id": 2}]}
        set_nested(data, "items.1.id", 9)
        # Result: {"items": [{"id": 1}, {"id": 9}]}
    """
    # JSON scalars can never hold children; anything else that is not a list
    # is treated as a mapping, as before.
    scalar_types = (str, bytes, int, float, type(None))
    keys = path.split(".")

    def _slot(container, key, depth):
        """Return the key or list index that addresses ``key`` in ``container``."""
        location = ".".join(keys[:depth]) or "<root>"
        if isinstance(container, list):
            # isdecimal() admits exactly the digit strings int() can parse.
            if key.isdecimal() and int(key) < len(container):
                return int(key)
            raise TypeError(
                f"cannot use {key!r} as an index into the list at "
                f"{location!r} (length {len(container)})"
            )
        if isinstance(container, scalar_types):
            raise TypeError(
                f"cannot set {key!r} inside the {type(container).__name__} "
                f"value at {location!r}"
            )
        return key

    current = data
    for depth, key in enumerate(keys[:-1]):
        slot = _slot(current, key, depth)
        # Only mappings grow on demand; list slots must already exist.
        if not isinstance(current, list) and slot not in current:
            current[slot] = {}
        current = current[slot]
    current[_slot(current, keys[-1], len(keys) - 1)] = value
    return data
Flattening and Unflattening
def flatten_json(data: dict, separator: str = ".") -> dict:
    """
    Flatten nested dictionary to single level.

    Nested dict keys and list positions are joined with ``separator`` to
    form the keys of the result. Empty dicts and empty lists found inside
    the data are kept as leaf values so their keys are not lost; an empty
    top-level container flattens to ``{}``.

    Usage:
        nested = {"a": {"b": {"c": 1}}, "d": 2, "e": [10, {"f": 3}], "g": {}}
        flat = flatten_json(nested)
        # Result: {"a.b.c": 1, "d": 2, "e.0": 10, "e.1.f": 3, "g": {}}
    """
    flat = {}

    def _walk(obj, parent_key, is_root):
        if isinstance(obj, dict):
            children = list(obj.items())
        elif isinstance(obj, list):
            # List positions become string key segments.
            children = [(str(i), v) for i, v in enumerate(obj)]
        else:
            flat[parent_key] = obj
            return

        if not children:
            # An empty container has no leaves to emit; record it as a leaf
            # itself so its key survives (the root has no key to record).
            if not is_root:
                flat[parent_key] = {} if isinstance(obj, dict) else []
            return

        for k, v in children:
            new_key = f"{parent_key}{separator}{k}" if parent_key else k
            _walk(v, new_key, False)

    _walk(data, "", True)
    return flat
def unflatten_json(data: dict, separator: str = ".") -> dict:
    """
    Rebuild a nested dictionary from a flat one.

    Every key is split on ``separator``; all segments but the last name
    nested dictionaries (created on first use) and the last segment
    receives the value. Numeric segments are ordinary dictionary keys,
    so lists are not reconstructed.

    Usage:
        flat = {"a.b.c": 1, "d": 2}
        nested = unflatten_json(flat)
        # Result: {"a": {"b": {"c": 1}}, "d": 2}
    """
    nested = {}
    for flat_key, leaf_value in data.items():
        *parents, leaf_name = flat_key.split(separator)
        node = nested
        for name in parents:
            # Descend, creating the intermediate level when it is absent.
            node = node.setdefault(name, {})
        node[leaf_name] = leaf_value
    return nested
JSON Querying (JSONPath-like)
from typing import List, Any
def query_json(data: dict, expression: str) -> List[Any]:
    """
    Simple JSONPath-like query.

    Supports:
        - Dot notation: "store.books"
        - Array index: "books[0]"
        - Wildcard: "books[*].title"

    The expression is applied step by step. A step that does not fit the
    value it is applied to (missing key, index out of range, index or
    wildcard on a non-list, key on a non-dict) yields no match for that
    branch. ``None`` values are omitted from the final result.

    Returns:
        The matched values, in document order; an empty list when nothing
        matches.

    Usage:
        query_json(data, "store.books[*].title")
    """
    import re

    wildcard = object()  # marker for the "[*]" step

    # Parse the expression once into typed steps: str = object key,
    # int = array index, ``wildcard`` = every array element.
    # re.split with a capturing group alternates plain text (even positions)
    # with the captured bracket tokens (odd positions), so the position alone
    # tells tokens apart from key text that merely starts with "[".
    steps = []
    for position, segment in enumerate(re.split(r"(\[\*\]|\[\d+\])", expression)):
        if position % 2:
            steps.append(wildcard if segment == "[*]" else int(segment[1:-1]))
        else:
            steps.extend(name for name in segment.split(".") if name)

    # Advance every surviving branch one step at a time; order is preserved.
    matches = [data]
    for step in steps:
        advanced = []
        for obj in matches:
            if step is wildcard:
                if isinstance(obj, list):
                    advanced.extend(obj)
            elif isinstance(step, int):
                if isinstance(obj, list) and step < len(obj):
                    advanced.append(obj[step])
            elif isinstance(obj, dict) and step in obj:
                advanced.append(obj[step])
        matches = advanced

    return [obj for obj in matches if obj is not None]
JSON Schema Extraction
def extract_schema(data: Any, max_items: int = 3) -> dict:
    """
    Summarize the shape of ``data`` as a schema-like structure.

    Dicts map each of their first ``max_items`` keys to the schema of the
    value; lists become a one-element list holding the schema of their first
    element (or an empty list); scalars become one of the labels "string",
    "boolean", "number" or "null". Any other value is labelled "unknown".

    Usage:
        data = {"name": "John", "age": 30, "tags": ["a", "b"]}
        schema = extract_schema(data)
        # Result: {"name": "string", "age": "number", "tags": ["string"]}
    """
    if isinstance(data, dict):
        schema = {}
        for key, item in list(data.items())[:max_items]:
            schema[key] = extract_schema(item, max_items)
        return schema

    if isinstance(data, list):
        return [extract_schema(data[0], max_items)] if data else []

    if data is None:
        return "null"

    # bool is tested before the numeric types because bool subclasses int.
    scalar_labels = (
        ("string", str),
        ("boolean", bool),
        ("number", (int, float)),
    )
    for label, accepted in scalar_labels:
        if isinstance(data, accepted):
            return label
    return "unknown"
Helper Script
Use `helper.py` for the `JsonProcessor` class, which provides advanced querying and transformation features.