| name | error-retry-tracking |
| description | Instrument error handling, retries, fallbacks, and failure patterns |
| triggers | error tracking, retry instrumentation, failure handling, fallback tracking, rate limit handling |
| priority | 2 |
Error and Retry Tracking
Instrument error handling to understand failure patterns and recovery behavior.
Core Principle
Error observability answers:
- What failed and why?
- How many retries before success/failure?
- What fallbacks were used?
- What's the recovery rate?
- Are errors correlated (rate limits, outages)?
Error Classification
Transient vs. Permanent
# Error names that are typically recoverable by waiting and retrying
# (rate limits, timeouts, transient connectivity/availability issues).
# frozenset: O(1) membership tests and immutable module-level constants.
TRANSIENT_ERRORS = frozenset({
    "RateLimitError",
    "TimeoutError",
    "ServiceUnavailable",
    "ConnectionError",
})

# Error names where retrying the same request cannot succeed
# (bad input, bad credentials, policy or context-window violations).
PERMANENT_ERRORS = frozenset({
    "InvalidRequestError",
    "AuthenticationError",
    "ContentPolicyViolation",
    "ContextLengthExceeded",
})


def classify_error(error: Exception) -> str:
    """Classify an exception as ``"transient"``, ``"permanent"``, or ``"unknown"``.

    Matches on the exception's class *name* rather than its type so provider
    SDK exceptions can be classified without importing every SDK.

    Args:
        error: The exception instance to classify.

    Returns:
        ``"transient"`` for retryable errors, ``"permanent"`` for errors that
        must not be retried, ``"unknown"`` for anything unrecognized.
    """
    error_type = type(error).__name__
    if error_type in TRANSIENT_ERRORS:
        return "transient"
    if error_type in PERMANENT_ERRORS:
        return "permanent"
    return "unknown"
Error Span Attributes
# Error identification (P0)
span.set_attribute("error.type", "RateLimitError")
span.set_attribute("error.message", "Rate limit exceeded")
span.set_attribute("error.category", "transient")
span.set_attribute("error.source", "llm_provider")
# Provider context (P1)
span.set_attribute("error.provider", "anthropic")
span.set_attribute("error.model", "claude-3-opus")
span.set_attribute("error.status_code", 429)
span.set_attribute("error.request_id", "req_abc123")
# Timing context (P1)
span.set_attribute("error.retry_after_ms", 60000)
span.set_attribute("error.occurred_at_step", 3)
span.set_attribute("error.time_into_request_ms", 2500)
# Impact (P2)
span.set_attribute("error.tokens_wasted", 1500) # Tokens sent before failure
span.set_attribute("error.cost_wasted_usd", 0.015)
Retry Span Attributes
# Retry tracking (P0)
span.set_attribute("retry.attempt", 2)
span.set_attribute("retry.max_attempts", 3)
span.set_attribute("retry.strategy", "exponential_backoff")
# Timing (P1)
span.set_attribute("retry.delay_ms", 2000)
span.set_attribute("retry.total_wait_ms", 3500)
span.set_attribute("retry.jitter_ms", 150)
# Outcome (P0)
span.set_attribute("retry.success", True)
span.set_attribute("retry.final_attempt", 2)
span.set_attribute("retry.exhausted", False)
Retry Wrapper Pattern
from functools import wraps
from langfuse.decorators import observe
import time
def with_retry(
    max_attempts: int = 3,
    base_delay: float = 1.0,
    max_delay: float = 60.0,
    exponential_base: float = 2.0,
):
    """Decorator adding exponential-backoff retries with span instrumentation.

    Permanent errors (per ``classify_error``) abort immediately without
    retrying; transient/unknown errors are retried up to ``max_attempts``
    times with a capped exponential delay between attempts.

    Args:
        max_attempts: Total attempts (>= 1), including the first call.
        base_delay: Initial backoff delay in seconds.
        max_delay: Upper bound on any single delay, in seconds.
        exponential_base: Growth factor applied per attempt.

    Raises:
        ValueError: If ``max_attempts`` is less than 1.
    """
    if max_attempts < 1:
        # Bug fix: with zero attempts the loop body never ran and the
        # function ended in `raise None`, which surfaces as a confusing
        # "exceptions must derive from BaseException" TypeError.
        raise ValueError("max_attempts must be >= 1")

    def decorator(func):
        @wraps(func)
        @observe(name=f"{func.__name__}.with_retry")
        def wrapper(*args, **kwargs):
            span = get_current_span()
            span.set_attribute("retry.max_attempts", max_attempts)
            span.set_attribute("retry.strategy", "exponential_backoff")
            last_error = None
            total_wait = 0.0  # accumulated backoff, in seconds
            for attempt in range(1, max_attempts + 1):
                try:
                    span.set_attribute("retry.attempt", attempt)
                    result = func(*args, **kwargs)
                    span.set_attribute("retry.success", True)
                    span.set_attribute("retry.final_attempt", attempt)
                    # Bug fix: record accumulated wait on the success path
                    # too — previously it was only set when retries were
                    # exhausted, so successful recoveries reported no wait.
                    span.set_attribute("retry.total_wait_ms", total_wait * 1000)
                    return result
                except Exception as e:
                    last_error = e
                    category = classify_error(e)  # hoisted: was computed twice
                    span.set_attribute("error.type", type(e).__name__)
                    span.set_attribute("error.category", category)
                    if category == "permanent":
                        # Retrying a permanent error only wastes cost;
                        # exhausted=False distinguishes abort from exhaustion.
                        span.set_attribute("retry.exhausted", False)
                        span.set_attribute("retry.abort_reason", "permanent_error")
                        raise
                    if attempt < max_attempts:
                        # Capped exponential backoff: base * factor^(attempt-1).
                        delay = min(
                            base_delay * (exponential_base ** (attempt - 1)),
                            max_delay,
                        )
                        total_wait += delay
                        span.add_event("retry.waiting", {"delay_ms": delay * 1000})
                        time.sleep(delay)
            # All attempts failed on transient/unknown errors.
            span.set_attribute("retry.success", False)
            span.set_attribute("retry.exhausted", True)
            span.set_attribute("retry.total_wait_ms", total_wait * 1000)
            raise last_error
        return wrapper
    return decorator
# Example: wrap an LLM call so it is retried (with backoff) up to 3 times.
@with_retry(max_attempts=3)
def call_llm(messages):
    # NOTE(review): `client` is assumed to be a provider SDK client already
    # in scope (its messages.create API matches the Anthropic SDK) — confirm.
    return client.messages.create(messages=messages)
Fallback Tracking
# Fallback span attributes
span.set_attribute("fallback.triggered", True)
span.set_attribute("fallback.reason", "primary_model_unavailable")
span.set_attribute("fallback.from_model", "claude-3-opus")
span.set_attribute("fallback.to_model", "claude-3-sonnet")
span.set_attribute("fallback.quality_impact", "reduced")
# Fallback chain
span.set_attribute("fallback.chain", ["opus", "sonnet", "haiku"])
span.set_attribute("fallback.chain_position", 2)
Rate Limit Handling
# Rate limit specific attributes
span.set_attribute("rate_limit.type", "tokens_per_minute")
span.set_attribute("rate_limit.limit", 100000)
span.set_attribute("rate_limit.remaining", 0)
span.set_attribute("rate_limit.reset_at", "2024-01-15T10:01:00Z")
span.set_attribute("rate_limit.retry_after_ms", 45000)
# Proactive rate limiting
span.set_attribute("rate_limit.preemptive_wait", True)
span.set_attribute("rate_limit.tokens_queued", 5000)
Circuit Breaker Pattern
from enum import Enum


class CircuitState(Enum):
    """Lifecycle states for a circuit breaker guarding a flaky dependency."""

    CLOSED = "closed"        # Healthy: requests flow through normally.
    OPEN = "open"            # Tripped: requests are rejected outright.
    HALF_OPEN = "half_open"  # Probing: limited requests test recovery.
# Circuit breaker attributes
span.set_attribute("circuit.state", "open")
span.set_attribute("circuit.failure_count", 5)
span.set_attribute("circuit.failure_threshold", 5)
span.set_attribute("circuit.last_failure_at", timestamp)
span.set_attribute("circuit.opens_at", timestamp)
span.set_attribute("circuit.half_open_attempts", 0)
Error Aggregation
Track error patterns:
# Per-session error summary
span.set_attribute("session.total_errors", 3)
span.set_attribute("session.transient_errors", 2)
span.set_attribute("session.permanent_errors", 1)
span.set_attribute("session.retry_success_rate", 0.67)
# Per-provider health
span.set_attribute("provider.health", "degraded")
span.set_attribute("provider.error_rate_1h", 0.05)
span.set_attribute("provider.avg_latency_1h_ms", 2500)
Framework Integration
LangChain Retry
from langchain.chat_models import ChatAnthropic
from langfuse.callback import CallbackHandler
llm = ChatAnthropic(
model="claude-3-opus",
max_retries=3,
request_timeout=30,
)
# Callbacks capture retry behavior
handler = CallbackHandler()
response = llm.invoke(messages, config={"callbacks": [handler]})
Tenacity Integration
from tenacity import retry, stop_after_attempt, wait_exponential
from langfuse.decorators import observe
# Decorator order matters: @observe wraps the whole retry loop, so the span
# records total latency across all tenacity attempts, not per-attempt timing.
@observe(name="llm.call")
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=1, max=60),
)
def call_with_tenacity(messages):
    # NOTE(review): `client` is assumed to be a provider SDK client in scope.
    # Per-attempt spans would require tenacity before/after hooks — not shown.
    return client.messages.create(messages=messages)
Anti-Patterns
- Catching all exceptions silently (hidden failures)
- No retry tracking (can't optimize retry config)
- Missing error classification (can't distinguish transient vs. permanent)
- No fallback logging (unclear degradation)
- Retrying permanent errors (wasted cost)
Related Skills
- llm-call-tracing — LLM error context
- tool-call-tracking — Tool error handling