| name | apollo-observability |
| description | Set up Apollo.io monitoring and observability. Use when implementing logging, metrics, tracing, and alerting for Apollo integrations. Trigger with phrases like "apollo monitoring", "apollo metrics", "apollo observability", "apollo logging", "apollo alerts". |
| allowed-tools | Read, Write, Edit, Bash(kubectl:*), Bash(curl:*) |
| version | 1.0.0 |
| license | MIT |
| author | Jeremy Longshore <jeremy@intentsolutions.io> |
Apollo Observability
Overview
Comprehensive observability setup for Apollo.io integrations including metrics, logging, tracing, and alerting.
Metrics with Prometheus
// src/lib/apollo/metrics.ts
import { Registry, Counter, Histogram, Gauge } from 'prom-client';
/** Dedicated registry that holds every Apollo metric defined in this module. */
export const register = new Registry();

/** Registry list passed to each metric so that all of them are attached to `register`. */
const apolloRegistries = [register];

// ---------------------------------------------------------------------------
// Request metrics
// ---------------------------------------------------------------------------

/** Counts Apollo API requests, labelled by endpoint, HTTP method and outcome status. */
export const apolloRequestsTotal = new Counter({
  registers: apolloRegistries,
  name: 'apollo_requests_total',
  help: 'Total number of Apollo API requests',
  labelNames: ['endpoint', 'method', 'status'],
});

/** Histogram of Apollo API request durations (seconds), labelled by endpoint and method. */
export const apolloRequestDuration = new Histogram({
  registers: apolloRegistries,
  name: 'apollo_request_duration_seconds',
  help: 'Duration of Apollo API requests in seconds',
  labelNames: ['endpoint', 'method'],
  buckets: [0.1, 0.25, 0.5, 1, 2.5, 5, 10],
});

// ---------------------------------------------------------------------------
// Rate limit metrics
// ---------------------------------------------------------------------------

/** Last observed remaining rate-limit allowance, tracked per endpoint. */
export const apolloRateLimitRemaining = new Gauge({
  registers: apolloRegistries,
  name: 'apollo_rate_limit_remaining',
  help: 'Remaining Apollo API rate limit',
  labelNames: ['endpoint'],
});

/** Counts how many times the rate limit was hit (no labels). */
export const apolloRateLimitHits = new Counter({
  registers: apolloRegistries,
  name: 'apollo_rate_limit_hits_total',
  help: 'Number of times rate limit was hit',
});

// ---------------------------------------------------------------------------
// Cache metrics
// ---------------------------------------------------------------------------

/** Counts cache hits per endpoint. */
export const apolloCacheHits = new Counter({
  registers: apolloRegistries,
  name: 'apollo_cache_hits_total',
  help: 'Number of Apollo cache hits',
  labelNames: ['endpoint'],
});

/** Counts cache misses per endpoint. */
export const apolloCacheMisses = new Counter({
  registers: apolloRegistries,
  name: 'apollo_cache_misses_total',
  help: 'Number of Apollo cache misses',
  labelNames: ['endpoint'],
});

// ---------------------------------------------------------------------------
// Credit usage
// ---------------------------------------------------------------------------

/** Counts Apollo credits consumed, labelled by operation. */
export const apolloCreditsUsed = new Counter({
  registers: apolloRegistries,
  name: 'apollo_credits_used_total',
  help: 'Total Apollo credits consumed',
  labelNames: ['operation'],
});

// ---------------------------------------------------------------------------
// Error tracking
// ---------------------------------------------------------------------------

/** Counts Apollo API errors, labelled by endpoint and error type. */
export const apolloErrors = new Counter({
  registers: apolloRegistries,
  name: 'apollo_errors_total',
  help: 'Total Apollo API errors',
  labelNames: ['endpoint', 'error_type'],
});
Instrumented Client
// src/lib/apollo/instrumented-client.ts
import {
  apolloErrors,
  apolloRateLimitHits,
  apolloRateLimitRemaining,
  apolloRequestDuration,
  apolloRequestsTotal,
} from './metrics';
/**
 * Minimal shape of the HTTP client wrapped by {@link InstrumentedApolloClient}:
 * a `request` call that resolves to the response payload plus its headers.
 */
interface ApolloBaseClient {
  request<T>(
    endpoint: string,
    options: RequestOptions
  ): Promise<{ data: T; headers: Record<string, string | string[] | undefined> }>;
}

/**
 * Wraps an Apollo HTTP client and records Prometheus metrics for every call:
 * request count, request duration, remaining rate limit, rate-limit hits and
 * classified errors.
 */
export class InstrumentedApolloClient {
  /**
   * @param baseClient Underlying client that actually performs the HTTP call.
   */
  constructor(private readonly baseClient: ApolloBaseClient) {}

  /**
   * Performs a request through the base client while recording metrics.
   *
   * @param endpoint Apollo endpoint being called; also used as a metric label.
   * @param options  Request options; `options.method` defaults to `'POST'` for labelling.
   * @returns The `data` payload of the response.
   * @throws Re-throws whatever the base client throws, after recording error metrics.
   */
  async request<T>(endpoint: string, options: RequestOptions): Promise<T> {
    const labels = { endpoint, method: options.method || 'POST' };
    const endTimer = apolloRequestDuration.startTimer(labels);

    try {
      const response = await this.baseClient.request<T>(endpoint, options);
      apolloRequestsTotal.inc({ ...labels, status: 'success' });

      // Track rate limit from headers. Parse with an explicit radix and skip
      // missing or non-numeric values so the gauge is never set to NaN.
      const remaining = Number.parseInt(
        String(response.headers['x-ratelimit-remaining'] ?? ''),
        10
      );
      if (Number.isFinite(remaining)) {
        apolloRateLimitRemaining.set({ endpoint }, remaining);
      }

      return response.data;
    } catch (error: unknown) {
      apolloRequestsTotal.inc({ ...labels, status: 'error' });
      apolloErrors.inc({ endpoint, error_type: this.classifyError(error) });

      if (InstrumentedApolloClient.httpStatus(error) === 429) {
        apolloRateLimitHits.inc();
      }

      throw error;
    } finally {
      // Always observe the duration, for successes and failures alike.
      endTimer();
    }
  }

  /**
   * Maps a thrown value to the `error_type` label value.
   * HTTP status codes are checked first, then Node.js network error codes;
   * anything else (including non-object throwables) is `'unknown'`.
   */
  private classifyError(error: unknown): string {
    const status = InstrumentedApolloClient.httpStatus(error);
    if (status === 401) return 'auth_error';
    if (status === 403) return 'permission_error';
    if (status === 422) return 'validation_error';
    if (status === 429) return 'rate_limit';
    if (status !== undefined && status >= 500) return 'server_error';

    const code =
      typeof error === 'object' && error !== null
        ? (error as { code?: unknown }).code
        : undefined;
    if (code === 'ECONNREFUSED') return 'connection_error';
    if (code === 'ETIMEDOUT') return 'timeout';
    return 'unknown';
  }

  /** Safely reads `error.response.status`; returns `undefined` when it is absent or not a number. */
  private static httpStatus(error: unknown): number | undefined {
    if (typeof error !== 'object' || error === null) return undefined;
    const response = (error as { response?: unknown }).response;
    if (typeof response !== 'object' || response === null) return undefined;
    const status = (response as { status?: unknown }).status;
    return typeof status === 'number' ? status : undefined;
  }
}
Structured Logging
// src/lib/apollo/logger.ts
import pino from 'pino';
/**
 * Base pino logger for the Apollo integration.
 *
 * - Level comes from `LOG_LEVEL` (default `info`).
 * - The level is emitted as its text label instead of pino's numeric value.
 * - Secrets and PII are replaced with `[REDACTED]` before a record is written.
 */
const logger = pino({
  level: process.env.LOG_LEVEL || 'info',
  formatters: {
    level: (label) => ({ level: label }),
  },
  redact: {
    paths: [
      'api_key',
      // Request logs nest caller-supplied params one level down (e.g. `params.api_key`),
      // which the top-level `api_key` path above does not match.
      '*.api_key',
      '*.email',
      '*.phone',
      'headers.authorization',
      // Header-based API key; bracket notation is required because of the dashes.
      'headers["x-api-key"]',
    ],
    censor: '[REDACTED]',
  },
  base: {
    service: 'apollo-integration',
    environment: process.env.NODE_ENV,
  },
});

/** Child logger that tags every record with `component: 'apollo'`. */
export const apolloLogger = logger.child({ component: 'apollo' });
// Request/response logging
/**
 * Writes an info-level `apollo_request` record for an outgoing call.
 *
 * @param context Endpoint, HTTP method, optional params and the correlating request id.
 *                All fields are copied onto the log record as-is.
 */
export function logApolloRequest(context: {
  endpoint: string;
  method: string;
  params?: object;
  requestId: string;
}): void {
  const timestamp = new Date().toISOString();
  // Spread order matters: `type` first, context fields next, `timestamp` last.
  const record = { type: 'apollo_request', ...context, timestamp };
  apolloLogger.info(record);
}
/**
 * Writes an info-level `apollo_response` record for a completed call.
 *
 * @param context Endpoint, response status, duration in milliseconds, the correlating
 *                request id and an optional result count. All fields are copied onto
 *                the log record as-is.
 */
export function logApolloResponse(context: {
  endpoint: string;
  status: number;
  durationMs: number;
  requestId: string;
  resultCount?: number;
}): void {
  const timestamp = new Date().toISOString();
  // Spread order matters: `type` first, context fields next, `timestamp` last.
  const record = { type: 'apollo_response', ...context, timestamp };
  apolloLogger.info(record);
}
/**
 * Writes an error-level `apollo_error` record for a failed call.
 *
 * The `Error` is flattened to `{ name, message, stack }` so that those three
 * fields appear explicitly in the log record.
 *
 * @param context Endpoint, the error, the correlating request id and an optional retry count.
 */
export function logApolloError(context: {
  endpoint: string;
  error: Error;
  requestId: string;
  retryCount?: number;
}): void {
  const { endpoint, error, requestId, retryCount } = context;
  const { name, message, stack } = error;

  apolloLogger.error({
    type: 'apollo_error',
    endpoint,
    error: { name, message, stack },
    requestId,
    retryCount,
    timestamp: new Date().toISOString(),
  });
}
Distributed Tracing (OpenTelemetry)
// src/lib/apollo/tracing.ts
import { trace, Span, SpanStatusCode, context as otelContext } from '@opentelemetry/api';
import { W3CTraceContextPropagator } from '@opentelemetry/core';
// Tracer used for every span created in this module, obtained under the 'apollo-integration' name.
const tracer = trace.getTracer('apollo-integration');
// W3C Trace Context propagator instance.
// NOTE(review): nothing in the visible code references `propagator` — confirm whether it is used elsewhere.
const propagator = new W3CTraceContextPropagator();
/**
 * Starts a span named `apollo.<name>`.
 *
 * Only `attributes.endpoint` and `attributes.method` are read; they are stored
 * as `apollo.endpoint` and `apollo.method`, together with a fixed `service.name`.
 * The caller is responsible for ending the returned span.
 *
 * @param name       Suffix appended to the `apollo.` span-name prefix.
 * @param attributes Bag from which `endpoint` and `method` are taken.
 * @returns The started span.
 */
export function createApolloSpan(
  name: string,
  attributes: Record<string, any>
): Span {
  const { endpoint, method } = attributes;
  const spanName = `apollo.${name}`;
  const spanAttributes = {
    'apollo.endpoint': endpoint,
    'apollo.method': method,
    'service.name': 'apollo-integration',
  };

  return tracer.startSpan(spanName, { attributes: spanAttributes });
}
/**
 * Runs `requestFn` inside an `apollo.request` span.
 *
 * The span is made active for the duration of `requestFn`, marked OK on
 * success or ERROR (with the exception recorded) on failure, and always ended.
 *
 * @param endpoint  Apollo endpoint, stored on the span as `apollo.endpoint`.
 * @param requestFn Async function performing the actual request.
 * @returns Whatever `requestFn` resolves to.
 * @throws Re-throws the original value thrown by `requestFn`, unchanged.
 */
export async function traceApolloRequest<T>(
  endpoint: string,
  requestFn: () => Promise<T>
): Promise<T> {
  const span = createApolloSpan('request', { endpoint });

  try {
    const result = await otelContext.with(
      trace.setSpan(otelContext.active(), span),
      requestFn
    );
    span.setStatus({ code: SpanStatusCode.OK });
    return result;
  } catch (error: unknown) {
    // A thrown value is not guaranteed to be an Error (it may even be null),
    // so derive the message defensively instead of reading `.message` blindly;
    // otherwise a TypeError raised here would mask the original failure.
    let message: string;
    if (error instanceof Error) {
      message = error.message;
    } else if (
      typeof error === 'object' &&
      error !== null &&
      typeof (error as { message?: unknown }).message === 'string'
    ) {
      message = (error as { message: string }).message;
    } else {
      message = String(error);
    }

    span.setStatus({ code: SpanStatusCode.ERROR, message });
    span.recordException(error instanceof Error ? error : message);
    throw error;
  } finally {
    span.end();
  }
}
// Middleware for Express
/**
 * Express middleware that opens an `apollo.http_request` span per incoming
 * request, exposes it as `req.apolloSpan`, and ends it once the response is
 * done.
 *
 * The span is ended on whichever of `'finish'` or `'close'` fires first, so it
 * is also closed when the client disconnects before the response completes
 * (in which case `'finish'` never fires).
 */
export function apolloTracingMiddleware(req: any, res: any, next: any) {
  const span = createApolloSpan('http_request', {
    endpoint: req.path,
    method: req.method,
  });
  req.apolloSpan = span;

  let ended = false;
  const endSpanOnce = (): void => {
    // Both events can fire for a normal response; make sure the span ends exactly once.
    if (ended) return;
    ended = true;
    span.setAttribute('http.status_code', res.statusCode);
    span.end();
  };

  res.on('finish', endSpanOnce);
  res.on('close', endSpanOnce);

  next();
}
Alerting Rules
# prometheus/apollo-alerts.yml
# Alerting rules for the Apollo integration. The nesting below is significant:
# every rule must sit under `groups[].rules`, and each multi-line `expr: |`
# body must be indented deeper than its `expr` key.
groups:
  - name: apollo-alerts
    rules:
      # High error rate: errors make up more than 5% of requests, sustained for 5 minutes.
      - alert: ApolloHighErrorRate
        expr: |
          sum(rate(apollo_errors_total[5m])) /
          sum(rate(apollo_requests_total[5m])) > 0.05
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High Apollo API error rate"
          description: "Apollo error rate is {{ $value | humanizePercentage }}"

      # Rate limit warnings: fewer than 20 requests remaining for 2 minutes.
      - alert: ApolloRateLimitApproaching
        expr: apollo_rate_limit_remaining < 20
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "Apollo rate limit approaching"
          description: "Only {{ $value }} requests remaining"

      # Fires as soon as any rate-limit hit is recorded in the last 5 minutes (no `for` delay).
      - alert: ApolloRateLimitHit
        expr: increase(apollo_rate_limit_hits_total[5m]) > 0
        labels:
          severity: critical
        annotations:
          summary: "Apollo rate limit hit"
          description: "Rate limit was hit {{ $value }} times in last 5 minutes"

      # Latency alerts: P95 request duration above 5 seconds, sustained for 5 minutes.
      - alert: ApolloHighLatency
        expr: |
          histogram_quantile(0.95, rate(apollo_request_duration_seconds_bucket[5m])) > 5
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High Apollo API latency"
          description: "P95 latency is {{ $value | humanizeDuration }}"

      # Credit usage: more than 8000 credits consumed over a rolling 24-hour window.
      - alert: ApolloHighCreditUsage
        expr: |
          increase(apollo_credits_used_total[24h]) > 8000
        labels:
          severity: warning
        annotations:
          summary: "High Apollo credit consumption"
          description: "{{ $value }} credits used in last 24 hours"
Grafana Dashboard
{
"dashboard": {
"title": "Apollo.io Integration",
"panels": [
{
"title": "Request Rate",
"type": "graph",
"targets": [
{
"expr": "sum(rate(apollo_requests_total[5m])) by (endpoint)",
"legendFormat": "{{ endpoint }}"
}
]
},
{
"title": "Error Rate",
"type": "graph",
"targets": [
{
"expr": "sum(rate(apollo_errors_total[5m])) by (error_type)",
"legendFormat": "{{ error_type }}"
}
]
},
{
"title": "Request Duration (P95)",
"type": "graph",
"targets": [
{
"expr": "histogram_quantile(0.95, rate(apollo_request_duration_seconds_bucket[5m]))",
"legendFormat": "P95"
}
]
},
{
"title": "Rate Limit Status",
"type": "gauge",
"targets": [
{
"expr": "apollo_rate_limit_remaining",
"legendFormat": "Remaining"
}
],
"thresholds": [
{ "value": 0, "color": "red" },
{ "value": 20, "color": "yellow" },
{ "value": 50, "color": "green" }
]
},
{
"title": "Cache Hit Rate",
"type": "stat",
"targets": [
{
"expr": "sum(rate(apollo_cache_hits_total[5m])) / (sum(rate(apollo_cache_hits_total[5m])) + sum(rate(apollo_cache_misses_total[5m])))",
"legendFormat": "Hit Rate"
}
]
},
{
"title": "Credits Used (Last 24h)",
"type": "stat",
"targets": [
{
"expr": "increase(apollo_credits_used_total[24h])"
}
]
}
]
}
}
Health Check Endpoint
// src/routes/health/apollo.ts
import { Router } from 'express';
import { apolloRateLimitRemaining, register } from '../../lib/apollo/metrics';

// NOTE(review): `apollo` (API client) and `apolloCache` are used below, but no import
// for them is visible in this snippet — import them from the modules that define them.

const router = Router();

/** The rate-limit check fails once the lowest remaining allowance is at or below this value. */
const MIN_RATE_LIMIT_REMAINING = 10;

/** Extracts a printable message from an arbitrary thrown value. */
function errorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * GET /health/apollo
 *
 * Runs three checks (API connectivity, rate-limit headroom, cache) and responds
 * with 200 `healthy` when all pass, 503 `degraded` when a check fails, or
 * 503 `unhealthy` when a check throws.
 */
router.get('/health/apollo', async (_req, res) => {
  const checks = {
    api: false,
    rateLimit: false,
    cache: false,
  };

  try {
    // Check API connectivity
    await apollo.healthCheck();
    checks.api = true;

    // Check rate limit status. `Gauge#get()` yields a metric object whose `values`
    // array holds one sample per label set — not a bare number — so take the lowest
    // sample. With no samples recorded yet, Math.min() is Infinity and the check passes.
    const { values } = await apolloRateLimitRemaining.get();
    const lowestRemaining = Math.min(...values.map((sample) => sample.value));
    checks.rateLimit = lowestRemaining > MIN_RATE_LIMIT_REMAINING;

    // Check cache health
    // NOTE(review): an empty cache (size 0) is reported as failing — confirm this is intended.
    const cacheStats = apolloCache.getStats();
    checks.cache = cacheStats.size > 0;

    const healthy = Object.values(checks).every(Boolean);
    res.status(healthy ? 200 : 503).json({
      status: healthy ? 'healthy' : 'degraded',
      checks,
      timestamp: new Date().toISOString(),
    });
  } catch (error: unknown) {
    res.status(503).json({
      status: 'unhealthy',
      error: errorMessage(error),
      checks,
      timestamp: new Date().toISOString(),
    });
  }
});

/**
 * GET /metrics
 *
 * Serves the Apollo registry in the Prometheus exposition format. A failure
 * while collecting metrics is turned into a 500 instead of an unhandled rejection.
 */
router.get('/metrics', async (_req, res) => {
  try {
    const body = await register.metrics();
    res.set('Content-Type', register.contentType);
    res.end(body);
  } catch (error: unknown) {
    res.status(500).end(errorMessage(error));
  }
});

export default router;
Output
- Prometheus metrics for all Apollo operations
- Structured JSON logging with PII redaction
- OpenTelemetry distributed tracing
- Alerting rules for errors, rate limits, latency
- Grafana dashboard configuration
- Health check endpoints
Error Handling
| Issue | Resolution |
|---|---|
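| Missing metrics | Verify that requests go through `InstrumentedApolloClient` and that Prometheus scrapes the `/metrics` route |
| Alert noise | Tune the thresholds and `for:` durations in `prometheus/apollo-alerts.yml` |
| Log volume | Raise `LOG_LEVEL` (e.g. to `warn`) to reduce output |
| Trace gaps | Check that outbound calls are wrapped in `traceApolloRequest` and that trace context is propagated |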
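Resources
- prom-client (Prometheus client for Node.js): https://github.com/siimon/prom-client
- Prometheus alerting rules: https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/
- pino redaction: https://getpino.io/#/docs/redaction
- OpenTelemetry JavaScript: https://opentelemetry.io/docs/languages/js/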
Next Steps
Proceed to apollo-incident-runbook for incident response.