name	apollo-observability
description	Set up Apollo.io monitoring and observability. Use when implementing logging, metrics, tracing, and alerting for Apollo integrations. Trigger with phrases like "apollo monitoring", "apollo metrics", "apollo observability", "apollo logging", "apollo alerts".
allowed-tools	Read, Write, Edit, Bash(kubectl:*), Bash(curl:*)
version	1.0.0
license	MIT
author	Jeremy Longshore <jeremy@intentsolutions.io>

Apollo Observability

Overview

Comprehensive observability setup for Apollo.io integrations including metrics, logging, tracing, and alerting.

Metrics with Prometheus

// src/lib/apollo/metrics.ts
import { Registry, Counter, Histogram, Gauge } from 'prom-client';

/** Registry that every Apollo metric in this module is attached to. */
const register = new Registry();

// Shared `registers` option so each metric definition stays short.
const registers = [register];

/** Counts Apollo API requests, labelled by endpoint, method, and outcome status. */
export const apolloRequestsTotal = new Counter({
  name: 'apollo_requests_total',
  help: 'Total number of Apollo API requests',
  labelNames: ['endpoint', 'method', 'status'],
  registers,
});

/** Observes Apollo API request durations in seconds, labelled by endpoint and method. */
export const apolloRequestDuration = new Histogram({
  name: 'apollo_request_duration_seconds',
  help: 'Duration of Apollo API requests in seconds',
  labelNames: ['endpoint', 'method'],
  buckets: [0.1, 0.25, 0.5, 1, 2.5, 5, 10],
  registers,
});

/** Holds the most recently recorded remaining rate-limit value per endpoint. */
export const apolloRateLimitRemaining = new Gauge({
  name: 'apollo_rate_limit_remaining',
  help: 'Remaining Apollo API rate limit',
  labelNames: ['endpoint'],
  registers,
});

/** Counts how often the rate limit was hit (no labels). */
export const apolloRateLimitHits = new Counter({
  name: 'apollo_rate_limit_hits_total',
  help: 'Number of times rate limit was hit',
  registers,
});

/** Counts cache hits per endpoint. */
export const apolloCacheHits = new Counter({
  name: 'apollo_cache_hits_total',
  help: 'Number of Apollo cache hits',
  labelNames: ['endpoint'],
  registers,
});

/** Counts cache misses per endpoint. */
export const apolloCacheMisses = new Counter({
  name: 'apollo_cache_misses_total',
  help: 'Number of Apollo cache misses',
  labelNames: ['endpoint'],
  registers,
});

/** Counts Apollo credits consumed, labelled by operation. */
export const apolloCreditsUsed = new Counter({
  name: 'apollo_credits_used_total',
  help: 'Total Apollo credits consumed',
  labelNames: ['operation'],
  registers,
});

/** Counts Apollo API errors, labelled by endpoint and error type. */
export const apolloErrors = new Counter({
  name: 'apollo_errors_total',
  help: 'Total Apollo API errors',
  labelNames: ['endpoint', 'error_type'],
  registers,
});

export { register };

Instrumented Client

// src/lib/apollo/instrumented-client.ts
import {
  apolloErrors,
  apolloRateLimitHits,
  apolloRateLimitRemaining,
  apolloRequestDuration,
  apolloRequestsTotal,
} from './metrics';

/**
 * Wraps an Apollo API client so that every request is counted, timed, and —
 * on failure — classified, using the Prometheus metrics from `./metrics`.
 */
export class InstrumentedApolloClient {
  /**
   * @param baseClient - Client that performs the actual call. It must resolve
   *   to an object exposing the response body as `data` and the response
   *   headers as `headers`.
   */
  constructor(
    private readonly baseClient: {
      request<T>(
        endpoint: string,
        options: RequestOptions
      ): Promise<{ data: T; headers: Record<string, unknown> }>;
    }
  ) {}

  /**
   * Sends a request through the base client while recording metrics.
   *
   * @param endpoint - Apollo endpoint, used as the `endpoint` metric label.
   * @param options - Request options; `options.method` falls back to `'POST'`
   *   for the `method` label when it is missing or empty.
   * @returns The `data` field of the base client's response.
   * @throws Rethrows whatever the base client threw, after recording it.
   */
  async request<T>(endpoint: string, options: RequestOptions): Promise<T> {
    const labels = { endpoint, method: options.method || 'POST' };
    const endTimer = apolloRequestDuration.startTimer(labels);

    try {
      const response = await this.baseClient.request<T>(endpoint, options);

      apolloRequestsTotal.inc({ ...labels, status: 'success' });
      this.recordRateLimit(endpoint, response.headers['x-ratelimit-remaining']);

      return response.data;
    } catch (error: unknown) {
      const errorType = this.classifyError(error);
      apolloRequestsTotal.inc({ ...labels, status: 'error' });
      apolloErrors.inc({ endpoint, error_type: errorType });

      if (this.inspectError(error).status === 429) {
        apolloRateLimitHits.inc();
      }

      throw error;
    } finally {
      // Runs on both paths so failed requests are included in the histogram.
      endTimer();
    }
  }

  /** Updates the remaining-rate-limit gauge when the header holds an integer. */
  private recordRateLimit(endpoint: string, header: unknown): void {
    if (header === undefined || header === null) return;

    const remaining = Number.parseInt(String(header), 10);
    // A non-numeric header must not overwrite the gauge with NaN.
    if (Number.isNaN(remaining)) return;

    apolloRateLimitRemaining.set({ endpoint }, remaining);
  }

  /**
   * Safely extracts `response.status` and `code` from an arbitrary thrown
   * value, so that non-object throwables (null, strings, ...) cannot make the
   * error path itself throw.
   */
  private inspectError(error: unknown): { status?: number; code?: string } {
    if (typeof error !== 'object' || error === null) return {};

    const { response, code } = error as {
      response?: { status?: unknown } | null;
      code?: unknown;
    };
    const status = response?.status;

    return {
      status: typeof status === 'number' ? status : undefined,
      code: typeof code === 'string' ? code : undefined,
    };
  }

  /** Maps a thrown value to the `error_type` label used by `apolloErrors`. */
  private classifyError(error: unknown): string {
    const { status, code } = this.inspectError(error);

    if (status === 401) return 'auth_error';
    if (status === 403) return 'permission_error';
    if (status === 422) return 'validation_error';
    if (status === 429) return 'rate_limit';
    if (status !== undefined && status >= 500) return 'server_error';
    if (code === 'ECONNREFUSED') return 'connection_error';
    if (code === 'ETIMEDOUT') return 'timeout';
    return 'unknown';
  }
}

Structured Logging

// src/lib/apollo/logger.ts
import pino from 'pino';

/**
 * Base pino logger for the Apollo integration.
 *
 * The level comes from `LOG_LEVEL` (default `'info'`), every entry carries the
 * `service` and `environment` base fields, and the redact paths below are
 * replaced with `'[REDACTED]'` before an entry is written.
 */
const logger = pino({
  level: process.env.LOG_LEVEL || 'info',
  formatters: {
    // Emit the level as its text label.
    level: (label) => ({ level: label }),
  },
  redact: {
    paths: [
      'api_key',
      // `api_key` alone only matches a top-level key; request params are
      // logged one level down (e.g. `params.api_key`), so cover that too.
      '*.api_key',
      '*.email',
      '*.phone',
      'headers.authorization',
      'headers["x-api-key"]',
    ],
    censor: '[REDACTED]',
  },
  base: {
    service: 'apollo-integration',
    environment: process.env.NODE_ENV,
  },
});

/** Child logger that tags every entry with `component: 'apollo'`. */
export const apolloLogger = logger.child({ component: 'apollo' });

// Request/response logging
/**
 * Writes an info-level `apollo_request` entry.
 *
 * @param context - Request details; all fields are copied into the log entry
 *   alongside an ISO-8601 `timestamp`.
 */
export function logApolloRequest(context: {
  endpoint: string;
  method: string;
  params?: object;
  requestId: string;
}): void {
  const entry = {
    type: 'apollo_request',
    ...context,
    timestamp: new Date().toISOString(),
  };

  apolloLogger.info(entry);
}

/**
 * Writes an info-level `apollo_response` entry.
 *
 * @param context - Response details; all fields are copied into the log entry
 *   alongside an ISO-8601 `timestamp`.
 */
export function logApolloResponse(context: {
  endpoint: string;
  status: number;
  durationMs: number;
  requestId: string;
  resultCount?: number;
}): void {
  const entry = {
    type: 'apollo_response',
    ...context,
    timestamp: new Date().toISOString(),
  };

  apolloLogger.info(entry);
}

/**
 * Writes an error-level `apollo_error` entry.
 *
 * Only the `name`, `message`, and `stack` of the error are logged, together
 * with the endpoint, request id, optional retry count, and an ISO-8601
 * `timestamp`.
 *
 * @param context - The failing endpoint, the error, and request metadata.
 */
export function logApolloError(context: {
  endpoint: string;
  error: Error;
  requestId: string;
  retryCount?: number;
}): void {
  const { endpoint, error, requestId, retryCount } = context;
  const { name, message, stack } = error;

  apolloLogger.error({
    type: 'apollo_error',
    endpoint,
    error: { name, message, stack },
    requestId,
    retryCount,
    timestamp: new Date().toISOString(),
  });
}

Distributed Tracing (OpenTelemetry)

// src/lib/apollo/tracing.ts
import { trace, Span, SpanStatusCode, context as otelContext } from '@opentelemetry/api';
import { W3CTraceContextPropagator } from '@opentelemetry/core';

// Tracer used to start every span created in this module.
const tracer = trace.getTracer('apollo-integration');
// NOTE(review): `propagator` is not referenced anywhere in the visible code — confirm it is needed.
const propagator = new W3CTraceContextPropagator();

/**
 * Starts a span named `apollo.<name>`.
 *
 * Only `attributes.endpoint` and `attributes.method` are read; they become the
 * `apollo.endpoint` and `apollo.method` span attributes. The caller is
 * responsible for ending the returned span.
 *
 * @param name - Suffix appended to the `apollo.` span-name prefix.
 * @param attributes - Bag from which `endpoint` and `method` are taken.
 * @returns The started span.
 */
export function createApolloSpan(
  name: string,
  attributes: Record<string, any>
): Span {
  const { endpoint, method } = attributes;
  const spanName = `apollo.${name}`;

  const spanAttributes = {
    'apollo.endpoint': endpoint,
    'apollo.method': method,
    'service.name': 'apollo-integration',
  };

  return tracer.startSpan(spanName, { attributes: spanAttributes });
}

/**
 * Runs `requestFn` inside an `apollo.request` span.
 *
 * The span is made active for the duration of the call, marked OK on success
 * or ERROR (with the exception recorded) on failure, and always ended.
 *
 * @param endpoint - Apollo endpoint recorded on the span.
 * @param requestFn - The operation to trace.
 * @returns Whatever `requestFn` resolves to.
 * @throws Rethrows the original value thrown by `requestFn`, unchanged.
 */
export async function traceApolloRequest<T>(
  endpoint: string,
  requestFn: () => Promise<T>
): Promise<T> {
  const span = createApolloSpan('request', { endpoint });

  try {
    const result = await otelContext.with(
      trace.setSpan(otelContext.active(), span),
      requestFn
    );

    span.setStatus({ code: SpanStatusCode.OK });
    return result;
  } catch (error: unknown) {
    // Thrown values are not guaranteed to be Error objects; reading `.message`
    // off `null`/`undefined` would throw here and hide the original failure.
    const failure = error instanceof Error ? error : new Error(String(error));

    span.setStatus({
      code: SpanStatusCode.ERROR,
      message: failure.message,
    });
    span.recordException(failure);
    throw error;
  } finally {
    span.end();
  }
}

/**
 * Express middleware that opens an `apollo.http_request` span per request.
 *
 * The span is exposed as `req.apolloSpan` and is ended exactly once, with the
 * response status code attached, when the response finishes or its connection
 * closes — whichever happens first.
 */
export function apolloTracingMiddleware(req: any, res: any, next: any) {
  const span = createApolloSpan('http_request', {
    endpoint: req.path,
    method: req.method,
  });

  req.apolloSpan = span;

  let ended = false;
  const endSpan = (): void => {
    // 'close' also fires after a normal 'finish', so guard against ending twice.
    if (ended) return;
    ended = true;

    span.setAttribute('http.status_code', res.statusCode);
    span.end();
  };

  res.on('finish', endSpan);
  // 'finish' never fires when the client disconnects before the response is
  // fully sent; without this listener the span would never be ended.
  res.on('close', endSpan);

  next();
}

Alerting Rules

# prometheus/apollo-alerts.yml
groups:
  - name: apollo-alerts
    rules:
      # High error rate: ratio of the 5-minute error rate to the 5-minute
      # request rate, each summed across all series. Warns when the ratio
      # stays above 0.05 (5%) for 5 minutes.
      - alert: ApolloHighErrorRate
        expr: |
          sum(rate(apollo_errors_total[5m])) /
          sum(rate(apollo_requests_total[5m])) > 0.05
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High Apollo API error rate"
          description: "Apollo error rate is {{ $value | humanizePercentage }}"

      # Rate limit warnings: warns when any apollo_rate_limit_remaining series
      # stays below 20 for 2 minutes.
      - alert: ApolloRateLimitApproaching
        expr: apollo_rate_limit_remaining < 20
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "Apollo rate limit approaching"
          description: "Only {{ $value }} requests remaining"

      # Critical as soon as the rate-limit-hit counter has increased within
      # the last 5 minutes; no `for` clause is set on this rule.
      - alert: ApolloRateLimitHit
        expr: increase(apollo_rate_limit_hits_total[5m]) > 0
        labels:
          severity: critical
        annotations:
          summary: "Apollo rate limit hit"
          description: "Rate limit was hit {{ $value }} times in last 5 minutes"

      # Latency alerts: warns when the 0.95 quantile estimated from the
      # request-duration histogram buckets stays above 5 (seconds, per the
      # metric name) for 5 minutes.
      - alert: ApolloHighLatency
        expr: |
          histogram_quantile(0.95, rate(apollo_request_duration_seconds_bucket[5m])) > 5
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High Apollo API latency"
          description: "P95 latency is {{ $value | humanizeDuration }}"

      # Credit usage: warns when the credits counter grew by more than 8000
      # over the trailing 24 hours; no `for` clause is set on this rule.
      - alert: ApolloHighCreditUsage
        expr: |
          increase(apollo_credits_used_total[24h]) > 8000
        labels:
          severity: warning
        annotations:
          summary: "High Apollo credit consumption"
          description: "{{ $value }} credits used in last 24 hours"

Grafana Dashboard

{
  "dashboard": {
    "title": "Apollo.io Integration",
    "panels": [
      {
        "title": "Request Rate",
        "type": "graph",
        "targets": [
          {
            "expr": "sum(rate(apollo_requests_total[5m])) by (endpoint)",
            "legendFormat": "{{ endpoint }}"
          }
        ]
      },
      {
        "title": "Error Rate",
        "type": "graph",
        "targets": [
          {
            "expr": "sum(rate(apollo_errors_total[5m])) by (error_type)",
            "legendFormat": "{{ error_type }}"
          }
        ]
      },
      {
        "title": "Request Duration (P95)",
        "type": "graph",
        "targets": [
          {
            "expr": "histogram_quantile(0.95, rate(apollo_request_duration_seconds_bucket[5m]))",
            "legendFormat": "P95"
          }
        ]
      },
      {
        "title": "Rate Limit Status",
        "type": "gauge",
        "targets": [
          {
            "expr": "apollo_rate_limit_remaining",
            "legendFormat": "Remaining"
          }
        ],
        "thresholds": [
          { "value": 0, "color": "red" },
          { "value": 20, "color": "yellow" },
          { "value": 50, "color": "green" }
        ]
      },
      {
        "title": "Cache Hit Rate",
        "type": "stat",
        "targets": [
          {
            "expr": "sum(rate(apollo_cache_hits_total[5m])) / (sum(rate(apollo_cache_hits_total[5m])) + sum(rate(apollo_cache_misses_total[5m])))",
            "legendFormat": "Hit Rate"
          }
        ]
      },
      {
        "title": "Credits Used Today",
        "type": "stat",
        "targets": [
          {
            "expr": "increase(apollo_credits_used_total[24h])"
          }
        ]
      }
    ]
  }
}

Health Check Endpoint

// src/routes/health/apollo.ts
import { Router } from 'express';
import { register } from '../../lib/apollo/metrics';

const router = Router();

/** The rate-limit check fails once any endpoint has this many requests left or fewer. */
const MIN_RATE_LIMIT_REMAINING = 10;

/**
 * GET /health/apollo
 *
 * Reports three checks (API connectivity, rate-limit headroom, cache) and
 * responds 200 when all pass, 503 with status `degraded` when any fails, or
 * 503 with status `unhealthy` when a check throws.
 *
 * NOTE(review): `apollo` and `apolloCache` are not imported in this snippet;
 * they must be brought into scope by the real module.
 */
router.get('/health/apollo', async (req, res) => {
  const checks = {
    api: false,
    rateLimit: false,
    cache: false,
  };

  try {
    // Check API connectivity
    await apollo.healthCheck();
    checks.api = true;

    // Check rate limit status. A metric's get() resolves to an object holding
    // one sample per label set, not a bare number, so it must be awaited and
    // unpacked. The gauge is looked up by name on the imported registry.
    const gauge = register.getSingleMetric('apollo_rate_limit_remaining');
    const samples = gauge ? (await gauge.get()).values : [];
    // With no samples recorded yet there is no evidence of exhaustion, so the
    // check passes; otherwise every endpoint must be above the threshold.
    checks.rateLimit = samples.every(
      (sample) => sample.value > MIN_RATE_LIMIT_REMAINING
    );

    // Check cache health
    // NOTE(review): an empty cache (e.g. right after startup) fails this
    // check — confirm that is intended.
    const cacheStats = apolloCache.getStats();
    checks.cache = cacheStats.size > 0;

    const healthy = Object.values(checks).every(Boolean);

    res.status(healthy ? 200 : 503).json({
      status: healthy ? 'healthy' : 'degraded',
      checks,
      timestamp: new Date().toISOString(),
    });
  } catch (error: unknown) {
    res.status(503).json({
      status: 'unhealthy',
      error: error instanceof Error ? error.message : String(error),
      checks,
    });
  }
});

/**
 * GET /metrics
 *
 * Serves the registry's metrics using the registry's own content type.
 * Responds 500 if collecting the metrics fails.
 */
router.get('/metrics', async (req, res) => {
  try {
    const body = await register.metrics();
    res.set('Content-Type', register.contentType);
    res.end(body);
  } catch (error: unknown) {
    // Without this, a rejection inside the async handler would go unhandled
    // and the scrape request would hang instead of failing fast.
    res.status(500).end(error instanceof Error ? error.message : String(error));
  }
});

export default router;

Output

Prometheus metrics for all Apollo operations
Structured JSON logging with PII redaction
OpenTelemetry distributed tracing
Alerting rules for errors, rate limits, latency
Grafana dashboard configuration
Health check endpoints

Error Handling

Issue	Resolution
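Missing metrics	Verify instrumentation: confirm requests go through InstrumentedApolloClient and that the /metrics endpoint is being scraped
Alert noise	Tune thresholds: adjust the expr limits and `for` durations in prometheus/apollo-alerts.yml
Log volume	Adjust log levels: raise LOG_LEVEL (for example from info to warn)
Trace gaps	Check propagation: make sure calls are wrapped in traceApolloRequest so the span stays active across async boundaries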

Resources

Next Steps

Proceed to apollo-incident-runbook for incident response.

apollo-observability

Install Skill

SKILL.md

Apollo Observability

Overview

Metrics with Prometheus

Instrumented Client

Structured Logging

Distributed Tracing (OpenTelemetry)

Alerting Rules

Grafana Dashboard

Health Check Endpoint

Output

Error Handling

Resources

Next Steps