| name | blue-green-deployment |
| description | Implement blue-green deployment strategies for zero-downtime releases with instant rollback capability and traffic switching between environments. |
Blue-Green Deployment
Overview
Deploy applications using blue-green deployment patterns to maintain two identical production environments, enabling instant traffic switching and rapid rollback capabilities.
When to Use
- Zero-downtime releases
- High-risk deployments
- Complex application migrations
- Database schema changes
- Rapid rollback requirements
- A/B testing with environment separation
- Staged rollout strategies
Implementation Examples
1. Blue-Green with Load Balancer
# blue-green-setup.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: blue-green-config
  namespace: production
data:
  switch-traffic.sh: |
    #!/bin/bash
    # Switch the ALB default action between the blue and green target groups.
    # Usage: switch-traffic.sh <current-active> <target>   (defaults: blue green)
    set -euo pipefail

    CURRENT_ACTIVE="${1:-blue}"
    TARGET="${2:-green}"
    # Allow the ALB ARN to come from the environment; fall back to the demo value.
    ALB_ARN="${ALB_ARN:-arn:aws:elasticloadbalancing:us-east-1:123456789012:loadbalancer/app/myapp-alb/1234567890abcdef}"

    echo "Switching traffic from $CURRENT_ACTIVE to $TARGET..."

    # Look up the target groups by name.
    # NOTE: describe-target-groups output does not include tags (tags require a
    # separate describe-tags call), so the original Tags-based --query always
    # matched nothing; filtering on TargetGroupName works with a single call.
    BLUE_TG=$(aws elbv2 describe-target-groups \
      --load-balancer-arn "$ALB_ARN" \
      --query "TargetGroups[?TargetGroupName=='myapp-blue'].TargetGroupArn" \
      --output text)
    GREEN_TG=$(aws elbv2 describe-target-groups \
      --load-balancer-arn "$ALB_ARN" \
      --query "TargetGroups[?TargetGroupName=='myapp-green'].TargetGroupArn" \
      --output text)

    # The first listener on the ALB receives the switched default action.
    LISTENER_ARN=$(aws elbv2 describe-listeners \
      --load-balancer-arn "$ALB_ARN" \
      --query "Listeners[0].ListenerArn" \
      --output text)

    # Pick the target group that should now receive all traffic.
    if [ "$TARGET" = "green" ]; then
      TARGET_ARN=$GREEN_TG
    else
      TARGET_ARN=$BLUE_TG
    fi

    aws elbv2 modify-listener \
      --listener-arn "$LISTENER_ARN" \
      --default-actions Type=forward,TargetGroupArn="$TARGET_ARN"

    echo "Traffic switched to $TARGET"
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: deploy-script
  namespace: production
data:
  deploy-blue-green.sh: |
    #!/bin/bash
    # Deploy a new version to the idle environment, verify it, then switch traffic.
    # Usage: deploy-blue-green.sh <namespace> <image-tag>   (defaults: production latest)
    set -euo pipefail

    ENVIRONMENT="${1:-production}"
    VERSION="${2:-latest}"
    HEALTH_CHECK_ENDPOINT="/health"

    # The environment NOT currently serving traffic is the deploy target.
    CURRENT_ACTIVE=$(kubectl get configmap active-environment -n "$ENVIRONMENT" \
      -o jsonpath='{.data.active}' 2>/dev/null || echo "blue")
    if [ "$CURRENT_ACTIVE" = "blue" ]; then
      TARGET="green"
    else
      TARGET="blue"
    fi
    echo "Current active: $CURRENT_ACTIVE, deploying to: $TARGET"

    # Roll the idle deployment to the new image.
    kubectl set image "deployment/myapp-$TARGET" \
      "myapp=myrepo/myapp:$VERSION" \
      -n "$ENVIRONMENT"

    echo "Waiting for deployment to rollout..."
    kubectl rollout status "deployment/myapp-$TARGET" \
      -n "$ENVIRONMENT" --timeout=10m

    # Health-check EVERY pod in the target environment.
    # (The original jsonpath '{.items[0]...}' returned only the first pod,
    # so the loop below ran exactly once.)
    echo "Running health checks on $TARGET..."
    TARGET_PODS=$(kubectl get pods -l "app=myapp,environment=$TARGET" \
      -n "$ENVIRONMENT" -o jsonpath='{.items[*].metadata.name}')
    for pod in $TARGET_PODS; do
      echo "Health checking pod: $pod"
      kubectl port-forward "pod/$pod" 8080:8080 -n "$ENVIRONMENT" &
      PF_PID=$!
      # Give the port-forward a moment to establish before probing.
      sleep 2
      if ! timeout 30 bash -c "until curl -f http://localhost:8080$HEALTH_CHECK_ENDPOINT; do sleep 1; done"; then
        kill "$PF_PID" 2>/dev/null || true
        echo "Health check failed for $pod"
        exit 1
      fi
      kill "$PF_PID" 2>/dev/null || true
      wait "$PF_PID" 2>/dev/null || true
    done

    # Smoke tests must pass BEFORE traffic moves. The original '|| true'
    # silently swallowed failures, defeating the gate; and '-it' is dropped
    # because allocating a TTY fails in non-interactive (CI) runs.
    echo "Running smoke tests..."
    kubectl exec "deployment/myapp-$TARGET" -n "$ENVIRONMENT" -- \
      npm run test:smoke

    # Record the new active environment, then switch the load balancer.
    kubectl patch configmap active-environment -n "$ENVIRONMENT" \
      -p "{\"data\":{\"active\":\"$TARGET\"}}"
    echo "Switching traffic to $TARGET..."
    bash /scripts/switch-traffic.sh "$CURRENT_ACTIVE" "$TARGET"

    echo "Deployment complete! $TARGET is now active"
    echo "Previous version still running on $CURRENT_ACTIVE for rollback"
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: myapp-blue
  namespace: production
spec:
  replicas: 3
  selector:
    matchLabels:
      app: myapp
      environment: blue
  template:
    metadata:
      labels:
        app: myapp
        environment: blue
    spec:
      containers:
        - name: myapp
          image: myrepo/myapp:v1.0.0
          ports:
            - containerPort: 8080
          # Readiness gating is essential for blue-green: without it the
          # Service/target group routes to pods that are running but not
          # yet able to serve, breaking the zero-downtime guarantee.
          readinessProbe:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 5
            periodSeconds: 5
          livenessProbe:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 30
            periodSeconds: 10
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: myapp-green
  namespace: production
spec:
  replicas: 3
  selector:
    matchLabels:
      app: myapp
      environment: green
  template:
    metadata:
      labels:
        app: myapp
        environment: green
    spec:
      containers:
        - name: myapp
          image: myrepo/myapp:v1.0.0
          ports:
            - containerPort: 8080
          # Readiness gating is essential for blue-green: without it the
          # Service/target group routes to pods that are running but not
          # yet able to serve, breaking the zero-downtime guarantee.
          readinessProbe:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 5
            periodSeconds: 5
          livenessProbe:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 30
            periodSeconds: 10
---
apiVersion: v1
kind: Service
metadata:
  name: myapp
  namespace: production
spec:
  type: LoadBalancer
  # The selector must pin one environment. A selector of just 'app: myapp'
  # matches BOTH the blue and green deployments and round-robins across
  # them — exactly the "mix blue and green traffic" anti-pattern this guide
  # warns against. Switching via this Service means patching 'environment'
  # to the newly active color.
  selector:
    app: myapp
    environment: blue
  ports:
    - port: 80
      targetPort: 8080
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: active-environment
  namespace: production
data:
  # Which environment currently receives production traffic: "blue" or "green".
  # Read and patched by deploy-blue-green.sh and the rollback script.
  active: "blue"
2. Blue-Green Rollback Script
#!/bin/bash
# rollback-blue-green.sh - Switch traffic back to the previously active environment.
# Usage: rollback-blue-green.sh [namespace]   (default: production)
set -euo pipefail

NAMESPACE="${1:-production}"
# Allow the listener ARN to come from the environment; fall back to the demo value.
LISTENER_ARN="${LISTENER_ARN:-arn:aws:elasticloadbalancing:us-east-1:123456789012:listener/app/myapp-alb/1234567890abcdef/50dc6c495c0c9188}"

echo "Starting rollback procedure..."

# Current active environment; the rollback target is the other one.
CURRENT_ACTIVE=$(kubectl get configmap active-environment -n "$NAMESPACE" \
  -o jsonpath='{.data.active}')
if [ "$CURRENT_ACTIVE" = "blue" ]; then
  TARGET="green"
else
  TARGET="blue"
fi
echo "Rolling back from $CURRENT_ACTIVE to $TARGET..."

# Refuse to roll back onto an environment with no running pods.
# (Counting with --no-headers drops the original script's jq dependency.)
echo "Verifying $TARGET environment health..."
HEALTHY_PODS=$(kubectl get pods -l "app=myapp,environment=$TARGET" \
  -n "$NAMESPACE" --field-selector=status.phase=Running \
  --no-headers 2>/dev/null | wc -l)
if [ "$HEALTHY_PODS" -lt 1 ]; then
  echo "ERROR: No healthy pods in $TARGET environment"
  exit 1
fi

# Record the new active environment.
echo "Switching traffic back to $TARGET..."
kubectl patch configmap active-environment -n "$NAMESPACE" \
  -p "{\"data\":{\"active\":\"$TARGET\"}}"

# Resolve the target group ARN by name instead of hardcoding it: distinct
# target groups cannot share the same random ARN suffix, so the original
# hardcoded "myapp-$TARGET/1234567890abcdef" could never match both colors.
TARGET_TG_ARN=$(aws elbv2 describe-target-groups \
  --names "myapp-$TARGET" \
  --query "TargetGroups[0].TargetGroupArn" \
  --output text)
aws elbv2 modify-listener \
  --listener-arn "$LISTENER_ARN" \
  --default-actions Type=forward,TargetGroupArn="$TARGET_TG_ARN"

echo "Rollback complete! Traffic switched to $TARGET"
echo "Previous active environment ($CURRENT_ACTIVE) is still running for analysis"
3. Monitoring and Validation
# blue-green-monitoring.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: validation-script
  namespace: production
data:
  validate-deployment.sh: |
    #!/bin/bash
    # Validate that a blue/green deployment is fully rolled out and healthy.
    # Usage: validate-deployment.sh [namespace] [deployment]
    #        (defaults: production myapp-green)
    set -euo pipefail

    ENVIRONMENT="${1:-production}"
    DEPLOYMENT="${2:-myapp-green}"
    TIMEOUT=300

    echo "Validating deployment: $DEPLOYMENT"

    # Wait for the rollout to finish before inspecting replica counts.
    kubectl rollout status "deployment/$DEPLOYMENT" -n "$ENVIRONMENT" --timeout="${TIMEOUT}s"

    # status.readyReplicas is ABSENT (jsonpath yields "") when zero pods are
    # ready; default it to 0 so the comparison below stays meaningful.
    READY_REPLICAS=$(kubectl get deployment "$DEPLOYMENT" -n "$ENVIRONMENT" \
      -o jsonpath='{.status.readyReplicas}')
    READY_REPLICAS="${READY_REPLICAS:-0}"
    DESIRED_REPLICAS=$(kubectl get deployment "$DEPLOYMENT" -n "$ENVIRONMENT" \
      -o jsonpath='{.spec.replicas}')
    if [ "$READY_REPLICAS" != "$DESIRED_REPLICAS" ]; then
      echo "ERROR: Not all replicas are ready ($READY_REPLICAS/$DESIRED_REPLICAS)"
      exit 1
    fi

    # Run smoke tests inside one pod of the target environment.
    # No -it flags: allocating a TTY fails in non-interactive (CI) runs.
    echo "Running smoke tests..."
    SMOKE_TEST_POD=$(kubectl get pods -l "app=myapp,environment=${DEPLOYMENT#myapp-}" \
      -n "$ENVIRONMENT" -o jsonpath='{.items[0].metadata.name}')
    kubectl exec "$SMOKE_TEST_POD" -n "$ENVIRONMENT" -- bash -c '
      echo "Testing health endpoint..."
      curl -f http://localhost:8080/health || exit 1
      echo "Testing API endpoints..."
      curl -f http://localhost:8080/api/version || exit 1
      echo "All smoke tests passed"
    '

    echo "Validation complete: $DEPLOYMENT is healthy"
---
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: blue-green-alerts
  namespace: production
spec:
  groups:
    - name: blue-green-deployment
      rules:
        # Alert on the error RATIO, not the raw 5xx rate: the description
        # renders $value with humanizePercentage, which expects a 0-1
        # fraction. The original expr (rate of 5xx requests/sec > 0.05)
        # would fire on 0.05 req/s and display a nonsense percentage.
        - alert: HighErrorRateAfterDeployment
          expr: |
            sum(rate(http_requests_total{status=~"5.."}[5m]))
              / sum(rate(http_requests_total[5m])) > 0.05
          for: 5m
          labels:
            severity: critical
          annotations:
            summary: "High error rate detected after deployment"
            description: "Error rate is {{ $value | humanizePercentage }}"
        - alert: DeploymentHealthCheckFailed
          expr: up{job="myapp"} == 0
          for: 2m
          labels:
            severity: critical
          annotations:
            summary: "Deployment health check failed"
            description: "Pod is unreachable for 2 minutes"
        - alert: PodRestartingAfterDeployment
          expr: rate(kube_pod_container_status_restarts_total[15m]) > 0.1
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: "Pod is restarting frequently after deployment"
            # Added for consistency with the other rules in this group.
            description: "Container restart rate is {{ $value }} restarts/sec over the last 15m"
Blue-Green Best Practices
✅ DO
- Run comprehensive health checks
- Monitor both environments during switching
- Keep previous version running for quick rollback
- Test traffic switching in non-prod first
- Document deployment procedure
- Have rollback plan ready
- Monitor error rates post-switch
- Automate environment sync
❌ DON'T
- Switch traffic without health checks
- Tear down old environment immediately
- Mix blue and green traffic
- Skip smoke tests
- Deploy without capacity planning
- Rush traffic switching
- Ignore monitoring post-deployment
- Leave different versions serving traffic after the switch completes (blue-green briefly runs two versions, but only one should ever receive traffic)
Rollback Scenarios
- Health Check Failure: Automatic rollback on failed checks
- High Error Rate: Monitor and trigger rollback if error rate exceeds threshold
- Performance Degradation: Rollback if latency spikes detected
- Dependency Failures: Rollback if external service integration fails