Multi Cloud Strategist Examples

Externalized from the agent definition per the few-shot-examples rule (#1587).

Multi-Cloud Strategist — Worked Examples

Externalized from the agent definition per the few-shot-examples rule (#1587).

Your Process — Illustrative Sample Blocks

1. Cloud Provider Comparison Framework

Use a structured scorecard before committing to a multi-cloud topology:

#!/bin/bash
# cost-compare.sh — compare compute costs for a specific workload profile
# Requires: AWS CLI, Azure CLI, gcloud SDK configured; python3 on PATH
#
# Only the AWS on-demand rate is fetched programmatically. For Azure and GCP
# the script checks that the SKU / machine type is visible to the CLI and then
# prints the public pricing page to consult.

VCPU=8
RAM_GB=32
HOURS_PER_MONTH=730
REGION_AWS="us-east-1"        # informational; the Pricing API filters on LOCATION_AWS
REGION_AZURE="eastus"
REGION_GCP="us-central1"

# The Price List Query API is served from only a few regions and filters on the
# human-readable location name rather than the region code.
PRICING_API_REGION="us-east-1"
LOCATION_AWS="US East (N. Virginia)"

INSTANCE_AWS="m6i.2xlarge"     # 8 vCPU, 32GB, x86
SKU_AZURE="Standard_D8s_v5"    # 8 vCPU, 32GB
MACHINE_GCP="n2-standard-8"    # 8 vCPU, 32GB

echo "=== Compute Cost Comparison: ${VCPU} vCPU / ${RAM_GB}GB RAM ==="
echo ""

echo "AWS:"
# --output text prints PriceList[0] as raw JSON. With --output json the entry is
# a JSON-encoded *string*, which json.load would return as str, not dict.
# capacitystatus=Used narrows the match to the plain on-demand product.
aws pricing get-products \
  --region "$PRICING_API_REGION" \
  --service-code AmazonEC2 \
  --filters \
    "Type=TERM_MATCH,Field=instanceType,Value=${INSTANCE_AWS}" \
    "Type=TERM_MATCH,Field=location,Value=${LOCATION_AWS}" \
    "Type=TERM_MATCH,Field=operatingSystem,Value=Linux" \
    "Type=TERM_MATCH,Field=tenancy,Value=Shared" \
    "Type=TERM_MATCH,Field=preInstalledSw,Value=NA" \
    "Type=TERM_MATCH,Field=capacitystatus,Value=Used" \
  --query 'PriceList[0]' \
  --output text 2>/dev/null | INSTANCE="$INSTANCE_AWS" HOURS="$HOURS_PER_MONTH" python3 -c '
import json, os, sys

product = json.load(sys.stdin)
if isinstance(product, str):
    # Tolerate a double-encoded entry (JSON string containing JSON).
    product = json.loads(product)

term = next(iter(product["terms"]["OnDemand"].values()))
dimension = next(iter(term["priceDimensions"].values()))
rate = float(dimension["pricePerUnit"]["USD"])
hours = float(os.environ["HOURS"])
name = os.environ["INSTANCE"]
print(f"  {name}: ${rate:.4f}/hr = ${rate * hours:.2f}/mo")
' 2>/dev/null || echo "  (pricing API unavailable - check aws configure)"

echo ""
echo "Azure: ${SKU_AZURE} (8 vCPU, 32GB)"
az vm list-skus --location "$REGION_AZURE" --size "$SKU_AZURE" --query '[0].name' -o tsv 2>/dev/null \
  && echo "  See: https://azure.microsoft.com/pricing/details/virtual-machines/linux/" \
  || echo "  (az login required)"

echo ""
echo "GCP: ${MACHINE_GCP} (8 vCPU, 32GB)"
gcloud compute machine-types describe "$MACHINE_GCP" \
  --zone "${REGION_GCP}-a" \
  --format="value(description)" 2>/dev/null \
  && echo "  See: https://cloud.google.com/compute/vm-instance-pricing" \
  || echo "  (gcloud auth required)"

2. Terraform Multi-Provider Configuration

Terraform's multi-provider support is the most common multi-cloud IaC pattern. Organize by provider to maintain clarity:

# providers.tf — multi-cloud provider configuration
# Declares the AWS, Azure, and Google providers side by side so that a single
# root module can manage resources in all three clouds.
terraform {
  required_version = ">= 1.7"
  required_providers {
    # "~>" constraints allow only the rightmost version component to increase.
    aws = {
      source  = "hashicorp/aws"
      version = "~> 5.0"
    }
    azurerm = {
      source  = "hashicorp/azurerm"
      version = "~> 3.90"
    }
    google = {
      source  = "hashicorp/google"
      version = "~> 5.0"
    }
  }
  # Single backend — each workspace targets a different cloud environment.
  # State for every provider is stored in this one S3 bucket, encrypted at
  # rest; dynamodb_table names the table the S3 backend uses for state locking.
  backend "s3" {
    bucket         = "my-terraform-state"
    key            = "multi-cloud/production/terraform.tfstate"
    region         = "us-east-1"
    encrypt        = true
    dynamodb_table = "terraform-locks"
  }
}

# AWS: region comes from a variable; local.common_tags (declared elsewhere) is
# applied to every taggable AWS resource through default_tags.
provider "aws" {
  region = var.aws_region
  default_tags {
    tags = local.common_tags
  }
}

# Azure: the features block is mandatory for azurerm. Soft-deleted Key Vaults
# are not purged on destroy.
provider "azurerm" {
  features {
    key_vault { purge_soft_delete_on_destroy = false }
  }
  subscription_id = var.azure_subscription_id
}

# GCP: default project and region for all google_* resources.
provider "google" {
  project = var.gcp_project_id
  region  = var.gcp_region
}
# modules/object-storage/main.tf — cloud-agnostic storage abstraction
# Deploy with: terraform apply -var="cloud_provider=aws"
#
# Exactly one provider's resources are created, chosen by var.cloud_provider;
# every other resource gets count = 0. The bucket_endpoint output returns a
# provider-appropriate URL for whichever store was created.

variable "cloud_provider" {
  description = "Target cloud provider: aws, azure, or gcp"
  type        = string
  validation {
    condition     = contains(["aws", "azure", "gcp"], var.cloud_provider)
    error_message = "cloud_provider must be aws, azure, or gcp"
  }
}

variable "bucket_name" { type = string }
variable "env"          { type = string }

variable "azure_resource_group_name" {
  description = "Existing resource group for the storage account; must be set when cloud_provider is azure"
  type        = string
  default     = null
}

variable "azure_location" {
  description = "Azure region for the storage account (used only when cloud_provider is azure)"
  type        = string
  default     = "eastus"
}

locals {
  # Azure storage account names allow only 3-24 lowercase letters and digits,
  # so derive one from bucket_name + env by stripping everything else.
  azure_storage_account_name = substr(
    replace(lower("${var.bucket_name}${var.env}"), "/[^a-z0-9]/", ""),
    0,
    24,
  )
}

# AWS S3
resource "aws_s3_bucket" "this" {
  count  = var.cloud_provider == "aws" ? 1 : 0
  bucket = "${var.bucket_name}-${var.env}"
}

resource "aws_s3_bucket_versioning" "this" {
  count  = var.cloud_provider == "aws" ? 1 : 0
  bucket = aws_s3_bucket.this[0].id
  versioning_configuration { status = "Enabled" }
}

# Azure Blob Storage
# A container must live inside a storage account. The account was referenced
# but never declared before, which made the module fail validation.
resource "azurerm_storage_account" "this" {
  count                    = var.cloud_provider == "azure" ? 1 : 0
  name                     = local.azure_storage_account_name
  resource_group_name      = var.azure_resource_group_name
  location                 = var.azure_location
  account_tier             = "Standard"
  account_replication_type = "LRS"
}

resource "azurerm_storage_container" "this" {
  count                 = var.cloud_provider == "azure" ? 1 : 0
  name                  = var.bucket_name
  storage_account_name  = azurerm_storage_account.this[0].name
  container_access_type = "private"
}

# GCP Cloud Storage
resource "google_storage_bucket" "this" {
  count         = var.cloud_provider == "gcp" ? 1 : 0
  name          = "${var.bucket_name}-${var.env}"
  location      = "US"
  # Allow destroying non-empty buckets everywhere except prod.
  force_destroy = var.env != "prod"

  versioning { enabled = true }

  uniform_bucket_level_access = true
}

# Unified output regardless of provider
output "bucket_endpoint" {
  value = (
    var.cloud_provider == "aws"   ? "s3://${var.bucket_name}-${var.env}" :
    var.cloud_provider == "azure" ? "https://${azurerm_storage_account.this[0].name}.blob.core.windows.net/${var.bucket_name}" :
    "gs://${var.bucket_name}-${var.env}"
  )
}

3. Pulumi Cross-Cloud Stacks

Pulumi uses real programming languages, making conditional multi-cloud logic more expressive than HCL:

// index.ts — Pulumi program deploying to AWS and GCP simultaneously
import * as aws from "@pulumi/aws";
import * as gcp from "@pulumi/gcp";
import * as pulumi from "@pulumi/pulumi";

const stackConfig = new pulumi.Config();
const environment = stackConfig.require("env");

/** Builds the tag/label set shared by both buckets for a given cloud and role. */
const ownership = (cloud: string, role: string) => ({
  environment,
  cloud,
  role,
});

// AWS: primary copy of the data, versioned.
const primaryStore = new aws.s3.BucketV2("primary-data", {
  bucket: `my-data-${environment}-primary`,
  tags: ownership("aws", "primary"),
});

new aws.s3.BucketVersioningV2("primary-versioning", {
  bucket: primaryStore.id,
  versioningConfiguration: { status: "Enabled" },
});

// GCP: disaster-recovery copy in the "US" multi-region, versioned, with
// uniform bucket-level access.
const drStore = new gcp.storage.Bucket("dr-data", {
  name: `my-data-${environment}-dr`,
  location: "US",
  labels: ownership("gcp", "dr"),
  versioning: { enabled: true },
  uniformBucketLevelAccess: true,
});

// Route 53 health check probing the primary API over HTTPS. Only the health
// check is created here; no routing records are defined in this program.
const primaryProbe = new aws.route53.HealthCheck("primary-health", {
  type: "HTTPS",
  fqdn: "api.my-service.com",
  port: 443,
  resourcePath: "/healthz",
  requestInterval: 30,
  failureThreshold: 3,
});

// Stack outputs.
export const primaryBucket = primaryStore.bucket;
export const drBucket = drStore.name;
export const healthCheckId = primaryProbe.id;

4. Crossplane for Multi-Cloud Self-Service Infrastructure

Crossplane runs in Kubernetes and provisions infrastructure across clouds using Kubernetes-native resources:

# composition.yaml — Compositions backing the multi-cloud XDatabase type
#
# One Composition per cloud. A patch-and-transform Composition provisions
# EVERY entry in spec.resources, so listing RDS and Cloud SQL together would
# create both databases for each claim. Claims choose a cloud through
# compositionSelector.matchLabels, which matches the `provider` label below.
# NOTE(review): confirm the managed-resource apiVersions against the
# Crossplane provider packages actually installed in the cluster.
apiVersion: apiextensions.crossplane.io/v1
kind: Composition
metadata:
  name: multicloud-database-aws
  labels:
    crossplane.io/xrd: xdatabases.platform.example.com
    provider: aws
spec:
  compositeTypeRef:
    apiVersion: platform.example.com/v1alpha1
    kind: XDatabase

  resources:
  - name: aws-rds
    base:
      apiVersion: rds.aws.crossplane.io/v1beta1
      kind: DBInstance
      spec:
        forProvider:
          region: us-east-1
          dbInstanceClass: db.r6g.large
          engine: postgres
          engineVersion: "16"
          multiAZ: true
          storageEncrypted: true
          skipFinalSnapshot: false
    patches:
    # Copy user-facing parameters from the composite resource into RDS.
    - type: FromCompositeFieldPath
      fromFieldPath: spec.parameters.storageGB
      toFieldPath: spec.forProvider.allocatedStorage
    - type: FromCompositeFieldPath
      fromFieldPath: spec.parameters.dbName
      toFieldPath: spec.forProvider.dbName
    readinessChecks:
    # Ready only once RDS reports the instance as available.
    - type: MatchString
      fieldPath: status.atProvider.dbInstanceStatus
      matchString: available
---
apiVersion: apiextensions.crossplane.io/v1
kind: Composition
metadata:
  name: multicloud-database-gcp
  labels:
    crossplane.io/xrd: xdatabases.platform.example.com
    provider: gcp
spec:
  compositeTypeRef:
    apiVersion: platform.example.com/v1alpha1
    kind: XDatabase

  resources:
  - name: gcp-cloud-sql
    base:
      apiVersion: sql.gcp.crossplane.io/v1beta1
      kind: CloudSQLInstance
      spec:
        forProvider:
          region: us-central1
          databaseVersion: POSTGRES_16
          settings:
            tier: db-custom-4-16384
            availabilityType: REGIONAL
            backupConfiguration:
              enabled: true
              pointInTimeRecoveryEnabled: true
    patches:
    - type: FromCompositeFieldPath
      fromFieldPath: spec.parameters.storageGB
      toFieldPath: spec.forProvider.settings.dataDiskSizeGb
# claim.yaml — Developer requests a database without knowing the cloud provider
# Namespaced claim for a 100 GB Postgres database named "payments".
# NOTE(review): kind Database is presumably the claim kind offered by the
# XDatabase XRD (claimNames) — confirm against the XRD.
apiVersion: platform.example.com/v1alpha1
kind: Database
metadata:
  name: payments-db
  namespace: payments
spec:
  parameters:
    storageGB: 100
    dbName: payments
    engine: postgres
  # Picks the Composition by label; a Composition carrying a matching
  # `provider` label must exist for this claim to be satisfied.
  compositionSelector:
    matchLabels:
      provider: aws   # Switch to gcp to deploy on GCP instead
  # Secret in this namespace that receives the connection details.
  writeConnectionSecretToRef:
    name: payments-db-credentials

5. Service Mesh for Cross-Cloud Connectivity

Istio with multi-cluster federation routes traffic across clouds transparently:

# Multi-cluster Istio setup: primary control plane on AWS EKS, remote cluster
# on GCP GKE, joined into one mesh (mesh1) across two networks.
#
# Required environment:
#   ISTIOD_EXTERNAL_IP       externally reachable address of the primary istiod
#   GCP_EASTWEST_GATEWAY_IP  address of the east-west gateway in the GCP cluster
# Fail fast if either is missing instead of applying manifests with blanks.
: "${ISTIOD_EXTERNAL_IP:?set ISTIOD_EXTERNAL_IP to the primary istiod external address}"
: "${GCP_EASTWEST_GATEWAY_IP:?set GCP_EASTWEST_GATEWAY_IP to the GCP east-west gateway address}"

# Install Istio primary cluster (AWS EKS)
istioctl install \
  --set profile=default \
  --set values.pilot.env.EXTERNAL_ISTIOD=false \
  --set values.global.meshID=mesh1 \
  --set values.global.multiCluster.clusterName=aws-us-east-1 \
  --set values.global.network=aws-network

# Install Istio remote cluster (GCP GKE) — registers to primary
istioctl install \
  --set profile=remote \
  --set values.global.meshID=mesh1 \
  --set values.global.multiCluster.clusterName=gcp-us-central1 \
  --set values.global.network=gcp-network \
  --set values.istiodRemote.injectionURL="https://${ISTIOD_EXTERNAL_IP}:15017/inject"

# Create east-west gateways for cross-cluster service discovery
# (quoted heredoc: the manifest has no shell variables and is applied verbatim)
kubectl apply -f - <<'EOF'
apiVersion: networking.istio.io/v1alpha3
kind: Gateway
metadata:
  name: cross-network-gateway
  namespace: istio-system
spec:
  selector:
    istio: eastwestgateway
  servers:
  - port:
      number: 15443
      name: tls
      protocol: TLS
    tls:
      mode: AUTO_PASSTHROUGH
    hosts:
    - "*.local"
EOF

# Expose services for cross-cluster discovery
# The heredoc delimiter is deliberately UNQUOTED so the shell substitutes
# ${GCP_EASTWEST_GATEWAY_IP}; with <<'EOF' the literal text would be applied
# as the endpoint address.
kubectl apply -f - <<EOF
apiVersion: networking.istio.io/v1alpha3
kind: ServiceEntry
metadata:
  name: payments-service-gcp
  namespace: payments
spec:
  hosts:
  - payments.payments.svc.cluster.local
  location: MESH_INTERNAL
  ports:
  - number: 8080
    name: http
    protocol: HTTP
  resolution: STATIC
  addresses:
  - 10.96.0.100   # GCP cluster service VIP
  endpoints:
  - address: "${GCP_EASTWEST_GATEWAY_IP}"
    network: gcp-network
    ports:
      http: 15443
EOF
# traffic-policy.yaml — Weighted traffic split for cross-cloud canary deployment
# The VirtualService splits requests for the `payments` host 90/10 between two
# subsets; the DestinationRule defines those subsets by the `cloud` label and
# adds outlier detection.
apiVersion: networking.istio.io/v1beta1
kind: VirtualService
metadata:
  name: payments-split
  namespace: payments
spec:
  hosts:
  - payments
  http:
  - route:
    # Weights across the route's destinations total 100.
    - destination:
        host: payments
        subset: aws-primary
      weight: 90
    - destination:
        host: payments
        subset: gcp-canary
      weight: 10   # 10% to GCP during migration validation
---
apiVersion: networking.istio.io/v1beta1
kind: DestinationRule
metadata:
  name: payments-dr
  namespace: payments
spec:
  host: payments
  # Subset names must match those referenced by the VirtualService above.
  subsets:
  - name: aws-primary
    labels:
      cloud: aws
  - name: gcp-canary
    labels:
      cloud: gcp
  trafficPolicy:
    # Eject a host for 30s after 5 consecutive 5xx responses, checked every 30s.
    outlierDetection:
      consecutive5xxErrors: 5
      interval: 30s
      baseEjectionTime: 30s

6. Vendor Lock-In Risk Assessment

#!/bin/bash
# lockup-scan.sh — identify cloud-specific dependencies in infrastructure code
# Run from your IaC repository root, or pass the directory to scan as $1.
#
# Prints, per cloud, how many *.tf files mention that provider's resource
# prefix, the ten most-used AWS resource types, and the files that use
# proprietary messaging or database services.

# An explicit search root keeps `grep -r` portable; not every grep defaults to
# the current directory when no path is given.
SCAN_ROOT="${1:-.}"

# Print the number of *.tf files under SCAN_ROOT containing pattern $1.
count_tf_files() {
  grep -r "$1" --include="*.tf" -l "$SCAN_ROOT" | wc -l
}

# List the *.tf files under SCAN_ROOT matching extended regex $1.
list_tf_files() {
  grep -rE "$1" --include="*.tf" -l "$SCAN_ROOT"
}

echo "=== Vendor Lock-In Risk Scan ==="
echo ""

echo "AWS-specific resources:"
count_tf_files "aws_"
# Top 10 AWS resource types by number of declarations.
grep -r "aws_" --include="*.tf" -h "$SCAN_ROOT" | grep "^resource" | \
  sed 's/resource "\(aws_[^"]*\)".*/\1/' | sort | uniq -c | sort -rn | head -10

echo ""
echo "Azure-specific resources:"
count_tf_files "azurerm_"

echo ""
echo "GCP-specific resources:"
count_tf_files "google_"

echo ""
echo "Cloud-agnostic patterns (Kubernetes, Helm):"
count_tf_files "kubernetes_"

echo ""
echo "Proprietary messaging services (high lock-in risk):"
list_tf_files "(aws_sqs|aws_sns|azurerm_servicebus|google_pubsub)"

echo ""
echo "Proprietary databases (high migration cost):"
# Aurora is declared with aws_rds_cluster; there is no aws_aurora resource type.
list_tf_files "(aws_dynamodb|azurerm_cosmosdb|google_spanner|aws_rds_cluster)"

7. Cloud Migration Strategy

#!/bin/bash
# phased-migration.sh — traffic shifting script for cloud-to-cloud migration
# Uses AWS Route 53 weighted routing to shift traffic incrementally.
#
# Each phase updates both weighted records, waits, then checks the 5xx count
# for the observation window. If the count exceeds MAX_5XX_COUNT, the initial
# weights are restored and the script exits non-zero.

HOSTED_ZONE_ID="Z1234567890ABC"
RECORD_NAME="api.my-service.com"
AWS_WEIGHT=100     # Start: all traffic on source cloud (also the rollback state)
TARGET_WEIGHT=0    # Start: no traffic on target cloud
MAX_5XX_COUNT=10   # Abort when more than this many 5xx responses are observed

# apply_weights AWS_WEIGHT TARGET_WEIGHT
# Upserts both weighted records; returns the aws CLI exit status.
# NOTE(review): Route 53 alias targets normally point at AWS resources or at
# records in the same hosted zone — confirm the gcp-target alias is accepted,
# or switch that record to a plain A/CNAME record with a TTL.
apply_weights() {
  local aws_weight=$1
  local target_weight=$2

  aws route53 change-resource-record-sets \
    --hosted-zone-id "$HOSTED_ZONE_ID" \
    --change-batch "$(cat <<JSON
{
  "Changes": [
    {
      "Action": "UPSERT",
      "ResourceRecordSet": {
        "Name": "${RECORD_NAME}",
        "Type": "A",
        "SetIdentifier": "aws-primary",
        "Weight": ${aws_weight},
        "AliasTarget": {
          "HostedZoneId": "Z35SXDOTRQ7X7K",
          "DNSName": "my-alb.us-east-1.elb.amazonaws.com",
          "EvaluateTargetHealth": true
        }
      }
    },
    {
      "Action": "UPSERT",
      "ResourceRecordSet": {
        "Name": "${RECORD_NAME}",
        "Type": "A",
        "SetIdentifier": "gcp-target",
        "Weight": ${target_weight},
        "AliasTarget": {
          "HostedZoneId": "Z1234GCPALB",
          "DNSName": "my-gcp-lb.endpoints.my-project.cloud.goog",
          "EvaluateTargetHealth": true
        }
      }
    }
  ]
}
JSON
)"
}

# shift_traffic AWS_WEIGHT TARGET_WEIGHT
# Applies one migration phase, observes for 5 minutes, and rolls back on errors.
shift_traffic() {
  local aws_weight=$1
  local target_weight=$2
  local window_start

  echo "Shifting traffic: AWS=${aws_weight}%, Target=${target_weight}%"

  # Do not keep migrating on top of a DNS change that never happened.
  if ! apply_weights "$aws_weight" "$target_weight"; then
    echo "ERROR: Route 53 update failed; aborting migration." >&2
    exit 1
  fi

  echo "Waiting 5 minutes to observe error rates..."
  sleep 300

  # GNU date takes -d; BSD/macOS date takes -v.
  window_start=$(date -u -d '5 minutes ago' +%Y-%m-%dT%H:%M:%SZ 2>/dev/null \
    || date -u -v-5M +%Y-%m-%dT%H:%M:%SZ)

  # Check 5xx count for the window — abort if it exceeds MAX_5XX_COUNT.
  # NOTE(review): no --dimensions is passed; ALB metrics are published per
  # LoadBalancer dimension, so this query may return no datapoints — confirm.
  # It also measures the AWS ALB rather than the target cloud.
  ERROR_RATE=$(aws cloudwatch get-metric-statistics \
    --namespace AWS/ApplicationELB \
    --metric-name HTTPCode_Target_5XX_Count \
    --start-time "$window_start" \
    --end-time "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    --period 300 \
    --statistics Sum \
    --query 'Datapoints[0].Sum' \
    --output text)

  # No datapoint yields "None" (or nothing on CLI failure); treat as zero
  # instead of feeding a non-number to the comparison.
  if ! [[ "$ERROR_RATE" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
    echo "WARN: no 5xx datapoint returned (got '${ERROR_RATE}'); assuming 0." >&2
    ERROR_RATE=0
  fi

  if awk -v observed="$ERROR_RATE" -v limit="$MAX_5XX_COUNT" 'BEGIN { exit !(observed > limit) }'; then
    echo "ERROR: Error rate too high (${ERROR_RATE} errors). Rolling back."
    # Restore the starting weights directly. Re-entering shift_traffic here
    # would sleep and re-check, and could recurse forever while errors persist.
    apply_weights "$AWS_WEIGHT" "$TARGET_WEIGHT" \
      || echo "ERROR: rollback failed; restore Route 53 weights manually." >&2
    exit 1
  fi
}

# Migration phases: 5% → 25% → 50% → 100%
shift_traffic 95 5
shift_traffic 75 25
shift_traffic 50 50
shift_traffic 0 100
echo "Migration complete: 100% traffic on target cloud."

Few-Shot Examples

Example 1: Evaluating AWS vs GCP for a Data Analytics Workload

Input:

"We need to choose between AWS and GCP for a new data analytics platform processing 50TB/day of event data. We already run our application on AWS. What should we consider?"

Output:

This is a data gravity vs capability tradeoff. Here is the structured evaluation:

Option A: AWS (extend existing estate)

Advantages:

  • No egress cost from application tier to analytics tier — both in AWS
  • Unified IAM, VPC, and billing
  • Redshift Spectrum can query S3 data in place
  • Existing AWS expertise applies

Disadvantages:

  • BigQuery is objectively superior for ad-hoc analytical queries at this scale
  • Redshift requires cluster sizing decisions; BigQuery auto-scales invisibly
  • AWS Glue for ETL is mature but heavier to operate than Dataflow

Estimated cost (50TB/day processed):

  • Redshift: $18,000/mo (ra3.4xlarge × 8, plus S3 storage)
  • Athena on S3: $12,500/mo (≈2,500 TB scanned per month × $5/TB scanned, with partitioning)

Option B: GCP for analytics only (hybrid)

Advantages:

  • BigQuery flat-rate at 500 slots: ~$8,000/mo regardless of query volume
  • BQML eliminates a separate ML training environment
  • Looker natively integrates without data movement
  • Separates analytics scaling from application scaling

Disadvantages:

  • Egress from AWS to GCP: 50TB/day × 30 days × $0.08/GB = ~$120,000/month ← deal-breaker
  • Dual IAM, networking, and billing complexity
  • Requires GCP expertise the team may not have

Verdict: Egress costs make Option B non-viable at this scale unless GCP interconnect is established. Recommended path:

1. Keep analytics in AWS to avoid egress cost

2. Use Athena with aggressive table partitioning (reduce scan cost by 90%+)

3. Re-evaluate in 12 months if Redshift/Athena limitations become blockers

4. If analytical capability gap is critical today, establish AWS Direct Connect ↔ GCP Cloud Interconnect before moving data


Example 2: Designing a Cloud-Agnostic Messaging Abstraction

Input:

"We use AWS SQS today. We want to be able to switch to Azure Service Bus or GCP Pub/Sub without rewriting our application code."

Output:

Design an abstraction interface with provider-specific adapters:

// messaging.interface.ts — Cloud-agnostic messaging contract

/** A received message in provider-neutral form. */
export interface Message {
  /** Message identifier. */
  id: string;
  /** Message payload; typed unknown, so consumers must narrow it before use. */
  body: unknown;
  /** String-valued metadata carried alongside the body. */
  attributes: Record<string, string>;
  receiptHandle: string;   // Provider-specific; used for acknowledgment
}

/** Operations every provider adapter must implement. */
export interface MessagingClient {
  /**
   * Sends a message.
   * @param topic destination topic or queue
   * @param message payload to send
   * @param attributes optional string metadata to attach
   * @returns a promise resolving to a string (the SQS adapter returns the message ID)
   */
  publish(topic: string, message: unknown, attributes?: Record<string, string>): Promise<string>;
  /**
   * Delivers messages from a queue to `handler`.
   * @param queue source queue
   * @param handler async callback invoked with each received message
   */
  subscribe(queue: string, handler: (msg: Message) => Promise<void>): Promise<void>;
  /**
   * Acknowledges a message.
   * @param queue queue the message was received from
   * @param receiptHandle the `receiptHandle` of the received message
   */
  acknowledge(queue: string, receiptHandle: string): Promise<void>;
  /**
   * Dead-letters a message.
   * @param queue queue the message was received from
   * @param receiptHandle the `receiptHandle` of the received message
   * @param reason why the message is being dead-lettered
   */
  deadLetter(queue: string, receiptHandle: string, reason: string): Promise<void>;
}
// adapters/sqs.adapter.ts — AWS SQS implementation
import { SQSClient, SendMessageCommand, ReceiveMessageCommand, DeleteMessageCommand } from "@aws-sdk/client-sqs";

/**
 * AWS SQS implementation of the MessagingClient contract.
 *
 * Queues are addressed by their SQS queue URL. The client region is read from
 * the AWS_REGION environment variable.
 */
export class SQSAdapter implements MessagingClient {
  private readonly client = new SQSClient({ region: process.env.AWS_REGION });

  /**
   * Sends `body` to the queue as a JSON string.
   *
   * @param queueUrl SQS queue URL
   * @param body payload; serialized with JSON.stringify
   * @param attributes optional metadata, sent as String-typed message attributes
   * @returns the MessageId assigned by SQS
   * @throws Error if SQS returns no MessageId
   */
  async publish(queueUrl: string, body: unknown, attributes?: Record<string, string>): Promise<string> {
    const attributeEntries = Object.entries(attributes ?? {});
    const messageAttributes: Record<string, { DataType: string; StringValue: string }> = {};
    for (const [key, value] of attributeEntries) {
      messageAttributes[key] = { DataType: "String", StringValue: value };
    }

    const result = await this.client.send(new SendMessageCommand({
      QueueUrl: queueUrl,
      MessageBody: JSON.stringify(body),
      // Omit the field entirely when there is nothing to attach.
      MessageAttributes: attributeEntries.length > 0 ? messageAttributes : undefined,
    }));
    if (result.MessageId === undefined) {
      throw new Error(`SQS returned no MessageId for queue ${queueUrl}`);
    }
    return result.MessageId;
  }

  /**
   * Long-polls the queue forever, passing each message to `handler` in order.
   *
   * Messages are not deleted here; nothing in this method calls `acknowledge`.
   * An error thrown by `handler`, or a body that is not valid JSON, propagates
   * and ends the loop.
   *
   * @param queueUrl SQS queue URL
   * @param handler async callback invoked once per received message
   */
  async subscribe(queueUrl: string, handler: (msg: Message) => Promise<void>): Promise<void> {
    while (true) {
      const response = await this.client.send(new ReceiveMessageCommand({
        QueueUrl: queueUrl,
        MaxNumberOfMessages: 10,
        WaitTimeSeconds: 20,
        // SQS returns message attributes only when they are requested.
        MessageAttributeNames: ["All"],
      }));
      for (const sqsMsg of response.Messages ?? []) {
        // SQS attribute values are objects ({ DataType, StringValue, ... });
        // flatten them to the string map the Message contract promises.
        const attributes: Record<string, string> = {};
        for (const [key, value] of Object.entries(sqsMsg.MessageAttributes ?? {})) {
          const text = (value as { StringValue?: string }).StringValue;
          if (text !== undefined) {
            attributes[key] = text;
          }
        }
        const msg: Message = {
          id: sqsMsg.MessageId!,
          body: JSON.parse(sqsMsg.Body!),
          attributes,
          receiptHandle: sqsMsg.ReceiptHandle!,
        };
        await handler(msg);
      }
    }
  }

  /**
   * Acknowledges a message by deleting it from the queue.
   *
   * @param queueUrl SQS queue URL
   * @param receiptHandle receipt handle from the received message
   */
  async acknowledge(queueUrl: string, receiptHandle: string): Promise<void> {
    await this.client.send(new DeleteMessageCommand({ QueueUrl: queueUrl, ReceiptHandle: receiptHandle }));
  }

  /** Intentionally a no-op; all three arguments are ignored. */
  async deadLetter(_queue: string, _receiptHandle: string, _reason: string): Promise<void> {
    // SQS routes to DLQ automatically after maxReceiveCount — no explicit action needed
  }
}
// factory.ts — select adapter from environment variable
import { SQSAdapter } from "./adapters/sqs.adapter";
import { ServiceBusAdapter } from "./adapters/service-bus.adapter";
import { PubSubAdapter } from "./adapters/pubsub.adapter";
import { MessagingClient } from "./messaging.interface";

/**
 * Builds the messaging client selected by the MESSAGING_PROVIDER environment
 * variable, falling back to "sqs" when the variable is unset.
 *
 * @returns a new adapter instance for the selected provider
 * @throws Error when MESSAGING_PROVIDER names an unsupported provider
 */
export function createMessagingClient(): MessagingClient {
  const selected = process.env.MESSAGING_PROVIDER ?? "sqs";

  if (selected === "sqs") {
    return new SQSAdapter();
  }
  if (selected === "service-bus") {
    return new ServiceBusAdapter();
  }
  if (selected === "pubsub") {
    return new PubSubAdapter();
  }

  throw new Error(`Unknown messaging provider: ${selected}. Set MESSAGING_PROVIDER=sqs|service-bus|pubsub`);
}

Application code uses only the interface — provider is injected at startup via environment variable. To switch clouds: change `MESSAGING_PROVIDER` and the queue/topic URL. No application code changes.

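Migration path: producers and consumers must share a provider for messages to flow, so cut over in stages. First run consumers against both providers (one deployment with `MESSAGING_PROVIDER=sqs`, one with `MESSAGING_PROVIDER=pubsub`), then flip producers to `MESSAGING_PROVIDER=pubsub` once the new consumers are confirmed stable, and retire the SQS consumers after the SQS queue has drained.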