Multi Cloud Strategist Examples
Externalized from the agent definition per the few-shot-examples rule (#1587).
Multi-Cloud Strategist — Worked Examples
Externalized from the agent definition per the few-shot-examples rule (#1587).
Your Process — Illustrative Sample Blocks
1. Cloud Provider Comparison Framework
Use a structured scorecard before committing to a multi-cloud topology:
#!/bin/bash
# cost-compare.sh — compare compute costs for a specific workload profile
# Requires: AWS CLI, Azure CLI, gcloud SDK configured
VCPU=8
RAM_GB=32
HOURS_PER_MONTH=730
REGION_AWS="us-east-1"
REGION_AZURE="eastus"
REGION_GCP="us-central1"
echo "=== Compute Cost Comparison: ${VCPU} vCPU / ${RAM_GB}GB RAM ==="
echo ""
# AWS: m6i.2xlarge (8 vCPU, 32GB, x86) vs m7g.2xlarge (8 vCPU, 32GB, ARM/Graviton3)
echo "AWS:"
aws pricing get-products \
--service-code AmazonEC2 \
--filters \
"Type=TERM_MATCH,Field=instanceType,Value=m6i.2xlarge" \
"Type=TERM_MATCH,Field=location,Value=US East (N. Virginia)" \
"Type=TERM_MATCH,Field=operatingSystem,Value=Linux" \
"Type=TERM_MATCH,Field=tenancy,Value=Shared" \
"Type=TERM_MATCH,Field=preInstalledSw,Value=NA" \
--query 'PriceList[0]' \
--output json 2>/dev/null | python3 -c "
import json, sys
data = json.load(sys.stdin)
price = list(list(data['terms']['OnDemand'].values())[0]['priceDimensions'].values())[0]['pricePerUnit']['USD']
print(f' m6i.2xlarge: \${float(price):.4f}/hr = \${float(price)*730:.2f}/mo')
" || echo " (pricing API unavailable - check aws configure)"
echo ""
echo "Azure: Standard_D8s_v5 (8 vCPU, 32GB)"
az vm list-skus --location eastus --size Standard_D8s_v5 --query '[0].name' -o tsv 2>/dev/null \
&& echo " See: https://azure.microsoft.com/pricing/details/virtual-machines/linux/" \
|| echo " (az login required)"
echo ""
echo "GCP: n2-standard-8 (8 vCPU, 32GB)"
gcloud compute machine-types describe n2-standard-8 \
--zone us-central1-a \
--format="value(description)" 2>/dev/null \
&& echo " See: https://cloud.google.com/compute/vm-instance-pricing" \
|| echo " (gcloud auth required)"
2. Terraform Multi-Provider Configuration
Terraform's multi-provider support is the most common multi-cloud IaC pattern. Organize by provider to maintain clarity:
# providers.tf — multi-cloud provider configuration
terraform {
required_version = ">= 1.7"
required_providers {
aws = {
source = "hashicorp/aws"
version = "~> 5.0"
}
azurerm = {
source = "hashicorp/azurerm"
version = "~> 3.90"
}
google = {
source = "hashicorp/google"
version = "~> 5.0"
}
}
# Single backend — each workspace targets a different cloud environment
backend "s3" {
bucket = "my-terraform-state"
key = "multi-cloud/production/terraform.tfstate"
region = "us-east-1"
encrypt = true
dynamodb_table = "terraform-locks"
}
}
provider "aws" {
region = var.aws_region
default_tags {
tags = local.common_tags
}
}
provider "azurerm" {
features {
key_vault { purge_soft_delete_on_destroy = false }
}
subscription_id = var.azure_subscription_id
}
provider "google" {
project = var.gcp_project_id
region = var.gcp_region
}
# modules/object-storage/main.tf — cloud-agnostic storage abstraction
# Deploy with: terraform apply -var="cloud_provider=aws"
variable "cloud_provider" {
description = "Target cloud provider: aws, azure, or gcp"
type = string
validation {
condition = contains(["aws", "azure", "gcp"], var.cloud_provider)
error_message = "cloud_provider must be aws, azure, or gcp"
}
}
variable "bucket_name" { type = string }
variable "env" { type = string }
# AWS S3
resource "aws_s3_bucket" "this" {
count = var.cloud_provider == "aws" ? 1 : 0
bucket = "${var.bucket_name}-${var.env}"
}
resource "aws_s3_bucket_versioning" "this" {
count = var.cloud_provider == "aws" ? 1 : 0
bucket = aws_s3_bucket.this[0].id
versioning_configuration { status = "Enabled" }
}
# Azure Blob Storage
resource "azurerm_storage_container" "this" {
count = var.cloud_provider == "azure" ? 1 : 0
name = var.bucket_name
storage_account_name = azurerm_storage_account.this[0].name
container_access_type = "private"
}
# GCP Cloud Storage
resource "google_storage_bucket" "this" {
count = var.cloud_provider == "gcp" ? 1 : 0
name = "${var.bucket_name}-${var.env}"
location = "US"
force_destroy = var.env != "prod"
versioning { enabled = true }
uniform_bucket_level_access = true
}
# Unified output regardless of provider
output "bucket_endpoint" {
value = (
var.cloud_provider == "aws" ? "s3://${var.bucket_name}-${var.env}" :
var.cloud_provider == "azure" ? "https://${azurerm_storage_account.this[0].name}.blob.core.windows.net/${var.bucket_name}" :
"gs://${var.bucket_name}-${var.env}"
)
}
3. Pulumi Cross-Cloud Stacks
Pulumi uses real programming languages, making conditional multi-cloud logic more expressive than HCL:
// index.ts — Pulumi program deploying to AWS and GCP simultaneously
import * as aws from "@pulumi/aws";
import * as gcp from "@pulumi/gcp";
import * as pulumi from "@pulumi/pulumi";
const config = new pulumi.Config();
const env = config.require("env");
// AWS: primary region — us-east-1
const awsBucket = new aws.s3.BucketV2("primary-data", {
bucket: `my-data-${env}-primary`,
tags: { environment: env, cloud: "aws", role: "primary" },
});
new aws.s3.BucketVersioningV2("primary-versioning", {
bucket: awsBucket.id,
versioningConfiguration: { status: "Enabled" },
});
// GCP: disaster recovery region — us-central1
const gcpBucket = new gcp.storage.Bucket("dr-data", {
name: `my-data-${env}-dr`,
location: "US",
labels: { environment: env, cloud: "gcp", role: "dr" },
versioning: { enabled: true },
uniformBucketLevelAccess: true,
});
// Cross-cloud DNS routing via Route 53 with health checks
const healthCheck = new aws.route53.HealthCheck("primary-health", {
fqdn: "api.my-service.com",
port: 443,
type: "HTTPS",
resourcePath: "/healthz",
failureThreshold: 3,
requestInterval: 30,
});
export const primaryBucket = awsBucket.bucket;
export const drBucket = gcpBucket.name;
export const healthCheckId = healthCheck.id;
4. Crossplane for Multi-Cloud Self-Service Infrastructure
Crossplane runs in Kubernetes and provisions infrastructure across clouds using Kubernetes-native resources:
# composition.yaml — CompositeResourceDefinition for a multi-cloud database
apiVersion: apiextensions.crossplane.io/v1
kind: Composition
metadata:
name: multicloud-database
labels:
crossplane.io/xrd: xdatabases.platform.example.com
spec:
compositeTypeRef:
apiVersion: platform.example.com/v1alpha1
kind: XDatabase
# Patch-and-transform: deploy to AWS RDS or GCP Cloud SQL based on provider label
resources:
- name: aws-rds
base:
apiVersion: rds.aws.crossplane.io/v1beta1
kind: DBInstance
spec:
forProvider:
region: us-east-1
dbInstanceClass: db.r6g.large
engine: postgres
engineVersion: "16"
multiAZ: true
storageEncrypted: true
skipFinalSnapshot: false
patches:
- type: FromCompositeFieldPath
fromFieldPath: spec.parameters.storageGB
toFieldPath: spec.forProvider.allocatedStorage
- type: FromCompositeFieldPath
fromFieldPath: spec.parameters.dbName
toFieldPath: spec.forProvider.dbName
readinessChecks:
- type: MatchString
fieldPath: status.atProvider.dbInstanceStatus
matchString: available
- name: gcp-cloud-sql
base:
apiVersion: sql.gcp.crossplane.io/v1beta1
kind: CloudSQLInstance
spec:
forProvider:
region: us-central1
databaseVersion: POSTGRES_16
settings:
tier: db-custom-4-16384
availabilityType: REGIONAL
backupConfiguration:
enabled: true
pointInTimeRecoveryEnabled: true
patches:
- type: FromCompositeFieldPath
fromFieldPath: spec.parameters.storageGB
toFieldPath: spec.forProvider.settings.dataDiskSizeGb
# claim.yaml — Developer requests a database without knowing the cloud provider
apiVersion: platform.example.com/v1alpha1
kind: Database
metadata:
name: payments-db
namespace: payments
spec:
parameters:
storageGB: 100
dbName: payments
engine: postgres
compositionSelector:
matchLabels:
provider: aws # Switch to gcp to deploy on GCP instead
writeConnectionSecretToRef:
name: payments-db-credentials
5. Service Mesh for Cross-Cloud Connectivity
Istio with multi-cluster federation routes traffic across clouds transparently:
# Install Istio primary cluster (AWS EKS)
istioctl install \
--set profile=default \
--set values.pilot.env.EXTERNAL_ISTIOD=false \
--set values.global.meshID=mesh1 \
--set values.global.multiCluster.clusterName=aws-us-east-1 \
--set values.global.network=aws-network
# Install Istio remote cluster (GCP GKE) — registers to primary
istioctl install \
--set profile=remote \
--set values.global.meshID=mesh1 \
--set values.global.multiCluster.clusterName=gcp-us-central1 \
--set values.global.network=gcp-network \
--set values.istiodRemote.injectionURL="https://${ISTIOD_EXTERNAL_IP}:15017/inject"
# Create east-west gateways for cross-cluster service discovery
kubectl apply -f - <<'EOF'
apiVersion: networking.istio.io/v1alpha3
kind: Gateway
metadata:
name: cross-network-gateway
namespace: istio-system
spec:
selector:
istio: eastwestgateway
servers:
- port:
number: 15443
name: tls
protocol: TLS
tls:
mode: AUTO_PASSTHROUGH
hosts:
- "*.local"
EOF
# Expose services for cross-cluster discovery
kubectl apply -f - <<'EOF'
apiVersion: networking.istio.io/v1alpha3
kind: ServiceEntry
metadata:
name: payments-service-gcp
namespace: payments
spec:
hosts:
- payments.payments.svc.cluster.local
location: MESH_INTERNAL
ports:
- number: 8080
name: http
protocol: HTTP
resolution: STATIC
addresses:
- 10.96.0.100 # GCP cluster service VIP
endpoints:
- address: "${GCP_EASTWEST_GATEWAY_IP}"
network: gcp-network
ports:
http: 15443
EOF
# traffic-policy.yaml — Weighted traffic split for cross-cloud canary deployment
apiVersion: networking.istio.io/v1beta1
kind: VirtualService
metadata:
name: payments-split
namespace: payments
spec:
hosts:
- payments
http:
- route:
- destination:
host: payments
subset: aws-primary
weight: 90
- destination:
host: payments
subset: gcp-canary
weight: 10 # 10% to GCP during migration validation
---
apiVersion: networking.istio.io/v1beta1
kind: DestinationRule
metadata:
name: payments-dr
namespace: payments
spec:
host: payments
subsets:
- name: aws-primary
labels:
cloud: aws
- name: gcp-canary
labels:
cloud: gcp
trafficPolicy:
outlierDetection:
consecutive5xxErrors: 5
interval: 30s
baseEjectionTime: 30s
6. Vendor Lock-In Risk Assessment
#!/bin/bash
# lockup-scan.sh — identify cloud-specific dependencies in infrastructure code
# Run from your IaC repository root
echo "=== Vendor Lock-In Risk Scan ==="
echo ""
echo "AWS-specific resources:"
grep -r "aws_" --include="*.tf" -l | wc -l
grep -r "aws_" --include="*.tf" -h | grep "^resource" | \
sed 's/resource "\(aws_[^"]*\)".*/\1/' | sort | uniq -c | sort -rn | head -10
echo ""
echo "Azure-specific resources:"
grep -r "azurerm_" --include="*.tf" -l | wc -l
echo ""
echo "GCP-specific resources:"
grep -r "google_" --include="*.tf" -l | wc -l
echo ""
echo "Cloud-agnostic patterns (Kubernetes, Helm):"
grep -r "kubernetes_" --include="*.tf" -l | wc -l
echo ""
echo "Proprietary messaging services (high lock-in risk):"
grep -rE "(aws_sqs|aws_sns|azurerm_servicebus|google_pubsub)" --include="*.tf" -l
echo ""
echo "Proprietary databases (high migration cost):"
grep -rE "(aws_dynamodb|azurerm_cosmosdb|google_spanner|aws_aurora)" --include="*.tf" -l
7. Cloud Migration Strategy
#!/bin/bash
# phased-migration.sh — traffic shifting script for cloud-to-cloud migration
# Uses AWS Route 53 weighted routing to shift traffic incrementally
HOSTED_ZONE_ID="Z1234567890ABC"
RECORD_NAME="api.my-service.com"
AWS_WEIGHT=100 # Start: all traffic on source cloud
TARGET_WEIGHT=0 # Start: no traffic on target cloud
shift_traffic() {
local aws_weight=$1
local target_weight=$2
echo "Shifting traffic: AWS=${aws_weight}%, Target=${target_weight}%"
aws route53 change-resource-record-sets \
--hosted-zone-id "$HOSTED_ZONE_ID" \
--change-batch "$(cat <<JSON
{
"Changes": [
{
"Action": "UPSERT",
"ResourceRecordSet": {
"Name": "${RECORD_NAME}",
"Type": "A",
"SetIdentifier": "aws-primary",
"Weight": ${aws_weight},
"AliasTarget": {
"HostedZoneId": "Z35SXDOTRQ7X7K",
"DNSName": "my-alb.us-east-1.elb.amazonaws.com",
"EvaluateTargetHealth": true
}
}
},
{
"Action": "UPSERT",
"ResourceRecordSet": {
"Name": "${RECORD_NAME}",
"Type": "A",
"SetIdentifier": "gcp-target",
"Weight": ${target_weight},
"AliasTarget": {
"HostedZoneId": "Z1234GCPALB",
"DNSName": "my-gcp-lb.endpoints.my-project.cloud.goog",
"EvaluateTargetHealth": true
}
}
}
]
}
JSON
)"
echo "Waiting 5 minutes to observe error rates..."
sleep 300
# Check error rate on new cloud — abort if >1%
ERROR_RATE=$(aws cloudwatch get-metric-statistics \
--namespace AWS/ApplicationELB \
--metric-name HTTPCode_Target_5XX_Count \
--start-time "$(date -u -d '5 minutes ago' +%Y-%m-%dT%H:%M:%SZ)" \
--end-time "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
--period 300 \
--statistics Sum \
--query 'Datapoints[0].Sum' \
--output text)
if (( $(echo "$ERROR_RATE > 10" | bc -l) )); then
echo "ERROR: Error rate too high (${ERROR_RATE} errors). Rolling back."
shift_traffic 100 0
exit 1
fi
}
# Migration phases: 5% → 25% → 50% → 100%
shift_traffic 95 5
shift_traffic 75 25
shift_traffic 50 50
shift_traffic 0 100
echo "Migration complete: 100% traffic on target cloud."
Few-Shot Examples
Example 1: Evaluating AWS vs GCP for a Data Analytics Workload
Input:
"We need to choose between AWS and GCP for a new data analytics platform processing 50TB/day of event data. We already run our application on AWS. What should we consider?"
Output:
This is a data gravity vs capability tradeoff. Here is the structured evaluation:
Option A: AWS (extend existing estate)
Advantages:
- No egress cost from application tier to analytics tier — both in AWS
- Unified IAM, VPC, and billing
- Redshift Spectrum can query S3 data in place
- Existing AWS expertise applies
Disadvantages:
- BigQuery is objectively superior for ad-hoc analytical queries at this scale
- Redshift requires cluster sizing decisions; BigQuery auto-scales invisibly
- AWS Glue for ETL is mature but heavier to operate than Dataflow
Estimated cost (50TB/day processed):
- Redshift: $18,000/mo (ra3.4xlarge × 8, plus S3 storage)
- Athena on S3: $12,500/mo (50TB × 250 days × $5/TB scanned, with partitioning)
Option B: GCP for analytics only (hybrid)
Advantages:
- BigQuery flat-rate at 500 slots: ~$8,000/mo regardless of query volume
- BQML eliminates a separate ML training environment
- Looker natively integrates without data movement
- Separates analytics scaling from application scaling
Disadvantages:
- Egress from AWS to GCP: 50TB/day × $0.08/GB = ~$120,000/month ← deal-breaker
- Dual IAM, networking, and billing complexity
- Requires GCP expertise the team may not have
Verdict: Egress costs make Option B non-viable at this scale unless GCP interconnect is established. Recommended path:
1. Keep analytics in AWS to avoid egress cost
2. Use Athena with aggressive table partitioning (reduce scan cost by 90%+)
3. Re-evaluate in 12 months if Redshift/Athena limitations become blockers
4. If analytical capability gap is critical today, establish AWS Direct Connect ↔ GCP Cloud Interconnect before moving data
Example 2: Designing a Cloud-Agnostic Messaging Abstraction
Input:
"We use AWS SQS today. We want to be able to switch to Azure Service Bus or GCP Pub/Sub without rewriting our application code."
Output:
Design an abstraction interface with provider-specific adapters:
// messaging.interface.ts — Cloud-agnostic messaging contract
export interface Message {
id: string;
body: unknown;
attributes: Record<string, string>;
receiptHandle: string; // Provider-specific; used for acknowledgment
}
export interface MessagingClient {
publish(topic: string, message: unknown, attributes?: Record<string, string>): Promise<string>;
subscribe(queue: string, handler: (msg: Message) => Promise<void>): Promise<void>;
acknowledge(queue: string, receiptHandle: string): Promise<void>;
deadLetter(queue: string, receiptHandle: string, reason: string): Promise<void>;
}
// adapters/sqs.adapter.ts — AWS SQS implementation
import { SQSClient, SendMessageCommand, ReceiveMessageCommand, DeleteMessageCommand } from "@aws-sdk/client-sqs";
export class SQSAdapter implements MessagingClient {
private client = new SQSClient({ region: process.env.AWS_REGION });
async publish(queueUrl: string, body: unknown): Promise<string> {
const result = await this.client.send(new SendMessageCommand({
QueueUrl: queueUrl,
MessageBody: JSON.stringify(body),
}));
return result.MessageId!;
}
async subscribe(queueUrl: string, handler: (msg: Message) => Promise<void>): Promise<void> {
while (true) {
const response = await this.client.send(new ReceiveMessageCommand({
QueueUrl: queueUrl,
MaxNumberOfMessages: 10,
WaitTimeSeconds: 20,
}));
for (const sqsMsg of response.Messages ?? []) {
const msg: Message = {
id: sqsMsg.MessageId!,
body: JSON.parse(sqsMsg.Body!),
attributes: sqsMsg.MessageAttributes as Record<string, string> ?? {},
receiptHandle: sqsMsg.ReceiptHandle!,
};
await handler(msg);
}
}
}
async acknowledge(queueUrl: string, receiptHandle: string): Promise<void> {
await this.client.send(new DeleteMessageCommand({ QueueUrl: queueUrl, ReceiptHandle: receiptHandle }));
}
async deadLetter(_queue: string, _receiptHandle: string, _reason: string): Promise<void> {
// SQS routes to DLQ automatically after maxReceiveCount — no explicit action needed
}
}
// factory.ts — select adapter from environment variable
import { SQSAdapter } from "./adapters/sqs.adapter";
import { ServiceBusAdapter } from "./adapters/service-bus.adapter";
import { PubSubAdapter } from "./adapters/pubsub.adapter";
import { MessagingClient } from "./messaging.interface";
export function createMessagingClient(): MessagingClient {
const provider = process.env.MESSAGING_PROVIDER ?? "sqs";
switch (provider) {
case "sqs": return new SQSAdapter();
case "service-bus": return new ServiceBusAdapter();
case "pubsub": return new PubSubAdapter();
default:
throw new Error(`Unknown messaging provider: ${provider}. Set MESSAGING_PROVIDER=sqs|service-bus|pubsub`);
}
}
Application code uses only the interface — provider is injected at startup via environment variable. To switch clouds: change `MESSAGING_PROVIDER` and the queue/topic URL. No application code changes.
Migration path: run both providers simultaneously with `MESSAGING_PROVIDER=sqs` for producers and `MESSAGING_PROVIDER=pubsub` for consumers during cutover, then flip producers when consumers are confirmed stable.