From 029c833e5f3ef1770fb9bfcca1f38db6e5e14734 Mon Sep 17 00:00:00 2001 From: mkilp <8791079+mkilp@users.noreply.github.com> Date: Sat, 21 Mar 2026 15:53:23 -0400 Subject: [PATCH 1/9] feat: first draft managed instances --- .../src/components/aws/managed-instances.ts | 622 ++++++++++++++++++ platform/src/components/aws/service.ts | 266 ++++++-- 2 files changed, 842 insertions(+), 46 deletions(-) create mode 100644 platform/src/components/aws/managed-instances.ts diff --git a/platform/src/components/aws/managed-instances.ts b/platform/src/components/aws/managed-instances.ts new file mode 100644 index 0000000000..e1cac871f2 --- /dev/null +++ b/platform/src/components/aws/managed-instances.ts @@ -0,0 +1,622 @@ +import fs from "fs"; +import path from "path"; +import { + all, + ComponentResourceOptions, + interpolate, + output, + Output, + secret, +} from "@pulumi/pulumi"; +import { cloudwatch, ecr, ecs, getRegionOutput, iam } from "@pulumi/aws"; +import { ImageArgs } from "@pulumi/docker-build"; +import { Component, Transform, transform } from "../component.js"; +import { Input } from "../input.js"; +import { VisibleError } from "../error.js"; +import { Link } from "../link.js"; +import { toSeconds } from "../duration.js"; +import { toNumber } from "../cpu.js"; +import { toGBs, toMBs } from "../size.js"; +import { RETENTION } from "./logging.js"; +import { bootstrap } from "./helpers/bootstrap.js"; +import { imageBuilder } from "./helpers/container-builder.js"; +import { normalizeContainers } from "./fargate.js"; + +export const managedGpuManufacturers = ["nvidia"] as const; +export const ManagedGpuAcceleratorName = { + A100: "a100", + A10G: "a10g", + H100: "h100", + K520: "k520", + K80: "k80", + M60: "m60", + T4: "t4", + T4G: "t4g", + V100: "v100", +} as const; + +export type ManagedGpuAcceleratorName = + (typeof ManagedGpuAcceleratorName)[keyof typeof ManagedGpuAcceleratorName]; + +type ManagedContainers = ReturnType; + +type ManagedRoleInput = Input; + +type 
ManagedGpuCount = + | Input + | Input<{ + min: Input; + max?: Input; + }>; + +export interface ManagedServiceCapacityArgs { + cpu?: Input<{ + min: Input; + max?: Input; + }>; + memory?: Input<{ + min: Input<`${number} GB`>; + max?: Input<`${number} GB`>; + }>; + gpu?: Input<{ + count?: ManagedGpuCount; + manufacturer?: Input<(typeof managedGpuManufacturers)[number]>; + /** + * The NVIDIA GPU model to require. + * + * Supported values: `"a100"`, `"a10g"`, `"h100"`, `"k520"`, `"k80"`, + * `"m60"`, `"t4"`, `"t4g"`, and `"v100"`. + */ + name?: Input[]>; + }>; + infrastructureRole: ManagedRoleInput; + instanceRole?: ManagedRoleInput; + instanceProfile?: Input; + storage?: Input<`${number} GB`>; +} + +type ServiceSizingArgs = { + cpu?: Input<`${number} vCPU`>; + memory?: Input<`${number} GB`>; +}; + +type ManagedTaskDefinitionArgs = { + cluster: { + nodes: { + cluster: { + name: Output; + }; + }; + }; + link?: any; + transform?: { + image?: Transform; + taskDefinition?: Transform; + logGroup?: Transform; + }; +}; + +type ManagedCapacityProviderArgs = { + transform?: { + capacityProvider?: Transform; + instanceProfile?: Transform; + }; +}; + +type ManagedVpcArgs = { + containerSubnets: Input[]>; + securityGroups: Input[]>; +}; + +type NormalizedManagedCapacity = { + taskCpu: string; + taskMemory: string; + hostCpu: { + min: number; + max?: number; + }; + hostMemory: { + min: number; + max?: number; + }; + hostStorage?: number; + gpu?: { + count: { + min: number; + max?: number; + }; + manufacturer: (typeof managedGpuManufacturers)[number]; + names?: ManagedGpuAcceleratorName[]; + }; +}; + +export function normalizeManagedCapacity( + name: string, + args: ManagedServiceCapacityArgs, + serviceSizing: ServiceSizingArgs, +) { + return all([serviceSizing.cpu, serviceSizing.memory]).apply( + ([serviceCpu, serviceMemory]) => { + const managed = args as ManagedServiceCapacityArgs & { + infrastructureRole?: string; + instanceRole?: string; + instanceProfile?: string; + storage?: 
`${number} GB`; + cpu?: { min: number; max?: number }; + memory?: { min: `${number} GB`; max?: `${number} GB` }; + gpu?: { + count?: number | { min: number; max?: number }; + manufacturer?: (typeof managedGpuManufacturers)[number]; + name?: ManagedGpuAcceleratorName | ManagedGpuAcceleratorName[]; + }; + }; + + if (!managed.infrastructureRole) { + throw new VisibleError( + `Missing \"capacity.managed.infrastructureRole\" for the \"${name}\" Service.`, + ); + } + + if (!managed.instanceRole && !managed.instanceProfile) { + throw new VisibleError( + `You must provide either \"capacity.managed.instanceRole\" or \"capacity.managed.instanceProfile\" for the \"${name}\" Service.`, + ); + } + + if (managed.instanceRole && managed.instanceProfile) { + throw new VisibleError( + `Do not provide both \"capacity.managed.instanceRole\" and \"capacity.managed.instanceProfile\" for the \"${name}\" Service.`, + ); + } + + const hostCpu = normalizeHostCpu(managed.cpu, serviceCpu); + const hostMemory = normalizeHostMemory(managed.memory, serviceMemory); + const gpu = normalizeGpu(managed.gpu); + const hostStorage = normalizeStorage(managed.storage); + + return { + taskCpu: serviceCpu + ? toNumber(serviceCpu).toString() + : Math.round(hostCpu.min * 1024).toString(), + taskMemory: serviceMemory + ? 
toMBs(serviceMemory).toString() + : hostMemory.min.toString(), + hostCpu, + hostMemory, + hostStorage, + gpu, + } satisfies NormalizedManagedCapacity; + }, + ); + + function normalizeHostCpu( + cpu: { min: number; max?: number } | undefined, + fallback?: `${number} vCPU`, + ) { + if (cpu) { + validateRange("capacity.managed.cpu", cpu.min, cpu.max); + return { min: cpu.min, max: cpu.max }; + } + if (fallback) { + const min = parseFloat(fallback.split(" ")[0]); + return { min, max: min }; + } + throw new VisibleError( + `You must provide either \"capacity.managed.cpu\" or top-level \"cpu\" for managed instances.`, + ); + } + + function normalizeHostMemory( + memory: { min: `${number} GB`; max?: `${number} GB` } | undefined, + fallback?: `${number} GB`, + ) { + if (memory) { + const min = toMBs(memory.min); + const max = memory.max ? toMBs(memory.max) : undefined; + validateRange("capacity.managed.memory", min, max); + return { min, max }; + } + if (fallback) { + const min = toMBs(fallback); + return { min, max: min }; + } + throw new VisibleError( + `You must provide either \"capacity.managed.memory\" or top-level \"memory\" for managed instances.`, + ); + } + + function normalizeGpu( + gpu: + | { + count?: number | { min: number; max?: number }; + manufacturer?: (typeof managedGpuManufacturers)[number]; + name?: ManagedGpuAcceleratorName | ManagedGpuAcceleratorName[]; + } + | undefined, + ) { + if (!gpu) return undefined; + const manufacturer = gpu.manufacturer ?? "nvidia"; + if (!managedGpuManufacturers.includes(manufacturer)) { + throw new VisibleError( + `Unsupported GPU manufacturer \"${manufacturer}\". The supported values are ${managedGpuManufacturers.join(", ")}.`, + ); + } + + const count = + typeof gpu.count === "number" + ? { min: gpu.count, max: gpu.count } + : gpu.count + ? 
{ min: gpu.count.min, max: gpu.count.max } + : { min: 1, max: 1 }; + + validateRange("capacity.managed.gpu.count", count.min, count.max); + + return { + count, + manufacturer, + names: normalizeGpuNames(gpu.name), + }; + } + + function normalizeGpuNames( + name: ManagedGpuAcceleratorName | ManagedGpuAcceleratorName[] | undefined, + ) { + if (!name) return undefined; + const names = Array.isArray(name) ? name : [name]; + const supported = Object.values(ManagedGpuAcceleratorName); + const invalid = names.filter((name) => !supported.includes(name)); + if (invalid.length > 0) { + throw new VisibleError( + `Unsupported GPU accelerator name ${invalid.map((name) => `"${name}"`).join(", ")}. The supported NVIDIA values are ${supported + .map((name) => `"${name}"`) + .join(", ")}.`, + ); + } + return names; + } + + function normalizeStorage(storage?: `${number} GB`) { + if (!storage) return undefined; + const value = toGBs(storage); + if (value <= 0) { + throw new VisibleError( + `Invalid \"capacity.managed.storage\" value \"${storage}\". 
It must be greater than 0 GB.`, + ); + } + return value; + } + + function validateRange(label: string, min: number, max?: number) { + if (min <= 0) { + throw new VisibleError(`\"${label}.min\" must be greater than 0.`); + } + if (max !== undefined && max < min) { + throw new VisibleError( + `\"${label}.max\" must be greater than or equal to \"${label}.min\".`, + ); + } + } +} + +export function createManagedCapacityProvider( + name: string, + args: ManagedCapacityProviderArgs & { + capacity: Input; + }, + opts: ComponentResourceOptions, + parent: Component, + clusterName: Output, + vpc: ManagedVpcArgs, + normalized: Output, +) { + const infrastructureRole = iam.Role.get( + `${name}ManagedInfrastructureRole`, + output(args.capacity).apply((v) => v.infrastructureRole), + {}, + { parent }, + ); + + const instanceProfileArn = output(args.capacity).apply((v) => { + if (v.instanceProfile) return v.instanceProfile; + + return new iam.InstanceProfile( + ...transform( + args.transform?.instanceProfile, + `${name}ManagedInstanceProfile`, + { + role: output(v.instanceRole!).apply(extractRoleName), + }, + { parent }, + ), + ).arn; + }); + + return new ecs.CapacityProvider( + ...transform( + args.transform?.capacityProvider, + `${name}ManagedCapacityProvider`, + { + cluster: clusterName, + managedInstancesProvider: all([ + normalized, + infrastructureRole.arn, + instanceProfileArn, + vpc.containerSubnets, + vpc.securityGroups, + ]).apply( + ([normalized, infrastructureRoleArn, instanceProfileArn, subnets, securityGroups]) => ({ + infrastructureRoleArn, + propagateTags: "CAPACITY_PROVIDER", + instanceLaunchTemplate: { + ec2InstanceProfileArn: instanceProfileArn, + networkConfiguration: { + subnets, + securityGroups, + }, + ...(normalized.hostStorage + ? 
{ + storageConfiguration: { + storageSizeGib: normalized.hostStorage, + }, + } + : {}), + instanceRequirements: { + vcpuCount: { + min: normalized.hostCpu.min, + max: normalized.hostCpu.max, + }, + memoryMib: { + min: normalized.hostMemory.min, + max: normalized.hostMemory.max, + }, + instanceGenerations: ["current"], + ...(normalized.gpu + ? { + acceleratorTypes: ["gpu"], + acceleratorCount: { + min: normalized.gpu.count.min, + max: normalized.gpu.count.max, + }, + acceleratorManufacturers: [normalized.gpu.manufacturer], + ...(normalized.gpu.names + ? { + acceleratorNames: normalized.gpu.names, + } + : {}), + } + : {}), + }, + }, + }), + ), + }, + { parent }, + ), + ); +} + +export function createManagedTaskDefinition( + name: string, + args: ManagedTaskDefinitionArgs, + opts: ComponentResourceOptions, + parent: Component, + containers: ManagedContainers, + architecture: Output<"x86_64" | "arm64">, + taskRole: iam.Role, + executionRole: iam.Role, + normalized: Output, +) { + const clusterName = args.cluster.nodes.cluster.name; + const region = getRegionOutput({}, opts).region; + const bootstrapData = region.apply((region) => bootstrap.forRegion(region)); + const linkEnvs = Link.propertiesToEnv(Link.getProperties(args.link)); + + const containerDefinitions = all([containers, normalized]).apply( + ([containers, normalized]) => { + if (normalized.gpu && containers.length > 1) { + throw new VisibleError( + `GPU support currently requires a single container when using managed instances.`, + ); + } + + return containers.map((container) => ({ + name: container.name, + image: (() => { + if (typeof container.image === "string") return output(container.image); + + const containerImage = container.image; + const contextPath = path.join($cli.paths.root, container.image.context); + const dockerfile = container.image.dockerfile ?? 
"Dockerfile"; + const dockerfilePath = path.join(contextPath, dockerfile); + const dockerIgnorePath = fs.existsSync( + path.join(contextPath, `${dockerfile}.dockerignore`), + ) + ? path.join(contextPath, `${dockerfile}.dockerignore`) + : path.join(contextPath, ".dockerignore"); + + const lines = fs.existsSync(dockerIgnorePath) + ? fs.readFileSync(dockerIgnorePath).toString().split("\n") + : []; + if (!lines.find((line) => line === ".sst")) { + fs.writeFileSync( + dockerIgnorePath, + [...lines, "", "# sst", ".sst"].join("\n"), + ); + } + + const image = imageBuilder( + ...transform( + args.transform?.image, + `${name}Image${container.name}`, + { + context: { location: contextPath }, + dockerfile: { location: dockerfilePath }, + buildArgs: containerImage.args, + secrets: all([linkEnvs, containerImage.secrets ?? {}]).apply( + ([link, secrets]) => ({ ...link, ...secrets }), + ), + target: container.image.target, + platforms: [container.image.platform], + tags: [container.name, ...(container.image.tags ?? [])].map( + (tag) => interpolate`${bootstrapData.assetEcrUrl}:${tag}`, + ), + registries: [ + ecr + .getAuthorizationTokenOutput( + { + registryId: bootstrapData.assetEcrRegistryId, + }, + { parent }, + ) + .apply((authToken) => ({ + address: authToken.proxyEndpoint, + password: secret(authToken.password), + username: authToken.userName, + })), + ], + ...(container.image.cache !== false + ? { + cacheFrom: [ + { + registry: { + ref: interpolate`${bootstrapData.assetEcrUrl}:${container.name}-cache`, + }, + }, + ], + cacheTo: [ + { + registry: { + ref: interpolate`${bootstrapData.assetEcrUrl}:${container.name}-cache`, + imageManifest: true, + ociMediaTypes: true, + mode: "max", + }, + }, + ], + } + : {}), + push: true, + }, + { parent }, + ), + ); + + return interpolate`${bootstrapData.assetEcrUrl}@${image.digest}`; + })(), + cpu: container.cpu ? toNumber(container.cpu) : undefined, + memory: container.memory ? 
toMBs(container.memory) : undefined, + command: container.command, + entrypoint: container.entrypoint, + healthCheck: container.health && { + command: container.health.command, + startPeriod: toSeconds(container.health.startPeriod ?? "0 seconds"), + timeout: toSeconds(container.health.timeout ?? "5 seconds"), + interval: toSeconds(container.health.interval ?? "30 seconds"), + retries: container.health.retries ?? 3, + }, + pseudoTerminal: true, + portMappings: [{ containerPortRange: "1-65535" }], + logConfiguration: { + logDriver: "awslogs", + options: { + "awslogs-group": (() => { + return new cloudwatch.LogGroup( + ...transform( + args.transform?.logGroup, + `${name}LogGroup${container.name}`, + { + name: container.logging.name, + retentionInDays: RETENTION[container.logging.retention], + }, + { parent, ignoreChanges: ["name"] }, + ), + ); + })().name, + "awslogs-region": region, + "awslogs-stream-prefix": "/service", + }, + }, + environment: linkEnvs.apply((linkEnvs) => + Object.entries({ + ...container.environment, + ...linkEnvs, + }).map(([name, value]) => ({ name, value })), + ), + environmentFiles: container.environmentFiles?.map((file) => ({ + type: "s3", + value: file, + })), + linuxParameters: { + initProcessEnabled: true, + }, + mountPoints: container.volumes?.map((volume) => ({ + sourceVolume: volume.efs.accessPoint, + containerPath: volume.path, + })), + secrets: Object.entries(container.ssm ?? {}).map(([name, valueFrom]) => ({ + name, + valueFrom, + })), + resourceRequirements: normalized.gpu + ? 
[{ type: "GPU", value: normalized.gpu.count.min.toString() }] + : undefined, + })); + }, + ); + + return output( + new ecs.TaskDefinition( + ...transform( + args.transform?.taskDefinition, + `${name}Task`, + { + family: interpolate`${clusterName}-${name}`, + trackLatest: true, + cpu: normalized.apply((v) => v.taskCpu), + memory: normalized.apply((v) => v.taskMemory), + networkMode: "awsvpc", + requiresCompatibilities: ["MANAGED_INSTANCES"], + runtimePlatform: { + cpuArchitecture: architecture.apply((v) => v.toUpperCase()), + operatingSystemFamily: "LINUX", + }, + executionRoleArn: executionRole.arn, + taskRoleArn: taskRole.arn, + volumes: output(containers).apply((containers) => { + const uniqueAccessPoints: Set = new Set(); + return containers.flatMap((container) => + (container.volumes ?? []).flatMap((volume) => { + if (uniqueAccessPoints.has(volume.efs.accessPoint)) return []; + uniqueAccessPoints.add(volume.efs.accessPoint); + return { + name: volume.efs.accessPoint, + efsVolumeConfiguration: { + fileSystemId: volume.efs.fileSystem, + transitEncryption: "ENABLED", + authorizationConfig: { + accessPointId: volume.efs.accessPoint, + }, + }, + }; + }), + ); + }), + containerDefinitions: $jsonStringify(containerDefinitions), + }, + { parent }, + ), + ), + ); +} + +export function isManagedCapacityInput( + capacity: unknown, +): capacity is { + managed: Input; +} { + return typeof capacity === "object" && capacity !== null && "managed" in capacity; +} + +function extractRoleName(role: string) { + return role.split("/").pop()!; +} diff --git a/platform/src/components/aws/service.ts b/platform/src/components/aws/service.ts index 3ea034162a..dc8b811c88 100644 --- a/platform/src/components/aws/service.ts +++ b/platform/src/components/aws/service.ts @@ -36,6 +36,13 @@ import { normalizeMemory, normalizeStorage, } from "./fargate.js"; +import { + createManagedCapacityProvider, + createManagedTaskDefinition, + isManagedCapacityInput, + ManagedServiceCapacityArgs, + 
normalizeManagedCapacity, +} from "./managed-instances.js"; import { Dns } from "../dns.js"; import { hashStringToPrettyString } from "../naming.js"; import { Alb } from "./alb.js"; @@ -333,8 +340,23 @@ interface ServiceContainerArgs extends FargateContainerArgs { */ directory?: Input; }; + /** + * The number of GPUs to reserve for this container when using managed instances. + */ + gpu?: Input; } +type ServiceFargateCapacity = { + fargate?: { + base?: Input; + weight: Input; + }; + spot?: { + base?: Input; + weight: Input; + }; +}; + export interface ServiceArgs extends FargateBaseArgs { /** * Configure how this component works in `sst dev`. @@ -1305,17 +1327,55 @@ export interface ServiceArgs extends FargateBaseArgs { * regular Fargate. * * ```js - * { - * capacity: { - * fargate: { weight: 1, base: 2 }, - * spot: { weight: 1 } - * } - * } - * ``` - */ + * { + * capacity: { + * fargate: { weight: 1, base: 2 }, + * spot: { weight: 1 } + * } + * } + * ``` + * - Use ECS Managed Instances for a CPU-only workload. + * + * ```js + * { + * cpu: "1 vCPU", + * memory: "2 GB", + * capacity: { + * managed: { + * infrastructureRole: "arn:aws:iam::123456789012:role/ecs-infra", + * instanceProfile: "arn:aws:iam::123456789012:instance-profile/ecs-managed", + * cpu: { min: 1, max: 4 }, + * memory: { min: "2 GB", max: "8 GB" } + * } + * } + * } + * ``` + * - Use ECS Managed Instances for a GPU workload. + * + * ```js + * { + * cpu: "4 vCPU", + * memory: "16 GB", + * containers: [{ + * name: "app", + * gpu: 1, + * }], + * capacity: { + * managed: { + * infrastructureRole: "arn:aws:iam::123456789012:role/ecs-infra", + * instanceProfile: "arn:aws:iam::123456789012:instance-profile/ecs-managed", + * gpu: { + * count: 1, + * name: "t4", + * } + * } + * } + * } + * ``` + */ capacity?: Input< | "spot" - | { + | (ServiceFargateCapacity & { /** * Configure how the regular Fargate capacity is allocated. 
*/ @@ -1350,6 +1410,18 @@ export interface ServiceArgs extends FargateBaseArgs { */ weight: Input; }>; + /** + * Configure ECS Managed Instances for this service. + */ + managed?: never; + }) + | { + /** + * Configure ECS Managed Instances for this service. + * + * This mode is exclusive and cannot be combined with `fargate` or `spot`. + */ + managed?: Input; } >; /** @@ -1525,6 +1597,14 @@ export interface ServiceArgs extends FargateBaseArgs { * attaching to an external ALB via the `loadBalancer.instance` prop. */ listenerRule?: Transform; + /** + * Transform the ECS managed instances capacity provider resource. + */ + capacityProvider?: Transform; + /** + * Transform the IAM instance profile resource created for managed instances. + */ + instanceProfile?: Transform; } >; } @@ -1789,6 +1869,7 @@ export class Service extends Component implements Link.Linkable { const scaling = normalizeScaling(); const capacity = normalizeCapacity(); const vpc = normalizeVpc(); + const managed = normalizeManaged(); const taskRole = createTaskRole(name, args, opts, self, !!dev); @@ -1803,19 +1884,51 @@ export class Service extends Component implements Link.Linkable { } const executionRole = createExecutionRole(name, args, opts, self); - const taskDefinition = createTaskDefinition( - name, - args, - opts, - self, - containers, - architecture, - cpu, - memory, - storage, - taskRole, - executionRole, - ); + const managedCapacityProvider = managed + ? createManagedCapacityProvider( + name, + { + capacity: managed.capacity, + transform: { + capacityProvider: args.transform?.capacityProvider, + instanceProfile: args.transform?.instanceProfile, + }, + }, + opts, + self, + clusterName, + { + containerSubnets: vpc.containerSubnets, + securityGroups: vpc.securityGroups, + }, + managed.normalized, + ) + : undefined; + const taskDefinition = managed + ? 
createManagedTaskDefinition( + name, + args, + opts, + self, + containers, + architecture, + taskRole, + executionRole, + managed.normalized, + ) + : createTaskDefinition( + name, + args, + opts, + self, + containers, + architecture, + cpu, + memory, + storage, + taskRole, + executionRole, + ); let loadBalancer: lb.LoadBalancer | undefined; let targetGroups: ReturnType; let targetEntries: Output<{ targetGroup: lb.TargetGroup; containerName: string; containerPort: number }[]>; @@ -1930,11 +2043,56 @@ export class Service extends Component implements Link.Linkable { function normalizeCapacity() { if (!args.capacity) return; - return output(args.capacity).apply((v) => { + return output(args.capacity).apply((v): ServiceFargateCapacity | undefined => { + if (isManagedCapacityInput(v)) return undefined; if (v === "spot") return { spot: { weight: 1 }, fargate: { weight: 0 } }; - return v; + const fargateCapacity = v as ServiceFargateCapacity; + return { + fargate: fargateCapacity.fargate, + spot: fargateCapacity.spot, + }; + }); + } + + function normalizeManaged() { + if (!args.capacity) return; + + const managedCapacity = output(args.capacity).apply((v) => { + if (v === "spot" || !isManagedCapacityInput(v)) return; + + if ("fargate" in v || "spot" in v) { + throw new VisibleError( + `Do not combine \"capacity.managed\" with \"capacity.fargate\" or \"capacity.spot\" in the \"${name}\" Service.`, + ); + } + + return v.managed; }); + + return { + capacity: managedCapacity.apply((v) => { + if (!v) + throw new VisibleError( + `Missing \"capacity.managed\" for the \"${name}\" Service.`, + ); + return v; + }), + normalized: managedCapacity + .apply((v) => { + if (!v) + throw new VisibleError( + `Missing \"capacity.managed\" for the \"${name}\" Service.`, + ); + return v; + }) + .apply((managed) => + normalizeManagedCapacity(name, managed, { + cpu: args.cpu, + memory: args.memory, + }), + ), + }; } function normalizeLoadBalancer() { @@ -2397,31 +2555,47 @@ export class Service 
extends Component implements Link.Linkable { cluster: clusterArn, taskDefinition: taskDefinition.arn, desiredCount: scaling.min, - ...(capacity + ...(managed + ? { + forceNewDeployment: true, + capacityProviderStrategies: [ + { + capacityProvider: managedCapacityProvider!.name, + weight: 1, + }, + ], + } + : capacity ? { // setting `forceNewDeployment` ensures that the service is not recreated // when the capacity provider config changes. forceNewDeployment: true, - capacityProviderStrategies: capacity.apply((v) => [ - ...(v.fargate - ? [ - { - capacityProvider: "FARGATE", - base: v.fargate?.base, - weight: v.fargate?.weight, - }, - ] - : []), - ...(v.spot - ? [ - { - capacityProvider: "FARGATE_SPOT", - base: v.spot?.base, - weight: v.spot?.weight, - }, - ] - : []), - ]), + capacityProviderStrategies: capacity.apply((v) => { + if (!v) + throw new VisibleError( + `Invalid Fargate capacity configuration for the \"${name}\" Service.`, + ); + return [ + ...(v.fargate + ? [ + { + capacityProvider: "FARGATE", + base: v.fargate?.base, + weight: v.fargate?.weight, + }, + ] + : []), + ...(v.spot + ? [ + { + capacityProvider: "FARGATE_SPOT", + base: v.spot?.base, + weight: v.spot?.weight, + }, + ] + : []), + ]; + }), } : // @deprecated do not use `launchType`, set `capacityProviderStrategies` // to `[{ capacityProvider: "FARGATE", weight: 1 }]` instead @@ -2431,7 +2605,7 @@ export class Service extends Component implements Link.Linkable { networkConfiguration: { // If the vpc is an SST vpc, services are automatically deployed to the public // subnets. So we need to assign a public IP for the service to be accessible. - assignPublicIp: vpc.isSstVpc, + ...(managed ? 
{} : { assignPublicIp: vpc.isSstVpc }), subnets: vpc.containerSubnets, securityGroups: vpc.securityGroups, }, From 481d44c660554a63b038c1313cd0a267ef0d0640 Mon Sep 17 00:00:00 2001 From: mkilp <8791079+mkilp@users.noreply.github.com> Date: Sun, 22 Mar 2026 16:53:47 -0400 Subject: [PATCH 2/9] feat: move gpu flag to toplevel --- examples/aws-ecs-gpus/.dockerignore | 6 + examples/aws-ecs-gpus/Dockerfile | 12 + examples/aws-ecs-gpus/app.py | 34 + examples/aws-ecs-gpus/package.json | 9 + examples/aws-ecs-gpus/sst.config.ts | 37 + .../src/components/aws/managed-instances.ts | 410 +++--- platform/src/components/aws/service.ts | 1278 +++++++++-------- platform/src/components/component.ts | 1 + 8 files changed, 925 insertions(+), 862 deletions(-) create mode 100644 examples/aws-ecs-gpus/.dockerignore create mode 100644 examples/aws-ecs-gpus/Dockerfile create mode 100644 examples/aws-ecs-gpus/app.py create mode 100644 examples/aws-ecs-gpus/package.json create mode 100644 examples/aws-ecs-gpus/sst.config.ts diff --git a/examples/aws-ecs-gpus/.dockerignore b/examples/aws-ecs-gpus/.dockerignore new file mode 100644 index 0000000000..763039c77b --- /dev/null +++ b/examples/aws-ecs-gpus/.dockerignore @@ -0,0 +1,6 @@ + +# sst +.sst +node_modules +__pycache__ +*.pyc diff --git a/examples/aws-ecs-gpus/Dockerfile b/examples/aws-ecs-gpus/Dockerfile new file mode 100644 index 0000000000..b0a57dfb4c --- /dev/null +++ b/examples/aws-ecs-gpus/Dockerfile @@ -0,0 +1,12 @@ +FROM python:3.11-slim + +WORKDIR /app + +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 + +COPY app.py /app/app.py + +EXPOSE 8000 + +ENTRYPOINT ["python", "/app/app.py"] diff --git a/examples/aws-ecs-gpus/app.py b/examples/aws-ecs-gpus/app.py new file mode 100644 index 0000000000..152d62ef81 --- /dev/null +++ b/examples/aws-ecs-gpus/app.py @@ -0,0 +1,34 @@ +from http.server import BaseHTTPRequestHandler, HTTPServer +import json +import os + + +class Handler(BaseHTTPRequestHandler): + def do_GET(self): + if 
self.path == "/health": + self.respond({"ok": True}) + return + + self.respond( + { + "message": "hello from ecs managed instances", + "gpu": os.getenv("NVIDIA_VISIBLE_DEVICES", "unknown"), + } + ) + + def log_message(self, format, *args): + return + + def respond(self, payload): + body = json.dumps(payload).encode("utf-8") + self.send_response(200) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + + +if __name__ == "__main__": + port = int(os.getenv("PORT", "8000")) + server = HTTPServer(("0.0.0.0", port), Handler) + server.serve_forever() diff --git a/examples/aws-ecs-gpus/package.json b/examples/aws-ecs-gpus/package.json new file mode 100644 index 0000000000..9d67054b8e --- /dev/null +++ b/examples/aws-ecs-gpus/package.json @@ -0,0 +1,9 @@ +{ + "name": "aws-ecs-gpus", + "version": "1.0.0", + "private": true, + "type": "module", + "dependencies": { + "sst": "^4" + } +} diff --git a/examples/aws-ecs-gpus/sst.config.ts b/examples/aws-ecs-gpus/sst.config.ts new file mode 100644 index 0000000000..4c67e9c0dc --- /dev/null +++ b/examples/aws-ecs-gpus/sst.config.ts @@ -0,0 +1,37 @@ +/// + +/** + * ## AWS ECS GPUs + * + * A minimal ECS service running on ECS Managed Instances with a GPU-enabled host. + * The service uses top-level `gpu`, `cpu`, `memory`, and `storage` settings, while + * the managed instances IAM resources remain customizable through `transform`. + */ +export default $config({ + app(input) { + return { + name: "service-gpu-example", + removal: input?.stage === "production" ? 
"retain" : "remove", + home: "aws", + }; + }, + async run() { + const vpc = new sst.aws.Vpc("MyVpc"); + const cluster = new sst.aws.Cluster("MyCluster", { vpc }); + + const service = new sst.aws.Service("MyService", { + cluster, + image: { context: "./" }, + gpu: "nvidia/t4", + cpu: "4 vCPU", + memory: "16 GB", + loadBalancer: { + ports: [{ listen: "80/http", forward: "8000/http" }], + }, + }); + + return { + url: service.url, + }; + }, +}); diff --git a/platform/src/components/aws/managed-instances.ts b/platform/src/components/aws/managed-instances.ts index e1cac871f2..6cc4ead302 100644 --- a/platform/src/components/aws/managed-instances.ts +++ b/platform/src/components/aws/managed-instances.ts @@ -8,7 +8,14 @@ import { Output, secret, } from "@pulumi/pulumi"; -import { cloudwatch, ecr, ecs, getRegionOutput, iam } from "@pulumi/aws"; +import { + cloudwatch, + ecr, + ecs, + getPartitionOutput, + getRegionOutput, + iam, +} from "@pulumi/aws"; import { ImageArgs } from "@pulumi/docker-build"; import { Component, Transform, transform } from "../component.js"; import { Input } from "../input.js"; @@ -37,47 +44,15 @@ export const ManagedGpuAcceleratorName = { export type ManagedGpuAcceleratorName = (typeof ManagedGpuAcceleratorName)[keyof typeof ManagedGpuAcceleratorName]; +export type ManagedGpu = + `${(typeof managedGpuManufacturers)[number]}/${ManagedGpuAcceleratorName}`; type ManagedContainers = ReturnType; - -type ManagedRoleInput = Input; - -type ManagedGpuCount = - | Input - | Input<{ - min: Input; - max?: Input; - }>; - -export interface ManagedServiceCapacityArgs { - cpu?: Input<{ - min: Input; - max?: Input; - }>; - memory?: Input<{ - min: Input<`${number} GB`>; - max?: Input<`${number} GB`>; - }>; - gpu?: Input<{ - count?: ManagedGpuCount; - manufacturer?: Input<(typeof managedGpuManufacturers)[number]>; - /** - * The NVIDIA GPU model to require. 
- * - * Supported values: `"a100"`, `"a10g"`, `"h100"`, `"k520"`, `"k80"`, - * `"m60"`, `"t4"`, `"t4g"`, and `"v100"`. - */ - name?: Input[]>; - }>; - infrastructureRole: ManagedRoleInput; - instanceRole?: ManagedRoleInput; - instanceProfile?: Input; - storage?: Input<`${number} GB`>; -} - -type ServiceSizingArgs = { +type ManagedServiceArgs = { + gpu: Input; cpu?: Input<`${number} vCPU`>; memory?: Input<`${number} GB`>; + storage?: Input<`${number} GB`>; }; type ManagedTaskDefinitionArgs = { @@ -98,6 +73,8 @@ type ManagedTaskDefinitionArgs = { type ManagedCapacityProviderArgs = { transform?: { + infrastructureRole?: Transform; + instanceRole?: Transform; capacityProvider?: Transform; instanceProfile?: Transform; }; @@ -132,142 +109,74 @@ type NormalizedManagedCapacity = { export function normalizeManagedCapacity( name: string, - args: ManagedServiceCapacityArgs, - serviceSizing: ServiceSizingArgs, + args: ManagedServiceArgs, ) { - return all([serviceSizing.cpu, serviceSizing.memory]).apply( - ([serviceCpu, serviceMemory]) => { - const managed = args as ManagedServiceCapacityArgs & { - infrastructureRole?: string; - instanceRole?: string; - instanceProfile?: string; - storage?: `${number} GB`; - cpu?: { min: number; max?: number }; - memory?: { min: `${number} GB`; max?: `${number} GB` }; - gpu?: { - count?: number | { min: number; max?: number }; - manufacturer?: (typeof managedGpuManufacturers)[number]; - name?: ManagedGpuAcceleratorName | ManagedGpuAcceleratorName[]; - }; - }; - - if (!managed.infrastructureRole) { - throw new VisibleError( - `Missing \"capacity.managed.infrastructureRole\" for the \"${name}\" Service.`, - ); - } - - if (!managed.instanceRole && !managed.instanceProfile) { - throw new VisibleError( - `You must provide either \"capacity.managed.instanceRole\" or \"capacity.managed.instanceProfile\" for the \"${name}\" Service.`, - ); - } - - if (managed.instanceRole && managed.instanceProfile) { - throw new VisibleError( - `Do not provide both 
\"capacity.managed.instanceRole\" and \"capacity.managed.instanceProfile\" for the \"${name}\" Service.`, - ); - } - - const hostCpu = normalizeHostCpu(managed.cpu, serviceCpu); - const hostMemory = normalizeHostMemory(managed.memory, serviceMemory); - const gpu = normalizeGpu(managed.gpu); - const hostStorage = normalizeStorage(managed.storage); + return all([args.gpu, args.cpu, args.memory, args.storage]).apply( + ([gpu, cpu, memory, storage]) => { + const hostCpu = normalizeHostCpu(cpu); + const hostMemory = normalizeHostMemory(memory); + const hostStorage = normalizeStorage(storage); return { - taskCpu: serviceCpu - ? toNumber(serviceCpu).toString() - : Math.round(hostCpu.min * 1024).toString(), - taskMemory: serviceMemory - ? toMBs(serviceMemory).toString() - : hostMemory.min.toString(), + taskCpu: cpu!, + taskMemory: memory!, hostCpu, hostMemory, hostStorage, - gpu, + gpu: normalizeGpu(gpu), } satisfies NormalizedManagedCapacity; }, ); - function normalizeHostCpu( - cpu: { min: number; max?: number } | undefined, - fallback?: `${number} vCPU`, - ) { + function normalizeHostCpu(cpu?: `${number} vCPU`) { if (cpu) { - validateRange("capacity.managed.cpu", cpu.min, cpu.max); - return { min: cpu.min, max: cpu.max }; - } - if (fallback) { - const min = parseFloat(fallback.split(" ")[0]); + const min = parseFloat(cpu.split(" ")[0]); return { min, max: min }; } throw new VisibleError( - `You must provide either \"capacity.managed.cpu\" or top-level \"cpu\" for managed instances.`, + `You must provide top-level \"cpu\" for the \"${name}\" Service when \"gpu\" is set.`, ); } - function normalizeHostMemory( - memory: { min: `${number} GB`; max?: `${number} GB` } | undefined, - fallback?: `${number} GB`, - ) { + function normalizeHostMemory(memory?: `${number} GB`) { if (memory) { - const min = toMBs(memory.min); - const max = memory.max ? 
toMBs(memory.max) : undefined; - validateRange("capacity.managed.memory", min, max); - return { min, max }; - } - if (fallback) { - const min = toMBs(fallback); + const min = toMBs(memory); return { min, max: min }; } throw new VisibleError( - `You must provide either \"capacity.managed.memory\" or top-level \"memory\" for managed instances.`, + `You must provide top-level \"memory\" for the \"${name}\" Service when \"gpu\" is set.`, ); } - function normalizeGpu( - gpu: - | { - count?: number | { min: number; max?: number }; - manufacturer?: (typeof managedGpuManufacturers)[number]; - name?: ManagedGpuAcceleratorName | ManagedGpuAcceleratorName[]; - } - | undefined, - ) { - if (!gpu) return undefined; - const manufacturer = gpu.manufacturer ?? "nvidia"; + function normalizeGpu(gpu: ManagedGpu) { + const [manufacturer, name] = gpu.split("/") as [ + (typeof managedGpuManufacturers)[number], + ManagedGpuAcceleratorName, + ]; if (!managedGpuManufacturers.includes(manufacturer)) { throw new VisibleError( - `Unsupported GPU manufacturer \"${manufacturer}\". The supported values are ${managedGpuManufacturers.join(", ")}.`, + `Unsupported GPU manufacturer \"${manufacturer}\". The supported values are ${managedGpuManufacturers.join( + ", ", + )}.`, ); } - const count = - typeof gpu.count === "number" - ? { min: gpu.count, max: gpu.count } - : gpu.count - ? { min: gpu.count.min, max: gpu.count.max } - : { min: 1, max: 1 }; - - validateRange("capacity.managed.gpu.count", count.min, count.max); - return { - count, + count: { min: 1, max: 1 }, manufacturer, - names: normalizeGpuNames(gpu.name), + names: normalizeGpuNames(name), }; } - function normalizeGpuNames( - name: ManagedGpuAcceleratorName | ManagedGpuAcceleratorName[] | undefined, - ) { - if (!name) return undefined; - const names = Array.isArray(name) ? 
name : [name]; + function normalizeGpuNames(name: ManagedGpuAcceleratorName) { + const names = [name]; const supported = Object.values(ManagedGpuAcceleratorName); const invalid = names.filter((name) => !supported.includes(name)); if (invalid.length > 0) { throw new VisibleError( - `Unsupported GPU accelerator name ${invalid.map((name) => `"${name}"`).join(", ")}. The supported NVIDIA values are ${supported + `Unsupported GPU accelerator name ${invalid + .map((name) => `"${name}"`) + .join(", ")}. The supported NVIDIA values are ${supported .map((name) => `"${name}"`) .join(", ")}.`, ); @@ -280,56 +189,48 @@ export function normalizeManagedCapacity( const value = toGBs(storage); if (value <= 0) { throw new VisibleError( - `Invalid \"capacity.managed.storage\" value \"${storage}\". It must be greater than 0 GB.`, + `Invalid top-level \"storage\" value \"${storage}\" for the \"${name}\" Service. It must be greater than 0 GB.`, ); } return value; } - - function validateRange(label: string, min: number, max?: number) { - if (min <= 0) { - throw new VisibleError(`\"${label}.min\" must be greater than 0.`); - } - if (max !== undefined && max < min) { - throw new VisibleError( - `\"${label}.max\" must be greater than or equal to \"${label}.min\".`, - ); - } - } } export function createManagedCapacityProvider( name: string, - args: ManagedCapacityProviderArgs & { - capacity: Input; - }, + args: ManagedCapacityProviderArgs, opts: ComponentResourceOptions, parent: Component, clusterName: Output, vpc: ManagedVpcArgs, normalized: Output, ) { - const infrastructureRole = iam.Role.get( - `${name}ManagedInfrastructureRole`, - output(args.capacity).apply((v) => v.infrastructureRole), - {}, - { parent }, - ); + const partition = getPartitionOutput({}, opts).partition; - const instanceProfileArn = output(args.capacity).apply((v) => { - if (v.instanceProfile) return v.instanceProfile; + const infrastructureRole = new iam.Role( + ...transform( + args.transform?.infrastructureRole, + 
`${name}ManagedInfrastructureRole`, + { + assumeRolePolicy: iam.assumeRolePolicyForPrincipal({ + Service: "ecs.amazonaws.com", + }), + managedPolicyArns: [ + interpolate`arn:${partition}:iam::aws:policy/AmazonECSInfrastructureRolePolicyForManagedInstances`, + ], + }, + { parent }, + ), + ); - return new iam.InstanceProfile( - ...transform( - args.transform?.instanceProfile, - `${name}ManagedInstanceProfile`, - { - role: output(v.instanceRole!).apply(extractRoleName), - }, - { parent }, - ), - ).arn; - }); + const instanceProfileArn = getOrCreateManagedInstanceProfile( + name, + partition, + args.transform?.instanceRole, + args.transform?.instanceProfile, + parent, + opts, + ).arn; return new ecs.CapacityProvider( ...transform( @@ -344,50 +245,60 @@ export function createManagedCapacityProvider( vpc.containerSubnets, vpc.securityGroups, ]).apply( - ([normalized, infrastructureRoleArn, instanceProfileArn, subnets, securityGroups]) => ({ + ([ + normalized, infrastructureRoleArn, - propagateTags: "CAPACITY_PROVIDER", - instanceLaunchTemplate: { - ec2InstanceProfileArn: instanceProfileArn, - networkConfiguration: { - subnets, - securityGroups, - }, - ...(normalized.hostStorage - ? { - storageConfiguration: { - storageSizeGib: normalized.hostStorage, - }, - } - : {}), - instanceRequirements: { - vcpuCount: { - min: normalized.hostCpu.min, - max: normalized.hostCpu.max, + instanceProfileArn, + subnets, + securityGroups, + ]) => { + const managedInstancesProvider = { + infrastructureRoleArn, + propagateTags: "CAPACITY_PROVIDER" as const, + instanceLaunchTemplate: { + ec2InstanceProfileArn: instanceProfileArn, + networkConfiguration: { + subnets, + securityGroups, }, - memoryMib: { - min: normalized.hostMemory.min, - max: normalized.hostMemory.max, - }, - instanceGenerations: ["current"], - ...(normalized.gpu + ...(normalized.hostStorage ? 
{ - acceleratorTypes: ["gpu"], - acceleratorCount: { - min: normalized.gpu.count.min, - max: normalized.gpu.count.max, + storageConfiguration: { + storageSizeGib: normalized.hostStorage, }, - acceleratorManufacturers: [normalized.gpu.manufacturer], - ...(normalized.gpu.names - ? { - acceleratorNames: normalized.gpu.names, - } - : {}), } : {}), + instanceRequirements: { + vcpuCount: { + min: normalized.hostCpu.min, + max: normalized.hostCpu.max, + }, + memoryMib: { + min: normalized.hostMemory.min, + max: normalized.hostMemory.max, + }, + instanceGenerations: ["current"], + ...(normalized.gpu + ? { + acceleratorTypes: ["gpu"], + acceleratorCount: { + min: normalized.gpu.count.min, + max: normalized.gpu.count.max, + }, + acceleratorManufacturers: [normalized.gpu.manufacturer], + ...(normalized.gpu.names + ? { + acceleratorNames: normalized.gpu.names, + } + : {}), + } + : {}), + }, }, - }, - }), + }; + + return managedInstancesProvider; + }, ), }, { parent }, @@ -395,6 +306,61 @@ export function createManagedCapacityProvider( ); } +const sharedManagedInstanceProfileByProvider = new WeakMap< + object, + iam.InstanceProfile +>(); +let defaultManagedInstanceProfile: iam.InstanceProfile | undefined; + +function getOrCreateManagedInstanceProfile( + name: string, + partition: Output, + roleTransform: Transform | undefined, + profileTransform: Transform | undefined, + parent: Component, + opts: ComponentResourceOptions, +) { + const provider = opts.provider; + const existing = provider + ? 
sharedManagedInstanceProfileByProvider.get(provider) + : defaultManagedInstanceProfile; + if (existing) return existing; + + const role = new iam.Role( + ...transform( + roleTransform, + `${name}ManagedInstancesEcsInstanceRole`, + { + name: "ecsInstanceRole", + assumeRolePolicy: iam.assumeRolePolicyForPrincipal({ + Service: "ec2.amazonaws.com", + }), + managedPolicyArns: [ + interpolate`arn:${partition}:iam::aws:policy/AmazonECSInstanceRolePolicyForManagedInstances`, + ], + }, + { parent }, + ), + ); + + const profile = new iam.InstanceProfile( + ...transform( + profileTransform, + `${name}ManagedInstancesEcsInstanceProfile`, + { + name: "ecsInstanceRole", + role: role.name, + }, + { parent }, + ), + ); + + if (provider) sharedManagedInstanceProfileByProvider.set(provider, profile); + else defaultManagedInstanceProfile = profile; + + return profile; +} + export function createManagedTaskDefinition( name: string, args: ManagedTaskDefinitionArgs, @@ -422,10 +388,14 @@ export function createManagedTaskDefinition( return containers.map((container) => ({ name: container.name, image: (() => { - if (typeof container.image === "string") return output(container.image); + if (typeof container.image === "string") + return output(container.image); const containerImage = container.image; - const contextPath = path.join($cli.paths.root, container.image.context); + const contextPath = path.join( + $cli.paths.root, + container.image.context, + ); const dockerfile = container.image.dockerfile ?? "Dockerfile"; const dockerfilePath = path.join(contextPath, dockerfile); const dockerIgnorePath = fs.existsSync( @@ -553,10 +523,12 @@ export function createManagedTaskDefinition( sourceVolume: volume.efs.accessPoint, containerPath: volume.path, })), - secrets: Object.entries(container.ssm ?? {}).map(([name, valueFrom]) => ({ - name, - valueFrom, - })), + secrets: Object.entries(container.ssm ?? 
{}).map( + ([name, valueFrom]) => ({ + name, + valueFrom, + }), + ), resourceRequirements: normalized.gpu ? [{ type: "GPU", value: normalized.gpu.count.min.toString() }] : undefined, @@ -608,15 +580,3 @@ export function createManagedTaskDefinition( ), ); } - -export function isManagedCapacityInput( - capacity: unknown, -): capacity is { - managed: Input; -} { - return typeof capacity === "object" && capacity !== null && "managed" in capacity; -} - -function extractRoleName(role: string) { - return role.split("/").pop()!; -} diff --git a/platform/src/components/aws/service.ts b/platform/src/components/aws/service.ts index dc8b811c88..2f47a727dc 100644 --- a/platform/src/components/aws/service.ts +++ b/platform/src/components/aws/service.ts @@ -39,8 +39,7 @@ import { import { createManagedCapacityProvider, createManagedTaskDefinition, - isManagedCapacityInput, - ManagedServiceCapacityArgs, + ManagedGpu, normalizeManagedCapacity, } from "./managed-instances.js"; import { Dns } from "../dns.js"; @@ -340,10 +339,6 @@ interface ServiceContainerArgs extends FargateContainerArgs { */ directory?: Input; }; - /** - * The number of GPUs to reserve for this container when using managed instances. - */ - gpu?: Input; } type ServiceFargateCapacity = { @@ -661,437 +656,437 @@ export interface ServiceArgs extends FargateBaseArgs { */ loadBalancer?: Input< | { - /** - * Configure if the load balancer should be public or private. - * - * When set to `false`, the load balancer endpoint will only be accessible within the - * VPC. - * - * @default `true` - */ - public?: Input; - /** - * Set a custom domain for your load balancer endpoint. - * - * Automatically manages domains hosted on AWS Route 53, Cloudflare, and Vercel. For other - * providers, you'll need to pass in a `cert` that validates domain ownership and add the - * DNS records. - * - * :::tip - * Built-in support for AWS Route 53, Cloudflare, and Vercel. And manual setup for other - * providers. 
- * ::: - * - * @example - * - * By default this assumes the domain is hosted on Route 53. - * - * ```js - * { - * domain: "example.com" - * } - * ``` - * - * For domains hosted on Cloudflare. - * - * ```js - * { - * domain: { - * name: "example.com", - * dns: sst.cloudflare.dns() - * } - * } - * ``` - */ - domain?: Input< - | string - | { - /** - * The custom domain you want to use. - * - * @example - * ```js - * { - * domain: { - * name: "example.com" - * } - * } - * ``` - * - * Can also include subdomains based on the current stage. - * - * ```js - * { - * domain: { - * name: `${$app.stage}.example.com` - * } - * } - * ``` - * - * Wildcard domains are supported. - * - * ```js - * { - * domain: { - * name: "*.example.com" - * } - * } - * ``` - */ - name: Input; - /** - * Alias domains that should be used. - * - * @example - * ```js {4} - * { - * domain: { - * name: "app1.example.com", - * aliases: ["app2.example.com"] - * } - * } - * ``` - */ - aliases?: Input; - /** - * The ARN of an ACM (AWS Certificate Manager) certificate that proves ownership of the - * domain. By default, a certificate is created and validated automatically. - * - * :::tip - * You need to pass in a `cert` for domains that are not hosted on supported `dns` providers. - * ::: - * - * To manually set up a domain on an unsupported provider, you'll need to: - * - * 1. [Validate that you own the domain](https://docs.aws.amazon.com/acm/latest/userguide/domain-ownership-validation.html) by creating an ACM certificate. You can either validate it by setting a DNS record or by verifying an email sent to the domain owner. - * 2. Once validated, set the certificate ARN as the `cert` and set `dns` to `false`. - * 3. Add the DNS records in your provider to point to the load balancer endpoint. 
- * - * @example - * ```js - * { - * domain: { - * name: "example.com", - * dns: false, - * cert: "arn:aws:acm:us-east-1:112233445566:certificate/3a958790-8878-4cdc-a396-06d95064cf63" - * } - * } - * ``` - */ - cert?: Input; - /** - * The DNS provider to use for the domain. Defaults to the AWS. - * - * Takes an adapter that can create the DNS records on the provider. This can automate - * validating the domain and setting up the DNS routing. - * - * Supports Route 53, Cloudflare, and Vercel adapters. For other providers, you'll need - * to set `dns` to `false` and pass in a certificate validating ownership via `cert`. - * - * @default `sst.aws.dns` - * - * @example - * - * Specify the hosted zone ID for the Route 53 domain. - * - * ```js - * { - * domain: { - * name: "example.com", - * dns: sst.aws.dns({ - * zone: "Z2FDTNDATAQYW2" - * }) - * } - * } - * ``` - * - * Use a domain hosted on Cloudflare, needs the Cloudflare provider. - * - * ```js - * { - * domain: { - * name: "example.com", - * dns: sst.cloudflare.dns() - * } - * } - * ``` - * - * Use a domain hosted on Vercel, needs the Vercel provider. - * - * ```js - * { - * domain: { - * name: "example.com", - * dns: sst.vercel.dns() - * } - * } - * ``` - */ - dns?: Input; - } - >; - /** @deprecated Use `rules` instead. */ - ports?: Input[]>; - /** - * Configure the mapping for the ports the load balancer listens to, forwards, or redirects to - * the service. - * This supports two types of protocols: - * - * 1. Application Layer Protocols: `http` and `https`. This'll create an [Application Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/introduction.html). - * 2. Network Layer Protocols: `tcp`, `udp`, `tcp_udp`, and `tls`. This'll create a [Network Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/network/introduction.html). - * - * :::note - * If you want to listen on `https` or `tls`, you need to specify a custom - * `loadBalancer.domain`. 
- * ::: - * - * You **can not configure** both application and network layer protocols for the same - * service. - * - * @example - * Here we are listening on port `80` and forwarding it to the service on port `8080`. - * ```js - * { - * rules: [ - * { listen: "80/http", forward: "8080/http" } - * ] - * } - * ``` - * - * The `forward` port and protocol defaults to the `listen` port and protocol. So in this - * case both are `80/http`. - * - * ```js - * { - * rules: [ - * { listen: "80/http" } - * ] - * } - * ``` - * - * If multiple containers are configured via the `containers` argument, you need to - * specify which container the traffic should be forwarded to. - * - * ```js - * { - * rules: [ - * { listen: "80/http", container: "app" }, - * { listen: "8000/http", container: "admin" } - * ] - * } - * ``` - * - * You can also route the same port to multiple containers via path-based routing. - * - * ```js - * { - * rules: [ - * { - * listen: "80/http", - * container: "app", - * conditions: { path: "/api/*" } - * }, - * { - * listen: "80/http", - * container: "admin", - * conditions: { path: "/admin/*" } - * } - * ] - * } - * ``` - * - * Additionally, you can redirect traffic from one port to another. This is - * commonly used to redirect http to https. - * - * ```js - * { - * rules: [ - * { listen: "80/http", redirect: "443/https" }, - * { listen: "443/https", forward: "80/http" } - * ] - * } - * ``` - */ - rules?: Input[]>; - /** - * Configure the health check that the load balancer runs on your containers. - * - * :::tip - * This health check is different from the [`health`](#health) check. - * ::: - * - * This health check is run by the load balancer. While, `health` is run by ECS. This - * cannot be disabled if you are using a load balancer. While the other is off by default. - * - * Since this cannot be disabled, here are some tips on how to debug an unhealthy - * health check. - * - *
- * How to debug a load balancer health check - * - * If you notice a `Unhealthy: Health checks failed` error, it's because the health - * check has failed. When it fails, the load balancer will terminate the containers, - * causing any requests to fail. - * - * Here's how to debug it: - * - * 1. Verify the health check path. - * - * By default, the load balancer checks the `/` path. Ensure it's accessible in your - * containers. If your application runs on a different path, then update the path in - * the health check config accordingly. - * - * 2. Confirm the containers are operational. - * - * Navigate to **ECS console** > select the **cluster** > go to the **Tasks tab** > - * choose **Any desired status** under the **Filter desired status** dropdown > select - * a task and check for errors under the **Logs tab**. If it has error that means that - * the container failed to start. - * - * 3. If the container was terminated by the load balancer while still starting up, try - * increasing the health check interval and timeout. - *
- * - * For `http` and `https` the default is: - * - * ```js - * { - * path: "/", - * healthyThreshold: 5, - * successCodes: "200", - * timeout: "5 seconds", - * unhealthyThreshold: 2, - * interval: "30 seconds" - * } - * ``` - * - * For `tcp` and `udp` the default is: - * - * ```js - * { - * healthyThreshold: 5, - * timeout: "6 seconds", - * unhealthyThreshold: 2, - * interval: "30 seconds" - * } - * ``` - * - * @example - * - * To configure the health check, we use the _port/protocol_ format. Here we are - * configuring a health check that pings the `/health` path on port `8080` - * every 10 seconds. - * - * ```js - * { - * rules: [ - * { listen: "80/http", forward: "8080/http" } - * ], - * health: { - * "8080/http": { - * path: "/health", - * interval: "10 seconds" - * } - * } - * } - * ``` - * - */ - health?: Input< - Record< - Port, - Input<{ - /** - * The URL path to ping on the service for health checks. Only applicable to - * `http` and `https` protocols. - * @default `"/"` - */ - path?: Input; - /** - * The time period between each health check request. Must be between `5 seconds` - * and `300 seconds`. - * @default `"30 seconds"` - */ - interval?: Input; - /** - * The timeout for each health check request. If no response is received within this - * time, it is considered failed. Must be between `2 seconds` and `120 seconds`. - * @default `"5 seconds"` - */ - timeout?: Input; - /** - * The number of consecutive successful health check requests required to consider the - * target healthy. Must be between 2 and 10. - * @default `5` - */ - healthyThreshold?: Input; - /** - * The number of consecutive failed health check requests required to consider the - * target unhealthy. Must be between 2 and 10. - * @default `2` - */ - unhealthyThreshold?: Input; - /** - * One or more HTTP response codes the health check treats as successful. Only - * applicable to `http` and `https` protocols. 
- * - * @default `"200"` - * @example - * ```js - * { - * successCodes: "200-299" - * } - * ``` - */ - successCodes?: Input; - }> - > - >; - } + /** + * Configure if the load balancer should be public or private. + * + * When set to `false`, the load balancer endpoint will only be accessible within the + * VPC. + * + * @default `true` + */ + public?: Input; + /** + * Set a custom domain for your load balancer endpoint. + * + * Automatically manages domains hosted on AWS Route 53, Cloudflare, and Vercel. For other + * providers, you'll need to pass in a `cert` that validates domain ownership and add the + * DNS records. + * + * :::tip + * Built-in support for AWS Route 53, Cloudflare, and Vercel. And manual setup for other + * providers. + * ::: + * + * @example + * + * By default this assumes the domain is hosted on Route 53. + * + * ```js + * { + * domain: "example.com" + * } + * ``` + * + * For domains hosted on Cloudflare. + * + * ```js + * { + * domain: { + * name: "example.com", + * dns: sst.cloudflare.dns() + * } + * } + * ``` + */ + domain?: Input< + | string + | { + /** + * The custom domain you want to use. + * + * @example + * ```js + * { + * domain: { + * name: "example.com" + * } + * } + * ``` + * + * Can also include subdomains based on the current stage. + * + * ```js + * { + * domain: { + * name: `${$app.stage}.example.com` + * } + * } + * ``` + * + * Wildcard domains are supported. + * + * ```js + * { + * domain: { + * name: "*.example.com" + * } + * } + * ``` + */ + name: Input; + /** + * Alias domains that should be used. + * + * @example + * ```js {4} + * { + * domain: { + * name: "app1.example.com", + * aliases: ["app2.example.com"] + * } + * } + * ``` + */ + aliases?: Input; + /** + * The ARN of an ACM (AWS Certificate Manager) certificate that proves ownership of the + * domain. By default, a certificate is created and validated automatically. 
+ * + * :::tip + * You need to pass in a `cert` for domains that are not hosted on supported `dns` providers. + * ::: + * + * To manually set up a domain on an unsupported provider, you'll need to: + * + * 1. [Validate that you own the domain](https://docs.aws.amazon.com/acm/latest/userguide/domain-ownership-validation.html) by creating an ACM certificate. You can either validate it by setting a DNS record or by verifying an email sent to the domain owner. + * 2. Once validated, set the certificate ARN as the `cert` and set `dns` to `false`. + * 3. Add the DNS records in your provider to point to the load balancer endpoint. + * + * @example + * ```js + * { + * domain: { + * name: "example.com", + * dns: false, + * cert: "arn:aws:acm:us-east-1:112233445566:certificate/3a958790-8878-4cdc-a396-06d95064cf63" + * } + * } + * ``` + */ + cert?: Input; + /** + * The DNS provider to use for the domain. Defaults to the AWS. + * + * Takes an adapter that can create the DNS records on the provider. This can automate + * validating the domain and setting up the DNS routing. + * + * Supports Route 53, Cloudflare, and Vercel adapters. For other providers, you'll need + * to set `dns` to `false` and pass in a certificate validating ownership via `cert`. + * + * @default `sst.aws.dns` + * + * @example + * + * Specify the hosted zone ID for the Route 53 domain. + * + * ```js + * { + * domain: { + * name: "example.com", + * dns: sst.aws.dns({ + * zone: "Z2FDTNDATAQYW2" + * }) + * } + * } + * ``` + * + * Use a domain hosted on Cloudflare, needs the Cloudflare provider. + * + * ```js + * { + * domain: { + * name: "example.com", + * dns: sst.cloudflare.dns() + * } + * } + * ``` + * + * Use a domain hosted on Vercel, needs the Vercel provider. + * + * ```js + * { + * domain: { + * name: "example.com", + * dns: sst.vercel.dns() + * } + * } + * ``` + */ + dns?: Input; + } + >; + /** @deprecated Use `rules` instead. 
*/ + ports?: Input[]>; + /** + * Configure the mapping for the ports the load balancer listens to, forwards, or redirects to + * the service. + * This supports two types of protocols: + * + * 1. Application Layer Protocols: `http` and `https`. This'll create an [Application Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/introduction.html). + * 2. Network Layer Protocols: `tcp`, `udp`, `tcp_udp`, and `tls`. This'll create a [Network Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/network/introduction.html). + * + * :::note + * If you want to listen on `https` or `tls`, you need to specify a custom + * `loadBalancer.domain`. + * ::: + * + * You **can not configure** both application and network layer protocols for the same + * service. + * + * @example + * Here we are listening on port `80` and forwarding it to the service on port `8080`. + * ```js + * { + * rules: [ + * { listen: "80/http", forward: "8080/http" } + * ] + * } + * ``` + * + * The `forward` port and protocol defaults to the `listen` port and protocol. So in this + * case both are `80/http`. + * + * ```js + * { + * rules: [ + * { listen: "80/http" } + * ] + * } + * ``` + * + * If multiple containers are configured via the `containers` argument, you need to + * specify which container the traffic should be forwarded to. + * + * ```js + * { + * rules: [ + * { listen: "80/http", container: "app" }, + * { listen: "8000/http", container: "admin" } + * ] + * } + * ``` + * + * You can also route the same port to multiple containers via path-based routing. + * + * ```js + * { + * rules: [ + * { + * listen: "80/http", + * container: "app", + * conditions: { path: "/api/*" } + * }, + * { + * listen: "80/http", + * container: "admin", + * conditions: { path: "/admin/*" } + * } + * ] + * } + * ``` + * + * Additionally, you can redirect traffic from one port to another. This is + * commonly used to redirect http to https. 
+ * + * ```js + * { + * rules: [ + * { listen: "80/http", redirect: "443/https" }, + * { listen: "443/https", forward: "80/http" } + * ] + * } + * ``` + */ + rules?: Input[]>; + /** + * Configure the health check that the load balancer runs on your containers. + * + * :::tip + * This health check is different from the [`health`](#health) check. + * ::: + * + * This health check is run by the load balancer. While, `health` is run by ECS. This + * cannot be disabled if you are using a load balancer. While the other is off by default. + * + * Since this cannot be disabled, here are some tips on how to debug an unhealthy + * health check. + * + *
+ * How to debug a load balancer health check + * + * If you notice a `Unhealthy: Health checks failed` error, it's because the health + * check has failed. When it fails, the load balancer will terminate the containers, + * causing any requests to fail. + * + * Here's how to debug it: + * + * 1. Verify the health check path. + * + * By default, the load balancer checks the `/` path. Ensure it's accessible in your + * containers. If your application runs on a different path, then update the path in + * the health check config accordingly. + * + * 2. Confirm the containers are operational. + * + * Navigate to **ECS console** > select the **cluster** > go to the **Tasks tab** > + * choose **Any desired status** under the **Filter desired status** dropdown > select + * a task and check for errors under the **Logs tab**. If it has error that means that + * the container failed to start. + * + * 3. If the container was terminated by the load balancer while still starting up, try + * increasing the health check interval and timeout. + *
+ * + * For `http` and `https` the default is: + * + * ```js + * { + * path: "/", + * healthyThreshold: 5, + * successCodes: "200", + * timeout: "5 seconds", + * unhealthyThreshold: 2, + * interval: "30 seconds" + * } + * ``` + * + * For `tcp` and `udp` the default is: + * + * ```js + * { + * healthyThreshold: 5, + * timeout: "6 seconds", + * unhealthyThreshold: 2, + * interval: "30 seconds" + * } + * ``` + * + * @example + * + * To configure the health check, we use the _port/protocol_ format. Here we are + * configuring a health check that pings the `/health` path on port `8080` + * every 10 seconds. + * + * ```js + * { + * rules: [ + * { listen: "80/http", forward: "8080/http" } + * ], + * health: { + * "8080/http": { + * path: "/health", + * interval: "10 seconds" + * } + * } + * } + * ``` + * + */ + health?: Input< + Record< + Port, + Input<{ + /** + * The URL path to ping on the service for health checks. Only applicable to + * `http` and `https` protocols. + * @default `"/"` + */ + path?: Input; + /** + * The time period between each health check request. Must be between `5 seconds` + * and `300 seconds`. + * @default `"30 seconds"` + */ + interval?: Input; + /** + * The timeout for each health check request. If no response is received within this + * time, it is considered failed. Must be between `2 seconds` and `120 seconds`. + * @default `"5 seconds"` + */ + timeout?: Input; + /** + * The number of consecutive successful health check requests required to consider the + * target healthy. Must be between 2 and 10. + * @default `5` + */ + healthyThreshold?: Input; + /** + * The number of consecutive failed health check requests required to consider the + * target unhealthy. Must be between 2 and 10. + * @default `2` + */ + unhealthyThreshold?: Input; + /** + * One or more HTTP response codes the health check treats as successful. Only + * applicable to `http` and `https` protocols. 
+ * + * @default `"200"` + * @example + * ```js + * { + * successCodes: "200-299" + * } + * ``` + */ + successCodes?: Input; + }> + > + >; + } | { - /** - * The `Alb` instance to attach this service to. When provided, the service creates - * target groups and listener rules on the shared ALB instead of creating its own - * load balancer. - * - * ECS tasks use the VPC's default security group, which allows all traffic within the - * VPC CIDR. For tighter security, add an explicit security group ingress rule from the - * ALB's security group using `transform`. - * - * @example - * ```js - * { - * loadBalancer: { - * instance: alb, - * rules: [ - * { listen: "443/https", forward: "8080/http", conditions: { path: "/api/*" }, priority: 100 } - * ] - * } - * } - * ``` - */ - instance: Alb; - /** - * The rules for routing traffic from the ALB to this service's containers. - * Each rule must have explicit conditions and priority. - */ - rules: Prettify[]; - /** - * Configure health checks for the target groups. Uses the same format as the inline - * health check config, keyed by `{port}/{protocol}`. - */ - health?: Record< - AlbPort, - Input<{ - path?: Input; - interval?: Input; - timeout?: Input; - healthyThreshold?: Input; - unhealthyThreshold?: Input; - successCodes?: Input; - }> - >; - } + /** + * The `Alb` instance to attach this service to. When provided, the service creates + * target groups and listener rules on the shared ALB instead of creating its own + * load balancer. + * + * ECS tasks use the VPC's default security group, which allows all traffic within the + * VPC CIDR. For tighter security, add an explicit security group ingress rule from the + * ALB's security group using `transform`. 
+ * + * @example + * ```js + * { + * loadBalancer: { + * instance: alb, + * rules: [ + * { listen: "443/https", forward: "8080/http", conditions: { path: "/api/*" }, priority: 100 } + * ] + * } + * } + * ``` + */ + instance: Alb; + /** + * The rules for routing traffic from the ALB to this service's containers. + * Each rule must have explicit conditions and priority. + */ + rules: Prettify[]; + /** + * Configure health checks for the target groups. Uses the same format as the inline + * health check config, keyed by `{port}/{protocol}`. + */ + health?: Record< + AlbPort, + Input<{ + path?: Input; + interval?: Input; + timeout?: Input; + healthyThreshold?: Input; + unhealthyThreshold?: Input; + successCodes?: Input; + }> + >; + } >; /** * Configure the CloudMap service registry for the service. @@ -1231,6 +1226,22 @@ export interface ServiceArgs extends FargateBaseArgs { */ scaleOutCooldown?: Input; }>; + /** + * Run this service on ECS Managed Instances with a GPU-enabled host. + * + * This automatically switches the service from Fargate to ECS Managed Instances. + * Use the top-level `cpu`, `memory`, and `storage` props to size the workload. + * + * @example + * ```js + * { + * gpu: "nvidia/t4", + * cpu: "4 vCPU", + * memory: "16 GB" + * } + * ``` + */ + gpu?: Input; /** * Configure the capacity provider; regular Fargate or Fargate Spot, for this service. * @@ -1327,52 +1338,14 @@ export interface ServiceArgs extends FargateBaseArgs { * regular Fargate. * * ```js - * { - * capacity: { - * fargate: { weight: 1, base: 2 }, - * spot: { weight: 1 } - * } - * } - * ``` - * - Use ECS Managed Instances for a CPU-only workload. 
- * - * ```js - * { - * cpu: "1 vCPU", - * memory: "2 GB", - * capacity: { - * managed: { - * infrastructureRole: "arn:aws:iam::123456789012:role/ecs-infra", - * instanceProfile: "arn:aws:iam::123456789012:instance-profile/ecs-managed", - * cpu: { min: 1, max: 4 }, - * memory: { min: "2 GB", max: "8 GB" } - * } - * } - * } - * ``` - * - Use ECS Managed Instances for a GPU workload. - * - * ```js - * { - * cpu: "4 vCPU", - * memory: "16 GB", - * containers: [{ - * name: "app", - * gpu: 1, - * }], - * capacity: { - * managed: { - * infrastructureRole: "arn:aws:iam::123456789012:role/ecs-infra", - * instanceProfile: "arn:aws:iam::123456789012:instance-profile/ecs-managed", - * gpu: { - * count: 1, - * name: "t4", - * } - * } - * } - * } - * ``` - */ + * { + * capacity: { + * fargate: { weight: 1, base: 2 }, + * spot: { weight: 1 } + * } + * } + * ``` + */ capacity?: Input< | "spot" | (ServiceFargateCapacity & { @@ -1410,19 +1383,8 @@ export interface ServiceArgs extends FargateBaseArgs { */ weight: Input; }>; - /** - * Configure ECS Managed Instances for this service. - */ managed?: never; }) - | { - /** - * Configure ECS Managed Instances for this service. - * - * This mode is exclusive and cannot be combined with `fargate` or `spot`. - */ - managed?: Input; - } >; /** * Configure the health check that ECS runs on your containers. @@ -1597,6 +1559,14 @@ export interface ServiceArgs extends FargateBaseArgs { * attaching to an external ALB via the `loadBalancer.instance` prop. */ listenerRule?: Transform; + /** + * Transform the IAM infrastructure role resource created for managed instances. + */ + infrastructureRole?: Transform; + /** + * Transform the IAM instance role resource created for managed instances. + */ + instanceRole?: Transform; /** * Transform the ECS managed instances capacity provider resource. 
*/ @@ -1878,7 +1848,7 @@ export class Service extends Component implements Link.Linkable { this.taskRole = taskRole; if (dev) { - this.devUrl = (!lbArgs && !args.loadBalancer) ? undefined : dev.url; + this.devUrl = !lbArgs && !args.loadBalancer ? undefined : dev.url; registerReceiver(); return; } @@ -1888,8 +1858,9 @@ export class Service extends Component implements Link.Linkable { ? createManagedCapacityProvider( name, { - capacity: managed.capacity, transform: { + infrastructureRole: args.transform?.infrastructureRole, + instanceRole: args.transform?.instanceRole, capacityProvider: args.transform?.capacityProvider, instanceProfile: args.transform?.instanceProfile, }, @@ -1931,7 +1902,13 @@ export class Service extends Component implements Link.Linkable { ); let loadBalancer: lb.LoadBalancer | undefined; let targetGroups: ReturnType; - let targetEntries: Output<{ targetGroup: lb.TargetGroup; containerName: string; containerPort: number }[]>; + let targetEntries: Output< + { + targetGroup: lb.TargetGroup; + containerName: string; + containerPort: number; + }[] + >; let effectiveLbArn: Output | undefined; let effectiveDomain: Output; let effectiveDnsName: Output | undefined; @@ -1946,7 +1923,8 @@ export class Service extends Component implements Link.Linkable { } }, ); - const { targets: albTargets, entries: albEntries } = createAlbTargetsAndEntries(albAttachment); + const { targets: albTargets, entries: albEntries } = + createAlbTargetsAndEntries(albAttachment); targetGroups = output(albTargets); targetEntries = albEntries; createAlbListenerRules(albAttachment, albTargets); @@ -1963,7 +1941,8 @@ export class Service extends Component implements Link.Linkable { effectiveLbArn = loadBalancer.arn; effectiveDnsName = loadBalancer.dnsName; } - effectiveDomain = lbArgs?.domain?.apply((d) => d?.name) ?? output(undefined); + effectiveDomain = + lbArgs?.domain?.apply((d) => d?.name) ?? 
output(undefined); } const cloudmapService = createCloudmapService(); const service = createService(); @@ -1973,13 +1952,13 @@ export class Service extends Component implements Link.Linkable { this.cloudmapService = cloudmapService; this.executionRole = executionRole; this.taskDefinition = taskDefinition; - this.loadBalancer = loadBalancer ?? albAttachment?.instance.nodes.loadBalancer; + this.loadBalancer = + loadBalancer ?? albAttachment?.instance.nodes.loadBalancer; this.autoScalingTarget = autoScalingTarget; this.domain = effectiveDomain; this._url = effectiveDnsName - ? all([effectiveDomain, effectiveDnsName]).apply( - ([domain, dnsName]) => - domain ? `https://${domain}/` : `http://${dnsName}`, + ? all([effectiveDomain, effectiveDnsName]).apply(([domain, dnsName]) => + domain ? `https://${domain}/` : `http://${dnsName}`, ) : undefined; @@ -2021,7 +2000,9 @@ export class Service extends Component implements Link.Linkable { function normalizeScaling() { // External ALB is always "application" type - const lbType = albAttachment ? output("application" as const) : lbArgs?.type; + const lbType = albAttachment + ? output("application" as const) + : lbArgs?.type; return all([lbType, args.scaling]).apply(([type, v]) => { if (type !== "application" && v?.requestCount) throw new VisibleError( @@ -2034,64 +2015,51 @@ export class Service extends Component implements Link.Linkable { cpuUtilization: v?.cpuUtilization ?? 70, memoryUtilization: v?.memoryUtilization ?? 70, requestCount: v?.requestCount ?? false, - scaleInCooldown: v?.scaleInCooldown ? toSeconds(v.scaleInCooldown) : undefined, - scaleOutCooldown: v?.scaleOutCooldown ? toSeconds(v.scaleOutCooldown) : undefined, + scaleInCooldown: v?.scaleInCooldown + ? toSeconds(v.scaleInCooldown) + : undefined, + scaleOutCooldown: v?.scaleOutCooldown + ? 
toSeconds(v.scaleOutCooldown) + : undefined, }; }); } function normalizeCapacity() { + if (args.gpu && args.capacity) { + throw new VisibleError( + `Do not combine top-level "gpu" with "capacity" in the "${name}" Service. GPU services use ECS Managed Instances automatically.`, + ); + } if (!args.capacity) return; - return output(args.capacity).apply((v): ServiceFargateCapacity | undefined => { - if (isManagedCapacityInput(v)) return undefined; - if (v === "spot") - return { spot: { weight: 1 }, fargate: { weight: 0 } }; - const fargateCapacity = v as ServiceFargateCapacity; - return { - fargate: fargateCapacity.fargate, - spot: fargateCapacity.spot, - }; - }); + return output(args.capacity).apply( + (v): ServiceFargateCapacity | undefined => { + if (v === "spot") + return { spot: { weight: 1 }, fargate: { weight: 0 } }; + const fargateCapacity = v as ServiceFargateCapacity; + return { + fargate: fargateCapacity.fargate, + spot: fargateCapacity.spot, + }; + }, + ); } function normalizeManaged() { - if (!args.capacity) return; - - const managedCapacity = output(args.capacity).apply((v) => { - if (v === "spot" || !isManagedCapacityInput(v)) return; + if (!args.gpu) return; - if ("fargate" in v || "spot" in v) { - throw new VisibleError( - `Do not combine \"capacity.managed\" with \"capacity.fargate\" or \"capacity.spot\" in the \"${name}\" Service.`, - ); - } - - return v.managed; + const managedCapacity = output({ + gpu: args.gpu, + cpu: args.cpu, + memory: args.memory, + storage: args.storage, }); return { - capacity: managedCapacity.apply((v) => { - if (!v) - throw new VisibleError( - `Missing \"capacity.managed\" for the \"${name}\" Service.`, - ); - return v; - }), - normalized: managedCapacity - .apply((v) => { - if (!v) - throw new VisibleError( - `Missing \"capacity.managed\" for the \"${name}\" Service.`, - ); - return v; - }) - .apply((managed) => - normalizeManagedCapacity(name, managed, { - cpu: args.cpu, - memory: args.memory, - }), - ), + normalized: 
managedCapacity.apply((managed) => + normalizeManagedCapacity(name, managed), + ), }; } @@ -2374,7 +2342,11 @@ export class Service extends Component implements Link.Linkable { const seen = new Set(); for (const rule of rules) { if (rule.type !== "forward") continue; - const targetId = targetKey(rule.container!, rule.forwardProtocol, rule.forwardPort); + const targetId = targetKey( + rule.container!, + rule.forwardProtocol, + rule.forwardPort, + ); if (seen.has(targetId)) continue; seen.add(targetId); entries.push({ @@ -2561,47 +2533,48 @@ export class Service extends Component implements Link.Linkable { capacityProviderStrategies: [ { capacityProvider: managedCapacityProvider!.name, + base: 1, weight: 1, }, ], } : capacity - ? { - // setting `forceNewDeployment` ensures that the service is not recreated - // when the capacity provider config changes. - forceNewDeployment: true, - capacityProviderStrategies: capacity.apply((v) => { - if (!v) - throw new VisibleError( - `Invalid Fargate capacity configuration for the \"${name}\" Service.`, - ); - return [ - ...(v.fargate - ? [ - { - capacityProvider: "FARGATE", - base: v.fargate?.base, - weight: v.fargate?.weight, - }, - ] - : []), - ...(v.spot - ? [ - { - capacityProvider: "FARGATE_SPOT", - base: v.spot?.base, - weight: v.spot?.weight, - }, - ] - : []), - ]; + ? { + // setting `forceNewDeployment` ensures that the service is not recreated + // when the capacity provider config changes. + forceNewDeployment: true, + capacityProviderStrategies: capacity.apply((v) => { + if (!v) + throw new VisibleError( + `Invalid Fargate capacity configuration for the \"${name}\" Service.`, + ); + return [ + ...(v.fargate + ? [ + { + capacityProvider: "FARGATE", + base: v.fargate?.base, + weight: v.fargate?.weight, + }, + ] + : []), + ...(v.spot + ? 
[ + { + capacityProvider: "FARGATE_SPOT", + base: v.spot?.base, + weight: v.spot?.weight, + }, + ] + : []), + ]; + }), + } + : // @deprecated do not use `launchType`, set `capacityProviderStrategies` + // to `[{ capacityProvider: "FARGATE", weight: 1 }]` instead + { + launchType: "FARGATE", }), - } - : // @deprecated do not use `launchType`, set `capacityProviderStrategies` - // to `[{ capacityProvider: "FARGATE", weight: 1 }]` instead - { - launchType: "FARGATE", - }), networkConfiguration: { // If the vpc is an SST vpc, services are automatically deployed to the public // subnets. So we need to assign a public IP for the service to be accessible. @@ -2651,55 +2624,64 @@ export class Service extends Component implements Link.Linkable { ), ); - all([scaling.cpuUtilization, scaling.scaleInCooldown, scaling.scaleOutCooldown]).apply( - ([cpuUtilization, scaleInCooldown, scaleOutCooldown]) => { - if (cpuUtilization === false) return; - new appautoscaling.Policy( - `${name}AutoScalingCpuPolicy`, - { - serviceNamespace: target.serviceNamespace, - scalableDimension: target.scalableDimension, - resourceId: target.resourceId, - policyType: "TargetTrackingScaling", - targetTrackingScalingPolicyConfiguration: { - predefinedMetricSpecification: { - predefinedMetricType: "ECSServiceAverageCPUUtilization", - }, - targetValue: cpuUtilization, - scaleInCooldown, - scaleOutCooldown, + all([ + scaling.cpuUtilization, + scaling.scaleInCooldown, + scaling.scaleOutCooldown, + ]).apply(([cpuUtilization, scaleInCooldown, scaleOutCooldown]) => { + if (cpuUtilization === false) return; + new appautoscaling.Policy( + `${name}AutoScalingCpuPolicy`, + { + serviceNamespace: target.serviceNamespace, + scalableDimension: target.scalableDimension, + resourceId: target.resourceId, + policyType: "TargetTrackingScaling", + targetTrackingScalingPolicyConfiguration: { + predefinedMetricSpecification: { + predefinedMetricType: "ECSServiceAverageCPUUtilization", }, + targetValue: cpuUtilization, + 
scaleInCooldown, + scaleOutCooldown, }, - { parent: self }, - ); - } - ); + }, + { parent: self }, + ); + }); - all([scaling.memoryUtilization, scaling.scaleInCooldown, scaling.scaleOutCooldown]).apply( - ([memoryUtilization, scaleInCooldown, scaleOutCooldown]) => { - if (memoryUtilization === false) return; - new appautoscaling.Policy( - `${name}AutoScalingMemoryPolicy`, - { - serviceNamespace: target.serviceNamespace, - scalableDimension: target.scalableDimension, - resourceId: target.resourceId, - policyType: "TargetTrackingScaling", - targetTrackingScalingPolicyConfiguration: { - predefinedMetricSpecification: { - predefinedMetricType: "ECSServiceAverageMemoryUtilization", - }, - targetValue: memoryUtilization, - scaleInCooldown, - scaleOutCooldown, + all([ + scaling.memoryUtilization, + scaling.scaleInCooldown, + scaling.scaleOutCooldown, + ]).apply(([memoryUtilization, scaleInCooldown, scaleOutCooldown]) => { + if (memoryUtilization === false) return; + new appautoscaling.Policy( + `${name}AutoScalingMemoryPolicy`, + { + serviceNamespace: target.serviceNamespace, + scalableDimension: target.scalableDimension, + resourceId: target.resourceId, + policyType: "TargetTrackingScaling", + targetTrackingScalingPolicyConfiguration: { + predefinedMetricSpecification: { + predefinedMetricType: "ECSServiceAverageMemoryUtilization", }, + targetValue: memoryUtilization, + scaleInCooldown, + scaleOutCooldown, }, - { parent: self }, - ); - } - ); + }, + { parent: self }, + ); + }); - all([scaling.requestCount, scaling.scaleInCooldown, scaling.scaleOutCooldown, targetGroups]).apply( + all([ + scaling.requestCount, + scaling.scaleInCooldown, + scaling.scaleOutCooldown, + targetGroups, + ]).apply( ([requestCount, scaleInCooldown, scaleOutCooldown, targetGroups]) => { if (requestCount === false) return; if (!targetGroups) return; @@ -2809,7 +2791,9 @@ export class Service extends Component implements Link.Linkable { const cn = rule.container ?? 
ctrs[0].name; if (!containerNames.has(cn)) { throw new VisibleError( - `Container "${cn}" in "loadBalancer.rules" does not match any container in Service "${name}". Available: ${[...containerNames].join(", ")}.`, + `Container "${cn}" in "loadBalancer.rules" does not match any container in Service "${name}". Available: ${[ + ...containerNames, + ].join(", ")}.`, ); } } @@ -2829,7 +2813,11 @@ export class Service extends Component implements Link.Linkable { const forwardProtocol = parts[1].toUpperCase(); // Use explicit container or component name for keying/naming const containerNameForKey = rule.container ?? name; - const tgtId = targetKey(containerNameForKey, forwardProtocol, forwardPort); + const tgtId = targetKey( + containerNameForKey, + forwardProtocol, + forwardPort, + ); if (!targets[tgtId]) { const healthKey = `${forwardPort}/${parts[1]}` as AlbPort; @@ -2902,8 +2890,7 @@ export class Service extends Component implements Link.Linkable { ); } - const seen = - prioritiesByListener.get(rule.listen) ?? new Set(); + const seen = prioritiesByListener.get(rule.listen) ?? new Set(); if (seen.has(rule.priority)) { throw new VisibleError( `Duplicate priority ${rule.priority} on listener "${rule.listen}" in Service "${name}".`, @@ -2930,7 +2917,11 @@ export class Service extends Component implements Link.Linkable { const forwardPort = parseInt(forwardParts[0]); const forwardProtocol = forwardParts[1].toUpperCase(); const containerNameForKey = rule.container ?? 
name; - const tgtId = targetKey(containerNameForKey, forwardProtocol, forwardPort); + const tgtId = targetKey( + containerNameForKey, + forwardProtocol, + forwardPort, + ); const targetGroup = albTargets[tgtId]; if (!targetGroup) { @@ -2939,13 +2930,17 @@ export class Service extends Component implements Link.Linkable { ); } - const listenerResource = - attachment.instance.getListener(listenerProtocol, listenerPort); + const listenerResource = attachment.instance.getListener( + listenerProtocol, + listenerPort, + ); new lb.ListenerRule( ...transform( args.transform?.listenerRule, - `${name}AlbRule${listenerProtocol.toUpperCase()}${listenerPort}P${rule.priority}`, + `${name}AlbRule${listenerProtocol.toUpperCase()}${listenerPort}P${ + rule.priority + }`, { listenerArn: listenerResource.arn, priority: rule.priority, @@ -3034,10 +3029,14 @@ export class Service extends Component implements Link.Linkable { throw new VisibleError( `Cannot access the AWS Cloud Map service name for the "${this._name}" Service. Cloud Map is not configured for the cluster.`, ); + if (!service) + throw new VisibleError( + `Cannot access the AWS Cloud Map service name for the "${this._name}" Service. The Cloud Map service is not available.`, + ); return this.dev ? interpolate`dev.${namespace}` - : interpolate`${service!.name}.${namespace}`; + : interpolate`${service.name}.${namespace}`; }, ); } @@ -3123,8 +3122,13 @@ export class Service extends Component implements Link.Linkable { return { properties: { url: this.dev ? this.devUrl : this._url, - service: output(this.cloudmapNamespace).apply((namespace) => - namespace ? this.service : undefined, + service: all([this.cloudmapNamespace, this.cloudmapService]).apply( + ([namespace, service]) => + namespace && service + ? this.dev + ? 
`dev.${namespace}` + : `${service.name}.${namespace}` + : undefined, ), }, }; diff --git a/platform/src/components/component.ts b/platform/src/components/component.ts index 1aa5fea935..fe5b68a233 100644 --- a/platform/src/components/component.ts +++ b/platform/src/components/component.ts @@ -238,6 +238,7 @@ export class Component extends ComponentResource { "aws:ec2/subnet:Subnet": ["tags", 255], "aws:ec2/vpc:Vpc": ["tags", 255], "aws:ec2/vpcEndpoint:VpcEndpoint": ["tags", 255], + "aws:ecs/capacityProvider:CapacityProvider": ["name", 255], "aws:ecs/cluster:Cluster": ["name", 255], "aws:elasticache/parameterGroup:ParameterGroup": [ "name", From c569145e2d742279bb719623801272c1d3018e84 Mon Sep 17 00:00:00 2001 From: mkilp <8791079+mkilp@users.noreply.github.com> Date: Tue, 24 Mar 2026 10:48:05 -0400 Subject: [PATCH 3/9] feat: add instance profile and infra role inputs --- .../src/components/aws/managed-instances.ts | 85 ++++++++++++------- platform/src/components/aws/service.ts | 21 +++-- 2 files changed, 68 insertions(+), 38 deletions(-) diff --git a/platform/src/components/aws/managed-instances.ts b/platform/src/components/aws/managed-instances.ts index 6cc4ead302..8d1ca7ff71 100644 --- a/platform/src/components/aws/managed-instances.ts +++ b/platform/src/components/aws/managed-instances.ts @@ -53,6 +53,8 @@ type ManagedServiceArgs = { cpu?: Input<`${number} vCPU`>; memory?: Input<`${number} GB`>; storage?: Input<`${number} GB`>; + infrastructureRole?: Input; + instanceProfile?: Input; }; type ManagedTaskDefinitionArgs = { @@ -72,9 +74,10 @@ type ManagedTaskDefinitionArgs = { }; type ManagedCapacityProviderArgs = { + infrastructureRole?: Input; + instanceProfile?: Input; transform?: { infrastructureRole?: Transform; - instanceRole?: Transform; capacityProvider?: Transform; instanceProfile?: Transform; }; @@ -111,8 +114,25 @@ export function normalizeManagedCapacity( name: string, args: ManagedServiceArgs, ) { - return all([args.gpu, args.cpu, args.memory, 
args.storage]).apply( - ([gpu, cpu, memory, storage]) => { + return all([ + args.gpu, + args.cpu, + args.memory, + args.storage, + args.infrastructureRole, + args.instanceProfile, + ]).apply(([gpu, cpu, memory, storage, infrastructureRole, instanceProfile]) => { + if (!infrastructureRole) { + throw new VisibleError( + `You must provide \"infrastructureRole\" for the \"${name}\" Service when \"gpu\" is set.`, + ); + } + if (!instanceProfile) { + throw new VisibleError( + `You must provide \"instanceProfile\" for the \"${name}\" Service when \"gpu\" is set.`, + ); + } + const hostCpu = normalizeHostCpu(cpu); const hostMemory = normalizeHostMemory(memory); const hostStorage = normalizeStorage(storage); @@ -125,8 +145,7 @@ export function normalizeManagedCapacity( hostStorage, gpu: normalizeGpu(gpu), } satisfies NormalizedManagedCapacity; - }, - ); + }); function normalizeHostCpu(cpu?: `${number} vCPU`) { if (cpu) { @@ -207,30 +226,33 @@ export function createManagedCapacityProvider( ) { const partition = getPartitionOutput({}, opts).partition; - const infrastructureRole = new iam.Role( - ...transform( - args.transform?.infrastructureRole, - `${name}ManagedInfrastructureRole`, - { - assumeRolePolicy: iam.assumeRolePolicyForPrincipal({ - Service: "ecs.amazonaws.com", - }), - managedPolicyArns: [ - interpolate`arn:${partition}:iam::aws:policy/AmazonECSInfrastructureRolePolicyForManagedInstances`, - ], - }, - { parent }, - ), - ); - - const instanceProfileArn = getOrCreateManagedInstanceProfile( - name, - partition, - args.transform?.instanceRole, - args.transform?.instanceProfile, - parent, - opts, - ).arn; + const infrastructureRoleArn = args.infrastructureRole + ? 
output(args.infrastructureRole) + : new iam.Role( + ...transform( + args.transform?.infrastructureRole, + `${name}ManagedInfrastructureRole`, + { + assumeRolePolicy: iam.assumeRolePolicyForPrincipal({ + Service: "ecs.amazonaws.com", + }), + managedPolicyArns: [ + interpolate`arn:${partition}:iam::aws:policy/AmazonECSInfrastructureRolePolicyForManagedInstances`, + ], + }, + { parent }, + ), + ).arn; + + const instanceProfileArn = args.instanceProfile + ? output(args.instanceProfile) + : getOrCreateManagedInstanceProfile( + name, + partition, + args.transform?.instanceProfile, + parent, + opts, + ).arn; return new ecs.CapacityProvider( ...transform( @@ -240,7 +262,7 @@ export function createManagedCapacityProvider( cluster: clusterName, managedInstancesProvider: all([ normalized, - infrastructureRole.arn, + infrastructureRoleArn, instanceProfileArn, vpc.containerSubnets, vpc.securityGroups, @@ -315,7 +337,6 @@ let defaultManagedInstanceProfile: iam.InstanceProfile | undefined; function getOrCreateManagedInstanceProfile( name: string, partition: Output, - roleTransform: Transform | undefined, profileTransform: Transform | undefined, parent: Component, opts: ComponentResourceOptions, @@ -328,7 +349,7 @@ function getOrCreateManagedInstanceProfile( const role = new iam.Role( ...transform( - roleTransform, + undefined, `${name}ManagedInstancesEcsInstanceRole`, { name: "ecsInstanceRole", diff --git a/platform/src/components/aws/service.ts b/platform/src/components/aws/service.ts index 2f47a727dc..d3a825dfc3 100644 --- a/platform/src/components/aws/service.ts +++ b/platform/src/components/aws/service.ts @@ -1237,11 +1237,21 @@ export interface ServiceArgs extends FargateBaseArgs { * { * gpu: "nvidia/t4", * cpu: "4 vCPU", - * memory: "16 GB" + * memory: "16 GB", + * infrastructureRole: "arn:aws:iam::123456789012:role/ecs-infra", + * instanceProfile: "arn:aws:iam::123456789012:instance-profile/ecs-managed" * } * ``` */ gpu?: Input; + /** + * The ARN of an existing ECS 
infrastructure role to use for managed instances. + */ + infrastructureRole?: Input; + /** + * The ARN of an existing EC2 instance profile to use for managed instances. + */ + instanceProfile?: Input; /** * Configure the capacity provider; regular Fargate or Fargate Spot, for this service. * @@ -1563,10 +1573,6 @@ export interface ServiceArgs extends FargateBaseArgs { * Transform the IAM infrastructure role resource created for managed instances. */ infrastructureRole?: Transform; - /** - * Transform the IAM instance role resource created for managed instances. - */ - instanceRole?: Transform; /** * Transform the ECS managed instances capacity provider resource. */ @@ -1858,9 +1864,10 @@ export class Service extends Component implements Link.Linkable { ? createManagedCapacityProvider( name, { + infrastructureRole: args.infrastructureRole, + instanceProfile: args.instanceProfile, transform: { infrastructureRole: args.transform?.infrastructureRole, - instanceRole: args.transform?.instanceRole, capacityProvider: args.transform?.capacityProvider, instanceProfile: args.transform?.instanceProfile, }, @@ -2054,6 +2061,8 @@ export class Service extends Component implements Link.Linkable { cpu: args.cpu, memory: args.memory, storage: args.storage, + infrastructureRole: args.infrastructureRole, + instanceProfile: args.instanceProfile, }); return { From 1697faccd62b1c56da7360e1e57b00c43a727700 Mon Sep 17 00:00:00 2001 From: mkilp <8791079+mkilp@users.noreply.github.com> Date: Tue, 24 Mar 2026 12:35:54 -0400 Subject: [PATCH 4/9] fix: typecompletion for gpus --- .../src/components/aws/managed-instances.ts | 52 +++++++++++++++++-- platform/src/components/aws/service.ts | 5 +- 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/platform/src/components/aws/managed-instances.ts b/platform/src/components/aws/managed-instances.ts index 8d1ca7ff71..0ee9d5b6fd 100644 --- a/platform/src/components/aws/managed-instances.ts +++ b/platform/src/components/aws/managed-instances.ts 
@@ -29,17 +29,44 @@ import { bootstrap } from "./helpers/bootstrap.js"; import { imageBuilder } from "./helpers/container-builder.js"; import { normalizeContainers } from "./fargate.js"; -export const managedGpuManufacturers = ["nvidia"] as const; +export const managedGpuManufacturers = [ + "amazon-web-services", + "amd", + "nvidia", + "xilinx", + "habana", +] as const; export const ManagedGpuAcceleratorName = { A100: "a100", A10G: "a10g", H100: "h100", + INFERENTIA: "inferentia", K520: "k520", K80: "k80", M60: "m60", + RADEON_PRO_V520: "radeon-pro-v520", T4: "t4", T4G: "t4g", V100: "v100", + VU9P: "vu9p", +} as const; + +const managedGpuManufacturerNames = { + "amazon-web-services": [ManagedGpuAcceleratorName.INFERENTIA], + amd: [ManagedGpuAcceleratorName.RADEON_PRO_V520], + nvidia: [ + ManagedGpuAcceleratorName.A100, + ManagedGpuAcceleratorName.A10G, + ManagedGpuAcceleratorName.H100, + ManagedGpuAcceleratorName.K520, + ManagedGpuAcceleratorName.K80, + ManagedGpuAcceleratorName.M60, + ManagedGpuAcceleratorName.T4, + ManagedGpuAcceleratorName.T4G, + ManagedGpuAcceleratorName.V100, + ], + xilinx: [ManagedGpuAcceleratorName.VU9P], + habana: [], } as const; export type ManagedGpuAcceleratorName = @@ -183,11 +210,14 @@ export function normalizeManagedCapacity( return { count: { min: 1, max: 1 }, manufacturer, - names: normalizeGpuNames(name), + names: normalizeGpuNames(manufacturer, name), }; } - function normalizeGpuNames(name: ManagedGpuAcceleratorName) { + function normalizeGpuNames( + manufacturer: (typeof managedGpuManufacturers)[number], + name: ManagedGpuAcceleratorName, + ) { const names = [name]; const supported = Object.values(ManagedGpuAcceleratorName); const invalid = names.filter((name) => !supported.includes(name)); @@ -195,11 +225,25 @@ export function normalizeManagedCapacity( throw new VisibleError( `Unsupported GPU accelerator name ${invalid .map((name) => `"${name}"`) - .join(", ")}. The supported NVIDIA values are ${supported + .join(", ")}. 
The supported values are ${supported .map((name) => `"${name}"`) .join(", ")}.`, ); } + + const supportedForManufacturer = managedGpuManufacturerNames[ + manufacturer + ] as readonly ManagedGpuAcceleratorName[]; + if (!supportedForManufacturer.includes(name)) { + const validNames = supportedForManufacturer + .map((name) => `"${name}"`) + .join(", "); + throw new VisibleError( + supportedForManufacturer.length > 0 + ? `Unsupported GPU accelerator \"${manufacturer}/${name}\". The supported values for \"${manufacturer}\" are ${validNames}.` + : `Unsupported GPU accelerator \"${manufacturer}/${name}\". No accelerator names are currently supported for \"${manufacturer}\".`, + ); + } return names; } diff --git a/platform/src/components/aws/service.ts b/platform/src/components/aws/service.ts index d3a825dfc3..7dc9348dc4 100644 --- a/platform/src/components/aws/service.ts +++ b/platform/src/components/aws/service.ts @@ -1242,7 +1242,10 @@ export interface ServiceArgs extends FargateBaseArgs { * instanceProfile: "arn:aws:iam::123456789012:instance-profile/ecs-managed" * } * ``` - */ + * + * The GPU value must be in the form `<manufacturer>/<name>`. Valid manufacturers are + * `amazon-web-services`, `amd`, `nvidia`, `xilinx`, and `habana`. + */ gpu?: Input; /** * The ARN of an existing ECS infrastructure role to use for managed instances. 
From b2c36899f60a947974ccf74b41dd58d3fdbb328a Mon Sep 17 00:00:00 2001 From: mkilp <8791079+mkilp@users.noreply.github.com> Date: Tue, 24 Mar 2026 12:52:26 -0400 Subject: [PATCH 5/9] fix: whitespaces in service --- platform/src/components/aws/service.ts | 862 ++++++++++++------------- 1 file changed, 431 insertions(+), 431 deletions(-) diff --git a/platform/src/components/aws/service.ts b/platform/src/components/aws/service.ts index 7dc9348dc4..6f646664d2 100644 --- a/platform/src/components/aws/service.ts +++ b/platform/src/components/aws/service.ts @@ -656,437 +656,437 @@ export interface ServiceArgs extends FargateBaseArgs { */ loadBalancer?: Input< | { - /** - * Configure if the load balancer should be public or private. - * - * When set to `false`, the load balancer endpoint will only be accessible within the - * VPC. - * - * @default `true` - */ - public?: Input; - /** - * Set a custom domain for your load balancer endpoint. - * - * Automatically manages domains hosted on AWS Route 53, Cloudflare, and Vercel. For other - * providers, you'll need to pass in a `cert` that validates domain ownership and add the - * DNS records. - * - * :::tip - * Built-in support for AWS Route 53, Cloudflare, and Vercel. And manual setup for other - * providers. - * ::: - * - * @example - * - * By default this assumes the domain is hosted on Route 53. - * - * ```js - * { - * domain: "example.com" - * } - * ``` - * - * For domains hosted on Cloudflare. - * - * ```js - * { - * domain: { - * name: "example.com", - * dns: sst.cloudflare.dns() - * } - * } - * ``` - */ - domain?: Input< - | string - | { - /** - * The custom domain you want to use. - * - * @example - * ```js - * { - * domain: { - * name: "example.com" - * } - * } - * ``` - * - * Can also include subdomains based on the current stage. - * - * ```js - * { - * domain: { - * name: `${$app.stage}.example.com` - * } - * } - * ``` - * - * Wildcard domains are supported. 
- * - * ```js - * { - * domain: { - * name: "*.example.com" - * } - * } - * ``` - */ - name: Input; - /** - * Alias domains that should be used. - * - * @example - * ```js {4} - * { - * domain: { - * name: "app1.example.com", - * aliases: ["app2.example.com"] - * } - * } - * ``` - */ - aliases?: Input; - /** - * The ARN of an ACM (AWS Certificate Manager) certificate that proves ownership of the - * domain. By default, a certificate is created and validated automatically. - * - * :::tip - * You need to pass in a `cert` for domains that are not hosted on supported `dns` providers. - * ::: - * - * To manually set up a domain on an unsupported provider, you'll need to: - * - * 1. [Validate that you own the domain](https://docs.aws.amazon.com/acm/latest/userguide/domain-ownership-validation.html) by creating an ACM certificate. You can either validate it by setting a DNS record or by verifying an email sent to the domain owner. - * 2. Once validated, set the certificate ARN as the `cert` and set `dns` to `false`. - * 3. Add the DNS records in your provider to point to the load balancer endpoint. - * - * @example - * ```js - * { - * domain: { - * name: "example.com", - * dns: false, - * cert: "arn:aws:acm:us-east-1:112233445566:certificate/3a958790-8878-4cdc-a396-06d95064cf63" - * } - * } - * ``` - */ - cert?: Input; - /** - * The DNS provider to use for the domain. Defaults to the AWS. - * - * Takes an adapter that can create the DNS records on the provider. This can automate - * validating the domain and setting up the DNS routing. - * - * Supports Route 53, Cloudflare, and Vercel adapters. For other providers, you'll need - * to set `dns` to `false` and pass in a certificate validating ownership via `cert`. - * - * @default `sst.aws.dns` - * - * @example - * - * Specify the hosted zone ID for the Route 53 domain. 
- * - * ```js - * { - * domain: { - * name: "example.com", - * dns: sst.aws.dns({ - * zone: "Z2FDTNDATAQYW2" - * }) - * } - * } - * ``` - * - * Use a domain hosted on Cloudflare, needs the Cloudflare provider. - * - * ```js - * { - * domain: { - * name: "example.com", - * dns: sst.cloudflare.dns() - * } - * } - * ``` - * - * Use a domain hosted on Vercel, needs the Vercel provider. - * - * ```js - * { - * domain: { - * name: "example.com", - * dns: sst.vercel.dns() - * } - * } - * ``` - */ - dns?: Input; - } - >; - /** @deprecated Use `rules` instead. */ - ports?: Input[]>; - /** - * Configure the mapping for the ports the load balancer listens to, forwards, or redirects to - * the service. - * This supports two types of protocols: - * - * 1. Application Layer Protocols: `http` and `https`. This'll create an [Application Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/introduction.html). - * 2. Network Layer Protocols: `tcp`, `udp`, `tcp_udp`, and `tls`. This'll create a [Network Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/network/introduction.html). - * - * :::note - * If you want to listen on `https` or `tls`, you need to specify a custom - * `loadBalancer.domain`. - * ::: - * - * You **can not configure** both application and network layer protocols for the same - * service. - * - * @example - * Here we are listening on port `80` and forwarding it to the service on port `8080`. - * ```js - * { - * rules: [ - * { listen: "80/http", forward: "8080/http" } - * ] - * } - * ``` - * - * The `forward` port and protocol defaults to the `listen` port and protocol. So in this - * case both are `80/http`. - * - * ```js - * { - * rules: [ - * { listen: "80/http" } - * ] - * } - * ``` - * - * If multiple containers are configured via the `containers` argument, you need to - * specify which container the traffic should be forwarded to. 
- * - * ```js - * { - * rules: [ - * { listen: "80/http", container: "app" }, - * { listen: "8000/http", container: "admin" } - * ] - * } - * ``` - * - * You can also route the same port to multiple containers via path-based routing. - * - * ```js - * { - * rules: [ - * { - * listen: "80/http", - * container: "app", - * conditions: { path: "/api/*" } - * }, - * { - * listen: "80/http", - * container: "admin", - * conditions: { path: "/admin/*" } - * } - * ] - * } - * ``` - * - * Additionally, you can redirect traffic from one port to another. This is - * commonly used to redirect http to https. - * - * ```js - * { - * rules: [ - * { listen: "80/http", redirect: "443/https" }, - * { listen: "443/https", forward: "80/http" } - * ] - * } - * ``` - */ - rules?: Input[]>; - /** - * Configure the health check that the load balancer runs on your containers. - * - * :::tip - * This health check is different from the [`health`](#health) check. - * ::: - * - * This health check is run by the load balancer. While, `health` is run by ECS. This - * cannot be disabled if you are using a load balancer. While the other is off by default. - * - * Since this cannot be disabled, here are some tips on how to debug an unhealthy - * health check. - * - *
- * How to debug a load balancer health check - * - * If you notice a `Unhealthy: Health checks failed` error, it's because the health - * check has failed. When it fails, the load balancer will terminate the containers, - * causing any requests to fail. - * - * Here's how to debug it: - * - * 1. Verify the health check path. - * - * By default, the load balancer checks the `/` path. Ensure it's accessible in your - * containers. If your application runs on a different path, then update the path in - * the health check config accordingly. - * - * 2. Confirm the containers are operational. - * - * Navigate to **ECS console** > select the **cluster** > go to the **Tasks tab** > - * choose **Any desired status** under the **Filter desired status** dropdown > select - * a task and check for errors under the **Logs tab**. If it has error that means that - * the container failed to start. - * - * 3. If the container was terminated by the load balancer while still starting up, try - * increasing the health check interval and timeout. - *
- * - * For `http` and `https` the default is: - * - * ```js - * { - * path: "/", - * healthyThreshold: 5, - * successCodes: "200", - * timeout: "5 seconds", - * unhealthyThreshold: 2, - * interval: "30 seconds" - * } - * ``` - * - * For `tcp` and `udp` the default is: - * - * ```js - * { - * healthyThreshold: 5, - * timeout: "6 seconds", - * unhealthyThreshold: 2, - * interval: "30 seconds" - * } - * ``` - * - * @example - * - * To configure the health check, we use the _port/protocol_ format. Here we are - * configuring a health check that pings the `/health` path on port `8080` - * every 10 seconds. - * - * ```js - * { - * rules: [ - * { listen: "80/http", forward: "8080/http" } - * ], - * health: { - * "8080/http": { - * path: "/health", - * interval: "10 seconds" - * } - * } - * } - * ``` - * - */ - health?: Input< - Record< - Port, - Input<{ - /** - * The URL path to ping on the service for health checks. Only applicable to - * `http` and `https` protocols. - * @default `"/"` - */ - path?: Input; - /** - * The time period between each health check request. Must be between `5 seconds` - * and `300 seconds`. - * @default `"30 seconds"` - */ - interval?: Input; - /** - * The timeout for each health check request. If no response is received within this - * time, it is considered failed. Must be between `2 seconds` and `120 seconds`. - * @default `"5 seconds"` - */ - timeout?: Input; - /** - * The number of consecutive successful health check requests required to consider the - * target healthy. Must be between 2 and 10. - * @default `5` - */ - healthyThreshold?: Input; - /** - * The number of consecutive failed health check requests required to consider the - * target unhealthy. Must be between 2 and 10. - * @default `2` - */ - unhealthyThreshold?: Input; - /** - * One or more HTTP response codes the health check treats as successful. Only - * applicable to `http` and `https` protocols. 
- * - * @default `"200"` - * @example - * ```js - * { - * successCodes: "200-299" - * } - * ``` - */ - successCodes?: Input; - }> - > - >; - } + /** + * Configure if the load balancer should be public or private. + * + * When set to `false`, the load balancer endpoint will only be accessible within the + * VPC. + * + * @default `true` + */ + public?: Input; + /** + * Set a custom domain for your load balancer endpoint. + * + * Automatically manages domains hosted on AWS Route 53, Cloudflare, and Vercel. For other + * providers, you'll need to pass in a `cert` that validates domain ownership and add the + * DNS records. + * + * :::tip + * Built-in support for AWS Route 53, Cloudflare, and Vercel. And manual setup for other + * providers. + * ::: + * + * @example + * + * By default this assumes the domain is hosted on Route 53. + * + * ```js + * { + * domain: "example.com" + * } + * ``` + * + * For domains hosted on Cloudflare. + * + * ```js + * { + * domain: { + * name: "example.com", + * dns: sst.cloudflare.dns() + * } + * } + * ``` + */ + domain?: Input< + | string + | { + /** + * The custom domain you want to use. + * + * @example + * ```js + * { + * domain: { + * name: "example.com" + * } + * } + * ``` + * + * Can also include subdomains based on the current stage. + * + * ```js + * { + * domain: { + * name: `${$app.stage}.example.com` + * } + * } + * ``` + * + * Wildcard domains are supported. + * + * ```js + * { + * domain: { + * name: "*.example.com" + * } + * } + * ``` + */ + name: Input; + /** + * Alias domains that should be used. + * + * @example + * ```js {4} + * { + * domain: { + * name: "app1.example.com", + * aliases: ["app2.example.com"] + * } + * } + * ``` + */ + aliases?: Input; + /** + * The ARN of an ACM (AWS Certificate Manager) certificate that proves ownership of the + * domain. By default, a certificate is created and validated automatically. 
+ * + * :::tip + * You need to pass in a `cert` for domains that are not hosted on supported `dns` providers. + * ::: + * + * To manually set up a domain on an unsupported provider, you'll need to: + * + * 1. [Validate that you own the domain](https://docs.aws.amazon.com/acm/latest/userguide/domain-ownership-validation.html) by creating an ACM certificate. You can either validate it by setting a DNS record or by verifying an email sent to the domain owner. + * 2. Once validated, set the certificate ARN as the `cert` and set `dns` to `false`. + * 3. Add the DNS records in your provider to point to the load balancer endpoint. + * + * @example + * ```js + * { + * domain: { + * name: "example.com", + * dns: false, + * cert: "arn:aws:acm:us-east-1:112233445566:certificate/3a958790-8878-4cdc-a396-06d95064cf63" + * } + * } + * ``` + */ + cert?: Input; + /** + * The DNS provider to use for the domain. Defaults to the AWS. + * + * Takes an adapter that can create the DNS records on the provider. This can automate + * validating the domain and setting up the DNS routing. + * + * Supports Route 53, Cloudflare, and Vercel adapters. For other providers, you'll need + * to set `dns` to `false` and pass in a certificate validating ownership via `cert`. + * + * @default `sst.aws.dns` + * + * @example + * + * Specify the hosted zone ID for the Route 53 domain. + * + * ```js + * { + * domain: { + * name: "example.com", + * dns: sst.aws.dns({ + * zone: "Z2FDTNDATAQYW2" + * }) + * } + * } + * ``` + * + * Use a domain hosted on Cloudflare, needs the Cloudflare provider. + * + * ```js + * { + * domain: { + * name: "example.com", + * dns: sst.cloudflare.dns() + * } + * } + * ``` + * + * Use a domain hosted on Vercel, needs the Vercel provider. + * + * ```js + * { + * domain: { + * name: "example.com", + * dns: sst.vercel.dns() + * } + * } + * ``` + */ + dns?: Input; + } + >; + /** @deprecated Use `rules` instead. 
*/ + ports?: Input[]>; + /** + * Configure the mapping for the ports the load balancer listens to, forwards, or redirects to + * the service. + * This supports two types of protocols: + * + * 1. Application Layer Protocols: `http` and `https`. This'll create an [Application Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/introduction.html). + * 2. Network Layer Protocols: `tcp`, `udp`, `tcp_udp`, and `tls`. This'll create a [Network Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/network/introduction.html). + * + * :::note + * If you want to listen on `https` or `tls`, you need to specify a custom + * `loadBalancer.domain`. + * ::: + * + * You **can not configure** both application and network layer protocols for the same + * service. + * + * @example + * Here we are listening on port `80` and forwarding it to the service on port `8080`. + * ```js + * { + * rules: [ + * { listen: "80/http", forward: "8080/http" } + * ] + * } + * ``` + * + * The `forward` port and protocol defaults to the `listen` port and protocol. So in this + * case both are `80/http`. + * + * ```js + * { + * rules: [ + * { listen: "80/http" } + * ] + * } + * ``` + * + * If multiple containers are configured via the `containers` argument, you need to + * specify which container the traffic should be forwarded to. + * + * ```js + * { + * rules: [ + * { listen: "80/http", container: "app" }, + * { listen: "8000/http", container: "admin" } + * ] + * } + * ``` + * + * You can also route the same port to multiple containers via path-based routing. + * + * ```js + * { + * rules: [ + * { + * listen: "80/http", + * container: "app", + * conditions: { path: "/api/*" } + * }, + * { + * listen: "80/http", + * container: "admin", + * conditions: { path: "/admin/*" } + * } + * ] + * } + * ``` + * + * Additionally, you can redirect traffic from one port to another. This is + * commonly used to redirect http to https. 
+ * + * ```js + * { + * rules: [ + * { listen: "80/http", redirect: "443/https" }, + * { listen: "443/https", forward: "80/http" } + * ] + * } + * ``` + */ + rules?: Input[]>; + /** + * Configure the health check that the load balancer runs on your containers. + * + * :::tip + * This health check is different from the [`health`](#health) check. + * ::: + * + * This health check is run by the load balancer. While, `health` is run by ECS. This + * cannot be disabled if you are using a load balancer. While the other is off by default. + * + * Since this cannot be disabled, here are some tips on how to debug an unhealthy + * health check. + * + *
+ * How to debug a load balancer health check + * + * If you notice a `Unhealthy: Health checks failed` error, it's because the health + * check has failed. When it fails, the load balancer will terminate the containers, + * causing any requests to fail. + * + * Here's how to debug it: + * + * 1. Verify the health check path. + * + * By default, the load balancer checks the `/` path. Ensure it's accessible in your + * containers. If your application runs on a different path, then update the path in + * the health check config accordingly. + * + * 2. Confirm the containers are operational. + * + * Navigate to **ECS console** > select the **cluster** > go to the **Tasks tab** > + * choose **Any desired status** under the **Filter desired status** dropdown > select + * a task and check for errors under the **Logs tab**. If it has error that means that + * the container failed to start. + * + * 3. If the container was terminated by the load balancer while still starting up, try + * increasing the health check interval and timeout. + *
+ * + * For `http` and `https` the default is: + * + * ```js + * { + * path: "/", + * healthyThreshold: 5, + * successCodes: "200", + * timeout: "5 seconds", + * unhealthyThreshold: 2, + * interval: "30 seconds" + * } + * ``` + * + * For `tcp` and `udp` the default is: + * + * ```js + * { + * healthyThreshold: 5, + * timeout: "6 seconds", + * unhealthyThreshold: 2, + * interval: "30 seconds" + * } + * ``` + * + * @example + * + * To configure the health check, we use the _port/protocol_ format. Here we are + * configuring a health check that pings the `/health` path on port `8080` + * every 10 seconds. + * + * ```js + * { + * rules: [ + * { listen: "80/http", forward: "8080/http" } + * ], + * health: { + * "8080/http": { + * path: "/health", + * interval: "10 seconds" + * } + * } + * } + * ``` + * + */ + health?: Input< + Record< + Port, + Input<{ + /** + * The URL path to ping on the service for health checks. Only applicable to + * `http` and `https` protocols. + * @default `"/"` + */ + path?: Input; + /** + * The time period between each health check request. Must be between `5 seconds` + * and `300 seconds`. + * @default `"30 seconds"` + */ + interval?: Input; + /** + * The timeout for each health check request. If no response is received within this + * time, it is considered failed. Must be between `2 seconds` and `120 seconds`. + * @default `"5 seconds"` + */ + timeout?: Input; + /** + * The number of consecutive successful health check requests required to consider the + * target healthy. Must be between 2 and 10. + * @default `5` + */ + healthyThreshold?: Input; + /** + * The number of consecutive failed health check requests required to consider the + * target unhealthy. Must be between 2 and 10. + * @default `2` + */ + unhealthyThreshold?: Input; + /** + * One or more HTTP response codes the health check treats as successful. Only + * applicable to `http` and `https` protocols. 
+ * + * @default `"200"` + * @example + * ```js + * { + * successCodes: "200-299" + * } + * ``` + */ + successCodes?: Input; + }> + > + >; + } | { - /** - * The `Alb` instance to attach this service to. When provided, the service creates - * target groups and listener rules on the shared ALB instead of creating its own - * load balancer. - * - * ECS tasks use the VPC's default security group, which allows all traffic within the - * VPC CIDR. For tighter security, add an explicit security group ingress rule from the - * ALB's security group using `transform`. - * - * @example - * ```js - * { - * loadBalancer: { - * instance: alb, - * rules: [ - * { listen: "443/https", forward: "8080/http", conditions: { path: "/api/*" }, priority: 100 } - * ] - * } - * } - * ``` - */ - instance: Alb; - /** - * The rules for routing traffic from the ALB to this service's containers. - * Each rule must have explicit conditions and priority. - */ - rules: Prettify[]; - /** - * Configure health checks for the target groups. Uses the same format as the inline - * health check config, keyed by `{port}/{protocol}`. - */ - health?: Record< - AlbPort, - Input<{ - path?: Input; - interval?: Input; - timeout?: Input; - healthyThreshold?: Input; - unhealthyThreshold?: Input; - successCodes?: Input; - }> - >; - } + /** + * The `Alb` instance to attach this service to. When provided, the service creates + * target groups and listener rules on the shared ALB instead of creating its own + * load balancer. + * + * ECS tasks use the VPC's default security group, which allows all traffic within the + * VPC CIDR. For tighter security, add an explicit security group ingress rule from the + * ALB's security group using `transform`. 
+ * + * @example + * ```js + * { + * loadBalancer: { + * instance: alb, + * rules: [ + * { listen: "443/https", forward: "8080/http", conditions: { path: "/api/*" }, priority: 100 } + * ] + * } + * } + * ``` + */ + instance: Alb; + /** + * The rules for routing traffic from the ALB to this service's containers. + * Each rule must have explicit conditions and priority. + */ + rules: Prettify[]; + /** + * Configure health checks for the target groups. Uses the same format as the inline + * health check config, keyed by `{port}/{protocol}`. + */ + health?: Record< + AlbPort, + Input<{ + path?: Input; + interval?: Input; + timeout?: Input; + healthyThreshold?: Input; + unhealthyThreshold?: Input; + successCodes?: Input; + }> + >; + } >; /** * Configure the CloudMap service registry for the service. @@ -1245,7 +1245,7 @@ export interface ServiceArgs extends FargateBaseArgs { * * The GPU value must be in the form `/`. Valid manufacturers are * `amazon-web-services`, `amd`, `nvidia`, `xilinx`, and `habana`. - */ + */ gpu?: Input; /** * The ARN of an existing ECS infrastructure role to use for managed instances. 
From 1b14ca622cc118e45d0a0a1981239894ac0c992c Mon Sep 17 00:00:00 2001 From: mkilp <8791079+mkilp@users.noreply.github.com> Date: Tue, 24 Mar 2026 12:58:45 -0400 Subject: [PATCH 6/9] fix: remove intersection types to fix docs --- platform/src/components/aws/service.ts | 27 ++++++++++++-------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/platform/src/components/aws/service.ts b/platform/src/components/aws/service.ts index 6f646664d2..ea16f8d72c 100644 --- a/platform/src/components/aws/service.ts +++ b/platform/src/components/aws/service.ts @@ -341,17 +341,6 @@ interface ServiceContainerArgs extends FargateContainerArgs { }; } -type ServiceFargateCapacity = { - fargate?: { - base?: Input; - weight: Input; - }; - spot?: { - base?: Input; - weight: Input; - }; -}; - export interface ServiceArgs extends FargateBaseArgs { /** * Configure how this component works in `sst dev`. @@ -1361,7 +1350,7 @@ export interface ServiceArgs extends FargateBaseArgs { */ capacity?: Input< | "spot" - | (ServiceFargateCapacity & { + | { /** * Configure how the regular Fargate capacity is allocated. */ @@ -1397,7 +1386,7 @@ export interface ServiceArgs extends FargateBaseArgs { weight: Input; }>; managed?: never; - }) + } >; /** * Configure the health check that ECS runs on your containers. 
@@ -2044,10 +2033,18 @@ export class Service extends Component implements Link.Linkable { if (!args.capacity) return; return output(args.capacity).apply( - (v): ServiceFargateCapacity | undefined => { + (v): + | { + fargate?: { base?: Input; weight: Input }; + spot?: { base?: Input; weight: Input }; + } + | undefined => { if (v === "spot") return { spot: { weight: 1 }, fargate: { weight: 0 } }; - const fargateCapacity = v as ServiceFargateCapacity; + const fargateCapacity = v as { + fargate?: { base?: Input; weight: Input }; + spot?: { base?: Input; weight: Input }; + }; return { fargate: fargateCapacity.fargate, spot: fargateCapacity.spot, From d36a5c6c9706cd226f0abf5cbde4ea9d955d1112 Mon Sep 17 00:00:00 2001 From: mkilp <8791079+mkilp@users.noreply.github.com> Date: Tue, 31 Mar 2026 12:11:34 -0400 Subject: [PATCH 7/9] feat(ecs): enhance GPU management and API integration --- examples/aws-ecs-gpus/app.py | 17 ++++++++++- examples/aws-ecs-gpus/sst.config.ts | 16 ++++++++--- .../src/components/aws/managed-instances.ts | 28 ++++--------------- platform/src/components/aws/service.ts | 13 +++++++-- 4 files changed, 44 insertions(+), 30 deletions(-) diff --git a/examples/aws-ecs-gpus/app.py b/examples/aws-ecs-gpus/app.py index 152d62ef81..eef93dfd30 100644 --- a/examples/aws-ecs-gpus/app.py +++ b/examples/aws-ecs-gpus/app.py @@ -1,8 +1,17 @@ from http.server import BaseHTTPRequestHandler, HTTPServer +import glob import json import os +def read_file(path): + try: + with open(path, "r", encoding="utf-8") as f: + return f.read().strip() + except FileNotFoundError: + return None + + class Handler(BaseHTTPRequestHandler): def do_GET(self): if self.path == "/health": @@ -12,7 +21,13 @@ def do_GET(self): self.respond( { "message": "hello from ecs managed instances", - "gpu": os.getenv("NVIDIA_VISIBLE_DEVICES", "unknown"), + "gpu": { + "visibleDevicesEnv": os.getenv("NVIDIA_VISIBLE_DEVICES"), + "deviceFiles": sorted(glob.glob("/dev/nvidia*")), + "procGpus": 
sorted(glob.glob("/proc/driver/nvidia/gpus/*")), + "driverVersion": read_file("/proc/driver/nvidia/version"), + "cudaVersion": os.getenv("CUDA_VERSION"), + }, } ) diff --git a/examples/aws-ecs-gpus/sst.config.ts b/examples/aws-ecs-gpus/sst.config.ts index 4c67e9c0dc..6bd7f5420e 100644 --- a/examples/aws-ecs-gpus/sst.config.ts +++ b/examples/aws-ecs-gpus/sst.config.ts @@ -6,6 +6,9 @@ * A minimal ECS service running on ECS Managed Instances with a GPU-enabled host. * The service uses top-level `gpu`, `cpu`, `memory`, and `storage` settings, while * the managed instances IAM resources remain customizable through `transform`. + * + * A private API Gateway HTTP API is used to test the service without exposing a public + * load balancer. */ export default $config({ app(input) { @@ -19,19 +22,24 @@ export default $config({ const vpc = new sst.aws.Vpc("MyVpc"); const cluster = new sst.aws.Cluster("MyCluster", { vpc }); + // Provisions g4dn.xlarge const service = new sst.aws.Service("MyService", { cluster, image: { context: "./" }, gpu: "nvidia/t4", cpu: "4 vCPU", - memory: "16 GB", - loadBalancer: { - ports: [{ listen: "80/http", forward: "8000/http" }], + memory: "10 GB", + serviceRegistry: { + port: 8000, }, }); + const api = new sst.aws.ApiGatewayV2("MyApi", { vpc }); + api.routePrivate("$default", service.nodes.cloudmapService.arn); + return { - url: service.url, + service: service.service, + api: api.url, }; }, }); diff --git a/platform/src/components/aws/managed-instances.ts b/platform/src/components/aws/managed-instances.ts index 0ee9d5b6fd..c9c01cc55a 100644 --- a/platform/src/components/aws/managed-instances.ts +++ b/platform/src/components/aws/managed-instances.ts @@ -141,25 +141,8 @@ export function normalizeManagedCapacity( name: string, args: ManagedServiceArgs, ) { - return all([ - args.gpu, - args.cpu, - args.memory, - args.storage, - args.infrastructureRole, - args.instanceProfile, - ]).apply(([gpu, cpu, memory, storage, infrastructureRole, 
instanceProfile]) => { - if (!infrastructureRole) { - throw new VisibleError( - `You must provide \"infrastructureRole\" for the \"${name}\" Service when \"gpu\" is set.`, - ); - } - if (!instanceProfile) { - throw new VisibleError( - `You must provide \"instanceProfile\" for the \"${name}\" Service when \"gpu\" is set.`, - ); - } - + return all([args.gpu, args.cpu, args.memory, args.storage]).apply( + ([gpu, cpu, memory, storage]) => { const hostCpu = normalizeHostCpu(cpu); const hostMemory = normalizeHostMemory(memory); const hostStorage = normalizeStorage(storage); @@ -172,12 +155,13 @@ export function normalizeManagedCapacity( hostStorage, gpu: normalizeGpu(gpu), } satisfies NormalizedManagedCapacity; - }); + }, + ); function normalizeHostCpu(cpu?: `${number} vCPU`) { if (cpu) { const min = parseFloat(cpu.split(" ")[0]); - return { min, max: min }; + return { min }; } throw new VisibleError( `You must provide top-level \"cpu\" for the \"${name}\" Service when \"gpu\" is set.`, @@ -187,7 +171,7 @@ export function normalizeManagedCapacity( function normalizeHostMemory(memory?: `${number} GB`) { if (memory) { const min = toMBs(memory); - return { min, max: min }; + return { min }; } throw new VisibleError( `You must provide top-level \"memory\" for the \"${name}\" Service when \"gpu\" is set.`, diff --git a/platform/src/components/aws/service.ts b/platform/src/components/aws/service.ts index ea16f8d72c..6a8153e985 100644 --- a/platform/src/components/aws/service.ts +++ b/platform/src/components/aws/service.ts @@ -1226,12 +1226,14 @@ export interface ServiceArgs extends FargateBaseArgs { * { * gpu: "nvidia/t4", * cpu: "4 vCPU", - * memory: "16 GB", - * infrastructureRole: "arn:aws:iam::123456789012:role/ecs-infra", - * instanceProfile: "arn:aws:iam::123456789012:instance-profile/ecs-managed" + * memory: "10 GB" * } * ``` * + * By default, SST creates the managed instances infrastructure role and instance profile for + * you. 
You can override them with `infrastructureRole`, `instanceProfile`, or the + * corresponding `transform` hooks. + * * The GPU value must be in the form `/`. Valid manufacturers are * `amazon-web-services`, `amd`, `nvidia`, `xilinx`, and `habana`. */ @@ -2538,6 +2540,9 @@ export class Service extends Component implements Link.Linkable { desiredCount: scaling.min, ...(managed ? { + // Managed capacity providers cannot be deleted while ECS still has + // tasks draining for the service. + forceDelete: true, forceNewDeployment: true, capacityProviderStrategies: [ { @@ -2618,6 +2623,8 @@ export class Service extends Component implements Link.Linkable { } function createAutoScaling() { + if (!args.scaling) return; + const target = new appautoscaling.Target( ...transform( args.transform?.autoScalingTarget, From 7e2fff47096f33ae68fd1867ee3639435c4f4134 Mon Sep 17 00:00:00 2001 From: mkilp <8791079+mkilp@users.noreply.github.com> Date: Wed, 1 Apr 2026 08:49:16 -0400 Subject: [PATCH 8/9] refactor(service): remove forceDelete option from managed capacity --- platform/src/components/aws/service.ts | 3 --- 1 file changed, 3 deletions(-) diff --git a/platform/src/components/aws/service.ts b/platform/src/components/aws/service.ts index 6a8153e985..223c6c3ea0 100644 --- a/platform/src/components/aws/service.ts +++ b/platform/src/components/aws/service.ts @@ -2540,9 +2540,6 @@ export class Service extends Component implements Link.Linkable { desiredCount: scaling.min, ...(managed ? { - // Managed capacity providers cannot be deleted while ECS still has - // tasks draining for the service. 
- forceDelete: true, forceNewDeployment: true, capacityProviderStrategies: [ { From 886d7e0c47d5649b6d05a050cb75fde733767d74 Mon Sep 17 00:00:00 2001 From: mkilp <8791079+mkilp@users.noreply.github.com> Date: Wed, 1 Apr 2026 16:34:34 -0400 Subject: [PATCH 9/9] feat: depend service on vpc to prevent route table teardown before service teardown --- examples/aws-ecs-gpus/Dockerfile | 11 +- examples/aws-ecs-gpus/app.py | 65 +- examples/aws-ecs-gpus/requirements.txt | 1 + .../src/components/aws/managed-instances.ts | 1 - platform/src/components/aws/service.ts | 1041 +++++++++-------- 5 files changed, 557 insertions(+), 562 deletions(-) create mode 100644 examples/aws-ecs-gpus/requirements.txt diff --git a/examples/aws-ecs-gpus/Dockerfile b/examples/aws-ecs-gpus/Dockerfile index b0a57dfb4c..f56431cd8b 100644 --- a/examples/aws-ecs-gpus/Dockerfile +++ b/examples/aws-ecs-gpus/Dockerfile @@ -1,12 +1,13 @@ FROM python:3.11-slim -WORKDIR /app +WORKDIR /code -ENV PYTHONDONTWRITEBYTECODE=1 -ENV PYTHONUNBUFFERED=1 +COPY requirements.txt /code/requirements.txt -COPY app.py /app/app.py +RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt + +COPY app.py /code/app.py EXPOSE 8000 -ENTRYPOINT ["python", "/app/app.py"] +CMD ["fastapi", "run", "app.py", "--host", "0.0.0.0", "--port", "8000"] diff --git a/examples/aws-ecs-gpus/app.py b/examples/aws-ecs-gpus/app.py index eef93dfd30..54897b297d 100644 --- a/examples/aws-ecs-gpus/app.py +++ b/examples/aws-ecs-gpus/app.py @@ -1,49 +1,34 @@ -from http.server import BaseHTTPRequestHandler, HTTPServer +from pathlib import Path import glob -import json import os +from fastapi import FastAPI -def read_file(path): + +app = FastAPI() + + +def read_file(path: str): try: - with open(path, "r", encoding="utf-8") as f: - return f.read().strip() + return Path(path).read_text(encoding="utf-8").strip() except FileNotFoundError: return None -class Handler(BaseHTTPRequestHandler): - def do_GET(self): - if self.path == "/health": - 
self.respond({"ok": True}) - return - - self.respond( - { - "message": "hello from ecs managed instances", - "gpu": { - "visibleDevicesEnv": os.getenv("NVIDIA_VISIBLE_DEVICES"), - "deviceFiles": sorted(glob.glob("/dev/nvidia*")), - "procGpus": sorted(glob.glob("/proc/driver/nvidia/gpus/*")), - "driverVersion": read_file("/proc/driver/nvidia/version"), - "cudaVersion": os.getenv("CUDA_VERSION"), - }, - } - ) - - def log_message(self, format, *args): - return - - def respond(self, payload): - body = json.dumps(payload).encode("utf-8") - self.send_response(200) - self.send_header("Content-Type", "application/json") - self.send_header("Content-Length", str(len(body))) - self.end_headers() - self.wfile.write(body) - - -if __name__ == "__main__": - port = int(os.getenv("PORT", "8000")) - server = HTTPServer(("0.0.0.0", port), Handler) - server.serve_forever() +@app.get("/health") +def health(): + return {"ok": True} + + +@app.get("/") +def index(): + return { + "message": "hello from ecs managed instances", + "gpu": { + "visibleDevicesEnv": os.getenv("NVIDIA_VISIBLE_DEVICES"), + "deviceFiles": sorted(glob.glob("/dev/nvidia*")), + "procGpus": sorted(glob.glob("/proc/driver/nvidia/gpus/*")), + "driverVersion": read_file("/proc/driver/nvidia/version"), + "cudaVersion": os.getenv("CUDA_VERSION"), + }, + } diff --git a/examples/aws-ecs-gpus/requirements.txt b/examples/aws-ecs-gpus/requirements.txt new file mode 100644 index 0000000000..8b6ce6ac72 --- /dev/null +++ b/examples/aws-ecs-gpus/requirements.txt @@ -0,0 +1 @@ +fastapi[standard]>=0.115.0,<1.0.0 diff --git a/platform/src/components/aws/managed-instances.ts b/platform/src/components/aws/managed-instances.ts index c9c01cc55a..0b9525e4c2 100644 --- a/platform/src/components/aws/managed-instances.ts +++ b/platform/src/components/aws/managed-instances.ts @@ -533,7 +533,6 @@ export function createManagedTaskDefinition( interval: toSeconds(container.health.interval ?? "30 seconds"), retries: container.health.retries ?? 
3, }, - pseudoTerminal: true, portMappings: [{ containerPortRange: "1-65535" }], logConfiguration: { logDriver: "awslogs", diff --git a/platform/src/components/aws/service.ts b/platform/src/components/aws/service.ts index 223c6c3ea0..76fc1d7ac6 100644 --- a/platform/src/components/aws/service.ts +++ b/platform/src/components/aws/service.ts @@ -645,437 +645,437 @@ export interface ServiceArgs extends FargateBaseArgs { */ loadBalancer?: Input< | { - /** - * Configure if the load balancer should be public or private. - * - * When set to `false`, the load balancer endpoint will only be accessible within the - * VPC. - * - * @default `true` - */ - public?: Input; - /** - * Set a custom domain for your load balancer endpoint. - * - * Automatically manages domains hosted on AWS Route 53, Cloudflare, and Vercel. For other - * providers, you'll need to pass in a `cert` that validates domain ownership and add the - * DNS records. - * - * :::tip - * Built-in support for AWS Route 53, Cloudflare, and Vercel. And manual setup for other - * providers. - * ::: - * - * @example - * - * By default this assumes the domain is hosted on Route 53. - * - * ```js - * { - * domain: "example.com" - * } - * ``` - * - * For domains hosted on Cloudflare. - * - * ```js - * { - * domain: { - * name: "example.com", - * dns: sst.cloudflare.dns() - * } - * } - * ``` - */ - domain?: Input< - | string - | { - /** - * The custom domain you want to use. - * - * @example - * ```js - * { - * domain: { - * name: "example.com" - * } - * } - * ``` - * - * Can also include subdomains based on the current stage. - * - * ```js - * { - * domain: { - * name: `${$app.stage}.example.com` - * } - * } - * ``` - * - * Wildcard domains are supported. - * - * ```js - * { - * domain: { - * name: "*.example.com" - * } - * } - * ``` - */ - name: Input; - /** - * Alias domains that should be used. 
- * - * @example - * ```js {4} - * { - * domain: { - * name: "app1.example.com", - * aliases: ["app2.example.com"] - * } - * } - * ``` - */ - aliases?: Input; - /** - * The ARN of an ACM (AWS Certificate Manager) certificate that proves ownership of the - * domain. By default, a certificate is created and validated automatically. - * - * :::tip - * You need to pass in a `cert` for domains that are not hosted on supported `dns` providers. - * ::: - * - * To manually set up a domain on an unsupported provider, you'll need to: - * - * 1. [Validate that you own the domain](https://docs.aws.amazon.com/acm/latest/userguide/domain-ownership-validation.html) by creating an ACM certificate. You can either validate it by setting a DNS record or by verifying an email sent to the domain owner. - * 2. Once validated, set the certificate ARN as the `cert` and set `dns` to `false`. - * 3. Add the DNS records in your provider to point to the load balancer endpoint. - * - * @example - * ```js - * { - * domain: { - * name: "example.com", - * dns: false, - * cert: "arn:aws:acm:us-east-1:112233445566:certificate/3a958790-8878-4cdc-a396-06d95064cf63" - * } - * } - * ``` - */ - cert?: Input; - /** - * The DNS provider to use for the domain. Defaults to the AWS. - * - * Takes an adapter that can create the DNS records on the provider. This can automate - * validating the domain and setting up the DNS routing. - * - * Supports Route 53, Cloudflare, and Vercel adapters. For other providers, you'll need - * to set `dns` to `false` and pass in a certificate validating ownership via `cert`. - * - * @default `sst.aws.dns` - * - * @example - * - * Specify the hosted zone ID for the Route 53 domain. - * - * ```js - * { - * domain: { - * name: "example.com", - * dns: sst.aws.dns({ - * zone: "Z2FDTNDATAQYW2" - * }) - * } - * } - * ``` - * - * Use a domain hosted on Cloudflare, needs the Cloudflare provider. 
- * - * ```js - * { - * domain: { - * name: "example.com", - * dns: sst.cloudflare.dns() - * } - * } - * ``` - * - * Use a domain hosted on Vercel, needs the Vercel provider. - * - * ```js - * { - * domain: { - * name: "example.com", - * dns: sst.vercel.dns() - * } - * } - * ``` - */ - dns?: Input; - } - >; - /** @deprecated Use `rules` instead. */ - ports?: Input[]>; - /** - * Configure the mapping for the ports the load balancer listens to, forwards, or redirects to - * the service. - * This supports two types of protocols: - * - * 1. Application Layer Protocols: `http` and `https`. This'll create an [Application Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/introduction.html). - * 2. Network Layer Protocols: `tcp`, `udp`, `tcp_udp`, and `tls`. This'll create a [Network Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/network/introduction.html). - * - * :::note - * If you want to listen on `https` or `tls`, you need to specify a custom - * `loadBalancer.domain`. - * ::: - * - * You **can not configure** both application and network layer protocols for the same - * service. - * - * @example - * Here we are listening on port `80` and forwarding it to the service on port `8080`. - * ```js - * { - * rules: [ - * { listen: "80/http", forward: "8080/http" } - * ] - * } - * ``` - * - * The `forward` port and protocol defaults to the `listen` port and protocol. So in this - * case both are `80/http`. - * - * ```js - * { - * rules: [ - * { listen: "80/http" } - * ] - * } - * ``` - * - * If multiple containers are configured via the `containers` argument, you need to - * specify which container the traffic should be forwarded to. - * - * ```js - * { - * rules: [ - * { listen: "80/http", container: "app" }, - * { listen: "8000/http", container: "admin" } - * ] - * } - * ``` - * - * You can also route the same port to multiple containers via path-based routing. 
- * - * ```js - * { - * rules: [ - * { - * listen: "80/http", - * container: "app", - * conditions: { path: "/api/*" } - * }, - * { - * listen: "80/http", - * container: "admin", - * conditions: { path: "/admin/*" } - * } - * ] - * } - * ``` - * - * Additionally, you can redirect traffic from one port to another. This is - * commonly used to redirect http to https. - * - * ```js - * { - * rules: [ - * { listen: "80/http", redirect: "443/https" }, - * { listen: "443/https", forward: "80/http" } - * ] - * } - * ``` - */ - rules?: Input[]>; - /** - * Configure the health check that the load balancer runs on your containers. - * - * :::tip - * This health check is different from the [`health`](#health) check. - * ::: - * - * This health check is run by the load balancer. While, `health` is run by ECS. This - * cannot be disabled if you are using a load balancer. While the other is off by default. - * - * Since this cannot be disabled, here are some tips on how to debug an unhealthy - * health check. - * - *
- * How to debug a load balancer health check - * - * If you notice a `Unhealthy: Health checks failed` error, it's because the health - * check has failed. When it fails, the load balancer will terminate the containers, - * causing any requests to fail. - * - * Here's how to debug it: - * - * 1. Verify the health check path. - * - * By default, the load balancer checks the `/` path. Ensure it's accessible in your - * containers. If your application runs on a different path, then update the path in - * the health check config accordingly. - * - * 2. Confirm the containers are operational. - * - * Navigate to **ECS console** > select the **cluster** > go to the **Tasks tab** > - * choose **Any desired status** under the **Filter desired status** dropdown > select - * a task and check for errors under the **Logs tab**. If it has error that means that - * the container failed to start. - * - * 3. If the container was terminated by the load balancer while still starting up, try - * increasing the health check interval and timeout. - *
- * - * For `http` and `https` the default is: - * - * ```js - * { - * path: "/", - * healthyThreshold: 5, - * successCodes: "200", - * timeout: "5 seconds", - * unhealthyThreshold: 2, - * interval: "30 seconds" - * } - * ``` - * - * For `tcp` and `udp` the default is: - * - * ```js - * { - * healthyThreshold: 5, - * timeout: "6 seconds", - * unhealthyThreshold: 2, - * interval: "30 seconds" - * } - * ``` - * - * @example - * - * To configure the health check, we use the _port/protocol_ format. Here we are - * configuring a health check that pings the `/health` path on port `8080` - * every 10 seconds. - * - * ```js - * { - * rules: [ - * { listen: "80/http", forward: "8080/http" } - * ], - * health: { - * "8080/http": { - * path: "/health", - * interval: "10 seconds" - * } - * } - * } - * ``` - * - */ - health?: Input< - Record< - Port, - Input<{ - /** - * The URL path to ping on the service for health checks. Only applicable to - * `http` and `https` protocols. - * @default `"/"` - */ - path?: Input; - /** - * The time period between each health check request. Must be between `5 seconds` - * and `300 seconds`. - * @default `"30 seconds"` - */ - interval?: Input; - /** - * The timeout for each health check request. If no response is received within this - * time, it is considered failed. Must be between `2 seconds` and `120 seconds`. - * @default `"5 seconds"` - */ - timeout?: Input; - /** - * The number of consecutive successful health check requests required to consider the - * target healthy. Must be between 2 and 10. - * @default `5` - */ - healthyThreshold?: Input; - /** - * The number of consecutive failed health check requests required to consider the - * target unhealthy. Must be between 2 and 10. - * @default `2` - */ - unhealthyThreshold?: Input; - /** - * One or more HTTP response codes the health check treats as successful. Only - * applicable to `http` and `https` protocols. 
- * - * @default `"200"` - * @example - * ```js - * { - * successCodes: "200-299" - * } - * ``` - */ - successCodes?: Input; - }> - > - >; - } + /** + * Configure if the load balancer should be public or private. + * + * When set to `false`, the load balancer endpoint will only be accessible within the + * VPC. + * + * @default `true` + */ + public?: Input; + /** + * Set a custom domain for your load balancer endpoint. + * + * Automatically manages domains hosted on AWS Route 53, Cloudflare, and Vercel. For other + * providers, you'll need to pass in a `cert` that validates domain ownership and add the + * DNS records. + * + * :::tip + * Built-in support for AWS Route 53, Cloudflare, and Vercel. And manual setup for other + * providers. + * ::: + * + * @example + * + * By default this assumes the domain is hosted on Route 53. + * + * ```js + * { + * domain: "example.com" + * } + * ``` + * + * For domains hosted on Cloudflare. + * + * ```js + * { + * domain: { + * name: "example.com", + * dns: sst.cloudflare.dns() + * } + * } + * ``` + */ + domain?: Input< + | string + | { + /** + * The custom domain you want to use. + * + * @example + * ```js + * { + * domain: { + * name: "example.com" + * } + * } + * ``` + * + * Can also include subdomains based on the current stage. + * + * ```js + * { + * domain: { + * name: `${$app.stage}.example.com` + * } + * } + * ``` + * + * Wildcard domains are supported. + * + * ```js + * { + * domain: { + * name: "*.example.com" + * } + * } + * ``` + */ + name: Input; + /** + * Alias domains that should be used. + * + * @example + * ```js {4} + * { + * domain: { + * name: "app1.example.com", + * aliases: ["app2.example.com"] + * } + * } + * ``` + */ + aliases?: Input; + /** + * The ARN of an ACM (AWS Certificate Manager) certificate that proves ownership of the + * domain. By default, a certificate is created and validated automatically. 
+ * + * :::tip + * You need to pass in a `cert` for domains that are not hosted on supported `dns` providers. + * ::: + * + * To manually set up a domain on an unsupported provider, you'll need to: + * + * 1. [Validate that you own the domain](https://docs.aws.amazon.com/acm/latest/userguide/domain-ownership-validation.html) by creating an ACM certificate. You can either validate it by setting a DNS record or by verifying an email sent to the domain owner. + * 2. Once validated, set the certificate ARN as the `cert` and set `dns` to `false`. + * 3. Add the DNS records in your provider to point to the load balancer endpoint. + * + * @example + * ```js + * { + * domain: { + * name: "example.com", + * dns: false, + * cert: "arn:aws:acm:us-east-1:112233445566:certificate/3a958790-8878-4cdc-a396-06d95064cf63" + * } + * } + * ``` + */ + cert?: Input; + /** + * The DNS provider to use for the domain. Defaults to the AWS adapter. + * + * Takes an adapter that can create the DNS records on the provider. This can automate + * validating the domain and setting up the DNS routing. + * + * Supports Route 53, Cloudflare, and Vercel adapters. For other providers, you'll need + * to set `dns` to `false` and pass in a certificate validating ownership via `cert`. + * + * @default `sst.aws.dns` + * + * @example + * + * Specify the hosted zone ID for the Route 53 domain. + * + * ```js + * { + * domain: { + * name: "example.com", + * dns: sst.aws.dns({ + * zone: "Z2FDTNDATAQYW2" + * }) + * } + * } + * ``` + * + * Use a domain hosted on Cloudflare, needs the Cloudflare provider. + * + * ```js + * { + * domain: { + * name: "example.com", + * dns: sst.cloudflare.dns() + * } + * } + * ``` + * + * Use a domain hosted on Vercel, needs the Vercel provider. + * + * ```js + * { + * domain: { + * name: "example.com", + * dns: sst.vercel.dns() + * } + * } + * ``` + */ + dns?: Input; + } + >; + /** @deprecated Use `rules` instead.
*/ + ports?: Input[]>; + /** + * Configure the mapping for the ports the load balancer listens to, forwards, or redirects to + * the service. + * This supports two types of protocols: + * + * 1. Application Layer Protocols: `http` and `https`. This'll create an [Application Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/introduction.html). + * 2. Network Layer Protocols: `tcp`, `udp`, `tcp_udp`, and `tls`. This'll create a [Network Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/network/introduction.html). + * + * :::note + * If you want to listen on `https` or `tls`, you need to specify a custom + * `loadBalancer.domain`. + * ::: + * + * You **can not configure** both application and network layer protocols for the same + * service. + * + * @example + * Here we are listening on port `80` and forwarding it to the service on port `8080`. + * ```js + * { + * rules: [ + * { listen: "80/http", forward: "8080/http" } + * ] + * } + * ``` + * + * The `forward` port and protocol defaults to the `listen` port and protocol. So in this + * case both are `80/http`. + * + * ```js + * { + * rules: [ + * { listen: "80/http" } + * ] + * } + * ``` + * + * If multiple containers are configured via the `containers` argument, you need to + * specify which container the traffic should be forwarded to. + * + * ```js + * { + * rules: [ + * { listen: "80/http", container: "app" }, + * { listen: "8000/http", container: "admin" } + * ] + * } + * ``` + * + * You can also route the same port to multiple containers via path-based routing. + * + * ```js + * { + * rules: [ + * { + * listen: "80/http", + * container: "app", + * conditions: { path: "/api/*" } + * }, + * { + * listen: "80/http", + * container: "admin", + * conditions: { path: "/admin/*" } + * } + * ] + * } + * ``` + * + * Additionally, you can redirect traffic from one port to another. This is + * commonly used to redirect http to https. 
+ * + * ```js + * { + * rules: [ + * { listen: "80/http", redirect: "443/https" }, + * { listen: "443/https", forward: "80/http" } + * ] + * } + * ``` + */ + rules?: Input[]>; + /** + * Configure the health check that the load balancer runs on your containers. + * + * :::tip + * This health check is different from the [`health`](#health) check. + * ::: + * + * This health check is run by the load balancer. While, `health` is run by ECS. This + * cannot be disabled if you are using a load balancer. While the other is off by default. + * + * Since this cannot be disabled, here are some tips on how to debug an unhealthy + * health check. + * + *
+ * How to debug a load balancer health check + * + * If you notice an `Unhealthy: Health checks failed` error, it's because the health + * check has failed. When it fails, the load balancer will terminate the containers, + * causing any requests to fail. + * + * Here's how to debug it: + * + * 1. Verify the health check path. + * + * By default, the load balancer checks the `/` path. Ensure it's accessible in your + * containers. If your application runs on a different path, then update the path in + * the health check config accordingly. + * + * 2. Confirm the containers are operational. + * + * Navigate to **ECS console** > select the **cluster** > go to the **Tasks tab** > + * choose **Any desired status** under the **Filter desired status** dropdown > select + * a task and check for errors under the **Logs tab**. If it has errors, that means + * the container failed to start. + * + * 3. If the container was terminated by the load balancer while still starting up, try + * increasing the health check interval and timeout. + *
+ * + * For `http` and `https` the default is: + * + * ```js + * { + * path: "/", + * healthyThreshold: 5, + * successCodes: "200", + * timeout: "5 seconds", + * unhealthyThreshold: 2, + * interval: "30 seconds" + * } + * ``` + * + * For `tcp` and `udp` the default is: + * + * ```js + * { + * healthyThreshold: 5, + * timeout: "6 seconds", + * unhealthyThreshold: 2, + * interval: "30 seconds" + * } + * ``` + * + * @example + * + * To configure the health check, we use the _port/protocol_ format. Here we are + * configuring a health check that pings the `/health` path on port `8080` + * every 10 seconds. + * + * ```js + * { + * rules: [ + * { listen: "80/http", forward: "8080/http" } + * ], + * health: { + * "8080/http": { + * path: "/health", + * interval: "10 seconds" + * } + * } + * } + * ``` + * + */ + health?: Input< + Record< + Port, + Input<{ + /** + * The URL path to ping on the service for health checks. Only applicable to + * `http` and `https` protocols. + * @default `"/"` + */ + path?: Input; + /** + * The time period between each health check request. Must be between `5 seconds` + * and `300 seconds`. + * @default `"30 seconds"` + */ + interval?: Input; + /** + * The timeout for each health check request. If no response is received within this + * time, it is considered failed. Must be between `2 seconds` and `120 seconds`. + * @default `"5 seconds"` + */ + timeout?: Input; + /** + * The number of consecutive successful health check requests required to consider the + * target healthy. Must be between 2 and 10. + * @default `5` + */ + healthyThreshold?: Input; + /** + * The number of consecutive failed health check requests required to consider the + * target unhealthy. Must be between 2 and 10. + * @default `2` + */ + unhealthyThreshold?: Input; + /** + * One or more HTTP response codes the health check treats as successful. Only + * applicable to `http` and `https` protocols. 
+ * + * @default `"200"` + * @example + * ```js + * { + * successCodes: "200-299" + * } + * ``` + */ + successCodes?: Input; + }> + > + >; + } | { - /** - * The `Alb` instance to attach this service to. When provided, the service creates - * target groups and listener rules on the shared ALB instead of creating its own - * load balancer. - * - * ECS tasks use the VPC's default security group, which allows all traffic within the - * VPC CIDR. For tighter security, add an explicit security group ingress rule from the - * ALB's security group using `transform`. - * - * @example - * ```js - * { - * loadBalancer: { - * instance: alb, - * rules: [ - * { listen: "443/https", forward: "8080/http", conditions: { path: "/api/*" }, priority: 100 } - * ] - * } - * } - * ``` - */ - instance: Alb; - /** - * The rules for routing traffic from the ALB to this service's containers. - * Each rule must have explicit conditions and priority. - */ - rules: Prettify[]; - /** - * Configure health checks for the target groups. Uses the same format as the inline - * health check config, keyed by `{port}/{protocol}`. - */ - health?: Record< - AlbPort, - Input<{ - path?: Input; - interval?: Input; - timeout?: Input; - healthyThreshold?: Input; - unhealthyThreshold?: Input; - successCodes?: Input; - }> - >; - } + /** + * The `Alb` instance to attach this service to. When provided, the service creates + * target groups and listener rules on the shared ALB instead of creating its own + * load balancer. + * + * ECS tasks use the VPC's default security group, which allows all traffic within the + * VPC CIDR. For tighter security, add an explicit security group ingress rule from the + * ALB's security group using `transform`. 
+ * + * @example + * ```js + * { + * loadBalancer: { + * instance: alb, + * rules: [ + * { listen: "443/https", forward: "8080/http", conditions: { path: "/api/*" }, priority: 100 } + * ] + * } + * } + * ``` + */ + instance: Alb; + /** + * The rules for routing traffic from the ALB to this service's containers. + * Each rule must have explicit conditions and priority. + */ + rules: Prettify[]; + /** + * Configure health checks for the target groups. Uses the same format as the inline + * health check config, keyed by `{port}/{protocol}`. + */ + health?: Record< + AlbPort, + Input<{ + path?: Input; + interval?: Input; + timeout?: Input; + healthyThreshold?: Input; + unhealthyThreshold?: Input; + successCodes?: Input; + }> + >; + } >; /** * Configure the CloudMap service registry for the service. @@ -2035,7 +2035,9 @@ export class Service extends Component implements Link.Linkable { if (!args.capacity) return; return output(args.capacity).apply( - (v): + ( + v, + ): | { fargate?: { base?: Input; weight: Input }; spot?: { base?: Input; weight: Input }; @@ -2527,96 +2529,103 @@ export class Service extends Component implements Link.Linkable { } function createService() { - return cloudmapService.apply( - (cloudmapService) => - new ecs.Service( - ...transform( - args.transform?.service, - `${name}Service`, - { - name, - cluster: clusterArn, - taskDefinition: taskDefinition.arn, - desiredCount: scaling.min, - ...(managed - ? { - forceNewDeployment: true, - capacityProviderStrategies: [ - { - capacityProvider: managedCapacityProvider!.name, - base: 1, - weight: 1, - }, - ], - } - : capacity + const create = (dependsOn?: ComponentResourceOptions["dependsOn"]) => + cloudmapService.apply( + (cloudmapService) => + new ecs.Service( + ...transform( + args.transform?.service, + `${name}Service`, + { + name, + cluster: clusterArn, + taskDefinition: taskDefinition.arn, + desiredCount: scaling.min, + ...(managed ? 
{ - // setting `forceNewDeployment` ensures that the service is not recreated - // when the capacity provider config changes. forceNewDeployment: true, - capacityProviderStrategies: capacity.apply((v) => { - if (!v) - throw new VisibleError( - `Invalid Fargate capacity configuration for the \"${name}\" Service.`, - ); - return [ - ...(v.fargate - ? [ - { - capacityProvider: "FARGATE", - base: v.fargate?.base, - weight: v.fargate?.weight, - }, - ] - : []), - ...(v.spot - ? [ - { - capacityProvider: "FARGATE_SPOT", - base: v.spot?.base, - weight: v.spot?.weight, - }, - ] - : []), - ]; - }), + capacityProviderStrategies: [ + { + capacityProvider: managedCapacityProvider!.name, + base: 1, + weight: 1, + }, + ], } - : // @deprecated do not use `launchType`, set `capacityProviderStrategies` - // to `[{ capacityProvider: "FARGATE", weight: 1 }]` instead - { - launchType: "FARGATE", - }), - networkConfiguration: { - // If the vpc is an SST vpc, services are automatically deployed to the public - // subnets. So we need to assign a public IP for the service to be accessible. - ...(managed ? {} : { assignPublicIp: vpc.isSstVpc }), - subnets: vpc.containerSubnets, - securityGroups: vpc.securityGroups, - }, - deploymentCircuitBreaker: { - enable: true, - rollback: true, - }, - loadBalancers: targetEntries.apply((entries) => - entries.map((e) => ({ - targetGroupArn: e.targetGroup.arn, - containerName: e.containerName, - containerPort: e.containerPort, - })), - ), - enableExecuteCommand: true, - serviceRegistries: cloudmapService && { - registryArn: cloudmapService.arn, - port: args.serviceRegistry - ? output(args.serviceRegistry).port - : undefined, + : capacity + ? { + // setting `forceNewDeployment` ensures that the service is not recreated + // when the capacity provider config changes. 
+ forceNewDeployment: true, + capacityProviderStrategies: capacity.apply((v) => { + if (!v) + throw new VisibleError( + `Invalid Fargate capacity configuration for the \"${name}\" Service.`, + ); + return [ + ...(v.fargate + ? [ + { + capacityProvider: "FARGATE", + base: v.fargate?.base, + weight: v.fargate?.weight, + }, + ] + : []), + ...(v.spot + ? [ + { + capacityProvider: "FARGATE_SPOT", + base: v.spot?.base, + weight: v.spot?.weight, + }, + ] + : []), + ]; + }), + } + : // @deprecated do not use `launchType`, set `capacityProviderStrategies` + // to `[{ capacityProvider: "FARGATE", weight: 1 }]` instead + { + launchType: "FARGATE", + }), + networkConfiguration: { + // If the vpc is an SST vpc, services are automatically deployed to the public + // subnets. So we need to assign a public IP for the service to be accessible. + ...(managed ? {} : { assignPublicIp: vpc.isSstVpc }), + subnets: vpc.containerSubnets, + securityGroups: vpc.securityGroups, + }, + deploymentCircuitBreaker: { + enable: true, + rollback: true, + }, + loadBalancers: targetEntries.apply((entries) => + entries.map((e) => ({ + targetGroupArn: e.targetGroup.arn, + containerName: e.containerName, + containerPort: e.containerPort, + })), + ), + enableExecuteCommand: true, + serviceRegistries: cloudmapService && { + registryArn: cloudmapService.arn, + port: args.serviceRegistry + ? output(args.serviceRegistry).port + : undefined, + }, + waitForSteadyState: wait, }, - waitForSteadyState: wait, - }, - { parent: self }, + { parent: self, ...(dependsOn ? { dependsOn } : {}) }, + ), ), - ), - ); + ); + + if (args.cluster.vpc instanceof Vpc) { + return create([args.cluster.vpc]); + } + + return create(); } function createAutoScaling() {