From 029c833e5f3ef1770fb9bfcca1f38db6e5e14734 Mon Sep 17 00:00:00 2001
From: mkilp <8791079+mkilp@users.noreply.github.com>
Date: Sat, 21 Mar 2026 15:53:23 -0400
Subject: [PATCH 1/9] feat: first draft managed instances

---
 .../src/components/aws/managed-instances.ts   | 622 ++++++++++++++++++
 platform/src/components/aws/service.ts        | 266 ++++++--
 2 files changed, 842 insertions(+), 46 deletions(-)
 create mode 100644 platform/src/components/aws/managed-instances.ts
diff --git a/platform/src/components/aws/managed-instances.ts b/platform/src/components/aws/managed-instances.ts
new file mode 100644
index 0000000000..e1cac871f2
--- /dev/null
+++ b/platform/src/components/aws/managed-instances.ts
@@ -0,0 +1,622 @@
+import fs from "fs";
+import path from "path";
+import {
+  all,
+  ComponentResourceOptions,
+  interpolate,
+  output,
+  Output,
+  secret,
+} from "@pulumi/pulumi";
+import { cloudwatch, ecr, ecs, getRegionOutput, iam } from "@pulumi/aws";
+import { ImageArgs } from "@pulumi/docker-build";
+import { Component, Transform, transform } from "../component.js";
+import { Input } from "../input.js";
+import { VisibleError } from "../error.js";
+import { Link } from "../link.js";
+import { toSeconds } from "../duration.js";
+import { toNumber } from "../cpu.js";
+import { toGBs, toMBs } from "../size.js";
+import { RETENTION } from "./logging.js";
+import { bootstrap } from "./helpers/bootstrap.js";
+import { imageBuilder } from "./helpers/container-builder.js";
+import { normalizeContainers } from "./fargate.js";
+
+export const managedGpuManufacturers = ["nvidia"] as const;
+export const ManagedGpuAcceleratorName = {
+  A100: "a100",
+  A10G: "a10g",
+  H100: "h100",
+  K520: "k520",
+  K80: "k80",
+  M60: "m60",
+  T4: "t4",
+  T4G: "t4g",
+  V100: "v100",
+} as const;
+
+export type ManagedGpuAcceleratorName =
+  (typeof ManagedGpuAcceleratorName)[keyof typeof ManagedGpuAcceleratorName];
+
+type ManagedContainers = ReturnType<typeof normalizeContainers>;
+
+type ManagedRoleInput = Input<string>;
+
+type ManagedGpuCount =
+  | Input<number>
+  | Input<{
+      min: Input<number>;
+      max?: Input<number>;
+    }>;
+
+export interface ManagedServiceCapacityArgs {
+  cpu?: Input<{
+    min: Input<number>;
+    max?: Input<number>;
+  }>;
+  memory?: Input<{
+    min: Input<`${number} GB`>;
+    max?: Input<`${number} GB`>;
+  }>;
+  gpu?: Input<{
+    count?: ManagedGpuCount;
+    manufacturer?: Input<(typeof managedGpuManufacturers)[number]>;
+    /**
+     * The NVIDIA GPU model to require.
+     *
+     * Supported values: `"a100"`, `"a10g"`, `"h100"`, `"k520"`, `"k80"`,
+     * `"m60"`, `"t4"`, `"t4g"`, and `"v100"`.
+     */
+    name?: Input<ManagedGpuAcceleratorName | Input<ManagedGpuAcceleratorName>[]>;
+  }>;
+  infrastructureRole: ManagedRoleInput;
+  instanceRole?: ManagedRoleInput;
+  instanceProfile?: Input<string>;
+  storage?: Input<`${number} GB`>;
+}
+
+type ServiceSizingArgs = {
+  cpu?: Input<`${number} vCPU`>;
+  memory?: Input<`${number} GB`>;
+};
+
+type ManagedTaskDefinitionArgs = {
+  cluster: {
+    nodes: {
+      cluster: {
+        name: Output<string>;
+      };
+    };
+  };
+  link?: any;
+  transform?: {
+    image?: Transform<ImageArgs>;
+    taskDefinition?: Transform<ecs.TaskDefinitionArgs>;
+    logGroup?: Transform<cloudwatch.LogGroupArgs>;
+  };
+};
+
+type ManagedCapacityProviderArgs = {
+  transform?: {
+    capacityProvider?: Transform<ecs.CapacityProviderArgs>;
+    instanceProfile?: Transform<iam.InstanceProfileArgs>;
+  };
+};
+
+type ManagedVpcArgs = {
+  containerSubnets: Input<Input<string>[]>;
+  securityGroups: Input<Input<string>[]>;
+};
+
+type NormalizedManagedCapacity = {
+  taskCpu: string;
+  taskMemory: string;
+  hostCpu: {
+    min: number;
+    max?: number;
+  };
+  hostMemory: {
+    min: number;
+    max?: number;
+  };
+  hostStorage?: number;
+  gpu?: {
+    count: {
+      min: number;
+      max?: number;
+    };
+    manufacturer: (typeof managedGpuManufacturers)[number];
+    names?: ManagedGpuAcceleratorName[];
+  };
+};
+
+export function normalizeManagedCapacity(
+  name: string,
+  args: ManagedServiceCapacityArgs,
+  serviceSizing: ServiceSizingArgs,
+) {
+  return all([serviceSizing.cpu, serviceSizing.memory]).apply(
+    ([serviceCpu, serviceMemory]) => {
+      const managed = args as ManagedServiceCapacityArgs & {
+        infrastructureRole?: string;
+        instanceRole?: string;
+        instanceProfile?: string;
+        storage?: `${number} GB`;
+        cpu?: { min: number; max?: number };
+        memory?: { min: `${number} GB`; max?: `${number} GB` };
+        gpu?: {
+          count?: number | { min: number; max?: number };
+          manufacturer?: (typeof managedGpuManufacturers)[number];
+          name?: ManagedGpuAcceleratorName | ManagedGpuAcceleratorName[];
+        };
+      };
+
+      if (!managed.infrastructureRole) {
+        throw new VisibleError(
+          `Missing \"capacity.managed.infrastructureRole\" for the \"${name}\" Service.`,
+        );
+      }
+
+      if (!managed.instanceRole && !managed.instanceProfile) {
+        throw new VisibleError(
+          `You must provide either \"capacity.managed.instanceRole\" or \"capacity.managed.instanceProfile\" for the \"${name}\" Service.`,
+        );
+      }
+
+      if (managed.instanceRole && managed.instanceProfile) {
+        throw new VisibleError(
+          `Do not provide both \"capacity.managed.instanceRole\" and \"capacity.managed.instanceProfile\" for the \"${name}\" Service.`,
+        );
+      }
+
+      const hostCpu = normalizeHostCpu(managed.cpu, serviceCpu);
+      const hostMemory = normalizeHostMemory(managed.memory, serviceMemory);
+      const gpu = normalizeGpu(managed.gpu);
+      const hostStorage = normalizeStorage(managed.storage);
+
+      return {
+        taskCpu: serviceCpu
+          ? toNumber(serviceCpu).toString()
+          : Math.round(hostCpu.min * 1024).toString(),
+        taskMemory: serviceMemory
+          ? toMBs(serviceMemory).toString()
+          : hostMemory.min.toString(),
+        hostCpu,
+        hostMemory,
+        hostStorage,
+        gpu,
+      } satisfies NormalizedManagedCapacity;
+    },
+  );
+
+  function normalizeHostCpu(
+    cpu: { min: number; max?: number } | undefined,
+    fallback?: `${number} vCPU`,
+  ) {
+    if (cpu) {
+      validateRange("capacity.managed.cpu", cpu.min, cpu.max);
+      return { min: cpu.min, max: cpu.max };
+    }
+    if (fallback) {
+      const min = parseFloat(fallback.split(" ")[0]);
+      return { min, max: min };
+    }
+    throw new VisibleError(
+      `You must provide either \"capacity.managed.cpu\" or top-level \"cpu\" for managed instances.`,
+    );
+  }
+
+  function normalizeHostMemory(
+    memory: { min: `${number} GB`; max?: `${number} GB` } | undefined,
+    fallback?: `${number} GB`,
+  ) {
+    if (memory) {
+      const min = toMBs(memory.min);
+      const max = memory.max ? toMBs(memory.max) : undefined;
+      validateRange("capacity.managed.memory", min, max);
+      return { min, max };
+    }
+    if (fallback) {
+      const min = toMBs(fallback);
+      return { min, max: min };
+    }
+    throw new VisibleError(
+      `You must provide either \"capacity.managed.memory\" or top-level \"memory\" for managed instances.`,
+    );
+  }
+
+  function normalizeGpu(
+    gpu:
+      | {
+          count?: number | { min: number; max?: number };
+          manufacturer?: (typeof managedGpuManufacturers)[number];
+          name?: ManagedGpuAcceleratorName | ManagedGpuAcceleratorName[];
+        }
+      | undefined,
+  ) {
+    if (!gpu) return undefined;
+    const manufacturer = gpu.manufacturer ?? "nvidia";
+    if (!managedGpuManufacturers.includes(manufacturer)) {
+      throw new VisibleError(
+        `Unsupported GPU manufacturer \"${manufacturer}\". The supported values are ${managedGpuManufacturers.join(", ")}.`,
+      );
+    }
+
+    const count =
+      typeof gpu.count === "number"
+        ? { min: gpu.count, max: gpu.count }
+        : gpu.count
+          ? { min: gpu.count.min, max: gpu.count.max }
+          : { min: 1, max: 1 };
+
+    validateRange("capacity.managed.gpu.count", count.min, count.max);
+
+    return {
+      count,
+      manufacturer,
+      names: normalizeGpuNames(gpu.name),
+    };
+  }
+
+  function normalizeGpuNames(
+    name: ManagedGpuAcceleratorName | ManagedGpuAcceleratorName[] | undefined,
+  ) {
+    if (!name) return undefined;
+    const names = Array.isArray(name) ? name : [name];
+    const supported = Object.values(ManagedGpuAcceleratorName);
+    const invalid = names.filter((name) => !supported.includes(name));
+    if (invalid.length > 0) {
+      throw new VisibleError(
+        `Unsupported GPU accelerator name ${invalid.map((name) => `"${name}"`).join(", ")}. The supported NVIDIA values are ${supported
+          .map((name) => `"${name}"`)
+          .join(", ")}.`,
+      );
+    }
+    return names;
+  }
+
+  function normalizeStorage(storage?: `${number} GB`) {
+    if (!storage) return undefined;
+    const value = toGBs(storage);
+    if (value <= 0) {
+      throw new VisibleError(
+        `Invalid \"capacity.managed.storage\" value \"${storage}\". It must be greater than 0 GB.`,
+      );
+    }
+    return value;
+  }
+
+  function validateRange(label: string, min: number, max?: number) {
+    if (min <= 0) {
+      throw new VisibleError(`\"${label}.min\" must be greater than 0.`);
+    }
+    if (max !== undefined && max < min) {
+      throw new VisibleError(
+        `\"${label}.max\" must be greater than or equal to \"${label}.min\".`,
+      );
+    }
+  }
+}
+
+/**
+ * Creates the ECS Managed Instances capacity provider for a service, sizing
+ * hosts from `normalized` and launching them in the given subnets/security groups.
+ */
+export function createManagedCapacityProvider(
+  name: string,
+  args: ManagedCapacityProviderArgs & {
+    capacity: Input<ManagedServiceCapacityArgs>;
+  },
+  opts: ComponentResourceOptions,
+  parent: Component,
+  clusterName: Output<string>,
+  vpc: ManagedVpcArgs,
+  normalized: Output<NormalizedManagedCapacity>,
+) {
+  // `iam.Role.get` takes the role name as its ID, so accept a bare name or an
+  // ARN here, the same way `instanceRole` is reduced to a name below.
+  const infrastructureRole = iam.Role.get(
+    `${name}ManagedInfrastructureRole`,
+    output(args.capacity).apply((v) => extractRoleName(v.infrastructureRole)),
+    {},
+    { parent },
+  );
+
+  const instanceProfileArn = output(args.capacity).apply((v) => {
+    if (v.instanceProfile) return v.instanceProfile;
+
+    return new iam.InstanceProfile(
+      ...transform(
+        args.transform?.instanceProfile,
+        `${name}ManagedInstanceProfile`,
+        {
+          role: output(v.instanceRole!).apply(extractRoleName),
+        },
+        { parent },
+      ),
+    ).arn;
+  });
+
+  return new ecs.CapacityProvider(
+    ...transform(
+      args.transform?.capacityProvider,
+      `${name}ManagedCapacityProvider`,
+      {
+        cluster: clusterName,
+        managedInstancesProvider: all([
+          normalized,
+          infrastructureRole.arn,
+          instanceProfileArn,
+          vpc.containerSubnets,
+          vpc.securityGroups,
+        ]).apply(
+          ([normalized, infrastructureRoleArn, instanceProfileArn, subnets, securityGroups]) => ({
+            infrastructureRoleArn,
+            propagateTags: "CAPACITY_PROVIDER",
+            instanceLaunchTemplate: {
+              ec2InstanceProfileArn: instanceProfileArn,
+              networkConfiguration: {
+                subnets,
+                securityGroups,
+              },
+              ...(normalized.hostStorage
+                ? {
+                    storageConfiguration: {
+                      storageSizeGib: normalized.hostStorage,
+                    },
+                  }
+                : {}),
+              instanceRequirements: {
+                vcpuCount: normalized.hostCpu,
+                memoryMib: normalized.hostMemory,
+                instanceGenerations: ["current"],
+                ...(normalized.gpu
+                  ? {
+                      acceleratorTypes: ["gpu"],
+                      acceleratorCount: {
+                        min: normalized.gpu.count.min,
+                        max: normalized.gpu.count.max,
+                      },
+                      acceleratorManufacturers: [normalized.gpu.manufacturer],
+                      ...(normalized.gpu.names
+                        ? {
+                            acceleratorNames: normalized.gpu.names,
+                          }
+                        : {}),
+                    }
+                  : {}),
+              },
+            },
+          }),
+        ),
+      },
+      { parent },
+    ),
+  );
+}
+
+export function createManagedTaskDefinition(
+  name: string,
+  args: ManagedTaskDefinitionArgs,
+  opts: ComponentResourceOptions,
+  parent: Component,
+  containers: ManagedContainers,
+  architecture: Output<"x86_64" | "arm64">,
+  taskRole: iam.Role,
+  executionRole: iam.Role,
+  normalized: Output<NormalizedManagedCapacity>,
+) {
+  const clusterName = args.cluster.nodes.cluster.name;
+  const region = getRegionOutput({}, opts).region;
+  const bootstrapData = region.apply((region) => bootstrap.forRegion(region));
+  const linkEnvs = Link.propertiesToEnv(Link.getProperties(args.link));
+
+  const containerDefinitions = all([containers, normalized]).apply(
+    ([containers, normalized]) => {
+      if (normalized.gpu && containers.length > 1) {
+        throw new VisibleError(
+          `GPU support currently requires a single container when using managed instances.`,
+        );
+      }
+
+      return containers.map((container) => ({
+        name: container.name,
+        image: (() => {
+          if (typeof container.image === "string") return output(container.image);
+
+          const containerImage = container.image;
+          const contextPath = path.join($cli.paths.root, container.image.context);
+          const dockerfile = container.image.dockerfile ?? "Dockerfile";
+          const dockerfilePath = path.join(contextPath, dockerfile);
+          const dockerIgnorePath = fs.existsSync(
+            path.join(contextPath, `${dockerfile}.dockerignore`),
+          )
+            ? path.join(contextPath, `${dockerfile}.dockerignore`)
+            : path.join(contextPath, ".dockerignore");
+
+          const lines = fs.existsSync(dockerIgnorePath)
+            ? fs.readFileSync(dockerIgnorePath).toString().split("\n")
+            : [];
+          if (!lines.find((line) => line === ".sst")) {
+            fs.writeFileSync(
+              dockerIgnorePath,
+              [...lines, "", "# sst", ".sst"].join("\n"),
+            );
+          }
+
+          const image = imageBuilder(
+            ...transform(
+              args.transform?.image,
+              `${name}Image${container.name}`,
+              {
+                context: { location: contextPath },
+                dockerfile: { location: dockerfilePath },
+                buildArgs: containerImage.args,
+                secrets: all([linkEnvs, containerImage.secrets ?? {}]).apply(
+                  ([link, secrets]) => ({ ...link, ...secrets }),
+                ),
+                target: container.image.target,
+                platforms: [container.image.platform],
+                tags: [container.name, ...(container.image.tags ?? [])].map(
+                  (tag) => interpolate`${bootstrapData.assetEcrUrl}:${tag}`,
+                ),
+                registries: [
+                  ecr
+                    .getAuthorizationTokenOutput(
+                      {
+                        registryId: bootstrapData.assetEcrRegistryId,
+                      },
+                      { parent },
+                    )
+                    .apply((authToken) => ({
+                      address: authToken.proxyEndpoint,
+                      password: secret(authToken.password),
+                      username: authToken.userName,
+                    })),
+                ],
+                ...(container.image.cache !== false
+                  ? {
+                      cacheFrom: [
+                        {
+                          registry: {
+                            ref: interpolate`${bootstrapData.assetEcrUrl}:${container.name}-cache`,
+                          },
+                        },
+                      ],
+                      cacheTo: [
+                        {
+                          registry: {
+                            ref: interpolate`${bootstrapData.assetEcrUrl}:${container.name}-cache`,
+                            imageManifest: true,
+                            ociMediaTypes: true,
+                            mode: "max",
+                          },
+                        },
+                      ],
+                    }
+                  : {}),
+                push: true,
+              },
+              { parent },
+            ),
+          );
+
+          return interpolate`${bootstrapData.assetEcrUrl}@${image.digest}`;
+        })(),
+        cpu: container.cpu ? toNumber(container.cpu) : undefined,
+        memory: container.memory ? toMBs(container.memory) : undefined,
+        command: container.command,
+        entrypoint: container.entrypoint,
+        healthCheck: container.health && {
+          command: container.health.command,
+          startPeriod: toSeconds(container.health.startPeriod ?? "0 seconds"),
+          timeout: toSeconds(container.health.timeout ?? "5 seconds"),
+          interval: toSeconds(container.health.interval ?? "30 seconds"),
+          retries: container.health.retries ?? 3,
+        },
+        pseudoTerminal: true,
+        portMappings: [{ containerPortRange: "1-65535" }],
+        logConfiguration: {
+          logDriver: "awslogs",
+          options: {
+            "awslogs-group": (() => {
+              return new cloudwatch.LogGroup(
+                ...transform(
+                  args.transform?.logGroup,
+                  `${name}LogGroup${container.name}`,
+                  {
+                    name: container.logging.name,
+                    retentionInDays: RETENTION[container.logging.retention],
+                  },
+                  { parent, ignoreChanges: ["name"] },
+                ),
+              );
+            })().name,
+            "awslogs-region": region,
+            "awslogs-stream-prefix": "/service",
+          },
+        },
+        environment: linkEnvs.apply((linkEnvs) =>
+          Object.entries({
+            ...container.environment,
+            ...linkEnvs,
+          }).map(([name, value]) => ({ name, value })),
+        ),
+        environmentFiles: container.environmentFiles?.map((file) => ({
+          type: "s3",
+          value: file,
+        })),
+        linuxParameters: {
+          initProcessEnabled: true,
+        },
+        mountPoints: container.volumes?.map((volume) => ({
+          sourceVolume: volume.efs.accessPoint,
+          containerPath: volume.path,
+        })),
+        secrets: Object.entries(container.ssm ?? {}).map(([name, valueFrom]) => ({
+          name,
+          valueFrom,
+        })),
+        resourceRequirements: normalized.gpu
+          ? [{ type: "GPU", value: normalized.gpu.count.min.toString() }]
+          : undefined,
+      }));
+    },
+  );
+
+  return output(
+    new ecs.TaskDefinition(
+      ...transform(
+        args.transform?.taskDefinition,
+        `${name}Task`,
+        {
+          family: interpolate`${clusterName}-${name}`,
+          trackLatest: true,
+          cpu: normalized.apply((v) => v.taskCpu),
+          memory: normalized.apply((v) => v.taskMemory),
+          networkMode: "awsvpc",
+          requiresCompatibilities: ["MANAGED_INSTANCES"],
+          runtimePlatform: {
+            cpuArchitecture: architecture.apply((v) => v.toUpperCase()),
+            operatingSystemFamily: "LINUX",
+          },
+          executionRoleArn: executionRole.arn,
+          taskRoleArn: taskRole.arn,
+          volumes: output(containers).apply((containers) => {
+            const uniqueAccessPoints: Set<string> = new Set();
+            return containers.flatMap((container) =>
+              (container.volumes ?? []).flatMap((volume) => {
+                if (uniqueAccessPoints.has(volume.efs.accessPoint)) return [];
+                uniqueAccessPoints.add(volume.efs.accessPoint);
+                return {
+                  name: volume.efs.accessPoint,
+                  efsVolumeConfiguration: {
+                    fileSystemId: volume.efs.fileSystem,
+                    transitEncryption: "ENABLED",
+                    authorizationConfig: {
+                      accessPointId: volume.efs.accessPoint,
+                    },
+                  },
+                };
+              }),
+            );
+          }),
+          containerDefinitions: $jsonStringify(containerDefinitions),
+        },
+        { parent },
+      ),
+    ),
+  );
+}
+
+export function isManagedCapacityInput(
+  capacity: unknown,
+): capacity is {
+  managed: Input<ManagedServiceCapacityArgs>;
+} {
+  return typeof capacity === "object" && capacity !== null && "managed" in capacity;
+}
+
+function extractRoleName(role: string) {
+  return role.split("/").pop()!; // text after the last "/" (whole input if none)
+}
diff --git a/platform/src/components/aws/service.ts b/platform/src/components/aws/service.ts
index 3ea034162a..dc8b811c88 100644
--- a/platform/src/components/aws/service.ts
+++ b/platform/src/components/aws/service.ts
@@ -36,6 +36,13 @@ import {
   normalizeMemory,
   normalizeStorage,
 } from "./fargate.js";
+import {
+  createManagedCapacityProvider,
+  createManagedTaskDefinition,
+  isManagedCapacityInput,
+  ManagedServiceCapacityArgs,
+  normalizeManagedCapacity,
+} from "./managed-instances.js";
 import { Dns } from "../dns.js";
 import { hashStringToPrettyString } from "../naming.js";
 import { Alb } from "./alb.js";
@@ -333,8 +340,23 @@ interface ServiceContainerArgs extends FargateContainerArgs {
      */
     directory?: Input<string>;
   };
+  /**
+   * The number of GPUs to reserve for this container when using managed instances.
+   */
+  gpu?: Input<number>;
 }
 
+type ServiceFargateCapacity = {
+  fargate?: {
+    base?: Input<number>;
+    weight: Input<number>;
+  };
+  spot?: {
+    base?: Input<number>;
+    weight: Input<number>;
+  };
+};
+
 export interface ServiceArgs extends FargateBaseArgs {
   /**
    * Configure how this component works in `sst dev`.
@@ -1305,17 +1327,55 @@ export interface ServiceArgs extends FargateBaseArgs {
    *   regular Fargate.
    *
    *   ```js
-   *   {
-   *     capacity: {
-   *       fargate: { weight: 1, base: 2 },
-   *       spot: { weight: 1 }
-   *     }
-   *   }
-   *   ```
-   */
+   *   {
+   *     capacity: {
+   *       fargate: { weight: 1, base: 2 },
+   *       spot: { weight: 1 }
+   *     }
+   *   }
+   *   ```
+   * - Use ECS Managed Instances for a CPU-only workload.
+   *
+   *   ```js
+   *   {
+   *     cpu: "1 vCPU",
+   *     memory: "2 GB",
+   *     capacity: {
+   *       managed: {
+   *         infrastructureRole: "arn:aws:iam::123456789012:role/ecs-infra",
+   *         instanceProfile: "arn:aws:iam::123456789012:instance-profile/ecs-managed",
+   *         cpu: { min: 1, max: 4 },
+   *         memory: { min: "2 GB", max: "8 GB" }
+   *       }
+   *     }
+   *   }
+   *   ```
+   * - Use ECS Managed Instances for a GPU workload.
+   *
+   *   ```js
+   *   {
+   *     cpu: "4 vCPU",
+   *     memory: "16 GB",
+   *     containers: [{
+   *       name: "app",
+   *       gpu: 1,
+   *     }],
+   *     capacity: {
+   *       managed: {
+   *         infrastructureRole: "arn:aws:iam::123456789012:role/ecs-infra",
+   *         instanceProfile: "arn:aws:iam::123456789012:instance-profile/ecs-managed",
+   *         gpu: {
+   *           count: 1,
+   *           name: "t4",
+   *         }
+   *       }
+   *     }
+   *   }
+   *   ```
+   */
   capacity?: Input<
     | "spot"
-    | {
+    | (ServiceFargateCapacity & {
         /**
          * Configure how the regular Fargate capacity is allocated.
          */
@@ -1350,6 +1410,18 @@ export interface ServiceArgs extends FargateBaseArgs {
            */
           weight: Input<number>;
         }>;
+        /**
+         * Not allowed here: `managed` cannot be combined with `fargate` or `spot`.
+         */
+        managed?: never;
+      })
+    | {
+        /**
+         * Configure ECS Managed Instances for this service.
+         *
+         * This mode is exclusive and cannot be combined with `fargate` or `spot`.
+         */
+        managed?: Input<ManagedServiceCapacityArgs>;
       }
   >;
   /**
@@ -1525,6 +1597,14 @@ export interface ServiceArgs extends FargateBaseArgs {
        * attaching to an external ALB via the `loadBalancer.instance` prop.
        */
       listenerRule?: Transform<lb.ListenerRuleArgs>;
+      /**
+       * Transform the ECS managed instances capacity provider resource.
+       */
+      capacityProvider?: Transform<ecs.CapacityProviderArgs>;
+      /**
+       * Transform the IAM instance profile resource created for managed instances.
+       */
+      instanceProfile?: Transform<iam.InstanceProfileArgs>;
     }
   >;
 }
@@ -1789,6 +1869,7 @@ export class Service extends Component implements Link.Linkable {
     const scaling = normalizeScaling();
     const capacity = normalizeCapacity();
     const vpc = normalizeVpc();
+    const managed = normalizeManaged();
 
     const taskRole = createTaskRole(name, args, opts, self, !!dev);
 
@@ -1803,19 +1884,51 @@ export class Service extends Component implements Link.Linkable {
     }
 
     const executionRole = createExecutionRole(name, args, opts, self);
-    const taskDefinition = createTaskDefinition(
-      name,
-      args,
-      opts,
-      self,
-      containers,
-      architecture,
-      cpu,
-      memory,
-      storage,
-      taskRole,
-      executionRole,
-    );
+    const managedCapacityProvider = managed
+      ? createManagedCapacityProvider(
+          name,
+          {
+            capacity: managed.capacity,
+            transform: {
+              capacityProvider: args.transform?.capacityProvider,
+              instanceProfile: args.transform?.instanceProfile,
+            },
+          },
+          opts,
+          self,
+          clusterName,
+          {
+            containerSubnets: vpc.containerSubnets,
+            securityGroups: vpc.securityGroups,
+          },
+          managed.normalized,
+        )
+      : undefined;
+    const taskDefinition = managed
+      ? createManagedTaskDefinition(
+          name,
+          args,
+          opts,
+          self,
+          containers,
+          architecture,
+          taskRole,
+          executionRole,
+          managed.normalized,
+        )
+      : createTaskDefinition(
+          name,
+          args,
+          opts,
+          self,
+          containers,
+          architecture,
+          cpu,
+          memory,
+          storage,
+          taskRole,
+          executionRole,
+        );
     let loadBalancer: lb.LoadBalancer | undefined;
     let targetGroups: ReturnType<typeof createTargets>;
     let targetEntries: Output<{ targetGroup: lb.TargetGroup; containerName: string; containerPort: number }[]>;
@@ -1930,11 +2043,56 @@ export class Service extends Component implements Link.Linkable {
     function normalizeCapacity() {
       if (!args.capacity) return;
 
-      return output(args.capacity).apply((v) => {
+      return output(args.capacity).apply((v): ServiceFargateCapacity | undefined => {
+        if (isManagedCapacityInput(v)) return undefined;
         if (v === "spot")
           return { spot: { weight: 1 }, fargate: { weight: 0 } };
-        return v;
+        const fargateCapacity = v as ServiceFargateCapacity;
+        return {
+          fargate: fargateCapacity.fargate,
+          spot: fargateCapacity.spot,
+        };
+      });
+    }
+
+    /**
+     * Resolves `capacity.managed`; undefined unless managed instances are set.
+     */
+    function normalizeManaged() {
+      // `managed` gates resource creation, so it must be decided synchronously:
+      // only a plain `{ managed }` object opts in; "spot"/Fargate return undefined.
+      const rawCapacity: unknown = args.capacity;
+      if (!isManagedCapacityInput(rawCapacity)) return;
+
+      const managedCapacity = output(args.capacity).apply((v) => {
+        if (v === "spot" || !isManagedCapacityInput(v)) return;
+
+        if ("fargate" in v || "spot" in v) {
+          throw new VisibleError(
+            `Do not combine \"capacity.managed\" with \"capacity.fargate\" or \"capacity.spot\" in the \"${name}\" Service.`,
+          );
+        }
+
+        return v.managed;
       });
+
+      const capacity = managedCapacity.apply((v) => {
+        if (!v)
+          throw new VisibleError(
+            `Missing \"capacity.managed\" for the \"${name}\" Service.`,
+          );
+        return v;
+      });
+
+      return {
+        capacity,
+        normalized: capacity.apply((managed) =>
+          normalizeManagedCapacity(name, managed, {
+            cpu: args.cpu,
+            memory: args.memory,
+          }),
+        ),
+      };
     }
 
     function normalizeLoadBalancer() {
@@ -2397,31 +2555,47 @@ export class Service extends Component implements Link.Linkable {
                 cluster: clusterArn,
                 taskDefinition: taskDefinition.arn,
                 desiredCount: scaling.min,
-                ...(capacity
+                ...(managed
+                  ? {
+                      forceNewDeployment: true,
+                      capacityProviderStrategies: [
+                        {
+                          capacityProvider: managedCapacityProvider!.name,
+                          weight: 1,
+                        },
+                      ],
+                    }
+                  : capacity
                   ? {
                       // setting `forceNewDeployment` ensures that the service is not recreated
                       // when the capacity provider config changes.
                       forceNewDeployment: true,
-                      capacityProviderStrategies: capacity.apply((v) => [
-                        ...(v.fargate
-                          ? [
-                              {
-                                capacityProvider: "FARGATE",
-                                base: v.fargate?.base,
-                                weight: v.fargate?.weight,
-                              },
-                            ]
-                          : []),
-                        ...(v.spot
-                          ? [
-                              {
-                                capacityProvider: "FARGATE_SPOT",
-                                base: v.spot?.base,
-                                weight: v.spot?.weight,
-                              },
-                            ]
-                          : []),
-                      ]),
+                      capacityProviderStrategies: capacity.apply((v) => {
+                        if (!v)
+                          throw new VisibleError(
+                            `Invalid Fargate capacity configuration for the \"${name}\" Service.`,
+                          );
+                        return [
+                          ...(v.fargate
+                            ? [
+                                {
+                                  capacityProvider: "FARGATE",
+                                  base: v.fargate?.base,
+                                  weight: v.fargate?.weight,
+                                },
+                              ]
+                            : []),
+                          ...(v.spot
+                            ? [
+                                {
+                                  capacityProvider: "FARGATE_SPOT",
+                                  base: v.spot?.base,
+                                  weight: v.spot?.weight,
+                                },
+                              ]
+                            : []),
+                        ];
+                      }),
                     }
                   : // @deprecated do not use `launchType`, set `capacityProviderStrategies`
                     // to `[{ capacityProvider: "FARGATE", weight: 1 }]` instead
@@ -2431,7 +2605,7 @@ export class Service extends Component implements Link.Linkable {
                 networkConfiguration: {
                   // If the vpc is an SST vpc, services are automatically deployed to the public
                   // subnets. So we need to assign a public IP for the service to be accessible.
-                  assignPublicIp: vpc.isSstVpc,
+                  ...(managed ? {} : { assignPublicIp: vpc.isSstVpc }),
                   subnets: vpc.containerSubnets,
                   securityGroups: vpc.securityGroups,
                 },

From 481d44c660554a63b038c1313cd0a267ef0d0640 Mon Sep 17 00:00:00 2001
From: mkilp <8791079+mkilp@users.noreply.github.com>
Date: Sun, 22 Mar 2026 16:53:47 -0400
Subject: [PATCH 2/9] feat: move gpu flag to toplevel

---
 examples/aws-ecs-gpus/.dockerignore           |    6 +
 examples/aws-ecs-gpus/Dockerfile              |   12 +
 examples/aws-ecs-gpus/app.py                  |   34 +
 examples/aws-ecs-gpus/package.json            |    9 +
 examples/aws-ecs-gpus/sst.config.ts           |   37 +
 .../src/components/aws/managed-instances.ts   |  410 +++---
 platform/src/components/aws/service.ts        | 1278 +++++++++--------
 platform/src/components/component.ts          |    1 +
 8 files changed, 925 insertions(+), 862 deletions(-)
 create mode 100644 examples/aws-ecs-gpus/.dockerignore
 create mode 100644 examples/aws-ecs-gpus/Dockerfile
 create mode 100644 examples/aws-ecs-gpus/app.py
 create mode 100644 examples/aws-ecs-gpus/package.json
 create mode 100644 examples/aws-ecs-gpus/sst.config.ts

diff --git a/examples/aws-ecs-gpus/.dockerignore b/examples/aws-ecs-gpus/.dockerignore
new file mode 100644
index 0000000000..763039c77b
--- /dev/null
+++ b/examples/aws-ecs-gpus/.dockerignore
@@ -0,0 +1,6 @@
+
+# sst
+.sst
+node_modules
+__pycache__
+*.pyc
diff --git a/examples/aws-ecs-gpus/Dockerfile b/examples/aws-ecs-gpus/Dockerfile
new file mode 100644
index 0000000000..b0a57dfb4c
--- /dev/null
+++ b/examples/aws-ecs-gpus/Dockerfile
@@ -0,0 +1,12 @@
+FROM python:3.11-slim
+
+WORKDIR /app
+
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+
+COPY app.py /app/app.py
+
+EXPOSE 8000  # app.py listens on 8000 unless PORT is set
+
+ENTRYPOINT ["python", "/app/app.py"]
diff --git a/examples/aws-ecs-gpus/app.py b/examples/aws-ecs-gpus/app.py
new file mode 100644
index 0000000000..152d62ef81
--- /dev/null
+++ b/examples/aws-ecs-gpus/app.py
@@ -0,0 +1,34 @@
+from http.server import BaseHTTPRequestHandler, HTTPServer
+import json
+import os
+
+
+class Handler(BaseHTTPRequestHandler):  # JSON-only handler for the example service
+    def do_GET(self):
+        if self.path == "/health":  # the only special-cased path
+            self.respond({"ok": True})
+            return
+
+        self.respond(
+            {
+                "message": "hello from ecs managed instances",
+                "gpu": os.getenv("NVIDIA_VISIBLE_DEVICES", "unknown"),  # "unknown" when unset
+            }
+        )
+
+    def log_message(self, format, *args):  # overrides the base class logger with a no-op
+        return
+
+    def respond(self, payload):  # always replies 200 with `payload` encoded as UTF-8 JSON
+        body = json.dumps(payload).encode("utf-8")
+        self.send_response(200)
+        self.send_header("Content-Type", "application/json")
+        self.send_header("Content-Length", str(len(body)))  # byte length of the encoded body
+        self.end_headers()
+        self.wfile.write(body)
+
+
+if __name__ == "__main__":
+    port = int(os.getenv("PORT", "8000"))  # defaults to 8000 when PORT is unset
+    server = HTTPServer(("0.0.0.0", port), Handler)  # bind on all interfaces
+    server.serve_forever()
diff --git a/examples/aws-ecs-gpus/package.json b/examples/aws-ecs-gpus/package.json
new file mode 100644
index 0000000000..9d67054b8e
--- /dev/null
+++ b/examples/aws-ecs-gpus/package.json
@@ -0,0 +1,9 @@
+{
+  "name": "aws-ecs-gpus",
+  "version": "1.0.0",
+  "private": true,
+  "type": "module",
+  "dependencies": {
+    "sst": "^4"
+  }
+}
diff --git a/examples/aws-ecs-gpus/sst.config.ts b/examples/aws-ecs-gpus/sst.config.ts
new file mode 100644
index 0000000000..4c67e9c0dc
--- /dev/null
+++ b/examples/aws-ecs-gpus/sst.config.ts
@@ -0,0 +1,37 @@
+/// <reference path="./.sst/platform/config.d.ts" />
+
+/**
+ * ## AWS ECS GPUs
+ *
+ * A minimal ECS service running on ECS Managed Instances with a GPU-enabled host.
+ * The service sets top-level `gpu`, `cpu`, and `memory` (`storage` is optional), while
+ * the managed instances IAM resources remain customizable through `transform`.
+ */
+export default $config({
+  app(input) {
+    return {
+      name: "service-gpu-example",
+      removal: input?.stage === "production" ? "retain" : "remove",
+      home: "aws",
+    };
+  },
+  async run() {
+    const vpc = new sst.aws.Vpc("MyVpc");
+    const cluster = new sst.aws.Cluster("MyCluster", { vpc });
+
+    const service = new sst.aws.Service("MyService", {
+      cluster,
+      image: { context: "./" },
+      gpu: "nvidia/t4",
+      cpu: "4 vCPU",
+      memory: "16 GB",
+      loadBalancer: {
+        rules: [{ listen: "80/http", forward: "8000/http" }], // `ports` is deprecated in favor of `rules`
+      },
+    });
+
+    return {
+      url: service.url,
+    };
+  },
+});
diff --git a/platform/src/components/aws/managed-instances.ts b/platform/src/components/aws/managed-instances.ts
index e1cac871f2..6cc4ead302 100644
--- a/platform/src/components/aws/managed-instances.ts
+++ b/platform/src/components/aws/managed-instances.ts
@@ -8,7 +8,14 @@ import {
   Output,
   secret,
 } from "@pulumi/pulumi";
-import { cloudwatch, ecr, ecs, getRegionOutput, iam } from "@pulumi/aws";
+import {
+  cloudwatch,
+  ecr,
+  ecs,
+  getPartitionOutput,
+  getRegionOutput,
+  iam,
+} from "@pulumi/aws";
 import { ImageArgs } from "@pulumi/docker-build";
 import { Component, Transform, transform } from "../component.js";
 import { Input } from "../input.js";
@@ -37,47 +44,15 @@ export const ManagedGpuAcceleratorName = {
 
 export type ManagedGpuAcceleratorName =
   (typeof ManagedGpuAcceleratorName)[keyof typeof ManagedGpuAcceleratorName];
+export type ManagedGpu =
+  `${(typeof managedGpuManufacturers)[number]}/${ManagedGpuAcceleratorName}`;
 
 type ManagedContainers = ReturnType<typeof normalizeContainers>;
-
-type ManagedRoleInput = Input<string>;
-
-type ManagedGpuCount =
-  | Input<number>
-  | Input<{
-      min: Input<number>;
-      max?: Input<number>;
-    }>;
-
-export interface ManagedServiceCapacityArgs {
-  cpu?: Input<{
-    min: Input<number>;
-    max?: Input<number>;
-  }>;
-  memory?: Input<{
-    min: Input<`${number} GB`>;
-    max?: Input<`${number} GB`>;
-  }>;
-  gpu?: Input<{
-    count?: ManagedGpuCount;
-    manufacturer?: Input<(typeof managedGpuManufacturers)[number]>;
-    /**
-     * The NVIDIA GPU model to require.
-     *
-     * Supported values: `"a100"`, `"a10g"`, `"h100"`, `"k520"`, `"k80"`,
-     * `"m60"`, `"t4"`, `"t4g"`, and `"v100"`.
-     */
-    name?: Input<ManagedGpuAcceleratorName | Input<ManagedGpuAcceleratorName>[]>;
-  }>;
-  infrastructureRole: ManagedRoleInput;
-  instanceRole?: ManagedRoleInput;
-  instanceProfile?: Input<string>;
-  storage?: Input<`${number} GB`>;
-}
-
-type ServiceSizingArgs = {
+type ManagedServiceArgs = {
+  gpu: Input<ManagedGpu>;
   cpu?: Input<`${number} vCPU`>;
   memory?: Input<`${number} GB`>;
+  storage?: Input<`${number} GB`>;
 };
 
 type ManagedTaskDefinitionArgs = {
@@ -98,6 +73,8 @@ type ManagedTaskDefinitionArgs = {
 
 type ManagedCapacityProviderArgs = {
   transform?: {
+    infrastructureRole?: Transform<iam.RoleArgs>;
+    instanceRole?: Transform<iam.RoleArgs>;
     capacityProvider?: Transform<ecs.CapacityProviderArgs>;
     instanceProfile?: Transform<iam.InstanceProfileArgs>;
   };
@@ -132,142 +109,74 @@ type NormalizedManagedCapacity = {
 
 export function normalizeManagedCapacity(
   name: string,
-  args: ManagedServiceCapacityArgs,
-  serviceSizing: ServiceSizingArgs,
+  args: ManagedServiceArgs,
 ) {
-  return all([serviceSizing.cpu, serviceSizing.memory]).apply(
-    ([serviceCpu, serviceMemory]) => {
-      const managed = args as ManagedServiceCapacityArgs & {
-        infrastructureRole?: string;
-        instanceRole?: string;
-        instanceProfile?: string;
-        storage?: `${number} GB`;
-        cpu?: { min: number; max?: number };
-        memory?: { min: `${number} GB`; max?: `${number} GB` };
-        gpu?: {
-          count?: number | { min: number; max?: number };
-          manufacturer?: (typeof managedGpuManufacturers)[number];
-          name?: ManagedGpuAcceleratorName | ManagedGpuAcceleratorName[];
-        };
-      };
-
-      if (!managed.infrastructureRole) {
-        throw new VisibleError(
-          `Missing \"capacity.managed.infrastructureRole\" for the \"${name}\" Service.`,
-        );
-      }
-
-      if (!managed.instanceRole && !managed.instanceProfile) {
-        throw new VisibleError(
-          `You must provide either \"capacity.managed.instanceRole\" or \"capacity.managed.instanceProfile\" for the \"${name}\" Service.`,
-        );
-      }
-
-      if (managed.instanceRole && managed.instanceProfile) {
-        throw new VisibleError(
-          `Do not provide both \"capacity.managed.instanceRole\" and \"capacity.managed.instanceProfile\" for the \"${name}\" Service.`,
-        );
-      }
-
-      const hostCpu = normalizeHostCpu(managed.cpu, serviceCpu);
-      const hostMemory = normalizeHostMemory(managed.memory, serviceMemory);
-      const gpu = normalizeGpu(managed.gpu);
-      const hostStorage = normalizeStorage(managed.storage);
+  return all([args.gpu, args.cpu, args.memory, args.storage]).apply(
+    ([gpu, cpu, memory, storage]) => {
+      const hostCpu = normalizeHostCpu(cpu);
+      const hostMemory = normalizeHostMemory(memory);
+      const hostStorage = normalizeStorage(storage);
 
       return {
-        taskCpu: serviceCpu
-          ? toNumber(serviceCpu).toString()
-          : Math.round(hostCpu.min * 1024).toString(),
-        taskMemory: serviceMemory
-          ? toMBs(serviceMemory).toString()
-          : hostMemory.min.toString(),
+        taskCpu: cpu!,
+        taskMemory: memory!,
         hostCpu,
         hostMemory,
         hostStorage,
-        gpu,
+        gpu: normalizeGpu(gpu),
       } satisfies NormalizedManagedCapacity;
     },
   );
 
-  function normalizeHostCpu(
-    cpu: { min: number; max?: number } | undefined,
-    fallback?: `${number} vCPU`,
-  ) {
+  function normalizeHostCpu(cpu?: `${number} vCPU`) {
     if (cpu) {
-      validateRange("capacity.managed.cpu", cpu.min, cpu.max);
-      return { min: cpu.min, max: cpu.max };
-    }
-    if (fallback) {
-      const min = parseFloat(fallback.split(" ")[0]);
+      const min = parseFloat(cpu.split(" ")[0]);
       return { min, max: min };
     }
     throw new VisibleError(
-      `You must provide either \"capacity.managed.cpu\" or top-level \"cpu\" for managed instances.`,
+      `You must provide top-level \"cpu\" for the \"${name}\" Service when \"gpu\" is set.`,
     );
   }
 
-  function normalizeHostMemory(
-    memory: { min: `${number} GB`; max?: `${number} GB` } | undefined,
-    fallback?: `${number} GB`,
-  ) {
+  function normalizeHostMemory(memory?: `${number} GB`) {
     if (memory) {
-      const min = toMBs(memory.min);
-      const max = memory.max ? toMBs(memory.max) : undefined;
-      validateRange("capacity.managed.memory", min, max);
-      return { min, max };
-    }
-    if (fallback) {
-      const min = toMBs(fallback);
+      const min = toMBs(memory);
       return { min, max: min };
     }
     throw new VisibleError(
-      `You must provide either \"capacity.managed.memory\" or top-level \"memory\" for managed instances.`,
+      `You must provide top-level \"memory\" for the \"${name}\" Service when \"gpu\" is set.`,
     );
   }
 
-  function normalizeGpu(
-    gpu:
-      | {
-          count?: number | { min: number; max?: number };
-          manufacturer?: (typeof managedGpuManufacturers)[number];
-          name?: ManagedGpuAcceleratorName | ManagedGpuAcceleratorName[];
-        }
-      | undefined,
-  ) {
-    if (!gpu) return undefined;
-    const manufacturer = gpu.manufacturer ?? "nvidia";
+  function normalizeGpu(gpu: ManagedGpu) {
+    const [manufacturer, name] = gpu.split("/") as [
+      (typeof managedGpuManufacturers)[number],
+      ManagedGpuAcceleratorName,
+    ];
     if (!managedGpuManufacturers.includes(manufacturer)) {
       throw new VisibleError(
-        `Unsupported GPU manufacturer \"${manufacturer}\". The supported values are ${managedGpuManufacturers.join(", ")}.`,
+        `Unsupported GPU manufacturer \"${manufacturer}\". The supported values are ${managedGpuManufacturers.join(
+          ", ",
+        )}.`,
       );
     }
 
-    const count =
-      typeof gpu.count === "number"
-        ? { min: gpu.count, max: gpu.count }
-        : gpu.count
-          ? { min: gpu.count.min, max: gpu.count.max }
-          : { min: 1, max: 1 };
-
-    validateRange("capacity.managed.gpu.count", count.min, count.max);
-
     return {
-      count,
+      count: { min: 1, max: 1 },
       manufacturer,
-      names: normalizeGpuNames(gpu.name),
+      names: normalizeGpuNames(name),
     };
   }
 
-  function normalizeGpuNames(
-    name: ManagedGpuAcceleratorName | ManagedGpuAcceleratorName[] | undefined,
-  ) {
-    if (!name) return undefined;
-    const names = Array.isArray(name) ? name : [name];
+  function normalizeGpuNames(name: ManagedGpuAcceleratorName) {
+    const names = [name];
     const supported = Object.values(ManagedGpuAcceleratorName);
     const invalid = names.filter((name) => !supported.includes(name));
     if (invalid.length > 0) {
       throw new VisibleError(
-        `Unsupported GPU accelerator name ${invalid.map((name) => `"${name}"`).join(", ")}. The supported NVIDIA values are ${supported
+        `Unsupported GPU accelerator name ${invalid
+          .map((name) => `"${name}"`)
+          .join(", ")}. The supported NVIDIA values are ${supported
           .map((name) => `"${name}"`)
           .join(", ")}.`,
       );
@@ -280,56 +189,48 @@ export function normalizeManagedCapacity(
     const value = toGBs(storage);
     if (value <= 0) {
       throw new VisibleError(
-        `Invalid \"capacity.managed.storage\" value \"${storage}\". It must be greater than 0 GB.`,
+        `Invalid top-level \"storage\" value \"${storage}\" for the \"${name}\" Service. It must be greater than 0 GB.`,
       );
     }
     return value;
   }
-
-  function validateRange(label: string, min: number, max?: number) {
-    if (min <= 0) {
-      throw new VisibleError(`\"${label}.min\" must be greater than 0.`);
-    }
-    if (max !== undefined && max < min) {
-      throw new VisibleError(
-        `\"${label}.max\" must be greater than or equal to \"${label}.min\".`,
-      );
-    }
-  }
 }
 
 export function createManagedCapacityProvider(
   name: string,
-  args: ManagedCapacityProviderArgs & {
-    capacity: Input<ManagedServiceCapacityArgs>;
-  },
+  args: ManagedCapacityProviderArgs,
   opts: ComponentResourceOptions,
   parent: Component,
   clusterName: Output<string>,
   vpc: ManagedVpcArgs,
   normalized: Output<NormalizedManagedCapacity>,
 ) {
-  const infrastructureRole = iam.Role.get(
-    `${name}ManagedInfrastructureRole`,
-    output(args.capacity).apply((v) => v.infrastructureRole),
-    {},
-    { parent },
-  );
+  const partition = getPartitionOutput({}, opts).partition;
 
-  const instanceProfileArn = output(args.capacity).apply((v) => {
-    if (v.instanceProfile) return v.instanceProfile;
+  const infrastructureRole = new iam.Role(
+    ...transform(
+      args.transform?.infrastructureRole,
+      `${name}ManagedInfrastructureRole`,
+      {
+        assumeRolePolicy: iam.assumeRolePolicyForPrincipal({
+          Service: "ecs.amazonaws.com",
+        }),
+        managedPolicyArns: [
+          interpolate`arn:${partition}:iam::aws:policy/AmazonECSInfrastructureRolePolicyForManagedInstances`,
+        ],
+      },
+      { parent },
+    ),
+  );
 
-    return new iam.InstanceProfile(
-      ...transform(
-        args.transform?.instanceProfile,
-        `${name}ManagedInstanceProfile`,
-        {
-          role: output(v.instanceRole!).apply(extractRoleName),
-        },
-        { parent },
-      ),
-    ).arn;
-  });
+  const instanceProfileArn = getOrCreateManagedInstanceProfile(
+    name,
+    partition,
+    args.transform?.instanceRole,
+    args.transform?.instanceProfile,
+    parent,
+    opts,
+  ).arn;
 
   return new ecs.CapacityProvider(
     ...transform(
@@ -344,50 +245,60 @@ export function createManagedCapacityProvider(
           vpc.containerSubnets,
           vpc.securityGroups,
         ]).apply(
-          ([normalized, infrastructureRoleArn, instanceProfileArn, subnets, securityGroups]) => ({
+          ([
+            normalized,
             infrastructureRoleArn,
-            propagateTags: "CAPACITY_PROVIDER",
-            instanceLaunchTemplate: {
-              ec2InstanceProfileArn: instanceProfileArn,
-              networkConfiguration: {
-                subnets,
-                securityGroups,
-              },
-              ...(normalized.hostStorage
-                ? {
-                    storageConfiguration: {
-                      storageSizeGib: normalized.hostStorage,
-                    },
-                  }
-                : {}),
-              instanceRequirements: {
-                vcpuCount: {
-                  min: normalized.hostCpu.min,
-                  max: normalized.hostCpu.max,
+            instanceProfileArn,
+            subnets,
+            securityGroups,
+          ]) => {
+            const managedInstancesProvider = {
+              infrastructureRoleArn,
+              propagateTags: "CAPACITY_PROVIDER" as const,
+              instanceLaunchTemplate: {
+                ec2InstanceProfileArn: instanceProfileArn,
+                networkConfiguration: {
+                  subnets,
+                  securityGroups,
                 },
-                memoryMib: {
-                  min: normalized.hostMemory.min,
-                  max: normalized.hostMemory.max,
-                },
-                instanceGenerations: ["current"],
-                ...(normalized.gpu
+                ...(normalized.hostStorage
                   ? {
-                      acceleratorTypes: ["gpu"],
-                      acceleratorCount: {
-                        min: normalized.gpu.count.min,
-                        max: normalized.gpu.count.max,
+                      storageConfiguration: {
+                        storageSizeGib: normalized.hostStorage,
                       },
-                      acceleratorManufacturers: [normalized.gpu.manufacturer],
-                      ...(normalized.gpu.names
-                        ? {
-                            acceleratorNames: normalized.gpu.names,
-                          }
-                        : {}),
                     }
                   : {}),
+                instanceRequirements: {
+                  vcpuCount: {
+                    min: normalized.hostCpu.min,
+                    max: normalized.hostCpu.max,
+                  },
+                  memoryMib: {
+                    min: normalized.hostMemory.min,
+                    max: normalized.hostMemory.max,
+                  },
+                  instanceGenerations: ["current"],
+                  ...(normalized.gpu
+                    ? {
+                        acceleratorTypes: ["gpu"],
+                        acceleratorCount: {
+                          min: normalized.gpu.count.min,
+                          max: normalized.gpu.count.max,
+                        },
+                        acceleratorManufacturers: [normalized.gpu.manufacturer],
+                        ...(normalized.gpu.names
+                          ? {
+                              acceleratorNames: normalized.gpu.names,
+                            }
+                          : {}),
+                      }
+                    : {}),
+                },
               },
-            },
-          }),
+            };
+
+            return managedInstancesProvider;
+          },
         ),
       },
       { parent },
@@ -395,6 +306,61 @@ export function createManagedCapacityProvider(
   );
 }
 
+const sharedManagedInstanceProfileByProvider = new WeakMap<
+  object,
+  iam.InstanceProfile
+>(); // cache of the created instance profile, keyed by `opts.provider`
+let defaultManagedInstanceProfile: iam.InstanceProfile | undefined; // cache used when `opts.provider` is not set
+
+function getOrCreateManagedInstanceProfile( // get-or-create: at most one role + instance profile per provider
+  name: string,
+  partition: Output<string>,
+  roleTransform: Transform<iam.RoleArgs> | undefined,
+  profileTransform: Transform<iam.InstanceProfileArgs> | undefined,
+  parent: Component,
+  opts: ComponentResourceOptions,
+) {
+  const provider = opts.provider;
+  const existing = provider
+    ? sharedManagedInstanceProfileByProvider.get(provider)
+    : defaultManagedInstanceProfile;
+  if (existing) return existing; // NOTE(review): on a cache hit this call's `name`, transforms and `parent` are not used
+
+  const role = new iam.Role(
+    ...transform(
+      roleTransform,
+      `${name}ManagedInstancesEcsInstanceRole`,
+      {
+        name: "ecsInstanceRole", // NOTE(review): fixed physical name — confirm it cannot collide across stages/apps sharing an account
+        assumeRolePolicy: iam.assumeRolePolicyForPrincipal({
+          Service: "ec2.amazonaws.com",
+        }),
+        managedPolicyArns: [
+          interpolate`arn:${partition}:iam::aws:policy/AmazonECSInstanceRolePolicyForManagedInstances`,
+        ],
+      },
+      { parent },
+    ),
+  );
+
+  const profile = new iam.InstanceProfile(
+    ...transform(
+      profileTransform,
+      `${name}ManagedInstancesEcsInstanceProfile`,
+      {
+        name: "ecsInstanceRole", // same fixed physical name as the role above
+        role: role.name,
+      },
+      { parent },
+    ),
+  );
+
+  if (provider) sharedManagedInstanceProfileByProvider.set(provider, profile); // remember for later calls with this provider
+  else defaultManagedInstanceProfile = profile;
+
+  return profile;
+}
+
 export function createManagedTaskDefinition(
   name: string,
   args: ManagedTaskDefinitionArgs,
@@ -422,10 +388,14 @@ export function createManagedTaskDefinition(
       return containers.map((container) => ({
         name: container.name,
         image: (() => {
-          if (typeof container.image === "string") return output(container.image);
+          if (typeof container.image === "string")
+            return output(container.image);
 
           const containerImage = container.image;
-          const contextPath = path.join($cli.paths.root, container.image.context);
+          const contextPath = path.join(
+            $cli.paths.root,
+            container.image.context,
+          );
           const dockerfile = container.image.dockerfile ?? "Dockerfile";
           const dockerfilePath = path.join(contextPath, dockerfile);
           const dockerIgnorePath = fs.existsSync(
@@ -553,10 +523,12 @@ export function createManagedTaskDefinition(
           sourceVolume: volume.efs.accessPoint,
           containerPath: volume.path,
         })),
-        secrets: Object.entries(container.ssm ?? {}).map(([name, valueFrom]) => ({
-          name,
-          valueFrom,
-        })),
+        secrets: Object.entries(container.ssm ?? {}).map(
+          ([name, valueFrom]) => ({
+            name,
+            valueFrom,
+          }),
+        ),
         resourceRequirements: normalized.gpu
           ? [{ type: "GPU", value: normalized.gpu.count.min.toString() }]
           : undefined,
@@ -608,15 +580,3 @@ export function createManagedTaskDefinition(
     ),
   );
 }
-
-export function isManagedCapacityInput(
-  capacity: unknown,
-): capacity is {
-  managed: Input<ManagedServiceCapacityArgs>;
-} {
-  return typeof capacity === "object" && capacity !== null && "managed" in capacity;
-}
-
-function extractRoleName(role: string) {
-  return role.split("/").pop()!;
-}
diff --git a/platform/src/components/aws/service.ts b/platform/src/components/aws/service.ts
index dc8b811c88..2f47a727dc 100644
--- a/platform/src/components/aws/service.ts
+++ b/platform/src/components/aws/service.ts
@@ -39,8 +39,7 @@ import {
 import {
   createManagedCapacityProvider,
   createManagedTaskDefinition,
-  isManagedCapacityInput,
-  ManagedServiceCapacityArgs,
+  ManagedGpu,
   normalizeManagedCapacity,
 } from "./managed-instances.js";
 import { Dns } from "../dns.js";
@@ -340,10 +339,6 @@ interface ServiceContainerArgs extends FargateContainerArgs {
      */
     directory?: Input<string>;
   };
-  /**
-   * The number of GPUs to reserve for this container when using managed instances.
-   */
-  gpu?: Input<number>;
 }
 
 type ServiceFargateCapacity = {
@@ -661,437 +656,437 @@ export interface ServiceArgs extends FargateBaseArgs {
    */
   loadBalancer?: Input<
     | {
-    /**
-     * Configure if the load balancer should be public or private.
-     *
-     * When set to `false`, the load balancer endpoint will only be accessible within the
-     * VPC.
-     *
-     * @default `true`
-     */
-    public?: Input<boolean>;
-    /**
-     * Set a custom domain for your load balancer endpoint.
-     *
-     * Automatically manages domains hosted on AWS Route 53, Cloudflare, and Vercel. For other
-     * providers, you'll need to pass in a `cert` that validates domain ownership and add the
-     * DNS records.
-     *
-     * :::tip
-     * Built-in support for AWS Route 53, Cloudflare, and Vercel. And manual setup for other
-     * providers.
-     * :::
-     *
-     * @example
-     *
-     * By default this assumes the domain is hosted on Route 53.
-     *
-     * ```js
-     * {
-     *   domain: "example.com"
-     * }
-     * ```
-     *
-     * For domains hosted on Cloudflare.
-     *
-     * ```js
-     * {
-     *   domain: {
-     *     name: "example.com",
-     *     dns: sst.cloudflare.dns()
-     *   }
-     * }
-     * ```
-     */
-    domain?: Input<
-      | string
-      | {
-          /**
-           * The custom domain you want to use.
-           *
-           * @example
-           * ```js
-           * {
-           *   domain: {
-           *     name: "example.com"
-           *   }
-           * }
-           * ```
-           *
-           * Can also include subdomains based on the current stage.
-           *
-           * ```js
-           * {
-           *   domain: {
-           *     name: `${$app.stage}.example.com`
-           *   }
-           * }
-           * ```
-           *
-           * Wildcard domains are supported.
-           *
-           * ```js
-           * {
-           *   domain: {
-           *     name: "*.example.com"
-           *   }
-           * }
-           * ```
-           */
-          name: Input<string>;
-          /**
-           * Alias domains that should be used.
-           *
-           * @example
-           * ```js {4}
-           * {
-           *   domain: {
-           *     name: "app1.example.com",
-           *     aliases: ["app2.example.com"]
-           *   }
-           * }
-           * ```
-           */
-          aliases?: Input<string[]>;
-          /**
-           * The ARN of an ACM (AWS Certificate Manager) certificate that proves ownership of the
-           * domain. By default, a certificate is created and validated automatically.
-           *
-           * :::tip
-           * You need to pass in a `cert` for domains that are not hosted on supported `dns` providers.
-           * :::
-           *
-           * To manually set up a domain on an unsupported provider, you'll need to:
-           *
-           * 1. [Validate that you own the domain](https://docs.aws.amazon.com/acm/latest/userguide/domain-ownership-validation.html) by creating an ACM certificate. You can either validate it by setting a DNS record or by verifying an email sent to the domain owner.
-           * 2. Once validated, set the certificate ARN as the `cert` and set `dns` to `false`.
-           * 3. Add the DNS records in your provider to point to the load balancer endpoint.
-           *
-           * @example
-           * ```js
-           * {
-           *   domain: {
-           *     name: "example.com",
-           *     dns: false,
-           *     cert: "arn:aws:acm:us-east-1:112233445566:certificate/3a958790-8878-4cdc-a396-06d95064cf63"
-           *   }
-           * }
-           * ```
-           */
-          cert?: Input<string>;
-          /**
-           * The DNS provider to use for the domain. Defaults to the AWS.
-           *
-           * Takes an adapter that can create the DNS records on the provider. This can automate
-           * validating the domain and setting up the DNS routing.
-           *
-           * Supports Route 53, Cloudflare, and Vercel adapters. For other providers, you'll need
-           * to set `dns` to `false` and pass in a certificate validating ownership via `cert`.
-           *
-           * @default `sst.aws.dns`
-           *
-           * @example
-           *
-           * Specify the hosted zone ID for the Route 53 domain.
-           *
-           * ```js
-           * {
-           *   domain: {
-           *     name: "example.com",
-           *     dns: sst.aws.dns({
-           *       zone: "Z2FDTNDATAQYW2"
-           *     })
-           *   }
-           * }
-           * ```
-           *
-           * Use a domain hosted on Cloudflare, needs the Cloudflare provider.
-           *
-           * ```js
-           * {
-           *   domain: {
-           *     name: "example.com",
-           *     dns: sst.cloudflare.dns()
-           *   }
-           * }
-           * ```
-           *
-           * Use a domain hosted on Vercel, needs the Vercel provider.
-           *
-           * ```js
-           * {
-           *   domain: {
-           *     name: "example.com",
-           *     dns: sst.vercel.dns()
-           *   }
-           * }
-           * ```
-           */
-          dns?: Input<false | (Dns & {})>;
-        }
-    >;
-    /** @deprecated Use `rules` instead. */
-    ports?: Input<Prettify<ServiceRules>[]>;
-    /**
-     * Configure the mapping for the ports the load balancer listens to, forwards, or redirects to
-     * the service.
-     * This supports two types of protocols:
-     *
-     * 1. Application Layer Protocols: `http` and `https`. This'll create an [Application Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/introduction.html).
-     * 2. Network Layer Protocols: `tcp`, `udp`, `tcp_udp`, and `tls`. This'll create a [Network Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/network/introduction.html).
-     *
-     * :::note
-     * If you want to listen on `https` or `tls`, you need to specify a custom
-     * `loadBalancer.domain`.
-     * :::
-     *
-     * You **can not configure** both application and network layer protocols for the same
-     * service.
-     *
-     * @example
-     * Here we are listening on port `80` and forwarding it to the service on port `8080`.
-     * ```js
-     * {
-     *   rules: [
-     *     { listen: "80/http", forward: "8080/http" }
-     *   ]
-     * }
-     * ```
-     *
-     * The `forward` port and protocol defaults to the `listen` port and protocol. So in this
-     * case both are `80/http`.
-     *
-     * ```js
-     * {
-     *   rules: [
-     *     { listen: "80/http" }
-     *   ]
-     * }
-     * ```
-     *
-     * If multiple containers are configured via the `containers` argument, you need to
-     * specify which container the traffic should be forwarded to.
-     *
-     * ```js
-     * {
-     *   rules: [
-     *     { listen: "80/http", container: "app" },
-     *     { listen: "8000/http", container: "admin" }
-     *   ]
-     * }
-     * ```
-     *
-     * You can also route the same port to multiple containers via path-based routing.
-     *
-     * ```js
-     * {
-     *   rules: [
-     *     {
-     *       listen: "80/http",
-     *       container: "app",
-     *       conditions: { path: "/api/*" }
-     *     },
-     *     {
-     *       listen: "80/http",
-     *       container: "admin",
-     *       conditions: { path: "/admin/*" }
-     *     }
-     *   ]
-     * }
-     * ```
-     *
-     * Additionally, you can redirect traffic from one port to another. This is
-     * commonly used to redirect http to https.
-     *
-     * ```js
-     * {
-     *   rules: [
-     *     { listen: "80/http", redirect: "443/https" },
-     *     { listen: "443/https", forward: "80/http" }
-     *   ]
-     * }
-     * ```
-     */
-    rules?: Input<Prettify<ServiceRules>[]>;
-    /**
-     * Configure the health check that the load balancer runs on your containers.
-     *
-     * :::tip
-     * This health check is different from the [`health`](#health) check.
-     * :::
-     *
-     * This health check is run by the load balancer. While, `health` is run by ECS. This
-     * cannot be disabled if you are using a load balancer. While the other is off by default.
-     *
-     * Since this cannot be disabled, here are some tips on how to debug an unhealthy
-     * health check.
-     *
-     * <details>
-     * <summary>How to debug a load balancer health check</summary>
-     *
-     * If you notice a `Unhealthy: Health checks failed` error, it's because the health
-     * check has failed. When it fails, the load balancer will terminate the containers,
-     * causing any requests to fail.
-     *
-     * Here's how to debug it:
-     *
-     * 1. Verify the health check path.
-     *
-     *    By default, the load balancer checks the `/` path. Ensure it's accessible in your
-     *    containers. If your application runs on a different path, then update the path in
-     *    the health check config accordingly.
-     *
-     * 2. Confirm the containers are operational.
-     *
-     *    Navigate to **ECS console** > select the **cluster** > go to the **Tasks tab** >
-     *    choose **Any desired status** under the **Filter desired status** dropdown > select
-     *    a task and check for errors under the **Logs tab**. If it has error that means that
-     *    the container failed to start.
-     *
-     * 3. If the container was terminated by the load balancer while still starting up, try
-     *    increasing the health check interval and timeout.
-     * </details>
-     *
-     * For `http` and `https` the default is:
-     *
-     * ```js
-     * {
-     *   path: "/",
-     *   healthyThreshold: 5,
-     *   successCodes: "200",
-     *   timeout: "5 seconds",
-     *   unhealthyThreshold: 2,
-     *   interval: "30 seconds"
-     * }
-     * ```
-     *
-     * For `tcp` and `udp` the default is:
-     *
-     * ```js
-     * {
-     *   healthyThreshold: 5,
-     *   timeout: "6 seconds",
-     *   unhealthyThreshold: 2,
-     *   interval: "30 seconds"
-     * }
-     * ```
-     *
-     * @example
-     *
-     * To configure the health check, we use the _port/protocol_ format. Here we are
-     * configuring a health check that pings the `/health` path on port `8080`
-     * every 10 seconds.
-     *
-     * ```js
-     * {
-     *   rules: [
-     *     { listen: "80/http", forward: "8080/http" }
-     *   ],
-     *   health: {
-     *     "8080/http": {
-     *       path: "/health",
-     *       interval: "10 seconds"
-     *     }
-     *   }
-     * }
-     * ```
-     *
-     */
-    health?: Input<
-      Record<
-        Port,
-        Input<{
-          /**
-           * The URL path to ping on the service for health checks. Only applicable to
-           * `http` and `https` protocols.
-           * @default `"/"`
-           */
-          path?: Input<string>;
-          /**
-           * The time period between each health check request. Must be between `5 seconds`
-           * and `300 seconds`.
-           * @default `"30 seconds"`
-           */
-          interval?: Input<DurationMinutes>;
-          /**
-           * The timeout for each health check request. If no response is received within this
-           * time, it is considered failed. Must be between `2 seconds` and `120 seconds`.
-           * @default `"5 seconds"`
-           */
-          timeout?: Input<DurationMinutes>;
-          /**
-           * The number of consecutive successful health check requests required to consider the
-           * target healthy. Must be between 2 and 10.
-           * @default `5`
-           */
-          healthyThreshold?: Input<number>;
-          /**
-           * The number of consecutive failed health check requests required to consider the
-           * target unhealthy. Must be between 2 and 10.
-           * @default `2`
-           */
-          unhealthyThreshold?: Input<number>;
-          /**
-           * One or more HTTP response codes the health check treats as successful. Only
-           * applicable to `http` and `https` protocols.
-           *
-           * @default `"200"`
-           * @example
-           * ```js
-           * {
-           *   successCodes: "200-299"
-           * }
-           * ```
-           */
-          successCodes?: Input<string>;
-        }>
-      >
-    >;
-  }
+        /**
+         * Configure if the load balancer should be public or private.
+         *
+         * When set to `false`, the load balancer endpoint will only be accessible within the
+         * VPC.
+         *
+         * @default `true`
+         */
+        public?: Input<boolean>;
+        /**
+         * Set a custom domain for your load balancer endpoint.
+         *
+         * Automatically manages domains hosted on AWS Route 53, Cloudflare, and Vercel. For other
+         * providers, you'll need to pass in a `cert` that validates domain ownership and add the
+         * DNS records.
+         *
+         * :::tip
+         * Built-in support for AWS Route 53, Cloudflare, and Vercel. And manual setup for other
+         * providers.
+         * :::
+         *
+         * @example
+         *
+         * By default this assumes the domain is hosted on Route 53.
+         *
+         * ```js
+         * {
+         *   domain: "example.com"
+         * }
+         * ```
+         *
+         * For domains hosted on Cloudflare.
+         *
+         * ```js
+         * {
+         *   domain: {
+         *     name: "example.com",
+         *     dns: sst.cloudflare.dns()
+         *   }
+         * }
+         * ```
+         */
+        domain?: Input<
+          | string
+          | {
+              /**
+               * The custom domain you want to use.
+               *
+               * @example
+               * ```js
+               * {
+               *   domain: {
+               *     name: "example.com"
+               *   }
+               * }
+               * ```
+               *
+               * Can also include subdomains based on the current stage.
+               *
+               * ```js
+               * {
+               *   domain: {
+               *     name: `${$app.stage}.example.com`
+               *   }
+               * }
+               * ```
+               *
+               * Wildcard domains are supported.
+               *
+               * ```js
+               * {
+               *   domain: {
+               *     name: "*.example.com"
+               *   }
+               * }
+               * ```
+               */
+              name: Input<string>;
+              /**
+               * Alias domains that should be used.
+               *
+               * @example
+               * ```js {4}
+               * {
+               *   domain: {
+               *     name: "app1.example.com",
+               *     aliases: ["app2.example.com"]
+               *   }
+               * }
+               * ```
+               */
+              aliases?: Input<string[]>;
+              /**
+               * The ARN of an ACM (AWS Certificate Manager) certificate that proves ownership of the
+               * domain. By default, a certificate is created and validated automatically.
+               *
+               * :::tip
+               * You need to pass in a `cert` for domains that are not hosted on supported `dns` providers.
+               * :::
+               *
+               * To manually set up a domain on an unsupported provider, you'll need to:
+               *
+               * 1. [Validate that you own the domain](https://docs.aws.amazon.com/acm/latest/userguide/domain-ownership-validation.html) by creating an ACM certificate. You can either validate it by setting a DNS record or by verifying an email sent to the domain owner.
+               * 2. Once validated, set the certificate ARN as the `cert` and set `dns` to `false`.
+               * 3. Add the DNS records in your provider to point to the load balancer endpoint.
+               *
+               * @example
+               * ```js
+               * {
+               *   domain: {
+               *     name: "example.com",
+               *     dns: false,
+               *     cert: "arn:aws:acm:us-east-1:112233445566:certificate/3a958790-8878-4cdc-a396-06d95064cf63"
+               *   }
+               * }
+               * ```
+               */
+              cert?: Input<string>;
+              /**
+               * The DNS provider to use for the domain. Defaults to AWS.
+               *
+               * Takes an adapter that can create the DNS records on the provider. This can automate
+               * validating the domain and setting up the DNS routing.
+               *
+               * Supports Route 53, Cloudflare, and Vercel adapters. For other providers, you'll need
+               * to set `dns` to `false` and pass in a certificate validating ownership via `cert`.
+               *
+               * @default `sst.aws.dns`
+               *
+               * @example
+               *
+               * Specify the hosted zone ID for the Route 53 domain.
+               *
+               * ```js
+               * {
+               *   domain: {
+               *     name: "example.com",
+               *     dns: sst.aws.dns({
+               *       zone: "Z2FDTNDATAQYW2"
+               *     })
+               *   }
+               * }
+               * ```
+               *
+               * Use a domain hosted on Cloudflare, needs the Cloudflare provider.
+               *
+               * ```js
+               * {
+               *   domain: {
+               *     name: "example.com",
+               *     dns: sst.cloudflare.dns()
+               *   }
+               * }
+               * ```
+               *
+               * Use a domain hosted on Vercel, needs the Vercel provider.
+               *
+               * ```js
+               * {
+               *   domain: {
+               *     name: "example.com",
+               *     dns: sst.vercel.dns()
+               *   }
+               * }
+               * ```
+               */
+              dns?: Input<false | (Dns & {})>;
+            }
+        >;
+        /** @deprecated Use `rules` instead. */
+        ports?: Input<Prettify<ServiceRules>[]>;
+        /**
+         * Configure the mapping for the ports the load balancer listens to, forwards, or redirects to
+         * the service.
+         * This supports two types of protocols:
+         *
+         * 1. Application Layer Protocols: `http` and `https`. This'll create an [Application Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/introduction.html).
+         * 2. Network Layer Protocols: `tcp`, `udp`, `tcp_udp`, and `tls`. This'll create a [Network Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/network/introduction.html).
+         *
+         * :::note
+         * If you want to listen on `https` or `tls`, you need to specify a custom
+         * `loadBalancer.domain`.
+         * :::
+         *
+         * You **cannot configure** both application and network layer protocols for the same
+         * service.
+         *
+         * @example
+         * Here we are listening on port `80` and forwarding it to the service on port `8080`.
+         * ```js
+         * {
+         *   rules: [
+         *     { listen: "80/http", forward: "8080/http" }
+         *   ]
+         * }
+         * ```
+         *
+         * The `forward` port and protocol defaults to the `listen` port and protocol. So in this
+         * case both are `80/http`.
+         *
+         * ```js
+         * {
+         *   rules: [
+         *     { listen: "80/http" }
+         *   ]
+         * }
+         * ```
+         *
+         * If multiple containers are configured via the `containers` argument, you need to
+         * specify which container the traffic should be forwarded to.
+         *
+         * ```js
+         * {
+         *   rules: [
+         *     { listen: "80/http", container: "app" },
+         *     { listen: "8000/http", container: "admin" }
+         *   ]
+         * }
+         * ```
+         *
+         * You can also route the same port to multiple containers via path-based routing.
+         *
+         * ```js
+         * {
+         *   rules: [
+         *     {
+         *       listen: "80/http",
+         *       container: "app",
+         *       conditions: { path: "/api/*" }
+         *     },
+         *     {
+         *       listen: "80/http",
+         *       container: "admin",
+         *       conditions: { path: "/admin/*" }
+         *     }
+         *   ]
+         * }
+         * ```
+         *
+         * Additionally, you can redirect traffic from one port to another. This is
+         * commonly used to redirect http to https.
+         *
+         * ```js
+         * {
+         *   rules: [
+         *     { listen: "80/http", redirect: "443/https" },
+         *     { listen: "443/https", forward: "80/http" }
+         *   ]
+         * }
+         * ```
+         */
+        rules?: Input<Prettify<ServiceRules>[]>;
+        /**
+         * Configure the health check that the load balancer runs on your containers.
+         *
+         * :::tip
+         * This health check is different from the [`health`](#health) check.
+         * :::
+         *
+         * This health check is run by the load balancer, while `health` is run by ECS. This one
+         * cannot be disabled if you are using a load balancer, while the other is off by default.
+         *
+         * Since this cannot be disabled, here are some tips on how to debug an unhealthy
+         * health check.
+         *
+         * <details>
+         * <summary>How to debug a load balancer health check</summary>
+         *
+         * If you notice an `Unhealthy: Health checks failed` error, it's because the health
+         * check has failed. When it fails, the load balancer will terminate the containers,
+         * causing any requests to fail.
+         *
+         * Here's how to debug it:
+         *
+         * 1. Verify the health check path.
+         *
+         *    By default, the load balancer checks the `/` path. Ensure it's accessible in your
+         *    containers. If your application runs on a different path, then update the path in
+         *    the health check config accordingly.
+         *
+         * 2. Confirm the containers are operational.
+         *
+         *    Navigate to **ECS console** > select the **cluster** > go to the **Tasks tab** >
+         *    choose **Any desired status** under the **Filter desired status** dropdown > select
+         *    a task and check for errors under the **Logs tab**. If it has an error, that means
+         *    the container failed to start.
+         *
+         * 3. If the container was terminated by the load balancer while still starting up, try
+         *    increasing the health check interval and timeout.
+         * </details>
+         *
+         * For `http` and `https` the default is:
+         *
+         * ```js
+         * {
+         *   path: "/",
+         *   healthyThreshold: 5,
+         *   successCodes: "200",
+         *   timeout: "5 seconds",
+         *   unhealthyThreshold: 2,
+         *   interval: "30 seconds"
+         * }
+         * ```
+         *
+         * For `tcp` and `udp` the default is:
+         *
+         * ```js
+         * {
+         *   healthyThreshold: 5,
+         *   timeout: "6 seconds",
+         *   unhealthyThreshold: 2,
+         *   interval: "30 seconds"
+         * }
+         * ```
+         *
+         * @example
+         *
+         * To configure the health check, we use the _port/protocol_ format. Here we are
+         * configuring a health check that pings the `/health` path on port `8080`
+         * every 10 seconds.
+         *
+         * ```js
+         * {
+         *   rules: [
+         *     { listen: "80/http", forward: "8080/http" }
+         *   ],
+         *   health: {
+         *     "8080/http": {
+         *       path: "/health",
+         *       interval: "10 seconds"
+         *     }
+         *   }
+         * }
+         * ```
+         *
+         */
+        health?: Input<
+          Record<
+            Port,
+            Input<{
+              /**
+               * The URL path to ping on the service for health checks. Only applicable to
+               * `http` and `https` protocols.
+               * @default `"/"`
+               */
+              path?: Input<string>;
+              /**
+               * The time period between each health check request. Must be between `5 seconds`
+               * and `300 seconds`.
+               * @default `"30 seconds"`
+               */
+              interval?: Input<DurationMinutes>;
+              /**
+               * The timeout for each health check request. If no response is received within this
+               * time, it is considered failed. Must be between `2 seconds` and `120 seconds`.
+               * @default `"5 seconds"`
+               */
+              timeout?: Input<DurationMinutes>;
+              /**
+               * The number of consecutive successful health check requests required to consider the
+               * target healthy. Must be between 2 and 10.
+               * @default `5`
+               */
+              healthyThreshold?: Input<number>;
+              /**
+               * The number of consecutive failed health check requests required to consider the
+               * target unhealthy. Must be between 2 and 10.
+               * @default `2`
+               */
+              unhealthyThreshold?: Input<number>;
+              /**
+               * One or more HTTP response codes the health check treats as successful. Only
+               * applicable to `http` and `https` protocols.
+               *
+               * @default `"200"`
+               * @example
+               * ```js
+               * {
+               *   successCodes: "200-299"
+               * }
+               * ```
+               */
+              successCodes?: Input<string>;
+            }>
+          >
+        >;
+      }
     | {
-    /**
-     * The `Alb` instance to attach this service to. When provided, the service creates
-     * target groups and listener rules on the shared ALB instead of creating its own
-     * load balancer.
-     *
-     * ECS tasks use the VPC's default security group, which allows all traffic within the
-     * VPC CIDR. For tighter security, add an explicit security group ingress rule from the
-     * ALB's security group using `transform`.
-     *
-     * @example
-     * ```js
-     * {
-     *   loadBalancer: {
-     *     instance: alb,
-     *     rules: [
-     *       { listen: "443/https", forward: "8080/http", conditions: { path: "/api/*" }, priority: 100 }
-     *     ]
-     *   }
-     * }
-     * ```
-     */
-    instance: Alb;
-    /**
-     * The rules for routing traffic from the ALB to this service's containers.
-     * Each rule must have explicit conditions and priority.
-     */
-    rules: Prettify<ServiceAlbRule>[];
-    /**
-     * Configure health checks for the target groups. Uses the same format as the inline
-     * health check config, keyed by `{port}/{protocol}`.
-     */
-    health?: Record<
-      AlbPort,
-      Input<{
-        path?: Input<string>;
-        interval?: Input<DurationMinutes>;
-        timeout?: Input<DurationMinutes>;
-        healthyThreshold?: Input<number>;
-        unhealthyThreshold?: Input<number>;
-        successCodes?: Input<string>;
-      }>
-    >;
-  }
+        /**
+         * The `Alb` instance to attach this service to. When provided, the service creates
+         * target groups and listener rules on the shared ALB instead of creating its own
+         * load balancer.
+         *
+         * ECS tasks use the VPC's default security group, which allows all traffic within the
+         * VPC CIDR. For tighter security, add an explicit security group ingress rule from the
+         * ALB's security group using `transform`.
+         *
+         * @example
+         * ```js
+         * {
+         *   loadBalancer: {
+         *     instance: alb,
+         *     rules: [
+         *       { listen: "443/https", forward: "8080/http", conditions: { path: "/api/*" }, priority: 100 }
+         *     ]
+         *   }
+         * }
+         * ```
+         */
+        instance: Alb;
+        /**
+         * The rules for routing traffic from the ALB to this service's containers.
+         * Each rule must have explicit conditions and priority.
+         */
+        rules: Prettify<ServiceAlbRule>[];
+        /**
+         * Configure health checks for the target groups. Uses the same format as the inline
+         * health check config, keyed by `{port}/{protocol}`.
+         */
+        health?: Record<
+          AlbPort,
+          Input<{
+            path?: Input<string>;
+            interval?: Input<DurationMinutes>;
+            timeout?: Input<DurationMinutes>;
+            healthyThreshold?: Input<number>;
+            unhealthyThreshold?: Input<number>;
+            successCodes?: Input<string>;
+          }>
+        >;
+      }
   >;
   /**
    * Configure the CloudMap service registry for the service.
@@ -1231,6 +1226,22 @@ export interface ServiceArgs extends FargateBaseArgs {
      */
     scaleOutCooldown?: Input<DurationMinutes>;
   }>;
+  /**
+   * Run this service on ECS Managed Instances with a GPU-enabled host.
+   *
+   * This automatically switches the service from Fargate to ECS Managed Instances.
+   * Use the top-level `cpu`, `memory`, and `storage` props to size the workload.
+   *
+   * @example
+   * ```js
+   * {
+   *   gpu: "nvidia/t4",
+   *   cpu: "4 vCPU",
+   *   memory: "16 GB"
+   * }
+   * ```
+   */
+  gpu?: Input<ManagedGpu>;
   /**
    * Configure the capacity provider; regular Fargate or Fargate Spot, for this service.
    *
@@ -1327,52 +1338,14 @@ export interface ServiceArgs extends FargateBaseArgs {
    *   regular Fargate.
    *
    *   ```js
-    *   {
-    *     capacity: {
-    *       fargate: { weight: 1, base: 2 },
-    *       spot: { weight: 1 }
-    *     }
-    *   }
-    *   ```
-    * - Use ECS Managed Instances for a CPU-only workload.
-    *
-    *   ```js
-    *   {
-    *     cpu: "1 vCPU",
-    *     memory: "2 GB",
-    *     capacity: {
-    *       managed: {
-    *         infrastructureRole: "arn:aws:iam::123456789012:role/ecs-infra",
-    *         instanceProfile: "arn:aws:iam::123456789012:instance-profile/ecs-managed",
-    *         cpu: { min: 1, max: 4 },
-    *         memory: { min: "2 GB", max: "8 GB" }
-    *       }
-    *     }
-    *   }
-    *   ```
-    * - Use ECS Managed Instances for a GPU workload.
-    *
-    *   ```js
-    *   {
-    *     cpu: "4 vCPU",
-    *     memory: "16 GB",
-    *     containers: [{
-    *       name: "app",
-    *       gpu: 1,
-    *     }],
-    *     capacity: {
-    *       managed: {
-    *         infrastructureRole: "arn:aws:iam::123456789012:role/ecs-infra",
-    *         instanceProfile: "arn:aws:iam::123456789012:instance-profile/ecs-managed",
-    *         gpu: {
-    *           count: 1,
-    *           name: "t4",
-    *         }
-    *       }
-    *     }
-    *   }
-    *   ```
-    */
+   *   {
+   *     capacity: {
+   *       fargate: { weight: 1, base: 2 },
+   *       spot: { weight: 1 }
+   *     }
+   *   }
+   *   ```
+   */
   capacity?: Input<
     | "spot"
     | (ServiceFargateCapacity & {
@@ -1410,19 +1383,8 @@ export interface ServiceArgs extends FargateBaseArgs {
            */
           weight: Input<number>;
         }>;
-        /**
-         * Configure ECS Managed Instances for this service.
-         */
         managed?: never;
       })
-    | {
-        /**
-         * Configure ECS Managed Instances for this service.
-         *
-         * This mode is exclusive and cannot be combined with `fargate` or `spot`.
-         */
-        managed?: Input<ManagedServiceCapacityArgs>;
-      }
   >;
   /**
    * Configure the health check that ECS runs on your containers.
@@ -1597,6 +1559,14 @@ export interface ServiceArgs extends FargateBaseArgs {
        * attaching to an external ALB via the `loadBalancer.instance` prop.
        */
       listenerRule?: Transform<lb.ListenerRuleArgs>;
+      /**
+       * Transform the IAM infrastructure role resource created for managed instances.
+       */
+      infrastructureRole?: Transform<iam.RoleArgs>;
+      /**
+       * Transform the IAM instance role resource created for managed instances.
+       */
+      instanceRole?: Transform<iam.RoleArgs>;
       /**
        * Transform the ECS managed instances capacity provider resource.
        */
@@ -1878,7 +1848,7 @@ export class Service extends Component implements Link.Linkable {
     this.taskRole = taskRole;
 
     if (dev) {
-      this.devUrl = (!lbArgs && !args.loadBalancer) ? undefined : dev.url;
+      this.devUrl = !lbArgs && !args.loadBalancer ? undefined : dev.url;
       registerReceiver();
       return;
     }
@@ -1888,8 +1858,9 @@ export class Service extends Component implements Link.Linkable {
       ? createManagedCapacityProvider(
           name,
           {
-            capacity: managed.capacity,
             transform: {
+              infrastructureRole: args.transform?.infrastructureRole,
+              instanceRole: args.transform?.instanceRole,
               capacityProvider: args.transform?.capacityProvider,
               instanceProfile: args.transform?.instanceProfile,
             },
@@ -1931,7 +1902,13 @@ export class Service extends Component implements Link.Linkable {
         );
     let loadBalancer: lb.LoadBalancer | undefined;
     let targetGroups: ReturnType<typeof createTargets>;
-    let targetEntries: Output<{ targetGroup: lb.TargetGroup; containerName: string; containerPort: number }[]>;
+    let targetEntries: Output<
+      {
+        targetGroup: lb.TargetGroup;
+        containerName: string;
+        containerPort: number;
+      }[]
+    >;
     let effectiveLbArn: Output<string> | undefined;
     let effectiveDomain: Output<string | undefined>;
     let effectiveDnsName: Output<string> | undefined;
@@ -1946,7 +1923,8 @@ export class Service extends Component implements Link.Linkable {
           }
         },
       );
-      const { targets: albTargets, entries: albEntries } = createAlbTargetsAndEntries(albAttachment);
+      const { targets: albTargets, entries: albEntries } =
+        createAlbTargetsAndEntries(albAttachment);
       targetGroups = output(albTargets);
       targetEntries = albEntries;
       createAlbListenerRules(albAttachment, albTargets);
@@ -1963,7 +1941,8 @@ export class Service extends Component implements Link.Linkable {
         effectiveLbArn = loadBalancer.arn;
         effectiveDnsName = loadBalancer.dnsName;
       }
-      effectiveDomain = lbArgs?.domain?.apply((d) => d?.name) ?? output(undefined);
+      effectiveDomain =
+        lbArgs?.domain?.apply((d) => d?.name) ?? output(undefined);
     }
     const cloudmapService = createCloudmapService();
     const service = createService();
@@ -1973,13 +1952,13 @@ export class Service extends Component implements Link.Linkable {
     this.cloudmapService = cloudmapService;
     this.executionRole = executionRole;
     this.taskDefinition = taskDefinition;
-    this.loadBalancer = loadBalancer ?? albAttachment?.instance.nodes.loadBalancer;
+    this.loadBalancer =
+      loadBalancer ?? albAttachment?.instance.nodes.loadBalancer;
     this.autoScalingTarget = autoScalingTarget;
     this.domain = effectiveDomain;
     this._url = effectiveDnsName
-      ? all([effectiveDomain, effectiveDnsName]).apply(
-          ([domain, dnsName]) =>
-            domain ? `https://${domain}/` : `http://${dnsName}`,
+      ? all([effectiveDomain, effectiveDnsName]).apply(([domain, dnsName]) =>
+          domain ? `https://${domain}/` : `http://${dnsName}`,
         )
       : undefined;
 
@@ -2021,7 +2000,9 @@ export class Service extends Component implements Link.Linkable {
 
     function normalizeScaling() {
       // External ALB is always "application" type
-      const lbType = albAttachment ? output("application" as const) : lbArgs?.type;
+      const lbType = albAttachment
+        ? output("application" as const)
+        : lbArgs?.type;
       return all([lbType, args.scaling]).apply(([type, v]) => {
         if (type !== "application" && v?.requestCount)
           throw new VisibleError(
@@ -2034,64 +2015,51 @@ export class Service extends Component implements Link.Linkable {
           cpuUtilization: v?.cpuUtilization ?? 70,
           memoryUtilization: v?.memoryUtilization ?? 70,
           requestCount: v?.requestCount ?? false,
-          scaleInCooldown: v?.scaleInCooldown ? toSeconds(v.scaleInCooldown) : undefined,
-          scaleOutCooldown: v?.scaleOutCooldown ? toSeconds(v.scaleOutCooldown) : undefined,
+          scaleInCooldown: v?.scaleInCooldown
+            ? toSeconds(v.scaleInCooldown)
+            : undefined,
+          scaleOutCooldown: v?.scaleOutCooldown
+            ? toSeconds(v.scaleOutCooldown)
+            : undefined,
         };
       });
     }
 
     function normalizeCapacity() {
+      if (args.gpu && args.capacity) {
+        throw new VisibleError(
+          `Do not combine top-level "gpu" with "capacity" in the "${name}" Service. GPU services use ECS Managed Instances automatically.`,
+        );
+      }
       if (!args.capacity) return;
 
-      return output(args.capacity).apply((v): ServiceFargateCapacity | undefined => {
-        if (isManagedCapacityInput(v)) return undefined;
-        if (v === "spot")
-          return { spot: { weight: 1 }, fargate: { weight: 0 } };
-        const fargateCapacity = v as ServiceFargateCapacity;
-        return {
-          fargate: fargateCapacity.fargate,
-          spot: fargateCapacity.spot,
-        };
-      });
+      return output(args.capacity).apply(
+        (v): ServiceFargateCapacity | undefined => {
+          if (v === "spot")
+            return { spot: { weight: 1 }, fargate: { weight: 0 } };
+          const fargateCapacity = v as ServiceFargateCapacity;
+          return {
+            fargate: fargateCapacity.fargate,
+            spot: fargateCapacity.spot,
+          };
+        },
+      );
     }
 
     function normalizeManaged() {
-      if (!args.capacity) return;
-
-      const managedCapacity = output(args.capacity).apply((v) => {
-        if (v === "spot" || !isManagedCapacityInput(v)) return;
+      if (!args.gpu) return;
 
-        if ("fargate" in v || "spot" in v) {
-          throw new VisibleError(
-            `Do not combine \"capacity.managed\" with \"capacity.fargate\" or \"capacity.spot\" in the \"${name}\" Service.`,
-          );
-        }
-
-        return v.managed;
+      const managedCapacity = output({
+        gpu: args.gpu,
+        cpu: args.cpu,
+        memory: args.memory,
+        storage: args.storage,
       });
 
       return {
-        capacity: managedCapacity.apply((v) => {
-          if (!v)
-            throw new VisibleError(
-              `Missing \"capacity.managed\" for the \"${name}\" Service.`,
-            );
-          return v;
-        }),
-        normalized: managedCapacity
-          .apply((v) => {
-            if (!v)
-              throw new VisibleError(
-                `Missing \"capacity.managed\" for the \"${name}\" Service.`,
-              );
-            return v;
-          })
-          .apply((managed) =>
-            normalizeManagedCapacity(name, managed, {
-              cpu: args.cpu,
-              memory: args.memory,
-            }),
-          ),
+        normalized: managedCapacity.apply((managed) =>
+          normalizeManagedCapacity(name, managed),
+        ),
       };
     }
 
@@ -2374,7 +2342,11 @@ export class Service extends Component implements Link.Linkable {
         const seen = new Set<string>();
         for (const rule of rules) {
           if (rule.type !== "forward") continue;
-          const targetId = targetKey(rule.container!, rule.forwardProtocol, rule.forwardPort);
+          const targetId = targetKey(
+            rule.container!,
+            rule.forwardProtocol,
+            rule.forwardPort,
+          );
           if (seen.has(targetId)) continue;
           seen.add(targetId);
           entries.push({
@@ -2561,47 +2533,48 @@ export class Service extends Component implements Link.Linkable {
                       capacityProviderStrategies: [
                         {
                           capacityProvider: managedCapacityProvider!.name,
+                          base: 1,
                           weight: 1,
                         },
                       ],
                     }
                   : capacity
-                  ? {
-                      // setting `forceNewDeployment` ensures that the service is not recreated
-                      // when the capacity provider config changes.
-                      forceNewDeployment: true,
-                      capacityProviderStrategies: capacity.apply((v) => {
-                        if (!v)
-                          throw new VisibleError(
-                            `Invalid Fargate capacity configuration for the \"${name}\" Service.`,
-                          );
-                        return [
-                          ...(v.fargate
-                            ? [
-                                {
-                                  capacityProvider: "FARGATE",
-                                  base: v.fargate?.base,
-                                  weight: v.fargate?.weight,
-                                },
-                              ]
-                            : []),
-                          ...(v.spot
-                            ? [
-                                {
-                                  capacityProvider: "FARGATE_SPOT",
-                                  base: v.spot?.base,
-                                  weight: v.spot?.weight,
-                                },
-                              ]
-                            : []),
-                        ];
+                    ? {
+                        // setting `forceNewDeployment` ensures that the service is not recreated
+                        // when the capacity provider config changes.
+                        forceNewDeployment: true,
+                        capacityProviderStrategies: capacity.apply((v) => {
+                          if (!v)
+                            throw new VisibleError(
+                              `Invalid Fargate capacity configuration for the \"${name}\" Service.`,
+                            );
+                          return [
+                            ...(v.fargate
+                              ? [
+                                  {
+                                    capacityProvider: "FARGATE",
+                                    base: v.fargate?.base,
+                                    weight: v.fargate?.weight,
+                                  },
+                                ]
+                              : []),
+                            ...(v.spot
+                              ? [
+                                  {
+                                    capacityProvider: "FARGATE_SPOT",
+                                    base: v.spot?.base,
+                                    weight: v.spot?.weight,
+                                  },
+                                ]
+                              : []),
+                          ];
+                        }),
+                      }
+                    : // @deprecated do not use `launchType`, set `capacityProviderStrategies`
+                      // to `[{ capacityProvider: "FARGATE", weight: 1 }]` instead
+                      {
+                        launchType: "FARGATE",
                       }),
-                    }
-                  : // @deprecated do not use `launchType`, set `capacityProviderStrategies`
-                    // to `[{ capacityProvider: "FARGATE", weight: 1 }]` instead
-                    {
-                      launchType: "FARGATE",
-                    }),
                 networkConfiguration: {
                   // If the vpc is an SST vpc, services are automatically deployed to the public
                   // subnets. So we need to assign a public IP for the service to be accessible.
@@ -2651,55 +2624,64 @@ export class Service extends Component implements Link.Linkable {
         ),
       );
 
-      all([scaling.cpuUtilization, scaling.scaleInCooldown, scaling.scaleOutCooldown]).apply(
-        ([cpuUtilization, scaleInCooldown, scaleOutCooldown]) => {
-          if (cpuUtilization === false) return;
-          new appautoscaling.Policy(
-            `${name}AutoScalingCpuPolicy`,
-            {
-              serviceNamespace: target.serviceNamespace,
-              scalableDimension: target.scalableDimension,
-              resourceId: target.resourceId,
-              policyType: "TargetTrackingScaling",
-              targetTrackingScalingPolicyConfiguration: {
-                predefinedMetricSpecification: {
-                  predefinedMetricType: "ECSServiceAverageCPUUtilization",
-                },
-                targetValue: cpuUtilization,
-                scaleInCooldown,
-                scaleOutCooldown,
+      all([
+        scaling.cpuUtilization,
+        scaling.scaleInCooldown,
+        scaling.scaleOutCooldown,
+      ]).apply(([cpuUtilization, scaleInCooldown, scaleOutCooldown]) => {
+        if (cpuUtilization === false) return;
+        new appautoscaling.Policy(
+          `${name}AutoScalingCpuPolicy`,
+          {
+            serviceNamespace: target.serviceNamespace,
+            scalableDimension: target.scalableDimension,
+            resourceId: target.resourceId,
+            policyType: "TargetTrackingScaling",
+            targetTrackingScalingPolicyConfiguration: {
+              predefinedMetricSpecification: {
+                predefinedMetricType: "ECSServiceAverageCPUUtilization",
               },
+              targetValue: cpuUtilization,
+              scaleInCooldown,
+              scaleOutCooldown,
             },
-            { parent: self },
-          );
-        }
-      );
+          },
+          { parent: self },
+        );
+      });
 
-      all([scaling.memoryUtilization, scaling.scaleInCooldown, scaling.scaleOutCooldown]).apply(
-        ([memoryUtilization, scaleInCooldown, scaleOutCooldown]) => {
-          if (memoryUtilization === false) return;
-          new appautoscaling.Policy(
-            `${name}AutoScalingMemoryPolicy`,
-            {
-              serviceNamespace: target.serviceNamespace,
-              scalableDimension: target.scalableDimension,
-              resourceId: target.resourceId,
-              policyType: "TargetTrackingScaling",
-              targetTrackingScalingPolicyConfiguration: {
-                predefinedMetricSpecification: {
-                  predefinedMetricType: "ECSServiceAverageMemoryUtilization",
-                },
-                targetValue: memoryUtilization,
-                scaleInCooldown,
-                scaleOutCooldown,
+      all([
+        scaling.memoryUtilization,
+        scaling.scaleInCooldown,
+        scaling.scaleOutCooldown,
+      ]).apply(([memoryUtilization, scaleInCooldown, scaleOutCooldown]) => {
+        if (memoryUtilization === false) return;
+        new appautoscaling.Policy(
+          `${name}AutoScalingMemoryPolicy`,
+          {
+            serviceNamespace: target.serviceNamespace,
+            scalableDimension: target.scalableDimension,
+            resourceId: target.resourceId,
+            policyType: "TargetTrackingScaling",
+            targetTrackingScalingPolicyConfiguration: {
+              predefinedMetricSpecification: {
+                predefinedMetricType: "ECSServiceAverageMemoryUtilization",
               },
+              targetValue: memoryUtilization,
+              scaleInCooldown,
+              scaleOutCooldown,
             },
-            { parent: self },
-          );
-        }
-      );
+          },
+          { parent: self },
+        );
+      });
 
-      all([scaling.requestCount, scaling.scaleInCooldown, scaling.scaleOutCooldown, targetGroups]).apply(
+      all([
+        scaling.requestCount,
+        scaling.scaleInCooldown,
+        scaling.scaleOutCooldown,
+        targetGroups,
+      ]).apply(
         ([requestCount, scaleInCooldown, scaleOutCooldown, targetGroups]) => {
           if (requestCount === false) return;
           if (!targetGroups) return;
@@ -2809,7 +2791,9 @@ export class Service extends Component implements Link.Linkable {
           const cn = rule.container ?? ctrs[0].name;
           if (!containerNames.has(cn)) {
             throw new VisibleError(
-              `Container "${cn}" in "loadBalancer.rules" does not match any container in Service "${name}". Available: ${[...containerNames].join(", ")}.`,
+              `Container "${cn}" in "loadBalancer.rules" does not match any container in Service "${name}". Available: ${[
+                ...containerNames,
+              ].join(", ")}.`,
             );
           }
         }
@@ -2829,7 +2813,11 @@ export class Service extends Component implements Link.Linkable {
         const forwardProtocol = parts[1].toUpperCase();
         // Use explicit container or component name for keying/naming
         const containerNameForKey = rule.container ?? name;
-        const tgtId = targetKey(containerNameForKey, forwardProtocol, forwardPort);
+        const tgtId = targetKey(
+          containerNameForKey,
+          forwardProtocol,
+          forwardPort,
+        );
 
         if (!targets[tgtId]) {
           const healthKey = `${forwardPort}/${parts[1]}` as AlbPort;
@@ -2902,8 +2890,7 @@ export class Service extends Component implements Link.Linkable {
           );
         }
 
-        const seen =
-          prioritiesByListener.get(rule.listen) ?? new Set();
+        const seen = prioritiesByListener.get(rule.listen) ?? new Set();
         if (seen.has(rule.priority)) {
           throw new VisibleError(
             `Duplicate priority ${rule.priority} on listener "${rule.listen}" in Service "${name}".`,
@@ -2930,7 +2917,11 @@ export class Service extends Component implements Link.Linkable {
         const forwardPort = parseInt(forwardParts[0]);
         const forwardProtocol = forwardParts[1].toUpperCase();
         const containerNameForKey = rule.container ?? name;
-        const tgtId = targetKey(containerNameForKey, forwardProtocol, forwardPort);
+        const tgtId = targetKey(
+          containerNameForKey,
+          forwardProtocol,
+          forwardPort,
+        );
 
         const targetGroup = albTargets[tgtId];
         if (!targetGroup) {
@@ -2939,13 +2930,17 @@ export class Service extends Component implements Link.Linkable {
           );
         }
 
-        const listenerResource =
-          attachment.instance.getListener(listenerProtocol, listenerPort);
+        const listenerResource = attachment.instance.getListener(
+          listenerProtocol,
+          listenerPort,
+        );
 
         new lb.ListenerRule(
           ...transform(
             args.transform?.listenerRule,
-            `${name}AlbRule${listenerProtocol.toUpperCase()}${listenerPort}P${rule.priority}`,
+            `${name}AlbRule${listenerProtocol.toUpperCase()}${listenerPort}P${
+              rule.priority
+            }`,
             {
               listenerArn: listenerResource.arn,
               priority: rule.priority,
@@ -3034,10 +3029,14 @@ export class Service extends Component implements Link.Linkable {
           throw new VisibleError(
             `Cannot access the AWS Cloud Map service name for the "${this._name}" Service. Cloud Map is not configured for the cluster.`,
           );
+        if (!service)
+          throw new VisibleError(
+            `Cannot access the AWS Cloud Map service name for the "${this._name}" Service. The Cloud Map service is not available.`,
+          );
 
         return this.dev
           ? interpolate`dev.${namespace}`
-          : interpolate`${service!.name}.${namespace}`;
+          : interpolate`${service.name}.${namespace}`;
       },
     );
   }
@@ -3123,8 +3122,13 @@ export class Service extends Component implements Link.Linkable {
     return {
       properties: {
         url: this.dev ? this.devUrl : this._url,
-        service: output(this.cloudmapNamespace).apply((namespace) =>
-          namespace ? this.service : undefined,
+        service: all([this.cloudmapNamespace, this.cloudmapService]).apply(
+          ([namespace, service]) =>
+            namespace && service
+              ? this.dev
+                ? `dev.${namespace}`
+                : `${service.name}.${namespace}`
+              : undefined,
         ),
       },
     };
diff --git a/platform/src/components/component.ts b/platform/src/components/component.ts
index 1aa5fea935..fe5b68a233 100644
--- a/platform/src/components/component.ts
+++ b/platform/src/components/component.ts
@@ -238,6 +238,7 @@ export class Component extends ComponentResource {
             "aws:ec2/subnet:Subnet": ["tags", 255],
             "aws:ec2/vpc:Vpc": ["tags", 255],
             "aws:ec2/vpcEndpoint:VpcEndpoint": ["tags", 255],
+            "aws:ecs/capacityProvider:CapacityProvider": ["name", 255],
             "aws:ecs/cluster:Cluster": ["name", 255],
             "aws:elasticache/parameterGroup:ParameterGroup": [
               "name",

From c569145e2d742279bb719623801272c1d3018e84 Mon Sep 17 00:00:00 2001
From: mkilp <8791079+mkilp@users.noreply.github.com>
Date: Tue, 24 Mar 2026 10:48:05 -0400
Subject: [PATCH 3/9] feat: add instance profile and infra role inputs

---
 .../src/components/aws/managed-instances.ts   | 85 ++++++++++++-------
 platform/src/components/aws/service.ts        | 21 +++--
 2 files changed, 68 insertions(+), 38 deletions(-)

diff --git a/platform/src/components/aws/managed-instances.ts b/platform/src/components/aws/managed-instances.ts
index 6cc4ead302..8d1ca7ff71 100644
--- a/platform/src/components/aws/managed-instances.ts
+++ b/platform/src/components/aws/managed-instances.ts
@@ -53,6 +53,8 @@ type ManagedServiceArgs = {
   cpu?: Input<`${number} vCPU`>;
   memory?: Input<`${number} GB`>;
   storage?: Input<`${number} GB`>;
+  infrastructureRole?: Input<string>;
+  instanceProfile?: Input<string>;
 };
 
 type ManagedTaskDefinitionArgs = {
@@ -72,9 +74,10 @@ type ManagedTaskDefinitionArgs = {
 };
 
 type ManagedCapacityProviderArgs = {
+  infrastructureRole?: Input<string>;
+  instanceProfile?: Input<string>;
   transform?: {
     infrastructureRole?: Transform<iam.RoleArgs>;
-    instanceRole?: Transform<iam.RoleArgs>;
     capacityProvider?: Transform<ecs.CapacityProviderArgs>;
     instanceProfile?: Transform<iam.InstanceProfileArgs>;
   };
@@ -111,8 +114,25 @@ export function normalizeManagedCapacity(
   name: string,
   args: ManagedServiceArgs,
 ) {
-  return all([args.gpu, args.cpu, args.memory, args.storage]).apply(
-    ([gpu, cpu, memory, storage]) => {
+  return all([
+    args.gpu,
+    args.cpu,
+    args.memory,
+    args.storage,
+    args.infrastructureRole,
+    args.instanceProfile,
+  ]).apply(([gpu, cpu, memory, storage, infrastructureRole, instanceProfile]) => {
+      if (!infrastructureRole) {
+        throw new VisibleError(
+          `You must provide \"infrastructureRole\" for the \"${name}\" Service when \"gpu\" is set.`,
+        );
+      }
+      if (!instanceProfile) {
+        throw new VisibleError(
+          `You must provide \"instanceProfile\" for the \"${name}\" Service when \"gpu\" is set.`,
+        );
+      }
+
       const hostCpu = normalizeHostCpu(cpu);
       const hostMemory = normalizeHostMemory(memory);
       const hostStorage = normalizeStorage(storage);
@@ -125,8 +145,7 @@ export function normalizeManagedCapacity(
         hostStorage,
         gpu: normalizeGpu(gpu),
       } satisfies NormalizedManagedCapacity;
-    },
-  );
+    });
 
   function normalizeHostCpu(cpu?: `${number} vCPU`) {
     if (cpu) {
@@ -207,30 +226,33 @@ export function createManagedCapacityProvider(
 ) {
   const partition = getPartitionOutput({}, opts).partition;
 
-  const infrastructureRole = new iam.Role(
-    ...transform(
-      args.transform?.infrastructureRole,
-      `${name}ManagedInfrastructureRole`,
-      {
-        assumeRolePolicy: iam.assumeRolePolicyForPrincipal({
-          Service: "ecs.amazonaws.com",
-        }),
-        managedPolicyArns: [
-          interpolate`arn:${partition}:iam::aws:policy/AmazonECSInfrastructureRolePolicyForManagedInstances`,
-        ],
-      },
-      { parent },
-    ),
-  );
-
-  const instanceProfileArn = getOrCreateManagedInstanceProfile(
-    name,
-    partition,
-    args.transform?.instanceRole,
-    args.transform?.instanceProfile,
-    parent,
-    opts,
-  ).arn;
+  const infrastructureRoleArn = args.infrastructureRole
+    ? output(args.infrastructureRole)
+    : new iam.Role(
+        ...transform(
+          args.transform?.infrastructureRole,
+          `${name}ManagedInfrastructureRole`,
+          {
+            assumeRolePolicy: iam.assumeRolePolicyForPrincipal({
+              Service: "ecs.amazonaws.com",
+            }),
+            managedPolicyArns: [
+              interpolate`arn:${partition}:iam::aws:policy/AmazonECSInfrastructureRolePolicyForManagedInstances`,
+            ],
+          },
+          { parent },
+        ),
+      ).arn;
+
+  const instanceProfileArn = args.instanceProfile
+    ? output(args.instanceProfile)
+    : getOrCreateManagedInstanceProfile(
+        name,
+        partition,
+        args.transform?.instanceProfile,
+        parent,
+        opts,
+      ).arn;
 
   return new ecs.CapacityProvider(
     ...transform(
@@ -240,7 +262,7 @@ export function createManagedCapacityProvider(
         cluster: clusterName,
         managedInstancesProvider: all([
           normalized,
-          infrastructureRole.arn,
+          infrastructureRoleArn,
           instanceProfileArn,
           vpc.containerSubnets,
           vpc.securityGroups,
@@ -315,7 +337,6 @@ let defaultManagedInstanceProfile: iam.InstanceProfile | undefined;
 function getOrCreateManagedInstanceProfile(
   name: string,
   partition: Output<string>,
-  roleTransform: Transform<iam.RoleArgs> | undefined,
   profileTransform: Transform<iam.InstanceProfileArgs> | undefined,
   parent: Component,
   opts: ComponentResourceOptions,
@@ -328,7 +349,7 @@ function getOrCreateManagedInstanceProfile(
 
   const role = new iam.Role(
     ...transform(
-      roleTransform,
+      undefined,
       `${name}ManagedInstancesEcsInstanceRole`,
       {
         name: "ecsInstanceRole",
diff --git a/platform/src/components/aws/service.ts b/platform/src/components/aws/service.ts
index 2f47a727dc..d3a825dfc3 100644
--- a/platform/src/components/aws/service.ts
+++ b/platform/src/components/aws/service.ts
@@ -1237,11 +1237,21 @@ export interface ServiceArgs extends FargateBaseArgs {
    * {
    *   gpu: "nvidia/t4",
    *   cpu: "4 vCPU",
-   *   memory: "16 GB"
+   *   memory: "16 GB",
+   *   infrastructureRole: "arn:aws:iam::123456789012:role/ecs-infra",
+   *   instanceProfile: "arn:aws:iam::123456789012:instance-profile/ecs-managed"
    * }
    * ```
    */
   gpu?: Input<ManagedGpu>;
+  /**
+   * The ARN of an existing ECS infrastructure role to use for managed instances.
+   */
+  infrastructureRole?: Input<string>;
+  /**
+   * The ARN of an existing EC2 instance profile to use for managed instances.
+   */
+  instanceProfile?: Input<string>;
   /**
    * Configure the capacity provider; regular Fargate or Fargate Spot, for this service.
    *
@@ -1563,10 +1573,6 @@ export interface ServiceArgs extends FargateBaseArgs {
        * Transform the IAM infrastructure role resource created for managed instances.
        */
       infrastructureRole?: Transform<iam.RoleArgs>;
-      /**
-       * Transform the IAM instance role resource created for managed instances.
-       */
-      instanceRole?: Transform<iam.RoleArgs>;
       /**
        * Transform the ECS managed instances capacity provider resource.
        */
@@ -1858,9 +1864,10 @@ export class Service extends Component implements Link.Linkable {
       ? createManagedCapacityProvider(
           name,
           {
+            infrastructureRole: args.infrastructureRole,
+            instanceProfile: args.instanceProfile,
             transform: {
               infrastructureRole: args.transform?.infrastructureRole,
-              instanceRole: args.transform?.instanceRole,
               capacityProvider: args.transform?.capacityProvider,
               instanceProfile: args.transform?.instanceProfile,
             },
@@ -2054,6 +2061,8 @@ export class Service extends Component implements Link.Linkable {
         cpu: args.cpu,
         memory: args.memory,
         storage: args.storage,
+        infrastructureRole: args.infrastructureRole,
+        instanceProfile: args.instanceProfile,
       });
 
       return {

From 1697faccd62b1c56da7360e1e57b00c43a727700 Mon Sep 17 00:00:00 2001
From: mkilp <8791079+mkilp@users.noreply.github.com>
Date: Tue, 24 Mar 2026 12:35:54 -0400
Subject: [PATCH 4/9] fix: type completion for gpus

---
 .../src/components/aws/managed-instances.ts   | 52 +++++++++++++++++--
 platform/src/components/aws/service.ts        |  5 +-
 2 files changed, 52 insertions(+), 5 deletions(-)

diff --git a/platform/src/components/aws/managed-instances.ts b/platform/src/components/aws/managed-instances.ts
index 8d1ca7ff71..0ee9d5b6fd 100644
--- a/platform/src/components/aws/managed-instances.ts
+++ b/platform/src/components/aws/managed-instances.ts
@@ -29,17 +29,44 @@ import { bootstrap } from "./helpers/bootstrap.js";
 import { imageBuilder } from "./helpers/container-builder.js";
 import { normalizeContainers } from "./fargate.js";
 
-export const managedGpuManufacturers = ["nvidia"] as const;
+export const managedGpuManufacturers = [
+  "amazon-web-services",
+  "amd",
+  "nvidia",
+  "xilinx",
+  "habana",
+] as const;
 export const ManagedGpuAcceleratorName = {
   A100: "a100",
   A10G: "a10g",
   H100: "h100",
+  INFERENTIA: "inferentia",
   K520: "k520",
   K80: "k80",
   M60: "m60",
+  RADEON_PRO_V520: "radeon-pro-v520",
   T4: "t4",
   T4G: "t4g",
   V100: "v100",
+  VU9P: "vu9p",
+} as const;
+
+const managedGpuManufacturerNames = {
+  "amazon-web-services": [ManagedGpuAcceleratorName.INFERENTIA],
+  amd: [ManagedGpuAcceleratorName.RADEON_PRO_V520],
+  nvidia: [
+    ManagedGpuAcceleratorName.A100,
+    ManagedGpuAcceleratorName.A10G,
+    ManagedGpuAcceleratorName.H100,
+    ManagedGpuAcceleratorName.K520,
+    ManagedGpuAcceleratorName.K80,
+    ManagedGpuAcceleratorName.M60,
+    ManagedGpuAcceleratorName.T4,
+    ManagedGpuAcceleratorName.T4G,
+    ManagedGpuAcceleratorName.V100,
+  ],
+  xilinx: [ManagedGpuAcceleratorName.VU9P],
+  habana: [],
 } as const;
 
 export type ManagedGpuAcceleratorName =
@@ -183,11 +210,14 @@ export function normalizeManagedCapacity(
     return {
       count: { min: 1, max: 1 },
       manufacturer,
-      names: normalizeGpuNames(name),
+      names: normalizeGpuNames(manufacturer, name),
     };
   }
 
-  function normalizeGpuNames(name: ManagedGpuAcceleratorName) {
+  function normalizeGpuNames(
+    manufacturer: (typeof managedGpuManufacturers)[number],
+    name: ManagedGpuAcceleratorName,
+  ) {
     const names = [name];
     const supported = Object.values(ManagedGpuAcceleratorName);
     const invalid = names.filter((name) => !supported.includes(name));
@@ -195,11 +225,25 @@ export function normalizeManagedCapacity(
       throw new VisibleError(
         `Unsupported GPU accelerator name ${invalid
           .map((name) => `"${name}"`)
-          .join(", ")}. The supported NVIDIA values are ${supported
+          .join(", ")}. The supported values are ${supported
           .map((name) => `"${name}"`)
           .join(", ")}.`,
       );
     }
+
+    const supportedForManufacturer = managedGpuManufacturerNames[
+      manufacturer
+    ] as readonly ManagedGpuAcceleratorName[];
+    if (!supportedForManufacturer.includes(name)) {
+      const validNames = supportedForManufacturer
+        .map((name) => `"${name}"`)
+        .join(", ");
+      throw new VisibleError(
+        supportedForManufacturer.length > 0
+          ? `Unsupported GPU accelerator \"${manufacturer}/${name}\". The supported values for \"${manufacturer}\" are ${validNames}.`
+          : `Unsupported GPU accelerator \"${manufacturer}/${name}\". No accelerator names are currently supported for \"${manufacturer}\".`,
+      );
+    }
     return names;
   }
 
diff --git a/platform/src/components/aws/service.ts b/platform/src/components/aws/service.ts
index d3a825dfc3..7dc9348dc4 100644
--- a/platform/src/components/aws/service.ts
+++ b/platform/src/components/aws/service.ts
@@ -1242,7 +1242,10 @@ export interface ServiceArgs extends FargateBaseArgs {
    *   instanceProfile: "arn:aws:iam::123456789012:instance-profile/ecs-managed"
    * }
    * ```
-   */
+   *
+   * The GPU value must be in the form `<manufacturer>/<name>`. Valid manufacturers are
+   * `amazon-web-services`, `amd`, `nvidia`, `xilinx`, and `habana`.
+    */
   gpu?: Input<ManagedGpu>;
   /**
    * The ARN of an existing ECS infrastructure role to use for managed instances.

From b2c36899f60a947974ccf74b41dd58d3fdbb328a Mon Sep 17 00:00:00 2001
From: mkilp <8791079+mkilp@users.noreply.github.com>
Date: Tue, 24 Mar 2026 12:52:26 -0400
Subject: [PATCH 5/9] fix: whitespace in service

---
 platform/src/components/aws/service.ts | 862 ++++++++++++-------------
 1 file changed, 431 insertions(+), 431 deletions(-)

diff --git a/platform/src/components/aws/service.ts b/platform/src/components/aws/service.ts
index 7dc9348dc4..6f646664d2 100644
--- a/platform/src/components/aws/service.ts
+++ b/platform/src/components/aws/service.ts
@@ -656,437 +656,437 @@ export interface ServiceArgs extends FargateBaseArgs {
    */
   loadBalancer?: Input<
     | {
-        /**
-         * Configure if the load balancer should be public or private.
-         *
-         * When set to `false`, the load balancer endpoint will only be accessible within the
-         * VPC.
-         *
-         * @default `true`
-         */
-        public?: Input<boolean>;
-        /**
-         * Set a custom domain for your load balancer endpoint.
-         *
-         * Automatically manages domains hosted on AWS Route 53, Cloudflare, and Vercel. For other
-         * providers, you'll need to pass in a `cert` that validates domain ownership and add the
-         * DNS records.
-         *
-         * :::tip
-         * Built-in support for AWS Route 53, Cloudflare, and Vercel. And manual setup for other
-         * providers.
-         * :::
-         *
-         * @example
-         *
-         * By default this assumes the domain is hosted on Route 53.
-         *
-         * ```js
-         * {
-         *   domain: "example.com"
-         * }
-         * ```
-         *
-         * For domains hosted on Cloudflare.
-         *
-         * ```js
-         * {
-         *   domain: {
-         *     name: "example.com",
-         *     dns: sst.cloudflare.dns()
-         *   }
-         * }
-         * ```
-         */
-        domain?: Input<
-          | string
-          | {
-              /**
-               * The custom domain you want to use.
-               *
-               * @example
-               * ```js
-               * {
-               *   domain: {
-               *     name: "example.com"
-               *   }
-               * }
-               * ```
-               *
-               * Can also include subdomains based on the current stage.
-               *
-               * ```js
-               * {
-               *   domain: {
-               *     name: `${$app.stage}.example.com`
-               *   }
-               * }
-               * ```
-               *
-               * Wildcard domains are supported.
-               *
-               * ```js
-               * {
-               *   domain: {
-               *     name: "*.example.com"
-               *   }
-               * }
-               * ```
-               */
-              name: Input<string>;
-              /**
-               * Alias domains that should be used.
-               *
-               * @example
-               * ```js {4}
-               * {
-               *   domain: {
-               *     name: "app1.example.com",
-               *     aliases: ["app2.example.com"]
-               *   }
-               * }
-               * ```
-               */
-              aliases?: Input<string[]>;
-              /**
-               * The ARN of an ACM (AWS Certificate Manager) certificate that proves ownership of the
-               * domain. By default, a certificate is created and validated automatically.
-               *
-               * :::tip
-               * You need to pass in a `cert` for domains that are not hosted on supported `dns` providers.
-               * :::
-               *
-               * To manually set up a domain on an unsupported provider, you'll need to:
-               *
-               * 1. [Validate that you own the domain](https://docs.aws.amazon.com/acm/latest/userguide/domain-ownership-validation.html) by creating an ACM certificate. You can either validate it by setting a DNS record or by verifying an email sent to the domain owner.
-               * 2. Once validated, set the certificate ARN as the `cert` and set `dns` to `false`.
-               * 3. Add the DNS records in your provider to point to the load balancer endpoint.
-               *
-               * @example
-               * ```js
-               * {
-               *   domain: {
-               *     name: "example.com",
-               *     dns: false,
-               *     cert: "arn:aws:acm:us-east-1:112233445566:certificate/3a958790-8878-4cdc-a396-06d95064cf63"
-               *   }
-               * }
-               * ```
-               */
-              cert?: Input<string>;
-              /**
-               * The DNS provider to use for the domain. Defaults to the AWS.
-               *
-               * Takes an adapter that can create the DNS records on the provider. This can automate
-               * validating the domain and setting up the DNS routing.
-               *
-               * Supports Route 53, Cloudflare, and Vercel adapters. For other providers, you'll need
-               * to set `dns` to `false` and pass in a certificate validating ownership via `cert`.
-               *
-               * @default `sst.aws.dns`
-               *
-               * @example
-               *
-               * Specify the hosted zone ID for the Route 53 domain.
-               *
-               * ```js
-               * {
-               *   domain: {
-               *     name: "example.com",
-               *     dns: sst.aws.dns({
-               *       zone: "Z2FDTNDATAQYW2"
-               *     })
-               *   }
-               * }
-               * ```
-               *
-               * Use a domain hosted on Cloudflare, needs the Cloudflare provider.
-               *
-               * ```js
-               * {
-               *   domain: {
-               *     name: "example.com",
-               *     dns: sst.cloudflare.dns()
-               *   }
-               * }
-               * ```
-               *
-               * Use a domain hosted on Vercel, needs the Vercel provider.
-               *
-               * ```js
-               * {
-               *   domain: {
-               *     name: "example.com",
-               *     dns: sst.vercel.dns()
-               *   }
-               * }
-               * ```
-               */
-              dns?: Input<false | (Dns & {})>;
-            }
-        >;
-        /** @deprecated Use `rules` instead. */
-        ports?: Input<Prettify<ServiceRules>[]>;
-        /**
-         * Configure the mapping for the ports the load balancer listens to, forwards, or redirects to
-         * the service.
-         * This supports two types of protocols:
-         *
-         * 1. Application Layer Protocols: `http` and `https`. This'll create an [Application Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/introduction.html).
-         * 2. Network Layer Protocols: `tcp`, `udp`, `tcp_udp`, and `tls`. This'll create a [Network Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/network/introduction.html).
-         *
-         * :::note
-         * If you want to listen on `https` or `tls`, you need to specify a custom
-         * `loadBalancer.domain`.
-         * :::
-         *
-         * You **can not configure** both application and network layer protocols for the same
-         * service.
-         *
-         * @example
-         * Here we are listening on port `80` and forwarding it to the service on port `8080`.
-         * ```js
-         * {
-         *   rules: [
-         *     { listen: "80/http", forward: "8080/http" }
-         *   ]
-         * }
-         * ```
-         *
-         * The `forward` port and protocol defaults to the `listen` port and protocol. So in this
-         * case both are `80/http`.
-         *
-         * ```js
-         * {
-         *   rules: [
-         *     { listen: "80/http" }
-         *   ]
-         * }
-         * ```
-         *
-         * If multiple containers are configured via the `containers` argument, you need to
-         * specify which container the traffic should be forwarded to.
-         *
-         * ```js
-         * {
-         *   rules: [
-         *     { listen: "80/http", container: "app" },
-         *     { listen: "8000/http", container: "admin" }
-         *   ]
-         * }
-         * ```
-         *
-         * You can also route the same port to multiple containers via path-based routing.
-         *
-         * ```js
-         * {
-         *   rules: [
-         *     {
-         *       listen: "80/http",
-         *       container: "app",
-         *       conditions: { path: "/api/*" }
-         *     },
-         *     {
-         *       listen: "80/http",
-         *       container: "admin",
-         *       conditions: { path: "/admin/*" }
-         *     }
-         *   ]
-         * }
-         * ```
-         *
-         * Additionally, you can redirect traffic from one port to another. This is
-         * commonly used to redirect http to https.
-         *
-         * ```js
-         * {
-         *   rules: [
-         *     { listen: "80/http", redirect: "443/https" },
-         *     { listen: "443/https", forward: "80/http" }
-         *   ]
-         * }
-         * ```
-         */
-        rules?: Input<Prettify<ServiceRules>[]>;
-        /**
-         * Configure the health check that the load balancer runs on your containers.
-         *
-         * :::tip
-         * This health check is different from the [`health`](#health) check.
-         * :::
-         *
-         * This health check is run by the load balancer. While, `health` is run by ECS. This
-         * cannot be disabled if you are using a load balancer. While the other is off by default.
-         *
-         * Since this cannot be disabled, here are some tips on how to debug an unhealthy
-         * health check.
-         *
-         * <details>
-         * <summary>How to debug a load balancer health check</summary>
-         *
-         * If you notice a `Unhealthy: Health checks failed` error, it's because the health
-         * check has failed. When it fails, the load balancer will terminate the containers,
-         * causing any requests to fail.
-         *
-         * Here's how to debug it:
-         *
-         * 1. Verify the health check path.
-         *
-         *    By default, the load balancer checks the `/` path. Ensure it's accessible in your
-         *    containers. If your application runs on a different path, then update the path in
-         *    the health check config accordingly.
-         *
-         * 2. Confirm the containers are operational.
-         *
-         *    Navigate to **ECS console** > select the **cluster** > go to the **Tasks tab** >
-         *    choose **Any desired status** under the **Filter desired status** dropdown > select
-         *    a task and check for errors under the **Logs tab**. If it has error that means that
-         *    the container failed to start.
-         *
-         * 3. If the container was terminated by the load balancer while still starting up, try
-         *    increasing the health check interval and timeout.
-         * </details>
-         *
-         * For `http` and `https` the default is:
-         *
-         * ```js
-         * {
-         *   path: "/",
-         *   healthyThreshold: 5,
-         *   successCodes: "200",
-         *   timeout: "5 seconds",
-         *   unhealthyThreshold: 2,
-         *   interval: "30 seconds"
-         * }
-         * ```
-         *
-         * For `tcp` and `udp` the default is:
-         *
-         * ```js
-         * {
-         *   healthyThreshold: 5,
-         *   timeout: "6 seconds",
-         *   unhealthyThreshold: 2,
-         *   interval: "30 seconds"
-         * }
-         * ```
-         *
-         * @example
-         *
-         * To configure the health check, we use the _port/protocol_ format. Here we are
-         * configuring a health check that pings the `/health` path on port `8080`
-         * every 10 seconds.
-         *
-         * ```js
-         * {
-         *   rules: [
-         *     { listen: "80/http", forward: "8080/http" }
-         *   ],
-         *   health: {
-         *     "8080/http": {
-         *       path: "/health",
-         *       interval: "10 seconds"
-         *     }
-         *   }
-         * }
-         * ```
-         *
-         */
-        health?: Input<
-          Record<
-            Port,
-            Input<{
-              /**
-               * The URL path to ping on the service for health checks. Only applicable to
-               * `http` and `https` protocols.
-               * @default `"/"`
-               */
-              path?: Input<string>;
-              /**
-               * The time period between each health check request. Must be between `5 seconds`
-               * and `300 seconds`.
-               * @default `"30 seconds"`
-               */
-              interval?: Input<DurationMinutes>;
-              /**
-               * The timeout for each health check request. If no response is received within this
-               * time, it is considered failed. Must be between `2 seconds` and `120 seconds`.
-               * @default `"5 seconds"`
-               */
-              timeout?: Input<DurationMinutes>;
-              /**
-               * The number of consecutive successful health check requests required to consider the
-               * target healthy. Must be between 2 and 10.
-               * @default `5`
-               */
-              healthyThreshold?: Input<number>;
-              /**
-               * The number of consecutive failed health check requests required to consider the
-               * target unhealthy. Must be between 2 and 10.
-               * @default `2`
-               */
-              unhealthyThreshold?: Input<number>;
-              /**
-               * One or more HTTP response codes the health check treats as successful. Only
-               * applicable to `http` and `https` protocols.
-               *
-               * @default `"200"`
-               * @example
-               * ```js
-               * {
-               *   successCodes: "200-299"
-               * }
-               * ```
-               */
-              successCodes?: Input<string>;
-            }>
-          >
-        >;
-      }
+    /**
+     * Configure if the load balancer should be public or private.
+     *
+     * When set to `false`, the load balancer endpoint will only be accessible within the
+     * VPC.
+     *
+     * @default `true`
+     */
+    public?: Input<boolean>;
+    /**
+     * Set a custom domain for your load balancer endpoint.
+     *
+     * Automatically manages domains hosted on AWS Route 53, Cloudflare, and Vercel. For other
+     * providers, you'll need to pass in a `cert` that validates domain ownership and add the
+     * DNS records.
+     *
+     * :::tip
+     * Built-in support for AWS Route 53, Cloudflare, and Vercel. And manual setup for other
+     * providers.
+     * :::
+     *
+     * @example
+     *
+     * By default this assumes the domain is hosted on Route 53.
+     *
+     * ```js
+     * {
+     *   domain: "example.com"
+     * }
+     * ```
+     *
+     * For domains hosted on Cloudflare.
+     *
+     * ```js
+     * {
+     *   domain: {
+     *     name: "example.com",
+     *     dns: sst.cloudflare.dns()
+     *   }
+     * }
+     * ```
+     */
+    domain?: Input<
+      | string
+      | {
+          /**
+           * The custom domain you want to use.
+           *
+           * @example
+           * ```js
+           * {
+           *   domain: {
+           *     name: "example.com"
+           *   }
+           * }
+           * ```
+           *
+           * Can also include subdomains based on the current stage.
+           *
+           * ```js
+           * {
+           *   domain: {
+           *     name: `${$app.stage}.example.com`
+           *   }
+           * }
+           * ```
+           *
+           * Wildcard domains are supported.
+           *
+           * ```js
+           * {
+           *   domain: {
+           *     name: "*.example.com"
+           *   }
+           * }
+           * ```
+           */
+          name: Input<string>;
+          /**
+           * Alias domains that should be used.
+           *
+           * @example
+           * ```js {4}
+           * {
+           *   domain: {
+           *     name: "app1.example.com",
+           *     aliases: ["app2.example.com"]
+           *   }
+           * }
+           * ```
+           */
+          aliases?: Input<string[]>;
+          /**
+           * The ARN of an ACM (AWS Certificate Manager) certificate that proves ownership of the
+           * domain. By default, a certificate is created and validated automatically.
+           *
+           * :::tip
+           * You need to pass in a `cert` for domains that are not hosted on supported `dns` providers.
+           * :::
+           *
+           * To manually set up a domain on an unsupported provider, you'll need to:
+           *
+           * 1. [Validate that you own the domain](https://docs.aws.amazon.com/acm/latest/userguide/domain-ownership-validation.html) by creating an ACM certificate. You can either validate it by setting a DNS record or by verifying an email sent to the domain owner.
+           * 2. Once validated, set the certificate ARN as the `cert` and set `dns` to `false`.
+           * 3. Add the DNS records in your provider to point to the load balancer endpoint.
+           *
+           * @example
+           * ```js
+           * {
+           *   domain: {
+           *     name: "example.com",
+           *     dns: false,
+           *     cert: "arn:aws:acm:us-east-1:112233445566:certificate/3a958790-8878-4cdc-a396-06d95064cf63"
+           *   }
+           * }
+           * ```
+           */
+          cert?: Input<string>;
+          /**
+           * The DNS provider to use for the domain. Defaults to AWS Route 53.
+           *
+           * Takes an adapter that can create the DNS records on the provider. This can automate
+           * validating the domain and setting up the DNS routing.
+           *
+           * Supports Route 53, Cloudflare, and Vercel adapters. For other providers, you'll need
+           * to set `dns` to `false` and pass in a certificate validating ownership via `cert`.
+           *
+           * @default `sst.aws.dns`
+           *
+           * @example
+           *
+           * Specify the hosted zone ID for the Route 53 domain.
+           *
+           * ```js
+           * {
+           *   domain: {
+           *     name: "example.com",
+           *     dns: sst.aws.dns({
+           *       zone: "Z2FDTNDATAQYW2"
+           *     })
+           *   }
+           * }
+           * ```
+           *
+           * Use a domain hosted on Cloudflare, needs the Cloudflare provider.
+           *
+           * ```js
+           * {
+           *   domain: {
+           *     name: "example.com",
+           *     dns: sst.cloudflare.dns()
+           *   }
+           * }
+           * ```
+           *
+           * Use a domain hosted on Vercel, needs the Vercel provider.
+           *
+           * ```js
+           * {
+           *   domain: {
+           *     name: "example.com",
+           *     dns: sst.vercel.dns()
+           *   }
+           * }
+           * ```
+           */
+          dns?: Input<false | (Dns & {})>;
+        }
+    >;
+    /** @deprecated Use `rules` instead. */
+    ports?: Input<Prettify<ServiceRules>[]>;
+    /**
+     * Configure the mapping for the ports the load balancer listens to, forwards, or redirects to
+     * the service.
+     * This supports two types of protocols:
+     *
+     * 1. Application Layer Protocols: `http` and `https`. This'll create an [Application Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/introduction.html).
+     * 2. Network Layer Protocols: `tcp`, `udp`, `tcp_udp`, and `tls`. This'll create a [Network Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/network/introduction.html).
+     *
+     * :::note
+     * If you want to listen on `https` or `tls`, you need to specify a custom
+     * `loadBalancer.domain`.
+     * :::
+     *
+     * You **can not configure** both application and network layer protocols for the same
+     * service.
+     *
+     * @example
+     * Here we are listening on port `80` and forwarding it to the service on port `8080`.
+     * ```js
+     * {
+     *   rules: [
+     *     { listen: "80/http", forward: "8080/http" }
+     *   ]
+     * }
+     * ```
+     *
+     * The `forward` port and protocol defaults to the `listen` port and protocol. So in this
+     * case both are `80/http`.
+     *
+     * ```js
+     * {
+     *   rules: [
+     *     { listen: "80/http" }
+     *   ]
+     * }
+     * ```
+     *
+     * If multiple containers are configured via the `containers` argument, you need to
+     * specify which container the traffic should be forwarded to.
+     *
+     * ```js
+     * {
+     *   rules: [
+     *     { listen: "80/http", container: "app" },
+     *     { listen: "8000/http", container: "admin" }
+     *   ]
+     * }
+     * ```
+     *
+     * You can also route the same port to multiple containers via path-based routing.
+     *
+     * ```js
+     * {
+     *   rules: [
+     *     {
+     *       listen: "80/http",
+     *       container: "app",
+     *       conditions: { path: "/api/*" }
+     *     },
+     *     {
+     *       listen: "80/http",
+     *       container: "admin",
+     *       conditions: { path: "/admin/*" }
+     *     }
+     *   ]
+     * }
+     * ```
+     *
+     * Additionally, you can redirect traffic from one port to another. This is
+     * commonly used to redirect http to https.
+     *
+     * ```js
+     * {
+     *   rules: [
+     *     { listen: "80/http", redirect: "443/https" },
+     *     { listen: "443/https", forward: "80/http" }
+     *   ]
+     * }
+     * ```
+     */
+    rules?: Input<Prettify<ServiceRules>[]>;
+    /**
+     * Configure the health check that the load balancer runs on your containers.
+     *
+     * :::tip
+     * This health check is different from the [`health`](#health) check.
+     * :::
+     *
+     * This health check is run by the load balancer, while `health` is run by ECS. This one
+     * cannot be disabled if you are using a load balancer, while the other is off by default.
+     *
+     * Since this cannot be disabled, here are some tips on how to debug an unhealthy
+     * health check.
+     *
+     * <details>
+     * <summary>How to debug a load balancer health check</summary>
+     *
+     * If you notice an `Unhealthy: Health checks failed` error, it's because the health
+     * check has failed. When it fails, the load balancer will terminate the containers,
+     * causing any requests to fail.
+     *
+     * Here's how to debug it:
+     *
+     * 1. Verify the health check path.
+     *
+     *    By default, the load balancer checks the `/` path. Ensure it's accessible in your
+     *    containers. If your application runs on a different path, then update the path in
+     *    the health check config accordingly.
+     *
+     * 2. Confirm the containers are operational.
+     *
+     *    Navigate to **ECS console** > select the **cluster** > go to the **Tasks tab** >
+     *    choose **Any desired status** under the **Filter desired status** dropdown > select
+     *    a task and check for errors under the **Logs tab**. If it has an error, that means that
+     *    the container failed to start.
+     *
+     * 3. If the container was terminated by the load balancer while still starting up, try
+     *    increasing the health check interval and timeout.
+     * </details>
+     *
+     * For `http` and `https` the default is:
+     *
+     * ```js
+     * {
+     *   path: "/",
+     *   healthyThreshold: 5,
+     *   successCodes: "200",
+     *   timeout: "5 seconds",
+     *   unhealthyThreshold: 2,
+     *   interval: "30 seconds"
+     * }
+     * ```
+     *
+     * For `tcp` and `udp` the default is:
+     *
+     * ```js
+     * {
+     *   healthyThreshold: 5,
+     *   timeout: "6 seconds",
+     *   unhealthyThreshold: 2,
+     *   interval: "30 seconds"
+     * }
+     * ```
+     *
+     * @example
+     *
+     * To configure the health check, we use the _port/protocol_ format. Here we are
+     * configuring a health check that pings the `/health` path on port `8080`
+     * every 10 seconds.
+     *
+     * ```js
+     * {
+     *   rules: [
+     *     { listen: "80/http", forward: "8080/http" }
+     *   ],
+     *   health: {
+     *     "8080/http": {
+     *       path: "/health",
+     *       interval: "10 seconds"
+     *     }
+     *   }
+     * }
+     * ```
+     *
+     */
+    health?: Input<
+      Record<
+        Port,
+        Input<{
+          /**
+           * The URL path to ping on the service for health checks. Only applicable to
+           * `http` and `https` protocols.
+           * @default `"/"`
+           */
+          path?: Input<string>;
+          /**
+           * The time period between each health check request. Must be between `5 seconds`
+           * and `300 seconds`.
+           * @default `"30 seconds"`
+           */
+          interval?: Input<DurationMinutes>;
+          /**
+           * The timeout for each health check request. If no response is received within this
+           * time, it is considered failed. Must be between `2 seconds` and `120 seconds`.
+           * @default `"5 seconds"`
+           */
+          timeout?: Input<DurationMinutes>;
+          /**
+           * The number of consecutive successful health check requests required to consider the
+           * target healthy. Must be between 2 and 10.
+           * @default `5`
+           */
+          healthyThreshold?: Input<number>;
+          /**
+           * The number of consecutive failed health check requests required to consider the
+           * target unhealthy. Must be between 2 and 10.
+           * @default `2`
+           */
+          unhealthyThreshold?: Input<number>;
+          /**
+           * One or more HTTP response codes the health check treats as successful. Only
+           * applicable to `http` and `https` protocols.
+           *
+           * @default `"200"`
+           * @example
+           * ```js
+           * {
+           *   successCodes: "200-299"
+           * }
+           * ```
+           */
+          successCodes?: Input<string>;
+        }>
+      >
+    >;
+  }
     | {
-        /**
-         * The `Alb` instance to attach this service to. When provided, the service creates
-         * target groups and listener rules on the shared ALB instead of creating its own
-         * load balancer.
-         *
-         * ECS tasks use the VPC's default security group, which allows all traffic within the
-         * VPC CIDR. For tighter security, add an explicit security group ingress rule from the
-         * ALB's security group using `transform`.
-         *
-         * @example
-         * ```js
-         * {
-         *   loadBalancer: {
-         *     instance: alb,
-         *     rules: [
-         *       { listen: "443/https", forward: "8080/http", conditions: { path: "/api/*" }, priority: 100 }
-         *     ]
-         *   }
-         * }
-         * ```
-         */
-        instance: Alb;
-        /**
-         * The rules for routing traffic from the ALB to this service's containers.
-         * Each rule must have explicit conditions and priority.
-         */
-        rules: Prettify<ServiceAlbRule>[];
-        /**
-         * Configure health checks for the target groups. Uses the same format as the inline
-         * health check config, keyed by `{port}/{protocol}`.
-         */
-        health?: Record<
-          AlbPort,
-          Input<{
-            path?: Input<string>;
-            interval?: Input<DurationMinutes>;
-            timeout?: Input<DurationMinutes>;
-            healthyThreshold?: Input<number>;
-            unhealthyThreshold?: Input<number>;
-            successCodes?: Input<string>;
-          }>
-        >;
-      }
+    /**
+     * The `Alb` instance to attach this service to. When provided, the service creates
+     * target groups and listener rules on the shared ALB instead of creating its own
+     * load balancer.
+     *
+     * ECS tasks use the VPC's default security group, which allows all traffic within the
+     * VPC CIDR. For tighter security, add an explicit security group ingress rule from the
+     * ALB's security group using `transform`.
+     *
+     * @example
+     * ```js
+     * {
+     *   loadBalancer: {
+     *     instance: alb,
+     *     rules: [
+     *       { listen: "443/https", forward: "8080/http", conditions: { path: "/api/*" }, priority: 100 }
+     *     ]
+     *   }
+     * }
+     * ```
+     */
+    instance: Alb;
+    /**
+     * The rules for routing traffic from the ALB to this service's containers.
+     * Each rule must have explicit conditions and priority.
+     */
+    rules: Prettify<ServiceAlbRule>[];
+    /**
+     * Configure health checks for the target groups. Uses the same format as the inline
+     * health check config, keyed by `{port}/{protocol}`.
+     */
+    health?: Record<
+      AlbPort,
+      Input<{
+        path?: Input<string>;
+        interval?: Input<DurationMinutes>;
+        timeout?: Input<DurationMinutes>;
+        healthyThreshold?: Input<number>;
+        unhealthyThreshold?: Input<number>;
+        successCodes?: Input<string>;
+      }>
+    >;
+  }
   >;
   /**
    * Configure the CloudMap service registry for the service.
@@ -1245,7 +1245,7 @@ export interface ServiceArgs extends FargateBaseArgs {
    *
    * The GPU value must be in the form `<manufacturer>/<name>`. Valid manufacturers are
    * `amazon-web-services`, `amd`, `nvidia`, `xilinx`, and `habana`.
-    */
+   */
   gpu?: Input<ManagedGpu>;
   /**
    * The ARN of an existing ECS infrastructure role to use for managed instances.

From 1b14ca622cc118e45d0a0a1981239894ac0c992c Mon Sep 17 00:00:00 2001
From: mkilp <8791079+mkilp@users.noreply.github.com>
Date: Tue, 24 Mar 2026 12:58:45 -0400
Subject: [PATCH 6/9] fix: remove intersection types to fix docs

---
 platform/src/components/aws/service.ts | 27 ++++++++++++--------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/platform/src/components/aws/service.ts b/platform/src/components/aws/service.ts
index 6f646664d2..ea16f8d72c 100644
--- a/platform/src/components/aws/service.ts
+++ b/platform/src/components/aws/service.ts
@@ -341,17 +341,6 @@ interface ServiceContainerArgs extends FargateContainerArgs {
   };
 }
 
-type ServiceFargateCapacity = {
-  fargate?: {
-    base?: Input<number>;
-    weight: Input<number>;
-  };
-  spot?: {
-    base?: Input<number>;
-    weight: Input<number>;
-  };
-};
-
 export interface ServiceArgs extends FargateBaseArgs {
   /**
    * Configure how this component works in `sst dev`.
@@ -1361,7 +1350,7 @@ export interface ServiceArgs extends FargateBaseArgs {
    */
   capacity?: Input<
     | "spot"
-    | (ServiceFargateCapacity & {
+    | {
         /**
          * Configure how the regular Fargate capacity is allocated.
          */
@@ -1397,7 +1386,7 @@ export interface ServiceArgs extends FargateBaseArgs {
           weight: Input<number>;
         }>;
         managed?: never;
-      })
+      }
   >;
   /**
    * Configure the health check that ECS runs on your containers.
@@ -2044,10 +2033,18 @@ export class Service extends Component implements Link.Linkable {
       if (!args.capacity) return;
 
       return output(args.capacity).apply(
-        (v): ServiceFargateCapacity | undefined => {
+        (v):
+          | {
+              fargate?: { base?: Input<number>; weight: Input<number> };
+              spot?: { base?: Input<number>; weight: Input<number> };
+            }
+          | undefined => {
           if (v === "spot")
             return { spot: { weight: 1 }, fargate: { weight: 0 } };
-          const fargateCapacity = v as ServiceFargateCapacity;
+          const fargateCapacity = v as {
+            fargate?: { base?: Input<number>; weight: Input<number> };
+            spot?: { base?: Input<number>; weight: Input<number> };
+          };
           return {
             fargate: fargateCapacity.fargate,
             spot: fargateCapacity.spot,

From d36a5c6c9706cd226f0abf5cbde4ea9d955d1112 Mon Sep 17 00:00:00 2001
From: mkilp <8791079+mkilp@users.noreply.github.com>
Date: Tue, 31 Mar 2026 12:11:34 -0400
Subject: [PATCH 7/9] feat(ecs): enhance GPU management and API integration

---
 examples/aws-ecs-gpus/app.py                  | 17 ++++++++++-
 examples/aws-ecs-gpus/sst.config.ts           | 16 ++++++++---
 .../src/components/aws/managed-instances.ts   | 28 ++++---------------
 platform/src/components/aws/service.ts        | 13 +++++++--
 4 files changed, 44 insertions(+), 30 deletions(-)

diff --git a/examples/aws-ecs-gpus/app.py b/examples/aws-ecs-gpus/app.py
index 152d62ef81..eef93dfd30 100644
--- a/examples/aws-ecs-gpus/app.py
+++ b/examples/aws-ecs-gpus/app.py
@@ -1,8 +1,17 @@
 from http.server import BaseHTTPRequestHandler, HTTPServer
+import glob
 import json
 import os
 
 
+def read_file(path):
+    try:
+        with open(path, "r", encoding="utf-8") as f:
+            return f.read().strip()
+    except FileNotFoundError:
+        return None
+
+
 class Handler(BaseHTTPRequestHandler):
     def do_GET(self):
         if self.path == "/health":
@@ -12,7 +21,13 @@ def do_GET(self):
         self.respond(
             {
                 "message": "hello from ecs managed instances",
-                "gpu": os.getenv("NVIDIA_VISIBLE_DEVICES", "unknown"),
+                "gpu": {
+                    "visibleDevicesEnv": os.getenv("NVIDIA_VISIBLE_DEVICES"),
+                    "deviceFiles": sorted(glob.glob("/dev/nvidia*")),
+                    "procGpus": sorted(glob.glob("/proc/driver/nvidia/gpus/*")),
+                    "driverVersion": read_file("/proc/driver/nvidia/version"),
+                    "cudaVersion": os.getenv("CUDA_VERSION"),
+                },
             }
         )
 
diff --git a/examples/aws-ecs-gpus/sst.config.ts b/examples/aws-ecs-gpus/sst.config.ts
index 4c67e9c0dc..6bd7f5420e 100644
--- a/examples/aws-ecs-gpus/sst.config.ts
+++ b/examples/aws-ecs-gpus/sst.config.ts
@@ -6,6 +6,9 @@
  * A minimal ECS service running on ECS Managed Instances with a GPU-enabled host.
  * The service uses top-level `gpu`, `cpu`, `memory`, and `storage` settings, while
  * the managed instances IAM resources remain customizable through `transform`.
+ *
+ * An API Gateway HTTP API routes privately to the service through its Cloud Map registry, so
+ * the service can be tested without exposing a public load balancer.
  */
 export default $config({
   app(input) {
@@ -19,19 +22,24 @@ export default $config({
     const vpc = new sst.aws.Vpc("MyVpc");
     const cluster = new sst.aws.Cluster("MyCluster", { vpc });
 
+    // With these GPU, CPU, and memory settings, ECS provisions a g4dn.xlarge host.
     const service = new sst.aws.Service("MyService", {
       cluster,
       image: { context: "./" },
       gpu: "nvidia/t4",
       cpu: "4 vCPU",
-      memory: "16 GB",
-      loadBalancer: {
-        ports: [{ listen: "80/http", forward: "8000/http" }],
+      memory: "10 GB",
+      serviceRegistry: {
+        port: 8000,
       },
     });
 
+    const api = new sst.aws.ApiGatewayV2("MyApi", { vpc });
+    api.routePrivate("$default", service.nodes.cloudmapService.arn);
+
     return {
-      url: service.url,
+      service: service.service,
+      api: api.url,
     };
   },
 });
diff --git a/platform/src/components/aws/managed-instances.ts b/platform/src/components/aws/managed-instances.ts
index 0ee9d5b6fd..c9c01cc55a 100644
--- a/platform/src/components/aws/managed-instances.ts
+++ b/platform/src/components/aws/managed-instances.ts
@@ -141,25 +141,8 @@ export function normalizeManagedCapacity(
   name: string,
   args: ManagedServiceArgs,
 ) {
-  return all([
-    args.gpu,
-    args.cpu,
-    args.memory,
-    args.storage,
-    args.infrastructureRole,
-    args.instanceProfile,
-  ]).apply(([gpu, cpu, memory, storage, infrastructureRole, instanceProfile]) => {
-      if (!infrastructureRole) {
-        throw new VisibleError(
-          `You must provide \"infrastructureRole\" for the \"${name}\" Service when \"gpu\" is set.`,
-        );
-      }
-      if (!instanceProfile) {
-        throw new VisibleError(
-          `You must provide \"instanceProfile\" for the \"${name}\" Service when \"gpu\" is set.`,
-        );
-      }
-
+  return all([args.gpu, args.cpu, args.memory, args.storage]).apply(
+    ([gpu, cpu, memory, storage]) => {
       const hostCpu = normalizeHostCpu(cpu);
       const hostMemory = normalizeHostMemory(memory);
       const hostStorage = normalizeStorage(storage);
@@ -172,12 +155,13 @@ export function normalizeManagedCapacity(
         hostStorage,
         gpu: normalizeGpu(gpu),
       } satisfies NormalizedManagedCapacity;
-    });
+    },
+  );
 
   function normalizeHostCpu(cpu?: `${number} vCPU`) {
     if (cpu) {
       const min = parseFloat(cpu.split(" ")[0]);
-      return { min, max: min };
+      return { min };
     }
     throw new VisibleError(
       `You must provide top-level \"cpu\" for the \"${name}\" Service when \"gpu\" is set.`,
@@ -187,7 +171,7 @@ export function normalizeManagedCapacity(
   function normalizeHostMemory(memory?: `${number} GB`) {
     if (memory) {
       const min = toMBs(memory);
-      return { min, max: min };
+      return { min };
     }
     throw new VisibleError(
       `You must provide top-level \"memory\" for the \"${name}\" Service when \"gpu\" is set.`,
diff --git a/platform/src/components/aws/service.ts b/platform/src/components/aws/service.ts
index ea16f8d72c..6a8153e985 100644
--- a/platform/src/components/aws/service.ts
+++ b/platform/src/components/aws/service.ts
@@ -1226,12 +1226,14 @@ export interface ServiceArgs extends FargateBaseArgs {
    * {
    *   gpu: "nvidia/t4",
    *   cpu: "4 vCPU",
-   *   memory: "16 GB",
-   *   infrastructureRole: "arn:aws:iam::123456789012:role/ecs-infra",
-   *   instanceProfile: "arn:aws:iam::123456789012:instance-profile/ecs-managed"
+   *   memory: "10 GB"
    * }
    * ```
    *
+   * By default, SST creates the managed instances infrastructure role and instance profile for
+   * you. You can override them with `infrastructureRole`, `instanceProfile`, or the
+   * corresponding `transform` hooks.
+   *
    * The GPU value must be in the form `<manufacturer>/<name>`. Valid manufacturers are
    * `amazon-web-services`, `amd`, `nvidia`, `xilinx`, and `habana`.
    */
@@ -2538,6 +2540,9 @@ export class Service extends Component implements Link.Linkable {
                 desiredCount: scaling.min,
                 ...(managed
                   ? {
+                      // Managed capacity providers cannot be deleted while ECS still has
+                      // tasks draining for the service.
+                      forceDelete: true,
                       forceNewDeployment: true,
                       capacityProviderStrategies: [
                         {
@@ -2618,6 +2623,8 @@ export class Service extends Component implements Link.Linkable {
     }
 
     function createAutoScaling() {
+      if (!args.scaling) return;
+
       const target = new appautoscaling.Target(
         ...transform(
           args.transform?.autoScalingTarget,

From 7e2fff47096f33ae68fd1867ee3639435c4f4134 Mon Sep 17 00:00:00 2001
From: mkilp <8791079+mkilp@users.noreply.github.com>
Date: Wed, 1 Apr 2026 08:49:16 -0400
Subject: [PATCH 8/9] refactor(service): remove forceDelete option from managed
 capacity

---
 platform/src/components/aws/service.ts | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/platform/src/components/aws/service.ts b/platform/src/components/aws/service.ts
index 6a8153e985..223c6c3ea0 100644
--- a/platform/src/components/aws/service.ts
+++ b/platform/src/components/aws/service.ts
@@ -2540,9 +2540,6 @@ export class Service extends Component implements Link.Linkable {
                 desiredCount: scaling.min,
                 ...(managed
                   ? {
-                      // Managed capacity providers cannot be deleted while ECS still has
-                      // tasks draining for the service.
-                      forceDelete: true,
                       forceNewDeployment: true,
                       capacityProviderStrategies: [
                         {

From 886d7e0c47d5649b6d05a050cb75fde733767d74 Mon Sep 17 00:00:00 2001
From: mkilp <8791079+mkilp@users.noreply.github.com>
Date: Wed, 1 Apr 2026 16:34:34 -0400
Subject: [PATCH 9/9] feat: depend service on vpc to prevent route table
 teardown before service teardown

---
 examples/aws-ecs-gpus/Dockerfile              |   11 +-
 examples/aws-ecs-gpus/app.py                  |   65 +-
 examples/aws-ecs-gpus/requirements.txt        |    1 +
 .../src/components/aws/managed-instances.ts   |    1 -
 platform/src/components/aws/service.ts        | 1041 +++++++++--------
 5 files changed, 557 insertions(+), 562 deletions(-)
 create mode 100644 examples/aws-ecs-gpus/requirements.txt

diff --git a/examples/aws-ecs-gpus/Dockerfile b/examples/aws-ecs-gpus/Dockerfile
index b0a57dfb4c..f56431cd8b 100644
--- a/examples/aws-ecs-gpus/Dockerfile
+++ b/examples/aws-ecs-gpus/Dockerfile
@@ -1,12 +1,13 @@
 FROM python:3.11-slim
 
-WORKDIR /app
+WORKDIR /code
 
-ENV PYTHONDONTWRITEBYTECODE=1
-ENV PYTHONUNBUFFERED=1
+COPY requirements.txt /code/requirements.txt
 
-COPY app.py /app/app.py
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+COPY app.py /code/app.py
 
 EXPOSE 8000
 
-ENTRYPOINT ["python", "/app/app.py"]
+CMD ["fastapi", "run", "app.py", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/examples/aws-ecs-gpus/app.py b/examples/aws-ecs-gpus/app.py
index eef93dfd30..54897b297d 100644
--- a/examples/aws-ecs-gpus/app.py
+++ b/examples/aws-ecs-gpus/app.py
@@ -1,49 +1,34 @@
-from http.server import BaseHTTPRequestHandler, HTTPServer
+from pathlib import Path
 import glob
-import json
 import os
 
+from fastapi import FastAPI
 
-def read_file(path):
+
+app = FastAPI()
+
+
+def read_file(path: str):
     try:
-        with open(path, "r", encoding="utf-8") as f:
-            return f.read().strip()
+        return Path(path).read_text(encoding="utf-8").strip()
     except FileNotFoundError:
         return None
 
 
-class Handler(BaseHTTPRequestHandler):
-    def do_GET(self):
-        if self.path == "/health":
-            self.respond({"ok": True})
-            return
-
-        self.respond(
-            {
-                "message": "hello from ecs managed instances",
-                "gpu": {
-                    "visibleDevicesEnv": os.getenv("NVIDIA_VISIBLE_DEVICES"),
-                    "deviceFiles": sorted(glob.glob("/dev/nvidia*")),
-                    "procGpus": sorted(glob.glob("/proc/driver/nvidia/gpus/*")),
-                    "driverVersion": read_file("/proc/driver/nvidia/version"),
-                    "cudaVersion": os.getenv("CUDA_VERSION"),
-                },
-            }
-        )
-
-    def log_message(self, format, *args):
-        return
-
-    def respond(self, payload):
-        body = json.dumps(payload).encode("utf-8")
-        self.send_response(200)
-        self.send_header("Content-Type", "application/json")
-        self.send_header("Content-Length", str(len(body)))
-        self.end_headers()
-        self.wfile.write(body)
-
-
-if __name__ == "__main__":
-    port = int(os.getenv("PORT", "8000"))
-    server = HTTPServer(("0.0.0.0", port), Handler)
-    server.serve_forever()
+@app.get("/health")
+def health():
+    return {"ok": True}
+
+
+@app.get("/")
+def index():
+    return {
+        "message": "hello from ecs managed instances",
+        "gpu": {
+            "visibleDevicesEnv": os.getenv("NVIDIA_VISIBLE_DEVICES"),
+            "deviceFiles": sorted(glob.glob("/dev/nvidia*")),
+            "procGpus": sorted(glob.glob("/proc/driver/nvidia/gpus/*")),
+            "driverVersion": read_file("/proc/driver/nvidia/version"),
+            "cudaVersion": os.getenv("CUDA_VERSION"),
+        },
+    }
diff --git a/examples/aws-ecs-gpus/requirements.txt b/examples/aws-ecs-gpus/requirements.txt
new file mode 100644
index 0000000000..8b6ce6ac72
--- /dev/null
+++ b/examples/aws-ecs-gpus/requirements.txt
@@ -0,0 +1 @@
+fastapi[standard]>=0.115.0,<1.0.0
diff --git a/platform/src/components/aws/managed-instances.ts b/platform/src/components/aws/managed-instances.ts
index c9c01cc55a..0b9525e4c2 100644
--- a/platform/src/components/aws/managed-instances.ts
+++ b/platform/src/components/aws/managed-instances.ts
@@ -533,7 +533,6 @@ export function createManagedTaskDefinition(
           interval: toSeconds(container.health.interval ?? "30 seconds"),
           retries: container.health.retries ?? 3,
         },
-        pseudoTerminal: true,
         portMappings: [{ containerPortRange: "1-65535" }],
         logConfiguration: {
           logDriver: "awslogs",
diff --git a/platform/src/components/aws/service.ts b/platform/src/components/aws/service.ts
index 223c6c3ea0..76fc1d7ac6 100644
--- a/platform/src/components/aws/service.ts
+++ b/platform/src/components/aws/service.ts
@@ -645,437 +645,437 @@ export interface ServiceArgs extends FargateBaseArgs {
    */
   loadBalancer?: Input<
     | {
-    /**
-     * Configure if the load balancer should be public or private.
-     *
-     * When set to `false`, the load balancer endpoint will only be accessible within the
-     * VPC.
-     *
-     * @default `true`
-     */
-    public?: Input<boolean>;
-    /**
-     * Set a custom domain for your load balancer endpoint.
-     *
-     * Automatically manages domains hosted on AWS Route 53, Cloudflare, and Vercel. For other
-     * providers, you'll need to pass in a `cert` that validates domain ownership and add the
-     * DNS records.
-     *
-     * :::tip
-     * Built-in support for AWS Route 53, Cloudflare, and Vercel. And manual setup for other
-     * providers.
-     * :::
-     *
-     * @example
-     *
-     * By default this assumes the domain is hosted on Route 53.
-     *
-     * ```js
-     * {
-     *   domain: "example.com"
-     * }
-     * ```
-     *
-     * For domains hosted on Cloudflare.
-     *
-     * ```js
-     * {
-     *   domain: {
-     *     name: "example.com",
-     *     dns: sst.cloudflare.dns()
-     *   }
-     * }
-     * ```
-     */
-    domain?: Input<
-      | string
-      | {
-          /**
-           * The custom domain you want to use.
-           *
-           * @example
-           * ```js
-           * {
-           *   domain: {
-           *     name: "example.com"
-           *   }
-           * }
-           * ```
-           *
-           * Can also include subdomains based on the current stage.
-           *
-           * ```js
-           * {
-           *   domain: {
-           *     name: `${$app.stage}.example.com`
-           *   }
-           * }
-           * ```
-           *
-           * Wildcard domains are supported.
-           *
-           * ```js
-           * {
-           *   domain: {
-           *     name: "*.example.com"
-           *   }
-           * }
-           * ```
-           */
-          name: Input<string>;
-          /**
-           * Alias domains that should be used.
-           *
-           * @example
-           * ```js {4}
-           * {
-           *   domain: {
-           *     name: "app1.example.com",
-           *     aliases: ["app2.example.com"]
-           *   }
-           * }
-           * ```
-           */
-          aliases?: Input<string[]>;
-          /**
-           * The ARN of an ACM (AWS Certificate Manager) certificate that proves ownership of the
-           * domain. By default, a certificate is created and validated automatically.
-           *
-           * :::tip
-           * You need to pass in a `cert` for domains that are not hosted on supported `dns` providers.
-           * :::
-           *
-           * To manually set up a domain on an unsupported provider, you'll need to:
-           *
-           * 1. [Validate that you own the domain](https://docs.aws.amazon.com/acm/latest/userguide/domain-ownership-validation.html) by creating an ACM certificate. You can either validate it by setting a DNS record or by verifying an email sent to the domain owner.
-           * 2. Once validated, set the certificate ARN as the `cert` and set `dns` to `false`.
-           * 3. Add the DNS records in your provider to point to the load balancer endpoint.
-           *
-           * @example
-           * ```js
-           * {
-           *   domain: {
-           *     name: "example.com",
-           *     dns: false,
-           *     cert: "arn:aws:acm:us-east-1:112233445566:certificate/3a958790-8878-4cdc-a396-06d95064cf63"
-           *   }
-           * }
-           * ```
-           */
-          cert?: Input<string>;
-          /**
-           * The DNS provider to use for the domain. Defaults to the AWS.
-           *
-           * Takes an adapter that can create the DNS records on the provider. This can automate
-           * validating the domain and setting up the DNS routing.
-           *
-           * Supports Route 53, Cloudflare, and Vercel adapters. For other providers, you'll need
-           * to set `dns` to `false` and pass in a certificate validating ownership via `cert`.
-           *
-           * @default `sst.aws.dns`
-           *
-           * @example
-           *
-           * Specify the hosted zone ID for the Route 53 domain.
-           *
-           * ```js
-           * {
-           *   domain: {
-           *     name: "example.com",
-           *     dns: sst.aws.dns({
-           *       zone: "Z2FDTNDATAQYW2"
-           *     })
-           *   }
-           * }
-           * ```
-           *
-           * Use a domain hosted on Cloudflare, needs the Cloudflare provider.
-           *
-           * ```js
-           * {
-           *   domain: {
-           *     name: "example.com",
-           *     dns: sst.cloudflare.dns()
-           *   }
-           * }
-           * ```
-           *
-           * Use a domain hosted on Vercel, needs the Vercel provider.
-           *
-           * ```js
-           * {
-           *   domain: {
-           *     name: "example.com",
-           *     dns: sst.vercel.dns()
-           *   }
-           * }
-           * ```
-           */
-          dns?: Input<false | (Dns & {})>;
-        }
-    >;
-    /** @deprecated Use `rules` instead. */
-    ports?: Input<Prettify<ServiceRules>[]>;
-    /**
-     * Configure the mapping for the ports the load balancer listens to, forwards, or redirects to
-     * the service.
-     * This supports two types of protocols:
-     *
-     * 1. Application Layer Protocols: `http` and `https`. This'll create an [Application Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/introduction.html).
-     * 2. Network Layer Protocols: `tcp`, `udp`, `tcp_udp`, and `tls`. This'll create a [Network Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/network/introduction.html).
-     *
-     * :::note
-     * If you want to listen on `https` or `tls`, you need to specify a custom
-     * `loadBalancer.domain`.
-     * :::
-     *
-     * You **can not configure** both application and network layer protocols for the same
-     * service.
-     *
-     * @example
-     * Here we are listening on port `80` and forwarding it to the service on port `8080`.
-     * ```js
-     * {
-     *   rules: [
-     *     { listen: "80/http", forward: "8080/http" }
-     *   ]
-     * }
-     * ```
-     *
-     * The `forward` port and protocol defaults to the `listen` port and protocol. So in this
-     * case both are `80/http`.
-     *
-     * ```js
-     * {
-     *   rules: [
-     *     { listen: "80/http" }
-     *   ]
-     * }
-     * ```
-     *
-     * If multiple containers are configured via the `containers` argument, you need to
-     * specify which container the traffic should be forwarded to.
-     *
-     * ```js
-     * {
-     *   rules: [
-     *     { listen: "80/http", container: "app" },
-     *     { listen: "8000/http", container: "admin" }
-     *   ]
-     * }
-     * ```
-     *
-     * You can also route the same port to multiple containers via path-based routing.
-     *
-     * ```js
-     * {
-     *   rules: [
-     *     {
-     *       listen: "80/http",
-     *       container: "app",
-     *       conditions: { path: "/api/*" }
-     *     },
-     *     {
-     *       listen: "80/http",
-     *       container: "admin",
-     *       conditions: { path: "/admin/*" }
-     *     }
-     *   ]
-     * }
-     * ```
-     *
-     * Additionally, you can redirect traffic from one port to another. This is
-     * commonly used to redirect http to https.
-     *
-     * ```js
-     * {
-     *   rules: [
-     *     { listen: "80/http", redirect: "443/https" },
-     *     { listen: "443/https", forward: "80/http" }
-     *   ]
-     * }
-     * ```
-     */
-    rules?: Input<Prettify<ServiceRules>[]>;
-    /**
-     * Configure the health check that the load balancer runs on your containers.
-     *
-     * :::tip
-     * This health check is different from the [`health`](#health) check.
-     * :::
-     *
-     * This health check is run by the load balancer. While, `health` is run by ECS. This
-     * cannot be disabled if you are using a load balancer. While the other is off by default.
-     *
-     * Since this cannot be disabled, here are some tips on how to debug an unhealthy
-     * health check.
-     *
-     * <details>
-     * <summary>How to debug a load balancer health check</summary>
-     *
-     * If you notice a `Unhealthy: Health checks failed` error, it's because the health
-     * check has failed. When it fails, the load balancer will terminate the containers,
-     * causing any requests to fail.
-     *
-     * Here's how to debug it:
-     *
-     * 1. Verify the health check path.
-     *
-     *    By default, the load balancer checks the `/` path. Ensure it's accessible in your
-     *    containers. If your application runs on a different path, then update the path in
-     *    the health check config accordingly.
-     *
-     * 2. Confirm the containers are operational.
-     *
-     *    Navigate to **ECS console** > select the **cluster** > go to the **Tasks tab** >
-     *    choose **Any desired status** under the **Filter desired status** dropdown > select
-     *    a task and check for errors under the **Logs tab**. If it has error that means that
-     *    the container failed to start.
-     *
-     * 3. If the container was terminated by the load balancer while still starting up, try
-     *    increasing the health check interval and timeout.
-     * </details>
-     *
-     * For `http` and `https` the default is:
-     *
-     * ```js
-     * {
-     *   path: "/",
-     *   healthyThreshold: 5,
-     *   successCodes: "200",
-     *   timeout: "5 seconds",
-     *   unhealthyThreshold: 2,
-     *   interval: "30 seconds"
-     * }
-     * ```
-     *
-     * For `tcp` and `udp` the default is:
-     *
-     * ```js
-     * {
-     *   healthyThreshold: 5,
-     *   timeout: "6 seconds",
-     *   unhealthyThreshold: 2,
-     *   interval: "30 seconds"
-     * }
-     * ```
-     *
-     * @example
-     *
-     * To configure the health check, we use the _port/protocol_ format. Here we are
-     * configuring a health check that pings the `/health` path on port `8080`
-     * every 10 seconds.
-     *
-     * ```js
-     * {
-     *   rules: [
-     *     { listen: "80/http", forward: "8080/http" }
-     *   ],
-     *   health: {
-     *     "8080/http": {
-     *       path: "/health",
-     *       interval: "10 seconds"
-     *     }
-     *   }
-     * }
-     * ```
-     *
-     */
-    health?: Input<
-      Record<
-        Port,
-        Input<{
-          /**
-           * The URL path to ping on the service for health checks. Only applicable to
-           * `http` and `https` protocols.
-           * @default `"/"`
-           */
-          path?: Input<string>;
-          /**
-           * The time period between each health check request. Must be between `5 seconds`
-           * and `300 seconds`.
-           * @default `"30 seconds"`
-           */
-          interval?: Input<DurationMinutes>;
-          /**
-           * The timeout for each health check request. If no response is received within this
-           * time, it is considered failed. Must be between `2 seconds` and `120 seconds`.
-           * @default `"5 seconds"`
-           */
-          timeout?: Input<DurationMinutes>;
-          /**
-           * The number of consecutive successful health check requests required to consider the
-           * target healthy. Must be between 2 and 10.
-           * @default `5`
-           */
-          healthyThreshold?: Input<number>;
-          /**
-           * The number of consecutive failed health check requests required to consider the
-           * target unhealthy. Must be between 2 and 10.
-           * @default `2`
-           */
-          unhealthyThreshold?: Input<number>;
-          /**
-           * One or more HTTP response codes the health check treats as successful. Only
-           * applicable to `http` and `https` protocols.
-           *
-           * @default `"200"`
-           * @example
-           * ```js
-           * {
-           *   successCodes: "200-299"
-           * }
-           * ```
-           */
-          successCodes?: Input<string>;
-        }>
-      >
-    >;
-  }
+        /**
+         * Configure if the load balancer should be public or private.
+         *
+         * When set to `false`, the load balancer endpoint will only be accessible within the
+         * VPC.
+         *
+         * @default `true`
+         */
+        public?: Input<boolean>;
+        /**
+         * Set a custom domain for your load balancer endpoint.
+         *
+         * Automatically manages domains hosted on AWS Route 53, Cloudflare, and Vercel. For other
+         * providers, you'll need to pass in a `cert` that validates domain ownership and add the
+         * DNS records.
+         *
+         * :::tip
+         * Built-in support for AWS Route 53, Cloudflare, and Vercel. And manual setup for other
+         * providers.
+         * :::
+         *
+         * @example
+         *
+         * By default this assumes the domain is hosted on Route 53.
+         *
+         * ```js
+         * {
+         *   domain: "example.com"
+         * }
+         * ```
+         *
+         * For domains hosted on Cloudflare.
+         *
+         * ```js
+         * {
+         *   domain: {
+         *     name: "example.com",
+         *     dns: sst.cloudflare.dns()
+         *   }
+         * }
+         * ```
+         */
+        domain?: Input<
+          | string
+          | {
+              /**
+               * The custom domain you want to use.
+               *
+               * @example
+               * ```js
+               * {
+               *   domain: {
+               *     name: "example.com"
+               *   }
+               * }
+               * ```
+               *
+               * Can also include subdomains based on the current stage.
+               *
+               * ```js
+               * {
+               *   domain: {
+               *     name: `${$app.stage}.example.com`
+               *   }
+               * }
+               * ```
+               *
+               * Wildcard domains are supported.
+               *
+               * ```js
+               * {
+               *   domain: {
+               *     name: "*.example.com"
+               *   }
+               * }
+               * ```
+               */
+              name: Input<string>;
+              /**
+               * Alias domains that should be used.
+               *
+               * @example
+               * ```js {4}
+               * {
+               *   domain: {
+               *     name: "app1.example.com",
+               *     aliases: ["app2.example.com"]
+               *   }
+               * }
+               * ```
+               */
+              aliases?: Input<string[]>;
+              /**
+               * The ARN of an ACM (AWS Certificate Manager) certificate that proves ownership of the
+               * domain. By default, a certificate is created and validated automatically.
+               *
+               * :::tip
+               * You need to pass in a `cert` for domains that are not hosted on supported `dns` providers.
+               * :::
+               *
+               * To manually set up a domain on an unsupported provider, you'll need to:
+               *
+               * 1. [Validate that you own the domain](https://docs.aws.amazon.com/acm/latest/userguide/domain-ownership-validation.html) by creating an ACM certificate. You can either validate it by setting a DNS record or by verifying an email sent to the domain owner.
+               * 2. Once validated, set the certificate ARN as the `cert` and set `dns` to `false`.
+               * 3. Add the DNS records in your provider to point to the load balancer endpoint.
+               *
+               * @example
+               * ```js
+               * {
+               *   domain: {
+               *     name: "example.com",
+               *     dns: false,
+               *     cert: "arn:aws:acm:us-east-1:112233445566:certificate/3a958790-8878-4cdc-a396-06d95064cf63"
+               *   }
+               * }
+               * ```
+               */
+              cert?: Input<string>;
+              /**
+               * The DNS provider to use for the domain. Defaults to AWS.
+               *
+               * Takes an adapter that can create the DNS records on the provider. This can automate
+               * validating the domain and setting up the DNS routing.
+               *
+               * Supports Route 53, Cloudflare, and Vercel adapters. For other providers, you'll need
+               * to set `dns` to `false` and pass in a certificate validating ownership via `cert`.
+               *
+               * @default `sst.aws.dns`
+               *
+               * @example
+               *
+               * Specify the hosted zone ID for the Route 53 domain.
+               *
+               * ```js
+               * {
+               *   domain: {
+               *     name: "example.com",
+               *     dns: sst.aws.dns({
+               *       zone: "Z2FDTNDATAQYW2"
+               *     })
+               *   }
+               * }
+               * ```
+               *
+               * Use a domain hosted on Cloudflare, needs the Cloudflare provider.
+               *
+               * ```js
+               * {
+               *   domain: {
+               *     name: "example.com",
+               *     dns: sst.cloudflare.dns()
+               *   }
+               * }
+               * ```
+               *
+               * Use a domain hosted on Vercel, needs the Vercel provider.
+               *
+               * ```js
+               * {
+               *   domain: {
+               *     name: "example.com",
+               *     dns: sst.vercel.dns()
+               *   }
+               * }
+               * ```
+               */
+              dns?: Input<false | (Dns & {})>;
+            }
+        >;
+        /** @deprecated Use `rules` instead. */
+        ports?: Input<Prettify<ServiceRules>[]>;
+        /**
+         * Configure the mapping for the ports the load balancer listens to, forwards, or redirects to
+         * the service.
+         * This supports two types of protocols:
+         *
+         * 1. Application Layer Protocols: `http` and `https`. This'll create an [Application Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/introduction.html).
+         * 2. Network Layer Protocols: `tcp`, `udp`, `tcp_udp`, and `tls`. This'll create a [Network Load Balancer](https://docs.aws.amazon.com/elasticloadbalancing/latest/network/introduction.html).
+         *
+         * :::note
+         * If you want to listen on `https` or `tls`, you need to specify a custom
+         * `loadBalancer.domain`.
+         * :::
+         *
+         * You **cannot configure** both application and network layer protocols for the same
+         * service.
+         *
+         * @example
+         * Here we are listening on port `80` and forwarding it to the service on port `8080`.
+         * ```js
+         * {
+         *   rules: [
+         *     { listen: "80/http", forward: "8080/http" }
+         *   ]
+         * }
+         * ```
+         *
+         * The `forward` port and protocol defaults to the `listen` port and protocol. So in this
+         * case both are `80/http`.
+         *
+         * ```js
+         * {
+         *   rules: [
+         *     { listen: "80/http" }
+         *   ]
+         * }
+         * ```
+         *
+         * If multiple containers are configured via the `containers` argument, you need to
+         * specify which container the traffic should be forwarded to.
+         *
+         * ```js
+         * {
+         *   rules: [
+         *     { listen: "80/http", container: "app" },
+         *     { listen: "8000/http", container: "admin" }
+         *   ]
+         * }
+         * ```
+         *
+         * You can also route the same port to multiple containers via path-based routing.
+         *
+         * ```js
+         * {
+         *   rules: [
+         *     {
+         *       listen: "80/http",
+         *       container: "app",
+         *       conditions: { path: "/api/*" }
+         *     },
+         *     {
+         *       listen: "80/http",
+         *       container: "admin",
+         *       conditions: { path: "/admin/*" }
+         *     }
+         *   ]
+         * }
+         * ```
+         *
+         * Additionally, you can redirect traffic from one port to another. This is
+         * commonly used to redirect http to https.
+         *
+         * ```js
+         * {
+         *   rules: [
+         *     { listen: "80/http", redirect: "443/https" },
+         *     { listen: "443/https", forward: "80/http" }
+         *   ]
+         * }
+         * ```
+         */
+        rules?: Input<Prettify<ServiceRules>[]>;
+        /**
+         * Configure the health check that the load balancer runs on your containers.
+         *
+         * :::tip
+         * This health check is different from the [`health`](#health) check.
+         * :::
+         *
+         * This health check is run by the load balancer, while `health` is run by ECS. This one
+         * cannot be disabled if you are using a load balancer, while the other is off by default.
+         *
+         * Since this cannot be disabled, here are some tips on how to debug an unhealthy
+         * health check.
+         *
+         * <details>
+         * <summary>How to debug a load balancer health check</summary>
+         *
+         * If you notice an `Unhealthy: Health checks failed` error, it's because the health
+         * check has failed. When it fails, the load balancer will terminate the containers,
+         * causing any requests to fail.
+         *
+         * Here's how to debug it:
+         *
+         * 1. Verify the health check path.
+         *
+         *    By default, the load balancer checks the `/` path. Ensure it's accessible in your
+         *    containers. If your application runs on a different path, then update the path in
+         *    the health check config accordingly.
+         *
+         * 2. Confirm the containers are operational.
+         *
+         *    Navigate to **ECS console** > select the **cluster** > go to the **Tasks tab** >
+         *    choose **Any desired status** under the **Filter desired status** dropdown > select
+         *    a task and check for errors under the **Logs tab**. If it has an error, that means
+         *    the container failed to start.
+         *
+         * 3. If the container was terminated by the load balancer while still starting up, try
+         *    increasing the health check interval and timeout.
+         * </details>
+         *
+         * For `http` and `https` the default is:
+         *
+         * ```js
+         * {
+         *   path: "/",
+         *   healthyThreshold: 5,
+         *   successCodes: "200",
+         *   timeout: "5 seconds",
+         *   unhealthyThreshold: 2,
+         *   interval: "30 seconds"
+         * }
+         * ```
+         *
+         * For `tcp` and `udp` the default is:
+         *
+         * ```js
+         * {
+         *   healthyThreshold: 5,
+         *   timeout: "6 seconds",
+         *   unhealthyThreshold: 2,
+         *   interval: "30 seconds"
+         * }
+         * ```
+         *
+         * @example
+         *
+         * To configure the health check, we use the _port/protocol_ format. Here we are
+         * configuring a health check that pings the `/health` path on port `8080`
+         * every 10 seconds.
+         *
+         * ```js
+         * {
+         *   rules: [
+         *     { listen: "80/http", forward: "8080/http" }
+         *   ],
+         *   health: {
+         *     "8080/http": {
+         *       path: "/health",
+         *       interval: "10 seconds"
+         *     }
+         *   }
+         * }
+         * ```
+         *
+         */
+        health?: Input<
+          Record<
+            Port,
+            Input<{
+              /**
+               * The URL path to ping on the service for health checks. Only applicable to
+               * `http` and `https` protocols.
+               * @default `"/"`
+               */
+              path?: Input<string>;
+              /**
+               * The time period between each health check request. Must be between `5 seconds`
+               * and `300 seconds`.
+               * @default `"30 seconds"`
+               */
+              interval?: Input<DurationMinutes>;
+              /**
+               * The timeout for each health check request. If no response is received within this
+               * time, it is considered failed. Must be between `2 seconds` and `120 seconds`.
+               * @default `"5 seconds"`
+               */
+              timeout?: Input<DurationMinutes>;
+              /**
+               * The number of consecutive successful health check requests required to consider the
+               * target healthy. Must be between 2 and 10.
+               * @default `5`
+               */
+              healthyThreshold?: Input<number>;
+              /**
+               * The number of consecutive failed health check requests required to consider the
+               * target unhealthy. Must be between 2 and 10.
+               * @default `2`
+               */
+              unhealthyThreshold?: Input<number>;
+              /**
+               * One or more HTTP response codes the health check treats as successful. Only
+               * applicable to `http` and `https` protocols.
+               *
+               * @default `"200"`
+               * @example
+               * ```js
+               * {
+               *   successCodes: "200-299"
+               * }
+               * ```
+               */
+              successCodes?: Input<string>;
+            }>
+          >
+        >;
+      }
     | {
-    /**
-     * The `Alb` instance to attach this service to. When provided, the service creates
-     * target groups and listener rules on the shared ALB instead of creating its own
-     * load balancer.
-     *
-     * ECS tasks use the VPC's default security group, which allows all traffic within the
-     * VPC CIDR. For tighter security, add an explicit security group ingress rule from the
-     * ALB's security group using `transform`.
-     *
-     * @example
-     * ```js
-     * {
-     *   loadBalancer: {
-     *     instance: alb,
-     *     rules: [
-     *       { listen: "443/https", forward: "8080/http", conditions: { path: "/api/*" }, priority: 100 }
-     *     ]
-     *   }
-     * }
-     * ```
-     */
-    instance: Alb;
-    /**
-     * The rules for routing traffic from the ALB to this service's containers.
-     * Each rule must have explicit conditions and priority.
-     */
-    rules: Prettify<ServiceAlbRule>[];
-    /**
-     * Configure health checks for the target groups. Uses the same format as the inline
-     * health check config, keyed by `{port}/{protocol}`.
-     */
-    health?: Record<
-      AlbPort,
-      Input<{
-        path?: Input<string>;
-        interval?: Input<DurationMinutes>;
-        timeout?: Input<DurationMinutes>;
-        healthyThreshold?: Input<number>;
-        unhealthyThreshold?: Input<number>;
-        successCodes?: Input<string>;
-      }>
-    >;
-  }
+        /**
+         * The `Alb` instance to attach this service to. When provided, the service creates
+         * target groups and listener rules on the shared ALB instead of creating its own
+         * load balancer.
+         *
+         * ECS tasks use the VPC's default security group, which allows all traffic within the
+         * VPC CIDR. For tighter security, add an explicit security group ingress rule from the
+         * ALB's security group using `transform`.
+         *
+         * @example
+         * ```js
+         * {
+         *   loadBalancer: {
+         *     instance: alb,
+         *     rules: [
+         *       { listen: "443/https", forward: "8080/http", conditions: { path: "/api/*" }, priority: 100 }
+         *     ]
+         *   }
+         * }
+         * ```
+         */
+        instance: Alb;
+        /**
+         * The rules for routing traffic from the ALB to this service's containers.
+         * Each rule must have explicit conditions and priority.
+         */
+        rules: Prettify<ServiceAlbRule>[];
+        /**
+         * Configure health checks for the target groups. Uses the same format as the inline
+         * health check config, keyed by `{port}/{protocol}`.
+         */
+        health?: Record<
+          AlbPort,
+          Input<{
+            path?: Input<string>;
+            interval?: Input<DurationMinutes>;
+            timeout?: Input<DurationMinutes>;
+            healthyThreshold?: Input<number>;
+            unhealthyThreshold?: Input<number>;
+            successCodes?: Input<string>;
+          }>
+        >;
+      }
   >;
   /**
    * Configure the CloudMap service registry for the service.
@@ -2035,7 +2035,9 @@ export class Service extends Component implements Link.Linkable {
       if (!args.capacity) return;
 
       return output(args.capacity).apply(
-        (v):
+        (
+          v,
+        ):
           | {
               fargate?: { base?: Input<number>; weight: Input<number> };
               spot?: { base?: Input<number>; weight: Input<number> };
@@ -2527,96 +2529,103 @@ export class Service extends Component implements Link.Linkable {
     }
 
     function createService() {
-      return cloudmapService.apply(
-        (cloudmapService) =>
-          new ecs.Service(
-            ...transform(
-              args.transform?.service,
-              `${name}Service`,
-              {
-                name,
-                cluster: clusterArn,
-                taskDefinition: taskDefinition.arn,
-                desiredCount: scaling.min,
-                ...(managed
-                  ? {
-                      forceNewDeployment: true,
-                      capacityProviderStrategies: [
-                        {
-                          capacityProvider: managedCapacityProvider!.name,
-                          base: 1,
-                          weight: 1,
-                        },
-                      ],
-                    }
-                  : capacity
+      const create = (dependsOn?: ComponentResourceOptions["dependsOn"]) =>
+        cloudmapService.apply(
+          (cloudmapService) =>
+            new ecs.Service(
+              ...transform(
+                args.transform?.service,
+                `${name}Service`,
+                {
+                  name,
+                  cluster: clusterArn,
+                  taskDefinition: taskDefinition.arn,
+                  desiredCount: scaling.min,
+                  ...(managed
                     ? {
-                        // setting `forceNewDeployment` ensures that the service is not recreated
-                        // when the capacity provider config changes.
                         forceNewDeployment: true,
-                        capacityProviderStrategies: capacity.apply((v) => {
-                          if (!v)
-                            throw new VisibleError(
-                              `Invalid Fargate capacity configuration for the \"${name}\" Service.`,
-                            );
-                          return [
-                            ...(v.fargate
-                              ? [
-                                  {
-                                    capacityProvider: "FARGATE",
-                                    base: v.fargate?.base,
-                                    weight: v.fargate?.weight,
-                                  },
-                                ]
-                              : []),
-                            ...(v.spot
-                              ? [
-                                  {
-                                    capacityProvider: "FARGATE_SPOT",
-                                    base: v.spot?.base,
-                                    weight: v.spot?.weight,
-                                  },
-                                ]
-                              : []),
-                          ];
-                        }),
+                        capacityProviderStrategies: [
+                          {
+                            capacityProvider: managedCapacityProvider!.name,
+                            base: 1,
+                            weight: 1,
+                          },
+                        ],
                       }
-                    : // @deprecated do not use `launchType`, set `capacityProviderStrategies`
-                      // to `[{ capacityProvider: "FARGATE", weight: 1 }]` instead
-                      {
-                        launchType: "FARGATE",
-                      }),
-                networkConfiguration: {
-                  // If the vpc is an SST vpc, services are automatically deployed to the public
-                  // subnets. So we need to assign a public IP for the service to be accessible.
-                  ...(managed ? {} : { assignPublicIp: vpc.isSstVpc }),
-                  subnets: vpc.containerSubnets,
-                  securityGroups: vpc.securityGroups,
-                },
-                deploymentCircuitBreaker: {
-                  enable: true,
-                  rollback: true,
-                },
-                loadBalancers: targetEntries.apply((entries) =>
-                  entries.map((e) => ({
-                    targetGroupArn: e.targetGroup.arn,
-                    containerName: e.containerName,
-                    containerPort: e.containerPort,
-                  })),
-                ),
-                enableExecuteCommand: true,
-                serviceRegistries: cloudmapService && {
-                  registryArn: cloudmapService.arn,
-                  port: args.serviceRegistry
-                    ? output(args.serviceRegistry).port
-                    : undefined,
+                    : capacity
+                      ? {
+                          // Setting `forceNewDeployment` ensures that the service is not recreated
+                          // when the capacity provider config changes.
+                          forceNewDeployment: true,
+                          capacityProviderStrategies: capacity.apply((v) => {
+                            if (!v)
+                              throw new VisibleError(
+                                `Invalid Fargate capacity configuration for the \"${name}\" Service.`,
+                              );
+                            return [
+                              ...(v.fargate
+                                ? [
+                                    {
+                                      capacityProvider: "FARGATE",
+                                      base: v.fargate?.base,
+                                      weight: v.fargate?.weight,
+                                    },
+                                  ]
+                                : []),
+                              ...(v.spot
+                                ? [
+                                    {
+                                      capacityProvider: "FARGATE_SPOT",
+                                      base: v.spot?.base,
+                                      weight: v.spot?.weight,
+                                    },
+                                  ]
+                                : []),
+                            ];
+                          }),
+                        }
+                      : // Deprecated path: prefer setting `capacityProviderStrategies` to
+                        // `[{ capacityProvider: "FARGATE", weight: 1 }]` over using `launchType`.
+                        {
+                          launchType: "FARGATE",
+                        }),
+                  networkConfiguration: {
+                    // In an SST vpc, services are deployed to the public subnets, so a public IP is
+                    // needed for the service to be reachable. Managed mode omits `assignPublicIp`.
+                    ...(managed ? {} : { assignPublicIp: vpc.isSstVpc }),
+                    subnets: vpc.containerSubnets,
+                    securityGroups: vpc.securityGroups,
+                  },
+                  deploymentCircuitBreaker: {
+                    enable: true,
+                    rollback: true,
+                  },
+                  loadBalancers: targetEntries.apply((entries) =>
+                    entries.map((e) => ({
+                      targetGroupArn: e.targetGroup.arn,
+                      containerName: e.containerName,
+                      containerPort: e.containerPort,
+                    })),
+                  ),
+                  enableExecuteCommand: true,
+                  serviceRegistries: cloudmapService && {
+                    registryArn: cloudmapService.arn,
+                    port: args.serviceRegistry
+                      ? output(args.serviceRegistry).port
+                      : undefined,
+                  },
+                  waitForSteadyState: wait,
                 },
-                waitForSteadyState: wait,
-              },
-              { parent: self },
+                { parent: self, ...(dependsOn ? { dependsOn } : {}) },
+              ),
             ),
-          ),
-      );
+        );
+
+      if (args.cluster.vpc instanceof Vpc) {
+        return create([args.cluster.vpc]); // NOTE(review): explicit dependency on the Vpc component; presumably for ordering — confirm
+      }
+
+      return create();
     }
 
     function createAutoScaling() {