diff --git a/packages/@aws-cdk/aws-ecs/README.md b/packages/@aws-cdk/aws-ecs/README.md
index 5ad2855c8f642..4f19e2e1c915f 100644
--- a/packages/@aws-cdk/aws-ecs/README.md
+++ b/packages/@aws-cdk/aws-ecs/README.md
@@ -788,3 +788,53 @@ new ecs.FargateService(stack, 'FargateService', {
 
 app.synth();
 ```
+
+## Elastic Inference Accelerators
+
+Currently, this feature is only supported for services with EC2 launch types.
+
+To add elastic inference accelerators to your tasks, first add the
+`inferenceAccelerators` field to the `Ec2TaskDefinition` and set the
+`deviceName` and `deviceType` properties for each accelerator.
+
+```ts
+const inferenceAccelerators = [{
+  deviceName: 'device1',
+  deviceType: 'eia2.medium',
+}];
+
+const taskDefinition = new ecs.Ec2TaskDefinition(stack, 'Ec2TaskDef', {
+  inferenceAccelerators,
+});
+```
+
+To reference an inference accelerator from a container, set the
+`inferenceAcceleratorResources` field to a list of device names. Each value
+must match the `deviceName` of an `InferenceAccelerator` defined in the task
+definition.
+
+```ts
+taskDefinition.addContainer('cont', {
+  image: ecs.ContainerImage.fromRegistry('test'),
+  memoryLimitMiB: 1024,
+  inferenceAcceleratorResources: ['device1'],
+});
+```
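+
+Accelerators can also be attached after the task definition has been created,
+using the `addInferenceAccelerator()` and `addInferenceAcceleratorResource()`
+methods, for example (the device and container names here are illustrative):
+
+```ts
+taskDefinition.addInferenceAccelerator({
+  deviceName: 'device2',
+  deviceType: 'eia2.large',
+});
+
+// 'worker' and 'device2' are example names; a container may reference any
+// accelerator that has been declared on its task definition.
+const container = taskDefinition.addContainer('worker', {
+  image: ecs.ContainerImage.fromRegistry('test'),
+  memoryLimitMiB: 1024,
+});
+container.addInferenceAcceleratorResource('device2');
+```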
diff --git a/packages/@aws-cdk/aws-ecs/lib/base/task-definition.ts b/packages/@aws-cdk/aws-ecs/lib/base/task-definition.ts
index 7c3bb618142a5..3e7f82160d3fa 100644
--- a/packages/@aws-cdk/aws-ecs/lib/base/task-definition.ts
+++ b/packages/@aws-cdk/aws-ecs/lib/base/task-definition.ts
@@ -184,6 +184,15 @@ export interface TaskDefinitionProps extends CommonTaskDefinitionProps {
    * @default - PidMode used by the task is not specified
    */
   readonly pidMode?: PidMode;
+
+  /**
+   * The inference accelerators to use for the containers in the task.
+   *
+   * Not supported in Fargate.
+   *
+   * @default - No inference accelerators.
+   */
+  readonly inferenceAccelerators?: InferenceAccelerator[];
 }
 
 /**
@@ -322,6 +331,11 @@ export class TaskDefinition extends TaskDefinitionBase {
    */
   private readonly placementConstraints = new Array<CfnTaskDefinition.TaskDefinitionPlacementConstraintProperty>();
 
+  /**
+   * The inference accelerators attached to the task.
+   */
+  private readonly _inferenceAccelerators: InferenceAccelerator[] = [];
+
   private _executionRole?: iam.IRole;
 
   private _referencesSecretJsonField?: boolean;
@@ -354,12 +368,20 @@ export class TaskDefinition extends TaskDefinitionBase {
       throw new Error(`Fargate-compatible tasks require both CPU (${props.cpu}) and memory (${props.memoryMiB}) specifications`);
     }
 
+    if (props.inferenceAccelerators && props.inferenceAccelerators.length > 0 && this.isFargateCompatible) {
+      throw new Error('Cannot use inference accelerators on tasks that run on Fargate');
+    }
+
     this._executionRole = props.executionRole;
 
     this.taskRole = props.taskRole || new iam.Role(this, 'TaskRole', {
       assumedBy: new iam.ServicePrincipal('ecs-tasks.amazonaws.com'),
     });
 
+    if (props.inferenceAccelerators) {
+      props.inferenceAccelerators.forEach(ia => this.addInferenceAccelerator(ia));
+    }
+
     const taskDef = new CfnTaskDefinition(this, 'Resource', {
       containerDefinitions: Lazy.any({ produce: () => this.renderContainers() }, { omitEmptyArray: true }),
       volumes: Lazy.any({ produce: () => this.renderVolumes() }, { omitEmptyArray: true }),
@@ -380,6 +402,10 @@ export class TaskDefinition extends TaskDefinitionBase {
       memory: props.memoryMiB,
       ipcMode: props.ipcMode,
       pidMode: props.pidMode,
+      inferenceAccelerators: Lazy.any({
+        produce: () =>
+          !isFargateCompatible(this.compatibility) ? this.renderInferenceAccelerators() : undefined,
+      }, { omitEmptyArray: true }),
     });
 
     if (props.placementConstraints) {
@@ -393,6 +419,13 @@ export class TaskDefinition extends TaskDefinitionBase {
     return this._executionRole;
   }
 
+  /**
+   * The inference accelerators attached to the task definition.
+   */
+  public get inferenceAccelerators(): InferenceAccelerator[] {
+    return this._inferenceAccelerators;
+  }
+
   private renderVolumes(): CfnTaskDefinition.VolumeProperty[] {
     return this.volumes.map(renderVolume);
 
@@ -419,6 +452,17 @@
     }
   }
 
+  private renderInferenceAccelerators(): CfnTaskDefinition.InferenceAcceleratorProperty[] {
+    return this._inferenceAccelerators.map(renderInferenceAccelerator);
+
+    function renderInferenceAccelerator(inferenceAccelerator: InferenceAccelerator): CfnTaskDefinition.InferenceAcceleratorProperty {
+      return {
+        deviceName: inferenceAccelerator.deviceName,
+        deviceType: inferenceAccelerator.deviceType,
+      };
+    }
+  }
+
   /**
    * Validate the existence of the input target and set default values.
    *
@@ -531,6 +575,16 @@
     extension.extend(this);
   }
 
+  /**
+   * Adds an inference accelerator to the task definition.
+   */
+  public addInferenceAccelerator(inferenceAccelerator: InferenceAccelerator) {
+    if (isFargateCompatible(this.compatibility)) {
+      throw new Error('Cannot use inference accelerators on tasks that run on Fargate');
+    }
+    this._inferenceAccelerators.push(inferenceAccelerator);
+  }
+
   /**
    * Creates the task execution IAM role if it doesn't already exist.
    */
@@ -683,6 +737,24 @@ export enum PidMode {
   TASK = 'task',
 }
 
+/**
+ * Elastic Inference Accelerator.
+ * For more information, see [Elastic Inference Basics](https://docs.aws.amazon.com/elastic-inference/latest/developerguide/basics.html)
+ */
+export interface InferenceAccelerator {
+  /**
+   * The Elastic Inference accelerator device name.
+   * @default - empty
+   */
+  readonly deviceName?: string;
+
+  /**
+   * The Elastic Inference accelerator type to use. The allowed values are eia2.medium, eia2.large, and eia2.xlarge.
+   * @default - empty
+   */
+  readonly deviceType?: string;
+}
+
 /**
  * A data volume used in a task definition.
  *
diff --git a/packages/@aws-cdk/aws-ecs/lib/container-definition.ts b/packages/@aws-cdk/aws-ecs/lib/container-definition.ts
index dfe041afd2d99..8f57e84872de5 100644
--- a/packages/@aws-cdk/aws-ecs/lib/container-definition.ts
+++ b/packages/@aws-cdk/aws-ecs/lib/container-definition.ts
@@ -294,6 +294,12 @@ export interface ContainerDefinitionOptions {
    * @default - No ports are mapped.
    */
   readonly portMappings?: PortMapping[];
+
+  /**
+   * The inference accelerators referenced by the container.
+   * @default - No inference accelerators assigned.
+   */
+  readonly inferenceAcceleratorResources?: string[];
 }
 
 /**
@@ -386,6 +392,11 @@ export class ContainerDefinition extends CoreConstruct {
    */
   public readonly referencesSecretJsonField?: boolean;
 
+  /**
+   * The inference accelerators referenced by this container.
+ */ + private readonly inferenceAcceleratorResources: string[] = []; + /** * The configured container links */ @@ -443,6 +454,10 @@ export class ContainerDefinition extends CoreConstruct { if (props.portMappings) { this.addPortMappings(...props.portMappings); } + + if (props.inferenceAcceleratorResources) { + this.addInferenceAcceleratorResource(...props.inferenceAcceleratorResources); + } } /** @@ -516,6 +531,20 @@ export class ContainerDefinition extends CoreConstruct { })); } + /** + * This method adds one or more resources to the container. + */ + public addInferenceAcceleratorResource(...inferenceAcceleratorResources: string[]) { + this.inferenceAcceleratorResources.push(...inferenceAcceleratorResources.map(resource => { + for (const inferenceAccelerator of this.taskDefinition.inferenceAccelerators) { + if (resource === inferenceAccelerator.deviceName) { + return resource; + } + } + throw new Error(`Resource value ${resource} in container definition doesn't match any inference accelerator device name in the task definition.`); + })); + } + /** * This method adds one or more ulimits to the container. */ @@ -631,7 +660,8 @@ export class ContainerDefinition extends CoreConstruct { healthCheck: this.props.healthCheck && renderHealthCheck(this.props.healthCheck), links: cdk.Lazy.list({ produce: () => this.links }, { omitEmpty: true }), linuxParameters: this.linuxParameters && this.linuxParameters.renderLinuxParameters(), - resourceRequirements: (this.props.gpuCount !== undefined) ? renderResourceRequirements(this.props.gpuCount) : undefined, + resourceRequirements: (!this.props.gpuCount && this.inferenceAcceleratorResources.length == 0 ) ? undefined : + renderResourceRequirements(this.props.gpuCount, this.inferenceAcceleratorResources), }; } } @@ -742,12 +772,22 @@ function getHealthCheckCommand(hc: HealthCheck): string[] { return hcCommand.concat(cmd); } -function renderResourceRequirements(gpuCount: number): CfnTaskDefinition.ResourceRequirementProperty[] | undefined { - if (gpuCount === 0) { return undefined; } - return [{ - type: 'GPU', - value: gpuCount.toString(), - }]; +function renderResourceRequirements(gpuCount: number = 0, inferenceAcceleratorResources: string[] = []): +CfnTaskDefinition.ResourceRequirementProperty[] | undefined { + const ret = []; + for (const resource of inferenceAcceleratorResources) { + ret.push({ + type: 'InferenceAccelerator', + value: resource, + }); + } + if (gpuCount > 0) { + ret.push({ + type: 'GPU', + value: gpuCount.toString(), + }); + } + return ret; } /** diff --git a/packages/@aws-cdk/aws-ecs/lib/ec2/ec2-task-definition.ts b/packages/@aws-cdk/aws-ecs/lib/ec2/ec2-task-definition.ts index ff571c884b73e..3b65516ba7dfa 100644 --- a/packages/@aws-cdk/aws-ecs/lib/ec2/ec2-task-definition.ts +++ b/packages/@aws-cdk/aws-ecs/lib/ec2/ec2-task-definition.ts @@ -1,4 +1,5 @@ import { Construct } from 'constructs'; +import { ImportedTaskDefinition } from '../base/_imported-task-definition'; import { CommonTaskDefinitionAttributes, CommonTaskDefinitionProps, @@ -8,9 +9,9 @@ import { NetworkMode, PidMode, TaskDefinition, + InferenceAccelerator, } from '../base/task-definition'; import { PlacementConstraint } from '../placement'; -import { ImportedTaskDefinition } from '../base/_imported-task-definition'; /** * The properties for a task definition run on an EC2 cluster. 
@@ -51,6 +52,15 @@ export interface Ec2TaskDefinitionProps extends CommonTaskDefinitionProps { * @default - PidMode used by the task is not specified */ readonly pidMode?: PidMode; + + /** + * The inference accelerators to use for the containers in the task. + * + * Not supported in Fargate. + * + * @default - No inference accelerators. + */ + readonly inferenceAccelerators?: InferenceAccelerator[]; } /** @@ -109,6 +119,7 @@ export class Ec2TaskDefinition extends TaskDefinition implements IEc2TaskDefinit placementConstraints: props.placementConstraints, ipcMode: props.ipcMode, pidMode: props.pidMode, + inferenceAccelerators: props.inferenceAccelerators, }); } } diff --git a/packages/@aws-cdk/aws-ecs/test/container-definition.test.ts b/packages/@aws-cdk/aws-ecs/test/container-definition.test.ts index e69abefd2036c..c5d6ece5776d3 100644 --- a/packages/@aws-cdk/aws-ecs/test/container-definition.test.ts +++ b/packages/@aws-cdk/aws-ecs/test/container-definition.test.ts @@ -997,6 +997,176 @@ describe('container definition', () => { }); }); + describe('Given InferenceAccelerator resource parameter', () => { + test('correctly adds resource requirements to container definition using inference accelerator resource property', () => { + // GIVEN + const stack = new cdk.Stack(); + + const inferenceAccelerators = [{ + deviceName: 'device1', + deviceType: 'eia2.medium', + }]; + + const taskDefinition = new ecs.Ec2TaskDefinition(stack, 'Ec2TaskDef', { + inferenceAccelerators, + }); + + const inferenceAcceleratorResources = ['device1']; + + // WHEN + taskDefinition.addContainer('cont', { + image: ecs.ContainerImage.fromRegistry('test'), + memoryLimitMiB: 1024, + inferenceAcceleratorResources, + }); + + // THEN + expect(stack).toHaveResourceLike('AWS::ECS::TaskDefinition', { + Family: 'Ec2TaskDef', + InferenceAccelerators: [{ + DeviceName: 'device1', + DeviceType: 'eia2.medium', + }], + ContainerDefinitions: [ + { + Image: 'test', + ResourceRequirements: [ + { + Type: 'InferenceAccelerator', + Value: 'device1', + }, + ], + }, + ], + }); + + + }); + test('correctly adds resource requirements to container definition using both props and addInferenceAcceleratorResource method', () => { + // GIVEN + const stack = new cdk.Stack(); + + const inferenceAccelerators = [{ + deviceName: 'device1', + deviceType: 'eia2.medium', + }, { + deviceName: 'device2', + deviceType: 'eia2.large', + }]; + + const taskDefinition = new ecs.Ec2TaskDefinition(stack, 'Ec2TaskDef', { + inferenceAccelerators, + }); + + const inferenceAcceleratorResources = ['device1']; + + const container = taskDefinition.addContainer('cont', { + image: ecs.ContainerImage.fromRegistry('test'), + memoryLimitMiB: 1024, + inferenceAcceleratorResources, + }); + + // WHEN + container.addInferenceAcceleratorResource('device2'); + + // THEN + expect(stack).toHaveResourceLike('AWS::ECS::TaskDefinition', { + Family: 'Ec2TaskDef', + InferenceAccelerators: [{ + DeviceName: 'device1', + DeviceType: 'eia2.medium', + }, { + DeviceName: 'device2', + DeviceType: 'eia2.large', + }], + ContainerDefinitions: [ + { + Image: 'test', + ResourceRequirements: [ + { + Type: 'InferenceAccelerator', + Value: 'device1', + }, + { + Type: 'InferenceAccelerator', + Value: 'device2', + }, + ], + }, + ], + }); + + }); + test('throws when the value of inference accelerator resource does not match any inference accelerators defined in the Task Definition', () => { + // GIVEN + const stack = new cdk.Stack(); + + const inferenceAccelerators = [{ + deviceName: 'device1', + deviceType: 
'eia2.medium', + }]; + + const taskDefinition = new ecs.Ec2TaskDefinition(stack, 'Ec2TaskDef', { + inferenceAccelerators, + }); + + const inferenceAcceleratorResources = ['device2']; + + // THEN + expect(() => { + taskDefinition.addContainer('cont', { + image: ecs.ContainerImage.fromRegistry('test'), + memoryLimitMiB: 1024, + inferenceAcceleratorResources, + }); + }).toThrow(/Resource value device2 in container definition doesn't match any inference accelerator device name in the task definition./); + }); + }); + + test('adds resource requirements when both inference accelerator and gpu count are defined in the container definition', () => { + // GIVEN + const stack = new cdk.Stack(); + + const inferenceAccelerators = [{ + deviceName: 'device1', + deviceType: 'eia2.medium', + }]; + + const taskDefinition = new ecs.Ec2TaskDefinition(stack, 'Ec2TaskDef', { + inferenceAccelerators, + }); + + const inferenceAcceleratorResources = ['device1']; + + taskDefinition.addContainer('cont', { + image: ecs.ContainerImage.fromRegistry('test'), + memoryLimitMiB: 1024, + gpuCount: 2, + inferenceAcceleratorResources, + }); + + // THEN + expect(stack).toHaveResourceLike('AWS::ECS::TaskDefinition', { + Family: 'Ec2TaskDef', + InferenceAccelerators: [{ + DeviceName: 'device1', + DeviceType: 'eia2.medium', + }], + ContainerDefinitions: [ + { + Image: 'test', + ResourceRequirements: [{ + Type: 'InferenceAccelerator', + Value: 'device1', + }, { + Type: 'GPU', + Value: '2', + }], + }, + ], + }); + }); + test('can add secret environment variables to the container definition', () => { // GIVEN const stack = new cdk.Stack(); diff --git a/packages/@aws-cdk/aws-ecs/test/ec2/ec2-task-definition.test.ts b/packages/@aws-cdk/aws-ecs/test/ec2/ec2-task-definition.test.ts index 0d21804117863..06f891868feb8 100644 --- a/packages/@aws-cdk/aws-ecs/test/ec2/ec2-task-definition.test.ts +++ b/packages/@aws-cdk/aws-ecs/test/ec2/ec2-task-definition.test.ts @@ -1199,6 +1199,72 @@ describe('ec2 task definition', () => { }); }); + describe('setting inferenceAccelerators', () => { + test('correctly sets inferenceAccelerators using props', () => { + // GIVEN + const stack = new cdk.Stack(); + const inferenceAccelerators = [{ + deviceName: 'device1', + deviceType: 'eia2.medium', + }]; + + // WHEN + const taskDefinition = new ecs.Ec2TaskDefinition(stack, 'Ec2TaskDef', { + inferenceAccelerators, + }); + + taskDefinition.addContainer('web', { + image: ecs.ContainerImage.fromRegistry('amazon/amazon-ecs-sample'), + memoryLimitMiB: 512, + }); + + // THEN + expect(stack).toHaveResourceLike('AWS::ECS::TaskDefinition', { + Family: 'Ec2TaskDef', + InferenceAccelerators: [{ + DeviceName: 'device1', + DeviceType: 'eia2.medium', + }], + }); + + }); + test('correctly sets inferenceAccelerators using props and addInferenceAccelerator method', () => { + // GIVEN + const stack = new cdk.Stack(); + const inferenceAccelerators = [{ + deviceName: 'device1', + deviceType: 'eia2.medium', + }]; + + const taskDefinition = new ecs.Ec2TaskDefinition(stack, 'Ec2TaskDef', { + inferenceAccelerators, + }); + + // WHEN + taskDefinition.addInferenceAccelerator({ + deviceName: 'device2', + deviceType: 'eia2.large', + }); + + taskDefinition.addContainer('web', { + image: ecs.ContainerImage.fromRegistry('amazon/amazon-ecs-sample'), + memoryLimitMiB: 512, + }); + + // THEN + expect(stack).toHaveResourceLike('AWS::ECS::TaskDefinition', { + Family: 'Ec2TaskDef', + InferenceAccelerators: [{ + DeviceName: 'device1', + DeviceType: 'eia2.medium', + }, { + DeviceName: 'device2', + 
DeviceType: 'eia2.large', + }], + }); + }); + }); + describe('When importing from an existing Ec2 TaskDefinition', () => { test('can succeed using TaskDefinition Arn', () => { // GIVEN diff --git a/packages/@aws-cdk/aws-ecs/test/fargate/fargate-task-definition.test.ts b/packages/@aws-cdk/aws-ecs/test/fargate/fargate-task-definition.test.ts index e0fedafa2c3a6..9cd5d994c9555 100644 --- a/packages/@aws-cdk/aws-ecs/test/fargate/fargate-task-definition.test.ts +++ b/packages/@aws-cdk/aws-ecs/test/fargate/fargate-task-definition.test.ts @@ -113,6 +113,24 @@ nodeunitShim({ test.done(); }, + + 'throws when adding inference accelerators'(test: Test) { + // GIVEN + const stack = new cdk.Stack(); + const taskDefinition = new ecs.FargateTaskDefinition(stack, 'FargateTaskDef'); + + const inferenceAccelerator = { + deviceName: 'device1', + deviceType: 'eia2.medium', + }; + + // THEN + test.throws(() => { + taskDefinition.addInferenceAccelerator(inferenceAccelerator); + }, /Cannot use inference accelerators on tasks that run on Fargate/); + + test.done(); + }, }, 'When importing from an existing Fargate TaskDefinition': {