
feat(ecs): add support for elastic inference accelerators in ECS task definitions #13950

Merged

Changes from 3 commits
36 changes: 36 additions & 0 deletions packages/@aws-cdk/aws-ecs/README.md
@@ -788,3 +788,39 @@ new ecs.FargateService(stack, 'FargateService', {

app.synth();
```

## Elastic Inference Accelerators

Currently, this feature is not supported on Fargate.

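Attempting to use inference accelerators with a Fargate-compatible task definition throws
an error at construction time. A minimal sketch of that failure mode, based on the
validation added in this change (the `stack` variable and the construct ID are assumed
from the surrounding examples):

```ts
// Throws 'Cannot use inference accelerators on tasks that run on Fargate'
new ecs.TaskDefinition(stack, 'FargateTaskDef', {
  compatibility: ecs.Compatibility.FARGATE,
  cpu: '512',
  memoryMiB: '1024',
  inferenceAccelerators: [{
    deviceName: 'device1',
    deviceType: 'eia2.medium',
  }],
});
```
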
To add elastic inference accelerators to your EC2 task, first set the
`inferenceAccelerators` field on the `Ec2TaskDefinition`, specifying the `deviceName`
and `deviceType` properties for each accelerator.

```ts
const inferenceAccelerators = [{
deviceName: 'device1',
deviceType: 'eia2.medium',
}];

const taskDefinition = new ecs.Ec2TaskDefinition(stack, 'Ec2TaskDef', {
  inferenceAccelerators,
});
```

To use an inference accelerator from a container, add a `resourceRequirements` entry
and set its `type` and `value` properties. The `value` must match the
`deviceName` of an `InferenceAccelerator` specified in the task definition.

```ts
const resourceRequirements = [{
type: ecs.ResourceRequirementType.INFERENCEACCELERATOR,
value: 'device1',
}];

taskDefinition.addContainer('cont', {
image: ecs.ContainerImage.fromRegistry('test'),
memoryLimitMiB: 1024,
resourceRequirements: resourceRequirements,
});
```
64 changes: 64 additions & 0 deletions packages/@aws-cdk/aws-ecs/lib/base/task-definition.ts
@@ -184,6 +184,15 @@ export interface TaskDefinitionProps extends CommonTaskDefinitionProps {
* @default - PidMode used by the task is not specified
*/
readonly pidMode?: PidMode;

/**
* The inference accelerators to use for the containers in the task.
*
* Not supported in Fargate.
*
* @default - No inference accelerators.
*/
readonly inferenceAccelerators?: InferenceAccelerator[];
}

/**
@@ -322,6 +331,11 @@ export class TaskDefinition extends TaskDefinitionBase {
*/
private readonly placementConstraints = new Array<CfnTaskDefinition.TaskDefinitionPlacementConstraintProperty>();

/**
* Inference accelerators for task instances
*/
public readonly inferenceAccelerators: InferenceAccelerator[] = [];

private _executionRole?: iam.IRole;

private _referencesSecretJsonField?: boolean;
@@ -354,6 +368,10 @@
throw new Error(`Fargate-compatible tasks require both CPU (${props.cpu}) and memory (${props.memoryMiB}) specifications`);
}

if (props.inferenceAccelerators && props.inferenceAccelerators.length > 0 && this.isFargateCompatible) {
throw new Error('Cannot use inference accelerators on tasks that run on Fargate');
}

this._executionRole = props.executionRole;

this.taskRole = props.taskRole || new iam.Role(this, 'TaskRole', {
@@ -380,12 +398,20 @@
memory: props.memoryMiB,
ipcMode: props.ipcMode,
pidMode: props.pidMode,
inferenceAccelerators: Lazy.any({
produce: () =>
!isFargateCompatible(this.compatibility) ? this.renderInferenceAccelerators() : undefined,
}, { omitEmptyArray: true }),
});

if (props.placementConstraints) {
props.placementConstraints.forEach(pc => this.addPlacementConstraint(pc));
}

if (props.inferenceAccelerators) {
props.inferenceAccelerators.forEach(ia => this.addInferenceAccelerator(ia));
}

this.taskDefinitionArn = taskDef.ref;
}

@@ -419,6 +445,20 @@
}
}

private renderInferenceAccelerators(): CfnTaskDefinition.InferenceAcceleratorProperty[] {
if (isFargateCompatible(this.compatibility)) {
throw new Error('Cannot use inference accelerators on tasks that run on Fargate');
}
return this.inferenceAccelerators.map(renderInferenceAccelerator);

function renderInferenceAccelerator(inferenceAccelerator: InferenceAccelerator) : CfnTaskDefinition.InferenceAcceleratorProperty {
return {
deviceName: inferenceAccelerator.deviceName,
deviceType: inferenceAccelerator.deviceType,
};
}
}

/**
* Validate the existence of the input target and set default values.
*
@@ -531,6 +571,13 @@
extension.extend(this);
}

/**
* Adds an inference accelerator to the task definition.
*/
public addInferenceAccelerator(inferenceAccelerator: InferenceAccelerator) {
this.inferenceAccelerators.push(inferenceAccelerator);
}

/**
* Creates the task execution IAM role if it doesn't already exist.
*/
@@ -683,6 +730,23 @@ export enum PidMode {
TASK = 'task',
}

/**
* Elastic Inference Accelerator.
*/
export interface InferenceAccelerator {
/**
* The Elastic Inference accelerator device name.
* @default - empty
*/
readonly deviceName?: string;

/**
* The Elastic Inference accelerator type to use.
[Review comment, Contributor] It might be helpful to include what values are valid here in the docstring (i.e., eia2.medium, eia2.large, and eia2.xlarge, according to the docs). Normally, having the different valid values as an enum might be nice so that an IDE can pick up the valid values, but the downside is that we'd have to manually add any new types that get added in the future at the risk of always being behind what's available, so I think a string is fine for now. (A hypothetical sketch of the enum alternative follows this interface.)

* @default - empty
*/
readonly deviceType?: string;
}
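
As a hypothetical illustration of the enum alternative discussed in the review comment
above (not part of this change; the enum name and members are invented for the sketch,
with values taken from the reviewer's list):

```ts
// Hypothetical alternative: an enum of the currently known Elastic Inference
// device types. The drawback noted in review is that any new types released
// by AWS would have to be added here manually, which is why the change keeps
// `deviceType` as a plain string.
export enum InferenceAcceleratorDeviceType {
  EIA2_MEDIUM = 'eia2.medium',
  EIA2_LARGE = 'eia2.large',
  EIA2_XLARGE = 'eia2.xlarge',
}
```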

/**
* A data volume used in a task definition.
*
89 changes: 82 additions & 7 deletions packages/@aws-cdk/aws-ecs/lib/container-definition.ts
@@ -294,6 +294,12 @@ export interface ContainerDefinitionOptions {
* @default - No ports are mapped.
*/
readonly portMappings?: PortMapping[];

/**
* The type and amount of a resource to assign to a container.
* @default - No resources assigned.
*/
readonly resourceRequirements?: ResourceRequirement[];
[Review comment, Contributor] I'm wondering if we should have the new field be something that is more specific to EIA, since resourceRequirements in CFN technically encompasses both GPU and EIA. GPU is currently specified through its own field, and it may be confusing for this field to exist when we have GPU specified elsewhere.

[@upparekh, Apr 2, 2021] I agree, it might get confusing this way. I will make the required changes.

[@upparekh, Apr 6, 2021] I renamed the resourceRequirements prop to inferenceAcceleratorResources (as inferenceAccelerators was already a task definition prop) and changed the type to be a list of device names (strings). (A hypothetical sketch of the renamed usage follows this interface.)

}
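
A hypothetical sketch of the container-level usage after the rename described in the
review thread above (the rename lands in a later commit and is not part of this diff;
the property shape is assumed from the author's description):

```ts
// Assumed shape after the rename: the container takes a list of inference
// accelerator device names instead of generic resource requirements.
taskDefinition.addContainer('cont', {
  image: ecs.ContainerImage.fromRegistry('test'),
  memoryLimitMiB: 1024,
  inferenceAcceleratorResources: ['device1'],
});
```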

/**
@@ -386,6 +392,11 @@ export class ContainerDefinition extends CoreConstruct {
*/
public readonly referencesSecretJsonField?: boolean;

/**
* The type and amount of resource assigned to this container.
*/
private readonly resourceRequirements: ResourceRequirement[] = [];

/**
* The configured container links
*/
@@ -443,6 +454,10 @@
if (props.portMappings) {
this.addPortMappings(...props.portMappings);
}

if (props.resourceRequirements) {
this.addResourceRequirements(...props.resourceRequirements);
}
}

/**
@@ -516,6 +531,25 @@
}));
}

/**
* This method adds one or more resources to the container.
*/
private addResourceRequirements(...resourceRequirements: ResourceRequirement[]) {
if (!this.taskDefinition.inferenceAccelerators || this.taskDefinition.inferenceAccelerators.length === 0) {
throw new Error('InferenceAccelerator resource(s) defined in container definition without specifying any inference accelerators in task definition.');
}
this.resourceRequirements.push(...resourceRequirements.map(resource => {
for (const inferenceAccelerator of this.taskDefinition.inferenceAccelerators) {
if (resource.value === inferenceAccelerator.deviceName) {
return {
...resource,
};
}
}
throw new Error(`Resource value (${resource.value}) doesn't match any inference accelerator device name.`);
}));
}

/**
* This method adds one or more ulimits to the container.
*/
@@ -631,7 +665,7 @@
healthCheck: this.props.healthCheck && renderHealthCheck(this.props.healthCheck),
links: cdk.Lazy.list({ produce: () => this.links }, { omitEmpty: true }),
linuxParameters: this.linuxParameters && this.linuxParameters.renderLinuxParameters(),
resourceRequirements: (this.props.gpuCount !== undefined) ? renderResourceRequirements(this.props.gpuCount) : undefined,
resourceRequirements: renderResourceRequirements(this.props.gpuCount, this.resourceRequirements),
};
}
}
@@ -742,12 +776,53 @@ function getHealthCheckCommand(hc: HealthCheck): string[] {
return hcCommand.concat(cmd);
}

function renderResourceRequirements(gpuCount: number): CfnTaskDefinition.ResourceRequirementProperty[] | undefined {
if (gpuCount === 0) { return undefined; }
return [{
type: 'GPU',
value: gpuCount.toString(),
}];
/**
* The type and amount of a resource to assign to a container.
*/
export interface ResourceRequirement {
/**
* The type of resource to assign to a container.
*/
readonly type: ResourceRequirementType,

/**
* The value for the specified resource type.
*/
readonly value: string,
}

/** Type of resource to assign to a container. */
export enum ResourceRequirementType {
/**
* GPU.
*/
GPU = 'GPU',
/**
* InferenceAccelerator.
*/
INFERENCEACCELERATOR = 'InferenceAccelerator'
}

function renderResourceRequirements(gpuCount: number = 0, resourceRequirements: ResourceRequirement[] = []):
CfnTaskDefinition.ResourceRequirementProperty[] | undefined {
if (resourceRequirements.length > 0) {
const ret = [];

for (const resource of resourceRequirements) {
ret.push({
type: ResourceRequirementType.INFERENCEACCELERATOR,
value: resource.value,
});
}
return ret;
}
if (gpuCount > 0) {
return [{
type: ResourceRequirementType.GPU,
value: gpuCount.toString(),
}];
}
return undefined;
}

/**
13 changes: 12 additions & 1 deletion packages/@aws-cdk/aws-ecs/lib/ec2/ec2-task-definition.ts
@@ -1,4 +1,5 @@
import { Construct } from 'constructs';
import { ImportedTaskDefinition } from '../base/_imported-task-definition';
import {
CommonTaskDefinitionAttributes,
CommonTaskDefinitionProps,
@@ -8,9 +9,9 @@ import {
NetworkMode,
PidMode,
TaskDefinition,
InferenceAccelerator,
} from '../base/task-definition';
import { PlacementConstraint } from '../placement';
import { ImportedTaskDefinition } from '../base/_imported-task-definition';

/**
* The properties for a task definition run on an EC2 cluster.
@@ -51,6 +52,15 @@ export interface Ec2TaskDefinitionProps extends CommonTaskDefinitionProps {
* @default - PidMode used by the task is not specified
*/
readonly pidMode?: PidMode;

/**
* The inference accelerators to use for the containers in the task.
*
* Not supported in Fargate.
*
* @default - No inference accelerators.
*/
readonly inferenceAccelerators?: InferenceAccelerator[];
}

/**
@@ -109,6 +119,7 @@ export class Ec2TaskDefinition extends TaskDefinition implements IEc2TaskDefinition {
placementConstraints: props.placementConstraints,
ipcMode: props.ipcMode,
pidMode: props.pidMode,
inferenceAccelerators: props.inferenceAccelerators,
});
}
}