chore: Update documentation for installing EFA device plugin to use official EKS chart

bryantbiggs committed Mar 13, 2024
1 parent 2440b23 commit f1d9a0b
Showing 5 changed files with 84 additions and 192 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -19,7 +19,7 @@ repos:
       - id: detect-aws-credentials
         args: [--allow-missing-credentials]
   - repo: https://github.com/antonbabenko/pre-commit-terraform
-    rev: v1.88.1
+    rev: v1.88.2
    hooks:
      - id: terraform_fmt
      - id: terraform_docs
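A rev bump like this is typically produced by pre-commit's own updater rather than edited by hand. As a sketch (assuming `pre-commit` is installed locally; this is not something the commit itself runs):

```sh
# Update the pinned revision for a single hook repository to its latest tag
pre-commit autoupdate --repo https://github.com/antonbabenko/pre-commit-terraform
```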
1 change: 1 addition & 0 deletions docs/cSpell_dict.txt
@@ -144,6 +144,7 @@ tcpdump
 templatefile
 tfstate
 tfvars
+tolerations
 tolist
 toset
 velero
4 changes: 4 additions & 0 deletions patterns/elastic-fabric-adapter/README.md
@@ -8,6 +8,10 @@ See [here](https://aws-ia.github.io/terraform-aws-eks-blueprints/getting-started
 
 ## Validate
 
+!!! note
+
+    The following steps are shown with `g5.8xlarge` due to cost and availability. The values shown below will change based on the instance type selected (e.g., `p5.48xlarge` has 8 GPUs and 32 EFA interfaces).
+
 1. List the nodes by instance type:
 
    ```sh
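The validation commands that follow are collapsed in this view, so the file's actual command is not shown. As a hypothetical illustration only (the command below is an assumption, not the README's content), listing nodes by instance type could look like:

```sh
# Show each node with its instance type as an extra label column
kubectl get nodes -L node.kubernetes.io/instance-type
```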
265 changes: 78 additions & 187 deletions patterns/elastic-fabric-adapter/main.tf
@@ -2,18 +2,6 @@ provider "aws" {
   region = local.region
 }
 
-provider "kubernetes" {
-  host                   = module.eks.cluster_endpoint
-  cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)
-
-  exec {
-    api_version = "client.authentication.k8s.io/v1beta1"
-    command     = "aws"
-    # This requires the awscli to be installed locally where Terraform is executed
-    args = ["eks", "get-token", "--cluster-name", module.eks.cluster_name]
-  }
-}
-
 provider "helm" {
   kubernetes {
     host = module.eks.cluster_endpoint
@@ -34,7 +22,7 @@ locals {
   name   = basename(path.cwd)
   region = "us-west-2"
 
-  cluster_version   = "1.29"
+  efa_instance_type = "p5.48xlarge"
 
   vpc_cidr = "10.0.0.0/16"
   azs      = slice(data.aws_availability_zones.available.names, 0, 3)
@@ -51,10 +39,10 @@
 
 module "eks" {
   source  = "terraform-aws-modules/eks/aws"
-  version = "~> 20.0"
+  version = "~> 20.4"
 
   cluster_name                   = local.name
-  cluster_version                = local.cluster_version
+  cluster_version                = "1.29"
   cluster_endpoint_public_access = true
 
   # Give the Terraform identity admin access to the cluster
@@ -67,29 +55,12 @@ module "eks" {
     vpc-cni = {}
   }
 
+  # Add security group rules on the node group security group to allow EFA traffic
+  enable_efa_support = true
+
   vpc_id     = module.vpc.vpc_id
   subnet_ids = module.vpc.private_subnets
 
-  # Extend node-to-node security group rules
-  node_security_group_additional_rules = {
-    ingress_self_all = {
-      description = "Node to node all ingress traffic"
-      protocol    = "-1"
-      from_port   = 0
-      to_port     = 0
-      type        = "ingress"
-      self        = true
-    }
-    egress_self_all = {
-      description = "Node to node all egress traffic"
-      protocol    = "-1"
-      from_port   = 0
-      to_port     = 0
-      type        = "egress"
-      self        = true
-    }
-  }
-
   eks_managed_node_group_defaults = {
     iam_role_additional_policies = {
       # Not required, but used in the example to access the nodes to inspect drivers and devices
@@ -98,41 +69,54 @@ module "eks" {
   }
 
   eks_managed_node_groups = {
-    # For running services that do not require GPUs
+    # This node group is for core addons such as CoreDNS
     default = {
       instance_types = ["m5.large"]
 
+      # Default AMI has only 8GB of storage
+      block_device_mappings = {
+        xvda = {
+          device_name = "/dev/xvda"
+          ebs = {
+            volume_size           = 128
+            volume_type           = "gp3"
+            delete_on_termination = true
+          }
+        }
+      }
+
       min_size     = 1
-      max_size     = 5
+      max_size     = 2
       desired_size = 2
     }
 
-    efa = {
+    nvidia-efa = {
       # The EKS AL2 GPU AMI provides all of the necessary components
       # for accelerated workloads w/ EFA
       ami_type       = "AL2_x86_64_GPU"
-      instance_types = ["g5.8xlarge"]
+      instance_types = [local.efa_instance_type]
 
-      min_size     = 1
-      max_size     = 1
-      desired_size = 1
-
-      subnet_ids = slice(module.vpc.private_subnets, 0, 1)
-
-      network_interfaces = [
-        {
-          description                 = "EFA interface"
-          delete_on_termination       = true
-          device_index                = 0
-          associate_public_ip_address = false
-          interface_type              = "efa"
-        }
-      ]
-      placement = {
-        group_name = aws_placement_group.efa.name
-      }
+      pre_bootstrap_user_data = <<-EOT
+        #!/usr/bin/env bash
+
+        # Mount instance store volumes in RAID-0 for Kubelet and Containerd (raid0)
+        # https://github.com/awslabs/amazon-eks-ami/blob/master/doc/USER_GUIDE.md#raid-0-for-kubelet-and-containerd-raid0
+        /bin/setup-local-disks raid0
+      EOT
+
+      min_size     = 2
+      max_size     = 2
+      desired_size = 2
+
+      # This will:
+      # 1. Create a placement group for the node group to cluster instances together
+      # 2. Filter out subnets that reside in AZs that do not support the instance type
+      # 3. Expose all of the available EFA interfaces on the launch template
+      enable_efa_support = true
+
       taints = {
-        dedicated = {
+        # Ensure only GPU workloads are scheduled on this node group
+        gpu = {
           key    = "nvidia.com/gpu"
           value  = "true"
           effect = "NO_SCHEDULE"
@@ -160,7 +144,6 @@ module "eks_blueprints_addons" {
   # We want to wait for the Fargate profiles to be deployed first
   create_delay_dependencies = [for group in module.eks.eks_managed_node_groups : group.node_group_arn]
 
-  enable_aws_efs_csi_driver    = true
   enable_aws_fsx_csi_driver    = true
   enable_kube_prometheus_stack = true
   kube_prometheus_stack = {
@@ -175,6 +158,42 @@ module "eks_blueprints_addons" {
   enable_metrics_server = true
 
   helm_releases = {
+    aws-efa-k8s-device-plugin = {
+      repository       = "https://aws.github.io/eks-charts"
+      chart            = "aws-efa-k8s-device-plugin"
+      version          = "v0.4.4"
+      namespace        = "nvidia-device-plugin"
+      create_namespace = true
+
+      values = [
+        <<-EOT
+          nodeSelector:
+            node.kubernetes.io/instance-type: "${local.efa_instance_type}"
+          tolerations:
+            - key: nvidia.com/gpu
+              operator: Exists
+              effect: NoSchedule
+        EOT
+      ]
+    }
+
+    nvidia-device-plugin = {
+      repository       = "https://nvidia.github.io/k8s-device-plugin"
+      chart            = "nvidia-device-plugin"
+      version          = "0.14.4"
+      namespace        = "nvidia-device-plugin"
+      create_namespace = true
+
+      values = [
+        <<-EOT
+          gfd:
+            enabled: false
+          nodeSelector:
+            node.kubernetes.io/instance-type: "${local.efa_instance_type}"
+        EOT
+      ]
+    }
+
     prometheus-adapter = {
       description = "A Helm chart for k8s prometheus adapter"
       namespace   = "prometheus-adapter"
@@ -190,133 +209,11 @@ module "eks_blueprints_addons" {
         EOT
       ]
     }
-    gpu-operator = {
-      description      = "A Helm chart for NVIDIA GPU operator"
-      namespace        = "gpu-operator"
-      create_namespace = true
-      chart            = "gpu-operator"
-      chart_version    = "v23.3.2"
-      repository       = "https://nvidia.github.io/gpu-operator"
-      values = [
-        <<-EOT
-          dcgmExporter:
-            enabled: false
-          driver:
-            enabled: false
-          toolkit:
-            version: v1.13.5-centos7
-          operator:
-            defaultRuntime: containerd
-          validator:
-            driver:
-              env:
-                # https://github.com/NVIDIA/gpu-operator/issues/569
-                - name: DISABLE_DEV_CHAR_SYMLINK_CREATION
-                  value: "true"
-        EOT
-      ]
-    }
   }
 
   tags = local.tags
 }
 
-################################################################################
-# Amazon Elastic Fabric Adapter (EFA)
-################################################################################
-
-resource "kubernetes_daemonset" "aws_efa_k8s_device_plugin" {
-  metadata {
-    name      = "aws-efa-k8s-device-plugin-daemonset"
-    namespace = "kube-system"
-  }
-
-  spec {
-    selector {
-      match_labels = {
-        name = "aws-efa-k8s-device-plugin"
-      }
-    }
-
-    template {
-      metadata {
-        labels = {
-          name = "aws-efa-k8s-device-plugin"
-        }
-      }
-
-      spec {
-        volume {
-          name = "device-plugin"
-
-          host_path {
-            path = "/var/lib/kubelet/device-plugins"
-          }
-        }
-
-        container {
-          name  = "aws-efa-k8s-device-plugin"
-          image = "602401143452.dkr.ecr.us-west-2.amazonaws.com/eks/aws-efa-k8s-device-plugin:v0.4.4"
-
-          volume_mount {
-            name       = "device-plugin"
-            mount_path = "/var/lib/kubelet/device-plugins"
-          }
-
-          image_pull_policy = "Always"
-
-          security_context {
-            capabilities {
-              drop = ["ALL"]
-            }
-          }
-        }
-
-        host_network = true
-
-        affinity {
-          node_affinity {
-            required_during_scheduling_ignored_during_execution {
-              node_selector_term {
-                match_expressions {
-                  key      = "beta.kubernetes.io/instance-type"
-                  operator = "In"
-                  values   = ["c5n.18xlarge", "c5n.9xlarge", "c5n.metal", "c6a.48xlarge", "c6a.metal", "c6gn.16xlarge", "c6i.32xlarge", "c6i.metal", "c6id.32xlarge", "c6id.metal", "c6in.32xlarge", "c6in.metal", "c7g.16xlarge", "c7g.metal", "c7gd.16xlarge", "c7gn.16xlarge", "c7i.48xlarge", "dl1.24xlarge", "g4dn.12xlarge", "g4dn.16xlarge", "g4dn.8xlarge", "g4dn.metal", "g5.12xlarge", "g5.16xlarge", "g5.24xlarge", "g5.48xlarge", "g5.8xlarge", "hpc7g.16xlarge", "hpc7g.4xlarge", "hpc7g.8xlarge", "i3en.12xlarge", "i3en.24xlarge", "i3en.metal", "i4g.16xlarge", "i4i.32xlarge", "i4i.metal", "im4gn.16xlarge", "inf1.24xlarge", "m5dn.24xlarge", "m5dn.metal", "m5n.24xlarge", "m5n.metal", "m5zn.12xlarge", "m5zn.metal", "m6a.48xlarge", "m6a.metal", "m6i.32xlarge", "m6i.metal", "m6id.32xlarge", "m6id.metal", "m6idn.32xlarge", "m6idn.metal", "m6in.32xlarge", "m6in.metal", "m7a.48xlarge", "m7a.metal-48xl", "m7g.16xlarge", "m7g.metal", "m7gd.16xlarge", "m7i.48xlarge", "p3dn.24xlarge", "p4d.24xlarge", "p5.48xlarge", "r5dn.24xlarge", "r5dn.metal", "r5n.24xlarge", "r5n.metal", "r6a.48xlarge", "r6a.metal", "r6i.32xlarge", "r6i.metal", "r6id.32xlarge", "r6id.metal", "r6idn.32xlarge", "r6idn.metal", "r6in.32xlarge", "r6in.metal", "r7a.48xlarge", "r7g.16xlarge", "r7g.metal", "r7gd.16xlarge", "r7iz.32xlarge", "trn1.32xlarge", "trn1n.32xlarge", "vt1.24xlarge", "x2idn.32xlarge", "x2idn.metal", "x2iedn.32xlarge", "x2iedn.metal", "x2iezn.12xlarge", "x2iezn.metal"]
-                }
-              }
-
-              node_selector_term {
-                match_expressions {
-                  key      = "node.kubernetes.io/instance-type"
-                  operator = "In"
-                  values   = ["c5n.18xlarge", "c5n.9xlarge", "c5n.metal", "c6a.48xlarge", "c6a.metal", "c6gn.16xlarge", "c6i.32xlarge", "c6i.metal", "c6id.32xlarge", "c6id.metal", "c6in.32xlarge", "c6in.metal", "c7g.16xlarge", "c7g.metal", "c7gd.16xlarge", "c7gn.16xlarge", "c7i.48xlarge", "dl1.24xlarge", "g4dn.12xlarge", "g4dn.16xlarge", "g4dn.8xlarge", "g4dn.metal", "g5.12xlarge", "g5.16xlarge", "g5.24xlarge", "g5.48xlarge", "g5.8xlarge", "hpc7g.16xlarge", "hpc7g.4xlarge", "hpc7g.8xlarge", "i3en.12xlarge", "i3en.24xlarge", "i3en.metal", "i4g.16xlarge", "i4i.32xlarge", "i4i.metal", "im4gn.16xlarge", "inf1.24xlarge", "m5dn.24xlarge", "m5dn.metal", "m5n.24xlarge", "m5n.metal", "m5zn.12xlarge", "m5zn.metal", "m6a.48xlarge", "m6a.metal", "m6i.32xlarge", "m6i.metal", "m6id.32xlarge", "m6id.metal", "m6idn.32xlarge", "m6idn.metal", "m6in.32xlarge", "m6in.metal", "m7a.48xlarge", "m7a.metal-48xl", "m7g.16xlarge", "m7g.metal", "m7gd.16xlarge", "m7i.48xlarge", "p3dn.24xlarge", "p4d.24xlarge", "p5.48xlarge", "r5dn.24xlarge", "r5dn.metal", "r5n.24xlarge", "r5n.metal", "r6a.48xlarge", "r6a.metal", "r6i.32xlarge", "r6i.metal", "r6id.32xlarge", "r6id.metal", "r6idn.32xlarge", "r6idn.metal", "r6in.32xlarge", "r6in.metal", "r7a.48xlarge", "r7g.16xlarge", "r7g.metal", "r7gd.16xlarge", "r7iz.32xlarge", "trn1.32xlarge", "trn1n.32xlarge", "vt1.24xlarge", "x2idn.32xlarge", "x2idn.metal", "x2iedn.32xlarge", "x2iedn.metal", "x2iezn.12xlarge", "x2iezn.metal"]
-                }
-              }
-            }
-          }
-        }
-
-        toleration {
-          key      = "CriticalAddonsOnly"
-          operator = "Exists"
-        }
-
-        toleration {
-          key      = "aws.amazon.com/efa"
-          operator = "Exists"
-          effect   = "NoSchedule"
-        }
-
-        priority_class_name = "system-node-critical"
-      }
-    }
-
-    strategy {
-      type = "RollingUpdate"
-    }
-  }
-}
-
 ################################################################################
 # Supporting Resources
 ################################################################################
@@ -345,9 +242,3 @@ module "vpc" {
 
   tags = local.tags
 }
-
-# Group instances within clustered placement group so they are in close proximity
-resource "aws_placement_group" "efa" {
-  name     = local.name
-  strategy = "cluster"
-}
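For orientation, here is a rough Helm CLI equivalent of the `aws-efa-k8s-device-plugin` release the commit adds through `helm_releases` — a sketch assuming the cluster is already reachable via kubeconfig. The repository URL, chart, version, namespace, and values mirror the diff above; the values-file name is an arbitrary choice:

```sh
# Add the official EKS charts repository
helm repo add eks https://aws.github.io/eks-charts
helm repo update

# Values mirror the Terraform helm_releases entry above
cat <<'EOF' > efa-values.yaml
nodeSelector:
  node.kubernetes.io/instance-type: "p5.48xlarge"
tolerations:
  - key: nvidia.com/gpu
    operator: Exists
    effect: NoSchedule
EOF

helm upgrade --install aws-efa-k8s-device-plugin eks/aws-efa-k8s-device-plugin \
  --version v0.4.4 \
  --namespace nvidia-device-plugin \
  --create-namespace \
  --values efa-values.yaml
```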
4 changes: 0 additions & 4 deletions patterns/elastic-fabric-adapter/versions.tf
@@ -10,10 +10,6 @@ terraform {
       source  = "hashicorp/helm"
       version = ">= 2.9"
     }
-    kubernetes = {
-      source  = "hashicorp/kubernetes"
-      version = ">= 2.20"
-    }
   }
 
   # ## Used for end-to-end testing on project; update to suit your needs
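With the `kubernetes_daemonset` resource gone, the `hashicorp/kubernetes` provider requirement can be dropped. A quick sanity check before re-initializing — a sketch, assuming the repository layout shown in the file paths above:

```sh
# Confirm no kubernetes_* resources remain in the pattern
grep -rn 'resource "kubernetes_' patterns/elastic-fabric-adapter/

# Re-initialize so the dependency lock file drops the unused provider
terraform -chdir=patterns/elastic-fabric-adapter init -upgrade
```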
