chore: Update documentation for installing EFA device plugin to use official EKS chart #1904

Merged · 2 commits · Mar 13, 2024
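For reference, a rough shell sketch of what the new `helm_releases` entry in `main.tf` amounts to: installing the `aws-efa-k8s-device-plugin` chart from the official `eks-charts` repository with the same values. The chart, version, namespace, and values come from the diff below; the `eks` repo alias and passing values over stdin are illustrative choices, not part of this PR.

```sh
# Hypothetical CLI equivalent of the helm_release added in main.tf;
# chart, version, namespace, and values mirror the Terraform config.
helm repo add eks https://aws.github.io/eks-charts
helm upgrade --install aws-efa-k8s-device-plugin eks/aws-efa-k8s-device-plugin \
  --version v0.4.4 \
  --namespace nvidia-device-plugin \
  --create-namespace \
  --values - <<'EOF'
nodeSelector:
  node.kubernetes.io/instance-type: "p5.48xlarge"
tolerations:
  - key: nvidia.com/gpu
    operator: Exists
    effect: NoSchedule
EOF
```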
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -19,7 +19,7 @@ repos:
- id: detect-aws-credentials
args: [--allow-missing-credentials]
- repo: https://github.com/antonbabenko/pre-commit-terraform
rev: v1.88.1
rev: v1.88.2
hooks:
- id: terraform_fmt
- id: terraform_docs
1 change: 1 addition & 0 deletions docs/cSpell_dict.txt
@@ -144,6 +144,7 @@ tcpdump
templatefile
tfstate
tfvars
tolerations
tolist
toset
velero
4 changes: 4 additions & 0 deletions patterns/elastic-fabric-adapter/README.md
@@ -8,6 +8,10 @@ See [here](https://aws-ia.github.io/terraform-aws-eks-blueprints/getting-started

## Validate

!!! note

The following steps are shown with `g5.8xlarge` for frugality. Values shown below will change based on the instance type selected (e.g., `p5.48xlarge` has 8 GPUs and 32 EFA interfaces).

1. List the nodes by instance type:

```sh
265 changes: 78 additions & 187 deletions patterns/elastic-fabric-adapter/main.tf
@@ -2,18 +2,6 @@ provider "aws" {
region = local.region
}

provider "kubernetes" {
host = module.eks.cluster_endpoint
cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)

exec {
api_version = "client.authentication.k8s.io/v1beta1"
command = "aws"
# This requires the awscli to be installed locally where Terraform is executed
args = ["eks", "get-token", "--cluster-name", module.eks.cluster_name]
}
}

provider "helm" {
kubernetes {
host = module.eks.cluster_endpoint
@@ -34,7 +22,7 @@ locals {
name = basename(path.cwd)
region = "us-west-2"

cluster_version = "1.29"
efa_instance_type = "p5.48xlarge"

vpc_cidr = "10.0.0.0/16"
azs = slice(data.aws_availability_zones.available.names, 0, 3)
@@ -51,10 +39,10 @@ locals {

module "eks" {
source = "terraform-aws-modules/eks/aws"
version = "~> 20.0"
version = "~> 20.4"

cluster_name = local.name
cluster_version = local.cluster_version
cluster_version = "1.29"
cluster_endpoint_public_access = true

# Give the Terraform identity admin access to the cluster
@@ -67,29 +55,12 @@ module "eks" {
vpc-cni = {}
}

# Add security group rules on the node group security group to allow EFA traffic
enable_efa_support = true

vpc_id = module.vpc.vpc_id
subnet_ids = module.vpc.private_subnets

# Extend node-to-node security group rules
node_security_group_additional_rules = {
ingress_self_all = {
description = "Node to node all ingress traffic"
protocol = "-1"
from_port = 0
to_port = 0
type = "ingress"
self = true
}
egress_self_all = {
description = "Node to node all egress traffic"
protocol = "-1"
from_port = 0
to_port = 0
type = "egress"
self = true
}
}

eks_managed_node_group_defaults = {
iam_role_additional_policies = {
# Not required, but used in the example to access the nodes to inspect drivers and devices
@@ -98,41 +69,54 @@ module "eks" {
}

eks_managed_node_groups = {
# For running services that do not require GPUs
# This node group is for core addons such as CoreDNS
default = {
instance_types = ["m5.large"]

# Default AMI has only 8GB of storage
block_device_mappings = {
xvda = {
device_name = "/dev/xvda"
ebs = {
volume_size = 128
volume_type = "gp3"
delete_on_termination = true
}
}
}

min_size = 1
max_size = 5
max_size = 2
desired_size = 2
}

efa = {
nvidia-efa = {
# The EKS AL2 GPU AMI provides all of the necessary components
# for accelerated workloads w/ EFA
ami_type = "AL2_x86_64_GPU"
instance_types = ["g5.8xlarge"]
instance_types = [local.efa_instance_type]

min_size = 1
max_size = 1
desired_size = 1

subnet_ids = slice(module.vpc.private_subnets, 0, 1)

network_interfaces = [
{
description = "EFA interface"
delete_on_termination = true
device_index = 0
associate_public_ip_address = false
interface_type = "efa"
}
]
pre_bootstrap_user_data = <<-EOT
#!/usr/bin/env bash

placement = {
group_name = aws_placement_group.efa.name
}
# Mount instance store volumes in RAID-0 for Kubelet and Containerd (raid0)
# https://github.com/awslabs/amazon-eks-ami/blob/master/doc/USER_GUIDE.md#raid-0-for-kubelet-and-containerd-raid0
/bin/setup-local-disks raid0
EOT

min_size = 2
max_size = 2
desired_size = 2

# This will:
# 1. Create a placement group for the node group to cluster instances together
# 2. Filter out subnets that reside in AZs that do not support the instance type
# 3. Expose all of the available EFA interfaces on the launch template
enable_efa_support = true

taints = {
dedicated = {
# Ensure only GPU workloads are scheduled on this node group
gpu = {
key = "nvidia.com/gpu"
value = "true"
effect = "NO_SCHEDULE"
@@ -160,7 +144,6 @@ module "eks_blueprints_addons" {
# We want to wait for the Fargate profiles to be deployed first
create_delay_dependencies = [for group in module.eks.eks_managed_node_groups : group.node_group_arn]

enable_aws_efs_csi_driver = true
enable_aws_fsx_csi_driver = true
enable_kube_prometheus_stack = true
kube_prometheus_stack = {
@@ -175,6 +158,42 @@ module "eks_blueprints_addons" {
enable_metrics_server = true

helm_releases = {
aws-efa-k8s-device-plugin = {
repository = "https://aws.github.io/eks-charts"
chart = "aws-efa-k8s-device-plugin"
version = "v0.4.4"
namespace = "nvidia-device-plugin"
create_namespace = true

values = [
<<-EOT
nodeSelector:
node.kubernetes.io/instance-type: "${local.efa_instance_type}"
tolerations:
- key: nvidia.com/gpu
operator: Exists
effect: NoSchedule
EOT
]
}

nvidia-device-plugin = {
repository = "https://nvidia.github.io/k8s-device-plugin"
chart = "nvidia-device-plugin"
version = "0.14.4"
namespace = "nvidia-device-plugin"
create_namespace = true

values = [
<<-EOT
gfd:
enabled: false
nodeSelector:
node.kubernetes.io/instance-type: "${local.efa_instance_type}"
EOT
]
}

prometheus-adapter = {
description = "A Helm chart for k8s prometheus adapter"
namespace = "prometheus-adapter"
@@ -190,133 +209,11 @@ module "eks_blueprints_addons" {
EOT
]
}
gpu-operator = {
description = "A Helm chart for NVIDIA GPU operator"
namespace = "gpu-operator"
create_namespace = true
chart = "gpu-operator"
chart_version = "v23.3.2"
repository = "https://nvidia.github.io/gpu-operator"
values = [
<<-EOT
dcgmExporter:
enabled: false
driver:
enabled: false
toolkit:
version: v1.13.5-centos7
operator:
defaultRuntime: containerd
validator:
driver:
env:
# https://github.com/NVIDIA/gpu-operator/issues/569
- name: DISABLE_DEV_CHAR_SYMLINK_CREATION
value: "true"
EOT
]
}
}

tags = local.tags
}

################################################################################
# Amazon Elastic Fabric Adapter (EFA)
################################################################################

resource "kubernetes_daemonset" "aws_efa_k8s_device_plugin" {
metadata {
name = "aws-efa-k8s-device-plugin-daemonset"
namespace = "kube-system"
}

spec {
selector {
match_labels = {
name = "aws-efa-k8s-device-plugin"
}
}

template {
metadata {
labels = {
name = "aws-efa-k8s-device-plugin"
}
}

spec {
volume {
name = "device-plugin"

host_path {
path = "/var/lib/kubelet/device-plugins"
}
}

container {
name = "aws-efa-k8s-device-plugin"
image = "602401143452.dkr.ecr.us-west-2.amazonaws.com/eks/aws-efa-k8s-device-plugin:v0.4.4"

volume_mount {
name = "device-plugin"
mount_path = "/var/lib/kubelet/device-plugins"
}

image_pull_policy = "Always"

security_context {
capabilities {
drop = ["ALL"]
}
}
}

host_network = true

affinity {
node_affinity {
required_during_scheduling_ignored_during_execution {
node_selector_term {
match_expressions {
key = "beta.kubernetes.io/instance-type"
operator = "In"
values = ["c5n.18xlarge", "c5n.9xlarge", "c5n.metal", "c6a.48xlarge", "c6a.metal", "c6gn.16xlarge", "c6i.32xlarge", "c6i.metal", "c6id.32xlarge", "c6id.metal", "c6in.32xlarge", "c6in.metal", "c7g.16xlarge", "c7g.metal", "c7gd.16xlarge", "c7gn.16xlarge", "c7i.48xlarge", "dl1.24xlarge", "g4dn.12xlarge", "g4dn.16xlarge", "g4dn.8xlarge", "g4dn.metal", "g5.12xlarge", "g5.16xlarge", "g5.24xlarge", "g5.48xlarge", "g5.8xlarge", "hpc7g.16xlarge", "hpc7g.4xlarge", "hpc7g.8xlarge", "i3en.12xlarge", "i3en.24xlarge", "i3en.metal", "i4g.16xlarge", "i4i.32xlarge", "i4i.metal", "im4gn.16xlarge", "inf1.24xlarge", "m5dn.24xlarge", "m5dn.metal", "m5n.24xlarge", "m5n.metal", "m5zn.12xlarge", "m5zn.metal", "m6a.48xlarge", "m6a.metal", "m6i.32xlarge", "m6i.metal", "m6id.32xlarge", "m6id.metal", "m6idn.32xlarge", "m6idn.metal", "m6in.32xlarge", "m6in.metal", "m7a.48xlarge", "m7a.metal-48xl", "m7g.16xlarge", "m7g.metal", "m7gd.16xlarge", "m7i.48xlarge", "p3dn.24xlarge", "p4d.24xlarge", "p5.48xlarge", "r5dn.24xlarge", "r5dn.metal", "r5n.24xlarge", "r5n.metal", "r6a.48xlarge", "r6a.metal", "r6i.32xlarge", "r6i.metal", "r6id.32xlarge", "r6id.metal", "r6idn.32xlarge", "r6idn.metal", "r6in.32xlarge", "r6in.metal", "r7a.48xlarge", "r7g.16xlarge", "r7g.metal", "r7gd.16xlarge", "r7iz.32xlarge", "trn1.32xlarge", "trn1n.32xlarge", "vt1.24xlarge", "x2idn.32xlarge", "x2idn.metal", "x2iedn.32xlarge", "x2iedn.metal", "x2iezn.12xlarge", "x2iezn.metal"]
}
}

node_selector_term {
match_expressions {
key = "node.kubernetes.io/instance-type"
operator = "In"
values = ["c5n.18xlarge", "c5n.9xlarge", "c5n.metal", "c6a.48xlarge", "c6a.metal", "c6gn.16xlarge", "c6i.32xlarge", "c6i.metal", "c6id.32xlarge", "c6id.metal", "c6in.32xlarge", "c6in.metal", "c7g.16xlarge", "c7g.metal", "c7gd.16xlarge", "c7gn.16xlarge", "c7i.48xlarge", "dl1.24xlarge", "g4dn.12xlarge", "g4dn.16xlarge", "g4dn.8xlarge", "g4dn.metal", "g5.12xlarge", "g5.16xlarge", "g5.24xlarge", "g5.48xlarge", "g5.8xlarge", "hpc7g.16xlarge", "hpc7g.4xlarge", "hpc7g.8xlarge", "i3en.12xlarge", "i3en.24xlarge", "i3en.metal", "i4g.16xlarge", "i4i.32xlarge", "i4i.metal", "im4gn.16xlarge", "inf1.24xlarge", "m5dn.24xlarge", "m5dn.metal", "m5n.24xlarge", "m5n.metal", "m5zn.12xlarge", "m5zn.metal", "m6a.48xlarge", "m6a.metal", "m6i.32xlarge", "m6i.metal", "m6id.32xlarge", "m6id.metal", "m6idn.32xlarge", "m6idn.metal", "m6in.32xlarge", "m6in.metal", "m7a.48xlarge", "m7a.metal-48xl", "m7g.16xlarge", "m7g.metal", "m7gd.16xlarge", "m7i.48xlarge", "p3dn.24xlarge", "p4d.24xlarge", "p5.48xlarge", "r5dn.24xlarge", "r5dn.metal", "r5n.24xlarge", "r5n.metal", "r6a.48xlarge", "r6a.metal", "r6i.32xlarge", "r6i.metal", "r6id.32xlarge", "r6id.metal", "r6idn.32xlarge", "r6idn.metal", "r6in.32xlarge", "r6in.metal", "r7a.48xlarge", "r7g.16xlarge", "r7g.metal", "r7gd.16xlarge", "r7iz.32xlarge", "trn1.32xlarge", "trn1n.32xlarge", "vt1.24xlarge", "x2idn.32xlarge", "x2idn.metal", "x2iedn.32xlarge", "x2iedn.metal", "x2iezn.12xlarge", "x2iezn.metal"]
}
}
}
}
}

toleration {
key = "CriticalAddonsOnly"
operator = "Exists"
}

toleration {
key = "aws.amazon.com/efa"
operator = "Exists"
effect = "NoSchedule"
}

priority_class_name = "system-node-critical"
}
}

strategy {
type = "RollingUpdate"
}
}
}

################################################################################
# Supporting Resources
################################################################################
@@ -345,9 +242,3 @@ module "vpc" {

tags = local.tags
}

# Group instances within clustered placement group so they are in close proximity
resource "aws_placement_group" "efa" {
name = local.name
strategy = "cluster"
}
4 changes: 0 additions & 4 deletions patterns/elastic-fabric-adapter/versions.tf
@@ -10,10 +10,6 @@ terraform {
source = "hashicorp/helm"
version = ">= 2.9"
}
kubernetes = {
source = "hashicorp/kubernetes"
version = ">= 2.20"
}
}

# ## Used for end-to-end testing on project; update to suit your needs
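After `terraform apply`, a couple of spot checks can confirm the chart-based install: the device plugin DaemonSet should be running in the `nvidia-device-plugin` namespace, and EFA-enabled nodes should advertise the `vpc.amazonaws.com/efa` extended resource (the resource name the EFA device plugin is expected to register). The commands below are a sketch, not part of this PR.

```sh
# Hypothetical verification after applying the pattern (not part of the diff).
# Check the device plugin DaemonSets deployed by the helm_releases entries:
kubectl get daemonset -n nvidia-device-plugin
# Check that EFA-capable nodes advertise the EFA extended resource:
kubectl get nodes -l node.kubernetes.io/instance-type=p5.48xlarge \
  -o jsonpath='{.items[*].status.allocatable.vpc\.amazonaws\.com/efa}'
```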