From ca7c0da55927a3b299f57a7608651dce284f7839 Mon Sep 17 00:00:00 2001 From: Bryant Biggs Date: Wed, 14 Aug 2024 20:23:57 -0500 Subject: [PATCH] chore: Group patterns under sub-directory to reduce length of sidebar nav and better co-locate patterns by some categorization (#1997) --- .github/workflows/markdown-link-check.yml | 4 +- .pre-commit-config.yaml | 6 +-- docs/patterns/.pages | 19 +++++++++ docs/patterns/aws-vpc-cni-network-policy.md | 7 ---- docs/patterns/bottlerocket.md | 2 +- .../patterns/gitops-getting-started-argocd.md | 7 ---- .../gitops-multi-cluster-hub-spoke-argocd.md | 7 ---- .../gitops/gitops-getting-started-argocd.md | 7 ++++ .../gitops-multi-cluster-hub-spoke-argocd.md | 7 ++++ docs/patterns/ipv6-eks-cluster.md | 7 ---- .../machine-learning/ml-capacity-block.md | 7 ++++ .../machine-learning/nvidia-gpu-efa.md | 7 ++++ .../{ => machine-learning}/targeted-odcr.md | 2 +- docs/patterns/ml-capacity-block.md | 7 ---- .../network/aws-vpc-cni-network-policy.md | 7 ++++ .../client-server-communication.md | 0 .../cross-cluster-pod-communication.md | 0 docs/patterns/network/ipv6-eks-cluster.md | 7 ++++ .../network/private-public-ingress.md | 7 ++++ docs/patterns/network/privatelink-access.md | 7 ++++ .../network/tls-with-aws-pca-issuer.md | 7 ++++ docs/patterns/network/vpc-lattice.md | 7 ++++ .../patterns/network/wireguard-with-cilium.md | 7 ++++ docs/patterns/nvidia-gpu-efa.md | 7 ---- docs/patterns/private-public-ingress.md | 7 ---- docs/patterns/privatelink-access.md | 7 ---- docs/patterns/tls-with-aws-pca-issuer.md | 7 ---- docs/patterns/vpc-lattice.md | 7 ---- docs/patterns/wireguard-with-cilium.md | 7 ---- .../templates/ec2nodeclass.yaml | 2 +- .../templates/nodepool.yaml | 2 +- .../karpenter-resources/values.yaml | 4 +- patterns/ecr-pull-through-cache/addons.tf | 2 +- patterns/nvidia-gpu-efa/README.md | 40 +++++++++---------- .../nvidia-gpu-efa/generate-efa-info-test.sh | 1 - .../cross-cluster-pod-communication/README.md | 4 +- .../templates/kyverno-cluster-policy.yaml | 18 ++++----- 37 files changed, 138 insertions(+), 122 deletions(-) create mode 100644 docs/patterns/.pages delete mode 100644 docs/patterns/aws-vpc-cni-network-policy.md delete mode 100644 docs/patterns/gitops-getting-started-argocd.md delete mode 100644 docs/patterns/gitops-multi-cluster-hub-spoke-argocd.md create mode 100644 docs/patterns/gitops/gitops-getting-started-argocd.md create mode 100644 docs/patterns/gitops/gitops-multi-cluster-hub-spoke-argocd.md delete mode 100644 docs/patterns/ipv6-eks-cluster.md create mode 100644 docs/patterns/machine-learning/ml-capacity-block.md create mode 100644 docs/patterns/machine-learning/nvidia-gpu-efa.md rename docs/patterns/{ => machine-learning}/targeted-odcr.md (51%) delete mode 100644 docs/patterns/ml-capacity-block.md create mode 100644 docs/patterns/network/aws-vpc-cni-network-policy.md rename docs/patterns/{vpc-lattice => network}/client-server-communication.md (100%) rename docs/patterns/{vpc-lattice => network}/cross-cluster-pod-communication.md (100%) create mode 100644 docs/patterns/network/ipv6-eks-cluster.md create mode 100644 docs/patterns/network/private-public-ingress.md create mode 100644 docs/patterns/network/privatelink-access.md create mode 100644 docs/patterns/network/tls-with-aws-pca-issuer.md create mode 100644 docs/patterns/network/vpc-lattice.md create mode 100644 docs/patterns/network/wireguard-with-cilium.md delete mode 100644 docs/patterns/nvidia-gpu-efa.md delete mode 100644 docs/patterns/private-public-ingress.md delete mode 100644 docs/patterns/privatelink-access.md delete mode 100644 docs/patterns/tls-with-aws-pca-issuer.md delete mode 100644 docs/patterns/vpc-lattice.md delete mode 100644 docs/patterns/wireguard-with-cilium.md diff --git a/.github/workflows/markdown-link-check.yml b/.github/workflows/markdown-link-check.yml index d935d0272e..b961e099e7 100644 --- a/.github/workflows/markdown-link-check.yml +++ b/.github/workflows/markdown-link-check.yml @@ -28,9 +28,9 @@ jobs: - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 - uses: actions/setup-node@v4 with: - node-version: '16.x' + node-version: '20.x' - name: install markdown-link-check - run: npm install -g markdown-link-check@3.10.2 + run: npm install -g markdown-link-check@3.12.2 - name: markdown-link-check version run: npm list -g markdown-link-check - name: Run markdown-link-check on MD files diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ad4aec964c..024afe9fa7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,11 +1,11 @@ repos: - repo: https://github.com/streetsidesoftware/cspell-cli - rev: v8.10.0 + rev: v8.13.3 hooks: - id: cspell args: [--exclude, 'ADOPTERS.md', --exclude, '.pre-commit-config.yaml', --exclude, '.gitignore', --exclude, '*.drawio', --exclude, 'mkdocs.yml', --exclude, '.helmignore', --exclude, '.github/workflows/*', --exclude, 'patterns/istio-multi-cluster/*', --exclude, 'patterns/blue-green-upgrade/*'] - repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks - rev: v2.13.0 + rev: v2.14.0 hooks: - id: pretty-format-yaml args: [--autofix, --indent, '2', --offset, '2', --preserve-quotes] @@ -19,7 +19,7 @@ repos: - id: detect-aws-credentials args: [--allow-missing-credentials] - repo: https://github.com/antonbabenko/pre-commit-terraform - rev: v1.92.0 + rev: v1.92.1 hooks: - id: terraform_fmt - id: terraform_docs diff --git a/docs/patterns/.pages b/docs/patterns/.pages new file mode 100644 index 0000000000..630292f2af --- /dev/null +++ b/docs/patterns/.pages @@ -0,0 +1,19 @@ +nav: + - Agones Game Controller: agones-game-controller.md + - Blue/Green Upgrade: blue-green-upgrade.md + - Bottlerocket: bottlerocket.md + - ECR Pull Through Cache: ecr-pull-through-cache.md + - External Secrets: external-secrets.md + - Fargate Serverless: fargate-serverless.md + - Fully Private Cluster: fully-private-cluster.md + - GitOps: gitops + - Istio: istio.md + - Karpenter on EKS Fargate: karpenter.md + - Karpenter on EKS MNG: karpenter-mng.md + - Kubecost: kubecost.md + - Machine Learning: machine-learning + - Multi-tenancy: multi-tenancy-with-teams.md + - Network: network + - SSO - IAM Identity Center: sso-iam-identity-center.md + - SSO - Okta: sso-okta.md + - Stateful: stateful.md diff --git a/docs/patterns/aws-vpc-cni-network-policy.md b/docs/patterns/aws-vpc-cni-network-policy.md deleted file mode 100644 index 4ea881ace2..0000000000 --- a/docs/patterns/aws-vpc-cni-network-policy.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -title: AWS VPC CNI Network Policy ---- - -{% - include-markdown "../../patterns/aws-vpc-cni-network-policy/README.md" -%} diff --git a/docs/patterns/bottlerocket.md b/docs/patterns/bottlerocket.md index 956eb338d4..c2c4e4ee3a 100644 --- a/docs/patterns/bottlerocket.md +++ b/docs/patterns/bottlerocket.md @@ -1,5 +1,5 @@ --- -title: Bottlerocket +title: Bottlerocket --- {% diff --git a/docs/patterns/gitops-getting-started-argocd.md b/docs/patterns/gitops-getting-started-argocd.md deleted file mode 100644 index 1492cc377a..0000000000 --- a/docs/patterns/gitops-getting-started-argocd.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -title: GitOps Getting Started (ArgoCD) ---- - -{% - include-markdown "../../patterns/gitops/getting-started-argocd/README.md" -%} diff --git a/docs/patterns/gitops-multi-cluster-hub-spoke-argocd.md b/docs/patterns/gitops-multi-cluster-hub-spoke-argocd.md deleted file mode 100644 index 0ef45bb13a..0000000000 --- a/docs/patterns/gitops-multi-cluster-hub-spoke-argocd.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -title: GitOps Multi-Cluster Hub-Spoke Topology (ArgoCD) ---- - -{% - include-markdown "../../patterns/gitops/multi-cluster-hub-spoke-argocd/README.md" -%} diff --git a/docs/patterns/gitops/gitops-getting-started-argocd.md b/docs/patterns/gitops/gitops-getting-started-argocd.md new file mode 100644 index 0000000000..1c7f53e51d --- /dev/null +++ b/docs/patterns/gitops/gitops-getting-started-argocd.md @@ -0,0 +1,7 @@ +--- +title: ArgoCD - Getting Started +--- + +{% + include-markdown "../../../patterns/gitops/getting-started-argocd/README.md" +%} diff --git a/docs/patterns/gitops/gitops-multi-cluster-hub-spoke-argocd.md b/docs/patterns/gitops/gitops-multi-cluster-hub-spoke-argocd.md new file mode 100644 index 0000000000..71b06b0c3a --- /dev/null +++ b/docs/patterns/gitops/gitops-multi-cluster-hub-spoke-argocd.md @@ -0,0 +1,7 @@ +--- +title: ArgoCD - Multi-Cluster Hub & Spoke +--- + +{% + include-markdown "../../../patterns/gitops/multi-cluster-hub-spoke-argocd/README.md" +%} diff --git a/docs/patterns/ipv6-eks-cluster.md b/docs/patterns/ipv6-eks-cluster.md deleted file mode 100644 index a60e7c38f7..0000000000 --- a/docs/patterns/ipv6-eks-cluster.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -title: IPv6 Networking ---- - -{% - include-markdown "../../patterns/ipv6-eks-cluster/README.md" -%} diff --git a/docs/patterns/machine-learning/ml-capacity-block.md b/docs/patterns/machine-learning/ml-capacity-block.md new file mode 100644 index 0000000000..55d3f37e13 --- /dev/null +++ b/docs/patterns/machine-learning/ml-capacity-block.md @@ -0,0 +1,7 @@ +--- +title: ML Capacity Block Reservation (CBR) +--- + +{% + include-markdown "../../../patterns/ml-capacity-block/README.md" +%} diff --git a/docs/patterns/machine-learning/nvidia-gpu-efa.md b/docs/patterns/machine-learning/nvidia-gpu-efa.md new file mode 100644 index 0000000000..3ad7110120 --- /dev/null +++ b/docs/patterns/machine-learning/nvidia-gpu-efa.md @@ -0,0 +1,7 @@ +--- +title: NVIDIA GPUs with EFA +--- + +{% + include-markdown "../../../patterns/nvidia-gpu-efa/README.md" +%} diff --git a/docs/patterns/targeted-odcr.md b/docs/patterns/machine-learning/targeted-odcr.md similarity index 51% rename from docs/patterns/targeted-odcr.md rename to docs/patterns/machine-learning/targeted-odcr.md index dde78b0816..156f6b216b 100644 --- a/docs/patterns/targeted-odcr.md +++ b/docs/patterns/machine-learning/targeted-odcr.md @@ -3,5 +3,5 @@ title: Targeted On-Demand Capacity Reservation (ODCR) --- {% - include-markdown "../../patterns/targeted-odcr/README.md" + include-markdown "../../../patterns/targeted-odcr/README.md" %} diff --git a/docs/patterns/ml-capacity-block.md b/docs/patterns/ml-capacity-block.md deleted file mode 100644 index 2d1528bcd3..0000000000 --- a/docs/patterns/ml-capacity-block.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -title: ML Capacity Block Reservation (CBR) ---- - -{% - include-markdown "../../patterns/ml-capacity-block/README.md" -%} diff --git a/docs/patterns/network/aws-vpc-cni-network-policy.md b/docs/patterns/network/aws-vpc-cni-network-policy.md new file mode 100644 index 0000000000..c86bbf59af --- /dev/null +++ b/docs/patterns/network/aws-vpc-cni-network-policy.md @@ -0,0 +1,7 @@ +--- +title: AWS VPC CNI Network Policy +--- + +{% + include-markdown "../../../patterns/aws-vpc-cni-network-policy/README.md" +%} diff --git a/docs/patterns/vpc-lattice/client-server-communication.md b/docs/patterns/network/client-server-communication.md similarity index 100% rename from docs/patterns/vpc-lattice/client-server-communication.md rename to docs/patterns/network/client-server-communication.md diff --git a/docs/patterns/vpc-lattice/cross-cluster-pod-communication.md b/docs/patterns/network/cross-cluster-pod-communication.md similarity index 100% rename from docs/patterns/vpc-lattice/cross-cluster-pod-communication.md rename to docs/patterns/network/cross-cluster-pod-communication.md diff --git a/docs/patterns/network/ipv6-eks-cluster.md b/docs/patterns/network/ipv6-eks-cluster.md new file mode 100644 index 0000000000..2e4c706123 --- /dev/null +++ b/docs/patterns/network/ipv6-eks-cluster.md @@ -0,0 +1,7 @@ +--- +title: IPv6 Networking +--- + +{% + include-markdown "../../../patterns/ipv6-eks-cluster/README.md" +%} diff --git a/docs/patterns/network/private-public-ingress.md b/docs/patterns/network/private-public-ingress.md new file mode 100644 index 0000000000..b27250ee04 --- /dev/null +++ b/docs/patterns/network/private-public-ingress.md @@ -0,0 +1,7 @@ +--- +title: Private and Public Ingress +--- + +{% + include-markdown "../../../patterns/private-public-ingress/README.md" +%} diff --git a/docs/patterns/network/privatelink-access.md b/docs/patterns/network/privatelink-access.md new file mode 100644 index 0000000000..319e2b319a --- /dev/null +++ b/docs/patterns/network/privatelink-access.md @@ -0,0 +1,7 @@ +--- +title: PrivateLink Access +--- + +{% + include-markdown "../../../patterns/privatelink-access/README.md" +%} diff --git a/docs/patterns/network/tls-with-aws-pca-issuer.md b/docs/patterns/network/tls-with-aws-pca-issuer.md new file mode 100644 index 0000000000..602397fb0a --- /dev/null +++ b/docs/patterns/network/tls-with-aws-pca-issuer.md @@ -0,0 +1,7 @@ +--- +title: TLS w/ AWS PCA Issuer +--- + +{% + include-markdown "../../../patterns/tls-with-aws-pca-issuer/README.md" +%} diff --git a/docs/patterns/network/vpc-lattice.md b/docs/patterns/network/vpc-lattice.md new file mode 100644 index 0000000000..0946a855b8 --- /dev/null +++ b/docs/patterns/network/vpc-lattice.md @@ -0,0 +1,7 @@ +--- +title: VPC lattice +--- + +{% + include-markdown "../../../patterns/vpc-lattice/README.md" +%} diff --git a/docs/patterns/network/wireguard-with-cilium.md b/docs/patterns/network/wireguard-with-cilium.md new file mode 100644 index 0000000000..4642d36420 --- /dev/null +++ b/docs/patterns/network/wireguard-with-cilium.md @@ -0,0 +1,7 @@ +--- +title: Wireguard /w Cilium +--- + +{% + include-markdown "../../../patterns/wireguard-with-cilium/README.md" +%} diff --git a/docs/patterns/nvidia-gpu-efa.md b/docs/patterns/nvidia-gpu-efa.md deleted file mode 100644 index 6fa32b43b9..0000000000 --- a/docs/patterns/nvidia-gpu-efa.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -title: NVIDIA GPUs with EFA ---- - -{% - include-markdown "../../patterns/nvidia-gpu-efa/README.md" -%} diff --git a/docs/patterns/private-public-ingress.md b/docs/patterns/private-public-ingress.md deleted file mode 100644 index f5cc654f1d..0000000000 --- a/docs/patterns/private-public-ingress.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -title: Private and Public Ingress ---- - -{% - include-markdown "../../patterns/private-public-ingress/README.md" -%} diff --git a/docs/patterns/privatelink-access.md b/docs/patterns/privatelink-access.md deleted file mode 100644 index ee705001c4..0000000000 --- a/docs/patterns/privatelink-access.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -title: PrivateLink Access ---- - -{% - include-markdown "../../patterns/privatelink-access/README.md" -%} diff --git a/docs/patterns/tls-with-aws-pca-issuer.md b/docs/patterns/tls-with-aws-pca-issuer.md deleted file mode 100644 index 2a345ef96f..0000000000 --- a/docs/patterns/tls-with-aws-pca-issuer.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -title: TLS w/ AWS PCA Issuer ---- - -{% - include-markdown "../../patterns/tls-with-aws-pca-issuer/README.md" -%} diff --git a/docs/patterns/vpc-lattice.md b/docs/patterns/vpc-lattice.md deleted file mode 100644 index f5ae6dc863..0000000000 --- a/docs/patterns/vpc-lattice.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -title: VPC lattice ---- - -{% - include-markdown "../../patterns/vpc-lattice/README.md" -%} diff --git a/docs/patterns/wireguard-with-cilium.md b/docs/patterns/wireguard-with-cilium.md deleted file mode 100644 index e7eb318480..0000000000 --- a/docs/patterns/wireguard-with-cilium.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -title: Wireguard /w Cilium ---- - -{% - include-markdown "../../patterns/wireguard-with-cilium/README.md" -%} diff --git a/patterns/bottlerocket/karpenter-resources/templates/ec2nodeclass.yaml b/patterns/bottlerocket/karpenter-resources/templates/ec2nodeclass.yaml index 57128ee103..f8c1aaed54 100644 --- a/patterns/bottlerocket/karpenter-resources/templates/ec2nodeclass.yaml +++ b/patterns/bottlerocket/karpenter-resources/templates/ec2nodeclass.yaml @@ -25,4 +25,4 @@ spec: encrypted: true kmsKeyID: {{ .Values.ec2nodeclass.blockDeviceMappings.ebs.kmsKeyID }} deleteOnTermination: true - volumeSize: 20Gi \ No newline at end of file + volumeSize: 20Gi diff --git a/patterns/bottlerocket/karpenter-resources/templates/nodepool.yaml b/patterns/bottlerocket/karpenter-resources/templates/nodepool.yaml index 5b37ab3f2f..4813cbf505 100644 --- a/patterns/bottlerocket/karpenter-resources/templates/nodepool.yaml +++ b/patterns/bottlerocket/karpenter-resources/templates/nodepool.yaml @@ -44,4 +44,4 @@ spec: - key: kubernetes.io/os operator: In values: - - linux \ No newline at end of file + - linux diff --git a/patterns/bottlerocket/karpenter-resources/values.yaml b/patterns/bottlerocket/karpenter-resources/values.yaml index b3b272efc6..f7aabaaecf 100644 --- a/patterns/bottlerocket/karpenter-resources/values.yaml +++ b/patterns/bottlerocket/karpenter-resources/values.yaml @@ -4,10 +4,10 @@ ec2nodeclass: role: "" securityGroupSelectorTerms: tags: - karpenter.sh/discovery: "" + karpenter.sh/discovery: "" subnetSelectorTerms: tags: - karpenter.sh/discovery: "" + karpenter.sh/discovery: "" blockDeviceMappings: ebs: kmsKeyID: "" diff --git a/patterns/ecr-pull-through-cache/addons.tf b/patterns/ecr-pull-through-cache/addons.tf index 3ccb5e6d93..ad0de9c6b3 100644 --- a/patterns/ecr-pull-through-cache/addons.tf +++ b/patterns/ecr-pull-through-cache/addons.tf @@ -1,5 +1,5 @@ locals { - ecr_url = "${data.aws_caller_identity.current.account_id}.dkr.ecr.${local.region}.amazonaws.com" + ecr_url = "${data.aws_caller_identity.current.account_id}.dkr.ecr.${local.region}.amazonaws.com" } module "eks_blueprints_addons" { diff --git a/patterns/nvidia-gpu-efa/README.md b/patterns/nvidia-gpu-efa/README.md index 873bbc4a82..f8f92bbf9e 100644 --- a/patterns/nvidia-gpu-efa/README.md +++ b/patterns/nvidia-gpu-efa/README.md @@ -33,7 +33,7 @@ See [here](https://aws-ia.github.io/terraform-aws-eks-blueprints/getting-started !!! note - Desired instance type can be specified in [eks.tf](eks.tf#L36). + Desired instance type can be specified in [eks.tf](eks.tf#L36). Values shown below will change based on the instance type selected (i.e. - `p5.48xlarge` has 8 GPUs and 32 EFA interfaces). A list of EFA-enabled instance types is available [here](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/efa.html#efa-instance-types). If you are using an on-demand capacity reservation (ODCR) for your instance type, please uncomment the `capacity_reservation_specification` block in `eks.tf` @@ -92,15 +92,15 @@ See [here](https://aws-ia.github.io/terraform-aws-eks-blueprints/getting-started This test prints a list of available EFA interfaces by using the `/opt/amazon/efa/bin/fi_info` utility. The script [generate-efa-info-test.sh](generate-efa-info-test.sh) creates an MPIJob manifest file named `efa-info-test.yaml`. It assumes that there are two cluster nodes with 8 GPU's per node and 32 EFA adapters. If you are not using `p5.48xlarge` instances in your cluster, you may adjust the settings in the script prior to running it. - + `NUM_WORKERS` - number of nodes you want to run the test on `GPU_PER_WORKER` - number of GPUs available on each node `EFA_PER_WORKER` - number of EFA interfaces available on each node - + ```sh ./generate-efa-info-test.sh ``` - + To start the test apply the generated manifest to the cluster: ```sh @@ -109,7 +109,7 @@ See [here](https://aws-ia.github.io/terraform-aws-eks-blueprints/getting-started ```text mpijob.kubeflow.org/efa-info-test created - ``` + ``` Observe the pods in the current namespace. You should see a launcher pod and worker pods. It is normal for the launcher pod to restart a few times until the worker pods are fully running. @@ -137,7 +137,7 @@ See [here](https://aws-ia.github.io/terraform-aws-eks-blueprints/getting-started efa-info-test-launcher-wm8pm 0/1 Completed 2 5m20s ``` - Once the test launcher pod enters status `Running` or `Completed`, + Once the test launcher pod enters status `Running` or `Completed`, see the test logs using the command below: ```sh @@ -153,9 +153,9 @@ See [here](https://aws-ia.github.io/terraform-aws-eks-blueprints/getting-started [1,1]: version: 120.10 [1,1]: type: FI_EP_RDM [1,1]: protocol: FI_PROTO_EFA - + ... - + [1,0]:provider: efa [1,0]: fabric: efa [1,0]: domain: rdmap201s0-rdm @@ -165,20 +165,20 @@ See [here](https://aws-ia.github.io/terraform-aws-eks-blueprints/getting-started ``` Finally, remove the job: - + ```sh kubectl delete -f ./efa-info-test.yaml ``` 4. EFA NCCL test - The EFA NCCL test is used to measure network bandwidth by running the `/opt/nccl-tests/build/all_reduce_perf` utility. + The EFA NCCL test is used to measure network bandwidth by running the `/opt/nccl-tests/build/all_reduce_perf` utility. Create an MPIjob manifest by executing the script below: - + ```sh ./generate-efa-nccl-test.sh ``` - + This script creates a file named `efa-nccl-test.yaml`. Apply the manifest to start the EFA nccl test. ```sh @@ -186,21 +186,21 @@ See [here](https://aws-ia.github.io/terraform-aws-eks-blueprints/getting-started ```text mpijob.kubeflow.org/efa-nccl-test created - ``` + ``` Similarly to the EFA info test, a launcher and worker pods will be created. The launcher pod will be - in CrashLoopBackoff mode until the worker pods enter Running state. + in CrashLoopBackoff mode until the worker pods enter Running state. As soon as the launcher pod enters Running state as well, execute the following command to see the test logs: - + ```sh kubectl logs -f $(kubectl get pods | grep launcher | cut -d ' ' -f 1) ``` ```text ... - [1,0]:# out-of-place in-place + [1,0]:# out-of-place in-place [1,0]:# size count type redop root time algbw busbw #wrong time algbw busbw #wrong - [1,0]:# (B) (elements) (us) (GB/s) (GB/s) (us) (GB/s) (GB/s) + [1,0]:# (B) (elements) (us) (GB/s) (GB/s) (us) (GB/s) (GB/s) [1,0]: 0 0 float sum -1 0.13 0.00 0.00 0 0.12 0.00 0.00 0 [1,0]: 0 0 float sum -1 0.12 0.00 0.00 0 0.12 0.00 0.00 0 [1,0]: 4 1 float sum -1 65.43 0.00 0.00 0 65.82 0.00 0.00 0 @@ -234,17 +234,17 @@ See [here](https://aws-ia.github.io/terraform-aws-eks-blueprints/getting-started [1,0]: 1073741824 268435456 float sum -1 4553.6 235.80 442.13 0 4553.0 235.83 442.19 0 [1,0]: 2147483648 536870912 float sum -1 9062.5 236.96 444.31 0 9060.4 237.02 444.41 0 [1,0]:# Out of bounds values : 0 OK - [1,0]:# Avg bus bandwidth : 79.9352 + [1,0]:# Avg bus bandwidth : 79.9352 [1,0]:# ``` - Columns 9 and 13 in the output table show the in-place and out-of-place bus bandwidth calculated for the data size listed in column 2. + Columns 9 and 13 in the output table show the in-place and out-of-place bus bandwidth calculated for the data size listed in column 2. In this case it is at maximum 444.31 and 444.41 GB/s respectively. Your actual results may be slightly different. The calculated average bus bandwidth is displayed at the end of the log. In this test run the average bus bandwidth was 79.9352 GB/s. Lastly, delete the MPIJob: - + ```sh kubectl delete -f ./efa-nccl-test.yaml ``` diff --git a/patterns/nvidia-gpu-efa/generate-efa-info-test.sh b/patterns/nvidia-gpu-efa/generate-efa-info-test.sh index 646d9724db..ac550351fa 100755 --- a/patterns/nvidia-gpu-efa/generate-efa-info-test.sh +++ b/patterns/nvidia-gpu-efa/generate-efa-info-test.sh @@ -90,4 +90,3 @@ spec: nvidia.com/gpu: ${GPU_PER_WORKER} vpc.amazonaws.com/efa: ${EFA_PER_WORKER} EOF - diff --git a/patterns/vpc-lattice/cross-cluster-pod-communication/README.md b/patterns/vpc-lattice/cross-cluster-pod-communication/README.md index 68e6b621ae..f6d4c60f23 100644 --- a/patterns/vpc-lattice/cross-cluster-pod-communication/README.md +++ b/patterns/vpc-lattice/cross-cluster-pod-communication/README.md @@ -148,7 +148,7 @@ In this setup, we used a Kyverno rule to inject iptables rules, and an envoy sid kubectl --context eks-cluster1 exec -it deploy/demo-cluster1-v1 -c envoy-sigv4 -n apps -- cat /usr/local/bin/launch_envoy.sh ``` - Output: + Output: ``` #!/bin/sh @@ -187,7 +187,7 @@ In this setup, we used a Kyverno rule to inject iptables rules, and an envoy sid > If the VPC was not able to destroy, you may want to re-run the destroy command a second time -If the VPC lattice service network still exists, you can remove it with the following command: +If the VPC lattice service network still exists, you can remove it with the following command: ```bash SN=$(aws vpc-lattice list-service-networks --query 'items[?name==`lattice-gateway`].id' --output text) diff --git a/patterns/vpc-lattice/cross-cluster-pod-communication/cluster/charts/platform/templates/kyverno-cluster-policy.yaml b/patterns/vpc-lattice/cross-cluster-pod-communication/cluster/charts/platform/templates/kyverno-cluster-policy.yaml index 48f18b6dda..43c3025987 100644 --- a/patterns/vpc-lattice/cross-cluster-pod-communication/cluster/charts/platform/templates/kyverno-cluster-policy.yaml +++ b/patterns/vpc-lattice/cross-cluster-pod-communication/cluster/charts/platform/templates/kyverno-cluster-policy.yaml @@ -42,14 +42,12 @@ spec: securityContext: runAsGroup: 0 env: - - name: APP_DOMAIN - value: "example.com" - - name: CA_ARN - value: "{{ .Values.acmpCAArn }}" - args: [ - "-l", "info" - ] + - name: APP_DOMAIN + value: "example.com" + - name: CA_ARN + value: "{{ .Values.acmpCAArn }}" + args: ["-l", "info"] ports: - - containerPort: 8080 - name: proxy - protocol: TCP + - containerPort: 8080 + name: proxy + protocol: TCP