Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: config karpenter TDE-903 #196

Merged
merged 24 commits into from
Oct 24, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
61d75bc
feat: upgrade cdk8s-cli
paulfouquet Oct 13, 2023
c469211
wip
paulfouquet Oct 16, 2023
a28ec4d
feat(cdk8s): retrieve eks cluster information
paulfouquet Oct 17, 2023
6edffe8
Merge branch 'master' into feat/config-karpenter-tde-903
paulfouquet Oct 18, 2023
6e505b5
wip
paulfouquet Oct 19, 2023
eff13ce
Merge branch 'master' into feat/config-karpenter-tde-903
paulfouquet Oct 19, 2023
8478bd3
fix: delete tsconfig.json
paulfouquet Oct 19, 2023
9704ae3
wip
paulfouquet Oct 19, 2023
3323bae
feat: init cdk8s karpenter and provisioners
paulfouquet Oct 20, 2023
d019878
feat: import config
blacha Oct 20, 2023
be19119
fix: allow any for imported files
blacha Oct 20, 2023
397cc9c
fix: allow provisoners to be deployed
blacha Oct 20, 2023
4bb282d
fix: taint all karpenter instances
blacha Oct 20, 2023
7fe0ee2
wip: hack around a bit to get karpenter to start
blacha Oct 23, 2023
38dac74
fix: allow ipv6 address creation
blacha Oct 23, 2023
75dfe4a
fix: lint fails
paulfouquet Oct 23, 2023
ac33309
fix: need to use something to determine what subnets to use Name: * s…
blacha Oct 24, 2023
c5dfc33
fix: override coredns to fix AAAA records being resolved for external…
blacha Oct 24, 2023
2e14936
docs: initial docs for debugging dns resolution
blacha Oct 24, 2023
6fa300d
docs: add small details to dns.configuration
paulfouquet Oct 24, 2023
38d87a2
docs: add missing dots
paulfouquet Oct 24, 2023
3b3a60f
refactor: share cluster name
paulfouquet Oct 24, 2023
5c56550
docs: update readme
paulfouquet Oct 24, 2023
df7a413
Merge branch 'master' into feat/config-karpenter-tde-903
paulfouquet Oct 24, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ node_modules/
dist/
cdk.out/
cdk.context.json
config/imports/
paulfouquet marked this conversation as resolved.
Show resolved Hide resolved
3 changes: 3 additions & 0 deletions cdk8s.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
app: npx tsx config/cdk8s.ts
language: typescript
imports:
# CRDs are pinned to the same Karpenter release as the Helm charts (v0.31.0 in
# config/charts/karpenter.ts) so the generated types cannot drift from what is
# deployed when the upstream `main` branch changes.
- https://raw.githubusercontent.com/aws/karpenter/v0.31.0/pkg/apis/crds/karpenter.sh_provisioners.yaml
- https://raw.githubusercontent.com/aws/karpenter/v0.31.0/pkg/apis/crds/karpenter.k8s.aws_awsnodetemplates.yaml
14 changes: 12 additions & 2 deletions config/cdk8s.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { App } from 'cdk8s';

import { CfnOutputKeys } from './cfn.output';
import { ArgoSemaphore } from './charts/argo.semaphores';
import { Karpenter } from './charts/karpenter';
import { Karpenter, KarpenterProvisioner } from './charts/karpenter';
import { getCfnOutputs } from './util/cloud.formation';

const app = new App();
Expand All @@ -17,14 +17,24 @@ async function main(): Promise<void> {

new ArgoSemaphore(app, 'semaphore', {});

new Karpenter(app, 'karpenter', {
const karpenter = new Karpenter(app, 'karpenter', {
clusterName: 'Workflows',
clusterEndpoint: cfnOutputs[CfnOutputKeys.Karpenter.ClusterEndpoint],
saRoleName: cfnOutputs[CfnOutputKeys.Karpenter.ServiceAccountName],
saRoleArn: cfnOutputs[CfnOutputKeys.Karpenter.ServiceAccountRoleArn],
instanceProfile: cfnOutputs[CfnOutputKeys.Karpenter.DefaultInstanceProfile],
});

const karpenterProvisioner = new KarpenterProvisioner(app, 'karpenter-provisioner', {
clusterName: 'Workflows',
clusterEndpoint: cfnOutputs[CfnOutputKeys.Karpenter.ClusterEndpoint],
saRoleName: cfnOutputs[CfnOutputKeys.Karpenter.ServiceAccountName],
saRoleArn: cfnOutputs[CfnOutputKeys.Karpenter.ServiceAccountRoleArn],
instanceProfile: cfnOutputs[CfnOutputKeys.Karpenter.DefaultInstanceProfile],
});

karpenterProvisioner.addDependency(karpenter);

app.synth();
}

Expand Down
92 changes: 88 additions & 4 deletions config/charts/karpenter.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import { Chart, ChartProps, Helm } from 'cdk8s';
import { Chart, ChartProps, Duration, Helm } from 'cdk8s';
import { Construct } from 'constructs';

import {
AwsNodeTemplateSpec,
AwsNodeTemplateSpecBlockDeviceMappingsEbsVolumeSize,
} from '../imports/karpenter.k8s.aws.js';
import { Provisioner, ProvisionerSpecLimitsResources } from '../imports/karpenter.sh.js';
import { applyDefaultLabels } from '../util/labels.js';

export interface KarpenterProps {
Expand All @@ -16,12 +21,32 @@ export class Karpenter extends Chart {
// TODO: What is the component name? 'karpenter' or 'autoscaling'?
super(scope, id, applyDefaultLabels(props, 'karpenter', '', 'karpenter', 'workflows'));

// Deploying the CRD
new Helm(this, 'karpenter-crd', {
chart: 'oci://public.ecr.aws/karpenter/karpenter-crd',
namespace: 'karpenter',
version: 'v0.31.0',
});

// Karpenter is published to an `oci` registry rather than a regular helm repo: https://gallery.ecr.aws/karpenter/karpenter.
// As a workaround to make this Helm construct work with `oci`,
// the `oci` URL is passed as `chart` instead of `repo`, so the arguments of the generated `helm`
// command are the following:
// [
// 'template',
// '-f',
// '/tmp/cdk8s-helm-keYZCA/overrides.yaml',
// '--version',
// 'v0.31.0',
// '--namespace',
// 'karpenter',
// 'karpenter-c870a560',
// 'oci://public.ecr.aws/karpenter/karpenter'
// ]
new Helm(this, 'karpenter', {
chart: 'karpenter',
repo: 'oci://public.ecr.aws/karpenter/karpenter',
chart: 'oci://public.ecr.aws/karpenter/karpenter',
namespace: 'karpenter',
version: 'v0.31.0',
releaseName: 'karpenter',
values: {
serviceAccount: {
create: false,
Expand All @@ -39,3 +64,62 @@ export class Karpenter extends Chart {
});
}
}

/**
 * Chart declaring the Karpenter `Provisioner` resources in the `karpenter` namespace:
 * one for amd64 spot instances and one for arm64 spot instances.
 * Both provisioners use the same AWS node template as their `provider`.
 */
export class KarpenterProvisioner extends Chart {
  constructor(scope: Construct, id: string, props: KarpenterProps & ChartProps) {
    // TODO: What is the component name? 'karpenter' or 'autoscaling'?
    super(scope, id, applyDefaultLabels(props, 'karpenter', '', 'karpenter', 'workflows'));

    // Values derived from the cluster name, used by the node template and the provisioner names.
    const securityGroupTag = `kubernetes.io/cluster/${props.clusterName}`;
    const provisionerNamePrefix = `eks-karpenter-${props.clusterName}`;

    // Optional, but the provisioner never scales down if it is not set.
    const emptyTtlSeconds = Duration.minutes(1).toSeconds();

    // Builds an `In` requirement for the given key.
    const oneOf = (key: string, values: string[]) => ({ key, operator: 'In' as const, values });

    // Builds a fresh CPU limit for each provisioner.
    const cpuLimit = () => ({ resources: { cpu: ProvisionerSpecLimitsResources.fromString('20000m') } });

    // Node template shared by both provisioners.
    const nodeTemplate: AwsNodeTemplateSpec = {
      amiFamily: 'Bottlerocket',
      subnetSelector: { 'aws-ids': '' }, // TODO How to get those?
      securityGroupSelector: { [securityGroupTag]: 'owned' },
      instanceProfile: props.instanceProfile,
      blockDeviceMappings: [
        {
          deviceName: '/dev/xvdb',
          ebs: {
            volumeType: 'gp3',
            // FIXME: This does not match `'^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$'`
            volumeSize: AwsNodeTemplateSpecBlockDeviceMappingsEbsVolumeSize.fromString('200Gi'),
            deleteOnTermination: true,
          },
        },
      ],
    };

    // amd64 spot worker nodes.
    new Provisioner(this, 'ClusterAmd64WorkerNodes', {
      metadata: { name: `${provisionerNamePrefix}-amd64`.toLowerCase(), namespace: 'karpenter' },
      spec: {
        requirements: [
          oneOf('karpenter.sh/capacity-type', ['spot']),
          oneOf('kubernetes.io/arch', ['amd64']),
          oneOf('karpenter.k8s.aws/instance-family', ['c5', 'c6i', 'c6a']),
        ],
        limits: cpuLimit(),
        provider: nodeTemplate,
        ttlSecondsAfterEmpty: emptyTtlSeconds,
      },
    });

    // arm64 spot worker nodes.
    new Provisioner(this, 'ClusterArmWorkerNodes', {
      metadata: { name: `${provisionerNamePrefix}-arm64`.toLowerCase(), namespace: 'karpenter' },
      spec: {
        // Instances that want ARM have to tolerate the arm taint.
        // This prevents some pods from accidentally trying to start on ARM.
        taints: [
          { key: 'kubernetes.io/arch', value: 'arm64', effect: 'NoSchedule' },
          { key: 'karpenter.sh/capacity-type', value: 'spot', effect: 'NoSchedule' },
        ],
        requirements: [
          oneOf('karpenter.sh/capacity-type', ['spot']),
          oneOf('kubernetes.io/arch', ['arm64']),
          oneOf('karpenter.k8s.aws/instance-family', ['c7g', 'c6g']),
        ],
        limits: cpuLimit(),
        provider: nodeTemplate,
        ttlSecondsAfterEmpty: emptyTtlSeconds,
      },
    });
  }
}
11 changes: 8 additions & 3 deletions config/eks/cluster.ts
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@ export class LinzEksCluster extends Stack {
);

// Allow Karpenter to start ec2 instances
// FIXME: some policies are missing. See https://github.com/aws/karpenter/blob/8c33a40733b90aa0bb42a6436152374f7b359f69/website/content/en/docs/getting-started/getting-started-with-karpenter/cloudformation.yaml#L40
// The current policies are based on https://github.com/eksctl-io/eksctl/blob/main/pkg/cfn/builder/karpenter_test.go#L111
new Policy(this, 'ControllerPolicy', {
roles: [serviceAccount.role],
statements: [
Expand All @@ -136,19 +138,22 @@ export class LinzEksCluster extends Stack {
'ec2:CreateFleet',
'ec2:CreateLaunchTemplate',
'ec2:CreateTags',
'ec2:DeleteLaunchTemplate',
'ec2:DescribeAvailabilityZones',
'ec2:DescribeInstances',
'ec2:DescribeInstanceTypeOfferings',
'ec2:DescribeInstanceTypes',
'ec2:DescribeInstances',
'ec2:DescribeLaunchTemplates',
'ec2:DescribeSecurityGroups',
'ec2:DescribeSubnets',
'ec2:DeleteLaunchTemplate',
'ec2:RunInstances',
'ec2:TerminateInstances',
'ec2:DescribeImages',
'ec2:DescribeSpotPriceHistory',
'iam:PassRole',
'iam:CreateServiceLinkedRole',
'ssm:GetParameter',

'pricing:GetProducts',
// LINZ requires instances to be encrypted with a KMS key
'kms:Encrypt',
'kms:Decrypt',
Expand Down
Loading