From a039aecf92ea65d1773f130a044a67a5fed75cc5 Mon Sep 17 00:00:00 2001 From: Michael Barroco Date: Thu, 16 Feb 2023 10:52:39 +0100 Subject: [PATCH] [terraform] #874: terraform module for AWS (#942) --- build/.DS_Store | Bin 6148 -> 0 bytes build/README.md | 9 + build/apply-certs.sh | 11 +- build/deploy/base.libsonnet | 27 ++ build/deploy/cockroachdb-auxiliary.libsonnet | 11 +- build/deploy/core-service.libsonnet | 71 ++++-- build/deploy/metadata_base.libsonnet | 7 +- build/make-certs.py | 7 +- deploy/README.md | 11 + deploy/infrastructure/.gitignore | 1 + .../terraform-aws-kubernetes/.gitignore | 1 + .../AWSLoadBalancerControllerPolicy.json | 219 ++++++++++++++++ .../terraform-aws-kubernetes/README.md | 131 ++++++++++ .../terraform-aws-kubernetes/cluster.tf | 42 ++++ .../terraform-aws-kubernetes/ebs.tf | 18 ++ .../terraform-aws-kubernetes/iam.tf | 106 ++++++++ .../terraform-aws-kubernetes/main.tf | 33 +++ .../terraform-aws-kubernetes/network_dns.tf | 39 +++ .../terraform-aws-kubernetes/network_lb.tf | 63 +++++ .../terraform-aws-kubernetes/network_vpc.tf | 70 ++++++ .../terraform-aws-kubernetes/output.tf | 55 ++++ .../test-app.template.yml | 74 ++++++ .../terraform-aws-kubernetes/test_resource.tf | 10 + .../terraform-aws-kubernetes/variables.tf | 79 ++++++ .../terraform-commons-dss/default_latest.tf | 6 +- .../terraform-commons-dss/main.tf | 17 +- .../terraform-commons-dss/output.tf | 7 +- .../templates/main.jsonnet.tmp | 7 + .../terraform-commons-dss/variables.tf | 59 ++--- .../variables_internal.tf | 14 +- .../terraform-google-kubernetes/variables.tf | 14 +- .../modules/terraform-aws-dss/DNS.md | 57 +++++ .../modules/terraform-aws-dss/README.md | 73 ++++++ .../modules/terraform-aws-dss/TFVARS.md | 229 +++++++++++++++++ .../modules/terraform-aws-dss/main.tf | 34 +++ .../modules/terraform-aws-dss/output.tf | 15 ++ .../terraform.dev.example.tfvars | 27 ++ .../modules/terraform-aws-dss/variables.tf | 238 ++++++++++++++++++ .../modules/terraform-google-dss/TFVARS.md | 
36 +-- .../modules/terraform-google-dss/main.tf | 17 +- .../modules/terraform-google-dss/variables.tf | 38 +-- .../utils/definitions/authorization.tf | 10 +- .../utils/definitions/aws_instance_type.tf | 8 + .../aws_kubernetes_storage_class.tf | 10 + .../utils/definitions/aws_region.tf | 10 + .../utils/definitions/aws_route53_zone_id.tf | 10 + .../utils/definitions/google_machine_type.tf | 5 +- .../infrastructure/utils/definitions/image.tf | 9 +- deploy/infrastructure/utils/variables.py | 62 +++-- 49 files changed, 1953 insertions(+), 154 deletions(-) delete mode 100644 build/.DS_Store create mode 100644 deploy/README.md create mode 100644 deploy/infrastructure/dependencies/terraform-aws-kubernetes/.gitignore create mode 100644 deploy/infrastructure/dependencies/terraform-aws-kubernetes/AWSLoadBalancerControllerPolicy.json create mode 100644 deploy/infrastructure/dependencies/terraform-aws-kubernetes/README.md create mode 100644 deploy/infrastructure/dependencies/terraform-aws-kubernetes/cluster.tf create mode 100644 deploy/infrastructure/dependencies/terraform-aws-kubernetes/ebs.tf create mode 100644 deploy/infrastructure/dependencies/terraform-aws-kubernetes/iam.tf create mode 100644 deploy/infrastructure/dependencies/terraform-aws-kubernetes/main.tf create mode 100644 deploy/infrastructure/dependencies/terraform-aws-kubernetes/network_dns.tf create mode 100644 deploy/infrastructure/dependencies/terraform-aws-kubernetes/network_lb.tf create mode 100644 deploy/infrastructure/dependencies/terraform-aws-kubernetes/network_vpc.tf create mode 100644 deploy/infrastructure/dependencies/terraform-aws-kubernetes/output.tf create mode 100644 deploy/infrastructure/dependencies/terraform-aws-kubernetes/test-app.template.yml create mode 100644 deploy/infrastructure/dependencies/terraform-aws-kubernetes/test_resource.tf create mode 100644 deploy/infrastructure/dependencies/terraform-aws-kubernetes/variables.tf create mode 100644 
deploy/infrastructure/modules/terraform-aws-dss/DNS.md create mode 100644 deploy/infrastructure/modules/terraform-aws-dss/README.md create mode 100644 deploy/infrastructure/modules/terraform-aws-dss/TFVARS.md create mode 100644 deploy/infrastructure/modules/terraform-aws-dss/main.tf create mode 100644 deploy/infrastructure/modules/terraform-aws-dss/output.tf create mode 100644 deploy/infrastructure/modules/terraform-aws-dss/terraform.dev.example.tfvars create mode 100644 deploy/infrastructure/modules/terraform-aws-dss/variables.tf create mode 100644 deploy/infrastructure/utils/definitions/aws_instance_type.tf create mode 100644 deploy/infrastructure/utils/definitions/aws_kubernetes_storage_class.tf create mode 100644 deploy/infrastructure/utils/definitions/aws_region.tf create mode 100644 deploy/infrastructure/utils/definitions/aws_route53_zone_id.tf diff --git a/build/.DS_Store b/build/.DS_Store deleted file mode 100644 index dfb90e4f21c8d80de44f4789f86a9b41aa6f4768..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKI|>3Z5S>vG!N$@uSMUZw^aOhW3&loM1O>aTJeNoF&8N}TP8)dxlb1~9CFB)5 zJ0ha<^Li#S5s?wxP;NGK&GyYZ*2{pSS$dn ziESVvFbyg&sG2Q?1|9K|c{Q;O47zAGADTC7b|~t%4-ZW$e#hzg+>K_t-u3GY!$o! diff --git a/build/README.md b/build/README.md index 27b81dca2..95887eead 100644 --- a/build/README.md +++ b/build/README.md @@ -81,6 +81,11 @@ endpoint. like `gcr.io/your-project-id` (do not include the image name; it will be appended by the build script) + - For Amazon Web Services, `DOCKER_URL` should be set similarly to as described + [here](https://docs.aws.amazon.com/AmazonECR/latest/userguide/docker-push-ecr-image.html), + like `${aws_account_id}.dkr.ecr.${region}.amazonaws.com/` (do not include the image name; + it will be appended by the build script) + 1. Ensure you are logged into your docker registry service. - For Google Cloud, @@ -89,6 +94,10 @@ endpoint. 
Ensure that [appropriate permissions are enabled](https://cloud.google.com/container-registry/docs/access-control). + - For Amazon Web Services, create a private repository by following the instructions + [here](https://docs.aws.amazon.com/AmazonECR/latest/userguide/repository-create.html), then login + as described [here](https://docs.aws.amazon.com/AmazonECR/latest/userguide/docker-push-ecr-image.html). + 1. Use the [`build.sh` script](./build.sh) in this directory to build and push an image tagged with the current date and git commit hash. diff --git a/build/apply-certs.sh b/build/apply-certs.sh index 3aa61154b..b2074ab51 100755 --- a/build/apply-certs.sh +++ b/build/apply-certs.sh @@ -24,10 +24,13 @@ set -x CONTEXT="$1" DIR="$(pwd)" NAMESPACE="$2" -CLIENTS_CERTS_DIR="$DIR/workspace/$CONTEXT/client_certs_dir" -NODE_CERTS_DIR="$DIR/workspace/$CONTEXT/node_certs_dir" -CA_KEY_DIR="$DIR/workspace/$CONTEXT/ca_key_dir" -CA_CRT_DIR="$DIR/workspace/$CONTEXT/ca_certs_dir" + +# Replace characters breaking folder names +WORKSPACE=$(echo "${CONTEXT}" | tr ':/' '_') +CLIENTS_CERTS_DIR="$DIR/workspace/$WORKSPACE/client_certs_dir" +NODE_CERTS_DIR="$DIR/workspace/$WORKSPACE/node_certs_dir" +CA_KEY_DIR="$DIR/workspace/$WORKSPACE/ca_key_dir" +CA_CRT_DIR="$DIR/workspace/$WORKSPACE/ca_certs_dir" JWT_PUBLIC_CERTS_DIR="$DIR/jwt-public-certs" UPLOAD_CA_KEY=true diff --git a/build/deploy/base.libsonnet b/build/deploy/base.libsonnet index 85d0d4b9d..5bf12ac05 100644 --- a/build/deploy/base.libsonnet +++ b/build/deploy/base.libsonnet @@ -231,4 +231,31 @@ local util = import 'util.libsonnet'; assert std.length(self.containers) > 0 : 'must have at least one container', }, + + // Reusable cloud provider specific resources + AWSLoadBalancer(metadata, name, ipNames, subnet): $.Service(metadata, name) { + type:: 'LoadBalancer', + metadata+: { + annotations+: { + 'service.beta.kubernetes.io/aws-load-balancer-type': 'external', + 'service.beta.kubernetes.io/aws-load-balancer-nlb-target-type': 
'ip', + 'service.beta.kubernetes.io/aws-load-balancer-scheme': 'internet-facing', + 'service.beta.kubernetes.io/aws-load-balancer-eip-allocations': std.join(',', ipNames), + 'service.beta.kubernetes.io/aws-load-balancer-name': name, + 'service.beta.kubernetes.io/aws-load-balancer-subnets': metadata.subnet, + }, + }, + spec+: { + loadBalancerClass: "service.k8s.aws/nlb", + }, + }, + + AWSLoadBalancerWithManagedCert(metadata, name, ipNames, subnet, certARN): $.AWSLoadBalancer(metadata, name, ipNames, subnet) { + metadata+: { + annotations+: { + 'service.beta.kubernetes.io/aws-load-balancer-ssl-ports': '443', + 'service.beta.kubernetes.io/aws-load-balancer-ssl-cert': certARN, + }, + }, + } } diff --git a/build/deploy/cockroachdb-auxiliary.libsonnet b/build/deploy/cockroachdb-auxiliary.libsonnet index c906daf8c..6135c3bb6 100644 --- a/build/deploy/cockroachdb-auxiliary.libsonnet +++ b/build/deploy/cockroachdb-auxiliary.libsonnet @@ -1,7 +1,7 @@ local base = import 'base.libsonnet'; local volumes = import 'volumes.libsonnet'; -local cockroachLB(metadata, name, ip) = base.Service(metadata, name) { +local googleCockroachLB(metadata, name, ip) = base.Service(metadata, name) { port:: metadata.cockroach.grpc_port, app:: 'cockroachdb', spec+: { @@ -10,6 +10,15 @@ local cockroachLB(metadata, name, ip) = base.Service(metadata, name) { }, }; +local awsCockroachLB(metadata, name, ip) = base.AWSLoadBalancer(metadata, name, [ip], metadata.subnet) { + port:: metadata.cockroach.grpc_port, + app:: 'cockroachdb', +}; + +local cockroachLB(metadata, name, ip) = + if metadata.cloud_provider == "google" then googleCockroachLB(metadata, name, ip) + else if metadata.cloud_provider == "aws" then awsCockroachLB(metadata, name, ip); + { all(metadata): { CockroachInit: if metadata.cockroach.shouldInit then base.Job(metadata, 'init') { diff --git a/build/deploy/core-service.libsonnet b/build/deploy/core-service.libsonnet index a5919d2cd..fb472ce0f 100644 --- a/build/deploy/core-service.libsonnet 
+++ b/build/deploy/core-service.libsonnet @@ -1,28 +1,40 @@ local base = import 'base.libsonnet'; local volumes = import 'volumes.libsonnet'; -local ingress(metadata) = base.Ingress(metadata, 'https-ingress') { - metadata+: { - annotations: { - 'kubernetes.io/ingress.global-static-ip-name': metadata.backend.ipName, - 'kubernetes.io/ingress.allow-http': 'false', +local awsLoadBalancer(metadata) = base.AWSLoadBalancerWithManagedCert(metadata, 'gateway', [metadata.backend.ipName], metadata.subnet, metadata.backend.certName) { + app:: 'core-service', + spec+: { + ports: [{ + port: 443, + targetPort: metadata.backend.port, + protocol: "TCP", + name: "http", + }] + } +}; + +{ + GoogleIngress(metadata): base.Ingress(metadata, 'https-ingress') { + metadata+: { + annotations: { + 'kubernetes.io/ingress.global-static-ip-name': metadata.backend.ipName, + 'kubernetes.io/ingress.allow-http': 'false', + }, }, - }, - spec: { - defaultBackend: { - service: { - name: 'core-service', - port: { - number: metadata.backend.port, + spec: { + defaultBackend: { + service: { + name: 'core-service', + port: { + number: metadata.backend.port, + } } - } + }, }, }, -}; -{ - ManagedCertIngress(metadata): { - ingress: ingress(metadata) { + GoogleManagedCertIngress(metadata): { + ingress: $.GoogleIngress(metadata) { metadata+: { annotations+: { 'networking.gke.io/managed-certificates': 'https-certificate', @@ -38,7 +50,7 @@ local ingress(metadata) = base.Ingress(metadata, 'https-ingress') { }, }, - PresharedCertIngress(metadata, certName): ingress(metadata) { + GooglePresharedCertIngress(metadata, certName): $.GoogleIngress(metadata) { metadata+: { annotations+: { 'ingress.gcp.kubernetes.io/pre-shared-cert': certName, @@ -46,14 +58,23 @@ local ingress(metadata) = base.Ingress(metadata, 'https-ingress') { }, }, - all(metadata): { - ingress: $.ManagedCertIngress(metadata), - service: base.Service(metadata, 'core-service') { - app:: 'core-service', - port:: metadata.backend.port, - type:: 
'NodePort', - enable_monitoring:: false, + GoogleService(metadata): base.Service(metadata, 'core-service') { + app:: 'core-service', + port:: metadata.backend.port, + type:: 'NodePort', + enable_monitoring:: false, + }, + + CloudNetwork(metadata): { + google: if metadata.cloud_provider == "google" then { + ingress: $.GoogleManagedCertIngress(metadata), + service: $.GoogleService(metadata), }, + aws_loadbalancer: if metadata.cloud_provider == "aws" then awsLoadBalancer(metadata) + }, + + all(metadata): { + network: $.CloudNetwork(metadata), deployment: base.Deployment(metadata, 'core-service') { apiVersion: 'apps/v1', diff --git a/build/deploy/metadata_base.libsonnet b/build/deploy/metadata_base.libsonnet index 3a2a930f4..b5266efdf 100644 --- a/build/deploy/metadata_base.libsonnet +++ b/build/deploy/metadata_base.libsonnet @@ -1,4 +1,5 @@ { + cloud_provider: 'google', // Either google or aws namespace: error 'must supply namespace', clusterName: error 'must supply cluster name', enable_istio: false, @@ -15,7 +16,7 @@ grpc_port: 26257, http_port: 8080, image: 'cockroachdb/cockroach:v21.2.7', - nodeIPs: error 'must supply the per-node ip addresses as an array', + nodeIPs: error 'must supply the per-node ip addresses as an array', // For AWS, this array should contain the allocation id of the elastic ips. JoinExisting: [], storageClass: 'standard', }, @@ -24,7 +25,7 @@ roleBinding: false, }, backend: { - ipName: error 'must supply ip name', + ipName: error 'must supply ip name', // For AWS, use the elastic ip allocation id. port: 8080, image: error 'must specify image', prof_grpc_name: '', @@ -33,6 +34,7 @@ jwksKeyIds: [], hostname: error 'must specify hostname', dumpRequests: false, + certName: if $.cloud_provider == "aws" then error 'must specify certName for AWS cloud provider', # Only used by AWS }, alert: { enable: false, @@ -56,4 +58,5 @@ custom_rules: [], // An array of Prometheus recording rules, each of which is an object with "record" and "expr" properties. 
custom_args: [], // An array of strings to pass as commandline arguments to Prometheus. }, + subnet: if $.cloud_provider == "aws" then error 'must specify subnet for AWS cloud provider', // For AWS, subnet of the elastic ips } diff --git a/build/make-certs.py b/build/make-certs.py index 31fb481df..0d25f4d46 100755 --- a/build/make-certs.py +++ b/build/make-certs.py @@ -25,7 +25,12 @@ def namespace(self): @property def directory(self): - return os.path.join('workspace', self._cluster_context) + # Replace characters breaking folder names + def remove_special_chars(s: str): + for c in [":", "/"]: + s = s.replace(c, "_") + return s + return os.path.join('workspace', remove_special_chars(self._cluster_context)) @property def ca_certs_file(self): diff --git a/deploy/README.md b/deploy/README.md new file mode 100644 index 000000000..5cdb759ec --- /dev/null +++ b/deploy/README.md @@ -0,0 +1,11 @@ +# DSS Deployment + +**Work in progress** + +This folder contains the increments toward the new deployment approach as described in [#874](https://github.com/interuss/dss/issues/874). 
+ +The infrastructure folder contains the terraform modules to deploy the DSS to kubernetes clusters of various cloud providers: + +- Amazon Web Services: [terraform-aws-dss](./infrastructure/modules/terraform-aws-dss/README.md) +- Google Cloud Platform: [terraform-google-dss](./infrastructure/modules/terraform-google-dss/README.md) + diff --git a/deploy/infrastructure/.gitignore b/deploy/infrastructure/.gitignore index fb5e06e5b..70c452b08 100644 --- a/deploy/infrastructure/.gitignore +++ b/deploy/infrastructure/.gitignore @@ -3,3 +3,4 @@ terraform.tfstate terraform.tfstate.backup personal/ +*.pem diff --git a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/.gitignore b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/.gitignore new file mode 100644 index 000000000..3d8373ffa --- /dev/null +++ b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/.gitignore @@ -0,0 +1 @@ +test-app.yml \ No newline at end of file diff --git a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/AWSLoadBalancerControllerPolicy.json b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/AWSLoadBalancerControllerPolicy.json new file mode 100644 index 000000000..df1133c44 --- /dev/null +++ b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/AWSLoadBalancerControllerPolicy.json @@ -0,0 +1,219 @@ +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "iam:CreateServiceLinkedRole" + ], + "Resource": "*", + "Condition": { + "StringEquals": { + "iam:AWSServiceName": "elasticloadbalancing.amazonaws.com" + } + } + }, + { + "Effect": "Allow", + "Action": [ + "ec2:DescribeAccountAttributes", + "ec2:DescribeAddresses", + "ec2:DescribeAvailabilityZones", + "ec2:DescribeInternetGateways", + "ec2:DescribeVpcs", + "ec2:DescribeVpcPeeringConnections", + "ec2:DescribeSubnets", + "ec2:DescribeSecurityGroups", + "ec2:DescribeInstances", + "ec2:DescribeNetworkInterfaces", + "ec2:DescribeTags", + "ec2:GetCoipPoolUsage",
+ "ec2:DescribeCoipPools", + "elasticloadbalancing:DescribeLoadBalancers", + "elasticloadbalancing:DescribeLoadBalancerAttributes", + "elasticloadbalancing:DescribeListeners", + "elasticloadbalancing:DescribeListenerCertificates", + "elasticloadbalancing:DescribeSSLPolicies", + "elasticloadbalancing:DescribeRules", + "elasticloadbalancing:DescribeTargetGroups", + "elasticloadbalancing:DescribeTargetGroupAttributes", + "elasticloadbalancing:DescribeTargetHealth", + "elasticloadbalancing:DescribeTags" + ], + "Resource": "*" + }, + { + "Effect": "Allow", + "Action": [ + "cognito-idp:DescribeUserPoolClient", + "acm:ListCertificates", + "acm:DescribeCertificate", + "iam:ListServerCertificates", + "iam:GetServerCertificate", + "waf-regional:GetWebACL", + "waf-regional:GetWebACLForResource", + "waf-regional:AssociateWebACL", + "waf-regional:DisassociateWebACL", + "wafv2:GetWebACL", + "wafv2:GetWebACLForResource", + "wafv2:AssociateWebACL", + "wafv2:DisassociateWebACL", + "shield:GetSubscriptionState", + "shield:DescribeProtection", + "shield:CreateProtection", + "shield:DeleteProtection" + ], + "Resource": "*" + }, + { + "Effect": "Allow", + "Action": [ + "ec2:AuthorizeSecurityGroupIngress", + "ec2:RevokeSecurityGroupIngress" + ], + "Resource": "*" + }, + { + "Effect": "Allow", + "Action": [ + "ec2:CreateSecurityGroup" + ], + "Resource": "*" + }, + { + "Effect": "Allow", + "Action": [ + "ec2:CreateTags" + ], + "Resource": "arn:aws:ec2:*:*:security-group/*", + "Condition": { + "StringEquals": { + "ec2:CreateAction": "CreateSecurityGroup" + }, + "Null": { + "aws:RequestTag/elbv2.k8s.aws/cluster": "false" + } + } + }, + { + "Effect": "Allow", + "Action": [ + "ec2:CreateTags", + "ec2:DeleteTags" + ], + "Resource": "arn:aws:ec2:*:*:security-group/*", + "Condition": { + "Null": { + "aws:RequestTag/elbv2.k8s.aws/cluster": "true", + "aws:ResourceTag/elbv2.k8s.aws/cluster": "false" + } + } + }, + { + "Effect": "Allow", + "Action": [ + "ec2:AuthorizeSecurityGroupIngress", + 
"ec2:RevokeSecurityGroupIngress", + "ec2:DeleteSecurityGroup" + ], + "Resource": "*", + "Condition": { + "Null": { + "aws:ResourceTag/elbv2.k8s.aws/cluster": "false" + } + } + }, + { + "Effect": "Allow", + "Action": [ + "elasticloadbalancing:CreateLoadBalancer", + "elasticloadbalancing:CreateTargetGroup" + ], + "Resource": "*", + "Condition": { + "Null": { + "aws:RequestTag/elbv2.k8s.aws/cluster": "false" + } + } + }, + { + "Effect": "Allow", + "Action": [ + "elasticloadbalancing:CreateListener", + "elasticloadbalancing:DeleteListener", + "elasticloadbalancing:CreateRule", + "elasticloadbalancing:DeleteRule" + ], + "Resource": "*" + }, + { + "Effect": "Allow", + "Action": [ + "elasticloadbalancing:AddTags", + "elasticloadbalancing:RemoveTags" + ], + "Resource": [ + "arn:aws:elasticloadbalancing:*:*:targetgroup/*/*", + "arn:aws:elasticloadbalancing:*:*:loadbalancer/net/*/*", + "arn:aws:elasticloadbalancing:*:*:loadbalancer/app/*/*" + ], + "Condition": { + "Null": { + "aws:RequestTag/elbv2.k8s.aws/cluster": "true", + "aws:ResourceTag/elbv2.k8s.aws/cluster": "false" + } + } + }, + { + "Effect": "Allow", + "Action": [ + "elasticloadbalancing:AddTags", + "elasticloadbalancing:RemoveTags" + ], + "Resource": [ + "arn:aws:elasticloadbalancing:*:*:listener/net/*/*/*", + "arn:aws:elasticloadbalancing:*:*:listener/app/*/*/*", + "arn:aws:elasticloadbalancing:*:*:listener-rule/net/*/*/*", + "arn:aws:elasticloadbalancing:*:*:listener-rule/app/*/*/*" + ] + }, + { + "Effect": "Allow", + "Action": [ + "elasticloadbalancing:ModifyLoadBalancerAttributes", + "elasticloadbalancing:SetIpAddressType", + "elasticloadbalancing:SetSecurityGroups", + "elasticloadbalancing:SetSubnets", + "elasticloadbalancing:DeleteLoadBalancer", + "elasticloadbalancing:ModifyTargetGroup", + "elasticloadbalancing:ModifyTargetGroupAttributes", + "elasticloadbalancing:DeleteTargetGroup" + ], + "Resource": "*", + "Condition": { + "Null": { + "aws:ResourceTag/elbv2.k8s.aws/cluster": "false" + } + } + }, + { + 
"Effect": "Allow", + "Action": [ + "elasticloadbalancing:RegisterTargets", + "elasticloadbalancing:DeregisterTargets" + ], + "Resource": "arn:aws:elasticloadbalancing:*:*:targetgroup/*/*" + }, + { + "Effect": "Allow", + "Action": [ + "elasticloadbalancing:SetWebAcl", + "elasticloadbalancing:ModifyListener", + "elasticloadbalancing:AddListenerCertificates", + "elasticloadbalancing:RemoveListenerCertificates", + "elasticloadbalancing:ModifyRule" + ], + "Resource": "*" + } + ] +} \ No newline at end of file diff --git a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/README.md b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/README.md new file mode 100644 index 000000000..7f270a8ea --- /dev/null +++ b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/README.md @@ -0,0 +1,131 @@ +# terraform-aws-kubernetes + +To deploy a complete DSS to AWS Elastic Kubernetes Service, see [terraform-aws-dss](../../modules/terraform-aws-dss). + +This folder only contains the terraform module which deploys the kubernetes cluster required to run the DSS on +Kubernetes in AWS. + +## Configuration + +See [variables.tf](variables.tf). + +## Design + +This module creates an EKS cluster with related worker nodes. EKS requires 2 subnets in different availability zones ( +AZ). A dedicated VPC is created to that effect. At the moment, worker nodes are deployed in the two first AZ of the +region. 
The following table summarizes current responsibilities for resource creation in the AWS account: + +| Resource type | Manager | +|---------------------------------------------|-----------------------------------------------------| +| VPC and Subnets | Terraform | +| Elastic IPs | Terraform | +| Network Load Balancer | Kubernetes controller: aws-load-balancer-controller | +| Target groups | Kubernetes controller: aws-load-balancer-controller | +| Storage Volumes (Elastic Block Storage) | EKS add-on provisioned by terraform | +| SSL Certificates (AWS Certificate Manager) | Terraform | +| DNS | Terraform (or manual) | + + +### AWS Load Balancers and Kubernetes Services + +Load balancers are provisioned by the Kubernetes controller aws-load-balancer-controller v2.4 +with [Option B for IAM configuration](https://kubernetes-sigs.github.io/aws-load-balancer-controller/v2.4/deploy/installation/#option-b-attach-iam-policies-to-nodes). + +Network Load Balancers map elastic IPs to Kubernetes Services (Load Balancer). Application Load Balancers (Ingress) do +not support this feature at the moment, making it impossible to anticipate DNS records inside the cluster. + +The Network Load Balancers are provisioned by the aws-load-balancer-controller. It handles the TLS termination for +the DSS HTTPS service. + +Note that the load balancer may distribute the traffic across multiple subnets. Be aware that it is not possible +to unassign a subnet. Target pods shall be deployed in every subnet, meaning that the pods should be properly +distributed across worker nodes and at least one worker node should be present in each subnet. + +Provisioning is done by annotating a Kubernetes Service resource.
The following example deploys a simple HTTP server: + +```yaml +--- +apiVersion: v1 +kind: Namespace +metadata: + name: example +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: example-app + namespace: example + labels: + app: example-app +spec: + replicas: 2 + selector: + matchLabels: + app: example-app + template: + metadata: + labels: + app: example-app + spec: + containers: + - name: nginx + image: public.ecr.aws/nginx/nginx:1.21 + ports: + - name: http + containerPort: 80 + imagePullPolicy: IfNotPresent +--- +apiVersion: v1 +kind: Service +metadata: + name: example-service + namespace: example + annotations: + service.beta.kubernetes.io/aws-load-balancer-type: external + service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip + service.beta.kubernetes.io/aws-load-balancer-scheme: "internet-facing" + service.beta.kubernetes.io/aws-load-balancer-ssl-ports: '443' + service.beta.kubernetes.io/aws-load-balancer-ssl-cert: [ CERTIFICATE_ARN ] + service.beta.kubernetes.io/aws-load-balancer-eip-allocations: [ EIP_ALLOCATION_ID1,EIP_ALLOCATION_ID2,... ] + service.beta.kubernetes.io/aws-load-balancer-name: [ LOAD_BALANCER_NAME ] + service.beta.kubernetes.io/aws-load-balancer-subnets: [ SUBNET_ID1,SUBNET_ID2,... ] +spec: + selector: + app: example-app + ports: + - port: 443 + targetPort: 80 + protocol: TCP + name: http + type: LoadBalancer + loadBalancerClass: service.k8s.aws/nlb +``` + +- [CERTIFICATE_ARN]: ARN of the certificate managed by AWS Certificate Manager +- [EIP_ALLOCATION_IDx]: Elastic IP allocation ID (the number of Elastic IPs must equal the number of SUBNET_IDx) +- [LOAD_BALANCER_NAME]: Name of the load balancer created by the controller +- [SUBNET_IDx]: Name or ID of a subnet (the number of subnets must equal the number of EIP_ALLOCATION_IDx) + + +## Test + +`terraform apply` generates a resource specification `test-app.yml`. This file can be applied to test the reachability of +an HTTP server within the deployed cluster.
To apply the resources, follow these steps: + +1. Log in to the EKS cluster: `aws eks --region $AWS_REGION update-kubeconfig --name $CLUSTER_NAME` +2. Create the resources: `kubectl apply -f test-app.yml` +3. Wait (up to 5 min) for the load balancer to be ready. Note that the load balancer may take a few minutes to start. + Monitor the progress here until the state becomes `Active`: https://console.aws.amazon.com/ec2/home#LoadBalancers: +4. Connect to the app_hostname and contemplate the nginx default welcome page. + +### Clean up test + +Delete the resources: `kubectl delete -f test-app.yml`. + + +## Clean up infrastructure + +1. Delete all created resources from the cluster (e.g. clean up the test as described in the previous section). +2. Make sure all load balancers and target groups have been removed. +3. Run `terraform destroy`. + diff --git a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/cluster.tf b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/cluster.tf new file mode 100644 index 000000000..dfca0743f --- /dev/null +++ b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/cluster.tf @@ -0,0 +1,42 @@ +resource "aws_eks_cluster" "kubernetes_cluster" { + name = var.cluster_name + role_arn = aws_iam_role.dss-cluster.arn + + vpc_config { + subnet_ids = aws_subnet.dss[*].id + endpoint_public_access = true + public_access_cidrs = [ + "0.0.0.0/0" + ] + } + + # Ensure that IAM Role permissions are created before and deleted after EKS Cluster handling. + # Otherwise, EKS will not be able to properly delete EKS managed EC2 infrastructure such as Security Groups.
+ depends_on = [ + aws_iam_role_policy_attachment.dss-cluster-service, + aws_internet_gateway.dss + ] + + version = "1.24" +} + +resource "aws_eks_node_group" "eks_node_group" { + cluster_name = aws_eks_cluster.kubernetes_cluster.name + subnet_ids = [data.aws_subnet.main_subnet.id] # Limit nodes to one subnet + node_role_arn = aws_iam_role.dss-cluster-node-group.arn + disk_size = 100 + node_group_name_prefix = aws_eks_cluster.kubernetes_cluster.name + instance_types = [ + var.aws_instance_type + ] + + scaling_config { + desired_size = var.node_count + max_size = var.node_count + min_size = var.node_count + } + + lifecycle { + create_before_destroy = true + } +} diff --git a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/ebs.tf b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/ebs.tf new file mode 100644 index 000000000..eedf02822 --- /dev/null +++ b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/ebs.tf @@ -0,0 +1,18 @@ +data "tls_certificate" "cluster_oidc_provider" { + url = aws_eks_cluster.kubernetes_cluster.identity[0].oidc[0].issuer +} + +resource "aws_iam_openid_connect_provider" "cluster_provider" { + client_id_list = ["sts.amazonaws.com"] + thumbprint_list = data.tls_certificate.cluster_oidc_provider.certificates[*].sha1_fingerprint + url = data.tls_certificate.cluster_oidc_provider.url +} + +resource "aws_eks_addon" "aws-ebs-csi-driver" { + addon_name = "aws-ebs-csi-driver" + cluster_name = aws_eks_cluster.kubernetes_cluster.name + service_account_role_arn = aws_iam_role.AmazonEKS_EBS_CSI_DriverRole.arn + depends_on = [ + aws_eks_node_group.eks_node_group + ] +} diff --git a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/iam.tf b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/iam.tf new file mode 100644 index 000000000..6eabc6ead --- /dev/null +++ b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/iam.tf @@ -0,0 +1,106 @@ +data "aws_caller_identity" "current" {} + +locals { + 
aws_account_id = data.aws_caller_identity.current.account_id + aws_cluster_id = aws_eks_cluster.kubernetes_cluster.id + aws_cluster_oidc_issuer = aws_eks_cluster.kubernetes_cluster.identity[0].oidc[0].issuer +} + +resource "aws_iam_role" "dss-cluster" { + name = "${var.cluster_name}-dss-cluster" + + assume_role_policy = < i.public_ip } +} + + +## DNS records for SSL Certificate validation +resource "aws_route53_record" "app_hostname_cert_validation" { + count = var.aws_route53_zone_id == "" ? 0 : length(aws_acm_certificate.app_hostname.domain_validation_options) + + allow_overwrite = true + name = element(aws_acm_certificate.app_hostname.domain_validation_options.*.resource_record_name, count.index) + type = element(aws_acm_certificate.app_hostname.domain_validation_options.*.resource_record_type, count.index) + records = [element(aws_acm_certificate.app_hostname.domain_validation_options.*.resource_record_value, count.index)] + ttl = 60 + zone_id = var.aws_route53_zone_id +} + +# Application DNS +resource "aws_route53_record" "app_hostname" { + count = var.aws_route53_zone_id == "" ? 
0 : length(aws_eip.gateway) + + zone_id = var.aws_route53_zone_id + name = var.app_hostname + type = "A" + ttl = 300 + records = [aws_eip.gateway[count.index].public_ip] +} + +# Crdb nodes DNS +resource "aws_route53_record" "crdb_hostname" { + for_each = local.crdb_hostnames + + zone_id = var.aws_route53_zone_id + name = each.key + type = "A" + ttl = 300 + records = [each.value] +} diff --git a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/network_lb.tf b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/network_lb.tf new file mode 100644 index 000000000..a78aaf124 --- /dev/null +++ b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/network_lb.tf @@ -0,0 +1,63 @@ + +# Load Balancer Kubernetes Controller +resource "helm_release" "aws-load-balancer-controller" { + repository = "https://aws.github.io/eks-charts" + chart = "aws-load-balancer-controller" + name = "aws-load-balancer-controller" + + namespace = "kube-system" + + set { + name = "clusterName" + value = var.cluster_name + } + + depends_on = [ + aws_eks_cluster.kubernetes_cluster + ] +} + +# SSL Certificate +resource "aws_acm_certificate" "app_hostname" { + domain_name = var.app_hostname + validation_method = "DNS" + + lifecycle { + create_before_destroy = true + } +} + +resource "aws_acm_certificate_validation" "app_hostname_cert" { + count = var.aws_route53_zone_id == "" ? 0 : 1 + certificate_arn = aws_acm_certificate.app_hostname.arn + validation_record_fqdns = [for name in aws_acm_certificate.app_hostname.domain_validation_options.*.resource_record_name: trimsuffix(name, ".")] +} + +output "app_hostname_cert_arn" { + value = aws_acm_certificate.app_hostname.arn +} + +# Public Elastic IP for the gateway (1 per subnet) +# At the moment, worker nodes will be deployed in the same subnet, so only one elastic ip is required. 
+resource "aws_eip" "gateway" { + vpc = true + count = 1 + + tags = { + Name = format("%s-ip-gateway", var.cluster_name) + # Preserve mapping between ips and hostnames + ExpectedDNS = var.app_hostname + } +} + +# Public Elastic IPs for the crdb instances +resource "aws_eip" "ip_crdb" { + count = var.node_count + vpc = true + + tags = { + Name = format("%s-ip-crdb%v", var.cluster_name, count.index) + # Preserve mapping between ips and hostnames + ExpectedDNS = format("%s.%s", count.index, var.crdb_hostname_suffix) + } +} diff --git a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/network_vpc.tf b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/network_vpc.tf new file mode 100644 index 000000000..31fc22bb1 --- /dev/null +++ b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/network_vpc.tf @@ -0,0 +1,70 @@ +resource "aws_vpc" "dss" { + # Requirements from https://docs.aws.amazon.com/eks/latest/userguide/network_reqs.html + cidr_block = "10.0.0.0/16" + + enable_dns_hostnames = true + enable_dns_support = true + + tags = { + Name = "${var.cluster_name}-vpc" + } +} + +resource "aws_internet_gateway" "dss" { + vpc_id = aws_vpc.dss.id + tags = { + Name = "${var.cluster_name}" + } +} + +data "aws_route_table" "vpc_main" { + vpc_id = aws_vpc.dss.id + + filter { + name = "association.main" + values = [true] + } +} + +# Retrieves availability zones from region configured in the provisioner +data "aws_availability_zones" "available" { + state = "available" +} + +# Uses the two first availability zones of the region +resource "aws_subnet" "dss" { + count = 2 + + availability_zone = data.aws_availability_zones.available.names[count.index] + cidr_block = cidrsubnet(aws_vpc.dss.cidr_block, 8, count.index) + vpc_id = aws_vpc.dss.id + map_public_ip_on_launch = true + + tags = { + Name = "${var.cluster_name}-subnet-${count.index}" + "kubernetes.io/role/elb" = 1 + "kubernetes.io/cluster/${var.cluster_name}" = "shared" + } +} + +# This is the subnet 
where Kubernetes workload will be running. +data "aws_subnet" "main_subnet" { + id = aws_subnet.dss[0].id +} + +resource "aws_route" "internet_gateway" { + route_table_id = data.aws_route_table.vpc_main.id + gateway_id = aws_internet_gateway.dss.id + destination_cidr_block = "0.0.0.0/0" +} + +resource "aws_route_table_association" "subnet" { + count = 2 + route_table_id = data.aws_route_table.vpc_main.id + subnet_id = aws_subnet.dss[count.index].id +} + +resource "aws_security_group" "eks-controlplane" { + description = "Cluster communication with worker nodes" + vpc_id = aws_vpc.dss.id +} diff --git a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/output.tf b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/output.tf new file mode 100644 index 000000000..3f9594399 --- /dev/null +++ b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/output.tf @@ -0,0 +1,55 @@ +output "kubernetes_cloud_provider_name" { + value = "aws" +} + +output "kubernetes_get_credentials_cmd" { + value = "aws eks --region ${var.aws_region} update-kubeconfig --name ${var.cluster_name}" +} + +output "kubernetes_api_endpoint" { + value = aws_eks_cluster.kubernetes_cluster.endpoint +} + +output "kubernetes_context_name" { + value = aws_eks_cluster.kubernetes_cluster.arn +} + +output "ip_gateway" { + value = aws_eip.gateway[0].id +} + +output "crdb_nodes" { + value = [ + for i in aws_eip.ip_crdb : { + ip = i.allocation_id + dns = i.tags.ExpectedDNS + } + ] + depends_on = [ + aws_eip.ip_crdb + ] +} + +output "crdb_addresses" { + value = [for i in aws_eip.ip_crdb[*] : { expected_dns : i.tags.ExpectedDNS, address : i.public_ip }] +} + +output "gateway_address" { + value = { + expected_dns : aws_eip.gateway[0].tags.ExpectedDNS, + address : aws_eip.gateway[0].public_ip, + certificate_validation_dns : [ + for c in aws_acm_certificate.app_hostname.domain_validation_options[*] : { + managed_by_terraform : length(aws_route53_record.app_hostname_cert_validation) > 0 + name : 
c.resource_record_name, + type : c.resource_record_type, + records : [ + c.resource_record_value + ] + }] + } +} + +output "workload_subnet" { + value = data.aws_subnet.main_subnet.id +} \ No newline at end of file diff --git a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/test-app.template.yml b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/test-app.template.yml new file mode 100644 index 000000000..fa647a59a --- /dev/null +++ b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/test-app.template.yml @@ -0,0 +1,74 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: test + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: test-app + namespace: test + labels: + app: test-app +spec: + replicas: 3 + selector: + matchLabels: + app: test-app + template: + metadata: + labels: + app: test-app + spec: + containers: + - name: nginx + image: public.ecr.aws/nginx/nginx:1.21 + ports: + - name: http + containerPort: 80 + imagePullPolicy: IfNotPresent + nodeSelector: + kubernetes.io/os: linux + +--- +apiVersion: v1 +kind: Service +metadata: + name: test-service + namespace: test + annotations: + service.beta.kubernetes.io/aws-load-balancer-type: external + service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip + service.beta.kubernetes.io/aws-load-balancer-scheme: "internet-facing" + service.beta.kubernetes.io/aws-load-balancer-ssl-cert: ${certificate_arn} + service.beta.kubernetes.io/aws-load-balancer-ssl-ports: '443' + service.beta.kubernetes.io/aws-load-balancer-eip-allocations: ${join(",", eip_alloc_ids)} + service.beta.kubernetes.io/aws-load-balancer-name: ${loadbalancer_name} + service.beta.kubernetes.io/aws-load-balancer-subnets: ${join(",", subnet_ids)} +spec: + selector: + app: test-app + ports: + - port: 443 + targetPort: 80 + protocol: TCP + name: http + type: LoadBalancer + loadBalancerClass: service.k8s.aws/nlb + +--- +apiVersion: v1 +kind: Service +metadata: + name: test-service-headless + namespace: 
test +spec: + selector: + app: test-app + ports: + - port: 80 + targetPort: 80 + protocol: TCP + name: http + type: ClusterIP \ No newline at end of file diff --git a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/test_resource.tf b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/test_resource.tf new file mode 100644 index 000000000..3907669b2 --- /dev/null +++ b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/test_resource.tf @@ -0,0 +1,10 @@ + +resource "local_file" "test-app" { + filename = "test-app.yml" + content = templatefile("${path.module}/test-app.template.yml", { + certificate_arn = aws_acm_certificate.app_hostname.arn + eip_alloc_ids = aws_eip.gateway[*].allocation_id + loadbalancer_name = "${var.cluster_name}-lb" + subnet_ids = [data.aws_subnet.main_subnet.id] + }) +} \ No newline at end of file diff --git a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/variables.tf b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/variables.tf new file mode 100644 index 000000000..37313574a --- /dev/null +++ b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/variables.tf @@ -0,0 +1,79 @@ + +# This file has been automatically generated by /deploy/infrastructure/utils/generate_terraform_variables.py. +# Please do not modify manually. + +variable "aws_region" { + type = string + description = <<-EOT + AWS region + List of available regions: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html#concepts-regions + Currently, the terraform module uses the two first availability zones of the region. + + Example: `eu-west-1` + EOT +} + +variable "aws_instance_type" { + type = string + description = <<-EOT + AWS EC2 instance type used for the Kubernetes node pool. 
+ + Example: `m6g.xlarge` for production and `t3.medium` for development + EOT +} + +variable "aws_route53_zone_id" { + type = string + description = <<-EOT + AWS Route 53 Zone ID + This module can automatically create DNS records in a Route 53 Zone. + Leave empty to disable record creation. + + Example: `Z0123456789ABCDEFGHIJ` + EOT +} + +variable "app_hostname" { + type = string + description = <<-EOT + Fully-qualified domain name of your HTTPS Gateway ingress endpoint. + + Example: `dss.example.com` + EOT +} + +variable "crdb_hostname_suffix" { + type = string + description = <<-EOT + The domain name suffix shared by all of your CockroachDB nodes. + For instance, if your CRDB nodes were addressable at 0.db.example.com, + 1.db.example.com and 2.db.example.com, then the value would be db.example.com. + + Example: db.example.com + EOT +} + +variable "cluster_name" { + type = string + description = <<-EOT + Name of the kubernetes cluster that will host this DSS instance (should generally describe the DSS instance being hosted) + + Example: `dss-che-1` + EOT +} + +variable "node_count" { + type = number + description = <<-EOT + Number of Kubernetes nodes which should correspond to the desired CockroachDB nodes. + **Always 3.** + + Example: `3` + EOT + + validation { + condition = var.node_count == 3 + error_message = "Node count should be 3. Only configuration supported at the moment" + } +} + diff --git a/deploy/infrastructure/dependencies/terraform-commons-dss/default_latest.tf b/deploy/infrastructure/dependencies/terraform-commons-dss/default_latest.tf index 4f1e29d02..384c7c45a 100644 --- a/deploy/infrastructure/dependencies/terraform-commons-dss/default_latest.tf +++ b/deploy/infrastructure/dependencies/terraform-commons-dss/default_latest.tf @@ -1,5 +1,5 @@ locals { - rid_db_schema = var.desired_rid_db_version == "latest" ? "4.0.0" : var.desired_rid_db_version - scd_db_schema = var.desired_scd_db_version == "latest" ? 
"3.1.0" : var.desired_scd_db_version - image = var.image == "latest" ? "docker.io/interuss/dss:v0.6.0" : var.image + rid_db_schema = var.desired_rid_db_version == "latest" ? "4.0.0" : var.desired_rid_db_version + scd_db_schema = var.desired_scd_db_version == "latest" ? "3.1.0" : var.desired_scd_db_version + image = var.image == "latest" ? "docker.io/interuss/dss:v0.7.0" : var.image } \ No newline at end of file diff --git a/deploy/infrastructure/dependencies/terraform-commons-dss/main.tf b/deploy/infrastructure/dependencies/terraform-commons-dss/main.tf index fd1e27377..3f43a8c17 100644 --- a/deploy/infrastructure/dependencies/terraform-commons-dss/main.tf +++ b/deploy/infrastructure/dependencies/terraform-commons-dss/main.tf @@ -1,9 +1,11 @@ locals { - workspace_location = abspath("${path.module}/../../../../build/workspace/${var.kubernetes_context_name}") + workspace_folder = replace(replace(var.kubernetes_context_name, "/", "_"), ":", "_") + # Replace ':' and '/' characters from folder name by underscores. Those characters are used by AWS for contexts. 
+ workspace_location = abspath("${path.module}/../../../../build/workspace/${local.workspace_folder}") } resource "local_file" "tanka_config_main" { - content = templatefile("${path.module}/templates/main.jsonnet.tmp", { + content = templatefile("${path.module}/templates/main.jsonnet.tmp", { root_path = path.module VAR_NAMESPACE = var.kubernetes_namespace VAR_CLUSTER_CONTEXT = var.kubernetes_context_name @@ -22,12 +24,15 @@ resource "local_file" "tanka_config_main" { VAR_DESIRED_RID_DB_VERSION = local.rid_db_schema VAR_DESIRED_SCD_DB_VERSION = local.scd_db_schema VAR_SHOULD_INIT = var.should_init + VAR_CLOUD_PROVIDER = var.kubernetes_cloud_provider_name + VAR_CERT_NAME = var.gateway_cert_name + VAR_SUBNET = var.workload_subnet }) filename = "${local.workspace_location}/main.jsonnet" } resource "local_file" "tanka_config_spec" { - content = templatefile("${path.module}/templates/spec.json.tmp", { + content = templatefile("${path.module}/templates/spec.json.tmp", { root_path = path.module namespace = var.kubernetes_namespace cluster_context = var.kubernetes_context_name @@ -37,7 +42,7 @@ resource "local_file" "tanka_config_spec" { } resource "local_file" "make_certs" { - content = templatefile("${path.module}/templates/make-certs.sh.tmp", { + content = templatefile("${path.module}/templates/make-certs.sh.tmp", { cluster_context = var.kubernetes_context_name namespace = var.kubernetes_namespace node_address = join(" ", var.crdb_internal_nodes[*].dns) @@ -47,7 +52,7 @@ resource "local_file" "make_certs" { } resource "local_file" "apply_certs" { - content = templatefile("${path.module}/templates/apply-certs.sh.tmp", { + content = templatefile("${path.module}/templates/apply-certs.sh.tmp", { cluster_context = var.kubernetes_context_name namespace = var.kubernetes_namespace }) @@ -55,7 +60,7 @@ resource "local_file" "apply_certs" { } resource "local_file" "get_credentials" { - content = templatefile("${path.module}/templates/get-credentials.sh.tmp", { + content = 
templatefile("${path.module}/templates/get-credentials.sh.tmp", { get_credentials_cmd = var.kubernetes_get_credentials_cmd }) filename = "${local.workspace_location}/get-credentials.sh" diff --git a/deploy/infrastructure/dependencies/terraform-commons-dss/output.tf b/deploy/infrastructure/dependencies/terraform-commons-dss/output.tf index 062ff1022..eff4a3ce7 100644 --- a/deploy/infrastructure/dependencies/terraform-commons-dss/output.tf +++ b/deploy/infrastructure/dependencies/terraform-commons-dss/output.tf @@ -1,10 +1,5 @@ output "generated_files_location" { value = <<-EOT - Generated files location: - - workspace: ${local.workspace_location} - - main.jsonnet: ${abspath(local_file.tanka_config_main.filename)} - - spec.json: ${abspath(local_file.tanka_config_spec.filename)} - - make-certs.sh: ${abspath(local_file.make_certs.filename)} - - apply-certs.sh: ${abspath(local_file.apply_certs.filename)} + Workspace location with generated files: ${local.workspace_location} EOT } \ No newline at end of file diff --git a/deploy/infrastructure/dependencies/terraform-commons-dss/templates/main.jsonnet.tmp b/deploy/infrastructure/dependencies/terraform-commons-dss/templates/main.jsonnet.tmp index 8fb99a3ed..d07a92069 100644 --- a/deploy/infrastructure/dependencies/terraform-commons-dss/templates/main.jsonnet.tmp +++ b/deploy/infrastructure/dependencies/terraform-commons-dss/templates/main.jsonnet.tmp @@ -23,6 +23,9 @@ local metadata = metadataBase { }, backend+: { ipName: '${VAR_INGRESS_NAME}', + %{if VAR_CLOUD_PROVIDER == "aws"} + certName: '${VAR_CERT_NAME}', + %{endif} image: '${VAR_DOCKER_IMAGE_NAME}', pubKeys: ['${VAR_PUBLIC_KEY_PEM_PATH}'], jwksEndpoint: '${VAR_JWKS_ENDPOINT}', @@ -38,6 +41,10 @@ local metadata = metadataBase { prometheus+: { storageClass: '${VAR_STORAGE_CLASS}', }, + cloud_provider: '${VAR_CLOUD_PROVIDER}', + %{if VAR_CLOUD_PROVIDER == "aws"} + subnet: '${VAR_SUBNET}', + %{endif} }; dss.all(metadata) \ No newline at end of file diff --git 
a/deploy/infrastructure/dependencies/terraform-commons-dss/variables.tf b/deploy/infrastructure/dependencies/terraform-commons-dss/variables.tf index b78102c62..83e1197ac 100644 --- a/deploy/infrastructure/dependencies/terraform-commons-dss/variables.tf +++ b/deploy/infrastructure/dependencies/terraform-commons-dss/variables.tf @@ -2,15 +2,36 @@ # This file has been automatically generated by /deploy/infrastructure/utils/generate_terraform_variables.py. # Please do not modify manually. +variable "app_hostname" { + type = string + description = <<-EOT + Fully-qualified domain name of your HTTPS Gateway ingress endpoint. + + Example: `dss.example.com` + EOT +} + +variable "crdb_hostname_suffix" { + type = string + description = <<-EOT + The domain name suffix shared by all of your CockroachDB nodes. + For instance, if your CRDB nodes were addressable at 0.db.example.com, + 1.db.example.com and 2.db.example.com, then the value would be db.example.com. + + Example: db.example.com + EOT +} + variable "image" { type = string description = < + +# Terraform variables + +The following sections describe the variables of this terraform module. + +## terraform-aws-dss + +### aws_region + +*Type: `string`* + +AWS region +List of available regions: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html#concepts-regions +Currently, the terraform module uses the two first availability zones of the region. + +Example: `eu-west-1` + + +### aws_instance_type + +*Type: `string`* + +AWS EC2 instance type used for the Kubernetes node pool. + +Example: `m6g.xlarge` for production and `t3.medium` for development + + +### aws_route53_zone_id + +*Type: `string`* + +AWS Route 53 Zone ID +This module can automatically create DNS records in a Route 53 Zone. +Leave empty to disable record creation. + +Example: `Z0123456789ABCDEFGHIJ` + + +### app_hostname + +*Type: `string`* + +Fully-qualified domain name of your HTTPS Gateway ingress endpoint. 
+ +Example: `dss.example.com` + + +### crdb_hostname_suffix + +*Type: `string`* + +The domain name suffix shared by all of your CockroachDB nodes. +For instance, if your CRDB nodes were addressable at 0.db.example.com, +1.db.example.com and 2.db.example.com, then the value would be db.example.com. + +Example: db.example.com + + +### cluster_name + +*Type: `string`* + +Name of the kubernetes cluster that will host this DSS instance (should generally describe the DSS instance being hosted) + +Example: `dss-che-1` + + +### node_count + +*Type: `number`* + +Number of Kubernetes nodes which should correspond to the desired CockroachDB nodes. +**Always 3.** + +Example: `3` + + +### aws_kubernetes_storage_class + +*Type: `string`* + +AWS Elastic Kubernetes Service Storage Class to use for CockroachDB and Prometheus persistent volumes. +See https://docs.aws.amazon.com/eks/latest/userguide/storage-classes.html for more details and +available options. + +Example: `gp2` + + +### image + +*Type: `string`* + +URL of the DSS docker image. + + +`latest` can be used to use the latest official interuss docker image. +Official public images are available on Docker Hub: https://hub.docker.com/r/interuss/dss/tags +See [/build/README.md](../../../../build/README.md#docker-images) Docker images section to learn +how to build and publish your own image. + +Example: `latest` or `docker.io/interuss/dss:v0.6.0` + + +### authorization + +*Type: `object({'public_key_pem_path': '${optional(string)}', 'jwks': "${optional(object({'endpoint': '${string}', 'key_id': '${string}'}))}"})`* + +One of `public_key_pem_path` or `jwks` should be provided but not both. + +- public_key_pem_path +If providing the access token public key via JWKS, do not provide this parameter. +If providing a .pem file directly as the public key to validate incoming access tokens, specify the name +of this .pem file here as /public-certs/YOUR-KEY-NAME.pem replacing YOUR-KEY-NAME as appropriate. 
For instance, +if using the provided us-demo.pem, use the path /public-certs/us-demo.pem. Note that your .pem file should be built +in the docker image or mounted manually. + +Example 1 (dummy auth): +``` +{ +public_key_pem_path = "/test-certs/auth2.pem" +} +``` +Example 2: +``` +{ +public_key_pem_path = "/jwt-public-certs/us-demo.pem" +} +``` + +- jwks +If providing a .pem file directly as the public key to validate incoming access tokens, do not provide this parameter. +- endpoint +If providing the access token public key via JWKS, specify the JWKS endpoint here. +Example: https://auth.example.com/.well-known/jwks.json +- key_id: +If providing the access token public key via JWKS, specify the kid (key ID) of the appropriate key in the JWKS file referenced above. +Example: +``` +{ +jwks = { +endpoint = "https://auth.example.com/.well-known/jwks.json" +key_id = "9C6DF78B-77A7-4E89-8990-E654841A7826" +} +} +``` + + +### enable_scd + +*Type: `bool`* + +**Default: true** + +Set this boolean true to enable ASTM strategic conflict detection functionality + + +### should_init + +*Type: `bool`* + +**Default: none** + +Set to false if joining an existing pool, true if creating the first DSS instance +for a pool. When set true, this can initialize the data directories on your cluster, +and prevent you from joining an existing pool. + +Example: `true` + + +### desired_rid_db_version + +*Type: `string`* + +**Default: "latest"** + +Desired RID DB schema version. +Use `latest` to use the latest schema version. + +Example: `4.0.0` + + +### desired_scd_db_version + +*Type: `string`* + +**Default: "latest"** + +Desired SCD DB schema version. +Use `latest` to use the latest schema version. + +Example: `3.1.0` + + +### crdb_locality + +*Type: `string`* + +Unique name for your DSS instance. Currently, we recommend `<ORG_NAME>_<CLUSTER_NAME>`, +and the = character is not allowed. However, any unique (among all other participating +DSS instances) value is acceptable.
+ +Example: + + +### crdb_external_nodes + +*Type: `list(string)`* + +**Default: []** + +Fully-qualified domain name of existing CRDB nodes outside of the cluster if you are joining an existing pool. +Example: ["0.db.dss.example.com", "1.db.dss.example.com", "2.db.dss.example.com"] + + +### kubernetes_namespace + +*Type: `string`* + +**Default: "default"** + +Namespace where to deploy Kubernetes resources. Only default is supported at the moment. + +Example: `default` + + diff --git a/deploy/infrastructure/modules/terraform-aws-dss/main.tf b/deploy/infrastructure/modules/terraform-aws-dss/main.tf new file mode 100644 index 000000000..3b3fe04c8 --- /dev/null +++ b/deploy/infrastructure/modules/terraform-aws-dss/main.tf @@ -0,0 +1,34 @@ +module "terraform-aws-kubernetes" { + # See variables.tf for variables description. + cluster_name = var.cluster_name + aws_region = var.aws_region + app_hostname = var.app_hostname + crdb_hostname_suffix = var.crdb_hostname_suffix + aws_instance_type = var.aws_instance_type + aws_route53_zone_id = var.aws_route53_zone_id + node_count = var.node_count + + source = "../../dependencies/terraform-aws-kubernetes" +} + +module "terraform-commons-dss" { + # See variables.tf for variables description. 
+ image = var.image + kubernetes_namespace = var.kubernetes_namespace + kubernetes_storage_class = var.aws_kubernetes_storage_class + app_hostname = var.app_hostname + crdb_hostname_suffix = var.crdb_hostname_suffix + should_init = var.should_init + authorization = var.authorization + crdb_locality = var.crdb_locality + crdb_internal_nodes = module.terraform-aws-kubernetes.crdb_nodes + ip_gateway = module.terraform-aws-kubernetes.ip_gateway + kubernetes_api_endpoint = module.terraform-aws-kubernetes.kubernetes_api_endpoint + kubernetes_cloud_provider_name = module.terraform-aws-kubernetes.kubernetes_cloud_provider_name + kubernetes_context_name = module.terraform-aws-kubernetes.kubernetes_context_name + kubernetes_get_credentials_cmd = module.terraform-aws-kubernetes.kubernetes_get_credentials_cmd + workload_subnet = module.terraform-aws-kubernetes.workload_subnet + gateway_cert_name = module.terraform-aws-kubernetes.app_hostname_cert_arn + + source = "../../dependencies/terraform-commons-dss" +} diff --git a/deploy/infrastructure/modules/terraform-aws-dss/output.tf b/deploy/infrastructure/modules/terraform-aws-dss/output.tf new file mode 100644 index 000000000..4b0fceb15 --- /dev/null +++ b/deploy/infrastructure/modules/terraform-aws-dss/output.tf @@ -0,0 +1,15 @@ +output "crdb_addresses" { + value = module.terraform-aws-kubernetes.crdb_addresses +} + +output "gateway_address" { + value = module.terraform-aws-kubernetes.gateway_address +} + +output "generated_files_location" { + value = module.terraform-commons-dss.generated_files_location +} + +output "cluster_context" { + value = module.terraform-aws-kubernetes.kubernetes_context_name +} \ No newline at end of file diff --git a/deploy/infrastructure/modules/terraform-aws-dss/terraform.dev.example.tfvars b/deploy/infrastructure/modules/terraform-aws-dss/terraform.dev.example.tfvars new file mode 100644 index 000000000..745bf84c6 --- /dev/null +++ 
b/deploy/infrastructure/modules/terraform-aws-dss/terraform.dev.example.tfvars @@ -0,0 +1,27 @@ +# This file is an example, please adapt it to your configuration. +# See TFVARS.md for the full set of variables and related descriptions. + +# AWS account +aws_region = "eu-west-1" + +# DNS Management +aws_route53_zone_id = "Z01551234567890123456" + +# Hostnames +app_hostname = "dss.interuss.example.com" +crdb_hostname_suffix = "db.interuss.example.com" + +# Kubernetes configuration +cluster_name = "dss-dev-ew1" +node_count = 3 +aws_instance_type = "t3.medium" +aws_kubernetes_storage_class = "gp2" + +# DSS configuration +image = "latest" +authorization = { + public_key_pem_path = "/test-certs/auth2.pem" +} +should_init = true +crdb_locality = "interuss_dss-aws-ew1" +crdb_external_nodes = [] \ No newline at end of file diff --git a/deploy/infrastructure/modules/terraform-aws-dss/variables.tf b/deploy/infrastructure/modules/terraform-aws-dss/variables.tf new file mode 100644 index 000000000..e0dbc9118 --- /dev/null +++ b/deploy/infrastructure/modules/terraform-aws-dss/variables.tf @@ -0,0 +1,238 @@ + +# This file has been automatically generated by /deploy/infrastructure/utils/generate_terraform_variables.py. +# Please do not modify manually. + +variable "aws_region" { + type = string + description = <<-EOT + AWS region + List of available regions: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html#concepts-regions + Currently, the terraform module uses the two first availability zones of the region. + + Example: `eu-west-1` + EOT +} + +variable "aws_instance_type" { + type = string + description = <<-EOT + AWS EC2 instance type used for the Kubernetes node pool. + + Example: `m6g.xlarge` for production and `t3.medium` for development + EOT +} + +variable "aws_route53_zone_id" { + type = string + description = <<-EOT + AWS Route 53 Zone ID + This module can automatically create DNS records in a Route 53 Zone. 
+ Leave empty to disable record creation. + + Example: `Z0123456789ABCDEFGHIJ` + EOT +} + +variable "app_hostname" { + type = string + description = <<-EOT + Fully-qualified domain name of your HTTPS Gateway ingress endpoint. + + Example: `dss.example.com` + EOT +} + +variable "crdb_hostname_suffix" { + type = string + description = <<-EOT + The domain name suffix shared by all of your CockroachDB nodes. + For instance, if your CRDB nodes were addressable at 0.db.example.com, + 1.db.example.com and 2.db.example.com, then the value would be db.example.com. + + Example: db.example.com + EOT +} + +variable "cluster_name" { + type = string + description = <<-EOT + Name of the kubernetes cluster that will host this DSS instance (should generally describe the DSS instance being hosted) + + Example: `dss-che-1` + EOT +} + +variable "node_count" { + type = number + description = <<-EOT + Number of Kubernetes nodes which should correspond to the desired CockroachDB nodes. + **Always 3.** + + Example: `3` + EOT + + validation { + condition = var.node_count == 3 + error_message = "Node count should be 3. Only configuration supported at the moment" + } +} + +variable "aws_kubernetes_storage_class" { + type = string + description = <<-EOT + AWS Elastic Kubernetes Service Storage Class to use for CockroachDB and Prometheus persistent volumes. + See https://docs.aws.amazon.com/eks/latest/userguide/storage-classes.html for more details and + available options. + + Example: `gp2` + EOT +} + +variable "image" { + type = string + description = <_", + and the = character is not allowed. However, any unique (among all other participating + DSS instances) value is acceptable. + + Example: + EOT +} + +variable "crdb_external_nodes" { + type = list(string) + description = <<-EOT + Fully-qualified domain name of existing CRDB nodes outside of the cluster if you are joining an existing pool. 
+ Example: ["0.db.dss.example.com", "1.db.dss.example.com", "2.db.dss.example.com"] + EOT + default = [] +} + +variable "kubernetes_namespace" { + type = string + description = <<-EOT + Namespace where to deploy Kubernetes resources. Only default is supported at the moment. + + Example: `default` + EOT + + default = "default" + + # TODO: Adapt current deployment scripts in /build/deploy to support other namespaces. Only default is supported for the moment. + validation { + condition = var.kubernetes_namespace == "default" + error_message = "Only default namespace is supported at the moment" + } +} + diff --git a/deploy/infrastructure/modules/terraform-google-dss/TFVARS.md b/deploy/infrastructure/modules/terraform-google-dss/TFVARS.md index d9816fb84..8559887ca 100644 --- a/deploy/infrastructure/modules/terraform-google-dss/TFVARS.md +++ b/deploy/infrastructure/modules/terraform-google-dss/TFVARS.md @@ -35,15 +35,12 @@ GCP DNS zone name to automatically manage DNS entries. Leave it empty to manage it manually. -### google_kubernetes_storage_class +### google_machine_type *Type: `string`* -GCP Kubernetes Storage Class to use for CockroachDB and Prometheus persistent volumes. -See https://cloud.google.com/kubernetes-engine/docs/concepts/persistent-volumes for more details and -available options. - -Example: `standard` +GCP machine type used for the Kubernetes node pool. +Example: `n2-standard-4` for production, `e2-medium` for development ### app_hostname @@ -85,23 +82,28 @@ Number of Kubernetes nodes which should correspond to the desired CockroachDB no Example: `3` -### google_machine_type +### google_kubernetes_storage_class *Type: `string`* -GCP machine type used for the Kubernetes node pool. Example: n2-standard-4 for production, e2-medium for development +GCP Kubernetes Storage Class to use for CockroachDB and Prometheus persistent volumes. +See https://cloud.google.com/kubernetes-engine/docs/concepts/persistent-volumes for more details and +available options.
+ +Example: `standard` ### image *Type: `string`* -Full name of the docker image built in the section above. build.sh prints this name as -the last thing it does when run with DOCKER_URL set. It should look something like -gcr.io/your-project-id/dss:2020-07-01-46cae72cf if you built the image yourself as -documented in /build/README.md, or docker.io/interuss/dss. +URL of the DSS docker image. + `latest` can be used to use the latest official interuss docker image. +Official public images are available on Docker Hub: https://hub.docker.com/r/interuss/dss/tags +See [/build/README.md](../../../../build/README.md#docker-images) Docker images section to learn +how to build and publish your own image. Example: `latest` or `docker.io/interuss/dss:v0.6.0` @@ -116,15 +118,17 @@ One of `public_key_pem_path` or `jwks` should be provided but not both. If providing the access token public key via JWKS, do not provide this parameter. If providing a .pem file directly as the public key to validate incoming access tokens, specify the name of this .pem file here as /public-certs/YOUR-KEY-NAME.pem replacing YOUR-KEY-NAME as appropriate. For instance, -if using the provided us-demo.pem, use the path /public-certs/us-demo.pem. Note that your .pem file should built +if using the provided us-demo.pem, use the path /public-certs/us-demo.pem. Note that your .pem file should be built in the docker image or mounted manually. -Example: -```json + Example 1 (dummy auth): +``` { public_key_pem_path = "/test-certs/auth2.pem" } +``` Example 2: +``` { public_key_pem_path = "/jwt-public-certs/us-demo.pem" } @@ -138,7 +142,7 @@ Example: https://auth.example.com/.well-known/jwks.json - key_id: If providing the access token public key via JWKS, specify the kid (key ID) of they appropriate key in the JWKS file referenced above. 
Example: -```json +``` { jwks = { endpoint = "https://auth.example.com/.well-known/jwks.json" diff --git a/deploy/infrastructure/modules/terraform-google-dss/main.tf b/deploy/infrastructure/modules/terraform-google-dss/main.tf index ea99ebf67..4782c5f71 100644 --- a/deploy/infrastructure/modules/terraform-google-dss/main.tf +++ b/deploy/infrastructure/modules/terraform-google-dss/main.tf @@ -1,14 +1,13 @@ module "terraform-google-kubernetes" { # See variables.tf for variables description. - google_project_name = var.google_project_name - cluster_name = var.cluster_name - google_zone = var.google_zone - app_hostname = var.app_hostname - crdb_hostname_suffix = var.crdb_hostname_suffix - google_dns_managed_zone_name = var.google_dns_managed_zone_name - google_machine_type = var.google_machine_type - google_kubernetes_storage_class = var.google_kubernetes_storage_class - node_count = var.node_count + google_project_name = var.google_project_name + cluster_name = var.cluster_name + google_zone = var.google_zone + app_hostname = var.app_hostname + crdb_hostname_suffix = var.crdb_hostname_suffix + google_dns_managed_zone_name = var.google_dns_managed_zone_name + google_machine_type = var.google_machine_type + node_count = var.node_count source = "../../dependencies/terraform-google-kubernetes" } diff --git a/deploy/infrastructure/modules/terraform-google-dss/variables.tf b/deploy/infrastructure/modules/terraform-google-dss/variables.tf index ec2aacb9e..d0215ac95 100644 --- a/deploy/infrastructure/modules/terraform-google-dss/variables.tf +++ b/deploy/infrastructure/modules/terraform-google-dss/variables.tf @@ -26,14 +26,11 @@ variable "google_dns_managed_zone_name" { EOT } -variable "google_kubernetes_storage_class" { +variable "google_machine_type" { type = string description = <<-EOT - GCP Kubernetes Storage Class to use for CockroachDB and Prometheus persistent volumes. 
- See https://cloud.google.com/kubernetes-engine/docs/concepts/persistent-volumes for more details and - available options. - - Example: `standard` + GCP machine type used for the Kubernetes node pool. + Example: `n2-standard-4` for production, `e2-medium` for development EOT } @@ -81,20 +78,27 @@ variable "node_count" { } } -variable "google_machine_type" { +variable "google_kubernetes_storage_class" { type = string - description = "GCP machine type used for the Kubernetes node pool. Example: n2-standard-4 for production, e2-medium for development" + description = <<-EOT + GCP Kubernetes Storage Class to use for CockroachDB and Prometheus persistent volumes. + See https://cloud.google.com/kubernetes-engine/docs/concepts/persistent-volumes for more details and + available options. + + Example: `standard` + EOT } variable "image" { type = string description = < bool: """ Return if the path corresponds to a project which requires example files. """ - return '/modules/' in path + return "/modules/" in path def load_tf_definitions() -> Dict[str, str]: