diff --git a/.github/workflows/tf-helm-validate.yml b/.github/workflows/tf-helm-validate.yml
index 2b5dc22..8aa0914 100644
--- a/.github/workflows/tf-helm-validate.yml
+++ b/.github/workflows/tf-helm-validate.yml
@@ -32,4 +32,4 @@ jobs:
       - name: Terraform Validate
         id: validate
-        run: terraform validate
+        run: terraform validate -json
diff --git a/README.md b/README.md
index f9dd846..cc8edf2 100644
--- a/README.md
+++ b/README.md
@@ -97,6 +97,70 @@ istioctl analyze
 
 > **NOTE**: Add the `sidecar.istio.io/inject: "false"` annotation to the metadata section of the pod template. This will prevent the Istio sidecar from being injected into that specific pod.
 
+## Monitoring Stack
+
+To set up a monitoring stack, we will use [Prometheus](https://prometheus.io/) and [Grafana](https://grafana.com/).
+Instead of installing separate helm charts for these applications, we will use the [kube-prometheus-stack](https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-prometheus-stack/README.md) chart developed by the prometheus community, which includes the grafana helm chart as a dependency of the prometheus chart.
+
+> NOTE: This chart was formerly named `prometheus-operator`. It was renamed to more clearly reflect that it installs the kube-prometheus project stack, within which Prometheus Operator is only one component.
+
+### Working with kube-prometheus-stack
+
+1. Get the Helm repository information
+
+   ```bash
+   helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
+   helm repo update
+   ```
+
+2. By default, this chart installs additional, dependent charts:
+
+   - [prometheus-community/kube-state-metrics](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-state-metrics)
+   - [prometheus-community/prometheus-node-exporter](https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-node-exporter)
+   - [grafana/grafana](https://github.com/grafana/helm-charts/tree/main/charts/grafana)
+
+   > NOTE: To disable dependencies during installation, see [multiple releases](https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-prometheus-stack/README.md#multiple-releases).
+
+3. To configure the kube-prometheus-stack helm chart, refer to the [documentation](https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-prometheus-stack/README.md#configuration). To see the default values, use the command:
+
+   ```bash
+   helm show values prometheus-community/kube-prometheus-stack
+   ```
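+
+For reference, a manual Helm install equivalent to what this repo's Terraform module performs might look roughly like this (the release name, namespace, and values file mirror the `kube_prometheus` module; adapt them as needed):
+
+```bash
+# Sketch of a manual install; this repo applies the chart via Terraform instead
+helm install kube-prometheus-stack prometheus-community/kube-prometheus-stack \
+  --namespace prometheus --create-namespace \
+  --values kube_prometheus_values.yaml
+```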
+
+> [!IMPORTANT]
+> [Workaround for known issues on GKE](https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-prometheus-stack/README.md#running-on-private-gke-clusters)
+> When Google configures the control plane for private clusters, it automatically sets up VPC peering between your Kubernetes cluster's network and a separate Google-managed project. To restrict what Google can access within your cluster, the firewall rules it configures restrict access to your Kubernetes pods. This means that in order to use the webhook component with a GKE private cluster, you must add a firewall rule that allows the GKE control plane access to your webhook pod.
+> You can read more about adding firewall rules for the GKE control plane nodes in the [GKE docs](https://cloud.google.com/kubernetes-engine/docs/how-to/private-clusters#add_firewall_rules).
+> Alternatively, you can disable the hooks by setting `prometheusOperator.admissionWebhooks.enabled=false`.
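+
+As a rough sketch, such a firewall rule could look like the following (the rule name, network, control-plane CIDR, node tags, and webhook port are hypothetical placeholders to adapt to your cluster):
+
+```bash
+# All values below are placeholders; look up your cluster's master CIDR,
+# node network tags, and the port your webhook pod listens on.
+gcloud compute firewall-rules create allow-master-to-webhook \
+  --network my-vpc \
+  --source-ranges 172.16.0.0/28 \
+  --target-tags gke-node \
+  --allow tcp:10250
+```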
+
 ## Configuring the chart values
 
-For specific `values.yaml`, refer their specific charts and create their respective `values.yaml` files based on the dummy `values.yaml` file.
+For specific `values.yaml` settings, refer to the respective charts and create their `values.yaml` files based on the dummy `values.yaml` file. You can also use the `example.*.yaml` files in the `root/` directory as a reference for chart-specific values.
+
+## Infrastructure Setup
+
+Once all our chart `values.yaml` files are configured, we can apply our Terraform configuration to install the helm charts on our Kubernetes cluster.
+
+- Initialize Terraform
+
+  ```bash
+  terraform init
+  ```
+
+- Validate the Terraform infrastructure configuration as code
+
+  ```bash
+  terraform validate -json
+  ```
+
+- Plan the infrastructure setup
+
+  ```bash
+  terraform plan -var-file="prod.tfvars"
+  ```
+
+- Apply the configuration to the Kubernetes cluster after verifying the plan from the previous steps (see the verification commands below)
+
+  ```bash
+  terraform apply --auto-approve -var-file="prod.tfvars"
+  ```
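+
+After the apply completes, you can verify that the monitoring stack is up (the namespace and release name below match this repo's `kube_prometheus` module):
+
+```bash
+helm list -n prometheus
+kubectl get pods -n prometheus
+kubectl get svc -n prometheus
+```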
diff --git a/modules/charts/webapp-helm-chart-1.8.2.tar.gz b/modules/charts/webapp-helm-chart-1.8.2.tar.gz
deleted file mode 100644
index 4239037..0000000
Binary files a/modules/charts/webapp-helm-chart-1.8.2.tar.gz and /dev/null differ
diff --git a/modules/charts/webapp-helm-chart-1.8.3.tar.gz b/modules/charts/webapp-helm-chart-1.8.3.tar.gz
new file mode 100644
index 0000000..b44cbb7
Binary files /dev/null and b/modules/charts/webapp-helm-chart-1.8.3.tar.gz differ
diff --git a/modules/kube_prometheus/main.tf b/modules/kube_prometheus/main.tf
new file mode 100644
index 0000000..da01245
--- /dev/null
+++ b/modules/kube_prometheus/main.tf
@@ -0,0 +1,12 @@
+resource "helm_release" "kube_prometheus_chart" {
+  name             = "kube-prometheus-stack"
+  namespace        = "prometheus"
+  create_namespace = true
+  repository       = "https://prometheus-community.github.io/helm-charts"
+  chart            = "kube-prometheus-stack"
+  timeout          = var.timeout
+  cleanup_on_fail  = true
+  force_update     = false
+  wait             = false
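+  # NOTE: wait = false returns as soon as the release is submitted rather than
+  # waiting for all resources to become ready; the time_sleep resource in
+  # root/main.tf gives the operator a moment to settle before dependents install.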
+  values           = [file(var.kube_prometheus_values_file)]
+}
diff --git a/modules/kube_prometheus/output.tf b/modules/kube_prometheus/output.tf
new file mode 100644
index 0000000..e69de29
diff --git a/modules/kube_prometheus/variables.tf b/modules/kube_prometheus/variables.tf
new file mode 100644
index 0000000..8706cb1
--- /dev/null
+++ b/modules/kube_prometheus/variables.tf
@@ -0,0 +1,2 @@
+variable "timeout" {}
+variable "kube_prometheus_values_file" {}
diff --git a/modules/namespace/main.tf b/modules/namespace/main.tf
index b6aaead..3430f23 100644
--- a/modules/namespace/main.tf
+++ b/modules/namespace/main.tf
@@ -31,3 +31,12 @@ resource "kubernetes_namespace" "istio_ingress" {
     name = "istio-ingress"
   }
 }
+
+resource "kubernetes_namespace" "prometheus" {
+  metadata {
+    # labels = {
+    #   istio-injection = "enabled"
+    # }
+    name = "prometheus"
+  }
+}
diff --git a/root/.gitignore b/root/.gitignore
index a9530d2..7249d61 100644
--- a/root/.gitignore
+++ b/root/.gitignore
@@ -32,6 +32,6 @@ override.tf.json
 # example: *tfplan*
 
 # variables
-dev.tfvars
-prod.tfvars
+*.tfvars
 *values.yaml
+!example*
diff --git a/root/example.infra.yaml b/root/example.infra.yaml
new file mode 100644
index 0000000..031e100
--- /dev/null
+++ b/root/example.infra.yaml
@@ -0,0 +1,113 @@
+replicaCount: 1
+image:
+  repository: quay.io/pwncorp
+  name: consumer
+  tag: 1.1.3
+  pullPolicy: Always
+initContainer:
+  repository: quay.io/pwncorp
+  name: initconsumer
+  tag: 1.0.1
+  pullPolicy: Always
+imagePullSecrets:
+  type: kubernetes.io/dockerconfigjson
+  dockerConfig: b2theW1yaGFja2VyCg==
+
+namespace: deps
+deployStrat:
+  rolling: RollingUpdate
+  maxSurge: 1
+  maxUnavailable: 0
+progressDeadlineSeconds: 120
+minReadySeconds: 30
+configs:
+  kafka_port: "9094"
+  client_id: webapp
+  topic: healthcheck
+  db: consumer
+  dbport: "5432"
+  app_dbschema: app
+secret:
+  type: Opaque
+  username: consumer_user
+  password: consumer@pswd
+podLabel:
+  app: consumer
+service:
+  type: ClusterIP
+  port: 80
+resources:
+  limits:
+    memory: 512Mi
+    cpu: "0.8"
+  requests:
+    memory: 128Mi
+    cpu: "0.4"
+psql:
+  enabled: true
+postgresql:
+  image:
+    tag: 15.5.0-debian-11-r5
+  auth:
+    username: consumer_user
+    password: consumer@pswd
+    database: consumer
+  primary:
+    persistence:
+      size: 1Gi
+    labels:
+      app: consumer-db
+    podLabels:
+      app: consumer-db
+    resources:
+      limits:
+        memory: 1024Mi
+        cpu: "1"
+      requests:
+        memory: 512Mi
+        cpu: "0.5"
+kafka:
+  listeners:
+    client:
+      protocol: PLAINTEXT
+    controller:
+      protocol: PLAINTEXT
+    interbroker:
+      protocol: PLAINTEXT
+    external:
+      protocol: PLAINTEXT
+  controller:
+    replicaCount: 0
+  broker:
+    replicaCount: 3
+    persistence:
+      size: 1Gi
+    resources:
+      limits:
+        memory: 1024Mi
+        cpu: "1"
+      requests:
+        memory: 512Mi
+        cpu: "0.5"
+  serviceAccount:
+    create: false
+  provisioning:
+    enabled: true
+    numPartitions: 3
+    replicationFactor: 1
+    podAnnotations:
+      sidecar.istio.io/inject: "false"
+    topics:
+      - name: healthcheck
+        partitions: 3
+        replicationFactor: 1
+        config:
+          max.message.bytes: 64000
+          flush.messages: 1
+  kraft:
+    enabled: false
+  zookeeper:
+    enabled: true
+    persistence:
+      size: 1Gi
diff --git a/root/example.prometheus_grafana.yaml b/root/example.prometheus_grafana.yaml
new file mode 100644
index 0000000..6026a12
--- /dev/null
+++ b/root/example.prometheus_grafana.yaml
@@ -0,0 +1,66 @@
+## Default values for kube-prometheus helm chart: `helm show values prometheus-community/kube-prometheus-stack`
+
+## Using default values from https://github.com/grafana/helm-charts/blob/main/charts/grafana/values.yaml
+##
+grafana:
+  enabled: true
+
+  ## Deploy default dashboards
+  ##
+  defaultDashboardsEnabled: true
+
+  ## Timezone for the default dashboards
+  ## Other options are: browser or a specific timezone, e.g. Europe/Luxembourg
+  ##
+  defaultDashboardsTimezone: utc
+
+  ## Editable flag for the default dashboards
+  ##
+  defaultDashboardsEditable: true
+
+  adminPassword: prom-operator
+
+  ## Passed to grafana subchart and used by servicemonitor below
+  ##
+  service:
+    portName: http-web
+    type: LoadBalancer
+
+## Deploy a Prometheus instance
+##
+
+prometheus:
+  enabled: true
+
+  ## Configuration for Prometheus service
+  ##
+  service:
+    ## Port for Prometheus Service to listen on
+    ##
+    port: 9090
+
+    ## To be used with a proxy extraContainer port
+    targetPort: 9090
+
+    ## List of IP addresses at which the Prometheus server service is available
+    ## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips
+    ##
+    externalIPs: []
+
+    ## Port to expose on each node
+    ## Only used if service.type is 'NodePort'
+    ##
+    nodePort: 30090
+
+    ## Loadbalancer IP
+    ## Only use if service.type is "LoadBalancer"
+    loadBalancerIP: ""
+    loadBalancerSourceRanges: []
+
+    ## Denotes if this Service desires to route external traffic to node-local or cluster-wide endpoints
+    ##
+    externalTrafficPolicy: Cluster
+
+    ## Service type
+    ##
+    type: LoadBalancer
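+
+## NOTE: With service type LoadBalancer both UIs get external IPs. Without a
+## LoadBalancer, port-forwarding works too (service names assume the Helm
+## release is named "kube-prometheus-stack", as in modules/kube_prometheus):
+##   kubectl port-forward svc/kube-prometheus-stack-grafana 3000:80 -n prometheus
+##   kubectl port-forward svc/kube-prometheus-stack-prometheus 9090:9090 -n prometheus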
diff --git a/root/example.tfvars b/root/example.tfvars
index 871c853..ece6557 100644
--- a/root/example.tfvars
+++ b/root/example.tfvars
@@ -1,6 +1,7 @@
-timeout            = 600
-infra_values_file  = "./infra_values.yaml"
-webapp_values_file = "./webapp_values.yaml"
-chart_path         = "../modules/charts"
-webapp_chart       = "webapp-helm-chart-1.1.3.tar.gz"
-infra_chart        = "infra-helm-chart-1.4.0.tar.gz"
+timeout                     = 600
+infra_values_file           = "./infra_values.yaml"
+webapp_values_file          = "./webapp_values.yaml"
+kube_prometheus_values_file = "./kube_prometheus_values.yaml"
+chart_path                  = "../modules/charts"
+webapp_chart                = "webapp-helm-chart-1.1.3.tar.gz"
+infra_chart                 = "infra-helm-chart-1.4.0.tar.gz"
diff --git a/root/example.webapp.yaml b/root/example.webapp.yaml
new file mode 100644
index 0000000..1c2b871
--- /dev/null
+++ b/root/example.webapp.yaml
@@ -0,0 +1,118 @@
+# Default values for webapp-helm-chart.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+replicaCount: 1
+
+image:
+  repository: quay.io/pwncorp
+  name: webapp
+  tag: 1.2.2
+  pullPolicy: Always
+
+initContainer:
+  repository: quay.io/pwncorp
+  name: migrate
+  tag: 1.1.1
+  pullPolicy: Always
+
+imagePullSecrets:
+  type: kubernetes.io/dockerconfigjson
+  dockerConfig: b2theW1yaGFja2VyCg==
+
+namespace: webapp
+
+hpa:
+  metrics:
+    averageUtilization: 5
+  minReplicas: 1
+  maxReplicas: 5
+
+deployStrat:
+  rolling: RollingUpdate
+  maxSurge: 1
+  maxUnavailable: 0
+
+progressDeadlineSeconds: 120
+minReadySeconds: 30
+
+probes:
+  path: /healthz
+
+config:
+  app_hostname: localhost
+  app_port: "3000"
+  app_db: app
+  app_dbport: "5432"
+  app_dbschema: app
+  k8s_group: webappcron.csye7125-fall2023-group05.cloud
+  k8s_api_version: v1
+  k8s_cr_kind: Cron
+  k8s_cr_kind_plural: crons
+  k8s_namespace: webapp
+  k8s_broker_0: infra-helm-release-kafka-broker-0.infra-helm-release-kafka-broker-headless.deps.svc.cluster.local:9094
+  k8s_broker_1: infra-helm-release-kafka-broker-1.infra-helm-release-kafka-broker-headless.deps.svc.cluster.local:9094
+  k8s_broker_2: infra-helm-release-kafka-broker-2.infra-helm-release-kafka-broker-headless.deps.svc.cluster.local:9094
+  k8s_client_id: webapp
+  k8s_docker_config_json: b2theW1yaGFja2VyCg==
+
+  k8s_topic: healthcheck
+
+secret:
+  type: Opaque
+  username: app_user
+  password: app_user@pswd
+
+podLabel:
+  app: webapp
+  service: api
+
+psql:
+  enabled: true
+
+selectorLabel:
+  app: webapp
+  service: api
+
+livenessConfig:
+  initialDelaySeconds: 40
+  periodSeconds: 10
+
+readinessConfig:
+  initialDelaySeconds: 40
+  periodSeconds: 10
+
+service:
+  type: LoadBalancer
+  port: 3000
+  dbport: 5432
+
+postgresql:
+  image:
+    tag: 15.5.0-debian-11-r5
+  auth:
+    username: app_user
+    password: app_user@pswd
+    database: app
+  primary:
+    persistence:
+      size: 1Gi
+    labels:
+      app: webapp-db
+    podLabels:
+      app: webapp-db
+    resources:
+      limits:
+        memory: "1024Mi"
+        cpu: "1"
+      requests:
+        memory: "128Mi"
+        cpu: "0.5"
+
+resources:
+  limits:
+    memory: "1024Mi"
+    cpu: "0.9"
+  requests:
+    memory: "512Mi"
+    cpu: "0.5"
diff --git a/root/main.tf b/root/main.tf
index f9a55a7..ef41b87 100644
--- a/root/main.tf
+++ b/root/main.tf
@@ -1,43 +1,54 @@
-module "webapp_namespace" {
+module "namespaces" {
   source = "../modules/namespace"
 }
 
 module "istio_base" {
-  depends_on = [module.webapp_namespace]
+  depends_on = [module.namespaces]
   source     = "../modules/istio_base"
   timeout    = var.timeout
 }
 
-resource "time_sleep" "istall_istio_crds" {
+resource "time_sleep" "install_istio_crds" {
   depends_on      = [module.istio_base]
   create_duration = "20s"
 }
 
 module "istio_discovery" {
-  depends_on = [time_sleep.istall_istio_crds]
+  depends_on = [time_sleep.install_istio_crds]
   source     = "../modules/istiod"
   timeout    = var.timeout
 }
 
-resource "time_sleep" "istall_istio_discovery" {
+resource "time_sleep" "install_istio_discovery" {
   depends_on      = [module.istio_discovery]
   create_duration = "20s"
 }
 
 module "istio_gateway" {
-  depends_on = [time_sleep.istall_istio_discovery]
+  depends_on = [time_sleep.install_istio_discovery]
   source     = "../modules/istio_gateway"
   timeout    = var.timeout
 }
 
-resource "time_sleep" "istall_istio_gateway" {
+resource "time_sleep" "install_istio_gateway" {
   depends_on      = [module.istio_gateway]
   create_duration = "20s"
 }
 
+module "monitoring_stack" {
+  depends_on                  = [time_sleep.install_istio_gateway]
+  source                      = "../modules/kube_prometheus"
+  timeout                     = var.timeout
+  kube_prometheus_values_file = var.kube_prometheus_values_file
+}
+
+resource "time_sleep" "install_monitoring_stack" {
+  depends_on      = [module.monitoring_stack]
+  create_duration = "20s"
+}
+
 module "infra_dependencies" {
-  depends_on        = [time_sleep.istall_istio_gateway]
+  depends_on        = [time_sleep.install_monitoring_stack]
   source            = "../modules/infra_helm"
   timeout           = var.timeout
   infra_values_file = var.infra_values_file
diff --git a/root/variables.tf b/root/variables.tf
index 1bb3d62..541983c 100644
--- a/root/variables.tf
+++ b/root/variables.tf
@@ -16,6 +16,12 @@ variable "webapp_values_file" {
   default     = "./webapp_values.yaml"
 }
 
+variable "kube_prometheus_values_file" {
+  type        = string
+  description = "The path to the kube_prometheus_values.yaml file for the helm chart"
+  default     = "./kube_prometheus_values.yaml"
+}
+
 variable "chart_path" {
   type        = string
   description = "The path to the charts/ directory to install local charts"