diff --git a/.github/workflows/logzio-apm-collector-test.yaml b/.github/workflows/logzio-apm-collector-test.yaml new file mode 100644 index 00000000..cdf3a101 --- /dev/null +++ b/.github/workflows/logzio-apm-collector-test.yaml @@ -0,0 +1,94 @@ +name: Test `logzio-apm-collector` chart + +on: + pull_request: + branches: + - master + paths: + - 'charts/logzio-apm-collector/Chart.yaml' + - 'charts/logzio-apm-collector/templates/**' + - 'charts/logzio-apm-collector/values.yaml' +jobs: + test-helm-chart: + name: Test Helm Chart on Kind + runs-on: ubuntu-latest + steps: + - name: Generate random id + id: random_id + run: echo "rand=$RANDOM" >> "$GITHUB_OUTPUT" + + - name: Set ENV_ID + run: echo "ENV_ID=apm-test-run-${{ steps.random_id.outputs.rand }}" >> $GITHUB_ENV + + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: '1.20' + + - name: Set up Helm + uses: azure/setup-helm@v4.2.0 + + - name: Set up kubectl + uses: azure/setup-kubectl@v4 + + - name: Install Kind + run: | + curl -Lo ./kind "https://kind.sigs.k8s.io/dl/v0.11.1/kind-Linux-amd64" + chmod +x ./kind + mv ./kind /usr/local/bin/kind + + - name: Create Kind cluster + run: | + kind create cluster --name kind-${{ github.run_id }}-${{ matrix.mode }} + kubectl cluster-info + + - name: Deploy Helm Chart + run: | + cd charts/logzio-apm-collector + helm upgrade --install \ + --set enabled=true \ + --set spm.enabled=true \ + --set serviceGraph.enabled=true \ + --set global.logzioTracesToken=${{ secrets.LOGZIO_TRACES_TOKEN }} \ + --set global.logzioSpmToken=${{ secrets.LOGZIO_METRICS_TOKEN }} \ + --set global.logzioRegion="us" \ + --set global.env_id=${{ env.ENV_ID }} \ + logzio-apm-collector . 
+ kubectl rollout status deployment/logzio-apm-collector --timeout=300s + kubectl rollout status deployment/logzio-apm-collector-spm --timeout=300s + + - name: Run trace generator + run: | + kubectl apply -f tests/resources/tracegen-apm.yaml + kubectl rollout status deployment/trace-gen --timeout=300s + + - name: Run otel demo for service graph + run: | + helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts + helm repo update + helm install otel-demo -f tests/resources/otel-demo-apm.yaml open-telemetry/opentelemetry-demo --version 0.32.5 + kubectl rollout status deployment/otel-demo-loadgenerator --timeout=300s + + - name: sleep for 3 minutes + run: sleep 180 + + - name: Run Go Tests + env: + LOGZIO_METRICS_API_KEY: ${{ secrets.LOGZIO_METRICS_API_KEY }} + LOGZIO_TRACES_API_KEY: ${{ secrets.LOGZIO_TRACES_API_KEY }} + run: | + go get go.uber.org/zap + go test -v ./tests/traces_e2e_test.go ./tests/common.go + go test -v ./tests/apm_metrics_e2e_test.go ./tests/common.go + + - name: Cleanup Environment + run: | + helm uninstall logzio-apm-collector + + - name: Delete Kind cluster + if: always() + run: kind delete cluster --name kind-${{ github.run_id }}-${{ matrix.mode }} + diff --git a/charts/logzio-apm-collector/.helmignore b/charts/logzio-apm-collector/.helmignore new file mode 100644 index 00000000..0e8a0eb3 --- /dev/null +++ b/charts/logzio-apm-collector/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/logzio-apm-collector/CHANGELOG.md b/charts/logzio-apm-collector/CHANGELOG.md new file mode 100644 index 00000000..c3ae05e0 --- /dev/null +++ b/charts/logzio-apm-collector/CHANGELOG.md @@ -0,0 +1,7 @@ +# Changes by Version + + + +## 1.0.0 (date?) +- Initial release +- Kubernetes APM Agent for Logz.io, based on OpenTelemetry Collector diff --git a/charts/logzio-apm-collector/Chart.yaml b/charts/logzio-apm-collector/Chart.yaml new file mode 100644 index 00000000..c217c430 --- /dev/null +++ b/charts/logzio-apm-collector/Chart.yaml @@ -0,0 +1,11 @@ +apiVersion: v2 +name: logzio-apm-collector +version: 1.0.0 +description: Kubernetes APM agent for Logz.io based on OpenTelemetry Collector +type: application +home: https://logz.io/ +icon: https://logzbucket.s3.eu-west-1.amazonaws.com/logz-io-img/logo400x400.png +maintainers: + - name: Naama Bendalak + email: naama.bendalak@logz.io +appVersion: 0.115.1 \ No newline at end of file diff --git a/charts/logzio-apm-collector/README.md b/charts/logzio-apm-collector/README.md new file mode 100644 index 00000000..53545b1e --- /dev/null +++ b/charts/logzio-apm-collector/README.md @@ -0,0 +1,93 @@ +# Logz.io APM Collector Helm Chart +> [!IMPORTANT] +> Kubernetes APM Collection Agent is still In development + +This Helm chart deploys an agent, which leverages the OpenTelemetry Collector, that collects traces and span metrics from Kubernetes clusters and sends them to Logz.io. 
+ + +## Prerequisites +- Kubernetes 1.24+ +- Helm 3.9+ + +## Installation +### Add Logz.io Helm Repository +Before installing the chart, add the Logz.io Helm repository: +```shell +helm repo add logzio-helm https://logzio.github.io/logzio-helm +helm repo update +``` + +### Install the Chart + +The chart provides options for enabling the following: +1. Traces +2. SPM (Service Performance Monitoring) +3. Service Graph + + +```shell +helm install -n monitoring --create-namespace \ +--set enabled=true \ +--set spm.enabled=true \ +--set serviceGraph.enabled=true \ +--set global.logzioTracesToken="<<LOGZ_IO_TRACES_TOKEN>>" \ +--set global.logzioSpmToken="<<LOGZ_IO_SPM_TOKEN>>" \ +--set global.logzioRegion="<<LOGZ_IO_REGION>>" \ +--set global.env_id="<<ENV_ID>>" \ +logzio-apm-collector logzio-helm/logzio-apm-collector +``` + +> [!NOTE] +> To disable either one of SPM or Service Graph remove the relevant `--set XXX.enabled` line from the above command. + +> [!IMPORTANT] +> Values of `<<LOGZ_IO_TRACES_TOKEN>>`, `<<LOGZ_IO_SPM_TOKEN>>` and `<<LOGZ_IO_REGION>>` can be found in your Logz.io account. +> For `<<ENV_ID>>` define any environment identifier attribute (for example, the cluster name). + + +## Configuration + +- [All configuration options](./VALUES.md) +- [Instrumentation](#instrumentation) +- [Custom Trace Sampling rules](#custom-trace-sampling-rules) + + +## Instrumentation +If you're using manual instrumentation or an instrumentation agent, configure it to export data to the Logz.io APM collector by setting the export/output address as follows: + +``` +logzio-apm-collector.monitoring.svc.cluster.local:<<PORT>> +``` + +> [!IMPORTANT] +> Replace `<<PORT>>` based on the protocol your agent uses: +> - 4317 for gRPC +> - 4318 for HTTP +> +> For a complete list, see `values.yaml` >> `traceConfig` >> `receivers`. 
+ + +## Custom trace sampling rules +To customize the Traces Sampling rules in the OpenTelemetry Collector, you can follow the below steps: + +- **Step 1**: Create [customized Tail sampling rules configuration](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/tailsamplingprocessor). + +- **Step 2**: Update the `values.yaml` file: + +Get the current Chart's `values.yaml` file: +```shell +helm get values logzio-apm-collector -n monitoring > new-values.yaml +``` + +Edit the section under `traceConfig` >> `processors` >> `tail_sampling` in `new-values.yaml` to contain the custom config which you created in step 1. + +- **Step 3**: Apply the config: +```shell +helm upgrade logzio-apm-collector logzio-helm/logzio-apm-collector -n monitoring -f new-values.yaml +``` + +## Uninstalling +To uninstall the `logzio-apm-collector` chart, you can use: +```shell +helm uninstall -n monitoring logzio-apm-collector +``` diff --git a/charts/logzio-apm-collector/VALUES.md b/charts/logzio-apm-collector/VALUES.md new file mode 100644 index 00000000..a6b0ee3a --- /dev/null +++ b/charts/logzio-apm-collector/VALUES.md @@ -0,0 +1,86 @@ +# Logz.io APM Collector All Configuration options +Below you can find a complete list of settings in `values.yaml`. + +| Key | Description | Default Value | +|-----------------------------------------------|---------------------------------------------------------------------------------------------|---------------------------------------------------------------| +| enabled | Toggle for enabling the Helm chart deployment | `false` | +| spm.enabled | Toggle for enabling SPM Collection | `false` | +| serviceGraph.enabled | Toggle for enabling Service Graph metrics Collection | `false` | +| nameOverride | Override the default name for the deployment. | `""` | +| fullnameOverride | Override the default name for the deployment. | `""` | +| namespaceOverride | Override the namespace into which the resources will be deployed. 
| `""` | +| global.env_id | Environment identifier attribute added to all logs. | `"my_env"` | +| global.logzioTracesToken | Secret with your Logz.io tracing shipping token. | `""` | +| global.logzioSpmToken | Secret with your Logz.io SPM shipping token. | `""` | +| global.logzioRegion | Secret with your Logz.io region. | `"us"` | +| global.customTracesEndpoint | Secret with a custom endpoint to send traces to, overrides Logz.io region listener address. | `""` | +| global.customSpmEndpoint | Secret with your custom endpoint to send SPM to, overrides Logz.io region listener address. | `""` | +| otelLogLevel | Change the OpenTelemetry Collector log level | `"info"` | +| standaloneCollector.replicaCount | Number of replicas for the standalone collector. | `1` | +| SamplingProbability | Traces Sampling Probability | `10` | +| SamplingLatency | Traces Sampling Latency | `500` | +| traceConfig | Traces collector configuration, supports templating. | see `values.yaml` | +| spmForwarderConfig | Collector configuration to pass traces to the SPM Collector, supports templating. | see `values.yaml` | +| spmConfig | Collector configuration to generate SPM, supports templating. | see `values.yaml` | +| serviceGraphConfig | Collector configuration to generate Service Graph metrics. | see `values.yaml` | +| image.repository | Opentelemetry collector image repository. | `otel/opentelemetry-collector-contrib` | +| image.pullPolicy | Image pull policy. | `IfNotPresent` | +| image.tag | Overrides the image tag whose default is the chart appVersion. | `""` | +| image.digest | Pull images by digest. | `""` | +| imagePullSecrets | Specifies image pull secrets. | `[]` | +| command.name | OpenTelemetry Collector executable. | `"otelcol-contrib"` | +| command.extraArgs | Additional arguments for the command. | `["--feature-gates=connector.spanmetrics.legacyMetricNames"]` | +| secret.enabled | Toggle for creating and managing the Logz.io secret by this chart. 
| `"true"` | +| secret.name | The name of the secret for Logz.io APM collector. | `"logzio-apm-collector-secret"` | +| configMap.create | Specifies whether a configMap should be created. | `true` | +| serviceAccount.create | Specifies whether a service account should be created. | `true` | +| serviceAccount.annotations | Specifies annotations for the service account. | `{}` | +| serviceAccount.name | The name of the service account. | `""` | +| clusterRole.create | Specifies whether a clusterRole should be created. | `true` | +| clusterRole.annotations | Specifies annotations for the cluster role. | `{}` | +| clusterRole.name | The name of the clusterRole. | `""` | +| clusterRole.rules | Specifies additional rules for the clusterRole. | `[]` | +| clusterRole.clusterRoleBinding.annotations | Specifies annotations for the clusterRoleBinding. | `{}` | +| clusterRole.clusterRoleBinding.name | The name of the clusterRoleBinding. | `""` | +| service.enabled | Enable the creation of a Service. | `true` | +| service.type | Specifies the type of service. | `ClusterIP` | +| service.annotations | Specifies annotations for the service. | `{}` | +| spmService.type | Specifies the type of service for SPM. | `ClusterIP` | +| spmService.annotations | Specifies annotations for the service for SPM. | `{}` | +| autoscaling.enabled | Specifies if HPA should be created for the Traces Collector. | `false` | +| autoscaling.annotations | Specifies annotations for the HPA. | `{}` | +| autoscaling.minReplicas | Control HPA autoscaling scale. | `1` | +| autoscaling.maxReplicas | Control HPA autoscaling scale. | `10` | +| autoscaling.targetCPUUtilizationPercentage | Control HPA autoscaling scale. | `80` | +| autoscaling.targetMemoryUtilizationPercentage | Control HPA autoscaling scale. | `80` | +| spmAutoscaling.enabled | Specifies if VPA should be created for the SPM Collector. | `false` | +| spmAutoscaling.annotations | Specifies annotations for the VPA. 
| `{}` | +| spmAutoscaling.minAllowed.cpu | Control VPA autoscaling scale. | `"50m"` | +| spmAutoscaling.minAllowed.memory | Control VPA autoscaling scale. | `"70Mi"` | +| spmAutoscaling.maxAllowed.cpu | Control VPA autoscaling scale. | `"150m"` | +| spmAutoscaling.maxAllowed.memory | Control VPA autoscaling scale. | `"250Mi"` | +| ports | Defines ports configurations | see `values.yaml` | +| additionalLabels | labels to add to all otel-collector resources | `{}` | +| podSecurityContext | Security context policies for the pod. | `{}` | +| securityContext | Security context policies for the container. | `{}` | +| nodeSelector | Node labels for pod assignment | `{}` | +| tolerations | Tolerations for pod assignment | `[]` | +| affinity | Affinity rules for pod assignment. | see `values.yaml` | +| priorityClassName | Scheduler priority class name. | `""` | +| extraEnvs | Extra environment variables to set in the pods | `[]` | +| extraEnvsFrom | Extra environment variables from secret or configMap to set in the pods | `[]` | +| extraVolumes | Extra volumes to add in the pods | `[]` | +| extraVolumeMounts | Extra volume mounts to add in the pods | `[]` | +| useGOMEMLIMIT | Set `GOMEMLIMIT` env var to a percentage of `resources.limits.memory` | `false` | +| resources | CPU/memory resource requests/limits | see `values.yaml` | +| podAnnotations | Annotations to add to the pod. | `{}` | +| podLabels | Labels to add to the pod. | `{}` | +| hostAliases | Adding entries to Pod /etc/hosts with HostAliases. | `[]` | +| dnsPolicy | Pod DNS policy. | `""` | +| dnsConfig | Custom DNS config. Required when `dnsPolicy: None`. | `{}` | +| annotations | Annotations to add to the ???. | `{}` | +| extraContainers | List of extra sidecars to add. | `[]` | +| initContainers | List of init container specs. | `[]` | +| lifecycleHooks | Pod lifecycle policies. | `{}` | +| livenessProbe | Liveness probe configuration. | see `values.yaml` | +| readinessProbe | Readiness probe configuration. 
| see `values.yaml` | diff --git a/charts/logzio-apm-collector/templates/NOTES.txt b/charts/logzio-apm-collector/templates/NOTES.txt new file mode 100644 index 00000000..fbf4bb17 --- /dev/null +++ b/charts/logzio-apm-collector/templates/NOTES.txt @@ -0,0 +1,25 @@ +{{- if and (eq .Values.dnsPolicy "None") (not .Values.dnsConfig) }} +{{- fail "[ERROR] dnsConfig should be provided when dnsPolicy is None" }} +{{ end }} + +{{- if not .Values.configMap.create }} +[WARNING] "configMap" will not be created and `traceConfig`, `spmConfig` and `serviceGraphConfig` will not take effect. +{{ end }} + +{{- if not .Values.resources }} +[WARNING] No resource limits or requests were set. Consider setting resource requests and limits for your logzio-apm-collector via the `resources` field. +{{ end }} + +{{- $logLevel := lower .Values.otelLogLevel }} +{{- if not (or (eq $logLevel "info") (eq $logLevel "warn") (eq $logLevel "error") (eq $logLevel "debug")) }} +{{ fail "[ERROR] The logzio-apm-collector Chart's `otelLogLevel` must be one of 'info', 'warn', 'error' or 'debug'." }} +{{- end }} + +{{- $region := lower .Values.global.logzioRegion }} +{{- if not (or (eq $region "us") (eq $region "eu") (eq $region "uk") (eq $region "ca") (eq $region "au")) }} +{{ print "[WARN] The `logzioRegion` expected value should be one of 'us', 'eu', 'uk', 'ca', 'au'." }} +{{- end }} + +{{- if and (.Values.useGOMEMLIMIT) (not ((((.Values.resources).limits).memory))) }} +[WARNING] "useGOMEMLIMIT" is enabled but memory limits have not been supplied, which means no GOMEMLIMIT env var was configured but the Memory Ballast Extension was removed. It is highly recommended to only use "useGOMEMLIMIT" when memory limits have been set. 
+{{ end }} diff --git a/charts/logzio-apm-collector/templates/_config.tpl b/charts/logzio-apm-collector/templates/_config.tpl new file mode 100644 index 00000000..3175483c --- /dev/null +++ b/charts/logzio-apm-collector/templates/_config.tpl @@ -0,0 +1,58 @@ +{{/* Build the list of port for service */}} +{{- define "apm-collector.servicePortsConfig" -}} +{{- $ports := deepCopy .Values.ports }} +{{- range $key, $port := $ports }} +{{- if $port.enabled }} +- name: {{ $key }} + port: {{ $port.servicePort }} + targetPort: {{ $port.containerPort }} + protocol: {{ $port.protocol }} + {{- if $port.appProtocol }} + appProtocol: {{ $port.appProtocol }} + {{- end }} +{{- if $port.nodePort }} + nodePort: {{ $port.nodePort }} +{{- end }} +{{- end }} +{{- end }} +{{- end }} + +{{/* Build the list of port for pod */}} +{{- define "apm-collector.podPortsConfig" -}} +{{- $ports := deepCopy .Values.ports }} +{{- range $key, $port := $ports }} +{{- if $port.enabled }} +- name: {{ $key }} + containerPort: {{ $port.containerPort }} + protocol: {{ $port.protocol }} + {{- if and $.isAgent $port.hostPort }} + hostPort: {{ $port.hostPort }} + {{- end }} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* Build config file for APM Collector */}} +{{- define "apm-collector.config" -}} +{{- if .Values.spm.enabled }} +{{- $tracesConfig := deepCopy .Values.traceConfig }} +{{- $spmForwarderConfig := deepCopy .Values.spmForwarderConfig }} +{{- tpl (($tracesConfig | merge $spmForwarderConfig | mustMergeOverwrite) | toYaml) . }} +{{- else }} +{{- tpl (.Values.traceConfig | toYaml) . 
}} +{{- end}} +{{- end }} + +{{/* Build config file for SPM Collector */}} +{{- define "spm-collector.config" -}} +{{- if .Values.serviceGraph.enabled }} +{{- $spmConfig := deepCopy .Values.spmConfig }} +{{- $serviceGraphConfig := deepCopy .Values.serviceGraphConfig }} +{{- $mergedConfig := merge $spmConfig $serviceGraphConfig }} +{{- $_ := set (index $mergedConfig "service" "pipelines" "metrics/spm-logzio") "receivers" (concat (index $mergedConfig "service" "pipelines" "metrics/spm-logzio" "receivers") (index $serviceGraphConfig "service" "pipelines" "metrics/spm-logzio" "receivers" )) -}} +{{- $_ := set (index $mergedConfig "service" "pipelines" "traces") "exporters" (concat (index $mergedConfig "service" "pipelines" "traces" "exporters") (index $serviceGraphConfig "service" "pipelines" "traces" "exporters" )) -}} +{{- tpl ($mergedConfig | toYaml) . }} +{{- else }} +{{- tpl (.Values.spmConfig | toYaml) . }} +{{- end }} +{{- end }} diff --git a/charts/logzio-apm-collector/templates/_helpers-spm.tpl b/charts/logzio-apm-collector/templates/_helpers-spm.tpl new file mode 100644 index 00000000..c583a649 --- /dev/null +++ b/charts/logzio-apm-collector/templates/_helpers-spm.tpl @@ -0,0 +1,39 @@ + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 
+*/}} +{{- define "spm-collector.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- printf "%s-%s" .Values.fullnameOverride "spm" | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Chart.Name "spm" | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} + +{{/* +Get component name +*/}} +{{- define "spm-collector.component" -}} +component: spm-collector +{{- end }} + +{{/* +Create Logz.io listener address based on region +*/}} +{{- define "spm-collector.listenerAddress" -}} +{{- $region := .Values.global.logzioRegion -}} +{{- if or (eq $region "us") (not $region) -}} +https://listener.logz.io:8053 +{{- else }} +{{- printf "https://listener-%s.logz.io:8053" $region }} +{{- end }} +{{- end }} + +{{/* +The SPM service address +*/}} +{{- define "spm-collector.serviceAddr" -}} +{{- $serviceName := include "spm-collector.fullname" .}} +{{- printf "http://%s.%s.svc.cluster.local:4317" $serviceName .Release.Namespace }} +{{- end }} diff --git a/charts/logzio-apm-collector/templates/_helpers.tpl b/charts/logzio-apm-collector/templates/_helpers.tpl new file mode 100644 index 00000000..90212554 --- /dev/null +++ b/charts/logzio-apm-collector/templates/_helpers.tpl @@ -0,0 +1,148 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "apm-collector.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 
+*/}} +{{- define "apm-collector.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- .Chart.Name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} + +{{/* Allow the release namespace to be overridden */}} +{{- define "apm-collector.namespace" -}} + {{- if .Values.namespaceOverride -}} + {{- .Values.namespaceOverride -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} + +{{/* Get component name */}} +{{- define "apm-collector.component" -}} +component: apm-collector +{{- end }} + +{{/* Create chart name and version as used by the chart label. */}} +{{- define "apm-collector.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{- define "apm-collector.lowercase_chartname" -}} +{{- default .Chart.Name | lower }} +{{- end }} + +{{/* Selector labels */}} +{{- define "apm-collector.selectorLabels" -}} +app.kubernetes.io/name: {{ include "apm-collector.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* Custom additional labels */}} +{{- define "apm-collector.additionalLabels" -}} +{{- if .Values.additionalLabels }} +{{- tpl (.Values.additionalLabels | toYaml) . }} +{{- end }} +{{- end }} + +{{/* Common labels */}} +{{- define "apm-collector.labels" -}} +helm.sh/chart: {{ include "apm-collector.chart" . }} +{{ include "apm-collector.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{ include "apm-collector.additionalLabels" . }} +{{- end }} + +{{/* Create the name of the service account to use */}} +{{- define "apm-collector.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "apm-collector.fullname" .) 
.Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* Create the name of the clusterRole to use */}} +{{- define "apm-collector.clusterRoleName" -}} +{{- default (include "apm-collector.fullname" .) .Values.clusterRole.name }} +{{- end }} + +{{/* Create the name of the clusterRoleBinding to use */}} +{{- define "apm-collector.clusterRoleBindingName" -}} +{{- default (include "apm-collector.fullname" .) .Values.clusterRole.clusterRoleBinding.name }} +{{- end }} + +{{/* Custom pod annotations */}} +{{- define "apm-collector.podAnnotations" -}} +{{- if .Values.podAnnotations -}} +{{- tpl (.Values.podAnnotations | toYaml) . }} +{{- end -}} +{{- end -}} + +{{/*Custom pod labels */}} +{{- define "apm-collector.podLabels" -}} +{{- if .Values.podLabels }} +{{- tpl (.Values.podLabels | toYaml) . }} +{{- end }} +{{- end }} + +{{/* + This helper converts the input value of memory to Bytes. + Input needs to be a valid value as supported by k8s memory resource field. + */}} +{{- define "apm-collector.convertMemToBytes" }} + {{- $mem := lower . 
-}} + {{- if hasSuffix "e" $mem -}} + {{- $mem = mulf (trimSuffix "e" $mem | float64) 1e18 -}} + {{- else if hasSuffix "ei" $mem -}} + {{- $mem = mulf (trimSuffix "ei" $mem | float64) 0x1p60 -}} + {{- else if hasSuffix "p" $mem -}} + {{- $mem = mulf (trimSuffix "p" $mem | float64) 1e15 -}} + {{- else if hasSuffix "pi" $mem -}} + {{- $mem = mulf (trimSuffix "pi" $mem | float64) 0x1p50 -}} + {{- else if hasSuffix "t" $mem -}} + {{- $mem = mulf (trimSuffix "t" $mem | float64) 1e12 -}} + {{- else if hasSuffix "ti" $mem -}} + {{- $mem = mulf (trimSuffix "ti" $mem | float64) 0x1p40 -}} + {{- else if hasSuffix "g" $mem -}} + {{- $mem = mulf (trimSuffix "g" $mem | float64) 1e9 -}} + {{- else if hasSuffix "gi" $mem -}} + {{- $mem = mulf (trimSuffix "gi" $mem | float64) 0x1p30 -}} + {{- else if hasSuffix "m" $mem -}} + {{- $mem = mulf (trimSuffix "m" $mem | float64) 1e6 -}} + {{- else if hasSuffix "mi" $mem -}} + {{- $mem = mulf (trimSuffix "mi" $mem | float64) 0x1p20 -}} + {{- else if hasSuffix "k" $mem -}} + {{- $mem = mulf (trimSuffix "k" $mem | float64) 1e3 -}} + {{- else if hasSuffix "ki" $mem -}} + {{- $mem = mulf (trimSuffix "ki" $mem | float64) 0x1p10 -}} + {{- end }} +{{- $mem }} +{{- end }} + +{{/* +Create GOMEMLIMIT value +*/}} +{{- define "apm-collector.gomemlimit" }} +{{- $memlimitBytes := include "apm-collector.convertMemToBytes" . 
| mulf 0.8 -}} +{{- printf "%dMiB" (divf $memlimitBytes 0x1p20 | floor | int64) -}} +{{- end }} + +{{/* +The APM service address +*/}} +{{- define "apm-collector.serviceAddr" -}} +{{- $serviceName := include "apm-collector.fullname" .}} +{{- printf "http://%s.%s.svc.cluster.local" $serviceName .Release.Namespace }} +{{- end }} diff --git a/charts/logzio-apm-collector/templates/_pod-spm.tpl b/charts/logzio-apm-collector/templates/_pod-spm.tpl new file mode 100644 index 00000000..cc9e22ec --- /dev/null +++ b/charts/logzio-apm-collector/templates/_pod-spm.tpl @@ -0,0 +1,159 @@ +{{- define "spm-collector.pod" -}} +{{- with .Values.imagePullSecrets }} +imagePullSecrets: + {{- toYaml . | nindent 2 }} +{{- end }} +serviceAccountName: {{ include "apm-collector.serviceAccountName" . }} +securityContext: + {{- toYaml .Values.podSecurityContext | nindent 2 }} +{{- with .Values.hostAliases }} +hostAliases: + {{- toYaml . | nindent 2 }} +{{- end }} +containers: + - name: {{ include "apm-collector.lowercase_chartname" . }}-spm + command: + - /{{ .Values.command.name }} + {{- if .Values.configMap.create }} + - --config=/conf/relay.yaml + {{- end }} + {{- range .Values.command.extraArgs }} + - {{ . }} + {{- end }} + securityContext: + {{- toYaml .Values.containerSecurityContext | nindent 6 }} + {{- if .Values.image.digest }} + image: "{{ .Values.image.repository }}@{{ .Values.image.digest }}" + {{- else }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + {{- end }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- $ports := include "apm-collector.podPortsConfig" . 
}} + {{- if $ports }} + ports: + {{- $ports | nindent 6}} + {{- end }} + env: + - name: MY_POD_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.podIP + - name: K8S_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: ENV_ID + valueFrom: + secretKeyRef: + name: {{ .Values.secret.name }} + key: env-id + - name: SPM_ENDPOINT + {{- if .Values.global.customSpmEndpoint }} + valueFrom: + secretKeyRef: + name: {{ .Values.secret.name }} + key: custom-spm-endpoint + {{- else }} + value: {{ include "spm-collector.listenerAddress" . | quote }} + {{- end }} + - name: LOGZIO_SPM_TOKEN + valueFrom: + secretKeyRef: + name: {{ .Values.secret.name }} + key: logzio-spm-token + - name: LOG_LEVEL + value: {{ .Values.otelLogLevel | default "info" | quote }} + {{- with .Values.extraEnvs }} + {{- . | toYaml | nindent 6 }} + {{- end }} + {{- with .Values.extraEnvsFrom }} + envFrom: + {{- . | toYaml | nindent 6 }} + {{- end }} + {{- if .Values.lifecycleHooks }} + lifecycle: + {{- toYaml .Values.lifecycleHooks | nindent 6 }} + {{- end }} + livenessProbe: + {{- if .Values.livenessProbe.initialDelaySeconds | empty | not }} + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} + {{- end }} + {{- if .Values.livenessProbe.periodSeconds | empty | not }} + periodSeconds: {{ .Values.livenessProbe.periodSeconds }} + {{- end }} + {{- if .Values.livenessProbe.timeoutSeconds | empty | not }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + {{- end }} + {{- if .Values.livenessProbe.failureThreshold | empty | not }} + failureThreshold: {{ .Values.livenessProbe.failureThreshold }} + {{- end }} + {{- if .Values.livenessProbe.terminationGracePeriodSeconds | empty | not }} + terminationGracePeriodSeconds: {{ .Values.livenessProbe.terminationGracePeriodSeconds }} + {{- end }} + httpGet: + path: {{ .Values.livenessProbe.httpGet.path }} + port: {{ .Values.livenessProbe.httpGet.port }} + readinessProbe: + {{- if 
.Values.readinessProbe.initialDelaySeconds | empty | not }} + initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} + {{- end }} + {{- if .Values.readinessProbe.periodSeconds | empty | not }} + periodSeconds: {{ .Values.readinessProbe.periodSeconds }} + {{- end }} + {{- if .Values.readinessProbe.timeoutSeconds | empty | not }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} + {{- end }} + {{- if .Values.readinessProbe.successThreshold | empty | not }} + successThreshold: {{ .Values.readinessProbe.successThreshold }} + {{- end }} + {{- if .Values.readinessProbe.failureThreshold | empty | not }} + failureThreshold: {{ .Values.readinessProbe.failureThreshold }} + {{- end }} + httpGet: + path: {{ .Values.readinessProbe.httpGet.path }} + port: {{ .Values.readinessProbe.httpGet.port }} + {{- with .Values.resources }} + resources: + {{- toYaml . | nindent 6 }} + {{- end }} + volumeMounts: + {{- if .Values.configMap.create }} + - mountPath: /conf + name: {{ include "apm-collector.lowercase_chartname" . }}-configmap-spm + {{- end }} + {{- if .Values.extraVolumeMounts }} + {{- toYaml .Values.extraVolumeMounts | nindent 6 }} + {{- end }} +{{- if .Values.priorityClassName }} +priorityClassName: {{ .Values.priorityClassName | quote }} +{{- end }} +volumes: + {{- if .Values.configMap.create }} + - name: {{ include "apm-collector.lowercase_chartname" . }}-configmap-spm + configMap: + name: {{ include "spm-collector.fullname" . }} + items: + - key: relay + path: relay.yaml + {{- end }} + {{- if .Values.extraVolumes }} + {{- toYaml .Values.extraVolumes | nindent 2 }} + {{- end }} +{{- with .Values.nodeSelector }} +nodeSelector: + {{- toYaml . | nindent 2 }} +{{- end }} +{{- with .Values.affinity }} +affinity: + {{- toYaml . | nindent 2 }} +{{- end }} +{{- with .Values.tolerations }} +tolerations: + {{- toYaml . | nindent 2 }} +{{- end }} +{{- with .Values.topologySpreadConstraints }} +topologySpreadConstraints: + {{- toYaml . 
| nindent 2 }} +{{- end }} +{{- end}} diff --git a/charts/logzio-apm-collector/templates/_pod.tpl b/charts/logzio-apm-collector/templates/_pod.tpl new file mode 100644 index 00000000..642fca6b --- /dev/null +++ b/charts/logzio-apm-collector/templates/_pod.tpl @@ -0,0 +1,179 @@ +{{- define "apm-collector.pod" -}} +{{- with .Values.imagePullSecrets }} +imagePullSecrets: + {{- toYaml . | nindent 2 }} +{{- end }} +serviceAccountName: {{ include "apm-collector.serviceAccountName" . }} +securityContext: + {{- toYaml .Values.podSecurityContext | nindent 2 }} +{{- with .Values.hostAliases }} +hostAliases: + {{- toYaml . | nindent 2 }} +{{- end }} +containers: + - name: {{ include "apm-collector.lowercase_chartname" . }} + command: + - /{{ .Values.command.name }} + {{- if .Values.configMap.create }} + - --config=/conf/relay.yaml + {{- end }} + {{- range .Values.command.extraArgs }} + - {{ . }} + {{- end }} + securityContext: + {{- toYaml .Values.containerSecurityContext | nindent 6 }} + {{- if .Values.image.digest }} + image: "{{ .Values.image.repository }}@{{ .Values.image.digest }}" + {{- else }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + {{- end }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- $ports := include "apm-collector.podPortsConfig" . }} + {{- if $ports }} + ports: + {{- $ports | nindent 6 }} + {{- end }} + env: + - name: MY_POD_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.podIP + - name: K8S_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: SPM_SERVICE_ENDPOINT + value: {{ include "spm-collector.serviceAddr" . 
| quote }} + - name: ENV_ID + valueFrom: + secretKeyRef: + name: {{ .Values.secret.name }} + key: env-id + - name: LOGZIO_REGION + valueFrom: + secretKeyRef: + name: {{ .Values.secret.name }} + key: logzio-listener-region + - name: LOGZIO_TRACES_TOKEN + valueFrom: + secretKeyRef: + name: {{ .Values.secret.name }} + key: logzio-traces-token + {{- if .Values.global.customTracesEndpoint }} + - name: CUSTOM_TRACES_ENDPOINT + valueFrom: + secretKeyRef: + name: {{ .Values.secret.name }} + key: custom-traces-endpoint + {{- end }} + - name: LOG_LEVEL + value: {{ .Values.otelLogLevel | default "info" | quote }} + - name: SAMPLING_LATENCY + value: {{ .Values.SamplingLatency | default 500 | quote}}{{/* consumed as tail_sampling latency threshold_ms; default follows the 500 documented in values.yaml */}} + - name: SAMPLING_PROBABILITY + value: {{ .Values.SamplingProbability | default 10 | quote }}{{/* consumed as tail_sampling probabilistic sampling_percentage; default follows the 10 documented in values.yaml */}} + {{- if and (.Values.useGOMEMLIMIT) (((.Values.resources).limits).memory) }} + - name: GOMEMLIMIT + value: {{ include "apm-collector.gomemlimit" .Values.resources.limits.memory | quote }} + {{- end }} + {{- with .Values.extraEnvs }} + {{- . | toYaml | nindent 6 }} + {{- end }} + {{- with .Values.extraEnvsFrom }} + envFrom: + {{- . 
| toYaml | nindent 6 }} + {{- end }} + {{- if .Values.lifecycleHooks }} + lifecycle: + {{- toYaml .Values.lifecycleHooks | nindent 6 }} + {{- end }} + livenessProbe: + {{- if .Values.livenessProbe.initialDelaySeconds | empty | not }} + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} + {{- end }} + {{- if .Values.livenessProbe.periodSeconds | empty | not }} + periodSeconds: {{ .Values.livenessProbe.periodSeconds }} + {{- end }} + {{- if .Values.livenessProbe.timeoutSeconds | empty | not }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + {{- end }} + {{- if .Values.livenessProbe.failureThreshold | empty | not }} + failureThreshold: {{ .Values.livenessProbe.failureThreshold }} + {{- end }} + {{- if .Values.livenessProbe.terminationGracePeriodSeconds | empty | not }} + terminationGracePeriodSeconds: {{ .Values.livenessProbe.terminationGracePeriodSeconds }} + {{- end }} + httpGet: + path: {{ .Values.livenessProbe.httpGet.path }} + port: {{ .Values.livenessProbe.httpGet.port }} + readinessProbe: + {{- if .Values.readinessProbe.initialDelaySeconds | empty | not }} + initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} + {{- end }} + {{- if .Values.readinessProbe.periodSeconds | empty | not }} + periodSeconds: {{ .Values.readinessProbe.periodSeconds }} + {{- end }} + {{- if .Values.readinessProbe.timeoutSeconds | empty | not }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} + {{- end }} + {{- if .Values.readinessProbe.successThreshold | empty | not }} + successThreshold: {{ .Values.readinessProbe.successThreshold }} + {{- end }} + {{- if .Values.readinessProbe.failureThreshold | empty | not }} + failureThreshold: {{ .Values.readinessProbe.failureThreshold }} + {{- end }} + httpGet: + path: {{ .Values.readinessProbe.httpGet.path }} + port: {{ .Values.readinessProbe.httpGet.port }} + {{- with .Values.resources }} + resources: + {{- toYaml . 
| nindent 6 }} + {{- end }} + volumeMounts: + {{- if .Values.configMap.create }} + - mountPath: /conf + name: {{ include "apm-collector.lowercase_chartname" . }}-configmap + {{- end }} + {{- if .Values.extraVolumeMounts }} + {{- toYaml .Values.extraVolumeMounts | nindent 6 }} + {{- end }} +{{- with .Values.extraContainers }} +{{- toYaml . | nindent 2 }} +{{- end }} +{{- if .Values.initContainers }} +initContainers: + {{- tpl (toYaml .Values.initContainers) . | nindent 2 }} +{{- end }} +{{- if .Values.priorityClassName }} +priorityClassName: {{ .Values.priorityClassName | quote }} +{{- end }} +volumes: + {{- if .Values.configMap.create }} + - name: {{ include "apm-collector.lowercase_chartname" . }}-configmap + configMap: + name: {{ include "apm-collector.fullname" . }} + items: + - key: relay + path: relay.yaml + {{- end }} + {{- if .Values.extraVolumes }} + {{- toYaml .Values.extraVolumes | nindent 2 }} + {{- end }} +{{- with .Values.nodeSelector }} +nodeSelector: + {{- toYaml . | nindent 2 }} +{{- end }} +{{- with .Values.affinity }} +affinity: + {{- toYaml . | nindent 2 }} +{{- end }} +{{- with .Values.tolerations }} +tolerations: + {{- toYaml . | nindent 2 }} +{{- end }} +{{- with .Values.topologySpreadConstraints }} +topologySpreadConstraints: + {{- toYaml . 
| nindent 2 }} +{{- end }} +{{- end }} diff --git a/charts/logzio-apm-collector/templates/_validations.tpl b/charts/logzio-apm-collector/templates/_validations.tpl new file mode 100644 index 00000000..2ed6e9b3 --- /dev/null +++ b/charts/logzio-apm-collector/templates/_validations.tpl @@ -0,0 +1,21 @@ +{{/* +Verify tracing token was provided if the chart is enabled +*/}} +{{- define "check-tracing-token" -}} + {{- if .Values.enabled }} + {{- if not .Values.global.logzioTracesToken }} + {{- fail "Missing Tracing Token" }} + {{- end }} + {{- end }} +{{- end }} + +{{/* +Verify SPM token was provided if SPM is enabled +*/}} +{{- define "check-spm-token" -}} + {{- if and (.Values.enabled) (.Values.spm.enabled) }} + {{- if not .Values.global.logzioSpmToken }} + {{- fail "Missing SPM Token" }} + {{- end }} + {{- end }} +{{- end }} diff --git a/charts/logzio-apm-collector/templates/clusterrole.yaml b/charts/logzio-apm-collector/templates/clusterrole.yaml new file mode 100644 index 00000000..bc69fffb --- /dev/null +++ b/charts/logzio-apm-collector/templates/clusterrole.yaml @@ -0,0 +1,32 @@ +{{ if .Values.enabled }} +{{- if (.Values.clusterRole.create) -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "apm-collector.clusterRoleName" . }} + labels: + {{- include "apm-collector.labels" . 
| nindent 4 }} + {{- if .Values.clusterRole.annotations }} + annotations: + {{- range $key, $value := .Values.clusterRole.annotations }} + {{- printf "%s: %s" $key (tpl $value $ | quote) | nindent 4 }} + {{- end }} + {{- end }} +rules: + {{- if .Values.clusterRole.rules -}} + {{ toYaml .Values.clusterRole.rules | nindent 2 -}} + {{- end }} + - apiGroups: [""] + resources: ["events", "namespaces", "namespaces/status", "pods", "pods/status", "replicationcontrollers", "replicationcontrollers/status", "resourcequotas", "services", "endpoints"] + verbs: ["get", "watch", "list"] + - apiGroups: ["apps"] + resources: ["deployments", "replicasets"] + verbs: ["get", "watch", "list"] + - apiGroups: ["extensions"] + resources: ["deployments", "replicasets"] + verbs: ["get", "watch", "list"] + - apiGroups: ["autoscaling"] + resources: ["horizontalpodautoscalers"] + verbs: ["get", "watch", "list"] +{{- end }} +{{ end }} diff --git a/charts/logzio-apm-collector/templates/clusterrolebinding.yaml b/charts/logzio-apm-collector/templates/clusterrolebinding.yaml new file mode 100644 index 00000000..7d6524ec --- /dev/null +++ b/charts/logzio-apm-collector/templates/clusterrolebinding.yaml @@ -0,0 +1,24 @@ +{{ if .Values.enabled}} +{{- if .Values.clusterRole.create }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "apm-collector.clusterRoleBindingName" . }} + labels: + {{- include "apm-collector.labels" . | nindent 4 }} + {{- if .Values.clusterRole.clusterRoleBinding.annotations }} + annotations: + {{- range $key, $value := .Values.clusterRole.clusterRoleBinding.annotations }} + {{- printf "%s: %s" $key (tpl $value $ | quote) | nindent 4 }} + {{- end }} + {{- end }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ include "apm-collector.clusterRoleName" . }} +subjects: +- kind: ServiceAccount + name: {{ include "apm-collector.serviceAccountName" . }} + namespace: {{ include "apm-collector.namespace" . 
}} +{{- end }} +{{ end }} diff --git a/charts/logzio-apm-collector/templates/configmap-spm.yaml b/charts/logzio-apm-collector/templates/configmap-spm.yaml new file mode 100644 index 00000000..294593cb --- /dev/null +++ b/charts/logzio-apm-collector/templates/configmap-spm.yaml @@ -0,0 +1,13 @@ +{{ if and (.Values.enabled) (.Values.spm.enabled) }} +{{- if .Values.configMap.create }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "spm-collector.fullname" . }} + namespace: {{ template "apm-collector.namespace" . }} + labels: {{- include "apm-collector.labels" . | nindent 4 }} +data: + relay: | + {{- include "spm-collector.config" . | nindent 4 }} +{{- end }} +{{ end }} diff --git a/charts/logzio-apm-collector/templates/configmap.yaml b/charts/logzio-apm-collector/templates/configmap.yaml new file mode 100644 index 00000000..2458f4df --- /dev/null +++ b/charts/logzio-apm-collector/templates/configmap.yaml @@ -0,0 +1,13 @@ +{{ if .Values.enabled }} +{{- if .Values.configMap.create }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "apm-collector.fullname" . }} + namespace: {{ template "apm-collector.namespace" . }} + labels: {{- include "apm-collector.labels" . | nindent 4 }} +data: + relay: | + {{- include "apm-collector.config" . | nindent 4 }} +{{- end }} +{{ end }} diff --git a/charts/logzio-apm-collector/templates/deployment-spm.yaml b/charts/logzio-apm-collector/templates/deployment-spm.yaml new file mode 100644 index 00000000..5ff9d823 --- /dev/null +++ b/charts/logzio-apm-collector/templates/deployment-spm.yaml @@ -0,0 +1,40 @@ +{{ if and .Values.enabled .Values.spm.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "spm-collector.fullname" . }} + namespace: {{ template "apm-collector.namespace" . }} + labels: + {{- include "apm-collector.labels" . 
| nindent 4 }} + {{- if .Values.annotations }} + annotations: + {{- range $key, $value := .Values.annotations }} + {{- printf "%s: %s" $key (tpl $value $ | quote) | nindent 4 }} + {{- end }} + {{- end }} +spec: + replicas: {{ .Values.standaloneCollector.replicaCount }} + selector: + matchLabels: + {{- include "apm-collector.selectorLabels" . | nindent 6 }} + {{- include "spm-collector.component" . | nindent 6 }} + template: + metadata: + annotations: + checksum/config: {{ include (print $.Template.BasePath "/configmap-spm.yaml") . | sha256sum }} + {{- include "apm-collector.podAnnotations" . | nindent 8 }} + labels: + {{- include "apm-collector.selectorLabels" . | nindent 8 }} + {{- include "apm-collector.podLabels" . | nindent 8 }} + {{- include "spm-collector.component" . | nindent 8 }} + spec: + {{- include "spm-collector.pod" . | nindent 6 }} + hostNetwork: {{ .Values.hostNetwork }} + {{- with .Values.dnsPolicy }} + dnsPolicy: {{ . }} + {{- end }} + {{- with .Values.dnsConfig }} + dnsConfig: + {{- toYaml . | nindent 8 }} + {{- end }} +{{ end }} diff --git a/charts/logzio-apm-collector/templates/deployment.yaml b/charts/logzio-apm-collector/templates/deployment.yaml new file mode 100644 index 00000000..13e3a93f --- /dev/null +++ b/charts/logzio-apm-collector/templates/deployment.yaml @@ -0,0 +1,42 @@ +{{ if .Values.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "apm-collector.fullname" . }} + namespace: {{ template "apm-collector.namespace" . }} + labels: + {{- include "apm-collector.labels" . | nindent 4 }} + {{- if .Values.annotations }} + annotations: + {{- range $key, $value := .Values.annotations }} + {{- printf "%s: %s" $key (tpl $value $ | quote) | nindent 4 }} + {{- end }} + {{- end }} +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.standaloneCollector.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "apm-collector.selectorLabels" . 
| nindent 6 }} + {{- include "apm-collector.component" . | nindent 6 }} + template: + metadata: + annotations: + checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} + {{- include "apm-collector.podAnnotations" . | nindent 8 }} + labels: + {{- include "apm-collector.selectorLabels" . | nindent 8 }} + {{- include "apm-collector.podLabels" . | nindent 8 }} + {{- include "apm-collector.component" . | nindent 8 }} + spec: + {{- include "apm-collector.pod" . | nindent 6}} + hostNetwork: {{ .Values.hostNetwork }} + {{- with .Values.dnsPolicy }} + dnsPolicy: {{ . }} + {{- end }} + {{- with .Values.dnsConfig }} + dnsConfig: + {{- toYaml . | nindent 8 }} + {{- end }} +{{ end }} diff --git a/charts/logzio-apm-collector/templates/hpa.yaml b/charts/logzio-apm-collector/templates/hpa.yaml new file mode 100644 index 00000000..11ad637a --- /dev/null +++ b/charts/logzio-apm-collector/templates/hpa.yaml @@ -0,0 +1,39 @@ +{{- if and .Values.enabled .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "apm-collector.fullname" . }} + namespace: {{ template "apm-collector.namespace" . }} + labels: + {{- include "apm-collector.labels" . | nindent 4 }} + {{- if .Values.autoscaling.annotations }} + annotations: + {{- range $key, $value := .Values.autoscaling.annotations }} + {{- printf "%s: %s" $key (tpl $value $ | quote) | nindent 4 }} + {{- end }} + {{- end }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "apm-collector.fullname" . 
}} + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/logzio-apm-collector/templates/secret.yaml b/charts/logzio-apm-collector/templates/secret.yaml new file mode 100644 index 00000000..f990ccfa --- /dev/null +++ b/charts/logzio-apm-collector/templates/secret.yaml @@ -0,0 +1,23 @@ +{{ if .Values.enabled}} +{{- if .Values.secret.enabled }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ .Values.secret.name }} + namespace: {{ template "apm-collector.namespace" . }}{{/* same namespace helper as the Deployments that reference this Secret via secretKeyRef */}} +type: Opaque +stringData:{{/* every value below is piped through quote so it is always rendered as a YAML string */}} + env-id: {{ .Values.global.env_id | quote}} + logzio-listener-region: {{ .Values.global.logzioRegion | quote}} + {{- template "check-tracing-token" . }} + logzio-traces-token: {{ .Values.global.logzioTracesToken | quote }} + {{- template "check-spm-token" . 
}} + logzio-spm-token: {{ .Values.global.logzioSpmToken | quote }} + {{- if .Values.global.customTracesEndpoint }} + custom-traces-endpoint: {{ .Values.global.customTracesEndpoint | quote }} + {{- end }} + {{- if .Values.global.customSpmEndpoint}} + custom-spm-endpoint: {{ .Values.global.customSpmEndpoint | quote }} + {{- end }} +{{- end }} +{{ end }} diff --git a/charts/logzio-apm-collector/templates/service-spm.yaml b/charts/logzio-apm-collector/templates/service-spm.yaml new file mode 100644 index 00000000..61371d3f --- /dev/null +++ b/charts/logzio-apm-collector/templates/service-spm.yaml @@ -0,0 +1,42 @@ +{{ if and .Values.enabled .Values.spm.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "spm-collector.fullname" . }} + namespace: {{ template "apm-collector.namespace" . }} + labels: + {{- include "apm-collector.labels" . | nindent 4 }} + {{- include "spm-collector.component" . | nindent 4 }} + {{- if .Values.spmService.annotations }} + annotations: + {{- range $key, $value := .Values.spmService.annotations }} + {{- printf "%s: %s" $key (tpl $value $ | quote) | nindent 4 }} + {{- end }} + {{- end }} +spec: + type: {{ .Values.spmService.type }} + {{- if .Values.spmService.clusterIP }} + clusterIP: {{ .Values.spmService.clusterIP }} + {{- end }} + {{- if and .Values.spmService.loadBalancerIP (eq .Values.spmService.type "LoadBalancer") }} + loadBalancerIP: {{ .Values.spmService.loadBalancerIP }} + {{- end }} + {{- if and .Values.spmService.loadBalancerSourceRanges (eq .Values.spmService.type "LoadBalancer") }} + loadBalancerSourceRanges: + {{- range .Values.spmService.loadBalancerSourceRanges }} + - {{ . }} + {{- end }} + {{- end }} + {{- $ports := include "apm-collector.servicePortsConfig" . }} + {{- if $ports }} + ports: + {{- $ports | nindent 4}} + {{- end }} + selector: + {{- include "apm-collector.selectorLabels" . | nindent 4 }} + {{- include "spm-collector.component" . 
| nindent 4 }} + internalTrafficPolicy: {{ .Values.spmService.internalTrafficPolicy | default "Cluster" }} + {{- if and (eq .Values.spmService.type "LoadBalancer") (.Values.spmService.externalTrafficPolicy) }} + externalTrafficPolicy: {{ .Values.spmService.externalTrafficPolicy | default "Cluster" }} + {{- end }} +{{ end }} diff --git a/charts/logzio-apm-collector/templates/service.yaml b/charts/logzio-apm-collector/templates/service.yaml new file mode 100644 index 00000000..e23087f9 --- /dev/null +++ b/charts/logzio-apm-collector/templates/service.yaml @@ -0,0 +1,45 @@ +# A basic manifest for creating a service endpoint for your deployment +{{ if .Values.enabled }} +{{- if .Values.service.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "apm-collector.fullname" . }} + namespace: {{ template "apm-collector.namespace" . }} + labels: + {{- include "apm-collector.labels" . | nindent 4 }} + {{- include "apm-collector.component" . | nindent 4 }} + {{- if .Values.service.annotations }} + annotations: + {{- range $key, $value := .Values.service.annotations }} + {{- printf "%s: %s" $key (tpl $value $ | quote) | nindent 4 }} + {{- end }} + {{- end }} +spec: + type: {{ .Values.service.type }} + {{- if .Values.service.clusterIP }} + clusterIP: {{ .Values.service.clusterIP }} + {{- end }} + {{- if and .Values.service.loadBalancerIP (eq .Values.service.type "LoadBalancer") }} + loadBalancerIP: {{ .Values.service.loadBalancerIP }} + {{- end }} + {{- if and .Values.service.loadBalancerSourceRanges (eq .Values.service.type "LoadBalancer") }} + loadBalancerSourceRanges: + {{- range .Values.service.loadBalancerSourceRanges }} + - {{ . }} + {{- end }} + {{- end }} + {{- $ports := include "apm-collector.servicePortsConfig" . }} + {{- if $ports }} + ports: + {{- $ports | nindent 4}} + {{- end }} + selector: + {{- include "apm-collector.selectorLabels" . | nindent 4 }} + {{- include "apm-collector.component" . 
| nindent 4 }} + internalTrafficPolicy: {{ .Values.service.internalTrafficPolicy | default "Cluster" }} + {{- if and (eq .Values.service.type "LoadBalancer") (.Values.service.externalTrafficPolicy) }} + externalTrafficPolicy: {{ .Values.service.externalTrafficPolicy | default "Cluster" }} + {{- end }} +{{- end }} +{{ end }} diff --git a/charts/logzio-apm-collector/templates/serviceaccount.yaml b/charts/logzio-apm-collector/templates/serviceaccount.yaml new file mode 100644 index 00000000..7ed2fd43 --- /dev/null +++ b/charts/logzio-apm-collector/templates/serviceaccount.yaml @@ -0,0 +1,17 @@ +{{ if .Values.enabled}} +{{- if (.Values.serviceAccount.create) -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "apm-collector.serviceAccountName" . }} + namespace: {{ template "apm-collector.namespace" . }} + labels: + {{- include "apm-collector.labels" . | nindent 4 }} + {{- if .Values.serviceAccount.annotations }} + annotations: + {{- range $key, $value := .Values.serviceAccount.annotations }} + {{- printf "%s: %s" $key (tpl $value $ | quote) | nindent 4 }} + {{- end }} + {{- end }} +{{ end }} +{{ end }} diff --git a/charts/logzio-apm-collector/templates/vpa-spm.yaml b/charts/logzio-apm-collector/templates/vpa-spm.yaml new file mode 100644 index 00000000..ffb0c367 --- /dev/null +++ b/charts/logzio-apm-collector/templates/vpa-spm.yaml @@ -0,0 +1,36 @@ +{{- if and .Values.enabled .Values.spm.enabled .Values.spmAutoscaling.enabled }}{{/* gated on .Values.enabled like the SPM Deployment this VPA targets, so no VPA is rendered for a disabled chart */}} +apiVersion: autoscaling.k8s.io/v1 +kind: VerticalPodAutoscaler +metadata: + name: {{ include "spm-collector.fullname" . }} + namespace: {{ template "apm-collector.namespace" . }} + labels: + {{- include "apm-collector.labels" . 
| nindent 4 }} + {{- if .Values.spmAutoscaling.annotations }} + annotations: + {{- range $key, $value := .Values.spmAutoscaling.annotations }} + {{- printf "%s: %s" $key (tpl $value $ | quote) | nindent 4 }} + {{- end }} + {{- end }} +spec: + targetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "spm-collector.fullname" . }} + updatePolicy: + updateMode: "Auto" + resourcePolicy: + containerPolicies: + - containerName: {{ include "apm-collector.lowercase_chartname" . }}-spm + {{- with .Values.spmAutoscaling.minAllowed }} + minAllowed: + cpu: {{ .cpu }} + memory: {{ .memory }} + {{- end }} + {{- with .Values.spmAutoscaling.maxAllowed }} + maxAllowed: + cpu: {{ .cpu }} + memory: {{ .memory }} + {{- end }} + controlledResources: ["cpu", "memory"] +{{- end }} diff --git a/charts/logzio-apm-collector/values.yaml b/charts/logzio-apm-collector/values.yaml new file mode 100644 index 00000000..a4ad987e --- /dev/null +++ b/charts/logzio-apm-collector/values.yaml @@ -0,0 +1,662 @@ +# Default values for logzio-apm-collector. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. 
+ +# Control the deployment of this chart by a parent chart +enabled: false + +# Enable Span metrics +spm: + enabled: false + +# Enable Service Graph metrics +serviceGraph: + enabled: false + +# Specifies a custom name for the chart's resources +nameOverride: "" +fullnameOverride: "" +namespaceOverride: "" + +####################################################################################################################### +# Base Configuration Parameters +####################################################################################################################### +global: + # environment identifier attribute that will be added to all telemetry + env_id: "my_env" + # Logz.io Tracing Shipping Token + logzioTracesToken: "" + # Logz.io SPM Shipping Token + logzioSpmToken: "" + # Logz.io region code + logzioRegion: "us" + # Optional - Overrides global.LogzioRegion listener address with a custom endpoint. For example: http://endpoint:8080 + customTracesEndpoint: "" + customSpmEndpoint: "" + +# Allows changing the OpenTelemetry Collector log level +otelLogLevel: "info" + +# Number of collector replicas +standaloneCollector: + replicaCount: 1 + +####################################################################################################################### +# OpenTelemetry Collector Configuration +####################################################################################################################### + +## Trace sampling default rules configuration. +## These settings do not affect the traces used for calculating SPM (span metrics). 
+# SamplingProbability: 10 # Traces Sampling Probability +# SamplingLatency: 500 # Traces Sampling Latency + +# Tracing Collector configuration +traceConfig: + exporters: + logzio: + endpoint: ${CUSTOM_TRACES_ENDPOINT} + region: ${LOGZIO_REGION} + account_token: ${LOGZIO_TRACES_TOKEN} + headers: + user-agent: "{{ .Chart.Name }}-{{ .Chart.Version }}-helm" + extensions: + pprof: + endpoint: :1777 + zpages: + endpoint: :55679 + health_check: + endpoint: :13133 + receivers: + jaeger: + protocols: + thrift_compact: + endpoint: "0.0.0.0:6831" + thrift_binary: + endpoint: "0.0.0.0:6832" + grpc: + endpoint: "0.0.0.0:14250" + thrift_http: + endpoint: "0.0.0.0:14268" + otlp: + protocols: + grpc: + endpoint: "0.0.0.0:4317" + http: + endpoint: "0.0.0.0:4318" + zipkin: + endpoint: "0.0.0.0:9411" + processors: + resourcedetection/all: + detectors: [ec2, azure, gcp] + tail_sampling: + policies: + [ + { + name: error-in-policy, + type: status_code, + status_code: {status_codes: [ERROR]} + }, + { + name: slow-traces-policy, + type: latency, + latency: {threshold_ms: "${SAMPLING_LATENCY}" } + }, + { + name: probability-policy, + type: probabilistic, + probabilistic: {sampling_percentage: "${SAMPLING_PROBABILITY}" } + } + ] + k8sattributes: + extract: + metadata: + - k8s.pod.name + - k8s.deployment.name + - k8s.namespace.name + - k8s.node.name + - k8s.statefulset.name + - k8s.replicaset.name + - k8s.daemonset.name + - k8s.cronjob.name + - k8s.job.name + resource/k8s: + attributes: + # Rename fields + - key: pod + action: insert + from_attribute: k8s.pod.name + - key: kubernetes_node + action: insert + from_attribute: k8s.node.name + - key: kubernetes_namespace + action: insert + from_attribute: k8s.namespace.name + - key: kubernetes_deployment + action: insert + from_attribute: k8s.deployment.name + - key: kubernetes_pod_ip + action: insert + from_attribute: k8s.pod.ip + - key: kubernetes_statefulset + action: insert + from_attribute: k8s.statefulset.name + - key: 
kubernetes_replicaset + action: insert + from_attribute: k8s.replicaset.name + - key: kubernetes_cronjob + action: insert + from_attribute: k8s.cronjob.name + - key: kubernetes_daemonset + action: insert + from_attribute: k8s.daemonset.name + - key: kubernetes_job + action: insert + from_attribute: k8s.job.name + # Delete old + - key: k8s.deployment.name + action: delete + - key: k8s.pod.name + action: delete + - key: k8s.namespace.name + action: delete + - key: k8s.node.name + action: delete + - key: k8s.pod.ip + action: delete + - key: k8s.statefulset.name + action: delete + - key: k8s.replicaset.name + action: delete + - key: k8s.daemonset.name + action: delete + - key: k8s.job.name + action: delete + - key: k8s.cronjob.name + action: delete + attributes/env_id: + # Add env_id to all spans + actions: + - key: env_id + value: ${ENV_ID} + action: insert + batch: {} + service: + extensions: [health_check, pprof, zpages] + pipelines: + traces: + receivers: [jaeger, zipkin, otlp] + processors: [resourcedetection/all,attributes/env_id, k8sattributes, resource/k8s, tail_sampling, batch] + exporters: [logzio] + telemetry: + logs: + level: ${LOG_LEVEL} + +# Exporter from Traces Collector to SPM Collector +spmForwarderConfig: + exporters: + otlp: + endpoint: "${SPM_SERVICE_ENDPOINT}" + tls: + insecure: true + service: + pipelines: + traces/spm: + receivers: [jaeger, zipkin, otlp] + processors: [resourcedetection/all, attributes/env_id, k8sattributes, batch] + exporters: [otlp] + +# SPM Collector configuration +spmConfig: + exporters: + prometheusremotewrite/spm-logzio: + endpoint: ${SPM_ENDPOINT} + headers: + Authorization: Bearer ${LOGZIO_SPM_TOKEN} + user-agent: "{{ .Chart.Name }}-{{ .Chart.Version }}-helm" + timeout: 30s # Time to wait per attempt to send data + add_metric_suffixes: false + extensions: + health_check: + endpoint: :13133 + receivers: + otlp: + protocols: + grpc: + endpoint: "0.0.0.0:4317" + processors: + batch: {} + metricstransform/metrics-rename: + 
transforms: + # rename metric duration.XXX >> latency.XXX + - include: ^duration(.*)$$ + action: update + match_type: regexp + new_name: latency.$${1} + # rename metric calls >> calls_total + - action: update + include: calls + new_name: calls_total + # manually add 'seconds' unit to the relevant Service Graph metric names + - include: ^(traces_service_graph_request_(server|client).*)$$ + action: update + match_type: regexp + new_name: $${1}_seconds + metricstransform/labels-rename: + transforms: + # for metrics matching `latencyXXX` or `callsXXX` + # rename label span.name >> operation + - action: update + include: ^(latency|calls) + match_type: regexp + operations: + - action: update_label + label: span.name + new_label: operation + connectors: + spanmetrics: + aggregation_temporality: AGGREGATION_TEMPORALITY_CUMULATIVE + dimensions: + - name: rpc.grpc.status_code + - name: http.method + - name: http.status_code + - name: k8s.pod.name + - name: k8s.deployment.name + - name: k8s.namespace.name + - name: k8s.node.name + - name: k8s.statefulset.name + - name: k8s.replicaset.name + - name: k8s.daemonset.name + - name: k8s.cronjob.name + - name: k8s.job.name + - name: cloud.provider + - name: cloud.region + - name: db.system + - name: messaging.system + - default: ${ENV_ID} + name: env_id + dimensions_cache_size: 100000 + histogram: + explicit: + buckets: + - 2ms + - 8ms + - 50ms + - 100ms + - 200ms + - 500ms + - 1s + - 5s + - 10s + metrics_expiration: 5m + resource_metrics_key_attributes: + - service.name + - telemetry.sdk.language + - telemetry.sdk.name + service: + extensions: [health_check] + pipelines: + traces: + receivers: [otlp] + exporters: [spanmetrics] + metrics/spm-logzio: + receivers: [spanmetrics] + processors: [metricstransform/metrics-rename, metricstransform/labels-rename, batch] + exporters: [prometheusremotewrite/spm-logzio] + telemetry: + logs: + level: ${LOG_LEVEL} + +# Service Graph configuration +serviceGraphConfig: + connectors: + servicegraph: 
+ latency_histogram_buckets: [2ms, 8ms, 50ms, 100ms, 200ms, 500ms, 1s, 5s, 10s] + dimensions: + - env_id + store: + ttl: 5s + max_items: 100000 + metrics_flush_interval: 60s + service: + pipelines: + traces: + exporters: [servicegraph] + metrics/spm-logzio: + receivers: [servicegraph] + +####################################################################################################################### +# OpenTelemetry Collector Image Settings +####################################################################################################################### +image: + # If you want to use the core image `otel/opentelemetry-collector`, you also need to change `command.name` value to `otelcol`. + repository: otel/opentelemetry-collector-contrib + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. + tag: "" + # When digest is set to a non-empty value, images will be pulled by digest (regardless of tag value). + digest: "" + +imagePullSecrets: [] + +# OpenTelemetry Collector executable +command: + name: otelcol-contrib + extraArgs: + - --feature-gates=connector.spanmetrics.legacyMetricNames # rename traces_span_metrics_xxx >> xxx + +####################################################################################################################### +# Kubernetes Resources Configuration +####################################################################################################################### +secret: + # When secret.enabled is true, the logzio secret will be created and managed by this Chart. + # If you're managing the logzio secrets by yourself, set to false. + # Note that in order for the default configuration to work properly, you need to: + # 1. Update secrets.name to your custom secret name. + # 2. Include these keys in your secret: env-id, logzio-listener-region, logzio-traces-token, logzio-spm-token. 
+ # To use a custom endpoint, include custom-traces-endpoint, custom-spm-endpoint or both parameters in your secret, + # depending on your needs and set global.customTracesEndpoint and/or global.customSpmEndpoint to `true`. + enabled: true + name: logzio-apm-collector-secret + +configMap: + # Specifies whether a configMap should be created + create: true + +serviceAccount: + # Specifies whether a service account should be created + create: true + # Annotations to add to the service account + annotations: {} + # The name of the service account to use. + # If not set and create is true, a name is generated using the fullname template + name: "" + +clusterRole: + # Specifies whether a clusterRole should be created + # Some presets also trigger the creation of a cluster role and cluster role binding. + # If using one of those presets, this field is no-op. + create: true + # Annotations to add to the clusterRole + # Can be used in combination with presets that create a cluster role. + annotations: {} + # The name of the clusterRole to use. + # If not set a name is generated using the fullname template + # Can be used in combination with presets that create a cluster role. + name: "" + # A set of rules as documented here : https://kubernetes.io/docs/reference/access-authn-authz/rbac/ + # Can be used in combination with presets that create a cluster role to add additional rules. + rules: [] + # - apiGroups: + # - '' + # resources: + # - 'pods' + # - 'nodes' + # verbs: + # - 'get' + # - 'list' + # - 'watch' + clusterRoleBinding: + # Annotations to add to the clusterRoleBinding + # Can be used in combination with presets that create a cluster role binding. + annotations: {} + # The name of the clusterRoleBinding to use. + # If not set a name is generated using the fullname template + # Can be used in combination with presets that create a cluster role binding. + name: "" + +service: + # Enable the creation of a Traces Collector Service. 
+ enabled: true + + type: ClusterIP + # type: LoadBalancer + # loadBalancerIP: 1.2.3.4 + # loadBalancerSourceRanges: [] + + # Annotations to add to the Service. + annotations: {} + + ## By default, Service will be created setting 'internalTrafficPolicy: Cluster' + ## unless other value is explicitly set. + ## Setting 'internalTrafficPolicy: Cluster' on a daemonset is not recommended (in such case, use 'internalTrafficPolicy: Local') + # internalTrafficPolicy: Cluster + + ## By default, Service of type 'LoadBalancer' will be created setting 'externalTrafficPolicy: Cluster' + ## unless other value is explicitly set. + ## Possible values are Cluster or Local (https://kubernetes.io/docs/tasks/access-application-cluster/create-external-load-balancer/#preserving-the-client-source-ip) + # externalTrafficPolicy: Cluster + +spmService: + # Only generated if spm.enabled is set to true. + type: ClusterIP + + # Annotations to add to the Service. + annotations: {} + + ## By default, Service will be created setting 'internalTrafficPolicy: Cluster' + ## unless other value is explicitly set. + ## Setting 'internalTrafficPolicy: Cluster' on a daemonset is not recommended (in such case, use 'internalTrafficPolicy: Local') + # internalTrafficPolicy: Cluster + + ## By default, Service of type 'LoadBalancer' will be created setting 'externalTrafficPolicy: Cluster' + ## unless other value is explicitly set. + ## Possible values are Cluster or Local (https://kubernetes.io/docs/tasks/access-application-cluster/create-external-load-balancer/#preserving-the-client-source-ip) + # externalTrafficPolicy: Cluster + +# Configure HPA for Traces Collector. +# Make sure that the `service.type` is `ClusterIP` to utilize K8S ability to automatically distribute traffic across all pod replicas +autoscaling: + # Enable the creation of HPA for autoscaling. + enabled: false + # Annotations to add to the HPA. 
+ annotations: {} + # Control autoscaling scale + minReplicas: 1 + maxReplicas: 10 + targetCPUUtilizationPercentage: 80 + # targetMemoryUtilizationPercentage: 80 + +# Configure VPA for SPM Collector. +# Vertical scaling is used instead of horizontal scaling to ensure the accuracy of SPM aggregations. +# Note: This feature requires the VPA Custom Resource Definitions (CRDs) to be installed. +# Installation guide: https://github.com/kubernetes/autoscaler/blob/master/vertical-pod-autoscaler/docs/installation.md +spmAutoscaling: + # Enable the vertical scaling + enabled: false + # Annotations to add to the VPA. + annotations: {} + # Control scaling limits + minAllowed: + cpu: 50m + memory: 70Mi + maxAllowed: + cpu: 150m + memory: 250Mi + +# Configuration for ports +ports: + otlp: + enabled: true + containerPort: 4317 + servicePort: 4317 + hostPort: 4317 + protocol: TCP + otlp-http: + enabled: true + containerPort: 4318 + servicePort: 4318 + hostPort: 4318 + protocol: TCP + jaeger-compact: + enabled: true + containerPort: 6831 + servicePort: 6831 + hostPort: 6831 + protocol: UDP + jaeger-thrift: + enabled: true + containerPort: 14268 + servicePort: 14268 + hostPort: 14268 + protocol: TCP + jaeger-grpc: + enabled: true + containerPort: 14250 + servicePort: 14250 + hostPort: 14250 + protocol: TCP + zipkin: + enabled: true + containerPort: 9411 + servicePort: 9411 + hostPort: 9411 + protocol: TCP + +# Common labels to add to all otel-collector resources. Evaluated as a template. +additionalLabels: {} +# app.kubernetes.io/part-of: my-app + +####################################################################################################################### +# Pod Configuration +####################################################################################################################### +podSecurityContext: {} +securityContext: {} + +nodeSelector: {} +tolerations: [] +# Set affinity rules for the scheduler to determine where all DaemonSet pods can be placed.
+# The following configuration prevents logzio APM collector DaemonSet deployment on fargate nodes +# DaemonSet mode is not used in the current APM chart; this configuration is retained for potential future support. +affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: eks.amazonaws.com/compute-type + operator: DoesNotExist +topologySpreadConstraints: [] + +# Allows for pod scheduler prioritisation +priorityClassName: "" + +extraEnvs: [] +extraEnvsFrom: [] +extraVolumes: [] +extraVolumeMounts: [] + +# When enabled, the chart will set the GOMEMLIMIT env var to 80% of the configured 'resources.limits.memory' +# If no 'resources.limits.memory' is defined, enabling does nothing. +# In a future release this setting will be enabled by default. +# For more details see https://github.com/open-telemetry/opentelemetry-helm-charts/issues/891 +useGOMEMLIMIT: false + +# Resource allocation. +resources: + # guaranteed resource allocation + requests: + cpu: 50m + memory: 70Mi + # upper bound the container can consume + # must be configured if you enable useGOMEMLIMIT + limits: + cpu: 250m + memory: 512Mi + +podAnnotations: {} +podLabels: {} + +# Adding entries to Pod /etc/hosts with HostAliases +# https://kubernetes.io/docs/tasks/network/customize-hosts-file-for-pods/ +hostAliases: [] + # - ip: "1.2.3.4" + # hostnames: + # - "my.host.com" + +# Pod DNS policy ClusterFirst, ClusterFirstWithHostNet, None, Default +dnsPolicy: "" + +# Custom DNS config. Required when DNS policy is None. +dnsConfig: {} + +annotations: {} + +# List of extra sidecars to add +extraContainers: [] +# extraContainers: +# - name: test +# command: +# - cp +# args: +# - /bin/sleep +# - /test/sleep +# image: busybox:latest +# volumeMounts: +# - name: test +# mountPath: /test + +# List of init container specs, e.g. for copying a binary to be executed as a lifecycle hook. +# Another usage of init containers is e.g.
initializing filesystem permissions to the OTLP Collector user `10001` in case you are using persistence and the volume is producing a permission denied error for the OTLP Collector container. +initContainers: [] +# initContainers: +# - name: test +# image: busybox:latest +# command: +# - cp +# args: +# - /bin/sleep +# - /test/sleep +# volumeMounts: +# - name: test +# mountPath: /test +# - name: init-fs +# image: busybox:latest +# command: +# - sh +# - '-c' +# - 'chown -R 10001: /var/lib/storage/otc' # use the path given as per `extensions.file_storage.directory` & `extraVolumeMounts[x].mountPath` +# volumeMounts: +# - name: opentelemetry-collector-data # use the name of the volume used for persistence +# mountPath: /var/lib/storage/otc # use the path given as per `extensions.file_storage.directory` & `extraVolumeMounts[x].mountPath` + +# Pod lifecycle policies. +lifecycleHooks: {} +# lifecycleHooks: +# preStop: +# exec: +# command: +# - /test/sleep +# - "5" + +# liveness probe configuration +# Ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/ +livenessProbe: + ## Number of seconds after the container has started before startup, liveness or readiness probes are initiated. + # initialDelaySeconds: 1 + ## How often in seconds to perform the probe. + # periodSeconds: 10 + ## Number of seconds after which the probe times out. + # timeoutSeconds: 1 + ## Minimum consecutive failures for the probe to be considered failed after having succeeded. + # failureThreshold: 1 + ## Duration in seconds the pod needs to terminate gracefully upon probe failure. + # terminationGracePeriodSeconds: 10 + httpGet: + port: 13133 + path: / + +# readiness probe configuration +# Ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/ +readinessProbe: + ## Number of seconds after the container has started before startup, liveness or readiness probes are initiated. 
+  # initialDelaySeconds: 1
+  ## How often (in seconds) to perform the probe.
+  # periodSeconds: 10
+  ## Number of seconds after which the probe times out.
+  # timeoutSeconds: 1
+  ## Minimum consecutive successes for the probe to be considered successful after having failed.
+  # successThreshold: 1
+  ## Minimum consecutive failures for the probe to be considered failed after having succeeded.
+  # failureThreshold: 1
+  httpGet:
+    port: 13133
+    path: /
diff --git a/tests/apm_metrics_e2e_test.go b/tests/apm_metrics_e2e_test.go
new file mode 100644
index 00000000..8ba1b8d9
--- /dev/null
+++ b/tests/apm_metrics_e2e_test.go
@@ -0,0 +1,185 @@
+package tests
+
+import (
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"os"
+	"sort"
+	"strings"
+	"testing"
+	"time"
+
+	"go.uber.org/zap"
+)
+
+// MetricResponse mirrors the JSON body returned by the Prometheus-compatible
+// query endpoint that fetchMetrics calls.
+type MetricResponse struct {
+	Status string `json:"status"`
+	Data   struct {
+		ResultType string `json:"resultType"`
+		Result     []struct {
+			Metric map[string]string `json:"metric"`
+			Value  []interface{}     `json:"value"`
+		} `json:"result"`
+	} `json:"data"`
+}
+
+// TestSpmMetricsApm checks that the span metrics tagged with the current
+// ENV_ID exist and carry the expected Kubernetes and span labels.
+func TestSpmMetricsApm(t *testing.T) {
+	spmLabels := []string{"k8s_node_name", "k8s_namespace_name", "k8s_pod_name", "span_kind", "operation"}
+	requiredMetrics := map[string][]string{
+		"calls_total":    spmLabels,
+		"latency_sum":    spmLabels,
+		"latency_count":  spmLabels,
+		"latency_bucket": spmLabels,
+	}
+	envId := os.Getenv("ENV_ID")
+	query := fmt.Sprintf(`{env_id='%s'}`, envId)
+	testMetrics(t, requiredMetrics, query)
+}
+
+// TestServiceGraphMetricsApm checks that the service graph metrics tagged with
+// the current ENV_ID exist and carry the client and server labels.
+func TestServiceGraphMetricsApm(t *testing.T) {
+	edgeLabels := []string{"client", "server"}
+	requiredMetrics := map[string][]string{
+		"traces_service_graph_request_total":                 edgeLabels,
+		"traces_service_graph_request_failed_total":          edgeLabels,
+		"traces_service_graph_request_server_seconds_bucket": edgeLabels,
+		"traces_service_graph_request_server_seconds_count":  edgeLabels,
+		"traces_service_graph_request_server_seconds_sum":    edgeLabels,
+		"traces_service_graph_request_client_seconds_bucket": edgeLabels,
+		"traces_service_graph_request_client_seconds_count":  edgeLabels,
+		"traces_service_graph_request_client_seconds_sum":    edgeLabels,
+	}
+	envId := os.Getenv("ENV_ID")
+	query := fmt.Sprintf(`{client_env_id='%s'}`, envId)
+	testMetrics(t, requiredMetrics, query)
+}
+
+// testMetrics runs query against the Logz.io metrics API and reports a test
+// error for every required metric, or required label, missing from the result.
+// requiredMetrics maps a metric name to the labels each of its series must have.
+func testMetrics(t *testing.T, requiredMetrics map[string][]string, query string) {
+	t.Helper()
+
+	metricsApiKey := os.Getenv("LOGZIO_METRICS_API_KEY")
+	if metricsApiKey == "" {
+		t.Fatalf("LOGZIO_METRICS_API_KEY environment variable not set")
+	}
+
+	metricResponse, err := fetchMetrics(metricsApiKey, query)
+	if err != nil {
+		t.Fatalf("Failed to fetch metrics: %v", err)
+	}
+
+	// A non-"success" status means the query itself was rejected, which is
+	// different from a successful query that matched no series.
+	if metricResponse.Status != "success" {
+		t.Errorf("Metrics query returned status %q, want \"success\"", metricResponse.Status)
+	}
+	logger.Info("Found metrics", zap.Int("metrics_count", len(metricResponse.Data.Result)))
+
+	// Verify required metrics
+	if missingMetrics := verifyMetrics(metricResponse, requiredMetrics); len(missingMetrics) > 0 {
+		t.Errorf("Missing metrics or labels:\n%s", strings.Join(missingMetrics, "\n"))
+	}
+}
+
+// fetchMetrics runs query against the Logz.io Prometheus-compatible query API,
+// authenticating with metricsApiKey, and returns the decoded response. A
+// transport failure, a non-200 status or an undecodable body yields an error.
+func fetchMetrics(metricsApiKey string, query string) (*MetricResponse, error) {
+	// Bound the call so an unresponsive API fails the test instead of hanging CI.
+	const requestTimeout = 30 * time.Second
+
+	// The selector contains braces, quotes and '=', so escape it before
+	// embedding it in the query string.
+	endpoint := fmt.Sprintf("%s/metrics/prometheus/api/v1/query?query=%s", BaseLogzioApiUrl, url.QueryEscape(query))
+	logger.Info("sending api request", zap.String("url", endpoint))
+
+	req, err := http.NewRequest(http.MethodGet, endpoint, nil)
+	if err != nil {
+		return nil, fmt.Errorf("building metrics request: %w", err)
+	}
+	req.Header.Set("Accept", "application/json")
+	req.Header.Set("X-API-TOKEN", metricsApiKey)
+
+	client := &http.Client{Timeout: requestTimeout}
+	resp, err := client.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("sending metrics request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
+	}
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("reading metrics response: %w", err)
+	}
+
+	var metricResponse MetricResponse
+	if err := json.Unmarshal(body, &metricResponse); err != nil {
+		return nil, fmt.Errorf("decoding metrics response: %w", err)
+	}
+
+	return &metricResponse, nil
+}
+
+// verifyMetrics compares the query result with requiredMetrics. It returns one
+// "METRIC (not found)" entry for each required metric without any series and
+// one "METRIC (missing label: LABEL)" entry for each required label absent
+// from a series of that metric. Entries are de-duplicated and sorted so the
+// failure output is stable despite random map iteration order.
+func verifyMetrics(metricResponse *MetricResponse, requiredMetrics map[string][]string) []string {
+	var missingMetrics []string
+
+	for metricName, requiredLabels := range requiredMetrics {
+		found := false
+		for _, result := range metricResponse.Data.Result {
+			if result.Metric["__name__"] != metricName {
+				continue
+			}
+			found = true
+			for _, label := range requiredLabels {
+				if _, exists := result.Metric[label]; !exists {
+					missingMetrics = append(missingMetrics, fmt.Sprintf("%s (missing label: %s)", metricName, label))
+				}
+			}
+		}
+		if !found {
+			missingMetrics = append(missingMetrics, metricName+" (not found)")
+		}
+	}
+
+	missingMetrics = deduplicate(missingMetrics)
+	sort.Strings(missingMetrics)
+	return missingMetrics
+}
+
+// deduplicate returns the whitespace-trimmed items of data with repeats
+// removed, keeping the first occurrence of each in input order. It returns
+// nil when data is empty.
+func deduplicate(data []string) []string {
+	seen := make(map[string]struct{}, len(data))
+	var uniqueList []string
+
+	for _, item := range data {
+		trimmedItem := strings.TrimSpace(item)
+		if _, exists := seen[trimmedItem]; exists {
+			continue
+		}
+		seen[trimmedItem] = struct{}{}
+		uniqueList = append(uniqueList, trimmedItem)
+	}
+
+	return uniqueList
+}
diff --git a/tests/resources/otel-demo-apm.yaml b/tests/resources/otel-demo-apm.yaml
new file mode 100644
index 00000000..2a1e55da
--- /dev/null
+++ b/tests/resources/otel-demo-apm.yaml
@@ -0,0 +1,16 @@
+default:
+  envOverrides:
+    - name: OTEL_COLLECTOR_NAME
+      value: logzio-apm-collector.default.svc.cluster.local
+
+opentelemetry-collector:
+  enabled: false
+
+jaeger:
+  enabled: false
+
+prometheus:
+  enabled: false
+
+grafana:
+  enabled: false
diff --git a/tests/resources/tracegen-apm.yaml b/tests/resources/tracegen-apm.yaml
new file mode 100644
index 00000000..fdf12d55
--- /dev/null
+++ b/tests/resources/tracegen-apm.yaml
@@ -0,0 +1,24 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: trace-gen
+spec:
+  selector:
+    matchLabels:
+      app: trace-gen
+  template:
+    metadata:
+      labels:
+        app: trace-gen
+    spec:
+      containers:
+        - name: trace-gen
+          image: ghcr.io/frzifus/jaeger-otel-test:latest
+          args:
+            [
+              '-otel.agent.host=logzio-apm-collector',
+              '-otel.agent.port=4317',
+            ]
+          env:
+            - name: OTEL_SERVICE_NAME
+              value: 'local-test-service'
\ No newline at end of file