Skip to content

Commit

Permalink
feat: add tail sampling (#361)
Browse files Browse the repository at this point in the history
  • Loading branch information
povilasv authored Feb 7, 2024
1 parent 2d06775 commit 669d37b
Show file tree
Hide file tree
Showing 5 changed files with 205 additions and 4 deletions.
4 changes: 4 additions & 0 deletions otel-integration/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## OpenTelemetry-Integration

### v0.0.52 / 2024-02-05

- [FEAT] Optionally allow users to use tail sampling for traces.

### v0.0.51 / 2024-02-05

- [FIX] Fix Target allocator endpoint slices permission issue.
Expand Down
13 changes: 9 additions & 4 deletions otel-integration/k8s-helm/Chart.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Helm chart metadata for the otel-integration chart.
apiVersion: v2
name: otel-integration
description: OpenTelemetry Integration
# Only one `version` key may exist in the mapping; the superseded 0.0.51
# entry is dropped so the value no longer depends on last-key-wins parsing.
version: 0.0.52
keywords:
- OpenTelemetry Collector
- OpenTelemetry Agent
Expand All @@ -11,19 +11,24 @@ keywords:
# Every component below is the same `opentelemetry-collector` subchart,
# installed under a distinct alias and switched on or off through the
# `<alias>.enabled` value named in its `condition`.
# Each entry carries exactly one `version`; the superseded "0.79.3" lines
# are removed so no entry has duplicate keys.
dependencies:
  - name: opentelemetry-collector
    alias: opentelemetry-agent
    version: "0.79.4"
    repository: https://cgx.jfrog.io/artifactory/coralogix-charts-virtual
    condition: opentelemetry-agent.enabled
  - name: opentelemetry-collector
    alias: opentelemetry-agent-windows
    version: "0.79.4"
    repository: https://cgx.jfrog.io/artifactory/coralogix-charts-virtual
    condition: opentelemetry-agent-windows.enabled
  - name: opentelemetry-collector
    alias: opentelemetry-cluster-collector
    version: "0.79.4"
    repository: https://cgx.jfrog.io/artifactory/coralogix-charts-virtual
    condition: opentelemetry-cluster-collector.enabled
  - name: opentelemetry-collector
    alias: opentelemetry-gateway
    version: "0.79.4"
    repository: https://cgx.jfrog.io/artifactory/coralogix-charts-virtual
    condition: opentelemetry-gateway.enabled
sources:
- https://github.com/coralogix/opentelemetry-helm-charts/tree/main/charts/opentelemetry-collector
maintainers:
Expand Down
13 changes: 13 additions & 0 deletions otel-integration/k8s-helm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,19 @@ helm upgrade --install otel-coralogix-integration coralogix-charts-virtual/otel-
--render-subchart-notes -f values-crd-override.yaml --set global.clusterName=<cluster_name> --set global.domain=<domain>
```

### Enabling Tail Sampling

If you want to use [Tail Sampling](https://opentelemetry.io/docs/concepts/sampling/#tail-sampling) to reduce the number of traces with the [tail sampling processor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/tailsamplingprocessor), you can install `otel-integration` using the `tail-sampling-values.yaml` values file. For example:

```bash
helm repo add coralogix-charts-virtual https://cgx.jfrog.io/artifactory/coralogix-charts-virtual

helm upgrade --install otel-coralogix-integration coralogix-charts-virtual/otel-integration \
--render-subchart-notes -f tail-sampling-values.yaml
```

This change configures the otel-agent pods to send span data to the `coralogix-opentelemetry-gateway` deployment using the [loadbalancing exporter](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/exporter/loadbalancingexporter). Make sure to configure enough replicas, as well as resource requests and limits, to handle the load. Next, configure the [tail sampling processor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/tailsamplingprocessor) with your own tail sampling policies.

### Enabling scraping of Prometheus custom resources (`ServiceMonitor` and `PodMonitor`)

If you're leveraging the Prometheus Operator custom resources (`ServiceMonitor` and `PodMonitor`) and you would like to keep using them with the OpenTelemetry collector, you can enable the scraping of these resources by a special, optional component called target allocator. This feature is disabled by default and can be enabled by setting the `opentelemetry-agent.targetAllocator.enabled` value to `true` in the `values.yaml` file.
Expand Down
63 changes: 63 additions & 0 deletions otel-integration/k8s-helm/tail-sampling-values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Values override that enables tail sampling: the agent exports traces
# through the loadbalancing exporter to the gateway, and the gateway runs
# the tail_sampling processor configured below.
global:
  domain: ""
  clusterName: ""
  defaultApplicationName: "otel"
  defaultSubsystemName: "integration"
  logLevel: "warn"
  collectionInterval: "30s"

opentelemetry-agent:
  enabled: true
  mode: daemonset
  presets:
    loadBalancing:
      enabled: true
      # Route by trace ID, targeting the gateway's hostname.
      routingKey: "traceID"
      hostname: coralogix-opentelemetry-gateway

  config:
    service:
      pipelines:
        traces:
          exporters:
            - loadbalancing

opentelemetry-gateway:
  enabled: true
  # For production use-cases please increase replicas
  # and resource requests and limits
  replicaCount: 3
  # resources:
  #   requests:
  #     cpu: 0.5
  #     memory: 256Mi
  #   limits:
  #     cpu: 2
  #     memory: 2G

  config:
    processors:
      tail_sampling:
        # Update configuration here, with your tail sampling policies
        # Docs: https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/tailsamplingprocessor
        decision_wait: 10s
        num_traces: 100
        expected_new_traces_per_sec: 10
        # Example policies: one matching traces with an ERROR status code,
        # and one probabilistic policy at 10 percent.
        policies:
          - name: errors-policy
            type: status_code
            status_code:
              status_codes: [ERROR]
          - name: randomized-policy
            type: probabilistic
            probabilistic:
              sampling_percentage: 10

opentelemetry-cluster-collector:
  enabled: true

opentelemetry-agent-windows:
  enabled: false
116 changes: 116 additions & 0 deletions otel-integration/k8s-helm/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -545,3 +545,119 @@ opentelemetry-cluster-collector:

opentelemetry-agent-windows:
  enabled: false

# Gateway collector deployment that receives OTLP traces, applies the
# tail_sampling processor and exports to Coralogix. Disabled by default.
opentelemetry-gateway:
  enabled: false
  mode: deployment
  fullnameOverride: coralogix-opentelemetry-gateway
  service:
    enabled: true
    # Headless service (no cluster IP).
    clusterIP: "None"
  extraEnvs:
    - name: CORALOGIX_PRIVATE_KEY
      valueFrom:
        secretKeyRef:
          name: coralogix-keys
          key: PRIVATE_KEY

  config:
    extensions:
      zpages:
        endpoint: localhost:55679
      pprof:
        endpoint: localhost:1777
    exporters:
      coralogix:
        timeout: "30s"
        private_key: "${CORALOGIX_PRIVATE_KEY}"
        domain: "{{ .Values.global.domain }}"
        application_name: "{{ .Values.global.defaultApplicationName }}"
        subsystem_name: "{{ .Values.global.defaultSubsystemName }}"
        application_name_attributes:
          - "k8s.namespace.name"
          - "service.namespace"
        subsystem_name_attributes:
          - "k8s.deployment.name"
          - "k8s.statefulset.name"
          - "k8s.daemonset.name"
          - "k8s.cronjob.name"
          - "service.name"
    processors:
      tail_sampling:
        # Update configuration here, with your tail sampling policies
        # Docs: https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/tailsamplingprocessor
        decision_wait: 10s
        num_traces: 100
        expected_new_traces_per_sec: 10
        # Example policies: one matching traces with an ERROR status code,
        # and one probabilistic policy at 10 percent.
        policies:
          - name: errors-policy
            type: status_code
            status_code:
              status_codes: [ERROR]
          - name: randomized-policy
            type: probabilistic
            probabilistic:
              sampling_percentage: 10
    receivers:
      # Scrapes the collector's own metrics endpoint (port 8888, matching
      # service.telemetry.metrics.address below).
      prometheus:
        config:
          scrape_configs:
            - job_name: opentelemetry-collector
              scrape_interval: 30s
              static_configs:
                - targets:
                    - ${MY_POD_IP}:8888
      otlp:
        protocols:
          grpc:
            endpoint: ${MY_POD_IP}:4317
    service:
      telemetry:
        resource:
          # Suppress this attribute, as we don't want the UUID of the collector to be sent,
          # instead we rely on instance label generated by Prometheus receiver.
          # NOTE(review): entries are written as a list of null-valued keys;
          # confirm this is the shape the collector expects for
          # `service.telemetry.resource`.
          - service.instance.id:
          - service.name:
        logs:
          level: "{{ .Values.global.logLevel }}"
          encoding: json
        metrics:
          address: ${MY_POD_IP}:8888
      pipelines:
        metrics:
          exporters:
            - coralogix
          processors:
            - memory_limiter
            - batch
          receivers:
            - prometheus
        traces:
          exporters:
            - coralogix
          processors:
            - memory_limiter
            - tail_sampling
            - batch
          receivers:
            - otlp

  tolerations:
    - operator: Exists
  # Only the OTLP port is exposed; every other port is disabled.
  ports:
    otlp:
      enabled: true
    otlp-http:
      enabled: false
    jaeger-compact:
      enabled: false
    jaeger-thrift:
      enabled: false
    jaeger-grpc:
      enabled: false
    zipkin:
      enabled: false

0 comments on commit 669d37b

Please sign in to comment.