
panic: duplicate metrics collector registration attempted (loki.source.journal) #2105

Open
gouthamve opened this issue Nov 15, 2024 · 1 comment
Labels
bug Something isn't working

Comments

@gouthamve
Member

What's wrong?

Alloy panics with "duplicate metrics collector registration attempted".

Steps to reproduce

Run the kubernetes-monitoring Helm chart with the following values:

      logs: {
        enabled: true,
        podLogs: {
          enabled: true,
        },
        clusterEvents: {
          enabled: true,
        },
        journal: {
          enabled: true,
        },
      }

Setting journal.enabled=true adds the following config to the alloy-logs StatefulSet:

    // Journal Logs
    loki.relabel "journal" {
      // no forward_to is used, the defined rules are used below by the loki.source.journal component
      forward_to = []

      // set the name of the systemd unit
      rule {
        action = "replace"
        source_labels = ["__journal__systemd_unit"]
        replacement = "$1"
        target_label = "unit"
      }
    }

    loki.source.journal "worker" {
      path = "/var/log/journal"
      format_as_json = false
      max_age = "8h"
      relabel_rules = loki.relabel.journal.rules
      labels = {
        job = "integrations/kubernetes/journal",
        instance = env("HOSTNAME"),
      }
      forward_to = [loki.process.journal_logs.receiver]
    }
    loki.process "journal_logs" {
      forward_to = [loki.process.logs_service.receiver]
    }

I think that when this config is deployed to a node that doesn't have systemd-journal enabled (so /var/log/journal doesn't exist), building the loki.source.journal component fails and Alloy crashes with the panic below.
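
For context on the panic itself: the stack trace below ends in target.NewMetrics calling MustRegister, which suggests the component's metrics get registered once, the build then fails because the journal directory is missing, and the retry registers the same collectors on the same registerer again. A minimal client_golang sketch of that pattern (illustrative only, not Alloy's actual code; the constructor and metric name are made up):

    // Sketch of the suspected failure mode: a constructor that calls
    // MustRegister runs twice against the same registerer because the
    // first build failed after registration and was retried.
    package main

    import "github.com/prometheus/client_golang/prometheus"

    // newMetrics stands in for target.NewMetrics; the metric name is illustrative.
    func newMetrics(reg prometheus.Registerer) prometheus.Counter {
        c := prometheus.NewCounter(prometheus.CounterOpts{
            Name: "journal_target_lines_total",
            Help: "Illustrative counter.",
        })
        // Panics with "duplicate metrics collector registration attempted"
        // if a collector with the same descriptor is already registered.
        reg.MustRegister(c)
        return c
    }

    func main() {
        reg := prometheus.NewRegistry()
        newMetrics(reg) // first evaluation: registers fine
        // Second evaluation after the failed build: panics.
        newMetrics(reg)
    }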

System information

No response

Software version

alloy v1.5.0

Configuration

rule {
          source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"]
          separator = "/"
          action = "replace"
          replacement = "/var/log/pods/*$1/*.log"
          target_label = "__path__"
        }

        // set the container runtime as a label
        rule {
          action = "replace"
          source_labels = ["__meta_kubernetes_pod_container_id"]
          regex = "^(\\S+):\\/\\/.+$"
          replacement = "$1"
          target_label = "tmp_container_runtime"
        }
      }

      discovery.relabel "filtered_pod_logs" {
        targets = discovery.relabel.pod_logs.output
        rule {  // Drop anything with a "falsy" annotation value
          source_labels = ["__meta_kubernetes_pod_annotation_k8s_grafana_com_logs_autogather"]
          regex = "(false|no|skip)"
          action = "drop"
        }
      }

      local.file_match "pod_logs" {
        path_targets = discovery.relabel.filtered_pod_logs.output
      }

      loki.source.file "pod_logs" {
        targets    = local.file_match.pod_logs.targets
        forward_to = [loki.process.pod_logs.receiver]
      }

      loki.process "pod_logs" {
        stage.match {
          selector = "{tmp_container_runtime=~\"containerd|cri-o\"}"
          // the cri processing stage extracts the following k/v pairs: log, stream, time, flags
          stage.cri {}

          // Set the extract flags and stream values as labels
          stage.labels {
            values = {
              flags  = "",
              stream  = "",
            }
          }
        }

        stage.match {
          selector = "{tmp_container_runtime=\"docker\"}"
          // the docker processing stage extracts the following k/v pairs: log, stream, time
          stage.docker {}

          // Set the extract stream value as a label
          stage.labels {
            values = {
              stream  = "",
            }
          }
        }

        // Drop the filename label, since it's not really useful in the context of Kubernetes, where we already have cluster,
        // namespace, pod, and container labels. Drop any structured metadata. Also drop the temporary
        // container runtime label as it is no longer needed.
        stage.label_drop {
          values = [
            "filename",
            "tmp_container_runtime",
          ]
        }
        forward_to = [loki.process.logs_service.receiver]
      }

      // Logs Service
      remote.kubernetes.secret "logs_service" {
        name = "loki-k8s-monitoring"
        namespace = "monitoring"
      }

      loki.process "logs_service" {
        stage.static_labels {
            values = {
              cluster = "office",
            }
        }
        forward_to = [loki.write.logs_service.receiver]
      }

      // Loki
      loki.write "logs_service" {
        endpoint {
          url = nonsensitive(remote.kubernetes.secret.logs_service.data["host"]) + "/loki/api/v1/push"
          tenant_id = nonsensitive(remote.kubernetes.secret.logs_service.data["tenantId"])

          basic_auth {
            username = nonsensitive(remote.kubernetes.secret.logs_service.data["username"])
            password = remote.kubernetes.secret.logs_service.data["password"]
          }
        }
      }


      logging {
        level  = "info"
        format = "logfmt"
      }
      // Journal Logs
      loki.relabel "journal" {
        // no forward_to is used, the defined rules are used below by the loki.source.journal component
        forward_to = []

        // set the name of the systemd unit
        rule {
          action = "replace"
          source_labels = ["__journal__systemd_unit"]
          replacement = "$1"
          target_label = "unit"
        }
      }

      loki.source.journal "worker" {
        path = "/var/log/journal"
        format_as_json = false
        max_age = "8h"
        relabel_rules = loki.relabel.journal.rules
        labels = {
          job = "integrations/kubernetes/journal",
          instance = env("HOSTNAME"),
        }
        forward_to = [loki.process.journal_logs.receiver]
      }
      loki.process "journal_logs" {
        forward_to = [loki.process.logs_service.receiver]
      }

Logs

➜  monitoring git:(main) ✗ k -n monitoring logs -f k8s-monitoring-alloy-logs-4hlxz
ts=2024-11-15T14:17:54.977481892Z level=info "boringcrypto enabled"=false
ts=2024-11-15T14:17:54.966224057Z level=info source=/go/pkg/mod/github.com/!kim!machine!gun/[email protected]/memlimit/memlimit.go:170 msg="memory is not limited, skipping: %v" package=github.com/KimMachineGun/automemlimit/memlimit !BADKEY="memory is not limited"
ts=2024-11-15T14:17:54.977776493Z level=info msg="no peer discovery configured: both join and discover peers are empty" service=cluster
ts=2024-11-15T14:17:54.977804495Z level=info msg="running usage stats reporter"
ts=2024-11-15T14:17:54.977824621Z level=warn msg="this stdlib function is deprecated; please refer to the documentation for updated usage and alternatives" controller_path=/ controller_id="" function=env
ts=2024-11-15T14:17:54.977853498Z level=warn msg="this stdlib function is deprecated; please refer to the documentation for updated usage and alternatives" controller_path=/ controller_id="" function=env
ts=2024-11-15T14:17:54.977896083Z level=warn msg="this stdlib function is deprecated; please refer to the documentation for updated usage and alternatives" controller_path=/ controller_id="" function=nonsensitive
ts=2024-11-15T14:17:54.977923502Z level=warn msg="this stdlib function is deprecated; please refer to the documentation for updated usage and alternatives" controller_path=/ controller_id="" function=nonsensitive
ts=2024-11-15T14:17:54.977949462Z level=warn msg="this stdlib function is deprecated; please refer to the documentation for updated usage and alternatives" controller_path=/ controller_id="" function=nonsensitive
ts=2024-11-15T14:17:54.977976588Z level=info msg="starting complete graph evaluation" controller_path=/ controller_id="" trace_id=ea1918956a5ffa80f8fbbabebe7b63c5
ts=2024-11-15T14:17:54.978041634Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=ea1918956a5ffa80f8fbbabebe7b63c5 node_id=remotecfg duration=256.973µs
ts=2024-11-15T14:17:54.978081594Z level=info msg="applying non-TLS config to HTTP server" service=http
ts=2024-11-15T14:17:54.978104929Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=ea1918956a5ffa80f8fbbabebe7b63c5 node_id=http duration=35.877µs
ts=2024-11-15T14:17:54.97815014Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=ea1918956a5ffa80f8fbbabebe7b63c5 node_id=cluster duration=3.209µs
ts=2024-11-15T14:17:54.978185726Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=ea1918956a5ffa80f8fbbabebe7b63c5 node_id=labelstore duration=13.709µs
ts=2024-11-15T14:17:54.978220144Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=ea1918956a5ffa80f8fbbabebe7b63c5 node_id=livedebugging duration=21.584µs
ts=2024-11-15T14:17:54.978253396Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=ea1918956a5ffa80f8fbbabebe7b63c5 node_id=ui duration=3.209µs
ts=2024-11-15T14:17:54.978286357Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=ea1918956a5ffa80f8fbbabebe7b63c5 node_id=loki.relabel.journal duration=1.124442ms
ts=2024-11-15T14:17:54.97832165Z level=info msg="Using pod service account via in-cluster config" component_path=/ component_id=discovery.kubernetes.pods
ts=2024-11-15T14:17:54.978347027Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=ea1918956a5ffa80f8fbbabebe7b63c5 node_id=discovery.kubernetes.pods duration=2.42506ms
ts=2024-11-15T14:17:54.978382029Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=ea1918956a5ffa80f8fbbabebe7b63c5 node_id=discovery.relabel.pod_logs duration=1.136984ms
ts=2024-11-15T14:17:54.978414989Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=ea1918956a5ffa80f8fbbabebe7b63c5 node_id=discovery.relabel.filtered_pod_logs duration=202.72µs
ts=2024-11-15T14:17:54.9784497Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=ea1918956a5ffa80f8fbbabebe7b63c5 node_id=local.file_match.pod_logs duration=120.174µs
ts=2024-11-15T14:17:54.978503078Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=ea1918956a5ffa80f8fbbabebe7b63c5 node_id=logging duration=1.103441ms
ts=2024-11-15T14:17:54.978702298Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=ea1918956a5ffa80f8fbbabebe7b63c5 node_id=otel duration=121.049µs
ts=2024-11-15T14:17:54.979348378Z level=info msg="Using pod service account via in-cluster config" component_path=/ component_id=remote.kubernetes.secret.logs_service
ts=2024-11-15T14:17:54.998334089Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=ea1918956a5ffa80f8fbbabebe7b63c5 node_id=remote.kubernetes.secret.logs_service duration=19.522993ms
ts=2024-11-15T14:17:55.002373329Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=ea1918956a5ffa80f8fbbabebe7b63c5 node_id=loki.write.logs_service duration=3.849937ms
ts=2024-11-15T14:17:55.003622903Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=ea1918956a5ffa80f8fbbabebe7b63c5 node_id=loki.process.logs_service duration=1.083315ms
ts=2024-11-15T14:17:55.004372823Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=ea1918956a5ffa80f8fbbabebe7b63c5 node_id=loki.process.journal_logs duration=592.993µs
ts=2024-11-15T14:17:55.00940933Z level=error msg="failed to evaluate config" controller_path=/ controller_id="" trace_id=ea1918956a5ffa80f8fbbabebe7b63c5 node=loki.source.journal.worker err="building component: creating journal reader: failed to open journal in directory \"/var/log/journal\": no such file or directory"
ts=2024-11-15T14:17:55.009616134Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=ea1918956a5ffa80f8fbbabebe7b63c5 node_id=loki.source.journal.worker duration=5.073551ms
ts=2024-11-15T14:17:55.012655773Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=ea1918956a5ffa80f8fbbabebe7b63c5 node_id=loki.process.pod_logs duration=2.890588ms
ts=2024-11-15T14:17:55.014358041Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=ea1918956a5ffa80f8fbbabebe7b63c5 node_id=loki.source.file.pod_logs duration=1.479713ms
ts=2024-11-15T14:17:55.014660225Z level=info msg="finished node evaluation" controller_path=/ controller_id="" trace_id=ea1918956a5ffa80f8fbbabebe7b63c5 node_id=tracing duration=146.717µs
ts=2024-11-15T14:17:55.0147909Z level=info msg="finished complete graph evaluation" controller_path=/ controller_id="" trace_id=ea1918956a5ffa80f8fbbabebe7b63c5 duration=43.959985ms
Error: /etc/alloy/config.alloy:170:1: Failed to build component: building component: creating journal reader: failed to open journal in directory "/var/log/journal": no such file or directory

169 |
170 |   loki.source.journal "worker" {
    |  _^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
171 | |   path = "/var/log/journal"
172 | |   format_as_json = false
173 | |   max_age = "8h"
174 | |   relabel_rules = loki.relabel.journal.rules
175 | |   labels = {
176 | |     job = "integrations/kubernetes/journal",
177 | |     instance = env("HOSTNAME"),
178 | |   }
179 | |   forward_to = [loki.process.journal_logs.receiver]
180 | | }
    | |_^
181 |   loki.process "journal_logs" {

panic: duplicate metrics collector registration attempted

goroutine 170 [running]:
github.com/prometheus/client_golang/prometheus.(*wrappingRegisterer).MustRegister(0x40038e4a50, {0x4003a4f7c0?, 0x0?, 0x0?})
	/go/pkg/mod/github.com/prometheus/[email protected]/prometheus/wrap.go:104 +0x14c
github.com/grafana/alloy/internal/component/loki/source/journal/internal/target.NewMetrics({0xb4bf048, 0x40038e4a50})
	/src/alloy/internal/component/loki/source/journal/internal/target/metrics.go:33 +0x2a8
github.com/grafana/alloy/internal/component/loki/source/journal.New({{0xffff3c7eae98, 0x40038e4930}, {0x40031a71a0, 0x1a}, {0xb433100, 0x40038d7d60}, {0x4003897770, 0x25}, 0x40038cd4e0, {0xb4bf048, ...}, ...}, ...)
	/src/alloy/internal/component/loki/source/journal/journal.go:67 +0xfc
github.com/grafana/alloy/internal/component/loki/source/journal.init.0.func1({{0xffff3c7eae98, 0x40038e4930}, {0x40031a71a0, 0x1a}, {0xb433100, 0x40038d7d60}, {0x4003897770, 0x25}, 0x40038cd4e0, {0xb4bf048, ...}, ...}, ...)
	/src/alloy/internal/component/loki/source/journal/journal.go:31 +0x118
github.com/grafana/alloy/internal/runtime/internal/controller.(*BuiltinComponentNode).evaluate(0x40038e2fc8, 0x4003a680e0)
	/src/alloy/internal/runtime/internal/controller/node_builtin_component.go:275 +0x3a0
github.com/grafana/alloy/internal/runtime/internal/controller.(*BuiltinComponentNode).Evaluate(0x40038e2fc8, 0x9a38260?)
	/src/alloy/internal/runtime/internal/controller/node_builtin_component.go:248 +0x20
github.com/grafana/alloy/internal/runtime/internal/controller.(*Loader).concurrentEvalFn(0x40031a4b60, {0xffff3c7eaeb8, 0x40038e2fc8}, {0xb4e8d68, 0x40037a8a80}, {0xb45e5e8, 0x4003747d40}, 0x40038f82a0)
	/src/alloy/internal/runtime/internal/controller/loader.go:787 +0x520
github.com/grafana/alloy/internal/runtime/internal/controller.(*Loader).EvaluateDependants.func2()
	/src/alloy/internal/runtime/internal/controller/loader.go:736 +0x3c
github.com/grafana/alloy/internal/runtime/internal/worker.(*workQueue).emitNextTask.func1()
	/src/alloy/internal/runtime/internal/worker/worker_pool.go:181 +0x6c
github.com/grafana/alloy/internal/runtime/internal/worker.(*fixedWorkerPool).start.func1()
	/src/alloy/internal/runtime/internal/worker/worker_pool.go:87 +0x68
created by github.com/grafana/alloy/internal/runtime/internal/worker.(*fixedWorkerPool).start in goroutine 1
	/src/alloy/internal/runtime/internal/worker/worker_pool.go:80 +0x2c
@gouthamve added the bug label Nov 15, 2024
@wildum
Contributor

wildum commented Nov 22, 2024

This was solved by not specifying the path. The panic is not great, though.
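
On the "panic is not great" point: client_golang supports a non-panicking path via Register and AlreadyRegisteredError, which lets a caller reuse the collector left behind by an earlier build attempt instead of crashing. A sketch of that pattern (illustrative only, not a patch against Alloy):

    // Sketch of a non-panicking registration helper using client_golang's
    // AlreadyRegisteredError (illustrative; not Alloy's actual code).
    package main

    import (
        "errors"
        "fmt"

        "github.com/prometheus/client_golang/prometheus"
    )

    // registerOrReuse registers c, or returns the collector that is already
    // registered under the same descriptor instead of panicking.
    func registerOrReuse(reg prometheus.Registerer, c prometheus.Counter) (prometheus.Counter, error) {
        if err := reg.Register(c); err != nil {
            var are prometheus.AlreadyRegisteredError
            if errors.As(err, &are) {
                if existing, ok := are.ExistingCollector.(prometheus.Counter); ok {
                    // Reuse the collector registered by the earlier build.
                    return existing, nil
                }
            }
            return nil, err
        }
        return c, nil
    }

    func main() {
        reg := prometheus.NewRegistry()
        opts := prometheus.CounterOpts{Name: "journal_target_lines_total", Help: "Illustrative counter."}

        first, _ := registerOrReuse(reg, prometheus.NewCounter(opts))
        second, _ := registerOrReuse(reg, prometheus.NewCounter(opts)) // no panic, reuses first
        fmt.Println(first == second)                                   // true
    }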
