diff --git a/doc/api-extensions.md b/doc/api-extensions.md index e83121c0913f..38a617f315e2 100644 --- a/doc/api-extensions.md +++ b/doc/api-extensions.md @@ -2328,3 +2328,9 @@ This is indicated when command `lxc version` is executed or when `/1.0` endpoint This API extension enables setting an `oidc.groups.claim` configuration key. If OIDC authentication is configured and this claim is set, LXD will request this claim in the scope of OIDC flow. The value of the claim will be extracted and might be used to make authorization decisions. + +## `loki_config_instance` + +Adds a new `loki.instance` server configuration key to customize the `instance` field in Loki events. +This can be used to expose the name of the cluster rather than the individual system name sending +the event as that's usually already covered by the `location` field. diff --git a/doc/config_options.txt b/doc/config_options.txt index 68e779468e11..00e7bef1f990 100644 --- a/doc/config_options.txt +++ b/doc/config_options.txt @@ -1615,6 +1615,14 @@ Specify the protocol, name or IP and port. For example `https://loki.example.com ``` +```{config:option} loki.instance server-loki +:defaultdesc: "Local server host name or cluster member name" +:scope: "global" +:shortdesc: "Name to use as the instance field in Loki events." +:type: "string" +This allows replacing the default instance value (server host name) by a more relevant value like a cluster identifier. +``` + ```{config:option} loki.labels server-loki :scope: "global" :shortdesc: "Labels for a Loki log entry" diff --git a/grafana/LXD.json b/grafana/LXD.json index bde4e14d5cfc..3b8f791981eb 100644 --- a/grafana/LXD.json +++ b/grafana/LXD.json @@ -3694,7 +3694,7 @@ "uid": "${DS_LOKI}" }, "editorMode": "builder", - "expr": "{app=\"lxd\", type=\"lifecycle\"}", + "expr": "{app=\"lxd\", type=\"lifecycle\", instance=\"$job\"}", "queryType": "range", "refId": "A" } @@ -3731,7 +3731,7 @@ "uid": "${DS_LOKI}" }, "editorMode": "builder", - "expr": "{app=\"lxd\", type=\"logging\"}", + "expr": "{app=\"lxd\", type=\"logging\", instance=\"$job\"}", "queryType": "range", "refId": "A" } diff --git a/lxd/api_1.0.go b/lxd/api_1.0.go index 87f5ba32aa99..aa4d22e5bdd0 100644 --- a/lxd/api_1.0.go +++ b/lxd/api_1.0.go @@ -831,6 +831,8 @@ func doAPI10UpdateTriggers(d *Daemon, nodeChanged, clusterChanged map[string]str fallthrough case "loki.api.ca_cert": fallthrough + case "loki.instance": + fallthrough case "loki.labels": fallthrough case "loki.loglevel": @@ -956,12 +958,12 @@ func doAPI10UpdateTriggers(d *Daemon, nodeChanged, clusterChanged map[string]str } if lokiChanged { - lokiURL, lokiUsername, lokiPassword, lokiCACert, lokiLabels, lokiLoglevel, lokiTypes := clusterConfig.LokiServer() + lokiURL, lokiUsername, lokiPassword, lokiCACert, lokiInstance, lokiLoglevel, lokiLabels, lokiTypes := clusterConfig.LokiServer() if lokiURL == "" || lokiLoglevel == "" || len(lokiTypes) == 0 { d.internalListener.RemoveHandler("loki") } else { - err := d.setupLoki(lokiURL, lokiUsername, lokiPassword, lokiCACert, lokiLabels, lokiLoglevel, lokiTypes) + err := d.setupLoki(lokiURL, lokiUsername, lokiPassword, lokiCACert, lokiInstance, lokiLoglevel, lokiLabels, lokiTypes) if err != nil { return err } diff --git a/lxd/cluster/config/config.go b/lxd/cluster/config/config.go index 2ff8c343bc38..38471c50e1f5 100644 --- a/lxd/cluster/config/config.go +++ b/lxd/cluster/config/config.go @@ -199,7 +199,7 @@ func (c *Config) InstancesPlacementScriptlet() string { } // LokiServer returns all the Loki settings needed to connect to a server. -func (c *Config) LokiServer() (apiURL string, authUsername string, authPassword string, apiCACert string, labels []string, logLevel string, types []string) { +func (c *Config) LokiServer() (apiURL string, authUsername string, authPassword string, apiCACert string, instance string, logLevel string, labels []string, types []string) { if c.m.GetString("loki.types") != "" { types = strings.Split(c.m.GetString("loki.types"), ",") } @@ -208,7 +208,7 @@ func (c *Config) LokiServer() (apiURL string, authUsername string, authPassword labels = strings.Split(c.m.GetString("loki.labels"), ",") } - return c.m.GetString("loki.api.url"), c.m.GetString("loki.auth.username"), c.m.GetString("loki.auth.password"), c.m.GetString("loki.api.ca_cert"), labels, c.m.GetString("loki.loglevel"), types + return c.m.GetString("loki.api.url"), c.m.GetString("loki.auth.username"), c.m.GetString("loki.auth.password"), c.m.GetString("loki.api.ca_cert"), c.m.GetString("loki.instance"), c.m.GetString("loki.loglevel"), labels, types } // ACME returns all ACME settings needed for certificate renewal. @@ -608,6 +608,15 @@ var ConfigSchema = config.Schema{ // shortdesc: URL to the Loki server "loki.api.url": {}, + // lxdmeta:generate(entities=server; group=loki; key=loki.instance) + // This allows replacing the default instance value (server host name) by a more relevant value like a cluster identifier. + // --- + // type: string + // scope: global + // defaultdesc: Local server host name or cluster member name + // shortdesc: Name to use as the instance field in Loki events. + "loki.instance": {}, + // lxdmeta:generate(entities=server; group=loki; key=loki.labels) // Specify a comma-separated list of values that should be used as labels for a Loki log entry. // --- diff --git a/lxd/daemon.go b/lxd/daemon.go index 6d74f2f2efde..bd82d00bbe3c 100644 --- a/lxd/daemon.go +++ b/lxd/daemon.go @@ -702,22 +702,32 @@ func (d *Daemon) Init() error { return nil } -func (d *Daemon) setupLoki(URL string, cert string, key string, caCert string, labels []string, logLevel string, types []string) error { +func (d *Daemon) setupLoki(URL string, cert string, key string, caCert string, instanceName string, logLevel string, labels []string, types []string) error { + // Stop any existing loki client. if d.lokiClient != nil { d.lokiClient.Stop() } + // Check basic requirements for starting a new client. if URL == "" || logLevel == "" || len(types) == 0 { return nil } + // Validate the URL. u, err := url.Parse(URL) if err != nil { return err } - d.lokiClient = loki.NewClient(d.shutdownCtx, u, cert, key, caCert, labels, logLevel, types) + // Figure out the instance name. + if instanceName == "" { + instanceName = d.serverName + } + + // Start a new client. + d.lokiClient = loki.NewClient(d.shutdownCtx, u, cert, key, caCert, instanceName, logLevel, labels, types) + // Attach the new client to the log handler. d.internalListener.AddHandler("loki", d.lokiClient.HandleEvent) return nil @@ -1297,7 +1307,7 @@ func (d *Daemon) init() error { maasAPIURL, maasAPIKey = d.globalConfig.MAASController() d.gateway.HeartbeatOfflineThreshold = d.globalConfig.OfflineThreshold() - lokiURL, lokiUsername, lokiPassword, lokiCACert, lokiLabels, lokiLoglevel, lokiTypes := d.globalConfig.LokiServer() + lokiURL, lokiUsername, lokiPassword, lokiCACert, lokiInstance, lokiLoglevel, lokiLabels, lokiTypes := d.globalConfig.LokiServer() oidcIssuer, oidcClientID, oidcAudience, oidcGroupsClaim := d.globalConfig.OIDCServer() syslogSocketEnabled := d.localConfig.SyslogSocket() instancePlacementScriptlet := d.globalConfig.InstancesPlacementScriptlet() @@ -1307,7 +1317,7 @@ func (d *Daemon) init() error { // Setup Loki logger. if lokiURL != "" { - err = d.setupLoki(lokiURL, lokiUsername, lokiPassword, lokiCACert, lokiLabels, lokiLoglevel, lokiTypes) + err = d.setupLoki(lokiURL, lokiUsername, lokiPassword, lokiCACert, lokiInstance, lokiLoglevel, lokiLabels, lokiTypes) if err != nil { return err } diff --git a/lxd/loki/loki.go b/lxd/loki/loki.go index 4773e25ad815..55666bd15bb8 100644 --- a/lxd/loki/loki.go +++ b/lxd/loki/loki.go @@ -9,7 +9,6 @@ import ( "io" "net/http" "net/url" - "os" "reflect" "sort" "strconv" @@ -17,7 +16,6 @@ import ( "sync" "time" - "github.com/grafana/dskit/backoff" "github.com/sirupsen/logrus" "github.com/canonical/lxd/shared" @@ -32,17 +30,17 @@ const ( ) type config struct { - backoffConfig backoff.Config - batchSize int - batchWait time.Duration - caCert string - username string - password string - labels []string - logLevel string - timeout time.Duration - types []string - url *url.URL + batchSize int + batchWait time.Duration + caCert string + username string + password string + labels []string + instance string + logLevel string + timeout time.Duration + types []string + url *url.URL } type entry struct { @@ -62,24 +60,20 @@ type Client struct { } // NewClient returns a Client. -func NewClient(ctx context.Context, url *url.URL, username string, password string, caCert string, labels []string, logLevel string, types []string) *Client { +func NewClient(ctx context.Context, u *url.URL, username string, password string, caCert string, instance string, logLevel string, labels []string, types []string) *Client { client := Client{ cfg: config{ - backoffConfig: backoff.Config{ - MinBackoff: 500 * time.Millisecond, - MaxBackoff: 5 * time.Minute, - MaxRetries: 10, - }, batchSize: 10 * 1024, batchWait: 1 * time.Second, caCert: caCert, username: username, password: password, + instance: instance, labels: labels, logLevel: logLevel, timeout: 10 * time.Second, types: types, - url: url, + url: u, }, client: &http.Client{}, ctx: ctx, @@ -167,11 +161,10 @@ func (c *Client) sendBatch(batch *batch) { return } - backoff := backoff.New(c.ctx, c.cfg.backoffConfig) - var status int - for backoff.Ongoing() { + for i := 0; i < 30; i++ { + // Try to send the message. status, err = c.send(c.ctx, buf) if err == nil { return @@ -179,10 +172,11 @@ func (c *Client) sendBatch(batch *batch) { // Only retry 429s, 500s and connection-level errors. if status > 0 && status != 429 && status/100 != 5 { - break + return } - backoff.Wait() + // Retry every 10s. + time.Sleep(10 * time.Second) } } @@ -233,17 +227,12 @@ func (c *Client) HandleEvent(event api.Event) { return } - hostname, err := os.Hostname() - if err != nil { - hostname = "none" - } - entry := entry{ labels: LabelSet{ "app": "lxd", "type": event.Type, "location": event.Location, - "instance": hostname, + "instance": c.cfg.instance, }, Entry: Entry{ Timestamp: event.Timestamp, diff --git a/lxd/metadata/configuration.json b/lxd/metadata/configuration.json index e0c8e9ed0c7f..13737a8fbcaa 100644 --- a/lxd/metadata/configuration.json +++ b/lxd/metadata/configuration.json @@ -1760,6 +1760,15 @@ "type": "string" } }, + { + "loki.instance": { + "defaultdesc": "Local server host name or cluster member name", + "longdesc": "This allows replacing the default instance value (server host name) by a more relevant value like a cluster identifier.", + "scope": "global", + "shortdesc": "Name to use as the instance field in Loki events.", + "type": "string" + } + }, { "loki.labels": { "longdesc": "Specify a comma-separated list of values that should be used as labels for a Loki log entry.", diff --git a/shared/version/api.go b/shared/version/api.go index dba9b88eba80..ee0fbcf181c7 100644 --- a/shared/version/api.go +++ b/shared/version/api.go @@ -393,6 +393,7 @@ var APIExtensions = []string{ "resources_disk_mounted", "server_version_lts", "oidc_groups_claim", + "loki_config_instance", } // APIExtensionsCount returns the number of available API extensions.