diff --git a/pkg/google/gke/gke.go b/pkg/google/gke/gke.go index 853cc0d..f702bfc 100644 --- a/pkg/google/gke/gke.go +++ b/pkg/google/gke/gke.go @@ -145,6 +145,13 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) error { clusterName := instance.GetClusterName() // We skip instances that do not have a clusterName because they are not associated with an GKE cluster if clusterName == "" { + c.logger.LogAttrs(ctx, + slog.LevelDebug, + "instance does not have a clustername", + slog.String("region", instance.Region), + slog.String("machine_type", instance.MachineType), + slog.String("project", project), + ) continue } labelValues := []string{ @@ -158,7 +165,16 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) error { } cpuCost, ramCost, err := c.PricingMap.GetCostOfInstance(instance) if err != nil { - return err + // Log out the error and continue processing nodes + // TODO(@pokom): Should we set sane defaults here to emit _something_? + c.logger.LogAttrs(ctx, + slog.LevelError, + err.Error(), + slog.String("machine_type", instance.MachineType), + slog.String("region", instance.Region), + slog.String("project", project), + ) + continue } ch <- prometheus.MustNewConstMetric( gkeNodeCPUHourlyCostDesc, @@ -198,6 +214,16 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) error { price, err := c.PricingMap.GetCostOfStorage(d.Region(), d.StorageClass()) if err != nil { + c.logger.LogAttrs(ctx, + slog.LevelError, + err.Error(), + slog.String("disk_name", disk.Name), + slog.String("project", project), + slog.String("region", d.Region()), + slog.String("cluster_name", d.Cluster), + slog.String("storage_class", d.StorageClass()), + ) + fmt.Printf("%s error getting cost of storage: %v\n", disk.Name, err) continue } diff --git a/pkg/google/gke/gke_test.go b/pkg/google/gke/gke_test.go index 39760dc..4efb874 100644 --- a/pkg/google/gke/gke_test.go +++ b/pkg/google/gke/gke_test.go @@ -381,6 +381,19 @@ func TestCollector_Collect(t *testing.T) { 
GkeClusterLabel: "test", }, }, + { + // Include an instance whose machine type is missing from the pricing map. + // This case reproduces https://github.com/grafana/cloudcost-exporter/issues/335 and keeps the fix covered. + Name: "test-n1-spot", + MachineType: "abc/n8-slim", + Zone: "testing/us-central1-a", + Scheduling: &computev1.Scheduling{ + ProvisioningModel: "SPOT", + }, + Labels: map[string]string{ + GkeClusterLabel: "test", + }, + }, { Name: "test-n2-us-east1", MachineType: "abc/n2-slim",