Skip to content

Commit

Permalink
MON-3934: Parallelize some readonly/non disruptive e2e tests for fast…
Browse files Browse the repository at this point in the history
…er runs

Isolate specific tests to enable parallel execution

Enhance the resilience of some tests and fix those prone to errors.

Fix some tests that were running wrong checks.

Make some the tests idempotent to be easily debugged and run locally
  • Loading branch information
machine424 committed Sep 6, 2024
1 parent 491ab3b commit 00a6f60
Show file tree
Hide file tree
Showing 21 changed files with 369 additions and 414 deletions.
19 changes: 0 additions & 19 deletions pkg/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -1488,25 +1488,6 @@ func (c *Client) CreateOrUpdateConfigMap(ctx context.Context, cm *v1.ConfigMap)
return err
}

func (c *Client) DeleteIfExists(ctx context.Context, nsName string) error {
nClient := c.kclient.CoreV1().Namespaces()
_, err := nClient.Get(ctx, nsName, metav1.GetOptions{})
if apierrors.IsNotFound(err) {
// Namespace already deleted
return nil
}

if err != nil {
return fmt.Errorf("retrieving Namespace object failed: %w", err)
}

err = nClient.Delete(ctx, nsName, metav1.DeleteOptions{})
if err != nil {
return fmt.Errorf("deleting ConfigMap object failed: %w", err)
}
return nil
}

func (c *Client) CreateIfNotExistConfigMap(ctx context.Context, cm *v1.ConfigMap) (*v1.ConfigMap, error) {
cClient := c.kclient.CoreV1().ConfigMaps(cm.GetNamespace())
res, err := cClient.Get(ctx, cm.GetName(), metav1.GetOptions{})
Expand Down
2 changes: 2 additions & 0 deletions test/e2e/alert_relabel_config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ const (
)

func TestAlertRelabelConfig(t *testing.T) {
// The test shouldn't be disruptive, safe to run in parallel with others.
t.Parallel()
initialRelabelConfig := prometheusRelabelConfig(t)

// By default, we drop prometheus_replica label + add openshift_io_alert_source = 2
Expand Down
3 changes: 3 additions & 0 deletions test/e2e/alerting_rule_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ const (
)

func TestAlertingRule(t *testing.T) {
// The test shouldn't be disruptive, safe to run in parallel with others.
t.Parallel()
ctx := context.Background()
alertingRules := f.OpenShiftMonitoringClient.MonitoringV1().AlertingRules(f.Ns)

Expand Down Expand Up @@ -167,6 +169,7 @@ func prometheusRuleCount(t *testing.T) int {
}

func assertPrometheusRuleCount(t *testing.T, count int) {
t.Helper()
currentCount := prometheusRuleCount(t)
if currentCount != count {
t.Fatalf("Different generated PrometheusRule count (%d != %d)", currentCount, count)
Expand Down
2 changes: 1 addition & 1 deletion test/e2e/alertmanager_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ func (wr *webhookReceiver) getAlertsByID(id string) ([]alert, error) {
func (wr *webhookReceiver) tearDown(t *testing.T, f *framework.Framework) {
t.Helper()
err := framework.Poll(time.Second, 5*time.Minute, func() error {
return f.OperatorClient.DeleteIfExists(ctx, wr.namespace)
return f.DeleteNamespace(t, wr.namespace)
})

if err != nil {
Expand Down
54 changes: 7 additions & 47 deletions test/e2e/alertmanager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,10 @@ func TestAlertmanagerDataReplication(t *testing.T) {
),
},
} {
t.Run(test.name, test.assertion)
t.Run(test.name, func(t *testing.T) {
t.Parallel()
test.assertion(t)
})
}

// Ensure that the silence has been preserved.
Expand Down Expand Up @@ -465,6 +468,8 @@ func TestAlertmanagerDataReplication(t *testing.T) {

// The Alertmanager API should be protected by authentication/authorization.
func TestAlertmanagerAPI(t *testing.T) {
// The test shouldn't be disruptive, safe to run in parallel with others.
t.Parallel()
err := framework.Poll(5*time.Second, 5*time.Minute, func() error {
body, err := f.AlertmanagerClient.GetAlertmanagerAlerts(
"filter", `alertname="Watchdog"`,
Expand Down Expand Up @@ -747,51 +752,6 @@ func TestAlertmanagerDisabling(t *testing.T) {
})
}

func TestAlertManagerHasAdditionalAlertRelabelConfigs(t *testing.T) {
const (
expectPlatformLabel = "openshift_io_alert_source"
expectPlatformLabelValue = "platform"
)

type Alerts []struct {
Labels map[string]string `json:"labels"`
}

var alerts Alerts

err := framework.Poll(5*time.Second, time.Minute, func() error {
resp, err := f.AlertmanagerClient.Do("GET", "/api/v2/alerts", nil)
if err != nil {
return err
}
defer resp.Body.Close()

if resp.StatusCode != http.StatusOK {
return fmt.Errorf("expecting 200 status code, got %d (%q)", resp.StatusCode, resp.Body)
}

if err := json.NewDecoder(resp.Body).Decode(&alerts); err != nil {
return fmt.Errorf("error decoding alert response")
}

return nil
})
if err != nil {
t.Fatal(err)
}

for _, alert := range alerts {
v, found := alert.Labels[expectPlatformLabel]
if !found {
t.Fatal("expected correct label to be present")
}

if v != expectPlatformLabelValue {
t.Fatalf("expected correct value for %s but got %s", expectPlatformLabel, v)
}
}
}

// TestAlertmanagerConfigPipeline ensures that the AlertManagerConfig CR's
// created in a user namespace can be reconciled and have alerts sent to the
// correct Alertmanager (depending on whether user-defined Alertmanager is
Expand Down Expand Up @@ -907,7 +867,7 @@ func testAlertmanagerConfigPipeline(t *testing.T, wr *webhookReceiver, am *monit
t.Fatal(err)
}

if err := createUWMTestNsIfNotExist(t, f); err != nil {
if err := createUWMTestNsIfNotExist(t, f, userWorkloadTestNs); err != nil {
t.Fatal(err)
}

Expand Down
31 changes: 10 additions & 21 deletions test/e2e/alertmanager_user_workload_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,7 @@ func TestUserWorkloadAlertmanager(t *testing.T) {
f.AssertStatefulSetExistsAndRollout("alertmanager-user-workload", f.UserWorkloadMonitoringNs)(t)
f.AssertServiceExists("alertmanager-user-workload", f.UserWorkloadMonitoringNs)(t)

for _, scenario := range []struct {
name string
f func(*testing.T)
}{
{
name: "assert UWM alert access",
f: assertUWMAlertsAccess,
},
} {
t.Run(scenario.name, scenario.f)
}
t.Run("assert UWM alert access", assertUWMAlertsAccess)
}

// assertUWMAlertsAccess ensures that a user can't access all alerts from the UWM alertmanager via the api.
Expand Down Expand Up @@ -79,17 +69,16 @@ func assertUWMAlertsAccess(t *testing.T) {
t.Fatal(err)
}

// The uwm alerts port (9095) is only exposed in-cluster, so we need to use
// port forwarding to access kube-rbac-proxy.
host, cleanUp, err := f.ForwardPort(t, f.UserWorkloadMonitoringNs, "alertmanager-user-workload", 9095)
if err != nil {
t.Fatal(err)
}
defer cleanUp()

client := framework.NewPrometheusClient(host, token)

err = framework.Poll(5*time.Second, time.Minute, func() error {
// The uwm alerts port (9095) is only exposed in-cluster, so we need to use
// port forwarding to access kube-rbac-proxy.
host, cleanUp, err := f.ForwardPort(t, f.UserWorkloadMonitoringNs, "alertmanager-user-workload", 9095)
if err != nil {
t.Fatal(err)
}
defer cleanUp()

client := framework.NewPrometheusClient(host, token)
resp, err := client.Do("GET", "/api/v2/alerts", nil)
if err != nil {
return err
Expand Down
25 changes: 20 additions & 5 deletions test/e2e/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,10 @@ func TestClusterMonitorPrometheusK8Config(t *testing.T) {
assertion: f.AssertPrometheusRuleExists(thanosRule, f.Ns),
},
} {
t.Run(tc.name, tc.assertion)
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
tc.assertion(t)
})
}
}

Expand Down Expand Up @@ -344,7 +347,10 @@ func TestClusterMonitorAlertManagerConfig(t *testing.T) {
),
},
} {
t.Run(tc.name, tc.assertion)
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
tc.assertion(t)
})
}
}

Expand Down Expand Up @@ -664,7 +670,10 @@ func TestUserWorkloadMonitorPrometheusK8Config(t *testing.T) {
assertion: assertQueryLogValueEquals(f.UserWorkloadMonitoringNs, crName, "/tmp/test.log"),
},
} {
t.Run(tc.name, tc.assertion)
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
tc.assertion(t)
})
}
}

Expand Down Expand Up @@ -721,7 +730,10 @@ func TestUserWorkloadMonitorThanosRulerConfig(t *testing.T) {
),
},
} {
t.Run(tc.name, tc.assertion)
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
tc.assertion(t)
})
}
}

Expand Down Expand Up @@ -767,7 +779,10 @@ monitoringPlugin:
),
},
} {
t.Run(tc.name, tc.assertion)
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
tc.assertion(t)
})
}
}

Expand Down
22 changes: 19 additions & 3 deletions test/e2e/framework/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package framework

import (
"context"
"fmt"
"io/ioutil"
apiextv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
"k8s.io/apimachinery/pkg/runtime"
Expand All @@ -15,6 +16,7 @@ import (
"gopkg.in/yaml.v2"
appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
)
Expand Down Expand Up @@ -174,12 +176,12 @@ func (f *Framework) MustGetStatefulSet(t *testing.T, name, namespace string) *ap
return statefulSet
}

// MustGetPods return all pods from `namespace` within 5 minutes or fail
func (f *Framework) MustGetPods(t *testing.T, namespace string) *v1.PodList {
// MustListPods returns all pods matching labelSelector from `namespace` within 5 minutes or fail
func (f *Framework) MustListPods(t *testing.T, namespace string, labelSelector string) *v1.PodList {
t.Helper()
var pods *v1.PodList
err := wait.Poll(time.Second, 5*time.Minute, func() (bool, error) {
pl, err := f.KubeClient.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{})
pl, err := f.KubeClient.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{LabelSelector: labelSelector})
if err != nil {
return false, nil
}
Expand Down Expand Up @@ -212,3 +214,17 @@ func ensureCreatedByTestLabel(obj metav1.Object) {
labels[E2eTestLabelName] = E2eTestLabelValue
}
}

func (f *Framework) DeleteNamespace(t *testing.T, nsName string) error {
t.Helper()
nClient := f.KubeClient.CoreV1().Namespaces()
_, err := nClient.Get(ctx, nsName, metav1.GetOptions{})
if err != nil && !apierrors.IsNotFound(err) {
return fmt.Errorf("retrieving Namespace object failed: %w", err)
}

err = nClient.Delete(ctx, nsName, metav1.DeleteOptions{})
if err != nil {
return fmt.Errorf("deleting Namespace object failed: %w", err)
}
}
Loading

0 comments on commit 00a6f60

Please sign in to comment.