diff --git a/scale_test/aws-credentials.yaml b/scale_test/aws-credentials.yaml
new file mode 100644
index 00000000..64cce2f0
--- /dev/null
+++ b/scale_test/aws-credentials.yaml
@@ -0,0 +1,14 @@
+---
+# AWS provider credentials referenced by the DNSPolicy objects (providerRefs).
+# Values are quoted so the rendered template is always parsed as a string.
+kind: Secret
+apiVersion: v1
+metadata:
+  name: aws-credentials
+  labels:
+    app: scale-test
+stringData:
+  AWS_ACCESS_KEY_ID: "{{ .KUADRANT_AWS_ACCESS_KEY_ID }}"
+  AWS_REGION: "{{ .KUADRANT_AWS_REGION }}"
+  AWS_SECRET_ACCESS_KEY: "{{ .KUADRANT_AWS_SECRET_ACCESS_KEY }}"
+type: kuadrant.io/aws
diff --git a/scale_test/config.yaml b/scale_test/config.yaml
new file mode 100644
index 00000000..bbc2e8f5
--- /dev/null
+++ b/scale_test/config.yaml
@@ -0,0 +1,112 @@
+---
+# kube-burner configuration for the control plane scale test.
+metricsEndpoints:
+  # Quoted so that an empty PROMETHEUS_TOKEN renders as "" instead of null.
+  - endpoint: "{{ .PROMETHEUS_URL }}"
+    token: "{{ .PROMETHEUS_TOKEN }}"
+    metrics:
+      - ./metrics.yaml
+{{ if .OS_INDEXING }}
+    indexer:
+      type: opensearch
+      esServers: ["{{ .ES_SERVER }}"]
+      # NOTE(review): TLS certificate verification is disabled for the indexer.
+      insecureSkipVerify: true
+      defaultIndex: kube-burner
+{{ else }}
+    indexer:
+      type: local
+      metricsDirectory: ./metrics
+{{ end }}
+global:
+  gc: true
+jobs:
+  # Shared httpbin backend plus the AWS credentials secret.
+  - name: scale-test-preparations
+    jobIterations: 1
+    qps: 1
+    burst: 1
+    namespacedIterations: true
+    namespace: scale-test
+    waitWhenFinished: true
+    objects:
+      - objectTemplate: ./httpbin-deployment.yaml
+        kind: Deployment
+        replicas: 1
+        waitOptions:
+          forCondition: Available
+          customStatusPath: ".conditions[].type"
+      - objectTemplate: ./httpbin-service.yaml
+        kind: Service
+        replicas: 1
+      - objectTemplate: ./aws-credentials.yaml
+        kind: Secret
+        replicas: 1
+        inputVars:
+          KUADRANT_AWS_ACCESS_KEY_ID: "{{ .KUADRANT_AWS_ACCESS_KEY_ID }}"
+          KUADRANT_AWS_REGION: "{{ .KUADRANT_AWS_REGION }}"
+          KUADRANT_AWS_SECRET_ACCESS_KEY: "{{ .KUADRANT_AWS_SECRET_ACCESS_KEY }}"
+  # Gateway, HTTPRoute and the Kuadrant policies targeting them.
+  - name: scale-test-main
+    jobIterations: 1
+    qps: 1
+    burst: 1
+    namespacedIterations: true
+    namespace: scale-test
+    waitWhenFinished: true
+    objects:
+      - objectTemplate: ./gw.yaml
+        replicas: 1
+        waitOptions:
+          forCondition: Programmed
+          customStatusPath: ".conditions[].type"
+        inputVars:
+          KUADRANT_ZONE_ROOT_DOMAIN: "{{ .KUADRANT_ZONE_ROOT_DOMAIN }}"
+      - objectTemplate: ./gw-tls-policy.yaml
+        replicas: 1
+        waitOptions:
+          forCondition: Enforced
+          customStatusPath: ".conditions[].type"
+      - objectTemplate: ./gw-dns-policy.yaml
+        replicas: 1
+        waitOptions:
+          forCondition: Enforced
+          customStatusPath: ".conditions[].type"
+      - objectTemplate: ./gw-rlp.yaml
+        replicas: 1
+        waitOptions:
+          forCondition: Accepted
+          customStatusPath: ".conditions[].type"
+      - objectTemplate: ./gw-auth-policy.yaml
+        replicas: 1
+        waitOptions:
+          forCondition: Accepted
+          customStatusPath: ".conditions[].type"
+      - objectTemplate: ./httproute.yaml
+        replicas: 1
+        waitOptions:
+          forCondition: Accepted
+          customStatusPath: ".conditions[].type"
+        inputVars:
+          KUADRANT_ZONE_ROOT_DOMAIN: "{{ .KUADRANT_ZONE_ROOT_DOMAIN }}"
+      - objectTemplate: ./httproute-rlp.yaml
+        replicas: 1
+        waitOptions:
+          forCondition: Enforced
+          customStatusPath: ".conditions[].type"
+      - objectTemplate: ./httproute-auth-policy.yaml
+        replicas: 1
+        waitOptions:
+          forCondition: Enforced
+          customStatusPath: ".conditions[].type"
+  # Deletes the DNSPolicy objects labelled by the scale-test-main job.
+  - name: scale-test-safe-dnspolicy-cleanup
+    jobType: delete
+    jobIterations: 1
+    namespacedIterations: true
+    namespace: scale-test
+    waitWhenFinished: true
+    objects:
+      - kind: DNSPolicy
+        apiVersion: kuadrant.io/v1alpha1
+        labelSelector: {kube-burner-job: scale-test-main}
diff --git a/scale_test/gw-auth-policy.yaml b/scale_test/gw-auth-policy.yaml
new file mode 100644
index 00000000..004adc6d
--- /dev/null
+++ b/scale_test/gw-auth-policy.yaml
@@ -0,0 +1,28 @@
+# Gateway-level AuthPolicy that denies every request (OPA "allow = false").
+apiVersion: kuadrant.io/v1beta2
+kind: AuthPolicy
+metadata:
+  name: gw-auth-policy-{{.Iteration}}
+  labels:
+    app: scale-test
+spec:
+  targetRef:
+    group: gateway.networking.k8s.io
+    kind: Gateway
+    name: scale-test-{{.Iteration}}
+  rules:
+    authorization:
+      deny-all:
+        opa:
+          rego: "allow = false"
+    response:
+      unauthorized:
+        headers:
+          "content-type":
+            value: application/json
+        body:
+          value: |
+            {
+              "error": "Forbidden",
+              "message": "Access denied by default by the gateway operator. If you are the administrator of the service, create a specific auth policy for the route."
+            }
diff --git a/scale_test/gw-dns-policy.yaml b/scale_test/gw-dns-policy.yaml
new file mode 100644
index 00000000..85010952
--- /dev/null
+++ b/scale_test/gw-dns-policy.yaml
@@ -0,0 +1,14 @@
+# DNSPolicy for the per-iteration Gateway, using the aws-credentials secret.
+apiVersion: kuadrant.io/v1alpha1
+kind: DNSPolicy
+metadata:
+  name: gw-dns-policy-{{.Iteration}}
+  labels:
+    app: scale-test
+spec:
+  targetRef:
+    group: gateway.networking.k8s.io
+    kind: Gateway
+    name: scale-test-{{.Iteration}}
+  providerRefs:
+    - name: aws-credentials
diff --git a/scale_test/gw-rlp.yaml b/scale_test/gw-rlp.yaml
new file mode 100644
index 00000000..8cebce3b
--- /dev/null
+++ b/scale_test/gw-rlp.yaml
@@ -0,0 +1,18 @@
+# Gateway-level RateLimitPolicy with a single "global" limit.
+apiVersion: kuadrant.io/v1beta3
+kind: RateLimitPolicy
+metadata:
+  name: gw-rlp-{{.Iteration}}
+  labels:
+    app: scale-test
+spec:
+  targetRef:
+    group: gateway.networking.k8s.io
+    kind: Gateway
+    name: scale-test-{{.Iteration}}
+  limits:
+    "global":
+      rates:
+        - limit: 5
+          duration: 10
+          unit: second
diff --git a/scale_test/gw-tls-policy.yaml b/scale_test/gw-tls-policy.yaml
new file mode 100644
index 00000000..ad1d1b26
--- /dev/null
+++ b/scale_test/gw-tls-policy.yaml
@@ -0,0 +1,16 @@
+# TLSPolicy for the per-iteration Gateway, using the selfsigned-issuer ClusterIssuer.
+apiVersion: kuadrant.io/v1alpha1
+kind: TLSPolicy
+metadata:
+  name: gw-tls-policy-{{.Iteration}}
+  labels:
+    app: scale-test
+spec:
+  targetRef:
+    group: gateway.networking.k8s.io
+    kind: Gateway
+    name: scale-test-{{.Iteration}}
+  issuerRef:
+    group: cert-manager.io
+    kind: ClusterIssuer
+    name: selfsigned-issuer
diff --git a/scale_test/gw.yaml b/scale_test/gw.yaml
new file mode 100644
index 00000000..b40d2eb5
--- /dev/null
+++ b/scale_test/gw.yaml
@@ -0,0 +1,22 @@
+# Per-iteration Gateway with one HTTPS listener on a wildcard hostname.
+apiVersion: gateway.networking.k8s.io/v1
+kind: Gateway
+metadata:
+  name: scale-test-{{.Iteration}}
+  labels:
+    app: scale-test
+spec:
+  gatewayClassName: istio
+  listeners:
+    - allowedRoutes:
+        namespaces:
+          from: All
+      hostname: "*.scale-test-{{.Iteration}}.{{ .KUADRANT_ZONE_ROOT_DOMAIN }}"
+      name: api
+      port: 443
+      protocol: HTTPS
+      tls:
+        mode: Terminate
+        certificateRefs:
+          - name: scale-test-{{.Iteration}}
+            kind: Secret
diff --git a/scale_test/httpbin-deployment.yaml b/scale_test/httpbin-deployment.yaml
new file mode 100644
index 00000000..291720af
--- /dev/null
+++ b/scale_test/httpbin-deployment.yaml
@@ -0,0 +1,25 @@
+---
+# Single-replica httpbin Deployment (backend behind the httpbin Service).
+kind: Deployment
+apiVersion: apps/v1
+metadata:
+  name: httpbin
+  labels:
+    app: scale-test
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: scale-test
+  template:
+    metadata:
+      labels:
+        app: scale-test
+    spec:
+      containers:
+        - name: httpbin
+          image: 'quay.io/trepel/httpbin:jsmadis'
+          ports:
+            - name: api
+              containerPort: 8080
+              protocol: TCP
diff --git a/scale_test/httpbin-service.yaml b/scale_test/httpbin-service.yaml
new file mode 100644
index 00000000..9819904b
--- /dev/null
+++ b/scale_test/httpbin-service.yaml
@@ -0,0 +1,16 @@
+---
+# Exposes the httpbin pods on port 8080 (targets the container port named "api").
+kind: Service
+apiVersion: v1
+metadata:
+  name: httpbin
+  labels:
+    app: scale-test
+spec:
+  ports:
+    - name: http
+      protocol: TCP
+      port: 8080
+      targetPort: api
+  selector:
+    app: scale-test
diff --git a/scale_test/httproute-auth-policy.yaml b/scale_test/httproute-auth-policy.yaml
new file mode 100644
index 00000000..4e90a869
--- /dev/null
+++ b/scale_test/httproute-auth-policy.yaml
@@ -0,0 +1,17 @@
+# Route-level AuthPolicy that allows every request (OPA "allow = true").
+apiVersion: kuadrant.io/v1beta2
+kind: AuthPolicy
+metadata:
+  name: httproute-auth-policy-{{.Iteration}}
+  labels:
+    app: scale-test
+spec:
+  targetRef:
+    group: gateway.networking.k8s.io
+    kind: HTTPRoute
+    name: scale-test-{{.Iteration}}
+  rules:
+    authorization:
+      allow-all:
+        opa:
+          rego: "allow = true"
diff --git a/scale_test/httproute-rlp.yaml b/scale_test/httproute-rlp.yaml
new file mode 100644
index 00000000..89e34dcd
--- /dev/null
+++ b/scale_test/httproute-rlp.yaml
@@ -0,0 +1,18 @@
+# Route-level RateLimitPolicy with a single "httproute-level" limit.
+apiVersion: kuadrant.io/v1beta3
+kind: RateLimitPolicy
+metadata:
+  name: httproute-rlp-{{.Iteration}}
+  labels:
+    app: scale-test
+spec:
+  targetRef:
+    group: gateway.networking.k8s.io
+    kind: HTTPRoute
+    name: scale-test-{{.Iteration}}
+  limits:
+    "httproute-level":
+      rates:
+        - limit: 10
+          duration: 10
+          unit: second
diff --git a/scale_test/httproute.yaml b/scale_test/httproute.yaml
new file mode 100644
index 00000000..4454fea2
--- /dev/null
+++ b/scale_test/httproute.yaml
@@ -0,0 +1,25 @@
+# Per-iteration HTTPRoute attached to the matching Gateway, routing / to httpbin.
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: scale-test-{{.Iteration}}
+  labels:
+    app: scale-test
+spec:
+  parentRefs:
+    - group: gateway.networking.k8s.io
+      kind: Gateway
+      name: scale-test-{{.Iteration}}
+  hostnames:
+    - "api-{{.Iteration}}.scale-test-{{.Iteration}}.{{ .KUADRANT_ZONE_ROOT_DOMAIN }}"
+  rules:
+    - backendRefs:
+        - group: ''
+          kind: Service
+          name: httpbin
+          port: 8080
+          weight: 1
+      matches:
+        - path:
+            type: PathPrefix
+            value: /
diff --git a/scale_test/metrics.yaml b/scale_test/metrics.yaml
new file mode 100644
index 00000000..d0197ce8
--- /dev/null
+++ b/scale_test/metrics.yaml
@@ -0,0 +1,120 @@
+# API server
+- query: histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{apiserver="kube-apiserver", verb!~"WATCH", subresource!="log"}[2m])) by (verb,resource,subresource,instance,le)) > 0
+  metricName: API99thLatency
+
+- query: sum(irate(apiserver_request_total{apiserver="kube-apiserver",verb!="WATCH",subresource!="log"}[2m])) by (verb,instance,resource,code) > 0
+  metricName: APIRequestRate
+
+- query: sum(apiserver_current_inflight_requests{}) by (request_kind) > 0
+  metricName: APIInflightRequests
+
+# Containers & pod metrics
+- query: sum(irate(container_cpu_usage_seconds_total{name!="",namespace=~"openshift-(etcd|oauth-apiserver|.*apiserver|ovn-kubernetes|sdn|ingress|authentication|.*controller-manager|.*scheduler|monitoring|logging|image-registry)"}[2m]) * 100) by (pod, namespace, node)
+  metricName: podCPU
+
+- query: sum(container_memory_rss{name!="",namespace=~"openshift-(etcd|oauth-apiserver|.*apiserver|ovn-kubernetes|sdn|ingress|authentication|.*controller-manager|.*scheduler|monitoring|logging|image-registry)"}) by (pod, namespace, node)
+  metricName: podMemory
+
+- query: (sum(rate(container_fs_writes_bytes_total{container!="",device!~".+dm.+"}[5m])) by (device, container, node) and on (node) kube_node_role{role="master"}) > 0
+  metricName: containerDiskUsage
+
+# Kubelet & CRI-O metrics
+- query: sum(irate(process_cpu_seconds_total{service="kubelet",job="kubelet"}[2m]) * 100) by (node) and on (node) kube_node_role{role="worker"}
+  metricName: kubeletCPU
+
+- query: sum(process_resident_memory_bytes{service="kubelet",job="kubelet"}) by (node) and on (node) kube_node_role{role="worker"}
+  metricName: kubeletMemory
+
+- query: sum(irate(process_cpu_seconds_total{service="kubelet",job="crio"}[2m]) * 100) by (node) and on (node) kube_node_role{role="worker"}
+  metricName: crioCPU
+
+- query: sum(process_resident_memory_bytes{service="kubelet",job="crio"}) by (node) and on (node) kube_node_role{role="worker"}
+  metricName: crioMemory
+
+# Node metrics
+- query: sum(irate(node_cpu_seconds_total[2m])) by (mode,instance) > 0
+  metricName: nodeCPU
+
+- query: avg(node_memory_MemAvailable_bytes) by (instance)
+  metricName: nodeMemoryAvailable
+
+- query: avg(node_memory_Active_bytes) by (instance)
+  metricName: nodeMemoryActive
+
+- query: avg(node_memory_Cached_bytes) by (instance) + avg(node_memory_Buffers_bytes) by (instance)
+  metricName: nodeMemoryCached+nodeMemoryBuffers
+
+- query: irate(node_network_receive_bytes_total{device=~"^(ens|eth|bond|team).*"}[2m])
+  metricName: rxNetworkBytes
+
+- query: irate(node_network_transmit_bytes_total{device=~"^(ens|eth|bond|team).*"}[2m])
+  metricName: txNetworkBytes
+
+- query: rate(node_disk_written_bytes_total{device!~"^(dm|rb).*"}[2m])
+  metricName: nodeDiskWrittenBytes
+
+- query: rate(node_disk_read_bytes_total{device!~"^(dm|rb).*"}[2m])
+  metricName: nodeDiskReadBytes
+
+# Etcd metrics
+- query: sum(rate(etcd_server_leader_changes_seen_total[2m]))
+  metricName: etcdLeaderChangesRate
+
+- query: etcd_server_is_leader > 0
+  metricName: etcdServerIsLeader
+
+- query: histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket[2m]))
+  metricName: 99thEtcdDiskBackendCommitDurationSeconds
+
+- query: histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket[2m]))
+  metricName: 99thEtcdDiskWalFsyncDurationSeconds
+
+- query: histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket[5m]))
+  metricName: 99thEtcdRoundTripTimeSeconds
+
+- query: etcd_mvcc_db_total_size_in_bytes
+  metricName: etcdDBPhysicalSizeBytes
+
+- query: etcd_mvcc_db_total_size_in_use_in_bytes
+  metricName: etcdDBLogicalSizeBytes
+
+- query: sum(rate(etcd_object_counts{}[5m])) by (resource) > 0
+  metricName: etcdObjectCount
+
+- query: sum by (cluster_version)(etcd_cluster_version)
+  metricName: etcdVersion
+  instant: true
+
+# Cluster metrics
+- query: sum(kube_namespace_status_phase) by (phase) > 0
+  metricName: namespaceCount
+
+- query: sum(kube_pod_status_phase{}) by (phase)
+  metricName: podStatusCount
+
+- query: count(kube_secret_info{})
+  metricName: secretCount
+
+- query: count(kube_deployment_labels{})
+  metricName: deploymentCount
+
+- query: count(kube_configmap_info{})
+  metricName: configmapCount
+
+- query: count(kube_service_info{})
+  metricName: serviceCount
+
+- query: count(openshift_route_created{})
+  metricName: routeCount
+  instant: true
+
+- query: kube_node_role
+  metricName: nodeRoles
+  instant: true
+
+- query: sum(kube_node_status_condition{status="true"}) by (condition)
+  metricName: nodeStatus
+
+- query: cluster_version{type="completed"}
+  metricName: clusterVersion
+  instant: true
diff --git a/scale_test/metrics/.gitignore b/scale_test/metrics/.gitignore
new file mode 100644
index
00000000..d6b7ef32 --- /dev/null +++ b/scale_test/metrics/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/scale_test/readme.md b/scale_test/readme.md new file mode 100644 index 00000000..336bd8e5 --- /dev/null +++ b/scale_test/readme.md @@ -0,0 +1,67 @@ +# Control Plane Scale Test + +Control Plane scale testing via the kube-burner utility. + +## Prerequisites + +This test assumes that Kuadrant, together with all its dependencies (Gateway API, Istio, Certificate Manager, etc.), is installed. A ClusterIssuer (a self-signed one is enough) is expected to exist too. Also make sure to port-forward the Prometheus instance so that kube-burner can query it. + +The following env vars need to be set to run the tests: + +``` +export KUADRANT_AWS_SECRET_ACCESS_KEY=[key] +export KUADRANT_AWS_ACCESS_KEY_ID=[id] +export KUADRANT_ZONE_ROOT_DOMAIN=[domain] +export KUADRANT_AWS_REGION=[region] +export PROMETHEUS_URL=http://127.0.0.1:9090 +export PROMETHEUS_TOKEN="" +export OS_INDEXING=true # if sending metrics to opensearch/elasticsearch +export ES_SERVER=https://[user]:[password]@[host]:[port] +``` + +## Execution + +`kube-burner init -c ./config.yaml --timeout 5m` + +## Setting up a local cluster for execution + +Follow the instructions in the Prerequisites section. + +Clone the [kuadrant-operator](https://github.com/Kuadrant/kuadrant-operator) repo and, from its root, run: + +```bash +CONTAINER_ENGINE=podman make local-setup +``` + +Deploy the observability stack, as per the instructions in https://github.com/Kuadrant/kuadrant-operator/blob/main/config/observability/README.md + +Create the Kuadrant resource: + +```bash +kubectl create -f ./config/samples/kuadrant_v1beta1_kuadrant.yaml -n kuadrant-system +``` + +Create a ClusterIssuer: + +```bash +kubectl apply -f - <