v0.7.4 - External Service (#392)
* TPC-DS: Queries updated

* Docs: Read current version number

* TPC-DS: More tests

* Bexhoma: Check endpoints for service not managed by bexhoma

* Bexhoma: Dummy Database Service template

* Bexhoma: Monitor daemonset or sidecar (not both)

* Bexhoma: Result tests do not dump any results themselves

* Bexhoma: Show more info about monitoring processes

* Docs: Notes about Cloud databases

* Example: Cloud Database PostgreSQL compatible

* Docs: Notes about Cloud databases

* Docs: Some more tests

* Bexhoma: config.monitoring_sut to deactivate monitoring of SUT for services outside of K8s

* Bexhoma: Monitoring test tries to fetch the sum of memory from 5 minutes ago

* TPC-H: Support for Cloud Service

* Bexhoma: Monitoring test follows redirects

* TPC-H: PostgreSQL loader based on alpine

* More tests

* Bexhoma: Evaluator alpine-based

* Database Service: Dummy template for SUT

* Bexhoma: Alpine and Python 3.12.8 as basis for dbmsbenchmarker

* Dev: Test for summary markdown

* Docs: Notes about cloud databases

* Bexhoma: Debug output of the monitoring health test shows the HTTP status code

* Bexhoma: Back to Debian variant for dbmsbenchmarker (Java issues) - EOL

* Docs: Some improvements

* Database Service: More tests

* Docs: Some improvements

* Tests: Clean test cases

* Docs: Some improvements

* Tests: Clean test cases

* Bexhoma: Use simple shell instead of bash to get host info

* Tests: Clean test cases

* Docs: YugabyteDB changes to monitoring and host info retrieval

* Docs: Status of CockroachDB

* Tests: Clean test cases clouds
perdelt authored Dec 20, 2024
1 parent a01dd09 commit b06d20b
Showing 85 changed files with 21,615 additions and 1,708 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -13,3 +13,4 @@ docs/html/*
api/*
bexhoma/__pycache__/*
/cluster-monitoring-default.config
logs_tests/local/*
86 changes: 74 additions & 12 deletions benchbase.py
@@ -33,7 +33,7 @@
parser = argparse.ArgumentParser(description=description)
parser.add_argument('mode', help='start sut, also load data or also run the TPC-C queries', choices=['run', 'start', 'load'])
parser.add_argument('-aws', '--aws', help='fix components to node groups at AWS', action='store_true', default=False)
parser.add_argument('-dbms','--dbms', help='DBMS to load the data', choices=['PostgreSQL', 'MySQL', 'MariaDB', 'YugabyteDB', 'CockroachDB'], default=[], action='append')
parser.add_argument('-dbms','--dbms', help='DBMS to load the data', choices=['PostgreSQL', 'MySQL', 'MariaDB', 'YugabyteDB', 'CockroachDB', 'DatabaseService'], default=[], action='append')
parser.add_argument('-db', '--debug', help='dump debug information', action='store_true')
parser.add_argument('-sl', '--skip-loading', help='do not ingest, start benchmarking immediately', action='store_true', default=False)
parser.add_argument('-cx', '--context', help='context of Kubernetes (for a multi cluster environment), default is current context', default=None)
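For orientation, a hedged usage sketch based only on the options defined above (the script defines further options not shown in this hunk):

# Hypothetical invocation: python benchbase.py run -dbms DatabaseService -sl
args = parser.parse_args(['run', '-dbms', 'DatabaseService', '-sl'])
# args.mode == 'run', args.dbms == ['DatabaseService'], args.skip_loading == True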
@@ -344,23 +344,27 @@
if skip_loading:
config.loading_deactivated = True
config.sut_service_name = "yb-tserver-service" # fix service name of SUT, because it is not managed by bexhoma
config.sut_container_name = "yb-tserver" # fix container name of SUT
config.sut_container_name = '' # fix container name of SUT
def get_worker_pods(self):
"""
Returns a list of all pod names of workers for the current SUT.
By default, the component name is 'worker' for a bexhoma-managed DBMS.
This is used, for example, to find the pods of the workers in order to get the host info (CPU, RAM, node name, ...).
YugabyteDB: These are yb-tserver-0, yb-tserver-1, etc.
:return: list of endpoints
"""
pods_worker = self.experiment.cluster.get_pods(component='worker', configuration=self.configuration, experiment=self.code)
pods_worker = ['yb-tserver-0', 'yb-tserver-1', 'yb-tserver-2']
#pods_worker = self.experiment.cluster.get_pods(app='', component='', configuration='yb-tserver', experiment='')
#print("****************", pods_worker)
return pods_worker
#config.get_worker_pods = types.MethodType(get_worker_pods, config)
config.get_worker_pods = types.MethodType(get_worker_pods, config)
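The override above is bound to the single config instance via types.MethodType rather than by subclassing. A minimal, self-contained sketch of that pattern (the Config class below is a stand-in for illustration, not the actual bexhoma class):

import types

class Config:
    def get_worker_pods(self):
        # default behaviour: pods are discovered because bexhoma manages them
        return []

config = Config()

def get_worker_pods(self):
    # YugabyteDB is not managed by bexhoma, so the pod names are fixed
    return ['yb-tserver-0', 'yb-tserver-1', 'yb-tserver-2']

# bind the replacement to this one instance only; other instances keep the default
config.get_worker_pods = types.MethodType(get_worker_pods, config)
print(config.get_worker_pods())   # ['yb-tserver-0', 'yb-tserver-1', 'yb-tserver-2']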
def create_monitoring(self, app='', component='monitoring', experiment='', configuration=''):
"""
Generate a name for the monitoring component.
This is used in a pattern for promql.
Basically this is `{app}-{component}-{configuration}-{experiment}-{client}`.
For YugabyteDB, the service to be monitored is named like 'yb-tserver-'.
For YugabyteDB, the service of the SUT to be monitored is named like 'yb-tserver-'.
:param app: app the component belongs to
:param component: Component, for example sut or monitoring
@@ -379,28 +383,37 @@ def get_worker_endpoints(self):
Returns all endpoints of a headless service that monitors nodes of a distributed DBMS.
These are IPs of cAdvisor instances.
The endpoint list is to be filled in a config of an instance of Prometheus.
For YugabyteDB the service is fixed to be 'bexhoma-service-monitoring-default' and does not depend on the experiment.
By default, the workers can be found by the name of their component (worker-0 etc).
This is necessary when we have sidecar containers attached to the workers of a distributed DBMS.
:return: list of endpoints
"""
endpoints = self.experiment.cluster.get_service_endpoints(service_name="bexhoma-service-monitoring-default")
endpoints = []
#name_worker = self.generate_component_name(component='worker', configuration=self.configuration, experiment=self.code)
pods_worker = self.get_worker_pods()
for pod in pods_worker:
#endpoint = '{worker}.{service_sut}'.format(worker=pod, service_sut=name_worker)
endpoint = '{worker}'.format(worker=pod)
endpoints.append(endpoint)
print('Worker Endpoint: {endpoint}'.format(endpoint = endpoint))
self.logger.debug("yugabytedb.get_worker_endpoints({})".format(endpoints))
return endpoints
#config.get_worker_endpoints = types.MethodType(get_worker_endpoints, config)
config.get_worker_endpoints = types.MethodType(get_worker_endpoints, config)
def set_metric_of_config(self, metric, host, gpuid):
"""
Returns a promql query.
Parameters in this query are substituted, so that Prometheus finds the correct metric.
Example: In 'sum(irate(container_cpu_usage_seconds_total{{container_label_io_kubernetes_pod_name=~"(.*){configuration}-{experiment}(.*)", container_label_io_kubernetes_pod_name=~"(.*){configuration}-{experiment}(.*)", container_label_io_kubernetes_container_name="dbms"}}[1m]))'
configuration and experiment are placeholders and will be replaced by concrete values.
Here: We do not have a SUT that is specific to the experiment or configuration.
YugabyteDB: We do not have a SUT that is specific to the experiment or configuration. The pod names follow a pattern like yb-tserver and there is no container name.
:param metric: Parametrized promql query
:param host: Name of the host the metrics should be collected from
:param gpuid: GPU that the metrics should watch
:return: promql query without parameters
"""
metric = metric.replace(', container="dbms"', '')
metric = metric.replace(', container_label_io_kubernetes_container_name="dbms"', '')
return metric.format(host=host, gpuid=gpuid, configuration='yb-tserver', experiment='')
config.set_metric_of_config = types.MethodType(set_metric_of_config, config)
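To illustrate the substitution performed by set_metric_of_config, a hedged example with a simplified metric template (the real templates come from the cluster config and may differ):

template = ('sum(irate(container_cpu_usage_seconds_total{{'
            'container_label_io_kubernetes_pod_name=~"(.*){configuration}-{experiment}(.*)", '
            'container_label_io_kubernetes_container_name="dbms"}}[1m]))')
# YugabyteDB pods expose no container named 'dbms', so that label is dropped
template = template.replace(', container_label_io_kubernetes_container_name="dbms"', '')
print(template.format(host='', gpuid='', configuration='yb-tserver', experiment=''))
# -> sum(irate(container_cpu_usage_seconds_total{container_label_io_kubernetes_pod_name=~"(.*)yb-tserver-(.*)"}[1m]))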
config.set_loading_parameters(
@@ -452,8 +465,6 @@ def set_metric_of_config(self, metric, host, gpuid):
)
#print(executor_list)
config.add_benchmark_list(executor_list)
#print(executor_list)
config.add_benchmark_list(executor_list)
cluster.max_sut = 1 # can only run 1 in same cluster because of fixed service
if ("CockroachDB" in args.dbms):# or len(args.dbms) == 0): # not included per default
# CockroachDB
@@ -508,9 +519,60 @@ def set_metric_of_config(self, metric, host, gpuid):
)
#print(executor_list)
config.add_benchmark_list(executor_list)
#cluster.max_sut = 1 # can only run 1 in same cluster because of fixed service
if ("DatabaseService" in args.dbms):# or len(args.dbms) == 0): # not included per default
# DatabaseService
name_format = 'DatabaseService-{threads}-{pods}-{target}'
config = configurations.benchbase(experiment=experiment, docker='DatabaseService', configuration=name_format.format(threads=loading_threads, pods=loading_pods, target=loading_target), alias='DatabaseService')
config.monitoring_sut = False # cannot be monitored since outside of K8s
if skip_loading:
config.loading_deactivated = True
config.set_loading_parameters(
PARALLEL = str(loading_pods), # =1
SF = SF,
BENCHBASE_BENCH = type_of_benchmark,#'tpcc',
BENCHBASE_PROFILE = 'postgres',
BEXHOMA_DATABASE = 'postgres',
BEXHOMA_HOST = 'bexhoma-service.perdelt.svc.cluster.local',
#BENCHBASE_TARGET = int(target),
BENCHBASE_TERMINALS = loading_threads_per_pod,
BENCHBASE_TIME = SD,
BENCHBASE_ISOLATION = "TRANSACTION_READ_COMMITTED",
)
config.set_loading(parallel=loading_pods, num_pods=loading_pods)
executor_list = []
for factor_benchmarking in num_benchmarking_target_factors:#range(1, 9):#range(1, 2):#range(1, 15):
benchmarking_target = target_base*factor_benchmarking#4*4096*t
for benchmarking_threads in num_benchmarking_threads:
for benchmarking_pods in num_benchmarking_pods:#[1,2]:#[1,8]:#range(2,5):
for num_executor in list_clients:
benchmarking_pods_scaled = num_executor*benchmarking_pods
benchmarking_threads_per_pod = int(benchmarking_threads/benchmarking_pods)
benchmarking_target_per_pod = int(benchmarking_target/benchmarking_pods)
"""
print("benchmarking_target", benchmarking_target)
print("benchmarking_pods", benchmarking_pods)
print("benchmarking_pods_scaled", benchmarking_pods_scaled)
print("benchmarking_threads", benchmarking_threads)
print("benchmarking_threads_per_pod", benchmarking_threads_per_pod)
print("benchmarking_target_per_pod", benchmarking_target_per_pod)
"""
executor_list.append(benchmarking_pods_scaled)
config.add_benchmarking_parameters(
PARALLEL = str(benchmarking_pods_scaled),
SF = SF,
BENCHBASE_BENCH = type_of_benchmark,#'tpcc',
BENCHBASE_PROFILE = 'postgres',
BEXHOMA_DATABASE = 'postgres',
BEXHOMA_HOST = 'bexhoma-service.perdelt.svc.cluster.local',
BENCHBASE_TARGET = benchmarking_target_per_pod,
BENCHBASE_TERMINALS = benchmarking_threads_per_pod,
BENCHBASE_TIME = SD,
BENCHBASE_ISOLATION = "TRANSACTION_READ_COMMITTED",
)
#print(executor_list)
config.add_benchmark_list(executor_list)
cluster.max_sut = 1 # can only run 1 in same cluster because of fixed service
#cluster.max_sut = 1 # can only run 1 in same cluster because of fixed service
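For orientation, the per-pod arithmetic in the loop above splits the total terminal and target counts evenly across the benchmarking pods; a hypothetical example (values chosen only for illustration):

num_executor = 2            # parallel executors per client entry
benchmarking_pods = 2
benchmarking_threads = 64   # total BenchBase terminals across all pods
benchmarking_target = 16384 # total target rate across all pods

benchmarking_pods_scaled = num_executor * benchmarking_pods                    # 4
benchmarking_threads_per_pod = int(benchmarking_threads / benchmarking_pods)   # 32
benchmarking_target_per_pod = int(benchmarking_target / benchmarking_pods)     # 8192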
##############
### wait for necessary nodegroups to have planned size
##############
17 changes: 12 additions & 5 deletions bexhoma/clusters.py
@@ -37,6 +37,7 @@
import urllib.request
import urllib.parse
from pprint import pprint
from datetime import datetime, timedelta

from dbmsbenchmarker import *

@@ -865,9 +866,9 @@ def execute_command_in_pod(self, command, pod='', container='', params=''):
#pod = self.activepod
command_clean = command.replace('"','\\"')
if len(container) > 0:
fullcommand = 'kubectl --context {context} exec {pod} --container={container} -- bash -c "{command}"'.format(context=self.context, pod=pod, container=container, command=command_clean)
fullcommand = 'kubectl --context {context} exec {pod} --container={container} -- sh -c "{command}"'.format(context=self.context, pod=pod, container=container, command=command_clean)
else:
fullcommand = 'kubectl --context {context} exec {pod} -- bash -c "{command}"'.format(context=self.context, pod=pod, command=command_clean)
fullcommand = 'kubectl --context {context} exec {pod} -- sh -c "{command}"'.format(context=self.context, pod=pod, command=command_clean)
#fullcommand = 'kubectl exec '+self.activepod+' --container=dbms -- bash -c "'+command_clean+'"'
#print(fullcommand)
self.logger.debug('testbed.execute_command_in_pod({})'.format(fullcommand))
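With the switch from bash to sh, the assembled command also works in minimal images (such as Alpine-based ones) that do not ship bash. A hedged sketch of the string produced, with hypothetical context, pod and container names:

context, pod, container = 'mycluster', 'bexhoma-sut-0', 'dbms'   # hypothetical values
command_clean = 'nproc'.replace('"', '\\"')
fullcommand = 'kubectl --context {context} exec {pod} --container={container} -- sh -c "{command}"'.format(
    context=context, pod=pod, container=container, command=command_clean)
print(fullcommand)
# kubectl --context mycluster exec bexhoma-sut-0 --container=dbms -- sh -c "nproc"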
@@ -1335,22 +1336,28 @@ def test_if_monitoring_healthy(self):
config_K8s = self.config['credentials']['k8s']
if 'service_monitoring' in config_K8s['monitor']:
url = config_K8s['monitor']['service_monitoring'].format(namespace=self.contextdata['namespace'], service="monitoring")
query = "node_memory_MemTotal_bytes"
query = "sum(node_memory_MemTotal_bytes)"
safe_query = urllib.parse.quote_plus(query)
try:
self.logger.debug('Test URL {}'.format(url+"query_range?query="+safe_query+"&start=1&end=2&step=1"))
#code= urllib.request.urlopen(url+"query_range?query="+safe_query+"&start=1&end=2&step=1").getcode()
# curl -ILs www.welt.de | head -n 1|cut -d$' ' -f2
pod_dashboard = self.get_dashboard_pod_name()
self.logger.debug('Inside pod {}'.format(pod_dashboard))
now = datetime.utcnow()
start = now - timedelta(seconds=300) # 5 minutes ago
end = now - timedelta(seconds=240) # 4 minutes ago
cmd = {}
command = "curl -is '{}' | head -n 1|cut -d$' ' -f2".format(url+"query_range?query="+safe_query+"&start=1&end=2&step=1")
query_url = "{url}query_range?query={safe_query}&start={start}&end={end}&step=60".format(url=url, safe_query=safe_query, start=int(start.timestamp()), end=int(end.timestamp()))
self.logger.debug('Test URL {}'.format(query_url))
command = "curl -L --max-time 10 -is '{}' | head -n 1|cut -d$' ' -f2".format(query_url)
#command = "curl -is '{}' | head -n 1|cut -d$' ' -f2".format(url+"query_range?query="+safe_query+"&start=1&end=2&step=1")
self.logger.debug('Command {}'.format(command))
#fullcommand = 'kubectl exec '+self.pod_sut+' --container=dbms -- bash -c "'+command+'"'
#cores = os.popen(fullcommand).read()
stdin, stdout, stderr = self.execute_command_in_pod(pod=pod_dashboard, command=command, container="dashboard")
#print("Return", stdout, stderr)
status = stdout#os.popen(fullcommand).read()
self.logger.debug('Status {}'.format(status))
if len(status)>0:
#return int(status)
#print(int(status))
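A minimal sketch of the query_range URL assembled above (the Prometheus base URL is a placeholder; the real one comes from the cluster config):

import urllib.parse
from datetime import datetime, timedelta

url = 'http://bexhoma-service-monitoring-default:9090/api/v1/'   # placeholder base URL
safe_query = urllib.parse.quote_plus('sum(node_memory_MemTotal_bytes)')
now = datetime.utcnow()
start = now - timedelta(seconds=300)   # 5 minutes ago
end = now - timedelta(seconds=240)     # 4 minutes ago
query_url = "{url}query_range?query={safe_query}&start={start}&end={end}&step=60".format(
    url=url, safe_query=safe_query, start=int(start.timestamp()), end=int(end.timestamp()))
print(query_url)
# The pod then runs: curl -L --max-time 10 -is '<query_url>' | head -n 1 | cut -d$' ' -f2
# keeping only the HTTP status code (200 when the monitoring stack is healthy).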