From 379316552148fe2d5bf159c07f55dceb0427dbf5 Mon Sep 17 00:00:00 2001 From: Pavel Raiskup Date: Thu, 14 Sep 2023 09:52:02 +0200 Subject: [PATCH] backend: provide per-arch & per-owner worker limit The default "max_workers" limit might be too large (e.g. in Fedora Copr it is 45) compared to the number of available workers of some architecture type (e.g. for s390x there's now max 18 workers, and ppc64le is often limited too due to #2869). This patch then brings another follow-up (FUP) mitigation. --- backend/conf/copr-be.conf.example | 4 ++++ .../copr_backend/daemons/build_dispatcher.py | 12 +++++++++--- backend/copr_backend/helpers.py | 4 ++-- backend/copr_backend/rpm_builds.py | 19 ++++++++++++++++++- backend/tests/test_config_reader.py | 10 ++++++++-- backend/tests/test_worker_limits.py | 8 +++++++- 6 files changed, 48 insertions(+), 9 deletions(-) diff --git a/backend/conf/copr-be.conf.example b/backend/conf/copr-be.conf.example index 6210e060d..4f6e88077 100644 --- a/backend/conf/copr-be.conf.example +++ b/backend/conf/copr-be.conf.example @@ -42,6 +42,10 @@ sleeptime=30 # Maximum number of concurrently running tasks per architecture. #builds_max_workers_arch=x86_64=10,ppc64le=12 +# Maximum number of concurrent build workers per architecture and owner. For +# example, give at most 15 ppc64le and 10 s390x machines to one copr owner: +#builds_max_workers_arch_per_owner=ppc64le=15,s390x=10 + # Maximum number of concurrently running tasks per project owner. 
#builds_max_workers_owner=20 diff --git a/backend/copr_backend/daemons/build_dispatcher.py b/backend/copr_backend/daemons/build_dispatcher.py index d074f0ff7..9bd699009 100644 --- a/backend/copr_backend/daemons/build_dispatcher.py +++ b/backend/copr_backend/daemons/build_dispatcher.py @@ -6,6 +6,7 @@ from copr_backend.dispatcher import BackendDispatcher from copr_backend.rpm_builds import ( ArchitectureWorkerLimit, + ArchitectureUserWorkerLimit, BuildTagLimit, RPMBuildWorkerManager, BuildQueueTask, @@ -83,9 +84,14 @@ def __init__(self, backend_opts): super().__init__(backend_opts) self.max_workers = backend_opts.builds_max_workers - for tag_type in ["arch", "tag"]: - lclass = ArchitectureWorkerLimit if tag_type == "arch" else \ - BuildTagLimit + for tag_type in ["arch", "tag", "arch_per_owner"]: + match tag_type: + case "arch": + lclass = ArchitectureWorkerLimit + case "tag": + lclass = BuildTagLimit + case "arch_per_owner": + lclass = ArchitectureUserWorkerLimit for tag, limit in backend_opts.builds_limits[tag_type].items(): self.log.info("setting %s(%s) limit to %s", tag_type, tag, limit) self.limits.append(lclass(tag, limit)) diff --git a/backend/copr_backend/helpers.py b/backend/copr_backend/helpers.py index 05348366d..75fa5e62d 100644 --- a/backend/copr_backend/helpers.py +++ b/backend/copr_backend/helpers.py @@ -215,9 +215,9 @@ def _get_limits_conf(parser): "option. 
Please use format: " "builds_max_workers_{0} = {1}1=COUNT,{1}2=COUNT") err2 = ("Duplicate left value '{}' in 'builds_max_workers_{}' configuration") - limits = {"arch": {}, "tag": {}} + limits = {"arch": {}, "tag": {}, "arch_per_owner": {}} - for config_type in ["arch", "tag"]: + for config_type in ["arch", "tag", "arch_per_owner"]: option = "builds_max_workers_{}".format(config_type) raw = _get_conf(parser, "backend", option, None) if raw: diff --git a/backend/copr_backend/rpm_builds.py b/backend/copr_backend/rpm_builds.py index 4694e3a63..c2ed145cf 100644 --- a/backend/copr_backend/rpm_builds.py +++ b/backend/copr_backend/rpm_builds.py @@ -2,7 +2,11 @@ Abstraction for RPM and SRPM builds on backend. """ -from copr_common.worker_manager import WorkerManager, PredicateWorkerLimit +from copr_common.worker_manager import ( + GroupWorkerLimit, + WorkerManager, + PredicateWorkerLimit, +) from copr_backend.worker_manager import BackendQueueTask from copr_backend.helpers import get_chroot_arch @@ -133,6 +137,19 @@ def predicate(x): super().__init__(predicate, limit, name="arch_{}".format(architecture)) +class ArchitectureUserWorkerLimit(GroupWorkerLimit): + """ + Limit number of machines of specific architecture we give to a single + Copr owner (user or group). + """ + def __init__(self, architecture, limit): + super().__init__( + lambda x: f"{x.requested_arch}_{x.owner}", + limit, + name=f"arch_{architecture}_owner", + ) + + class BuildTagLimit(PredicateWorkerLimit): """ Limit the amount of concurrently running builds per given build tag. 
diff --git a/backend/tests/test_config_reader.py b/backend/tests/test_config_reader.py index 5d525cbd2..2a7c338ed 100644 --- a/backend/tests/test_config_reader.py +++ b/backend/tests/test_config_reader.py @@ -37,7 +37,8 @@ def get_minimal_config_file(self): def test_minimal_file_and_defaults(self): opts = BackendConfigReader(self.get_minimal_config_file()).read() assert opts.destdir == "/tmp" - assert opts.builds_limits == {'arch': {}, 'tag': {}, 'owner': 20, 'sandbox': 10} + assert opts.builds_limits == {'arch': {}, 'tag': {}, 'owner': 20, + 'sandbox': 10, 'arch_per_owner': {}} def test_correct_build_limits(self): opts = BackendConfigReader( @@ -48,6 +49,7 @@ def test_correct_build_limits(self): "builds_max_workers_tag = Power9=9\n" "builds_max_workers_owner = 5\n" "builds_max_workers_sandbox = 3\n" + "builds_max_workers_arch_per_owner = ppc64le=11, s390x=5\n" ))).read() assert opts.builds_limits == { 'arch': { @@ -58,7 +60,11 @@ def test_correct_build_limits(self): 'Power9': 9, }, 'owner': 5, - 'sandbox': 3 + 'sandbox': 3, + 'arch_per_owner': { + 'ppc64le': 11, + 's390x': 5, + }, } @pytest.mark.parametrize("broken_config", [ diff --git a/backend/tests/test_worker_limits.py b/backend/tests/test_worker_limits.py index 742bfa872..ac90d3c66 100644 --- a/backend/tests/test_worker_limits.py +++ b/backend/tests/test_worker_limits.py @@ -12,6 +12,7 @@ from copr_backend.worker_manager import BackendQueueTask from copr_backend.rpm_builds import ( ArchitectureWorkerLimit, + ArchitectureUserWorkerLimit, BuildTagLimit, BuildQueueTask, ) @@ -23,16 +24,19 @@ }, { "build_id": 7, "task_id": "7-fedora-rawhide-x86_64", + "chroot": "fedora-rawhide-x86_64", "project_owner": "cecil", "sandbox": "sb1", }, { "build_id": 4, "task_id": "7-fedora-32-x86_64", + "chroot": "fedora-32-x86_64", "project_owner": "bedrich", "sandbox": "sb2", }, { "build_id": 4, "task_id": "7-fedora-31-x86_64", + "chroot": "fedora-31-x86_64", "project_owner": "bedrich", "sandbox": "sb2", "tags": 
["special_requirement"], @@ -115,6 +119,7 @@ def test_worker_limit_info(): GroupWorkerLimit(lambda x: x.sandbox, 2, name='sandbox'), ArchitectureWorkerLimit("x86_64", 3), ArchitectureWorkerLimit("aarch64", 2), + ArchitectureUserWorkerLimit("aarch64", 2), BuildTagLimit("special_requirement", 1), ] tasks = [BuildQueueTask(t) for t in TASKS] @@ -128,8 +133,9 @@ def test_worker_limit_info(): 'w:7-fedora-32-x86_64, w:7-fedora-31-x86_64', "limit info: Unnamed 'GroupWorkerLimit' limit, counter: cecil=2, bedrich=2", "limit info: 'sandbox', counter: sb1=1, sb2=2", - "limit info: 'arch_x86_64'", + "limit info: 'arch_x86_64', matching: w:7-fedora-rawhide-x86_64, w:7-fedora-32-x86_64, w:7-fedora-31-x86_64", "limit info: 'arch_aarch64'", + "limit info: 'arch_aarch64_owner', counter: None_cecil=1, x86_64_cecil=1, x86_64_bedrich=2", "limit info: 'tag_special_requirement', matching: w:7-fedora-31-x86_64", ]