From b336697cb80f5f6a95fd80819dbc20323b3cf5d7 Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Fri, 24 Nov 2023 23:49:48 +0100 Subject: [PATCH 1/3] add support for specifying bucket name as mapping with repo id as key --- tasks/build.py | 13 +++++++------ tasks/deploy.py | 24 +++++++++++++++++++++++- 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/tasks/build.py b/tasks/build.py index 7750da02..a486510b 100644 --- a/tasks/build.py +++ b/tasks/build.py @@ -38,6 +38,7 @@ BUILD_JOB_SCRIPT = "build_job_script" BUILD_LOGS_DIR = "build_logs_dir" BUILD_PERMISSION = "build_permission" +CFG_DIRNAME = "cfg" CONTAINER_CACHEDIR = "container_cachedir" CVMFS_CUSTOMIZATIONS = "cvmfs_customizations" DEFAULT_JOB_TIME_LIMIT = "24:00:00" @@ -47,6 +48,7 @@ INITIAL_COMMENT = "initial_comment" JOBS_BASE_DIR = "jobs_base_dir" JOB_ARCHITECTURE = "architecture" +JOB_CFG_FILENAME = "job.cfg" JOB_CONTAINER = "container" JOB_LOCAL_TMP = "local_tmp" JOB_HTTPS_PROXY = "https_proxy" @@ -64,7 +66,6 @@ LOCAL_TMP = "local_tmp" NO_BUILD_PERMISSION_COMMENT = "no_build_permission_comment" REPOS_CFG_DIR = "repos_cfg_dir" -REPOS_ID = "repo_id" REPOS_REPO_NAME = "repo_name" REPOS_REPO_VERSION = "repo_version" REPOS_CONFIG_BUNDLE = "config_bundle" @@ -198,8 +199,8 @@ def get_repo_cfg(cfg): (dict): dictionary containing repository settings as follows - {REPOS_CFG_DIR: path to repository config directory as defined in 'app.cfg'} - {REPO_TARGET_MAP: json of REPO_TARGET_MAP value as defined in 'app.cfg'} - - for all sections [REPO_ID] defined in REPOS_CFG_DIR/repos.cfg add a - mapping {REPO_ID: dictionary containing settings of that section} + - for all sections [JOB_REPO_ID] defined in REPOS_CFG_DIR/repos.cfg add a + mapping {JOB_REPO_ID: dictionary containing settings of that section} """ fn = sys._getframe().f_code.co_name @@ -489,7 +490,7 @@ def prepare_job_cfg(job_dir, build_env_cfg, repos_cfg, repo_id, software_subdir, """ fn = sys._getframe().f_code.co_name - jobcfg_dir = 
os.path.join(job_dir, 'cfg') + jobcfg_dir = os.path.join(job_dir, CFG_DIRNAME) # create ini file job.cfg with entries: # [site_config] # local_tmp = LOCAL_TMP_VALUE @@ -498,7 +499,7 @@ def prepare_job_cfg(job_dir, build_env_cfg, repos_cfg, repo_id, software_subdir, # # [repository] # repos_cfg_dir = JOB_CFG_DIR - # repo_id = REPO_ID + # repo_id = JOB_REPO_ID # container = CONTAINER # repo_name = REPO_NAME # repo_version = REPO_VERSION @@ -555,7 +556,7 @@ def prepare_job_cfg(job_dir, build_env_cfg, repos_cfg, repo_id, software_subdir, # make sure that exists os.makedirs(jobcfg_dir, exist_ok=True) - jobcfg_file = os.path.join(jobcfg_dir, 'job.cfg') + jobcfg_file = os.path.join(jobcfg_dir, JOB_CFG_FILENAME) with open(jobcfg_file, "w") as jcf: job_cfg.write(jcf) diff --git a/tasks/deploy.py b/tasks/deploy.py index 0e76e67f..f1eeded5 100644 --- a/tasks/deploy.py +++ b/tasks/deploy.py @@ -22,6 +22,7 @@ # Local application imports (anything from EESSI/eessi-bot-software-layer) from connections import github +from tasks.build import CFG_DIRNAME, JOB_CFG_FILENAME, JOB_REPO_ID, JOB_REPOSITORY from tasks.build import get_build_env_cfg from tools import config, pr_comments, run_cmd @@ -255,7 +256,28 @@ def upload_tarball(job_dir, build_target, timestamp, repo_name, pr_number): deploycfg = cfg[DEPLOYCFG] tarball_upload_script = deploycfg.get(TARBALL_UPLOAD_SCRIPT) endpoint_url = deploycfg.get(ENDPOINT_URL) or '' - bucket_name = deploycfg.get(BUCKET_NAME) + bucket_spec = deploycfg.get(BUCKET_NAME) + + jobcfg_path = os.path.join(job_dir, CFG_DIRNAME, JOB_CFG_FILENAME) + jobcfg = config.read_config(jobcfg_path) + target_repo_id = jobcfg[JOB_REPOSITORY][JOB_REPO_ID] + + if isinstance(bucket_spec, str): + bucket_name = bucket_spec + log(f"Using specified bucket: {bucket_name}") + elif isinstance(bucket_spec, dict): + # bucket spec may be a mapping of target repo id to bucket name + bucket_name = bucket_spec.get(target_repo_id) + if bucket_name is None: + update_pr_comment(tarball, 
repo_name, pr_number, "not uploaded", + f"failed (no bucket specified for {target_repo_id})") + return + else: + log(f"Using bucket for {target_repo_id}: {bucket_name}") + else: + update_pr_comment(tarball, repo_name, pr_number, "not uploaded", + f"failed (incorrect bucket spec: {bucket_spec})") + return # run 'eessi-upload-to-staging {abs_path}' # (1) construct command line From 9d7a4b76690b72550a2caa04accf667e86719857 Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Sat, 25 Nov 2023 00:05:25 +0100 Subject: [PATCH 2/3] update app.cfg.example + README to mention that bucket_name can also be a mapping of target repo id to bucket name --- README.md | 18 ++++++++++++++++-- app.cfg.example | 5 ++++- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 317dc921..5a601279 100644 --- a/README.md +++ b/README.md @@ -402,10 +402,24 @@ endpoint_url = URL_TO_S3_SERVER ``` `endpoint_url` provides an endpoint (URL) to a server hosting an S3 bucket. The server could be hosted by a commercial cloud provider like AWS or Azure, or running in a private environment, for example, using Minio. The bot uploads tarballs to the bucket which will be periodically scanned by the ingestion procedure at the Stratum 0 server. + +```ini +# example: same bucket for all target repos +bucket_name = "eessi-staging" ``` -bucket_name = eessi-staging +```ini +# example: bucket to use depends on target repo +bucket_name = { + "eessi-pilot-2023.06": "eessi-staging-2023.06", + "eessi.io-2023.06": "software.eessi.io-2023.06", +} ``` -`bucket_name` is the name of the bucket used for uploading of tarballs. The bucket must be available on the default server (`https://${bucket_name}.s3.amazonaws.com`), or the one provided via `endpoint_url`. + +`bucket_name` is the name of the bucket used for uploading of tarballs. +The bucket must be available on the default server (`https://${bucket_name}.s3.amazonaws.com`), or the one provided via `endpoint_url`. 
+ +`bucket_name` can be specified as a string value to use the same bucket for all target repos, or it can be a mapping from target repo id to bucket name. + ``` upload_policy = once diff --git a/app.cfg.example b/app.cfg.example index 922df7ae..7d647980 100644 --- a/app.cfg.example +++ b/app.cfg.example @@ -123,7 +123,10 @@ tarball_upload_script = PATH_TO_EESSI_BOT/scripts/eessi-upload-to-staging # - The latter variant is used for AWS S3 services. endpoint_url = URL_TO_S3_SERVER -# bucket name +# bucket name: +# can be a string value, to always use same bucket regardless of target repo, +# or can be a mapping of target repo id (see also repo_target_map) to bucket name +# like: bucket_name = {"eessi-pilot-2023.06": "eessi-staging-pilot-2023.06", "eessi.io-2023.06": "software.eessi.io-2023.06"} bucket_name = eessi-staging # upload policy: defines what policy is used for uploading built artefacts From dfc9baa5f910ab852f46387e87e4ca1444dc5955 Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Sat, 25 Nov 2023 17:08:51 +0100 Subject: [PATCH 3/3] parse bucket_name value as Python dictionary if it starts with '{' --- tasks/deploy.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tasks/deploy.py b/tasks/deploy.py index f1eeded5..39b85a7b 100644 --- a/tasks/deploy.py +++ b/tasks/deploy.py @@ -13,6 +13,7 @@ # Standard library imports from datetime import datetime, timezone import glob +import json import os import re import sys @@ -258,6 +259,10 @@ def upload_tarball(job_dir, build_target, timestamp, repo_name, pr_number): endpoint_url = deploycfg.get(ENDPOINT_URL) or '' bucket_spec = deploycfg.get(BUCKET_NAME) + # if bucket_spec value looks like a dict, try parsing it as such + if bucket_spec.lstrip().startswith('{'): + bucket_spec = json.loads(bucket_spec) + jobcfg_path = os.path.join(job_dir, CFG_DIRNAME, JOB_CFG_FILENAME) jobcfg = config.read_config(jobcfg_path) target_repo_id = jobcfg[JOB_REPOSITORY][JOB_REPO_ID]