From 105aaf33793d354515ebb187b34339a604695d8d Mon Sep 17 00:00:00 2001 From: Paul Nilsson Date: Thu, 22 Aug 2024 15:12:30 +0200 Subject: [PATCH] Now reading base URLs from file --- PILOTVERSION | 2 +- pilot.py | 14 ++++++++-- pilot/user/atlas/setup.py | 7 +++++ pilot/user/rubin/setup.py | 13 +++++++-- pilot/util/default.cfg | 3 +++ pilot/util/filehandling.py | 55 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 89 insertions(+), 5 deletions(-) diff --git a/PILOTVERSION b/PILOTVERSION index 8c96a30d..1b582d2a 100644 --- a/PILOTVERSION +++ b/PILOTVERSION @@ -1 +1 @@ -3.8.1.50 \ No newline at end of file +3.8.1.51 \ No newline at end of file diff --git a/pilot.py b/pilot.py index be691250..45e48ebd 100755 --- a/pilot.py +++ b/pilot.py @@ -62,6 +62,7 @@ from pilot.util.filehandling import ( get_pilot_work_dir, mkdirs, + store_base_urls ) from pilot.util.harvester import ( is_harvester_mode, @@ -364,8 +365,6 @@ def get_args() -> Any: required=False, # From v 2.2.1 the site name is internally set help="OBSOLETE: site name (e.g., AGLT2_TEST)", ) - - # graciously stop pilot process after hard limit arg_parser.add_argument( "-j", "--joblabel", @@ -373,6 +372,13 @@ def get_args() -> Any: default="ptest", help="Job prod/source label (default: ptest)", ) + arg_parser.add_argument( + "-g", + "--baseurls", + dest="baseurls", + default="", + help="Comma separated list of base URLs for validation of trf download", + ) # pilot version tag; PR or RC arg_parser.add_argument( @@ -946,6 +952,10 @@ def list_zombies(): add_to_pilot_timing("0", PILOT_START_TIME, time.time(), args) add_to_pilot_timing("1", PILOT_MULTIJOB_START_TIME, time.time(), args) + # store base URLs in a file if set + if args.baseurls: + store_base_urls(args.baseurls) + # if requested by the wrapper via a pilot option, create the main pilot workdir and cd into it args.sourcedir = getcwd() # get_pilot_source_dir() diff --git a/pilot/user/atlas/setup.py b/pilot/user/atlas/setup.py index 831b7c74..a0140015 100644 
--- a/pilot/user/atlas/setup.py +++ b/pilot/user/atlas/setup.py @@ -42,6 +42,7 @@ from pilot.util.filehandling import ( copy, head, + read_base_urls, read_file, write_file, ) @@ -432,6 +433,12 @@ def get_valid_base_urls(order: str = None) -> list: valid_base_urls.append(f"http://{base_url}") valid_base_urls.append(f"https://{base_url}") + # add further URLs in case baseurls.txt file exist (URLs specified with option --baseurls) + urls = read_base_urls() + if urls: + for url in urls: + valid_base_urls.append(url) + if order: valid_base_urls = [order] + [url for url in valid_base_urls if url != order] diff --git a/pilot/user/rubin/setup.py b/pilot/user/rubin/setup.py index ecda2a00..268c70e0 100644 --- a/pilot/user/rubin/setup.py +++ b/pilot/user/rubin/setup.py @@ -28,8 +28,11 @@ from pilot.common.errorcodes import ErrorCodes from pilot.util.auxiliary import find_pattern_in_list from pilot.util.container import execute -from pilot.util.filehandling import copy, head - +from pilot.util.filehandling import ( + copy, + head, + read_base_urls +) import logging logger = logging.getLogger(__name__) @@ -131,6 +134,12 @@ def get_valid_base_urls(order: str = None) -> list: valid_base_urls.append(f"http://{base_url}") valid_base_urls.append(f"https://{base_url}") + # add further URLs in case baseurls.txt file exist (URLs specified with option --baseurls) + urls = read_base_urls() + if urls: + for url in urls: + valid_base_urls.append(url) + if order: valid_base_urls = [order] + [url for url in valid_base_urls if url != order] diff --git a/pilot/util/default.cfg b/pilot/util/default.cfg index 55f0b68c..ff3eccf3 100644 --- a/pilot/util/default.cfg +++ b/pilot/util/default.cfg @@ -234,6 +234,9 @@ error_report: payload_error_report.json # These are the maximum memory limits for the various resource types (in MB) memory_limits = {'MCORE': 1001, 'MCORE_HIMEM': 2001, 'MCORE_LOMEM': None, 'SCORE': 1001, 'SCORE_HIMEM': 2001, 'SCORE_LOMEM': None} +# Filename for storing base URLs for 
# NOTE: the same patch adds the following entry to pilot/util/default.cfg
#     # Filename for storing base URLs for payload verification, set via pilot option --baseurls
#     baseurls: baseurls.txt
# and the following import to pilot/util/filehandling.py
#     from pilot.util.config import config
# The functions below rely on the module-level names `os`, `config` and `logger`.


def get_baseurls_filename() -> str:
    """
    Get the base URLs filename.

    The name is taken from the pilot config (Payload.baseurls). If that attribute
    is not defined, the default name "baseurls.txt" is used.

    :return: base URLs filename (str).
    """
    try:
        return config.Payload.baseurls
    except AttributeError:
        return "baseurls.txt"


def _get_baseurls_path() -> str:
    """
    Build the full path to the base URLs file inside the PILOT_HOME directory.

    :return: path to the base URLs file, or an empty string if PILOT_HOME is not set (str).
    """
    pilot_home = os.environ.get("PILOT_HOME")
    if not pilot_home:
        # os.path.join(None, ...) would raise a TypeError, so report "no path" instead
        return ""

    return os.path.join(pilot_home, get_baseurls_filename())


def store_base_urls(baseurls: str) -> None:
    """
    Store the base URLs for trf verification to a file, if args.baseurls is set.

    The string is written unchanged (a comma separated list on a single line) to the
    base URLs file in PILOT_HOME. Failures are logged as warnings and never raised.

    :param baseurls: comma separated list of base URLs (str).
    """
    if not baseurls:
        return

    path = _get_baseurls_path()
    if not path:
        logger.warning("failed to write base URLs to file: PILOT_HOME is not set")
        return

    try:
        with open(path, "w", encoding="utf-8") as f:
            f.write(baseurls)
    except OSError as exc:
        logger.warning(f"failed to write base URLs to file: {exc}")
    else:
        logger.info(f"wrote base URLs to {path}")


def read_base_urls() -> list:
    """
    Read the base URLs from the baseurls file.

    The file is expected to contain a comma separated list of URLs. Surrounding
    whitespace is removed from each entry and empty entries are dropped, so an
    empty file results in an empty list rather than a list with an empty string.

    :return: list of base URLs, empty if the file is missing or unreadable (list).
    """
    path = _get_baseurls_path()
    if not path:
        logger.warning("failed to read base URLs from file: PILOT_HOME is not set")
        return []

    try:
        with open(path, "r", encoding="utf-8") as f:
            raw = f.read()
    except FileNotFoundError:
        # the file is only written when the --baseurls option is given, so its absence
        # is normally expected and not worth a warning on every call
        logger.debug(f"no base URLs file found at {path}")
        return []
    except (OSError, UnicodeError) as exc:
        logger.warning(f"failed to read base URLs from file: {exc}")
        return []

    # convert the comma separated string to a list, ignoring blank entries
    # (an empty string must never end up in the list of valid base URLs)
    baseurls = [url.strip() for url in raw.split(",") if url.strip()]
    logger.info(f"read base URLs from {path}")

    return baseurls