From 7df82728995b2252ed015cc997d2256e4e9195b8 Mon Sep 17 00:00:00 2001 From: Paul Nilsson Date: Tue, 14 Dec 2021 12:09:37 +0100 Subject: [PATCH 1/3] Always adding payload env vars. Many updates to real-time logger --- PILOTVERSION | 2 +- pilot/user/atlas/common.py | 9 +++------ pilot/util/constants.py | 2 +- pilot/util/realtimelogger.py | 17 +++++++++++------ 4 files changed, 16 insertions(+), 14 deletions(-) diff --git a/PILOTVERSION b/PILOTVERSION index eea64b135..999fce2f6 100644 --- a/PILOTVERSION +++ b/PILOTVERSION @@ -1 +1 @@ -3.1.0.60 \ No newline at end of file +3.1.0.61 \ No newline at end of file diff --git a/pilot/user/atlas/common.py b/pilot/user/atlas/common.py index 0b8e02b8b..2e4e6271e 100644 --- a/pilot/user/atlas/common.py +++ b/pilot/user/atlas/common.py @@ -444,12 +444,9 @@ def get_payload_command(job): if not cmd.endswith(';'): cmd += '; ' - # only if not using a user container - if not job.imagename: - site = os.environ.get('PILOT_SITENAME', '') - variables = get_payload_environment_variables( - cmd, job.jobid, job.taskid, job.attemptnr, job.processingtype, site, userjob) - cmd = ''.join(variables) + cmd + site = os.environ.get('PILOT_SITENAME', '') + variables = get_payload_environment_variables(cmd, job.jobid, job.taskid, job.attemptnr, job.processingtype, site, userjob) + cmd = ''.join(variables) + cmd # prepend PanDA job id in case it is not there already (e.g. runcontainer jobs) if 'export PandaID' not in cmd: diff --git a/pilot/util/constants.py b/pilot/util/constants.py index 619b0aa69..3e38dd33e 100644 --- a/pilot/util/constants.py +++ b/pilot/util/constants.py @@ -14,7 +14,7 @@ RELEASE = '3' # released number should be fixed at 3 for Pilot 3 VERSION = '1' # version number is '1' for first release, '0' until then, increased for bigger updates REVISION = '0' # revision number should be reset to '0' for every new version release, increased for small updates -BUILD = '60' # build number should be reset to '1' for every new development cycle +BUILD = '61' # build number should be reset to '1' for every new development cycle SUCCESS = 0 FAILURE = 1 diff --git a/pilot/util/realtimelogger.py b/pilot/util/realtimelogger.py index 4bebe3ef3..a99acc17d 100644 --- a/pilot/util/realtimelogger.py +++ b/pilot/util/realtimelogger.py @@ -47,7 +47,7 @@ class RealTimeLogger(Logger): logfiles_default = [] openfiles = {} - def __init__(self, args, info_dic): + def __init__(self, args, info_dic, level=INFO): """ Default init function. @@ -60,10 +60,11 @@ def __init__(self, args, info_dic): :param args: pilot arguments object. :param info_dic: info dictionary. + :param level: logging level (constant). :return: """ - super(RealTimeLogger, self).__init__(name="realTimeLogger", level=INFO) + super(RealTimeLogger, self).__init__(name="realTimeLogger", level=level) RealTimeLogger.glogger = self if not info_dic: @@ -88,6 +89,8 @@ def __init__(self, args, info_dic): _handler = None try: + server = 'aipanda020.cern.ch' + port = 8443 if logtype == "google-cloud-logging": import google.cloud.logging from google.cloud.logging_v2.handlers import CloudLoggingHandler @@ -104,16 +107,18 @@ def __init__(self, args, info_dic): server, port, timeout=5.0, - ssl_verify=False, - ssl_enable=False, + ssl_enable=True, + ssl_verify=True, + user='pilot', + password='XXX' ) # Create the handler _handler = AsynchronousLogstashHandler( host=server, port=port, transport=transport, - ssl_enable=False, - ssl_verify=False, + ssl_enable=True, + ssl_verify=True, database_path='logstash_test.db') else: logger.warning(f'unknown logtype: {logtype}') From 935ec52a6ea91f03674431ef42d2af786942d998 Mon Sep 17 00:00:00 2001 From: Paul Nilsson Date: Tue, 14 Dec 2021 13:56:15 +0100 Subject: [PATCH 2/3] Turned off SSL enabling and verification --- PILOTVERSION | 2 +- pilot/util/constants.py | 2 +- pilot/util/realtimelogger.py | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/PILOTVERSION b/PILOTVERSION index 999fce2f6..6d941675d 100644 --- a/PILOTVERSION +++ b/PILOTVERSION @@ -1 +1 @@ -3.1.0.61 \ No newline at end of file +3.1.0.62 \ No newline at end of file diff --git a/pilot/util/constants.py b/pilot/util/constants.py index 3e38dd33e..458c48a0b 100644 --- a/pilot/util/constants.py +++ b/pilot/util/constants.py @@ -14,7 +14,7 @@ RELEASE = '3' # released number should be fixed at 3 for Pilot 3 VERSION = '1' # version number is '1' for first release, '0' until then, increased for bigger updates REVISION = '0' # revision number should be reset to '0' for every new version release, increased for small updates -BUILD = '61' # build number should be reset to '1' for every new development cycle +BUILD = '62' # build number should be reset to '1' for every new development cycle SUCCESS = 0 FAILURE = 1 diff --git a/pilot/util/realtimelogger.py b/pilot/util/realtimelogger.py index a99acc17d..c6f569bea 100644 --- a/pilot/util/realtimelogger.py +++ b/pilot/util/realtimelogger.py @@ -107,8 +107,8 @@ def __init__(self, args, info_dic, level=INFO): server, port, timeout=5.0, - ssl_enable=True, - ssl_verify=True, + ssl_enable=False, + ssl_verify=False, user='pilot', password='XXX' ) @@ -117,8 +117,8 @@ def __init__(self, args, info_dic, level=INFO): host=server, port=port, transport=transport, - ssl_enable=True, - ssl_verify=True, + ssl_enable=False, + ssl_verify=False, database_path='logstash_test.db') else: logger.warning(f'unknown logtype: {logtype}') From 2425d9ad06520bd0d5dba814dd8792abbd436cf3 Mon Sep 17 00:00:00 2001 From: Paul Nilsson Date: Tue, 14 Dec 2021 14:40:10 +0100 Subject: [PATCH 3/3] Job monitor thread prevented from dying too soon --- PILOTVERSION | 2 +- pilot/control/job.py | 4 ++-- pilot/util/constants.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/PILOTVERSION b/PILOTVERSION index 6d941675d..27d4f6b57 100644 --- a/PILOTVERSION +++ b/PILOTVERSION @@ -1 +1 @@ -3.1.0.62 \ No newline at end of file +3.1.0.63 \ No newline at end of file diff --git a/pilot/control/job.py b/pilot/control/job.py index e38f14455..afef7728b 100644 --- a/pilot/control/job.py +++ b/pilot/control/job.py @@ -18,7 +18,7 @@ import logging import queue -from json import dumps #, loads +from json import dumps from re import findall from glob import glob @@ -2595,7 +2595,7 @@ def job_monitor(queues, traces, args): # noqa: C901 logger.warning('job state is \'failed\' - order log transfer and abort job_monitor() (2)') _job.stageout = 'log' # only stage-out log file put_in_queue(_job, queues.data_out) - abort = True + #abort = True break elif os.environ.get('PILOT_JOB_STATE') == 'stagein': diff --git a/pilot/util/constants.py b/pilot/util/constants.py index 458c48a0b..34644997b 100644 --- a/pilot/util/constants.py +++ b/pilot/util/constants.py @@ -14,7 +14,7 @@ RELEASE = '3' # released number should be fixed at 3 for Pilot 3 VERSION = '1' # version number is '1' for first release, '0' until then, increased for bigger updates REVISION = '0' # revision number should be reset to '0' for every new version release, increased for small updates -BUILD = '62' # build number should be reset to '1' for every new development cycle +BUILD = '63' # build number should be reset to '1' for every new development cycle SUCCESS = 0 FAILURE = 1