def get_dict_results(self):
    """
    Export the collected results as a plain dictionary.

    :returns: dict with the keys 'results' and 'name_results', holding
              ``self._results`` and ``self._name_results`` respectively.
              The values are the internal objects themselves, not copies.
    """
    return {'results': self._results, 'name_results': self._name_results}


def set_from_dict_results(self, results):
    """
    Restore the collected results from a dictionary such as the one
    returned by ``get_dict_results``.

    :param results: dict that may contain the keys 'results' and
                    'name_results'. ``None``, a missing key, or a key whose
                    value is ``None`` all reset the matching attribute to an
                    empty dict instead of raising or storing ``None``.
    """
    # Tolerate None so callers can pass through an absent/empty payload.
    results = results or {}
    self._results = results.get('results') or {}
    self._name_results = results.get('name_results') or {}
def run_command_with_timeout(command, timeout=600, stdout=sys.stdout, stderr=sys.stderr, poll_interval=10):
    """
    Run a command, relay its output, and terminate it if it stays silent too long.

    The child's stdout and stderr are read line by line in two helper threads
    and forwarded to ``stdout`` / ``stderr``. Every forwarded line resets an
    inactivity timer; when no line has arrived for more than ``timeout``
    seconds the process is terminated through ``kill_all``.

    :param command: command passed unchanged to ``subprocess.Popen``.
    :param timeout: maximum number of seconds without any output.
    :param stdout: stream receiving the child's stdout. Text streams exposing
                   ``.buffer`` get the raw bytes; other streams get bytes if
                   they accept them, otherwise decoded text.
    :param stderr: stream receiving the child's stderr (same rules as stdout).
    :param poll_interval: upper bound, in seconds, between two inactivity
                          checks. The function still returns as soon as the
                          process exits.

    :returns: the finished ``subprocess.Popen`` object.
    """
    # Monotonic clock: the silence measurement must not jump when the wall
    # clock is adjusted.
    last_output_time = time.monotonic()

    def write_chunk(output, data):
        # Prefer the binary layer of text streams (sys.stdout / sys.stderr).
        binary_layer = getattr(output, "buffer", None)
        if binary_layer is not None:
            binary_layer.write(data)
            return
        try:
            output.write(data)
        except TypeError:
            # Text-only stream without a binary layer, e.g. io.StringIO.
            output.write(data.decode(errors="replace"))

    def relay_output(stream, output):
        nonlocal last_output_time
        try:
            for line in iter(stream.readline, b""):
                write_chunk(output, line)
                output.flush()
                last_output_time = time.monotonic()  # Reset timer on new output
        finally:
            # Release the pipe's file descriptor once the child closed its end.
            stream.close()

    # Start the process in its own session (setsid), i.e. as leader of a new
    # process group. start_new_session replaces preexec_fn=os.setsid, which is
    # documented as unsafe when the parent process runs threads.
    process = subprocess.Popen(command,
                               start_new_session=True,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)

    # Start the relay threads
    stdout_thread = threading.Thread(target=relay_output, args=(process.stdout, stdout))
    stderr_thread = threading.Thread(target=relay_output, args=(process.stderr, stderr))
    stdout_thread.start()
    stderr_thread.start()

    # Watch for inactivity and enforce the timeout
    while process.poll() is None:
        silent_for = time.monotonic() - last_output_time
        if silent_for > timeout:
            print(f"No output for {silent_for} seconds. Terminating process.")
            # kill_all is a helper defined elsewhere in this module.
            kill_all(process)
            break
        try:
            # Unlike a fixed sleep, this wakes up as soon as the child exits,
            # so short commands are not delayed by a full poll interval.
            process.wait(timeout=poll_interval)
        except subprocess.TimeoutExpired:
            pass

    # Drain the remaining output, then reap the process
    stdout_thread.join()
    stderr_thread.join()
    process.wait()
    return process
"https://lxplus947.cern.ch:443/idds/monitor/null/null/false/false/true" }