Skip to content

Commit

Permalink
Merge pull request #368 from HSF/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
wguanicedew authored Nov 18, 2024
2 parents 32a23cf + bdb8686 commit 21d692a
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 8 deletions.
41 changes: 41 additions & 0 deletions common/lib/idds/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1101,3 +1101,44 @@ def get_process_thread_info():
thread_id = hb_thread.ident
thread_name = hb_thread.name
return hostname, pid, thread_id, thread_name


def run_command_with_timeout(command, timeout=600, stdout=sys.stdout, stderr=sys.stderr):
"""
Run a command and monitor its output. Terminate if no output within timeout.
"""
last_output_time = time.time()

def monitor_output(stream, output, timeout):
nonlocal last_output_time
for line in iter(stream.readline, b""):
output.buffer.write(line)
output.flush()
last_output_time = time.time() # Reset timer on new output

# Start the process
process = subprocess.Popen(command,
preexec_fn=os.setsid, # setpgrp
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)

# Start the monitoring thread
stdout_thread = threading.Thread(target=monitor_output, args=(process.stdout, stdout, timeout))
stderr_thread = threading.Thread(target=monitor_output, args=(process.stderr, stderr, timeout))
stdout_thread.start()
stderr_thread.start()

# monitor the output and enforce timeout
while process.poll() is None:
time_elapsed = time.time() - last_output_time
if time_elapsed > timeout:
print(f"No output for {time_elapsed} seconds. Terminating process.")
kill_all(process)
break
time.sleep(10) # Check every second

# Wait for the process to complete and join the monitoring thread
stdout_thread.join()
stderr_thread.join()
process.wait()
return process
2 changes: 1 addition & 1 deletion main/config_default/healthmonitor_daemon
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/bash
# while true; do /usr/sbin/logrotate -s /var/log/idds/logrotate.status -d /etc/logrotate.d/idds; sleep 86400; done

while true; do python /opt/idds/config/idds/idds_health_check.py; sleep 600; done
while true; do python /opt/idds/config/idds/idds_health_check.py; sleep 120; done
2 changes: 1 addition & 1 deletion main/lib/idds/orm/base/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def build_database(echo=True, tests=False):
conn.execute(CreateSchema(schema))
except Exception as e:
print('Cannot create schema, please validate manually if schema creation is needed, continuing:', e)
print(traceback.format_exc())
# print(traceback.format_exc())

models.register_models(engine)

Expand Down
12 changes: 6 additions & 6 deletions monitor/data/conf.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@

var appConfig = {
'iddsAPI_request': "https://lxplus961.cern.ch:443/idds/monitor_request/null/null",
'iddsAPI_transform': "https://lxplus961.cern.ch:443/idds/monitor_transform/null/null",
'iddsAPI_processing': "https://lxplus961.cern.ch:443/idds/monitor_processing/null/null",
'iddsAPI_request_detail': "https://lxplus961.cern.ch:443/idds/monitor/null/null/true/false/false",
'iddsAPI_transform_detail': "https://lxplus961.cern.ch:443/idds/monitor/null/null/false/true/false",
'iddsAPI_processing_detail': "https://lxplus961.cern.ch:443/idds/monitor/null/null/false/false/true"
'iddsAPI_request': "https://lxplus947.cern.ch:443/idds/monitor_request/null/null",
'iddsAPI_transform': "https://lxplus947.cern.ch:443/idds/monitor_transform/null/null",
'iddsAPI_processing': "https://lxplus947.cern.ch:443/idds/monitor_processing/null/null",
'iddsAPI_request_detail': "https://lxplus947.cern.ch:443/idds/monitor/null/null/true/false/false",
'iddsAPI_transform_detail': "https://lxplus947.cern.ch:443/idds/monitor/null/null/false/true/false",
'iddsAPI_processing_detail': "https://lxplus947.cern.ch:443/idds/monitor/null/null/false/false/true"
}
7 changes: 7 additions & 0 deletions workflow/lib/idds/iworkflow/asyncresult.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,13 @@ def set_result(self, name=None, args=None, key=None, value=None, verbose=False):
def get_all_results(self):
return self._results

def get_dict_results(self):
return {'results': self._results, 'name_results': self._name_results}

def set_from_dict_results(self, results):
self._results = results.get('results', {})
self._name_results = results.get('name_results', {})


class AsyncResult(Base):

Expand Down

0 comments on commit 21d692a

Please sign in to comment.