Commit: Also store success boolean to pickle output when using multiprocessing (#686)

* also store success boolean to pickle output when using multiprocessing instead of slurm

* update changelog

* merge print and logging instructions; convert print to logging in other places, too

* add logging import

* fix return value propagation in multiprocessing context with serialization of result

* format
philippotto authored Apr 6, 2022
1 parent a081c56 commit 1cf6f50
Showing 4 changed files with 17 additions and 10 deletions.
1 change: 1 addition & 0 deletions cluster_tools/Changelog.md
@@ -10,6 +10,7 @@ For upgrade instructions, please check the respective *Breaking Changes* section
 [Commits](https://github.com/scalableminds/webknossos-libs/compare/v0.9.17...HEAD)
 
 ### Breaking Changes
+- The cluster-tools serialize the output of a job in the format `(wasSuccessful, result_value)` to a pickle file if `output_pickle_path` is provided and multiprocessing is used. This is consistent with how it is already done when using a cluster executor (e.g., slurm). [#686](https://github.com/scalableminds/webknossos-libs/pull/686)
 
 ### Added
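
For illustration, the `(wasSuccessful, result_value)` tuple described above could be consumed roughly like this (a minimal sketch; the file name is hypothetical and plain `pickle` stands in for the package's own `pickling` wrapper):

import pickle

# Hypothetical path; in practice this is the output_pickle_path passed to the executor.
with open("job_output.pickle", "rb") as f:
    was_successful, result_value = pickle.load(f)

if was_successful:
    print("Job returned:", result_value)
else:
    # On failure, result_value holds the exception raised by the job.
    print("Job failed:", result_value)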
12 changes: 10 additions & 2 deletions cluster_tools/cluster_tools/__init__.py
@@ -1,3 +1,4 @@
+import logging
 import multiprocessing
 import os
 import tempfile
@@ -161,12 +162,19 @@ def _execute_and_persist_function(output_pickle_path, *args, **kwargs):
         func = args[0]
         args = args[1:]
 
-        result = func(*args, **kwargs)
+        try:
+            result = True, func(*args, **kwargs)
+        except Exception as exc:
+            result = False, exc
+            logging.warning(f"Job computation failed with:\n{exc.__repr__()}")
 
         with open(output_pickle_path, "wb") as file:
             pickling.dump(result, file)
 
-        return result
+        if result[0]:
+            return result[1]
+        else:
+            raise result[1]
 
     def map_unordered(self, func, args):
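
The wrapper above captures the job outcome as a `(success, value)` tuple so that failures can be pickled just like successes, while the in-process caller still sees the usual return value or the original exception. A self-contained sketch of the same pattern (`run_and_persist` is an illustrative name, not the library's API, and plain `pickle` stands in for its `pickling` module):

import logging
import pickle


def run_and_persist(output_path, func, *args, **kwargs):
    # Record the outcome as (success, value) so both cases serialize uniformly.
    try:
        result = True, func(*args, **kwargs)
    except Exception as exc:
        result = False, exc
        logging.warning(f"Job computation failed with:\n{exc!r}")

    with open(output_path, "wb") as f:
        pickle.dump(result, f)

    # Preserve normal control flow for the caller: return the value or re-raise.
    if result[0]:
        return result[1]
    raise result[1]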
6 changes: 2 additions & 4 deletions cluster_tools/cluster_tools/remote.py
@@ -47,8 +47,7 @@ def worker(executor, workerid, job_array_index, job_array_index_offset, cfut_dir
 
     try:
         input_file_name = executor.format_infile_name(cfut_dir, workerid_with_idx)
-        print("trying to read: ", input_file_name)
-        print("working dir: ", os.getcwd())
+        logging.debug(f"Trying to read: {input_file_name} (working dir: {os.getcwd()})")
 
         custom_main_path = get_custom_main_path(workerid, executor)
         with open(input_file_name, "rb") as f:
@@ -78,10 +77,9 @@ def worker(executor, workerid, job_array_index, job_array_index_offset, cfut_dir
         out = pickling.dumps(result)
 
     except Exception:
-        print(traceback.format_exc())
 
         result = False, format_remote_exc()
-        logging.warning("Job computation failed.")
+        logging.warning(f"Job computation failed with:\n\n{traceback.format_exc()}")
         out = pickling.dumps(result)
 
     # The .preliminary postfix is added since the output can
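
Because these messages now go through `logging` instead of `print`, they only appear if the logging level is configured accordingly; a minimal sketch for surfacing the new debug output (format string chosen only for the example):

import logging

logging.basicConfig(
    level=logging.DEBUG,  # also show logging.debug() messages such as "Trying to read: ..."
    format="%(asctime)s %(levelname)s %(message)s",
)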
8 changes: 4 additions & 4 deletions cluster_tools/cluster_tools/schedulers/cluster_executor.py
@@ -103,7 +103,7 @@ def executor_key(cls):
     def handle_kill(self, _signum, _frame):
         self.wait_thread.stop()
         job_ids = ",".join(str(id) for id in self.jobs.keys())
-        print(
+        logging.debug(
             "A termination signal was registered. The following jobs are still running on the cluster:\n{}".format(
                 job_ids
             )
@@ -204,7 +204,7 @@ def _completion(self, jobid, failed_early):
             if not self.jobs:
                 self.jobs_empty_cond.notify_all()
         if self.debug:
-            print("job completed: {}".format(jobid), file=sys.stderr)
+            logging.debug("Job completed: {}".format(jobid))
 
         preliminary_outfile_name = with_preliminary_postfix(outfile_name)
         if failed_early:
@@ -298,7 +298,7 @@ def submit(self, fun, *args, **kwargs):
         jobid = jobids_futures[0].result()
 
         if self.debug:
-            print(f"job submitted: {jobid}", file=sys.stderr)
+            logging.debug(f"Job submitted: {jobid}")
 
         # Thread will wait for it to finish.
         self.wait_thread.waitFor(preliminary_output_pickle_path, jobid)
@@ -420,7 +420,7 @@ def register_jobs(
             jobid = jobid_future.result()
             if self.debug:
 
-                print(
+                logging.debug(
                     "Submitted array job {} with JobId {} and {} subjobs.".format(
                         batch_description, jobid, len(futs_with_output_paths)
                     ),
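
These debug messages previously went to `sys.stderr` via `print`; `logging` behaves comparably, since its default `StreamHandler` also writes to `sys.stderr`. A sketch of making that explicit (handler setup shown purely for illustration):

import logging
import sys

root = logging.getLogger()
root.addHandler(logging.StreamHandler(sys.stderr))  # stderr is logging's default stream anyway
root.setLevel(logging.DEBUG)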
