From 223f1f6f8e267d258abd2f299ec6fc4a9b2f1cf8 Mon Sep 17 00:00:00 2001 From: Sheng Zha Date: Sun, 21 Feb 2021 21:13:01 -0500 Subject: [PATCH] Wait for gc clean-up in main thread before fork in DatasetLoader (#1525) cf https://github.com/apache/incubator-mxnet/commit/fee8bf4fbbf8e2b56b006852607bb7df9e14e84f Signed-off-by: Sheng Zha --- src/gluonnlp/data/loading.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/gluonnlp/data/loading.py b/src/gluonnlp/data/loading.py index 4027f78866..445e7bc5ce 100644 --- a/src/gluonnlp/data/loading.py +++ b/src/gluonnlp/data/loading.py @@ -24,6 +24,7 @@ import io import os +import gc import glob import pickle import warnings @@ -31,7 +32,7 @@ from functools import partial import numpy as np -from mxnet import context +from mxnet import context, npx from mxnet.gluon.data import ArrayDataset, SimpleDataset from mxnet.gluon.data.dataloader import ForkingPickler, _as_in_context, \ default_mp_batchify_fn, default_batchify_fn @@ -480,12 +481,20 @@ def __init__(self, file_patterns, file_sampler, self._manager = None self._dataset_worker_pool = None if self._num_dataset_workers > 0: + npx.waitall() + import gc + gc.collect() + npx.waitall() self._manager = multiprocessing.Manager() self._dataset_worker_pool = multiprocessing.Pool(self._num_dataset_workers, initializer=_initialize_dataset_worker, initargs=[self._manager]) self._batch_worker_pool = None if self._num_batch_workers > 0: + npx.waitall() + import gc + gc.collect() + npx.waitall() self._batch_worker_pool = multiprocessing.Pool(self._num_batch_workers) if batchify_fn is None: if self._num_batch_workers > 0: