Skip to content

Commit

Permalink
Use time.perf_counter in megatron/data/blendable_dataset.py
Browse files Browse the repository at this point in the history
  • Loading branch information
saforem2 committed Oct 16, 2024
1 parent 63b1901 commit 7ef26bf
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions megatron/data/blendable_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def __init__(self, datasets, weights, size, *,
# Build indices.
@dlp.log
def _build_indices():
start_time = time.time()
start_time = time.perf_counter()
dataset_index = np.zeros(self.size, dtype=np.int64)
dataset_sample_index = np.zeros(self.size, dtype=np.int64)

Expand Down Expand Up @@ -77,14 +77,14 @@ def _build_indices():
dataset_index, dataset_sample_index = _build_indices()
try:
log.debug(" > saving index map files")
start_time = time.time()
start_time = time.perf_counter()
os.makedirs(os.path.dirname(index_path), exist_ok=True)
with open(desc_path, 'wt') as fd:
fd.write(desc)
np.save(index_path, dataset_index, allow_pickle=True)
np.save(sample_index_path, dataset_sample_index,
allow_pickle=True)
log.info(f" > finished saving index map files in {time.time() - start_time} seconds")
log.info(f" > finished saving index map files in {time.perf_counter() - start_time} seconds")
except OSError:
print(f'There was an error trying to create the data cache directory ({data_cache_path})')
print('or a file in it. This is set with the --data-cache-path argument. Please')
Expand All @@ -108,14 +108,14 @@ def _build_indices():
torch.distributed.barrier(group=mpu.get_pipeline_model_parallel_group())
torch.distributed.barrier(group=mpu.get_data_parallel_group())

start_time = time.time()
start_time = time.perf_counter()
log.info(f'> loading blendable dataset index: {index_path}')
self.dataset_index = np.load(index_path, allow_pickle=True, mmap_mode='r')
assert self.dataset_index.size == self.size
log.info(f'> loading blendable dataset sample index: {sample_index_path}')
self.dataset_sample_index = np.load(sample_index_path, allow_pickle=True, mmap_mode='r')
assert self.dataset_sample_index.size == self.size
log.info(f'> finished loading in {time.time() - start_time} seconds')
log.info(f'> finished loading in {time.perf_counter() - start_time} seconds')
else:
self.dataset_index, self.dataset_sample_index = _build_indices()

Expand Down

0 comments on commit 7ef26bf

Please sign in to comment.