Skip to content

Commit

Permalink
fix shuffle in files
Browse files (browse the repository at this point in the history)
  • Loading branch information
Mddct committed Sep 27, 2024
1 parent bf1664c commit 7723cb3
Showing 1 changed file with 2 additions and 0 deletions.
2 changes: 2 additions & 0 deletions wenet/dataset/datapipes.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -452,6 +452,8 @@ def __init__(self,
dp = IterableWrapperIterDataPipe(filenames)
# 0 shard many jsonl files
dp = dp.shuffle().repeat(cycle).shard(partition)
if shuffle:
self.dp = self.dp.shuffle(buffer_size=shuffle_size)
# 1 read one json file
self.dp = TextLineDataPipe(dp)
self.dp = self.dp.prefetch(prefetch)
Expand Down

0 comments on commit 7723cb3

Please sign in to comment.