Skip to content

Commit

Permalink
Fix buffer size
Browse files (browse the repository at this point in the history)
  • Loading branch information
yifanyeung committed Jan 19, 2024
1 parent 7bdde91 commit 69730a7
Show file tree
Hide file tree
Showing 37 changed files with 78 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -288,8 +288,9 @@ def train_dataloaders(
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
drop_last=True,
buffer_size=50000,
)
else:
logging.info("Using SimpleCutSampler.")
Expand Down
2 changes: 2 additions & 0 deletions egs/aishell/ASR/tdnn_lstm_ctc/asr_datamodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,8 @@ def train_dataloaders(
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
drop_last=self.args.drop_last,
)
else:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,8 @@ def train_dataloaders(
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
drop_last=True,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,8 @@ def train_dataloaders(
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
drop_last=self.args.drop_last,
)
else:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,8 @@ def train_dataloaders(
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
buffer_size=100000,
drop_last=self.args.drop_last,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,8 @@ def train_dataloaders(
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
buffer_size=30000,
drop_last=True,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,8 @@ def train_dataloaders(
max_cuts=self.args.max_cuts,
shuffle=False,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
drop_last=True,
)
logging.info("About to create train dataloader")
Expand Down
2 changes: 2 additions & 0 deletions egs/ami/ASR/pruned_transducer_stateless7/asr_datamodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,8 @@ def train_dataloaders(
max_cuts=self.args.max_cuts,
shuffle=False,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
drop_last=True,
)
logging.info("About to create train dataloader")
Expand Down
2 changes: 2 additions & 0 deletions egs/ami/SURT/dprnn_zipformer/asr_datamodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,8 @@ def train_dataloaders(
max_cuts=self.args.max_cuts,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
drop_last=self.args.drop_last,
)
else:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,8 @@ def train_dataloaders(
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
drop_last=self.args.drop_last,
)
else:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,8 @@ def train_dataloaders(
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
drop_last=self.args.drop_last,
)
else:
Expand Down
2 changes: 2 additions & 0 deletions egs/csj/ASR/local/utils/asr_datamodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,8 @@ def train_dataloaders(
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
drop_last=self.args.drop_last,
)
else:
Expand Down
2 changes: 2 additions & 0 deletions egs/gigaspeech/ASR/conformer_ctc/asr_datamodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,8 @@ def train_dataloaders(self, cuts_train: CutSet) -> DataLoader:
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
drop_last=True,
)
else:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,8 @@ def train_dataloaders(
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
drop_last=True,
)
else:
Expand Down
2 changes: 2 additions & 0 deletions egs/gigaspeech/ASR/zipformer/asr_datamodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,8 @@ def train_dataloaders(
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
drop_last=self.args.drop_last,
)
else:
Expand Down
2 changes: 2 additions & 0 deletions egs/libricss/SURT/dprnn_zipformer/asr_datamodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,8 @@ def train_dataloaders(
max_cuts=self.args.max_cuts,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
drop_last=self.args.drop_last,
)
else:
Expand Down
2 changes: 2 additions & 0 deletions egs/libriheavy/ASR/zipformer/asr_datamodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,8 @@ def train_dataloaders(
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
drop_last=self.args.drop_last,
)
else:
Expand Down
2 changes: 2 additions & 0 deletions egs/libriheavy/ASR/zipformer_prompt_asr/asr_datamodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,8 @@ def train_dataloaders(
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
drop_last=True,
)
else:
Expand Down
2 changes: 2 additions & 0 deletions egs/librispeech/ASR/pruned2_knowledge/asr_datamodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,8 @@ def train_dataloaders(
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
bucket_method="equal_duration",
drop_last=True,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,8 @@ def train_dataloaders(
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
drop_last=True,
)

Expand Down Expand Up @@ -256,6 +258,8 @@ def valid_dataloaders(self, cuts_valid: CutSet) -> DataLoader:
max_duration=self.args.max_duration,
shuffle=False,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
drop_last=False,
)
logging.info("About to create dev dataloader")
Expand All @@ -282,6 +286,8 @@ def test_dataloaders(self, cuts: CutSet) -> DataLoader:
max_duration=self.args.max_duration,
shuffle=False,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
)
logging.debug("About to create test dataloader")
test_dl = DataLoader(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,8 @@ def train_dataloaders(
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
drop_last=True,
)
else:
Expand Down
2 changes: 2 additions & 0 deletions egs/librispeech/ASR/tdnn_lstm_ctc/asr_datamodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,8 @@ def train_dataloaders(
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
drop_last=self.args.drop_last,
)
else:
Expand Down
2 changes: 2 additions & 0 deletions egs/librispeech/ASR/tiny_transducer_ctc/asr_datamodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,8 @@ def train_dataloaders(
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
drop_last=self.args.drop_last,
)
else:
Expand Down
2 changes: 2 additions & 0 deletions egs/librispeech/WSASR/conformer_ctc2/asr_datamodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,8 @@ def train_dataloaders(
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
drop_last=self.args.drop_last,
)
else:
Expand Down
2 changes: 2 additions & 0 deletions egs/ljspeech/TTS/vits/tts_datamodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,8 @@ def train_dataloaders(
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
drop_last=self.args.drop_last,
)
else:
Expand Down
2 changes: 2 additions & 0 deletions egs/mgb2/ASR/conformer_ctc/asr_datamodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,8 @@ def train_dataloaders(
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
drop_last=self.args.drop_last,
)
else:
Expand Down
2 changes: 2 additions & 0 deletions egs/multi_zh-hans/ASR/zipformer/asr_datamodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,8 @@ def train_dataloaders(
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
drop_last=self.args.drop_last,
)
else:
Expand Down
2 changes: 2 additions & 0 deletions egs/multi_zh_en/ASR/zipformer/asr_datamodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,8 @@ def train_dataloaders(
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
drop_last=self.args.drop_last,
)
else:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,8 @@ def train_dataloaders(
max_duration=self.args.max_duration,
shuffle=False,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
drop_last=True,
)
logging.info("About to create train dataloader")
Expand Down
3 changes: 2 additions & 1 deletion egs/swbd/ASR/conformer_ctc/asr_datamodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,8 +298,9 @@ def train_dataloaders(
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
drop_last=self.args.drop_last,
buffer_size=50000,
)
else:
logging.info("Using SimpleCutSampler.")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,8 @@ def train_dataloaders(
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
num_cuts_for_bins_estimate=20000,
buffer_size=60000,
drop_last=self.args.drop_last,
Expand Down
2 changes: 2 additions & 0 deletions egs/tedlium3/ASR/transducer_stateless/asr_datamodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,8 @@ def train_dataloaders(
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
drop_last=True,
)
else:
Expand Down
2 changes: 2 additions & 0 deletions egs/timit/ASR/tdnn_lstm_ctc/asr_datamodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,8 @@ def train_dataloaders(self) -> DataLoader:
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
drop_last=True,
)
else:
Expand Down
2 changes: 2 additions & 0 deletions egs/vctk/TTS/vits/tts_datamodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,8 @@ def train_dataloaders(
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
drop_last=self.args.drop_last,
)
else:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,8 @@ def train_dataloaders(
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
buffer_size=300000,
drop_last=True,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,8 @@ def train_dataloaders(
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
drop_last=self.args.drop_last,
)
else:
Expand Down
2 changes: 2 additions & 0 deletions egs/yesno/ASR/tdnn/asr_datamodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,8 @@ def train_dataloaders(self) -> DataLoader:
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
drop_last=True,
)
else:
Expand Down

0 comments on commit 69730a7

Please sign in to comment.