Skip to content

Commit

Permalink
more
Browse files: browse the repository at this point in the history
  • Loading branch information
albertz committed Jul 8, 2024
1 parent ecd3e2d commit e65f6ed
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 1 deletion.
2 changes: 1 addition & 1 deletion users/zeyer/datasets/librispeech.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def _get_spm_vocab(
dim_str = str(dim)
if isinstance(dim, str):
# Not sure if power-of-two or just multiple-of-64, but 10240 has more 2s in it (2048*5) than 10048.
dim = {"20k": 20_480, "10k": 10_240, "5k": 5_120, "4k": 4_096, "1k": 1_024}[dim]
dim = {"20k": 20_480, "10k": 10_240, "5k": 5_120, "4k": 4_096, "1k": 1_024, "512": 512, "128": 128}[dim]
assert isinstance(dim, int) and dim >= 10

# https://github.com/google/sentencepiece/blob/master/doc/options.md
Expand Down
7 changes: 7 additions & 0 deletions users/zeyer/experiments/exp2024_04_23_baselines/ctc.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,14 @@ def py():
("spm4k", "spm", 0.7),
("spm4k", "bpe", 0.01),
("spm1k", None, None), # 6.07
("spm1k", "spm", 0.7),
("spm1k", "bpe", 0.01), # 6.13 (but dev-clean,test-* are better than no sampling)
("spm_bpe1k", None, None),
("spm_bpe1k", "bpe", 0.01),
("spm512", None, None),
("spm512", "bpe", 0.01),
("spm128", None, None),
("spm128", "bpe", 0.01),
]:
train_exp(
f"v6-relPosAttDef-bhv20-11gb-f32-bs15k-accgrad1-mgpu4-pavg100"
Expand Down

0 comments on commit e65f6ed

Please sign in to comment.