diff --git a/egs/reazonspeech/ASR/local/compute_fbank_reazonspeech.py b/egs/reazonspeech/ASR/local/compute_fbank_reazonspeech.py index e949e3aed5..af78414063 100644 --- a/egs/reazonspeech/ASR/local/compute_fbank_reazonspeech.py +++ b/egs/reazonspeech/ASR/local/compute_fbank_reazonspeech.py @@ -68,17 +68,17 @@ def make_cutset_blueprints( ) ) - # Create valid dataset - logging.info("Creating valid cuts.") + # Create dev dataset + logging.info("Creating dev cuts.") cut_sets.append( ( - "valid", + "dev", CutSet.from_manifests( recordings=RecordingSet.from_file( - manifest_dir / "reazonspeech_recordings_valid.jsonl.gz" + manifest_dir / "reazonspeech_recordings_dev.jsonl.gz" ), supervisions=SupervisionSet.from_file( - manifest_dir / "reazonspeech_supervisions_valid.jsonl.gz" + manifest_dir / "reazonspeech_supervisions_dev.jsonl.gz" ), ), ) diff --git a/egs/reazonspeech/ASR/local/display_manifest_statistics.py b/egs/reazonspeech/ASR/local/display_manifest_statistics.py index 48e9dee8de..ace1dd73f5 100644 --- a/egs/reazonspeech/ASR/local/display_manifest_statistics.py +++ b/egs/reazonspeech/ASR/local/display_manifest_statistics.py @@ -45,7 +45,7 @@ def get_parser(): def main(): args = get_parser() - for part in ["train", "valid"]: + for part in ["train", "dev"]: path = args.manifest_dir / f"reazonspeech_cuts_{part}.jsonl.gz" cuts: CutSet = load_manifest(path) diff --git a/egs/reazonspeech/ASR/prepare.sh b/egs/reazonspeech/ASR/prepare.sh index f8e54f58c1..d5e0a94918 100755 --- a/egs/reazonspeech/ASR/prepare.sh +++ b/egs/reazonspeech/ASR/prepare.sh @@ -5,48 +5,82 @@ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python set -eou pipefail -nj=8 +nj=15 stage=-1 stop_stage=100 -reazonspeech_dir=corpus -reazonspeech_manifest_dir=data +# We assume dl_dir (download dir) contains the following +# directories and files. If not, they will be downloaded +# by this script automatically. +# +# - $dl_dir/ReazonSpeech +# You can find FLAC files in this directory. +# You can download them from https://huggingface.co/datasets/reazon-research/reazonspeech +# +# - $dl_dir/dataset.json +# The metadata of the ReazonSpeech dataset. + +dl_dir=$PWD/download . shared/parse_options.sh || exit 1 +# All files generated by this script are saved in "data". +# You can safely remove "data" and rerun this script to regenerate it. mkdir -p data log() { + # This function is from espnet local fname=${BASH_SOURCE[1]##*/} echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" } +log "Running prepare.sh" + +log "dl_dir: $dl_dir" + +if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then + log "Stage 0: Download data" + + # If you have pre-downloaded it to /path/to/ReazonSpeech, + # you can create a symlink + # + # ln -sfv /path/to/ReazonSpeech $dl_dir/ReazonSpeech + # + if [ ! -d $dl_dir/ReazonSpeech/downloads ]; then + # Download small-v1 by default. + lhotse download reazonspeech --subset small-v1 $dl_dir + fi +fi + if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then log "Stage 1: Prepare ReazonSpeech manifest" - if [ ! -e $reazonspeech_manifest_dir/.reazonspeech.done ]; then - lhotse prepare reazonspeech $reazonspeech_dir $reazonspeech_manifest_dir - touch $reazonspeech_manifest_dir/.reazonspeech.done + # We assume that you have downloaded the ReazonSpeech corpus + # to $dl_dir/ReazonSpeech + mkdir -p data/manifests + if [ ! -e data/manifests/.reazonspeech.done ]; then + lhotse prepare reazonspeech -j $nj $dl_dir/ReazonSpeech data/manifests + touch data/manifests/.reazonspeech.done fi fi if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then log "Stage 2: Compute ReazonSpeech fbank" - if [ ! -e $reazonspeech_manifest_dir/.reazonspeech-validated.done ]; then - python local/compute_fbank_reazonspeech.py --manifest-dir $reazonspeech_manifest_dir - python local/validate_manifest.py --manifest $reazonspeech_manifest_dir/reazonspeech_cuts_train.jsonl.gz - python local/validate_manifest.py --manifest $reazonspeech_manifest_dir/reazonspeech_cuts_valid.jsonl.gz - python local/validate_manifest.py --manifest $reazonspeech_manifest_dir/reazonspeech_cuts_test.jsonl.gz - touch $reazonspeech_manifest_dir/.reazonspeech-validated.done + if [ ! -e data/manifests/.reazonspeech-validated.done ]; then + python local/compute_fbank_reazonspeech.py --manifest-dir data/manifests + python local/validate_manifest.py --manifest data/manifests/reazonspeech_cuts_train.jsonl.gz + python local/validate_manifest.py --manifest data/manifests/reazonspeech_cuts_dev.jsonl.gz + python local/validate_manifest.py --manifest data/manifests/reazonspeech_cuts_test.jsonl.gz + touch data/manifests/.reazonspeech-validated.done fi fi if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then log "Stage 3: Prepare ReazonSpeech lang_char" - python local/prepare_lang_char.py $reazonspeech_manifest_dir/reazonspeech_cuts_train.jsonl.gz + python local/prepare_lang_char.py data/manifests/reazonspeech_cuts_train.jsonl.gz fi if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then log "Stage 4: Show manifest statistics" - python local/display_manifest_statistics.py --manifest-dir $reazonspeech_manifest_dir > $reazonspeech_manifest_dir/manifest_statistics.txt - cat $reazonspeech_manifest_dir/manifest_statistics.txt -fi + python local/display_manifest_statistics.py --manifest-dir data/manifests > data/manifests/manifest_statistics.txt + cat data/manifests/manifest_statistics.txt +fi \ No newline at end of file diff --git a/egs/reazonspeech/ASR/zipformer/train.py b/egs/reazonspeech/ASR/zipformer/train.py index 16f0d93936..8c6f4bb9a4 100755 --- a/egs/reazonspeech/ASR/zipformer/train.py +++ b/egs/reazonspeech/ASR/zipformer/train.py @@ -1069,9 +1069,6 @@ def save_bad_model(suffix: str = ""): tb_writer, "train/valid_", params.batch_idx_train ) - # print('--------------------debug------------------') - # print(tot_loss) - # print(tot_loss["frames"]) loss_value = tot_loss["loss"] / tot_loss["frames"] params.train_loss = loss_value if params.train_loss < params.best_train_loss: