From f23c8ce9dd2ab0d95f6dc002c0d7e30e7238a5ac Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Tue, 29 Oct 2024 15:50:49 +0800 Subject: [PATCH] Fix CI test for gigaspeech (#1787) --- ...eech-pruned-transducer-stateless2-2022-05-12.sh | 14 +++++++++++--- ...speech-lstm-transducer-stateless2-2022-09-03.sh | 4 ++-- .github/workflows/run-gigaspeech-2022-05-13.yml | 6 +++--- ...peech-lstm-transducer-stateless2-2022-09-03.yml | 12 ++++++------ .../ASR/pruned_transducer_stateless2/decode.py | 2 +- 5 files changed, 23 insertions(+), 15 deletions(-) diff --git a/.github/scripts/run-gigaspeech-pruned-transducer-stateless2-2022-05-12.sh b/.github/scripts/run-gigaspeech-pruned-transducer-stateless2-2022-05-12.sh index c9e798a68d..8d98fca1ee 100755 --- a/.github/scripts/run-gigaspeech-pruned-transducer-stateless2-2022-05-12.sh +++ b/.github/scripts/run-gigaspeech-pruned-transducer-stateless2-2022-05-12.sh @@ -19,7 +19,7 @@ repo=$(basename $repo_url) echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}" echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}" -if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then +if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_NAME}" == x"workflow_dispatch" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then mkdir -p pruned_transducer_stateless2/exp ln -s $PWD/$repo/exp/pretrained-iter-3488000-avg-20.pt pruned_transducer_stateless2/exp/epoch-999.pt ln -s $PWD/$repo/data/lang_bpe_500 data/ @@ -29,8 +29,16 @@ if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == ls -lh data/fbank ls -lh pruned_transducer_stateless2/exp - ln -sf data/fbank/cuts_DEV.jsonl.gz data/fbank/gigaspeech_cuts_DEV.jsonl.gz - ln -sf data/fbank/cuts_TEST.jsonl.gz data/fbank/gigaspeech_cuts_TEST.jsonl.gz + pushd data/fbank + curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/cuts_DEV.jsonl.gz + curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/cuts_TEST.jsonl.gz + curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/feats_DEV.lca + curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/feats_TEST.lca + + ln -sf cuts_DEV.jsonl.gz gigaspeech_cuts_DEV.jsonl.gz + ln -sf cuts_TEST.jsonl.gz gigaspeech_cuts_TEST.jsonl.gz + popd + log "Decoding dev and test" diff --git a/.github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.sh b/.github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.sh index d547bdd450..8f5a8dbb93 100755 --- a/.github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.sh +++ b/.github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.sh @@ -162,7 +162,7 @@ if [[ x"${GITHUB_EVENT_LABEL_NAME}" == x"LODR" ]]; then --ngram-lm-scale -0.16 fi -if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" ]]; then +if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_NAME}" == x"workflow_dispatch" ]]; then mkdir -p lstm_transducer_stateless2/exp ln -s $PWD/$repo/exp/pretrained.pt lstm_transducer_stateless2/exp/epoch-999.pt ln -s $PWD/$repo/data/lang_bpe_500 data/ @@ -175,7 +175,7 @@ if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" ]]; then # use a small value for decoding with CPU max_duration=100 - for method in greedy_search fast_beam_search modified_beam_search; do + for method in greedy_search fast_beam_search; do log "Decoding with $method" ./lstm_transducer_stateless2/decode.py \ diff --git a/.github/workflows/run-gigaspeech-2022-05-13.yml b/.github/workflows/run-gigaspeech-2022-05-13.yml index 2c1d44fbfe..9fd05d94a0 100644 --- a/.github/workflows/run-gigaspeech-2022-05-13.yml +++ b/.github/workflows/run-gigaspeech-2022-05-13.yml @@ -41,7 +41,7 @@ concurrency: jobs: run_gigaspeech_2022_05_13: - if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule' + if: github.event_name == 'workflow_dispatch' || github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule' runs-on: ${{ matrix.os }} strategy: matrix: @@ -106,7 +106,7 @@ jobs: .github/scripts/run-gigaspeech-pruned-transducer-stateless2-2022-05-12.sh - name: Display decoding results for gigaspeech pruned_transducer_stateless2 - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' + if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event.label.name == 'run-decode' shell: bash run: | cd egs/gigaspeech/ASR/ @@ -122,7 +122,7 @@ jobs: - name: Upload decoding results for gigaspeech pruned_transducer_stateless2 uses: actions/upload-artifact@v4 - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' + if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event.label.name == 'run-decode' with: name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-gigaspeech-pruned_transducer_stateless2-2022-05-12 path: egs/gigaspeech/ASR/pruned_transducer_stateless2/exp/ diff --git a/.github/workflows/run-librispeech-lstm-transducer-stateless2-2022-09-03.yml b/.github/workflows/run-librispeech-lstm-transducer-stateless2-2022-09-03.yml index 6a3f4eb40a..1b222da2d4 100644 --- a/.github/workflows/run-librispeech-lstm-transducer-stateless2-2022-09-03.yml +++ b/.github/workflows/run-librispeech-lstm-transducer-stateless2-2022-09-03.yml @@ -24,7 +24,7 @@ concurrency: jobs: run_librispeech_lstm_transducer_stateless2_2022_09_03: - if: github.event.label.name == 'ready' || github.event.label.name == 'LODR' || github.event.label.name == 'shallow-fusion' || github.event_name == 'push' || github.event_name == 'schedule' + if: github.event.label.name == 'ready' || github.event.label.name == 'LODR' || github.event.label.name == 'shallow-fusion' || github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' runs-on: ${{ matrix.os }} strategy: matrix: @@ -116,7 +116,7 @@ jobs: .github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.sh - name: Display decoding results for lstm_transducer_stateless2 - if: github.event_name == 'schedule' + if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' shell: bash run: | cd egs/librispeech/ASR @@ -130,9 +130,9 @@ jobs: find fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 find fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - echo "===modified beam search===" - find modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 + # echo "===modified beam search===" + # find modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 + # find modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - name: Display decoding results for lstm_transducer_stateless2 if: github.event.label.name == 'shallow-fusion' @@ -159,7 +159,7 @@ jobs: - name: Upload decoding results for lstm_transducer_stateless2 uses: actions/upload-artifact@v4 - if: github.event_name == 'schedule' || github.event.label.name == 'shallow-fusion' || github.event.label.name == 'LODR' + if: github.event_name == 'schedule' || github.event.label.name == 'shallow-fusion' || github.event.label.name == 'LODR' || github.event_name == 'workflow_dispatch' with: name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-lstm_transducer_stateless2-2022-09-03 path: egs/librispeech/ASR/lstm_transducer_stateless2/exp/ diff --git a/egs/gigaspeech/ASR/pruned_transducer_stateless2/decode.py b/egs/gigaspeech/ASR/pruned_transducer_stateless2/decode.py index ef430302df..f1efebcb94 100755 --- a/egs/gigaspeech/ASR/pruned_transducer_stateless2/decode.py +++ b/egs/gigaspeech/ASR/pruned_transducer_stateless2/decode.py @@ -260,7 +260,7 @@ def decode_one_batch( Return the decoding result. See above description for the format of the returned dict. """ - device = model.device + device = next(model.parameters()).device feature = batch["inputs"] assert feature.ndim == 3