Skip to content

Commit

Permalink
Fix CI test for gigaspeech (#1787)
Browse files: browse the repository at this point in the history.
  • Loading branch information
csukuangfj authored Oct 29, 2024
1 parent 516b486 commit f23c8ce
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 15 deletions.
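[File header missing from this capture; presumably .github/scripts/run-gigaspeech-pruned-transducer-stateless2-2022-05-12.sh, the script invoked by the gigaspeech workflow shown later on this page.]
Original file line number Diff line number Diff line change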
Expand Up @@ -19,7 +19,7 @@ repo=$(basename $repo_url)

echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
- if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then
+ if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_NAME}" == x"workflow_dispatch" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then
mkdir -p pruned_transducer_stateless2/exp
ln -s $PWD/$repo/exp/pretrained-iter-3488000-avg-20.pt pruned_transducer_stateless2/exp/epoch-999.pt
ln -s $PWD/$repo/data/lang_bpe_500 data/
Expand All @@ -29,8 +29,16 @@ if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" ==
ls -lh data/fbank
ls -lh pruned_transducer_stateless2/exp

- ln -sf data/fbank/cuts_DEV.jsonl.gz data/fbank/gigaspeech_cuts_DEV.jsonl.gz
- ln -sf data/fbank/cuts_TEST.jsonl.gz data/fbank/gigaspeech_cuts_TEST.jsonl.gz
+ pushd data/fbank
+ curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/cuts_DEV.jsonl.gz
+ curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/cuts_TEST.jsonl.gz
+ curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/feats_DEV.lca
+ curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/feats_TEST.lca
+
+ ln -sf cuts_DEV.jsonl.gz gigaspeech_cuts_DEV.jsonl.gz
+ ln -sf cuts_TEST.jsonl.gz gigaspeech_cuts_TEST.jsonl.gz
+ popd


log "Decoding dev and test"

Expand Down
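[File header missing from this capture; presumably .github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.sh, the script invoked by the librispeech workflow shown later on this page.]
Original file line number Diff line number Diff line change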
Expand Up @@ -162,7 +162,7 @@ if [[ x"${GITHUB_EVENT_LABEL_NAME}" == x"LODR" ]]; then
--ngram-lm-scale -0.16
fi

- if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" ]]; then
+ if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_NAME}" == x"workflow_dispatch" ]]; then
mkdir -p lstm_transducer_stateless2/exp
ln -s $PWD/$repo/exp/pretrained.pt lstm_transducer_stateless2/exp/epoch-999.pt
ln -s $PWD/$repo/data/lang_bpe_500 data/
Expand All @@ -175,7 +175,7 @@ if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" ]]; then
# use a small value for decoding with CPU
max_duration=100

- for method in greedy_search fast_beam_search modified_beam_search; do
+ for method in greedy_search fast_beam_search; do
log "Decoding with $method"

./lstm_transducer_stateless2/decode.py \
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/run-gigaspeech-2022-05-13.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ concurrency:

jobs:
run_gigaspeech_2022_05_13:
- if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
+ if: github.event_name == 'workflow_dispatch' || github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
runs-on: ${{ matrix.os }}
strategy:
matrix:
Expand Down Expand Up @@ -106,7 +106,7 @@ jobs:
.github/scripts/run-gigaspeech-pruned-transducer-stateless2-2022-05-12.sh
- name: Display decoding results for gigaspeech pruned_transducer_stateless2
- if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
+ if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event.label.name == 'run-decode'
shell: bash
run: |
cd egs/gigaspeech/ASR/
Expand All @@ -122,7 +122,7 @@ jobs:
- name: Upload decoding results for gigaspeech pruned_transducer_stateless2
uses: actions/upload-artifact@v4
- if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
+ if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event.label.name == 'run-decode'
with:
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-gigaspeech-pruned_transducer_stateless2-2022-05-12
path: egs/gigaspeech/ASR/pruned_transducer_stateless2/exp/
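[File header missing from this capture; presumably the GitHub Actions workflow that defines the run_librispeech_lstm_transducer_stateless2_2022_09_03 job — exact path not shown here.]
Original file line number Diff line number Diff line change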
Expand Up @@ -24,7 +24,7 @@ concurrency:

jobs:
run_librispeech_lstm_transducer_stateless2_2022_09_03:
- if: github.event.label.name == 'ready' || github.event.label.name == 'LODR' || github.event.label.name == 'shallow-fusion' || github.event_name == 'push' || github.event_name == 'schedule'
+ if: github.event.label.name == 'ready' || github.event.label.name == 'LODR' || github.event.label.name == 'shallow-fusion' || github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
runs-on: ${{ matrix.os }}
strategy:
matrix:
Expand Down Expand Up @@ -116,7 +116,7 @@ jobs:
.github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.sh
- name: Display decoding results for lstm_transducer_stateless2
- if: github.event_name == 'schedule'
+ if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
shell: bash
run: |
cd egs/librispeech/ASR
Expand All @@ -130,9 +130,9 @@ jobs:
find fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
find fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
- echo "===modified beam search==="
- find modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
- find modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
+ # echo "===modified beam search==="
+ # find modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
+ # find modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
- name: Display decoding results for lstm_transducer_stateless2
if: github.event.label.name == 'shallow-fusion'
Expand All @@ -159,7 +159,7 @@ jobs:
- name: Upload decoding results for lstm_transducer_stateless2
uses: actions/upload-artifact@v4
- if: github.event_name == 'schedule' || github.event.label.name == 'shallow-fusion' || github.event.label.name == 'LODR'
+ if: github.event_name == 'schedule' || github.event.label.name == 'shallow-fusion' || github.event.label.name == 'LODR' || github.event_name == 'workflow_dispatch'
with:
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-lstm_transducer_stateless2-2022-09-03
path: egs/librispeech/ASR/lstm_transducer_stateless2/exp/
2 changes: 1 addition & 1 deletion egs/gigaspeech/ASR/pruned_transducer_stateless2/decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ def decode_one_batch(
Return the decoding result. See above description for the format of
the returned dict.
"""
- device = model.device
+ device = next(model.parameters()).device
feature = batch["inputs"]
assert feature.ndim == 3

Expand Down

0 comments on commit f23c8ce

Please sign in to comment.