From 8378fb3bf6cc8df02ccdb7241113ba95ebbe2c75 Mon Sep 17 00:00:00 2001
From: atila
Date: Fri, 8 Nov 2024 16:56:43 -0800
Subject: [PATCH 1/2] Fixes #12

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index d6ec120..cf767c9 100644
--- a/README.md
+++ b/README.md
@@ -70,7 +70,7 @@ If the above command is successfuly executed, your model will have been publishe
 Evaluate ([Argmax-](https://huggingface.co/argmaxinc/whisperkit-coreml) or developer-published) models on speech recognition datasets:
 
 ```shell
-whisperkit-evaluate-model --model-version <model-version> --output-dir <output-dir> --dataset {librispeech-debug,librispeech,earnings22}
+whisperkit-evaluate-model --model-version <model-version> --output-dir <output-dir> --evaluation-dataset {librispeech-debug,librispeech,earnings22}
 ```
 
 By default, this command uses the latest `main` branch commits from `WhisperKit` and searches within [Argmax-published](https://huggingface.co/argmaxinc/whisperkit-coreml) model repositories. For optional arguments related to code and model versioning, please see the help menu with `-h`
@@ -85,7 +85,7 @@ If you would like to evaluate WhisperKit models on your own dataset:
 export CUSTOM_EVAL_DATASET="my-dataset-name-on-hub"
 export DATASET_REPO_OWNER="my-user-or-org-name-on-hub"
 export MODEL_REPO_ID="my-org/my-whisper-repo-name" # if evaluating self-published models
-whisperkit-evaluate-model --model-version <model-version> --output-dir <output-dir> --dataset my-dataset-name-on-hub
+whisperkit-evaluate-model --model-version <model-version> --output-dir <output-dir> --evaluation-dataset my-dataset-name-on-hub
 ```
 
 ## Python Inference

From 50011f43cd5c560d5730d9af91ac49f0b6690a6f Mon Sep 17 00:00:00 2001
From: atila
Date: Fri, 8 Nov 2024 18:37:14 -0800
Subject: [PATCH 2/2] Fixes #15

---
 whisperkit/evaluate/evaluate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/whisperkit/evaluate/evaluate.py b/whisperkit/evaluate/evaluate.py
index f3f3ea3..576f168 100644
--- a/whisperkit/evaluate/evaluate.py
+++ b/whisperkit/evaluate/evaluate.py
@@ -54,7 +54,7 @@ def evaluate(whisper_pipeline: Union[pipelines.WhisperPipeline, pipelines.Whispe
 
     logger.info(f"Launching {num_proc} processes to run {whisper_pipeline.__class__.__name__}")
     with Pool(num_proc) as pool:
-        results = list(tqdm.tqdm(pool.map(
+        results = list(tqdm.tqdm(pool.imap(
             partial(evaluate_sample, whisper_pipeline=whisper_pipeline),
             dataset), total=len(dataset)
         ))
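
Context for `[PATCH 2/2]`: `Pool.map` blocks until every result is computed and returns a fully materialized list, so wrapping it in `tqdm` only renders the progress bar once all work has finished. `Pool.imap` instead returns a lazy iterator that yields results in order as workers complete them, letting the bar advance per sample. A minimal, self-contained sketch of the difference (`square` and the sample data are illustrative stand-ins, not part of the repository):

```python
from multiprocessing import Pool

import tqdm


def square(x: int) -> int:
    return x * x


if __name__ == "__main__":
    data = list(range(100))
    with Pool(4) as pool:
        # pool.map would block here and hand tqdm an already-finished list,
        # so the bar would jump from 0% to 100% in a single step.
        # pool.imap yields each result as it becomes available, so tqdm
        # updates live; total= is required because the iterator has no len().
        results = list(tqdm.tqdm(pool.imap(square, data), total=len(data)))
    assert results == [x * x for x in data]
```

Like `map`, `imap` preserves input order, so the collected `results` list is unchanged by the patch; if ordering did not matter, `imap_unordered` would update the bar even more smoothly.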