Skip to content

Commit

Permalink
add new whisper model dmatekenya/whisper-large-v3-chichewa
Browse files: browse the repository at this point in the history
  • Loading branch information
devxpy committed Dec 5, 2024
1 parent 3d19714 commit fadc910
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 64 deletions.
136 changes: 82 additions & 54 deletions chart/model-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,14 @@ controlnetModelIds: &controlnetModelIds |-
ioclab/control_v1p_sd15_brightness
monster-labs/control_v1p_sd15_qrcode_monster/v2

commonImg: &commonImg "crgooeyprodwestus1.azurecr.io/gooey-gpu-common:3"
commonImgOld: &commonImgOld "crgooeyprodwestus1.azurecr.io/gooey-gpu-common:3"
commonImg: &commonImg "crgooeyprodwestus1.azurecr.io/gooey-gpu-common:8"
retroImg: &retroImg "crgooeyprodwestus1.azurecr.io/gooey-gpu-retro:6"
deforumImg: &deforumImg "crgooeyprodwestus1.azurecr.io/gooey-gpu-deforum_sd:1"

deployments:
- name: "common-diffusion-dreamshaper"
image: *commonImg
image: *commonImgOld
limits:
memory: "35Gi"
env:
Expand All @@ -53,7 +54,7 @@ deployments:
SD_MODEL_IDS: |-
Lykon/DreamShaper
- name: "common-diffusion-stable-diffusion-2-1"
image: *commonImg
image: *commonImgOld
limits:
memory: "35Gi"
env:
Expand All @@ -64,7 +65,7 @@ deployments:
SD_MODEL_IDS: |-
stabilityai/stable-diffusion-2-1
- name: "common-diffusion-dreamlike-photoreal-2"
image: *commonImg
image: *commonImgOld
limits:
memory: "35Gi"
env:
Expand All @@ -75,7 +76,7 @@ deployments:
SD_MODEL_IDS: |-
dreamlike-art/dreamlike-photoreal-2.0
- name: "common-diffusion-stable-diffusion-v1-5"
image: *commonImg
image: *commonImgOld
limits:
memory: "35Gi"
env:
Expand All @@ -87,7 +88,7 @@ deployments:
runwayml/stable-diffusion-v1-5
- name: "common-diffusion-on-demand"
image: *commonImg
image: *commonImgOld
limits:
memory: "50Gi"
env:
Expand All @@ -102,7 +103,7 @@ deployments:
darkstorm2150/Protogen_v5.3_Official_Release
- name: "common-diffusion-inpaint"
image: *commonImg
image: *commonImgOld
limits:
memory: "20Gi"
env:
Expand Down Expand Up @@ -142,7 +143,7 @@ deployments:
epicdream.safetensors
- name: "common-whisper-en-short"
image: *commonImg
image: *commonImgOld
limits:
memory: "20Gi"
env:
Expand All @@ -153,7 +154,7 @@ deployments:
openai/whisper-large-v2
- name: "common-whisper-te-short"
image: *commonImg
image: *commonImgOld
limits:
memory: "20Gi"
env:
Expand All @@ -163,6 +164,69 @@ deployments:
WHISPER_MODEL_IDS: |-
vasista22/whisper-telugu-large-v2
- name: "common-whisper-en-te-long"
image: *commonImgOld
limits:
memory: "40Gi"
env:
QUEUE_PREFIX: "gooey-gpu/long"
IMPORTS: |-
common.whisper
WHISPER_MODEL_IDS: |-
openai/whisper-large-v2
vasista22/whisper-telugu-large-v2
- name: "common-whisper-hi-bho-short"
image: *commonImgOld
limits:
memory: "10Gi"
env:
QUEUE_PREFIX: "gooey-gpu/short"
IMPORTS: |-
common.whisper
WHISPER_MODEL_IDS: |-
vasista22/whisper-hindi-large-v2
Harveenchadha/vakyansh-wav2vec2-bhojpuri-bhom-60
- name: "common-whisper-hi-bho-long"
image: *commonImgOld
limits:
memory: "40Gi"
env:
QUEUE_PREFIX: "gooey-gpu/long"
IMPORTS: |-
common.whisper
WHISPER_MODEL_IDS: |-
vasista22/whisper-hindi-large-v2
Harveenchadha/vakyansh-wav2vec2-bhojpuri-bhom-60
- name: "common-whisper-chichewa-short"
image: *commonImg
limits_gpu: "10Gi"
limits:
memory: "28Gi" # (220 / 80) * 10
env:
QUEUE_PREFIX: "gooey-gpu/short"
IMPORTS: |-
common.whisper
WHISPER_MODEL_IDS: |-
dmatekenya/whisper-large-v3-chichewa
WHISPER_TOKENIZER_FROM: openai/whisper-large-v3
- name: "common-whisper-chichewa-long"
image: *commonImg
autoscaling:
minReplicaCount: 0
limits_gpu: "10Gi"
limits:
memory: "28Gi" # (220 / 80) * 10
env:
QUEUE_PREFIX: "gooey-gpu/long"
IMPORTS: |-
common.whisper
WHISPER_MODEL_IDS: |-
dmatekenya/whisper-large-v3-chichewa
WHISPER_TOKENIZER_FROM: openai/whisper-large-v3

- name: "retro-nemo-asr"
image: *retroImg
limits:
Expand All @@ -175,7 +239,7 @@ deployments:
https://objectstore.e2enetworks.net/indic-asr-public/checkpoints/conformer/stt_hi_conformer_ctc_large_v2.nemo
- name: "common-audio-ldm-bark"
image: *commonImg
image: *commonImgOld
limits:
memory: "20Gi"
env:
Expand All @@ -190,7 +254,7 @@ deployments:
bark
- name: "common-seamless"
image: "crgooeyprodwestus1.azurecr.io/gooey-gpu-common:6"
image: *commonImg
limits_gpu: "10Gi"
limits:
memory: "28Gi" # (220 / 80) * 10
Expand All @@ -201,7 +265,7 @@ deployments:
facebook/seamless-m4t-v2-large
- name: "common-diffusion-instruct-pix2pix"
image: *commonImg
image: *commonImgOld
limits:
memory: "12Gi"
env:
Expand All @@ -211,7 +275,7 @@ deployments:
timbrooks/instruct-pix2pix
- name: "common-diffusion-upscale"
image: *commonImg
image: *commonImgOld
limits:
memory: "52Gi"
env:
Expand All @@ -221,7 +285,7 @@ deployments:
stabilityai/stable-diffusion-x4-upscaler
- name: "common-mms"
image: *commonImg
image: *commonImgOld
limits:
memory: "25Gi"
env:
Expand All @@ -230,44 +294,8 @@ deployments:
MMS_MODEL_IDS: |-
facebook/mms-1b-all
- name: "common-whisper-en-te-long"
image: *commonImg
limits:
memory: "40Gi"
env:
QUEUE_PREFIX: "gooey-gpu/long"
IMPORTS: |-
common.whisper
WHISPER_MODEL_IDS: |-
openai/whisper-large-v2
vasista22/whisper-telugu-large-v2
- name: "common-whisper-hi-bho-long"
image: *commonImg
limits:
memory: "40Gi"
env:
QUEUE_PREFIX: "gooey-gpu/long"
IMPORTS: |-
common.whisper
WHISPER_MODEL_IDS: |-
vasista22/whisper-hindi-large-v2
Harveenchadha/vakyansh-wav2vec2-bhojpuri-bhom-60
- name: "common-whisper-hi-bho-short"
image: *commonImg
limits:
memory: "10Gi"
env:
QUEUE_PREFIX: "gooey-gpu/short"
IMPORTS: |-
common.whisper
WHISPER_MODEL_IDS: |-
vasista22/whisper-hindi-large-v2
Harveenchadha/vakyansh-wav2vec2-bhojpuri-bhom-60
- name: "common-embeddings-1"
image: *commonImg
image: *commonImgOld
autoscaling:
queueLength: 20
limits:
Expand Down Expand Up @@ -344,7 +372,7 @@ deployments:
RealESRGAN_x2plus
- name: "common-llms-afrollama-v1"
image: "crgooeyprodwestus1.azurecr.io/gooey-gpu-common:7"
image: *commonImg
limits_gpu: "30Gi"
limits:
memory: "80Gi" # (220 / 80) * 30
Expand All @@ -355,7 +383,7 @@ deployments:
Jacaranda/AfroLlama_V1
- name: "common-llms-sealion-v2-1"
image: "crgooeyprodwestus1.azurecr.io/gooey-gpu-common:7"
image: *commonImg
limits_gpu: "30Gi"
limits:
memory: "80Gi" # (220 / 80) * 30
Expand All @@ -366,7 +394,7 @@ deployments:
aisingapore/llama3-8b-cpt-sea-lionv2.1-instruct
- name: "common-llms-sarvam-2b"
image: "crgooeyprodwestus1.azurecr.io/gooey-gpu-common:7"
image: *commonImg
limits_gpu: "6Gi"
limits:
memory: "16Gi" # (220 / 80) * 6
Expand Down
22 changes: 15 additions & 7 deletions common/whisper.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import os
import typing
from functools import lru_cache

import numpy as np
import requests
import torch
import transformers
from transformers import WhisperTokenizer

import gooey_gpu
from api import PipelineInfo, WhisperInputs, AsrOutput
Expand All @@ -20,12 +22,14 @@ def whisper(pipeline: PipelineInfo, inputs: WhisperInputs) -> AsrOutput:
kwargs = {}
if inputs.return_timestamps:
kwargs["return_timestamps"] = True
generate_kwargs = {}
if inputs.language:
kwargs["generate_kwargs"] = dict(
forced_decoder_ids=pipe.tokenizer.get_decoder_prompt_ids(
task=inputs.task, language=inputs.language
)
)
generate_kwargs["language"] = inputs.language
if inputs.task:
generate_kwargs["task"] = inputs.task
if generate_kwargs:
kwargs["generate_kwargs"] = generate_kwargs

# see https://github.com/huggingface/transformers/issues/24707
old_postprocess = pipe.postprocess
if inputs.decoder_kwargs:
Expand Down Expand Up @@ -58,15 +62,19 @@ def postprocess(model_outputs):


@lru_cache
def load_pipe(model_id: str):
def load_pipe(model_id: str) -> transformers.AutomaticSpeechRecognitionPipeline:
print(f"Loading asr model {model_id!r}...")
kwargs = {}
if tokenizer_from := os.environ.get("WHISPER_TOKENIZER_FROM"):
kwargs["tokenizer"] = WhisperTokenizer.from_pretrained(tokenizer_from.strip())
pipe = transformers.pipeline(
"automatic-speech-recognition",
model=model_id,
device=gooey_gpu.DEVICE_ID,
torch_dtype=torch.float16,
**kwargs,
)
return pipe
return typing.cast(transformers.AutomaticSpeechRecognitionPipeline, pipe)


setup_queues(
Expand Down
7 changes: 4 additions & 3 deletions scripts/run-dev.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,11 @@ docker run \
facebook/mms-1b-all
"\
-e WHISPER_MODEL_IDS="
openai/whisper-large-v2
vasista22/whisper-telugu-large-v2
vasista22/whisper-hindi-large-v2
dmatekenya/whisper-large-v3-chichewa
" \
-e WHISPER_TOKENIZER_FROM="
openai/whisper-large-v3
"\
-e SD_MODEL_IDS="
stabilityai/stable-diffusion-2-inpainting
runwayml/stable-diffusion-inpainting
Expand Down

0 comments on commit fadc910

Please sign in to comment.