Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
jgw96 committed Sep 26, 2024
1 parent a663e84 commit 4a7266f
Show file tree
Hide file tree
Showing 13 changed files with 780 additions and 755 deletions.
953 changes: 604 additions & 349 deletions package-lock.json

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "web-ai-toolkit",
"version": "0.1.8",
"version": "0.2.0",
"repository": "https://github.com/jgw96/web-ai-toolkit",
"keywords": [
"ai",
Expand Down Expand Up @@ -29,12 +29,12 @@
"author": "",
"license": "ISC",
"devDependencies": {
"typescript": "^5.5.3",
"vite": "^5.3.3",
"vite-plugin-dts": "^3.9.1"
"typescript": "^5.6.2",
"vite": "^5.4.8",
"vite-plugin-dts": "^4.2.2"
},
"dependencies": {
"@huggingface/transformers": "^3.0.0-alpha.14",
"@huggingface/transformers": "^3.0.0-alpha.16",
"@xenova/transformers": "^2.17.2"
}
}
17 changes: 7 additions & 10 deletions src/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
export async function transcribeAudioFile(audioFile: Blob, model: string = "Xenova/whisper-tiny", timestamps: boolean = false, language: string = "en-US") {
try {
const { loadTranscriber, doLocalWhisper } = await import("./services/speech-recognition/whisper-ai");
const { loadTranscriber, doLocalWhisper } = await import("./services/speech-recognition/recognition");
await loadTranscriber(model, timestamps, language);
return doLocalWhisper(audioFile, model);
}
Expand All @@ -12,9 +12,8 @@ export async function transcribeAudioFile(audioFile: Blob, model: string = "Xeno

export async function textToSpeech(text: string, model: string = "Xenova/mms-tts-eng") {
try {
const { loadTTS, doLocalTTS } = await import("./services/text-to-speech/text-to-speech");
await loadTTS(model);
return doLocalTTS(text);
const { runSynthesizer } = await import("./services/text-to-speech/tts");
return runSynthesizer(text, model);
}
catch (err) {
console.error(err);
Expand All @@ -24,9 +23,8 @@ export async function textToSpeech(text: string, model: string = "Xenova/mms-tts

export async function summarize(text: string, model: string = "Xenova/distilbart-cnn-6-6") {
try {
const { loadSummarizer, doLocalSummarize } = await import("./services/summarization/summarization");
await loadSummarizer(model);
return doLocalSummarize(text);
const { runSummarizer } = await import("./services/summarization/summarization");
return runSummarizer(text, model);
}
catch (err) {
console.error(err);
Expand All @@ -36,9 +34,8 @@ export async function summarize(text: string, model: string = "Xenova/distilbart

export async function ocr(image: Blob, model: string = "Xenova/trocr-small-printed") {
try {
const { loadOCR, doLocalOCR } = await import("./services/ocr/ocr");
await loadOCR(model);
return doLocalOCR(image);
const { runOCR } = await import("./services/ocr/ocr");
return runOCR(image, model);
}
catch (err) {
console.error(err);
Expand Down
55 changes: 0 additions & 55 deletions src/services/ocr/ocr-worker.ts

This file was deleted.

104 changes: 68 additions & 36 deletions src/services/ocr/ocr.ts
Original file line number Diff line number Diff line change
@@ -1,48 +1,80 @@
let ocrWorker: Worker;
// let ocrWorker: Worker;

// @ts-ignore
import OCRWorker from './ocr-worker?worker&inline';
// // @ts-ignore
// import OCRWorker from './ocr-worker?worker&inline';

export async function loadOCR(model: string): Promise<void> {
return new Promise(async (resolve) => {
if (!ocrWorker) {
ocrWorker = new OCRWorker();
}
// export async function loadOCR(model: string): Promise<void> {
// return new Promise(async (resolve) => {
// if (!ocrWorker) {
// ocrWorker = new OCRWorker();
// }

ocrWorker.onmessage = async (e) => {
if (e.data.type === "loaded") {
resolve();
}
}
// ocrWorker.onmessage = async (e) => {
// if (e.data.type === "loaded") {
// resolve();
// }
// }

// ocrWorker.postMessage({
// type: "load",
// model
// });
// });
// }

// export function doLocalOCR(blob: Blob) {
// return new Promise((resolve, reject) => {
// try {
// ocrWorker.onmessage = async (e) => {
// if (e.data.type === "ocr") {
// resolve(e.data.text);
// }
// else if (e.data.type === "error") {
// reject(e.data.error);
// }
// }

// const dataURL = URL.createObjectURL(blob);

ocrWorker.postMessage({
type: "load",
model
});
// ocrWorker.postMessage({
// type: "ocr",
// blob: dataURL
// });
// }
// catch (err) {
// reject(err);
// }
// });
// }

/* eslint-disable no-async-promise-executor */
import { pipeline, env } from '@huggingface/transformers';

let ocr: any = undefined;

/**
 * Runs image-to-text OCR on the given image blob.
 * Lazily creates the pipeline on first call (cached in the module-level
 * `ocr` variable) and reuses it afterwards.
 *
 * @param image - Image to recognize text in.
 * @param model - Hugging Face model id; defaults to "Xenova/trocr-small-printed".
 * @returns The raw pipeline output (generated text). The returned promise
 *          rejects if model loading or inference fails — the original wrapped
 *          the body in a `new Promise` with no `reject`, so failures surfaced
 *          only as unhandled rejections.
 */
export async function runOCR(image: Blob, model: string = "Xenova/trocr-small-printed") {
    if (!ocr) {
        await loadOCR(model);
    }
    return ocr(image);
}

export function doLocalOCR(blob: Blob) {
return new Promise((resolve, reject) => {
try {
ocrWorker.onmessage = async (e) => {
if (e.data.type === "ocr") {
resolve(e.data.text);
}
else if (e.data.type === "error") {
reject(e.data.error);
}
}

const dataURL = URL.createObjectURL(blob);

ocrWorker.postMessage({
type: "ocr",
blob: dataURL
async function loadOCR(model: string): Promise<void> {
return new Promise(async (resolve) => {
if (!ocr) {
env.allowLocalModels = false;
env.useBrowserCache = false;
ocr = await pipeline('image-to-text', model || 'Xenova/trocr-small-printed', {
device: (navigator as any).ml ? "webnn" : "webgpu"
});
console.log("loaded ocr", ocr)
resolve();
}
catch (err) {
reject(err);
else {
resolve();
}
});
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,30 +3,49 @@ import { AutomaticSpeechRecognitionPipeline, pipeline, env } from '@huggingface/

let transcriber: AutomaticSpeechRecognitionPipeline | undefined = undefined;

self.onmessage = async (e) => {
if (e.data.type === 'transcribe') {
return new Promise((resolve) => {
console.log("in worker", e.data)
localTranscribe(e.data.blob).then((transcription) => {
console.log("in worker", transcription)
self.postMessage({
type: 'transcribe',
transcription: transcription
export function doLocalWhisper(audioFile: Blob, model: string = "Xenova/whisper-tiny") {
return new Promise(async (resolve, reject) => {
try {
if (!transcriber) {
await loadTranscriber(model || 'Xenova/whisper-tiny', false, 'en');
}

const fileReader = new FileReader();
fileReader.onloadend = async () => {
const audioCTX = new AudioContext({
sampleRate: 16000,
});
resolve(transcription);
})
})
}
else if (e.data.type === "load") {
await loadTranscriber(e.data.model || 'Xenova/whisper-tiny', e.data.timestamps, e.data.language);
self.postMessage({
type: 'loaded'
});
return Promise.resolve();
}
else {
return Promise.reject('Unknown message type');
}
const arrayBuffer = fileReader.result as ArrayBuffer;
const audioData = await audioCTX.decodeAudioData(arrayBuffer);

let audio;
if (audioData.numberOfChannels === 2) {
const SCALING_FACTOR = Math.sqrt(2);

const left = audioData.getChannelData(0);
const right = audioData.getChannelData(1);

audio = new Float32Array(left.length);
for (let i = 0; i < audioData.length; ++i) {
audio[i] = SCALING_FACTOR * (left[i] + right[i]) / 2;
}
} else {
// If the audio is not stereo, we can just use the first channel:
audio = audioData.getChannelData(0);
}

const output = await localTranscribe(audio);
resolve(output);



};
fileReader.readAsArrayBuffer(audioFile);
}
catch (err) {
reject(err);
}
})
}

export async function loadTranscriber(model: string = "Xenova/whisper-tiny", timestamps: boolean, language: string): Promise<void> {
Expand All @@ -49,13 +68,14 @@ export async function loadTranscriber(model: string = "Xenova/whisper-tiny", tim
})
}

export async function localTranscribe(audio: Blob): Promise<string> {
export async function localTranscribe(audio: Float32Array): Promise<string> {
return new Promise(async (resolve, reject) => {
if (transcriber) {
// @ts-ignore
const output = await transcriber(audio, {
chunk_length_s: 30,
stride_length_s: 5,
// @ts-ignore
callback_function: callback_function, // after each generation step
chunk_callback: chunk_callback, // after each chunk is processed
});
Expand Down
Loading

0 comments on commit 4a7266f

Please sign in to comment.