From 1b1ad7f40f7cf51fb24d35aff75187122d1ee9ce Mon Sep 17 00:00:00 2001 From: Matt Hillsdon Date: Thu, 14 Mar 2024 11:07:39 +0000 Subject: [PATCH] Use a Worker per search language Also introduces a better approach to Worker types An alternative would be a separate tsconfig as suggested here with some folder restructuring https://stackoverflow.com/questions/56356655/structuring-a-typescript-project-with-workers --- src/documentation/search/search-client.ts | 58 ++++++++++++++++-- src/documentation/search/search-hooks.tsx | 27 ++++---- src/documentation/search/search.test.ts | 19 +++--- src/documentation/search/search.ts | 61 ++++++------------- src/documentation/search/search.worker.de.ts | 9 +++ .../{search.worker.ts => search.worker.en.ts} | 3 +- src/documentation/search/search.worker.es.ts | 9 +++ src/documentation/search/search.worker.fr.ts | 9 +++ src/documentation/search/search.worker.ja.ts | 9 +++ src/documentation/search/search.worker.ko.ts | 9 +++ src/documentation/search/search.worker.nl.ts | 9 +++ src/language-server/pyright.ts | 1 - tsconfig.json | 2 +- vite.config.ts | 3 - 14 files changed, 155 insertions(+), 73 deletions(-) create mode 100644 src/documentation/search/search.worker.de.ts rename src/documentation/search/{search.worker.ts => search.worker.en.ts} (69%) create mode 100644 src/documentation/search/search.worker.es.ts create mode 100644 src/documentation/search/search.worker.fr.ts create mode 100644 src/documentation/search/search.worker.ja.ts create mode 100644 src/documentation/search/search.worker.ko.ts create mode 100644 src/documentation/search/search.worker.nl.ts diff --git a/src/documentation/search/search-client.ts b/src/documentation/search/search-client.ts index f76a850b8..307802146 100644 --- a/src/documentation/search/search-client.ts +++ b/src/documentation/search/search-client.ts @@ -11,17 +11,14 @@ import { SearchResults, } from "./common"; -// eslint-disable-next-line import/no-webpack-loader-syntax import { ApiDocsResponse } from "../../language-server/apidocs"; import { Toolkit } from "../reference/model"; export class WorkerSearch implements Search { private worker: Worker; private resolveQueue: Array<(value: SearchResults) => void> = []; - constructor() { - this.worker = new Worker(new URL("./search.worker.ts", import.meta.url), { - type: "module", - }); + constructor(public language: string) { + this.worker = workerForLanguage(language); } index(reference: Toolkit, api: ApiDocsResponse) { const message: IndexMessage = { @@ -50,4 +47,55 @@ export class WorkerSearch implements Search { }); return promise; } + + dispose() { + // We just ask nicely so it can respond to any in flight requests + this.worker.postMessage({ + kind: "shutdown", + }); + } } + +const workerForLanguage = (language: string) => { + // See also convertLangToLunrParam + + // Enumerated for code splitting as Vite doesn't support dynamic strings here + // We use a worker per language for because Vite doesn't support using dynamic + // import in a iife Worker and Safari doesn't support module workers. + switch (language.toLowerCase()) { + case "de": { + return new Worker(new URL(`./search.worker.de.ts`, import.meta.url), { + type: "module", + }); + } + case "fr": { + return new Worker(new URL(`./search.worker.fr.ts`, import.meta.url), { + type: "module", + }); + } + case "es-es": { + return new Worker(new URL(`./search.worker.es-es.ts`, import.meta.url), { + type: "module", + }); + } + case "ja": { + return new Worker(new URL(`./search.worker.ja.ts`, import.meta.url), { + type: "module", + }); + } + case "ko": { + return new Worker(new URL(`./search.worker.ko.ts`, import.meta.url), { + type: "module", + }); + } + case "nl": { + return new Worker(new URL(`./search.worker.nl.ts`, import.meta.url), { + type: "module", + }); + } + default: + return new Worker(new URL(`./search.worker.en.ts`, import.meta.url), { + type: "module", + }); + } +}; diff --git a/src/documentation/search/search-hooks.tsx b/src/documentation/search/search-hooks.tsx index 802881c6c..40af5ecad 100644 --- a/src/documentation/search/search-hooks.tsx +++ b/src/documentation/search/search-hooks.tsx @@ -10,17 +10,16 @@ import { useContext, useEffect, useMemo, + useRef, useState, } from "react"; import useIsUnmounted from "../../common/use-is-unmounted"; import { useLogging } from "../../logging/logging-hooks"; import { useSettings } from "../../settings/settings"; import { useDocumentation } from "../documentation-hooks"; -import { Search, SearchResults } from "./common"; +import { SearchResults } from "./common"; import { WorkerSearch } from "./search-client"; -const search: Search = new WorkerSearch(); - type UseSearch = { results: SearchResults | undefined; query: string; @@ -43,19 +42,30 @@ const SearchProvider = ({ children }: { children: ReactNode }) => { const [results, setResults] = useState(); const isUnmounted = useIsUnmounted(); const logging = useLogging(); + const [{ languageId }] = useSettings(); + const search = useRef(); + useEffect(() => { + if (languageId !== search.current?.language) { + search.current?.dispose(); + search.current = new WorkerSearch(languageId); + setQuery(""); + } // Wait for both, no reason to index with just one then redo with both. if (reference.status === "ok" && api) { - search.index(reference.content, api); + search.current.index(reference.content, api); } - }, [reference, api]); + }, [languageId, reference, api]); const debouncedSearch = useMemo( () => debounce(async (newQuery: string) => { + if (!search.current) { + return; + } const trimmedQuery = newQuery.trim(); if (trimmedQuery) { - const results = await search.search(trimmedQuery); + const results = await search.current.search(trimmedQuery); if (!isUnmounted()) { setResults((prevResults) => { if (!prevResults) { @@ -71,11 +81,6 @@ const SearchProvider = ({ children }: { children: ReactNode }) => { [setResults, isUnmounted, logging] ); - const [{ languageId }] = useSettings(); - useEffect(() => { - setQuery(""); - }, [languageId]); - useEffect(() => { debouncedSearch(query); }, [debouncedSearch, query]); diff --git a/src/documentation/search/search.test.ts b/src/documentation/search/search.test.ts index 8f11f2303..fa7cc4125 100644 --- a/src/documentation/search/search.test.ts +++ b/src/documentation/search/search.test.ts @@ -9,12 +9,13 @@ import { Toolkit } from "../reference/model"; import { IndexMessage } from "./common"; import lunrJa from "@microbit/lunr-languages/lunr.ja"; import { - buildReferenceIndex, + buildIndex, buildSearchIndex, SearchableContent, SearchWorker, } from "./search"; import { vi } from "vitest"; +import frLanguageSupport from "@microbit/lunr-languages/lunr.fr"; const searchableReferenceContent: SearchableContent[] = [ { @@ -99,7 +100,9 @@ describe("Search", () => { }); describe("buildReferenceIndex", () => { - it("uses language from the toolkit for the Reference index", async () => { + it("uses language support provided", async () => { + // We used to derive this from the index and dynamically load the right language support + // inside the worker, but switched to a worker per language when movign to Vite const api: ApiDocsResponse = {}; const referenceEn: Toolkit = { id: "reference", @@ -119,12 +122,12 @@ describe("buildReferenceIndex", () => { ...referenceEn, language: "fr", }; - const enIndex = await buildReferenceIndex(referenceEn, api); + const enIndex = await buildIndex(referenceEn, api, undefined); expect(enIndex.search("topic").reference.length).toEqual(1); // "that" is an English stopword expect(enIndex.search("that").reference.length).toEqual(0); - const frIndex = await buildReferenceIndex(referenceFr, api); + const frIndex = await buildIndex(referenceFr, api, frLanguageSupport); expect(frIndex.search("topic").reference.length).toEqual(1); // "that" is not a French stopword expect(frIndex.search("that").reference.length).toEqual(1); @@ -136,9 +139,9 @@ describe("SearchWorker", () => { const postMessage = vi.fn(); const ctx = { postMessage, - } as unknown as Worker; + } as unknown as DedicatedWorkerGlobalScope; - new SearchWorker(ctx); + new SearchWorker(ctx, undefined); ctx.onmessage!( new MessageEvent("message", { @@ -179,9 +182,9 @@ describe("SearchWorker", () => { const postMessage = vi.fn(); const ctx = { postMessage, - } as unknown as Worker; + } as unknown as DedicatedWorkerGlobalScope; - new SearchWorker(ctx); + new SearchWorker(ctx, undefined); const emptyIndex: IndexMessage = { kind: "index", diff --git a/src/documentation/search/search.ts b/src/documentation/search/search.ts index 019faa558..33edb8d5d 100644 --- a/src/documentation/search/search.ts +++ b/src/documentation/search/search.ts @@ -3,11 +3,10 @@ * * SPDX-License-Identifier: MIT */ -import lunr from "lunr"; import multi from "@microbit/lunr-languages/lunr.multi"; import stemmerSupport from "@microbit/lunr-languages/lunr.stemmer.support"; import tinyseg from "@microbit/lunr-languages/tinyseg"; -import { retryAsyncLoad } from "../../common/chunk-util"; +import lunr from "lunr"; import { splitDocString } from "../../editor/codemirror/language-server/docstrings"; import type { ApiDocsEntry, @@ -22,7 +21,7 @@ import { Result, SearchResults, } from "./common"; -import { contextExtracts, fullStringExtracts, Position } from "./extracts"; +import { Position, contextExtracts, fullStringExtracts } from "./extracts"; export const supportedSearchLanguages = [ "de", @@ -233,6 +232,7 @@ export const buildSearchIndex = ( languagePlugin: lunr.Builder.Plugin, ...plugins: lunr.Builder.Plugin[] ): SearchIndex => { + console.log("Building index", language, languagePlugin); let customTokenizer: TokenizerFunction | undefined; const index = lunr(function () { this.ref("id"); @@ -267,21 +267,15 @@ export const buildSearchIndex = ( }; // Exposed for testing. -export const buildReferenceIndex = async ( +export const buildIndex = async ( reference: Toolkit, - api: ApiDocsResponse + api: ApiDocsResponse, + languageSupport: ((l: typeof lunr) => void) | undefined ): Promise => { const language = convertLangToLunrParam(reference.language); - const languageSupport = await retryAsyncLoad(() => - loadLunrLanguageSupport(language) - ); const plugins: lunr.Builder.Plugin[] = []; if (languageSupport && language) { - // Loading plugin for fr makes lunr.fr available but we don't model this in the types. - // Avoid repeatedly initializing them when switching back and forth. - if (!lunr[language]) { - languageSupport(lunr); - } + languageSupport(lunr); plugins.push(lunr[language]); } @@ -310,36 +304,10 @@ export const buildReferenceIndex = async ( ); }; -async function loadLunrLanguageSupport( - language: LunrLanguage | undefined -): Promise void)> { - if (!language) { - // English. - return undefined; - } - // Enumerated for code splitting. - switch (language.toLowerCase()) { - case "de": - return (await import("@microbit/lunr-languages/lunr.de")).default; - case "fr": - return (await import("@microbit/lunr-languages/lunr.fr")).default; - case "es": - return (await import("@microbit/lunr-languages/lunr.es")).default; - case "ja": - return (await import("@microbit/lunr-languages/lunr.ja")).default; - case "ko": - return (await import("@microbit/lunr-languages/lunr.ko")).default; - case "nl": - return (await import("@microbit/lunr-languages/lunr.nl")).default; - default: - // No search support for the language, default to lunr's built-in English support. - return undefined; - } -} - type LunrLanguage = "de" | "es" | "fr" | "ja" | "nl" | "ko"; function convertLangToLunrParam(language: string): LunrLanguage | undefined { + // See also workerForLanguage switch (language.toLowerCase()) { case "de": return "de"; @@ -365,7 +333,10 @@ export class SearchWorker { private recordInitialization: (() => void) | undefined; private initialized: Promise; - constructor(private ctx: Worker) { + constructor( + private ctx: DedicatedWorkerGlobalScope, + private languageSupport: ((l: typeof lunr) => void) | undefined + ) { // We return Promises here just to allow for easy testing. this.ctx.onmessage = async (event: MessageEvent) => { const data = event.data; @@ -373,6 +344,8 @@ export class SearchWorker { return this.query(data as QueryMessage); } else if (data.kind === "index") { return this.index(data as IndexMessage); + } else if (data.kind === "shutdown") { + this.ctx.close(); } else { console.error("Unexpected worker message", event); } @@ -384,7 +357,11 @@ export class SearchWorker { } private async index(message: IndexMessage) { - this.search = await buildReferenceIndex(message.reference, message.api); + this.search = await buildIndex( + message.reference, + message.api, + this.languageSupport + ); this.recordInitialization!(); } diff --git a/src/documentation/search/search.worker.de.ts b/src/documentation/search/search.worker.de.ts new file mode 100644 index 000000000..912e35197 --- /dev/null +++ b/src/documentation/search/search.worker.de.ts @@ -0,0 +1,9 @@ +/** + * (c) 2022, Micro:bit Educational Foundation and contributors + * + * SPDX-License-Identifier: MIT + */ +import { SearchWorker } from "./search"; +import languageSupport from "@microbit/lunr-languages/lunr.de"; + +new SearchWorker(self as DedicatedWorkerGlobalScope, languageSupport); diff --git a/src/documentation/search/search.worker.ts b/src/documentation/search/search.worker.en.ts similarity index 69% rename from src/documentation/search/search.worker.ts rename to src/documentation/search/search.worker.en.ts index 1bd89b9a3..bf6f9884b 100644 --- a/src/documentation/search/search.worker.ts +++ b/src/documentation/search/search.worker.en.ts @@ -5,5 +5,4 @@ */ import { SearchWorker } from "./search"; -// eslint-disable-next-line -new SearchWorker(self as any); +new SearchWorker(self as DedicatedWorkerGlobalScope, undefined); diff --git a/src/documentation/search/search.worker.es.ts b/src/documentation/search/search.worker.es.ts new file mode 100644 index 000000000..d16d21f0e --- /dev/null +++ b/src/documentation/search/search.worker.es.ts @@ -0,0 +1,9 @@ +/** + * (c) 2022, Micro:bit Educational Foundation and contributors + * + * SPDX-License-Identifier: MIT + */ +import { SearchWorker } from "./search"; +import languageSupport from "@microbit/lunr-languages/lunr.es"; + +new SearchWorker(self as DedicatedWorkerGlobalScope, languageSupport); diff --git a/src/documentation/search/search.worker.fr.ts b/src/documentation/search/search.worker.fr.ts new file mode 100644 index 000000000..0ea1c9b1f --- /dev/null +++ b/src/documentation/search/search.worker.fr.ts @@ -0,0 +1,9 @@ +/** + * (c) 2022, Micro:bit Educational Foundation and contributors + * + * SPDX-License-Identifier: MIT + */ +import { SearchWorker } from "./search"; +import languageSupport from "@microbit/lunr-languages/lunr.fr"; + +new SearchWorker(self as DedicatedWorkerGlobalScope, languageSupport); diff --git a/src/documentation/search/search.worker.ja.ts b/src/documentation/search/search.worker.ja.ts new file mode 100644 index 000000000..a99e7eb40 --- /dev/null +++ b/src/documentation/search/search.worker.ja.ts @@ -0,0 +1,9 @@ +/** + * (c) 2022, Micro:bit Educational Foundation and contributors + * + * SPDX-License-Identifier: MIT + */ +import { SearchWorker } from "./search"; +import languageSupport from "@microbit/lunr-languages/lunr.ja"; + +new SearchWorker(self as DedicatedWorkerGlobalScope, languageSupport); diff --git a/src/documentation/search/search.worker.ko.ts b/src/documentation/search/search.worker.ko.ts new file mode 100644 index 000000000..63f163aae --- /dev/null +++ b/src/documentation/search/search.worker.ko.ts @@ -0,0 +1,9 @@ +/** + * (c) 2022, Micro:bit Educational Foundation and contributors + * + * SPDX-License-Identifier: MIT + */ +import { SearchWorker } from "./search"; +import languageSupport from "@microbit/lunr-languages/lunr.ko"; + +new SearchWorker(self as DedicatedWorkerGlobalScope, languageSupport); diff --git a/src/documentation/search/search.worker.nl.ts b/src/documentation/search/search.worker.nl.ts new file mode 100644 index 000000000..49a2ccf4f --- /dev/null +++ b/src/documentation/search/search.worker.nl.ts @@ -0,0 +1,9 @@ +/** + * (c) 2022, Micro:bit Educational Foundation and contributors + * + * SPDX-License-Identifier: MIT + */ +import { SearchWorker } from "./search"; +import languageSupport from "@microbit/lunr-languages/lunr.nl"; + +new SearchWorker(self as DedicatedWorkerGlobalScope, languageSupport); diff --git a/src/language-server/pyright.ts b/src/language-server/pyright.ts index a9824ff95..b976cce20 100644 --- a/src/language-server/pyright.ts +++ b/src/language-server/pyright.ts @@ -75,7 +75,6 @@ export const pyright = async ( // onward. const { initialData, port } = e.data; const background = new Worker(workerScript, { - type: "classic", name: `Pyright-background-${idSuffix}-${++backgroundWorkerCount}`, }); workers.push(background); diff --git a/tsconfig.json b/tsconfig.json index 803a8700c..1d72bb95f 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -2,7 +2,7 @@ "compilerOptions": { "target": "ES2021", "useDefineForClassFields": true, - "lib": ["ES2021", "DOM", "DOM.Iterable"], + "lib": ["ES2021", "DOM", "DOM.Iterable", "WebWorker"], "module": "ESNext", "skipLibCheck": true, diff --git a/vite.config.ts b/vite.config.ts index 3099f0778..34aa4cc8d 100644 --- a/vite.config.ts +++ b/vite.config.ts @@ -49,9 +49,6 @@ export default defineConfig(({ mode }) => { outDir: "build", sourcemap: true, }, - worker: { - format: "es", - }, server: { port: 3000, },