diff --git a/binding/web/.gitignore b/binding/web/.gitignore index 21f56948..596b3705 100644 --- a/binding/web/.gitignore +++ b/binding/web/.gitignore @@ -1,7 +1,7 @@ node_modules dist -src/lib/* -src/!lib/.gitkeep +lib/* +!lib/.gitkeep test/*.bin .idea/ test_data.json diff --git a/binding/web/README.md b/binding/web/README.md index 2c4691ed..ba4628c5 100644 --- a/binding/web/README.md +++ b/binding/web/README.md @@ -13,26 +13,13 @@ models. picoLLM Inference Engine is: - Runs on CPU and GPU - Free for open-weight models -## Requirements - -PicoLLM Web Binding uses [SharedArrayBuffer](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/SharedArrayBuffer) to generate -text. Modern browsers require the following response headers to allow the usage of `SharedArrayBuffers`: - -``` -Cross-Origin-Opener-Policy: same-origin -Cross-Origin-Embedder-Policy: require-corp -``` - -Refer to our [Web demos](https://github.com/Picovoice/picollm/tree/master/demo/web) for example on creating a server -with the corresponding response headers. - ## Compatibility - Chrome / Edge - Firefox - Safari -**NOTE**: IndexedDB, SIMD and SharedArrayBuffers are required to use `picoLLM`. +**NOTE**: IndexedDB and SIMD are required to use `picoLLM`. 
## Installation diff --git a/binding/web/cypress.config.ts b/binding/web/cypress.config.ts index 4e465888..cd9bd0ff 100644 --- a/binding/web/cypress.config.ts +++ b/binding/web/cypress.config.ts @@ -8,14 +8,5 @@ export default defineConfig({ specPattern: 'test/*.test.{js,jsx,ts,tsx}', video: false, screenshotOnRunFailure: false, - setupNodeEvents(on, config) { - on('before:browser:launch', (browser, launchOptions) => { - if (browser.name === 'chrome') { - launchOptions.args.push('--enable-features=SharedArrayBuffer'); - } - - return launchOptions; - }); - }, }, }); diff --git a/binding/web/module.d.ts b/binding/web/module.d.ts index d8f483ef..af7897a8 100644 --- a/binding/web/module.d.ts +++ b/binding/web/module.d.ts @@ -3,11 +3,6 @@ declare module "*.wasm" { export default content; } -declare module "*.txt" { - const content: string; - export default content; -} - declare module 'web-worker:*' { const WorkerFactory: new () => Worker; export default WorkerFactory; diff --git a/binding/web/package.json b/binding/web/package.json index 1d078942..e7e85454 100644 --- a/binding/web/package.json +++ b/binding/web/package.json @@ -22,7 +22,6 @@ "lint": "eslint . 
--ext .js,.ts", "prepack": "npm-run-all build", "start": "cross-env TARGET='debug' rollup --config --watch", - "serve": "node server.js", "watch": "rollup --config --watch", "format": "prettier --write \"**/*.{js,ts,json}\"", "copywasm": "node scripts/copy_wasm.js", @@ -30,7 +29,7 @@ "test": "cypress run --spec test/picollm.test.ts --browser chrome --headed" }, "dependencies": { - "@picovoice/web-utils": "~1.4.1" + "@picovoice/web-utils": "~1.4.3" }, "devDependencies": { "@babel/core": "^7.21.3", @@ -42,8 +41,9 @@ "@rollup/plugin-node-resolve": "^15.0.1", "@rollup/plugin-terser": "^0.4.0", "@rollup/pluginutils": "^5.0.2", - "@types/emscripten": "^1.39.13", "@types/node": "^18.11.9", + "@types/pv-xpu-web-worker": "../../lib/wasm/dist/types/xpu_web_worker", + "@types/pv-xpu-webgpu": "../../lib/wasm/dist/types/xpu_webgpu", "@typescript-eslint/eslint-plugin": "^5.51.0", "@typescript-eslint/parser": "^5.51.0", "async-mutex": "^0.4.0", @@ -51,9 +51,10 @@ "cypress": "^12.8.1", "eslint": "^8.22.0", "eslint-plugin-cypress": "^2.12.1", - "mime-types": "^2.1.35", "npm-run-all": "^4.1.5", "prettier": "^2.8.3", + "pv-xpu-web-worker": "../../lib/wasm/dist/xpu_web_worker/esm", + "pv-xpu-webgpu": "../../lib/wasm/dist/xpu_webgpu/esm", "rollup": "^2.79.1", "rollup-plugin-typescript2": "^0.34.1", "rollup-plugin-web-worker-loader": "^1.6.1", diff --git a/binding/web/rollup.config.js b/binding/web/rollup.config.js index ba9a2076..813c7a38 100644 --- a/binding/web/rollup.config.js +++ b/binding/web/rollup.config.js @@ -69,7 +69,7 @@ export default { exclude: '**/node_modules/**', }), base64({ - include: ['./**/*.wasm', './**/*.txt'], + include: ['./lib/**/*.wasm'], }) ], }; diff --git a/binding/web/scripts/copy_wasm.js b/binding/web/scripts/copy_wasm.js index b28c2d52..899ac2c6 100644 --- a/binding/web/scripts/copy_wasm.js +++ b/binding/web/scripts/copy_wasm.js @@ -1,11 +1,11 @@ const fs = require("fs"); -const { join, extname } = require("path"); +const { join } = require("path"); 
const wasmFiles = [ "pv_picollm.wasm", "pv_picollm_simd.wasm", - "pv_picollm.js", - "pv_picollm_simd.js", + "pv_picollm_web_worker_helper.wasm", + "pv_picollm_web_worker_helper_simd.wasm", ] console.log("Copying the WASM model..."); @@ -19,16 +19,12 @@ const sourceDirectory = join( "wasm" ); -const outputDirectory = join(__dirname, "..", "src", "lib"); +const outputDirectory = join(__dirname, "..", "lib"); try { fs.mkdirSync(outputDirectory, { recursive: true }); wasmFiles.forEach(file => { fs.copyFileSync(join(sourceDirectory, file), join(outputDirectory, file)) - const ext = extname(file); - if (ext === ".js") { - fs.copyFileSync(join(sourceDirectory, file), join(outputDirectory, file.replace(ext, ".txt"))); - } }) } catch (error) { console.error(error); diff --git a/binding/web/src/index.ts b/binding/web/src/index.ts index e380bbf8..192f4310 100644 --- a/binding/web/src/index.ts +++ b/binding/web/src/index.ts @@ -41,15 +41,12 @@ import { PicoLLMWorkerResponse, } from './types'; -import picoLLMWasmSimd from './lib/pv_picollm_simd.wasm'; -import picoLLMWasmLib from './lib/pv_picollm_simd.txt'; +import picoLLMWasmSimd from '../lib/pv_picollm_simd.wasm'; import * as PicoLLMErrors from './picollm_errors'; PicoLLM.setWasmSimd(picoLLMWasmSimd); PicoLLMWorker.setWasmSimd(picoLLMWasmSimd); -PicoLLM.setWasmLib(picoLLMWasmLib); -PicoLLMWorker.setWasmLib(picoLLMWasmLib); export { Dialog, diff --git a/binding/web/src/lib/.gitkeep b/binding/web/src/lib/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/binding/web/src/picollm.ts b/binding/web/src/picollm.ts index ae26e892..ff85ec5e 100644 --- a/binding/web/src/picollm.ts +++ b/binding/web/src/picollm.ts @@ -14,15 +14,20 @@ import { Mutex } from 'async-mutex'; import { + aligned_alloc_type, arrayBufferToStringAtIndex, base64ToUint8Array, + buildWasm, isAccessKeyValid, + pv_free_type, + PvError, unsignedAddress, } from '@picovoice/web-utils'; import { simd } from 'wasm-feature-detect'; -import createModule 
from "./lib/pv_picollm_simd"; +import initXpuWebWorker from 'pv-xpu-web-worker'; +import initXpuWebGPU from "pv-xpu-webgpu"; import { PicoLLMModel, @@ -39,10 +44,45 @@ import { import * as PicoLLMErrors from './picollm_errors'; import { pvStatusToException } from './picollm_errors'; +import picoLLMWebWorkerHelperSimd from '../lib/pv_picollm_web_worker_helper_simd.wasm'; import { loadModel } from './utils'; import { Dialog, DIALOGS } from './dialog'; +export class PicoLLMStreamCallback { + private _wasmMemory: WebAssembly.Memory | undefined; + + private _userCallback?: (token: string) => void; + + public constructor(memory: WebAssembly.Memory) { + this._wasmMemory = memory; + } + + public release(): void { + this._wasmMemory = undefined; + } + + public setUserCallback(userCallback?: (token: string) => void): void { + this._userCallback = userCallback; + } + + public streamCallbackWasm = (tokenAddress: number): void => { + if (this._wasmMemory === undefined) { + return; + } + + const tokenAddressUnsigned = unsignedAddress(tokenAddress); + const memoryBufferUint8 = new Uint8Array(this._wasmMemory.buffer); + const token = arrayBufferToStringAtIndex( + memoryBufferUint8, + tokenAddressUnsigned + ); + if (this._userCallback) { + this._userCallback(token); + } + }; +} + /** * WebAssembly function types */ @@ -52,7 +92,7 @@ type pv_picollm_init_type = ( device: number, object: number ) => Promise; -type pv_picollm_delete_type = (object: number) => void; +type pv_picollm_delete_type = (object: number) => Promise; type pv_picollm_generate_type = ( object: number, prompt: number, @@ -73,12 +113,12 @@ type pv_picollm_generate_type = ( num_completion_tokens: number, completion: number ) => Promise; -type pv_picollm_interrupt_type = (object: number) => number; type pv_picollm_delete_completion_tokens_type = ( object: number, numCompletionTokens: number -) => void; -type pv_picollm_delete_completion_type = (completion: number) => void; +) => Promise; +type 
pv_picollm_interrupt_type = (object: number) => Promise; +type pv_picollm_delete_completion_type = (completion: number) => Promise; type pv_picollm_tokenize_type = ( object: number, text: number, @@ -86,23 +126,23 @@ type pv_picollm_tokenize_type = ( eos: boolean, numTokens: number, tokens: number -) => number; -type pv_picollm_delete_tokens_type = (tokens: number) => void; +) => Promise; +type pv_picollm_delete_tokens_type = (tokens: number) => Promise; type pv_picollm_forward_type = ( object: number, token: number, numLogits: number, logits: number ) => Promise; -type pv_picollm_delete_logits_type = (logits: number) => void; -type pv_picollm_reset_type = (object: number) => number; -type pv_picollm_model_type = (object: number, model: number) => number; +type pv_picollm_delete_logits_type = (logits: number) => Promise; +type pv_picollm_reset_type = (object: number) => Promise; +type pv_picollm_model_type = (object: number, model: number) => Promise; type pv_picollm_context_length_type = ( object: number, contextLength: number -) => number; -type pv_picollm_version_type = () => number; -type pv_picollm_max_top_choices_type = () => number; +) => Promise; +type pv_picollm_version_type = () => Promise; +type pv_picollm_max_top_choices_type = () => Promise; type pv_picollm_list_hardware_devices_type = ( hardwareDevices: number, numHardwareDevices: number @@ -110,104 +150,66 @@ type pv_picollm_list_hardware_devices_type = ( type pv_picollm_free_hardware_devices_type = ( hardwareDevices: number, numHardwareDevices: number -) => void; -type pv_set_sdk_type = (sdk: number) => void; +) => Promise; +type pv_set_sdk_type = (sdk: number) => Promise; type pv_get_error_stack_type = ( messageStack: number, messageStackDepth: number -) => number; -type pv_free_error_stack_type = (messageStack: number) => void; +) => Promise; +type pv_free_error_stack_type = (messageStack: number) => Promise; /** - * PicoLLM Module Type + * JavaScript/WebAssembly Binding for picoLLM. 
*/ -type PicoLLMModule = EmscriptenModule & { - _pv_free: (address: number) => void; - - _pv_picollm_init: pv_picollm_init_type; - _pv_picollm_interrupt: pv_picollm_interrupt_type; - _pv_picollm_delete: pv_picollm_delete_type; - _pv_picollm_delete_completion_tokens: pv_picollm_delete_completion_tokens_type; - _pv_picollm_delete_completion: pv_picollm_delete_completion_type; - _pv_picollm_delete_tokens: pv_picollm_delete_tokens_type; - _pv_picollm_delete_logits: pv_picollm_delete_logits_type; - _pv_picollm_tokenize: pv_picollm_tokenize_type; - _pv_picollm_reset: pv_picollm_reset_type; - - _pv_picollm_model: pv_picollm_model_type; - _pv_picollm_context_length: pv_picollm_context_length_type; - _pv_picollm_version: pv_picollm_version_type; - _pv_picollm_max_top_choices: pv_picollm_max_top_choices_type; - - _pv_picollm_list_hardware_devices: pv_picollm_list_hardware_devices_type; - _pv_picollm_free_hardware_devices: pv_picollm_free_hardware_devices_type; - - _pv_set_sdk: pv_set_sdk_type; - _pv_get_error_stack: pv_get_error_stack_type; - _pv_free_error_stack: pv_free_error_stack_type; - - // em default functions - addFunction: typeof addFunction; - ccall: typeof ccall; - cwrap: typeof cwrap; -} - -const DEFAULT_DEVICE = 'best'; type PicoLLMWasmOutput = { - module: PicoLLMModule; - - pv_picollm_generate: pv_picollm_generate_type, - pv_picollm_forward: pv_picollm_forward_type, + aligned_alloc: aligned_alloc_type; + memory: WebAssembly.Memory; + pvFree: pv_free_type; contextLength: number; maxTopChoices: number; model: string; version: string; + streamCallback: PicoLLMStreamCallback; objectAddress: number; messageStackAddressAddressAddress: number; messageStackDepthAddress: number; -}; -class PicoLLMStreamCallback { - private readonly _module: PicoLLMModule; - - private _userCallback?: (token: string) => void; - - public constructor(module: PicoLLMModule) { - this._module = module; - } - - public setUserCallback(userCallback?: (token: string) => void): void { - 
this._userCallback = userCallback; - } - - public streamCallbackWasm = (tokenAddress: number): void => { - if (this._module === undefined) { - return; - } + pvPicoLLMDelete: pv_picollm_delete_type; + pvPicoLLMGenerate: pv_picollm_generate_type; + pvPicoLLMInterrupt: pv_picollm_interrupt_type; + pvPicoLLMDeleteCompletionTokens: pv_picollm_delete_completion_tokens_type; + pvPicoLLMDeleteCompletion: pv_picollm_delete_completion_type; + pvPicoLLMTokenize: pv_picollm_tokenize_type; + pvPicoLLMDeleteTokens: pv_picollm_delete_tokens_type; + pvPicoLLMForward: pv_picollm_forward_type; + pvPicoLLMDeleteLogits: pv_picollm_delete_logits_type; + pvPicoLLMReset: pv_picollm_reset_type; + pvGetErrorStack: pv_get_error_stack_type; + pvFreeErrorStack: pv_free_error_stack_type; +}; - const tokenAddressUnsigned = unsignedAddress(tokenAddress); - const token = arrayBufferToStringAtIndex( - this._module.HEAPU8, - tokenAddressUnsigned - ); - if (this._userCallback) { - this._userCallback(token); - } - }; -} +const DEFAULT_DEVICE = 'best'; -/** - * JavaScript/WebAssembly Binding for picoLLM. 
- */ export class PicoLLM { - private readonly _module: PicoLLMModule | undefined; - - private readonly _pv_picollm_generate: pv_picollm_generate_type; - private readonly _pv_picollm_forward: pv_picollm_forward_type; - + private readonly _pvPicoLLMDelete: pv_picollm_delete_type; + private readonly _pvPicoLLMGenerate: pv_picollm_generate_type; + private readonly _pvPicoLLMInterrupt: pv_picollm_interrupt_type; + private readonly _pvPicoLLMDeleteCompletionTokens: pv_picollm_delete_completion_tokens_type; + private readonly _pvPicoLLMDeleteCompletion: pv_picollm_delete_completion_type; + private readonly _pvPicoLLMTokenize: pv_picollm_tokenize_type; + private readonly _pvPicoLLMDeleteTokens: pv_picollm_delete_tokens_type; + private readonly _pvPicoLLMForward: pv_picollm_forward_type; + private readonly _pvPicoLLMDeleteLogits: pv_picollm_delete_logits_type; + private readonly _pvPicoLLMReset: pv_picollm_reset_type; + private readonly _pvGetErrorStack: pv_get_error_stack_type; + private readonly _pvFreeErrorStack: pv_free_error_stack_type; + + private _wasmMemory: WebAssembly.Memory | undefined; + private readonly _aligned_alloc: aligned_alloc_type; + private readonly _pvFree: pv_free_type; private readonly _functionMutex: Mutex; private readonly _objectAddress: number; @@ -218,29 +220,38 @@ export class PicoLLM { private readonly _maxTopChoices: number; private readonly _model: string; private readonly _version: string; - private readonly _streamCallback: PicoLLMStreamCallback; - private readonly _streamCallbackFnPointer: number; private static _wasmSimd: string; - private static _wasmLib: string; private static _sdk: string = 'web'; private static _picoLLMMutex = new Mutex(); private constructor(handleWasm: PicoLLMWasmOutput) { - this._module = handleWasm.module; - - this._pv_picollm_generate = handleWasm.pv_picollm_generate; - this._pv_picollm_forward = handleWasm.pv_picollm_forward; - this._contextLength = handleWasm.contextLength; this._maxTopChoices = 
handleWasm.maxTopChoices; this._model = handleWasm.model; this._version = handleWasm.version; - - this._streamCallback = new PicoLLMStreamCallback(this._module); - this._streamCallbackFnPointer = this._module.addFunction(this._streamCallback.streamCallbackWasm, 'vii'); + this._streamCallback = handleWasm.streamCallback; + + this._pvPicoLLMDelete = handleWasm.pvPicoLLMDelete; + this._pvPicoLLMGenerate = handleWasm.pvPicoLLMGenerate; + this._pvPicoLLMInterrupt = handleWasm.pvPicoLLMInterrupt; + this._pvPicoLLMDeleteCompletionTokens = + handleWasm.pvPicoLLMDeleteCompletionTokens; + this._pvPicoLLMDeleteCompletion = handleWasm.pvPicoLLMDeleteCompletion; + this._pvPicoLLMTokenize = handleWasm.pvPicoLLMTokenize; + this._pvPicoLLMDeleteTokens = handleWasm.pvPicoLLMDeleteTokens; + this._pvPicoLLMForward = handleWasm.pvPicoLLMForward; + this._pvPicoLLMDeleteLogits = handleWasm.pvPicoLLMDeleteLogits; + this._pvPicoLLMReset = handleWasm.pvPicoLLMReset; + + this._pvGetErrorStack = handleWasm.pvGetErrorStack; + this._pvFreeErrorStack = handleWasm.pvFreeErrorStack; + + this._wasmMemory = handleWasm.memory; + this._pvFree = handleWasm.pvFree; + this._aligned_alloc = handleWasm.aligned_alloc; this._functionMutex = new Mutex(); @@ -324,16 +335,6 @@ export class PicoLLM { } } - /** - * Set base64 wasm lib file in text format. - * @param wasmLib Base64'd wasm lib file in text format. 
- */ - public static setWasmLib(wasmLib: string): void { - if (this._wasmLib === undefined) { - this._wasmLib = wasmLib; - } - } - public static async _init( accessKey: string, modelPath: string, @@ -357,7 +358,7 @@ export class PicoLLM { this._wasmSimd, accessKey, modelPath, - device, + device ); return new PicoLLM(wasmOutput); }) @@ -417,94 +418,119 @@ export class PicoLLM { streamCallback, } = options; + this._streamCallback.setUserCallback(streamCallback); + return new Promise((resolve, reject) => { this._functionMutex .runExclusive(async () => { - if (this._module === undefined) { + if (this._wasmMemory === undefined) { throw new PicoLLMErrors.PicoLLMInvalidStateError( 'Attempted to call PicoLLM generate after release.' ); } + let memoryBufferUint8 = new Uint8Array(this._wasmMemory.buffer); + const encoded = new TextEncoder().encode(prompt); - const promptAddress = this._module._malloc((encoded.length + 1) * Uint8Array.BYTES_PER_ELEMENT); + const promptAddress = await this._aligned_alloc( + Uint8Array.BYTES_PER_ELEMENT, + (encoded.length + 1) * Uint8Array.BYTES_PER_ELEMENT + ); if (promptAddress === 0) { throw new PicoLLMErrors.PicoLLMOutOfMemoryError( 'malloc failed: Cannot allocate memory for prompt' ); } - this._module.HEAPU8.set(encoded, promptAddress); - this._module.HEAPU8[promptAddress + encoded.length] = 0; + memoryBufferUint8.set(encoded, promptAddress); + memoryBufferUint8[promptAddress + encoded.length] = 0; const stopPhrasesAddressAddress = stopPhrases.length === 0 ? 
0 - : this._module._malloc(stopPhrases.length * Int32Array.BYTES_PER_ELEMENT); + : await this._aligned_alloc( + Int32Array.BYTES_PER_ELEMENT, + stopPhrases.length * Int32Array.BYTES_PER_ELEMENT + ); const stopPhrasesAddressList: number[] = []; for (const stopPhrase of stopPhrases) { const stopPhrasesEncoded = new TextEncoder().encode(stopPhrase); - const stopPhraseAddress = this._module._malloc((stopPhrasesEncoded.length + 1) * Uint8Array.BYTES_PER_ELEMENT); + const stopPhraseAddress = await this._aligned_alloc( + Uint8Array.BYTES_PER_ELEMENT, + (stopPhrasesEncoded.length + 1) * Uint8Array.BYTES_PER_ELEMENT + ); if (stopPhraseAddress === 0) { throw new PicoLLMErrors.PicoLLMOutOfMemoryError( 'malloc failed: Cannot allocate memory for stopPhrase' ); } - this._module.HEAPU8.set(stopPhrasesEncoded, stopPhraseAddress); - this._module.HEAPU8[stopPhraseAddress + stopPhrasesEncoded.length] = 0; + memoryBufferUint8.set(stopPhrasesEncoded, stopPhraseAddress); + memoryBufferUint8[ + stopPhraseAddress + stopPhrasesEncoded.length + ] = 0; stopPhrasesAddressList.push(stopPhraseAddress); } + const memoryBufferInt32 = new Int32Array(this._wasmMemory.buffer); if (stopPhrasesAddressAddress > 0) { - this._module.HEAP32.set( + memoryBufferInt32.set( new Int32Array(stopPhrasesAddressList), stopPhrasesAddressAddress / Int32Array.BYTES_PER_ELEMENT ); } - const usageAddress = this._module._malloc(Int32Array.BYTES_PER_ELEMENT * 2); + const usageAddress = await this._aligned_alloc( + Int32Array.BYTES_PER_ELEMENT, + Int32Array.BYTES_PER_ELEMENT * 2 + ); if (usageAddress === 0) { throw new PicoLLMErrors.PicoLLMOutOfMemoryError( 'malloc failed: Cannot allocate memory for usage' ); } - const endpointAddress = this._module._malloc(Int32Array.BYTES_PER_ELEMENT); + const endpointAddress = await this._aligned_alloc( + Int32Array.BYTES_PER_ELEMENT, + Int32Array.BYTES_PER_ELEMENT + ); if (endpointAddress === 0) { throw new PicoLLMErrors.PicoLLMOutOfMemoryError( 'malloc failed: Cannot allocate memory 
for endpoint' ); } - const numCompletionTokensAddress = this._module._malloc(Int32Array.BYTES_PER_ELEMENT); + const numCompletionTokensAddress = await this._aligned_alloc( + Int32Array.BYTES_PER_ELEMENT, + Int32Array.BYTES_PER_ELEMENT + ); if (numCompletionTokensAddress === 0) { throw new PicoLLMErrors.PicoLLMOutOfMemoryError( 'malloc failed: Cannot allocate memory for numCompletionTokens' ); } - const completionTokensAddressAddress = this._module._malloc(Int32Array.BYTES_PER_ELEMENT); + const completionTokensAddressAddress = await this._aligned_alloc( + Int32Array.BYTES_PER_ELEMENT, + Int32Array.BYTES_PER_ELEMENT + ); if (completionTokensAddressAddress === 0) { throw new PicoLLMErrors.PicoLLMOutOfMemoryError( 'malloc failed: Cannot allocate memory for completionTokens' ); } - const completionAddressAddress = this._module._malloc(Int32Array.BYTES_PER_ELEMENT); + const completionAddressAddress = await this._aligned_alloc( + Int32Array.BYTES_PER_ELEMENT, + Int32Array.BYTES_PER_ELEMENT + ); if (completionAddressAddress === 0) { throw new PicoLLMErrors.PicoLLMOutOfMemoryError( 'malloc failed: Cannot allocate memory for completion' ); } - let streamCallbackFnPointer = 0; - if (streamCallback !== undefined) { - this._streamCallback.setUserCallback(streamCallback); - streamCallbackFnPointer = this._streamCallbackFnPointer; - } - - const status = await this._pv_picollm_generate( + const status = await this._pvPicoLLMGenerate( this._objectAddress, promptAddress, completionTokenLimit, @@ -516,7 +542,7 @@ export class PicoLLM { temperature, topP, numTopChoices, - streamCallbackFnPointer, + 0, 0, usageAddress, endpointAddress, @@ -525,52 +551,69 @@ export class PicoLLM { completionAddressAddress ); - this._module._pv_free(promptAddress); - this._module._pv_free(stopPhrasesAddressAddress); + memoryBufferUint8 = new Uint8Array(this._wasmMemory.buffer); + await this._pvFree(promptAddress); + await this._pvFree(stopPhrasesAddressAddress); for (const stopPhraseAddress of 
stopPhrasesAddressList) { - this._module._pv_free(stopPhraseAddress); + await this._pvFree(stopPhraseAddress); } + const memoryBufferView = new DataView(this._wasmMemory.buffer); if (status !== PvStatus.SUCCESS) { - const messageStack = PicoLLM.getMessageStack( - this._module._pv_get_error_stack, - this._module._pv_free_error_stack, + const messageStack = await PicoLLM.getMessageStack( + this._pvGetErrorStack, + this._pvFreeErrorStack, this._messageStackAddressAddressAddress, this._messageStackDepthAddress, - this._module.HEAP32, - this._module.HEAPU8 + memoryBufferView, + memoryBufferUint8 ); throw pvStatusToException(status, 'Generate failed', messageStack); } const usage: PicoLLMUsage = { - promptTokens: this._module.HEAP32[usageAddress / Int32Array.BYTES_PER_ELEMENT], - completionTokens: this._module.HEAP32[usageAddress / Int32Array.BYTES_PER_ELEMENT + 1], + promptTokens: memoryBufferView.getInt32(usageAddress, true), + completionTokens: memoryBufferView.getInt32( + usageAddress + Int32Array.BYTES_PER_ELEMENT, + true + ), }; - this._module._pv_free(usageAddress); + await this._pvFree(usageAddress); - const endpoint: PicoLLMEndpoint = this._module.HEAP32[endpointAddress / Int32Array.BYTES_PER_ELEMENT]; - this._module._pv_free(endpointAddress); + const endpoint: PicoLLMEndpoint = memoryBufferView.getInt32( + endpointAddress, + true + ); + await this._pvFree(endpointAddress); - const numCompletionTokens = this._module.HEAP32[numCompletionTokensAddress / Int32Array.BYTES_PER_ELEMENT]; - this._module._pv_free(numCompletionTokensAddress); + const numCompletionTokens = memoryBufferView.getInt32( + numCompletionTokensAddress, + true + ); + await this._pvFree(numCompletionTokensAddress); const completionTokensAddress = unsignedAddress( - this._module.HEAP32[completionTokensAddressAddress / Int32Array.BYTES_PER_ELEMENT] + memoryBufferView.getInt32(completionTokensAddressAddress, true) ); let completionTokensPtr = completionTokensAddress; const completionTokens: 
PicoLLMCompletionToken[] = []; for (let i = 0; i < numCompletionTokens; i++) { - const tokenAddress = unsignedAddress(this._module.HEAP32[completionTokensPtr / Int32Array.BYTES_PER_ELEMENT]); + const tokenAddress = memoryBufferView.getInt32( + completionTokensPtr, + true + ); const completionToken = arrayBufferToStringAtIndex( - this._module.HEAPU8, + memoryBufferUint8, tokenAddress ); completionTokensPtr += Int32Array.BYTES_PER_ELEMENT; - const completionLogProb = this._module.HEAPF32[completionTokensPtr / Float32Array.BYTES_PER_ELEMENT]; + const completionLogProb = memoryBufferView.getFloat32( + completionTokensPtr, + true + ); completionTokensPtr += Float32Array.BYTES_PER_ELEMENT; const token: PicoLLMToken = { @@ -578,24 +621,30 @@ export class PicoLLM { logProb: completionLogProb, }; - const numTopChoicesReturn = this._module.HEAP32[completionTokensPtr / Int32Array.BYTES_PER_ELEMENT]; + const numTopChoicesReturn = memoryBufferView.getInt32( + completionTokensPtr, + true + ); completionTokensPtr += Int32Array.BYTES_PER_ELEMENT; const topChoices: PicoLLMToken[] = []; let topChoicesPtr = unsignedAddress( - this._module.HEAP32[completionTokensPtr / Int32Array.BYTES_PER_ELEMENT] + memoryBufferView.getInt32(completionTokensPtr, true) ); for (let j = 0; j < numTopChoicesReturn; j++) { const topChoiceTokenAddress = unsignedAddress( - this._module.HEAP32[topChoicesPtr / Int32Array.BYTES_PER_ELEMENT] + memoryBufferView.getInt32(topChoicesPtr, true) ); const topChoiceToken = arrayBufferToStringAtIndex( - this._module.HEAPU8, + memoryBufferUint8, topChoiceTokenAddress ); topChoicesPtr += Int32Array.BYTES_PER_ELEMENT; - const topChoiceLogProb = this._module.HEAPF32[topChoicesPtr / Float32Array.BYTES_PER_ELEMENT]; + const topChoiceLogProb = memoryBufferView.getFloat32( + topChoicesPtr, + true + ); topChoicesPtr += Float32Array.BYTES_PER_ELEMENT; topChoices.push({ @@ -611,22 +660,22 @@ export class PicoLLM { }); } - this._module._pv_picollm_delete_completion_tokens( + await 
this._pvPicoLLMDeleteCompletionTokens( completionTokensAddress, numCompletionTokens ); - this._module._pv_free(completionTokensAddressAddress); + await this._pvFree(completionTokensAddressAddress); + const completionAddress = unsignedAddress( - this._module.HEAP32[completionAddressAddress / Int32Array.BYTES_PER_ELEMENT] + memoryBufferView.getInt32(completionAddressAddress, true) ); const completion = arrayBufferToStringAtIndex( - this._module.HEAPU8, + memoryBufferUint8, completionAddress ); - this._module._pv_picollm_delete_completion(completionAddress); - this._module._pv_free(completionAddressAddress); - + await this._pvPicoLLMDeleteCompletion(completionAddress); + await this._pvFree(completionAddressAddress); return { usage, endpoint, @@ -646,14 +695,14 @@ export class PicoLLM { /** * Interrupts `generate()` if generation is in progress. Otherwise, it has no effect. */ - public interrupt(): void { - if (this._module === undefined) { + public async interrupt(): Promise { + if (this._wasmMemory === undefined) { throw new PicoLLMErrors.PicoLLMInvalidStateError( 'Attempted to call PicoLLM interrupt after release.' ); } - const status = this._module._pv_picollm_interrupt(this._objectAddress); + const status = await this._pvPicoLLMInterrupt(this._objectAddress); if (status !== PvStatus.SUCCESS) { throw pvStatusToException(status, 'Interrupt failed'); } @@ -676,38 +725,49 @@ export class PicoLLM { return new Promise((resolve, reject) => { this._functionMutex .runExclusive(async () => { - if (this._module === undefined) { + if (this._wasmMemory === undefined) { throw new PicoLLMErrors.PicoLLMInvalidStateError( 'Attempted to call PicoLLM tokenize after release.' 
); } + let memoryBufferUint8 = new Uint8Array(this._wasmMemory.buffer); + const encoded = new TextEncoder().encode(text); - const textAddress = this._module._malloc((encoded.length + 1) * Uint8Array.BYTES_PER_ELEMENT); + const textAddress = await this._aligned_alloc( + Uint8Array.BYTES_PER_ELEMENT, + (encoded.length + 1) * Uint8Array.BYTES_PER_ELEMENT + ); if (textAddress === 0) { throw new PicoLLMErrors.PicoLLMOutOfMemoryError( 'malloc failed: Cannot allocate memory for text' ); } - this._module.HEAPU8.set(encoded, textAddress); - this._module.HEAPU8[textAddress + encoded.length] = 0; + memoryBufferUint8.set(encoded, textAddress); + memoryBufferUint8[textAddress + encoded.length] = 0; - const numTokensAddress = this._module._malloc(Int32Array.BYTES_PER_ELEMENT); + const numTokensAddress = await this._aligned_alloc( + Int32Array.BYTES_PER_ELEMENT, + Int32Array.BYTES_PER_ELEMENT + ); if (numTokensAddress === 0) { throw new PicoLLMErrors.PicoLLMOutOfMemoryError( 'malloc failed: Cannot allocate memory for numTokens' ); } - const tokensAddressAddress = this._module._malloc(Int32Array.BYTES_PER_ELEMENT); + const tokensAddressAddress = await this._aligned_alloc( + Int32Array.BYTES_PER_ELEMENT, + Int32Array.BYTES_PER_ELEMENT + ); if (tokensAddressAddress === 0) { throw new PicoLLMErrors.PicoLLMOutOfMemoryError( 'malloc failed: Cannot allocate memory for tokens' ); } - const status = this._module._pv_picollm_tokenize( + const status = await this._pvPicoLLMTokenize( this._objectAddress, textAddress, bos, @@ -715,36 +775,42 @@ export class PicoLLM { numTokensAddress, tokensAddressAddress ); - this._module._pv_free(textAddress); + memoryBufferUint8 = new Uint8Array(this._wasmMemory.buffer); + await this._pvFree(textAddress); + + const memoryBufferView = new DataView(this._wasmMemory.buffer); if (status !== PvStatus.SUCCESS) { - const messageStack = PicoLLM.getMessageStack( - this._module._pv_get_error_stack, - this._module._pv_free_error_stack, + const messageStack = await 
PicoLLM.getMessageStack( + this._pvGetErrorStack, + this._pvFreeErrorStack, this._messageStackAddressAddressAddress, this._messageStackDepthAddress, - this._module.HEAP32, - this._module.HEAPU8 + memoryBufferView, + memoryBufferUint8 ); throw pvStatusToException(status, 'Tokenize failed', messageStack); } - const numTokens = this._module.HEAP32[numTokensAddress / Int32Array.BYTES_PER_ELEMENT]; - this._module._pv_free(numTokensAddress); + const numTokens = memoryBufferView.getInt32(numTokensAddress, true); + await this._pvFree(numTokensAddress); const tokens: number[] = []; const tokensAddress = unsignedAddress( - this._module.HEAP32[tokensAddressAddress / Int32Array.BYTES_PER_ELEMENT] + memoryBufferView.getInt32(tokensAddressAddress, true) ); for (let i = 0; i < numTokens; i++) { tokens.push( - this._module.HEAP32[tokensAddress / Int32Array.BYTES_PER_ELEMENT + i] + memoryBufferView.getInt32( + tokensAddress + i * Int32Array.BYTES_PER_ELEMENT, + true + ) ); } - this._module._pv_picollm_delete_tokens(tokensAddress); - this._module._pv_free(tokensAddressAddress); + await this._pvPicoLLMDeleteTokens(tokensAddress); + await this._pvFree(tokensAddressAddress); return tokens; }) @@ -768,57 +834,73 @@ export class PicoLLM { return new Promise((resolve, reject) => { this._functionMutex .runExclusive(async () => { - if (this._module === undefined) { + if (this._wasmMemory === undefined) { throw new PicoLLMErrors.PicoLLMInvalidStateError( 'Attempted to call PicoLLM forward after release.' 
); } - const numLogitsAddress = this._module._malloc(Int32Array.BYTES_PER_ELEMENT); + const numLogitsAddress = await this._aligned_alloc( + Int32Array.BYTES_PER_ELEMENT, + Int32Array.BYTES_PER_ELEMENT + ); if (numLogitsAddress === 0) { throw new PicoLLMErrors.PicoLLMOutOfMemoryError( 'malloc failed: Cannot allocate memory for numLogits' ); } - const logitsAddressAddress = this._module._malloc(Int32Array.BYTES_PER_ELEMENT); + const logitsAddressAddress = await this._aligned_alloc( + Int32Array.BYTES_PER_ELEMENT, + Int32Array.BYTES_PER_ELEMENT + ); if (logitsAddressAddress === 0) { throw new PicoLLMErrors.PicoLLMOutOfMemoryError( 'malloc failed: Cannot allocate memory for logits' ); } - const status = await this._pv_picollm_forward( + const status = await this._pvPicoLLMForward( this._objectAddress, token, numLogitsAddress, logitsAddressAddress ); + const memoryBufferUint8 = new Uint8Array(this._wasmMemory.buffer); + const memoryBufferView = new DataView(this._wasmMemory.buffer); + if (status !== PvStatus.SUCCESS) { - const messageStack = PicoLLM.getMessageStack( - this._module._pv_get_error_stack, - this._module._pv_free_error_stack, + const messageStack = await PicoLLM.getMessageStack( + this._pvGetErrorStack, + this._pvFreeErrorStack, this._messageStackAddressAddressAddress, this._messageStackDepthAddress, - this._module.HEAP32, - this._module.HEAPU8 + memoryBufferView, + memoryBufferUint8 ); throw pvStatusToException(status, 'Forward failed', messageStack); } - const numLogits = this._module.HEAP32[numLogitsAddress / Int32Array.BYTES_PER_ELEMENT]; - this._module._pv_free(numLogitsAddress); + const numLogits = memoryBufferView.getInt32(numLogitsAddress, true); + await this._pvFree(numLogitsAddress); const logits: number[] = []; - const logitsAddress = unsignedAddress(this._module.HEAP32[logitsAddressAddress / Int32Array.BYTES_PER_ELEMENT]); + const logitsAddress = unsignedAddress( + memoryBufferView.getInt32(logitsAddressAddress, true) + ); for (let i = 0; i < 
numLogits; i++) { - logits.push(this._module.HEAPF32[logitsAddress / Float32Array.BYTES_PER_ELEMENT + i]); + logits.push( + memoryBufferView.getFloat32( + logitsAddress + i * Float32Array.BYTES_PER_ELEMENT, + true + ) + ); } - this._module._pv_picollm_delete_logits(logitsAddress); - this._module._pv_free(logitsAddressAddress); + await this._pvPicoLLMDeleteLogits(logitsAddress); + await this._pvFree(logitsAddressAddress); return logits; }) @@ -840,21 +922,23 @@ export class PicoLLM { return new Promise((resolve, reject) => { this._functionMutex .runExclusive(async () => { - if (this._module === undefined) { + if (this._wasmMemory === undefined) { throw new PicoLLMErrors.PicoLLMInvalidStateError( 'Attempted to call PicoLLM forward after release.' ); } - const status = this._module._pv_picollm_reset(this._objectAddress); + const status = await this._pvPicoLLMReset(this._objectAddress); + const memoryBufferUint8 = new Uint8Array(this._wasmMemory.buffer); + const memoryBufferView = new DataView(this._wasmMemory.buffer); if (status !== PvStatus.SUCCESS) { - const messageStack = PicoLLM.getMessageStack( - this._module._pv_get_error_stack, - this._module._pv_free_error_stack, + const messageStack = await PicoLLM.getMessageStack( + this._pvGetErrorStack, + this._pvFreeErrorStack, this._messageStackAddressAddressAddress, this._messageStackDepthAddress, - this._module.HEAP32, - this._module.HEAPU8 + memoryBufferView, + memoryBufferUint8 ); throw pvStatusToException(status, 'Reset failed', messageStack); @@ -922,13 +1006,13 @@ export class PicoLLM { * Releases resources acquired by WebAssembly module. 
*/ public async release(): Promise { - if (!this._module) { - return; - } + await this._pvPicoLLMDelete(this._objectAddress); + await this._pvFree(this._messageStackAddressAddressAddress); + await this._pvFree(this._messageStackDepthAddress); + this._streamCallback.release(); - this._module._pv_picollm_delete(this._objectAddress); - this._module._pv_free(this._messageStackAddressAddressAddress); - this._module._pv_free(this._messageStackDepthAddress); + delete this._wasmMemory; + this._wasmMemory = undefined; } /** @@ -941,87 +1025,142 @@ export class PicoLLM { return new Promise((resolve, reject) => { PicoLLM._picoLLMMutex .runExclusive(async () => { + const memory: WebAssembly.Memory = new WebAssembly.Memory({ + initial: 4096, + }); + const isSimd = await simd(); if (!isSimd) { throw new PicoLLMErrors.PicoLLMRuntimeError('Unsupported Browser'); } - const blob = new Blob([base64ToUint8Array(this._wasmLib)], { type: 'application/javascript' }); + const picoLLMWorkerWasmBuffer = base64ToUint8Array(picoLLMWebWorkerHelperSimd); + const xpuWebWorkerImports = initXpuWebWorker(memory, picoLLMWorkerWasmBuffer); + const xpuWebGPUImports = initXpuWebGPU(memory, picoLLMWorkerWasmBuffer); - const module: PicoLLMModule = await createModule({ - mainScriptUrlOrBlob: blob, - wasmBinary: base64ToUint8Array(this._wasmSimd), + const pvError = new PvError(); + + const streamCallback = new PicoLLMStreamCallback(memory); + + const exports = await buildWasm(memory, this._wasmSimd, pvError, { + ...xpuWebWorkerImports, + ...xpuWebGPUImports, + stream_callback_wasm: streamCallback.streamCallbackWasm, }); + for (const [k, v] of Object.entries(exports)) { + // @ts-ignore + xpuWebWorkerImports[k] = v; + } + for (const [k, v] of Object.entries(exports)) { + // @ts-ignore + xpuWebGPUImports[k] = v; + } - const hardwareDevicesAddressAddress = module._malloc(Int32Array.BYTES_PER_ELEMENT); + const aligned_alloc = exports.aligned_alloc as aligned_alloc_type; + const pv_free = exports.pv_free as 
pv_free_type; + const pv_picollm_list_hardware_devices = + exports.pv_picollm_list_hardware_devices as pv_picollm_list_hardware_devices_type; + const pv_picollm_free_hardware_devices = + exports.pv_picollm_free_hardware_devices as pv_picollm_free_hardware_devices_type; + const pv_get_error_stack = + exports.pv_get_error_stack as pv_get_error_stack_type; + const pv_free_error_stack = + exports.pv_free_error_stack as pv_free_error_stack_type; + + const hardwareDevicesAddressAddress = await aligned_alloc( + Int32Array.BYTES_PER_ELEMENT, + Int32Array.BYTES_PER_ELEMENT + ); if (hardwareDevicesAddressAddress === 0) { throw new PicoLLMErrors.PicoLLMOutOfMemoryError( 'malloc failed: Cannot allocate memory for hardwareDevices' ); } - const numHardwareDevicesAddress = module._malloc(Int32Array.BYTES_PER_ELEMENT); + const numHardwareDevicesAddress = await aligned_alloc( + Int32Array.BYTES_PER_ELEMENT, + Int32Array.BYTES_PER_ELEMENT + ); if (numHardwareDevicesAddress === 0) { throw new PicoLLMErrors.PicoLLMOutOfMemoryError( 'malloc failed: Cannot allocate memory for numHardwareDevices' ); } - const status: PvStatus = await module._pv_picollm_list_hardware_devices( + const status: PvStatus = await pv_picollm_list_hardware_devices( hardwareDevicesAddressAddress, numHardwareDevicesAddress ); - const messageStackDepthAddress = module._malloc(Int32Array.BYTES_PER_ELEMENT); + const messageStackDepthAddress = await aligned_alloc( + Int32Array.BYTES_PER_ELEMENT, + Int32Array.BYTES_PER_ELEMENT + ); if (!messageStackDepthAddress) { throw new PicoLLMErrors.PicoLLMOutOfMemoryError( 'malloc failed: Cannot allocate memory for messageStackDepth' ); } - const messageStackAddressAddressAddress = module._malloc(Int32Array.BYTES_PER_ELEMENT); + const messageStackAddressAddressAddress = await aligned_alloc( + Int32Array.BYTES_PER_ELEMENT, + Int32Array.BYTES_PER_ELEMENT + ); if (!messageStackAddressAddressAddress) { throw new PicoLLMErrors.PicoLLMOutOfMemoryError( 'malloc failed: Cannot allocate 
memory messageStack' ); } + const memoryBufferView = new DataView(memory.buffer); + const memoryBufferUint8 = new Uint8Array(memory.buffer); if (status !== PvStatus.SUCCESS) { - const messageStack = PicoLLM.getMessageStack( - module._pv_get_error_stack, - module._pv_free_error_stack, + const messageStack = await PicoLLM.getMessageStack( + pv_get_error_stack, + pv_free_error_stack, messageStackAddressAddressAddress, messageStackDepthAddress, - module.HEAP32, - module.HEAPU8, + memoryBufferView, + memoryBufferUint8 ); - module._pv_free(messageStackAddressAddressAddress); - module._pv_free(messageStackDepthAddress); + await pv_free(messageStackAddressAddressAddress); + await pv_free(messageStackDepthAddress); throw pvStatusToException( status, 'Get context length failed', messageStack, + pvError ); } - module._pv_free(messageStackAddressAddressAddress); - module._pv_free(messageStackDepthAddress); + await pv_free(messageStackAddressAddressAddress); + await pv_free(messageStackDepthAddress); - const numHardwareDevices: number = module.HEAP32[numHardwareDevicesAddress / Int32Array.BYTES_PER_ELEMENT]; - module._pv_free(numHardwareDevicesAddress); + const numHardwareDevices: number = memoryBufferView.getInt32( + numHardwareDevicesAddress, + true + ); + await pv_free(numHardwareDevicesAddress); - const hardwareDevicesAddress = unsignedAddress(module.HEAP32[hardwareDevicesAddressAddress / Int32Array.BYTES_PER_ELEMENT]); + const hardwareDevicesAddress = unsignedAddress( + memoryBufferView.getInt32(hardwareDevicesAddressAddress, true) + ); const hardwareDevices: string[] = []; for (let i = 0; i < numHardwareDevices; i++) { - const deviceAddress = module.HEAP32[hardwareDevicesAddress / Int32Array.BYTES_PER_ELEMENT + i]; - hardwareDevices.push(arrayBufferToStringAtIndex(module.HEAPU8, deviceAddress)); + const deviceAddress = memoryBufferView.getInt32( + hardwareDevicesAddress + i * Int32Array.BYTES_PER_ELEMENT, + true + ); + hardwareDevices.push( + 
arrayBufferToStringAtIndex(memoryBufferUint8, deviceAddress) + ); } - module._pv_picollm_free_hardware_devices( + await pv_picollm_free_hardware_devices( hardwareDevicesAddress, numHardwareDevices ); - module._pv_free(hardwareDevicesAddressAddress); + await pv_free(hardwareDevicesAddressAddress); return hardwareDevices; }) @@ -1040,78 +1179,150 @@ export class PicoLLM { modelPath: string, device: string ): Promise { - const blob = new Blob([base64ToUint8Array(this._wasmLib)], { type: 'application/javascript' }); + const memory = new WebAssembly.Memory({ initial: 4096 }); - const module: PicoLLMModule = await createModule({ - mainScriptUrlOrBlob: blob, - wasmBinary: base64ToUint8Array(wasmBase64), - }); + let memoryBufferUint8 = new Uint8Array(memory.buffer); + + const picoLLMWorkerWasmBuffer = base64ToUint8Array(picoLLMWebWorkerHelperSimd); + const xpuWebWorkerImports = initXpuWebWorker(memory, picoLLMWorkerWasmBuffer); + const xpuWebGPUImports = initXpuWebGPU(memory, picoLLMWorkerWasmBuffer); + const pvError = new PvError(); - // setup async functions - const pv_picollm_init: pv_picollm_init_type = this.wrapAsyncFunction(module, "pv_picollm_init", 4); - const pv_picollm_generate: pv_picollm_generate_type = this.wrapAsyncFunction(module, "pv_picollm_generate", 18); - const pv_picollm_forward: pv_picollm_forward_type = this.wrapAsyncFunction(module, "pv_picollm_forward", 6); + const streamCallback = new PicoLLMStreamCallback(memory); - const objectAddressAddress = module._malloc(Int32Array.BYTES_PER_ELEMENT); + const exports = await buildWasm(memory, wasmBase64, pvError, { + ...xpuWebWorkerImports, + ...xpuWebGPUImports, + stream_callback_wasm: streamCallback.streamCallbackWasm, + }); + for (const [k, v] of Object.entries(exports)) { + // @ts-ignore + xpuWebWorkerImports[k] = v; + } + for (const [k, v] of Object.entries(exports)) { + // @ts-ignore + xpuWebGPUImports[k] = v; + } + + const aligned_alloc = exports.aligned_alloc as aligned_alloc_type; + const pv_free = 
exports.pv_free as pv_free_type; + + const pv_picollm_init = exports.pv_picollm_init as pv_picollm_init_type; + const pv_picollm_delete = + exports.pv_picollm_delete as pv_picollm_delete_type; + const pv_picollm_generate = + exports.pv_picollm_generate as pv_picollm_generate_type; + const pv_picollm_interrupt = + exports.pv_picollm_interrupt as pv_picollm_interrupt_type; + const pv_picollm_delete_completion_tokens = + exports.pv_picollm_delete_completion_tokens as pv_picollm_delete_completion_tokens_type; + const pv_picollm_delete_completion = + exports.pv_picollm_delete_completion as pv_picollm_delete_completion_type; + const pv_picollm_tokenize = + exports.pv_picollm_tokenize as pv_picollm_tokenize_type; + const pv_picollm_delete_tokens = + exports.pv_picollm_delete_tokens as pv_picollm_delete_tokens_type; + const pv_picollm_forward = + exports.pv_picollm_forward as pv_picollm_forward_type; + const pv_picollm_delete_logits = + exports.pv_picollm_delete_logits as pv_picollm_delete_logits_type; + const pv_picollm_reset = exports.pv_picollm_reset as pv_picollm_reset_type; + + const pv_picollm_model = exports.pv_picollm_model as pv_picollm_model_type; + const pv_picollm_context_length = + exports.pv_picollm_context_length as pv_picollm_context_length_type; + const pv_picollm_version = + exports.pv_picollm_version as pv_picollm_version_type; + const pv_picollm_max_top_choices = + exports.pv_picollm_max_top_choices as pv_picollm_max_top_choices_type; + const pv_set_sdk = exports.pv_set_sdk as pv_set_sdk_type; + const pv_get_error_stack = + exports.pv_get_error_stack as pv_get_error_stack_type; + const pv_free_error_stack = + exports.pv_free_error_stack as pv_free_error_stack_type; + + const objectAddressAddress = await aligned_alloc( + Int32Array.BYTES_PER_ELEMENT, + Int32Array.BYTES_PER_ELEMENT + ); if (objectAddressAddress === 0) { throw new PicoLLMErrors.PicoLLMOutOfMemoryError( 'malloc failed: Cannot allocate memory' ); } - const accessKeyAddress = 
module._malloc((accessKey.length + 1) * Uint8Array.BYTES_PER_ELEMENT); + const accessKeyAddress = await aligned_alloc( + Uint8Array.BYTES_PER_ELEMENT, + (accessKey.length + 1) * Uint8Array.BYTES_PER_ELEMENT + ); if (accessKeyAddress === 0) { throw new PicoLLMErrors.PicoLLMOutOfMemoryError( 'malloc failed: Cannot allocate memory' ); } for (let i = 0; i < accessKey.length; i++) { - module.HEAPU8[accessKeyAddress + i] = accessKey.charCodeAt(i); + memoryBufferUint8[accessKeyAddress + i] = accessKey.charCodeAt(i); } - module.HEAPU8[accessKeyAddress + accessKey.length] = 0; + memoryBufferUint8[accessKeyAddress + accessKey.length] = 0; const modelPathEncoded = new TextEncoder().encode(modelPath); - const modelPathAddress = module._malloc((modelPathEncoded.length + 1) * Uint8Array.BYTES_PER_ELEMENT); + const modelPathAddress = await aligned_alloc( + Uint8Array.BYTES_PER_ELEMENT, + (modelPathEncoded.length + 1) * Uint8Array.BYTES_PER_ELEMENT + ); + if (modelPathAddress === 0) { throw new PicoLLMErrors.PicoLLMOutOfMemoryError( 'malloc failed: Cannot allocate memory' ); } - module.HEAPU8.set(modelPathEncoded, modelPathAddress); - module.HEAPU8[modelPathAddress + modelPathEncoded.length] = 0; + memoryBufferUint8.set(modelPathEncoded, modelPathAddress); + memoryBufferUint8[modelPathAddress + modelPathEncoded.length] = 0; - const deviceAddress = module._malloc((device.length + 1) * Uint8Array.BYTES_PER_ELEMENT); + const deviceAddress = await aligned_alloc( + Uint8Array.BYTES_PER_ELEMENT, + (device.length + 1) * Uint8Array.BYTES_PER_ELEMENT + ); if (deviceAddress === 0) { throw new PicoLLMErrors.PicoLLMOutOfMemoryError( 'malloc failed: Cannot allocate memory' ); } for (let i = 0; i < device.length; i++) { - module.HEAPU8[deviceAddress + i] = device.charCodeAt(i); + memoryBufferUint8[deviceAddress + i] = device.charCodeAt(i); } - module.HEAPU8[deviceAddress + device.length] = 0; + memoryBufferUint8[deviceAddress + device.length] = 0; const sdkEncoded = new 
TextEncoder().encode(this._sdk); - const sdkAddress = module._malloc((sdkEncoded.length + 1) * Uint8Array.BYTES_PER_ELEMENT); + const sdkAddress = await aligned_alloc( + Uint8Array.BYTES_PER_ELEMENT, + (sdkEncoded.length + 1) * Uint8Array.BYTES_PER_ELEMENT + ); if (!sdkAddress) { throw new PicoLLMErrors.PicoLLMOutOfMemoryError( 'malloc failed: Cannot allocate memory' ); } - module.HEAPU8.set(sdkEncoded, sdkAddress); - module.HEAPU8[sdkAddress + sdkEncoded.length] = 0; - module._pv_set_sdk(sdkAddress); + memoryBufferUint8.set(sdkEncoded, sdkAddress); + memoryBufferUint8[sdkAddress + sdkEncoded.length] = 0; + await pv_set_sdk(sdkAddress); - const messageStackDepthAddress = module._malloc(Int32Array.BYTES_PER_ELEMENT); + const messageStackDepthAddress = await aligned_alloc( + Int32Array.BYTES_PER_ELEMENT, + Int32Array.BYTES_PER_ELEMENT + ); if (!messageStackDepthAddress) { throw new PicoLLMErrors.PicoLLMOutOfMemoryError( 'malloc failed: Cannot allocate memory' ); } - const messageStackAddressAddressAddress = module._malloc(Int32Array.BYTES_PER_ELEMENT); + const messageStackAddressAddressAddress = await aligned_alloc( + Int32Array.BYTES_PER_ELEMENT, + Int32Array.BYTES_PER_ELEMENT + ); if (!messageStackAddressAddressAddress) { throw new PicoLLMErrors.PicoLLMOutOfMemoryError( 'malloc failed: Cannot allocate memory' @@ -1125,103 +1336,116 @@ export class PicoLLM { objectAddressAddress ); - module._pv_free(accessKeyAddress); - module._pv_free(modelPathAddress); - module._pv_free(deviceAddress); + await pv_free(accessKeyAddress); + await pv_free(modelPathAddress); + await pv_free(deviceAddress); + + const memoryBufferView = new DataView(memory.buffer); + memoryBufferUint8 = new Uint8Array(memory.buffer); if (status !== PvStatus.SUCCESS) { - const messageStack = PicoLLM.getMessageStack( - module._pv_get_error_stack, - module._pv_free_error_stack, + const messageStack = await PicoLLM.getMessageStack( + pv_get_error_stack, + pv_free_error_stack, 
messageStackAddressAddressAddress, messageStackDepthAddress, - module.HEAP32, - module.HEAPU8, + memoryBufferView, + memoryBufferUint8 ); throw pvStatusToException( status, 'Initialization failed', messageStack, + pvError ); } - const objectAddress = module.HEAP32[objectAddressAddress / Int32Array.BYTES_PER_ELEMENT]; - module._pv_free(objectAddressAddress); + const objectAddress = memoryBufferView.getInt32(objectAddressAddress, true); + await pv_free(objectAddressAddress); - const maxTopChoices = module._pv_picollm_max_top_choices(); + const maxTopChoices = await pv_picollm_max_top_choices(); - const versionAddress = module._pv_picollm_version(); + const versionAddress = await pv_picollm_version(); const version = arrayBufferToStringAtIndex( - module.HEAPU8, + memoryBufferUint8, versionAddress ); - const contextLengthAddress = module._malloc(Int32Array.BYTES_PER_ELEMENT); + const contextLengthAddress = await aligned_alloc( + Int32Array.BYTES_PER_ELEMENT, + Int32Array.BYTES_PER_ELEMENT + ); + if (contextLengthAddress === 0) { throw new PicoLLMErrors.PicoLLMOutOfMemoryError( 'malloc failed: Cannot allocate memory' ); } - status = module._pv_picollm_context_length( + status = await pv_picollm_context_length( objectAddress, contextLengthAddress ); if (status !== PvStatus.SUCCESS) { - const messageStack = PicoLLM.getMessageStack( - module._pv_get_error_stack, - module._pv_free_error_stack, + const messageStack = await PicoLLM.getMessageStack( + pv_get_error_stack, + pv_free_error_stack, messageStackAddressAddressAddress, messageStackDepthAddress, - module.HEAP32, - module.HEAPU8 + memoryBufferView, + memoryBufferUint8 ); throw pvStatusToException( status, 'Get context length failed', messageStack, + pvError ); } - const contextLength = module.HEAP32[contextLengthAddress / Int32Array.BYTES_PER_ELEMENT]; - module._pv_free(contextLengthAddress); + const contextLength = memoryBufferView.getInt32(contextLengthAddress, true); + await pv_free(contextLengthAddress); - const 
modelAddressAddress = module._malloc(Int32Array.BYTES_PER_ELEMENT); + const modelAddressAddress = await aligned_alloc( + Int32Array.BYTES_PER_ELEMENT, + Int32Array.BYTES_PER_ELEMENT + ); if (modelAddressAddress === 0) { throw new PicoLLMErrors.PicoLLMOutOfMemoryError( 'malloc failed: Cannot allocate memory' ); } - status = module._pv_picollm_model(objectAddress, modelAddressAddress); + status = await pv_picollm_model(objectAddress, modelAddressAddress); if (status !== PvStatus.SUCCESS) { - const messageStack = PicoLLM.getMessageStack( - module._pv_get_error_stack, - module._pv_free_error_stack, + const messageStack = await PicoLLM.getMessageStack( + pv_get_error_stack, + pv_free_error_stack, messageStackAddressAddressAddress, messageStackDepthAddress, - module.HEAP32, - module.HEAPU8 + memoryBufferView, + memoryBufferUint8 ); throw pvStatusToException( status, 'Failed to get model name', messageStack, + pvError ); } - const modelAddress = module.HEAP32[modelAddressAddress / Int32Array.BYTES_PER_ELEMENT]; - module._pv_free(modelAddressAddress); - const model = arrayBufferToStringAtIndex(module.HEAPU8, modelAddress); + const modelAddress = memoryBufferView.getInt32(modelAddressAddress, true); + await pv_free(modelAddressAddress); + const model = arrayBufferToStringAtIndex(memoryBufferUint8, modelAddress); return { - module: module, - - pv_picollm_generate: pv_picollm_generate, - pv_picollm_forward: pv_picollm_forward, + aligned_alloc, + memory: memory, + pvFree: pv_free, + streamCallback: streamCallback, contextLength: contextLength, maxTopChoices: maxTopChoices, model: model, @@ -1230,44 +1454,63 @@ export class PicoLLM { objectAddress: objectAddress, messageStackAddressAddressAddress: messageStackAddressAddressAddress, messageStackDepthAddress: messageStackDepthAddress, + + pvPicoLLMDelete: pv_picollm_delete, + pvPicoLLMGenerate: pv_picollm_generate, + pvPicoLLMInterrupt: pv_picollm_interrupt, + pvPicoLLMDeleteCompletionTokens: pv_picollm_delete_completion_tokens, 
+ pvPicoLLMDeleteCompletion: pv_picollm_delete_completion, + pvPicoLLMTokenize: pv_picollm_tokenize, + pvPicoLLMDeleteTokens: pv_picollm_delete_tokens, + pvPicoLLMForward: pv_picollm_forward, + pvPicoLLMDeleteLogits: pv_picollm_delete_logits, + pvPicoLLMReset: pv_picollm_reset, + + pvGetErrorStack: pv_get_error_stack, + pvFreeErrorStack: pv_free_error_stack, }; } - private static getMessageStack( + private static async getMessageStack( pv_get_error_stack: pv_get_error_stack_type, pv_free_error_stack: pv_free_error_stack_type, messageStackAddressAddressAddress: number, messageStackDepthAddress: number, - memoryBufferInt32: Int32Array, - memoryBufferUint8: Uint8Array, - ): string[] { - const status = pv_get_error_stack(messageStackAddressAddressAddress, messageStackDepthAddress); + memoryBufferView: DataView, + memoryBufferUint8: Uint8Array + ): Promise { + const status = await pv_get_error_stack( + messageStackAddressAddressAddress, + messageStackDepthAddress + ); if (status !== PvStatus.SUCCESS) { - throw new Error(`Unable to get error state: ${status}`); + throw pvStatusToException(status, 'Unable to get PicoLLM error state'); } - const messageStackAddressAddress = memoryBufferInt32[messageStackAddressAddressAddress / Int32Array.BYTES_PER_ELEMENT]; + const messageStackAddressAddress = memoryBufferView.getInt32( + messageStackAddressAddressAddress, + true + ); - const messageStackDepth = memoryBufferInt32[messageStackDepthAddress / Int32Array.BYTES_PER_ELEMENT]; + const messageStackDepth = memoryBufferView.getInt32( + messageStackDepthAddress, + true + ); const messageStack: string[] = []; for (let i = 0; i < messageStackDepth; i++) { - const messageStackAddress = memoryBufferInt32[ - (messageStackAddressAddress / Int32Array.BYTES_PER_ELEMENT) + i]; - const message = arrayBufferToStringAtIndex(memoryBufferUint8, messageStackAddress); + const messageStackAddress = memoryBufferView.getInt32( + messageStackAddressAddress + i * Int32Array.BYTES_PER_ELEMENT, + true + ); 
+ const message = arrayBufferToStringAtIndex( + memoryBufferUint8, + messageStackAddress + ); messageStack.push(message); } - pv_free_error_stack(messageStackAddressAddress); - return messageStack; - } + await pv_free_error_stack(messageStackAddressAddress); - private static wrapAsyncFunction(module: PicoLLMModule, functionName: string, numArgs: number): (...args: any[]) => any { - // @ts-ignore - return module.cwrap( - functionName, - "number", - Array(numArgs).fill("number"), - { async: true } - ); + return messageStack; } } diff --git a/binding/web/src/picollm_worker.ts b/binding/web/src/picollm_worker.ts index 6b22c96d..4ddf27a0 100644 --- a/binding/web/src/picollm_worker.ts +++ b/binding/web/src/picollm_worker.ts @@ -27,7 +27,6 @@ export class PicoLLMWorker { private readonly _model: string; private static _wasmSimd: string; - private static _wasmLib: string; private static _sdk: string = "web"; private constructor(worker: Worker, contextLength: number, maxTopChoices: number, model: string, version: string) { @@ -139,7 +138,6 @@ export class PicoLLMWorker { }, sdk: this._sdk, wasmSimd: this._wasmSimd, - wasmLib: this._wasmLib, }); return returnPromise; @@ -155,16 +153,6 @@ export class PicoLLMWorker { } } - /** - * Set base64 wasm lib file in text format. - * @param wasmLib Base64'd wasm lib file in text format. 
- */ - public static setWasmLib(wasmLib: string): void { - if (this._wasmLib === undefined) { - this._wasmLib = wasmLib; - } - } - public static setSdk(sdk: string): void { PicoLLMWorker._sdk = sdk; } diff --git a/binding/web/src/picollm_worker_handler.ts b/binding/web/src/picollm_worker_handler.ts index 82a1bd7a..8314a714 100644 --- a/binding/web/src/picollm_worker_handler.ts +++ b/binding/web/src/picollm_worker_handler.ts @@ -17,7 +17,6 @@ import { PicoLLMWorkerFailureResponse, PicoLLMWorkerForwardRequest, PicoLLMWorkerGenerateRequest, - PicoLLMWorkerInterruptRequest, PicoLLMWorkerInitRequest, PicoLLMWorkerRequest, PicoLLMWorkerTokenizeRequest, @@ -68,7 +67,6 @@ const initRequest = async (request: PicoLLMWorkerInitRequest): Promise => { } PicoLLM.setWasmSimd(request.wasmSimd); - PicoLLM.setWasmLib(request.wasmLib); PicoLLM.setSdk(request.sdk); picoLLM = await PicoLLM._init( request.accessKey, @@ -98,11 +96,9 @@ const generateRequest = async ( }; }; -const interruptRequest = async ( - _: PicoLLMWorkerInterruptRequest -): Promise => { +const interruptRequest = async (): Promise => { if (picoLLM !== null) { - picoLLM.interrupt(); + await picoLLM.interrupt(); } }; @@ -172,7 +168,7 @@ self.onmessage = async function ( self.postMessage(await generateRequest(event.data)); break; case 'interrupt': - await interruptRequest(event.data); + await interruptRequest(); break; case 'tokenize': self.postMessage(await tokenizeRequest(event.data)); diff --git a/binding/web/test/picollm.test.ts b/binding/web/test/picollm.test.ts index c8a654c0..76f47b63 100644 --- a/binding/web/test/picollm.test.ts +++ b/binding/web/test/picollm.test.ts @@ -521,7 +521,7 @@ describe('PicoLLM generate tests (worker)', () => { generateTests(); }); -describe.only('PicoLLM Dialog tests', () => { +describe('PicoLLM Dialog tests', () => { it('should be able to get prompt', () => { const data = testData.dialog; const conversation = data.conversation as [string, string][]; diff --git 
a/binding/web/tsconfig.json b/binding/web/tsconfig.json index d3fbf507..ea8e366f 100644 --- a/binding/web/tsconfig.json +++ b/binding/web/tsconfig.json @@ -14,11 +14,8 @@ "sourceMap": true, "strict": true, "target": "esnext", - "types": ["node", "emscripten"] + "types": ["node"] }, - "include": [ - "src", - "module.d.ts", - ], - "exclude": ["node_modules", "src/lib"] + "include": ["src", "module.d.ts", "node_modules/@picovoice/pv-xpu"], + "exclude": ["node_modules"] } diff --git a/binding/web/yarn.lock b/binding/web/yarn.lock index 99cfefcd..42963a18 100644 --- a/binding/web/yarn.lock +++ b/binding/web/yarn.lock @@ -1119,10 +1119,10 @@ "@nodelib/fs.scandir" "2.1.5" fastq "^1.6.0" -"@picovoice/web-utils@~1.4.1": - version "1.4.2" - resolved "https://registry.yarnpkg.com/@picovoice/web-utils/-/web-utils-1.4.2.tgz#2ddc44552d15fa1a4958e0c3384e58545255eea1" - integrity sha512-pF5Uw3Vm4mOWJ2H3Zc7E/nDr/O7OhbvgEK6W7cx9MNNK3qq51MqiGluPpZ8a2K61BuIzxcNMC1mXWpmIAWVolA== +"@picovoice/web-utils@~1.4.3": + version "1.4.3" + resolved "https://registry.yarnpkg.com/@picovoice/web-utils/-/web-utils-1.4.3.tgz#1de0b20d6080c18d295c6df37c09d88bf7c4f555" + integrity sha512-7JN3YYsSD9Gtce6YKG3XqpX49dkeu7jTdbox7rHQA/X/Q3zxopXA9zlCKSq6EIjFbiX2iuzDKUx1XrFa3d8c0w== dependencies: commander "^10.0.1" @@ -1184,11 +1184,6 @@ estree-walker "^2.0.2" picomatch "^2.3.1" -"@types/emscripten@^1.39.13": - version "1.39.13" - resolved "https://registry.yarnpkg.com/@types/emscripten/-/emscripten-1.39.13.tgz#afeb1648648dc096efe57983e20387627306e2aa" - integrity sha512-cFq+fO/isvhvmuP/+Sl4K4jtU6E23DoivtbO4r50e3odaxAiVdbfSYRDdJ4gCdxx+3aRjhphS5ZMwIH4hFy/Cw== - "@types/estree@*", "@types/estree@^1.0.0": version "1.0.5" resolved "https://registry.yarnpkg.com/@types/estree/-/estree-1.0.5.tgz#a6ce3e556e00fd9895dd872dd172ad0d4bd687f4" @@ -1218,6 +1213,12 @@ dependencies: undici-types "~5.26.4" +"@types/pv-xpu-web-worker@../../lib/wasm/dist/types/xpu_web_worker": + version "0.0.0" + 
+"@types/pv-xpu-webgpu@../../lib/wasm/dist/types/xpu_webgpu": + version "0.0.0" + "@types/resolve@1.20.2": version "1.20.2" resolved "https://registry.yarnpkg.com/@types/resolve/-/resolve-1.20.2.tgz#97d26e00cd4a0423b4af620abecf3e6f442b7975" @@ -3045,7 +3046,7 @@ mime-db@1.52.0: resolved "https://registry.yarnpkg.com/mime-db/-/mime-db-1.52.0.tgz#bbabcdc02859f4987301c856e3387ce5ec43bf70" integrity sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg== -mime-types@^2.1.12, mime-types@^2.1.35, mime-types@~2.1.19: +mime-types@^2.1.12, mime-types@~2.1.19: version "2.1.35" resolved "https://registry.yarnpkg.com/mime-types/-/mime-types-2.1.35.tgz#381a871b62a734450660ae3deee44813f70d959a" integrity sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw== @@ -3366,6 +3367,12 @@ punycode@^2.1.0, punycode@^2.1.1: resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.3.1.tgz#027422e2faec0b25e1549c3e1bd8309b9133b6e5" integrity sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg== +pv-xpu-web-worker@../../lib/wasm/dist/xpu_web_worker/esm: + version "0.0.0" + +pv-xpu-webgpu@../../lib/wasm/dist/xpu_webgpu/esm: + version "0.0.0" + qs@~6.10.3: version "6.10.5" resolved "https://registry.yarnpkg.com/qs/-/qs-6.10.5.tgz#974715920a80ff6a262264acd2c7e6c2a53282b4" diff --git a/demo/web/chat/package.json b/demo/web/chat/package.json index 1c7828f5..22dcdd15 100644 --- a/demo/web/chat/package.json +++ b/demo/web/chat/package.json @@ -1,11 +1,11 @@ { "name": "picollm-web-chat-demo", - "version": "1.0.0", + "version": "1.1.0", "description": "A demo to show how to create a picoLLM inference engine on a web worker and have a back-and-forth conversation with the LLM, similar to ChatGPT.", "main": "index.js", "private": true, "scripts": { - "start": "node server.js" + "start": "yarn run http-server -a localhost -p 5000" }, "keywords": [ "Picovoice", @@ -16,8 +16,7 @@ 
"author": "Picovoice Inc", "license": "Apache-2.0", "dependencies": { - "@picovoice/picollm-web": "../../../binding/web", - "mime-types": "^2.1.35" + "@picovoice/picollm-web": "../../../binding/web" }, "devDependencies": { "http-server": "^14.0.0" diff --git a/demo/web/chat/server.js b/demo/web/chat/server.js deleted file mode 100644 index 7b9f9b0a..00000000 --- a/demo/web/chat/server.js +++ /dev/null @@ -1,43 +0,0 @@ -const http = require('http'); -const fs = require('fs'); -const path = require('path'); -const mime = require('mime-types'); - -const PORT = process.env.PORT || 5000; -const HOST = '127.0.0.1'; // Listen on localhost -const publicDir = path.join(__dirname); - -const server = http.createServer((req, res) => { - const url = (req.url === '/') ? '/index.html' : req.url; - const filePath = path.join(publicDir, url); - const contentType = mime.lookup(filePath) || 'application/octet-stream'; - - console.log(url) - - fs.readFile(filePath, (err, content) => { - if (err) { - if (err.code === 'ENOENT') { - // File not found - res.writeHead(404, { 'Content-Type': 'text/plain' }); - res.end('404 Not Found'); - } else { - // Server error - res.writeHead(500, { 'Content-Type': 'text/plain' }); - res.end(`500 Internal Server Error: ${err.code}`); - } - } else { - // Success - res.writeHead(200, { - 'Content-Type': contentType, - 'Content-Length': content.length, - 'Cross-Origin-Opener-Policy': 'same-origin', - 'Cross-Origin-Embedder-Policy': 'require-corp' - }); - res.end(content); - } - }); -}); - -server.listen(PORT, HOST, () => { - console.log(`Server is running on http://${HOST}:${PORT}`); -}); diff --git a/demo/web/chat/yarn.lock b/demo/web/chat/yarn.lock index c59a9da9..9297cdf0 100644 --- a/demo/web/chat/yarn.lock +++ b/demo/web/chat/yarn.lock @@ -5,12 +5,12 @@ "@picovoice/picollm-web@../../../binding/web": version "1.1.0" dependencies: - "@picovoice/web-utils" "~1.4.1" + "@picovoice/web-utils" "~1.4.3" -"@picovoice/web-utils@~1.4.1": - version "1.4.2" - 
resolved "https://registry.yarnpkg.com/@picovoice/web-utils/-/web-utils-1.4.2.tgz#2ddc44552d15fa1a4958e0c3384e58545255eea1" - integrity sha512-pF5Uw3Vm4mOWJ2H3Zc7E/nDr/O7OhbvgEK6W7cx9MNNK3qq51MqiGluPpZ8a2K61BuIzxcNMC1mXWpmIAWVolA== +"@picovoice/web-utils@~1.4.3": + version "1.4.3" + resolved "https://registry.yarnpkg.com/@picovoice/web-utils/-/web-utils-1.4.3.tgz#1de0b20d6080c18d295c6df37c09d88bf7c4f555" + integrity sha512-7JN3YYsSD9Gtce6YKG3XqpX49dkeu7jTdbox7rHQA/X/Q3zxopXA9zlCKSq6EIjFbiX2iuzDKUx1XrFa3d8c0w== dependencies: commander "^10.0.1" @@ -203,18 +203,6 @@ lodash@^4.17.14: resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c" integrity sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg== -mime-db@1.52.0: - version "1.52.0" - resolved "https://registry.yarnpkg.com/mime-db/-/mime-db-1.52.0.tgz#bbabcdc02859f4987301c856e3387ce5ec43bf70" - integrity sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg== - -mime-types@^2.1.35: - version "2.1.35" - resolved "https://registry.yarnpkg.com/mime-types/-/mime-types-2.1.35.tgz#381a871b62a734450660ae3deee44813f70d959a" - integrity sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw== - dependencies: - mime-db "1.52.0" - mime@^1.6.0: version "1.6.0" resolved "https://registry.yarnpkg.com/mime/-/mime-1.6.0.tgz#32cd9e5c64553bd58d19a568af452acff04981b1" diff --git a/demo/web/completion/package.json b/demo/web/completion/package.json index 472d1488..60f4a435 100644 --- a/demo/web/completion/package.json +++ b/demo/web/completion/package.json @@ -1,11 +1,11 @@ { "name": "picollm-web-completion-demo", - "version": "1.0.0", + "version": "1.1.0", "description": "A demo to show how to create a picoLLM inference engine on a web worker that accepts a prompt along with a set of optional parameters and generates a single completion.", "main": "index.js", 
"private": true, "scripts": { - "start": "node server.js" + "start": "yarn run http-server -a localhost -p 5000" }, "keywords": [ "Picovoice", @@ -16,8 +16,7 @@ "author": "Picovoice Inc", "license": "Apache-2.0", "dependencies": { - "@picovoice/picollm-web": "../../../binding/web", - "mime-types": "^2.1.35" + "@picovoice/picollm-web": "../../../binding/web" }, "devDependencies": { "http-server": "^14.0.0" diff --git a/demo/web/completion/server.js b/demo/web/completion/server.js deleted file mode 100644 index 7b9f9b0a..00000000 --- a/demo/web/completion/server.js +++ /dev/null @@ -1,43 +0,0 @@ -const http = require('http'); -const fs = require('fs'); -const path = require('path'); -const mime = require('mime-types'); - -const PORT = process.env.PORT || 5000; -const HOST = '127.0.0.1'; // Listen on localhost -const publicDir = path.join(__dirname); - -const server = http.createServer((req, res) => { - const url = (req.url === '/') ? '/index.html' : req.url; - const filePath = path.join(publicDir, url); - const contentType = mime.lookup(filePath) || 'application/octet-stream'; - - console.log(url) - - fs.readFile(filePath, (err, content) => { - if (err) { - if (err.code === 'ENOENT') { - // File not found - res.writeHead(404, { 'Content-Type': 'text/plain' }); - res.end('404 Not Found'); - } else { - // Server error - res.writeHead(500, { 'Content-Type': 'text/plain' }); - res.end(`500 Internal Server Error: ${err.code}`); - } - } else { - // Success - res.writeHead(200, { - 'Content-Type': contentType, - 'Content-Length': content.length, - 'Cross-Origin-Opener-Policy': 'same-origin', - 'Cross-Origin-Embedder-Policy': 'require-corp' - }); - res.end(content); - } - }); -}); - -server.listen(PORT, HOST, () => { - console.log(`Server is running on http://${HOST}:${PORT}`); -}); diff --git a/demo/web/completion/yarn.lock b/demo/web/completion/yarn.lock index c59a9da9..9297cdf0 100644 --- a/demo/web/completion/yarn.lock +++ b/demo/web/completion/yarn.lock @@ -5,12 +5,12 
@@ "@picovoice/picollm-web@../../../binding/web": version "1.1.0" dependencies: - "@picovoice/web-utils" "~1.4.1" + "@picovoice/web-utils" "~1.4.3" -"@picovoice/web-utils@~1.4.1": - version "1.4.2" - resolved "https://registry.yarnpkg.com/@picovoice/web-utils/-/web-utils-1.4.2.tgz#2ddc44552d15fa1a4958e0c3384e58545255eea1" - integrity sha512-pF5Uw3Vm4mOWJ2H3Zc7E/nDr/O7OhbvgEK6W7cx9MNNK3qq51MqiGluPpZ8a2K61BuIzxcNMC1mXWpmIAWVolA== +"@picovoice/web-utils@~1.4.3": + version "1.4.3" + resolved "https://registry.yarnpkg.com/@picovoice/web-utils/-/web-utils-1.4.3.tgz#1de0b20d6080c18d295c6df37c09d88bf7c4f555" + integrity sha512-7JN3YYsSD9Gtce6YKG3XqpX49dkeu7jTdbox7rHQA/X/Q3zxopXA9zlCKSq6EIjFbiX2iuzDKUx1XrFa3d8c0w== dependencies: commander "^10.0.1" @@ -203,18 +203,6 @@ lodash@^4.17.14: resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c" integrity sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg== -mime-db@1.52.0: - version "1.52.0" - resolved "https://registry.yarnpkg.com/mime-db/-/mime-db-1.52.0.tgz#bbabcdc02859f4987301c856e3387ce5ec43bf70" - integrity sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg== - -mime-types@^2.1.35: - version "2.1.35" - resolved "https://registry.yarnpkg.com/mime-types/-/mime-types-2.1.35.tgz#381a871b62a734450660ae3deee44813f70d959a" - integrity sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw== - dependencies: - mime-db "1.52.0" - mime@^1.6.0: version "1.6.0" resolved "https://registry.yarnpkg.com/mime/-/mime-1.6.0.tgz#32cd9e5c64553bd58d19a568af452acff04981b1" diff --git a/lib/wasm/dist/xpu_webgpu/esm/index.js b/lib/wasm/dist/xpu_webgpu/esm/index.js new file mode 100644 index 00000000..d763044a --- /dev/null +++ b/lib/wasm/dist/xpu_webgpu/esm/index.js @@ -0,0 +1,7676 @@ +function asyncGeneratorStep(gen, resolve, reject, _next, _throw, key, arg) { + try { + 
var info = gen[key](arg); + var value = info.value; + } catch (error) { + reject(error); + return; + } + if (info.done) { + resolve(value); + } else { + Promise.resolve(value).then(_next, _throw); + } +} +function _asyncToGenerator(fn) { + return function () { + var self = this, + args = arguments; + return new Promise(function (resolve, reject) { + var gen = fn.apply(self, args); + function _next(value) { + asyncGeneratorStep(gen, resolve, reject, _next, _throw, "next", value); + } + function _throw(err) { + asyncGeneratorStep(gen, resolve, reject, _next, _throw, "throw", err); + } + _next(undefined); + }); + }; +} + +function _typeof$3(o) { + "@babel/helpers - typeof"; + + return _typeof$3 = "function" == typeof Symbol && "symbol" == typeof Symbol.iterator ? function (o) { + return typeof o; + } : function (o) { + return o && "function" == typeof Symbol && o.constructor === Symbol && o !== Symbol.prototype ? "symbol" : typeof o; + }, _typeof$3(o); +} + +function toPrimitive(t, r) { + if ("object" != _typeof$3(t) || !t) return t; + var e = t[Symbol.toPrimitive]; + if (void 0 !== e) { + var i = e.call(t, r || "default"); + if ("object" != _typeof$3(i)) return i; + throw new TypeError("@@toPrimitive must return a primitive value."); + } + return ("string" === r ? String : Number)(t); +} + +function toPropertyKey(t) { + var i = toPrimitive(t, "string"); + return "symbol" == _typeof$3(i) ? i : String(i); +} + +function _defineProperty(obj, key, value) { + key = toPropertyKey(key); + if (key in obj) { + Object.defineProperty(obj, key, { + value: value, + enumerable: true, + configurable: true, + writable: true + }); + } else { + obj[key] = value; + } + return obj; +} + +function getDefaultExportFromCjs (x) { + return x && x.__esModule && Object.prototype.hasOwnProperty.call(x, 'default') ? 
x['default'] : x; +} + +var regeneratorRuntime$2 = {exports: {}}; + +var _typeof$2 = {exports: {}}; + +_typeof$2.exports; + +(function (module) { + function _typeof(o) { + "@babel/helpers - typeof"; + + return (module.exports = _typeof = "function" == typeof Symbol && "symbol" == typeof Symbol.iterator ? function (o) { + return typeof o; + } : function (o) { + return o && "function" == typeof Symbol && o.constructor === Symbol && o !== Symbol.prototype ? "symbol" : typeof o; + }, module.exports.__esModule = true, module.exports["default"] = module.exports), _typeof(o); + } + module.exports = _typeof, module.exports.__esModule = true, module.exports["default"] = module.exports; +} (_typeof$2)); + +var _typeofExports$1 = _typeof$2.exports; + +regeneratorRuntime$2.exports; + +(function (module) { + var _typeof = _typeofExports$1["default"]; + function _regeneratorRuntime() { + module.exports = _regeneratorRuntime = function _regeneratorRuntime() { + return e; + }, module.exports.__esModule = true, module.exports["default"] = module.exports; + var t, + e = {}, + r = Object.prototype, + n = r.hasOwnProperty, + o = Object.defineProperty || function (t, e, r) { + t[e] = r.value; + }, + i = "function" == typeof Symbol ? Symbol : {}, + a = i.iterator || "@@iterator", + c = i.asyncIterator || "@@asyncIterator", + u = i.toStringTag || "@@toStringTag"; + function define(t, e, r) { + return Object.defineProperty(t, e, { + value: r, + enumerable: !0, + configurable: !0, + writable: !0 + }), t[e]; + } + try { + define({}, ""); + } catch (t) { + define = function define(t, e, r) { + return t[e] = r; + }; + } + function wrap(t, e, r, n) { + var i = e && e.prototype instanceof Generator ? 
e : Generator, + a = Object.create(i.prototype), + c = new Context(n || []); + return o(a, "_invoke", { + value: makeInvokeMethod(t, r, c) + }), a; + } + function tryCatch(t, e, r) { + try { + return { + type: "normal", + arg: t.call(e, r) + }; + } catch (t) { + return { + type: "throw", + arg: t + }; + } + } + e.wrap = wrap; + var h = "suspendedStart", + l = "suspendedYield", + f = "executing", + s = "completed", + y = {}; + function Generator() {} + function GeneratorFunction() {} + function GeneratorFunctionPrototype() {} + var p = {}; + define(p, a, function () { + return this; + }); + var d = Object.getPrototypeOf, + v = d && d(d(values([]))); + v && v !== r && n.call(v, a) && (p = v); + var g = GeneratorFunctionPrototype.prototype = Generator.prototype = Object.create(p); + function defineIteratorMethods(t) { + ["next", "throw", "return"].forEach(function (e) { + define(t, e, function (t) { + return this._invoke(e, t); + }); + }); + } + function AsyncIterator(t, e) { + function invoke(r, o, i, a) { + var c = tryCatch(t[r], t, o); + if ("throw" !== c.type) { + var u = c.arg, + h = u.value; + return h && "object" == _typeof(h) && n.call(h, "__await") ? e.resolve(h.__await).then(function (t) { + invoke("next", t, i, a); + }, function (t) { + invoke("throw", t, i, a); + }) : e.resolve(h).then(function (t) { + u.value = t, i(u); + }, function (t) { + return invoke("throw", t, i, a); + }); + } + a(c.arg); + } + var r; + o(this, "_invoke", { + value: function value(t, n) { + function callInvokeWithMethodAndArg() { + return new e(function (e, r) { + invoke(t, n, e, r); + }); + } + return r = r ? 
r.then(callInvokeWithMethodAndArg, callInvokeWithMethodAndArg) : callInvokeWithMethodAndArg(); + } + }); + } + function makeInvokeMethod(e, r, n) { + var o = h; + return function (i, a) { + if (o === f) throw new Error("Generator is already running"); + if (o === s) { + if ("throw" === i) throw a; + return { + value: t, + done: !0 + }; + } + for (n.method = i, n.arg = a;;) { + var c = n.delegate; + if (c) { + var u = maybeInvokeDelegate(c, n); + if (u) { + if (u === y) continue; + return u; + } + } + if ("next" === n.method) n.sent = n._sent = n.arg;else if ("throw" === n.method) { + if (o === h) throw o = s, n.arg; + n.dispatchException(n.arg); + } else "return" === n.method && n.abrupt("return", n.arg); + o = f; + var p = tryCatch(e, r, n); + if ("normal" === p.type) { + if (o = n.done ? s : l, p.arg === y) continue; + return { + value: p.arg, + done: n.done + }; + } + "throw" === p.type && (o = s, n.method = "throw", n.arg = p.arg); + } + }; + } + function maybeInvokeDelegate(e, r) { + var n = r.method, + o = e.iterator[n]; + if (o === t) return r.delegate = null, "throw" === n && e.iterator["return"] && (r.method = "return", r.arg = t, maybeInvokeDelegate(e, r), "throw" === r.method) || "return" !== n && (r.method = "throw", r.arg = new TypeError("The iterator does not provide a '" + n + "' method")), y; + var i = tryCatch(o, e.iterator, r.arg); + if ("throw" === i.type) return r.method = "throw", r.arg = i.arg, r.delegate = null, y; + var a = i.arg; + return a ? a.done ? 
(r[e.resultName] = a.value, r.next = e.nextLoc, "return" !== r.method && (r.method = "next", r.arg = t), r.delegate = null, y) : a : (r.method = "throw", r.arg = new TypeError("iterator result is not an object"), r.delegate = null, y); + } + function pushTryEntry(t) { + var e = { + tryLoc: t[0] + }; + 1 in t && (e.catchLoc = t[1]), 2 in t && (e.finallyLoc = t[2], e.afterLoc = t[3]), this.tryEntries.push(e); + } + function resetTryEntry(t) { + var e = t.completion || {}; + e.type = "normal", delete e.arg, t.completion = e; + } + function Context(t) { + this.tryEntries = [{ + tryLoc: "root" + }], t.forEach(pushTryEntry, this), this.reset(!0); + } + function values(e) { + if (e || "" === e) { + var r = e[a]; + if (r) return r.call(e); + if ("function" == typeof e.next) return e; + if (!isNaN(e.length)) { + var o = -1, + i = function next() { + for (; ++o < e.length;) if (n.call(e, o)) return next.value = e[o], next.done = !1, next; + return next.value = t, next.done = !0, next; + }; + return i.next = i; + } + } + throw new TypeError(_typeof(e) + " is not iterable"); + } + return GeneratorFunction.prototype = GeneratorFunctionPrototype, o(g, "constructor", { + value: GeneratorFunctionPrototype, + configurable: !0 + }), o(GeneratorFunctionPrototype, "constructor", { + value: GeneratorFunction, + configurable: !0 + }), GeneratorFunction.displayName = define(GeneratorFunctionPrototype, u, "GeneratorFunction"), e.isGeneratorFunction = function (t) { + var e = "function" == typeof t && t.constructor; + return !!e && (e === GeneratorFunction || "GeneratorFunction" === (e.displayName || e.name)); + }, e.mark = function (t) { + return Object.setPrototypeOf ? 
Object.setPrototypeOf(t, GeneratorFunctionPrototype) : (t.__proto__ = GeneratorFunctionPrototype, define(t, u, "GeneratorFunction")), t.prototype = Object.create(g), t; + }, e.awrap = function (t) { + return { + __await: t + }; + }, defineIteratorMethods(AsyncIterator.prototype), define(AsyncIterator.prototype, c, function () { + return this; + }), e.AsyncIterator = AsyncIterator, e.async = function (t, r, n, o, i) { + void 0 === i && (i = Promise); + var a = new AsyncIterator(wrap(t, r, n, o), i); + return e.isGeneratorFunction(r) ? a : a.next().then(function (t) { + return t.done ? t.value : a.next(); + }); + }, defineIteratorMethods(g), define(g, u, "Generator"), define(g, a, function () { + return this; + }), define(g, "toString", function () { + return "[object Generator]"; + }), e.keys = function (t) { + var e = Object(t), + r = []; + for (var n in e) r.push(n); + return r.reverse(), function next() { + for (; r.length;) { + var t = r.pop(); + if (t in e) return next.value = t, next.done = !1, next; + } + return next.done = !0, next; + }; + }, e.values = values, Context.prototype = { + constructor: Context, + reset: function reset(e) { + if (this.prev = 0, this.next = 0, this.sent = this._sent = t, this.done = !1, this.delegate = null, this.method = "next", this.arg = t, this.tryEntries.forEach(resetTryEntry), !e) for (var r in this) "t" === r.charAt(0) && n.call(this, r) && !isNaN(+r.slice(1)) && (this[r] = t); + }, + stop: function stop() { + this.done = !0; + var t = this.tryEntries[0].completion; + if ("throw" === t.type) throw t.arg; + return this.rval; + }, + dispatchException: function dispatchException(e) { + if (this.done) throw e; + var r = this; + function handle(n, o) { + return a.type = "throw", a.arg = e, r.next = n, o && (r.method = "next", r.arg = t), !!o; + } + for (var o = this.tryEntries.length - 1; o >= 0; --o) { + var i = this.tryEntries[o], + a = i.completion; + if ("root" === i.tryLoc) return handle("end"); + if (i.tryLoc <= this.prev) 
{ + var c = n.call(i, "catchLoc"), + u = n.call(i, "finallyLoc"); + if (c && u) { + if (this.prev < i.catchLoc) return handle(i.catchLoc, !0); + if (this.prev < i.finallyLoc) return handle(i.finallyLoc); + } else if (c) { + if (this.prev < i.catchLoc) return handle(i.catchLoc, !0); + } else { + if (!u) throw new Error("try statement without catch or finally"); + if (this.prev < i.finallyLoc) return handle(i.finallyLoc); + } + } + } + }, + abrupt: function abrupt(t, e) { + for (var r = this.tryEntries.length - 1; r >= 0; --r) { + var o = this.tryEntries[r]; + if (o.tryLoc <= this.prev && n.call(o, "finallyLoc") && this.prev < o.finallyLoc) { + var i = o; + break; + } + } + i && ("break" === t || "continue" === t) && i.tryLoc <= e && e <= i.finallyLoc && (i = null); + var a = i ? i.completion : {}; + return a.type = t, a.arg = e, i ? (this.method = "next", this.next = i.finallyLoc, y) : this.complete(a); + }, + complete: function complete(t, e) { + if ("throw" === t.type) throw t.arg; + return "break" === t.type || "continue" === t.type ? this.next = t.arg : "return" === t.type ? 
(this.rval = this.arg = t.arg, this.method = "return", this.next = "end") : "normal" === t.type && e && (this.next = e), y; + }, + finish: function finish(t) { + for (var e = this.tryEntries.length - 1; e >= 0; --e) { + var r = this.tryEntries[e]; + if (r.finallyLoc === t) return this.complete(r.completion, r.afterLoc), resetTryEntry(r), y; + } + }, + "catch": function _catch(t) { + for (var e = this.tryEntries.length - 1; e >= 0; --e) { + var r = this.tryEntries[e]; + if (r.tryLoc === t) { + var n = r.completion; + if ("throw" === n.type) { + var o = n.arg; + resetTryEntry(r); + } + return o; + } + } + throw new Error("illegal catch attempt"); + }, + delegateYield: function delegateYield(e, r, n) { + return this.delegate = { + iterator: values(e), + resultName: r, + nextLoc: n + }, "next" === this.method && (this.arg = t), y; + } + }, e; + } + module.exports = _regeneratorRuntime, module.exports.__esModule = true, module.exports["default"] = module.exports; +} (regeneratorRuntime$2)); + +var regeneratorRuntimeExports$1 = regeneratorRuntime$2.exports; + +// TODO(Babel 8): Remove this file. 
+ +var runtime$1 = regeneratorRuntimeExports$1(); +var regenerator = runtime$1; + +// Copied from https://github.com/facebook/regenerator/blob/main/packages/runtime/runtime.js#L736= +try { + regeneratorRuntime = runtime$1; +} catch (accidentalStrictMode) { + if (typeof globalThis === "object") { + globalThis.regeneratorRuntime = runtime$1; + } else { + Function("r", "regeneratorRuntime = r")(runtime$1); + } +} + +var _regeneratorRuntime = /*@__PURE__*/getDefaultExportFromCjs(regenerator); + +// NOTE: this list must be up-to-date with browsers listed in +// test/acceptance/useragentstrings.yml +const BROWSER_ALIASES_MAP = { + 'Amazon Silk': 'amazon_silk', + 'Android Browser': 'android', + Bada: 'bada', + BlackBerry: 'blackberry', + Chrome: 'chrome', + Chromium: 'chromium', + Electron: 'electron', + Epiphany: 'epiphany', + Firefox: 'firefox', + Focus: 'focus', + Generic: 'generic', + 'Google Search': 'google_search', + Googlebot: 'googlebot', + 'Internet Explorer': 'ie', + 'K-Meleon': 'k_meleon', + Maxthon: 'maxthon', + 'Microsoft Edge': 'edge', + 'MZ Browser': 'mz', + 'NAVER Whale Browser': 'naver', + Opera: 'opera', + 'Opera Coast': 'opera_coast', + PhantomJS: 'phantomjs', + Puffin: 'puffin', + QupZilla: 'qupzilla', + QQ: 'qq', + QQLite: 'qqlite', + Safari: 'safari', + Sailfish: 'sailfish', + 'Samsung Internet for Android': 'samsung_internet', + SeaMonkey: 'seamonkey', + Sleipnir: 'sleipnir', + Swing: 'swing', + Tizen: 'tizen', + 'UC Browser': 'uc', + Vivaldi: 'vivaldi', + 'WebOS Browser': 'webos', + WeChat: 'wechat', + 'Yandex Browser': 'yandex', + Roku: 'roku', +}; + +const BROWSER_MAP = { + amazon_silk: 'Amazon Silk', + android: 'Android Browser', + bada: 'Bada', + blackberry: 'BlackBerry', + chrome: 'Chrome', + chromium: 'Chromium', + electron: 'Electron', + epiphany: 'Epiphany', + firefox: 'Firefox', + focus: 'Focus', + generic: 'Generic', + googlebot: 'Googlebot', + google_search: 'Google Search', + ie: 'Internet Explorer', + k_meleon: 'K-Meleon', + maxthon: 
'Maxthon', + edge: 'Microsoft Edge', + mz: 'MZ Browser', + naver: 'NAVER Whale Browser', + opera: 'Opera', + opera_coast: 'Opera Coast', + phantomjs: 'PhantomJS', + puffin: 'Puffin', + qupzilla: 'QupZilla', + qq: 'QQ Browser', + qqlite: 'QQ Browser Lite', + safari: 'Safari', + sailfish: 'Sailfish', + samsung_internet: 'Samsung Internet for Android', + seamonkey: 'SeaMonkey', + sleipnir: 'Sleipnir', + swing: 'Swing', + tizen: 'Tizen', + uc: 'UC Browser', + vivaldi: 'Vivaldi', + webos: 'WebOS Browser', + wechat: 'WeChat', + yandex: 'Yandex Browser', +}; + +const PLATFORMS_MAP = { + tablet: 'tablet', + mobile: 'mobile', + desktop: 'desktop', + tv: 'tv', +}; + +const OS_MAP = { + WindowsPhone: 'Windows Phone', + Windows: 'Windows', + MacOS: 'macOS', + iOS: 'iOS', + Android: 'Android', + WebOS: 'WebOS', + BlackBerry: 'BlackBerry', + Bada: 'Bada', + Tizen: 'Tizen', + Linux: 'Linux', + ChromeOS: 'Chrome OS', + PlayStation4: 'PlayStation 4', + Roku: 'Roku', +}; + +const ENGINE_MAP = { + EdgeHTML: 'EdgeHTML', + Blink: 'Blink', + Trident: 'Trident', + Presto: 'Presto', + Gecko: 'Gecko', + WebKit: 'WebKit', +}; + +class Utils { + /** + * Get first matched item for a string + * @param {RegExp} regexp + * @param {String} ua + * @return {Array|{index: number, input: string}|*|boolean|string} + */ + static getFirstMatch(regexp, ua) { + const match = ua.match(regexp); + return (match && match.length > 0 && match[1]) || ''; + } + + /** + * Get second matched item for a string + * @param regexp + * @param {String} ua + * @return {Array|{index: number, input: string}|*|boolean|string} + */ + static getSecondMatch(regexp, ua) { + const match = ua.match(regexp); + return (match && match.length > 1 && match[2]) || ''; + } + + /** + * Match a regexp and return a constant or undefined + * @param {RegExp} regexp + * @param {String} ua + * @param {*} _const Any const that will be returned if regexp matches the string + * @return {*} + */ + static matchAndReturnConst(regexp, ua, _const) { + 
if (regexp.test(ua)) { + return _const; + } + return void (0); + } + + static getWindowsVersionName(version) { + switch (version) { + case 'NT': return 'NT'; + case 'XP': return 'XP'; + case 'NT 5.0': return '2000'; + case 'NT 5.1': return 'XP'; + case 'NT 5.2': return '2003'; + case 'NT 6.0': return 'Vista'; + case 'NT 6.1': return '7'; + case 'NT 6.2': return '8'; + case 'NT 6.3': return '8.1'; + case 'NT 10.0': return '10'; + default: return undefined; + } + } + + /** + * Get macOS version name + * 10.5 - Leopard + * 10.6 - Snow Leopard + * 10.7 - Lion + * 10.8 - Mountain Lion + * 10.9 - Mavericks + * 10.10 - Yosemite + * 10.11 - El Capitan + * 10.12 - Sierra + * 10.13 - High Sierra + * 10.14 - Mojave + * 10.15 - Catalina + * + * @example + * getMacOSVersionName("10.14") // 'Mojave' + * + * @param {string} version + * @return {string} versionName + */ + static getMacOSVersionName(version) { + const v = version.split('.').splice(0, 2).map(s => parseInt(s, 10) || 0); + v.push(0); + if (v[0] !== 10) return undefined; + switch (v[1]) { + case 5: return 'Leopard'; + case 6: return 'Snow Leopard'; + case 7: return 'Lion'; + case 8: return 'Mountain Lion'; + case 9: return 'Mavericks'; + case 10: return 'Yosemite'; + case 11: return 'El Capitan'; + case 12: return 'Sierra'; + case 13: return 'High Sierra'; + case 14: return 'Mojave'; + case 15: return 'Catalina'; + default: return undefined; + } + } + + /** + * Get Android version name + * 1.5 - Cupcake + * 1.6 - Donut + * 2.0 - Eclair + * 2.1 - Eclair + * 2.2 - Froyo + * 2.x - Gingerbread + * 3.x - Honeycomb + * 4.0 - Ice Cream Sandwich + * 4.1 - Jelly Bean + * 4.4 - KitKat + * 5.x - Lollipop + * 6.x - Marshmallow + * 7.x - Nougat + * 8.x - Oreo + * 9.x - Pie + * + * @example + * getAndroidVersionName("7.0") // 'Nougat' + * + * @param {string} version + * @return {string} versionName + */ + static getAndroidVersionName(version) { + const v = version.split('.').splice(0, 2).map(s => parseInt(s, 10) || 0); + v.push(0); 
+ if (v[0] === 1 && v[1] < 5) return undefined; + if (v[0] === 1 && v[1] < 6) return 'Cupcake'; + if (v[0] === 1 && v[1] >= 6) return 'Donut'; + if (v[0] === 2 && v[1] < 2) return 'Eclair'; + if (v[0] === 2 && v[1] === 2) return 'Froyo'; + if (v[0] === 2 && v[1] > 2) return 'Gingerbread'; + if (v[0] === 3) return 'Honeycomb'; + if (v[0] === 4 && v[1] < 1) return 'Ice Cream Sandwich'; + if (v[0] === 4 && v[1] < 4) return 'Jelly Bean'; + if (v[0] === 4 && v[1] >= 4) return 'KitKat'; + if (v[0] === 5) return 'Lollipop'; + if (v[0] === 6) return 'Marshmallow'; + if (v[0] === 7) return 'Nougat'; + if (v[0] === 8) return 'Oreo'; + if (v[0] === 9) return 'Pie'; + return undefined; + } + + /** + * Get version precisions count + * + * @example + * getVersionPrecision("1.10.3") // 3 + * + * @param {string} version + * @return {number} + */ + static getVersionPrecision(version) { + return version.split('.').length; + } + + /** + * Calculate browser version weight + * + * @example + * compareVersions('1.10.2.1', '1.8.2.1.90') // 1 + * compareVersions('1.010.2.1', '1.09.2.1.90'); // 1 + * compareVersions('1.10.2.1', '1.10.2.1'); // 0 + * compareVersions('1.10.2.1', '1.0800.2'); // -1 + * compareVersions('1.10.2.1', '1.10', true); // 0 + * + * @param {String} versionA versions versions to compare + * @param {String} versionB versions versions to compare + * @param {boolean} [isLoose] enable loose comparison + * @return {Number} comparison result: -1 when versionA is lower, + * 1 when versionA is bigger, 0 when both equal + */ + /* eslint consistent-return: 1 */ + static compareVersions(versionA, versionB, isLoose = false) { + // 1) get common precision for both versions, for example for "10.0" and "9" it should be 2 + const versionAPrecision = Utils.getVersionPrecision(versionA); + const versionBPrecision = Utils.getVersionPrecision(versionB); + + let precision = Math.max(versionAPrecision, versionBPrecision); + let lastPrecision = 0; + + const chunks = Utils.map([versionA, 
versionB], (version) => { + const delta = precision - Utils.getVersionPrecision(version); + + // 2) "9" -> "9.0" (for precision = 2) + const _version = version + new Array(delta + 1).join('.0'); + + // 3) "9.0" -> ["000000000"", "000000009"] + return Utils.map(_version.split('.'), chunk => new Array(20 - chunk.length).join('0') + chunk).reverse(); + }); + + // adjust precision for loose comparison + if (isLoose) { + lastPrecision = precision - Math.min(versionAPrecision, versionBPrecision); + } + + // iterate in reverse order by reversed chunks array + precision -= 1; + while (precision >= lastPrecision) { + // 4) compare: "000000009" > "000000010" = false (but "9" > "10" = true) + if (chunks[0][precision] > chunks[1][precision]) { + return 1; + } + + if (chunks[0][precision] === chunks[1][precision]) { + if (precision === lastPrecision) { + // all version chunks are same + return 0; + } + + precision -= 1; + } else if (chunks[0][precision] < chunks[1][precision]) { + return -1; + } + } + + return undefined; + } + + /** + * Array::map polyfill + * + * @param {Array} arr + * @param {Function} iterator + * @return {Array} + */ + static map(arr, iterator) { + const result = []; + let i; + if (Array.prototype.map) { + return Array.prototype.map.call(arr, iterator); + } + for (i = 0; i < arr.length; i += 1) { + result.push(iterator(arr[i])); + } + return result; + } + + /** + * Array::find polyfill + * + * @param {Array} arr + * @param {Function} predicate + * @return {Array} + */ + static find(arr, predicate) { + let i; + let l; + if (Array.prototype.find) { + return Array.prototype.find.call(arr, predicate); + } + for (i = 0, l = arr.length; i < l; i += 1) { + const value = arr[i]; + if (predicate(value, i)) { + return value; + } + } + return undefined; + } + + /** + * Object::assign polyfill + * + * @param {Object} obj + * @param {Object} ...objs + * @return {Object} + */ + static assign(obj, ...assigners) { + const result = obj; + let i; + let l; + if 
(Object.assign) { + return Object.assign(obj, ...assigners); + } + for (i = 0, l = assigners.length; i < l; i += 1) { + const assigner = assigners[i]; + if (typeof assigner === 'object' && assigner !== null) { + const keys = Object.keys(assigner); + keys.forEach((key) => { + result[key] = assigner[key]; + }); + } + } + return obj; + } + + /** + * Get short version/alias for a browser name + * + * @example + * getBrowserAlias('Microsoft Edge') // edge + * + * @param {string} browserName + * @return {string} + */ + static getBrowserAlias(browserName) { + return BROWSER_ALIASES_MAP[browserName]; + } + + /** + * Get short version/alias for a browser name + * + * @example + * getBrowserAlias('edge') // Microsoft Edge + * + * @param {string} browserAlias + * @return {string} + */ + static getBrowserTypeByAlias(browserAlias) { + return BROWSER_MAP[browserAlias] || ''; + } +} + +/** + * Browsers' descriptors + * + * The idea of descriptors is simple. You should know about them two simple things: + * 1. Every descriptor has a method or property called `test` and a `describe` method. + * 2. Order of descriptors is important. + * + * More details: + * 1. Method or property `test` serves as a way to detect whether the UA string + * matches some certain browser or not. The `describe` method helps to make a result + * object with params that show some browser-specific things: name, version, etc. + * 2. Order of descriptors is important because a Parser goes through them one by one + * in course. For example, if you insert Chrome's descriptor as the first one, + * more then a half of browsers will be described as Chrome, because they will pass + * the Chrome descriptor's test. + * + * Descriptor's `test` could be a property with an array of RegExps, where every RegExp + * will be applied to a UA string to test it whether it matches or not. + * If a descriptor has two or more regexps in the `test` array it tests them one by one + * with a logical sum operation. 
Parser stops if it has found any RegExp that matches the UA. + * + * Or `test` could be a method. In that case it gets a Parser instance and should + * return true/false to get the Parser know if this browser descriptor matches the UA or not. + */ + +const commonVersionIdentifier = /version\/(\d+(\.?_?\d+)+)/i; + +const browsersList = [ + /* Googlebot */ + { + test: [/googlebot/i], + describe(ua) { + const browser = { + name: 'Googlebot', + }; + const version = Utils.getFirstMatch(/googlebot\/(\d+(\.\d+))/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + + /* Opera < 13.0 */ + { + test: [/opera/i], + describe(ua) { + const browser = { + name: 'Opera', + }; + const version = Utils.getFirstMatch(commonVersionIdentifier, ua) || Utils.getFirstMatch(/(?:opera)[\s/](\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + + /* Opera > 13.0 */ + { + test: [/opr\/|opios/i], + describe(ua) { + const browser = { + name: 'Opera', + }; + const version = Utils.getFirstMatch(/(?:opr|opios)[\s/](\S+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/SamsungBrowser/i], + describe(ua) { + const browser = { + name: 'Samsung Internet for Android', + }; + const version = Utils.getFirstMatch(commonVersionIdentifier, ua) || Utils.getFirstMatch(/(?:SamsungBrowser)[\s/](\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/Whale/i], + describe(ua) { + const browser = { + name: 'NAVER Whale Browser', + }; + const version = Utils.getFirstMatch(commonVersionIdentifier, ua) || Utils.getFirstMatch(/(?:whale)[\s/](\d+(?:\.\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/MZBrowser/i], + describe(ua) { + const browser = { + 
name: 'MZ Browser', + }; + const version = Utils.getFirstMatch(/(?:MZBrowser)[\s/](\d+(?:\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/focus/i], + describe(ua) { + const browser = { + name: 'Focus', + }; + const version = Utils.getFirstMatch(/(?:focus)[\s/](\d+(?:\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/swing/i], + describe(ua) { + const browser = { + name: 'Swing', + }; + const version = Utils.getFirstMatch(/(?:swing)[\s/](\d+(?:\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/coast/i], + describe(ua) { + const browser = { + name: 'Opera Coast', + }; + const version = Utils.getFirstMatch(commonVersionIdentifier, ua) || Utils.getFirstMatch(/(?:coast)[\s/](\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/opt\/\d+(?:.?_?\d+)+/i], + describe(ua) { + const browser = { + name: 'Opera Touch', + }; + const version = Utils.getFirstMatch(/(?:opt)[\s/](\d+(\.?_?\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/yabrowser/i], + describe(ua) { + const browser = { + name: 'Yandex Browser', + }; + const version = Utils.getFirstMatch(/(?:yabrowser)[\s/](\d+(\.?_?\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/ucbrowser/i], + describe(ua) { + const browser = { + name: 'UC Browser', + }; + const version = Utils.getFirstMatch(commonVersionIdentifier, ua) || Utils.getFirstMatch(/(?:ucbrowser)[\s/](\d+(\.?_?\d+)+)/i, ua); + + if (version) { 
+ browser.version = version; + } + + return browser; + }, + }, + { + test: [/Maxthon|mxios/i], + describe(ua) { + const browser = { + name: 'Maxthon', + }; + const version = Utils.getFirstMatch(commonVersionIdentifier, ua) || Utils.getFirstMatch(/(?:Maxthon|mxios)[\s/](\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/epiphany/i], + describe(ua) { + const browser = { + name: 'Epiphany', + }; + const version = Utils.getFirstMatch(commonVersionIdentifier, ua) || Utils.getFirstMatch(/(?:epiphany)[\s/](\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/puffin/i], + describe(ua) { + const browser = { + name: 'Puffin', + }; + const version = Utils.getFirstMatch(commonVersionIdentifier, ua) || Utils.getFirstMatch(/(?:puffin)[\s/](\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/sleipnir/i], + describe(ua) { + const browser = { + name: 'Sleipnir', + }; + const version = Utils.getFirstMatch(commonVersionIdentifier, ua) || Utils.getFirstMatch(/(?:sleipnir)[\s/](\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/k-meleon/i], + describe(ua) { + const browser = { + name: 'K-Meleon', + }; + const version = Utils.getFirstMatch(commonVersionIdentifier, ua) || Utils.getFirstMatch(/(?:k-meleon)[\s/](\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/micromessenger/i], + describe(ua) { + const browser = { + name: 'WeChat', + }; + const version = Utils.getFirstMatch(/(?:micromessenger)[\s/](\d+(\.?_?\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/qqbrowser/i], + describe(ua) { + const browser = { + name: 
(/qqbrowserlite/i).test(ua) ? 'QQ Browser Lite' : 'QQ Browser', + }; + const version = Utils.getFirstMatch(/(?:qqbrowserlite|qqbrowser)[/](\d+(\.?_?\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/msie|trident/i], + describe(ua) { + const browser = { + name: 'Internet Explorer', + }; + const version = Utils.getFirstMatch(/(?:msie |rv:)(\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/\sedg\//i], + describe(ua) { + const browser = { + name: 'Microsoft Edge', + }; + + const version = Utils.getFirstMatch(/\sedg\/(\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/edg([ea]|ios)/i], + describe(ua) { + const browser = { + name: 'Microsoft Edge', + }; + + const version = Utils.getSecondMatch(/edg([ea]|ios)\/(\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/vivaldi/i], + describe(ua) { + const browser = { + name: 'Vivaldi', + }; + const version = Utils.getFirstMatch(/vivaldi\/(\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/seamonkey/i], + describe(ua) { + const browser = { + name: 'SeaMonkey', + }; + const version = Utils.getFirstMatch(/seamonkey\/(\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/sailfish/i], + describe(ua) { + const browser = { + name: 'Sailfish', + }; + + const version = Utils.getFirstMatch(/sailfish\s?browser\/(\d+(\.\d+)?)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/silk/i], + describe(ua) { + const browser = { + name: 'Amazon Silk', + }; + const version = Utils.getFirstMatch(/silk\/(\d+(\.?_?\d+)+)/i, ua); + + if (version) { + 
browser.version = version; + } + + return browser; + }, + }, + { + test: [/phantom/i], + describe(ua) { + const browser = { + name: 'PhantomJS', + }; + const version = Utils.getFirstMatch(/phantomjs\/(\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/slimerjs/i], + describe(ua) { + const browser = { + name: 'SlimerJS', + }; + const version = Utils.getFirstMatch(/slimerjs\/(\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/blackberry|\bbb\d+/i, /rim\stablet/i], + describe(ua) { + const browser = { + name: 'BlackBerry', + }; + const version = Utils.getFirstMatch(commonVersionIdentifier, ua) || Utils.getFirstMatch(/blackberry[\d]+\/(\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/(web|hpw)[o0]s/i], + describe(ua) { + const browser = { + name: 'WebOS Browser', + }; + const version = Utils.getFirstMatch(commonVersionIdentifier, ua) || Utils.getFirstMatch(/w(?:eb)?[o0]sbrowser\/(\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/bada/i], + describe(ua) { + const browser = { + name: 'Bada', + }; + const version = Utils.getFirstMatch(/dolfin\/(\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/tizen/i], + describe(ua) { + const browser = { + name: 'Tizen', + }; + const version = Utils.getFirstMatch(/(?:tizen\s?)?browser\/(\d+(\.?_?\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/qupzilla/i], + describe(ua) { + const browser = { + name: 'QupZilla', + }; + const version = Utils.getFirstMatch(/(?:qupzilla)[\s/](\d+(\.?_?\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + 
browser.version = version; + } + + return browser; + }, + }, + { + test: [/firefox|iceweasel|fxios/i], + describe(ua) { + const browser = { + name: 'Firefox', + }; + const version = Utils.getFirstMatch(/(?:firefox|iceweasel|fxios)[\s/](\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/electron/i], + describe(ua) { + const browser = { + name: 'Electron', + }; + const version = Utils.getFirstMatch(/(?:electron)\/(\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/MiuiBrowser/i], + describe(ua) { + const browser = { + name: 'Miui', + }; + const version = Utils.getFirstMatch(/(?:MiuiBrowser)[\s/](\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/chromium/i], + describe(ua) { + const browser = { + name: 'Chromium', + }; + const version = Utils.getFirstMatch(/(?:chromium)[\s/](\d+(\.?_?\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/chrome|crios|crmo/i], + describe(ua) { + const browser = { + name: 'Chrome', + }; + const version = Utils.getFirstMatch(/(?:chrome|crios|crmo)\/(\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/GSA/i], + describe(ua) { + const browser = { + name: 'Google Search', + }; + const version = Utils.getFirstMatch(/(?:GSA)\/(\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + + /* Android Browser */ + { + test(parser) { + const notLikeAndroid = !parser.test(/like android/i); + const butAndroid = parser.test(/android/i); + return notLikeAndroid && butAndroid; + }, + describe(ua) { + const browser = { + name: 'Android Browser', + }; + const version = Utils.getFirstMatch(commonVersionIdentifier, ua); + + if 
(version) { + browser.version = version; + } + + return browser; + }, + }, + + /* PlayStation 4 */ + { + test: [/playstation 4/i], + describe(ua) { + const browser = { + name: 'PlayStation 4', + }; + const version = Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + + /* Safari */ + { + test: [/safari|applewebkit/i], + describe(ua) { + const browser = { + name: 'Safari', + }; + const version = Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + + /* Something else */ + { + test: [/.*/i], + describe(ua) { + /* Here we try to make sure that there are explicit details about the device + * in order to decide what regexp exactly we want to apply + * (as there is a specific decision based on that conclusion) + */ + const regexpWithoutDeviceSpec = /^(.*)\/(.*) /; + const regexpWithDeviceSpec = /^(.*)\/(.*)[ \t]\((.*)/; + const hasDeviceSpec = ua.search('\\(') !== -1; + const regexp = hasDeviceSpec ? regexpWithDeviceSpec : regexpWithoutDeviceSpec; + return { + name: Utils.getFirstMatch(regexp, ua), + version: Utils.getSecondMatch(regexp, ua), + }; + }, + }, +]; + +var osParsersList = [ + /* Roku */ + { + test: [/Roku\/DVP/], + describe(ua) { + const version = Utils.getFirstMatch(/Roku\/DVP-(\d+\.\d+)/i, ua); + return { + name: OS_MAP.Roku, + version, + }; + }, + }, + + /* Windows Phone */ + { + test: [/windows phone/i], + describe(ua) { + const version = Utils.getFirstMatch(/windows phone (?:os)?\s?(\d+(\.\d+)*)/i, ua); + return { + name: OS_MAP.WindowsPhone, + version, + }; + }, + }, + + /* Windows */ + { + test: [/windows /i], + describe(ua) { + const version = Utils.getFirstMatch(/Windows ((NT|XP)( \d\d?.\d)?)/i, ua); + const versionName = Utils.getWindowsVersionName(version); + + return { + name: OS_MAP.Windows, + version, + versionName, + }; + }, + }, + + /* Firefox on iPad */ + { + test: [/Macintosh(.*?) 
FxiOS(.*?)\//], + describe(ua) { + const result = { + name: OS_MAP.iOS, + }; + const version = Utils.getSecondMatch(/(Version\/)(\d[\d.]+)/, ua); + if (version) { + result.version = version; + } + return result; + }, + }, + + /* macOS */ + { + test: [/macintosh/i], + describe(ua) { + const version = Utils.getFirstMatch(/mac os x (\d+(\.?_?\d+)+)/i, ua).replace(/[_\s]/g, '.'); + const versionName = Utils.getMacOSVersionName(version); + + const os = { + name: OS_MAP.MacOS, + version, + }; + if (versionName) { + os.versionName = versionName; + } + return os; + }, + }, + + /* iOS */ + { + test: [/(ipod|iphone|ipad)/i], + describe(ua) { + const version = Utils.getFirstMatch(/os (\d+([_\s]\d+)*) like mac os x/i, ua).replace(/[_\s]/g, '.'); + + return { + name: OS_MAP.iOS, + version, + }; + }, + }, + + /* Android */ + { + test(parser) { + const notLikeAndroid = !parser.test(/like android/i); + const butAndroid = parser.test(/android/i); + return notLikeAndroid && butAndroid; + }, + describe(ua) { + const version = Utils.getFirstMatch(/android[\s/-](\d+(\.\d+)*)/i, ua); + const versionName = Utils.getAndroidVersionName(version); + const os = { + name: OS_MAP.Android, + version, + }; + if (versionName) { + os.versionName = versionName; + } + return os; + }, + }, + + /* WebOS */ + { + test: [/(web|hpw)[o0]s/i], + describe(ua) { + const version = Utils.getFirstMatch(/(?:web|hpw)[o0]s\/(\d+(\.\d+)*)/i, ua); + const os = { + name: OS_MAP.WebOS, + }; + + if (version && version.length) { + os.version = version; + } + return os; + }, + }, + + /* BlackBerry */ + { + test: [/blackberry|\bbb\d+/i, /rim\stablet/i], + describe(ua) { + const version = Utils.getFirstMatch(/rim\stablet\sos\s(\d+(\.\d+)*)/i, ua) + || Utils.getFirstMatch(/blackberry\d+\/(\d+([_\s]\d+)*)/i, ua) + || Utils.getFirstMatch(/\bbb(\d+)/i, ua); + + return { + name: OS_MAP.BlackBerry, + version, + }; + }, + }, + + /* Bada */ + { + test: [/bada/i], + describe(ua) { + const version = 
/*
 * Platform (device class) detection rules.
 *
 * Each rule has a `test` (an array of regexps matched against the UA string,
 * or a function receiving the Parser instance) and a `describe(ua)` that
 * returns the platform descriptor. Rules are evaluated in order and the first
 * match wins, so ordering is significant.
 *
 * Tablets go first since usually they have more specific
 * signs to detect.
 */
var platformParsersList = [
  /* Googlebot */
  {
    test: [/googlebot/i],
    describe: () => ({
      type: 'bot',
      vendor: 'Google',
    }),
  },

  /* Huawei */
  {
    test: [/huawei/i],
    describe(ua) {
      const platform = {
        type: PLATFORMS_MAP.mobile,
        vendor: 'Huawei',
      };
      // Only the CAN-L01 model code is mapped to a marketing name here.
      const model = Utils.getFirstMatch(/(can-l01)/i, ua) && 'Nova';
      if (model) {
        platform.model = model;
      }
      return platform;
    },
  },

  /* Nexus Tablet */
  {
    test: [/nexus\s*(?:7|8|9|10).*/i],
    describe: () => ({
      type: PLATFORMS_MAP.tablet,
      vendor: 'Nexus',
    }),
  },

  /* iPad */
  {
    test: [/ipad/i],
    describe: () => ({
      type: PLATFORMS_MAP.tablet,
      vendor: 'Apple',
      model: 'iPad',
    }),
  },

  /* Firefox on iPad (reports a Macintosh UA carrying an FxiOS token) */
  {
    test: [/Macintosh(.*?) FxiOS(.*?)\//],
    describe: () => ({
      type: PLATFORMS_MAP.tablet,
      vendor: 'Apple',
      model: 'iPad',
    }),
  },

  /* Amazon Kindle Fire */
  {
    test: [/kftt build/i],
    describe: () => ({
      type: PLATFORMS_MAP.tablet,
      vendor: 'Amazon',
      model: 'Kindle Fire HD 7',
    }),
  },

  /* Another Amazon Tablet with Silk */
  {
    test: [/silk/i],
    describe: () => ({
      type: PLATFORMS_MAP.tablet,
      vendor: 'Amazon',
    }),
  },

  /* Tablet ("tablet pc" is excluded by the negative lookahead) */
  {
    test: [/tablet(?! pc)/i],
    describe: () => ({
      type: PLATFORMS_MAP.tablet,
    }),
  },

  /* iPod/iPhone */
  {
    test(parser) {
      // Reject UAs that merely claim to be "like iPhone"/"like iPod".
      if (parser.test(/like (ipod|iphone)/i)) {
        return false;
      }
      return parser.test(/ipod|iphone/i);
    },
    describe(ua) {
      return {
        type: PLATFORMS_MAP.mobile,
        vendor: 'Apple',
        model: Utils.getFirstMatch(/(ipod|iphone)/i, ua),
      };
    },
  },

  /* Nexus Mobile */
  {
    test: [/nexus\s*[0-6].*/i, /galaxy nexus/i],
    describe: () => ({
      type: PLATFORMS_MAP.mobile,
      vendor: 'Nexus',
    }),
  },

  /* Mobile */
  {
    test: [/[^-]mobi/i],
    describe: () => ({
      type: PLATFORMS_MAP.mobile,
    }),
  },

  /* BlackBerry */
  {
    test: parser => parser.getBrowserName(true) === 'blackberry',
    describe: () => ({
      type: PLATFORMS_MAP.mobile,
      vendor: 'BlackBerry',
    }),
  },

  /* Bada */
  {
    test: parser => parser.getBrowserName(true) === 'bada',
    describe: () => ({
      type: PLATFORMS_MAP.mobile,
    }),
  },

  /* Windows Phone */
  {
    test(parser) {
      // The historical check compared the (non-lowercased) browser name, which
      // no browser rule produces, so this rule could never fire. "Windows
      // Phone" is detected as an OS, so match on the OS name as well; the
      // browser-name check is kept (lowercased) for backward compatibility.
      // NOTE(review): assumes the OS rule reports the name "Windows Phone"
      // (OS_MAP.WindowsPhone) - confirm against the constants module.
      return parser.getOSName(true) === 'windows phone'
        || parser.getBrowserName(true) === 'windows phone';
    },
    describe: () => ({
      type: PLATFORMS_MAP.mobile,
      vendor: 'Microsoft',
    }),
  },

  /* Android Tablet */
  {
    test(parser) {
      // Reached only when no "mobi" token matched above; Android >= 3 without
      // a mobile marker is classified as a tablet.
      const osMajorVersion = Number(String(parser.getOSVersion()).split('.')[0]);
      return parser.getOSName(true) === 'android' && (osMajorVersion >= 3);
    },
    describe: () => ({
      type: PLATFORMS_MAP.tablet,
    }),
  },

  /* Android Mobile */
  {
    test: parser => parser.getOSName(true) === 'android',
    describe: () => ({
      type: PLATFORMS_MAP.mobile,
    }),
  },

  /* desktop */
  {
    test: parser => parser.getOSName(true) === 'macos',
    describe: () => ({
      type: PLATFORMS_MAP.desktop,
      vendor: 'Apple',
    }),
  },

  /* Windows */
  {
    test: parser => parser.getOSName(true) === 'windows',
    describe: () => ({
      type: PLATFORMS_MAP.desktop,
    }),
  },

  /* Linux */
  {
    test: parser => parser.getOSName(true) === 'linux',
    describe: () => ({
      type: PLATFORMS_MAP.desktop,
    }),
  },

  /* PlayStation 4 */
  {
    test: parser => parser.getOSName(true) === 'playstation 4',
    describe: () => ({
      type: PLATFORMS_MAP.tv,
    }),
  },

  /* Roku */
  {
    test: parser => parser.getOSName(true) === 'roku',
    describe: () => ({
      type: PLATFORMS_MAP.tv,
    }),
  },
];
/**
 * The main class that arranges the whole parsing process.
 *
 * A Parser wraps one User-Agent string and lazily fills `parsedResult` with
 * four sections (browser, os, platform, engine), each produced by the first
 * matching rule of the corresponding rule list.
 */
class Parser {
  /**
   * Create instance of Parser
   *
   * @param {String} UA User-Agent string
   * @param {Boolean} [skipParsing=false] parser can skip parsing in purpose of performance
   * improvements if you need to make a more particular parsing
   * like {@link Parser#parseBrowser} or {@link Parser#parsePlatform}
   *
   * @throw {Error} in case of empty UA String
   *
   * @constructor
   */
  constructor(UA, skipParsing = false) {
    if (UA === void (0) || UA === null || UA === '') {
      throw new Error("UserAgent parameter can't be empty");
    }

    this._ua = UA;

    /**
     * @typedef ParsedResult
     * @property {Object} browser
     * @property {String|undefined} [browser.name]
     * Browser name, like `"Chrome"` or `"Internet Explorer"`
     * @property {String|undefined} [browser.version] Browser version as a String `"12.01.45334.10"`
     * @property {Object} os
     * @property {String|undefined} [os.name] OS name, like `"Windows"` or `"macOS"`
     * @property {String|undefined} [os.version] OS version, like `"NT 5.1"` or `"10.11.1"`
     * @property {String|undefined} [os.versionName] OS name, like `"XP"` or `"High Sierra"`
     * @property {Object} platform
     * @property {String|undefined} [platform.type]
     * platform type, can be either `"desktop"`, `"tablet"` or `"mobile"`
     * @property {String|undefined} [platform.vendor] Vendor of the device,
     * like `"Apple"` or `"Samsung"`
     * @property {String|undefined} [platform.model] Device model,
     * like `"iPhone"` or `"Kindle Fire HD 7"`
     * @property {Object} engine
     * @property {String|undefined} [engine.name]
     * Can be any of this: `WebKit`, `Blink`, `Gecko`, `Trident`, `Presto`, `EdgeHTML`
     * @property {String|undefined} [engine.version] String version of the engine
     */
    this.parsedResult = {};

    if (skipParsing !== true) {
      this.parse();
    }
  }

  /**
   * Find the first rule of `rulesList` whose `test` accepts this parser.
   *
   * @param {Array} rulesList list of `{ test, describe }` rules
   * @param {String} kind human-readable rule kind, used in the error message
   * @return {Object|undefined} the matching rule, if any
   * @throws {Error} when a rule's `test` is neither a function nor an array
   *
   * @private
   */
  _findRule(rulesList, kind) {
    return Utils.find(rulesList, (rule) => {
      if (typeof rule.test === 'function') {
        return rule.test(this);
      }

      if (rule.test instanceof Array) {
        return rule.test.some(condition => this.test(condition));
      }

      throw new Error(`${kind}'s test function is not valid`);
    });
  }

  /**
   * Return a detected name, optionally lower-cased. A missing name yields an
   * empty string (never the literal text "undefined").
   *
   * @param {*} name detected name, possibly undefined
   * @param {Boolean} [toLowerCase] return lower-cased value
   * @return {String}
   *
   * @private
   */
  _formatName(name, toLowerCase) {
    const value = name || '';
    return toLowerCase ? String(value).toLowerCase() : value;
  }

  /**
   * Get UserAgent string of current Parser instance
   * @return {String} User-Agent String of the current object
   *
   * @public
   */
  getUA() {
    return this._ua;
  }

  /**
   * Test a UA string for a regexp
   * @param {RegExp} regex
   * @return {Boolean}
   */
  test(regex) {
    return regex.test(this._ua);
  }

  /**
   * Parse the browser and save it to this.parsedResult.browser
   * @return {Object}
   */
  parseBrowser() {
    // Reset first: rules may query this parser while matching.
    this.parsedResult.browser = {};

    const browserDescriptor = this._findRule(browsersList, 'Browser');

    if (browserDescriptor) {
      this.parsedResult.browser = browserDescriptor.describe(this.getUA());
    }

    return this.parsedResult.browser;
  }

  /**
   * Get parsed browser object (parsing on first access)
   * @return {Object}
   *
   * @public
   */
  getBrowser() {
    if (this.parsedResult.browser) {
      return this.parsedResult.browser;
    }

    return this.parseBrowser();
  }

  /**
   * Get browser's name
   * @param {Boolean} [toLowerCase] return lower-cased value
   * @return {String} Browser's name or an empty string
   *
   * @public
   */
  getBrowserName(toLowerCase) {
    return this._formatName(this.getBrowser().name, toLowerCase);
  }

  /**
   * Get browser's version
   * @return {String} version of browser
   *
   * @public
   */
  getBrowserVersion() {
    return this.getBrowser().version;
  }

  /**
   * Get OS (parsing on first access)
   * @return {Object}
   *
   * @example
   * this.getOS();
   * {
   *   name: 'macOS',
   *   version: '10.11.12'
   * }
   */
  getOS() {
    if (this.parsedResult.os) {
      return this.parsedResult.os;
    }

    return this.parseOS();
  }

  /**
   * Parse OS and save it to this.parsedResult.os
   * @return {*|{}}
   */
  parseOS() {
    this.parsedResult.os = {};

    const os = this._findRule(osParsersList, 'OS');

    if (os) {
      this.parsedResult.os = os.describe(this.getUA());
    }

    return this.parsedResult.os;
  }

  /**
   * Get OS name
   * @param {Boolean} [toLowerCase] return lower-cased value
   * @return {String} name of the OS — macOS, Windows, Linux, etc.
   * or an empty string
   */
  getOSName(toLowerCase) {
    return this._formatName(this.getOS().name, toLowerCase);
  }

  /**
   * Get OS version
   * @return {String} full version with dots ('10.11.12', '5.6', etc)
   */
  getOSVersion() {
    return this.getOS().version;
  }

  /**
   * Get parsed platform (parsing on first access)
   * @return {{}}
   */
  getPlatform() {
    if (this.parsedResult.platform) {
      return this.parsedResult.platform;
    }

    return this.parsePlatform();
  }

  /**
   * Get platform type
   * @param {Boolean} [toLowerCase=false]
   * @return {String} platform type or an empty string
   */
  getPlatformType(toLowerCase = false) {
    return this._formatName(this.getPlatform().type, toLowerCase);
  }

  /**
   * Parse the platform and save it to this.parsedResult.platform
   * @return {{}}
   */
  parsePlatform() {
    this.parsedResult.platform = {};

    const platform = this._findRule(platformParsersList, 'Platform');

    if (platform) {
      this.parsedResult.platform = platform.describe(this.getUA());
    }

    return this.parsedResult.platform;
  }

  /**
   * Get parsed engine (parsing on first access)
   * @return {{}}
   */
  getEngine() {
    if (this.parsedResult.engine) {
      return this.parsedResult.engine;
    }

    return this.parseEngine();
  }

  /**
   * Get engine's name
   * @param {Boolean} [toLowerCase] return lower-cased value
   * @return {String} Engine's name or an empty string
   *
   * @public
   */
  getEngineName(toLowerCase) {
    return this._formatName(this.getEngine().name, toLowerCase);
  }

  /**
   * Parse the engine and save it to this.parsedResult.engine
   * @return {{}}
   */
  parseEngine() {
    this.parsedResult.engine = {};

    const engine = this._findRule(enginesParsersList, 'Engine');

    if (engine) {
      this.parsedResult.engine = engine.describe(this.getUA());
    }

    return this.parsedResult.engine;
  }

  /**
   * Parse full information about the browser
   * @returns {Parser}
   */
  parse() {
    this.parseBrowser();
    this.parseOS();
    this.parsePlatform();
    this.parseEngine();

    return this;
  }

  /**
   * Get parsed result (a shallow copy of the internal state)
   * @return {ParsedResult}
   */
  getResult() {
    return Utils.assign({}, this.parsedResult);
  }

  /**
   * Check if parsed browser matches certain conditions
   *
   * @param {Object} checkTree It's one or two layered object,
   * which can include a platform or an OS on the first layer
   * and should have browsers specs on the bottom-laying layer
   *
   * @returns {Boolean|undefined} Whether the browser satisfies the set conditions or not.
   * Returns `undefined` when the browser is not described in the checkTree object.
   *
   * @example
   * const browser = Bowser.getParser(window.navigator.userAgent);
   * if (browser.satisfies({chrome: '>118.01.1322' }))
   * // or with os
   * if (browser.satisfies({windows: { chrome: '>118.01.1322' } }))
   * // or with platforms
   * if (browser.satisfies({desktop: { chrome: '>118.01.1322' } }))
   */
  satisfies(checkTree) {
    const platformsAndOSes = {};
    let platformsAndOSCounter = 0;
    const browsers = {};
    let browsersCounter = 0;

    Object.keys(checkTree).forEach((key) => {
      const currentDefinition = checkTree[key];
      if (typeof currentDefinition === 'string') {
        browsers[key] = currentDefinition;
        browsersCounter += 1;
      } else if (currentDefinition !== null && typeof currentDefinition === 'object') {
        // `typeof null` is 'object'; a null entry is ignored instead of
        // crashing in the recursive call below.
        platformsAndOSes[key] = currentDefinition;
        platformsAndOSCounter += 1;
      }
    });

    if (platformsAndOSCounter > 0) {
      const platformsAndOSNames = Object.keys(platformsAndOSes);

      // OS-scoped definitions take precedence over platform-scoped ones.
      const OSMatchingDefinition = Utils.find(platformsAndOSNames, name => (this.isOS(name)));
      if (OSMatchingDefinition) {
        const osResult = this.satisfies(platformsAndOSes[OSMatchingDefinition]);

        if (osResult !== void 0) {
          return osResult;
        }
      }

      const platformMatchingDefinition = Utils.find(
        platformsAndOSNames,
        name => (this.isPlatform(name)),
      );
      if (platformMatchingDefinition) {
        const platformResult = this.satisfies(platformsAndOSes[platformMatchingDefinition]);

        if (platformResult !== void 0) {
          return platformResult;
        }
      }
    }

    if (browsersCounter > 0) {
      const browserNames = Object.keys(browsers);
      const matchingDefinition = Utils.find(browserNames, name => (this.isBrowser(name, true)));

      if (matchingDefinition !== void 0) {
        return this.compareVersion(browsers[matchingDefinition]);
      }
    }

    return undefined;
  }

  /**
   * Check if the browser name equals the passed string
   * @param browserName The string to compare with the browser name
   * @param [includingAlias=false] The flag showing whether alias will be included into comparison
   * @returns {boolean}
   */
  isBrowser(browserName, includingAlias = false) {
    const defaultBrowserName = this.getBrowserName().toLowerCase();
    let browserNameLower = browserName.toLowerCase();
    const alias = Utils.getBrowserTypeByAlias(browserNameLower);

    if (includingAlias && alias) {
      browserNameLower = alias.toLowerCase();
    }
    return browserNameLower === defaultBrowserName;
  }

  /**
   * Compare the detected browser version against a version expression.
   *
   * Supported prefixes: `>`, `<`, `>=`, `<=`, `=`, `~` (loose equality);
   * no prefix means strict equality.
   *
   * @param {String} version version expression, e.g. `">=90.1"`
   * @returns {Boolean|undefined} `undefined` when the browser version is unknown
   */
  compareVersion(version) {
    let expectedResults = [0];
    let comparableVersion = version;
    let isLoose = false;

    const currentBrowserVersion = this.getBrowserVersion();

    if (typeof currentBrowserVersion !== 'string') {
      return void 0;
    }

    const operator = version[0];

    if (operator === '>' || operator === '<') {
      comparableVersion = version.slice(1);
      if (version[1] === '=') {
        // ">=" / "<=": equality (0) stays among the accepted results.
        isLoose = true;
        comparableVersion = version.slice(2);
      } else {
        expectedResults = [];
      }
      expectedResults.push(operator === '>' ? 1 : -1);
    } else if (operator === '=') {
      comparableVersion = version.slice(1);
    } else if (operator === '~') {
      isLoose = true;
      comparableVersion = version.slice(1);
    }

    return expectedResults.indexOf(
      Utils.compareVersions(currentBrowserVersion, comparableVersion, isLoose),
    ) > -1;
  }

  /**
   * Check if the OS name equals the passed string (case-insensitive)
   * @param {String} osName
   * @returns {Boolean}
   */
  isOS(osName) {
    return this.getOSName(true) === String(osName).toLowerCase();
  }

  /**
   * Check if the platform type equals the passed string (case-insensitive)
   * @param {String} platformType
   * @returns {Boolean}
   */
  isPlatform(platformType) {
    return this.getPlatformType(true) === String(platformType).toLowerCase();
  }

  /**
   * Check if the engine name equals the passed string (case-insensitive)
   * @param {String} engineName
   * @returns {Boolean}
   */
  isEngine(engineName) {
    return this.getEngineName(true) === String(engineName).toLowerCase();
  }

  /**
   * Is anything? Check if the browser is called "anything",
   * the OS called "anything" or the platform called "anything"
   * @param {String} anything
   * @param [includingAlias=false] The flag showing whether alias will be included into comparison
   * @returns {Boolean}
   */
  is(anything, includingAlias = false) {
    return this.isBrowser(anything, includingAlias) || this.isOS(anything)
      || this.isPlatform(anything);
  }

  /**
   * Check if any of the given values satisfies this.is(anything)
   * @param {String[]} anythings
   * @returns {Boolean}
   */
  some(anythings = []) {
    return anythings.some(anything => this.is(anything));
  }
}
// CommonJS-style module shim for the Babel `typeof` helper: the IIFE below
// populates `_typeof$1.exports` the way a `module.exports` assignment would.
var _typeof$1 = {exports: {}};

_typeof$1.exports;

(function (module) {
  /**
   * Symbol-aware replacement for the `typeof` operator.
   *
   * On each call it reassigns both `_typeof` and `module.exports` to one of
   * two implementations, chosen by feature detection: plain `typeof` when
   * `typeof Symbol.iterator` is "symbol", otherwise a fallback that reports
   * "symbol" for objects whose constructor is `Symbol`. It then delegates to
   * the chosen implementation.
   *
   * @param {*} o value to inspect
   * @returns {string} the type tag of `o`
   */
  function _typeof(o) {
    "@babel/helpers - typeof";

    return (module.exports = _typeof = "function" == typeof Symbol && "symbol" == typeof Symbol.iterator ? function (o) {
      return typeof o;
    } : function (o) {
      return o && "function" == typeof Symbol && o.constructor === Symbol && o !== Symbol.prototype ? "symbol" : typeof o;
    }, module.exports.__esModule = true, module.exports["default"] = module.exports), _typeof(o);
  }
  // Export the helper both as the module itself and as its `default` member.
  module.exports = _typeof, module.exports.__esModule = true, module.exports["default"] = module.exports;
} (_typeof$1));

// Captured once, right after the shim runs: this is the outer `_typeof`
// function, whose `default` property points back at itself.
var _typeofExports = _typeof$1.exports;
e : Generator, + a = Object.create(i.prototype), + c = new Context(n || []); + return o(a, "_invoke", { + value: makeInvokeMethod(t, r, c) + }), a; + } + function tryCatch(t, e, r) { + try { + return { + type: "normal", + arg: t.call(e, r) + }; + } catch (t) { + return { + type: "throw", + arg: t + }; + } + } + e.wrap = wrap; + var h = "suspendedStart", + l = "suspendedYield", + f = "executing", + s = "completed", + y = {}; + function Generator() {} + function GeneratorFunction() {} + function GeneratorFunctionPrototype() {} + var p = {}; + define(p, a, function () { + return this; + }); + var d = Object.getPrototypeOf, + v = d && d(d(values([]))); + v && v !== r && n.call(v, a) && (p = v); + var g = GeneratorFunctionPrototype.prototype = Generator.prototype = Object.create(p); + function defineIteratorMethods(t) { + ["next", "throw", "return"].forEach(function (e) { + define(t, e, function (t) { + return this._invoke(e, t); + }); + }); + } + function AsyncIterator(t, e) { + function invoke(r, o, i, a) { + var c = tryCatch(t[r], t, o); + if ("throw" !== c.type) { + var u = c.arg, + h = u.value; + return h && "object" == _typeof(h) && n.call(h, "__await") ? e.resolve(h.__await).then(function (t) { + invoke("next", t, i, a); + }, function (t) { + invoke("throw", t, i, a); + }) : e.resolve(h).then(function (t) { + u.value = t, i(u); + }, function (t) { + return invoke("throw", t, i, a); + }); + } + a(c.arg); + } + var r; + o(this, "_invoke", { + value: function value(t, n) { + function callInvokeWithMethodAndArg() { + return new e(function (e, r) { + invoke(t, n, e, r); + }); + } + return r = r ? 
r.then(callInvokeWithMethodAndArg, callInvokeWithMethodAndArg) : callInvokeWithMethodAndArg(); + } + }); + } + function makeInvokeMethod(e, r, n) { + var o = h; + return function (i, a) { + if (o === f) throw new Error("Generator is already running"); + if (o === s) { + if ("throw" === i) throw a; + return { + value: t, + done: !0 + }; + } + for (n.method = i, n.arg = a;;) { + var c = n.delegate; + if (c) { + var u = maybeInvokeDelegate(c, n); + if (u) { + if (u === y) continue; + return u; + } + } + if ("next" === n.method) n.sent = n._sent = n.arg;else if ("throw" === n.method) { + if (o === h) throw o = s, n.arg; + n.dispatchException(n.arg); + } else "return" === n.method && n.abrupt("return", n.arg); + o = f; + var p = tryCatch(e, r, n); + if ("normal" === p.type) { + if (o = n.done ? s : l, p.arg === y) continue; + return { + value: p.arg, + done: n.done + }; + } + "throw" === p.type && (o = s, n.method = "throw", n.arg = p.arg); + } + }; + } + function maybeInvokeDelegate(e, r) { + var n = r.method, + o = e.iterator[n]; + if (o === t) return r.delegate = null, "throw" === n && e.iterator["return"] && (r.method = "return", r.arg = t, maybeInvokeDelegate(e, r), "throw" === r.method) || "return" !== n && (r.method = "throw", r.arg = new TypeError("The iterator does not provide a '" + n + "' method")), y; + var i = tryCatch(o, e.iterator, r.arg); + if ("throw" === i.type) return r.method = "throw", r.arg = i.arg, r.delegate = null, y; + var a = i.arg; + return a ? a.done ? 
(r[e.resultName] = a.value, r.next = e.nextLoc, "return" !== r.method && (r.method = "next", r.arg = t), r.delegate = null, y) : a : (r.method = "throw", r.arg = new TypeError("iterator result is not an object"), r.delegate = null, y); + } + function pushTryEntry(t) { + var e = { + tryLoc: t[0] + }; + 1 in t && (e.catchLoc = t[1]), 2 in t && (e.finallyLoc = t[2], e.afterLoc = t[3]), this.tryEntries.push(e); + } + function resetTryEntry(t) { + var e = t.completion || {}; + e.type = "normal", delete e.arg, t.completion = e; + } + function Context(t) { + this.tryEntries = [{ + tryLoc: "root" + }], t.forEach(pushTryEntry, this), this.reset(!0); + } + function values(e) { + if (e || "" === e) { + var r = e[a]; + if (r) return r.call(e); + if ("function" == typeof e.next) return e; + if (!isNaN(e.length)) { + var o = -1, + i = function next() { + for (; ++o < e.length;) if (n.call(e, o)) return next.value = e[o], next.done = !1, next; + return next.value = t, next.done = !0, next; + }; + return i.next = i; + } + } + throw new TypeError(_typeof(e) + " is not iterable"); + } + return GeneratorFunction.prototype = GeneratorFunctionPrototype, o(g, "constructor", { + value: GeneratorFunctionPrototype, + configurable: !0 + }), o(GeneratorFunctionPrototype, "constructor", { + value: GeneratorFunction, + configurable: !0 + }), GeneratorFunction.displayName = define(GeneratorFunctionPrototype, u, "GeneratorFunction"), e.isGeneratorFunction = function (t) { + var e = "function" == typeof t && t.constructor; + return !!e && (e === GeneratorFunction || "GeneratorFunction" === (e.displayName || e.name)); + }, e.mark = function (t) { + return Object.setPrototypeOf ? 
Object.setPrototypeOf(t, GeneratorFunctionPrototype) : (t.__proto__ = GeneratorFunctionPrototype, define(t, u, "GeneratorFunction")), t.prototype = Object.create(g), t; + }, e.awrap = function (t) { + return { + __await: t + }; + }, defineIteratorMethods(AsyncIterator.prototype), define(AsyncIterator.prototype, c, function () { + return this; + }), e.AsyncIterator = AsyncIterator, e.async = function (t, r, n, o, i) { + void 0 === i && (i = Promise); + var a = new AsyncIterator(wrap(t, r, n, o), i); + return e.isGeneratorFunction(r) ? a : a.next().then(function (t) { + return t.done ? t.value : a.next(); + }); + }, defineIteratorMethods(g), define(g, u, "Generator"), define(g, a, function () { + return this; + }), define(g, "toString", function () { + return "[object Generator]"; + }), e.keys = function (t) { + var e = Object(t), + r = []; + for (var n in e) r.push(n); + return r.reverse(), function next() { + for (; r.length;) { + var t = r.pop(); + if (t in e) return next.value = t, next.done = !1, next; + } + return next.done = !0, next; + }; + }, e.values = values, Context.prototype = { + constructor: Context, + reset: function reset(e) { + if (this.prev = 0, this.next = 0, this.sent = this._sent = t, this.done = !1, this.delegate = null, this.method = "next", this.arg = t, this.tryEntries.forEach(resetTryEntry), !e) for (var r in this) "t" === r.charAt(0) && n.call(this, r) && !isNaN(+r.slice(1)) && (this[r] = t); + }, + stop: function stop() { + this.done = !0; + var t = this.tryEntries[0].completion; + if ("throw" === t.type) throw t.arg; + return this.rval; + }, + dispatchException: function dispatchException(e) { + if (this.done) throw e; + var r = this; + function handle(n, o) { + return a.type = "throw", a.arg = e, r.next = n, o && (r.method = "next", r.arg = t), !!o; + } + for (var o = this.tryEntries.length - 1; o >= 0; --o) { + var i = this.tryEntries[o], + a = i.completion; + if ("root" === i.tryLoc) return handle("end"); + if (i.tryLoc <= this.prev) 
{ + var c = n.call(i, "catchLoc"), + u = n.call(i, "finallyLoc"); + if (c && u) { + if (this.prev < i.catchLoc) return handle(i.catchLoc, !0); + if (this.prev < i.finallyLoc) return handle(i.finallyLoc); + } else if (c) { + if (this.prev < i.catchLoc) return handle(i.catchLoc, !0); + } else { + if (!u) throw new Error("try statement without catch or finally"); + if (this.prev < i.finallyLoc) return handle(i.finallyLoc); + } + } + } + }, + abrupt: function abrupt(t, e) { + for (var r = this.tryEntries.length - 1; r >= 0; --r) { + var o = this.tryEntries[r]; + if (o.tryLoc <= this.prev && n.call(o, "finallyLoc") && this.prev < o.finallyLoc) { + var i = o; + break; + } + } + i && ("break" === t || "continue" === t) && i.tryLoc <= e && e <= i.finallyLoc && (i = null); + var a = i ? i.completion : {}; + return a.type = t, a.arg = e, i ? (this.method = "next", this.next = i.finallyLoc, y) : this.complete(a); + }, + complete: function complete(t, e) { + if ("throw" === t.type) throw t.arg; + return "break" === t.type || "continue" === t.type ? this.next = t.arg : "return" === t.type ? 
(this.rval = this.arg = t.arg, this.method = "return", this.next = "end") : "normal" === t.type && e && (this.next = e), y; + }, + finish: function finish(t) { + for (var e = this.tryEntries.length - 1; e >= 0; --e) { + var r = this.tryEntries[e]; + if (r.finallyLoc === t) return this.complete(r.completion, r.afterLoc), resetTryEntry(r), y; + } + }, + "catch": function _catch(t) { + for (var e = this.tryEntries.length - 1; e >= 0; --e) { + var r = this.tryEntries[e]; + if (r.tryLoc === t) { + var n = r.completion; + if ("throw" === n.type) { + var o = n.arg; + resetTryEntry(r); + } + return o; + } + } + throw new Error("illegal catch attempt"); + }, + delegateYield: function delegateYield(e, r, n) { + return this.delegate = { + iterator: values(e), + resultName: r, + nextLoc: n + }, "next" === this.method && (this.arg = t), y; + } + }, e; + } + module.exports = _regeneratorRuntime, module.exports.__esModule = true, module.exports["default"] = module.exports; +} (regeneratorRuntime$1)); + +var regeneratorRuntimeExports = regeneratorRuntime$1.exports; + +// TODO(Babel 8): Remove this file. + +var runtime = regeneratorRuntimeExports(); + +// Copied from https://github.com/facebook/regenerator/blob/main/packages/runtime/runtime.js#L736= +try { + regeneratorRuntime = runtime; +} catch (accidentalStrictMode) { + if (typeof globalThis === "object") { + globalThis.regeneratorRuntime = runtime; + } else { + Function("r", "regeneratorRuntime = r")(runtime); + } +} + +function _typeof(o) { + "@babel/helpers - typeof"; + + return _typeof = "function" == typeof Symbol && "symbol" == typeof Symbol.iterator ? function (o) { + return typeof o; + } : function (o) { + return o && "function" == typeof Symbol && o.constructor === Symbol && o !== Symbol.prototype ? 
"symbol" : typeof o; + }, _typeof(o); +} + +function _classCallCheck$1(instance, Constructor) { + if (!(instance instanceof Constructor)) { + throw new TypeError("Cannot call a class as a function"); + } +} + +function _toPrimitive(input, hint) { + if (_typeof(input) !== "object" || input === null) return input; + var prim = input[Symbol.toPrimitive]; + if (prim !== undefined) { + var res = prim.call(input, hint || "default"); + if (_typeof(res) !== "object") return res; + throw new TypeError("@@toPrimitive must return a primitive value."); + } + return (hint === "string" ? String : Number)(input); +} + +function _toPropertyKey(arg) { + var key = _toPrimitive(arg, "string"); + return _typeof(key) === "symbol" ? key : String(key); +} + +function _defineProperties$1(target, props) { + for (var i = 0; i < props.length; i++) { + var descriptor = props[i]; + descriptor.enumerable = descriptor.enumerable || false; + descriptor.configurable = true; + if ("value" in descriptor) descriptor.writable = true; + Object.defineProperty(target, _toPropertyKey(descriptor.key), descriptor); + } +} +function _createClass$1(Constructor, protoProps, staticProps) { + if (protoProps) _defineProperties$1(Constructor.prototype, protoProps); + if (staticProps) _defineProperties$1(Constructor, staticProps); + Object.defineProperty(Constructor, "prototype", { + writable: false + }); + return Constructor; +} + +/* + Copyright 2022-2023 Picovoice Inc. + + You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" + file accompanying this source. + + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + specific language governing permissions and limitations under the License. 
+*/ +/** + * BasePvFile Class + * This class mocks the file system using in-memory storage. + */ +var PvFile = /*#__PURE__*/function () { + function PvFile() { + _classCallCheck$1(this, PvFile); + this._path = ''; + } + /** + * Getter for file's meta information. + */ + _createClass$1(PvFile, [{ + key: "meta", + get: function get() { + if (this._meta === undefined) { + return undefined; + } + return Object.assign({ + version: 0 + }, this._meta); + } + }, { + key: "pageSize", + get: function get() { + return undefined; + } + /** + * Get the file pointer from the _filePtrs map. + * @param ptr The pointer to BasePvFile instance to get from the map. + * @returns BasePvFile returns the current file instance related to ptr. + */ + }], [{ + key: "getPtr", + value: function getPtr(ptr) { + if (PvFile._filePtrs.has(ptr)) { + return PvFile._filePtrs.get(ptr); + } + throw new Error('File instance not found.'); + } + /** + * Saves the BasePvFile instance to the map with an associated ptr. + * @param ptr The file pointer to save as the key. + * @param instance The BasePvFile instance to save as the value. + */ + }, { + key: "setPtr", + value: function setPtr(ptr, instance) { + PvFile._filePtrs.set(ptr, instance); + } + /** + * Removes the ptr from the _filePtrs map. + * @param ptr The file pointer to remove. + */ + }, { + key: "removePtr", + value: function removePtr(ptr) { + PvFile._filePtrs["delete"](ptr); + } + }]); + return PvFile; +}(); +PvFile._filePtrs = new Map(); +/** + * Cast a signed address to unsigned address. + * + * @param address The address to cast to unsigned address. + */ +function unsignedAddress(address) { + if (address < 0) { + return address >>> 0; + } + return address; +} + +function _arrayWithHoles(arr) { + if (Array.isArray(arr)) return arr; +} + +function _iterableToArrayLimit(r, l) { + var t = null == r ? 
null : "undefined" != typeof Symbol && r[Symbol.iterator] || r["@@iterator"]; + if (null != t) { + var e, + n, + i, + u, + a = [], + f = !0, + o = !1; + try { + if (i = (t = t.call(r)).next, 0 === l) { + if (Object(t) !== t) return; + f = !1; + } else for (; !(f = (e = i.call(t)).done) && (a.push(e.value), a.length !== l); f = !0); + } catch (r) { + o = !0, n = r; + } finally { + try { + if (!f && null != t["return"] && (u = t["return"](), Object(u) !== u)) return; + } finally { + if (o) throw n; + } + } + return a; + } +} + +function _arrayLikeToArray$1(arr, len) { + if (len == null || len > arr.length) len = arr.length; + for (var i = 0, arr2 = new Array(len); i < len; i++) arr2[i] = arr[i]; + return arr2; +} + +function _unsupportedIterableToArray$1(o, minLen) { + if (!o) return; + if (typeof o === "string") return _arrayLikeToArray$1(o, minLen); + var n = Object.prototype.toString.call(o).slice(8, -1); + if (n === "Object" && o.constructor) n = o.constructor.name; + if (n === "Map" || n === "Set") return Array.from(o); + if (n === "Arguments" || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n)) return _arrayLikeToArray$1(o, minLen); +} + +function _nonIterableRest() { + throw new TypeError("Invalid attempt to destructure non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); +} + +function _slicedToArray(arr, i) { + return _arrayWithHoles(arr) || _iterableToArrayLimit(arr, i) || _unsupportedIterableToArray$1(arr, i) || _nonIterableRest(); +} + +function _classCallCheck(instance, Constructor) { + if (!(instance instanceof Constructor)) { + throw new TypeError("Cannot call a class as a function"); + } +} + +function _defineProperties(target, props) { + for (var i = 0; i < props.length; i++) { + var descriptor = props[i]; + descriptor.enumerable = descriptor.enumerable || false; + descriptor.configurable = true; + if ("value" in descriptor) descriptor.writable = true; + Object.defineProperty(target, 
toPropertyKey(descriptor.key), descriptor); + } +} +function _createClass(Constructor, protoProps, staticProps) { + if (protoProps) _defineProperties(Constructor.prototype, protoProps); + if (staticProps) _defineProperties(Constructor, staticProps); + Object.defineProperty(Constructor, "prototype", { + writable: false + }); + return Constructor; +} + +function _createForOfIteratorHelper(o, allowArrayLike) { var it = typeof Symbol !== "undefined" && o[Symbol.iterator] || o["@@iterator"]; if (!it) { if (Array.isArray(o) || (it = _unsupportedIterableToArray(o)) || allowArrayLike && o && typeof o.length === "number") { if (it) o = it; var i = 0; var F = function F() {}; return { s: F, n: function n() { if (i >= o.length) return { done: true }; return { done: false, value: o[i++] }; }, e: function e(_e) { throw _e; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var normalCompletion = true, didErr = false, err; return { s: function s() { it = it.call(o); }, n: function n() { var step = it.next(); normalCompletion = step.done; return step; }, e: function e(_e2) { didErr = true; err = _e2; }, f: function f() { try { if (!normalCompletion && it["return"] != null) it["return"](); } finally { if (didErr) throw err; } } }; } +function _unsupportedIterableToArray(o, minLen) { if (!o) return; if (typeof o === "string") return _arrayLikeToArray(o, minLen); var n = Object.prototype.toString.call(o).slice(8, -1); if (n === "Object" && o.constructor) n = o.constructor.name; if (n === "Map" || n === "Set") return Array.from(o); if (n === "Arguments" || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n)) return _arrayLikeToArray(o, minLen); } +function _arrayLikeToArray(arr, len) { if (len == null || len > arr.length) len = arr.length; for (var i = 0, arr2 = new Array(len); i < len; i++) arr2[i] = arr[i]; return arr2; } +var PvWebGPUDevice = 
/*#__PURE__*/function () { + function PvWebGPUDevice(device, adapterInfo) { + _classCallCheck(this, PvWebGPUDevice); + _defineProperty(this, "_numCommandsEncoded", void 0); + _defineProperty(this, "_commandEncoder", void 0); + _defineProperty(this, "_passEncoder", void 0); + _defineProperty(this, "_stageBuffersPendingMap", void 0); + _defineProperty(this, "_uniformBuffersPendingRelease", void 0); + _defineProperty(this, "device", void 0); + _defineProperty(this, "bufferReusePool", void 0); + _defineProperty(this, "shaders", void 0); + _defineProperty(this, "isTimerEnabled", void 0); + _defineProperty(this, "timestampBuffers", void 0); + _defineProperty(this, "shaderTimes", void 0); + _defineProperty(this, "adapterInfo", void 0); + this._numCommandsEncoded = 0; + this._commandEncoder = null; + this._passEncoder = null; + this._stageBuffersPendingMap = []; + this._uniformBuffersPendingRelease = []; + this.device = device; + this.bufferReusePool = new Map(); + this.shaders = {}; + this.shaderTimes = {}; + this.timestampBuffers = {}; + this.isTimerEnabled = false; + this.adapterInfo = adapterInfo; + } + _createClass(PvWebGPUDevice, [{ + key: "getBufferKey", + value: function getBufferKey(sizeInBytes, usage) { + return "".concat(usage, "_").concat(sizeInBytes); + } + }, { + key: "commandEncoder", + get: function get() { + if (!this._commandEncoder) { + this._commandEncoder = this.device.createCommandEncoder(); + } + return this._commandEncoder; + } + }, { + key: "numCommandsEncoded", + get: function get() { + return this._numCommandsEncoded; + }, + set: function set(value) { + this._numCommandsEncoded = value; + if (this._numCommandsEncoded >= 16) { + this.endComputePass(); + this.flushCommandEncoder(); + } + } + }, { + key: "endComputePass", + value: function endComputePass() { + if (this._passEncoder) { + this._passEncoder.end(); + this._passEncoder = null; + } + } + }, { + key: "getBuffer", + value: function getBuffer(sizeBytes, usage) { + var mappedAtCreation = 
arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : false; + var label = arguments.length > 3 ? arguments[3] : undefined; + var key = this.getBufferKey(sizeBytes, usage); + if (this.bufferReusePool.has(key)) { + var buffers = this.bufferReusePool.get(key); + if (buffers && buffers.length > 0) { + return buffers.pop(); + } + } + return this.device.createBuffer({ + size: sizeBytes * Uint8Array.BYTES_PER_ELEMENT, + usage: usage, + mappedAtCreation: mappedAtCreation, + label: label + }); + } + }, { + key: "scheduleUniformBufferForRelease", + value: function scheduleUniformBufferForRelease(buffer) { + this._uniformBuffersPendingRelease.push(buffer); + } + }, { + key: "releaseBuffer", + value: function releaseBuffer(buffer) { + var clearBuffer = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : true; + if (clearBuffer) { + this.endComputePass(); + this.commandEncoder.clearBuffer(buffer, 0, buffer.size); + } + var key = this.getBufferKey(buffer.size, buffer.usage); + if (!this.bufferReusePool.has(key)) { + this.bufferReusePool.set(key, []); + } + this.bufferReusePool.get(key).push(buffer); + } + }, { + key: "sync", + value: function () { + var _sync = _asyncToGenerator( /*#__PURE__*/_regeneratorRuntime.mark(function _callee() { + var _this = this; + var _iterator, _step, k, buffers, _iterator3, _step3, b, _loop, _i, _Object$entries; + return _regeneratorRuntime.wrap(function _callee$(_context3) { + while (1) switch (_context3.prev = _context3.next) { + case 0: + this.flushCommandEncoder(); + _context3.next = 3; + return this.device.queue.onSubmittedWorkDone(); + case 3: + _iterator = _createForOfIteratorHelper(this.bufferReusePool.keys()); + try { + for (_iterator.s(); !(_step = _iterator.n()).done;) { + k = _step.value; + buffers = this.bufferReusePool.get(k); + if (buffers && buffers.length > 0) { + _iterator3 = _createForOfIteratorHelper(buffers); + try { + for (_iterator3.s(); !(_step3 = _iterator3.n()).done;) { + b = _step3.value; + 
b === null || b === void 0 || b.destroy(); + } + } catch (err) { + _iterator3.e(err); + } finally { + _iterator3.f(); + } + } + } + } catch (err) { + _iterator.e(err); + } finally { + _iterator.f(); + } + this.bufferReusePool.clear(); + _loop = /*#__PURE__*/_regeneratorRuntime.mark(function _loop() { + var _Object$entries$_i, shaderName, timestampBuffers, _iterator2, _step2, _loop2; + return _regeneratorRuntime.wrap(function _loop$(_context2) { + while (1) switch (_context2.prev = _context2.next) { + case 0: + _Object$entries$_i = _slicedToArray(_Object$entries[_i], 2), shaderName = _Object$entries$_i[0], timestampBuffers = _Object$entries$_i[1]; + if (!_this.shaderTimes[shaderName]) { + _this.shaderTimes[shaderName] = []; + } + _iterator2 = _createForOfIteratorHelper(timestampBuffers); + _context2.prev = 3; + _loop2 = /*#__PURE__*/_regeneratorRuntime.mark(function _loop2() { + var timestampBuffer; + return _regeneratorRuntime.wrap(function _loop2$(_context) { + while (1) switch (_context.prev = _context.next) { + case 0: + timestampBuffer = _step2.value; + timestampBuffer.mapAsync(GPUMapMode.READ).then(function () { + var times = new BigInt64Array(timestampBuffer.getMappedRange()); + var timeDif = times[1] - times[0]; + timestampBuffer.unmap(); + timestampBuffer.destroy(); + _this.shaderTimes[shaderName].push(timeDif); + }); + case 2: + case "end": + return _context.stop(); + } + }, _loop2); + }); + _iterator2.s(); + case 6: + if ((_step2 = _iterator2.n()).done) { + _context2.next = 10; + break; + } + return _context2.delegateYield(_loop2(), "t0", 8); + case 8: + _context2.next = 6; + break; + case 10: + _context2.next = 15; + break; + case 12: + _context2.prev = 12; + _context2.t1 = _context2["catch"](3); + _iterator2.e(_context2.t1); + case 15: + _context2.prev = 15; + _iterator2.f(); + return _context2.finish(15); + case 18: + case "end": + return _context2.stop(); + } + }, _loop, null, [[3, 12, 15, 18]]); + }); + _i = 0, _Object$entries = 
Object.entries(this.timestampBuffers); + case 8: + if (!(_i < _Object$entries.length)) { + _context3.next = 13; + break; + } + return _context3.delegateYield(_loop(), "t0", 10); + case 10: + _i++; + _context3.next = 8; + break; + case 13: + this.timestampBuffers = {}; + case 14: + case "end": + return _context3.stop(); + } + }, _callee, this); + })); + function sync() { + return _sync.apply(this, arguments); + } + return sync; + }() + }, { + key: "reportShaderTimes", + value: function reportShaderTimes() { + for (var _i2 = 0, _Object$entries2 = Object.entries(this.shaderTimes); _i2 < _Object$entries2.length; _i2++) { + var _Object$entries2$_i = _slicedToArray(_Object$entries2[_i2], 2), + shaderName = _Object$entries2$_i[0], + shaderTimes = _Object$entries2$_i[1]; + var timeSum = 0n; + var _iterator4 = _createForOfIteratorHelper(shaderTimes), + _step4; + try { + for (_iterator4.s(); !(_step4 = _iterator4.n()).done;) { + var shaderTime = _step4.value; + timeSum = timeSum + shaderTime; + } + } catch (err) { + _iterator4.e(err); + } finally { + _iterator4.f(); + } + var totalSeconds = Number(timeSum) * 1e-9; + var avgSeconds = (totalSeconds / shaderTimes.length).toFixed(7); + console.log("".concat(shaderName, ", ").concat(totalSeconds.toFixed(5), ", ").concat(avgSeconds)); + } + this.shaderTimes = {}; + } + }, { + key: "flushCommandEncoder", + value: function flushCommandEncoder() { + var _this2 = this; + this.device.queue.submit([this.commandEncoder.finish()]); + this._commandEncoder = null; + this._numCommandsEncoded = 0; + this._stageBuffersPendingMap.forEach(function (buffer) { + buffer.destroy(); + }); + this._stageBuffersPendingMap = []; + this._uniformBuffersPendingRelease.forEach(function (buffer) { + _this2.releaseBuffer(buffer, false); + }); + this._uniformBuffersPendingRelease = []; + } + }, { + key: "writeBuffer", + value: function writeBuffer(sizeBytes, offset, srcArray, dstBuffer) { + var stagingBuffer = this.getBuffer(sizeBytes, GPUBufferUsage.MAP_WRITE 
| GPUBufferUsage.COPY_SRC, true); + new Uint8Array(stagingBuffer.getMappedRange()).set(srcArray); + stagingBuffer.unmap(); + this._stageBuffersPendingMap.push(stagingBuffer); + this.endComputePass(); + this.commandEncoder.copyBufferToBuffer(stagingBuffer, 0, dstBuffer, offset, sizeBytes); + this.numCommandsEncoded++; + } + }, { + key: "dispatchComputerShader", + value: function dispatchComputerShader(bindGroup, pipeline, shaderName, workgroupCountX, workgroupCountY, workgroupCountZ) { + if (this.isTimerEnabled) { + var querySet = this.device.createQuerySet({ + type: 'timestamp', + count: 2 + }); + var timestampWrites = { + querySet: querySet, + beginningOfPassWriteIndex: 0, + endOfPassWriteIndex: 1 + }; + this.endComputePass(); + this._passEncoder = this.commandEncoder.beginComputePass({ + timestampWrites: timestampWrites + }); + this._passEncoder.setBindGroup(0, bindGroup); + this._passEncoder.setPipeline(pipeline); + this._passEncoder.dispatchWorkgroups(workgroupCountX, workgroupCountY, workgroupCountZ); + this.endComputePass(); + var size = 2 * BigInt64Array.BYTES_PER_ELEMENT; + var resolveBuffer = this.device.createBuffer({ + size: size, + usage: GPUBufferUsage.QUERY_RESOLVE | GPUBufferUsage.COPY_SRC + }); + this.commandEncoder.resolveQuerySet(querySet, 0, 2, resolveBuffer, 0); + var resultBuffer = this.device.createBuffer({ + size: size, + usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ + }); + this.commandEncoder.copyBufferToBuffer(resolveBuffer, 0, resultBuffer, 0, size); + if (!this.timestampBuffers[shaderName]) { + this.timestampBuffers[shaderName] = []; + } + this.timestampBuffers[shaderName].push(resultBuffer); + this.numCommandsEncoded += 3; + } else { + if (!this._passEncoder) { + this._passEncoder = this.commandEncoder.beginComputePass(); + } + this._passEncoder.setBindGroup(0, bindGroup); + this._passEncoder.setPipeline(pipeline); + this._passEncoder.dispatchWorkgroups(workgroupCountX, workgroupCountY, workgroupCountZ); + 
this.numCommandsEncoded++; + } + } + }]); + return PvWebGPUDevice; +}(); + +var PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE = 256; +var PV_PICOLLM_WEBGPU_MAX_GRID_DIM = 65535; +var gpuDevices = new Map(); +var gpuBuffers = new Map(); +var emptyShader = "\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main_empty() {}\n"; +var shaderEntryPoint = 'main'; + +var PRECOMPUTE_ENCODING_SHADER_NAME = "pv_picollm_attention_precompute_encoding_shader"; +var attentionPrecomputeEncodingShaderSource = "\nstruct argsStruct {\n dimension: u32,\n steps: u32,\n theta: f32,\n encoding_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar encoding: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(\n @builtin(local_invocation_id) local_id: vec3,\n @builtin(workgroup_id) workgroup_id: vec3,\n @builtin(num_workgroups) num_workgroups: vec3\n) {\n let ts = workgroup_id.x;\n let ds = local_id.x;\n \n for (var t = ts; t < args.steps; t += num_workgroups.x) {\n for (var d = ds; d < (args.dimension / 2u); d += workgroup_size_x) {\n let f = 2u * d;\n let x = f32(t) / pow(args.theta, f32(f) / f32(args.dimension));\n let encoding_idx = args.encoding_offset + (t * args.dimension) + f;\n encoding[encoding_idx] = cos(x);\n encoding[encoding_idx + 1] = sin(x);\n }\n } \n}\n\n".concat(emptyShader); +var loadAttentionPrecomputeEncodingShader = function loadAttentionPrecomputeEncodingShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "attention precompute encoding bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + 
label: "attention precompute encoding pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "attention precompute encoding shader module", + code: attentionPrecomputeEncodingShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "attention precompute encoding pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE + } + } + }); + return { + computePipeline: computePipeline + }; +}; +var ENCODE_ROPE_INTERLEAVED_SHADER_NAME = "pv_picollm_attention_encode_rope_interleaved_shader"; +var attentionEncodeRopeInterleavedShaderSource = "\nstruct argsStruct { \n n: u32,\n num_heads: u32,\n head_dimension: u32,\n rope_dimension: u32,\n position: u32,\n encoding_offset: u32,\n x_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar encoding: array;\n\n@group(0) @binding(2)\nvar x: array;\n \noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(\n @builtin(local_invocation_id) local_id: vec3,\n @builtin(workgroup_id) workgroup_id: vec3,\n @builtin(num_workgroups) num_workgroups: vec3\n) {\n let ts = workgroup_id.x;\n let h = workgroup_id.y;\n let ds = local_id.x;\n\n for (var t = ts; t < args.n; t += num_workgroups.x) {\n let x_start = args.x_offset + (t * args.num_heads + h) * args.head_dimension;\n let encoding_start = args.encoding_offset + ((t + args.position) * args.rope_dimension); \n for (var d = ds; d < (args.head_dimension / 2u); d += workgroup_size_x) {\n let i = 2u * d;\n let x_idx = x_start + i;\n let encoding_idx = encoding_start + i;\n \n let re = x[x_idx];\n let im = x[x_idx + 1];\n x[x_idx] = (re * encoding[encoding_idx]) - (im * encoding[encoding_idx + 1]);\n 
x[x_idx + 1] = (re * encoding[encoding_idx + 1]) + (im * encoding[encoding_idx]);\n }\n }\n}\n\n".concat(emptyShader); +var loadAttentionEncodeRopeInterleavedShader = function loadAttentionEncodeRopeInterleavedShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "attention encode rope interleave bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "attention encode rope interleave pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "attention encode rope interleave shader module", + code: attentionEncodeRopeInterleavedShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "attention encode rope interleave pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE + } + } + }); + return { + computePipeline: computePipeline + }; +}; +var ENCODE_SHADER_NAME = "pv_picollm_attention_encode_shader"; +var attentionEncodeShaderSource = "\nstruct argsStruct { \n n: u32,\n num_heads: u32,\n head_dimension: u32,\n rope_dimension: u32,\n position: u32,\n encoding_offset: u32,\n x_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar encoding: array;\n\n@group(0) @binding(2)\nvar x: array;\n \noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(\n @builtin(local_invocation_id) local_id: vec3,\n @builtin(workgroup_id) 
workgroup_id: vec3,\n @builtin(num_workgroups) num_workgroups: vec3\n) {\n let ts = workgroup_id.x;\n let h = workgroup_id.y;\n let ds = local_id.x;\n\n for (var t = ts; t < args.n; t += num_workgroups.x) {\n let half_rope = (args.rope_dimension / 2);\n let xr_start = args.x_offset + ((t * args.num_heads + h) * args.head_dimension);\n let xi_start = xr_start + half_rope;\n let encoding_start = args.encoding_offset + ((t + args.position) * args.rope_dimension); \n for (var d = ds; d < half_rope; d += workgroup_size_x) {\n let xr_idx = xr_start + d;\n let xi_idx = xi_start + d;\n let encoding_idx = encoding_start + (2 * d);\n\n let re = x[xr_idx];\n let im = x[xi_idx];\n x[xr_idx] = (re * encoding[encoding_idx]) - (im * encoding[encoding_idx + 1]);\n x[xi_idx] = (re * encoding[encoding_idx + 1]) + (im * encoding[encoding_idx]);\n }\n }\n}\n\n".concat(emptyShader); +var loadAttentionEncodeShader = function loadAttentionEncodeShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "attention encode bind layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "attention encode pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "attention encode shader", + code: attentionEncodeShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "attention encode pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE + } + } + }); + return { + computePipeline: computePipeline + }; +}; +var DOT_PRODUCT_SHADER_NAME = 
"pv_picollm_attention_dot_product_shader"; +var attentionDotProductShaderSource = "\nstruct argsStruct { \n n: u32,\n tq: u32,\n head_dimension: u32,\n num_heads: u32,\n num_kv_heads: u32,\n window_length: u32,\n start: u32,\n norm: f32,\n length1: u32,\n num_keys: u32,\n query_offset: u32,\n keys_offset: u32,\n key_intercepts_offset: u32,\n key_slopes_offset: u32,\n scores_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar query: array;\n\n@group(0) @binding(2)\nvar keys: array;\n\n@group(0) @binding(3)\nvar key_intercepts: array;\n\n@group(0) @binding(4)\nvar key_slopes: array;\n\n@group(0) @binding(5)\nvar scores: array;\n \noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.num_heads) {\n return;\n }\n\n let head = global_id.x / (args.num_heads / args.num_kv_heads);\n \n let head_offset = head * args.window_length;\n let start_index = head_offset + args.start;\n \n let keys_local_a = args.keys_offset + (start_index * args.head_dimension);\n let key_intercepts_local_a = args.key_intercepts_offset + start_index; \n let key_slopes_local_a = args.key_slopes_offset + start_index;\n \n let keys_local_b = args.keys_offset + (head_offset * args.head_dimension);\n let key_intercepts_local_b = args.key_intercepts_offset + head_offset; \n let key_slopes_local_b = args.key_slopes_offset + head_offset;\n \n let scores_local = args.scores_offset + (global_id.x * args.num_keys);\n let query_local = args.query_offset + (((global_id.x * args.n) + args.tq) * args.head_dimension);\n \n for (var i = 0u; i < args.head_dimension; i++) { \n for (var k = 0u; k < args.num_keys; k++) {\n if (k < args.length1) { \n let key_idx = keys_local_a + (k * args.head_dimension) + i;\n let key_val = f32(extractBits(keys[key_idx / 
4], (i * 8u) % 32u, 8u));\n let tmp = query[query_local + i] * (key_intercepts[key_intercepts_local_a + k] + (key_slopes[key_slopes_local_a + k] * key_val));\n scores[scores_local + k] += tmp;\n }\n else {\n let j = k - args.length1;\n let key_idx = keys_local_b + (j * args.head_dimension) + i;\n let key_val = f32(extractBits(keys[key_idx / 4], (i * 8u) % 32u, 8u));\n let tmp = query[query_local + i] * (key_intercepts[key_intercepts_local_b + j] + (key_slopes[key_slopes_local_b + j] * key_val));\n scores[scores_local + k] += tmp;\n }\n } \n }\n \n for (var k = 0u; k < args.num_keys; k++) {\n scores[scores_local + k] *= args.norm;\n }\n}\n\n".concat(emptyShader); +var loadAttentionDotProductShader = function loadAttentionDotProductShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "attention dot product bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 3, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 4, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 5, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "attention dot product pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "attention dot product shader module", + code: attentionDotProductShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "attention dot product pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint + } + }); + return { + 
computePipeline: computePipeline + }; +}; +var SOFTMAX_SHADER_NAME = "pv_picollm_attention_softmax_shader"; +var attentionSoftmaxShaderSource = "\nstruct argsStruct { \n num_heads: u32,\n num_keys: u32,\n scores_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar scores: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.num_heads) {\n return;\n }\n\n let scores_start = args.scores_offset + (global_id.x * args.num_keys);\n \n var max_index: u32 = 0;\n for (var i = 1u; i < args.num_keys; i++) {\n if (scores[scores_start + i] > scores[scores_start + max_index]) {\n max_index = i;\n }\n }\n let max: f32 = scores[scores_start + max_index];\n\n var sum: f32 = 0.0;\n for (var i = 0u; i < args.num_keys; i++) {\n scores[scores_start + i] = exp(scores[scores_start + i] - max);\n sum += scores[scores_start + i];\n }\n\n for (var i = 0u; i < args.num_keys; i++) {\n scores[scores_start + i] /= sum;\n }\n}\n\n".concat(emptyShader); +var loadAttentionSoftmaxShader = function loadAttentionSoftmaxShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "attention softmax bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "attention softmax pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "attention softmax shader module", + code: attentionSoftmaxShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "attention softmax pipeline", + layout: pipelineLayout, 
+ compute: { + module: shaderModule, + entryPoint: shaderEntryPoint + } + }); + return { + computePipeline: computePipeline + }; +}; +var FIR_SHADER_NAME = "pv_picollm_attention_fir_shader"; +var attentionFirShaderSource = "\nstruct argsStruct { \n length1: u32,\n length2: u32,\n tq: u32,\n head_dimension: u32,\n num_heads: u32,\n num_kv_heads: u32,\n window_length: u32,\n start: u32,\n values_offset: u32,\n value_intercepts_offset: u32,\n value_slopes_offset: u32,\n scores_offset: u32,\n output_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar values: array;\n\n@group(0) @binding(2)\nvar value_intercepts: array;\n\n@group(0) @binding(3)\nvar value_slopes: array;\n\n@group(0) @binding(4)\nvar scores: array;\n\n@group(0) @binding(5)\nvar output: array;\n \noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.num_heads) {\n return;\n }\n\n let head = global_id.x / (args.num_heads / args.num_kv_heads);\n\n let head_offset = head * args.window_length;\n let start_index = head_offset + args.start;\n\n let values_local_a = args.values_offset + (start_index * args.head_dimension);\n let value_intercepts_local_a = args.value_intercepts_offset + start_index;\n let value_slopes_local_a = args.value_slopes_offset + start_index;\n let values_local_b = args.values_offset + (head_offset * args.head_dimension);\n let value_intercepts_local_b = args.value_intercepts_offset + head_offset;\n let value_slopes_local_b = args.value_slopes_offset + head_offset;\n let scores_local = args.scores_offset + (global_id.x * (args.length1 + args.length2));\n let output_local = args.output_offset + (((args.tq * args.num_heads) + global_id.x) * args.head_dimension);\n\n for (var i = 0u; i < args.head_dimension; i++) {\n var tmp: 
f32 = 0.0;\n for (var k = 0u; k < args.length1; k++) {\n let value_idx = values_local_a + (k * args.head_dimension) + i;\n let value_val = f32(extractBits(values[value_idx / 4], (i * 8u) % 32u, 8u));\n tmp += scores[scores_local + k] * (value_intercepts[value_intercepts_local_a + k] + (value_slopes[value_slopes_local_a + k] * value_val)); \n }\n for (var k = 0u; k < args.length2; k++) {\n let value_idx = values_local_b + (k * args.head_dimension) + i;\n let value_val = f32(extractBits(values[value_idx / 4], (i * 8u) % 32u, 8u));\n tmp += scores[scores_local + args.length1 + k] * (value_intercepts[value_intercepts_local_b + k] + (value_slopes[value_slopes_local_b + k] * value_val)); \n }\n output[output_local + i] = tmp;\n }\n}\n\n".concat(emptyShader); +var loadAttentionFirShader = function loadAttentionFirShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "attention fir bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 3, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 4, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 5, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "attention fir pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "attention fir shader module", + code: attentionFirShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "attention fir pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: 
shaderEntryPoint + } + }); + return { + computePipeline: computePipeline + }; +}; +var UPDATE_KV_SHADER_NAME = "pv_picollm_attention_update_kv_shader"; +var attentionUpdateKvShaderSource = "\nstruct argsStruct {\n n: u32,\n num_kv_heads: u32,\n window_length: u32,\n position: u32,\n head_dimension: u32,\n tf_offset: u32,\n kv_offset: u32,\n kv_intercepts_offset: u32,\n kv_slopes_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar tf: array;\n\n@group(0) @binding(2)\nvar kv: array;\n\n@group(0) @binding(3)\nvar kv_intercepts: array;\n\n@group(0) @binding(4)\nvar kv_slopes: array;\n \noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) { \n if (global_id.x >= args.num_kv_heads) {\n return;\n }\n \n for (var i = 0u; i < args.n; i++) {\n let index = (global_id.x * args.window_length) + ((args.position + i) % args.window_length);\n let tf_start = args.tf_offset + (((i * args.num_kv_heads) + global_id.x) * args.head_dimension);\n let kv_start = args.kv_offset + ((index * args.head_dimension) / 4);\n let kv_intercepts_start = args.kv_intercepts_offset + index;\n let kv_slopes_start = args.kv_slopes_offset + index;\n \n var xmax = tf[tf_start]; \n var xmin = tf[tf_start]; \n \n for (var j = 1u; j < args.head_dimension; j++) {\n xmax = max(xmax, tf[tf_start + j]);\n xmin = min(xmin, tf[tf_start + j]);\n }\n\n kv_intercepts[kv_intercepts_start] = xmin;\n kv_slopes[kv_slopes_start] = f32(xmax - xmin) / 255.0;\n\n for (var j = 0u; j < args.head_dimension; j++) {\n let kv_idx = kv_start + (j / 4);\n let kv_val = u32(round((tf[tf_start + j] - xmin) / kv_slopes[kv_slopes_start])); \n kv[kv_idx] = insertBits(kv[kv_idx], extractBits(kv_val, 0u, 8u), (j * 8u) % 32u, 8u); \n }\n }\n}\n\n".concat(emptyShader); +var loadAttentionUpdateKvShader = 
function loadAttentionUpdateKvShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "attention update kv bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }, { + binding: 3, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }, { + binding: 4, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "attention update kv pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "attention update kv shader module", + code: attentionUpdateKvShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "attention update kv pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint + } + }); + return { + computePipeline: computePipeline + }; +}; +var TRANSPOSE_QUERY_SHADER_NAME = "pv_picollm_attention_transpose_query_shader"; +var attentionTransposeQueryShaderSource = "\nstruct argsStruct {\n n: u32,\n num_heads: u32,\n head_dimension: u32,\n tf_offset: u32,\n hf_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar tf: array;\n\n@group(0) @binding(2)\nvar hf: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(\n @builtin(workgroup_id) workgroup_id : vec3,\n @builtin(local_invocation_id) local_id : vec3\n) {\n\n if (workgroup_id.x >= args.num_heads || workgroup_id.y >= args.n || local_id.x >= args.head_dimension) {\n return;\n }\n \n let tf_idx = 
args.tf_offset + (workgroup_id.y * args.num_heads * args.head_dimension) + (workgroup_id.x * args.head_dimension) + local_id.x; \n let hf_idx = args.hf_offset + (workgroup_id.x * args.n * args.head_dimension) + (workgroup_id.y * args.head_dimension) + local_id.x; \n hf[hf_idx] = tf[tf_idx];\n}\n\n".concat(emptyShader); +var loadAttentionTransposeQueryShader = function loadAttentionTransposeQueryShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "attention transpose query bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "attention transpose query pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "attention transpose query shader module", + code: attentionTransposeQueryShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "attention transpose query pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE + } + } + }); + return { + computePipeline: computePipeline + }; +}; +var attentionShaders = _defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty({}, PRECOMPUTE_ENCODING_SHADER_NAME, loadAttentionPrecomputeEncodingShader), ENCODE_ROPE_INTERLEAVED_SHADER_NAME, loadAttentionEncodeRopeInterleavedShader), ENCODE_SHADER_NAME, loadAttentionEncodeShader), DOT_PRODUCT_SHADER_NAME, loadAttentionDotProductShader), SOFTMAX_SHADER_NAME, loadAttentionSoftmaxShader), FIR_SHADER_NAME, loadAttentionFirShader), 
UPDATE_KV_SHADER_NAME, loadAttentionUpdateKvShader), TRANSPOSE_QUERY_SHADER_NAME, loadAttentionTransposeQueryShader); +var getPicollmAttentionWebGpuFunctions = function getPicollmAttentionWebGpuFunctions(memory) { + var setStatus = function setStatus(statusAddress, value) { + var memoryBufferInt32 = new Int32Array(memory.buffer); + memoryBufferInt32[statusAddress / Int32Array.BYTES_PER_ELEMENT] = value; + }; + var pvPicollmAttentionPrecomputeEncodingWebGpu = function pvPicollmAttentionPrecomputeEncodingWebGpu(objAddress, encodingAddress, encodingOffset, dimension, steps, theta, statusAddress) { + var _gpuBuffers$get; + objAddress = unsignedAddress(objAddress); + encodingAddress = unsignedAddress(encodingAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[PRECOMPUTE_ENCODING_SHADER_NAME]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var encodingBuffer = (_gpuBuffers$get = gpuBuffers.get(encodingAddress)) === null || _gpuBuffers$get === void 0 ? 
void 0 : _gpuBuffers$get.buffer; + if (!encodingBuffer) { + console.error('Encoding buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(4 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "attention precompute encoding arg buffer"); + var buffer = new ArrayBuffer(argsBuffer.size); + var view = new DataView(buffer); + view.setUint32(0, dimension, true); + view.setUint32(4, steps, true); + view.setFloat32(8, theta, true); + view.setUint32(12, encodingOffset / 4, true); + obj.device.queue.writeBuffer(argsBuffer, 0, buffer); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "attention precompute encoding bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: encodingBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, PRECOMPUTE_ENCODING_SHADER_NAME, steps); + setStatus(statusAddress, 0); + }; + var pvPicollmAttentionEncodeWebGpu = function pvPicollmAttentionEncodeWebGpu(objAddress, isRopeInterleaved, xAddress, xOffset, n, numHeads, headDimension, ropeDimension, position, encodingAddress, encodingOffset, statusAddress) { + var _gpuBuffers$get2, _gpuBuffers$get3; + objAddress = unsignedAddress(objAddress); + xAddress = unsignedAddress(xAddress); + encodingAddress = unsignedAddress(encodingAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shaderName = isRopeInterleaved ? 
ENCODE_ROPE_INTERLEAVED_SHADER_NAME : ENCODE_SHADER_NAME; + var shader = obj.shaders[shaderName]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var encodingBuffer = (_gpuBuffers$get2 = gpuBuffers.get(encodingAddress)) === null || _gpuBuffers$get2 === void 0 ? void 0 : _gpuBuffers$get2.buffer; + if (!encodingBuffer) { + console.error('Encoding buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var xBuffer = (_gpuBuffers$get3 = gpuBuffers.get(xAddress)) === null || _gpuBuffers$get3 === void 0 ? void 0 : _gpuBuffers$get3.buffer; + if (!xBuffer) { + console.error('X buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(7 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "attention encode arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([n, numHeads, headDimension, ropeDimension, position, encodingOffset / 4, xOffset / 4])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "attention encode bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: encodingBuffer + } + }, { + binding: 2, + resource: { + buffer: xBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, shaderName, Math.min(n, PV_PICOLLM_WEBGPU_MAX_GRID_DIM), numHeads); + setStatus(statusAddress, 0); + }; + var pvPicollmAttentionDotProductWebGpu = function pvPicollmAttentionDotProductWebGpu(objAddress, queryAddress, queryOffset, keysAddress, keysOffset, keyInterceptsAddress, keyInterceptsOffset, keySlopesAddress, keySlopesOffset, n, tq, headDimension, numHeads, numKvHeads, windowLength, start, norm, length1, length2, numKeys, scoresAddress, scoresOffset, statusAddress) { + var 
_gpuBuffers$get4, _gpuBuffers$get5, _gpuBuffers$get6, _gpuBuffers$get7, _gpuBuffers$get8; + objAddress = unsignedAddress(objAddress); + queryAddress = unsignedAddress(queryAddress); + keysAddress = unsignedAddress(keysAddress); + keyInterceptsAddress = unsignedAddress(keyInterceptsAddress); + keySlopesAddress = unsignedAddress(keySlopesAddress); + scoresAddress = unsignedAddress(scoresAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[DOT_PRODUCT_SHADER_NAME]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var queryBuffer = (_gpuBuffers$get4 = gpuBuffers.get(queryAddress)) === null || _gpuBuffers$get4 === void 0 ? void 0 : _gpuBuffers$get4.buffer; + if (!queryBuffer) { + console.error('query buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var keysBuffer = (_gpuBuffers$get5 = gpuBuffers.get(keysAddress)) === null || _gpuBuffers$get5 === void 0 ? void 0 : _gpuBuffers$get5.buffer; + if (!keysBuffer) { + console.error('keys buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var keyInterceptsBuffer = (_gpuBuffers$get6 = gpuBuffers.get(keyInterceptsAddress)) === null || _gpuBuffers$get6 === void 0 ? void 0 : _gpuBuffers$get6.buffer; + if (!keyInterceptsBuffer) { + console.error('key intercepts buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var keySlopesBuffer = (_gpuBuffers$get7 = gpuBuffers.get(keySlopesAddress)) === null || _gpuBuffers$get7 === void 0 ? 
void 0 : _gpuBuffers$get7.buffer; + if (!keySlopesBuffer) { + console.error('key slopes buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var scoresBuffer = (_gpuBuffers$get8 = gpuBuffers.get(scoresAddress)) === null || _gpuBuffers$get8 === void 0 ? void 0 : _gpuBuffers$get8.buffer; + if (!scoresBuffer) { + console.error('scores buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(15 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "attention dot product arg buffer"); + var buffer = new ArrayBuffer(argsBuffer.size); + var view = new DataView(buffer); + view.setUint32(0, n, true); + view.setUint32(4, tq, true); + view.setUint32(8, headDimension, true); + view.setUint32(12, numHeads, true); + view.setUint32(16, numKvHeads, true); + view.setUint32(20, windowLength, true); + view.setUint32(24, start, true); + view.setFloat32(28, norm, true); + view.setUint32(32, length1, true); + view.setUint32(36, numKeys, true); + view.setUint32(40, scoresOffset, true); + view.setUint32(44, queryOffset / 4, true); + view.setUint32(48, keysOffset, true); + view.setUint32(52, keyInterceptsOffset / 4, true); + view.setUint32(56, keySlopesOffset / 4, true); + obj.device.queue.writeBuffer(argsBuffer, 0, buffer); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "attention dot product bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: queryBuffer + } + }, { + binding: 2, + resource: { + buffer: keysBuffer + } + }, { + binding: 3, + resource: { + buffer: keyInterceptsBuffer + } + }, { + binding: 4, + resource: { + buffer: keySlopesBuffer + } + }, { + binding: 5, + resource: { + buffer: scoresBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, 
DOT_PRODUCT_SHADER_NAME, numHeads); + setStatus(statusAddress, 0); + }; + var pvPicollmAttentionSoftmaxWebGpu = function pvPicollmAttentionSoftmaxWebGpu(objAddress, scoresAddress, scoresOffset, numHeads, numKeys, statusAddress) { + var _gpuBuffers$get9; + objAddress = unsignedAddress(objAddress); + scoresAddress = unsignedAddress(scoresAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[SOFTMAX_SHADER_NAME]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var scoresBuffer = (_gpuBuffers$get9 = gpuBuffers.get(scoresAddress)) === null || _gpuBuffers$get9 === void 0 ? void 0 : _gpuBuffers$get9.buffer; + if (!scoresBuffer) { + console.error('Scores buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(3 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "attention softmax arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([numHeads, numKeys, scoresOffset / 4])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "attention softmax bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: scoresBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, SOFTMAX_SHADER_NAME, numHeads); + setStatus(statusAddress, 0); + }; + var pvPicollmAttentionFirWebGpu = function pvPicollmAttentionFirWebGpu(objAddress, valuesAddress, valuesOffset, valueInterceptsAddress, valueInterceptsOffset, valueSlopesAddress, valueSlopesOffset, length1, length2, tq, headDimension, numHeads, numKvHeads, 
windowLength, start, scoresAddress, scoresOffset, outputAddress, outputOffset, statusAddress) { + var _gpuBuffers$get10, _gpuBuffers$get11, _gpuBuffers$get12, _gpuBuffers$get13, _gpuBuffers$get14; + objAddress = unsignedAddress(objAddress); + valuesAddress = unsignedAddress(valuesAddress); + valueInterceptsAddress = unsignedAddress(valueInterceptsAddress); + valueSlopesAddress = unsignedAddress(valueSlopesAddress); + scoresAddress = unsignedAddress(scoresAddress); + outputAddress = unsignedAddress(outputAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[FIR_SHADER_NAME]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var valuesBuffer = (_gpuBuffers$get10 = gpuBuffers.get(valuesAddress)) === null || _gpuBuffers$get10 === void 0 ? void 0 : _gpuBuffers$get10.buffer; + if (!valuesBuffer) { + console.error('values buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var valueInterceptsBuffer = (_gpuBuffers$get11 = gpuBuffers.get(valueInterceptsAddress)) === null || _gpuBuffers$get11 === void 0 ? void 0 : _gpuBuffers$get11.buffer; + if (!valueInterceptsBuffer) { + console.error('value intercepts buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var valueSlopesBuffer = (_gpuBuffers$get12 = gpuBuffers.get(valueSlopesAddress)) === null || _gpuBuffers$get12 === void 0 ? void 0 : _gpuBuffers$get12.buffer; + if (!valueSlopesBuffer) { + console.error('value slopes buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var scoresBuffer = (_gpuBuffers$get13 = gpuBuffers.get(scoresAddress)) === null || _gpuBuffers$get13 === void 0 ? 
void 0 : _gpuBuffers$get13.buffer; + if (!scoresBuffer) { + console.error('scores buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var outputBuffer = (_gpuBuffers$get14 = gpuBuffers.get(outputAddress)) === null || _gpuBuffers$get14 === void 0 ? void 0 : _gpuBuffers$get14.buffer; + if (!outputBuffer) { + console.error('output buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(13 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "attention fir arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([length1, length2, tq, headDimension, numHeads, numKvHeads, windowLength, start, valuesOffset, valueInterceptsOffset / 4, valueSlopesOffset / 4, scoresOffset / 4, outputOffset / 4])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "attention fir bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: valuesBuffer + } + }, { + binding: 2, + resource: { + buffer: valueInterceptsBuffer + } + }, { + binding: 3, + resource: { + buffer: valueSlopesBuffer + } + }, { + binding: 4, + resource: { + buffer: scoresBuffer + } + }, { + binding: 5, + resource: { + buffer: outputBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, FIR_SHADER_NAME, numHeads); + setStatus(statusAddress, 0); + }; + var pvPicollmAttentionUpdateKvWebGpu = function pvPicollmAttentionUpdateKvWebGpu(objAddress, tfAddress, tfOffset, n, kvAddress, kvOffset, kvInterceptsAddress, kvInterceptsOffset, kvSlopesAddress, kvSlopesOffset, numKvHeads, windowLength, position, headDimension, statusAddress) { + var _gpuBuffers$get15, _gpuBuffers$get16, _gpuBuffers$get17, _gpuBuffers$get18; + objAddress = unsignedAddress(objAddress); + tfAddress = 
unsignedAddress(tfAddress); + kvAddress = unsignedAddress(kvAddress); + kvInterceptsAddress = unsignedAddress(kvInterceptsAddress); + kvSlopesAddress = unsignedAddress(kvSlopesAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[UPDATE_KV_SHADER_NAME]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var tfBuffer = (_gpuBuffers$get15 = gpuBuffers.get(tfAddress)) === null || _gpuBuffers$get15 === void 0 ? void 0 : _gpuBuffers$get15.buffer; + if (!tfBuffer) { + console.error('tf buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var kvBuffer = (_gpuBuffers$get16 = gpuBuffers.get(kvAddress)) === null || _gpuBuffers$get16 === void 0 ? void 0 : _gpuBuffers$get16.buffer; + if (!kvBuffer) { + console.error('KV buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var kvInterceptsBuffer = (_gpuBuffers$get17 = gpuBuffers.get(kvInterceptsAddress)) === null || _gpuBuffers$get17 === void 0 ? void 0 : _gpuBuffers$get17.buffer; + if (!kvInterceptsBuffer) { + console.error('KV intercept buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var kvSlopesBuffer = (_gpuBuffers$get18 = gpuBuffers.get(kvSlopesAddress)) === null || _gpuBuffers$get18 === void 0 ? 
void 0 : _gpuBuffers$get18.buffer; + if (!kvSlopesBuffer) { + console.error('KV slopes buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(9 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "attention update kv arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([n, numKvHeads, windowLength, position, headDimension, tfOffset / 4, kvOffset, kvInterceptsOffset / 4, kvSlopesOffset / 4])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "attention update kv bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: tfBuffer + } + }, { + binding: 2, + resource: { + buffer: kvBuffer + } + }, { + binding: 3, + resource: { + buffer: kvInterceptsBuffer + } + }, { + binding: 4, + resource: { + buffer: kvSlopesBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, UPDATE_KV_SHADER_NAME, numKvHeads); + setStatus(statusAddress, 0); + }; + var pvPicollmAttentionTransposeQueryWebGpu = function pvPicollmAttentionTransposeQueryWebGpu(objAddress, tfAddress, tfOffset, hfAddress, hfOffset, n, numHeads, headDimension, statusAddress) { + var _gpuBuffers$get19, _gpuBuffers$get20; + objAddress = unsignedAddress(objAddress); + tfAddress = unsignedAddress(tfAddress); + hfAddress = unsignedAddress(hfAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[TRANSPOSE_QUERY_SHADER_NAME]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var tfBuffer = (_gpuBuffers$get19 = gpuBuffers.get(tfAddress)) === 
null || _gpuBuffers$get19 === void 0 ? void 0 : _gpuBuffers$get19.buffer; + if (!tfBuffer) { + console.error('tf buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var hfBuffer = (_gpuBuffers$get20 = gpuBuffers.get(hfAddress)) === null || _gpuBuffers$get20 === void 0 ? void 0 : _gpuBuffers$get20.buffer; + if (!hfBuffer) { + console.error('hf buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(5 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "attention transpose query arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([n, numHeads, headDimension, tfOffset / 4, hfOffset / 4])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "attention transpose query bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: tfBuffer + } + }, { + binding: 2, + resource: { + buffer: hfBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, TRANSPOSE_QUERY_SHADER_NAME, numHeads, n); + setStatus(statusAddress, 0); + }; + return { + pv_picollm_attention_precompute_encoding_webgpu_wasm: pvPicollmAttentionPrecomputeEncodingWebGpu, + pv_picollm_attention_encode_webgpu_wasm: pvPicollmAttentionEncodeWebGpu, + pv_picollm_attention_dot_product_webgpu_wasm: pvPicollmAttentionDotProductWebGpu, + pv_picollm_attention_softmax_webgpu_wasm: pvPicollmAttentionSoftmaxWebGpu, + pv_picollm_attention_fir_webgpu_wasm: pvPicollmAttentionFirWebGpu, + pv_picollm_attention_update_kv_webgpu_wasm: pvPicollmAttentionUpdateKvWebGpu, + pv_picollm_attention_transpose_query_webgpu_wasm: pvPicollmAttentionTransposeQueryWebGpu + }; +}; + +var SILU_SHADER_NAME = "pv_picollm_feed_forward_silu_shader"; +var feedForwardSiluShaderSource = "\nstruct argsStruct 
{\n n: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n x[global_id.x] = x[global_id.x] / (1.0 + exp(-x[global_id.x]));\n}\n\n".concat(emptyShader); +var loadFeedForwardSiluShader = function loadFeedForwardSiluShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "ff silu bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "ff silu pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "ff silu shader module", + code: feedForwardSiluShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "ff silu pipline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE + } + } + }); + return { + computePipeline: computePipeline + }; +}; +var GELU_SHADER_NAME = "pv_picollm_feed_forward_gelu_shader"; +var feedForwardGeluShaderSource = "\nstruct argsStruct {\n n: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array;\n\nconst a1: f32 = 0.254829592;\nconst a2: f32 = -0.284496736;\nconst a3: f32 = 1.421413741;\nconst a4: f32 = -1.453152027;\nconst a5: f32 = 1.061405429;\nconst p: f32 = 0.3275911;\n\n// A&S formula 7.1.26\nfn erf(x: f32) -> f32 { \n var sign: f32 = 1.0;\n if (x < 0) {\n sign = -1.0;\n }\n var x_abs: f32 = abs(x);\n \n 
let t: f32 = 1.0 / fma(p, x_abs, 1.0);\n let y: f32 = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x_abs * x_abs);\n\n return sign * y;\n}\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n x[global_id.x] = 0.5 * x[global_id.x] * (1.0 + erf(x[global_id.x] * 0.7071067811865475));\n}\n\n".concat(emptyShader); +var loadFeedForwardGeluShader = function loadFeedForwardGeluShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "ff gelu bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "ff gelu pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "ff gelu shader module", + code: feedForwardGeluShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "ff gelu pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE + } + } + }); + return { + computePipeline: computePipeline + }; +}; +var ALMOST_GELU_SHADER_NAME = "pv_picollm_feed_forward_almost_gelu_shader"; +var feedForwardAlmostGeluShaderSource = "\nstruct argsStruct {\n n: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn 
main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n x[global_id.x] = 0.5 * x[global_id.x] * (1 + tanh(0.7978845608028654 * (x[global_id.x] + (0.044715f * x[global_id.x] * x[global_id.x] * x[global_id.x]))));\n}\n\n".concat(emptyShader); +var loadFeedForwardAlmostGeluShader = function loadFeedForwardAlmostGeluShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "ff almost gelu bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "ff almost gelu pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "ff almost gelu shader module", + code: feedForwardAlmostGeluShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "ff almost gelu pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE + } + } + }); + return { + computePipeline: computePipeline + }; +}; +var MULTIPLY_BUFFERS_SHADER_NAME = "pv_picollm_feed_forward_multiply_buffers_shader"; +var feedForwardMultiplyBuffersShaderSource = "\nstruct argsStruct {\n n: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array;\n\n@group(0) @binding(2)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n y[global_id.x] *= x[global_id.x];\n}\n\n".concat(emptyShader); +var loadFeedForwardMultiplyBuffersShader = 
function loadFeedForwardMultiplyBuffersShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "ff multiply buffers bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "ff multiply buffers pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "ff multiply buffers shader module", + code: feedForwardMultiplyBuffersShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "ff multiply buffers pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE + } + } + }); + return { + computePipeline: computePipeline + }; +}; +var feedForwardShaders = _defineProperty(_defineProperty(_defineProperty(_defineProperty({}, SILU_SHADER_NAME, loadFeedForwardSiluShader), GELU_SHADER_NAME, loadFeedForwardGeluShader), ALMOST_GELU_SHADER_NAME, loadFeedForwardAlmostGeluShader), MULTIPLY_BUFFERS_SHADER_NAME, loadFeedForwardMultiplyBuffersShader); +var getPicollmFeedForwardWebGpuFunctions = function getPicollmFeedForwardWebGpuFunctions(memory) { + var setStatus = function setStatus(statusAddress, value) { + var memoryBufferInt32 = new Int32Array(memory.buffer); + memoryBufferInt32[statusAddress / Int32Array.BYTES_PER_ELEMENT] = value; + }; + var pvPicollmFeedForwardSiluWebGpu = function pvPicollmFeedForwardSiluWebGpu(objAddress, n, xAddress, statusAddress) { + var _gpuBuffers$get; + objAddress = unsignedAddress(objAddress); + xAddress = unsignedAddress(xAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = 
gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[SILU_SHADER_NAME]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var xBuffer = (_gpuBuffers$get = gpuBuffers.get(xAddress)) === null || _gpuBuffers$get === void 0 ? void 0 : _gpuBuffers$get.buffer; + if (!xBuffer) { + console.error('x buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "ff silu arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([n])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "ff silu bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: xBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, SILU_SHADER_NAME, Math.ceil(n / PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE)); + setStatus(statusAddress, 0); + }; + var pvPicollmFeedForwardGeluWebGpu = function pvPicollmFeedForwardGeluWebGpu(objAddress, n, xAddress, statusAddress) { + var _gpuBuffers$get2; + objAddress = unsignedAddress(objAddress); + xAddress = unsignedAddress(xAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[GELU_SHADER_NAME]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var xBuffer = (_gpuBuffers$get2 = gpuBuffers.get(xAddress)) === null || _gpuBuffers$get2 === void 0 ? 
void 0 : _gpuBuffers$get2.buffer; + if (!xBuffer) { + console.error('x buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "ff gelu arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([n])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "ff gelu bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: xBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, GELU_SHADER_NAME, Math.ceil(n / PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE)); + setStatus(statusAddress, 0); + }; + var pvPicollmFeedForwardAlmostGeluWebGpu = function pvPicollmFeedForwardAlmostGeluWebGpu(objAddress, n, xAddress, statusAddress) { + var _gpuBuffers$get3; + objAddress = unsignedAddress(objAddress); + xAddress = unsignedAddress(xAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[ALMOST_GELU_SHADER_NAME]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var xBuffer = (_gpuBuffers$get3 = gpuBuffers.get(xAddress)) === null || _gpuBuffers$get3 === void 0 ? 
void 0 : _gpuBuffers$get3.buffer; + if (!xBuffer) { + console.error('x buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "ff almost gelu arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([n])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "ff almost gelu bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: xBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, ALMOST_GELU_SHADER_NAME, Math.ceil(n / PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE)); + setStatus(statusAddress, 0); + }; + var pvPicollmFeedForwardMultiplyBuffersWebGpu = function pvPicollmFeedForwardMultiplyBuffersWebGpu(objAddress, n, xAddress, yAddress, statusAddress) { + var _gpuBuffers$get4, _gpuBuffers$get5; + objAddress = unsignedAddress(objAddress); + xAddress = unsignedAddress(xAddress); + yAddress = unsignedAddress(yAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[MULTIPLY_BUFFERS_SHADER_NAME]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var xBuffer = (_gpuBuffers$get4 = gpuBuffers.get(xAddress)) === null || _gpuBuffers$get4 === void 0 ? void 0 : _gpuBuffers$get4.buffer; + if (!xBuffer) { + console.error('X buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var yBuffer = (_gpuBuffers$get5 = gpuBuffers.get(yAddress)) === null || _gpuBuffers$get5 === void 0 ? 
void 0 : _gpuBuffers$get5.buffer; + if (!yBuffer) { + console.error('Y buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "ff multiply buffers arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([n])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "ff multiply buffers bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: xBuffer + } + }, { + binding: 2, + resource: { + buffer: yBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, MULTIPLY_BUFFERS_SHADER_NAME, Math.ceil(n / PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE)); + setStatus(statusAddress, 0); + }; + return { + pv_picollm_feed_forward_silu_webgpu_wasm: pvPicollmFeedForwardSiluWebGpu, + pv_picollm_feed_forward_gelu_webgpu_wasm: pvPicollmFeedForwardGeluWebGpu, + pv_picollm_feed_forward_almost_gelu_webgpu_wasm: pvPicollmFeedForwardAlmostGeluWebGpu, + pv_picollm_feed_forward_multiply_buffers_webgpu_wasm: pvPicollmFeedForwardMultiplyBuffersWebGpu + }; +}; + +var FORWARD_SHADER_NAME$1 = "pv_picollm_gate_forward_shader"; +var gateForwardShaderSource = "\n\nstruct pv_picollm_gate_ix_t {\n i: u32,\n x: f32,\n}\n\n@group(0) @binding(0)\nvar y: array;\n\n@group(0) @binding(1)\nvar indices: array;\n\n@group(0) @binding(2)\nvar weights: array;\n\noverride n: u32 = 0;\noverride k: u32 = 0;\noverride num_experts: u32 = 0;\n\noverride y_offset: u32 = 0;\noverride indices_offset: u32 = 0;\noverride weights_offset: u32 = 0;\n\nvar ixs: array;\n \noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn 
main(\n @builtin(global_invocation_id) global_id : vec3\n) {\n if (global_id.x >= n) {\n return;\n }\n \n var y_start: u32 = y_offset + global_id.x * num_experts;\n for (var j = 0u; j < num_experts; j++) {\n ixs[j].i = j;\n ixs[j].x = y[y_start + j];\n }\n\n for (var i = 0u; i < num_experts - 1; i++) {\n for (var j = 0u; j < num_experts - i - 1; j++) {\n if (ixs[j].x < ixs[j + 1].x) {\n let tmp = ixs[j];\n ixs[j] = ixs[j + 1];\n ixs[j + 1] = tmp;\n }\n }\n }\n\n for (var j = 0u; j < k; j++) {\n indices[indices_offset + (global_id.x * k) + j] = ixs[j].i;\n weights[weights_offset + (global_id.x * k) + j] = ixs[j].x;\n }\n\n var max_weight: f32 = weights[weights_offset + (global_id.x * k)];\n for (var j = 1u; j < k; j++) {\n max_weight = max(max_weight, weights[weights_offset + (global_id.x * k) + j]);\n }\n\n var sum_weight: f32 = 0.0;\n for (var j = 0u; j < k; j++) {\n weights[weights_offset + (global_id.x * k) + j] = exp(weights[weights_offset + (global_id.x * k) + j] - max_weight);\n sum_weight += weights[weights_offset + (global_id.x * k) + j];\n }\n\n for (var j = 0u; j < k; j++) {\n weights[weights_offset + (global_id.x * k) + j] /= sum_weight;\n }\n}\n\n".concat(emptyShader); +var loadGateForwardShader = function loadGateForwardShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "gate forward bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "gate forward pipeline", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "gate forward shader module", + code: gateForwardShaderSource + }); + var computePipeline = device.createComputePipeline({ + layout: 
pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + num_experts: 1 + } + } + }); + return { + computePipeline: computePipeline, + pipelineLayout: pipelineLayout, + shaderModule: shaderModule + }; +}; +var gateForwardShader = _defineProperty({}, FORWARD_SHADER_NAME$1, loadGateForwardShader); +var getPicollmGateWebGpuFunctions = function getPicollmGateWebGpuFunctions(memory) { + var setStatus = function setStatus(statusAddress, value) { + var memoryBufferInt32 = new Int32Array(memory.buffer); + memoryBufferInt32[statusAddress / Int32Array.BYTES_PER_ELEMENT] = value; + }; + var pvPicollmGateForwardWebGpu = function pvPicollmGateForwardWebGpu(objAddress, n, k, numExperts, yAddress, yOffset, indicesAddress, indicesOffset, weightsAddress, weightsOffset, statusAddress) { + var _gpuBuffers$get, _gpuBuffers$get2, _gpuBuffers$get3; + objAddress = unsignedAddress(objAddress); + yAddress = unsignedAddress(yAddress); + indicesAddress = unsignedAddress(indicesAddress); + weightsAddress = unsignedAddress(weightsAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[FORWARD_SHADER_NAME$1]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + // TODO: create this in setup, once we add args to setup procedure + var pipeline = obj.device.createComputePipeline({ + label: "gate forward pipeline", + layout: shader.pipelineLayout, + compute: { + module: shader.shaderModule, + entryPoint: shaderEntryPoint, + constants: { + n: n, + k: k, + num_experts: numExperts, + y_offset: yOffset / 4, + indices_offset: indicesOffset / 4, + weights_offset: weightsOffset / 4 + } + } + }); + var yBuffer = (_gpuBuffers$get = gpuBuffers.get(yAddress)) === null || _gpuBuffers$get === void 0 ? 
void 0 : _gpuBuffers$get.buffer; + if (!yBuffer) { + console.error('Y buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var indicesBuffer = (_gpuBuffers$get2 = gpuBuffers.get(indicesAddress)) === null || _gpuBuffers$get2 === void 0 ? void 0 : _gpuBuffers$get2.buffer; + if (!indicesBuffer) { + console.error('Indices buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var weightsBuffer = (_gpuBuffers$get3 = gpuBuffers.get(weightsAddress)) === null || _gpuBuffers$get3 === void 0 ? void 0 : _gpuBuffers$get3.buffer; + if (!weightsBuffer) { + console.error('Weights buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var bindGroup = obj.device.createBindGroup({ + label: "gate forward bind group", + layout: pipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: yBuffer + } + }, { + binding: 1, + resource: { + buffer: indicesBuffer + } + }, { + binding: 2, + resource: { + buffer: weightsBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, pipeline, FORWARD_SHADER_NAME$1, n); + setStatus(statusAddress, 0); + }; + return { + pv_picollm_gate_forward_webgpu_wasm: pvPicollmGateForwardWebGpu + }; +}; + +var ADD_TO_BUFFER_SHADER_NAME$1 = "pv_picollm_moe_transformer_add_to_buffer_shader"; +var moeTransformerAddToBufferShaderSource = "\nstruct argsStruct {\n n: u32, \n x_offset: u32,\n buffer_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array;\n\n@group(0) @binding(2)\nvar buffer: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n\n buffer[args.buffer_offset + global_id.x] += x[args.x_offset + global_id.x]; \n}\n\n".concat(emptyShader); +var 
loadMoeTransformerAddToBufferShader = function loadMoeTransformerAddToBufferShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "moe transformer add to buffer bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "moe transformer add to buffer pipeline", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "moe transformer add to buffer shader module", + code: moeTransformerAddToBufferShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "moe transformer add to buffer pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint + } + }); + return { + computePipeline: computePipeline + }; +}; +var MULTIPLY_WEIGHT_AND_ADD_TO_BUFFER_SHADER_NAME = "pv_picollm_moe_transformer_multiply_weight_and_add_to_buffer_shader"; +var moeTransformerMultiplyWeightAndToBufferShaderSource = "\nstruct argsStruct {\n n: u32, \n weights_index: u32,\n y_index: u32,\n weights_offset: u32,\n x_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar weights: array;\n\n@group(0) @binding(2)\nvar x: array;\n\n@group(0) @binding(3)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n \n y[args.y_offset + args.y_index + global_id.x] += weights[args.weights_index] + x[args.x_offset + global_id.x]; 
\n}\n\n".concat(emptyShader); +var loadMoeTransformerMultiplyWeightAndAddToBufferShader = function loadMoeTransformerMultiplyWeightAndAddToBufferShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "moe transformer multiply weight and add to buffer bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 3, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "moe transformer multiply weight and add to buffer pipeline", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "moe transformer multiply weight and add to buffer shader module", + code: moeTransformerMultiplyWeightAndToBufferShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "moe transformer multiply weight and add to buffer pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint + } + }); + return { + computePipeline: computePipeline + }; +}; +var ADD_BUFFERS_SHADER_NAME$1 = "pv_picollm_moe_transformer_add_buffers_shader"; +var moeTransformerAddBuffersShaderSource = "\nstruct argsStruct {\n n: u32, \n buffer1_offset: u32,\n buffer2_offset: u32,\n y_offset: u32, \n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar buffer1: array;\n\n@group(0) @binding(2)\nvar buffer2: array;\n\n@group(0) @binding(3)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) 
global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n \n y[args.y_offset + global_id.x] = buffer1[args.buffer1_offset + global_id.x] + buffer2[args.buffer2_offset + global_id.x]; \n}\n\n".concat(emptyShader); +var loadMoeTransformerAddBuffersShader = function loadMoeTransformerAddBuffersShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "moe transformer add buffers bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 3, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "moe transformer add buffers pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "moe transformer add buffers shader module", + code: moeTransformerAddBuffersShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "moe transformer add buffers pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint + } + }); + return { + computePipeline: computePipeline + }; +}; +var moeTransformerForwardShaders = _defineProperty(_defineProperty(_defineProperty({}, ADD_TO_BUFFER_SHADER_NAME$1, loadMoeTransformerAddToBufferShader), MULTIPLY_WEIGHT_AND_ADD_TO_BUFFER_SHADER_NAME, loadMoeTransformerMultiplyWeightAndAddToBufferShader), ADD_BUFFERS_SHADER_NAME$1, loadMoeTransformerAddBuffersShader); +var getPicollmMoeTransformerWebGpuFunctions = function getPicollmMoeTransformerWebGpuFunctions(memory) { + var setStatus = function setStatus(statusAddress, value) { + var memoryBufferInt32 = new Int32Array(memory.buffer); + memoryBufferInt32[statusAddress / 
Int32Array.BYTES_PER_ELEMENT] = value; + }; + var pvPicollmMoeTransformerAddToBufferWebGpu = function pvPicollmMoeTransformerAddToBufferWebGpu(objAddress, n, xAddress, xOffset, bufferAddress, bufferOffset, statusAddress) { + var _gpuBuffers$get, _gpuBuffers$get2; + objAddress = unsignedAddress(objAddress); + xAddress = unsignedAddress(xAddress); + bufferAddress = unsignedAddress(bufferAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[ADD_TO_BUFFER_SHADER_NAME$1]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var xBuffer = (_gpuBuffers$get = gpuBuffers.get(xAddress)) === null || _gpuBuffers$get === void 0 ? void 0 : _gpuBuffers$get.buffer; + if (!xBuffer) { + console.error('x buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var bufferBuffer = (_gpuBuffers$get2 = gpuBuffers.get(bufferAddress)) === null || _gpuBuffers$get2 === void 0 ? 
void 0 : _gpuBuffers$get2.buffer; + if (!bufferBuffer) { + console.error('buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(3 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "moe transformer add to buffer arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([n, xOffset, bufferOffset])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "moe transformer add to buffer bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: xBuffer + } + }, { + binding: 2, + resource: { + buffer: bufferBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, ADD_TO_BUFFER_SHADER_NAME$1, n); + setStatus(statusAddress, 0); + }; + var pvPicollmMoeTransformerMultiplyWeightAndAddToBufferWebGpu = function pvPicollmMoeTransformerMultiplyWeightAndAddToBufferWebGpu(objAddress, n, weightsIndex, yIndex, weightsAddress, weightsOffset, xAddress, xOffset, yAddress, yOffset, statusAddress) { + var _gpuBuffers$get3, _gpuBuffers$get4, _gpuBuffers$get5; + objAddress = unsignedAddress(objAddress); + weightsAddress = unsignedAddress(weightsAddress); + xAddress = unsignedAddress(xAddress); + yAddress = unsignedAddress(yAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[MULTIPLY_WEIGHT_AND_ADD_TO_BUFFER_SHADER_NAME]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var weightsBuffer = (_gpuBuffers$get3 = gpuBuffers.get(weightsAddress)) === null || _gpuBuffers$get3 === void 0 ? 
void 0 : _gpuBuffers$get3.buffer; + if (!weightsBuffer) { + console.error('weights has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var xBuffer = (_gpuBuffers$get4 = gpuBuffers.get(xAddress)) === null || _gpuBuffers$get4 === void 0 ? void 0 : _gpuBuffers$get4.buffer; + if (!xBuffer) { + console.error('buffer2 has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var yBuffer = (_gpuBuffers$get5 = gpuBuffers.get(yAddress)) === null || _gpuBuffers$get5 === void 0 ? void 0 : _gpuBuffers$get5.buffer; + if (!yBuffer) { + console.error('y has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(6 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "moe transformer multiply weight and add to buffer arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([n, weightsIndex, yIndex, weightsOffset, xOffset, yOffset])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "moe transformer multiply weight and add to buffer bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: weightsBuffer + } + }, { + binding: 2, + resource: { + buffer: xBuffer + } + }, { + binding: 3, + resource: { + buffer: yBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, MULTIPLY_WEIGHT_AND_ADD_TO_BUFFER_SHADER_NAME, n); + setStatus(statusAddress, 0); + }; + var pvPicollmMoeTransformerAddBuffersWebGpu = function pvPicollmMoeTransformerAddBuffersWebGpu(objAddress, n, buffer1Address, buffer1Offset, buffer2Address, buffer2Offset, yAddress, yOffset, statusAddress) { + var _gpuBuffers$get6, _gpuBuffers$get7, _gpuBuffers$get8; + objAddress = unsignedAddress(objAddress); + buffer1Address = unsignedAddress(buffer1Address); + buffer2Address = 
unsignedAddress(buffer2Address); + yAddress = unsignedAddress(yAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[ADD_BUFFERS_SHADER_NAME$1]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var buffer1Buffer = (_gpuBuffers$get6 = gpuBuffers.get(buffer1Address)) === null || _gpuBuffers$get6 === void 0 ? void 0 : _gpuBuffers$get6.buffer; + if (!buffer1Buffer) { + console.error('buffer1 has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var buffer2Buffer = (_gpuBuffers$get7 = gpuBuffers.get(buffer2Address)) === null || _gpuBuffers$get7 === void 0 ? void 0 : _gpuBuffers$get7.buffer; + if (!buffer2Buffer) { + console.error('buffer2 has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var yBuffer = (_gpuBuffers$get8 = gpuBuffers.get(yAddress)) === null || _gpuBuffers$get8 === void 0 ? 
void 0 : _gpuBuffers$get8.buffer; + if (!yBuffer) { + console.error('y has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(4 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "moe transformer add buffers arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([n, buffer1Offset, buffer2Offset, yOffset])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "moe transformer add buffers bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: buffer1Buffer + } + }, { + binding: 2, + resource: { + buffer: buffer2Buffer + } + }, { + binding: 3, + resource: { + buffer: yBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, ADD_BUFFERS_SHADER_NAME$1, n); + setStatus(statusAddress, 0); + }; + return { + pv_picollm_moe_transformer_add_to_buffer_webgpu_wasm: pvPicollmMoeTransformerAddToBufferWebGpu, + pv_picollm_moe_transformer_add_buffers_webgpu_wasm: pvPicollmMoeTransformerAddBuffersWebGpu, + pv_picollm_moe_transformer_multiply_weight_and_add_to_buffer_webgpu_wasm: pvPicollmMoeTransformerMultiplyWeightAndAddToBufferWebGpu + }; +}; + +var sdataReduce = "\n for (var s: u32 = workgroup_size_x / 2; s > 0; s >>= 1) {\n if tid < s {\n sdata[tid] += sdata[tid + s];\n }\n workgroupBarrier();\n }\n"; +var dividePadFunction = "\n fn divide_pad(a: u32, b: u32) -> u32 { \n return (a + b - 1) / b;\n }\n"; + +var FORWARD_MULTI_BUFFER_SHADER_NAME$1 = "pv_picollm_norm_forward_multi_buffer_shader"; +var FORWARD_SINGLE_BUFFER_SHADER_NAME$1 = "pv_picollm_norm_forward_single_buffer_shader"; +var normForwardShaderSource = function normForwardShaderSource(isMulti) { + return "\nstruct argsStruct {\n n: u32,\n dimension: u32,\n remainder: u32,\n remainder_start: u32, \n eps: 
f32, \n x_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar weight: array;\n\n".concat(isMulti ? "\n@group(0) @binding(2)\nvar x: array;\n\n@group(0) @binding(3) \nvar y: array;\n" : " \n@group(0) @binding(2)\nvar x: array;\n", "\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\nvar sdata: array, workgroup_size_x>;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(\n @builtin(local_invocation_id) local_id: vec3,\n @builtin(workgroup_id) workgroup_id: vec3\n) {\n let tid = local_id.x;\n let m = workgroup_id.x;\n let block_size = workgroup_size_x;\n \n var power_vec: vec4;\n let x_start: u32 = args.x_offset + (m * args.dimension);\n let skip = tid * 4;\n let shift = (block_size * 4);\n for (var j = 0u; j + skip < args.remainder_start; j += shift) {\n let local_index = x_start + j + skip; \n\n let x_vec = vec4(\n x[local_index],\n x[local_index + 1],\n x[local_index + 2],\n x[local_index + 3]);\n \n power_vec += x_vec * x_vec; \n } \n \n if (tid == 0 && args.remainder > 0) {\n var remainder_vec = vec4(0.0, 0.0, 0.0, 0.0);\n let x_idx = x_start + args.remainder_start;\n for (var j = 0u; j < args.remainder; j++) { \n remainder_vec[j] = x[x_idx + j];\n } \n power_vec += remainder_vec * remainder_vec;\n }\n \n sdata[tid] = power_vec;\n workgroupBarrier();\n\n ").concat(sdataReduce, "\n \n let power = sdata[0].x + sdata[0].y + sdata[0].z + sdata[0].w;\n let norm: vec4 = vec4(1.0 / sqrt((power / f32(args.dimension)) + args.eps));\n \n let y_start: u32 = args.y_offset + (m * args.dimension);\n for (var j = 0u; j + skip < args.remainder_start; j += shift) {\n let local_index = j + skip;\n let x_idx = x_start + local_index;\n let x_vec = vec4(\n x[x_idx],\n x[x_idx + 1],\n x[x_idx + 2],\n x[x_idx + 3]);\n \n let weight_vec = vec4(\n weight[local_index],\n weight[local_index + 1],\n weight[local_index + 2],\n 
weight[local_index + 3]);\n let y_vec = x_vec * norm * weight_vec;\n \n let y_idx = y_start + local_index;\n").concat(isMulti ? " \n y[y_idx] = y_vec.x;\n y[y_idx + 1] = y_vec.y;\n y[y_idx + 2] = y_vec.z;\n y[y_idx + 3] = y_vec.w;\n" : " \n x[y_idx] = y_vec.x;\n x[y_idx + 1] = y_vec.y;\n x[y_idx + 2] = y_vec.z;\n x[y_idx + 3] = y_vec.w;\n", " \n }\n \n if (tid == 0 && args.remainder > 0) {\n let x_idx = x_start + args.remainder_start;\n let weight_idx = args.remainder_start; \n let y_idx = y_start + args.remainder_start;\n for (var j = 0u; j < args.remainder; j++) {\n").concat(isMulti ? " \n y[y_idx + j] = x[x_idx + j] * norm[j] * weight[weight_idx + j];\n" : " \n x[y_idx + j] = x[x_idx + j] * norm[j] * weight[weight_idx + j];\n", " \n } \n }\n}\n\n").concat(emptyShader); +}; +var loadNormForwardShader = function loadNormForwardShader(device, isMulti) { + var entries = [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }]; + if (isMulti) { + entries.push({ + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }); + entries.push({ + binding: 3, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }); + } else { + entries.push({ + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }); + } + var bindGroupLayout = device.createBindGroupLayout({ + label: "norm forward ".concat(isMulti ? "multi" : "single", " buffer bind group layout"), + entries: entries + }); + var pipelineLayout = device.createPipelineLayout({ + label: "norm forward ".concat(isMulti ? "multi" : "single", " buffer pipeline layout"), + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "norm forward ".concat(isMulti ? 
"multi" : "single", " buffer shader module"), + code: normForwardShaderSource(isMulti) + }); + var computePipeline = device.createComputePipeline({ + label: "norm forward ".concat(isMulti ? "multi" : "single", " buffer pipeline"), + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE + } + } + }); + return { + computePipeline: computePipeline + }; +}; +var normForwardShader = _defineProperty(_defineProperty({}, FORWARD_SINGLE_BUFFER_SHADER_NAME$1, function (device) { + return loadNormForwardShader(device, false); +}), FORWARD_MULTI_BUFFER_SHADER_NAME$1, function (device) { + return loadNormForwardShader(device, true); +}); +var getPicollmNormWebGpuFunctions = function getPicollmNormWebGpuFunctions(memory) { + var setStatus = function setStatus(statusAddress, value) { + var memoryBufferInt32 = new Int32Array(memory.buffer); + memoryBufferInt32[statusAddress / Int32Array.BYTES_PER_ELEMENT] = value; + }; + var pvPicollmNormForwardWebGpu = function pvPicollmNormForwardWebGpu(objAddress, dimension, eps, weightAddress, n, xOffset, xAddress, yOffset, yAddress, statusAddress) { + var _gpuBuffers$get, _gpuBuffers$get2; + objAddress = unsignedAddress(objAddress); + weightAddress = unsignedAddress(weightAddress); + xAddress = unsignedAddress(xAddress); + yAddress = unsignedAddress(yAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shaderName = xAddress === yAddress ? 
FORWARD_SINGLE_BUFFER_SHADER_NAME$1 : FORWARD_MULTI_BUFFER_SHADER_NAME$1; + var shader = obj.shaders[shaderName]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var weightBuffer = (_gpuBuffers$get = gpuBuffers.get(weightAddress)) === null || _gpuBuffers$get === void 0 ? void 0 : _gpuBuffers$get.buffer; + if (!weightBuffer) { + console.error('Weight buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var yBuffer = (_gpuBuffers$get2 = gpuBuffers.get(yAddress)) === null || _gpuBuffers$get2 === void 0 ? void 0 : _gpuBuffers$get2.buffer; + if (!yBuffer) { + console.error('Y buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var remainder = dimension % 4; + var remainder_start = dimension - remainder; + var argsBuffer = obj.getBuffer(7 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "norm forward multi buffer arg buffer"); + var buffer = new ArrayBuffer(argsBuffer.size); + var view = new DataView(buffer); + view.setUint32(0, n, true); + view.setUint32(4, dimension, true); + view.setUint32(8, remainder, true); + view.setUint32(12, remainder_start, true); + view.setFloat32(16, eps, true); + view.setUint32(20, xOffset / 4, true); + view.setUint32(24, yOffset / 4, true); + obj.device.queue.writeBuffer(argsBuffer, 0, buffer); + obj.scheduleUniformBufferForRelease(argsBuffer); + var entries = [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: weightBuffer + } + }]; + if (xAddress === yAddress) { + entries.push({ + binding: 2, + resource: { + buffer: yBuffer + } + }); + } else { + var _gpuBuffers$get3; + var xBuffer = (_gpuBuffers$get3 = gpuBuffers.get(xAddress)) === null || _gpuBuffers$get3 === void 0 ? 
void 0 : _gpuBuffers$get3.buffer; + if (!xBuffer) { + console.error('X buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + entries.push({ + binding: 2, + resource: { + buffer: xBuffer + } + }); + entries.push({ + binding: 3, + resource: { + buffer: yBuffer + } + }); + } + var bindGroup = obj.device.createBindGroup({ + label: "norm forward ".concat(xAddress === yAddress ? "single" : "multi", " buffer bind group"), + layout: shader.computePipeline.getBindGroupLayout(0), + entries: entries + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, shaderName, n); + setStatus(statusAddress, 0); + }; + return { + pv_picollm_norm_forward_webgpu_wasm: pvPicollmNormForwardWebGpu + }; +}; + +var FORWARD_MULTI_BUFFER_SHADER_NAME = "pv_picollm_norm_layer_forward_multi_buffer_shader"; +var normLayerForwardMultiBufferShaderSource = "\nstruct argsStruct {\n n: u32,\n dimension: u32,\n eps: f32,\n weight_offset: u32,\n bias_offset: u32,\n x_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar weight: array;\n\n@group(0) @binding(2)\nvar bias: array;\n\n@group(0) @binding(3)\nvar x: array;\n\n@group(0) @binding(4)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n \n let x_start: u32 = args.x_offset + (global_id.x * args.dimension);\n\n var mean: f32 = 0.0;\n for (var j = 0u; j < args.dimension; j++) {\n mean += x[x_start + j];\n }\n mean /= f32(args.dimension);\n\n var mean2: f32 = 0.0;\n for (var j = 0u; j < args.dimension; j++) {\n mean2 += (x[x_start + j] - mean) * (x[x_start + j] - mean);\n }\n mean2 /= f32(args.dimension);\n\n var norm: f32 = 1.0 / sqrt(mean2 + args.eps);\n\n var y_start = args.y_offset + 
(global_id.x * args.dimension);\n for (var j = 0u; j < args.dimension; j++) {\n y[y_start + j] = ((x[x_start + j] - mean) * norm * weight[args.weight_offset + j]) + bias[args.bias_offset + j];\n }\n}\n\n".concat(emptyShader); +var loadNormLayerForwardMultiBufferShader = function loadNormLayerForwardMultiBufferShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "norm layer forward multi buffer bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 3, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 4, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "norm layer forward multi buffer pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "norm layer forward multi buffer shader module", + code: normLayerForwardMultiBufferShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "norm layer forward multi buffer pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE + } + } + }); + return { + computePipeline: computePipeline + }; +}; +var FORWARD_SINGLE_BUFFER_SHADER_NAME = "pv_picollm_norm_layer_forward_single_buffer_shader"; +var normLayerForwardSingleBufferShaderSource = "\nstruct argsStruct {\n n: u32,\n dimension: u32,\n eps: f32,\n weight_offset: u32,\n bias_offset: u32,\n x_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar weight: 
array;\n\n@group(0) @binding(2)\nvar bias: array;\n\n@group(0) @binding(3)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n \n let x_start: u32 = args.x_offset + (global_id.x * args.dimension);\n\n var mean: f32 = 0.0;\n for (var j = 0u; j < args.dimension; j++) {\n mean += y[x_start + j];\n }\n mean /= f32(args.dimension);\n\n var mean2: f32 = 0.0;\n for (var j = 0u; j < args.dimension; j++) {\n mean2 += (y[x_start + j] - mean) * (y[x_start + j] - mean);\n }\n mean2 /= f32(args.dimension);\n\n var norm: f32 = 1.0 / sqrt(mean2 + args.eps);\n\n var y_start = args.y_offset + (global_id.x * args.dimension);\n for (var j = 0u; j < args.dimension; j++) {\n y[y_start + j] = ((y[x_start + j] - mean) * norm * weight[args.weight_offset + j]) + bias[args.bias_offset + j];\n }\n}\n\n".concat(emptyShader); +var loadNormLayerForwardSingleBufferShader = function loadNormLayerForwardSingleBufferShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "norm layer forward single buffer bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 3, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "norm layer forward single buffer pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "norm layer forward single buffer shader module", + code: 
normLayerForwardSingleBufferShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "norm layer forward single buffer pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE + } + } + }); + return { + computePipeline: computePipeline + }; +}; +var normLayerForwardShader = _defineProperty(_defineProperty({}, FORWARD_SINGLE_BUFFER_SHADER_NAME, loadNormLayerForwardSingleBufferShader), FORWARD_MULTI_BUFFER_SHADER_NAME, loadNormLayerForwardMultiBufferShader); +var getPicollmNormLayerWebGpuFunctions = function getPicollmNormLayerWebGpuFunctions(memory) { + var setStatus = function setStatus(statusAddress, value) { + var memoryBufferInt32 = new Int32Array(memory.buffer); + memoryBufferInt32[statusAddress / Int32Array.BYTES_PER_ELEMENT] = value; + }; + var pvPicollmNormLayerForwardWebGpu = function pvPicollmNormLayerForwardWebGpu(objAddress, dimension, eps, weightAddress, weightOffset, biasAddress, biasOffset, n, xAddress, xOffset, yAddress, yOffset, statusAddress) { + var _gpuBuffers$get, _gpuBuffers$get2, _gpuBuffers$get3; + objAddress = unsignedAddress(objAddress); + weightAddress = unsignedAddress(weightAddress); + biasAddress = unsignedAddress(biasAddress); + xAddress = unsignedAddress(xAddress); + yAddress = unsignedAddress(yAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shaderName = xAddress === yAddress ? 
FORWARD_SINGLE_BUFFER_SHADER_NAME : FORWARD_MULTI_BUFFER_SHADER_NAME; + var shader = obj.shaders[shaderName]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var weightBuffer = (_gpuBuffers$get = gpuBuffers.get(weightAddress)) === null || _gpuBuffers$get === void 0 ? void 0 : _gpuBuffers$get.buffer; + if (!weightBuffer) { + console.error('weight buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var biasBuffer = (_gpuBuffers$get2 = gpuBuffers.get(biasAddress)) === null || _gpuBuffers$get2 === void 0 ? void 0 : _gpuBuffers$get2.buffer; + if (!biasBuffer) { + console.error('bias buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var yBuffer = (_gpuBuffers$get3 = gpuBuffers.get(yAddress)) === null || _gpuBuffers$get3 === void 0 ? void 0 : _gpuBuffers$get3.buffer; + if (!yBuffer) { + console.error('Y buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(7 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "norm layer forward multi buffer arg buffer"); + var buffer = new ArrayBuffer(argsBuffer.size); + var view = new DataView(buffer); + view.setUint32(0, n, true); + view.setUint32(4, dimension, true); + view.setFloat32(8, eps, true); + view.setUint32(12, weightOffset / 4, true); + view.setUint32(16, biasOffset / 4, true); + view.setUint32(20, xOffset / 4, true); + view.setUint32(24, yOffset / 4, true); + obj.device.queue.writeBuffer(argsBuffer, 0, buffer); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup; + if (xAddress === yAddress) { + bindGroup = obj.device.createBindGroup({ + label: "norm layer forward single buffer bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: weightBuffer + } + }, { + binding: 2, 
+ resource: { + buffer: biasBuffer + } + }, { + binding: 3, + resource: { + buffer: yBuffer + } + }] + }); + } else { + var _gpuBuffers$get4; + var xBuffer = (_gpuBuffers$get4 = gpuBuffers.get(xAddress)) === null || _gpuBuffers$get4 === void 0 ? void 0 : _gpuBuffers$get4.buffer; + if (!xBuffer) { + console.error('X buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + bindGroup = obj.device.createBindGroup({ + label: "norm layer forward multi buffer bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: weightBuffer + } + }, { + binding: 2, + resource: { + buffer: biasBuffer + } + }, { + binding: 3, + resource: { + buffer: xBuffer + } + }, { + binding: 4, + resource: { + buffer: yBuffer + } + }] + }); + } + obj.dispatchComputerShader(bindGroup, shader.computePipeline, shaderName, Math.ceil(n / PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE)); + setStatus(statusAddress, 0); + }; + return { + pv_picollm_norm_layer_forward_webgpu_wasm: pvPicollmNormLayerForwardWebGpu + }; +}; + +var ADD_TO_BUFFER_SHADER_NAME = "pv_picollm_transformer_add_to_buffer_shader"; +var transformerAddToBufferShaderSource = "\nstruct argsStruct {\n n: u32,\n x_offset: u32,\n buffer_offset: u32, \n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array;\n\n@group(0) @binding(2)\nvar buffer: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n \n buffer[args.buffer_offset + global_id.x] += x[args.x_offset + global_id.x]; \n}\n\n".concat(emptyShader); +var loadTransformerAddToBufferShader = function loadTransformerAddToBufferShader(device) { + var bindGroupLayout = 
device.createBindGroupLayout({ + label: "transformer add to buffer bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "transformer add to buffer pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "transformer add to buffer shader module", + code: transformerAddToBufferShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "transformer add to buffer compute", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE + } + } + }); + return { + computePipeline: computePipeline + }; +}; +var ADD_BUFFERS_SHADER_NAME = "pv_picollm_transformer_add_buffers_shader"; +var transformerAddBuffersShaderSource = "\n\nstruct argsStruct {\n n: u32,\n buffer1_offset: u32,\n buffer2_offset: u32,\n y_offset: u32, \n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar buffer1: array;\n\n@group(0) @binding(2)\nvar buffer2: array;\n\n@group(0) @binding(3)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n \n y[args.y_offset + global_id.x] = buffer1[args.buffer1_offset + global_id.x] + buffer2[args.buffer2_offset + global_id.x]; \n}\n\n".concat(emptyShader); +var loadTransformerAddBuffersShader = function loadTransformerAddBuffersShader(device) { + var 
bindGroupLayout = device.createBindGroupLayout({ + label: "transformer add buffers bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 3, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "transformer add buffers pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "transformer add buffers shader module", + code: transformerAddBuffersShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "transformer add buffers pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE + } + } + }); + return { + computePipeline: computePipeline + }; +}; +var transformerForwardShaders = _defineProperty(_defineProperty({}, ADD_TO_BUFFER_SHADER_NAME, loadTransformerAddToBufferShader), ADD_BUFFERS_SHADER_NAME, loadTransformerAddBuffersShader); +var getPicollmTransformerWebGpuFunctions = function getPicollmTransformerWebGpuFunctions(memory) { + var setStatus = function setStatus(statusAddress, value) { + var memoryBufferInt32 = new Int32Array(memory.buffer); + memoryBufferInt32[statusAddress / Int32Array.BYTES_PER_ELEMENT] = value; + }; + var pvPicollmTransformerAddToBufferWebGpu = function pvPicollmTransformerAddToBufferWebGpu(objAddress, n, xAddress, xOffset, bufferAddress, bufferOffset, statusAddress) { + var _gpuBuffers$get, _gpuBuffers$get2; + objAddress = unsignedAddress(objAddress); + xAddress = unsignedAddress(xAddress); + bufferAddress = unsignedAddress(bufferAddress); + 
statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[ADD_TO_BUFFER_SHADER_NAME]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var xBuffer = (_gpuBuffers$get = gpuBuffers.get(xAddress)) === null || _gpuBuffers$get === void 0 ? void 0 : _gpuBuffers$get.buffer; + if (!xBuffer) { + console.error('x buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var bufferBuffer = (_gpuBuffers$get2 = gpuBuffers.get(bufferAddress)) === null || _gpuBuffers$get2 === void 0 ? void 0 : _gpuBuffers$get2.buffer; + if (!bufferBuffer) { + console.error('buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(3 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "transformer add to buffer arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([n, xOffset, bufferOffset])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "transformer add to buffer bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: xBuffer + } + }, { + binding: 2, + resource: { + buffer: bufferBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, ADD_TO_BUFFER_SHADER_NAME, Math.ceil(n / PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE)); + setStatus(statusAddress, 0); + }; + var pvPicollmTransformerAddBuffersWebGpu = function pvPicollmTransformerAddBuffersWebGpu(objAddress, n, buffer1Address, buffer1Offset, buffer2Address, buffer2Offset, yAddress, yOffset, statusAddress) { + var _gpuBuffers$get3, _gpuBuffers$get4, 
_gpuBuffers$get5; + objAddress = unsignedAddress(objAddress); + buffer1Address = unsignedAddress(buffer1Address); + buffer2Address = unsignedAddress(buffer2Address); + yAddress = unsignedAddress(yAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[ADD_BUFFERS_SHADER_NAME]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var buffer1Buffer = (_gpuBuffers$get3 = gpuBuffers.get(buffer1Address)) === null || _gpuBuffers$get3 === void 0 ? void 0 : _gpuBuffers$get3.buffer; + if (!buffer1Buffer) { + console.error('buffer1 has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var buffer2Buffer = (_gpuBuffers$get4 = gpuBuffers.get(buffer2Address)) === null || _gpuBuffers$get4 === void 0 ? void 0 : _gpuBuffers$get4.buffer; + if (!buffer2Buffer) { + console.error('buffer2 has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var yBuffer = (_gpuBuffers$get5 = gpuBuffers.get(yAddress)) === null || _gpuBuffers$get5 === void 0 ? 
void 0 : _gpuBuffers$get5.buffer; + if (!yBuffer) { + console.error('y has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(4 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "transformer add buffers arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([n, buffer1Offset, buffer2Offset, yOffset])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "transformer add buffers bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: buffer1Buffer + } + }, { + binding: 2, + resource: { + buffer: buffer2Buffer + } + }, { + binding: 3, + resource: { + buffer: yBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, ADD_BUFFERS_SHADER_NAME, Math.ceil(n / PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE)); + setStatus(statusAddress, 0); + }; + return { + pv_picollm_transformer_add_to_buffer_webgpu_wasm: pvPicollmTransformerAddToBufferWebGpu, + pv_picollm_transformer_add_buffers_webgpu_wasm: pvPicollmTransformerAddBuffersWebGpu + }; +}; + +var FORWARD_SHADER_NAME = "pv_picollm_weight_float_forward_shader"; +var weightFloatForwardShaderSource = "\n\nstruct argsStruct {\n nr: u32,\n nc: u32,\n w_offset: u32,\n x_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar w: array;\n\n@group(0) @binding(2)\nvar x: array;\n\n@group(0) @binding(3)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(\n @builtin(workgroup_id) workgroup_id : vec3,\n @builtin(local_invocation_id) local_id : vec3\n) {\n if (local_id.x >= args.nr) {\n return;\n }\n let 
x_start: u32 = args.x_offset + (workgroup_id.x * args.nc);\n let y_idx: u32 = local_id.x + args.y_offset + (workgroup_id.x * args.nr);\n \n let w_start: u32 = args.w_offset + (local_id.x * args.nc);\n for (var j = 0u; j < args.nc; j++) {\n y[y_idx] += w[w_start + j] * x[x_start + j]; \n }\n}\n\n".concat(emptyShader); +var loadWeightFloatForwardShader = function loadWeightFloatForwardShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "weight float forward bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 3, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "weight float forward pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "weight float forward shader module", + code: weightFloatForwardShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "weight float forward pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE + } + } + }); + return { + computePipeline: computePipeline + }; +}; +var weightFloatForwardShader = _defineProperty({}, FORWARD_SHADER_NAME, loadWeightFloatForwardShader); +var getPicollmWeightFloatWebGpuFunctions = function getPicollmWeightFloatWebGpuFunctions(memory) { + var setStatus = function setStatus(statusAddress, value) { + var memoryBufferInt32 = new Int32Array(memory.buffer); + memoryBufferInt32[statusAddress / Int32Array.BYTES_PER_ELEMENT] = value; + }; + var pvPicollmWeightFloatForwardWebGpu = function 
pvPicollmWeightFloatForwardWebGpu(objAddress, n, nc, nr, wOffset, wAddress, xOffset, xAddress, yOffset, yAddress, statusAddress) { + var _gpuBuffers$get, _gpuBuffers$get2, _gpuBuffers$get3; + objAddress = unsignedAddress(objAddress); + wAddress = unsignedAddress(wAddress); + xAddress = unsignedAddress(xAddress); + yAddress = unsignedAddress(yAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[FORWARD_SHADER_NAME]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var wBuffer = (_gpuBuffers$get = gpuBuffers.get(wAddress)) === null || _gpuBuffers$get === void 0 ? void 0 : _gpuBuffers$get.buffer; + if (!wBuffer) { + console.error('W buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var xBuffer = (_gpuBuffers$get2 = gpuBuffers.get(xAddress)) === null || _gpuBuffers$get2 === void 0 ? void 0 : _gpuBuffers$get2.buffer; + if (!xBuffer) { + console.error('X buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var yBuffer = (_gpuBuffers$get3 = gpuBuffers.get(yAddress)) === null || _gpuBuffers$get3 === void 0 ? 
void 0 : _gpuBuffers$get3.buffer; + if (!yBuffer) { + console.error('Y buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(5 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "weight float forward arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([nr, nc, wOffset, xOffset, yOffset])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "weight float forward bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: wBuffer + } + }, { + binding: 2, + resource: { + buffer: xBuffer + } + }, { + binding: 3, + resource: { + buffer: yBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, FORWARD_SHADER_NAME, n); + setStatus(statusAddress, 0); + }; + return { + pv_picollm_weight_float_forward_webgpu_wasm: pvPicollmWeightFloatForwardWebGpu + }; +}; + +var rowsPerBlock = 16; +var columnsPerBlock = 8; +var preprocessDim = 16; +var weightBlockSize = 256; +var unpackBlock128BitDepth3 = "\nvar unpacked: array;\n\nfn unpack_block_128_bit_depth_3(packed_offset: u32) {\n let val_0 = blocks[packed_offset]; \n unpacked[0] = extractBits(val_0, 0u, 3u);\n unpacked[1] = extractBits(val_0, 3u, 3u);\n unpacked[2] = extractBits(val_0, 6u, 3u);\n unpacked[3] = extractBits(val_0, 9u, 3u);\n unpacked[4] = extractBits(val_0, 12u, 3u);\n unpacked[5] = extractBits(val_0, 15u, 3u);\n unpacked[6] = extractBits(val_0, 18u, 3u);\n unpacked[7] = extractBits(val_0, 21u, 3u);\n unpacked[8] = extractBits(val_0, 24u, 3u);\n unpacked[9] = extractBits(val_0, 27u, 3u);\n unpacked[10] = extractBits(val_0, 30u, 2u);\n \n let val_1 = blocks[packed_offset + 1];\n unpacked[10] = insertBits(unpacked[10], extractBits(val_1, 0u, 1u), 2u, 1u);\n unpacked[11] = extractBits(val_1, 1u, 
3u);\n unpacked[12] = extractBits(val_1, 4u, 3u);\n unpacked[13] = extractBits(val_1, 7u, 3u);\n unpacked[14] = extractBits(val_1, 10u, 3u);\n unpacked[15] = extractBits(val_1, 13u, 3u);\n unpacked[16] = extractBits(val_1, 16u, 3u);\n unpacked[17] = extractBits(val_1, 19u, 3u);\n unpacked[18] = extractBits(val_1, 22u, 3u);\n unpacked[19] = extractBits(val_1, 25u, 3u);\n unpacked[20] = extractBits(val_1, 28u, 3u);\n unpacked[21] = extractBits(val_1, 31u, 1u);\n \n let val_2 = blocks[packed_offset + 2];\n unpacked[21] = insertBits(unpacked[21], extractBits(val_2, 0u, 2u), 1u, 2u); \n unpacked[22] = extractBits(val_2, 2u, 3u);\n unpacked[23] = extractBits(val_2, 5u, 3u);\n unpacked[24] = extractBits(val_2, 8u, 3u);\n unpacked[25] = extractBits(val_2, 11u, 3u);\n unpacked[26] = extractBits(val_2, 14u, 3u);\n unpacked[27] = extractBits(val_2, 17u, 3u);\n unpacked[28] = extractBits(val_2, 20u, 3u);\n unpacked[29] = extractBits(val_2, 23u, 3u);\n unpacked[30] = extractBits(val_2, 26u, 3u);\n unpacked[31] = extractBits(val_2, 29u, 3u);\n \n let val_3 = blocks[packed_offset + 3]; \n unpacked[32] = extractBits(val_3, 0u, 3u);\n unpacked[33] = extractBits(val_3, 3u, 3u);\n unpacked[34] = extractBits(val_3, 6u, 3u);\n unpacked[35] = extractBits(val_3, 9u, 3u);\n unpacked[36] = extractBits(val_3, 12u, 3u);\n unpacked[37] = extractBits(val_3, 15u, 3u);\n unpacked[38] = extractBits(val_3, 18u, 3u);\n unpacked[39] = extractBits(val_3, 21u, 3u);\n unpacked[40] = extractBits(val_3, 24u, 3u);\n unpacked[41] = extractBits(val_3, 27u, 3u);\n unpacked[42] = extractBits(val_3, 30u, 2u);\n \n let val_4 = blocks[packed_offset + 4];\n unpacked[42] = insertBits(unpacked[42], extractBits(val_4, 0u, 1u), 2u, 1u);\n unpacked[43] = extractBits(val_4, 1u, 3u);\n unpacked[44] = extractBits(val_4, 4u, 3u);\n unpacked[45] = extractBits(val_4, 7u, 3u);\n unpacked[46] = extractBits(val_4, 10u, 3u);\n unpacked[47] = extractBits(val_4, 13u, 3u);\n unpacked[48] = extractBits(val_4, 16u, 3u);\n 
unpacked[49] = extractBits(val_4, 19u, 3u);\n unpacked[50] = extractBits(val_4, 22u, 3u);\n unpacked[51] = extractBits(val_4, 25u, 3u);\n unpacked[52] = extractBits(val_4, 28u, 3u);\n unpacked[53] = extractBits(val_4, 31u, 1u);\n \n let val_5 = blocks[packed_offset + 5];\n unpacked[53] = insertBits(unpacked[53], extractBits(val_5, 0u, 2u), 1u, 2u); \n unpacked[54] = extractBits(val_5, 2u, 3u);\n unpacked[55] = extractBits(val_5, 5u, 3u);\n unpacked[56] = extractBits(val_5, 8u, 3u);\n unpacked[57] = extractBits(val_5, 11u, 3u);\n unpacked[58] = extractBits(val_5, 14u, 3u);\n unpacked[59] = extractBits(val_5, 17u, 3u);\n unpacked[60] = extractBits(val_5, 20u, 3u);\n unpacked[61] = extractBits(val_5, 23u, 3u);\n unpacked[62] = extractBits(val_5, 26u, 3u);\n unpacked[63] = extractBits(val_5, 29u, 3u);\n \n let val_6 = blocks[packed_offset + 6];\n unpacked[64] = extractBits(val_6, 0u, 3u);\n unpacked[65] = extractBits(val_6, 3u, 3u);\n unpacked[66] = extractBits(val_6, 6u, 3u);\n unpacked[67] = extractBits(val_6, 9u, 3u);\n unpacked[68] = extractBits(val_6, 12u, 3u);\n unpacked[69] = extractBits(val_6, 15u, 3u);\n unpacked[70] = extractBits(val_6, 18u, 3u);\n unpacked[71] = extractBits(val_6, 21u, 3u);\n unpacked[72] = extractBits(val_6, 24u, 3u);\n unpacked[73] = extractBits(val_6, 27u, 3u);\n unpacked[74] = extractBits(val_6, 30u, 2u);\n \n let val_7 = blocks[packed_offset + 7];\n unpacked[74] = insertBits(unpacked[74], extractBits(val_7, 0u, 1u), 2u, 1u);\n unpacked[75] = extractBits(val_7, 1u, 3u);\n unpacked[76] = extractBits(val_7, 4u, 3u);\n unpacked[77] = extractBits(val_7, 7u, 3u);\n unpacked[78] = extractBits(val_7, 10u, 3u);\n unpacked[79] = extractBits(val_7, 13u, 3u);\n unpacked[80] = extractBits(val_7, 16u, 3u);\n unpacked[81] = extractBits(val_7, 19u, 3u);\n unpacked[82] = extractBits(val_7, 22u, 3u);\n unpacked[83] = extractBits(val_7, 25u, 3u);\n unpacked[84] = extractBits(val_7, 28u, 3u);\n unpacked[85] = extractBits(val_7, 31u, 1u);\n \n let val_8 = 
blocks[packed_offset + 8];\n unpacked[85] = insertBits(unpacked[85], extractBits(val_8, 0u, 2u), 1u, 2u); \n unpacked[86] = extractBits(val_8, 2u, 3u);\n unpacked[87] = extractBits(val_8, 5u, 3u);\n unpacked[88] = extractBits(val_8, 8u, 3u);\n unpacked[89] = extractBits(val_8, 11u, 3u);\n unpacked[90] = extractBits(val_8, 14u, 3u);\n unpacked[91] = extractBits(val_8, 17u, 3u);\n unpacked[92] = extractBits(val_8, 20u, 3u);\n unpacked[93] = extractBits(val_8, 23u, 3u);\n unpacked[94] = extractBits(val_8, 26u, 3u);\n unpacked[95] = extractBits(val_8, 29u, 3u);\n \n let val_9 = blocks[packed_offset + 9];\n unpacked[96] = extractBits(val_9, 0u, 3u);\n unpacked[97] = extractBits(val_9, 3u, 3u);\n unpacked[98] = extractBits(val_9, 6u, 3u);\n unpacked[99] = extractBits(val_9, 9u, 3u);\n unpacked[100] = extractBits(val_9, 12u, 3u);\n unpacked[101] = extractBits(val_9, 15u, 3u);\n unpacked[102] = extractBits(val_9, 18u, 3u);\n unpacked[103] = extractBits(val_9, 21u, 3u);\n unpacked[104] = extractBits(val_9, 24u, 3u);\n unpacked[105] = extractBits(val_9, 27u, 3u);\n unpacked[106] = extractBits(val_9, 30u, 2u);\n \n let val_10 = blocks[packed_offset + 10];\n unpacked[106] = insertBits(unpacked[106], extractBits(val_10, 0u, 1u), 2u, 1u);\n unpacked[107] = extractBits(val_10, 1u, 3u);\n unpacked[108] = extractBits(val_10, 4u, 3u);\n unpacked[109] = extractBits(val_10, 7u, 3u);\n unpacked[110] = extractBits(val_10, 10u, 3u);\n unpacked[111] = extractBits(val_10, 13u, 3u);\n unpacked[112] = extractBits(val_10, 16u, 3u);\n unpacked[113] = extractBits(val_10, 19u, 3u);\n unpacked[114] = extractBits(val_10, 22u, 3u);\n unpacked[115] = extractBits(val_10, 25u, 3u);\n unpacked[116] = extractBits(val_10, 28u, 3u);\n unpacked[117] = extractBits(val_10, 31u, 1u);\n \n let val_11 = blocks[packed_offset + 11];\n unpacked[117] = insertBits(unpacked[117], extractBits(val_11, 0u, 2u), 1u, 2u); \n unpacked[118] = extractBits(val_11, 2u, 3u);\n unpacked[119] = extractBits(val_11, 5u, 3u);\n 
unpacked[120] = extractBits(val_11, 8u, 3u);\n unpacked[121] = extractBits(val_11, 11u, 3u);\n unpacked[122] = extractBits(val_11, 14u, 3u);\n unpacked[123] = extractBits(val_11, 17u, 3u);\n unpacked[124] = extractBits(val_11, 20u, 3u);\n unpacked[125] = extractBits(val_11, 23u, 3u);\n unpacked[126] = extractBits(val_11, 26u, 3u);\n unpacked[127] = extractBits(val_11, 29u, 3u);\n}\n"; +var unpackBlock128BitDepth5 = "\nvar unpacked: array;\n\nfn unpack_block_128_bit_depth_5(packed_offset: u32) { \n let val_0 = blocks[packed_offset];\n unpacked[0] = extractBits(val_0, 0u, 5u);\n unpacked[1] = extractBits(val_0, 5u, 5u);\n unpacked[2] = extractBits(val_0, 10u, 5u);\n unpacked[3] = extractBits(val_0, 15u, 5u);\n unpacked[4] = extractBits(val_0, 20u, 5u);\n unpacked[5] = extractBits(val_0, 25u, 5u);\n unpacked[6] = extractBits(val_0, 30u, 2u);\n \n let val_1 = blocks[packed_offset + 1];\n unpacked[6] = insertBits(unpacked[6], extractBits(val_1, 0u, 3u), 2u, 3u); \n unpacked[7] = extractBits(val_1, 3u, 5u);\n unpacked[8] = extractBits(val_1, 8u, 5u);\n unpacked[9] = extractBits(val_1, 13u, 5u);\n unpacked[10] = extractBits(val_1, 18u, 5u);\n unpacked[11] = extractBits(val_1, 23u, 5u);\n unpacked[12] = extractBits(val_1, 28u, 4u);\n \n let val_2 = blocks[packed_offset + 2];\n unpacked[12] = insertBits(unpacked[12], extractBits(val_2, 0u, 1u), 4u, 1u);\n unpacked[13] = extractBits(val_2, 1u, 5u);\n unpacked[14] = extractBits(val_2, 6u, 5u);\n unpacked[15] = extractBits(val_2, 11u, 5u);\n unpacked[16] = extractBits(val_2, 16u, 5u);\n unpacked[17] = extractBits(val_2, 21u, 5u);\n unpacked[18] = extractBits(val_2, 26u, 5u);\n unpacked[19] = extractBits(val_2, 31u, 1u);\n \n let val_3 = blocks[packed_offset + 3];\n unpacked[19] = insertBits(unpacked[19], extractBits(val_3, 0u, 4u), 1u, 4u);\n unpacked[20] = extractBits(val_3, 4u, 5u);\n unpacked[21] = extractBits(val_3, 9u, 5u);\n unpacked[22] = extractBits(val_3, 14u, 5u);\n unpacked[23] = extractBits(val_3, 19u, 5u);\n 
unpacked[24] = extractBits(val_3, 24u, 5u);\n unpacked[25] = extractBits(val_3, 29u, 3u);\n \n let val_4 = blocks[packed_offset + 4];\n unpacked[25] = insertBits(unpacked[25], extractBits(val_4, 0u, 2u), 3u, 2u);\n unpacked[26] = extractBits(val_4, 2u, 5u);\n unpacked[27] = extractBits(val_4, 7u, 5u);\n unpacked[28] = extractBits(val_4, 12u, 5u);\n unpacked[29] = extractBits(val_4, 17u, 5u);\n unpacked[30] = extractBits(val_4, 22u, 5u);\n unpacked[31] = extractBits(val_4, 27u, 5u);\n \n let val_5 = blocks[packed_offset + 5];\n unpacked[32] = extractBits(val_5, 0u, 5u);\n unpacked[33] = extractBits(val_5, 5u, 5u);\n unpacked[34] = extractBits(val_5, 10u, 5u);\n unpacked[35] = extractBits(val_5, 15u, 5u);\n unpacked[36] = extractBits(val_5, 20u, 5u);\n unpacked[37] = extractBits(val_5, 25u, 5u);\n unpacked[38] = extractBits(val_5, 30u, 2u);\n\n let val_6 = blocks[packed_offset + 6];\n unpacked[38] = insertBits(unpacked[38], extractBits(val_6, 0u, 3u), 2u, 3u);\n unpacked[39] = extractBits(val_6, 3u, 5u);\n unpacked[40] = extractBits(val_6, 8u, 5u);\n unpacked[41] = extractBits(val_6, 13u, 5u);\n unpacked[42] = extractBits(val_6, 18u, 5u);\n unpacked[43] = extractBits(val_6, 23u, 5u);\n unpacked[44] = extractBits(val_6, 28u, 4u);\n\n let val_7 = blocks[packed_offset + 7];\n unpacked[44] = insertBits(unpacked[44], extractBits(val_7, 0u, 1u), 4u, 1u);\n unpacked[45] = extractBits(val_7, 1u, 5u);\n unpacked[46] = extractBits(val_7, 6u, 5u);\n unpacked[47] = extractBits(val_7, 11u, 5u);\n unpacked[48] = extractBits(val_7, 16u, 5u);\n unpacked[49] = extractBits(val_7, 21u, 5u);\n unpacked[50] = extractBits(val_7, 26u, 5u);\n unpacked[51] = extractBits(val_7, 31u, 1u);\n\n let val_8 = blocks[packed_offset + 8];\n unpacked[51] = insertBits(unpacked[51], extractBits(val_8, 0u, 4u), 1u, 4u);\n unpacked[52] = extractBits(val_8, 4u, 5u);\n unpacked[53] = extractBits(val_8, 9u, 5u);\n unpacked[54] = extractBits(val_8, 14u, 5u);\n unpacked[55] = extractBits(val_8, 19u, 5u);\n 
unpacked[56] = extractBits(val_8, 24u, 5u);\n unpacked[57] = extractBits(val_8, 29u, 3u);\n \n let val_9 = blocks[packed_offset + 9];\n unpacked[57] = insertBits(unpacked[57], extractBits(val_9, 0u, 2u), 3u, 2u);\n unpacked[58] = extractBits(val_9, 2u, 5u);\n unpacked[59] = extractBits(val_9, 7u, 5u);\n unpacked[60] = extractBits(val_9, 12u, 5u);\n unpacked[61] = extractBits(val_9, 17u, 5u);\n unpacked[62] = extractBits(val_9, 22u, 5u);\n unpacked[63] = extractBits(val_9, 27u, 5u);\n \n let val_10 = blocks[packed_offset + 10];\n unpacked[64] = extractBits(val_10, 0u, 5u);\n unpacked[65] = extractBits(val_10, 5u, 5u);\n unpacked[66] = extractBits(val_10, 10u, 5u);\n unpacked[67] = extractBits(val_10, 15u, 5u);\n unpacked[68] = extractBits(val_10, 20u, 5u);\n unpacked[69] = extractBits(val_10, 25u, 5u);\n unpacked[70] = extractBits(val_10, 30u, 2u);\n\n let val_11 = blocks[packed_offset + 11];\n unpacked[70] = insertBits(unpacked[70], extractBits(val_11, 0u, 3u), 2u, 3u);\n unpacked[71] = extractBits(val_11, 3u, 5u);\n unpacked[72] = extractBits(val_11, 8u, 5u);\n unpacked[73] = extractBits(val_11, 13u, 5u);\n unpacked[74] = extractBits(val_11, 18u, 5u);\n unpacked[75] = extractBits(val_11, 23u, 5u);\n unpacked[76] = extractBits(val_11, 28u, 4u);\n\n let val_12 = blocks[packed_offset + 12];\n unpacked[76] = insertBits(unpacked[76], extractBits(val_12, 0u, 1u), 4u, 1u);\n unpacked[77] = extractBits(val_12, 1u, 5u);\n unpacked[78] = extractBits(val_12, 6u, 5u);\n unpacked[79] = extractBits(val_12, 11u, 5u);\n unpacked[80] = extractBits(val_12, 16u, 5u);\n unpacked[81] = extractBits(val_12, 21u, 5u);\n unpacked[82] = extractBits(val_12, 26u, 5u);\n unpacked[83] = extractBits(val_12, 31u, 1u);\n\n let val_13 = blocks[packed_offset + 13];\n unpacked[83] = insertBits(unpacked[83], extractBits(val_13, 0u, 4u), 1u, 4u);\n unpacked[84] = extractBits(val_13, 4u, 5u);\n unpacked[85] = extractBits(val_13, 9u, 5u);\n unpacked[86] = extractBits(val_13, 14u, 5u);\n unpacked[87] = 
extractBits(val_13, 19u, 5u);\n unpacked[88] = extractBits(val_13, 24u, 5u);\n unpacked[89] = extractBits(val_13, 29u, 3u);\n \n let val_14 = blocks[packed_offset + 14];\n unpacked[89] = insertBits(unpacked[89], extractBits(val_14, 0u, 2u), 3u, 2u);\n unpacked[90] = extractBits(val_14, 2u, 5u);\n unpacked[91] = extractBits(val_14, 7u, 5u);\n unpacked[92] = extractBits(val_14, 12u, 5u);\n unpacked[93] = extractBits(val_14, 17u, 5u);\n unpacked[94] = extractBits(val_14, 22u, 5u);\n unpacked[95] = extractBits(val_14, 27u, 5u);\n\n let val_15 = blocks[packed_offset + 15];\n unpacked[96] = extractBits(val_15, 0u, 5u);\n unpacked[97] = extractBits(val_15, 5u, 5u);\n unpacked[98] = extractBits(val_15, 10u, 5u);\n unpacked[99] = extractBits(val_15, 15u, 5u);\n unpacked[100] = extractBits(val_15, 20u, 5u);\n unpacked[101] = extractBits(val_15, 25u, 5u);\n unpacked[102] = extractBits(val_15, 30u, 2u);\n\n let val_16 = blocks[packed_offset + 16];\n unpacked[102] = insertBits(unpacked[102], extractBits(val_16, 0u, 3u), 2u, 3u);\n unpacked[103] = extractBits(val_16, 3u, 5u);\n unpacked[104] = extractBits(val_16, 8u, 5u);\n unpacked[105] = extractBits(val_16, 13u, 5u);\n unpacked[106] = extractBits(val_16, 18u, 5u);\n unpacked[107] = extractBits(val_16, 23u, 5u);\n unpacked[108] = extractBits(val_16, 28u, 4u);\n\n let val_17 = blocks[packed_offset + 17];\n unpacked[108] = insertBits(unpacked[108], extractBits(val_17, 0u, 1u), 4u, 1u);\n unpacked[109] = extractBits(val_17, 1u, 5u);\n unpacked[110] = extractBits(val_17, 6u, 5u);\n unpacked[111] = extractBits(val_17, 11u, 5u);\n unpacked[112] = extractBits(val_17, 16u, 5u);\n unpacked[113] = extractBits(val_17, 21u, 5u);\n unpacked[114] = extractBits(val_17, 26u, 5u);\n unpacked[115] = extractBits(val_17, 31u, 1u);\n \n let val_18 = blocks[packed_offset + 18];\n unpacked[115] = insertBits(unpacked[115], extractBits(val_18, 0u, 4u), 1u, 4u);\n unpacked[116] = extractBits(val_18, 4u, 5u);\n unpacked[117] = extractBits(val_18, 9u, 
5u);\n unpacked[118] = extractBits(val_18, 14u, 5u);\n unpacked[119] = extractBits(val_18, 19u, 5u);\n unpacked[120] = extractBits(val_18, 24u, 5u);\n unpacked[121] = extractBits(val_18, 29u, 3u);\n\n let val_19 = blocks[packed_offset + 19];\n unpacked[121] = insertBits(unpacked[121], extractBits(val_19, 0u, 2u), 3u, 2u);\n unpacked[122] = extractBits(val_19, 2u, 5u);\n unpacked[123] = extractBits(val_19, 7u, 5u);\n unpacked[124] = extractBits(val_19, 12u, 5u);\n unpacked[125] = extractBits(val_19, 17u, 5u);\n unpacked[126] = extractBits(val_19, 22u, 5u);\n unpacked[127] = extractBits(val_19, 27u, 5u);\n}\n"; +var unpackBlock128BitDepth6 = "\nvar unpacked: array;\n\nfn unpack_block_128_bit_depth_6(packed_offset: u32) {\n let val_0 = blocks[packed_offset];\n unpacked[0] = extractBits(val_0, 0u, 6u);\n unpacked[1] = extractBits(val_0, 6u, 6u);\n unpacked[2] = extractBits(val_0, 12u, 6u);\n unpacked[3] = extractBits(val_0, 18u, 6u);\n unpacked[4] = extractBits(val_0, 24u, 6u);\n unpacked[5] = extractBits(val_0, 30u, 2u);\n \n let val_1 = blocks[packed_offset + 1];\n unpacked[5] = insertBits(unpacked[5], extractBits(val_1, 0u, 4u), 2u, 4u);\n unpacked[6] = extractBits(val_1, 4u, 6u);\n unpacked[7] = extractBits(val_1, 10u, 6u);\n unpacked[8] = extractBits(val_1, 16u, 6u);\n unpacked[9] = extractBits(val_1, 22u, 6u);\n unpacked[10] = extractBits(val_1, 28u, 4u);\n \n let val_2 = blocks[packed_offset + 2];\n unpacked[10] = insertBits(unpacked[10], extractBits(val_2, 0u, 2u), 4u, 2u);\n unpacked[11] = extractBits(val_2, 2u, 6u);\n unpacked[12] = extractBits(val_2, 8u, 6u);\n unpacked[13] = extractBits(val_2, 14u, 6u);\n unpacked[14] = extractBits(val_2, 20u, 6u);\n unpacked[15] = extractBits(val_2, 26u, 6u);\n \n let val_3 = blocks[packed_offset + 3];\n unpacked[16] = extractBits(val_3, 0u, 6u);\n unpacked[17] = extractBits(val_3, 6u, 6u);\n unpacked[18] = extractBits(val_3, 12u, 6u);\n unpacked[19] = extractBits(val_3, 18u, 6u);\n unpacked[20] = extractBits(val_3, 24u, 
6u);\n unpacked[21] = extractBits(val_3, 30u, 2u);\n \n let val_4 = blocks[packed_offset + 4];\n unpacked[21] = insertBits(unpacked[21], extractBits(val_4, 0u, 4u), 2u, 4u);\n unpacked[22] = extractBits(val_4, 4u, 6u);\n unpacked[23] = extractBits(val_4, 10u, 6u);\n unpacked[24] = extractBits(val_4, 16u, 6u);\n unpacked[25] = extractBits(val_4, 22u, 6u);\n unpacked[26] = extractBits(val_4, 28u, 4u);\n\n let val_5 = blocks[packed_offset + 5];\n unpacked[26] = insertBits(unpacked[26], extractBits(val_5, 0u, 2u), 4u, 2u);\n unpacked[27] = extractBits(val_5, 2u, 6u);\n unpacked[28] = extractBits(val_5, 8u, 6u);\n unpacked[29] = extractBits(val_5, 14u, 6u);\n unpacked[30] = extractBits(val_5, 20u, 6u);\n unpacked[31] = extractBits(val_5, 26u, 6u);\n\n let val_6 = blocks[packed_offset + 6];\n unpacked[32] = extractBits(val_6, 0u, 6u);\n unpacked[33] = extractBits(val_6, 6u, 6u);\n unpacked[34] = extractBits(val_6, 12u, 6u);\n unpacked[35] = extractBits(val_6, 18u, 6u);\n unpacked[36] = extractBits(val_6, 24u, 6u);\n unpacked[37] = extractBits(val_6, 30u, 2u);\n\n let val_7 = blocks[packed_offset + 7];\n unpacked[37] = insertBits(unpacked[37], extractBits(val_7, 0u, 4u), 2u, 4u);\n unpacked[38] = extractBits(val_7, 4u, 6u);\n unpacked[39] = extractBits(val_7, 10u, 6u);\n unpacked[40] = extractBits(val_7, 16u, 6u);\n unpacked[41] = extractBits(val_7, 22u, 6u);\n unpacked[42] = extractBits(val_7, 28u, 4u);\n \n let val_8 = blocks[packed_offset + 8];\n unpacked[42] = insertBits(unpacked[42], extractBits(val_8, 0u, 2u), 4u, 2u);\n unpacked[43] = extractBits(val_8, 2u, 6u);\n unpacked[44] = extractBits(val_8, 8u, 6u);\n unpacked[45] = extractBits(val_8, 14u, 6u);\n unpacked[46] = extractBits(val_8, 20u, 6u);\n unpacked[47] = extractBits(val_8, 26u, 6u);\n\n let val_9 = blocks[packed_offset + 9];\n unpacked[48] = extractBits(val_9, 0u, 6u);\n unpacked[49] = extractBits(val_9, 6u, 6u);\n unpacked[50] = extractBits(val_9, 12u, 6u);\n unpacked[51] = extractBits(val_9, 18u, 6u);\n 
unpacked[52] = extractBits(val_9, 24u, 6u);\n unpacked[53] = extractBits(val_9, 30u, 2u);\n\n let val_10 = blocks[packed_offset + 10];\n unpacked[53] = insertBits(unpacked[53], extractBits(val_10, 0u, 4u), 2u, 4u);\n unpacked[54] = extractBits(val_10, 4u, 6u);\n unpacked[55] = extractBits(val_10, 10u, 6u);\n unpacked[56] = extractBits(val_10, 16u, 6u);\n unpacked[57] = extractBits(val_10, 22u, 6u);\n unpacked[58] = extractBits(val_10, 28u, 4u);\n\n let val_11 = blocks[packed_offset + 11];\n unpacked[58] = insertBits(unpacked[58], extractBits(val_11, 0u, 2u), 4u, 2u);\n unpacked[59] = extractBits(val_11, 2u, 6u);\n unpacked[60] = extractBits(val_11, 8u, 6u);\n unpacked[61] = extractBits(val_11, 14u, 6u);\n unpacked[62] = extractBits(val_11, 20u, 6u);\n unpacked[63] = extractBits(val_11, 26u, 6u);\n \n let val_12 = blocks[packed_offset + 12];\n unpacked[64] = extractBits(val_12, 0u, 6u);\n unpacked[65] = extractBits(val_12, 6u, 6u);\n unpacked[66] = extractBits(val_12, 12u, 6u);\n unpacked[67] = extractBits(val_12, 18u, 6u);\n unpacked[68] = extractBits(val_12, 24u, 6u);\n unpacked[69] = extractBits(val_12, 30u, 2u);\n\n let val_13 = blocks[packed_offset + 13];\n unpacked[69] = insertBits(unpacked[69], extractBits(val_13, 0u, 4u), 2u, 4u);\n unpacked[70] = extractBits(val_13, 4u, 6u);\n unpacked[71] = extractBits(val_13, 10u, 6u);\n unpacked[72] = extractBits(val_13, 16u, 6u);\n unpacked[73] = extractBits(val_13, 22u, 6u);\n unpacked[74] = extractBits(val_13, 28u, 4u);\n\n let val_14 = blocks[packed_offset + 14];\n unpacked[74] = insertBits(unpacked[74], extractBits(val_14, 0u, 2u), 4u, 2u);\n unpacked[75] = extractBits(val_14, 2u, 6u);\n unpacked[76] = extractBits(val_14, 8u, 6u);\n unpacked[77] = extractBits(val_14, 14u, 6u);\n unpacked[78] = extractBits(val_14, 20u, 6u);\n unpacked[79] = extractBits(val_14, 26u, 6u);\n\n let val_15 = blocks[packed_offset + 15];\n unpacked[80] = extractBits(val_15, 0u, 6u);\n unpacked[81] = extractBits(val_15, 6u, 6u);\n 
unpacked[82] = extractBits(val_15, 12u, 6u);\n unpacked[83] = extractBits(val_15, 18u, 6u);\n unpacked[84] = extractBits(val_15, 24u, 6u);\n unpacked[85] = extractBits(val_15, 30u, 2u);\n \n let val_16 = blocks[packed_offset + 16];\n unpacked[85] = insertBits(unpacked[85], extractBits(val_16, 0u, 4u), 2u, 4u);\n unpacked[86] = extractBits(val_16, 4u, 6u);\n unpacked[87] = extractBits(val_16, 10u, 6u);\n unpacked[88] = extractBits(val_16, 16u, 6u);\n unpacked[89] = extractBits(val_16, 22u, 6u);\n unpacked[90] = extractBits(val_16, 28u, 4u);\n\n let val_17 = blocks[packed_offset + 17];\n unpacked[90] = insertBits(unpacked[90], extractBits(val_17, 0u, 2u), 4u, 2u);\n unpacked[91] = extractBits(val_17, 2u, 6u);\n unpacked[92] = extractBits(val_17, 8u, 6u);\n unpacked[93] = extractBits(val_17, 14u, 6u);\n unpacked[94] = extractBits(val_17, 20u, 6u);\n unpacked[95] = extractBits(val_17, 26u, 6u);\n\n let val_18 = blocks[packed_offset + 18];\n unpacked[96] = extractBits(val_18, 0u, 6u);\n unpacked[97] = extractBits(val_18, 6u, 6u);\n unpacked[98] = extractBits(val_18, 12u, 6u);\n unpacked[99] = extractBits(val_18, 18u, 6u);\n unpacked[100] = extractBits(val_18, 24u, 6u);\n unpacked[101] = extractBits(val_18, 30u, 2u);\n\n let val_19 = blocks[packed_offset + 19];\n unpacked[101] = insertBits(unpacked[101], extractBits(val_19, 0u, 4u), 2u, 4u);\n unpacked[102] = extractBits(val_19, 4u, 6u);\n unpacked[103] = extractBits(val_19, 10u, 6u);\n unpacked[104] = extractBits(val_19, 16u, 6u);\n unpacked[105] = extractBits(val_19, 22u, 6u);\n unpacked[106] = extractBits(val_19, 28u, 4u);\n \n let val_20 = blocks[packed_offset + 20];\n unpacked[106] = insertBits(unpacked[106], extractBits(val_20, 0u, 2u), 4u, 2u);\n unpacked[107] = extractBits(val_20, 2u, 6u);\n unpacked[108] = extractBits(val_20, 8u, 6u);\n unpacked[109] = extractBits(val_20, 14u, 6u);\n unpacked[110] = extractBits(val_20, 20u, 6u);\n unpacked[111] = extractBits(val_20, 26u, 6u);\n\n let val_21 = 
blocks[packed_offset + 21];\n unpacked[112] = extractBits(val_21, 0u, 6u);\n unpacked[113] = extractBits(val_21, 6u, 6u);\n unpacked[114] = extractBits(val_21, 12u, 6u);\n unpacked[115] = extractBits(val_21, 18u, 6u);\n unpacked[116] = extractBits(val_21, 24u, 6u);\n unpacked[117] = extractBits(val_21, 30u, 2u);\n\n let val_22 = blocks[packed_offset + 22];\n unpacked[117] = insertBits(unpacked[117], extractBits(val_22, 0u, 4u), 2u, 4u);\n unpacked[118] = extractBits(val_22, 4u, 6u);\n unpacked[119] = extractBits(val_22, 10u, 6u);\n unpacked[120] = extractBits(val_22, 16u, 6u);\n unpacked[121] = extractBits(val_22, 22u, 6u);\n unpacked[122] = extractBits(val_22, 28u, 4u);\n\n let val_23 = blocks[packed_offset + 23];\n unpacked[122] = insertBits(unpacked[122], extractBits(val_23, 0u, 2u), 4u, 2u);\n unpacked[123] = extractBits(val_23, 2u, 6u);\n unpacked[124] = extractBits(val_23, 8u, 6u);\n unpacked[125] = extractBits(val_23, 14u, 6u);\n unpacked[126] = extractBits(val_23, 20u, 6u);\n unpacked[127] = extractBits(val_23, 26u, 6u);\n}\n"; +var unpackBlock128BitDepth7 = "\nvar unpacked: array;\n\nfn unpack_block_128_bit_depth_7(packed_offset: u32) {\n let val_0 = blocks[packed_offset];\n unpacked[0] = extractBits(val_0, 0u, 7u);\n unpacked[1] = extractBits(val_0, 7u, 7u);\n unpacked[2] = extractBits(val_0, 14u, 7u);\n unpacked[3] = extractBits(val_0, 21u, 7u);\n unpacked[4] = extractBits(val_0, 28u, 4u);\n \n let val_1 = blocks[packed_offset + 1];\n unpacked[4] = insertBits(unpacked[4], extractBits(val_1, 0u, 3u), 4u, 3u);\n unpacked[5] = extractBits(val_1, 3u, 7u);\n unpacked[6] = extractBits(val_1, 10u, 7u);\n unpacked[7] = extractBits(val_1, 17u, 7u);\n unpacked[8] = extractBits(val_1, 24u, 7u);\n unpacked[9] = extractBits(val_1, 31u, 1u);\n \n let val_2 = blocks[packed_offset + 2];\n unpacked[9] = insertBits(unpacked[9], extractBits(val_2, 0u, 6u), 1u, 6u);\n unpacked[10] = extractBits(val_2, 6u, 7u);\n unpacked[11] = extractBits(val_2, 13u, 7u);\n unpacked[12] = 
extractBits(val_2, 20u, 7u);\n unpacked[13] = extractBits(val_2, 27u, 5u);\n \n let val_3 = blocks[packed_offset + 3];\n unpacked[13] = insertBits(unpacked[13], extractBits(val_3, 0u, 2u), 5u, 2u);\n unpacked[14] = extractBits(val_3, 2u, 7u);\n unpacked[15] = extractBits(val_3, 9u, 7u);\n unpacked[16] = extractBits(val_3, 16u, 7u);\n unpacked[17] = extractBits(val_3, 23u, 7u);\n unpacked[18] = extractBits(val_3, 30u, 2u);\n \n let val_4 = blocks[packed_offset + 4];\n unpacked[18] = insertBits(unpacked[18], extractBits(val_4, 0u, 5u), 2u, 5u);\n unpacked[19] = extractBits(val_4, 5u, 7u);\n unpacked[20] = extractBits(val_4, 12u, 7u);\n unpacked[21] = extractBits(val_4, 19u, 7u);\n unpacked[22] = extractBits(val_4, 26u, 6u);\n \n let val_5 = blocks[packed_offset + 5];\n unpacked[22] = insertBits(unpacked[22], extractBits(val_5, 0u, 1u), 6u, 1u);\n unpacked[23] = extractBits(val_5, 1u, 7u);\n unpacked[24] = extractBits(val_5, 8u, 7u);\n unpacked[25] = extractBits(val_5, 15u, 7u);\n unpacked[26] = extractBits(val_5, 22u, 7u);\n unpacked[27] = extractBits(val_5, 29u, 3u);\n \n let val_6 = blocks[packed_offset + 6];\n unpacked[27] = insertBits(unpacked[27], extractBits(val_6, 0u, 4u), 3u, 4u);\n unpacked[28] = extractBits(val_6, 4u, 7u);\n unpacked[29] = extractBits(val_6, 11u, 7u);\n unpacked[30] = extractBits(val_6, 18u, 7u);\n unpacked[31] = extractBits(val_6, 25u, 7u);\n \n let val_7 = blocks[packed_offset + 7];\n unpacked[32] = extractBits(val_7, 0u, 7u);\n unpacked[33] = extractBits(val_7, 7u, 7u);\n unpacked[34] = extractBits(val_7, 14u, 7u);\n unpacked[35] = extractBits(val_7, 21u, 7u);\n unpacked[36] = extractBits(val_7, 28u, 4u);\n \n let val_8 = blocks[packed_offset + 8];\n unpacked[36] = insertBits(unpacked[36], extractBits(val_8, 0u, 3u), 4u, 3u);\n unpacked[37] = extractBits(val_8, 3u, 7u);\n unpacked[38] = extractBits(val_8, 10u, 7u);\n unpacked[39] = extractBits(val_8, 17u, 7u);\n unpacked[40] = extractBits(val_8, 24u, 7u);\n unpacked[41] = 
extractBits(val_8, 31u, 1u);\n \n let val_9 = blocks[packed_offset + 9];\n unpacked[41] = insertBits(unpacked[41], extractBits(val_9, 0u, 6u), 1u, 6u);\n unpacked[42] = extractBits(val_9, 6u, 7u);\n unpacked[43] = extractBits(val_9, 13u, 7u);\n unpacked[44] = extractBits(val_9, 20u, 7u);\n unpacked[45] = extractBits(val_9, 27u, 5u);\n \n let val_10 = blocks[packed_offset + 10];\n unpacked[45] = insertBits(unpacked[45], extractBits(val_10, 0u, 2u), 5u, 2u);\n unpacked[46] = extractBits(val_10, 2u, 7u);\n unpacked[47] = extractBits(val_10, 9u, 7u);\n unpacked[48] = extractBits(val_10, 16u, 7u);\n unpacked[49] = extractBits(val_10, 23u, 7u);\n unpacked[50] = extractBits(val_10, 30u, 2u);\n \n let val_11 = blocks[packed_offset + 11];\n unpacked[50] = insertBits(unpacked[50], extractBits(val_11, 0u, 5u), 2u, 5u);\n unpacked[51] = extractBits(val_11, 5u, 7u);\n unpacked[52] = extractBits(val_11, 12u, 7u);\n unpacked[53] = extractBits(val_11, 19u, 7u);\n unpacked[54] = extractBits(val_11, 26u, 6u);\n \n let val_12 = blocks[packed_offset + 12];\n unpacked[54] = insertBits(unpacked[54], extractBits(val_12, 0u, 1u), 6u, 1u);\n unpacked[55] = extractBits(val_12, 1u, 7u);\n unpacked[56] = extractBits(val_12, 8u, 7u);\n unpacked[57] = extractBits(val_12, 15u, 7u);\n unpacked[58] = extractBits(val_12, 22u, 7u);\n unpacked[59] = extractBits(val_12, 29u, 3u);\n \n let val_13 = blocks[packed_offset + 13];\n unpacked[59] = insertBits(unpacked[59], extractBits(val_13, 0u, 4u), 3u, 4u);\n unpacked[60] = extractBits(val_13, 4u, 7u);\n unpacked[61] = extractBits(val_13, 11u, 7u);\n unpacked[62] = extractBits(val_13, 18u, 7u);\n unpacked[63] = extractBits(val_13, 25u, 7u);\n \n let val_14 = blocks[packed_offset + 14];\n unpacked[64] = extractBits(val_14, 0u, 7u);\n unpacked[65] = extractBits(val_14, 7u, 7u);\n unpacked[66] = extractBits(val_14, 14u, 7u);\n unpacked[67] = extractBits(val_14, 21u, 7u);\n unpacked[68] = extractBits(val_14, 28u, 4u);\n \n let val_15 = blocks[packed_offset + 
15];\n unpacked[68] = insertBits(unpacked[68], extractBits(val_15, 0u, 3u), 4u, 3u);\n unpacked[69] = extractBits(val_15, 3u, 7u);\n unpacked[70] = extractBits(val_15, 10u, 7u);\n unpacked[71] = extractBits(val_15, 17u, 7u);\n unpacked[72] = extractBits(val_15, 24u, 7u);\n unpacked[73] = extractBits(val_15, 31u, 1u);\n \n let val_16 = blocks[packed_offset + 16];\n unpacked[73] = insertBits(unpacked[73], extractBits(val_16, 0u, 6u), 1u, 6u);\n unpacked[74] = extractBits(val_16, 6u, 7u);\n unpacked[75] = extractBits(val_16, 13u, 7u);\n unpacked[76] = extractBits(val_16, 20u, 7u);\n unpacked[77] = extractBits(val_16, 27u, 5u);\n \n let val_17 = blocks[packed_offset + 17];\n unpacked[77] = insertBits(unpacked[77], extractBits(val_17, 0u, 2u), 5u, 2u);\n unpacked[78] = extractBits(val_17, 2u, 7u);\n unpacked[79] = extractBits(val_17, 9u, 7u);\n unpacked[80] = extractBits(val_17, 16u, 7u);\n unpacked[81] = extractBits(val_17, 23u, 7u);\n unpacked[82] = extractBits(val_17, 30u, 2u);\n \n let val_18 = blocks[packed_offset + 18];\n unpacked[82] = insertBits(unpacked[82], extractBits(val_18, 0u, 5u), 2u, 5u);\n unpacked[83] = extractBits(val_18, 5u, 7u);\n unpacked[84] = extractBits(val_18, 12u, 7u);\n unpacked[85] = extractBits(val_18, 19u, 7u);\n unpacked[86] = extractBits(val_18, 26u, 6u);\n \n let val_19 = blocks[packed_offset + 19];\n unpacked[86] = insertBits(unpacked[86], extractBits(val_19, 0u, 1u), 6u, 1u);\n unpacked[87] = extractBits(val_19, 1u, 7u);\n unpacked[88] = extractBits(val_19, 8u, 7u);\n unpacked[89] = extractBits(val_19, 15u, 7u);\n unpacked[90] = extractBits(val_19, 22u, 7u);\n unpacked[91] = extractBits(val_19, 29u, 3u);\n \n let val_20 = blocks[packed_offset + 20];\n unpacked[91] = insertBits(unpacked[91], extractBits(val_20, 0u, 4u), 3u, 4u);\n unpacked[92] = extractBits(val_20, 4u, 7u);\n unpacked[93] = extractBits(val_20, 11u, 7u);\n unpacked[94] = extractBits(val_20, 18u, 7u);\n unpacked[95] = extractBits(val_20, 25u, 7u);\n \n let val_21 = 
blocks[packed_offset + 21];\n unpacked[96] = extractBits(val_21, 0u, 7u);\n unpacked[97] = extractBits(val_21, 7u, 7u);\n unpacked[98] = extractBits(val_21, 14u, 7u);\n unpacked[99] = extractBits(val_21, 21u, 7u);\n unpacked[100] = extractBits(val_21, 28u, 4u);\n \n let val_22 = blocks[packed_offset + 22];\n unpacked[100] = insertBits(unpacked[100], extractBits(val_22, 0u, 3u), 4u, 3u);\n unpacked[101] = extractBits(val_22, 3u, 7u);\n unpacked[102] = extractBits(val_22, 10u, 7u);\n unpacked[103] = extractBits(val_22, 17u, 7u);\n unpacked[104] = extractBits(val_22, 24u, 7u);\n unpacked[105] = extractBits(val_22, 31u, 1u);\n \n let val_23 = blocks[packed_offset + 23];\n unpacked[105] = insertBits(unpacked[105], extractBits(val_23, 0u, 6u), 1u, 6u);\n unpacked[106] = extractBits(val_23, 6u, 7u);\n unpacked[107] = extractBits(val_23, 13u, 7u);\n unpacked[108] = extractBits(val_23, 20u, 7u);\n unpacked[109] = extractBits(val_23, 27u, 5u);\n \n let val_24 = blocks[packed_offset + 24];\n unpacked[109] = insertBits(unpacked[109], extractBits(val_24, 0u, 2u), 5u, 2u);\n unpacked[110] = extractBits(val_24, 2u, 7u);\n unpacked[111] = extractBits(val_24, 9u, 7u);\n unpacked[112] = extractBits(val_24, 16u, 7u);\n unpacked[113] = extractBits(val_24, 23u, 7u);\n unpacked[114] = extractBits(val_24, 30u, 2u);\n \n let val_25 = blocks[packed_offset + 25];\n unpacked[114] = insertBits(unpacked[114], extractBits(val_25, 0u, 5u), 2u, 5u);\n unpacked[115] = extractBits(val_25, 5u, 7u);\n unpacked[116] = extractBits(val_25, 12u, 7u);\n unpacked[117] = extractBits(val_25, 19u, 7u);\n unpacked[118] = extractBits(val_25, 26u, 6u);\n \n let val_26 = blocks[packed_offset + 26];\n unpacked[118] = insertBits(unpacked[118], extractBits(val_26, 0u, 1u), 6u, 1u);\n unpacked[119] = extractBits(val_26, 1u, 7u);\n unpacked[120] = extractBits(val_26, 8u, 7u);\n unpacked[121] = extractBits(val_26, 15u, 7u);\n unpacked[122] = extractBits(val_26, 22u, 7u);\n unpacked[123] = extractBits(val_26, 29u, 
3u);\n \n let val_27 = blocks[packed_offset + 27];\n unpacked[123] = insertBits(unpacked[123], extractBits(val_27, 0u, 4u), 3u, 4u);\n unpacked[124] = extractBits(val_27, 4u, 7u);\n unpacked[125] = extractBits(val_27, 11u, 7u);\n unpacked[126] = extractBits(val_27, 18u, 7u);\n unpacked[127] = extractBits(val_27, 25u, 7u); \n}\n"; +var fromFP510Function = "\n\nconst exponents: array = array(\n 2.9103830456733704e-11, \n 5.820766091346741e-11, \n 1.1641532182693481e-10, \n 2.3283064365386963e-10,\n 4.656612873077393e-10, \n 9.313225746154785e-10, \n 1.862645149230957e-09, \n 3.725290298461914e-09,\n 7.450580596923828e-09, \n 1.4901161193847656e-08, \n 2.9802322387695312e-08, \n 5.960464477539063e-08,\n 1.1920928955078125e-07, \n 2.384185791015625e-07, \n 4.76837158203125e-07, \n 9.5367431640625e-07,\n 1.9073486328125e-06, \n 3.814697265625e-06, \n 7.62939453125e-06, \n 1.52587890625e-05, \n 3.0517578125e-05,\n 6.103515625e-05, \n 0.0001220703125, \n 0.000244140625, \n 0.00048828125, \n 0.0009765625, \n 0.001953125, \n 0.00390625,\n 0.0078125, \n 0.015625, \n 0.03125, \n 0.0625);\n\nfn from_fp510(x: u32) -> f32 {\n let exponent = f32(exponents[extractBits(x, 10u, 5u)]); \n let fractional = f32(extractBits(x, 0u, 10u)); \n let abs = exponent * fractional;\n return abs * (1.0 - (2.0 * f32(extractBits(x, 15u, 1u))));\n}\n"; + +var preprocessBlocks3BitShaderSource = "\n\nstruct argsStruct {\n nbr: u32,\n nbc: u32,\n blocks_offset: u32\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar blocks: array;\n\n".concat(unpackBlock128BitDepth3, "\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.nbr || global_id.y >= args.nbc) {\n return;\n }\n \n let blocks_start: u32 = args.blocks_offset + ((global_id.x * args.nbc + 
global_id.y) * 12u); \n unpack_block_128_bit_depth_3(blocks_start); \n \n let b01: u32 = blocks_start;\n let b2: u32 = blocks_start + 8u;\n \n for (var r = 0u; r < ").concat(rowsPerBlock, "u; r++) {\n let unpacked_idx = r * ").concat(columnsPerBlock, "u;\n let b01_idx = b01 + (r / 2u);\n let b2_idx = b2 + (r / 4u);\n\n blocks[b01_idx] = insertBits(blocks[b01_idx], extractBits(unpacked[unpacked_idx], 0u, 2u), (r * 16u) % 32u, 16u);\n blocks[b2_idx] = insertBits(blocks[b2_idx], extractBits(unpacked[unpacked_idx], 2u, 1u), (r * 8u) % 32u, 8u); \n }\n \n for (var c = 1u; c < ").concat(columnsPerBlock, "u; c++) {\n for (var r = 0u; r < ").concat(rowsPerBlock, "u; r++) {\n let unpacked_idx = r * ").concat(columnsPerBlock, "u + c;\n let b01_idx = b01 + (r / 2u);\n let b2_idx = b2 + (r / 4u);\n \n blocks[b01_idx] = insertBits(blocks[b01_idx], extractBits(unpacked[unpacked_idx], 0u, 2u), ((r * 16u) % 32u) + (2u * c), 2u);\n blocks[b2_idx] = insertBits(blocks[b2_idx], extractBits(unpacked[unpacked_idx], 2u, 1u), ((r * 8u) % 32u) + c, 1u); \n }\n }\n}\n\n").concat(emptyShader); +var preprocessBlocks5BitShaderSource = "\n\nstruct argsStruct {\n nbr: u32,\n nbc: u32,\n blocks_offset: u32\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar blocks: array;\n\n".concat(unpackBlock128BitDepth5, "\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.nbr || global_id.y >= args.nbc) {\n return;\n }\n\n let blocks_start: u32 = args.blocks_offset + ((global_id.x * args.nbc + global_id.y) * 20u);\n unpack_block_128_bit_depth_5(blocks_start); \n \n let b03: u32 = blocks_start;\n let b4: u32 = blocks_start + 16u;\n\n for (var r = 0u; r < ").concat(rowsPerBlock, "u; r++) {\n let unpacked_idx = r * ").concat(columnsPerBlock, "u;\n let 
b03_idx = b03 + r;\n let b4_idx = b4 + (r / 4u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 0u, 32u);\n blocks[b4_idx] = insertBits(blocks[b4_idx], extractBits(unpacked[unpacked_idx], 4u, 1u), (r * 8u) % 32u, 8u);\n }\n \n for (var c = 1u; c < ").concat(columnsPerBlock, "u; c++) {\n for (var r = 0u; r < ").concat(rowsPerBlock, "u; r++) {\n let unpacked_idx = r * ").concat(columnsPerBlock, "u + c;\n let b03_idx = b03 + r;\n let b4_idx = b4 + (r / 4u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 4 * c, 4u);\n blocks[b4_idx] = insertBits(blocks[b4_idx], extractBits(unpacked[unpacked_idx], 4u, 1u), ((r * 8u) % 32u) + c, 1u);\n }\n } \n}\n\n").concat(emptyShader); +var preprocessBlocks6BitShaderSource = "\n\nstruct argsStruct {\n nbr: u32,\n nbc: u32,\n blocks_offset: u32\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar blocks: array;\n\n".concat(unpackBlock128BitDepth6, "\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.nbr || global_id.y >= args.nbc) {\n return;\n }\n \n let blocks_start: u32 = args.blocks_offset + ((global_id.x * args.nbc + global_id.y) * 24u);\n unpack_block_128_bit_depth_6(blocks_start);\n \n let b03: u32 = blocks_start;\n let b45: u32 = blocks_start + 16u; \n for (var r = 0u; r < ").concat(rowsPerBlock, "u; r++) {\n let unpacked_idx = r * ").concat(columnsPerBlock, "u;\n let b03_idx = b03 + r;\n let b45_idx = b45 + (r / 2u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 0u, 32u);\n blocks[b45_idx] = insertBits(blocks[b45_idx], extractBits(unpacked[unpacked_idx], 4u, 2u), ((r * 16u) % 32u), 16u);\n }\n\n for (var c = 1u; c < 
").concat(columnsPerBlock, "u; c++) {\n for (var r = 0u; r < ").concat(rowsPerBlock, "u; r++) {\n let unpacked_idx = r * ").concat(columnsPerBlock, "u + c;\n let b03_idx = b03 + r;\n let b45_idx = b45 + (r / 2u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 4 * c, 4u);\n blocks[b45_idx] = insertBits(blocks[b45_idx], extractBits(unpacked[unpacked_idx], 4u, 2u), ((r * 16u) % 32u) + (2 * c), 2u);\n }\n }\n}\n\n").concat(emptyShader); +var preprocessBlocks7BitShaderSource = "\n\nstruct argsStruct {\n nbr: u32,\n nbc: u32,\n blocks_offset: u32\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar blocks: array;\n\n".concat(unpackBlock128BitDepth7, "\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.nbr || global_id.y >= args.nbc) {\n return;\n }\n \n let blocks_start: u32 = args.blocks_offset + ((global_id.x * args.nbc + global_id.y) * 28u);\n unpack_block_128_bit_depth_7(blocks_start);\n \n let b03: u32 = blocks_start;\n let b45: u32 = blocks_start + 16u;\n let b6: u32 = blocks_start + 24u; \n for (var r = 0u; r < ").concat(rowsPerBlock, "u; r++) {\n let unpacked_idx = r * ").concat(columnsPerBlock, "u;\n let b03_idx = b03 + r;\n let b45_idx = b45 + (r / 2u);\n let b6_idx = b6 + (r / 4u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 0, 32u);\n blocks[b45_idx] = insertBits(blocks[b45_idx], extractBits(unpacked[unpacked_idx], 4u, 2u), ((r * 16u) % 32u), 16u);\n blocks[b6_idx] = insertBits(blocks[b6_idx], extractBits(unpacked[unpacked_idx], 6u, 1u), ((r * 8u) % 32u), 8u);\n }\n \n for (var c = 1u; c < ").concat(columnsPerBlock, "u; c++) {\n for (var r = 0u; r < ").concat(rowsPerBlock, "u; r++) {\n let unpacked_idx = r 
* ").concat(columnsPerBlock, "u + c;\n let b03_idx = b03 + r;\n let b45_idx = b45 + (r / 2u);\n let b6_idx = b6 + (r / 4u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 4 * c, 4u);\n blocks[b45_idx] = insertBits(blocks[b45_idx], extractBits(unpacked[unpacked_idx], 4u, 2u), ((r * 16u) % 32u) + (2 * c), 2u);\n blocks[b6_idx] = insertBits(blocks[b6_idx], extractBits(unpacked[unpacked_idx], 6u, 1u), ((r * 8u) % 32u) + c, 1u);\n }\n }\n}\n\n").concat(emptyShader); +var preprocessShaderSources = { + 3: preprocessBlocks3BitShaderSource, + 5: preprocessBlocks5BitShaderSource, + 6: preprocessBlocks6BitShaderSource, + 7: preprocessBlocks7BitShaderSource +}; +var preprocessShaderNames = { + 3: "pv_picollm_weight_block_mixed_16x8_preprocess_blocks_3bit_shader", + 5: "pv_picollm_weight_block_mixed_16x8_preprocess_blocks_5bit_shader", + 6: "pv_picollm_weight_block_mixed_16x8_preprocess_blocks_6bit_shader", + 7: "pv_picollm_weight_block_mixed_16x8_preprocess_blocks_7bit_shader" +}; + +var BM = 8; +var BN = 32; +var TM = 2; +var TN = 16; +var TC = rowsPerBlock * BM * BN / (TM * TN); +var constantSnippet = "\nconst BM = ".concat(BM, "u;\nconst BN = ").concat(BN, "u;\n\nconst TM = ").concat(TM, "u;\nconst TN = ").concat(TN, "u;\n\nconst TC = ").concat(TC, "u;\n\nconst ROW_PER_BLOCK = ").concat(rowsPerBlock, "u;\nconst COL_PER_BLOCK = ").concat(columnsPerBlock, "u;\n\nconst VEC_COL_PER_BLOCK = COL_PER_BLOCK / 4u;\n\nconst block_size: u32 = (COL_PER_BLOCK * ROW_PER_BLOCK * bit_depth) / 32u;\n\n"); +var forwardMultipleInputArgsSnippet = "\nstruct argsStruct {\n n: u32,\n m: u32,\n total_nbc: u32,\n k: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array;\n"; +var 
/* WGSL declarations for the staging arrays of the tiled kernel: shared_x, shared_ab, shared_w and local_x, local_x_sums, local_results. NOTE(review): element types and address spaces are not legible in this copy; the names suggest workgroup-shared versus per-invocation storage - confirm against the unbundled shader. */ forwardMultipleSharedPrivateMemSnippet = "\nvar shared_x: array, BN * VEC_COL_PER_BLOCK>;\nvar shared_ab: array;\nvar shared_w: array, BM * ROW_PER_BLOCK * VEC_COL_PER_BLOCK>;\n\nvar local_x: array, TN * VEC_COL_PER_BLOCK>;\nvar local_x_sums: array;\nvar local_results: array;\n"; +/* WGSL prologue of main: tid comes from local_id.x, the tile origin (local_bm_idx, local_bn_idx) is workgroup_id.x and workgroup_id.y scaled by BM and BN, and tid is split into the sub-tile coordinates n_idx, k_idx and m_idx. */ var forwardMultipleLocalVarSnippet = "\n let tid = local_id.x;\n let bm_idx = workgroup_id.x;\n let bn_idx = workgroup_id.y;\n\n let local_bm_idx = bm_idx * BM;\n let local_bn_idx = bn_idx * BN;\n \n let n_idx = tid % (BN / TN);\n let k_idx = tid / (BN / TN) / (BM * ROW_PER_BLOCK / TM);\n let m_idx = tid / (BN / TN) % (BM * ROW_PER_BLOCK / TM);\n"; +/* The forwardMultipleLoadWNBit strings are bodies spliced into forwardMultipleLoadWSnippet; each expects src, row and dst to be in scope and fills VEC_COL_PER_BLOCK entries of shared_w starting at dst. 1-bit: reads word src + row / 4 (8 bits per row) and emits four 1-bit values per entry. */ var forwardMultipleLoadW1Bit = "\n let b0 = blocks[src + (row / 4u)];\n\n let b0_offset_base = (row * 8u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b0_offset = b0_offset_base + (c * 4u); \n shared_w[dst + c] = vec4(\n f32(extractBits(b0, b0_offset, 1u)), \n f32(extractBits(b0, b0_offset + 1, 1u)),\n f32(extractBits(b0, b0_offset + 2, 1u)),\n f32(extractBits(b0, b0_offset + 3, 1u)));\n }\n"; +/* 2-bit: reads word src + row / 2 (16 bits per row) and emits four 2-bit values per entry. */ var forwardMultipleLoadW2Bit = "\n let b01 = blocks[src + (row / 2u)];\n \n let b01_offset_base = (row * 16u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) { \n let b01_offset = b01_offset_base + (c * 8u);\n shared_w[dst + c] = vec4(\n f32(extractBits(b01, b01_offset, 2u)), \n f32(extractBits(b01, b01_offset + 2, 2u)),\n f32(extractBits(b01, b01_offset + 4, 2u)),\n f32(extractBits(b01, b01_offset + 6, 2u)));\n } \n"; +/* 3-bit: low two bits come from word src + row / 2, the third bit from word src + 8 + row / 4; insertBits places the third bit at bit position 2. */ var forwardMultipleLoadW3Bit = "\n let b01 = blocks[src + (row / 2u)];\n let b2 = blocks[src + 8u + (row / 4u)]; \n\n let b01_offset_base = (row * 16u) % 32u;\n let b2_offset_base = (row * 8u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b01_offset = b01_offset_base + (c * 8u);\n let b2_offset = b2_offset_base + (c * 4u);\n\n shared_w[dst + c] = vec4(\n f32(insertBits(extractBits(b01, b01_offset, 2u), extractBits(b2, b2_offset, 1u), 2u, 1u)), \n f32(insertBits(extractBits(b01, b01_offset + 2, 2u), 
extractBits(b2, b2_offset + 1, 1u), 2u, 1u)),\n f32(insertBits(extractBits(b01, b01_offset + 4, 2u), extractBits(b2, b2_offset + 2, 1u), 2u, 1u)),\n f32(insertBits(extractBits(b01, b01_offset + 6, 2u), extractBits(b2, b2_offset + 3, 1u), 2u, 1u))); \n }\n"; +/* 4-bit: one word per row (src + row); each entry takes four consecutive 4-bit fields. */ var forwardMultipleLoadW4Bit = "\n let b03 = blocks[src + row];\n \n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b03_offset = (c * 16u); \n shared_w[dst + c] = vec4(\n f32(extractBits(b03, b03_offset, 4u)), \n f32(extractBits(b03, b03_offset + 4, 4u)),\n f32(extractBits(b03, b03_offset + 8, 4u)),\n f32(extractBits(b03, b03_offset + 12, 4u)));\n }\n"; +/* 5-bit: low four bits come from word src + row, the fifth bit from word src + 16 + row / 4; insertBits places it at bit position 4. */ var forwardMultipleLoadW5Bit = "\n let b03 = blocks[src + row];\n let b4 = blocks[src + 16u + (row / 4u)];\n \n let b4_offset_base = (row * 8u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b03_offset = (c * 16u);\n let b4_offset = b4_offset_base + (c * 4u);\n shared_w[dst + c] = vec4(\n f32(insertBits(extractBits(b03, b03_offset, 4u), extractBits(b4, b4_offset, 1u), 4u, 1u)), \n f32(insertBits(extractBits(b03, b03_offset + 4, 4u), extractBits(b4, b4_offset + 1, 1u), 4u, 1u)),\n f32(insertBits(extractBits(b03, b03_offset + 8, 4u), extractBits(b4, b4_offset + 2, 1u), 4u, 1u)),\n f32(insertBits(extractBits(b03, b03_offset + 12, 4u), extractBits(b4, b4_offset + 3, 1u), 4u, 1u)));\n }\n"; +/* 6-bit: low four bits come from word src + row, bits 4 and 5 from word src + 16 + row / 2. */ var forwardMultipleLoadW6Bit = "\n let b03 = blocks[src + row];\n let b45 = blocks[src + 16u + (row / 2u)];\n \n let b45_offset_base = (row * 16u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) { \n let b03_offset = (c * 16u);\n let b45_offset = b45_offset_base + (c * 8u);\n shared_w[dst + c] = vec4(\n f32(insertBits(extractBits(b03, b03_offset, 4u), extractBits(b45, b45_offset, 2u), 4u, 2u)), \n f32(insertBits(extractBits(b03, b03_offset + 4, 4u), extractBits(b45, b45_offset + 2, 2u), 4u, 2u)),\n f32(insertBits(extractBits(b03, b03_offset + 8, 4u), extractBits(b45, b45_offset + 4, 2u), 4u, 2u)),\n f32(insertBits(extractBits(b03, b03_offset + 12, 4u), 
extractBits(b45, b45_offset + 6, 2u), 4u, 2u)));\n }\n"; +/* 7-bit: low four bits come from word src + row, bits 4 and 5 from word src + 16 + row / 2, bit 6 from word src + 24 + row / 4. */ var forwardMultipleLoadW7Bit = "\n let b03 = blocks[src + row];\n let b45 = blocks[src + 16u + (row / 2u)];\n let b6 = blocks[src + 24u + (row / 4u)];\n \n let b45_offset_base = (row * 16u) % 32u;\n let b6_offset_base = (row * 8u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b03_offset = (c * 16u);\n let b45_offset = b45_offset_base + (c * 8u);\n let b6_offset = b6_offset_base + (c * 4u);\n \n shared_w[dst + c] = vec4(\n f32(insertBits(insertBits(extractBits(b03, b03_offset, 4u), extractBits(b45, b45_offset, 2u), 4u, 2u), extractBits(b6, b6_offset, 1u), 6u, 1u)), \n f32(insertBits(insertBits(extractBits(b03, b03_offset + 4, 4u), extractBits(b45, b45_offset + 2, 2u), 4u, 2u), extractBits(b6, b6_offset + 1, 1u), 6u, 1u)),\n f32(insertBits(insertBits(extractBits(b03, b03_offset + 8, 4u), extractBits(b45, b45_offset + 4, 2u), 4u, 2u), extractBits(b6, b6_offset + 2, 1u), 6u, 1u)),\n f32(insertBits(insertBits(extractBits(b03, b03_offset + 12, 4u), extractBits(b45, b45_offset + 6, 2u), 4u, 2u), extractBits(b6, b6_offset + 3, 1u), 6u, 1u)));\n }\n"; +/* 8-bit: two words per row starting at src + row * 2; each word yields four 8-bit values. */ var forwardMultipleLoadW8Bit = "\n let b07_offset = src + (row * 2);\n \n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b07 = blocks[b07_offset + c];\n shared_w[dst + c] = vec4(\n f32(extractBits(b07, 0u, 8u)), \n f32(extractBits(b07, 8u, 8u)),\n f32(extractBits(b07, 16u, 8u)),\n f32(extractBits(b07, 24u, 8u)));\n }\n"; +/* Lookup from bit depth (1 to 8) to the matching unpacking body; consumed by forwardMultipleLoadWSnippet. */ var forwardLoadWBitDepthSnippets = { + 1: forwardMultipleLoadW1Bit, + 2: forwardMultipleLoadW2Bit, + 3: forwardMultipleLoadW3Bit, + 4: forwardMultipleLoadW4Bit, + 5: forwardMultipleLoadW5Bit, + 6: forwardMultipleLoadW6Bit, + 7: forwardMultipleLoadW7Bit, + 8: forwardMultipleLoadW8Bit +}; +/* Strided load of x into shared_x: each invocation handles idx = local_idx * TC + tid for idx below VEC_COL_PER_BLOCK * BN; entries whose bk_idx or column index is out of range are zero-filled. Expects bk_idx from the enclosing loop. */ var forwardMultipleLoadXSnippet = "\n let total_work_x = VEC_COL_PER_BLOCK * BN;\n for (var local_idx = 0u; local_idx < divide_pad(total_work_x, TC); local_idx++) {\n let idx = local_idx * TC + tid;\n if (idx < total_work_x) { \n let n_load_idx = 
local_bn_idx + idx / VEC_COL_PER_BLOCK;\n let inner_idx = idx % VEC_COL_PER_BLOCK;\n \n if (bk_idx < args.k && n_load_idx < args.n) { \n let x_idx = (args.x_offset / 4u) + ((bk_idx * args.n + n_load_idx) * VEC_COL_PER_BLOCK + inner_idx); \n shared_x[idx] = x[x_idx];\n } else {\n shared_x[idx] = vec4(0.0);\n }\n }\n }\n"; +/* Loads BM * 2 values into shared_ab: each metas word is split into two 16-bit halves (low half at even idx, high half at odd idx) and decoded with from_fp510; out-of-range entries are set to 0.0. */ var forwardMultipleLoadABSnippet = "\n let total_work_ab = BM * 2;\n for (var local_idx = 0u; local_idx < divide_pad(total_work_ab, TC); local_idx++) {\n let idx = local_idx * TC + tid;\n if (idx < total_work_ab) {\n let m_load_idx = local_bm_idx + idx / 2; \n let inner_idx = (idx % 2) * 16u;\n \n if (m_load_idx < args.m && bk_idx < args.k) {\n let ab_bits = extractBits(metas[args.metas_offset + (m_load_idx * args.k + bk_idx)], inner_idx, 16u);\n shared_ab[idx] = from_fp510(ab_bits); \n } else {\n shared_ab[idx] = 0.0;\n }\n }\n }\n"; +/* Builds the WGSL that fills shared_w for BM * ROW_PER_BLOCK rows, splicing in forwardLoadWBitDepthSnippets[bitDepth]; rows with m_load_idx at or beyond args.m are zero-filled. A bitDepth that is not a key of that map makes concat insert the text undefined into the shader. */ var forwardMultipleLoadWSnippet = function forwardMultipleLoadWSnippet(bitDepth) { + return "\n let total_work_w = BM * ROW_PER_BLOCK;\n for (var local_idx = 0u; local_idx < divide_pad(total_work_w, TC); local_idx++) {\n let idx = local_idx * TC + tid;\n if (idx < total_work_w) {\n let m_load_idx = local_bm_idx + idx / ROW_PER_BLOCK;\n let row = idx % ROW_PER_BLOCK;\n let dst = idx * VEC_COL_PER_BLOCK;\n\n if (m_load_idx < args.m) {\n let src = args.blocks_offset + (m_load_idx * args.k + bk_idx) * block_size;\n ".concat(forwardLoadWBitDepthSnippets[bitDepth], "\n } else { \n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n shared_w[dst + c] = vec4(0.0);\n }\n }\n }\n }\n"); +}; +/* For each tn_idx below TN, copies VEC_COL_PER_BLOCK entries of shared_x into local_x and stores the sum of all their components in local_x_sums[tn_idx]. */ var forwardMultipleCopyXSnippet = "\nfor (var tn_idx = 0u; tn_idx < TN; tn_idx++) { \n var x_sum_vec = vec4(0.0); \n let local_x_idx = tn_idx * VEC_COL_PER_BLOCK;\n let shared_x_idx = (n_idx * TN + tn_idx) * VEC_COL_PER_BLOCK + (k_idx * VEC_COL_PER_BLOCK);\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n local_x[local_x_idx + c] = shared_x[shared_x_idx + c];\n x_sum_vec += local_x[local_x_idx + c];\n }\n local_x_sums[tn_idx] = x_sum_vec.x 
+ x_sum_vec.y + x_sum_vec.z + x_sum_vec.w; \n}\n"; +/* Accumulates into local_results[tm_idx * TN + tn_idx] the value alpha * local_x_sums[tn_idx] + beta * (shared_w row dotted with local_x), where alpha and beta are the two consecutive shared_ab entries at shared_ab_idx. */ var forwardMultipleComputeResultsSnippet = "\n for (var tm_idx = 0u; tm_idx < TM; tm_idx++) { \n let shared_ab_idx = ((m_idx * TM + tm_idx) / ROW_PER_BLOCK + k_idx) * 2;\n let alpha = shared_ab[shared_ab_idx];\n let beta = shared_ab[shared_ab_idx + 1]; \n let shared_w_idx = ((m_idx * TM + tm_idx) + k_idx) * VEC_COL_PER_BLOCK;\n \n for (var tn_idx = 0u; tn_idx < TN; tn_idx++) { \n let local_x_idx = tn_idx * VEC_COL_PER_BLOCK;\n \n var swx_vec = vec4(0.0); \n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) { \n swx_vec += shared_w[shared_w_idx + c] * local_x[local_x_idx + c];\n }\n let swx = swx_vec.x + swx_vec.y + swx_vec.z + swx_vec.w;\n \n let kappa = alpha * local_x_sums[tn_idx]; \n let results_idx = tm_idx * TN + tn_idx;\n local_results[results_idx] += kappa + (beta * swx);\n }\n }\n"; +/* Adds every local_results entry into y, skipping rows at or beyond args.m * ROW_PER_BLOCK and columns at or beyond args.n. */ var forwardMultipleWriteResultsSnippet = "\nfor (var tm_idx = 0u; tm_idx < TM; tm_idx++) {\n let row = local_bm_idx * ROW_PER_BLOCK + (m_idx * TM + tm_idx); \n for (var tn_idx = 0u; tn_idx < TN; tn_idx++) { \n let col = local_bn_idx + (n_idx * TN + tn_idx);\n if (row < args.m * ROW_PER_BLOCK && col < args.n) {\n let y_idx = args.y_offset + ((row / ROW_PER_BLOCK) * args.n + col) * ROW_PER_BLOCK + (row % ROW_PER_BLOCK);\n let results_idx = tm_idx * TN + tn_idx;\n \n y[y_idx] += local_results[results_idx];\n }\n }\n}\n"; +/* Returns the complete WGSL source of the tiled kernel for one bit depth by concatenating the argument, constant, memory, from_fp510 and divide_pad pieces with a main function built from the forwardMultiple snippets. bitDepth is interpolated as the bit_depth constant and passed to forwardMultipleLoadWSnippet. */ var forwardMultipleShaderSources = function forwardMultipleShaderSources(bitDepth) { + return "\n\n".concat(forwardMultipleInputArgsSnippet, "\n\n").concat(constantSnippet, "\n\n").concat(forwardMultipleSharedPrivateMemSnippet, "\n\n").concat(fromFP510Function, "\n\n").concat(dividePadFunction, "\n\nconst bit_depth: u32 = ").concat(bitDepth, "u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n \n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(\n @builtin(local_invocation_id) local_id: vec3,\n 
@builtin(workgroup_id) workgroup_id: vec3\n) {\n ").concat(forwardMultipleLocalVarSnippet, "\n \n for (var bk_idx = 0u; bk_idx < args.k; bk_idx++) { \n ").concat(forwardMultipleLoadXSnippet, "\n ").concat(forwardMultipleLoadABSnippet, " \n ").concat(forwardMultipleLoadWSnippet(bitDepth), " \n workgroupBarrier();\n \n ").concat(forwardMultipleCopyXSnippet, "\n ").concat(forwardMultipleComputeResultsSnippet, "\n workgroupBarrier();\n }\n \n ").concat(forwardMultipleWriteResultsSnippet, "\n}\n\n").concat(emptyShader, "\n"); +}; + +var forwardShuffleXShaderSource = "\nstruct argsStruct {\n n: u32,\n shape1: u32,\n x_offset: u32,\n indices_offset: u32, \n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array;\n\n@group(0) @binding(2)\nvar indices: array;\n\n@group(0) @binding(3)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n || global_id.y >= args.shape1) {\n return;\n } \n\n let b = global_id.x;\n let i = global_id.y;\n \n let c = i / 8u;\n let j = i % 8u;\n y[((c * args.n) + b) * 8 + j] = x[args.x_offset + (b * args.shape1) + indices[args.indices_offset + i]];\n}\n\n".concat(emptyShader, "\n"); +var forwardSingleReduceYShaderSource = "\nstruct argsStruct {\n nvr: u32,\n nbc: u32, \n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar y: array>;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) { \n if (global_id.x > args.nvr) {\n return;\n }\n\n let x_start = global_id.x * args.nbc;\n var sum: vec4 = vec4(0.0, 0.0, 0.0, 
0.0);\n for (var i = 0u; i < args.nbc; i++) {\n sum += x[x_start + i]; \n }\n y[global_id.x] += sum;\n}\n\n".concat(emptyShader); +var forwardShuffleYShaderSource = "\nstruct argsStruct {\n n: u32,\n shape0: u32, \n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array;\n\n@group(0) @binding(2)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n || global_id.y >= args.shape0) {\n return;\n } \n \n let b = global_id.x;\n let i = global_id.y;\n \n let r = i / 16u;\n let j = i % 16u;\n y[(b * args.shape0) + (r * 16) + j] = x[(((r * args.n) + b) * 16) + j];\n}\n\n".concat(emptyShader); +var addBiasShaderSource = "\nstruct argsStruct {\n dimension: u32\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar bias: array;\n\n@group(0) @binding(2)\nvar y: array;\n\n".concat(fromFP510Function, "\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) { \n y[(global_id.x * args.dimension) + global_id.y] += bias[global_id.y];\n}\n\n").concat(emptyShader); +var forwardSingleBitDepth1ShaderSource = "\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(fromFP510Function, "\n\nconst block_size: u32 = 4u;\n\noverride workgroup_size_x: u32 = 
1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n\n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(columnsPerBlock, ";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b0_start = row_blocks_start + br_offset + (bc * block_size);\n var b0_offset = 0u;\n \n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(columnsPerBlock, "; j+=4) {\n \n var b0 = blocks[b0_start];\n \n let w0_0 = f32(extractBits(b0, b0_offset + j, 1u)); \n let w0_1 = f32(extractBits(b0, b0_offset + j + 1, 1u)); \n let w0_2 = f32(extractBits(b0, b0_offset + j + 2, 1u));\n let w0_3 = f32(extractBits(b0, b0_offset + j + 3, 1u));\n \n b0_offset = 8u;\n \n let w1_0 = f32(extractBits(b0, b0_offset + j, 1u)); \n let w1_1 = f32(extractBits(b0, b0_offset + j + 1, 1u)); \n let w1_2 = f32(extractBits(b0, b0_offset + j + 2, 1u));\n let w1_3 = f32(extractBits(b0, b0_offset + j + 3, 1u));\n \n b0_offset = 16u;\n \n let w2_0 = f32(extractBits(b0, b0_offset + j, 1u)); \n let w2_1 = f32(extractBits(b0, b0_offset + j + 1, 1u)); \n let w2_2 = f32(extractBits(b0, b0_offset + j + 2, 1u));\n let w2_3 = f32(extractBits(b0, b0_offset + j + 3, 1u));\n \n b0_offset = 24u;\n \n let w3_0 = f32(extractBits(b0, b0_offset + j, 1u)); \n let w3_1 = f32(extractBits(b0, b0_offset + j + 1, 1u)); \n let w3_2 = 
f32(extractBits(b0, b0_offset + j + 2, 1u));\n let w3_3 = f32(extractBits(b0, b0_offset + j + 3, 1u));\n \n b0_offset = 0u;\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w; \n }\n \n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(emptyShader); +var forwardSingleBitDepth2ShaderSource = "\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(fromFP510Function, "\n\nconst block_size: u32 = 8u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n\n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(columnsPerBlock, ";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var 
b01_start = row_blocks_start + (br_offset * 2u) + (bc * block_size);\n var b01_offset = 0u;\n \n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(columnsPerBlock, "; j+=4) {\n \n var b01 = blocks[b01_start];\n \n let w0_0 = f32(extractBits(b01, b01_offset + (2u * j), 2u)); \n let w0_1 = f32(extractBits(b01, b01_offset + (2u * (j + 1)), 2u)); \n let w0_2 = f32(extractBits(b01, b01_offset + (2u * (j + 2)), 2u));\n let w0_3 = f32(extractBits(b01, b01_offset + (2u * (j + 3)), 2u));\n \n b01_offset = 16u;\n \n let w1_0 = f32(extractBits(b01, b01_offset + (2u * j), 2u)); \n let w1_1 = f32(extractBits(b01, b01_offset + (2u * (j + 1)), 2u)); \n let w1_2 = f32(extractBits(b01, b01_offset + (2u * (j + 2)), 2u));\n let w1_3 = f32(extractBits(b01, b01_offset + (2u * (j + 3)), 2u));\n \n b01_offset = 0u; \n b01 = blocks[b01_start + 1u];\n \n let w2_0 = f32(extractBits(b01, b01_offset + (2u * j), 2u)); \n let w2_1 = f32(extractBits(b01, b01_offset + (2u * (j + 1)), 2u)); \n let w2_2 = f32(extractBits(b01, b01_offset + (2u * (j + 2)), 2u));\n let w2_3 = f32(extractBits(b01, b01_offset + (2u * (j + 3)), 2u));\n \n b01_offset = 16u;\n \n let w3_0 = f32(extractBits(b01, b01_offset + (2u * j), 2u)); \n let w3_1 = f32(extractBits(b01, b01_offset + (2u * (j + 1)), 2u)); \n let w3_2 = f32(extractBits(b01, b01_offset + (2u * (j + 2)), 2u));\n let w3_3 = f32(extractBits(b01, b01_offset + (2u * (j + 3)), 2u));\n \n b01_offset = 0u;\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w; \n }\n \n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(emptyShader); +var forwardSingleBitDepth3ShaderSource = "\n\nstruct 
argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(fromFP510Function, "\n\nconst block_size: u32 = 12u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id : vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(columnsPerBlock, ";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n\n let x_start = ((args.x_offset + c) / 4u);\n \n var b01_start = row_blocks_start + (br_offset * 2u) + (bc * block_size);\n var b2_start = row_blocks_start + br_offset + (bc * block_size) + 8u;\n var b01_offset = 0u;\n var b2_offset = 0u;\n\n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(columnsPerBlock, "; j+=4) { \n \n var b01 = blocks[b01_start];\n var b2 = blocks[b2_start];\n \n var b01_bits = extractBits(b01, b01_offset + (2u * j), 2u);\n var b2_bit = extractBits(b2, b2_offset + j, 1u);\n let w0_0 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 1)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 1), 
1u);\n let w0_1 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 2)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 2), 1u);\n let w0_2 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 3)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 3), 1u);\n let w0_3 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_offset = 16u;\n b2_offset = 8u;\n \n b01_bits = extractBits(b01, b01_offset + (2u * j), 2u);\n b2_bit = extractBits(b2, b2_offset + j, 1u); \n let w1_0 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 1)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 1), 1u);\n let w1_1 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 2)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 2), 1u);\n let w1_2 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 3)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 3), 1u);\n let w1_3 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_offset = 0u;\n b2_offset = 16u; \n b01 = blocks[b01_start + 1u];\n \n b01_bits = extractBits(b01, b01_offset + (2u * j), 2u);\n b2_bit = extractBits(b2, b2_offset + j, 1u); \n let w2_0 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 1)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 1), 1u);\n let w2_1 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 2)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 2), 1u);\n let w2_2 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 3)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 3), 1u);\n let w2_3 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_offset = 16u;\n b2_offset = 24u; \n \n b01_bits = extractBits(b01, 
b01_offset + (2u * j), 2u);\n b2_bit = extractBits(b2, b2_offset + j, 1u); \n let w3_0 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 1)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 1), 1u);\n let w3_1 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 2)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 2), 1u);\n let w3_2 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 3)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 3), 1u);\n let w3_3 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_offset = 0u;\n b2_offset = 0u; \n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n \n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(emptyShader); +var forwardSingleBitDepth4ShaderSource = "\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(fromFP510Function, "\n\nconst block_size: u32 = 16u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) 
{\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(columnsPerBlock, ";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b03_start = row_blocks_start + (br_offset * 4u) + (bc * block_size); \n\n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n\n for (var j = 0u; j < ").concat(columnsPerBlock, "; j+=4) {\n \n var b03 = blocks[b03_start]; \n \n let w0_0 = f32(extractBits(b03, 4u * j, 4u)); \n let w0_1 = f32(extractBits(b03, 4u * (j + 1), 4u)); \n let w0_2 = f32(extractBits(b03, 4u * (j + 2), 4u));\n let w0_3 = f32(extractBits(b03, 4u * (j + 3), 4u));\n \n b03 = blocks[b03_start + 1];\n \n let w1_0 = f32(extractBits(b03, 4u * j, 4u)); \n let w1_1 = f32(extractBits(b03, 4u * (j + 1), 4u)); \n let w1_2 = f32(extractBits(b03, 4u * (j + 2), 4u));\n let w1_3 = f32(extractBits(b03, 4u * (j + 3), 4u));\n \n b03 = blocks[b03_start + 2];\n \n let w2_0 = f32(extractBits(b03, 4u * j, 4u)); \n let w2_1 = f32(extractBits(b03, 4u * (j + 1), 4u)); \n let w2_2 = f32(extractBits(b03, 4u * (j + 2), 4u));\n let w2_3 = f32(extractBits(b03, 4u * (j + 3), 4u));\n \n b03 = blocks[b03_start + 3];\n \n let w3_0 = f32(extractBits(b03, 4u * j, 4u)); \n let w3_1 = f32(extractBits(b03, 4u * (j + 1), 4u)); \n let w3_2 = f32(extractBits(b03, 4u * (j + 2), 4u));\n let w3_3 = f32(extractBits(b03, 4u * (j + 3), 4u));\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y 
+ x_vec.z + x_vec.w;\n }\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(emptyShader); +var forwardSingleBitDepth5ShaderSource = "\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(fromFP510Function, "\n\nconst block_size: u32 = 20u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(columnsPerBlock, ";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b03_start = row_blocks_start + (br_offset * 4u) + (bc * block_size);\n var b4_start = row_blocks_start + br_offset + (bc * block_size) + 16u;\n \n var b4_offset = 0u;\n \n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(columnsPerBlock, "; j+=4) {\n \n var b03 = blocks[b03_start];\n var b4 = blocks[b4_start];\n \n var b03_bits = 
extractBits(b03, 4u * j, 4u);\n var b4_bit = extractBits(b4, b4_offset + j, 1u); \n let w0_0 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 1), 1u);\n let w0_1 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 2), 1u);\n let w0_2 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 3), 1u);\n let w0_3 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03 = blocks[b03_start + 1];\n b4_offset = 8u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b4_bit = extractBits(b4, b4_offset + j, 1u); \n let w1_0 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 1), 1u);\n let w1_1 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 2), 1u);\n let w1_2 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 3), 1u);\n let w1_3 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03 = blocks[b03_start + 2];\n b4_offset = 16u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b4_bit = extractBits(b4, b4_offset + j, 1u); \n let w2_0 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 1), 1u);\n let w2_1 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 2), 1u);\n let w2_2 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 3), 1u);\n let w2_3 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n 
b03 = blocks[b03_start + 3];\n b4_offset = 24u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b4_bit = extractBits(b4, b4_offset + j, 1u); \n let w3_0 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 1), 1u);\n let w3_1 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 2), 1u);\n let w3_2 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 3), 1u);\n let w3_3 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b4_offset = 0u;\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(emptyShader); +var forwardSingleBitDepth6ShaderSource = "\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(fromFP510Function, "\n\nconst block_size: u32 = 24u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n 
return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(columnsPerBlock, ";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b03_start = row_blocks_start + (br_offset * 4u) + (bc * block_size);\n var b45_start = row_blocks_start + (br_offset * 2u) + (bc * block_size) + 16u;\n \n var b45_offset = 0u;\n \n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(columnsPerBlock, "; j+=4) {\n \n var b03 = blocks[b03_start];\n var b45 = blocks[b45_start];\n \n var b03_bits = extractBits(b03, 4u * j, 4u);\n var b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n let w0_0 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n let w0_1 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n let w0_2 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n let w0_3 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03 = blocks[b03_start + 1];\n b45_offset = 16u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n let w1_0 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n let w1_1 = f32(insertBits(b03_bits, b45_bits, 4u, 
2u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n let w1_2 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n let w1_3 = f32(insertBits(b03_bits, b45_bits, 4u, 2u)); \n \n b03 = blocks[b03_start + 2];\n b45 = blocks[b45_start + 1];\n b45_offset = 0u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n let w2_0 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n let w2_1 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n let w2_2 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n let w2_3 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03 = blocks[b03_start + 3];\n b45_offset = 16u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n let w3_0 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n let w3_1 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n let w3_2 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n let w3_3 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b45_offset = 0u;\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, 
w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(emptyShader); +var forwardSingleBitDepth7ShaderSource = "\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(fromFP510Function, "\n\nconst block_size: u32 = 28u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(columnsPerBlock, ";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b03_start = row_blocks_start + (br_offset * 4u) + (bc * block_size);\n var b45_start = row_blocks_start + (br_offset * 2u) + (bc * block_size) + 16u;\n var b6_start = row_blocks_start + br_offset + (bc * block_size) + 24u;\n \n var b45_offset = 
0u;\n var b6_offset = 0u;\n \n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(columnsPerBlock, "; j+=4) {\n \n var b03 = blocks[b03_start];\n var b45 = blocks[b45_start];\n var b6 = blocks[b6_start];\n \n var b03_bits = extractBits(b03, 4u * j, 4u);\n var b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n var b6_bit = extractBits(b6, b6_offset + j, 1u);\n let w0_0 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 1, 1u);\n let w0_1 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 2, 1u);\n let w0_2 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u)); \n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 3, 1u);\n let w0_3 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03 = blocks[b03_start + 1];\n b45_offset = 16u;\n b6_offset = 8u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n b6_bit = extractBits(b6, b6_offset + j, 1u);\n let w1_0 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 1, 1u);\n let w1_1 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 2, 1u);\n let w1_2 = 
f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u)); \n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 3, 1u);\n let w1_3 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03 = blocks[b03_start + 2];\n b45 = blocks[b45_start + 1];\n b45_offset = 0u;\n b6_offset = 16u;\n\n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n b6_bit = extractBits(b6, b6_offset + j, 1u);\n let w2_0 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 1, 1u);\n let w2_1 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 2, 1u);\n let w2_2 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u)); \n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 3, 1u);\n let w2_3 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03 = blocks[b03_start + 3];\n b45_offset = 16u;\n b6_offset = 24u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n b6_bit = extractBits(b6, b6_offset + j, 1u);\n let w3_0 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 1, 1u);\n let w3_1 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n 
b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 2, 1u);\n let w3_2 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u)); \n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 3, 1u);\n let w3_3 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b45_offset = 0u;\n b6_offset = 0u;\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(emptyShader); +var forwardSingleBitDepth8ShaderSource = "\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(fromFP510Function, "\n\nconst block_size: u32 = 32u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(columnsPerBlock, ";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let 
br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b07_start = row_blocks_start + (br_offset * 8u) + (bc * block_size); \n\n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n\n for (var j = 0u; j < 2; j++) {\n \n var b07 = blocks[b07_start + j];\n \n let w0_0 = f32(extractBits(b07, 0u, 8u)); \n let w0_1 = f32(extractBits(b07, 8u, 8u)); \n let w0_2 = f32(extractBits(b07, 16u, 8u));\n let w0_3 = f32(extractBits(b07, 24u, 8u));\n \n b07 = blocks[b07_start + 2 + j];\n \n let w1_0 = f32(extractBits(b07, 0u, 8u)); \n let w1_1 = f32(extractBits(b07, 8u, 8u)); \n let w1_2 = f32(extractBits(b07, 16u, 8u));\n let w1_3 = f32(extractBits(b07, 24u, 8u));\n \n b07 = blocks[b07_start + 4 + j];\n \n let w2_0 = f32(extractBits(b07, 0u, 8u)); \n let w2_1 = f32(extractBits(b07, 8u, 8u)); \n let w2_2 = f32(extractBits(b07, 16u, 8u));\n let w2_3 = f32(extractBits(b07, 24u, 8u));\n \n b07 = blocks[b07_start + 6 + j];\n \n let w3_0 = f32(extractBits(b07, 0u, 8u)); \n let w3_1 = f32(extractBits(b07, 8u, 8u)); \n let w3_2 = f32(extractBits(b07, 16u, 8u));\n let w3_3 = f32(extractBits(b07, 24u, 8u));\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_vec = x[x_start + j];\n res[j] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(emptyShader); +var forwardSingleShaderSources = { + 1: forwardSingleBitDepth1ShaderSource, + 2: 
forwardSingleBitDepth2ShaderSource, + 3: forwardSingleBitDepth3ShaderSource, + 4: forwardSingleBitDepth4ShaderSource, + 5: forwardSingleBitDepth5ShaderSource, + 6: forwardSingleBitDepth6ShaderSource, + 7: forwardSingleBitDepth7ShaderSource, + 8: forwardSingleBitDepth8ShaderSource +}; +/** Name string of the forward-single shader for each bit depth; keys 1-8 mirror the keys of forwardSingleShaderSources. */ var forwardSingleShaderNames = { + 1: "pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_1_shader", + 2: "pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_2_shader", + 3: "pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_3_shader", + 4: "pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_4_shader", + 5: "pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_5_shader", + 6: "pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_6_shader", + 7: "pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_7_shader", + 8: "pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_8_shader" +}; +/** Forward-multiple shader source for each bit depth (keys 1-8). Every entry is produced by calling forwardMultipleShaderSources with that bit depth as soon as this statement is evaluated, not lazily. */ var forwardShaderSources = { + 1: forwardMultipleShaderSources(1), + 2: forwardMultipleShaderSources(2), + 3: forwardMultipleShaderSources(3), + 4: forwardMultipleShaderSources(4), + 5: forwardMultipleShaderSources(5), + 6: forwardMultipleShaderSources(6), + 7: forwardMultipleShaderSources(7), + 8: forwardMultipleShaderSources(8) +}; +/** Name string of the forward-multiple shader for each bit depth; keys 1-8 mirror the keys of forwardShaderSources. */ var forwardShaderNames = { + 1: "pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_1_shader", + 2: "pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_2_shader", + 3: "pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_3_shader", + 4: "pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_4_shader", + 5: "pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_5_shader", + 6: "pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_6_shader", + 7: "pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_7_shader", + 8: "pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_8_shader" +}; +/** Name strings for the bit-depth-independent shaders of this weight type (shuffle x, shuffle y, add bias, single reduce y). */ var forwardShuffleXShaderName = 
"pv_picollm_weight_block_mixed_16x8_forward_shuffle_x_shader"; +var forwardShuffleYShaderName = "pv_picollm_weight_block_mixed_16x8_forward_shuffle_y_shader"; +var addBiasShaderName = "pv_picollm_weight_block_mixed_16x8_add_bias_shader"; +var forwardSingleReduceYShaderName = "pv_picollm_weight_block_mixed_16x8_forward_single_reduce_y_shader"; + +/* Declared without an initializer here; presumably assigned further down the file - not visible in this chunk, confirm before relying on it. */ var _weightBlockMixed16x; +/** Builds the compute pipeline that runs the preprocess-blocks shader for one bit depth. device must expose createBindGroupLayout, createPipelineLayout, createShaderModule and createComputePipeline (presumably a WebGPU GPUDevice - confirm against the caller). bitDepth selects the WGSL source via preprocessShaderSources[bitDepth] and is also embedded in every debug label. Returns an object with a single computePipeline property. */ var loadPreprocessBlocksShader = function loadPreprocessBlocksShader(device, bitDepth) { + /* Bind group 0 layout: binding 0 is a uniform buffer, binding 1 is a writable storage buffer; both are visible to the compute stage only. */ var bindGroupLayout = device.createBindGroupLayout({ + label: "weight preprocess blocks ".concat(bitDepth, " bind group layout"), + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "weight preprocess blocks ".concat(bitDepth, " pipeline layout"), + bindGroupLayouts: [bindGroupLayout] + }); + /* NOTE(review): an unknown bitDepth yields code: undefined here; no validation is done in this function. */ var shaderModule = device.createShaderModule({ + label: "weight preprocess blocks ".concat(bitDepth, " shader module"), + code: preprocessShaderSources[bitDepth] + }); + /* The x and y workgroup-size pipeline constants are both set to preprocessDim; workgroup_size_z is not passed. */ var computePipeline = device.createComputePipeline({ + label: "weight preprocess blocks ".concat(bitDepth, " pipeline"), + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: preprocessDim, + workgroup_size_y: preprocessDim + } + } + }); + return { + computePipeline: computePipeline + }; +}; +/** Builds the compute pipeline for the shuffle-x shader (forwardShuffleXShaderSource). Bind group 0 has four compute-visible buffers: binding 0 uniform, bindings 1 and 2 read-only storage, binding 3 writable storage. Only the workgroup_size_y pipeline constant is supplied, set to PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE. device is used the same way as in loadPreprocessBlocksShader. Returns an object with a single computePipeline property. */ var loadForwardShuffleXShader = function loadForwardShuffleXShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "weight shuffle x bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + }
+ }, { + binding: 3, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "weight shuffle x pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "weight shuffle x shader module", + code: forwardShuffleXShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "weight shuffle x pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_y: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE + } + } + }); + return { + computePipeline: computePipeline + }; +}; +var loadForwardSingleReduceYShader = function loadForwardSingleReduceYShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "weight single reduce y bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "weight single reduce y pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "weight single reduce y shader module", + code: forwardSingleReduceYShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "weight single reduce y pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE + } + } + }); + return { + computePipeline: computePipeline + }; +}; +var loadForwardShuffleYShader = function loadForwardShuffleYShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "weight 
shuffle y bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "weight shuffle y pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "weight shuffle y shader module", + code: forwardShuffleYShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "weight shuffle y pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint + } + }); + return { + computePipeline: computePipeline + }; +}; +var loadForwardSingleShader = function loadForwardSingleShader(device, bitDepth) { + var entries = [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 3, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 4, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }]; + var bindGroupLayout = device.createBindGroupLayout({ + label: "weight forward single ".concat(bitDepth, " bind group layout"), + entries: entries + }); + var pipelineLayout = device.createPipelineLayout({ + label: "weight forward single ".concat(bitDepth, " pipeline layout"), + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "weight forward single ".concat(bitDepth, " shader module"), + code: forwardSingleShaderSources[bitDepth] + }); + var computePipeline = 
device.createComputePipeline({ + label: "weight forward single ".concat(bitDepth, " pipeline"), + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: weightBlockSize, + workgroup_size_y: 1 + } + } + }); + return { + computePipeline: computePipeline + }; +}; +var loadForwardShader = function loadForwardShader(device, bitDepth) { + var entries = [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 3, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 4, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }]; + var bindGroupLayout = device.createBindGroupLayout({ + label: "weight forward multi ".concat(bitDepth, " bind group layout"), + entries: entries + }); + var pipelineLayout = device.createPipelineLayout({ + label: "weight forward multi ".concat(bitDepth, " pipeline layout"), + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "weight forward multi ".concat(bitDepth, " shader module"), + code: forwardShaderSources[bitDepth] + }); + var computePipeline = device.createComputePipeline({ + label: "weight forward multi ".concat(bitDepth, " pipeline"), + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: TC + } + } + }); + return { + computePipeline: computePipeline + }; +}; +var loadAddBiasShader = function loadAddBiasShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "weight add bias bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + 
binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "weight add bias pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "weight add bias shader module", + code: addBiasShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "weight add bias pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint + } + }); + return { + computePipeline: computePipeline + }; +}; +var weightBlockMixed16x8Shaders = (_weightBlockMixed16x = {}, _defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_weightBlockMixed16x, preprocessShaderNames[3], function (device) { + return loadPreprocessBlocksShader(device, 3); +}), preprocessShaderNames[5], function (device) { + return loadPreprocessBlocksShader(device, 5); +}), preprocessShaderNames[6], function (device) { + return loadPreprocessBlocksShader(device, 6); +}), preprocessShaderNames[7], function (device) { + return loadPreprocessBlocksShader(device, 7); +}), forwardShuffleXShaderName, loadForwardShuffleXShader), forwardShuffleYShaderName, loadForwardShuffleYShader), forwardSingleReduceYShaderName, loadForwardSingleReduceYShader), forwardSingleShaderNames[1], function (device) { + return loadForwardSingleShader(device, 1); +}), forwardSingleShaderNames[2], function (device) { + return loadForwardSingleShader(device, 2); +}), forwardSingleShaderNames[3], function (device) { + return loadForwardSingleShader(device, 3); +}), 
_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_weightBlockMixed16x, forwardSingleShaderNames[4], function (device) { + return loadForwardSingleShader(device, 4); +}), forwardSingleShaderNames[5], function (device) { + return loadForwardSingleShader(device, 5); +}), forwardSingleShaderNames[6], function (device) { + return loadForwardSingleShader(device, 6); +}), forwardSingleShaderNames[7], function (device) { + return loadForwardSingleShader(device, 7); +}), forwardSingleShaderNames[8], function (device) { + return loadForwardSingleShader(device, 8); +}), forwardShaderNames[1], function (device) { + return loadForwardShader(device, 1); +}), forwardShaderNames[2], function (device) { + return loadForwardShader(device, 2); +}), forwardShaderNames[3], function (device) { + return loadForwardShader(device, 3); +}), forwardShaderNames[4], function (device) { + return loadForwardShader(device, 4); +}), forwardShaderNames[5], function (device) { + return loadForwardShader(device, 5); +}), _defineProperty(_defineProperty(_defineProperty(_defineProperty(_weightBlockMixed16x, forwardShaderNames[6], function (device) { + return loadForwardShader(device, 6); +}), forwardShaderNames[7], function (device) { + return loadForwardShader(device, 7); +}), forwardShaderNames[8], function (device) { + return loadForwardShader(device, 8); +}), addBiasShaderName, loadAddBiasShader)); +var getPicollmWeightBlockMixed16x8WebGpuFunctions = function getPicollmWeightBlockMixed16x8WebGpuFunctions(memory) { + var setStatus = function setStatus(statusAddress, value) { + var memoryBufferInt32 = new Int32Array(memory.buffer); + memoryBufferInt32[statusAddress / Int32Array.BYTES_PER_ELEMENT] = value; + }; + var pvPicollmPreprocessBlocksWebGpu = function pvPicollmPreprocessBlocksWebGpu(objAddress, bitDepth, blocksAddress, blocksOffsetBytes, nbr, nbc, statusAddress) { + var 
_gpuBuffers$get; + objAddress = unsignedAddress(objAddress); + blocksAddress = unsignedAddress(blocksAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[preprocessShaderNames[bitDepth]]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var blocksBuffer = (_gpuBuffers$get = gpuBuffers.get(blocksAddress)) === null || _gpuBuffers$get === void 0 ? void 0 : _gpuBuffers$get.buffer; + if (!blocksBuffer) { + console.error('blocks buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(3 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "weight preprocess blocks ".concat(bitDepth, " arg buffer")); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([nbr, nbc, blocksOffsetBytes / 4])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "weight preprocess blocks ".concat(bitDepth, " bind group"), + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: blocksBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, preprocessShaderNames[bitDepth], Math.ceil(nbr / preprocessDim), Math.ceil(nbc / preprocessDim)); + setStatus(statusAddress, 0); + }; + var pvPicollmForwardSingleShuffleXWebGpu = function pvPicollmForwardSingleShuffleXWebGpu(objAddress, xAddress, xOffsetBytes, indicesAddress, indicesOffsetBytes, shape1, yAddress, statusAddress) { + var _gpuBuffers$get2, _gpuBuffers$get3, _gpuBuffers$get4; + objAddress = unsignedAddress(objAddress); + xAddress = unsignedAddress(xAddress); + indicesAddress = 
unsignedAddress(indicesAddress); + yAddress = unsignedAddress(yAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[forwardShuffleXShaderName]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var xBuffer = (_gpuBuffers$get2 = gpuBuffers.get(xAddress)) === null || _gpuBuffers$get2 === void 0 ? void 0 : _gpuBuffers$get2.buffer; + if (!xBuffer) { + console.error('X buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var indicesBuffer = (_gpuBuffers$get3 = gpuBuffers.get(indicesAddress)) === null || _gpuBuffers$get3 === void 0 ? void 0 : _gpuBuffers$get3.buffer; + if (!indicesBuffer) { + console.error('Indices buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var yBuffer = (_gpuBuffers$get4 = gpuBuffers.get(yAddress)) === null || _gpuBuffers$get4 === void 0 ? 
void 0 : _gpuBuffers$get4.buffer; + if (!yBuffer) { + console.error('Y buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(4 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "weight shuffle x arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([1, shape1, xOffsetBytes / 4, indicesOffsetBytes / 4])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "weight forward single shuffle x bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: xBuffer + } + }, { + binding: 2, + resource: { + buffer: indicesBuffer + } + }, { + binding: 3, + resource: { + buffer: yBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, forwardShuffleXShaderName + "_single", 1, Math.ceil(shape1 / PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE)); + setStatus(statusAddress, 0); + }; + var pvPicollmForwardSingleWebGpu = function pvPicollmForwardSingleWebGpu(objAddress, bitDepth, xAddress, xOffsetBytes, metasAddress, metasOffsetBytes, blocksAddress, blocksOffsetBytes, nbr, totalNbc, bitDepthNbc, yAddress, yOffsetBytes, statusAddress) { + var _gpuBuffers$get5, _gpuBuffers$get6, _gpuBuffers$get7, _gpuBuffers$get8; + objAddress = unsignedAddress(objAddress); + xAddress = unsignedAddress(xAddress); + metasAddress = unsignedAddress(metasAddress); + blocksAddress = unsignedAddress(blocksAddress); + yAddress = unsignedAddress(yAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[forwardSingleShaderNames[bitDepth]]; + if (!shader) { + console.error('Shader has not been 
loaded'); + setStatus(statusAddress, -1); + return; + } + var xBuffer = (_gpuBuffers$get5 = gpuBuffers.get(xAddress)) === null || _gpuBuffers$get5 === void 0 ? void 0 : _gpuBuffers$get5.buffer; + if (!xBuffer) { + console.error('X buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var metasBuffer = (_gpuBuffers$get6 = gpuBuffers.get(metasAddress)) === null || _gpuBuffers$get6 === void 0 ? void 0 : _gpuBuffers$get6.buffer; + if (!metasBuffer) { + console.error('Metas buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var blocksBuffer = (_gpuBuffers$get7 = gpuBuffers.get(blocksAddress)) === null || _gpuBuffers$get7 === void 0 ? void 0 : _gpuBuffers$get7.buffer; + if (!blocksBuffer) { + console.error('Blocks buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var yBuffer = (_gpuBuffers$get8 = gpuBuffers.get(yAddress)) === null || _gpuBuffers$get8 === void 0 ? void 0 : _gpuBuffers$get8.buffer; + if (!yBuffer) { + console.error('Y buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(8 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "weight forward single ".concat(bitDepth, " arg buffer")); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([1, nbr, totalNbc, bitDepthNbc, xOffsetBytes / 4, metasOffsetBytes / 4, blocksOffsetBytes / 4, yOffsetBytes / 4])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var entries = [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: xBuffer + } + }, { + binding: 2, + resource: { + buffer: metasBuffer + } + }, { + binding: 3, + resource: { + buffer: blocksBuffer + } + }, { + binding: 4, + resource: { + buffer: yBuffer + } + }]; + var bindGroup = obj.device.createBindGroup({ + label: "weight forward single ".concat(bitDepth, " bind group"), + layout: 
shader.computePipeline.getBindGroupLayout(0), + entries: entries + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, forwardSingleShaderNames[bitDepth], Math.ceil(nbr * 4 / weightBlockSize), bitDepthNbc); + setStatus(statusAddress, 0); + }; + var pvPicollmForwardSingleReduceYWebGpu = function pvPicollmForwardSingleReduceYWebGpu(objAddress, nbr, nbc, xAddress, yAddress, statusAddress) { + var _gpuBuffers$get9, _gpuBuffers$get10; + objAddress = unsignedAddress(objAddress); + xAddress = unsignedAddress(xAddress); + yAddress = unsignedAddress(yAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[forwardSingleReduceYShaderName]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var xBuffer = (_gpuBuffers$get9 = gpuBuffers.get(xAddress)) === null || _gpuBuffers$get9 === void 0 ? void 0 : _gpuBuffers$get9.buffer; + if (!xBuffer) { + console.error('X buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var yBuffer = (_gpuBuffers$get10 = gpuBuffers.get(yAddress)) === null || _gpuBuffers$get10 === void 0 ? 
void 0 : _gpuBuffers$get10.buffer; + if (!yBuffer) { + console.error('Y buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(2 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "weight single reduce y arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([nbr * 4, nbc])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "weight forward single reduce y bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: xBuffer + } + }, { + binding: 2, + resource: { + buffer: yBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, forwardSingleReduceYShaderName, Math.ceil(nbr * 4 / weightBlockSize)); + setStatus(statusAddress, 0); + }; + var pvPicollmForwardMultipleShuffleXWebGpu = function pvPicollmForwardMultipleShuffleXWebGpu(objAddress, xAddress, xOffsetBytes, indicesAddress, indicesOffsetBytes, n, shape1, yAddress, statusAddress) { + var _gpuBuffers$get11, _gpuBuffers$get12, _gpuBuffers$get13; + objAddress = unsignedAddress(objAddress); + xAddress = unsignedAddress(xAddress); + indicesAddress = unsignedAddress(indicesAddress); + yAddress = unsignedAddress(yAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[forwardShuffleXShaderName]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var xBuffer = (_gpuBuffers$get11 = gpuBuffers.get(xAddress)) === null || _gpuBuffers$get11 === void 0 ? 
void 0 : _gpuBuffers$get11.buffer; + if (!xBuffer) { + console.error('X buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var indicesBuffer = (_gpuBuffers$get12 = gpuBuffers.get(indicesAddress)) === null || _gpuBuffers$get12 === void 0 ? void 0 : _gpuBuffers$get12.buffer; + if (!indicesBuffer) { + console.error('Indices buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var yBuffer = (_gpuBuffers$get13 = gpuBuffers.get(yAddress)) === null || _gpuBuffers$get13 === void 0 ? void 0 : _gpuBuffers$get13.buffer; + if (!yBuffer) { + console.error('Y buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(4 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "weight multi shuffle x arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([n, shape1, xOffsetBytes / 4, indicesOffsetBytes / 4])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "weight forward multiple shuffle x bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: xBuffer + } + }, { + binding: 2, + resource: { + buffer: indicesBuffer + } + }, { + binding: 3, + resource: { + buffer: yBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, forwardShuffleXShaderName + "_multi", n, shape1); + setStatus(statusAddress, 0); + }; + var pvPicollmForwardMultipleWebGpu = function pvPicollmForwardMultipleWebGpu(objAddress, bitDepth, xAddress, xOffsetBytes, metasAddress, metasOffsetBytes, blocksAddress, blocksOffsetBytes, nbc, nbr, n, yAddress, yOffsetBytes, statusAddress) { + var _gpuBuffers$get14, _gpuBuffers$get15, _gpuBuffers$get16, _gpuBuffers$get17; + objAddress = unsignedAddress(objAddress); + xAddress = unsignedAddress(xAddress); + 
metasAddress = unsignedAddress(metasAddress); + blocksAddress = unsignedAddress(blocksAddress); + yAddress = unsignedAddress(yAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[forwardShaderNames[bitDepth]]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var xBuffer = (_gpuBuffers$get14 = gpuBuffers.get(xAddress)) === null || _gpuBuffers$get14 === void 0 ? void 0 : _gpuBuffers$get14.buffer; + if (!xBuffer) { + console.error('X buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var metasBuffer = (_gpuBuffers$get15 = gpuBuffers.get(metasAddress)) === null || _gpuBuffers$get15 === void 0 ? void 0 : _gpuBuffers$get15.buffer; + if (!metasBuffer) { + console.error('Metas buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var blocksBuffer = (_gpuBuffers$get16 = gpuBuffers.get(blocksAddress)) === null || _gpuBuffers$get16 === void 0 ? void 0 : _gpuBuffers$get16.buffer; + if (!blocksBuffer) { + console.error('Blocks buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var yBuffer = (_gpuBuffers$get17 = gpuBuffers.get(yAddress)) === null || _gpuBuffers$get17 === void 0 ? 
void 0 : _gpuBuffers$get17.buffer; + if (!yBuffer) { + console.error('Y buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(8 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "weight forward multi ".concat(bitDepth, " arg buffer")); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([n, nbr, 0, nbc, xOffsetBytes / 4, metasOffsetBytes / 4, blocksOffsetBytes / 4, yOffsetBytes / 4])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var entries = [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: xBuffer + } + }, { + binding: 2, + resource: { + buffer: metasBuffer + } + }, { + binding: 3, + resource: { + buffer: blocksBuffer + } + }, { + binding: 4, + resource: { + buffer: yBuffer + } + }]; + var bindGroup = obj.device.createBindGroup({ + label: "weight forward multi ".concat(bitDepth, " bind group"), + layout: shader.computePipeline.getBindGroupLayout(0), + entries: entries + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, forwardShaderNames[bitDepth], Math.ceil(nbr / BM), Math.ceil(n / BN)); + setStatus(statusAddress, 0); + }; + var pvPicollmForwardMultipleShuffleYWebGpu = function pvPicollmForwardMultipleShuffleYWebGpu(objAddress, n, shape0, xAddress, yAddress, statusAddress) { + var _gpuBuffers$get18, _gpuBuffers$get19; + objAddress = unsignedAddress(objAddress); + xAddress = unsignedAddress(xAddress); + yAddress = unsignedAddress(yAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[forwardShuffleYShaderName]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var xBuffer = (_gpuBuffers$get18 = 
gpuBuffers.get(xAddress)) === null || _gpuBuffers$get18 === void 0 ? void 0 : _gpuBuffers$get18.buffer; + if (!xBuffer) { + console.error('X buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var yBuffer = (_gpuBuffers$get19 = gpuBuffers.get(yAddress)) === null || _gpuBuffers$get19 === void 0 ? void 0 : _gpuBuffers$get19.buffer; + if (!yBuffer) { + console.error('Y buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(2 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "weight shuffle y arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([n, shape0])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "weight forward multiple shuffle y bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: xBuffer + } + }, { + binding: 2, + resource: { + buffer: yBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, forwardShuffleYShaderName, n, shape0); + setStatus(statusAddress, 0); + }; + var pvPicollmAddBiasWebGpu = function pvPicollmAddBiasWebGpu(objAddress, n, dimension, biasAddress, yAddress, statusAddress) { + var _gpuBuffers$get20, _gpuBuffers$get21; + objAddress = unsignedAddress(objAddress); + biasAddress = unsignedAddress(biasAddress); + yAddress = unsignedAddress(yAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[addBiasShaderName]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var biasBuffer = (_gpuBuffers$get20 = 
gpuBuffers.get(biasAddress)) === null || _gpuBuffers$get20 === void 0 ? void 0 : _gpuBuffers$get20.buffer; + if (!biasBuffer) { + console.error('Bias buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var yBuffer = (_gpuBuffers$get21 = gpuBuffers.get(yAddress)) === null || _gpuBuffers$get21 === void 0 ? void 0 : _gpuBuffers$get21.buffer; + if (!yBuffer) { + console.error('Y buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "weight add bias arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([dimension])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "weight add bias bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: biasBuffer + } + }, { + binding: 2, + resource: { + buffer: yBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, addBiasShaderName, n, dimension); + setStatus(statusAddress, 0); + }; + return { + pv_picollm_weight_block_mixed_16x8_preprocess_blocks_webgpu_wasm: pvPicollmPreprocessBlocksWebGpu, + pv_picollm_weight_block_mixed_16x8_forward_single_shuffle_x_webgpu_wasm: pvPicollmForwardSingleShuffleXWebGpu, + pv_picollm_weight_block_mixed_16x8_forward_single_webgpu_wasm: pvPicollmForwardSingleWebGpu, + pv_picollm_weight_block_mixed_16x8_forward_single_reduce_y_webgpu_wasm: pvPicollmForwardSingleReduceYWebGpu, + pv_picollm_weight_block_mixed_16x8_forward_multiple_shuffle_x_webgpu_wasm: pvPicollmForwardMultipleShuffleXWebGpu, + pv_picollm_weight_block_mixed_16x8_forward_multiple_webgpu_wasm: pvPicollmForwardMultipleWebGpu, + pv_picollm_weight_block_mixed_16x8_forward_multiple_shuffle_y_webgpu_wasm: 
pvPicollmForwardMultipleShuffleYWebGpu, + pv_picollm_weight_block_mixed_16x8_add_bias_webgpu_wasm: pvPicollmAddBiasWebGpu + }; +}; + +function ownKeys(e, r) { var t = Object.keys(e); if (Object.getOwnPropertySymbols) { var o = Object.getOwnPropertySymbols(e); r && (o = o.filter(function (r) { return Object.getOwnPropertyDescriptor(e, r).enumerable; })), t.push.apply(t, o); } return t; } +function _objectSpread(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys(Object(t), !0).forEach(function (r) { _defineProperty(e, r, t[r]); }) : Object.getOwnPropertyDescriptors ? Object.defineProperties(e, Object.getOwnPropertyDescriptors(t)) : ownKeys(Object(t)).forEach(function (r) { Object.defineProperty(e, r, Object.getOwnPropertyDescriptor(t, r)); }); } return e; } +var shaders = _objectSpread(_objectSpread(_objectSpread(_objectSpread(_objectSpread(_objectSpread(_objectSpread(_objectSpread(_objectSpread({}, attentionShaders), feedForwardShaders), gateForwardShader), moeTransformerForwardShaders), normForwardShader), normLayerForwardShader), transformerForwardShaders), weightFloatForwardShader), weightBlockMixed16x8Shaders); +function arrayBufferToStringAtIndex(arrayBuffer, indexStart) { + var indexEnd = indexStart; + while (arrayBuffer[indexEnd] !== 0) { + indexEnd++; + } + var utf8decoder = new TextDecoder('utf-8'); + return utf8decoder.decode(arrayBuffer.subarray(indexStart, indexEnd)); +} +var initXpu = function initXpu(memory, _wasm) { + var setInt = function setInt(statusAddress, value) { + var memoryBufferInt32 = new Int32Array(memory.buffer); + memoryBufferInt32[statusAddress / Int32Array.BYTES_PER_ELEMENT] = value; + }; + var pvXpuDeviceInit = /*#__PURE__*/function () { + var _ref = _asyncToGenerator( /*#__PURE__*/_regeneratorRuntime.mark(function _callee(objAddress, statusAddress) { + var adapter, device, adapterInfo; + return _regeneratorRuntime.wrap(function _callee$(_context) { + while (1) switch 
(_context.prev = _context.next) { + case 0: + objAddress = unsignedAddress(objAddress); + statusAddress = unsignedAddress(statusAddress); + _context.prev = 2; + if (!(typeof window !== "undefined" && !window.isSecureContext)) { + _context.next = 7; + break; + } + console.error('WebGPU is only available in secure contexts (e.g. HTTPS)'); + setInt(statusAddress, -1); + return _context.abrupt("return"); + case 7: + if (navigator.gpu) { + _context.next = 11; + break; + } + console.error('WebGPU not supported.'); + setInt(statusAddress, -1); + return _context.abrupt("return"); + case 11: + _context.next = 13; + return navigator.gpu.requestAdapter(); + case 13: + adapter = _context.sent; + if (adapter) { + _context.next = 18; + break; + } + console.error('WebGPU not supported, please enable it in your browser.'); + setInt(statusAddress, -1); + return _context.abrupt("return"); + case 18: + _context.next = 20; + return adapter.requestDevice({ + requiredFeatures: ["timestamp-query"], + requiredLimits: { + maxBufferSize: 1073741824, + maxStorageBufferBindingSize: 1073741824 + } + }); + case 20: + device = _context.sent; + if (device) { + _context.next = 25; + break; + } + console.error('Could not find a compatible WebGPU device.'); + setInt(statusAddress, -1); + return _context.abrupt("return"); + case 25: + _context.next = 27; + return adapter.requestAdapterInfo(); + case 27: + adapterInfo = _context.sent; + if (adapterInfo) { + _context.next = 32; + break; + } + console.error('Could not retrieve WebGPU adapter info.'); + setInt(statusAddress, -1); + return _context.abrupt("return"); + case 32: + gpuDevices.set(objAddress, new PvWebGPUDevice(device, adapterInfo)); + setInt(statusAddress, 0); + _context.next = 40; + break; + case 36: + _context.prev = 36; + _context.t0 = _context["catch"](2); + console.error(_context.t0); + setInt(statusAddress, -1); + case 40: + case "end": + return _context.stop(); + } + }, _callee, null, [[2, 36]]); + })); + return function 
pvXpuDeviceInit(_x, _x2) { + return _ref.apply(this, arguments); + }; + }(); + var pvXpuDeviceInfo = /*#__PURE__*/function () { + var _ref2 = _asyncToGenerator( /*#__PURE__*/_regeneratorRuntime.mark(function _callee2(objAddress, browserNameAddressAddress, browserVersionAddressAddress, osNameAddressAddress, deviceArchitectureAddressAddress, deviceVendorAddressAddress, maxBufferSizeAddress, maxComputeWorkgroupStorageSizeAddress, maxComputeInvocationsPerWorkgroupAddress, statusAddress) { + var obj, aligned_alloc, uaParser, memoryBufferUint8, browserName, browserNameAddress, i, browserVersion, browserVersionAddress, _i, osName, osNameAddress, _i2, deviceArchitecture, deviceArchitectureAddress, _i3, deviceVendor, deviceVendorAddress, _i4; + return _regeneratorRuntime.wrap(function _callee2$(_context2) { + while (1) switch (_context2.prev = _context2.next) { + case 0: + objAddress = unsignedAddress(objAddress); + browserNameAddressAddress = unsignedAddress(browserNameAddressAddress); + browserVersionAddressAddress = unsignedAddress(browserVersionAddressAddress); + osNameAddressAddress = unsignedAddress(osNameAddressAddress); + deviceArchitectureAddressAddress = unsignedAddress(deviceArchitectureAddressAddress); + deviceVendorAddressAddress = unsignedAddress(deviceVendorAddressAddress); + maxBufferSizeAddress = unsignedAddress(maxBufferSizeAddress); + maxComputeWorkgroupStorageSizeAddress = unsignedAddress(maxComputeWorkgroupStorageSizeAddress); + maxComputeInvocationsPerWorkgroupAddress = unsignedAddress(maxComputeInvocationsPerWorkgroupAddress); + statusAddress = unsignedAddress(statusAddress); + _context2.prev = 10; + obj = gpuDevices.get(objAddress); + if (obj) { + _context2.next = 16; + break; + } + console.error('WebGPU device has not been initialized'); + setInt(statusAddress, -1); + return _context2.abrupt("return"); + case 16: + aligned_alloc = imports.aligned_alloc; + uaParser = Bowser.getParser(navigator.userAgent); + memoryBufferUint8 = new 
Uint8Array(memory.buffer); + browserName = uaParser.getBrowserName(); + _context2.next = 22; + return aligned_alloc(Uint8Array.BYTES_PER_ELEMENT, (browserName.length + 1) * Uint8Array.BYTES_PER_ELEMENT); + case 22: + browserNameAddress = _context2.sent; + if (!(browserNameAddress === 0)) { + _context2.next = 26; + break; + } + setInt(statusAddress, -1); + return _context2.abrupt("return"); + case 26: + setInt(browserNameAddressAddress, browserNameAddress); + for (i = 0; i < browserName.length; i++) { + memoryBufferUint8[browserNameAddress + i] = browserName.charCodeAt(i); + } + memoryBufferUint8[browserNameAddress + browserName.length] = 0; + browserVersion = uaParser.getBrowserVersion(); + _context2.next = 32; + return aligned_alloc(Uint8Array.BYTES_PER_ELEMENT, (browserVersion.length + 1) * Uint8Array.BYTES_PER_ELEMENT); + case 32: + browserVersionAddress = _context2.sent; + if (!(browserVersionAddress === 0)) { + _context2.next = 36; + break; + } + setInt(statusAddress, -1); + return _context2.abrupt("return"); + case 36: + setInt(browserVersionAddressAddress, browserVersionAddress); + for (_i = 0; _i < browserVersion.length; _i++) { + memoryBufferUint8[browserVersionAddress + _i] = browserVersion.charCodeAt(_i); + } + memoryBufferUint8[browserVersionAddress + browserVersion.length] = 0; + osName = uaParser.getOSName(); + _context2.next = 42; + return aligned_alloc(Uint8Array.BYTES_PER_ELEMENT, (osName.length + 1) * Uint8Array.BYTES_PER_ELEMENT); + case 42: + osNameAddress = _context2.sent; + if (!(osNameAddress === 0)) { + _context2.next = 46; + break; + } + setInt(statusAddress, -1); + return _context2.abrupt("return"); + case 46: + setInt(osNameAddressAddress, osNameAddress); + for (_i2 = 0; _i2 < osName.length; _i2++) { + memoryBufferUint8[osNameAddress + _i2] = osName.charCodeAt(_i2); + } + memoryBufferUint8[osNameAddress + osName.length] = 0; + deviceArchitecture = obj.adapterInfo.architecture; + _context2.next = 52; + return 
aligned_alloc(Uint8Array.BYTES_PER_ELEMENT, (deviceArchitecture.length + 1) * Uint8Array.BYTES_PER_ELEMENT); + case 52: + deviceArchitectureAddress = _context2.sent; + if (!(deviceArchitectureAddress === 0)) { + _context2.next = 56; + break; + } + setInt(statusAddress, -1); + return _context2.abrupt("return"); + case 56: + setInt(deviceArchitectureAddressAddress, deviceArchitectureAddress); + for (_i3 = 0; _i3 < deviceArchitecture.length; _i3++) { + memoryBufferUint8[deviceArchitectureAddress + _i3] = deviceArchitecture.charCodeAt(_i3); + } + memoryBufferUint8[deviceArchitectureAddress + deviceArchitecture.length] = 0; + deviceVendor = obj.adapterInfo.vendor; + _context2.next = 62; + return aligned_alloc(Uint8Array.BYTES_PER_ELEMENT, (deviceVendor.length + 1) * Uint8Array.BYTES_PER_ELEMENT); + case 62: + deviceVendorAddress = _context2.sent; + if (!(deviceVendorAddress === 0)) { + _context2.next = 66; + break; + } + setInt(statusAddress, -1); + return _context2.abrupt("return"); + case 66: + setInt(deviceVendorAddressAddress, deviceVendorAddress); + for (_i4 = 0; _i4 < deviceVendor.length; _i4++) { + memoryBufferUint8[deviceVendorAddress + _i4] = deviceVendor.charCodeAt(_i4); + } + memoryBufferUint8[deviceVendorAddress + deviceVendor.length] = 0; + setInt(maxBufferSizeAddress, obj.device.limits.maxBufferSize); + setInt(maxComputeWorkgroupStorageSizeAddress, obj.device.limits.maxComputeWorkgroupStorageSize); + setInt(maxComputeInvocationsPerWorkgroupAddress, obj.device.limits.maxComputeInvocationsPerWorkgroup); + setInt(statusAddress, 0); + _context2.next = 79; + break; + case 75: + _context2.prev = 75; + _context2.t0 = _context2["catch"](10); + console.error(_context2.t0); + setInt(statusAddress, -1); + case 79: + case "end": + return _context2.stop(); + } + }, _callee2, null, [[10, 75]]); + })); + return function pvXpuDeviceInfo(_x3, _x4, _x5, _x6, _x7, _x8, _x9, _x10, _x11, _x12) { + return _ref2.apply(this, arguments); + }; + }(); + var pvXpuDeviceCleanup = 
function pvXpuDeviceCleanup(objAddress) { + objAddress = unsignedAddress(objAddress); + var obj = gpuDevices.get(objAddress); + if (!obj) { + return; + } + gpuDevices["delete"](objAddress); + }; + var pvXpuDeviceLoadShaderFunc = function pvXpuDeviceLoadShaderFunc(objAddress, shaderNameAddress, statusAddress) { + objAddress = unsignedAddress(objAddress); + shaderNameAddress = unsignedAddress(shaderNameAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj) { + console.error('WebGPU device has not been initialized'); + setInt(statusAddress, -1); + return; + } + var memoryBufferUint8 = new Uint8Array(memory.buffer); + var shaderName = arrayBufferToStringAtIndex(memoryBufferUint8, shaderNameAddress); + if (!shaders[shaderName]) { + console.error("WebGPU device could not find shader with name ".concat(shaderName)); + setInt(statusAddress, -1); + return; + } + var shaderLoadFunc = shaders[shaderName]; + obj.shaders[shaderName] = shaderLoadFunc(obj.device); + setInt(statusAddress, 0); + }; + var pvXpuDeviceWait = /*#__PURE__*/function () { + var _ref3 = _asyncToGenerator( /*#__PURE__*/_regeneratorRuntime.mark(function _callee3(objAddress, statusAddress) { + var obj; + return _regeneratorRuntime.wrap(function _callee3$(_context3) { + while (1) switch (_context3.prev = _context3.next) { + case 0: + objAddress = unsignedAddress(objAddress); + statusAddress = unsignedAddress(statusAddress); + obj = gpuDevices.get(objAddress); + if (obj) { + _context3.next = 6; + break; + } + setInt(statusAddress, -1); + return _context3.abrupt("return"); + case 6: + _context3.next = 8; + return obj.sync(); + case 8: + setInt(statusAddress, 0); + case 9: + case "end": + return _context3.stop(); + } + }, _callee3); + })); + return function pvXpuDeviceWait(_x13, _x14) { + return _ref3.apply(this, arguments); + }; + }(); + var pvXpuDeviceMemAlloc = function pvXpuDeviceMemAlloc(objAddress, memAddress, sizeBytes, isOutput, statusAddress) { + 
objAddress = unsignedAddress(objAddress); + memAddress = unsignedAddress(memAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj) { + console.error('WebGPU device has not been initialized'); + setInt(statusAddress, -1); + return; + } + var usage = GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST; + if (isOutput) { + usage |= GPUBufferUsage.COPY_SRC; + } + gpuBuffers.set(memAddress, { + deviceAddress: objAddress, + buffer: obj.getBuffer(sizeBytes, usage) + }); + setInt(statusAddress, 0); + }; + var pvXpuDeviceMemFree = function pvXpuDeviceMemFree(memAddress) { + memAddress = unsignedAddress(memAddress); + if (gpuBuffers.has(memAddress)) { + var gpuBuffer = gpuBuffers.get(memAddress); + if (!gpuBuffer || !gpuBuffer.buffer || !gpuBuffer.deviceAddress) { + console.error('GPU buffer has not been allocated'); + return; + } + var obj = gpuDevices.get(gpuBuffer.deviceAddress); + if (!obj) { + console.error('WebGPU device has not been initialized'); + return; + } + obj.releaseBuffer(gpuBuffer.buffer); + gpuBuffers["delete"](memAddress); + } + }; + var pvXpuDeviceMemCopyToXpu = function pvXpuDeviceMemCopyToXpu(memAddress, hostAddress, offset, sizeBytes) { + memAddress = unsignedAddress(memAddress); + hostAddress = unsignedAddress(hostAddress); + if (hostAddress < 0) { + console.error('Invalid host address', memAddress, hostAddress, offset, sizeBytes); + return; + } + var gpuBuffer = gpuBuffers.get(memAddress); + if (!gpuBuffer || !gpuBuffer.buffer || !gpuBuffer.deviceAddress) { + console.error('GPU buffer has not been allocated'); + return; + } + var obj = gpuDevices.get(gpuBuffer.deviceAddress); + if (!obj) { + console.error('WebGPU device has not been initialized'); + return; + } + var memoryBufferUint8 = new Uint8Array(memory.buffer); + obj.writeBuffer(sizeBytes, offset, memoryBufferUint8.slice(hostAddress, hostAddress + sizeBytes), gpuBuffer.buffer); + }; + var pvXpuDeviceMemCopyFromXpu = /*#__PURE__*/function () 
{ + var _ref4 = _asyncToGenerator( /*#__PURE__*/_regeneratorRuntime.mark(function _callee4(memAddress, hostAddress, offset, sizeBytes) { + var gpuBuffer, obj, stageBuffer, mappedBuffer, memoryBufferUint8; + return _regeneratorRuntime.wrap(function _callee4$(_context4) { + while (1) switch (_context4.prev = _context4.next) { + case 0: + memAddress = unsignedAddress(memAddress); + hostAddress = unsignedAddress(hostAddress); + if (!(hostAddress < 0)) { + _context4.next = 5; + break; + } + console.error('Invalid host address', memAddress, hostAddress, offset, sizeBytes); + return _context4.abrupt("return"); + case 5: + gpuBuffer = gpuBuffers.get(memAddress); + if (gpuBuffer !== null && gpuBuffer !== void 0 && gpuBuffer.buffer) { + _context4.next = 9; + break; + } + console.error('GPU buffer has not been allocated'); + return _context4.abrupt("return"); + case 9: + obj = gpuDevices.get(gpuBuffer.deviceAddress); + if (obj) { + _context4.next = 13; + break; + } + console.error('WebGPU device has not been initialized'); + return _context4.abrupt("return"); + case 13: + stageBuffer = obj.getBuffer((offset + sizeBytes) * Uint8Array.BYTES_PER_ELEMENT, GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST); + obj.endComputePass(); + obj.commandEncoder.copyBufferToBuffer(gpuBuffer.buffer, 0, stageBuffer, 0, stageBuffer.size); + _context4.next = 18; + return obj.sync(); + case 18: + _context4.next = 20; + return stageBuffer.mapAsync(GPUMapMode.READ, 0, sizeBytes + offset); + case 20: + mappedBuffer = new Uint8Array(stageBuffer.getMappedRange(0, sizeBytes + offset)); + memoryBufferUint8 = new Uint8Array(memory.buffer); + memoryBufferUint8.set(mappedBuffer.slice(offset, sizeBytes + offset), hostAddress); + stageBuffer.unmap(); + obj.releaseBuffer(stageBuffer); + case 25: + case "end": + return _context4.stop(); + } + }, _callee4); + })); + return function pvXpuDeviceMemCopyFromXpu(_x15, _x16, _x17, _x18) { + return _ref4.apply(this, arguments); + }; + }(); + var pvXpuDeviceMemMemset = 
function pvXpuDeviceMemMemset(memAddress, fillByte, sizeBytes) { + memAddress = unsignedAddress(memAddress); + var gpuBuffer = gpuBuffers.get(memAddress); + if (!gpuBuffer || !gpuBuffer.buffer || !gpuBuffer.deviceAddress) { + console.error('GPU buffer has not been allocated'); + return; + } + var obj = gpuDevices.get(gpuBuffer.deviceAddress); + if (!obj) { + console.error('WebGPU device has not been initialized'); + return; + } + if (fillByte === 0) { + obj.endComputePass(); + obj.commandEncoder.clearBuffer(gpuBuffer.buffer, 0, sizeBytes); + obj.numCommandsEncoded++; + } else { + var stagingBuffer = new Uint8Array(sizeBytes); + stagingBuffer.fill(fillByte); + obj.writeBuffer(sizeBytes, 0, stagingBuffer, gpuBuffer.buffer); + } + }; + var pvXpuDeviceTimerStart = function pvXpuDeviceTimerStart(objAddress) { + objAddress = unsignedAddress(objAddress); + var obj = gpuDevices.get(objAddress); + if (!obj) { + console.error('WebGPU device has not been initialized'); + return; + } + obj.isTimerEnabled = true; + }; + var pvXpuDeviceTimerStop = /*#__PURE__*/function () { + var _ref5 = _asyncToGenerator( /*#__PURE__*/_regeneratorRuntime.mark(function _callee5(objAddress) { + var obj; + return _regeneratorRuntime.wrap(function _callee5$(_context5) { + while (1) switch (_context5.prev = _context5.next) { + case 0: + objAddress = unsignedAddress(objAddress); + obj = gpuDevices.get(objAddress); + if (obj) { + _context5.next = 5; + break; + } + console.error('WebGPU device has not been initialized'); + return _context5.abrupt("return"); + case 5: + _context5.next = 7; + return obj.sync(); + case 7: + obj.reportShaderTimes(); + case 8: + case "end": + return _context5.stop(); + } + }, _callee5); + })); + return function pvXpuDeviceTimerStop(_x19) { + return _ref5.apply(this, arguments); + }; + }(); + var imports = _objectSpread(_objectSpread(_objectSpread(_objectSpread(_objectSpread(_objectSpread(_objectSpread(_objectSpread(_objectSpread({ + aligned_alloc: function 
aligned_alloc(alignment, size) { + throw new Error("aligned_alloc was not passed in from parent module"); + }, + pv_xpu_webgpu_device_init_wasm: pvXpuDeviceInit, + pv_xpu_webgpu_device_info_wasm: pvXpuDeviceInfo, + pv_xpu_webgpu_device_cleanup_wasm: pvXpuDeviceCleanup, + pv_xpu_webgpu_device_load_shader_func_wasm: pvXpuDeviceLoadShaderFunc, + pv_xpu_webgpu_device_wait_wasm: pvXpuDeviceWait, + pv_xpu_webgpu_device_mem_alloc_wasm: pvXpuDeviceMemAlloc, + pv_xpu_webgpu_device_mem_free_wasm: pvXpuDeviceMemFree, + pv_xpu_webgpu_device_mem_copy_to_xpu_wasm: pvXpuDeviceMemCopyToXpu, + pv_xpu_webgpu_device_mem_copy_from_xpu_wasm: pvXpuDeviceMemCopyFromXpu, + pv_xpu_webgpu_device_mem_memset_wasm: pvXpuDeviceMemMemset, + pv_xpu_webgpu_timer_start_wasm: pvXpuDeviceTimerStart, + pv_xpu_webgpu_timer_stop_wasm: pvXpuDeviceTimerStop + }, getPicollmAttentionWebGpuFunctions(memory)), getPicollmGateWebGpuFunctions(memory)), getPicollmFeedForwardWebGpuFunctions(memory)), getPicollmMoeTransformerWebGpuFunctions(memory)), getPicollmNormLayerWebGpuFunctions(memory)), getPicollmNormWebGpuFunctions(memory)), getPicollmTransformerWebGpuFunctions(memory)), getPicollmWeightFloatWebGpuFunctions(memory)), getPicollmWeightBlockMixed16x8WebGpuFunctions(memory)); + return imports; +}; + +export { initXpu as default }; diff --git a/lib/wasm/dist/xpu_webgpu/esm/index.min.js b/lib/wasm/dist/xpu_webgpu/esm/index.min.js new file mode 100644 index 00000000..0d2137ef --- /dev/null +++ b/lib/wasm/dist/xpu_webgpu/esm/index.min.js @@ -0,0 +1,7 @@ +function e(e,t,r,n,a,i,u){try{var o=e[i](u),s=o.value}catch(e){return void r(e)}o.done?t(s):Promise.resolve(s).then(n,a)}function t(t){return function(){var r=this,n=arguments;return new Promise((function(a,i){var u=t.apply(r,n);function o(t){e(u,a,i,o,s,"next",t)}function s(t){e(u,a,i,o,s,"throw",t)}o(void 0)}))}}function r(e){return r="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(e){return typeof e}:function(e){return e&&"function"==typeof 
Symbol&&e.constructor===Symbol&&e!==Symbol.prototype?"symbol":typeof e},r(e)}function n(e){var t=function(e,t){if("object"!=r(e)||!e)return e;var n=e[Symbol.toPrimitive];if(void 0!==n){var a=n.call(e,t||"default");if("object"!=r(a))return a;throw new TypeError("@@toPrimitive must return a primitive value.")}return("string"===t?String:Number)(e)}(e,"string");return"symbol"==r(t)?t:String(t)}function a(e,t,r){return(t=n(t))in e?Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[t]=r,e}function i(e){return e&&e.__esModule&&Object.prototype.hasOwnProperty.call(e,"default")?e.default:e}var u={exports:{}},o={exports:{}};!function(e){function t(r){return e.exports=t="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(e){return typeof e}:function(e){return e&&"function"==typeof Symbol&&e.constructor===Symbol&&e!==Symbol.prototype?"symbol":typeof e},e.exports.__esModule=!0,e.exports.default=e.exports,t(r)}e.exports=t,e.exports.__esModule=!0,e.exports.default=e.exports}(o);var s=o.exports;!function(e){var t=s.default;function r(){e.exports=r=function(){return a},e.exports.__esModule=!0,e.exports.default=e.exports;var n,a={},i=Object.prototype,u=i.hasOwnProperty,o=Object.defineProperty||function(e,t,r){e[t]=r.value},s="function"==typeof Symbol?Symbol:{},c=s.iterator||"@@iterator",l=s.asyncIterator||"@@asyncIterator",_=s.toStringTag||"@@toStringTag";function d(e,t,r){return Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}),e[t]}try{d({},"")}catch(n){d=function(e,t,r){return e[t]=r}}function f(e,t,r,n){var a=t&&t.prototype instanceof m?t:m,i=Object.create(a.prototype),u=new O(n||[]);return o(i,"_invoke",{value:z(e,r,u)}),i}function b(e,t,r){try{return{type:"normal",arg:e.call(t,r)}}catch(e){return{type:"throw",arg:e}}}a.wrap=f;var p="suspendedStart",g="suspendedYield",v="executing",x="completed",k={};function m(){}function h(){}function y(){}var B={};d(B,c,(function(){return this}));var 
w=Object.getPrototypeOf,P=w&&w(w(G([])));P&&P!==i&&u.call(P,c)&&(B=P);var S=y.prototype=m.prototype=Object.create(B);function E(e){["next","throw","return"].forEach((function(t){d(e,t,(function(e){return this._invoke(t,e)}))}))}function U(e,r){function n(a,i,o,s){var c=b(e[a],e,i);if("throw"!==c.type){var l=c.arg,_=l.value;return _&&"object"==t(_)&&u.call(_,"__await")?r.resolve(_.__await).then((function(e){n("next",e,o,s)}),(function(e){n("throw",e,o,s)})):r.resolve(_).then((function(e){l.value=e,o(l)}),(function(e){return n("throw",e,o,s)}))}s(c.arg)}var a;o(this,"_invoke",{value:function(e,t){function i(){return new r((function(r,a){n(e,t,r,a)}))}return a=a?a.then(i,i):i()}})}function z(e,t,r){var a=p;return function(i,u){if(a===v)throw new Error("Generator is already running");if(a===x){if("throw"===i)throw u;return{value:n,done:!0}}for(r.method=i,r.arg=u;;){var o=r.delegate;if(o){var s=j(o,r);if(s){if(s===k)continue;return s}}if("next"===r.method)r.sent=r._sent=r.arg;else if("throw"===r.method){if(a===p)throw a=x,r.arg;r.dispatchException(r.arg)}else"return"===r.method&&r.abrupt("return",r.arg);a=v;var c=b(e,t,r);if("normal"===c.type){if(a=r.done?x:g,c.arg===k)continue;return{value:c.arg,done:r.done}}"throw"===c.type&&(a=x,r.method="throw",r.arg=c.arg)}}}function j(e,t){var r=t.method,a=e.iterator[r];if(a===n)return t.delegate=null,"throw"===r&&e.iterator.return&&(t.method="return",t.arg=n,j(e,t),"throw"===t.method)||"return"!==r&&(t.method="throw",t.arg=new TypeError("The iterator does not provide a '"+r+"' method")),k;var i=b(a,e.iterator,t.arg);if("throw"===i.type)return t.method="throw",t.arg=i.arg,t.delegate=null,k;var u=i.arg;return u?u.done?(t[e.resultName]=u.value,t.next=e.nextLoc,"return"!==t.method&&(t.method="next",t.arg=n),t.delegate=null,k):u:(t.method="throw",t.arg=new TypeError("iterator result is not an object"),t.delegate=null,k)}function M(e){var t={tryLoc:e[0]};1 in e&&(t.catchLoc=e[1]),2 in 
e&&(t.finallyLoc=e[2],t.afterLoc=e[3]),this.tryEntries.push(t)}function C(e){var t=e.completion||{};t.type="normal",delete t.arg,e.completion=t}function O(e){this.tryEntries=[{tryLoc:"root"}],e.forEach(M,this),this.reset(!0)}function G(e){if(e||""===e){var r=e[c];if(r)return r.call(e);if("function"==typeof e.next)return e;if(!isNaN(e.length)){var a=-1,i=function t(){for(;++a=0;--a){var i=this.tryEntries[a],o=i.completion;if("root"===i.tryLoc)return r("end");if(i.tryLoc<=this.prev){var s=u.call(i,"catchLoc"),c=u.call(i,"finallyLoc");if(s&&c){if(this.prev=0;--r){var n=this.tryEntries[r];if(n.tryLoc<=this.prev&&u.call(n,"finallyLoc")&&this.prev=0;--t){var r=this.tryEntries[t];if(r.finallyLoc===e)return this.complete(r.completion,r.afterLoc),C(r),k}},catch:function(e){for(var t=this.tryEntries.length-1;t>=0;--t){var r=this.tryEntries[t];if(r.tryLoc===e){var n=r.completion;if("throw"===n.type){var a=n.arg;C(r)}return a}}throw new Error("illegal catch attempt")},delegateYield:function(e,t,r){return this.delegate={iterator:G(e),resultName:t,nextLoc:r},"next"===this.method&&(this.arg=n),k}},a}e.exports=r,e.exports.__esModule=!0,e.exports.default=e.exports}(u);var c=(0,u.exports)(),l=c;try{regeneratorRuntime=c}catch(e){"object"==typeof globalThis?globalThis.regeneratorRuntime=c:Function("r","regeneratorRuntime = r")(c)}var _=i(l);const d={"Amazon Silk":"amazon_silk","Android Browser":"android",Bada:"bada",BlackBerry:"blackberry",Chrome:"chrome",Chromium:"chromium",Electron:"electron",Epiphany:"epiphany",Firefox:"firefox",Focus:"focus",Generic:"generic","Google Search":"google_search",Googlebot:"googlebot","Internet Explorer":"ie","K-Meleon":"k_meleon",Maxthon:"maxthon","Microsoft Edge":"edge","MZ Browser":"mz","NAVER Whale Browser":"naver",Opera:"opera","Opera Coast":"opera_coast",PhantomJS:"phantomjs",Puffin:"puffin",QupZilla:"qupzilla",QQ:"qq",QQLite:"qqlite",Safari:"safari",Sailfish:"sailfish","Samsung Internet for 
Android":"samsung_internet",SeaMonkey:"seamonkey",Sleipnir:"sleipnir",Swing:"swing",Tizen:"tizen","UC Browser":"uc",Vivaldi:"vivaldi","WebOS Browser":"webos",WeChat:"wechat","Yandex Browser":"yandex",Roku:"roku"},f={amazon_silk:"Amazon Silk",android:"Android Browser",bada:"Bada",blackberry:"BlackBerry",chrome:"Chrome",chromium:"Chromium",electron:"Electron",epiphany:"Epiphany",firefox:"Firefox",focus:"Focus",generic:"Generic",googlebot:"Googlebot",google_search:"Google Search",ie:"Internet Explorer",k_meleon:"K-Meleon",maxthon:"Maxthon",edge:"Microsoft Edge",mz:"MZ Browser",naver:"NAVER Whale Browser",opera:"Opera",opera_coast:"Opera Coast",phantomjs:"PhantomJS",puffin:"Puffin",qupzilla:"QupZilla",qq:"QQ Browser",qqlite:"QQ Browser Lite",safari:"Safari",sailfish:"Sailfish",samsung_internet:"Samsung Internet for Android",seamonkey:"SeaMonkey",sleipnir:"Sleipnir",swing:"Swing",tizen:"Tizen",uc:"UC Browser",vivaldi:"Vivaldi",webos:"WebOS Browser",wechat:"WeChat",yandex:"Yandex Browser"},b={tablet:"tablet",mobile:"mobile",desktop:"desktop",tv:"tv"},p={WindowsPhone:"Windows Phone",Windows:"Windows",MacOS:"macOS",iOS:"iOS",Android:"Android",WebOS:"WebOS",BlackBerry:"BlackBerry",Bada:"Bada",Tizen:"Tizen",Linux:"Linux",ChromeOS:"Chrome OS",PlayStation4:"PlayStation 4",Roku:"Roku"},g={EdgeHTML:"EdgeHTML",Blink:"Blink",Trident:"Trident",Presto:"Presto",Gecko:"Gecko",WebKit:"WebKit"};class v{static getFirstMatch(e,t){const r=t.match(e);return r&&r.length>0&&r[1]||""}static getSecondMatch(e,t){const r=t.match(e);return r&&r.length>1&&r[2]||""}static matchAndReturnConst(e,t,r){if(e.test(t))return r}static getWindowsVersionName(e){switch(e){case"NT":return"NT";case"XP":case"NT 5.1":return"XP";case"NT 5.0":return"2000";case"NT 5.2":return"2003";case"NT 6.0":return"Vista";case"NT 6.1":return"7";case"NT 6.2":return"8";case"NT 6.3":return"8.1";case"NT 10.0":return"10";default:return}}static getMacOSVersionName(e){const 
t=e.split(".").splice(0,2).map((e=>parseInt(e,10)||0));if(t.push(0),10===t[0])switch(t[1]){case 5:return"Leopard";case 6:return"Snow Leopard";case 7:return"Lion";case 8:return"Mountain Lion";case 9:return"Mavericks";case 10:return"Yosemite";case 11:return"El Capitan";case 12:return"Sierra";case 13:return"High Sierra";case 14:return"Mojave";case 15:return"Catalina";default:return}}static getAndroidVersionName(e){const t=e.split(".").splice(0,2).map((e=>parseInt(e,10)||0));if(t.push(0),!(1===t[0]&&t[1]<5))return 1===t[0]&&t[1]<6?"Cupcake":1===t[0]&&t[1]>=6?"Donut":2===t[0]&&t[1]<2?"Eclair":2===t[0]&&2===t[1]?"Froyo":2===t[0]&&t[1]>2?"Gingerbread":3===t[0]?"Honeycomb":4===t[0]&&t[1]<1?"Ice Cream Sandwich":4===t[0]&&t[1]<4?"Jelly Bean":4===t[0]&&t[1]>=4?"KitKat":5===t[0]?"Lollipop":6===t[0]?"Marshmallow":7===t[0]?"Nougat":8===t[0]?"Oreo":9===t[0]?"Pie":void 0}static getVersionPrecision(e){return e.split(".").length}static compareVersions(e,t,r=!1){const n=v.getVersionPrecision(e),a=v.getVersionPrecision(t);let i=Math.max(n,a),u=0;const o=v.map([e,t],(e=>{const t=i-v.getVersionPrecision(e),r=e+new Array(t+1).join(".0");return v.map(r.split("."),(e=>new Array(20-e.length).join("0")+e)).reverse()}));for(r&&(u=i-Math.min(n,a)),i-=1;i>=u;){if(o[0][i]>o[1][i])return 1;if(o[0][i]===o[1][i]){if(i===u)return 0;i-=1}else if(o[0][i]{r[t]=e[t]}))}}return e}static getBrowserAlias(e){return d[e]}static getBrowserTypeByAlias(e){return f[e]||""}}const x=/version\/(\d+(\.?_?\d+)+)/i,k=[{test:[/googlebot/i],describe(e){const t={name:"Googlebot"},r=v.getFirstMatch(/googlebot\/(\d+(\.\d+))/i,e)||v.getFirstMatch(x,e);return r&&(t.version=r),t}},{test:[/opera/i],describe(e){const t={name:"Opera"},r=v.getFirstMatch(x,e)||v.getFirstMatch(/(?:opera)[\s/](\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/opr\/|opios/i],describe(e){const t={name:"Opera"},r=v.getFirstMatch(/(?:opr|opios)[\s/](\S+)/i,e)||v.getFirstMatch(x,e);return 
r&&(t.version=r),t}},{test:[/SamsungBrowser/i],describe(e){const t={name:"Samsung Internet for Android"},r=v.getFirstMatch(x,e)||v.getFirstMatch(/(?:SamsungBrowser)[\s/](\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/Whale/i],describe(e){const t={name:"NAVER Whale Browser"},r=v.getFirstMatch(x,e)||v.getFirstMatch(/(?:whale)[\s/](\d+(?:\.\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/MZBrowser/i],describe(e){const t={name:"MZ Browser"},r=v.getFirstMatch(/(?:MZBrowser)[\s/](\d+(?:\.\d+)+)/i,e)||v.getFirstMatch(x,e);return r&&(t.version=r),t}},{test:[/focus/i],describe(e){const t={name:"Focus"},r=v.getFirstMatch(/(?:focus)[\s/](\d+(?:\.\d+)+)/i,e)||v.getFirstMatch(x,e);return r&&(t.version=r),t}},{test:[/swing/i],describe(e){const t={name:"Swing"},r=v.getFirstMatch(/(?:swing)[\s/](\d+(?:\.\d+)+)/i,e)||v.getFirstMatch(x,e);return r&&(t.version=r),t}},{test:[/coast/i],describe(e){const t={name:"Opera Coast"},r=v.getFirstMatch(x,e)||v.getFirstMatch(/(?:coast)[\s/](\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/opt\/\d+(?:.?_?\d+)+/i],describe(e){const t={name:"Opera Touch"},r=v.getFirstMatch(/(?:opt)[\s/](\d+(\.?_?\d+)+)/i,e)||v.getFirstMatch(x,e);return r&&(t.version=r),t}},{test:[/yabrowser/i],describe(e){const t={name:"Yandex Browser"},r=v.getFirstMatch(/(?:yabrowser)[\s/](\d+(\.?_?\d+)+)/i,e)||v.getFirstMatch(x,e);return r&&(t.version=r),t}},{test:[/ucbrowser/i],describe(e){const t={name:"UC Browser"},r=v.getFirstMatch(x,e)||v.getFirstMatch(/(?:ucbrowser)[\s/](\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/Maxthon|mxios/i],describe(e){const t={name:"Maxthon"},r=v.getFirstMatch(x,e)||v.getFirstMatch(/(?:Maxthon|mxios)[\s/](\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/epiphany/i],describe(e){const t={name:"Epiphany"},r=v.getFirstMatch(x,e)||v.getFirstMatch(/(?:epiphany)[\s/](\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/puffin/i],describe(e){const 
t={name:"Puffin"},r=v.getFirstMatch(x,e)||v.getFirstMatch(/(?:puffin)[\s/](\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/sleipnir/i],describe(e){const t={name:"Sleipnir"},r=v.getFirstMatch(x,e)||v.getFirstMatch(/(?:sleipnir)[\s/](\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/k-meleon/i],describe(e){const t={name:"K-Meleon"},r=v.getFirstMatch(x,e)||v.getFirstMatch(/(?:k-meleon)[\s/](\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/micromessenger/i],describe(e){const t={name:"WeChat"},r=v.getFirstMatch(/(?:micromessenger)[\s/](\d+(\.?_?\d+)+)/i,e)||v.getFirstMatch(x,e);return r&&(t.version=r),t}},{test:[/qqbrowser/i],describe(e){const t={name:/qqbrowserlite/i.test(e)?"QQ Browser Lite":"QQ Browser"},r=v.getFirstMatch(/(?:qqbrowserlite|qqbrowser)[/](\d+(\.?_?\d+)+)/i,e)||v.getFirstMatch(x,e);return r&&(t.version=r),t}},{test:[/msie|trident/i],describe(e){const t={name:"Internet Explorer"},r=v.getFirstMatch(/(?:msie |rv:)(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/\sedg\//i],describe(e){const t={name:"Microsoft Edge"},r=v.getFirstMatch(/\sedg\/(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/edg([ea]|ios)/i],describe(e){const t={name:"Microsoft Edge"},r=v.getSecondMatch(/edg([ea]|ios)\/(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/vivaldi/i],describe(e){const t={name:"Vivaldi"},r=v.getFirstMatch(/vivaldi\/(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/seamonkey/i],describe(e){const t={name:"SeaMonkey"},r=v.getFirstMatch(/seamonkey\/(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/sailfish/i],describe(e){const t={name:"Sailfish"},r=v.getFirstMatch(/sailfish\s?browser\/(\d+(\.\d+)?)/i,e);return r&&(t.version=r),t}},{test:[/silk/i],describe(e){const t={name:"Amazon Silk"},r=v.getFirstMatch(/silk\/(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/phantom/i],describe(e){const t={name:"PhantomJS"},r=v.getFirstMatch(/phantomjs\/(\d+(\.?_?\d+)+)/i,e);return 
r&&(t.version=r),t}},{test:[/slimerjs/i],describe(e){const t={name:"SlimerJS"},r=v.getFirstMatch(/slimerjs\/(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/blackberry|\bbb\d+/i,/rim\stablet/i],describe(e){const t={name:"BlackBerry"},r=v.getFirstMatch(x,e)||v.getFirstMatch(/blackberry[\d]+\/(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/(web|hpw)[o0]s/i],describe(e){const t={name:"WebOS Browser"},r=v.getFirstMatch(x,e)||v.getFirstMatch(/w(?:eb)?[o0]sbrowser\/(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/bada/i],describe(e){const t={name:"Bada"},r=v.getFirstMatch(/dolfin\/(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/tizen/i],describe(e){const t={name:"Tizen"},r=v.getFirstMatch(/(?:tizen\s?)?browser\/(\d+(\.?_?\d+)+)/i,e)||v.getFirstMatch(x,e);return r&&(t.version=r),t}},{test:[/qupzilla/i],describe(e){const t={name:"QupZilla"},r=v.getFirstMatch(/(?:qupzilla)[\s/](\d+(\.?_?\d+)+)/i,e)||v.getFirstMatch(x,e);return r&&(t.version=r),t}},{test:[/firefox|iceweasel|fxios/i],describe(e){const t={name:"Firefox"},r=v.getFirstMatch(/(?:firefox|iceweasel|fxios)[\s/](\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/electron/i],describe(e){const t={name:"Electron"},r=v.getFirstMatch(/(?:electron)\/(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/MiuiBrowser/i],describe(e){const t={name:"Miui"},r=v.getFirstMatch(/(?:MiuiBrowser)[\s/](\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/chromium/i],describe(e){const t={name:"Chromium"},r=v.getFirstMatch(/(?:chromium)[\s/](\d+(\.?_?\d+)+)/i,e)||v.getFirstMatch(x,e);return r&&(t.version=r),t}},{test:[/chrome|crios|crmo/i],describe(e){const t={name:"Chrome"},r=v.getFirstMatch(/(?:chrome|crios|crmo)\/(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/GSA/i],describe(e){const t={name:"Google Search"},r=v.getFirstMatch(/(?:GSA)\/(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test(e){const t=!e.test(/like android/i),r=e.test(/android/i);return 
t&&r},describe(e){const t={name:"Android Browser"},r=v.getFirstMatch(x,e);return r&&(t.version=r),t}},{test:[/playstation 4/i],describe(e){const t={name:"PlayStation 4"},r=v.getFirstMatch(x,e);return r&&(t.version=r),t}},{test:[/safari|applewebkit/i],describe(e){const t={name:"Safari"},r=v.getFirstMatch(x,e);return r&&(t.version=r),t}},{test:[/.*/i],describe(e){const t=-1!==e.search("\\(")?/^(.*)\/(.*)[ \t]\((.*)/:/^(.*)\/(.*) /;return{name:v.getFirstMatch(t,e),version:v.getSecondMatch(t,e)}}}];var m=[{test:[/Roku\/DVP/],describe(e){const t=v.getFirstMatch(/Roku\/DVP-(\d+\.\d+)/i,e);return{name:p.Roku,version:t}}},{test:[/windows phone/i],describe(e){const t=v.getFirstMatch(/windows phone (?:os)?\s?(\d+(\.\d+)*)/i,e);return{name:p.WindowsPhone,version:t}}},{test:[/windows /i],describe(e){const t=v.getFirstMatch(/Windows ((NT|XP)( \d\d?.\d)?)/i,e),r=v.getWindowsVersionName(t);return{name:p.Windows,version:t,versionName:r}}},{test:[/Macintosh(.*?) FxiOS(.*?)\//],describe(e){const t={name:p.iOS},r=v.getSecondMatch(/(Version\/)(\d[\d.]+)/,e);return r&&(t.version=r),t}},{test:[/macintosh/i],describe(e){const t=v.getFirstMatch(/mac os x (\d+(\.?_?\d+)+)/i,e).replace(/[_\s]/g,"."),r=v.getMacOSVersionName(t),n={name:p.MacOS,version:t};return r&&(n.versionName=r),n}},{test:[/(ipod|iphone|ipad)/i],describe(e){const t=v.getFirstMatch(/os (\d+([_\s]\d+)*) like mac os x/i,e).replace(/[_\s]/g,".");return{name:p.iOS,version:t}}},{test(e){const t=!e.test(/like android/i),r=e.test(/android/i);return t&&r},describe(e){const t=v.getFirstMatch(/android[\s/-](\d+(\.\d+)*)/i,e),r=v.getAndroidVersionName(t),n={name:p.Android,version:t};return r&&(n.versionName=r),n}},{test:[/(web|hpw)[o0]s/i],describe(e){const t=v.getFirstMatch(/(?:web|hpw)[o0]s\/(\d+(\.\d+)*)/i,e),r={name:p.WebOS};return t&&t.length&&(r.version=t),r}},{test:[/blackberry|\bbb\d+/i,/rim\stablet/i],describe(e){const 
t=v.getFirstMatch(/rim\stablet\sos\s(\d+(\.\d+)*)/i,e)||v.getFirstMatch(/blackberry\d+\/(\d+([_\s]\d+)*)/i,e)||v.getFirstMatch(/\bbb(\d+)/i,e);return{name:p.BlackBerry,version:t}}},{test:[/bada/i],describe(e){const t=v.getFirstMatch(/bada\/(\d+(\.\d+)*)/i,e);return{name:p.Bada,version:t}}},{test:[/tizen/i],describe(e){const t=v.getFirstMatch(/tizen[/\s](\d+(\.\d+)*)/i,e);return{name:p.Tizen,version:t}}},{test:[/linux/i],describe:()=>({name:p.Linux})},{test:[/CrOS/],describe:()=>({name:p.ChromeOS})},{test:[/PlayStation 4/],describe(e){const t=v.getFirstMatch(/PlayStation 4[/\s](\d+(\.\d+)*)/i,e);return{name:p.PlayStation4,version:t}}}],h=[{test:[/googlebot/i],describe:()=>({type:"bot",vendor:"Google"})},{test:[/huawei/i],describe(e){const t=v.getFirstMatch(/(can-l01)/i,e)&&"Nova",r={type:b.mobile,vendor:"Huawei"};return t&&(r.model=t),r}},{test:[/nexus\s*(?:7|8|9|10).*/i],describe:()=>({type:b.tablet,vendor:"Nexus"})},{test:[/ipad/i],describe:()=>({type:b.tablet,vendor:"Apple",model:"iPad"})},{test:[/Macintosh(.*?) FxiOS(.*?)\//],describe:()=>({type:b.tablet,vendor:"Apple",model:"iPad"})},{test:[/kftt build/i],describe:()=>({type:b.tablet,vendor:"Amazon",model:"Kindle Fire HD 7"})},{test:[/silk/i],describe:()=>({type:b.tablet,vendor:"Amazon"})},{test:[/tablet(?! 
pc)/i],describe:()=>({type:b.tablet})},{test(e){const t=e.test(/ipod|iphone/i),r=e.test(/like (ipod|iphone)/i);return t&&!r},describe(e){const t=v.getFirstMatch(/(ipod|iphone)/i,e);return{type:b.mobile,vendor:"Apple",model:t}}},{test:[/nexus\s*[0-6].*/i,/galaxy nexus/i],describe:()=>({type:b.mobile,vendor:"Nexus"})},{test:[/[^-]mobi/i],describe:()=>({type:b.mobile})},{test:e=>"blackberry"===e.getBrowserName(!0),describe:()=>({type:b.mobile,vendor:"BlackBerry"})},{test:e=>"bada"===e.getBrowserName(!0),describe:()=>({type:b.mobile})},{test:e=>"windows phone"===e.getBrowserName(),describe:()=>({type:b.mobile,vendor:"Microsoft"})},{test(e){const t=Number(String(e.getOSVersion()).split(".")[0]);return"android"===e.getOSName(!0)&&t>=3},describe:()=>({type:b.tablet})},{test:e=>"android"===e.getOSName(!0),describe:()=>({type:b.mobile})},{test:e=>"macos"===e.getOSName(!0),describe:()=>({type:b.desktop,vendor:"Apple"})},{test:e=>"windows"===e.getOSName(!0),describe:()=>({type:b.desktop})},{test:e=>"linux"===e.getOSName(!0),describe:()=>({type:b.desktop})},{test:e=>"playstation 4"===e.getOSName(!0),describe:()=>({type:b.tv})},{test:e=>"roku"===e.getOSName(!0),describe:()=>({type:b.tv})}],y=[{test:e=>"microsoft edge"===e.getBrowserName(!0),describe(e){if(/\sedg\//i.test(e))return{name:g.Blink};const t=v.getFirstMatch(/edge\/(\d+(\.?_?\d+)+)/i,e);return{name:g.EdgeHTML,version:t}}},{test:[/trident/i],describe(e){const t={name:g.Trident},r=v.getFirstMatch(/trident\/(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:e=>e.test(/presto/i),describe(e){const t={name:g.Presto},r=v.getFirstMatch(/presto\/(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test(e){const t=e.test(/gecko/i),r=e.test(/like gecko/i);return t&&!r},describe(e){const t={name:g.Gecko},r=v.getFirstMatch(/gecko\/(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/(apple)?webkit\/537\.36/i],describe:()=>({name:g.Blink})},{test:[/(apple)?webkit/i],describe(e){const 
t={name:g.WebKit},r=v.getFirstMatch(/webkit\/(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}}];class B{constructor(e,t=!1){if(null==e||""===e)throw new Error("UserAgent parameter can't be empty");this._ua=e,this.parsedResult={},!0!==t&&this.parse()}getUA(){return this._ua}test(e){return e.test(this._ua)}parseBrowser(){this.parsedResult.browser={};const e=v.find(k,(e=>{if("function"==typeof e.test)return e.test(this);if(e.test instanceof Array)return e.test.some((e=>this.test(e)));throw new Error("Browser's test function is not valid")}));return e&&(this.parsedResult.browser=e.describe(this.getUA())),this.parsedResult.browser}getBrowser(){return this.parsedResult.browser?this.parsedResult.browser:this.parseBrowser()}getBrowserName(e){return e?String(this.getBrowser().name).toLowerCase()||"":this.getBrowser().name||""}getBrowserVersion(){return this.getBrowser().version}getOS(){return this.parsedResult.os?this.parsedResult.os:this.parseOS()}parseOS(){this.parsedResult.os={};const e=v.find(m,(e=>{if("function"==typeof e.test)return e.test(this);if(e.test instanceof Array)return e.test.some((e=>this.test(e)));throw new Error("Browser's test function is not valid")}));return e&&(this.parsedResult.os=e.describe(this.getUA())),this.parsedResult.os}getOSName(e){const{name:t}=this.getOS();return e?String(t).toLowerCase()||"":t||""}getOSVersion(){return this.getOS().version}getPlatform(){return this.parsedResult.platform?this.parsedResult.platform:this.parsePlatform()}getPlatformType(e=!1){const{type:t}=this.getPlatform();return e?String(t).toLowerCase()||"":t||""}parsePlatform(){this.parsedResult.platform={};const e=v.find(h,(e=>{if("function"==typeof e.test)return e.test(this);if(e.test instanceof Array)return e.test.some((e=>this.test(e)));throw new Error("Browser's test function is not valid")}));return e&&(this.parsedResult.platform=e.describe(this.getUA())),this.parsedResult.platform}getEngine(){return 
this.parsedResult.engine?this.parsedResult.engine:this.parseEngine()}getEngineName(e){return e?String(this.getEngine().name).toLowerCase()||"":this.getEngine().name||""}parseEngine(){this.parsedResult.engine={};const e=v.find(y,(e=>{if("function"==typeof e.test)return e.test(this);if(e.test instanceof Array)return e.test.some((e=>this.test(e)));throw new Error("Browser's test function is not valid")}));return e&&(this.parsedResult.engine=e.describe(this.getUA())),this.parsedResult.engine}parse(){return this.parseBrowser(),this.parseOS(),this.parsePlatform(),this.parseEngine(),this}getResult(){return v.assign({},this.parsedResult)}satisfies(e){const t={};let r=0;const n={};let a=0;if(Object.keys(e).forEach((i=>{const u=e[i];"string"==typeof u?(n[i]=u,a+=1):"object"==typeof u&&(t[i]=u,r+=1)})),r>0){const e=Object.keys(t),r=v.find(e,(e=>this.isOS(e)));if(r){const e=this.satisfies(t[r]);if(void 0!==e)return e}const n=v.find(e,(e=>this.isPlatform(e)));if(n){const e=this.satisfies(t[n]);if(void 0!==e)return e}}if(a>0){const e=Object.keys(n),t=v.find(e,(e=>this.isBrowser(e,!0)));if(void 0!==t)return this.compareVersion(n[t])}}isBrowser(e,t=!1){const r=this.getBrowserName().toLowerCase();let n=e.toLowerCase();const a=v.getBrowserTypeByAlias(n);return t&&a&&(n=a.toLowerCase()),n===r}compareVersion(e){let t=[0],r=e,n=!1;const a=this.getBrowserVersion();if("string"==typeof a)return">"===e[0]||"<"===e[0]?(r=e.substr(1),"="===e[1]?(n=!0,r=e.substr(2)):t=[],">"===e[0]?t.push(1):t.push(-1)):"="===e[0]?r=e.substr(1):"~"===e[0]&&(n=!0,r=e.substr(1)),t.indexOf(v.compareVersions(a,r,n))>-1}isOS(e){return this.getOSName(!0)===String(e).toLowerCase()}isPlatform(e){return this.getPlatformType(!0)===String(e).toLowerCase()}isEngine(e){return this.getEngineName(!0)===String(e).toLowerCase()}is(e,t=!1){return this.isBrowser(e,t)||this.isOS(e)||this.isPlatform(e)}some(e=[]){return e.some((e=>this.is(e)))}} +/*! 
+ * Bowser - a browser detector + * https://github.com/lancedikson/bowser + * MIT License | (c) Dustin Diaz 2012-2015 + * MIT License | (c) Denis Demchenko 2015-2019 + */class w{static getParser(e,t=!1){if("string"!=typeof e)throw new Error("UserAgent should be a string");return new B(e,t)}static parse(e){return new B(e).getResult()}static get BROWSER_MAP(){return f}static get ENGINE_MAP(){return g}static get OS_MAP(){return p}static get PLATFORMS_MAP(){return b}}var P={exports:{}},S={exports:{}};!function(e){function t(r){return e.exports=t="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(e){return typeof e}:function(e){return e&&"function"==typeof Symbol&&e.constructor===Symbol&&e!==Symbol.prototype?"symbol":typeof e},e.exports.__esModule=!0,e.exports.default=e.exports,t(r)}e.exports=t,e.exports.__esModule=!0,e.exports.default=e.exports}(S);var E=S.exports;!function(e){var t=E.default;function r(){e.exports=r=function(){return a},e.exports.__esModule=!0,e.exports.default=e.exports;var n,a={},i=Object.prototype,u=i.hasOwnProperty,o=Object.defineProperty||function(e,t,r){e[t]=r.value},s="function"==typeof Symbol?Symbol:{},c=s.iterator||"@@iterator",l=s.asyncIterator||"@@asyncIterator",_=s.toStringTag||"@@toStringTag";function d(e,t,r){return Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}),e[t]}try{d({},"")}catch(n){d=function(e,t,r){return e[t]=r}}function f(e,t,r,n){var a=t&&t.prototype instanceof m?t:m,i=Object.create(a.prototype),u=new O(n||[]);return o(i,"_invoke",{value:z(e,r,u)}),i}function b(e,t,r){try{return{type:"normal",arg:e.call(t,r)}}catch(e){return{type:"throw",arg:e}}}a.wrap=f;var p="suspendedStart",g="suspendedYield",v="executing",x="completed",k={};function m(){}function h(){}function y(){}var B={};d(B,c,(function(){return this}));var w=Object.getPrototypeOf,P=w&&w(w(G([])));P&&P!==i&&u.call(P,c)&&(B=P);var S=y.prototype=m.prototype=Object.create(B);function 
E(e){["next","throw","return"].forEach((function(t){d(e,t,(function(e){return this._invoke(t,e)}))}))}function U(e,r){function n(a,i,o,s){var c=b(e[a],e,i);if("throw"!==c.type){var l=c.arg,_=l.value;return _&&"object"==t(_)&&u.call(_,"__await")?r.resolve(_.__await).then((function(e){n("next",e,o,s)}),(function(e){n("throw",e,o,s)})):r.resolve(_).then((function(e){l.value=e,o(l)}),(function(e){return n("throw",e,o,s)}))}s(c.arg)}var a;o(this,"_invoke",{value:function(e,t){function i(){return new r((function(r,a){n(e,t,r,a)}))}return a=a?a.then(i,i):i()}})}function z(e,t,r){var a=p;return function(i,u){if(a===v)throw new Error("Generator is already running");if(a===x){if("throw"===i)throw u;return{value:n,done:!0}}for(r.method=i,r.arg=u;;){var o=r.delegate;if(o){var s=j(o,r);if(s){if(s===k)continue;return s}}if("next"===r.method)r.sent=r._sent=r.arg;else if("throw"===r.method){if(a===p)throw a=x,r.arg;r.dispatchException(r.arg)}else"return"===r.method&&r.abrupt("return",r.arg);a=v;var c=b(e,t,r);if("normal"===c.type){if(a=r.done?x:g,c.arg===k)continue;return{value:c.arg,done:r.done}}"throw"===c.type&&(a=x,r.method="throw",r.arg=c.arg)}}}function j(e,t){var r=t.method,a=e.iterator[r];if(a===n)return t.delegate=null,"throw"===r&&e.iterator.return&&(t.method="return",t.arg=n,j(e,t),"throw"===t.method)||"return"!==r&&(t.method="throw",t.arg=new TypeError("The iterator does not provide a '"+r+"' method")),k;var i=b(a,e.iterator,t.arg);if("throw"===i.type)return t.method="throw",t.arg=i.arg,t.delegate=null,k;var u=i.arg;return u?u.done?(t[e.resultName]=u.value,t.next=e.nextLoc,"return"!==t.method&&(t.method="next",t.arg=n),t.delegate=null,k):u:(t.method="throw",t.arg=new TypeError("iterator result is not an object"),t.delegate=null,k)}function M(e){var t={tryLoc:e[0]};1 in e&&(t.catchLoc=e[1]),2 in e&&(t.finallyLoc=e[2],t.afterLoc=e[3]),this.tryEntries.push(t)}function C(e){var t=e.completion||{};t.type="normal",delete t.arg,e.completion=t}function 
O(e){this.tryEntries=[{tryLoc:"root"}],e.forEach(M,this),this.reset(!0)}function G(e){if(e||""===e){var r=e[c];if(r)return r.call(e);if("function"==typeof e.next)return e;if(!isNaN(e.length)){var a=-1,i=function t(){for(;++a=0;--a){var i=this.tryEntries[a],o=i.completion;if("root"===i.tryLoc)return r("end");if(i.tryLoc<=this.prev){var s=u.call(i,"catchLoc"),c=u.call(i,"finallyLoc");if(s&&c){if(this.prev=0;--r){var n=this.tryEntries[r];if(n.tryLoc<=this.prev&&u.call(n,"finallyLoc")&&this.prev=0;--t){var r=this.tryEntries[t];if(r.finallyLoc===e)return this.complete(r.completion,r.afterLoc),C(r),k}},catch:function(e){for(var t=this.tryEntries.length-1;t>=0;--t){var r=this.tryEntries[t];if(r.tryLoc===e){var n=r.completion;if("throw"===n.type){var a=n.arg;C(r)}return a}}throw new Error("illegal catch attempt")},delegateYield:function(e,t,r){return this.delegate={iterator:G(e),resultName:t,nextLoc:r},"next"===this.method&&(this.arg=n),k}},a}e.exports=r,e.exports.__esModule=!0,e.exports.default=e.exports}(P);var U=(0,P.exports)();try{regeneratorRuntime=U}catch(e){"object"==typeof globalThis?globalThis.regeneratorRuntime=U:Function("r","regeneratorRuntime = r")(U)}function z(e){return z="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(e){return typeof e}:function(e){return e&&"function"==typeof Symbol&&e.constructor===Symbol&&e!==Symbol.prototype?"symbol":typeof e},z(e)}function j(e){var t=function(e,t){if("object"!==z(e)||null===e)return e;var r=e[Symbol.toPrimitive];if(void 0!==r){var n=r.call(e,t||"default");if("object"!==z(n))return n;throw new TypeError("@@toPrimitive must return a primitive value.")}return("string"===t?String:Number)(e)}(e,"string");return"symbol"===z(t)?t:String(t)}function M(e,t){for(var r=0;r>>0:e}function O(e,t){(null==t||t>e.length)&&(t=e.length);for(var r=0,n=new Array(t);r=e.length?{done:!0}:{done:!1,value:e[n++]}},e:function(e){throw e},f:a}}throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order 
to be iterable, non-array objects must have a [Symbol.iterator]() method.")}var i,u=!0,o=!1;return{s:function(){r=r.call(e)},n:function(){var e=r.next();return u=e.done,e},e:function(e){o=!0,i=e},f:function(){try{u||null==r.return||r.return()}finally{if(o)throw i}}}}function R(e,t){(null==t||t>e.length)&&(t=e.length);for(var r=0,n=new Array(t);r=16&&(this.endComputePass(),this.flushCommandEncoder())}},{key:"endComputePass",value:function(){this._passEncoder&&(this._passEncoder.end(),this._passEncoder=null)}},{key:"getBuffer",value:function(e,t){var r=arguments.length>2&&void 0!==arguments[2]&&arguments[2],n=arguments.length>3?arguments[3]:void 0,a=this.getBufferKey(e,t);if(this.bufferReusePool.has(a)){var i=this.bufferReusePool.get(a);if(i&&i.length>0)return i.pop()}return this.device.createBuffer({size:e*Uint8Array.BYTES_PER_ELEMENT,usage:t,mappedAtCreation:r,label:n})}},{key:"scheduleUniformBufferForRelease",value:function(e){this._uniformBuffersPendingRelease.push(e)}},{key:"releaseBuffer",value:function(e){(!(arguments.length>1&&void 0!==arguments[1])||arguments[1])&&(this.endComputePass(),this.commandEncoder.clearBuffer(e,0,e.size));var t=this.getBufferKey(e.size,e.usage);this.bufferReusePool.has(t)||this.bufferReusePool.set(t,[]),this.bufferReusePool.get(t).push(e)}},{key:"sync",value:(u=t(_.mark((function e(){var t,r,n,a,i,u,o,s,c,l,d=this;return _.wrap((function(e){for(;;)switch(e.prev=e.next){case 0:return this.flushCommandEncoder(),e.next=3,this.device.queue.onSubmittedWorkDone();case 3:t=L(this.bufferReusePool.keys());try{for(t.s();!(r=t.n()).done;)if(n=r.value,(a=this.bufferReusePool.get(n))&&a.length>0){i=L(a);try{for(i.s();!(u=i.n()).done;)null==(o=u.value)||o.destroy()}catch(e){i.e(e)}finally{i.f()}}}catch(e){t.e(e)}finally{t.f()}this.bufferReusePool.clear(),s=_.mark((function e(){var t,r,n,a,i,u;return _.wrap((function(e){for(;;)switch(e.prev=e.next){case 
0:t=G(l[c],2),r=t[0],n=t[1],d.shaderTimes[r]||(d.shaderTimes[r]=[]),a=L(n),e.prev=3,u=_.mark((function e(){var t;return _.wrap((function(e){for(;;)switch(e.prev=e.next){case 0:(t=i.value).mapAsync(GPUMapMode.READ).then((function(){var e=new BigInt64Array(t.getMappedRange()),n=e[1]-e[0];t.unmap(),t.destroy(),d.shaderTimes[r].push(n)}));case 2:case"end":return e.stop()}}),e)})),a.s();case 6:if((i=a.n()).done){e.next=10;break}return e.delegateYield(u(),"t0",8);case 8:e.next=6;break;case 10:e.next=15;break;case 12:e.prev=12,e.t1=e.catch(3),a.e(e.t1);case 15:return e.prev=15,a.f(),e.finish(15);case 18:case"end":return e.stop()}}),e,null,[[3,12,15,18]])})),c=0,l=Object.entries(this.timestampBuffers);case 8:if(!(c args: argsStruct;\n\n@group(0) @binding(1)\nvar weight: array;\n\n".concat(e?"\n@group(0) @binding(2)\nvar x: array;\n\n@group(0) @binding(3) \nvar y: array;\n":" \n@group(0) @binding(2)\nvar x: array;\n","\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\nvar sdata: array, workgroup_size_x>;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(\n @builtin(local_invocation_id) local_id: vec3,\n @builtin(workgroup_id) workgroup_id: vec3\n) {\n let tid = local_id.x;\n let m = workgroup_id.x;\n let block_size = workgroup_size_x;\n \n var power_vec: vec4;\n let x_start: u32 = args.x_offset + (m * args.dimension);\n let skip = tid * 4;\n let shift = (block_size * 4);\n for (var j = 0u; j + skip < args.remainder_start; j += shift) {\n let local_index = x_start + j + skip; \n\n let x_vec = vec4(\n x[local_index],\n x[local_index + 1],\n x[local_index + 2],\n x[local_index + 3]);\n \n power_vec += x_vec * x_vec; \n } \n \n if (tid == 0 && args.remainder > 0) {\n var remainder_vec = vec4(0.0, 0.0, 0.0, 0.0);\n let x_idx = x_start + args.remainder_start;\n for (var j = 0u; j < args.remainder; j++) { \n remainder_vec[j] = x[x_idx + j];\n } \n power_vec += remainder_vec * 
remainder_vec;\n }\n \n sdata[tid] = power_vec;\n workgroupBarrier();\n\n ").concat("\n for (var s: u32 = workgroup_size_x / 2; s > 0; s >>= 1) {\n if tid < s {\n sdata[tid] += sdata[tid + s];\n }\n workgroupBarrier();\n }\n","\n \n let power = sdata[0].x + sdata[0].y + sdata[0].z + sdata[0].w;\n let norm: vec4 = vec4(1.0 / sqrt((power / f32(args.dimension)) + args.eps));\n \n let y_start: u32 = args.y_offset + (m * args.dimension);\n for (var j = 0u; j + skip < args.remainder_start; j += shift) {\n let local_index = j + skip;\n let x_idx = x_start + local_index;\n let x_vec = vec4(\n x[x_idx],\n x[x_idx + 1],\n x[x_idx + 2],\n x[x_idx + 3]);\n \n let weight_vec = vec4(\n weight[local_index],\n weight[local_index + 1],\n weight[local_index + 2],\n weight[local_index + 3]);\n let y_vec = x_vec * norm * weight_vec;\n \n let y_idx = y_start + local_index;\n").concat(e?" \n y[y_idx] = y_vec.x;\n y[y_idx + 1] = y_vec.y;\n y[y_idx + 2] = y_vec.z;\n y[y_idx + 3] = y_vec.w;\n":" \n x[y_idx] = y_vec.x;\n x[y_idx + 1] = y_vec.y;\n x[y_idx + 2] = y_vec.z;\n x[y_idx + 3] = y_vec.w;\n"," \n }\n \n if (tid == 0 && args.remainder > 0) {\n let x_idx = x_start + args.remainder_start;\n let weight_idx = args.remainder_start; \n let y_idx = y_start + args.remainder_start;\n for (var j = 0u; j < args.remainder; j++) {\n").concat(e?" 
\n y[y_idx + j] = x[x_idx + j] * norm[j] * weight[weight_idx + j];\n":" \n x[y_idx + j] = x[x_idx + j] * norm[j] * weight[weight_idx + j];\n"," \n } \n }\n}\n\n").concat(I)},je=function(e,t){var r=[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}}];t?(r.push({binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}}),r.push({binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}})):r.push({binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}});var n=e.createBindGroupLayout({label:"norm forward ".concat(t?"multi":"single"," buffer bind group layout"),entries:r}),a=e.createPipelineLayout({label:"norm forward ".concat(t?"multi":"single"," buffer pipeline layout"),bindGroupLayouts:[n]}),i=e.createShaderModule({label:"norm forward ".concat(t?"multi":"single"," buffer shader module"),code:ze(t)});return{computePipeline:e.createComputePipeline({label:"norm forward ".concat(t?"multi":"single"," buffer pipeline"),layout:a,compute:{module:i,entryPoint:q,constants:{workgroup_size_x:N}}})}},Me=a(a({},Ue,(function(e){return je(e,!1)})),Ee,(function(e){return je(e,!0)})),Ce="pv_picollm_norm_layer_forward_multi_buffer_shader",Oe="\nstruct argsStruct {\n n: u32,\n dimension: u32,\n eps: f32,\n weight_offset: u32,\n bias_offset: u32,\n x_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar weight: array;\n\n@group(0) @binding(2)\nvar bias: array;\n\n@group(0) @binding(3)\nvar x: array;\n\n@group(0) @binding(4)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n \n let x_start: u32 = args.x_offset + (global_id.x * 
args.dimension);\n\n var mean: f32 = 0.0;\n for (var j = 0u; j < args.dimension; j++) {\n mean += x[x_start + j];\n }\n mean /= f32(args.dimension);\n\n var mean2: f32 = 0.0;\n for (var j = 0u; j < args.dimension; j++) {\n mean2 += (x[x_start + j] - mean) * (x[x_start + j] - mean);\n }\n mean2 /= f32(args.dimension);\n\n var norm: f32 = 1.0 / sqrt(mean2 + args.eps);\n\n var y_start = args.y_offset + (global_id.x * args.dimension);\n for (var j = 0u; j < args.dimension; j++) {\n y[y_start + j] = ((x[x_start + j] - mean) * norm * weight[args.weight_offset + j]) + bias[args.bias_offset + j];\n }\n}\n\n".concat(I),Ge="pv_picollm_norm_layer_forward_single_buffer_shader",Te="\nstruct argsStruct {\n n: u32,\n dimension: u32,\n eps: f32,\n weight_offset: u32,\n bias_offset: u32,\n x_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar weight: array;\n\n@group(0) @binding(2)\nvar bias: array;\n\n@group(0) @binding(3)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n \n let x_start: u32 = args.x_offset + (global_id.x * args.dimension);\n\n var mean: f32 = 0.0;\n for (var j = 0u; j < args.dimension; j++) {\n mean += y[x_start + j];\n }\n mean /= f32(args.dimension);\n\n var mean2: f32 = 0.0;\n for (var j = 0u; j < args.dimension; j++) {\n mean2 += (y[x_start + j] - mean) * (y[x_start + j] - mean);\n }\n mean2 /= f32(args.dimension);\n\n var norm: f32 = 1.0 / sqrt(mean2 + args.eps);\n\n var y_start = args.y_offset + (global_id.x * args.dimension);\n for (var j = 0u; j < args.dimension; j++) {\n y[y_start + j] = ((y[x_start + j] - mean) * norm * weight[args.weight_offset + j]) + bias[args.bias_offset + j];\n }\n}\n\n".concat(I),Le=a(a({},Ge,(function(e){var 
t=e.createBindGroupLayout({label:"norm layer forward single buffer bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]}),r=e.createPipelineLayout({label:"norm layer forward single buffer pipeline layout",bindGroupLayouts:[t]}),n=e.createShaderModule({label:"norm layer forward single buffer shader module",code:Te});return{computePipeline:e.createComputePipeline({label:"norm layer forward single buffer pipeline",layout:r,compute:{module:n,entryPoint:q,constants:{workgroup_size_x:N}}})}})),Ce,(function(e){var t=e.createBindGroupLayout({label:"norm layer forward multi buffer bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:4,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]}),r=e.createPipelineLayout({label:"norm layer forward multi buffer pipeline layout",bindGroupLayouts:[t]}),n=e.createShaderModule({label:"norm layer forward multi buffer shader module",code:Oe});return{computePipeline:e.createComputePipeline({label:"norm layer forward multi buffer pipeline",layout:r,compute:{module:n,entryPoint:q,constants:{workgroup_size_x:N}}})}})),Re="pv_picollm_transformer_add_to_buffer_shader",Ae="\nstruct argsStruct {\n n: u32,\n x_offset: u32,\n buffer_offset: u32, \n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array;\n\n@group(0) @binding(2)\nvar buffer: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride 
workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n \n buffer[args.buffer_offset + global_id.x] += x[args.x_offset + global_id.x]; \n}\n\n".concat(I),Fe="pv_picollm_transformer_add_buffers_shader",Ne="\n\nstruct argsStruct {\n n: u32,\n buffer1_offset: u32,\n buffer2_offset: u32,\n y_offset: u32, \n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar buffer1: array;\n\n@group(0) @binding(2)\nvar buffer2: array;\n\n@group(0) @binding(3)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n \n y[args.y_offset + global_id.x] = buffer1[args.buffer1_offset + global_id.x] + buffer2[args.buffer2_offset + global_id.x]; \n}\n\n".concat(I),Ye=a(a({},Re,(function(e){var t=e.createBindGroupLayout({label:"transformer add to buffer bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]}),r=e.createPipelineLayout({label:"transformer add to buffer pipeline layout",bindGroupLayouts:[t]}),n=e.createShaderModule({label:"transformer add to buffer shader module",code:Ae});return{computePipeline:e.createComputePipeline({label:"transformer add to buffer compute",layout:r,compute:{module:n,entryPoint:q,constants:{workgroup_size_x:N}}})}})),Fe,(function(e){var t=e.createBindGroupLayout({label:"transformer add buffers bind group 
layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]}),r=e.createPipelineLayout({label:"transformer add buffers pipeline layout",bindGroupLayouts:[t]}),n=e.createShaderModule({label:"transformer add buffers shader module",code:Ne});return{computePipeline:e.createComputePipeline({label:"transformer add buffers pipeline",layout:r,compute:{module:n,entryPoint:q,constants:{workgroup_size_x:N}}})}})),We="pv_picollm_weight_float_forward_shader",Ie="\n\nstruct argsStruct {\n nr: u32,\n nc: u32,\n w_offset: u32,\n x_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar w: array;\n\n@group(0) @binding(2)\nvar x: array;\n\n@group(0) @binding(3)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(\n @builtin(workgroup_id) workgroup_id : vec3,\n @builtin(local_invocation_id) local_id : vec3\n) {\n if (local_id.x >= args.nr) {\n return;\n }\n let x_start: u32 = args.x_offset + (workgroup_id.x * args.nc);\n let y_idx: u32 = local_id.x + args.y_offset + (workgroup_id.x * args.nr);\n \n let w_start: u32 = args.w_offset + (local_id.x * args.nc);\n for (var j = 0u; j < args.nc; j++) {\n y[y_idx] += w[w_start + j] * x[x_start + j]; \n }\n}\n\n".concat(I),qe=a({},We,(function(e){var t=e.createBindGroupLayout({label:"weight float forward bind group 
layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]}),r=e.createPipelineLayout({label:"weight float forward pipeline layout",bindGroupLayouts:[t]}),n=e.createShaderModule({label:"weight float forward shader module",code:Ie});return{computePipeline:e.createComputePipeline({label:"weight float forward pipeline",layout:r,compute:{module:n,entryPoint:q,constants:{workgroup_size_x:N}}})}})),Ve=16,Ke="\n\nconst exponents: array = array(\n 2.9103830456733704e-11, \n 5.820766091346741e-11, \n 1.1641532182693481e-10, \n 2.3283064365386963e-10,\n 4.656612873077393e-10, \n 9.313225746154785e-10, \n 1.862645149230957e-09, \n 3.725290298461914e-09,\n 7.450580596923828e-09, \n 1.4901161193847656e-08, \n 2.9802322387695312e-08, \n 5.960464477539063e-08,\n 1.1920928955078125e-07, \n 2.384185791015625e-07, \n 4.76837158203125e-07, \n 9.5367431640625e-07,\n 1.9073486328125e-06, \n 3.814697265625e-06, \n 7.62939453125e-06, \n 1.52587890625e-05, \n 3.0517578125e-05,\n 6.103515625e-05, \n 0.0001220703125, \n 0.000244140625, \n 0.00048828125, \n 0.0009765625, \n 0.001953125, \n 0.00390625,\n 0.0078125, \n 0.015625, \n 0.03125, \n 0.0625);\n\nfn from_fp510(x: u32) -> f32 {\n let exponent = f32(exponents[extractBits(x, 10u, 5u)]); \n let fractional = f32(extractBits(x, 0u, 10u)); \n let abs = exponent * fractional;\n return abs * (1.0 - (2.0 * f32(extractBits(x, 15u, 1u))));\n}\n",De={3:"\n\nstruct argsStruct {\n nbr: u32,\n nbc: u32,\n blocks_offset: u32\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar blocks: array;\n\n".concat("\nvar unpacked: array;\n\nfn unpack_block_128_bit_depth_3(packed_offset: u32) {\n let val_0 = blocks[packed_offset]; \n unpacked[0] = extractBits(val_0, 0u, 3u);\n 
unpacked[1] = extractBits(val_0, 3u, 3u);\n unpacked[2] = extractBits(val_0, 6u, 3u);\n unpacked[3] = extractBits(val_0, 9u, 3u);\n unpacked[4] = extractBits(val_0, 12u, 3u);\n unpacked[5] = extractBits(val_0, 15u, 3u);\n unpacked[6] = extractBits(val_0, 18u, 3u);\n unpacked[7] = extractBits(val_0, 21u, 3u);\n unpacked[8] = extractBits(val_0, 24u, 3u);\n unpacked[9] = extractBits(val_0, 27u, 3u);\n unpacked[10] = extractBits(val_0, 30u, 2u);\n \n let val_1 = blocks[packed_offset + 1];\n unpacked[10] = insertBits(unpacked[10], extractBits(val_1, 0u, 1u), 2u, 1u);\n unpacked[11] = extractBits(val_1, 1u, 3u);\n unpacked[12] = extractBits(val_1, 4u, 3u);\n unpacked[13] = extractBits(val_1, 7u, 3u);\n unpacked[14] = extractBits(val_1, 10u, 3u);\n unpacked[15] = extractBits(val_1, 13u, 3u);\n unpacked[16] = extractBits(val_1, 16u, 3u);\n unpacked[17] = extractBits(val_1, 19u, 3u);\n unpacked[18] = extractBits(val_1, 22u, 3u);\n unpacked[19] = extractBits(val_1, 25u, 3u);\n unpacked[20] = extractBits(val_1, 28u, 3u);\n unpacked[21] = extractBits(val_1, 31u, 1u);\n \n let val_2 = blocks[packed_offset + 2];\n unpacked[21] = insertBits(unpacked[21], extractBits(val_2, 0u, 2u), 1u, 2u); \n unpacked[22] = extractBits(val_2, 2u, 3u);\n unpacked[23] = extractBits(val_2, 5u, 3u);\n unpacked[24] = extractBits(val_2, 8u, 3u);\n unpacked[25] = extractBits(val_2, 11u, 3u);\n unpacked[26] = extractBits(val_2, 14u, 3u);\n unpacked[27] = extractBits(val_2, 17u, 3u);\n unpacked[28] = extractBits(val_2, 20u, 3u);\n unpacked[29] = extractBits(val_2, 23u, 3u);\n unpacked[30] = extractBits(val_2, 26u, 3u);\n unpacked[31] = extractBits(val_2, 29u, 3u);\n \n let val_3 = blocks[packed_offset + 3]; \n unpacked[32] = extractBits(val_3, 0u, 3u);\n unpacked[33] = extractBits(val_3, 3u, 3u);\n unpacked[34] = extractBits(val_3, 6u, 3u);\n unpacked[35] = extractBits(val_3, 9u, 3u);\n unpacked[36] = extractBits(val_3, 12u, 3u);\n unpacked[37] = extractBits(val_3, 15u, 3u);\n unpacked[38] = 
extractBits(val_3, 18u, 3u);\n unpacked[39] = extractBits(val_3, 21u, 3u);\n unpacked[40] = extractBits(val_3, 24u, 3u);\n unpacked[41] = extractBits(val_3, 27u, 3u);\n unpacked[42] = extractBits(val_3, 30u, 2u);\n \n let val_4 = blocks[packed_offset + 4];\n unpacked[42] = insertBits(unpacked[42], extractBits(val_4, 0u, 1u), 2u, 1u);\n unpacked[43] = extractBits(val_4, 1u, 3u);\n unpacked[44] = extractBits(val_4, 4u, 3u);\n unpacked[45] = extractBits(val_4, 7u, 3u);\n unpacked[46] = extractBits(val_4, 10u, 3u);\n unpacked[47] = extractBits(val_4, 13u, 3u);\n unpacked[48] = extractBits(val_4, 16u, 3u);\n unpacked[49] = extractBits(val_4, 19u, 3u);\n unpacked[50] = extractBits(val_4, 22u, 3u);\n unpacked[51] = extractBits(val_4, 25u, 3u);\n unpacked[52] = extractBits(val_4, 28u, 3u);\n unpacked[53] = extractBits(val_4, 31u, 1u);\n \n let val_5 = blocks[packed_offset + 5];\n unpacked[53] = insertBits(unpacked[53], extractBits(val_5, 0u, 2u), 1u, 2u); \n unpacked[54] = extractBits(val_5, 2u, 3u);\n unpacked[55] = extractBits(val_5, 5u, 3u);\n unpacked[56] = extractBits(val_5, 8u, 3u);\n unpacked[57] = extractBits(val_5, 11u, 3u);\n unpacked[58] = extractBits(val_5, 14u, 3u);\n unpacked[59] = extractBits(val_5, 17u, 3u);\n unpacked[60] = extractBits(val_5, 20u, 3u);\n unpacked[61] = extractBits(val_5, 23u, 3u);\n unpacked[62] = extractBits(val_5, 26u, 3u);\n unpacked[63] = extractBits(val_5, 29u, 3u);\n \n let val_6 = blocks[packed_offset + 6];\n unpacked[64] = extractBits(val_6, 0u, 3u);\n unpacked[65] = extractBits(val_6, 3u, 3u);\n unpacked[66] = extractBits(val_6, 6u, 3u);\n unpacked[67] = extractBits(val_6, 9u, 3u);\n unpacked[68] = extractBits(val_6, 12u, 3u);\n unpacked[69] = extractBits(val_6, 15u, 3u);\n unpacked[70] = extractBits(val_6, 18u, 3u);\n unpacked[71] = extractBits(val_6, 21u, 3u);\n unpacked[72] = extractBits(val_6, 24u, 3u);\n unpacked[73] = extractBits(val_6, 27u, 3u);\n unpacked[74] = extractBits(val_6, 30u, 2u);\n \n let val_7 = 
blocks[packed_offset + 7];\n unpacked[74] = insertBits(unpacked[74], extractBits(val_7, 0u, 1u), 2u, 1u);\n unpacked[75] = extractBits(val_7, 1u, 3u);\n unpacked[76] = extractBits(val_7, 4u, 3u);\n unpacked[77] = extractBits(val_7, 7u, 3u);\n unpacked[78] = extractBits(val_7, 10u, 3u);\n unpacked[79] = extractBits(val_7, 13u, 3u);\n unpacked[80] = extractBits(val_7, 16u, 3u);\n unpacked[81] = extractBits(val_7, 19u, 3u);\n unpacked[82] = extractBits(val_7, 22u, 3u);\n unpacked[83] = extractBits(val_7, 25u, 3u);\n unpacked[84] = extractBits(val_7, 28u, 3u);\n unpacked[85] = extractBits(val_7, 31u, 1u);\n \n let val_8 = blocks[packed_offset + 8];\n unpacked[85] = insertBits(unpacked[85], extractBits(val_8, 0u, 2u), 1u, 2u); \n unpacked[86] = extractBits(val_8, 2u, 3u);\n unpacked[87] = extractBits(val_8, 5u, 3u);\n unpacked[88] = extractBits(val_8, 8u, 3u);\n unpacked[89] = extractBits(val_8, 11u, 3u);\n unpacked[90] = extractBits(val_8, 14u, 3u);\n unpacked[91] = extractBits(val_8, 17u, 3u);\n unpacked[92] = extractBits(val_8, 20u, 3u);\n unpacked[93] = extractBits(val_8, 23u, 3u);\n unpacked[94] = extractBits(val_8, 26u, 3u);\n unpacked[95] = extractBits(val_8, 29u, 3u);\n \n let val_9 = blocks[packed_offset + 9];\n unpacked[96] = extractBits(val_9, 0u, 3u);\n unpacked[97] = extractBits(val_9, 3u, 3u);\n unpacked[98] = extractBits(val_9, 6u, 3u);\n unpacked[99] = extractBits(val_9, 9u, 3u);\n unpacked[100] = extractBits(val_9, 12u, 3u);\n unpacked[101] = extractBits(val_9, 15u, 3u);\n unpacked[102] = extractBits(val_9, 18u, 3u);\n unpacked[103] = extractBits(val_9, 21u, 3u);\n unpacked[104] = extractBits(val_9, 24u, 3u);\n unpacked[105] = extractBits(val_9, 27u, 3u);\n unpacked[106] = extractBits(val_9, 30u, 2u);\n \n let val_10 = blocks[packed_offset + 10];\n unpacked[106] = insertBits(unpacked[106], extractBits(val_10, 0u, 1u), 2u, 1u);\n unpacked[107] = extractBits(val_10, 1u, 3u);\n unpacked[108] = extractBits(val_10, 4u, 3u);\n unpacked[109] = 
extractBits(val_10, 7u, 3u);\n unpacked[110] = extractBits(val_10, 10u, 3u);\n unpacked[111] = extractBits(val_10, 13u, 3u);\n unpacked[112] = extractBits(val_10, 16u, 3u);\n unpacked[113] = extractBits(val_10, 19u, 3u);\n unpacked[114] = extractBits(val_10, 22u, 3u);\n unpacked[115] = extractBits(val_10, 25u, 3u);\n unpacked[116] = extractBits(val_10, 28u, 3u);\n unpacked[117] = extractBits(val_10, 31u, 1u);\n \n let val_11 = blocks[packed_offset + 11];\n unpacked[117] = insertBits(unpacked[117], extractBits(val_11, 0u, 2u), 1u, 2u); \n unpacked[118] = extractBits(val_11, 2u, 3u);\n unpacked[119] = extractBits(val_11, 5u, 3u);\n unpacked[120] = extractBits(val_11, 8u, 3u);\n unpacked[121] = extractBits(val_11, 11u, 3u);\n unpacked[122] = extractBits(val_11, 14u, 3u);\n unpacked[123] = extractBits(val_11, 17u, 3u);\n unpacked[124] = extractBits(val_11, 20u, 3u);\n unpacked[125] = extractBits(val_11, 23u, 3u);\n unpacked[126] = extractBits(val_11, 26u, 3u);\n unpacked[127] = extractBits(val_11, 29u, 3u);\n}\n","\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.nbr || global_id.y >= args.nbc) {\n return;\n }\n \n let blocks_start: u32 = args.blocks_offset + ((global_id.x * args.nbc + global_id.y) * 12u); \n unpack_block_128_bit_depth_3(blocks_start); \n \n let b01: u32 = blocks_start;\n let b2: u32 = blocks_start + 8u;\n \n for (var r = 0u; r < ").concat(Ve,"u; r++) {\n let unpacked_idx = r * ").concat(8,"u;\n let b01_idx = b01 + (r / 2u);\n let b2_idx = b2 + (r / 4u);\n\n blocks[b01_idx] = insertBits(blocks[b01_idx], extractBits(unpacked[unpacked_idx], 0u, 2u), (r * 16u) % 32u, 16u);\n blocks[b2_idx] = insertBits(blocks[b2_idx], extractBits(unpacked[unpacked_idx], 2u, 1u), (r * 8u) % 32u, 8u); \n }\n \n for (var c = 1u; c < ").concat(8,"u; 
c++) {\n for (var r = 0u; r < ").concat(Ve,"u; r++) {\n let unpacked_idx = r * ").concat(8,"u + c;\n let b01_idx = b01 + (r / 2u);\n let b2_idx = b2 + (r / 4u);\n \n blocks[b01_idx] = insertBits(blocks[b01_idx], extractBits(unpacked[unpacked_idx], 0u, 2u), ((r * 16u) % 32u) + (2u * c), 2u);\n blocks[b2_idx] = insertBits(blocks[b2_idx], extractBits(unpacked[unpacked_idx], 2u, 1u), ((r * 8u) % 32u) + c, 1u); \n }\n }\n}\n\n").concat(I),5:"\n\nstruct argsStruct {\n nbr: u32,\n nbc: u32,\n blocks_offset: u32\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar blocks: array;\n\n".concat("\nvar unpacked: array;\n\nfn unpack_block_128_bit_depth_5(packed_offset: u32) { \n let val_0 = blocks[packed_offset];\n unpacked[0] = extractBits(val_0, 0u, 5u);\n unpacked[1] = extractBits(val_0, 5u, 5u);\n unpacked[2] = extractBits(val_0, 10u, 5u);\n unpacked[3] = extractBits(val_0, 15u, 5u);\n unpacked[4] = extractBits(val_0, 20u, 5u);\n unpacked[5] = extractBits(val_0, 25u, 5u);\n unpacked[6] = extractBits(val_0, 30u, 2u);\n \n let val_1 = blocks[packed_offset + 1];\n unpacked[6] = insertBits(unpacked[6], extractBits(val_1, 0u, 3u), 2u, 3u); \n unpacked[7] = extractBits(val_1, 3u, 5u);\n unpacked[8] = extractBits(val_1, 8u, 5u);\n unpacked[9] = extractBits(val_1, 13u, 5u);\n unpacked[10] = extractBits(val_1, 18u, 5u);\n unpacked[11] = extractBits(val_1, 23u, 5u);\n unpacked[12] = extractBits(val_1, 28u, 4u);\n \n let val_2 = blocks[packed_offset + 2];\n unpacked[12] = insertBits(unpacked[12], extractBits(val_2, 0u, 1u), 4u, 1u);\n unpacked[13] = extractBits(val_2, 1u, 5u);\n unpacked[14] = extractBits(val_2, 6u, 5u);\n unpacked[15] = extractBits(val_2, 11u, 5u);\n unpacked[16] = extractBits(val_2, 16u, 5u);\n unpacked[17] = extractBits(val_2, 21u, 5u);\n unpacked[18] = extractBits(val_2, 26u, 5u);\n unpacked[19] = extractBits(val_2, 31u, 1u);\n \n let val_3 = blocks[packed_offset + 3];\n unpacked[19] = insertBits(unpacked[19], extractBits(val_3, 0u, 
4u), 1u, 4u);\n unpacked[20] = extractBits(val_3, 4u, 5u);\n unpacked[21] = extractBits(val_3, 9u, 5u);\n unpacked[22] = extractBits(val_3, 14u, 5u);\n unpacked[23] = extractBits(val_3, 19u, 5u);\n unpacked[24] = extractBits(val_3, 24u, 5u);\n unpacked[25] = extractBits(val_3, 29u, 3u);\n \n let val_4 = blocks[packed_offset + 4];\n unpacked[25] = insertBits(unpacked[25], extractBits(val_4, 0u, 2u), 3u, 2u);\n unpacked[26] = extractBits(val_4, 2u, 5u);\n unpacked[27] = extractBits(val_4, 7u, 5u);\n unpacked[28] = extractBits(val_4, 12u, 5u);\n unpacked[29] = extractBits(val_4, 17u, 5u);\n unpacked[30] = extractBits(val_4, 22u, 5u);\n unpacked[31] = extractBits(val_4, 27u, 5u);\n \n let val_5 = blocks[packed_offset + 5];\n unpacked[32] = extractBits(val_5, 0u, 5u);\n unpacked[33] = extractBits(val_5, 5u, 5u);\n unpacked[34] = extractBits(val_5, 10u, 5u);\n unpacked[35] = extractBits(val_5, 15u, 5u);\n unpacked[36] = extractBits(val_5, 20u, 5u);\n unpacked[37] = extractBits(val_5, 25u, 5u);\n unpacked[38] = extractBits(val_5, 30u, 2u);\n\n let val_6 = blocks[packed_offset + 6];\n unpacked[38] = insertBits(unpacked[38], extractBits(val_6, 0u, 3u), 2u, 3u);\n unpacked[39] = extractBits(val_6, 3u, 5u);\n unpacked[40] = extractBits(val_6, 8u, 5u);\n unpacked[41] = extractBits(val_6, 13u, 5u);\n unpacked[42] = extractBits(val_6, 18u, 5u);\n unpacked[43] = extractBits(val_6, 23u, 5u);\n unpacked[44] = extractBits(val_6, 28u, 4u);\n\n let val_7 = blocks[packed_offset + 7];\n unpacked[44] = insertBits(unpacked[44], extractBits(val_7, 0u, 1u), 4u, 1u);\n unpacked[45] = extractBits(val_7, 1u, 5u);\n unpacked[46] = extractBits(val_7, 6u, 5u);\n unpacked[47] = extractBits(val_7, 11u, 5u);\n unpacked[48] = extractBits(val_7, 16u, 5u);\n unpacked[49] = extractBits(val_7, 21u, 5u);\n unpacked[50] = extractBits(val_7, 26u, 5u);\n unpacked[51] = extractBits(val_7, 31u, 1u);\n\n let val_8 = blocks[packed_offset + 8];\n unpacked[51] = insertBits(unpacked[51], extractBits(val_8, 0u, 4u), 
1u, 4u);\n unpacked[52] = extractBits(val_8, 4u, 5u);\n unpacked[53] = extractBits(val_8, 9u, 5u);\n unpacked[54] = extractBits(val_8, 14u, 5u);\n unpacked[55] = extractBits(val_8, 19u, 5u);\n unpacked[56] = extractBits(val_8, 24u, 5u);\n unpacked[57] = extractBits(val_8, 29u, 3u);\n \n let val_9 = blocks[packed_offset + 9];\n unpacked[57] = insertBits(unpacked[57], extractBits(val_9, 0u, 2u), 3u, 2u);\n unpacked[58] = extractBits(val_9, 2u, 5u);\n unpacked[59] = extractBits(val_9, 7u, 5u);\n unpacked[60] = extractBits(val_9, 12u, 5u);\n unpacked[61] = extractBits(val_9, 17u, 5u);\n unpacked[62] = extractBits(val_9, 22u, 5u);\n unpacked[63] = extractBits(val_9, 27u, 5u);\n \n let val_10 = blocks[packed_offset + 10];\n unpacked[64] = extractBits(val_10, 0u, 5u);\n unpacked[65] = extractBits(val_10, 5u, 5u);\n unpacked[66] = extractBits(val_10, 10u, 5u);\n unpacked[67] = extractBits(val_10, 15u, 5u);\n unpacked[68] = extractBits(val_10, 20u, 5u);\n unpacked[69] = extractBits(val_10, 25u, 5u);\n unpacked[70] = extractBits(val_10, 30u, 2u);\n\n let val_11 = blocks[packed_offset + 11];\n unpacked[70] = insertBits(unpacked[70], extractBits(val_11, 0u, 3u), 2u, 3u);\n unpacked[71] = extractBits(val_11, 3u, 5u);\n unpacked[72] = extractBits(val_11, 8u, 5u);\n unpacked[73] = extractBits(val_11, 13u, 5u);\n unpacked[74] = extractBits(val_11, 18u, 5u);\n unpacked[75] = extractBits(val_11, 23u, 5u);\n unpacked[76] = extractBits(val_11, 28u, 4u);\n\n let val_12 = blocks[packed_offset + 12];\n unpacked[76] = insertBits(unpacked[76], extractBits(val_12, 0u, 1u), 4u, 1u);\n unpacked[77] = extractBits(val_12, 1u, 5u);\n unpacked[78] = extractBits(val_12, 6u, 5u);\n unpacked[79] = extractBits(val_12, 11u, 5u);\n unpacked[80] = extractBits(val_12, 16u, 5u);\n unpacked[81] = extractBits(val_12, 21u, 5u);\n unpacked[82] = extractBits(val_12, 26u, 5u);\n unpacked[83] = extractBits(val_12, 31u, 1u);\n\n let val_13 = blocks[packed_offset + 13];\n unpacked[83] = insertBits(unpacked[83], 
extractBits(val_13, 0u, 4u), 1u, 4u);\n unpacked[84] = extractBits(val_13, 4u, 5u);\n unpacked[85] = extractBits(val_13, 9u, 5u);\n unpacked[86] = extractBits(val_13, 14u, 5u);\n unpacked[87] = extractBits(val_13, 19u, 5u);\n unpacked[88] = extractBits(val_13, 24u, 5u);\n unpacked[89] = extractBits(val_13, 29u, 3u);\n \n let val_14 = blocks[packed_offset + 14];\n unpacked[89] = insertBits(unpacked[89], extractBits(val_14, 0u, 2u), 3u, 2u);\n unpacked[90] = extractBits(val_14, 2u, 5u);\n unpacked[91] = extractBits(val_14, 7u, 5u);\n unpacked[92] = extractBits(val_14, 12u, 5u);\n unpacked[93] = extractBits(val_14, 17u, 5u);\n unpacked[94] = extractBits(val_14, 22u, 5u);\n unpacked[95] = extractBits(val_14, 27u, 5u);\n\n let val_15 = blocks[packed_offset + 15];\n unpacked[96] = extractBits(val_15, 0u, 5u);\n unpacked[97] = extractBits(val_15, 5u, 5u);\n unpacked[98] = extractBits(val_15, 10u, 5u);\n unpacked[99] = extractBits(val_15, 15u, 5u);\n unpacked[100] = extractBits(val_15, 20u, 5u);\n unpacked[101] = extractBits(val_15, 25u, 5u);\n unpacked[102] = extractBits(val_15, 30u, 2u);\n\n let val_16 = blocks[packed_offset + 16];\n unpacked[102] = insertBits(unpacked[102], extractBits(val_16, 0u, 3u), 2u, 3u);\n unpacked[103] = extractBits(val_16, 3u, 5u);\n unpacked[104] = extractBits(val_16, 8u, 5u);\n unpacked[105] = extractBits(val_16, 13u, 5u);\n unpacked[106] = extractBits(val_16, 18u, 5u);\n unpacked[107] = extractBits(val_16, 23u, 5u);\n unpacked[108] = extractBits(val_16, 28u, 4u);\n\n let val_17 = blocks[packed_offset + 17];\n unpacked[108] = insertBits(unpacked[108], extractBits(val_17, 0u, 1u), 4u, 1u);\n unpacked[109] = extractBits(val_17, 1u, 5u);\n unpacked[110] = extractBits(val_17, 6u, 5u);\n unpacked[111] = extractBits(val_17, 11u, 5u);\n unpacked[112] = extractBits(val_17, 16u, 5u);\n unpacked[113] = extractBits(val_17, 21u, 5u);\n unpacked[114] = extractBits(val_17, 26u, 5u);\n unpacked[115] = extractBits(val_17, 31u, 1u);\n \n let val_18 = 
blocks[packed_offset + 18];\n unpacked[115] = insertBits(unpacked[115], extractBits(val_18, 0u, 4u), 1u, 4u);\n unpacked[116] = extractBits(val_18, 4u, 5u);\n unpacked[117] = extractBits(val_18, 9u, 5u);\n unpacked[118] = extractBits(val_18, 14u, 5u);\n unpacked[119] = extractBits(val_18, 19u, 5u);\n unpacked[120] = extractBits(val_18, 24u, 5u);\n unpacked[121] = extractBits(val_18, 29u, 3u);\n\n let val_19 = blocks[packed_offset + 19];\n unpacked[121] = insertBits(unpacked[121], extractBits(val_19, 0u, 2u), 3u, 2u);\n unpacked[122] = extractBits(val_19, 2u, 5u);\n unpacked[123] = extractBits(val_19, 7u, 5u);\n unpacked[124] = extractBits(val_19, 12u, 5u);\n unpacked[125] = extractBits(val_19, 17u, 5u);\n unpacked[126] = extractBits(val_19, 22u, 5u);\n unpacked[127] = extractBits(val_19, 27u, 5u);\n}\n","\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.nbr || global_id.y >= args.nbc) {\n return;\n }\n\n let blocks_start: u32 = args.blocks_offset + ((global_id.x * args.nbc + global_id.y) * 20u);\n unpack_block_128_bit_depth_5(blocks_start); \n \n let b03: u32 = blocks_start;\n let b4: u32 = blocks_start + 16u;\n\n for (var r = 0u; r < ").concat(Ve,"u; r++) {\n let unpacked_idx = r * ").concat(8,"u;\n let b03_idx = b03 + r;\n let b4_idx = b4 + (r / 4u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 0u, 32u);\n blocks[b4_idx] = insertBits(blocks[b4_idx], extractBits(unpacked[unpacked_idx], 4u, 1u), (r * 8u) % 32u, 8u);\n }\n \n for (var c = 1u; c < ").concat(8,"u; c++) {\n for (var r = 0u; r < ").concat(Ve,"u; r++) {\n let unpacked_idx = r * ").concat(8,"u + c;\n let b03_idx = b03 + r;\n let b4_idx = b4 + (r / 4u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], 
extractBits(unpacked[unpacked_idx], 0u, 4u), 4 * c, 4u);\n blocks[b4_idx] = insertBits(blocks[b4_idx], extractBits(unpacked[unpacked_idx], 4u, 1u), ((r * 8u) % 32u) + c, 1u);\n }\n } \n}\n\n").concat(I),6:"\n\nstruct argsStruct {\n nbr: u32,\n nbc: u32,\n blocks_offset: u32\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar blocks: array;\n\n".concat("\nvar unpacked: array;\n\nfn unpack_block_128_bit_depth_6(packed_offset: u32) {\n let val_0 = blocks[packed_offset];\n unpacked[0] = extractBits(val_0, 0u, 6u);\n unpacked[1] = extractBits(val_0, 6u, 6u);\n unpacked[2] = extractBits(val_0, 12u, 6u);\n unpacked[3] = extractBits(val_0, 18u, 6u);\n unpacked[4] = extractBits(val_0, 24u, 6u);\n unpacked[5] = extractBits(val_0, 30u, 2u);\n \n let val_1 = blocks[packed_offset + 1];\n unpacked[5] = insertBits(unpacked[5], extractBits(val_1, 0u, 4u), 2u, 4u);\n unpacked[6] = extractBits(val_1, 4u, 6u);\n unpacked[7] = extractBits(val_1, 10u, 6u);\n unpacked[8] = extractBits(val_1, 16u, 6u);\n unpacked[9] = extractBits(val_1, 22u, 6u);\n unpacked[10] = extractBits(val_1, 28u, 4u);\n \n let val_2 = blocks[packed_offset + 2];\n unpacked[10] = insertBits(unpacked[10], extractBits(val_2, 0u, 2u), 4u, 2u);\n unpacked[11] = extractBits(val_2, 2u, 6u);\n unpacked[12] = extractBits(val_2, 8u, 6u);\n unpacked[13] = extractBits(val_2, 14u, 6u);\n unpacked[14] = extractBits(val_2, 20u, 6u);\n unpacked[15] = extractBits(val_2, 26u, 6u);\n \n let val_3 = blocks[packed_offset + 3];\n unpacked[16] = extractBits(val_3, 0u, 6u);\n unpacked[17] = extractBits(val_3, 6u, 6u);\n unpacked[18] = extractBits(val_3, 12u, 6u);\n unpacked[19] = extractBits(val_3, 18u, 6u);\n unpacked[20] = extractBits(val_3, 24u, 6u);\n unpacked[21] = extractBits(val_3, 30u, 2u);\n \n let val_4 = blocks[packed_offset + 4];\n unpacked[21] = insertBits(unpacked[21], extractBits(val_4, 0u, 4u), 2u, 4u);\n unpacked[22] = extractBits(val_4, 4u, 6u);\n unpacked[23] = extractBits(val_4, 10u, 
6u);\n unpacked[24] = extractBits(val_4, 16u, 6u);\n unpacked[25] = extractBits(val_4, 22u, 6u);\n unpacked[26] = extractBits(val_4, 28u, 4u);\n\n let val_5 = blocks[packed_offset + 5];\n unpacked[26] = insertBits(unpacked[26], extractBits(val_5, 0u, 2u), 4u, 2u);\n unpacked[27] = extractBits(val_5, 2u, 6u);\n unpacked[28] = extractBits(val_5, 8u, 6u);\n unpacked[29] = extractBits(val_5, 14u, 6u);\n unpacked[30] = extractBits(val_5, 20u, 6u);\n unpacked[31] = extractBits(val_5, 26u, 6u);\n\n let val_6 = blocks[packed_offset + 6];\n unpacked[32] = extractBits(val_6, 0u, 6u);\n unpacked[33] = extractBits(val_6, 6u, 6u);\n unpacked[34] = extractBits(val_6, 12u, 6u);\n unpacked[35] = extractBits(val_6, 18u, 6u);\n unpacked[36] = extractBits(val_6, 24u, 6u);\n unpacked[37] = extractBits(val_6, 30u, 2u);\n\n let val_7 = blocks[packed_offset + 7];\n unpacked[37] = insertBits(unpacked[37], extractBits(val_7, 0u, 4u), 2u, 4u);\n unpacked[38] = extractBits(val_7, 4u, 6u);\n unpacked[39] = extractBits(val_7, 10u, 6u);\n unpacked[40] = extractBits(val_7, 16u, 6u);\n unpacked[41] = extractBits(val_7, 22u, 6u);\n unpacked[42] = extractBits(val_7, 28u, 4u);\n \n let val_8 = blocks[packed_offset + 8];\n unpacked[42] = insertBits(unpacked[42], extractBits(val_8, 0u, 2u), 4u, 2u);\n unpacked[43] = extractBits(val_8, 2u, 6u);\n unpacked[44] = extractBits(val_8, 8u, 6u);\n unpacked[45] = extractBits(val_8, 14u, 6u);\n unpacked[46] = extractBits(val_8, 20u, 6u);\n unpacked[47] = extractBits(val_8, 26u, 6u);\n\n let val_9 = blocks[packed_offset + 9];\n unpacked[48] = extractBits(val_9, 0u, 6u);\n unpacked[49] = extractBits(val_9, 6u, 6u);\n unpacked[50] = extractBits(val_9, 12u, 6u);\n unpacked[51] = extractBits(val_9, 18u, 6u);\n unpacked[52] = extractBits(val_9, 24u, 6u);\n unpacked[53] = extractBits(val_9, 30u, 2u);\n\n let val_10 = blocks[packed_offset + 10];\n unpacked[53] = insertBits(unpacked[53], extractBits(val_10, 0u, 4u), 2u, 4u);\n unpacked[54] = extractBits(val_10, 4u, 
6u);\n unpacked[55] = extractBits(val_10, 10u, 6u);\n unpacked[56] = extractBits(val_10, 16u, 6u);\n unpacked[57] = extractBits(val_10, 22u, 6u);\n unpacked[58] = extractBits(val_10, 28u, 4u);\n\n let val_11 = blocks[packed_offset + 11];\n unpacked[58] = insertBits(unpacked[58], extractBits(val_11, 0u, 2u), 4u, 2u);\n unpacked[59] = extractBits(val_11, 2u, 6u);\n unpacked[60] = extractBits(val_11, 8u, 6u);\n unpacked[61] = extractBits(val_11, 14u, 6u);\n unpacked[62] = extractBits(val_11, 20u, 6u);\n unpacked[63] = extractBits(val_11, 26u, 6u);\n \n let val_12 = blocks[packed_offset + 12];\n unpacked[64] = extractBits(val_12, 0u, 6u);\n unpacked[65] = extractBits(val_12, 6u, 6u);\n unpacked[66] = extractBits(val_12, 12u, 6u);\n unpacked[67] = extractBits(val_12, 18u, 6u);\n unpacked[68] = extractBits(val_12, 24u, 6u);\n unpacked[69] = extractBits(val_12, 30u, 2u);\n\n let val_13 = blocks[packed_offset + 13];\n unpacked[69] = insertBits(unpacked[69], extractBits(val_13, 0u, 4u), 2u, 4u);\n unpacked[70] = extractBits(val_13, 4u, 6u);\n unpacked[71] = extractBits(val_13, 10u, 6u);\n unpacked[72] = extractBits(val_13, 16u, 6u);\n unpacked[73] = extractBits(val_13, 22u, 6u);\n unpacked[74] = extractBits(val_13, 28u, 4u);\n\n let val_14 = blocks[packed_offset + 14];\n unpacked[74] = insertBits(unpacked[74], extractBits(val_14, 0u, 2u), 4u, 2u);\n unpacked[75] = extractBits(val_14, 2u, 6u);\n unpacked[76] = extractBits(val_14, 8u, 6u);\n unpacked[77] = extractBits(val_14, 14u, 6u);\n unpacked[78] = extractBits(val_14, 20u, 6u);\n unpacked[79] = extractBits(val_14, 26u, 6u);\n\n let val_15 = blocks[packed_offset + 15];\n unpacked[80] = extractBits(val_15, 0u, 6u);\n unpacked[81] = extractBits(val_15, 6u, 6u);\n unpacked[82] = extractBits(val_15, 12u, 6u);\n unpacked[83] = extractBits(val_15, 18u, 6u);\n unpacked[84] = extractBits(val_15, 24u, 6u);\n unpacked[85] = extractBits(val_15, 30u, 2u);\n \n let val_16 = blocks[packed_offset + 16];\n unpacked[85] = 
insertBits(unpacked[85], extractBits(val_16, 0u, 4u), 2u, 4u);\n unpacked[86] = extractBits(val_16, 4u, 6u);\n unpacked[87] = extractBits(val_16, 10u, 6u);\n unpacked[88] = extractBits(val_16, 16u, 6u);\n unpacked[89] = extractBits(val_16, 22u, 6u);\n unpacked[90] = extractBits(val_16, 28u, 4u);\n\n let val_17 = blocks[packed_offset + 17];\n unpacked[90] = insertBits(unpacked[90], extractBits(val_17, 0u, 2u), 4u, 2u);\n unpacked[91] = extractBits(val_17, 2u, 6u);\n unpacked[92] = extractBits(val_17, 8u, 6u);\n unpacked[93] = extractBits(val_17, 14u, 6u);\n unpacked[94] = extractBits(val_17, 20u, 6u);\n unpacked[95] = extractBits(val_17, 26u, 6u);\n\n let val_18 = blocks[packed_offset + 18];\n unpacked[96] = extractBits(val_18, 0u, 6u);\n unpacked[97] = extractBits(val_18, 6u, 6u);\n unpacked[98] = extractBits(val_18, 12u, 6u);\n unpacked[99] = extractBits(val_18, 18u, 6u);\n unpacked[100] = extractBits(val_18, 24u, 6u);\n unpacked[101] = extractBits(val_18, 30u, 2u);\n\n let val_19 = blocks[packed_offset + 19];\n unpacked[101] = insertBits(unpacked[101], extractBits(val_19, 0u, 4u), 2u, 4u);\n unpacked[102] = extractBits(val_19, 4u, 6u);\n unpacked[103] = extractBits(val_19, 10u, 6u);\n unpacked[104] = extractBits(val_19, 16u, 6u);\n unpacked[105] = extractBits(val_19, 22u, 6u);\n unpacked[106] = extractBits(val_19, 28u, 4u);\n \n let val_20 = blocks[packed_offset + 20];\n unpacked[106] = insertBits(unpacked[106], extractBits(val_20, 0u, 2u), 4u, 2u);\n unpacked[107] = extractBits(val_20, 2u, 6u);\n unpacked[108] = extractBits(val_20, 8u, 6u);\n unpacked[109] = extractBits(val_20, 14u, 6u);\n unpacked[110] = extractBits(val_20, 20u, 6u);\n unpacked[111] = extractBits(val_20, 26u, 6u);\n\n let val_21 = blocks[packed_offset + 21];\n unpacked[112] = extractBits(val_21, 0u, 6u);\n unpacked[113] = extractBits(val_21, 6u, 6u);\n unpacked[114] = extractBits(val_21, 12u, 6u);\n unpacked[115] = extractBits(val_21, 18u, 6u);\n unpacked[116] = extractBits(val_21, 24u, 6u);\n 
unpacked[117] = extractBits(val_21, 30u, 2u);\n\n let val_22 = blocks[packed_offset + 22];\n unpacked[117] = insertBits(unpacked[117], extractBits(val_22, 0u, 4u), 2u, 4u);\n unpacked[118] = extractBits(val_22, 4u, 6u);\n unpacked[119] = extractBits(val_22, 10u, 6u);\n unpacked[120] = extractBits(val_22, 16u, 6u);\n unpacked[121] = extractBits(val_22, 22u, 6u);\n unpacked[122] = extractBits(val_22, 28u, 4u);\n\n let val_23 = blocks[packed_offset + 23];\n unpacked[122] = insertBits(unpacked[122], extractBits(val_23, 0u, 2u), 4u, 2u);\n unpacked[123] = extractBits(val_23, 2u, 6u);\n unpacked[124] = extractBits(val_23, 8u, 6u);\n unpacked[125] = extractBits(val_23, 14u, 6u);\n unpacked[126] = extractBits(val_23, 20u, 6u);\n unpacked[127] = extractBits(val_23, 26u, 6u);\n}\n","\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.nbr || global_id.y >= args.nbc) {\n return;\n }\n \n let blocks_start: u32 = args.blocks_offset + ((global_id.x * args.nbc + global_id.y) * 24u);\n unpack_block_128_bit_depth_6(blocks_start);\n \n let b03: u32 = blocks_start;\n let b45: u32 = blocks_start + 16u; \n for (var r = 0u; r < ").concat(Ve,"u; r++) {\n let unpacked_idx = r * ").concat(8,"u;\n let b03_idx = b03 + r;\n let b45_idx = b45 + (r / 2u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 0u, 32u);\n blocks[b45_idx] = insertBits(blocks[b45_idx], extractBits(unpacked[unpacked_idx], 4u, 2u), ((r * 16u) % 32u), 16u);\n }\n\n for (var c = 1u; c < ").concat(8,"u; c++) {\n for (var r = 0u; r < ").concat(Ve,"u; r++) {\n let unpacked_idx = r * ").concat(8,"u + c;\n let b03_idx = b03 + r;\n let b45_idx = b45 + (r / 2u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 4 * 
c, 4u);\n blocks[b45_idx] = insertBits(blocks[b45_idx], extractBits(unpacked[unpacked_idx], 4u, 2u), ((r * 16u) % 32u) + (2 * c), 2u);\n }\n }\n}\n\n").concat(I),7:"\n\nstruct argsStruct {\n nbr: u32,\n nbc: u32,\n blocks_offset: u32\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar blocks: array;\n\n".concat("\nvar unpacked: array;\n\nfn unpack_block_128_bit_depth_7(packed_offset: u32) {\n let val_0 = blocks[packed_offset];\n unpacked[0] = extractBits(val_0, 0u, 7u);\n unpacked[1] = extractBits(val_0, 7u, 7u);\n unpacked[2] = extractBits(val_0, 14u, 7u);\n unpacked[3] = extractBits(val_0, 21u, 7u);\n unpacked[4] = extractBits(val_0, 28u, 4u);\n \n let val_1 = blocks[packed_offset + 1];\n unpacked[4] = insertBits(unpacked[4], extractBits(val_1, 0u, 3u), 4u, 3u);\n unpacked[5] = extractBits(val_1, 3u, 7u);\n unpacked[6] = extractBits(val_1, 10u, 7u);\n unpacked[7] = extractBits(val_1, 17u, 7u);\n unpacked[8] = extractBits(val_1, 24u, 7u);\n unpacked[9] = extractBits(val_1, 31u, 1u);\n \n let val_2 = blocks[packed_offset + 2];\n unpacked[9] = insertBits(unpacked[9], extractBits(val_2, 0u, 6u), 1u, 6u);\n unpacked[10] = extractBits(val_2, 6u, 7u);\n unpacked[11] = extractBits(val_2, 13u, 7u);\n unpacked[12] = extractBits(val_2, 20u, 7u);\n unpacked[13] = extractBits(val_2, 27u, 5u);\n \n let val_3 = blocks[packed_offset + 3];\n unpacked[13] = insertBits(unpacked[13], extractBits(val_3, 0u, 2u), 5u, 2u);\n unpacked[14] = extractBits(val_3, 2u, 7u);\n unpacked[15] = extractBits(val_3, 9u, 7u);\n unpacked[16] = extractBits(val_3, 16u, 7u);\n unpacked[17] = extractBits(val_3, 23u, 7u);\n unpacked[18] = extractBits(val_3, 30u, 2u);\n \n let val_4 = blocks[packed_offset + 4];\n unpacked[18] = insertBits(unpacked[18], extractBits(val_4, 0u, 5u), 2u, 5u);\n unpacked[19] = extractBits(val_4, 5u, 7u);\n unpacked[20] = extractBits(val_4, 12u, 7u);\n unpacked[21] = extractBits(val_4, 19u, 7u);\n unpacked[22] = extractBits(val_4, 26u, 6u);\n \n let 
val_5 = blocks[packed_offset + 5];\n unpacked[22] = insertBits(unpacked[22], extractBits(val_5, 0u, 1u), 6u, 1u);\n unpacked[23] = extractBits(val_5, 1u, 7u);\n unpacked[24] = extractBits(val_5, 8u, 7u);\n unpacked[25] = extractBits(val_5, 15u, 7u);\n unpacked[26] = extractBits(val_5, 22u, 7u);\n unpacked[27] = extractBits(val_5, 29u, 3u);\n \n let val_6 = blocks[packed_offset + 6];\n unpacked[27] = insertBits(unpacked[27], extractBits(val_6, 0u, 4u), 3u, 4u);\n unpacked[28] = extractBits(val_6, 4u, 7u);\n unpacked[29] = extractBits(val_6, 11u, 7u);\n unpacked[30] = extractBits(val_6, 18u, 7u);\n unpacked[31] = extractBits(val_6, 25u, 7u);\n \n let val_7 = blocks[packed_offset + 7];\n unpacked[32] = extractBits(val_7, 0u, 7u);\n unpacked[33] = extractBits(val_7, 7u, 7u);\n unpacked[34] = extractBits(val_7, 14u, 7u);\n unpacked[35] = extractBits(val_7, 21u, 7u);\n unpacked[36] = extractBits(val_7, 28u, 4u);\n \n let val_8 = blocks[packed_offset + 8];\n unpacked[36] = insertBits(unpacked[36], extractBits(val_8, 0u, 3u), 4u, 3u);\n unpacked[37] = extractBits(val_8, 3u, 7u);\n unpacked[38] = extractBits(val_8, 10u, 7u);\n unpacked[39] = extractBits(val_8, 17u, 7u);\n unpacked[40] = extractBits(val_8, 24u, 7u);\n unpacked[41] = extractBits(val_8, 31u, 1u);\n \n let val_9 = blocks[packed_offset + 9];\n unpacked[41] = insertBits(unpacked[41], extractBits(val_9, 0u, 6u), 1u, 6u);\n unpacked[42] = extractBits(val_9, 6u, 7u);\n unpacked[43] = extractBits(val_9, 13u, 7u);\n unpacked[44] = extractBits(val_9, 20u, 7u);\n unpacked[45] = extractBits(val_9, 27u, 5u);\n \n let val_10 = blocks[packed_offset + 10];\n unpacked[45] = insertBits(unpacked[45], extractBits(val_10, 0u, 2u), 5u, 2u);\n unpacked[46] = extractBits(val_10, 2u, 7u);\n unpacked[47] = extractBits(val_10, 9u, 7u);\n unpacked[48] = extractBits(val_10, 16u, 7u);\n unpacked[49] = extractBits(val_10, 23u, 7u);\n unpacked[50] = extractBits(val_10, 30u, 2u);\n \n let val_11 = blocks[packed_offset + 11];\n unpacked[50] = 
insertBits(unpacked[50], extractBits(val_11, 0u, 5u), 2u, 5u);\n unpacked[51] = extractBits(val_11, 5u, 7u);\n unpacked[52] = extractBits(val_11, 12u, 7u);\n unpacked[53] = extractBits(val_11, 19u, 7u);\n unpacked[54] = extractBits(val_11, 26u, 6u);\n \n let val_12 = blocks[packed_offset + 12];\n unpacked[54] = insertBits(unpacked[54], extractBits(val_12, 0u, 1u), 6u, 1u);\n unpacked[55] = extractBits(val_12, 1u, 7u);\n unpacked[56] = extractBits(val_12, 8u, 7u);\n unpacked[57] = extractBits(val_12, 15u, 7u);\n unpacked[58] = extractBits(val_12, 22u, 7u);\n unpacked[59] = extractBits(val_12, 29u, 3u);\n \n let val_13 = blocks[packed_offset + 13];\n unpacked[59] = insertBits(unpacked[59], extractBits(val_13, 0u, 4u), 3u, 4u);\n unpacked[60] = extractBits(val_13, 4u, 7u);\n unpacked[61] = extractBits(val_13, 11u, 7u);\n unpacked[62] = extractBits(val_13, 18u, 7u);\n unpacked[63] = extractBits(val_13, 25u, 7u);\n \n let val_14 = blocks[packed_offset + 14];\n unpacked[64] = extractBits(val_14, 0u, 7u);\n unpacked[65] = extractBits(val_14, 7u, 7u);\n unpacked[66] = extractBits(val_14, 14u, 7u);\n unpacked[67] = extractBits(val_14, 21u, 7u);\n unpacked[68] = extractBits(val_14, 28u, 4u);\n \n let val_15 = blocks[packed_offset + 15];\n unpacked[68] = insertBits(unpacked[68], extractBits(val_15, 0u, 3u), 4u, 3u);\n unpacked[69] = extractBits(val_15, 3u, 7u);\n unpacked[70] = extractBits(val_15, 10u, 7u);\n unpacked[71] = extractBits(val_15, 17u, 7u);\n unpacked[72] = extractBits(val_15, 24u, 7u);\n unpacked[73] = extractBits(val_15, 31u, 1u);\n \n let val_16 = blocks[packed_offset + 16];\n unpacked[73] = insertBits(unpacked[73], extractBits(val_16, 0u, 6u), 1u, 6u);\n unpacked[74] = extractBits(val_16, 6u, 7u);\n unpacked[75] = extractBits(val_16, 13u, 7u);\n unpacked[76] = extractBits(val_16, 20u, 7u);\n unpacked[77] = extractBits(val_16, 27u, 5u);\n \n let val_17 = blocks[packed_offset + 17];\n unpacked[77] = insertBits(unpacked[77], extractBits(val_17, 0u, 2u), 5u, 
2u);\n unpacked[78] = extractBits(val_17, 2u, 7u);\n unpacked[79] = extractBits(val_17, 9u, 7u);\n unpacked[80] = extractBits(val_17, 16u, 7u);\n unpacked[81] = extractBits(val_17, 23u, 7u);\n unpacked[82] = extractBits(val_17, 30u, 2u);\n \n let val_18 = blocks[packed_offset + 18];\n unpacked[82] = insertBits(unpacked[82], extractBits(val_18, 0u, 5u), 2u, 5u);\n unpacked[83] = extractBits(val_18, 5u, 7u);\n unpacked[84] = extractBits(val_18, 12u, 7u);\n unpacked[85] = extractBits(val_18, 19u, 7u);\n unpacked[86] = extractBits(val_18, 26u, 6u);\n \n let val_19 = blocks[packed_offset + 19];\n unpacked[86] = insertBits(unpacked[86], extractBits(val_19, 0u, 1u), 6u, 1u);\n unpacked[87] = extractBits(val_19, 1u, 7u);\n unpacked[88] = extractBits(val_19, 8u, 7u);\n unpacked[89] = extractBits(val_19, 15u, 7u);\n unpacked[90] = extractBits(val_19, 22u, 7u);\n unpacked[91] = extractBits(val_19, 29u, 3u);\n \n let val_20 = blocks[packed_offset + 20];\n unpacked[91] = insertBits(unpacked[91], extractBits(val_20, 0u, 4u), 3u, 4u);\n unpacked[92] = extractBits(val_20, 4u, 7u);\n unpacked[93] = extractBits(val_20, 11u, 7u);\n unpacked[94] = extractBits(val_20, 18u, 7u);\n unpacked[95] = extractBits(val_20, 25u, 7u);\n \n let val_21 = blocks[packed_offset + 21];\n unpacked[96] = extractBits(val_21, 0u, 7u);\n unpacked[97] = extractBits(val_21, 7u, 7u);\n unpacked[98] = extractBits(val_21, 14u, 7u);\n unpacked[99] = extractBits(val_21, 21u, 7u);\n unpacked[100] = extractBits(val_21, 28u, 4u);\n \n let val_22 = blocks[packed_offset + 22];\n unpacked[100] = insertBits(unpacked[100], extractBits(val_22, 0u, 3u), 4u, 3u);\n unpacked[101] = extractBits(val_22, 3u, 7u);\n unpacked[102] = extractBits(val_22, 10u, 7u);\n unpacked[103] = extractBits(val_22, 17u, 7u);\n unpacked[104] = extractBits(val_22, 24u, 7u);\n unpacked[105] = extractBits(val_22, 31u, 1u);\n \n let val_23 = blocks[packed_offset + 23];\n unpacked[105] = insertBits(unpacked[105], extractBits(val_23, 0u, 6u), 1u, 6u);\n 
unpacked[106] = extractBits(val_23, 6u, 7u);\n unpacked[107] = extractBits(val_23, 13u, 7u);\n unpacked[108] = extractBits(val_23, 20u, 7u);\n unpacked[109] = extractBits(val_23, 27u, 5u);\n \n let val_24 = blocks[packed_offset + 24];\n unpacked[109] = insertBits(unpacked[109], extractBits(val_24, 0u, 2u), 5u, 2u);\n unpacked[110] = extractBits(val_24, 2u, 7u);\n unpacked[111] = extractBits(val_24, 9u, 7u);\n unpacked[112] = extractBits(val_24, 16u, 7u);\n unpacked[113] = extractBits(val_24, 23u, 7u);\n unpacked[114] = extractBits(val_24, 30u, 2u);\n \n let val_25 = blocks[packed_offset + 25];\n unpacked[114] = insertBits(unpacked[114], extractBits(val_25, 0u, 5u), 2u, 5u);\n unpacked[115] = extractBits(val_25, 5u, 7u);\n unpacked[116] = extractBits(val_25, 12u, 7u);\n unpacked[117] = extractBits(val_25, 19u, 7u);\n unpacked[118] = extractBits(val_25, 26u, 6u);\n \n let val_26 = blocks[packed_offset + 26];\n unpacked[118] = insertBits(unpacked[118], extractBits(val_26, 0u, 1u), 6u, 1u);\n unpacked[119] = extractBits(val_26, 1u, 7u);\n unpacked[120] = extractBits(val_26, 8u, 7u);\n unpacked[121] = extractBits(val_26, 15u, 7u);\n unpacked[122] = extractBits(val_26, 22u, 7u);\n unpacked[123] = extractBits(val_26, 29u, 3u);\n \n let val_27 = blocks[packed_offset + 27];\n unpacked[123] = insertBits(unpacked[123], extractBits(val_27, 0u, 4u), 3u, 4u);\n unpacked[124] = extractBits(val_27, 4u, 7u);\n unpacked[125] = extractBits(val_27, 11u, 7u);\n unpacked[126] = extractBits(val_27, 18u, 7u);\n unpacked[127] = extractBits(val_27, 25u, 7u); \n}\n","\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.nbr || global_id.y >= args.nbc) {\n return;\n }\n \n let blocks_start: u32 = args.blocks_offset + ((global_id.x * args.nbc + global_id.y) * 28u);\n 
unpack_block_128_bit_depth_7(blocks_start);\n \n let b03: u32 = blocks_start;\n let b45: u32 = blocks_start + 16u;\n let b6: u32 = blocks_start + 24u; \n for (var r = 0u; r < ").concat(Ve,"u; r++) {\n let unpacked_idx = r * ").concat(8,"u;\n let b03_idx = b03 + r;\n let b45_idx = b45 + (r / 2u);\n let b6_idx = b6 + (r / 4u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 0, 32u);\n blocks[b45_idx] = insertBits(blocks[b45_idx], extractBits(unpacked[unpacked_idx], 4u, 2u), ((r * 16u) % 32u), 16u);\n blocks[b6_idx] = insertBits(blocks[b6_idx], extractBits(unpacked[unpacked_idx], 6u, 1u), ((r * 8u) % 32u), 8u);\n }\n \n for (var c = 1u; c < ").concat(8,"u; c++) {\n for (var r = 0u; r < ").concat(Ve,"u; r++) {\n let unpacked_idx = r * ").concat(8,"u + c;\n let b03_idx = b03 + r;\n let b45_idx = b45 + (r / 2u);\n let b6_idx = b6 + (r / 4u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 4 * c, 4u);\n blocks[b45_idx] = insertBits(blocks[b45_idx], extractBits(unpacked[unpacked_idx], 4u, 2u), ((r * 16u) % 32u) + (2 * c), 2u);\n blocks[b6_idx] = insertBits(blocks[b6_idx], extractBits(unpacked[unpacked_idx], 6u, 1u), ((r * 8u) % 32u) + c, 1u);\n }\n }\n}\n\n").concat(I)},/* Qe: lookup table keyed by 3, 5, 6 and 7; each value is the name of the mixed 16x8 preprocess_blocks shader for the N-bit variant with the same N (keys presumably are weight bit depths - confirm against the caller). */Qe={3:"pv_picollm_weight_block_mixed_16x8_preprocess_blocks_3bit_shader",5:"pv_picollm_weight_block_mixed_16x8_preprocess_blocks_5bit_shader",6:"pv_picollm_weight_block_mixed_16x8_preprocess_blocks_6bit_shader",7:"pv_picollm_weight_block_mixed_16x8_preprocess_blocks_7bit_shader"},/* Xe: WGSL constant preamble shared by the shader built in Ze: BM=8, BN=32, TM=2, TN=16, TC=128, ROW_PER_BLOCK=Ve, COL_PER_BLOCK=8, plus the derived VEC_COL_PER_BLOCK and block_size. block_size references bit_depth, which this string does not declare, so the embedding shader has to. */Xe="\nconst BM = ".concat(8,"u;\nconst BN = ").concat(32,"u;\n\nconst TM = ").concat(2,"u;\nconst TN = ").concat(16,"u;\n\nconst TC = ").concat(128,"u;\n\nconst ROW_PER_BLOCK = ").concat(Ve,"u;\nconst COL_PER_BLOCK = ").concat(8,"u;\n\nconst VEC_COL_PER_BLOCK = COL_PER_BLOCK / 4u;\n\nconst block_size: u32 = (COL_PER_BLOCK * ROW_PER_BLOCK * bit_depth) / 32u;\n\n"),/* He: WGSL snippets keyed 1..8 that Ze splices into its weight loader as He[e]. Each snippet reads packed words of one row from blocks (starting at src) and stores VEC_COL_PER_BLOCK four-component f32 vectors into shared_w starting at dst; the entries for 3, 5, 6 and 7 merge several bit fields with insertBits. */He={1:"\n let b0 = blocks[src + (row / 4u)];\n\n let b0_offset_base = (row 
* 8u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b0_offset = b0_offset_base + (c * 4u); \n shared_w[dst + c] = vec4(\n f32(extractBits(b0, b0_offset, 1u)), \n f32(extractBits(b0, b0_offset + 1, 1u)),\n f32(extractBits(b0, b0_offset + 2, 1u)),\n f32(extractBits(b0, b0_offset + 3, 1u)));\n }\n",2:"\n let b01 = blocks[src + (row / 2u)];\n \n let b01_offset_base = (row * 16u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) { \n let b01_offset = b01_offset_base + (c * 8u);\n shared_w[dst + c] = vec4(\n f32(extractBits(b01, b01_offset, 2u)), \n f32(extractBits(b01, b01_offset + 2, 2u)),\n f32(extractBits(b01, b01_offset + 4, 2u)),\n f32(extractBits(b01, b01_offset + 6, 2u)));\n } \n",3:"\n let b01 = blocks[src + (row / 2u)];\n let b2 = blocks[src + 8u + (row / 4u)]; \n\n let b01_offset_base = (row * 16u) % 32u;\n let b2_offset_base = (row * 8u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b01_offset = b01_offset_base + (c * 8u);\n let b2_offset = b2_offset_base + (c * 4u);\n\n shared_w[dst + c] = vec4(\n f32(insertBits(extractBits(b01, b01_offset, 2u), extractBits(b2, b2_offset, 1u), 2u, 1u)), \n f32(insertBits(extractBits(b01, b01_offset + 2, 2u), extractBits(b2, b2_offset + 1, 1u), 2u, 1u)),\n f32(insertBits(extractBits(b01, b01_offset + 4, 2u), extractBits(b2, b2_offset + 2, 1u), 2u, 1u)),\n f32(insertBits(extractBits(b01, b01_offset + 6, 2u), extractBits(b2, b2_offset + 3, 1u), 2u, 1u))); \n }\n",4:"\n let b03 = blocks[src + row];\n \n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b03_offset = (c * 16u); \n shared_w[dst + c] = vec4(\n f32(extractBits(b03, b03_offset, 4u)), \n f32(extractBits(b03, b03_offset + 4, 4u)),\n f32(extractBits(b03, b03_offset + 8, 4u)),\n f32(extractBits(b03, b03_offset + 12, 4u)));\n }\n",5:"\n let b03 = blocks[src + row];\n let b4 = blocks[src + 16u + (row / 4u)];\n \n let b4_offset_base = (row * 8u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b03_offset = (c * 16u);\n let 
b4_offset = b4_offset_base + (c * 4u);\n shared_w[dst + c] = vec4(\n f32(insertBits(extractBits(b03, b03_offset, 4u), extractBits(b4, b4_offset, 1u), 4u, 1u)), \n f32(insertBits(extractBits(b03, b03_offset + 4, 4u), extractBits(b4, b4_offset + 1, 1u), 4u, 1u)),\n f32(insertBits(extractBits(b03, b03_offset + 8, 4u), extractBits(b4, b4_offset + 2, 1u), 4u, 1u)),\n f32(insertBits(extractBits(b03, b03_offset + 12, 4u), extractBits(b4, b4_offset + 3, 1u), 4u, 1u)));\n }\n",6:"\n let b03 = blocks[src + row];\n let b45 = blocks[src + 16u + (row / 2u)];\n \n let b45_offset_base = (row * 16u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) { \n let b03_offset = (c * 16u);\n let b45_offset = b45_offset_base + (c * 8u);\n shared_w[dst + c] = vec4(\n f32(insertBits(extractBits(b03, b03_offset, 4u), extractBits(b45, b45_offset, 2u), 4u, 2u)), \n f32(insertBits(extractBits(b03, b03_offset + 4, 4u), extractBits(b45, b45_offset + 2, 2u), 4u, 2u)),\n f32(insertBits(extractBits(b03, b03_offset + 8, 4u), extractBits(b45, b45_offset + 4, 2u), 4u, 2u)),\n f32(insertBits(extractBits(b03, b03_offset + 12, 4u), extractBits(b45, b45_offset + 6, 2u), 4u, 2u)));\n }\n",7:"\n let b03 = blocks[src + row];\n let b45 = blocks[src + 16u + (row / 2u)];\n let b6 = blocks[src + 24u + (row / 4u)];\n \n let b45_offset_base = (row * 16u) % 32u;\n let b6_offset_base = (row * 8u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b03_offset = (c * 16u);\n let b45_offset = b45_offset_base + (c * 8u);\n let b6_offset = b6_offset_base + (c * 4u);\n \n shared_w[dst + c] = vec4(\n f32(insertBits(insertBits(extractBits(b03, b03_offset, 4u), extractBits(b45, b45_offset, 2u), 4u, 2u), extractBits(b6, b6_offset, 1u), 6u, 1u)), \n f32(insertBits(insertBits(extractBits(b03, b03_offset + 4, 4u), extractBits(b45, b45_offset + 2, 2u), 4u, 2u), extractBits(b6, b6_offset + 1, 1u), 6u, 1u)),\n f32(insertBits(insertBits(extractBits(b03, b03_offset + 8, 4u), extractBits(b45, b45_offset + 4, 2u), 4u, 2u), 
extractBits(b6, b6_offset + 2, 1u), 6u, 1u)),\n f32(insertBits(insertBits(extractBits(b03, b03_offset + 12, 4u), extractBits(b45, b45_offset + 6, 2u), 4u, 2u), extractBits(b6, b6_offset + 3, 1u), 6u, 1u)));\n }\n",8:"\n let b07_offset = src + (row * 2);\n \n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b07 = blocks[b07_offset + c];\n shared_w[dst + c] = vec4(\n f32(extractBits(b07, 0u, 8u)), \n f32(extractBits(b07, 8u, 8u)),\n f32(extractBits(b07, 16u, 8u)),\n f32(extractBits(b07, 24u, 8u)));\n }\n"},/* Ze(e): returns the WGSL source of a tiled compute shader specialised for e, which is emitted as const bit_depth and selects the unpack snippet He[e]. For every bk_idx in [0, args.k) the shader stages x into shared_x, the two from_fp510-decoded 16-bit halves of each metas word into shared_ab and the unpacked weights into shared_w, accumulates alpha * sum(x) + beta * dot(w, x) into local_results, and after the loop adds local_results into y. */Ze=function(e){return"\n\n".concat("\nstruct argsStruct {\n n: u32,\n m: u32,\n total_nbc: u32,\n k: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array;\n","\n\n").concat(Xe,"\n\n").concat("\nvar shared_x: array, BN * VEC_COL_PER_BLOCK>;\nvar shared_ab: array;\nvar shared_w: array, BM * ROW_PER_BLOCK * VEC_COL_PER_BLOCK>;\n\nvar local_x: array, TN * VEC_COL_PER_BLOCK>;\nvar local_x_sums: array;\nvar local_results: array;\n","\n\n").concat(Ke,"\n\n").concat("\n fn divide_pad(a: u32, b: u32) -> u32 { \n return (a + b - 1) / b;\n }\n","\n\nconst bit_depth: u32 = ").concat(e,"u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n \n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(\n @builtin(local_invocation_id) local_id: vec3,\n @builtin(workgroup_id) workgroup_id: vec3\n) {\n ").concat("\n let tid = local_id.x;\n let bm_idx = workgroup_id.x;\n let bn_idx = workgroup_id.y;\n\n let local_bm_idx = bm_idx * BM;\n let local_bn_idx = bn_idx * BN;\n \n let n_idx = tid % (BN / TN);\n let k_idx = tid / (BN / TN) / (BM * ROW_PER_BLOCK / TM);\n let m_idx = tid / (BN / TN) % (BM * ROW_PER_BLOCK / TM);\n","\n \n for (var bk_idx 
= 0u; bk_idx < args.k; bk_idx++) { \n ").concat("\n let total_work_x = VEC_COL_PER_BLOCK * BN;\n for (var local_idx = 0u; local_idx < divide_pad(total_work_x, TC); local_idx++) {\n let idx = local_idx * TC + tid;\n if (idx < total_work_x) { \n let n_load_idx = local_bn_idx + idx / VEC_COL_PER_BLOCK;\n let inner_idx = idx % VEC_COL_PER_BLOCK;\n \n if (bk_idx < args.k && n_load_idx < args.n) { \n let x_idx = (args.x_offset / 4u) + ((bk_idx * args.n + n_load_idx) * VEC_COL_PER_BLOCK + inner_idx); \n shared_x[idx] = x[x_idx];\n } else {\n shared_x[idx] = vec4(0.0);\n }\n }\n }\n","\n ").concat("\n let total_work_ab = BM * 2;\n for (var local_idx = 0u; local_idx < divide_pad(total_work_ab, TC); local_idx++) {\n let idx = local_idx * TC + tid;\n if (idx < total_work_ab) {\n let m_load_idx = local_bm_idx + idx / 2; \n let inner_idx = (idx % 2) * 16u;\n \n if (m_load_idx < args.m && bk_idx < args.k) {\n let ab_bits = extractBits(metas[args.metas_offset + (m_load_idx * args.k + bk_idx)], inner_idx, 16u);\n shared_ab[idx] = from_fp510(ab_bits); \n } else {\n shared_ab[idx] = 0.0;\n }\n }\n }\n"," \n ").concat(function(e){return"\n let total_work_w = BM * ROW_PER_BLOCK;\n for (var local_idx = 0u; local_idx < divide_pad(total_work_w, TC); local_idx++) {\n let idx = local_idx * TC + tid;\n if (idx < total_work_w) {\n let m_load_idx = local_bm_idx + idx / ROW_PER_BLOCK;\n let row = idx % ROW_PER_BLOCK;\n let dst = idx * VEC_COL_PER_BLOCK;\n\n if (m_load_idx < args.m) {\n let src = args.blocks_offset + (m_load_idx * args.k + bk_idx) * block_size;\n ".concat(He[e],"\n } else { \n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n shared_w[dst + c] = vec4(0.0);\n }\n }\n }\n }\n")}(e)," \n workgroupBarrier();\n \n ").concat("\nfor (var tn_idx = 0u; tn_idx < TN; tn_idx++) { \n var x_sum_vec = vec4(0.0); \n let local_x_idx = tn_idx * VEC_COL_PER_BLOCK;\n let shared_x_idx = (n_idx * TN + tn_idx) * VEC_COL_PER_BLOCK + (k_idx * VEC_COL_PER_BLOCK);\n for (var c = 0u; c < VEC_COL_PER_BLOCK; 
c++) {\n local_x[local_x_idx + c] = shared_x[shared_x_idx + c];\n x_sum_vec += local_x[local_x_idx + c];\n }\n local_x_sums[tn_idx] = x_sum_vec.x + x_sum_vec.y + x_sum_vec.z + x_sum_vec.w; \n}\n","\n ").concat("\n for (var tm_idx = 0u; tm_idx < TM; tm_idx++) { \n let shared_ab_idx = ((m_idx * TM + tm_idx) / ROW_PER_BLOCK + k_idx) * 2;\n let alpha = shared_ab[shared_ab_idx];\n let beta = shared_ab[shared_ab_idx + 1]; \n let shared_w_idx = ((m_idx * TM + tm_idx) + k_idx) * VEC_COL_PER_BLOCK;\n \n for (var tn_idx = 0u; tn_idx < TN; tn_idx++) { \n let local_x_idx = tn_idx * VEC_COL_PER_BLOCK;\n \n var swx_vec = vec4(0.0); \n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) { \n swx_vec += shared_w[shared_w_idx + c] * local_x[local_x_idx + c];\n }\n let swx = swx_vec.x + swx_vec.y + swx_vec.z + swx_vec.w;\n \n let kappa = alpha * local_x_sums[tn_idx]; \n let results_idx = tm_idx * TN + tn_idx;\n local_results[results_idx] += kappa + (beta * swx);\n }\n }\n","\n workgroupBarrier();\n }\n \n ").concat("\nfor (var tm_idx = 0u; tm_idx < TM; tm_idx++) {\n let row = local_bm_idx * ROW_PER_BLOCK + (m_idx * TM + tm_idx); \n for (var tn_idx = 0u; tn_idx < TN; tn_idx++) { \n let col = local_bn_idx + (n_idx * TN + tn_idx);\n if (row < args.m * ROW_PER_BLOCK && col < args.n) {\n let y_idx = args.y_offset + ((row / ROW_PER_BLOCK) * args.n + col) * ROW_PER_BLOCK + (row % ROW_PER_BLOCK);\n let results_idx = tm_idx * TN + tn_idx;\n \n y[y_idx] += local_results[results_idx];\n }\n }\n}\n","\n}\n\n").concat(I,"\n")},/* Je: WGSL gather shader. For b = global_id.x below args.n and i = global_id.y below args.shape1 it copies x[x_offset + b * shape1 + indices[indices_offset + i]] into y[((i / 8) * n + b) * 8 + (i % 8)]; invocations outside that range return immediately. */Je="\nstruct argsStruct {\n n: u32,\n shape1: u32,\n x_offset: u32,\n indices_offset: u32, \n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array;\n\n@group(0) @binding(2)\nvar indices: array;\n\n@group(0) @binding(3)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn 
main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n || global_id.y >= args.shape1) {\n return;\n } \n\n let b = global_id.x;\n let i = global_id.y;\n \n let c = i / 8u;\n let j = i % 8u;\n y[((c * args.n) + b) * 8 + j] = x[args.x_offset + (b * args.shape1) + indices[args.indices_offset + i]];\n}\n\n".concat(I,"\n"),$e="\nstruct argsStruct {\n nvr: u32,\n nbc: u32, \n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar y: array>;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) { \n if (global_id.x > args.nvr) {\n return;\n }\n\n let x_start = global_id.x * args.nbc;\n var sum: vec4 = vec4(0.0, 0.0, 0.0, 0.0);\n for (var i = 0u; i < args.nbc; i++) {\n sum += x[x_start + i]; \n }\n y[global_id.x] += sum;\n}\n\n".concat(I),et="\nstruct argsStruct {\n n: u32,\n shape0: u32, \n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array;\n\n@group(0) @binding(2)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n || global_id.y >= args.shape0) {\n return;\n } \n \n let b = global_id.x;\n let i = global_id.y;\n \n let r = i / 16u;\n let j = i % 16u;\n y[(b * args.shape0) + (r * 16) + j] = x[(((r * args.n) + b) * 16) + j];\n}\n\n".concat(I),tt="\nstruct argsStruct {\n dimension: u32\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar bias: array;\n\n@group(0) @binding(2)\nvar y: array;\n\n".concat(Ke,"\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 
1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) { \n y[(global_id.x * args.dimension) + global_id.y] += bias[global_id.y];\n}\n\n").concat(I),rt={1:"\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(Ke,"\n\nconst block_size: u32 = 4u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n\n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(8,";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b0_start = row_blocks_start + br_offset + (bc * block_size);\n var b0_offset = 0u;\n \n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(8,"; j+=4) {\n \n var b0 = blocks[b0_start];\n \n let w0_0 = f32(extractBits(b0, b0_offset + j, 1u)); \n let w0_1 = f32(extractBits(b0, b0_offset + j + 1, 1u)); \n let w0_2 = f32(extractBits(b0, b0_offset + j + 2, 1u));\n let 
w0_3 = f32(extractBits(b0, b0_offset + j + 3, 1u));\n \n b0_offset = 8u;\n \n let w1_0 = f32(extractBits(b0, b0_offset + j, 1u)); \n let w1_1 = f32(extractBits(b0, b0_offset + j + 1, 1u)); \n let w1_2 = f32(extractBits(b0, b0_offset + j + 2, 1u));\n let w1_3 = f32(extractBits(b0, b0_offset + j + 3, 1u));\n \n b0_offset = 16u;\n \n let w2_0 = f32(extractBits(b0, b0_offset + j, 1u)); \n let w2_1 = f32(extractBits(b0, b0_offset + j + 1, 1u)); \n let w2_2 = f32(extractBits(b0, b0_offset + j + 2, 1u));\n let w2_3 = f32(extractBits(b0, b0_offset + j + 3, 1u));\n \n b0_offset = 24u;\n \n let w3_0 = f32(extractBits(b0, b0_offset + j, 1u)); \n let w3_1 = f32(extractBits(b0, b0_offset + j + 1, 1u)); \n let w3_2 = f32(extractBits(b0, b0_offset + j + 2, 1u));\n let w3_3 = f32(extractBits(b0, b0_offset + j + 3, 1u));\n \n b0_offset = 0u;\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w; \n }\n \n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(I),2:"\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(Ke,"\n\nconst block_size: u32 = 8u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if 
(global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n\n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(8,";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b01_start = row_blocks_start + (br_offset * 2u) + (bc * block_size);\n var b01_offset = 0u;\n \n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(8,"; j+=4) {\n \n var b01 = blocks[b01_start];\n \n let w0_0 = f32(extractBits(b01, b01_offset + (2u * j), 2u)); \n let w0_1 = f32(extractBits(b01, b01_offset + (2u * (j + 1)), 2u)); \n let w0_2 = f32(extractBits(b01, b01_offset + (2u * (j + 2)), 2u));\n let w0_3 = f32(extractBits(b01, b01_offset + (2u * (j + 3)), 2u));\n \n b01_offset = 16u;\n \n let w1_0 = f32(extractBits(b01, b01_offset + (2u * j), 2u)); \n let w1_1 = f32(extractBits(b01, b01_offset + (2u * (j + 1)), 2u)); \n let w1_2 = f32(extractBits(b01, b01_offset + (2u * (j + 2)), 2u));\n let w1_3 = f32(extractBits(b01, b01_offset + (2u * (j + 3)), 2u));\n \n b01_offset = 0u; \n b01 = blocks[b01_start + 1u];\n \n let w2_0 = f32(extractBits(b01, b01_offset + (2u * j), 2u)); \n let w2_1 = f32(extractBits(b01, b01_offset + (2u * (j + 1)), 2u)); \n let w2_2 = f32(extractBits(b01, b01_offset + (2u * (j + 2)), 2u));\n let w2_3 = f32(extractBits(b01, b01_offset + (2u * (j + 3)), 2u));\n \n b01_offset = 16u;\n \n let w3_0 = f32(extractBits(b01, b01_offset + (2u * j), 2u)); \n let w3_1 = f32(extractBits(b01, b01_offset + (2u * (j + 1)), 2u)); \n let w3_2 = f32(extractBits(b01, b01_offset + (2u * (j + 2)), 2u));\n let w3_3 = 
f32(extractBits(b01, b01_offset + (2u * (j + 3)), 2u));\n \n b01_offset = 0u;\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w; \n }\n \n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(I),3:"\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(Ke,"\n\nconst block_size: u32 = 12u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id : vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(8,";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n\n let x_start = ((args.x_offset + c) / 4u);\n \n var b01_start = row_blocks_start + (br_offset * 2u) + (bc * block_size);\n var b2_start = row_blocks_start + br_offset + (bc * 
block_size) + 8u;\n var b01_offset = 0u;\n var b2_offset = 0u;\n\n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(8,"; j+=4) { \n \n var b01 = blocks[b01_start];\n var b2 = blocks[b2_start];\n \n var b01_bits = extractBits(b01, b01_offset + (2u * j), 2u);\n var b2_bit = extractBits(b2, b2_offset + j, 1u);\n let w0_0 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 1)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 1), 1u);\n let w0_1 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 2)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 2), 1u);\n let w0_2 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 3)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 3), 1u);\n let w0_3 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_offset = 16u;\n b2_offset = 8u;\n \n b01_bits = extractBits(b01, b01_offset + (2u * j), 2u);\n b2_bit = extractBits(b2, b2_offset + j, 1u); \n let w1_0 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 1)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 1), 1u);\n let w1_1 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 2)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 2), 1u);\n let w1_2 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 3)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 3), 1u);\n let w1_3 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_offset = 0u;\n b2_offset = 16u; \n b01 = blocks[b01_start + 1u];\n \n b01_bits = extractBits(b01, b01_offset + (2u * j), 2u);\n b2_bit = extractBits(b2, b2_offset + j, 1u); \n let w2_0 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 1)), 2u);\n b2_bit = extractBits(b2, 
b2_offset + (j + 1), 1u);\n let w2_1 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 2)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 2), 1u);\n let w2_2 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 3)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 3), 1u);\n let w2_3 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_offset = 16u;\n b2_offset = 24u; \n \n b01_bits = extractBits(b01, b01_offset + (2u * j), 2u);\n b2_bit = extractBits(b2, b2_offset + j, 1u); \n let w3_0 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 1)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 1), 1u);\n let w3_1 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 2)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 2), 1u);\n let w3_2 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 3)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 3), 1u);\n let w3_3 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_offset = 0u;\n b2_offset = 0u; \n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n \n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(I),4:"\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) 
@binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(Ke,"\n\nconst block_size: u32 = 16u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(8,";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b03_start = row_blocks_start + (br_offset * 4u) + (bc * block_size); \n\n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n\n for (var j = 0u; j < ").concat(8,"; j+=4) {\n \n var b03 = blocks[b03_start]; \n \n let w0_0 = f32(extractBits(b03, 4u * j, 4u)); \n let w0_1 = f32(extractBits(b03, 4u * (j + 1), 4u)); \n let w0_2 = f32(extractBits(b03, 4u * (j + 2), 4u));\n let w0_3 = f32(extractBits(b03, 4u * (j + 3), 4u));\n \n b03 = blocks[b03_start + 1];\n \n let w1_0 = f32(extractBits(b03, 4u * j, 4u)); \n let w1_1 = f32(extractBits(b03, 4u * (j + 1), 4u)); \n let w1_2 = f32(extractBits(b03, 4u * (j + 2), 4u));\n let w1_3 = f32(extractBits(b03, 4u * (j + 3), 4u));\n \n b03 = blocks[b03_start + 2];\n \n let w2_0 = f32(extractBits(b03, 4u * j, 4u)); \n let w2_1 = f32(extractBits(b03, 4u * (j + 1), 4u)); \n let w2_2 = f32(extractBits(b03, 4u * (j + 2), 4u));\n let w2_3 = f32(extractBits(b03, 4u * (j + 3), 4u));\n \n b03 = blocks[b03_start + 3];\n \n let w3_0 = f32(extractBits(b03, 4u 
* j, 4u)); \n let w3_1 = f32(extractBits(b03, 4u * (j + 1), 4u)); \n let w3_2 = f32(extractBits(b03, 4u * (j + 2), 4u));\n let w3_3 = f32(extractBits(b03, 4u * (j + 3), 4u));\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(I),5:"\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(Ke,"\n\nconst block_size: u32 = 20u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(8,";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b03_start = row_blocks_start + 
(br_offset * 4u) + (bc * block_size);\n var b4_start = row_blocks_start + br_offset + (bc * block_size) + 16u;\n \n var b4_offset = 0u;\n \n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(8,"; j+=4) {\n \n var b03 = blocks[b03_start];\n var b4 = blocks[b4_start];\n \n var b03_bits = extractBits(b03, 4u * j, 4u);\n var b4_bit = extractBits(b4, b4_offset + j, 1u); \n let w0_0 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 1), 1u);\n let w0_1 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 2), 1u);\n let w0_2 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 3), 1u);\n let w0_3 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03 = blocks[b03_start + 1];\n b4_offset = 8u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b4_bit = extractBits(b4, b4_offset + j, 1u); \n let w1_0 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 1), 1u);\n let w1_1 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 2), 1u);\n let w1_2 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 3), 1u);\n let w1_3 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03 = blocks[b03_start + 2];\n b4_offset = 16u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b4_bit = extractBits(b4, b4_offset + j, 1u); \n let w2_0 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 1), 1u);\n let w2_1 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n 
b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 2), 1u);\n let w2_2 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 3), 1u);\n let w2_3 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03 = blocks[b03_start + 3];\n b4_offset = 24u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b4_bit = extractBits(b4, b4_offset + j, 1u); \n let w3_0 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 1), 1u);\n let w3_1 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 2), 1u);\n let w3_2 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 3), 1u);\n let w3_3 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b4_offset = 0u;\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(I),6:"\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(Ke,"\n\nconst block_size: u32 = 24u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 
1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(8,";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b03_start = row_blocks_start + (br_offset * 4u) + (bc * block_size);\n var b45_start = row_blocks_start + (br_offset * 2u) + (bc * block_size) + 16u;\n \n var b45_offset = 0u;\n \n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(8,"; j+=4) {\n \n var b03 = blocks[b03_start];\n var b45 = blocks[b45_start];\n \n var b03_bits = extractBits(b03, 4u * j, 4u);\n var b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n let w0_0 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n let w0_1 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n let w0_2 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n let w0_3 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03 = blocks[b03_start + 1];\n b45_offset = 16u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 
2u);\n let w1_0 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n let w1_1 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n let w1_2 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n let w1_3 = f32(insertBits(b03_bits, b45_bits, 4u, 2u)); \n \n b03 = blocks[b03_start + 2];\n b45 = blocks[b45_start + 1];\n b45_offset = 0u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n let w2_0 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n let w2_1 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n let w2_2 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n let w2_3 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03 = blocks[b03_start + 3];\n b45_offset = 16u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n let w3_0 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n let w3_1 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n let w3_2 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, 
b45_offset + (2u * (j + 3)), 2u);\n let w3_3 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b45_offset = 0u;\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(I),7:"\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(Ke,"\n\nconst block_size: u32 = 28u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(8,";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b03_start = row_blocks_start + (br_offset * 4u) + (bc * block_size);\n var b45_start = 
row_blocks_start + (br_offset * 2u) + (bc * block_size) + 16u;\n var b6_start = row_blocks_start + br_offset + (bc * block_size) + 24u;\n \n var b45_offset = 0u;\n var b6_offset = 0u;\n \n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(8,"; j+=4) {\n \n var b03 = blocks[b03_start];\n var b45 = blocks[b45_start];\n var b6 = blocks[b6_start];\n \n var b03_bits = extractBits(b03, 4u * j, 4u);\n var b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n var b6_bit = extractBits(b6, b6_offset + j, 1u);\n let w0_0 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 1, 1u);\n let w0_1 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 2, 1u);\n let w0_2 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u)); \n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 3, 1u);\n let w0_3 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03 = blocks[b03_start + 1];\n b45_offset = 16u;\n b6_offset = 8u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n b6_bit = extractBits(b6, b6_offset + j, 1u);\n let w1_0 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 1, 1u);\n let w1_1 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n 
b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 2, 1u);\n let w1_2 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u)); \n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 3, 1u);\n let w1_3 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03 = blocks[b03_start + 2];\n b45 = blocks[b45_start + 1];\n b45_offset = 0u;\n b6_offset = 16u;\n\n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n b6_bit = extractBits(b6, b6_offset + j, 1u);\n let w2_0 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 1, 1u);\n let w2_1 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 2, 1u);\n let w2_2 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u)); \n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 3, 1u);\n let w2_3 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03 = blocks[b03_start + 3];\n b45_offset = 16u;\n b6_offset = 24u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n b6_bit = extractBits(b6, b6_offset + j, 1u);\n let w3_0 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n b6_bit = 
extractBits(b6, b6_offset + j + 1, 1u);\n let w3_1 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 2, 1u);\n let w3_2 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u)); \n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 3, 1u);\n let w3_3 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b45_offset = 0u;\n b6_offset = 0u;\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(I),8:"\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(Ke,"\n\nconst block_size: u32 = 32u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(8,";\n let br = 
global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b07_start = row_blocks_start + (br_offset * 8u) + (bc * block_size); \n\n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n\n for (var j = 0u; j < 2; j++) {\n \n var b07 = blocks[b07_start + j];\n \n let w0_0 = f32(extractBits(b07, 0u, 8u)); \n let w0_1 = f32(extractBits(b07, 8u, 8u)); \n let w0_2 = f32(extractBits(b07, 16u, 8u));\n let w0_3 = f32(extractBits(b07, 24u, 8u));\n \n b07 = blocks[b07_start + 2 + j];\n \n let w1_0 = f32(extractBits(b07, 0u, 8u)); \n let w1_1 = f32(extractBits(b07, 8u, 8u)); \n let w1_2 = f32(extractBits(b07, 16u, 8u));\n let w1_3 = f32(extractBits(b07, 24u, 8u));\n \n b07 = blocks[b07_start + 4 + j];\n \n let w2_0 = f32(extractBits(b07, 0u, 8u)); \n let w2_1 = f32(extractBits(b07, 8u, 8u)); \n let w2_2 = f32(extractBits(b07, 16u, 8u));\n let w2_3 = f32(extractBits(b07, 24u, 8u));\n \n b07 = blocks[b07_start + 6 + j];\n \n let w3_0 = f32(extractBits(b07, 0u, 8u)); \n let w3_1 = f32(extractBits(b07, 8u, 8u)); \n let w3_2 = f32(extractBits(b07, 16u, 8u));\n let w3_3 = f32(extractBits(b07, 24u, 8u));\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_vec = x[x_start + j];\n res[j] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + 
vec4(kappa);\n}\n\n").concat(I)},nt={1:"pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_1_shader",2:"pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_2_shader",3:"pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_3_shader",4:"pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_4_shader",5:"pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_5_shader",6:"pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_6_shader",7:"pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_7_shader",8:"pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_8_shader"},at={1:Ze(1),2:Ze(2),3:Ze(3),4:Ze(4),5:Ze(5),6:Ze(6),7:Ze(7),8:Ze(8)},it={1:"pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_1_shader",2:"pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_2_shader",3:"pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_3_shader",4:"pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_4_shader",5:"pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_5_shader",6:"pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_6_shader",7:"pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_7_shader",8:"pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_8_shader"},ut="pv_picollm_weight_block_mixed_16x8_forward_shuffle_x_shader",ot="pv_picollm_weight_block_mixed_16x8_forward_shuffle_y_shader",st="pv_picollm_weight_block_mixed_16x8_add_bias_shader",ct="pv_picollm_weight_block_mixed_16x8_forward_single_reduce_y_shader",lt=function(e,t){var r=e.createBindGroupLayout({label:"weight preprocess blocks ".concat(t," bind group layout"),entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]}),n=e.createPipelineLayout({label:"weight preprocess blocks ".concat(t," pipeline layout"),bindGroupLayouts:[r]}),a=e.createShaderModule({label:"weight preprocess blocks ".concat(t," 
shader module"),code:De[t]});return{computePipeline:e.createComputePipeline({label:"weight preprocess blocks ".concat(t," pipeline"),layout:n,compute:{module:a,entryPoint:q,constants:{workgroup_size_x:16,workgroup_size_y:16}}})}},_t=function(e,t){var r=[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:4,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}],n=e.createBindGroupLayout({label:"weight forward single ".concat(t," bind group layout"),entries:r}),a=e.createPipelineLayout({label:"weight forward single ".concat(t," pipeline layout"),bindGroupLayouts:[n]}),i=e.createShaderModule({label:"weight forward single ".concat(t," shader module"),code:rt[t]});return{computePipeline:e.createComputePipeline({label:"weight forward single ".concat(t," pipeline"),layout:a,compute:{module:i,entryPoint:q,constants:{workgroup_size_x:256,workgroup_size_y:1}}})}},dt=function(e,t){var r=[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:4,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}],n=e.createBindGroupLayout({label:"weight forward multi ".concat(t," bind group layout"),entries:r}),a=e.createPipelineLayout({label:"weight forward multi ".concat(t," pipeline layout"),bindGroupLayouts:[n]}),i=e.createShaderModule({label:"weight forward multi ".concat(t," shader module"),code:at[t]});return{computePipeline:e.createComputePipeline({label:"weight forward multi ".concat(t," 
pipeline"),layout:a,compute:{module:i,entryPoint:q,constants:{workgroup_size_x:128}}})}},ft=(a(a(a(a(a(a(a(a(a(a(A={},Qe[3],(function(e){return lt(e,3)})),Qe[5],(function(e){return lt(e,5)})),Qe[6],(function(e){return lt(e,6)})),Qe[7],(function(e){return lt(e,7)})),ut,(function(e){var t=e.createBindGroupLayout({label:"weight shuffle x bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]}),r=e.createPipelineLayout({label:"weight shuffle x pipeline layout",bindGroupLayouts:[t]}),n=e.createShaderModule({label:"weight shuffle x shader module",code:Je});return{computePipeline:e.createComputePipeline({label:"weight shuffle x pipeline",layout:r,compute:{module:n,entryPoint:q,constants:{workgroup_size_y:N}}})}})),ot,(function(e){var t=e.createBindGroupLayout({label:"weight shuffle y bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]}),r=e.createPipelineLayout({label:"weight shuffle y pipeline layout",bindGroupLayouts:[t]}),n=e.createShaderModule({label:"weight shuffle y shader module",code:et});return{computePipeline:e.createComputePipeline({label:"weight shuffle y pipeline",layout:r,compute:{module:n,entryPoint:q}})}})),ct,(function(e){var t=e.createBindGroupLayout({label:"weight single reduce y bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]}),r=e.createPipelineLayout({label:"weight single reduce y pipeline 
layout",bindGroupLayouts:[t]}),n=e.createShaderModule({label:"weight single reduce y shader module",code:$e});return{computePipeline:e.createComputePipeline({label:"weight single reduce y pipeline",layout:r,compute:{module:n,entryPoint:q,constants:{workgroup_size_x:N}}})}})),nt[1],(function(e){return _t(e,1)})),nt[2],(function(e){return _t(e,2)})),nt[3],(function(e){return _t(e,3)})),a(a(a(a(a(a(a(a(a(a(A,nt[4],(function(e){return _t(e,4)})),nt[5],(function(e){return _t(e,5)})),nt[6],(function(e){return _t(e,6)})),nt[7],(function(e){return _t(e,7)})),nt[8],(function(e){return _t(e,8)})),it[1],(function(e){return dt(e,1)})),it[2],(function(e){return dt(e,2)})),it[3],(function(e){return dt(e,3)})),it[4],(function(e){return dt(e,4)})),it[5],(function(e){return dt(e,5)})),a(a(a(a(A,it[6],(function(e){return dt(e,6)})),it[7],(function(e){return dt(e,7)})),it[8],(function(e){return dt(e,8)})),st,(function(e){var t=e.createBindGroupLayout({label:"weight add bias bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]}),r=e.createPipelineLayout({label:"weight add bias pipeline layout",bindGroupLayouts:[t]}),n=e.createShaderModule({label:"weight add bias shader module",code:tt});return{computePipeline:e.createComputePipeline({label:"weight add bias pipeline",layout:r,compute:{module:n,entryPoint:q}})}})));function bt(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,n)}return r}function pt(e){for(var t=1;t= 0; --o) { + var i = this.tryEntries[o], + a = i.completion; + if ("root" === i.tryLoc) return handle("end"); + if (i.tryLoc <= this.prev) { + var c = n.call(i, "catchLoc"), + u = n.call(i, "finallyLoc"); + if (c && u) { + if (this.prev 
< i.catchLoc) return handle(i.catchLoc, !0); + if (this.prev < i.finallyLoc) return handle(i.finallyLoc); + } else if (c) { + if (this.prev < i.catchLoc) return handle(i.catchLoc, !0); + } else { + if (!u) throw new Error("try statement without catch or finally"); + if (this.prev < i.finallyLoc) return handle(i.finallyLoc); + } + } + } + }, + abrupt: function abrupt(t, e) { + for (var r = this.tryEntries.length - 1; r >= 0; --r) { + var o = this.tryEntries[r]; + if (o.tryLoc <= this.prev && n.call(o, "finallyLoc") && this.prev < o.finallyLoc) { + var i = o; + break; + } + } + i && ("break" === t || "continue" === t) && i.tryLoc <= e && e <= i.finallyLoc && (i = null); + var a = i ? i.completion : {}; + return a.type = t, a.arg = e, i ? (this.method = "next", this.next = i.finallyLoc, y) : this.complete(a); + }, + complete: function complete(t, e) { + if ("throw" === t.type) throw t.arg; + return "break" === t.type || "continue" === t.type ? this.next = t.arg : "return" === t.type ? (this.rval = this.arg = t.arg, this.method = "return", this.next = "end") : "normal" === t.type && e && (this.next = e), y; + }, + finish: function finish(t) { + for (var e = this.tryEntries.length - 1; e >= 0; --e) { + var r = this.tryEntries[e]; + if (r.finallyLoc === t) return this.complete(r.completion, r.afterLoc), resetTryEntry(r), y; + } + }, + "catch": function _catch(t) { + for (var e = this.tryEntries.length - 1; e >= 0; --e) { + var r = this.tryEntries[e]; + if (r.tryLoc === t) { + var n = r.completion; + if ("throw" === n.type) { + var o = n.arg; + resetTryEntry(r); + } + return o; + } + } + throw new Error("illegal catch attempt"); + }, + delegateYield: function delegateYield(e, r, n) { + return this.delegate = { + iterator: values(e), + resultName: r, + nextLoc: n + }, "next" === this.method && (this.arg = t), y; + } + }, e; + } + module.exports = _regeneratorRuntime, module.exports.__esModule = true, module.exports["default"] = module.exports; + } (regeneratorRuntime$2)); 
+ + var regeneratorRuntimeExports$1 = regeneratorRuntime$2.exports; + + // TODO(Babel 8): Remove this file. + + var runtime$1 = regeneratorRuntimeExports$1(); + var regenerator = runtime$1; + + // Copied from https://github.com/facebook/regenerator/blob/main/packages/runtime/runtime.js#L736= + try { + regeneratorRuntime = runtime$1; + } catch (accidentalStrictMode) { + if (typeof globalThis === "object") { + globalThis.regeneratorRuntime = runtime$1; + } else { + Function("r", "regeneratorRuntime = r")(runtime$1); + } + } + + var _regeneratorRuntime = /*@__PURE__*/getDefaultExportFromCjs(regenerator); + + // NOTE: this list must be up-to-date with browsers listed in + // test/acceptance/useragentstrings.yml + const BROWSER_ALIASES_MAP = { + 'Amazon Silk': 'amazon_silk', + 'Android Browser': 'android', + Bada: 'bada', + BlackBerry: 'blackberry', + Chrome: 'chrome', + Chromium: 'chromium', + Electron: 'electron', + Epiphany: 'epiphany', + Firefox: 'firefox', + Focus: 'focus', + Generic: 'generic', + 'Google Search': 'google_search', + Googlebot: 'googlebot', + 'Internet Explorer': 'ie', + 'K-Meleon': 'k_meleon', + Maxthon: 'maxthon', + 'Microsoft Edge': 'edge', + 'MZ Browser': 'mz', + 'NAVER Whale Browser': 'naver', + Opera: 'opera', + 'Opera Coast': 'opera_coast', + PhantomJS: 'phantomjs', + Puffin: 'puffin', + QupZilla: 'qupzilla', + QQ: 'qq', + QQLite: 'qqlite', + Safari: 'safari', + Sailfish: 'sailfish', + 'Samsung Internet for Android': 'samsung_internet', + SeaMonkey: 'seamonkey', + Sleipnir: 'sleipnir', + Swing: 'swing', + Tizen: 'tizen', + 'UC Browser': 'uc', + Vivaldi: 'vivaldi', + 'WebOS Browser': 'webos', + WeChat: 'wechat', + 'Yandex Browser': 'yandex', + Roku: 'roku', + }; + + const BROWSER_MAP = { + amazon_silk: 'Amazon Silk', + android: 'Android Browser', + bada: 'Bada', + blackberry: 'BlackBerry', + chrome: 'Chrome', + chromium: 'Chromium', + electron: 'Electron', + epiphany: 'Epiphany', + firefox: 'Firefox', + focus: 'Focus', + generic: 'Generic', + 
googlebot: 'Googlebot', + google_search: 'Google Search', + ie: 'Internet Explorer', + k_meleon: 'K-Meleon', + maxthon: 'Maxthon', + edge: 'Microsoft Edge', + mz: 'MZ Browser', + naver: 'NAVER Whale Browser', + opera: 'Opera', + opera_coast: 'Opera Coast', + phantomjs: 'PhantomJS', + puffin: 'Puffin', + qupzilla: 'QupZilla', + qq: 'QQ Browser', + qqlite: 'QQ Browser Lite', + safari: 'Safari', + sailfish: 'Sailfish', + samsung_internet: 'Samsung Internet for Android', + seamonkey: 'SeaMonkey', + sleipnir: 'Sleipnir', + swing: 'Swing', + tizen: 'Tizen', + uc: 'UC Browser', + vivaldi: 'Vivaldi', + webos: 'WebOS Browser', + wechat: 'WeChat', + yandex: 'Yandex Browser', + }; + + const PLATFORMS_MAP = { + tablet: 'tablet', + mobile: 'mobile', + desktop: 'desktop', + tv: 'tv', + }; + + const OS_MAP = { + WindowsPhone: 'Windows Phone', + Windows: 'Windows', + MacOS: 'macOS', + iOS: 'iOS', + Android: 'Android', + WebOS: 'WebOS', + BlackBerry: 'BlackBerry', + Bada: 'Bada', + Tizen: 'Tizen', + Linux: 'Linux', + ChromeOS: 'Chrome OS', + PlayStation4: 'PlayStation 4', + Roku: 'Roku', + }; + + const ENGINE_MAP = { + EdgeHTML: 'EdgeHTML', + Blink: 'Blink', + Trident: 'Trident', + Presto: 'Presto', + Gecko: 'Gecko', + WebKit: 'WebKit', + }; + + class Utils { + /** + * Get first matched item for a string + * @param {RegExp} regexp + * @param {String} ua + * @return {Array|{index: number, input: string}|*|boolean|string} + */ + static getFirstMatch(regexp, ua) { + const match = ua.match(regexp); + return (match && match.length > 0 && match[1]) || ''; + } + + /** + * Get second matched item for a string + * @param regexp + * @param {String} ua + * @return {Array|{index: number, input: string}|*|boolean|string} + */ + static getSecondMatch(regexp, ua) { + const match = ua.match(regexp); + return (match && match.length > 1 && match[2]) || ''; + } + + /** + * Match a regexp and return a constant or undefined + * @param {RegExp} regexp + * @param {String} ua + * @param {*} _const Any 
const that will be returned if regexp matches the string + * @return {*} + */ + static matchAndReturnConst(regexp, ua, _const) { + if (regexp.test(ua)) { + return _const; + } + return void (0); + } + + static getWindowsVersionName(version) { + switch (version) { + case 'NT': return 'NT'; + case 'XP': return 'XP'; + case 'NT 5.0': return '2000'; + case 'NT 5.1': return 'XP'; + case 'NT 5.2': return '2003'; + case 'NT 6.0': return 'Vista'; + case 'NT 6.1': return '7'; + case 'NT 6.2': return '8'; + case 'NT 6.3': return '8.1'; + case 'NT 10.0': return '10'; + default: return undefined; + } + } + + /** + * Get macOS version name + * 10.5 - Leopard + * 10.6 - Snow Leopard + * 10.7 - Lion + * 10.8 - Mountain Lion + * 10.9 - Mavericks + * 10.10 - Yosemite + * 10.11 - El Capitan + * 10.12 - Sierra + * 10.13 - High Sierra + * 10.14 - Mojave + * 10.15 - Catalina + * + * @example + * getMacOSVersionName("10.14") // 'Mojave' + * + * @param {string} version + * @return {string} versionName + */ + static getMacOSVersionName(version) { + const v = version.split('.').splice(0, 2).map(s => parseInt(s, 10) || 0); + v.push(0); + if (v[0] !== 10) return undefined; + switch (v[1]) { + case 5: return 'Leopard'; + case 6: return 'Snow Leopard'; + case 7: return 'Lion'; + case 8: return 'Mountain Lion'; + case 9: return 'Mavericks'; + case 10: return 'Yosemite'; + case 11: return 'El Capitan'; + case 12: return 'Sierra'; + case 13: return 'High Sierra'; + case 14: return 'Mojave'; + case 15: return 'Catalina'; + default: return undefined; + } + } + + /** + * Get Android version name + * 1.5 - Cupcake + * 1.6 - Donut + * 2.0 - Eclair + * 2.1 - Eclair + * 2.2 - Froyo + * 2.x - Gingerbread + * 3.x - Honeycomb + * 4.0 - Ice Cream Sandwich + * 4.1 - Jelly Bean + * 4.4 - KitKat + * 5.x - Lollipop + * 6.x - Marshmallow + * 7.x - Nougat + * 8.x - Oreo + * 9.x - Pie + * + * @example + * getAndroidVersionName("7.0") // 'Nougat' + * + * @param {string} version + * @return {string} versionName + */ 
+ static getAndroidVersionName(version) { + const v = version.split('.').splice(0, 2).map(s => parseInt(s, 10) || 0); + v.push(0); + if (v[0] === 1 && v[1] < 5) return undefined; + if (v[0] === 1 && v[1] < 6) return 'Cupcake'; + if (v[0] === 1 && v[1] >= 6) return 'Donut'; + if (v[0] === 2 && v[1] < 2) return 'Eclair'; + if (v[0] === 2 && v[1] === 2) return 'Froyo'; + if (v[0] === 2 && v[1] > 2) return 'Gingerbread'; + if (v[0] === 3) return 'Honeycomb'; + if (v[0] === 4 && v[1] < 1) return 'Ice Cream Sandwich'; + if (v[0] === 4 && v[1] < 4) return 'Jelly Bean'; + if (v[0] === 4 && v[1] >= 4) return 'KitKat'; + if (v[0] === 5) return 'Lollipop'; + if (v[0] === 6) return 'Marshmallow'; + if (v[0] === 7) return 'Nougat'; + if (v[0] === 8) return 'Oreo'; + if (v[0] === 9) return 'Pie'; + return undefined; + } + + /** + * Get version precisions count + * + * @example + * getVersionPrecision("1.10.3") // 3 + * + * @param {string} version + * @return {number} + */ + static getVersionPrecision(version) { + return version.split('.').length; + } + + /** + * Calculate browser version weight + * + * @example + * compareVersions('1.10.2.1', '1.8.2.1.90') // 1 + * compareVersions('1.010.2.1', '1.09.2.1.90'); // 1 + * compareVersions('1.10.2.1', '1.10.2.1'); // 0 + * compareVersions('1.10.2.1', '1.0800.2'); // -1 + * compareVersions('1.10.2.1', '1.10', true); // 0 + * + * @param {String} versionA versions versions to compare + * @param {String} versionB versions versions to compare + * @param {boolean} [isLoose] enable loose comparison + * @return {Number} comparison result: -1 when versionA is lower, + * 1 when versionA is bigger, 0 when both equal + */ + /* eslint consistent-return: 1 */ + static compareVersions(versionA, versionB, isLoose = false) { + // 1) get common precision for both versions, for example for "10.0" and "9" it should be 2 + const versionAPrecision = Utils.getVersionPrecision(versionA); + const versionBPrecision = Utils.getVersionPrecision(versionB); + + 
let precision = Math.max(versionAPrecision, versionBPrecision); + let lastPrecision = 0; + + const chunks = Utils.map([versionA, versionB], (version) => { + const delta = precision - Utils.getVersionPrecision(version); + + // 2) "9" -> "9.0" (for precision = 2) + const _version = version + new Array(delta + 1).join('.0'); + + // 3) "9.0" -> ["000000000"", "000000009"] + return Utils.map(_version.split('.'), chunk => new Array(20 - chunk.length).join('0') + chunk).reverse(); + }); + + // adjust precision for loose comparison + if (isLoose) { + lastPrecision = precision - Math.min(versionAPrecision, versionBPrecision); + } + + // iterate in reverse order by reversed chunks array + precision -= 1; + while (precision >= lastPrecision) { + // 4) compare: "000000009" > "000000010" = false (but "9" > "10" = true) + if (chunks[0][precision] > chunks[1][precision]) { + return 1; + } + + if (chunks[0][precision] === chunks[1][precision]) { + if (precision === lastPrecision) { + // all version chunks are same + return 0; + } + + precision -= 1; + } else if (chunks[0][precision] < chunks[1][precision]) { + return -1; + } + } + + return undefined; + } + + /** + * Array::map polyfill + * + * @param {Array} arr + * @param {Function} iterator + * @return {Array} + */ + static map(arr, iterator) { + const result = []; + let i; + if (Array.prototype.map) { + return Array.prototype.map.call(arr, iterator); + } + for (i = 0; i < arr.length; i += 1) { + result.push(iterator(arr[i])); + } + return result; + } + + /** + * Array::find polyfill + * + * @param {Array} arr + * @param {Function} predicate + * @return {Array} + */ + static find(arr, predicate) { + let i; + let l; + if (Array.prototype.find) { + return Array.prototype.find.call(arr, predicate); + } + for (i = 0, l = arr.length; i < l; i += 1) { + const value = arr[i]; + if (predicate(value, i)) { + return value; + } + } + return undefined; + } + + /** + * Object::assign polyfill + * + * @param {Object} obj + * @param {Object} 
...objs + * @return {Object} + */ + static assign(obj, ...assigners) { + const result = obj; + let i; + let l; + if (Object.assign) { + return Object.assign(obj, ...assigners); + } + for (i = 0, l = assigners.length; i < l; i += 1) { + const assigner = assigners[i]; + if (typeof assigner === 'object' && assigner !== null) { + const keys = Object.keys(assigner); + keys.forEach((key) => { + result[key] = assigner[key]; + }); + } + } + return obj; + } + + /** + * Get short version/alias for a browser name + * + * @example + * getBrowserAlias('Microsoft Edge') // edge + * + * @param {string} browserName + * @return {string} + */ + static getBrowserAlias(browserName) { + return BROWSER_ALIASES_MAP[browserName]; + } + + /** + * Get short version/alias for a browser name + * + * @example + * getBrowserAlias('edge') // Microsoft Edge + * + * @param {string} browserAlias + * @return {string} + */ + static getBrowserTypeByAlias(browserAlias) { + return BROWSER_MAP[browserAlias] || ''; + } + } + + /** + * Browsers' descriptors + * + * The idea of descriptors is simple. You should know about them two simple things: + * 1. Every descriptor has a method or property called `test` and a `describe` method. + * 2. Order of descriptors is important. + * + * More details: + * 1. Method or property `test` serves as a way to detect whether the UA string + * matches some certain browser or not. The `describe` method helps to make a result + * object with params that show some browser-specific things: name, version, etc. + * 2. Order of descriptors is important because a Parser goes through them one by one + * in course. For example, if you insert Chrome's descriptor as the first one, + * more then a half of browsers will be described as Chrome, because they will pass + * the Chrome descriptor's test. + * + * Descriptor's `test` could be a property with an array of RegExps, where every RegExp + * will be applied to a UA string to test it whether it matches or not. 
+ * If a descriptor has two or more regexps in the `test` array it tests them one by one + * with a logical sum operation. Parser stops if it has found any RegExp that matches the UA. + * + * Or `test` could be a method. In that case it gets a Parser instance and should + * return true/false to get the Parser know if this browser descriptor matches the UA or not. + */ + + const commonVersionIdentifier = /version\/(\d+(\.?_?\d+)+)/i; + + const browsersList = [ + /* Googlebot */ + { + test: [/googlebot/i], + describe(ua) { + const browser = { + name: 'Googlebot', + }; + const version = Utils.getFirstMatch(/googlebot\/(\d+(\.\d+))/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + + /* Opera < 13.0 */ + { + test: [/opera/i], + describe(ua) { + const browser = { + name: 'Opera', + }; + const version = Utils.getFirstMatch(commonVersionIdentifier, ua) || Utils.getFirstMatch(/(?:opera)[\s/](\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + + /* Opera > 13.0 */ + { + test: [/opr\/|opios/i], + describe(ua) { + const browser = { + name: 'Opera', + }; + const version = Utils.getFirstMatch(/(?:opr|opios)[\s/](\S+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/SamsungBrowser/i], + describe(ua) { + const browser = { + name: 'Samsung Internet for Android', + }; + const version = Utils.getFirstMatch(commonVersionIdentifier, ua) || Utils.getFirstMatch(/(?:SamsungBrowser)[\s/](\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/Whale/i], + describe(ua) { + const browser = { + name: 'NAVER Whale Browser', + }; + const version = Utils.getFirstMatch(commonVersionIdentifier, ua) || Utils.getFirstMatch(/(?:whale)[\s/](\d+(?:\.\d+)+)/i, ua); + + if (version) { + 
browser.version = version; + } + + return browser; + }, + }, + { + test: [/MZBrowser/i], + describe(ua) { + const browser = { + name: 'MZ Browser', + }; + const version = Utils.getFirstMatch(/(?:MZBrowser)[\s/](\d+(?:\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/focus/i], + describe(ua) { + const browser = { + name: 'Focus', + }; + const version = Utils.getFirstMatch(/(?:focus)[\s/](\d+(?:\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/swing/i], + describe(ua) { + const browser = { + name: 'Swing', + }; + const version = Utils.getFirstMatch(/(?:swing)[\s/](\d+(?:\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/coast/i], + describe(ua) { + const browser = { + name: 'Opera Coast', + }; + const version = Utils.getFirstMatch(commonVersionIdentifier, ua) || Utils.getFirstMatch(/(?:coast)[\s/](\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/opt\/\d+(?:.?_?\d+)+/i], + describe(ua) { + const browser = { + name: 'Opera Touch', + }; + const version = Utils.getFirstMatch(/(?:opt)[\s/](\d+(\.?_?\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/yabrowser/i], + describe(ua) { + const browser = { + name: 'Yandex Browser', + }; + const version = Utils.getFirstMatch(/(?:yabrowser)[\s/](\d+(\.?_?\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/ucbrowser/i], + describe(ua) { + const browser = { + name: 'UC Browser', + }; + const version = 
Utils.getFirstMatch(commonVersionIdentifier, ua) || Utils.getFirstMatch(/(?:ucbrowser)[\s/](\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/Maxthon|mxios/i], + describe(ua) { + const browser = { + name: 'Maxthon', + }; + const version = Utils.getFirstMatch(commonVersionIdentifier, ua) || Utils.getFirstMatch(/(?:Maxthon|mxios)[\s/](\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/epiphany/i], + describe(ua) { + const browser = { + name: 'Epiphany', + }; + const version = Utils.getFirstMatch(commonVersionIdentifier, ua) || Utils.getFirstMatch(/(?:epiphany)[\s/](\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/puffin/i], + describe(ua) { + const browser = { + name: 'Puffin', + }; + const version = Utils.getFirstMatch(commonVersionIdentifier, ua) || Utils.getFirstMatch(/(?:puffin)[\s/](\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/sleipnir/i], + describe(ua) { + const browser = { + name: 'Sleipnir', + }; + const version = Utils.getFirstMatch(commonVersionIdentifier, ua) || Utils.getFirstMatch(/(?:sleipnir)[\s/](\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/k-meleon/i], + describe(ua) { + const browser = { + name: 'K-Meleon', + }; + const version = Utils.getFirstMatch(commonVersionIdentifier, ua) || Utils.getFirstMatch(/(?:k-meleon)[\s/](\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/micromessenger/i], + describe(ua) { + const browser = { + name: 'WeChat', + }; + const version = Utils.getFirstMatch(/(?:micromessenger)[\s/](\d+(\.?_?\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = 
version; + } + + return browser; + }, + }, + { + test: [/qqbrowser/i], + describe(ua) { + const browser = { + name: (/qqbrowserlite/i).test(ua) ? 'QQ Browser Lite' : 'QQ Browser', + }; + const version = Utils.getFirstMatch(/(?:qqbrowserlite|qqbrowser)[/](\d+(\.?_?\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/msie|trident/i], + describe(ua) { + const browser = { + name: 'Internet Explorer', + }; + const version = Utils.getFirstMatch(/(?:msie |rv:)(\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/\sedg\//i], + describe(ua) { + const browser = { + name: 'Microsoft Edge', + }; + + const version = Utils.getFirstMatch(/\sedg\/(\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/edg([ea]|ios)/i], + describe(ua) { + const browser = { + name: 'Microsoft Edge', + }; + + const version = Utils.getSecondMatch(/edg([ea]|ios)\/(\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/vivaldi/i], + describe(ua) { + const browser = { + name: 'Vivaldi', + }; + const version = Utils.getFirstMatch(/vivaldi\/(\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/seamonkey/i], + describe(ua) { + const browser = { + name: 'SeaMonkey', + }; + const version = Utils.getFirstMatch(/seamonkey\/(\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/sailfish/i], + describe(ua) { + const browser = { + name: 'Sailfish', + }; + + const version = Utils.getFirstMatch(/sailfish\s?browser\/(\d+(\.\d+)?)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/silk/i], + describe(ua) { + const browser = { + name: 
'Amazon Silk', + }; + const version = Utils.getFirstMatch(/silk\/(\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/phantom/i], + describe(ua) { + const browser = { + name: 'PhantomJS', + }; + const version = Utils.getFirstMatch(/phantomjs\/(\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/slimerjs/i], + describe(ua) { + const browser = { + name: 'SlimerJS', + }; + const version = Utils.getFirstMatch(/slimerjs\/(\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/blackberry|\bbb\d+/i, /rim\stablet/i], + describe(ua) { + const browser = { + name: 'BlackBerry', + }; + const version = Utils.getFirstMatch(commonVersionIdentifier, ua) || Utils.getFirstMatch(/blackberry[\d]+\/(\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/(web|hpw)[o0]s/i], + describe(ua) { + const browser = { + name: 'WebOS Browser', + }; + const version = Utils.getFirstMatch(commonVersionIdentifier, ua) || Utils.getFirstMatch(/w(?:eb)?[o0]sbrowser\/(\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/bada/i], + describe(ua) { + const browser = { + name: 'Bada', + }; + const version = Utils.getFirstMatch(/dolfin\/(\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/tizen/i], + describe(ua) { + const browser = { + name: 'Tizen', + }; + const version = Utils.getFirstMatch(/(?:tizen\s?)?browser\/(\d+(\.?_?\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/qupzilla/i], + describe(ua) { + const browser = { + name: 'QupZilla', + }; + const version = 
Utils.getFirstMatch(/(?:qupzilla)[\s/](\d+(\.?_?\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/firefox|iceweasel|fxios/i], + describe(ua) { + const browser = { + name: 'Firefox', + }; + const version = Utils.getFirstMatch(/(?:firefox|iceweasel|fxios)[\s/](\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/electron/i], + describe(ua) { + const browser = { + name: 'Electron', + }; + const version = Utils.getFirstMatch(/(?:electron)\/(\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/MiuiBrowser/i], + describe(ua) { + const browser = { + name: 'Miui', + }; + const version = Utils.getFirstMatch(/(?:MiuiBrowser)[\s/](\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/chromium/i], + describe(ua) { + const browser = { + name: 'Chromium', + }; + const version = Utils.getFirstMatch(/(?:chromium)[\s/](\d+(\.?_?\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/chrome|crios|crmo/i], + describe(ua) { + const browser = { + name: 'Chrome', + }; + const version = Utils.getFirstMatch(/(?:chrome|crios|crmo)\/(\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + { + test: [/GSA/i], + describe(ua) { + const browser = { + name: 'Google Search', + }; + const version = Utils.getFirstMatch(/(?:GSA)\/(\d+(\.?_?\d+)+)/i, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + + /* Android Browser */ + { + test(parser) { + const notLikeAndroid = !parser.test(/like android/i); + const butAndroid = parser.test(/android/i); + return notLikeAndroid && butAndroid; + }, + 
describe(ua) { + const browser = { + name: 'Android Browser', + }; + const version = Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + + /* PlayStation 4 */ + { + test: [/playstation 4/i], + describe(ua) { + const browser = { + name: 'PlayStation 4', + }; + const version = Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + + /* Safari */ + { + test: [/safari|applewebkit/i], + describe(ua) { + const browser = { + name: 'Safari', + }; + const version = Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + + /* Something else */ + { + test: [/.*/i], + describe(ua) { + /* Here we try to make sure that there are explicit details about the device + * in order to decide what regexp exactly we want to apply + * (as there is a specific decision based on that conclusion) + */ + const regexpWithoutDeviceSpec = /^(.*)\/(.*) /; + const regexpWithDeviceSpec = /^(.*)\/(.*)[ \t]\((.*)/; + const hasDeviceSpec = ua.search('\\(') !== -1; + const regexp = hasDeviceSpec ? 
regexpWithDeviceSpec : regexpWithoutDeviceSpec; + return { + name: Utils.getFirstMatch(regexp, ua), + version: Utils.getSecondMatch(regexp, ua), + }; + }, + }, + ]; + + var osParsersList = [ + /* Roku */ + { + test: [/Roku\/DVP/], + describe(ua) { + const version = Utils.getFirstMatch(/Roku\/DVP-(\d+\.\d+)/i, ua); + return { + name: OS_MAP.Roku, + version, + }; + }, + }, + + /* Windows Phone */ + { + test: [/windows phone/i], + describe(ua) { + const version = Utils.getFirstMatch(/windows phone (?:os)?\s?(\d+(\.\d+)*)/i, ua); + return { + name: OS_MAP.WindowsPhone, + version, + }; + }, + }, + + /* Windows */ + { + test: [/windows /i], + describe(ua) { + const version = Utils.getFirstMatch(/Windows ((NT|XP)( \d\d?.\d)?)/i, ua); + const versionName = Utils.getWindowsVersionName(version); + + return { + name: OS_MAP.Windows, + version, + versionName, + }; + }, + }, + + /* Firefox on iPad */ + { + test: [/Macintosh(.*?) FxiOS(.*?)\//], + describe(ua) { + const result = { + name: OS_MAP.iOS, + }; + const version = Utils.getSecondMatch(/(Version\/)(\d[\d.]+)/, ua); + if (version) { + result.version = version; + } + return result; + }, + }, + + /* macOS */ + { + test: [/macintosh/i], + describe(ua) { + const version = Utils.getFirstMatch(/mac os x (\d+(\.?_?\d+)+)/i, ua).replace(/[_\s]/g, '.'); + const versionName = Utils.getMacOSVersionName(version); + + const os = { + name: OS_MAP.MacOS, + version, + }; + if (versionName) { + os.versionName = versionName; + } + return os; + }, + }, + + /* iOS */ + { + test: [/(ipod|iphone|ipad)/i], + describe(ua) { + const version = Utils.getFirstMatch(/os (\d+([_\s]\d+)*) like mac os x/i, ua).replace(/[_\s]/g, '.'); + + return { + name: OS_MAP.iOS, + version, + }; + }, + }, + + /* Android */ + { + test(parser) { + const notLikeAndroid = !parser.test(/like android/i); + const butAndroid = parser.test(/android/i); + return notLikeAndroid && butAndroid; + }, + describe(ua) { + const version = 
Utils.getFirstMatch(/android[\s/-](\d+(\.\d+)*)/i, ua); + const versionName = Utils.getAndroidVersionName(version); + const os = { + name: OS_MAP.Android, + version, + }; + if (versionName) { + os.versionName = versionName; + } + return os; + }, + }, + + /* WebOS */ + { + test: [/(web|hpw)[o0]s/i], + describe(ua) { + const version = Utils.getFirstMatch(/(?:web|hpw)[o0]s\/(\d+(\.\d+)*)/i, ua); + const os = { + name: OS_MAP.WebOS, + }; + + if (version && version.length) { + os.version = version; + } + return os; + }, + }, + + /* BlackBerry */ + { + test: [/blackberry|\bbb\d+/i, /rim\stablet/i], + describe(ua) { + const version = Utils.getFirstMatch(/rim\stablet\sos\s(\d+(\.\d+)*)/i, ua) + || Utils.getFirstMatch(/blackberry\d+\/(\d+([_\s]\d+)*)/i, ua) + || Utils.getFirstMatch(/\bbb(\d+)/i, ua); + + return { + name: OS_MAP.BlackBerry, + version, + }; + }, + }, + + /* Bada */ + { + test: [/bada/i], + describe(ua) { + const version = Utils.getFirstMatch(/bada\/(\d+(\.\d+)*)/i, ua); + + return { + name: OS_MAP.Bada, + version, + }; + }, + }, + + /* Tizen */ + { + test: [/tizen/i], + describe(ua) { + const version = Utils.getFirstMatch(/tizen[/\s](\d+(\.\d+)*)/i, ua); + + return { + name: OS_MAP.Tizen, + version, + }; + }, + }, + + /* Linux */ + { + test: [/linux/i], + describe() { + return { + name: OS_MAP.Linux, + }; + }, + }, + + /* Chrome OS */ + { + test: [/CrOS/], + describe() { + return { + name: OS_MAP.ChromeOS, + }; + }, + }, + + /* Playstation 4 */ + { + test: [/PlayStation 4/], + describe(ua) { + const version = Utils.getFirstMatch(/PlayStation 4[/\s](\d+(\.\d+)*)/i, ua); + return { + name: OS_MAP.PlayStation4, + version, + }; + }, + }, + ]; + + /* + * Tablets go first since usually they have more specific + * signs to detect. 
+ */ + + var platformParsersList = [ + /* Googlebot */ + { + test: [/googlebot/i], + describe() { + return { + type: 'bot', + vendor: 'Google', + }; + }, + }, + + /* Huawei */ + { + test: [/huawei/i], + describe(ua) { + const model = Utils.getFirstMatch(/(can-l01)/i, ua) && 'Nova'; + const platform = { + type: PLATFORMS_MAP.mobile, + vendor: 'Huawei', + }; + if (model) { + platform.model = model; + } + return platform; + }, + }, + + /* Nexus Tablet */ + { + test: [/nexus\s*(?:7|8|9|10).*/i], + describe() { + return { + type: PLATFORMS_MAP.tablet, + vendor: 'Nexus', + }; + }, + }, + + /* iPad */ + { + test: [/ipad/i], + describe() { + return { + type: PLATFORMS_MAP.tablet, + vendor: 'Apple', + model: 'iPad', + }; + }, + }, + + /* Firefox on iPad */ + { + test: [/Macintosh(.*?) FxiOS(.*?)\//], + describe() { + return { + type: PLATFORMS_MAP.tablet, + vendor: 'Apple', + model: 'iPad', + }; + }, + }, + + /* Amazon Kindle Fire */ + { + test: [/kftt build/i], + describe() { + return { + type: PLATFORMS_MAP.tablet, + vendor: 'Amazon', + model: 'Kindle Fire HD 7', + }; + }, + }, + + /* Another Amazon Tablet with Silk */ + { + test: [/silk/i], + describe() { + return { + type: PLATFORMS_MAP.tablet, + vendor: 'Amazon', + }; + }, + }, + + /* Tablet */ + { + test: [/tablet(?! 
pc)/i], + describe() { + return { + type: PLATFORMS_MAP.tablet, + }; + }, + }, + + /* iPod/iPhone */ + { + test(parser) { + const iDevice = parser.test(/ipod|iphone/i); + const likeIDevice = parser.test(/like (ipod|iphone)/i); + return iDevice && !likeIDevice; + }, + describe(ua) { + const model = Utils.getFirstMatch(/(ipod|iphone)/i, ua); + return { + type: PLATFORMS_MAP.mobile, + vendor: 'Apple', + model, + }; + }, + }, + + /* Nexus Mobile */ + { + test: [/nexus\s*[0-6].*/i, /galaxy nexus/i], + describe() { + return { + type: PLATFORMS_MAP.mobile, + vendor: 'Nexus', + }; + }, + }, + + /* Mobile */ + { + test: [/[^-]mobi/i], + describe() { + return { + type: PLATFORMS_MAP.mobile, + }; + }, + }, + + /* BlackBerry */ + { + test(parser) { + return parser.getBrowserName(true) === 'blackberry'; + }, + describe() { + return { + type: PLATFORMS_MAP.mobile, + vendor: 'BlackBerry', + }; + }, + }, + + /* Bada */ + { + test(parser) { + return parser.getBrowserName(true) === 'bada'; + }, + describe() { + return { + type: PLATFORMS_MAP.mobile, + }; + }, + }, + + /* Windows Phone */ + { + test(parser) { + return parser.getBrowserName() === 'windows phone'; + }, + describe() { + return { + type: PLATFORMS_MAP.mobile, + vendor: 'Microsoft', + }; + }, + }, + + /* Android Tablet */ + { + test(parser) { + const osMajorVersion = Number(String(parser.getOSVersion()).split('.')[0]); + return parser.getOSName(true) === 'android' && (osMajorVersion >= 3); + }, + describe() { + return { + type: PLATFORMS_MAP.tablet, + }; + }, + }, + + /* Android Mobile */ + { + test(parser) { + return parser.getOSName(true) === 'android'; + }, + describe() { + return { + type: PLATFORMS_MAP.mobile, + }; + }, + }, + + /* desktop */ + { + test(parser) { + return parser.getOSName(true) === 'macos'; + }, + describe() { + return { + type: PLATFORMS_MAP.desktop, + vendor: 'Apple', + }; + }, + }, + + /* Windows */ + { + test(parser) { + return parser.getOSName(true) === 'windows'; + }, + describe() { + return { 
+ type: PLATFORMS_MAP.desktop, + }; + }, + }, + + /* Linux */ + { + test(parser) { + return parser.getOSName(true) === 'linux'; + }, + describe() { + return { + type: PLATFORMS_MAP.desktop, + }; + }, + }, + + /* PlayStation 4 */ + { + test(parser) { + return parser.getOSName(true) === 'playstation 4'; + }, + describe() { + return { + type: PLATFORMS_MAP.tv, + }; + }, + }, + + /* Roku */ + { + test(parser) { + return parser.getOSName(true) === 'roku'; + }, + describe() { + return { + type: PLATFORMS_MAP.tv, + }; + }, + }, + ]; + + /* + * More specific goes first + */ + var enginesParsersList = [ + /* EdgeHTML */ + { + test(parser) { + return parser.getBrowserName(true) === 'microsoft edge'; + }, + describe(ua) { + const isBlinkBased = /\sedg\//i.test(ua); + + // return blink if it's blink-based one + if (isBlinkBased) { + return { + name: ENGINE_MAP.Blink, + }; + } + + // otherwise match the version and return EdgeHTML + const version = Utils.getFirstMatch(/edge\/(\d+(\.?_?\d+)+)/i, ua); + + return { + name: ENGINE_MAP.EdgeHTML, + version, + }; + }, + }, + + /* Trident */ + { + test: [/trident/i], + describe(ua) { + const engine = { + name: ENGINE_MAP.Trident, + }; + + const version = Utils.getFirstMatch(/trident\/(\d+(\.?_?\d+)+)/i, ua); + + if (version) { + engine.version = version; + } + + return engine; + }, + }, + + /* Presto */ + { + test(parser) { + return parser.test(/presto/i); + }, + describe(ua) { + const engine = { + name: ENGINE_MAP.Presto, + }; + + const version = Utils.getFirstMatch(/presto\/(\d+(\.?_?\d+)+)/i, ua); + + if (version) { + engine.version = version; + } + + return engine; + }, + }, + + /* Gecko */ + { + test(parser) { + const isGecko = parser.test(/gecko/i); + const likeGecko = parser.test(/like gecko/i); + return isGecko && !likeGecko; + }, + describe(ua) { + const engine = { + name: ENGINE_MAP.Gecko, + }; + + const version = Utils.getFirstMatch(/gecko\/(\d+(\.?_?\d+)+)/i, ua); + + if (version) { + engine.version = version; + } + + 
/**
 * The main class that arranges the whole parsing process.
 *
 * A Parser wraps a single User-Agent string and lazily fills `parsedResult`
 * with `browser`, `os`, `platform` and `engine` descriptions by running the
 * UA through the module-level descriptor lists.
 */
class Parser {
  /**
   * Create instance of Parser
   *
   * @param {String} UA User-Agent string
   * @param {Boolean} [skipParsing=false] when `true` the constructor does not parse anything;
   * call a specific parser such as {@link Parser#parseBrowser} or {@link Parser#parsePlatform}
   * (or any getter, which parses on demand) instead
   *
   * @throws {Error} in case of empty UA String
   *
   * @constructor
   */
  constructor(UA, skipParsing = false) {
    if (UA === void (0) || UA === null || UA === '') {
      throw new Error("UserAgent parameter can't be empty");
    }

    this._ua = UA;

    /**
     * @typedef ParsedResult
     * @property {Object} browser
     * @property {String|undefined} [browser.name]
     * Browser name, like `"Chrome"` or `"Internet Explorer"`
     * @property {String|undefined} [browser.version] Browser version as a String `"12.01.45334.10"`
     * @property {Object} os
     * @property {String|undefined} [os.name] OS name, like `"Windows"` or `"macOS"`
     * @property {String|undefined} [os.version] OS version, like `"NT 5.1"` or `"10.11.1"`
     * @property {String|undefined} [os.versionName] OS name, like `"XP"` or `"High Sierra"`
     * @property {Object} platform
     * @property {String|undefined} [platform.type]
     * platform type, can be either `"desktop"`, `"tablet"` or `"mobile"`
     * @property {String|undefined} [platform.vendor] Vendor of the device,
     * like `"Apple"` or `"Samsung"`
     * @property {String|undefined} [platform.model] Device model,
     * like `"iPhone"` or `"Kindle Fire HD 7"`
     * @property {Object} engine
     * @property {String|undefined} [engine.name]
     * Can be any of this: `WebKit`, `Blink`, `Gecko`, `Trident`, `Presto`, `EdgeHTML`
     * @property {String|undefined} [engine.version] String version of the engine
     */
    this.parsedResult = {};

    if (skipParsing !== true) {
      this.parse();
    }
  }

  /**
   * Get UserAgent string of current Parser instance
   * @return {String} User-Agent String of the current object
   *
   * @public
   */
  getUA() {
    return this._ua;
  }

  /**
   * Test a UA string for a regexp
   * @param {RegExp} regex
   * @return {Boolean}
   */
  test(regex) {
    return regex.test(this._ua);
  }

  /**
   * Parse the browser and save it to this.parsedResult.browser
   * @return {Object} the parsed browser, or an empty object when no descriptor matched
   * @throws {Error} when a descriptor's `test` is neither a function nor an array
   */
  parseBrowser() {
    this.parsedResult.browser = {};

    const browserDescriptor = Utils.find(browsersList, (_browser) => {
      if (typeof _browser.test === 'function') {
        return _browser.test(this);
      }

      if (_browser.test instanceof Array) {
        return _browser.test.some(condition => this.test(condition));
      }

      throw new Error("Browser's test function is not valid");
    });

    if (browserDescriptor) {
      this.parsedResult.browser = browserDescriptor.describe(this.getUA());
    }

    return this.parsedResult.browser;
  }

  /**
   * Get parsed browser object, parsing it first if that has not happened yet
   * @return {Object}
   *
   * @public
   */
  getBrowser() {
    if (this.parsedResult.browser) {
      return this.parsedResult.browser;
    }

    return this.parseBrowser();
  }

  /**
   * Get browser's name
   * @param {Boolean} [toLowerCase] return lower-cased value
   * @return {String} Browser's name or an empty string
   *
   * @public
   */
  getBrowserName(toLowerCase) {
    // Fall back to '' BEFORE stringifying: String(undefined) is the truthy
    // text "undefined", which must never be reported as a browser name.
    const name = this.getBrowser().name || '';

    return toLowerCase ? String(name).toLowerCase() : name;
  }

  /**
   * Get browser's version
   * @return {String|undefined} version of browser, `undefined` when it was not detected
   *
   * @public
   */
  getBrowserVersion() {
    return this.getBrowser().version;
  }

  /**
   * Get OS, parsing it first if that has not happened yet
   * @return {Object}
   *
   * @example
   * this.getOS();
   * {
   *   name: 'macOS',
   *   version: '10.11.12'
   * }
   */
  getOS() {
    if (this.parsedResult.os) {
      return this.parsedResult.os;
    }

    return this.parseOS();
  }

  /**
   * Parse OS and save it to this.parsedResult.os
   * @return {*|{}} the parsed OS, or an empty object when no descriptor matched
   * @throws {Error} when a descriptor's `test` is neither a function nor an array
   */
  parseOS() {
    this.parsedResult.os = {};

    const os = Utils.find(osParsersList, (_os) => {
      if (typeof _os.test === 'function') {
        return _os.test(this);
      }

      if (_os.test instanceof Array) {
        return _os.test.some(condition => this.test(condition));
      }

      throw new Error("Browser's test function is not valid");
    });

    if (os) {
      this.parsedResult.os = os.describe(this.getUA());
    }

    return this.parsedResult.os;
  }

  /**
   * Get OS name
   * @param {Boolean} [toLowerCase] return lower-cased value
   * @return {String} name of the OS — macOS, Windows, Linux, etc. — or an empty string
   */
  getOSName(toLowerCase) {
    // Default first, stringify second (see getBrowserName).
    const name = this.getOS().name || '';

    return toLowerCase ? String(name).toLowerCase() : name;
  }

  /**
   * Get OS version
   * @return {String|undefined} full version with dots ('10.11.12', '5.6', etc)
   */
  getOSVersion() {
    return this.getOS().version;
  }

  /**
   * Get parsed platform, parsing it first if that has not happened yet
   * @return {{}}
   */
  getPlatform() {
    if (this.parsedResult.platform) {
      return this.parsedResult.platform;
    }

    return this.parsePlatform();
  }

  /**
   * Get platform type
   * @param {Boolean} [toLowerCase=false] return lower-cased value
   * @return {String} platform type or an empty string
   */
  getPlatformType(toLowerCase = false) {
    // Default first, stringify second (see getBrowserName).
    const type = this.getPlatform().type || '';

    return toLowerCase ? String(type).toLowerCase() : type;
  }

  /**
   * Parse platform and save it to this.parsedResult.platform
   * @return {{}} the parsed platform, or an empty object when no descriptor matched
   * @throws {Error} when a descriptor's `test` is neither a function nor an array
   */
  parsePlatform() {
    this.parsedResult.platform = {};

    const platform = Utils.find(platformParsersList, (_platform) => {
      if (typeof _platform.test === 'function') {
        return _platform.test(this);
      }

      if (_platform.test instanceof Array) {
        return _platform.test.some(condition => this.test(condition));
      }

      throw new Error("Browser's test function is not valid");
    });

    if (platform) {
      this.parsedResult.platform = platform.describe(this.getUA());
    }

    return this.parsedResult.platform;
  }

  /**
   * Get parsed engine, parsing it first if that has not happened yet
   * @return {{}}
   */
  getEngine() {
    if (this.parsedResult.engine) {
      return this.parsedResult.engine;
    }

    return this.parseEngine();
  }

  /**
   * Get engine's name
   * @param {Boolean} [toLowerCase] return lower-cased value
   * @return {String} Engine's name or an empty string
   *
   * @public
   */
  getEngineName(toLowerCase) {
    // Default first, stringify second (see getBrowserName).
    const name = this.getEngine().name || '';

    return toLowerCase ? String(name).toLowerCase() : name;
  }

  /**
   * Parse engine and save it to this.parsedResult.engine
   * @return {{}} the parsed engine, or an empty object when no descriptor matched
   * @throws {Error} when a descriptor's `test` is neither a function nor an array
   */
  parseEngine() {
    this.parsedResult.engine = {};

    const engine = Utils.find(enginesParsersList, (_engine) => {
      if (typeof _engine.test === 'function') {
        return _engine.test(this);
      }

      if (_engine.test instanceof Array) {
        return _engine.test.some(condition => this.test(condition));
      }

      throw new Error("Browser's test function is not valid");
    });

    if (engine) {
      this.parsedResult.engine = engine.describe(this.getUA());
    }

    return this.parsedResult.engine;
  }

  /**
   * Parse full information about the browser
   * @returns {Parser}
   */
  parse() {
    this.parseBrowser();
    this.parseOS();
    this.parsePlatform();
    this.parseEngine();

    return this;
  }

  /**
   * Get parsed result
   * @return {ParsedResult} a copy of `parsedResult` produced by `Utils.assign`
   */
  getResult() {
    return Utils.assign({}, this.parsedResult);
  }

  /**
   * Check if parsed browser matches certain conditions
   *
   * @param {Object} checkTree It's one or two layered object,
   * which can include a platform or an OS on the first layer
   * and should have browsers specs on the bottom-laying layer.
   * Entries whose value is neither a string nor a non-null object are ignored.
   *
   * @returns {Boolean|undefined} Whether the browser satisfies the set conditions or not.
   * Returns `undefined` when the browser is not described in the checkTree object.
   *
   * @example
   * const browser = Bowser.getParser(window.navigator.userAgent);
   * if (browser.satisfies({chrome: '>118.01.1322' }))
   * // or with os
   * if (browser.satisfies({windows: { chrome: '>118.01.1322' } }))
   * // or with platforms
   * if (browser.satisfies({desktop: { chrome: '>118.01.1322' } }))
   */
  satisfies(checkTree) {
    const platformsAndOSes = {};
    let platformsAndOSCounter = 0;
    const browsers = {};
    let browsersCounter = 0;

    const allDefinitions = Object.keys(checkTree);

    allDefinitions.forEach((key) => {
      const currentDefinition = checkTree[key];
      if (typeof currentDefinition === 'string') {
        browsers[key] = currentDefinition;
        browsersCounter += 1;
      } else if (typeof currentDefinition === 'object' && currentDefinition !== null) {
        // `typeof null` is 'object'; without the null check a `null` entry would
        // later be passed to the recursive call and crash in Object.keys(null).
        platformsAndOSes[key] = currentDefinition;
        platformsAndOSCounter += 1;
      }
    });

    if (platformsAndOSCounter > 0) {
      const platformsAndOSNames = Object.keys(platformsAndOSes);
      const OSMatchingDefinition = Utils.find(platformsAndOSNames, name => (this.isOS(name)));

      if (OSMatchingDefinition) {
        const osResult = this.satisfies(platformsAndOSes[OSMatchingDefinition]);

        if (osResult !== void 0) {
          return osResult;
        }
      }

      const platformMatchingDefinition = Utils.find(
        platformsAndOSNames,
        name => (this.isPlatform(name)),
      );
      if (platformMatchingDefinition) {
        const platformResult = this.satisfies(platformsAndOSes[platformMatchingDefinition]);

        if (platformResult !== void 0) {
          return platformResult;
        }
      }
    }

    if (browsersCounter > 0) {
      const browserNames = Object.keys(browsers);
      const matchingDefinition = Utils.find(browserNames, name => (this.isBrowser(name, true)));

      if (matchingDefinition !== void 0) {
        return this.compareVersion(browsers[matchingDefinition]);
      }
    }

    return undefined;
  }

  /**
   * Check if the browser name equals the passed string
   * @param browserName The string to compare with the browser name
   * @param [includingAlias=false] The flag showing whether alias will be included into comparison
   * @returns {boolean}
   */
  isBrowser(browserName, includingAlias = false) {
    const defaultBrowserName = this.getBrowserName().toLowerCase();
    let browserNameLower = browserName.toLowerCase();
    const alias = Utils.getBrowserTypeByAlias(browserNameLower);

    if (includingAlias && alias) {
      browserNameLower = alias.toLowerCase();
    }
    return browserNameLower === defaultBrowserName;
  }

  /**
   * Compare the parsed browser version with a version expression.
   *
   * The expression is a version optionally prefixed with one of
   * `>`, `>=`, `<`, `<=`, `=` or `~`. Without a prefix (or with `=`) the
   * versions must compare as equal. `>=`, `<=` and `~` pass `isLoose = true`
   * to `Utils.compareVersions`.
   *
   * @param {String} version version expression, e.g. `'>=118.0'`
   * @returns {Boolean|undefined} whether the browser version satisfies the expression;
   * `undefined` when the browser version is not a string (i.e. was not detected)
   */
  compareVersion(version) {
    // Results of Utils.compareVersions that count as a match; 0 is "equal".
    let expectedResults = [0];
    let comparableVersion = version;
    let isLoose = false;

    const currentBrowserVersion = this.getBrowserVersion();

    if (typeof currentBrowserVersion !== 'string') {
      return void 0;
    }

    if (version[0] === '>' || version[0] === '<') {
      comparableVersion = version.slice(1);
      if (version[1] === '=') {
        isLoose = true;
        comparableVersion = version.slice(2);
      } else {
        // Strict comparison: "equal" is no longer an accepted outcome.
        expectedResults = [];
      }
      if (version[0] === '>') {
        expectedResults.push(1);
      } else {
        expectedResults.push(-1);
      }
    } else if (version[0] === '=') {
      comparableVersion = version.slice(1);
    } else if (version[0] === '~') {
      isLoose = true;
      comparableVersion = version.slice(1);
    }

    return expectedResults.indexOf(
      Utils.compareVersions(currentBrowserVersion, comparableVersion, isLoose),
    ) > -1;
  }

  /**
   * Check if the OS name equals the passed string (case-insensitive)
   * @param {String} osName
   * @returns {Boolean}
   */
  isOS(osName) {
    return this.getOSName(true) === String(osName).toLowerCase();
  }

  /**
   * Check if the platform type equals the passed string (case-insensitive)
   * @param {String} platformType
   * @returns {Boolean}
   */
  isPlatform(platformType) {
    return this.getPlatformType(true) === String(platformType).toLowerCase();
  }

  /**
   * Check if the engine name equals the passed string (case-insensitive)
   * @param {String} engineName
   * @returns {Boolean}
   */
  isEngine(engineName) {
    return this.getEngineName(true) === String(engineName).toLowerCase();
  }

  /**
   * Is anything? Check if the browser is called "anything",
   * the OS called "anything" or the platform called "anything"
   * @param {String} anything
   * @param [includingAlias=false] The flag showing whether alias will be included into comparison
   * @returns {Boolean}
   */
  is(anything, includingAlias = false) {
    return this.isBrowser(anything, includingAlias) || this.isOS(anything)
      || this.isPlatform(anything);
  }

  /**
   * Check if any of the given values satisfies this.is(anything)
   * @param {String[]} anythings
   * @returns {Boolean}
   */
  some(anythings = []) {
    return anythings.some(anything => this.is(anything));
  }
}
+ * @returns {Parser} + * @throws {Error} when UA is not a String + * + * @example + * const parser = Bowser.getParser(window.navigator.userAgent); + * const result = parser.getResult(); + */ + static getParser(UA, skipParsing = false) { + if (typeof UA !== 'string') { + throw new Error('UserAgent should be a string'); + } + return new Parser(UA, skipParsing); + } + + /** + * Creates a {@link Parser} instance and runs {@link Parser.getResult} immediately + * + * @param UA + * @return {ParsedResult} + * + * @example + * const result = Bowser.parse(window.navigator.userAgent); + */ + static parse(UA) { + return (new Parser(UA)).getResult(); + } + + static get BROWSER_MAP() { + return BROWSER_MAP; + } + + static get ENGINE_MAP() { + return ENGINE_MAP; + } + + static get OS_MAP() { + return OS_MAP; + } + + static get PLATFORMS_MAP() { + return PLATFORMS_MAP; + } + } + + var regeneratorRuntime$1 = {exports: {}}; + + var _typeof$1 = {exports: {}}; + + _typeof$1.exports; + + (function (module) { + function _typeof(o) { + "@babel/helpers - typeof"; + + return (module.exports = _typeof = "function" == typeof Symbol && "symbol" == typeof Symbol.iterator ? function (o) { + return typeof o; + } : function (o) { + return o && "function" == typeof Symbol && o.constructor === Symbol && o !== Symbol.prototype ? 
"symbol" : typeof o; + }, module.exports.__esModule = true, module.exports["default"] = module.exports), _typeof(o); + } + module.exports = _typeof, module.exports.__esModule = true, module.exports["default"] = module.exports; + } (_typeof$1)); + + var _typeofExports = _typeof$1.exports; + + regeneratorRuntime$1.exports; + + (function (module) { + var _typeof = _typeofExports["default"]; + function _regeneratorRuntime() { + module.exports = _regeneratorRuntime = function _regeneratorRuntime() { + return e; + }, module.exports.__esModule = true, module.exports["default"] = module.exports; + var t, + e = {}, + r = Object.prototype, + n = r.hasOwnProperty, + o = Object.defineProperty || function (t, e, r) { + t[e] = r.value; + }, + i = "function" == typeof Symbol ? Symbol : {}, + a = i.iterator || "@@iterator", + c = i.asyncIterator || "@@asyncIterator", + u = i.toStringTag || "@@toStringTag"; + function define(t, e, r) { + return Object.defineProperty(t, e, { + value: r, + enumerable: !0, + configurable: !0, + writable: !0 + }), t[e]; + } + try { + define({}, ""); + } catch (t) { + define = function define(t, e, r) { + return t[e] = r; + }; + } + function wrap(t, e, r, n) { + var i = e && e.prototype instanceof Generator ? 
e : Generator, + a = Object.create(i.prototype), + c = new Context(n || []); + return o(a, "_invoke", { + value: makeInvokeMethod(t, r, c) + }), a; + } + function tryCatch(t, e, r) { + try { + return { + type: "normal", + arg: t.call(e, r) + }; + } catch (t) { + return { + type: "throw", + arg: t + }; + } + } + e.wrap = wrap; + var h = "suspendedStart", + l = "suspendedYield", + f = "executing", + s = "completed", + y = {}; + function Generator() {} + function GeneratorFunction() {} + function GeneratorFunctionPrototype() {} + var p = {}; + define(p, a, function () { + return this; + }); + var d = Object.getPrototypeOf, + v = d && d(d(values([]))); + v && v !== r && n.call(v, a) && (p = v); + var g = GeneratorFunctionPrototype.prototype = Generator.prototype = Object.create(p); + function defineIteratorMethods(t) { + ["next", "throw", "return"].forEach(function (e) { + define(t, e, function (t) { + return this._invoke(e, t); + }); + }); + } + function AsyncIterator(t, e) { + function invoke(r, o, i, a) { + var c = tryCatch(t[r], t, o); + if ("throw" !== c.type) { + var u = c.arg, + h = u.value; + return h && "object" == _typeof(h) && n.call(h, "__await") ? e.resolve(h.__await).then(function (t) { + invoke("next", t, i, a); + }, function (t) { + invoke("throw", t, i, a); + }) : e.resolve(h).then(function (t) { + u.value = t, i(u); + }, function (t) { + return invoke("throw", t, i, a); + }); + } + a(c.arg); + } + var r; + o(this, "_invoke", { + value: function value(t, n) { + function callInvokeWithMethodAndArg() { + return new e(function (e, r) { + invoke(t, n, e, r); + }); + } + return r = r ? 
r.then(callInvokeWithMethodAndArg, callInvokeWithMethodAndArg) : callInvokeWithMethodAndArg(); + } + }); + } + function makeInvokeMethod(e, r, n) { + var o = h; + return function (i, a) { + if (o === f) throw new Error("Generator is already running"); + if (o === s) { + if ("throw" === i) throw a; + return { + value: t, + done: !0 + }; + } + for (n.method = i, n.arg = a;;) { + var c = n.delegate; + if (c) { + var u = maybeInvokeDelegate(c, n); + if (u) { + if (u === y) continue; + return u; + } + } + if ("next" === n.method) n.sent = n._sent = n.arg;else if ("throw" === n.method) { + if (o === h) throw o = s, n.arg; + n.dispatchException(n.arg); + } else "return" === n.method && n.abrupt("return", n.arg); + o = f; + var p = tryCatch(e, r, n); + if ("normal" === p.type) { + if (o = n.done ? s : l, p.arg === y) continue; + return { + value: p.arg, + done: n.done + }; + } + "throw" === p.type && (o = s, n.method = "throw", n.arg = p.arg); + } + }; + } + function maybeInvokeDelegate(e, r) { + var n = r.method, + o = e.iterator[n]; + if (o === t) return r.delegate = null, "throw" === n && e.iterator["return"] && (r.method = "return", r.arg = t, maybeInvokeDelegate(e, r), "throw" === r.method) || "return" !== n && (r.method = "throw", r.arg = new TypeError("The iterator does not provide a '" + n + "' method")), y; + var i = tryCatch(o, e.iterator, r.arg); + if ("throw" === i.type) return r.method = "throw", r.arg = i.arg, r.delegate = null, y; + var a = i.arg; + return a ? a.done ? 
(r[e.resultName] = a.value, r.next = e.nextLoc, "return" !== r.method && (r.method = "next", r.arg = t), r.delegate = null, y) : a : (r.method = "throw", r.arg = new TypeError("iterator result is not an object"), r.delegate = null, y); + } + function pushTryEntry(t) { + var e = { + tryLoc: t[0] + }; + 1 in t && (e.catchLoc = t[1]), 2 in t && (e.finallyLoc = t[2], e.afterLoc = t[3]), this.tryEntries.push(e); + } + function resetTryEntry(t) { + var e = t.completion || {}; + e.type = "normal", delete e.arg, t.completion = e; + } + function Context(t) { + this.tryEntries = [{ + tryLoc: "root" + }], t.forEach(pushTryEntry, this), this.reset(!0); + } + function values(e) { + if (e || "" === e) { + var r = e[a]; + if (r) return r.call(e); + if ("function" == typeof e.next) return e; + if (!isNaN(e.length)) { + var o = -1, + i = function next() { + for (; ++o < e.length;) if (n.call(e, o)) return next.value = e[o], next.done = !1, next; + return next.value = t, next.done = !0, next; + }; + return i.next = i; + } + } + throw new TypeError(_typeof(e) + " is not iterable"); + } + return GeneratorFunction.prototype = GeneratorFunctionPrototype, o(g, "constructor", { + value: GeneratorFunctionPrototype, + configurable: !0 + }), o(GeneratorFunctionPrototype, "constructor", { + value: GeneratorFunction, + configurable: !0 + }), GeneratorFunction.displayName = define(GeneratorFunctionPrototype, u, "GeneratorFunction"), e.isGeneratorFunction = function (t) { + var e = "function" == typeof t && t.constructor; + return !!e && (e === GeneratorFunction || "GeneratorFunction" === (e.displayName || e.name)); + }, e.mark = function (t) { + return Object.setPrototypeOf ? 
Object.setPrototypeOf(t, GeneratorFunctionPrototype) : (t.__proto__ = GeneratorFunctionPrototype, define(t, u, "GeneratorFunction")), t.prototype = Object.create(g), t; + }, e.awrap = function (t) { + return { + __await: t + }; + }, defineIteratorMethods(AsyncIterator.prototype), define(AsyncIterator.prototype, c, function () { + return this; + }), e.AsyncIterator = AsyncIterator, e.async = function (t, r, n, o, i) { + void 0 === i && (i = Promise); + var a = new AsyncIterator(wrap(t, r, n, o), i); + return e.isGeneratorFunction(r) ? a : a.next().then(function (t) { + return t.done ? t.value : a.next(); + }); + }, defineIteratorMethods(g), define(g, u, "Generator"), define(g, a, function () { + return this; + }), define(g, "toString", function () { + return "[object Generator]"; + }), e.keys = function (t) { + var e = Object(t), + r = []; + for (var n in e) r.push(n); + return r.reverse(), function next() { + for (; r.length;) { + var t = r.pop(); + if (t in e) return next.value = t, next.done = !1, next; + } + return next.done = !0, next; + }; + }, e.values = values, Context.prototype = { + constructor: Context, + reset: function reset(e) { + if (this.prev = 0, this.next = 0, this.sent = this._sent = t, this.done = !1, this.delegate = null, this.method = "next", this.arg = t, this.tryEntries.forEach(resetTryEntry), !e) for (var r in this) "t" === r.charAt(0) && n.call(this, r) && !isNaN(+r.slice(1)) && (this[r] = t); + }, + stop: function stop() { + this.done = !0; + var t = this.tryEntries[0].completion; + if ("throw" === t.type) throw t.arg; + return this.rval; + }, + dispatchException: function dispatchException(e) { + if (this.done) throw e; + var r = this; + function handle(n, o) { + return a.type = "throw", a.arg = e, r.next = n, o && (r.method = "next", r.arg = t), !!o; + } + for (var o = this.tryEntries.length - 1; o >= 0; --o) { + var i = this.tryEntries[o], + a = i.completion; + if ("root" === i.tryLoc) return handle("end"); + if (i.tryLoc <= this.prev) 
{ + var c = n.call(i, "catchLoc"), + u = n.call(i, "finallyLoc"); + if (c && u) { + if (this.prev < i.catchLoc) return handle(i.catchLoc, !0); + if (this.prev < i.finallyLoc) return handle(i.finallyLoc); + } else if (c) { + if (this.prev < i.catchLoc) return handle(i.catchLoc, !0); + } else { + if (!u) throw new Error("try statement without catch or finally"); + if (this.prev < i.finallyLoc) return handle(i.finallyLoc); + } + } + } + }, + abrupt: function abrupt(t, e) { + for (var r = this.tryEntries.length - 1; r >= 0; --r) { + var o = this.tryEntries[r]; + if (o.tryLoc <= this.prev && n.call(o, "finallyLoc") && this.prev < o.finallyLoc) { + var i = o; + break; + } + } + i && ("break" === t || "continue" === t) && i.tryLoc <= e && e <= i.finallyLoc && (i = null); + var a = i ? i.completion : {}; + return a.type = t, a.arg = e, i ? (this.method = "next", this.next = i.finallyLoc, y) : this.complete(a); + }, + complete: function complete(t, e) { + if ("throw" === t.type) throw t.arg; + return "break" === t.type || "continue" === t.type ? this.next = t.arg : "return" === t.type ? 
(this.rval = this.arg = t.arg, this.method = "return", this.next = "end") : "normal" === t.type && e && (this.next = e), y; + }, + finish: function finish(t) { + for (var e = this.tryEntries.length - 1; e >= 0; --e) { + var r = this.tryEntries[e]; + if (r.finallyLoc === t) return this.complete(r.completion, r.afterLoc), resetTryEntry(r), y; + } + }, + "catch": function _catch(t) { + for (var e = this.tryEntries.length - 1; e >= 0; --e) { + var r = this.tryEntries[e]; + if (r.tryLoc === t) { + var n = r.completion; + if ("throw" === n.type) { + var o = n.arg; + resetTryEntry(r); + } + return o; + } + } + throw new Error("illegal catch attempt"); + }, + delegateYield: function delegateYield(e, r, n) { + return this.delegate = { + iterator: values(e), + resultName: r, + nextLoc: n + }, "next" === this.method && (this.arg = t), y; + } + }, e; + } + module.exports = _regeneratorRuntime, module.exports.__esModule = true, module.exports["default"] = module.exports; + } (regeneratorRuntime$1)); + + var regeneratorRuntimeExports = regeneratorRuntime$1.exports; + + // TODO(Babel 8): Remove this file. + + var runtime = regeneratorRuntimeExports(); + + // Copied from https://github.com/facebook/regenerator/blob/main/packages/runtime/runtime.js#L736= + try { + regeneratorRuntime = runtime; + } catch (accidentalStrictMode) { + if (typeof globalThis === "object") { + globalThis.regeneratorRuntime = runtime; + } else { + Function("r", "regeneratorRuntime = r")(runtime); + } + } + + function _typeof(o) { + "@babel/helpers - typeof"; + + return _typeof = "function" == typeof Symbol && "symbol" == typeof Symbol.iterator ? function (o) { + return typeof o; + } : function (o) { + return o && "function" == typeof Symbol && o.constructor === Symbol && o !== Symbol.prototype ? 
"symbol" : typeof o; + }, _typeof(o); + } + + function _classCallCheck$1(instance, Constructor) { + if (!(instance instanceof Constructor)) { + throw new TypeError("Cannot call a class as a function"); + } + } + + function _toPrimitive(input, hint) { + if (_typeof(input) !== "object" || input === null) return input; + var prim = input[Symbol.toPrimitive]; + if (prim !== undefined) { + var res = prim.call(input, hint || "default"); + if (_typeof(res) !== "object") return res; + throw new TypeError("@@toPrimitive must return a primitive value."); + } + return (hint === "string" ? String : Number)(input); + } + + function _toPropertyKey(arg) { + var key = _toPrimitive(arg, "string"); + return _typeof(key) === "symbol" ? key : String(key); + } + + function _defineProperties$1(target, props) { + for (var i = 0; i < props.length; i++) { + var descriptor = props[i]; + descriptor.enumerable = descriptor.enumerable || false; + descriptor.configurable = true; + if ("value" in descriptor) descriptor.writable = true; + Object.defineProperty(target, _toPropertyKey(descriptor.key), descriptor); + } + } + function _createClass$1(Constructor, protoProps, staticProps) { + if (protoProps) _defineProperties$1(Constructor.prototype, protoProps); + if (staticProps) _defineProperties$1(Constructor, staticProps); + Object.defineProperty(Constructor, "prototype", { + writable: false + }); + return Constructor; + } + + /* + Copyright 2022-2023 Picovoice Inc. + + You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" + file accompanying this source. + + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + specific language governing permissions and limitations under the License. 
/**
 * Reinterpret a signed address as an unsigned one.
 *
 * Negative inputs go through the unsigned right shift (`>>> 0`), which yields
 * the unsigned 32-bit value; every other input is returned untouched.
 *
 * @param address The address to cast to unsigned address.
 */
function unsignedAddress(address) {
  return address < 0 ? address >>> 0 : address;
}
/**
 * Copy the leading elements of an array-like value into a new Array.
 *
 * @param arr array-like source (indexable, with a `length`)
 * @param len number of elements to copy; when nullish or larger than
 * `arr.length`, `arr.length` is used instead
 * @returns a new Array holding the copied elements
 */
function _arrayLikeToArray$1(arr, len) {
  var count = len == null || len > arr.length ? arr.length : len;
  var copy = new Array(count);
  var index = 0;
  while (index < count) {
    copy[index] = arr[index];
    index++;
  }
  return copy;
}
/**
 * Copy the leading elements of an array-like value into a new Array.
 *
 * @param arr array-like source (indexable, with a `length`)
 * @param len number of elements to copy; when nullish or larger than
 * `arr.length`, `arr.length` is used instead
 * @returns a new Array holding the copied elements
 */
function _arrayLikeToArray(arr, len) {
  var limit = len;
  if (limit == null || limit > arr.length) {
    limit = arr.length;
  }
  var result = new Array(limit);
  for (var position = 0; position < limit; position++) {
    result[position] = arr[position];
  }
  return result;
}
PvWebGPUDevice = /*#__PURE__*/function () { + function PvWebGPUDevice(device, adapterInfo) { + _classCallCheck(this, PvWebGPUDevice); + _defineProperty(this, "_numCommandsEncoded", void 0); + _defineProperty(this, "_commandEncoder", void 0); + _defineProperty(this, "_passEncoder", void 0); + _defineProperty(this, "_stageBuffersPendingMap", void 0); + _defineProperty(this, "_uniformBuffersPendingRelease", void 0); + _defineProperty(this, "device", void 0); + _defineProperty(this, "bufferReusePool", void 0); + _defineProperty(this, "shaders", void 0); + _defineProperty(this, "isTimerEnabled", void 0); + _defineProperty(this, "timestampBuffers", void 0); + _defineProperty(this, "shaderTimes", void 0); + _defineProperty(this, "adapterInfo", void 0); + this._numCommandsEncoded = 0; + this._commandEncoder = null; + this._passEncoder = null; + this._stageBuffersPendingMap = []; + this._uniformBuffersPendingRelease = []; + this.device = device; + this.bufferReusePool = new Map(); + this.shaders = {}; + this.shaderTimes = {}; + this.timestampBuffers = {}; + this.isTimerEnabled = false; + this.adapterInfo = adapterInfo; + } + _createClass(PvWebGPUDevice, [{ + key: "getBufferKey", + value: function getBufferKey(sizeInBytes, usage) { + return "".concat(usage, "_").concat(sizeInBytes); + } + }, { + key: "commandEncoder", + get: function get() { + if (!this._commandEncoder) { + this._commandEncoder = this.device.createCommandEncoder(); + } + return this._commandEncoder; + } + }, { + key: "numCommandsEncoded", + get: function get() { + return this._numCommandsEncoded; + }, + set: function set(value) { + this._numCommandsEncoded = value; + if (this._numCommandsEncoded >= 16) { + this.endComputePass(); + this.flushCommandEncoder(); + } + } + }, { + key: "endComputePass", + value: function endComputePass() { + if (this._passEncoder) { + this._passEncoder.end(); + this._passEncoder = null; + } + } + }, { + key: "getBuffer", + value: function getBuffer(sizeBytes, usage) { + var 
mappedAtCreation = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : false; + var label = arguments.length > 3 ? arguments[3] : undefined; + var key = this.getBufferKey(sizeBytes, usage); + if (this.bufferReusePool.has(key)) { + var buffers = this.bufferReusePool.get(key); + if (buffers && buffers.length > 0) { + return buffers.pop(); + } + } + return this.device.createBuffer({ + size: sizeBytes * Uint8Array.BYTES_PER_ELEMENT, + usage: usage, + mappedAtCreation: mappedAtCreation, + label: label + }); + } + }, { + key: "scheduleUniformBufferForRelease", + value: function scheduleUniformBufferForRelease(buffer) { + this._uniformBuffersPendingRelease.push(buffer); + } + }, { + key: "releaseBuffer", + value: function releaseBuffer(buffer) { + var clearBuffer = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : true; + if (clearBuffer) { + this.endComputePass(); + this.commandEncoder.clearBuffer(buffer, 0, buffer.size); + } + var key = this.getBufferKey(buffer.size, buffer.usage); + if (!this.bufferReusePool.has(key)) { + this.bufferReusePool.set(key, []); + } + this.bufferReusePool.get(key).push(buffer); + } + }, { + key: "sync", + value: function () { + var _sync = _asyncToGenerator( /*#__PURE__*/_regeneratorRuntime.mark(function _callee() { + var _this = this; + var _iterator, _step, k, buffers, _iterator3, _step3, b, _loop, _i, _Object$entries; + return _regeneratorRuntime.wrap(function _callee$(_context3) { + while (1) switch (_context3.prev = _context3.next) { + case 0: + this.flushCommandEncoder(); + _context3.next = 3; + return this.device.queue.onSubmittedWorkDone(); + case 3: + _iterator = _createForOfIteratorHelper(this.bufferReusePool.keys()); + try { + for (_iterator.s(); !(_step = _iterator.n()).done;) { + k = _step.value; + buffers = this.bufferReusePool.get(k); + if (buffers && buffers.length > 0) { + _iterator3 = _createForOfIteratorHelper(buffers); + try { + for (_iterator3.s(); !(_step3 = _iterator3.n()).done;) { + b 
= _step3.value; + b === null || b === void 0 || b.destroy(); + } + } catch (err) { + _iterator3.e(err); + } finally { + _iterator3.f(); + } + } + } + } catch (err) { + _iterator.e(err); + } finally { + _iterator.f(); + } + this.bufferReusePool.clear(); + _loop = /*#__PURE__*/_regeneratorRuntime.mark(function _loop() { + var _Object$entries$_i, shaderName, timestampBuffers, _iterator2, _step2, _loop2; + return _regeneratorRuntime.wrap(function _loop$(_context2) { + while (1) switch (_context2.prev = _context2.next) { + case 0: + _Object$entries$_i = _slicedToArray(_Object$entries[_i], 2), shaderName = _Object$entries$_i[0], timestampBuffers = _Object$entries$_i[1]; + if (!_this.shaderTimes[shaderName]) { + _this.shaderTimes[shaderName] = []; + } + _iterator2 = _createForOfIteratorHelper(timestampBuffers); + _context2.prev = 3; + _loop2 = /*#__PURE__*/_regeneratorRuntime.mark(function _loop2() { + var timestampBuffer; + return _regeneratorRuntime.wrap(function _loop2$(_context) { + while (1) switch (_context.prev = _context.next) { + case 0: + timestampBuffer = _step2.value; + timestampBuffer.mapAsync(GPUMapMode.READ).then(function () { + var times = new BigInt64Array(timestampBuffer.getMappedRange()); + var timeDif = times[1] - times[0]; + timestampBuffer.unmap(); + timestampBuffer.destroy(); + _this.shaderTimes[shaderName].push(timeDif); + }); + case 2: + case "end": + return _context.stop(); + } + }, _loop2); + }); + _iterator2.s(); + case 6: + if ((_step2 = _iterator2.n()).done) { + _context2.next = 10; + break; + } + return _context2.delegateYield(_loop2(), "t0", 8); + case 8: + _context2.next = 6; + break; + case 10: + _context2.next = 15; + break; + case 12: + _context2.prev = 12; + _context2.t1 = _context2["catch"](3); + _iterator2.e(_context2.t1); + case 15: + _context2.prev = 15; + _iterator2.f(); + return _context2.finish(15); + case 18: + case "end": + return _context2.stop(); + } + }, _loop, null, [[3, 12, 15, 18]]); + }); + _i = 0, _Object$entries = 
Object.entries(this.timestampBuffers); + case 8: + if (!(_i < _Object$entries.length)) { + _context3.next = 13; + break; + } + return _context3.delegateYield(_loop(), "t0", 10); + case 10: + _i++; + _context3.next = 8; + break; + case 13: + this.timestampBuffers = {}; + case 14: + case "end": + return _context3.stop(); + } + }, _callee, this); + })); + function sync() { + return _sync.apply(this, arguments); + } + return sync; + }() + }, { + key: "reportShaderTimes", + value: function reportShaderTimes() { + for (var _i2 = 0, _Object$entries2 = Object.entries(this.shaderTimes); _i2 < _Object$entries2.length; _i2++) { + var _Object$entries2$_i = _slicedToArray(_Object$entries2[_i2], 2), + shaderName = _Object$entries2$_i[0], + shaderTimes = _Object$entries2$_i[1]; + var timeSum = 0n; + var _iterator4 = _createForOfIteratorHelper(shaderTimes), + _step4; + try { + for (_iterator4.s(); !(_step4 = _iterator4.n()).done;) { + var shaderTime = _step4.value; + timeSum = timeSum + shaderTime; + } + } catch (err) { + _iterator4.e(err); + } finally { + _iterator4.f(); + } + var totalSeconds = Number(timeSum) * 1e-9; + var avgSeconds = (totalSeconds / shaderTimes.length).toFixed(7); + console.log("".concat(shaderName, ", ").concat(totalSeconds.toFixed(5), ", ").concat(avgSeconds)); + } + this.shaderTimes = {}; + } + }, { + key: "flushCommandEncoder", + value: function flushCommandEncoder() { + var _this2 = this; + this.device.queue.submit([this.commandEncoder.finish()]); + this._commandEncoder = null; + this._numCommandsEncoded = 0; + this._stageBuffersPendingMap.forEach(function (buffer) { + buffer.destroy(); + }); + this._stageBuffersPendingMap = []; + this._uniformBuffersPendingRelease.forEach(function (buffer) { + _this2.releaseBuffer(buffer, false); + }); + this._uniformBuffersPendingRelease = []; + } + }, { + key: "writeBuffer", + value: function writeBuffer(sizeBytes, offset, srcArray, dstBuffer) { + var stagingBuffer = this.getBuffer(sizeBytes, GPUBufferUsage.MAP_WRITE 
| GPUBufferUsage.COPY_SRC, true); + new Uint8Array(stagingBuffer.getMappedRange()).set(srcArray); + stagingBuffer.unmap(); + this._stageBuffersPendingMap.push(stagingBuffer); + this.endComputePass(); + this.commandEncoder.copyBufferToBuffer(stagingBuffer, 0, dstBuffer, offset, sizeBytes); + this.numCommandsEncoded++; + } + }, { + key: "dispatchComputerShader", + value: function dispatchComputerShader(bindGroup, pipeline, shaderName, workgroupCountX, workgroupCountY, workgroupCountZ) { + if (this.isTimerEnabled) { + var querySet = this.device.createQuerySet({ + type: 'timestamp', + count: 2 + }); + var timestampWrites = { + querySet: querySet, + beginningOfPassWriteIndex: 0, + endOfPassWriteIndex: 1 + }; + this.endComputePass(); + this._passEncoder = this.commandEncoder.beginComputePass({ + timestampWrites: timestampWrites + }); + this._passEncoder.setBindGroup(0, bindGroup); + this._passEncoder.setPipeline(pipeline); + this._passEncoder.dispatchWorkgroups(workgroupCountX, workgroupCountY, workgroupCountZ); + this.endComputePass(); + var size = 2 * BigInt64Array.BYTES_PER_ELEMENT; + var resolveBuffer = this.device.createBuffer({ + size: size, + usage: GPUBufferUsage.QUERY_RESOLVE | GPUBufferUsage.COPY_SRC + }); + this.commandEncoder.resolveQuerySet(querySet, 0, 2, resolveBuffer, 0); + var resultBuffer = this.device.createBuffer({ + size: size, + usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ + }); + this.commandEncoder.copyBufferToBuffer(resolveBuffer, 0, resultBuffer, 0, size); + if (!this.timestampBuffers[shaderName]) { + this.timestampBuffers[shaderName] = []; + } + this.timestampBuffers[shaderName].push(resultBuffer); + this.numCommandsEncoded += 3; + } else { + if (!this._passEncoder) { + this._passEncoder = this.commandEncoder.beginComputePass(); + } + this._passEncoder.setBindGroup(0, bindGroup); + this._passEncoder.setPipeline(pipeline); + this._passEncoder.dispatchWorkgroups(workgroupCountX, workgroupCountY, workgroupCountZ); + 
this.numCommandsEncoded++; + } + } + }]); + return PvWebGPUDevice; + }(); + + var PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE = 256; + var PV_PICOLLM_WEBGPU_MAX_GRID_DIM = 65535; + var gpuDevices = new Map(); + var gpuBuffers = new Map(); + var emptyShader = "\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main_empty() {}\n"; + var shaderEntryPoint = 'main'; + + var PRECOMPUTE_ENCODING_SHADER_NAME = "pv_picollm_attention_precompute_encoding_shader"; + var attentionPrecomputeEncodingShaderSource = "\nstruct argsStruct {\n dimension: u32,\n steps: u32,\n theta: f32,\n encoding_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar encoding: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(\n @builtin(local_invocation_id) local_id: vec3,\n @builtin(workgroup_id) workgroup_id: vec3,\n @builtin(num_workgroups) num_workgroups: vec3\n) {\n let ts = workgroup_id.x;\n let ds = local_id.x;\n \n for (var t = ts; t < args.steps; t += num_workgroups.x) {\n for (var d = ds; d < (args.dimension / 2u); d += workgroup_size_x) {\n let f = 2u * d;\n let x = f32(t) / pow(args.theta, f32(f) / f32(args.dimension));\n let encoding_idx = args.encoding_offset + (t * args.dimension) + f;\n encoding[encoding_idx] = cos(x);\n encoding[encoding_idx + 1] = sin(x);\n }\n } \n}\n\n".concat(emptyShader); + var loadAttentionPrecomputeEncodingShader = function loadAttentionPrecomputeEncodingShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "attention precompute encoding bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = 
device.createPipelineLayout({ + label: "attention precompute encoding pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "attention precompute encoding shader module", + code: attentionPrecomputeEncodingShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "attention precompute encoding pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE + } + } + }); + return { + computePipeline: computePipeline + }; + }; + var ENCODE_ROPE_INTERLEAVED_SHADER_NAME = "pv_picollm_attention_encode_rope_interleaved_shader"; + var attentionEncodeRopeInterleavedShaderSource = "\nstruct argsStruct { \n n: u32,\n num_heads: u32,\n head_dimension: u32,\n rope_dimension: u32,\n position: u32,\n encoding_offset: u32,\n x_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar encoding: array;\n\n@group(0) @binding(2)\nvar x: array;\n \noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(\n @builtin(local_invocation_id) local_id: vec3,\n @builtin(workgroup_id) workgroup_id: vec3,\n @builtin(num_workgroups) num_workgroups: vec3\n) {\n let ts = workgroup_id.x;\n let h = workgroup_id.y;\n let ds = local_id.x;\n\n for (var t = ts; t < args.n; t += num_workgroups.x) {\n let x_start = args.x_offset + (t * args.num_heads + h) * args.head_dimension;\n let encoding_start = args.encoding_offset + ((t + args.position) * args.rope_dimension); \n for (var d = ds; d < (args.head_dimension / 2u); d += workgroup_size_x) {\n let i = 2u * d;\n let x_idx = x_start + i;\n let encoding_idx = encoding_start + i;\n \n let re = x[x_idx];\n let im = x[x_idx + 1];\n x[x_idx] = (re * encoding[encoding_idx]) - 
(im * encoding[encoding_idx + 1]);\n x[x_idx + 1] = (re * encoding[encoding_idx + 1]) + (im * encoding[encoding_idx]);\n }\n }\n}\n\n".concat(emptyShader); + var loadAttentionEncodeRopeInterleavedShader = function loadAttentionEncodeRopeInterleavedShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "attention encode rope interleave bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "attention encode rope interleave pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "attention encode rope interleave shader module", + code: attentionEncodeRopeInterleavedShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "attention encode rope interleave pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE + } + } + }); + return { + computePipeline: computePipeline + }; + }; + var ENCODE_SHADER_NAME = "pv_picollm_attention_encode_shader"; + var attentionEncodeShaderSource = "\nstruct argsStruct { \n n: u32,\n num_heads: u32,\n head_dimension: u32,\n rope_dimension: u32,\n position: u32,\n encoding_offset: u32,\n x_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar encoding: array;\n\n@group(0) @binding(2)\nvar x: array;\n \noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(\n @builtin(local_invocation_id) 
local_id: vec3,\n @builtin(workgroup_id) workgroup_id: vec3,\n @builtin(num_workgroups) num_workgroups: vec3\n) {\n let ts = workgroup_id.x;\n let h = workgroup_id.y;\n let ds = local_id.x;\n\n for (var t = ts; t < args.n; t += num_workgroups.x) {\n let half_rope = (args.rope_dimension / 2);\n let xr_start = args.x_offset + ((t * args.num_heads + h) * args.head_dimension);\n let xi_start = xr_start + half_rope;\n let encoding_start = args.encoding_offset + ((t + args.position) * args.rope_dimension); \n for (var d = ds; d < half_rope; d += workgroup_size_x) {\n let xr_idx = xr_start + d;\n let xi_idx = xi_start + d;\n let encoding_idx = encoding_start + (2 * d);\n\n let re = x[xr_idx];\n let im = x[xi_idx];\n x[xr_idx] = (re * encoding[encoding_idx]) - (im * encoding[encoding_idx + 1]);\n x[xi_idx] = (re * encoding[encoding_idx + 1]) + (im * encoding[encoding_idx]);\n }\n }\n}\n\n".concat(emptyShader); + var loadAttentionEncodeShader = function loadAttentionEncodeShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "attention encode bind layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "attention encode pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "attention encode shader", + code: attentionEncodeShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "attention encode pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE + } + } + }); + return { + computePipeline: computePipeline + }; + 
}; + var DOT_PRODUCT_SHADER_NAME = "pv_picollm_attention_dot_product_shader"; + var attentionDotProductShaderSource = "\nstruct argsStruct { \n n: u32,\n tq: u32,\n head_dimension: u32,\n num_heads: u32,\n num_kv_heads: u32,\n window_length: u32,\n start: u32,\n norm: f32,\n length1: u32,\n num_keys: u32,\n query_offset: u32,\n keys_offset: u32,\n key_intercepts_offset: u32,\n key_slopes_offset: u32,\n scores_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar query: array;\n\n@group(0) @binding(2)\nvar keys: array;\n\n@group(0) @binding(3)\nvar key_intercepts: array;\n\n@group(0) @binding(4)\nvar key_slopes: array;\n\n@group(0) @binding(5)\nvar scores: array;\n \noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.num_heads) {\n return;\n }\n\n let head = global_id.x / (args.num_heads / args.num_kv_heads);\n \n let head_offset = head * args.window_length;\n let start_index = head_offset + args.start;\n \n let keys_local_a = args.keys_offset + (start_index * args.head_dimension);\n let key_intercepts_local_a = args.key_intercepts_offset + start_index; \n let key_slopes_local_a = args.key_slopes_offset + start_index;\n \n let keys_local_b = args.keys_offset + (head_offset * args.head_dimension);\n let key_intercepts_local_b = args.key_intercepts_offset + head_offset; \n let key_slopes_local_b = args.key_slopes_offset + head_offset;\n \n let scores_local = args.scores_offset + (global_id.x * args.num_keys);\n let query_local = args.query_offset + (((global_id.x * args.n) + args.tq) * args.head_dimension);\n \n for (var i = 0u; i < args.head_dimension; i++) { \n for (var k = 0u; k < args.num_keys; k++) {\n if (k < args.length1) { \n let key_idx = keys_local_a + (k * args.head_dimension) + i;\n let 
key_val = f32(extractBits(keys[key_idx / 4], (i * 8u) % 32u, 8u));\n let tmp = query[query_local + i] * (key_intercepts[key_intercepts_local_a + k] + (key_slopes[key_slopes_local_a + k] * key_val));\n scores[scores_local + k] += tmp;\n }\n else {\n let j = k - args.length1;\n let key_idx = keys_local_b + (j * args.head_dimension) + i;\n let key_val = f32(extractBits(keys[key_idx / 4], (i * 8u) % 32u, 8u));\n let tmp = query[query_local + i] * (key_intercepts[key_intercepts_local_b + j] + (key_slopes[key_slopes_local_b + j] * key_val));\n scores[scores_local + k] += tmp;\n }\n } \n }\n \n for (var k = 0u; k < args.num_keys; k++) {\n scores[scores_local + k] *= args.norm;\n }\n}\n\n".concat(emptyShader); + var loadAttentionDotProductShader = function loadAttentionDotProductShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "attention dot product bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 3, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 4, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 5, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "attention dot product pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "attention dot product shader module", + code: attentionDotProductShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "attention dot product pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: 
shaderEntryPoint + } + }); + return { + computePipeline: computePipeline + }; + }; + var SOFTMAX_SHADER_NAME = "pv_picollm_attention_softmax_shader"; + var attentionSoftmaxShaderSource = "\nstruct argsStruct { \n num_heads: u32,\n num_keys: u32,\n scores_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar scores: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.num_heads) {\n return;\n }\n\n let scores_start = args.scores_offset + (global_id.x * args.num_keys);\n \n var max_index: u32 = 0;\n for (var i = 1u; i < args.num_keys; i++) {\n if (scores[scores_start + i] > scores[scores_start + max_index]) {\n max_index = i;\n }\n }\n let max: f32 = scores[scores_start + max_index];\n\n var sum: f32 = 0.0;\n for (var i = 0u; i < args.num_keys; i++) {\n scores[scores_start + i] = exp(scores[scores_start + i] - max);\n sum += scores[scores_start + i];\n }\n\n for (var i = 0u; i < args.num_keys; i++) {\n scores[scores_start + i] /= sum;\n }\n}\n\n".concat(emptyShader); + var loadAttentionSoftmaxShader = function loadAttentionSoftmaxShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "attention softmax bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "attention softmax pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "attention softmax shader module", + code: attentionSoftmaxShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "attention 
softmax pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint + } + }); + return { + computePipeline: computePipeline + }; + }; + var FIR_SHADER_NAME = "pv_picollm_attention_fir_shader"; + var attentionFirShaderSource = "\nstruct argsStruct { \n length1: u32,\n length2: u32,\n tq: u32,\n head_dimension: u32,\n num_heads: u32,\n num_kv_heads: u32,\n window_length: u32,\n start: u32,\n values_offset: u32,\n value_intercepts_offset: u32,\n value_slopes_offset: u32,\n scores_offset: u32,\n output_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar values: array;\n\n@group(0) @binding(2)\nvar value_intercepts: array;\n\n@group(0) @binding(3)\nvar value_slopes: array;\n\n@group(0) @binding(4)\nvar scores: array;\n\n@group(0) @binding(5)\nvar output: array;\n \noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.num_heads) {\n return;\n }\n\n let head = global_id.x / (args.num_heads / args.num_kv_heads);\n\n let head_offset = head * args.window_length;\n let start_index = head_offset + args.start;\n\n let values_local_a = args.values_offset + (start_index * args.head_dimension);\n let value_intercepts_local_a = args.value_intercepts_offset + start_index;\n let value_slopes_local_a = args.value_slopes_offset + start_index;\n let values_local_b = args.values_offset + (head_offset * args.head_dimension);\n let value_intercepts_local_b = args.value_intercepts_offset + head_offset;\n let value_slopes_local_b = args.value_slopes_offset + head_offset;\n let scores_local = args.scores_offset + (global_id.x * (args.length1 + args.length2));\n let output_local = args.output_offset + (((args.tq * args.num_heads) + global_id.x) * args.head_dimension);\n\n for (var i = 
0u; i < args.head_dimension; i++) {\n var tmp: f32 = 0.0;\n for (var k = 0u; k < args.length1; k++) {\n let value_idx = values_local_a + (k * args.head_dimension) + i;\n let value_val = f32(extractBits(values[value_idx / 4], (i * 8u) % 32u, 8u));\n tmp += scores[scores_local + k] * (value_intercepts[value_intercepts_local_a + k] + (value_slopes[value_slopes_local_a + k] * value_val)); \n }\n for (var k = 0u; k < args.length2; k++) {\n let value_idx = values_local_b + (k * args.head_dimension) + i;\n let value_val = f32(extractBits(values[value_idx / 4], (i * 8u) % 32u, 8u));\n tmp += scores[scores_local + args.length1 + k] * (value_intercepts[value_intercepts_local_b + k] + (value_slopes[value_slopes_local_b + k] * value_val)); \n }\n output[output_local + i] = tmp;\n }\n}\n\n".concat(emptyShader); + var loadAttentionFirShader = function loadAttentionFirShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "attention fir bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 3, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 4, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 5, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "attention fir pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "attention fir shader module", + code: attentionFirShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "attention fir pipeline", + layout: pipelineLayout, + compute: { + 
module: shaderModule, + entryPoint: shaderEntryPoint + } + }); + return { + computePipeline: computePipeline + }; + }; + var UPDATE_KV_SHADER_NAME = "pv_picollm_attention_update_kv_shader"; + var attentionUpdateKvShaderSource = "\nstruct argsStruct {\n n: u32,\n num_kv_heads: u32,\n window_length: u32,\n position: u32,\n head_dimension: u32,\n tf_offset: u32,\n kv_offset: u32,\n kv_intercepts_offset: u32,\n kv_slopes_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar tf: array;\n\n@group(0) @binding(2)\nvar kv: array;\n\n@group(0) @binding(3)\nvar kv_intercepts: array;\n\n@group(0) @binding(4)\nvar kv_slopes: array;\n \noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) { \n if (global_id.x >= args.num_kv_heads) {\n return;\n }\n \n for (var i = 0u; i < args.n; i++) {\n let index = (global_id.x * args.window_length) + ((args.position + i) % args.window_length);\n let tf_start = args.tf_offset + (((i * args.num_kv_heads) + global_id.x) * args.head_dimension);\n let kv_start = args.kv_offset + ((index * args.head_dimension) / 4);\n let kv_intercepts_start = args.kv_intercepts_offset + index;\n let kv_slopes_start = args.kv_slopes_offset + index;\n \n var xmax = tf[tf_start]; \n var xmin = tf[tf_start]; \n \n for (var j = 1u; j < args.head_dimension; j++) {\n xmax = max(xmax, tf[tf_start + j]);\n xmin = min(xmin, tf[tf_start + j]);\n }\n\n kv_intercepts[kv_intercepts_start] = xmin;\n kv_slopes[kv_slopes_start] = f32(xmax - xmin) / 255.0;\n\n for (var j = 0u; j < args.head_dimension; j++) {\n let kv_idx = kv_start + (j / 4);\n let kv_val = u32(round((tf[tf_start + j] - xmin) / kv_slopes[kv_slopes_start])); \n kv[kv_idx] = insertBits(kv[kv_idx], extractBits(kv_val, 0u, 8u), (j * 8u) % 32u, 8u); \n }\n }\n}\n\n".concat(emptyShader); 
+ var loadAttentionUpdateKvShader = function loadAttentionUpdateKvShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "attention update kv bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }, { + binding: 3, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }, { + binding: 4, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "attention update kv pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "attention update kv shader module", + code: attentionUpdateKvShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "attention update kv pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint + } + }); + return { + computePipeline: computePipeline + }; + }; + var TRANSPOSE_QUERY_SHADER_NAME = "pv_picollm_attention_transpose_query_shader"; + var attentionTransposeQueryShaderSource = "\nstruct argsStruct {\n n: u32,\n num_heads: u32,\n head_dimension: u32,\n tf_offset: u32,\n hf_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar tf: array;\n\n@group(0) @binding(2)\nvar hf: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(\n @builtin(workgroup_id) workgroup_id : vec3,\n @builtin(local_invocation_id) local_id : vec3\n) {\n\n if (workgroup_id.x >= args.num_heads || workgroup_id.y >= args.n || local_id.x >= 
args.head_dimension) {\n return;\n }\n \n let tf_idx = args.tf_offset + (workgroup_id.y * args.num_heads * args.head_dimension) + (workgroup_id.x * args.head_dimension) + local_id.x; \n let hf_idx = args.hf_offset + (workgroup_id.x * args.n * args.head_dimension) + (workgroup_id.y * args.head_dimension) + local_id.x; \n hf[hf_idx] = tf[tf_idx];\n}\n\n".concat(emptyShader); + var loadAttentionTransposeQueryShader = function loadAttentionTransposeQueryShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "attention transpose query bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "attention transpose query pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "attention transpose query shader module", + code: attentionTransposeQueryShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "attention transpose query pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE + } + } + }); + return { + computePipeline: computePipeline + }; + }; + var attentionShaders = _defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty({}, PRECOMPUTE_ENCODING_SHADER_NAME, loadAttentionPrecomputeEncodingShader), ENCODE_ROPE_INTERLEAVED_SHADER_NAME, loadAttentionEncodeRopeInterleavedShader), ENCODE_SHADER_NAME, loadAttentionEncodeShader), DOT_PRODUCT_SHADER_NAME, loadAttentionDotProductShader), SOFTMAX_SHADER_NAME, loadAttentionSoftmaxShader), 
FIR_SHADER_NAME, loadAttentionFirShader), UPDATE_KV_SHADER_NAME, loadAttentionUpdateKvShader), TRANSPOSE_QUERY_SHADER_NAME, loadAttentionTransposeQueryShader); + var getPicollmAttentionWebGpuFunctions = function getPicollmAttentionWebGpuFunctions(memory) { + var setStatus = function setStatus(statusAddress, value) { + var memoryBufferInt32 = new Int32Array(memory.buffer); + memoryBufferInt32[statusAddress / Int32Array.BYTES_PER_ELEMENT] = value; + }; + var pvPicollmAttentionPrecomputeEncodingWebGpu = function pvPicollmAttentionPrecomputeEncodingWebGpu(objAddress, encodingAddress, encodingOffset, dimension, steps, theta, statusAddress) { + var _gpuBuffers$get; + objAddress = unsignedAddress(objAddress); + encodingAddress = unsignedAddress(encodingAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[PRECOMPUTE_ENCODING_SHADER_NAME]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var encodingBuffer = (_gpuBuffers$get = gpuBuffers.get(encodingAddress)) === null || _gpuBuffers$get === void 0 ? 
void 0 : _gpuBuffers$get.buffer; + if (!encodingBuffer) { + console.error('Encoding buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(4 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "attention precompute encoding arg buffer"); + var buffer = new ArrayBuffer(argsBuffer.size); + var view = new DataView(buffer); + view.setUint32(0, dimension, true); + view.setUint32(4, steps, true); + view.setFloat32(8, theta, true); + view.setUint32(12, encodingOffset / 4, true); + obj.device.queue.writeBuffer(argsBuffer, 0, buffer); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "attention precompute encoding bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: encodingBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, PRECOMPUTE_ENCODING_SHADER_NAME, steps); + setStatus(statusAddress, 0); + }; + var pvPicollmAttentionEncodeWebGpu = function pvPicollmAttentionEncodeWebGpu(objAddress, isRopeInterleaved, xAddress, xOffset, n, numHeads, headDimension, ropeDimension, position, encodingAddress, encodingOffset, statusAddress) { + var _gpuBuffers$get2, _gpuBuffers$get3; + objAddress = unsignedAddress(objAddress); + xAddress = unsignedAddress(xAddress); + encodingAddress = unsignedAddress(encodingAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shaderName = isRopeInterleaved ? 
ENCODE_ROPE_INTERLEAVED_SHADER_NAME : ENCODE_SHADER_NAME; + var shader = obj.shaders[shaderName]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var encodingBuffer = (_gpuBuffers$get2 = gpuBuffers.get(encodingAddress)) === null || _gpuBuffers$get2 === void 0 ? void 0 : _gpuBuffers$get2.buffer; + if (!encodingBuffer) { + console.error('Encoding buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var xBuffer = (_gpuBuffers$get3 = gpuBuffers.get(xAddress)) === null || _gpuBuffers$get3 === void 0 ? void 0 : _gpuBuffers$get3.buffer; + if (!xBuffer) { + console.error('X buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(7 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "attention encode arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([n, numHeads, headDimension, ropeDimension, position, encodingOffset / 4, xOffset / 4])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "attention encode bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: encodingBuffer + } + }, { + binding: 2, + resource: { + buffer: xBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, shaderName, Math.min(n, PV_PICOLLM_WEBGPU_MAX_GRID_DIM), numHeads); + setStatus(statusAddress, 0); + }; + var pvPicollmAttentionDotProductWebGpu = function pvPicollmAttentionDotProductWebGpu(objAddress, queryAddress, queryOffset, keysAddress, keysOffset, keyInterceptsAddress, keyInterceptsOffset, keySlopesAddress, keySlopesOffset, n, tq, headDimension, numHeads, numKvHeads, windowLength, start, norm, length1, length2, numKeys, scoresAddress, scoresOffset, statusAddress) { + var 
_gpuBuffers$get4, _gpuBuffers$get5, _gpuBuffers$get6, _gpuBuffers$get7, _gpuBuffers$get8; + objAddress = unsignedAddress(objAddress); + queryAddress = unsignedAddress(queryAddress); + keysAddress = unsignedAddress(keysAddress); + keyInterceptsAddress = unsignedAddress(keyInterceptsAddress); + keySlopesAddress = unsignedAddress(keySlopesAddress); + scoresAddress = unsignedAddress(scoresAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[DOT_PRODUCT_SHADER_NAME]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var queryBuffer = (_gpuBuffers$get4 = gpuBuffers.get(queryAddress)) === null || _gpuBuffers$get4 === void 0 ? void 0 : _gpuBuffers$get4.buffer; + if (!queryBuffer) { + console.error('query buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var keysBuffer = (_gpuBuffers$get5 = gpuBuffers.get(keysAddress)) === null || _gpuBuffers$get5 === void 0 ? void 0 : _gpuBuffers$get5.buffer; + if (!keysBuffer) { + console.error('keys buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var keyInterceptsBuffer = (_gpuBuffers$get6 = gpuBuffers.get(keyInterceptsAddress)) === null || _gpuBuffers$get6 === void 0 ? void 0 : _gpuBuffers$get6.buffer; + if (!keyInterceptsBuffer) { + console.error('key intercepts buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var keySlopesBuffer = (_gpuBuffers$get7 = gpuBuffers.get(keySlopesAddress)) === null || _gpuBuffers$get7 === void 0 ? 
void 0 : _gpuBuffers$get7.buffer; + if (!keySlopesBuffer) { + console.error('key slopes buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var scoresBuffer = (_gpuBuffers$get8 = gpuBuffers.get(scoresAddress)) === null || _gpuBuffers$get8 === void 0 ? void 0 : _gpuBuffers$get8.buffer; + if (!scoresBuffer) { + console.error('scores buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(15 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "attention dot product arg buffer"); + var buffer = new ArrayBuffer(argsBuffer.size); + var view = new DataView(buffer); + view.setUint32(0, n, true); + view.setUint32(4, tq, true); + view.setUint32(8, headDimension, true); + view.setUint32(12, numHeads, true); + view.setUint32(16, numKvHeads, true); + view.setUint32(20, windowLength, true); + view.setUint32(24, start, true); + view.setFloat32(28, norm, true); + view.setUint32(32, length1, true); + view.setUint32(36, numKeys, true); + view.setUint32(40, scoresOffset, true); + view.setUint32(44, queryOffset / 4, true); + view.setUint32(48, keysOffset, true); + view.setUint32(52, keyInterceptsOffset / 4, true); + view.setUint32(56, keySlopesOffset / 4, true); + obj.device.queue.writeBuffer(argsBuffer, 0, buffer); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "attention dot product bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: queryBuffer + } + }, { + binding: 2, + resource: { + buffer: keysBuffer + } + }, { + binding: 3, + resource: { + buffer: keyInterceptsBuffer + } + }, { + binding: 4, + resource: { + buffer: keySlopesBuffer + } + }, { + binding: 5, + resource: { + buffer: scoresBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, 
DOT_PRODUCT_SHADER_NAME, numHeads); + setStatus(statusAddress, 0); + }; + var pvPicollmAttentionSoftmaxWebGpu = function pvPicollmAttentionSoftmaxWebGpu(objAddress, scoresAddress, scoresOffset, numHeads, numKeys, statusAddress) { + var _gpuBuffers$get9; + objAddress = unsignedAddress(objAddress); + scoresAddress = unsignedAddress(scoresAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[SOFTMAX_SHADER_NAME]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var scoresBuffer = (_gpuBuffers$get9 = gpuBuffers.get(scoresAddress)) === null || _gpuBuffers$get9 === void 0 ? void 0 : _gpuBuffers$get9.buffer; + if (!scoresBuffer) { + console.error('Scores buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(3 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "attention softmax arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([numHeads, numKeys, scoresOffset / 4])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "attention softmax bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: scoresBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, SOFTMAX_SHADER_NAME, numHeads); + setStatus(statusAddress, 0); + }; + var pvPicollmAttentionFirWebGpu = function pvPicollmAttentionFirWebGpu(objAddress, valuesAddress, valuesOffset, valueInterceptsAddress, valueInterceptsOffset, valueSlopesAddress, valueSlopesOffset, length1, length2, tq, headDimension, numHeads, numKvHeads, 
windowLength, start, scoresAddress, scoresOffset, outputAddress, outputOffset, statusAddress) { + var _gpuBuffers$get10, _gpuBuffers$get11, _gpuBuffers$get12, _gpuBuffers$get13, _gpuBuffers$get14; + objAddress = unsignedAddress(objAddress); + valuesAddress = unsignedAddress(valuesAddress); + valueInterceptsAddress = unsignedAddress(valueInterceptsAddress); + valueSlopesAddress = unsignedAddress(valueSlopesAddress); + scoresAddress = unsignedAddress(scoresAddress); + outputAddress = unsignedAddress(outputAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[FIR_SHADER_NAME]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var valuesBuffer = (_gpuBuffers$get10 = gpuBuffers.get(valuesAddress)) === null || _gpuBuffers$get10 === void 0 ? void 0 : _gpuBuffers$get10.buffer; + if (!valuesBuffer) { + console.error('values buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var valueInterceptsBuffer = (_gpuBuffers$get11 = gpuBuffers.get(valueInterceptsAddress)) === null || _gpuBuffers$get11 === void 0 ? void 0 : _gpuBuffers$get11.buffer; + if (!valueInterceptsBuffer) { + console.error('value intercepts buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var valueSlopesBuffer = (_gpuBuffers$get12 = gpuBuffers.get(valueSlopesAddress)) === null || _gpuBuffers$get12 === void 0 ? void 0 : _gpuBuffers$get12.buffer; + if (!valueSlopesBuffer) { + console.error('value slopes buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var scoresBuffer = (_gpuBuffers$get13 = gpuBuffers.get(scoresAddress)) === null || _gpuBuffers$get13 === void 0 ? 
void 0 : _gpuBuffers$get13.buffer; + if (!scoresBuffer) { + console.error('scores buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var outputBuffer = (_gpuBuffers$get14 = gpuBuffers.get(outputAddress)) === null || _gpuBuffers$get14 === void 0 ? void 0 : _gpuBuffers$get14.buffer; + if (!outputBuffer) { + console.error('output buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(13 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "attention fir arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([length1, length2, tq, headDimension, numHeads, numKvHeads, windowLength, start, valuesOffset, valueInterceptsOffset / 4, valueSlopesOffset / 4, scoresOffset / 4, outputOffset / 4])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "attention fir bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: valuesBuffer + } + }, { + binding: 2, + resource: { + buffer: valueInterceptsBuffer + } + }, { + binding: 3, + resource: { + buffer: valueSlopesBuffer + } + }, { + binding: 4, + resource: { + buffer: scoresBuffer + } + }, { + binding: 5, + resource: { + buffer: outputBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, FIR_SHADER_NAME, numHeads); + setStatus(statusAddress, 0); + }; + var pvPicollmAttentionUpdateKvWebGpu = function pvPicollmAttentionUpdateKvWebGpu(objAddress, tfAddress, tfOffset, n, kvAddress, kvOffset, kvInterceptsAddress, kvInterceptsOffset, kvSlopesAddress, kvSlopesOffset, numKvHeads, windowLength, position, headDimension, statusAddress) { + var _gpuBuffers$get15, _gpuBuffers$get16, _gpuBuffers$get17, _gpuBuffers$get18; + objAddress = unsignedAddress(objAddress); + tfAddress = 
unsignedAddress(tfAddress); + kvAddress = unsignedAddress(kvAddress); + kvInterceptsAddress = unsignedAddress(kvInterceptsAddress); + kvSlopesAddress = unsignedAddress(kvSlopesAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[UPDATE_KV_SHADER_NAME]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var tfBuffer = (_gpuBuffers$get15 = gpuBuffers.get(tfAddress)) === null || _gpuBuffers$get15 === void 0 ? void 0 : _gpuBuffers$get15.buffer; + if (!tfBuffer) { + console.error('tf buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var kvBuffer = (_gpuBuffers$get16 = gpuBuffers.get(kvAddress)) === null || _gpuBuffers$get16 === void 0 ? void 0 : _gpuBuffers$get16.buffer; + if (!kvBuffer) { + console.error('KV buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var kvInterceptsBuffer = (_gpuBuffers$get17 = gpuBuffers.get(kvInterceptsAddress)) === null || _gpuBuffers$get17 === void 0 ? void 0 : _gpuBuffers$get17.buffer; + if (!kvInterceptsBuffer) { + console.error('KV intercept buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var kvSlopesBuffer = (_gpuBuffers$get18 = gpuBuffers.get(kvSlopesAddress)) === null || _gpuBuffers$get18 === void 0 ? 
void 0 : _gpuBuffers$get18.buffer; + if (!kvSlopesBuffer) { + console.error('KV slopes buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(9 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "attention update kv arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([n, numKvHeads, windowLength, position, headDimension, tfOffset / 4, kvOffset, kvInterceptsOffset / 4, kvSlopesOffset / 4])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "attention update kv bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: tfBuffer + } + }, { + binding: 2, + resource: { + buffer: kvBuffer + } + }, { + binding: 3, + resource: { + buffer: kvInterceptsBuffer + } + }, { + binding: 4, + resource: { + buffer: kvSlopesBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, UPDATE_KV_SHADER_NAME, numKvHeads); + setStatus(statusAddress, 0); + }; + var pvPicollmAttentionTransposeQueryWebGpu = function pvPicollmAttentionTransposeQueryWebGpu(objAddress, tfAddress, tfOffset, hfAddress, hfOffset, n, numHeads, headDimension, statusAddress) { + var _gpuBuffers$get19, _gpuBuffers$get20; + objAddress = unsignedAddress(objAddress); + tfAddress = unsignedAddress(tfAddress); + hfAddress = unsignedAddress(hfAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[TRANSPOSE_QUERY_SHADER_NAME]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var tfBuffer = (_gpuBuffers$get19 = gpuBuffers.get(tfAddress)) === 
null || _gpuBuffers$get19 === void 0 ? void 0 : _gpuBuffers$get19.buffer; + if (!tfBuffer) { + console.error('tf buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var hfBuffer = (_gpuBuffers$get20 = gpuBuffers.get(hfAddress)) === null || _gpuBuffers$get20 === void 0 ? void 0 : _gpuBuffers$get20.buffer; + if (!hfBuffer) { + console.error('hf buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(5 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "attention transpose query arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([n, numHeads, headDimension, tfOffset / 4, hfOffset / 4])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "attention transpose query bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: tfBuffer + } + }, { + binding: 2, + resource: { + buffer: hfBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, TRANSPOSE_QUERY_SHADER_NAME, numHeads, n); + setStatus(statusAddress, 0); + }; + return { + pv_picollm_attention_precompute_encoding_webgpu_wasm: pvPicollmAttentionPrecomputeEncodingWebGpu, + pv_picollm_attention_encode_webgpu_wasm: pvPicollmAttentionEncodeWebGpu, + pv_picollm_attention_dot_product_webgpu_wasm: pvPicollmAttentionDotProductWebGpu, + pv_picollm_attention_softmax_webgpu_wasm: pvPicollmAttentionSoftmaxWebGpu, + pv_picollm_attention_fir_webgpu_wasm: pvPicollmAttentionFirWebGpu, + pv_picollm_attention_update_kv_webgpu_wasm: pvPicollmAttentionUpdateKvWebGpu, + pv_picollm_attention_transpose_query_webgpu_wasm: pvPicollmAttentionTransposeQueryWebGpu + }; + }; + + var SILU_SHADER_NAME = "pv_picollm_feed_forward_silu_shader"; + var feedForwardSiluShaderSource = "\nstruct 
argsStruct {\n n: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n x[global_id.x] = x[global_id.x] / (1.0 + exp(-x[global_id.x]));\n}\n\n".concat(emptyShader); + var loadFeedForwardSiluShader = function loadFeedForwardSiluShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "ff silu bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "ff silu pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "ff silu shader module", + code: feedForwardSiluShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "ff silu pipline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE + } + } + }); + return { + computePipeline: computePipeline + }; + }; + var GELU_SHADER_NAME = "pv_picollm_feed_forward_gelu_shader"; + var feedForwardGeluShaderSource = "\nstruct argsStruct {\n n: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array;\n\nconst a1: f32 = 0.254829592;\nconst a2: f32 = -0.284496736;\nconst a3: f32 = 1.421413741;\nconst a4: f32 = -1.453152027;\nconst a5: f32 = 1.061405429;\nconst p: f32 = 0.3275911;\n\n// A&S formula 7.1.26\nfn erf(x: f32) -> f32 { \n var sign: f32 = 1.0;\n if (x < 0) {\n sign = -1.0;\n }\n var x_abs: f32 
= abs(x);\n \n let t: f32 = 1.0 / fma(p, x_abs, 1.0);\n let y: f32 = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x_abs * x_abs);\n\n return sign * y;\n}\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n x[global_id.x] = 0.5 * x[global_id.x] * (1.0 + erf(x[global_id.x] * 0.7071067811865475));\n}\n\n".concat(emptyShader); + var loadFeedForwardGeluShader = function loadFeedForwardGeluShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "ff gelu bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "ff gelu pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "ff gelu shader module", + code: feedForwardGeluShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "ff gelu pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE + } + } + }); + return { + computePipeline: computePipeline + }; + }; + var ALMOST_GELU_SHADER_NAME = "pv_picollm_feed_forward_almost_gelu_shader"; + var feedForwardAlmostGeluShaderSource = "\nstruct argsStruct {\n n: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn 
main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n x[global_id.x] = 0.5 * x[global_id.x] * (1 + tanh(0.7978845608028654 * (x[global_id.x] + (0.044715f * x[global_id.x] * x[global_id.x] * x[global_id.x]))));\n}\n\n".concat(emptyShader); + var loadFeedForwardAlmostGeluShader = function loadFeedForwardAlmostGeluShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "ff almost gelu bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "ff almost gelu pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "ff almost gelu shader module", + code: feedForwardAlmostGeluShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "ff almost gelu pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE + } + } + }); + return { + computePipeline: computePipeline + }; + }; + var MULTIPLY_BUFFERS_SHADER_NAME = "pv_picollm_feed_forward_multiply_buffers_shader"; + var feedForwardMultiplyBuffersShaderSource = "\nstruct argsStruct {\n n: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array;\n\n@group(0) @binding(2)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n y[global_id.x] *= x[global_id.x];\n}\n\n".concat(emptyShader); + var loadFeedForwardMultiplyBuffersShader 
= function loadFeedForwardMultiplyBuffersShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "ff multiply buffers bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "ff multiply buffers pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "ff multiply buffers shader module", + code: feedForwardMultiplyBuffersShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "ff multiply buffers pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE + } + } + }); + return { + computePipeline: computePipeline + }; + }; + var feedForwardShaders = _defineProperty(_defineProperty(_defineProperty(_defineProperty({}, SILU_SHADER_NAME, loadFeedForwardSiluShader), GELU_SHADER_NAME, loadFeedForwardGeluShader), ALMOST_GELU_SHADER_NAME, loadFeedForwardAlmostGeluShader), MULTIPLY_BUFFERS_SHADER_NAME, loadFeedForwardMultiplyBuffersShader); + var getPicollmFeedForwardWebGpuFunctions = function getPicollmFeedForwardWebGpuFunctions(memory) { + var setStatus = function setStatus(statusAddress, value) { + var memoryBufferInt32 = new Int32Array(memory.buffer); + memoryBufferInt32[statusAddress / Int32Array.BYTES_PER_ELEMENT] = value; + }; + var pvPicollmFeedForwardSiluWebGpu = function pvPicollmFeedForwardSiluWebGpu(objAddress, n, xAddress, statusAddress) { + var _gpuBuffers$get; + objAddress = unsignedAddress(objAddress); + xAddress = unsignedAddress(xAddress); + statusAddress = unsignedAddress(statusAddress); + var 
obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[SILU_SHADER_NAME]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var xBuffer = (_gpuBuffers$get = gpuBuffers.get(xAddress)) === null || _gpuBuffers$get === void 0 ? void 0 : _gpuBuffers$get.buffer; + if (!xBuffer) { + console.error('x buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "ff silu arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([n])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "ff silu bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: xBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, SILU_SHADER_NAME, Math.ceil(n / PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE)); + setStatus(statusAddress, 0); + }; + var pvPicollmFeedForwardGeluWebGpu = function pvPicollmFeedForwardGeluWebGpu(objAddress, n, xAddress, statusAddress) { + var _gpuBuffers$get2; + objAddress = unsignedAddress(objAddress); + xAddress = unsignedAddress(xAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[GELU_SHADER_NAME]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var xBuffer = (_gpuBuffers$get2 = gpuBuffers.get(xAddress)) === null || _gpuBuffers$get2 === void 0 ? 
void 0 : _gpuBuffers$get2.buffer; + if (!xBuffer) { + console.error('x buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "ff gelu arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([n])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "ff gelu bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: xBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, GELU_SHADER_NAME, Math.ceil(n / PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE)); + setStatus(statusAddress, 0); + }; + var pvPicollmFeedForwardAlmostGeluWebGpu = function pvPicollmFeedForwardAlmostGeluWebGpu(objAddress, n, xAddress, statusAddress) { + var _gpuBuffers$get3; + objAddress = unsignedAddress(objAddress); + xAddress = unsignedAddress(xAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[ALMOST_GELU_SHADER_NAME]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var xBuffer = (_gpuBuffers$get3 = gpuBuffers.get(xAddress)) === null || _gpuBuffers$get3 === void 0 ? 
void 0 : _gpuBuffers$get3.buffer; + if (!xBuffer) { + console.error('x buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "ff almost gelu arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([n])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "ff almost gelu bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: xBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, ALMOST_GELU_SHADER_NAME, Math.ceil(n / PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE)); + setStatus(statusAddress, 0); + }; + var pvPicollmFeedForwardMultiplyBuffersWebGpu = function pvPicollmFeedForwardMultiplyBuffersWebGpu(objAddress, n, xAddress, yAddress, statusAddress) { + var _gpuBuffers$get4, _gpuBuffers$get5; + objAddress = unsignedAddress(objAddress); + xAddress = unsignedAddress(xAddress); + yAddress = unsignedAddress(yAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[MULTIPLY_BUFFERS_SHADER_NAME]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var xBuffer = (_gpuBuffers$get4 = gpuBuffers.get(xAddress)) === null || _gpuBuffers$get4 === void 0 ? void 0 : _gpuBuffers$get4.buffer; + if (!xBuffer) { + console.error('X buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var yBuffer = (_gpuBuffers$get5 = gpuBuffers.get(yAddress)) === null || _gpuBuffers$get5 === void 0 ? 
// Key under which the gate forward shader is registered on a device object.
var FORWARD_SHADER_NAME$1 = "pv_picollm_gate_forward_shader";

// WGSL for the gate forward kernel. For row `global_id.x` it copies `num_experts`
// scores out of `y`, bubble-sorts them in descending order, writes the first `k`
// expert ids to `indices` and their scores to `weights`, then turns those `k`
// scores into a softmax (max-subtracted, exponentiated, normalised).
//
// The address spaces / element types in angle brackets are required by WGSL and
// were absent from this literal. They are reconstructed from the bind group layout
// used for this shader (read-only-storage, storage, storage), from the struct
// field types the values are assigned to, and from the rule that an
// override-sized array may only live in the workgroup address space.
// NOTE(review): confirm against the untranspiled shader source.
// NOTE(review): `ixs` is shared per workgroup, so this kernel presumably relies on
// workgroup_size_x staying 1 — verify before overriding it.
var gateForwardShaderSource = [
  "",
  "struct pv_picollm_gate_ix_t {",
  "  i: u32,",
  "  x: f32,",
  "}",
  "",
  "@group(0) @binding(0)",
  "var<storage, read> y: array<f32>;",
  "",
  "@group(0) @binding(1)",
  "var<storage, read_write> indices: array<u32>;",
  "",
  "@group(0) @binding(2)",
  "var<storage, read_write> weights: array<f32>;",
  "",
  "override n: u32 = 0;",
  "override k: u32 = 0;",
  "override num_experts: u32 = 0;",
  "",
  "override y_offset: u32 = 0;",
  "override indices_offset: u32 = 0;",
  "override weights_offset: u32 = 0;",
  "",
  "var<workgroup> ixs: array<pv_picollm_gate_ix_t, num_experts>;",
  "",
  "override workgroup_size_x: u32 = 1;",
  "override workgroup_size_y: u32 = 1;",
  "override workgroup_size_z: u32 = 1;",
  "",
  "@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)",
  "fn main(",
  "  @builtin(global_invocation_id) global_id : vec3<u32>",
  ") {",
  "  if (global_id.x >= n) {",
  "    return;",
  "  }",
  "",
  "  var y_start: u32 = y_offset + global_id.x * num_experts;",
  "  for (var j = 0u; j < num_experts; j++) {",
  "    ixs[j].i = j;",
  "    ixs[j].x = y[y_start + j];",
  "  }",
  "",
  "  for (var i = 0u; i < num_experts - 1; i++) {",
  "    for (var j = 0u; j < num_experts - i - 1; j++) {",
  "      if (ixs[j].x < ixs[j + 1].x) {",
  "        let tmp = ixs[j];",
  "        ixs[j] = ixs[j + 1];",
  "        ixs[j + 1] = tmp;",
  "      }",
  "    }",
  "  }",
  "",
  "  for (var j = 0u; j < k; j++) {",
  "    indices[indices_offset + (global_id.x * k) + j] = ixs[j].i;",
  "    weights[weights_offset + (global_id.x * k) + j] = ixs[j].x;",
  "  }",
  "",
  "  var max_weight: f32 = weights[weights_offset + (global_id.x * k)];",
  "  for (var j = 1u; j < k; j++) {",
  "    max_weight = max(max_weight, weights[weights_offset + (global_id.x * k) + j]);",
  "  }",
  "",
  "  var sum_weight: f32 = 0.0;",
  "  for (var j = 0u; j < k; j++) {",
  "    weights[weights_offset + (global_id.x * k) + j] = exp(weights[weights_offset + (global_id.x * k) + j] - max_weight);",
  "    sum_weight += weights[weights_offset + (global_id.x * k) + j];",
  "  }",
  "",
  "  for (var j = 0u; j < k; j++) {",
  "    weights[weights_offset + (global_id.x * k) + j] /= sum_weight;",
  "  }",
  "}"
].join("\n").concat("\n\n", emptyShader);
/**
 * Builds the WebGPU "gate forward" import object handed to the picoLLM WASM module.
 *
 * @param {{buffer: ArrayBuffer}} memory - Object whose `buffer` is viewed as Int32
 *   to write status codes (presumably the module's `WebAssembly.Memory` — confirm).
 * @returns {{pv_picollm_gate_forward_webgpu_wasm: Function}} Import table.
 */
var getPicollmGateWebGpuFunctions = function getPicollmGateWebGpuFunctions(memory) {
  // Writes `value` as an int32 at byte address `statusAddress`. The view is rebuilt
  // on every call from the current `memory.buffer`.
  var setStatus = function setStatus(statusAddress, value) {
    var memoryBufferInt32 = new Int32Array(memory.buffer);
    memoryBufferInt32[statusAddress / Int32Array.BYTES_PER_ELEMENT] = value;
  };

  // Logs `message` and reports failure (-1) at `statusAddress`.
  var fail = function fail(statusAddress, message) {
    console.error(message);
    setStatus(statusAddress, -1);
  };

  // Returns the GPU buffer registered under `address`, or undefined.
  var findBuffer = function findBuffer(address) {
    var entry = gpuBuffers.get(address);
    return entry == null ? undefined : entry.buffer;
  };

  /**
   * Dispatches the gate forward shader with `n` workgroups.
   *
   * Status written at `statusAddress`: -1 if the device, shader or any buffer is
   * missing (an error is logged), 0 after the dispatch has been issued.
   * `yOffset`, `indicesOffset` and `weightsOffset` are divided by 4 before being
   * passed as shader constants (presumably bytes -> 32-bit elements — confirm).
   */
  var pvPicollmGateForwardWebGpu = function pvPicollmGateForwardWebGpu(objAddress, n, k, numExperts, yAddress, yOffset, indicesAddress, indicesOffset, weightsAddress, weightsOffset, statusAddress) {
    var deviceKey = unsignedAddress(objAddress);
    var yKey = unsignedAddress(yAddress);
    var indicesKey = unsignedAddress(indicesAddress);
    var weightsKey = unsignedAddress(weightsAddress);
    var statusPtr = unsignedAddress(statusAddress);

    var obj = gpuDevices.get(deviceKey);
    if (!obj || !obj.device) {
      fail(statusPtr, 'WebGPU device has not been initialized');
      return;
    }
    var shader = obj.shaders[FORWARD_SHADER_NAME$1];
    if (!shader) {
      fail(statusPtr, 'Shader has not been loaded');
      return;
    }

    // Validate every buffer BEFORE building the pipeline: pipeline creation is the
    // costly step and is pointless when the call is going to fail anyway.
    var yBuffer = findBuffer(yKey);
    if (!yBuffer) {
      fail(statusPtr, 'Y buffer has not been allocated');
      return;
    }
    var indicesBuffer = findBuffer(indicesKey);
    if (!indicesBuffer) {
      fail(statusPtr, 'Indices buffer has not been allocated');
      return;
    }
    var weightsBuffer = findBuffer(weightsKey);
    if (!weightsBuffer) {
      fail(statusPtr, 'Weights buffer has not been allocated');
      return;
    }

    // TODO: create this in setup, once we add args to setup procedure
    // The shader takes its sizes and offsets as pipeline-overridable constants, so
    // a pipeline specialised for this call is created here.
    var pipeline = obj.device.createComputePipeline({
      label: "gate forward pipeline",
      layout: shader.pipelineLayout,
      compute: {
        module: shader.shaderModule,
        entryPoint: shaderEntryPoint,
        constants: {
          n: n,
          k: k,
          num_experts: numExperts,
          y_offset: yOffset / 4,
          indices_offset: indicesOffset / 4,
          weights_offset: weightsOffset / 4
        }
      }
    });

    var bindGroup = obj.device.createBindGroup({
      label: "gate forward bind group",
      layout: pipeline.getBindGroupLayout(0),
      entries: [
        { binding: 0, resource: { buffer: yBuffer } },
        { binding: 1, resource: { buffer: indicesBuffer } },
        { binding: 2, resource: { buffer: weightsBuffer } }
      ]
    });
    obj.dispatchComputerShader(bindGroup, pipeline, FORWARD_SHADER_NAME$1, n);
    setStatus(statusPtr, 0);
  };

  return {
    pv_picollm_gate_forward_webgpu_wasm: pvPicollmGateForwardWebGpu
  };
};
// Key under which the "multiply weight and add to buffer" shader is registered.
var MULTIPLY_WEIGHT_AND_ADD_TO_BUFFER_SHADER_NAME = "pv_picollm_moe_transformer_multiply_weight_and_add_to_buffer_shader";

// WGSL kernel: for each i < n,
//   y[y_offset + y_index + i] += weights[weights_index] * x[x_offset + i]
//
// Fixes relative to the previous literal:
//  * the scalar weight is now MULTIPLIED with x (it was added, which contradicts
//    the kernel's name);
//  * the `<...>` address spaces / element types that WGSL requires are present.
//    They follow the bind group layout used for this shader (uniform,
//    read-only-storage, read-only-storage, storage); the f32 element type is
//    assumed — NOTE(review): confirm against the untranspiled shader source.
// NOTE(review): `weights_offset` is part of the uniform struct but is not used
// when indexing `weights` — confirm that `weights_index` is already absolute.
var moeTransformerMultiplyWeightAndToBufferShaderSource = [
  "",
  "struct argsStruct {",
  "  n: u32,",
  "  weights_index: u32,",
  "  y_index: u32,",
  "  weights_offset: u32,",
  "  x_offset: u32,",
  "  y_offset: u32,",
  "};",
  "",
  "@group(0) @binding(0)",
  "var<uniform> args: argsStruct;",
  "",
  "@group(0) @binding(1)",
  "var<storage, read> weights: array<f32>;",
  "",
  "@group(0) @binding(2)",
  "var<storage, read> x: array<f32>;",
  "",
  "@group(0) @binding(3)",
  "var<storage, read_write> y: array<f32>;",
  "",
  "override workgroup_size_x: u32 = 1;",
  "override workgroup_size_y: u32 = 1;",
  "override workgroup_size_z: u32 = 1;",
  "",
  "@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)",
  "fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {",
  "  if (global_id.x >= args.n) {",
  "    return;",
  "  }",
  "",
  "  y[args.y_offset + args.y_index + global_id.x] += weights[args.weights_index] * x[args.x_offset + global_id.x];",
  "}"
].join("\n").concat("\n\n", emptyShader);
/**
 * Builds the WebGPU MoE-transformer import object handed to the picoLLM WASM module.
 *
 * Each exported function resolves its device, shader and buffers, uploads a small
 * u32 uniform block, dispatches `n` workgroups and writes a status code:
 * -1 (after a `console.error`) on any missing resource, 0 once dispatched.
 *
 * @param {{buffer: ArrayBuffer}} memory - Object whose `buffer` is viewed as Int32
 *   to write status codes (presumably the module's `WebAssembly.Memory` — confirm).
 * @returns {Object} Import table keyed by the `*_webgpu_wasm` symbol names.
 */
var getPicollmMoeTransformerWebGpuFunctions = function getPicollmMoeTransformerWebGpuFunctions(memory) {
  // Writes `value` as an int32 at byte address `statusAddress`.
  var setStatus = function setStatus(statusAddress, value) {
    var memoryBufferInt32 = new Int32Array(memory.buffer);
    memoryBufferInt32[statusAddress / Int32Array.BYTES_PER_ELEMENT] = value;
  };

  /**
   * Shared body of the three imports.
   *
   * @param {number} objAddress - Key of the device object in `gpuDevices`.
   * @param {number} statusAddress - Where the status code is written.
   * @param {string} shaderName - Key into `obj.shaders`; also passed to the dispatcher.
   * @param {string} label - Prefix for the " arg buffer" / " bind group" labels.
   * @param {number} n - Workgroup count for the dispatch.
   * @param {number[]} args - u32 values uploaded as the uniform block (binding 0).
   * @param {{address: number, missing: string}[]} bufferSpecs - Storage buffers for
   *   bindings 1..N, in order, each with the message logged when it is not found.
   */
  var runShader = function runShader(objAddress, statusAddress, shaderName, label, n, args, bufferSpecs) {
    var statusPtr = unsignedAddress(statusAddress);
    var fail = function fail(message) {
      console.error(message);
      setStatus(statusPtr, -1);
    };

    var obj = gpuDevices.get(unsignedAddress(objAddress));
    if (!obj || !obj.device) {
      fail('WebGPU device has not been initialized');
      return;
    }
    var shader = obj.shaders[shaderName];
    if (!shader) {
      fail('Shader has not been loaded');
      return;
    }

    // Slot 0 is filled in once the uniform buffer exists; buffers are validated
    // first so nothing is allocated for a call that is going to fail.
    var entries = [null];
    for (var i = 0; i < bufferSpecs.length; i++) {
      var record = gpuBuffers.get(unsignedAddress(bufferSpecs[i].address));
      var gpuBuffer = record == null ? undefined : record.buffer;
      if (!gpuBuffer) {
        fail(bufferSpecs[i].missing);
        return;
      }
      entries.push({ binding: i + 1, resource: { buffer: gpuBuffer } });
    }

    var argsBuffer = obj.getBuffer(args.length * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, label + " arg buffer");
    obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array(args));
    obj.scheduleUniformBufferForRelease(argsBuffer);
    entries[0] = { binding: 0, resource: { buffer: argsBuffer } };

    var bindGroup = obj.device.createBindGroup({
      label: label + " bind group",
      layout: shader.computePipeline.getBindGroupLayout(0),
      entries: entries
    });
    obj.dispatchComputerShader(bindGroup, shader.computePipeline, shaderName, n);
    setStatus(statusPtr, 0);
  };

  // NOTE(review): offsets below are forwarded to the shaders unchanged, whereas the
  // gate and norm wrappers in this file divide theirs by 4 — confirm the WASM side
  // passes element (not byte) offsets here.

  // buffer[bufferOffset + i] += x[xOffset + i]
  var pvPicollmMoeTransformerAddToBufferWebGpu = function pvPicollmMoeTransformerAddToBufferWebGpu(objAddress, n, xAddress, xOffset, bufferAddress, bufferOffset, statusAddress) {
    runShader(objAddress, statusAddress, ADD_TO_BUFFER_SHADER_NAME$1, "moe transformer add to buffer", n, [n, xOffset, bufferOffset], [
      { address: xAddress, missing: 'x buffer has not been allocated' },
      { address: bufferAddress, missing: 'buffer has not been allocated' }
    ]);
  };

  // Accumulates x, scaled by one entry of `weights`, into y.
  var pvPicollmMoeTransformerMultiplyWeightAndAddToBufferWebGpu = function pvPicollmMoeTransformerMultiplyWeightAndAddToBufferWebGpu(objAddress, n, weightsIndex, yIndex, weightsAddress, weightsOffset, xAddress, xOffset, yAddress, yOffset, statusAddress) {
    runShader(objAddress, statusAddress, MULTIPLY_WEIGHT_AND_ADD_TO_BUFFER_SHADER_NAME, "moe transformer multiply weight and add to buffer", n, [n, weightsIndex, yIndex, weightsOffset, xOffset, yOffset], [
      { address: weightsAddress, missing: 'weights has not been allocated' },
      // Previously logged 'buffer2 has not been allocated', a copy-paste slip.
      { address: xAddress, missing: 'x has not been allocated' },
      { address: yAddress, missing: 'y has not been allocated' }
    ]);
  };

  // y[yOffset + i] = buffer1[buffer1Offset + i] + buffer2[buffer2Offset + i]
  var pvPicollmMoeTransformerAddBuffersWebGpu = function pvPicollmMoeTransformerAddBuffersWebGpu(objAddress, n, buffer1Address, buffer1Offset, buffer2Address, buffer2Offset, yAddress, yOffset, statusAddress) {
    runShader(objAddress, statusAddress, ADD_BUFFERS_SHADER_NAME$1, "moe transformer add buffers", n, [n, buffer1Offset, buffer2Offset, yOffset], [
      { address: buffer1Address, missing: 'buffer1 has not been allocated' },
      { address: buffer2Address, missing: 'buffer2 has not been allocated' },
      { address: yAddress, missing: 'y has not been allocated' }
    ]);
  };

  return {
    pv_picollm_moe_transformer_add_to_buffer_webgpu_wasm: pvPicollmMoeTransformerAddToBufferWebGpu,
    pv_picollm_moe_transformer_add_buffers_webgpu_wasm: pvPicollmMoeTransformerAddBuffersWebGpu,
    pv_picollm_moe_transformer_multiply_weight_and_add_to_buffer_webgpu_wasm: pvPicollmMoeTransformerMultiplyWeightAndAddToBufferWebGpu
  };
};
eps: f32, \n x_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar weight: array;\n\n".concat(isMulti ? "\n@group(0) @binding(2)\nvar x: array;\n\n@group(0) @binding(3) \nvar y: array;\n" : " \n@group(0) @binding(2)\nvar x: array;\n", "\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\nvar sdata: array, workgroup_size_x>;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(\n @builtin(local_invocation_id) local_id: vec3,\n @builtin(workgroup_id) workgroup_id: vec3\n) {\n let tid = local_id.x;\n let m = workgroup_id.x;\n let block_size = workgroup_size_x;\n \n var power_vec: vec4;\n let x_start: u32 = args.x_offset + (m * args.dimension);\n let skip = tid * 4;\n let shift = (block_size * 4);\n for (var j = 0u; j + skip < args.remainder_start; j += shift) {\n let local_index = x_start + j + skip; \n\n let x_vec = vec4(\n x[local_index],\n x[local_index + 1],\n x[local_index + 2],\n x[local_index + 3]);\n \n power_vec += x_vec * x_vec; \n } \n \n if (tid == 0 && args.remainder > 0) {\n var remainder_vec = vec4(0.0, 0.0, 0.0, 0.0);\n let x_idx = x_start + args.remainder_start;\n for (var j = 0u; j < args.remainder; j++) { \n remainder_vec[j] = x[x_idx + j];\n } \n power_vec += remainder_vec * remainder_vec;\n }\n \n sdata[tid] = power_vec;\n workgroupBarrier();\n\n ").concat(sdataReduce, "\n \n let power = sdata[0].x + sdata[0].y + sdata[0].z + sdata[0].w;\n let norm: vec4 = vec4(1.0 / sqrt((power / f32(args.dimension)) + args.eps));\n \n let y_start: u32 = args.y_offset + (m * args.dimension);\n for (var j = 0u; j + skip < args.remainder_start; j += shift) {\n let local_index = j + skip;\n let x_idx = x_start + local_index;\n let x_vec = vec4(\n x[x_idx],\n x[x_idx + 1],\n x[x_idx + 2],\n x[x_idx + 3]);\n \n let weight_vec = vec4(\n weight[local_index],\n weight[local_index + 1],\n weight[local_index + 
/**
 * Creates the compute pipeline for the norm forward shader.
 *
 * With `isMulti` truthy the layout has four bindings (uniform, read-only weight,
 * read-only x, writable y); otherwise three (uniform, read-only weight, writable x).
 *
 * @param {GPUDevice} device - Device the layouts, module and pipeline are created on.
 * @param {boolean} isMulti - Selects the multi-buffer variant; forwarded unchanged
 *   to `normForwardShaderSource`.
 * @returns {{computePipeline: GPUComputePipeline}} The created pipeline.
 */
var loadNormForwardShader = function loadNormForwardShader(device, isMulti) {
  // Buffer type per binding index, in binding order.
  var bindingTypes = isMulti
    ? ['uniform', 'read-only-storage', 'read-only-storage', 'storage']
    : ['uniform', 'read-only-storage', 'storage'];
  var layoutEntries = bindingTypes.map(function (bufferType, bindingIndex) {
    return {
      binding: bindingIndex,
      visibility: GPUShaderStage.COMPUTE,
      buffer: {
        type: bufferType
      }
    };
  });

  // Every label shares the "norm forward <variant> buffer " prefix.
  var labelPrefix = "norm forward " + (isMulti ? "multi" : "single") + " buffer ";

  var groupLayout = device.createBindGroupLayout({
    label: labelPrefix + "bind group layout",
    entries: layoutEntries
  });
  var layout = device.createPipelineLayout({
    label: labelPrefix + "pipeline layout",
    bindGroupLayouts: [groupLayout]
  });
  var module = device.createShaderModule({
    label: labelPrefix + "shader module",
    code: normForwardShaderSource(isMulti)
  });
  var pipeline = device.createComputePipeline({
    label: labelPrefix + "pipeline",
    layout: layout,
    compute: {
      module: module,
      entryPoint: shaderEntryPoint,
      constants: {
        workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE
      }
    }
  });

  return {
    computePipeline: pipeline
  };
};
FORWARD_SINGLE_BUFFER_SHADER_NAME$1 : FORWARD_MULTI_BUFFER_SHADER_NAME$1; + var shader = obj.shaders[shaderName]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var weightBuffer = (_gpuBuffers$get = gpuBuffers.get(weightAddress)) === null || _gpuBuffers$get === void 0 ? void 0 : _gpuBuffers$get.buffer; + if (!weightBuffer) { + console.error('Weight buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var yBuffer = (_gpuBuffers$get2 = gpuBuffers.get(yAddress)) === null || _gpuBuffers$get2 === void 0 ? void 0 : _gpuBuffers$get2.buffer; + if (!yBuffer) { + console.error('Y buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var remainder = dimension % 4; + var remainder_start = dimension - remainder; + var argsBuffer = obj.getBuffer(7 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "norm forward multi buffer arg buffer"); + var buffer = new ArrayBuffer(argsBuffer.size); + var view = new DataView(buffer); + view.setUint32(0, n, true); + view.setUint32(4, dimension, true); + view.setUint32(8, remainder, true); + view.setUint32(12, remainder_start, true); + view.setFloat32(16, eps, true); + view.setUint32(20, xOffset / 4, true); + view.setUint32(24, yOffset / 4, true); + obj.device.queue.writeBuffer(argsBuffer, 0, buffer); + obj.scheduleUniformBufferForRelease(argsBuffer); + var entries = [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: weightBuffer + } + }]; + if (xAddress === yAddress) { + entries.push({ + binding: 2, + resource: { + buffer: yBuffer + } + }); + } else { + var _gpuBuffers$get3; + var xBuffer = (_gpuBuffers$get3 = gpuBuffers.get(xAddress)) === null || _gpuBuffers$get3 === void 0 ? 
void 0 : _gpuBuffers$get3.buffer; + if (!xBuffer) { + console.error('X buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + entries.push({ + binding: 2, + resource: { + buffer: xBuffer + } + }); + entries.push({ + binding: 3, + resource: { + buffer: yBuffer + } + }); + } + var bindGroup = obj.device.createBindGroup({ + label: "norm forward ".concat(xAddress === yAddress ? "single" : "multi", " buffer bind group"), + layout: shader.computePipeline.getBindGroupLayout(0), + entries: entries + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, shaderName, n); + setStatus(statusAddress, 0); + }; + return { + pv_picollm_norm_forward_webgpu_wasm: pvPicollmNormForwardWebGpu + }; + }; + + var FORWARD_MULTI_BUFFER_SHADER_NAME = "pv_picollm_norm_layer_forward_multi_buffer_shader"; + var normLayerForwardMultiBufferShaderSource = "\nstruct argsStruct {\n n: u32,\n dimension: u32,\n eps: f32,\n weight_offset: u32,\n bias_offset: u32,\n x_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar weight: array;\n\n@group(0) @binding(2)\nvar bias: array;\n\n@group(0) @binding(3)\nvar x: array;\n\n@group(0) @binding(4)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n \n let x_start: u32 = args.x_offset + (global_id.x * args.dimension);\n\n var mean: f32 = 0.0;\n for (var j = 0u; j < args.dimension; j++) {\n mean += x[x_start + j];\n }\n mean /= f32(args.dimension);\n\n var mean2: f32 = 0.0;\n for (var j = 0u; j < args.dimension; j++) {\n mean2 += (x[x_start + j] - mean) * (x[x_start + j] - mean);\n }\n mean2 /= f32(args.dimension);\n\n var norm: f32 = 1.0 / sqrt(mean2 + args.eps);\n\n var y_start = args.y_offset + 
(global_id.x * args.dimension);\n for (var j = 0u; j < args.dimension; j++) {\n y[y_start + j] = ((x[x_start + j] - mean) * norm * weight[args.weight_offset + j]) + bias[args.bias_offset + j];\n }\n}\n\n".concat(emptyShader); + var loadNormLayerForwardMultiBufferShader = function loadNormLayerForwardMultiBufferShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "norm layer forward multi buffer bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 3, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 4, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "norm layer forward multi buffer pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "norm layer forward multi buffer shader module", + code: normLayerForwardMultiBufferShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "norm layer forward multi buffer pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE + } + } + }); + return { + computePipeline: computePipeline + }; + }; + var FORWARD_SINGLE_BUFFER_SHADER_NAME = "pv_picollm_norm_layer_forward_single_buffer_shader"; + var normLayerForwardSingleBufferShaderSource = "\nstruct argsStruct {\n n: u32,\n dimension: u32,\n eps: f32,\n weight_offset: u32,\n bias_offset: u32,\n x_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar 
weight: array;\n\n@group(0) @binding(2)\nvar bias: array;\n\n@group(0) @binding(3)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n \n let x_start: u32 = args.x_offset + (global_id.x * args.dimension);\n\n var mean: f32 = 0.0;\n for (var j = 0u; j < args.dimension; j++) {\n mean += y[x_start + j];\n }\n mean /= f32(args.dimension);\n\n var mean2: f32 = 0.0;\n for (var j = 0u; j < args.dimension; j++) {\n mean2 += (y[x_start + j] - mean) * (y[x_start + j] - mean);\n }\n mean2 /= f32(args.dimension);\n\n var norm: f32 = 1.0 / sqrt(mean2 + args.eps);\n\n var y_start = args.y_offset + (global_id.x * args.dimension);\n for (var j = 0u; j < args.dimension; j++) {\n y[y_start + j] = ((y[x_start + j] - mean) * norm * weight[args.weight_offset + j]) + bias[args.bias_offset + j];\n }\n}\n\n".concat(emptyShader); + var loadNormLayerForwardSingleBufferShader = function loadNormLayerForwardSingleBufferShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "norm layer forward single buffer bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 3, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "norm layer forward single buffer pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "norm layer forward single buffer shader module", + code: 
normLayerForwardSingleBufferShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "norm layer forward single buffer pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE + } + } + }); + return { + computePipeline: computePipeline + }; + }; + var normLayerForwardShader = _defineProperty(_defineProperty({}, FORWARD_SINGLE_BUFFER_SHADER_NAME, loadNormLayerForwardSingleBufferShader), FORWARD_MULTI_BUFFER_SHADER_NAME, loadNormLayerForwardMultiBufferShader); + var getPicollmNormLayerWebGpuFunctions = function getPicollmNormLayerWebGpuFunctions(memory) { + var setStatus = function setStatus(statusAddress, value) { + var memoryBufferInt32 = new Int32Array(memory.buffer); + memoryBufferInt32[statusAddress / Int32Array.BYTES_PER_ELEMENT] = value; + }; + var pvPicollmNormLayerForwardWebGpu = function pvPicollmNormLayerForwardWebGpu(objAddress, dimension, eps, weightAddress, weightOffset, biasAddress, biasOffset, n, xAddress, xOffset, yAddress, yOffset, statusAddress) { + var _gpuBuffers$get, _gpuBuffers$get2, _gpuBuffers$get3; + objAddress = unsignedAddress(objAddress); + weightAddress = unsignedAddress(weightAddress); + biasAddress = unsignedAddress(biasAddress); + xAddress = unsignedAddress(xAddress); + yAddress = unsignedAddress(yAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shaderName = xAddress === yAddress ? 
FORWARD_SINGLE_BUFFER_SHADER_NAME : FORWARD_MULTI_BUFFER_SHADER_NAME; + var shader = obj.shaders[shaderName]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var weightBuffer = (_gpuBuffers$get = gpuBuffers.get(weightAddress)) === null || _gpuBuffers$get === void 0 ? void 0 : _gpuBuffers$get.buffer; + if (!weightBuffer) { + console.error('weight buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var biasBuffer = (_gpuBuffers$get2 = gpuBuffers.get(biasAddress)) === null || _gpuBuffers$get2 === void 0 ? void 0 : _gpuBuffers$get2.buffer; + if (!biasBuffer) { + console.error('bias buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var yBuffer = (_gpuBuffers$get3 = gpuBuffers.get(yAddress)) === null || _gpuBuffers$get3 === void 0 ? void 0 : _gpuBuffers$get3.buffer; + if (!yBuffer) { + console.error('Y buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(7 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "norm layer forward multi buffer arg buffer"); + var buffer = new ArrayBuffer(argsBuffer.size); + var view = new DataView(buffer); + view.setUint32(0, n, true); + view.setUint32(4, dimension, true); + view.setFloat32(8, eps, true); + view.setUint32(12, weightOffset / 4, true); + view.setUint32(16, biasOffset / 4, true); + view.setUint32(20, xOffset / 4, true); + view.setUint32(24, yOffset / 4, true); + obj.device.queue.writeBuffer(argsBuffer, 0, buffer); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup; + if (xAddress === yAddress) { + bindGroup = obj.device.createBindGroup({ + label: "norm layer forward single buffer bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: weightBuffer + } + }, { + binding: 2, 
+ resource: { + buffer: biasBuffer + } + }, { + binding: 3, + resource: { + buffer: yBuffer + } + }] + }); + } else { + var _gpuBuffers$get4; + var xBuffer = (_gpuBuffers$get4 = gpuBuffers.get(xAddress)) === null || _gpuBuffers$get4 === void 0 ? void 0 : _gpuBuffers$get4.buffer; + if (!xBuffer) { + console.error('X buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + bindGroup = obj.device.createBindGroup({ + label: "norm layer forward multi buffer bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: weightBuffer + } + }, { + binding: 2, + resource: { + buffer: biasBuffer + } + }, { + binding: 3, + resource: { + buffer: xBuffer + } + }, { + binding: 4, + resource: { + buffer: yBuffer + } + }] + }); + } + obj.dispatchComputerShader(bindGroup, shader.computePipeline, shaderName, Math.ceil(n / PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE)); + setStatus(statusAddress, 0); + }; + return { + pv_picollm_norm_layer_forward_webgpu_wasm: pvPicollmNormLayerForwardWebGpu + }; + }; + + var ADD_TO_BUFFER_SHADER_NAME = "pv_picollm_transformer_add_to_buffer_shader"; + var transformerAddToBufferShaderSource = "\nstruct argsStruct {\n n: u32,\n x_offset: u32,\n buffer_offset: u32, \n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array;\n\n@group(0) @binding(2)\nvar buffer: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n \n buffer[args.buffer_offset + global_id.x] += x[args.x_offset + global_id.x]; \n}\n\n".concat(emptyShader); + var loadTransformerAddToBufferShader = function loadTransformerAddToBufferShader(device) { + var bindGroupLayout = 
device.createBindGroupLayout({ + label: "transformer add to buffer bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "transformer add to buffer pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "transformer add to buffer shader module", + code: transformerAddToBufferShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "transformer add to buffer compute", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE + } + } + }); + return { + computePipeline: computePipeline + }; + }; + var ADD_BUFFERS_SHADER_NAME = "pv_picollm_transformer_add_buffers_shader"; + var transformerAddBuffersShaderSource = "\n\nstruct argsStruct {\n n: u32,\n buffer1_offset: u32,\n buffer2_offset: u32,\n y_offset: u32, \n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar buffer1: array;\n\n@group(0) @binding(2)\nvar buffer2: array;\n\n@group(0) @binding(3)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n \n y[args.y_offset + global_id.x] = buffer1[args.buffer1_offset + global_id.x] + buffer2[args.buffer2_offset + global_id.x]; \n}\n\n".concat(emptyShader); + var loadTransformerAddBuffersShader = function loadTransformerAddBuffersShader(device) { + var 
bindGroupLayout = device.createBindGroupLayout({ + label: "transformer add buffers bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 3, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "transformer add buffers pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "transformer add buffers shader module", + code: transformerAddBuffersShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "transformer add buffers pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE + } + } + }); + return { + computePipeline: computePipeline + }; + }; + var transformerForwardShaders = _defineProperty(_defineProperty({}, ADD_TO_BUFFER_SHADER_NAME, loadTransformerAddToBufferShader), ADD_BUFFERS_SHADER_NAME, loadTransformerAddBuffersShader); + var getPicollmTransformerWebGpuFunctions = function getPicollmTransformerWebGpuFunctions(memory) { + var setStatus = function setStatus(statusAddress, value) { + var memoryBufferInt32 = new Int32Array(memory.buffer); + memoryBufferInt32[statusAddress / Int32Array.BYTES_PER_ELEMENT] = value; + }; + var pvPicollmTransformerAddToBufferWebGpu = function pvPicollmTransformerAddToBufferWebGpu(objAddress, n, xAddress, xOffset, bufferAddress, bufferOffset, statusAddress) { + var _gpuBuffers$get, _gpuBuffers$get2; + objAddress = unsignedAddress(objAddress); + xAddress = unsignedAddress(xAddress); + bufferAddress = unsignedAddress(bufferAddress); + 
statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[ADD_TO_BUFFER_SHADER_NAME]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var xBuffer = (_gpuBuffers$get = gpuBuffers.get(xAddress)) === null || _gpuBuffers$get === void 0 ? void 0 : _gpuBuffers$get.buffer; + if (!xBuffer) { + console.error('x buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var bufferBuffer = (_gpuBuffers$get2 = gpuBuffers.get(bufferAddress)) === null || _gpuBuffers$get2 === void 0 ? void 0 : _gpuBuffers$get2.buffer; + if (!bufferBuffer) { + console.error('buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(3 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "transformer add to buffer arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([n, xOffset, bufferOffset])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "transformer add to buffer bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: xBuffer + } + }, { + binding: 2, + resource: { + buffer: bufferBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, ADD_TO_BUFFER_SHADER_NAME, Math.ceil(n / PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE)); + setStatus(statusAddress, 0); + }; + var pvPicollmTransformerAddBuffersWebGpu = function pvPicollmTransformerAddBuffersWebGpu(objAddress, n, buffer1Address, buffer1Offset, buffer2Address, buffer2Offset, yAddress, yOffset, statusAddress) { + var _gpuBuffers$get3, _gpuBuffers$get4, 
_gpuBuffers$get5; + objAddress = unsignedAddress(objAddress); + buffer1Address = unsignedAddress(buffer1Address); + buffer2Address = unsignedAddress(buffer2Address); + yAddress = unsignedAddress(yAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[ADD_BUFFERS_SHADER_NAME]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var buffer1Buffer = (_gpuBuffers$get3 = gpuBuffers.get(buffer1Address)) === null || _gpuBuffers$get3 === void 0 ? void 0 : _gpuBuffers$get3.buffer; + if (!buffer1Buffer) { + console.error('buffer1 has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var buffer2Buffer = (_gpuBuffers$get4 = gpuBuffers.get(buffer2Address)) === null || _gpuBuffers$get4 === void 0 ? void 0 : _gpuBuffers$get4.buffer; + if (!buffer2Buffer) { + console.error('buffer2 has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var yBuffer = (_gpuBuffers$get5 = gpuBuffers.get(yAddress)) === null || _gpuBuffers$get5 === void 0 ? 
void 0 : _gpuBuffers$get5.buffer; + if (!yBuffer) { + console.error('y has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(4 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "transformer add buffers arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([n, buffer1Offset, buffer2Offset, yOffset])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "transformer add buffers bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: buffer1Buffer + } + }, { + binding: 2, + resource: { + buffer: buffer2Buffer + } + }, { + binding: 3, + resource: { + buffer: yBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, ADD_BUFFERS_SHADER_NAME, Math.ceil(n / PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE)); + setStatus(statusAddress, 0); + }; + return { + pv_picollm_transformer_add_to_buffer_webgpu_wasm: pvPicollmTransformerAddToBufferWebGpu, + pv_picollm_transformer_add_buffers_webgpu_wasm: pvPicollmTransformerAddBuffersWebGpu + }; + }; + + var FORWARD_SHADER_NAME = "pv_picollm_weight_float_forward_shader"; + var weightFloatForwardShaderSource = "\n\nstruct argsStruct {\n nr: u32,\n nc: u32,\n w_offset: u32,\n x_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar w: array;\n\n@group(0) @binding(2)\nvar x: array;\n\n@group(0) @binding(3)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(\n @builtin(workgroup_id) workgroup_id : vec3,\n @builtin(local_invocation_id) local_id : vec3\n) {\n if (local_id.x >= args.nr) {\n return;\n }\n let 
x_start: u32 = args.x_offset + (workgroup_id.x * args.nc);\n let y_idx: u32 = local_id.x + args.y_offset + (workgroup_id.x * args.nr);\n \n let w_start: u32 = args.w_offset + (local_id.x * args.nc);\n for (var j = 0u; j < args.nc; j++) {\n y[y_idx] += w[w_start + j] * x[x_start + j]; \n }\n}\n\n".concat(emptyShader); + var loadWeightFloatForwardShader = function loadWeightFloatForwardShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "weight float forward bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 3, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "weight float forward pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "weight float forward shader module", + code: weightFloatForwardShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "weight float forward pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE + } + } + }); + return { + computePipeline: computePipeline + }; + }; + var weightFloatForwardShader = _defineProperty({}, FORWARD_SHADER_NAME, loadWeightFloatForwardShader); + var getPicollmWeightFloatWebGpuFunctions = function getPicollmWeightFloatWebGpuFunctions(memory) { + var setStatus = function setStatus(statusAddress, value) { + var memoryBufferInt32 = new Int32Array(memory.buffer); + memoryBufferInt32[statusAddress / Int32Array.BYTES_PER_ELEMENT] = value; + }; + var pvPicollmWeightFloatForwardWebGpu = function 
pvPicollmWeightFloatForwardWebGpu(objAddress, n, nc, nr, wOffset, wAddress, xOffset, xAddress, yOffset, yAddress, statusAddress) { + var _gpuBuffers$get, _gpuBuffers$get2, _gpuBuffers$get3; + objAddress = unsignedAddress(objAddress); + wAddress = unsignedAddress(wAddress); + xAddress = unsignedAddress(xAddress); + yAddress = unsignedAddress(yAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[FORWARD_SHADER_NAME]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var wBuffer = (_gpuBuffers$get = gpuBuffers.get(wAddress)) === null || _gpuBuffers$get === void 0 ? void 0 : _gpuBuffers$get.buffer; + if (!wBuffer) { + console.error('W buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var xBuffer = (_gpuBuffers$get2 = gpuBuffers.get(xAddress)) === null || _gpuBuffers$get2 === void 0 ? void 0 : _gpuBuffers$get2.buffer; + if (!xBuffer) { + console.error('X buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var yBuffer = (_gpuBuffers$get3 = gpuBuffers.get(yAddress)) === null || _gpuBuffers$get3 === void 0 ? 
void 0 : _gpuBuffers$get3.buffer; + if (!yBuffer) { + console.error('Y buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(5 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "weight float forward arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([nr, nc, wOffset, xOffset, yOffset])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "weight float forward bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: wBuffer + } + }, { + binding: 2, + resource: { + buffer: xBuffer + } + }, { + binding: 3, + resource: { + buffer: yBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, FORWARD_SHADER_NAME, n); + setStatus(statusAddress, 0); + }; + return { + pv_picollm_weight_float_forward_webgpu_wasm: pvPicollmWeightFloatForwardWebGpu + }; + }; + + var rowsPerBlock = 16; + var columnsPerBlock = 8; + var preprocessDim = 16; + var weightBlockSize = 256; + var unpackBlock128BitDepth3 = "\nvar unpacked: array;\n\nfn unpack_block_128_bit_depth_3(packed_offset: u32) {\n let val_0 = blocks[packed_offset]; \n unpacked[0] = extractBits(val_0, 0u, 3u);\n unpacked[1] = extractBits(val_0, 3u, 3u);\n unpacked[2] = extractBits(val_0, 6u, 3u);\n unpacked[3] = extractBits(val_0, 9u, 3u);\n unpacked[4] = extractBits(val_0, 12u, 3u);\n unpacked[5] = extractBits(val_0, 15u, 3u);\n unpacked[6] = extractBits(val_0, 18u, 3u);\n unpacked[7] = extractBits(val_0, 21u, 3u);\n unpacked[8] = extractBits(val_0, 24u, 3u);\n unpacked[9] = extractBits(val_0, 27u, 3u);\n unpacked[10] = extractBits(val_0, 30u, 2u);\n \n let val_1 = blocks[packed_offset + 1];\n unpacked[10] = insertBits(unpacked[10], extractBits(val_1, 0u, 1u), 2u, 1u);\n unpacked[11] = extractBits(val_1, 
1u, 3u);\n unpacked[12] = extractBits(val_1, 4u, 3u);\n unpacked[13] = extractBits(val_1, 7u, 3u);\n unpacked[14] = extractBits(val_1, 10u, 3u);\n unpacked[15] = extractBits(val_1, 13u, 3u);\n unpacked[16] = extractBits(val_1, 16u, 3u);\n unpacked[17] = extractBits(val_1, 19u, 3u);\n unpacked[18] = extractBits(val_1, 22u, 3u);\n unpacked[19] = extractBits(val_1, 25u, 3u);\n unpacked[20] = extractBits(val_1, 28u, 3u);\n unpacked[21] = extractBits(val_1, 31u, 1u);\n \n let val_2 = blocks[packed_offset + 2];\n unpacked[21] = insertBits(unpacked[21], extractBits(val_2, 0u, 2u), 1u, 2u); \n unpacked[22] = extractBits(val_2, 2u, 3u);\n unpacked[23] = extractBits(val_2, 5u, 3u);\n unpacked[24] = extractBits(val_2, 8u, 3u);\n unpacked[25] = extractBits(val_2, 11u, 3u);\n unpacked[26] = extractBits(val_2, 14u, 3u);\n unpacked[27] = extractBits(val_2, 17u, 3u);\n unpacked[28] = extractBits(val_2, 20u, 3u);\n unpacked[29] = extractBits(val_2, 23u, 3u);\n unpacked[30] = extractBits(val_2, 26u, 3u);\n unpacked[31] = extractBits(val_2, 29u, 3u);\n \n let val_3 = blocks[packed_offset + 3]; \n unpacked[32] = extractBits(val_3, 0u, 3u);\n unpacked[33] = extractBits(val_3, 3u, 3u);\n unpacked[34] = extractBits(val_3, 6u, 3u);\n unpacked[35] = extractBits(val_3, 9u, 3u);\n unpacked[36] = extractBits(val_3, 12u, 3u);\n unpacked[37] = extractBits(val_3, 15u, 3u);\n unpacked[38] = extractBits(val_3, 18u, 3u);\n unpacked[39] = extractBits(val_3, 21u, 3u);\n unpacked[40] = extractBits(val_3, 24u, 3u);\n unpacked[41] = extractBits(val_3, 27u, 3u);\n unpacked[42] = extractBits(val_3, 30u, 2u);\n \n let val_4 = blocks[packed_offset + 4];\n unpacked[42] = insertBits(unpacked[42], extractBits(val_4, 0u, 1u), 2u, 1u);\n unpacked[43] = extractBits(val_4, 1u, 3u);\n unpacked[44] = extractBits(val_4, 4u, 3u);\n unpacked[45] = extractBits(val_4, 7u, 3u);\n unpacked[46] = extractBits(val_4, 10u, 3u);\n unpacked[47] = extractBits(val_4, 13u, 3u);\n unpacked[48] = extractBits(val_4, 16u, 3u);\n 
unpacked[49] = extractBits(val_4, 19u, 3u);\n unpacked[50] = extractBits(val_4, 22u, 3u);\n unpacked[51] = extractBits(val_4, 25u, 3u);\n unpacked[52] = extractBits(val_4, 28u, 3u);\n unpacked[53] = extractBits(val_4, 31u, 1u);\n \n let val_5 = blocks[packed_offset + 5];\n unpacked[53] = insertBits(unpacked[53], extractBits(val_5, 0u, 2u), 1u, 2u); \n unpacked[54] = extractBits(val_5, 2u, 3u);\n unpacked[55] = extractBits(val_5, 5u, 3u);\n unpacked[56] = extractBits(val_5, 8u, 3u);\n unpacked[57] = extractBits(val_5, 11u, 3u);\n unpacked[58] = extractBits(val_5, 14u, 3u);\n unpacked[59] = extractBits(val_5, 17u, 3u);\n unpacked[60] = extractBits(val_5, 20u, 3u);\n unpacked[61] = extractBits(val_5, 23u, 3u);\n unpacked[62] = extractBits(val_5, 26u, 3u);\n unpacked[63] = extractBits(val_5, 29u, 3u);\n \n let val_6 = blocks[packed_offset + 6];\n unpacked[64] = extractBits(val_6, 0u, 3u);\n unpacked[65] = extractBits(val_6, 3u, 3u);\n unpacked[66] = extractBits(val_6, 6u, 3u);\n unpacked[67] = extractBits(val_6, 9u, 3u);\n unpacked[68] = extractBits(val_6, 12u, 3u);\n unpacked[69] = extractBits(val_6, 15u, 3u);\n unpacked[70] = extractBits(val_6, 18u, 3u);\n unpacked[71] = extractBits(val_6, 21u, 3u);\n unpacked[72] = extractBits(val_6, 24u, 3u);\n unpacked[73] = extractBits(val_6, 27u, 3u);\n unpacked[74] = extractBits(val_6, 30u, 2u);\n \n let val_7 = blocks[packed_offset + 7];\n unpacked[74] = insertBits(unpacked[74], extractBits(val_7, 0u, 1u), 2u, 1u);\n unpacked[75] = extractBits(val_7, 1u, 3u);\n unpacked[76] = extractBits(val_7, 4u, 3u);\n unpacked[77] = extractBits(val_7, 7u, 3u);\n unpacked[78] = extractBits(val_7, 10u, 3u);\n unpacked[79] = extractBits(val_7, 13u, 3u);\n unpacked[80] = extractBits(val_7, 16u, 3u);\n unpacked[81] = extractBits(val_7, 19u, 3u);\n unpacked[82] = extractBits(val_7, 22u, 3u);\n unpacked[83] = extractBits(val_7, 25u, 3u);\n unpacked[84] = extractBits(val_7, 28u, 3u);\n unpacked[85] = extractBits(val_7, 31u, 1u);\n \n let val_8 = 
blocks[packed_offset + 8];\n unpacked[85] = insertBits(unpacked[85], extractBits(val_8, 0u, 2u), 1u, 2u); \n unpacked[86] = extractBits(val_8, 2u, 3u);\n unpacked[87] = extractBits(val_8, 5u, 3u);\n unpacked[88] = extractBits(val_8, 8u, 3u);\n unpacked[89] = extractBits(val_8, 11u, 3u);\n unpacked[90] = extractBits(val_8, 14u, 3u);\n unpacked[91] = extractBits(val_8, 17u, 3u);\n unpacked[92] = extractBits(val_8, 20u, 3u);\n unpacked[93] = extractBits(val_8, 23u, 3u);\n unpacked[94] = extractBits(val_8, 26u, 3u);\n unpacked[95] = extractBits(val_8, 29u, 3u);\n \n let val_9 = blocks[packed_offset + 9];\n unpacked[96] = extractBits(val_9, 0u, 3u);\n unpacked[97] = extractBits(val_9, 3u, 3u);\n unpacked[98] = extractBits(val_9, 6u, 3u);\n unpacked[99] = extractBits(val_9, 9u, 3u);\n unpacked[100] = extractBits(val_9, 12u, 3u);\n unpacked[101] = extractBits(val_9, 15u, 3u);\n unpacked[102] = extractBits(val_9, 18u, 3u);\n unpacked[103] = extractBits(val_9, 21u, 3u);\n unpacked[104] = extractBits(val_9, 24u, 3u);\n unpacked[105] = extractBits(val_9, 27u, 3u);\n unpacked[106] = extractBits(val_9, 30u, 2u);\n \n let val_10 = blocks[packed_offset + 10];\n unpacked[106] = insertBits(unpacked[106], extractBits(val_10, 0u, 1u), 2u, 1u);\n unpacked[107] = extractBits(val_10, 1u, 3u);\n unpacked[108] = extractBits(val_10, 4u, 3u);\n unpacked[109] = extractBits(val_10, 7u, 3u);\n unpacked[110] = extractBits(val_10, 10u, 3u);\n unpacked[111] = extractBits(val_10, 13u, 3u);\n unpacked[112] = extractBits(val_10, 16u, 3u);\n unpacked[113] = extractBits(val_10, 19u, 3u);\n unpacked[114] = extractBits(val_10, 22u, 3u);\n unpacked[115] = extractBits(val_10, 25u, 3u);\n unpacked[116] = extractBits(val_10, 28u, 3u);\n unpacked[117] = extractBits(val_10, 31u, 1u);\n \n let val_11 = blocks[packed_offset + 11];\n unpacked[117] = insertBits(unpacked[117], extractBits(val_11, 0u, 2u), 1u, 2u); \n unpacked[118] = extractBits(val_11, 2u, 3u);\n unpacked[119] = extractBits(val_11, 5u, 3u);\n 
unpacked[120] = extractBits(val_11, 8u, 3u);\n unpacked[121] = extractBits(val_11, 11u, 3u);\n unpacked[122] = extractBits(val_11, 14u, 3u);\n unpacked[123] = extractBits(val_11, 17u, 3u);\n unpacked[124] = extractBits(val_11, 20u, 3u);\n unpacked[125] = extractBits(val_11, 23u, 3u);\n unpacked[126] = extractBits(val_11, 26u, 3u);\n unpacked[127] = extractBits(val_11, 29u, 3u);\n}\n"; + var unpackBlock128BitDepth5 = "\nvar unpacked: array;\n\nfn unpack_block_128_bit_depth_5(packed_offset: u32) { \n let val_0 = blocks[packed_offset];\n unpacked[0] = extractBits(val_0, 0u, 5u);\n unpacked[1] = extractBits(val_0, 5u, 5u);\n unpacked[2] = extractBits(val_0, 10u, 5u);\n unpacked[3] = extractBits(val_0, 15u, 5u);\n unpacked[4] = extractBits(val_0, 20u, 5u);\n unpacked[5] = extractBits(val_0, 25u, 5u);\n unpacked[6] = extractBits(val_0, 30u, 2u);\n \n let val_1 = blocks[packed_offset + 1];\n unpacked[6] = insertBits(unpacked[6], extractBits(val_1, 0u, 3u), 2u, 3u); \n unpacked[7] = extractBits(val_1, 3u, 5u);\n unpacked[8] = extractBits(val_1, 8u, 5u);\n unpacked[9] = extractBits(val_1, 13u, 5u);\n unpacked[10] = extractBits(val_1, 18u, 5u);\n unpacked[11] = extractBits(val_1, 23u, 5u);\n unpacked[12] = extractBits(val_1, 28u, 4u);\n \n let val_2 = blocks[packed_offset + 2];\n unpacked[12] = insertBits(unpacked[12], extractBits(val_2, 0u, 1u), 4u, 1u);\n unpacked[13] = extractBits(val_2, 1u, 5u);\n unpacked[14] = extractBits(val_2, 6u, 5u);\n unpacked[15] = extractBits(val_2, 11u, 5u);\n unpacked[16] = extractBits(val_2, 16u, 5u);\n unpacked[17] = extractBits(val_2, 21u, 5u);\n unpacked[18] = extractBits(val_2, 26u, 5u);\n unpacked[19] = extractBits(val_2, 31u, 1u);\n \n let val_3 = blocks[packed_offset + 3];\n unpacked[19] = insertBits(unpacked[19], extractBits(val_3, 0u, 4u), 1u, 4u);\n unpacked[20] = extractBits(val_3, 4u, 5u);\n unpacked[21] = extractBits(val_3, 9u, 5u);\n unpacked[22] = extractBits(val_3, 14u, 5u);\n unpacked[23] = extractBits(val_3, 19u, 5u);\n 
unpacked[24] = extractBits(val_3, 24u, 5u);\n unpacked[25] = extractBits(val_3, 29u, 3u);\n \n let val_4 = blocks[packed_offset + 4];\n unpacked[25] = insertBits(unpacked[25], extractBits(val_4, 0u, 2u), 3u, 2u);\n unpacked[26] = extractBits(val_4, 2u, 5u);\n unpacked[27] = extractBits(val_4, 7u, 5u);\n unpacked[28] = extractBits(val_4, 12u, 5u);\n unpacked[29] = extractBits(val_4, 17u, 5u);\n unpacked[30] = extractBits(val_4, 22u, 5u);\n unpacked[31] = extractBits(val_4, 27u, 5u);\n \n let val_5 = blocks[packed_offset + 5];\n unpacked[32] = extractBits(val_5, 0u, 5u);\n unpacked[33] = extractBits(val_5, 5u, 5u);\n unpacked[34] = extractBits(val_5, 10u, 5u);\n unpacked[35] = extractBits(val_5, 15u, 5u);\n unpacked[36] = extractBits(val_5, 20u, 5u);\n unpacked[37] = extractBits(val_5, 25u, 5u);\n unpacked[38] = extractBits(val_5, 30u, 2u);\n\n let val_6 = blocks[packed_offset + 6];\n unpacked[38] = insertBits(unpacked[38], extractBits(val_6, 0u, 3u), 2u, 3u);\n unpacked[39] = extractBits(val_6, 3u, 5u);\n unpacked[40] = extractBits(val_6, 8u, 5u);\n unpacked[41] = extractBits(val_6, 13u, 5u);\n unpacked[42] = extractBits(val_6, 18u, 5u);\n unpacked[43] = extractBits(val_6, 23u, 5u);\n unpacked[44] = extractBits(val_6, 28u, 4u);\n\n let val_7 = blocks[packed_offset + 7];\n unpacked[44] = insertBits(unpacked[44], extractBits(val_7, 0u, 1u), 4u, 1u);\n unpacked[45] = extractBits(val_7, 1u, 5u);\n unpacked[46] = extractBits(val_7, 6u, 5u);\n unpacked[47] = extractBits(val_7, 11u, 5u);\n unpacked[48] = extractBits(val_7, 16u, 5u);\n unpacked[49] = extractBits(val_7, 21u, 5u);\n unpacked[50] = extractBits(val_7, 26u, 5u);\n unpacked[51] = extractBits(val_7, 31u, 1u);\n\n let val_8 = blocks[packed_offset + 8];\n unpacked[51] = insertBits(unpacked[51], extractBits(val_8, 0u, 4u), 1u, 4u);\n unpacked[52] = extractBits(val_8, 4u, 5u);\n unpacked[53] = extractBits(val_8, 9u, 5u);\n unpacked[54] = extractBits(val_8, 14u, 5u);\n unpacked[55] = extractBits(val_8, 19u, 5u);\n 
unpacked[56] = extractBits(val_8, 24u, 5u);\n unpacked[57] = extractBits(val_8, 29u, 3u);\n \n let val_9 = blocks[packed_offset + 9];\n unpacked[57] = insertBits(unpacked[57], extractBits(val_9, 0u, 2u), 3u, 2u);\n unpacked[58] = extractBits(val_9, 2u, 5u);\n unpacked[59] = extractBits(val_9, 7u, 5u);\n unpacked[60] = extractBits(val_9, 12u, 5u);\n unpacked[61] = extractBits(val_9, 17u, 5u);\n unpacked[62] = extractBits(val_9, 22u, 5u);\n unpacked[63] = extractBits(val_9, 27u, 5u);\n \n let val_10 = blocks[packed_offset + 10];\n unpacked[64] = extractBits(val_10, 0u, 5u);\n unpacked[65] = extractBits(val_10, 5u, 5u);\n unpacked[66] = extractBits(val_10, 10u, 5u);\n unpacked[67] = extractBits(val_10, 15u, 5u);\n unpacked[68] = extractBits(val_10, 20u, 5u);\n unpacked[69] = extractBits(val_10, 25u, 5u);\n unpacked[70] = extractBits(val_10, 30u, 2u);\n\n let val_11 = blocks[packed_offset + 11];\n unpacked[70] = insertBits(unpacked[70], extractBits(val_11, 0u, 3u), 2u, 3u);\n unpacked[71] = extractBits(val_11, 3u, 5u);\n unpacked[72] = extractBits(val_11, 8u, 5u);\n unpacked[73] = extractBits(val_11, 13u, 5u);\n unpacked[74] = extractBits(val_11, 18u, 5u);\n unpacked[75] = extractBits(val_11, 23u, 5u);\n unpacked[76] = extractBits(val_11, 28u, 4u);\n\n let val_12 = blocks[packed_offset + 12];\n unpacked[76] = insertBits(unpacked[76], extractBits(val_12, 0u, 1u), 4u, 1u);\n unpacked[77] = extractBits(val_12, 1u, 5u);\n unpacked[78] = extractBits(val_12, 6u, 5u);\n unpacked[79] = extractBits(val_12, 11u, 5u);\n unpacked[80] = extractBits(val_12, 16u, 5u);\n unpacked[81] = extractBits(val_12, 21u, 5u);\n unpacked[82] = extractBits(val_12, 26u, 5u);\n unpacked[83] = extractBits(val_12, 31u, 1u);\n\n let val_13 = blocks[packed_offset + 13];\n unpacked[83] = insertBits(unpacked[83], extractBits(val_13, 0u, 4u), 1u, 4u);\n unpacked[84] = extractBits(val_13, 4u, 5u);\n unpacked[85] = extractBits(val_13, 9u, 5u);\n unpacked[86] = extractBits(val_13, 14u, 5u);\n unpacked[87] = 
extractBits(val_13, 19u, 5u);\n unpacked[88] = extractBits(val_13, 24u, 5u);\n unpacked[89] = extractBits(val_13, 29u, 3u);\n \n let val_14 = blocks[packed_offset + 14];\n unpacked[89] = insertBits(unpacked[89], extractBits(val_14, 0u, 2u), 3u, 2u);\n unpacked[90] = extractBits(val_14, 2u, 5u);\n unpacked[91] = extractBits(val_14, 7u, 5u);\n unpacked[92] = extractBits(val_14, 12u, 5u);\n unpacked[93] = extractBits(val_14, 17u, 5u);\n unpacked[94] = extractBits(val_14, 22u, 5u);\n unpacked[95] = extractBits(val_14, 27u, 5u);\n\n let val_15 = blocks[packed_offset + 15];\n unpacked[96] = extractBits(val_15, 0u, 5u);\n unpacked[97] = extractBits(val_15, 5u, 5u);\n unpacked[98] = extractBits(val_15, 10u, 5u);\n unpacked[99] = extractBits(val_15, 15u, 5u);\n unpacked[100] = extractBits(val_15, 20u, 5u);\n unpacked[101] = extractBits(val_15, 25u, 5u);\n unpacked[102] = extractBits(val_15, 30u, 2u);\n\n let val_16 = blocks[packed_offset + 16];\n unpacked[102] = insertBits(unpacked[102], extractBits(val_16, 0u, 3u), 2u, 3u);\n unpacked[103] = extractBits(val_16, 3u, 5u);\n unpacked[104] = extractBits(val_16, 8u, 5u);\n unpacked[105] = extractBits(val_16, 13u, 5u);\n unpacked[106] = extractBits(val_16, 18u, 5u);\n unpacked[107] = extractBits(val_16, 23u, 5u);\n unpacked[108] = extractBits(val_16, 28u, 4u);\n\n let val_17 = blocks[packed_offset + 17];\n unpacked[108] = insertBits(unpacked[108], extractBits(val_17, 0u, 1u), 4u, 1u);\n unpacked[109] = extractBits(val_17, 1u, 5u);\n unpacked[110] = extractBits(val_17, 6u, 5u);\n unpacked[111] = extractBits(val_17, 11u, 5u);\n unpacked[112] = extractBits(val_17, 16u, 5u);\n unpacked[113] = extractBits(val_17, 21u, 5u);\n unpacked[114] = extractBits(val_17, 26u, 5u);\n unpacked[115] = extractBits(val_17, 31u, 1u);\n \n let val_18 = blocks[packed_offset + 18];\n unpacked[115] = insertBits(unpacked[115], extractBits(val_18, 0u, 4u), 1u, 4u);\n unpacked[116] = extractBits(val_18, 4u, 5u);\n unpacked[117] = extractBits(val_18, 9u, 
5u);\n unpacked[118] = extractBits(val_18, 14u, 5u);\n unpacked[119] = extractBits(val_18, 19u, 5u);\n unpacked[120] = extractBits(val_18, 24u, 5u);\n unpacked[121] = extractBits(val_18, 29u, 3u);\n\n let val_19 = blocks[packed_offset + 19];\n unpacked[121] = insertBits(unpacked[121], extractBits(val_19, 0u, 2u), 3u, 2u);\n unpacked[122] = extractBits(val_19, 2u, 5u);\n unpacked[123] = extractBits(val_19, 7u, 5u);\n unpacked[124] = extractBits(val_19, 12u, 5u);\n unpacked[125] = extractBits(val_19, 17u, 5u);\n unpacked[126] = extractBits(val_19, 22u, 5u);\n unpacked[127] = extractBits(val_19, 27u, 5u);\n}\n"; + var unpackBlock128BitDepth6 = "\nvar unpacked: array;\n\nfn unpack_block_128_bit_depth_6(packed_offset: u32) {\n let val_0 = blocks[packed_offset];\n unpacked[0] = extractBits(val_0, 0u, 6u);\n unpacked[1] = extractBits(val_0, 6u, 6u);\n unpacked[2] = extractBits(val_0, 12u, 6u);\n unpacked[3] = extractBits(val_0, 18u, 6u);\n unpacked[4] = extractBits(val_0, 24u, 6u);\n unpacked[5] = extractBits(val_0, 30u, 2u);\n \n let val_1 = blocks[packed_offset + 1];\n unpacked[5] = insertBits(unpacked[5], extractBits(val_1, 0u, 4u), 2u, 4u);\n unpacked[6] = extractBits(val_1, 4u, 6u);\n unpacked[7] = extractBits(val_1, 10u, 6u);\n unpacked[8] = extractBits(val_1, 16u, 6u);\n unpacked[9] = extractBits(val_1, 22u, 6u);\n unpacked[10] = extractBits(val_1, 28u, 4u);\n \n let val_2 = blocks[packed_offset + 2];\n unpacked[10] = insertBits(unpacked[10], extractBits(val_2, 0u, 2u), 4u, 2u);\n unpacked[11] = extractBits(val_2, 2u, 6u);\n unpacked[12] = extractBits(val_2, 8u, 6u);\n unpacked[13] = extractBits(val_2, 14u, 6u);\n unpacked[14] = extractBits(val_2, 20u, 6u);\n unpacked[15] = extractBits(val_2, 26u, 6u);\n \n let val_3 = blocks[packed_offset + 3];\n unpacked[16] = extractBits(val_3, 0u, 6u);\n unpacked[17] = extractBits(val_3, 6u, 6u);\n unpacked[18] = extractBits(val_3, 12u, 6u);\n unpacked[19] = extractBits(val_3, 18u, 6u);\n unpacked[20] = extractBits(val_3, 24u, 
6u);\n unpacked[21] = extractBits(val_3, 30u, 2u);\n \n let val_4 = blocks[packed_offset + 4];\n unpacked[21] = insertBits(unpacked[21], extractBits(val_4, 0u, 4u), 2u, 4u);\n unpacked[22] = extractBits(val_4, 4u, 6u);\n unpacked[23] = extractBits(val_4, 10u, 6u);\n unpacked[24] = extractBits(val_4, 16u, 6u);\n unpacked[25] = extractBits(val_4, 22u, 6u);\n unpacked[26] = extractBits(val_4, 28u, 4u);\n\n let val_5 = blocks[packed_offset + 5];\n unpacked[26] = insertBits(unpacked[26], extractBits(val_5, 0u, 2u), 4u, 2u);\n unpacked[27] = extractBits(val_5, 2u, 6u);\n unpacked[28] = extractBits(val_5, 8u, 6u);\n unpacked[29] = extractBits(val_5, 14u, 6u);\n unpacked[30] = extractBits(val_5, 20u, 6u);\n unpacked[31] = extractBits(val_5, 26u, 6u);\n\n let val_6 = blocks[packed_offset + 6];\n unpacked[32] = extractBits(val_6, 0u, 6u);\n unpacked[33] = extractBits(val_6, 6u, 6u);\n unpacked[34] = extractBits(val_6, 12u, 6u);\n unpacked[35] = extractBits(val_6, 18u, 6u);\n unpacked[36] = extractBits(val_6, 24u, 6u);\n unpacked[37] = extractBits(val_6, 30u, 2u);\n\n let val_7 = blocks[packed_offset + 7];\n unpacked[37] = insertBits(unpacked[37], extractBits(val_7, 0u, 4u), 2u, 4u);\n unpacked[38] = extractBits(val_7, 4u, 6u);\n unpacked[39] = extractBits(val_7, 10u, 6u);\n unpacked[40] = extractBits(val_7, 16u, 6u);\n unpacked[41] = extractBits(val_7, 22u, 6u);\n unpacked[42] = extractBits(val_7, 28u, 4u);\n \n let val_8 = blocks[packed_offset + 8];\n unpacked[42] = insertBits(unpacked[42], extractBits(val_8, 0u, 2u), 4u, 2u);\n unpacked[43] = extractBits(val_8, 2u, 6u);\n unpacked[44] = extractBits(val_8, 8u, 6u);\n unpacked[45] = extractBits(val_8, 14u, 6u);\n unpacked[46] = extractBits(val_8, 20u, 6u);\n unpacked[47] = extractBits(val_8, 26u, 6u);\n\n let val_9 = blocks[packed_offset + 9];\n unpacked[48] = extractBits(val_9, 0u, 6u);\n unpacked[49] = extractBits(val_9, 6u, 6u);\n unpacked[50] = extractBits(val_9, 12u, 6u);\n unpacked[51] = extractBits(val_9, 18u, 6u);\n 
unpacked[52] = extractBits(val_9, 24u, 6u);\n unpacked[53] = extractBits(val_9, 30u, 2u);\n\n let val_10 = blocks[packed_offset + 10];\n unpacked[53] = insertBits(unpacked[53], extractBits(val_10, 0u, 4u), 2u, 4u);\n unpacked[54] = extractBits(val_10, 4u, 6u);\n unpacked[55] = extractBits(val_10, 10u, 6u);\n unpacked[56] = extractBits(val_10, 16u, 6u);\n unpacked[57] = extractBits(val_10, 22u, 6u);\n unpacked[58] = extractBits(val_10, 28u, 4u);\n\n let val_11 = blocks[packed_offset + 11];\n unpacked[58] = insertBits(unpacked[58], extractBits(val_11, 0u, 2u), 4u, 2u);\n unpacked[59] = extractBits(val_11, 2u, 6u);\n unpacked[60] = extractBits(val_11, 8u, 6u);\n unpacked[61] = extractBits(val_11, 14u, 6u);\n unpacked[62] = extractBits(val_11, 20u, 6u);\n unpacked[63] = extractBits(val_11, 26u, 6u);\n \n let val_12 = blocks[packed_offset + 12];\n unpacked[64] = extractBits(val_12, 0u, 6u);\n unpacked[65] = extractBits(val_12, 6u, 6u);\n unpacked[66] = extractBits(val_12, 12u, 6u);\n unpacked[67] = extractBits(val_12, 18u, 6u);\n unpacked[68] = extractBits(val_12, 24u, 6u);\n unpacked[69] = extractBits(val_12, 30u, 2u);\n\n let val_13 = blocks[packed_offset + 13];\n unpacked[69] = insertBits(unpacked[69], extractBits(val_13, 0u, 4u), 2u, 4u);\n unpacked[70] = extractBits(val_13, 4u, 6u);\n unpacked[71] = extractBits(val_13, 10u, 6u);\n unpacked[72] = extractBits(val_13, 16u, 6u);\n unpacked[73] = extractBits(val_13, 22u, 6u);\n unpacked[74] = extractBits(val_13, 28u, 4u);\n\n let val_14 = blocks[packed_offset + 14];\n unpacked[74] = insertBits(unpacked[74], extractBits(val_14, 0u, 2u), 4u, 2u);\n unpacked[75] = extractBits(val_14, 2u, 6u);\n unpacked[76] = extractBits(val_14, 8u, 6u);\n unpacked[77] = extractBits(val_14, 14u, 6u);\n unpacked[78] = extractBits(val_14, 20u, 6u);\n unpacked[79] = extractBits(val_14, 26u, 6u);\n\n let val_15 = blocks[packed_offset + 15];\n unpacked[80] = extractBits(val_15, 0u, 6u);\n unpacked[81] = extractBits(val_15, 6u, 6u);\n 
unpacked[82] = extractBits(val_15, 12u, 6u);\n unpacked[83] = extractBits(val_15, 18u, 6u);\n unpacked[84] = extractBits(val_15, 24u, 6u);\n unpacked[85] = extractBits(val_15, 30u, 2u);\n \n let val_16 = blocks[packed_offset + 16];\n unpacked[85] = insertBits(unpacked[85], extractBits(val_16, 0u, 4u), 2u, 4u);\n unpacked[86] = extractBits(val_16, 4u, 6u);\n unpacked[87] = extractBits(val_16, 10u, 6u);\n unpacked[88] = extractBits(val_16, 16u, 6u);\n unpacked[89] = extractBits(val_16, 22u, 6u);\n unpacked[90] = extractBits(val_16, 28u, 4u);\n\n let val_17 = blocks[packed_offset + 17];\n unpacked[90] = insertBits(unpacked[90], extractBits(val_17, 0u, 2u), 4u, 2u);\n unpacked[91] = extractBits(val_17, 2u, 6u);\n unpacked[92] = extractBits(val_17, 8u, 6u);\n unpacked[93] = extractBits(val_17, 14u, 6u);\n unpacked[94] = extractBits(val_17, 20u, 6u);\n unpacked[95] = extractBits(val_17, 26u, 6u);\n\n let val_18 = blocks[packed_offset + 18];\n unpacked[96] = extractBits(val_18, 0u, 6u);\n unpacked[97] = extractBits(val_18, 6u, 6u);\n unpacked[98] = extractBits(val_18, 12u, 6u);\n unpacked[99] = extractBits(val_18, 18u, 6u);\n unpacked[100] = extractBits(val_18, 24u, 6u);\n unpacked[101] = extractBits(val_18, 30u, 2u);\n\n let val_19 = blocks[packed_offset + 19];\n unpacked[101] = insertBits(unpacked[101], extractBits(val_19, 0u, 4u), 2u, 4u);\n unpacked[102] = extractBits(val_19, 4u, 6u);\n unpacked[103] = extractBits(val_19, 10u, 6u);\n unpacked[104] = extractBits(val_19, 16u, 6u);\n unpacked[105] = extractBits(val_19, 22u, 6u);\n unpacked[106] = extractBits(val_19, 28u, 4u);\n \n let val_20 = blocks[packed_offset + 20];\n unpacked[106] = insertBits(unpacked[106], extractBits(val_20, 0u, 2u), 4u, 2u);\n unpacked[107] = extractBits(val_20, 2u, 6u);\n unpacked[108] = extractBits(val_20, 8u, 6u);\n unpacked[109] = extractBits(val_20, 14u, 6u);\n unpacked[110] = extractBits(val_20, 20u, 6u);\n unpacked[111] = extractBits(val_20, 26u, 6u);\n\n let val_21 = 
blocks[packed_offset + 21];\n unpacked[112] = extractBits(val_21, 0u, 6u);\n unpacked[113] = extractBits(val_21, 6u, 6u);\n unpacked[114] = extractBits(val_21, 12u, 6u);\n unpacked[115] = extractBits(val_21, 18u, 6u);\n unpacked[116] = extractBits(val_21, 24u, 6u);\n unpacked[117] = extractBits(val_21, 30u, 2u);\n\n let val_22 = blocks[packed_offset + 22];\n unpacked[117] = insertBits(unpacked[117], extractBits(val_22, 0u, 4u), 2u, 4u);\n unpacked[118] = extractBits(val_22, 4u, 6u);\n unpacked[119] = extractBits(val_22, 10u, 6u);\n unpacked[120] = extractBits(val_22, 16u, 6u);\n unpacked[121] = extractBits(val_22, 22u, 6u);\n unpacked[122] = extractBits(val_22, 28u, 4u);\n\n let val_23 = blocks[packed_offset + 23];\n unpacked[122] = insertBits(unpacked[122], extractBits(val_23, 0u, 2u), 4u, 2u);\n unpacked[123] = extractBits(val_23, 2u, 6u);\n unpacked[124] = extractBits(val_23, 8u, 6u);\n unpacked[125] = extractBits(val_23, 14u, 6u);\n unpacked[126] = extractBits(val_23, 20u, 6u);\n unpacked[127] = extractBits(val_23, 26u, 6u);\n}\n"; + var unpackBlock128BitDepth7 = "\nvar unpacked: array;\n\nfn unpack_block_128_bit_depth_7(packed_offset: u32) {\n let val_0 = blocks[packed_offset];\n unpacked[0] = extractBits(val_0, 0u, 7u);\n unpacked[1] = extractBits(val_0, 7u, 7u);\n unpacked[2] = extractBits(val_0, 14u, 7u);\n unpacked[3] = extractBits(val_0, 21u, 7u);\n unpacked[4] = extractBits(val_0, 28u, 4u);\n \n let val_1 = blocks[packed_offset + 1];\n unpacked[4] = insertBits(unpacked[4], extractBits(val_1, 0u, 3u), 4u, 3u);\n unpacked[5] = extractBits(val_1, 3u, 7u);\n unpacked[6] = extractBits(val_1, 10u, 7u);\n unpacked[7] = extractBits(val_1, 17u, 7u);\n unpacked[8] = extractBits(val_1, 24u, 7u);\n unpacked[9] = extractBits(val_1, 31u, 1u);\n \n let val_2 = blocks[packed_offset + 2];\n unpacked[9] = insertBits(unpacked[9], extractBits(val_2, 0u, 6u), 1u, 6u);\n unpacked[10] = extractBits(val_2, 6u, 7u);\n unpacked[11] = extractBits(val_2, 13u, 7u);\n unpacked[12] = 
extractBits(val_2, 20u, 7u);\n unpacked[13] = extractBits(val_2, 27u, 5u);\n \n let val_3 = blocks[packed_offset + 3];\n unpacked[13] = insertBits(unpacked[13], extractBits(val_3, 0u, 2u), 5u, 2u);\n unpacked[14] = extractBits(val_3, 2u, 7u);\n unpacked[15] = extractBits(val_3, 9u, 7u);\n unpacked[16] = extractBits(val_3, 16u, 7u);\n unpacked[17] = extractBits(val_3, 23u, 7u);\n unpacked[18] = extractBits(val_3, 30u, 2u);\n \n let val_4 = blocks[packed_offset + 4];\n unpacked[18] = insertBits(unpacked[18], extractBits(val_4, 0u, 5u), 2u, 5u);\n unpacked[19] = extractBits(val_4, 5u, 7u);\n unpacked[20] = extractBits(val_4, 12u, 7u);\n unpacked[21] = extractBits(val_4, 19u, 7u);\n unpacked[22] = extractBits(val_4, 26u, 6u);\n \n let val_5 = blocks[packed_offset + 5];\n unpacked[22] = insertBits(unpacked[22], extractBits(val_5, 0u, 1u), 6u, 1u);\n unpacked[23] = extractBits(val_5, 1u, 7u);\n unpacked[24] = extractBits(val_5, 8u, 7u);\n unpacked[25] = extractBits(val_5, 15u, 7u);\n unpacked[26] = extractBits(val_5, 22u, 7u);\n unpacked[27] = extractBits(val_5, 29u, 3u);\n \n let val_6 = blocks[packed_offset + 6];\n unpacked[27] = insertBits(unpacked[27], extractBits(val_6, 0u, 4u), 3u, 4u);\n unpacked[28] = extractBits(val_6, 4u, 7u);\n unpacked[29] = extractBits(val_6, 11u, 7u);\n unpacked[30] = extractBits(val_6, 18u, 7u);\n unpacked[31] = extractBits(val_6, 25u, 7u);\n \n let val_7 = blocks[packed_offset + 7];\n unpacked[32] = extractBits(val_7, 0u, 7u);\n unpacked[33] = extractBits(val_7, 7u, 7u);\n unpacked[34] = extractBits(val_7, 14u, 7u);\n unpacked[35] = extractBits(val_7, 21u, 7u);\n unpacked[36] = extractBits(val_7, 28u, 4u);\n \n let val_8 = blocks[packed_offset + 8];\n unpacked[36] = insertBits(unpacked[36], extractBits(val_8, 0u, 3u), 4u, 3u);\n unpacked[37] = extractBits(val_8, 3u, 7u);\n unpacked[38] = extractBits(val_8, 10u, 7u);\n unpacked[39] = extractBits(val_8, 17u, 7u);\n unpacked[40] = extractBits(val_8, 24u, 7u);\n unpacked[41] = 
extractBits(val_8, 31u, 1u);\n \n let val_9 = blocks[packed_offset + 9];\n unpacked[41] = insertBits(unpacked[41], extractBits(val_9, 0u, 6u), 1u, 6u);\n unpacked[42] = extractBits(val_9, 6u, 7u);\n unpacked[43] = extractBits(val_9, 13u, 7u);\n unpacked[44] = extractBits(val_9, 20u, 7u);\n unpacked[45] = extractBits(val_9, 27u, 5u);\n \n let val_10 = blocks[packed_offset + 10];\n unpacked[45] = insertBits(unpacked[45], extractBits(val_10, 0u, 2u), 5u, 2u);\n unpacked[46] = extractBits(val_10, 2u, 7u);\n unpacked[47] = extractBits(val_10, 9u, 7u);\n unpacked[48] = extractBits(val_10, 16u, 7u);\n unpacked[49] = extractBits(val_10, 23u, 7u);\n unpacked[50] = extractBits(val_10, 30u, 2u);\n \n let val_11 = blocks[packed_offset + 11];\n unpacked[50] = insertBits(unpacked[50], extractBits(val_11, 0u, 5u), 2u, 5u);\n unpacked[51] = extractBits(val_11, 5u, 7u);\n unpacked[52] = extractBits(val_11, 12u, 7u);\n unpacked[53] = extractBits(val_11, 19u, 7u);\n unpacked[54] = extractBits(val_11, 26u, 6u);\n \n let val_12 = blocks[packed_offset + 12];\n unpacked[54] = insertBits(unpacked[54], extractBits(val_12, 0u, 1u), 6u, 1u);\n unpacked[55] = extractBits(val_12, 1u, 7u);\n unpacked[56] = extractBits(val_12, 8u, 7u);\n unpacked[57] = extractBits(val_12, 15u, 7u);\n unpacked[58] = extractBits(val_12, 22u, 7u);\n unpacked[59] = extractBits(val_12, 29u, 3u);\n \n let val_13 = blocks[packed_offset + 13];\n unpacked[59] = insertBits(unpacked[59], extractBits(val_13, 0u, 4u), 3u, 4u);\n unpacked[60] = extractBits(val_13, 4u, 7u);\n unpacked[61] = extractBits(val_13, 11u, 7u);\n unpacked[62] = extractBits(val_13, 18u, 7u);\n unpacked[63] = extractBits(val_13, 25u, 7u);\n \n let val_14 = blocks[packed_offset + 14];\n unpacked[64] = extractBits(val_14, 0u, 7u);\n unpacked[65] = extractBits(val_14, 7u, 7u);\n unpacked[66] = extractBits(val_14, 14u, 7u);\n unpacked[67] = extractBits(val_14, 21u, 7u);\n unpacked[68] = extractBits(val_14, 28u, 4u);\n \n let val_15 = blocks[packed_offset + 
15];\n unpacked[68] = insertBits(unpacked[68], extractBits(val_15, 0u, 3u), 4u, 3u);\n unpacked[69] = extractBits(val_15, 3u, 7u);\n unpacked[70] = extractBits(val_15, 10u, 7u);\n unpacked[71] = extractBits(val_15, 17u, 7u);\n unpacked[72] = extractBits(val_15, 24u, 7u);\n unpacked[73] = extractBits(val_15, 31u, 1u);\n \n let val_16 = blocks[packed_offset + 16];\n unpacked[73] = insertBits(unpacked[73], extractBits(val_16, 0u, 6u), 1u, 6u);\n unpacked[74] = extractBits(val_16, 6u, 7u);\n unpacked[75] = extractBits(val_16, 13u, 7u);\n unpacked[76] = extractBits(val_16, 20u, 7u);\n unpacked[77] = extractBits(val_16, 27u, 5u);\n \n let val_17 = blocks[packed_offset + 17];\n unpacked[77] = insertBits(unpacked[77], extractBits(val_17, 0u, 2u), 5u, 2u);\n unpacked[78] = extractBits(val_17, 2u, 7u);\n unpacked[79] = extractBits(val_17, 9u, 7u);\n unpacked[80] = extractBits(val_17, 16u, 7u);\n unpacked[81] = extractBits(val_17, 23u, 7u);\n unpacked[82] = extractBits(val_17, 30u, 2u);\n \n let val_18 = blocks[packed_offset + 18];\n unpacked[82] = insertBits(unpacked[82], extractBits(val_18, 0u, 5u), 2u, 5u);\n unpacked[83] = extractBits(val_18, 5u, 7u);\n unpacked[84] = extractBits(val_18, 12u, 7u);\n unpacked[85] = extractBits(val_18, 19u, 7u);\n unpacked[86] = extractBits(val_18, 26u, 6u);\n \n let val_19 = blocks[packed_offset + 19];\n unpacked[86] = insertBits(unpacked[86], extractBits(val_19, 0u, 1u), 6u, 1u);\n unpacked[87] = extractBits(val_19, 1u, 7u);\n unpacked[88] = extractBits(val_19, 8u, 7u);\n unpacked[89] = extractBits(val_19, 15u, 7u);\n unpacked[90] = extractBits(val_19, 22u, 7u);\n unpacked[91] = extractBits(val_19, 29u, 3u);\n \n let val_20 = blocks[packed_offset + 20];\n unpacked[91] = insertBits(unpacked[91], extractBits(val_20, 0u, 4u), 3u, 4u);\n unpacked[92] = extractBits(val_20, 4u, 7u);\n unpacked[93] = extractBits(val_20, 11u, 7u);\n unpacked[94] = extractBits(val_20, 18u, 7u);\n unpacked[95] = extractBits(val_20, 25u, 7u);\n \n let val_21 = 
blocks[packed_offset + 21];\n unpacked[96] = extractBits(val_21, 0u, 7u);\n unpacked[97] = extractBits(val_21, 7u, 7u);\n unpacked[98] = extractBits(val_21, 14u, 7u);\n unpacked[99] = extractBits(val_21, 21u, 7u);\n unpacked[100] = extractBits(val_21, 28u, 4u);\n \n let val_22 = blocks[packed_offset + 22];\n unpacked[100] = insertBits(unpacked[100], extractBits(val_22, 0u, 3u), 4u, 3u);\n unpacked[101] = extractBits(val_22, 3u, 7u);\n unpacked[102] = extractBits(val_22, 10u, 7u);\n unpacked[103] = extractBits(val_22, 17u, 7u);\n unpacked[104] = extractBits(val_22, 24u, 7u);\n unpacked[105] = extractBits(val_22, 31u, 1u);\n \n let val_23 = blocks[packed_offset + 23];\n unpacked[105] = insertBits(unpacked[105], extractBits(val_23, 0u, 6u), 1u, 6u);\n unpacked[106] = extractBits(val_23, 6u, 7u);\n unpacked[107] = extractBits(val_23, 13u, 7u);\n unpacked[108] = extractBits(val_23, 20u, 7u);\n unpacked[109] = extractBits(val_23, 27u, 5u);\n \n let val_24 = blocks[packed_offset + 24];\n unpacked[109] = insertBits(unpacked[109], extractBits(val_24, 0u, 2u), 5u, 2u);\n unpacked[110] = extractBits(val_24, 2u, 7u);\n unpacked[111] = extractBits(val_24, 9u, 7u);\n unpacked[112] = extractBits(val_24, 16u, 7u);\n unpacked[113] = extractBits(val_24, 23u, 7u);\n unpacked[114] = extractBits(val_24, 30u, 2u);\n \n let val_25 = blocks[packed_offset + 25];\n unpacked[114] = insertBits(unpacked[114], extractBits(val_25, 0u, 5u), 2u, 5u);\n unpacked[115] = extractBits(val_25, 5u, 7u);\n unpacked[116] = extractBits(val_25, 12u, 7u);\n unpacked[117] = extractBits(val_25, 19u, 7u);\n unpacked[118] = extractBits(val_25, 26u, 6u);\n \n let val_26 = blocks[packed_offset + 26];\n unpacked[118] = insertBits(unpacked[118], extractBits(val_26, 0u, 1u), 6u, 1u);\n unpacked[119] = extractBits(val_26, 1u, 7u);\n unpacked[120] = extractBits(val_26, 8u, 7u);\n unpacked[121] = extractBits(val_26, 15u, 7u);\n unpacked[122] = extractBits(val_26, 22u, 7u);\n unpacked[123] = extractBits(val_26, 29u, 
3u);\n \n let val_27 = blocks[packed_offset + 27];\n unpacked[123] = insertBits(unpacked[123], extractBits(val_27, 0u, 4u), 3u, 4u);\n unpacked[124] = extractBits(val_27, 4u, 7u);\n unpacked[125] = extractBits(val_27, 11u, 7u);\n unpacked[126] = extractBits(val_27, 18u, 7u);\n unpacked[127] = extractBits(val_27, 25u, 7u); \n}\n"; + var fromFP510Function = "\n\nconst exponents: array = array(\n 2.9103830456733704e-11, \n 5.820766091346741e-11, \n 1.1641532182693481e-10, \n 2.3283064365386963e-10,\n 4.656612873077393e-10, \n 9.313225746154785e-10, \n 1.862645149230957e-09, \n 3.725290298461914e-09,\n 7.450580596923828e-09, \n 1.4901161193847656e-08, \n 2.9802322387695312e-08, \n 5.960464477539063e-08,\n 1.1920928955078125e-07, \n 2.384185791015625e-07, \n 4.76837158203125e-07, \n 9.5367431640625e-07,\n 1.9073486328125e-06, \n 3.814697265625e-06, \n 7.62939453125e-06, \n 1.52587890625e-05, \n 3.0517578125e-05,\n 6.103515625e-05, \n 0.0001220703125, \n 0.000244140625, \n 0.00048828125, \n 0.0009765625, \n 0.001953125, \n 0.00390625,\n 0.0078125, \n 0.015625, \n 0.03125, \n 0.0625);\n\nfn from_fp510(x: u32) -> f32 {\n let exponent = f32(exponents[extractBits(x, 10u, 5u)]); \n let fractional = f32(extractBits(x, 0u, 10u)); \n let abs = exponent * fractional;\n return abs * (1.0 - (2.0 * f32(extractBits(x, 15u, 1u))));\n}\n"; + + var preprocessBlocks3BitShaderSource = "\n\nstruct argsStruct {\n nbr: u32,\n nbc: u32,\n blocks_offset: u32\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar blocks: array;\n\n".concat(unpackBlock128BitDepth3, "\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.nbr || global_id.y >= args.nbc) {\n return;\n }\n \n let blocks_start: u32 = args.blocks_offset + ((global_id.x * args.nbc + 
global_id.y) * 12u); \n unpack_block_128_bit_depth_3(blocks_start); \n \n let b01: u32 = blocks_start;\n let b2: u32 = blocks_start + 8u;\n \n for (var r = 0u; r < ").concat(rowsPerBlock, "u; r++) {\n let unpacked_idx = r * ").concat(columnsPerBlock, "u;\n let b01_idx = b01 + (r / 2u);\n let b2_idx = b2 + (r / 4u);\n\n blocks[b01_idx] = insertBits(blocks[b01_idx], extractBits(unpacked[unpacked_idx], 0u, 2u), (r * 16u) % 32u, 16u);\n blocks[b2_idx] = insertBits(blocks[b2_idx], extractBits(unpacked[unpacked_idx], 2u, 1u), (r * 8u) % 32u, 8u); \n }\n \n for (var c = 1u; c < ").concat(columnsPerBlock, "u; c++) {\n for (var r = 0u; r < ").concat(rowsPerBlock, "u; r++) {\n let unpacked_idx = r * ").concat(columnsPerBlock, "u + c;\n let b01_idx = b01 + (r / 2u);\n let b2_idx = b2 + (r / 4u);\n \n blocks[b01_idx] = insertBits(blocks[b01_idx], extractBits(unpacked[unpacked_idx], 0u, 2u), ((r * 16u) % 32u) + (2u * c), 2u);\n blocks[b2_idx] = insertBits(blocks[b2_idx], extractBits(unpacked[unpacked_idx], 2u, 1u), ((r * 8u) % 32u) + c, 1u); \n }\n }\n}\n\n").concat(emptyShader); + var preprocessBlocks5BitShaderSource = "\n\nstruct argsStruct {\n nbr: u32,\n nbc: u32,\n blocks_offset: u32\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar blocks: array;\n\n".concat(unpackBlock128BitDepth5, "\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.nbr || global_id.y >= args.nbc) {\n return;\n }\n\n let blocks_start: u32 = args.blocks_offset + ((global_id.x * args.nbc + global_id.y) * 20u);\n unpack_block_128_bit_depth_5(blocks_start); \n \n let b03: u32 = blocks_start;\n let b4: u32 = blocks_start + 16u;\n\n for (var r = 0u; r < ").concat(rowsPerBlock, "u; r++) {\n let unpacked_idx = r * ").concat(columnsPerBlock, "u;\n let 
b03_idx = b03 + r;\n let b4_idx = b4 + (r / 4u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 0u, 32u);\n blocks[b4_idx] = insertBits(blocks[b4_idx], extractBits(unpacked[unpacked_idx], 4u, 1u), (r * 8u) % 32u, 8u);\n }\n \n for (var c = 1u; c < ").concat(columnsPerBlock, "u; c++) {\n for (var r = 0u; r < ").concat(rowsPerBlock, "u; r++) {\n let unpacked_idx = r * ").concat(columnsPerBlock, "u + c;\n let b03_idx = b03 + r;\n let b4_idx = b4 + (r / 4u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 4 * c, 4u);\n blocks[b4_idx] = insertBits(blocks[b4_idx], extractBits(unpacked[unpacked_idx], 4u, 1u), ((r * 8u) % 32u) + c, 1u);\n }\n } \n}\n\n").concat(emptyShader); + var preprocessBlocks6BitShaderSource = "\n\nstruct argsStruct {\n nbr: u32,\n nbc: u32,\n blocks_offset: u32\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar blocks: array;\n\n".concat(unpackBlock128BitDepth6, "\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.nbr || global_id.y >= args.nbc) {\n return;\n }\n \n let blocks_start: u32 = args.blocks_offset + ((global_id.x * args.nbc + global_id.y) * 24u);\n unpack_block_128_bit_depth_6(blocks_start);\n \n let b03: u32 = blocks_start;\n let b45: u32 = blocks_start + 16u; \n for (var r = 0u; r < ").concat(rowsPerBlock, "u; r++) {\n let unpacked_idx = r * ").concat(columnsPerBlock, "u;\n let b03_idx = b03 + r;\n let b45_idx = b45 + (r / 2u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 0u, 32u);\n blocks[b45_idx] = insertBits(blocks[b45_idx], extractBits(unpacked[unpacked_idx], 4u, 2u), ((r * 16u) % 32u), 16u);\n }\n\n for (var c = 1u; c < 
").concat(columnsPerBlock, "u; c++) {\n for (var r = 0u; r < ").concat(rowsPerBlock, "u; r++) {\n let unpacked_idx = r * ").concat(columnsPerBlock, "u + c;\n let b03_idx = b03 + r;\n let b45_idx = b45 + (r / 2u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 4 * c, 4u);\n blocks[b45_idx] = insertBits(blocks[b45_idx], extractBits(unpacked[unpacked_idx], 4u, 2u), ((r * 16u) % 32u) + (2 * c), 2u);\n }\n }\n}\n\n").concat(emptyShader); + var preprocessBlocks7BitShaderSource = "\n\nstruct argsStruct {\n nbr: u32,\n nbc: u32,\n blocks_offset: u32\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar blocks: array;\n\n".concat(unpackBlock128BitDepth7, "\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.nbr || global_id.y >= args.nbc) {\n return;\n }\n \n let blocks_start: u32 = args.blocks_offset + ((global_id.x * args.nbc + global_id.y) * 28u);\n unpack_block_128_bit_depth_7(blocks_start);\n \n let b03: u32 = blocks_start;\n let b45: u32 = blocks_start + 16u;\n let b6: u32 = blocks_start + 24u; \n for (var r = 0u; r < ").concat(rowsPerBlock, "u; r++) {\n let unpacked_idx = r * ").concat(columnsPerBlock, "u;\n let b03_idx = b03 + r;\n let b45_idx = b45 + (r / 2u);\n let b6_idx = b6 + (r / 4u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 0, 32u);\n blocks[b45_idx] = insertBits(blocks[b45_idx], extractBits(unpacked[unpacked_idx], 4u, 2u), ((r * 16u) % 32u), 16u);\n blocks[b6_idx] = insertBits(blocks[b6_idx], extractBits(unpacked[unpacked_idx], 6u, 1u), ((r * 8u) % 32u), 8u);\n }\n \n for (var c = 1u; c < ").concat(columnsPerBlock, "u; c++) {\n for (var r = 0u; r < ").concat(rowsPerBlock, "u; r++) {\n let unpacked_idx = 
r * ").concat(columnsPerBlock, "u + c;\n let b03_idx = b03 + r;\n let b45_idx = b45 + (r / 2u);\n let b6_idx = b6 + (r / 4u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 4 * c, 4u);\n blocks[b45_idx] = insertBits(blocks[b45_idx], extractBits(unpacked[unpacked_idx], 4u, 2u), ((r * 16u) % 32u) + (2 * c), 2u);\n blocks[b6_idx] = insertBits(blocks[b6_idx], extractBits(unpacked[unpacked_idx], 6u, 1u), ((r * 8u) % 32u) + c, 1u);\n }\n }\n}\n\n").concat(emptyShader); + var preprocessShaderSources = { + 3: preprocessBlocks3BitShaderSource, + 5: preprocessBlocks5BitShaderSource, + 6: preprocessBlocks6BitShaderSource, + 7: preprocessBlocks7BitShaderSource + }; + var preprocessShaderNames = { + 3: "pv_picollm_weight_block_mixed_16x8_preprocess_blocks_3bit_shader", + 5: "pv_picollm_weight_block_mixed_16x8_preprocess_blocks_5bit_shader", + 6: "pv_picollm_weight_block_mixed_16x8_preprocess_blocks_6bit_shader", + 7: "pv_picollm_weight_block_mixed_16x8_preprocess_blocks_7bit_shader" + }; + + var BM = 8; + var BN = 32; + var TM = 2; + var TN = 16; + var TC = rowsPerBlock * BM * BN / (TM * TN); + var constantSnippet = "\nconst BM = ".concat(BM, "u;\nconst BN = ").concat(BN, "u;\n\nconst TM = ").concat(TM, "u;\nconst TN = ").concat(TN, "u;\n\nconst TC = ").concat(TC, "u;\n\nconst ROW_PER_BLOCK = ").concat(rowsPerBlock, "u;\nconst COL_PER_BLOCK = ").concat(columnsPerBlock, "u;\n\nconst VEC_COL_PER_BLOCK = COL_PER_BLOCK / 4u;\n\nconst block_size: u32 = (COL_PER_BLOCK * ROW_PER_BLOCK * bit_depth) / 32u;\n\n"); + var forwardMultipleInputArgsSnippet = "\nstruct argsStruct {\n n: u32,\n m: u32,\n total_nbc: u32,\n k: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array;\n"; + var 
forwardMultipleSharedPrivateMemSnippet = "\nvar shared_x: array, BN * VEC_COL_PER_BLOCK>;\nvar shared_ab: array;\nvar shared_w: array, BM * ROW_PER_BLOCK * VEC_COL_PER_BLOCK>;\n\nvar local_x: array, TN * VEC_COL_PER_BLOCK>;\nvar local_x_sums: array;\nvar local_results: array;\n"; + var forwardMultipleLocalVarSnippet = "\n let tid = local_id.x;\n let bm_idx = workgroup_id.x;\n let bn_idx = workgroup_id.y;\n\n let local_bm_idx = bm_idx * BM;\n let local_bn_idx = bn_idx * BN;\n \n let n_idx = tid % (BN / TN);\n let k_idx = tid / (BN / TN) / (BM * ROW_PER_BLOCK / TM);\n let m_idx = tid / (BN / TN) % (BM * ROW_PER_BLOCK / TM);\n"; + var forwardMultipleLoadW1Bit = "\n let b0 = blocks[src + (row / 4u)];\n\n let b0_offset_base = (row * 8u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b0_offset = b0_offset_base + (c * 4u); \n shared_w[dst + c] = vec4(\n f32(extractBits(b0, b0_offset, 1u)), \n f32(extractBits(b0, b0_offset + 1, 1u)),\n f32(extractBits(b0, b0_offset + 2, 1u)),\n f32(extractBits(b0, b0_offset + 3, 1u)));\n }\n"; + var forwardMultipleLoadW2Bit = "\n let b01 = blocks[src + (row / 2u)];\n \n let b01_offset_base = (row * 16u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) { \n let b01_offset = b01_offset_base + (c * 8u);\n shared_w[dst + c] = vec4(\n f32(extractBits(b01, b01_offset, 2u)), \n f32(extractBits(b01, b01_offset + 2, 2u)),\n f32(extractBits(b01, b01_offset + 4, 2u)),\n f32(extractBits(b01, b01_offset + 6, 2u)));\n } \n"; + var forwardMultipleLoadW3Bit = "\n let b01 = blocks[src + (row / 2u)];\n let b2 = blocks[src + 8u + (row / 4u)]; \n\n let b01_offset_base = (row * 16u) % 32u;\n let b2_offset_base = (row * 8u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b01_offset = b01_offset_base + (c * 8u);\n let b2_offset = b2_offset_base + (c * 4u);\n\n shared_w[dst + c] = vec4(\n f32(insertBits(extractBits(b01, b01_offset, 2u), extractBits(b2, b2_offset, 1u), 2u, 1u)), \n f32(insertBits(extractBits(b01, b01_offset + 2, 
2u), extractBits(b2, b2_offset + 1, 1u), 2u, 1u)),\n f32(insertBits(extractBits(b01, b01_offset + 4, 2u), extractBits(b2, b2_offset + 2, 1u), 2u, 1u)),\n f32(insertBits(extractBits(b01, b01_offset + 6, 2u), extractBits(b2, b2_offset + 3, 1u), 2u, 1u))); \n }\n"; + var forwardMultipleLoadW4Bit = "\n let b03 = blocks[src + row];\n \n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b03_offset = (c * 16u); \n shared_w[dst + c] = vec4(\n f32(extractBits(b03, b03_offset, 4u)), \n f32(extractBits(b03, b03_offset + 4, 4u)),\n f32(extractBits(b03, b03_offset + 8, 4u)),\n f32(extractBits(b03, b03_offset + 12, 4u)));\n }\n"; + var forwardMultipleLoadW5Bit = "\n let b03 = blocks[src + row];\n let b4 = blocks[src + 16u + (row / 4u)];\n \n let b4_offset_base = (row * 8u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b03_offset = (c * 16u);\n let b4_offset = b4_offset_base + (c * 4u);\n shared_w[dst + c] = vec4(\n f32(insertBits(extractBits(b03, b03_offset, 4u), extractBits(b4, b4_offset, 1u), 4u, 1u)), \n f32(insertBits(extractBits(b03, b03_offset + 4, 4u), extractBits(b4, b4_offset + 1, 1u), 4u, 1u)),\n f32(insertBits(extractBits(b03, b03_offset + 8, 4u), extractBits(b4, b4_offset + 2, 1u), 4u, 1u)),\n f32(insertBits(extractBits(b03, b03_offset + 12, 4u), extractBits(b4, b4_offset + 3, 1u), 4u, 1u)));\n }\n"; + var forwardMultipleLoadW6Bit = "\n let b03 = blocks[src + row];\n let b45 = blocks[src + 16u + (row / 2u)];\n \n let b45_offset_base = (row * 16u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) { \n let b03_offset = (c * 16u);\n let b45_offset = b45_offset_base + (c * 8u);\n shared_w[dst + c] = vec4(\n f32(insertBits(extractBits(b03, b03_offset, 4u), extractBits(b45, b45_offset, 2u), 4u, 2u)), \n f32(insertBits(extractBits(b03, b03_offset + 4, 4u), extractBits(b45, b45_offset + 2, 2u), 4u, 2u)),\n f32(insertBits(extractBits(b03, b03_offset + 8, 4u), extractBits(b45, b45_offset + 4, 2u), 4u, 2u)),\n f32(insertBits(extractBits(b03, b03_offset + 
12, 4u), extractBits(b45, b45_offset + 6, 2u), 4u, 2u)));\n }\n"; + var forwardMultipleLoadW7Bit = "\n let b03 = blocks[src + row];\n let b45 = blocks[src + 16u + (row / 2u)];\n let b6 = blocks[src + 24u + (row / 4u)];\n \n let b45_offset_base = (row * 16u) % 32u;\n let b6_offset_base = (row * 8u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b03_offset = (c * 16u);\n let b45_offset = b45_offset_base + (c * 8u);\n let b6_offset = b6_offset_base + (c * 4u);\n \n shared_w[dst + c] = vec4(\n f32(insertBits(insertBits(extractBits(b03, b03_offset, 4u), extractBits(b45, b45_offset, 2u), 4u, 2u), extractBits(b6, b6_offset, 1u), 6u, 1u)), \n f32(insertBits(insertBits(extractBits(b03, b03_offset + 4, 4u), extractBits(b45, b45_offset + 2, 2u), 4u, 2u), extractBits(b6, b6_offset + 1, 1u), 6u, 1u)),\n f32(insertBits(insertBits(extractBits(b03, b03_offset + 8, 4u), extractBits(b45, b45_offset + 4, 2u), 4u, 2u), extractBits(b6, b6_offset + 2, 1u), 6u, 1u)),\n f32(insertBits(insertBits(extractBits(b03, b03_offset + 12, 4u), extractBits(b45, b45_offset + 6, 2u), 4u, 2u), extractBits(b6, b6_offset + 3, 1u), 6u, 1u)));\n }\n"; + var forwardMultipleLoadW8Bit = "\n let b07_offset = src + (row * 2);\n \n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b07 = blocks[b07_offset + c];\n shared_w[dst + c] = vec4(\n f32(extractBits(b07, 0u, 8u)), \n f32(extractBits(b07, 8u, 8u)),\n f32(extractBits(b07, 16u, 8u)),\n f32(extractBits(b07, 24u, 8u)));\n }\n"; + var forwardLoadWBitDepthSnippets = { + 1: forwardMultipleLoadW1Bit, + 2: forwardMultipleLoadW2Bit, + 3: forwardMultipleLoadW3Bit, + 4: forwardMultipleLoadW4Bit, + 5: forwardMultipleLoadW5Bit, + 6: forwardMultipleLoadW6Bit, + 7: forwardMultipleLoadW7Bit, + 8: forwardMultipleLoadW8Bit + }; + var forwardMultipleLoadXSnippet = "\n let total_work_x = VEC_COL_PER_BLOCK * BN;\n for (var local_idx = 0u; local_idx < divide_pad(total_work_x, TC); local_idx++) {\n let idx = local_idx * TC + tid;\n if (idx < total_work_x) { \n 
let n_load_idx = local_bn_idx + idx / VEC_COL_PER_BLOCK;\n let inner_idx = idx % VEC_COL_PER_BLOCK;\n \n if (bk_idx < args.k && n_load_idx < args.n) { \n let x_idx = (args.x_offset / 4u) + ((bk_idx * args.n + n_load_idx) * VEC_COL_PER_BLOCK + inner_idx); \n shared_x[idx] = x[x_idx];\n } else {\n shared_x[idx] = vec4(0.0);\n }\n }\n }\n"; + var forwardMultipleLoadABSnippet = "\n let total_work_ab = BM * 2;\n for (var local_idx = 0u; local_idx < divide_pad(total_work_ab, TC); local_idx++) {\n let idx = local_idx * TC + tid;\n if (idx < total_work_ab) {\n let m_load_idx = local_bm_idx + idx / 2; \n let inner_idx = (idx % 2) * 16u;\n \n if (m_load_idx < args.m && bk_idx < args.k) {\n let ab_bits = extractBits(metas[args.metas_offset + (m_load_idx * args.k + bk_idx)], inner_idx, 16u);\n shared_ab[idx] = from_fp510(ab_bits); \n } else {\n shared_ab[idx] = 0.0;\n }\n }\n }\n"; + var forwardMultipleLoadWSnippet = function forwardMultipleLoadWSnippet(bitDepth) { + return "\n let total_work_w = BM * ROW_PER_BLOCK;\n for (var local_idx = 0u; local_idx < divide_pad(total_work_w, TC); local_idx++) {\n let idx = local_idx * TC + tid;\n if (idx < total_work_w) {\n let m_load_idx = local_bm_idx + idx / ROW_PER_BLOCK;\n let row = idx % ROW_PER_BLOCK;\n let dst = idx * VEC_COL_PER_BLOCK;\n\n if (m_load_idx < args.m) {\n let src = args.blocks_offset + (m_load_idx * args.k + bk_idx) * block_size;\n ".concat(forwardLoadWBitDepthSnippets[bitDepth], "\n } else { \n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n shared_w[dst + c] = vec4(0.0);\n }\n }\n }\n }\n"); + }; + var forwardMultipleCopyXSnippet = "\nfor (var tn_idx = 0u; tn_idx < TN; tn_idx++) { \n var x_sum_vec = vec4(0.0); \n let local_x_idx = tn_idx * VEC_COL_PER_BLOCK;\n let shared_x_idx = (n_idx * TN + tn_idx) * VEC_COL_PER_BLOCK + (k_idx * VEC_COL_PER_BLOCK);\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n local_x[local_x_idx + c] = shared_x[shared_x_idx + c];\n x_sum_vec += local_x[local_x_idx + c];\n }\n 
local_x_sums[tn_idx] = x_sum_vec.x + x_sum_vec.y + x_sum_vec.z + x_sum_vec.w; \n}\n"; + var forwardMultipleComputeResultsSnippet = "\n for (var tm_idx = 0u; tm_idx < TM; tm_idx++) { \n let shared_ab_idx = ((m_idx * TM + tm_idx) / ROW_PER_BLOCK + k_idx) * 2;\n let alpha = shared_ab[shared_ab_idx];\n let beta = shared_ab[shared_ab_idx + 1]; \n let shared_w_idx = ((m_idx * TM + tm_idx) + k_idx) * VEC_COL_PER_BLOCK;\n \n for (var tn_idx = 0u; tn_idx < TN; tn_idx++) { \n let local_x_idx = tn_idx * VEC_COL_PER_BLOCK;\n \n var swx_vec = vec4(0.0); \n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) { \n swx_vec += shared_w[shared_w_idx + c] * local_x[local_x_idx + c];\n }\n let swx = swx_vec.x + swx_vec.y + swx_vec.z + swx_vec.w;\n \n let kappa = alpha * local_x_sums[tn_idx]; \n let results_idx = tm_idx * TN + tn_idx;\n local_results[results_idx] += kappa + (beta * swx);\n }\n }\n"; + var forwardMultipleWriteResultsSnippet = "\nfor (var tm_idx = 0u; tm_idx < TM; tm_idx++) {\n let row = local_bm_idx * ROW_PER_BLOCK + (m_idx * TM + tm_idx); \n for (var tn_idx = 0u; tn_idx < TN; tn_idx++) { \n let col = local_bn_idx + (n_idx * TN + tn_idx);\n if (row < args.m * ROW_PER_BLOCK && col < args.n) {\n let y_idx = args.y_offset + ((row / ROW_PER_BLOCK) * args.n + col) * ROW_PER_BLOCK + (row % ROW_PER_BLOCK);\n let results_idx = tm_idx * TN + tn_idx;\n \n y[y_idx] += local_results[results_idx];\n }\n }\n}\n"; + var forwardMultipleShaderSources = function forwardMultipleShaderSources(bitDepth) { + return "\n\n".concat(forwardMultipleInputArgsSnippet, "\n\n").concat(constantSnippet, "\n\n").concat(forwardMultipleSharedPrivateMemSnippet, "\n\n").concat(fromFP510Function, "\n\n").concat(dividePadFunction, "\n\nconst bit_depth: u32 = ").concat(bitDepth, "u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n \n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(\n @builtin(local_invocation_id) 
local_id: vec3,\n @builtin(workgroup_id) workgroup_id: vec3\n) {\n ").concat(forwardMultipleLocalVarSnippet, "\n \n for (var bk_idx = 0u; bk_idx < args.k; bk_idx++) { \n ").concat(forwardMultipleLoadXSnippet, "\n ").concat(forwardMultipleLoadABSnippet, " \n ").concat(forwardMultipleLoadWSnippet(bitDepth), " \n workgroupBarrier();\n \n ").concat(forwardMultipleCopyXSnippet, "\n ").concat(forwardMultipleComputeResultsSnippet, "\n workgroupBarrier();\n }\n \n ").concat(forwardMultipleWriteResultsSnippet, "\n}\n\n").concat(emptyShader, "\n"); + }; + + var forwardShuffleXShaderSource = "\nstruct argsStruct {\n n: u32,\n shape1: u32,\n x_offset: u32,\n indices_offset: u32, \n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array;\n\n@group(0) @binding(2)\nvar indices: array;\n\n@group(0) @binding(3)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n || global_id.y >= args.shape1) {\n return;\n } \n\n let b = global_id.x;\n let i = global_id.y;\n \n let c = i / 8u;\n let j = i % 8u;\n y[((c * args.n) + b) * 8 + j] = x[args.x_offset + (b * args.shape1) + indices[args.indices_offset + i]];\n}\n\n".concat(emptyShader, "\n"); + var forwardSingleReduceYShaderSource = "\nstruct argsStruct {\n nvr: u32,\n nbc: u32, \n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar y: array>;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) { \n if (global_id.x > args.nvr) {\n return;\n }\n\n let x_start = global_id.x * args.nbc;\n var sum: vec4 = 
vec4(0.0, 0.0, 0.0, 0.0);\n for (var i = 0u; i < args.nbc; i++) {\n sum += x[x_start + i]; \n }\n y[global_id.x] += sum;\n}\n\n".concat(emptyShader); + var forwardShuffleYShaderSource = "\nstruct argsStruct {\n n: u32,\n shape0: u32, \n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array;\n\n@group(0) @binding(2)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n || global_id.y >= args.shape0) {\n return;\n } \n \n let b = global_id.x;\n let i = global_id.y;\n \n let r = i / 16u;\n let j = i % 16u;\n y[(b * args.shape0) + (r * 16) + j] = x[(((r * args.n) + b) * 16) + j];\n}\n\n".concat(emptyShader); + var addBiasShaderSource = "\nstruct argsStruct {\n dimension: u32\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar bias: array;\n\n@group(0) @binding(2)\nvar y: array;\n\n".concat(fromFP510Function, "\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) { \n y[(global_id.x * args.dimension) + global_id.y] += bias[global_id.y];\n}\n\n").concat(emptyShader); + var forwardSingleBitDepth1ShaderSource = "\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(fromFP510Function, "\n\nconst block_size: u32 = 4u;\n\noverride 
workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n\n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(columnsPerBlock, ";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b0_start = row_blocks_start + br_offset + (bc * block_size);\n var b0_offset = 0u;\n \n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(columnsPerBlock, "; j+=4) {\n \n var b0 = blocks[b0_start];\n \n let w0_0 = f32(extractBits(b0, b0_offset + j, 1u)); \n let w0_1 = f32(extractBits(b0, b0_offset + j + 1, 1u)); \n let w0_2 = f32(extractBits(b0, b0_offset + j + 2, 1u));\n let w0_3 = f32(extractBits(b0, b0_offset + j + 3, 1u));\n \n b0_offset = 8u;\n \n let w1_0 = f32(extractBits(b0, b0_offset + j, 1u)); \n let w1_1 = f32(extractBits(b0, b0_offset + j + 1, 1u)); \n let w1_2 = f32(extractBits(b0, b0_offset + j + 2, 1u));\n let w1_3 = f32(extractBits(b0, b0_offset + j + 3, 1u));\n \n b0_offset = 16u;\n \n let w2_0 = f32(extractBits(b0, b0_offset + j, 1u)); \n let w2_1 = f32(extractBits(b0, b0_offset + j + 1, 1u)); \n let w2_2 = f32(extractBits(b0, b0_offset + j + 2, 1u));\n let w2_3 = f32(extractBits(b0, b0_offset + j + 3, 1u));\n \n b0_offset = 24u;\n \n let w3_0 = f32(extractBits(b0, b0_offset + j, 1u)); \n let w3_1 = f32(extractBits(b0, b0_offset + j + 1, 1u)); 
\n let w3_2 = f32(extractBits(b0, b0_offset + j + 2, 1u));\n let w3_3 = f32(extractBits(b0, b0_offset + j + 3, 1u));\n \n b0_offset = 0u;\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w; \n }\n \n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(emptyShader); + var forwardSingleBitDepth2ShaderSource = "\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(fromFP510Function, "\n\nconst block_size: u32 = 8u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n\n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(columnsPerBlock, ";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 
4u);\n \n var b01_start = row_blocks_start + (br_offset * 2u) + (bc * block_size);\n var b01_offset = 0u;\n \n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(columnsPerBlock, "; j+=4) {\n \n var b01 = blocks[b01_start];\n \n let w0_0 = f32(extractBits(b01, b01_offset + (2u * j), 2u)); \n let w0_1 = f32(extractBits(b01, b01_offset + (2u * (j + 1)), 2u)); \n let w0_2 = f32(extractBits(b01, b01_offset + (2u * (j + 2)), 2u));\n let w0_3 = f32(extractBits(b01, b01_offset + (2u * (j + 3)), 2u));\n \n b01_offset = 16u;\n \n let w1_0 = f32(extractBits(b01, b01_offset + (2u * j), 2u)); \n let w1_1 = f32(extractBits(b01, b01_offset + (2u * (j + 1)), 2u)); \n let w1_2 = f32(extractBits(b01, b01_offset + (2u * (j + 2)), 2u));\n let w1_3 = f32(extractBits(b01, b01_offset + (2u * (j + 3)), 2u));\n \n b01_offset = 0u; \n b01 = blocks[b01_start + 1u];\n \n let w2_0 = f32(extractBits(b01, b01_offset + (2u * j), 2u)); \n let w2_1 = f32(extractBits(b01, b01_offset + (2u * (j + 1)), 2u)); \n let w2_2 = f32(extractBits(b01, b01_offset + (2u * (j + 2)), 2u));\n let w2_3 = f32(extractBits(b01, b01_offset + (2u * (j + 3)), 2u));\n \n b01_offset = 16u;\n \n let w3_0 = f32(extractBits(b01, b01_offset + (2u * j), 2u)); \n let w3_1 = f32(extractBits(b01, b01_offset + (2u * (j + 1)), 2u)); \n let w3_2 = f32(extractBits(b01, b01_offset + (2u * (j + 2)), 2u));\n let w3_3 = f32(extractBits(b01, b01_offset + (2u * (j + 3)), 2u));\n \n b01_offset = 0u;\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w; \n }\n \n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(emptyShader); + var forwardSingleBitDepth3ShaderSource = 
"\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(fromFP510Function, "\n\nconst block_size: u32 = 12u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id : vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(columnsPerBlock, ";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n\n let x_start = ((args.x_offset + c) / 4u);\n \n var b01_start = row_blocks_start + (br_offset * 2u) + (bc * block_size);\n var b2_start = row_blocks_start + br_offset + (bc * block_size) + 8u;\n var b01_offset = 0u;\n var b2_offset = 0u;\n\n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(columnsPerBlock, "; j+=4) { \n \n var b01 = blocks[b01_start];\n var b2 = blocks[b2_start];\n \n var b01_bits = extractBits(b01, b01_offset + (2u * j), 2u);\n var b2_bit = extractBits(b2, b2_offset + j, 1u);\n let w0_0 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 1)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j 
+ 1), 1u);\n let w0_1 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 2)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 2), 1u);\n let w0_2 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 3)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 3), 1u);\n let w0_3 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_offset = 16u;\n b2_offset = 8u;\n \n b01_bits = extractBits(b01, b01_offset + (2u * j), 2u);\n b2_bit = extractBits(b2, b2_offset + j, 1u); \n let w1_0 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 1)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 1), 1u);\n let w1_1 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 2)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 2), 1u);\n let w1_2 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 3)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 3), 1u);\n let w1_3 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_offset = 0u;\n b2_offset = 16u; \n b01 = blocks[b01_start + 1u];\n \n b01_bits = extractBits(b01, b01_offset + (2u * j), 2u);\n b2_bit = extractBits(b2, b2_offset + j, 1u); \n let w2_0 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 1)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 1), 1u);\n let w2_1 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 2)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 2), 1u);\n let w2_2 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 3)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 3), 1u);\n let w2_3 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_offset = 16u;\n b2_offset = 24u; \n \n b01_bits = extractBits(b01, 
b01_offset + (2u * j), 2u);\n b2_bit = extractBits(b2, b2_offset + j, 1u); \n let w3_0 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 1)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 1), 1u);\n let w3_1 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 2)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 2), 1u);\n let w3_2 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 3)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 3), 1u);\n let w3_3 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_offset = 0u;\n b2_offset = 0u; \n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n \n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(emptyShader); + var forwardSingleBitDepth4ShaderSource = "\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(fromFP510Function, "\n\nconst block_size: u32 = 16u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) 
{\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(columnsPerBlock, ";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b03_start = row_blocks_start + (br_offset * 4u) + (bc * block_size); \n\n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n\n for (var j = 0u; j < ").concat(columnsPerBlock, "; j+=4) {\n \n var b03 = blocks[b03_start]; \n \n let w0_0 = f32(extractBits(b03, 4u * j, 4u)); \n let w0_1 = f32(extractBits(b03, 4u * (j + 1), 4u)); \n let w0_2 = f32(extractBits(b03, 4u * (j + 2), 4u));\n let w0_3 = f32(extractBits(b03, 4u * (j + 3), 4u));\n \n b03 = blocks[b03_start + 1];\n \n let w1_0 = f32(extractBits(b03, 4u * j, 4u)); \n let w1_1 = f32(extractBits(b03, 4u * (j + 1), 4u)); \n let w1_2 = f32(extractBits(b03, 4u * (j + 2), 4u));\n let w1_3 = f32(extractBits(b03, 4u * (j + 3), 4u));\n \n b03 = blocks[b03_start + 2];\n \n let w2_0 = f32(extractBits(b03, 4u * j, 4u)); \n let w2_1 = f32(extractBits(b03, 4u * (j + 1), 4u)); \n let w2_2 = f32(extractBits(b03, 4u * (j + 2), 4u));\n let w2_3 = f32(extractBits(b03, 4u * (j + 3), 4u));\n \n b03 = blocks[b03_start + 3];\n \n let w3_0 = f32(extractBits(b03, 4u * j, 4u)); \n let w3_1 = f32(extractBits(b03, 4u * (j + 1), 4u)); \n let w3_2 = f32(extractBits(b03, 4u * (j + 2), 4u));\n let w3_3 = f32(extractBits(b03, 4u * (j + 3), 4u));\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y 
+ x_vec.z + x_vec.w;\n }\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(emptyShader); + var forwardSingleBitDepth5ShaderSource = "\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(fromFP510Function, "\n\nconst block_size: u32 = 20u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(columnsPerBlock, ";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b03_start = row_blocks_start + (br_offset * 4u) + (bc * block_size);\n var b4_start = row_blocks_start + br_offset + (bc * block_size) + 16u;\n \n var b4_offset = 0u;\n \n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(columnsPerBlock, "; j+=4) {\n \n var b03 = blocks[b03_start];\n var b4 = blocks[b4_start];\n \n var b03_bits = 
extractBits(b03, 4u * j, 4u);\n var b4_bit = extractBits(b4, b4_offset + j, 1u); \n let w0_0 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 1), 1u);\n let w0_1 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 2), 1u);\n let w0_2 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 3), 1u);\n let w0_3 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03 = blocks[b03_start + 1];\n b4_offset = 8u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b4_bit = extractBits(b4, b4_offset + j, 1u); \n let w1_0 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 1), 1u);\n let w1_1 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 2), 1u);\n let w1_2 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 3), 1u);\n let w1_3 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03 = blocks[b03_start + 2];\n b4_offset = 16u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b4_bit = extractBits(b4, b4_offset + j, 1u); \n let w2_0 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 1), 1u);\n let w2_1 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 2), 1u);\n let w2_2 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 3), 1u);\n let w2_3 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n 
b03 = blocks[b03_start + 3];\n b4_offset = 24u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b4_bit = extractBits(b4, b4_offset + j, 1u); \n let w3_0 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 1), 1u);\n let w3_1 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 2), 1u);\n let w3_2 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 3), 1u);\n let w3_3 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b4_offset = 0u;\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(emptyShader); + var forwardSingleBitDepth6ShaderSource = "\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(fromFP510Function, "\n\nconst block_size: u32 = 24u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) 
{\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(columnsPerBlock, ";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b03_start = row_blocks_start + (br_offset * 4u) + (bc * block_size);\n var b45_start = row_blocks_start + (br_offset * 2u) + (bc * block_size) + 16u;\n \n var b45_offset = 0u;\n \n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(columnsPerBlock, "; j+=4) {\n \n var b03 = blocks[b03_start];\n var b45 = blocks[b45_start];\n \n var b03_bits = extractBits(b03, 4u * j, 4u);\n var b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n let w0_0 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n let w0_1 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n let w0_2 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n let w0_3 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03 = blocks[b03_start + 1];\n b45_offset = 16u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n let w1_0 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n let w1_1 = f32(insertBits(b03_bits, b45_bits, 
4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n let w1_2 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n let w1_3 = f32(insertBits(b03_bits, b45_bits, 4u, 2u)); \n \n b03 = blocks[b03_start + 2];\n b45 = blocks[b45_start + 1];\n b45_offset = 0u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n let w2_0 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n let w2_1 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n let w2_2 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n let w2_3 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03 = blocks[b03_start + 3];\n b45_offset = 16u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n let w3_0 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n let w3_1 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n let w3_2 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n let w3_3 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b45_offset = 0u;\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, 
w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(emptyShader); + var forwardSingleBitDepth7ShaderSource = "\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(fromFP510Function, "\n\nconst block_size: u32 = 28u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(columnsPerBlock, ";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b03_start = row_blocks_start + (br_offset * 4u) + (bc * block_size);\n var b45_start = row_blocks_start + (br_offset * 2u) + (bc * block_size) + 16u;\n var b6_start = row_blocks_start + br_offset + (bc * block_size) + 24u;\n \n var b45_offset 
= 0u;\n var b6_offset = 0u;\n \n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(columnsPerBlock, "; j+=4) {\n \n var b03 = blocks[b03_start];\n var b45 = blocks[b45_start];\n var b6 = blocks[b6_start];\n \n var b03_bits = extractBits(b03, 4u * j, 4u);\n var b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n var b6_bit = extractBits(b6, b6_offset + j, 1u);\n let w0_0 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 1, 1u);\n let w0_1 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 2, 1u);\n let w0_2 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u)); \n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 3, 1u);\n let w0_3 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03 = blocks[b03_start + 1];\n b45_offset = 16u;\n b6_offset = 8u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n b6_bit = extractBits(b6, b6_offset + j, 1u);\n let w1_0 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 1, 1u);\n let w1_1 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 2, 1u);\n let w1_2 = 
f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u)); \n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 3, 1u);\n let w1_3 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03 = blocks[b03_start + 2];\n b45 = blocks[b45_start + 1];\n b45_offset = 0u;\n b6_offset = 16u;\n\n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n b6_bit = extractBits(b6, b6_offset + j, 1u);\n let w2_0 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 1, 1u);\n let w2_1 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 2, 1u);\n let w2_2 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u)); \n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 3, 1u);\n let w2_3 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03 = blocks[b03_start + 3];\n b45_offset = 16u;\n b6_offset = 24u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n b6_bit = extractBits(b6, b6_offset + j, 1u);\n let w3_0 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 1, 1u);\n let w3_1 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n 
b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 2, 1u);\n let w3_2 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u)); \n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 3, 1u);\n let w3_3 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b45_offset = 0u;\n b6_offset = 0u;\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(emptyShader); + var forwardSingleBitDepth8ShaderSource = "\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(fromFP510Function, "\n\nconst block_size: u32 = 32u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(columnsPerBlock, ";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let 
br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b07_start = row_blocks_start + (br_offset * 8u) + (bc * block_size); \n\n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n\n for (var j = 0u; j < 2; j++) {\n \n var b07 = blocks[b07_start + j];\n \n let w0_0 = f32(extractBits(b07, 0u, 8u)); \n let w0_1 = f32(extractBits(b07, 8u, 8u)); \n let w0_2 = f32(extractBits(b07, 16u, 8u));\n let w0_3 = f32(extractBits(b07, 24u, 8u));\n \n b07 = blocks[b07_start + 2 + j];\n \n let w1_0 = f32(extractBits(b07, 0u, 8u)); \n let w1_1 = f32(extractBits(b07, 8u, 8u)); \n let w1_2 = f32(extractBits(b07, 16u, 8u));\n let w1_3 = f32(extractBits(b07, 24u, 8u));\n \n b07 = blocks[b07_start + 4 + j];\n \n let w2_0 = f32(extractBits(b07, 0u, 8u)); \n let w2_1 = f32(extractBits(b07, 8u, 8u)); \n let w2_2 = f32(extractBits(b07, 16u, 8u));\n let w2_3 = f32(extractBits(b07, 24u, 8u));\n \n b07 = blocks[b07_start + 6 + j];\n \n let w3_0 = f32(extractBits(b07, 0u, 8u)); \n let w3_1 = f32(extractBits(b07, 8u, 8u)); \n let w3_2 = f32(extractBits(b07, 16u, 8u));\n let w3_3 = f32(extractBits(b07, 24u, 8u));\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_vec = x[x_start + j];\n res[j] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(emptyShader); + var forwardSingleShaderSources = { + 1: forwardSingleBitDepth1ShaderSource, + 2: 
forwardSingleBitDepth2ShaderSource, + 3: forwardSingleBitDepth3ShaderSource, + 4: forwardSingleBitDepth4ShaderSource, + 5: forwardSingleBitDepth5ShaderSource, + 6: forwardSingleBitDepth6ShaderSource, + 7: forwardSingleBitDepth7ShaderSource, + 8: forwardSingleBitDepth8ShaderSource + }; + var forwardSingleShaderNames = { + 1: "pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_1_shader", + 2: "pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_2_shader", + 3: "pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_3_shader", + 4: "pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_4_shader", + 5: "pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_5_shader", + 6: "pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_6_shader", + 7: "pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_7_shader", + 8: "pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_8_shader" + }; + var forwardShaderSources = { + 1: forwardMultipleShaderSources(1), + 2: forwardMultipleShaderSources(2), + 3: forwardMultipleShaderSources(3), + 4: forwardMultipleShaderSources(4), + 5: forwardMultipleShaderSources(5), + 6: forwardMultipleShaderSources(6), + 7: forwardMultipleShaderSources(7), + 8: forwardMultipleShaderSources(8) + }; + var forwardShaderNames = { + 1: "pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_1_shader", + 2: "pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_2_shader", + 3: "pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_3_shader", + 4: "pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_4_shader", + 5: "pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_5_shader", + 6: "pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_6_shader", + 7: "pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_7_shader", + 8: "pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_8_shader" + }; + var forwardShuffleXShaderName = 
"pv_picollm_weight_block_mixed_16x8_forward_shuffle_x_shader"; + var forwardShuffleYShaderName = "pv_picollm_weight_block_mixed_16x8_forward_shuffle_y_shader"; + var addBiasShaderName = "pv_picollm_weight_block_mixed_16x8_add_bias_shader"; + var forwardSingleReduceYShaderName = "pv_picollm_weight_block_mixed_16x8_forward_single_reduce_y_shader"; + + var _weightBlockMixed16x; + var loadPreprocessBlocksShader = function loadPreprocessBlocksShader(device, bitDepth) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "weight preprocess blocks ".concat(bitDepth, " bind group layout"), + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "weight preprocess blocks ".concat(bitDepth, " pipeline layout"), + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "weight preprocess blocks ".concat(bitDepth, " shader module"), + code: preprocessShaderSources[bitDepth] + }); + var computePipeline = device.createComputePipeline({ + label: "weight preprocess blocks ".concat(bitDepth, " pipeline"), + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: preprocessDim, + workgroup_size_y: preprocessDim + } + } + }); + return { + computePipeline: computePipeline + }; + }; + var loadForwardShuffleXShader = function loadForwardShuffleXShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "weight shuffle x bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 
'read-only-storage' + } + }, { + binding: 3, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "weight shuffle x pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "weight shuffle x shader module", + code: forwardShuffleXShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "weight shuffle x pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_y: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE + } + } + }); + return { + computePipeline: computePipeline + }; + }; + var loadForwardSingleReduceYShader = function loadForwardSingleReduceYShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "weight single reduce y bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "weight single reduce y pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "weight single reduce y shader module", + code: forwardSingleReduceYShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "weight single reduce y pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE + } + } + }); + return { + computePipeline: computePipeline + }; + }; + var loadForwardShuffleYShader = function loadForwardShuffleYShader(device) { + var bindGroupLayout = 
device.createBindGroupLayout({ + label: "weight shuffle y bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "weight shuffle y pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "weight shuffle y shader module", + code: forwardShuffleYShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "weight shuffle y pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint + } + }); + return { + computePipeline: computePipeline + }; + }; + var loadForwardSingleShader = function loadForwardSingleShader(device, bitDepth) { + var entries = [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 3, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 4, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }]; + var bindGroupLayout = device.createBindGroupLayout({ + label: "weight forward single ".concat(bitDepth, " bind group layout"), + entries: entries + }); + var pipelineLayout = device.createPipelineLayout({ + label: "weight forward single ".concat(bitDepth, " pipeline layout"), + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "weight forward single ".concat(bitDepth, " shader module"), + code: forwardSingleShaderSources[bitDepth] 
+ }); + var computePipeline = device.createComputePipeline({ + label: "weight forward single ".concat(bitDepth, " pipeline"), + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: weightBlockSize, + workgroup_size_y: 1 + } + } + }); + return { + computePipeline: computePipeline + }; + }; + var loadForwardShader = function loadForwardShader(device, bitDepth) { + var entries = [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 3, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 4, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }]; + var bindGroupLayout = device.createBindGroupLayout({ + label: "weight forward multi ".concat(bitDepth, " bind group layout"), + entries: entries + }); + var pipelineLayout = device.createPipelineLayout({ + label: "weight forward multi ".concat(bitDepth, " pipeline layout"), + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "weight forward multi ".concat(bitDepth, " shader module"), + code: forwardShaderSources[bitDepth] + }); + var computePipeline = device.createComputePipeline({ + label: "weight forward multi ".concat(bitDepth, " pipeline"), + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint, + constants: { + workgroup_size_x: TC + } + } + }); + return { + computePipeline: computePipeline + }; + }; + var loadAddBiasShader = function loadAddBiasShader(device) { + var bindGroupLayout = device.createBindGroupLayout({ + label: "weight add bias bind group layout", + entries: [{ + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { 
+ type: 'uniform' + } + }, { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'read-only-storage' + } + }, { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 'storage' + } + }] + }); + var pipelineLayout = device.createPipelineLayout({ + label: "weight add bias pipeline layout", + bindGroupLayouts: [bindGroupLayout] + }); + var shaderModule = device.createShaderModule({ + label: "weight add bias shader module", + code: addBiasShaderSource + }); + var computePipeline = device.createComputePipeline({ + label: "weight add bias pipeline", + layout: pipelineLayout, + compute: { + module: shaderModule, + entryPoint: shaderEntryPoint + } + }); + return { + computePipeline: computePipeline + }; + }; + var weightBlockMixed16x8Shaders = (_weightBlockMixed16x = {}, _defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_weightBlockMixed16x, preprocessShaderNames[3], function (device) { + return loadPreprocessBlocksShader(device, 3); + }), preprocessShaderNames[5], function (device) { + return loadPreprocessBlocksShader(device, 5); + }), preprocessShaderNames[6], function (device) { + return loadPreprocessBlocksShader(device, 6); + }), preprocessShaderNames[7], function (device) { + return loadPreprocessBlocksShader(device, 7); + }), forwardShuffleXShaderName, loadForwardShuffleXShader), forwardShuffleYShaderName, loadForwardShuffleYShader), forwardSingleReduceYShaderName, loadForwardSingleReduceYShader), forwardSingleShaderNames[1], function (device) { + return loadForwardSingleShader(device, 1); + }), forwardSingleShaderNames[2], function (device) { + return loadForwardSingleShader(device, 2); + }), forwardSingleShaderNames[3], function (device) { + return loadForwardSingleShader(device, 3); + }), 
_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_weightBlockMixed16x, forwardSingleShaderNames[4], function (device) { + return loadForwardSingleShader(device, 4); + }), forwardSingleShaderNames[5], function (device) { + return loadForwardSingleShader(device, 5); + }), forwardSingleShaderNames[6], function (device) { + return loadForwardSingleShader(device, 6); + }), forwardSingleShaderNames[7], function (device) { + return loadForwardSingleShader(device, 7); + }), forwardSingleShaderNames[8], function (device) { + return loadForwardSingleShader(device, 8); + }), forwardShaderNames[1], function (device) { + return loadForwardShader(device, 1); + }), forwardShaderNames[2], function (device) { + return loadForwardShader(device, 2); + }), forwardShaderNames[3], function (device) { + return loadForwardShader(device, 3); + }), forwardShaderNames[4], function (device) { + return loadForwardShader(device, 4); + }), forwardShaderNames[5], function (device) { + return loadForwardShader(device, 5); + }), _defineProperty(_defineProperty(_defineProperty(_defineProperty(_weightBlockMixed16x, forwardShaderNames[6], function (device) { + return loadForwardShader(device, 6); + }), forwardShaderNames[7], function (device) { + return loadForwardShader(device, 7); + }), forwardShaderNames[8], function (device) { + return loadForwardShader(device, 8); + }), addBiasShaderName, loadAddBiasShader)); + var getPicollmWeightBlockMixed16x8WebGpuFunctions = function getPicollmWeightBlockMixed16x8WebGpuFunctions(memory) { + var setStatus = function setStatus(statusAddress, value) { + var memoryBufferInt32 = new Int32Array(memory.buffer); + memoryBufferInt32[statusAddress / Int32Array.BYTES_PER_ELEMENT] = value; + }; + var pvPicollmPreprocessBlocksWebGpu = function pvPicollmPreprocessBlocksWebGpu(objAddress, bitDepth, blocksAddress, blocksOffsetBytes, nbr, nbc, statusAddress) { + var 
_gpuBuffers$get; + objAddress = unsignedAddress(objAddress); + blocksAddress = unsignedAddress(blocksAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[preprocessShaderNames[bitDepth]]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var blocksBuffer = (_gpuBuffers$get = gpuBuffers.get(blocksAddress)) === null || _gpuBuffers$get === void 0 ? void 0 : _gpuBuffers$get.buffer; + if (!blocksBuffer) { + console.error('blocks buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(3 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "weight preprocess blocks ".concat(bitDepth, " arg buffer")); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([nbr, nbc, blocksOffsetBytes / 4])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "weight preprocess blocks ".concat(bitDepth, " bind group"), + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: blocksBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, preprocessShaderNames[bitDepth], Math.ceil(nbr / preprocessDim), Math.ceil(nbc / preprocessDim)); + setStatus(statusAddress, 0); + }; + var pvPicollmForwardSingleShuffleXWebGpu = function pvPicollmForwardSingleShuffleXWebGpu(objAddress, xAddress, xOffsetBytes, indicesAddress, indicesOffsetBytes, shape1, yAddress, statusAddress) { + var _gpuBuffers$get2, _gpuBuffers$get3, _gpuBuffers$get4; + objAddress = unsignedAddress(objAddress); + xAddress = unsignedAddress(xAddress); + indicesAddress = 
unsignedAddress(indicesAddress); + yAddress = unsignedAddress(yAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[forwardShuffleXShaderName]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var xBuffer = (_gpuBuffers$get2 = gpuBuffers.get(xAddress)) === null || _gpuBuffers$get2 === void 0 ? void 0 : _gpuBuffers$get2.buffer; + if (!xBuffer) { + console.error('X buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var indicesBuffer = (_gpuBuffers$get3 = gpuBuffers.get(indicesAddress)) === null || _gpuBuffers$get3 === void 0 ? void 0 : _gpuBuffers$get3.buffer; + if (!indicesBuffer) { + console.error('Indices buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var yBuffer = (_gpuBuffers$get4 = gpuBuffers.get(yAddress)) === null || _gpuBuffers$get4 === void 0 ? 
void 0 : _gpuBuffers$get4.buffer; + if (!yBuffer) { + console.error('Y buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(4 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "weight shuffle x arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([1, shape1, xOffsetBytes / 4, indicesOffsetBytes / 4])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "weight forward single shuffle x bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: xBuffer + } + }, { + binding: 2, + resource: { + buffer: indicesBuffer + } + }, { + binding: 3, + resource: { + buffer: yBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, forwardShuffleXShaderName + "_single", 1, Math.ceil(shape1 / PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE)); + setStatus(statusAddress, 0); + }; + var pvPicollmForwardSingleWebGpu = function pvPicollmForwardSingleWebGpu(objAddress, bitDepth, xAddress, xOffsetBytes, metasAddress, metasOffsetBytes, blocksAddress, blocksOffsetBytes, nbr, totalNbc, bitDepthNbc, yAddress, yOffsetBytes, statusAddress) { + var _gpuBuffers$get5, _gpuBuffers$get6, _gpuBuffers$get7, _gpuBuffers$get8; + objAddress = unsignedAddress(objAddress); + xAddress = unsignedAddress(xAddress); + metasAddress = unsignedAddress(metasAddress); + blocksAddress = unsignedAddress(blocksAddress); + yAddress = unsignedAddress(yAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[forwardSingleShaderNames[bitDepth]]; + if (!shader) { + console.error('Shader has not been 
loaded'); + setStatus(statusAddress, -1); + return; + } + var xBuffer = (_gpuBuffers$get5 = gpuBuffers.get(xAddress)) === null || _gpuBuffers$get5 === void 0 ? void 0 : _gpuBuffers$get5.buffer; + if (!xBuffer) { + console.error('X buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var metasBuffer = (_gpuBuffers$get6 = gpuBuffers.get(metasAddress)) === null || _gpuBuffers$get6 === void 0 ? void 0 : _gpuBuffers$get6.buffer; + if (!metasBuffer) { + console.error('Metas buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var blocksBuffer = (_gpuBuffers$get7 = gpuBuffers.get(blocksAddress)) === null || _gpuBuffers$get7 === void 0 ? void 0 : _gpuBuffers$get7.buffer; + if (!blocksBuffer) { + console.error('Blocks buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var yBuffer = (_gpuBuffers$get8 = gpuBuffers.get(yAddress)) === null || _gpuBuffers$get8 === void 0 ? void 0 : _gpuBuffers$get8.buffer; + if (!yBuffer) { + console.error('Y buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(8 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "weight forward single ".concat(bitDepth, " arg buffer")); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([1, nbr, totalNbc, bitDepthNbc, xOffsetBytes / 4, metasOffsetBytes / 4, blocksOffsetBytes / 4, yOffsetBytes / 4])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var entries = [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: xBuffer + } + }, { + binding: 2, + resource: { + buffer: metasBuffer + } + }, { + binding: 3, + resource: { + buffer: blocksBuffer + } + }, { + binding: 4, + resource: { + buffer: yBuffer + } + }]; + var bindGroup = obj.device.createBindGroup({ + label: "weight forward single ".concat(bitDepth, " bind group"), + layout: 
shader.computePipeline.getBindGroupLayout(0), + entries: entries + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, forwardSingleShaderNames[bitDepth], Math.ceil(nbr * 4 / weightBlockSize), bitDepthNbc); + setStatus(statusAddress, 0); + }; + var pvPicollmForwardSingleReduceYWebGpu = function pvPicollmForwardSingleReduceYWebGpu(objAddress, nbr, nbc, xAddress, yAddress, statusAddress) { + var _gpuBuffers$get9, _gpuBuffers$get10; + objAddress = unsignedAddress(objAddress); + xAddress = unsignedAddress(xAddress); + yAddress = unsignedAddress(yAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[forwardSingleReduceYShaderName]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var xBuffer = (_gpuBuffers$get9 = gpuBuffers.get(xAddress)) === null || _gpuBuffers$get9 === void 0 ? void 0 : _gpuBuffers$get9.buffer; + if (!xBuffer) { + console.error('X buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var yBuffer = (_gpuBuffers$get10 = gpuBuffers.get(yAddress)) === null || _gpuBuffers$get10 === void 0 ? 
void 0 : _gpuBuffers$get10.buffer; + if (!yBuffer) { + console.error('Y buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(2 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "weight single reduce y arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([nbr * 4, nbc])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "weight forward single reduce y bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: xBuffer + } + }, { + binding: 2, + resource: { + buffer: yBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, forwardSingleReduceYShaderName, Math.ceil(nbr * 4 / weightBlockSize)); + setStatus(statusAddress, 0); + }; + var pvPicollmForwardMultipleShuffleXWebGpu = function pvPicollmForwardMultipleShuffleXWebGpu(objAddress, xAddress, xOffsetBytes, indicesAddress, indicesOffsetBytes, n, shape1, yAddress, statusAddress) { + var _gpuBuffers$get11, _gpuBuffers$get12, _gpuBuffers$get13; + objAddress = unsignedAddress(objAddress); + xAddress = unsignedAddress(xAddress); + indicesAddress = unsignedAddress(indicesAddress); + yAddress = unsignedAddress(yAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[forwardShuffleXShaderName]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var xBuffer = (_gpuBuffers$get11 = gpuBuffers.get(xAddress)) === null || _gpuBuffers$get11 === void 0 ? 
void 0 : _gpuBuffers$get11.buffer; + if (!xBuffer) { + console.error('X buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var indicesBuffer = (_gpuBuffers$get12 = gpuBuffers.get(indicesAddress)) === null || _gpuBuffers$get12 === void 0 ? void 0 : _gpuBuffers$get12.buffer; + if (!indicesBuffer) { + console.error('Indices buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var yBuffer = (_gpuBuffers$get13 = gpuBuffers.get(yAddress)) === null || _gpuBuffers$get13 === void 0 ? void 0 : _gpuBuffers$get13.buffer; + if (!yBuffer) { + console.error('Y buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(4 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "weight multi shuffle x arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([n, shape1, xOffsetBytes / 4, indicesOffsetBytes / 4])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "weight forward multiple shuffle x bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: xBuffer + } + }, { + binding: 2, + resource: { + buffer: indicesBuffer + } + }, { + binding: 3, + resource: { + buffer: yBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, forwardShuffleXShaderName + "_multi", n, shape1); + setStatus(statusAddress, 0); + }; + var pvPicollmForwardMultipleWebGpu = function pvPicollmForwardMultipleWebGpu(objAddress, bitDepth, xAddress, xOffsetBytes, metasAddress, metasOffsetBytes, blocksAddress, blocksOffsetBytes, nbc, nbr, n, yAddress, yOffsetBytes, statusAddress) { + var _gpuBuffers$get14, _gpuBuffers$get15, _gpuBuffers$get16, _gpuBuffers$get17; + objAddress = unsignedAddress(objAddress); + xAddress = unsignedAddress(xAddress); + 
metasAddress = unsignedAddress(metasAddress); + blocksAddress = unsignedAddress(blocksAddress); + yAddress = unsignedAddress(yAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[forwardShaderNames[bitDepth]]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var xBuffer = (_gpuBuffers$get14 = gpuBuffers.get(xAddress)) === null || _gpuBuffers$get14 === void 0 ? void 0 : _gpuBuffers$get14.buffer; + if (!xBuffer) { + console.error('X buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var metasBuffer = (_gpuBuffers$get15 = gpuBuffers.get(metasAddress)) === null || _gpuBuffers$get15 === void 0 ? void 0 : _gpuBuffers$get15.buffer; + if (!metasBuffer) { + console.error('Metas buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var blocksBuffer = (_gpuBuffers$get16 = gpuBuffers.get(blocksAddress)) === null || _gpuBuffers$get16 === void 0 ? void 0 : _gpuBuffers$get16.buffer; + if (!blocksBuffer) { + console.error('Blocks buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var yBuffer = (_gpuBuffers$get17 = gpuBuffers.get(yAddress)) === null || _gpuBuffers$get17 === void 0 ? 
void 0 : _gpuBuffers$get17.buffer; + if (!yBuffer) { + console.error('Y buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(8 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "weight forward multi ".concat(bitDepth, " arg buffer")); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([n, nbr, 0, nbc, xOffsetBytes / 4, metasOffsetBytes / 4, blocksOffsetBytes / 4, yOffsetBytes / 4])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var entries = [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: xBuffer + } + }, { + binding: 2, + resource: { + buffer: metasBuffer + } + }, { + binding: 3, + resource: { + buffer: blocksBuffer + } + }, { + binding: 4, + resource: { + buffer: yBuffer + } + }]; + var bindGroup = obj.device.createBindGroup({ + label: "weight forward multi ".concat(bitDepth, " bind group"), + layout: shader.computePipeline.getBindGroupLayout(0), + entries: entries + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, forwardShaderNames[bitDepth], Math.ceil(nbr / BM), Math.ceil(n / BN)); + setStatus(statusAddress, 0); + }; + var pvPicollmForwardMultipleShuffleYWebGpu = function pvPicollmForwardMultipleShuffleYWebGpu(objAddress, n, shape0, xAddress, yAddress, statusAddress) { + var _gpuBuffers$get18, _gpuBuffers$get19; + objAddress = unsignedAddress(objAddress); + xAddress = unsignedAddress(xAddress); + yAddress = unsignedAddress(yAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[forwardShuffleYShaderName]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var xBuffer = (_gpuBuffers$get18 = 
gpuBuffers.get(xAddress)) === null || _gpuBuffers$get18 === void 0 ? void 0 : _gpuBuffers$get18.buffer; + if (!xBuffer) { + console.error('X buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var yBuffer = (_gpuBuffers$get19 = gpuBuffers.get(yAddress)) === null || _gpuBuffers$get19 === void 0 ? void 0 : _gpuBuffers$get19.buffer; + if (!yBuffer) { + console.error('Y buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(2 * Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "weight shuffle y arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([n, shape0])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "weight forward multiple shuffle y bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: xBuffer + } + }, { + binding: 2, + resource: { + buffer: yBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, forwardShuffleYShaderName, n, shape0); + setStatus(statusAddress, 0); + }; + var pvPicollmAddBiasWebGpu = function pvPicollmAddBiasWebGpu(objAddress, n, dimension, biasAddress, yAddress, statusAddress) { + var _gpuBuffers$get20, _gpuBuffers$get21; + objAddress = unsignedAddress(objAddress); + biasAddress = unsignedAddress(biasAddress); + yAddress = unsignedAddress(yAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj || !obj.device) { + console.error('WebGPU device has not been initialized'); + setStatus(statusAddress, -1); + return; + } + var shader = obj.shaders[addBiasShaderName]; + if (!shader) { + console.error('Shader has not been loaded'); + setStatus(statusAddress, -1); + return; + } + var biasBuffer = (_gpuBuffers$get20 = 
gpuBuffers.get(biasAddress)) === null || _gpuBuffers$get20 === void 0 ? void 0 : _gpuBuffers$get20.buffer; + if (!biasBuffer) { + console.error('Bias buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var yBuffer = (_gpuBuffers$get21 = gpuBuffers.get(yAddress)) === null || _gpuBuffers$get21 === void 0 ? void 0 : _gpuBuffers$get21.buffer; + if (!yBuffer) { + console.error('Y buffer has not been allocated'); + setStatus(statusAddress, -1); + return; + } + var argsBuffer = obj.getBuffer(Uint32Array.BYTES_PER_ELEMENT, GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, false, "weight add bias arg buffer"); + obj.device.queue.writeBuffer(argsBuffer, 0, new Uint32Array([dimension])); + obj.scheduleUniformBufferForRelease(argsBuffer); + var bindGroup = obj.device.createBindGroup({ + label: "weight add bias bind group", + layout: shader.computePipeline.getBindGroupLayout(0), + entries: [{ + binding: 0, + resource: { + buffer: argsBuffer + } + }, { + binding: 1, + resource: { + buffer: biasBuffer + } + }, { + binding: 2, + resource: { + buffer: yBuffer + } + }] + }); + obj.dispatchComputerShader(bindGroup, shader.computePipeline, addBiasShaderName, n, dimension); + setStatus(statusAddress, 0); + }; + return { + pv_picollm_weight_block_mixed_16x8_preprocess_blocks_webgpu_wasm: pvPicollmPreprocessBlocksWebGpu, + pv_picollm_weight_block_mixed_16x8_forward_single_shuffle_x_webgpu_wasm: pvPicollmForwardSingleShuffleXWebGpu, + pv_picollm_weight_block_mixed_16x8_forward_single_webgpu_wasm: pvPicollmForwardSingleWebGpu, + pv_picollm_weight_block_mixed_16x8_forward_single_reduce_y_webgpu_wasm: pvPicollmForwardSingleReduceYWebGpu, + pv_picollm_weight_block_mixed_16x8_forward_multiple_shuffle_x_webgpu_wasm: pvPicollmForwardMultipleShuffleXWebGpu, + pv_picollm_weight_block_mixed_16x8_forward_multiple_webgpu_wasm: pvPicollmForwardMultipleWebGpu, + pv_picollm_weight_block_mixed_16x8_forward_multiple_shuffle_y_webgpu_wasm: 
pvPicollmForwardMultipleShuffleYWebGpu, + pv_picollm_weight_block_mixed_16x8_add_bias_webgpu_wasm: pvPicollmAddBiasWebGpu + }; + }; + + function ownKeys(e, r) { var t = Object.keys(e); if (Object.getOwnPropertySymbols) { var o = Object.getOwnPropertySymbols(e); r && (o = o.filter(function (r) { return Object.getOwnPropertyDescriptor(e, r).enumerable; })), t.push.apply(t, o); } return t; } + function _objectSpread(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys(Object(t), !0).forEach(function (r) { _defineProperty(e, r, t[r]); }) : Object.getOwnPropertyDescriptors ? Object.defineProperties(e, Object.getOwnPropertyDescriptors(t)) : ownKeys(Object(t)).forEach(function (r) { Object.defineProperty(e, r, Object.getOwnPropertyDescriptor(t, r)); }); } return e; } + var shaders = _objectSpread(_objectSpread(_objectSpread(_objectSpread(_objectSpread(_objectSpread(_objectSpread(_objectSpread(_objectSpread({}, attentionShaders), feedForwardShaders), gateForwardShader), moeTransformerForwardShaders), normForwardShader), normLayerForwardShader), transformerForwardShaders), weightFloatForwardShader), weightBlockMixed16x8Shaders); + function arrayBufferToStringAtIndex(arrayBuffer, indexStart) { + var indexEnd = indexStart; + while (arrayBuffer[indexEnd] !== 0) { + indexEnd++; + } + var utf8decoder = new TextDecoder('utf-8'); + return utf8decoder.decode(arrayBuffer.subarray(indexStart, indexEnd)); + } + var initXpu = function initXpu(memory, _wasm) { + var setInt = function setInt(statusAddress, value) { + var memoryBufferInt32 = new Int32Array(memory.buffer); + memoryBufferInt32[statusAddress / Int32Array.BYTES_PER_ELEMENT] = value; + }; + var pvXpuDeviceInit = /*#__PURE__*/function () { + var _ref = _asyncToGenerator( /*#__PURE__*/_regeneratorRuntime.mark(function _callee(objAddress, statusAddress) { + var adapter, device, adapterInfo; + return _regeneratorRuntime.wrap(function _callee$(_context) { + while (1) 
switch (_context.prev = _context.next) { + case 0: + objAddress = unsignedAddress(objAddress); + statusAddress = unsignedAddress(statusAddress); + _context.prev = 2; + if (!(typeof window !== "undefined" && !window.isSecureContext)) { + _context.next = 7; + break; + } + console.error('WebGPU is only available in secure contexts (e.g. HTTPS)'); + setInt(statusAddress, -1); + return _context.abrupt("return"); + case 7: + if (navigator.gpu) { + _context.next = 11; + break; + } + console.error('WebGPU not supported.'); + setInt(statusAddress, -1); + return _context.abrupt("return"); + case 11: + _context.next = 13; + return navigator.gpu.requestAdapter(); + case 13: + adapter = _context.sent; + if (adapter) { + _context.next = 18; + break; + } + console.error('WebGPU not supported, please enable it in your browser.'); + setInt(statusAddress, -1); + return _context.abrupt("return"); + case 18: + _context.next = 20; + return adapter.requestDevice({ + requiredFeatures: ["timestamp-query"], + requiredLimits: { + maxBufferSize: 1073741824, + maxStorageBufferBindingSize: 1073741824 + } + }); + case 20: + device = _context.sent; + if (device) { + _context.next = 25; + break; + } + console.error('Could not find a compatible WebGPU device.'); + setInt(statusAddress, -1); + return _context.abrupt("return"); + case 25: + _context.next = 27; + return adapter.requestAdapterInfo(); + case 27: + adapterInfo = _context.sent; + if (adapterInfo) { + _context.next = 32; + break; + } + console.error('Could not retrieve WebGPU adapter info.'); + setInt(statusAddress, -1); + return _context.abrupt("return"); + case 32: + gpuDevices.set(objAddress, new PvWebGPUDevice(device, adapterInfo)); + setInt(statusAddress, 0); + _context.next = 40; + break; + case 36: + _context.prev = 36; + _context.t0 = _context["catch"](2); + console.error(_context.t0); + setInt(statusAddress, -1); + case 40: + case "end": + return _context.stop(); + } + }, _callee, null, [[2, 36]]); + })); + return function 
pvXpuDeviceInit(_x, _x2) { + return _ref.apply(this, arguments); + }; + }(); + var pvXpuDeviceInfo = /*#__PURE__*/function () { + var _ref2 = _asyncToGenerator( /*#__PURE__*/_regeneratorRuntime.mark(function _callee2(objAddress, browserNameAddressAddress, browserVersionAddressAddress, osNameAddressAddress, deviceArchitectureAddressAddress, deviceVendorAddressAddress, maxBufferSizeAddress, maxComputeWorkgroupStorageSizeAddress, maxComputeInvocationsPerWorkgroupAddress, statusAddress) { + var obj, aligned_alloc, uaParser, memoryBufferUint8, browserName, browserNameAddress, i, browserVersion, browserVersionAddress, _i, osName, osNameAddress, _i2, deviceArchitecture, deviceArchitectureAddress, _i3, deviceVendor, deviceVendorAddress, _i4; + return _regeneratorRuntime.wrap(function _callee2$(_context2) { + while (1) switch (_context2.prev = _context2.next) { + case 0: + objAddress = unsignedAddress(objAddress); + browserNameAddressAddress = unsignedAddress(browserNameAddressAddress); + browserVersionAddressAddress = unsignedAddress(browserVersionAddressAddress); + osNameAddressAddress = unsignedAddress(osNameAddressAddress); + deviceArchitectureAddressAddress = unsignedAddress(deviceArchitectureAddressAddress); + deviceVendorAddressAddress = unsignedAddress(deviceVendorAddressAddress); + maxBufferSizeAddress = unsignedAddress(maxBufferSizeAddress); + maxComputeWorkgroupStorageSizeAddress = unsignedAddress(maxComputeWorkgroupStorageSizeAddress); + maxComputeInvocationsPerWorkgroupAddress = unsignedAddress(maxComputeInvocationsPerWorkgroupAddress); + statusAddress = unsignedAddress(statusAddress); + _context2.prev = 10; + obj = gpuDevices.get(objAddress); + if (obj) { + _context2.next = 16; + break; + } + console.error('WebGPU device has not been initialized'); + setInt(statusAddress, -1); + return _context2.abrupt("return"); + case 16: + aligned_alloc = imports.aligned_alloc; + uaParser = Bowser.getParser(navigator.userAgent); + memoryBufferUint8 = new 
Uint8Array(memory.buffer); + browserName = uaParser.getBrowserName(); + _context2.next = 22; + return aligned_alloc(Uint8Array.BYTES_PER_ELEMENT, (browserName.length + 1) * Uint8Array.BYTES_PER_ELEMENT); + case 22: + browserNameAddress = _context2.sent; + if (!(browserNameAddress === 0)) { + _context2.next = 26; + break; + } + setInt(statusAddress, -1); + return _context2.abrupt("return"); + case 26: + setInt(browserNameAddressAddress, browserNameAddress); + for (i = 0; i < browserName.length; i++) { + memoryBufferUint8[browserNameAddress + i] = browserName.charCodeAt(i); + } + memoryBufferUint8[browserNameAddress + browserName.length] = 0; + browserVersion = uaParser.getBrowserVersion(); + _context2.next = 32; + return aligned_alloc(Uint8Array.BYTES_PER_ELEMENT, (browserVersion.length + 1) * Uint8Array.BYTES_PER_ELEMENT); + case 32: + browserVersionAddress = _context2.sent; + if (!(browserVersionAddress === 0)) { + _context2.next = 36; + break; + } + setInt(statusAddress, -1); + return _context2.abrupt("return"); + case 36: + setInt(browserVersionAddressAddress, browserVersionAddress); + for (_i = 0; _i < browserVersion.length; _i++) { + memoryBufferUint8[browserVersionAddress + _i] = browserVersion.charCodeAt(_i); + } + memoryBufferUint8[browserVersionAddress + browserVersion.length] = 0; + osName = uaParser.getOSName(); + _context2.next = 42; + return aligned_alloc(Uint8Array.BYTES_PER_ELEMENT, (osName.length + 1) * Uint8Array.BYTES_PER_ELEMENT); + case 42: + osNameAddress = _context2.sent; + if (!(osNameAddress === 0)) { + _context2.next = 46; + break; + } + setInt(statusAddress, -1); + return _context2.abrupt("return"); + case 46: + setInt(osNameAddressAddress, osNameAddress); + for (_i2 = 0; _i2 < osName.length; _i2++) { + memoryBufferUint8[osNameAddress + _i2] = osName.charCodeAt(_i2); + } + memoryBufferUint8[osNameAddress + osName.length] = 0; + deviceArchitecture = obj.adapterInfo.architecture; + _context2.next = 52; + return 
aligned_alloc(Uint8Array.BYTES_PER_ELEMENT, (deviceArchitecture.length + 1) * Uint8Array.BYTES_PER_ELEMENT); + case 52: + deviceArchitectureAddress = _context2.sent; + if (!(deviceArchitectureAddress === 0)) { + _context2.next = 56; + break; + } + setInt(statusAddress, -1); + return _context2.abrupt("return"); + case 56: + setInt(deviceArchitectureAddressAddress, deviceArchitectureAddress); + for (_i3 = 0; _i3 < deviceArchitecture.length; _i3++) { + memoryBufferUint8[deviceArchitectureAddress + _i3] = deviceArchitecture.charCodeAt(_i3); + } + memoryBufferUint8[deviceArchitectureAddress + deviceArchitecture.length] = 0; + deviceVendor = obj.adapterInfo.vendor; + _context2.next = 62; + return aligned_alloc(Uint8Array.BYTES_PER_ELEMENT, (deviceVendor.length + 1) * Uint8Array.BYTES_PER_ELEMENT); + case 62: + deviceVendorAddress = _context2.sent; + if (!(deviceVendorAddress === 0)) { + _context2.next = 66; + break; + } + setInt(statusAddress, -1); + return _context2.abrupt("return"); + case 66: + setInt(deviceVendorAddressAddress, deviceVendorAddress); + for (_i4 = 0; _i4 < deviceVendor.length; _i4++) { + memoryBufferUint8[deviceVendorAddress + _i4] = deviceVendor.charCodeAt(_i4); + } + memoryBufferUint8[deviceVendorAddress + deviceVendor.length] = 0; + setInt(maxBufferSizeAddress, obj.device.limits.maxBufferSize); + setInt(maxComputeWorkgroupStorageSizeAddress, obj.device.limits.maxComputeWorkgroupStorageSize); + setInt(maxComputeInvocationsPerWorkgroupAddress, obj.device.limits.maxComputeInvocationsPerWorkgroup); + setInt(statusAddress, 0); + _context2.next = 79; + break; + case 75: + _context2.prev = 75; + _context2.t0 = _context2["catch"](10); + console.error(_context2.t0); + setInt(statusAddress, -1); + case 79: + case "end": + return _context2.stop(); + } + }, _callee2, null, [[10, 75]]); + })); + return function pvXpuDeviceInfo(_x3, _x4, _x5, _x6, _x7, _x8, _x9, _x10, _x11, _x12) { + return _ref2.apply(this, arguments); + }; + }(); + var pvXpuDeviceCleanup = 
function pvXpuDeviceCleanup(objAddress) { + objAddress = unsignedAddress(objAddress); + var obj = gpuDevices.get(objAddress); + if (!obj) { + return; + } + gpuDevices["delete"](objAddress); + }; + var pvXpuDeviceLoadShaderFunc = function pvXpuDeviceLoadShaderFunc(objAddress, shaderNameAddress, statusAddress) { + objAddress = unsignedAddress(objAddress); + shaderNameAddress = unsignedAddress(shaderNameAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj) { + console.error('WebGPU device has not been initialized'); + setInt(statusAddress, -1); + return; + } + var memoryBufferUint8 = new Uint8Array(memory.buffer); + var shaderName = arrayBufferToStringAtIndex(memoryBufferUint8, shaderNameAddress); + if (!shaders[shaderName]) { + console.error("WebGPU device could not find shader with name ".concat(shaderName)); + setInt(statusAddress, -1); + return; + } + var shaderLoadFunc = shaders[shaderName]; + obj.shaders[shaderName] = shaderLoadFunc(obj.device); + setInt(statusAddress, 0); + }; + var pvXpuDeviceWait = /*#__PURE__*/function () { + var _ref3 = _asyncToGenerator( /*#__PURE__*/_regeneratorRuntime.mark(function _callee3(objAddress, statusAddress) { + var obj; + return _regeneratorRuntime.wrap(function _callee3$(_context3) { + while (1) switch (_context3.prev = _context3.next) { + case 0: + objAddress = unsignedAddress(objAddress); + statusAddress = unsignedAddress(statusAddress); + obj = gpuDevices.get(objAddress); + if (obj) { + _context3.next = 6; + break; + } + setInt(statusAddress, -1); + return _context3.abrupt("return"); + case 6: + _context3.next = 8; + return obj.sync(); + case 8: + setInt(statusAddress, 0); + case 9: + case "end": + return _context3.stop(); + } + }, _callee3); + })); + return function pvXpuDeviceWait(_x13, _x14) { + return _ref3.apply(this, arguments); + }; + }(); + var pvXpuDeviceMemAlloc = function pvXpuDeviceMemAlloc(objAddress, memAddress, sizeBytes, isOutput, statusAddress) { + 
objAddress = unsignedAddress(objAddress); + memAddress = unsignedAddress(memAddress); + statusAddress = unsignedAddress(statusAddress); + var obj = gpuDevices.get(objAddress); + if (!obj) { + console.error('WebGPU device has not been initialized'); + setInt(statusAddress, -1); + return; + } + var usage = GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST; + if (isOutput) { + usage |= GPUBufferUsage.COPY_SRC; + } + gpuBuffers.set(memAddress, { + deviceAddress: objAddress, + buffer: obj.getBuffer(sizeBytes, usage) + }); + setInt(statusAddress, 0); + }; + var pvXpuDeviceMemFree = function pvXpuDeviceMemFree(memAddress) { + memAddress = unsignedAddress(memAddress); + if (gpuBuffers.has(memAddress)) { + var gpuBuffer = gpuBuffers.get(memAddress); + if (!gpuBuffer || !gpuBuffer.buffer || !gpuBuffer.deviceAddress) { + console.error('GPU buffer has not been allocated'); + return; + } + var obj = gpuDevices.get(gpuBuffer.deviceAddress); + if (!obj) { + console.error('WebGPU device has not been initialized'); + return; + } + obj.releaseBuffer(gpuBuffer.buffer); + gpuBuffers["delete"](memAddress); + } + }; + var pvXpuDeviceMemCopyToXpu = function pvXpuDeviceMemCopyToXpu(memAddress, hostAddress, offset, sizeBytes) { + memAddress = unsignedAddress(memAddress); + hostAddress = unsignedAddress(hostAddress); + if (hostAddress < 0) { + console.error('Invalid host address', memAddress, hostAddress, offset, sizeBytes); + return; + } + var gpuBuffer = gpuBuffers.get(memAddress); + if (!gpuBuffer || !gpuBuffer.buffer || !gpuBuffer.deviceAddress) { + console.error('GPU buffer has not been allocated'); + return; + } + var obj = gpuDevices.get(gpuBuffer.deviceAddress); + if (!obj) { + console.error('WebGPU device has not been initialized'); + return; + } + var memoryBufferUint8 = new Uint8Array(memory.buffer); + obj.writeBuffer(sizeBytes, offset, memoryBufferUint8.slice(hostAddress, hostAddress + sizeBytes), gpuBuffer.buffer); + }; + var pvXpuDeviceMemCopyFromXpu = /*#__PURE__*/function () 
{ + var _ref4 = _asyncToGenerator( /*#__PURE__*/_regeneratorRuntime.mark(function _callee4(memAddress, hostAddress, offset, sizeBytes) { + var gpuBuffer, obj, stageBuffer, mappedBuffer, memoryBufferUint8; + return _regeneratorRuntime.wrap(function _callee4$(_context4) { + while (1) switch (_context4.prev = _context4.next) { + case 0: + memAddress = unsignedAddress(memAddress); + hostAddress = unsignedAddress(hostAddress); + if (!(hostAddress < 0)) { + _context4.next = 5; + break; + } + console.error('Invalid host address', memAddress, hostAddress, offset, sizeBytes); + return _context4.abrupt("return"); + case 5: + gpuBuffer = gpuBuffers.get(memAddress); + if (gpuBuffer !== null && gpuBuffer !== void 0 && gpuBuffer.buffer) { + _context4.next = 9; + break; + } + console.error('GPU buffer has not been allocated'); + return _context4.abrupt("return"); + case 9: + obj = gpuDevices.get(gpuBuffer.deviceAddress); + if (obj) { + _context4.next = 13; + break; + } + console.error('WebGPU device has not been initialized'); + return _context4.abrupt("return"); + case 13: + stageBuffer = obj.getBuffer((offset + sizeBytes) * Uint8Array.BYTES_PER_ELEMENT, GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST); + obj.endComputePass(); + obj.commandEncoder.copyBufferToBuffer(gpuBuffer.buffer, 0, stageBuffer, 0, stageBuffer.size); + _context4.next = 18; + return obj.sync(); + case 18: + _context4.next = 20; + return stageBuffer.mapAsync(GPUMapMode.READ, 0, sizeBytes + offset); + case 20: + mappedBuffer = new Uint8Array(stageBuffer.getMappedRange(0, sizeBytes + offset)); + memoryBufferUint8 = new Uint8Array(memory.buffer); + memoryBufferUint8.set(mappedBuffer.slice(offset, sizeBytes + offset), hostAddress); + stageBuffer.unmap(); + obj.releaseBuffer(stageBuffer); + case 25: + case "end": + return _context4.stop(); + } + }, _callee4); + })); + return function pvXpuDeviceMemCopyFromXpu(_x15, _x16, _x17, _x18) { + return _ref4.apply(this, arguments); + }; + }(); + var pvXpuDeviceMemMemset = 
function pvXpuDeviceMemMemset(memAddress, fillByte, sizeBytes) { + memAddress = unsignedAddress(memAddress); + var gpuBuffer = gpuBuffers.get(memAddress); + if (!gpuBuffer || !gpuBuffer.buffer || !gpuBuffer.deviceAddress) { + console.error('GPU buffer has not been allocated'); + return; + } + var obj = gpuDevices.get(gpuBuffer.deviceAddress); + if (!obj) { + console.error('WebGPU device has not been initialized'); + return; + } + if (fillByte === 0) { + obj.endComputePass(); + obj.commandEncoder.clearBuffer(gpuBuffer.buffer, 0, sizeBytes); + obj.numCommandsEncoded++; + } else { + var stagingBuffer = new Uint8Array(sizeBytes); + stagingBuffer.fill(fillByte); + obj.writeBuffer(sizeBytes, 0, stagingBuffer, gpuBuffer.buffer); + } + }; + var pvXpuDeviceTimerStart = function pvXpuDeviceTimerStart(objAddress) { + objAddress = unsignedAddress(objAddress); + var obj = gpuDevices.get(objAddress); + if (!obj) { + console.error('WebGPU device has not been initialized'); + return; + } + obj.isTimerEnabled = true; + }; + var pvXpuDeviceTimerStop = /*#__PURE__*/function () { + var _ref5 = _asyncToGenerator( /*#__PURE__*/_regeneratorRuntime.mark(function _callee5(objAddress) { + var obj; + return _regeneratorRuntime.wrap(function _callee5$(_context5) { + while (1) switch (_context5.prev = _context5.next) { + case 0: + objAddress = unsignedAddress(objAddress); + obj = gpuDevices.get(objAddress); + if (obj) { + _context5.next = 5; + break; + } + console.error('WebGPU device has not been initialized'); + return _context5.abrupt("return"); + case 5: + _context5.next = 7; + return obj.sync(); + case 7: + obj.reportShaderTimes(); + case 8: + case "end": + return _context5.stop(); + } + }, _callee5); + })); + return function pvXpuDeviceTimerStop(_x19) { + return _ref5.apply(this, arguments); + }; + }(); + var imports = _objectSpread(_objectSpread(_objectSpread(_objectSpread(_objectSpread(_objectSpread(_objectSpread(_objectSpread(_objectSpread({ + aligned_alloc: function 
aligned_alloc(alignment, size) { + throw new Error("aligned_alloc was not passed in from parent module"); + }, + pv_xpu_webgpu_device_init_wasm: pvXpuDeviceInit, + pv_xpu_webgpu_device_info_wasm: pvXpuDeviceInfo, + pv_xpu_webgpu_device_cleanup_wasm: pvXpuDeviceCleanup, + pv_xpu_webgpu_device_load_shader_func_wasm: pvXpuDeviceLoadShaderFunc, + pv_xpu_webgpu_device_wait_wasm: pvXpuDeviceWait, + pv_xpu_webgpu_device_mem_alloc_wasm: pvXpuDeviceMemAlloc, + pv_xpu_webgpu_device_mem_free_wasm: pvXpuDeviceMemFree, + pv_xpu_webgpu_device_mem_copy_to_xpu_wasm: pvXpuDeviceMemCopyToXpu, + pv_xpu_webgpu_device_mem_copy_from_xpu_wasm: pvXpuDeviceMemCopyFromXpu, + pv_xpu_webgpu_device_mem_memset_wasm: pvXpuDeviceMemMemset, + pv_xpu_webgpu_timer_start_wasm: pvXpuDeviceTimerStart, + pv_xpu_webgpu_timer_stop_wasm: pvXpuDeviceTimerStop + }, getPicollmAttentionWebGpuFunctions(memory)), getPicollmGateWebGpuFunctions(memory)), getPicollmFeedForwardWebGpuFunctions(memory)), getPicollmMoeTransformerWebGpuFunctions(memory)), getPicollmNormLayerWebGpuFunctions(memory)), getPicollmNormWebGpuFunctions(memory)), getPicollmTransformerWebGpuFunctions(memory)), getPicollmWeightFloatWebGpuFunctions(memory)), getPicollmWeightBlockMixed16x8WebGpuFunctions(memory)); + return imports; + }; + + return initXpu; + +})(); diff --git a/lib/wasm/dist/xpu_webgpu/iife/index.min.js b/lib/wasm/dist/xpu_webgpu/iife/index.min.js new file mode 100644 index 00000000..f64df4e1 --- /dev/null +++ b/lib/wasm/dist/xpu_webgpu/iife/index.min.js @@ -0,0 +1,7 @@ +var XpuWebgpu=function(){"use strict";function e(e,t,r,n,a,i,u){try{var o=e[i](u),s=o.value}catch(e){return void r(e)}o.done?t(s):Promise.resolve(s).then(n,a)}function t(t){return function(){var r=this,n=arguments;return new Promise((function(a,i){var u=t.apply(r,n);function o(t){e(u,a,i,o,s,"next",t)}function s(t){e(u,a,i,o,s,"throw",t)}o(void 0)}))}}function r(e){return r="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(e){return typeof 
e}:function(e){return e&&"function"==typeof Symbol&&e.constructor===Symbol&&e!==Symbol.prototype?"symbol":typeof e},r(e)}function n(e){var t=function(e,t){if("object"!=r(e)||!e)return e;var n=e[Symbol.toPrimitive];if(void 0!==n){var a=n.call(e,t||"default");if("object"!=r(a))return a;throw new TypeError("@@toPrimitive must return a primitive value.")}return("string"===t?String:Number)(e)}(e,"string");return"symbol"==r(t)?t:String(t)}function a(e,t,r){return(t=n(t))in e?Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[t]=r,e}function i(e){return e&&e.__esModule&&Object.prototype.hasOwnProperty.call(e,"default")?e.default:e}var u={exports:{}},o={exports:{}};!function(e){function t(r){return e.exports=t="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(e){return typeof e}:function(e){return e&&"function"==typeof Symbol&&e.constructor===Symbol&&e!==Symbol.prototype?"symbol":typeof e},e.exports.__esModule=!0,e.exports.default=e.exports,t(r)}e.exports=t,e.exports.__esModule=!0,e.exports.default=e.exports}(o);var s=o.exports;!function(e){var t=s.default;function r(){e.exports=r=function(){return a},e.exports.__esModule=!0,e.exports.default=e.exports;var n,a={},i=Object.prototype,u=i.hasOwnProperty,o=Object.defineProperty||function(e,t,r){e[t]=r.value},s="function"==typeof Symbol?Symbol:{},c=s.iterator||"@@iterator",l=s.asyncIterator||"@@asyncIterator",_=s.toStringTag||"@@toStringTag";function d(e,t,r){return Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}),e[t]}try{d({},"")}catch(n){d=function(e,t,r){return e[t]=r}}function f(e,t,r,n){var a=t&&t.prototype instanceof m?t:m,i=Object.create(a.prototype),u=new O(n||[]);return o(i,"_invoke",{value:z(e,r,u)}),i}function b(e,t,r){try{return{type:"normal",arg:e.call(t,r)}}catch(e){return{type:"throw",arg:e}}}a.wrap=f;var p="suspendedStart",g="suspendedYield",v="executing",x="completed",k={};function m(){}function h(){}function y(){}var 
B={};d(B,c,(function(){return this}));var w=Object.getPrototypeOf,P=w&&w(w(G([])));P&&P!==i&&u.call(P,c)&&(B=P);var S=y.prototype=m.prototype=Object.create(B);function E(e){["next","throw","return"].forEach((function(t){d(e,t,(function(e){return this._invoke(t,e)}))}))}function U(e,r){function n(a,i,o,s){var c=b(e[a],e,i);if("throw"!==c.type){var l=c.arg,_=l.value;return _&&"object"==t(_)&&u.call(_,"__await")?r.resolve(_.__await).then((function(e){n("next",e,o,s)}),(function(e){n("throw",e,o,s)})):r.resolve(_).then((function(e){l.value=e,o(l)}),(function(e){return n("throw",e,o,s)}))}s(c.arg)}var a;o(this,"_invoke",{value:function(e,t){function i(){return new r((function(r,a){n(e,t,r,a)}))}return a=a?a.then(i,i):i()}})}function z(e,t,r){var a=p;return function(i,u){if(a===v)throw new Error("Generator is already running");if(a===x){if("throw"===i)throw u;return{value:n,done:!0}}for(r.method=i,r.arg=u;;){var o=r.delegate;if(o){var s=j(o,r);if(s){if(s===k)continue;return s}}if("next"===r.method)r.sent=r._sent=r.arg;else if("throw"===r.method){if(a===p)throw a=x,r.arg;r.dispatchException(r.arg)}else"return"===r.method&&r.abrupt("return",r.arg);a=v;var c=b(e,t,r);if("normal"===c.type){if(a=r.done?x:g,c.arg===k)continue;return{value:c.arg,done:r.done}}"throw"===c.type&&(a=x,r.method="throw",r.arg=c.arg)}}}function j(e,t){var r=t.method,a=e.iterator[r];if(a===n)return t.delegate=null,"throw"===r&&e.iterator.return&&(t.method="return",t.arg=n,j(e,t),"throw"===t.method)||"return"!==r&&(t.method="throw",t.arg=new TypeError("The iterator does not provide a '"+r+"' method")),k;var i=b(a,e.iterator,t.arg);if("throw"===i.type)return t.method="throw",t.arg=i.arg,t.delegate=null,k;var u=i.arg;return u?u.done?(t[e.resultName]=u.value,t.next=e.nextLoc,"return"!==t.method&&(t.method="next",t.arg=n),t.delegate=null,k):u:(t.method="throw",t.arg=new TypeError("iterator result is not an object"),t.delegate=null,k)}function M(e){var t={tryLoc:e[0]};1 in e&&(t.catchLoc=e[1]),2 in 
e&&(t.finallyLoc=e[2],t.afterLoc=e[3]),this.tryEntries.push(t)}function C(e){var t=e.completion||{};t.type="normal",delete t.arg,e.completion=t}function O(e){this.tryEntries=[{tryLoc:"root"}],e.forEach(M,this),this.reset(!0)}function G(e){if(e||""===e){var r=e[c];if(r)return r.call(e);if("function"==typeof e.next)return e;if(!isNaN(e.length)){var a=-1,i=function t(){for(;++a=0;--a){var i=this.tryEntries[a],o=i.completion;if("root"===i.tryLoc)return r("end");if(i.tryLoc<=this.prev){var s=u.call(i,"catchLoc"),c=u.call(i,"finallyLoc");if(s&&c){if(this.prev=0;--r){var n=this.tryEntries[r];if(n.tryLoc<=this.prev&&u.call(n,"finallyLoc")&&this.prev=0;--t){var r=this.tryEntries[t];if(r.finallyLoc===e)return this.complete(r.completion,r.afterLoc),C(r),k}},catch:function(e){for(var t=this.tryEntries.length-1;t>=0;--t){var r=this.tryEntries[t];if(r.tryLoc===e){var n=r.completion;if("throw"===n.type){var a=n.arg;C(r)}return a}}throw new Error("illegal catch attempt")},delegateYield:function(e,t,r){return this.delegate={iterator:G(e),resultName:t,nextLoc:r},"next"===this.method&&(this.arg=n),k}},a}e.exports=r,e.exports.__esModule=!0,e.exports.default=e.exports}(u);var c=(0,u.exports)(),l=c;try{regeneratorRuntime=c}catch(e){"object"==typeof globalThis?globalThis.regeneratorRuntime=c:Function("r","regeneratorRuntime = r")(c)}var _=i(l);const d={"Amazon Silk":"amazon_silk","Android Browser":"android",Bada:"bada",BlackBerry:"blackberry",Chrome:"chrome",Chromium:"chromium",Electron:"electron",Epiphany:"epiphany",Firefox:"firefox",Focus:"focus",Generic:"generic","Google Search":"google_search",Googlebot:"googlebot","Internet Explorer":"ie","K-Meleon":"k_meleon",Maxthon:"maxthon","Microsoft Edge":"edge","MZ Browser":"mz","NAVER Whale Browser":"naver",Opera:"opera","Opera Coast":"opera_coast",PhantomJS:"phantomjs",Puffin:"puffin",QupZilla:"qupzilla",QQ:"qq",QQLite:"qqlite",Safari:"safari",Sailfish:"sailfish","Samsung Internet for 
Android":"samsung_internet",SeaMonkey:"seamonkey",Sleipnir:"sleipnir",Swing:"swing",Tizen:"tizen","UC Browser":"uc",Vivaldi:"vivaldi","WebOS Browser":"webos",WeChat:"wechat","Yandex Browser":"yandex",Roku:"roku"},f={amazon_silk:"Amazon Silk",android:"Android Browser",bada:"Bada",blackberry:"BlackBerry",chrome:"Chrome",chromium:"Chromium",electron:"Electron",epiphany:"Epiphany",firefox:"Firefox",focus:"Focus",generic:"Generic",googlebot:"Googlebot",google_search:"Google Search",ie:"Internet Explorer",k_meleon:"K-Meleon",maxthon:"Maxthon",edge:"Microsoft Edge",mz:"MZ Browser",naver:"NAVER Whale Browser",opera:"Opera",opera_coast:"Opera Coast",phantomjs:"PhantomJS",puffin:"Puffin",qupzilla:"QupZilla",qq:"QQ Browser",qqlite:"QQ Browser Lite",safari:"Safari",sailfish:"Sailfish",samsung_internet:"Samsung Internet for Android",seamonkey:"SeaMonkey",sleipnir:"Sleipnir",swing:"Swing",tizen:"Tizen",uc:"UC Browser",vivaldi:"Vivaldi",webos:"WebOS Browser",wechat:"WeChat",yandex:"Yandex Browser"},b={tablet:"tablet",mobile:"mobile",desktop:"desktop",tv:"tv"},p={WindowsPhone:"Windows Phone",Windows:"Windows",MacOS:"macOS",iOS:"iOS",Android:"Android",WebOS:"WebOS",BlackBerry:"BlackBerry",Bada:"Bada",Tizen:"Tizen",Linux:"Linux",ChromeOS:"Chrome OS",PlayStation4:"PlayStation 4",Roku:"Roku"},g={EdgeHTML:"EdgeHTML",Blink:"Blink",Trident:"Trident",Presto:"Presto",Gecko:"Gecko",WebKit:"WebKit"};class v{static getFirstMatch(e,t){const r=t.match(e);return r&&r.length>0&&r[1]||""}static getSecondMatch(e,t){const r=t.match(e);return r&&r.length>1&&r[2]||""}static matchAndReturnConst(e,t,r){if(e.test(t))return r}static getWindowsVersionName(e){switch(e){case"NT":return"NT";case"XP":case"NT 5.1":return"XP";case"NT 5.0":return"2000";case"NT 5.2":return"2003";case"NT 6.0":return"Vista";case"NT 6.1":return"7";case"NT 6.2":return"8";case"NT 6.3":return"8.1";case"NT 10.0":return"10";default:return}}static getMacOSVersionName(e){const 
t=e.split(".").splice(0,2).map((e=>parseInt(e,10)||0));if(t.push(0),10===t[0])switch(t[1]){case 5:return"Leopard";case 6:return"Snow Leopard";case 7:return"Lion";case 8:return"Mountain Lion";case 9:return"Mavericks";case 10:return"Yosemite";case 11:return"El Capitan";case 12:return"Sierra";case 13:return"High Sierra";case 14:return"Mojave";case 15:return"Catalina";default:return}}static getAndroidVersionName(e){const t=e.split(".").splice(0,2).map((e=>parseInt(e,10)||0));if(t.push(0),!(1===t[0]&&t[1]<5))return 1===t[0]&&t[1]<6?"Cupcake":1===t[0]&&t[1]>=6?"Donut":2===t[0]&&t[1]<2?"Eclair":2===t[0]&&2===t[1]?"Froyo":2===t[0]&&t[1]>2?"Gingerbread":3===t[0]?"Honeycomb":4===t[0]&&t[1]<1?"Ice Cream Sandwich":4===t[0]&&t[1]<4?"Jelly Bean":4===t[0]&&t[1]>=4?"KitKat":5===t[0]?"Lollipop":6===t[0]?"Marshmallow":7===t[0]?"Nougat":8===t[0]?"Oreo":9===t[0]?"Pie":void 0}static getVersionPrecision(e){return e.split(".").length}static compareVersions(e,t,r=!1){const n=v.getVersionPrecision(e),a=v.getVersionPrecision(t);let i=Math.max(n,a),u=0;const o=v.map([e,t],(e=>{const t=i-v.getVersionPrecision(e),r=e+new Array(t+1).join(".0");return v.map(r.split("."),(e=>new Array(20-e.length).join("0")+e)).reverse()}));for(r&&(u=i-Math.min(n,a)),i-=1;i>=u;){if(o[0][i]>o[1][i])return 1;if(o[0][i]===o[1][i]){if(i===u)return 0;i-=1}else if(o[0][i]{r[t]=e[t]}))}}return e}static getBrowserAlias(e){return d[e]}static getBrowserTypeByAlias(e){return f[e]||""}}const x=/version\/(\d+(\.?_?\d+)+)/i,k=[{test:[/googlebot/i],describe(e){const t={name:"Googlebot"},r=v.getFirstMatch(/googlebot\/(\d+(\.\d+))/i,e)||v.getFirstMatch(x,e);return r&&(t.version=r),t}},{test:[/opera/i],describe(e){const t={name:"Opera"},r=v.getFirstMatch(x,e)||v.getFirstMatch(/(?:opera)[\s/](\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/opr\/|opios/i],describe(e){const t={name:"Opera"},r=v.getFirstMatch(/(?:opr|opios)[\s/](\S+)/i,e)||v.getFirstMatch(x,e);return 
r&&(t.version=r),t}},{test:[/SamsungBrowser/i],describe(e){const t={name:"Samsung Internet for Android"},r=v.getFirstMatch(x,e)||v.getFirstMatch(/(?:SamsungBrowser)[\s/](\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/Whale/i],describe(e){const t={name:"NAVER Whale Browser"},r=v.getFirstMatch(x,e)||v.getFirstMatch(/(?:whale)[\s/](\d+(?:\.\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/MZBrowser/i],describe(e){const t={name:"MZ Browser"},r=v.getFirstMatch(/(?:MZBrowser)[\s/](\d+(?:\.\d+)+)/i,e)||v.getFirstMatch(x,e);return r&&(t.version=r),t}},{test:[/focus/i],describe(e){const t={name:"Focus"},r=v.getFirstMatch(/(?:focus)[\s/](\d+(?:\.\d+)+)/i,e)||v.getFirstMatch(x,e);return r&&(t.version=r),t}},{test:[/swing/i],describe(e){const t={name:"Swing"},r=v.getFirstMatch(/(?:swing)[\s/](\d+(?:\.\d+)+)/i,e)||v.getFirstMatch(x,e);return r&&(t.version=r),t}},{test:[/coast/i],describe(e){const t={name:"Opera Coast"},r=v.getFirstMatch(x,e)||v.getFirstMatch(/(?:coast)[\s/](\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/opt\/\d+(?:.?_?\d+)+/i],describe(e){const t={name:"Opera Touch"},r=v.getFirstMatch(/(?:opt)[\s/](\d+(\.?_?\d+)+)/i,e)||v.getFirstMatch(x,e);return r&&(t.version=r),t}},{test:[/yabrowser/i],describe(e){const t={name:"Yandex Browser"},r=v.getFirstMatch(/(?:yabrowser)[\s/](\d+(\.?_?\d+)+)/i,e)||v.getFirstMatch(x,e);return r&&(t.version=r),t}},{test:[/ucbrowser/i],describe(e){const t={name:"UC Browser"},r=v.getFirstMatch(x,e)||v.getFirstMatch(/(?:ucbrowser)[\s/](\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/Maxthon|mxios/i],describe(e){const t={name:"Maxthon"},r=v.getFirstMatch(x,e)||v.getFirstMatch(/(?:Maxthon|mxios)[\s/](\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/epiphany/i],describe(e){const t={name:"Epiphany"},r=v.getFirstMatch(x,e)||v.getFirstMatch(/(?:epiphany)[\s/](\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/puffin/i],describe(e){const 
t={name:"Puffin"},r=v.getFirstMatch(x,e)||v.getFirstMatch(/(?:puffin)[\s/](\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/sleipnir/i],describe(e){const t={name:"Sleipnir"},r=v.getFirstMatch(x,e)||v.getFirstMatch(/(?:sleipnir)[\s/](\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/k-meleon/i],describe(e){const t={name:"K-Meleon"},r=v.getFirstMatch(x,e)||v.getFirstMatch(/(?:k-meleon)[\s/](\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/micromessenger/i],describe(e){const t={name:"WeChat"},r=v.getFirstMatch(/(?:micromessenger)[\s/](\d+(\.?_?\d+)+)/i,e)||v.getFirstMatch(x,e);return r&&(t.version=r),t}},{test:[/qqbrowser/i],describe(e){const t={name:/qqbrowserlite/i.test(e)?"QQ Browser Lite":"QQ Browser"},r=v.getFirstMatch(/(?:qqbrowserlite|qqbrowser)[/](\d+(\.?_?\d+)+)/i,e)||v.getFirstMatch(x,e);return r&&(t.version=r),t}},{test:[/msie|trident/i],describe(e){const t={name:"Internet Explorer"},r=v.getFirstMatch(/(?:msie |rv:)(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/\sedg\//i],describe(e){const t={name:"Microsoft Edge"},r=v.getFirstMatch(/\sedg\/(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/edg([ea]|ios)/i],describe(e){const t={name:"Microsoft Edge"},r=v.getSecondMatch(/edg([ea]|ios)\/(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/vivaldi/i],describe(e){const t={name:"Vivaldi"},r=v.getFirstMatch(/vivaldi\/(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/seamonkey/i],describe(e){const t={name:"SeaMonkey"},r=v.getFirstMatch(/seamonkey\/(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/sailfish/i],describe(e){const t={name:"Sailfish"},r=v.getFirstMatch(/sailfish\s?browser\/(\d+(\.\d+)?)/i,e);return r&&(t.version=r),t}},{test:[/silk/i],describe(e){const t={name:"Amazon Silk"},r=v.getFirstMatch(/silk\/(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/phantom/i],describe(e){const t={name:"PhantomJS"},r=v.getFirstMatch(/phantomjs\/(\d+(\.?_?\d+)+)/i,e);return 
r&&(t.version=r),t}},{test:[/slimerjs/i],describe(e){const t={name:"SlimerJS"},r=v.getFirstMatch(/slimerjs\/(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/blackberry|\bbb\d+/i,/rim\stablet/i],describe(e){const t={name:"BlackBerry"},r=v.getFirstMatch(x,e)||v.getFirstMatch(/blackberry[\d]+\/(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/(web|hpw)[o0]s/i],describe(e){const t={name:"WebOS Browser"},r=v.getFirstMatch(x,e)||v.getFirstMatch(/w(?:eb)?[o0]sbrowser\/(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/bada/i],describe(e){const t={name:"Bada"},r=v.getFirstMatch(/dolfin\/(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/tizen/i],describe(e){const t={name:"Tizen"},r=v.getFirstMatch(/(?:tizen\s?)?browser\/(\d+(\.?_?\d+)+)/i,e)||v.getFirstMatch(x,e);return r&&(t.version=r),t}},{test:[/qupzilla/i],describe(e){const t={name:"QupZilla"},r=v.getFirstMatch(/(?:qupzilla)[\s/](\d+(\.?_?\d+)+)/i,e)||v.getFirstMatch(x,e);return r&&(t.version=r),t}},{test:[/firefox|iceweasel|fxios/i],describe(e){const t={name:"Firefox"},r=v.getFirstMatch(/(?:firefox|iceweasel|fxios)[\s/](\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/electron/i],describe(e){const t={name:"Electron"},r=v.getFirstMatch(/(?:electron)\/(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/MiuiBrowser/i],describe(e){const t={name:"Miui"},r=v.getFirstMatch(/(?:MiuiBrowser)[\s/](\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/chromium/i],describe(e){const t={name:"Chromium"},r=v.getFirstMatch(/(?:chromium)[\s/](\d+(\.?_?\d+)+)/i,e)||v.getFirstMatch(x,e);return r&&(t.version=r),t}},{test:[/chrome|crios|crmo/i],describe(e){const t={name:"Chrome"},r=v.getFirstMatch(/(?:chrome|crios|crmo)\/(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/GSA/i],describe(e){const t={name:"Google Search"},r=v.getFirstMatch(/(?:GSA)\/(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test(e){const t=!e.test(/like android/i),r=e.test(/android/i);return 
t&&r},describe(e){const t={name:"Android Browser"},r=v.getFirstMatch(x,e);return r&&(t.version=r),t}},{test:[/playstation 4/i],describe(e){const t={name:"PlayStation 4"},r=v.getFirstMatch(x,e);return r&&(t.version=r),t}},{test:[/safari|applewebkit/i],describe(e){const t={name:"Safari"},r=v.getFirstMatch(x,e);return r&&(t.version=r),t}},{test:[/.*/i],describe(e){const t=-1!==e.search("\\(")?/^(.*)\/(.*)[ \t]\((.*)/:/^(.*)\/(.*) /;return{name:v.getFirstMatch(t,e),version:v.getSecondMatch(t,e)}}}];var m=[{test:[/Roku\/DVP/],describe(e){const t=v.getFirstMatch(/Roku\/DVP-(\d+\.\d+)/i,e);return{name:p.Roku,version:t}}},{test:[/windows phone/i],describe(e){const t=v.getFirstMatch(/windows phone (?:os)?\s?(\d+(\.\d+)*)/i,e);return{name:p.WindowsPhone,version:t}}},{test:[/windows /i],describe(e){const t=v.getFirstMatch(/Windows ((NT|XP)( \d\d?.\d)?)/i,e),r=v.getWindowsVersionName(t);return{name:p.Windows,version:t,versionName:r}}},{test:[/Macintosh(.*?) FxiOS(.*?)\//],describe(e){const t={name:p.iOS},r=v.getSecondMatch(/(Version\/)(\d[\d.]+)/,e);return r&&(t.version=r),t}},{test:[/macintosh/i],describe(e){const t=v.getFirstMatch(/mac os x (\d+(\.?_?\d+)+)/i,e).replace(/[_\s]/g,"."),r=v.getMacOSVersionName(t),n={name:p.MacOS,version:t};return r&&(n.versionName=r),n}},{test:[/(ipod|iphone|ipad)/i],describe(e){const t=v.getFirstMatch(/os (\d+([_\s]\d+)*) like mac os x/i,e).replace(/[_\s]/g,".");return{name:p.iOS,version:t}}},{test(e){const t=!e.test(/like android/i),r=e.test(/android/i);return t&&r},describe(e){const t=v.getFirstMatch(/android[\s/-](\d+(\.\d+)*)/i,e),r=v.getAndroidVersionName(t),n={name:p.Android,version:t};return r&&(n.versionName=r),n}},{test:[/(web|hpw)[o0]s/i],describe(e){const t=v.getFirstMatch(/(?:web|hpw)[o0]s\/(\d+(\.\d+)*)/i,e),r={name:p.WebOS};return t&&t.length&&(r.version=t),r}},{test:[/blackberry|\bbb\d+/i,/rim\stablet/i],describe(e){const 
t=v.getFirstMatch(/rim\stablet\sos\s(\d+(\.\d+)*)/i,e)||v.getFirstMatch(/blackberry\d+\/(\d+([_\s]\d+)*)/i,e)||v.getFirstMatch(/\bbb(\d+)/i,e);return{name:p.BlackBerry,version:t}}},{test:[/bada/i],describe(e){const t=v.getFirstMatch(/bada\/(\d+(\.\d+)*)/i,e);return{name:p.Bada,version:t}}},{test:[/tizen/i],describe(e){const t=v.getFirstMatch(/tizen[/\s](\d+(\.\d+)*)/i,e);return{name:p.Tizen,version:t}}},{test:[/linux/i],describe:()=>({name:p.Linux})},{test:[/CrOS/],describe:()=>({name:p.ChromeOS})},{test:[/PlayStation 4/],describe(e){const t=v.getFirstMatch(/PlayStation 4[/\s](\d+(\.\d+)*)/i,e);return{name:p.PlayStation4,version:t}}}],h=[{test:[/googlebot/i],describe:()=>({type:"bot",vendor:"Google"})},{test:[/huawei/i],describe(e){const t=v.getFirstMatch(/(can-l01)/i,e)&&"Nova",r={type:b.mobile,vendor:"Huawei"};return t&&(r.model=t),r}},{test:[/nexus\s*(?:7|8|9|10).*/i],describe:()=>({type:b.tablet,vendor:"Nexus"})},{test:[/ipad/i],describe:()=>({type:b.tablet,vendor:"Apple",model:"iPad"})},{test:[/Macintosh(.*?) FxiOS(.*?)\//],describe:()=>({type:b.tablet,vendor:"Apple",model:"iPad"})},{test:[/kftt build/i],describe:()=>({type:b.tablet,vendor:"Amazon",model:"Kindle Fire HD 7"})},{test:[/silk/i],describe:()=>({type:b.tablet,vendor:"Amazon"})},{test:[/tablet(?! 
pc)/i],describe:()=>({type:b.tablet})},{test(e){const t=e.test(/ipod|iphone/i),r=e.test(/like (ipod|iphone)/i);return t&&!r},describe(e){const t=v.getFirstMatch(/(ipod|iphone)/i,e);return{type:b.mobile,vendor:"Apple",model:t}}},{test:[/nexus\s*[0-6].*/i,/galaxy nexus/i],describe:()=>({type:b.mobile,vendor:"Nexus"})},{test:[/[^-]mobi/i],describe:()=>({type:b.mobile})},{test:e=>"blackberry"===e.getBrowserName(!0),describe:()=>({type:b.mobile,vendor:"BlackBerry"})},{test:e=>"bada"===e.getBrowserName(!0),describe:()=>({type:b.mobile})},{test:e=>"windows phone"===e.getBrowserName(),describe:()=>({type:b.mobile,vendor:"Microsoft"})},{test(e){const t=Number(String(e.getOSVersion()).split(".")[0]);return"android"===e.getOSName(!0)&&t>=3},describe:()=>({type:b.tablet})},{test:e=>"android"===e.getOSName(!0),describe:()=>({type:b.mobile})},{test:e=>"macos"===e.getOSName(!0),describe:()=>({type:b.desktop,vendor:"Apple"})},{test:e=>"windows"===e.getOSName(!0),describe:()=>({type:b.desktop})},{test:e=>"linux"===e.getOSName(!0),describe:()=>({type:b.desktop})},{test:e=>"playstation 4"===e.getOSName(!0),describe:()=>({type:b.tv})},{test:e=>"roku"===e.getOSName(!0),describe:()=>({type:b.tv})}],y=[{test:e=>"microsoft edge"===e.getBrowserName(!0),describe(e){if(/\sedg\//i.test(e))return{name:g.Blink};const t=v.getFirstMatch(/edge\/(\d+(\.?_?\d+)+)/i,e);return{name:g.EdgeHTML,version:t}}},{test:[/trident/i],describe(e){const t={name:g.Trident},r=v.getFirstMatch(/trident\/(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:e=>e.test(/presto/i),describe(e){const t={name:g.Presto},r=v.getFirstMatch(/presto\/(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test(e){const t=e.test(/gecko/i),r=e.test(/like gecko/i);return t&&!r},describe(e){const t={name:g.Gecko},r=v.getFirstMatch(/gecko\/(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}},{test:[/(apple)?webkit\/537\.36/i],describe:()=>({name:g.Blink})},{test:[/(apple)?webkit/i],describe(e){const 
t={name:g.WebKit},r=v.getFirstMatch(/webkit\/(\d+(\.?_?\d+)+)/i,e);return r&&(t.version=r),t}}];class B{constructor(e,t=!1){if(null==e||""===e)throw new Error("UserAgent parameter can't be empty");this._ua=e,this.parsedResult={},!0!==t&&this.parse()}getUA(){return this._ua}test(e){return e.test(this._ua)}parseBrowser(){this.parsedResult.browser={};const e=v.find(k,(e=>{if("function"==typeof e.test)return e.test(this);if(e.test instanceof Array)return e.test.some((e=>this.test(e)));throw new Error("Browser's test function is not valid")}));return e&&(this.parsedResult.browser=e.describe(this.getUA())),this.parsedResult.browser}getBrowser(){return this.parsedResult.browser?this.parsedResult.browser:this.parseBrowser()}getBrowserName(e){return e?String(this.getBrowser().name).toLowerCase()||"":this.getBrowser().name||""}getBrowserVersion(){return this.getBrowser().version}getOS(){return this.parsedResult.os?this.parsedResult.os:this.parseOS()}parseOS(){this.parsedResult.os={};const e=v.find(m,(e=>{if("function"==typeof e.test)return e.test(this);if(e.test instanceof Array)return e.test.some((e=>this.test(e)));throw new Error("Browser's test function is not valid")}));return e&&(this.parsedResult.os=e.describe(this.getUA())),this.parsedResult.os}getOSName(e){const{name:t}=this.getOS();return e?String(t).toLowerCase()||"":t||""}getOSVersion(){return this.getOS().version}getPlatform(){return this.parsedResult.platform?this.parsedResult.platform:this.parsePlatform()}getPlatformType(e=!1){const{type:t}=this.getPlatform();return e?String(t).toLowerCase()||"":t||""}parsePlatform(){this.parsedResult.platform={};const e=v.find(h,(e=>{if("function"==typeof e.test)return e.test(this);if(e.test instanceof Array)return e.test.some((e=>this.test(e)));throw new Error("Browser's test function is not valid")}));return e&&(this.parsedResult.platform=e.describe(this.getUA())),this.parsedResult.platform}getEngine(){return 
this.parsedResult.engine?this.parsedResult.engine:this.parseEngine()}getEngineName(e){return e?String(this.getEngine().name).toLowerCase()||"":this.getEngine().name||""}parseEngine(){this.parsedResult.engine={};const e=v.find(y,(e=>{if("function"==typeof e.test)return e.test(this);if(e.test instanceof Array)return e.test.some((e=>this.test(e)));throw new Error("Browser's test function is not valid")}));return e&&(this.parsedResult.engine=e.describe(this.getUA())),this.parsedResult.engine}parse(){return this.parseBrowser(),this.parseOS(),this.parsePlatform(),this.parseEngine(),this}getResult(){return v.assign({},this.parsedResult)}satisfies(e){const t={};let r=0;const n={};let a=0;if(Object.keys(e).forEach((i=>{const u=e[i];"string"==typeof u?(n[i]=u,a+=1):"object"==typeof u&&(t[i]=u,r+=1)})),r>0){const e=Object.keys(t),r=v.find(e,(e=>this.isOS(e)));if(r){const e=this.satisfies(t[r]);if(void 0!==e)return e}const n=v.find(e,(e=>this.isPlatform(e)));if(n){const e=this.satisfies(t[n]);if(void 0!==e)return e}}if(a>0){const e=Object.keys(n),t=v.find(e,(e=>this.isBrowser(e,!0)));if(void 0!==t)return this.compareVersion(n[t])}}isBrowser(e,t=!1){const r=this.getBrowserName().toLowerCase();let n=e.toLowerCase();const a=v.getBrowserTypeByAlias(n);return t&&a&&(n=a.toLowerCase()),n===r}compareVersion(e){let t=[0],r=e,n=!1;const a=this.getBrowserVersion();if("string"==typeof a)return">"===e[0]||"<"===e[0]?(r=e.substr(1),"="===e[1]?(n=!0,r=e.substr(2)):t=[],">"===e[0]?t.push(1):t.push(-1)):"="===e[0]?r=e.substr(1):"~"===e[0]&&(n=!0,r=e.substr(1)),t.indexOf(v.compareVersions(a,r,n))>-1}isOS(e){return this.getOSName(!0)===String(e).toLowerCase()}isPlatform(e){return this.getPlatformType(!0)===String(e).toLowerCase()}isEngine(e){return this.getEngineName(!0)===String(e).toLowerCase()}is(e,t=!1){return this.isBrowser(e,t)||this.isOS(e)||this.isPlatform(e)}some(e=[]){return e.some((e=>this.is(e)))}} +/*! 
+ * Bowser - a browser detector + * https://github.com/lancedikson/bowser + * MIT License | (c) Dustin Diaz 2012-2015 + * MIT License | (c) Denis Demchenko 2015-2019 + */class w{static getParser(e,t=!1){if("string"!=typeof e)throw new Error("UserAgent should be a string");return new B(e,t)}static parse(e){return new B(e).getResult()}static get BROWSER_MAP(){return f}static get ENGINE_MAP(){return g}static get OS_MAP(){return p}static get PLATFORMS_MAP(){return b}}var P={exports:{}},S={exports:{}};!function(e){function t(r){return e.exports=t="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(e){return typeof e}:function(e){return e&&"function"==typeof Symbol&&e.constructor===Symbol&&e!==Symbol.prototype?"symbol":typeof e},e.exports.__esModule=!0,e.exports.default=e.exports,t(r)}e.exports=t,e.exports.__esModule=!0,e.exports.default=e.exports}(S);var E=S.exports;!function(e){var t=E.default;function r(){e.exports=r=function(){return a},e.exports.__esModule=!0,e.exports.default=e.exports;var n,a={},i=Object.prototype,u=i.hasOwnProperty,o=Object.defineProperty||function(e,t,r){e[t]=r.value},s="function"==typeof Symbol?Symbol:{},c=s.iterator||"@@iterator",l=s.asyncIterator||"@@asyncIterator",_=s.toStringTag||"@@toStringTag";function d(e,t,r){return Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}),e[t]}try{d({},"")}catch(n){d=function(e,t,r){return e[t]=r}}function f(e,t,r,n){var a=t&&t.prototype instanceof m?t:m,i=Object.create(a.prototype),u=new O(n||[]);return o(i,"_invoke",{value:z(e,r,u)}),i}function b(e,t,r){try{return{type:"normal",arg:e.call(t,r)}}catch(e){return{type:"throw",arg:e}}}a.wrap=f;var p="suspendedStart",g="suspendedYield",v="executing",x="completed",k={};function m(){}function h(){}function y(){}var B={};d(B,c,(function(){return this}));var w=Object.getPrototypeOf,P=w&&w(w(G([])));P&&P!==i&&u.call(P,c)&&(B=P);var S=y.prototype=m.prototype=Object.create(B);function 
E(e){["next","throw","return"].forEach((function(t){d(e,t,(function(e){return this._invoke(t,e)}))}))}function U(e,r){function n(a,i,o,s){var c=b(e[a],e,i);if("throw"!==c.type){var l=c.arg,_=l.value;return _&&"object"==t(_)&&u.call(_,"__await")?r.resolve(_.__await).then((function(e){n("next",e,o,s)}),(function(e){n("throw",e,o,s)})):r.resolve(_).then((function(e){l.value=e,o(l)}),(function(e){return n("throw",e,o,s)}))}s(c.arg)}var a;o(this,"_invoke",{value:function(e,t){function i(){return new r((function(r,a){n(e,t,r,a)}))}return a=a?a.then(i,i):i()}})}function z(e,t,r){var a=p;return function(i,u){if(a===v)throw new Error("Generator is already running");if(a===x){if("throw"===i)throw u;return{value:n,done:!0}}for(r.method=i,r.arg=u;;){var o=r.delegate;if(o){var s=j(o,r);if(s){if(s===k)continue;return s}}if("next"===r.method)r.sent=r._sent=r.arg;else if("throw"===r.method){if(a===p)throw a=x,r.arg;r.dispatchException(r.arg)}else"return"===r.method&&r.abrupt("return",r.arg);a=v;var c=b(e,t,r);if("normal"===c.type){if(a=r.done?x:g,c.arg===k)continue;return{value:c.arg,done:r.done}}"throw"===c.type&&(a=x,r.method="throw",r.arg=c.arg)}}}function j(e,t){var r=t.method,a=e.iterator[r];if(a===n)return t.delegate=null,"throw"===r&&e.iterator.return&&(t.method="return",t.arg=n,j(e,t),"throw"===t.method)||"return"!==r&&(t.method="throw",t.arg=new TypeError("The iterator does not provide a '"+r+"' method")),k;var i=b(a,e.iterator,t.arg);if("throw"===i.type)return t.method="throw",t.arg=i.arg,t.delegate=null,k;var u=i.arg;return u?u.done?(t[e.resultName]=u.value,t.next=e.nextLoc,"return"!==t.method&&(t.method="next",t.arg=n),t.delegate=null,k):u:(t.method="throw",t.arg=new TypeError("iterator result is not an object"),t.delegate=null,k)}function M(e){var t={tryLoc:e[0]};1 in e&&(t.catchLoc=e[1]),2 in e&&(t.finallyLoc=e[2],t.afterLoc=e[3]),this.tryEntries.push(t)}function C(e){var t=e.completion||{};t.type="normal",delete t.arg,e.completion=t}function 
O(e){this.tryEntries=[{tryLoc:"root"}],e.forEach(M,this),this.reset(!0)}function G(e){if(e||""===e){var r=e[c];if(r)return r.call(e);if("function"==typeof e.next)return e;if(!isNaN(e.length)){var a=-1,i=function t(){for(;++a=0;--a){var i=this.tryEntries[a],o=i.completion;if("root"===i.tryLoc)return r("end");if(i.tryLoc<=this.prev){var s=u.call(i,"catchLoc"),c=u.call(i,"finallyLoc");if(s&&c){if(this.prev=0;--r){var n=this.tryEntries[r];if(n.tryLoc<=this.prev&&u.call(n,"finallyLoc")&&this.prev=0;--t){var r=this.tryEntries[t];if(r.finallyLoc===e)return this.complete(r.completion,r.afterLoc),C(r),k}},catch:function(e){for(var t=this.tryEntries.length-1;t>=0;--t){var r=this.tryEntries[t];if(r.tryLoc===e){var n=r.completion;if("throw"===n.type){var a=n.arg;C(r)}return a}}throw new Error("illegal catch attempt")},delegateYield:function(e,t,r){return this.delegate={iterator:G(e),resultName:t,nextLoc:r},"next"===this.method&&(this.arg=n),k}},a}e.exports=r,e.exports.__esModule=!0,e.exports.default=e.exports}(P);var U=(0,P.exports)();try{regeneratorRuntime=U}catch(e){"object"==typeof globalThis?globalThis.regeneratorRuntime=U:Function("r","regeneratorRuntime = r")(U)}function z(e){return z="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(e){return typeof e}:function(e){return e&&"function"==typeof Symbol&&e.constructor===Symbol&&e!==Symbol.prototype?"symbol":typeof e},z(e)}function j(e){var t=function(e,t){if("object"!==z(e)||null===e)return e;var r=e[Symbol.toPrimitive];if(void 0!==r){var n=r.call(e,t||"default");if("object"!==z(n))return n;throw new TypeError("@@toPrimitive must return a primitive value.")}return("string"===t?String:Number)(e)}(e,"string");return"symbol"===z(t)?t:String(t)}function M(e,t){for(var r=0;r>>0:e}function O(e,t){(null==t||t>e.length)&&(t=e.length);for(var r=0,n=new Array(t);r=e.length?{done:!0}:{done:!1,value:e[n++]}},e:function(e){throw e},f:a}}throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order 
to be iterable, non-array objects must have a [Symbol.iterator]() method.")}var i,u=!0,o=!1;return{s:function(){r=r.call(e)},n:function(){var e=r.next();return u=e.done,e},e:function(e){o=!0,i=e},f:function(){try{u||null==r.return||r.return()}finally{if(o)throw i}}}}function R(e,t){(null==t||t>e.length)&&(t=e.length);for(var r=0,n=new Array(t);r=16&&(this.endComputePass(),this.flushCommandEncoder())}},{key:"endComputePass",value:function(){this._passEncoder&&(this._passEncoder.end(),this._passEncoder=null)}},{key:"getBuffer",value:function(e,t){var r=arguments.length>2&&void 0!==arguments[2]&&arguments[2],n=arguments.length>3?arguments[3]:void 0,a=this.getBufferKey(e,t);if(this.bufferReusePool.has(a)){var i=this.bufferReusePool.get(a);if(i&&i.length>0)return i.pop()}return this.device.createBuffer({size:e*Uint8Array.BYTES_PER_ELEMENT,usage:t,mappedAtCreation:r,label:n})}},{key:"scheduleUniformBufferForRelease",value:function(e){this._uniformBuffersPendingRelease.push(e)}},{key:"releaseBuffer",value:function(e){(!(arguments.length>1&&void 0!==arguments[1])||arguments[1])&&(this.endComputePass(),this.commandEncoder.clearBuffer(e,0,e.size));var t=this.getBufferKey(e.size,e.usage);this.bufferReusePool.has(t)||this.bufferReusePool.set(t,[]),this.bufferReusePool.get(t).push(e)}},{key:"sync",value:(u=t(_.mark((function e(){var t,r,n,a,i,u,o,s,c,l,d=this;return _.wrap((function(e){for(;;)switch(e.prev=e.next){case 0:return this.flushCommandEncoder(),e.next=3,this.device.queue.onSubmittedWorkDone();case 3:t=L(this.bufferReusePool.keys());try{for(t.s();!(r=t.n()).done;)if(n=r.value,(a=this.bufferReusePool.get(n))&&a.length>0){i=L(a);try{for(i.s();!(u=i.n()).done;)null==(o=u.value)||o.destroy()}catch(e){i.e(e)}finally{i.f()}}}catch(e){t.e(e)}finally{t.f()}this.bufferReusePool.clear(),s=_.mark((function e(){var t,r,n,a,i,u;return _.wrap((function(e){for(;;)switch(e.prev=e.next){case 
0:t=G(l[c],2),r=t[0],n=t[1],d.shaderTimes[r]||(d.shaderTimes[r]=[]),a=L(n),e.prev=3,u=_.mark((function e(){var t;return _.wrap((function(e){for(;;)switch(e.prev=e.next){case 0:(t=i.value).mapAsync(GPUMapMode.READ).then((function(){var e=new BigInt64Array(t.getMappedRange()),n=e[1]-e[0];t.unmap(),t.destroy(),d.shaderTimes[r].push(n)}));case 2:case"end":return e.stop()}}),e)})),a.s();case 6:if((i=a.n()).done){e.next=10;break}return e.delegateYield(u(),"t0",8);case 8:e.next=6;break;case 10:e.next=15;break;case 12:e.prev=12,e.t1=e.catch(3),a.e(e.t1);case 15:return e.prev=15,a.f(),e.finish(15);case 18:case"end":return e.stop()}}),e,null,[[3,12,15,18]])})),c=0,l=Object.entries(this.timestampBuffers);case 8:if(!(c args: argsStruct;\n\n@group(0) @binding(1)\nvar weight: array;\n\n".concat(e?"\n@group(0) @binding(2)\nvar x: array;\n\n@group(0) @binding(3) \nvar y: array;\n":" \n@group(0) @binding(2)\nvar x: array;\n","\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\nvar sdata: array, workgroup_size_x>;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(\n @builtin(local_invocation_id) local_id: vec3,\n @builtin(workgroup_id) workgroup_id: vec3\n) {\n let tid = local_id.x;\n let m = workgroup_id.x;\n let block_size = workgroup_size_x;\n \n var power_vec: vec4;\n let x_start: u32 = args.x_offset + (m * args.dimension);\n let skip = tid * 4;\n let shift = (block_size * 4);\n for (var j = 0u; j + skip < args.remainder_start; j += shift) {\n let local_index = x_start + j + skip; \n\n let x_vec = vec4(\n x[local_index],\n x[local_index + 1],\n x[local_index + 2],\n x[local_index + 3]);\n \n power_vec += x_vec * x_vec; \n } \n \n if (tid == 0 && args.remainder > 0) {\n var remainder_vec = vec4(0.0, 0.0, 0.0, 0.0);\n let x_idx = x_start + args.remainder_start;\n for (var j = 0u; j < args.remainder; j++) { \n remainder_vec[j] = x[x_idx + j];\n } \n power_vec += remainder_vec * 
remainder_vec;\n }\n \n sdata[tid] = power_vec;\n workgroupBarrier();\n\n ").concat("\n for (var s: u32 = workgroup_size_x / 2; s > 0; s >>= 1) {\n if tid < s {\n sdata[tid] += sdata[tid + s];\n }\n workgroupBarrier();\n }\n","\n \n let power = sdata[0].x + sdata[0].y + sdata[0].z + sdata[0].w;\n let norm: vec4 = vec4(1.0 / sqrt((power / f32(args.dimension)) + args.eps));\n \n let y_start: u32 = args.y_offset + (m * args.dimension);\n for (var j = 0u; j + skip < args.remainder_start; j += shift) {\n let local_index = j + skip;\n let x_idx = x_start + local_index;\n let x_vec = vec4(\n x[x_idx],\n x[x_idx + 1],\n x[x_idx + 2],\n x[x_idx + 3]);\n \n let weight_vec = vec4(\n weight[local_index],\n weight[local_index + 1],\n weight[local_index + 2],\n weight[local_index + 3]);\n let y_vec = x_vec * norm * weight_vec;\n \n let y_idx = y_start + local_index;\n").concat(e?" \n y[y_idx] = y_vec.x;\n y[y_idx + 1] = y_vec.y;\n y[y_idx + 2] = y_vec.z;\n y[y_idx + 3] = y_vec.w;\n":" \n x[y_idx] = y_vec.x;\n x[y_idx + 1] = y_vec.y;\n x[y_idx + 2] = y_vec.z;\n x[y_idx + 3] = y_vec.w;\n"," \n }\n \n if (tid == 0 && args.remainder > 0) {\n let x_idx = x_start + args.remainder_start;\n let weight_idx = args.remainder_start; \n let y_idx = y_start + args.remainder_start;\n for (var j = 0u; j < args.remainder; j++) {\n").concat(e?" 
\n y[y_idx + j] = x[x_idx + j] * norm[j] * weight[weight_idx + j];\n":" \n x[y_idx + j] = x[x_idx + j] * norm[j] * weight[weight_idx + j];\n"," \n } \n }\n}\n\n").concat(I)},je=function(e,t){var r=[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}}];t?(r.push({binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}}),r.push({binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}})):r.push({binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}});var n=e.createBindGroupLayout({label:"norm forward ".concat(t?"multi":"single"," buffer bind group layout"),entries:r}),a=e.createPipelineLayout({label:"norm forward ".concat(t?"multi":"single"," buffer pipeline layout"),bindGroupLayouts:[n]}),i=e.createShaderModule({label:"norm forward ".concat(t?"multi":"single"," buffer shader module"),code:ze(t)});return{computePipeline:e.createComputePipeline({label:"norm forward ".concat(t?"multi":"single"," buffer pipeline"),layout:a,compute:{module:i,entryPoint:q,constants:{workgroup_size_x:N}}})}},Me=a(a({},Ue,(function(e){return je(e,!1)})),Ee,(function(e){return je(e,!0)})),Ce="pv_picollm_norm_layer_forward_multi_buffer_shader",Oe="\nstruct argsStruct {\n n: u32,\n dimension: u32,\n eps: f32,\n weight_offset: u32,\n bias_offset: u32,\n x_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar weight: array;\n\n@group(0) @binding(2)\nvar bias: array;\n\n@group(0) @binding(3)\nvar x: array;\n\n@group(0) @binding(4)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n \n let x_start: u32 = args.x_offset + (global_id.x * 
args.dimension);\n\n var mean: f32 = 0.0;\n for (var j = 0u; j < args.dimension; j++) {\n mean += x[x_start + j];\n }\n mean /= f32(args.dimension);\n\n var mean2: f32 = 0.0;\n for (var j = 0u; j < args.dimension; j++) {\n mean2 += (x[x_start + j] - mean) * (x[x_start + j] - mean);\n }\n mean2 /= f32(args.dimension);\n\n var norm: f32 = 1.0 / sqrt(mean2 + args.eps);\n\n var y_start = args.y_offset + (global_id.x * args.dimension);\n for (var j = 0u; j < args.dimension; j++) {\n y[y_start + j] = ((x[x_start + j] - mean) * norm * weight[args.weight_offset + j]) + bias[args.bias_offset + j];\n }\n}\n\n".concat(I),Ge="pv_picollm_norm_layer_forward_single_buffer_shader",Te="\nstruct argsStruct {\n n: u32,\n dimension: u32,\n eps: f32,\n weight_offset: u32,\n bias_offset: u32,\n x_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar weight: array;\n\n@group(0) @binding(2)\nvar bias: array;\n\n@group(0) @binding(3)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n \n let x_start: u32 = args.x_offset + (global_id.x * args.dimension);\n\n var mean: f32 = 0.0;\n for (var j = 0u; j < args.dimension; j++) {\n mean += y[x_start + j];\n }\n mean /= f32(args.dimension);\n\n var mean2: f32 = 0.0;\n for (var j = 0u; j < args.dimension; j++) {\n mean2 += (y[x_start + j] - mean) * (y[x_start + j] - mean);\n }\n mean2 /= f32(args.dimension);\n\n var norm: f32 = 1.0 / sqrt(mean2 + args.eps);\n\n var y_start = args.y_offset + (global_id.x * args.dimension);\n for (var j = 0u; j < args.dimension; j++) {\n y[y_start + j] = ((y[x_start + j] - mean) * norm * weight[args.weight_offset + j]) + bias[args.bias_offset + j];\n }\n}\n\n".concat(I),Le=a(a({},Ge,(function(e){var 
t=e.createBindGroupLayout({label:"norm layer forward single buffer bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]}),r=e.createPipelineLayout({label:"norm layer forward single buffer pipeline layout",bindGroupLayouts:[t]}),n=e.createShaderModule({label:"norm layer forward single buffer shader module",code:Te});return{computePipeline:e.createComputePipeline({label:"norm layer forward single buffer pipeline",layout:r,compute:{module:n,entryPoint:q,constants:{workgroup_size_x:N}}})}})),Ce,(function(e){var t=e.createBindGroupLayout({label:"norm layer forward multi buffer bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:4,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]}),r=e.createPipelineLayout({label:"norm layer forward multi buffer pipeline layout",bindGroupLayouts:[t]}),n=e.createShaderModule({label:"norm layer forward multi buffer shader module",code:Oe});return{computePipeline:e.createComputePipeline({label:"norm layer forward multi buffer pipeline",layout:r,compute:{module:n,entryPoint:q,constants:{workgroup_size_x:N}}})}})),Re="pv_picollm_transformer_add_to_buffer_shader",Ae="\nstruct argsStruct {\n n: u32,\n x_offset: u32,\n buffer_offset: u32, \n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array;\n\n@group(0) @binding(2)\nvar buffer: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride 
workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n \n buffer[args.buffer_offset + global_id.x] += x[args.x_offset + global_id.x]; \n}\n\n".concat(I),Fe="pv_picollm_transformer_add_buffers_shader",Ne="\n\nstruct argsStruct {\n n: u32,\n buffer1_offset: u32,\n buffer2_offset: u32,\n y_offset: u32, \n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar buffer1: array;\n\n@group(0) @binding(2)\nvar buffer2: array;\n\n@group(0) @binding(3)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n \n y[args.y_offset + global_id.x] = buffer1[args.buffer1_offset + global_id.x] + buffer2[args.buffer2_offset + global_id.x]; \n}\n\n".concat(I),Ye=a(a({},Re,(function(e){var t=e.createBindGroupLayout({label:"transformer add to buffer bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]}),r=e.createPipelineLayout({label:"transformer add to buffer pipeline layout",bindGroupLayouts:[t]}),n=e.createShaderModule({label:"transformer add to buffer shader module",code:Ae});return{computePipeline:e.createComputePipeline({label:"transformer add to buffer compute",layout:r,compute:{module:n,entryPoint:q,constants:{workgroup_size_x:N}}})}})),Fe,(function(e){var t=e.createBindGroupLayout({label:"transformer add buffers bind group 
layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]}),r=e.createPipelineLayout({label:"transformer add buffers pipeline layout",bindGroupLayouts:[t]}),n=e.createShaderModule({label:"transformer add buffers shader module",code:Ne});return{computePipeline:e.createComputePipeline({label:"transformer add buffers pipeline",layout:r,compute:{module:n,entryPoint:q,constants:{workgroup_size_x:N}}})}})),We="pv_picollm_weight_float_forward_shader",Ie="\n\nstruct argsStruct {\n nr: u32,\n nc: u32,\n w_offset: u32,\n x_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar w: array;\n\n@group(0) @binding(2)\nvar x: array;\n\n@group(0) @binding(3)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(\n @builtin(workgroup_id) workgroup_id : vec3,\n @builtin(local_invocation_id) local_id : vec3\n) {\n if (local_id.x >= args.nr) {\n return;\n }\n let x_start: u32 = args.x_offset + (workgroup_id.x * args.nc);\n let y_idx: u32 = local_id.x + args.y_offset + (workgroup_id.x * args.nr);\n \n let w_start: u32 = args.w_offset + (local_id.x * args.nc);\n for (var j = 0u; j < args.nc; j++) {\n y[y_idx] += w[w_start + j] * x[x_start + j]; \n }\n}\n\n".concat(I),qe=a({},We,(function(e){var t=e.createBindGroupLayout({label:"weight float forward bind group 
layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]}),r=e.createPipelineLayout({label:"weight float forward pipeline layout",bindGroupLayouts:[t]}),n=e.createShaderModule({label:"weight float forward shader module",code:Ie});return{computePipeline:e.createComputePipeline({label:"weight float forward pipeline",layout:r,compute:{module:n,entryPoint:q,constants:{workgroup_size_x:N}}})}})),Ve=16,Ke="\n\nconst exponents: array = array(\n 2.9103830456733704e-11, \n 5.820766091346741e-11, \n 1.1641532182693481e-10, \n 2.3283064365386963e-10,\n 4.656612873077393e-10, \n 9.313225746154785e-10, \n 1.862645149230957e-09, \n 3.725290298461914e-09,\n 7.450580596923828e-09, \n 1.4901161193847656e-08, \n 2.9802322387695312e-08, \n 5.960464477539063e-08,\n 1.1920928955078125e-07, \n 2.384185791015625e-07, \n 4.76837158203125e-07, \n 9.5367431640625e-07,\n 1.9073486328125e-06, \n 3.814697265625e-06, \n 7.62939453125e-06, \n 1.52587890625e-05, \n 3.0517578125e-05,\n 6.103515625e-05, \n 0.0001220703125, \n 0.000244140625, \n 0.00048828125, \n 0.0009765625, \n 0.001953125, \n 0.00390625,\n 0.0078125, \n 0.015625, \n 0.03125, \n 0.0625);\n\nfn from_fp510(x: u32) -> f32 {\n let exponent = f32(exponents[extractBits(x, 10u, 5u)]); \n let fractional = f32(extractBits(x, 0u, 10u)); \n let abs = exponent * fractional;\n return abs * (1.0 - (2.0 * f32(extractBits(x, 15u, 1u))));\n}\n",De={3:"\n\nstruct argsStruct {\n nbr: u32,\n nbc: u32,\n blocks_offset: u32\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar blocks: array;\n\n".concat("\nvar unpacked: array;\n\nfn unpack_block_128_bit_depth_3(packed_offset: u32) {\n let val_0 = blocks[packed_offset]; \n unpacked[0] = extractBits(val_0, 0u, 3u);\n 
unpacked[1] = extractBits(val_0, 3u, 3u);\n unpacked[2] = extractBits(val_0, 6u, 3u);\n unpacked[3] = extractBits(val_0, 9u, 3u);\n unpacked[4] = extractBits(val_0, 12u, 3u);\n unpacked[5] = extractBits(val_0, 15u, 3u);\n unpacked[6] = extractBits(val_0, 18u, 3u);\n unpacked[7] = extractBits(val_0, 21u, 3u);\n unpacked[8] = extractBits(val_0, 24u, 3u);\n unpacked[9] = extractBits(val_0, 27u, 3u);\n unpacked[10] = extractBits(val_0, 30u, 2u);\n \n let val_1 = blocks[packed_offset + 1];\n unpacked[10] = insertBits(unpacked[10], extractBits(val_1, 0u, 1u), 2u, 1u);\n unpacked[11] = extractBits(val_1, 1u, 3u);\n unpacked[12] = extractBits(val_1, 4u, 3u);\n unpacked[13] = extractBits(val_1, 7u, 3u);\n unpacked[14] = extractBits(val_1, 10u, 3u);\n unpacked[15] = extractBits(val_1, 13u, 3u);\n unpacked[16] = extractBits(val_1, 16u, 3u);\n unpacked[17] = extractBits(val_1, 19u, 3u);\n unpacked[18] = extractBits(val_1, 22u, 3u);\n unpacked[19] = extractBits(val_1, 25u, 3u);\n unpacked[20] = extractBits(val_1, 28u, 3u);\n unpacked[21] = extractBits(val_1, 31u, 1u);\n \n let val_2 = blocks[packed_offset + 2];\n unpacked[21] = insertBits(unpacked[21], extractBits(val_2, 0u, 2u), 1u, 2u); \n unpacked[22] = extractBits(val_2, 2u, 3u);\n unpacked[23] = extractBits(val_2, 5u, 3u);\n unpacked[24] = extractBits(val_2, 8u, 3u);\n unpacked[25] = extractBits(val_2, 11u, 3u);\n unpacked[26] = extractBits(val_2, 14u, 3u);\n unpacked[27] = extractBits(val_2, 17u, 3u);\n unpacked[28] = extractBits(val_2, 20u, 3u);\n unpacked[29] = extractBits(val_2, 23u, 3u);\n unpacked[30] = extractBits(val_2, 26u, 3u);\n unpacked[31] = extractBits(val_2, 29u, 3u);\n \n let val_3 = blocks[packed_offset + 3]; \n unpacked[32] = extractBits(val_3, 0u, 3u);\n unpacked[33] = extractBits(val_3, 3u, 3u);\n unpacked[34] = extractBits(val_3, 6u, 3u);\n unpacked[35] = extractBits(val_3, 9u, 3u);\n unpacked[36] = extractBits(val_3, 12u, 3u);\n unpacked[37] = extractBits(val_3, 15u, 3u);\n unpacked[38] = 
extractBits(val_3, 18u, 3u);\n unpacked[39] = extractBits(val_3, 21u, 3u);\n unpacked[40] = extractBits(val_3, 24u, 3u);\n unpacked[41] = extractBits(val_3, 27u, 3u);\n unpacked[42] = extractBits(val_3, 30u, 2u);\n \n let val_4 = blocks[packed_offset + 4];\n unpacked[42] = insertBits(unpacked[42], extractBits(val_4, 0u, 1u), 2u, 1u);\n unpacked[43] = extractBits(val_4, 1u, 3u);\n unpacked[44] = extractBits(val_4, 4u, 3u);\n unpacked[45] = extractBits(val_4, 7u, 3u);\n unpacked[46] = extractBits(val_4, 10u, 3u);\n unpacked[47] = extractBits(val_4, 13u, 3u);\n unpacked[48] = extractBits(val_4, 16u, 3u);\n unpacked[49] = extractBits(val_4, 19u, 3u);\n unpacked[50] = extractBits(val_4, 22u, 3u);\n unpacked[51] = extractBits(val_4, 25u, 3u);\n unpacked[52] = extractBits(val_4, 28u, 3u);\n unpacked[53] = extractBits(val_4, 31u, 1u);\n \n let val_5 = blocks[packed_offset + 5];\n unpacked[53] = insertBits(unpacked[53], extractBits(val_5, 0u, 2u), 1u, 2u); \n unpacked[54] = extractBits(val_5, 2u, 3u);\n unpacked[55] = extractBits(val_5, 5u, 3u);\n unpacked[56] = extractBits(val_5, 8u, 3u);\n unpacked[57] = extractBits(val_5, 11u, 3u);\n unpacked[58] = extractBits(val_5, 14u, 3u);\n unpacked[59] = extractBits(val_5, 17u, 3u);\n unpacked[60] = extractBits(val_5, 20u, 3u);\n unpacked[61] = extractBits(val_5, 23u, 3u);\n unpacked[62] = extractBits(val_5, 26u, 3u);\n unpacked[63] = extractBits(val_5, 29u, 3u);\n \n let val_6 = blocks[packed_offset + 6];\n unpacked[64] = extractBits(val_6, 0u, 3u);\n unpacked[65] = extractBits(val_6, 3u, 3u);\n unpacked[66] = extractBits(val_6, 6u, 3u);\n unpacked[67] = extractBits(val_6, 9u, 3u);\n unpacked[68] = extractBits(val_6, 12u, 3u);\n unpacked[69] = extractBits(val_6, 15u, 3u);\n unpacked[70] = extractBits(val_6, 18u, 3u);\n unpacked[71] = extractBits(val_6, 21u, 3u);\n unpacked[72] = extractBits(val_6, 24u, 3u);\n unpacked[73] = extractBits(val_6, 27u, 3u);\n unpacked[74] = extractBits(val_6, 30u, 2u);\n \n let val_7 = 
blocks[packed_offset + 7];\n unpacked[74] = insertBits(unpacked[74], extractBits(val_7, 0u, 1u), 2u, 1u);\n unpacked[75] = extractBits(val_7, 1u, 3u);\n unpacked[76] = extractBits(val_7, 4u, 3u);\n unpacked[77] = extractBits(val_7, 7u, 3u);\n unpacked[78] = extractBits(val_7, 10u, 3u);\n unpacked[79] = extractBits(val_7, 13u, 3u);\n unpacked[80] = extractBits(val_7, 16u, 3u);\n unpacked[81] = extractBits(val_7, 19u, 3u);\n unpacked[82] = extractBits(val_7, 22u, 3u);\n unpacked[83] = extractBits(val_7, 25u, 3u);\n unpacked[84] = extractBits(val_7, 28u, 3u);\n unpacked[85] = extractBits(val_7, 31u, 1u);\n \n let val_8 = blocks[packed_offset + 8];\n unpacked[85] = insertBits(unpacked[85], extractBits(val_8, 0u, 2u), 1u, 2u); \n unpacked[86] = extractBits(val_8, 2u, 3u);\n unpacked[87] = extractBits(val_8, 5u, 3u);\n unpacked[88] = extractBits(val_8, 8u, 3u);\n unpacked[89] = extractBits(val_8, 11u, 3u);\n unpacked[90] = extractBits(val_8, 14u, 3u);\n unpacked[91] = extractBits(val_8, 17u, 3u);\n unpacked[92] = extractBits(val_8, 20u, 3u);\n unpacked[93] = extractBits(val_8, 23u, 3u);\n unpacked[94] = extractBits(val_8, 26u, 3u);\n unpacked[95] = extractBits(val_8, 29u, 3u);\n \n let val_9 = blocks[packed_offset + 9];\n unpacked[96] = extractBits(val_9, 0u, 3u);\n unpacked[97] = extractBits(val_9, 3u, 3u);\n unpacked[98] = extractBits(val_9, 6u, 3u);\n unpacked[99] = extractBits(val_9, 9u, 3u);\n unpacked[100] = extractBits(val_9, 12u, 3u);\n unpacked[101] = extractBits(val_9, 15u, 3u);\n unpacked[102] = extractBits(val_9, 18u, 3u);\n unpacked[103] = extractBits(val_9, 21u, 3u);\n unpacked[104] = extractBits(val_9, 24u, 3u);\n unpacked[105] = extractBits(val_9, 27u, 3u);\n unpacked[106] = extractBits(val_9, 30u, 2u);\n \n let val_10 = blocks[packed_offset + 10];\n unpacked[106] = insertBits(unpacked[106], extractBits(val_10, 0u, 1u), 2u, 1u);\n unpacked[107] = extractBits(val_10, 1u, 3u);\n unpacked[108] = extractBits(val_10, 4u, 3u);\n unpacked[109] = 
extractBits(val_10, 7u, 3u);\n unpacked[110] = extractBits(val_10, 10u, 3u);\n unpacked[111] = extractBits(val_10, 13u, 3u);\n unpacked[112] = extractBits(val_10, 16u, 3u);\n unpacked[113] = extractBits(val_10, 19u, 3u);\n unpacked[114] = extractBits(val_10, 22u, 3u);\n unpacked[115] = extractBits(val_10, 25u, 3u);\n unpacked[116] = extractBits(val_10, 28u, 3u);\n unpacked[117] = extractBits(val_10, 31u, 1u);\n \n let val_11 = blocks[packed_offset + 11];\n unpacked[117] = insertBits(unpacked[117], extractBits(val_11, 0u, 2u), 1u, 2u); \n unpacked[118] = extractBits(val_11, 2u, 3u);\n unpacked[119] = extractBits(val_11, 5u, 3u);\n unpacked[120] = extractBits(val_11, 8u, 3u);\n unpacked[121] = extractBits(val_11, 11u, 3u);\n unpacked[122] = extractBits(val_11, 14u, 3u);\n unpacked[123] = extractBits(val_11, 17u, 3u);\n unpacked[124] = extractBits(val_11, 20u, 3u);\n unpacked[125] = extractBits(val_11, 23u, 3u);\n unpacked[126] = extractBits(val_11, 26u, 3u);\n unpacked[127] = extractBits(val_11, 29u, 3u);\n}\n","\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.nbr || global_id.y >= args.nbc) {\n return;\n }\n \n let blocks_start: u32 = args.blocks_offset + ((global_id.x * args.nbc + global_id.y) * 12u); \n unpack_block_128_bit_depth_3(blocks_start); \n \n let b01: u32 = blocks_start;\n let b2: u32 = blocks_start + 8u;\n \n for (var r = 0u; r < ").concat(Ve,"u; r++) {\n let unpacked_idx = r * ").concat(8,"u;\n let b01_idx = b01 + (r / 2u);\n let b2_idx = b2 + (r / 4u);\n\n blocks[b01_idx] = insertBits(blocks[b01_idx], extractBits(unpacked[unpacked_idx], 0u, 2u), (r * 16u) % 32u, 16u);\n blocks[b2_idx] = insertBits(blocks[b2_idx], extractBits(unpacked[unpacked_idx], 2u, 1u), (r * 8u) % 32u, 8u); \n }\n \n for (var c = 1u; c < ").concat(8,"u; 
c++) {\n for (var r = 0u; r < ").concat(Ve,"u; r++) {\n let unpacked_idx = r * ").concat(8,"u + c;\n let b01_idx = b01 + (r / 2u);\n let b2_idx = b2 + (r / 4u);\n \n blocks[b01_idx] = insertBits(blocks[b01_idx], extractBits(unpacked[unpacked_idx], 0u, 2u), ((r * 16u) % 32u) + (2u * c), 2u);\n blocks[b2_idx] = insertBits(blocks[b2_idx], extractBits(unpacked[unpacked_idx], 2u, 1u), ((r * 8u) % 32u) + c, 1u); \n }\n }\n}\n\n").concat(I),5:"\n\nstruct argsStruct {\n nbr: u32,\n nbc: u32,\n blocks_offset: u32\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar blocks: array;\n\n".concat("\nvar unpacked: array;\n\nfn unpack_block_128_bit_depth_5(packed_offset: u32) { \n let val_0 = blocks[packed_offset];\n unpacked[0] = extractBits(val_0, 0u, 5u);\n unpacked[1] = extractBits(val_0, 5u, 5u);\n unpacked[2] = extractBits(val_0, 10u, 5u);\n unpacked[3] = extractBits(val_0, 15u, 5u);\n unpacked[4] = extractBits(val_0, 20u, 5u);\n unpacked[5] = extractBits(val_0, 25u, 5u);\n unpacked[6] = extractBits(val_0, 30u, 2u);\n \n let val_1 = blocks[packed_offset + 1];\n unpacked[6] = insertBits(unpacked[6], extractBits(val_1, 0u, 3u), 2u, 3u); \n unpacked[7] = extractBits(val_1, 3u, 5u);\n unpacked[8] = extractBits(val_1, 8u, 5u);\n unpacked[9] = extractBits(val_1, 13u, 5u);\n unpacked[10] = extractBits(val_1, 18u, 5u);\n unpacked[11] = extractBits(val_1, 23u, 5u);\n unpacked[12] = extractBits(val_1, 28u, 4u);\n \n let val_2 = blocks[packed_offset + 2];\n unpacked[12] = insertBits(unpacked[12], extractBits(val_2, 0u, 1u), 4u, 1u);\n unpacked[13] = extractBits(val_2, 1u, 5u);\n unpacked[14] = extractBits(val_2, 6u, 5u);\n unpacked[15] = extractBits(val_2, 11u, 5u);\n unpacked[16] = extractBits(val_2, 16u, 5u);\n unpacked[17] = extractBits(val_2, 21u, 5u);\n unpacked[18] = extractBits(val_2, 26u, 5u);\n unpacked[19] = extractBits(val_2, 31u, 1u);\n \n let val_3 = blocks[packed_offset + 3];\n unpacked[19] = insertBits(unpacked[19], extractBits(val_3, 0u, 
4u), 1u, 4u);\n unpacked[20] = extractBits(val_3, 4u, 5u);\n unpacked[21] = extractBits(val_3, 9u, 5u);\n unpacked[22] = extractBits(val_3, 14u, 5u);\n unpacked[23] = extractBits(val_3, 19u, 5u);\n unpacked[24] = extractBits(val_3, 24u, 5u);\n unpacked[25] = extractBits(val_3, 29u, 3u);\n \n let val_4 = blocks[packed_offset + 4];\n unpacked[25] = insertBits(unpacked[25], extractBits(val_4, 0u, 2u), 3u, 2u);\n unpacked[26] = extractBits(val_4, 2u, 5u);\n unpacked[27] = extractBits(val_4, 7u, 5u);\n unpacked[28] = extractBits(val_4, 12u, 5u);\n unpacked[29] = extractBits(val_4, 17u, 5u);\n unpacked[30] = extractBits(val_4, 22u, 5u);\n unpacked[31] = extractBits(val_4, 27u, 5u);\n \n let val_5 = blocks[packed_offset + 5];\n unpacked[32] = extractBits(val_5, 0u, 5u);\n unpacked[33] = extractBits(val_5, 5u, 5u);\n unpacked[34] = extractBits(val_5, 10u, 5u);\n unpacked[35] = extractBits(val_5, 15u, 5u);\n unpacked[36] = extractBits(val_5, 20u, 5u);\n unpacked[37] = extractBits(val_5, 25u, 5u);\n unpacked[38] = extractBits(val_5, 30u, 2u);\n\n let val_6 = blocks[packed_offset + 6];\n unpacked[38] = insertBits(unpacked[38], extractBits(val_6, 0u, 3u), 2u, 3u);\n unpacked[39] = extractBits(val_6, 3u, 5u);\n unpacked[40] = extractBits(val_6, 8u, 5u);\n unpacked[41] = extractBits(val_6, 13u, 5u);\n unpacked[42] = extractBits(val_6, 18u, 5u);\n unpacked[43] = extractBits(val_6, 23u, 5u);\n unpacked[44] = extractBits(val_6, 28u, 4u);\n\n let val_7 = blocks[packed_offset + 7];\n unpacked[44] = insertBits(unpacked[44], extractBits(val_7, 0u, 1u), 4u, 1u);\n unpacked[45] = extractBits(val_7, 1u, 5u);\n unpacked[46] = extractBits(val_7, 6u, 5u);\n unpacked[47] = extractBits(val_7, 11u, 5u);\n unpacked[48] = extractBits(val_7, 16u, 5u);\n unpacked[49] = extractBits(val_7, 21u, 5u);\n unpacked[50] = extractBits(val_7, 26u, 5u);\n unpacked[51] = extractBits(val_7, 31u, 1u);\n\n let val_8 = blocks[packed_offset + 8];\n unpacked[51] = insertBits(unpacked[51], extractBits(val_8, 0u, 4u), 
1u, 4u);\n unpacked[52] = extractBits(val_8, 4u, 5u);\n unpacked[53] = extractBits(val_8, 9u, 5u);\n unpacked[54] = extractBits(val_8, 14u, 5u);\n unpacked[55] = extractBits(val_8, 19u, 5u);\n unpacked[56] = extractBits(val_8, 24u, 5u);\n unpacked[57] = extractBits(val_8, 29u, 3u);\n \n let val_9 = blocks[packed_offset + 9];\n unpacked[57] = insertBits(unpacked[57], extractBits(val_9, 0u, 2u), 3u, 2u);\n unpacked[58] = extractBits(val_9, 2u, 5u);\n unpacked[59] = extractBits(val_9, 7u, 5u);\n unpacked[60] = extractBits(val_9, 12u, 5u);\n unpacked[61] = extractBits(val_9, 17u, 5u);\n unpacked[62] = extractBits(val_9, 22u, 5u);\n unpacked[63] = extractBits(val_9, 27u, 5u);\n \n let val_10 = blocks[packed_offset + 10];\n unpacked[64] = extractBits(val_10, 0u, 5u);\n unpacked[65] = extractBits(val_10, 5u, 5u);\n unpacked[66] = extractBits(val_10, 10u, 5u);\n unpacked[67] = extractBits(val_10, 15u, 5u);\n unpacked[68] = extractBits(val_10, 20u, 5u);\n unpacked[69] = extractBits(val_10, 25u, 5u);\n unpacked[70] = extractBits(val_10, 30u, 2u);\n\n let val_11 = blocks[packed_offset + 11];\n unpacked[70] = insertBits(unpacked[70], extractBits(val_11, 0u, 3u), 2u, 3u);\n unpacked[71] = extractBits(val_11, 3u, 5u);\n unpacked[72] = extractBits(val_11, 8u, 5u);\n unpacked[73] = extractBits(val_11, 13u, 5u);\n unpacked[74] = extractBits(val_11, 18u, 5u);\n unpacked[75] = extractBits(val_11, 23u, 5u);\n unpacked[76] = extractBits(val_11, 28u, 4u);\n\n let val_12 = blocks[packed_offset + 12];\n unpacked[76] = insertBits(unpacked[76], extractBits(val_12, 0u, 1u), 4u, 1u);\n unpacked[77] = extractBits(val_12, 1u, 5u);\n unpacked[78] = extractBits(val_12, 6u, 5u);\n unpacked[79] = extractBits(val_12, 11u, 5u);\n unpacked[80] = extractBits(val_12, 16u, 5u);\n unpacked[81] = extractBits(val_12, 21u, 5u);\n unpacked[82] = extractBits(val_12, 26u, 5u);\n unpacked[83] = extractBits(val_12, 31u, 1u);\n\n let val_13 = blocks[packed_offset + 13];\n unpacked[83] = insertBits(unpacked[83], 
extractBits(val_13, 0u, 4u), 1u, 4u);\n unpacked[84] = extractBits(val_13, 4u, 5u);\n unpacked[85] = extractBits(val_13, 9u, 5u);\n unpacked[86] = extractBits(val_13, 14u, 5u);\n unpacked[87] = extractBits(val_13, 19u, 5u);\n unpacked[88] = extractBits(val_13, 24u, 5u);\n unpacked[89] = extractBits(val_13, 29u, 3u);\n \n let val_14 = blocks[packed_offset + 14];\n unpacked[89] = insertBits(unpacked[89], extractBits(val_14, 0u, 2u), 3u, 2u);\n unpacked[90] = extractBits(val_14, 2u, 5u);\n unpacked[91] = extractBits(val_14, 7u, 5u);\n unpacked[92] = extractBits(val_14, 12u, 5u);\n unpacked[93] = extractBits(val_14, 17u, 5u);\n unpacked[94] = extractBits(val_14, 22u, 5u);\n unpacked[95] = extractBits(val_14, 27u, 5u);\n\n let val_15 = blocks[packed_offset + 15];\n unpacked[96] = extractBits(val_15, 0u, 5u);\n unpacked[97] = extractBits(val_15, 5u, 5u);\n unpacked[98] = extractBits(val_15, 10u, 5u);\n unpacked[99] = extractBits(val_15, 15u, 5u);\n unpacked[100] = extractBits(val_15, 20u, 5u);\n unpacked[101] = extractBits(val_15, 25u, 5u);\n unpacked[102] = extractBits(val_15, 30u, 2u);\n\n let val_16 = blocks[packed_offset + 16];\n unpacked[102] = insertBits(unpacked[102], extractBits(val_16, 0u, 3u), 2u, 3u);\n unpacked[103] = extractBits(val_16, 3u, 5u);\n unpacked[104] = extractBits(val_16, 8u, 5u);\n unpacked[105] = extractBits(val_16, 13u, 5u);\n unpacked[106] = extractBits(val_16, 18u, 5u);\n unpacked[107] = extractBits(val_16, 23u, 5u);\n unpacked[108] = extractBits(val_16, 28u, 4u);\n\n let val_17 = blocks[packed_offset + 17];\n unpacked[108] = insertBits(unpacked[108], extractBits(val_17, 0u, 1u), 4u, 1u);\n unpacked[109] = extractBits(val_17, 1u, 5u);\n unpacked[110] = extractBits(val_17, 6u, 5u);\n unpacked[111] = extractBits(val_17, 11u, 5u);\n unpacked[112] = extractBits(val_17, 16u, 5u);\n unpacked[113] = extractBits(val_17, 21u, 5u);\n unpacked[114] = extractBits(val_17, 26u, 5u);\n unpacked[115] = extractBits(val_17, 31u, 1u);\n \n let val_18 = 
blocks[packed_offset + 18];\n unpacked[115] = insertBits(unpacked[115], extractBits(val_18, 0u, 4u), 1u, 4u);\n unpacked[116] = extractBits(val_18, 4u, 5u);\n unpacked[117] = extractBits(val_18, 9u, 5u);\n unpacked[118] = extractBits(val_18, 14u, 5u);\n unpacked[119] = extractBits(val_18, 19u, 5u);\n unpacked[120] = extractBits(val_18, 24u, 5u);\n unpacked[121] = extractBits(val_18, 29u, 3u);\n\n let val_19 = blocks[packed_offset + 19];\n unpacked[121] = insertBits(unpacked[121], extractBits(val_19, 0u, 2u), 3u, 2u);\n unpacked[122] = extractBits(val_19, 2u, 5u);\n unpacked[123] = extractBits(val_19, 7u, 5u);\n unpacked[124] = extractBits(val_19, 12u, 5u);\n unpacked[125] = extractBits(val_19, 17u, 5u);\n unpacked[126] = extractBits(val_19, 22u, 5u);\n unpacked[127] = extractBits(val_19, 27u, 5u);\n}\n","\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.nbr || global_id.y >= args.nbc) {\n return;\n }\n\n let blocks_start: u32 = args.blocks_offset + ((global_id.x * args.nbc + global_id.y) * 20u);\n unpack_block_128_bit_depth_5(blocks_start); \n \n let b03: u32 = blocks_start;\n let b4: u32 = blocks_start + 16u;\n\n for (var r = 0u; r < ").concat(Ve,"u; r++) {\n let unpacked_idx = r * ").concat(8,"u;\n let b03_idx = b03 + r;\n let b4_idx = b4 + (r / 4u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 0u, 32u);\n blocks[b4_idx] = insertBits(blocks[b4_idx], extractBits(unpacked[unpacked_idx], 4u, 1u), (r * 8u) % 32u, 8u);\n }\n \n for (var c = 1u; c < ").concat(8,"u; c++) {\n for (var r = 0u; r < ").concat(Ve,"u; r++) {\n let unpacked_idx = r * ").concat(8,"u + c;\n let b03_idx = b03 + r;\n let b4_idx = b4 + (r / 4u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], 
extractBits(unpacked[unpacked_idx], 0u, 4u), 4 * c, 4u);\n blocks[b4_idx] = insertBits(blocks[b4_idx], extractBits(unpacked[unpacked_idx], 4u, 1u), ((r * 8u) % 32u) + c, 1u);\n }\n } \n}\n\n").concat(I),6:"\n\nstruct argsStruct {\n nbr: u32,\n nbc: u32,\n blocks_offset: u32\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar blocks: array;\n\n".concat("\nvar unpacked: array;\n\nfn unpack_block_128_bit_depth_6(packed_offset: u32) {\n let val_0 = blocks[packed_offset];\n unpacked[0] = extractBits(val_0, 0u, 6u);\n unpacked[1] = extractBits(val_0, 6u, 6u);\n unpacked[2] = extractBits(val_0, 12u, 6u);\n unpacked[3] = extractBits(val_0, 18u, 6u);\n unpacked[4] = extractBits(val_0, 24u, 6u);\n unpacked[5] = extractBits(val_0, 30u, 2u);\n \n let val_1 = blocks[packed_offset + 1];\n unpacked[5] = insertBits(unpacked[5], extractBits(val_1, 0u, 4u), 2u, 4u);\n unpacked[6] = extractBits(val_1, 4u, 6u);\n unpacked[7] = extractBits(val_1, 10u, 6u);\n unpacked[8] = extractBits(val_1, 16u, 6u);\n unpacked[9] = extractBits(val_1, 22u, 6u);\n unpacked[10] = extractBits(val_1, 28u, 4u);\n \n let val_2 = blocks[packed_offset + 2];\n unpacked[10] = insertBits(unpacked[10], extractBits(val_2, 0u, 2u), 4u, 2u);\n unpacked[11] = extractBits(val_2, 2u, 6u);\n unpacked[12] = extractBits(val_2, 8u, 6u);\n unpacked[13] = extractBits(val_2, 14u, 6u);\n unpacked[14] = extractBits(val_2, 20u, 6u);\n unpacked[15] = extractBits(val_2, 26u, 6u);\n \n let val_3 = blocks[packed_offset + 3];\n unpacked[16] = extractBits(val_3, 0u, 6u);\n unpacked[17] = extractBits(val_3, 6u, 6u);\n unpacked[18] = extractBits(val_3, 12u, 6u);\n unpacked[19] = extractBits(val_3, 18u, 6u);\n unpacked[20] = extractBits(val_3, 24u, 6u);\n unpacked[21] = extractBits(val_3, 30u, 2u);\n \n let val_4 = blocks[packed_offset + 4];\n unpacked[21] = insertBits(unpacked[21], extractBits(val_4, 0u, 4u), 2u, 4u);\n unpacked[22] = extractBits(val_4, 4u, 6u);\n unpacked[23] = extractBits(val_4, 10u, 
6u);\n unpacked[24] = extractBits(val_4, 16u, 6u);\n unpacked[25] = extractBits(val_4, 22u, 6u);\n unpacked[26] = extractBits(val_4, 28u, 4u);\n\n let val_5 = blocks[packed_offset + 5];\n unpacked[26] = insertBits(unpacked[26], extractBits(val_5, 0u, 2u), 4u, 2u);\n unpacked[27] = extractBits(val_5, 2u, 6u);\n unpacked[28] = extractBits(val_5, 8u, 6u);\n unpacked[29] = extractBits(val_5, 14u, 6u);\n unpacked[30] = extractBits(val_5, 20u, 6u);\n unpacked[31] = extractBits(val_5, 26u, 6u);\n\n let val_6 = blocks[packed_offset + 6];\n unpacked[32] = extractBits(val_6, 0u, 6u);\n unpacked[33] = extractBits(val_6, 6u, 6u);\n unpacked[34] = extractBits(val_6, 12u, 6u);\n unpacked[35] = extractBits(val_6, 18u, 6u);\n unpacked[36] = extractBits(val_6, 24u, 6u);\n unpacked[37] = extractBits(val_6, 30u, 2u);\n\n let val_7 = blocks[packed_offset + 7];\n unpacked[37] = insertBits(unpacked[37], extractBits(val_7, 0u, 4u), 2u, 4u);\n unpacked[38] = extractBits(val_7, 4u, 6u);\n unpacked[39] = extractBits(val_7, 10u, 6u);\n unpacked[40] = extractBits(val_7, 16u, 6u);\n unpacked[41] = extractBits(val_7, 22u, 6u);\n unpacked[42] = extractBits(val_7, 28u, 4u);\n \n let val_8 = blocks[packed_offset + 8];\n unpacked[42] = insertBits(unpacked[42], extractBits(val_8, 0u, 2u), 4u, 2u);\n unpacked[43] = extractBits(val_8, 2u, 6u);\n unpacked[44] = extractBits(val_8, 8u, 6u);\n unpacked[45] = extractBits(val_8, 14u, 6u);\n unpacked[46] = extractBits(val_8, 20u, 6u);\n unpacked[47] = extractBits(val_8, 26u, 6u);\n\n let val_9 = blocks[packed_offset + 9];\n unpacked[48] = extractBits(val_9, 0u, 6u);\n unpacked[49] = extractBits(val_9, 6u, 6u);\n unpacked[50] = extractBits(val_9, 12u, 6u);\n unpacked[51] = extractBits(val_9, 18u, 6u);\n unpacked[52] = extractBits(val_9, 24u, 6u);\n unpacked[53] = extractBits(val_9, 30u, 2u);\n\n let val_10 = blocks[packed_offset + 10];\n unpacked[53] = insertBits(unpacked[53], extractBits(val_10, 0u, 4u), 2u, 4u);\n unpacked[54] = extractBits(val_10, 4u, 
6u);\n unpacked[55] = extractBits(val_10, 10u, 6u);\n unpacked[56] = extractBits(val_10, 16u, 6u);\n unpacked[57] = extractBits(val_10, 22u, 6u);\n unpacked[58] = extractBits(val_10, 28u, 4u);\n\n let val_11 = blocks[packed_offset + 11];\n unpacked[58] = insertBits(unpacked[58], extractBits(val_11, 0u, 2u), 4u, 2u);\n unpacked[59] = extractBits(val_11, 2u, 6u);\n unpacked[60] = extractBits(val_11, 8u, 6u);\n unpacked[61] = extractBits(val_11, 14u, 6u);\n unpacked[62] = extractBits(val_11, 20u, 6u);\n unpacked[63] = extractBits(val_11, 26u, 6u);\n \n let val_12 = blocks[packed_offset + 12];\n unpacked[64] = extractBits(val_12, 0u, 6u);\n unpacked[65] = extractBits(val_12, 6u, 6u);\n unpacked[66] = extractBits(val_12, 12u, 6u);\n unpacked[67] = extractBits(val_12, 18u, 6u);\n unpacked[68] = extractBits(val_12, 24u, 6u);\n unpacked[69] = extractBits(val_12, 30u, 2u);\n\n let val_13 = blocks[packed_offset + 13];\n unpacked[69] = insertBits(unpacked[69], extractBits(val_13, 0u, 4u), 2u, 4u);\n unpacked[70] = extractBits(val_13, 4u, 6u);\n unpacked[71] = extractBits(val_13, 10u, 6u);\n unpacked[72] = extractBits(val_13, 16u, 6u);\n unpacked[73] = extractBits(val_13, 22u, 6u);\n unpacked[74] = extractBits(val_13, 28u, 4u);\n\n let val_14 = blocks[packed_offset + 14];\n unpacked[74] = insertBits(unpacked[74], extractBits(val_14, 0u, 2u), 4u, 2u);\n unpacked[75] = extractBits(val_14, 2u, 6u);\n unpacked[76] = extractBits(val_14, 8u, 6u);\n unpacked[77] = extractBits(val_14, 14u, 6u);\n unpacked[78] = extractBits(val_14, 20u, 6u);\n unpacked[79] = extractBits(val_14, 26u, 6u);\n\n let val_15 = blocks[packed_offset + 15];\n unpacked[80] = extractBits(val_15, 0u, 6u);\n unpacked[81] = extractBits(val_15, 6u, 6u);\n unpacked[82] = extractBits(val_15, 12u, 6u);\n unpacked[83] = extractBits(val_15, 18u, 6u);\n unpacked[84] = extractBits(val_15, 24u, 6u);\n unpacked[85] = extractBits(val_15, 30u, 2u);\n \n let val_16 = blocks[packed_offset + 16];\n unpacked[85] = 
insertBits(unpacked[85], extractBits(val_16, 0u, 4u), 2u, 4u);\n unpacked[86] = extractBits(val_16, 4u, 6u);\n unpacked[87] = extractBits(val_16, 10u, 6u);\n unpacked[88] = extractBits(val_16, 16u, 6u);\n unpacked[89] = extractBits(val_16, 22u, 6u);\n unpacked[90] = extractBits(val_16, 28u, 4u);\n\n let val_17 = blocks[packed_offset + 17];\n unpacked[90] = insertBits(unpacked[90], extractBits(val_17, 0u, 2u), 4u, 2u);\n unpacked[91] = extractBits(val_17, 2u, 6u);\n unpacked[92] = extractBits(val_17, 8u, 6u);\n unpacked[93] = extractBits(val_17, 14u, 6u);\n unpacked[94] = extractBits(val_17, 20u, 6u);\n unpacked[95] = extractBits(val_17, 26u, 6u);\n\n let val_18 = blocks[packed_offset + 18];\n unpacked[96] = extractBits(val_18, 0u, 6u);\n unpacked[97] = extractBits(val_18, 6u, 6u);\n unpacked[98] = extractBits(val_18, 12u, 6u);\n unpacked[99] = extractBits(val_18, 18u, 6u);\n unpacked[100] = extractBits(val_18, 24u, 6u);\n unpacked[101] = extractBits(val_18, 30u, 2u);\n\n let val_19 = blocks[packed_offset + 19];\n unpacked[101] = insertBits(unpacked[101], extractBits(val_19, 0u, 4u), 2u, 4u);\n unpacked[102] = extractBits(val_19, 4u, 6u);\n unpacked[103] = extractBits(val_19, 10u, 6u);\n unpacked[104] = extractBits(val_19, 16u, 6u);\n unpacked[105] = extractBits(val_19, 22u, 6u);\n unpacked[106] = extractBits(val_19, 28u, 4u);\n \n let val_20 = blocks[packed_offset + 20];\n unpacked[106] = insertBits(unpacked[106], extractBits(val_20, 0u, 2u), 4u, 2u);\n unpacked[107] = extractBits(val_20, 2u, 6u);\n unpacked[108] = extractBits(val_20, 8u, 6u);\n unpacked[109] = extractBits(val_20, 14u, 6u);\n unpacked[110] = extractBits(val_20, 20u, 6u);\n unpacked[111] = extractBits(val_20, 26u, 6u);\n\n let val_21 = blocks[packed_offset + 21];\n unpacked[112] = extractBits(val_21, 0u, 6u);\n unpacked[113] = extractBits(val_21, 6u, 6u);\n unpacked[114] = extractBits(val_21, 12u, 6u);\n unpacked[115] = extractBits(val_21, 18u, 6u);\n unpacked[116] = extractBits(val_21, 24u, 6u);\n 
unpacked[117] = extractBits(val_21, 30u, 2u);\n\n let val_22 = blocks[packed_offset + 22];\n unpacked[117] = insertBits(unpacked[117], extractBits(val_22, 0u, 4u), 2u, 4u);\n unpacked[118] = extractBits(val_22, 4u, 6u);\n unpacked[119] = extractBits(val_22, 10u, 6u);\n unpacked[120] = extractBits(val_22, 16u, 6u);\n unpacked[121] = extractBits(val_22, 22u, 6u);\n unpacked[122] = extractBits(val_22, 28u, 4u);\n\n let val_23 = blocks[packed_offset + 23];\n unpacked[122] = insertBits(unpacked[122], extractBits(val_23, 0u, 2u), 4u, 2u);\n unpacked[123] = extractBits(val_23, 2u, 6u);\n unpacked[124] = extractBits(val_23, 8u, 6u);\n unpacked[125] = extractBits(val_23, 14u, 6u);\n unpacked[126] = extractBits(val_23, 20u, 6u);\n unpacked[127] = extractBits(val_23, 26u, 6u);\n}\n","\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.nbr || global_id.y >= args.nbc) {\n return;\n }\n \n let blocks_start: u32 = args.blocks_offset + ((global_id.x * args.nbc + global_id.y) * 24u);\n unpack_block_128_bit_depth_6(blocks_start);\n \n let b03: u32 = blocks_start;\n let b45: u32 = blocks_start + 16u; \n for (var r = 0u; r < ").concat(Ve,"u; r++) {\n let unpacked_idx = r * ").concat(8,"u;\n let b03_idx = b03 + r;\n let b45_idx = b45 + (r / 2u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 0u, 32u);\n blocks[b45_idx] = insertBits(blocks[b45_idx], extractBits(unpacked[unpacked_idx], 4u, 2u), ((r * 16u) % 32u), 16u);\n }\n\n for (var c = 1u; c < ").concat(8,"u; c++) {\n for (var r = 0u; r < ").concat(Ve,"u; r++) {\n let unpacked_idx = r * ").concat(8,"u + c;\n let b03_idx = b03 + r;\n let b45_idx = b45 + (r / 2u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 4 * 
c, 4u);\n blocks[b45_idx] = insertBits(blocks[b45_idx], extractBits(unpacked[unpacked_idx], 4u, 2u), ((r * 16u) % 32u) + (2 * c), 2u);\n }\n }\n}\n\n").concat(I),7:"\n\nstruct argsStruct {\n nbr: u32,\n nbc: u32,\n blocks_offset: u32\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar blocks: array;\n\n".concat("\nvar unpacked: array;\n\nfn unpack_block_128_bit_depth_7(packed_offset: u32) {\n let val_0 = blocks[packed_offset];\n unpacked[0] = extractBits(val_0, 0u, 7u);\n unpacked[1] = extractBits(val_0, 7u, 7u);\n unpacked[2] = extractBits(val_0, 14u, 7u);\n unpacked[3] = extractBits(val_0, 21u, 7u);\n unpacked[4] = extractBits(val_0, 28u, 4u);\n \n let val_1 = blocks[packed_offset + 1];\n unpacked[4] = insertBits(unpacked[4], extractBits(val_1, 0u, 3u), 4u, 3u);\n unpacked[5] = extractBits(val_1, 3u, 7u);\n unpacked[6] = extractBits(val_1, 10u, 7u);\n unpacked[7] = extractBits(val_1, 17u, 7u);\n unpacked[8] = extractBits(val_1, 24u, 7u);\n unpacked[9] = extractBits(val_1, 31u, 1u);\n \n let val_2 = blocks[packed_offset + 2];\n unpacked[9] = insertBits(unpacked[9], extractBits(val_2, 0u, 6u), 1u, 6u);\n unpacked[10] = extractBits(val_2, 6u, 7u);\n unpacked[11] = extractBits(val_2, 13u, 7u);\n unpacked[12] = extractBits(val_2, 20u, 7u);\n unpacked[13] = extractBits(val_2, 27u, 5u);\n \n let val_3 = blocks[packed_offset + 3];\n unpacked[13] = insertBits(unpacked[13], extractBits(val_3, 0u, 2u), 5u, 2u);\n unpacked[14] = extractBits(val_3, 2u, 7u);\n unpacked[15] = extractBits(val_3, 9u, 7u);\n unpacked[16] = extractBits(val_3, 16u, 7u);\n unpacked[17] = extractBits(val_3, 23u, 7u);\n unpacked[18] = extractBits(val_3, 30u, 2u);\n \n let val_4 = blocks[packed_offset + 4];\n unpacked[18] = insertBits(unpacked[18], extractBits(val_4, 0u, 5u), 2u, 5u);\n unpacked[19] = extractBits(val_4, 5u, 7u);\n unpacked[20] = extractBits(val_4, 12u, 7u);\n unpacked[21] = extractBits(val_4, 19u, 7u);\n unpacked[22] = extractBits(val_4, 26u, 6u);\n \n let 
val_5 = blocks[packed_offset + 5];\n unpacked[22] = insertBits(unpacked[22], extractBits(val_5, 0u, 1u), 6u, 1u);\n unpacked[23] = extractBits(val_5, 1u, 7u);\n unpacked[24] = extractBits(val_5, 8u, 7u);\n unpacked[25] = extractBits(val_5, 15u, 7u);\n unpacked[26] = extractBits(val_5, 22u, 7u);\n unpacked[27] = extractBits(val_5, 29u, 3u);\n \n let val_6 = blocks[packed_offset + 6];\n unpacked[27] = insertBits(unpacked[27], extractBits(val_6, 0u, 4u), 3u, 4u);\n unpacked[28] = extractBits(val_6, 4u, 7u);\n unpacked[29] = extractBits(val_6, 11u, 7u);\n unpacked[30] = extractBits(val_6, 18u, 7u);\n unpacked[31] = extractBits(val_6, 25u, 7u);\n \n let val_7 = blocks[packed_offset + 7];\n unpacked[32] = extractBits(val_7, 0u, 7u);\n unpacked[33] = extractBits(val_7, 7u, 7u);\n unpacked[34] = extractBits(val_7, 14u, 7u);\n unpacked[35] = extractBits(val_7, 21u, 7u);\n unpacked[36] = extractBits(val_7, 28u, 4u);\n \n let val_8 = blocks[packed_offset + 8];\n unpacked[36] = insertBits(unpacked[36], extractBits(val_8, 0u, 3u), 4u, 3u);\n unpacked[37] = extractBits(val_8, 3u, 7u);\n unpacked[38] = extractBits(val_8, 10u, 7u);\n unpacked[39] = extractBits(val_8, 17u, 7u);\n unpacked[40] = extractBits(val_8, 24u, 7u);\n unpacked[41] = extractBits(val_8, 31u, 1u);\n \n let val_9 = blocks[packed_offset + 9];\n unpacked[41] = insertBits(unpacked[41], extractBits(val_9, 0u, 6u), 1u, 6u);\n unpacked[42] = extractBits(val_9, 6u, 7u);\n unpacked[43] = extractBits(val_9, 13u, 7u);\n unpacked[44] = extractBits(val_9, 20u, 7u);\n unpacked[45] = extractBits(val_9, 27u, 5u);\n \n let val_10 = blocks[packed_offset + 10];\n unpacked[45] = insertBits(unpacked[45], extractBits(val_10, 0u, 2u), 5u, 2u);\n unpacked[46] = extractBits(val_10, 2u, 7u);\n unpacked[47] = extractBits(val_10, 9u, 7u);\n unpacked[48] = extractBits(val_10, 16u, 7u);\n unpacked[49] = extractBits(val_10, 23u, 7u);\n unpacked[50] = extractBits(val_10, 30u, 2u);\n \n let val_11 = blocks[packed_offset + 11];\n unpacked[50] = 
insertBits(unpacked[50], extractBits(val_11, 0u, 5u), 2u, 5u);\n unpacked[51] = extractBits(val_11, 5u, 7u);\n unpacked[52] = extractBits(val_11, 12u, 7u);\n unpacked[53] = extractBits(val_11, 19u, 7u);\n unpacked[54] = extractBits(val_11, 26u, 6u);\n \n let val_12 = blocks[packed_offset + 12];\n unpacked[54] = insertBits(unpacked[54], extractBits(val_12, 0u, 1u), 6u, 1u);\n unpacked[55] = extractBits(val_12, 1u, 7u);\n unpacked[56] = extractBits(val_12, 8u, 7u);\n unpacked[57] = extractBits(val_12, 15u, 7u);\n unpacked[58] = extractBits(val_12, 22u, 7u);\n unpacked[59] = extractBits(val_12, 29u, 3u);\n \n let val_13 = blocks[packed_offset + 13];\n unpacked[59] = insertBits(unpacked[59], extractBits(val_13, 0u, 4u), 3u, 4u);\n unpacked[60] = extractBits(val_13, 4u, 7u);\n unpacked[61] = extractBits(val_13, 11u, 7u);\n unpacked[62] = extractBits(val_13, 18u, 7u);\n unpacked[63] = extractBits(val_13, 25u, 7u);\n \n let val_14 = blocks[packed_offset + 14];\n unpacked[64] = extractBits(val_14, 0u, 7u);\n unpacked[65] = extractBits(val_14, 7u, 7u);\n unpacked[66] = extractBits(val_14, 14u, 7u);\n unpacked[67] = extractBits(val_14, 21u, 7u);\n unpacked[68] = extractBits(val_14, 28u, 4u);\n \n let val_15 = blocks[packed_offset + 15];\n unpacked[68] = insertBits(unpacked[68], extractBits(val_15, 0u, 3u), 4u, 3u);\n unpacked[69] = extractBits(val_15, 3u, 7u);\n unpacked[70] = extractBits(val_15, 10u, 7u);\n unpacked[71] = extractBits(val_15, 17u, 7u);\n unpacked[72] = extractBits(val_15, 24u, 7u);\n unpacked[73] = extractBits(val_15, 31u, 1u);\n \n let val_16 = blocks[packed_offset + 16];\n unpacked[73] = insertBits(unpacked[73], extractBits(val_16, 0u, 6u), 1u, 6u);\n unpacked[74] = extractBits(val_16, 6u, 7u);\n unpacked[75] = extractBits(val_16, 13u, 7u);\n unpacked[76] = extractBits(val_16, 20u, 7u);\n unpacked[77] = extractBits(val_16, 27u, 5u);\n \n let val_17 = blocks[packed_offset + 17];\n unpacked[77] = insertBits(unpacked[77], extractBits(val_17, 0u, 2u), 5u, 
2u);\n unpacked[78] = extractBits(val_17, 2u, 7u);\n unpacked[79] = extractBits(val_17, 9u, 7u);\n unpacked[80] = extractBits(val_17, 16u, 7u);\n unpacked[81] = extractBits(val_17, 23u, 7u);\n unpacked[82] = extractBits(val_17, 30u, 2u);\n \n let val_18 = blocks[packed_offset + 18];\n unpacked[82] = insertBits(unpacked[82], extractBits(val_18, 0u, 5u), 2u, 5u);\n unpacked[83] = extractBits(val_18, 5u, 7u);\n unpacked[84] = extractBits(val_18, 12u, 7u);\n unpacked[85] = extractBits(val_18, 19u, 7u);\n unpacked[86] = extractBits(val_18, 26u, 6u);\n \n let val_19 = blocks[packed_offset + 19];\n unpacked[86] = insertBits(unpacked[86], extractBits(val_19, 0u, 1u), 6u, 1u);\n unpacked[87] = extractBits(val_19, 1u, 7u);\n unpacked[88] = extractBits(val_19, 8u, 7u);\n unpacked[89] = extractBits(val_19, 15u, 7u);\n unpacked[90] = extractBits(val_19, 22u, 7u);\n unpacked[91] = extractBits(val_19, 29u, 3u);\n \n let val_20 = blocks[packed_offset + 20];\n unpacked[91] = insertBits(unpacked[91], extractBits(val_20, 0u, 4u), 3u, 4u);\n unpacked[92] = extractBits(val_20, 4u, 7u);\n unpacked[93] = extractBits(val_20, 11u, 7u);\n unpacked[94] = extractBits(val_20, 18u, 7u);\n unpacked[95] = extractBits(val_20, 25u, 7u);\n \n let val_21 = blocks[packed_offset + 21];\n unpacked[96] = extractBits(val_21, 0u, 7u);\n unpacked[97] = extractBits(val_21, 7u, 7u);\n unpacked[98] = extractBits(val_21, 14u, 7u);\n unpacked[99] = extractBits(val_21, 21u, 7u);\n unpacked[100] = extractBits(val_21, 28u, 4u);\n \n let val_22 = blocks[packed_offset + 22];\n unpacked[100] = insertBits(unpacked[100], extractBits(val_22, 0u, 3u), 4u, 3u);\n unpacked[101] = extractBits(val_22, 3u, 7u);\n unpacked[102] = extractBits(val_22, 10u, 7u);\n unpacked[103] = extractBits(val_22, 17u, 7u);\n unpacked[104] = extractBits(val_22, 24u, 7u);\n unpacked[105] = extractBits(val_22, 31u, 1u);\n \n let val_23 = blocks[packed_offset + 23];\n unpacked[105] = insertBits(unpacked[105], extractBits(val_23, 0u, 6u), 1u, 6u);\n 
unpacked[106] = extractBits(val_23, 6u, 7u);\n unpacked[107] = extractBits(val_23, 13u, 7u);\n unpacked[108] = extractBits(val_23, 20u, 7u);\n unpacked[109] = extractBits(val_23, 27u, 5u);\n \n let val_24 = blocks[packed_offset + 24];\n unpacked[109] = insertBits(unpacked[109], extractBits(val_24, 0u, 2u), 5u, 2u);\n unpacked[110] = extractBits(val_24, 2u, 7u);\n unpacked[111] = extractBits(val_24, 9u, 7u);\n unpacked[112] = extractBits(val_24, 16u, 7u);\n unpacked[113] = extractBits(val_24, 23u, 7u);\n unpacked[114] = extractBits(val_24, 30u, 2u);\n \n let val_25 = blocks[packed_offset + 25];\n unpacked[114] = insertBits(unpacked[114], extractBits(val_25, 0u, 5u), 2u, 5u);\n unpacked[115] = extractBits(val_25, 5u, 7u);\n unpacked[116] = extractBits(val_25, 12u, 7u);\n unpacked[117] = extractBits(val_25, 19u, 7u);\n unpacked[118] = extractBits(val_25, 26u, 6u);\n \n let val_26 = blocks[packed_offset + 26];\n unpacked[118] = insertBits(unpacked[118], extractBits(val_26, 0u, 1u), 6u, 1u);\n unpacked[119] = extractBits(val_26, 1u, 7u);\n unpacked[120] = extractBits(val_26, 8u, 7u);\n unpacked[121] = extractBits(val_26, 15u, 7u);\n unpacked[122] = extractBits(val_26, 22u, 7u);\n unpacked[123] = extractBits(val_26, 29u, 3u);\n \n let val_27 = blocks[packed_offset + 27];\n unpacked[123] = insertBits(unpacked[123], extractBits(val_27, 0u, 4u), 3u, 4u);\n unpacked[124] = extractBits(val_27, 4u, 7u);\n unpacked[125] = extractBits(val_27, 11u, 7u);\n unpacked[126] = extractBits(val_27, 18u, 7u);\n unpacked[127] = extractBits(val_27, 25u, 7u); \n}\n","\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.nbr || global_id.y >= args.nbc) {\n return;\n }\n \n let blocks_start: u32 = args.blocks_offset + ((global_id.x * args.nbc + global_id.y) * 28u);\n 
unpack_block_128_bit_depth_7(blocks_start);\n \n let b03: u32 = blocks_start;\n let b45: u32 = blocks_start + 16u;\n let b6: u32 = blocks_start + 24u; \n for (var r = 0u; r < ").concat(Ve,"u; r++) {\n let unpacked_idx = r * ").concat(8,"u;\n let b03_idx = b03 + r;\n let b45_idx = b45 + (r / 2u);\n let b6_idx = b6 + (r / 4u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 0, 32u);\n blocks[b45_idx] = insertBits(blocks[b45_idx], extractBits(unpacked[unpacked_idx], 4u, 2u), ((r * 16u) % 32u), 16u);\n blocks[b6_idx] = insertBits(blocks[b6_idx], extractBits(unpacked[unpacked_idx], 6u, 1u), ((r * 8u) % 32u), 8u);\n }\n \n for (var c = 1u; c < ").concat(8,"u; c++) {\n for (var r = 0u; r < ").concat(Ve,"u; r++) {\n let unpacked_idx = r * ").concat(8,"u + c;\n let b03_idx = b03 + r;\n let b45_idx = b45 + (r / 2u);\n let b6_idx = b6 + (r / 4u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 4 * c, 4u);\n blocks[b45_idx] = insertBits(blocks[b45_idx], extractBits(unpacked[unpacked_idx], 4u, 2u), ((r * 16u) % 32u) + (2 * c), 2u);\n blocks[b6_idx] = insertBits(blocks[b6_idx], extractBits(unpacked[unpacked_idx], 6u, 1u), ((r * 8u) % 32u) + c, 1u);\n }\n }\n}\n\n").concat(I)},/* Qe: maps a bit depth key (3, 5, 6 or 7) to the name of the matching preprocess_blocks shader. */Qe={3:"pv_picollm_weight_block_mixed_16x8_preprocess_blocks_3bit_shader",5:"pv_picollm_weight_block_mixed_16x8_preprocess_blocks_5bit_shader",6:"pv_picollm_weight_block_mixed_16x8_preprocess_blocks_6bit_shader",7:"pv_picollm_weight_block_mixed_16x8_preprocess_blocks_7bit_shader"},/* Xe: WGSL constant prelude (tile sizes BM, BN, TM, TN, TC and the block geometry). block_size reads a bit_depth constant that this prelude does not declare, so the shader embedding it has to declare bit_depth itself (Ze does). */Xe="\nconst BM = ".concat(8,"u;\nconst BN = ").concat(32,"u;\n\nconst TM = ").concat(2,"u;\nconst TN = ").concat(16,"u;\n\nconst TC = ").concat(128,"u;\n\nconst ROW_PER_BLOCK = ").concat(Ve,"u;\nconst COL_PER_BLOCK = ").concat(8,"u;\n\nconst VEC_COL_PER_BLOCK = COL_PER_BLOCK / 4u;\n\nconst block_size: u32 = (COL_PER_BLOCK * ROW_PER_BLOCK * bit_depth) / 32u;\n\n"),/* He: WGSL snippets keyed 1 to 8 (Ze indexes this table with its bit depth argument). Each snippet reads packed words from blocks and writes VEC_COL_PER_BLOCK vec4 values to shared_w starting at dst; it relies on src, row and dst being defined by the shader code it is spliced into. */He={1:"\n let b0 = blocks[src + (row / 4u)];\n\n let b0_offset_base = (row 
* 8u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b0_offset = b0_offset_base + (c * 4u); \n shared_w[dst + c] = vec4(\n f32(extractBits(b0, b0_offset, 1u)), \n f32(extractBits(b0, b0_offset + 1, 1u)),\n f32(extractBits(b0, b0_offset + 2, 1u)),\n f32(extractBits(b0, b0_offset + 3, 1u)));\n }\n",2:"\n let b01 = blocks[src + (row / 2u)];\n \n let b01_offset_base = (row * 16u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) { \n let b01_offset = b01_offset_base + (c * 8u);\n shared_w[dst + c] = vec4(\n f32(extractBits(b01, b01_offset, 2u)), \n f32(extractBits(b01, b01_offset + 2, 2u)),\n f32(extractBits(b01, b01_offset + 4, 2u)),\n f32(extractBits(b01, b01_offset + 6, 2u)));\n } \n",3:"\n let b01 = blocks[src + (row / 2u)];\n let b2 = blocks[src + 8u + (row / 4u)]; \n\n let b01_offset_base = (row * 16u) % 32u;\n let b2_offset_base = (row * 8u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b01_offset = b01_offset_base + (c * 8u);\n let b2_offset = b2_offset_base + (c * 4u);\n\n shared_w[dst + c] = vec4(\n f32(insertBits(extractBits(b01, b01_offset, 2u), extractBits(b2, b2_offset, 1u), 2u, 1u)), \n f32(insertBits(extractBits(b01, b01_offset + 2, 2u), extractBits(b2, b2_offset + 1, 1u), 2u, 1u)),\n f32(insertBits(extractBits(b01, b01_offset + 4, 2u), extractBits(b2, b2_offset + 2, 1u), 2u, 1u)),\n f32(insertBits(extractBits(b01, b01_offset + 6, 2u), extractBits(b2, b2_offset + 3, 1u), 2u, 1u))); \n }\n",4:"\n let b03 = blocks[src + row];\n \n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b03_offset = (c * 16u); \n shared_w[dst + c] = vec4(\n f32(extractBits(b03, b03_offset, 4u)), \n f32(extractBits(b03, b03_offset + 4, 4u)),\n f32(extractBits(b03, b03_offset + 8, 4u)),\n f32(extractBits(b03, b03_offset + 12, 4u)));\n }\n",5:"\n let b03 = blocks[src + row];\n let b4 = blocks[src + 16u + (row / 4u)];\n \n let b4_offset_base = (row * 8u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b03_offset = (c * 16u);\n let 
b4_offset = b4_offset_base + (c * 4u);\n shared_w[dst + c] = vec4(\n f32(insertBits(extractBits(b03, b03_offset, 4u), extractBits(b4, b4_offset, 1u), 4u, 1u)), \n f32(insertBits(extractBits(b03, b03_offset + 4, 4u), extractBits(b4, b4_offset + 1, 1u), 4u, 1u)),\n f32(insertBits(extractBits(b03, b03_offset + 8, 4u), extractBits(b4, b4_offset + 2, 1u), 4u, 1u)),\n f32(insertBits(extractBits(b03, b03_offset + 12, 4u), extractBits(b4, b4_offset + 3, 1u), 4u, 1u)));\n }\n",6:"\n let b03 = blocks[src + row];\n let b45 = blocks[src + 16u + (row / 2u)];\n \n let b45_offset_base = (row * 16u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) { \n let b03_offset = (c * 16u);\n let b45_offset = b45_offset_base + (c * 8u);\n shared_w[dst + c] = vec4(\n f32(insertBits(extractBits(b03, b03_offset, 4u), extractBits(b45, b45_offset, 2u), 4u, 2u)), \n f32(insertBits(extractBits(b03, b03_offset + 4, 4u), extractBits(b45, b45_offset + 2, 2u), 4u, 2u)),\n f32(insertBits(extractBits(b03, b03_offset + 8, 4u), extractBits(b45, b45_offset + 4, 2u), 4u, 2u)),\n f32(insertBits(extractBits(b03, b03_offset + 12, 4u), extractBits(b45, b45_offset + 6, 2u), 4u, 2u)));\n }\n",7:"\n let b03 = blocks[src + row];\n let b45 = blocks[src + 16u + (row / 2u)];\n let b6 = blocks[src + 24u + (row / 4u)];\n \n let b45_offset_base = (row * 16u) % 32u;\n let b6_offset_base = (row * 8u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b03_offset = (c * 16u);\n let b45_offset = b45_offset_base + (c * 8u);\n let b6_offset = b6_offset_base + (c * 4u);\n \n shared_w[dst + c] = vec4(\n f32(insertBits(insertBits(extractBits(b03, b03_offset, 4u), extractBits(b45, b45_offset, 2u), 4u, 2u), extractBits(b6, b6_offset, 1u), 6u, 1u)), \n f32(insertBits(insertBits(extractBits(b03, b03_offset + 4, 4u), extractBits(b45, b45_offset + 2, 2u), 4u, 2u), extractBits(b6, b6_offset + 1, 1u), 6u, 1u)),\n f32(insertBits(insertBits(extractBits(b03, b03_offset + 8, 4u), extractBits(b45, b45_offset + 4, 2u), 4u, 2u), 
extractBits(b6, b6_offset + 2, 1u), 6u, 1u)),\n f32(insertBits(insertBits(extractBits(b03, b03_offset + 12, 4u), extractBits(b45, b45_offset + 6, 2u), 4u, 2u), extractBits(b6, b6_offset + 3, 1u), 6u, 1u)));\n }\n",8:"\n let b07_offset = src + (row * 2);\n \n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b07 = blocks[b07_offset + c];\n shared_w[dst + c] = vec4(\n f32(extractBits(b07, 0u, 8u)), \n f32(extractBits(b07, 8u, 8u)),\n f32(extractBits(b07, 16u, 8u)),\n f32(extractBits(b07, 24u, 8u)));\n }\n"},/* Ze(e): returns the WGSL source of a compute shader whose bit_depth constant is e. For every bk_idx below args.k the shader fills shared_x from x, fills shared_ab from metas (decoded with from_fp510) and fills shared_w through the He[e] snippet, then each invocation accumulates alpha * sum(x) + beta * dot(w, x) into local_results; after the loop the results are added to y. The prelude Xe and the externally defined fragments Ke and I are concatenated into the source. */Ze=function(e){return"\n\n".concat("\nstruct argsStruct {\n n: u32,\n m: u32,\n total_nbc: u32,\n k: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array;\n","\n\n").concat(Xe,"\n\n").concat("\nvar shared_x: array, BN * VEC_COL_PER_BLOCK>;\nvar shared_ab: array;\nvar shared_w: array, BM * ROW_PER_BLOCK * VEC_COL_PER_BLOCK>;\n\nvar local_x: array, TN * VEC_COL_PER_BLOCK>;\nvar local_x_sums: array;\nvar local_results: array;\n","\n\n").concat(Ke,"\n\n").concat("\n fn divide_pad(a: u32, b: u32) -> u32 { \n return (a + b - 1) / b;\n }\n","\n\nconst bit_depth: u32 = ").concat(e,"u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n \n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(\n @builtin(local_invocation_id) local_id: vec3,\n @builtin(workgroup_id) workgroup_id: vec3\n) {\n ").concat("\n let tid = local_id.x;\n let bm_idx = workgroup_id.x;\n let bn_idx = workgroup_id.y;\n\n let local_bm_idx = bm_idx * BM;\n let local_bn_idx = bn_idx * BN;\n \n let n_idx = tid % (BN / TN);\n let k_idx = tid / (BN / TN) / (BM * ROW_PER_BLOCK / TM);\n let m_idx = tid / (BN / TN) % (BM * ROW_PER_BLOCK / TM);\n","\n \n for (var bk_idx 
= 0u; bk_idx < args.k; bk_idx++) { \n ").concat("\n let total_work_x = VEC_COL_PER_BLOCK * BN;\n for (var local_idx = 0u; local_idx < divide_pad(total_work_x, TC); local_idx++) {\n let idx = local_idx * TC + tid;\n if (idx < total_work_x) { \n let n_load_idx = local_bn_idx + idx / VEC_COL_PER_BLOCK;\n let inner_idx = idx % VEC_COL_PER_BLOCK;\n \n if (bk_idx < args.k && n_load_idx < args.n) { \n let x_idx = (args.x_offset / 4u) + ((bk_idx * args.n + n_load_idx) * VEC_COL_PER_BLOCK + inner_idx); \n shared_x[idx] = x[x_idx];\n } else {\n shared_x[idx] = vec4(0.0);\n }\n }\n }\n","\n ").concat("\n let total_work_ab = BM * 2;\n for (var local_idx = 0u; local_idx < divide_pad(total_work_ab, TC); local_idx++) {\n let idx = local_idx * TC + tid;\n if (idx < total_work_ab) {\n let m_load_idx = local_bm_idx + idx / 2; \n let inner_idx = (idx % 2) * 16u;\n \n if (m_load_idx < args.m && bk_idx < args.k) {\n let ab_bits = extractBits(metas[args.metas_offset + (m_load_idx * args.k + bk_idx)], inner_idx, 16u);\n shared_ab[idx] = from_fp510(ab_bits); \n } else {\n shared_ab[idx] = 0.0;\n }\n }\n }\n"," \n ").concat(function(e){return"\n let total_work_w = BM * ROW_PER_BLOCK;\n for (var local_idx = 0u; local_idx < divide_pad(total_work_w, TC); local_idx++) {\n let idx = local_idx * TC + tid;\n if (idx < total_work_w) {\n let m_load_idx = local_bm_idx + idx / ROW_PER_BLOCK;\n let row = idx % ROW_PER_BLOCK;\n let dst = idx * VEC_COL_PER_BLOCK;\n\n if (m_load_idx < args.m) {\n let src = args.blocks_offset + (m_load_idx * args.k + bk_idx) * block_size;\n ".concat(He[e],"\n } else { \n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n shared_w[dst + c] = vec4(0.0);\n }\n }\n }\n }\n")}(e)," \n workgroupBarrier();\n \n ").concat("\nfor (var tn_idx = 0u; tn_idx < TN; tn_idx++) { \n var x_sum_vec = vec4(0.0); \n let local_x_idx = tn_idx * VEC_COL_PER_BLOCK;\n let shared_x_idx = (n_idx * TN + tn_idx) * VEC_COL_PER_BLOCK + (k_idx * VEC_COL_PER_BLOCK);\n for (var c = 0u; c < VEC_COL_PER_BLOCK; 
c++) {\n local_x[local_x_idx + c] = shared_x[shared_x_idx + c];\n x_sum_vec += local_x[local_x_idx + c];\n }\n local_x_sums[tn_idx] = x_sum_vec.x + x_sum_vec.y + x_sum_vec.z + x_sum_vec.w; \n}\n","\n ").concat("\n for (var tm_idx = 0u; tm_idx < TM; tm_idx++) { \n let shared_ab_idx = ((m_idx * TM + tm_idx) / ROW_PER_BLOCK + k_idx) * 2;\n let alpha = shared_ab[shared_ab_idx];\n let beta = shared_ab[shared_ab_idx + 1]; \n let shared_w_idx = ((m_idx * TM + tm_idx) + k_idx) * VEC_COL_PER_BLOCK;\n \n for (var tn_idx = 0u; tn_idx < TN; tn_idx++) { \n let local_x_idx = tn_idx * VEC_COL_PER_BLOCK;\n \n var swx_vec = vec4(0.0); \n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) { \n swx_vec += shared_w[shared_w_idx + c] * local_x[local_x_idx + c];\n }\n let swx = swx_vec.x + swx_vec.y + swx_vec.z + swx_vec.w;\n \n let kappa = alpha * local_x_sums[tn_idx]; \n let results_idx = tm_idx * TN + tn_idx;\n local_results[results_idx] += kappa + (beta * swx);\n }\n }\n","\n workgroupBarrier();\n }\n \n ").concat("\nfor (var tm_idx = 0u; tm_idx < TM; tm_idx++) {\n let row = local_bm_idx * ROW_PER_BLOCK + (m_idx * TM + tm_idx); \n for (var tn_idx = 0u; tn_idx < TN; tn_idx++) { \n let col = local_bn_idx + (n_idx * TN + tn_idx);\n if (row < args.m * ROW_PER_BLOCK && col < args.n) {\n let y_idx = args.y_offset + ((row / ROW_PER_BLOCK) * args.n + col) * ROW_PER_BLOCK + (row % ROW_PER_BLOCK);\n let results_idx = tm_idx * TN + tn_idx;\n \n y[y_idx] += local_results[results_idx];\n }\n }\n}\n","\n}\n\n").concat(I,"\n")},/* Je: WGSL compute shader source. For each (b, i) with b below args.n and i below args.shape1 it copies x[x_offset + b * shape1 + indices[indices_offset + i]] to y[((i / 8) * n + b) * 8 + (i % 8)]. */Je="\nstruct argsStruct {\n n: u32,\n shape1: u32,\n x_offset: u32,\n indices_offset: u32, \n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array;\n\n@group(0) @binding(2)\nvar indices: array;\n\n@group(0) @binding(3)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn 
main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n || global_id.y >= args.shape1) {\n return;\n } \n\n let b = global_id.x;\n let i = global_id.y;\n \n let c = i / 8u;\n let j = i % 8u;\n y[((c * args.n) + b) * 8 + j] = x[args.x_offset + (b * args.shape1) + indices[args.indices_offset + i]];\n}\n\n".concat(I,"\n"),$e="\nstruct argsStruct {\n nvr: u32,\n nbc: u32, \n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar y: array>;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) { \n if (global_id.x > args.nvr) {\n return;\n }\n\n let x_start = global_id.x * args.nbc;\n var sum: vec4 = vec4(0.0, 0.0, 0.0, 0.0);\n for (var i = 0u; i < args.nbc; i++) {\n sum += x[x_start + i]; \n }\n y[global_id.x] += sum;\n}\n\n".concat(I),et="\nstruct argsStruct {\n n: u32,\n shape0: u32, \n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array;\n\n@group(0) @binding(2)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n || global_id.y >= args.shape0) {\n return;\n } \n \n let b = global_id.x;\n let i = global_id.y;\n \n let r = i / 16u;\n let j = i % 16u;\n y[(b * args.shape0) + (r * 16) + j] = x[(((r * args.n) + b) * 16) + j];\n}\n\n".concat(I),tt="\nstruct argsStruct {\n dimension: u32\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar bias: array;\n\n@group(0) @binding(2)\nvar y: array;\n\n".concat(Ke,"\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 
1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) { \n y[(global_id.x * args.dimension) + global_id.y] += bias[global_id.y];\n}\n\n").concat(I),rt={1:"\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(Ke,"\n\nconst block_size: u32 = 4u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n\n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(8,";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b0_start = row_blocks_start + br_offset + (bc * block_size);\n var b0_offset = 0u;\n \n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(8,"; j+=4) {\n \n var b0 = blocks[b0_start];\n \n let w0_0 = f32(extractBits(b0, b0_offset + j, 1u)); \n let w0_1 = f32(extractBits(b0, b0_offset + j + 1, 1u)); \n let w0_2 = f32(extractBits(b0, b0_offset + j + 2, 1u));\n let 
w0_3 = f32(extractBits(b0, b0_offset + j + 3, 1u));\n \n b0_offset = 8u;\n \n let w1_0 = f32(extractBits(b0, b0_offset + j, 1u)); \n let w1_1 = f32(extractBits(b0, b0_offset + j + 1, 1u)); \n let w1_2 = f32(extractBits(b0, b0_offset + j + 2, 1u));\n let w1_3 = f32(extractBits(b0, b0_offset + j + 3, 1u));\n \n b0_offset = 16u;\n \n let w2_0 = f32(extractBits(b0, b0_offset + j, 1u)); \n let w2_1 = f32(extractBits(b0, b0_offset + j + 1, 1u)); \n let w2_2 = f32(extractBits(b0, b0_offset + j + 2, 1u));\n let w2_3 = f32(extractBits(b0, b0_offset + j + 3, 1u));\n \n b0_offset = 24u;\n \n let w3_0 = f32(extractBits(b0, b0_offset + j, 1u)); \n let w3_1 = f32(extractBits(b0, b0_offset + j + 1, 1u)); \n let w3_2 = f32(extractBits(b0, b0_offset + j + 2, 1u));\n let w3_3 = f32(extractBits(b0, b0_offset + j + 3, 1u));\n \n b0_offset = 0u;\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w; \n }\n \n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(I),2:"\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(Ke,"\n\nconst block_size: u32 = 8u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if 
(global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n\n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(8,";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b01_start = row_blocks_start + (br_offset * 2u) + (bc * block_size);\n var b01_offset = 0u;\n \n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(8,"; j+=4) {\n \n var b01 = blocks[b01_start];\n \n let w0_0 = f32(extractBits(b01, b01_offset + (2u * j), 2u)); \n let w0_1 = f32(extractBits(b01, b01_offset + (2u * (j + 1)), 2u)); \n let w0_2 = f32(extractBits(b01, b01_offset + (2u * (j + 2)), 2u));\n let w0_3 = f32(extractBits(b01, b01_offset + (2u * (j + 3)), 2u));\n \n b01_offset = 16u;\n \n let w1_0 = f32(extractBits(b01, b01_offset + (2u * j), 2u)); \n let w1_1 = f32(extractBits(b01, b01_offset + (2u * (j + 1)), 2u)); \n let w1_2 = f32(extractBits(b01, b01_offset + (2u * (j + 2)), 2u));\n let w1_3 = f32(extractBits(b01, b01_offset + (2u * (j + 3)), 2u));\n \n b01_offset = 0u; \n b01 = blocks[b01_start + 1u];\n \n let w2_0 = f32(extractBits(b01, b01_offset + (2u * j), 2u)); \n let w2_1 = f32(extractBits(b01, b01_offset + (2u * (j + 1)), 2u)); \n let w2_2 = f32(extractBits(b01, b01_offset + (2u * (j + 2)), 2u));\n let w2_3 = f32(extractBits(b01, b01_offset + (2u * (j + 3)), 2u));\n \n b01_offset = 16u;\n \n let w3_0 = f32(extractBits(b01, b01_offset + (2u * j), 2u)); \n let w3_1 = f32(extractBits(b01, b01_offset + (2u * (j + 1)), 2u)); \n let w3_2 = f32(extractBits(b01, b01_offset + (2u * (j + 2)), 2u));\n let w3_3 = 
f32(extractBits(b01, b01_offset + (2u * (j + 3)), 2u));\n \n b01_offset = 0u;\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w; \n }\n \n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(I),3:"\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(Ke,"\n\nconst block_size: u32 = 12u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id : vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(8,";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n\n let x_start = ((args.x_offset + c) / 4u);\n \n var b01_start = row_blocks_start + (br_offset * 2u) + (bc * block_size);\n var b2_start = row_blocks_start + br_offset + (bc * 
block_size) + 8u;\n var b01_offset = 0u;\n var b2_offset = 0u;\n\n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(8,"; j+=4) { \n \n var b01 = blocks[b01_start];\n var b2 = blocks[b2_start];\n \n var b01_bits = extractBits(b01, b01_offset + (2u * j), 2u);\n var b2_bit = extractBits(b2, b2_offset + j, 1u);\n let w0_0 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 1)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 1), 1u);\n let w0_1 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 2)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 2), 1u);\n let w0_2 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 3)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 3), 1u);\n let w0_3 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_offset = 16u;\n b2_offset = 8u;\n \n b01_bits = extractBits(b01, b01_offset + (2u * j), 2u);\n b2_bit = extractBits(b2, b2_offset + j, 1u); \n let w1_0 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 1)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 1), 1u);\n let w1_1 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 2)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 2), 1u);\n let w1_2 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 3)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 3), 1u);\n let w1_3 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_offset = 0u;\n b2_offset = 16u; \n b01 = blocks[b01_start + 1u];\n \n b01_bits = extractBits(b01, b01_offset + (2u * j), 2u);\n b2_bit = extractBits(b2, b2_offset + j, 1u); \n let w2_0 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 1)), 2u);\n b2_bit = extractBits(b2, 
b2_offset + (j + 1), 1u);\n let w2_1 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 2)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 2), 1u);\n let w2_2 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 3)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 3), 1u);\n let w2_3 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_offset = 16u;\n b2_offset = 24u; \n \n b01_bits = extractBits(b01, b01_offset + (2u * j), 2u);\n b2_bit = extractBits(b2, b2_offset + j, 1u); \n let w3_0 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 1)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 1), 1u);\n let w3_1 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 2)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 2), 1u);\n let w3_2 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 3)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 3), 1u);\n let w3_3 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_offset = 0u;\n b2_offset = 0u; \n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n \n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(I),4:"\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) 
@binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(Ke,"\n\nconst block_size: u32 = 16u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(8,";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b03_start = row_blocks_start + (br_offset * 4u) + (bc * block_size); \n\n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n\n for (var j = 0u; j < ").concat(8,"; j+=4) {\n \n var b03 = blocks[b03_start]; \n \n let w0_0 = f32(extractBits(b03, 4u * j, 4u)); \n let w0_1 = f32(extractBits(b03, 4u * (j + 1), 4u)); \n let w0_2 = f32(extractBits(b03, 4u * (j + 2), 4u));\n let w0_3 = f32(extractBits(b03, 4u * (j + 3), 4u));\n \n b03 = blocks[b03_start + 1];\n \n let w1_0 = f32(extractBits(b03, 4u * j, 4u)); \n let w1_1 = f32(extractBits(b03, 4u * (j + 1), 4u)); \n let w1_2 = f32(extractBits(b03, 4u * (j + 2), 4u));\n let w1_3 = f32(extractBits(b03, 4u * (j + 3), 4u));\n \n b03 = blocks[b03_start + 2];\n \n let w2_0 = f32(extractBits(b03, 4u * j, 4u)); \n let w2_1 = f32(extractBits(b03, 4u * (j + 1), 4u)); \n let w2_2 = f32(extractBits(b03, 4u * (j + 2), 4u));\n let w2_3 = f32(extractBits(b03, 4u * (j + 3), 4u));\n \n b03 = blocks[b03_start + 3];\n \n let w3_0 = f32(extractBits(b03, 4u 
* j, 4u)); \n let w3_1 = f32(extractBits(b03, 4u * (j + 1), 4u)); \n let w3_2 = f32(extractBits(b03, 4u * (j + 2), 4u));\n let w3_3 = f32(extractBits(b03, 4u * (j + 3), 4u));\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(I),5:"\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(Ke,"\n\nconst block_size: u32 = 20u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(8,";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b03_start = row_blocks_start + 
(br_offset * 4u) + (bc * block_size);\n var b4_start = row_blocks_start + br_offset + (bc * block_size) + 16u;\n \n var b4_offset = 0u;\n \n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(8,"; j+=4) {\n \n var b03 = blocks[b03_start];\n var b4 = blocks[b4_start];\n \n var b03_bits = extractBits(b03, 4u * j, 4u);\n var b4_bit = extractBits(b4, b4_offset + j, 1u); \n let w0_0 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 1), 1u);\n let w0_1 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 2), 1u);\n let w0_2 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 3), 1u);\n let w0_3 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03 = blocks[b03_start + 1];\n b4_offset = 8u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b4_bit = extractBits(b4, b4_offset + j, 1u); \n let w1_0 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 1), 1u);\n let w1_1 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 2), 1u);\n let w1_2 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 3), 1u);\n let w1_3 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03 = blocks[b03_start + 2];\n b4_offset = 16u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b4_bit = extractBits(b4, b4_offset + j, 1u); \n let w2_0 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 1), 1u);\n let w2_1 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n 
b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 2), 1u);\n let w2_2 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 3), 1u);\n let w2_3 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03 = blocks[b03_start + 3];\n b4_offset = 24u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b4_bit = extractBits(b4, b4_offset + j, 1u); \n let w3_0 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 1), 1u);\n let w3_1 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 2), 1u);\n let w3_2 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 3), 1u);\n let w3_3 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b4_offset = 0u;\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(I),6:"\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(Ke,"\n\nconst block_size: u32 = 24u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 
1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(8,";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b03_start = row_blocks_start + (br_offset * 4u) + (bc * block_size);\n var b45_start = row_blocks_start + (br_offset * 2u) + (bc * block_size) + 16u;\n \n var b45_offset = 0u;\n \n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(8,"; j+=4) {\n \n var b03 = blocks[b03_start];\n var b45 = blocks[b45_start];\n \n var b03_bits = extractBits(b03, 4u * j, 4u);\n var b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n let w0_0 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n let w0_1 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n let w0_2 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n let w0_3 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03 = blocks[b03_start + 1];\n b45_offset = 16u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 
2u);\n let w1_0 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n let w1_1 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n let w1_2 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n let w1_3 = f32(insertBits(b03_bits, b45_bits, 4u, 2u)); \n \n b03 = blocks[b03_start + 2];\n b45 = blocks[b45_start + 1];\n b45_offset = 0u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n let w2_0 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n let w2_1 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n let w2_2 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n let w2_3 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03 = blocks[b03_start + 3];\n b45_offset = 16u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n let w3_0 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n let w3_1 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n let w3_2 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, 
b45_offset + (2u * (j + 3)), 2u);\n let w3_3 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b45_offset = 0u;\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(I),7:"\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(Ke,"\n\nconst block_size: u32 = 28u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(8,";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b03_start = row_blocks_start + (br_offset * 4u) + (bc * block_size);\n var b45_start = 
row_blocks_start + (br_offset * 2u) + (bc * block_size) + 16u;\n var b6_start = row_blocks_start + br_offset + (bc * block_size) + 24u;\n \n var b45_offset = 0u;\n var b6_offset = 0u;\n \n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(8,"; j+=4) {\n \n var b03 = blocks[b03_start];\n var b45 = blocks[b45_start];\n var b6 = blocks[b6_start];\n \n var b03_bits = extractBits(b03, 4u * j, 4u);\n var b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n var b6_bit = extractBits(b6, b6_offset + j, 1u);\n let w0_0 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 1, 1u);\n let w0_1 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 2, 1u);\n let w0_2 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u)); \n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 3, 1u);\n let w0_3 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03 = blocks[b03_start + 1];\n b45_offset = 16u;\n b6_offset = 8u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n b6_bit = extractBits(b6, b6_offset + j, 1u);\n let w1_0 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 1, 1u);\n let w1_1 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n 
b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 2, 1u);\n let w1_2 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u)); \n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 3, 1u);\n let w1_3 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03 = blocks[b03_start + 2];\n b45 = blocks[b45_start + 1];\n b45_offset = 0u;\n b6_offset = 16u;\n\n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n b6_bit = extractBits(b6, b6_offset + j, 1u);\n let w2_0 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 1, 1u);\n let w2_1 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 2, 1u);\n let w2_2 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u)); \n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 3, 1u);\n let w2_3 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03 = blocks[b03_start + 3];\n b45_offset = 16u;\n b6_offset = 24u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n b6_bit = extractBits(b6, b6_offset + j, 1u);\n let w3_0 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n b6_bit = 
extractBits(b6, b6_offset + j + 1, 1u);\n let w3_1 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 2, 1u);\n let w3_2 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u)); \n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 3, 1u);\n let w3_3 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b45_offset = 0u;\n b6_offset = 0u;\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(I),8:"\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(Ke,"\n\nconst block_size: u32 = 32u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(8,";\n let br = 
global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b07_start = row_blocks_start + (br_offset * 8u) + (bc * block_size); \n\n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n\n for (var j = 0u; j < 2; j++) {\n \n var b07 = blocks[b07_start + j];\n \n let w0_0 = f32(extractBits(b07, 0u, 8u)); \n let w0_1 = f32(extractBits(b07, 8u, 8u)); \n let w0_2 = f32(extractBits(b07, 16u, 8u));\n let w0_3 = f32(extractBits(b07, 24u, 8u));\n \n b07 = blocks[b07_start + 2 + j];\n \n let w1_0 = f32(extractBits(b07, 0u, 8u)); \n let w1_1 = f32(extractBits(b07, 8u, 8u)); \n let w1_2 = f32(extractBits(b07, 16u, 8u));\n let w1_3 = f32(extractBits(b07, 24u, 8u));\n \n b07 = blocks[b07_start + 4 + j];\n \n let w2_0 = f32(extractBits(b07, 0u, 8u)); \n let w2_1 = f32(extractBits(b07, 8u, 8u)); \n let w2_2 = f32(extractBits(b07, 16u, 8u));\n let w2_3 = f32(extractBits(b07, 24u, 8u));\n \n b07 = blocks[b07_start + 6 + j];\n \n let w3_0 = f32(extractBits(b07, 0u, 8u)); \n let w3_1 = f32(extractBits(b07, 8u, 8u)); \n let w3_2 = f32(extractBits(b07, 16u, 8u));\n let w3_3 = f32(extractBits(b07, 24u, 8u));\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_vec = x[x_start + j];\n res[j] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + 
vec4(kappa);\n}\n\n").concat(I)},nt={1:"pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_1_shader",2:"pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_2_shader",3:"pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_3_shader",4:"pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_4_shader",5:"pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_5_shader",6:"pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_6_shader",7:"pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_7_shader",8:"pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_8_shader"},at={1:Ze(1),2:Ze(2),3:Ze(3),4:Ze(4),5:Ze(5),6:Ze(6),7:Ze(7),8:Ze(8)},it={1:"pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_1_shader",2:"pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_2_shader",3:"pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_3_shader",4:"pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_4_shader",5:"pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_5_shader",6:"pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_6_shader",7:"pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_7_shader",8:"pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_8_shader"},ut="pv_picollm_weight_block_mixed_16x8_forward_shuffle_x_shader",ot="pv_picollm_weight_block_mixed_16x8_forward_shuffle_y_shader",st="pv_picollm_weight_block_mixed_16x8_add_bias_shader",ct="pv_picollm_weight_block_mixed_16x8_forward_single_reduce_y_shader",lt=function(e,t){var r=e.createBindGroupLayout({label:"weight preprocess blocks ".concat(t," bind group layout"),entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]}),n=e.createPipelineLayout({label:"weight preprocess blocks ".concat(t," pipeline layout"),bindGroupLayouts:[r]}),a=e.createShaderModule({label:"weight preprocess blocks ".concat(t," 
shader module"),code:De[t]});return{computePipeline:e.createComputePipeline({label:"weight preprocess blocks ".concat(t," pipeline"),layout:n,compute:{module:a,entryPoint:q,constants:{workgroup_size_x:16,workgroup_size_y:16}}})}},_t=function(e,t){var r=[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:4,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}],n=e.createBindGroupLayout({label:"weight forward single ".concat(t," bind group layout"),entries:r}),a=e.createPipelineLayout({label:"weight forward single ".concat(t," pipeline layout"),bindGroupLayouts:[n]}),i=e.createShaderModule({label:"weight forward single ".concat(t," shader module"),code:rt[t]});return{computePipeline:e.createComputePipeline({label:"weight forward single ".concat(t," pipeline"),layout:a,compute:{module:i,entryPoint:q,constants:{workgroup_size_x:256,workgroup_size_y:1}}})}},dt=function(e,t){var r=[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:4,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}],n=e.createBindGroupLayout({label:"weight forward multi ".concat(t," bind group layout"),entries:r}),a=e.createPipelineLayout({label:"weight forward multi ".concat(t," pipeline layout"),bindGroupLayouts:[n]}),i=e.createShaderModule({label:"weight forward multi ".concat(t," shader module"),code:at[t]});return{computePipeline:e.createComputePipeline({label:"weight forward multi ".concat(t," 
pipeline"),layout:a,compute:{module:i,entryPoint:q,constants:{workgroup_size_x:128}}})}},ft=(a(a(a(a(a(a(a(a(a(a(A={},Qe[3],(function(e){return lt(e,3)})),Qe[5],(function(e){return lt(e,5)})),Qe[6],(function(e){return lt(e,6)})),Qe[7],(function(e){return lt(e,7)})),ut,(function(e){var t=e.createBindGroupLayout({label:"weight shuffle x bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]}),r=e.createPipelineLayout({label:"weight shuffle x pipeline layout",bindGroupLayouts:[t]}),n=e.createShaderModule({label:"weight shuffle x shader module",code:Je});return{computePipeline:e.createComputePipeline({label:"weight shuffle x pipeline",layout:r,compute:{module:n,entryPoint:q,constants:{workgroup_size_y:N}}})}})),ot,(function(e){var t=e.createBindGroupLayout({label:"weight shuffle y bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]}),r=e.createPipelineLayout({label:"weight shuffle y pipeline layout",bindGroupLayouts:[t]}),n=e.createShaderModule({label:"weight shuffle y shader module",code:et});return{computePipeline:e.createComputePipeline({label:"weight shuffle y pipeline",layout:r,compute:{module:n,entryPoint:q}})}})),ct,(function(e){var t=e.createBindGroupLayout({label:"weight single reduce y bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]}),r=e.createPipelineLayout({label:"weight single reduce y pipeline 
layout",bindGroupLayouts:[t]}),n=e.createShaderModule({label:"weight single reduce y shader module",code:$e});return{computePipeline:e.createComputePipeline({label:"weight single reduce y pipeline",layout:r,compute:{module:n,entryPoint:q,constants:{workgroup_size_x:N}}})}})),nt[1],(function(e){return _t(e,1)})),nt[2],(function(e){return _t(e,2)})),nt[3],(function(e){return _t(e,3)})),a(a(a(a(a(a(a(a(a(a(A,nt[4],(function(e){return _t(e,4)})),nt[5],(function(e){return _t(e,5)})),nt[6],(function(e){return _t(e,6)})),nt[7],(function(e){return _t(e,7)})),nt[8],(function(e){return _t(e,8)})),it[1],(function(e){return dt(e,1)})),it[2],(function(e){return dt(e,2)})),it[3],(function(e){return dt(e,3)})),it[4],(function(e){return dt(e,4)})),it[5],(function(e){return dt(e,5)})),a(a(a(a(A,it[6],(function(e){return dt(e,6)})),it[7],(function(e){return dt(e,7)})),it[8],(function(e){return dt(e,8)})),st,(function(e){var t=e.createBindGroupLayout({label:"weight add bias bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]}),r=e.createPipelineLayout({label:"weight add bias pipeline layout",bindGroupLayouts:[t]}),n=e.createShaderModule({label:"weight add bias shader module",code:tt});return{computePipeline:e.createComputePipeline({label:"weight add bias pipeline",layout:r,compute:{module:n,entryPoint:q}})}})));function bt(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,n)}return r}function pt(e){for(var t=1;t { - var _scriptName = typeof document != 'undefined' ? 
document.currentScript?.src : undefined; - - return ( -function(moduleArg = {}) { - var moduleRtn; - -function GROWABLE_HEAP_I8(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAP8}function GROWABLE_HEAP_U8(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAPU8}function GROWABLE_HEAP_I16(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAP16}function GROWABLE_HEAP_I32(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAP32}function GROWABLE_HEAP_U32(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAPU32}function GROWABLE_HEAP_F32(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAPF32}function GROWABLE_HEAP_F64(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAPF64}var Module=moduleArg;var readyPromiseResolve,readyPromiseReject;var readyPromise=new Promise((resolve,reject)=>{readyPromiseResolve=resolve;readyPromiseReject=reject});var ENVIRONMENT_IS_WEB=typeof window=="object";var ENVIRONMENT_IS_WORKER=typeof importScripts=="function";var ENVIRONMENT_IS_NODE=typeof process=="object"&&typeof process.versions=="object"&&typeof process.versions.node=="string";var ENVIRONMENT_IS_PTHREAD=ENVIRONMENT_IS_WORKER&&self.name=="em-pthread";function _typeof$2(o){"@babel/helpers - typeof";return _typeof$2="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(o){return typeof o}:function(o){return o&&"function"==typeof Symbol&&o.constructor===Symbol&&o!==Symbol.prototype?"symbol":typeof o},_typeof$2(o)}function toPrimitive(t,r){if("object"!=_typeof$2(t)||!t)return t;var e=t[Symbol.toPrimitive];if(void 0!==e){var i=e.call(t,r||"default");if("object"!=_typeof$2(i))return i;throw new TypeError("@@toPrimitive must return a primitive value.")}return("string"===r?String:Number)(t)}function toPropertyKey(t){var i=toPrimitive(t,"string");return"symbol"==_typeof$2(i)?i:i+""}function _defineProperty(e,r,t){return(r=toPropertyKey(r))in 
e?Object.defineProperty(e,r,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[r]=t,e}function asyncGeneratorStep(n,t,e,r,o,a,c){try{var i=n[a](c),u=i.value}catch(n){return void e(n)}i.done?t(u):Promise.resolve(u).then(r,o)}function _asyncToGenerator(n){return function(){var t=this,e=arguments;return new Promise(function(r,o){var a=n.apply(t,e);function _next(n){asyncGeneratorStep(a,r,o,_next,_throw,"next",n)}function _throw(n){asyncGeneratorStep(a,r,o,_next,_throw,"throw",n)}_next(void 0)})}}function getDefaultExportFromCjs(x){return x&&x.__esModule&&Object.prototype.hasOwnProperty.call(x,"default")?x["default"]:x}var regeneratorRuntime$3={exports:{}};var _typeof$1={exports:{}};var _typeof_1=_typeof$1.exports;(function(module){function _typeof(o){"@babel/helpers - typeof";return module.exports=_typeof="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(o){return typeof o}:function(o){return o&&"function"==typeof Symbol&&o.constructor===Symbol&&o!==Symbol.prototype?"symbol":typeof o},module.exports.__esModule=true,module.exports["default"]=module.exports,_typeof(o)}module.exports=_typeof,module.exports.__esModule=true,module.exports["default"]=module.exports})(_typeof$1);var _typeofExports=_typeof$1.exports;var _typeof=getDefaultExportFromCjs(_typeofExports);var regeneratorRuntime$1=regeneratorRuntime$3.exports;(function(module){var _typeof=_typeofExports["default"];function _regeneratorRuntime(){"use strict";module.exports=_regeneratorRuntime=function _regeneratorRuntime(){return e},module.exports.__esModule=true,module.exports["default"]=module.exports;var t,e={},r=Object.prototype,n=r.hasOwnProperty,o=Object.defineProperty||function(t,e,r){t[e]=r.value},i="function"==typeof Symbol?Symbol:{},a=i.iterator||"@@iterator",c=i.asyncIterator||"@@asyncIterator",u=i.toStringTag||"@@toStringTag";function define(t,e,r){return Object.defineProperty(t,e,{value:r,enumerable:!0,configurable:!0,writable:!0}),t[e]}try{define({},"")}catch(t){define=function 
define(t,e,r){return t[e]=r}}function wrap(t,e,r,n){var i=e&&e.prototype instanceof Generator?e:Generator,a=Object.create(i.prototype),c=new Context(n||[]);return o(a,"_invoke",{value:makeInvokeMethod(t,r,c)}),a}function tryCatch(t,e,r){try{return{type:"normal",arg:t.call(e,r)}}catch(t){return{type:"throw",arg:t}}}e.wrap=wrap;var h="suspendedStart",l="suspendedYield",f="executing",s="completed",y={};function Generator(){}function GeneratorFunction(){}function GeneratorFunctionPrototype(){}var p={};define(p,a,function(){return this});var d=Object.getPrototypeOf,v=d&&d(d(values([])));v&&v!==r&&n.call(v,a)&&(p=v);var g=GeneratorFunctionPrototype.prototype=Generator.prototype=Object.create(p);function defineIteratorMethods(t){["next","throw","return"].forEach(function(e){define(t,e,function(t){return this._invoke(e,t)})})}function AsyncIterator(t,e){function invoke(r,o,i,a){var c=tryCatch(t[r],t,o);if("throw"!==c.type){var u=c.arg,h=u.value;return h&&"object"==_typeof(h)&&n.call(h,"__await")?e.resolve(h.__await).then(function(t){invoke("next",t,i,a)},function(t){invoke("throw",t,i,a)}):e.resolve(h).then(function(t){u.value=t,i(u)},function(t){return invoke("throw",t,i,a)})}a(c.arg)}var r;o(this,"_invoke",{value:function value(t,n){function callInvokeWithMethodAndArg(){return new e(function(e,r){invoke(t,n,e,r)})}return r=r?r.then(callInvokeWithMethodAndArg,callInvokeWithMethodAndArg):callInvokeWithMethodAndArg()}})}function makeInvokeMethod(e,r,n){var o=h;return function(i,a){if(o===f)throw Error("Generator is already running");if(o===s){if("throw"===i)throw a;return{value:t,done:!0}}for(n.method=i,n.arg=a;;){var c=n.delegate;if(c){var u=maybeInvokeDelegate(c,n);if(u){if(u===y)continue;return u}}if("next"===n.method)n.sent=n._sent=n.arg;else if("throw"===n.method){if(o===h)throw o=s,n.arg;n.dispatchException(n.arg)}else"return"===n.method&&n.abrupt("return",n.arg);o=f;var 
p=tryCatch(e,r,n);if("normal"===p.type){if(o=n.done?s:l,p.arg===y)continue;return{value:p.arg,done:n.done}}"throw"===p.type&&(o=s,n.method="throw",n.arg=p.arg)}}}function maybeInvokeDelegate(e,r){var n=r.method,o=e.iterator[n];if(o===t)return r.delegate=null,"throw"===n&&e.iterator["return"]&&(r.method="return",r.arg=t,maybeInvokeDelegate(e,r),"throw"===r.method)||"return"!==n&&(r.method="throw",r.arg=new TypeError("The iterator does not provide a '"+n+"' method")),y;var i=tryCatch(o,e.iterator,r.arg);if("throw"===i.type)return r.method="throw",r.arg=i.arg,r.delegate=null,y;var a=i.arg;return a?a.done?(r[e.resultName]=a.value,r.next=e.nextLoc,"return"!==r.method&&(r.method="next",r.arg=t),r.delegate=null,y):a:(r.method="throw",r.arg=new TypeError("iterator result is not an object"),r.delegate=null,y)}function pushTryEntry(t){var e={tryLoc:t[0]};1 in t&&(e.catchLoc=t[1]),2 in t&&(e.finallyLoc=t[2],e.afterLoc=t[3]),this.tryEntries.push(e)}function resetTryEntry(t){var e=t.completion||{};e.type="normal",delete e.arg,t.completion=e}function Context(t){this.tryEntries=[{tryLoc:"root"}],t.forEach(pushTryEntry,this),this.reset(!0)}function values(e){if(e||""===e){var r=e[a];if(r)return r.call(e);if("function"==typeof e.next)return e;if(!isNaN(e.length)){var o=-1,i=function next(){for(;++o=0;--o){var i=this.tryEntries[o],a=i.completion;if("root"===i.tryLoc)return handle("end");if(i.tryLoc<=this.prev){var c=n.call(i,"catchLoc"),u=n.call(i,"finallyLoc");if(c&&u){if(this.prev=0;--r){var o=this.tryEntries[r];if(o.tryLoc<=this.prev&&n.call(o,"finallyLoc")&&this.prev=0;--e){var r=this.tryEntries[e];if(r.finallyLoc===t)return this.complete(r.completion,r.afterLoc),resetTryEntry(r),y}},catch:function _catch(t){for(var e=this.tryEntries.length-1;e>=0;--e){var r=this.tryEntries[e];if(r.tryLoc===t){var n=r.completion;if("throw"===n.type){var o=n.arg;resetTryEntry(r)}return o}}throw Error("illegal catch attempt")},delegateYield:function delegateYield(e,r,n){return 
this.delegate={iterator:values(e),resultName:r,nextLoc:n},"next"===this.method&&(this.arg=t),y}},e}module.exports=_regeneratorRuntime,module.exports.__esModule=true,module.exports["default"]=module.exports})(regeneratorRuntime$3);var regeneratorRuntimeExports=regeneratorRuntime$3.exports;var regeneratorRuntime$2=getDefaultExportFromCjs(regeneratorRuntimeExports);var runtime=regeneratorRuntimeExports();var regenerator=runtime;try{regeneratorRuntime=runtime}catch(accidentalStrictMode){if(typeof globalThis==="object"){globalThis.regeneratorRuntime=runtime}else{Function("r","regeneratorRuntime = r")(runtime)}}var _regeneratorRuntime=getDefaultExportFromCjs(regenerator);const BROWSER_ALIASES_MAP={"Amazon Silk":"amazon_silk","Android Browser":"android",Bada:"bada",BlackBerry:"blackberry",Chrome:"chrome",Chromium:"chromium",Electron:"electron",Epiphany:"epiphany",Firefox:"firefox",Focus:"focus",Generic:"generic","Google Search":"google_search",Googlebot:"googlebot","Internet Explorer":"ie","K-Meleon":"k_meleon",Maxthon:"maxthon","Microsoft Edge":"edge","MZ Browser":"mz","NAVER Whale Browser":"naver",Opera:"opera","Opera Coast":"opera_coast",PhantomJS:"phantomjs",Puffin:"puffin",QupZilla:"qupzilla",QQ:"qq",QQLite:"qqlite",Safari:"safari",Sailfish:"sailfish","Samsung Internet for Android":"samsung_internet",SeaMonkey:"seamonkey",Sleipnir:"sleipnir",Swing:"swing",Tizen:"tizen","UC Browser":"uc",Vivaldi:"vivaldi","WebOS Browser":"webos",WeChat:"wechat","Yandex Browser":"yandex",Roku:"roku"};const BROWSER_MAP={amazon_silk:"Amazon Silk",android:"Android Browser",bada:"Bada",blackberry:"BlackBerry",chrome:"Chrome",chromium:"Chromium",electron:"Electron",epiphany:"Epiphany",firefox:"Firefox",focus:"Focus",generic:"Generic",googlebot:"Googlebot",google_search:"Google Search",ie:"Internet Explorer",k_meleon:"K-Meleon",maxthon:"Maxthon",edge:"Microsoft Edge",mz:"MZ Browser",naver:"NAVER Whale Browser",opera:"Opera",opera_coast:"Opera 
Coast",phantomjs:"PhantomJS",puffin:"Puffin",qupzilla:"QupZilla",qq:"QQ Browser",qqlite:"QQ Browser Lite",safari:"Safari",sailfish:"Sailfish",samsung_internet:"Samsung Internet for Android",seamonkey:"SeaMonkey",sleipnir:"Sleipnir",swing:"Swing",tizen:"Tizen",uc:"UC Browser",vivaldi:"Vivaldi",webos:"WebOS Browser",wechat:"WeChat",yandex:"Yandex Browser"};const PLATFORMS_MAP={tablet:"tablet",mobile:"mobile",desktop:"desktop",tv:"tv"};const OS_MAP={WindowsPhone:"Windows Phone",Windows:"Windows",MacOS:"macOS",iOS:"iOS",Android:"Android",WebOS:"WebOS",BlackBerry:"BlackBerry",Bada:"Bada",Tizen:"Tizen",Linux:"Linux",ChromeOS:"Chrome OS",PlayStation4:"PlayStation 4",Roku:"Roku"};const ENGINE_MAP={EdgeHTML:"EdgeHTML",Blink:"Blink",Trident:"Trident",Presto:"Presto",Gecko:"Gecko",WebKit:"WebKit"};class Utils{static getFirstMatch(regexp,ua){const match=ua.match(regexp);return match&&match.length>0&&match[1]||""}static getSecondMatch(regexp,ua){const match=ua.match(regexp);return match&&match.length>1&&match[2]||""}static matchAndReturnConst(regexp,ua,_const){if(regexp.test(ua)){return _const}return void 0}static getWindowsVersionName(version){switch(version){case"NT":return"NT";case"XP":return"XP";case"NT 5.0":return"2000";case"NT 5.1":return"XP";case"NT 5.2":return"2003";case"NT 6.0":return"Vista";case"NT 6.1":return"7";case"NT 6.2":return"8";case"NT 6.3":return"8.1";case"NT 10.0":return"10";default:return undefined}}static getMacOSVersionName(version){const v=version.split(".").splice(0,2).map(s=>parseInt(s,10)||0);v.push(0);if(v[0]!==10)return undefined;switch(v[1]){case 5:return"Leopard";case 6:return"Snow Leopard";case 7:return"Lion";case 8:return"Mountain Lion";case 9:return"Mavericks";case 10:return"Yosemite";case 11:return"El Capitan";case 12:return"Sierra";case 13:return"High Sierra";case 14:return"Mojave";case 15:return"Catalina";default:return undefined}}static getAndroidVersionName(version){const 
v=version.split(".").splice(0,2).map(s=>parseInt(s,10)||0);v.push(0);if(v[0]===1&&v[1]<5)return undefined;if(v[0]===1&&v[1]<6)return"Cupcake";if(v[0]===1&&v[1]>=6)return"Donut";if(v[0]===2&&v[1]<2)return"Eclair";if(v[0]===2&&v[1]===2)return"Froyo";if(v[0]===2&&v[1]>2)return"Gingerbread";if(v[0]===3)return"Honeycomb";if(v[0]===4&&v[1]<1)return"Ice Cream Sandwich";if(v[0]===4&&v[1]<4)return"Jelly Bean";if(v[0]===4&&v[1]>=4)return"KitKat";if(v[0]===5)return"Lollipop";if(v[0]===6)return"Marshmallow";if(v[0]===7)return"Nougat";if(v[0]===8)return"Oreo";if(v[0]===9)return"Pie";return undefined}static getVersionPrecision(version){return version.split(".").length}static compareVersions(versionA,versionB,isLoose=false){const versionAPrecision=Utils.getVersionPrecision(versionA);const versionBPrecision=Utils.getVersionPrecision(versionB);let precision=Math.max(versionAPrecision,versionBPrecision);let lastPrecision=0;const chunks=Utils.map([versionA,versionB],version=>{const delta=precision-Utils.getVersionPrecision(version);const _version=version+new Array(delta+1).join(".0");return Utils.map(_version.split("."),chunk=>new Array(20-chunk.length).join("0")+chunk).reverse()});if(isLoose){lastPrecision=precision-Math.min(versionAPrecision,versionBPrecision)}precision-=1;while(precision>=lastPrecision){if(chunks[0][precision]>chunks[1][precision]){return 1}if(chunks[0][precision]===chunks[1][precision]){if(precision===lastPrecision){return 0}precision-=1}else if(chunks[0][precision]{result[key]=assigner[key]})}}return obj}static getBrowserAlias(browserName){return BROWSER_ALIASES_MAP[browserName]}static getBrowserTypeByAlias(browserAlias){return BROWSER_MAP[browserAlias]||""}}const commonVersionIdentifier=/version\/(\d+(\.?_?\d+)+)/i;const browsersList=[{test:[/googlebot/i],describe(ua){const browser={name:"Googlebot"};const version=Utils.getFirstMatch(/googlebot\/(\d+(\.\d+))/i,ua)||Utils.getFirstMatch(commonVersionIdentifier,ua);if(version){browser.version=version}return 
browser}},{test:[/opera/i],describe(ua){const browser={name:"Opera"};const version=Utils.getFirstMatch(commonVersionIdentifier,ua)||Utils.getFirstMatch(/(?:opera)[\s/](\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/opr\/|opios/i],describe(ua){const browser={name:"Opera"};const version=Utils.getFirstMatch(/(?:opr|opios)[\s/](\S+)/i,ua)||Utils.getFirstMatch(commonVersionIdentifier,ua);if(version){browser.version=version}return browser}},{test:[/SamsungBrowser/i],describe(ua){const browser={name:"Samsung Internet for Android"};const version=Utils.getFirstMatch(commonVersionIdentifier,ua)||Utils.getFirstMatch(/(?:SamsungBrowser)[\s/](\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/Whale/i],describe(ua){const browser={name:"NAVER Whale Browser"};const version=Utils.getFirstMatch(commonVersionIdentifier,ua)||Utils.getFirstMatch(/(?:whale)[\s/](\d+(?:\.\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/MZBrowser/i],describe(ua){const browser={name:"MZ Browser"};const version=Utils.getFirstMatch(/(?:MZBrowser)[\s/](\d+(?:\.\d+)+)/i,ua)||Utils.getFirstMatch(commonVersionIdentifier,ua);if(version){browser.version=version}return browser}},{test:[/focus/i],describe(ua){const browser={name:"Focus"};const version=Utils.getFirstMatch(/(?:focus)[\s/](\d+(?:\.\d+)+)/i,ua)||Utils.getFirstMatch(commonVersionIdentifier,ua);if(version){browser.version=version}return browser}},{test:[/swing/i],describe(ua){const browser={name:"Swing"};const version=Utils.getFirstMatch(/(?:swing)[\s/](\d+(?:\.\d+)+)/i,ua)||Utils.getFirstMatch(commonVersionIdentifier,ua);if(version){browser.version=version}return browser}},{test:[/coast/i],describe(ua){const browser={name:"Opera Coast"};const version=Utils.getFirstMatch(commonVersionIdentifier,ua)||Utils.getFirstMatch(/(?:coast)[\s/](\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/opt\/\d+(?:.?_?\d+)+/i],describe(ua){const 
browser={name:"Opera Touch"};const version=Utils.getFirstMatch(/(?:opt)[\s/](\d+(\.?_?\d+)+)/i,ua)||Utils.getFirstMatch(commonVersionIdentifier,ua);if(version){browser.version=version}return browser}},{test:[/yabrowser/i],describe(ua){const browser={name:"Yandex Browser"};const version=Utils.getFirstMatch(/(?:yabrowser)[\s/](\d+(\.?_?\d+)+)/i,ua)||Utils.getFirstMatch(commonVersionIdentifier,ua);if(version){browser.version=version}return browser}},{test:[/ucbrowser/i],describe(ua){const browser={name:"UC Browser"};const version=Utils.getFirstMatch(commonVersionIdentifier,ua)||Utils.getFirstMatch(/(?:ucbrowser)[\s/](\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/Maxthon|mxios/i],describe(ua){const browser={name:"Maxthon"};const version=Utils.getFirstMatch(commonVersionIdentifier,ua)||Utils.getFirstMatch(/(?:Maxthon|mxios)[\s/](\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/epiphany/i],describe(ua){const browser={name:"Epiphany"};const version=Utils.getFirstMatch(commonVersionIdentifier,ua)||Utils.getFirstMatch(/(?:epiphany)[\s/](\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/puffin/i],describe(ua){const browser={name:"Puffin"};const version=Utils.getFirstMatch(commonVersionIdentifier,ua)||Utils.getFirstMatch(/(?:puffin)[\s/](\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/sleipnir/i],describe(ua){const browser={name:"Sleipnir"};const version=Utils.getFirstMatch(commonVersionIdentifier,ua)||Utils.getFirstMatch(/(?:sleipnir)[\s/](\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/k-meleon/i],describe(ua){const browser={name:"K-Meleon"};const version=Utils.getFirstMatch(commonVersionIdentifier,ua)||Utils.getFirstMatch(/(?:k-meleon)[\s/](\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/micromessenger/i],describe(ua){const browser={name:"WeChat"};const 
version=Utils.getFirstMatch(/(?:micromessenger)[\s/](\d+(\.?_?\d+)+)/i,ua)||Utils.getFirstMatch(commonVersionIdentifier,ua);if(version){browser.version=version}return browser}},{test:[/qqbrowser/i],describe(ua){const browser={name:/qqbrowserlite/i.test(ua)?"QQ Browser Lite":"QQ Browser"};const version=Utils.getFirstMatch(/(?:qqbrowserlite|qqbrowser)[/](\d+(\.?_?\d+)+)/i,ua)||Utils.getFirstMatch(commonVersionIdentifier,ua);if(version){browser.version=version}return browser}},{test:[/msie|trident/i],describe(ua){const browser={name:"Internet Explorer"};const version=Utils.getFirstMatch(/(?:msie |rv:)(\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/\sedg\//i],describe(ua){const browser={name:"Microsoft Edge"};const version=Utils.getFirstMatch(/\sedg\/(\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/edg([ea]|ios)/i],describe(ua){const browser={name:"Microsoft Edge"};const version=Utils.getSecondMatch(/edg([ea]|ios)\/(\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/vivaldi/i],describe(ua){const browser={name:"Vivaldi"};const version=Utils.getFirstMatch(/vivaldi\/(\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/seamonkey/i],describe(ua){const browser={name:"SeaMonkey"};const version=Utils.getFirstMatch(/seamonkey\/(\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/sailfish/i],describe(ua){const browser={name:"Sailfish"};const version=Utils.getFirstMatch(/sailfish\s?browser\/(\d+(\.\d+)?)/i,ua);if(version){browser.version=version}return browser}},{test:[/silk/i],describe(ua){const browser={name:"Amazon Silk"};const version=Utils.getFirstMatch(/silk\/(\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/phantom/i],describe(ua){const browser={name:"PhantomJS"};const version=Utils.getFirstMatch(/phantomjs\/(\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return 
browser}},{test:[/slimerjs/i],describe(ua){const browser={name:"SlimerJS"};const version=Utils.getFirstMatch(/slimerjs\/(\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/blackberry|\bbb\d+/i,/rim\stablet/i],describe(ua){const browser={name:"BlackBerry"};const version=Utils.getFirstMatch(commonVersionIdentifier,ua)||Utils.getFirstMatch(/blackberry[\d]+\/(\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/(web|hpw)[o0]s/i],describe(ua){const browser={name:"WebOS Browser"};const version=Utils.getFirstMatch(commonVersionIdentifier,ua)||Utils.getFirstMatch(/w(?:eb)?[o0]sbrowser\/(\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/bada/i],describe(ua){const browser={name:"Bada"};const version=Utils.getFirstMatch(/dolfin\/(\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/tizen/i],describe(ua){const browser={name:"Tizen"};const version=Utils.getFirstMatch(/(?:tizen\s?)?browser\/(\d+(\.?_?\d+)+)/i,ua)||Utils.getFirstMatch(commonVersionIdentifier,ua);if(version){browser.version=version}return browser}},{test:[/qupzilla/i],describe(ua){const browser={name:"QupZilla"};const version=Utils.getFirstMatch(/(?:qupzilla)[\s/](\d+(\.?_?\d+)+)/i,ua)||Utils.getFirstMatch(commonVersionIdentifier,ua);if(version){browser.version=version}return browser}},{test:[/firefox|iceweasel|fxios/i],describe(ua){const browser={name:"Firefox"};const version=Utils.getFirstMatch(/(?:firefox|iceweasel|fxios)[\s/](\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/electron/i],describe(ua){const browser={name:"Electron"};const version=Utils.getFirstMatch(/(?:electron)\/(\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/MiuiBrowser/i],describe(ua){const browser={name:"Miui"};const version=Utils.getFirstMatch(/(?:MiuiBrowser)[\s/](\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return 
browser}},{test:[/chromium/i],describe(ua){const browser={name:"Chromium"};const version=Utils.getFirstMatch(/(?:chromium)[\s/](\d+(\.?_?\d+)+)/i,ua)||Utils.getFirstMatch(commonVersionIdentifier,ua);if(version){browser.version=version}return browser}},{test:[/chrome|crios|crmo/i],describe(ua){const browser={name:"Chrome"};const version=Utils.getFirstMatch(/(?:chrome|crios|crmo)\/(\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/GSA/i],describe(ua){const browser={name:"Google Search"};const version=Utils.getFirstMatch(/(?:GSA)\/(\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test(parser){const notLikeAndroid=!parser.test(/like android/i);const butAndroid=parser.test(/android/i);return notLikeAndroid&&butAndroid},describe(ua){const browser={name:"Android Browser"};const version=Utils.getFirstMatch(commonVersionIdentifier,ua);if(version){browser.version=version}return browser}},{test:[/playstation 4/i],describe(ua){const browser={name:"PlayStation 4"};const version=Utils.getFirstMatch(commonVersionIdentifier,ua);if(version){browser.version=version}return browser}},{test:[/safari|applewebkit/i],describe(ua){const browser={name:"Safari"};const version=Utils.getFirstMatch(commonVersionIdentifier,ua);if(version){browser.version=version}return browser}},{test:[/.*/i],describe(ua){const regexpWithoutDeviceSpec=/^(.*)\/(.*) /;const regexpWithDeviceSpec=/^(.*)\/(.*)[ \t]\((.*)/;const hasDeviceSpec=ua.search("\\(")!==-1;const regexp=hasDeviceSpec?regexpWithDeviceSpec:regexpWithoutDeviceSpec;return{name:Utils.getFirstMatch(regexp,ua),version:Utils.getSecondMatch(regexp,ua)}}}];var osParsersList=[{test:[/Roku\/DVP/],describe(ua){const version=Utils.getFirstMatch(/Roku\/DVP-(\d+\.\d+)/i,ua);return{name:OS_MAP.Roku,version:version}}},{test:[/windows phone/i],describe(ua){const version=Utils.getFirstMatch(/windows phone (?:os)?\s?(\d+(\.\d+)*)/i,ua);return{name:OS_MAP.WindowsPhone,version:version}}},{test:[/windows 
/i],describe(ua){const version=Utils.getFirstMatch(/Windows ((NT|XP)( \d\d?.\d)?)/i,ua);const versionName=Utils.getWindowsVersionName(version);return{name:OS_MAP.Windows,version:version,versionName:versionName}}},{test:[/Macintosh(.*?) FxiOS(.*?)\//],describe(ua){const result={name:OS_MAP.iOS};const version=Utils.getSecondMatch(/(Version\/)(\d[\d.]+)/,ua);if(version){result.version=version}return result}},{test:[/macintosh/i],describe(ua){const version=Utils.getFirstMatch(/mac os x (\d+(\.?_?\d+)+)/i,ua).replace(/[_\s]/g,".");const versionName=Utils.getMacOSVersionName(version);const os={name:OS_MAP.MacOS,version:version};if(versionName){os.versionName=versionName}return os}},{test:[/(ipod|iphone|ipad)/i],describe(ua){const version=Utils.getFirstMatch(/os (\d+([_\s]\d+)*) like mac os x/i,ua).replace(/[_\s]/g,".");return{name:OS_MAP.iOS,version:version}}},{test(parser){const notLikeAndroid=!parser.test(/like android/i);const butAndroid=parser.test(/android/i);return notLikeAndroid&&butAndroid},describe(ua){const version=Utils.getFirstMatch(/android[\s/-](\d+(\.\d+)*)/i,ua);const versionName=Utils.getAndroidVersionName(version);const os={name:OS_MAP.Android,version:version};if(versionName){os.versionName=versionName}return os}},{test:[/(web|hpw)[o0]s/i],describe(ua){const version=Utils.getFirstMatch(/(?:web|hpw)[o0]s\/(\d+(\.\d+)*)/i,ua);const os={name:OS_MAP.WebOS};if(version&&version.length){os.version=version}return os}},{test:[/blackberry|\bbb\d+/i,/rim\stablet/i],describe(ua){const version=Utils.getFirstMatch(/rim\stablet\sos\s(\d+(\.\d+)*)/i,ua)||Utils.getFirstMatch(/blackberry\d+\/(\d+([_\s]\d+)*)/i,ua)||Utils.getFirstMatch(/\bbb(\d+)/i,ua);return{name:OS_MAP.BlackBerry,version:version}}},{test:[/bada/i],describe(ua){const version=Utils.getFirstMatch(/bada\/(\d+(\.\d+)*)/i,ua);return{name:OS_MAP.Bada,version:version}}},{test:[/tizen/i],describe(ua){const 
version=Utils.getFirstMatch(/tizen[/\s](\d+(\.\d+)*)/i,ua);return{name:OS_MAP.Tizen,version:version}}},{test:[/linux/i],describe(){return{name:OS_MAP.Linux}}},{test:[/CrOS/],describe(){return{name:OS_MAP.ChromeOS}}},{test:[/PlayStation 4/],describe(ua){const version=Utils.getFirstMatch(/PlayStation 4[/\s](\d+(\.\d+)*)/i,ua);return{name:OS_MAP.PlayStation4,version:version}}}];var platformParsersList=[{test:[/googlebot/i],describe(){return{type:"bot",vendor:"Google"}}},{test:[/huawei/i],describe(ua){const model=Utils.getFirstMatch(/(can-l01)/i,ua)&&"Nova";const platform={type:PLATFORMS_MAP.mobile,vendor:"Huawei"};if(model){platform.model=model}return platform}},{test:[/nexus\s*(?:7|8|9|10).*/i],describe(){return{type:PLATFORMS_MAP.tablet,vendor:"Nexus"}}},{test:[/ipad/i],describe(){return{type:PLATFORMS_MAP.tablet,vendor:"Apple",model:"iPad"}}},{test:[/Macintosh(.*?) FxiOS(.*?)\//],describe(){return{type:PLATFORMS_MAP.tablet,vendor:"Apple",model:"iPad"}}},{test:[/kftt build/i],describe(){return{type:PLATFORMS_MAP.tablet,vendor:"Amazon",model:"Kindle Fire HD 7"}}},{test:[/silk/i],describe(){return{type:PLATFORMS_MAP.tablet,vendor:"Amazon"}}},{test:[/tablet(?! 
pc)/i],describe(){return{type:PLATFORMS_MAP.tablet}}},{test(parser){const iDevice=parser.test(/ipod|iphone/i);const likeIDevice=parser.test(/like (ipod|iphone)/i);return iDevice&&!likeIDevice},describe(ua){const model=Utils.getFirstMatch(/(ipod|iphone)/i,ua);return{type:PLATFORMS_MAP.mobile,vendor:"Apple",model:model}}},{test:[/nexus\s*[0-6].*/i,/galaxy nexus/i],describe(){return{type:PLATFORMS_MAP.mobile,vendor:"Nexus"}}},{test:[/[^-]mobi/i],describe(){return{type:PLATFORMS_MAP.mobile}}},{test(parser){return parser.getBrowserName(true)==="blackberry"},describe(){return{type:PLATFORMS_MAP.mobile,vendor:"BlackBerry"}}},{test(parser){return parser.getBrowserName(true)==="bada"},describe(){return{type:PLATFORMS_MAP.mobile}}},{test(parser){return parser.getBrowserName()==="windows phone"},describe(){return{type:PLATFORMS_MAP.mobile,vendor:"Microsoft"}}},{test(parser){const osMajorVersion=Number(String(parser.getOSVersion()).split(".")[0]);return parser.getOSName(true)==="android"&&osMajorVersion>=3},describe(){return{type:PLATFORMS_MAP.tablet}}},{test(parser){return parser.getOSName(true)==="android"},describe(){return{type:PLATFORMS_MAP.mobile}}},{test(parser){return parser.getOSName(true)==="macos"},describe(){return{type:PLATFORMS_MAP.desktop,vendor:"Apple"}}},{test(parser){return parser.getOSName(true)==="windows"},describe(){return{type:PLATFORMS_MAP.desktop}}},{test(parser){return parser.getOSName(true)==="linux"},describe(){return{type:PLATFORMS_MAP.desktop}}},{test(parser){return parser.getOSName(true)==="playstation 4"},describe(){return{type:PLATFORMS_MAP.tv}}},{test(parser){return parser.getOSName(true)==="roku"},describe(){return{type:PLATFORMS_MAP.tv}}}];var enginesParsersList=[{test(parser){return parser.getBrowserName(true)==="microsoft edge"},describe(ua){const isBlinkBased=/\sedg\//i.test(ua);if(isBlinkBased){return{name:ENGINE_MAP.Blink}}const 
version=Utils.getFirstMatch(/edge\/(\d+(\.?_?\d+)+)/i,ua);return{name:ENGINE_MAP.EdgeHTML,version:version}}},{test:[/trident/i],describe(ua){const engine={name:ENGINE_MAP.Trident};const version=Utils.getFirstMatch(/trident\/(\d+(\.?_?\d+)+)/i,ua);if(version){engine.version=version}return engine}},{test(parser){return parser.test(/presto/i)},describe(ua){const engine={name:ENGINE_MAP.Presto};const version=Utils.getFirstMatch(/presto\/(\d+(\.?_?\d+)+)/i,ua);if(version){engine.version=version}return engine}},{test(parser){const isGecko=parser.test(/gecko/i);const likeGecko=parser.test(/like gecko/i);return isGecko&&!likeGecko},describe(ua){const engine={name:ENGINE_MAP.Gecko};const version=Utils.getFirstMatch(/gecko\/(\d+(\.?_?\d+)+)/i,ua);if(version){engine.version=version}return engine}},{test:[/(apple)?webkit\/537\.36/i],describe(){return{name:ENGINE_MAP.Blink}}},{test:[/(apple)?webkit/i],describe(ua){const engine={name:ENGINE_MAP.WebKit};const version=Utils.getFirstMatch(/webkit\/(\d+(\.?_?\d+)+)/i,ua);if(version){engine.version=version}return engine}}];class Parser{constructor(UA,skipParsing=false){if(UA===void 0||UA===null||UA===""){throw new Error("UserAgent parameter can't be empty")}this._ua=UA;this.parsedResult={};if(skipParsing!==true){this.parse()}}getUA(){return this._ua}test(regex){return regex.test(this._ua)}parseBrowser(){this.parsedResult.browser={};const browserDescriptor=Utils.find(browsersList,_browser=>{if(typeof _browser.test==="function"){return _browser.test(this)}if(_browser.test instanceof Array){return _browser.test.some(condition=>this.test(condition))}throw new Error("Browser's test function is not valid")});if(browserDescriptor){this.parsedResult.browser=browserDescriptor.describe(this.getUA())}return this.parsedResult.browser}getBrowser(){if(this.parsedResult.browser){return this.parsedResult.browser}return this.parseBrowser()}getBrowserName(toLowerCase){if(toLowerCase){return String(this.getBrowser().name).toLowerCase()||""}return 
this.getBrowser().name||""}getBrowserVersion(){return this.getBrowser().version}getOS(){if(this.parsedResult.os){return this.parsedResult.os}return this.parseOS()}parseOS(){this.parsedResult.os={};const os=Utils.find(osParsersList,_os=>{if(typeof _os.test==="function"){return _os.test(this)}if(_os.test instanceof Array){return _os.test.some(condition=>this.test(condition))}throw new Error("Browser's test function is not valid")});if(os){this.parsedResult.os=os.describe(this.getUA())}return this.parsedResult.os}getOSName(toLowerCase){const{name:name}=this.getOS();if(toLowerCase){return String(name).toLowerCase()||""}return name||""}getOSVersion(){return this.getOS().version}getPlatform(){if(this.parsedResult.platform){return this.parsedResult.platform}return this.parsePlatform()}getPlatformType(toLowerCase=false){const{type:type}=this.getPlatform();if(toLowerCase){return String(type).toLowerCase()||""}return type||""}parsePlatform(){this.parsedResult.platform={};const platform=Utils.find(platformParsersList,_platform=>{if(typeof _platform.test==="function"){return _platform.test(this)}if(_platform.test instanceof Array){return _platform.test.some(condition=>this.test(condition))}throw new Error("Browser's test function is not valid")});if(platform){this.parsedResult.platform=platform.describe(this.getUA())}return this.parsedResult.platform}getEngine(){if(this.parsedResult.engine){return this.parsedResult.engine}return this.parseEngine()}getEngineName(toLowerCase){if(toLowerCase){return String(this.getEngine().name).toLowerCase()||""}return this.getEngine().name||""}parseEngine(){this.parsedResult.engine={};const engine=Utils.find(enginesParsersList,_engine=>{if(typeof _engine.test==="function"){return _engine.test(this)}if(_engine.test instanceof Array){return _engine.test.some(condition=>this.test(condition))}throw new Error("Browser's test function is not valid")});if(engine){this.parsedResult.engine=engine.describe(this.getUA())}return 
this.parsedResult.engine}parse(){this.parseBrowser();this.parseOS();this.parsePlatform();this.parseEngine();return this}getResult(){return Utils.assign({},this.parsedResult)}satisfies(checkTree){const platformsAndOSes={};let platformsAndOSCounter=0;const browsers={};let browsersCounter=0;const allDefinitions=Object.keys(checkTree);allDefinitions.forEach(key=>{const currentDefinition=checkTree[key];if(typeof currentDefinition==="string"){browsers[key]=currentDefinition;browsersCounter+=1}else if(typeof currentDefinition==="object"){platformsAndOSes[key]=currentDefinition;platformsAndOSCounter+=1}});if(platformsAndOSCounter>0){const platformsAndOSNames=Object.keys(platformsAndOSes);const OSMatchingDefinition=Utils.find(platformsAndOSNames,name=>this.isOS(name));if(OSMatchingDefinition){const osResult=this.satisfies(platformsAndOSes[OSMatchingDefinition]);if(osResult!==void 0){return osResult}}const platformMatchingDefinition=Utils.find(platformsAndOSNames,name=>this.isPlatform(name));if(platformMatchingDefinition){const platformResult=this.satisfies(platformsAndOSes[platformMatchingDefinition]);if(platformResult!==void 0){return platformResult}}}if(browsersCounter>0){const browserNames=Object.keys(browsers);const matchingDefinition=Utils.find(browserNames,name=>this.isBrowser(name,true));if(matchingDefinition!==void 0){return this.compareVersion(browsers[matchingDefinition])}}return undefined}isBrowser(browserName,includingAlias=false){const defaultBrowserName=this.getBrowserName().toLowerCase();let browserNameLower=browserName.toLowerCase();const alias=Utils.getBrowserTypeByAlias(browserNameLower);if(includingAlias&&alias){browserNameLower=alias.toLowerCase()}return browserNameLower===defaultBrowserName}compareVersion(version){let expectedResults=[0];let comparableVersion=version;let isLoose=false;const currentBrowserVersion=this.getBrowserVersion();if(typeof currentBrowserVersion!=="string"){return void 
0}if(version[0]===">"||version[0]==="<"){comparableVersion=version.substr(1);if(version[1]==="="){isLoose=true;comparableVersion=version.substr(2)}else{expectedResults=[]}if(version[0]===">"){expectedResults.push(1)}else{expectedResults.push(-1)}}else if(version[0]==="="){comparableVersion=version.substr(1)}else if(version[0]==="~"){isLoose=true;comparableVersion=version.substr(1)}return expectedResults.indexOf(Utils.compareVersions(currentBrowserVersion,comparableVersion,isLoose))>-1}isOS(osName){return this.getOSName(true)===String(osName).toLowerCase()}isPlatform(platformType){return this.getPlatformType(true)===String(platformType).toLowerCase()}isEngine(engineName){return this.getEngineName(true)===String(engineName).toLowerCase()}is(anything,includingAlias=false){return this.isBrowser(anything,includingAlias)||this.isOS(anything)||this.isPlatform(anything)}some(anythings=[]){return anythings.some(anything=>this.is(anything))}}class Bowser{static getParser(UA,skipParsing=false){if(typeof UA!=="string"){throw new Error("UserAgent should be a string")}return new Parser(UA,skipParsing)}static parse(UA){return new Parser(UA).getResult()}static get BROWSER_MAP(){return BROWSER_MAP}static get ENGINE_MAP(){return ENGINE_MAP}static get OS_MAP(){return OS_MAP}static get PLATFORMS_MAP(){return PLATFORMS_MAP}}function _classCallCheck(a,n){if(!(a instanceof n))throw new TypeError("Cannot call a class as a function")}function _defineProperties(e,r){for(var t=0;t=this._data.length){return undefined}var res=this._data.slice(this._pos,this._pos+bytes);this._pos+=bytes;return res}},{key:"set",value:function set(pos,data){this._pos=pos;this._data=data}},{key:"clear",value:function clear(){this._pos=0;this._data=new Uint8Array}}])}();var PvFileIDB=function(_PvFile){function PvFileIDB(path,meta,db,mode){var 
_this;_classCallCheck(this,PvFileIDB);_this=_callSuper$1(this,PvFileIDB);_this._pageSize=512*1024;_this._pagePtr=0;_this._pageOffset=0;_this._path=path;_this._meta=meta;_this._db=db;_this._mode=mode;_this._cache=new PvCache;return _this}_inherits(PvFileIDB,_PvFile);return _createClass(PvFileIDB,[{key:"pageSize",get:function get(){return this._pageSize}},{key:"close",value:function(){var _close=_asyncToGenerator(_regeneratorRuntime.mark(function _callee(){return _regeneratorRuntime.wrap(function _callee$(_context){while(1)switch(_context.prev=_context.next){case 0:this._db.close();case 1:case"end":return _context.stop()}},_callee,this)}));function close(){return _close.apply(this,arguments)}return close}()},{key:"read",value:function(){var _read=_asyncToGenerator(_regeneratorRuntime.mark(function _callee2(size,count){var _this2=this;return _regeneratorRuntime.wrap(function _callee2$(_context2){while(1)switch(_context2.prev=_context2.next){case 0:return _context2.abrupt("return",new Promise(function(resolve,reject){if(!_this2.exists()){reject(new Error("'".concat(_this2._path,"' doesn't exist.")));return}if(_this2._isEOF){var err=new Error("EOF");err.name="EndOfFile";reject(err);return}var copied=0;var maxToCopy=Math.min(size*count,_this2._meta.size);var totalElems=maxToCopy-maxToCopy%size;var buffer=new Uint8Array(totalElems);var res=_this2._cache.get(totalElems);if(res){copied+=res.length;_this2._pageOffset+=res.length;if(_this2._pageOffset===_this2._pageSize){_this2._pagePtr+=1;_this2._pageOffset=0}if(totalElems===copied){resolve(res);return}buffer.set(res)}var keyRange=IDBKeyRange.bound("".concat(_this2._path,"-").concat(PvFileIDB.createPage(_this2._pagePtr)),"".concat(_this2._path,"-").concat(PvFileIDB.createPage(_this2._pagePtr+Math.floor(totalElems/_this2._pageSize)+1)));var store=_this2._store;var req=store.openCursor(keyRange);req.onsuccess=function(){var cursor=req.result;if(!cursor||_this2._isEOF){return}var 
toCopy=Math.min(totalElems-copied,cursor.value.length-_this2._pageOffset);buffer.set(cursor.value.slice(_this2._pageOffset,_this2._pageOffset+toCopy),copied);copied+=toCopy;_this2._pageOffset+=toCopy;if(_this2._pageOffset===_this2._pageSize){_this2._pagePtr+=1;_this2._pageOffset=0}if(copied1&&_args4[1]!==undefined?_args4[1]:1;return _context4.abrupt("return",new Promise(function(){var _ref=_asyncToGenerator(_regeneratorRuntime.mark(function _callee3(resolve,reject){var _store$transaction3;var store,getCurrentPage,last,newContent,newSize,newMeta,pages,i,keyRange,_store$transaction4;return _regeneratorRuntime.wrap(function _callee3$(_context3){while(1)switch(_context3.prev=_context3.next){case 0:if(!(_this3._mode==="readonly")){_context3.next=3;break}reject(new Error("Instance is readonly mode only."));return _context3.abrupt("return");case 3:if(!(typeof version!=="number"&&version<=0)){_context3.next=6;break}reject(new Error("Version should be a positive number"));return _context3.abrupt("return");case 6:store=_this3._store;getCurrentPage=function getCurrentPage(){return new Promise(function(res){var req=store.get("".concat(_this3._path,"-").concat(PvFileIDB.createPage(_this3._pagePtr)));req.onsuccess=function(){if(req.result!==undefined){res(req.result.slice(0,_this3._pageOffset))}else{res(new Uint8Array(0))}}})};_context3.next=10;return getCurrentPage();case 10:last=_context3.sent;newContent=new Uint8Array(last.length+content.length);newContent.set(last);newContent.set(content,last.length);newSize=_this3._pagePtr*_this3._pageSize+newContent.length;newMeta={size:newSize,numPages:Math.ceil(newSize/_this3._pageSize),version:version,pageSize:_this3._pageSize};store.put(newMeta,_this3._path);pages=Math.ceil(newContent.length/_this3._pageSize);for(i=0;i=this._meta.numPages-1&&this._pageOffset>=this._meta.size%this._pageSize}},{key:"_store",get:function get(){return this._db.transaction(PV_FILE_STORE,this._mode).objectStore(PV_FILE_STORE)}}],[{key:"open",value:function 
open(path,mode){if(!self.indexedDB){var error=new Error("IndexedDB is not supported");error.name="IndexedDBNotSupported";throw error}return new Promise(function(){var _ref3=_asyncToGenerator(_regeneratorRuntime.mark(function _callee7(resolve,reject){var db,req,_error2;return _regeneratorRuntime.wrap(function _callee7$(_context7){while(1)switch(_context7.prev=_context7.next){case 0:_context7.prev=0;_context7.next=3;return getDB();case 3:db=_context7.sent;req=db.transaction(PV_FILE_STORE,"readwrite").objectStore(PV_FILE_STORE).get(path);req.onerror=function(){reject(req.error)};req.onsuccess=function(){var meta=req.result;var dbMode=mode.includes("r")?"readonly":"readwrite";if(meta===undefined&&dbMode==="readonly"){var _error=new Error("'".concat(path,"' doesn't exist."));_error.name="FileNotExists";reject(_error);return}var fileIDB=new PvFileIDB(path,meta,db,dbMode);if(mode.includes("a")){fileIDB.seek(0,2)}resolve(fileIDB)};_context7.next=12;break;case 9:_context7.prev=9;_context7.t0=_context7["catch"](0);if(_context7.t0.name==="InvalidStateError"){_error2=new Error("IndexedDB is not supported");_error2.name="IndexedDBNotSupported";reject(_error2)}else{reject(_context7.t0)}case 12:case"end":return _context7.stop()}},_callee7,null,[[0,9]])}));return function(_x8,_x9){return _ref3.apply(this,arguments)}}())}},{key:"createPage",value:function createPage(page){return("00000"+page).slice(-6)}}])}(PvFile);function _callSuper(t,o,e){return o=_getPrototypeOf(o),_possibleConstructorReturn(t,_isNativeReflectConstruct()?Reflect.construct(o,e||[],_getPrototypeOf(t).constructor):o.apply(t,e))}function _isNativeReflectConstruct(){try{var t=!Boolean.prototype.valueOf.call(Reflect.construct(Boolean,[],function(){}))}catch(t){}return(_isNativeReflectConstruct=function _isNativeReflectConstruct(){return!!t})()}var PvFileMem=function(_PvFile){function PvFileMem(path,meta,db,mode){var 
_this;_classCallCheck(this,PvFileMem);_this=_callSuper(this,PvFileMem);_this._pos=0;_this._path=path;_this._meta=meta;_this._mode=mode;return _this}_inherits(PvFileMem,_PvFile);return _createClass(PvFileMem,[{key:"close",value:function close(){return}},{key:"read",value:function read(size,count){if(!this.exists()){throw new Error("'".concat(this._path,"' doesn't exist."))}if(this._isEOF){var err=new Error("EOF");err.name="EndOfFile";throw err}var toCopy=Math.min(size*count,this._file.length-this._pos);var totalElems=toCopy-toCopy%size;var buffer=new Uint8Array(totalElems);buffer.set(this._file.slice(this._pos,this._pos+totalElems),0);this._pos+=totalElems;return buffer}},{key:"write",value:function write(content){var version=arguments.length>1&&arguments[1]!==undefined?arguments[1]:1;var newFile=new Uint8Array(this._pos+content.length);if(this._file!==undefined){newFile.set(this._file.slice(0,this._pos));newFile.set(content,this._pos)}else{newFile.set(content)}this._file=newFile;this._pos+=content.length}},{key:"seek",value:function seek(offset,whence){if(!this.exists()&&this._mode==="readonly"){throw new Error("'".concat(this._path,"' doesn't exist."))}if(!this.exists()){throw new Error("'".concat(this._path,"' doesn't exist."))}if(offset<0){var err=new Error("EOF");err.name="EndOfFile";throw err}var newOffset;if(whence===0){newOffset=Math.min(offset,this._file.length)}else if(whence===1){newOffset=Math.min(this._pos+offset,this._file.length)}else if(whence===2){newOffset=Math.min(this._file.length+offset,this._file.length)}else{throw new Error("Invalid operation: ".concat(whence,"."))}this._pos=newOffset}},{key:"tell",value:function tell(){if(!this.exists()){return-1}return this._pos}},{key:"remove",value:function(){var _remove=_asyncToGenerator(_regeneratorRuntime.mark(function _callee(){return _regeneratorRuntime.wrap(function _callee$(_context){while(1)switch(_context.prev=_context.next){case 0:if(this.exists()){_context.next=2;break}throw new 
Error("ENOENT");case 2:PvFileMem._memFiles["delete"](this._path);this._pos=0;case 4:case"end":return _context.stop()}},_callee,this)}));function remove(){return _remove.apply(this,arguments)}return remove}()},{key:"exists",value:function exists(){return this._file!==undefined}},{key:"_isEOF",get:function get(){return this._pos>=this._file.length}},{key:"_file",get:function get(){return PvFileMem._memFiles.get(this._path)},set:function set(content){PvFileMem._memFiles.set(this._path,content)}}],[{key:"open",value:function open(path,mode){var file=PvFileMem._memFiles.get(path);var dbMode=mode.includes("r")?"readonly":"readwrite";if(file===undefined&&dbMode==="readonly"){var error=new Error("'".concat(path,"' doesn't exist."));error.name="FileNotExists";throw error}var fileMem=new PvFileMem(path,undefined,undefined,dbMode);if(mode.includes("a")){fileMem.seek(0,2)}return fileMem}}])}(PvFile);PvFileMem._memFiles=new Map;function unsignedAddress(address){if(address<0){return address>>>0}return address}function _arrayWithHoles(r){if(Array.isArray(r))return r}function _iterableToArrayLimit(r,l){var t=null==r?null:"undefined"!=typeof Symbol&&r[Symbol.iterator]||r["@@iterator"];if(null!=t){var e,n,i,u,a=[],f=!0,o=!1;try{if(i=(t=t.call(r)).next,0===l){if(Object(t)!==t)return;f=!1}else for(;!(f=(e=i.call(t)).done)&&(a.push(e.value),a.length!==l);f=!0);}catch(r){o=!0,n=r}finally{try{if(!f&&null!=t["return"]&&(u=t["return"](),Object(u)!==u))return}finally{if(o)throw n}}return a}}function _arrayLikeToArray$2(r,a){(null==a||a>r.length)&&(a=r.length);for(var e=0,n=Array(a);e=r.length?{done:!0}:{done:!1,value:r[_n++]}},e:function e(r){throw r},f:F}}throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")}var o,a=!0,u=!1;return{s:function s(){t=t.call(r)},n:function n(){var r=t.next();return a=r.done,r},e:function e(r){u=!0,o=r},f:function 
f(){try{a||null==t["return"]||t["return"]()}finally{if(u)throw o}}}}function _unsupportedIterableToArray$1(r,a){if(r){if("string"==typeof r)return _arrayLikeToArray$1(r,a);var t={}.toString.call(r).slice(8,-1);return"Object"===t&&r.constructor&&(t=r.constructor.name),"Map"===t||"Set"===t?Array.from(r):"Arguments"===t||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(t)?_arrayLikeToArray$1(r,a):void 0}}function _arrayLikeToArray$1(r,a){(null==a||a>r.length)&&(a=r.length);for(var e=0,n=Array(a);e=16){this.endComputePass();this.flushCommandEncoder()}}},{key:"endComputePass",value:function endComputePass(){if(this._passEncoder){this._passEncoder.end();this._passEncoder=null}}},{key:"getBuffer",value:function getBuffer(sizeBytes,usage){var mappedAtCreation=arguments.length>2&&arguments[2]!==undefined?arguments[2]:false;var label=arguments.length>3?arguments[3]:undefined;var key=this.getBufferKey(sizeBytes,usage);if(this.bufferReusePool.has(key)){var buffers=this.bufferReusePool.get(key);if(buffers&&buffers.length>0){return buffers.pop()}}return this.device.createBuffer({size:sizeBytes*Uint8Array.BYTES_PER_ELEMENT,usage:usage,mappedAtCreation:mappedAtCreation,label:label})}},{key:"scheduleUniformBufferForRelease",value:function scheduleUniformBufferForRelease(buffer){this._uniformBuffersPendingRelease.push(buffer)}},{key:"releaseBuffer",value:function releaseBuffer(buffer){var clearBuffer=arguments.length>1&&arguments[1]!==undefined?arguments[1]:true;if(clearBuffer){this.endComputePass();this.commandEncoder.clearBuffer(buffer,0,buffer.size)}var key=this.getBufferKey(buffer.size,buffer.usage);if(!this.bufferReusePool.has(key)){this.bufferReusePool.set(key,[])}this.bufferReusePool.get(key).push(buffer)}},{key:"sync",value:function(){var _sync=_asyncToGenerator(_regeneratorRuntime.mark(function _callee(){var _this=this;var _iterator,_step,k,buffers,_iterator3,_step3,b,_loop,_i,_Object$entries;return _regeneratorRuntime.wrap(function 
_callee$(_context3){while(1)switch(_context3.prev=_context3.next){case 0:this.flushCommandEncoder();_context3.next=3;return this.device.queue.onSubmittedWorkDone();case 3:_iterator=_createForOfIteratorHelper$1(this.bufferReusePool.keys());try{for(_iterator.s();!(_step=_iterator.n()).done;){k=_step.value;buffers=this.bufferReusePool.get(k);if(buffers&&buffers.length>0){_iterator3=_createForOfIteratorHelper$1(buffers);try{for(_iterator3.s();!(_step3=_iterator3.n()).done;){b=_step3.value;b===null||b===void 0||b.destroy()}}catch(err){_iterator3.e(err)}finally{_iterator3.f()}}}}catch(err){_iterator.e(err)}finally{_iterator.f()}this.bufferReusePool.clear();_loop=_regeneratorRuntime.mark(function _loop(){var _Object$entries$_i,shaderName,timestampBuffers,_iterator2,_step2,_loop2;return _regeneratorRuntime.wrap(function _loop$(_context2){while(1)switch(_context2.prev=_context2.next){case 0:_Object$entries$_i=_slicedToArray(_Object$entries[_i],2),shaderName=_Object$entries$_i[0],timestampBuffers=_Object$entries$_i[1];if(!_this.shaderTimes[shaderName]){_this.shaderTimes[shaderName]=[]}_iterator2=_createForOfIteratorHelper$1(timestampBuffers);_context2.prev=3;_loop2=_regeneratorRuntime.mark(function _loop2(){var timestampBuffer;return _regeneratorRuntime.wrap(function _loop2$(_context){while(1)switch(_context.prev=_context.next){case 0:timestampBuffer=_step2.value;timestampBuffer.mapAsync(GPUMapMode.READ).then(function(){var times=new BigInt64Array(timestampBuffer.getMappedRange());var timeDif=times[1]-times[0];timestampBuffer.unmap();timestampBuffer.destroy();_this.shaderTimes[shaderName].push(timeDif)});case 2:case"end":return _context.stop()}},_loop2)});_iterator2.s();case 6:if((_step2=_iterator2.n()).done){_context2.next=10;break}return _context2.delegateYield(_loop2(),"t0",8);case 8:_context2.next=6;break;case 10:_context2.next=15;break;case 12:_context2.prev=12;_context2.t1=_context2["catch"](3);_iterator2.e(_context2.t1);case 15:_context2.prev=15;_iterator2.f();return 
_context2.finish(15);case 18:case"end":return _context2.stop()}},_loop,null,[[3,12,15,18]])});_i=0,_Object$entries=Object.entries(this.timestampBuffers);case 8:if(!(_i<_Object$entries.length)){_context3.next=13;break}return _context3.delegateYield(_loop(),"t0",10);case 10:_i++;_context3.next=8;break;case 13:this.timestampBuffers={};case 14:case"end":return _context3.stop()}},_callee,this)}));function sync(){return _sync.apply(this,arguments)}return sync}()},{key:"reportShaderTimes",value:function reportShaderTimes(){for(var _i2=0,_Object$entries2=Object.entries(this.shaderTimes);_i2<_Object$entries2.length;_i2++){var _Object$entries2$_i=_slicedToArray(_Object$entries2[_i2],2),shaderName=_Object$entries2$_i[0],shaderTimes=_Object$entries2$_i[1];var timeSum=0n;var _iterator4=_createForOfIteratorHelper$1(shaderTimes),_step4;try{for(_iterator4.s();!(_step4=_iterator4.n()).done;){var shaderTime=_step4.value;timeSum=timeSum+shaderTime}}catch(err){_iterator4.e(err)}finally{_iterator4.f()}var totalSeconds=Number(timeSum)*1e-9;var avgSeconds=(totalSeconds/shaderTimes.length).toFixed(7);console.log("".concat(shaderName,", ").concat(totalSeconds.toFixed(5),", ").concat(avgSeconds))}this.shaderTimes={}}},{key:"flushCommandEncoder",value:function flushCommandEncoder(){var _this2=this;this.device.queue.submit([this.commandEncoder.finish()]);this._commandEncoder=null;this._numCommandsEncoded=0;this._stageBuffersPendingMap.forEach(function(buffer){buffer.destroy()});this._stageBuffersPendingMap=[];this._uniformBuffersPendingRelease.forEach(function(buffer){_this2.releaseBuffer(buffer,false)});this._uniformBuffersPendingRelease=[]}},{key:"writeBuffer",value:function writeBuffer(sizeBytes,offset,srcArray,dstBuffer){var stagingBuffer=this.getBuffer(sizeBytes,GPUBufferUsage.MAP_WRITE|GPUBufferUsage.COPY_SRC,true);new 
Uint8Array(stagingBuffer.getMappedRange()).set(srcArray);stagingBuffer.unmap();this._stageBuffersPendingMap.push(stagingBuffer);this.endComputePass();this.commandEncoder.copyBufferToBuffer(stagingBuffer,0,dstBuffer,offset,sizeBytes);this.numCommandsEncoded++}},{key:"dispatchComputerShader",value:function dispatchComputerShader(bindGroup,pipeline,shaderName,workgroupCountX,workgroupCountY,workgroupCountZ){if(this.isTimerEnabled){var querySet=this.device.createQuerySet({type:"timestamp",count:2});var timestampWrites={querySet:querySet,beginningOfPassWriteIndex:0,endOfPassWriteIndex:1};this.endComputePass();this._passEncoder=this.commandEncoder.beginComputePass({timestampWrites:timestampWrites});this._passEncoder.setBindGroup(0,bindGroup);this._passEncoder.setPipeline(pipeline);this._passEncoder.dispatchWorkgroups(workgroupCountX,workgroupCountY,workgroupCountZ);this.endComputePass();var size=2*BigInt64Array.BYTES_PER_ELEMENT;var resolveBuffer=this.device.createBuffer({size:size,usage:GPUBufferUsage.QUERY_RESOLVE|GPUBufferUsage.COPY_SRC});this.commandEncoder.resolveQuerySet(querySet,0,2,resolveBuffer,0);var resultBuffer=this.device.createBuffer({size:size,usage:GPUBufferUsage.COPY_DST|GPUBufferUsage.MAP_READ});this.commandEncoder.copyBufferToBuffer(resolveBuffer,0,resultBuffer,0,size);if(!this.timestampBuffers[shaderName]){this.timestampBuffers[shaderName]=[]}this.timestampBuffers[shaderName].push(resultBuffer);this.numCommandsEncoded+=3}else{if(!this._passEncoder){this._passEncoder=this.commandEncoder.beginComputePass()}this._passEncoder.setBindGroup(0,bindGroup);this._passEncoder.setPipeline(pipeline);this._passEncoder.dispatchWorkgroups(workgroupCountX,workgroupCountY,workgroupCountZ);this.numCommandsEncoded++}}}])}();var PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE=256;var PV_PICOLLM_WEBGPU_MAX_GRID_DIM=65535;var gpuDevices=new Map;var gpuBuffers=new Map;var emptyShader="\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main_empty() 
{}\n";
/* Entry-point name shared by every compute pipeline created in this file. */
var shaderEntryPoint="main";
/*
 * Precompute-encoding kernel. For each step t (strided over workgroups by num_workgroups.x)
 * and each d < dimension / 2 (strided over invocations by workgroup_size_x) it computes
 * x = t / pow(theta, 2d / dimension) and stores cos(x) and sin(x) at
 * encoding[encoding_offset + t * dimension + 2d] and the following element.
 * NOTE(review): declarations such as `var args: argsStruct`, `array` and `vec3` carry no
 * address space or element type in this text; presumably the `<...>` parts were lost when
 * this bundle was extracted. Confirm against the upstream shader.
 */
var PRECOMPUTE_ENCODING_SHADER_NAME="pv_picollm_attention_precompute_encoding_shader";var attentionPrecomputeEncodingShaderSource="\nstruct argsStruct {\n dimension: u32,\n steps: u32,\n theta: f32,\n encoding_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar encoding: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(\n @builtin(local_invocation_id) local_id: vec3,\n @builtin(workgroup_id) workgroup_id: vec3,\n @builtin(num_workgroups) num_workgroups: vec3\n) {\n let ts = workgroup_id.x;\n let ds = local_id.x;\n \n for (var t = ts; t < args.steps; t += num_workgroups.x) {\n for (var d = ds; d < (args.dimension / 2u); d += workgroup_size_x) {\n let f = 2u * d;\n let x = f32(t) / pow(args.theta, f32(f) / f32(args.dimension));\n let encoding_idx = args.encoding_offset + (t * args.dimension) + f;\n encoding[encoding_idx] = cos(x);\n encoding[encoding_idx + 1] = sin(x);\n }\n } \n}\n\n".concat(emptyShader);
/*
 * Loader for the kernel above: builds a bind group layout (binding 0 uniform args,
 * binding 1 read/write storage), a pipeline layout, the shader module and the compute
 * pipeline for a given GPUDevice.
 */
var loadAttentionPrecomputeEncodingShader=function loadAttentionPrecomputeEncodingShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"attention precompute encoding bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"attention precompute encoding pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"attention precompute encoding shader module",code:attentionPrecomputeEncodingShaderSource});var computePipeline=device.createComputePipeline({label:"attention precompute encoding 
pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_x:PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE}}});return{computePipeline:computePipeline}};
/*
 * Interleaved encode kernel. workgroup_id.x strides over the n rows, workgroup_id.y selects
 * the head, and invocations stride over d < head_dimension / 2. Each adjacent pair
 * (x[2d], x[2d + 1]) is treated as (re, im) and multiplied by the pair
 * (encoding[e], encoding[e + 1]) with e = encoding_start + 2d, where encoding_start is
 * encoding_offset + (t + position) * rope_dimension. The result is written back in place.
 */
var ENCODE_ROPE_INTERLEAVED_SHADER_NAME="pv_picollm_attention_encode_rope_interleaved_shader";var attentionEncodeRopeInterleavedShaderSource="\nstruct argsStruct { \n n: u32,\n num_heads: u32,\n head_dimension: u32,\n rope_dimension: u32,\n position: u32,\n encoding_offset: u32,\n x_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar encoding: array;\n\n@group(0) @binding(2)\nvar x: array;\n \noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(\n @builtin(local_invocation_id) local_id: vec3,\n @builtin(workgroup_id) workgroup_id: vec3,\n @builtin(num_workgroups) num_workgroups: vec3\n) {\n let ts = workgroup_id.x;\n let h = workgroup_id.y;\n let ds = local_id.x;\n\n for (var t = ts; t < args.n; t += num_workgroups.x) {\n let x_start = args.x_offset + (t * args.num_heads + h) * args.head_dimension;\n let encoding_start = args.encoding_offset + ((t + args.position) * args.rope_dimension); \n for (var d = ds; d < (args.head_dimension / 2u); d += workgroup_size_x) {\n let i = 2u * d;\n let x_idx = x_start + i;\n let encoding_idx = encoding_start + i;\n \n let re = x[x_idx];\n let im = x[x_idx + 1];\n x[x_idx] = (re * encoding[encoding_idx]) - (im * encoding[encoding_idx + 1]);\n x[x_idx + 1] = (re * encoding[encoding_idx + 1]) + (im * encoding[encoding_idx]);\n }\n }\n}\n\n".concat(emptyShader);
/* Loader for the interleaved encode kernel; takes a GPUDevice and builds its pipeline. */
var loadAttentionEncodeRopeInterleavedShader=function loadAttentionEncodeRopeInterleavedShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"attention encode rope interleave bind group 
layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"attention encode rope interleave pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"attention encode rope interleave shader module",code:attentionEncodeRopeInterleavedShaderSource});var computePipeline=device.createComputePipeline({label:"attention encode rope interleave pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_x:PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE}}});return{computePipeline:computePipeline}};
/*
 * Non-interleaved encode kernel. It uses the same argument struct and bindings as the
 * interleaved variant, but the (re, im) pair for index d is (x[xr_start + d],
 * x[xr_start + rope_dimension / 2 + d]) and only d < rope_dimension / 2 is visited.
 * The encoding pair is still read from encoding_start + 2d.
 */
var ENCODE_SHADER_NAME="pv_picollm_attention_encode_shader";var attentionEncodeShaderSource="\nstruct argsStruct { \n n: u32,\n num_heads: u32,\n head_dimension: u32,\n rope_dimension: u32,\n position: u32,\n encoding_offset: u32,\n x_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar encoding: array;\n\n@group(0) @binding(2)\nvar x: array;\n \noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(\n @builtin(local_invocation_id) local_id: vec3,\n @builtin(workgroup_id) workgroup_id: vec3,\n @builtin(num_workgroups) num_workgroups: vec3\n) {\n let ts = workgroup_id.x;\n let h = workgroup_id.y;\n let ds = local_id.x;\n\n for (var t = ts; t < args.n; t += num_workgroups.x) {\n let half_rope = (args.rope_dimension / 2);\n let xr_start = args.x_offset + ((t * args.num_heads + h) * args.head_dimension);\n let xi_start = xr_start + half_rope;\n let encoding_start = args.encoding_offset + ((t + args.position) * args.rope_dimension); \n for (var d = ds; d < half_rope; d += 
workgroup_size_x) {\n let xr_idx = xr_start + d;\n let xi_idx = xi_start + d;\n let encoding_idx = encoding_start + (2 * d);\n\n let re = x[xr_idx];\n let im = x[xi_idx];\n x[xr_idx] = (re * encoding[encoding_idx]) - (im * encoding[encoding_idx + 1]);\n x[xi_idx] = (re * encoding[encoding_idx + 1]) + (im * encoding[encoding_idx]);\n }\n }\n}\n\n".concat(emptyShader);var loadAttentionEncodeShader=function loadAttentionEncodeShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"attention encode bind layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"attention encode pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"attention encode shader",code:attentionEncodeShaderSource});var computePipeline=device.createComputePipeline({label:"attention encode pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_x:PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE}}});return{computePipeline:computePipeline}};var DOT_PRODUCT_SHADER_NAME="pv_picollm_attention_dot_product_shader";var attentionDotProductShaderSource="\nstruct argsStruct { \n n: u32,\n tq: u32,\n head_dimension: u32,\n num_heads: u32,\n num_kv_heads: u32,\n window_length: u32,\n start: u32,\n norm: f32,\n length1: u32,\n num_keys: u32,\n query_offset: u32,\n keys_offset: u32,\n key_intercepts_offset: u32,\n key_slopes_offset: u32,\n scores_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar query: array;\n\n@group(0) @binding(2)\nvar keys: array;\n\n@group(0) @binding(3)\nvar key_intercepts: array;\n\n@group(0) @binding(4)\nvar key_slopes: array;\n\n@group(0) @binding(5)\nvar scores: array;\n \noverride 
workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.num_heads) {\n return;\n }\n\n let head = global_id.x / (args.num_heads / args.num_kv_heads);\n \n let head_offset = head * args.window_length;\n let start_index = head_offset + args.start;\n \n let keys_local_a = args.keys_offset + (start_index * args.head_dimension);\n let key_intercepts_local_a = args.key_intercepts_offset + start_index; \n let key_slopes_local_a = args.key_slopes_offset + start_index;\n \n let keys_local_b = args.keys_offset + (head_offset * args.head_dimension);\n let key_intercepts_local_b = args.key_intercepts_offset + head_offset; \n let key_slopes_local_b = args.key_slopes_offset + head_offset;\n \n let scores_local = args.scores_offset + (global_id.x * args.num_keys);\n let query_local = args.query_offset + (((global_id.x * args.n) + args.tq) * args.head_dimension);\n \n for (var i = 0u; i < args.head_dimension; i++) { \n for (var k = 0u; k < args.num_keys; k++) {\n if (k < args.length1) { \n let key_idx = keys_local_a + (k * args.head_dimension) + i;\n let key_val = f32(extractBits(keys[key_idx / 4], (i * 8u) % 32u, 8u));\n let tmp = query[query_local + i] * (key_intercepts[key_intercepts_local_a + k] + (key_slopes[key_slopes_local_a + k] * key_val));\n scores[scores_local + k] += tmp;\n }\n else {\n let j = k - args.length1;\n let key_idx = keys_local_b + (j * args.head_dimension) + i;\n let key_val = f32(extractBits(keys[key_idx / 4], (i * 8u) % 32u, 8u));\n let tmp = query[query_local + i] * (key_intercepts[key_intercepts_local_b + j] + (key_slopes[key_slopes_local_b + j] * key_val));\n scores[scores_local + k] += tmp;\n }\n } \n }\n \n for (var k = 0u; k < args.num_keys; k++) {\n scores[scores_local + k] *= args.norm;\n }\n}\n\n".concat(emptyShader);
/*
 * Summary of the dot-product kernel text that ends on the statement above.
 * Each invocation with global_id.x < num_heads handles one query head; the key/value head
 * it reads is global_id.x / (num_heads / num_kv_heads). For every key k < num_keys the
 * kernel accumulates query[i] * (intercept + slope * byte) over i < head_dimension, where
 * the byte is unpacked from a 32-bit word with extractBits. Keys with k < length1 are read
 * starting at `start` inside the head's window; the remaining keys are read from the
 * beginning of that window. The accumulated scores are finally multiplied by `norm`.
 * `keys_offset` is added before the division by 4, so it is a byte offset; the other
 * offsets index their arrays directly.
 * NOTE(review): the kernel accumulates with `+=` and never zeroes `scores` itself;
 * presumably the caller clears the score buffer first. Confirm.
 */
var 
loadAttentionDotProductShader=function loadAttentionDotProductShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"attention dot product bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:4,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:5,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"attention dot product pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"attention dot product shader module",code:attentionDotProductShaderSource});var computePipeline=device.createComputePipeline({label:"attention dot product pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint}});return{computePipeline:computePipeline}};var SOFTMAX_SHADER_NAME="pv_picollm_attention_softmax_shader";var attentionSoftmaxShaderSource="\nstruct argsStruct { \n num_heads: u32,\n num_keys: u32,\n scores_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar scores: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.num_heads) {\n return;\n }\n\n let scores_start = args.scores_offset + (global_id.x * args.num_keys);\n \n var max_index: u32 = 0;\n for (var i = 1u; i < args.num_keys; i++) {\n if (scores[scores_start + i] > scores[scores_start + max_index]) {\n max_index = i;\n }\n }\n let max: f32 = scores[scores_start + max_index];\n\n var 
sum: f32 = 0.0;\n for (var i = 0u; i < args.num_keys; i++) {\n scores[scores_start + i] = exp(scores[scores_start + i] - max);\n sum += scores[scores_start + i];\n }\n\n for (var i = 0u; i < args.num_keys; i++) {\n scores[scores_start + i] /= sum;\n }\n}\n\n".concat(emptyShader);var loadAttentionSoftmaxShader=function loadAttentionSoftmaxShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"attention softmax bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"attention softmax pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"attention softmax shader module",code:attentionSoftmaxShaderSource});var computePipeline=device.createComputePipeline({label:"attention softmax pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint}});return{computePipeline:computePipeline}};var FIR_SHADER_NAME="pv_picollm_attention_fir_shader";var attentionFirShaderSource="\nstruct argsStruct { \n length1: u32,\n length2: u32,\n tq: u32,\n head_dimension: u32,\n num_heads: u32,\n num_kv_heads: u32,\n window_length: u32,\n start: u32,\n values_offset: u32,\n value_intercepts_offset: u32,\n value_slopes_offset: u32,\n scores_offset: u32,\n output_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar values: array;\n\n@group(0) @binding(2)\nvar value_intercepts: array;\n\n@group(0) @binding(3)\nvar value_slopes: array;\n\n@group(0) @binding(4)\nvar scores: array;\n\n@group(0) @binding(5)\nvar output: array;\n \noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= 
args.num_heads) {\n return;\n }\n\n let head = global_id.x / (args.num_heads / args.num_kv_heads);\n\n let head_offset = head * args.window_length;\n let start_index = head_offset + args.start;\n\n let values_local_a = args.values_offset + (start_index * args.head_dimension);\n let value_intercepts_local_a = args.value_intercepts_offset + start_index;\n let value_slopes_local_a = args.value_slopes_offset + start_index;\n let values_local_b = args.values_offset + (head_offset * args.head_dimension);\n let value_intercepts_local_b = args.value_intercepts_offset + head_offset;\n let value_slopes_local_b = args.value_slopes_offset + head_offset;\n let scores_local = args.scores_offset + (global_id.x * (args.length1 + args.length2));\n let output_local = args.output_offset + (((args.tq * args.num_heads) + global_id.x) * args.head_dimension);\n\n for (var i = 0u; i < args.head_dimension; i++) {\n var tmp: f32 = 0.0;\n for (var k = 0u; k < args.length1; k++) {\n let value_idx = values_local_a + (k * args.head_dimension) + i;\n let value_val = f32(extractBits(values[value_idx / 4], (i * 8u) % 32u, 8u));\n tmp += scores[scores_local + k] * (value_intercepts[value_intercepts_local_a + k] + (value_slopes[value_slopes_local_a + k] * value_val)); \n }\n for (var k = 0u; k < args.length2; k++) {\n let value_idx = values_local_b + (k * args.head_dimension) + i;\n let value_val = f32(extractBits(values[value_idx / 4], (i * 8u) % 32u, 8u));\n tmp += scores[scores_local + args.length1 + k] * (value_intercepts[value_intercepts_local_b + k] + (value_slopes[value_slopes_local_b + k] * value_val)); \n }\n output[output_local + i] = tmp;\n }\n}\n\n".concat(emptyShader);
/*
 * Summary of the FIR kernel text that ends on the statement above.
 * Each invocation with global_id.x < num_heads produces one head's output row of
 * head_dimension values: output[i] is the score-weighted sum of value component i over
 * length1 entries read from `start` inside the head's window, followed by length2 entries
 * read from the beginning of the window. Value bytes are unpacked with extractBits and
 * rescaled as intercept + slope * byte. `values_offset` is added before the division by 4,
 * so it is a byte offset.
 *
 * The loader below builds the pipeline for this kernel from a GPUDevice.
 */
var loadAttentionFirShader=function loadAttentionFirShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"attention fir bind group 
layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:4,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:5,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"attention fir pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"attention fir shader module",code:attentionFirShaderSource});var computePipeline=device.createComputePipeline({label:"attention fir pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint}});return{computePipeline:computePipeline}};
/*
 * Update-KV kernel. Each invocation with global_id.x < num_kv_heads handles one KV head
 * and loops over the n incoming rows. Row i is written to window slot
 * (position + i) % window_length of that head, i.e. the cache is addressed as a ring.
 * The rest of the kernel text (quantisation of the row) continues past this statement's
 * visible start.
 */
var UPDATE_KV_SHADER_NAME="pv_picollm_attention_update_kv_shader";var attentionUpdateKvShaderSource="\nstruct argsStruct {\n n: u32,\n num_kv_heads: u32,\n window_length: u32,\n position: u32,\n head_dimension: u32,\n tf_offset: u32,\n kv_offset: u32,\n kv_intercepts_offset: u32,\n kv_slopes_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar tf: array;\n\n@group(0) @binding(2)\nvar kv: array;\n\n@group(0) @binding(3)\nvar kv_intercepts: array;\n\n@group(0) @binding(4)\nvar kv_slopes: array;\n \noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) { \n if (global_id.x >= args.num_kv_heads) {\n return;\n }\n \n for (var i = 0u; i < args.n; i++) {\n let index = (global_id.x * args.window_length) + ((args.position + i) % args.window_length);\n let tf_start = args.tf_offset + (((i * args.num_kv_heads) + global_id.x) * 
args.head_dimension);\n let kv_start = args.kv_offset + ((index * args.head_dimension) / 4);\n let kv_intercepts_start = args.kv_intercepts_offset + index;\n let kv_slopes_start = args.kv_slopes_offset + index;\n \n var xmax = tf[tf_start]; \n var xmin = tf[tf_start]; \n \n for (var j = 1u; j < args.head_dimension; j++) {\n xmax = max(xmax, tf[tf_start + j]);\n xmin = min(xmin, tf[tf_start + j]);\n }\n\n kv_intercepts[kv_intercepts_start] = xmin;\n kv_slopes[kv_slopes_start] = f32(xmax - xmin) / 255.0;\n\n for (var j = 0u; j < args.head_dimension; j++) {\n let kv_idx = kv_start + (j / 4);\n let kv_val = u32(round((tf[tf_start + j] - xmin) / kv_slopes[kv_slopes_start])); \n kv[kv_idx] = insertBits(kv[kv_idx], extractBits(kv_val, 0u, 8u), (j * 8u) % 32u, 8u); \n }\n }\n}\n\n".concat(emptyShader);var loadAttentionUpdateKvShader=function loadAttentionUpdateKvShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"attention update kv bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}},{binding:4,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"attention update kv pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"attention update kv shader module",code:attentionUpdateKvShaderSource});var computePipeline=device.createComputePipeline({label:"attention update kv pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint}});return{computePipeline:computePipeline}};var TRANSPOSE_QUERY_SHADER_NAME="pv_picollm_attention_transpose_query_shader";var attentionTransposeQueryShaderSource="\nstruct argsStruct {\n n: u32,\n num_heads: u32,\n head_dimension: 
u32,\n tf_offset: u32,\n hf_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar tf: array;\n\n@group(0) @binding(2)\nvar hf: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(\n @builtin(workgroup_id) workgroup_id : vec3,\n @builtin(local_invocation_id) local_id : vec3\n) {\n\n if (workgroup_id.x >= args.num_heads || workgroup_id.y >= args.n || local_id.x >= args.head_dimension) {\n return;\n }\n \n let tf_idx = args.tf_offset + (workgroup_id.y * args.num_heads * args.head_dimension) + (workgroup_id.x * args.head_dimension) + local_id.x; \n let hf_idx = args.hf_offset + (workgroup_id.x * args.n * args.head_dimension) + (workgroup_id.y * args.head_dimension) + local_id.x; \n hf[hf_idx] = tf[tf_idx];\n}\n\n".concat(emptyShader);var loadAttentionTransposeQueryShader=function loadAttentionTransposeQueryShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"attention transpose query bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"attention transpose query pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"attention transpose query shader module",code:attentionTransposeQueryShaderSource});var computePipeline=device.createComputePipeline({label:"attention transpose query pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_x:PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE}}});return{computePipeline:computePipeline}};var 
attentionShaders=_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty({},PRECOMPUTE_ENCODING_SHADER_NAME,loadAttentionPrecomputeEncodingShader),ENCODE_ROPE_INTERLEAVED_SHADER_NAME,loadAttentionEncodeRopeInterleavedShader),ENCODE_SHADER_NAME,loadAttentionEncodeShader),DOT_PRODUCT_SHADER_NAME,loadAttentionDotProductShader),SOFTMAX_SHADER_NAME,loadAttentionSoftmaxShader),FIR_SHADER_NAME,loadAttentionFirShader),UPDATE_KV_SHADER_NAME,loadAttentionUpdateKvShader),TRANSPOSE_QUERY_SHADER_NAME,loadAttentionTransposeQueryShader);var getPicollmAttentionWebGpuFunctions=function getPicollmAttentionWebGpuFunctions(){var setStatus=function setStatus(statusAddress,value){Module.HEAP32[statusAddress/Int32Array.BYTES_PER_ELEMENT]=value};var pvPicollmAttentionPrecomputeEncodingWebGpu=function pvPicollmAttentionPrecomputeEncodingWebGpu(objAddress,encodingAddress,encodingOffset,dimension,steps,theta,statusAddress){var _gpuBuffers$get;objAddress=unsignedAddress(objAddress);encodingAddress=unsignedAddress(encodingAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[PRECOMPUTE_ENCODING_SHADER_NAME];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var encodingBuffer=(_gpuBuffers$get=gpuBuffers.get(encodingAddress))===null||_gpuBuffers$get===void 0?void 0:_gpuBuffers$get.buffer;if(!encodingBuffer){console.error("Encoding buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(4*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"attention precompute encoding arg buffer");var buffer=new ArrayBuffer(argsBuffer.size);var view=new 
DataView(buffer);view.setUint32(0,dimension,true);view.setUint32(4,steps,true);view.setFloat32(8,theta,true);view.setUint32(12,encodingOffset/4,true);obj.device.queue.writeBuffer(argsBuffer,0,buffer);obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"attention precompute encoding bind group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:encodingBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,PRECOMPUTE_ENCODING_SHADER_NAME,steps);setStatus(statusAddress,0)};var pvPicollmAttentionEncodeWebGpu=function pvPicollmAttentionEncodeWebGpu(objAddress,isRopeInterleaved,xAddress,xOffset,n,numHeads,headDimension,ropeDimension,position,encodingAddress,encodingOffset,statusAddress){var _gpuBuffers$get2,_gpuBuffers$get3;objAddress=unsignedAddress(objAddress);xAddress=unsignedAddress(xAddress);encodingAddress=unsignedAddress(encodingAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shaderName=isRopeInterleaved?ENCODE_ROPE_INTERLEAVED_SHADER_NAME:ENCODE_SHADER_NAME;var shader=obj.shaders[shaderName];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var encodingBuffer=(_gpuBuffers$get2=gpuBuffers.get(encodingAddress))===null||_gpuBuffers$get2===void 0?void 0:_gpuBuffers$get2.buffer;if(!encodingBuffer){console.error("Encoding buffer has not been allocated");setStatus(statusAddress,-1);return}var xBuffer=(_gpuBuffers$get3=gpuBuffers.get(xAddress))===null||_gpuBuffers$get3===void 0?void 0:_gpuBuffers$get3.buffer;if(!xBuffer){console.error("X buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(7*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"attention encode arg 
buffer");obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([n,numHeads,headDimension,ropeDimension,position,encodingOffset/4,xOffset/4]));obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"attention encode bind group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:encodingBuffer}},{binding:2,resource:{buffer:xBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,shaderName,Math.min(n,PV_PICOLLM_WEBGPU_MAX_GRID_DIM),numHeads);setStatus(statusAddress,0)};var pvPicollmAttentionDotProductWebGpu=function pvPicollmAttentionDotProductWebGpu(objAddress,queryAddress,queryOffset,keysAddress,keysOffset,keyInterceptsAddress,keyInterceptsOffset,keySlopesAddress,keySlopesOffset,n,tq,headDimension,numHeads,numKvHeads,windowLength,start,norm,length1,length2,numKeys,scoresAddress,scoresOffset,statusAddress){var _gpuBuffers$get4,_gpuBuffers$get5,_gpuBuffers$get6,_gpuBuffers$get7,_gpuBuffers$get8;objAddress=unsignedAddress(objAddress);queryAddress=unsignedAddress(queryAddress);keysAddress=unsignedAddress(keysAddress);keyInterceptsAddress=unsignedAddress(keyInterceptsAddress);keySlopesAddress=unsignedAddress(keySlopesAddress);scoresAddress=unsignedAddress(scoresAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[DOT_PRODUCT_SHADER_NAME];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var queryBuffer=(_gpuBuffers$get4=gpuBuffers.get(queryAddress))===null||_gpuBuffers$get4===void 0?void 0:_gpuBuffers$get4.buffer;if(!queryBuffer){console.error("query buffer has not been allocated");setStatus(statusAddress,-1);return}var keysBuffer=(_gpuBuffers$get5=gpuBuffers.get(keysAddress))===null||_gpuBuffers$get5===void 0?void 
0:_gpuBuffers$get5.buffer;if(!keysBuffer){console.error("keys buffer has not been allocated");setStatus(statusAddress,-1);return}var keyInterceptsBuffer=(_gpuBuffers$get6=gpuBuffers.get(keyInterceptsAddress))===null||_gpuBuffers$get6===void 0?void 0:_gpuBuffers$get6.buffer;if(!keyInterceptsBuffer){console.error("key intercepts buffer has not been allocated");setStatus(statusAddress,-1);return}var keySlopesBuffer=(_gpuBuffers$get7=gpuBuffers.get(keySlopesAddress))===null||_gpuBuffers$get7===void 0?void 0:_gpuBuffers$get7.buffer;if(!keySlopesBuffer){console.error("key slopes buffer has not been allocated");setStatus(statusAddress,-1);return}var scoresBuffer=(_gpuBuffers$get8=gpuBuffers.get(scoresAddress))===null||_gpuBuffers$get8===void 0?void 0:_gpuBuffers$get8.buffer;if(!scoresBuffer){console.error("scores buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(15*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"attention dot product arg buffer");var buffer=new ArrayBuffer(argsBuffer.size);var view=new DataView(buffer);view.setUint32(0,n,true);view.setUint32(4,tq,true);view.setUint32(8,headDimension,true);view.setUint32(12,numHeads,true);view.setUint32(16,numKvHeads,true);view.setUint32(20,windowLength,true);view.setUint32(24,start,true);view.setFloat32(28,norm,true);view.setUint32(32,length1,true);view.setUint32(36,numKeys,true);view.setUint32(40,scoresOffset,true);view.setUint32(44,queryOffset/4,true);view.setUint32(48,keysOffset,true);view.setUint32(52,keyInterceptsOffset/4,true);view.setUint32(56,keySlopesOffset/4,true);obj.device.queue.writeBuffer(argsBuffer,0,buffer);obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"attention dot product bind 
group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:queryBuffer}},{binding:2,resource:{buffer:keysBuffer}},{binding:3,resource:{buffer:keyInterceptsBuffer}},{binding:4,resource:{buffer:keySlopesBuffer}},{binding:5,resource:{buffer:scoresBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,DOT_PRODUCT_SHADER_NAME,numHeads);setStatus(statusAddress,0)};var pvPicollmAttentionSoftmaxWebGpu=function pvPicollmAttentionSoftmaxWebGpu(objAddress,scoresAddress,scoresOffset,numHeads,numKeys,statusAddress){var _gpuBuffers$get9;objAddress=unsignedAddress(objAddress);scoresAddress=unsignedAddress(scoresAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[SOFTMAX_SHADER_NAME];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var scoresBuffer=(_gpuBuffers$get9=gpuBuffers.get(scoresAddress))===null||_gpuBuffers$get9===void 0?void 0:_gpuBuffers$get9.buffer;if(!scoresBuffer){console.error("Scores buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(3*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"attention softmax arg buffer");obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([numHeads,numKeys,scoresOffset/4]));obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"attention softmax bind group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:scoresBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,SOFTMAX_SHADER_NAME,numHeads);setStatus(statusAddress,0)};var pvPicollmAttentionFirWebGpu=function 
pvPicollmAttentionFirWebGpu(objAddress,valuesAddress,valuesOffset,valueInterceptsAddress,valueInterceptsOffset,valueSlopesAddress,valueSlopesOffset,length1,length2,tq,headDimension,numHeads,numKvHeads,windowLength,start,scoresAddress,scoresOffset,outputAddress,outputOffset,statusAddress){
/*
 * FIR binding: encodes the kernel that turns scores and cached values into the output row
 * for query `tq`. Address parameters are keys into gpuDevices / gpuBuffers and are first
 * normalised with unsignedAddress(). Each lookup below bails out with status -1 and a
 * console error if the device, the shader or one of the five buffers is missing.
 * The `_gpuBuffers$getNN` temporaries hold the result of each `gpuBuffers.get` so that
 * `.buffer` is only read when the entry is not null/undefined.
 */
var _gpuBuffers$get10,_gpuBuffers$get11,_gpuBuffers$get12,_gpuBuffers$get13,_gpuBuffers$get14;objAddress=unsignedAddress(objAddress);valuesAddress=unsignedAddress(valuesAddress);valueInterceptsAddress=unsignedAddress(valueInterceptsAddress);valueSlopesAddress=unsignedAddress(valueSlopesAddress);scoresAddress=unsignedAddress(scoresAddress);outputAddress=unsignedAddress(outputAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[FIR_SHADER_NAME];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var valuesBuffer=(_gpuBuffers$get10=gpuBuffers.get(valuesAddress))===null||_gpuBuffers$get10===void 0?void 0:_gpuBuffers$get10.buffer;if(!valuesBuffer){console.error("values buffer has not been allocated");setStatus(statusAddress,-1);return}var valueInterceptsBuffer=(_gpuBuffers$get11=gpuBuffers.get(valueInterceptsAddress))===null||_gpuBuffers$get11===void 0?void 0:_gpuBuffers$get11.buffer;if(!valueInterceptsBuffer){console.error("value intercepts buffer has not been allocated");setStatus(statusAddress,-1);return}var valueSlopesBuffer=(_gpuBuffers$get12=gpuBuffers.get(valueSlopesAddress))===null||_gpuBuffers$get12===void 0?void 0:_gpuBuffers$get12.buffer;if(!valueSlopesBuffer){console.error("value slopes buffer has not been allocated");setStatus(statusAddress,-1);return}var scoresBuffer=(_gpuBuffers$get13=gpuBuffers.get(scoresAddress))===null||_gpuBuffers$get13===void 0?void 0:_gpuBuffers$get13.buffer;if(!scoresBuffer){console.error("scores buffer has not been 
allocated");setStatus(statusAddress,-1);return}var outputBuffer=(_gpuBuffers$get14=gpuBuffers.get(outputAddress))===null||_gpuBuffers$get14===void 0?void 0:_gpuBuffers$get14.buffer;if(!outputBuffer){console.error("output buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(13*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"attention fir arg buffer");obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([length1,length2,tq,headDimension,numHeads,numKvHeads,windowLength,start,valuesOffset,valueInterceptsOffset/4,valueSlopesOffset/4,scoresOffset/4,outputOffset/4]));obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"attention fir bind group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:valuesBuffer}},{binding:2,resource:{buffer:valueInterceptsBuffer}},{binding:3,resource:{buffer:valueSlopesBuffer}},{binding:4,resource:{buffer:scoresBuffer}},{binding:5,resource:{buffer:outputBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,FIR_SHADER_NAME,numHeads);setStatus(statusAddress,0)};var pvPicollmAttentionUpdateKvWebGpu=function pvPicollmAttentionUpdateKvWebGpu(objAddress,tfAddress,tfOffset,n,kvAddress,kvOffset,kvInterceptsAddress,kvInterceptsOffset,kvSlopesAddress,kvSlopesOffset,numKvHeads,windowLength,position,headDimension,statusAddress){var _gpuBuffers$get15,_gpuBuffers$get16,_gpuBuffers$get17,_gpuBuffers$get18;objAddress=unsignedAddress(objAddress);tfAddress=unsignedAddress(tfAddress);kvAddress=unsignedAddress(kvAddress);kvInterceptsAddress=unsignedAddress(kvInterceptsAddress);kvSlopesAddress=unsignedAddress(kvSlopesAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var 
/**
 * Launches the attention "update kv" shader, binding the tf buffer and the
 * KV, KV-intercepts and KV-slopes buffers.
 *
 * Nine u32 shader arguments are uploaded in this order: n, numKvHeads,
 * windowLength, position, headDimension, tfOffset / 4, kvOffset,
 * kvInterceptsOffset / 4, kvSlopesOffset / 4. `numKvHeads` is passed to
 * `dispatchComputerShader`.
 *
 * The status slot receives 0 on success, or -1 (after logging) on the first
 * missing device, shader or buffer.
 *
 * NOTE(review): `kvOffset` is passed undivided while the other offsets are
 * divided by 4 - presumably the KV buffer is addressed in a different unit;
 * confirm against the shader.
 */
var pvPicollmAttentionUpdateKvWebGpu = function pvPicollmAttentionUpdateKvWebGpu(objAddress, tfAddress, tfOffset, n, kvAddress, kvOffset, kvInterceptsAddress, kvInterceptsOffset, kvSlopesAddress, kvSlopesOffset, numKvHeads, windowLength, position, headDimension, statusAddress) {
  objAddress = unsignedAddress(objAddress);
  tfAddress = unsignedAddress(tfAddress);
  kvAddress = unsignedAddress(kvAddress);
  kvInterceptsAddress = unsignedAddress(kvInterceptsAddress);
  kvSlopesAddress = unsignedAddress(kvSlopesAddress);
  statusAddress = unsignedAddress(statusAddress);

  // Log the problem and flag the call as failed.
  function fail(message) {
    console.error(message);
    setStatus(statusAddress, -1);
  }

  // Resolve a registered GPU buffer, or undefined when nothing is registered.
  function bufferAt(address) {
    var entry = gpuBuffers.get(address);
    return entry === null || entry === undefined ? undefined : entry.buffer;
  }

  var gpu = gpuDevices.get(objAddress);
  if (!(gpu && gpu.device)) {
    fail("WebGPU device has not been initialized");
    return;
  }

  var updateShader = gpu.shaders[UPDATE_KV_SHADER_NAME];
  if (!updateShader) {
    fail("Shader has not been loaded");
    return;
  }

  var tfBuffer = bufferAt(tfAddress);
  if (!tfBuffer) {
    fail("tf buffer has not been allocated");
    return;
  }

  var kvBuffer = bufferAt(kvAddress);
  if (!kvBuffer) {
    fail("KV buffer has not been allocated");
    return;
  }

  var kvInterceptsBuffer = bufferAt(kvInterceptsAddress);
  if (!kvInterceptsBuffer) {
    fail("KV intercept buffer has not been allocated");
    return;
  }

  var kvSlopesBuffer = bufferAt(kvSlopesAddress);
  if (!kvSlopesBuffer) {
    fail("KV slopes buffer has not been allocated");
    return;
  }

  var uniformBuffer = gpu.getBuffer(
    9 * Uint32Array.BYTES_PER_ELEMENT,
    GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
    false,
    "attention update kv arg buffer"
  );
  var uniformValues = new Uint32Array([
    n,
    numKvHeads,
    windowLength,
    position,
    headDimension,
    tfOffset / 4,
    kvOffset,
    kvInterceptsOffset / 4,
    kvSlopesOffset / 4
  ]);
  gpu.device.queue.writeBuffer(uniformBuffer, 0, uniformValues);
  gpu.scheduleUniformBufferForRelease(uniformBuffer);

  var groupLayout = updateShader.computePipeline.getBindGroupLayout(0);
  var bindGroup = gpu.device.createBindGroup({
    label: "attention update kv bind group",
    layout: groupLayout,
    entries: [
      { binding: 0, resource: { buffer: uniformBuffer } },
      { binding: 1, resource: { buffer: tfBuffer } },
      { binding: 2, resource: { buffer: kvBuffer } },
      { binding: 3, resource: { buffer: kvInterceptsBuffer } },
      { binding: 4, resource: { buffer: kvSlopesBuffer } }
    ]
  });

  gpu.dispatchComputerShader(bindGroup, updateShader.computePipeline, UPDATE_KV_SHADER_NAME, numKvHeads);
  setStatus(statusAddress, 0);
};
/**
 * Launches the attention "transpose query" shader with the tf buffer bound
 * at binding 1 and the hf buffer at binding 2.
 *
 * Five u32 shader arguments are uploaded in this order: n, numHeads,
 * headDimension, tfOffset / 4, hfOffset / 4. Both `numHeads` and `n` are
 * passed to `dispatchComputerShader`.
 *
 * The status slot receives 0 on success, or -1 (after logging) when the
 * device, shader or either buffer is missing.
 */
var pvPicollmAttentionTransposeQueryWebGpu = function pvPicollmAttentionTransposeQueryWebGpu(objAddress, tfAddress, tfOffset, hfAddress, hfOffset, n, numHeads, headDimension, statusAddress) {
  objAddress = unsignedAddress(objAddress);
  tfAddress = unsignedAddress(tfAddress);
  hfAddress = unsignedAddress(hfAddress);
  statusAddress = unsignedAddress(statusAddress);

  // Log the problem and flag the call as failed.
  function fail(message) {
    console.error(message);
    setStatus(statusAddress, -1);
  }

  // Resolve a registered GPU buffer, or undefined when nothing is registered.
  function bufferAt(address) {
    var entry = gpuBuffers.get(address);
    return entry === null || entry === undefined ? undefined : entry.buffer;
  }

  var gpu = gpuDevices.get(objAddress);
  if (!(gpu && gpu.device)) {
    fail("WebGPU device has not been initialized");
    return;
  }

  var transposeShader = gpu.shaders[TRANSPOSE_QUERY_SHADER_NAME];
  if (!transposeShader) {
    fail("Shader has not been loaded");
    return;
  }

  var tfBuffer = bufferAt(tfAddress);
  if (!tfBuffer) {
    fail("tf buffer has not been allocated");
    return;
  }

  var hfBuffer = bufferAt(hfAddress);
  if (!hfBuffer) {
    fail("hf buffer has not been allocated");
    return;
  }

  var uniformBuffer = gpu.getBuffer(
    5 * Uint32Array.BYTES_PER_ELEMENT,
    GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
    false,
    "attention transpose query arg buffer"
  );
  var uniformValues = new Uint32Array([n, numHeads, headDimension, tfOffset / 4, hfOffset / 4]);
  gpu.device.queue.writeBuffer(uniformBuffer, 0, uniformValues);
  gpu.scheduleUniformBufferForRelease(uniformBuffer);

  var groupLayout = transposeShader.computePipeline.getBindGroupLayout(0);
  var bindGroup = gpu.device.createBindGroup({
    label: "attention transpose query bind group",
    layout: groupLayout,
    entries: [
      { binding: 0, resource: { buffer: uniformBuffer } },
      { binding: 1, resource: { buffer: tfBuffer } },
      { binding: 2, resource: { buffer: hfBuffer } }
    ]
  });

  gpu.dispatchComputerShader(bindGroup, transposeShader.computePipeline, TRANSPOSE_QUERY_SHADER_NAME, numHeads, n);
  setStatus(statusAddress, 0);
};
// Key under which the feed-forward SiLU shader is registered.
var SILU_SHADER_NAME = "pv_picollm_feed_forward_silu_shader";

// WGSL for the in-place SiLU activation: x[i] = x[i] / (1 + exp(-x[i])) for
// every i < args.n. The shared `emptyShader` text is appended at the end.
// NOTE(review): the WGSL text as bundled here shows `var args`, `array` and
// `vec3` without template arguments - verify against the unbundled shader.
var feedForwardSiluShaderSource = [
  "",
  "struct argsStruct {",
  " n: u32,",
  "};",
  "",
  "@group(0) @binding(0) ",
  "var args: argsStruct;",
  "",
  "@group(0) @binding(1)",
  "var x: array;",
  "",
  "override workgroup_size_x: u32 = 1;",
  "override workgroup_size_y: u32 = 1;",
  "override workgroup_size_z: u32 = 1;",
  "",
  "@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)",
  "fn main(@builtin(global_invocation_id) global_id: vec3) {",
  " if (global_id.x >= args.n) {",
  " return;",
  " }",
  " x[global_id.x] = x[global_id.x] / (1.0 + exp(-x[global_id.x]));",
  "}",
  "",
  ""
].join("\n").concat(emptyShader);

/**
 * Builds the compute pipeline for the feed-forward SiLU shader.
 *
 * Creates, in order, a bind group layout (binding 0: uniform arguments,
 * binding 1: read-write storage), a pipeline layout, the shader module and a
 * compute pipeline whose `workgroup_size_x` override is set to
 * PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE.
 *
 * Fix: the pipeline's debug label was misspelt "ff silu pipline"; it now
 * reads "ff silu pipeline", matching the other feed-forward loaders.
 *
 * @param {GPUDevice} device Device used to create the GPU objects.
 * @returns {{computePipeline: GPUComputePipeline}} The created pipeline.
 */
var loadFeedForwardSiluShader = function loadFeedForwardSiluShader(device) {
  var bindGroupLayout = device.createBindGroupLayout({
    label: "ff silu bind group layout",
    entries: [
      { binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: "uniform" } },
      { binding: 1, visibility: GPUShaderStage.COMPUTE, buffer: { type: "storage" } }
    ]
  });
  var pipelineLayout = device.createPipelineLayout({
    label: "ff silu pipeline layout",
    bindGroupLayouts: [bindGroupLayout]
  });
  var shaderModule = device.createShaderModule({
    label: "ff silu shader module",
    code: feedForwardSiluShaderSource
  });
  var computePipeline = device.createComputePipeline({
    label: "ff silu pipeline",
    layout: pipelineLayout,
    compute: {
      module: shaderModule,
      entryPoint: shaderEntryPoint,
      constants: { workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE }
    }
  });
  return { computePipeline: computePipeline };
};
// Key under which the feed-forward GELU shader is registered.
var GELU_SHADER_NAME = "pv_picollm_feed_forward_gelu_shader";

// WGSL for the in-place GELU activation,
// x[i] = 0.5 * x[i] * (1 + erf(x[i] * 0.7071067811865475)), with erf
// approximated by the polynomial the shader itself cites as A&S 7.1.26.
// The shared `emptyShader` text is appended at the end.
var feedForwardGeluShaderSource = [
  "",
  "struct argsStruct {",
  " n: u32,",
  "};",
  "",
  "@group(0) @binding(0) ",
  "var args: argsStruct;",
  "",
  "@group(0) @binding(1)",
  "var x: array;",
  "",
  "const a1: f32 = 0.254829592;",
  "const a2: f32 = -0.284496736;",
  "const a3: f32 = 1.421413741;",
  "const a4: f32 = -1.453152027;",
  "const a5: f32 = 1.061405429;",
  "const p: f32 = 0.3275911;",
  "",
  "// A&S formula 7.1.26",
  "fn erf(x: f32) -> f32 { ",
  " var sign: f32 = 1.0;",
  " if (x < 0) {",
  " sign = -1.0;",
  " }",
  " var x_abs: f32 = abs(x);",
  " ",
  " let t: f32 = 1.0 / fma(p, x_abs, 1.0);",
  " let y: f32 = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x_abs * x_abs);",
  "",
  " return sign * y;",
  "}",
  "",
  "override workgroup_size_x: u32 = 1;",
  "override workgroup_size_y: u32 = 1;",
  "override workgroup_size_z: u32 = 1;",
  "",
  "@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)",
  "fn main(@builtin(global_invocation_id) global_id: vec3) {",
  " if (global_id.x >= args.n) {",
  " return;",
  " }",
  " x[global_id.x] = 0.5 * x[global_id.x] * (1.0 + erf(x[global_id.x] * 0.7071067811865475));",
  "}",
  "",
  ""
].join("\n").concat(emptyShader);

/**
 * Builds the compute pipeline for the feed-forward GELU shader.
 *
 * Creates, in order, a bind group layout (binding 0: uniform arguments,
 * binding 1: read-write storage), a pipeline layout, the shader module and a
 * compute pipeline whose `workgroup_size_x` override is set to
 * PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE.
 *
 * @param {GPUDevice} device Device used to create the GPU objects.
 * @returns {{computePipeline: GPUComputePipeline}} The created pipeline.
 */
var loadFeedForwardGeluShader = function loadFeedForwardGeluShader(device) {
  var layoutEntries = [
    { binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: "uniform" } },
    { binding: 1, visibility: GPUShaderStage.COMPUTE, buffer: { type: "storage" } }
  ];
  var groupLayout = device.createBindGroupLayout({
    label: "ff gelu bind group layout",
    entries: layoutEntries
  });
  var layout = device.createPipelineLayout({
    label: "ff gelu pipeline layout",
    bindGroupLayouts: [groupLayout]
  });
  var module = device.createShaderModule({
    label: "ff gelu shader module",
    code: feedForwardGeluShaderSource
  });
  var pipeline = device.createComputePipeline({
    label: "ff gelu pipeline",
    layout: layout,
    compute: {
      module: module,
      entryPoint: shaderEntryPoint,
      constants: { workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE }
    }
  });
  return { computePipeline: pipeline };
};
// Key under which the feed-forward tanh-based GELU shader is registered.
var ALMOST_GELU_SHADER_NAME = "pv_picollm_feed_forward_almost_gelu_shader";

// WGSL for the in-place tanh-based GELU variant:
// x = 0.5 * x * (1 + tanh(0.7978845608028654 * (x + 0.044715 * x^3))).
// The shared `emptyShader` text is appended at the end.
var feedForwardAlmostGeluShaderSource = [
  "",
  "struct argsStruct {",
  " n: u32,",
  "};",
  "",
  "@group(0) @binding(0) ",
  "var args: argsStruct;",
  "",
  "@group(0) @binding(1)",
  "var x: array;",
  "",
  "override workgroup_size_x: u32 = 1;",
  "override workgroup_size_y: u32 = 1;",
  "override workgroup_size_z: u32 = 1;",
  "",
  "@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)",
  "fn main(@builtin(global_invocation_id) global_id: vec3) {",
  " if (global_id.x >= args.n) {",
  " return;",
  " }",
  " x[global_id.x] = 0.5 * x[global_id.x] * (1 + tanh(0.7978845608028654 * (x[global_id.x] + (0.044715f * x[global_id.x] * x[global_id.x] * x[global_id.x]))));",
  "}",
  "",
  ""
].join("\n").concat(emptyShader);

/**
 * Builds the compute pipeline for the feed-forward tanh-based GELU shader.
 *
 * Creates, in order, a bind group layout (binding 0: uniform arguments,
 * binding 1: read-write storage), a pipeline layout, the shader module and a
 * compute pipeline whose `workgroup_size_x` override is set to
 * PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE.
 *
 * @param {GPUDevice} device Device used to create the GPU objects.
 * @returns {{computePipeline: GPUComputePipeline}} The created pipeline.
 */
var loadFeedForwardAlmostGeluShader = function loadFeedForwardAlmostGeluShader(device) {
  var layoutEntries = [
    { binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: "uniform" } },
    { binding: 1, visibility: GPUShaderStage.COMPUTE, buffer: { type: "storage" } }
  ];
  var groupLayout = device.createBindGroupLayout({
    label: "ff almost gelu bind group layout",
    entries: layoutEntries
  });
  var layout = device.createPipelineLayout({
    label: "ff almost gelu pipeline layout",
    bindGroupLayouts: [groupLayout]
  });
  var module = device.createShaderModule({
    label: "ff almost gelu shader module",
    code: feedForwardAlmostGeluShaderSource
  });
  var pipeline = device.createComputePipeline({
    label: "ff almost gelu pipeline",
    layout: layout,
    compute: {
      module: module,
      entryPoint: shaderEntryPoint,
      constants: { workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE }
    }
  });
  return { computePipeline: pipeline };
};
// Key under which the feed-forward element-wise multiply shader is registered.
var MULTIPLY_BUFFERS_SHADER_NAME = "pv_picollm_feed_forward_multiply_buffers_shader";

// WGSL for the element-wise multiply: y[i] *= x[i] for every i < args.n.
// The shared `emptyShader` text is appended at the end.
var feedForwardMultiplyBuffersShaderSource = [
  "",
  "struct argsStruct {",
  " n: u32,",
  "};",
  "",
  "@group(0) @binding(0) ",
  "var args: argsStruct;",
  "",
  "@group(0) @binding(1)",
  "var x: array;",
  "",
  "@group(0) @binding(2)",
  "var y: array;",
  "",
  "override workgroup_size_x: u32 = 1;",
  "override workgroup_size_y: u32 = 1;",
  "override workgroup_size_z: u32 = 1;",
  "",
  "@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)",
  "fn main(@builtin(global_invocation_id) global_id: vec3) {",
  " if (global_id.x >= args.n) {",
  " return;",
  " }",
  " y[global_id.x] *= x[global_id.x];",
  "}",
  "",
  ""
].join("\n").concat(emptyShader);

/**
 * Builds the compute pipeline for the feed-forward multiply-buffers shader.
 *
 * Creates, in order, a bind group layout (binding 0: uniform arguments,
 * binding 1: read-only storage, binding 2: read-write storage), a pipeline
 * layout, the shader module and a compute pipeline whose `workgroup_size_x`
 * override is set to PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE.
 *
 * @param {GPUDevice} device Device used to create the GPU objects.
 * @returns {{computePipeline: GPUComputePipeline}} The created pipeline.
 */
var loadFeedForwardMultiplyBuffersShader = function loadFeedForwardMultiplyBuffersShader(device) {
  var layoutEntries = [
    { binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: "uniform" } },
    { binding: 1, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
    { binding: 2, visibility: GPUShaderStage.COMPUTE, buffer: { type: "storage" } }
  ];
  var groupLayout = device.createBindGroupLayout({
    label: "ff multiply buffers bind group layout",
    entries: layoutEntries
  });
  var layout = device.createPipelineLayout({
    label: "ff multiply buffers pipeline layout",
    bindGroupLayouts: [groupLayout]
  });
  var module = device.createShaderModule({
    label: "ff multiply buffers shader module",
    code: feedForwardMultiplyBuffersShaderSource
  });
  var pipeline = device.createComputePipeline({
    label: "ff multiply buffers pipeline",
    layout: layout,
    compute: {
      module: module,
      entryPoint: shaderEntryPoint,
      constants: { workgroup_size_x: PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE }
    }
  });
  return { computePipeline: pipeline };
};

// Registry mapping each feed-forward shader name to its loader function.
var feedForwardShaders = {};
_defineProperty(feedForwardShaders, SILU_SHADER_NAME, loadFeedForwardSiluShader);
_defineProperty(feedForwardShaders, GELU_SHADER_NAME, loadFeedForwardGeluShader);
_defineProperty(feedForwardShaders, ALMOST_GELU_SHADER_NAME, loadFeedForwardAlmostGeluShader);
_defineProperty(feedForwardShaders, MULTIPLY_BUFFERS_SHADER_NAME, loadFeedForwardMultiplyBuffersShader);
/**
 * Creates the feed-forward WebGPU entry points and returns them keyed by
 * their `pv_picollm_feed_forward_*_webgpu_wasm` names.
 *
 * Each entry point resolves its device and buffers from the `gpuDevices` /
 * `gpuBuffers` registries, uploads the element count `n` as a single u32
 * shader argument, and passes ceil(n / PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE)
 * to `dispatchComputerShader`. The status slot in `Module.HEAP32` receives 0
 * on success, or -1 (after logging) on a missing device, shader or buffer.
 */
var getPicollmFeedForwardWebGpuFunctions = function getPicollmFeedForwardWebGpuFunctions() {
  var setStatus = function setStatus(statusAddress, value) {
    Module.HEAP32[statusAddress / Int32Array.BYTES_PER_ELEMENT] = value;
  };

  // Log the problem and flag the call as failed.
  var reportFailure = function (statusAddress, message) {
    console.error(message);
    setStatus(statusAddress, -1);
  };

  // Resolve a registered GPU buffer, or undefined when nothing is registered.
  var bufferAt = function (address) {
    var entry = gpuBuffers.get(address);
    return entry === null || entry === undefined ? undefined : entry.buffer;
  };

  // Upload `n`, bind the argument buffer at 0 followed by `storageBuffers`
  // at 1.., and dispatch the named shader.
  var launch = function (gpu, shader, shaderName, labelPrefix, n, storageBuffers) {
    var uniformBuffer = gpu.getBuffer(
      Uint32Array.BYTES_PER_ELEMENT,
      GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
      false,
      labelPrefix + " arg buffer"
    );
    gpu.device.queue.writeBuffer(uniformBuffer, 0, new Uint32Array([n]));
    gpu.scheduleUniformBufferForRelease(uniformBuffer);

    var groupLayout = shader.computePipeline.getBindGroupLayout(0);
    var groupEntries = [{ binding: 0, resource: { buffer: uniformBuffer } }];
    for (var i = 0; i < storageBuffers.length; i++) {
      groupEntries.push({ binding: i + 1, resource: { buffer: storageBuffers[i] } });
    }
    var bindGroup = gpu.device.createBindGroup({
      label: labelPrefix + " bind group",
      layout: groupLayout,
      entries: groupEntries
    });

    gpu.dispatchComputerShader(
      bindGroup,
      shader.computePipeline,
      shaderName,
      Math.ceil(n / PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE)
    );
  };

  // Shared body of the three in-place activations, which differ only in the
  // shader they run and the label prefix they use.
  var runActivation = function (shaderName, labelPrefix, objAddress, n, xAddress, statusAddress) {
    objAddress = unsignedAddress(objAddress);
    xAddress = unsignedAddress(xAddress);
    statusAddress = unsignedAddress(statusAddress);

    var gpu = gpuDevices.get(objAddress);
    if (!(gpu && gpu.device)) {
      reportFailure(statusAddress, "WebGPU device has not been initialized");
      return;
    }
    var shader = gpu.shaders[shaderName];
    if (!shader) {
      reportFailure(statusAddress, "Shader has not been loaded");
      return;
    }
    var xBuffer = bufferAt(xAddress);
    if (!xBuffer) {
      reportFailure(statusAddress, "x buffer has not been allocated");
      return;
    }

    launch(gpu, shader, shaderName, labelPrefix, n, [xBuffer]);
    setStatus(statusAddress, 0);
  };

  var pvPicollmFeedForwardSiluWebGpu = function pvPicollmFeedForwardSiluWebGpu(objAddress, n, xAddress, statusAddress) {
    runActivation(SILU_SHADER_NAME, "ff silu", objAddress, n, xAddress, statusAddress);
  };

  var pvPicollmFeedForwardGeluWebGpu = function pvPicollmFeedForwardGeluWebGpu(objAddress, n, xAddress, statusAddress) {
    runActivation(GELU_SHADER_NAME, "ff gelu", objAddress, n, xAddress, statusAddress);
  };

  var pvPicollmFeedForwardAlmostGeluWebGpu = function pvPicollmFeedForwardAlmostGeluWebGpu(objAddress, n, xAddress, statusAddress) {
    runActivation(ALMOST_GELU_SHADER_NAME, "ff almost gelu", objAddress, n, xAddress, statusAddress);
  };

  var pvPicollmFeedForwardMultiplyBuffersWebGpu = function pvPicollmFeedForwardMultiplyBuffersWebGpu(objAddress, n, xAddress, yAddress, statusAddress) {
    objAddress = unsignedAddress(objAddress);
    xAddress = unsignedAddress(xAddress);
    yAddress = unsignedAddress(yAddress);
    statusAddress = unsignedAddress(statusAddress);

    var gpu = gpuDevices.get(objAddress);
    if (!(gpu && gpu.device)) {
      reportFailure(statusAddress, "WebGPU device has not been initialized");
      return;
    }
    var shader = gpu.shaders[MULTIPLY_BUFFERS_SHADER_NAME];
    if (!shader) {
      reportFailure(statusAddress, "Shader has not been loaded");
      return;
    }
    var xBuffer = bufferAt(xAddress);
    if (!xBuffer) {
      reportFailure(statusAddress, "X buffer has not been allocated");
      return;
    }
    var yBuffer = bufferAt(yAddress);
    if (!yBuffer) {
      reportFailure(statusAddress, "Y buffer has not been allocated");
      return;
    }

    launch(gpu, shader, MULTIPLY_BUFFERS_SHADER_NAME, "ff multiply buffers", n, [xBuffer, yBuffer]);
    setStatus(statusAddress, 0);
  };

  return {
    pv_picollm_feed_forward_silu_webgpu_wasm: pvPicollmFeedForwardSiluWebGpu,
    pv_picollm_feed_forward_gelu_webgpu_wasm: pvPicollmFeedForwardGeluWebGpu,
    pv_picollm_feed_forward_almost_gelu_webgpu_wasm: pvPicollmFeedForwardAlmostGeluWebGpu,
    pv_picollm_feed_forward_multiply_buffers_webgpu_wasm: pvPicollmFeedForwardMultiplyBuffersWebGpu
  };
};
// Key under which the gate forward shader is registered.
var FORWARD_SHADER_NAME$1 = "pv_picollm_gate_forward_shader";

// WGSL for the gate forward pass. Per invocation it copies `num_experts`
// values of `y` into `ixs`, bubble-sorts them in descending order, writes the
// first `k` indices and values to `indices` / `weights`, then normalises
// those `k` weights with a max-subtracted exponential divided by their sum.
// Sizes and offsets are pipeline-overridable constants rather than uniforms.
// The shared `emptyShader` text is appended at the end.
// NOTE(review): the WGSL text as bundled here shows `array` and `vec3`
// without template arguments - verify against the unbundled shader.
var gateForwardShaderSource = [
  "",
  "",
  "struct pv_picollm_gate_ix_t {",
  " i: u32,",
  " x: f32,",
  "}",
  "",
  "@group(0) @binding(0)",
  "var y: array;",
  "",
  "@group(0) @binding(1)",
  "var indices: array;",
  "",
  "@group(0) @binding(2)",
  "var weights: array;",
  "",
  "override n: u32 = 0;",
  "override k: u32 = 0;",
  "override num_experts: u32 = 0;",
  "",
  "override y_offset: u32 = 0;",
  "override indices_offset: u32 = 0;",
  "override weights_offset: u32 = 0;",
  "",
  "var ixs: array;",
  " ",
  "override workgroup_size_x: u32 = 1;",
  "override workgroup_size_y: u32 = 1;",
  "override workgroup_size_z: u32 = 1;",
  "",
  "@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)",
  "fn main(",
  " @builtin(global_invocation_id) global_id : vec3",
  ") {",
  " if (global_id.x >= n) {",
  " return;",
  " }",
  " ",
  " var y_start: u32 = y_offset + global_id.x * num_experts;",
  " for (var j = 0u; j < num_experts; j++) {",
  " ixs[j].i = j;",
  " ixs[j].x = y[y_start + j];",
  " }",
  "",
  " for (var i = 0u; i < num_experts - 1; i++) {",
  " for (var j = 0u; j < num_experts - i - 1; j++) {",
  " if (ixs[j].x < ixs[j + 1].x) {",
  " let tmp = ixs[j];",
  " ixs[j] = ixs[j + 1];",
  " ixs[j + 1] = tmp;",
  " }",
  " }",
  " }",
  "",
  " for (var j = 0u; j < k; j++) {",
  " indices[indices_offset + (global_id.x * k) + j] = ixs[j].i;",
  " weights[weights_offset + (global_id.x * k) + j] = ixs[j].x;",
  " }",
  "",
  " var max_weight: f32 = weights[weights_offset + (global_id.x * k)];",
  " for (var j = 1u; j < k; j++) {",
  " max_weight = max(max_weight, weights[weights_offset + (global_id.x * k) + j]);",
  " }",
  "",
  " var sum_weight: f32 = 0.0;",
  " for (var j = 0u; j < k; j++) {",
  " weights[weights_offset + (global_id.x * k) + j] = exp(weights[weights_offset + (global_id.x * k) + j] - max_weight);",
  " sum_weight += weights[weights_offset + (global_id.x * k) + j];",
  " }",
  "",
  " for (var j = 0u; j < k; j++) {",
  " weights[weights_offset + (global_id.x * k) + j] /= sum_weight;",
  " }",
  "}",
  "",
  ""
].join("\n").concat(emptyShader);

/**
 * Builds the reusable GPU objects for the gate forward shader.
 *
 * Besides a default compute pipeline (created with `num_experts` overridden
 * to 1), the pipeline layout and shader module are returned as well, so a
 * caller can build further pipelines with different override constants.
 *
 * Fix: the pipeline layout was labelled "gate forward pipeline" and the
 * default compute pipeline had no label, which makes WebGPU diagnostics
 * ambiguous. The layout is now labelled "gate forward pipeline layout" and
 * the pipeline "gate forward pipeline", as the other loaders do.
 *
 * @param {GPUDevice} device Device used to create the GPU objects.
 * @returns {{computePipeline: GPUComputePipeline, pipelineLayout: GPUPipelineLayout, shaderModule: GPUShaderModule}}
 */
var loadGateForwardShader = function loadGateForwardShader(device) {
  var bindGroupLayout = device.createBindGroupLayout({
    label: "gate forward bind group layout",
    entries: [
      { binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
      { binding: 1, visibility: GPUShaderStage.COMPUTE, buffer: { type: "storage" } },
      { binding: 2, visibility: GPUShaderStage.COMPUTE, buffer: { type: "storage" } }
    ]
  });
  var pipelineLayout = device.createPipelineLayout({
    label: "gate forward pipeline layout",
    bindGroupLayouts: [bindGroupLayout]
  });
  var shaderModule = device.createShaderModule({
    label: "gate forward shader module",
    code: gateForwardShaderSource
  });
  var computePipeline = device.createComputePipeline({
    label: "gate forward pipeline",
    layout: pipelineLayout,
    compute: {
      module: shaderModule,
      entryPoint: shaderEntryPoint,
      constants: { num_experts: 1 }
    }
  });
  return {
    computePipeline: computePipeline,
    pipelineLayout: pipelineLayout,
    shaderModule: shaderModule
  };
};

// Registry mapping the gate shader name to its loader function.
var gateForwardShader = _defineProperty({}, FORWARD_SHADER_NAME$1, loadGateForwardShader);
/**
 * Creates the gate WebGPU entry point and returns it under the key
 * `pv_picollm_gate_forward_webgpu_wasm`.
 */
var getPicollmGateWebGpuFunctions = function getPicollmGateWebGpuFunctions() {
  var setStatus = function setStatus(statusAddress, value) {
    Module.HEAP32[statusAddress / Int32Array.BYTES_PER_ELEMENT] = value;
  };

  /**
   * Runs the gate forward shader.
   *
   * The shader takes its sizes and offsets as pipeline-overridable constants,
   * so a dedicated compute pipeline is created on every call from the loaded
   * shader's `pipelineLayout` and `shaderModule`, with constants n, k,
   * num_experts, y_offset, indices_offset and weights_offset (the three
   * offsets divided by 4).
   *
   * Fix: the pipeline used to be created before the y / indices / weights
   * buffers were validated, so a call that failed validation still paid for
   * pipeline creation. All lookups now happen first and the pipeline is only
   * created once the call is known to proceed.
   *
   * The status slot receives 0 on success, or -1 (after logging) on a
   * missing device, shader or buffer.
   */
  var pvPicollmGateForwardWebGpu = function pvPicollmGateForwardWebGpu(objAddress, n, k, numExperts, yAddress, yOffset, indicesAddress, indicesOffset, weightsAddress, weightsOffset, statusAddress) {
    objAddress = unsignedAddress(objAddress);
    yAddress = unsignedAddress(yAddress);
    indicesAddress = unsignedAddress(indicesAddress);
    weightsAddress = unsignedAddress(weightsAddress);
    statusAddress = unsignedAddress(statusAddress);

    // Log the problem and flag the call as failed.
    var fail = function (message) {
      console.error(message);
      setStatus(statusAddress, -1);
    };

    // Resolve a registered GPU buffer, or undefined when nothing is registered.
    var bufferAt = function (address) {
      var entry = gpuBuffers.get(address);
      return entry === null || entry === undefined ? undefined : entry.buffer;
    };

    var obj = gpuDevices.get(objAddress);
    if (!obj || !obj.device) {
      fail("WebGPU device has not been initialized");
      return;
    }

    var shader = obj.shaders[FORWARD_SHADER_NAME$1];
    if (!shader) {
      fail("Shader has not been loaded");
      return;
    }

    var yBuffer = bufferAt(yAddress);
    if (!yBuffer) {
      fail("Y buffer has not been allocated");
      return;
    }

    var indicesBuffer = bufferAt(indicesAddress);
    if (!indicesBuffer) {
      fail("Indices buffer has not been allocated");
      return;
    }

    var weightsBuffer = bufferAt(weightsAddress);
    if (!weightsBuffer) {
      fail("Weights buffer has not been allocated");
      return;
    }

    // Created only after every input has been validated.
    var pipeline = obj.device.createComputePipeline({
      label: "gate forward pipeline",
      layout: shader.pipelineLayout,
      compute: {
        module: shader.shaderModule,
        entryPoint: shaderEntryPoint,
        constants: {
          n: n,
          k: k,
          num_experts: numExperts,
          y_offset: yOffset / 4,
          indices_offset: indicesOffset / 4,
          weights_offset: weightsOffset / 4
        }
      }
    });

    var bindGroup = obj.device.createBindGroup({
      label: "gate forward bind group",
      layout: pipeline.getBindGroupLayout(0),
      entries: [
        { binding: 0, resource: { buffer: yBuffer } },
        { binding: 1, resource: { buffer: indicesBuffer } },
        { binding: 2, resource: { buffer: weightsBuffer } }
      ]
    });

    obj.dispatchComputerShader(bindGroup, pipeline, FORWARD_SHADER_NAME$1, n);
    setStatus(statusAddress, 0);
  };

  return { pv_picollm_gate_forward_webgpu_wasm: pvPicollmGateForwardWebGpu };
};
// Key under which the MoE transformer add-to-buffer shader is registered.
var ADD_TO_BUFFER_SHADER_NAME$1 = "pv_picollm_moe_transformer_add_to_buffer_shader";

// WGSL for the accumulate step:
// buffer[buffer_offset + i] += x[x_offset + i] for every i < args.n.
// The shared `emptyShader` text is appended at the end.
// NOTE(review): the WGSL text as bundled here shows `var args`, `array` and
// `vec3` without template arguments - verify against the unbundled shader.
var moeTransformerAddToBufferShaderSource = [
  "",
  "struct argsStruct {",
  " n: u32, ",
  " x_offset: u32,",
  " buffer_offset: u32,",
  "};",
  "",
  "@group(0) @binding(0) ",
  "var args: argsStruct;",
  "",
  "@group(0) @binding(1)",
  "var x: array;",
  "",
  "@group(0) @binding(2)",
  "var buffer: array;",
  "",
  "override workgroup_size_x: u32 = 1;",
  "override workgroup_size_y: u32 = 1;",
  "override workgroup_size_z: u32 = 1;",
  "",
  "@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)",
  "fn main(@builtin(global_invocation_id) global_id: vec3) {",
  " if (global_id.x >= args.n) {",
  " return;",
  " }",
  "",
  " buffer[args.buffer_offset + global_id.x] += x[args.x_offset + global_id.x]; ",
  "}",
  "",
  ""
].join("\n").concat(emptyShader);

/**
 * Builds the compute pipeline for the MoE transformer add-to-buffer shader.
 *
 * Creates, in order, a bind group layout (binding 0: uniform arguments,
 * binding 1: read-only storage, binding 2: read-write storage), a pipeline
 * layout, the shader module and a compute pipeline. No override constants
 * are supplied, so the shader's workgroup size defaults apply.
 *
 * Fix: the pipeline layout shared the compute pipeline's label
 * ("... add to buffer pipeline"), which makes the two indistinguishable in
 * WebGPU diagnostics. The layout is now labelled "... pipeline layout".
 *
 * @param {GPUDevice} device Device used to create the GPU objects.
 * @returns {{computePipeline: GPUComputePipeline}} The created pipeline.
 */
var loadMoeTransformerAddToBufferShader = function loadMoeTransformerAddToBufferShader(device) {
  var bindGroupLayout = device.createBindGroupLayout({
    label: "moe transformer add to buffer bind group layout",
    entries: [
      { binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: "uniform" } },
      { binding: 1, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
      { binding: 2, visibility: GPUShaderStage.COMPUTE, buffer: { type: "storage" } }
    ]
  });
  var pipelineLayout = device.createPipelineLayout({
    label: "moe transformer add to buffer pipeline layout",
    bindGroupLayouts: [bindGroupLayout]
  });
  var shaderModule = device.createShaderModule({
    label: "moe transformer add to buffer shader module",
    code: moeTransformerAddToBufferShaderSource
  });
  var computePipeline = device.createComputePipeline({
    label: "moe transformer add to buffer pipeline",
    layout: pipelineLayout,
    compute: { module: shaderModule, entryPoint: shaderEntryPoint }
  });
  return { computePipeline: computePipeline };
};
// Key under which the MoE "multiply weight and add to buffer" shader is registered.
var MULTIPLY_WEIGHT_AND_ADD_TO_BUFFER_SHADER_NAME = "pv_picollm_moe_transformer_multiply_weight_and_add_to_buffer_shader";

// WGSL for the weighted accumulate step:
// y[y_offset + y_index + i] += weights[weights_index] * x[x_offset + i]
// for every i < args.n. The shared `emptyShader` text is appended at the end.
//
// Fix: the shader previously ADDED the weight to each element
// (`weights[...] + x[...]`), contradicting its "multiply weight and add to
// buffer" name; the weight now scales the element before it is accumulated.
//
// NOTE(review): `args.weights_offset` is declared but never read by the
// shader; presumably `weights_index` is already absolute - confirm.
// NOTE(review): the WGSL text as bundled here shows `var args`, `array` and
// `vec3` without template arguments - verify against the unbundled shader.
var moeTransformerMultiplyWeightAndToBufferShaderSource = [
  "",
  "struct argsStruct {",
  " n: u32, ",
  " weights_index: u32,",
  " y_index: u32,",
  " weights_offset: u32,",
  " x_offset: u32,",
  " y_offset: u32,",
  "};",
  "",
  "@group(0) @binding(0) ",
  "var args: argsStruct;",
  "",
  "@group(0) @binding(1)",
  "var weights: array;",
  "",
  "@group(0) @binding(2)",
  "var x: array;",
  "",
  "@group(0) @binding(3)",
  "var y: array;",
  "",
  "override workgroup_size_x: u32 = 1;",
  "override workgroup_size_y: u32 = 1;",
  "override workgroup_size_z: u32 = 1;",
  "",
  "@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)",
  "fn main(@builtin(global_invocation_id) global_id: vec3) {",
  " if (global_id.x >= args.n) {",
  " return;",
  " }",
  " ",
  " y[args.y_offset + args.y_index + global_id.x] += weights[args.weights_index] * x[args.x_offset + global_id.x]; ",
  "}",
  "",
  ""
].join("\n").concat(emptyShader);

/**
 * Builds the compute pipeline for the MoE "multiply weight and add to
 * buffer" shader.
 *
 * Creates, in order, a bind group layout (binding 0: uniform arguments,
 * bindings 1 and 2: read-only storage, binding 3: read-write storage), a
 * pipeline layout, the shader module and a compute pipeline. No override
 * constants are supplied, so the shader's workgroup size defaults apply.
 *
 * Fix: the pipeline layout shared the compute pipeline's label; it is now
 * labelled "... pipeline layout" so the two can be told apart in WebGPU
 * diagnostics.
 *
 * @param {GPUDevice} device Device used to create the GPU objects.
 * @returns {{computePipeline: GPUComputePipeline}} The created pipeline.
 */
var loadMoeTransformerMultiplyWeightAndAddToBufferShader = function loadMoeTransformerMultiplyWeightAndAddToBufferShader(device) {
  var bindGroupLayout = device.createBindGroupLayout({
    label: "moe transformer multiply weight and add to buffer bind group layout",
    entries: [
      { binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: "uniform" } },
      { binding: 1, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
      { binding: 2, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
      { binding: 3, visibility: GPUShaderStage.COMPUTE, buffer: { type: "storage" } }
    ]
  });
  var pipelineLayout = device.createPipelineLayout({
    label: "moe transformer multiply weight and add to buffer pipeline layout",
    bindGroupLayouts: [bindGroupLayout]
  });
  var shaderModule = device.createShaderModule({
    label: "moe transformer multiply weight and add to buffer shader module",
    code: moeTransformerMultiplyWeightAndToBufferShaderSource
  });
  var computePipeline = device.createComputePipeline({
    label: "moe transformer multiply weight and add to buffer pipeline",
    layout: pipelineLayout,
    compute: { module: shaderModule, entryPoint: shaderEntryPoint }
  });
  return { computePipeline: computePipeline };
};
// Key under which the MoE transformer add-buffers shader is registered.
var ADD_BUFFERS_SHADER_NAME$1 = "pv_picollm_moe_transformer_add_buffers_shader";

// WGSL for the element-wise sum:
// y[y_offset + i] = buffer1[buffer1_offset + i] + buffer2[buffer2_offset + i]
// for every i < args.n. The shared `emptyShader` text is appended at the end.
var moeTransformerAddBuffersShaderSource = [
  "",
  "struct argsStruct {",
  " n: u32, ",
  " buffer1_offset: u32,",
  " buffer2_offset: u32,",
  " y_offset: u32, ",
  "};",
  "",
  "@group(0) @binding(0) ",
  "var args: argsStruct;",
  "",
  "@group(0) @binding(1)",
  "var buffer1: array;",
  "",
  "@group(0) @binding(2)",
  "var buffer2: array;",
  "",
  "@group(0) @binding(3)",
  "var y: array;",
  "",
  "override workgroup_size_x: u32 = 1;",
  "override workgroup_size_y: u32 = 1;",
  "override workgroup_size_z: u32 = 1;",
  "",
  "@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)",
  "fn main(@builtin(global_invocation_id) global_id: vec3) {",
  " if (global_id.x >= args.n) {",
  " return;",
  " }",
  " ",
  " y[args.y_offset + global_id.x] = buffer1[args.buffer1_offset + global_id.x] + buffer2[args.buffer2_offset + global_id.x]; ",
  "}",
  "",
  ""
].join("\n").concat(emptyShader);

/**
 * Builds the compute pipeline for the MoE transformer add-buffers shader.
 *
 * Creates, in order, a bind group layout (binding 0: uniform arguments,
 * bindings 1 and 2: read-only storage, binding 3: read-write storage), a
 * pipeline layout, the shader module and a compute pipeline. No override
 * constants are supplied, so the shader's workgroup size defaults apply.
 *
 * @param {GPUDevice} device Device used to create the GPU objects.
 * @returns {{computePipeline: GPUComputePipeline}} The created pipeline.
 */
var loadMoeTransformerAddBuffersShader = function loadMoeTransformerAddBuffersShader(device) {
  var layoutEntries = [
    { binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: "uniform" } },
    { binding: 1, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
    { binding: 2, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
    { binding: 3, visibility: GPUShaderStage.COMPUTE, buffer: { type: "storage" } }
  ];
  var groupLayout = device.createBindGroupLayout({
    label: "moe transformer add buffers bind group layout",
    entries: layoutEntries
  });
  var layout = device.createPipelineLayout({
    label: "moe transformer add buffers pipeline layout",
    bindGroupLayouts: [groupLayout]
  });
  var module = device.createShaderModule({
    label: "moe transformer add buffers shader module",
    code: moeTransformerAddBuffersShaderSource
  });
  var pipeline = device.createComputePipeline({
    label: "moe transformer add buffers pipeline",
    layout: layout,
    compute: { module: module, entryPoint: shaderEntryPoint }
  });
  return { computePipeline: pipeline };
};

// Registry mapping each MoE transformer shader name to its loader function.
var moeTransformerForwardShaders = {};
_defineProperty(moeTransformerForwardShaders, ADD_TO_BUFFER_SHADER_NAME$1, loadMoeTransformerAddToBufferShader);
_defineProperty(moeTransformerForwardShaders, MULTIPLY_WEIGHT_AND_ADD_TO_BUFFER_SHADER_NAME, loadMoeTransformerMultiplyWeightAndAddToBufferShader);
_defineProperty(moeTransformerForwardShaders, ADD_BUFFERS_SHADER_NAME$1, loadMoeTransformerAddBuffersShader);
/**
 * Launches the MoE transformer add-to-buffer shader, binding the x buffer at
 * binding 1 and the destination buffer at binding 2.
 *
 * Three u32 shader arguments are uploaded: `[n, xOffset, bufferOffset]`, and
 * `n` is passed to `dispatchComputerShader`. The status slot receives 0 on
 * success, or -1 (after logging) when the device, shader or either buffer is
 * missing.
 *
 * NOTE(review): unlike other launchers in this file, the offsets are passed
 * to the shader without dividing by 4 - presumably the caller already
 * supplies element offsets; confirm.
 */
var pvPicollmMoeTransformerAddToBufferWebGpu = function pvPicollmMoeTransformerAddToBufferWebGpu(objAddress, n, xAddress, xOffset, bufferAddress, bufferOffset, statusAddress) {
  objAddress = unsignedAddress(objAddress);
  xAddress = unsignedAddress(xAddress);
  bufferAddress = unsignedAddress(bufferAddress);
  statusAddress = unsignedAddress(statusAddress);

  // Log the problem and flag the call as failed.
  function fail(message) {
    console.error(message);
    setStatus(statusAddress, -1);
  }

  // Resolve a registered GPU buffer, or undefined when nothing is registered.
  function bufferAt(address) {
    var entry = gpuBuffers.get(address);
    return entry === null || entry === undefined ? undefined : entry.buffer;
  }

  var gpu = gpuDevices.get(objAddress);
  if (!(gpu && gpu.device)) {
    fail("WebGPU device has not been initialized");
    return;
  }

  var addShader = gpu.shaders[ADD_TO_BUFFER_SHADER_NAME$1];
  if (!addShader) {
    fail("Shader has not been loaded");
    return;
  }

  var xBuffer = bufferAt(xAddress);
  if (!xBuffer) {
    fail("x buffer has not been allocated");
    return;
  }

  var targetBuffer = bufferAt(bufferAddress);
  if (!targetBuffer) {
    fail("buffer has not been allocated");
    return;
  }

  var uniformBuffer = gpu.getBuffer(
    3 * Uint32Array.BYTES_PER_ELEMENT,
    GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
    false,
    "moe transformer add to buffer arg buffer"
  );
  var uniformValues = new Uint32Array([n, xOffset, bufferOffset]);
  gpu.device.queue.writeBuffer(uniformBuffer, 0, uniformValues);
  gpu.scheduleUniformBufferForRelease(uniformBuffer);

  var groupLayout = addShader.computePipeline.getBindGroupLayout(0);
  var bindGroup = gpu.device.createBindGroup({
    label: "moe transformer add to buffer bind group",
    layout: groupLayout,
    entries: [
      { binding: 0, resource: { buffer: uniformBuffer } },
      { binding: 1, resource: { buffer: xBuffer } },
      { binding: 2, resource: { buffer: targetBuffer } }
    ]
  });

  gpu.dispatchComputerShader(bindGroup, addShader.computePipeline, ADD_TO_BUFFER_SHADER_NAME$1, n);
  setStatus(statusAddress, 0);
};
pvPicollmMoeTransformerMultiplyWeightAndAddToBufferWebGpu=function pvPicollmMoeTransformerMultiplyWeightAndAddToBufferWebGpu(objAddress,n,weightsIndex,yIndex,weightsAddress,weightsOffset,xAddress,xOffset,yAddress,yOffset,statusAddress){var _gpuBuffers$get3,_gpuBuffers$get4,_gpuBuffers$get5;objAddress=unsignedAddress(objAddress);weightsAddress=unsignedAddress(weightsAddress);xAddress=unsignedAddress(xAddress);yAddress=unsignedAddress(yAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[MULTIPLY_WEIGHT_AND_ADD_TO_BUFFER_SHADER_NAME];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var weightsBuffer=(_gpuBuffers$get3=gpuBuffers.get(weightsAddress))===null||_gpuBuffers$get3===void 0?void 0:_gpuBuffers$get3.buffer;if(!weightsBuffer){console.error("weights has not been allocated");setStatus(statusAddress,-1);return}var xBuffer=(_gpuBuffers$get4=gpuBuffers.get(xAddress))===null||_gpuBuffers$get4===void 0?void 0:_gpuBuffers$get4.buffer;if(!xBuffer){console.error("buffer2 has not been allocated");setStatus(statusAddress,-1);return}var yBuffer=(_gpuBuffers$get5=gpuBuffers.get(yAddress))===null||_gpuBuffers$get5===void 0?void 0:_gpuBuffers$get5.buffer;if(!yBuffer){console.error("y has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(6*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"moe transformer multiply weight and add to buffer arg buffer");obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([n,weightsIndex,yIndex,weightsOffset,xOffset,yOffset]));obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"moe transformer multiply weight and add to buffer bind 
group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:weightsBuffer}},{binding:2,resource:{buffer:xBuffer}},{binding:3,resource:{buffer:yBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,MULTIPLY_WEIGHT_AND_ADD_TO_BUFFER_SHADER_NAME,n);setStatus(statusAddress,0)};var pvPicollmMoeTransformerAddBuffersWebGpu=function pvPicollmMoeTransformerAddBuffersWebGpu(objAddress,n,buffer1Address,buffer1Offset,buffer2Address,buffer2Offset,yAddress,yOffset,statusAddress){var _gpuBuffers$get6,_gpuBuffers$get7,_gpuBuffers$get8;objAddress=unsignedAddress(objAddress);buffer1Address=unsignedAddress(buffer1Address);buffer2Address=unsignedAddress(buffer2Address);yAddress=unsignedAddress(yAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[ADD_BUFFERS_SHADER_NAME$1];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var buffer1Buffer=(_gpuBuffers$get6=gpuBuffers.get(buffer1Address))===null||_gpuBuffers$get6===void 0?void 0:_gpuBuffers$get6.buffer;if(!buffer1Buffer){console.error("buffer1 has not been allocated");setStatus(statusAddress,-1);return}var buffer2Buffer=(_gpuBuffers$get7=gpuBuffers.get(buffer2Address))===null||_gpuBuffers$get7===void 0?void 0:_gpuBuffers$get7.buffer;if(!buffer2Buffer){console.error("buffer2 has not been allocated");setStatus(statusAddress,-1);return}var yBuffer=(_gpuBuffers$get8=gpuBuffers.get(yAddress))===null||_gpuBuffers$get8===void 0?void 0:_gpuBuffers$get8.buffer;if(!yBuffer){console.error("y has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(4*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"moe transformer add buffers arg 
buffer");obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([n,buffer1Offset,buffer2Offset,yOffset]));obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"moe transformer add buffers bind group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:buffer1Buffer}},{binding:2,resource:{buffer:buffer2Buffer}},{binding:3,resource:{buffer:yBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,ADD_BUFFERS_SHADER_NAME$1,n);setStatus(statusAddress,0)};return{pv_picollm_moe_transformer_add_to_buffer_webgpu_wasm:pvPicollmMoeTransformerAddToBufferWebGpu,pv_picollm_moe_transformer_add_buffers_webgpu_wasm:pvPicollmMoeTransformerAddBuffersWebGpu,pv_picollm_moe_transformer_multiply_weight_and_add_to_buffer_webgpu_wasm:pvPicollmMoeTransformerMultiplyWeightAndAddToBufferWebGpu}};var sdataReduce="\n for (var s: u32 = workgroup_size_x / 2; s > 0; s >>= 1) {\n if tid < s {\n sdata[tid] += sdata[tid + s];\n }\n workgroupBarrier();\n }\n";var dividePadFunction="\n fn divide_pad(a: u32, b: u32) -> u32 { \n return (a + b - 1) / b;\n }\n";var FORWARD_MULTI_BUFFER_SHADER_NAME$1="pv_picollm_norm_forward_multi_buffer_shader";var FORWARD_SINGLE_BUFFER_SHADER_NAME$1="pv_picollm_norm_forward_single_buffer_shader";var normForwardShaderSource=function normForwardShaderSource(isMulti){return"\nstruct argsStruct {\n n: u32,\n dimension: u32,\n remainder: u32,\n remainder_start: u32, \n eps: f32, \n x_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar weight: array;\n\n".concat(isMulti?"\n@group(0) @binding(2)\nvar x: array;\n\n@group(0) @binding(3) \nvar y: array;\n":" \n@group(0) @binding(2)\nvar x: array;\n","\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\nvar sdata: array, workgroup_size_x>;\n\n@compute 
@workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(\n @builtin(local_invocation_id) local_id: vec3,\n @builtin(workgroup_id) workgroup_id: vec3\n) {\n let tid = local_id.x;\n let m = workgroup_id.x;\n let block_size = workgroup_size_x;\n \n var power_vec: vec4;\n let x_start: u32 = args.x_offset + (m * args.dimension);\n let skip = tid * 4;\n let shift = (block_size * 4);\n for (var j = 0u; j + skip < args.remainder_start; j += shift) {\n let local_index = x_start + j + skip; \n\n let x_vec = vec4(\n x[local_index],\n x[local_index + 1],\n x[local_index + 2],\n x[local_index + 3]);\n \n power_vec += x_vec * x_vec; \n } \n \n if (tid == 0 && args.remainder > 0) {\n var remainder_vec = vec4(0.0, 0.0, 0.0, 0.0);\n let x_idx = x_start + args.remainder_start;\n for (var j = 0u; j < args.remainder; j++) { \n remainder_vec[j] = x[x_idx + j];\n } \n power_vec += remainder_vec * remainder_vec;\n }\n \n sdata[tid] = power_vec;\n workgroupBarrier();\n\n ").concat(sdataReduce,"\n \n let power = sdata[0].x + sdata[0].y + sdata[0].z + sdata[0].w;\n let norm: vec4 = vec4(1.0 / sqrt((power / f32(args.dimension)) + args.eps));\n \n let y_start: u32 = args.y_offset + (m * args.dimension);\n for (var j = 0u; j + skip < args.remainder_start; j += shift) {\n let local_index = j + skip;\n let x_idx = x_start + local_index;\n let x_vec = vec4(\n x[x_idx],\n x[x_idx + 1],\n x[x_idx + 2],\n x[x_idx + 3]);\n \n let weight_vec = vec4(\n weight[local_index],\n weight[local_index + 1],\n weight[local_index + 2],\n weight[local_index + 3]);\n let y_vec = x_vec * norm * weight_vec;\n \n let y_idx = y_start + local_index;\n").concat(isMulti?" 
\n y[y_idx] = y_vec.x;\n y[y_idx + 1] = y_vec.y;\n y[y_idx + 2] = y_vec.z;\n y[y_idx + 3] = y_vec.w;\n":" \n x[y_idx] = y_vec.x;\n x[y_idx + 1] = y_vec.y;\n x[y_idx + 2] = y_vec.z;\n x[y_idx + 3] = y_vec.w;\n"," \n }\n \n if (tid == 0 && args.remainder > 0) {\n let x_idx = x_start + args.remainder_start;\n let weight_idx = args.remainder_start; \n let y_idx = y_start + args.remainder_start;\n for (var j = 0u; j < args.remainder; j++) {\n").concat(isMulti?" \n y[y_idx + j] = x[x_idx + j] * norm[j] * weight[weight_idx + j];\n":" \n x[y_idx + j] = x[x_idx + j] * norm[j] * weight[weight_idx + j];\n"," \n } \n }\n}\n\n").concat(emptyShader)};var loadNormForwardShader=function loadNormForwardShader(device,isMulti){var entries=[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}}];if(isMulti){entries.push({binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}});entries.push({binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}})}else{entries.push({binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}})}var bindGroupLayout=device.createBindGroupLayout({label:"norm forward ".concat(isMulti?"multi":"single"," buffer bind group layout"),entries:entries});var pipelineLayout=device.createPipelineLayout({label:"norm forward ".concat(isMulti?"multi":"single"," buffer pipeline layout"),bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"norm forward ".concat(isMulti?"multi":"single"," buffer shader module"),code:normForwardShaderSource(isMulti)});var computePipeline=device.createComputePipeline({label:"norm forward ".concat(isMulti?"multi":"single"," buffer pipeline"),layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_x:PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE}}});return{computePipeline:computePipeline}};var 
normForwardShader=_defineProperty(_defineProperty({},FORWARD_SINGLE_BUFFER_SHADER_NAME$1,function(device){return loadNormForwardShader(device,false)}),FORWARD_MULTI_BUFFER_SHADER_NAME$1,function(device){return loadNormForwardShader(device,true)});var getPicollmNormWebGpuFunctions=function getPicollmNormWebGpuFunctions(){var setStatus=function setStatus(statusAddress,value){Module.HEAP32[statusAddress/Int32Array.BYTES_PER_ELEMENT]=value};var pvPicollmNormForwardWebGpu=function pvPicollmNormForwardWebGpu(objAddress,dimension,eps,weightAddress,n,xOffset,xAddress,yOffset,yAddress,statusAddress){var _gpuBuffers$get,_gpuBuffers$get2;objAddress=unsignedAddress(objAddress);weightAddress=unsignedAddress(weightAddress);xAddress=unsignedAddress(xAddress);yAddress=unsignedAddress(yAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shaderName=xAddress===yAddress?FORWARD_SINGLE_BUFFER_SHADER_NAME$1:FORWARD_MULTI_BUFFER_SHADER_NAME$1;var shader=obj.shaders[shaderName];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var weightBuffer=(_gpuBuffers$get=gpuBuffers.get(weightAddress))===null||_gpuBuffers$get===void 0?void 0:_gpuBuffers$get.buffer;if(!weightBuffer){console.error("Weight buffer has not been allocated");setStatus(statusAddress,-1);return}var yBuffer=(_gpuBuffers$get2=gpuBuffers.get(yAddress))===null||_gpuBuffers$get2===void 0?void 0:_gpuBuffers$get2.buffer;if(!yBuffer){console.error("Y buffer has not been allocated");setStatus(statusAddress,-1);return}var remainder=dimension%4;var remainder_start=dimension-remainder;var argsBuffer=obj.getBuffer(7*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"norm forward multi buffer arg buffer");var buffer=new ArrayBuffer(argsBuffer.size);var view=new 
DataView(buffer);view.setUint32(0,n,true);view.setUint32(4,dimension,true);view.setUint32(8,remainder,true);view.setUint32(12,remainder_start,true);view.setFloat32(16,eps,true);view.setUint32(20,xOffset/4,true);view.setUint32(24,yOffset/4,true);obj.device.queue.writeBuffer(argsBuffer,0,buffer);obj.scheduleUniformBufferForRelease(argsBuffer);var entries=[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:weightBuffer}}];if(xAddress===yAddress){entries.push({binding:2,resource:{buffer:yBuffer}})}else{var _gpuBuffers$get3;var xBuffer=(_gpuBuffers$get3=gpuBuffers.get(xAddress))===null||_gpuBuffers$get3===void 0?void 0:_gpuBuffers$get3.buffer;if(!xBuffer){console.error("X buffer has not been allocated");setStatus(statusAddress,-1);return}entries.push({binding:2,resource:{buffer:xBuffer}});entries.push({binding:3,resource:{buffer:yBuffer}})}var bindGroup=obj.device.createBindGroup({label:"norm forward ".concat(xAddress===yAddress?"single":"multi"," buffer bind group"),layout:shader.computePipeline.getBindGroupLayout(0),entries:entries});obj.dispatchComputerShader(bindGroup,shader.computePipeline,shaderName,n);setStatus(statusAddress,0)};return{pv_picollm_norm_forward_webgpu_wasm:pvPicollmNormForwardWebGpu}};var FORWARD_MULTI_BUFFER_SHADER_NAME="pv_picollm_norm_layer_forward_multi_buffer_shader";var normLayerForwardMultiBufferShaderSource="\nstruct argsStruct {\n n: u32,\n dimension: u32,\n eps: f32,\n weight_offset: u32,\n bias_offset: u32,\n x_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar weight: array;\n\n@group(0) @binding(2)\nvar bias: array;\n\n@group(0) @binding(3)\nvar x: array;\n\n@group(0) @binding(4)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if 
(global_id.x >= args.n) {\n return;\n }\n \n let x_start: u32 = args.x_offset + (global_id.x * args.dimension);\n\n var mean: f32 = 0.0;\n for (var j = 0u; j < args.dimension; j++) {\n mean += x[x_start + j];\n }\n mean /= f32(args.dimension);\n\n var mean2: f32 = 0.0;\n for (var j = 0u; j < args.dimension; j++) {\n mean2 += (x[x_start + j] - mean) * (x[x_start + j] - mean);\n }\n mean2 /= f32(args.dimension);\n\n var norm: f32 = 1.0 / sqrt(mean2 + args.eps);\n\n var y_start = args.y_offset + (global_id.x * args.dimension);\n for (var j = 0u; j < args.dimension; j++) {\n y[y_start + j] = ((x[x_start + j] - mean) * norm * weight[args.weight_offset + j]) + bias[args.bias_offset + j];\n }\n}\n\n".concat(emptyShader);var loadNormLayerForwardMultiBufferShader=function loadNormLayerForwardMultiBufferShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"norm layer forward multi buffer bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:4,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"norm layer forward multi buffer pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"norm layer forward multi buffer shader module",code:normLayerForwardMultiBufferShaderSource});var computePipeline=device.createComputePipeline({label:"norm layer forward multi buffer pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_x:PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE}}});return{computePipeline:computePipeline}};var FORWARD_SINGLE_BUFFER_SHADER_NAME="pv_picollm_norm_layer_forward_single_buffer_shader";var 
normLayerForwardSingleBufferShaderSource="\nstruct argsStruct {\n n: u32,\n dimension: u32,\n eps: f32,\n weight_offset: u32,\n bias_offset: u32,\n x_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar weight: array;\n\n@group(0) @binding(2)\nvar bias: array;\n\n@group(0) @binding(3)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n \n let x_start: u32 = args.x_offset + (global_id.x * args.dimension);\n\n var mean: f32 = 0.0;\n for (var j = 0u; j < args.dimension; j++) {\n mean += y[x_start + j];\n }\n mean /= f32(args.dimension);\n\n var mean2: f32 = 0.0;\n for (var j = 0u; j < args.dimension; j++) {\n mean2 += (y[x_start + j] - mean) * (y[x_start + j] - mean);\n }\n mean2 /= f32(args.dimension);\n\n var norm: f32 = 1.0 / sqrt(mean2 + args.eps);\n\n var y_start = args.y_offset + (global_id.x * args.dimension);\n for (var j = 0u; j < args.dimension; j++) {\n y[y_start + j] = ((y[x_start + j] - mean) * norm * weight[args.weight_offset + j]) + bias[args.bias_offset + j];\n }\n}\n\n".concat(emptyShader);var loadNormLayerForwardSingleBufferShader=function loadNormLayerForwardSingleBufferShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"norm layer forward single buffer bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"norm layer forward single buffer pipeline 
layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"norm layer forward single buffer shader module",code:normLayerForwardSingleBufferShaderSource});var computePipeline=device.createComputePipeline({label:"norm layer forward single buffer pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_x:PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE}}});return{computePipeline:computePipeline}};var normLayerForwardShader=_defineProperty(_defineProperty({},FORWARD_SINGLE_BUFFER_SHADER_NAME,loadNormLayerForwardSingleBufferShader),FORWARD_MULTI_BUFFER_SHADER_NAME,loadNormLayerForwardMultiBufferShader);var getPicollmNormLayerWebGpuFunctions=function getPicollmNormLayerWebGpuFunctions(){var setStatus=function setStatus(statusAddress,value){Module.HEAP32[statusAddress/Int32Array.BYTES_PER_ELEMENT]=value};var pvPicollmNormLayerForwardWebGpu=function pvPicollmNormLayerForwardWebGpu(objAddress,dimension,eps,weightAddress,weightOffset,biasAddress,biasOffset,n,xAddress,xOffset,yAddress,yOffset,statusAddress){var _gpuBuffers$get,_gpuBuffers$get2,_gpuBuffers$get3;objAddress=unsignedAddress(objAddress);weightAddress=unsignedAddress(weightAddress);biasAddress=unsignedAddress(biasAddress);xAddress=unsignedAddress(xAddress);yAddress=unsignedAddress(yAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shaderName=xAddress===yAddress?FORWARD_SINGLE_BUFFER_SHADER_NAME:FORWARD_MULTI_BUFFER_SHADER_NAME;var shader=obj.shaders[shaderName];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var weightBuffer=(_gpuBuffers$get=gpuBuffers.get(weightAddress))===null||_gpuBuffers$get===void 0?void 0:_gpuBuffers$get.buffer;if(!weightBuffer){console.error("weight buffer has not been 
allocated");setStatus(statusAddress,-1);return}var biasBuffer=(_gpuBuffers$get2=gpuBuffers.get(biasAddress))===null||_gpuBuffers$get2===void 0?void 0:_gpuBuffers$get2.buffer;if(!biasBuffer){console.error("bias buffer has not been allocated");setStatus(statusAddress,-1);return}var yBuffer=(_gpuBuffers$get3=gpuBuffers.get(yAddress))===null||_gpuBuffers$get3===void 0?void 0:_gpuBuffers$get3.buffer;if(!yBuffer){console.error("Y buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(7*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"norm layer forward multi buffer arg buffer");var buffer=new ArrayBuffer(argsBuffer.size);var view=new DataView(buffer);view.setUint32(0,n,true);view.setUint32(4,dimension,true);view.setFloat32(8,eps,true);view.setUint32(12,weightOffset/4,true);view.setUint32(16,biasOffset/4,true);view.setUint32(20,xOffset/4,true);view.setUint32(24,yOffset/4,true);obj.device.queue.writeBuffer(argsBuffer,0,buffer);obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup;if(xAddress===yAddress){bindGroup=obj.device.createBindGroup({label:"norm layer forward single buffer bind group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:weightBuffer}},{binding:2,resource:{buffer:biasBuffer}},{binding:3,resource:{buffer:yBuffer}}]})}else{var _gpuBuffers$get4;var xBuffer=(_gpuBuffers$get4=gpuBuffers.get(xAddress))===null||_gpuBuffers$get4===void 0?void 0:_gpuBuffers$get4.buffer;if(!xBuffer){console.error("X buffer has not been allocated");setStatus(statusAddress,-1);return}bindGroup=obj.device.createBindGroup({label:"norm layer forward multi buffer bind 
group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:weightBuffer}},{binding:2,resource:{buffer:biasBuffer}},{binding:3,resource:{buffer:xBuffer}},{binding:4,resource:{buffer:yBuffer}}]})}obj.dispatchComputerShader(bindGroup,shader.computePipeline,shaderName,Math.ceil(n/PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE));setStatus(statusAddress,0)};return{pv_picollm_norm_layer_forward_webgpu_wasm:pvPicollmNormLayerForwardWebGpu}};var ADD_TO_BUFFER_SHADER_NAME="pv_picollm_transformer_add_to_buffer_shader";var transformerAddToBufferShaderSource="\nstruct argsStruct {\n n: u32,\n x_offset: u32,\n buffer_offset: u32, \n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array;\n\n@group(0) @binding(2)\nvar buffer: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n \n buffer[args.buffer_offset + global_id.x] += x[args.x_offset + global_id.x]; \n}\n\n".concat(emptyShader);var loadTransformerAddToBufferShader=function loadTransformerAddToBufferShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"transformer add to buffer bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"transformer add to buffer pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"transformer add to buffer shader module",code:transformerAddToBufferShaderSource});var 
computePipeline=device.createComputePipeline({label:"transformer add to buffer compute",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_x:PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE}}});return{computePipeline:computePipeline}};var ADD_BUFFERS_SHADER_NAME="pv_picollm_transformer_add_buffers_shader";var transformerAddBuffersShaderSource="\n\nstruct argsStruct {\n n: u32,\n buffer1_offset: u32,\n buffer2_offset: u32,\n y_offset: u32, \n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar buffer1: array;\n\n@group(0) @binding(2)\nvar buffer2: array;\n\n@group(0) @binding(3)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n \n y[args.y_offset + global_id.x] = buffer1[args.buffer1_offset + global_id.x] + buffer2[args.buffer2_offset + global_id.x]; \n}\n\n".concat(emptyShader);var loadTransformerAddBuffersShader=function loadTransformerAddBuffersShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"transformer add buffers bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"transformer add buffers pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"transformer add buffers shader module",code:transformerAddBuffersShaderSource});var computePipeline=device.createComputePipeline({label:"transformer add buffers 
pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_x:PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE}}});return{computePipeline:computePipeline}};var transformerForwardShaders=_defineProperty(_defineProperty({},ADD_TO_BUFFER_SHADER_NAME,loadTransformerAddToBufferShader),ADD_BUFFERS_SHADER_NAME,loadTransformerAddBuffersShader);var getPicollmTransformerWebGpuFunctions=function getPicollmTransformerWebGpuFunctions(){var setStatus=function setStatus(statusAddress,value){Module.HEAP32[statusAddress/Int32Array.BYTES_PER_ELEMENT]=value};var pvPicollmTransformerAddToBufferWebGpu=function pvPicollmTransformerAddToBufferWebGpu(objAddress,n,xAddress,xOffset,bufferAddress,bufferOffset,statusAddress){var _gpuBuffers$get,_gpuBuffers$get2;objAddress=unsignedAddress(objAddress);xAddress=unsignedAddress(xAddress);bufferAddress=unsignedAddress(bufferAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[ADD_TO_BUFFER_SHADER_NAME];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var xBuffer=(_gpuBuffers$get=gpuBuffers.get(xAddress))===null||_gpuBuffers$get===void 0?void 0:_gpuBuffers$get.buffer;if(!xBuffer){console.error("x buffer has not been allocated");setStatus(statusAddress,-1);return}var bufferBuffer=(_gpuBuffers$get2=gpuBuffers.get(bufferAddress))===null||_gpuBuffers$get2===void 0?void 0:_gpuBuffers$get2.buffer;if(!bufferBuffer){console.error("buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(3*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"transformer add to buffer arg buffer");obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([n,xOffset,bufferOffset]));obj.scheduleUniformBufferForRelease(argsBuffer);var 
bindGroup=obj.device.createBindGroup({label:"transformer add to buffer bind group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:xBuffer}},{binding:2,resource:{buffer:bufferBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,ADD_TO_BUFFER_SHADER_NAME,Math.ceil(n/PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE));setStatus(statusAddress,0)};var pvPicollmTransformerAddBuffersWebGpu=function pvPicollmTransformerAddBuffersWebGpu(objAddress,n,buffer1Address,buffer1Offset,buffer2Address,buffer2Offset,yAddress,yOffset,statusAddress){var _gpuBuffers$get3,_gpuBuffers$get4,_gpuBuffers$get5;objAddress=unsignedAddress(objAddress);buffer1Address=unsignedAddress(buffer1Address);buffer2Address=unsignedAddress(buffer2Address);yAddress=unsignedAddress(yAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[ADD_BUFFERS_SHADER_NAME];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var buffer1Buffer=(_gpuBuffers$get3=gpuBuffers.get(buffer1Address))===null||_gpuBuffers$get3===void 0?void 0:_gpuBuffers$get3.buffer;if(!buffer1Buffer){console.error("buffer1 has not been allocated");setStatus(statusAddress,-1);return}var buffer2Buffer=(_gpuBuffers$get4=gpuBuffers.get(buffer2Address))===null||_gpuBuffers$get4===void 0?void 0:_gpuBuffers$get4.buffer;if(!buffer2Buffer){console.error("buffer2 has not been allocated");setStatus(statusAddress,-1);return}var yBuffer=(_gpuBuffers$get5=gpuBuffers.get(yAddress))===null||_gpuBuffers$get5===void 0?void 0:_gpuBuffers$get5.buffer;if(!yBuffer){console.error("y has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(4*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"transformer add 
buffers arg buffer");obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([n,buffer1Offset,buffer2Offset,yOffset]));obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"transformer add buffers bind group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:buffer1Buffer}},{binding:2,resource:{buffer:buffer2Buffer}},{binding:3,resource:{buffer:yBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,ADD_BUFFERS_SHADER_NAME,Math.ceil(n/PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE));setStatus(statusAddress,0)};return{pv_picollm_transformer_add_to_buffer_webgpu_wasm:pvPicollmTransformerAddToBufferWebGpu,pv_picollm_transformer_add_buffers_webgpu_wasm:pvPicollmTransformerAddBuffersWebGpu}};var FORWARD_SHADER_NAME="pv_picollm_weight_float_forward_shader";var weightFloatForwardShaderSource="\n\nstruct argsStruct {\n nr: u32,\n nc: u32,\n w_offset: u32,\n x_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar w: array;\n\n@group(0) @binding(2)\nvar x: array;\n\n@group(0) @binding(3)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(\n @builtin(workgroup_id) workgroup_id : vec3,\n @builtin(local_invocation_id) local_id : vec3\n) {\n if (local_id.x >= args.nr) {\n return;\n }\n let x_start: u32 = args.x_offset + (workgroup_id.x * args.nc);\n let y_idx: u32 = local_id.x + args.y_offset + (workgroup_id.x * args.nr);\n \n let w_start: u32 = args.w_offset + (local_id.x * args.nc);\n for (var j = 0u; j < args.nc; j++) {\n y[y_idx] += w[w_start + j] * x[x_start + j]; \n }\n}\n\n".concat(emptyShader);var loadWeightFloatForwardShader=function loadWeightFloatForwardShader(device){var 
bindGroupLayout=device.createBindGroupLayout({label:"weight float forward bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"weight float forward pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"weight float forward shader module",code:weightFloatForwardShaderSource});var computePipeline=device.createComputePipeline({label:"weight float forward pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_x:PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE}}});return{computePipeline:computePipeline}};var weightFloatForwardShader=_defineProperty({},FORWARD_SHADER_NAME,loadWeightFloatForwardShader);var getPicollmWeightFloatWebGpuFunctions=function getPicollmWeightFloatWebGpuFunctions(){var setStatus=function setStatus(statusAddress,value){Module.HEAP32[statusAddress/Int32Array.BYTES_PER_ELEMENT]=value};var pvPicollmWeightFloatForwardWebGpu=function pvPicollmWeightFloatForwardWebGpu(objAddress,n,nc,nr,wOffset,wAddress,xOffset,xAddress,yOffset,yAddress,statusAddress){var _gpuBuffers$get,_gpuBuffers$get2,_gpuBuffers$get3;objAddress=unsignedAddress(objAddress);wAddress=unsignedAddress(wAddress);xAddress=unsignedAddress(xAddress);yAddress=unsignedAddress(yAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[FORWARD_SHADER_NAME];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var 
wBuffer=(_gpuBuffers$get=gpuBuffers.get(wAddress))===null||_gpuBuffers$get===void 0?void 0:_gpuBuffers$get.buffer;if(!wBuffer){console.error("W buffer has not been allocated");setStatus(statusAddress,-1);return}var xBuffer=(_gpuBuffers$get2=gpuBuffers.get(xAddress))===null||_gpuBuffers$get2===void 0?void 0:_gpuBuffers$get2.buffer;if(!xBuffer){console.error("X buffer has not been allocated");setStatus(statusAddress,-1);return}var yBuffer=(_gpuBuffers$get3=gpuBuffers.get(yAddress))===null||_gpuBuffers$get3===void 0?void 0:_gpuBuffers$get3.buffer;if(!yBuffer){console.error("Y buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(5*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"weight float forward arg buffer");obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([nr,nc,wOffset,xOffset,yOffset]));obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"weight float forward bind group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:wBuffer}},{binding:2,resource:{buffer:xBuffer}},{binding:3,resource:{buffer:yBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,FORWARD_SHADER_NAME,n);setStatus(statusAddress,0)};return{pv_picollm_weight_float_forward_webgpu_wasm:pvPicollmWeightFloatForwardWebGpu}};var rowsPerBlock=16;var columnsPerBlock=8;var preprocessDim=16;var weightBlockSize=256;var unpackBlock128BitDepth3="\nvar unpacked: array;\n\nfn unpack_block_128_bit_depth_3(packed_offset: u32) {\n let val_0 = blocks[packed_offset]; \n unpacked[0] = extractBits(val_0, 0u, 3u);\n unpacked[1] = extractBits(val_0, 3u, 3u);\n unpacked[2] = extractBits(val_0, 6u, 3u);\n unpacked[3] = extractBits(val_0, 9u, 3u);\n unpacked[4] = extractBits(val_0, 12u, 3u);\n unpacked[5] = extractBits(val_0, 15u, 3u);\n unpacked[6] = extractBits(val_0, 18u, 3u);\n unpacked[7] = 
extractBits(val_0, 21u, 3u);\n unpacked[8] = extractBits(val_0, 24u, 3u);\n unpacked[9] = extractBits(val_0, 27u, 3u);\n unpacked[10] = extractBits(val_0, 30u, 2u);\n \n let val_1 = blocks[packed_offset + 1];\n unpacked[10] = insertBits(unpacked[10], extractBits(val_1, 0u, 1u), 2u, 1u);\n unpacked[11] = extractBits(val_1, 1u, 3u);\n unpacked[12] = extractBits(val_1, 4u, 3u);\n unpacked[13] = extractBits(val_1, 7u, 3u);\n unpacked[14] = extractBits(val_1, 10u, 3u);\n unpacked[15] = extractBits(val_1, 13u, 3u);\n unpacked[16] = extractBits(val_1, 16u, 3u);\n unpacked[17] = extractBits(val_1, 19u, 3u);\n unpacked[18] = extractBits(val_1, 22u, 3u);\n unpacked[19] = extractBits(val_1, 25u, 3u);\n unpacked[20] = extractBits(val_1, 28u, 3u);\n unpacked[21] = extractBits(val_1, 31u, 1u);\n \n let val_2 = blocks[packed_offset + 2];\n unpacked[21] = insertBits(unpacked[21], extractBits(val_2, 0u, 2u), 1u, 2u); \n unpacked[22] = extractBits(val_2, 2u, 3u);\n unpacked[23] = extractBits(val_2, 5u, 3u);\n unpacked[24] = extractBits(val_2, 8u, 3u);\n unpacked[25] = extractBits(val_2, 11u, 3u);\n unpacked[26] = extractBits(val_2, 14u, 3u);\n unpacked[27] = extractBits(val_2, 17u, 3u);\n unpacked[28] = extractBits(val_2, 20u, 3u);\n unpacked[29] = extractBits(val_2, 23u, 3u);\n unpacked[30] = extractBits(val_2, 26u, 3u);\n unpacked[31] = extractBits(val_2, 29u, 3u);\n \n let val_3 = blocks[packed_offset + 3]; \n unpacked[32] = extractBits(val_3, 0u, 3u);\n unpacked[33] = extractBits(val_3, 3u, 3u);\n unpacked[34] = extractBits(val_3, 6u, 3u);\n unpacked[35] = extractBits(val_3, 9u, 3u);\n unpacked[36] = extractBits(val_3, 12u, 3u);\n unpacked[37] = extractBits(val_3, 15u, 3u);\n unpacked[38] = extractBits(val_3, 18u, 3u);\n unpacked[39] = extractBits(val_3, 21u, 3u);\n unpacked[40] = extractBits(val_3, 24u, 3u);\n unpacked[41] = extractBits(val_3, 27u, 3u);\n unpacked[42] = extractBits(val_3, 30u, 2u);\n \n let val_4 = blocks[packed_offset + 4];\n unpacked[42] = 
insertBits(unpacked[42], extractBits(val_4, 0u, 1u), 2u, 1u);\n unpacked[43] = extractBits(val_4, 1u, 3u);\n unpacked[44] = extractBits(val_4, 4u, 3u);\n unpacked[45] = extractBits(val_4, 7u, 3u);\n unpacked[46] = extractBits(val_4, 10u, 3u);\n unpacked[47] = extractBits(val_4, 13u, 3u);\n unpacked[48] = extractBits(val_4, 16u, 3u);\n unpacked[49] = extractBits(val_4, 19u, 3u);\n unpacked[50] = extractBits(val_4, 22u, 3u);\n unpacked[51] = extractBits(val_4, 25u, 3u);\n unpacked[52] = extractBits(val_4, 28u, 3u);\n unpacked[53] = extractBits(val_4, 31u, 1u);\n \n let val_5 = blocks[packed_offset + 5];\n unpacked[53] = insertBits(unpacked[53], extractBits(val_5, 0u, 2u), 1u, 2u); \n unpacked[54] = extractBits(val_5, 2u, 3u);\n unpacked[55] = extractBits(val_5, 5u, 3u);\n unpacked[56] = extractBits(val_5, 8u, 3u);\n unpacked[57] = extractBits(val_5, 11u, 3u);\n unpacked[58] = extractBits(val_5, 14u, 3u);\n unpacked[59] = extractBits(val_5, 17u, 3u);\n unpacked[60] = extractBits(val_5, 20u, 3u);\n unpacked[61] = extractBits(val_5, 23u, 3u);\n unpacked[62] = extractBits(val_5, 26u, 3u);\n unpacked[63] = extractBits(val_5, 29u, 3u);\n \n let val_6 = blocks[packed_offset + 6];\n unpacked[64] = extractBits(val_6, 0u, 3u);\n unpacked[65] = extractBits(val_6, 3u, 3u);\n unpacked[66] = extractBits(val_6, 6u, 3u);\n unpacked[67] = extractBits(val_6, 9u, 3u);\n unpacked[68] = extractBits(val_6, 12u, 3u);\n unpacked[69] = extractBits(val_6, 15u, 3u);\n unpacked[70] = extractBits(val_6, 18u, 3u);\n unpacked[71] = extractBits(val_6, 21u, 3u);\n unpacked[72] = extractBits(val_6, 24u, 3u);\n unpacked[73] = extractBits(val_6, 27u, 3u);\n unpacked[74] = extractBits(val_6, 30u, 2u);\n \n let val_7 = blocks[packed_offset + 7];\n unpacked[74] = insertBits(unpacked[74], extractBits(val_7, 0u, 1u), 2u, 1u);\n unpacked[75] = extractBits(val_7, 1u, 3u);\n unpacked[76] = extractBits(val_7, 4u, 3u);\n unpacked[77] = extractBits(val_7, 7u, 3u);\n unpacked[78] = extractBits(val_7, 10u, 3u);\n 
unpacked[79] = extractBits(val_7, 13u, 3u);\n unpacked[80] = extractBits(val_7, 16u, 3u);\n unpacked[81] = extractBits(val_7, 19u, 3u);\n unpacked[82] = extractBits(val_7, 22u, 3u);\n unpacked[83] = extractBits(val_7, 25u, 3u);\n unpacked[84] = extractBits(val_7, 28u, 3u);\n unpacked[85] = extractBits(val_7, 31u, 1u);\n \n let val_8 = blocks[packed_offset + 8];\n unpacked[85] = insertBits(unpacked[85], extractBits(val_8, 0u, 2u), 1u, 2u); \n unpacked[86] = extractBits(val_8, 2u, 3u);\n unpacked[87] = extractBits(val_8, 5u, 3u);\n unpacked[88] = extractBits(val_8, 8u, 3u);\n unpacked[89] = extractBits(val_8, 11u, 3u);\n unpacked[90] = extractBits(val_8, 14u, 3u);\n unpacked[91] = extractBits(val_8, 17u, 3u);\n unpacked[92] = extractBits(val_8, 20u, 3u);\n unpacked[93] = extractBits(val_8, 23u, 3u);\n unpacked[94] = extractBits(val_8, 26u, 3u);\n unpacked[95] = extractBits(val_8, 29u, 3u);\n \n let val_9 = blocks[packed_offset + 9];\n unpacked[96] = extractBits(val_9, 0u, 3u);\n unpacked[97] = extractBits(val_9, 3u, 3u);\n unpacked[98] = extractBits(val_9, 6u, 3u);\n unpacked[99] = extractBits(val_9, 9u, 3u);\n unpacked[100] = extractBits(val_9, 12u, 3u);\n unpacked[101] = extractBits(val_9, 15u, 3u);\n unpacked[102] = extractBits(val_9, 18u, 3u);\n unpacked[103] = extractBits(val_9, 21u, 3u);\n unpacked[104] = extractBits(val_9, 24u, 3u);\n unpacked[105] = extractBits(val_9, 27u, 3u);\n unpacked[106] = extractBits(val_9, 30u, 2u);\n \n let val_10 = blocks[packed_offset + 10];\n unpacked[106] = insertBits(unpacked[106], extractBits(val_10, 0u, 1u), 2u, 1u);\n unpacked[107] = extractBits(val_10, 1u, 3u);\n unpacked[108] = extractBits(val_10, 4u, 3u);\n unpacked[109] = extractBits(val_10, 7u, 3u);\n unpacked[110] = extractBits(val_10, 10u, 3u);\n unpacked[111] = extractBits(val_10, 13u, 3u);\n unpacked[112] = extractBits(val_10, 16u, 3u);\n unpacked[113] = extractBits(val_10, 19u, 3u);\n unpacked[114] = extractBits(val_10, 22u, 3u);\n unpacked[115] = 
extractBits(val_10, 25u, 3u);\n unpacked[116] = extractBits(val_10, 28u, 3u);\n unpacked[117] = extractBits(val_10, 31u, 1u);\n \n let val_11 = blocks[packed_offset + 11];\n unpacked[117] = insertBits(unpacked[117], extractBits(val_11, 0u, 2u), 1u, 2u); \n unpacked[118] = extractBits(val_11, 2u, 3u);\n unpacked[119] = extractBits(val_11, 5u, 3u);\n unpacked[120] = extractBits(val_11, 8u, 3u);\n unpacked[121] = extractBits(val_11, 11u, 3u);\n unpacked[122] = extractBits(val_11, 14u, 3u);\n unpacked[123] = extractBits(val_11, 17u, 3u);\n unpacked[124] = extractBits(val_11, 20u, 3u);\n unpacked[125] = extractBits(val_11, 23u, 3u);\n unpacked[126] = extractBits(val_11, 26u, 3u);\n unpacked[127] = extractBits(val_11, 29u, 3u);\n}\n";/**
 * WGSL source fragment defining `unpack_block_128_bit_depth_5(packed_offset)`.
 * The function reads 20 consecutive words `blocks[packed_offset .. packed_offset + 19]`
 * and writes 128 five-bit values, lowest bits first, into the module-scope
 * `unpacked` array; a value that straddles two words is completed with insertBits.
 * `blocks` is not declared in this fragment.
 * NOTE(review): `var unpacked: array;` has no address space or element
 * type/count as written - looks like the template arguments were lost; confirm
 * against the shader's source file.
 */var unpackBlock128BitDepth5="\nvar unpacked: array;\n\nfn unpack_block_128_bit_depth_5(packed_offset: u32) { \n let val_0 = blocks[packed_offset];\n unpacked[0] = extractBits(val_0, 0u, 5u);\n unpacked[1] = extractBits(val_0, 5u, 5u);\n unpacked[2] = extractBits(val_0, 10u, 5u);\n unpacked[3] = extractBits(val_0, 15u, 5u);\n unpacked[4] = extractBits(val_0, 20u, 5u);\n unpacked[5] = extractBits(val_0, 25u, 5u);\n unpacked[6] = extractBits(val_0, 30u, 2u);\n \n let val_1 = blocks[packed_offset + 1];\n unpacked[6] = insertBits(unpacked[6], extractBits(val_1, 0u, 3u), 2u, 3u); \n unpacked[7] = extractBits(val_1, 3u, 5u);\n unpacked[8] = extractBits(val_1, 8u, 5u);\n unpacked[9] = extractBits(val_1, 13u, 5u);\n unpacked[10] = extractBits(val_1, 18u, 5u);\n unpacked[11] = extractBits(val_1, 23u, 5u);\n unpacked[12] = extractBits(val_1, 28u, 4u);\n \n let val_2 = blocks[packed_offset + 2];\n unpacked[12] = insertBits(unpacked[12], extractBits(val_2, 0u, 1u), 4u, 1u);\n unpacked[13] = extractBits(val_2, 1u, 5u);\n unpacked[14] = extractBits(val_2, 6u, 5u);\n unpacked[15] = extractBits(val_2, 11u, 5u);\n unpacked[16] = extractBits(val_2, 16u, 5u);\n unpacked[17] = extractBits(val_2, 21u, 5u);\n unpacked[18] = extractBits(val_2, 26u, 5u);\n 
unpacked[19] = extractBits(val_2, 31u, 1u);\n \n let val_3 = blocks[packed_offset + 3];\n unpacked[19] = insertBits(unpacked[19], extractBits(val_3, 0u, 4u), 1u, 4u);\n unpacked[20] = extractBits(val_3, 4u, 5u);\n unpacked[21] = extractBits(val_3, 9u, 5u);\n unpacked[22] = extractBits(val_3, 14u, 5u);\n unpacked[23] = extractBits(val_3, 19u, 5u);\n unpacked[24] = extractBits(val_3, 24u, 5u);\n unpacked[25] = extractBits(val_3, 29u, 3u);\n \n let val_4 = blocks[packed_offset + 4];\n unpacked[25] = insertBits(unpacked[25], extractBits(val_4, 0u, 2u), 3u, 2u);\n unpacked[26] = extractBits(val_4, 2u, 5u);\n unpacked[27] = extractBits(val_4, 7u, 5u);\n unpacked[28] = extractBits(val_4, 12u, 5u);\n unpacked[29] = extractBits(val_4, 17u, 5u);\n unpacked[30] = extractBits(val_4, 22u, 5u);\n unpacked[31] = extractBits(val_4, 27u, 5u);\n \n let val_5 = blocks[packed_offset + 5];\n unpacked[32] = extractBits(val_5, 0u, 5u);\n unpacked[33] = extractBits(val_5, 5u, 5u);\n unpacked[34] = extractBits(val_5, 10u, 5u);\n unpacked[35] = extractBits(val_5, 15u, 5u);\n unpacked[36] = extractBits(val_5, 20u, 5u);\n unpacked[37] = extractBits(val_5, 25u, 5u);\n unpacked[38] = extractBits(val_5, 30u, 2u);\n\n let val_6 = blocks[packed_offset + 6];\n unpacked[38] = insertBits(unpacked[38], extractBits(val_6, 0u, 3u), 2u, 3u);\n unpacked[39] = extractBits(val_6, 3u, 5u);\n unpacked[40] = extractBits(val_6, 8u, 5u);\n unpacked[41] = extractBits(val_6, 13u, 5u);\n unpacked[42] = extractBits(val_6, 18u, 5u);\n unpacked[43] = extractBits(val_6, 23u, 5u);\n unpacked[44] = extractBits(val_6, 28u, 4u);\n\n let val_7 = blocks[packed_offset + 7];\n unpacked[44] = insertBits(unpacked[44], extractBits(val_7, 0u, 1u), 4u, 1u);\n unpacked[45] = extractBits(val_7, 1u, 5u);\n unpacked[46] = extractBits(val_7, 6u, 5u);\n unpacked[47] = extractBits(val_7, 11u, 5u);\n unpacked[48] = extractBits(val_7, 16u, 5u);\n unpacked[49] = extractBits(val_7, 21u, 5u);\n unpacked[50] = extractBits(val_7, 26u, 5u);\n 
unpacked[51] = extractBits(val_7, 31u, 1u);\n\n let val_8 = blocks[packed_offset + 8];\n unpacked[51] = insertBits(unpacked[51], extractBits(val_8, 0u, 4u), 1u, 4u);\n unpacked[52] = extractBits(val_8, 4u, 5u);\n unpacked[53] = extractBits(val_8, 9u, 5u);\n unpacked[54] = extractBits(val_8, 14u, 5u);\n unpacked[55] = extractBits(val_8, 19u, 5u);\n unpacked[56] = extractBits(val_8, 24u, 5u);\n unpacked[57] = extractBits(val_8, 29u, 3u);\n \n let val_9 = blocks[packed_offset + 9];\n unpacked[57] = insertBits(unpacked[57], extractBits(val_9, 0u, 2u), 3u, 2u);\n unpacked[58] = extractBits(val_9, 2u, 5u);\n unpacked[59] = extractBits(val_9, 7u, 5u);\n unpacked[60] = extractBits(val_9, 12u, 5u);\n unpacked[61] = extractBits(val_9, 17u, 5u);\n unpacked[62] = extractBits(val_9, 22u, 5u);\n unpacked[63] = extractBits(val_9, 27u, 5u);\n \n let val_10 = blocks[packed_offset + 10];\n unpacked[64] = extractBits(val_10, 0u, 5u);\n unpacked[65] = extractBits(val_10, 5u, 5u);\n unpacked[66] = extractBits(val_10, 10u, 5u);\n unpacked[67] = extractBits(val_10, 15u, 5u);\n unpacked[68] = extractBits(val_10, 20u, 5u);\n unpacked[69] = extractBits(val_10, 25u, 5u);\n unpacked[70] = extractBits(val_10, 30u, 2u);\n\n let val_11 = blocks[packed_offset + 11];\n unpacked[70] = insertBits(unpacked[70], extractBits(val_11, 0u, 3u), 2u, 3u);\n unpacked[71] = extractBits(val_11, 3u, 5u);\n unpacked[72] = extractBits(val_11, 8u, 5u);\n unpacked[73] = extractBits(val_11, 13u, 5u);\n unpacked[74] = extractBits(val_11, 18u, 5u);\n unpacked[75] = extractBits(val_11, 23u, 5u);\n unpacked[76] = extractBits(val_11, 28u, 4u);\n\n let val_12 = blocks[packed_offset + 12];\n unpacked[76] = insertBits(unpacked[76], extractBits(val_12, 0u, 1u), 4u, 1u);\n unpacked[77] = extractBits(val_12, 1u, 5u);\n unpacked[78] = extractBits(val_12, 6u, 5u);\n unpacked[79] = extractBits(val_12, 11u, 5u);\n unpacked[80] = extractBits(val_12, 16u, 5u);\n unpacked[81] = extractBits(val_12, 21u, 5u);\n unpacked[82] = 
extractBits(val_12, 26u, 5u);\n unpacked[83] = extractBits(val_12, 31u, 1u);\n\n let val_13 = blocks[packed_offset + 13];\n unpacked[83] = insertBits(unpacked[83], extractBits(val_13, 0u, 4u), 1u, 4u);\n unpacked[84] = extractBits(val_13, 4u, 5u);\n unpacked[85] = extractBits(val_13, 9u, 5u);\n unpacked[86] = extractBits(val_13, 14u, 5u);\n unpacked[87] = extractBits(val_13, 19u, 5u);\n unpacked[88] = extractBits(val_13, 24u, 5u);\n unpacked[89] = extractBits(val_13, 29u, 3u);\n \n let val_14 = blocks[packed_offset + 14];\n unpacked[89] = insertBits(unpacked[89], extractBits(val_14, 0u, 2u), 3u, 2u);\n unpacked[90] = extractBits(val_14, 2u, 5u);\n unpacked[91] = extractBits(val_14, 7u, 5u);\n unpacked[92] = extractBits(val_14, 12u, 5u);\n unpacked[93] = extractBits(val_14, 17u, 5u);\n unpacked[94] = extractBits(val_14, 22u, 5u);\n unpacked[95] = extractBits(val_14, 27u, 5u);\n\n let val_15 = blocks[packed_offset + 15];\n unpacked[96] = extractBits(val_15, 0u, 5u);\n unpacked[97] = extractBits(val_15, 5u, 5u);\n unpacked[98] = extractBits(val_15, 10u, 5u);\n unpacked[99] = extractBits(val_15, 15u, 5u);\n unpacked[100] = extractBits(val_15, 20u, 5u);\n unpacked[101] = extractBits(val_15, 25u, 5u);\n unpacked[102] = extractBits(val_15, 30u, 2u);\n\n let val_16 = blocks[packed_offset + 16];\n unpacked[102] = insertBits(unpacked[102], extractBits(val_16, 0u, 3u), 2u, 3u);\n unpacked[103] = extractBits(val_16, 3u, 5u);\n unpacked[104] = extractBits(val_16, 8u, 5u);\n unpacked[105] = extractBits(val_16, 13u, 5u);\n unpacked[106] = extractBits(val_16, 18u, 5u);\n unpacked[107] = extractBits(val_16, 23u, 5u);\n unpacked[108] = extractBits(val_16, 28u, 4u);\n\n let val_17 = blocks[packed_offset + 17];\n unpacked[108] = insertBits(unpacked[108], extractBits(val_17, 0u, 1u), 4u, 1u);\n unpacked[109] = extractBits(val_17, 1u, 5u);\n unpacked[110] = extractBits(val_17, 6u, 5u);\n unpacked[111] = extractBits(val_17, 11u, 5u);\n unpacked[112] = extractBits(val_17, 16u, 5u);\n 
unpacked[113] = extractBits(val_17, 21u, 5u);\n unpacked[114] = extractBits(val_17, 26u, 5u);\n unpacked[115] = extractBits(val_17, 31u, 1u);\n \n let val_18 = blocks[packed_offset + 18];\n unpacked[115] = insertBits(unpacked[115], extractBits(val_18, 0u, 4u), 1u, 4u);\n unpacked[116] = extractBits(val_18, 4u, 5u);\n unpacked[117] = extractBits(val_18, 9u, 5u);\n unpacked[118] = extractBits(val_18, 14u, 5u);\n unpacked[119] = extractBits(val_18, 19u, 5u);\n unpacked[120] = extractBits(val_18, 24u, 5u);\n unpacked[121] = extractBits(val_18, 29u, 3u);\n\n let val_19 = blocks[packed_offset + 19];\n unpacked[121] = insertBits(unpacked[121], extractBits(val_19, 0u, 2u), 3u, 2u);\n unpacked[122] = extractBits(val_19, 2u, 5u);\n unpacked[123] = extractBits(val_19, 7u, 5u);\n unpacked[124] = extractBits(val_19, 12u, 5u);\n unpacked[125] = extractBits(val_19, 17u, 5u);\n unpacked[126] = extractBits(val_19, 22u, 5u);\n unpacked[127] = extractBits(val_19, 27u, 5u);\n}\n";/**
 * WGSL source fragment defining `unpack_block_128_bit_depth_6(packed_offset)`.
 * The function reads 24 consecutive words `blocks[packed_offset .. packed_offset + 23]`
 * and writes 128 six-bit values, lowest bits first, into the module-scope
 * `unpacked` array; a value that straddles two words is completed with insertBits.
 * `blocks` is not declared in this fragment.
 * NOTE(review): `var unpacked: array;` has no address space or element
 * type/count as written - looks like the template arguments were lost; confirm
 * against the shader's source file.
 */var unpackBlock128BitDepth6="\nvar unpacked: array;\n\nfn unpack_block_128_bit_depth_6(packed_offset: u32) {\n let val_0 = blocks[packed_offset];\n unpacked[0] = extractBits(val_0, 0u, 6u);\n unpacked[1] = extractBits(val_0, 6u, 6u);\n unpacked[2] = extractBits(val_0, 12u, 6u);\n unpacked[3] = extractBits(val_0, 18u, 6u);\n unpacked[4] = extractBits(val_0, 24u, 6u);\n unpacked[5] = extractBits(val_0, 30u, 2u);\n \n let val_1 = blocks[packed_offset + 1];\n unpacked[5] = insertBits(unpacked[5], extractBits(val_1, 0u, 4u), 2u, 4u);\n unpacked[6] = extractBits(val_1, 4u, 6u);\n unpacked[7] = extractBits(val_1, 10u, 6u);\n unpacked[8] = extractBits(val_1, 16u, 6u);\n unpacked[9] = extractBits(val_1, 22u, 6u);\n unpacked[10] = extractBits(val_1, 28u, 4u);\n \n let val_2 = blocks[packed_offset + 2];\n unpacked[10] = insertBits(unpacked[10], extractBits(val_2, 0u, 2u), 4u, 2u);\n unpacked[11] = extractBits(val_2, 2u, 6u);\n unpacked[12] = extractBits(val_2, 8u, 6u);\n unpacked[13] = extractBits(val_2, 14u, 6u);\n 
unpacked[14] = extractBits(val_2, 20u, 6u);\n unpacked[15] = extractBits(val_2, 26u, 6u);\n \n let val_3 = blocks[packed_offset + 3];\n unpacked[16] = extractBits(val_3, 0u, 6u);\n unpacked[17] = extractBits(val_3, 6u, 6u);\n unpacked[18] = extractBits(val_3, 12u, 6u);\n unpacked[19] = extractBits(val_3, 18u, 6u);\n unpacked[20] = extractBits(val_3, 24u, 6u);\n unpacked[21] = extractBits(val_3, 30u, 2u);\n \n let val_4 = blocks[packed_offset + 4];\n unpacked[21] = insertBits(unpacked[21], extractBits(val_4, 0u, 4u), 2u, 4u);\n unpacked[22] = extractBits(val_4, 4u, 6u);\n unpacked[23] = extractBits(val_4, 10u, 6u);\n unpacked[24] = extractBits(val_4, 16u, 6u);\n unpacked[25] = extractBits(val_4, 22u, 6u);\n unpacked[26] = extractBits(val_4, 28u, 4u);\n\n let val_5 = blocks[packed_offset + 5];\n unpacked[26] = insertBits(unpacked[26], extractBits(val_5, 0u, 2u), 4u, 2u);\n unpacked[27] = extractBits(val_5, 2u, 6u);\n unpacked[28] = extractBits(val_5, 8u, 6u);\n unpacked[29] = extractBits(val_5, 14u, 6u);\n unpacked[30] = extractBits(val_5, 20u, 6u);\n unpacked[31] = extractBits(val_5, 26u, 6u);\n\n let val_6 = blocks[packed_offset + 6];\n unpacked[32] = extractBits(val_6, 0u, 6u);\n unpacked[33] = extractBits(val_6, 6u, 6u);\n unpacked[34] = extractBits(val_6, 12u, 6u);\n unpacked[35] = extractBits(val_6, 18u, 6u);\n unpacked[36] = extractBits(val_6, 24u, 6u);\n unpacked[37] = extractBits(val_6, 30u, 2u);\n\n let val_7 = blocks[packed_offset + 7];\n unpacked[37] = insertBits(unpacked[37], extractBits(val_7, 0u, 4u), 2u, 4u);\n unpacked[38] = extractBits(val_7, 4u, 6u);\n unpacked[39] = extractBits(val_7, 10u, 6u);\n unpacked[40] = extractBits(val_7, 16u, 6u);\n unpacked[41] = extractBits(val_7, 22u, 6u);\n unpacked[42] = extractBits(val_7, 28u, 4u);\n \n let val_8 = blocks[packed_offset + 8];\n unpacked[42] = insertBits(unpacked[42], extractBits(val_8, 0u, 2u), 4u, 2u);\n unpacked[43] = extractBits(val_8, 2u, 6u);\n unpacked[44] = extractBits(val_8, 8u, 6u);\n 
unpacked[45] = extractBits(val_8, 14u, 6u);\n unpacked[46] = extractBits(val_8, 20u, 6u);\n unpacked[47] = extractBits(val_8, 26u, 6u);\n\n let val_9 = blocks[packed_offset + 9];\n unpacked[48] = extractBits(val_9, 0u, 6u);\n unpacked[49] = extractBits(val_9, 6u, 6u);\n unpacked[50] = extractBits(val_9, 12u, 6u);\n unpacked[51] = extractBits(val_9, 18u, 6u);\n unpacked[52] = extractBits(val_9, 24u, 6u);\n unpacked[53] = extractBits(val_9, 30u, 2u);\n\n let val_10 = blocks[packed_offset + 10];\n unpacked[53] = insertBits(unpacked[53], extractBits(val_10, 0u, 4u), 2u, 4u);\n unpacked[54] = extractBits(val_10, 4u, 6u);\n unpacked[55] = extractBits(val_10, 10u, 6u);\n unpacked[56] = extractBits(val_10, 16u, 6u);\n unpacked[57] = extractBits(val_10, 22u, 6u);\n unpacked[58] = extractBits(val_10, 28u, 4u);\n\n let val_11 = blocks[packed_offset + 11];\n unpacked[58] = insertBits(unpacked[58], extractBits(val_11, 0u, 2u), 4u, 2u);\n unpacked[59] = extractBits(val_11, 2u, 6u);\n unpacked[60] = extractBits(val_11, 8u, 6u);\n unpacked[61] = extractBits(val_11, 14u, 6u);\n unpacked[62] = extractBits(val_11, 20u, 6u);\n unpacked[63] = extractBits(val_11, 26u, 6u);\n \n let val_12 = blocks[packed_offset + 12];\n unpacked[64] = extractBits(val_12, 0u, 6u);\n unpacked[65] = extractBits(val_12, 6u, 6u);\n unpacked[66] = extractBits(val_12, 12u, 6u);\n unpacked[67] = extractBits(val_12, 18u, 6u);\n unpacked[68] = extractBits(val_12, 24u, 6u);\n unpacked[69] = extractBits(val_12, 30u, 2u);\n\n let val_13 = blocks[packed_offset + 13];\n unpacked[69] = insertBits(unpacked[69], extractBits(val_13, 0u, 4u), 2u, 4u);\n unpacked[70] = extractBits(val_13, 4u, 6u);\n unpacked[71] = extractBits(val_13, 10u, 6u);\n unpacked[72] = extractBits(val_13, 16u, 6u);\n unpacked[73] = extractBits(val_13, 22u, 6u);\n unpacked[74] = extractBits(val_13, 28u, 4u);\n\n let val_14 = blocks[packed_offset + 14];\n unpacked[74] = insertBits(unpacked[74], extractBits(val_14, 0u, 2u), 4u, 2u);\n unpacked[75] = 
extractBits(val_14, 2u, 6u);\n unpacked[76] = extractBits(val_14, 8u, 6u);\n unpacked[77] = extractBits(val_14, 14u, 6u);\n unpacked[78] = extractBits(val_14, 20u, 6u);\n unpacked[79] = extractBits(val_14, 26u, 6u);\n\n let val_15 = blocks[packed_offset + 15];\n unpacked[80] = extractBits(val_15, 0u, 6u);\n unpacked[81] = extractBits(val_15, 6u, 6u);\n unpacked[82] = extractBits(val_15, 12u, 6u);\n unpacked[83] = extractBits(val_15, 18u, 6u);\n unpacked[84] = extractBits(val_15, 24u, 6u);\n unpacked[85] = extractBits(val_15, 30u, 2u);\n \n let val_16 = blocks[packed_offset + 16];\n unpacked[85] = insertBits(unpacked[85], extractBits(val_16, 0u, 4u), 2u, 4u);\n unpacked[86] = extractBits(val_16, 4u, 6u);\n unpacked[87] = extractBits(val_16, 10u, 6u);\n unpacked[88] = extractBits(val_16, 16u, 6u);\n unpacked[89] = extractBits(val_16, 22u, 6u);\n unpacked[90] = extractBits(val_16, 28u, 4u);\n\n let val_17 = blocks[packed_offset + 17];\n unpacked[90] = insertBits(unpacked[90], extractBits(val_17, 0u, 2u), 4u, 2u);\n unpacked[91] = extractBits(val_17, 2u, 6u);\n unpacked[92] = extractBits(val_17, 8u, 6u);\n unpacked[93] = extractBits(val_17, 14u, 6u);\n unpacked[94] = extractBits(val_17, 20u, 6u);\n unpacked[95] = extractBits(val_17, 26u, 6u);\n\n let val_18 = blocks[packed_offset + 18];\n unpacked[96] = extractBits(val_18, 0u, 6u);\n unpacked[97] = extractBits(val_18, 6u, 6u);\n unpacked[98] = extractBits(val_18, 12u, 6u);\n unpacked[99] = extractBits(val_18, 18u, 6u);\n unpacked[100] = extractBits(val_18, 24u, 6u);\n unpacked[101] = extractBits(val_18, 30u, 2u);\n\n let val_19 = blocks[packed_offset + 19];\n unpacked[101] = insertBits(unpacked[101], extractBits(val_19, 0u, 4u), 2u, 4u);\n unpacked[102] = extractBits(val_19, 4u, 6u);\n unpacked[103] = extractBits(val_19, 10u, 6u);\n unpacked[104] = extractBits(val_19, 16u, 6u);\n unpacked[105] = extractBits(val_19, 22u, 6u);\n unpacked[106] = extractBits(val_19, 28u, 4u);\n \n let val_20 = blocks[packed_offset + 20];\n 
unpacked[106] = insertBits(unpacked[106], extractBits(val_20, 0u, 2u), 4u, 2u);\n unpacked[107] = extractBits(val_20, 2u, 6u);\n unpacked[108] = extractBits(val_20, 8u, 6u);\n unpacked[109] = extractBits(val_20, 14u, 6u);\n unpacked[110] = extractBits(val_20, 20u, 6u);\n unpacked[111] = extractBits(val_20, 26u, 6u);\n\n let val_21 = blocks[packed_offset + 21];\n unpacked[112] = extractBits(val_21, 0u, 6u);\n unpacked[113] = extractBits(val_21, 6u, 6u);\n unpacked[114] = extractBits(val_21, 12u, 6u);\n unpacked[115] = extractBits(val_21, 18u, 6u);\n unpacked[116] = extractBits(val_21, 24u, 6u);\n unpacked[117] = extractBits(val_21, 30u, 2u);\n\n let val_22 = blocks[packed_offset + 22];\n unpacked[117] = insertBits(unpacked[117], extractBits(val_22, 0u, 4u), 2u, 4u);\n unpacked[118] = extractBits(val_22, 4u, 6u);\n unpacked[119] = extractBits(val_22, 10u, 6u);\n unpacked[120] = extractBits(val_22, 16u, 6u);\n unpacked[121] = extractBits(val_22, 22u, 6u);\n unpacked[122] = extractBits(val_22, 28u, 4u);\n\n let val_23 = blocks[packed_offset + 23];\n unpacked[122] = insertBits(unpacked[122], extractBits(val_23, 0u, 2u), 4u, 2u);\n unpacked[123] = extractBits(val_23, 2u, 6u);\n unpacked[124] = extractBits(val_23, 8u, 6u);\n unpacked[125] = extractBits(val_23, 14u, 6u);\n unpacked[126] = extractBits(val_23, 20u, 6u);\n unpacked[127] = extractBits(val_23, 26u, 6u);\n}\n";var unpackBlock128BitDepth7="\nvar unpacked: array;\n\nfn unpack_block_128_bit_depth_7(packed_offset: u32) {\n let val_0 = blocks[packed_offset];\n unpacked[0] = extractBits(val_0, 0u, 7u);\n unpacked[1] = extractBits(val_0, 7u, 7u);\n unpacked[2] = extractBits(val_0, 14u, 7u);\n unpacked[3] = extractBits(val_0, 21u, 7u);\n unpacked[4] = extractBits(val_0, 28u, 4u);\n \n let val_1 = blocks[packed_offset + 1];\n unpacked[4] = insertBits(unpacked[4], extractBits(val_1, 0u, 3u), 4u, 3u);\n unpacked[5] = extractBits(val_1, 3u, 7u);\n unpacked[6] = extractBits(val_1, 10u, 7u);\n unpacked[7] = extractBits(val_1, 
17u, 7u);\n unpacked[8] = extractBits(val_1, 24u, 7u);\n unpacked[9] = extractBits(val_1, 31u, 1u);\n \n let val_2 = blocks[packed_offset + 2];\n unpacked[9] = insertBits(unpacked[9], extractBits(val_2, 0u, 6u), 1u, 6u);\n unpacked[10] = extractBits(val_2, 6u, 7u);\n unpacked[11] = extractBits(val_2, 13u, 7u);\n unpacked[12] = extractBits(val_2, 20u, 7u);\n unpacked[13] = extractBits(val_2, 27u, 5u);\n \n let val_3 = blocks[packed_offset + 3];\n unpacked[13] = insertBits(unpacked[13], extractBits(val_3, 0u, 2u), 5u, 2u);\n unpacked[14] = extractBits(val_3, 2u, 7u);\n unpacked[15] = extractBits(val_3, 9u, 7u);\n unpacked[16] = extractBits(val_3, 16u, 7u);\n unpacked[17] = extractBits(val_3, 23u, 7u);\n unpacked[18] = extractBits(val_3, 30u, 2u);\n \n let val_4 = blocks[packed_offset + 4];\n unpacked[18] = insertBits(unpacked[18], extractBits(val_4, 0u, 5u), 2u, 5u);\n unpacked[19] = extractBits(val_4, 5u, 7u);\n unpacked[20] = extractBits(val_4, 12u, 7u);\n unpacked[21] = extractBits(val_4, 19u, 7u);\n unpacked[22] = extractBits(val_4, 26u, 6u);\n \n let val_5 = blocks[packed_offset + 5];\n unpacked[22] = insertBits(unpacked[22], extractBits(val_5, 0u, 1u), 6u, 1u);\n unpacked[23] = extractBits(val_5, 1u, 7u);\n unpacked[24] = extractBits(val_5, 8u, 7u);\n unpacked[25] = extractBits(val_5, 15u, 7u);\n unpacked[26] = extractBits(val_5, 22u, 7u);\n unpacked[27] = extractBits(val_5, 29u, 3u);\n \n let val_6 = blocks[packed_offset + 6];\n unpacked[27] = insertBits(unpacked[27], extractBits(val_6, 0u, 4u), 3u, 4u);\n unpacked[28] = extractBits(val_6, 4u, 7u);\n unpacked[29] = extractBits(val_6, 11u, 7u);\n unpacked[30] = extractBits(val_6, 18u, 7u);\n unpacked[31] = extractBits(val_6, 25u, 7u);\n \n let val_7 = blocks[packed_offset + 7];\n unpacked[32] = extractBits(val_7, 0u, 7u);\n unpacked[33] = extractBits(val_7, 7u, 7u);\n unpacked[34] = extractBits(val_7, 14u, 7u);\n unpacked[35] = extractBits(val_7, 21u, 7u);\n unpacked[36] = extractBits(val_7, 28u, 4u);\n \n let 
val_8 = blocks[packed_offset + 8];\n unpacked[36] = insertBits(unpacked[36], extractBits(val_8, 0u, 3u), 4u, 3u);\n unpacked[37] = extractBits(val_8, 3u, 7u);\n unpacked[38] = extractBits(val_8, 10u, 7u);\n unpacked[39] = extractBits(val_8, 17u, 7u);\n unpacked[40] = extractBits(val_8, 24u, 7u);\n unpacked[41] = extractBits(val_8, 31u, 1u);\n \n let val_9 = blocks[packed_offset + 9];\n unpacked[41] = insertBits(unpacked[41], extractBits(val_9, 0u, 6u), 1u, 6u);\n unpacked[42] = extractBits(val_9, 6u, 7u);\n unpacked[43] = extractBits(val_9, 13u, 7u);\n unpacked[44] = extractBits(val_9, 20u, 7u);\n unpacked[45] = extractBits(val_9, 27u, 5u);\n \n let val_10 = blocks[packed_offset + 10];\n unpacked[45] = insertBits(unpacked[45], extractBits(val_10, 0u, 2u), 5u, 2u);\n unpacked[46] = extractBits(val_10, 2u, 7u);\n unpacked[47] = extractBits(val_10, 9u, 7u);\n unpacked[48] = extractBits(val_10, 16u, 7u);\n unpacked[49] = extractBits(val_10, 23u, 7u);\n unpacked[50] = extractBits(val_10, 30u, 2u);\n \n let val_11 = blocks[packed_offset + 11];\n unpacked[50] = insertBits(unpacked[50], extractBits(val_11, 0u, 5u), 2u, 5u);\n unpacked[51] = extractBits(val_11, 5u, 7u);\n unpacked[52] = extractBits(val_11, 12u, 7u);\n unpacked[53] = extractBits(val_11, 19u, 7u);\n unpacked[54] = extractBits(val_11, 26u, 6u);\n \n let val_12 = blocks[packed_offset + 12];\n unpacked[54] = insertBits(unpacked[54], extractBits(val_12, 0u, 1u), 6u, 1u);\n unpacked[55] = extractBits(val_12, 1u, 7u);\n unpacked[56] = extractBits(val_12, 8u, 7u);\n unpacked[57] = extractBits(val_12, 15u, 7u);\n unpacked[58] = extractBits(val_12, 22u, 7u);\n unpacked[59] = extractBits(val_12, 29u, 3u);\n \n let val_13 = blocks[packed_offset + 13];\n unpacked[59] = insertBits(unpacked[59], extractBits(val_13, 0u, 4u), 3u, 4u);\n unpacked[60] = extractBits(val_13, 4u, 7u);\n unpacked[61] = extractBits(val_13, 11u, 7u);\n unpacked[62] = extractBits(val_13, 18u, 7u);\n unpacked[63] = extractBits(val_13, 25u, 7u);\n \n 
let val_14 = blocks[packed_offset + 14];\n unpacked[64] = extractBits(val_14, 0u, 7u);\n unpacked[65] = extractBits(val_14, 7u, 7u);\n unpacked[66] = extractBits(val_14, 14u, 7u);\n unpacked[67] = extractBits(val_14, 21u, 7u);\n unpacked[68] = extractBits(val_14, 28u, 4u);\n \n let val_15 = blocks[packed_offset + 15];\n unpacked[68] = insertBits(unpacked[68], extractBits(val_15, 0u, 3u), 4u, 3u);\n unpacked[69] = extractBits(val_15, 3u, 7u);\n unpacked[70] = extractBits(val_15, 10u, 7u);\n unpacked[71] = extractBits(val_15, 17u, 7u);\n unpacked[72] = extractBits(val_15, 24u, 7u);\n unpacked[73] = extractBits(val_15, 31u, 1u);\n \n let val_16 = blocks[packed_offset + 16];\n unpacked[73] = insertBits(unpacked[73], extractBits(val_16, 0u, 6u), 1u, 6u);\n unpacked[74] = extractBits(val_16, 6u, 7u);\n unpacked[75] = extractBits(val_16, 13u, 7u);\n unpacked[76] = extractBits(val_16, 20u, 7u);\n unpacked[77] = extractBits(val_16, 27u, 5u);\n \n let val_17 = blocks[packed_offset + 17];\n unpacked[77] = insertBits(unpacked[77], extractBits(val_17, 0u, 2u), 5u, 2u);\n unpacked[78] = extractBits(val_17, 2u, 7u);\n unpacked[79] = extractBits(val_17, 9u, 7u);\n unpacked[80] = extractBits(val_17, 16u, 7u);\n unpacked[81] = extractBits(val_17, 23u, 7u);\n unpacked[82] = extractBits(val_17, 30u, 2u);\n \n let val_18 = blocks[packed_offset + 18];\n unpacked[82] = insertBits(unpacked[82], extractBits(val_18, 0u, 5u), 2u, 5u);\n unpacked[83] = extractBits(val_18, 5u, 7u);\n unpacked[84] = extractBits(val_18, 12u, 7u);\n unpacked[85] = extractBits(val_18, 19u, 7u);\n unpacked[86] = extractBits(val_18, 26u, 6u);\n \n let val_19 = blocks[packed_offset + 19];\n unpacked[86] = insertBits(unpacked[86], extractBits(val_19, 0u, 1u), 6u, 1u);\n unpacked[87] = extractBits(val_19, 1u, 7u);\n unpacked[88] = extractBits(val_19, 8u, 7u);\n unpacked[89] = extractBits(val_19, 15u, 7u);\n unpacked[90] = extractBits(val_19, 22u, 7u);\n unpacked[91] = extractBits(val_19, 29u, 3u);\n \n let val_20 = 
blocks[packed_offset + 20];\n unpacked[91] = insertBits(unpacked[91], extractBits(val_20, 0u, 4u), 3u, 4u);\n unpacked[92] = extractBits(val_20, 4u, 7u);\n unpacked[93] = extractBits(val_20, 11u, 7u);\n unpacked[94] = extractBits(val_20, 18u, 7u);\n unpacked[95] = extractBits(val_20, 25u, 7u);\n \n let val_21 = blocks[packed_offset + 21];\n unpacked[96] = extractBits(val_21, 0u, 7u);\n unpacked[97] = extractBits(val_21, 7u, 7u);\n unpacked[98] = extractBits(val_21, 14u, 7u);\n unpacked[99] = extractBits(val_21, 21u, 7u);\n unpacked[100] = extractBits(val_21, 28u, 4u);\n \n let val_22 = blocks[packed_offset + 22];\n unpacked[100] = insertBits(unpacked[100], extractBits(val_22, 0u, 3u), 4u, 3u);\n unpacked[101] = extractBits(val_22, 3u, 7u);\n unpacked[102] = extractBits(val_22, 10u, 7u);\n unpacked[103] = extractBits(val_22, 17u, 7u);\n unpacked[104] = extractBits(val_22, 24u, 7u);\n unpacked[105] = extractBits(val_22, 31u, 1u);\n \n let val_23 = blocks[packed_offset + 23];\n unpacked[105] = insertBits(unpacked[105], extractBits(val_23, 0u, 6u), 1u, 6u);\n unpacked[106] = extractBits(val_23, 6u, 7u);\n unpacked[107] = extractBits(val_23, 13u, 7u);\n unpacked[108] = extractBits(val_23, 20u, 7u);\n unpacked[109] = extractBits(val_23, 27u, 5u);\n \n let val_24 = blocks[packed_offset + 24];\n unpacked[109] = insertBits(unpacked[109], extractBits(val_24, 0u, 2u), 5u, 2u);\n unpacked[110] = extractBits(val_24, 2u, 7u);\n unpacked[111] = extractBits(val_24, 9u, 7u);\n unpacked[112] = extractBits(val_24, 16u, 7u);\n unpacked[113] = extractBits(val_24, 23u, 7u);\n unpacked[114] = extractBits(val_24, 30u, 2u);\n \n let val_25 = blocks[packed_offset + 25];\n unpacked[114] = insertBits(unpacked[114], extractBits(val_25, 0u, 5u), 2u, 5u);\n unpacked[115] = extractBits(val_25, 5u, 7u);\n unpacked[116] = extractBits(val_25, 12u, 7u);\n unpacked[117] = extractBits(val_25, 19u, 7u);\n unpacked[118] = extractBits(val_25, 26u, 6u);\n \n let val_26 = blocks[packed_offset + 26];\n 
unpacked[118] = insertBits(unpacked[118], extractBits(val_26, 0u, 1u), 6u, 1u);\n unpacked[119] = extractBits(val_26, 1u, 7u);\n unpacked[120] = extractBits(val_26, 8u, 7u);\n unpacked[121] = extractBits(val_26, 15u, 7u);\n unpacked[122] = extractBits(val_26, 22u, 7u);\n unpacked[123] = extractBits(val_26, 29u, 3u);\n \n let val_27 = blocks[packed_offset + 27];\n unpacked[123] = insertBits(unpacked[123], extractBits(val_27, 0u, 4u), 3u, 4u);\n unpacked[124] = extractBits(val_27, 4u, 7u);\n unpacked[125] = extractBits(val_27, 11u, 7u);\n unpacked[126] = extractBits(val_27, 18u, 7u);\n unpacked[127] = extractBits(val_27, 25u, 7u); \n}\n";var fromFP510Function="\n\nconst exponents: array = array(\n 2.9103830456733704e-11, \n 5.820766091346741e-11, \n 1.1641532182693481e-10, \n 2.3283064365386963e-10,\n 4.656612873077393e-10, \n 9.313225746154785e-10, \n 1.862645149230957e-09, \n 3.725290298461914e-09,\n 7.450580596923828e-09, \n 1.4901161193847656e-08, \n 2.9802322387695312e-08, \n 5.960464477539063e-08,\n 1.1920928955078125e-07, \n 2.384185791015625e-07, \n 4.76837158203125e-07, \n 9.5367431640625e-07,\n 1.9073486328125e-06, \n 3.814697265625e-06, \n 7.62939453125e-06, \n 1.52587890625e-05, \n 3.0517578125e-05,\n 6.103515625e-05, \n 0.0001220703125, \n 0.000244140625, \n 0.00048828125, \n 0.0009765625, \n 0.001953125, \n 0.00390625,\n 0.0078125, \n 0.015625, \n 0.03125, \n 0.0625);\n\nfn from_fp510(x: u32) -> f32 {\n let exponent = f32(exponents[extractBits(x, 10u, 5u)]); \n let fractional = f32(extractBits(x, 0u, 10u)); \n let abs = exponent * fractional;\n return abs * (1.0 - (2.0 * f32(extractBits(x, 15u, 1u))));\n}\n";var preprocessBlocks3BitShaderSource="\n\nstruct argsStruct {\n nbr: u32,\n nbc: u32,\n blocks_offset: u32\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar blocks: array;\n\n".concat(unpackBlock128BitDepth3,"\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 
= 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.nbr || global_id.y >= args.nbc) {\n return;\n }\n \n let blocks_start: u32 = args.blocks_offset + ((global_id.x * args.nbc + global_id.y) * 12u); \n unpack_block_128_bit_depth_3(blocks_start); \n \n let b01: u32 = blocks_start;\n let b2: u32 = blocks_start + 8u;\n \n for (var r = 0u; r < ").concat(rowsPerBlock,"u; r++) {\n let unpacked_idx = r * ").concat(columnsPerBlock,"u;\n let b01_idx = b01 + (r / 2u);\n let b2_idx = b2 + (r / 4u);\n\n blocks[b01_idx] = insertBits(blocks[b01_idx], extractBits(unpacked[unpacked_idx], 0u, 2u), (r * 16u) % 32u, 16u);\n blocks[b2_idx] = insertBits(blocks[b2_idx], extractBits(unpacked[unpacked_idx], 2u, 1u), (r * 8u) % 32u, 8u); \n }\n \n for (var c = 1u; c < ").concat(columnsPerBlock,"u; c++) {\n for (var r = 0u; r < ").concat(rowsPerBlock,"u; r++) {\n let unpacked_idx = r * ").concat(columnsPerBlock,"u + c;\n let b01_idx = b01 + (r / 2u);\n let b2_idx = b2 + (r / 4u);\n \n blocks[b01_idx] = insertBits(blocks[b01_idx], extractBits(unpacked[unpacked_idx], 0u, 2u), ((r * 16u) % 32u) + (2u * c), 2u);\n blocks[b2_idx] = insertBits(blocks[b2_idx], extractBits(unpacked[unpacked_idx], 2u, 1u), ((r * 8u) % 32u) + c, 1u); \n }\n }\n}\n\n").concat(emptyShader);var preprocessBlocks5BitShaderSource="\n\nstruct argsStruct {\n nbr: u32,\n nbc: u32,\n blocks_offset: u32\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar blocks: array;\n\n".concat(unpackBlock128BitDepth5,"\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.nbr || global_id.y >= args.nbc) {\n return;\n }\n\n let blocks_start: u32 = args.blocks_offset + 
((global_id.x * args.nbc + global_id.y) * 20u);\n unpack_block_128_bit_depth_5(blocks_start); \n \n let b03: u32 = blocks_start;\n let b4: u32 = blocks_start + 16u;\n\n for (var r = 0u; r < ").concat(rowsPerBlock,"u; r++) {\n let unpacked_idx = r * ").concat(columnsPerBlock,"u;\n let b03_idx = b03 + r;\n let b4_idx = b4 + (r / 4u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 0u, 32u);\n blocks[b4_idx] = insertBits(blocks[b4_idx], extractBits(unpacked[unpacked_idx], 4u, 1u), (r * 8u) % 32u, 8u);\n }\n \n for (var c = 1u; c < ").concat(columnsPerBlock,"u; c++) {\n for (var r = 0u; r < ").concat(rowsPerBlock,"u; r++) {\n let unpacked_idx = r * ").concat(columnsPerBlock,"u + c;\n let b03_idx = b03 + r;\n let b4_idx = b4 + (r / 4u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 4 * c, 4u);\n blocks[b4_idx] = insertBits(blocks[b4_idx], extractBits(unpacked[unpacked_idx], 4u, 1u), ((r * 8u) % 32u) + c, 1u);\n }\n } \n}\n\n").concat(emptyShader);var preprocessBlocks6BitShaderSource="\n\nstruct argsStruct {\n nbr: u32,\n nbc: u32,\n blocks_offset: u32\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar blocks: array;\n\n".concat(unpackBlock128BitDepth6,"\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.nbr || global_id.y >= args.nbc) {\n return;\n }\n \n let blocks_start: u32 = args.blocks_offset + ((global_id.x * args.nbc + global_id.y) * 24u);\n unpack_block_128_bit_depth_6(blocks_start);\n \n let b03: u32 = blocks_start;\n let b45: u32 = blocks_start + 16u; \n for (var r = 0u; r < ").concat(rowsPerBlock,"u; r++) {\n let unpacked_idx = r * ").concat(columnsPerBlock,"u;\n let b03_idx = b03 + r;\n let b45_idx = b45 + 
(r / 2u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 0u, 32u);\n blocks[b45_idx] = insertBits(blocks[b45_idx], extractBits(unpacked[unpacked_idx], 4u, 2u), ((r * 16u) % 32u), 16u);\n }\n\n for (var c = 1u; c < ").concat(columnsPerBlock,"u; c++) {\n for (var r = 0u; r < ").concat(rowsPerBlock,"u; r++) {\n let unpacked_idx = r * ").concat(columnsPerBlock,"u + c;\n let b03_idx = b03 + r;\n let b45_idx = b45 + (r / 2u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 4 * c, 4u);\n blocks[b45_idx] = insertBits(blocks[b45_idx], extractBits(unpacked[unpacked_idx], 4u, 2u), ((r * 16u) % 32u) + (2 * c), 2u);\n }\n }\n}\n\n").concat(emptyShader);var preprocessBlocks7BitShaderSource="\n\nstruct argsStruct {\n nbr: u32,\n nbc: u32,\n blocks_offset: u32\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar blocks: array;\n\n".concat(unpackBlock128BitDepth7,"\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.nbr || global_id.y >= args.nbc) {\n return;\n }\n \n let blocks_start: u32 = args.blocks_offset + ((global_id.x * args.nbc + global_id.y) * 28u);\n unpack_block_128_bit_depth_7(blocks_start);\n \n let b03: u32 = blocks_start;\n let b45: u32 = blocks_start + 16u;\n let b6: u32 = blocks_start + 24u; \n for (var r = 0u; r < ").concat(rowsPerBlock,"u; r++) {\n let unpacked_idx = r * ").concat(columnsPerBlock,"u;\n let b03_idx = b03 + r;\n let b45_idx = b45 + (r / 2u);\n let b6_idx = b6 + (r / 4u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 0, 32u);\n blocks[b45_idx] = insertBits(blocks[b45_idx], extractBits(unpacked[unpacked_idx], 4u, 2u), ((r * 16u) % 32u), 16u);\n 
blocks[b6_idx] = insertBits(blocks[b6_idx], extractBits(unpacked[unpacked_idx], 6u, 1u), ((r * 8u) % 32u), 8u);\n }\n \n for (var c = 1u; c < ").concat(columnsPerBlock,"u; c++) {\n for (var r = 0u; r < ").concat(rowsPerBlock,"u; r++) {\n let unpacked_idx = r * ").concat(columnsPerBlock,"u + c;\n let b03_idx = b03 + r;\n let b45_idx = b45 + (r / 2u);\n let b6_idx = b6 + (r / 4u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 4 * c, 4u);\n blocks[b45_idx] = insertBits(blocks[b45_idx], extractBits(unpacked[unpacked_idx], 4u, 2u), ((r * 16u) % 32u) + (2 * c), 2u);\n blocks[b6_idx] = insertBits(blocks[b6_idx], extractBits(unpacked[unpacked_idx], 6u, 1u), ((r * 8u) % 32u) + c, 1u);\n }\n }\n}\n\n").concat(emptyShader);var preprocessShaderSources={3:preprocessBlocks3BitShaderSource,5:preprocessBlocks5BitShaderSource,6:preprocessBlocks6BitShaderSource,7:preprocessBlocks7BitShaderSource};var preprocessShaderNames={3:"pv_picollm_weight_block_mixed_16x8_preprocess_blocks_3bit_shader",5:"pv_picollm_weight_block_mixed_16x8_preprocess_blocks_5bit_shader",6:"pv_picollm_weight_block_mixed_16x8_preprocess_blocks_6bit_shader",7:"pv_picollm_weight_block_mixed_16x8_preprocess_blocks_7bit_shader"};var BM=8;var BN=32;var TM=2;var TN=16;var TC=rowsPerBlock*BM*BN/(TM*TN);var constantSnippet="\nconst BM = ".concat(BM,"u;\nconst BN = ").concat(BN,"u;\n\nconst TM = ").concat(TM,"u;\nconst TN = ").concat(TN,"u;\n\nconst TC = ").concat(TC,"u;\n\nconst ROW_PER_BLOCK = ").concat(rowsPerBlock,"u;\nconst COL_PER_BLOCK = ").concat(columnsPerBlock,"u;\n\nconst VEC_COL_PER_BLOCK = COL_PER_BLOCK / 4u;\n\nconst block_size: u32 = (COL_PER_BLOCK * ROW_PER_BLOCK * bit_depth) / 32u;\n\n");var forwardMultipleInputArgsSnippet="\nstruct argsStruct {\n n: u32,\n m: u32,\n total_nbc: u32,\n k: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: 
array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array;\n";var forwardMultipleSharedPrivateMemSnippet="\nvar shared_x: array, BN * VEC_COL_PER_BLOCK>;\nvar shared_ab: array;\nvar shared_w: array, BM * ROW_PER_BLOCK * VEC_COL_PER_BLOCK>;\n\nvar local_x: array, TN * VEC_COL_PER_BLOCK>;\nvar local_x_sums: array;\nvar local_results: array;\n";var forwardMultipleLocalVarSnippet="\n let tid = local_id.x;\n let bm_idx = workgroup_id.x;\n let bn_idx = workgroup_id.y;\n\n let local_bm_idx = bm_idx * BM;\n let local_bn_idx = bn_idx * BN;\n \n let n_idx = tid % (BN / TN);\n let k_idx = tid / (BN / TN) / (BM * ROW_PER_BLOCK / TM);\n let m_idx = tid / (BN / TN) % (BM * ROW_PER_BLOCK / TM);\n";var forwardMultipleLoadW1Bit="\n let b0 = blocks[src + (row / 4u)];\n\n let b0_offset_base = (row * 8u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b0_offset = b0_offset_base + (c * 4u); \n shared_w[dst + c] = vec4(\n f32(extractBits(b0, b0_offset, 1u)), \n f32(extractBits(b0, b0_offset + 1, 1u)),\n f32(extractBits(b0, b0_offset + 2, 1u)),\n f32(extractBits(b0, b0_offset + 3, 1u)));\n }\n";var forwardMultipleLoadW2Bit="\n let b01 = blocks[src + (row / 2u)];\n \n let b01_offset_base = (row * 16u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) { \n let b01_offset = b01_offset_base + (c * 8u);\n shared_w[dst + c] = vec4(\n f32(extractBits(b01, b01_offset, 2u)), \n f32(extractBits(b01, b01_offset + 2, 2u)),\n f32(extractBits(b01, b01_offset + 4, 2u)),\n f32(extractBits(b01, b01_offset + 6, 2u)));\n } \n";var forwardMultipleLoadW3Bit="\n let b01 = blocks[src + (row / 2u)];\n let b2 = blocks[src + 8u + (row / 4u)]; \n\n let b01_offset_base = (row * 16u) % 32u;\n let b2_offset_base = (row * 8u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b01_offset = b01_offset_base + (c * 8u);\n let b2_offset = b2_offset_base + (c * 4u);\n\n shared_w[dst + c] = vec4(\n 
f32(insertBits(extractBits(b01, b01_offset, 2u), extractBits(b2, b2_offset, 1u), 2u, 1u)), \n f32(insertBits(extractBits(b01, b01_offset + 2, 2u), extractBits(b2, b2_offset + 1, 1u), 2u, 1u)),\n f32(insertBits(extractBits(b01, b01_offset + 4, 2u), extractBits(b2, b2_offset + 2, 1u), 2u, 1u)),\n f32(insertBits(extractBits(b01, b01_offset + 6, 2u), extractBits(b2, b2_offset + 3, 1u), 2u, 1u))); \n }\n";var forwardMultipleLoadW4Bit="\n let b03 = blocks[src + row];\n \n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b03_offset = (c * 16u); \n shared_w[dst + c] = vec4(\n f32(extractBits(b03, b03_offset, 4u)), \n f32(extractBits(b03, b03_offset + 4, 4u)),\n f32(extractBits(b03, b03_offset + 8, 4u)),\n f32(extractBits(b03, b03_offset + 12, 4u)));\n }\n";var forwardMultipleLoadW5Bit="\n let b03 = blocks[src + row];\n let b4 = blocks[src + 16u + (row / 4u)];\n \n let b4_offset_base = (row * 8u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b03_offset = (c * 16u);\n let b4_offset = b4_offset_base + (c * 4u);\n shared_w[dst + c] = vec4(\n f32(insertBits(extractBits(b03, b03_offset, 4u), extractBits(b4, b4_offset, 1u), 4u, 1u)), \n f32(insertBits(extractBits(b03, b03_offset + 4, 4u), extractBits(b4, b4_offset + 1, 1u), 4u, 1u)),\n f32(insertBits(extractBits(b03, b03_offset + 8, 4u), extractBits(b4, b4_offset + 2, 1u), 4u, 1u)),\n f32(insertBits(extractBits(b03, b03_offset + 12, 4u), extractBits(b4, b4_offset + 3, 1u), 4u, 1u)));\n }\n";var forwardMultipleLoadW6Bit="\n let b03 = blocks[src + row];\n let b45 = blocks[src + 16u + (row / 2u)];\n \n let b45_offset_base = (row * 16u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) { \n let b03_offset = (c * 16u);\n let b45_offset = b45_offset_base + (c * 8u);\n shared_w[dst + c] = vec4(\n f32(insertBits(extractBits(b03, b03_offset, 4u), extractBits(b45, b45_offset, 2u), 4u, 2u)), \n f32(insertBits(extractBits(b03, b03_offset + 4, 4u), extractBits(b45, b45_offset + 2, 2u), 4u, 2u)),\n 
f32(insertBits(extractBits(b03, b03_offset + 8, 4u), extractBits(b45, b45_offset + 4, 2u), 4u, 2u)),\n f32(insertBits(extractBits(b03, b03_offset + 12, 4u), extractBits(b45, b45_offset + 6, 2u), 4u, 2u)));\n }\n";var forwardMultipleLoadW7Bit="\n let b03 = blocks[src + row];\n let b45 = blocks[src + 16u + (row / 2u)];\n let b6 = blocks[src + 24u + (row / 4u)];\n \n let b45_offset_base = (row * 16u) % 32u;\n let b6_offset_base = (row * 8u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b03_offset = (c * 16u);\n let b45_offset = b45_offset_base + (c * 8u);\n let b6_offset = b6_offset_base + (c * 4u);\n \n shared_w[dst + c] = vec4(\n f32(insertBits(insertBits(extractBits(b03, b03_offset, 4u), extractBits(b45, b45_offset, 2u), 4u, 2u), extractBits(b6, b6_offset, 1u), 6u, 1u)), \n f32(insertBits(insertBits(extractBits(b03, b03_offset + 4, 4u), extractBits(b45, b45_offset + 2, 2u), 4u, 2u), extractBits(b6, b6_offset + 1, 1u), 6u, 1u)),\n f32(insertBits(insertBits(extractBits(b03, b03_offset + 8, 4u), extractBits(b45, b45_offset + 4, 2u), 4u, 2u), extractBits(b6, b6_offset + 2, 1u), 6u, 1u)),\n f32(insertBits(insertBits(extractBits(b03, b03_offset + 12, 4u), extractBits(b45, b45_offset + 6, 2u), 4u, 2u), extractBits(b6, b6_offset + 3, 1u), 6u, 1u)));\n }\n";var forwardMultipleLoadW8Bit="\n let b07_offset = src + (row * 2);\n \n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b07 = blocks[b07_offset + c];\n shared_w[dst + c] = vec4(\n f32(extractBits(b07, 0u, 8u)), \n f32(extractBits(b07, 8u, 8u)),\n f32(extractBits(b07, 16u, 8u)),\n f32(extractBits(b07, 24u, 8u)));\n }\n";var forwardLoadWBitDepthSnippets={1:forwardMultipleLoadW1Bit,2:forwardMultipleLoadW2Bit,3:forwardMultipleLoadW3Bit,4:forwardMultipleLoadW4Bit,5:forwardMultipleLoadW5Bit,6:forwardMultipleLoadW6Bit,7:forwardMultipleLoadW7Bit,8:forwardMultipleLoadW8Bit};var forwardMultipleLoadXSnippet="\n let total_work_x = VEC_COL_PER_BLOCK * BN;\n for (var local_idx = 0u; local_idx < 
divide_pad(total_work_x, TC); local_idx++) {\n let idx = local_idx * TC + tid;\n if (idx < total_work_x) { \n let n_load_idx = local_bn_idx + idx / VEC_COL_PER_BLOCK;\n let inner_idx = idx % VEC_COL_PER_BLOCK;\n \n if (bk_idx < args.k && n_load_idx < args.n) { \n let x_idx = (args.x_offset / 4u) + ((bk_idx * args.n + n_load_idx) * VEC_COL_PER_BLOCK + inner_idx); \n shared_x[idx] = x[x_idx];\n } else {\n shared_x[idx] = vec4(0.0);\n }\n }\n }\n";var forwardMultipleLoadABSnippet="\n let total_work_ab = BM * 2;\n for (var local_idx = 0u; local_idx < divide_pad(total_work_ab, TC); local_idx++) {\n let idx = local_idx * TC + tid;\n if (idx < total_work_ab) {\n let m_load_idx = local_bm_idx + idx / 2; \n let inner_idx = (idx % 2) * 16u;\n \n if (m_load_idx < args.m && bk_idx < args.k) {\n let ab_bits = extractBits(metas[args.metas_offset + (m_load_idx * args.k + bk_idx)], inner_idx, 16u);\n shared_ab[idx] = from_fp510(ab_bits); \n } else {\n shared_ab[idx] = 0.0;\n }\n }\n }\n";var forwardMultipleLoadWSnippet=function forwardMultipleLoadWSnippet(bitDepth){return"\n let total_work_w = BM * ROW_PER_BLOCK;\n for (var local_idx = 0u; local_idx < divide_pad(total_work_w, TC); local_idx++) {\n let idx = local_idx * TC + tid;\n if (idx < total_work_w) {\n let m_load_idx = local_bm_idx + idx / ROW_PER_BLOCK;\n let row = idx % ROW_PER_BLOCK;\n let dst = idx * VEC_COL_PER_BLOCK;\n\n if (m_load_idx < args.m) {\n let src = args.blocks_offset + (m_load_idx * args.k + bk_idx) * block_size;\n ".concat(forwardLoadWBitDepthSnippets[bitDepth],"\n } else { \n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n shared_w[dst + c] = vec4(0.0);\n }\n }\n }\n }\n")};var forwardMultipleCopyXSnippet="\nfor (var tn_idx = 0u; tn_idx < TN; tn_idx++) { \n var x_sum_vec = vec4(0.0); \n let local_x_idx = tn_idx * VEC_COL_PER_BLOCK;\n let shared_x_idx = (n_idx * TN + tn_idx) * VEC_COL_PER_BLOCK + (k_idx * VEC_COL_PER_BLOCK);\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n local_x[local_x_idx + c] = 
shared_x[shared_x_idx + c];\n x_sum_vec += local_x[local_x_idx + c];\n }\n local_x_sums[tn_idx] = x_sum_vec.x + x_sum_vec.y + x_sum_vec.z + x_sum_vec.w; \n}\n";var forwardMultipleComputeResultsSnippet="\n for (var tm_idx = 0u; tm_idx < TM; tm_idx++) { \n let shared_ab_idx = ((m_idx * TM + tm_idx) / ROW_PER_BLOCK + k_idx) * 2;\n let alpha = shared_ab[shared_ab_idx];\n let beta = shared_ab[shared_ab_idx + 1]; \n let shared_w_idx = ((m_idx * TM + tm_idx) + k_idx) * VEC_COL_PER_BLOCK;\n \n for (var tn_idx = 0u; tn_idx < TN; tn_idx++) { \n let local_x_idx = tn_idx * VEC_COL_PER_BLOCK;\n \n var swx_vec = vec4(0.0); \n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) { \n swx_vec += shared_w[shared_w_idx + c] * local_x[local_x_idx + c];\n }\n let swx = swx_vec.x + swx_vec.y + swx_vec.z + swx_vec.w;\n \n let kappa = alpha * local_x_sums[tn_idx]; \n let results_idx = tm_idx * TN + tn_idx;\n local_results[results_idx] += kappa + (beta * swx);\n }\n }\n";var forwardMultipleWriteResultsSnippet="\nfor (var tm_idx = 0u; tm_idx < TM; tm_idx++) {\n let row = local_bm_idx * ROW_PER_BLOCK + (m_idx * TM + tm_idx); \n for (var tn_idx = 0u; tn_idx < TN; tn_idx++) { \n let col = local_bn_idx + (n_idx * TN + tn_idx);\n if (row < args.m * ROW_PER_BLOCK && col < args.n) {\n let y_idx = args.y_offset + ((row / ROW_PER_BLOCK) * args.n + col) * ROW_PER_BLOCK + (row % ROW_PER_BLOCK);\n let results_idx = tm_idx * TN + tn_idx;\n \n y[y_idx] += local_results[results_idx];\n }\n }\n}\n";var forwardMultipleShaderSources=function forwardMultipleShaderSources(bitDepth){return"\n\n".concat(forwardMultipleInputArgsSnippet,"\n\n").concat(constantSnippet,"\n\n").concat(forwardMultipleSharedPrivateMemSnippet,"\n\n").concat(fromFP510Function,"\n\n").concat(dividePadFunction,"\n\nconst bit_depth: u32 = ").concat(bitDepth,"u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n \n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, 
workgroup_size_z)\nfn main(\n @builtin(local_invocation_id) local_id: vec3,\n @builtin(workgroup_id) workgroup_id: vec3\n) {\n ").concat(forwardMultipleLocalVarSnippet,"\n \n for (var bk_idx = 0u; bk_idx < args.k; bk_idx++) { \n ").concat(forwardMultipleLoadXSnippet,"\n ").concat(forwardMultipleLoadABSnippet," \n ").concat(forwardMultipleLoadWSnippet(bitDepth)," \n workgroupBarrier();\n \n ").concat(forwardMultipleCopyXSnippet,"\n ").concat(forwardMultipleComputeResultsSnippet,"\n workgroupBarrier();\n }\n \n ").concat(forwardMultipleWriteResultsSnippet,"\n}\n\n").concat(emptyShader,"\n")};var forwardShuffleXShaderSource="\nstruct argsStruct {\n n: u32,\n shape1: u32,\n x_offset: u32,\n indices_offset: u32, \n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array;\n\n@group(0) @binding(2)\nvar indices: array;\n\n@group(0) @binding(3)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n || global_id.y >= args.shape1) {\n return;\n } \n\n let b = global_id.x;\n let i = global_id.y;\n \n let c = i / 8u;\n let j = i % 8u;\n y[((c * args.n) + b) * 8 + j] = x[args.x_offset + (b * args.shape1) + indices[args.indices_offset + i]];\n}\n\n".concat(emptyShader,"\n");var forwardSingleReduceYShaderSource="\nstruct argsStruct {\n nvr: u32,\n nbc: u32, \n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar y: array>;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) { \n if (global_id.x > args.nvr) {\n return;\n }\n\n let x_start = 
global_id.x * args.nbc;\n var sum: vec4 = vec4(0.0, 0.0, 0.0, 0.0);\n for (var i = 0u; i < args.nbc; i++) {\n sum += x[x_start + i]; \n }\n y[global_id.x] += sum;\n}\n\n".concat(emptyShader);var forwardShuffleYShaderSource="\nstruct argsStruct {\n n: u32,\n shape0: u32, \n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array;\n\n@group(0) @binding(2)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n || global_id.y >= args.shape0) {\n return;\n } \n \n let b = global_id.x;\n let i = global_id.y;\n \n let r = i / 16u;\n let j = i % 16u;\n y[(b * args.shape0) + (r * 16) + j] = x[(((r * args.n) + b) * 16) + j];\n}\n\n".concat(emptyShader);var addBiasShaderSource="\nstruct argsStruct {\n dimension: u32\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar bias: array;\n\n@group(0) @binding(2)\nvar y: array;\n\n".concat(fromFP510Function,"\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) { \n y[(global_id.x * args.dimension) + global_id.y] += bias[global_id.y];\n}\n\n").concat(emptyShader);var forwardSingleBitDepth1ShaderSource="\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(fromFP510Function,"\n\nconst block_size: u32 
= 4u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n\n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(columnsPerBlock,";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b0_start = row_blocks_start + br_offset + (bc * block_size);\n var b0_offset = 0u;\n \n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(columnsPerBlock,"; j+=4) {\n \n var b0 = blocks[b0_start];\n \n let w0_0 = f32(extractBits(b0, b0_offset + j, 1u)); \n let w0_1 = f32(extractBits(b0, b0_offset + j + 1, 1u)); \n let w0_2 = f32(extractBits(b0, b0_offset + j + 2, 1u));\n let w0_3 = f32(extractBits(b0, b0_offset + j + 3, 1u));\n \n b0_offset = 8u;\n \n let w1_0 = f32(extractBits(b0, b0_offset + j, 1u)); \n let w1_1 = f32(extractBits(b0, b0_offset + j + 1, 1u)); \n let w1_2 = f32(extractBits(b0, b0_offset + j + 2, 1u));\n let w1_3 = f32(extractBits(b0, b0_offset + j + 3, 1u));\n \n b0_offset = 16u;\n \n let w2_0 = f32(extractBits(b0, b0_offset + j, 1u)); \n let w2_1 = f32(extractBits(b0, b0_offset + j + 1, 1u)); \n let w2_2 = f32(extractBits(b0, b0_offset + j + 2, 1u));\n let w2_3 = f32(extractBits(b0, b0_offset + j + 3, 1u));\n \n b0_offset = 24u;\n \n let w3_0 = f32(extractBits(b0, b0_offset + j, 1u)); \n let w3_1 = f32(extractBits(b0, b0_offset 
+ j + 1, 1u)); \n let w3_2 = f32(extractBits(b0, b0_offset + j + 2, 1u));\n let w3_3 = f32(extractBits(b0, b0_offset + j + 3, 1u));\n \n b0_offset = 0u;\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w; \n }\n \n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(emptyShader);var forwardSingleBitDepth2ShaderSource="\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(fromFP510Function,"\n\nconst block_size: u32 = 8u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n\n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(columnsPerBlock,";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + 
c) / 4u);\n \n var b01_start = row_blocks_start + (br_offset * 2u) + (bc * block_size);\n var b01_offset = 0u;\n \n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(columnsPerBlock,"; j+=4) {\n \n var b01 = blocks[b01_start];\n \n let w0_0 = f32(extractBits(b01, b01_offset + (2u * j), 2u)); \n let w0_1 = f32(extractBits(b01, b01_offset + (2u * (j + 1)), 2u)); \n let w0_2 = f32(extractBits(b01, b01_offset + (2u * (j + 2)), 2u));\n let w0_3 = f32(extractBits(b01, b01_offset + (2u * (j + 3)), 2u));\n \n b01_offset = 16u;\n \n let w1_0 = f32(extractBits(b01, b01_offset + (2u * j), 2u)); \n let w1_1 = f32(extractBits(b01, b01_offset + (2u * (j + 1)), 2u)); \n let w1_2 = f32(extractBits(b01, b01_offset + (2u * (j + 2)), 2u));\n let w1_3 = f32(extractBits(b01, b01_offset + (2u * (j + 3)), 2u));\n \n b01_offset = 0u; \n b01 = blocks[b01_start + 1u];\n \n let w2_0 = f32(extractBits(b01, b01_offset + (2u * j), 2u)); \n let w2_1 = f32(extractBits(b01, b01_offset + (2u * (j + 1)), 2u)); \n let w2_2 = f32(extractBits(b01, b01_offset + (2u * (j + 2)), 2u));\n let w2_3 = f32(extractBits(b01, b01_offset + (2u * (j + 3)), 2u));\n \n b01_offset = 16u;\n \n let w3_0 = f32(extractBits(b01, b01_offset + (2u * j), 2u)); \n let w3_1 = f32(extractBits(b01, b01_offset + (2u * (j + 1)), 2u)); \n let w3_2 = f32(extractBits(b01, b01_offset + (2u * (j + 2)), 2u));\n let w3_3 = f32(extractBits(b01, b01_offset + (2u * (j + 3)), 2u));\n \n b01_offset = 0u;\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w; \n }\n \n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(emptyShader);var 
/*
 * WGSL source for the bit-depth-3 "forward single" kernel (block_size = 12u).
 * The string is assembled with .concat(): fromFP510Function, columnsPerBlock and
 * emptyShader are spliced in from definitions outside this span.
 * Per invocation: alpha/beta are decoded from the low/high 16 bits of one `metas`
 * word via from_fp510; each weight is a 2-bit field of `blocks[b01_start(+1)]`
 * combined with a 1-bit field of `blocks[b2_start]` through insertBits(.., 2u, 1u);
 * the 16 weights of an iteration form a 4x4 matrix that multiplies one element of
 * `x`, and `(swx * beta) + kappa` (kappa = alpha * x_sum) is accumulated into `y`.
 * NOTE(review): type parameters (e.g. on `array`, `vec3`, `mat4x4`, `var`) look
 * stripped in this copy of the WGSL -- confirm against the upstream shader source.
 */
forwardSingleBitDepth3ShaderSource="\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(fromFP510Function,"\n\nconst block_size: u32 = 12u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id : vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(columnsPerBlock,";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n\n let x_start = ((args.x_offset + c) / 4u);\n \n var b01_start = row_blocks_start + (br_offset * 2u) + (bc * block_size);\n var b2_start = row_blocks_start + br_offset + (bc * block_size) + 8u;\n var b01_offset = 0u;\n var b2_offset = 0u;\n\n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(columnsPerBlock,"; j+=4) { \n \n var b01 = blocks[b01_start];\n var b2 = blocks[b2_start];\n \n var b01_bits = extractBits(b01, b01_offset + (2u * j), 2u);\n var b2_bit = extractBits(b2, b2_offset + j, 1u);\n let w0_0 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 1)), 2u);\n b2_bit 
= extractBits(b2, b2_offset + (j + 1), 1u);\n let w0_1 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 2)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 2), 1u);\n let w0_2 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 3)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 3), 1u);\n let w0_3 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_offset = 16u;\n b2_offset = 8u;\n \n b01_bits = extractBits(b01, b01_offset + (2u * j), 2u);\n b2_bit = extractBits(b2, b2_offset + j, 1u); \n let w1_0 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 1)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 1), 1u);\n let w1_1 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 2)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 2), 1u);\n let w1_2 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 3)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 3), 1u);\n let w1_3 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_offset = 0u;\n b2_offset = 16u; \n b01 = blocks[b01_start + 1u];\n \n b01_bits = extractBits(b01, b01_offset + (2u * j), 2u);\n b2_bit = extractBits(b2, b2_offset + j, 1u); \n let w2_0 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 1)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 1), 1u);\n let w2_1 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 2)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 2), 1u);\n let w2_2 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 3)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 3), 1u);\n let w2_3 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_offset = 16u;\n b2_offset = 24u; \n 
\n b01_bits = extractBits(b01, b01_offset + (2u * j), 2u);\n b2_bit = extractBits(b2, b2_offset + j, 1u); \n let w3_0 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 1)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 1), 1u);\n let w3_1 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 2)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 2), 1u);\n let w3_2 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 3)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 3), 1u);\n let w3_3 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_offset = 0u;\n b2_offset = 0u; \n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n \n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(emptyShader);
/*
 * WGSL source for the bit-depth-4 "forward single" kernel (block_size = 16u).
 * Same bindings and accumulation into `y` as the other depths; here every weight
 * is a plain 4-bit field read with extractBits from the four consecutive words
 * blocks[b03_start] .. blocks[b03_start + 3], one word per matrix row.
 * NOTE(review): type parameters look stripped in this copy of the WGSL -- confirm
 * against the upstream shader source.
 */
var forwardSingleBitDepth4ShaderSource="\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(fromFP510Function,"\n\nconst block_size: u32 = 16u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || 
global_id.y >= args.bit_depth_nbc) {\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(columnsPerBlock,";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b03_start = row_blocks_start + (br_offset * 4u) + (bc * block_size); \n\n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n\n for (var j = 0u; j < ").concat(columnsPerBlock,"; j+=4) {\n \n var b03 = blocks[b03_start]; \n \n let w0_0 = f32(extractBits(b03, 4u * j, 4u)); \n let w0_1 = f32(extractBits(b03, 4u * (j + 1), 4u)); \n let w0_2 = f32(extractBits(b03, 4u * (j + 2), 4u));\n let w0_3 = f32(extractBits(b03, 4u * (j + 3), 4u));\n \n b03 = blocks[b03_start + 1];\n \n let w1_0 = f32(extractBits(b03, 4u * j, 4u)); \n let w1_1 = f32(extractBits(b03, 4u * (j + 1), 4u)); \n let w1_2 = f32(extractBits(b03, 4u * (j + 2), 4u));\n let w1_3 = f32(extractBits(b03, 4u * (j + 3), 4u));\n \n b03 = blocks[b03_start + 2];\n \n let w2_0 = f32(extractBits(b03, 4u * j, 4u)); \n let w2_1 = f32(extractBits(b03, 4u * (j + 1), 4u)); \n let w2_2 = f32(extractBits(b03, 4u * (j + 2), 4u));\n let w2_3 = f32(extractBits(b03, 4u * (j + 3), 4u));\n \n b03 = blocks[b03_start + 3];\n \n let w3_0 = f32(extractBits(b03, 4u * j, 4u)); \n let w3_1 = f32(extractBits(b03, 4u * (j + 1), 4u)); \n let w3_2 = f32(extractBits(b03, 4u * (j + 2), 4u));\n let w3_3 = f32(extractBits(b03, 4u * (j + 3), 4u));\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * 
x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(emptyShader);
/*
 * WGSL source for the bit-depth-5 "forward single" kernel (block_size = 20u).
 * Each weight joins a 4-bit field from blocks[b03_start .. b03_start + 3] with a
 * 1-bit field from blocks[b4_start] (b4_start sits 16u words into the block) via
 * insertBits(.., 4u, 1u); b4_offset steps 0/8/16/24 across the four rows.
 * NOTE(review): type parameters look stripped in this copy of the WGSL -- confirm
 * against the upstream shader source.
 */
var forwardSingleBitDepth5ShaderSource="\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(fromFP510Function,"\n\nconst block_size: u32 = 20u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(columnsPerBlock,";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b03_start = row_blocks_start + (br_offset * 4u) + (bc * block_size);\n var b4_start = row_blocks_start + br_offset + (bc * block_size) + 16u;\n \n var b4_offset = 0u;\n \n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(columnsPerBlock,"; j+=4) {\n \n var b03 = blocks[b03_start];\n var b4 = 
blocks[b4_start];\n \n var b03_bits = extractBits(b03, 4u * j, 4u);\n var b4_bit = extractBits(b4, b4_offset + j, 1u); \n let w0_0 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 1), 1u);\n let w0_1 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 2), 1u);\n let w0_2 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 3), 1u);\n let w0_3 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03 = blocks[b03_start + 1];\n b4_offset = 8u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b4_bit = extractBits(b4, b4_offset + j, 1u); \n let w1_0 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 1), 1u);\n let w1_1 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 2), 1u);\n let w1_2 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 3), 1u);\n let w1_3 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03 = blocks[b03_start + 2];\n b4_offset = 16u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b4_bit = extractBits(b4, b4_offset + j, 1u); \n let w2_0 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 1), 1u);\n let w2_1 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 2), 1u);\n let w2_2 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 3), 1u);\n let w2_3 = 
f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03 = blocks[b03_start + 3];\n b4_offset = 24u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b4_bit = extractBits(b4, b4_offset + j, 1u); \n let w3_0 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 1), 1u);\n let w3_1 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 2), 1u);\n let w3_2 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 3), 1u);\n let w3_3 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b4_offset = 0u;\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(emptyShader);
/*
 * WGSL source for the bit-depth-6 "forward single" kernel (block_size = 24u).
 * Each weight joins a 4-bit field from blocks[b03_start .. b03_start + 3] with a
 * 2-bit field from blocks[b45_start] / blocks[b45_start + 1] via
 * insertBits(.., 4u, 2u); b45_offset alternates 0u/16u between rows.
 * NOTE(review): type parameters look stripped in this copy of the WGSL -- confirm
 * against the upstream shader source.
 */
var forwardSingleBitDepth6ShaderSource="\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(fromFP510Function,"\n\nconst block_size: u32 = 24u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 
4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(columnsPerBlock,";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b03_start = row_blocks_start + (br_offset * 4u) + (bc * block_size);\n var b45_start = row_blocks_start + (br_offset * 2u) + (bc * block_size) + 16u;\n \n var b45_offset = 0u;\n \n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(columnsPerBlock,"; j+=4) {\n \n var b03 = blocks[b03_start];\n var b45 = blocks[b45_start];\n \n var b03_bits = extractBits(b03, 4u * j, 4u);\n var b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n let w0_0 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n let w0_1 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n let w0_2 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n let w0_3 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03 = blocks[b03_start + 1];\n b45_offset = 16u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n let w1_0 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n let 
w1_1 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n let w1_2 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n let w1_3 = f32(insertBits(b03_bits, b45_bits, 4u, 2u)); \n \n b03 = blocks[b03_start + 2];\n b45 = blocks[b45_start + 1];\n b45_offset = 0u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n let w2_0 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n let w2_1 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n let w2_2 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n let w2_3 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03 = blocks[b03_start + 3];\n b45_offset = 16u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n let w3_0 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n let w3_1 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n let w3_2 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n let w3_3 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b45_offset = 0u;\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, 
\n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(emptyShader);
/*
 * WGSL source for the bit-depth-7 "forward single" kernel (block_size = 28u).
 * Each weight is built with two nested insertBits calls: a 4-bit field from
 * blocks[b03_start ..], a 2-bit field from blocks[b45_start(+1)] inserted at bit 4,
 * and a 1-bit field from blocks[b6_start] inserted at bit 6.
 * NOTE(review): type parameters look stripped in this copy of the WGSL -- confirm
 * against the upstream shader source.
 */
var forwardSingleBitDepth7ShaderSource="\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(fromFP510Function,"\n\nconst block_size: u32 = 28u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(columnsPerBlock,";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b03_start = row_blocks_start + (br_offset * 4u) + (bc * block_size);\n var b45_start = row_blocks_start + (br_offset * 2u) + (bc * block_size) + 16u;\n var b6_start = row_blocks_start + br_offset + (bc * 
block_size) + 24u;\n \n var b45_offset = 0u;\n var b6_offset = 0u;\n \n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(columnsPerBlock,"; j+=4) {\n \n var b03 = blocks[b03_start];\n var b45 = blocks[b45_start];\n var b6 = blocks[b6_start];\n \n var b03_bits = extractBits(b03, 4u * j, 4u);\n var b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n var b6_bit = extractBits(b6, b6_offset + j, 1u);\n let w0_0 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 1, 1u);\n let w0_1 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 2, 1u);\n let w0_2 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u)); \n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 3, 1u);\n let w0_3 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03 = blocks[b03_start + 1];\n b45_offset = 16u;\n b6_offset = 8u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n b6_bit = extractBits(b6, b6_offset + j, 1u);\n let w1_0 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 1, 1u);\n let w1_1 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n b6_bit = extractBits(b6, b6_offset + j 
+ 2, 1u);\n let w1_2 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u)); \n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 3, 1u);\n let w1_3 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03 = blocks[b03_start + 2];\n b45 = blocks[b45_start + 1];\n b45_offset = 0u;\n b6_offset = 16u;\n\n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n b6_bit = extractBits(b6, b6_offset + j, 1u);\n let w2_0 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 1, 1u);\n let w2_1 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 2, 1u);\n let w2_2 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u)); \n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 3, 1u);\n let w2_3 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03 = blocks[b03_start + 3];\n b45_offset = 16u;\n b6_offset = 24u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n b6_bit = extractBits(b6, b6_offset + j, 1u);\n let w3_0 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 1, 1u);\n let w3_1 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 
6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 2, 1u);\n let w3_2 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u)); \n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 3, 1u);\n let w3_3 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b45_offset = 0u;\n b6_offset = 0u;\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(emptyShader);
/*
 * WGSL source for the bit-depth-8 "forward single" kernel (block_size = 32u).
 * Unlike the other depths the loop bound is the literal 2 (columnsPerBlock is only
 * used for `c`): iteration j reads the words blocks[b07_start + {0, 2, 4, 6} + j]
 * and takes each of their four bytes as one weight via extractBits(.., 8u).
 * NOTE(review): type parameters look stripped in this copy of the WGSL -- confirm
 * against the upstream shader source.
 */
var forwardSingleBitDepth8ShaderSource="\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(fromFP510Function,"\n\nconst block_size: u32 = 32u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(columnsPerBlock,";\n let br = global_id.x / 4u; \n let bc = 
global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b07_start = row_blocks_start + (br_offset * 8u) + (bc * block_size); \n\n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n\n for (var j = 0u; j < 2; j++) {\n \n var b07 = blocks[b07_start + j];\n \n let w0_0 = f32(extractBits(b07, 0u, 8u)); \n let w0_1 = f32(extractBits(b07, 8u, 8u)); \n let w0_2 = f32(extractBits(b07, 16u, 8u));\n let w0_3 = f32(extractBits(b07, 24u, 8u));\n \n b07 = blocks[b07_start + 2 + j];\n \n let w1_0 = f32(extractBits(b07, 0u, 8u)); \n let w1_1 = f32(extractBits(b07, 8u, 8u)); \n let w1_2 = f32(extractBits(b07, 16u, 8u));\n let w1_3 = f32(extractBits(b07, 24u, 8u));\n \n b07 = blocks[b07_start + 4 + j];\n \n let w2_0 = f32(extractBits(b07, 0u, 8u)); \n let w2_1 = f32(extractBits(b07, 8u, 8u)); \n let w2_2 = f32(extractBits(b07, 16u, 8u));\n let w2_3 = f32(extractBits(b07, 24u, 8u));\n \n b07 = blocks[b07_start + 6 + j];\n \n let w3_0 = f32(extractBits(b07, 0u, 8u)); \n let w3_1 = f32(extractBits(b07, 8u, 8u)); \n let w3_2 = f32(extractBits(b07, 16u, 8u));\n let w3_3 = f32(extractBits(b07, 24u, 8u));\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_vec = x[x_start + j];\n res[j] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(emptyShader);var
forwardSingleShaderSources={1:forwardSingleBitDepth1ShaderSource,2:forwardSingleBitDepth2ShaderSource,3:forwardSingleBitDepth3ShaderSource,4:forwardSingleBitDepth4ShaderSource,5:forwardSingleBitDepth5ShaderSource,6:forwardSingleBitDepth6ShaderSource,7:forwardSingleBitDepth7ShaderSource,8:forwardSingleBitDepth8ShaderSource};var forwardSingleShaderNames={1:"pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_1_shader",2:"pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_2_shader",3:"pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_3_shader",4:"pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_4_shader",5:"pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_5_shader",6:"pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_6_shader",7:"pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_7_shader",8:"pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_8_shader"};var forwardShaderSources={1:forwardMultipleShaderSources(1),2:forwardMultipleShaderSources(2),3:forwardMultipleShaderSources(3),4:forwardMultipleShaderSources(4),5:forwardMultipleShaderSources(5),6:forwardMultipleShaderSources(6),7:forwardMultipleShaderSources(7),8:forwardMultipleShaderSources(8)};var forwardShaderNames={1:"pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_1_shader",2:"pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_2_shader",3:"pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_3_shader",4:"pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_4_shader",5:"pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_5_shader",6:"pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_6_shader",7:"pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_7_shader",8:"pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_8_shader"};var forwardShuffleXShaderName="pv_picollm_weight_block_mixed_16x8_forward_shuffle_x_shader";var 
forwardShuffleYShaderName="pv_picollm_weight_block_mixed_16x8_forward_shuffle_y_shader";var addBiasShaderName="pv_picollm_weight_block_mixed_16x8_add_bias_shader";var forwardSingleReduceYShaderName="pv_picollm_weight_block_mixed_16x8_forward_single_reduce_y_shader";var _weightBlockMixed16x;var loadPreprocessBlocksShader=function loadPreprocessBlocksShader(device,bitDepth){var bindGroupLayout=device.createBindGroupLayout({label:"weight preprocess blocks ".concat(bitDepth," bind group layout"),entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"weight preprocess blocks ".concat(bitDepth," pipeline layout"),bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"weight preprocess blocks ".concat(bitDepth," shader module"),code:preprocessShaderSources[bitDepth]});var computePipeline=device.createComputePipeline({label:"weight preprocess blocks ".concat(bitDepth," pipeline"),layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_x:preprocessDim,workgroup_size_y:preprocessDim}}});return{computePipeline:computePipeline}};var loadForwardShuffleXShader=function loadForwardShuffleXShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"weight shuffle x bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"weight shuffle x pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"weight shuffle x shader module",code:forwardShuffleXShaderSource});var 
computePipeline=device.createComputePipeline({label:"weight shuffle x pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_y:PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE}}});return{computePipeline:computePipeline}};var loadForwardSingleReduceYShader=function loadForwardSingleReduceYShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"weight single reduce y bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"weight single reduce y pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"weight single reduce y shader module",code:forwardSingleReduceYShaderSource});var computePipeline=device.createComputePipeline({label:"weight single reduce y pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_x:PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE}}});return{computePipeline:computePipeline}};var loadForwardShuffleYShader=function loadForwardShuffleYShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"weight shuffle y bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"weight shuffle y pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"weight shuffle y shader module",code:forwardShuffleYShaderSource});var computePipeline=device.createComputePipeline({label:"weight shuffle y 
pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint}});return{computePipeline:computePipeline}};var loadForwardSingleShader=function loadForwardSingleShader(device,bitDepth){var entries=[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:4,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}];var bindGroupLayout=device.createBindGroupLayout({label:"weight forward single ".concat(bitDepth," bind group layout"),entries:entries});var pipelineLayout=device.createPipelineLayout({label:"weight forward single ".concat(bitDepth," pipeline layout"),bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"weight forward single ".concat(bitDepth," shader module"),code:forwardSingleShaderSources[bitDepth]});var computePipeline=device.createComputePipeline({label:"weight forward single ".concat(bitDepth," pipeline"),layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_x:weightBlockSize,workgroup_size_y:1}}});return{computePipeline:computePipeline}};var loadForwardShader=function loadForwardShader(device,bitDepth){var entries=[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:4,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}];var bindGroupLayout=device.createBindGroupLayout({label:"weight forward multi ".concat(bitDepth," bind group layout"),entries:entries});var 
pipelineLayout=device.createPipelineLayout({label:"weight forward multi ".concat(bitDepth," pipeline layout"),bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"weight forward multi ".concat(bitDepth," shader module"),code:forwardShaderSources[bitDepth]});var computePipeline=device.createComputePipeline({label:"weight forward multi ".concat(bitDepth," pipeline"),layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_x:TC}}});return{computePipeline:computePipeline}};var loadAddBiasShader=function loadAddBiasShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"weight add bias bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"weight add bias pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"weight add bias shader module",code:addBiasShaderSource});var computePipeline=device.createComputePipeline({label:"weight add bias pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint}});return{computePipeline:computePipeline}};var weightBlockMixed16x8Shaders=(_weightBlockMixed16x={},_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_weightBlockMixed16x,preprocessShaderNames[3],function(device){return loadPreprocessBlocksShader(device,3)}),preprocessShaderNames[5],function(device){return loadPreprocessBlocksShader(device,5)}),preprocessShaderNames[6],function(device){return loadPreprocessBlocksShader(device,6)}),preprocessShaderNames[7],function(device){return 
loadPreprocessBlocksShader(device,7)}),forwardShuffleXShaderName,loadForwardShuffleXShader),forwardShuffleYShaderName,loadForwardShuffleYShader),forwardSingleReduceYShaderName,loadForwardSingleReduceYShader),forwardSingleShaderNames[1],function(device){return loadForwardSingleShader(device,1)}),forwardSingleShaderNames[2],function(device){return loadForwardSingleShader(device,2)}),forwardSingleShaderNames[3],function(device){return loadForwardSingleShader(device,3)}),_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_weightBlockMixed16x,forwardSingleShaderNames[4],function(device){return loadForwardSingleShader(device,4)}),forwardSingleShaderNames[5],function(device){return loadForwardSingleShader(device,5)}),forwardSingleShaderNames[6],function(device){return loadForwardSingleShader(device,6)}),forwardSingleShaderNames[7],function(device){return loadForwardSingleShader(device,7)}),forwardSingleShaderNames[8],function(device){return loadForwardSingleShader(device,8)}),forwardShaderNames[1],function(device){return loadForwardShader(device,1)}),forwardShaderNames[2],function(device){return loadForwardShader(device,2)}),forwardShaderNames[3],function(device){return loadForwardShader(device,3)}),forwardShaderNames[4],function(device){return loadForwardShader(device,4)}),forwardShaderNames[5],function(device){return loadForwardShader(device,5)}),_defineProperty(_defineProperty(_defineProperty(_defineProperty(_weightBlockMixed16x,forwardShaderNames[6],function(device){return loadForwardShader(device,6)}),forwardShaderNames[7],function(device){return loadForwardShader(device,7)}),forwardShaderNames[8],function(device){return loadForwardShader(device,8)}),addBiasShaderName,loadAddBiasShader));var getPicollmWeightBlockMixed16x8WebGpuFunctions=function getPicollmWeightBlockMixed16x8WebGpuFunctions(){var setStatus=function 
setStatus(statusAddress,value){Module.HEAP32[statusAddress/Int32Array.BYTES_PER_ELEMENT]=value};var pvPicollmPreprocessBlocksWebGpu=function pvPicollmPreprocessBlocksWebGpu(objAddress,bitDepth,blocksAddress,blocksOffsetBytes,nbr,nbc,statusAddress){var _gpuBuffers$get;objAddress=unsignedAddress(objAddress);blocksAddress=unsignedAddress(blocksAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[preprocessShaderNames[bitDepth]];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var blocksBuffer=(_gpuBuffers$get=gpuBuffers.get(blocksAddress))===null||_gpuBuffers$get===void 0?void 0:_gpuBuffers$get.buffer;if(!blocksBuffer){console.error("blocks buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(3*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"weight preprocess blocks ".concat(bitDepth," arg buffer"));obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([nbr,nbc,blocksOffsetBytes/4]));obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"weight preprocess blocks ".concat(bitDepth," bind group"),layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:blocksBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,preprocessShaderNames[bitDepth],Math.ceil(nbr/preprocessDim),Math.ceil(nbc/preprocessDim));setStatus(statusAddress,0)};var pvPicollmForwardSingleShuffleXWebGpu=function pvPicollmForwardSingleShuffleXWebGpu(objAddress,xAddress,xOffsetBytes,indicesAddress,indicesOffsetBytes,shape1,yAddress,statusAddress){var 
_gpuBuffers$get2,_gpuBuffers$get3,_gpuBuffers$get4;objAddress=unsignedAddress(objAddress);xAddress=unsignedAddress(xAddress);indicesAddress=unsignedAddress(indicesAddress);yAddress=unsignedAddress(yAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[forwardShuffleXShaderName];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var xBuffer=(_gpuBuffers$get2=gpuBuffers.get(xAddress))===null||_gpuBuffers$get2===void 0?void 0:_gpuBuffers$get2.buffer;if(!xBuffer){console.error("X buffer has not been allocated");setStatus(statusAddress,-1);return}var indicesBuffer=(_gpuBuffers$get3=gpuBuffers.get(indicesAddress))===null||_gpuBuffers$get3===void 0?void 0:_gpuBuffers$get3.buffer;if(!indicesBuffer){console.error("Indices buffer has not been allocated");setStatus(statusAddress,-1);return}var yBuffer=(_gpuBuffers$get4=gpuBuffers.get(yAddress))===null||_gpuBuffers$get4===void 0?void 0:_gpuBuffers$get4.buffer;if(!yBuffer){console.error("Y buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(4*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"weight shuffle x arg buffer");obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([1,shape1,xOffsetBytes/4,indicesOffsetBytes/4]));obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"weight forward single shuffle x bind 
group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:xBuffer}},{binding:2,resource:{buffer:indicesBuffer}},{binding:3,resource:{buffer:yBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,forwardShuffleXShaderName+"_single",1,Math.ceil(shape1/PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE));setStatus(statusAddress,0)};var pvPicollmForwardSingleWebGpu=function pvPicollmForwardSingleWebGpu(objAddress,bitDepth,xAddress,xOffsetBytes,metasAddress,metasOffsetBytes,blocksAddress,blocksOffsetBytes,nbr,totalNbc,bitDepthNbc,yAddress,yOffsetBytes,statusAddress){var _gpuBuffers$get5,_gpuBuffers$get6,_gpuBuffers$get7,_gpuBuffers$get8;objAddress=unsignedAddress(objAddress);xAddress=unsignedAddress(xAddress);metasAddress=unsignedAddress(metasAddress);blocksAddress=unsignedAddress(blocksAddress);yAddress=unsignedAddress(yAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[forwardSingleShaderNames[bitDepth]];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var xBuffer=(_gpuBuffers$get5=gpuBuffers.get(xAddress))===null||_gpuBuffers$get5===void 0?void 0:_gpuBuffers$get5.buffer;if(!xBuffer){console.error("X buffer has not been allocated");setStatus(statusAddress,-1);return}var metasBuffer=(_gpuBuffers$get6=gpuBuffers.get(metasAddress))===null||_gpuBuffers$get6===void 0?void 0:_gpuBuffers$get6.buffer;if(!metasBuffer){console.error("Metas buffer has not been allocated");setStatus(statusAddress,-1);return}var blocksBuffer=(_gpuBuffers$get7=gpuBuffers.get(blocksAddress))===null||_gpuBuffers$get7===void 0?void 0:_gpuBuffers$get7.buffer;if(!blocksBuffer){console.error("Blocks buffer has not been allocated");setStatus(statusAddress,-1);return}var 
yBuffer=(_gpuBuffers$get8=gpuBuffers.get(yAddress))===null||_gpuBuffers$get8===void 0?void 0:_gpuBuffers$get8.buffer;if(!yBuffer){console.error("Y buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(8*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"weight forward single ".concat(bitDepth," arg buffer"));obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([1,nbr,totalNbc,bitDepthNbc,xOffsetBytes/4,metasOffsetBytes/4,blocksOffsetBytes/4,yOffsetBytes/4]));obj.scheduleUniformBufferForRelease(argsBuffer);var entries=[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:xBuffer}},{binding:2,resource:{buffer:metasBuffer}},{binding:3,resource:{buffer:blocksBuffer}},{binding:4,resource:{buffer:yBuffer}}];var bindGroup=obj.device.createBindGroup({label:"weight forward single ".concat(bitDepth," bind group"),layout:shader.computePipeline.getBindGroupLayout(0),entries:entries});obj.dispatchComputerShader(bindGroup,shader.computePipeline,forwardSingleShaderNames[bitDepth],Math.ceil(nbr*4/weightBlockSize),bitDepthNbc);setStatus(statusAddress,0)};var pvPicollmForwardSingleReduceYWebGpu=function pvPicollmForwardSingleReduceYWebGpu(objAddress,nbr,nbc,xAddress,yAddress,statusAddress){var _gpuBuffers$get9,_gpuBuffers$get10;objAddress=unsignedAddress(objAddress);xAddress=unsignedAddress(xAddress);yAddress=unsignedAddress(yAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[forwardSingleReduceYShaderName];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var xBuffer=(_gpuBuffers$get9=gpuBuffers.get(xAddress))===null||_gpuBuffers$get9===void 0?void 0:_gpuBuffers$get9.buffer;if(!xBuffer){console.error("X buffer has not been allocated");setStatus(statusAddress,-1);return}var 
yBuffer=(_gpuBuffers$get10=gpuBuffers.get(yAddress))===null||_gpuBuffers$get10===void 0?void 0:_gpuBuffers$get10.buffer;if(!yBuffer){console.error("Y buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(2*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"weight single reduce y arg buffer");obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([nbr*4,nbc]));obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"weight forward single reduce y bind group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:xBuffer}},{binding:2,resource:{buffer:yBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,forwardSingleReduceYShaderName,Math.ceil(nbr*4/weightBlockSize));setStatus(statusAddress,0)};var pvPicollmForwardMultipleShuffleXWebGpu=function pvPicollmForwardMultipleShuffleXWebGpu(objAddress,xAddress,xOffsetBytes,indicesAddress,indicesOffsetBytes,n,shape1,yAddress,statusAddress){var _gpuBuffers$get11,_gpuBuffers$get12,_gpuBuffers$get13;objAddress=unsignedAddress(objAddress);xAddress=unsignedAddress(xAddress);indicesAddress=unsignedAddress(indicesAddress);yAddress=unsignedAddress(yAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[forwardShuffleXShaderName];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var xBuffer=(_gpuBuffers$get11=gpuBuffers.get(xAddress))===null||_gpuBuffers$get11===void 0?void 0:_gpuBuffers$get11.buffer;if(!xBuffer){console.error("X buffer has not been allocated");setStatus(statusAddress,-1);return}var indicesBuffer=(_gpuBuffers$get12=gpuBuffers.get(indicesAddress))===null||_gpuBuffers$get12===void 0?void 
0:_gpuBuffers$get12.buffer;if(!indicesBuffer){console.error("Indices buffer has not been allocated");setStatus(statusAddress,-1);return}var yBuffer=(_gpuBuffers$get13=gpuBuffers.get(yAddress))===null||_gpuBuffers$get13===void 0?void 0:_gpuBuffers$get13.buffer;if(!yBuffer){console.error("Y buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(4*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"weight multi shuffle x arg buffer");obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([n,shape1,xOffsetBytes/4,indicesOffsetBytes/4]));obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"weight forward multiple shuffle x bind group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:xBuffer}},{binding:2,resource:{buffer:indicesBuffer}},{binding:3,resource:{buffer:yBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,forwardShuffleXShaderName+"_multi",n,shape1);setStatus(statusAddress,0)};var pvPicollmForwardMultipleWebGpu=function pvPicollmForwardMultipleWebGpu(objAddress,bitDepth,xAddress,xOffsetBytes,metasAddress,metasOffsetBytes,blocksAddress,blocksOffsetBytes,nbc,nbr,n,yAddress,yOffsetBytes,statusAddress){var _gpuBuffers$get14,_gpuBuffers$get15,_gpuBuffers$get16,_gpuBuffers$get17;objAddress=unsignedAddress(objAddress);xAddress=unsignedAddress(xAddress);metasAddress=unsignedAddress(metasAddress);blocksAddress=unsignedAddress(blocksAddress);yAddress=unsignedAddress(yAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[forwardShaderNames[bitDepth]];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var 
xBuffer=(_gpuBuffers$get14=gpuBuffers.get(xAddress))===null||_gpuBuffers$get14===void 0?void 0:_gpuBuffers$get14.buffer;if(!xBuffer){console.error("X buffer has not been allocated");setStatus(statusAddress,-1);return}var metasBuffer=(_gpuBuffers$get15=gpuBuffers.get(metasAddress))===null||_gpuBuffers$get15===void 0?void 0:_gpuBuffers$get15.buffer;if(!metasBuffer){console.error("Metas buffer has not been allocated");setStatus(statusAddress,-1);return}var blocksBuffer=(_gpuBuffers$get16=gpuBuffers.get(blocksAddress))===null||_gpuBuffers$get16===void 0?void 0:_gpuBuffers$get16.buffer;if(!blocksBuffer){console.error("Blocks buffer has not been allocated");setStatus(statusAddress,-1);return}var yBuffer=(_gpuBuffers$get17=gpuBuffers.get(yAddress))===null||_gpuBuffers$get17===void 0?void 0:_gpuBuffers$get17.buffer;if(!yBuffer){console.error("Y buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(8*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"weight forward multi ".concat(bitDepth," arg buffer"));obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([n,nbr,0,nbc,xOffsetBytes/4,metasOffsetBytes/4,blocksOffsetBytes/4,yOffsetBytes/4]));obj.scheduleUniformBufferForRelease(argsBuffer);var entries=[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:xBuffer}},{binding:2,resource:{buffer:metasBuffer}},{binding:3,resource:{buffer:blocksBuffer}},{binding:4,resource:{buffer:yBuffer}}];var bindGroup=obj.device.createBindGroup({label:"weight forward multi ".concat(bitDepth," bind group"),layout:shader.computePipeline.getBindGroupLayout(0),entries:entries});obj.dispatchComputerShader(bindGroup,shader.computePipeline,forwardShaderNames[bitDepth],Math.ceil(nbr/BM),Math.ceil(n/BN));setStatus(statusAddress,0)};var pvPicollmForwardMultipleShuffleYWebGpu=function pvPicollmForwardMultipleShuffleYWebGpu(objAddress,n,shape0,xAddress,yAddress,statusAddress){var 
_gpuBuffers$get18,_gpuBuffers$get19;objAddress=unsignedAddress(objAddress);xAddress=unsignedAddress(xAddress);yAddress=unsignedAddress(yAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[forwardShuffleYShaderName];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var xBuffer=(_gpuBuffers$get18=gpuBuffers.get(xAddress))===null||_gpuBuffers$get18===void 0?void 0:_gpuBuffers$get18.buffer;if(!xBuffer){console.error("X buffer has not been allocated");setStatus(statusAddress,-1);return}var yBuffer=(_gpuBuffers$get19=gpuBuffers.get(yAddress))===null||_gpuBuffers$get19===void 0?void 0:_gpuBuffers$get19.buffer;if(!yBuffer){console.error("Y buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(2*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"weight shuffle y arg buffer");obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([n,shape0]));obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"weight forward multiple shuffle y bind group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:xBuffer}},{binding:2,resource:{buffer:yBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,forwardShuffleYShaderName,n,shape0);setStatus(statusAddress,0)};var pvPicollmAddBiasWebGpu=function pvPicollmAddBiasWebGpu(objAddress,n,dimension,biasAddress,yAddress,statusAddress){var _gpuBuffers$get20,_gpuBuffers$get21;objAddress=unsignedAddress(objAddress);biasAddress=unsignedAddress(biasAddress);yAddress=unsignedAddress(yAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not 
been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[addBiasShaderName];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var biasBuffer=(_gpuBuffers$get20=gpuBuffers.get(biasAddress))===null||_gpuBuffers$get20===void 0?void 0:_gpuBuffers$get20.buffer;if(!biasBuffer){console.error("Bias buffer has not been allocated");setStatus(statusAddress,-1);return}var yBuffer=(_gpuBuffers$get21=gpuBuffers.get(yAddress))===null||_gpuBuffers$get21===void 0?void 0:_gpuBuffers$get21.buffer;if(!yBuffer){console.error("Y buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"weight add bias arg buffer");obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([dimension]));obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"weight add bias bind group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:biasBuffer}},{binding:2,resource:{buffer:yBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,addBiasShaderName,n,dimension);setStatus(statusAddress,0)};return{pv_picollm_weight_block_mixed_16x8_preprocess_blocks_webgpu_wasm:pvPicollmPreprocessBlocksWebGpu,pv_picollm_weight_block_mixed_16x8_forward_single_shuffle_x_webgpu_wasm:pvPicollmForwardSingleShuffleXWebGpu,pv_picollm_weight_block_mixed_16x8_forward_single_webgpu_wasm:pvPicollmForwardSingleWebGpu,pv_picollm_weight_block_mixed_16x8_forward_single_reduce_y_webgpu_wasm:pvPicollmForwardSingleReduceYWebGpu,pv_picollm_weight_block_mixed_16x8_forward_multiple_shuffle_x_webgpu_wasm:pvPicollmForwardMultipleShuffleXWebGpu,pv_picollm_weight_block_mixed_16x8_forward_multiple_webgpu_wasm:pvPicollmForwardMultipleWebGpu,pv_picollm_weight_block_mixed_16x8_forward_multiple_shuffle_y_webgpu_wasm:pvPicollmForwardMultiple
ShuffleYWebGpu,pv_picollm_weight_block_mixed_16x8_add_bias_webgpu_wasm:pvPicollmAddBiasWebGpu}};function ownKeys$1(e,r){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);r&&(o=o.filter(function(r){return Object.getOwnPropertyDescriptor(e,r).enumerable})),t.push.apply(t,o)}return t}function _objectSpread$1(e){for(var r=1;r=r.length?{done:!0}:{done:!1,value:r[_n++]}},e:function e(r){throw r},f:F}}throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")}var o,a=!0,u=!1;return{s:function s(){t=t.call(r)},n:function n(){var r=t.next();return a=r.done,r},e:function e(r){u=!0,o=r},f:function f(){try{a||null==t["return"]||t["return"]()}finally{if(u)throw o}}}}function _unsupportedIterableToArray(r,a){if(r){if("string"==typeof r)return _arrayLikeToArray(r,a);var t={}.toString.call(r).slice(8,-1);return"Object"===t&&r.constructor&&(t=r.constructor.name),"Map"===t||"Set"===t?Array.from(r):"Arguments"===t||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(t)?_arrayLikeToArray(r,a):void 0}}function _arrayLikeToArray(r,a){(null==a||a>r.length)&&(a=r.length);for(var e=0,n=Array(a);e1&&_args[1]!==undefined?_args[1]:{};time=_args.length>2&&_args[2]!==undefined?_args[2]:5e3;controller=new AbortController;config=_objectSpread(_objectSpread({},options),{},{signal:controller.signal});timeout=setTimeout(function(){controller.abort()},time);_context.next=7;return fetch(uri,config);case 7:response=_context.sent;clearTimeout(timeout);return _context.abrupt("return",response);case 10:case"end":return _context.stop()}},_callee)}));return _fetchWithTimeout.apply(this,arguments)}function open(_x2,_x3){return _open.apply(this,arguments)}function _open(){_open=_asyncToGenerator(_regeneratorRuntime.mark(function _callee2(path,mode){var error;return _regeneratorRuntime.wrap(function _callee2$(_context2){while(1)switch(_context2.prev=_context2.next){case 
0:_context2.prev=0;_context2.next=3;return PvFileIDB.open(path,mode);case 3:return _context2.abrupt("return",_context2.sent);case 6:_context2.prev=6;_context2.t0=_context2["catch"](0);if(_context2.t0.name==="IndexedDBNotSupported"){console.warn("IndexedDB is not supported. Fallback to in-memory storage.")}else if(_context2.t0.name!=="FileNotExists"){console.warn("Unable to access IndexedDB (".concat(_context2.t0.toString(),"). Fallback to in-memory storage."))}if(!(typeof WorkerGlobalScope!=="undefined"&&self instanceof WorkerGlobalScope)){_context2.next=16;break}if(!(_context2.t0.name==="FileNotExists")){_context2.next=12;break}throw _context2.t0;case 12:console.error("In-memory storage cannot be used inside a worker.");error=new Error("Failed to start PvFile: ".concat(_context2.t0.toString()));error.name="PvFileNotSupported";throw error;case 16:return _context2.abrupt("return",PvFileMem.open(path,mode));case 17:case"end":return _context2.stop()}},_callee2,null,[[0,6]])}));return _open.apply(this,arguments)}var moduleOverrides=Object.assign({},Module);var arguments_=[];var thisProgram="./this.program";var quit_=(status,toThrow)=>{throw toThrow};var scriptDirectory="";function locateFile(path){if(Module["locateFile"]){return Module["locateFile"](path,scriptDirectory)}return scriptDirectory+path}var readAsync,readBinary;if(ENVIRONMENT_IS_WEB||ENVIRONMENT_IS_WORKER){if(ENVIRONMENT_IS_WORKER){scriptDirectory=self.location.href}else if(typeof document!="undefined"&&document.currentScript){scriptDirectory=document.currentScript.src}if(_scriptName){scriptDirectory=_scriptName}if(scriptDirectory.startsWith("blob:")){scriptDirectory=""}else{scriptDirectory=scriptDirectory.substr(0,scriptDirectory.replace(/[?#].*/,"").lastIndexOf("/")+1)}{if(ENVIRONMENT_IS_WORKER){readBinary=url=>{var xhr=new XMLHttpRequest;xhr.open("GET",url,false);xhr.responseType="arraybuffer";xhr.send(null);return new Uint8Array(xhr.response)}}readAsync=url=>{if(isFileURI(url)){return new 
Promise((reject,resolve)=>{var xhr=new XMLHttpRequest;xhr.open("GET",url,true);xhr.responseType="arraybuffer";xhr.onload=()=>{if(xhr.status==200||xhr.status==0&&xhr.response){resolve(xhr.response)}reject(xhr.status)};xhr.onerror=reject;xhr.send(null)})}return fetch(url,{credentials:"same-origin"}).then(response=>{if(response.ok){return response.arrayBuffer()}return Promise.reject(new Error(response.status+" : "+response.url))})}}}else{}var out=Module["print"]||console.log.bind(console);var err=Module["printErr"]||console.error.bind(console);Object.assign(Module,moduleOverrides);moduleOverrides=null;if(Module["arguments"])arguments_=Module["arguments"];if(Module["thisProgram"])thisProgram=Module["thisProgram"];if(Module["quit"])quit_=Module["quit"];if(ENVIRONMENT_IS_PTHREAD){var wasmPromiseResolve;var wasmPromiseReject;var initializedJS=false;function threadPrintErr(...args){var text=args.join(" ");console.error(text)}if(!Module["printErr"])err=threadPrintErr;function threadAlert(...args){var text=args.join(" ");postMessage({cmd:"alert",text:text,threadId:_pthread_self()})}self.alert=threadAlert;Module["instantiateWasm"]=(info,receiveInstance)=>new Promise((resolve,reject)=>{wasmPromiseResolve=module=>{var instance=new WebAssembly.Instance(module,getWasmImports());receiveInstance(instance);resolve()};wasmPromiseReject=reject});self.onunhandledrejection=e=>{throw e.reason||e};function handleMessage(e){try{var msgData=e["data"];var cmd=msgData["cmd"];if(cmd==="load"){let messageQueue=[];self.onmessage=e=>messageQueue.push(e);self.startWorker=instance=>{postMessage({cmd:"loaded"});for(let msg of messageQueue){handleMessage(msg)}self.onmessage=handleMessage};for(const handler of 
msgData["handlers"]){/* a handler that is missing locally, or flagged with .proxy, is replaced by a stub that forwards the call through postMessage as a callHandler command */if(!Module[handler]||Module[handler].proxy){Module[handler]=(...args)=>{postMessage({cmd:"callHandler",handler:handler,args:args})};if(handler=="print")out=Module[handler];if(handler=="printErr")err=Module[handler]}}/* adopt the memory delivered with the load message, rebuild the HEAP views, then pass the module to wasmPromiseResolve */wasmMemory=msgData["wasmMemory"];updateMemoryViews();wasmPromiseResolve(msgData["wasmModule"])}else if(cmd==="run"){/* run: set up thread state for msgData.pthread_ptr, then invoke the entry point; an exception equal to the string unwind is swallowed, anything else is rethrown */__emscripten_thread_init(msgData["pthread_ptr"],0,0,1,0,0);__emscripten_thread_mailbox_await(msgData["pthread_ptr"]);establishStackSpace();PThread.receiveObjectTransfer(msgData);PThread.threadInitTLS();if(!initializedJS){initializedJS=true}try{invokeEntryPoint(msgData["start_routine"],msgData["arg"])}catch(ex){if(ex!="unwind"){throw ex}}}else if(cmd==="cancel"){/* cancel: exit with -1 when _pthread_self() is non-zero */if(_pthread_self()){__emscripten_thread_exit(-1)}}else if(msgData.target==="setimmediate"){/* empty branch: messages with this target are ignored */}else if(cmd==="checkMailbox"){/* only acted on once initializedJS has been set by a run command */if(initializedJS){checkMailbox()}}else if(cmd){/* any other non-empty command is reported through err */err(`worker: received unknown command ${cmd}`);err(msgData)}}catch(ex){/* report the crash before letting the exception propagate */__emscripten_thread_crashed();throw ex}}self.onmessage=handleMessage}var wasmBinary;if(Module["wasmBinary"])wasmBinary=Module["wasmBinary"];var wasmMemory;var wasmModule;var ABORT=false;var EXITSTATUS;var HEAP8,HEAPU8,HEAP16,HEAPU16,HEAP32,HEAPU32,HEAPF32,HEAPF64;/* Rebuilds every typed-array HEAP view over wasmMemory.buffer and mirrors each one onto Module. */function updateMemoryViews(){var b=wasmMemory.buffer;Module["HEAP8"]=HEAP8=new Int8Array(b);Module["HEAP16"]=HEAP16=new Int16Array(b);Module["HEAPU8"]=HEAPU8=new Uint8Array(b);Module["HEAPU16"]=HEAPU16=new Uint16Array(b);Module["HEAP32"]=HEAP32=new Int32Array(b);Module["HEAPU32"]=HEAPU32=new Uint32Array(b);Module["HEAPF32"]=HEAPF32=new Float32Array(b);Module["HEAPF64"]=HEAPF64=new Float64Array(b)}/* Outside pthread workers: use Module.wasmMemory when supplied, otherwise allocate a shared memory whose initial size is INITIAL_MEMORY (default 39321600 bytes) divided into 65536-byte pages. */if(!ENVIRONMENT_IS_PTHREAD){if(Module["wasmMemory"]){wasmMemory=Module["wasmMemory"]}else{var INITIAL_MEMORY=Module["INITIAL_MEMORY"]||39321600;wasmMemory=new WebAssembly.Memory({initial:INITIAL_MEMORY/65536,maximum:4294967296/65536,shared:true});/* a shared request must come back as a SharedArrayBuffer; otherwise log and throw */if(!(wasmMemory.buffer instanceof SharedArrayBuffer)){err("requested a shared WebAssembly.Memory but the returned buffer is not a 
SharedArrayBuffer, indicating that while the browser has SharedArrayBuffer it does not have WebAssembly threads support - you may need to set a flag");if(ENVIRONMENT_IS_NODE){err("(on node you may need: --experimental-wasm-threads --experimental-wasm-bulk-memory and/or recent version)")}throw Error("bad memory")}}updateMemoryViews()}var __ATPRERUN__=[];var __ATINIT__=[];var __ATPOSTRUN__=[];var runtimeInitialized=false;function preRun(){if(Module["preRun"]){if(typeof Module["preRun"]=="function")Module["preRun"]=[Module["preRun"]];while(Module["preRun"].length){addOnPreRun(Module["preRun"].shift())}}callRuntimeCallbacks(__ATPRERUN__)}function initRuntime(){runtimeInitialized=true;if(ENVIRONMENT_IS_PTHREAD)return;callRuntimeCallbacks(__ATINIT__)}function postRun(){if(ENVIRONMENT_IS_PTHREAD)return;if(Module["postRun"]){if(typeof Module["postRun"]=="function")Module["postRun"]=[Module["postRun"]];while(Module["postRun"].length){addOnPostRun(Module["postRun"].shift())}}callRuntimeCallbacks(__ATPOSTRUN__)}function addOnPreRun(cb){__ATPRERUN__.unshift(cb)}function addOnInit(cb){__ATINIT__.unshift(cb)}function addOnPostRun(cb){__ATPOSTRUN__.unshift(cb)}var runDependencies=0;var runDependencyWatcher=null;var dependenciesFulfilled=null;function addRunDependency(id){runDependencies++;Module["monitorRunDependencies"]?.(runDependencies)}function removeRunDependency(id){runDependencies--;Module["monitorRunDependencies"]?.(runDependencies);if(runDependencies==0){if(runDependencyWatcher!==null){clearInterval(runDependencyWatcher);runDependencyWatcher=null}if(dependenciesFulfilled){var callback=dependenciesFulfilled;dependenciesFulfilled=null;callback()}}}function abort(what){Module["onAbort"]?.(what);what="Aborted("+what+")";err(what);ABORT=true;EXITSTATUS=1;what+=". 
Build with -sASSERTIONS for more info.";var e=new WebAssembly.RuntimeError(what);readyPromiseReject(e);throw e}var dataURIPrefix="data:application/octet-stream;base64,";var isDataURI=filename=>filename.startsWith(dataURIPrefix);var isFileURI=filename=>filename.startsWith("file://");function findWasmBinary(){var f="pv_picollm.wasm";if(!isDataURI(f)){return locateFile(f)}return f}var wasmBinaryFile;function getBinarySync(file){if(file==wasmBinaryFile&&wasmBinary){return new Uint8Array(wasmBinary)}if(readBinary){return readBinary(file)}throw"both async and sync fetching of the wasm failed"}function getBinaryPromise(binaryFile){if(!wasmBinary){return readAsync(binaryFile).then(response=>new Uint8Array(response),()=>getBinarySync(binaryFile))}return Promise.resolve().then(()=>getBinarySync(binaryFile))}function instantiateArrayBuffer(binaryFile,imports,receiver){return getBinaryPromise(binaryFile).then(binary=>WebAssembly.instantiate(binary,imports)).then(receiver,reason=>{err(`failed to asynchronously prepare wasm: ${reason}`);abort(reason)})}function instantiateAsync(binary,binaryFile,imports,callback){if(!binary&&typeof WebAssembly.instantiateStreaming=="function"&&!isDataURI(binaryFile)&&!isFileURI(binaryFile)&&typeof fetch=="function"){return fetch(binaryFile,{credentials:"same-origin"}).then(response=>{var result=WebAssembly.instantiateStreaming(response,imports);return result.then(callback,function(reason){err(`wasm streaming compile failed: ${reason}`);err("falling back to ArrayBuffer instantiation");return instantiateArrayBuffer(binaryFile,imports,callback)})})}return instantiateArrayBuffer(binaryFile,imports,callback)}function getWasmImports(){assignWasmImports();return{env:wasmImports,wasi_snapshot_preview1:wasmImports}}function createWasm(){var info=getWasmImports();function 
receiveInstance(instance,module){wasmExports=instance.exports;wasmExports=Asyncify.instrumentWasmExports(wasmExports);wasmExports=applySignatureConversions(wasmExports);registerTLSInit(wasmExports["_emscripten_tls_init"]);wasmTable=wasmExports["__indirect_function_table"];addOnInit(wasmExports["__wasm_call_ctors"]);wasmModule=module;removeRunDependency("wasm-instantiate");return wasmExports}addRunDependency("wasm-instantiate");function receiveInstantiationResult(result){receiveInstance(result["instance"],result["module"])}if(Module["instantiateWasm"]){try{return Module["instantiateWasm"](info,receiveInstance)}catch(e){err(`Module.instantiateWasm callback failed with error: ${e}`);readyPromiseReject(e)}}if(!wasmBinaryFile)wasmBinaryFile=findWasmBinary();instantiateAsync(wasmBinary,wasmBinaryFile,info,receiveInstantiationResult).catch(readyPromiseReject);return{}}function ExitStatus(status){this.name="ExitStatus";this.message=`Program terminated with exit(${status})`;this.status=status}var terminateWorker=worker=>{worker.terminate();worker.onmessage=e=>{}};var killThread=pthread_ptr=>{var worker=PThread.pthreads[pthread_ptr];delete PThread.pthreads[pthread_ptr];terminateWorker(worker);__emscripten_thread_free_data(pthread_ptr);PThread.runningWorkers.splice(PThread.runningWorkers.indexOf(worker),1);worker.pthread_ptr=0};var cancelThread=pthread_ptr=>{var worker=PThread.pthreads[pthread_ptr];worker.postMessage({cmd:"cancel"})};var cleanupThread=pthread_ptr=>{var worker=PThread.pthreads[pthread_ptr];PThread.returnWorkerToPool(worker)};var spawnThread=threadParams=>{var worker=PThread.getNewWorker();if(!worker){return 6}PThread.runningWorkers.push(worker);PThread.pthreads[threadParams.pthread_ptr]=worker;worker.pthread_ptr=threadParams.pthread_ptr;var msg={cmd:"run",start_routine:threadParams.startRoutine,arg:threadParams.arg,pthread_ptr:threadParams.pthread_ptr};worker.postMessage(msg,threadParams.transferList);return 0};var runtimeKeepaliveCounter=0;var 
keepRuntimeAlive=()=>noExitRuntime||runtimeKeepaliveCounter>0;var stackSave=()=>_emscripten_stack_get_current();var stackRestore=val=>__emscripten_stack_restore(val);var stackAlloc=sz=>__emscripten_stack_alloc(sz);var proxyToMainThread=(funcIndex,emAsmAddr,sync,...callArgs)=>{var serializedNumCallArgs=callArgs.length;var sp=stackSave();var args=stackAlloc(serializedNumCallArgs*8);var b=args>>>3;for(var i=0;i>>0]=arg}var rtn=__emscripten_run_on_main_thread_js(funcIndex,emAsmAddr,serializedNumCallArgs,args,sync);stackRestore(sp);return rtn};function _proc_exit(code){if(ENVIRONMENT_IS_PTHREAD)return proxyToMainThread(0,0,1,code);EXITSTATUS=code;if(!keepRuntimeAlive()){PThread.terminateAllThreads();Module["onExit"]?.(code);ABORT=true}quit_(code,new ExitStatus(code))}var handleException=e=>{if(e instanceof ExitStatus||e=="unwind"){return EXITSTATUS}quit_(1,e)};function exitOnMainThread(returnCode){if(ENVIRONMENT_IS_PTHREAD)return proxyToMainThread(1,0,0,returnCode);_exit(returnCode)}var exitJS=(status,implicit)=>{EXITSTATUS=status;if(ENVIRONMENT_IS_PTHREAD){exitOnMainThread(status);throw"unwind"}_proc_exit(status)};var _exit=exitJS;var PThread={unusedWorkers:[],runningWorkers:[],tlsInitFunctions:[],pthreads:{},init(){if(ENVIRONMENT_IS_PTHREAD){PThread.initWorker()}else{PThread.initMainThread()}},initMainThread(){var pthreadPoolSize=navigator.hardwareConcurrency;while(pthreadPoolSize--){PThread.allocateUnusedWorker()}addOnPreRun(()=>{addRunDependency("loading-workers");PThread.loadWasmModuleToAllWorkers(()=>removeRunDependency("loading-workers"))})},initWorker(){noExitRuntime=false},setExitStatus:status=>EXITSTATUS=status,terminateAllThreads__deps:["$terminateWorker"],terminateAllThreads:()=>{for(var worker of PThread.runningWorkers){terminateWorker(worker)}for(var worker of PThread.unusedWorkers){terminateWorker(worker)}PThread.unusedWorkers=[];PThread.runningWorkers=[];PThread.pthreads=[]},returnWorkerToPool:worker=>{var pthread_ptr=worker.pthread_ptr;delete 
PThread.pthreads[pthread_ptr];PThread.unusedWorkers.push(worker);PThread.runningWorkers.splice(PThread.runningWorkers.indexOf(worker),1);worker.pthread_ptr=0;__emscripten_thread_free_data(pthread_ptr)},receiveObjectTransfer(data){},threadInitTLS(){PThread.tlsInitFunctions.forEach(f=>f())},loadWasmModuleToWorker:worker=>new Promise(onFinishedLoading=>{worker.onmessage=e=>{var d=e["data"];var cmd=d["cmd"];if(d["targetThread"]&&d["targetThread"]!=_pthread_self()){var targetWorker=PThread.pthreads[d["targetThread"]];if(targetWorker){targetWorker.postMessage(d,d["transferList"])}else{err(`Internal error! Worker sent a message "${cmd}" to target pthread ${d["targetThread"]}, but that thread no longer exists!`)}return}if(cmd==="checkMailbox"){checkMailbox()}else if(cmd==="spawnThread"){spawnThread(d)}else if(cmd==="cleanupThread"){cleanupThread(d["thread"])}else if(cmd==="killThread"){killThread(d["thread"])}else if(cmd==="cancelThread"){cancelThread(d["thread"])}else if(cmd==="loaded"){worker.loaded=true;onFinishedLoading(worker)}else if(cmd==="alert"){alert(`Thread ${d["threadId"]}: ${d["text"]}`)}else if(d.target==="setimmediate"){worker.postMessage(d)}else if(cmd==="callHandler"){Module[d["handler"]](...d["args"])}else if(cmd){err(`worker sent an unknown command ${cmd}`)}};worker.onerror=e=>{var message="worker sent an error!";err(`${message} ${e.filename}:${e.lineno}: ${e.message}`);throw e};var handlers=[];var knownHandlers=["onExit","onAbort","print","printErr"];for(var handler of knownHandlers){if(Module.propertyIsEnumerable(handler)){handlers.push(handler)}}worker.postMessage({cmd:"load",handlers:handlers,wasmMemory:wasmMemory,wasmModule:wasmModule})}),loadWasmModuleToAllWorkers(onMaybeReady){if(ENVIRONMENT_IS_PTHREAD){return onMaybeReady()}let pthreadPoolReady=Promise.all(PThread.unusedWorkers.map(PThread.loadWasmModuleToWorker));pthreadPoolReady.then(onMaybeReady)},allocateUnusedWorker(){var worker;var workerOptions={type:"module",name:"em-pthread"};var 
pthreadMainJs=_scriptName;if(Module["mainScriptUrlOrBlob"]){pthreadMainJs=Module["mainScriptUrlOrBlob"];if(typeof pthreadMainJs!="string"){pthreadMainJs=URL.createObjectURL(pthreadMainJs)}}worker=new Worker(pthreadMainJs,workerOptions);PThread.unusedWorkers.push(worker)},getNewWorker(){if(PThread.unusedWorkers.length==0){PThread.allocateUnusedWorker();PThread.loadWasmModuleToWorker(PThread.unusedWorkers[0])}return PThread.unusedWorkers.pop()}};var callRuntimeCallbacks=callbacks=>{while(callbacks.length>0){callbacks.shift()(Module)}};var establishStackSpace=()=>{var pthread_ptr=_pthread_self();var stackHigh=GROWABLE_HEAP_U32()[pthread_ptr+52>>>2>>>0];var stackSize=GROWABLE_HEAP_U32()[pthread_ptr+56>>>2>>>0];var stackLow=stackHigh-stackSize;_emscripten_stack_set_limits(stackHigh,stackLow);stackRestore(stackHigh)};var invokeEntryPoint=(ptr,arg)=>{runtimeKeepaliveCounter=0;var result=(a1=>dynCall_ii(ptr,a1))(arg);function finish(result){if(keepRuntimeAlive()){PThread.setExitStatus(result)}else{__emscripten_thread_exit(result)}}finish(result)};var noExitRuntime=Module["noExitRuntime"]||true;var registerTLSInit=tlsInitFunc=>PThread.tlsInitFunctions.push(tlsInitFunc);var UTF8Decoder=typeof TextDecoder!="undefined"?new TextDecoder:undefined;var UTF8ArrayToString=(heapOrArray,idx,maxBytesToRead)=>{idx>>>=0;var endIdx=idx+maxBytesToRead;var endPtr=idx;while(heapOrArray[endPtr]&&!(endPtr>=endIdx))++endPtr;if(endPtr-idx>16&&heapOrArray.buffer&&UTF8Decoder){return UTF8Decoder.decode(heapOrArray.buffer instanceof SharedArrayBuffer?heapOrArray.slice(idx,endPtr):heapOrArray.subarray(idx,endPtr))}var str="";while(idx>10,56320|ch&1023)}}return str};var UTF8ToString=(ptr,maxBytesToRead)=>{ptr>>>=0;return ptr?UTF8ArrayToString(GROWABLE_HEAP_U8(),ptr,maxBytesToRead):""};function ___assert_fail(condition,filename,line,func){condition>>>=0;filename>>>=0;func>>>=0;abort(`Assertion failed: ${UTF8ToString(condition)}, at: `+[filename?UTF8ToString(filename):"unknown 
filename",line,func?UTF8ToString(func):"unknown function"])}function pthreadCreateProxied(pthread_ptr,attr,startRoutine,arg){if(ENVIRONMENT_IS_PTHREAD)return proxyToMainThread(2,0,1,pthread_ptr,attr,startRoutine,arg);return ___pthread_create_js(pthread_ptr,attr,startRoutine,arg)}function ___pthread_create_js(pthread_ptr,attr,startRoutine,arg){pthread_ptr>>>=0;attr>>>=0;startRoutine>>>=0;arg>>>=0;if(typeof SharedArrayBuffer=="undefined"){err("Current environment does not support SharedArrayBuffer, pthreads are not available!");return 6}var transferList=[];var error=0;if(ENVIRONMENT_IS_PTHREAD&&(transferList.length===0||error)){return pthreadCreateProxied(pthread_ptr,attr,startRoutine,arg)}if(error)return error;var threadParams={startRoutine:startRoutine,pthread_ptr:pthread_ptr,arg:arg,transferList:transferList};if(ENVIRONMENT_IS_PTHREAD){threadParams.cmd="spawnThread";postMessage(threadParams,transferList);return 0}return spawnThread(threadParams)}var __abort_js=()=>{abort("")};var nowIsMonotonic=1;var __emscripten_get_now_is_monotonic=()=>nowIsMonotonic;function __emscripten_init_main_thread_js(tb){tb>>>=0;__emscripten_thread_init(tb,!ENVIRONMENT_IS_WORKER,1,!ENVIRONMENT_IS_WEB,65536,false);PThread.threadInitTLS()}var maybeExit=()=>{if(!keepRuntimeAlive()){try{if(ENVIRONMENT_IS_PTHREAD)__emscripten_thread_exit(EXITSTATUS);else _exit(EXITSTATUS)}catch(e){handleException(e)}}};var callUserCallback=func=>{if(ABORT){return}try{func();maybeExit()}catch(e){handleException(e)}};function __emscripten_thread_mailbox_await(pthread_ptr){pthread_ptr>>>=0;if(typeof Atomics.waitAsync==="function"){var wait=Atomics.waitAsync(GROWABLE_HEAP_I32(),pthread_ptr>>>2,pthread_ptr);wait.value.then(checkMailbox);var waitingAsync=pthread_ptr+128;Atomics.store(GROWABLE_HEAP_I32(),waitingAsync>>>2,1)}}var checkMailbox=()=>{var pthread_ptr=_pthread_self();if(pthread_ptr){__emscripten_thread_mailbox_await(pthread_ptr);callUserCallback(__emscripten_check_mailbox)}};function 
__emscripten_notify_mailbox_postmessage(targetThreadId,currThreadId,mainThreadId){targetThreadId>>>=0;currThreadId>>>=0;mainThreadId>>>=0;if(targetThreadId==currThreadId){setTimeout(checkMailbox)}else if(ENVIRONMENT_IS_PTHREAD){postMessage({targetThread:targetThreadId,cmd:"checkMailbox"})}else{var worker=PThread.pthreads[targetThreadId];if(!worker){return}worker.postMessage({cmd:"checkMailbox"})}}var proxiedJSCallArgs=[];function __emscripten_receive_on_main_thread_js(funcIndex,emAsmAddr,callingThread,numCallArgs,args){emAsmAddr>>>=0;callingThread>>>=0;args>>>=0;proxiedJSCallArgs.length=numCallArgs;var b=args>>>3;for(var i=0;i>>0]}var func=proxiedFunctionTable[funcIndex];PThread.currentProxiedOperationCallerThread=callingThread;var rtn=func(...proxiedJSCallArgs);PThread.currentProxiedOperationCallerThread=0;return rtn}function __emscripten_thread_cleanup(thread){thread>>>=0;if(!ENVIRONMENT_IS_PTHREAD)cleanupThread(thread);else postMessage({cmd:"cleanupThread",thread:thread})}function __emscripten_thread_set_strongref(thread){thread>>>=0}var warnOnce=text=>{warnOnce.shown||={};if(!warnOnce.shown[text]){warnOnce.shown[text]=1;err(text)}};var _emscripten_check_blocking_allowed=()=>{};var _emscripten_date_now=()=>Date.now();var runtimeKeepalivePush=()=>{runtimeKeepaliveCounter+=1};var _emscripten_exit_with_live_runtime=()=>{runtimeKeepalivePush();throw"unwind"};var getHeapMax=()=>4294901760;function _emscripten_get_heap_max(){return getHeapMax()}var _emscripten_get_now;_emscripten_get_now=()=>performance.timeOrigin+performance.now();var _emscripten_num_logical_cores=()=>navigator["hardwareConcurrency"];var growMemory=size=>{var b=wasmMemory.buffer;var pages=(size-b.byteLength+65535)/65536;try{wasmMemory.grow(pages);updateMemoryViews();return 1}catch(e){}};function _emscripten_resize_heap(requestedSize){requestedSize>>>=0;var oldSize=GROWABLE_HEAP_U8().length;if(requestedSize<=oldSize){return false}var maxHeapSize=getHeapMax();if(requestedSize>maxHeapSize){return 
false}var alignUp=(x,multiple)=>x+(multiple-x%multiple)%multiple;for(var cutDown=1;cutDown<=4;cutDown*=2){var overGrownHeapSize=oldSize*(1+.2/cutDown);overGrownHeapSize=Math.min(overGrownHeapSize,requestedSize+100663296);var newSize=Math.min(maxHeapSize,alignUp(Math.max(requestedSize,overGrownHeapSize),65536));var replacement=growMemory(newSize);if(replacement){return true}}return false}var ENV={};var getExecutableName=()=>thisProgram||"./this.program";var getEnvStrings=()=>{if(!getEnvStrings.strings){var lang=(typeof navigator=="object"&&navigator.languages&&navigator.languages[0]||"C").replace("-","_")+".UTF-8";var env={USER:"web_user",LOGNAME:"web_user",PATH:"/",PWD:"/",HOME:"/home/web_user",LANG:lang,_:getExecutableName()};for(var x in ENV){if(ENV[x]===undefined)delete env[x];else env[x]=ENV[x]}var strings=[];for(var x in env){strings.push(`${x}=${env[x]}`)}getEnvStrings.strings=strings}return getEnvStrings.strings};var stringToAscii=(str,buffer)=>{for(var i=0;i>>0]=str.charCodeAt(i)}GROWABLE_HEAP_I8()[buffer>>>0]=0};var _environ_get=function(__environ,environ_buf){if(ENVIRONMENT_IS_PTHREAD)return proxyToMainThread(3,0,1,__environ,environ_buf);__environ>>>=0;environ_buf>>>=0;var bufSize=0;getEnvStrings().forEach((string,i)=>{var ptr=environ_buf+bufSize;GROWABLE_HEAP_U32()[__environ+i*4>>>2>>>0]=ptr;stringToAscii(string,ptr);bufSize+=string.length+1});return 0};var _environ_sizes_get=function(penviron_count,penviron_buf_size){if(ENVIRONMENT_IS_PTHREAD)return proxyToMainThread(4,0,1,penviron_count,penviron_buf_size);penviron_count>>>=0;penviron_buf_size>>>=0;var strings=getEnvStrings();GROWABLE_HEAP_U32()[penviron_count>>>2>>>0]=strings.length;var bufSize=0;strings.forEach(string=>bufSize+=string.length+1);GROWABLE_HEAP_U32()[penviron_buf_size>>>2>>>0]=bufSize;return 0};var printCharBuffers=[null,[],[]];var printChar=(stream,curr)=>{var 
buffer=printCharBuffers[stream];if(curr===0||curr===10){(stream===1?out:err)(UTF8ArrayToString(buffer,0));buffer.length=0}else{buffer.push(curr)}};function _fd_write(fd,iov,iovcnt,pnum){if(ENVIRONMENT_IS_PTHREAD)return proxyToMainThread(5,0,1,fd,iov,iovcnt,pnum);iov>>>=0;iovcnt>>>=0;pnum>>>=0;var num=0;for(var i=0;i>>2>>>0];var len=GROWABLE_HEAP_U32()[iov+4>>>2>>>0];iov+=8;for(var j=0;j>>0])}num+=len}GROWABLE_HEAP_U32()[pnum>>>2>>>0]=num;return 0}function _pv_console_log_wasm(index){console.log(arrayBufferToStringAtIndex(Module.HEAPU8,index))}var _pv_file_close_wasm=async function(fileAddress,statusAddress){statusAddress=unsignedAddress(statusAddress);return Asyncify.handleAsync(async()=>{try{const file=await PvFile.getPtr(fileAddress);await file.close();setInt(statusAddress,0)}catch(e){console.error("pvFileCloseWasm",e);setInt(statusAddress,-1)}})};_pv_file_close_wasm.isAsync=true;var _pv_file_open_wasm=async function(fileAddress,pathAddress,modeAddress,statusAddress){pathAddress=unsignedAddress(pathAddress);modeAddress=unsignedAddress(modeAddress);statusAddress=unsignedAddress(statusAddress);return Asyncify.handleAsync(async()=>{const path=arrayBufferToStringAtIndex(Module.HEAPU8,pathAddress);const mode=arrayBufferToStringAtIndex(Module.HEAPU8,modeAddress);try{const file=await open(path,mode);PvFile.setPtr(fileAddress,file);setInt(statusAddress,0)}catch(e){if(e.name!=="FileNotExists"){console.error("pvFileOpenWasm",e)}setInt(statusAddress,-1)}})};_pv_file_open_wasm.isAsync=true;var _pv_file_read_wasm=async function(fileAddress,contentAddress,size,count,numReadAddress){contentAddress=unsignedAddress(contentAddress);numReadAddress=unsignedAddress(numReadAddress);return Asyncify.handleAsync(async()=>{try{const file=PvFile.getPtr(fileAddress);const content=await 
file.read(size,count);Module.HEAPU8.set(content,contentAddress);setInt(numReadAddress,content.length/size)}catch(e){console.error("pvFileReadWasm",e);setInt(numReadAddress,-1)}})};_pv_file_read_wasm.isAsync=true;function _pv_file_seek_wasm(fileAddress,offset,whence,statusAddress){statusAddress=unsignedAddress(statusAddress);try{const file=PvFile.getPtr(fileAddress);file.seek(offset,whence);setInt(statusAddress,0)}catch(e){console.error("pvFileSeekWasm",e);setInt(statusAddress,-1)}}function _pv_file_tell_wasm(fileAddress,offsetAddress){offsetAddress=unsignedAddress(offsetAddress);try{const file=PvFile.getPtr(fileAddress);setInt(offsetAddress,file.tell())}catch(e){console.error("pvFileTellWasm",e);setInt(offsetAddress,-1)}}var _pv_https_request_wasm=async function(httpMethodAddress,serverNameAddress,endpointAddress,headerAddress,bodyAddress,timeoutMs,responseAddressAddress,responseSizeAddress,responseCodeAddress){httpMethodAddress=unsignedAddress(httpMethodAddress);serverNameAddress=unsignedAddress(serverNameAddress);endpointAddress=unsignedAddress(endpointAddress);headerAddress=unsignedAddress(headerAddress);bodyAddress=unsignedAddress(bodyAddress);responseAddressAddress=unsignedAddress(responseAddressAddress);responseSizeAddress=unsignedAddress(responseSizeAddress);responseCodeAddress=unsignedAddress(responseCodeAddress);return Asyncify.handleAsync(async()=>{const httpMethod=arrayBufferToStringAtIndex(Module.HEAPU8,httpMethodAddress);const serverName=arrayBufferToStringAtIndex(Module.HEAPU8,serverNameAddress);const endpoint=arrayBufferToStringAtIndex(Module.HEAPU8,endpointAddress);const header=arrayBufferToStringAtIndex(Module.HEAPU8,headerAddress);const body=arrayBufferToStringAtIndex(Module.HEAPU8,bodyAddress);const headerObject=stringHeaderToObject(header);const options={method:httpMethod};if(body.length>0){options.body=body}if(Object.keys(headerObject).length>0){options.headers=headerObject}let response;let responseText;let statusCode;try{response=await 
fetchWithTimeout("https://"+serverName+endpoint,options,timeoutMs);statusCode=response.status}catch(error){console.error("pvHttpsRequestWasm",`Failed to fetch: ${error}`);return}try{responseText=await response.text()}catch(error){console.error("pvHttpsRequestWasm",`Failed to get response text: ${error}`);return}const responseAddress=Module._malloc((responseText.length+1)*Int8Array.BYTES_PER_ELEMENT);if(responseAddress===0){console.error("pvMallocError","pvHttpsRequestWasm: cannot allocate memory for response");setInt(responseAddressAddress,0);return}setInt(responseSizeAddress,responseText.length+1);setInt(responseAddressAddress,responseAddress);for(let i=0;inew Promise(resolve=>{setTimeout(()=>{resolve()},ms)}))};_pv_sleep_wasm.isAsync=true;function _pv_time_wasm(){return Date.now()/1e3}function xpu_cpu_support(){const functions=xpuCpuFunctions();if(typeof _pv_xpu_device_info_wasm!=="undefined")_pv_xpu_device_info_wasm=functions.pv_xpu_device_info_wasm;if(typeof _pv_xpu_get_max_workers_wasm!=="undefined")_pv_xpu_get_max_workers_wasm=functions.pv_xpu_get_max_workers_wasm}function _pv_xpu_device_info_wasm(){}function _pv_xpu_get_max_workers_wasm(){}function _pv_xpu_webgpu_device_cleanup_wasm(){}async function _pv_xpu_webgpu_device_info_wasm(){}_pv_xpu_webgpu_device_info_wasm.isAsync=true;async function _pv_xpu_webgpu_device_init_wasm(){}_pv_xpu_webgpu_device_init_wasm.isAsync=true;function _pv_xpu_webgpu_device_load_shader_func_wasm(){}function _pv_xpu_webgpu_device_mem_alloc_wasm(){}async function _pv_xpu_webgpu_device_mem_copy_from_xpu_wasm(){}_pv_xpu_webgpu_device_mem_copy_from_xpu_wasm.isAsync=true;function _pv_xpu_webgpu_device_mem_copy_to_xpu_wasm(){}function _pv_xpu_webgpu_device_mem_free_wasm(){}function _pv_xpu_webgpu_device_mem_memset_wasm(){}async function _pv_xpu_webgpu_device_wait_wasm(){}_pv_xpu_webgpu_device_wait_wasm.isAsync=true;var runAndAbortIfError=func=>{try{return func()}catch(e){abort(e)}};var sigToWasmTypes=sig=>{var 
typeNames={i:"i32",j:"i64",f:"f32",d:"f64",e:"externref",p:"i32"};var type={parameters:[],results:sig[0]=="v"?[]:[typeNames[sig[0]]]};for(var i=1;i{runtimeKeepaliveCounter-=1};var Asyncify={instrumentWasmImports(imports){var importPattern=/^(pv_https_request_wasm|pv_file_open_wasm|pv_file_close_wasm|pv_file_read_wasm|pv_file_write_wasm|pv_file_remove_wasm|pv_sleep_wasm|pv_xpu_webgpu_device_init_wasm|pv_xpu_webgpu_device_info_wasm|pv_xpu_webgpu_device_mem_copy_from_xpu_wasm|pv_xpu_webgpu_device_wait_wasm|pv_xpu_webgpu_timer_stop_wasm|invoke_.*|__asyncjs__.*)$/;for(let[x,original]of Object.entries(imports)){if(typeof original=="function"){let isAsyncifyImport=original.isAsync||importPattern.test(x)}}},instrumentWasmExports(exports){var ret={};for(let[x,original]of Object.entries(exports)){if(typeof original=="function"){ret[x]=(...args)=>{Asyncify.exportCallStack.push(x);try{return original(...args)}finally{if(!ABORT){var y=Asyncify.exportCallStack.pop();Asyncify.maybeStopUnwind()}}}}else{ret[x]=original}}return ret},State:{Normal:0,Unwinding:1,Rewinding:2,Disabled:3},state:0,StackSize:4096,currData:null,handleSleepReturnValue:0,exportCallStack:[],callStackNameToId:{},callStackIdToName:{},callStackId:0,asyncPromiseHandlers:null,sleepCallbacks:[],getCallStackId(funcName){var id=Asyncify.callStackNameToId[funcName];if(id===undefined){id=Asyncify.callStackId++;Asyncify.callStackNameToId[funcName]=id;Asyncify.callStackIdToName[id]=funcName}return id},maybeStopUnwind(){if(Asyncify.currData&&Asyncify.state===Asyncify.State.Unwinding&&Asyncify.exportCallStack.length===0){Asyncify.state=Asyncify.State.Normal;runtimeKeepalivePush();runAndAbortIfError(_asyncify_stop_unwind);if(typeof Fibers!="undefined"){Fibers.trampoline()}}},whenDone(){return new Promise((resolve,reject)=>{Asyncify.asyncPromiseHandlers={resolve:resolve,reject:reject}})},allocateData(){var 
ptr=_malloc(12+Asyncify.StackSize);Asyncify.setDataHeader(ptr,ptr+12,Asyncify.StackSize);Asyncify.setDataRewindFunc(ptr);return ptr},setDataHeader(ptr,stack,stackSize){GROWABLE_HEAP_U32()[ptr>>>2>>>0]=stack;GROWABLE_HEAP_U32()[ptr+4>>>2>>>0]=stack+stackSize},setDataRewindFunc(ptr){var bottomOfCallStack=Asyncify.exportCallStack[0];var rewindId=Asyncify.getCallStackId(bottomOfCallStack);GROWABLE_HEAP_I32()[ptr+8>>>2>>>0]=rewindId},getDataRewindFuncName(ptr){var id=GROWABLE_HEAP_I32()[ptr+8>>>2>>>0];var name=Asyncify.callStackIdToName[id];return name},getDataRewindFunc(name){var func=wasmExports[name];return func},doRewind(ptr){var name=Asyncify.getDataRewindFuncName(ptr);var func=Asyncify.getDataRewindFunc(name);runtimeKeepalivePop();return func()},handleSleep(startAsync){if(ABORT)return;if(Asyncify.state===Asyncify.State.Normal){var reachedCallback=false;var reachedAfterCallback=false;startAsync((handleSleepReturnValue=0)=>{if(ABORT)return;Asyncify.handleSleepReturnValue=handleSleepReturnValue;reachedCallback=true;if(!reachedAfterCallback){return}Asyncify.state=Asyncify.State.Rewinding;runAndAbortIfError(()=>_asyncify_start_rewind(Asyncify.currData));if(typeof Browser!="undefined"&&Browser.mainLoop.func){Browser.mainLoop.resume()}var asyncWasmReturnValue,isError=false;try{asyncWasmReturnValue=Asyncify.doRewind(Asyncify.currData)}catch(err){asyncWasmReturnValue=err;isError=true}var handled=false;if(!Asyncify.currData){var asyncPromiseHandlers=Asyncify.asyncPromiseHandlers;if(asyncPromiseHandlers){Asyncify.asyncPromiseHandlers=null;(isError?asyncPromiseHandlers.reject:asyncPromiseHandlers.resolve)(asyncWasmReturnValue);handled=true}}if(isError&&!handled){throw asyncWasmReturnValue}});reachedAfterCallback=true;if(!reachedCallback){Asyncify.state=Asyncify.State.Unwinding;Asyncify.currData=Asyncify.allocateData();if(typeof Browser!="undefined"&&Browser.mainLoop.func){Browser.mainLoop.pause()}runAndAbortIfError(()=>_asyncify_start_unwind(Asyncify.currData))}}else 
if(Asyncify.state===Asyncify.State.Rewinding){Asyncify.state=Asyncify.State.Normal;runAndAbortIfError(_asyncify_stop_rewind);_free(Asyncify.currData);Asyncify.currData=null;Asyncify.sleepCallbacks.forEach(callUserCallback)}else{abort(`invalid state: ${Asyncify.state}`)}return Asyncify.handleSleepReturnValue},handleAsync(startAsync){return Asyncify.handleSleep(wakeUp=>{startAsync().then(wakeUp)})}};var uleb128Encode=(n,target)=>{if(n<128){target.push(n)}else{target.push(n%128|128,n>>7)}};var generateFuncType=(sig,target)=>{var sigRet=sig.slice(0,1);var sigParam=sig.slice(1);var typeCodes={i:127,p:127,j:126,f:125,d:124,e:111};target.push(96);uleb128Encode(sigParam.length,target);for(var i=0;i{if(typeof WebAssembly.Function=="function"){return new WebAssembly.Function(sigToWasmTypes(sig),func)}var typeSectionBody=[1];generateFuncType(sig,typeSectionBody);var bytes=[0,97,115,109,1,0,0,0,1];uleb128Encode(typeSectionBody.length,bytes);bytes.push(...typeSectionBody);bytes.push(2,7,1,1,101,1,102,0,0,7,5,1,1,102,0,0);var module=new WebAssembly.Module(new Uint8Array(bytes));var instance=new WebAssembly.Instance(module,{e:{f:func}});var wrappedFunc=instance.exports["f"];return wrappedFunc};var wasmTableMirror=[];var wasmTable;var getWasmTableEntry=funcPtr=>{var func=wasmTableMirror[funcPtr];if(!func){if(funcPtr>=wasmTableMirror.length)wasmTableMirror.length=funcPtr+1;wasmTableMirror[funcPtr]=func=wasmTable.get(funcPtr)}return func};var updateTableMap=(offset,count)=>{if(functionsInTableMap){for(var i=offset;i{if(!functionsInTableMap){functionsInTableMap=new WeakMap;updateTableMap(0,wasmTable.length)}return functionsInTableMap.get(func)||0};var freeTableIndexes=[];var getEmptyTableSlot=()=>{if(freeTableIndexes.length){return freeTableIndexes.pop()}try{wasmTable.grow(1)}catch(err){if(!(err instanceof RangeError)){throw err}throw"Unable to grow wasm table. 
Set ALLOW_TABLE_GROWTH."}return wasmTable.length-1};var setWasmTableEntry=(idx,func)=>{wasmTable.set(idx,func);wasmTableMirror[idx]=wasmTable.get(idx)};var addFunction=(func,sig)=>{var rtn=getFunctionAddress(func);if(rtn){return rtn}var ret=getEmptyTableSlot();try{setWasmTableEntry(ret,func)}catch(err){if(!(err instanceof TypeError)){throw err}var wrapped=convertJsFunctionToWasm(func,sig);setWasmTableEntry(ret,wrapped)}functionsInTableMap.set(func,ret);return ret};var getCFunc=ident=>{var func=Module["_"+ident];return func};var writeArrayToMemory=(array,buffer)=>{GROWABLE_HEAP_I8().set(array,buffer>>>0)};var lengthBytesUTF8=str=>{var len=0;for(var i=0;i=55296&&c<=57343){len+=4;++i}else{len+=3}}return len};var stringToUTF8Array=(str,heap,outIdx,maxBytesToWrite)=>{outIdx>>>=0;if(!(maxBytesToWrite>0))return 0;var startIdx=outIdx;var endIdx=outIdx+maxBytesToWrite-1;for(var i=0;i=55296&&u<=57343){var u1=str.charCodeAt(++i);u=65536+((u&1023)<<10)|u1&1023}if(u<=127){if(outIdx>=endIdx)break;heap[outIdx++>>>0]=u}else if(u<=2047){if(outIdx+1>=endIdx)break;heap[outIdx++>>>0]=192|u>>6;heap[outIdx++>>>0]=128|u&63}else if(u<=65535){if(outIdx+2>=endIdx)break;heap[outIdx++>>>0]=224|u>>12;heap[outIdx++>>>0]=128|u>>6&63;heap[outIdx++>>>0]=128|u&63}else{if(outIdx+3>=endIdx)break;heap[outIdx++>>>0]=240|u>>18;heap[outIdx++>>>0]=128|u>>12&63;heap[outIdx++>>>0]=128|u>>6&63;heap[outIdx++>>>0]=128|u&63}}heap[outIdx>>>0]=0;return outIdx-startIdx};var stringToUTF8=(str,outPtr,maxBytesToWrite)=>stringToUTF8Array(str,GROWABLE_HEAP_U8(),outPtr,maxBytesToWrite);var stringToUTF8OnStack=str=>{var size=lengthBytesUTF8(str)+1;var ret=stackAlloc(size);stringToUTF8(str,ret,size);return ret};var ccall=(ident,returnType,argTypes,args,opts)=>{var toC={string:str=>{var ret=0;if(str!==null&&str!==undefined&&str!==0){ret=stringToUTF8OnStack(str)}return ret},array:arr=>{var ret=stackAlloc(arr.length);writeArrayToMemory(arr,ret);return ret}};function convertReturnValue(ret){if(returnType==="string"){return 
UTF8ToString(ret)}if(returnType==="boolean")return Boolean(ret);return ret}var func=getCFunc(ident);var cArgs=[];var stack=0;if(args){for(var i=0;i{var numericArgs=!argTypes||argTypes.every(type=>type==="number"||type==="boolean");var numericRet=returnType!=="string";if(numericRet&&numericArgs&&!opts){return getCFunc(ident)}return(...args)=>ccall(ident,returnType,argTypes,args,opts)};Module["cwrap"]=cwrap;PThread.init();xpu_webgpu_support();xpu_cpu_support();var proxiedFunctionTable=[_proc_exit,exitOnMainThread,pthreadCreateProxied,_environ_get,_environ_sizes_get,_fd_write];var wasmImports;function assignWasmImports(){wasmImports={__assert_fail:___assert_fail,__pthread_create_js:___pthread_create_js,_abort_js:__abort_js,_emscripten_get_now_is_monotonic:__emscripten_get_now_is_monotonic,_emscripten_init_main_thread_js:__emscripten_init_main_thread_js,_emscripten_notify_mailbox_postmessage:__emscripten_notify_mailbox_postmessage,_emscripten_receive_on_main_thread_js:__emscripten_receive_on_main_thread_js,_emscripten_thread_cleanup:__emscripten_thread_cleanup,_emscripten_thread_mailbox_await:__emscripten_thread_mailbox_await,_emscripten_thread_set_strongref:__emscripten_thread_set_strongref,emscripten_check_blocking_allowed:_emscripten_check_blocking_allowed,emscripten_date_now:_emscripten_date_now,emscripten_exit_with_live_runtime:_emscripten_exit_with_live_runtime,emscripten_get_heap_max:_emscripten_get_heap_max,emscripten_get_now:_emscripten_get_now,emscripten_num_logical_cores:_emscripten_num_logical_cores,emscripten_resize_heap:_emscripten_resize_heap,environ_get:_environ_get,environ_sizes_get:_environ_sizes_get,exit:_exit,fd_write:_fd_write,memory:wasmMemory,pv_console_log_wasm:_pv_console_log_wasm,pv_file_close_wasm:_pv_file_close_wasm,pv_file_open_wasm:_pv_file_open_wasm,pv_file_read_wasm:_pv_file_read_wasm,pv_file_seek_wasm:_pv_file_seek_wasm,pv_file_tell_wasm:_pv_file_tell_wasm,pv_https_request_wasm:_pv_https_request_wasm,pv_picollm_attention_dot_product_webg
pu_wasm:_pv_picollm_attention_dot_product_webgpu_wasm,pv_picollm_attention_encode_webgpu_wasm:_pv_picollm_attention_encode_webgpu_wasm,pv_picollm_attention_fir_webgpu_wasm:_pv_picollm_attention_fir_webgpu_wasm,pv_picollm_attention_precompute_encoding_webgpu_wasm:_pv_picollm_attention_precompute_encoding_webgpu_wasm,pv_picollm_attention_softmax_webgpu_wasm:_pv_picollm_attention_softmax_webgpu_wasm,pv_picollm_attention_transpose_query_webgpu_wasm:_pv_picollm_attention_transpose_query_webgpu_wasm,pv_picollm_attention_update_kv_webgpu_wasm:_pv_picollm_attention_update_kv_webgpu_wasm,pv_picollm_feed_forward_almost_gelu_webgpu_wasm:_pv_picollm_feed_forward_almost_gelu_webgpu_wasm,pv_picollm_feed_forward_gelu_webgpu_wasm:_pv_picollm_feed_forward_gelu_webgpu_wasm,pv_picollm_feed_forward_multiply_buffers_webgpu_wasm:_pv_picollm_feed_forward_multiply_buffers_webgpu_wasm,pv_picollm_feed_forward_silu_webgpu_wasm:_pv_picollm_feed_forward_silu_webgpu_wasm,pv_picollm_gate_forward_webgpu_wasm:_pv_picollm_gate_forward_webgpu_wasm,pv_picollm_moe_transformer_add_buffers_webgpu_wasm:_pv_picollm_moe_transformer_add_buffers_webgpu_wasm,pv_picollm_moe_transformer_add_to_buffer_webgpu_wasm:_pv_picollm_moe_transformer_add_to_buffer_webgpu_wasm,pv_picollm_moe_transformer_multiply_weight_and_add_to_buffer_webgpu_wasm:_pv_picollm_moe_transformer_multiply_weight_and_add_to_buffer_webgpu_wasm,pv_picollm_norm_forward_webgpu_wasm:_pv_picollm_norm_forward_webgpu_wasm,pv_picollm_norm_layer_forward_webgpu_wasm:_pv_picollm_norm_layer_forward_webgpu_wasm,pv_picollm_transformer_add_buffers_webgpu_wasm:_pv_picollm_transformer_add_buffers_webgpu_wasm,pv_picollm_transformer_add_to_buffer_webgpu_wasm:_pv_picollm_transformer_add_to_buffer_webgpu_wasm,pv_picollm_weight_block_mixed_16x8_add_bias_webgpu_wasm:_pv_picollm_weight_block_mixed_16x8_add_bias_webgpu_wasm,pv_picollm_weight_block_mixed_16x8_forward_multiple_shuffle_x_webgpu_wasm:_pv_picollm_weight_block_mixed_16x8_forward_multiple_shuffle_x_webgpu_wasm,
// Remaining `name:_name` entries of an object literal that opens before this
// point; every key maps to the binding with the same name plus a leading
// underscore. After the literal and its enclosing block close (`}}`),
// createWasm() is called once and its result is kept in `wasmExports`.
// `___wasm_call_ctors` is then declared as a self-replacing stub: its first
// call rebinds the variable to wasmExports["__wasm_call_ctors"] and invokes it.
pv_picollm_weight_block_mixed_16x8_forward_multiple_shuffle_y_webgpu_wasm:_pv_picollm_weight_block_mixed_16x8_forward_multiple_shuffle_y_webgpu_wasm,pv_picollm_weight_block_mixed_16x8_forward_multiple_webgpu_wasm:_pv_picollm_weight_block_mixed_16x8_forward_multiple_webgpu_wasm,pv_picollm_weight_block_mixed_16x8_forward_single_reduce_y_webgpu_wasm:_pv_picollm_weight_block_mixed_16x8_forward_single_reduce_y_webgpu_wasm,pv_picollm_weight_block_mixed_16x8_forward_single_shuffle_x_webgpu_wasm:_pv_picollm_weight_block_mixed_16x8_forward_single_shuffle_x_webgpu_wasm,pv_picollm_weight_block_mixed_16x8_forward_single_webgpu_wasm:_pv_picollm_weight_block_mixed_16x8_forward_single_webgpu_wasm,pv_picollm_weight_block_mixed_16x8_preprocess_blocks_webgpu_wasm:_pv_picollm_weight_block_mixed_16x8_preprocess_blocks_webgpu_wasm,pv_picollm_weight_float_forward_webgpu_wasm:_pv_picollm_weight_float_forward_webgpu_wasm,pv_sleep_wasm:_pv_sleep_wasm,pv_time_wasm:_pv_time_wasm,pv_xpu_device_info_wasm:_pv_xpu_device_info_wasm,pv_xpu_get_max_workers_wasm:_pv_xpu_get_max_workers_wasm,pv_xpu_webgpu_device_cleanup_wasm:_pv_xpu_webgpu_device_cleanup_wasm,pv_xpu_webgpu_device_info_wasm:_pv_xpu_webgpu_device_info_wasm,pv_xpu_webgpu_device_init_wasm:_pv_xpu_webgpu_device_init_wasm,pv_xpu_webgpu_device_load_shader_func_wasm:_pv_xpu_webgpu_device_load_shader_func_wasm,pv_xpu_webgpu_device_mem_alloc_wasm:_pv_xpu_webgpu_device_mem_alloc_wasm,pv_xpu_webgpu_device_mem_copy_from_xpu_wasm:_pv_xpu_webgpu_device_mem_copy_from_xpu_wasm,pv_xpu_webgpu_device_mem_copy_to_xpu_wasm:_pv_xpu_webgpu_device_mem_copy_to_xpu_wasm,pv_xpu_webgpu_device_mem_free_wasm:_pv_xpu_webgpu_device_mem_free_wasm,pv_xpu_webgpu_device_mem_memset_wasm:_pv_xpu_webgpu_device_mem_memset_wasm,pv_xpu_webgpu_device_wait_wasm:_pv_xpu_webgpu_device_wait_wasm}}var wasmExports=createWasm();var ___wasm_call_ctors=()=>(___wasm_call_ctors=wasmExports["__wasm_call_ctors"])();var 
// Lazy export stubs. Each binding (and its mirror property on `Module`) starts
// out as an arrow function that, on its first call, replaces both the variable
// and the `Module` property with the matching `wasmExports` entry and forwards
// its arguments to it.
// The names here are Itanium-mangled C++ symbols: operator new / new[] /
// delete / delete[] (plain, sized and std::align_val_t overloads), followed by
// std::set_unexpected and std::set_terminate.
__Znwm=Module["__Znwm"]=a0=>(__Znwm=Module["__Znwm"]=wasmExports["_Znwm"])(a0);var __Znam=Module["__Znam"]=a0=>(__Znam=Module["__Znam"]=wasmExports["_Znam"])(a0);var __ZdlPv=Module["__ZdlPv"]=a0=>(__ZdlPv=Module["__ZdlPv"]=wasmExports["_ZdlPv"])(a0);var __ZdlPvm=Module["__ZdlPvm"]=(a0,a1)=>(__ZdlPvm=Module["__ZdlPvm"]=wasmExports["_ZdlPvm"])(a0,a1);var __ZdaPv=Module["__ZdaPv"]=a0=>(__ZdaPv=Module["__ZdaPv"]=wasmExports["_ZdaPv"])(a0);var __ZdaPvm=Module["__ZdaPvm"]=(a0,a1)=>(__ZdaPvm=Module["__ZdaPvm"]=wasmExports["_ZdaPvm"])(a0,a1);var __ZnwmSt11align_val_t=Module["__ZnwmSt11align_val_t"]=(a0,a1)=>(__ZnwmSt11align_val_t=Module["__ZnwmSt11align_val_t"]=wasmExports["_ZnwmSt11align_val_t"])(a0,a1);var __ZnamSt11align_val_t=Module["__ZnamSt11align_val_t"]=(a0,a1)=>(__ZnamSt11align_val_t=Module["__ZnamSt11align_val_t"]=wasmExports["_ZnamSt11align_val_t"])(a0,a1);var __ZdlPvSt11align_val_t=Module["__ZdlPvSt11align_val_t"]=(a0,a1)=>(__ZdlPvSt11align_val_t=Module["__ZdlPvSt11align_val_t"]=wasmExports["_ZdlPvSt11align_val_t"])(a0,a1);var __ZdlPvmSt11align_val_t=Module["__ZdlPvmSt11align_val_t"]=(a0,a1,a2)=>(__ZdlPvmSt11align_val_t=Module["__ZdlPvmSt11align_val_t"]=wasmExports["_ZdlPvmSt11align_val_t"])(a0,a1,a2);var __ZdaPvSt11align_val_t=Module["__ZdaPvSt11align_val_t"]=(a0,a1)=>(__ZdaPvSt11align_val_t=Module["__ZdaPvSt11align_val_t"]=wasmExports["_ZdaPvSt11align_val_t"])(a0,a1);var __ZdaPvmSt11align_val_t=Module["__ZdaPvmSt11align_val_t"]=(a0,a1,a2)=>(__ZdaPvmSt11align_val_t=Module["__ZdaPvmSt11align_val_t"]=wasmExports["_ZdaPvmSt11align_val_t"])(a0,a1,a2);var __ZSt14set_unexpectedPFvvE=Module["__ZSt14set_unexpectedPFvvE"]=a0=>(__ZSt14set_unexpectedPFvvE=Module["__ZSt14set_unexpectedPFvvE"]=wasmExports["_ZSt14set_unexpectedPFvvE"])(a0);var __ZSt13set_terminatePFvvE=Module["__ZSt13set_terminatePFvvE"]=a0=>(__ZSt13set_terminatePFvvE=Module["__ZSt13set_terminatePFvvE"]=wasmExports["_ZSt13set_terminatePFvvE"])(a0);var 
// Lazy export stubs: on first call each one swaps itself (variable and
// `Module` property) for the real `wasmExports` entry, then forwards the call.
// Covered here: std::set_new_handler, the get_unexpected / unexpected /
// get_terminate / terminate / get_new_handler accessors, __cxa_pure_virtual,
// __cxa_deleted_virtual, __dynamic_cast (four arguments) and the three
// std::exception destructor variants (D2, D0, D1).
__ZSt15set_new_handlerPFvvE=Module["__ZSt15set_new_handlerPFvvE"]=a0=>(__ZSt15set_new_handlerPFvvE=Module["__ZSt15set_new_handlerPFvvE"]=wasmExports["_ZSt15set_new_handlerPFvvE"])(a0);var __ZSt14get_unexpectedv=Module["__ZSt14get_unexpectedv"]=()=>(__ZSt14get_unexpectedv=Module["__ZSt14get_unexpectedv"]=wasmExports["_ZSt14get_unexpectedv"])();var __ZSt10unexpectedv=Module["__ZSt10unexpectedv"]=()=>(__ZSt10unexpectedv=Module["__ZSt10unexpectedv"]=wasmExports["_ZSt10unexpectedv"])();var __ZSt13get_terminatev=Module["__ZSt13get_terminatev"]=()=>(__ZSt13get_terminatev=Module["__ZSt13get_terminatev"]=wasmExports["_ZSt13get_terminatev"])();var __ZSt9terminatev=Module["__ZSt9terminatev"]=()=>(__ZSt9terminatev=Module["__ZSt9terminatev"]=wasmExports["_ZSt9terminatev"])();var __ZSt15get_new_handlerv=Module["__ZSt15get_new_handlerv"]=()=>(__ZSt15get_new_handlerv=Module["__ZSt15get_new_handlerv"]=wasmExports["_ZSt15get_new_handlerv"])();var ___cxa_pure_virtual=Module["___cxa_pure_virtual"]=()=>(___cxa_pure_virtual=Module["___cxa_pure_virtual"]=wasmExports["__cxa_pure_virtual"])();var ___cxa_deleted_virtual=Module["___cxa_deleted_virtual"]=()=>(___cxa_deleted_virtual=Module["___cxa_deleted_virtual"]=wasmExports["__cxa_deleted_virtual"])();var ___dynamic_cast=Module["___dynamic_cast"]=(a0,a1,a2,a3)=>(___dynamic_cast=Module["___dynamic_cast"]=wasmExports["__dynamic_cast"])(a0,a1,a2,a3);var __ZNSt9exceptionD2Ev=Module["__ZNSt9exceptionD2Ev"]=a0=>(__ZNSt9exceptionD2Ev=Module["__ZNSt9exceptionD2Ev"]=wasmExports["_ZNSt9exceptionD2Ev"])(a0);var __ZNSt9exceptionD0Ev=Module["__ZNSt9exceptionD0Ev"]=a0=>(__ZNSt9exceptionD0Ev=Module["__ZNSt9exceptionD0Ev"]=wasmExports["_ZNSt9exceptionD0Ev"])(a0);var __ZNSt9exceptionD1Ev=Module["__ZNSt9exceptionD1Ev"]=a0=>(__ZNSt9exceptionD1Ev=Module["__ZNSt9exceptionD1Ev"]=wasmExports["_ZNSt9exceptionD1Ev"])(a0);var 
// Lazy export stubs (self-replacing on first call, mirrored on `Module`) for
// mangled C++ exception-class members: std::exception::what, and the
// constructors / destructors / what() of std::bad_exception, std::bad_alloc
// and std::bad_array_new_length. Every stub takes a single argument.
__ZNKSt9exception4whatEv=Module["__ZNKSt9exception4whatEv"]=a0=>(__ZNKSt9exception4whatEv=Module["__ZNKSt9exception4whatEv"]=wasmExports["_ZNKSt9exception4whatEv"])(a0);var __ZNSt13bad_exceptionD0Ev=Module["__ZNSt13bad_exceptionD0Ev"]=a0=>(__ZNSt13bad_exceptionD0Ev=Module["__ZNSt13bad_exceptionD0Ev"]=wasmExports["_ZNSt13bad_exceptionD0Ev"])(a0);var __ZNSt13bad_exceptionD1Ev=Module["__ZNSt13bad_exceptionD1Ev"]=a0=>(__ZNSt13bad_exceptionD1Ev=Module["__ZNSt13bad_exceptionD1Ev"]=wasmExports["_ZNSt13bad_exceptionD1Ev"])(a0);var __ZNKSt13bad_exception4whatEv=Module["__ZNKSt13bad_exception4whatEv"]=a0=>(__ZNKSt13bad_exception4whatEv=Module["__ZNKSt13bad_exception4whatEv"]=wasmExports["_ZNKSt13bad_exception4whatEv"])(a0);var __ZNSt9bad_allocC2Ev=Module["__ZNSt9bad_allocC2Ev"]=a0=>(__ZNSt9bad_allocC2Ev=Module["__ZNSt9bad_allocC2Ev"]=wasmExports["_ZNSt9bad_allocC2Ev"])(a0);var __ZNSt9bad_allocD0Ev=Module["__ZNSt9bad_allocD0Ev"]=a0=>(__ZNSt9bad_allocD0Ev=Module["__ZNSt9bad_allocD0Ev"]=wasmExports["_ZNSt9bad_allocD0Ev"])(a0);var __ZNSt9bad_allocD1Ev=Module["__ZNSt9bad_allocD1Ev"]=a0=>(__ZNSt9bad_allocD1Ev=Module["__ZNSt9bad_allocD1Ev"]=wasmExports["_ZNSt9bad_allocD1Ev"])(a0);var __ZNKSt9bad_alloc4whatEv=Module["__ZNKSt9bad_alloc4whatEv"]=a0=>(__ZNKSt9bad_alloc4whatEv=Module["__ZNKSt9bad_alloc4whatEv"]=wasmExports["_ZNKSt9bad_alloc4whatEv"])(a0);var __ZNSt20bad_array_new_lengthC2Ev=Module["__ZNSt20bad_array_new_lengthC2Ev"]=a0=>(__ZNSt20bad_array_new_lengthC2Ev=Module["__ZNSt20bad_array_new_lengthC2Ev"]=wasmExports["_ZNSt20bad_array_new_lengthC2Ev"])(a0);var __ZNSt20bad_array_new_lengthD0Ev=Module["__ZNSt20bad_array_new_lengthD0Ev"]=a0=>(__ZNSt20bad_array_new_lengthD0Ev=Module["__ZNSt20bad_array_new_lengthD0Ev"]=wasmExports["_ZNSt20bad_array_new_lengthD0Ev"])(a0);var __ZNSt20bad_array_new_lengthD1Ev=Module["__ZNSt20bad_array_new_lengthD1Ev"]=a0=>(__ZNSt20bad_array_new_lengthD1Ev=Module["__ZNSt20bad_array_new_lengthD1Ev"]=wasmExports["_ZNSt20bad_array_new_lengthD1Ev"])(a0);var 
// Lazy export stubs (self-replacing on first call, mirrored on `Module`) for
// further mangled C++ runtime members: std::bad_array_new_length::what, the
// remaining bad_exception / bad_alloc / bad_array_new_length constructor and
// destructor variants, the std::type_info destructors, and the first
// std::bad_cast constructor / destructor. Every stub takes a single argument.
__ZNKSt20bad_array_new_length4whatEv=Module["__ZNKSt20bad_array_new_length4whatEv"]=a0=>(__ZNKSt20bad_array_new_length4whatEv=Module["__ZNKSt20bad_array_new_length4whatEv"]=wasmExports["_ZNKSt20bad_array_new_length4whatEv"])(a0);var __ZNSt13bad_exceptionD2Ev=Module["__ZNSt13bad_exceptionD2Ev"]=a0=>(__ZNSt13bad_exceptionD2Ev=Module["__ZNSt13bad_exceptionD2Ev"]=wasmExports["_ZNSt13bad_exceptionD2Ev"])(a0);var __ZNSt9bad_allocC1Ev=Module["__ZNSt9bad_allocC1Ev"]=a0=>(__ZNSt9bad_allocC1Ev=Module["__ZNSt9bad_allocC1Ev"]=wasmExports["_ZNSt9bad_allocC1Ev"])(a0);var __ZNSt9bad_allocD2Ev=Module["__ZNSt9bad_allocD2Ev"]=a0=>(__ZNSt9bad_allocD2Ev=Module["__ZNSt9bad_allocD2Ev"]=wasmExports["_ZNSt9bad_allocD2Ev"])(a0);var __ZNSt20bad_array_new_lengthC1Ev=Module["__ZNSt20bad_array_new_lengthC1Ev"]=a0=>(__ZNSt20bad_array_new_lengthC1Ev=Module["__ZNSt20bad_array_new_lengthC1Ev"]=wasmExports["_ZNSt20bad_array_new_lengthC1Ev"])(a0);var __ZNSt20bad_array_new_lengthD2Ev=Module["__ZNSt20bad_array_new_lengthD2Ev"]=a0=>(__ZNSt20bad_array_new_lengthD2Ev=Module["__ZNSt20bad_array_new_lengthD2Ev"]=wasmExports["_ZNSt20bad_array_new_lengthD2Ev"])(a0);var __ZNSt9type_infoD2Ev=Module["__ZNSt9type_infoD2Ev"]=a0=>(__ZNSt9type_infoD2Ev=Module["__ZNSt9type_infoD2Ev"]=wasmExports["_ZNSt9type_infoD2Ev"])(a0);var __ZNSt9type_infoD0Ev=Module["__ZNSt9type_infoD0Ev"]=a0=>(__ZNSt9type_infoD0Ev=Module["__ZNSt9type_infoD0Ev"]=wasmExports["_ZNSt9type_infoD0Ev"])(a0);var __ZNSt9type_infoD1Ev=Module["__ZNSt9type_infoD1Ev"]=a0=>(__ZNSt9type_infoD1Ev=Module["__ZNSt9type_infoD1Ev"]=wasmExports["_ZNSt9type_infoD1Ev"])(a0);var __ZNSt8bad_castC2Ev=Module["__ZNSt8bad_castC2Ev"]=a0=>(__ZNSt8bad_castC2Ev=Module["__ZNSt8bad_castC2Ev"]=wasmExports["_ZNSt8bad_castC2Ev"])(a0);var __ZNSt8bad_castD2Ev=Module["__ZNSt8bad_castD2Ev"]=a0=>(__ZNSt8bad_castD2Ev=Module["__ZNSt8bad_castD2Ev"]=wasmExports["_ZNSt8bad_castD2Ev"])(a0);var 
// Lazy export stubs (self-replacing on first call). First the remaining
// std::bad_cast and std::bad_typeid constructor / destructor / what() members,
// then `pv_picollm_delete`, `free` and `malloc`.
// Note that `_free` only rebinds the local variable; unlike its neighbours it
// is not mirrored onto `Module`.
__ZNSt8bad_castD0Ev=Module["__ZNSt8bad_castD0Ev"]=a0=>(__ZNSt8bad_castD0Ev=Module["__ZNSt8bad_castD0Ev"]=wasmExports["_ZNSt8bad_castD0Ev"])(a0);var __ZNSt8bad_castD1Ev=Module["__ZNSt8bad_castD1Ev"]=a0=>(__ZNSt8bad_castD1Ev=Module["__ZNSt8bad_castD1Ev"]=wasmExports["_ZNSt8bad_castD1Ev"])(a0);var __ZNKSt8bad_cast4whatEv=Module["__ZNKSt8bad_cast4whatEv"]=a0=>(__ZNKSt8bad_cast4whatEv=Module["__ZNKSt8bad_cast4whatEv"]=wasmExports["_ZNKSt8bad_cast4whatEv"])(a0);var __ZNSt10bad_typeidC2Ev=Module["__ZNSt10bad_typeidC2Ev"]=a0=>(__ZNSt10bad_typeidC2Ev=Module["__ZNSt10bad_typeidC2Ev"]=wasmExports["_ZNSt10bad_typeidC2Ev"])(a0);var __ZNSt10bad_typeidD2Ev=Module["__ZNSt10bad_typeidD2Ev"]=a0=>(__ZNSt10bad_typeidD2Ev=Module["__ZNSt10bad_typeidD2Ev"]=wasmExports["_ZNSt10bad_typeidD2Ev"])(a0);var __ZNSt10bad_typeidD0Ev=Module["__ZNSt10bad_typeidD0Ev"]=a0=>(__ZNSt10bad_typeidD0Ev=Module["__ZNSt10bad_typeidD0Ev"]=wasmExports["_ZNSt10bad_typeidD0Ev"])(a0);var __ZNSt10bad_typeidD1Ev=Module["__ZNSt10bad_typeidD1Ev"]=a0=>(__ZNSt10bad_typeidD1Ev=Module["__ZNSt10bad_typeidD1Ev"]=wasmExports["_ZNSt10bad_typeidD1Ev"])(a0);var __ZNKSt10bad_typeid4whatEv=Module["__ZNKSt10bad_typeid4whatEv"]=a0=>(__ZNKSt10bad_typeid4whatEv=Module["__ZNKSt10bad_typeid4whatEv"]=wasmExports["_ZNKSt10bad_typeid4whatEv"])(a0);var __ZNSt8bad_castC1Ev=Module["__ZNSt8bad_castC1Ev"]=a0=>(__ZNSt8bad_castC1Ev=Module["__ZNSt8bad_castC1Ev"]=wasmExports["_ZNSt8bad_castC1Ev"])(a0);var __ZNSt10bad_typeidC1Ev=Module["__ZNSt10bad_typeidC1Ev"]=a0=>(__ZNSt10bad_typeidC1Ev=Module["__ZNSt10bad_typeidC1Ev"]=wasmExports["_ZNSt10bad_typeidC1Ev"])(a0);var _pv_picollm_delete=Module["_pv_picollm_delete"]=a0=>(_pv_picollm_delete=Module["_pv_picollm_delete"]=wasmExports["pv_picollm_delete"])(a0);var _free=a0=>(_free=wasmExports["free"])(a0);var _malloc=Module["_malloc"]=a0=>(_malloc=Module["_malloc"]=wasmExports["malloc"])(a0);var 
// Lazy export stubs for the `pv_picollm_*` entry points: version, init (4
// arguments), generate (18 arguments), delete_completion_tokens,
// delete_completion, interrupt, tokenize (6 arguments), delete_tokens,
// forward (4 arguments) and delete_logits.
// Each stub, on first call, replaces both the variable and the `Module`
// property with the real `wasmExports` function and forwards its arguments.
_pv_picollm_version=Module["_pv_picollm_version"]=()=>(_pv_picollm_version=Module["_pv_picollm_version"]=wasmExports["pv_picollm_version"])();var _pv_picollm_init=Module["_pv_picollm_init"]=(a0,a1,a2,a3)=>(_pv_picollm_init=Module["_pv_picollm_init"]=wasmExports["pv_picollm_init"])(a0,a1,a2,a3);var _pv_picollm_generate=Module["_pv_picollm_generate"]=(a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12,a13,a14,a15,a16,a17)=>(_pv_picollm_generate=Module["_pv_picollm_generate"]=wasmExports["pv_picollm_generate"])(a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12,a13,a14,a15,a16,a17);var _pv_picollm_delete_completion_tokens=Module["_pv_picollm_delete_completion_tokens"]=(a0,a1)=>(_pv_picollm_delete_completion_tokens=Module["_pv_picollm_delete_completion_tokens"]=wasmExports["pv_picollm_delete_completion_tokens"])(a0,a1);var _pv_picollm_delete_completion=Module["_pv_picollm_delete_completion"]=a0=>(_pv_picollm_delete_completion=Module["_pv_picollm_delete_completion"]=wasmExports["pv_picollm_delete_completion"])(a0);var _pv_picollm_interrupt=Module["_pv_picollm_interrupt"]=a0=>(_pv_picollm_interrupt=Module["_pv_picollm_interrupt"]=wasmExports["pv_picollm_interrupt"])(a0);var _pv_picollm_tokenize=Module["_pv_picollm_tokenize"]=(a0,a1,a2,a3,a4,a5)=>(_pv_picollm_tokenize=Module["_pv_picollm_tokenize"]=wasmExports["pv_picollm_tokenize"])(a0,a1,a2,a3,a4,a5);var _pv_picollm_delete_tokens=Module["_pv_picollm_delete_tokens"]=a0=>(_pv_picollm_delete_tokens=Module["_pv_picollm_delete_tokens"]=wasmExports["pv_picollm_delete_tokens"])(a0);var _pv_picollm_forward=Module["_pv_picollm_forward"]=(a0,a1,a2,a3)=>(_pv_picollm_forward=Module["_pv_picollm_forward"]=wasmExports["pv_picollm_forward"])(a0,a1,a2,a3);var _pv_picollm_delete_logits=Module["_pv_picollm_delete_logits"]=a0=>(_pv_picollm_delete_logits=Module["_pv_picollm_delete_logits"]=wasmExports["pv_picollm_delete_logits"])(a0);var 
// Lazy export stubs (self-replacing on first call, mirrored on `Module`) for
// more `pv_picollm_*` entry points (reset, model, context_length,
// max_top_choices, list/free_hardware_devices, table_forward), the
// pv_log_enable / pv_log_disable switches, aligned_alloc, pv_get_sdk and
// pv_sample_rate.
_pv_picollm_reset=Module["_pv_picollm_reset"]=a0=>(_pv_picollm_reset=Module["_pv_picollm_reset"]=wasmExports["pv_picollm_reset"])(a0);var _pv_picollm_model=Module["_pv_picollm_model"]=(a0,a1)=>(_pv_picollm_model=Module["_pv_picollm_model"]=wasmExports["pv_picollm_model"])(a0,a1);var _pv_picollm_context_length=Module["_pv_picollm_context_length"]=(a0,a1)=>(_pv_picollm_context_length=Module["_pv_picollm_context_length"]=wasmExports["pv_picollm_context_length"])(a0,a1);var _pv_picollm_max_top_choices=Module["_pv_picollm_max_top_choices"]=()=>(_pv_picollm_max_top_choices=Module["_pv_picollm_max_top_choices"]=wasmExports["pv_picollm_max_top_choices"])();var _pv_picollm_list_hardware_devices=Module["_pv_picollm_list_hardware_devices"]=(a0,a1)=>(_pv_picollm_list_hardware_devices=Module["_pv_picollm_list_hardware_devices"]=wasmExports["pv_picollm_list_hardware_devices"])(a0,a1);var _pv_picollm_free_hardware_devices=Module["_pv_picollm_free_hardware_devices"]=(a0,a1)=>(_pv_picollm_free_hardware_devices=Module["_pv_picollm_free_hardware_devices"]=wasmExports["pv_picollm_free_hardware_devices"])(a0,a1);var _pv_picollm_table_forward=Module["_pv_picollm_table_forward"]=(a0,a1,a2,a3)=>(_pv_picollm_table_forward=Module["_pv_picollm_table_forward"]=wasmExports["pv_picollm_table_forward"])(a0,a1,a2,a3);var _pv_log_enable=Module["_pv_log_enable"]=()=>(_pv_log_enable=Module["_pv_log_enable"]=wasmExports["pv_log_enable"])();var _pv_log_disable=Module["_pv_log_disable"]=()=>(_pv_log_disable=Module["_pv_log_disable"]=wasmExports["pv_log_disable"])();var _aligned_alloc=Module["_aligned_alloc"]=(a0,a1)=>(_aligned_alloc=Module["_aligned_alloc"]=wasmExports["aligned_alloc"])(a0,a1);var _pv_get_sdk=Module["_pv_get_sdk"]=()=>(_pv_get_sdk=Module["_pv_get_sdk"]=wasmExports["pv_get_sdk"])();var _pv_sample_rate=Module["_pv_sample_rate"]=()=>(_pv_sample_rate=Module["_pv_sample_rate"]=wasmExports["pv_sample_rate"])();var 
// Lazy export stubs (self-replacing on first call). First the pv_* helpers
// (status_to_string, set_sdk, free, get/free_error_stack), then runtime
// exports for TLS, pthread, bulk-memory memcpy/memset and thread state.
// `__emscripten_tls_init`, `_pthread_self` and `__emscripten_thread_init`
// rebind only the local variable; the other stubs are also mirrored on
// `Module`.
_pv_status_to_string=Module["_pv_status_to_string"]=a0=>(_pv_status_to_string=Module["_pv_status_to_string"]=wasmExports["pv_status_to_string"])(a0);var _pv_set_sdk=Module["_pv_set_sdk"]=a0=>(_pv_set_sdk=Module["_pv_set_sdk"]=wasmExports["pv_set_sdk"])(a0);var _pv_free=Module["_pv_free"]=a0=>(_pv_free=Module["_pv_free"]=wasmExports["pv_free"])(a0);var _pv_get_error_stack=Module["_pv_get_error_stack"]=(a0,a1)=>(_pv_get_error_stack=Module["_pv_get_error_stack"]=wasmExports["pv_get_error_stack"])(a0,a1);var _pv_free_error_stack=Module["_pv_free_error_stack"]=a0=>(_pv_free_error_stack=Module["_pv_free_error_stack"]=wasmExports["pv_free_error_stack"])(a0);var __emscripten_tls_init=()=>(__emscripten_tls_init=wasmExports["_emscripten_tls_init"])();var _pthread_self=()=>(_pthread_self=wasmExports["pthread_self"])();var __emscripten_memcpy_bulkmem=Module["__emscripten_memcpy_bulkmem"]=(a0,a1,a2)=>(__emscripten_memcpy_bulkmem=Module["__emscripten_memcpy_bulkmem"]=wasmExports["_emscripten_memcpy_bulkmem"])(a0,a1,a2);var __emscripten_memset_bulkmem=Module["__emscripten_memset_bulkmem"]=(a0,a1,a2)=>(__emscripten_memset_bulkmem=Module["__emscripten_memset_bulkmem"]=wasmExports["_emscripten_memset_bulkmem"])(a0,a1,a2);var ___get_tp=Module["___get_tp"]=()=>(___get_tp=Module["___get_tp"]=wasmExports["__get_tp"])();var __emscripten_thread_supports_atomics_wait=Module["__emscripten_thread_supports_atomics_wait"]=()=>(__emscripten_thread_supports_atomics_wait=Module["__emscripten_thread_supports_atomics_wait"]=wasmExports["_emscripten_thread_supports_atomics_wait"])();var __emscripten_thread_init=(a0,a1,a2,a3,a4,a5)=>(__emscripten_thread_init=wasmExports["_emscripten_thread_init"])(a0,a1,a2,a3,a4,a5);var ___set_thread_state=Module["___set_thread_state"]=(a0,a1,a2,a3)=>(___set_thread_state=Module["___set_thread_state"]=wasmExports["__set_thread_state"])(a0,a1,a2,a3);var 
// Lazy export stubs (self-replacing on first call) for thread and stack
// runtime exports. Only `_emscripten_is_main_runtime_thread`,
// `_emscripten_is_main_browser_thread`, `_emscripten_stack_get_base` and
// `_emscripten_stack_get_end` are mirrored on `Module`; every other stub here
// rebinds just its local variable.
_emscripten_is_main_runtime_thread=Module["_emscripten_is_main_runtime_thread"]=()=>(_emscripten_is_main_runtime_thread=Module["_emscripten_is_main_runtime_thread"]=wasmExports["emscripten_is_main_runtime_thread"])();var _emscripten_is_main_browser_thread=Module["_emscripten_is_main_browser_thread"]=()=>(_emscripten_is_main_browser_thread=Module["_emscripten_is_main_browser_thread"]=wasmExports["emscripten_is_main_browser_thread"])();var __emscripten_thread_crashed=()=>(__emscripten_thread_crashed=wasmExports["_emscripten_thread_crashed"])();var _emscripten_main_thread_process_queued_calls=()=>(_emscripten_main_thread_process_queued_calls=wasmExports["emscripten_main_thread_process_queued_calls"])();var _emscripten_main_runtime_thread_id=()=>(_emscripten_main_runtime_thread_id=wasmExports["emscripten_main_runtime_thread_id"])();var _emscripten_stack_get_base=Module["_emscripten_stack_get_base"]=()=>(_emscripten_stack_get_base=Module["_emscripten_stack_get_base"]=wasmExports["emscripten_stack_get_base"])();var _emscripten_stack_get_end=Module["_emscripten_stack_get_end"]=()=>(_emscripten_stack_get_end=Module["_emscripten_stack_get_end"]=wasmExports["emscripten_stack_get_end"])();var __emscripten_run_on_main_thread_js=(a0,a1,a2,a3,a4)=>(__emscripten_run_on_main_thread_js=wasmExports["_emscripten_run_on_main_thread_js"])(a0,a1,a2,a3,a4);var __emscripten_thread_free_data=a0=>(__emscripten_thread_free_data=wasmExports["_emscripten_thread_free_data"])(a0);var __emscripten_thread_exit=a0=>(__emscripten_thread_exit=wasmExports["_emscripten_thread_exit"])(a0);var __emscripten_check_mailbox=()=>(__emscripten_check_mailbox=wasmExports["_emscripten_check_mailbox"])();var __emscripten_tempret_set=a0=>(__emscripten_tempret_set=wasmExports["_emscripten_tempret_set"])(a0);var __emscripten_tempret_get=()=>(__emscripten_tempret_get=wasmExports["_emscripten_tempret_get"])();var 
// Lazy export stubs (self-replacing on first call): getTempRet0 / setTempRet0,
// stack management exports, the wasm-worker initializer, and the first
// `dynCall_<sig>` trampolines.
// `_emscripten_stack_set_limits`, `__emscripten_stack_restore`,
// `__emscripten_stack_alloc` and `_emscripten_stack_get_current` rebind only
// the local variable; the rest are also mirrored on `Module`.
// Each dynCall stub here takes one argument per character of its signature
// suffix. NOTE(review): presumably the first argument is the function-table
// index and the leading signature character is the return type — confirm
// against the Emscripten docs.
_getTempRet0=Module["_getTempRet0"]=()=>(_getTempRet0=Module["_getTempRet0"]=wasmExports["getTempRet0"])();var _setTempRet0=Module["_setTempRet0"]=a0=>(_setTempRet0=Module["_setTempRet0"]=wasmExports["setTempRet0"])(a0);var _emscripten_stack_init=Module["_emscripten_stack_init"]=()=>(_emscripten_stack_init=Module["_emscripten_stack_init"]=wasmExports["emscripten_stack_init"])();var _emscripten_stack_set_limits=(a0,a1)=>(_emscripten_stack_set_limits=wasmExports["emscripten_stack_set_limits"])(a0,a1);var _emscripten_stack_get_free=Module["_emscripten_stack_get_free"]=()=>(_emscripten_stack_get_free=Module["_emscripten_stack_get_free"]=wasmExports["emscripten_stack_get_free"])();var __emscripten_wasm_worker_initialize=Module["__emscripten_wasm_worker_initialize"]=(a0,a1)=>(__emscripten_wasm_worker_initialize=Module["__emscripten_wasm_worker_initialize"]=wasmExports["_emscripten_wasm_worker_initialize"])(a0,a1);var __emscripten_stack_restore=a0=>(__emscripten_stack_restore=wasmExports["_emscripten_stack_restore"])(a0);var __emscripten_stack_alloc=a0=>(__emscripten_stack_alloc=wasmExports["_emscripten_stack_alloc"])(a0);var _emscripten_stack_get_current=()=>(_emscripten_stack_get_current=wasmExports["emscripten_stack_get_current"])();var dynCall_ii=Module["dynCall_ii"]=(a0,a1)=>(dynCall_ii=Module["dynCall_ii"]=wasmExports["dynCall_ii"])(a0,a1);var dynCall_iii=Module["dynCall_iii"]=(a0,a1,a2)=>(dynCall_iii=Module["dynCall_iii"]=wasmExports["dynCall_iii"])(a0,a1,a2);var dynCall_iiiiif=Module["dynCall_iiiiif"]=(a0,a1,a2,a3,a4,a5)=>(dynCall_iiiiif=Module["dynCall_iiiiif"]=wasmExports["dynCall_iiiiif"])(a0,a1,a2,a3,a4,a5);var dynCall_iiiii=Module["dynCall_iiiii"]=(a0,a1,a2,a3,a4)=>(dynCall_iiiii=Module["dynCall_iiiii"]=wasmExports["dynCall_iiiii"])(a0,a1,a2,a3,a4);var dynCall_iiii=Module["dynCall_iiii"]=(a0,a1,a2,a3)=>(dynCall_iiii=Module["dynCall_iiii"]=wasmExports["dynCall_iiii"])(a0,a1,a2,a3);var 
// Lazy `dynCall_<sig>` stubs. On first call each one replaces both the
// variable and the `Module` property with the same-named `wasmExports` entry
// and forwards its arguments. Every stub on this line accepts exactly as many
// arguments as its signature suffix has characters (e.g. dynCall_vii takes 3).
dynCall_vii=Module["dynCall_vii"]=(a0,a1,a2)=>(dynCall_vii=Module["dynCall_vii"]=wasmExports["dynCall_vii"])(a0,a1,a2);var dynCall_iiiiiiii=Module["dynCall_iiiiiiii"]=(a0,a1,a2,a3,a4,a5,a6,a7)=>(dynCall_iiiiiiii=Module["dynCall_iiiiiiii"]=wasmExports["dynCall_iiiiiiii"])(a0,a1,a2,a3,a4,a5,a6,a7);var dynCall_iiiiii=Module["dynCall_iiiiii"]=(a0,a1,a2,a3,a4,a5)=>(dynCall_iiiiii=Module["dynCall_iiiiii"]=wasmExports["dynCall_iiiiii"])(a0,a1,a2,a3,a4,a5);var dynCall_iiifiiii=Module["dynCall_iiifiiii"]=(a0,a1,a2,a3,a4,a5,a6,a7)=>(dynCall_iiifiiii=Module["dynCall_iiifiiii"]=wasmExports["dynCall_iiifiiii"])(a0,a1,a2,a3,a4,a5,a6,a7);var dynCall_iiifiiiii=Module["dynCall_iiifiiiii"]=(a0,a1,a2,a3,a4,a5,a6,a7,a8)=>(dynCall_iiifiiiii=Module["dynCall_iiifiiiii"]=wasmExports["dynCall_iiifiiiii"])(a0,a1,a2,a3,a4,a5,a6,a7,a8);var dynCall_iiiiiii=Module["dynCall_iiiiiii"]=(a0,a1,a2,a3,a4,a5,a6)=>(dynCall_iiiiiii=Module["dynCall_iiiiiii"]=wasmExports["dynCall_iiiiiii"])(a0,a1,a2,a3,a4,a5,a6);var dynCall_iiiiiiiiiii=Module["dynCall_iiiiiiiiiii"]=(a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10)=>(dynCall_iiiiiiiiiii=Module["dynCall_iiiiiiiiiii"]=wasmExports["dynCall_iiiiiiiiiii"])(a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10);var dynCall_iiiiiiiiiiii=Module["dynCall_iiiiiiiiiiii"]=(a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11)=>(dynCall_iiiiiiiiiiii=Module["dynCall_iiiiiiiiiiii"]=wasmExports["dynCall_iiiiiiiiiiii"])(a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11);var dynCall_iiiiiiiiiiiii=Module["dynCall_iiiiiiiiiiiii"]=(a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12)=>(dynCall_iiiiiiiiiiiii=Module["dynCall_iiiiiiiiiiiii"]=wasmExports["dynCall_iiiiiiiiiiiii"])(a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12);var dynCall_v=Module["dynCall_v"]=a0=>(dynCall_v=Module["dynCall_v"]=wasmExports["dynCall_v"])(a0);var dynCall_vi=Module["dynCall_vi"]=(a0,a1)=>(dynCall_vi=Module["dynCall_vi"]=wasmExports["dynCall_vi"])(a0,a1);var 
// Lazy `dynCall_<sig>` stubs (self-replacing on first call, mirrored on
// `Module`). Argument count equals the signature length, except for
// dynCall_iiiiji, which takes 7 arguments for a 6-character signature.
// NOTE(review): the extra argument presumably comes from the `j` (64-bit)
// parameter being passed as two 32-bit halves — confirm.
dynCall_viiiiii=Module["dynCall_viiiiii"]=(a0,a1,a2,a3,a4,a5,a6)=>(dynCall_viiiiii=Module["dynCall_viiiiii"]=wasmExports["dynCall_viiiiii"])(a0,a1,a2,a3,a4,a5,a6);var dynCall_viiiii=Module["dynCall_viiiii"]=(a0,a1,a2,a3,a4,a5)=>(dynCall_viiiii=Module["dynCall_viiiii"]=wasmExports["dynCall_viiiii"])(a0,a1,a2,a3,a4,a5);var dynCall_viiii=Module["dynCall_viiii"]=(a0,a1,a2,a3,a4)=>(dynCall_viiii=Module["dynCall_viiii"]=wasmExports["dynCall_viiii"])(a0,a1,a2,a3,a4);var dynCall_viii=Module["dynCall_viii"]=(a0,a1,a2,a3)=>(dynCall_viii=Module["dynCall_viii"]=wasmExports["dynCall_viii"])(a0,a1,a2,a3);var dynCall_iiiiji=Module["dynCall_iiiiji"]=(a0,a1,a2,a3,a4,a5,a6)=>(dynCall_iiiiji=Module["dynCall_iiiiji"]=wasmExports["dynCall_iiiiji"])(a0,a1,a2,a3,a4,a5,a6);var dynCall_ifi=Module["dynCall_ifi"]=(a0,a1,a2)=>(dynCall_ifi=Module["dynCall_ifi"]=wasmExports["dynCall_ifi"])(a0,a1,a2);var dynCall_viiiiiifi=Module["dynCall_viiiiiifi"]=(a0,a1,a2,a3,a4,a5,a6,a7,a8)=>(dynCall_viiiiiifi=Module["dynCall_viiiiiifi"]=wasmExports["dynCall_viiiiiifi"])(a0,a1,a2,a3,a4,a5,a6,a7,a8);var dynCall_viiiiiii=Module["dynCall_viiiiiii"]=(a0,a1,a2,a3,a4,a5,a6,a7)=>(dynCall_viiiiiii=Module["dynCall_viiiiiii"]=wasmExports["dynCall_viiiiiii"])(a0,a1,a2,a3,a4,a5,a6,a7);var dynCall_vifiiii=Module["dynCall_vifiiii"]=(a0,a1,a2,a3,a4,a5,a6)=>(dynCall_vifiiii=Module["dynCall_vifiiii"]=wasmExports["dynCall_vifiiii"])(a0,a1,a2,a3,a4,a5,a6);var dynCall_viiiiiiii=Module["dynCall_viiiiiiii"]=(a0,a1,a2,a3,a4,a5,a6,a7,a8)=>(dynCall_viiiiiiii=Module["dynCall_viiiiiiii"]=wasmExports["dynCall_viiiiiiii"])(a0,a1,a2,a3,a4,a5,a6,a7,a8);var dynCall_iiiiiiiiii=Module["dynCall_iiiiiiiiii"]=(a0,a1,a2,a3,a4,a5,a6,a7,a8,a9)=>(dynCall_iiiiiiiiii=Module["dynCall_iiiiiiiiii"]=wasmExports["dynCall_iiiiiiiiii"])(a0,a1,a2,a3,a4,a5,a6,a7,a8,a9);var dynCall_i=Module["dynCall_i"]=a0=>(dynCall_i=Module["dynCall_i"]=wasmExports["dynCall_i"])(a0);var 
// Lazy `dynCall_<sig>` stubs (self-replacing on first call, mirrored on
// `Module`), mostly for signatures that involve `j`.
// On this line each stub's arity is the signature length plus one extra
// argument for every `j` after the first signature character (e.g.
// dynCall_iijjiii takes 9, dynCall_vijjjii takes 10, dynCall_ji takes 2).
// NOTE(review): presumably each 64-bit parameter is passed as two 32-bit
// halves — confirm.
dynCall_ji=Module["dynCall_ji"]=(a0,a1)=>(dynCall_ji=Module["dynCall_ji"]=wasmExports["dynCall_ji"])(a0,a1);var dynCall_iiji=Module["dynCall_iiji"]=(a0,a1,a2,a3,a4)=>(dynCall_iiji=Module["dynCall_iiji"]=wasmExports["dynCall_iiji"])(a0,a1,a2,a3,a4);var dynCall_iijjiii=Module["dynCall_iijjiii"]=(a0,a1,a2,a3,a4,a5,a6,a7,a8)=>(dynCall_iijjiii=Module["dynCall_iijjiii"]=wasmExports["dynCall_iijjiii"])(a0,a1,a2,a3,a4,a5,a6,a7,a8);var dynCall_vijjjii=Module["dynCall_vijjjii"]=(a0,a1,a2,a3,a4,a5,a6,a7,a8,a9)=>(dynCall_vijjjii=Module["dynCall_vijjjii"]=wasmExports["dynCall_vijjjii"])(a0,a1,a2,a3,a4,a5,a6,a7,a8,a9);var dynCall_fii=Module["dynCall_fii"]=(a0,a1,a2)=>(dynCall_fii=Module["dynCall_fii"]=wasmExports["dynCall_fii"])(a0,a1,a2);var dynCall_fiii=Module["dynCall_fiii"]=(a0,a1,a2,a3)=>(dynCall_fiii=Module["dynCall_fiii"]=wasmExports["dynCall_fiii"])(a0,a1,a2,a3);var dynCall_iiiiiiiii=Module["dynCall_iiiiiiiii"]=(a0,a1,a2,a3,a4,a5,a6,a7,a8)=>(dynCall_iiiiiiiii=Module["dynCall_iiiiiiiii"]=wasmExports["dynCall_iiiiiiiii"])(a0,a1,a2,a3,a4,a5,a6,a7,a8);var dynCall_jii=Module["dynCall_jii"]=(a0,a1,a2)=>(dynCall_jii=Module["dynCall_jii"]=wasmExports["dynCall_jii"])(a0,a1,a2);var dynCall_jiii=Module["dynCall_jiii"]=(a0,a1,a2,a3)=>(dynCall_jiii=Module["dynCall_jiii"]=wasmExports["dynCall_jiii"])(a0,a1,a2,a3);var dynCall_iijji=Module["dynCall_iijji"]=(a0,a1,a2,a3,a4,a5,a6)=>(dynCall_iijji=Module["dynCall_iijji"]=wasmExports["dynCall_iijji"])(a0,a1,a2,a3,a4,a5,a6);var dynCall_iijjji=Module["dynCall_iijjji"]=(a0,a1,a2,a3,a4,a5,a6,a7,a8)=>(dynCall_iijjji=Module["dynCall_iijjji"]=wasmExports["dynCall_iijjji"])(a0,a1,a2,a3,a4,a5,a6,a7,a8);var dynCall_iidiiii=Module["dynCall_iidiiii"]=(a0,a1,a2,a3,a4,a5,a6)=>(dynCall_iidiiii=Module["dynCall_iidiiii"]=wasmExports["dynCall_iidiiii"])(a0,a1,a2,a3,a4,a5,a6);var dynCall_jiji=Module["dynCall_jiji"]=(a0,a1,a2,a3,a4)=>(dynCall_jiji=Module["dynCall_jiji"]=wasmExports["dynCall_jiji"])(a0,a1,a2,a3,a4);var 
// Four lazy stubs for the asyncify start/stop unwind/rewind exports; each one
// rebinds only its local variable to the `wasmExports` entry on first call
// (no `Module` mirror).
// After them come plain integer constants, each stored both in a local
// variable and on `Module` under the same name. The names are mangled C++ ABI
// data symbols (`__cxa_*_handler`, `_ZTI…` type info, `_ZTS…` type-info names,
// `_ZTV…` vtables).
// NOTE(review): the numbers are presumably the symbols' addresses in wasm
// linear memory — confirm against the build output.
_asyncify_start_unwind=a0=>(_asyncify_start_unwind=wasmExports["asyncify_start_unwind"])(a0);var _asyncify_stop_unwind=()=>(_asyncify_stop_unwind=wasmExports["asyncify_stop_unwind"])();var _asyncify_start_rewind=a0=>(_asyncify_start_rewind=wasmExports["asyncify_start_rewind"])(a0);var _asyncify_stop_rewind=()=>(_asyncify_stop_rewind=wasmExports["asyncify_stop_rewind"])();var ___cxa_unexpected_handler=Module["___cxa_unexpected_handler"]=342416;var ___cxa_terminate_handler=Module["___cxa_terminate_handler"]=342412;var ___cxa_new_handler=Module["___cxa_new_handler"]=360196;var __ZTIN10__cxxabiv116__shim_type_infoE=Module["__ZTIN10__cxxabiv116__shim_type_infoE"]=14884;var __ZTIN10__cxxabiv117__class_type_infoE=Module["__ZTIN10__cxxabiv117__class_type_infoE"]=14932;var __ZTIN10__cxxabiv117__pbase_type_infoE=Module["__ZTIN10__cxxabiv117__pbase_type_infoE"]=14980;var __ZTIDn=Module["__ZTIDn"]=15328;var __ZTIN10__cxxabiv119__pointer_type_infoE=Module["__ZTIN10__cxxabiv119__pointer_type_infoE"]=15028;var __ZTIv=Module["__ZTIv"]=15276;var __ZTIN10__cxxabiv120__function_type_infoE=Module["__ZTIN10__cxxabiv120__function_type_infoE"]=15080;var __ZTIN10__cxxabiv129__pointer_to_member_type_infoE=Module["__ZTIN10__cxxabiv129__pointer_to_member_type_infoE"]=15140;var __ZTSN10__cxxabiv116__shim_type_infoE=Module["__ZTSN10__cxxabiv116__shim_type_infoE"]=14848;var __ZTVN10__cxxabiv120__si_class_type_infoE=Module["__ZTVN10__cxxabiv120__si_class_type_infoE"]=16812;var __ZTSN10__cxxabiv117__class_type_infoE=Module["__ZTSN10__cxxabiv117__class_type_infoE"]=14896;var __ZTSN10__cxxabiv117__pbase_type_infoE=Module["__ZTSN10__cxxabiv117__pbase_type_infoE"]=14944;var __ZTSN10__cxxabiv119__pointer_type_infoE=Module["__ZTSN10__cxxabiv119__pointer_type_infoE"]=14992;var __ZTSN10__cxxabiv120__function_type_infoE=Module["__ZTSN10__cxxabiv120__function_type_infoE"]=15040;var __ZTSN10__cxxabiv129__pointer_to_member_type_infoE=Module["__ZTSN10__cxxabiv129__pointer_to_member_type_infoE"]=15092;var 
// Integer constants, each stored in a local variable and mirrored on `Module`
// under the same name. The names are mangled C++ ABI data symbols: `_ZTV…`
// (vtable), `_ZTI…` (type info) and `_ZTS…` (type-info name) for the
// __cxxabiv1 type_info classes and for built-in types void, nullptr_t, bool,
// wchar_t, char, unsigned char and signed char, plus pointer (`P`) and
// pointer-to-const (`PK`) variants.
// NOTE(review): the values are presumably linear-memory addresses — confirm.
__ZTVN10__cxxabiv116__shim_type_infoE=Module["__ZTVN10__cxxabiv116__shim_type_infoE"]=15164;var __ZTVN10__cxxabiv123__fundamental_type_infoE=Module["__ZTVN10__cxxabiv123__fundamental_type_infoE"]=15192;var __ZTIN10__cxxabiv123__fundamental_type_infoE=Module["__ZTIN10__cxxabiv123__fundamental_type_infoE"]=15260;var __ZTSN10__cxxabiv123__fundamental_type_infoE=Module["__ZTSN10__cxxabiv123__fundamental_type_infoE"]=15220;var __ZTSv=Module["__ZTSv"]=15272;var __ZTSPv=Module["__ZTSPv"]=15284;var __ZTIPv=Module["__ZTIPv"]=15288;var __ZTVN10__cxxabiv119__pointer_type_infoE=Module["__ZTVN10__cxxabiv119__pointer_type_infoE"]=17024;var __ZTSPKv=Module["__ZTSPKv"]=15304;var __ZTIPKv=Module["__ZTIPKv"]=15308;var __ZTSDn=Module["__ZTSDn"]=15324;var __ZTSPDn=Module["__ZTSPDn"]=15336;var __ZTIPDn=Module["__ZTIPDn"]=15340;var __ZTSPKDn=Module["__ZTSPKDn"]=15356;var __ZTIPKDn=Module["__ZTIPKDn"]=15364;var __ZTSb=Module["__ZTSb"]=15380;var __ZTIb=Module["__ZTIb"]=15384;var __ZTSPb=Module["__ZTSPb"]=15392;var __ZTIPb=Module["__ZTIPb"]=15396;var __ZTSPKb=Module["__ZTSPKb"]=15412;var __ZTIPKb=Module["__ZTIPKb"]=15416;var __ZTSw=Module["__ZTSw"]=15432;var __ZTIw=Module["__ZTIw"]=15436;var __ZTSPw=Module["__ZTSPw"]=15444;var __ZTIPw=Module["__ZTIPw"]=15448;var __ZTSPKw=Module["__ZTSPKw"]=15464;var __ZTIPKw=Module["__ZTIPKw"]=15468;var __ZTSc=Module["__ZTSc"]=15484;var __ZTIc=Module["__ZTIc"]=15488;var __ZTSPc=Module["__ZTSPc"]=15496;var __ZTIPc=Module["__ZTIPc"]=15500;var __ZTSPKc=Module["__ZTSPKc"]=15516;var __ZTIPKc=Module["__ZTIPKc"]=15520;var __ZTSh=Module["__ZTSh"]=15536;var __ZTIh=Module["__ZTIh"]=15540;var __ZTSPh=Module["__ZTSPh"]=15548;var __ZTIPh=Module["__ZTIPh"]=15552;var __ZTSPKh=Module["__ZTSPKh"]=15568;var __ZTIPKh=Module["__ZTIPKh"]=15572;var __ZTSa=Module["__ZTSa"]=15588;var __ZTIa=Module["__ZTIa"]=15592;var __ZTSPa=Module["__ZTSPa"]=15600;var __ZTIPa=Module["__ZTIPa"]=15604;var __ZTSPKa=Module["__ZTSPKa"]=15620;var __ZTIPKa=Module["__ZTIPKa"]=15624;var 
// Integer constants, each stored in a local variable and mirrored on `Module`.
// The names are mangled `_ZTS…` (type-info name) / `_ZTI…` (type info) symbols
// for the built-in integer types with codes s, t, i, j, l, m, x, y, n and o,
// each with plain, pointer (`P`) and pointer-to-const (`PK`) variants.
// NOTE(review): the values are presumably linear-memory addresses — confirm.
__ZTSs=Module["__ZTSs"]=15640;var __ZTIs=Module["__ZTIs"]=15644;var __ZTSPs=Module["__ZTSPs"]=15652;var __ZTIPs=Module["__ZTIPs"]=15656;var __ZTSPKs=Module["__ZTSPKs"]=15672;var __ZTIPKs=Module["__ZTIPKs"]=15676;var __ZTSt=Module["__ZTSt"]=15692;var __ZTIt=Module["__ZTIt"]=15696;var __ZTSPt=Module["__ZTSPt"]=15704;var __ZTIPt=Module["__ZTIPt"]=15708;var __ZTSPKt=Module["__ZTSPKt"]=15724;var __ZTIPKt=Module["__ZTIPKt"]=15728;var __ZTSi=Module["__ZTSi"]=15744;var __ZTIi=Module["__ZTIi"]=15748;var __ZTSPi=Module["__ZTSPi"]=15756;var __ZTIPi=Module["__ZTIPi"]=15760;var __ZTSPKi=Module["__ZTSPKi"]=15776;var __ZTIPKi=Module["__ZTIPKi"]=15780;var __ZTSj=Module["__ZTSj"]=15796;var __ZTIj=Module["__ZTIj"]=15800;var __ZTSPj=Module["__ZTSPj"]=15808;var __ZTIPj=Module["__ZTIPj"]=15812;var __ZTSPKj=Module["__ZTSPKj"]=15828;var __ZTIPKj=Module["__ZTIPKj"]=15832;var __ZTSl=Module["__ZTSl"]=15848;var __ZTIl=Module["__ZTIl"]=15852;var __ZTSPl=Module["__ZTSPl"]=15860;var __ZTIPl=Module["__ZTIPl"]=15864;var __ZTSPKl=Module["__ZTSPKl"]=15880;var __ZTIPKl=Module["__ZTIPKl"]=15884;var __ZTSm=Module["__ZTSm"]=15900;var __ZTIm=Module["__ZTIm"]=15904;var __ZTSPm=Module["__ZTSPm"]=15912;var __ZTIPm=Module["__ZTIPm"]=15916;var __ZTSPKm=Module["__ZTSPKm"]=15932;var __ZTIPKm=Module["__ZTIPKm"]=15936;var __ZTSx=Module["__ZTSx"]=15952;var __ZTIx=Module["__ZTIx"]=15956;var __ZTSPx=Module["__ZTSPx"]=15964;var __ZTIPx=Module["__ZTIPx"]=15968;var __ZTSPKx=Module["__ZTSPKx"]=15984;var __ZTIPKx=Module["__ZTIPKx"]=15988;var __ZTSy=Module["__ZTSy"]=16004;var __ZTIy=Module["__ZTIy"]=16008;var __ZTSPy=Module["__ZTSPy"]=16016;var __ZTIPy=Module["__ZTIPy"]=16020;var __ZTSPKy=Module["__ZTSPKy"]=16036;var __ZTIPKy=Module["__ZTIPKy"]=16040;var __ZTSn=Module["__ZTSn"]=16056;var __ZTIn=Module["__ZTIn"]=16060;var __ZTSPn=Module["__ZTSPn"]=16068;var __ZTIPn=Module["__ZTIPn"]=16072;var __ZTSPKn=Module["__ZTSPKn"]=16088;var __ZTIPKn=Module["__ZTIPKn"]=16092;var __ZTSo=Module["__ZTSo"]=16108;var 
__ZTIo=Module["__ZTIo"]=16112;var __ZTSPo=Module["__ZTSPo"]=16120;var __ZTIPo=Module["__ZTIPo"]=16124;var __ZTSPKo=Module["__ZTSPKo"]=16140;var __ZTIPKo=Module["__ZTIPKo"]=16144;var __ZTSDh=Module["__ZTSDh"]=16160;var __ZTIDh=Module["__ZTIDh"]=16164;var __ZTSPDh=Module["__ZTSPDh"]=16172;var __ZTIPDh=Module["__ZTIPDh"]=16176;var __ZTSPKDh=Module["__ZTSPKDh"]=16192;var __ZTIPKDh=Module["__ZTIPKDh"]=16200;var __ZTSf=Module["__ZTSf"]=16216;var __ZTIf=Module["__ZTIf"]=16220;var __ZTSPf=Module["__ZTSPf"]=16228;var __ZTIPf=Module["__ZTIPf"]=16232;var __ZTSPKf=Module["__ZTSPKf"]=16248;var __ZTIPKf=Module["__ZTIPKf"]=16252;var __ZTSd=Module["__ZTSd"]=16268;var __ZTId=Module["__ZTId"]=16272;var __ZTSPd=Module["__ZTSPd"]=16280;var __ZTIPd=Module["__ZTIPd"]=16284;var __ZTSPKd=Module["__ZTSPKd"]=16300;var __ZTIPKd=Module["__ZTIPKd"]=16304;var __ZTSe=Module["__ZTSe"]=16320;var __ZTIe=Module["__ZTIe"]=16324;var __ZTSPe=Module["__ZTSPe"]=16332;var __ZTIPe=Module["__ZTIPe"]=16336;var __ZTSPKe=Module["__ZTSPKe"]=16352;var __ZTIPKe=Module["__ZTIPKe"]=16356;var __ZTSg=Module["__ZTSg"]=16372;var __ZTIg=Module["__ZTIg"]=16376;var __ZTSPg=Module["__ZTSPg"]=16384;var __ZTIPg=Module["__ZTIPg"]=16388;var __ZTSPKg=Module["__ZTSPKg"]=16404;var __ZTIPKg=Module["__ZTIPKg"]=16408;var __ZTSDu=Module["__ZTSDu"]=16424;var __ZTIDu=Module["__ZTIDu"]=16428;var __ZTSPDu=Module["__ZTSPDu"]=16436;var __ZTIPDu=Module["__ZTIPDu"]=16440;var __ZTSPKDu=Module["__ZTSPKDu"]=16456;var __ZTIPKDu=Module["__ZTIPKDu"]=16464;var __ZTSDs=Module["__ZTSDs"]=16480;var __ZTIDs=Module["__ZTIDs"]=16484;var __ZTSPDs=Module["__ZTSPDs"]=16492;var __ZTIPDs=Module["__ZTIPDs"]=16496;var __ZTSPKDs=Module["__ZTSPKDs"]=16512;var __ZTIPKDs=Module["__ZTIPKDs"]=16520;var __ZTSDi=Module["__ZTSDi"]=16536;var __ZTIDi=Module["__ZTIDi"]=16540;var __ZTSPDi=Module["__ZTSPDi"]=16548;var __ZTIPDi=Module["__ZTIPDi"]=16552;var __ZTSPKDi=Module["__ZTSPKDi"]=16568;var __ZTIPKDi=Module["__ZTIPKDi"]=16576;var 
__ZTVN10__cxxabiv117__array_type_infoE=Module["__ZTVN10__cxxabiv117__array_type_infoE"]=16592;var __ZTIN10__cxxabiv117__array_type_infoE=Module["__ZTIN10__cxxabiv117__array_type_infoE"]=16656;var __ZTSN10__cxxabiv117__array_type_infoE=Module["__ZTSN10__cxxabiv117__array_type_infoE"]=16620;var __ZTVN10__cxxabiv120__function_type_infoE=Module["__ZTVN10__cxxabiv120__function_type_infoE"]=16668;var __ZTVN10__cxxabiv116__enum_type_infoE=Module["__ZTVN10__cxxabiv116__enum_type_infoE"]=16696;var __ZTIN10__cxxabiv116__enum_type_infoE=Module["__ZTIN10__cxxabiv116__enum_type_infoE"]=16760;var __ZTSN10__cxxabiv116__enum_type_infoE=Module["__ZTSN10__cxxabiv116__enum_type_infoE"]=16724;var __ZTVN10__cxxabiv117__class_type_infoE=Module["__ZTVN10__cxxabiv117__class_type_infoE"]=16772;var __ZTIN10__cxxabiv120__si_class_type_infoE=Module["__ZTIN10__cxxabiv120__si_class_type_infoE"]=16892;var __ZTSN10__cxxabiv120__si_class_type_infoE=Module["__ZTSN10__cxxabiv120__si_class_type_infoE"]=16852;var __ZTVN10__cxxabiv121__vmi_class_type_infoE=Module["__ZTVN10__cxxabiv121__vmi_class_type_infoE"]=16904;var __ZTIN10__cxxabiv121__vmi_class_type_infoE=Module["__ZTIN10__cxxabiv121__vmi_class_type_infoE"]=16984;var __ZTSN10__cxxabiv121__vmi_class_type_infoE=Module["__ZTSN10__cxxabiv121__vmi_class_type_infoE"]=16944;var __ZTVN10__cxxabiv117__pbase_type_infoE=Module["__ZTVN10__cxxabiv117__pbase_type_infoE"]=16996;var __ZTVN10__cxxabiv129__pointer_to_member_type_infoE=Module["__ZTVN10__cxxabiv129__pointer_to_member_type_infoE"]=17052;var __ZTVSt9bad_alloc=Module["__ZTVSt9bad_alloc"]=14512;var __ZTVSt9exception=Module["__ZTVSt9exception"]=14552;var __ZTVSt20bad_array_new_length=Module["__ZTVSt20bad_array_new_length"]=14532;var __ZTISt9bad_alloc=Module["__ZTISt9bad_alloc"]=14664;var __ZTISt20bad_array_new_length=Module["__ZTISt20bad_array_new_length"]=14704;var __ZTISt9exception=Module["__ZTISt9exception"]=14588;var __ZTSSt9exception=Module["__ZTSSt9exception"]=14572;var 
__ZTVSt13bad_exception=Module["__ZTVSt13bad_exception"]=14596;var __ZTISt13bad_exception=Module["__ZTISt13bad_exception"]=14636;var __ZTSSt13bad_exception=Module["__ZTSSt13bad_exception"]=14616;var __ZTSSt9bad_alloc=Module["__ZTSSt9bad_alloc"]=14648;var __ZTSSt20bad_array_new_length=Module["__ZTSSt20bad_array_new_length"]=14676;var __ZTVSt8bad_cast=Module["__ZTVSt8bad_cast"]=14716;var __ZTVSt10bad_typeid=Module["__ZTVSt10bad_typeid"]=14736;var __ZTISt8bad_cast=Module["__ZTISt8bad_cast"]=14808;var __ZTISt10bad_typeid=Module["__ZTISt10bad_typeid"]=14836;var __ZTVSt9type_info=Module["__ZTVSt9type_info"]=14756;var __ZTISt9type_info=Module["__ZTISt9type_info"]=14788;var __ZTSSt9type_info=Module["__ZTSSt9type_info"]=14772;var __ZTSSt8bad_cast=Module["__ZTSSt8bad_cast"]=14796;var __ZTSSt10bad_typeid=Module["__ZTSSt10bad_typeid"]=14820;function applySignatureConversions(wasmExports){wasmExports=Object.assign({},wasmExports);var makeWrapper_pp=f=>a0=>f(a0)>>>0;var makeWrapper_p=f=>()=>f()>>>0;wasmExports["malloc"]=makeWrapper_pp(wasmExports["malloc"]);wasmExports["pthread_self"]=makeWrapper_p(wasmExports["pthread_self"]);wasmExports["emscripten_main_runtime_thread_id"]=makeWrapper_p(wasmExports["emscripten_main_runtime_thread_id"]);wasmExports["emscripten_stack_get_base"]=makeWrapper_p(wasmExports["emscripten_stack_get_base"]);wasmExports["emscripten_stack_get_end"]=makeWrapper_p(wasmExports["emscripten_stack_get_end"]);wasmExports["_emscripten_stack_alloc"]=makeWrapper_pp(wasmExports["_emscripten_stack_alloc"]);wasmExports["emscripten_stack_get_current"]=makeWrapper_p(wasmExports["emscripten_stack_get_current"]);return wasmExports}Module["addFunction"]=addFunction;var calledRun;dependenciesFulfilled=function runCaller(){if(!calledRun)run();if(!calledRun)dependenciesFulfilled=runCaller};function run(){if(runDependencies>0){return}if(ENVIRONMENT_IS_PTHREAD){readyPromiseResolve(Module);initRuntime();startWorker(Module);return}preRun();if(runDependencies>0){return}function 
doRun(){if(calledRun)return;calledRun=true;Module["calledRun"]=true;if(ABORT)return;initRuntime();readyPromiseResolve(Module);Module["onRuntimeInitialized"]?.();postRun()}if(Module["setStatus"]){Module["setStatus"]("Running...");setTimeout(function(){setTimeout(function(){Module["setStatus"]("")},1);doRun()},1)}else{doRun()}}if(Module["preInit"]){if(typeof Module["preInit"]=="function")Module["preInit"]=[Module["preInit"]];while(Module["preInit"].length>0){Module["preInit"].pop()()}}run();moduleRtn=readyPromise; - - - return moduleRtn; -} -); -})(); -export default pv_picollm; -var isPthread = globalThis.self?.name === 'em-pthread'; -// When running as a pthread, construct a new instance on startup -isPthread && pv_picollm(); diff --git a/lib/wasm/pv_picollm_simd.js b/lib/wasm/pv_picollm_simd.js deleted file mode 100644 index 78957ef4..00000000 --- a/lib/wasm/pv_picollm_simd.js +++ /dev/null @@ -1,19 +0,0 @@ - -var pv_picollm_simd = (() => { - var _scriptName = typeof document != 'undefined' ? 
document.currentScript?.src : undefined; - - return ( -function(moduleArg = {}) { - var moduleRtn; - -function GROWABLE_HEAP_I8(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAP8}function GROWABLE_HEAP_U8(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAPU8}function GROWABLE_HEAP_I16(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAP16}function GROWABLE_HEAP_I32(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAP32}function GROWABLE_HEAP_U32(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAPU32}function GROWABLE_HEAP_F32(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAPF32}function GROWABLE_HEAP_F64(){if(wasmMemory.buffer!=HEAP8.buffer){updateMemoryViews()}return HEAPF64}var Module=moduleArg;var readyPromiseResolve,readyPromiseReject;var readyPromise=new Promise((resolve,reject)=>{readyPromiseResolve=resolve;readyPromiseReject=reject});var ENVIRONMENT_IS_WEB=typeof window=="object";var ENVIRONMENT_IS_WORKER=typeof importScripts=="function";var ENVIRONMENT_IS_NODE=typeof process=="object"&&typeof process.versions=="object"&&typeof process.versions.node=="string";var ENVIRONMENT_IS_PTHREAD=ENVIRONMENT_IS_WORKER&&self.name=="em-pthread";function _typeof$2(o){"@babel/helpers - typeof";return _typeof$2="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(o){return typeof o}:function(o){return o&&"function"==typeof Symbol&&o.constructor===Symbol&&o!==Symbol.prototype?"symbol":typeof o},_typeof$2(o)}function toPrimitive(t,r){if("object"!=_typeof$2(t)||!t)return t;var e=t[Symbol.toPrimitive];if(void 0!==e){var i=e.call(t,r||"default");if("object"!=_typeof$2(i))return i;throw new TypeError("@@toPrimitive must return a primitive value.")}return("string"===r?String:Number)(t)}function toPropertyKey(t){var i=toPrimitive(t,"string");return"symbol"==_typeof$2(i)?i:i+""}function _defineProperty(e,r,t){return(r=toPropertyKey(r))in 
e?Object.defineProperty(e,r,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[r]=t,e}function asyncGeneratorStep(n,t,e,r,o,a,c){try{var i=n[a](c),u=i.value}catch(n){return void e(n)}i.done?t(u):Promise.resolve(u).then(r,o)}function _asyncToGenerator(n){return function(){var t=this,e=arguments;return new Promise(function(r,o){var a=n.apply(t,e);function _next(n){asyncGeneratorStep(a,r,o,_next,_throw,"next",n)}function _throw(n){asyncGeneratorStep(a,r,o,_next,_throw,"throw",n)}_next(void 0)})}}function getDefaultExportFromCjs(x){return x&&x.__esModule&&Object.prototype.hasOwnProperty.call(x,"default")?x["default"]:x}var regeneratorRuntime$3={exports:{}};var _typeof$1={exports:{}};var _typeof_1=_typeof$1.exports;(function(module){function _typeof(o){"@babel/helpers - typeof";return module.exports=_typeof="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(o){return typeof o}:function(o){return o&&"function"==typeof Symbol&&o.constructor===Symbol&&o!==Symbol.prototype?"symbol":typeof o},module.exports.__esModule=true,module.exports["default"]=module.exports,_typeof(o)}module.exports=_typeof,module.exports.__esModule=true,module.exports["default"]=module.exports})(_typeof$1);var _typeofExports=_typeof$1.exports;var _typeof=getDefaultExportFromCjs(_typeofExports);var regeneratorRuntime$1=regeneratorRuntime$3.exports;(function(module){var _typeof=_typeofExports["default"];function _regeneratorRuntime(){"use strict";module.exports=_regeneratorRuntime=function _regeneratorRuntime(){return e},module.exports.__esModule=true,module.exports["default"]=module.exports;var t,e={},r=Object.prototype,n=r.hasOwnProperty,o=Object.defineProperty||function(t,e,r){t[e]=r.value},i="function"==typeof Symbol?Symbol:{},a=i.iterator||"@@iterator",c=i.asyncIterator||"@@asyncIterator",u=i.toStringTag||"@@toStringTag";function define(t,e,r){return Object.defineProperty(t,e,{value:r,enumerable:!0,configurable:!0,writable:!0}),t[e]}try{define({},"")}catch(t){define=function 
define(t,e,r){return t[e]=r}}function wrap(t,e,r,n){var i=e&&e.prototype instanceof Generator?e:Generator,a=Object.create(i.prototype),c=new Context(n||[]);return o(a,"_invoke",{value:makeInvokeMethod(t,r,c)}),a}function tryCatch(t,e,r){try{return{type:"normal",arg:t.call(e,r)}}catch(t){return{type:"throw",arg:t}}}e.wrap=wrap;var h="suspendedStart",l="suspendedYield",f="executing",s="completed",y={};function Generator(){}function GeneratorFunction(){}function GeneratorFunctionPrototype(){}var p={};define(p,a,function(){return this});var d=Object.getPrototypeOf,v=d&&d(d(values([])));v&&v!==r&&n.call(v,a)&&(p=v);var g=GeneratorFunctionPrototype.prototype=Generator.prototype=Object.create(p);function defineIteratorMethods(t){["next","throw","return"].forEach(function(e){define(t,e,function(t){return this._invoke(e,t)})})}function AsyncIterator(t,e){function invoke(r,o,i,a){var c=tryCatch(t[r],t,o);if("throw"!==c.type){var u=c.arg,h=u.value;return h&&"object"==_typeof(h)&&n.call(h,"__await")?e.resolve(h.__await).then(function(t){invoke("next",t,i,a)},function(t){invoke("throw",t,i,a)}):e.resolve(h).then(function(t){u.value=t,i(u)},function(t){return invoke("throw",t,i,a)})}a(c.arg)}var r;o(this,"_invoke",{value:function value(t,n){function callInvokeWithMethodAndArg(){return new e(function(e,r){invoke(t,n,e,r)})}return r=r?r.then(callInvokeWithMethodAndArg,callInvokeWithMethodAndArg):callInvokeWithMethodAndArg()}})}function makeInvokeMethod(e,r,n){var o=h;return function(i,a){if(o===f)throw Error("Generator is already running");if(o===s){if("throw"===i)throw a;return{value:t,done:!0}}for(n.method=i,n.arg=a;;){var c=n.delegate;if(c){var u=maybeInvokeDelegate(c,n);if(u){if(u===y)continue;return u}}if("next"===n.method)n.sent=n._sent=n.arg;else if("throw"===n.method){if(o===h)throw o=s,n.arg;n.dispatchException(n.arg)}else"return"===n.method&&n.abrupt("return",n.arg);o=f;var 
p=tryCatch(e,r,n);if("normal"===p.type){if(o=n.done?s:l,p.arg===y)continue;return{value:p.arg,done:n.done}}"throw"===p.type&&(o=s,n.method="throw",n.arg=p.arg)}}}function maybeInvokeDelegate(e,r){var n=r.method,o=e.iterator[n];if(o===t)return r.delegate=null,"throw"===n&&e.iterator["return"]&&(r.method="return",r.arg=t,maybeInvokeDelegate(e,r),"throw"===r.method)||"return"!==n&&(r.method="throw",r.arg=new TypeError("The iterator does not provide a '"+n+"' method")),y;var i=tryCatch(o,e.iterator,r.arg);if("throw"===i.type)return r.method="throw",r.arg=i.arg,r.delegate=null,y;var a=i.arg;return a?a.done?(r[e.resultName]=a.value,r.next=e.nextLoc,"return"!==r.method&&(r.method="next",r.arg=t),r.delegate=null,y):a:(r.method="throw",r.arg=new TypeError("iterator result is not an object"),r.delegate=null,y)}function pushTryEntry(t){var e={tryLoc:t[0]};1 in t&&(e.catchLoc=t[1]),2 in t&&(e.finallyLoc=t[2],e.afterLoc=t[3]),this.tryEntries.push(e)}function resetTryEntry(t){var e=t.completion||{};e.type="normal",delete e.arg,t.completion=e}function Context(t){this.tryEntries=[{tryLoc:"root"}],t.forEach(pushTryEntry,this),this.reset(!0)}function values(e){if(e||""===e){var r=e[a];if(r)return r.call(e);if("function"==typeof e.next)return e;if(!isNaN(e.length)){var o=-1,i=function next(){for(;++o=0;--o){var i=this.tryEntries[o],a=i.completion;if("root"===i.tryLoc)return handle("end");if(i.tryLoc<=this.prev){var c=n.call(i,"catchLoc"),u=n.call(i,"finallyLoc");if(c&&u){if(this.prev=0;--r){var o=this.tryEntries[r];if(o.tryLoc<=this.prev&&n.call(o,"finallyLoc")&&this.prev=0;--e){var r=this.tryEntries[e];if(r.finallyLoc===t)return this.complete(r.completion,r.afterLoc),resetTryEntry(r),y}},catch:function _catch(t){for(var e=this.tryEntries.length-1;e>=0;--e){var r=this.tryEntries[e];if(r.tryLoc===t){var n=r.completion;if("throw"===n.type){var o=n.arg;resetTryEntry(r)}return o}}throw Error("illegal catch attempt")},delegateYield:function delegateYield(e,r,n){return 
this.delegate={iterator:values(e),resultName:r,nextLoc:n},"next"===this.method&&(this.arg=t),y}},e}module.exports=_regeneratorRuntime,module.exports.__esModule=true,module.exports["default"]=module.exports})(regeneratorRuntime$3);var regeneratorRuntimeExports=regeneratorRuntime$3.exports;var regeneratorRuntime$2=getDefaultExportFromCjs(regeneratorRuntimeExports);var runtime=regeneratorRuntimeExports();var regenerator=runtime;try{regeneratorRuntime=runtime}catch(accidentalStrictMode){if(typeof globalThis==="object"){globalThis.regeneratorRuntime=runtime}else{Function("r","regeneratorRuntime = r")(runtime)}}var _regeneratorRuntime=getDefaultExportFromCjs(regenerator);const BROWSER_ALIASES_MAP={"Amazon Silk":"amazon_silk","Android Browser":"android",Bada:"bada",BlackBerry:"blackberry",Chrome:"chrome",Chromium:"chromium",Electron:"electron",Epiphany:"epiphany",Firefox:"firefox",Focus:"focus",Generic:"generic","Google Search":"google_search",Googlebot:"googlebot","Internet Explorer":"ie","K-Meleon":"k_meleon",Maxthon:"maxthon","Microsoft Edge":"edge","MZ Browser":"mz","NAVER Whale Browser":"naver",Opera:"opera","Opera Coast":"opera_coast",PhantomJS:"phantomjs",Puffin:"puffin",QupZilla:"qupzilla",QQ:"qq",QQLite:"qqlite",Safari:"safari",Sailfish:"sailfish","Samsung Internet for Android":"samsung_internet",SeaMonkey:"seamonkey",Sleipnir:"sleipnir",Swing:"swing",Tizen:"tizen","UC Browser":"uc",Vivaldi:"vivaldi","WebOS Browser":"webos",WeChat:"wechat","Yandex Browser":"yandex",Roku:"roku"};const BROWSER_MAP={amazon_silk:"Amazon Silk",android:"Android Browser",bada:"Bada",blackberry:"BlackBerry",chrome:"Chrome",chromium:"Chromium",electron:"Electron",epiphany:"Epiphany",firefox:"Firefox",focus:"Focus",generic:"Generic",googlebot:"Googlebot",google_search:"Google Search",ie:"Internet Explorer",k_meleon:"K-Meleon",maxthon:"Maxthon",edge:"Microsoft Edge",mz:"MZ Browser",naver:"NAVER Whale Browser",opera:"Opera",opera_coast:"Opera 
Coast",phantomjs:"PhantomJS",puffin:"Puffin",qupzilla:"QupZilla",qq:"QQ Browser",qqlite:"QQ Browser Lite",safari:"Safari",sailfish:"Sailfish",samsung_internet:"Samsung Internet for Android",seamonkey:"SeaMonkey",sleipnir:"Sleipnir",swing:"Swing",tizen:"Tizen",uc:"UC Browser",vivaldi:"Vivaldi",webos:"WebOS Browser",wechat:"WeChat",yandex:"Yandex Browser"};const PLATFORMS_MAP={tablet:"tablet",mobile:"mobile",desktop:"desktop",tv:"tv"};const OS_MAP={WindowsPhone:"Windows Phone",Windows:"Windows",MacOS:"macOS",iOS:"iOS",Android:"Android",WebOS:"WebOS",BlackBerry:"BlackBerry",Bada:"Bada",Tizen:"Tizen",Linux:"Linux",ChromeOS:"Chrome OS",PlayStation4:"PlayStation 4",Roku:"Roku"};const ENGINE_MAP={EdgeHTML:"EdgeHTML",Blink:"Blink",Trident:"Trident",Presto:"Presto",Gecko:"Gecko",WebKit:"WebKit"};class Utils{static getFirstMatch(regexp,ua){const match=ua.match(regexp);return match&&match.length>0&&match[1]||""}static getSecondMatch(regexp,ua){const match=ua.match(regexp);return match&&match.length>1&&match[2]||""}static matchAndReturnConst(regexp,ua,_const){if(regexp.test(ua)){return _const}return void 0}static getWindowsVersionName(version){switch(version){case"NT":return"NT";case"XP":return"XP";case"NT 5.0":return"2000";case"NT 5.1":return"XP";case"NT 5.2":return"2003";case"NT 6.0":return"Vista";case"NT 6.1":return"7";case"NT 6.2":return"8";case"NT 6.3":return"8.1";case"NT 10.0":return"10";default:return undefined}}static getMacOSVersionName(version){const v=version.split(".").splice(0,2).map(s=>parseInt(s,10)||0);v.push(0);if(v[0]!==10)return undefined;switch(v[1]){case 5:return"Leopard";case 6:return"Snow Leopard";case 7:return"Lion";case 8:return"Mountain Lion";case 9:return"Mavericks";case 10:return"Yosemite";case 11:return"El Capitan";case 12:return"Sierra";case 13:return"High Sierra";case 14:return"Mojave";case 15:return"Catalina";default:return undefined}}static getAndroidVersionName(version){const 
v=version.split(".").splice(0,2).map(s=>parseInt(s,10)||0);v.push(0);if(v[0]===1&&v[1]<5)return undefined;if(v[0]===1&&v[1]<6)return"Cupcake";if(v[0]===1&&v[1]>=6)return"Donut";if(v[0]===2&&v[1]<2)return"Eclair";if(v[0]===2&&v[1]===2)return"Froyo";if(v[0]===2&&v[1]>2)return"Gingerbread";if(v[0]===3)return"Honeycomb";if(v[0]===4&&v[1]<1)return"Ice Cream Sandwich";if(v[0]===4&&v[1]<4)return"Jelly Bean";if(v[0]===4&&v[1]>=4)return"KitKat";if(v[0]===5)return"Lollipop";if(v[0]===6)return"Marshmallow";if(v[0]===7)return"Nougat";if(v[0]===8)return"Oreo";if(v[0]===9)return"Pie";return undefined}static getVersionPrecision(version){return version.split(".").length}static compareVersions(versionA,versionB,isLoose=false){const versionAPrecision=Utils.getVersionPrecision(versionA);const versionBPrecision=Utils.getVersionPrecision(versionB);let precision=Math.max(versionAPrecision,versionBPrecision);let lastPrecision=0;const chunks=Utils.map([versionA,versionB],version=>{const delta=precision-Utils.getVersionPrecision(version);const _version=version+new Array(delta+1).join(".0");return Utils.map(_version.split("."),chunk=>new Array(20-chunk.length).join("0")+chunk).reverse()});if(isLoose){lastPrecision=precision-Math.min(versionAPrecision,versionBPrecision)}precision-=1;while(precision>=lastPrecision){if(chunks[0][precision]>chunks[1][precision]){return 1}if(chunks[0][precision]===chunks[1][precision]){if(precision===lastPrecision){return 0}precision-=1}else if(chunks[0][precision]{result[key]=assigner[key]})}}return obj}static getBrowserAlias(browserName){return BROWSER_ALIASES_MAP[browserName]}static getBrowserTypeByAlias(browserAlias){return BROWSER_MAP[browserAlias]||""}}const commonVersionIdentifier=/version\/(\d+(\.?_?\d+)+)/i;const browsersList=[{test:[/googlebot/i],describe(ua){const browser={name:"Googlebot"};const version=Utils.getFirstMatch(/googlebot\/(\d+(\.\d+))/i,ua)||Utils.getFirstMatch(commonVersionIdentifier,ua);if(version){browser.version=version}return 
browser}},{test:[/opera/i],describe(ua){const browser={name:"Opera"};const version=Utils.getFirstMatch(commonVersionIdentifier,ua)||Utils.getFirstMatch(/(?:opera)[\s/](\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/opr\/|opios/i],describe(ua){const browser={name:"Opera"};const version=Utils.getFirstMatch(/(?:opr|opios)[\s/](\S+)/i,ua)||Utils.getFirstMatch(commonVersionIdentifier,ua);if(version){browser.version=version}return browser}},{test:[/SamsungBrowser/i],describe(ua){const browser={name:"Samsung Internet for Android"};const version=Utils.getFirstMatch(commonVersionIdentifier,ua)||Utils.getFirstMatch(/(?:SamsungBrowser)[\s/](\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/Whale/i],describe(ua){const browser={name:"NAVER Whale Browser"};const version=Utils.getFirstMatch(commonVersionIdentifier,ua)||Utils.getFirstMatch(/(?:whale)[\s/](\d+(?:\.\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/MZBrowser/i],describe(ua){const browser={name:"MZ Browser"};const version=Utils.getFirstMatch(/(?:MZBrowser)[\s/](\d+(?:\.\d+)+)/i,ua)||Utils.getFirstMatch(commonVersionIdentifier,ua);if(version){browser.version=version}return browser}},{test:[/focus/i],describe(ua){const browser={name:"Focus"};const version=Utils.getFirstMatch(/(?:focus)[\s/](\d+(?:\.\d+)+)/i,ua)||Utils.getFirstMatch(commonVersionIdentifier,ua);if(version){browser.version=version}return browser}},{test:[/swing/i],describe(ua){const browser={name:"Swing"};const version=Utils.getFirstMatch(/(?:swing)[\s/](\d+(?:\.\d+)+)/i,ua)||Utils.getFirstMatch(commonVersionIdentifier,ua);if(version){browser.version=version}return browser}},{test:[/coast/i],describe(ua){const browser={name:"Opera Coast"};const version=Utils.getFirstMatch(commonVersionIdentifier,ua)||Utils.getFirstMatch(/(?:coast)[\s/](\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/opt\/\d+(?:.?_?\d+)+/i],describe(ua){const 
browser={name:"Opera Touch"};const version=Utils.getFirstMatch(/(?:opt)[\s/](\d+(\.?_?\d+)+)/i,ua)||Utils.getFirstMatch(commonVersionIdentifier,ua);if(version){browser.version=version}return browser}},{test:[/yabrowser/i],describe(ua){const browser={name:"Yandex Browser"};const version=Utils.getFirstMatch(/(?:yabrowser)[\s/](\d+(\.?_?\d+)+)/i,ua)||Utils.getFirstMatch(commonVersionIdentifier,ua);if(version){browser.version=version}return browser}},{test:[/ucbrowser/i],describe(ua){const browser={name:"UC Browser"};const version=Utils.getFirstMatch(commonVersionIdentifier,ua)||Utils.getFirstMatch(/(?:ucbrowser)[\s/](\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/Maxthon|mxios/i],describe(ua){const browser={name:"Maxthon"};const version=Utils.getFirstMatch(commonVersionIdentifier,ua)||Utils.getFirstMatch(/(?:Maxthon|mxios)[\s/](\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/epiphany/i],describe(ua){const browser={name:"Epiphany"};const version=Utils.getFirstMatch(commonVersionIdentifier,ua)||Utils.getFirstMatch(/(?:epiphany)[\s/](\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/puffin/i],describe(ua){const browser={name:"Puffin"};const version=Utils.getFirstMatch(commonVersionIdentifier,ua)||Utils.getFirstMatch(/(?:puffin)[\s/](\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/sleipnir/i],describe(ua){const browser={name:"Sleipnir"};const version=Utils.getFirstMatch(commonVersionIdentifier,ua)||Utils.getFirstMatch(/(?:sleipnir)[\s/](\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/k-meleon/i],describe(ua){const browser={name:"K-Meleon"};const version=Utils.getFirstMatch(commonVersionIdentifier,ua)||Utils.getFirstMatch(/(?:k-meleon)[\s/](\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/micromessenger/i],describe(ua){const browser={name:"WeChat"};const 
version=Utils.getFirstMatch(/(?:micromessenger)[\s/](\d+(\.?_?\d+)+)/i,ua)||Utils.getFirstMatch(commonVersionIdentifier,ua);if(version){browser.version=version}return browser}},{test:[/qqbrowser/i],describe(ua){const browser={name:/qqbrowserlite/i.test(ua)?"QQ Browser Lite":"QQ Browser"};const version=Utils.getFirstMatch(/(?:qqbrowserlite|qqbrowser)[/](\d+(\.?_?\d+)+)/i,ua)||Utils.getFirstMatch(commonVersionIdentifier,ua);if(version){browser.version=version}return browser}},{test:[/msie|trident/i],describe(ua){const browser={name:"Internet Explorer"};const version=Utils.getFirstMatch(/(?:msie |rv:)(\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/\sedg\//i],describe(ua){const browser={name:"Microsoft Edge"};const version=Utils.getFirstMatch(/\sedg\/(\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/edg([ea]|ios)/i],describe(ua){const browser={name:"Microsoft Edge"};const version=Utils.getSecondMatch(/edg([ea]|ios)\/(\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/vivaldi/i],describe(ua){const browser={name:"Vivaldi"};const version=Utils.getFirstMatch(/vivaldi\/(\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/seamonkey/i],describe(ua){const browser={name:"SeaMonkey"};const version=Utils.getFirstMatch(/seamonkey\/(\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/sailfish/i],describe(ua){const browser={name:"Sailfish"};const version=Utils.getFirstMatch(/sailfish\s?browser\/(\d+(\.\d+)?)/i,ua);if(version){browser.version=version}return browser}},{test:[/silk/i],describe(ua){const browser={name:"Amazon Silk"};const version=Utils.getFirstMatch(/silk\/(\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/phantom/i],describe(ua){const browser={name:"PhantomJS"};const version=Utils.getFirstMatch(/phantomjs\/(\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return 
browser}},{test:[/slimerjs/i],describe(ua){const browser={name:"SlimerJS"};const version=Utils.getFirstMatch(/slimerjs\/(\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/blackberry|\bbb\d+/i,/rim\stablet/i],describe(ua){const browser={name:"BlackBerry"};const version=Utils.getFirstMatch(commonVersionIdentifier,ua)||Utils.getFirstMatch(/blackberry[\d]+\/(\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/(web|hpw)[o0]s/i],describe(ua){const browser={name:"WebOS Browser"};const version=Utils.getFirstMatch(commonVersionIdentifier,ua)||Utils.getFirstMatch(/w(?:eb)?[o0]sbrowser\/(\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/bada/i],describe(ua){const browser={name:"Bada"};const version=Utils.getFirstMatch(/dolfin\/(\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/tizen/i],describe(ua){const browser={name:"Tizen"};const version=Utils.getFirstMatch(/(?:tizen\s?)?browser\/(\d+(\.?_?\d+)+)/i,ua)||Utils.getFirstMatch(commonVersionIdentifier,ua);if(version){browser.version=version}return browser}},{test:[/qupzilla/i],describe(ua){const browser={name:"QupZilla"};const version=Utils.getFirstMatch(/(?:qupzilla)[\s/](\d+(\.?_?\d+)+)/i,ua)||Utils.getFirstMatch(commonVersionIdentifier,ua);if(version){browser.version=version}return browser}},{test:[/firefox|iceweasel|fxios/i],describe(ua){const browser={name:"Firefox"};const version=Utils.getFirstMatch(/(?:firefox|iceweasel|fxios)[\s/](\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/electron/i],describe(ua){const browser={name:"Electron"};const version=Utils.getFirstMatch(/(?:electron)\/(\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/MiuiBrowser/i],describe(ua){const browser={name:"Miui"};const version=Utils.getFirstMatch(/(?:MiuiBrowser)[\s/](\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return 
browser}},{test:[/chromium/i],describe(ua){const browser={name:"Chromium"};const version=Utils.getFirstMatch(/(?:chromium)[\s/](\d+(\.?_?\d+)+)/i,ua)||Utils.getFirstMatch(commonVersionIdentifier,ua);if(version){browser.version=version}return browser}},{test:[/chrome|crios|crmo/i],describe(ua){const browser={name:"Chrome"};const version=Utils.getFirstMatch(/(?:chrome|crios|crmo)\/(\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test:[/GSA/i],describe(ua){const browser={name:"Google Search"};const version=Utils.getFirstMatch(/(?:GSA)\/(\d+(\.?_?\d+)+)/i,ua);if(version){browser.version=version}return browser}},{test(parser){const notLikeAndroid=!parser.test(/like android/i);const butAndroid=parser.test(/android/i);return notLikeAndroid&&butAndroid},describe(ua){const browser={name:"Android Browser"};const version=Utils.getFirstMatch(commonVersionIdentifier,ua);if(version){browser.version=version}return browser}},{test:[/playstation 4/i],describe(ua){const browser={name:"PlayStation 4"};const version=Utils.getFirstMatch(commonVersionIdentifier,ua);if(version){browser.version=version}return browser}},{test:[/safari|applewebkit/i],describe(ua){const browser={name:"Safari"};const version=Utils.getFirstMatch(commonVersionIdentifier,ua);if(version){browser.version=version}return browser}},{test:[/.*/i],describe(ua){const regexpWithoutDeviceSpec=/^(.*)\/(.*) /;const regexpWithDeviceSpec=/^(.*)\/(.*)[ \t]\((.*)/;const hasDeviceSpec=ua.search("\\(")!==-1;const regexp=hasDeviceSpec?regexpWithDeviceSpec:regexpWithoutDeviceSpec;return{name:Utils.getFirstMatch(regexp,ua),version:Utils.getSecondMatch(regexp,ua)}}}];var osParsersList=[{test:[/Roku\/DVP/],describe(ua){const version=Utils.getFirstMatch(/Roku\/DVP-(\d+\.\d+)/i,ua);return{name:OS_MAP.Roku,version:version}}},{test:[/windows phone/i],describe(ua){const version=Utils.getFirstMatch(/windows phone (?:os)?\s?(\d+(\.\d+)*)/i,ua);return{name:OS_MAP.WindowsPhone,version:version}}},{test:[/windows 
/i],describe(ua){const version=Utils.getFirstMatch(/Windows ((NT|XP)( \d\d?.\d)?)/i,ua);const versionName=Utils.getWindowsVersionName(version);return{name:OS_MAP.Windows,version:version,versionName:versionName}}},{test:[/Macintosh(.*?) FxiOS(.*?)\//],describe(ua){const result={name:OS_MAP.iOS};const version=Utils.getSecondMatch(/(Version\/)(\d[\d.]+)/,ua);if(version){result.version=version}return result}},{test:[/macintosh/i],describe(ua){const version=Utils.getFirstMatch(/mac os x (\d+(\.?_?\d+)+)/i,ua).replace(/[_\s]/g,".");const versionName=Utils.getMacOSVersionName(version);const os={name:OS_MAP.MacOS,version:version};if(versionName){os.versionName=versionName}return os}},{test:[/(ipod|iphone|ipad)/i],describe(ua){const version=Utils.getFirstMatch(/os (\d+([_\s]\d+)*) like mac os x/i,ua).replace(/[_\s]/g,".");return{name:OS_MAP.iOS,version:version}}},{test(parser){const notLikeAndroid=!parser.test(/like android/i);const butAndroid=parser.test(/android/i);return notLikeAndroid&&butAndroid},describe(ua){const version=Utils.getFirstMatch(/android[\s/-](\d+(\.\d+)*)/i,ua);const versionName=Utils.getAndroidVersionName(version);const os={name:OS_MAP.Android,version:version};if(versionName){os.versionName=versionName}return os}},{test:[/(web|hpw)[o0]s/i],describe(ua){const version=Utils.getFirstMatch(/(?:web|hpw)[o0]s\/(\d+(\.\d+)*)/i,ua);const os={name:OS_MAP.WebOS};if(version&&version.length){os.version=version}return os}},{test:[/blackberry|\bbb\d+/i,/rim\stablet/i],describe(ua){const version=Utils.getFirstMatch(/rim\stablet\sos\s(\d+(\.\d+)*)/i,ua)||Utils.getFirstMatch(/blackberry\d+\/(\d+([_\s]\d+)*)/i,ua)||Utils.getFirstMatch(/\bbb(\d+)/i,ua);return{name:OS_MAP.BlackBerry,version:version}}},{test:[/bada/i],describe(ua){const version=Utils.getFirstMatch(/bada\/(\d+(\.\d+)*)/i,ua);return{name:OS_MAP.Bada,version:version}}},{test:[/tizen/i],describe(ua){const 
version=Utils.getFirstMatch(/tizen[/\s](\d+(\.\d+)*)/i,ua);return{name:OS_MAP.Tizen,version:version}}},{test:[/linux/i],describe(){return{name:OS_MAP.Linux}}},{test:[/CrOS/],describe(){return{name:OS_MAP.ChromeOS}}},{test:[/PlayStation 4/],describe(ua){const version=Utils.getFirstMatch(/PlayStation 4[/\s](\d+(\.\d+)*)/i,ua);return{name:OS_MAP.PlayStation4,version:version}}}];var platformParsersList=[{test:[/googlebot/i],describe(){return{type:"bot",vendor:"Google"}}},{test:[/huawei/i],describe(ua){const model=Utils.getFirstMatch(/(can-l01)/i,ua)&&"Nova";const platform={type:PLATFORMS_MAP.mobile,vendor:"Huawei"};if(model){platform.model=model}return platform}},{test:[/nexus\s*(?:7|8|9|10).*/i],describe(){return{type:PLATFORMS_MAP.tablet,vendor:"Nexus"}}},{test:[/ipad/i],describe(){return{type:PLATFORMS_MAP.tablet,vendor:"Apple",model:"iPad"}}},{test:[/Macintosh(.*?) FxiOS(.*?)\//],describe(){return{type:PLATFORMS_MAP.tablet,vendor:"Apple",model:"iPad"}}},{test:[/kftt build/i],describe(){return{type:PLATFORMS_MAP.tablet,vendor:"Amazon",model:"Kindle Fire HD 7"}}},{test:[/silk/i],describe(){return{type:PLATFORMS_MAP.tablet,vendor:"Amazon"}}},{test:[/tablet(?! 
pc)/i],describe(){return{type:PLATFORMS_MAP.tablet}}},{test(parser){const iDevice=parser.test(/ipod|iphone/i);const likeIDevice=parser.test(/like (ipod|iphone)/i);return iDevice&&!likeIDevice},describe(ua){const model=Utils.getFirstMatch(/(ipod|iphone)/i,ua);return{type:PLATFORMS_MAP.mobile,vendor:"Apple",model:model}}},{test:[/nexus\s*[0-6].*/i,/galaxy nexus/i],describe(){return{type:PLATFORMS_MAP.mobile,vendor:"Nexus"}}},{test:[/[^-]mobi/i],describe(){return{type:PLATFORMS_MAP.mobile}}},{test(parser){return parser.getBrowserName(true)==="blackberry"},describe(){return{type:PLATFORMS_MAP.mobile,vendor:"BlackBerry"}}},{test(parser){return parser.getBrowserName(true)==="bada"},describe(){return{type:PLATFORMS_MAP.mobile}}},{test(parser){return parser.getBrowserName()==="windows phone"},describe(){return{type:PLATFORMS_MAP.mobile,vendor:"Microsoft"}}},{test(parser){const osMajorVersion=Number(String(parser.getOSVersion()).split(".")[0]);return parser.getOSName(true)==="android"&&osMajorVersion>=3},describe(){return{type:PLATFORMS_MAP.tablet}}},{test(parser){return parser.getOSName(true)==="android"},describe(){return{type:PLATFORMS_MAP.mobile}}},{test(parser){return parser.getOSName(true)==="macos"},describe(){return{type:PLATFORMS_MAP.desktop,vendor:"Apple"}}},{test(parser){return parser.getOSName(true)==="windows"},describe(){return{type:PLATFORMS_MAP.desktop}}},{test(parser){return parser.getOSName(true)==="linux"},describe(){return{type:PLATFORMS_MAP.desktop}}},{test(parser){return parser.getOSName(true)==="playstation 4"},describe(){return{type:PLATFORMS_MAP.tv}}},{test(parser){return parser.getOSName(true)==="roku"},describe(){return{type:PLATFORMS_MAP.tv}}}];var enginesParsersList=[{test(parser){return parser.getBrowserName(true)==="microsoft edge"},describe(ua){const isBlinkBased=/\sedg\//i.test(ua);if(isBlinkBased){return{name:ENGINE_MAP.Blink}}const 
version=Utils.getFirstMatch(/edge\/(\d+(\.?_?\d+)+)/i,ua);return{name:ENGINE_MAP.EdgeHTML,version:version}}},{test:[/trident/i],describe(ua){const engine={name:ENGINE_MAP.Trident};const version=Utils.getFirstMatch(/trident\/(\d+(\.?_?\d+)+)/i,ua);if(version){engine.version=version}return engine}},{test(parser){return parser.test(/presto/i)},describe(ua){const engine={name:ENGINE_MAP.Presto};const version=Utils.getFirstMatch(/presto\/(\d+(\.?_?\d+)+)/i,ua);if(version){engine.version=version}return engine}},{test(parser){const isGecko=parser.test(/gecko/i);const likeGecko=parser.test(/like gecko/i);return isGecko&&!likeGecko},describe(ua){const engine={name:ENGINE_MAP.Gecko};const version=Utils.getFirstMatch(/gecko\/(\d+(\.?_?\d+)+)/i,ua);if(version){engine.version=version}return engine}},{test:[/(apple)?webkit\/537\.36/i],describe(){return{name:ENGINE_MAP.Blink}}},{test:[/(apple)?webkit/i],describe(ua){const engine={name:ENGINE_MAP.WebKit};const version=Utils.getFirstMatch(/webkit\/(\d+(\.?_?\d+)+)/i,ua);if(version){engine.version=version}return engine}}];class Parser{constructor(UA,skipParsing=false){if(UA===void 0||UA===null||UA===""){throw new Error("UserAgent parameter can't be empty")}this._ua=UA;this.parsedResult={};if(skipParsing!==true){this.parse()}}getUA(){return this._ua}test(regex){return regex.test(this._ua)}parseBrowser(){this.parsedResult.browser={};const browserDescriptor=Utils.find(browsersList,_browser=>{if(typeof _browser.test==="function"){return _browser.test(this)}if(_browser.test instanceof Array){return _browser.test.some(condition=>this.test(condition))}throw new Error("Browser's test function is not valid")});if(browserDescriptor){this.parsedResult.browser=browserDescriptor.describe(this.getUA())}return this.parsedResult.browser}getBrowser(){if(this.parsedResult.browser){return this.parsedResult.browser}return this.parseBrowser()}getBrowserName(toLowerCase){if(toLowerCase){return String(this.getBrowser().name).toLowerCase()||""}return 
this.getBrowser().name||""}getBrowserVersion(){return this.getBrowser().version}getOS(){if(this.parsedResult.os){return this.parsedResult.os}return this.parseOS()}parseOS(){this.parsedResult.os={};const os=Utils.find(osParsersList,_os=>{if(typeof _os.test==="function"){return _os.test(this)}if(_os.test instanceof Array){return _os.test.some(condition=>this.test(condition))}throw new Error("Browser's test function is not valid")});if(os){this.parsedResult.os=os.describe(this.getUA())}return this.parsedResult.os}getOSName(toLowerCase){const{name:name}=this.getOS();if(toLowerCase){return String(name).toLowerCase()||""}return name||""}getOSVersion(){return this.getOS().version}getPlatform(){if(this.parsedResult.platform){return this.parsedResult.platform}return this.parsePlatform()}getPlatformType(toLowerCase=false){const{type:type}=this.getPlatform();if(toLowerCase){return String(type).toLowerCase()||""}return type||""}parsePlatform(){this.parsedResult.platform={};const platform=Utils.find(platformParsersList,_platform=>{if(typeof _platform.test==="function"){return _platform.test(this)}if(_platform.test instanceof Array){return _platform.test.some(condition=>this.test(condition))}throw new Error("Browser's test function is not valid")});if(platform){this.parsedResult.platform=platform.describe(this.getUA())}return this.parsedResult.platform}getEngine(){if(this.parsedResult.engine){return this.parsedResult.engine}return this.parseEngine()}getEngineName(toLowerCase){if(toLowerCase){return String(this.getEngine().name).toLowerCase()||""}return this.getEngine().name||""}parseEngine(){this.parsedResult.engine={};const engine=Utils.find(enginesParsersList,_engine=>{if(typeof _engine.test==="function"){return _engine.test(this)}if(_engine.test instanceof Array){return _engine.test.some(condition=>this.test(condition))}throw new Error("Browser's test function is not valid")});if(engine){this.parsedResult.engine=engine.describe(this.getUA())}return 
this.parsedResult.engine}parse(){this.parseBrowser();this.parseOS();this.parsePlatform();this.parseEngine();return this}getResult(){return Utils.assign({},this.parsedResult)}satisfies(checkTree){const platformsAndOSes={};let platformsAndOSCounter=0;const browsers={};let browsersCounter=0;const allDefinitions=Object.keys(checkTree);allDefinitions.forEach(key=>{const currentDefinition=checkTree[key];if(typeof currentDefinition==="string"){browsers[key]=currentDefinition;browsersCounter+=1}else if(typeof currentDefinition==="object"){platformsAndOSes[key]=currentDefinition;platformsAndOSCounter+=1}});if(platformsAndOSCounter>0){const platformsAndOSNames=Object.keys(platformsAndOSes);const OSMatchingDefinition=Utils.find(platformsAndOSNames,name=>this.isOS(name));if(OSMatchingDefinition){const osResult=this.satisfies(platformsAndOSes[OSMatchingDefinition]);if(osResult!==void 0){return osResult}}const platformMatchingDefinition=Utils.find(platformsAndOSNames,name=>this.isPlatform(name));if(platformMatchingDefinition){const platformResult=this.satisfies(platformsAndOSes[platformMatchingDefinition]);if(platformResult!==void 0){return platformResult}}}if(browsersCounter>0){const browserNames=Object.keys(browsers);const matchingDefinition=Utils.find(browserNames,name=>this.isBrowser(name,true));if(matchingDefinition!==void 0){return this.compareVersion(browsers[matchingDefinition])}}return undefined}isBrowser(browserName,includingAlias=false){const defaultBrowserName=this.getBrowserName().toLowerCase();let browserNameLower=browserName.toLowerCase();const alias=Utils.getBrowserTypeByAlias(browserNameLower);if(includingAlias&&alias){browserNameLower=alias.toLowerCase()}return browserNameLower===defaultBrowserName}compareVersion(version){let expectedResults=[0];let comparableVersion=version;let isLoose=false;const currentBrowserVersion=this.getBrowserVersion();if(typeof currentBrowserVersion!=="string"){return void 
0}if(version[0]===">"||version[0]==="<"){comparableVersion=version.substr(1);if(version[1]==="="){isLoose=true;comparableVersion=version.substr(2)}else{expectedResults=[]}if(version[0]===">"){expectedResults.push(1)}else{expectedResults.push(-1)}}else if(version[0]==="="){comparableVersion=version.substr(1)}else if(version[0]==="~"){isLoose=true;comparableVersion=version.substr(1)}return expectedResults.indexOf(Utils.compareVersions(currentBrowserVersion,comparableVersion,isLoose))>-1}isOS(osName){return this.getOSName(true)===String(osName).toLowerCase()}isPlatform(platformType){return this.getPlatformType(true)===String(platformType).toLowerCase()}isEngine(engineName){return this.getEngineName(true)===String(engineName).toLowerCase()}is(anything,includingAlias=false){return this.isBrowser(anything,includingAlias)||this.isOS(anything)||this.isPlatform(anything)}some(anythings=[]){return anythings.some(anything=>this.is(anything))}}class Bowser{static getParser(UA,skipParsing=false){if(typeof UA!=="string"){throw new Error("UserAgent should be a string")}return new Parser(UA,skipParsing)}static parse(UA){return new Parser(UA).getResult()}static get BROWSER_MAP(){return BROWSER_MAP}static get ENGINE_MAP(){return ENGINE_MAP}static get OS_MAP(){return OS_MAP}static get PLATFORMS_MAP(){return PLATFORMS_MAP}}function _classCallCheck(a,n){if(!(a instanceof n))throw new TypeError("Cannot call a class as a function")}function _defineProperties(e,r){for(var t=0;t=this._data.length){return undefined}var res=this._data.slice(this._pos,this._pos+bytes);this._pos+=bytes;return res}},{key:"set",value:function set(pos,data){this._pos=pos;this._data=data}},{key:"clear",value:function clear(){this._pos=0;this._data=new Uint8Array}}])}();var PvFileIDB=function(_PvFile){function PvFileIDB(path,meta,db,mode){var 
_this;_classCallCheck(this,PvFileIDB);_this=_callSuper$1(this,PvFileIDB);_this._pageSize=512*1024;_this._pagePtr=0;_this._pageOffset=0;_this._path=path;_this._meta=meta;_this._db=db;_this._mode=mode;_this._cache=new PvCache;return _this}_inherits(PvFileIDB,_PvFile);return _createClass(PvFileIDB,[{key:"pageSize",get:function get(){return this._pageSize}},{key:"close",value:function(){var _close=_asyncToGenerator(_regeneratorRuntime.mark(function _callee(){return _regeneratorRuntime.wrap(function _callee$(_context){while(1)switch(_context.prev=_context.next){case 0:this._db.close();case 1:case"end":return _context.stop()}},_callee,this)}));function close(){return _close.apply(this,arguments)}return close}()},{key:"read",value:function(){var _read=_asyncToGenerator(_regeneratorRuntime.mark(function _callee2(size,count){var _this2=this;return _regeneratorRuntime.wrap(function _callee2$(_context2){while(1)switch(_context2.prev=_context2.next){case 0:return _context2.abrupt("return",new Promise(function(resolve,reject){if(!_this2.exists()){reject(new Error("'".concat(_this2._path,"' doesn't exist.")));return}if(_this2._isEOF){var err=new Error("EOF");err.name="EndOfFile";reject(err);return}var copied=0;var maxToCopy=Math.min(size*count,_this2._meta.size);var totalElems=maxToCopy-maxToCopy%size;var buffer=new Uint8Array(totalElems);var res=_this2._cache.get(totalElems);if(res){copied+=res.length;_this2._pageOffset+=res.length;if(_this2._pageOffset===_this2._pageSize){_this2._pagePtr+=1;_this2._pageOffset=0}if(totalElems===copied){resolve(res);return}buffer.set(res)}var keyRange=IDBKeyRange.bound("".concat(_this2._path,"-").concat(PvFileIDB.createPage(_this2._pagePtr)),"".concat(_this2._path,"-").concat(PvFileIDB.createPage(_this2._pagePtr+Math.floor(totalElems/_this2._pageSize)+1)));var store=_this2._store;var req=store.openCursor(keyRange);req.onsuccess=function(){var cursor=req.result;if(!cursor||_this2._isEOF){return}var 
toCopy=Math.min(totalElems-copied,cursor.value.length-_this2._pageOffset);buffer.set(cursor.value.slice(_this2._pageOffset,_this2._pageOffset+toCopy),copied);copied+=toCopy;_this2._pageOffset+=toCopy;if(_this2._pageOffset===_this2._pageSize){_this2._pagePtr+=1;_this2._pageOffset=0}if(copied1&&_args4[1]!==undefined?_args4[1]:1;return _context4.abrupt("return",new Promise(function(){var _ref=_asyncToGenerator(_regeneratorRuntime.mark(function _callee3(resolve,reject){var _store$transaction3;var store,getCurrentPage,last,newContent,newSize,newMeta,pages,i,keyRange,_store$transaction4;return _regeneratorRuntime.wrap(function _callee3$(_context3){while(1)switch(_context3.prev=_context3.next){case 0:if(!(_this3._mode==="readonly")){_context3.next=3;break}reject(new Error("Instance is readonly mode only."));return _context3.abrupt("return");case 3:if(!(typeof version!=="number"&&version<=0)){_context3.next=6;break}reject(new Error("Version should be a positive number"));return _context3.abrupt("return");case 6:store=_this3._store;getCurrentPage=function getCurrentPage(){return new Promise(function(res){var req=store.get("".concat(_this3._path,"-").concat(PvFileIDB.createPage(_this3._pagePtr)));req.onsuccess=function(){if(req.result!==undefined){res(req.result.slice(0,_this3._pageOffset))}else{res(new Uint8Array(0))}}})};_context3.next=10;return getCurrentPage();case 10:last=_context3.sent;newContent=new Uint8Array(last.length+content.length);newContent.set(last);newContent.set(content,last.length);newSize=_this3._pagePtr*_this3._pageSize+newContent.length;newMeta={size:newSize,numPages:Math.ceil(newSize/_this3._pageSize),version:version,pageSize:_this3._pageSize};store.put(newMeta,_this3._path);pages=Math.ceil(newContent.length/_this3._pageSize);for(i=0;i=this._meta.numPages-1&&this._pageOffset>=this._meta.size%this._pageSize}},{key:"_store",get:function get(){return this._db.transaction(PV_FILE_STORE,this._mode).objectStore(PV_FILE_STORE)}}],[{key:"open",value:function 
open(path,mode){if(!self.indexedDB){var error=new Error("IndexedDB is not supported");error.name="IndexedDBNotSupported";throw error}return new Promise(function(){var _ref3=_asyncToGenerator(_regeneratorRuntime.mark(function _callee7(resolve,reject){var db,req,_error2;return _regeneratorRuntime.wrap(function _callee7$(_context7){while(1)switch(_context7.prev=_context7.next){case 0:_context7.prev=0;_context7.next=3;return getDB();case 3:db=_context7.sent;req=db.transaction(PV_FILE_STORE,"readwrite").objectStore(PV_FILE_STORE).get(path);req.onerror=function(){reject(req.error)};req.onsuccess=function(){var meta=req.result;var dbMode=mode.includes("r")?"readonly":"readwrite";if(meta===undefined&&dbMode==="readonly"){var _error=new Error("'".concat(path,"' doesn't exist."));_error.name="FileNotExists";reject(_error);return}var fileIDB=new PvFileIDB(path,meta,db,dbMode);if(mode.includes("a")){fileIDB.seek(0,2)}resolve(fileIDB)};_context7.next=12;break;case 9:_context7.prev=9;_context7.t0=_context7["catch"](0);if(_context7.t0.name==="InvalidStateError"){_error2=new Error("IndexedDB is not supported");_error2.name="IndexedDBNotSupported";reject(_error2)}else{reject(_context7.t0)}case 12:case"end":return _context7.stop()}},_callee7,null,[[0,9]])}));return function(_x8,_x9){return _ref3.apply(this,arguments)}}())}},{key:"createPage",value:function createPage(page){return("00000"+page).slice(-6)}}])}(PvFile);function _callSuper(t,o,e){return o=_getPrototypeOf(o),_possibleConstructorReturn(t,_isNativeReflectConstruct()?Reflect.construct(o,e||[],_getPrototypeOf(t).constructor):o.apply(t,e))}function _isNativeReflectConstruct(){try{var t=!Boolean.prototype.valueOf.call(Reflect.construct(Boolean,[],function(){}))}catch(t){}return(_isNativeReflectConstruct=function _isNativeReflectConstruct(){return!!t})()}var PvFileMem=function(_PvFile){function PvFileMem(path,meta,db,mode){var 
_this;_classCallCheck(this,PvFileMem);_this=_callSuper(this,PvFileMem);_this._pos=0;_this._path=path;_this._meta=meta;_this._mode=mode;return _this}_inherits(PvFileMem,_PvFile);return _createClass(PvFileMem,[{key:"close",value:function close(){return}},{key:"read",value:function read(size,count){if(!this.exists()){throw new Error("'".concat(this._path,"' doesn't exist."))}if(this._isEOF){var err=new Error("EOF");err.name="EndOfFile";throw err}var toCopy=Math.min(size*count,this._file.length-this._pos);var totalElems=toCopy-toCopy%size;var buffer=new Uint8Array(totalElems);buffer.set(this._file.slice(this._pos,this._pos+totalElems),0);this._pos+=totalElems;return buffer}},{key:"write",value:function write(content){var version=arguments.length>1&&arguments[1]!==undefined?arguments[1]:1;var newFile=new Uint8Array(this._pos+content.length);if(this._file!==undefined){newFile.set(this._file.slice(0,this._pos));newFile.set(content,this._pos)}else{newFile.set(content)}this._file=newFile;this._pos+=content.length}},{key:"seek",value:function seek(offset,whence){if(!this.exists()&&this._mode==="readonly"){throw new Error("'".concat(this._path,"' doesn't exist."))}if(!this.exists()){throw new Error("'".concat(this._path,"' doesn't exist."))}if(offset<0){var err=new Error("EOF");err.name="EndOfFile";throw err}var newOffset;if(whence===0){newOffset=Math.min(offset,this._file.length)}else if(whence===1){newOffset=Math.min(this._pos+offset,this._file.length)}else if(whence===2){newOffset=Math.min(this._file.length+offset,this._file.length)}else{throw new Error("Invalid operation: ".concat(whence,"."))}this._pos=newOffset}},{key:"tell",value:function tell(){if(!this.exists()){return-1}return this._pos}},{key:"remove",value:function(){var _remove=_asyncToGenerator(_regeneratorRuntime.mark(function _callee(){return _regeneratorRuntime.wrap(function _callee$(_context){while(1)switch(_context.prev=_context.next){case 0:if(this.exists()){_context.next=2;break}throw new 
Error("ENOENT");case 2:PvFileMem._memFiles["delete"](this._path);this._pos=0;case 4:case"end":return _context.stop()}},_callee,this)}));function remove(){return _remove.apply(this,arguments)}return remove}()},{key:"exists",value:function exists(){return this._file!==undefined}},{key:"_isEOF",get:function get(){return this._pos>=this._file.length}},{key:"_file",get:function get(){return PvFileMem._memFiles.get(this._path)},set:function set(content){PvFileMem._memFiles.set(this._path,content)}}],[{key:"open",value:function open(path,mode){var file=PvFileMem._memFiles.get(path);var dbMode=mode.includes("r")?"readonly":"readwrite";if(file===undefined&&dbMode==="readonly"){var error=new Error("'".concat(path,"' doesn't exist."));error.name="FileNotExists";throw error}var fileMem=new PvFileMem(path,undefined,undefined,dbMode);if(mode.includes("a")){fileMem.seek(0,2)}return fileMem}}])}(PvFile);PvFileMem._memFiles=new Map;function unsignedAddress(address){if(address<0){return address>>>0}return address}function _arrayWithHoles(r){if(Array.isArray(r))return r}function _iterableToArrayLimit(r,l){var t=null==r?null:"undefined"!=typeof Symbol&&r[Symbol.iterator]||r["@@iterator"];if(null!=t){var e,n,i,u,a=[],f=!0,o=!1;try{if(i=(t=t.call(r)).next,0===l){if(Object(t)!==t)return;f=!1}else for(;!(f=(e=i.call(t)).done)&&(a.push(e.value),a.length!==l);f=!0);}catch(r){o=!0,n=r}finally{try{if(!f&&null!=t["return"]&&(u=t["return"](),Object(u)!==u))return}finally{if(o)throw n}}return a}}function _arrayLikeToArray$2(r,a){(null==a||a>r.length)&&(a=r.length);for(var e=0,n=Array(a);e=r.length?{done:!0}:{done:!1,value:r[_n++]}},e:function e(r){throw r},f:F}}throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")}var o,a=!0,u=!1;return{s:function s(){t=t.call(r)},n:function n(){var r=t.next();return a=r.done,r},e:function e(r){u=!0,o=r},f:function 
f(){try{a||null==t["return"]||t["return"]()}finally{if(u)throw o}}}}function _unsupportedIterableToArray$1(r,a){if(r){if("string"==typeof r)return _arrayLikeToArray$1(r,a);var t={}.toString.call(r).slice(8,-1);return"Object"===t&&r.constructor&&(t=r.constructor.name),"Map"===t||"Set"===t?Array.from(r):"Arguments"===t||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(t)?_arrayLikeToArray$1(r,a):void 0}}function _arrayLikeToArray$1(r,a){(null==a||a>r.length)&&(a=r.length);for(var e=0,n=Array(a);e=16){this.endComputePass();this.flushCommandEncoder()}}},{key:"endComputePass",value:function endComputePass(){if(this._passEncoder){this._passEncoder.end();this._passEncoder=null}}},{key:"getBuffer",value:function getBuffer(sizeBytes,usage){var mappedAtCreation=arguments.length>2&&arguments[2]!==undefined?arguments[2]:false;var label=arguments.length>3?arguments[3]:undefined;var key=this.getBufferKey(sizeBytes,usage);if(this.bufferReusePool.has(key)){var buffers=this.bufferReusePool.get(key);if(buffers&&buffers.length>0){return buffers.pop()}}return this.device.createBuffer({size:sizeBytes*Uint8Array.BYTES_PER_ELEMENT,usage:usage,mappedAtCreation:mappedAtCreation,label:label})}},{key:"scheduleUniformBufferForRelease",value:function scheduleUniformBufferForRelease(buffer){this._uniformBuffersPendingRelease.push(buffer)}},{key:"releaseBuffer",value:function releaseBuffer(buffer){var clearBuffer=arguments.length>1&&arguments[1]!==undefined?arguments[1]:true;if(clearBuffer){this.endComputePass();this.commandEncoder.clearBuffer(buffer,0,buffer.size)}var key=this.getBufferKey(buffer.size,buffer.usage);if(!this.bufferReusePool.has(key)){this.bufferReusePool.set(key,[])}this.bufferReusePool.get(key).push(buffer)}},{key:"sync",value:function(){var _sync=_asyncToGenerator(_regeneratorRuntime.mark(function _callee(){var _this=this;var _iterator,_step,k,buffers,_iterator3,_step3,b,_loop,_i,_Object$entries;return _regeneratorRuntime.wrap(function 
_callee$(_context3){while(1)switch(_context3.prev=_context3.next){case 0:this.flushCommandEncoder();_context3.next=3;return this.device.queue.onSubmittedWorkDone();case 3:_iterator=_createForOfIteratorHelper$1(this.bufferReusePool.keys());try{for(_iterator.s();!(_step=_iterator.n()).done;){k=_step.value;buffers=this.bufferReusePool.get(k);if(buffers&&buffers.length>0){_iterator3=_createForOfIteratorHelper$1(buffers);try{for(_iterator3.s();!(_step3=_iterator3.n()).done;){b=_step3.value;b===null||b===void 0||b.destroy()}}catch(err){_iterator3.e(err)}finally{_iterator3.f()}}}}catch(err){_iterator.e(err)}finally{_iterator.f()}this.bufferReusePool.clear();_loop=_regeneratorRuntime.mark(function _loop(){var _Object$entries$_i,shaderName,timestampBuffers,_iterator2,_step2,_loop2;return _regeneratorRuntime.wrap(function _loop$(_context2){while(1)switch(_context2.prev=_context2.next){case 0:_Object$entries$_i=_slicedToArray(_Object$entries[_i],2),shaderName=_Object$entries$_i[0],timestampBuffers=_Object$entries$_i[1];if(!_this.shaderTimes[shaderName]){_this.shaderTimes[shaderName]=[]}_iterator2=_createForOfIteratorHelper$1(timestampBuffers);_context2.prev=3;_loop2=_regeneratorRuntime.mark(function _loop2(){var timestampBuffer;return _regeneratorRuntime.wrap(function _loop2$(_context){while(1)switch(_context.prev=_context.next){case 0:timestampBuffer=_step2.value;timestampBuffer.mapAsync(GPUMapMode.READ).then(function(){var times=new BigInt64Array(timestampBuffer.getMappedRange());var timeDif=times[1]-times[0];timestampBuffer.unmap();timestampBuffer.destroy();_this.shaderTimes[shaderName].push(timeDif)});case 2:case"end":return _context.stop()}},_loop2)});_iterator2.s();case 6:if((_step2=_iterator2.n()).done){_context2.next=10;break}return _context2.delegateYield(_loop2(),"t0",8);case 8:_context2.next=6;break;case 10:_context2.next=15;break;case 12:_context2.prev=12;_context2.t1=_context2["catch"](3);_iterator2.e(_context2.t1);case 15:_context2.prev=15;_iterator2.f();return 
_context2.finish(15);case 18:case"end":return _context2.stop()}},_loop,null,[[3,12,15,18]])});_i=0,_Object$entries=Object.entries(this.timestampBuffers);case 8:if(!(_i<_Object$entries.length)){_context3.next=13;break}return _context3.delegateYield(_loop(),"t0",10);case 10:_i++;_context3.next=8;break;case 13:this.timestampBuffers={};case 14:case"end":return _context3.stop()}},_callee,this)}));function sync(){return _sync.apply(this,arguments)}return sync}()},{key:"reportShaderTimes",value:function reportShaderTimes(){for(var _i2=0,_Object$entries2=Object.entries(this.shaderTimes);_i2<_Object$entries2.length;_i2++){var _Object$entries2$_i=_slicedToArray(_Object$entries2[_i2],2),shaderName=_Object$entries2$_i[0],shaderTimes=_Object$entries2$_i[1];var timeSum=0n;var _iterator4=_createForOfIteratorHelper$1(shaderTimes),_step4;try{for(_iterator4.s();!(_step4=_iterator4.n()).done;){var shaderTime=_step4.value;timeSum=timeSum+shaderTime}}catch(err){_iterator4.e(err)}finally{_iterator4.f()}var totalSeconds=Number(timeSum)*1e-9;var avgSeconds=(totalSeconds/shaderTimes.length).toFixed(7);console.log("".concat(shaderName,", ").concat(totalSeconds.toFixed(5),", ").concat(avgSeconds))}this.shaderTimes={}}},{key:"flushCommandEncoder",value:function flushCommandEncoder(){var _this2=this;this.device.queue.submit([this.commandEncoder.finish()]);this._commandEncoder=null;this._numCommandsEncoded=0;this._stageBuffersPendingMap.forEach(function(buffer){buffer.destroy()});this._stageBuffersPendingMap=[];this._uniformBuffersPendingRelease.forEach(function(buffer){_this2.releaseBuffer(buffer,false)});this._uniformBuffersPendingRelease=[]}},{key:"writeBuffer",value:function writeBuffer(sizeBytes,offset,srcArray,dstBuffer){var stagingBuffer=this.getBuffer(sizeBytes,GPUBufferUsage.MAP_WRITE|GPUBufferUsage.COPY_SRC,true);new 
Uint8Array(stagingBuffer.getMappedRange()).set(srcArray);stagingBuffer.unmap();this._stageBuffersPendingMap.push(stagingBuffer);this.endComputePass();this.commandEncoder.copyBufferToBuffer(stagingBuffer,0,dstBuffer,offset,sizeBytes);this.numCommandsEncoded++}},{key:"dispatchComputerShader",value:function dispatchComputerShader(bindGroup,pipeline,shaderName,workgroupCountX,workgroupCountY,workgroupCountZ){if(this.isTimerEnabled){var querySet=this.device.createQuerySet({type:"timestamp",count:2});var timestampWrites={querySet:querySet,beginningOfPassWriteIndex:0,endOfPassWriteIndex:1};this.endComputePass();this._passEncoder=this.commandEncoder.beginComputePass({timestampWrites:timestampWrites});this._passEncoder.setBindGroup(0,bindGroup);this._passEncoder.setPipeline(pipeline);this._passEncoder.dispatchWorkgroups(workgroupCountX,workgroupCountY,workgroupCountZ);this.endComputePass();var size=2*BigInt64Array.BYTES_PER_ELEMENT;var resolveBuffer=this.device.createBuffer({size:size,usage:GPUBufferUsage.QUERY_RESOLVE|GPUBufferUsage.COPY_SRC});this.commandEncoder.resolveQuerySet(querySet,0,2,resolveBuffer,0);var resultBuffer=this.device.createBuffer({size:size,usage:GPUBufferUsage.COPY_DST|GPUBufferUsage.MAP_READ});this.commandEncoder.copyBufferToBuffer(resolveBuffer,0,resultBuffer,0,size);if(!this.timestampBuffers[shaderName]){this.timestampBuffers[shaderName]=[]}this.timestampBuffers[shaderName].push(resultBuffer);this.numCommandsEncoded+=3}else{if(!this._passEncoder){this._passEncoder=this.commandEncoder.beginComputePass()}this._passEncoder.setBindGroup(0,bindGroup);this._passEncoder.setPipeline(pipeline);this._passEncoder.dispatchWorkgroups(workgroupCountX,workgroupCountY,workgroupCountZ);this.numCommandsEncoded++}}}])}();var PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE=256;var PV_PICOLLM_WEBGPU_MAX_GRID_DIM=65535;var gpuDevices=new Map;var gpuBuffers=new Map;var emptyShader="\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main_empty() 
{}\n";var shaderEntryPoint="main";var PRECOMPUTE_ENCODING_SHADER_NAME="pv_picollm_attention_precompute_encoding_shader";var attentionPrecomputeEncodingShaderSource="\nstruct argsStruct {\n dimension: u32,\n steps: u32,\n theta: f32,\n encoding_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar encoding: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(\n @builtin(local_invocation_id) local_id: vec3,\n @builtin(workgroup_id) workgroup_id: vec3,\n @builtin(num_workgroups) num_workgroups: vec3\n) {\n let ts = workgroup_id.x;\n let ds = local_id.x;\n \n for (var t = ts; t < args.steps; t += num_workgroups.x) {\n for (var d = ds; d < (args.dimension / 2u); d += workgroup_size_x) {\n let f = 2u * d;\n let x = f32(t) / pow(args.theta, f32(f) / f32(args.dimension));\n let encoding_idx = args.encoding_offset + (t * args.dimension) + f;\n encoding[encoding_idx] = cos(x);\n encoding[encoding_idx + 1] = sin(x);\n }\n } \n}\n\n".concat(emptyShader);var loadAttentionPrecomputeEncodingShader=function loadAttentionPrecomputeEncodingShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"attention precompute encoding bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"attention precompute encoding pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"attention precompute encoding shader module",code:attentionPrecomputeEncodingShaderSource});var computePipeline=device.createComputePipeline({label:"attention precompute encoding 
pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_x:PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE}}});return{computePipeline:computePipeline}};var ENCODE_ROPE_INTERLEAVED_SHADER_NAME="pv_picollm_attention_encode_rope_interleaved_shader";var attentionEncodeRopeInterleavedShaderSource="\nstruct argsStruct { \n n: u32,\n num_heads: u32,\n head_dimension: u32,\n rope_dimension: u32,\n position: u32,\n encoding_offset: u32,\n x_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar encoding: array;\n\n@group(0) @binding(2)\nvar x: array;\n \noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(\n @builtin(local_invocation_id) local_id: vec3,\n @builtin(workgroup_id) workgroup_id: vec3,\n @builtin(num_workgroups) num_workgroups: vec3\n) {\n let ts = workgroup_id.x;\n let h = workgroup_id.y;\n let ds = local_id.x;\n\n for (var t = ts; t < args.n; t += num_workgroups.x) {\n let x_start = args.x_offset + (t * args.num_heads + h) * args.head_dimension;\n let encoding_start = args.encoding_offset + ((t + args.position) * args.rope_dimension); \n for (var d = ds; d < (args.head_dimension / 2u); d += workgroup_size_x) {\n let i = 2u * d;\n let x_idx = x_start + i;\n let encoding_idx = encoding_start + i;\n \n let re = x[x_idx];\n let im = x[x_idx + 1];\n x[x_idx] = (re * encoding[encoding_idx]) - (im * encoding[encoding_idx + 1]);\n x[x_idx + 1] = (re * encoding[encoding_idx + 1]) + (im * encoding[encoding_idx]);\n }\n }\n}\n\n".concat(emptyShader);var loadAttentionEncodeRopeInterleavedShader=function loadAttentionEncodeRopeInterleavedShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"attention encode rope interleave bind group 
layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});/* Layout above: binding 0 uniform args, binding 1 read-only storage (encoding in the shader), binding 2 read-write storage (x in the shader). */var pipelineLayout=device.createPipelineLayout({label:"attention encode rope interleave pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"attention encode rope interleave shader module",code:attentionEncodeRopeInterleavedShaderSource});var computePipeline=device.createComputePipeline({label:"attention encode rope interleave pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_x:PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE}}});return{computePipeline:computePipeline}};/* Key of the non-interleaved encode shader in attentionShaders and obj.shaders. */var ENCODE_SHADER_NAME="pv_picollm_attention_encode_shader";/* WGSL source. Same argument struct and bindings as the interleaved shader, but the rotated pair is (x[d], x[d + rope_dimension / 2]) within the head and only rope_dimension / 2 pairs are visited; the encoding entries used are at column 2d and 2d + 1 of row (t + position). NOTE(review): the declarations as they appear here carry no address-space or element-type parameters - confirm against the upstream shader source. */var attentionEncodeShaderSource="\nstruct argsStruct { \n n: u32,\n num_heads: u32,\n head_dimension: u32,\n rope_dimension: u32,\n position: u32,\n encoding_offset: u32,\n x_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar encoding: array;\n\n@group(0) @binding(2)\nvar x: array;\n \noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(\n @builtin(local_invocation_id) local_id: vec3,\n @builtin(workgroup_id) workgroup_id: vec3,\n @builtin(num_workgroups) num_workgroups: vec3\n) {\n let ts = workgroup_id.x;\n let h = workgroup_id.y;\n let ds = local_id.x;\n\n for (var t = ts; t < args.n; t += num_workgroups.x) {\n let half_rope = (args.rope_dimension / 2);\n let xr_start = args.x_offset + ((t * args.num_heads + h) * args.head_dimension);\n let xi_start = xr_start + half_rope;\n let encoding_start = args.encoding_offset + ((t + args.position) * args.rope_dimension); \n for (var d = ds; d < half_rope; d += 
workgroup_size_x) {\n let xr_idx = xr_start + d;\n let xi_idx = xi_start + d;\n let encoding_idx = encoding_start + (2 * d);\n\n let re = x[xr_idx];\n let im = x[xi_idx];\n x[xr_idx] = (re * encoding[encoding_idx]) - (im * encoding[encoding_idx + 1]);\n x[xi_idx] = (re * encoding[encoding_idx + 1]) + (im * encoding[encoding_idx]);\n }\n }\n}\n\n".concat(emptyShader);/** Builds the compute pipeline for the non-interleaved encode shader. Bind group 0 layout: binding 0 uniform, binding 1 read-only storage, binding 2 read-write storage; workgroup_size_x is overridden with PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE. @param device - GPU device used to create the layouts, shader module and pipeline. @returns an object exposing the pipeline as computePipeline. */var loadAttentionEncodeShader=function loadAttentionEncodeShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"attention encode bind layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"attention encode pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"attention encode shader",code:attentionEncodeShaderSource});var computePipeline=device.createComputePipeline({label:"attention encode pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_x:PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE}}});return{computePipeline:computePipeline}};/* Key of the dot-product shader in attentionShaders and obj.shaders. */var DOT_PRODUCT_SHADER_NAME="pv_picollm_attention_dot_product_shader";/* WGSL source. One invocation per query head (global_id.x). For every key k it accumulates query[i] * (intercept[k] + slope[k] * byte) over the head dimension into scores, where byte is the 8-bit value extracted from the packed keys words, then multiplies each score by norm. Keys with k below length1 are read starting at position start inside the head's window, the remaining ones from the beginning of the window. The uniform struct order here (query_offset, keys_offset, key_intercepts_offset, key_slopes_offset, scores_offset as the last five fields) is what the host must pack. NOTE(review): scores is updated with += so it is presumably zeroed by the caller beforehand - confirm. NOTE(review): the declarations as they appear here carry no address-space or element-type parameters - confirm against the upstream shader source. */var attentionDotProductShaderSource="\nstruct argsStruct { \n n: u32,\n tq: u32,\n head_dimension: u32,\n num_heads: u32,\n num_kv_heads: u32,\n window_length: u32,\n start: u32,\n norm: f32,\n length1: u32,\n num_keys: u32,\n query_offset: u32,\n keys_offset: u32,\n key_intercepts_offset: u32,\n key_slopes_offset: u32,\n scores_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar query: array;\n\n@group(0) @binding(2)\nvar keys: array;\n\n@group(0) @binding(3)\nvar key_intercepts: array;\n\n@group(0) @binding(4)\nvar key_slopes: array;\n\n@group(0) @binding(5)\nvar scores: array;\n \noverride 
workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.num_heads) {\n return;\n }\n\n let head = global_id.x / (args.num_heads / args.num_kv_heads);\n \n let head_offset = head * args.window_length;\n let start_index = head_offset + args.start;\n \n let keys_local_a = args.keys_offset + (start_index * args.head_dimension);\n let key_intercepts_local_a = args.key_intercepts_offset + start_index; \n let key_slopes_local_a = args.key_slopes_offset + start_index;\n \n let keys_local_b = args.keys_offset + (head_offset * args.head_dimension);\n let key_intercepts_local_b = args.key_intercepts_offset + head_offset; \n let key_slopes_local_b = args.key_slopes_offset + head_offset;\n \n let scores_local = args.scores_offset + (global_id.x * args.num_keys);\n let query_local = args.query_offset + (((global_id.x * args.n) + args.tq) * args.head_dimension);\n \n for (var i = 0u; i < args.head_dimension; i++) { \n for (var k = 0u; k < args.num_keys; k++) {\n if (k < args.length1) { \n let key_idx = keys_local_a + (k * args.head_dimension) + i;\n let key_val = f32(extractBits(keys[key_idx / 4], (i * 8u) % 32u, 8u));\n let tmp = query[query_local + i] * (key_intercepts[key_intercepts_local_a + k] + (key_slopes[key_slopes_local_a + k] * key_val));\n scores[scores_local + k] += tmp;\n }\n else {\n let j = k - args.length1;\n let key_idx = keys_local_b + (j * args.head_dimension) + i;\n let key_val = f32(extractBits(keys[key_idx / 4], (i * 8u) % 32u, 8u));\n let tmp = query[query_local + i] * (key_intercepts[key_intercepts_local_b + j] + (key_slopes[key_slopes_local_b + j] * key_val));\n scores[scores_local + k] += tmp;\n }\n } \n }\n \n for (var k = 0u; k < args.num_keys; k++) {\n scores[scores_local + k] *= args.norm;\n }\n}\n\n".concat(emptyShader);/** Loader for the dot-product shader: builds a six-entry bind group layout (binding 0 uniform, bindings 1-4 read-only storage, binding 5 read-write storage) and the compute pipeline. No override constants are supplied, so the shader's workgroup_size_x default of 1 applies. @param device - GPU device used to create the layouts, shader module and pipeline. @returns an object exposing the pipeline as computePipeline. */var 
loadAttentionDotProductShader=function loadAttentionDotProductShader(device){/* binding 0: uniform args; bindings 1-4: read-only storage (query, keys, key_intercepts, key_slopes in the shader); binding 5: read-write storage (scores). */var bindGroupLayout=device.createBindGroupLayout({label:"attention dot product bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:4,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:5,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"attention dot product pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"attention dot product shader module",code:attentionDotProductShaderSource});/* No override constants here, so workgroup_size_x keeps the WGSL default of 1. */var computePipeline=device.createComputePipeline({label:"attention dot product pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint}});return{computePipeline:computePipeline}};/* Key of the softmax shader in attentionShaders and obj.shaders. */var SOFTMAX_SHADER_NAME="pv_picollm_attention_softmax_shader";/* WGSL source. One invocation per head (global_id.x): finds the maximum of the head's num_keys scores, replaces each score with exp(score - max), and divides every entry by the sum of those exponentials, all in place. NOTE(review): the declarations as they appear here carry no address-space or element-type parameters - confirm against the upstream shader source. */var attentionSoftmaxShaderSource="\nstruct argsStruct { \n num_heads: u32,\n num_keys: u32,\n scores_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar scores: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.num_heads) {\n return;\n }\n\n let scores_start = args.scores_offset + (global_id.x * args.num_keys);\n \n var max_index: u32 = 0;\n for (var i = 1u; i < args.num_keys; i++) {\n if (scores[scores_start + i] > scores[scores_start + max_index]) {\n max_index = i;\n }\n }\n let max: f32 = scores[scores_start + max_index];\n\n var 
sum: f32 = 0.0;\n for (var i = 0u; i < args.num_keys; i++) {\n scores[scores_start + i] = exp(scores[scores_start + i] - max);\n sum += scores[scores_start + i];\n }\n\n for (var i = 0u; i < args.num_keys; i++) {\n scores[scores_start + i] /= sum;\n }\n}\n\n".concat(emptyShader);/** Builds the compute pipeline for the softmax shader. Bind group 0 layout: binding 0 uniform, binding 1 read-write storage. No override constants are supplied, so workgroup_size_x keeps the WGSL default of 1. @param device - GPU device used to create the layouts, shader module and pipeline. @returns an object exposing the pipeline as computePipeline. */var loadAttentionSoftmaxShader=function loadAttentionSoftmaxShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"attention softmax bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"attention softmax pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"attention softmax shader module",code:attentionSoftmaxShaderSource});var computePipeline=device.createComputePipeline({label:"attention softmax pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint}});return{computePipeline:computePipeline}};/* Key of the fir shader in attentionShaders and obj.shaders. */var FIR_SHADER_NAME="pv_picollm_attention_fir_shader";/* WGSL source. One invocation per head (global_id.x). For each output element i it sums scores[k] * (intercept[k] + slope[k] * byte) over length1 values read from position start of the head's window followed by length2 values read from the beginning of the window, where byte is the 8-bit value extracted from the packed values words, and stores the sum in output. NOTE(review): the declarations as they appear here carry no address-space or element-type parameters - confirm against the upstream shader source. */var attentionFirShaderSource="\nstruct argsStruct { \n length1: u32,\n length2: u32,\n tq: u32,\n head_dimension: u32,\n num_heads: u32,\n num_kv_heads: u32,\n window_length: u32,\n start: u32,\n values_offset: u32,\n value_intercepts_offset: u32,\n value_slopes_offset: u32,\n scores_offset: u32,\n output_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar values: array;\n\n@group(0) @binding(2)\nvar value_intercepts: array;\n\n@group(0) @binding(3)\nvar value_slopes: array;\n\n@group(0) @binding(4)\nvar scores: array;\n\n@group(0) @binding(5)\nvar output: array;\n \noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= 
args.num_heads) {\n return;\n }\n\n let head = global_id.x / (args.num_heads / args.num_kv_heads);\n\n let head_offset = head * args.window_length;\n let start_index = head_offset + args.start;\n\n let values_local_a = args.values_offset + (start_index * args.head_dimension);\n let value_intercepts_local_a = args.value_intercepts_offset + start_index;\n let value_slopes_local_a = args.value_slopes_offset + start_index;\n let values_local_b = args.values_offset + (head_offset * args.head_dimension);\n let value_intercepts_local_b = args.value_intercepts_offset + head_offset;\n let value_slopes_local_b = args.value_slopes_offset + head_offset;\n let scores_local = args.scores_offset + (global_id.x * (args.length1 + args.length2));\n let output_local = args.output_offset + (((args.tq * args.num_heads) + global_id.x) * args.head_dimension);\n\n for (var i = 0u; i < args.head_dimension; i++) {\n var tmp: f32 = 0.0;\n for (var k = 0u; k < args.length1; k++) {\n let value_idx = values_local_a + (k * args.head_dimension) + i;\n let value_val = f32(extractBits(values[value_idx / 4], (i * 8u) % 32u, 8u));\n tmp += scores[scores_local + k] * (value_intercepts[value_intercepts_local_a + k] + (value_slopes[value_slopes_local_a + k] * value_val)); \n }\n for (var k = 0u; k < args.length2; k++) {\n let value_idx = values_local_b + (k * args.head_dimension) + i;\n let value_val = f32(extractBits(values[value_idx / 4], (i * 8u) % 32u, 8u));\n tmp += scores[scores_local + args.length1 + k] * (value_intercepts[value_intercepts_local_b + k] + (value_slopes[value_slopes_local_b + k] * value_val)); \n }\n output[output_local + i] = tmp;\n }\n}\n\n".concat(emptyShader);/** Builds the compute pipeline for the fir shader (six-entry bind group layout, no override constants). @param device - GPU device used to create the layouts, shader module and pipeline. @returns an object exposing the pipeline as computePipeline. */var loadAttentionFirShader=function loadAttentionFirShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"attention fir bind group 
layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:4,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:5,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});/* Layout above: binding 0 uniform args; bindings 1-4 read-only storage (values, value_intercepts, value_slopes, scores in the shader); binding 5 read-write storage (output). */var pipelineLayout=device.createPipelineLayout({label:"attention fir pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"attention fir shader module",code:attentionFirShaderSource});/* No override constants here, so workgroup_size_x keeps the WGSL default of 1. */var computePipeline=device.createComputePipeline({label:"attention fir pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint}});return{computePipeline:computePipeline}};/* Key of the update-kv shader in attentionShaders and obj.shaders. */var UPDATE_KV_SHADER_NAME="pv_picollm_attention_update_kv_shader";/* WGSL source. One invocation per kv head (global_id.x). For each of the n incoming rows it picks the window slot (position + i) % window_length, stores the row minimum in kv_intercepts and (max - min) / 255 in kv_slopes, then packs round((value - min) / slope) as 8-bit fields into the kv words. kv_offset is used as an index in 32-bit words (it is added after the division by 4). NOTE(review): when a row's max equals its min the stored slope is 0 and the quantisation divides by it - confirm this cannot happen or is handled upstream. NOTE(review): the declarations as they appear here carry no address-space or element-type parameters - confirm against the upstream shader source. */var attentionUpdateKvShaderSource="\nstruct argsStruct {\n n: u32,\n num_kv_heads: u32,\n window_length: u32,\n position: u32,\n head_dimension: u32,\n tf_offset: u32,\n kv_offset: u32,\n kv_intercepts_offset: u32,\n kv_slopes_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar tf: array;\n\n@group(0) @binding(2)\nvar kv: array;\n\n@group(0) @binding(3)\nvar kv_intercepts: array;\n\n@group(0) @binding(4)\nvar kv_slopes: array;\n \noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) { \n if (global_id.x >= args.num_kv_heads) {\n return;\n }\n \n for (var i = 0u; i < args.n; i++) {\n let index = (global_id.x * args.window_length) + ((args.position + i) % args.window_length);\n let tf_start = args.tf_offset + (((i * args.num_kv_heads) + global_id.x) * 
args.head_dimension);\n let kv_start = args.kv_offset + ((index * args.head_dimension) / 4);\n let kv_intercepts_start = args.kv_intercepts_offset + index;\n let kv_slopes_start = args.kv_slopes_offset + index;\n \n var xmax = tf[tf_start]; \n var xmin = tf[tf_start]; \n \n for (var j = 1u; j < args.head_dimension; j++) {\n xmax = max(xmax, tf[tf_start + j]);\n xmin = min(xmin, tf[tf_start + j]);\n }\n\n kv_intercepts[kv_intercepts_start] = xmin;\n kv_slopes[kv_slopes_start] = f32(xmax - xmin) / 255.0;\n\n for (var j = 0u; j < args.head_dimension; j++) {\n let kv_idx = kv_start + (j / 4);\n let kv_val = u32(round((tf[tf_start + j] - xmin) / kv_slopes[kv_slopes_start])); \n kv[kv_idx] = insertBits(kv[kv_idx], extractBits(kv_val, 0u, 8u), (j * 8u) % 32u, 8u); \n }\n }\n}\n\n".concat(emptyShader);/** Builds the compute pipeline for the update-kv shader. Bind group 0 layout: binding 0 uniform, binding 1 read-only storage, bindings 2-4 read-write storage. No override constants are supplied, so workgroup_size_x keeps the WGSL default of 1. @param device - GPU device used to create the layouts, shader module and pipeline. @returns an object exposing the pipeline as computePipeline. */var loadAttentionUpdateKvShader=function loadAttentionUpdateKvShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"attention update kv bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}},{binding:4,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"attention update kv pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"attention update kv shader module",code:attentionUpdateKvShaderSource});var computePipeline=device.createComputePipeline({label:"attention update kv pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint}});return{computePipeline:computePipeline}};/* Key of the transpose-query shader in attentionShaders and obj.shaders. */var TRANSPOSE_QUERY_SHADER_NAME="pv_picollm_attention_transpose_query_shader";/* WGSL source. Copies tf[(token, head, d)] to hf[(head, token, d)] with head = workgroup_id.x, token = workgroup_id.y and d = local_id.x; there is no loop over d. NOTE(review): elements with d at or above the workgroup size are therefore never copied - presumably head_dimension never exceeds the configured workgroup size; confirm. NOTE(review): the declarations as they appear here carry no address-space or element-type parameters - confirm against the upstream shader source. */var attentionTransposeQueryShaderSource="\nstruct argsStruct {\n n: u32,\n num_heads: u32,\n head_dimension: 
u32,\n tf_offset: u32,\n hf_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar tf: array;\n\n@group(0) @binding(2)\nvar hf: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(\n @builtin(workgroup_id) workgroup_id : vec3,\n @builtin(local_invocation_id) local_id : vec3\n) {\n\n if (workgroup_id.x >= args.num_heads || workgroup_id.y >= args.n || local_id.x >= args.head_dimension) {\n return;\n }\n \n let tf_idx = args.tf_offset + (workgroup_id.y * args.num_heads * args.head_dimension) + (workgroup_id.x * args.head_dimension) + local_id.x; \n let hf_idx = args.hf_offset + (workgroup_id.x * args.n * args.head_dimension) + (workgroup_id.y * args.head_dimension) + local_id.x; \n hf[hf_idx] = tf[tf_idx];\n}\n\n".concat(emptyShader);/** Builds the compute pipeline for the transpose-query shader. Bind group 0 layout: binding 0 uniform, binding 1 read-only storage, binding 2 read-write storage; workgroup_size_x is overridden with PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE. @param device - GPU device used to create the layouts, shader module and pipeline. @returns an object exposing the pipeline as computePipeline. */var loadAttentionTransposeQueryShader=function loadAttentionTransposeQueryShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"attention transpose query bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"attention transpose query pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"attention transpose query shader module",code:attentionTransposeQueryShaderSource});var computePipeline=device.createComputePipeline({label:"attention transpose query pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_x:PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE}}});return{computePipeline:computePipeline}};/* Table mapping each attention shader name constant to its loader function. */var 
attentionShaders=_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty({},PRECOMPUTE_ENCODING_SHADER_NAME,loadAttentionPrecomputeEncodingShader),ENCODE_ROPE_INTERLEAVED_SHADER_NAME,loadAttentionEncodeRopeInterleavedShader),ENCODE_SHADER_NAME,loadAttentionEncodeShader),DOT_PRODUCT_SHADER_NAME,loadAttentionDotProductShader),SOFTMAX_SHADER_NAME,loadAttentionSoftmaxShader),FIR_SHADER_NAME,loadAttentionFirShader),UPDATE_KV_SHADER_NAME,loadAttentionUpdateKvShader),TRANSPOSE_QUERY_SHADER_NAME,loadAttentionTransposeQueryShader);/** Creates the host-side attention entry points and returns them in an object keyed by their pv_picollm_attention_*_webgpu_wasm names (presumably the names the WASM module imports - confirm). Each entry point looks up the device in gpuDevices and its buffers in gpuBuffers, uploads a uniform argument block, dispatches one compute shader and reports 0 or -1 through statusAddress. */var getPicollmAttentionWebGpuFunctions=function getPicollmAttentionWebGpuFunctions(){/* Stores value as a 32-bit integer in Module.HEAP32 at byte address statusAddress. */var setStatus=function setStatus(statusAddress,value){Module.HEAP32[statusAddress/Int32Array.BYTES_PER_ELEMENT]=value};/** Dispatches the precompute-encoding shader. @param objAddress - key of the device wrapper in gpuDevices. @param encodingAddress - key of the encoding buffer in gpuBuffers. @param encodingOffset - offset into the encoding buffer; uploaded divided by 4 (presumably bytes converted to f32 elements - confirm). @param dimension, steps, theta - forwarded to the shader's argument struct. @param statusAddress - receives 0 on success, -1 on any failure. */var pvPicollmAttentionPrecomputeEncodingWebGpu=function pvPicollmAttentionPrecomputeEncodingWebGpu(objAddress,encodingAddress,encodingOffset,dimension,steps,theta,statusAddress){var _gpuBuffers$get;/* unsignedAddress is defined elsewhere; presumably it converts signed 32-bit WASM pointers to unsigned values - confirm. */objAddress=unsignedAddress(objAddress);encodingAddress=unsignedAddress(encodingAddress);statusAddress=unsignedAddress(statusAddress);/* Every failure path below logs, writes -1 to statusAddress and returns. */var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[PRECOMPUTE_ENCODING_SHADER_NAME];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var encodingBuffer=(_gpuBuffers$get=gpuBuffers.get(encodingAddress))===null||_gpuBuffers$get===void 0?void 0:_gpuBuffers$get.buffer;if(!encodingBuffer){console.error("Encoding buffer has not been allocated");setStatus(statusAddress,-1);return}/* Four 32-bit fields: dimension, steps, theta (f32), encoding_offset. */var argsBuffer=obj.getBuffer(4*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"attention precompute encoding arg buffer");/* A DataView is used because the block mixes u32 and f32 fields. */var buffer=new ArrayBuffer(argsBuffer.size);var view=new 
DataView(buffer);view.setUint32(0,dimension,true);view.setUint32(4,steps,true);view.setFloat32(8,theta,true);view.setUint32(12,encodingOffset/4,true);obj.device.queue.writeBuffer(argsBuffer,0,buffer);obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"attention precompute encoding bind group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:encodingBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,PRECOMPUTE_ENCODING_SHADER_NAME,steps);setStatus(statusAddress,0)};var pvPicollmAttentionEncodeWebGpu=function pvPicollmAttentionEncodeWebGpu(objAddress,isRopeInterleaved,xAddress,xOffset,n,numHeads,headDimension,ropeDimension,position,encodingAddress,encodingOffset,statusAddress){var _gpuBuffers$get2,_gpuBuffers$get3;objAddress=unsignedAddress(objAddress);xAddress=unsignedAddress(xAddress);encodingAddress=unsignedAddress(encodingAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shaderName=isRopeInterleaved?ENCODE_ROPE_INTERLEAVED_SHADER_NAME:ENCODE_SHADER_NAME;var shader=obj.shaders[shaderName];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var encodingBuffer=(_gpuBuffers$get2=gpuBuffers.get(encodingAddress))===null||_gpuBuffers$get2===void 0?void 0:_gpuBuffers$get2.buffer;if(!encodingBuffer){console.error("Encoding buffer has not been allocated");setStatus(statusAddress,-1);return}var xBuffer=(_gpuBuffers$get3=gpuBuffers.get(xAddress))===null||_gpuBuffers$get3===void 0?void 0:_gpuBuffers$get3.buffer;if(!xBuffer){console.error("X buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(7*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"attention encode arg 
buffer");/* Seven u32 fields in the order of the shader's argsStruct: n, num_heads, head_dimension, rope_dimension, position, encoding_offset, x_offset. */obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([n,numHeads,headDimension,ropeDimension,position,encodingOffset/4,xOffset/4]));obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"attention encode bind group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:encodingBuffer}},{binding:2,resource:{buffer:xBuffer}}]});/* Grid: X = tokens, clamped to PV_PICOLLM_WEBGPU_MAX_GRID_DIM (the shader strides over the remainder); Y = heads. */obj.dispatchComputerShader(bindGroup,shader.computePipeline,shaderName,Math.min(n,PV_PICOLLM_WEBGPU_MAX_GRID_DIM),numHeads);setStatus(statusAddress,0)};/** Dispatches the dot-product shader that fills scores with the scaled query-key products for one query position. @param objAddress - key of the device wrapper in gpuDevices. @param queryAddress, keysAddress, keyInterceptsAddress, keySlopesAddress, scoresAddress - keys of the respective buffers in gpuBuffers. @param queryOffset, keysOffset, keyInterceptsOffset, keySlopesOffset, scoresOffset - offsets into those buffers. @param n, tq, headDimension, numHeads, numKvHeads, windowLength, start, norm, length1, numKeys - forwarded to the shader's argument struct. @param length2 - accepted but not used by this function. @param statusAddress - receives 0 on success, -1 on any failure. */var pvPicollmAttentionDotProductWebGpu=function pvPicollmAttentionDotProductWebGpu(objAddress,queryAddress,queryOffset,keysAddress,keysOffset,keyInterceptsAddress,keyInterceptsOffset,keySlopesAddress,keySlopesOffset,n,tq,headDimension,numHeads,numKvHeads,windowLength,start,norm,length1,length2,numKeys,scoresAddress,scoresOffset,statusAddress){var _gpuBuffers$get4,_gpuBuffers$get5,_gpuBuffers$get6,_gpuBuffers$get7,_gpuBuffers$get8;objAddress=unsignedAddress(objAddress);queryAddress=unsignedAddress(queryAddress);keysAddress=unsignedAddress(keysAddress);keyInterceptsAddress=unsignedAddress(keyInterceptsAddress);keySlopesAddress=unsignedAddress(keySlopesAddress);scoresAddress=unsignedAddress(scoresAddress);statusAddress=unsignedAddress(statusAddress);/* Every failure path below logs, writes -1 to statusAddress and returns. */var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[DOT_PRODUCT_SHADER_NAME];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var queryBuffer=(_gpuBuffers$get4=gpuBuffers.get(queryAddress))===null||_gpuBuffers$get4===void 0?void 0:_gpuBuffers$get4.buffer;if(!queryBuffer){console.error("query buffer has not been allocated");setStatus(statusAddress,-1);return}var keysBuffer=(_gpuBuffers$get5=gpuBuffers.get(keysAddress))===null||_gpuBuffers$get5===void 
0:_gpuBuffers$get5.buffer;if(!keysBuffer){console.error("keys buffer has not been allocated");setStatus(statusAddress,-1);return}var keyInterceptsBuffer=(_gpuBuffers$get6=gpuBuffers.get(keyInterceptsAddress))===null||_gpuBuffers$get6===void 0?void 0:_gpuBuffers$get6.buffer;if(!keyInterceptsBuffer){console.error("key intercepts buffer has not been allocated");setStatus(statusAddress,-1);return}var keySlopesBuffer=(_gpuBuffers$get7=gpuBuffers.get(keySlopesAddress))===null||_gpuBuffers$get7===void 0?void 0:_gpuBuffers$get7.buffer;if(!keySlopesBuffer){console.error("key slopes buffer has not been allocated");setStatus(statusAddress,-1);return}var scoresBuffer=(_gpuBuffers$get8=gpuBuffers.get(scoresAddress))===null||_gpuBuffers$get8===void 0?void 0:_gpuBuffers$get8.buffer;if(!scoresBuffer){console.error("scores buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(15*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"attention dot product arg buffer");var buffer=new ArrayBuffer(argsBuffer.size);var view=new DataView(buffer);view.setUint32(0,n,true);view.setUint32(4,tq,true);view.setUint32(8,headDimension,true);view.setUint32(12,numHeads,true);view.setUint32(16,numKvHeads,true);view.setUint32(20,windowLength,true);view.setUint32(24,start,true);view.setFloat32(28,norm,true);view.setUint32(32,length1,true);view.setUint32(36,numKeys,true);view.setUint32(40,scoresOffset,true);view.setUint32(44,queryOffset/4,true);view.setUint32(48,keysOffset,true);view.setUint32(52,keyInterceptsOffset/4,true);view.setUint32(56,keySlopesOffset/4,true);obj.device.queue.writeBuffer(argsBuffer,0,buffer);obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"attention dot product bind 
group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:queryBuffer}},{binding:2,resource:{buffer:keysBuffer}},{binding:3,resource:{buffer:keyInterceptsBuffer}},{binding:4,resource:{buffer:keySlopesBuffer}},{binding:5,resource:{buffer:scoresBuffer}}]});/* Grid X = numHeads; the shader handles one head per invocation. */obj.dispatchComputerShader(bindGroup,shader.computePipeline,DOT_PRODUCT_SHADER_NAME,numHeads);setStatus(statusAddress,0)};/** Dispatches the softmax shader, which normalises each head's num_keys scores in place. @param objAddress - key of the device wrapper in gpuDevices. @param scoresAddress - key of the scores buffer in gpuBuffers. @param scoresOffset - uploaded divided by 4 (presumably bytes converted to f32 elements - confirm). @param numHeads, numKeys - forwarded to the shader's argument struct. @param statusAddress - receives 0 on success, -1 on any failure. */var pvPicollmAttentionSoftmaxWebGpu=function pvPicollmAttentionSoftmaxWebGpu(objAddress,scoresAddress,scoresOffset,numHeads,numKeys,statusAddress){var _gpuBuffers$get9;objAddress=unsignedAddress(objAddress);scoresAddress=unsignedAddress(scoresAddress);statusAddress=unsignedAddress(statusAddress);/* Every failure path below logs, writes -1 to statusAddress and returns. */var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[SOFTMAX_SHADER_NAME];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var scoresBuffer=(_gpuBuffers$get9=gpuBuffers.get(scoresAddress))===null||_gpuBuffers$get9===void 0?void 0:_gpuBuffers$get9.buffer;if(!scoresBuffer){console.error("Scores buffer has not been allocated");setStatus(statusAddress,-1);return}/* Three u32 fields in argsStruct order: num_heads, num_keys, scores_offset. */var argsBuffer=obj.getBuffer(3*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"attention softmax arg buffer");obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([numHeads,numKeys,scoresOffset/4]));obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"attention softmax bind group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:scoresBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,SOFTMAX_SHADER_NAME,numHeads);setStatus(statusAddress,0)};/** Dispatches the fir shader, which writes the score-weighted sum of the dequantised values for query position tq into the output buffer. Offsets into f32 buffers are uploaded divided by 4, valuesOffset is uploaded unchanged; statusAddress receives 0 on success, -1 on any failure. */var pvPicollmAttentionFirWebGpu=function 
pvPicollmAttentionFirWebGpu(objAddress,valuesAddress,valuesOffset,valueInterceptsAddress,valueInterceptsOffset,valueSlopesAddress,valueSlopesOffset,length1,length2,tq,headDimension,numHeads,numKvHeads,windowLength,start,scoresAddress,scoresOffset,outputAddress,outputOffset,statusAddress){var _gpuBuffers$get10,_gpuBuffers$get11,_gpuBuffers$get12,_gpuBuffers$get13,_gpuBuffers$get14;/* unsignedAddress is defined elsewhere; presumably it converts signed 32-bit WASM pointers to unsigned values - confirm. */objAddress=unsignedAddress(objAddress);valuesAddress=unsignedAddress(valuesAddress);valueInterceptsAddress=unsignedAddress(valueInterceptsAddress);valueSlopesAddress=unsignedAddress(valueSlopesAddress);scoresAddress=unsignedAddress(scoresAddress);outputAddress=unsignedAddress(outputAddress);statusAddress=unsignedAddress(statusAddress);/* Every failure path below logs, writes -1 to statusAddress and returns. */var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[FIR_SHADER_NAME];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}/* Resolve the five GPU buffers (values, value intercepts, value slopes, scores, output) from gpuBuffers by address. */var valuesBuffer=(_gpuBuffers$get10=gpuBuffers.get(valuesAddress))===null||_gpuBuffers$get10===void 0?void 0:_gpuBuffers$get10.buffer;if(!valuesBuffer){console.error("values buffer has not been allocated");setStatus(statusAddress,-1);return}var valueInterceptsBuffer=(_gpuBuffers$get11=gpuBuffers.get(valueInterceptsAddress))===null||_gpuBuffers$get11===void 0?void 0:_gpuBuffers$get11.buffer;if(!valueInterceptsBuffer){console.error("value intercepts buffer has not been allocated");setStatus(statusAddress,-1);return}var valueSlopesBuffer=(_gpuBuffers$get12=gpuBuffers.get(valueSlopesAddress))===null||_gpuBuffers$get12===void 0?void 0:_gpuBuffers$get12.buffer;if(!valueSlopesBuffer){console.error("value slopes buffer has not been allocated");setStatus(statusAddress,-1);return}var scoresBuffer=(_gpuBuffers$get13=gpuBuffers.get(scoresAddress))===null||_gpuBuffers$get13===void 0?void 0:_gpuBuffers$get13.buffer;if(!scoresBuffer){console.error("scores buffer has not been 
allocated");setStatus(statusAddress,-1);return}var outputBuffer=(_gpuBuffers$get14=gpuBuffers.get(outputAddress))===null||_gpuBuffers$get14===void 0?void 0:_gpuBuffers$get14.buffer;if(!outputBuffer){console.error("output buffer has not been allocated");setStatus(statusAddress,-1);return}/* Thirteen u32 fields in the fir shader's argsStruct order: length1, length2, tq, head_dimension, num_heads, num_kv_heads, window_length, start, values_offset, value_intercepts_offset, value_slopes_offset, scores_offset, output_offset. */var argsBuffer=obj.getBuffer(13*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"attention fir arg buffer");/* valuesOffset is passed unchanged because the shader divides the combined value index by 4 itself; the f32 offsets are divided by 4 here. */obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([length1,length2,tq,headDimension,numHeads,numKvHeads,windowLength,start,valuesOffset,valueInterceptsOffset/4,valueSlopesOffset/4,scoresOffset/4,outputOffset/4]));obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"attention fir bind group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:valuesBuffer}},{binding:2,resource:{buffer:valueInterceptsBuffer}},{binding:3,resource:{buffer:valueSlopesBuffer}},{binding:4,resource:{buffer:scoresBuffer}},{binding:5,resource:{buffer:outputBuffer}}]});/* Grid X = numHeads; the shader handles one head per invocation. */obj.dispatchComputerShader(bindGroup,shader.computePipeline,FIR_SHADER_NAME,numHeads);setStatus(statusAddress,0)};/** Dispatches the update-kv shader, which quantises n incoming rows per kv head into the kv window together with their intercepts and slopes. @param objAddress - key of the device wrapper in gpuDevices. @param tfAddress, kvAddress, kvInterceptsAddress, kvSlopesAddress - keys of the respective buffers in gpuBuffers. @param tfOffset, kvOffset, kvInterceptsOffset, kvSlopesOffset - offsets into those buffers. @param n, numKvHeads, windowLength, position, headDimension - forwarded to the shader's argument struct. @param statusAddress - receives 0 on success, -1 on any failure. */var pvPicollmAttentionUpdateKvWebGpu=function pvPicollmAttentionUpdateKvWebGpu(objAddress,tfAddress,tfOffset,n,kvAddress,kvOffset,kvInterceptsAddress,kvInterceptsOffset,kvSlopesAddress,kvSlopesOffset,numKvHeads,windowLength,position,headDimension,statusAddress){var _gpuBuffers$get15,_gpuBuffers$get16,_gpuBuffers$get17,_gpuBuffers$get18;objAddress=unsignedAddress(objAddress);tfAddress=unsignedAddress(tfAddress);kvAddress=unsignedAddress(kvAddress);kvInterceptsAddress=unsignedAddress(kvInterceptsAddress);kvSlopesAddress=unsignedAddress(kvSlopesAddress);statusAddress=unsignedAddress(statusAddress);/* Every failure path below logs, writes -1 to statusAddress and returns. */var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var 
shader=obj.shaders[UPDATE_KV_SHADER_NAME];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var tfBuffer=(_gpuBuffers$get15=gpuBuffers.get(tfAddress))===null||_gpuBuffers$get15===void 0?void 0:_gpuBuffers$get15.buffer;if(!tfBuffer){console.error("tf buffer has not been allocated");setStatus(statusAddress,-1);return}var kvBuffer=(_gpuBuffers$get16=gpuBuffers.get(kvAddress))===null||_gpuBuffers$get16===void 0?void 0:_gpuBuffers$get16.buffer;if(!kvBuffer){console.error("KV buffer has not been allocated");setStatus(statusAddress,-1);return}var kvInterceptsBuffer=(_gpuBuffers$get17=gpuBuffers.get(kvInterceptsAddress))===null||_gpuBuffers$get17===void 0?void 0:_gpuBuffers$get17.buffer;if(!kvInterceptsBuffer){console.error("KV intercept buffer has not been allocated");setStatus(statusAddress,-1);return}var kvSlopesBuffer=(_gpuBuffers$get18=gpuBuffers.get(kvSlopesAddress))===null||_gpuBuffers$get18===void 0?void 0:_gpuBuffers$get18.buffer;if(!kvSlopesBuffer){console.error("KV slopes buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(9*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"attention update kv arg buffer");obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([n,numKvHeads,windowLength,position,headDimension,tfOffset/4,kvOffset,kvInterceptsOffset/4,kvSlopesOffset/4]));obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"attention update kv bind group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:tfBuffer}},{binding:2,resource:{buffer:kvBuffer}},{binding:3,resource:{buffer:kvInterceptsBuffer}},{binding:4,resource:{buffer:kvSlopesBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,UPDATE_KV_SHADER_NAME,numKvHeads);setStatus(statusAddress,0)};var pvPicollmAttentionTransposeQueryWebGpu=function 
pvPicollmAttentionTransposeQueryWebGpu(objAddress,tfAddress,tfOffset,hfAddress,hfOffset,n,numHeads,headDimension,statusAddress){var _gpuBuffers$get19,_gpuBuffers$get20;objAddress=unsignedAddress(objAddress);tfAddress=unsignedAddress(tfAddress);hfAddress=unsignedAddress(hfAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[TRANSPOSE_QUERY_SHADER_NAME];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var tfBuffer=(_gpuBuffers$get19=gpuBuffers.get(tfAddress))===null||_gpuBuffers$get19===void 0?void 0:_gpuBuffers$get19.buffer;if(!tfBuffer){console.error("tf buffer has not been allocated");setStatus(statusAddress,-1);return}var hfBuffer=(_gpuBuffers$get20=gpuBuffers.get(hfAddress))===null||_gpuBuffers$get20===void 0?void 0:_gpuBuffers$get20.buffer;if(!hfBuffer){console.error("hf buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(5*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"attention transpose query arg buffer");obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([n,numHeads,headDimension,tfOffset/4,hfOffset/4]));obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"attention transpose query bind 
group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:tfBuffer}},{binding:2,resource:{buffer:hfBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,TRANSPOSE_QUERY_SHADER_NAME,numHeads,n);setStatus(statusAddress,0)};return{pv_picollm_attention_precompute_encoding_webgpu_wasm:pvPicollmAttentionPrecomputeEncodingWebGpu,pv_picollm_attention_encode_webgpu_wasm:pvPicollmAttentionEncodeWebGpu,pv_picollm_attention_dot_product_webgpu_wasm:pvPicollmAttentionDotProductWebGpu,pv_picollm_attention_softmax_webgpu_wasm:pvPicollmAttentionSoftmaxWebGpu,pv_picollm_attention_fir_webgpu_wasm:pvPicollmAttentionFirWebGpu,pv_picollm_attention_update_kv_webgpu_wasm:pvPicollmAttentionUpdateKvWebGpu,pv_picollm_attention_transpose_query_webgpu_wasm:pvPicollmAttentionTransposeQueryWebGpu}};var SILU_SHADER_NAME="pv_picollm_feed_forward_silu_shader";var feedForwardSiluShaderSource="\nstruct argsStruct {\n n: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n x[global_id.x] = x[global_id.x] / (1.0 + exp(-x[global_id.x]));\n}\n\n".concat(emptyShader);var loadFeedForwardSiluShader=function loadFeedForwardSiluShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"ff silu bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"ff silu pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"ff silu shader 
module",code:feedForwardSiluShaderSource});var computePipeline=device.createComputePipeline({label:"ff silu pipline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_x:PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE}}});return{computePipeline:computePipeline}};var GELU_SHADER_NAME="pv_picollm_feed_forward_gelu_shader";var feedForwardGeluShaderSource="\nstruct argsStruct {\n n: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array;\n\nconst a1: f32 = 0.254829592;\nconst a2: f32 = -0.284496736;\nconst a3: f32 = 1.421413741;\nconst a4: f32 = -1.453152027;\nconst a5: f32 = 1.061405429;\nconst p: f32 = 0.3275911;\n\n// A&S formula 7.1.26\nfn erf(x: f32) -> f32 { \n var sign: f32 = 1.0;\n if (x < 0) {\n sign = -1.0;\n }\n var x_abs: f32 = abs(x);\n \n let t: f32 = 1.0 / fma(p, x_abs, 1.0);\n let y: f32 = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x_abs * x_abs);\n\n return sign * y;\n}\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n x[global_id.x] = 0.5 * x[global_id.x] * (1.0 + erf(x[global_id.x] * 0.7071067811865475));\n}\n\n".concat(emptyShader);var loadFeedForwardGeluShader=function loadFeedForwardGeluShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"ff gelu bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"ff gelu pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"ff gelu shader module",code:feedForwardGeluShaderSource});var 
computePipeline=device.createComputePipeline({label:"ff gelu pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_x:PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE}}});return{computePipeline:computePipeline}};var ALMOST_GELU_SHADER_NAME="pv_picollm_feed_forward_almost_gelu_shader";var feedForwardAlmostGeluShaderSource="\nstruct argsStruct {\n n: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n x[global_id.x] = 0.5 * x[global_id.x] * (1 + tanh(0.7978845608028654 * (x[global_id.x] + (0.044715f * x[global_id.x] * x[global_id.x] * x[global_id.x]))));\n}\n\n".concat(emptyShader);var loadFeedForwardAlmostGeluShader=function loadFeedForwardAlmostGeluShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"ff almost gelu bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"ff almost gelu pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"ff almost gelu shader module",code:feedForwardAlmostGeluShaderSource});var computePipeline=device.createComputePipeline({label:"ff almost gelu pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_x:PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE}}});return{computePipeline:computePipeline}};var MULTIPLY_BUFFERS_SHADER_NAME="pv_picollm_feed_forward_multiply_buffers_shader";var feedForwardMultiplyBuffersShaderSource="\nstruct argsStruct {\n n: 
u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array;\n\n@group(0) @binding(2)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n y[global_id.x] *= x[global_id.x];\n}\n\n".concat(emptyShader);var loadFeedForwardMultiplyBuffersShader=function loadFeedForwardMultiplyBuffersShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"ff multiply buffers bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"ff multiply buffers pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"ff multiply buffers shader module",code:feedForwardMultiplyBuffersShaderSource});var computePipeline=device.createComputePipeline({label:"ff multiply buffers pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_x:PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE}}});return{computePipeline:computePipeline}};var feedForwardShaders=_defineProperty(_defineProperty(_defineProperty(_defineProperty({},SILU_SHADER_NAME,loadFeedForwardSiluShader),GELU_SHADER_NAME,loadFeedForwardGeluShader),ALMOST_GELU_SHADER_NAME,loadFeedForwardAlmostGeluShader),MULTIPLY_BUFFERS_SHADER_NAME,loadFeedForwardMultiplyBuffersShader);var getPicollmFeedForwardWebGpuFunctions=function getPicollmFeedForwardWebGpuFunctions(){var setStatus=function setStatus(statusAddress,value){Module.HEAP32[statusAddress/Int32Array.BYTES_PER_ELEMENT]=value};var 
pvPicollmFeedForwardSiluWebGpu=function pvPicollmFeedForwardSiluWebGpu(objAddress,n,xAddress,statusAddress){var _gpuBuffers$get;objAddress=unsignedAddress(objAddress);xAddress=unsignedAddress(xAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[SILU_SHADER_NAME];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var xBuffer=(_gpuBuffers$get=gpuBuffers.get(xAddress))===null||_gpuBuffers$get===void 0?void 0:_gpuBuffers$get.buffer;if(!xBuffer){console.error("x buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"ff silu arg buffer");obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([n]));obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"ff silu bind group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:xBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,SILU_SHADER_NAME,Math.ceil(n/PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE));setStatus(statusAddress,0)};var pvPicollmFeedForwardGeluWebGpu=function pvPicollmFeedForwardGeluWebGpu(objAddress,n,xAddress,statusAddress){var _gpuBuffers$get2;objAddress=unsignedAddress(objAddress);xAddress=unsignedAddress(xAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[GELU_SHADER_NAME];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var xBuffer=(_gpuBuffers$get2=gpuBuffers.get(xAddress))===null||_gpuBuffers$get2===void 0?void 
0:_gpuBuffers$get2.buffer;if(!xBuffer){console.error("x buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"ff gelu arg buffer");obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([n]));obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"ff gelu bind group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:xBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,GELU_SHADER_NAME,Math.ceil(n/PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE));setStatus(statusAddress,0)};var pvPicollmFeedForwardAlmostGeluWebGpu=function pvPicollmFeedForwardAlmostGeluWebGpu(objAddress,n,xAddress,statusAddress){var _gpuBuffers$get3;objAddress=unsignedAddress(objAddress);xAddress=unsignedAddress(xAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[ALMOST_GELU_SHADER_NAME];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var xBuffer=(_gpuBuffers$get3=gpuBuffers.get(xAddress))===null||_gpuBuffers$get3===void 0?void 0:_gpuBuffers$get3.buffer;if(!xBuffer){console.error("x buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"ff almost gelu arg buffer");obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([n]));obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"ff almost gelu bind 
group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:xBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,ALMOST_GELU_SHADER_NAME,Math.ceil(n/PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE));setStatus(statusAddress,0)};var pvPicollmFeedForwardMultiplyBuffersWebGpu=function pvPicollmFeedForwardMultiplyBuffersWebGpu(objAddress,n,xAddress,yAddress,statusAddress){var _gpuBuffers$get4,_gpuBuffers$get5;objAddress=unsignedAddress(objAddress);xAddress=unsignedAddress(xAddress);yAddress=unsignedAddress(yAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[MULTIPLY_BUFFERS_SHADER_NAME];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var xBuffer=(_gpuBuffers$get4=gpuBuffers.get(xAddress))===null||_gpuBuffers$get4===void 0?void 0:_gpuBuffers$get4.buffer;if(!xBuffer){console.error("X buffer has not been allocated");setStatus(statusAddress,-1);return}var yBuffer=(_gpuBuffers$get5=gpuBuffers.get(yAddress))===null||_gpuBuffers$get5===void 0?void 0:_gpuBuffers$get5.buffer;if(!yBuffer){console.error("Y buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"ff multiply buffers arg buffer");obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([n]));obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"ff multiply buffers bind 
group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:xBuffer}},{binding:2,resource:{buffer:yBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,MULTIPLY_BUFFERS_SHADER_NAME,Math.ceil(n/PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE));setStatus(statusAddress,0)};return{pv_picollm_feed_forward_silu_webgpu_wasm:pvPicollmFeedForwardSiluWebGpu,pv_picollm_feed_forward_gelu_webgpu_wasm:pvPicollmFeedForwardGeluWebGpu,pv_picollm_feed_forward_almost_gelu_webgpu_wasm:pvPicollmFeedForwardAlmostGeluWebGpu,pv_picollm_feed_forward_multiply_buffers_webgpu_wasm:pvPicollmFeedForwardMultiplyBuffersWebGpu}};var FORWARD_SHADER_NAME$1="pv_picollm_gate_forward_shader";var gateForwardShaderSource="\n\nstruct pv_picollm_gate_ix_t {\n i: u32,\n x: f32,\n}\n\n@group(0) @binding(0)\nvar y: array;\n\n@group(0) @binding(1)\nvar indices: array;\n\n@group(0) @binding(2)\nvar weights: array;\n\noverride n: u32 = 0;\noverride k: u32 = 0;\noverride num_experts: u32 = 0;\n\noverride y_offset: u32 = 0;\noverride indices_offset: u32 = 0;\noverride weights_offset: u32 = 0;\n\nvar ixs: array;\n \noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(\n @builtin(global_invocation_id) global_id : vec3\n) {\n if (global_id.x >= n) {\n return;\n }\n \n var y_start: u32 = y_offset + global_id.x * num_experts;\n for (var j = 0u; j < num_experts; j++) {\n ixs[j].i = j;\n ixs[j].x = y[y_start + j];\n }\n\n for (var i = 0u; i < num_experts - 1; i++) {\n for (var j = 0u; j < num_experts - i - 1; j++) {\n if (ixs[j].x < ixs[j + 1].x) {\n let tmp = ixs[j];\n ixs[j] = ixs[j + 1];\n ixs[j + 1] = tmp;\n }\n }\n }\n\n for (var j = 0u; j < k; j++) {\n indices[indices_offset + (global_id.x * k) + j] = ixs[j].i;\n weights[weights_offset + (global_id.x * k) + j] = ixs[j].x;\n }\n\n 
var max_weight: f32 = weights[weights_offset + (global_id.x * k)];\n for (var j = 1u; j < k; j++) {\n max_weight = max(max_weight, weights[weights_offset + (global_id.x * k) + j]);\n }\n\n var sum_weight: f32 = 0.0;\n for (var j = 0u; j < k; j++) {\n weights[weights_offset + (global_id.x * k) + j] = exp(weights[weights_offset + (global_id.x * k) + j] - max_weight);\n sum_weight += weights[weights_offset + (global_id.x * k) + j];\n }\n\n for (var j = 0u; j < k; j++) {\n weights[weights_offset + (global_id.x * k) + j] /= sum_weight;\n }\n}\n\n".concat(emptyShader);var loadGateForwardShader=function loadGateForwardShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"gate forward bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"gate forward pipeline",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"gate forward shader module",code:gateForwardShaderSource});var computePipeline=device.createComputePipeline({layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{num_experts:1}}});return{computePipeline:computePipeline,pipelineLayout:pipelineLayout,shaderModule:shaderModule}};var gateForwardShader=_defineProperty({},FORWARD_SHADER_NAME$1,loadGateForwardShader);var getPicollmGateWebGpuFunctions=function getPicollmGateWebGpuFunctions(){var setStatus=function setStatus(statusAddress,value){Module.HEAP32[statusAddress/Int32Array.BYTES_PER_ELEMENT]=value};var pvPicollmGateForwardWebGpu=function pvPicollmGateForwardWebGpu(objAddress,n,k,numExperts,yAddress,yOffset,indicesAddress,indicesOffset,weightsAddress,weightsOffset,statusAddress){var 
_gpuBuffers$get,_gpuBuffers$get2,_gpuBuffers$get3;objAddress=unsignedAddress(objAddress);yAddress=unsignedAddress(yAddress);indicesAddress=unsignedAddress(indicesAddress);weightsAddress=unsignedAddress(weightsAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[FORWARD_SHADER_NAME$1];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var pipeline=obj.device.createComputePipeline({label:"gate forward pipeline",layout:shader.pipelineLayout,compute:{module:shader.shaderModule,entryPoint:shaderEntryPoint,constants:{n:n,k:k,num_experts:numExperts,y_offset:yOffset/4,indices_offset:indicesOffset/4,weights_offset:weightsOffset/4}}});var yBuffer=(_gpuBuffers$get=gpuBuffers.get(yAddress))===null||_gpuBuffers$get===void 0?void 0:_gpuBuffers$get.buffer;if(!yBuffer){console.error("Y buffer has not been allocated");setStatus(statusAddress,-1);return}var indicesBuffer=(_gpuBuffers$get2=gpuBuffers.get(indicesAddress))===null||_gpuBuffers$get2===void 0?void 0:_gpuBuffers$get2.buffer;if(!indicesBuffer){console.error("Indices buffer has not been allocated");setStatus(statusAddress,-1);return}var weightsBuffer=(_gpuBuffers$get3=gpuBuffers.get(weightsAddress))===null||_gpuBuffers$get3===void 0?void 0:_gpuBuffers$get3.buffer;if(!weightsBuffer){console.error("Weights buffer has not been allocated");setStatus(statusAddress,-1);return}var bindGroup=obj.device.createBindGroup({label:"gate forward bind group",layout:pipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:yBuffer}},{binding:1,resource:{buffer:indicesBuffer}},{binding:2,resource:{buffer:weightsBuffer}}]});obj.dispatchComputerShader(bindGroup,pipeline,FORWARD_SHADER_NAME$1,n);setStatus(statusAddress,0)};return{pv_picollm_gate_forward_webgpu_wasm:pvPicollmGateForwardWebGpu}};var 
ADD_TO_BUFFER_SHADER_NAME$1="pv_picollm_moe_transformer_add_to_buffer_shader";var moeTransformerAddToBufferShaderSource="\nstruct argsStruct {\n n: u32, \n x_offset: u32,\n buffer_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array;\n\n@group(0) @binding(2)\nvar buffer: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n\n buffer[args.buffer_offset + global_id.x] += x[args.x_offset + global_id.x]; \n}\n\n".concat(emptyShader);var loadMoeTransformerAddToBufferShader=function loadMoeTransformerAddToBufferShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"moe transformer add to buffer bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"moe transformer add to buffer pipeline",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"moe transformer add to buffer shader module",code:moeTransformerAddToBufferShaderSource});var computePipeline=device.createComputePipeline({label:"moe transformer add to buffer pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint}});return{computePipeline:computePipeline}};var MULTIPLY_WEIGHT_AND_ADD_TO_BUFFER_SHADER_NAME="pv_picollm_moe_transformer_multiply_weight_and_add_to_buffer_shader";var moeTransformerMultiplyWeightAndToBufferShaderSource="\nstruct argsStruct {\n n: u32, \n weights_index: u32,\n y_index: u32,\n weights_offset: u32,\n x_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) 
\nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar weights: array;\n\n@group(0) @binding(2)\nvar x: array;\n\n@group(0) @binding(3)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n \n y[args.y_offset + args.y_index + global_id.x] += weights[args.weights_index] + x[args.x_offset + global_id.x]; \n}\n\n".concat(emptyShader);var loadMoeTransformerMultiplyWeightAndAddToBufferShader=function loadMoeTransformerMultiplyWeightAndAddToBufferShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"moe transformer multiply weight and add to buffer bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"moe transformer multiply weight and add to buffer pipeline",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"moe transformer multiply weight and add to buffer shader module",code:moeTransformerMultiplyWeightAndToBufferShaderSource});var computePipeline=device.createComputePipeline({label:"moe transformer multiply weight and add to buffer pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint}});return{computePipeline:computePipeline}};var ADD_BUFFERS_SHADER_NAME$1="pv_picollm_moe_transformer_add_buffers_shader";var moeTransformerAddBuffersShaderSource="\nstruct argsStruct {\n n: u32, \n buffer1_offset: u32,\n buffer2_offset: u32,\n y_offset: u32, \n};\n\n@group(0) @binding(0) \nvar args: 
argsStruct;\n\n@group(0) @binding(1)\nvar buffer1: array;\n\n@group(0) @binding(2)\nvar buffer2: array;\n\n@group(0) @binding(3)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n \n y[args.y_offset + global_id.x] = buffer1[args.buffer1_offset + global_id.x] + buffer2[args.buffer2_offset + global_id.x]; \n}\n\n".concat(emptyShader);var loadMoeTransformerAddBuffersShader=function loadMoeTransformerAddBuffersShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"moe transformer add buffers bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"moe transformer add buffers pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"moe transformer add buffers shader module",code:moeTransformerAddBuffersShaderSource});var computePipeline=device.createComputePipeline({label:"moe transformer add buffers pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint}});return{computePipeline:computePipeline}};var moeTransformerForwardShaders=_defineProperty(_defineProperty(_defineProperty({},ADD_TO_BUFFER_SHADER_NAME$1,loadMoeTransformerAddToBufferShader),MULTIPLY_WEIGHT_AND_ADD_TO_BUFFER_SHADER_NAME,loadMoeTransformerMultiplyWeightAndAddToBufferShader),ADD_BUFFERS_SHADER_NAME$1,loadMoeTransformerAddBuffersShader);var getPicollmMoeTransformerWebGpuFunctions=function 
getPicollmMoeTransformerWebGpuFunctions(){var setStatus=function setStatus(statusAddress,value){Module.HEAP32[statusAddress/Int32Array.BYTES_PER_ELEMENT]=value};var pvPicollmMoeTransformerAddToBufferWebGpu=function pvPicollmMoeTransformerAddToBufferWebGpu(objAddress,n,xAddress,xOffset,bufferAddress,bufferOffset,statusAddress){var _gpuBuffers$get,_gpuBuffers$get2;objAddress=unsignedAddress(objAddress);xAddress=unsignedAddress(xAddress);bufferAddress=unsignedAddress(bufferAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[ADD_TO_BUFFER_SHADER_NAME$1];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var xBuffer=(_gpuBuffers$get=gpuBuffers.get(xAddress))===null||_gpuBuffers$get===void 0?void 0:_gpuBuffers$get.buffer;if(!xBuffer){console.error("x buffer has not been allocated");setStatus(statusAddress,-1);return}var bufferBuffer=(_gpuBuffers$get2=gpuBuffers.get(bufferAddress))===null||_gpuBuffers$get2===void 0?void 0:_gpuBuffers$get2.buffer;if(!bufferBuffer){console.error("buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(3*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"moe transformer add to buffer arg buffer");obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([n,xOffset,bufferOffset]));obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"moe transformer add to buffer bind group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:xBuffer}},{binding:2,resource:{buffer:bufferBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,ADD_TO_BUFFER_SHADER_NAME$1,n);setStatus(statusAddress,0)};var 
pvPicollmMoeTransformerMultiplyWeightAndAddToBufferWebGpu=function pvPicollmMoeTransformerMultiplyWeightAndAddToBufferWebGpu(objAddress,n,weightsIndex,yIndex,weightsAddress,weightsOffset,xAddress,xOffset,yAddress,yOffset,statusAddress){var _gpuBuffers$get3,_gpuBuffers$get4,_gpuBuffers$get5;objAddress=unsignedAddress(objAddress);weightsAddress=unsignedAddress(weightsAddress);xAddress=unsignedAddress(xAddress);yAddress=unsignedAddress(yAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[MULTIPLY_WEIGHT_AND_ADD_TO_BUFFER_SHADER_NAME];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var weightsBuffer=(_gpuBuffers$get3=gpuBuffers.get(weightsAddress))===null||_gpuBuffers$get3===void 0?void 0:_gpuBuffers$get3.buffer;if(!weightsBuffer){console.error("weights has not been allocated");setStatus(statusAddress,-1);return}var xBuffer=(_gpuBuffers$get4=gpuBuffers.get(xAddress))===null||_gpuBuffers$get4===void 0?void 0:_gpuBuffers$get4.buffer;if(!xBuffer){console.error("buffer2 has not been allocated");setStatus(statusAddress,-1);return}var yBuffer=(_gpuBuffers$get5=gpuBuffers.get(yAddress))===null||_gpuBuffers$get5===void 0?void 0:_gpuBuffers$get5.buffer;if(!yBuffer){console.error("y has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(6*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"moe transformer multiply weight and add to buffer arg buffer");obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([n,weightsIndex,yIndex,weightsOffset,xOffset,yOffset]));obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"moe transformer multiply weight and add to buffer bind 
group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:weightsBuffer}},{binding:2,resource:{buffer:xBuffer}},{binding:3,resource:{buffer:yBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,MULTIPLY_WEIGHT_AND_ADD_TO_BUFFER_SHADER_NAME,n);setStatus(statusAddress,0)};var pvPicollmMoeTransformerAddBuffersWebGpu=function pvPicollmMoeTransformerAddBuffersWebGpu(objAddress,n,buffer1Address,buffer1Offset,buffer2Address,buffer2Offset,yAddress,yOffset,statusAddress){var _gpuBuffers$get6,_gpuBuffers$get7,_gpuBuffers$get8;objAddress=unsignedAddress(objAddress);buffer1Address=unsignedAddress(buffer1Address);buffer2Address=unsignedAddress(buffer2Address);yAddress=unsignedAddress(yAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[ADD_BUFFERS_SHADER_NAME$1];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var buffer1Buffer=(_gpuBuffers$get6=gpuBuffers.get(buffer1Address))===null||_gpuBuffers$get6===void 0?void 0:_gpuBuffers$get6.buffer;if(!buffer1Buffer){console.error("buffer1 has not been allocated");setStatus(statusAddress,-1);return}var buffer2Buffer=(_gpuBuffers$get7=gpuBuffers.get(buffer2Address))===null||_gpuBuffers$get7===void 0?void 0:_gpuBuffers$get7.buffer;if(!buffer2Buffer){console.error("buffer2 has not been allocated");setStatus(statusAddress,-1);return}var yBuffer=(_gpuBuffers$get8=gpuBuffers.get(yAddress))===null||_gpuBuffers$get8===void 0?void 0:_gpuBuffers$get8.buffer;if(!yBuffer){console.error("y has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(4*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"moe transformer add buffers arg 
buffer");obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([n,buffer1Offset,buffer2Offset,yOffset]));obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"moe transformer add buffers bind group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:buffer1Buffer}},{binding:2,resource:{buffer:buffer2Buffer}},{binding:3,resource:{buffer:yBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,ADD_BUFFERS_SHADER_NAME$1,n);setStatus(statusAddress,0)};return{pv_picollm_moe_transformer_add_to_buffer_webgpu_wasm:pvPicollmMoeTransformerAddToBufferWebGpu,pv_picollm_moe_transformer_add_buffers_webgpu_wasm:pvPicollmMoeTransformerAddBuffersWebGpu,pv_picollm_moe_transformer_multiply_weight_and_add_to_buffer_webgpu_wasm:pvPicollmMoeTransformerMultiplyWeightAndAddToBufferWebGpu}};var sdataReduce="\n for (var s: u32 = workgroup_size_x / 2; s > 0; s >>= 1) {\n if tid < s {\n sdata[tid] += sdata[tid + s];\n }\n workgroupBarrier();\n }\n";var dividePadFunction="\n fn divide_pad(a: u32, b: u32) -> u32 { \n return (a + b - 1) / b;\n }\n";var FORWARD_MULTI_BUFFER_SHADER_NAME$1="pv_picollm_norm_forward_multi_buffer_shader";var FORWARD_SINGLE_BUFFER_SHADER_NAME$1="pv_picollm_norm_forward_single_buffer_shader";var normForwardShaderSource=function normForwardShaderSource(isMulti){return"\nstruct argsStruct {\n n: u32,\n dimension: u32,\n remainder: u32,\n remainder_start: u32, \n eps: f32, \n x_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar weight: array;\n\n".concat(isMulti?"\n@group(0) @binding(2)\nvar x: array;\n\n@group(0) @binding(3) \nvar y: array;\n":" \n@group(0) @binding(2)\nvar x: array;\n","\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\nvar sdata: array, workgroup_size_x>;\n\n@compute 
@workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(\n @builtin(local_invocation_id) local_id: vec3,\n @builtin(workgroup_id) workgroup_id: vec3\n) {\n let tid = local_id.x;\n let m = workgroup_id.x;\n let block_size = workgroup_size_x;\n \n var power_vec: vec4;\n let x_start: u32 = args.x_offset + (m * args.dimension);\n let skip = tid * 4;\n let shift = (block_size * 4);\n for (var j = 0u; j + skip < args.remainder_start; j += shift) {\n let local_index = x_start + j + skip; \n\n let x_vec = vec4(\n x[local_index],\n x[local_index + 1],\n x[local_index + 2],\n x[local_index + 3]);\n \n power_vec += x_vec * x_vec; \n } \n \n if (tid == 0 && args.remainder > 0) {\n var remainder_vec = vec4(0.0, 0.0, 0.0, 0.0);\n let x_idx = x_start + args.remainder_start;\n for (var j = 0u; j < args.remainder; j++) { \n remainder_vec[j] = x[x_idx + j];\n } \n power_vec += remainder_vec * remainder_vec;\n }\n \n sdata[tid] = power_vec;\n workgroupBarrier();\n\n ").concat(sdataReduce,"\n \n let power = sdata[0].x + sdata[0].y + sdata[0].z + sdata[0].w;\n let norm: vec4 = vec4(1.0 / sqrt((power / f32(args.dimension)) + args.eps));\n \n let y_start: u32 = args.y_offset + (m * args.dimension);\n for (var j = 0u; j + skip < args.remainder_start; j += shift) {\n let local_index = j + skip;\n let x_idx = x_start + local_index;\n let x_vec = vec4(\n x[x_idx],\n x[x_idx + 1],\n x[x_idx + 2],\n x[x_idx + 3]);\n \n let weight_vec = vec4(\n weight[local_index],\n weight[local_index + 1],\n weight[local_index + 2],\n weight[local_index + 3]);\n let y_vec = x_vec * norm * weight_vec;\n \n let y_idx = y_start + local_index;\n").concat(isMulti?" 
\n y[y_idx] = y_vec.x;\n y[y_idx + 1] = y_vec.y;\n y[y_idx + 2] = y_vec.z;\n y[y_idx + 3] = y_vec.w;\n":" \n x[y_idx] = y_vec.x;\n x[y_idx + 1] = y_vec.y;\n x[y_idx + 2] = y_vec.z;\n x[y_idx + 3] = y_vec.w;\n"," \n }\n \n if (tid == 0 && args.remainder > 0) {\n let x_idx = x_start + args.remainder_start;\n let weight_idx = args.remainder_start; \n let y_idx = y_start + args.remainder_start;\n for (var j = 0u; j < args.remainder; j++) {\n").concat(isMulti?" \n y[y_idx + j] = x[x_idx + j] * norm[j] * weight[weight_idx + j];\n":" \n x[y_idx + j] = x[x_idx + j] * norm[j] * weight[weight_idx + j];\n"," \n } \n }\n}\n\n").concat(emptyShader)};var loadNormForwardShader=function loadNormForwardShader(device,isMulti){var entries=[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}}];if(isMulti){entries.push({binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}});entries.push({binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}})}else{entries.push({binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}})}var bindGroupLayout=device.createBindGroupLayout({label:"norm forward ".concat(isMulti?"multi":"single"," buffer bind group layout"),entries:entries});var pipelineLayout=device.createPipelineLayout({label:"norm forward ".concat(isMulti?"multi":"single"," buffer pipeline layout"),bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"norm forward ".concat(isMulti?"multi":"single"," buffer shader module"),code:normForwardShaderSource(isMulti)});var computePipeline=device.createComputePipeline({label:"norm forward ".concat(isMulti?"multi":"single"," buffer pipeline"),layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_x:PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE}}});return{computePipeline:computePipeline}};var 
/**
 * Loader table for the norm forward shaders, keyed by shader name. Both
 * entries forward `device` to loadNormForwardShader; the single-buffer name
 * passes `false` and the multi-buffer name passes `true` as second argument.
 * `_defineProperty` is defined outside this part of the file (presumably the
 * transpiler's computed-key helper - confirm).
 */normForwardShader=_defineProperty(_defineProperty({},FORWARD_SINGLE_BUFFER_SHADER_NAME$1,function(device){return loadNormForwardShader(device,false)}),FORWARD_MULTI_BUFFER_SHADER_NAME$1,function(device){return loadNormForwardShader(device,true)});/**
 * Builds the norm-forward function table. Returns an object with the single
 * key `pv_picollm_norm_forward_webgpu_wasm`.
 */var getPicollmNormWebGpuFunctions=function getPicollmNormWebGpuFunctions(){/**
 * Stores `value` into Module.HEAP32 at index
 * statusAddress / Int32Array.BYTES_PER_ELEMENT.
 */var setStatus=function setStatus(statusAddress,value){Module.HEAP32[statusAddress/Int32Array.BYTES_PER_ELEMENT]=value};/**
 * Records one norm forward dispatch.
 *
 * objAddress                       key into gpuDevices
 * dimension, n, eps                written into the uniform; n is also the
 *                                  count handed to dispatchComputerShader
 * weightAddress/xAddress/yAddress  keys into gpuBuffers
 * xOffset, yOffset                 divided by 4 before upload (presumably byte
 *                                  offsets turned into 32-bit element indices
 *                                  - confirm with the caller)
 * statusAddress                    receives 0 on success, -1 on any failure
 *
 * All address arguments are first passed through unsignedAddress.
 * When xAddress === yAddress the single-buffer shader is selected and only the
 * y buffer is bound (binding 2); otherwise x is bound at 2 and y at 3.
 * Every failure path logs with console.error, sets status -1 and returns.
 */var pvPicollmNormForwardWebGpu=function pvPicollmNormForwardWebGpu(objAddress,dimension,eps,weightAddress,n,xOffset,xAddress,yOffset,yAddress,statusAddress){var _gpuBuffers$get,_gpuBuffers$get2;objAddress=unsignedAddress(objAddress);weightAddress=unsignedAddress(weightAddress);xAddress=unsignedAddress(xAddress);yAddress=unsignedAddress(yAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shaderName=xAddress===yAddress?FORWARD_SINGLE_BUFFER_SHADER_NAME$1:FORWARD_MULTI_BUFFER_SHADER_NAME$1;var shader=obj.shaders[shaderName];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var weightBuffer=(_gpuBuffers$get=gpuBuffers.get(weightAddress))===null||_gpuBuffers$get===void 0?void 0:_gpuBuffers$get.buffer;if(!weightBuffer){console.error("Weight buffer has not been allocated");setStatus(statusAddress,-1);return}var yBuffer=(_gpuBuffers$get2=gpuBuffers.get(yAddress))===null||_gpuBuffers$get2===void 0?void 0:_gpuBuffers$get2.buffer;if(!yBuffer){console.error("Y buffer has not been allocated");setStatus(statusAddress,-1);return}/**
 * `dimension` is split into a multiple-of-4 prefix (remainder_start) and a
 * tail of 0-3 elements (remainder). Uniform payload, written little-endian:
 *   0 n | 4 dimension | 8 remainder | 12 remainder_start | 16 eps (f32)
 *   20 xOffset/4 | 24 yOffset/4
 * The staging ArrayBuffer is sized from argsBuffer.size, not from the 28 bytes
 * requested. The buffer label reads "multi" on the single-buffer path as well.
 */var remainder=dimension%4;var remainder_start=dimension-remainder;var argsBuffer=obj.getBuffer(7*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"norm forward multi buffer arg buffer");var buffer=new ArrayBuffer(argsBuffer.size);var view=new 
DataView(buffer);view.setUint32(0,n,true);view.setUint32(4,dimension,true);view.setUint32(8,remainder,true);view.setUint32(12,remainder_start,true);view.setFloat32(16,eps,true);view.setUint32(20,xOffset/4,true);view.setUint32(24,yOffset/4,true);obj.device.queue.writeBuffer(argsBuffer,0,buffer);obj.scheduleUniformBufferForRelease(argsBuffer);/**
 * Bindings 0 (uniform) and 1 (weight) are common to both shader variants.
 */var entries=[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:weightBuffer}}];if(xAddress===yAddress){entries.push({binding:2,resource:{buffer:yBuffer}})}else{/**
 * NOTE(review): the x buffer is only looked up here, after the uniform buffer
 * has already been written and scheduled for release.
 */var _gpuBuffers$get3;var xBuffer=(_gpuBuffers$get3=gpuBuffers.get(xAddress))===null||_gpuBuffers$get3===void 0?void 0:_gpuBuffers$get3.buffer;if(!xBuffer){console.error("X buffer has not been allocated");setStatus(statusAddress,-1);return}entries.push({binding:2,resource:{buffer:xBuffer}});entries.push({binding:3,resource:{buffer:yBuffer}})}var bindGroup=obj.device.createBindGroup({label:"norm forward ".concat(xAddress===yAddress?"single":"multi"," buffer bind group"),layout:shader.computePipeline.getBindGroupLayout(0),entries:entries});obj.dispatchComputerShader(bindGroup,shader.computePipeline,shaderName,n);setStatus(statusAddress,0)};return{pv_picollm_norm_forward_webgpu_wasm:pvPicollmNormForwardWebGpu}};/**
 * Name under which the multi-buffer norm-layer shader is registered and
 * looked up in obj.shaders.
 */var FORWARD_MULTI_BUFFER_SHADER_NAME="pv_picollm_norm_layer_forward_multi_buffer_shader";/**
 * WGSL source for the norm-layer forward pass that reads x and writes y.
 * Each invocation with global_id.x below args.n handles one row of
 * args.dimension values: it computes the mean, then the mean of squared
 * deviations, then writes
 *   (x - mean) * (1 / sqrt(mean2 + eps)) * weight + bias.
 * `emptyShader` (defined outside this part of the file) is appended.
 * NOTE(review): the WGSL declarations as they appear in this text carry no
 * template arguments (`var args`, `array`, `vec3`) - confirm against the
 * unminified shader source.
 */var normLayerForwardMultiBufferShaderSource="\nstruct argsStruct {\n n: u32,\n dimension: u32,\n eps: f32,\n weight_offset: u32,\n bias_offset: u32,\n x_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar weight: array;\n\n@group(0) @binding(2)\nvar bias: array;\n\n@group(0) @binding(3)\nvar x: array;\n\n@group(0) @binding(4)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if 
(global_id.x >= args.n) {\n return;\n }\n \n let x_start: u32 = args.x_offset + (global_id.x * args.dimension);\n\n var mean: f32 = 0.0;\n for (var j = 0u; j < args.dimension; j++) {\n mean += x[x_start + j];\n }\n mean /= f32(args.dimension);\n\n var mean2: f32 = 0.0;\n for (var j = 0u; j < args.dimension; j++) {\n mean2 += (x[x_start + j] - mean) * (x[x_start + j] - mean);\n }\n mean2 /= f32(args.dimension);\n\n var norm: f32 = 1.0 / sqrt(mean2 + args.eps);\n\n var y_start = args.y_offset + (global_id.x * args.dimension);\n for (var j = 0u; j < args.dimension; j++) {\n y[y_start + j] = ((x[x_start + j] - mean) * norm * weight[args.weight_offset + j]) + bias[args.bias_offset + j];\n }\n}\n\n".concat(emptyShader);/**
 * Creates the compute pipeline for the multi-buffer norm-layer shader.
 * Bind group 0 layout: 0 uniform, 1-3 read-only storage (weight, bias, x),
 * 4 storage (y). workgroup_size_x is overridden with
 * PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE.
 * Returns { computePipeline }.
 */var loadNormLayerForwardMultiBufferShader=function loadNormLayerForwardMultiBufferShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"norm layer forward multi buffer bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:4,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"norm layer forward multi buffer pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"norm layer forward multi buffer shader module",code:normLayerForwardMultiBufferShaderSource});var computePipeline=device.createComputePipeline({label:"norm layer forward multi buffer pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_x:PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE}}});return{computePipeline:computePipeline}};/**
 * Name under which the single-buffer (in-place) norm-layer shader is
 * registered and looked up in obj.shaders.
 */var FORWARD_SINGLE_BUFFER_SHADER_NAME="pv_picollm_norm_layer_forward_single_buffer_shader";var 
normLayerForwardSingleBufferShaderSource="\nstruct argsStruct {\n n: u32,\n dimension: u32,\n eps: f32,\n weight_offset: u32,\n bias_offset: u32,\n x_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar weight: array;\n\n@group(0) @binding(2)\nvar bias: array;\n\n@group(0) @binding(3)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n \n let x_start: u32 = args.x_offset + (global_id.x * args.dimension);\n\n var mean: f32 = 0.0;\n for (var j = 0u; j < args.dimension; j++) {\n mean += y[x_start + j];\n }\n mean /= f32(args.dimension);\n\n var mean2: f32 = 0.0;\n for (var j = 0u; j < args.dimension; j++) {\n mean2 += (y[x_start + j] - mean) * (y[x_start + j] - mean);\n }\n mean2 /= f32(args.dimension);\n\n var norm: f32 = 1.0 / sqrt(mean2 + args.eps);\n\n var y_start = args.y_offset + (global_id.x * args.dimension);\n for (var j = 0u; j < args.dimension; j++) {\n y[y_start + j] = ((y[x_start + j] - mean) * norm * weight[args.weight_offset + j]) + bias[args.bias_offset + j];\n }\n}\n\n".concat(emptyShader);var loadNormLayerForwardSingleBufferShader=function loadNormLayerForwardSingleBufferShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"norm layer forward single buffer bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"norm layer forward single buffer pipeline 
layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"norm layer forward single buffer shader module",code:normLayerForwardSingleBufferShaderSource});var computePipeline=device.createComputePipeline({label:"norm layer forward single buffer pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_x:PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE}}});return{computePipeline:computePipeline}};var normLayerForwardShader=_defineProperty(_defineProperty({},FORWARD_SINGLE_BUFFER_SHADER_NAME,loadNormLayerForwardSingleBufferShader),FORWARD_MULTI_BUFFER_SHADER_NAME,loadNormLayerForwardMultiBufferShader);var getPicollmNormLayerWebGpuFunctions=function getPicollmNormLayerWebGpuFunctions(){var setStatus=function setStatus(statusAddress,value){Module.HEAP32[statusAddress/Int32Array.BYTES_PER_ELEMENT]=value};var pvPicollmNormLayerForwardWebGpu=function pvPicollmNormLayerForwardWebGpu(objAddress,dimension,eps,weightAddress,weightOffset,biasAddress,biasOffset,n,xAddress,xOffset,yAddress,yOffset,statusAddress){var _gpuBuffers$get,_gpuBuffers$get2,_gpuBuffers$get3;objAddress=unsignedAddress(objAddress);weightAddress=unsignedAddress(weightAddress);biasAddress=unsignedAddress(biasAddress);xAddress=unsignedAddress(xAddress);yAddress=unsignedAddress(yAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shaderName=xAddress===yAddress?FORWARD_SINGLE_BUFFER_SHADER_NAME:FORWARD_MULTI_BUFFER_SHADER_NAME;var shader=obj.shaders[shaderName];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var weightBuffer=(_gpuBuffers$get=gpuBuffers.get(weightAddress))===null||_gpuBuffers$get===void 0?void 0:_gpuBuffers$get.buffer;if(!weightBuffer){console.error("weight buffer has not been 
allocated");setStatus(statusAddress,-1);return}var biasBuffer=(_gpuBuffers$get2=gpuBuffers.get(biasAddress))===null||_gpuBuffers$get2===void 0?void 0:_gpuBuffers$get2.buffer;if(!biasBuffer){console.error("bias buffer has not been allocated");setStatus(statusAddress,-1);return}var yBuffer=(_gpuBuffers$get3=gpuBuffers.get(yAddress))===null||_gpuBuffers$get3===void 0?void 0:_gpuBuffers$get3.buffer;if(!yBuffer){console.error("Y buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(7*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"norm layer forward multi buffer arg buffer");var buffer=new ArrayBuffer(argsBuffer.size);var view=new DataView(buffer);view.setUint32(0,n,true);view.setUint32(4,dimension,true);view.setFloat32(8,eps,true);view.setUint32(12,weightOffset/4,true);view.setUint32(16,biasOffset/4,true);view.setUint32(20,xOffset/4,true);view.setUint32(24,yOffset/4,true);obj.device.queue.writeBuffer(argsBuffer,0,buffer);obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup;if(xAddress===yAddress){bindGroup=obj.device.createBindGroup({label:"norm layer forward single buffer bind group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:weightBuffer}},{binding:2,resource:{buffer:biasBuffer}},{binding:3,resource:{buffer:yBuffer}}]})}else{var _gpuBuffers$get4;var xBuffer=(_gpuBuffers$get4=gpuBuffers.get(xAddress))===null||_gpuBuffers$get4===void 0?void 0:_gpuBuffers$get4.buffer;if(!xBuffer){console.error("X buffer has not been allocated");setStatus(statusAddress,-1);return}bindGroup=obj.device.createBindGroup({label:"norm layer forward multi buffer bind 
group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:weightBuffer}},{binding:2,resource:{buffer:biasBuffer}},{binding:3,resource:{buffer:xBuffer}},{binding:4,resource:{buffer:yBuffer}}]})}obj.dispatchComputerShader(bindGroup,shader.computePipeline,shaderName,Math.ceil(n/PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE));setStatus(statusAddress,0)};return{pv_picollm_norm_layer_forward_webgpu_wasm:pvPicollmNormLayerForwardWebGpu}};var ADD_TO_BUFFER_SHADER_NAME="pv_picollm_transformer_add_to_buffer_shader";var transformerAddToBufferShaderSource="\nstruct argsStruct {\n n: u32,\n x_offset: u32,\n buffer_offset: u32, \n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array;\n\n@group(0) @binding(2)\nvar buffer: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n \n buffer[args.buffer_offset + global_id.x] += x[args.x_offset + global_id.x]; \n}\n\n".concat(emptyShader);var loadTransformerAddToBufferShader=function loadTransformerAddToBufferShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"transformer add to buffer bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"transformer add to buffer pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"transformer add to buffer shader module",code:transformerAddToBufferShaderSource});var 
computePipeline=device.createComputePipeline({label:"transformer add to buffer compute",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_x:PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE}}});return{computePipeline:computePipeline}};var ADD_BUFFERS_SHADER_NAME="pv_picollm_transformer_add_buffers_shader";var transformerAddBuffersShaderSource="\n\nstruct argsStruct {\n n: u32,\n buffer1_offset: u32,\n buffer2_offset: u32,\n y_offset: u32, \n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar buffer1: array;\n\n@group(0) @binding(2)\nvar buffer2: array;\n\n@group(0) @binding(3)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n) {\n return;\n }\n \n y[args.y_offset + global_id.x] = buffer1[args.buffer1_offset + global_id.x] + buffer2[args.buffer2_offset + global_id.x]; \n}\n\n".concat(emptyShader);var loadTransformerAddBuffersShader=function loadTransformerAddBuffersShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"transformer add buffers bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"transformer add buffers pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"transformer add buffers shader module",code:transformerAddBuffersShaderSource});var computePipeline=device.createComputePipeline({label:"transformer add buffers 
pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_x:PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE}}});return{computePipeline:computePipeline}};var transformerForwardShaders=_defineProperty(_defineProperty({},ADD_TO_BUFFER_SHADER_NAME,loadTransformerAddToBufferShader),ADD_BUFFERS_SHADER_NAME,loadTransformerAddBuffersShader);var getPicollmTransformerWebGpuFunctions=function getPicollmTransformerWebGpuFunctions(){var setStatus=function setStatus(statusAddress,value){Module.HEAP32[statusAddress/Int32Array.BYTES_PER_ELEMENT]=value};var pvPicollmTransformerAddToBufferWebGpu=function pvPicollmTransformerAddToBufferWebGpu(objAddress,n,xAddress,xOffset,bufferAddress,bufferOffset,statusAddress){var _gpuBuffers$get,_gpuBuffers$get2;objAddress=unsignedAddress(objAddress);xAddress=unsignedAddress(xAddress);bufferAddress=unsignedAddress(bufferAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[ADD_TO_BUFFER_SHADER_NAME];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var xBuffer=(_gpuBuffers$get=gpuBuffers.get(xAddress))===null||_gpuBuffers$get===void 0?void 0:_gpuBuffers$get.buffer;if(!xBuffer){console.error("x buffer has not been allocated");setStatus(statusAddress,-1);return}var bufferBuffer=(_gpuBuffers$get2=gpuBuffers.get(bufferAddress))===null||_gpuBuffers$get2===void 0?void 0:_gpuBuffers$get2.buffer;if(!bufferBuffer){console.error("buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(3*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"transformer add to buffer arg buffer");obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([n,xOffset,bufferOffset]));obj.scheduleUniformBufferForRelease(argsBuffer);var 
bindGroup=obj.device.createBindGroup({label:"transformer add to buffer bind group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:xBuffer}},{binding:2,resource:{buffer:bufferBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,ADD_TO_BUFFER_SHADER_NAME,Math.ceil(n/PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE));setStatus(statusAddress,0)};var pvPicollmTransformerAddBuffersWebGpu=function pvPicollmTransformerAddBuffersWebGpu(objAddress,n,buffer1Address,buffer1Offset,buffer2Address,buffer2Offset,yAddress,yOffset,statusAddress){var _gpuBuffers$get3,_gpuBuffers$get4,_gpuBuffers$get5;objAddress=unsignedAddress(objAddress);buffer1Address=unsignedAddress(buffer1Address);buffer2Address=unsignedAddress(buffer2Address);yAddress=unsignedAddress(yAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[ADD_BUFFERS_SHADER_NAME];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var buffer1Buffer=(_gpuBuffers$get3=gpuBuffers.get(buffer1Address))===null||_gpuBuffers$get3===void 0?void 0:_gpuBuffers$get3.buffer;if(!buffer1Buffer){console.error("buffer1 has not been allocated");setStatus(statusAddress,-1);return}var buffer2Buffer=(_gpuBuffers$get4=gpuBuffers.get(buffer2Address))===null||_gpuBuffers$get4===void 0?void 0:_gpuBuffers$get4.buffer;if(!buffer2Buffer){console.error("buffer2 has not been allocated");setStatus(statusAddress,-1);return}var yBuffer=(_gpuBuffers$get5=gpuBuffers.get(yAddress))===null||_gpuBuffers$get5===void 0?void 0:_gpuBuffers$get5.buffer;if(!yBuffer){console.error("y has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(4*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"transformer add 
buffers arg buffer");obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([n,buffer1Offset,buffer2Offset,yOffset]));obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"transformer add buffers bind group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:buffer1Buffer}},{binding:2,resource:{buffer:buffer2Buffer}},{binding:3,resource:{buffer:yBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,ADD_BUFFERS_SHADER_NAME,Math.ceil(n/PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE));setStatus(statusAddress,0)};return{pv_picollm_transformer_add_to_buffer_webgpu_wasm:pvPicollmTransformerAddToBufferWebGpu,pv_picollm_transformer_add_buffers_webgpu_wasm:pvPicollmTransformerAddBuffersWebGpu}};var FORWARD_SHADER_NAME="pv_picollm_weight_float_forward_shader";var weightFloatForwardShaderSource="\n\nstruct argsStruct {\n nr: u32,\n nc: u32,\n w_offset: u32,\n x_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar w: array;\n\n@group(0) @binding(2)\nvar x: array;\n\n@group(0) @binding(3)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(\n @builtin(workgroup_id) workgroup_id : vec3,\n @builtin(local_invocation_id) local_id : vec3\n) {\n if (local_id.x >= args.nr) {\n return;\n }\n let x_start: u32 = args.x_offset + (workgroup_id.x * args.nc);\n let y_idx: u32 = local_id.x + args.y_offset + (workgroup_id.x * args.nr);\n \n let w_start: u32 = args.w_offset + (local_id.x * args.nc);\n for (var j = 0u; j < args.nc; j++) {\n y[y_idx] += w[w_start + j] * x[x_start + j]; \n }\n}\n\n".concat(emptyShader);var loadWeightFloatForwardShader=function loadWeightFloatForwardShader(device){var 
bindGroupLayout=device.createBindGroupLayout({label:"weight float forward bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"weight float forward pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"weight float forward shader module",code:weightFloatForwardShaderSource});var computePipeline=device.createComputePipeline({label:"weight float forward pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_x:PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE}}});return{computePipeline:computePipeline}};var weightFloatForwardShader=_defineProperty({},FORWARD_SHADER_NAME,loadWeightFloatForwardShader);var getPicollmWeightFloatWebGpuFunctions=function getPicollmWeightFloatWebGpuFunctions(){var setStatus=function setStatus(statusAddress,value){Module.HEAP32[statusAddress/Int32Array.BYTES_PER_ELEMENT]=value};var pvPicollmWeightFloatForwardWebGpu=function pvPicollmWeightFloatForwardWebGpu(objAddress,n,nc,nr,wOffset,wAddress,xOffset,xAddress,yOffset,yAddress,statusAddress){var _gpuBuffers$get,_gpuBuffers$get2,_gpuBuffers$get3;objAddress=unsignedAddress(objAddress);wAddress=unsignedAddress(wAddress);xAddress=unsignedAddress(xAddress);yAddress=unsignedAddress(yAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[FORWARD_SHADER_NAME];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var 
wBuffer=(_gpuBuffers$get=gpuBuffers.get(wAddress))===null||_gpuBuffers$get===void 0?void 0:_gpuBuffers$get.buffer;if(!wBuffer){console.error("W buffer has not been allocated");setStatus(statusAddress,-1);return}var xBuffer=(_gpuBuffers$get2=gpuBuffers.get(xAddress))===null||_gpuBuffers$get2===void 0?void 0:_gpuBuffers$get2.buffer;if(!xBuffer){console.error("X buffer has not been allocated");setStatus(statusAddress,-1);return}var yBuffer=(_gpuBuffers$get3=gpuBuffers.get(yAddress))===null||_gpuBuffers$get3===void 0?void 0:_gpuBuffers$get3.buffer;if(!yBuffer){console.error("Y buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(5*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"weight float forward arg buffer");obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([nr,nc,wOffset,xOffset,yOffset]));obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"weight float forward bind group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:wBuffer}},{binding:2,resource:{buffer:xBuffer}},{binding:3,resource:{buffer:yBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,FORWARD_SHADER_NAME,n);setStatus(statusAddress,0)};return{pv_picollm_weight_float_forward_webgpu_wasm:pvPicollmWeightFloatForwardWebGpu}};var rowsPerBlock=16;var columnsPerBlock=8;var preprocessDim=16;var weightBlockSize=256;var unpackBlock128BitDepth3="\nvar unpacked: array;\n\nfn unpack_block_128_bit_depth_3(packed_offset: u32) {\n let val_0 = blocks[packed_offset]; \n unpacked[0] = extractBits(val_0, 0u, 3u);\n unpacked[1] = extractBits(val_0, 3u, 3u);\n unpacked[2] = extractBits(val_0, 6u, 3u);\n unpacked[3] = extractBits(val_0, 9u, 3u);\n unpacked[4] = extractBits(val_0, 12u, 3u);\n unpacked[5] = extractBits(val_0, 15u, 3u);\n unpacked[6] = extractBits(val_0, 18u, 3u);\n unpacked[7] = 
extractBits(val_0, 21u, 3u);\n unpacked[8] = extractBits(val_0, 24u, 3u);\n unpacked[9] = extractBits(val_0, 27u, 3u);\n unpacked[10] = extractBits(val_0, 30u, 2u);\n \n let val_1 = blocks[packed_offset + 1];\n unpacked[10] = insertBits(unpacked[10], extractBits(val_1, 0u, 1u), 2u, 1u);\n unpacked[11] = extractBits(val_1, 1u, 3u);\n unpacked[12] = extractBits(val_1, 4u, 3u);\n unpacked[13] = extractBits(val_1, 7u, 3u);\n unpacked[14] = extractBits(val_1, 10u, 3u);\n unpacked[15] = extractBits(val_1, 13u, 3u);\n unpacked[16] = extractBits(val_1, 16u, 3u);\n unpacked[17] = extractBits(val_1, 19u, 3u);\n unpacked[18] = extractBits(val_1, 22u, 3u);\n unpacked[19] = extractBits(val_1, 25u, 3u);\n unpacked[20] = extractBits(val_1, 28u, 3u);\n unpacked[21] = extractBits(val_1, 31u, 1u);\n \n let val_2 = blocks[packed_offset + 2];\n unpacked[21] = insertBits(unpacked[21], extractBits(val_2, 0u, 2u), 1u, 2u); \n unpacked[22] = extractBits(val_2, 2u, 3u);\n unpacked[23] = extractBits(val_2, 5u, 3u);\n unpacked[24] = extractBits(val_2, 8u, 3u);\n unpacked[25] = extractBits(val_2, 11u, 3u);\n unpacked[26] = extractBits(val_2, 14u, 3u);\n unpacked[27] = extractBits(val_2, 17u, 3u);\n unpacked[28] = extractBits(val_2, 20u, 3u);\n unpacked[29] = extractBits(val_2, 23u, 3u);\n unpacked[30] = extractBits(val_2, 26u, 3u);\n unpacked[31] = extractBits(val_2, 29u, 3u);\n \n let val_3 = blocks[packed_offset + 3]; \n unpacked[32] = extractBits(val_3, 0u, 3u);\n unpacked[33] = extractBits(val_3, 3u, 3u);\n unpacked[34] = extractBits(val_3, 6u, 3u);\n unpacked[35] = extractBits(val_3, 9u, 3u);\n unpacked[36] = extractBits(val_3, 12u, 3u);\n unpacked[37] = extractBits(val_3, 15u, 3u);\n unpacked[38] = extractBits(val_3, 18u, 3u);\n unpacked[39] = extractBits(val_3, 21u, 3u);\n unpacked[40] = extractBits(val_3, 24u, 3u);\n unpacked[41] = extractBits(val_3, 27u, 3u);\n unpacked[42] = extractBits(val_3, 30u, 2u);\n \n let val_4 = blocks[packed_offset + 4];\n unpacked[42] = 
insertBits(unpacked[42], extractBits(val_4, 0u, 1u), 2u, 1u);\n unpacked[43] = extractBits(val_4, 1u, 3u);\n unpacked[44] = extractBits(val_4, 4u, 3u);\n unpacked[45] = extractBits(val_4, 7u, 3u);\n unpacked[46] = extractBits(val_4, 10u, 3u);\n unpacked[47] = extractBits(val_4, 13u, 3u);\n unpacked[48] = extractBits(val_4, 16u, 3u);\n unpacked[49] = extractBits(val_4, 19u, 3u);\n unpacked[50] = extractBits(val_4, 22u, 3u);\n unpacked[51] = extractBits(val_4, 25u, 3u);\n unpacked[52] = extractBits(val_4, 28u, 3u);\n unpacked[53] = extractBits(val_4, 31u, 1u);\n \n let val_5 = blocks[packed_offset + 5];\n unpacked[53] = insertBits(unpacked[53], extractBits(val_5, 0u, 2u), 1u, 2u); \n unpacked[54] = extractBits(val_5, 2u, 3u);\n unpacked[55] = extractBits(val_5, 5u, 3u);\n unpacked[56] = extractBits(val_5, 8u, 3u);\n unpacked[57] = extractBits(val_5, 11u, 3u);\n unpacked[58] = extractBits(val_5, 14u, 3u);\n unpacked[59] = extractBits(val_5, 17u, 3u);\n unpacked[60] = extractBits(val_5, 20u, 3u);\n unpacked[61] = extractBits(val_5, 23u, 3u);\n unpacked[62] = extractBits(val_5, 26u, 3u);\n unpacked[63] = extractBits(val_5, 29u, 3u);\n \n let val_6 = blocks[packed_offset + 6];\n unpacked[64] = extractBits(val_6, 0u, 3u);\n unpacked[65] = extractBits(val_6, 3u, 3u);\n unpacked[66] = extractBits(val_6, 6u, 3u);\n unpacked[67] = extractBits(val_6, 9u, 3u);\n unpacked[68] = extractBits(val_6, 12u, 3u);\n unpacked[69] = extractBits(val_6, 15u, 3u);\n unpacked[70] = extractBits(val_6, 18u, 3u);\n unpacked[71] = extractBits(val_6, 21u, 3u);\n unpacked[72] = extractBits(val_6, 24u, 3u);\n unpacked[73] = extractBits(val_6, 27u, 3u);\n unpacked[74] = extractBits(val_6, 30u, 2u);\n \n let val_7 = blocks[packed_offset + 7];\n unpacked[74] = insertBits(unpacked[74], extractBits(val_7, 0u, 1u), 2u, 1u);\n unpacked[75] = extractBits(val_7, 1u, 3u);\n unpacked[76] = extractBits(val_7, 4u, 3u);\n unpacked[77] = extractBits(val_7, 7u, 3u);\n unpacked[78] = extractBits(val_7, 10u, 3u);\n 
unpacked[79] = extractBits(val_7, 13u, 3u);\n unpacked[80] = extractBits(val_7, 16u, 3u);\n unpacked[81] = extractBits(val_7, 19u, 3u);\n unpacked[82] = extractBits(val_7, 22u, 3u);\n unpacked[83] = extractBits(val_7, 25u, 3u);\n unpacked[84] = extractBits(val_7, 28u, 3u);\n unpacked[85] = extractBits(val_7, 31u, 1u);\n \n let val_8 = blocks[packed_offset + 8];\n unpacked[85] = insertBits(unpacked[85], extractBits(val_8, 0u, 2u), 1u, 2u); \n unpacked[86] = extractBits(val_8, 2u, 3u);\n unpacked[87] = extractBits(val_8, 5u, 3u);\n unpacked[88] = extractBits(val_8, 8u, 3u);\n unpacked[89] = extractBits(val_8, 11u, 3u);\n unpacked[90] = extractBits(val_8, 14u, 3u);\n unpacked[91] = extractBits(val_8, 17u, 3u);\n unpacked[92] = extractBits(val_8, 20u, 3u);\n unpacked[93] = extractBits(val_8, 23u, 3u);\n unpacked[94] = extractBits(val_8, 26u, 3u);\n unpacked[95] = extractBits(val_8, 29u, 3u);\n \n let val_9 = blocks[packed_offset + 9];\n unpacked[96] = extractBits(val_9, 0u, 3u);\n unpacked[97] = extractBits(val_9, 3u, 3u);\n unpacked[98] = extractBits(val_9, 6u, 3u);\n unpacked[99] = extractBits(val_9, 9u, 3u);\n unpacked[100] = extractBits(val_9, 12u, 3u);\n unpacked[101] = extractBits(val_9, 15u, 3u);\n unpacked[102] = extractBits(val_9, 18u, 3u);\n unpacked[103] = extractBits(val_9, 21u, 3u);\n unpacked[104] = extractBits(val_9, 24u, 3u);\n unpacked[105] = extractBits(val_9, 27u, 3u);\n unpacked[106] = extractBits(val_9, 30u, 2u);\n \n let val_10 = blocks[packed_offset + 10];\n unpacked[106] = insertBits(unpacked[106], extractBits(val_10, 0u, 1u), 2u, 1u);\n unpacked[107] = extractBits(val_10, 1u, 3u);\n unpacked[108] = extractBits(val_10, 4u, 3u);\n unpacked[109] = extractBits(val_10, 7u, 3u);\n unpacked[110] = extractBits(val_10, 10u, 3u);\n unpacked[111] = extractBits(val_10, 13u, 3u);\n unpacked[112] = extractBits(val_10, 16u, 3u);\n unpacked[113] = extractBits(val_10, 19u, 3u);\n unpacked[114] = extractBits(val_10, 22u, 3u);\n unpacked[115] = 
extractBits(val_10, 25u, 3u);\n unpacked[116] = extractBits(val_10, 28u, 3u);\n unpacked[117] = extractBits(val_10, 31u, 1u);\n \n let val_11 = blocks[packed_offset + 11];\n unpacked[117] = insertBits(unpacked[117], extractBits(val_11, 0u, 2u), 1u, 2u); \n unpacked[118] = extractBits(val_11, 2u, 3u);\n unpacked[119] = extractBits(val_11, 5u, 3u);\n unpacked[120] = extractBits(val_11, 8u, 3u);\n unpacked[121] = extractBits(val_11, 11u, 3u);\n unpacked[122] = extractBits(val_11, 14u, 3u);\n unpacked[123] = extractBits(val_11, 17u, 3u);\n unpacked[124] = extractBits(val_11, 20u, 3u);\n unpacked[125] = extractBits(val_11, 23u, 3u);\n unpacked[126] = extractBits(val_11, 26u, 3u);\n unpacked[127] = extractBits(val_11, 29u, 3u);\n}\n";/* WGSL snippet (string): declares the module-scope `unpacked` array and fn unpack_block_128_bit_depth_5(packed_offset: u32). The function reads the 20 words blocks[packed_offset .. packed_offset + 19] and fills unpacked[0..127] with consecutive 5-bit fields, lowest bits first. A field that straddles two words is read in two parts: its low bits are taken from the top of the earlier word, then insertBits() adds the remaining high bits taken from the bottom of the next word. `blocks` is not declared in this snippet; the shader that embeds it has to declare it. NOTE(review): `array` carries no element type or length in this text - confirm against the original shader source. */var unpackBlock128BitDepth5="\nvar unpacked: array;\n\nfn unpack_block_128_bit_depth_5(packed_offset: u32) { \n let val_0 = blocks[packed_offset];\n unpacked[0] = extractBits(val_0, 0u, 5u);\n unpacked[1] = extractBits(val_0, 5u, 5u);\n unpacked[2] = extractBits(val_0, 10u, 5u);\n unpacked[3] = extractBits(val_0, 15u, 5u);\n unpacked[4] = extractBits(val_0, 20u, 5u);\n unpacked[5] = extractBits(val_0, 25u, 5u);\n unpacked[6] = extractBits(val_0, 30u, 2u);\n \n let val_1 = blocks[packed_offset + 1];\n unpacked[6] = insertBits(unpacked[6], extractBits(val_1, 0u, 3u), 2u, 3u); \n unpacked[7] = extractBits(val_1, 3u, 5u);\n unpacked[8] = extractBits(val_1, 8u, 5u);\n unpacked[9] = extractBits(val_1, 13u, 5u);\n unpacked[10] = extractBits(val_1, 18u, 5u);\n unpacked[11] = extractBits(val_1, 23u, 5u);\n unpacked[12] = extractBits(val_1, 28u, 4u);\n \n let val_2 = blocks[packed_offset + 2];\n unpacked[12] = insertBits(unpacked[12], extractBits(val_2, 0u, 1u), 4u, 1u);\n unpacked[13] = extractBits(val_2, 1u, 5u);\n unpacked[14] = extractBits(val_2, 6u, 5u);\n unpacked[15] = extractBits(val_2, 11u, 5u);\n unpacked[16] = extractBits(val_2, 16u, 5u);\n unpacked[17] = extractBits(val_2, 21u, 5u);\n unpacked[18] = extractBits(val_2, 26u, 5u);\n 
unpacked[19] = extractBits(val_2, 31u, 1u);\n \n let val_3 = blocks[packed_offset + 3];\n unpacked[19] = insertBits(unpacked[19], extractBits(val_3, 0u, 4u), 1u, 4u);\n unpacked[20] = extractBits(val_3, 4u, 5u);\n unpacked[21] = extractBits(val_3, 9u, 5u);\n unpacked[22] = extractBits(val_3, 14u, 5u);\n unpacked[23] = extractBits(val_3, 19u, 5u);\n unpacked[24] = extractBits(val_3, 24u, 5u);\n unpacked[25] = extractBits(val_3, 29u, 3u);\n \n let val_4 = blocks[packed_offset + 4];\n unpacked[25] = insertBits(unpacked[25], extractBits(val_4, 0u, 2u), 3u, 2u);\n unpacked[26] = extractBits(val_4, 2u, 5u);\n unpacked[27] = extractBits(val_4, 7u, 5u);\n unpacked[28] = extractBits(val_4, 12u, 5u);\n unpacked[29] = extractBits(val_4, 17u, 5u);\n unpacked[30] = extractBits(val_4, 22u, 5u);\n unpacked[31] = extractBits(val_4, 27u, 5u);\n \n let val_5 = blocks[packed_offset + 5];\n unpacked[32] = extractBits(val_5, 0u, 5u);\n unpacked[33] = extractBits(val_5, 5u, 5u);\n unpacked[34] = extractBits(val_5, 10u, 5u);\n unpacked[35] = extractBits(val_5, 15u, 5u);\n unpacked[36] = extractBits(val_5, 20u, 5u);\n unpacked[37] = extractBits(val_5, 25u, 5u);\n unpacked[38] = extractBits(val_5, 30u, 2u);\n\n let val_6 = blocks[packed_offset + 6];\n unpacked[38] = insertBits(unpacked[38], extractBits(val_6, 0u, 3u), 2u, 3u);\n unpacked[39] = extractBits(val_6, 3u, 5u);\n unpacked[40] = extractBits(val_6, 8u, 5u);\n unpacked[41] = extractBits(val_6, 13u, 5u);\n unpacked[42] = extractBits(val_6, 18u, 5u);\n unpacked[43] = extractBits(val_6, 23u, 5u);\n unpacked[44] = extractBits(val_6, 28u, 4u);\n\n let val_7 = blocks[packed_offset + 7];\n unpacked[44] = insertBits(unpacked[44], extractBits(val_7, 0u, 1u), 4u, 1u);\n unpacked[45] = extractBits(val_7, 1u, 5u);\n unpacked[46] = extractBits(val_7, 6u, 5u);\n unpacked[47] = extractBits(val_7, 11u, 5u);\n unpacked[48] = extractBits(val_7, 16u, 5u);\n unpacked[49] = extractBits(val_7, 21u, 5u);\n unpacked[50] = extractBits(val_7, 26u, 5u);\n 
unpacked[51] = extractBits(val_7, 31u, 1u);\n\n let val_8 = blocks[packed_offset + 8];\n unpacked[51] = insertBits(unpacked[51], extractBits(val_8, 0u, 4u), 1u, 4u);\n unpacked[52] = extractBits(val_8, 4u, 5u);\n unpacked[53] = extractBits(val_8, 9u, 5u);\n unpacked[54] = extractBits(val_8, 14u, 5u);\n unpacked[55] = extractBits(val_8, 19u, 5u);\n unpacked[56] = extractBits(val_8, 24u, 5u);\n unpacked[57] = extractBits(val_8, 29u, 3u);\n \n let val_9 = blocks[packed_offset + 9];\n unpacked[57] = insertBits(unpacked[57], extractBits(val_9, 0u, 2u), 3u, 2u);\n unpacked[58] = extractBits(val_9, 2u, 5u);\n unpacked[59] = extractBits(val_9, 7u, 5u);\n unpacked[60] = extractBits(val_9, 12u, 5u);\n unpacked[61] = extractBits(val_9, 17u, 5u);\n unpacked[62] = extractBits(val_9, 22u, 5u);\n unpacked[63] = extractBits(val_9, 27u, 5u);\n \n let val_10 = blocks[packed_offset + 10];\n unpacked[64] = extractBits(val_10, 0u, 5u);\n unpacked[65] = extractBits(val_10, 5u, 5u);\n unpacked[66] = extractBits(val_10, 10u, 5u);\n unpacked[67] = extractBits(val_10, 15u, 5u);\n unpacked[68] = extractBits(val_10, 20u, 5u);\n unpacked[69] = extractBits(val_10, 25u, 5u);\n unpacked[70] = extractBits(val_10, 30u, 2u);\n\n let val_11 = blocks[packed_offset + 11];\n unpacked[70] = insertBits(unpacked[70], extractBits(val_11, 0u, 3u), 2u, 3u);\n unpacked[71] = extractBits(val_11, 3u, 5u);\n unpacked[72] = extractBits(val_11, 8u, 5u);\n unpacked[73] = extractBits(val_11, 13u, 5u);\n unpacked[74] = extractBits(val_11, 18u, 5u);\n unpacked[75] = extractBits(val_11, 23u, 5u);\n unpacked[76] = extractBits(val_11, 28u, 4u);\n\n let val_12 = blocks[packed_offset + 12];\n unpacked[76] = insertBits(unpacked[76], extractBits(val_12, 0u, 1u), 4u, 1u);\n unpacked[77] = extractBits(val_12, 1u, 5u);\n unpacked[78] = extractBits(val_12, 6u, 5u);\n unpacked[79] = extractBits(val_12, 11u, 5u);\n unpacked[80] = extractBits(val_12, 16u, 5u);\n unpacked[81] = extractBits(val_12, 21u, 5u);\n unpacked[82] = 
extractBits(val_12, 26u, 5u);\n unpacked[83] = extractBits(val_12, 31u, 1u);\n\n let val_13 = blocks[packed_offset + 13];\n unpacked[83] = insertBits(unpacked[83], extractBits(val_13, 0u, 4u), 1u, 4u);\n unpacked[84] = extractBits(val_13, 4u, 5u);\n unpacked[85] = extractBits(val_13, 9u, 5u);\n unpacked[86] = extractBits(val_13, 14u, 5u);\n unpacked[87] = extractBits(val_13, 19u, 5u);\n unpacked[88] = extractBits(val_13, 24u, 5u);\n unpacked[89] = extractBits(val_13, 29u, 3u);\n \n let val_14 = blocks[packed_offset + 14];\n unpacked[89] = insertBits(unpacked[89], extractBits(val_14, 0u, 2u), 3u, 2u);\n unpacked[90] = extractBits(val_14, 2u, 5u);\n unpacked[91] = extractBits(val_14, 7u, 5u);\n unpacked[92] = extractBits(val_14, 12u, 5u);\n unpacked[93] = extractBits(val_14, 17u, 5u);\n unpacked[94] = extractBits(val_14, 22u, 5u);\n unpacked[95] = extractBits(val_14, 27u, 5u);\n\n let val_15 = blocks[packed_offset + 15];\n unpacked[96] = extractBits(val_15, 0u, 5u);\n unpacked[97] = extractBits(val_15, 5u, 5u);\n unpacked[98] = extractBits(val_15, 10u, 5u);\n unpacked[99] = extractBits(val_15, 15u, 5u);\n unpacked[100] = extractBits(val_15, 20u, 5u);\n unpacked[101] = extractBits(val_15, 25u, 5u);\n unpacked[102] = extractBits(val_15, 30u, 2u);\n\n let val_16 = blocks[packed_offset + 16];\n unpacked[102] = insertBits(unpacked[102], extractBits(val_16, 0u, 3u), 2u, 3u);\n unpacked[103] = extractBits(val_16, 3u, 5u);\n unpacked[104] = extractBits(val_16, 8u, 5u);\n unpacked[105] = extractBits(val_16, 13u, 5u);\n unpacked[106] = extractBits(val_16, 18u, 5u);\n unpacked[107] = extractBits(val_16, 23u, 5u);\n unpacked[108] = extractBits(val_16, 28u, 4u);\n\n let val_17 = blocks[packed_offset + 17];\n unpacked[108] = insertBits(unpacked[108], extractBits(val_17, 0u, 1u), 4u, 1u);\n unpacked[109] = extractBits(val_17, 1u, 5u);\n unpacked[110] = extractBits(val_17, 6u, 5u);\n unpacked[111] = extractBits(val_17, 11u, 5u);\n unpacked[112] = extractBits(val_17, 16u, 5u);\n 
unpacked[113] = extractBits(val_17, 21u, 5u);\n unpacked[114] = extractBits(val_17, 26u, 5u);\n unpacked[115] = extractBits(val_17, 31u, 1u);\n \n let val_18 = blocks[packed_offset + 18];\n unpacked[115] = insertBits(unpacked[115], extractBits(val_18, 0u, 4u), 1u, 4u);\n unpacked[116] = extractBits(val_18, 4u, 5u);\n unpacked[117] = extractBits(val_18, 9u, 5u);\n unpacked[118] = extractBits(val_18, 14u, 5u);\n unpacked[119] = extractBits(val_18, 19u, 5u);\n unpacked[120] = extractBits(val_18, 24u, 5u);\n unpacked[121] = extractBits(val_18, 29u, 3u);\n\n let val_19 = blocks[packed_offset + 19];\n unpacked[121] = insertBits(unpacked[121], extractBits(val_19, 0u, 2u), 3u, 2u);\n unpacked[122] = extractBits(val_19, 2u, 5u);\n unpacked[123] = extractBits(val_19, 7u, 5u);\n unpacked[124] = extractBits(val_19, 12u, 5u);\n unpacked[125] = extractBits(val_19, 17u, 5u);\n unpacked[126] = extractBits(val_19, 22u, 5u);\n unpacked[127] = extractBits(val_19, 27u, 5u);\n}\n";/* WGSL snippet (string): declares the module-scope `unpacked` array and fn unpack_block_128_bit_depth_6(packed_offset: u32). The function reads the 24 words blocks[packed_offset .. packed_offset + 23] and fills unpacked[0..127] with consecutive 6-bit fields, lowest bits first. The bit pattern repeats every 3 words (16 values); a field that straddles two words gets its low bits from the top of the earlier word and its high bits, via insertBits(), from the bottom of the next word. `blocks` is not declared in this snippet; the embedding shader has to declare it. NOTE(review): `array` carries no element type or length in this text - confirm against the original shader source. */var unpackBlock128BitDepth6="\nvar unpacked: array;\n\nfn unpack_block_128_bit_depth_6(packed_offset: u32) {\n let val_0 = blocks[packed_offset];\n unpacked[0] = extractBits(val_0, 0u, 6u);\n unpacked[1] = extractBits(val_0, 6u, 6u);\n unpacked[2] = extractBits(val_0, 12u, 6u);\n unpacked[3] = extractBits(val_0, 18u, 6u);\n unpacked[4] = extractBits(val_0, 24u, 6u);\n unpacked[5] = extractBits(val_0, 30u, 2u);\n \n let val_1 = blocks[packed_offset + 1];\n unpacked[5] = insertBits(unpacked[5], extractBits(val_1, 0u, 4u), 2u, 4u);\n unpacked[6] = extractBits(val_1, 4u, 6u);\n unpacked[7] = extractBits(val_1, 10u, 6u);\n unpacked[8] = extractBits(val_1, 16u, 6u);\n unpacked[9] = extractBits(val_1, 22u, 6u);\n unpacked[10] = extractBits(val_1, 28u, 4u);\n \n let val_2 = blocks[packed_offset + 2];\n unpacked[10] = insertBits(unpacked[10], extractBits(val_2, 0u, 2u), 4u, 2u);\n unpacked[11] = extractBits(val_2, 2u, 6u);\n unpacked[12] = extractBits(val_2, 8u, 6u);\n unpacked[13] = extractBits(val_2, 14u, 6u);\n 
unpacked[14] = extractBits(val_2, 20u, 6u);\n unpacked[15] = extractBits(val_2, 26u, 6u);\n \n let val_3 = blocks[packed_offset + 3];\n unpacked[16] = extractBits(val_3, 0u, 6u);\n unpacked[17] = extractBits(val_3, 6u, 6u);\n unpacked[18] = extractBits(val_3, 12u, 6u);\n unpacked[19] = extractBits(val_3, 18u, 6u);\n unpacked[20] = extractBits(val_3, 24u, 6u);\n unpacked[21] = extractBits(val_3, 30u, 2u);\n \n let val_4 = blocks[packed_offset + 4];\n unpacked[21] = insertBits(unpacked[21], extractBits(val_4, 0u, 4u), 2u, 4u);\n unpacked[22] = extractBits(val_4, 4u, 6u);\n unpacked[23] = extractBits(val_4, 10u, 6u);\n unpacked[24] = extractBits(val_4, 16u, 6u);\n unpacked[25] = extractBits(val_4, 22u, 6u);\n unpacked[26] = extractBits(val_4, 28u, 4u);\n\n let val_5 = blocks[packed_offset + 5];\n unpacked[26] = insertBits(unpacked[26], extractBits(val_5, 0u, 2u), 4u, 2u);\n unpacked[27] = extractBits(val_5, 2u, 6u);\n unpacked[28] = extractBits(val_5, 8u, 6u);\n unpacked[29] = extractBits(val_5, 14u, 6u);\n unpacked[30] = extractBits(val_5, 20u, 6u);\n unpacked[31] = extractBits(val_5, 26u, 6u);\n\n let val_6 = blocks[packed_offset + 6];\n unpacked[32] = extractBits(val_6, 0u, 6u);\n unpacked[33] = extractBits(val_6, 6u, 6u);\n unpacked[34] = extractBits(val_6, 12u, 6u);\n unpacked[35] = extractBits(val_6, 18u, 6u);\n unpacked[36] = extractBits(val_6, 24u, 6u);\n unpacked[37] = extractBits(val_6, 30u, 2u);\n\n let val_7 = blocks[packed_offset + 7];\n unpacked[37] = insertBits(unpacked[37], extractBits(val_7, 0u, 4u), 2u, 4u);\n unpacked[38] = extractBits(val_7, 4u, 6u);\n unpacked[39] = extractBits(val_7, 10u, 6u);\n unpacked[40] = extractBits(val_7, 16u, 6u);\n unpacked[41] = extractBits(val_7, 22u, 6u);\n unpacked[42] = extractBits(val_7, 28u, 4u);\n \n let val_8 = blocks[packed_offset + 8];\n unpacked[42] = insertBits(unpacked[42], extractBits(val_8, 0u, 2u), 4u, 2u);\n unpacked[43] = extractBits(val_8, 2u, 6u);\n unpacked[44] = extractBits(val_8, 8u, 6u);\n 
unpacked[45] = extractBits(val_8, 14u, 6u);\n unpacked[46] = extractBits(val_8, 20u, 6u);\n unpacked[47] = extractBits(val_8, 26u, 6u);\n\n let val_9 = blocks[packed_offset + 9];\n unpacked[48] = extractBits(val_9, 0u, 6u);\n unpacked[49] = extractBits(val_9, 6u, 6u);\n unpacked[50] = extractBits(val_9, 12u, 6u);\n unpacked[51] = extractBits(val_9, 18u, 6u);\n unpacked[52] = extractBits(val_9, 24u, 6u);\n unpacked[53] = extractBits(val_9, 30u, 2u);\n\n let val_10 = blocks[packed_offset + 10];\n unpacked[53] = insertBits(unpacked[53], extractBits(val_10, 0u, 4u), 2u, 4u);\n unpacked[54] = extractBits(val_10, 4u, 6u);\n unpacked[55] = extractBits(val_10, 10u, 6u);\n unpacked[56] = extractBits(val_10, 16u, 6u);\n unpacked[57] = extractBits(val_10, 22u, 6u);\n unpacked[58] = extractBits(val_10, 28u, 4u);\n\n let val_11 = blocks[packed_offset + 11];\n unpacked[58] = insertBits(unpacked[58], extractBits(val_11, 0u, 2u), 4u, 2u);\n unpacked[59] = extractBits(val_11, 2u, 6u);\n unpacked[60] = extractBits(val_11, 8u, 6u);\n unpacked[61] = extractBits(val_11, 14u, 6u);\n unpacked[62] = extractBits(val_11, 20u, 6u);\n unpacked[63] = extractBits(val_11, 26u, 6u);\n \n let val_12 = blocks[packed_offset + 12];\n unpacked[64] = extractBits(val_12, 0u, 6u);\n unpacked[65] = extractBits(val_12, 6u, 6u);\n unpacked[66] = extractBits(val_12, 12u, 6u);\n unpacked[67] = extractBits(val_12, 18u, 6u);\n unpacked[68] = extractBits(val_12, 24u, 6u);\n unpacked[69] = extractBits(val_12, 30u, 2u);\n\n let val_13 = blocks[packed_offset + 13];\n unpacked[69] = insertBits(unpacked[69], extractBits(val_13, 0u, 4u), 2u, 4u);\n unpacked[70] = extractBits(val_13, 4u, 6u);\n unpacked[71] = extractBits(val_13, 10u, 6u);\n unpacked[72] = extractBits(val_13, 16u, 6u);\n unpacked[73] = extractBits(val_13, 22u, 6u);\n unpacked[74] = extractBits(val_13, 28u, 4u);\n\n let val_14 = blocks[packed_offset + 14];\n unpacked[74] = insertBits(unpacked[74], extractBits(val_14, 0u, 2u), 4u, 2u);\n unpacked[75] = 
extractBits(val_14, 2u, 6u);\n unpacked[76] = extractBits(val_14, 8u, 6u);\n unpacked[77] = extractBits(val_14, 14u, 6u);\n unpacked[78] = extractBits(val_14, 20u, 6u);\n unpacked[79] = extractBits(val_14, 26u, 6u);\n\n let val_15 = blocks[packed_offset + 15];\n unpacked[80] = extractBits(val_15, 0u, 6u);\n unpacked[81] = extractBits(val_15, 6u, 6u);\n unpacked[82] = extractBits(val_15, 12u, 6u);\n unpacked[83] = extractBits(val_15, 18u, 6u);\n unpacked[84] = extractBits(val_15, 24u, 6u);\n unpacked[85] = extractBits(val_15, 30u, 2u);\n \n let val_16 = blocks[packed_offset + 16];\n unpacked[85] = insertBits(unpacked[85], extractBits(val_16, 0u, 4u), 2u, 4u);\n unpacked[86] = extractBits(val_16, 4u, 6u);\n unpacked[87] = extractBits(val_16, 10u, 6u);\n unpacked[88] = extractBits(val_16, 16u, 6u);\n unpacked[89] = extractBits(val_16, 22u, 6u);\n unpacked[90] = extractBits(val_16, 28u, 4u);\n\n let val_17 = blocks[packed_offset + 17];\n unpacked[90] = insertBits(unpacked[90], extractBits(val_17, 0u, 2u), 4u, 2u);\n unpacked[91] = extractBits(val_17, 2u, 6u);\n unpacked[92] = extractBits(val_17, 8u, 6u);\n unpacked[93] = extractBits(val_17, 14u, 6u);\n unpacked[94] = extractBits(val_17, 20u, 6u);\n unpacked[95] = extractBits(val_17, 26u, 6u);\n\n let val_18 = blocks[packed_offset + 18];\n unpacked[96] = extractBits(val_18, 0u, 6u);\n unpacked[97] = extractBits(val_18, 6u, 6u);\n unpacked[98] = extractBits(val_18, 12u, 6u);\n unpacked[99] = extractBits(val_18, 18u, 6u);\n unpacked[100] = extractBits(val_18, 24u, 6u);\n unpacked[101] = extractBits(val_18, 30u, 2u);\n\n let val_19 = blocks[packed_offset + 19];\n unpacked[101] = insertBits(unpacked[101], extractBits(val_19, 0u, 4u), 2u, 4u);\n unpacked[102] = extractBits(val_19, 4u, 6u);\n unpacked[103] = extractBits(val_19, 10u, 6u);\n unpacked[104] = extractBits(val_19, 16u, 6u);\n unpacked[105] = extractBits(val_19, 22u, 6u);\n unpacked[106] = extractBits(val_19, 28u, 4u);\n \n let val_20 = blocks[packed_offset + 20];\n 
unpacked[106] = insertBits(unpacked[106], extractBits(val_20, 0u, 2u), 4u, 2u);\n unpacked[107] = extractBits(val_20, 2u, 6u);\n unpacked[108] = extractBits(val_20, 8u, 6u);\n unpacked[109] = extractBits(val_20, 14u, 6u);\n unpacked[110] = extractBits(val_20, 20u, 6u);\n unpacked[111] = extractBits(val_20, 26u, 6u);\n\n let val_21 = blocks[packed_offset + 21];\n unpacked[112] = extractBits(val_21, 0u, 6u);\n unpacked[113] = extractBits(val_21, 6u, 6u);\n unpacked[114] = extractBits(val_21, 12u, 6u);\n unpacked[115] = extractBits(val_21, 18u, 6u);\n unpacked[116] = extractBits(val_21, 24u, 6u);\n unpacked[117] = extractBits(val_21, 30u, 2u);\n\n let val_22 = blocks[packed_offset + 22];\n unpacked[117] = insertBits(unpacked[117], extractBits(val_22, 0u, 4u), 2u, 4u);\n unpacked[118] = extractBits(val_22, 4u, 6u);\n unpacked[119] = extractBits(val_22, 10u, 6u);\n unpacked[120] = extractBits(val_22, 16u, 6u);\n unpacked[121] = extractBits(val_22, 22u, 6u);\n unpacked[122] = extractBits(val_22, 28u, 4u);\n\n let val_23 = blocks[packed_offset + 23];\n unpacked[122] = insertBits(unpacked[122], extractBits(val_23, 0u, 2u), 4u, 2u);\n unpacked[123] = extractBits(val_23, 2u, 6u);\n unpacked[124] = extractBits(val_23, 8u, 6u);\n unpacked[125] = extractBits(val_23, 14u, 6u);\n unpacked[126] = extractBits(val_23, 20u, 6u);\n unpacked[127] = extractBits(val_23, 26u, 6u);\n}\n";/* WGSL snippet (string): declares the module-scope `unpacked` array and fn unpack_block_128_bit_depth_7(packed_offset: u32). The function reads the 28 words blocks[packed_offset .. packed_offset + 27] and fills unpacked[0..127] with consecutive 7-bit fields, lowest bits first. The bit pattern repeats every 7 words (32 values); a field that straddles two words gets its low bits from the top of the earlier word and its high bits, via insertBits(), from the bottom of the next word. `blocks` is not declared in this snippet; the embedding shader has to declare it. NOTE(review): `array` carries no element type or length in this text - confirm against the original shader source. */var unpackBlock128BitDepth7="\nvar unpacked: array;\n\nfn unpack_block_128_bit_depth_7(packed_offset: u32) {\n let val_0 = blocks[packed_offset];\n unpacked[0] = extractBits(val_0, 0u, 7u);\n unpacked[1] = extractBits(val_0, 7u, 7u);\n unpacked[2] = extractBits(val_0, 14u, 7u);\n unpacked[3] = extractBits(val_0, 21u, 7u);\n unpacked[4] = extractBits(val_0, 28u, 4u);\n \n let val_1 = blocks[packed_offset + 1];\n unpacked[4] = insertBits(unpacked[4], extractBits(val_1, 0u, 3u), 4u, 3u);\n unpacked[5] = extractBits(val_1, 3u, 7u);\n unpacked[6] = extractBits(val_1, 10u, 7u);\n unpacked[7] = extractBits(val_1, 
17u, 7u);\n unpacked[8] = extractBits(val_1, 24u, 7u);\n unpacked[9] = extractBits(val_1, 31u, 1u);\n \n let val_2 = blocks[packed_offset + 2];\n unpacked[9] = insertBits(unpacked[9], extractBits(val_2, 0u, 6u), 1u, 6u);\n unpacked[10] = extractBits(val_2, 6u, 7u);\n unpacked[11] = extractBits(val_2, 13u, 7u);\n unpacked[12] = extractBits(val_2, 20u, 7u);\n unpacked[13] = extractBits(val_2, 27u, 5u);\n \n let val_3 = blocks[packed_offset + 3];\n unpacked[13] = insertBits(unpacked[13], extractBits(val_3, 0u, 2u), 5u, 2u);\n unpacked[14] = extractBits(val_3, 2u, 7u);\n unpacked[15] = extractBits(val_3, 9u, 7u);\n unpacked[16] = extractBits(val_3, 16u, 7u);\n unpacked[17] = extractBits(val_3, 23u, 7u);\n unpacked[18] = extractBits(val_3, 30u, 2u);\n \n let val_4 = blocks[packed_offset + 4];\n unpacked[18] = insertBits(unpacked[18], extractBits(val_4, 0u, 5u), 2u, 5u);\n unpacked[19] = extractBits(val_4, 5u, 7u);\n unpacked[20] = extractBits(val_4, 12u, 7u);\n unpacked[21] = extractBits(val_4, 19u, 7u);\n unpacked[22] = extractBits(val_4, 26u, 6u);\n \n let val_5 = blocks[packed_offset + 5];\n unpacked[22] = insertBits(unpacked[22], extractBits(val_5, 0u, 1u), 6u, 1u);\n unpacked[23] = extractBits(val_5, 1u, 7u);\n unpacked[24] = extractBits(val_5, 8u, 7u);\n unpacked[25] = extractBits(val_5, 15u, 7u);\n unpacked[26] = extractBits(val_5, 22u, 7u);\n unpacked[27] = extractBits(val_5, 29u, 3u);\n \n let val_6 = blocks[packed_offset + 6];\n unpacked[27] = insertBits(unpacked[27], extractBits(val_6, 0u, 4u), 3u, 4u);\n unpacked[28] = extractBits(val_6, 4u, 7u);\n unpacked[29] = extractBits(val_6, 11u, 7u);\n unpacked[30] = extractBits(val_6, 18u, 7u);\n unpacked[31] = extractBits(val_6, 25u, 7u);\n \n let val_7 = blocks[packed_offset + 7];\n unpacked[32] = extractBits(val_7, 0u, 7u);\n unpacked[33] = extractBits(val_7, 7u, 7u);\n unpacked[34] = extractBits(val_7, 14u, 7u);\n unpacked[35] = extractBits(val_7, 21u, 7u);\n unpacked[36] = extractBits(val_7, 28u, 4u);\n \n let 
val_8 = blocks[packed_offset + 8];\n unpacked[36] = insertBits(unpacked[36], extractBits(val_8, 0u, 3u), 4u, 3u);\n unpacked[37] = extractBits(val_8, 3u, 7u);\n unpacked[38] = extractBits(val_8, 10u, 7u);\n unpacked[39] = extractBits(val_8, 17u, 7u);\n unpacked[40] = extractBits(val_8, 24u, 7u);\n unpacked[41] = extractBits(val_8, 31u, 1u);\n \n let val_9 = blocks[packed_offset + 9];\n unpacked[41] = insertBits(unpacked[41], extractBits(val_9, 0u, 6u), 1u, 6u);\n unpacked[42] = extractBits(val_9, 6u, 7u);\n unpacked[43] = extractBits(val_9, 13u, 7u);\n unpacked[44] = extractBits(val_9, 20u, 7u);\n unpacked[45] = extractBits(val_9, 27u, 5u);\n \n let val_10 = blocks[packed_offset + 10];\n unpacked[45] = insertBits(unpacked[45], extractBits(val_10, 0u, 2u), 5u, 2u);\n unpacked[46] = extractBits(val_10, 2u, 7u);\n unpacked[47] = extractBits(val_10, 9u, 7u);\n unpacked[48] = extractBits(val_10, 16u, 7u);\n unpacked[49] = extractBits(val_10, 23u, 7u);\n unpacked[50] = extractBits(val_10, 30u, 2u);\n \n let val_11 = blocks[packed_offset + 11];\n unpacked[50] = insertBits(unpacked[50], extractBits(val_11, 0u, 5u), 2u, 5u);\n unpacked[51] = extractBits(val_11, 5u, 7u);\n unpacked[52] = extractBits(val_11, 12u, 7u);\n unpacked[53] = extractBits(val_11, 19u, 7u);\n unpacked[54] = extractBits(val_11, 26u, 6u);\n \n let val_12 = blocks[packed_offset + 12];\n unpacked[54] = insertBits(unpacked[54], extractBits(val_12, 0u, 1u), 6u, 1u);\n unpacked[55] = extractBits(val_12, 1u, 7u);\n unpacked[56] = extractBits(val_12, 8u, 7u);\n unpacked[57] = extractBits(val_12, 15u, 7u);\n unpacked[58] = extractBits(val_12, 22u, 7u);\n unpacked[59] = extractBits(val_12, 29u, 3u);\n \n let val_13 = blocks[packed_offset + 13];\n unpacked[59] = insertBits(unpacked[59], extractBits(val_13, 0u, 4u), 3u, 4u);\n unpacked[60] = extractBits(val_13, 4u, 7u);\n unpacked[61] = extractBits(val_13, 11u, 7u);\n unpacked[62] = extractBits(val_13, 18u, 7u);\n unpacked[63] = extractBits(val_13, 25u, 7u);\n \n 
let val_14 = blocks[packed_offset + 14];\n unpacked[64] = extractBits(val_14, 0u, 7u);\n unpacked[65] = extractBits(val_14, 7u, 7u);\n unpacked[66] = extractBits(val_14, 14u, 7u);\n unpacked[67] = extractBits(val_14, 21u, 7u);\n unpacked[68] = extractBits(val_14, 28u, 4u);\n \n let val_15 = blocks[packed_offset + 15];\n unpacked[68] = insertBits(unpacked[68], extractBits(val_15, 0u, 3u), 4u, 3u);\n unpacked[69] = extractBits(val_15, 3u, 7u);\n unpacked[70] = extractBits(val_15, 10u, 7u);\n unpacked[71] = extractBits(val_15, 17u, 7u);\n unpacked[72] = extractBits(val_15, 24u, 7u);\n unpacked[73] = extractBits(val_15, 31u, 1u);\n \n let val_16 = blocks[packed_offset + 16];\n unpacked[73] = insertBits(unpacked[73], extractBits(val_16, 0u, 6u), 1u, 6u);\n unpacked[74] = extractBits(val_16, 6u, 7u);\n unpacked[75] = extractBits(val_16, 13u, 7u);\n unpacked[76] = extractBits(val_16, 20u, 7u);\n unpacked[77] = extractBits(val_16, 27u, 5u);\n \n let val_17 = blocks[packed_offset + 17];\n unpacked[77] = insertBits(unpacked[77], extractBits(val_17, 0u, 2u), 5u, 2u);\n unpacked[78] = extractBits(val_17, 2u, 7u);\n unpacked[79] = extractBits(val_17, 9u, 7u);\n unpacked[80] = extractBits(val_17, 16u, 7u);\n unpacked[81] = extractBits(val_17, 23u, 7u);\n unpacked[82] = extractBits(val_17, 30u, 2u);\n \n let val_18 = blocks[packed_offset + 18];\n unpacked[82] = insertBits(unpacked[82], extractBits(val_18, 0u, 5u), 2u, 5u);\n unpacked[83] = extractBits(val_18, 5u, 7u);\n unpacked[84] = extractBits(val_18, 12u, 7u);\n unpacked[85] = extractBits(val_18, 19u, 7u);\n unpacked[86] = extractBits(val_18, 26u, 6u);\n \n let val_19 = blocks[packed_offset + 19];\n unpacked[86] = insertBits(unpacked[86], extractBits(val_19, 0u, 1u), 6u, 1u);\n unpacked[87] = extractBits(val_19, 1u, 7u);\n unpacked[88] = extractBits(val_19, 8u, 7u);\n unpacked[89] = extractBits(val_19, 15u, 7u);\n unpacked[90] = extractBits(val_19, 22u, 7u);\n unpacked[91] = extractBits(val_19, 29u, 3u);\n \n let val_20 = 
blocks[packed_offset + 20];\n unpacked[91] = insertBits(unpacked[91], extractBits(val_20, 0u, 4u), 3u, 4u);\n unpacked[92] = extractBits(val_20, 4u, 7u);\n unpacked[93] = extractBits(val_20, 11u, 7u);\n unpacked[94] = extractBits(val_20, 18u, 7u);\n unpacked[95] = extractBits(val_20, 25u, 7u);\n \n let val_21 = blocks[packed_offset + 21];\n unpacked[96] = extractBits(val_21, 0u, 7u);\n unpacked[97] = extractBits(val_21, 7u, 7u);\n unpacked[98] = extractBits(val_21, 14u, 7u);\n unpacked[99] = extractBits(val_21, 21u, 7u);\n unpacked[100] = extractBits(val_21, 28u, 4u);\n \n let val_22 = blocks[packed_offset + 22];\n unpacked[100] = insertBits(unpacked[100], extractBits(val_22, 0u, 3u), 4u, 3u);\n unpacked[101] = extractBits(val_22, 3u, 7u);\n unpacked[102] = extractBits(val_22, 10u, 7u);\n unpacked[103] = extractBits(val_22, 17u, 7u);\n unpacked[104] = extractBits(val_22, 24u, 7u);\n unpacked[105] = extractBits(val_22, 31u, 1u);\n \n let val_23 = blocks[packed_offset + 23];\n unpacked[105] = insertBits(unpacked[105], extractBits(val_23, 0u, 6u), 1u, 6u);\n unpacked[106] = extractBits(val_23, 6u, 7u);\n unpacked[107] = extractBits(val_23, 13u, 7u);\n unpacked[108] = extractBits(val_23, 20u, 7u);\n unpacked[109] = extractBits(val_23, 27u, 5u);\n \n let val_24 = blocks[packed_offset + 24];\n unpacked[109] = insertBits(unpacked[109], extractBits(val_24, 0u, 2u), 5u, 2u);\n unpacked[110] = extractBits(val_24, 2u, 7u);\n unpacked[111] = extractBits(val_24, 9u, 7u);\n unpacked[112] = extractBits(val_24, 16u, 7u);\n unpacked[113] = extractBits(val_24, 23u, 7u);\n unpacked[114] = extractBits(val_24, 30u, 2u);\n \n let val_25 = blocks[packed_offset + 25];\n unpacked[114] = insertBits(unpacked[114], extractBits(val_25, 0u, 5u), 2u, 5u);\n unpacked[115] = extractBits(val_25, 5u, 7u);\n unpacked[116] = extractBits(val_25, 12u, 7u);\n unpacked[117] = extractBits(val_25, 19u, 7u);\n unpacked[118] = extractBits(val_25, 26u, 6u);\n \n let val_26 = blocks[packed_offset + 26];\n 
unpacked[118] = insertBits(unpacked[118], extractBits(val_26, 0u, 1u), 6u, 1u);\n unpacked[119] = extractBits(val_26, 1u, 7u);\n unpacked[120] = extractBits(val_26, 8u, 7u);\n unpacked[121] = extractBits(val_26, 15u, 7u);\n unpacked[122] = extractBits(val_26, 22u, 7u);\n unpacked[123] = extractBits(val_26, 29u, 3u);\n \n let val_27 = blocks[packed_offset + 27];\n unpacked[123] = insertBits(unpacked[123], extractBits(val_27, 0u, 4u), 3u, 4u);\n unpacked[124] = extractBits(val_27, 4u, 7u);\n unpacked[125] = extractBits(val_27, 11u, 7u);\n unpacked[126] = extractBits(val_27, 18u, 7u);\n unpacked[127] = extractBits(val_27, 25u, 7u); \n}\n";/* WGSL snippet (string): a 32-entry `exponents` table holding successive powers of two from 2^-35 up to 2^-4 (0.0625), and fn from_fp510(x: u32) -> f32. from_fp510 treats x as a 16-bit packed value: bits 0-9 are an unsigned integer magnitude, bits 10-14 index the exponents table, bit 15 is the sign. The result is exponents[index] * magnitude, multiplied by (1.0 - 2.0 * sign), i.e. negated when bit 15 is set. Bits above 15 are ignored. */var fromFP510Function="\n\nconst exponents: array = array(\n 2.9103830456733704e-11, \n 5.820766091346741e-11, \n 1.1641532182693481e-10, \n 2.3283064365386963e-10,\n 4.656612873077393e-10, \n 9.313225746154785e-10, \n 1.862645149230957e-09, \n 3.725290298461914e-09,\n 7.450580596923828e-09, \n 1.4901161193847656e-08, \n 2.9802322387695312e-08, \n 5.960464477539063e-08,\n 1.1920928955078125e-07, \n 2.384185791015625e-07, \n 4.76837158203125e-07, \n 9.5367431640625e-07,\n 1.9073486328125e-06, \n 3.814697265625e-06, \n 7.62939453125e-06, \n 1.52587890625e-05, \n 3.0517578125e-05,\n 6.103515625e-05, \n 0.0001220703125, \n 0.000244140625, \n 0.00048828125, \n 0.0009765625, \n 0.001953125, \n 0.00390625,\n 0.0078125, \n 0.015625, \n 0.03125, \n 0.0625);\n\nfn from_fp510(x: u32) -> f32 {\n let exponent = f32(exponents[extractBits(x, 10u, 5u)]); \n let fractional = f32(extractBits(x, 0u, 10u)); \n let abs = exponent * fractional;\n return abs * (1.0 - (2.0 * f32(extractBits(x, 15u, 1u))));\n}\n";/* WGSL compute shader source for re-laying-out 3-bit weight blocks in place. It is assembled from unpackBlock128BitDepth3, rowsPerBlock, columnsPerBlock and emptyShader (all defined outside this statement). Each invocation with global_id.x below args.nbr and global_id.y below args.nbc handles the 12-word block starting at args.blocks_offset + (x * nbc + y) * 12: it unpacks the block into `unpacked`, then rewrites the same words as two bit planes. Words 0-7 receive bits 0-1 of every value (a 16-bit field per row, 2 bits per column); words 8-11 receive bit 2 (an 8-bit field per row, 1 bit per column). The first loop handles column 0 and inserts with the full field width, which also zeroes the rest of that row field before the remaining columns are inserted. The workgroup size comes from the three override constants (default 1). */var preprocessBlocks3BitShaderSource="\n\nstruct argsStruct {\n nbr: u32,\n nbc: u32,\n blocks_offset: u32\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar blocks: array;\n\n".concat(unpackBlock128BitDepth3,"\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 
= 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.nbr || global_id.y >= args.nbc) {\n return;\n }\n \n let blocks_start: u32 = args.blocks_offset + ((global_id.x * args.nbc + global_id.y) * 12u); \n unpack_block_128_bit_depth_3(blocks_start); \n \n let b01: u32 = blocks_start;\n let b2: u32 = blocks_start + 8u;\n \n for (var r = 0u; r < ").concat(rowsPerBlock,"u; r++) {\n let unpacked_idx = r * ").concat(columnsPerBlock,"u;\n let b01_idx = b01 + (r / 2u);\n let b2_idx = b2 + (r / 4u);\n\n blocks[b01_idx] = insertBits(blocks[b01_idx], extractBits(unpacked[unpacked_idx], 0u, 2u), (r * 16u) % 32u, 16u);\n blocks[b2_idx] = insertBits(blocks[b2_idx], extractBits(unpacked[unpacked_idx], 2u, 1u), (r * 8u) % 32u, 8u); \n }\n \n for (var c = 1u; c < ").concat(columnsPerBlock,"u; c++) {\n for (var r = 0u; r < ").concat(rowsPerBlock,"u; r++) {\n let unpacked_idx = r * ").concat(columnsPerBlock,"u + c;\n let b01_idx = b01 + (r / 2u);\n let b2_idx = b2 + (r / 4u);\n \n blocks[b01_idx] = insertBits(blocks[b01_idx], extractBits(unpacked[unpacked_idx], 0u, 2u), ((r * 16u) % 32u) + (2u * c), 2u);\n blocks[b2_idx] = insertBits(blocks[b2_idx], extractBits(unpacked[unpacked_idx], 2u, 1u), ((r * 8u) % 32u) + c, 1u); \n }\n }\n}\n\n").concat(emptyShader);/* WGSL compute shader source for re-laying-out 5-bit weight blocks in place. It is assembled from unpackBlock128BitDepth5, rowsPerBlock, columnsPerBlock and emptyShader (rowsPerBlock, columnsPerBlock and emptyShader are defined outside this chunk). Each invocation with global_id.x below args.nbr and global_id.y below args.nbc handles the 20-word block starting at args.blocks_offset + (x * nbc + y) * 20: it unpacks the block into `unpacked`, then rewrites the same words as two bit planes. Words 0-15 receive bits 0-3 of every value, one word per row with 4 bits per column; words 16-19 receive bit 4 (an 8-bit field per row, 1 bit per column). The first loop handles column 0 and overwrites the whole row word or field, clearing it before the remaining columns are inserted. NOTE(review): the one-word-per-row layout presumes columnsPerBlock is 8 - confirm where it is defined. */var preprocessBlocks5BitShaderSource="\n\nstruct argsStruct {\n nbr: u32,\n nbc: u32,\n blocks_offset: u32\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar blocks: array;\n\n".concat(unpackBlock128BitDepth5,"\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.nbr || global_id.y >= args.nbc) {\n return;\n }\n\n let blocks_start: u32 = args.blocks_offset + 
((global_id.x * args.nbc + global_id.y) * 20u);\n unpack_block_128_bit_depth_5(blocks_start); \n \n let b03: u32 = blocks_start;\n let b4: u32 = blocks_start + 16u;\n\n for (var r = 0u; r < ").concat(rowsPerBlock,"u; r++) {\n let unpacked_idx = r * ").concat(columnsPerBlock,"u;\n let b03_idx = b03 + r;\n let b4_idx = b4 + (r / 4u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 0u, 32u);\n blocks[b4_idx] = insertBits(blocks[b4_idx], extractBits(unpacked[unpacked_idx], 4u, 1u), (r * 8u) % 32u, 8u);\n }\n \n for (var c = 1u; c < ").concat(columnsPerBlock,"u; c++) {\n for (var r = 0u; r < ").concat(rowsPerBlock,"u; r++) {\n let unpacked_idx = r * ").concat(columnsPerBlock,"u + c;\n let b03_idx = b03 + r;\n let b4_idx = b4 + (r / 4u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 4 * c, 4u);\n blocks[b4_idx] = insertBits(blocks[b4_idx], extractBits(unpacked[unpacked_idx], 4u, 1u), ((r * 8u) % 32u) + c, 1u);\n }\n } \n}\n\n").concat(emptyShader);/* WGSL compute shader source for re-laying-out 6-bit weight blocks in place. It is assembled from unpackBlock128BitDepth6, rowsPerBlock, columnsPerBlock and emptyShader (rowsPerBlock, columnsPerBlock and emptyShader are defined outside this chunk). Each invocation with global_id.x below args.nbr and global_id.y below args.nbc handles the 24-word block starting at args.blocks_offset + (x * nbc + y) * 24: it unpacks the block into `unpacked`, then rewrites the same words as two bit planes. Words 0-15 receive bits 0-3 of every value, one word per row with 4 bits per column; words 16-23 receive bits 4-5 (a 16-bit field per row, 2 bits per column). The first loop handles column 0 and overwrites the whole row word or field, clearing it before the remaining columns are inserted. NOTE(review): the one-word-per-row layout presumes columnsPerBlock is 8 - confirm where it is defined. */var preprocessBlocks6BitShaderSource="\n\nstruct argsStruct {\n nbr: u32,\n nbc: u32,\n blocks_offset: u32\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar blocks: array;\n\n".concat(unpackBlock128BitDepth6,"\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.nbr || global_id.y >= args.nbc) {\n return;\n }\n \n let blocks_start: u32 = args.blocks_offset + ((global_id.x * args.nbc + global_id.y) * 24u);\n unpack_block_128_bit_depth_6(blocks_start);\n \n let b03: u32 = blocks_start;\n let b45: u32 = blocks_start + 16u; \n for (var r = 0u; r < ").concat(rowsPerBlock,"u; r++) {\n let unpacked_idx = r * ").concat(columnsPerBlock,"u;\n let b03_idx = b03 + r;\n let b45_idx = b45 + 
(r / 2u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 0u, 32u);\n blocks[b45_idx] = insertBits(blocks[b45_idx], extractBits(unpacked[unpacked_idx], 4u, 2u), ((r * 16u) % 32u), 16u);\n }\n\n for (var c = 1u; c < ").concat(columnsPerBlock,"u; c++) {\n for (var r = 0u; r < ").concat(rowsPerBlock,"u; r++) {\n let unpacked_idx = r * ").concat(columnsPerBlock,"u + c;\n let b03_idx = b03 + r;\n let b45_idx = b45 + (r / 2u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 4 * c, 4u);\n blocks[b45_idx] = insertBits(blocks[b45_idx], extractBits(unpacked[unpacked_idx], 4u, 2u), ((r * 16u) % 32u) + (2 * c), 2u);\n }\n }\n}\n\n").concat(emptyShader);var preprocessBlocks7BitShaderSource="\n\nstruct argsStruct {\n nbr: u32,\n nbc: u32,\n blocks_offset: u32\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar blocks: array;\n\n".concat(unpackBlock128BitDepth7,"\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.nbr || global_id.y >= args.nbc) {\n return;\n }\n \n let blocks_start: u32 = args.blocks_offset + ((global_id.x * args.nbc + global_id.y) * 28u);\n unpack_block_128_bit_depth_7(blocks_start);\n \n let b03: u32 = blocks_start;\n let b45: u32 = blocks_start + 16u;\n let b6: u32 = blocks_start + 24u; \n for (var r = 0u; r < ").concat(rowsPerBlock,"u; r++) {\n let unpacked_idx = r * ").concat(columnsPerBlock,"u;\n let b03_idx = b03 + r;\n let b45_idx = b45 + (r / 2u);\n let b6_idx = b6 + (r / 4u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 0, 32u);\n blocks[b45_idx] = insertBits(blocks[b45_idx], extractBits(unpacked[unpacked_idx], 4u, 2u), ((r * 16u) % 32u), 16u);\n 
blocks[b6_idx] = insertBits(blocks[b6_idx], extractBits(unpacked[unpacked_idx], 6u, 1u), ((r * 8u) % 32u), 8u);\n }\n \n for (var c = 1u; c < ").concat(columnsPerBlock,"u; c++) {\n for (var r = 0u; r < ").concat(rowsPerBlock,"u; r++) {\n let unpacked_idx = r * ").concat(columnsPerBlock,"u + c;\n let b03_idx = b03 + r;\n let b45_idx = b45 + (r / 2u);\n let b6_idx = b6 + (r / 4u);\n \n blocks[b03_idx] = insertBits(blocks[b03_idx], extractBits(unpacked[unpacked_idx], 0u, 4u), 4 * c, 4u);\n blocks[b45_idx] = insertBits(blocks[b45_idx], extractBits(unpacked[unpacked_idx], 4u, 2u), ((r * 16u) % 32u) + (2 * c), 2u);\n blocks[b6_idx] = insertBits(blocks[b6_idx], extractBits(unpacked[unpacked_idx], 6u, 1u), ((r * 8u) % 32u) + c, 1u);\n }\n }\n}\n\n").concat(emptyShader);
// Preprocessing shader source text, keyed by weight bit depth (3, 5, 6, 7 only).
var preprocessShaderSources={3:preprocessBlocks3BitShaderSource,5:preprocessBlocks5BitShaderSource,6:preprocessBlocks6BitShaderSource,7:preprocessBlocks7BitShaderSource};
// Shader name strings for the same bit depths.
var preprocessShaderNames={3:"pv_picollm_weight_block_mixed_16x8_preprocess_blocks_3bit_shader",5:"pv_picollm_weight_block_mixed_16x8_preprocess_blocks_5bit_shader",6:"pv_picollm_weight_block_mixed_16x8_preprocess_blocks_6bit_shader",7:"pv_picollm_weight_block_mixed_16x8_preprocess_blocks_7bit_shader"};
// Tile parameters for the multi-input forward shader; they are interpolated
// into the WGSL constants below (BM/BN scale the shared arrays, TM/TN the
// per-invocation result tile).
var BM=8;var BN=32;var TM=2;var TN=16;
// TC is the stride used by the cooperative-load loops (idx = local_idx * TC + tid).
// NOTE(review): presumably the number of invocations per workgroup — confirm
// against the dispatch/workgroup-size setup.
var TC=rowsPerBlock*BM*BN/(TM*TN);
// WGSL const declarations shared by the multi-input forward shader.
// block_size depends on bit_depth, which this snippet does not declare;
// forwardMultipleShaderSources emits that declaration.
var constantSnippet="\nconst BM = ".concat(BM,"u;\nconst BN = ").concat(BN,"u;\n\nconst TM = ").concat(TM,"u;\nconst TN = ").concat(TN,"u;\n\nconst TC = ").concat(TC,"u;\n\nconst ROW_PER_BLOCK = ").concat(rowsPerBlock,"u;\nconst COL_PER_BLOCK = ").concat(columnsPerBlock,"u;\n\nconst VEC_COL_PER_BLOCK = COL_PER_BLOCK / 4u;\n\nconst block_size: u32 = (COL_PER_BLOCK * ROW_PER_BLOCK * bit_depth) / 32u;\n\n");
// WGSL args struct plus bindings 0..4 (args, x, metas, blocks, y) for the
// multi-input forward shader.
var forwardMultipleInputArgsSnippet="\nstruct argsStruct {\n n: u32,\n m: u32,\n total_nbc: u32,\n k: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: 
array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array;\n";
// WGSL declarations of the shared_* staging arrays and the local_* arrays
// (copied x values, their per-column sums, and the TM x TN result accumulators
// indexed as tm_idx * TN + tn_idx by the snippets below).
var forwardMultipleSharedPrivateMemSnippet="\nvar shared_x: array, BN * VEC_COL_PER_BLOCK>;\nvar shared_ab: array;\nvar shared_w: array, BM * ROW_PER_BLOCK * VEC_COL_PER_BLOCK>;\n\nvar local_x: array, TN * VEC_COL_PER_BLOCK>;\nvar local_x_sums: array;\nvar local_results: array;\n";
// WGSL statements that derive the tile origin (local_bm_idx / local_bn_idx)
// from workgroup_id and this invocation's n_idx / k_idx / m_idx from local_id.x.
var forwardMultipleLocalVarSnippet="\n let tid = local_id.x;\n let bm_idx = workgroup_id.x;\n let bn_idx = workgroup_id.y;\n\n let local_bm_idx = bm_idx * BM;\n let local_bn_idx = bn_idx * BN;\n \n let n_idx = tid % (BN / TN);\n let k_idx = tid / (BN / TN) / (BM * ROW_PER_BLOCK / TM);\n let m_idx = tid / (BN / TN) % (BM * ROW_PER_BLOCK / TM);\n";
// The forwardMultipleLoadW<N>Bit fragments below each decode one weight row of
// an N-bit block into shared_w[dst + c] as vec4 values of f32. They expect
// `src`, `row` and `dst` to be in scope; forwardMultipleLoadWSnippet declares
// them before splicing a fragment in.
// 1-bit: four rows share a word, 8 bits per row.
var forwardMultipleLoadW1Bit="\n let b0 = blocks[src + (row / 4u)];\n\n let b0_offset_base = (row * 8u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b0_offset = b0_offset_base + (c * 4u); \n shared_w[dst + c] = vec4(\n f32(extractBits(b0, b0_offset, 1u)), \n f32(extractBits(b0, b0_offset + 1, 1u)),\n f32(extractBits(b0, b0_offset + 2, 1u)),\n f32(extractBits(b0, b0_offset + 3, 1u)));\n }\n";
// 2-bit: two rows share a word, 16 bits per row.
var forwardMultipleLoadW2Bit="\n let b01 = blocks[src + (row / 2u)];\n \n let b01_offset_base = (row * 16u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) { \n let b01_offset = b01_offset_base + (c * 8u);\n shared_w[dst + c] = vec4(\n f32(extractBits(b01, b01_offset, 2u)), \n f32(extractBits(b01, b01_offset + 2, 2u)),\n f32(extractBits(b01, b01_offset + 4, 2u)),\n f32(extractBits(b01, b01_offset + 6, 2u)));\n } \n";
// 3-bit: 2-bit plane at src, plus a 1-bit plane at src + 8 inserted as bit 2.
var forwardMultipleLoadW3Bit="\n let b01 = blocks[src + (row / 2u)];\n let b2 = blocks[src + 8u + (row / 4u)]; \n\n let b01_offset_base = (row * 16u) % 32u;\n let b2_offset_base = (row * 8u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b01_offset = b01_offset_base + (c * 8u);\n let b2_offset = b2_offset_base + (c * 4u);\n\n shared_w[dst + c] = vec4(\n 
f32(insertBits(extractBits(b01, b01_offset, 2u), extractBits(b2, b2_offset, 1u), 2u, 1u)), \n f32(insertBits(extractBits(b01, b01_offset + 2, 2u), extractBits(b2, b2_offset + 1, 1u), 2u, 1u)),\n f32(insertBits(extractBits(b01, b01_offset + 4, 2u), extractBits(b2, b2_offset + 2, 1u), 2u, 1u)),\n f32(insertBits(extractBits(b01, b01_offset + 6, 2u), extractBits(b2, b2_offset + 3, 1u), 2u, 1u))); \n }\n";
// 4-bit: one word per row, one nibble per column.
var forwardMultipleLoadW4Bit="\n let b03 = blocks[src + row];\n \n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b03_offset = (c * 16u); \n shared_w[dst + c] = vec4(\n f32(extractBits(b03, b03_offset, 4u)), \n f32(extractBits(b03, b03_offset + 4, 4u)),\n f32(extractBits(b03, b03_offset + 8, 4u)),\n f32(extractBits(b03, b03_offset + 12, 4u)));\n }\n";
// 5-bit: 4-bit plane at src, plus a 1-bit plane at src + 16 inserted as bit 4.
var forwardMultipleLoadW5Bit="\n let b03 = blocks[src + row];\n let b4 = blocks[src + 16u + (row / 4u)];\n \n let b4_offset_base = (row * 8u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b03_offset = (c * 16u);\n let b4_offset = b4_offset_base + (c * 4u);\n shared_w[dst + c] = vec4(\n f32(insertBits(extractBits(b03, b03_offset, 4u), extractBits(b4, b4_offset, 1u), 4u, 1u)), \n f32(insertBits(extractBits(b03, b03_offset + 4, 4u), extractBits(b4, b4_offset + 1, 1u), 4u, 1u)),\n f32(insertBits(extractBits(b03, b03_offset + 8, 4u), extractBits(b4, b4_offset + 2, 1u), 4u, 1u)),\n f32(insertBits(extractBits(b03, b03_offset + 12, 4u), extractBits(b4, b4_offset + 3, 1u), 4u, 1u)));\n }\n";
// 6-bit: 4-bit plane at src, plus a 2-bit plane at src + 16 inserted as bits 4..5.
var forwardMultipleLoadW6Bit="\n let b03 = blocks[src + row];\n let b45 = blocks[src + 16u + (row / 2u)];\n \n let b45_offset_base = (row * 16u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) { \n let b03_offset = (c * 16u);\n let b45_offset = b45_offset_base + (c * 8u);\n shared_w[dst + c] = vec4(\n f32(insertBits(extractBits(b03, b03_offset, 4u), extractBits(b45, b45_offset, 2u), 4u, 2u)), \n f32(insertBits(extractBits(b03, b03_offset + 4, 4u), extractBits(b45, b45_offset + 2, 2u), 4u, 2u)),\n 
f32(insertBits(extractBits(b03, b03_offset + 8, 4u), extractBits(b45, b45_offset + 4, 2u), 4u, 2u)),\n f32(insertBits(extractBits(b03, b03_offset + 12, 4u), extractBits(b45, b45_offset + 6, 2u), 4u, 2u)));\n }\n";
// 7-bit: the 6-bit layout plus a 1-bit plane at src + 24 inserted as bit 6.
var forwardMultipleLoadW7Bit="\n let b03 = blocks[src + row];\n let b45 = blocks[src + 16u + (row / 2u)];\n let b6 = blocks[src + 24u + (row / 4u)];\n \n let b45_offset_base = (row * 16u) % 32u;\n let b6_offset_base = (row * 8u) % 32u;\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b03_offset = (c * 16u);\n let b45_offset = b45_offset_base + (c * 8u);\n let b6_offset = b6_offset_base + (c * 4u);\n \n shared_w[dst + c] = vec4(\n f32(insertBits(insertBits(extractBits(b03, b03_offset, 4u), extractBits(b45, b45_offset, 2u), 4u, 2u), extractBits(b6, b6_offset, 1u), 6u, 1u)), \n f32(insertBits(insertBits(extractBits(b03, b03_offset + 4, 4u), extractBits(b45, b45_offset + 2, 2u), 4u, 2u), extractBits(b6, b6_offset + 1, 1u), 6u, 1u)),\n f32(insertBits(insertBits(extractBits(b03, b03_offset + 8, 4u), extractBits(b45, b45_offset + 4, 2u), 4u, 2u), extractBits(b6, b6_offset + 2, 1u), 6u, 1u)),\n f32(insertBits(insertBits(extractBits(b03, b03_offset + 12, 4u), extractBits(b45, b45_offset + 6, 2u), 4u, 2u), extractBits(b6, b6_offset + 3, 1u), 6u, 1u)));\n }\n";
// 8-bit: two words per row, one byte per column.
var forwardMultipleLoadW8Bit="\n let b07_offset = src + (row * 2);\n \n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n let b07 = blocks[b07_offset + c];\n shared_w[dst + c] = vec4(\n f32(extractBits(b07, 0u, 8u)), \n f32(extractBits(b07, 8u, 8u)),\n f32(extractBits(b07, 16u, 8u)),\n f32(extractBits(b07, 24u, 8u)));\n }\n";
// Bit depth (1..8) -> weight-decoding fragment; indexed by forwardMultipleLoadWSnippet.
var forwardLoadWBitDepthSnippets={1:forwardMultipleLoadW1Bit,2:forwardMultipleLoadW2Bit,3:forwardMultipleLoadW3Bit,4:forwardMultipleLoadW4Bit,5:forwardMultipleLoadW5Bit,6:forwardMultipleLoadW6Bit,7:forwardMultipleLoadW7Bit,8:forwardMultipleLoadW8Bit};
// WGSL loop that fills shared_x for the current bk_idx: index idx = local_idx * TC + tid
// is loaded from x when bk_idx < args.k and the column is < args.n, otherwise zeroed.
var forwardMultipleLoadXSnippet="\n let total_work_x = VEC_COL_PER_BLOCK * BN;\n for (var local_idx = 0u; local_idx < 
divide_pad(total_work_x, TC); local_idx++) {\n let idx = local_idx * TC + tid;\n if (idx < total_work_x) { \n let n_load_idx = local_bn_idx + idx / VEC_COL_PER_BLOCK;\n let inner_idx = idx % VEC_COL_PER_BLOCK;\n \n if (bk_idx < args.k && n_load_idx < args.n) { \n let x_idx = (args.x_offset / 4u) + ((bk_idx * args.n + n_load_idx) * VEC_COL_PER_BLOCK + inner_idx); \n shared_x[idx] = x[x_idx];\n } else {\n shared_x[idx] = vec4(0.0);\n }\n }\n }\n";
// WGSL loop that fills shared_ab with two values per block row: the low and
// high 16-bit halves of the block's meta word, each converted with from_fp510.
// Entries outside args.m / args.k are set to 0.0.
var forwardMultipleLoadABSnippet="\n let total_work_ab = BM * 2;\n for (var local_idx = 0u; local_idx < divide_pad(total_work_ab, TC); local_idx++) {\n let idx = local_idx * TC + tid;\n if (idx < total_work_ab) {\n let m_load_idx = local_bm_idx + idx / 2; \n let inner_idx = (idx % 2) * 16u;\n \n if (m_load_idx < args.m && bk_idx < args.k) {\n let ab_bits = extractBits(metas[args.metas_offset + (m_load_idx * args.k + bk_idx)], inner_idx, 16u);\n shared_ab[idx] = from_fp510(ab_bits); \n } else {\n shared_ab[idx] = 0.0;\n }\n }\n }\n";
// Builds the WGSL loop that fills shared_w for the current bk_idx.
// bitDepth selects the decoding fragment from forwardLoadWBitDepthSnippets; the
// generated code declares `row`, `dst` and `src` for that fragment and zero-fills
// rows whose block row index is >= args.m.
// NOTE(review): a bitDepth outside 1..8 is not rejected here; the lookup would
// yield undefined and the text "undefined" would be spliced into the shader.
var forwardMultipleLoadWSnippet=function forwardMultipleLoadWSnippet(bitDepth){return"\n let total_work_w = BM * ROW_PER_BLOCK;\n for (var local_idx = 0u; local_idx < divide_pad(total_work_w, TC); local_idx++) {\n let idx = local_idx * TC + tid;\n if (idx < total_work_w) {\n let m_load_idx = local_bm_idx + idx / ROW_PER_BLOCK;\n let row = idx % ROW_PER_BLOCK;\n let dst = idx * VEC_COL_PER_BLOCK;\n\n if (m_load_idx < args.m) {\n let src = args.blocks_offset + (m_load_idx * args.k + bk_idx) * block_size;\n ".concat(forwardLoadWBitDepthSnippets[bitDepth],"\n } else { \n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n shared_w[dst + c] = vec4(0.0);\n }\n }\n }\n }\n")};
// WGSL loop that copies this invocation's TN columns from shared_x into local_x
// and stores the sum of each column's components in local_x_sums.
var forwardMultipleCopyXSnippet="\nfor (var tn_idx = 0u; tn_idx < TN; tn_idx++) { \n var x_sum_vec = vec4(0.0); \n let local_x_idx = tn_idx * VEC_COL_PER_BLOCK;\n let shared_x_idx = (n_idx * TN + tn_idx) * VEC_COL_PER_BLOCK + (k_idx * VEC_COL_PER_BLOCK);\n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) {\n local_x[local_x_idx + c] = 
shared_x[shared_x_idx + c];\n x_sum_vec += local_x[local_x_idx + c];\n }\n local_x_sums[tn_idx] = x_sum_vec.x + x_sum_vec.y + x_sum_vec.z + x_sum_vec.w; \n}\n";
// WGSL loop that accumulates the TM x TN result tile: for each (tm_idx, tn_idx)
// it adds alpha * sum(x) + beta * dot(w_row, x), with alpha/beta read from
// shared_ab and the weight row from shared_w.
var forwardMultipleComputeResultsSnippet="\n for (var tm_idx = 0u; tm_idx < TM; tm_idx++) { \n let shared_ab_idx = ((m_idx * TM + tm_idx) / ROW_PER_BLOCK + k_idx) * 2;\n let alpha = shared_ab[shared_ab_idx];\n let beta = shared_ab[shared_ab_idx + 1]; \n let shared_w_idx = ((m_idx * TM + tm_idx) + k_idx) * VEC_COL_PER_BLOCK;\n \n for (var tn_idx = 0u; tn_idx < TN; tn_idx++) { \n let local_x_idx = tn_idx * VEC_COL_PER_BLOCK;\n \n var swx_vec = vec4(0.0); \n for (var c = 0u; c < VEC_COL_PER_BLOCK; c++) { \n swx_vec += shared_w[shared_w_idx + c] * local_x[local_x_idx + c];\n }\n let swx = swx_vec.x + swx_vec.y + swx_vec.z + swx_vec.w;\n \n let kappa = alpha * local_x_sums[tn_idx]; \n let results_idx = tm_idx * TN + tn_idx;\n local_results[results_idx] += kappa + (beta * swx);\n }\n }\n";
// WGSL loop that adds the accumulated tile into y (note `+=`, not `=`), skipping
// rows >= args.m * ROW_PER_BLOCK and columns >= args.n.
var forwardMultipleWriteResultsSnippet="\nfor (var tm_idx = 0u; tm_idx < TM; tm_idx++) {\n let row = local_bm_idx * ROW_PER_BLOCK + (m_idx * TM + tm_idx); \n for (var tn_idx = 0u; tn_idx < TN; tn_idx++) { \n let col = local_bn_idx + (n_idx * TN + tn_idx);\n if (row < args.m * ROW_PER_BLOCK && col < args.n) {\n let y_idx = args.y_offset + ((row / ROW_PER_BLOCK) * args.n + col) * ROW_PER_BLOCK + (row % ROW_PER_BLOCK);\n let results_idx = tm_idx * TN + tn_idx;\n \n y[y_idx] += local_results[results_idx];\n }\n }\n}\n";
// Assembles the complete multi-input forward shader for one bit depth: bindings,
// constants, array declarations, helper functions (fromFP510Function,
// dividePadFunction), the bit_depth constant, and a main() that loops bk_idx over
// args.k, loading x / metas / weights, then a workgroupBarrier, then the
// copy + compute snippets, then a second barrier; results are written after the loop.
var forwardMultipleShaderSources=function forwardMultipleShaderSources(bitDepth){return"\n\n".concat(forwardMultipleInputArgsSnippet,"\n\n").concat(constantSnippet,"\n\n").concat(forwardMultipleSharedPrivateMemSnippet,"\n\n").concat(fromFP510Function,"\n\n").concat(dividePadFunction,"\n\nconst bit_depth: u32 = ").concat(bitDepth,"u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n \n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, 
workgroup_size_z)\nfn main(\n @builtin(local_invocation_id) local_id: vec3,\n @builtin(workgroup_id) workgroup_id: vec3\n) {\n ").concat(forwardMultipleLocalVarSnippet,"\n \n for (var bk_idx = 0u; bk_idx < args.k; bk_idx++) { \n ").concat(forwardMultipleLoadXSnippet,"\n ").concat(forwardMultipleLoadABSnippet," \n ").concat(forwardMultipleLoadWSnippet(bitDepth)," \n workgroupBarrier();\n \n ").concat(forwardMultipleCopyXSnippet,"\n ").concat(forwardMultipleComputeResultsSnippet,"\n workgroupBarrier();\n }\n \n ").concat(forwardMultipleWriteResultsSnippet,"\n}\n\n").concat(emptyShader,"\n")};var forwardShuffleXShaderSource="\nstruct argsStruct {\n n: u32,\n shape1: u32,\n x_offset: u32,\n indices_offset: u32, \n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array;\n\n@group(0) @binding(2)\nvar indices: array;\n\n@group(0) @binding(3)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n || global_id.y >= args.shape1) {\n return;\n } \n\n let b = global_id.x;\n let i = global_id.y;\n \n let c = i / 8u;\n let j = i % 8u;\n y[((c * args.n) + b) * 8 + j] = x[args.x_offset + (b * args.shape1) + indices[args.indices_offset + i]];\n}\n\n".concat(emptyShader,"\n");var forwardSingleReduceYShaderSource="\nstruct argsStruct {\n nvr: u32,\n nbc: u32, \n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar y: array>;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) { \n if (global_id.x > args.nvr) {\n return;\n }\n\n let x_start = 
global_id.x * args.nbc;\n var sum: vec4 = vec4(0.0, 0.0, 0.0, 0.0);\n for (var i = 0u; i < args.nbc; i++) {\n sum += x[x_start + i]; \n }\n y[global_id.x] += sum;\n}\n\n".concat(emptyShader);var forwardShuffleYShaderSource="\nstruct argsStruct {\n n: u32,\n shape0: u32, \n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array;\n\n@group(0) @binding(2)\nvar y: array;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= args.n || global_id.y >= args.shape0) {\n return;\n } \n \n let b = global_id.x;\n let i = global_id.y;\n \n let r = i / 16u;\n let j = i % 16u;\n y[(b * args.shape0) + (r * 16) + j] = x[(((r * args.n) + b) * 16) + j];\n}\n\n".concat(emptyShader);var addBiasShaderSource="\nstruct argsStruct {\n dimension: u32\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar bias: array;\n\n@group(0) @binding(2)\nvar y: array;\n\n".concat(fromFP510Function,"\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) { \n y[(global_id.x * args.dimension) + global_id.y] += bias[global_id.y];\n}\n\n").concat(emptyShader);var forwardSingleBitDepth1ShaderSource="\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(fromFP510Function,"\n\nconst block_size: u32 
= 4u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n\n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(columnsPerBlock,";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b0_start = row_blocks_start + br_offset + (bc * block_size);\n var b0_offset = 0u;\n \n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(columnsPerBlock,"; j+=4) {\n \n var b0 = blocks[b0_start];\n \n let w0_0 = f32(extractBits(b0, b0_offset + j, 1u)); \n let w0_1 = f32(extractBits(b0, b0_offset + j + 1, 1u)); \n let w0_2 = f32(extractBits(b0, b0_offset + j + 2, 1u));\n let w0_3 = f32(extractBits(b0, b0_offset + j + 3, 1u));\n \n b0_offset = 8u;\n \n let w1_0 = f32(extractBits(b0, b0_offset + j, 1u)); \n let w1_1 = f32(extractBits(b0, b0_offset + j + 1, 1u)); \n let w1_2 = f32(extractBits(b0, b0_offset + j + 2, 1u));\n let w1_3 = f32(extractBits(b0, b0_offset + j + 3, 1u));\n \n b0_offset = 16u;\n \n let w2_0 = f32(extractBits(b0, b0_offset + j, 1u)); \n let w2_1 = f32(extractBits(b0, b0_offset + j + 1, 1u)); \n let w2_2 = f32(extractBits(b0, b0_offset + j + 2, 1u));\n let w2_3 = f32(extractBits(b0, b0_offset + j + 3, 1u));\n \n b0_offset = 24u;\n \n let w3_0 = f32(extractBits(b0, b0_offset + j, 1u)); \n let w3_1 = f32(extractBits(b0, b0_offset 
+ j + 1, 1u)); \n let w3_2 = f32(extractBits(b0, b0_offset + j + 2, 1u));\n let w3_3 = f32(extractBits(b0, b0_offset + j + 3, 1u));\n \n b0_offset = 0u;\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w; \n }\n \n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(emptyShader);
// WGSL compute shader text for the single-input forward pass over 2-bit blocks
// (8 u32 words per block). Each invocation handles four consecutive rows of one
// block column; the four rows occupy two words (two 16-bit rows per word), so
// the second word is fetched for rows 2 and 3. Output is accumulated into y as
// (swx * beta) + alpha * sum(x), with alpha/beta decoded from the meta word.
var forwardSingleBitDepth2ShaderSource="\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(fromFP510Function,"\n\nconst block_size: u32 = 8u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n\n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(columnsPerBlock,";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + 
c) / 4u);\n \n var b01_start = row_blocks_start + (br_offset * 2u) + (bc * block_size);\n var b01_offset = 0u;\n \n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(columnsPerBlock,"; j+=4) {\n \n var b01 = blocks[b01_start];\n \n let w0_0 = f32(extractBits(b01, b01_offset + (2u * j), 2u)); \n let w0_1 = f32(extractBits(b01, b01_offset + (2u * (j + 1)), 2u)); \n let w0_2 = f32(extractBits(b01, b01_offset + (2u * (j + 2)), 2u));\n let w0_3 = f32(extractBits(b01, b01_offset + (2u * (j + 3)), 2u));\n \n b01_offset = 16u;\n \n let w1_0 = f32(extractBits(b01, b01_offset + (2u * j), 2u)); \n let w1_1 = f32(extractBits(b01, b01_offset + (2u * (j + 1)), 2u)); \n let w1_2 = f32(extractBits(b01, b01_offset + (2u * (j + 2)), 2u));\n let w1_3 = f32(extractBits(b01, b01_offset + (2u * (j + 3)), 2u));\n \n b01_offset = 0u; \n b01 = blocks[b01_start + 1u];\n \n let w2_0 = f32(extractBits(b01, b01_offset + (2u * j), 2u)); \n let w2_1 = f32(extractBits(b01, b01_offset + (2u * (j + 1)), 2u)); \n let w2_2 = f32(extractBits(b01, b01_offset + (2u * (j + 2)), 2u));\n let w2_3 = f32(extractBits(b01, b01_offset + (2u * (j + 3)), 2u));\n \n b01_offset = 16u;\n \n let w3_0 = f32(extractBits(b01, b01_offset + (2u * j), 2u)); \n let w3_1 = f32(extractBits(b01, b01_offset + (2u * (j + 1)), 2u)); \n let w3_2 = f32(extractBits(b01, b01_offset + (2u * (j + 2)), 2u));\n let w3_3 = f32(extractBits(b01, b01_offset + (2u * (j + 3)), 2u));\n \n b01_offset = 0u;\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w; \n }\n \n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(emptyShader);var 
// WGSL compute shader text for the single-input forward pass over 3-bit blocks
// (12 u32 words per block). Each weight is rebuilt from a 2-bit plane (b01,
// two words for the invocation's four rows) and a 1-bit plane (b2, starting
// 8 words into the block, one word for the four rows) that supplies bit 2.
// Output is accumulated into y as (swx * beta) + alpha * sum(x).
forwardSingleBitDepth3ShaderSource="\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(fromFP510Function,"\n\nconst block_size: u32 = 12u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id : vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(columnsPerBlock,";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n\n let x_start = ((args.x_offset + c) / 4u);\n \n var b01_start = row_blocks_start + (br_offset * 2u) + (bc * block_size);\n var b2_start = row_blocks_start + br_offset + (bc * block_size) + 8u;\n var b01_offset = 0u;\n var b2_offset = 0u;\n\n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(columnsPerBlock,"; j+=4) { \n \n var b01 = blocks[b01_start];\n var b2 = blocks[b2_start];\n \n var b01_bits = extractBits(b01, b01_offset + (2u * j), 2u);\n var b2_bit = extractBits(b2, b2_offset + j, 1u);\n let w0_0 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 1)), 2u);\n b2_bit 
= extractBits(b2, b2_offset + (j + 1), 1u);\n let w0_1 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 2)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 2), 1u);\n let w0_2 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 3)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 3), 1u);\n let w0_3 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_offset = 16u;\n b2_offset = 8u;\n \n b01_bits = extractBits(b01, b01_offset + (2u * j), 2u);\n b2_bit = extractBits(b2, b2_offset + j, 1u); \n let w1_0 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 1)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 1), 1u);\n let w1_1 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 2)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 2), 1u);\n let w1_2 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 3)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 3), 1u);\n let w1_3 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_offset = 0u;\n b2_offset = 16u; \n b01 = blocks[b01_start + 1u];\n \n b01_bits = extractBits(b01, b01_offset + (2u * j), 2u);\n b2_bit = extractBits(b2, b2_offset + j, 1u); \n let w2_0 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 1)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 1), 1u);\n let w2_1 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 2)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 2), 1u);\n let w2_2 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 3)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 3), 1u);\n let w2_3 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_offset = 16u;\n b2_offset = 24u; \n 
\n b01_bits = extractBits(b01, b01_offset + (2u * j), 2u);\n b2_bit = extractBits(b2, b2_offset + j, 1u); \n let w3_0 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 1)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 1), 1u);\n let w3_1 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 2)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 2), 1u);\n let w3_2 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_bits = extractBits(b01, b01_offset + (2u * (j + 3)), 2u);\n b2_bit = extractBits(b2, b2_offset + (j + 3), 1u);\n let w3_3 = f32(insertBits(b01_bits, b2_bit, 2u, 1u));\n \n b01_offset = 0u;\n b2_offset = 0u; \n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n \n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(emptyShader);
// WGSL compute shader text for the single-input forward pass over 4-bit blocks
// (16 u32 words per block). Each of the invocation's four rows is one word
// (b03_start + 0..3) holding one nibble per column. Output is accumulated into
// y as (swx * beta) + alpha * sum(x).
var forwardSingleBitDepth4ShaderSource="\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(fromFP510Function,"\n\nconst block_size: u32 = 16u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || 
global_id.y >= args.bit_depth_nbc) {\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(columnsPerBlock,";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b03_start = row_blocks_start + (br_offset * 4u) + (bc * block_size); \n\n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n\n for (var j = 0u; j < ").concat(columnsPerBlock,"; j+=4) {\n \n var b03 = blocks[b03_start]; \n \n let w0_0 = f32(extractBits(b03, 4u * j, 4u)); \n let w0_1 = f32(extractBits(b03, 4u * (j + 1), 4u)); \n let w0_2 = f32(extractBits(b03, 4u * (j + 2), 4u));\n let w0_3 = f32(extractBits(b03, 4u * (j + 3), 4u));\n \n b03 = blocks[b03_start + 1];\n \n let w1_0 = f32(extractBits(b03, 4u * j, 4u)); \n let w1_1 = f32(extractBits(b03, 4u * (j + 1), 4u)); \n let w1_2 = f32(extractBits(b03, 4u * (j + 2), 4u));\n let w1_3 = f32(extractBits(b03, 4u * (j + 3), 4u));\n \n b03 = blocks[b03_start + 2];\n \n let w2_0 = f32(extractBits(b03, 4u * j, 4u)); \n let w2_1 = f32(extractBits(b03, 4u * (j + 1), 4u)); \n let w2_2 = f32(extractBits(b03, 4u * (j + 2), 4u));\n let w2_3 = f32(extractBits(b03, 4u * (j + 3), 4u));\n \n b03 = blocks[b03_start + 3];\n \n let w3_0 = f32(extractBits(b03, 4u * j, 4u)); \n let w3_1 = f32(extractBits(b03, 4u * (j + 1), 4u)); \n let w3_2 = f32(extractBits(b03, 4u * (j + 2), 4u));\n let w3_3 = f32(extractBits(b03, 4u * (j + 3), 4u));\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * 
x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(emptyShader);
// WGSL compute shader text for the single-input forward pass over 5-bit blocks
// (20 u32 words per block). Each weight is rebuilt from a 4-bit plane (b03, one
// word per row) and a 1-bit plane (b4, starting 16 words into the block, one
// word for the invocation's four rows at bit offsets 0/8/16/24) that supplies
// bit 4. Output is accumulated into y as (swx * beta) + alpha * sum(x).
var forwardSingleBitDepth5ShaderSource="\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(fromFP510Function,"\n\nconst block_size: u32 = 20u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(columnsPerBlock,";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b03_start = row_blocks_start + (br_offset * 4u) + (bc * block_size);\n var b4_start = row_blocks_start + br_offset + (bc * block_size) + 16u;\n \n var b4_offset = 0u;\n \n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(columnsPerBlock,"; j+=4) {\n \n var b03 = blocks[b03_start];\n var b4 = 
blocks[b4_start];\n \n var b03_bits = extractBits(b03, 4u * j, 4u);\n var b4_bit = extractBits(b4, b4_offset + j, 1u); \n let w0_0 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 1), 1u);\n let w0_1 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 2), 1u);\n let w0_2 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 3), 1u);\n let w0_3 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03 = blocks[b03_start + 1];\n b4_offset = 8u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b4_bit = extractBits(b4, b4_offset + j, 1u); \n let w1_0 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 1), 1u);\n let w1_1 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 2), 1u);\n let w1_2 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 3), 1u);\n let w1_3 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03 = blocks[b03_start + 2];\n b4_offset = 16u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b4_bit = extractBits(b4, b4_offset + j, 1u); \n let w2_0 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 1), 1u);\n let w2_1 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 2), 1u);\n let w2_2 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 3), 1u);\n let w2_3 = 
f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03 = blocks[b03_start + 3];\n b4_offset = 24u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b4_bit = extractBits(b4, b4_offset + j, 1u); \n let w3_0 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 1), 1u);\n let w3_1 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 2), 1u);\n let w3_2 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b4_bit = extractBits(b4, b4_offset + (j + 3), 1u);\n let w3_3 = f32(insertBits(b03_bits, b4_bit, 4u, 1u));\n \n b4_offset = 0u;\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(emptyShader);var forwardSingleBitDepth6ShaderSource="\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(fromFP510Function,"\n\nconst block_size: u32 = 24u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 
4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(columnsPerBlock,";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b03_start = row_blocks_start + (br_offset * 4u) + (bc * block_size);\n var b45_start = row_blocks_start + (br_offset * 2u) + (bc * block_size) + 16u;\n \n var b45_offset = 0u;\n \n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(columnsPerBlock,"; j+=4) {\n \n var b03 = blocks[b03_start];\n var b45 = blocks[b45_start];\n \n var b03_bits = extractBits(b03, 4u * j, 4u);\n var b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n let w0_0 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n let w0_1 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n let w0_2 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n let w0_3 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03 = blocks[b03_start + 1];\n b45_offset = 16u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n let w1_0 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n let 
w1_1 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n let w1_2 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n let w1_3 = f32(insertBits(b03_bits, b45_bits, 4u, 2u)); \n \n b03 = blocks[b03_start + 2];\n b45 = blocks[b45_start + 1];\n b45_offset = 0u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n let w2_0 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n let w2_1 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n let w2_2 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n let w2_3 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03 = blocks[b03_start + 3];\n b45_offset = 16u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n let w3_0 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n let w3_1 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n let w3_2 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n let w3_3 = f32(insertBits(b03_bits, b45_bits, 4u, 2u));\n \n b45_offset = 0u;\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, 
\n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(emptyShader);var forwardSingleBitDepth7ShaderSource="\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(fromFP510Function,"\n\nconst block_size: u32 = 28u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(columnsPerBlock,";\n let br = global_id.x / 4u; \n let bc = global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b03_start = row_blocks_start + (br_offset * 4u) + (bc * block_size);\n var b45_start = row_blocks_start + (br_offset * 2u) + (bc * block_size) + 16u;\n var b6_start = row_blocks_start + br_offset + (bc * 
block_size) + 24u;\n \n var b45_offset = 0u;\n var b6_offset = 0u;\n \n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n \n for (var j = 0u; j < ").concat(columnsPerBlock,"; j+=4) {\n \n var b03 = blocks[b03_start];\n var b45 = blocks[b45_start];\n var b6 = blocks[b6_start];\n \n var b03_bits = extractBits(b03, 4u * j, 4u);\n var b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n var b6_bit = extractBits(b6, b6_offset + j, 1u);\n let w0_0 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 1, 1u);\n let w0_1 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 2, 1u);\n let w0_2 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u)); \n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 3, 1u);\n let w0_3 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03 = blocks[b03_start + 1];\n b45_offset = 16u;\n b6_offset = 8u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n b6_bit = extractBits(b6, b6_offset + j, 1u);\n let w1_0 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 1, 1u);\n let w1_1 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n b6_bit = extractBits(b6, b6_offset + j 
+ 2, 1u);\n let w1_2 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u)); \n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 3, 1u);\n let w1_3 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03 = blocks[b03_start + 2];\n b45 = blocks[b45_start + 1];\n b45_offset = 0u;\n b6_offset = 16u;\n\n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n b6_bit = extractBits(b6, b6_offset + j, 1u);\n let w2_0 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 1, 1u);\n let w2_1 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 2, 1u);\n let w2_2 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u)); \n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 3, 1u);\n let w2_3 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03 = blocks[b03_start + 3];\n b45_offset = 16u;\n b6_offset = 24u;\n \n b03_bits = extractBits(b03, 4u * j, 4u);\n b45_bits = extractBits(b45, b45_offset + (j * 2), 2u);\n b6_bit = extractBits(b6, b6_offset + j, 1u);\n let w3_0 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 1), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 1)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 1, 1u);\n let w3_1 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 
6u, 1u));\n \n b03_bits = extractBits(b03, 4u * (j + 2), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 2)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 2, 1u);\n let w3_2 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u)); \n \n b03_bits = extractBits(b03, 4u * (j + 3), 4u);\n b45_bits = extractBits(b45, b45_offset + (2u * (j + 3)), 2u);\n b6_bit = extractBits(b6, b6_offset + j + 3, 1u);\n let w3_3 = f32(insertBits(insertBits(b03_bits, b45_bits, 4u, 2u), b6_bit, 6u, 1u));\n \n b45_offset = 0u;\n b6_offset = 0u;\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_idx = (j / 4u);\n let x_vec = x[x_start + x_idx];\n res[x_idx] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(emptyShader);var forwardSingleBitDepth8ShaderSource="\n\nstruct argsStruct {\n n: u32,\n nbr: u32,\n total_nbc: u32,\n bit_depth_nbc: u32,\n x_offset: u32,\n metas_offset: u32,\n blocks_offset: u32,\n y_offset: u32,\n};\n\n@group(0) @binding(0) \nvar args: argsStruct;\n\n@group(0) @binding(1)\nvar x: array>;\n\n@group(0) @binding(2)\nvar metas: array;\n\n@group(0) @binding(3)\nvar blocks: array;\n\n@group(0) @binding(4)\nvar y: array>;\n\n".concat(fromFP510Function,"\n\nconst block_size: u32 = 32u;\n\noverride workgroup_size_x: u32 = 1;\noverride workgroup_size_y: u32 = 1;\noverride workgroup_size_z: u32 = 1;\n\n@compute @workgroup_size(workgroup_size_x, workgroup_size_y, workgroup_size_z)\nfn main(@builtin(global_invocation_id) global_id: vec3) {\n if (global_id.x >= (args.nbr * 4) || global_id.y >= args.bit_depth_nbc) {\n return;\n }\n \n let r = global_id.x * 4u;\n let c = global_id.y * ").concat(columnsPerBlock,";\n let br = global_id.x / 4u; \n let bc = 
global_id.y;\n let br_offset = global_id.x % 4u;\n \n let row_metas_start: u32 = args.metas_offset + (br * args.bit_depth_nbc);\n let row_blocks_start: u32 = args.blocks_offset + (br * args.bit_depth_nbc * block_size); \n \n let alpha = from_fp510(extractBits(metas[row_metas_start + bc], 0, 16u)); \n let beta = from_fp510(extractBits(metas[row_metas_start + bc], 16u, 16u));\n \n let x_start = ((args.x_offset + c) / 4u);\n \n var b07_start = row_blocks_start + (br_offset * 8u) + (bc * block_size); \n\n var res: array, 2u>;\n var x_sum: f32 = 0.0;\n\n for (var j = 0u; j < 2; j++) {\n \n var b07 = blocks[b07_start + j];\n \n let w0_0 = f32(extractBits(b07, 0u, 8u)); \n let w0_1 = f32(extractBits(b07, 8u, 8u)); \n let w0_2 = f32(extractBits(b07, 16u, 8u));\n let w0_3 = f32(extractBits(b07, 24u, 8u));\n \n b07 = blocks[b07_start + 2 + j];\n \n let w1_0 = f32(extractBits(b07, 0u, 8u)); \n let w1_1 = f32(extractBits(b07, 8u, 8u)); \n let w1_2 = f32(extractBits(b07, 16u, 8u));\n let w1_3 = f32(extractBits(b07, 24u, 8u));\n \n b07 = blocks[b07_start + 4 + j];\n \n let w2_0 = f32(extractBits(b07, 0u, 8u)); \n let w2_1 = f32(extractBits(b07, 8u, 8u)); \n let w2_2 = f32(extractBits(b07, 16u, 8u));\n let w2_3 = f32(extractBits(b07, 24u, 8u));\n \n b07 = blocks[b07_start + 6 + j];\n \n let w3_0 = f32(extractBits(b07, 0u, 8u)); \n let w3_1 = f32(extractBits(b07, 8u, 8u)); \n let w3_2 = f32(extractBits(b07, 16u, 8u));\n let w3_3 = f32(extractBits(b07, 24u, 8u));\n \n let m = mat4x4(\n w0_0, w1_0, w2_0, w3_0,\n w0_1, w1_1, w2_1, w3_1, \n w0_2, w1_2, w2_2, w3_2,\n w0_3, w1_3, w2_3, w3_3);\n\n let x_vec = x[x_start + j];\n res[j] = m * x_vec;\n \n x_sum += x_vec.x + x_vec.y + x_vec.z + x_vec.w;\n }\n let swx = res[0] + res[1];\n let kappa = alpha * x_sum;\n \n let y_start = (args.y_offset + (r * args.total_nbc)) / 4u;\n y[y_start + bc] += (swx * beta) + vec4(kappa);\n}\n\n").concat(emptyShader);var 
forwardSingleShaderSources={1:forwardSingleBitDepth1ShaderSource,2:forwardSingleBitDepth2ShaderSource,3:forwardSingleBitDepth3ShaderSource,4:forwardSingleBitDepth4ShaderSource,5:forwardSingleBitDepth5ShaderSource,6:forwardSingleBitDepth6ShaderSource,7:forwardSingleBitDepth7ShaderSource,8:forwardSingleBitDepth8ShaderSource};var forwardSingleShaderNames={1:"pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_1_shader",2:"pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_2_shader",3:"pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_3_shader",4:"pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_4_shader",5:"pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_5_shader",6:"pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_6_shader",7:"pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_7_shader",8:"pv_picollm_weight_block_mixed_16x8_forward_single_bit_depth_8_shader"};var forwardShaderSources={1:forwardMultipleShaderSources(1),2:forwardMultipleShaderSources(2),3:forwardMultipleShaderSources(3),4:forwardMultipleShaderSources(4),5:forwardMultipleShaderSources(5),6:forwardMultipleShaderSources(6),7:forwardMultipleShaderSources(7),8:forwardMultipleShaderSources(8)};var forwardShaderNames={1:"pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_1_shader",2:"pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_2_shader",3:"pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_3_shader",4:"pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_4_shader",5:"pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_5_shader",6:"pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_6_shader",7:"pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_7_shader",8:"pv_picollm_weight_block_mixed_16x8_forward_multiple_bit_depth_8_shader"};var forwardShuffleXShaderName="pv_picollm_weight_block_mixed_16x8_forward_shuffle_x_shader";var 
forwardShuffleYShaderName="pv_picollm_weight_block_mixed_16x8_forward_shuffle_y_shader";var addBiasShaderName="pv_picollm_weight_block_mixed_16x8_add_bias_shader";var forwardSingleReduceYShaderName="pv_picollm_weight_block_mixed_16x8_forward_single_reduce_y_shader";var _weightBlockMixed16x;var loadPreprocessBlocksShader=function loadPreprocessBlocksShader(device,bitDepth){var bindGroupLayout=device.createBindGroupLayout({label:"weight preprocess blocks ".concat(bitDepth," bind group layout"),entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"weight preprocess blocks ".concat(bitDepth," pipeline layout"),bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"weight preprocess blocks ".concat(bitDepth," shader module"),code:preprocessShaderSources[bitDepth]});var computePipeline=device.createComputePipeline({label:"weight preprocess blocks ".concat(bitDepth," pipeline"),layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_x:preprocessDim,workgroup_size_y:preprocessDim}}});return{computePipeline:computePipeline}};var loadForwardShuffleXShader=function loadForwardShuffleXShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"weight shuffle x bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"weight shuffle x pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"weight shuffle x shader module",code:forwardShuffleXShaderSource});var 
computePipeline=device.createComputePipeline({label:"weight shuffle x pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_y:PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE}}});return{computePipeline:computePipeline}};var loadForwardSingleReduceYShader=function loadForwardSingleReduceYShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"weight single reduce y bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"weight single reduce y pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"weight single reduce y shader module",code:forwardSingleReduceYShaderSource});var computePipeline=device.createComputePipeline({label:"weight single reduce y pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_x:PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE}}});return{computePipeline:computePipeline}};var loadForwardShuffleYShader=function loadForwardShuffleYShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"weight shuffle y bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"weight shuffle y pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"weight shuffle y shader module",code:forwardShuffleYShaderSource});var computePipeline=device.createComputePipeline({label:"weight shuffle y 
pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint}});return{computePipeline:computePipeline}};var loadForwardSingleShader=function loadForwardSingleShader(device,bitDepth){var entries=[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:4,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}];var bindGroupLayout=device.createBindGroupLayout({label:"weight forward single ".concat(bitDepth," bind group layout"),entries:entries});var pipelineLayout=device.createPipelineLayout({label:"weight forward single ".concat(bitDepth," pipeline layout"),bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"weight forward single ".concat(bitDepth," shader module"),code:forwardSingleShaderSources[bitDepth]});var computePipeline=device.createComputePipeline({label:"weight forward single ".concat(bitDepth," pipeline"),layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_x:weightBlockSize,workgroup_size_y:1}}});return{computePipeline:computePipeline}};var loadForwardShader=function loadForwardShader(device,bitDepth){var entries=[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:3,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:4,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}];var bindGroupLayout=device.createBindGroupLayout({label:"weight forward multi ".concat(bitDepth," bind group layout"),entries:entries});var 
pipelineLayout=device.createPipelineLayout({label:"weight forward multi ".concat(bitDepth," pipeline layout"),bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"weight forward multi ".concat(bitDepth," shader module"),code:forwardShaderSources[bitDepth]});var computePipeline=device.createComputePipeline({label:"weight forward multi ".concat(bitDepth," pipeline"),layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint,constants:{workgroup_size_x:TC}}});return{computePipeline:computePipeline}};var loadAddBiasShader=function loadAddBiasShader(device){var bindGroupLayout=device.createBindGroupLayout({label:"weight add bias bind group layout",entries:[{binding:0,visibility:GPUShaderStage.COMPUTE,buffer:{type:"uniform"}},{binding:1,visibility:GPUShaderStage.COMPUTE,buffer:{type:"read-only-storage"}},{binding:2,visibility:GPUShaderStage.COMPUTE,buffer:{type:"storage"}}]});var pipelineLayout=device.createPipelineLayout({label:"weight add bias pipeline layout",bindGroupLayouts:[bindGroupLayout]});var shaderModule=device.createShaderModule({label:"weight add bias shader module",code:addBiasShaderSource});var computePipeline=device.createComputePipeline({label:"weight add bias pipeline",layout:pipelineLayout,compute:{module:shaderModule,entryPoint:shaderEntryPoint}});return{computePipeline:computePipeline}};var weightBlockMixed16x8Shaders=(_weightBlockMixed16x={},_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_weightBlockMixed16x,preprocessShaderNames[3],function(device){return loadPreprocessBlocksShader(device,3)}),preprocessShaderNames[5],function(device){return loadPreprocessBlocksShader(device,5)}),preprocessShaderNames[6],function(device){return loadPreprocessBlocksShader(device,6)}),preprocessShaderNames[7],function(device){return 
loadPreprocessBlocksShader(device,7)}),forwardShuffleXShaderName,loadForwardShuffleXShader),forwardShuffleYShaderName,loadForwardShuffleYShader),forwardSingleReduceYShaderName,loadForwardSingleReduceYShader),forwardSingleShaderNames[1],function(device){return loadForwardSingleShader(device,1)}),forwardSingleShaderNames[2],function(device){return loadForwardSingleShader(device,2)}),forwardSingleShaderNames[3],function(device){return loadForwardSingleShader(device,3)}),_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_defineProperty(_weightBlockMixed16x,forwardSingleShaderNames[4],function(device){return loadForwardSingleShader(device,4)}),forwardSingleShaderNames[5],function(device){return loadForwardSingleShader(device,5)}),forwardSingleShaderNames[6],function(device){return loadForwardSingleShader(device,6)}),forwardSingleShaderNames[7],function(device){return loadForwardSingleShader(device,7)}),forwardSingleShaderNames[8],function(device){return loadForwardSingleShader(device,8)}),forwardShaderNames[1],function(device){return loadForwardShader(device,1)}),forwardShaderNames[2],function(device){return loadForwardShader(device,2)}),forwardShaderNames[3],function(device){return loadForwardShader(device,3)}),forwardShaderNames[4],function(device){return loadForwardShader(device,4)}),forwardShaderNames[5],function(device){return loadForwardShader(device,5)}),_defineProperty(_defineProperty(_defineProperty(_defineProperty(_weightBlockMixed16x,forwardShaderNames[6],function(device){return loadForwardShader(device,6)}),forwardShaderNames[7],function(device){return loadForwardShader(device,7)}),forwardShaderNames[8],function(device){return loadForwardShader(device,8)}),addBiasShaderName,loadAddBiasShader));var getPicollmWeightBlockMixed16x8WebGpuFunctions=function getPicollmWeightBlockMixed16x8WebGpuFunctions(){var setStatus=function 
setStatus(statusAddress,value){Module.HEAP32[statusAddress/Int32Array.BYTES_PER_ELEMENT]=value};var pvPicollmPreprocessBlocksWebGpu=function pvPicollmPreprocessBlocksWebGpu(objAddress,bitDepth,blocksAddress,blocksOffsetBytes,nbr,nbc,statusAddress){var _gpuBuffers$get;objAddress=unsignedAddress(objAddress);blocksAddress=unsignedAddress(blocksAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[preprocessShaderNames[bitDepth]];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var blocksBuffer=(_gpuBuffers$get=gpuBuffers.get(blocksAddress))===null||_gpuBuffers$get===void 0?void 0:_gpuBuffers$get.buffer;if(!blocksBuffer){console.error("blocks buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(3*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"weight preprocess blocks ".concat(bitDepth," arg buffer"));obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([nbr,nbc,blocksOffsetBytes/4]));obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"weight preprocess blocks ".concat(bitDepth," bind group"),layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:blocksBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,preprocessShaderNames[bitDepth],Math.ceil(nbr/preprocessDim),Math.ceil(nbc/preprocessDim));setStatus(statusAddress,0)};var pvPicollmForwardSingleShuffleXWebGpu=function pvPicollmForwardSingleShuffleXWebGpu(objAddress,xAddress,xOffsetBytes,indicesAddress,indicesOffsetBytes,shape1,yAddress,statusAddress){var 
_gpuBuffers$get2,_gpuBuffers$get3,_gpuBuffers$get4;objAddress=unsignedAddress(objAddress);xAddress=unsignedAddress(xAddress);indicesAddress=unsignedAddress(indicesAddress);yAddress=unsignedAddress(yAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[forwardShuffleXShaderName];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var xBuffer=(_gpuBuffers$get2=gpuBuffers.get(xAddress))===null||_gpuBuffers$get2===void 0?void 0:_gpuBuffers$get2.buffer;if(!xBuffer){console.error("X buffer has not been allocated");setStatus(statusAddress,-1);return}var indicesBuffer=(_gpuBuffers$get3=gpuBuffers.get(indicesAddress))===null||_gpuBuffers$get3===void 0?void 0:_gpuBuffers$get3.buffer;if(!indicesBuffer){console.error("Indices buffer has not been allocated");setStatus(statusAddress,-1);return}var yBuffer=(_gpuBuffers$get4=gpuBuffers.get(yAddress))===null||_gpuBuffers$get4===void 0?void 0:_gpuBuffers$get4.buffer;if(!yBuffer){console.error("Y buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(4*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"weight shuffle x arg buffer");obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([1,shape1,xOffsetBytes/4,indicesOffsetBytes/4]));obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"weight forward single shuffle x bind 
group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:xBuffer}},{binding:2,resource:{buffer:indicesBuffer}},{binding:3,resource:{buffer:yBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,forwardShuffleXShaderName+"_single",1,Math.ceil(shape1/PV_PICOLLM_WEBGPU_DEFAULT_WORKGROUP_SIZE));setStatus(statusAddress,0)};var pvPicollmForwardSingleWebGpu=function pvPicollmForwardSingleWebGpu(objAddress,bitDepth,xAddress,xOffsetBytes,metasAddress,metasOffsetBytes,blocksAddress,blocksOffsetBytes,nbr,totalNbc,bitDepthNbc,yAddress,yOffsetBytes,statusAddress){var _gpuBuffers$get5,_gpuBuffers$get6,_gpuBuffers$get7,_gpuBuffers$get8;objAddress=unsignedAddress(objAddress);xAddress=unsignedAddress(xAddress);metasAddress=unsignedAddress(metasAddress);blocksAddress=unsignedAddress(blocksAddress);yAddress=unsignedAddress(yAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[forwardSingleShaderNames[bitDepth]];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var xBuffer=(_gpuBuffers$get5=gpuBuffers.get(xAddress))===null||_gpuBuffers$get5===void 0?void 0:_gpuBuffers$get5.buffer;if(!xBuffer){console.error("X buffer has not been allocated");setStatus(statusAddress,-1);return}var metasBuffer=(_gpuBuffers$get6=gpuBuffers.get(metasAddress))===null||_gpuBuffers$get6===void 0?void 0:_gpuBuffers$get6.buffer;if(!metasBuffer){console.error("Metas buffer has not been allocated");setStatus(statusAddress,-1);return}var blocksBuffer=(_gpuBuffers$get7=gpuBuffers.get(blocksAddress))===null||_gpuBuffers$get7===void 0?void 0:_gpuBuffers$get7.buffer;if(!blocksBuffer){console.error("Blocks buffer has not been allocated");setStatus(statusAddress,-1);return}var 
yBuffer=(_gpuBuffers$get8=gpuBuffers.get(yAddress))===null||_gpuBuffers$get8===void 0?void 0:_gpuBuffers$get8.buffer;if(!yBuffer){console.error("Y buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(8*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"weight forward single ".concat(bitDepth," arg buffer"));obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([1,nbr,totalNbc,bitDepthNbc,xOffsetBytes/4,metasOffsetBytes/4,blocksOffsetBytes/4,yOffsetBytes/4]));obj.scheduleUniformBufferForRelease(argsBuffer);var entries=[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:xBuffer}},{binding:2,resource:{buffer:metasBuffer}},{binding:3,resource:{buffer:blocksBuffer}},{binding:4,resource:{buffer:yBuffer}}];var bindGroup=obj.device.createBindGroup({label:"weight forward single ".concat(bitDepth," bind group"),layout:shader.computePipeline.getBindGroupLayout(0),entries:entries});obj.dispatchComputerShader(bindGroup,shader.computePipeline,forwardSingleShaderNames[bitDepth],Math.ceil(nbr*4/weightBlockSize),bitDepthNbc);setStatus(statusAddress,0)};var pvPicollmForwardSingleReduceYWebGpu=function pvPicollmForwardSingleReduceYWebGpu(objAddress,nbr,nbc,xAddress,yAddress,statusAddress){var _gpuBuffers$get9,_gpuBuffers$get10;objAddress=unsignedAddress(objAddress);xAddress=unsignedAddress(xAddress);yAddress=unsignedAddress(yAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[forwardSingleReduceYShaderName];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var xBuffer=(_gpuBuffers$get9=gpuBuffers.get(xAddress))===null||_gpuBuffers$get9===void 0?void 0:_gpuBuffers$get9.buffer;if(!xBuffer){console.error("X buffer has not been allocated");setStatus(statusAddress,-1);return}var 
yBuffer=(_gpuBuffers$get10=gpuBuffers.get(yAddress))===null||_gpuBuffers$get10===void 0?void 0:_gpuBuffers$get10.buffer;if(!yBuffer){console.error("Y buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(2*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"weight single reduce y arg buffer");obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([nbr*4,nbc]));obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"weight forward single reduce y bind group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:xBuffer}},{binding:2,resource:{buffer:yBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,forwardSingleReduceYShaderName,Math.ceil(nbr*4/weightBlockSize));setStatus(statusAddress,0)};var pvPicollmForwardMultipleShuffleXWebGpu=function pvPicollmForwardMultipleShuffleXWebGpu(objAddress,xAddress,xOffsetBytes,indicesAddress,indicesOffsetBytes,n,shape1,yAddress,statusAddress){var _gpuBuffers$get11,_gpuBuffers$get12,_gpuBuffers$get13;objAddress=unsignedAddress(objAddress);xAddress=unsignedAddress(xAddress);indicesAddress=unsignedAddress(indicesAddress);yAddress=unsignedAddress(yAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[forwardShuffleXShaderName];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var xBuffer=(_gpuBuffers$get11=gpuBuffers.get(xAddress))===null||_gpuBuffers$get11===void 0?void 0:_gpuBuffers$get11.buffer;if(!xBuffer){console.error("X buffer has not been allocated");setStatus(statusAddress,-1);return}var indicesBuffer=(_gpuBuffers$get12=gpuBuffers.get(indicesAddress))===null||_gpuBuffers$get12===void 0?void 
0:_gpuBuffers$get12.buffer;if(!indicesBuffer){console.error("Indices buffer has not been allocated");setStatus(statusAddress,-1);return}var yBuffer=(_gpuBuffers$get13=gpuBuffers.get(yAddress))===null||_gpuBuffers$get13===void 0?void 0:_gpuBuffers$get13.buffer;if(!yBuffer){console.error("Y buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(4*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"weight multi shuffle x arg buffer");obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([n,shape1,xOffsetBytes/4,indicesOffsetBytes/4]));obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"weight forward multiple shuffle x bind group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:xBuffer}},{binding:2,resource:{buffer:indicesBuffer}},{binding:3,resource:{buffer:yBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,forwardShuffleXShaderName+"_multi",n,shape1);setStatus(statusAddress,0)};var pvPicollmForwardMultipleWebGpu=function pvPicollmForwardMultipleWebGpu(objAddress,bitDepth,xAddress,xOffsetBytes,metasAddress,metasOffsetBytes,blocksAddress,blocksOffsetBytes,nbc,nbr,n,yAddress,yOffsetBytes,statusAddress){var _gpuBuffers$get14,_gpuBuffers$get15,_gpuBuffers$get16,_gpuBuffers$get17;objAddress=unsignedAddress(objAddress);xAddress=unsignedAddress(xAddress);metasAddress=unsignedAddress(metasAddress);blocksAddress=unsignedAddress(blocksAddress);yAddress=unsignedAddress(yAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[forwardShaderNames[bitDepth]];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var 
xBuffer=(_gpuBuffers$get14=gpuBuffers.get(xAddress))===null||_gpuBuffers$get14===void 0?void 0:_gpuBuffers$get14.buffer;if(!xBuffer){console.error("X buffer has not been allocated");setStatus(statusAddress,-1);return}var metasBuffer=(_gpuBuffers$get15=gpuBuffers.get(metasAddress))===null||_gpuBuffers$get15===void 0?void 0:_gpuBuffers$get15.buffer;if(!metasBuffer){console.error("Metas buffer has not been allocated");setStatus(statusAddress,-1);return}var blocksBuffer=(_gpuBuffers$get16=gpuBuffers.get(blocksAddress))===null||_gpuBuffers$get16===void 0?void 0:_gpuBuffers$get16.buffer;if(!blocksBuffer){console.error("Blocks buffer has not been allocated");setStatus(statusAddress,-1);return}var yBuffer=(_gpuBuffers$get17=gpuBuffers.get(yAddress))===null||_gpuBuffers$get17===void 0?void 0:_gpuBuffers$get17.buffer;if(!yBuffer){console.error("Y buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(8*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"weight forward multi ".concat(bitDepth," arg buffer"));obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([n,nbr,0,nbc,xOffsetBytes/4,metasOffsetBytes/4,blocksOffsetBytes/4,yOffsetBytes/4]));obj.scheduleUniformBufferForRelease(argsBuffer);var entries=[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:xBuffer}},{binding:2,resource:{buffer:metasBuffer}},{binding:3,resource:{buffer:blocksBuffer}},{binding:4,resource:{buffer:yBuffer}}];var bindGroup=obj.device.createBindGroup({label:"weight forward multi ".concat(bitDepth," bind group"),layout:shader.computePipeline.getBindGroupLayout(0),entries:entries});obj.dispatchComputerShader(bindGroup,shader.computePipeline,forwardShaderNames[bitDepth],Math.ceil(nbr/BM),Math.ceil(n/BN));setStatus(statusAddress,0)};var pvPicollmForwardMultipleShuffleYWebGpu=function pvPicollmForwardMultipleShuffleYWebGpu(objAddress,n,shape0,xAddress,yAddress,statusAddress){var 
_gpuBuffers$get18,_gpuBuffers$get19;objAddress=unsignedAddress(objAddress);xAddress=unsignedAddress(xAddress);yAddress=unsignedAddress(yAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[forwardShuffleYShaderName];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var xBuffer=(_gpuBuffers$get18=gpuBuffers.get(xAddress))===null||_gpuBuffers$get18===void 0?void 0:_gpuBuffers$get18.buffer;if(!xBuffer){console.error("X buffer has not been allocated");setStatus(statusAddress,-1);return}var yBuffer=(_gpuBuffers$get19=gpuBuffers.get(yAddress))===null||_gpuBuffers$get19===void 0?void 0:_gpuBuffers$get19.buffer;if(!yBuffer){console.error("Y buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(2*Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"weight shuffle y arg buffer");obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([n,shape0]));obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"weight forward multiple shuffle y bind group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:xBuffer}},{binding:2,resource:{buffer:yBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,forwardShuffleYShaderName,n,shape0);setStatus(statusAddress,0)};var pvPicollmAddBiasWebGpu=function pvPicollmAddBiasWebGpu(objAddress,n,dimension,biasAddress,yAddress,statusAddress){var _gpuBuffers$get20,_gpuBuffers$get21;objAddress=unsignedAddress(objAddress);biasAddress=unsignedAddress(biasAddress);yAddress=unsignedAddress(yAddress);statusAddress=unsignedAddress(statusAddress);var obj=gpuDevices.get(objAddress);if(!obj||!obj.device){console.error("WebGPU device has not 
been initialized");setStatus(statusAddress,-1);return}var shader=obj.shaders[addBiasShaderName];if(!shader){console.error("Shader has not been loaded");setStatus(statusAddress,-1);return}var biasBuffer=(_gpuBuffers$get20=gpuBuffers.get(biasAddress))===null||_gpuBuffers$get20===void 0?void 0:_gpuBuffers$get20.buffer;if(!biasBuffer){console.error("Bias buffer has not been allocated");setStatus(statusAddress,-1);return}var yBuffer=(_gpuBuffers$get21=gpuBuffers.get(yAddress))===null||_gpuBuffers$get21===void 0?void 0:_gpuBuffers$get21.buffer;if(!yBuffer){console.error("Y buffer has not been allocated");setStatus(statusAddress,-1);return}var argsBuffer=obj.getBuffer(Uint32Array.BYTES_PER_ELEMENT,GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST,false,"weight add bias arg buffer");obj.device.queue.writeBuffer(argsBuffer,0,new Uint32Array([dimension]));obj.scheduleUniformBufferForRelease(argsBuffer);var bindGroup=obj.device.createBindGroup({label:"weight add bias bind group",layout:shader.computePipeline.getBindGroupLayout(0),entries:[{binding:0,resource:{buffer:argsBuffer}},{binding:1,resource:{buffer:biasBuffer}},{binding:2,resource:{buffer:yBuffer}}]});obj.dispatchComputerShader(bindGroup,shader.computePipeline,addBiasShaderName,n,dimension);setStatus(statusAddress,0)};return{pv_picollm_weight_block_mixed_16x8_preprocess_blocks_webgpu_wasm:pvPicollmPreprocessBlocksWebGpu,pv_picollm_weight_block_mixed_16x8_forward_single_shuffle_x_webgpu_wasm:pvPicollmForwardSingleShuffleXWebGpu,pv_picollm_weight_block_mixed_16x8_forward_single_webgpu_wasm:pvPicollmForwardSingleWebGpu,pv_picollm_weight_block_mixed_16x8_forward_single_reduce_y_webgpu_wasm:pvPicollmForwardSingleReduceYWebGpu,pv_picollm_weight_block_mixed_16x8_forward_multiple_shuffle_x_webgpu_wasm:pvPicollmForwardMultipleShuffleXWebGpu,pv_picollm_weight_block_mixed_16x8_forward_multiple_webgpu_wasm:pvPicollmForwardMultipleWebGpu,pv_picollm_weight_block_mixed_16x8_forward_multiple_shuffle_y_webgpu_wasm:pvPicollmForwardMultiple
ShuffleYWebGpu,pv_picollm_weight_block_mixed_16x8_add_bias_webgpu_wasm:pvPicollmAddBiasWebGpu}};function ownKeys$1(e,r){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);r&&(o=o.filter(function(r){return Object.getOwnPropertyDescriptor(e,r).enumerable})),t.push.apply(t,o)}return t}function _objectSpread$1(e){for(var r=1;r=r.length?{done:!0}:{done:!1,value:r[_n++]}},e:function e(r){throw r},f:F}}throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")}var o,a=!0,u=!1;return{s:function s(){t=t.call(r)},n:function n(){var r=t.next();return a=r.done,r},e:function e(r){u=!0,o=r},f:function f(){try{a||null==t["return"]||t["return"]()}finally{if(u)throw o}}}}function _unsupportedIterableToArray(r,a){if(r){if("string"==typeof r)return _arrayLikeToArray(r,a);var t={}.toString.call(r).slice(8,-1);return"Object"===t&&r.constructor&&(t=r.constructor.name),"Map"===t||"Set"===t?Array.from(r):"Arguments"===t||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(t)?_arrayLikeToArray(r,a):void 0}}function _arrayLikeToArray(r,a){(null==a||a>r.length)&&(a=r.length);for(var e=0,n=Array(a);e1&&_args[1]!==undefined?_args[1]:{};time=_args.length>2&&_args[2]!==undefined?_args[2]:5e3;controller=new AbortController;config=_objectSpread(_objectSpread({},options),{},{signal:controller.signal});timeout=setTimeout(function(){controller.abort()},time);_context.next=7;return fetch(uri,config);case 7:response=_context.sent;clearTimeout(timeout);return _context.abrupt("return",response);case 10:case"end":return _context.stop()}},_callee)}));return _fetchWithTimeout.apply(this,arguments)}function open(_x2,_x3){return _open.apply(this,arguments)}function _open(){_open=_asyncToGenerator(_regeneratorRuntime.mark(function _callee2(path,mode){var error;return _regeneratorRuntime.wrap(function _callee2$(_context2){while(1)switch(_context2.prev=_context2.next){case 
0:_context2.prev=0;_context2.next=3;return PvFileIDB.open(path,mode);case 3:return _context2.abrupt("return",_context2.sent);
// Catch handler for the try range [0,6] registered at the end of this state machine:
// warn about the IndexedDB failure, except when the error is merely FileNotExists.
case 6:_context2.prev=6;_context2.t0=_context2["catch"](0);if(_context2.t0.name==="IndexedDBNotSupported"){console.warn("IndexedDB is not supported. Fallback to in-memory storage.")}else if(_context2.t0.name!=="FileNotExists"){console.warn("Unable to access IndexedDB (".concat(_context2.t0.toString(),"). Fallback to in-memory storage."))}
// Outside a worker: jump to case 16 (in-memory fallback).
// Inside a worker: rethrow FileNotExists unchanged, otherwise continue to case 12.
if(!(typeof WorkerGlobalScope!=="undefined"&&self instanceof WorkerGlobalScope)){_context2.next=16;break}if(!(_context2.t0.name==="FileNotExists")){_context2.next=12;break}throw _context2.t0;
// Worker without usable IndexedDB: wrap the original error in an Error named "PvFileNotSupported".
case 12:console.error("In-memory storage cannot be used inside a worker.");error=new Error("Failed to start PvFile: ".concat(_context2.t0.toString()));error.name="PvFileNotSupported";throw error;
case 16:return _context2.abrupt("return",PvFileMem.open(path,mode));case 17:case"end":return _context2.stop()}},_callee2,null,[[0,6]])}));return _open.apply(this,arguments)}
// Module bootstrap state: a snapshot of the caller-supplied Module properties plus defaults
// that the Module["arguments"] / ["thisProgram"] / ["quit"] overrides may replace later.
var moduleOverrides=Object.assign({},Module);var arguments_=[];var thisProgram="./this.program";var quit_=(status,toThrow)=>{throw toThrow};var scriptDirectory="";
// Resolves a runtime asset path: defers to Module["locateFile"] when provided, else prefixes scriptDirectory.
function locateFile(path){if(Module["locateFile"]){return Module["locateFile"](path,scriptDirectory)}return scriptDirectory+path}
var readAsync,readBinary;
// Web / worker environment: derive scriptDirectory from the worker location, document.currentScript or
// _scriptName (the latter wins when set). blob: URLs yield an empty directory; otherwise everything after
// the last "/" (ignoring any ?query or #fragment) is dropped.
if(ENVIRONMENT_IS_WEB||ENVIRONMENT_IS_WORKER){if(ENVIRONMENT_IS_WORKER){scriptDirectory=self.location.href}else if(typeof document!="undefined"&&document.currentScript){scriptDirectory=document.currentScript.src}if(_scriptName){scriptDirectory=_scriptName}if(scriptDirectory.startsWith("blob:")){scriptDirectory=""}else{scriptDirectory=scriptDirectory.substr(0,scriptDirectory.replace(/[?#].*/,"").lastIndexOf("/")+1)}
// readBinary is only defined inside workers and uses a synchronous XMLHttpRequest (third open() argument is false).
{if(ENVIRONMENT_IS_WORKER){readBinary=url=>{var xhr=new XMLHttpRequest;xhr.open("GET",url,false);xhr.responseType="arraybuffer";xhr.send(null);return new Uint8Array(xhr.response)}}
// readAsync returns a promise for the bytes at `url`; file:// URLs take the XMLHttpRequest path that follows.
readAsync=url=>{if(isFileURI(url)){return new
Promise((reject,resolve)=>{var xhr=new XMLHttpRequest;xhr.open("GET",url,true);xhr.responseType="arraybuffer";xhr.onload=()=>{if(xhr.status==200||xhr.status==0&&xhr.response){resolve(xhr.response)}reject(xhr.status)};xhr.onerror=reject;xhr.send(null)})}return fetch(url,{credentials:"same-origin"}).then(response=>{if(response.ok){return response.arrayBuffer()}return Promise.reject(new Error(response.status+" : "+response.url))})}}}else{}var out=Module["print"]||console.log.bind(console);var err=Module["printErr"]||console.error.bind(console);Object.assign(Module,moduleOverrides);moduleOverrides=null;if(Module["arguments"])arguments_=Module["arguments"];if(Module["thisProgram"])thisProgram=Module["thisProgram"];if(Module["quit"])quit_=Module["quit"];if(ENVIRONMENT_IS_PTHREAD){var wasmPromiseResolve;var wasmPromiseReject;var initializedJS=false;function threadPrintErr(...args){var text=args.join(" ");console.error(text)}if(!Module["printErr"])err=threadPrintErr;function threadAlert(...args){var text=args.join(" ");postMessage({cmd:"alert",text:text,threadId:_pthread_self()})}self.alert=threadAlert;Module["instantiateWasm"]=(info,receiveInstance)=>new Promise((resolve,reject)=>{wasmPromiseResolve=module=>{var instance=new WebAssembly.Instance(module,getWasmImports());receiveInstance(instance);resolve()};wasmPromiseReject=reject});self.onunhandledrejection=e=>{throw e.reason||e};function handleMessage(e){try{var msgData=e["data"];var cmd=msgData["cmd"];if(cmd==="load"){let messageQueue=[];self.onmessage=e=>messageQueue.push(e);self.startWorker=instance=>{postMessage({cmd:"loaded"});for(let msg of messageQueue){handleMessage(msg)}self.onmessage=handleMessage};for(const handler of 
msgData["handlers"]){if(!Module[handler]||Module[handler].proxy){Module[handler]=(...args)=>{postMessage({cmd:"callHandler",handler:handler,args:args})};if(handler=="print")out=Module[handler];if(handler=="printErr")err=Module[handler]}}
// (loop above) Each listed handler that is missing locally, or marked .proxy, is replaced by a stub that
// posts a "callHandler" message instead. Then: adopt the memory carried by the message, rebuild the heap
// views and hand the received module to the pending instantiation.
wasmMemory=msgData["wasmMemory"];updateMemoryViews();wasmPromiseResolve(msgData["wasmModule"])}
// "run": initialise the thread, set up its stack and TLS, then call the entry point.
// An exception equal to "unwind" is ignored; anything else is rethrown.
else if(cmd==="run"){__emscripten_thread_init(msgData["pthread_ptr"],0,0,1,0,0);__emscripten_thread_mailbox_await(msgData["pthread_ptr"]);establishStackSpace();PThread.receiveObjectTransfer(msgData);PThread.threadInitTLS();if(!initializedJS){initializedJS=true}try{invokeEntryPoint(msgData["start_routine"],msgData["arg"])}catch(ex){if(ex!="unwind"){throw ex}}}
// Remaining commands: "cancel", a no-op for "setimmediate" targets, "checkMailbox" (only once initializedJS
// is set), and a logged error for any other non-empty command.
else if(cmd==="cancel"){if(_pthread_self()){__emscripten_thread_exit(-1)}}else if(msgData.target==="setimmediate"){}else if(cmd==="checkMailbox"){if(initializedJS){checkMailbox()}}else if(cmd){err(`worker: received unknown command ${cmd}`);err(msgData)}}catch(ex){__emscripten_thread_crashed();throw ex}}self.onmessage=handleMessage}
// Runtime-wide state: optional precompiled binary, memory and module handles, abort flag, exit status, heap views.
var wasmBinary;if(Module["wasmBinary"])wasmBinary=Module["wasmBinary"];var wasmMemory;var wasmModule;var ABORT=false;var EXITSTATUS;var HEAP8,HEAPU8,HEAP16,HEAPU16,HEAP32,HEAPU32,HEAPF32,HEAPF64;
// Recreates every typed-array view over wasmMemory.buffer and mirrors each one onto Module.
function updateMemoryViews(){var b=wasmMemory.buffer;Module["HEAP8"]=HEAP8=new Int8Array(b);Module["HEAP16"]=HEAP16=new Int16Array(b);Module["HEAPU8"]=HEAPU8=new Uint8Array(b);Module["HEAPU16"]=HEAPU16=new Uint16Array(b);Module["HEAP32"]=HEAP32=new Int32Array(b);Module["HEAPU32"]=HEAPU32=new Uint32Array(b);Module["HEAPF32"]=HEAPF32=new Float32Array(b);Module["HEAPF64"]=HEAPF64=new Float64Array(b)}
// Outside pthread workers: reuse Module["wasmMemory"] when supplied, otherwise create a shared memory of
// INITIAL_MEMORY bytes (default 39321600) with a maximum of 4294967296/65536 pages. Throws "bad memory"
// when the resulting buffer is not a SharedArrayBuffer.
if(!ENVIRONMENT_IS_PTHREAD){if(Module["wasmMemory"]){wasmMemory=Module["wasmMemory"]}else{var INITIAL_MEMORY=Module["INITIAL_MEMORY"]||39321600;wasmMemory=new WebAssembly.Memory({initial:INITIAL_MEMORY/65536,maximum:4294967296/65536,shared:true});if(!(wasmMemory.buffer instanceof SharedArrayBuffer)){err("requested a shared WebAssembly.Memory but the returned buffer is not a SharedArrayBuffer, indicating that while the browser has SharedArrayBuffer it does not have WebAssembly threads support - you may need to set a flag");if(ENVIRONMENT_IS_NODE){err("(on node you may need: --experimental-wasm-threads --experimental-wasm-bulk-memory and/or recent version)")}throw Error("bad memory")}}updateMemoryViews()}
// Lifecycle callback queues. The addOn* helpers use unshift, so the most recently added callback runs first.
var __ATPRERUN__=[];var __ATINIT__=[];var __ATPOSTRUN__=[];var runtimeInitialized=false;
// Drains Module["preRun"] (a function or an array of functions) into the pre-run queue, then runs the queue.
function preRun(){if(Module["preRun"]){if(typeof Module["preRun"]=="function")Module["preRun"]=[Module["preRun"]];while(Module["preRun"].length){addOnPreRun(Module["preRun"].shift())}}callRuntimeCallbacks(__ATPRERUN__)}
// Marks the runtime initialised; the init callbacks are skipped inside pthread workers.
function initRuntime(){runtimeInitialized=true;if(ENVIRONMENT_IS_PTHREAD)return;callRuntimeCallbacks(__ATINIT__)}
// Same pattern as preRun for Module["postRun"]; does nothing inside pthread workers.
function postRun(){if(ENVIRONMENT_IS_PTHREAD)return;if(Module["postRun"]){if(typeof Module["postRun"]=="function")Module["postRun"]=[Module["postRun"]];while(Module["postRun"].length){addOnPostRun(Module["postRun"].shift())}}callRuntimeCallbacks(__ATPOSTRUN__)}
function addOnPreRun(cb){__ATPRERUN__.unshift(cb)}function addOnInit(cb){__ATINIT__.unshift(cb)}function addOnPostRun(cb){__ATPOSTRUN__.unshift(cb)}
// Run-dependency counter; `id` is accepted but not used by either function below.
var runDependencies=0;var runDependencyWatcher=null;var dependenciesFulfilled=null;
function addRunDependency(id){runDependencies++;Module["monitorRunDependencies"]?.(runDependencies)}
// When the counter reaches 0: stop the watcher interval (if any) and invoke dependenciesFulfilled exactly once.
function removeRunDependency(id){runDependencies--;Module["monitorRunDependencies"]?.(runDependencies);if(runDependencies==0){if(runDependencyWatcher!==null){clearInterval(runDependencyWatcher);runDependencyWatcher=null}if(dependenciesFulfilled){var callback=dependenciesFulfilled;dependenciesFulfilled=null;callback()}}}
// Reports the abort through Module["onAbort"] and err, sets ABORT and EXITSTATUS=1, rejects the ready
// promise and throws a WebAssembly.RuntimeError. Never returns normally.
function abort(what){Module["onAbort"]?.(what);what="Aborted("+what+")";err(what);ABORT=true;EXITSTATUS=1;what+=". Build with -sASSERTIONS for more info.";var e=new WebAssembly.RuntimeError(what);readyPromiseReject(e);throw e}
var dataURIPrefix="data:application/octet-stream;base64,";var isDataURI=filename=>filename.startsWith(dataURIPrefix);var isFileURI=filename=>filename.startsWith("file://");
// Location of the wasm binary; resolved through locateFile (the hard-coded name is not a data URI).
function findWasmBinary(){var f="pv_picollm_simd.wasm";if(!isDataURI(f)){return locateFile(f)}return f}
var wasmBinaryFile;
// Synchronous fallback: the preloaded wasmBinary when asking for the main wasm file, else readBinary if defined.
// NOTE(review): the failure path throws a bare string rather than an Error.
function getBinarySync(file){if(file==wasmBinaryFile&&wasmBinary){return new Uint8Array(wasmBinary)}if(readBinary){return readBinary(file)}throw"both async and sync fetching of the wasm failed"}
// Without a preloaded binary, fetch asynchronously and fall back to getBinarySync if readAsync rejects.
function getBinaryPromise(binaryFile){if(!wasmBinary){return readAsync(binaryFile).then(response=>new Uint8Array(response),()=>getBinarySync(binaryFile))}return Promise.resolve().then(()=>getBinarySync(binaryFile))}
// Instantiates from raw bytes; any failure is logged and escalated through abort().
function instantiateArrayBuffer(binaryFile,imports,receiver){return getBinaryPromise(binaryFile).then(binary=>WebAssembly.instantiate(binary,imports)).then(receiver,reason=>{err(`failed to asynchronously prepare wasm: ${reason}`);abort(reason)})}
// Uses streaming instantiation when no binary is preloaded, the API exists and the URL is neither a data:
// nor a file: URI; a streaming failure falls back to the ArrayBuffer path.
function instantiateAsync(binary,binaryFile,imports,callback){if(!binary&&typeof WebAssembly.instantiateStreaming=="function"&&!isDataURI(binaryFile)&&!isFileURI(binaryFile)&&typeof fetch=="function"){return fetch(binaryFile,{credentials:"same-origin"}).then(response=>{var result=WebAssembly.instantiateStreaming(response,imports);return result.then(callback,function(reason){err(`wasm streaming compile failed: ${reason}`);err("falling back to ArrayBuffer instantiation");return instantiateArrayBuffer(binaryFile,imports,callback)})})}return instantiateArrayBuffer(binaryFile,imports,callback)}
// The same import object is exposed under both the "env" and "wasi_snapshot_preview1" namespaces.
function getWasmImports(){assignWasmImports();return{env:wasmImports,wasi_snapshot_preview1:wasmImports}}
// Starts wasm instantiation; the body continues past this point.
function createWasm(){var info=getWasmImports();function
receiveInstance(instance,module){wasmExports=instance.exports;wasmExports=Asyncify.instrumentWasmExports(wasmExports);wasmExports=applySignatureConversions(wasmExports);registerTLSInit(wasmExports["_emscripten_tls_init"]);wasmTable=wasmExports["__indirect_function_table"];addOnInit(wasmExports["__wasm_call_ctors"]);wasmModule=module;removeRunDependency("wasm-instantiate");return wasmExports}
// (receiveInstance above) Publishes the exports after Asyncify instrumentation and signature conversion,
// registers the TLS initialiser, queues __wasm_call_ctors as an init callback, remembers the module and
// releases the "wasm-instantiate" run dependency that is taken on the next line.
addRunDependency("wasm-instantiate");function receiveInstantiationResult(result){receiveInstance(result["instance"],result["module"])}
// A user-supplied Module["instantiateWasm"] takes over instantiation. If it throws, the error is logged,
// the ready promise is rejected, and execution still continues into the default path below.
if(Module["instantiateWasm"]){try{return Module["instantiateWasm"](info,receiveInstance)}catch(e){err(`Module.instantiateWasm callback failed with error: ${e}`);readyPromiseReject(e)}}
// Default path: instantiate asynchronously and return an empty object immediately.
if(!wasmBinaryFile)wasmBinaryFile=findWasmBinary();instantiateAsync(wasmBinary,wasmBinaryFile,info,receiveInstantiationResult).catch(readyPromiseReject);return{}}
// Object carrying the exit code; _proc_exit passes an instance to quit_ and handleException recognises it.
function ExitStatus(status){this.name="ExitStatus";this.message=`Program terminated with exit(${status})`;this.status=status}
// Terminates the worker and replaces its message handler with a no-op.
var terminateWorker=worker=>{worker.terminate();worker.onmessage=e=>{}};
// Forcefully removes a thread: unregisters and terminates its worker, frees the thread data and removes
// the worker from runningWorkers (it is not returned to the unused pool).
var killThread=pthread_ptr=>{var worker=PThread.pthreads[pthread_ptr];delete PThread.pthreads[pthread_ptr];terminateWorker(worker);__emscripten_thread_free_data(pthread_ptr);PThread.runningWorkers.splice(PThread.runningWorkers.indexOf(worker),1);worker.pthread_ptr=0};
// Asks the thread's worker to cancel itself by posting a "cancel" command.
var cancelThread=pthread_ptr=>{var worker=PThread.pthreads[pthread_ptr];worker.postMessage({cmd:"cancel"})};
// Returns a finished thread's worker to the pool.
var cleanupThread=pthread_ptr=>{var worker=PThread.pthreads[pthread_ptr];PThread.returnWorkerToPool(worker)};
// Binds a pooled worker to the new thread and posts the "run" command.
// Returns 0 on success, 6 when no worker is available (presumably an errno value - confirm).
var spawnThread=threadParams=>{var worker=PThread.getNewWorker();if(!worker){return 6}PThread.runningWorkers.push(worker);PThread.pthreads[threadParams.pthread_ptr]=worker;worker.pthread_ptr=threadParams.pthread_ptr;var msg={cmd:"run",start_routine:threadParams.startRoutine,arg:threadParams.arg,pthread_ptr:threadParams.pthread_ptr};worker.postMessage(msg,threadParams.transferList);return 0};
var runtimeKeepaliveCounter=0;var
keepRuntimeAlive=()=>noExitRuntime||runtimeKeepaliveCounter>0;
// Thin wrappers over the wasm-side stack helpers.
var stackSave=()=>_emscripten_stack_get_current();var stackRestore=val=>__emscripten_stack_restore(val);var stackAlloc=sz=>__emscripten_stack_alloc(sz);
// Reserves 8 bytes of stack per call argument, runs the call through __emscripten_run_on_main_thread_js and
// restores the stack pointer afterwards.
// NOTE(review): the argument-copy loop (`for(var i=0;i>>>0]=arg}`) does not parse as written; part of the
// loop header and body appears to be missing from this copy. Kept byte-for-byte - verify against the bundle.
var proxyToMainThread=(funcIndex,emAsmAddr,sync,...callArgs)=>{var serializedNumCallArgs=callArgs.length;var sp=stackSave();var args=stackAlloc(serializedNumCallArgs*8);var b=args>>>3;for(var i=0;i>>0]=arg}var rtn=__emscripten_run_on_main_thread_js(funcIndex,emAsmAddr,serializedNumCallArgs,args,sync);stackRestore(sp);return rtn};
// In a pthread the call is proxied (function index 0). Otherwise: record the exit code, tear down all
// threads when the runtime is not being kept alive, then hand an ExitStatus to quit_.
function _proc_exit(code){if(ENVIRONMENT_IS_PTHREAD)return proxyToMainThread(0,0,1,code);EXITSTATUS=code;if(!keepRuntimeAlive()){PThread.terminateAllThreads();Module["onExit"]?.(code);ABORT=true}quit_(code,new ExitStatus(code))}
// ExitStatus instances and "unwind" are expected control flow and yield EXITSTATUS; anything else goes to quit_.
var handleException=e=>{if(e instanceof ExitStatus||e=="unwind"){return EXITSTATUS}quit_(1,e)};
function exitOnMainThread(returnCode){if(ENVIRONMENT_IS_PTHREAD)return proxyToMainThread(1,0,0,returnCode);_exit(returnCode)}
// `implicit` is accepted but not read. In a pthread the exit is forwarded and "unwind" is thrown.
var exitJS=(status,implicit)=>{EXITSTATUS=status;if(ENVIRONMENT_IS_PTHREAD){exitOnMainThread(status);throw"unwind"}_proc_exit(status)};var _exit=exitJS;
// Worker-pool bookkeeping: idle workers, workers currently running a thread, TLS initialisers,
// and the map from pthread pointer to worker.
var PThread={unusedWorkers:[],runningWorkers:[],tlsInitFunctions:[],pthreads:{},init(){if(ENVIRONMENT_IS_PTHREAD){PThread.initWorker()}else{PThread.initMainThread()}},
// Pre-allocates navigator.hardwareConcurrency workers and blocks startup (run dependency
// "loading-workers") until every one of them has loaded the wasm module.
initMainThread(){var pthreadPoolSize=navigator.hardwareConcurrency;while(pthreadPoolSize--){PThread.allocateUnusedWorker()}addOnPreRun(()=>{addRunDependency("loading-workers");PThread.loadWasmModuleToAllWorkers(()=>removeRunDependency("loading-workers"))})},initWorker(){noExitRuntime=false},setExitStatus:status=>EXITSTATUS=status,terminateAllThreads__deps:["$terminateWorker"],
// NOTE(review): pthreads starts out as an object ({}) but is reset to an array ([]) here.
terminateAllThreads:()=>{for(var worker of PThread.runningWorkers){terminateWorker(worker)}for(var worker of PThread.unusedWorkers){terminateWorker(worker)}PThread.unusedWorkers=[];PThread.runningWorkers=[];PThread.pthreads=[]},
// Unregisters the worker's thread, moves the worker from runningWorkers to unusedWorkers and frees the thread data.
returnWorkerToPool:worker=>{var pthread_ptr=worker.pthread_ptr;delete PThread.pthreads[pthread_ptr];PThread.unusedWorkers.push(worker);PThread.runningWorkers.splice(PThread.runningWorkers.indexOf(worker),1);worker.pthread_ptr=0;__emscripten_thread_free_data(pthread_ptr)},receiveObjectTransfer(data){},threadInitTLS(){PThread.tlsInitFunctions.forEach(f=>f())},
// Installs the worker's message and error handlers and posts the "load" command; the returned promise
// resolves with the worker once it reports "loaded". Messages addressed to a different thread
// (d["targetThread"]) are forwarded to that thread's worker instead of being handled here.
loadWasmModuleToWorker:worker=>new Promise(onFinishedLoading=>{worker.onmessage=e=>{var d=e["data"];var cmd=d["cmd"];if(d["targetThread"]&&d["targetThread"]!=_pthread_self()){var targetWorker=PThread.pthreads[d["targetThread"]];if(targetWorker){targetWorker.postMessage(d,d["transferList"])}else{err(`Internal error! Worker sent a message "${cmd}" to target pthread ${d["targetThread"]}, but that thread no longer exists!`)}return}if(cmd==="checkMailbox"){checkMailbox()}else if(cmd==="spawnThread"){spawnThread(d)}else if(cmd==="cleanupThread"){cleanupThread(d["thread"])}else if(cmd==="killThread"){killThread(d["thread"])}else if(cmd==="cancelThread"){cancelThread(d["thread"])}else if(cmd==="loaded"){worker.loaded=true;onFinishedLoading(worker)}else if(cmd==="alert"){alert(`Thread ${d["threadId"]}: ${d["text"]}`)}else if(d.target==="setimmediate"){worker.postMessage(d)}else if(cmd==="callHandler"){Module[d["handler"]](...d["args"])}else if(cmd){err(`worker sent an unknown command ${cmd}`)}};
// Worker errors are logged with file, line and message, then rethrown.
worker.onerror=e=>{var message="worker sent an error!";err(`${message} ${e.filename}:${e.lineno}: ${e.message}`);throw e};
// Only handlers that are enumerable properties of Module are announced to the worker.
var handlers=[];var knownHandlers=["onExit","onAbort","print","printErr"];for(var handler of knownHandlers){if(Module.propertyIsEnumerable(handler)){handlers.push(handler)}}worker.postMessage({cmd:"load",handlers:handlers,wasmMemory:wasmMemory,wasmModule:wasmModule})}),
// Calls onMaybeReady immediately inside a pthread; otherwise after every unused worker has loaded the module.
loadWasmModuleToAllWorkers(onMaybeReady){if(ENVIRONMENT_IS_PTHREAD){return onMaybeReady()}let pthreadPoolReady=Promise.all(PThread.unusedWorkers.map(PThread.loadWasmModuleToWorker));pthreadPoolReady.then(onMaybeReady)},
// Creates one more module-type worker named "em-pthread"; the body continues past this point.
allocateUnusedWorker(){var worker;var workerOptions={type:"module",name:"em-pthread"};var
pthreadMainJs=_scriptName;if(Module["mainScriptUrlOrBlob"]){pthreadMainJs=Module["mainScriptUrlOrBlob"];if(typeof pthreadMainJs!="string"){pthreadMainJs=URL.createObjectURL(pthreadMainJs)}}worker=new Worker(pthreadMainJs,workerOptions);PThread.unusedWorkers.push(worker)},
// Takes a worker from the pool. When the pool is empty, one is allocated and module loading is started
// on it; the loading promise is not awaited before the worker is handed out.
getNewWorker(){if(PThread.unusedWorkers.length==0){PThread.allocateUnusedWorker();PThread.loadWasmModuleToWorker(PThread.unusedWorkers[0])}return PThread.unusedWorkers.pop()}};
// Drains the queue front to back, calling each callback with Module.
var callRuntimeCallbacks=callbacks=>{while(callbacks.length>0){callbacks.shift()(Module)}};
// Reads two 32-bit words from the thread structure (byte offsets 52 and 56), uses them as stack top and
// stack size, applies the limits and moves the stack pointer to the top.
var establishStackSpace=()=>{var pthread_ptr=_pthread_self();var stackHigh=GROWABLE_HEAP_U32()[pthread_ptr+52>>>2>>>0];var stackSize=GROWABLE_HEAP_U32()[pthread_ptr+56>>>2>>>0];var stackLow=stackHigh-stackSize;_emscripten_stack_set_limits(stackHigh,stackLow);stackRestore(stackHigh)};
// Calls the thread start routine through dynCall_ii. The result becomes the exit status when the runtime
// is being kept alive; otherwise the thread exits with it.
var invokeEntryPoint=(ptr,arg)=>{runtimeKeepaliveCounter=0;var result=(a1=>dynCall_ii(ptr,a1))(arg);function finish(result){if(keepRuntimeAlive()){PThread.setExitStatus(result)}else{__emscripten_thread_exit(result)}}finish(result)};
// NOTE(review): `x||true` is always truthy, so Module["noExitRuntime"]=false has no effect on this line
// (PThread.initWorker assigns false separately).
var noExitRuntime=Module["noExitRuntime"]||true;var registerTLSInit=tlsInitFunc=>PThread.tlsInitFunctions.push(tlsInitFunc);var UTF8Decoder=typeof TextDecoder!="undefined"?new TextDecoder:undefined;
// Decodes a NUL-terminated (or maxBytesToRead-limited) UTF-8 run. Runs longer than 16 bytes go through
// TextDecoder, copying with slice() first when the backing buffer is a SharedArrayBuffer.
// NOTE(review): the manual decode loop (`while(idx>10,56320|ch&1023)}`) does not parse as written; most of
// the loop appears to be missing from this copy. Kept byte-for-byte - verify against the bundle.
var UTF8ArrayToString=(heapOrArray,idx,maxBytesToRead)=>{idx>>>=0;var endIdx=idx+maxBytesToRead;var endPtr=idx;while(heapOrArray[endPtr]&&!(endPtr>=endIdx))++endPtr;if(endPtr-idx>16&&heapOrArray.buffer&&UTF8Decoder){return UTF8Decoder.decode(heapOrArray.buffer instanceof SharedArrayBuffer?heapOrArray.slice(idx,endPtr):heapOrArray.subarray(idx,endPtr))}var str="";while(idx>10,56320|ch&1023)}}return str};
// A zero pointer decodes to the empty string.
var UTF8ToString=(ptr,maxBytesToRead)=>{ptr>>>=0;return ptr?UTF8ArrayToString(GROWABLE_HEAP_U8(),ptr,maxBytesToRead):""};
// Aborts with the failed condition plus file, line and function (placeholders when a pointer is 0).
function ___assert_fail(condition,filename,line,func){condition>>>=0;filename>>>=0;func>>>=0;abort(`Assertion failed: ${UTF8ToString(condition)}, at: `+[filename?UTF8ToString(filename):"unknown filename",line,func?UTF8ToString(func):"unknown function"])}
// Runs ___pthread_create_js on the main thread when called from a pthread (proxy index 2).
function pthreadCreateProxied(pthread_ptr,attr,startRoutine,arg){if(ENVIRONMENT_IS_PTHREAD)return proxyToMainThread(2,0,1,pthread_ptr,attr,startRoutine,arg);return ___pthread_create_js(pthread_ptr,attr,startRoutine,arg)}
// Returns 6 when SharedArrayBuffer is unavailable. transferList is always empty and error is always 0
// here, so inside a pthread the call is always proxied and the later "spawnThread" postMessage branch
// is not reached; on the main thread the result of spawnThread is returned.
function ___pthread_create_js(pthread_ptr,attr,startRoutine,arg){pthread_ptr>>>=0;attr>>>=0;startRoutine>>>=0;arg>>>=0;if(typeof SharedArrayBuffer=="undefined"){err("Current environment does not support SharedArrayBuffer, pthreads are not available!");return 6}var transferList=[];var error=0;if(ENVIRONMENT_IS_PTHREAD&&(transferList.length===0||error)){return pthreadCreateProxied(pthread_ptr,attr,startRoutine,arg)}if(error)return error;var threadParams={startRoutine:startRoutine,pthread_ptr:pthread_ptr,arg:arg,transferList:transferList};if(ENVIRONMENT_IS_PTHREAD){threadParams.cmd="spawnThread";postMessage(threadParams,transferList);return 0}return spawnThread(threadParams)}
var __abort_js=()=>{abort("")};var nowIsMonotonic=1;var __emscripten_get_now_is_monotonic=()=>nowIsMonotonic;
function __emscripten_init_main_thread_js(tb){tb>>>=0;__emscripten_thread_init(tb,!ENVIRONMENT_IS_WORKER,1,!ENVIRONMENT_IS_WEB,65536,false);PThread.threadInitTLS()}
// Exits (thread or process) with EXITSTATUS once nothing keeps the runtime alive.
var maybeExit=()=>{if(!keepRuntimeAlive()){try{if(ENVIRONMENT_IS_PTHREAD)__emscripten_thread_exit(EXITSTATUS);else _exit(EXITSTATUS)}catch(e){handleException(e)}}};
// Runs `func` unless the runtime has aborted, then checks whether the runtime should exit.
var callUserCallback=func=>{if(ABORT){return}try{func();maybeExit()}catch(e){handleException(e)}};
// When Atomics.waitAsync exists: waits asynchronously on the word at pthread_ptr, runs checkMailbox when
// the wait settles, and stores 1 in the word at pthread_ptr+128.
function __emscripten_thread_mailbox_await(pthread_ptr){pthread_ptr>>>=0;if(typeof Atomics.waitAsync==="function"){var wait=Atomics.waitAsync(GROWABLE_HEAP_I32(),pthread_ptr>>>2,pthread_ptr);wait.value.then(checkMailbox);var waitingAsync=pthread_ptr+128;Atomics.store(GROWABLE_HEAP_I32(),waitingAsync>>>2,1)}}
// Re-arms the mailbox wait first, then processes the mailbox via __emscripten_check_mailbox.
var checkMailbox=()=>{var pthread_ptr=_pthread_self();if(pthread_ptr){__emscripten_thread_mailbox_await(pthread_ptr);callUserCallback(__emscripten_check_mailbox)}};
function
__emscripten_notify_mailbox_postmessage(targetThreadId,currThreadId,mainThreadId){
// Asks the target thread to check its mailbox: via setTimeout when the target is the current thread, via a
// postMessage carrying targetThread when running on a pthread, otherwise via the worker registered for the target.
// When no worker is registered for the target the call returns without doing anything.
targetThreadId>>>=0;currThreadId>>>=0;mainThreadId>>>=0;if(targetThreadId==currThreadId){setTimeout(checkMailbox)}else if(ENVIRONMENT_IS_PTHREAD){postMessage({targetThread:targetThreadId,cmd:"checkMailbox"})}else{var worker=PThread.pthreads[targetThreadId];if(!worker){return}worker.postMessage({cmd:"checkMailbox"})}}
// Reusable argument buffer for proxied calls.
var proxiedJSCallArgs=[];
// Looks up proxiedFunctionTable[funcIndex], records the calling thread while the function runs, and returns its result.
// NOTE(review): the loop that fills proxiedJSCallArgs appears truncated in this view - confirm against the generated file.
function __emscripten_receive_on_main_thread_js(funcIndex,emAsmAddr,callingThread,numCallArgs,args){emAsmAddr>>>=0;callingThread>>>=0;args>>>=0;proxiedJSCallArgs.length=numCallArgs;var b=args>>>3;for(var i=0;i>>0]}var func=proxiedFunctionTable[funcIndex];PThread.currentProxiedOperationCallerThread=callingThread;var rtn=func(...proxiedJSCallArgs);PThread.currentProxiedOperationCallerThread=0;return rtn}
// Cleans the thread up directly on the main thread; a pthread posts a cleanupThread command instead.
function __emscripten_thread_cleanup(thread){thread>>>=0;if(!ENVIRONMENT_IS_PTHREAD)cleanupThread(thread);else postMessage({cmd:"cleanupThread",thread:thread})}
// Only normalises the argument to unsigned; no other effect.
function __emscripten_thread_set_strongref(thread){thread>>>=0}
// Logs each distinct text through err at most once; shown texts are remembered on warnOnce.shown.
var warnOnce=text=>{warnOnce.shown||={};if(!warnOnce.shown[text]){warnOnce.shown[text]=1;err(text)}};
// Empty body.
var _emscripten_check_blocking_allowed=()=>{};var _emscripten_date_now=()=>Date.now();var runtimeKeepalivePush=()=>{runtimeKeepaliveCounter+=1};
// Increments the keepalive counter and throws the string unwind.
var _emscripten_exit_with_live_runtime=()=>{runtimeKeepalivePush();throw"unwind"};
// Fixed heap ceiling used by _emscripten_resize_heap.
var getHeapMax=()=>4294901760;function _emscripten_get_heap_max(){return getHeapMax()}var _emscripten_get_now;_emscripten_get_now=()=>performance.timeOrigin+performance.now();var _emscripten_num_logical_cores=()=>navigator["hardwareConcurrency"];
// Grows wasmMemory so that it holds at least size bytes and refreshes the memory views. Returns 1 on success;
// a failure inside the try block is swallowed and the function then returns undefined.
var growMemory=size=>{var b=wasmMemory.buffer;var pages=(size-b.byteLength+65535)/65536;try{wasmMemory.grow(pages);updateMemoryViews();return 1}catch(e){}};
// Returns false when the request does not exceed the current size or exceeds getHeapMax(); the body continues on the next line.
function _emscripten_resize_heap(requestedSize){requestedSize>>>=0;var oldSize=GROWABLE_HEAP_U8().length;if(requestedSize<=oldSize){return false}var maxHeapSize=getHeapMax();if(requestedSize>maxHeapSize){return 
false}
// Rest of _emscripten_resize_heap: up to three attempts (cutDown 1, 2, 4). Each attempt targets the larger of the
// request and oldSize scaled by 1+0.2/cutDown (capped at request+100663296), rounded up to a multiple of 65536 and
// capped at maxHeapSize. Returns true on the first successful growMemory call, false when all attempts fail.
var alignUp=(x,multiple)=>x+(multiple-x%multiple)%multiple;for(var cutDown=1;cutDown<=4;cutDown*=2){var overGrownHeapSize=oldSize*(1+.2/cutDown);overGrownHeapSize=Math.min(overGrownHeapSize,requestedSize+100663296);var newSize=Math.min(maxHeapSize,alignUp(Math.max(requestedSize,overGrownHeapSize),65536));var replacement=growMemory(newSize);if(replacement){return true}}return false}
// User-supplied environment overrides consumed by getEnvStrings.
var ENV={};var getExecutableName=()=>thisProgram||"./this.program";
// Builds the NAME=value list once and caches it on getEnvStrings.strings. LANG comes from navigator.languages[0]
// (first hyphen replaced by an underscore) or C, with .UTF-8 appended. Entries in ENV override the defaults and an
// ENV entry set to undefined removes the corresponding default.
var getEnvStrings=()=>{if(!getEnvStrings.strings){var lang=(typeof navigator=="object"&&navigator.languages&&navigator.languages[0]||"C").replace("-","_")+".UTF-8";var env={USER:"web_user",LOGNAME:"web_user",PATH:"/",PWD:"/",HOME:"/home/web_user",LANG:lang,_:getExecutableName()};for(var x in ENV){if(ENV[x]===undefined)delete env[x];else env[x]=ENV[x]}var strings=[];for(var x in env){strings.push(`${x}=${env[x]}`)}getEnvStrings.strings=strings}return getEnvStrings.strings};
// Writes char codes of str into the signed byte heap and terminates the output with a zero byte.
// NOTE(review): the loop header appears truncated in this view - confirm against the generated file.
var stringToAscii=(str,buffer)=>{for(var i=0;i>>0]=str.charCodeAt(i)}GROWABLE_HEAP_I8()[buffer>>>0]=0};
// Proxied to the main thread (index 3) when called on a pthread. Writes each environment string consecutively into
// environ_buf and stores its address in the pointer array at __environ (4 bytes per entry). Always returns 0.
var _environ_get=function(__environ,environ_buf){if(ENVIRONMENT_IS_PTHREAD)return proxyToMainThread(3,0,1,__environ,environ_buf);__environ>>>=0;environ_buf>>>=0;var bufSize=0;getEnvStrings().forEach((string,i)=>{var ptr=environ_buf+bufSize;GROWABLE_HEAP_U32()[__environ+i*4>>>2>>>0]=ptr;stringToAscii(string,ptr);bufSize+=string.length+1});return 0};
// Proxied to the main thread (index 4) when called on a pthread. Stores the number of environment strings and the
// total size needed for them (each string length plus one terminator). Always returns 0.
var _environ_sizes_get=function(penviron_count,penviron_buf_size){if(ENVIRONMENT_IS_PTHREAD)return proxyToMainThread(4,0,1,penviron_count,penviron_buf_size);penviron_count>>>=0;penviron_buf_size>>>=0;var strings=getEnvStrings();GROWABLE_HEAP_U32()[penviron_count>>>2>>>0]=strings.length;var bufSize=0;strings.forEach(string=>bufSize+=string.length+1);GROWABLE_HEAP_U32()[penviron_buf_size>>>2>>>0]=bufSize;return 0};
// Per-stream byte buffers used by printChar; index 0 is unused.
var printCharBuffers=[null,[],[]];var printChar=(stream,curr)=>{var 
buffer=printCharBuffers[stream];
// Rest of printChar: a 0 or 10 byte flushes the buffered bytes as a string to out (stream 1) or err (any other stream)
// and empties the buffer; every other byte is appended to the buffer.
if(curr===0||curr===10){(stream===1?out:err)(UTF8ArrayToString(buffer,0));buffer.length=0}else{buffer.push(curr)}};
// Proxied to the main thread (index 5) when called on a pthread. Stores the total byte count at pnum and returns 0.
// NOTE(review): both loop headers appear truncated in this view - confirm against the generated file.
function _fd_write(fd,iov,iovcnt,pnum){if(ENVIRONMENT_IS_PTHREAD)return proxyToMainThread(5,0,1,fd,iov,iovcnt,pnum);iov>>>=0;iovcnt>>>=0;pnum>>>=0;var num=0;for(var i=0;i>>2>>>0];var len=GROWABLE_HEAP_U32()[iov+4>>>2>>>0];iov+=8;for(var j=0;j>>0])}num+=len}GROWABLE_HEAP_U32()[pnum>>>2>>>0]=num;return 0}
// Logs the string found at index in Module.HEAPU8.
// NOTE(review): unlike the other pv_ imports here, index is not passed through unsignedAddress - confirm this is intended.
function _pv_console_log_wasm(index){console.log(arrayBufferToStringAtIndex(Module.HEAPU8,index))}
// Closes the file registered for fileAddress. Writes 0 to statusAddress on success and -1 (after logging) on failure.
var _pv_file_close_wasm=async function(fileAddress,statusAddress){statusAddress=unsignedAddress(statusAddress);return Asyncify.handleAsync(async()=>{try{const file=await PvFile.getPtr(fileAddress);await file.close();setInt(statusAddress,0)}catch(e){console.error("pvFileCloseWasm",e);setInt(statusAddress,-1)}})};_pv_file_close_wasm.isAsync=true;
// Opens path with mode (both read from the heap) and registers the handle under fileAddress. Writes 0 to statusAddress
// on success and -1 on failure; the failure is logged unless the error name is FileNotExists.
var _pv_file_open_wasm=async function(fileAddress,pathAddress,modeAddress,statusAddress){pathAddress=unsignedAddress(pathAddress);modeAddress=unsignedAddress(modeAddress);statusAddress=unsignedAddress(statusAddress);return Asyncify.handleAsync(async()=>{const path=arrayBufferToStringAtIndex(Module.HEAPU8,pathAddress);const mode=arrayBufferToStringAtIndex(Module.HEAPU8,modeAddress);try{const file=await open(path,mode);PvFile.setPtr(fileAddress,file);setInt(statusAddress,0)}catch(e){if(e.name!=="FileNotExists"){console.error("pvFileOpenWasm",e)}setInt(statusAddress,-1)}})};_pv_file_open_wasm.isAsync=true;
// Reads from the file registered for fileAddress; the body continues on the next line of the file.
var _pv_file_read_wasm=async function(fileAddress,contentAddress,size,count,numReadAddress){contentAddress=unsignedAddress(contentAddress);numReadAddress=unsignedAddress(numReadAddress);return Asyncify.handleAsync(async()=>{try{const file=PvFile.getPtr(fileAddress);const content=await 
file.read(size,count);
// Rest of _pv_file_read_wasm: copies the bytes read into the heap at contentAddress and stores content.length/size at
// numReadAddress; on failure the error is logged and -1 is stored instead.
Module.HEAPU8.set(content,contentAddress);setInt(numReadAddress,content.length/size)}catch(e){console.error("pvFileReadWasm",e);setInt(numReadAddress,-1)}})};_pv_file_read_wasm.isAsync=true;
// Seeks the file registered for fileAddress. Writes 0 to statusAddress on success and -1 (after logging) on failure.
function _pv_file_seek_wasm(fileAddress,offset,whence,statusAddress){statusAddress=unsignedAddress(statusAddress);try{const file=PvFile.getPtr(fileAddress);file.seek(offset,whence);setInt(statusAddress,0)}catch(e){console.error("pvFileSeekWasm",e);setInt(statusAddress,-1)}}
// Writes the current file offset to offsetAddress, or -1 (after logging) on failure.
function _pv_file_tell_wasm(fileAddress,offsetAddress){offsetAddress=unsignedAddress(offsetAddress);try{const file=PvFile.getPtr(fileAddress);setInt(offsetAddress,file.tell())}catch(e){console.error("pvFileTellWasm",e);setInt(offsetAddress,-1)}}
// Performs an HTTPS request described by strings read from the heap. The body is only attached when non-empty and the
// headers only when the parsed header object has at least one key. The function continues on the next line of the file.
var _pv_https_request_wasm=async function(httpMethodAddress,serverNameAddress,endpointAddress,headerAddress,bodyAddress,timeoutMs,responseAddressAddress,responseSizeAddress,responseCodeAddress){httpMethodAddress=unsignedAddress(httpMethodAddress);serverNameAddress=unsignedAddress(serverNameAddress);endpointAddress=unsignedAddress(endpointAddress);headerAddress=unsignedAddress(headerAddress);bodyAddress=unsignedAddress(bodyAddress);responseAddressAddress=unsignedAddress(responseAddressAddress);responseSizeAddress=unsignedAddress(responseSizeAddress);responseCodeAddress=unsignedAddress(responseCodeAddress);return Asyncify.handleAsync(async()=>{const httpMethod=arrayBufferToStringAtIndex(Module.HEAPU8,httpMethodAddress);const serverName=arrayBufferToStringAtIndex(Module.HEAPU8,serverNameAddress);const endpoint=arrayBufferToStringAtIndex(Module.HEAPU8,endpointAddress);const header=arrayBufferToStringAtIndex(Module.HEAPU8,headerAddress);const body=arrayBufferToStringAtIndex(Module.HEAPU8,bodyAddress);const headerObject=stringHeaderToObject(header);const options={method:httpMethod};if(body.length>0){options.body=body}if(Object.keys(headerObject).length>0){options.headers=headerObject}let response;let responseText;let statusCode;try{response=await 
fetchWithTimeout("https://"+serverName+endpoint,options,timeoutMs);
// Rest of _pv_https_request_wasm. A failed fetch or a failed body read is logged and the callback returns without
// writing to any of the response pointers. Otherwise responseText.length+1 bytes are allocated; a zero allocation
// result stores 0 at responseAddressAddress and returns.
// NOTE(review): the copy loop, the end of this function and the header of _pv_sleep_wasm appear truncated in this view.
statusCode=response.status}catch(error){console.error("pvHttpsRequestWasm",`Failed to fetch: ${error}`);return}try{responseText=await response.text()}catch(error){console.error("pvHttpsRequestWasm",`Failed to get response text: ${error}`);return}const responseAddress=Module._malloc((responseText.length+1)*Int8Array.BYTES_PER_ELEMENT);if(responseAddress===0){console.error("pvMallocError","pvHttpsRequestWasm: cannot allocate memory for response");setInt(responseAddressAddress,0);return}setInt(responseSizeAddress,responseText.length+1);setInt(responseAddressAddress,responseAddress);for(let i=0;inew Promise(resolve=>{setTimeout(()=>{resolve()},ms)}))};_pv_sleep_wasm.isAsync=true;
// Current time in seconds (Date.now() divided by 1000).
function _pv_time_wasm(){return Date.now()/1e3}
// Replaces the two placeholder declarations below with the implementations returned by xpuCpuFunctions().
function xpu_cpu_support(){const functions=xpuCpuFunctions();if(typeof _pv_xpu_device_info_wasm!=="undefined")_pv_xpu_device_info_wasm=functions.pv_xpu_device_info_wasm;if(typeof _pv_xpu_get_max_workers_wasm!=="undefined")_pv_xpu_get_max_workers_wasm=functions.pv_xpu_get_max_workers_wasm}
// Placeholder declarations with empty bodies; the first two are reassigned by xpu_cpu_support.
// NOTE(review): the webgpu placeholders are presumably reassigned by xpu_webgpu_support, which is not visible here - confirm.
function _pv_xpu_device_info_wasm(){}function _pv_xpu_get_max_workers_wasm(){}function _pv_xpu_webgpu_device_cleanup_wasm(){}
async function _pv_xpu_webgpu_device_info_wasm(){}_pv_xpu_webgpu_device_info_wasm.isAsync=true;
async function _pv_xpu_webgpu_device_init_wasm(){}_pv_xpu_webgpu_device_init_wasm.isAsync=true;function _pv_xpu_webgpu_device_load_shader_func_wasm(){}function _pv_xpu_webgpu_device_mem_alloc_wasm(){}
async function _pv_xpu_webgpu_device_mem_copy_from_xpu_wasm(){}_pv_xpu_webgpu_device_mem_copy_from_xpu_wasm.isAsync=true;function _pv_xpu_webgpu_device_mem_copy_to_xpu_wasm(){}function _pv_xpu_webgpu_device_mem_free_wasm(){}function _pv_xpu_webgpu_device_mem_memset_wasm(){}
async function _pv_xpu_webgpu_device_wait_wasm(){}_pv_xpu_webgpu_device_wait_wasm.isAsync=true;
// Returns func() and converts any thrown value into an abort.
var runAndAbortIfError=func=>{try{return func()}catch(e){abort(e)}};var sigToWasmTypes=sig=>{var 
typeNames={i:"i32",j:"i64",f:"f32",d:"f64",e:"externref",p:"i32"};
// Rest of sigToWasmTypes: the first signature character selects the result type (v means no result).
// NOTE(review): the parameter loop, the end of this function and the header of the function that decrements
// runtimeKeepaliveCounter appear truncated in this view - confirm against the generated file.
var type={parameters:[],results:sig[0]=="v"?[]:[typeNames[sig[0]]]};for(var i=1;i{runtimeKeepaliveCounter-=1};
var Asyncify={
// Computes for each function import whether it is async (isAsync flag or a name matching importPattern);
// in this build the computed flag is not used for anything else.
instrumentWasmImports(imports){var importPattern=/^(pv_https_request_wasm|pv_file_open_wasm|pv_file_close_wasm|pv_file_read_wasm|pv_file_write_wasm|pv_file_remove_wasm|pv_sleep_wasm|pv_xpu_webgpu_device_init_wasm|pv_xpu_webgpu_device_info_wasm|pv_xpu_webgpu_device_mem_copy_from_xpu_wasm|pv_xpu_webgpu_device_wait_wasm|pv_xpu_webgpu_timer_stop_wasm|invoke_.*|__asyncjs__.*)$/;for(let[x,original]of Object.entries(imports)){if(typeof original=="function"){let isAsyncifyImport=original.isAsync||importPattern.test(x)}}},
// Returns a copy of exports in which every function is wrapped: the export name is pushed on exportCallStack for the
// duration of the call and, unless ABORT is set, popped afterwards followed by maybeStopUnwind().
instrumentWasmExports(exports){var ret={};for(let[x,original]of Object.entries(exports)){if(typeof original=="function"){ret[x]=(...args)=>{Asyncify.exportCallStack.push(x);try{return original(...args)}finally{if(!ABORT){var y=Asyncify.exportCallStack.pop();Asyncify.maybeStopUnwind()}}}}else{ret[x]=original}}return ret},
// State constants and mutable bookkeeping fields.
State:{Normal:0,Unwinding:1,Rewinding:2,Disabled:3},state:0,StackSize:4096,currData:null,handleSleepReturnValue:0,exportCallStack:[],callStackNameToId:{},callStackIdToName:{},callStackId:0,asyncPromiseHandlers:null,sleepCallbacks:[],
// Returns the numeric id for funcName, assigning the next free id on first use and recording both directions of the mapping.
getCallStackId(funcName){var id=Asyncify.callStackNameToId[funcName];if(id===undefined){id=Asyncify.callStackId++;Asyncify.callStackNameToId[funcName]=id;Asyncify.callStackIdToName[id]=funcName}return id},
// When an unwind is in progress and the export call stack is empty: switch back to Normal, keep the runtime alive and
// stop the unwind; Fibers.trampoline() is called when Fibers is defined.
maybeStopUnwind(){if(Asyncify.currData&&Asyncify.state===Asyncify.State.Unwinding&&Asyncify.exportCallStack.length===0){Asyncify.state=Asyncify.State.Normal;runtimeKeepalivePush();runAndAbortIfError(_asyncify_stop_unwind);if(typeof Fibers!="undefined"){Fibers.trampoline()}}},
// Returns a promise whose resolve and reject functions are stored in asyncPromiseHandlers.
whenDone(){return new Promise((resolve,reject)=>{Asyncify.asyncPromiseHandlers={resolve:resolve,reject:reject}})},
// Continues on the next line of the file.
allocateData(){var 
ptr=_malloc(12+Asyncify.StackSize);
// Rest of allocateData: the allocation is 12 header bytes followed by StackSize bytes; the header is filled in by
// setDataHeader and setDataRewindFunc and the pointer is returned.
Asyncify.setDataHeader(ptr,ptr+12,Asyncify.StackSize);Asyncify.setDataRewindFunc(ptr);return ptr},
// Stores the stack start at ptr and the stack end (stack+stackSize) at ptr+4.
setDataHeader(ptr,stack,stackSize){GROWABLE_HEAP_U32()[ptr>>>2>>>0]=stack;GROWABLE_HEAP_U32()[ptr+4>>>2>>>0]=stack+stackSize},
// Stores at ptr+8 the call stack id of the first entry of exportCallStack.
setDataRewindFunc(ptr){var bottomOfCallStack=Asyncify.exportCallStack[0];var rewindId=Asyncify.getCallStackId(bottomOfCallStack);GROWABLE_HEAP_I32()[ptr+8>>>2>>>0]=rewindId},
// Reads the id stored at ptr+8 and maps it back to the export name.
getDataRewindFuncName(ptr){var id=GROWABLE_HEAP_I32()[ptr+8>>>2>>>0];var name=Asyncify.callStackIdToName[id];return name},
// Looks the export up by name in wasmExports.
getDataRewindFunc(name){var func=wasmExports[name];return func},
// Re-enters the export recorded in the data header, after releasing one keepalive reference; it is called with no arguments.
doRewind(ptr){var name=Asyncify.getDataRewindFuncName(ptr);var func=Asyncify.getDataRewindFunc(name);runtimeKeepalivePop();return func()},
// Normal state: startAsync receives a wake-up callback. If the callback runs before startAsync returns, nothing is
// unwound. Otherwise the state becomes Unwinding, a data block is allocated and the unwind is started; when the
// callback fires later it switches to Rewinding and re-enters the recorded export through doRewind.
// A value returned or thrown by the rewind is delivered to asyncPromiseHandlers when present and currData is cleared;
// an error that was not delivered that way is rethrown. The Rewinding branch continues on the next line of the file.
handleSleep(startAsync){if(ABORT)return;if(Asyncify.state===Asyncify.State.Normal){var reachedCallback=false;var reachedAfterCallback=false;startAsync((handleSleepReturnValue=0)=>{if(ABORT)return;Asyncify.handleSleepReturnValue=handleSleepReturnValue;reachedCallback=true;if(!reachedAfterCallback){return}Asyncify.state=Asyncify.State.Rewinding;runAndAbortIfError(()=>_asyncify_start_rewind(Asyncify.currData));if(typeof Browser!="undefined"&&Browser.mainLoop.func){Browser.mainLoop.resume()}var asyncWasmReturnValue,isError=false;try{asyncWasmReturnValue=Asyncify.doRewind(Asyncify.currData)}catch(err){asyncWasmReturnValue=err;isError=true}var handled=false;if(!Asyncify.currData){var asyncPromiseHandlers=Asyncify.asyncPromiseHandlers;if(asyncPromiseHandlers){Asyncify.asyncPromiseHandlers=null;(isError?asyncPromiseHandlers.reject:asyncPromiseHandlers.resolve)(asyncWasmReturnValue);handled=true}}if(isError&&!handled){throw asyncWasmReturnValue}});
reachedAfterCallback=true;if(!reachedCallback){Asyncify.state=Asyncify.State.Unwinding;Asyncify.currData=Asyncify.allocateData();if(typeof Browser!="undefined"&&Browser.mainLoop.func){Browser.mainLoop.pause()}runAndAbortIfError(()=>_asyncify_start_unwind(Asyncify.currData))}}else 
if(Asyncify.state===Asyncify.State.Rewinding){
// Rest of handleSleep, Rewinding state: return to Normal, stop the rewind, free the data block and run sleepCallbacks.
// Any other state aborts. The value stored by the wake-up callback is returned.
Asyncify.state=Asyncify.State.Normal;runAndAbortIfError(_asyncify_stop_rewind);_free(Asyncify.currData);Asyncify.currData=null;Asyncify.sleepCallbacks.forEach(callUserCallback)}else{abort(`invalid state: ${Asyncify.state}`)}return Asyncify.handleSleepReturnValue},
// Adapts a promise-returning function to handleSleep: the fulfilment value wakes the sleeper.
// NOTE(review): no rejection handler is attached to the promise returned by startAsync().
handleAsync(startAsync){return Asyncify.handleSleep(wakeUp=>{startAsync().then(wakeUp)})}};
// Appends n to target as one byte when below 128, otherwise as two bytes (low 7 bits with the continuation bit, then n>>7).
// NOTE(review): the two-byte form presumably assumes n is below 16384 - confirm.
var uleb128Encode=(n,target)=>{if(n<128){target.push(n)}else{target.push(n%128|128,n>>7)}};
// Appends a function type entry for sig to target, starting with byte 96 and the parameter count.
// NOTE(review): the parameter loop, the end of this function and the header of the next function (the one that wraps
// a JS function, called as convertJsFunctionToWasm elsewhere in this chunk) appear truncated in this view.
// The visible remainder returns a WebAssembly.Function when that constructor exists; otherwise it assembles a small
// module that imports e.f and exports it as f, and returns that export.
var generateFuncType=(sig,target)=>{var sigRet=sig.slice(0,1);var sigParam=sig.slice(1);var typeCodes={i:127,p:127,j:126,f:125,d:124,e:111};target.push(96);uleb128Encode(sigParam.length,target);for(var i=0;i{if(typeof WebAssembly.Function=="function"){return new WebAssembly.Function(sigToWasmTypes(sig),func)}var typeSectionBody=[1];generateFuncType(sig,typeSectionBody);var bytes=[0,97,115,109,1,0,0,0,1];uleb128Encode(typeSectionBody.length,bytes);bytes.push(...typeSectionBody);bytes.push(2,7,1,1,101,1,102,0,0,7,5,1,1,102,0,0);var module=new WebAssembly.Module(new Uint8Array(bytes));var instance=new WebAssembly.Instance(module,{e:{f:func}});var wrappedFunc=instance.exports["f"];return wrappedFunc};
// JS-side cache of wasm table entries, filled lazily by getWasmTableEntry.
var wasmTableMirror=[];var wasmTable;var getWasmTableEntry=funcPtr=>{var func=wasmTableMirror[funcPtr];if(!func){if(funcPtr>=wasmTableMirror.length)wasmTableMirror.length=funcPtr+1;wasmTableMirror[funcPtr]=func=wasmTable.get(funcPtr)}return func};
// NOTE(review): the loop body of updateTableMap and the header of the following lookup function appear truncated in this
// view. The visible remainder creates functionsInTableMap on first use and returns the stored index for func, or 0.
var updateTableMap=(offset,count)=>{if(functionsInTableMap){for(var i=offset;i{if(!functionsInTableMap){functionsInTableMap=new WeakMap;updateTableMap(0,wasmTable.length)}return functionsInTableMap.get(func)||0};
// Returns a recycled index from freeTableIndexes when available, otherwise grows the table by one slot and returns the
// new last index. A RangeError from grow is replaced by a thrown string; other errors are rethrown unchanged.
var freeTableIndexes=[];var getEmptyTableSlot=()=>{if(freeTableIndexes.length){return freeTableIndexes.pop()}try{wasmTable.grow(1)}catch(err){if(!(err instanceof RangeError)){throw err}throw"Unable to grow wasm table. 
Set ALLOW_TABLE_GROWTH."}return wasmTable.length-1};
// Writes func into the table and refreshes the mirror from the table.
var setWasmTableEntry=(idx,func)=>{wasmTable.set(idx,func);wasmTableMirror[idx]=wasmTable.get(idx)};
// Returns the existing table index for func when it is already registered. Otherwise takes an empty slot and stores
// func there; when the table rejects it with a TypeError, a wasm wrapper built from sig is stored instead.
var addFunction=(func,sig)=>{var rtn=getFunctionAddress(func);if(rtn){return rtn}var ret=getEmptyTableSlot();try{setWasmTableEntry(ret,func)}catch(err){if(!(err instanceof TypeError)){throw err}var wrapped=convertJsFunctionToWasm(func,sig);setWasmTableEntry(ret,wrapped)}functionsInTableMap.set(func,ret);return ret};
// Looks up the Module property named by ident with a leading underscore.
var getCFunc=ident=>{var func=Module["_"+ident];return func};var writeArrayToMemory=(array,buffer)=>{GROWABLE_HEAP_I8().set(array,buffer>>>0)};
// Returns the number of bytes str occupies when encoded as UTF-8.
// NOTE(review): the loop header and the first branches appear truncated in this view.
var lengthBytesUTF8=str=>{var len=0;for(var i=0;i=55296&&c<=57343){len+=4;++i}else{len+=3}}return len};
// Encodes str as UTF-8 into heap starting at outIdx, writing at most maxBytesToWrite-1 bytes followed by a zero byte.
// Returns the number of bytes written, not counting the terminator; returns 0 when maxBytesToWrite is not positive.
// NOTE(review): the loop header appears truncated in this view.
var stringToUTF8Array=(str,heap,outIdx,maxBytesToWrite)=>{outIdx>>>=0;if(!(maxBytesToWrite>0))return 0;var startIdx=outIdx;var endIdx=outIdx+maxBytesToWrite-1;for(var i=0;i=55296&&u<=57343){var u1=str.charCodeAt(++i);u=65536+((u&1023)<<10)|u1&1023}if(u<=127){if(outIdx>=endIdx)break;heap[outIdx++>>>0]=u}else if(u<=2047){if(outIdx+1>=endIdx)break;heap[outIdx++>>>0]=192|u>>6;heap[outIdx++>>>0]=128|u&63}else if(u<=65535){if(outIdx+2>=endIdx)break;heap[outIdx++>>>0]=224|u>>12;heap[outIdx++>>>0]=128|u>>6&63;heap[outIdx++>>>0]=128|u&63}else{if(outIdx+3>=endIdx)break;heap[outIdx++>>>0]=240|u>>18;heap[outIdx++>>>0]=128|u>>12&63;heap[outIdx++>>>0]=128|u>>6&63;heap[outIdx++>>>0]=128|u&63}}heap[outIdx>>>0]=0;return outIdx-startIdx};
// Convenience wrapper that targets the unsigned byte heap.
var stringToUTF8=(str,outPtr,maxBytesToWrite)=>stringToUTF8Array(str,GROWABLE_HEAP_U8(),outPtr,maxBytesToWrite);
// Allocates lengthBytesUTF8(str)+1 bytes with stackAlloc, writes the encoded string there and returns the address.
var stringToUTF8OnStack=str=>{var size=lengthBytesUTF8(str)+1;var ret=stackAlloc(size);stringToUTF8(str,ret,size);return ret};
// Calls the C function named ident. String arguments are copied with stringToUTF8OnStack (null, undefined and 0 become 0)
// and array arguments are copied into stackAlloc memory. The body continues on the next line of the file.
var ccall=(ident,returnType,argTypes,args,opts)=>{var toC={string:str=>{var ret=0;if(str!==null&&str!==undefined&&str!==0){ret=stringToUTF8OnStack(str)}return ret},array:arr=>{var ret=stackAlloc(arr.length);writeArrayToMemory(arr,ret);return ret}};function convertReturnValue(ret){if(returnType==="string"){return 
UTF8ToString(ret)}
// Rest of convertReturnValue inside ccall: boolean results are coerced with Boolean, everything else is returned as is.
if(returnType==="boolean")return Boolean(ret);return ret}
// NOTE(review): the argument loop, the end of ccall and the header of cwrap appear truncated in this view.
// The visible remainder of cwrap returns the raw C function when every argument type is number or boolean, the return
// type is not string and no opts are given; otherwise it returns a closure that forwards to ccall.
var func=getCFunc(ident);var cArgs=[];var stack=0;if(args){for(var i=0;i{var numericArgs=!argTypes||argTypes.every(type=>type==="number"||type==="boolean");var numericRet=returnType!=="string";if(numericRet&&numericArgs&&!opts){return getCFunc(ident)}return(...args)=>ccall(ident,returnType,argTypes,args,opts)};
// Start-up calls executed while this script is evaluated.
Module["cwrap"]=cwrap;PThread.init();xpu_webgpu_support();xpu_cpu_support();
// Functions callable through proxyToMainThread; the array position is the index passed as its first argument
// (2, 3, 4 and 5 are used by pthreadCreateProxied, _environ_get, _environ_sizes_get and _fd_write in this chunk).
var proxiedFunctionTable=[_proc_exit,exitOnMainThread,pthreadCreateProxied,_environ_get,_environ_sizes_get,_fd_write];
// Builds the import object: each key is mapped to the JS implementation defined in this file, plus the shared memory.
// The object literal continues on the following lines of the file.
var wasmImports;function assignWasmImports(){wasmImports={__assert_fail:___assert_fail,__pthread_create_js:___pthread_create_js,_abort_js:__abort_js,_emscripten_get_now_is_monotonic:__emscripten_get_now_is_monotonic,_emscripten_init_main_thread_js:__emscripten_init_main_thread_js,_emscripten_notify_mailbox_postmessage:__emscripten_notify_mailbox_postmessage,_emscripten_receive_on_main_thread_js:__emscripten_receive_on_main_thread_js,_emscripten_thread_cleanup:__emscripten_thread_cleanup,_emscripten_thread_mailbox_await:__emscripten_thread_mailbox_await,_emscripten_thread_set_strongref:__emscripten_thread_set_strongref,emscripten_check_blocking_allowed:_emscripten_check_blocking_allowed,emscripten_date_now:_emscripten_date_now,emscripten_exit_with_live_runtime:_emscripten_exit_with_live_runtime,emscripten_get_heap_max:_emscripten_get_heap_max,emscripten_get_now:_emscripten_get_now,emscripten_num_logical_cores:_emscripten_num_logical_cores,emscripten_resize_heap:_emscripten_resize_heap,environ_get:_environ_get,environ_sizes_get:_environ_sizes_get,exit:_exit,fd_write:_fd_write,memory:wasmMemory,pv_console_log_wasm:_pv_console_log_wasm,pv_file_close_wasm:_pv_file_close_wasm,pv_file_open_wasm:_pv_file_open_wasm,pv_file_read_wasm:_pv_file_read_wasm,pv_file_seek_wasm:_pv_file_seek_wasm,pv_file_tell_wasm:_pv_file_tell_wasm,pv_https_request_wasm:_pv_https_request_wasm,pv_picollm_attention_dot_product_webg
pu_wasm:_pv_picollm_attention_dot_product_webgpu_wasm,
// Continuation of the wasmImports object literal: every key maps to the identically named JS function with a leading underscore.
// NOTE(review): the definitions of these _pv_picollm_*_webgpu_wasm functions are not visible in this chunk.
pv_picollm_attention_encode_webgpu_wasm:_pv_picollm_attention_encode_webgpu_wasm,pv_picollm_attention_fir_webgpu_wasm:_pv_picollm_attention_fir_webgpu_wasm,pv_picollm_attention_precompute_encoding_webgpu_wasm:_pv_picollm_attention_precompute_encoding_webgpu_wasm,pv_picollm_attention_softmax_webgpu_wasm:_pv_picollm_attention_softmax_webgpu_wasm,pv_picollm_attention_transpose_query_webgpu_wasm:_pv_picollm_attention_transpose_query_webgpu_wasm,pv_picollm_attention_update_kv_webgpu_wasm:_pv_picollm_attention_update_kv_webgpu_wasm,pv_picollm_feed_forward_almost_gelu_webgpu_wasm:_pv_picollm_feed_forward_almost_gelu_webgpu_wasm,pv_picollm_feed_forward_gelu_webgpu_wasm:_pv_picollm_feed_forward_gelu_webgpu_wasm,pv_picollm_feed_forward_multiply_buffers_webgpu_wasm:_pv_picollm_feed_forward_multiply_buffers_webgpu_wasm,pv_picollm_feed_forward_silu_webgpu_wasm:_pv_picollm_feed_forward_silu_webgpu_wasm,pv_picollm_gate_forward_webgpu_wasm:_pv_picollm_gate_forward_webgpu_wasm,pv_picollm_moe_transformer_add_buffers_webgpu_wasm:_pv_picollm_moe_transformer_add_buffers_webgpu_wasm,pv_picollm_moe_transformer_add_to_buffer_webgpu_wasm:_pv_picollm_moe_transformer_add_to_buffer_webgpu_wasm,pv_picollm_moe_transformer_multiply_weight_and_add_to_buffer_webgpu_wasm:_pv_picollm_moe_transformer_multiply_weight_and_add_to_buffer_webgpu_wasm,pv_picollm_norm_forward_webgpu_wasm:_pv_picollm_norm_forward_webgpu_wasm,pv_picollm_norm_layer_forward_webgpu_wasm:_pv_picollm_norm_layer_forward_webgpu_wasm,pv_picollm_transformer_add_buffers_webgpu_wasm:_pv_picollm_transformer_add_buffers_webgpu_wasm,pv_picollm_transformer_add_to_buffer_webgpu_wasm:_pv_picollm_transformer_add_to_buffer_webgpu_wasm,pv_picollm_weight_block_mixed_16x8_add_bias_webgpu_wasm:_pv_picollm_weight_block_mixed_16x8_add_bias_webgpu_wasm,pv_picollm_weight_block_mixed_16x8_forward_multiple_shuffle_x_webgpu_wasm:_pv_picollm_weight_block_mixed_16x8_forward_multiple_shuffle_x_webgpu_wasm,
pv_picollm_weight_block_mixed_16x8_forward_multiple_shuffle_y_webgpu_wasm:_pv_picollm_weight_block_mixed_16x8_forward_multiple_shuffle_y_webgpu_wasm,
// End of the wasmImports object literal: remaining picoLLM WebGPU kernels, then the sleep, time and XPU device imports.
pv_picollm_weight_block_mixed_16x8_forward_multiple_webgpu_wasm:_pv_picollm_weight_block_mixed_16x8_forward_multiple_webgpu_wasm,pv_picollm_weight_block_mixed_16x8_forward_single_reduce_y_webgpu_wasm:_pv_picollm_weight_block_mixed_16x8_forward_single_reduce_y_webgpu_wasm,pv_picollm_weight_block_mixed_16x8_forward_single_shuffle_x_webgpu_wasm:_pv_picollm_weight_block_mixed_16x8_forward_single_shuffle_x_webgpu_wasm,pv_picollm_weight_block_mixed_16x8_forward_single_webgpu_wasm:_pv_picollm_weight_block_mixed_16x8_forward_single_webgpu_wasm,pv_picollm_weight_block_mixed_16x8_preprocess_blocks_webgpu_wasm:_pv_picollm_weight_block_mixed_16x8_preprocess_blocks_webgpu_wasm,pv_picollm_weight_float_forward_webgpu_wasm:_pv_picollm_weight_float_forward_webgpu_wasm,pv_sleep_wasm:_pv_sleep_wasm,pv_time_wasm:_pv_time_wasm,pv_xpu_device_info_wasm:_pv_xpu_device_info_wasm,pv_xpu_get_max_workers_wasm:_pv_xpu_get_max_workers_wasm,pv_xpu_webgpu_device_cleanup_wasm:_pv_xpu_webgpu_device_cleanup_wasm,pv_xpu_webgpu_device_info_wasm:_pv_xpu_webgpu_device_info_wasm,pv_xpu_webgpu_device_init_wasm:_pv_xpu_webgpu_device_init_wasm,pv_xpu_webgpu_device_load_shader_func_wasm:_pv_xpu_webgpu_device_load_shader_func_wasm,pv_xpu_webgpu_device_mem_alloc_wasm:_pv_xpu_webgpu_device_mem_alloc_wasm,pv_xpu_webgpu_device_mem_copy_from_xpu_wasm:_pv_xpu_webgpu_device_mem_copy_from_xpu_wasm,pv_xpu_webgpu_device_mem_copy_to_xpu_wasm:_pv_xpu_webgpu_device_mem_copy_to_xpu_wasm,pv_xpu_webgpu_device_mem_free_wasm:_pv_xpu_webgpu_device_mem_free_wasm,pv_xpu_webgpu_device_mem_memset_wasm:_pv_xpu_webgpu_device_mem_memset_wasm,pv_xpu_webgpu_device_wait_wasm:_pv_xpu_webgpu_device_wait_wasm}}
// wasmExports receives whatever createWasm() returns.
var wasmExports=createWasm();
// Self-replacing wrapper: the first call rebinds the variable to the wasm export and invokes it.
var ___wasm_call_ctors=()=>(___wasm_call_ctors=wasmExports["__wasm_call_ctors"])();var 
__Znwm=Module["__Znwm"]=a0=>(__Znwm=Module["__Znwm"]=wasmExports["_Znwm"])(a0);
// Lazy export wrappers. Each one is a stub stored both in a local variable and on Module; on its first call it rebinds
// both to the matching wasmExports entry and forwards its arguments to that entry.
var __Znam=Module["__Znam"]=a0=>(__Znam=Module["__Znam"]=wasmExports["_Znam"])(a0);var __ZdlPv=Module["__ZdlPv"]=a0=>(__ZdlPv=Module["__ZdlPv"]=wasmExports["_ZdlPv"])(a0);var __ZdlPvm=Module["__ZdlPvm"]=(a0,a1)=>(__ZdlPvm=Module["__ZdlPvm"]=wasmExports["_ZdlPvm"])(a0,a1);var __ZdaPv=Module["__ZdaPv"]=a0=>(__ZdaPv=Module["__ZdaPv"]=wasmExports["_ZdaPv"])(a0);var __ZdaPvm=Module["__ZdaPvm"]=(a0,a1)=>(__ZdaPvm=Module["__ZdaPvm"]=wasmExports["_ZdaPvm"])(a0,a1);var __ZnwmSt11align_val_t=Module["__ZnwmSt11align_val_t"]=(a0,a1)=>(__ZnwmSt11align_val_t=Module["__ZnwmSt11align_val_t"]=wasmExports["_ZnwmSt11align_val_t"])(a0,a1);var __ZnamSt11align_val_t=Module["__ZnamSt11align_val_t"]=(a0,a1)=>(__ZnamSt11align_val_t=Module["__ZnamSt11align_val_t"]=wasmExports["_ZnamSt11align_val_t"])(a0,a1);var __ZdlPvSt11align_val_t=Module["__ZdlPvSt11align_val_t"]=(a0,a1)=>(__ZdlPvSt11align_val_t=Module["__ZdlPvSt11align_val_t"]=wasmExports["_ZdlPvSt11align_val_t"])(a0,a1);var __ZdlPvmSt11align_val_t=Module["__ZdlPvmSt11align_val_t"]=(a0,a1,a2)=>(__ZdlPvmSt11align_val_t=Module["__ZdlPvmSt11align_val_t"]=wasmExports["_ZdlPvmSt11align_val_t"])(a0,a1,a2);var __ZdaPvSt11align_val_t=Module["__ZdaPvSt11align_val_t"]=(a0,a1)=>(__ZdaPvSt11align_val_t=Module["__ZdaPvSt11align_val_t"]=wasmExports["_ZdaPvSt11align_val_t"])(a0,a1);var __ZdaPvmSt11align_val_t=Module["__ZdaPvmSt11align_val_t"]=(a0,a1,a2)=>(__ZdaPvmSt11align_val_t=Module["__ZdaPvmSt11align_val_t"]=wasmExports["_ZdaPvmSt11align_val_t"])(a0,a1,a2);var __ZSt14set_unexpectedPFvvE=Module["__ZSt14set_unexpectedPFvvE"]=a0=>(__ZSt14set_unexpectedPFvvE=Module["__ZSt14set_unexpectedPFvvE"]=wasmExports["_ZSt14set_unexpectedPFvvE"])(a0);var __ZSt13set_terminatePFvvE=Module["__ZSt13set_terminatePFvvE"]=a0=>(__ZSt13set_terminatePFvvE=Module["__ZSt13set_terminatePFvvE"]=wasmExports["_ZSt13set_terminatePFvvE"])(a0);var 
__ZSt15set_new_handlerPFvvE=Module["__ZSt15set_new_handlerPFvvE"]=a0=>(__ZSt15set_new_handlerPFvvE=Module["__ZSt15set_new_handlerPFvvE"]=wasmExports["_ZSt15set_new_handlerPFvvE"])(a0);
// Lazy export wrappers. Each one is a stub stored both in a local variable and on Module; on its first call it rebinds
// both to the matching wasmExports entry and forwards its arguments to that entry.
var __ZSt14get_unexpectedv=Module["__ZSt14get_unexpectedv"]=()=>(__ZSt14get_unexpectedv=Module["__ZSt14get_unexpectedv"]=wasmExports["_ZSt14get_unexpectedv"])();var __ZSt10unexpectedv=Module["__ZSt10unexpectedv"]=()=>(__ZSt10unexpectedv=Module["__ZSt10unexpectedv"]=wasmExports["_ZSt10unexpectedv"])();var __ZSt13get_terminatev=Module["__ZSt13get_terminatev"]=()=>(__ZSt13get_terminatev=Module["__ZSt13get_terminatev"]=wasmExports["_ZSt13get_terminatev"])();var __ZSt9terminatev=Module["__ZSt9terminatev"]=()=>(__ZSt9terminatev=Module["__ZSt9terminatev"]=wasmExports["_ZSt9terminatev"])();var __ZSt15get_new_handlerv=Module["__ZSt15get_new_handlerv"]=()=>(__ZSt15get_new_handlerv=Module["__ZSt15get_new_handlerv"]=wasmExports["_ZSt15get_new_handlerv"])();var ___cxa_pure_virtual=Module["___cxa_pure_virtual"]=()=>(___cxa_pure_virtual=Module["___cxa_pure_virtual"]=wasmExports["__cxa_pure_virtual"])();var ___cxa_deleted_virtual=Module["___cxa_deleted_virtual"]=()=>(___cxa_deleted_virtual=Module["___cxa_deleted_virtual"]=wasmExports["__cxa_deleted_virtual"])();var ___dynamic_cast=Module["___dynamic_cast"]=(a0,a1,a2,a3)=>(___dynamic_cast=Module["___dynamic_cast"]=wasmExports["__dynamic_cast"])(a0,a1,a2,a3);var __ZNSt9exceptionD2Ev=Module["__ZNSt9exceptionD2Ev"]=a0=>(__ZNSt9exceptionD2Ev=Module["__ZNSt9exceptionD2Ev"]=wasmExports["_ZNSt9exceptionD2Ev"])(a0);var __ZNSt9exceptionD0Ev=Module["__ZNSt9exceptionD0Ev"]=a0=>(__ZNSt9exceptionD0Ev=Module["__ZNSt9exceptionD0Ev"]=wasmExports["_ZNSt9exceptionD0Ev"])(a0);var __ZNSt9exceptionD1Ev=Module["__ZNSt9exceptionD1Ev"]=a0=>(__ZNSt9exceptionD1Ev=Module["__ZNSt9exceptionD1Ev"]=wasmExports["_ZNSt9exceptionD1Ev"])(a0);var 
__ZNKSt9exception4whatEv=Module["__ZNKSt9exception4whatEv"]=a0=>(__ZNKSt9exception4whatEv=Module["__ZNKSt9exception4whatEv"]=wasmExports["_ZNKSt9exception4whatEv"])(a0);
// Lazy export wrappers. Each one is a stub stored both in a local variable and on Module; on its first call it rebinds
// both to the matching wasmExports entry and forwards its arguments to that entry.
var __ZNSt13bad_exceptionD0Ev=Module["__ZNSt13bad_exceptionD0Ev"]=a0=>(__ZNSt13bad_exceptionD0Ev=Module["__ZNSt13bad_exceptionD0Ev"]=wasmExports["_ZNSt13bad_exceptionD0Ev"])(a0);var __ZNSt13bad_exceptionD1Ev=Module["__ZNSt13bad_exceptionD1Ev"]=a0=>(__ZNSt13bad_exceptionD1Ev=Module["__ZNSt13bad_exceptionD1Ev"]=wasmExports["_ZNSt13bad_exceptionD1Ev"])(a0);var __ZNKSt13bad_exception4whatEv=Module["__ZNKSt13bad_exception4whatEv"]=a0=>(__ZNKSt13bad_exception4whatEv=Module["__ZNKSt13bad_exception4whatEv"]=wasmExports["_ZNKSt13bad_exception4whatEv"])(a0);var __ZNSt9bad_allocC2Ev=Module["__ZNSt9bad_allocC2Ev"]=a0=>(__ZNSt9bad_allocC2Ev=Module["__ZNSt9bad_allocC2Ev"]=wasmExports["_ZNSt9bad_allocC2Ev"])(a0);var __ZNSt9bad_allocD0Ev=Module["__ZNSt9bad_allocD0Ev"]=a0=>(__ZNSt9bad_allocD0Ev=Module["__ZNSt9bad_allocD0Ev"]=wasmExports["_ZNSt9bad_allocD0Ev"])(a0);var __ZNSt9bad_allocD1Ev=Module["__ZNSt9bad_allocD1Ev"]=a0=>(__ZNSt9bad_allocD1Ev=Module["__ZNSt9bad_allocD1Ev"]=wasmExports["_ZNSt9bad_allocD1Ev"])(a0);var __ZNKSt9bad_alloc4whatEv=Module["__ZNKSt9bad_alloc4whatEv"]=a0=>(__ZNKSt9bad_alloc4whatEv=Module["__ZNKSt9bad_alloc4whatEv"]=wasmExports["_ZNKSt9bad_alloc4whatEv"])(a0);var __ZNSt20bad_array_new_lengthC2Ev=Module["__ZNSt20bad_array_new_lengthC2Ev"]=a0=>(__ZNSt20bad_array_new_lengthC2Ev=Module["__ZNSt20bad_array_new_lengthC2Ev"]=wasmExports["_ZNSt20bad_array_new_lengthC2Ev"])(a0);var __ZNSt20bad_array_new_lengthD0Ev=Module["__ZNSt20bad_array_new_lengthD0Ev"]=a0=>(__ZNSt20bad_array_new_lengthD0Ev=Module["__ZNSt20bad_array_new_lengthD0Ev"]=wasmExports["_ZNSt20bad_array_new_lengthD0Ev"])(a0);var __ZNSt20bad_array_new_lengthD1Ev=Module["__ZNSt20bad_array_new_lengthD1Ev"]=a0=>(__ZNSt20bad_array_new_lengthD1Ev=Module["__ZNSt20bad_array_new_lengthD1Ev"]=wasmExports["_ZNSt20bad_array_new_lengthD1Ev"])(a0);var 
__ZNKSt20bad_array_new_length4whatEv=Module["__ZNKSt20bad_array_new_length4whatEv"]=a0=>(__ZNKSt20bad_array_new_length4whatEv=Module["__ZNKSt20bad_array_new_length4whatEv"]=wasmExports["_ZNKSt20bad_array_new_length4whatEv"])(a0);
// Lazy export wrappers. Each one is a stub stored both in a local variable and on Module; on its first call it rebinds
// both to the matching wasmExports entry and forwards its arguments to that entry.
var __ZNSt13bad_exceptionD2Ev=Module["__ZNSt13bad_exceptionD2Ev"]=a0=>(__ZNSt13bad_exceptionD2Ev=Module["__ZNSt13bad_exceptionD2Ev"]=wasmExports["_ZNSt13bad_exceptionD2Ev"])(a0);var __ZNSt9bad_allocC1Ev=Module["__ZNSt9bad_allocC1Ev"]=a0=>(__ZNSt9bad_allocC1Ev=Module["__ZNSt9bad_allocC1Ev"]=wasmExports["_ZNSt9bad_allocC1Ev"])(a0);var __ZNSt9bad_allocD2Ev=Module["__ZNSt9bad_allocD2Ev"]=a0=>(__ZNSt9bad_allocD2Ev=Module["__ZNSt9bad_allocD2Ev"]=wasmExports["_ZNSt9bad_allocD2Ev"])(a0);var __ZNSt20bad_array_new_lengthC1Ev=Module["__ZNSt20bad_array_new_lengthC1Ev"]=a0=>(__ZNSt20bad_array_new_lengthC1Ev=Module["__ZNSt20bad_array_new_lengthC1Ev"]=wasmExports["_ZNSt20bad_array_new_lengthC1Ev"])(a0);var __ZNSt20bad_array_new_lengthD2Ev=Module["__ZNSt20bad_array_new_lengthD2Ev"]=a0=>(__ZNSt20bad_array_new_lengthD2Ev=Module["__ZNSt20bad_array_new_lengthD2Ev"]=wasmExports["_ZNSt20bad_array_new_lengthD2Ev"])(a0);var __ZNSt9type_infoD2Ev=Module["__ZNSt9type_infoD2Ev"]=a0=>(__ZNSt9type_infoD2Ev=Module["__ZNSt9type_infoD2Ev"]=wasmExports["_ZNSt9type_infoD2Ev"])(a0);var __ZNSt9type_infoD0Ev=Module["__ZNSt9type_infoD0Ev"]=a0=>(__ZNSt9type_infoD0Ev=Module["__ZNSt9type_infoD0Ev"]=wasmExports["_ZNSt9type_infoD0Ev"])(a0);var __ZNSt9type_infoD1Ev=Module["__ZNSt9type_infoD1Ev"]=a0=>(__ZNSt9type_infoD1Ev=Module["__ZNSt9type_infoD1Ev"]=wasmExports["_ZNSt9type_infoD1Ev"])(a0);var __ZNSt8bad_castC2Ev=Module["__ZNSt8bad_castC2Ev"]=a0=>(__ZNSt8bad_castC2Ev=Module["__ZNSt8bad_castC2Ev"]=wasmExports["_ZNSt8bad_castC2Ev"])(a0);var __ZNSt8bad_castD2Ev=Module["__ZNSt8bad_castD2Ev"]=a0=>(__ZNSt8bad_castD2Ev=Module["__ZNSt8bad_castD2Ev"]=wasmExports["_ZNSt8bad_castD2Ev"])(a0);var 
__ZNSt8bad_castD0Ev=Module["__ZNSt8bad_castD0Ev"]=a0=>(__ZNSt8bad_castD0Ev=Module["__ZNSt8bad_castD0Ev"]=wasmExports["_ZNSt8bad_castD0Ev"])(a0);var __ZNSt8bad_castD1Ev=Module["__ZNSt8bad_castD1Ev"]=a0=>(__ZNSt8bad_castD1Ev=Module["__ZNSt8bad_castD1Ev"]=wasmExports["_ZNSt8bad_castD1Ev"])(a0);var __ZNKSt8bad_cast4whatEv=Module["__ZNKSt8bad_cast4whatEv"]=a0=>(__ZNKSt8bad_cast4whatEv=Module["__ZNKSt8bad_cast4whatEv"]=wasmExports["_ZNKSt8bad_cast4whatEv"])(a0);var __ZNSt10bad_typeidC2Ev=Module["__ZNSt10bad_typeidC2Ev"]=a0=>(__ZNSt10bad_typeidC2Ev=Module["__ZNSt10bad_typeidC2Ev"]=wasmExports["_ZNSt10bad_typeidC2Ev"])(a0);var __ZNSt10bad_typeidD2Ev=Module["__ZNSt10bad_typeidD2Ev"]=a0=>(__ZNSt10bad_typeidD2Ev=Module["__ZNSt10bad_typeidD2Ev"]=wasmExports["_ZNSt10bad_typeidD2Ev"])(a0);var __ZNSt10bad_typeidD0Ev=Module["__ZNSt10bad_typeidD0Ev"]=a0=>(__ZNSt10bad_typeidD0Ev=Module["__ZNSt10bad_typeidD0Ev"]=wasmExports["_ZNSt10bad_typeidD0Ev"])(a0);var __ZNSt10bad_typeidD1Ev=Module["__ZNSt10bad_typeidD1Ev"]=a0=>(__ZNSt10bad_typeidD1Ev=Module["__ZNSt10bad_typeidD1Ev"]=wasmExports["_ZNSt10bad_typeidD1Ev"])(a0);var __ZNKSt10bad_typeid4whatEv=Module["__ZNKSt10bad_typeid4whatEv"]=a0=>(__ZNKSt10bad_typeid4whatEv=Module["__ZNKSt10bad_typeid4whatEv"]=wasmExports["_ZNKSt10bad_typeid4whatEv"])(a0);var __ZNSt8bad_castC1Ev=Module["__ZNSt8bad_castC1Ev"]=a0=>(__ZNSt8bad_castC1Ev=Module["__ZNSt8bad_castC1Ev"]=wasmExports["_ZNSt8bad_castC1Ev"])(a0);var __ZNSt10bad_typeidC1Ev=Module["__ZNSt10bad_typeidC1Ev"]=a0=>(__ZNSt10bad_typeidC1Ev=Module["__ZNSt10bad_typeidC1Ev"]=wasmExports["_ZNSt10bad_typeidC1Ev"])(a0);var _pv_picollm_delete=Module["_pv_picollm_delete"]=a0=>(_pv_picollm_delete=Module["_pv_picollm_delete"]=wasmExports["pv_picollm_delete"])(a0);var _free=a0=>(_free=wasmExports["free"])(a0);var _malloc=Module["_malloc"]=a0=>(_malloc=Module["_malloc"]=wasmExports["malloc"])(a0);var 
_pv_picollm_version=Module["_pv_picollm_version"]=()=>(_pv_picollm_version=Module["_pv_picollm_version"]=wasmExports["pv_picollm_version"])();var _pv_picollm_init=Module["_pv_picollm_init"]=(a0,a1,a2,a3)=>(_pv_picollm_init=Module["_pv_picollm_init"]=wasmExports["pv_picollm_init"])(a0,a1,a2,a3);var _pv_picollm_generate=Module["_pv_picollm_generate"]=(a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12,a13,a14,a15,a16,a17)=>(_pv_picollm_generate=Module["_pv_picollm_generate"]=wasmExports["pv_picollm_generate"])(a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12,a13,a14,a15,a16,a17);var _pv_picollm_delete_completion_tokens=Module["_pv_picollm_delete_completion_tokens"]=(a0,a1)=>(_pv_picollm_delete_completion_tokens=Module["_pv_picollm_delete_completion_tokens"]=wasmExports["pv_picollm_delete_completion_tokens"])(a0,a1);var _pv_picollm_delete_completion=Module["_pv_picollm_delete_completion"]=a0=>(_pv_picollm_delete_completion=Module["_pv_picollm_delete_completion"]=wasmExports["pv_picollm_delete_completion"])(a0);var _pv_picollm_interrupt=Module["_pv_picollm_interrupt"]=a0=>(_pv_picollm_interrupt=Module["_pv_picollm_interrupt"]=wasmExports["pv_picollm_interrupt"])(a0);var _pv_picollm_tokenize=Module["_pv_picollm_tokenize"]=(a0,a1,a2,a3,a4,a5)=>(_pv_picollm_tokenize=Module["_pv_picollm_tokenize"]=wasmExports["pv_picollm_tokenize"])(a0,a1,a2,a3,a4,a5);var _pv_picollm_delete_tokens=Module["_pv_picollm_delete_tokens"]=a0=>(_pv_picollm_delete_tokens=Module["_pv_picollm_delete_tokens"]=wasmExports["pv_picollm_delete_tokens"])(a0);var _pv_picollm_forward=Module["_pv_picollm_forward"]=(a0,a1,a2,a3)=>(_pv_picollm_forward=Module["_pv_picollm_forward"]=wasmExports["pv_picollm_forward"])(a0,a1,a2,a3);var _pv_picollm_delete_logits=Module["_pv_picollm_delete_logits"]=a0=>(_pv_picollm_delete_logits=Module["_pv_picollm_delete_logits"]=wasmExports["pv_picollm_delete_logits"])(a0);var 
_pv_picollm_reset=Module["_pv_picollm_reset"]=a0=>(_pv_picollm_reset=Module["_pv_picollm_reset"]=wasmExports["pv_picollm_reset"])(a0);var _pv_picollm_model=Module["_pv_picollm_model"]=(a0,a1)=>(_pv_picollm_model=Module["_pv_picollm_model"]=wasmExports["pv_picollm_model"])(a0,a1);var _pv_picollm_context_length=Module["_pv_picollm_context_length"]=(a0,a1)=>(_pv_picollm_context_length=Module["_pv_picollm_context_length"]=wasmExports["pv_picollm_context_length"])(a0,a1);var _pv_picollm_max_top_choices=Module["_pv_picollm_max_top_choices"]=()=>(_pv_picollm_max_top_choices=Module["_pv_picollm_max_top_choices"]=wasmExports["pv_picollm_max_top_choices"])();var _pv_picollm_list_hardware_devices=Module["_pv_picollm_list_hardware_devices"]=(a0,a1)=>(_pv_picollm_list_hardware_devices=Module["_pv_picollm_list_hardware_devices"]=wasmExports["pv_picollm_list_hardware_devices"])(a0,a1);var _pv_picollm_free_hardware_devices=Module["_pv_picollm_free_hardware_devices"]=(a0,a1)=>(_pv_picollm_free_hardware_devices=Module["_pv_picollm_free_hardware_devices"]=wasmExports["pv_picollm_free_hardware_devices"])(a0,a1);var _pv_picollm_table_forward=Module["_pv_picollm_table_forward"]=(a0,a1,a2,a3)=>(_pv_picollm_table_forward=Module["_pv_picollm_table_forward"]=wasmExports["pv_picollm_table_forward"])(a0,a1,a2,a3);var _pv_log_enable=Module["_pv_log_enable"]=()=>(_pv_log_enable=Module["_pv_log_enable"]=wasmExports["pv_log_enable"])();var _pv_log_disable=Module["_pv_log_disable"]=()=>(_pv_log_disable=Module["_pv_log_disable"]=wasmExports["pv_log_disable"])();var _aligned_alloc=Module["_aligned_alloc"]=(a0,a1)=>(_aligned_alloc=Module["_aligned_alloc"]=wasmExports["aligned_alloc"])(a0,a1);var _pv_sample_rate=Module["_pv_sample_rate"]=()=>(_pv_sample_rate=Module["_pv_sample_rate"]=wasmExports["pv_sample_rate"])();var _pv_status_to_string=Module["_pv_status_to_string"]=a0=>(_pv_status_to_string=Module["_pv_status_to_string"]=wasmExports["pv_status_to_string"])(a0);var 
_pv_set_sdk=Module["_pv_set_sdk"]=a0=>(_pv_set_sdk=Module["_pv_set_sdk"]=wasmExports["pv_set_sdk"])(a0);var _pv_get_sdk=Module["_pv_get_sdk"]=()=>(_pv_get_sdk=Module["_pv_get_sdk"]=wasmExports["pv_get_sdk"])();var _pv_free=Module["_pv_free"]=a0=>(_pv_free=Module["_pv_free"]=wasmExports["pv_free"])(a0);var _pv_get_error_stack=Module["_pv_get_error_stack"]=(a0,a1)=>(_pv_get_error_stack=Module["_pv_get_error_stack"]=wasmExports["pv_get_error_stack"])(a0,a1);var _pv_free_error_stack=Module["_pv_free_error_stack"]=a0=>(_pv_free_error_stack=Module["_pv_free_error_stack"]=wasmExports["pv_free_error_stack"])(a0);var __emscripten_tls_init=()=>(__emscripten_tls_init=wasmExports["_emscripten_tls_init"])();var _pthread_self=()=>(_pthread_self=wasmExports["pthread_self"])();var __emscripten_memcpy_bulkmem=Module["__emscripten_memcpy_bulkmem"]=(a0,a1,a2)=>(__emscripten_memcpy_bulkmem=Module["__emscripten_memcpy_bulkmem"]=wasmExports["_emscripten_memcpy_bulkmem"])(a0,a1,a2);var __emscripten_memset_bulkmem=Module["__emscripten_memset_bulkmem"]=(a0,a1,a2)=>(__emscripten_memset_bulkmem=Module["__emscripten_memset_bulkmem"]=wasmExports["_emscripten_memset_bulkmem"])(a0,a1,a2);var ___get_tp=Module["___get_tp"]=()=>(___get_tp=Module["___get_tp"]=wasmExports["__get_tp"])();var __emscripten_thread_supports_atomics_wait=Module["__emscripten_thread_supports_atomics_wait"]=()=>(__emscripten_thread_supports_atomics_wait=Module["__emscripten_thread_supports_atomics_wait"]=wasmExports["_emscripten_thread_supports_atomics_wait"])();var __emscripten_thread_init=(a0,a1,a2,a3,a4,a5)=>(__emscripten_thread_init=wasmExports["_emscripten_thread_init"])(a0,a1,a2,a3,a4,a5);var ___set_thread_state=Module["___set_thread_state"]=(a0,a1,a2,a3)=>(___set_thread_state=Module["___set_thread_state"]=wasmExports["__set_thread_state"])(a0,a1,a2,a3);var 
_emscripten_is_main_runtime_thread=Module["_emscripten_is_main_runtime_thread"]=()=>(_emscripten_is_main_runtime_thread=Module["_emscripten_is_main_runtime_thread"]=wasmExports["emscripten_is_main_runtime_thread"])();var _emscripten_is_main_browser_thread=Module["_emscripten_is_main_browser_thread"]=()=>(_emscripten_is_main_browser_thread=Module["_emscripten_is_main_browser_thread"]=wasmExports["emscripten_is_main_browser_thread"])();var __emscripten_thread_crashed=()=>(__emscripten_thread_crashed=wasmExports["_emscripten_thread_crashed"])();var _emscripten_main_thread_process_queued_calls=()=>(_emscripten_main_thread_process_queued_calls=wasmExports["emscripten_main_thread_process_queued_calls"])();var _emscripten_main_runtime_thread_id=()=>(_emscripten_main_runtime_thread_id=wasmExports["emscripten_main_runtime_thread_id"])();var _emscripten_stack_get_base=Module["_emscripten_stack_get_base"]=()=>(_emscripten_stack_get_base=Module["_emscripten_stack_get_base"]=wasmExports["emscripten_stack_get_base"])();var _emscripten_stack_get_end=Module["_emscripten_stack_get_end"]=()=>(_emscripten_stack_get_end=Module["_emscripten_stack_get_end"]=wasmExports["emscripten_stack_get_end"])();var __emscripten_run_on_main_thread_js=(a0,a1,a2,a3,a4)=>(__emscripten_run_on_main_thread_js=wasmExports["_emscripten_run_on_main_thread_js"])(a0,a1,a2,a3,a4);var __emscripten_thread_free_data=a0=>(__emscripten_thread_free_data=wasmExports["_emscripten_thread_free_data"])(a0);var __emscripten_thread_exit=a0=>(__emscripten_thread_exit=wasmExports["_emscripten_thread_exit"])(a0);var __emscripten_check_mailbox=()=>(__emscripten_check_mailbox=wasmExports["_emscripten_check_mailbox"])();var __emscripten_tempret_set=a0=>(__emscripten_tempret_set=wasmExports["_emscripten_tempret_set"])(a0);var __emscripten_tempret_get=()=>(__emscripten_tempret_get=wasmExports["_emscripten_tempret_get"])();var 
_getTempRet0=Module["_getTempRet0"]=()=>(_getTempRet0=Module["_getTempRet0"]=wasmExports["getTempRet0"])();var _setTempRet0=Module["_setTempRet0"]=a0=>(_setTempRet0=Module["_setTempRet0"]=wasmExports["setTempRet0"])(a0);var _emscripten_stack_init=Module["_emscripten_stack_init"]=()=>(_emscripten_stack_init=Module["_emscripten_stack_init"]=wasmExports["emscripten_stack_init"])();var _emscripten_stack_set_limits=(a0,a1)=>(_emscripten_stack_set_limits=wasmExports["emscripten_stack_set_limits"])(a0,a1);var _emscripten_stack_get_free=Module["_emscripten_stack_get_free"]=()=>(_emscripten_stack_get_free=Module["_emscripten_stack_get_free"]=wasmExports["emscripten_stack_get_free"])();var __emscripten_wasm_worker_initialize=Module["__emscripten_wasm_worker_initialize"]=(a0,a1)=>(__emscripten_wasm_worker_initialize=Module["__emscripten_wasm_worker_initialize"]=wasmExports["_emscripten_wasm_worker_initialize"])(a0,a1);var __emscripten_stack_restore=a0=>(__emscripten_stack_restore=wasmExports["_emscripten_stack_restore"])(a0);var __emscripten_stack_alloc=a0=>(__emscripten_stack_alloc=wasmExports["_emscripten_stack_alloc"])(a0);var _emscripten_stack_get_current=()=>(_emscripten_stack_get_current=wasmExports["emscripten_stack_get_current"])();var dynCall_ii=Module["dynCall_ii"]=(a0,a1)=>(dynCall_ii=Module["dynCall_ii"]=wasmExports["dynCall_ii"])(a0,a1);var dynCall_iii=Module["dynCall_iii"]=(a0,a1,a2)=>(dynCall_iii=Module["dynCall_iii"]=wasmExports["dynCall_iii"])(a0,a1,a2);var dynCall_iiiiif=Module["dynCall_iiiiif"]=(a0,a1,a2,a3,a4,a5)=>(dynCall_iiiiif=Module["dynCall_iiiiif"]=wasmExports["dynCall_iiiiif"])(a0,a1,a2,a3,a4,a5);var dynCall_iiiii=Module["dynCall_iiiii"]=(a0,a1,a2,a3,a4)=>(dynCall_iiiii=Module["dynCall_iiiii"]=wasmExports["dynCall_iiiii"])(a0,a1,a2,a3,a4);var dynCall_iiii=Module["dynCall_iiii"]=(a0,a1,a2,a3)=>(dynCall_iiii=Module["dynCall_iiii"]=wasmExports["dynCall_iiii"])(a0,a1,a2,a3);var 
dynCall_vii=Module["dynCall_vii"]=(a0,a1,a2)=>(dynCall_vii=Module["dynCall_vii"]=wasmExports["dynCall_vii"])(a0,a1,a2);var dynCall_iiiiiiii=Module["dynCall_iiiiiiii"]=(a0,a1,a2,a3,a4,a5,a6,a7)=>(dynCall_iiiiiiii=Module["dynCall_iiiiiiii"]=wasmExports["dynCall_iiiiiiii"])(a0,a1,a2,a3,a4,a5,a6,a7);var dynCall_iiiiii=Module["dynCall_iiiiii"]=(a0,a1,a2,a3,a4,a5)=>(dynCall_iiiiii=Module["dynCall_iiiiii"]=wasmExports["dynCall_iiiiii"])(a0,a1,a2,a3,a4,a5);var dynCall_iiifiiii=Module["dynCall_iiifiiii"]=(a0,a1,a2,a3,a4,a5,a6,a7)=>(dynCall_iiifiiii=Module["dynCall_iiifiiii"]=wasmExports["dynCall_iiifiiii"])(a0,a1,a2,a3,a4,a5,a6,a7);var dynCall_iiifiiiii=Module["dynCall_iiifiiiii"]=(a0,a1,a2,a3,a4,a5,a6,a7,a8)=>(dynCall_iiifiiiii=Module["dynCall_iiifiiiii"]=wasmExports["dynCall_iiifiiiii"])(a0,a1,a2,a3,a4,a5,a6,a7,a8);var dynCall_iiiiiii=Module["dynCall_iiiiiii"]=(a0,a1,a2,a3,a4,a5,a6)=>(dynCall_iiiiiii=Module["dynCall_iiiiiii"]=wasmExports["dynCall_iiiiiii"])(a0,a1,a2,a3,a4,a5,a6);var dynCall_iiiiiiiiiii=Module["dynCall_iiiiiiiiiii"]=(a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10)=>(dynCall_iiiiiiiiiii=Module["dynCall_iiiiiiiiiii"]=wasmExports["dynCall_iiiiiiiiiii"])(a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10);var dynCall_iiiiiiiiiiii=Module["dynCall_iiiiiiiiiiii"]=(a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11)=>(dynCall_iiiiiiiiiiii=Module["dynCall_iiiiiiiiiiii"]=wasmExports["dynCall_iiiiiiiiiiii"])(a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11);var dynCall_iiiiiiiiiiiii=Module["dynCall_iiiiiiiiiiiii"]=(a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12)=>(dynCall_iiiiiiiiiiiii=Module["dynCall_iiiiiiiiiiiii"]=wasmExports["dynCall_iiiiiiiiiiiii"])(a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12);var dynCall_v=Module["dynCall_v"]=a0=>(dynCall_v=Module["dynCall_v"]=wasmExports["dynCall_v"])(a0);var dynCall_vi=Module["dynCall_vi"]=(a0,a1)=>(dynCall_vi=Module["dynCall_vi"]=wasmExports["dynCall_vi"])(a0,a1);var 
dynCall_viiiiii=Module["dynCall_viiiiii"]=(a0,a1,a2,a3,a4,a5,a6)=>(dynCall_viiiiii=Module["dynCall_viiiiii"]=wasmExports["dynCall_viiiiii"])(a0,a1,a2,a3,a4,a5,a6);var dynCall_viiiii=Module["dynCall_viiiii"]=(a0,a1,a2,a3,a4,a5)=>(dynCall_viiiii=Module["dynCall_viiiii"]=wasmExports["dynCall_viiiii"])(a0,a1,a2,a3,a4,a5);var dynCall_viiii=Module["dynCall_viiii"]=(a0,a1,a2,a3,a4)=>(dynCall_viiii=Module["dynCall_viiii"]=wasmExports["dynCall_viiii"])(a0,a1,a2,a3,a4);var dynCall_viii=Module["dynCall_viii"]=(a0,a1,a2,a3)=>(dynCall_viii=Module["dynCall_viii"]=wasmExports["dynCall_viii"])(a0,a1,a2,a3);var dynCall_iiiiji=Module["dynCall_iiiiji"]=(a0,a1,a2,a3,a4,a5,a6)=>(dynCall_iiiiji=Module["dynCall_iiiiji"]=wasmExports["dynCall_iiiiji"])(a0,a1,a2,a3,a4,a5,a6);var dynCall_ifi=Module["dynCall_ifi"]=(a0,a1,a2)=>(dynCall_ifi=Module["dynCall_ifi"]=wasmExports["dynCall_ifi"])(a0,a1,a2);var dynCall_viiiiiifi=Module["dynCall_viiiiiifi"]=(a0,a1,a2,a3,a4,a5,a6,a7,a8)=>(dynCall_viiiiiifi=Module["dynCall_viiiiiifi"]=wasmExports["dynCall_viiiiiifi"])(a0,a1,a2,a3,a4,a5,a6,a7,a8);var dynCall_viiiiiii=Module["dynCall_viiiiiii"]=(a0,a1,a2,a3,a4,a5,a6,a7)=>(dynCall_viiiiiii=Module["dynCall_viiiiiii"]=wasmExports["dynCall_viiiiiii"])(a0,a1,a2,a3,a4,a5,a6,a7);var dynCall_vifiiii=Module["dynCall_vifiiii"]=(a0,a1,a2,a3,a4,a5,a6)=>(dynCall_vifiiii=Module["dynCall_vifiiii"]=wasmExports["dynCall_vifiiii"])(a0,a1,a2,a3,a4,a5,a6);var dynCall_viiiiiiii=Module["dynCall_viiiiiiii"]=(a0,a1,a2,a3,a4,a5,a6,a7,a8)=>(dynCall_viiiiiiii=Module["dynCall_viiiiiiii"]=wasmExports["dynCall_viiiiiiii"])(a0,a1,a2,a3,a4,a5,a6,a7,a8);var dynCall_iiiiiiiiii=Module["dynCall_iiiiiiiiii"]=(a0,a1,a2,a3,a4,a5,a6,a7,a8,a9)=>(dynCall_iiiiiiiiii=Module["dynCall_iiiiiiiiii"]=wasmExports["dynCall_iiiiiiiiii"])(a0,a1,a2,a3,a4,a5,a6,a7,a8,a9);var dynCall_i=Module["dynCall_i"]=a0=>(dynCall_i=Module["dynCall_i"]=wasmExports["dynCall_i"])(a0);var 
dynCall_ji=Module["dynCall_ji"]=(a0,a1)=>(dynCall_ji=Module["dynCall_ji"]=wasmExports["dynCall_ji"])(a0,a1);var dynCall_iiji=Module["dynCall_iiji"]=(a0,a1,a2,a3,a4)=>(dynCall_iiji=Module["dynCall_iiji"]=wasmExports["dynCall_iiji"])(a0,a1,a2,a3,a4);var dynCall_iijjiii=Module["dynCall_iijjiii"]=(a0,a1,a2,a3,a4,a5,a6,a7,a8)=>(dynCall_iijjiii=Module["dynCall_iijjiii"]=wasmExports["dynCall_iijjiii"])(a0,a1,a2,a3,a4,a5,a6,a7,a8);var dynCall_vijjjii=Module["dynCall_vijjjii"]=(a0,a1,a2,a3,a4,a5,a6,a7,a8,a9)=>(dynCall_vijjjii=Module["dynCall_vijjjii"]=wasmExports["dynCall_vijjjii"])(a0,a1,a2,a3,a4,a5,a6,a7,a8,a9);var dynCall_fii=Module["dynCall_fii"]=(a0,a1,a2)=>(dynCall_fii=Module["dynCall_fii"]=wasmExports["dynCall_fii"])(a0,a1,a2);var dynCall_fiii=Module["dynCall_fiii"]=(a0,a1,a2,a3)=>(dynCall_fiii=Module["dynCall_fiii"]=wasmExports["dynCall_fiii"])(a0,a1,a2,a3);var dynCall_iiiiiiiii=Module["dynCall_iiiiiiiii"]=(a0,a1,a2,a3,a4,a5,a6,a7,a8)=>(dynCall_iiiiiiiii=Module["dynCall_iiiiiiiii"]=wasmExports["dynCall_iiiiiiiii"])(a0,a1,a2,a3,a4,a5,a6,a7,a8);var dynCall_jii=Module["dynCall_jii"]=(a0,a1,a2)=>(dynCall_jii=Module["dynCall_jii"]=wasmExports["dynCall_jii"])(a0,a1,a2);var dynCall_jiii=Module["dynCall_jiii"]=(a0,a1,a2,a3)=>(dynCall_jiii=Module["dynCall_jiii"]=wasmExports["dynCall_jiii"])(a0,a1,a2,a3);var dynCall_iijji=Module["dynCall_iijji"]=(a0,a1,a2,a3,a4,a5,a6)=>(dynCall_iijji=Module["dynCall_iijji"]=wasmExports["dynCall_iijji"])(a0,a1,a2,a3,a4,a5,a6);var dynCall_iijjji=Module["dynCall_iijjji"]=(a0,a1,a2,a3,a4,a5,a6,a7,a8)=>(dynCall_iijjji=Module["dynCall_iijjji"]=wasmExports["dynCall_iijjji"])(a0,a1,a2,a3,a4,a5,a6,a7,a8);var dynCall_iidiiii=Module["dynCall_iidiiii"]=(a0,a1,a2,a3,a4,a5,a6)=>(dynCall_iidiiii=Module["dynCall_iidiiii"]=wasmExports["dynCall_iidiiii"])(a0,a1,a2,a3,a4,a5,a6);var dynCall_jiji=Module["dynCall_jiji"]=(a0,a1,a2,a3,a4)=>(dynCall_jiji=Module["dynCall_jiji"]=wasmExports["dynCall_jiji"])(a0,a1,a2,a3,a4);var 
_asyncify_start_unwind=a0=>(_asyncify_start_unwind=wasmExports["asyncify_start_unwind"])(a0);var _asyncify_stop_unwind=()=>(_asyncify_stop_unwind=wasmExports["asyncify_stop_unwind"])();var _asyncify_start_rewind=a0=>(_asyncify_start_rewind=wasmExports["asyncify_start_rewind"])(a0);var _asyncify_stop_rewind=()=>(_asyncify_stop_rewind=wasmExports["asyncify_stop_rewind"])();var ___cxa_unexpected_handler=Module["___cxa_unexpected_handler"]=342544;var ___cxa_terminate_handler=Module["___cxa_terminate_handler"]=342540;var ___cxa_new_handler=Module["___cxa_new_handler"]=360420;var __ZTIN10__cxxabiv116__shim_type_infoE=Module["__ZTIN10__cxxabiv116__shim_type_infoE"]=14884;var __ZTIN10__cxxabiv117__class_type_infoE=Module["__ZTIN10__cxxabiv117__class_type_infoE"]=14932;var __ZTIN10__cxxabiv117__pbase_type_infoE=Module["__ZTIN10__cxxabiv117__pbase_type_infoE"]=14980;var __ZTIDn=Module["__ZTIDn"]=15328;var __ZTIN10__cxxabiv119__pointer_type_infoE=Module["__ZTIN10__cxxabiv119__pointer_type_infoE"]=15028;var __ZTIv=Module["__ZTIv"]=15276;var __ZTIN10__cxxabiv120__function_type_infoE=Module["__ZTIN10__cxxabiv120__function_type_infoE"]=15080;var __ZTIN10__cxxabiv129__pointer_to_member_type_infoE=Module["__ZTIN10__cxxabiv129__pointer_to_member_type_infoE"]=15140;var __ZTSN10__cxxabiv116__shim_type_infoE=Module["__ZTSN10__cxxabiv116__shim_type_infoE"]=14848;var __ZTVN10__cxxabiv120__si_class_type_infoE=Module["__ZTVN10__cxxabiv120__si_class_type_infoE"]=16812;var __ZTSN10__cxxabiv117__class_type_infoE=Module["__ZTSN10__cxxabiv117__class_type_infoE"]=14896;var __ZTSN10__cxxabiv117__pbase_type_infoE=Module["__ZTSN10__cxxabiv117__pbase_type_infoE"]=14944;var __ZTSN10__cxxabiv119__pointer_type_infoE=Module["__ZTSN10__cxxabiv119__pointer_type_infoE"]=14992;var __ZTSN10__cxxabiv120__function_type_infoE=Module["__ZTSN10__cxxabiv120__function_type_infoE"]=15040;var __ZTSN10__cxxabiv129__pointer_to_member_type_infoE=Module["__ZTSN10__cxxabiv129__pointer_to_member_type_infoE"]=15092;var 
__ZTVN10__cxxabiv116__shim_type_infoE=Module["__ZTVN10__cxxabiv116__shim_type_infoE"]=15164;var __ZTVN10__cxxabiv123__fundamental_type_infoE=Module["__ZTVN10__cxxabiv123__fundamental_type_infoE"]=15192;var __ZTIN10__cxxabiv123__fundamental_type_infoE=Module["__ZTIN10__cxxabiv123__fundamental_type_infoE"]=15260;var __ZTSN10__cxxabiv123__fundamental_type_infoE=Module["__ZTSN10__cxxabiv123__fundamental_type_infoE"]=15220;var __ZTSv=Module["__ZTSv"]=15272;var __ZTSPv=Module["__ZTSPv"]=15284;var __ZTIPv=Module["__ZTIPv"]=15288;var __ZTVN10__cxxabiv119__pointer_type_infoE=Module["__ZTVN10__cxxabiv119__pointer_type_infoE"]=17024;var __ZTSPKv=Module["__ZTSPKv"]=15304;var __ZTIPKv=Module["__ZTIPKv"]=15308;var __ZTSDn=Module["__ZTSDn"]=15324;var __ZTSPDn=Module["__ZTSPDn"]=15336;var __ZTIPDn=Module["__ZTIPDn"]=15340;var __ZTSPKDn=Module["__ZTSPKDn"]=15356;var __ZTIPKDn=Module["__ZTIPKDn"]=15364;var __ZTSb=Module["__ZTSb"]=15380;var __ZTIb=Module["__ZTIb"]=15384;var __ZTSPb=Module["__ZTSPb"]=15392;var __ZTIPb=Module["__ZTIPb"]=15396;var __ZTSPKb=Module["__ZTSPKb"]=15412;var __ZTIPKb=Module["__ZTIPKb"]=15416;var __ZTSw=Module["__ZTSw"]=15432;var __ZTIw=Module["__ZTIw"]=15436;var __ZTSPw=Module["__ZTSPw"]=15444;var __ZTIPw=Module["__ZTIPw"]=15448;var __ZTSPKw=Module["__ZTSPKw"]=15464;var __ZTIPKw=Module["__ZTIPKw"]=15468;var __ZTSc=Module["__ZTSc"]=15484;var __ZTIc=Module["__ZTIc"]=15488;var __ZTSPc=Module["__ZTSPc"]=15496;var __ZTIPc=Module["__ZTIPc"]=15500;var __ZTSPKc=Module["__ZTSPKc"]=15516;var __ZTIPKc=Module["__ZTIPKc"]=15520;var __ZTSh=Module["__ZTSh"]=15536;var __ZTIh=Module["__ZTIh"]=15540;var __ZTSPh=Module["__ZTSPh"]=15548;var __ZTIPh=Module["__ZTIPh"]=15552;var __ZTSPKh=Module["__ZTSPKh"]=15568;var __ZTIPKh=Module["__ZTIPKh"]=15572;var __ZTSa=Module["__ZTSa"]=15588;var __ZTIa=Module["__ZTIa"]=15592;var __ZTSPa=Module["__ZTSPa"]=15600;var __ZTIPa=Module["__ZTIPa"]=15604;var __ZTSPKa=Module["__ZTSPKa"]=15620;var __ZTIPKa=Module["__ZTIPKa"]=15624;var 
__ZTSs=Module["__ZTSs"]=15640;var __ZTIs=Module["__ZTIs"]=15644;var __ZTSPs=Module["__ZTSPs"]=15652;var __ZTIPs=Module["__ZTIPs"]=15656;var __ZTSPKs=Module["__ZTSPKs"]=15672;var __ZTIPKs=Module["__ZTIPKs"]=15676;var __ZTSt=Module["__ZTSt"]=15692;var __ZTIt=Module["__ZTIt"]=15696;var __ZTSPt=Module["__ZTSPt"]=15704;var __ZTIPt=Module["__ZTIPt"]=15708;var __ZTSPKt=Module["__ZTSPKt"]=15724;var __ZTIPKt=Module["__ZTIPKt"]=15728;var __ZTSi=Module["__ZTSi"]=15744;var __ZTIi=Module["__ZTIi"]=15748;var __ZTSPi=Module["__ZTSPi"]=15756;var __ZTIPi=Module["__ZTIPi"]=15760;var __ZTSPKi=Module["__ZTSPKi"]=15776;var __ZTIPKi=Module["__ZTIPKi"]=15780;var __ZTSj=Module["__ZTSj"]=15796;var __ZTIj=Module["__ZTIj"]=15800;var __ZTSPj=Module["__ZTSPj"]=15808;var __ZTIPj=Module["__ZTIPj"]=15812;var __ZTSPKj=Module["__ZTSPKj"]=15828;var __ZTIPKj=Module["__ZTIPKj"]=15832;var __ZTSl=Module["__ZTSl"]=15848;var __ZTIl=Module["__ZTIl"]=15852;var __ZTSPl=Module["__ZTSPl"]=15860;var __ZTIPl=Module["__ZTIPl"]=15864;var __ZTSPKl=Module["__ZTSPKl"]=15880;var __ZTIPKl=Module["__ZTIPKl"]=15884;var __ZTSm=Module["__ZTSm"]=15900;var __ZTIm=Module["__ZTIm"]=15904;var __ZTSPm=Module["__ZTSPm"]=15912;var __ZTIPm=Module["__ZTIPm"]=15916;var __ZTSPKm=Module["__ZTSPKm"]=15932;var __ZTIPKm=Module["__ZTIPKm"]=15936;var __ZTSx=Module["__ZTSx"]=15952;var __ZTIx=Module["__ZTIx"]=15956;var __ZTSPx=Module["__ZTSPx"]=15964;var __ZTIPx=Module["__ZTIPx"]=15968;var __ZTSPKx=Module["__ZTSPKx"]=15984;var __ZTIPKx=Module["__ZTIPKx"]=15988;var __ZTSy=Module["__ZTSy"]=16004;var __ZTIy=Module["__ZTIy"]=16008;var __ZTSPy=Module["__ZTSPy"]=16016;var __ZTIPy=Module["__ZTIPy"]=16020;var __ZTSPKy=Module["__ZTSPKy"]=16036;var __ZTIPKy=Module["__ZTIPKy"]=16040;var __ZTSn=Module["__ZTSn"]=16056;var __ZTIn=Module["__ZTIn"]=16060;var __ZTSPn=Module["__ZTSPn"]=16068;var __ZTIPn=Module["__ZTIPn"]=16072;var __ZTSPKn=Module["__ZTSPKn"]=16088;var __ZTIPKn=Module["__ZTIPKn"]=16092;var __ZTSo=Module["__ZTSo"]=16108;var 
__ZTIo=Module["__ZTIo"]=16112;var __ZTSPo=Module["__ZTSPo"]=16120;var __ZTIPo=Module["__ZTIPo"]=16124;var __ZTSPKo=Module["__ZTSPKo"]=16140;var __ZTIPKo=Module["__ZTIPKo"]=16144;var __ZTSDh=Module["__ZTSDh"]=16160;var __ZTIDh=Module["__ZTIDh"]=16164;var __ZTSPDh=Module["__ZTSPDh"]=16172;var __ZTIPDh=Module["__ZTIPDh"]=16176;var __ZTSPKDh=Module["__ZTSPKDh"]=16192;var __ZTIPKDh=Module["__ZTIPKDh"]=16200;var __ZTSf=Module["__ZTSf"]=16216;var __ZTIf=Module["__ZTIf"]=16220;var __ZTSPf=Module["__ZTSPf"]=16228;var __ZTIPf=Module["__ZTIPf"]=16232;var __ZTSPKf=Module["__ZTSPKf"]=16248;var __ZTIPKf=Module["__ZTIPKf"]=16252;var __ZTSd=Module["__ZTSd"]=16268;var __ZTId=Module["__ZTId"]=16272;var __ZTSPd=Module["__ZTSPd"]=16280;var __ZTIPd=Module["__ZTIPd"]=16284;var __ZTSPKd=Module["__ZTSPKd"]=16300;var __ZTIPKd=Module["__ZTIPKd"]=16304;var __ZTSe=Module["__ZTSe"]=16320;var __ZTIe=Module["__ZTIe"]=16324;var __ZTSPe=Module["__ZTSPe"]=16332;var __ZTIPe=Module["__ZTIPe"]=16336;var __ZTSPKe=Module["__ZTSPKe"]=16352;var __ZTIPKe=Module["__ZTIPKe"]=16356;var __ZTSg=Module["__ZTSg"]=16372;var __ZTIg=Module["__ZTIg"]=16376;var __ZTSPg=Module["__ZTSPg"]=16384;var __ZTIPg=Module["__ZTIPg"]=16388;var __ZTSPKg=Module["__ZTSPKg"]=16404;var __ZTIPKg=Module["__ZTIPKg"]=16408;var __ZTSDu=Module["__ZTSDu"]=16424;var __ZTIDu=Module["__ZTIDu"]=16428;var __ZTSPDu=Module["__ZTSPDu"]=16436;var __ZTIPDu=Module["__ZTIPDu"]=16440;var __ZTSPKDu=Module["__ZTSPKDu"]=16456;var __ZTIPKDu=Module["__ZTIPKDu"]=16464;var __ZTSDs=Module["__ZTSDs"]=16480;var __ZTIDs=Module["__ZTIDs"]=16484;var __ZTSPDs=Module["__ZTSPDs"]=16492;var __ZTIPDs=Module["__ZTIPDs"]=16496;var __ZTSPKDs=Module["__ZTSPKDs"]=16512;var __ZTIPKDs=Module["__ZTIPKDs"]=16520;var __ZTSDi=Module["__ZTSDi"]=16536;var __ZTIDi=Module["__ZTIDi"]=16540;var __ZTSPDi=Module["__ZTSPDi"]=16548;var __ZTIPDi=Module["__ZTIPDi"]=16552;var __ZTSPKDi=Module["__ZTSPKDi"]=16568;var __ZTIPKDi=Module["__ZTIPKDi"]=16576;var 
__ZTVN10__cxxabiv117__array_type_infoE=Module["__ZTVN10__cxxabiv117__array_type_infoE"]=16592;var __ZTIN10__cxxabiv117__array_type_infoE=Module["__ZTIN10__cxxabiv117__array_type_infoE"]=16656;var __ZTSN10__cxxabiv117__array_type_infoE=Module["__ZTSN10__cxxabiv117__array_type_infoE"]=16620;var __ZTVN10__cxxabiv120__function_type_infoE=Module["__ZTVN10__cxxabiv120__function_type_infoE"]=16668;var __ZTVN10__cxxabiv116__enum_type_infoE=Module["__ZTVN10__cxxabiv116__enum_type_infoE"]=16696;var __ZTIN10__cxxabiv116__enum_type_infoE=Module["__ZTIN10__cxxabiv116__enum_type_infoE"]=16760;var __ZTSN10__cxxabiv116__enum_type_infoE=Module["__ZTSN10__cxxabiv116__enum_type_infoE"]=16724;var __ZTVN10__cxxabiv117__class_type_infoE=Module["__ZTVN10__cxxabiv117__class_type_infoE"]=16772;var __ZTIN10__cxxabiv120__si_class_type_infoE=Module["__ZTIN10__cxxabiv120__si_class_type_infoE"]=16892;var __ZTSN10__cxxabiv120__si_class_type_infoE=Module["__ZTSN10__cxxabiv120__si_class_type_infoE"]=16852;var __ZTVN10__cxxabiv121__vmi_class_type_infoE=Module["__ZTVN10__cxxabiv121__vmi_class_type_infoE"]=16904;var __ZTIN10__cxxabiv121__vmi_class_type_infoE=Module["__ZTIN10__cxxabiv121__vmi_class_type_infoE"]=16984;var __ZTSN10__cxxabiv121__vmi_class_type_infoE=Module["__ZTSN10__cxxabiv121__vmi_class_type_infoE"]=16944;var __ZTVN10__cxxabiv117__pbase_type_infoE=Module["__ZTVN10__cxxabiv117__pbase_type_infoE"]=16996;var __ZTVN10__cxxabiv129__pointer_to_member_type_infoE=Module["__ZTVN10__cxxabiv129__pointer_to_member_type_infoE"]=17052;var __ZTVSt9bad_alloc=Module["__ZTVSt9bad_alloc"]=14512;var __ZTVSt9exception=Module["__ZTVSt9exception"]=14552;var __ZTVSt20bad_array_new_length=Module["__ZTVSt20bad_array_new_length"]=14532;var __ZTISt9bad_alloc=Module["__ZTISt9bad_alloc"]=14664;var __ZTISt20bad_array_new_length=Module["__ZTISt20bad_array_new_length"]=14704;var __ZTISt9exception=Module["__ZTISt9exception"]=14588;var __ZTSSt9exception=Module["__ZTSSt9exception"]=14572;var 
/* Continuation of a generated declaration list (the opening var is on the preceding line): each entry stores an integer constant in a local and under the same key on Module. */__ZTVSt13bad_exception=Module["__ZTVSt13bad_exception"]=14596;var __ZTISt13bad_exception=Module["__ZTISt13bad_exception"]=14636;var __ZTSSt13bad_exception=Module["__ZTSSt13bad_exception"]=14616;var __ZTSSt9bad_alloc=Module["__ZTSSt9bad_alloc"]=14648;var __ZTSSt20bad_array_new_length=Module["__ZTSSt20bad_array_new_length"]=14676;var __ZTVSt8bad_cast=Module["__ZTVSt8bad_cast"]=14716;var __ZTVSt10bad_typeid=Module["__ZTVSt10bad_typeid"]=14736;var __ZTISt8bad_cast=Module["__ZTISt8bad_cast"]=14808;var __ZTISt10bad_typeid=Module["__ZTISt10bad_typeid"]=14836;var __ZTVSt9type_info=Module["__ZTVSt9type_info"]=14756;var __ZTISt9type_info=Module["__ZTISt9type_info"]=14788;var __ZTSSt9type_info=Module["__ZTSSt9type_info"]=14772;var __ZTSSt8bad_cast=Module["__ZTSSt8bad_cast"]=14796;var __ZTSSt10bad_typeid=Module["__ZTSSt10bad_typeid"]=14820;/** applySignatureConversions(wasmExports): works on a shallow copy made with Object.assign (the object passed in is not modified), replaces the listed exports with wrappers that apply >>>0 to the return value (coercing it to an unsigned 32-bit integer), and returns that copy. makeWrapper_pp forwards one argument; makeWrapper_p forwards none. */function applySignatureConversions(wasmExports){wasmExports=Object.assign({},wasmExports);var makeWrapper_pp=f=>a0=>f(a0)>>>0;var makeWrapper_p=f=>()=>f()>>>0;wasmExports["malloc"]=makeWrapper_pp(wasmExports["malloc"]);wasmExports["pthread_self"]=makeWrapper_p(wasmExports["pthread_self"]);wasmExports["emscripten_main_runtime_thread_id"]=makeWrapper_p(wasmExports["emscripten_main_runtime_thread_id"]);wasmExports["emscripten_stack_get_base"]=makeWrapper_p(wasmExports["emscripten_stack_get_base"]);wasmExports["emscripten_stack_get_end"]=makeWrapper_p(wasmExports["emscripten_stack_get_end"]);wasmExports["_emscripten_stack_alloc"]=makeWrapper_pp(wasmExports["_emscripten_stack_alloc"]);wasmExports["emscripten_stack_get_current"]=makeWrapper_p(wasmExports["emscripten_stack_get_current"]);return wasmExports}/* Expose addFunction (defined outside this chunk) on Module. */Module["addFunction"]=addFunction;/* calledRun is set by doRun once startup has begun. runCaller, installed as dependenciesFulfilled, calls run() while calledRun is unset and re-installs itself if run() returned without setting it. */var calledRun;dependenciesFulfilled=function runCaller(){if(!calledRun)run();if(!calledRun)dependenciesFulfilled=runCaller};/** run(): does nothing while runDependencies > 0. When ENVIRONMENT_IS_PTHREAD is set it resolves the ready promise with Module, calls initRuntime() and startWorker(Module), and returns without calling preRun or doRun. Otherwise it calls preRun(), re-checks runDependencies, and then invokes doRun: deferred through setTimeout(..., 1) with a "Running..." status that is cleared by a further 1 ms timer when Module["setStatus"] exists, directly otherwise. */function run(){if(runDependencies>0){return}if(ENVIRONMENT_IS_PTHREAD){readyPromiseResolve(Module);initRuntime();startWorker(Module);return}preRun();if(runDependencies>0){return}/* doRun(): body runs at most once (guarded by calledRun); mirrors the flag onto Module, stops if ABORT is set, then calls initRuntime(), resolves the ready promise with Module, invokes the optional Module["onRuntimeInitialized"] hook and finally postRun(). */function 
doRun(){if(calledRun)return;calledRun=true;Module["calledRun"]=true;if(ABORT)return;initRuntime();readyPromiseResolve(Module);Module["onRuntimeInitialized"]?.();postRun()}if(Module["setStatus"]){Module["setStatus"]("Running...");setTimeout(function(){setTimeout(function(){Module["setStatus"]("")},1);doRun()},1)}else{doRun()}}/* Module["preInit"] may be a single function or an array; a lone function is wrapped in an array, then callbacks are popped from the end and invoked until the array is empty. */if(Module["preInit"]){if(typeof Module["preInit"]=="function")Module["preInit"]=[Module["preInit"]];while(Module["preInit"].length>0){Module["preInit"].pop()()}}/* Kick off startup; moduleRtn (returned just below) is set to the ready promise. */run();moduleRtn=readyPromise; - - - return moduleRtn; -} -); -})(); -export default pv_picollm_simd; -var isPthread = globalThis.self?.name === 'em-pthread'; -// When running as a pthread, construct a new instance on startup -isPthread && pv_picollm_simd(); diff --git a/resources/.lint/spell-check/dict.txt b/resources/.lint/spell-check/dict.txt index 8c480ffb..142a4d77 100644 --- a/resources/.lint/spell-check/dict.txt +++ b/resources/.lint/spell-check/dict.txt @@ -71,3 +71,4 @@ Unmanaged Unretained Podfile xcworkspace +webgpu