diff --git a/js/common/lib/tensor-impl-type-mapping.ts b/js/common/lib/tensor-impl-type-mapping.ts
index c4a43ea27fea1..c3edc440fb4d8 100644
--- a/js/common/lib/tensor-impl-type-mapping.ts
+++ b/js/common/lib/tensor-impl-type-mapping.ts
@@ -2,10 +2,11 @@
 // Licensed under the MIT License.
 
 import {Tensor} from './tensor.js';
+import {Float16Array, Float16ArrayConstructor} from '@petamoriken/float16';
 
 export type SupportedTypedArrayConstructors = Float32ArrayConstructor|Uint8ArrayConstructor|Int8ArrayConstructor|
     Uint16ArrayConstructor|Int16ArrayConstructor|Int32ArrayConstructor|BigInt64ArrayConstructor|Uint8ArrayConstructor|
-    Float64ArrayConstructor|Uint32ArrayConstructor|BigUint64ArrayConstructor;
+    Float64ArrayConstructor|Uint32ArrayConstructor|BigUint64ArrayConstructor|Float16ArrayConstructor;
 export type SupportedTypedArray = InstanceType<SupportedTypedArrayConstructors>;
 
 // a runtime map that maps type string to TypedArray constructor. Should match Tensor.DataTypeMap.
@@ -14,7 +15,7 @@ export const NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP = new Map([
+  [Float16Array, 'float16'],
   [Float32Array, 'float32'],
   [Uint8Array, 'uint8'],
   [Int8Array, 'int8'],
diff --git a/js/common/lib/tensor-impl.ts b/js/common/lib/tensor-impl.ts
index 13a1b044db7bd..fc06daf515895 100644
--- a/js/common/lib/tensor-impl.ts
+++ b/js/common/lib/tensor-impl.ts
@@ -140,7 +140,7 @@ export class Tensor implements TensorInterface {
       if (typedArrayConstructor === undefined) {
         throw new TypeError(`Unsupported tensor type: ${arg0}.`);
       }
-      if (Array.isArray(arg1)) {
+      if (Array.isArray(arg1) || arg0 === 'float16') {
        if (arg0 === 'float16') {
          // Throw error here because when user try to use number array as data,
          // e.g. new Tensor('float16', [1, 2, 3, 4], dims)), it will actually call
diff --git a/js/common/lib/tensor.ts b/js/common/lib/tensor.ts
index 10071eda39405..0795986a42980 100644
--- a/js/common/lib/tensor.ts
+++ b/js/common/lib/tensor.ts
@@ -1,6 +1,7 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
+import {Float16Array} from '@petamoriken/float16';
 import {TensorFactory} from './tensor-factory.js';
 import {Tensor as TensorImpl} from './tensor-impl.js';
 import {TypedTensorUtils} from './tensor-utils.js';
@@ -74,7 +75,7 @@ export declare namespace Tensor {
     int64: BigInt64Array;
     string: string[];
     bool: Uint8Array;
-    float16: Uint16Array;  // Keep using Uint16Array until we have a concrete solution for float 16.
+    float16: Float16Array;  // Float16Array polyfill from @petamoriken/float16.
     float64: Float64Array;
     uint32: Uint32Array;
     uint64: BigUint64Array;
@@ -198,6 +199,14 @@ export interface TensorConstructor {
    */
   new(data: Float32Array, dims?: readonly number[]): TypedTensor<'float32'>;
 
+  /**
+   * Construct a new float16 tensor object from the given data and dims.
+   *
+   * @param data - Specify the CPU tensor data.
+   * @param dims - Specify the dimension of the tensor. If omitted, a 1-D tensor is assumed.
+   */
+  new(data: Float16Array, dims?: readonly number[]): TypedTensor<'float16'>;
+
   /**
    * Construct a new int8 tensor object from the given data and dims.
    *
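For reference, a minimal sketch of how the new float16 overload could be used from application code, assuming a build of onnxruntime-common that includes this change. The data-first form is shown because the string-first `new Tensor('float16', data, dims)` form is routed to an error in the tensor-impl.ts change above:

```ts
// Sketch only: `Float16Array` here is the polyfill from @petamoriken/float16,
// not a native type.
import { Float16Array } from '@petamoriken/float16';
import { Tensor } from 'onnxruntime-common';

// Values are rounded to the nearest representable fp16 on write.
const data = new Float16Array([0.5, 1.5, 2.25, 3.75]);

// Resolves to the new overload and infers TypedTensor<'float16'>.
const t = new Tensor(data, [2, 2]);
console.log(t.type);    // 'float16'
console.log(t.data[3]); // 3.75 (exactly representable in fp16)
```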
diff --git a/js/package-lock.json b/js/package-lock.json
index be7b3c9cd7d30..1e56f066b20f3 100644
--- a/js/package-lock.json
+++ b/js/package-lock.json
@@ -5,6 +5,9 @@
   "packages": {
     "": {
       "license": "MIT",
+      "dependencies": {
+        "@petamoriken/float16": "^3.8.3"
+      },
       "devDependencies": {
         "@types/fs-extra": "^11.0.1",
         "@types/mocha": "^10.0.1",
@@ -337,6 +340,11 @@
         "node": ">= 8"
       }
     },
+    "node_modules/@petamoriken/float16": {
+      "version": "3.8.3",
+      "resolved": "https://registry.npmjs.org/@petamoriken/float16/-/float16-3.8.3.tgz",
+      "integrity": "sha512-an2OZ7/6er9Jja8EDUvU/tmtGIutdlb6LwXOwgjzoCjDRAsUd8sRZMBjoPEy78Xa9iOp+Kglk2CHgVwZuZbWbw=="
+    },
     "node_modules/@polka/url": {
       "version": "1.0.0-next.21",
       "resolved": "https://registry.npmjs.org/@polka/url/-/url-1.0.0-next.21.tgz",
@@ -6391,6 +6399,11 @@
         "fastq": "^1.6.0"
       }
     },
+    "@petamoriken/float16": {
+      "version": "3.8.3",
+      "resolved": "https://registry.npmjs.org/@petamoriken/float16/-/float16-3.8.3.tgz",
+      "integrity": "sha512-an2OZ7/6er9Jja8EDUvU/tmtGIutdlb6LwXOwgjzoCjDRAsUd8sRZMBjoPEy78Xa9iOp+Kglk2CHgVwZuZbWbw=="
+    },
     "@polka/url": {
       "version": "1.0.0-next.21",
       "resolved": "https://registry.npmjs.org/@polka/url/-/url-1.0.0-next.21.tgz",
diff --git a/js/package.json b/js/package.json
index 82d644ae6570f..0d588c5c93ef4 100644
--- a/js/package.json
+++ b/js/package.json
@@ -40,5 +40,8 @@
     "prepare-node-tests": "node ./scripts/prepare-onnx-node-tests",
     "update-version": "node ./scripts/update-version"
   },
-  "license": "MIT"
+  "license": "MIT",
+  "dependencies": {
+    "@petamoriken/float16": "^3.8.3"
+  }
 }
diff --git a/js/web/lib/onnxjs/session-handler.ts b/js/web/lib/onnxjs/session-handler.ts
index 0b06a7a747a44..be0070adfa0a9 100644
--- a/js/web/lib/onnxjs/session-handler.ts
+++ b/js/web/lib/onnxjs/session-handler.ts
@@ -32,6 +32,7 @@ export class OnnxjsSessionHandler implements SessionHandler {
     const outputMap = await this.session.run(inputMap);
     const output: SessionHandler.ReturnType = {};
     outputMap.forEach((tensor, name) => {
+      // @ts-ignore -- the onnxjs tensor data union no longer satisfies the Tensor constructor overloads exactly
       output[name] = new Tensor(tensor.type, tensor.data, tensor.dims);
     });
     return output;
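The `@ts-ignore` above, and the relaxed `instanceof` check in the onnxjs tensor diff below, both stem from the polyfill behaving like a typed array without being one. A small illustration, assuming `@petamoriken/float16` ^3.8 (which exports `isFloat16Array`):

```ts
import { Float16Array, isFloat16Array } from '@petamoriken/float16';

const a = new Float16Array([1.1, 2.2, 3.3]);
console.log(a[0]);                      // ≈ 1.099609375 — stored with fp16 precision
console.log(a.byteLength);              // 6 — two bytes per element
console.log(isFloat16Array(a));         // true — the library's own brand check
console.log(a instanceof Uint16Array);  // false — not a native TypedArray subclass
```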
diff --git a/js/web/lib/onnxjs/tensor.ts b/js/web/lib/onnxjs/tensor.ts
index 1a4c1dfe7494d..ca25c7323ed90 100644
--- a/js/web/lib/onnxjs/tensor.ts
+++ b/js/web/lib/onnxjs/tensor.ts
@@ -3,6 +3,7 @@
 
 import {Guid} from 'guid-typescript';
 import Long from 'long';
+import {Float16Array} from '@petamoriken/float16';
 
 import {onnxruntime} from './ort-schema/flatbuffers/ort-generated';
 import {onnx} from './ort-schema/protobuf/onnx';
@@ -13,6 +14,7 @@ import ortFbs = onnxruntime.experimental.fbs;
 export declare namespace Tensor {
   export interface DataTypeMap {
     bool: Uint8Array;
+    float16: Float16Array;
     float32: Float32Array;
     float64: Float64Array;
     string: string[];
@@ -31,7 +33,7 @@ export declare namespace Tensor {
   export type BooleanType = Tensor.DataTypeMap['bool'];
   export type IntegerType = Tensor.DataTypeMap['int8']|Tensor.DataTypeMap['uint8']|Tensor.DataTypeMap['int16']|
       Tensor.DataTypeMap['uint16']|Tensor.DataTypeMap['int32']|Tensor.DataTypeMap['uint32'];
-  export type FloatType = Tensor.DataTypeMap['float32']|Tensor.DataTypeMap['float64'];
+  export type FloatType = Tensor.DataTypeMap['float16']|Tensor.DataTypeMap['float32']|Tensor.DataTypeMap['float64'];
   export type NumberType = BooleanType|IntegerType|FloatType;
 
   export type Id = Guid;
@@ -95,6 +97,7 @@ export class Tensor {
     switch (this.type) {
       case 'float32':
       case 'float64':
+      case 'float16':
         return this.data as Tensor.FloatType;
 
       default:
@@ -187,10 +190,12 @@ export class Tensor {
       }
     } else {
       if (cache !== undefined) {
-        const constructor = dataviewConstructor(type);
-        if (!(cache instanceof constructor)) {
-          throw new TypeError(`cache should be type ${constructor.name}`);
-        }
+        // Note: the cache type check is disabled; the Float16Array polyfill
+        // is not a native TypedArray, so this instanceof test no longer holds.
+        // const constructor = dataviewConstructor(type);
+        // if (!(cache instanceof constructor)) {
+        //   throw new TypeError(`cache should be type ${constructor.name}`);
+        // }
       }
 
       if (empty) {
@@ -248,6 +251,7 @@ export class Tensor {
     let array: Array<number|Long>;
     switch (tensorProto.dataType) {
       case onnx.TensorProto.DataType.FLOAT:
+      case onnx.TensorProto.DataType.FLOAT16:
         array = tensorProto.floatData!;
         break;
       case onnx.TensorProto.DataType.INT32:
@@ -412,6 +416,8 @@ function dataviewConstructor(type: Tensor.DataType) {
       return Uint32Array;
     case 'int64':
       return BigInt64Array;
+    case 'float16':
+      return Float16Array;
     case 'float32':
       return Float32Array;
     case 'float64':
diff --git a/js/web/lib/wasm/jsep/tensor.ts b/js/web/lib/wasm/jsep/tensor.ts
index d8a3228d274ab..b3a33af4c715f 100644
--- a/js/web/lib/wasm/jsep/tensor.ts
+++ b/js/web/lib/wasm/jsep/tensor.ts
@@ -1,9 +1,12 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
+import {Float16Array} from '@petamoriken/float16';
+
 export declare namespace Tensor {
   export interface DataTypeMap {
     bool: Uint8Array;
+    float16: Float16Array;
     float32: Float32Array;
     float64: Float64Array;
     string: string[];
@@ -24,7 +27,7 @@ export declare namespace Tensor {
   export type IntegerType = Tensor.DataTypeMap['int8']|Tensor.DataTypeMap['uint8']|Tensor.DataTypeMap['int16']|
       Tensor.DataTypeMap['uint16']|Tensor.DataTypeMap['int32']|Tensor.DataTypeMap['uint32']|
       Tensor.DataTypeMap['int64']|Tensor.DataTypeMap['uint64'];
-  export type FloatType = Tensor.DataTypeMap['float32']|Tensor.DataTypeMap['float64'];
+  export type FloatType = Tensor.DataTypeMap['float32']|Tensor.DataTypeMap['float64']|Tensor.DataTypeMap['float16'];
   export type NumberType = BooleanType|IntegerType|FloatType;
 
   export type Id = number;
@@ -71,6 +74,8 @@ const dataviewConstructor = (type: Tensor.DataType) => {
       return BigInt64Array;
     case 'uint64':
       return BigUint64Array;
+    case 'float16':
+      return Float16Array;
     case 'float32':
       return Float32Array;
     case 'float64':
@@ -82,7 +87,7 @@ const dataviewConstructor = (type: Tensor.DataType) => {
 };
 
 export const createView = (dataBuffer: ArrayBuffer, type: Tensor.DataType): Int32Array|Uint32Array|BigInt64Array|
-    BigUint64Array|Uint8Array|Float32Array|Float64Array|Int8Array|Int16Array|Uint16Array =>
+    BigUint64Array|Uint8Array|Float32Array|Float64Array|Int8Array|Int16Array|Uint16Array|Float16Array =>
         new (dataviewConstructor(type))(dataBuffer);
 
 /**
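A sketch of what the widened `createView` union enables: fp16 buffers can now be read and written as numbers rather than raw bits. The relative import path here is an assumption:

```ts
import { Float16Array } from '@petamoriken/float16';
import { createView } from './tensor'; // js/web/lib/wasm/jsep/tensor.ts, path assumed

const raw = new ArrayBuffer(8);           // room for four fp16 values
const view = createView(raw, 'float16');  // Float16Array after this change
if (view instanceof Float16Array) {
  view[0] = 0.125;       // writes the fp16 bit pattern 0x3000 into `raw`
  console.log(view[0]);  // 0.125 — decoded on read
}
```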
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/instance-norm.ts b/js/web/lib/wasm/jsep/webgpu/ops/instance-norm.ts
index 6c681a95b45cd..6b73f805ce9a5 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/instance-norm.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/instance-norm.ts
@@ -87,12 +87,12 @@ const computeMean = (context: ComputeContext, input: TensorView, scale: TensorVi
   const inputHelper = inputVariable('input', input.dataType, input.dims, components);
   const scaleHelper = inputVariable('scale', scale.dataType, scale.dims, components);
   const biasHelper = inputVariable('bias', bias.dataType, bias.dims, components);
-  const dataType = tensorTypeToWsglStorageType(input.dataType);
 
   const WG = 64;
   // we will store channel scale and channel shift in [2, components] matrix
   // or in vec2 when components == 1
-  const outputType = components === 1 ? `vec2<${dataType}>` : `mat2x${components}<${dataType}>`;
+  const outputType = components === 1 ? `vec2f` : `mat2x${components}f`;
+  const sumCastType = components === 1 ? `f32` : `vec${components}f`;
   const setOutputValue = (var1: string, var2: string) => {
     return `${outputType}(${var1}, ${var2})`;
   };
@@ -118,16 +115,13 @@ const computeMean = (context: ComputeContext, input: TensorView, scale: TensorVi
         let wgMax = min(wgOffset + ${wgSize}, H);
 
         let offset = currentImageNumber * imageSize + currentChannelNumber;
 
-        var sum: ${inputHelper.type.storage} = ${fillVector(dataType, components)};
-        var squaredSum: ${inputHelper.type.storage} = ${fillVector(dataType, components)};
+        var sum = ${fillVector('f32', components)};
+        var squaredSum = ${fillVector('f32', components)};
         for (var i: u32 = wgOffset; i < wgMax; i++) {
-          let value = input[offset + i * C];
+          let value = ${sumCastType}(input[offset + i * C]);
           sum += value;
           squaredSum += value * value;
         }
-        // we need to divide it here to avoid fp16 overflow
-        sum = sum / ${wgSize};
-        squaredSum = squaredSum / ${wgSize};
         output[global_idx] = ${setOutputValue('sum', 'squaredSum')};
       }`;
@@ -147,7 +144,7 @@ const computeMean = (context: ComputeContext, input: TensorView, scale: TensorVi
       const H: u32 = ${h};
       const C: u32 = ${c / components};
       const imageSize: u32 = ${WG * c / components};
-      const epsilon: ${dataType} = ${epsilon};
+      const epsilon: f32 = ${epsilon};
 
       @group(0) @binding(0) var<storage, read> input : array<${outputType}>;
       @group(0) @binding(1) var<storage, read> scale : array<${scaleHelper.type.storage}>;
@@ -160,18 +157,18 @@ const computeMean = (context: ComputeContext, input: TensorView, scale: TensorVi
         let currentChannelNumber = global_idx % C;
 
        let offset = currentImageNumber * imageSize;
-        var sum: ${inputHelper.type.storage} = ${fillVector(dataType, components)};
-        var squaredSum: ${inputHelper.type.storage} = ${fillVector(dataType, components)};
+        var sum = ${fillVector('f32', components)};
+        var squaredSum = ${fillVector('f32', components)};
         for (var i: u32 = 0; i < ${WG}; i++) {
           let value = input[offset + i + currentChannelNumber * ${WG}];
           sum += value[0];
           squaredSum += value[1];
         }
 
-        sum = sum / ${h / wgSize};
-        squaredSum = squaredSum / ${h / wgSize};
+        sum = sum / f32(H);
+        squaredSum = squaredSum / f32(H);
         let invStdDev = 1 / sqrt(squaredSum - sum * sum + epsilon);
-        let channelScale = invStdDev * scale[currentChannelNumber];
-        let channelShift = bias[currentChannelNumber] - sum * channelScale;
+        let channelScale = invStdDev * ${sumCastType}(scale[currentChannelNumber]);
+        let channelShift = ${sumCastType}(bias[currentChannelNumber]) - sum * channelScale;
         output[global_idx] = ${setOutputValue('channelScale', 'channelShift')};
       }`;
@@ -205,7 +202,8 @@ const createInstanceNormNHWCProgramInfo =
       const outputHelper = outputVariable('output', inputs[0].dataType, outputShape, components);
       const dataType = tensorTypeToWsglStorageType(inputs[0].dataType);
 
-      const scaleType = components === 1 ? `vec2<${dataType}>` : `mat2x${components}<${dataType}>`;
+      const scaleType = components === 1 ? `vec2f` : `mat2x${components}f`;
+      const scaleCastType = components === 1 ? dataType : `vec${components}<${dataType}>`;
 
      // first compute mean
      const channelScaleShift = computeMean(context, inputs[0], inputs[1], inputs[2], N, H, C, attributes.epsilon);
@@ -223,7 +221,7 @@ const createInstanceNormNHWCProgramInfo =
         let scaleOffset = currentImageNumber * C + currentChannelNumber;
         let scale = scaleInput[scaleOffset];
-        output[global_idx] = fma(input[global_idx], scale[0], scale[1]);
+        output[global_idx] = fma(input[global_idx], ${scaleCastType}(scale[0]), ${scaleCastType}(scale[1]));
       }`;
     context.compute({
       ...metadata,
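The shader changes above move the sum/squared-sum accumulators to f32, which is why the intermediate divide-by-`wgSize` workaround could be dropped: f32 has enough headroom to divide once by `H` at the end. The sketch below reproduces the fp16 failure mode in TypeScript using the polyfill (the real accumulation happens in WGSL; the constants are arbitrary):

```ts
// fp16 saturates at 65504, so summing squared activations overflows long
// before a typical reduction over H*W elements finishes.
import { Float16Array } from '@petamoriken/float16';

const fp16Acc = new Float16Array(1);
let f32Acc = 0;
for (let i = 0; i < 1000; i++) {
  fp16Acc[0] += 16 * 16; // each step re-rounds the accumulator to fp16
  f32Acc += 16 * 16;
}
console.log(fp16Acc[0]); // Infinity — saturated once the sum passed ~65504
console.log(f32Acc);     // 256000 — fine in f32, matching the new f32 path
```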
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/skip-layer-norm.ts b/js/web/lib/wasm/jsep/webgpu/ops/skip-layer-norm.ts
index 67efebb35cfe1..a69f37b8d0828 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/skip-layer-norm.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/skip-layer-norm.ts
@@ -13,6 +13,7 @@ import {
   outputVariable,
   ShaderHelper,
   sumVector,
   tensorTypeToWsglStorageType,
 } from './common';
+import {DataType} from '../../../wasm-common';
 
 export interface SkipLayerNormAttributes extends AttributeWithCacheKey {
   epsilon: number;
@@ -104,10 +105,10 @@ const createSkipLayerNormProgramInfo =
       }
       variables.push(outputVariable('output', inputs[0].dataType, outputShape, components));
       if (hasMeanOutput) {
-        variables.push(outputVariable('meanOutput', inputs[0].dataType, meanInvStdDevDim));
+        variables.push(outputVariable('meanOutput', DataType.float, meanInvStdDevDim));
       }
       if (hasInvStdDevOutput) {
-        variables.push(outputVariable('invStdOutput', inputs[0].dataType, meanInvStdDevDim));
+        variables.push(outputVariable('invStdOutput', DataType.float, meanInvStdDevDim));
       }
       if (hasInputSkipBiasSumOutput) {
         variables.push(outputVariable('inputSkipBiasSum', inputs[0].dataType, outputShape, components));
@@ -132,8 +133,8 @@ const createSkipLayerNormProgramInfo =
         let value = inputValue + skipValue + biasValue;
         ${hasInputSkipBiasSumOutput ? 'inputSkipBiasSum[offset + i] = value;' : ''}
         output[offset + i] = value;
-        sum += value;
-        squareSum += value * value;
+        sum += f32(value);
+        squareSum += f32(value) * f32(value);
       }
       let mean: ${dataType} = ${sumVector('sum', components)} / ${dataType}(hiddenSize);
       let variance: ${dataType} = sqrt(${sumVector('squareSum', components)}
diff --git a/js/web/lib/wasm/wasm-common.ts b/js/web/lib/wasm/wasm-common.ts
index 1562f0fa6f60d..e220ded7ebd3b 100644
--- a/js/web/lib/wasm/wasm-common.ts
+++ b/js/web/lib/wasm/wasm-common.ts
@@ -2,6 +2,7 @@
 // Licensed under the MIT License.
 
 import {Tensor} from 'onnxruntime-common';
+import {Float16Array, Float16ArrayConstructor} from '@petamoriken/float16';
 
 // This file includes common definitions. They do NOT have dependency on the WebAssembly instance.
@@ -114,10 +115,10 @@ export const getTensorElementSize = (dateType: number): number|
  */
 export const tensorTypeToTypedArrayConstructor = (type: Tensor.Type): Float32ArrayConstructor|Uint8ArrayConstructor|
     Int8ArrayConstructor|Uint16ArrayConstructor|Int16ArrayConstructor|Int32ArrayConstructor|BigInt64ArrayConstructor|
-    Uint8ArrayConstructor|Float64ArrayConstructor|Uint32ArrayConstructor|BigUint64ArrayConstructor => {
+    Uint8ArrayConstructor|Float64ArrayConstructor|Uint32ArrayConstructor|BigUint64ArrayConstructor|Float16ArrayConstructor => {
       switch (type) {
         case 'float16':
-          return Uint16Array;
+          return Float16Array;
         case 'float32':
           return Float32Array;
         case 'uint8':
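With the mapping change above, fp16 outputs read back as numbers instead of raw uint16 bit patterns. A sketch, with the import path assumed:

```ts
import { tensorTypeToTypedArrayConstructor } from './wasm-common'; // path assumed

const ctor = tensorTypeToTypedArrayConstructor('float16'); // now Float16Array, was Uint16Array
const out = new ctor(4);
out[0] = 1.5;         // stored as the fp16 bit pattern 0x3E00
console.log(out[0]);  // 1.5 — decoded back to a number on read
```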
diff --git a/js/web/lib/wasm/wasm-core-impl.ts b/js/web/lib/wasm/wasm-core-impl.ts
index 2f8d4bd6193d0..924078d8085c3 100644
--- a/js/web/lib/wasm/wasm-core-impl.ts
+++ b/js/web/lib/wasm/wasm-core-impl.ts
@@ -345,6 +345,7 @@ export const run = async(
           const data = new typedArrayConstructor(size);
           new Uint8Array(data.buffer, data.byteOffset, data.byteLength)
               .set(wasm.HEAPU8.subarray(dataOffset, dataOffset + data.byteLength));
+          // @ts-ignore -- `data` may be a polyfilled Float16Array, which the output tuple type does not accept yet
           output.push([type, dims, data]);
         }
       } finally {
diff --git a/js/web/test/test-runner.ts b/js/web/test/test-runner.ts
index 0a21291ed2f06..fe8cfc5d5fafa 100644
--- a/js/web/test/test-runner.ts
+++ b/js/web/test/test-runner.ts
@@ -18,6 +18,7 @@ import {tensorDataTypeStringToEnum} from '../lib/wasm/wasm-common';
 
 import {base64toBuffer, createMockGraph, readFile} from './test-shared';
 import {Test} from './test-types';
+import {Float16Array} from '@petamoriken/float16';
 
 // import {listSupportedBackends, onnxruntimeBackend} from '../../node/lib/backend';
 // const backends = listSupportedBackends();
@@ -383,11 +384,12 @@ export class TensorResultValidator {
       case 'string':
         return this.strictEqual(actual.stringData, expected.stringData);
 
+      case 'float16':
       case 'float32':
       case 'float64':
         return this.floatEqual(
-            actual.numberData as number[] | Float32Array | Float64Array,
-            expected.numberData as number[] | Float32Array | Float64Array);
+            actual.numberData as number[] | Float32Array | Float64Array | Float16Array,
+            expected.numberData as number[] | Float32Array | Float64Array | Float16Array);
 
       case 'uint8':
       case 'int8':
@@ -415,7 +417,7 @@ export class TensorResultValidator {
       return false;
     }
   }
-  floatEqual(actual: number[]|Float32Array|Float64Array, expected: number[]|Float32Array|Float64Array): boolean {
+  floatEqual(actual: number[]|Float32Array|Float64Array|Float16Array, expected: number[]|Float32Array|Float64Array|Float16Array): boolean {
     if (actual.length !== expected.length) {
       return false;
     }
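A sketch of how the widened `floatEqual` might be exercised with fp16 data; the validator construction and backend name below are hypothetical, and the comparison thresholds come from the test config:

```ts
import { Float16Array } from '@petamoriken/float16';
import { TensorResultValidator } from './test-runner'; // path assumed

const validator = new TensorResultValidator('webgpu');
const actual = new Float16Array([0.1999, 0.3999]);
const expected = new Float16Array([0.2, 0.4]);
// fp16 carries only ~3 significant decimal digits, so float16 comparisons
// may eventually need looser thresholds than float32.
console.log(validator.floatEqual(actual, expected)); // boolean
```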