From 4cb84340eb2908f2e67b44123fe0af08cfb89f9b Mon Sep 17 00:00:00 2001 From: Jonathan Buttner <56361221+jonathan-buttner@users.noreply.github.com> Date: Thu, 8 Feb 2024 12:20:45 -0500 Subject: [PATCH] [ML] Adding text embedding byte representation (#2411) * Adding text embedding byte representation * Removing semi-colon --- output/schema/schema.json | 59 ++++++++++++++++++++++- output/typescript/types.ts | 7 +++ specification/inference/_types/Results.ts | 16 +++++- 3 files changed, 79 insertions(+), 3 deletions(-) diff --git a/output/schema/schema.json b/output/schema/schema.json index 4e94e19611..d8f4dd8778 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -127831,6 +127831,25 @@ }, "specLocation": "indices/validate_query/IndicesValidateQueryResponse.ts#L23-L30" }, + { + "description": "Text Embedding results containing bytes are represented as Dense\nVectors of bytes.", + "kind": "type_alias", + "name": { + "name": "DenseByteVector", + "namespace": "inference._types" + }, + "specLocation": "inference/_types/Results.ts#L39-L43", + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "byte", + "namespace": "_types" + } + } + } + }, { "description": "Text Embedding results are represented as Dense Vectors\nof floats.", "kind": "type_alias", @@ -127858,6 +127877,20 @@ "namespace": "inference._types" }, "properties": [ + { + "name": "text_embedding_bytes", + "required": false, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "TextEmbeddingByteResult", + "namespace": "inference._types" + } + } + } + }, { "name": "text_embedding", "required": false, @@ -127887,7 +127920,7 @@ } } ], - "specLocation": "inference/_types/Results.ts#L46-L53", + "specLocation": "inference/_types/Results.ts#L59-L67", "variants": { "kind": "container" } @@ -128066,6 +128099,28 @@ }, "specLocation": "inference/_types/TaskType.ts#L20-L26" }, + { + "description": "The text embedding result 
object for byte representation", + "kind": "interface", + "name": { + "name": "TextEmbeddingByteResult", + "namespace": "inference._types" + }, + "properties": [ + { + "name": "embedding", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "DenseByteVector", + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/_types/Results.ts#L45-L50" + }, { "description": "The text embedding result object", "kind": "interface", @@ -128086,7 +128141,7 @@ } } ], - "specLocation": "inference/_types/Results.ts#L39-L44" + "specLocation": "inference/_types/Results.ts#L52-L57" }, { "attachedBehaviors": [ diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 9498f1d415..4332c5c735 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -11464,9 +11464,12 @@ export interface IndicesValidateQueryResponse { error?: string } +export type InferenceDenseByteVector = byte[] + export type InferenceDenseVector = float[] export interface InferenceInferenceResult { + text_embedding_bytes?: InferenceTextEmbeddingByteResult[] text_embedding?: InferenceTextEmbeddingResult[] sparse_embedding?: InferenceSparseEmbeddingResult[] } @@ -11494,6 +11497,10 @@ export type InferenceTaskSettings = any export type InferenceTaskType = 'sparse_embedding' | 'text_embedding' +export interface InferenceTextEmbeddingByteResult { + embedding: InferenceDenseByteVector +} + export interface InferenceTextEmbeddingResult { embedding: InferenceDenseVector } diff --git a/specification/inference/_types/Results.ts b/specification/inference/_types/Results.ts index e16d49ee9b..b7cfd981a4 100644 --- a/specification/inference/_types/Results.ts +++ b/specification/inference/_types/Results.ts @@ -17,7 +17,7 @@ * under the License. 
 */ -import { float } from '@_types/Numeric' +import { float, byte } from '@_types/Numeric' import { Dictionary } from '@spec_utils/Dictionary' /** @@ -36,6 +36,19 @@ export class SparseEmbeddingResult { embedding: SparseVector } +/** + * Text Embedding results containing bytes are represented as Dense + * Vectors of bytes. + */ +export type DenseByteVector = Array<byte> + +/** + * The text embedding result object for byte representation + */ +export class TextEmbeddingByteResult { + embedding: DenseByteVector +} + /** * The text embedding result object */ @@ -48,6 +61,7 @@ export class TextEmbeddingResult { * @variants container */ export class InferenceResult { + text_embedding_bytes?: Array<TextEmbeddingByteResult> text_embedding?: Array<TextEmbeddingResult> sparse_embedding?: Array<SparseEmbeddingResult> }