Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into flaky_semantic_test
Browse files Browse the repository at this point in the history
  • Loading branch information
jimczi committed Jun 18, 2024
2 parents d08404c + 86b80b6 commit 6a48c08
Show file tree
Hide file tree
Showing 112 changed files with 2,251 additions and 1,940 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public class DistanceFunctionBenchmark {
@Param({ "96" })
private int dims;

@Param({ "dot", "cosine", "l1", "l2" })
@Param({ "dot", "cosine", "l1", "l2", "hamming" })
private String function;

@Param({ "knn", "binary" })
Expand Down Expand Up @@ -330,6 +330,18 @@ public void execute(Consumer<Object> consumer) {
}
}

/**
 * Benchmark function that exercises {@code hamming} through a {@code ByteKnnDenseVector}.
 */
private static class HammingKnnByteBenchmarkFunction extends KnnByteBenchmarkFunction {

    private HammingKnnByteBenchmarkFunction(int dims) {
        super(dims);
    }

    /**
     * Wraps {@code docVector} in a new {@code ByteKnnDenseVector} and calls {@code hamming} on it with {@code queryVector}.
     * NOTE(review): {@code consumer} is never invoked and the result of the call is discarded; confirm this is intended.
     */
    @Override
    public void execute(Consumer<Object> consumer) {
        final ByteKnnDenseVector vector = new ByteKnnDenseVector(docVector);
        vector.hamming(queryVector);
    }
}

private static class L1BinaryFloatBenchmarkFunction extends BinaryFloatBenchmarkFunction {

private L1BinaryFloatBenchmarkFunction(int dims) {
Expand All @@ -354,6 +366,18 @@ public void execute(Consumer<Object> consumer) {
}
}

/**
 * Benchmark function that exercises {@code hamming} through a {@code ByteBinaryDenseVector}.
 */
private static class HammingBinaryByteBenchmarkFunction extends BinaryByteBenchmarkFunction {

    private HammingBinaryByteBenchmarkFunction(int dims) {
        super(dims);
    }

    /**
     * Builds a new {@code ByteBinaryDenseVector} from {@code vectorValue}, {@code docVector} and {@code dims},
     * then calls {@code hamming} on it with {@code queryVector}.
     * NOTE(review): {@code consumer} is never invoked and the result of the call is discarded; confirm this is intended.
     */
    @Override
    public void execute(Consumer<Object> consumer) {
        final ByteBinaryDenseVector vector = new ByteBinaryDenseVector(vectorValue, docVector, dims);
        vector.hamming(queryVector);
    }
}

private static class L2KnnFloatBenchmarkFunction extends KnnFloatBenchmarkFunction {

private L2KnnFloatBenchmarkFunction(int dims) {
Expand Down Expand Up @@ -454,6 +478,11 @@ public void setBenchmarkFunction() {
case "binary" -> new L2BinaryByteBenchmarkFunction(dims);
default -> throw new UnsupportedOperationException("unexpected type [" + type + "]");
};
case "hamming" -> benchmarkFunction = switch (type) {
case "knn" -> new HammingKnnByteBenchmarkFunction(dims);
case "binary" -> new HammingBinaryByteBenchmarkFunction(dims);
default -> throw new UnsupportedOperationException("unexpected type [" + type + "]");
};
default -> throw new UnsupportedOperationException("unexpected function [" + function + "]");
}
}
Expand Down
5 changes: 5 additions & 0 deletions docs/changelog/109359.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 109359
summary: Adding hamming distance function to painless for `dense_vector` fields
area: Vector Search
type: enhancement
issues: []
5 changes: 5 additions & 0 deletions docs/changelog/109551.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 109551
summary: Avoid `InferenceRunner` deadlock
area: Machine Learning
type: bug
issues: []
5 changes: 5 additions & 0 deletions docs/changelog/109653.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 109653
summary: Handle the "JSON memory allocator bytes" field
area: Machine Learning
type: enhancement
issues: []
5 changes: 5 additions & 0 deletions docs/changelog/109717.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 109717
summary: Bump jackson version in modules:repository-azure
area: Snapshot/Restore
type: upgrade
issues: []
5 changes: 5 additions & 0 deletions docs/changelog/109794.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 109794
summary: Provide document size reporter with `MapperService`
area: Infra/Metrics
type: bug
issues: []
6 changes: 6 additions & 0 deletions docs/changelog/109824.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 109824
summary: Check array size before returning array item in script doc values
area: Infra/Scripting
type: bug
issues:
- 104998
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ The following methods are directly callable without a class/instance qualifier.
* double dotProduct(Object *, String *)
* double l1norm(Object *, String *)
* double l2norm(Object *, String *)
* double hamming(Object *, String *)
* double randomScore(int *)
* double randomScore(int *, String *)
* double saturation(double, double)
Expand Down
27 changes: 15 additions & 12 deletions docs/reference/cat/anomaly-detectors.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@

[IMPORTANT]
====
cat APIs are only intended for human consumption using the command line or {kib}
console. They are _not_ intended for use by applications. For application
consumption, use the
cat APIs are only intended for human consumption using the command line or {kib}
console. They are _not_ intended for use by applications. For application
consumption, use the
<<ml-get-job-stats,get anomaly detection job statistics API>>.
====

Expand Down Expand Up @@ -137,16 +137,16 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=sparse-bucket-count]

`forecasts.memory.avg`, `fmavg`, `forecastsMemoryAvg`:::
The average memory usage in bytes for forecasts related to the {anomaly-job}.

`forecasts.memory.max`, `fmmax`, `forecastsMemoryMax`:::
The maximum memory usage in bytes for forecasts related to the {anomaly-job}.

`forecasts.memory.min`, `fmmin`, `forecastsMemoryMin`:::
The minimum memory usage in bytes for forecasts related to the {anomaly-job}.

`forecasts.memory.total`, `fmt`, `forecastsMemoryTotal`:::
The total memory usage in bytes for forecasts related to the {anomaly-job}.
The total memory usage in bytes for forecasts related to the {anomaly-job}.

`forecasts.records.avg`, `fravg`, `forecastsRecordsAvg`:::
The average number of `model_forecast` documents written for forecasts related
to the {anomaly-job}.
Expand All @@ -161,8 +161,8 @@ to the {anomaly-job}.

`forecasts.records.total`, `frt`, `forecastsRecordsTotal`:::
The total number of `model_forecast` documents written for forecasts related to
the {anomaly-job}.
the {anomaly-job}.

`forecasts.time.avg`, `ftavg`, `forecastsTimeAvg`:::
The average runtime in milliseconds for forecasts related to the {anomaly-job}.

Expand Down Expand Up @@ -198,7 +198,7 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=model-bytes-exceeded]

`model.categorization_status`, `mcs`, `modelCategorizationStatus`:::
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=categorization-status]

`model.categorized_doc_count`, `mcdc`, `modelCategorizedDocCount`:::
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=categorized-doc-count]

Expand All @@ -221,6 +221,9 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=model-memory-limit-anomaly-jobs]
(Default)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=model-memory-status]

`model.output_memory_allocator_bytes`, `momab`, `modelOutputMemoryAllocatorBytes`:::
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=output-memory-allocator-bytes]

`model.over_fields`, `mof`, `modelOverFields`:::
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=total-over-field-count]

Expand All @@ -232,10 +235,10 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=rare-category-count]

`model.timestamp`, `mt`, `modelTimestamp`:::
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=model-timestamp]

`model.total_category_count`, `mtcc`, `modelTotalCategoryCount`:::
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=total-category-count]

`node.address`, `na`, `nodeAddress`:::
The network address of the node.
+
Expand All @@ -261,7 +264,7 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=open-time]

`state`, `s`:::
(Default)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=state-anomaly-job]
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=state-anomaly-job]

include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=help]

Expand Down
19 changes: 19 additions & 0 deletions docs/reference/esql/functions/kibana/inline_cast.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 8 additions & 4 deletions docs/reference/ml/ml-shared.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -430,16 +430,16 @@ end::daily-model-snapshot-retention-after-days[]

tag::data-description[]
The data description defines the format of the input data when you send data to
the job by using the <<ml-post-data,post data>> API. Note that when using a
{dfeed}, only the `time_field` needs to be set, the rest of the properties are
automatically set. When data is received via the <<ml-post-data,post data>> API,
the job by using the <<ml-post-data,post data>> API. Note that when using a
{dfeed}, only the `time_field` needs to be set; the rest of the properties are
automatically set. When data is received via the <<ml-post-data,post data>> API,
it is not stored in {es}. Only the results for {anomaly-detect} are retained.
+
.Properties of `data_description`
[%collapsible%open]
====
`format`:::
(string) Only `xcontent` format is supported at this time, and this is the
(string) Only `xcontent` format is supported at this time, and this is the
default value.

`time_field`:::
Expand Down Expand Up @@ -1285,6 +1285,10 @@ tag::job-id-datafeed[]
The unique identifier for the job to which the {dfeed} sends data.
end::job-id-datafeed[]

tag::output-memory-allocator-bytes[]
The amount of memory, in bytes, used to output {anomaly-job} documents.
end::output-memory-allocator-bytes[]

tag::lambda[]
Advanced configuration option. Regularization parameter to prevent overfitting
on the training data set. Multiplies an L2 regularization term which applies to
Expand Down
50 changes: 47 additions & 3 deletions docs/reference/vectors/vector-functions.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@ This is the list of available vector functions and vector access methods:
1. <<vector-functions-cosine,`cosineSimilarity`>> – calculates cosine similarity
2. <<vector-functions-dot-product,`dotProduct`>> – calculates dot product
3. <<vector-functions-l1,`l1norm`>> – calculates L^1^ distance
4. <<vector-functions-l2,`l2norm`>> - calculates L^2^ distance
5. <<vector-functions-accessing-vectors,`doc[<field>].vectorValue`>> – returns a vector's value as an array of floats
6. <<vector-functions-accessing-vectors,`doc[<field>].magnitude`>> – returns a vector's magnitude
4. <<vector-functions-hamming,`hamming`>> – calculates Hamming distance
5. <<vector-functions-l2,`l2norm`>> – calculates L^2^ distance
6. <<vector-functions-accessing-vectors,`doc[<field>].vectorValue`>> – returns a vector's value as an array of floats
7. <<vector-functions-accessing-vectors,`doc[<field>].magnitude`>> – returns a vector's magnitude

NOTE: The recommended way to access dense vectors is through the
`cosineSimilarity`, `dotProduct`, `l1norm` or `l2norm` functions. Please note
Expand All @@ -35,8 +36,15 @@ PUT my-index-000001
"properties": {
"my_dense_vector": {
"type": "dense_vector",
"index": false,
"dims": 3
},
"my_byte_dense_vector": {
"type": "dense_vector",
"index": false,
"dims": 3,
"element_type": "byte"
},
"status" : {
"type" : "keyword"
}
Expand All @@ -47,12 +55,14 @@ PUT my-index-000001
PUT my-index-000001/_doc/1
{
"my_dense_vector": [0.5, 10, 6],
"my_byte_dense_vector": [0, 10, 6],
"status" : "published"
}
PUT my-index-000001/_doc/2
{
"my_dense_vector": [-0.5, 10, 10],
"my_byte_dense_vector": [0, 10, 10],
"status" : "published"
}
Expand Down Expand Up @@ -179,6 +189,40 @@ we reversed the output from `l1norm` and `l2norm`. Also, to avoid
division by 0 when a document vector matches the query exactly,
we added `1` in the denominator.

[[vector-functions-hamming]]
====== Hamming distance

The `hamming` function calculates {wikipedia}/Hamming_distance[Hamming distance] between a given query vector and
document vectors. It is only available for byte vectors.

[source,console]
--------------------------------------------------
GET my-index-000001/_search
{
"query": {
"script_score": {
"query" : {
"bool" : {
"filter" : {
"term" : {
"status" : "published"
}
}
}
},
"script": {
"source": "(24 - hamming(params.queryVector, 'my_byte_dense_vector')) / 24", <1>
"params": {
"queryVector": [4, 3, 0]
}
}
}
}
}
--------------------------------------------------

<1> Calculate the Hamming distance and convert it to a similarity score between 0 and 1 by normalizing against the total number of bits in the vector (3 byte dimensions × 8 bits = 24).

[[vector-functions-l2]]
====== L^2^ distance (Euclidean distance)

Expand Down
25 changes: 25 additions & 0 deletions gradle/verification-metadata.xml
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,11 @@
<sha256 value="91adcd3dcf5fd9a16499934e7536a23d456692a0093e3d4fd52f138c3936348c" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="com.fasterxml.jackson.core" name="jackson-annotations" version="2.15.4">
<artifact name="jackson-annotations-2.15.4.jar">
<sha256 value="f204ebbd552614a22b8531ffe350d47f8fd42c45bb60517c07974dc27a5a1dd3" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="com.fasterxml.jackson.core" name="jackson-core" version="2.10.5">
<artifact name="jackson-core-2.10.5.jar">
<sha256 value="2656010d1e921ac69b76fc7e0c0f5a6b14aca62fa9603e78831e6148eb7c77ba" origin="Generated by Gradle"/>
Expand All @@ -286,6 +291,11 @@
<sha256 value="5b483f68fa9dd6aa37da37d1f79dd5c4b9464238f4f0660a242cb6b5c724950c" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="com.fasterxml.jackson.core" name="jackson-core" version="2.15.4">
<artifact name="jackson-core-2.15.4.jar">
<sha256 value="8dc9210dd285db366f45f518dd1e6a9ccfeb0f1a8e184a899fe96d29edf1fd94" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="com.fasterxml.jackson.core" name="jackson-databind" version="2.10.5.1">
<artifact name="jackson-databind-2.10.5.1.jar">
<sha256 value="f93db83891a53e8d268e2cc8fcd88ead2981edc2163e35c2a52c88d9ab57b4a0" origin="Generated by Gradle"/>
Expand Down Expand Up @@ -331,6 +341,11 @@
<sha256 value="76fbf0cedd51af6a13aba39c27c8c29a5a280dc24ee66577d559e4660d8709ce" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="com.fasterxml.jackson.dataformat" name="jackson-dataformat-xml" version="2.15.4">
<artifact name="jackson-dataformat-xml-2.15.4.jar">
<sha256 value="90d8109cda7b90c494a7bfde44e96e2fa25021191b67a5924dfa5cbd698025c3" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="com.fasterxml.jackson.dataformat" name="jackson-dataformat-yaml" version="2.14.2">
<artifact name="jackson-dataformat-yaml-2.14.2.jar">
<sha256 value="5c3a0a71d0339529c80ae771497b20fdc0fa7cb67c772f99af5935927560006a" origin="Generated by Gradle"/>
Expand All @@ -346,6 +361,11 @@
<sha256 value="5ad75d210bacc17271925da28e1f393aaf8c83f6c92fbe5b2ed61954b84decf7" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="com.fasterxml.jackson.datatype" name="jackson-datatype-jsr310" version="2.15.4">
<artifact name="jackson-datatype-jsr310-2.15.4.jar">
<sha256 value="472498cbba2726012ff82f86fc8feef9593663bda1a695a17db804a63fa733ff" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="com.fasterxml.jackson.jaxrs" name="jackson-jaxrs-base" version="2.10.5">
<artifact name="jackson-jaxrs-base-2.10.5.jar">
<sha256 value="98f27188fa2a72ef5d3f85fab6e6ca0e76bde1a58c9396cb1cf91028080435d6" origin="Generated by Gradle"/>
Expand All @@ -366,6 +386,11 @@
<sha256 value="b23725fd92b783e3ddc149d23f565b9da5bdfc98361be81488ff4e45e6735ba1" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="com.fasterxml.jackson.module" name="jackson-module-jaxb-annotations" version="2.15.4">
<artifact name="jackson-module-jaxb-annotations-2.15.4.jar">
<sha256 value="20bf4d2ce22fa76c6feba48dc2e770bfeb313a36f984e00e3403af405e1c83b2" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="com.fasterxml.woodstox" name="woodstox-core" version="5.3.0">
<artifact name="woodstox-core-5.3.0.jar">
<sha256 value="b2bd29c31fda49a9b28a22b9e5c2b26443bcfa99c1a28eab70ab9c7d349b5002" origin="Generated by Gradle"/>
Expand Down
Loading

0 comments on commit 6a48c08

Please sign in to comment.