diff --git a/docs/changelog/117989.yaml b/docs/changelog/117989.yaml new file mode 100644 index 0000000000000..e4967141b3ebd --- /dev/null +++ b/docs/changelog/117989.yaml @@ -0,0 +1,5 @@ +pr: 117989 +summary: ESQL Add esql hash function +area: ES|QL +type: enhancement +issues: [] diff --git a/docs/reference/esql/functions/description/hash.asciidoc b/docs/reference/esql/functions/description/hash.asciidoc new file mode 100644 index 0000000000000..e074915c5132a --- /dev/null +++ b/docs/reference/esql/functions/description/hash.asciidoc @@ -0,0 +1,5 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Description* + +Computes the hash of the input using various algorithms such as MD5, SHA, SHA-224, SHA-256, SHA-384, SHA-512. diff --git a/docs/reference/esql/functions/kibana/definition/hash.json b/docs/reference/esql/functions/kibana/definition/hash.json new file mode 100644 index 0000000000000..17a60cf45acfe --- /dev/null +++ b/docs/reference/esql/functions/kibana/definition/hash.json @@ -0,0 +1,82 @@ +{ + "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.", + "type" : "eval", + "name" : "hash", + "description" : "Computes the hash of the input using various algorithms such as MD5, SHA, SHA-224, SHA-256, SHA-384, SHA-512.", + "signatures" : [ + { + "params" : [ + { + "name" : "algorithm", + "type" : "keyword", + "optional" : false, + "description" : "Hash algorithm to use." + }, + { + "name" : "input", + "type" : "keyword", + "optional" : false, + "description" : "Input to hash." + } + ], + "variadic" : false, + "returnType" : "keyword" + }, + { + "params" : [ + { + "name" : "algorithm", + "type" : "keyword", + "optional" : false, + "description" : "Hash algorithm to use." + }, + { + "name" : "input", + "type" : "text", + "optional" : false, + "description" : "Input to hash." 
+ } + ], + "variadic" : false, + "returnType" : "keyword" + }, + { + "params" : [ + { + "name" : "algorithm", + "type" : "text", + "optional" : false, + "description" : "Hash algorithm to use." + }, + { + "name" : "input", + "type" : "keyword", + "optional" : false, + "description" : "Input to hash." + } + ], + "variadic" : false, + "returnType" : "keyword" + }, + { + "params" : [ + { + "name" : "algorithm", + "type" : "text", + "optional" : false, + "description" : "Hash algorithm to use." + }, + { + "name" : "input", + "type" : "text", + "optional" : false, + "description" : "Input to hash." + } + ], + "variadic" : false, + "returnType" : "keyword" + } + ], + "preview" : false, + "snapshot_only" : false +} diff --git a/docs/reference/esql/functions/kibana/docs/hash.md b/docs/reference/esql/functions/kibana/docs/hash.md new file mode 100644 index 0000000000000..9826e80ec5bec --- /dev/null +++ b/docs/reference/esql/functions/kibana/docs/hash.md @@ -0,0 +1,7 @@ + + +### HASH +Computes the hash of the input using various algorithms such as MD5, SHA, SHA-224, SHA-256, SHA-384, SHA-512. + diff --git a/docs/reference/esql/functions/layout/hash.asciidoc b/docs/reference/esql/functions/layout/hash.asciidoc new file mode 100644 index 0000000000000..27c55ada6319b --- /dev/null +++ b/docs/reference/esql/functions/layout/hash.asciidoc @@ -0,0 +1,14 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. 
+ +[discrete] +[[esql-hash]] +=== `HASH` + +*Syntax* + +[.text-center] +image::esql/functions/signature/hash.svg[Embedded,opts=inline] + +include::../parameters/hash.asciidoc[] +include::../description/hash.asciidoc[] +include::../types/hash.asciidoc[] diff --git a/docs/reference/esql/functions/parameters/hash.asciidoc b/docs/reference/esql/functions/parameters/hash.asciidoc new file mode 100644 index 0000000000000..d47a82d4ab214 --- /dev/null +++ b/docs/reference/esql/functions/parameters/hash.asciidoc @@ -0,0 +1,9 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Parameters* + +`algorithm`:: +Hash algorithm to use. + +`input`:: +Input to hash. diff --git a/docs/reference/esql/functions/signature/hash.svg b/docs/reference/esql/functions/signature/hash.svg new file mode 100644 index 0000000000000..f819e14c9d1a4 --- /dev/null +++ b/docs/reference/esql/functions/signature/hash.svg @@ -0,0 +1 @@ +HASH(algorithm,input) \ No newline at end of file diff --git a/docs/reference/esql/functions/string-functions.asciidoc b/docs/reference/esql/functions/string-functions.asciidoc index ce9636f5c5a3a..da9580a55151a 100644 --- a/docs/reference/esql/functions/string-functions.asciidoc +++ b/docs/reference/esql/functions/string-functions.asciidoc @@ -13,6 +13,7 @@ * <> * <> * <> +* <> * <> * <> * <> @@ -37,6 +38,7 @@ include::layout/byte_length.asciidoc[] include::layout/concat.asciidoc[] include::layout/ends_with.asciidoc[] include::layout/from_base64.asciidoc[] +include::layout/hash.asciidoc[] include::layout/left.asciidoc[] include::layout/length.asciidoc[] include::layout/locate.asciidoc[] diff --git a/docs/reference/esql/functions/types/hash.asciidoc b/docs/reference/esql/functions/types/hash.asciidoc new file mode 100644 index 0000000000000..786ba03b2aa60 --- /dev/null +++ b/docs/reference/esql/functions/types/hash.asciidoc @@ -0,0 +1,12 @@ +// This is generated by ESQL's AbstractFunctionTestCase. 
Do no edit it. See ../README.md for how to regenerate it. + +*Supported types* + +[%header.monospaced.styled,format=dsv,separator=|] +|=== +algorithm | input | result +keyword | keyword | keyword +keyword | text | keyword +text | keyword | keyword +text | text | keyword +|=== diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec new file mode 100644 index 0000000000000..fcac1e1859c6d --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec @@ -0,0 +1,105 @@ +hash +required_capability: hash_function + +FROM sample_data +| WHERE message != "Connection error" +| EVAL md5 = hash("md5", message), sha256 = hash("sha256", message) +| KEEP message, md5, sha256; +ignoreOrder:true + +message:keyword | md5:keyword | sha256:keyword +Connected to 10.1.0.1 | abd7d1ce2bb636842a29246b3512dcae | 6d8372129ad78770f7185554dd39864749a62690216460752d6c075fa38ad85c +Connected to 10.1.0.2 | 8f8f1cb60832d153f5b9ec6dc828b93f | b0db24720f15857091b3c99f4c4833586d0ea3229911b8777efb8d917cf27e9a +Connected to 10.1.0.3 | 912b6dc13503165a15de43304bb77c78 | 75b0480188db8acc4d5cc666a51227eb2bc5b989cd8ca912609f33e0846eff57 +Disconnected | ef70e46fd3bbc21e3e1f0b6815e750c0 | 04dfac3671b494ad53fcd152f7a14511bfb35747278aad8ce254a0d6e4ba4718 +; + + +hashOfConvertedType +required_capability: hash_function + +FROM sample_data +| WHERE message != "Connection error" +| EVAL input = event_duration::STRING, md5 = hash("md5", input), sha256 = hash("sha256", input) +| KEEP message, input, md5, sha256; +ignoreOrder:true + +message:keyword | input:keyword | md5:keyword | sha256:keyword +Connected to 10.1.0.1 | 1756467 | c4fc1c57ee9b1d2b2023b70c8c167b54 | 8376a50a7ba7e6bd1bf9ad0c32d27d2f49fd0fa422573f98f239e21048b078f3 +Connected to 10.1.0.2 | 2764889 | 8e8cf005e11a7b5df1d9478a4715a444 | 1031f2bef8eaecbf47319505422300b27ea1f7c38b6717d41332325062f9a56a +Connected to 10.1.0.3 | 3450233 | 
09f2c64f5a55e9edf8ffbad336b561d8 | f77d7545769c4ecc85092f4f0b7ec8c20f467e4beb15fe67ca29f9aa8e9a6900 +Disconnected | 1232382 | 6beac1485638d51e13c2c53990a2f611 | 9a03c1274a3ebb6c1cb85d170ce0a6fdb9d2232724e06b9f5e7cb9274af3cad6 +; + + +hashOfEmptyInput +required_capability: hash_function + +ROW input="" | EVAL md5 = hash("md5", input), sha256 = hash("sha256", input); + +input:keyword | md5:keyword | sha256:keyword + | d41d8cd98f00b204e9800998ecf8427e | e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 +; + +hashOfNullInput +required_capability: hash_function + +ROW input=null::STRING | EVAL md5 = hash("md5", input), sha256 = hash("sha256", input); + +input:keyword | md5:keyword | sha256:keyword +null | null | null +; + + +hashWithNullAlgorithm +required_capability: hash_function + +ROW input="input" | EVAL hash = hash(null, input); + +input:keyword | hash:keyword +input | null +; + + +hashWithMv +required_capability: hash_function + +ROW input=["foo", "bar"] | mv_expand input | EVAL md5 = hash("md5", input), sha256 = hash("sha256", input); + +input:keyword | md5:keyword | sha256:keyword +foo | acbd18db4cc2f85cedef654fccc4a4d8 | 2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae +bar | 37b51d194a7513e45b56f6524f2d51f2 | fcde2b2edba56bf408601fb721fe9b5c338d10ee429ea04fae5511b68fbf8fb9 +; + + +hashWithNestedFunctions +required_capability: hash_function + +ROW input=["foo", "bar"] | EVAL hash = concat(hash("md5", mv_concat(input, "-")), "-", hash("sha256", mv_concat(input, "-"))); + +input:keyword | hash:keyword +["foo", "bar"] | e5f9ec048d1dbe19c70f720e002f9cb1-7d89c4f517e3bd4b5e8e76687937005b602ea00c5cba3e25ef1fc6575a55103e +; + + +hashWithConvertedTypes +required_capability: hash_function + +ROW input=42 | EVAL md5 = hash("md5", input::STRING), sha256 = hash("sha256", to_string(input)); + +input:integer | md5:keyword | sha256:keyword +42 | a1d0c6e83f027327d8461063f4ac58a6 | 73475cb40a568e8da8a045ced110137e159f890ac4da883b6b17dc651b3a8049 
+; + + +hashWithStats +required_capability: hash_function + +FROM sample_data +| EVAL md5="md5" +| STATS count = count(*) by hash(md5, message) +| WHERE count > 1; + +count:long | hash(md5, message):keyword +3 | 2e92ae79ff32b37fee4368a594792183 +; diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashConstantEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashConstantEvaluator.java new file mode 100644 index 0000000000000..34cff73018634 --- /dev/null +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashConstantEvaluator.java @@ -0,0 +1,142 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License +// 2.0; you may not use this file except in compliance with the Elastic License +// 2.0. +package org.elasticsearch.xpack.esql.expression.function.scalar.string; + +import java.lang.IllegalArgumentException; +import java.lang.Override; +import java.lang.String; +import java.util.function.Function; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.BytesRefVector; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.BreakingBytesRefBuilder; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.compute.operator.Warnings; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.xpack.esql.core.tree.Source; + +/** + * {@link EvalOperator.ExpressionEvaluator} implementation for {@link Hash}. + * This class is generated. Do not edit it. 
+ */ +public final class HashConstantEvaluator implements EvalOperator.ExpressionEvaluator { + private final Source source; + + private final BreakingBytesRefBuilder scratch; + + private final Hash.HashFunction algorithm; + + private final EvalOperator.ExpressionEvaluator input; + + private final DriverContext driverContext; + + private Warnings warnings; + + public HashConstantEvaluator(Source source, BreakingBytesRefBuilder scratch, + Hash.HashFunction algorithm, EvalOperator.ExpressionEvaluator input, + DriverContext driverContext) { + this.source = source; + this.scratch = scratch; + this.algorithm = algorithm; + this.input = input; + this.driverContext = driverContext; + } + + @Override + public Block eval(Page page) { + try (BytesRefBlock inputBlock = (BytesRefBlock) input.eval(page)) { + BytesRefVector inputVector = inputBlock.asVector(); + if (inputVector == null) { + return eval(page.getPositionCount(), inputBlock); + } + return eval(page.getPositionCount(), inputVector).asBlock(); + } + } + + public BytesRefBlock eval(int positionCount, BytesRefBlock inputBlock) { + try(BytesRefBlock.Builder result = driverContext.blockFactory().newBytesRefBlockBuilder(positionCount)) { + BytesRef inputScratch = new BytesRef(); + position: for (int p = 0; p < positionCount; p++) { + if (inputBlock.isNull(p)) { + result.appendNull(); + continue position; + } + if (inputBlock.getValueCount(p) != 1) { + if (inputBlock.getValueCount(p) > 1) { + warnings().registerException(new IllegalArgumentException("single-value function encountered multi-value")); + } + result.appendNull(); + continue position; + } + result.appendBytesRef(Hash.processConstant(this.scratch, this.algorithm, inputBlock.getBytesRef(inputBlock.getFirstValueIndex(p), inputScratch))); + } + return result.build(); + } + } + + public BytesRefVector eval(int positionCount, BytesRefVector inputVector) { + try(BytesRefVector.Builder result = driverContext.blockFactory().newBytesRefVectorBuilder(positionCount)) { + 
BytesRef inputScratch = new BytesRef(); + position: for (int p = 0; p < positionCount; p++) { + result.appendBytesRef(Hash.processConstant(this.scratch, this.algorithm, inputVector.getBytesRef(p, inputScratch))); + } + return result.build(); + } + } + + @Override + public String toString() { + return "HashConstantEvaluator[" + "algorithm=" + algorithm + ", input=" + input + "]"; + } + + @Override + public void close() { + Releasables.closeExpectNoException(scratch, input); + } + + private Warnings warnings() { + if (warnings == null) { + this.warnings = Warnings.createWarnings( + driverContext.warningsMode(), + source.source().getLineNumber(), + source.source().getColumnNumber(), + source.text() + ); + } + return warnings; + } + + static class Factory implements EvalOperator.ExpressionEvaluator.Factory { + private final Source source; + + private final Function scratch; + + private final Function algorithm; + + private final EvalOperator.ExpressionEvaluator.Factory input; + + public Factory(Source source, Function scratch, + Function algorithm, + EvalOperator.ExpressionEvaluator.Factory input) { + this.source = source; + this.scratch = scratch; + this.algorithm = algorithm; + this.input = input; + } + + @Override + public HashConstantEvaluator get(DriverContext context) { + return new HashConstantEvaluator(source, scratch.apply(context), algorithm.apply(context), input.get(context), context); + } + + @Override + public String toString() { + return "HashConstantEvaluator[" + "algorithm=" + algorithm + ", input=" + input + "]"; + } + } +} diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashEvaluator.java new file mode 100644 index 0000000000000..8b01cc0330142 --- /dev/null +++ 
b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashEvaluator.java @@ -0,0 +1,174 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License +// 2.0; you may not use this file except in compliance with the Elastic License +// 2.0. +package org.elasticsearch.xpack.esql.expression.function.scalar.string; + +import java.lang.IllegalArgumentException; +import java.lang.Override; +import java.lang.String; +import java.security.NoSuchAlgorithmException; +import java.util.function.Function; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.BytesRefVector; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.BreakingBytesRefBuilder; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.compute.operator.Warnings; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.xpack.esql.core.tree.Source; + +/** + * {@link EvalOperator.ExpressionEvaluator} implementation for {@link Hash}. + * This class is generated. Do not edit it. 
+ */ +public final class HashEvaluator implements EvalOperator.ExpressionEvaluator { + private final Source source; + + private final BreakingBytesRefBuilder scratch; + + private final EvalOperator.ExpressionEvaluator algorithm; + + private final EvalOperator.ExpressionEvaluator input; + + private final DriverContext driverContext; + + private Warnings warnings; + + public HashEvaluator(Source source, BreakingBytesRefBuilder scratch, + EvalOperator.ExpressionEvaluator algorithm, EvalOperator.ExpressionEvaluator input, + DriverContext driverContext) { + this.source = source; + this.scratch = scratch; + this.algorithm = algorithm; + this.input = input; + this.driverContext = driverContext; + } + + @Override + public Block eval(Page page) { + try (BytesRefBlock algorithmBlock = (BytesRefBlock) algorithm.eval(page)) { + try (BytesRefBlock inputBlock = (BytesRefBlock) input.eval(page)) { + BytesRefVector algorithmVector = algorithmBlock.asVector(); + if (algorithmVector == null) { + return eval(page.getPositionCount(), algorithmBlock, inputBlock); + } + BytesRefVector inputVector = inputBlock.asVector(); + if (inputVector == null) { + return eval(page.getPositionCount(), algorithmBlock, inputBlock); + } + return eval(page.getPositionCount(), algorithmVector, inputVector); + } + } + } + + public BytesRefBlock eval(int positionCount, BytesRefBlock algorithmBlock, + BytesRefBlock inputBlock) { + try(BytesRefBlock.Builder result = driverContext.blockFactory().newBytesRefBlockBuilder(positionCount)) { + BytesRef algorithmScratch = new BytesRef(); + BytesRef inputScratch = new BytesRef(); + position: for (int p = 0; p < positionCount; p++) { + if (algorithmBlock.isNull(p)) { + result.appendNull(); + continue position; + } + if (algorithmBlock.getValueCount(p) != 1) { + if (algorithmBlock.getValueCount(p) > 1) { + warnings().registerException(new IllegalArgumentException("single-value function encountered multi-value")); + } + result.appendNull(); + continue position; + } + if 
(inputBlock.isNull(p)) { + result.appendNull(); + continue position; + } + if (inputBlock.getValueCount(p) != 1) { + if (inputBlock.getValueCount(p) > 1) { + warnings().registerException(new IllegalArgumentException("single-value function encountered multi-value")); + } + result.appendNull(); + continue position; + } + try { + result.appendBytesRef(Hash.process(this.scratch, algorithmBlock.getBytesRef(algorithmBlock.getFirstValueIndex(p), algorithmScratch), inputBlock.getBytesRef(inputBlock.getFirstValueIndex(p), inputScratch))); + } catch (NoSuchAlgorithmException e) { + warnings().registerException(e); + result.appendNull(); + } + } + return result.build(); + } + } + + public BytesRefBlock eval(int positionCount, BytesRefVector algorithmVector, + BytesRefVector inputVector) { + try(BytesRefBlock.Builder result = driverContext.blockFactory().newBytesRefBlockBuilder(positionCount)) { + BytesRef algorithmScratch = new BytesRef(); + BytesRef inputScratch = new BytesRef(); + position: for (int p = 0; p < positionCount; p++) { + try { + result.appendBytesRef(Hash.process(this.scratch, algorithmVector.getBytesRef(p, algorithmScratch), inputVector.getBytesRef(p, inputScratch))); + } catch (NoSuchAlgorithmException e) { + warnings().registerException(e); + result.appendNull(); + } + } + return result.build(); + } + } + + @Override + public String toString() { + return "HashEvaluator[" + "algorithm=" + algorithm + ", input=" + input + "]"; + } + + @Override + public void close() { + Releasables.closeExpectNoException(scratch, algorithm, input); + } + + private Warnings warnings() { + if (warnings == null) { + this.warnings = Warnings.createWarnings( + driverContext.warningsMode(), + source.source().getLineNumber(), + source.source().getColumnNumber(), + source.text() + ); + } + return warnings; + } + + static class Factory implements EvalOperator.ExpressionEvaluator.Factory { + private final Source source; + + private final Function scratch; + + private final 
EvalOperator.ExpressionEvaluator.Factory algorithm; + + private final EvalOperator.ExpressionEvaluator.Factory input; + + public Factory(Source source, Function scratch, + EvalOperator.ExpressionEvaluator.Factory algorithm, + EvalOperator.ExpressionEvaluator.Factory input) { + this.source = source; + this.scratch = scratch; + this.algorithm = algorithm; + this.input = input; + } + + @Override + public HashEvaluator get(DriverContext context) { + return new HashEvaluator(source, scratch.apply(context), algorithm.get(context), input.get(context), context); + } + + @Override + public String toString() { + return "HashEvaluator[" + "algorithm=" + algorithm + ", input=" + input + "]"; + } + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 8c7e381d33322..d6c1539088d47 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -449,6 +449,11 @@ public enum Cap { */ KQL_FUNCTION(Build.current().isSnapshot()), + /** + * Hash function + */ + HASH_FUNCTION, + /** * Don't optimize CASE IS NOT NULL function by not requiring the fields to be not null as well. 
* https://github.com/elastic/elasticsearch/issues/112704 diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index a59ef5bb1575d..908c9c5f197a8 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -129,6 +129,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.string.ByteLength; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat; import org.elasticsearch.xpack.esql.expression.function.scalar.string.EndsWith; +import org.elasticsearch.xpack.esql.expression.function.scalar.string.Hash; import org.elasticsearch.xpack.esql.expression.function.scalar.string.LTrim; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Left; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Length; @@ -327,6 +328,7 @@ private static FunctionDefinition[][] functions() { def(ByteLength.class, ByteLength::new, "byte_length"), def(Concat.class, Concat::new, "concat"), def(EndsWith.class, EndsWith::new, "ends_with"), + def(Hash.class, Hash::new, "hash"), def(LTrim.class, LTrim::new, "ltrim"), def(Left.class, Left::new, "left"), def(Length.class, Length::new, "length"), diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java index 192ca6c43e57d..3cf0eef9074ad 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java +++ 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java @@ -34,6 +34,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.string.BitLength; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat; import org.elasticsearch.xpack.esql.expression.function.scalar.string.EndsWith; +import org.elasticsearch.xpack.esql.expression.function.scalar.string.Hash; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Left; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Locate; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Repeat; @@ -64,6 +65,7 @@ public static List getNamedWriteables() { entries.add(E.ENTRY); entries.add(EndsWith.ENTRY); entries.add(Greatest.ENTRY); + entries.add(Hash.ENTRY); entries.add(Hypot.ENTRY); entries.add(In.ENTRY); entries.add(InsensitiveEquals.ENTRY); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Hash.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Hash.java new file mode 100644 index 0000000000000..99c5908699ec2 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Hash.java @@ -0,0 +1,217 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.string; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.compute.ann.Evaluator; +import org.elasticsearch.compute.ann.Fixed; +import org.elasticsearch.compute.operator.BreakingBytesRefBuilder; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.xpack.esql.core.InvalidArgumentException; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.NodeInfo; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; +import org.elasticsearch.xpack.esql.expression.function.Param; +import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction; +import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; + +import java.io.IOException; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.List; +import java.util.function.Function; + +import static org.elasticsearch.compute.ann.Fixed.Scope.THREAD_LOCAL; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString; + +public class Hash extends EsqlScalarFunction { + + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Hash", Hash::new); + + private final Expression algorithm; + private final Expression input; + + @FunctionInfo( + returnType = "keyword", + 
description = "Computes the hash of the input using various algorithms such as MD5, SHA, SHA-224, SHA-256, SHA-384, SHA-512." + ) + public Hash( + Source source, + @Param(name = "algorithm", type = { "keyword", "text" }, description = "Hash algorithm to use.") Expression algorithm, + @Param(name = "input", type = { "keyword", "text" }, description = "Input to hash.") Expression input + ) { + super(source, List.of(algorithm, input)); + this.algorithm = algorithm; + this.input = input; + } + + private Hash(StreamInput in) throws IOException { + this(Source.readFrom((PlanStreamInput) in), in.readNamedWriteable(Expression.class), in.readNamedWriteable(Expression.class)); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + source().writeTo(out); + out.writeNamedWriteable(algorithm); + out.writeNamedWriteable(input); + } + + @Override + public String getWriteableName() { + return ENTRY.name; + } + + @Override + public DataType dataType() { + return DataType.KEYWORD; + } + + @Override + protected TypeResolution resolveType() { + if (childrenResolved() == false) { + return new TypeResolution("Unresolved children"); + } + + TypeResolution resolution = isString(algorithm, sourceText(), FIRST); + if (resolution.unresolved()) { + return resolution; + } + + return isString(input, sourceText(), SECOND); + } + + @Override + public boolean foldable() { + return algorithm.foldable() && input.foldable(); + } + + @Evaluator(warnExceptions = NoSuchAlgorithmException.class) + static BytesRef process( + @Fixed(includeInToString = false, scope = THREAD_LOCAL) BreakingBytesRefBuilder scratch, + BytesRef algorithm, + BytesRef input + ) throws NoSuchAlgorithmException { + return hash(scratch, MessageDigest.getInstance(algorithm.utf8ToString()), input); + } + + @Evaluator(extraName = "Constant") + static BytesRef processConstant( + @Fixed(includeInToString = false, scope = THREAD_LOCAL) BreakingBytesRefBuilder scratch, + @Fixed(scope = THREAD_LOCAL) HashFunction 
algorithm, + BytesRef input + ) { + return hash(scratch, algorithm.digest, input); + } + + private static BytesRef hash(BreakingBytesRefBuilder scratch, MessageDigest algorithm, BytesRef input) { + algorithm.reset(); + algorithm.update(input.bytes, input.offset, input.length); + var digest = algorithm.digest(); + scratch.clear(); + scratch.grow(digest.length * 2); + appendUtf8HexDigest(scratch, digest); + return scratch.bytesRefView(); + } + + private static final byte[] ASCII_HEX_BYTES = new byte[] { 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 97, 98, 99, 100, 101, 102 }; + + /** + * This function allows to append hex bytes dirrectly to the {@link BreakingBytesRefBuilder} + * bypassing unnecessary array allocations and byte array copying. + */ + private static void appendUtf8HexDigest(BreakingBytesRefBuilder scratch, byte[] bytes) { + for (byte b : bytes) { + scratch.append(ASCII_HEX_BYTES[b >> 4 & 0xf]); + scratch.append(ASCII_HEX_BYTES[b & 0xf]); + } + } + + @Override + public EvalOperator.ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { + if (algorithm.foldable()) { + try { + // hash function is created here in order to validate the algorithm is valid before evaluator is created + var hf = HashFunction.create((BytesRef) algorithm.fold()); + return new HashConstantEvaluator.Factory( + source(), + context -> new BreakingBytesRefBuilder(context.breaker(), "hash"), + new Function<>() { + @Override + public HashFunction apply(DriverContext context) { + return hf.copy(); + } + + @Override + public String toString() { + return hf.toString(); + } + }, + toEvaluator.apply(input) + ); + } catch (NoSuchAlgorithmException e) { + throw new InvalidArgumentException(e, "invalid algorithm for [{}]: {}", sourceText(), e.getMessage()); + } + } else { + return new HashEvaluator.Factory( + source(), + context -> new BreakingBytesRefBuilder(context.breaker(), "hash"), + toEvaluator.apply(algorithm), + toEvaluator.apply(input) + ); + } + } + + @Override + public 
Expression replaceChildren(List newChildren) { + return new Hash(source(), newChildren.get(0), newChildren.get(1)); + } + + @Override + protected NodeInfo info() { + return NodeInfo.create(this, Hash::new, children().get(0), children().get(1)); + } + + public record HashFunction(String algorithm, MessageDigest digest) { + + public static HashFunction create(BytesRef literal) throws NoSuchAlgorithmException { + var algorithm = literal.utf8ToString(); + var digest = MessageDigest.getInstance(algorithm); + return new HashFunction(algorithm, digest); + } + + public HashFunction copy() { + try { + return new HashFunction(algorithm, MessageDigest.getInstance(algorithm)); + } catch (NoSuchAlgorithmException e) { + assert false : "Algorithm should be valid at this point"; + throw new IllegalStateException(e); + } + } + + @Override + public String toString() { + return algorithm; + } + } + + Expression algorithm() { + return algorithm; + } + + Expression input() { + return input; + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/AbstractExpressionSerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/AbstractExpressionSerializationTests.java index 6dd0c5fe88afd..050293e58c19d 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/AbstractExpressionSerializationTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/AbstractExpressionSerializationTests.java @@ -14,10 +14,15 @@ import org.elasticsearch.xpack.esql.plan.AbstractNodeSerializationTests; public abstract class AbstractExpressionSerializationTests extends AbstractNodeSerializationTests { + public static Expression randomChild() { return ReferenceAttributeTests.randomReferenceAttribute(false); } + public static Expression mutateExpression(Expression expression) { + return randomValueOtherThan(expression, AbstractExpressionSerializationTests::randomChild); + } + 
@Override protected final NamedWriteableRegistry getNamedWriteableRegistry() { return new NamedWriteableRegistry(ExpressionWritables.getNamedWriteables()); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashSerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashSerializationTests.java new file mode 100644 index 0000000000000..f21105c2c8bca --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashSerializationTests.java @@ -0,0 +1,27 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.string; + +import org.elasticsearch.xpack.esql.expression.AbstractExpressionSerializationTests; + +import java.io.IOException; + +public class HashSerializationTests extends AbstractExpressionSerializationTests { + + @Override + protected Hash createTestInstance() { + return new Hash(randomSource(), randomChild(), randomChild()); + } + + @Override + protected Hash mutateInstance(Hash instance) throws IOException { + return randomBoolean() + ? 
new Hash(instance.source(), mutateExpression(instance.algorithm()), instance.input()) + : new Hash(instance.source(), instance.algorithm(), mutateExpression(instance.input())); + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashStaticTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashStaticTests.java new file mode 100644 index 0000000000000..871bec7c06804 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashStaticTests.java @@ -0,0 +1,66 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.string; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.breaker.CircuitBreaker; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.common.util.MockBigArrays; +import org.elasticsearch.common.util.PageCacheRecycler; +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.esql.core.InvalidArgumentException; +import org.elasticsearch.xpack.esql.core.expression.Literal; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.junit.After; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import static org.elasticsearch.xpack.esql.expression.function.AbstractFunctionTestCase.evaluator; +import static org.elasticsearch.xpack.esql.expression.function.AbstractFunctionTestCase.field; 
+import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.startsWith; + +public class HashStaticTests extends ESTestCase { + + public void testInvalidAlgorithmLiteral() { + Source source = new Source(0, 0, "hash(\"invalid\", input)"); + DriverContext driverContext = driverContext(); + InvalidArgumentException e = expectThrows( + InvalidArgumentException.class, + () -> evaluator( + new Hash(source, new Literal(source, new BytesRef("invalid"), DataType.KEYWORD), field("input", DataType.KEYWORD)) + ).get(driverContext) + ); + assertThat(e.getMessage(), startsWith("invalid algorithm for [hash(\"invalid\", input)]: invalid MessageDigest not available")); + } + + /** + * The following fields and methods were borrowed from AbstractScalarFunctionTestCase + */ + private final List breakers = Collections.synchronizedList(new ArrayList<>()); + + private DriverContext driverContext() { + BigArrays bigArrays = new MockBigArrays(PageCacheRecycler.NON_RECYCLING_INSTANCE, ByteSizeValue.ofMb(256)).withCircuitBreaking(); + CircuitBreaker breaker = bigArrays.breakerService().getBreaker(CircuitBreaker.REQUEST); + breakers.add(breaker); + return new DriverContext(bigArrays, new BlockFactory(breaker, bigArrays)); + } + + @After + public void allMemoryReleased() { + for (CircuitBreaker breaker : breakers) { + assertThat(breaker.getUsed(), equalTo(0L)); + } + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java new file mode 100644 index 0000000000000..c5cdf97eccd17 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java @@ -0,0 +1,107 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements.
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.string; + +import com.carrotsearch.randomizedtesting.annotations.Name; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.lucene.BytesRefs; +import org.elasticsearch.xpack.esql.core.InvalidArgumentException; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase; +import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; + +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; +import java.util.HexFormat; +import java.util.List; +import java.util.function.Supplier; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.nullValue; + +public class HashTests extends AbstractScalarFunctionTestCase { + + public HashTests(@Name("TestCase") Supplier testCaseSupplier) { + this.testCase = testCaseSupplier.get(); + } + + @ParametersFactory + public static Iterable parameters() { + List cases = new ArrayList<>(); + for (String algorithm : List.of("MD5", "SHA", "SHA-224", "SHA-256", "SHA-384", "SHA-512")) { + cases.addAll(createTestCases(algorithm)); + } + cases.add(new TestCaseSupplier("Invalid algorithm", List.of(DataType.KEYWORD, DataType.KEYWORD), () -> { + var input = randomAlphaOfLength(10); + return new TestCaseSupplier.TestCase( + List.of( + new TestCaseSupplier.TypedData(new BytesRef("invalid"), DataType.KEYWORD, "algorithm"), + new TestCaseSupplier.TypedData(new BytesRef(input), 
DataType.KEYWORD, "input") + ), + "HashEvaluator[algorithm=Attribute[channel=0], input=Attribute[channel=1]]", + DataType.KEYWORD, + is(nullValue()) + ).withWarning("Line -1:-1: evaluation of [] failed, treating result as null. Only first 20 failures recorded.") + .withWarning("Line -1:-1: java.security.NoSuchAlgorithmException: invalid MessageDigest not available") + .withFoldingException(InvalidArgumentException.class, "invalid algorithm for []: invalid MessageDigest not available"); + })); + return parameterSuppliersFromTypedDataWithDefaultChecks(true, cases, (v, p) -> "string"); + } + + private static List createTestCases(String algorithm) { + return List.of( + createTestCase(algorithm, false, DataType.KEYWORD, DataType.KEYWORD), + createTestCase(algorithm, false, DataType.KEYWORD, DataType.TEXT), + createTestCase(algorithm, false, DataType.TEXT, DataType.KEYWORD), + createTestCase(algorithm, false, DataType.TEXT, DataType.TEXT), + createTestCase(algorithm, true, DataType.KEYWORD, DataType.KEYWORD), + createTestCase(algorithm, true, DataType.KEYWORD, DataType.TEXT), + createTestCase(algorithm, true, DataType.TEXT, DataType.KEYWORD), + createTestCase(algorithm, true, DataType.TEXT, DataType.TEXT) + ); + } + + private static TestCaseSupplier createTestCase(String algorithm, boolean forceLiteral, DataType algorithmType, DataType inputType) { + return new TestCaseSupplier(algorithm, List.of(algorithmType, inputType), () -> { + var input = randomFrom(TestCaseSupplier.stringCases(inputType)).get(); + return new TestCaseSupplier.TestCase( + List.of(createTypedData(algorithm, forceLiteral, algorithmType, "algorithm"), input), + forceLiteral + ? 
"HashConstantEvaluator[algorithm=" + algorithm + ", input=Attribute[channel=0]]" + : "HashEvaluator[algorithm=Attribute[channel=0], input=Attribute[channel=1]]", + DataType.KEYWORD, + equalTo(new BytesRef(hash(algorithm, BytesRefs.toString(input.data())))) + ); + }); + } + + private static TestCaseSupplier.TypedData createTypedData(String value, boolean forceLiteral, DataType type, String name) { + var data = new TestCaseSupplier.TypedData(new BytesRef(value), type, name); + return forceLiteral ? data.forceLiteral() : data; + } + + private static String hash(String algorithm, String input) { + try { + return HexFormat.of().formatHex(MessageDigest.getInstance(algorithm).digest(input.getBytes(StandardCharsets.UTF_8))); + } catch (NoSuchAlgorithmException e) { + throw new IllegalArgumentException("Unknown algorithm: " + algorithm); + } + } + + @Override + protected Expression build(Source source, List args) { + return new Hash(source, args.get(0), args.get(1)); + } +} diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml index 2a4cde9a680e9..b6d75048591e5 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml @@ -92,7 +92,7 @@ setup: - gt: {esql.functions.to_long: $functions_to_long} - match: {esql.functions.coalesce: $functions_coalesce} # Testing for the entire function set isn't feasbile, so we just check that we return the correct count as an approximation. 
- - length: {esql.functions: 129} # check the "sister" test below for a likely update to the same esql.functions length check + - length: {esql.functions: 130} # check the "sister" test below for a likely update to the same esql.functions length check --- "Basic ESQL usage output (telemetry) non-snapshot version": @@ -163,4 +163,4 @@ setup: - match: {esql.functions.cos: $functions_cos} - gt: {esql.functions.to_long: $functions_to_long} - match: {esql.functions.coalesce: $functions_coalesce} - - length: {esql.functions: 125} # check the "sister" test above for a likely update to the same esql.functions length check + - length: {esql.functions: 126} # check the "sister" test above for a likely update to the same esql.functions length check