diff --git a/docs/reference/esql/functions/description/md5.asciidoc b/docs/reference/esql/functions/description/md5.asciidoc new file mode 100644 index 0000000000000..2d6d8515b6caf --- /dev/null +++ b/docs/reference/esql/functions/description/md5.asciidoc @@ -0,0 +1,5 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Description* + +Computes MD5 hash of the input. diff --git a/docs/reference/esql/functions/kibana/definition/md5.json b/docs/reference/esql/functions/kibana/definition/md5.json new file mode 100644 index 0000000000000..f39610b62a039 --- /dev/null +++ b/docs/reference/esql/functions/kibana/definition/md5.json @@ -0,0 +1,34 @@ +{ + "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.", + "type" : "eval", + "name" : "md5", + "description" : "Computes MD5 hash of the input.", + "signatures" : [ + { + "params" : [ + { + "name" : "input", + "type" : "keyword", + "optional" : false, + "description" : "Input to hash." + } + ], + "variadic" : false, + "returnType" : "keyword" + }, + { + "params" : [ + { + "name" : "input", + "type" : "text", + "optional" : false, + "description" : "Input to hash." + } + ], + "variadic" : false, + "returnType" : "keyword" + } + ], + "preview" : false, + "snapshot_only" : false +} diff --git a/docs/reference/esql/functions/kibana/docs/md5.md b/docs/reference/esql/functions/kibana/docs/md5.md new file mode 100644 index 0000000000000..e040d17897f1a --- /dev/null +++ b/docs/reference/esql/functions/kibana/docs/md5.md @@ -0,0 +1,7 @@ + + +### MD5 +Computes MD5 hash of the input. + diff --git a/docs/reference/esql/functions/layout/md5.asciidoc b/docs/reference/esql/functions/layout/md5.asciidoc new file mode 100644 index 0000000000000..23f443c1c38b0 --- /dev/null +++ b/docs/reference/esql/functions/layout/md5.asciidoc @@ -0,0 +1,14 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +[discrete] +[[esql-md5]] +=== `MD5` + +*Syntax* + +[.text-center] +image::esql/functions/signature/md5.svg[Embedded,opts=inline] + +include::../parameters/md5.asciidoc[] +include::../description/md5.asciidoc[] +include::../types/md5.asciidoc[] diff --git a/docs/reference/esql/functions/parameters/md5.asciidoc b/docs/reference/esql/functions/parameters/md5.asciidoc new file mode 100644 index 0000000000000..99eba4dc2cb3d --- /dev/null +++ b/docs/reference/esql/functions/parameters/md5.asciidoc @@ -0,0 +1,6 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Parameters* + +`input`:: +Input to hash. diff --git a/docs/reference/esql/functions/signature/md5.svg b/docs/reference/esql/functions/signature/md5.svg new file mode 100644 index 0000000000000..419af764a212e --- /dev/null +++ b/docs/reference/esql/functions/signature/md5.svg @@ -0,0 +1 @@ +MD5(input) \ No newline at end of file diff --git a/docs/reference/esql/functions/string-functions.asciidoc b/docs/reference/esql/functions/string-functions.asciidoc index da9580a55151a..4b81f247b7276 100644 --- a/docs/reference/esql/functions/string-functions.asciidoc +++ b/docs/reference/esql/functions/string-functions.asciidoc @@ -43,6 +43,7 @@ include::layout/left.asciidoc[] include::layout/length.asciidoc[] include::layout/locate.asciidoc[] include::layout/ltrim.asciidoc[] +include::layout/md5.asciidoc[] include::layout/repeat.asciidoc[] include::layout/replace.asciidoc[] include::layout/reverse.asciidoc[] diff --git a/docs/reference/esql/functions/types/md5.asciidoc b/docs/reference/esql/functions/types/md5.asciidoc new file mode 100644 index 0000000000000..049a553397bbd --- /dev/null +++ b/docs/reference/esql/functions/types/md5.asciidoc @@ -0,0 +1,10 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Supported types* + +[%header.monospaced.styled,format=dsv,separator=|] +|=== +input | result +keyword | keyword +text | keyword +|=== diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec index fcac1e1859c6d..d1983cf12c25d 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/hash.csv-spec @@ -103,3 +103,12 @@ FROM sample_data count:long | hash(md5, message):keyword 3 | 2e92ae79ff32b37fee4368a594792183 ; + +md5Hash +required_capability: short_hash_functions + +ROW input="input" | EVAL md5 = hash("md5", input); + +input:keyword | md5:keyword +input | a43c1b0aa53a0c908810c06ab1ff3967 +; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index d6c1539088d47..7dadd02b369b6 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -453,6 +453,10 @@ public enum Cap { * Hash function */ HASH_FUNCTION, + /** + * Hash functions such as MD5 + */ + SHORT_HASH_FUNCTIONS, /** * Don't optimize CASE IS NOT NULL function by not requiring the fields to be not null as well. diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index 908c9c5f197a8..5ba3f58033bc5 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -134,6 +134,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.string.Left; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Length; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Locate; +import org.elasticsearch.xpack.esql.expression.function.scalar.string.Md5; import org.elasticsearch.xpack.esql.expression.function.scalar.string.RTrim; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Repeat; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Replace; @@ -333,6 +334,7 @@ private static FunctionDefinition[][] functions() { def(Left.class, Left::new, "left"), def(Length.class, Length::new, "length"), def(Locate.class, Locate::new, "locate"), + def(Md5.class, uni(Md5::new), "md5"), def(RTrim.class, RTrim::new, "rtrim"), def(Repeat.class, Repeat::new, "repeat"), def(Replace.class, Replace::new, "replace"), diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java index 3cf0eef9074ad..1e1728d03fcc5 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java @@ -37,6 +37,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.string.Hash; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Left; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Locate; +import org.elasticsearch.xpack.esql.expression.function.scalar.string.Md5; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Repeat; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Replace; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Reverse; @@ -79,6 +80,7 @@ public static List getNamedWriteables() { entries.add(Left.ENTRY); entries.add(Locate.ENTRY); entries.add(Log.ENTRY); + entries.add(Md5.ENTRY); entries.add(Now.ENTRY); entries.add(Or.ENTRY); entries.add(Pi.ENTRY); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/AbstractHashFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/AbstractHashFunction.java new file mode 100644 index 0000000000000..a39f0a4f32db2 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/AbstractHashFunction.java @@ -0,0 +1,70 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.string; + +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.compute.operator.BreakingBytesRefBuilder; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.scalar.UnaryScalarFunction; +import org.elasticsearch.xpack.esql.expression.function.scalar.string.Hash.HashFunction; + +import java.io.IOException; +import java.util.function.Function; + +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.DEFAULT; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString; + +public abstract class AbstractHashFunction extends UnaryScalarFunction { + + protected AbstractHashFunction(Source source, Expression field) { + super(source, field); + } + + protected AbstractHashFunction(StreamInput in) throws IOException { + super(in); + } + + protected abstract HashFunction getHashFunction(); + + @Override + public DataType dataType() { + return DataType.KEYWORD; + } + + @Override + protected TypeResolution resolveType() { + if (childrenResolved() == false) { + return new TypeResolution("Unresolved children"); + } + return isString(field, sourceText(), DEFAULT); + } + + @Override + public EvalOperator.ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { + return new HashConstantEvaluator.Factory( + source(), + context -> new BreakingBytesRefBuilder(context.breaker(), "hash"), + new Function<>() { + @Override + public HashFunction apply(DriverContext context) { + return getHashFunction().copy(); + } + + @Override + public String toString() { + return getHashFunction().toString(); + } + }, + toEvaluator.apply(field) + ); + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Hash.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Hash.java index 99c5908699ec2..dfe007c708060 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Hash.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Hash.java @@ -186,10 +186,18 @@ protected NodeInfo info() { public record HashFunction(String algorithm, MessageDigest digest) { + public static HashFunction create(String algorithm) { + try { + return new HashFunction(algorithm, MessageDigest.getInstance(algorithm)); + } catch (NoSuchAlgorithmException e) { + assert false : "Expected to create a valid hashing algorithm"; + throw new IllegalStateException(e); + } + } + public static HashFunction create(BytesRef literal) throws NoSuchAlgorithmException { var algorithm = literal.utf8ToString(); - var digest = MessageDigest.getInstance(algorithm); - return new HashFunction(algorithm, digest); + return new HashFunction(algorithm, MessageDigest.getInstance(algorithm)); } public HashFunction copy() { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Md5.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Md5.java new file mode 100644 index 0000000000000..eae559c4fffdc --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Md5.java @@ -0,0 +1,56 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.string; + +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.NodeInfo; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; +import org.elasticsearch.xpack.esql.expression.function.Param; +import org.elasticsearch.xpack.esql.expression.function.scalar.string.Hash.HashFunction; + +import java.io.IOException; +import java.util.List; + +public class Md5 extends AbstractHashFunction { + + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "MD5", Md5::new); + + private static final HashFunction MD5 = HashFunction.create("MD5"); + + @FunctionInfo(returnType = "keyword", description = "Computes MD5 hash of the input.") + public Md5(Source source, @Param(name = "input", type = { "keyword", "text" }, description = "Input to hash.") Expression input) { + super(source, input); + } + + public Md5(StreamInput in) throws IOException { + super(in); + } + + @Override + protected HashFunction getHashFunction() { + return MD5; + } + + @Override + public String getWriteableName() { + return ENTRY.name; + } + + @Override + public Expression replaceChildren(List newChildren) { + return new Md5(source(), field); + } + + @Override + protected NodeInfo info() { + return NodeInfo.create(this, Md5::new, field); + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java index c5cdf97eccd17..b16742292e24e 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/HashTests.java @@ -87,12 +87,28 @@ private static TestCaseSupplier createTestCase(String algorithm, boolean forceLi }); } + static List createHashFunctionTestCases(String algorithm) { + return List.of(createHashFunctionTestCase(algorithm, DataType.KEYWORD), createHashFunctionTestCase(algorithm, DataType.TEXT)); + } + + private static TestCaseSupplier createHashFunctionTestCase(String algorithm, DataType inputType) { + return new TestCaseSupplier(algorithm + " with " + inputType, List.of(inputType), () -> { + var input = randomFrom(TestCaseSupplier.stringCases(inputType)).get(); + return new TestCaseSupplier.TestCase( + List.of(input), + "HashConstantEvaluator[algorithm=" + algorithm + ", input=Attribute[channel=0]]", + DataType.KEYWORD, + equalTo(new BytesRef(HashTests.hash(algorithm, BytesRefs.toString(input.data())))) + ); + }); + } + private static TestCaseSupplier.TypedData createTypedData(String value, boolean forceLiteral, DataType type, String name) { var data = new TestCaseSupplier.TypedData(new BytesRef(value), type, name); return forceLiteral ? data.forceLiteral() : data; } - private static String hash(String algorithm, String input) { + static String hash(String algorithm, String input) { try { return HexFormat.of().formatHex(MessageDigest.getInstance(algorithm).digest(input.getBytes(StandardCharsets.UTF_8))); } catch (NoSuchAlgorithmException e) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Md5SerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Md5SerializationTests.java new file mode 100644 index 0000000000000..666e5a3ea5d58 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Md5SerializationTests.java @@ -0,0 +1,25 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.string; + +import org.elasticsearch.xpack.esql.expression.AbstractExpressionSerializationTests; + +import java.io.IOException; + +public class Md5SerializationTests extends AbstractExpressionSerializationTests { + + @Override + protected Md5 createTestInstance() { + return new Md5(randomSource(), randomChild()); + } + + @Override + protected Md5 mutateInstance(Md5 instance) throws IOException { + return new Md5(instance.source(), mutateExpression(instance.field())); + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Md5Tests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Md5Tests.java new file mode 100644 index 0000000000000..125e412ee5067 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Md5Tests.java @@ -0,0 +1,39 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.string; + +import com.carrotsearch.randomizedtesting.annotations.Name; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase; +import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; + +import java.util.ArrayList; +import java.util.List; +import java.util.function.Supplier; + +public class Md5Tests extends AbstractScalarFunctionTestCase { + + public Md5Tests(@Name("TestCase") Supplier testCaseSupplier) { + this.testCase = testCaseSupplier.get(); + } + + @ParametersFactory + public static Iterable parameters() { + List cases = new ArrayList<>(); + cases.addAll(HashTests.createHashFunctionTestCases("MD5")); + return parameterSuppliersFromTypedDataWithDefaultChecks(true, cases, (v, p) -> "string"); + } + + @Override + protected Expression build(Source source, List args) { + return new Md5(source, args.get(0)); + } +}