diff --git a/docs/changelog/118474.yaml b/docs/changelog/118474.yaml new file mode 100644 index 0000000000000..1b0c6942eb323 --- /dev/null +++ b/docs/changelog/118474.yaml @@ -0,0 +1,6 @@ +pr: 118474 +summary: Esql bucket function for date nanos +area: ES|QL +type: enhancement +issues: + - 118031 diff --git a/docs/reference/esql/functions/kibana/definition/bucket.json b/docs/reference/esql/functions/kibana/definition/bucket.json index 660e1be49fda9..18802f5ff8fef 100644 --- a/docs/reference/esql/functions/kibana/definition/bucket.json +++ b/docs/reference/esql/functions/kibana/definition/bucket.json @@ -310,6 +310,312 @@ "variadic" : false, "returnType" : "date" }, + { + "params" : [ + { + "name" : "field", + "type" : "date_nanos", + "optional" : false, + "description" : "Numeric or date expression from which to derive buckets." + }, + { + "name" : "buckets", + "type" : "date_period", + "optional" : false, + "description" : "Target number of buckets, or desired bucket size if `from` and `to` parameters are omitted." + } + ], + "variadic" : false, + "returnType" : "date_nanos" + }, + { + "params" : [ + { + "name" : "field", + "type" : "date_nanos", + "optional" : false, + "description" : "Numeric or date expression from which to derive buckets." + }, + { + "name" : "buckets", + "type" : "integer", + "optional" : false, + "description" : "Target number of buckets, or desired bucket size if `from` and `to` parameters are omitted." + }, + { + "name" : "from", + "type" : "date", + "optional" : true, + "description" : "Start of the range. Can be a number, a date or a date expressed as a string." + }, + { + "name" : "to", + "type" : "date", + "optional" : true, + "description" : "End of the range. Can be a number, a date or a date expressed as a string." 
+ } + ], + "variadic" : false, + "returnType" : "date_nanos" + }, + { + "params" : [ + { + "name" : "field", + "type" : "date_nanos", + "optional" : false, + "description" : "Numeric or date expression from which to derive buckets." + }, + { + "name" : "buckets", + "type" : "integer", + "optional" : false, + "description" : "Target number of buckets, or desired bucket size if `from` and `to` parameters are omitted." + }, + { + "name" : "from", + "type" : "date", + "optional" : true, + "description" : "Start of the range. Can be a number, a date or a date expressed as a string." + }, + { + "name" : "to", + "type" : "keyword", + "optional" : true, + "description" : "End of the range. Can be a number, a date or a date expressed as a string." + } + ], + "variadic" : false, + "returnType" : "date_nanos" + }, + { + "params" : [ + { + "name" : "field", + "type" : "date_nanos", + "optional" : false, + "description" : "Numeric or date expression from which to derive buckets." + }, + { + "name" : "buckets", + "type" : "integer", + "optional" : false, + "description" : "Target number of buckets, or desired bucket size if `from` and `to` parameters are omitted." + }, + { + "name" : "from", + "type" : "date", + "optional" : true, + "description" : "Start of the range. Can be a number, a date or a date expressed as a string." + }, + { + "name" : "to", + "type" : "text", + "optional" : true, + "description" : "End of the range. Can be a number, a date or a date expressed as a string." + } + ], + "variadic" : false, + "returnType" : "date_nanos" + }, + { + "params" : [ + { + "name" : "field", + "type" : "date_nanos", + "optional" : false, + "description" : "Numeric or date expression from which to derive buckets." + }, + { + "name" : "buckets", + "type" : "integer", + "optional" : false, + "description" : "Target number of buckets, or desired bucket size if `from` and `to` parameters are omitted." 
+ }, + { + "name" : "from", + "type" : "keyword", + "optional" : true, + "description" : "Start of the range. Can be a number, a date or a date expressed as a string." + }, + { + "name" : "to", + "type" : "date", + "optional" : true, + "description" : "End of the range. Can be a number, a date or a date expressed as a string." + } + ], + "variadic" : false, + "returnType" : "date_nanos" + }, + { + "params" : [ + { + "name" : "field", + "type" : "date_nanos", + "optional" : false, + "description" : "Numeric or date expression from which to derive buckets." + }, + { + "name" : "buckets", + "type" : "integer", + "optional" : false, + "description" : "Target number of buckets, or desired bucket size if `from` and `to` parameters are omitted." + }, + { + "name" : "from", + "type" : "keyword", + "optional" : true, + "description" : "Start of the range. Can be a number, a date or a date expressed as a string." + }, + { + "name" : "to", + "type" : "keyword", + "optional" : true, + "description" : "End of the range. Can be a number, a date or a date expressed as a string." + } + ], + "variadic" : false, + "returnType" : "date_nanos" + }, + { + "params" : [ + { + "name" : "field", + "type" : "date_nanos", + "optional" : false, + "description" : "Numeric or date expression from which to derive buckets." + }, + { + "name" : "buckets", + "type" : "integer", + "optional" : false, + "description" : "Target number of buckets, or desired bucket size if `from` and `to` parameters are omitted." + }, + { + "name" : "from", + "type" : "keyword", + "optional" : true, + "description" : "Start of the range. Can be a number, a date or a date expressed as a string." + }, + { + "name" : "to", + "type" : "text", + "optional" : true, + "description" : "End of the range. Can be a number, a date or a date expressed as a string." 
+ } + ], + "variadic" : false, + "returnType" : "date_nanos" + }, + { + "params" : [ + { + "name" : "field", + "type" : "date_nanos", + "optional" : false, + "description" : "Numeric or date expression from which to derive buckets." + }, + { + "name" : "buckets", + "type" : "integer", + "optional" : false, + "description" : "Target number of buckets, or desired bucket size if `from` and `to` parameters are omitted." + }, + { + "name" : "from", + "type" : "text", + "optional" : true, + "description" : "Start of the range. Can be a number, a date or a date expressed as a string." + }, + { + "name" : "to", + "type" : "date", + "optional" : true, + "description" : "End of the range. Can be a number, a date or a date expressed as a string." + } + ], + "variadic" : false, + "returnType" : "date_nanos" + }, + { + "params" : [ + { + "name" : "field", + "type" : "date_nanos", + "optional" : false, + "description" : "Numeric or date expression from which to derive buckets." + }, + { + "name" : "buckets", + "type" : "integer", + "optional" : false, + "description" : "Target number of buckets, or desired bucket size if `from` and `to` parameters are omitted." + }, + { + "name" : "from", + "type" : "text", + "optional" : true, + "description" : "Start of the range. Can be a number, a date or a date expressed as a string." + }, + { + "name" : "to", + "type" : "keyword", + "optional" : true, + "description" : "End of the range. Can be a number, a date or a date expressed as a string." + } + ], + "variadic" : false, + "returnType" : "date_nanos" + }, + { + "params" : [ + { + "name" : "field", + "type" : "date_nanos", + "optional" : false, + "description" : "Numeric or date expression from which to derive buckets." + }, + { + "name" : "buckets", + "type" : "integer", + "optional" : false, + "description" : "Target number of buckets, or desired bucket size if `from` and `to` parameters are omitted." 
+ }, + { + "name" : "from", + "type" : "text", + "optional" : true, + "description" : "Start of the range. Can be a number, a date or a date expressed as a string." + }, + { + "name" : "to", + "type" : "text", + "optional" : true, + "description" : "End of the range. Can be a number, a date or a date expressed as a string." + } + ], + "variadic" : false, + "returnType" : "date_nanos" + }, + { + "params" : [ + { + "name" : "field", + "type" : "date_nanos", + "optional" : false, + "description" : "Numeric or date expression from which to derive buckets." + }, + { + "name" : "buckets", + "type" : "time_duration", + "optional" : false, + "description" : "Target number of buckets, or desired bucket size if `from` and `to` parameters are omitted." + } + ], + "variadic" : false, + "returnType" : "date_nanos" + }, { "params" : [ { diff --git a/docs/reference/esql/functions/types/bucket.asciidoc b/docs/reference/esql/functions/types/bucket.asciidoc index 172e84b6f7860..2e6985e6bc4ed 100644 --- a/docs/reference/esql/functions/types/bucket.asciidoc +++ b/docs/reference/esql/functions/types/bucket.asciidoc @@ -16,6 +16,17 @@ date | integer | text | date | date date | integer | text | keyword | date date | integer | text | text | date date | time_duration | | | date +date_nanos | date_period | | | date_nanos +date_nanos | integer | date | date | date_nanos +date_nanos | integer | date | keyword | date_nanos +date_nanos | integer | date | text | date_nanos +date_nanos | integer | keyword | date | date_nanos +date_nanos | integer | keyword | keyword | date_nanos +date_nanos | integer | keyword | text | date_nanos +date_nanos | integer | text | date | date_nanos +date_nanos | integer | text | keyword | date_nanos +date_nanos | integer | text | text | date_nanos +date_nanos | time_duration | | | date_nanos double | double | | | double double | integer | double | double | double double | integer | double | integer | double diff --git 
a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date_nanos.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date_nanos.csv-spec index bf0fd72f4f3f0..22b0bc2878cbb 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date_nanos.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date_nanos.csv-spec @@ -548,6 +548,80 @@ yr:date_nanos | mo:date_nanos | mn:date_nanos 2023-01-01T00:00:00.000000000Z | 2023-10-01T00:00:00.000000000Z | 2023-10-23T12:10:00.000000000Z | 2023-10-23T12:15:03.360000000Z ; +Bucket Date nanos by Year +required_capability: date_trunc_date_nanos +required_capability: date_nanos_bucket + +FROM date_nanos +| WHERE millis > "2020-01-01" +| STATS ct = count(*) BY yr = BUCKET(nanos, 1 year); + +ct:long | yr:date_nanos +8 | 2023-01-01T00:00:00.000000000Z +; + +Bucket Date nanos by Year, range version +required_capability: date_trunc_date_nanos +required_capability: date_nanos_bucket + +FROM date_nanos +| WHERE millis > "2020-01-01" +| STATS ct = count(*) BY yr = BUCKET(nanos, 5, "1999-01-01", NOW()); + +ct:long | yr:date_nanos +8 | 2023-01-01T00:00:00.000000000Z +; + +Bucket Date nanos by Month +required_capability: date_trunc_date_nanos +required_capability: date_nanos_bucket + +FROM date_nanos +| WHERE millis > "2020-01-01" +| STATS ct = count(*) BY mo = BUCKET(nanos, 1 month); + +ct:long | mo:date_nanos +8 | 2023-10-01T00:00:00.000000000Z +; + +Bucket Date nanos by Month, range version +required_capability: date_trunc_date_nanos +required_capability: date_nanos_bucket + +FROM date_nanos +| WHERE millis > "2020-01-01" +| STATS ct = count(*) BY mo = BUCKET(nanos, 20, "2023-01-01", "2023-12-31"); + +ct:long | mo:date_nanos +8 | 2023-10-01T00:00:00.000000000Z +; + +Bucket Date nanos by Week, range version +required_capability: date_trunc_date_nanos +required_capability: date_nanos_bucket + +FROM date_nanos +| WHERE millis > "2020-01-01" +| STATS ct = count(*) BY mo = BUCKET(nanos, 55, "2023-01-01", 
"2023-12-31"); + +ct:long | mo:date_nanos +8 | 2023-10-23T00:00:00.000000000Z +; +Bucket Date nanos by 10 minutes +required_capability: date_trunc_date_nanos +required_capability: date_nanos_bucket + +FROM date_nanos +| WHERE millis > "2020-01-01" +| STATS ct = count(*) BY mn = BUCKET(nanos, 10 minutes); + +ct:long | mn:date_nanos +4 | 2023-10-23T13:50:00.000000000Z +1 | 2023-10-23T13:30:00.000000000Z +1 | 2023-10-23T12:20:00.000000000Z +2 | 2023-10-23T12:10:00.000000000Z +; + Add date nanos required_capability: date_nanos_add_subtract diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 6853747171048..18d7a2d32ca60 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -357,6 +357,11 @@ public enum Cap { */ DATE_TRUNC_DATE_NANOS(), + /** + * Support date nanos values as the field argument to bucket + */ + DATE_NANOS_BUCKET(), + /** * support aggregations on date nanos */ diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java index 9e40b85fd6590..347d542f5212d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java @@ -90,7 +90,7 @@ public class Bucket extends GroupingFunction implements Validatable, TwoOptional private final Expression to; @FunctionInfo( - returnType = { "double", "date" }, + returnType = { "double", "date", "date_nanos" }, description = """ Creates groups of values - buckets - out of a datetime or numeric input. 
The size of the buckets can either be provided directly, or chosen based on a recommended count and values range.""", @@ -169,7 +169,7 @@ public Bucket( Source source, @Param( name = "field", - type = { "integer", "long", "double", "date" }, + type = { "integer", "long", "double", "date", "date_nanos" }, description = "Numeric or date expression from which to derive buckets." ) Expression field, @Param( @@ -241,7 +241,7 @@ public boolean foldable() { @Override public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { - if (field.dataType() == DataType.DATETIME) { + if (field.dataType() == DataType.DATETIME || field.dataType() == DataType.DATE_NANOS) { Rounding.Prepared preparedRounding; if (buckets.dataType().isWholeNumber()) { int b = ((Number) buckets.fold()).intValue(); @@ -314,8 +314,8 @@ private double pickRounding(int buckets, double from, double to) { } // supported parameter type combinations (1st, 2nd, 3rd, 4th): - // datetime, integer, string/datetime, string/datetime - // datetime, rounding/duration, -, - + // datetime/date_nanos, integer, string/datetime, string/datetime + // datetime/date_nanos, rounding/duration, -, - // numeric, integer, numeric, numeric // numeric, numeric, -, - @Override @@ -329,7 +329,7 @@ protected TypeResolution resolveType() { return TypeResolution.TYPE_RESOLVED; } - if (fieldType == DataType.DATETIME) { + if (fieldType == DataType.DATETIME || fieldType == DataType.DATE_NANOS) { TypeResolution resolution = isType( buckets, dt -> dt.isWholeNumber() || DataType.isTemporalAmount(dt), diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/grouping/BucketTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/grouping/BucketTests.java index 7e7d91cdf76f4..f01b06c23e8a8 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/grouping/BucketTests.java +++ 
b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/grouping/BucketTests.java
@@ -12,15 +12,19 @@
 
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.Rounding;
+import org.elasticsearch.common.time.DateUtils;
 import org.elasticsearch.index.mapper.DateFieldMapper;
+import org.elasticsearch.logging.LogManager;
 import org.elasticsearch.xpack.esql.core.expression.Expression;
 import org.elasticsearch.xpack.esql.core.tree.Source;
 import org.elasticsearch.xpack.esql.core.type.DataType;
 import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase;
 import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier;
 import org.hamcrest.Matcher;
+import org.hamcrest.Matchers;
 
 import java.time.Duration;
+import java.time.Instant;
 import java.time.Period;
 import java.util.ArrayList;
 import java.util.List;
@@ -38,6 +42,7 @@ public BucketTests(@Name("TestCase") Supplier<TestCaseSupplier.TestCase> testCas
     public static Iterable<Object[]> parameters() {
         List<TestCaseSupplier> suppliers = new ArrayList<>();
         dateCases(suppliers, "fixed date", () -> DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2023-02-17T09:00:00.00Z"));
+        dateNanosCases(suppliers, "fixed date nanos", () -> DateUtils.toLong(Instant.parse("2023-02-17T09:00:00.00Z")));
         dateCasesWithSpan(
             suppliers,
             "fixed date with period",
@@ -54,6 +59,22 @@ public static Iterable<Object[]> parameters() {
             Duration.ofDays(1L),
             "[86400000 in Z][fixed]"
         );
+        dateNanosCasesWithSpan(
+            suppliers,
+            "fixed date nanos with period",
+            () -> DateUtils.toLong(Instant.parse("2023-01-01T00:00:00.00Z")),
+            DataType.DATE_PERIOD,
+            Period.ofYears(1),
+            "[YEAR_OF_CENTURY in Z][fixed to midnight]"
+        );
+        dateNanosCasesWithSpan(
+            suppliers,
+            "fixed date nanos with duration",
+            () -> DateUtils.toLong(Instant.parse("2023-02-17T09:00:00.00Z")),
+            DataType.TIME_DURATION,
+            Duration.ofDays(1L),
+            "[86400000 in Z][fixed]"
+        );
         numberCases(suppliers, "fixed long", DataType.LONG, () -> 100L);
         numberCasesWithSpan(suppliers, "fixed long with span", DataType.LONG, () -> 100L);
         numberCases(suppliers, "fixed int", DataType.INTEGER, () -> 100);
@@ -142,6 +163,73 @@ private static void dateCasesWithSpan(
         }));
     }
 
+    /**
+     * Registers a case bucketing a {@code date_nanos} field by an explicit span (a period or a duration).
+     * {@code spanStr} is currently unused: the evaluator description is only prefix-matched.
+     */
+    private static void dateNanosCasesWithSpan(
+        List<TestCaseSupplier> suppliers,
+        String name,
+        LongSupplier date,
+        DataType spanType,
+        Object span,
+        String spanStr
+    ) {
+        suppliers.add(new TestCaseSupplier(name, List.of(DataType.DATE_NANOS, spanType), () -> {
+            List<TestCaseSupplier.TypedData> args = new ArrayList<>();
+            args.add(new TestCaseSupplier.TypedData(date.getAsLong(), DataType.DATE_NANOS, "field"));
+            args.add(new TestCaseSupplier.TypedData(span, spanType, "buckets").forceLiteral());
+            return new TestCaseSupplier.TestCase(
+                args,
+                Matchers.startsWith("DateTruncDateNanosEvaluator[fieldVal=Attribute[channel=0], rounding=Rounding["),
+                DataType.DATE_NANOS,
+                resultsMatcher(args)
+            );
+        }));
+    }
+
+    /**
+     * Registers cases bucketing a {@code date_nanos} field by a bucket count within a {@code from}/{@code to}
+     * range, for every combination of bound types in {@code DATE_BOUNDS_TYPE}.
+     */
+    private static void dateNanosCases(List<TestCaseSupplier> suppliers, String name, LongSupplier date) {
+        for (DataType fromType : DATE_BOUNDS_TYPE) {
+            for (DataType toType : DATE_BOUNDS_TYPE) {
+                suppliers.add(new TestCaseSupplier(name, List.of(DataType.DATE_NANOS, DataType.INTEGER, fromType, toType), () -> {
+                    List<TestCaseSupplier.TypedData> args = new ArrayList<>();
+                    args.add(new TestCaseSupplier.TypedData(date.getAsLong(), DataType.DATE_NANOS, "field"));
+                    // TODO more "from" and "to" and "buckets"
+                    args.add(new TestCaseSupplier.TypedData(50, DataType.INTEGER, "buckets").forceLiteral());
+                    args.add(dateBound("from", fromType, "2023-02-01T00:00:00.00Z"));
+                    args.add(dateBound("to", toType, "2023-03-01T09:00:00.00Z"));
+                    return new TestCaseSupplier.TestCase(
+                        args,
+                        Matchers.startsWith("DateTruncDateNanosEvaluator[fieldVal=Attribute[channel=0], rounding=Rounding["),
+                        DataType.DATE_NANOS,
+                        resultsMatcher(args)
+                    );
+                }));
+                // same as above, but a low bucket count and datetime bounds that match it (at hour span)
+                suppliers.add(new TestCaseSupplier(name, List.of(DataType.DATE_NANOS, DataType.INTEGER, fromType, toType), () -> {
+                    List<TestCaseSupplier.TypedData> args = new ArrayList<>();
+                    args.add(new TestCaseSupplier.TypedData(date.getAsLong(), DataType.DATE_NANOS, "field"));
+                    args.add(new TestCaseSupplier.TypedData(4, DataType.INTEGER, "buckets").forceLiteral());
+                    args.add(dateBound("from", fromType, "2023-02-17T09:00:00Z"));
+                    args.add(dateBound("to", toType, "2023-02-17T12:00:00Z"));
+                    // Rounding works on epoch millis, but the field holds epoch nanos: convert before rounding and back after.
+                    long millis = DateUtils.toMilliSeconds(date.getAsLong());
+                    long roundedMillis = Rounding.builder(Rounding.DateTimeUnit.HOUR_OF_DAY).build().prepareForUnknown().round(millis);
+                    return new TestCaseSupplier.TestCase(
+                        args,
+                        Matchers.startsWith("DateTruncDateNanosEvaluator[fieldVal=Attribute[channel=0], rounding=Rounding["),
+                        DataType.DATE_NANOS,
+                        equalTo(DateUtils.toNanoSeconds(roundedMillis))
+                    );
+                }));
+            }
+        }
+    }
+
     private static final DataType[] NUMBER_BOUNDS_TYPES = new DataType[] { DataType.INTEGER, DataType.LONG, DataType.DOUBLE };
 
     private static void numberCases(List<TestCaseSupplier> suppliers, String name, DataType numberType, Supplier<Number> number) {
@@ -221,7 +309,20 @@ private static TestCaseSupplier.TypedData keywordDateLiteral(String name, DataTy
     private static Matcher<Object> resultsMatcher(List<TestCaseSupplier.TypedData> typedData) {
         if (typedData.get(0).type() == DataType.DATETIME) {
             long millis = ((Number) typedData.get(0).data()).longValue();
-            return equalTo(Rounding.builder(Rounding.DateTimeUnit.DAY_OF_MONTH).build().prepareForUnknown().round(millis));
+            long expected = Rounding.builder(Rounding.DateTimeUnit.DAY_OF_MONTH).build().prepareForUnknown().round(millis);
+            LogManager.getLogger(getTestClass()).info("Expected: " + Instant.ofEpochMilli(expected));
+            LogManager.getLogger(getTestClass()).info("Input: " + Instant.ofEpochMilli(millis));
+            return equalTo(expected);
+        }
+        if (typedData.get(0).type() == DataType.DATE_NANOS) {
+            long nanos = ((Number) typedData.get(0).data()).longValue();
+            // Round the millisecond value (Rounding is millisecond based) and convert the result back to nanos.
+            long expected = DateUtils.toNanoSeconds(
+                Rounding.builder(Rounding.DateTimeUnit.DAY_OF_MONTH).build().prepareForUnknown().round(DateUtils.toMilliSeconds(nanos))
+            );
+            LogManager.getLogger(getTestClass()).info("Expected: " + DateUtils.toInstant(expected));
+            LogManager.getLogger(getTestClass()).info("Input: " + DateUtils.toInstant(nanos));
+            return equalTo(expected);
         }
         return equalTo(((Number) typedData.get(0).data()).doubleValue());
     }