From 63ab3beb52221af8b295fb25f7dd213a770ac129 Mon Sep 17 00:00:00 2001 From: Bogdan Pintea Date: Wed, 18 Dec 2024 17:12:14 +0100 Subject: [PATCH] ESQL: Docs: add example of date bucketing with offset (#116680) Add an example of how to create date histograms with an offset. Fixes #114167 --- .../esql/functions/examples/bucket.asciidoc | 14 ++++++++++++++ .../functions/kibana/definition/bucket.json | 3 ++- .../elasticsearch/xpack/esql/CsvTestUtils.java | 2 +- .../src/main/resources/bucket.csv-spec | 18 ++++++++++++++++++ .../expression/function/grouping/Bucket.java | 11 +++++++++++ .../org/elasticsearch/xpack/esql/CsvTests.java | 5 +++-- 6 files changed, 49 insertions(+), 4 deletions(-) diff --git a/docs/reference/esql/functions/examples/bucket.asciidoc b/docs/reference/esql/functions/examples/bucket.asciidoc index 4afea30660339..264efc191748f 100644 --- a/docs/reference/esql/functions/examples/bucket.asciidoc +++ b/docs/reference/esql/functions/examples/bucket.asciidoc @@ -116,4 +116,18 @@ include::{esql-specs}/bucket.csv-spec[tag=reuseGroupingFunctionWithExpression] |=== include::{esql-specs}/bucket.csv-spec[tag=reuseGroupingFunctionWithExpression-result] |=== +Sometimes you need to change the start value of each bucket by a given duration (similar to date histogram +aggregation's <<search-aggregations-bucket-histogram-aggregation,`offset`>> parameter). To do so, you will need to +take into account how the language handles expressions within the `STATS` command: if these contain functions or +arithmetic operators, a virtual `EVAL` is inserted before and/or after the `STATS` command. Consequently, a double +compensation is needed to adjust the bucketed date value before the aggregation and then again after. 
For instance, +inserting a negative offset of `1 hour` to buckets of `1 year` looks like this: +[source.merge.styled,esql] +---- +include::{esql-specs}/bucket.csv-spec[tag=bucketWithOffset] +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +include::{esql-specs}/bucket.csv-spec[tag=bucketWithOffset-result] +|=== diff --git a/docs/reference/esql/functions/kibana/definition/bucket.json b/docs/reference/esql/functions/kibana/definition/bucket.json index 18802f5ff8fef..3d96de05c8407 100644 --- a/docs/reference/esql/functions/kibana/definition/bucket.json +++ b/docs/reference/esql/functions/kibana/definition/bucket.json @@ -1598,7 +1598,8 @@ "FROM employees\n| WHERE hire_date >= \"1985-01-01T00:00:00Z\" AND hire_date < \"1986-01-01T00:00:00Z\"\n| STATS c = COUNT(1) BY b = BUCKET(salary, 5000.)\n| SORT b", "FROM sample_data \n| WHERE @timestamp >= NOW() - 1 day and @timestamp < NOW()\n| STATS COUNT(*) BY bucket = BUCKET(@timestamp, 25, NOW() - 1 day, NOW())", "FROM employees\n| WHERE hire_date >= \"1985-01-01T00:00:00Z\" AND hire_date < \"1986-01-01T00:00:00Z\"\n| STATS AVG(salary) BY bucket = BUCKET(hire_date, 20, \"1985-01-01T00:00:00Z\", \"1986-01-01T00:00:00Z\")\n| SORT bucket", - "FROM employees\n| STATS s1 = b1 + 1, s2 = BUCKET(salary / 1000 + 999, 50.) + 2 BY b1 = BUCKET(salary / 100 + 99, 50.), b2 = BUCKET(salary / 1000 + 999, 50.)\n| SORT b1, b2\n| KEEP s1, b1, s2, b2" + "FROM employees\n| STATS s1 = b1 + 1, s2 = BUCKET(salary / 1000 + 999, 50.) 
+ 2 BY b1 = BUCKET(salary / 100 + 99, 50.), b2 = BUCKET(salary / 1000 + 999, 50.)\n| SORT b1, b2\n| KEEP s1, b1, s2, b2", + "FROM employees \n| STATS dates = VALUES(birth_date) BY b = BUCKET(birth_date + 1 HOUR, 1 YEAR) - 1 HOUR\n| EVAL d_count = MV_COUNT(dates)\n| SORT d_count\n| LIMIT 3" ], "preview" : false, "snapshot_only" : false diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java index 7adafa908ce4f..f0bdf089f69d1 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java @@ -63,7 +63,7 @@ import static org.elasticsearch.xpack.esql.core.util.SpatialCoordinateTypes.GEO; public final class CsvTestUtils { - private static final int MAX_WIDTH = 20; + private static final int MAX_WIDTH = 80; private static final CsvPreference CSV_SPEC_PREFERENCES = new CsvPreference.Builder('"', '|', "\r\n").build(); private static final String NULL_VALUE = "null"; private static final char ESCAPE_CHAR = '\\'; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec index b29c489910f65..8cfde2bb9bde7 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec @@ -145,6 +145,24 @@ AVG(salary):double | bucket:date // end::bucket_in_agg-result[] ; +bucketWithOffset#[skip:-8.13.99, reason:BUCKET renamed in 8.14] +// tag::bucketWithOffset[] +FROM employees +| STATS dates = MV_SORT(VALUES(birth_date)) BY b = BUCKET(birth_date + 1 HOUR, 1 YEAR) - 1 HOUR +| EVAL d_count = MV_COUNT(dates) +| SORT d_count, b +| LIMIT 3 +// end::bucketWithOffset[] +; + +// tag::bucketWithOffset-result[] 
+dates:date |b:date |d_count:integer +1965-01-03T00:00:00.000Z |1964-12-31T23:00:00.000Z|1 +[1955-01-21T00:00:00.000Z, 1955-08-20T00:00:00.000Z, 1955-08-28T00:00:00.000Z, 1955-10-04T00:00:00.000Z]|1954-12-31T23:00:00.000Z|4 +[1957-04-04T00:00:00.000Z, 1957-05-23T00:00:00.000Z, 1957-05-25T00:00:00.000Z, 1957-12-03T00:00:00.000Z]|1956-12-31T23:00:00.000Z|4 +// end::bucketWithOffset-result[] +; + docsBucketMonth#[skip:-8.13.99, reason:BUCKET renamed in 8.14] //tag::docsBucketMonth[] FROM employees diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java index 347d542f5212d..12932ba8d6e11 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java @@ -163,6 +163,17 @@ another in which the bucket size is provided directly (two parameters). grouping part, or that it is invoked with the exact same expression:""", file = "bucket", tag = "reuseGroupingFunctionWithExpression" + ), + @Example( + description = """ + Sometimes you need to change the start value of each bucket by a given duration (similar to date histogram + aggregation's <<search-aggregations-bucket-histogram-aggregation,`offset`>> parameter). To do so, you will need to + take into account how the language handles expressions within the `STATS` command: if these contain functions or + arithmetic operators, a virtual `EVAL` is inserted before and/or after the `STATS` command. Consequently, a double + compensation is needed to adjust the bucketed date value before the aggregation and then again after. 
For instance, + inserting a negative offset of `1 hour` to buckets of `1 year` looks like this:""", + file = "bucket", + tag = "bucketWithOffset" ) } ) public Bucket( diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java index 70e5d793843bf..1b4c7fee9c7dd 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java @@ -319,13 +319,14 @@ private void doTest() throws Exception { } protected void assertResults(ExpectedResults expected, ActualResults actual, boolean ignoreOrder, Logger logger) { - CsvAssert.assertResults(expected, actual, ignoreOrder, logger); /* - * Comment the assertion above and enable the next two lines to see the results returned by ES without any assertions being done. + * Enable the next two lines to see the results returned by ES. * This is useful when creating a new test or trying to figure out what are the actual results. */ // CsvTestUtils.logMetaData(actual.columnNames(), actual.columnTypes(), LOGGER); // CsvTestUtils.logData(actual.values(), LOGGER); + + CsvAssert.assertResults(expected, actual, ignoreOrder, logger); } private static IndexResolution loadIndexResolution(String mappingName, String indexName, Map typeMapping) {