From 63ab3beb52221af8b295fb25f7dd213a770ac129 Mon Sep 17 00:00:00 2001
From: Bogdan Pintea <bogdan.pintea@elastic.co>
Date: Wed, 18 Dec 2024 17:12:14 +0100
Subject: [PATCH] ESQL: Docs: add example of date bucketing with offset
 (#116680)

Add an example of how to create date histograms with an offset.

Fixes #114167
---
 .../esql/functions/examples/bucket.asciidoc    | 14 ++++++++++++++
 .../functions/kibana/definition/bucket.json    |  3 ++-
 .../elasticsearch/xpack/esql/CsvTestUtils.java |  2 +-
 .../src/main/resources/bucket.csv-spec         | 18 ++++++++++++++++++
 .../expression/function/grouping/Bucket.java   | 11 +++++++++++
 .../org/elasticsearch/xpack/esql/CsvTests.java |  5 +++--
 6 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/docs/reference/esql/functions/examples/bucket.asciidoc b/docs/reference/esql/functions/examples/bucket.asciidoc
index 4afea30660339..264efc191748f 100644
--- a/docs/reference/esql/functions/examples/bucket.asciidoc
+++ b/docs/reference/esql/functions/examples/bucket.asciidoc
@@ -116,4 +116,18 @@ include::{esql-specs}/bucket.csv-spec[tag=reuseGroupingFunctionWithExpression]
 |===
 include::{esql-specs}/bucket.csv-spec[tag=reuseGroupingFunctionWithExpression-result]
 |===
+Sometimes you need to change the start value of each bucket by a given duration (similar to date histogram
+aggregation's <<search-aggregations-bucket-histogram-aggregation,`offset`>> parameter). To do so, you will need to
+take into account how the language handles expressions within the `STATS` command: if these contain functions or
+arithmetic operators, a virtual `EVAL` is inserted before and/or after the `STATS` command. Consequently, a double
+compensation is needed to adjust the bucketed date value before the aggregation and then again after. For instance,
+inserting a negative offset of `1 hour` to buckets of `1 year` looks like this:
+[source.merge.styled,esql]
+----
+include::{esql-specs}/bucket.csv-spec[tag=bucketWithOffset]
+----
+[%header.monospaced.styled,format=dsv,separator=|]
+|===
+include::{esql-specs}/bucket.csv-spec[tag=bucketWithOffset-result]
+|===
 
diff --git a/docs/reference/esql/functions/kibana/definition/bucket.json b/docs/reference/esql/functions/kibana/definition/bucket.json
index 18802f5ff8fef..3d96de05c8407 100644
--- a/docs/reference/esql/functions/kibana/definition/bucket.json
+++ b/docs/reference/esql/functions/kibana/definition/bucket.json
@@ -1598,7 +1598,8 @@
     "FROM employees\n| WHERE hire_date >= \"1985-01-01T00:00:00Z\" AND hire_date < \"1986-01-01T00:00:00Z\"\n| STATS c = COUNT(1) BY b = BUCKET(salary, 5000.)\n| SORT b",
     "FROM sample_data \n| WHERE @timestamp >= NOW() - 1 day and @timestamp < NOW()\n| STATS COUNT(*) BY bucket = BUCKET(@timestamp, 25, NOW() - 1 day, NOW())",
     "FROM employees\n| WHERE hire_date >= \"1985-01-01T00:00:00Z\" AND hire_date < \"1986-01-01T00:00:00Z\"\n| STATS AVG(salary) BY bucket = BUCKET(hire_date, 20, \"1985-01-01T00:00:00Z\", \"1986-01-01T00:00:00Z\")\n| SORT bucket",
-    "FROM employees\n| STATS s1 = b1 + 1, s2 = BUCKET(salary / 1000 + 999, 50.) + 2 BY b1 = BUCKET(salary / 100 + 99, 50.), b2 = BUCKET(salary / 1000 + 999, 50.)\n| SORT b1, b2\n| KEEP s1, b1, s2, b2"
+    "FROM employees\n| STATS s1 = b1 + 1, s2 = BUCKET(salary / 1000 + 999, 50.) + 2 BY b1 = BUCKET(salary / 100 + 99, 50.), b2 = BUCKET(salary / 1000 + 999, 50.)\n| SORT b1, b2\n| KEEP s1, b1, s2, b2",
+    "FROM employees\n| STATS dates = MV_SORT(VALUES(birth_date)) BY b = BUCKET(birth_date + 1 HOUR, 1 YEAR) - 1 HOUR\n| EVAL d_count = MV_COUNT(dates)\n| SORT d_count, b\n| LIMIT 3"
   ],
   "preview" : false,
   "snapshot_only" : false
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java
index 7adafa908ce4f..f0bdf089f69d1 100644
--- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java
@@ -63,7 +63,7 @@
 import static org.elasticsearch.xpack.esql.core.util.SpatialCoordinateTypes.GEO;
 
 public final class CsvTestUtils {
-    private static final int MAX_WIDTH = 20;
+    private static final int MAX_WIDTH = 80;
     private static final CsvPreference CSV_SPEC_PREFERENCES = new CsvPreference.Builder('"', '|', "\r\n").build();
     private static final String NULL_VALUE = "null";
     private static final char ESCAPE_CHAR = '\\';
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec
index b29c489910f65..8cfde2bb9bde7 100644
--- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec
@@ -145,6 +145,24 @@ AVG(salary):double | bucket:date
 // end::bucket_in_agg-result[]
 ;
 
+bucketWithOffset#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
+// tag::bucketWithOffset[]
+FROM employees
+| STATS dates = MV_SORT(VALUES(birth_date)) BY b = BUCKET(birth_date + 1 HOUR, 1 YEAR) - 1 HOUR
+| EVAL d_count = MV_COUNT(dates)
+| SORT d_count, b
+| LIMIT 3
+// end::bucketWithOffset[]
+;
+
+// tag::bucketWithOffset-result[]
+dates:date                                                                                              |b:date                  |d_count:integer
+1965-01-03T00:00:00.000Z                                                                                |1964-12-31T23:00:00.000Z|1
+[1955-01-21T00:00:00.000Z, 1955-08-20T00:00:00.000Z, 1955-08-28T00:00:00.000Z, 1955-10-04T00:00:00.000Z]|1954-12-31T23:00:00.000Z|4
+[1957-04-04T00:00:00.000Z, 1957-05-23T00:00:00.000Z, 1957-05-25T00:00:00.000Z, 1957-12-03T00:00:00.000Z]|1956-12-31T23:00:00.000Z|4
+// end::bucketWithOffset-result[]
+;
+
 docsBucketMonth#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
 //tag::docsBucketMonth[]
 FROM employees
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java
index 347d542f5212d..12932ba8d6e11 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java
@@ -163,6 +163,17 @@ another in which the bucket size is provided directly (two parameters).
                     grouping part, or that it is invoked with the exact same expression:""",
                 file = "bucket",
                 tag = "reuseGroupingFunctionWithExpression"
+            ),
+            @Example(
+                description = """
+                    Sometimes you need to change the start value of each bucket by a given duration (similar to date histogram
+                    aggregation's <<search-aggregations-bucket-histogram-aggregation,`offset`>> parameter). To do so, you will need to
+                    take into account how the language handles expressions within the `STATS` command: if these contain functions or
+                    arithmetic operators, a virtual `EVAL` is inserted before and/or after the `STATS` command. Consequently, a double
+                    compensation is needed to adjust the bucketed date value before the aggregation and then again after. For instance,
+                    inserting a negative offset of `1 hour` to buckets of `1 year` looks like this:""",
+                file = "bucket",
+                tag = "bucketWithOffset"
             ) }
     )
     public Bucket(
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java
index 70e5d793843bf..1b4c7fee9c7dd 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java
@@ -319,13 +319,14 @@ private void doTest() throws Exception {
     }
 
     protected void assertResults(ExpectedResults expected, ActualResults actual, boolean ignoreOrder, Logger logger) {
-        CsvAssert.assertResults(expected, actual, ignoreOrder, logger);
         /*
-         * Comment the assertion above and enable the next two lines to see the results returned by ES without any assertions being done.
+         * Enable the next two lines to see the results returned by ES.
          * This is useful when creating a new test or trying to figure out what are the actual results.
          */
         // CsvTestUtils.logMetaData(actual.columnNames(), actual.columnTypes(), LOGGER);
         // CsvTestUtils.logData(actual.values(), LOGGER);
+
+        CsvAssert.assertResults(expected, actual, ignoreOrder, logger);
     }
 
     private static IndexResolution loadIndexResolution(String mappingName, String indexName, Map<String, String> typeMapping) {