Skip to content

Commit

Permalink
ESQL: top_list aggregation (elastic#109386)
Browse files Browse the repository at this point in the history
Added the `top_list(<field>, <limit>, <order>)` aggregation, which collects the
top N values per bucket. Works with the same types as MAX/MIN.

- Added the aggregation function
- Added a template to generate the aggregators
- Added a template to generate the `<Type>BucketedSort` implementations per-type
  - This structure is based on the `BucketedSort` structure used in the original aggregations. It was modified to better fit the ESQL ecosystem (Block-based, no docs, etc.).

Also added a guide to creating aggregations. Fixes
elastic#109213
  • Loading branch information
ivancea authored Jun 19, 2024
1 parent 0145a41 commit 2233349
Show file tree
Hide file tree
Showing 41 changed files with 4,364 additions and 19 deletions.
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ CHANGELOG.asciidoc merge=union
# Windows
build-tools-internal/src/test/resources/org/elasticsearch/gradle/internal/release/*.asciidoc text eol=lf

x-pack/plugin/esql/compute/src/main/generated/** linguist-generated=true
x-pack/plugin/esql/compute/src/main/generated-src/** linguist-generated=true
x-pack/plugin/esql/src/main/antlr/*.tokens linguist-generated=true
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/*.interp linguist-generated=true
x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlBaseLexer*.java linguist-generated=true
Expand Down
6 changes: 6 additions & 0 deletions docs/changelog/109386.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 109386
summary: "ESQL: `top_list` aggregation"
area: ES|QL
type: feature
issues:
- 109213
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;

/**
* Annotates a class that implements an aggregation function with grouping.
* See {@link Aggregator} for more information.
*/
@Target(ElementType.TYPE)
@Retention(RetentionPolicy.SOURCE)
public @interface GroupingAggregator {
Expand Down
51 changes: 44 additions & 7 deletions x-pack/plugin/esql/compute/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,11 @@ spotless {
}
}

def prop(Type, type, TYPE, BYTES, Array, Hash) {
def prop(Type, type, Wrapper, TYPE, BYTES, Array, Hash) {
return [
"Type" : Type,
"type" : type,
"Wrapper": Wrapper,
"TYPE" : TYPE,
"BYTES" : BYTES,
"Array" : Array,
Expand All @@ -55,12 +56,13 @@ def prop(Type, type, TYPE, BYTES, Array, Hash) {
}

tasks.named('stringTemplates').configure {
var intProperties = prop("Int", "int", "INT", "Integer.BYTES", "IntArray", "LongHash")
var floatProperties = prop("Float", "float", "FLOAT", "Float.BYTES", "FloatArray", "LongHash")
var longProperties = prop("Long", "long", "LONG", "Long.BYTES", "LongArray", "LongHash")
var doubleProperties = prop("Double", "double", "DOUBLE", "Double.BYTES", "DoubleArray", "LongHash")
var bytesRefProperties = prop("BytesRef", "BytesRef", "BYTES_REF", "org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_REF", "", "BytesRefHash")
var booleanProperties = prop("Boolean", "boolean", "BOOLEAN", "Byte.BYTES", "BitArray", "")
var intProperties = prop("Int", "int", "Integer", "INT", "Integer.BYTES", "IntArray", "LongHash")
var floatProperties = prop("Float", "float", "Float", "FLOAT", "Float.BYTES", "FloatArray", "LongHash")
var longProperties = prop("Long", "long", "Long", "LONG", "Long.BYTES", "LongArray", "LongHash")
var doubleProperties = prop("Double", "double", "Double", "DOUBLE", "Double.BYTES", "DoubleArray", "LongHash")
var bytesRefProperties = prop("BytesRef", "BytesRef", "", "BYTES_REF", "org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_REF", "", "BytesRefHash")
var booleanProperties = prop("Boolean", "boolean", "Boolean", "BOOLEAN", "Byte.BYTES", "BitArray", "")

// primitive vectors
File vectorInputFile = new File("${projectDir}/src/main/java/org/elasticsearch/compute/data/X-Vector.java.st")
template {
Expand Down Expand Up @@ -500,6 +502,24 @@ tasks.named('stringTemplates').configure {
it.outputFile = "org/elasticsearch/compute/aggregation/RateDoubleAggregator.java"
}


File topListAggregatorInputFile = new File("${projectDir}/src/main/java/org/elasticsearch/compute/aggregation/X-TopListAggregator.java.st")
template {
it.properties = intProperties
it.inputFile = topListAggregatorInputFile
it.outputFile = "org/elasticsearch/compute/aggregation/TopListIntAggregator.java"
}
template {
it.properties = longProperties
it.inputFile = topListAggregatorInputFile
it.outputFile = "org/elasticsearch/compute/aggregation/TopListLongAggregator.java"
}
template {
it.properties = doubleProperties
it.inputFile = topListAggregatorInputFile
it.outputFile = "org/elasticsearch/compute/aggregation/TopListDoubleAggregator.java"
}

File multivalueDedupeInputFile = file("src/main/java/org/elasticsearch/compute/operator/mvdedupe/X-MultivalueDedupe.java.st")
template {
it.properties = intProperties
Expand Down Expand Up @@ -635,4 +655,21 @@ tasks.named('stringTemplates').configure {
it.inputFile = resultBuilderInputFile
it.outputFile = "org/elasticsearch/compute/operator/topn/ResultBuilderForFloat.java"
}

File bucketedSortInputFile = new File("${projectDir}/src/main/java/org/elasticsearch/compute/data/sort/X-BucketedSort.java.st")
template {
it.properties = intProperties
it.inputFile = bucketedSortInputFile
it.outputFile = "org/elasticsearch/compute/data/sort/IntBucketedSort.java"
}
template {
it.properties = longProperties
it.inputFile = bucketedSortInputFile
it.outputFile = "org/elasticsearch/compute/data/sort/LongBucketedSort.java"
}
template {
it.properties = doubleProperties
it.inputFile = bucketedSortInputFile
it.outputFile = "org/elasticsearch/compute/data/sort/DoubleBucketedSort.java"
}
}

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 2233349

Please sign in to comment.