From d4a9277734b03f50ac2fe7f7cc311468c947578f Mon Sep 17 00:00:00 2001 From: David Roberts Date: Thu, 7 Dec 2023 11:24:18 +0000 Subject: [PATCH] [ML] Fix frequent_item_sets aggregation on empty index Previously the frequent_item_sets aggregation would fail with an internal server error if run against an empty index. This change makes it return empty output, as expected. Fixes #103067 --- .../mr/ItemSetMapReduceAggregator.java | 25 +++++----- .../test/ml/frequent_item_sets_agg.yml | 47 +++++++++++++++++++ 2 files changed, 61 insertions(+), 11 deletions(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/mr/ItemSetMapReduceAggregator.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/mr/ItemSetMapReduceAggregator.java index 7afe6265f61d7..72bfb6f1f0394 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/mr/ItemSetMapReduceAggregator.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/mr/ItemSetMapReduceAggregator.java @@ -42,6 +42,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.function.BiConsumer; public abstract class ItemSetMapReduceAggregator< @@ -76,7 +77,7 @@ protected ItemSetMapReduceAggregator( List valueSources = new ArrayList<>(); List fields = new ArrayList<>(); IndexSearcher contextSearcher = context.searcher(); - LeafReaderContext ctx = getLeafReaderForOrdinals(context); + Optional<LeafReaderContext> ctx = getLeafReaderForOrdinals(context); int id = 0; this.weightDocumentFilter = documentFilter != null @@ -85,15 +86,17 @@ protected ItemSetMapReduceAggregator( boolean rewriteBasedOnOrdinals = false; - for (var c : configsAndValueFilters) { - ItemSetMapReduceValueSource e = context.getValuesSourceRegistry() - .getAggregator(registryKey, c.v1()) - .build(c.v1(), id++, c.v2(), ordinalOptimization, ctx); - if (e.getField().getName() != null) { - 
fields.add(e.getField()); - valueSources.add(e); + if (ctx.isPresent()) { + for (var c : configsAndValueFilters) { + ItemSetMapReduceValueSource e = context.getValuesSourceRegistry() + .getAggregator(registryKey, c.v1()) + .build(c.v1(), id++, c.v2(), ordinalOptimization, ctx.get()); + if (e.getField().getName() != null) { + fields.add(e.getField()); + valueSources.add(e); + } + rewriteBasedOnOrdinals |= e.usesOrdinals(); } - rewriteBasedOnOrdinals |= e.usesOrdinals(); } this.rewriteBasedOnOrdinals = rewriteBasedOnOrdinals; @@ -220,8 +223,8 @@ private InternalAggregation buildAggregation(long owningBucketOrdinal) throws IO return new InternalItemSetMapReduceAggregation<>(name, metadata(), mapReducer, context, null, fields, profiling); } - private static LeafReaderContext getLeafReaderForOrdinals(AggregationContext context) { + private static Optional<LeafReaderContext> getLeafReaderForOrdinals(AggregationContext context) { IndexReader reader = context.searcher().getIndexReader(); - return reader.leaves().get(0); + return reader.leaves().stream().findFirst(); } } diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/frequent_item_sets_agg.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/frequent_item_sets_agg.yml index f5244d271abed..ec04840d26644 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/frequent_item_sets_agg.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/frequent_item_sets_agg.yml @@ -540,3 +540,50 @@ setup: - match: { aggregations.fi.buckets.1.doc_count: 4 } - match: { aggregations.fi.buckets.1.support: 0.4 } - match: { aggregations.fi.buckets.1.key.error_message: ["engine overheated"] } + +--- +"Test frequent items on empty index": + - skip: + features: + - "headers" + - "allowed_warnings" + + - do: + headers: + Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. 
the test setup superuser + indices.create: + index: empty_store + body: + mappings: + properties: + features: + type: keyword + error_message: + type: keyword + timestamp: + type: date + geo_point: + type: geo_point + histogram: + type: histogram + + - do: + search: + index: empty_store + body: > + { + "size": 0, + "aggs": { + "fi": { + "frequent_item_sets": { + "minimum_set_size": 3, + "minimum_support": 0.3, + "fields": [ + {"field": "features"}, + {"field": "error_message"} + ] + } + } + } + } + - length: { aggregations.fi.buckets: 0 }