diff --git a/docs/changelog/108459.yaml b/docs/changelog/108459.yaml new file mode 100644 index 0000000000000..5e05797f284be --- /dev/null +++ b/docs/changelog/108459.yaml @@ -0,0 +1,6 @@ +pr: 108459 +summary: Do not use global ordinals strategy if the leaf reader context cannot be + obtained +area: Machine Learning +type: bug +issues: [] diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/FrequentItemSetCollector.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/FrequentItemSetCollector.java index 18086748d6fe0..bd80e362f2f71 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/FrequentItemSetCollector.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/FrequentItemSetCollector.java @@ -177,7 +177,8 @@ FrequentItemSet toFrequentItemSet(List fields) throws IOException { int pos = items.nextSetBit(0); while (pos > 0) { Tuple item = transactionStore.getItem(topItemIds.getItemIdAt(pos - 1)); - assert item.v1() < fields.size() : "item id exceed number of given items, did you configure eclat correctly?"; + assert item.v1() < fields.size() + : "eclat error: item id (" + item.v1() + ") exceeds the number of given items (" + fields.size() + ")"; final Field field = fields.get(item.v1()); Object formattedValue = field.formatValue(item.v2()); String fieldName = fields.get(item.v1()).getName(); @@ -252,19 +253,20 @@ public FrequentItemSetCollector(TransactionStore transactionStore, TopItemIds to this.topItemIds = topItemIds; this.size = size; this.min = min; - queue = new FrequentItemSetPriorityQueue(size); - frequentItemsByCount = Maps.newMapWithExpectedSize(size / 10); + this.queue = new FrequentItemSetPriorityQueue(size); + this.frequentItemsByCount = Maps.newMapWithExpectedSize(size / 10); } public FrequentItemSet[] finalizeAndGetResults(List fields) throws IOException { - FrequentItemSet[] topFrequentItems = new FrequentItemSet[size()]; + FrequentItemSet[] topFrequentItems = new FrequentItemSet[queue.size()]; for (int i = topFrequentItems.length - 1; i >= 0; i--) { topFrequentItems[i] = queue.pop().toFrequentItemSet(fields); } return topFrequentItems; } - public int size() { + // Visible for testing + int size() { return queue.size(); } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/mr/ItemSetMapReduceAggregator.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/mr/ItemSetMapReduceAggregator.java index 72bfb6f1f0394..0f9555c77341f 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/mr/ItemSetMapReduceAggregator.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/mr/ItemSetMapReduceAggregator.java @@ -86,17 +86,15 @@ protected ItemSetMapReduceAggregator( boolean rewriteBasedOnOrdinals = false; - if (ctx.isPresent()) { - for (var c : configsAndValueFilters) { - ItemSetMapReduceValueSource e = context.getValuesSourceRegistry() - .getAggregator(registryKey, c.v1()) - .build(c.v1(), id++, c.v2(), ordinalOptimization, ctx.get()); - if (e.getField().getName() != null) { - fields.add(e.getField()); - valueSources.add(e); - } - rewriteBasedOnOrdinals |= e.usesOrdinals(); + for (var c : configsAndValueFilters) { + ItemSetMapReduceValueSource e = context.getValuesSourceRegistry() + .getAggregator(registryKey, c.v1()) + .build(c.v1(), id++, c.v2(), ordinalOptimization, ctx); + if (e.getField().getName() != null) { + fields.add(e.getField()); + valueSources.add(e); } + rewriteBasedOnOrdinals |= e.usesOrdinals(); } this.rewriteBasedOnOrdinals = rewriteBasedOnOrdinals; diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/mr/ItemSetMapReduceValueSource.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/mr/ItemSetMapReduceValueSource.java index c9ec772eb3321..08adecd3fbce5 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/mr/ItemSetMapReduceValueSource.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/mr/ItemSetMapReduceValueSource.java @@ -37,6 +37,7 @@ import java.util.Collections; import java.util.List; import java.util.Objects; +import java.util.Optional; /** * Interface to extract values from Lucene in order to feed it into the MapReducer. @@ -53,7 +54,7 @@ ItemSetMapReduceValueSource build( int id, IncludeExclude includeExclude, AbstractItemSetMapReducer.OrdinalOptimization ordinalOptimization, - LeafReaderContext ctx + Optional ctx ) throws IOException; } @@ -345,20 +346,21 @@ public KeywordValueSource( int id, IncludeExclude includeExclude, AbstractItemSetMapReducer.OrdinalOptimization ordinalOptimization, - LeafReaderContext ctx + Optional ctx ) throws IOException { super(config, id, ValueFormatter.BYTES_REF); if (AbstractItemSetMapReducer.OrdinalOptimization.GLOBAL_ORDINALS.equals(ordinalOptimization) && config.getValuesSource() instanceof Bytes.WithOrdinals - && ((Bytes.WithOrdinals) config.getValuesSource()).supportsGlobalOrdinalsMapping()) { + && ((Bytes.WithOrdinals) config.getValuesSource()).supportsGlobalOrdinalsMapping() + && ctx.isPresent()) { logger.debug("Use ordinals for field [{}]", config.fieldContext().field()); this.executionStrategy = new GlobalOrdinalsStrategy( getField(), (Bytes.WithOrdinals) config.getValuesSource(), includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(config.format()), - ctx + ctx.get() ); } else { this.executionStrategy = new MapStrategy( @@ -394,7 +396,7 @@ public NumericValueSource( int id, IncludeExclude includeExclude, AbstractItemSetMapReducer.OrdinalOptimization unusedOrdinalOptimization, - LeafReaderContext unusedCtx + Optional unusedCtx ) { super(config, id, ValueFormatter.LONG); this.source = (Numeric) config.getValuesSource();