diff --git a/CHANGELOG.md b/CHANGELOG.md index 78607d63da1ae..02fb585ebc526 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -66,6 +66,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Disallow removing some metadata fields by remove ingest processor ([#10895](https://github.com/opensearch-project/OpenSearch/pull/10895)) - Performance improvement for MultiTerm Queries on Keyword fields ([#7057](https://github.com/opensearch-project/OpenSearch/issues/7057)) - Refactor common parts from the Rounding class into a separate 'round' package ([#11023](https://github.com/opensearch-project/OpenSearch/issues/11023)) +- Performance improvement for date histogram aggregations without sub-aggregations ([#11083](https://github.com/opensearch-project/OpenSearch/pull/11083)) - Disable concurrent aggs for Diversified Sampler and Sampler aggs ([#11087](https://github.com/opensearch-project/OpenSearch/issues/11087)) - Made leader/follower check timeout setting dynamic ([#10528](https://github.com/opensearch-project/OpenSearch/pull/10528)) - Change error message when per shard document limit is breached ([#11312](https://github.com/opensearch-project/OpenSearch/pull/11312)) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml index 3b16cdb13a22f..e7da9a0bc454c 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml @@ -598,7 +598,6 @@ setup: - match: { aggregations.histo.buckets.0.doc_count: 2 } - match: { profile.shards.0.aggregations.0.type: DateHistogramAggregator } - match: { profile.shards.0.aggregations.0.description: histo } - - match: { profile.shards.0.aggregations.0.breakdown.collect_count: 4 } - match: { profile.shards.0.aggregations.0.debug.total_buckets: 3 } --- diff --git a/server/src/main/java/org/opensearch/common/Rounding.java b/server/src/main/java/org/opensearch/common/Rounding.java index a278634384cb9..b6509a57761b1 100644 --- a/server/src/main/java/org/opensearch/common/Rounding.java +++ b/server/src/main/java/org/opensearch/common/Rounding.java @@ -55,6 +55,7 @@ import java.time.OffsetDateTime; import java.time.ZoneId; import java.time.ZoneOffset; +import java.time.format.TextStyle; import java.time.temporal.ChronoField; import java.time.temporal.ChronoUnit; import java.time.temporal.IsoFields; @@ -65,6 +66,7 @@ import java.util.List; import java.util.Locale; import java.util.Objects; +import java.util.OptionalLong; import java.util.concurrent.TimeUnit; /** @@ -1383,4 +1385,40 @@ public static Rounding read(StreamInput in) throws IOException { throw new OpenSearchException("unknown rounding id [" + id + "]"); } } + + /** + * Extracts the interval value from the {@link Rounding} instance + * @param rounding {@link Rounding} instance + * @return the interval value from the {@link Rounding} instance or {@code OptionalLong.empty()} + * if the interval is not available + */ + public static OptionalLong getInterval(Rounding rounding) { + long interval = 0; + + if (rounding instanceof TimeUnitRounding) { + interval = (((TimeUnitRounding) rounding).unit).extraLocalOffsetLookup(); + if (!isUTCTimeZone(((TimeUnitRounding) rounding).timeZone)) { + // Fast filter aggregation cannot be used if it needs time zone rounding + return OptionalLong.empty(); + } + } else if (rounding instanceof TimeIntervalRounding) { + interval = ((TimeIntervalRounding) rounding).interval; + if (!isUTCTimeZone(((TimeIntervalRounding) rounding).timeZone)) { + // Fast filter aggregation cannot be used if it needs time zone rounding + return OptionalLong.empty(); + } + } else { + return OptionalLong.empty(); + } + + return OptionalLong.of(interval); + } + + /** + * Helper function for checking if the time zone requested for date histogram + * aggregation is utc or not + */ + private static boolean isUTCTimeZone(final ZoneId zoneId) { + return "Z".equals(zoneId.getDisplayName(TextStyle.FULL, Locale.ENGLISH)); + } } diff --git a/server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java index 4c374f02bba52..f133470a8fc43 100644 --- a/server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java @@ -414,6 +414,16 @@ public long parse(String value) { return resolution.convert(DateFormatters.from(dateTimeFormatter().parse(value), dateTimeFormatter().locale()).toInstant()); } + public long convertNanosToMillis(long nanoSecondsSinceEpoch) { + if (resolution.numericType.equals(NumericType.DATE_NANOSECONDS)) return DateUtils.toMilliSeconds(nanoSecondsSinceEpoch); + return nanoSecondsSinceEpoch; + } + + public long convertRoundedMillisToNanos(long milliSecondsSinceEpoch) { + if (resolution.numericType.equals(NumericType.DATE_NANOSECONDS)) return DateUtils.toNanoSeconds(milliSecondsSinceEpoch); + return milliSecondsSinceEpoch; + } + @Override public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchLookup, String format) { DateFormatter defaultFormatter = dateTimeFormatter(); diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java index b4f1e78f77aaf..a71c15d551927 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java @@ -34,6 +34,7 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Weight; import org.apache.lucene.util.CollectionUtil; import org.opensearch.common.Rounding; import org.opensearch.common.Rounding.Prepared; @@ -41,6 +42,7 @@ import org.opensearch.common.util.IntArray; import org.opensearch.common.util.LongArray; import org.opensearch.core.common.util.ByteArray; +import org.opensearch.index.mapper.DateFieldMapper; import org.opensearch.search.DocValueFormat; import org.opensearch.search.aggregations.Aggregator; import org.opensearch.search.aggregations.AggregatorFactories; @@ -125,9 +127,13 @@ static AutoDateHistogramAggregator build( * {@link MergingBucketsDeferringCollector#mergeBuckets(long[])}. */ private MergingBucketsDeferringCollector deferringCollector; + private final Weight[] filters; + private final DateFieldMapper.DateFieldType fieldType; protected final RoundingInfo[] roundingInfos; protected final int targetBuckets; + protected int roundingIdx; + protected Rounding.Prepared preparedRounding; private AutoDateHistogramAggregator( String name, @@ -148,8 +154,51 @@ private AutoDateHistogramAggregator( this.formatter = valuesSourceConfig.format(); this.roundingInfos = roundingInfos; this.roundingPreparer = roundingPreparer; + this.preparedRounding = prepareRounding(0); + + FilterRewriteHelper.FilterContext filterContext = FilterRewriteHelper.buildFastFilterContext( + parent(), + subAggregators.length, + context, + b -> getMinimumRounding(b[0], b[1]), + // Passing prepared rounding as supplier to ensure the correct prepared + // rounding is set as it is done during getMinimumRounding + () -> preparedRounding, + valuesSourceConfig, + fc -> FilterRewriteHelper.getAggregationBounds(context, fc.field()) + ); + if (filterContext != null) { + fieldType = filterContext.fieldType; + filters = filterContext.filters; + } else { + fieldType = null; + filters = null; + } } + private Rounding getMinimumRounding(final long low, final long high) { + // max - min / targetBuckets = bestDuration + // find the right innerInterval this bestDuration belongs to + // since we cannot exceed targetBuckets, bestDuration should go up, + // so the right innerInterval should be an upper bound + long bestDuration = (high - low) / targetBuckets; + while (roundingIdx < roundingInfos.length - 1) { + final RoundingInfo curRoundingInfo = roundingInfos[roundingIdx]; + final int temp = curRoundingInfo.innerIntervals[curRoundingInfo.innerIntervals.length - 1]; + // If the interval duration is covered by the maximum inner interval, + // we can start with this outer interval for creating the buckets + if (bestDuration <= temp * curRoundingInfo.roughEstimateDurationMillis) { + break; + } + roundingIdx++; + } + + preparedRounding = prepareRounding(roundingIdx); + return roundingInfos[roundingIdx].rounding; + } + + protected abstract LongKeyedBucketOrds getBucketOrds(); + @Override public final ScoreMode scoreMode() { if (valuesSource != null && valuesSource.needsScores()) { @@ -176,7 +225,32 @@ public final LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBuc if (valuesSource == null) { return LeafBucketCollector.NO_OP_COLLECTOR; } - return getLeafCollector(valuesSource.longValues(ctx), sub); + + final SortedNumericDocValues values = valuesSource.longValues(ctx); + final LeafBucketCollector iteratingCollector = getLeafCollector(values, sub); + + // Need to be declared as final and array for usage within the + // LeafBucketCollectorBase subclass below + final boolean[] useOpt = new boolean[1]; + useOpt[0] = filters != null; + + return new LeafBucketCollectorBase(sub, values) { + @Override + public void collect(int doc, long owningBucketOrd) throws IOException { + // Try fast filter aggregation if the filters have been created + // Skip if tried before and gave incorrect/incomplete results + if (useOpt[0]) { + useOpt[0] = FilterRewriteHelper.tryFastFilterAggregation(ctx, filters, fieldType, (key, count) -> { + incrementBucketDocCount( + FilterRewriteHelper.getBucketOrd(getBucketOrds().add(owningBucketOrd, preparedRounding.round(key))), + count + ); + }); + } + + iteratingCollector.collect(doc, owningBucketOrd); + } + }; } protected final InternalAggregation[] buildAggregations( @@ -247,8 +321,6 @@ protected final void merge(long[] mergeMap, long newNumBuckets) { * @opensearch.internal */ private static class FromSingle extends AutoDateHistogramAggregator { - private int roundingIdx; - private Rounding.Prepared preparedRounding; /** * Map from value to bucket ordinals. *
@@ -286,10 +358,14 @@ private static class FromSingle extends AutoDateHistogramAggregator {
metadata
);
- preparedRounding = prepareRounding(0);
bucketOrds = new LongKeyedBucketOrds.FromSingle(context.bigArrays());
}
+ @Override
+ protected LongKeyedBucketOrds getBucketOrds() {
+ return bucketOrds;
+ }
+
@Override
protected LeafBucketCollector getLeafCollector(SortedNumericDocValues values, LeafBucketCollector sub) throws IOException {
return new LeafBucketCollectorBase(sub, values) {
@@ -507,6 +583,11 @@ private static class FromMany extends AutoDateHistogramAggregator {
liveBucketCountUnderestimate = context.bigArrays().newIntArray(1, true);
}
+ @Override
+ protected LongKeyedBucketOrds getBucketOrds() {
+ return bucketOrds;
+ }
+
@Override
protected LeafBucketCollector getLeafCollector(SortedNumericDocValues values, LeafBucketCollector sub) throws IOException {
return new LeafBucketCollectorBase(sub, values) {
diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java
index f602eea7a9b12..8437e1dce9fe0 100644
--- a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java
+++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java
@@ -34,10 +34,12 @@
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.ScoreMode;
+import org.apache.lucene.search.Weight;
import org.apache.lucene.util.CollectionUtil;
import org.opensearch.common.Nullable;
import org.opensearch.common.Rounding;
import org.opensearch.common.lease.Releasables;
+import org.opensearch.index.mapper.DateFieldMapper;
import org.opensearch.search.DocValueFormat;
import org.opensearch.search.aggregations.Aggregator;
import org.opensearch.search.aggregations.AggregatorFactories;
@@ -48,6 +50,7 @@
import org.opensearch.search.aggregations.LeafBucketCollectorBase;
import org.opensearch.search.aggregations.bucket.BucketsAggregator;
import org.opensearch.search.aggregations.bucket.terms.LongKeyedBucketOrds;
+import org.opensearch.search.aggregations.support.FieldContext;
import org.opensearch.search.aggregations.support.ValuesSource;
import org.opensearch.search.aggregations.support.ValuesSourceConfig;
import org.opensearch.search.internal.SearchContext;
@@ -66,7 +69,6 @@
* @opensearch.internal
*/
class DateHistogramAggregator extends BucketsAggregator implements SizedBucketAggregator {
-
private final ValuesSource.Numeric valuesSource;
private final DocValueFormat formatter;
private final Rounding rounding;
@@ -76,12 +78,12 @@ class DateHistogramAggregator extends BucketsAggregator implements SizedBucketAg
private final Rounding.Prepared preparedRounding;
private final BucketOrder order;
private final boolean keyed;
-
private final long minDocCount;
private final LongBounds extendedBounds;
private final LongBounds hardBounds;
-
+ private final Weight[] filters;
private final LongKeyedBucketOrds bucketOrds;
+ private final DateFieldMapper.DateFieldType fieldType;
DateHistogramAggregator(
String name,
@@ -99,7 +101,6 @@ class DateHistogramAggregator extends BucketsAggregator implements SizedBucketAg
CardinalityUpperBound cardinality,
Map