Skip to content

Commit

Permalink
[Backport 2.x] Apply fast date histogram optimization at the segment …
Browse files Browse the repository at this point in the history
…level (opensearch-project#12279)

* Apply fast date histogram optimization at the segment level (opensearch-project#12073)

---------

Signed-off-by: bowenlan-amzn <[email protected]>
(cherry picked from commit 9a0a69f)
Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>

* Fix: reset the filter built at segment level for date histogram optimization (opensearch-project#12267)


---------

Signed-off-by: bowenlan-amzn <[email protected]>

---------

Signed-off-by: bowenlan-amzn <[email protected]>
Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
  • Loading branch information
bowenlan-amzn and github-actions[bot] authored Feb 9, 2024
1 parent 644416f commit 25c2fde
Show file tree
Hide file tree
Showing 9 changed files with 728 additions and 179 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -177,9 +177,9 @@ public void setupSuiteScopeCluster() throws Exception {
indexDoc(2, 15, 3), // date: Feb 15, dates: Feb 15, Mar 16
indexDoc(3, 2, 4), // date: Mar 2, dates: Mar 2, Apr 3
indexDoc(3, 15, 5), // date: Mar 15, dates: Mar 15, Apr 16
indexDoc(3, 23, 6)
indexDoc(3, 23, 6) // date: Mar 23, dates: Mar 23, Apr 24
)
); // date: Mar 23, dates: Mar 23, Apr 24
);
indexRandom(true, builders);
ensureSearchable();
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.search.aggregations.bucket;

import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;

import org.opensearch.action.index.IndexRequestBuilder;
import org.opensearch.action.search.SearchResponse;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.time.DateFormatter;
import org.opensearch.index.query.QueryBuilder;
import org.opensearch.index.query.QueryBuilders;
import org.opensearch.search.aggregations.bucket.histogram.DateHistogramInterval;
import org.opensearch.search.aggregations.bucket.histogram.Histogram;
import org.opensearch.test.OpenSearchIntegTestCase;
import org.opensearch.test.ParameterizedDynamicSettingsOpenSearchIntegTestCase;

import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING;
import static org.opensearch.search.aggregations.AggregationBuilders.dateHistogram;
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked;

/**
 * Integration test for the date histogram aggregation when the query matches every
 * document in the index (all documents carry {@code match: true}).
 *
 * <p>The suite is parameterized so that each test runs with concurrent segment search
 * both disabled and enabled.
 */
@OpenSearchIntegTestCase.SuiteScopeTestCase
public class FilterRewriteIT extends ParameterizedDynamicSettingsOpenSearchIntegTestCase {

    // simulate segment level match all
    private static final QueryBuilder QUERY = QueryBuilders.termQuery("match", true);

    /**
     * Expected document count per histogram bucket key (e.g. {@code 2024-01-05T00:00:00.000Z}).
     * Static because it is filled by {@link #setupSuiteScopeCluster()} and read by the test methods.
     */
    private static final Map<String, Long> expected = new HashMap<>();

    public FilterRewriteIT(Settings dynamicSettings) {
        super(dynamicSettings);
    }

    /**
     * Supplies the dynamic settings for each parameterized run: concurrent segment search off, then on.
     */
    @ParametersFactory
    public static Collection<Object[]> parameters() {
        return Arrays.asList(
            new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), false).build() },
            new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), true).build() }
        );
    }

    /**
     * Creates index {@code idx} and indexes several batches of documents. Every document in a
     * batch shares one date in January 2024, and the per-date document count is recorded in
     * {@link #expected}.
     */
    @Override
    protected void setupSuiteScopeCluster() throws Exception {
        assertAcked(client().admin().indices().prepareCreate("idx").get());

        // Start from a clean slate so counts accumulated below never include stale entries.
        expected.clear();

        final int segmentCount = randomIntBetween(2, 10);
        final Set<Long> longTerms = new HashSet<>();
        final DateFormatter dayFormatter = DateFormatter.forPattern("yyyy-MM-dd");

        for (int i = 0; i < segmentCount; i++) {
            final List<IndexRequestBuilder> indexRequests = new ArrayList<>();

            // Draw a term that has not been used by a previous batch.
            long longTerm;
            do {
                longTerm = randomInt(segmentCount * 2);
            } while (!longTerms.add(longTerm));
            final ZonedDateTime time = ZonedDateTime.of(2024, 1, ((int) longTerm % 20) + 1, 0, 0, 0, 0, ZoneOffset.UTC);
            final String dateTerm = dayFormatter.format(time);

            final int frequency = randomBoolean() ? 1 : randomIntBetween(2, 20);
            for (int j = 0; j < frequency; j++) {
                indexRequests.add(
                    client().prepareIndex("idx")
                        .setSource(jsonBuilder().startObject().field("date", dateTerm).field("match", true).endObject())
                );
            }
            // Distinct terms can still land on the same day (0 and 20 both map to day 1 when
            // segmentCount is 10), so accumulate the count instead of overwriting it.
            expected.merge(dateTerm + "T00:00:00.000Z", (long) frequency, Long::sum);

            // One indexRandom call per batch; presumably each call yields a separate segment — TODO confirm.
            indexRandom(true, false, indexRequests);
        }

        ensureSearchable();
    }

    /**
     * Runs a daily date histogram with {@code minDocCount(0)} under the match-all-like query and
     * checks that every expected date bucket reports the number of documents indexed for it.
     */
    public void testMinDocCountOnDateHistogram() throws Exception {
        final SearchResponse allResponse = client().prepareSearch("idx")
            .setSize(0)
            .setQuery(QUERY)
            .addAggregation(dateHistogram("histo").field("date").dateHistogramInterval(DateHistogramInterval.DAY).minDocCount(0))
            .get();

        final Histogram allHisto = allResponse.getAggregations().get("histo");
        final Map<String, Long> results = new HashMap<>();
        allHisto.getBuckets().forEach(bucket -> results.put(bucket.getKeyAsString(), bucket.getDocCount()));

        for (Map.Entry<String, Long> entry : expected.entrySet()) {
            assertEquals("unexpected doc count for bucket [" + entry.getKey() + "]", entry.getValue(), results.get(entry.getKey()));
        }
    }
}
Loading

0 comments on commit 25c2fde

Please sign in to comment.