From 52ffa161278c45f4ffffd3b4908a2d2a141f37f3 Mon Sep 17 00:00:00 2001 From: "opensearch-trigger-bot[bot]" <98922864+opensearch-trigger-bot[bot]@users.noreply.github.com> Date: Thu, 5 Sep 2024 10:12:48 +0530 Subject: [PATCH 01/17] [Star tree][Bug] Fix for derived metrics (#15640) (#15686) --------- (cherry picked from commit 71a771e938f5faf5f6d2d00e0ba27e99e170ed88) Signed-off-by: Bharathwaj G Signed-off-by: github-actions[bot] Co-authored-by: github-actions[bot] --- .../Composite99DocValuesReader.java | 2 +- .../index/compositeindex/datacube/Metric.java | 16 +++++ .../startree/builder/BaseStarTreeBuilder.java | 7 +- .../builder/OffHeapStarTreeBuilder.java | 2 +- .../builder/OnHeapStarTreeBuilder.java | 2 +- .../fileformats/meta/StarTreeMetadata.java | 29 ++++++-- .../startree/index/StarTreeValues.java | 2 +- .../index/mapper/NumberFieldMapper.java | 4 +- .../index/mapper/StarTreeMapper.java | 6 +- .../StarTreeDocValuesFormatTests.java | 69 ++++++++++++++++--- .../datacube/startree/StarTreeTestUtils.java | 40 +++++++++-- .../builder/AbstractStarTreeBuilderTests.java | 28 +++++--- 12 files changed, 163 insertions(+), 44 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesReader.java b/server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesReader.java index ca55e8e4573ab..7901336151c8e 100644 --- a/server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesReader.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesReader.java @@ -164,7 +164,7 @@ public Composite99DocValuesReader(DocValuesProducer producer, SegmentReadState r // adding metric fields for (Metric metric : starTreeMetadata.getMetrics()) { - for (MetricStat metricStat : metric.getMetrics()) { + for (MetricStat metricStat : metric.getBaseMetrics()) { fields.add( fullyQualifiedFieldNameForStarTreeMetricsDocValues( compositeFieldName, diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/Metric.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/Metric.java index 9accb0201170a..be16f1e9886cd 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/Metric.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/Metric.java @@ -13,6 +13,7 @@ import org.opensearch.core.xcontent.XContentBuilder; import java.io.IOException; +import java.util.ArrayList; import java.util.List; import java.util.Objects; @@ -23,10 +24,18 @@ public class Metric implements ToXContent { private final String field; private final List metrics; + private final List baseMetrics; public Metric(String field, List metrics) { this.field = field; this.metrics = metrics; + this.baseMetrics = new ArrayList<>(); + for (MetricStat metricStat : metrics) { + if (metricStat.isDerivedMetric()) { + continue; + } + baseMetrics.add(metricStat); + } } public String getField() { @@ -37,6 +46,13 @@ public List getMetrics() { return metrics; } + /** + * Returns only the base metrics + */ + public List getBaseMetrics() { + return baseMetrics; + } + @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(); diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java index 
ba4cb792c00df..a1d638616f2aa 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java @@ -150,10 +150,7 @@ public List generateMetricAggregatorInfos(MapperService ma metricAggregatorInfos.add(metricAggregatorInfo); continue; } - for (MetricStat metricStat : metric.getMetrics()) { - if (metricStat.isDerivedMetric()) { - continue; - } + for (MetricStat metricStat : metric.getBaseMetrics()) { FieldValueConverter fieldValueConverter; Mapper fieldMapper = mapperService.documentMapper().mappers().getMapper(metric.getField()); if (fieldMapper instanceof FieldMapper && ((FieldMapper) fieldMapper).fieldType() instanceof FieldValueConverter) { @@ -185,7 +182,7 @@ public List getMetricReaders(SegmentWriteState stat List metricReaders = new ArrayList<>(); for (Metric metric : this.starTreeField.getMetrics()) { - for (MetricStat metricStat : metric.getMetrics()) { + for (MetricStat metricStat : metric.getBaseMetrics()) { SequentialDocValuesIterator metricReader; FieldInfo metricFieldInfo = state.fieldInfos.fieldInfo(metric.getField()); if (metricStat.equals(MetricStat.DOC_COUNT)) { diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilder.java index 62fda3e56d289..1613b7c5a3ac0 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilder.java @@ -158,7 +158,7 @@ Iterator mergeStarTrees(List starTreeValuesSub List metricReaders = new ArrayList<>(); // get doc id set iterators for metrics for (Metric metric : starTreeValues.getStarTreeField().getMetrics()) { - for (MetricStat metricStat : metric.getMetrics()) { + for (MetricStat metricStat : metric.getBaseMetrics()) { String metricFullName = fullyQualifiedFieldNameForStarTreeMetricsDocValues( starTreeValues.getStarTreeField().getName(), metric.getField(), diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java index ec9475caf7d6d..1a5c906ad413b 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java @@ -144,7 +144,7 @@ StarTreeDocument[] getSegmentsStarTreeDocuments(List starTreeVal List metricReaders = new ArrayList<>(); // get doc id set iterators for metrics for (Metric metric : starTreeValues.getStarTreeField().getMetrics()) { - for (MetricStat metricStat : metric.getMetrics()) { + for (MetricStat metricStat : metric.getBaseMetrics()) { String metricFullName = fullyQualifiedFieldNameForStarTreeMetricsDocValues( starTreeValues.getStarTreeField().getName(), metric.getField(), diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadata.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadata.java index e7bb32282ece7..7352c215ee390 100644 --- 
a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadata.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadata.java @@ -220,16 +220,37 @@ private int readMetricsCount() throws IOException { private List<Metric> readMetricEntries() throws IOException { int metricCount = readMetricsCount(); - Map<String, Metric> starTreeMetricMap = new LinkedHashMap<>(); + Map<String, List<MetricStat>> starTreeMetricStatMap = new LinkedHashMap<>(); for (int i = 0; i < metricCount; i++) { String metricName = meta.readString(); int metricStatOrdinal = meta.readVInt(); MetricStat metricStat = MetricStat.fromMetricOrdinal(metricStatOrdinal); - Metric metric = starTreeMetricMap.computeIfAbsent(metricName, field -> new Metric(field, new ArrayList<>())); - metric.getMetrics().add(metricStat); + List<MetricStat> metricStats = starTreeMetricStatMap.computeIfAbsent(metricName, field -> new ArrayList<>()); + metricStats.add(metricStat); } + List<Metric> starTreeMetricMap = new ArrayList<>(); + for (Map.Entry<String, List<MetricStat>> metricStatsEntry : starTreeMetricStatMap.entrySet()) { + addEligibleDerivedMetrics(metricStatsEntry.getValue()); + starTreeMetricMap.add(new Metric(metricStatsEntry.getKey(), metricStatsEntry.getValue())); - return new ArrayList<>(starTreeMetricMap.values()); + } + return starTreeMetricMap; + } + + /** + * Add derived metrics if all associated base metrics are present + */ + private void addEligibleDerivedMetrics(List<MetricStat> metricStatsList) { + Set<MetricStat> metricStatsSet = new HashSet<>(metricStatsList); + for (MetricStat metric : MetricStat.values()) { + if (metric.isDerivedMetric() && !metricStatsSet.contains(metric)) { + List<MetricStat> sourceMetrics = metric.getBaseMetrics(); + if (metricStatsSet.containsAll(sourceMetrics)) { + metricStatsList.add(metric); + metricStatsSet.add(metric); + } + } + } } private int readSegmentAggregatedDocCount() throws IOException { diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/index/StarTreeValues.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/index/StarTreeValues.java index 7d5f5ba02b9f8..a34bbbe9ee738 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/index/StarTreeValues.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/index/StarTreeValues.java @@ -171,7 +171,7 @@ public StarTreeValues( // get doc id set iterators for metrics for (Metric metric : starTreeMetadata.getMetrics()) { - for (MetricStat metricStat : metric.getMetrics()) { + for (MetricStat metricStat : metric.getBaseMetrics()) { String metricFullName = fullyQualifiedFieldNameForStarTreeMetricsDocValues( starTreeField.getName(), metric.getField(), diff --git a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java index eb0694edc70ba..43e975f95757b 100644 --- a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java @@ -653,9 +653,7 @@ public byte[] encodePoint(Number value) { @Override public double toDoubleValue(long value) { - byte[] bytes = new byte[8]; - NumericUtils.longToSortableBytes(value, bytes, 0); - return NumericUtils.sortableLongToDouble(NumericUtils.sortableBytesToLong(bytes, 0)); + return objectToDouble(value); } @Override diff --git a/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java
b/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java index 9e2e7ef3c3889..52dab17e0b0bb 100644 --- a/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java @@ -287,11 +287,7 @@ private List buildMetrics(String fieldName, Map map, Map } int numBaseMetrics = 0; for (Metric metric : metrics) { - for (MetricStat metricStat : metric.getMetrics()) { - if (metricStat.isDerivedMetric() == false) { - numBaseMetrics++; - } - } + numBaseMetrics += metric.getBaseMetrics().size(); } if (numBaseMetrics > context.getSettings() .getAsInt( diff --git a/server/src/test/java/org/opensearch/index/codec/composite99/datacube/startree/StarTreeDocValuesFormatTests.java b/server/src/test/java/org/opensearch/index/codec/composite99/datacube/startree/StarTreeDocValuesFormatTests.java index 0c6d21d28cc8a..1c267c67e60ed 100644 --- a/server/src/test/java/org/opensearch/index/codec/composite99/datacube/startree/StarTreeDocValuesFormatTests.java +++ b/server/src/test/java/org/opensearch/index/codec/composite99/datacube/startree/StarTreeDocValuesFormatTests.java @@ -119,23 +119,23 @@ public void testStarTreeDocValues() throws IOException { Document doc = new Document(); doc.add(new SortedNumericDocValuesField("sndv", 1)); doc.add(new SortedNumericDocValuesField("dv", 1)); - doc.add(new SortedNumericDocValuesField("field", 1)); + doc.add(new SortedNumericDocValuesField("field", -1)); iw.addDocument(doc); doc = new Document(); doc.add(new SortedNumericDocValuesField("sndv", 1)); doc.add(new SortedNumericDocValuesField("dv", 1)); - doc.add(new SortedNumericDocValuesField("field", 1)); + doc.add(new SortedNumericDocValuesField("field", -1)); iw.addDocument(doc); doc = new Document(); iw.forceMerge(1); doc.add(new SortedNumericDocValuesField("sndv", 2)); doc.add(new SortedNumericDocValuesField("dv", 2)); - doc.add(new SortedNumericDocValuesField("field", 2)); + doc.add(new SortedNumericDocValuesField("field", -2)); iw.addDocument(doc); doc = new Document(); doc.add(new SortedNumericDocValuesField("sndv", 2)); doc.add(new SortedNumericDocValuesField("dv", 2)); - doc.add(new SortedNumericDocValuesField("field", 2)); + doc.add(new SortedNumericDocValuesField("field", -2)); iw.addDocument(doc); iw.forceMerge(1); iw.close(); @@ -144,11 +144,39 @@ public void testStarTreeDocValues() throws IOException { TestUtil.checkReader(ir); assertEquals(1, ir.leaves().size()); + // Segment documents + /** + * sndv dv field + * [1, 1, -1] + * [1, 1, -1] + * [2, 2, -2] + * [2, 2, -2] + */ + // Star tree documents + /** + * sndv dv | [ sum, value_count, min, max[field]] , [ sum, value_count, min, max[sndv]], doc_count + * [1, 1] | [-2.0, 2.0, -1.0, -1.0, 2.0, 2.0, 1.0, 1.0, 2.0] + * [2, 2] | [-4.0, 2.0, -2.0, -2.0, 4.0, 2.0, 2.0, 2.0, 2.0] + * [null, 1] | [-2.0, 2.0, -1.0, -1.0, 2.0, 2.0, 1.0, 1.0, 2.0] + * [null, 2] | [-4.0, 2.0, -2.0, -2.0, 4.0, 2.0, 2.0, 2.0, 2.0] + */ StarTreeDocument[] expectedStarTreeDocuments = new StarTreeDocument[4]; - expectedStarTreeDocuments[0] = new StarTreeDocument(new Long[] { 1L, 1L }, new Double[] { 2.0, 2.0, 2.0 }); - expectedStarTreeDocuments[1] = new StarTreeDocument(new Long[] { 2L, 2L }, new Double[] { 4.0, 2.0, 4.0 }); - expectedStarTreeDocuments[2] = new StarTreeDocument(new Long[] { null, 1L }, new Double[] { 2.0, 2.0, 2.0 }); - expectedStarTreeDocuments[3] = new StarTreeDocument(new Long[] { null, 2L }, new Double[] { 4.0, 2.0, 4.0 }); + expectedStarTreeDocuments[0] = new StarTreeDocument( + new
Long[] { 1L, 1L }, + new Double[] { -2.0, 2.0, -1.0, -1.0, 2.0, 2.0, 1.0, 1.0, 2.0 } + ); + expectedStarTreeDocuments[1] = new StarTreeDocument( + new Long[] { 2L, 2L }, + new Double[] { -4.0, 2.0, -2.0, -2.0, 4.0, 2.0, 2.0, 2.0, 2.0 } + ); + expectedStarTreeDocuments[2] = new StarTreeDocument( + new Long[] { null, 1L }, + new Double[] { -2.0, 2.0, -1.0, -1.0, 2.0, 2.0, 1.0, 1.0, 2.0 } + ); + expectedStarTreeDocuments[3] = new StarTreeDocument( + new Long[] { null, 2L }, + new Double[] { -4.0, 2.0, -2.0, -2.0, 4.0, 2.0, 2.0, 2.0, 2.0 } + ); for (LeafReaderContext context : ir.leaves()) { SegmentReader reader = Lucene.segmentReader(context.reader()); @@ -159,7 +187,17 @@ public void testStarTreeDocValues() throws IOException { StarTreeValues starTreeValues = (StarTreeValues) starTreeDocValuesReader.getCompositeIndexValues(compositeIndexFieldInfo); StarTreeDocument[] starTreeDocuments = StarTreeTestUtils.getSegmentsStarTreeDocuments( List.of(starTreeValues), - List.of(NumberFieldMapper.NumberType.DOUBLE, NumberFieldMapper.NumberType.LONG, NumberFieldMapper.NumberType.LONG), + List.of( + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.LONG, + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.LONG, + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.LONG + ), reader.maxDoc() ); assertStarTreeDocuments(starTreeDocuments, expectedStarTreeDocuments); @@ -190,6 +228,19 @@ private XContentBuilder getExpandedMapping() throws IOException { b.startArray("stats"); b.value("sum"); b.value("value_count"); + b.value("avg"); + b.value("min"); + b.value("max"); + b.endArray(); + b.endObject(); + b.startObject(); + b.field("name", "sndv"); + b.startArray("stats"); + b.value("sum"); + b.value("value_count"); + b.value("avg"); + b.value("min"); + b.value("max"); b.endArray(); b.endObject(); b.endArray(); diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeTestUtils.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeTestUtils.java index f3e111bf6caa6..b7395b993f67b 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeTestUtils.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeTestUtils.java @@ -61,6 +61,9 @@ public static StarTreeDocument[] getSegmentsStarTreeDocuments( // get doc id set iterators for metrics for (Metric metric : starTreeValues.getStarTreeField().getMetrics()) { for (MetricStat metricStat : metric.getMetrics()) { + if (metricStat.isDerivedMetric()) { + continue; + } String metricFullName = fullyQualifiedFieldNameForStarTreeMetricsDocValues( starTreeValues.getStarTreeField().getName(), metric.getField(), @@ -125,18 +128,18 @@ public static void assertStarTreeDocuments(StarTreeDocument[] starTreeDocuments, assertNotNull(resultStarTreeDocument.dimensions); assertNotNull(resultStarTreeDocument.metrics); - assertEquals(resultStarTreeDocument.dimensions.length, expectedStarTreeDocument.dimensions.length); - assertEquals(resultStarTreeDocument.metrics.length, expectedStarTreeDocument.metrics.length); + assertEquals(expectedStarTreeDocument.dimensions.length, resultStarTreeDocument.dimensions.length); + assertEquals(expectedStarTreeDocument.metrics.length, resultStarTreeDocument.metrics.length); for (int di = 0; di < resultStarTreeDocument.dimensions.length; di++) { 
- assertEquals(resultStarTreeDocument.dimensions[di], expectedStarTreeDocument.dimensions[di]); + assertEquals(expectedStarTreeDocument.dimensions[di], resultStarTreeDocument.dimensions[di]); } for (int mi = 0; mi < resultStarTreeDocument.metrics.length; mi++) { if (expectedStarTreeDocument.metrics[mi] instanceof Long) { - assertEquals(resultStarTreeDocument.metrics[mi], ((Long) expectedStarTreeDocument.metrics[mi]).doubleValue()); + assertEquals(((Long) expectedStarTreeDocument.metrics[mi]).doubleValue(), resultStarTreeDocument.metrics[mi]); } else { - assertEquals(resultStarTreeDocument.metrics[mi], expectedStarTreeDocument.metrics[mi]); + assertEquals(expectedStarTreeDocument.metrics[mi], resultStarTreeDocument.metrics[mi]); } } } @@ -267,9 +270,34 @@ public static void assertStarTreeMetadata(StarTreeMetadata expectedStarTreeMetad Metric expectedMetric = expectedStarTreeMetadata.getMetrics().get(i); Metric resultMetric = resultStarTreeMetadata.getMetrics().get(i); assertEquals(expectedMetric.getField(), resultMetric.getField()); + List<MetricStat> metricStats = new ArrayList<>(); + for (MetricStat metricStat : expectedMetric.getMetrics()) { + if (metricStat.isDerivedMetric()) { + continue; + } + metricStats.add(metricStat); + } + Metric expectedMetricWithoutDerivedMetrics = new Metric(expectedMetric.getField(), metricStats); + metricStats = new ArrayList<>(); + for (MetricStat metricStat : resultMetric.getMetrics()) { + if (metricStat.isDerivedMetric()) { + continue; + } + metricStats.add(metricStat); + } + Metric resultantMetricWithoutDerivedMetrics = new Metric(resultMetric.getField(), metricStats); + + // assert base metrics are in order in metadata + for (int j = 0; j < expectedMetricWithoutDerivedMetrics.getMetrics().size(); j++) { + assertEquals( + expectedMetricWithoutDerivedMetrics.getMetrics().get(j), + resultantMetricWithoutDerivedMetrics.getMetrics().get(j) + ); + } + // assert all metrics (including derived metrics) are present for (int j = 0; j < expectedMetric.getMetrics().size(); j++) { - assertEquals(expectedMetric.getMetrics().get(j), resultMetric.getMetrics().get(j)); + assertTrue(resultMetric.getMetrics().contains(expectedMetric.getMetrics().get(j))); } } diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java index ad54bda9a916e..65adc43ea8bea 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java @@ -205,9 +205,7 @@ private SegmentReadState getReadState(int numDocs, List dimensionFields, int numMetrics = 0; for (Metric metric : metrics) { - for (MetricStat metricStat : metric.getMetrics()) { - numMetrics++; - } + numMetrics += metric.getBaseMetrics().size(); } FieldInfo[] fields = new FieldInfo[dimensionFields.size() + numMetrics]; @@ -237,7 +235,7 @@ private SegmentReadState getReadState(int numDocs, List dimensionFields, } for (Metric metric : metrics) { - for (MetricStat metricStat : metric.getMetrics()) { + for (MetricStat metricStat : metric.getBaseMetrics()) { fields[i] = new FieldInfo( fullyQualifiedFieldNameForStarTreeMetricsDocValues( compositeField.getName(), @@ -2047,7 +2045,7 @@ public void testFlushFlow() throws IOException { VERSION_CURRENT, builder.numStarTreeNodes,
List.of("field1", "field3"), - List.of(new Metric("field2", List.of(MetricStat.SUM, MetricStat.VALUE_COUNT))), + List.of(new Metric("field2", List.of(MetricStat.SUM, MetricStat.VALUE_COUNT, MetricStat.AVG))), 6, builder.numStarTreeDocs, 1000, @@ -2138,7 +2136,7 @@ public void testFlushFlowDimsReverse() throws IOException { VERSION_CURRENT, builder.numStarTreeNodes, List.of("field1", "field3"), - List.of(new Metric("field2", List.of(MetricStat.SUM, MetricStat.VALUE_COUNT))), + List.of(new Metric("field2", List.of(MetricStat.SUM, MetricStat.VALUE_COUNT, MetricStat.AVG))), 6, builder.numStarTreeDocs, 1000, @@ -2270,8 +2268,9 @@ private StarTreeField getStarTreeFieldWithMultipleMetrics() { Dimension d2 = new NumericDimension("field3"); Metric m1 = new Metric("field2", List.of(MetricStat.SUM)); Metric m2 = new Metric("field2", List.of(MetricStat.VALUE_COUNT)); + Metric m3 = new Metric("field2", List.of(MetricStat.AVG)); List dims = List.of(d1, d2); - List metrics = List.of(m1, m2); + List metrics = List.of(m1, m2, m3); StarTreeFieldConfiguration c = new StarTreeFieldConfiguration(1000, new HashSet<>(), getBuildMode()); return new StarTreeField("sf", dims, metrics, c); } @@ -4069,12 +4068,19 @@ public void testMergeFlow() throws IOException { metricsWithField.add(i); } + List metricsListValueCount = new ArrayList<>(1000); + List metricsWithFieldValueCount = new ArrayList<>(1000); + for (int i = 0; i < 1000; i++) { + metricsListValueCount.add((long) i); + metricsWithFieldValueCount.add(i); + } + Dimension d1 = new NumericDimension("field1"); Dimension d2 = new NumericDimension("field3"); Dimension d3 = new NumericDimension("field5"); Dimension d4 = new NumericDimension("field8"); // Dimension d5 = new NumericDimension("field5"); - Metric m1 = new Metric("field2", List.of(MetricStat.SUM)); + Metric m1 = new Metric("field2", List.of(MetricStat.SUM, MetricStat.AVG, MetricStat.VALUE_COUNT)); Metric m2 = new Metric("_doc_count", List.of(MetricStat.DOC_COUNT)); List dims = List.of(d1, d2, d3, d4); List metrics = List.of(m1, m2); @@ -4085,6 +4091,7 @@ public void testMergeFlow() throws IOException { SortedNumericDocValues d3sndv = getSortedNumericMock(dimList3, docsWithField3); SortedNumericDocValues d4sndv = getSortedNumericMock(dimList4, docsWithField4); SortedNumericDocValues m1sndv = getSortedNumericMock(metricsList, metricsWithField); + SortedNumericDocValues valucountsndv = getSortedNumericMock(metricsListValueCount, metricsWithFieldValueCount); SortedNumericDocValues m2sndv = DocValues.emptySortedNumeric(); Map> dimDocIdSetIterators = Map.of( "field1", @@ -4100,6 +4107,8 @@ public void testMergeFlow() throws IOException { Map> metricDocIdSetIterators = Map.of( "sf_field2_sum_metric", () -> m1sndv, + "sf_field2_value_count_metric", + () -> valucountsndv, "sf__doc_count_doc_count_metric", () -> m2sndv ); @@ -4118,6 +4127,7 @@ public void testMergeFlow() throws IOException { SortedNumericDocValues f2d3sndv = getSortedNumericMock(dimList3, docsWithField3); SortedNumericDocValues f2d4sndv = getSortedNumericMock(dimList4, docsWithField4); SortedNumericDocValues f2m1sndv = getSortedNumericMock(metricsList, metricsWithField); + SortedNumericDocValues f2valucountsndv = getSortedNumericMock(metricsListValueCount, metricsWithFieldValueCount); SortedNumericDocValues f2m2sndv = DocValues.emptySortedNumeric(); Map> f2dimDocIdSetIterators = Map.of( "field1", @@ -4133,6 +4143,8 @@ public void testMergeFlow() throws IOException { Map> f2metricDocIdSetIterators = Map.of( "sf_field2_sum_metric", () -> f2m1sndv, + 
"sf_field2_value_count_metric", + () -> f2valucountsndv, "sf__doc_count_doc_count_metric", () -> f2m2sndv ); From af8b4e902cf4b0bd25a9590f0f813837df9bcc9d Mon Sep 17 00:00:00 2001 From: Marc Handalian Date: Wed, 4 Sep 2024 22:09:47 -0700 Subject: [PATCH 02/17] [Backport 2.x] [RW Separation] Add polling segment replication for search replicas (#15627) (#15717) --- CHANGELOG.md | 2 +- .../SearchReplicaReplicationIT.java | 85 +++++++++ .../org/opensearch/index/IndexModule.java | 54 +++++- .../org/opensearch/index/IndexService.java | 73 +++++++- .../index/seqno/ReplicationTracker.java | 11 +- .../opensearch/indices/IndicesService.java | 12 +- .../SegmentReplicationTargetService.java | 1 + .../replication/SegmentReplicator.java | 35 +++- .../main/java/org/opensearch/node/Node.java | 7 +- .../opensearch/index/IndexModuleTests.java | 3 +- .../opensearch/index/IndexServiceTests.java | 53 ++++++ .../replication/SegmentReplicatorTests.java | 163 ++++++++++++++++++ 12 files changed, 482 insertions(+), 17 deletions(-) create mode 100644 server/src/internalClusterTest/java/org/opensearch/indices/replication/SearchReplicaReplicationIT.java create mode 100644 server/src/test/java/org/opensearch/indices/replication/SegmentReplicatorTests.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 07cbbbe020286..6713403a4e339 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,7 +39,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Adding translog durability validation in index templates ([#15494](https://github.com/opensearch-project/OpenSearch/pull/15494)) - [Range Queries] Add new approximateable query framework to short-circuit range queries ([#13788](https://github.com/opensearch-project/OpenSearch/pull/13788)) - [Workload Management] Add query group level failure tracking ([#15227](https://github.com/opensearch-project/OpenSearch/pull/15527)) -- [Reader Writer Separation] Add searchOnly replica routing configuration ([#15410](https://github.com/opensearch-project/OpenSearch/pull/15410)) +- [Reader Writer Separation] Add experimental search replica shard type to achieve reader writer separation ([#15237](https://github.com/opensearch-project/OpenSearch/pull/15237)) - Add index creation using the context field ([#15290](https://github.com/opensearch-project/OpenSearch/pull/15290)) - [Remote Publication] Add remote download stats ([#15291](https://github.com/opensearch-project/OpenSearch/pull/15291))) - Add support to upload snapshot shard blobs with hashed prefix ([#15426](https://github.com/opensearch-project/OpenSearch/pull/15426)) diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SearchReplicaReplicationIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SearchReplicaReplicationIT.java new file mode 100644 index 0000000000000..a1b512c326ac5 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SearchReplicaReplicationIT.java @@ -0,0 +1,85 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.indices.replication; + +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; +import org.opensearch.test.OpenSearchIntegTestCase; +import org.junit.After; +import org.junit.Before; + +import java.nio.file.Path; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +public class SearchReplicaReplicationIT extends SegmentReplicationBaseIT { + + private static final String REPOSITORY_NAME = "test-remote-store-repo"; + protected Path absolutePath; + + private Boolean useRemoteStore; + + @Before + public void randomizeRemoteStoreEnabled() { + useRemoteStore = randomBoolean(); + } + + @Override + protected Settings nodeSettings(int nodeOrdinal) { + if (useRemoteStore) { + if (absolutePath == null) { + absolutePath = randomRepoPath().toAbsolutePath(); + } + return Settings.builder() + .put(super.nodeSettings(nodeOrdinal)) + .put(remoteStoreClusterSettings(REPOSITORY_NAME, absolutePath)) + .build(); + } + return super.nodeSettings(nodeOrdinal); + } + + @After + public void teardown() { + if (useRemoteStore) { + clusterAdmin().prepareCleanupRepository(REPOSITORY_NAME).get(); + } + } + + @Override + public Settings indexSettings() { + return Settings.builder() + .put(super.indexSettings()) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put(IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS, 1) + .build(); + } + + @Override + protected Settings featureFlagSettings() { + return Settings.builder().put(super.featureFlagSettings()).put(FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL, true).build(); + } + + public void testReplication() throws Exception { + internalCluster().startClusterManagerOnlyNode(); + final String primary = internalCluster().startDataOnlyNode(); + createIndex(INDEX_NAME); + ensureYellowAndNoInitializingShards(INDEX_NAME); + final String replica = internalCluster().startDataOnlyNode(); + ensureGreen(INDEX_NAME); + + final int docCount = 10; + for (int i = 0; i < docCount; i++) { + client().prepareIndex(INDEX_NAME).setId(Integer.toString(i)).setSource("field", "value" + i).execute().get(); + } + refresh(INDEX_NAME); + waitForSearchableDocs(docCount, primary, replica); + } + +} diff --git a/server/src/main/java/org/opensearch/index/IndexModule.java b/server/src/main/java/org/opensearch/index/IndexModule.java index 33897599fdd51..1725afcad72cf 100644 --- a/server/src/main/java/org/opensearch/index/IndexModule.java +++ b/server/src/main/java/org/opensearch/index/IndexModule.java @@ -73,6 +73,7 @@ import org.opensearch.index.engine.EngineFactory; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.shard.IndexEventListener; +import org.opensearch.index.shard.IndexShard; import org.opensearch.index.shard.IndexingOperationListener; import org.opensearch.index.shard.SearchOperationListener; import org.opensearch.index.similarity.SimilarityService; @@ -729,6 +730,56 @@ public IndexService newIndexService( Supplier clusterDefaultRefreshIntervalSupplier, RecoverySettings recoverySettings, RemoteStoreSettings remoteStoreSettings + ) throws IOException { + return newIndexService( + indexCreationContext, + environment, + xContentRegistry, + shardStoreDeleter, + circuitBreakerService, + bigArrays, + threadPool, + scriptService, + clusterService, + client, + indicesQueryCache, + mapperRegistry, + indicesFieldDataCache, + namedWriteableRegistry, + 
idFieldDataEnabled, + valuesSourceRegistry, + remoteDirectoryFactory, + translogFactorySupplier, + clusterDefaultRefreshIntervalSupplier, + recoverySettings, + remoteStoreSettings, + (s) -> {} + ); + } + + public IndexService newIndexService( + IndexService.IndexCreationContext indexCreationContext, + NodeEnvironment environment, + NamedXContentRegistry xContentRegistry, + IndexService.ShardStoreDeleter shardStoreDeleter, + CircuitBreakerService circuitBreakerService, + BigArrays bigArrays, + ThreadPool threadPool, + ScriptService scriptService, + ClusterService clusterService, + Client client, + IndicesQueryCache indicesQueryCache, + MapperRegistry mapperRegistry, + IndicesFieldDataCache indicesFieldDataCache, + NamedWriteableRegistry namedWriteableRegistry, + BooleanSupplier idFieldDataEnabled, + ValuesSourceRegistry valuesSourceRegistry, + IndexStorePlugin.DirectoryFactory remoteDirectoryFactory, + BiFunction translogFactorySupplier, + Supplier clusterDefaultRefreshIntervalSupplier, + RecoverySettings recoverySettings, + RemoteStoreSettings remoteStoreSettings, + Consumer replicator ) throws IOException { final IndexEventListener eventListener = freeze(); Function> readerWrapperFactory = indexReaderWrapper @@ -789,7 +840,8 @@ public IndexService newIndexService( recoverySettings, remoteStoreSettings, fileCache, - compositeIndexSettings + compositeIndexSettings, + replicator ); success = true; return indexService; diff --git a/server/src/main/java/org/opensearch/index/IndexService.java b/server/src/main/java/org/opensearch/index/IndexService.java index 3d1bc2851069f..6e67c8f83ee4c 100644 --- a/server/src/main/java/org/opensearch/index/IndexService.java +++ b/server/src/main/java/org/opensearch/index/IndexService.java @@ -136,6 +136,7 @@ import static java.util.Collections.emptyMap; import static java.util.Collections.unmodifiableMap; import static org.opensearch.common.collect.MapBuilder.newMapBuilder; +import static org.opensearch.common.util.FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL_SETTING; import static org.opensearch.index.remote.RemoteMigrationIndexMetadataUpdater.indexHasRemoteStoreSettings; /** @@ -174,6 +175,7 @@ public class IndexService extends AbstractIndexComponent implements IndicesClust private volatile AsyncTranslogFSync fsyncTask; private volatile AsyncGlobalCheckpointTask globalCheckpointTask; private volatile AsyncRetentionLeaseSyncTask retentionLeaseSyncTask; + private volatile AsyncReplicationTask asyncReplicationTask; // don't convert to Setting<> and register... 
we only set this in tests and register via a plugin private final String INDEX_TRANSLOG_RETENTION_CHECK_INTERVAL_SETTING = "index.translog.retention.check_interval"; @@ -194,6 +196,7 @@ public class IndexService extends AbstractIndexComponent implements IndicesClust private final RemoteStoreSettings remoteStoreSettings; private final FileCache fileCache; private final CompositeIndexSettings compositeIndexSettings; + private final Consumer replicator; public IndexService( IndexSettings indexSettings, @@ -231,7 +234,8 @@ public IndexService( RecoverySettings recoverySettings, RemoteStoreSettings remoteStoreSettings, FileCache fileCache, - CompositeIndexSettings compositeIndexSettings + CompositeIndexSettings compositeIndexSettings, + Consumer replicator ) { super(indexSettings); this.allowExpensiveQueries = allowExpensiveQueries; @@ -306,11 +310,15 @@ public IndexService( this.trimTranslogTask = new AsyncTrimTranslogTask(this); this.globalCheckpointTask = new AsyncGlobalCheckpointTask(this); this.retentionLeaseSyncTask = new AsyncRetentionLeaseSyncTask(this); + if (READER_WRITER_SPLIT_EXPERIMENTAL_SETTING.get(indexSettings.getNodeSettings())) { + this.asyncReplicationTask = new AsyncReplicationTask(this); + } this.translogFactorySupplier = translogFactorySupplier; this.recoverySettings = recoverySettings; this.remoteStoreSettings = remoteStoreSettings; this.compositeIndexSettings = compositeIndexSettings; this.fileCache = fileCache; + this.replicator = replicator; updateFsyncTaskIfNecessary(); } @@ -387,7 +395,8 @@ public IndexService( recoverySettings, remoteStoreSettings, fileCache, - null + null, + (s) -> {} ); } @@ -463,7 +472,8 @@ public IndexService( recoverySettings, remoteStoreSettings, null, - null + null, + s -> {} ); } @@ -472,6 +482,11 @@ static boolean needsMapperService(IndexSettings indexSettings, IndexCreationCont && indexCreationContext == IndexCreationContext.CREATE_INDEX); // metadata verification needs a mapper service } + // visible for tests + AsyncReplicationTask getReplicationTask() { + return asyncReplicationTask; + } + /** * Context for index creation * @@ -1142,11 +1157,22 @@ public synchronized void updateMetadata(final IndexMetadata currentIndexMetadata } onRefreshIntervalChange(); updateFsyncTaskIfNecessary(); + if (READER_WRITER_SPLIT_EXPERIMENTAL_SETTING.get(indexSettings.getNodeSettings())) { + updateReplicationTask(); + } } metadataListeners.forEach(c -> c.accept(newIndexMetadata)); } + private void updateReplicationTask() { + try { + asyncReplicationTask.close(); + } finally { + asyncReplicationTask = new AsyncReplicationTask(this); + } + } + /** * Called whenever the refresh interval changes. This can happen in 2 cases - * 1. {@code cluster.default.index.refresh_interval} cluster setting changes. 
The change would only happen for @@ -1411,6 +1437,47 @@ public String toString() { } } + final class AsyncReplicationTask extends BaseAsyncTask { + + AsyncReplicationTask(IndexService indexService) { + super(indexService, indexService.getRefreshInterval()); + } + + @Override + protected void runInternal() { + indexService.maybeSyncSegments(false); + } + + @Override + protected String getThreadPool() { + return ThreadPool.Names.GENERIC; + } + + @Override + public String toString() { + return "replication"; + } + + @Override + protected boolean mustReschedule() { + return indexSettings.isSegRepEnabledOrRemoteNode() && super.mustReschedule(); + } + } + + private void maybeSyncSegments(boolean force) { + if (getRefreshInterval().millis() > 0 || force) { + for (IndexShard shard : this.shards.values()) { + try { + if (shard.routingEntry().isSearchOnly() && shard.routingEntry().active()) { + replicator.accept(shard); + } + } catch (IndexShardClosedException | AlreadyClosedException ex) { + // do nothing + } + } + } + } + final class AsyncTrimTranslogTask extends BaseAsyncTask { AsyncTrimTranslogTask(IndexService indexService) { diff --git a/server/src/main/java/org/opensearch/index/seqno/ReplicationTracker.java b/server/src/main/java/org/opensearch/index/seqno/ReplicationTracker.java index ee92e09698a98..9bffaedcbf482 100644 --- a/server/src/main/java/org/opensearch/index/seqno/ReplicationTracker.java +++ b/server/src/main/java/org/opensearch/index/seqno/ReplicationTracker.java @@ -1253,12 +1253,13 @@ public ReplicationCheckpoint getLatestReplicationCheckpoint() { return this.latestReplicationCheckpoint; } - private boolean isPrimaryRelocation(String allocationId) { + // skip any shard that is a relocating primary or search only replica (not tracked by primary) + private boolean shouldSkipReplicationTimer(String allocationId) { Optional shardRouting = routingTable.shards() .stream() .filter(routing -> routing.allocationId().getId().equals(allocationId)) .findAny(); - return shardRouting.isPresent() && shardRouting.get().primary(); + return shardRouting.isPresent() && (shardRouting.get().primary() || shardRouting.get().isSearchOnly()); } private void createReplicationLagTimers() { @@ -1270,7 +1271,7 @@ private void createReplicationLagTimers() { // it is possible for a shard to be in-sync but not yet removed from the checkpoints collection after a failover event. 
if (cps.inSync && replicationGroup.getUnavailableInSyncShards().contains(allocationId) == false - && isPrimaryRelocation(allocationId) == false + && shouldSkipReplicationTimer(allocationId) == false && latestReplicationCheckpoint.isAheadOf(cps.visibleReplicationCheckpoint) && (indexSettings.isSegRepLocalEnabled() == true || isShardOnRemoteEnabledNode.apply(routingTable.getByAllocationId(allocationId).currentNodeId()))) { @@ -1304,7 +1305,7 @@ public synchronized void startReplicationLagTimers(ReplicationCheckpoint checkpo final CheckpointState cps = e.getValue(); if (cps.inSync && replicationGroup.getUnavailableInSyncShards().contains(allocationId) == false - && isPrimaryRelocation(e.getKey()) == false + && shouldSkipReplicationTimer(e.getKey()) == false && latestReplicationCheckpoint.isAheadOf(cps.visibleReplicationCheckpoint) && cps.checkpointTimers.containsKey(latestReplicationCheckpoint)) { cps.checkpointTimers.get(latestReplicationCheckpoint).start(); @@ -1332,7 +1333,7 @@ public synchronized Set getSegmentReplicationStats entry -> entry.getKey().equals(this.shardAllocationId) == false && entry.getValue().inSync && replicationGroup.getUnavailableInSyncShards().contains(entry.getKey()) == false - && isPrimaryRelocation(entry.getKey()) == false + && shouldSkipReplicationTimer(entry.getKey()) == false /*Check if the current primary shard is migrating to remote and all the other shard copies of the same index still haven't completely moved over to the remote-enabled nodes. Ensures that: diff --git a/server/src/main/java/org/opensearch/indices/IndicesService.java b/server/src/main/java/org/opensearch/indices/IndicesService.java index f95b53e627567..1981e5c08d7c3 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesService.java +++ b/server/src/main/java/org/opensearch/indices/IndicesService.java @@ -361,6 +361,7 @@ public class IndicesService extends AbstractLifecycleComponent private final SearchRequestStats searchRequestStats; private final FileCache fileCache; private final CompositeIndexSettings compositeIndexSettings; + private final Consumer<IndexShard> replicator; @Override protected void doStart() { @@ -397,7 +398,8 @@ public IndicesService( CacheService cacheService, RemoteStoreSettings remoteStoreSettings, FileCache fileCache, - CompositeIndexSettings compositeIndexSettings + CompositeIndexSettings compositeIndexSettings, + Consumer<IndexShard> replicator ) { this.settings = settings; this.threadPool = threadPool; @@ -506,6 +508,7 @@ protected void closeInternal() { this.remoteStoreSettings = remoteStoreSettings; this.compositeIndexSettings = compositeIndexSettings; this.fileCache = fileCache; + this.replicator = replicator; } public IndicesService( @@ -567,7 +570,8 @@ public IndicesService( cacheService, remoteStoreSettings, fileCache, - null + null, + (s) -> {} ); } @@ -629,6 +633,7 @@ public IndicesService( cacheService, remoteStoreSettings, null, + null, null ); } @@ -1046,7 +1051,8 @@ private synchronized IndexService createIndexService( translogFactorySupplier, this::getClusterDefaultRefreshInterval, this.recoverySettings, - this.remoteStoreSettings + this.remoteStoreSettings, + replicator ); } diff --git a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTargetService.java b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTargetService.java index a042e08c2a53f..8fee3f671ecc9 100644 --- a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTargetService.java +++
b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTargetService.java @@ -157,6 +157,7 @@ public SegmentReplicationTargetService( ForceSyncRequest::new, new ForceSyncTransportRequestHandler() ); + replicator.setSourceFactory(sourceFactory); } @Override diff --git a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicator.java b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicator.java index 417556c22636e..ad3bc1933208c 100644 --- a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicator.java +++ b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicator.java @@ -13,6 +13,7 @@ import org.apache.logging.log4j.message.ParameterizedMessage; import org.apache.lucene.index.CorruptIndexException; import org.opensearch.OpenSearchCorruptionException; +import org.opensearch.common.SetOnce; import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.concurrent.AbstractRunnable; import org.opensearch.common.util.concurrent.ConcurrentCollections; @@ -44,12 +45,44 @@ public class SegmentReplicator { private final Map completedReplications = ConcurrentCollections.newConcurrentMap(); private final ThreadPool threadPool; + private final SetOnce sourceFactory; + public SegmentReplicator(ThreadPool threadPool) { this.onGoingReplications = new ReplicationCollection<>(logger, threadPool); this.threadPool = threadPool; + this.sourceFactory = new SetOnce<>(); + } + + /** + * Starts a replication event for the given shard. + * @param shard - {@link IndexShard} replica shard + */ + public void startReplication(IndexShard shard) { + if (sourceFactory.get() == null) return; + startReplication( + shard, + shard.getLatestReplicationCheckpoint(), + sourceFactory.get().get(shard), + new SegmentReplicationTargetService.SegmentReplicationListener() { + @Override + public void onReplicationDone(SegmentReplicationState state) { + logger.trace("Completed replication for {}", shard.shardId()); + } + + @Override + public void onReplicationFailure(SegmentReplicationState state, ReplicationFailedException e, boolean sendShardFailure) { + logger.error(() -> new ParameterizedMessage("Failed segment replication for {}", shard.shardId()), e); + if (sendShardFailure) { + shard.failShard("unrecoverable replication failure", e); + } + } + } + ); } - // TODO: Add public entrypoint for replication on an interval to be invoked via IndexService + void setSourceFactory(SegmentReplicationSourceFactory sourceFactory) { + this.sourceFactory.set(sourceFactory); + } /** * Start a round of replication and sync to at least the given checkpoint. 
diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index 77aa49b91a2c3..42b84d8e79d88 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -893,6 +893,7 @@ protected Node( remoteStoreStatsTrackerFactory = new RemoteStoreStatsTrackerFactory(clusterService, settings); CacheModule cacheModule = new CacheModule(pluginsService.filterPlugins(CachePlugin.class), settings); CacheService cacheService = cacheModule.getCacheService(); + final SegmentReplicator segmentReplicator = new SegmentReplicator(threadPool); final IndicesService indicesService = new IndicesService( settings, pluginsService, @@ -922,7 +923,8 @@ protected Node( cacheService, remoteStoreSettings, fileCache, - compositeIndexSettings + compositeIndexSettings, + segmentReplicator::startReplication ); final IngestService ingestService = new IngestService( @@ -1424,7 +1426,7 @@ protected Node( new SegmentReplicationSourceFactory(transportService, recoverySettings, clusterService), indicesService, clusterService, - new SegmentReplicator(threadPool) + segmentReplicator ) ); b.bind(SegmentReplicationSourceService.class) @@ -1459,6 +1461,7 @@ protected Node( b.bind(PersistedStateRegistry.class).toInstance(persistedStateRegistry); b.bind(SegmentReplicationStatsTracker.class).toInstance(segmentReplicationStatsTracker); b.bind(SearchRequestOperationsCompositeListenerFactory.class).toInstance(searchRequestOperationsCompositeListenerFactory); + b.bind(SegmentReplicator.class).toInstance(segmentReplicator); }); injector = modules.createInjector(); diff --git a/server/src/test/java/org/opensearch/index/IndexModuleTests.java b/server/src/test/java/org/opensearch/index/IndexModuleTests.java index d656285deb2bf..3138297eb8ebb 100644 --- a/server/src/test/java/org/opensearch/index/IndexModuleTests.java +++ b/server/src/test/java/org/opensearch/index/IndexModuleTests.java @@ -263,7 +263,8 @@ private IndexService newIndexService(IndexModule module) throws IOException { translogFactorySupplier, () -> IndexSettings.DEFAULT_REFRESH_INTERVAL, DefaultRecoverySettings.INSTANCE, - DefaultRemoteStoreSettings.INSTANCE + DefaultRemoteStoreSettings.INSTANCE, + s -> {} ); } diff --git a/server/src/test/java/org/opensearch/index/IndexServiceTests.java b/server/src/test/java/org/opensearch/index/IndexServiceTests.java index 00c8b52d30c55..f0bad0e653453 100644 --- a/server/src/test/java/org/opensearch/index/IndexServiceTests.java +++ b/server/src/test/java/org/opensearch/index/IndexServiceTests.java @@ -41,6 +41,7 @@ import org.opensearch.common.compress.CompressedXContent; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.core.index.Index; import org.opensearch.core.xcontent.MediaTypeRegistry; @@ -52,6 +53,7 @@ import org.opensearch.index.shard.IndexShardTestCase; import org.opensearch.index.translog.Translog; import org.opensearch.indices.IndicesService; +import org.opensearch.indices.replication.common.ReplicationType; import org.opensearch.plugins.Plugin; import org.opensearch.test.InternalSettingsPlugin; import org.opensearch.test.OpenSearchSingleNodeTestCase; @@ -590,6 +592,57 @@ public void testIndexSortBackwardCompatible() { } } + public void testReplicationTask() throws Exception { + // create with docrep - task should not schedule + IndexService 
indexService = createIndex( + "docrep_index", + Settings.builder().put(IndexMetadata.INDEX_REPLICATION_TYPE_SETTING.getKey(), ReplicationType.DOCUMENT).build() + ); + final Index index = indexService.index(); + ensureGreen(index.getName()); + IndexService.AsyncReplicationTask task = indexService.getReplicationTask(); + assertFalse(task.isScheduled()); + assertFalse(task.mustReschedule()); + + // create for segrep - task should schedule + indexService = createIndex( + "segrep_index", + Settings.builder() + .put(IndexMetadata.INDEX_REPLICATION_TYPE_SETTING.getKey(), ReplicationType.SEGMENT) + .put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), "5s") + .build() + ); + final Index srIndex = indexService.index(); + ensureGreen(srIndex.getName()); + task = indexService.getReplicationTask(); + assertTrue(task.isScheduled()); + assertTrue(task.mustReschedule()); + assertEquals(5000, task.getInterval().millis()); + + // test we can update the interval + client().admin() + .indices() + .prepareUpdateSettings("segrep_index") + .setSettings(Settings.builder().put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), "1s")) + .get(); + + IndexService.AsyncReplicationTask updatedTask = indexService.getReplicationTask(); + assertNotSame(task, updatedTask); + assertFalse(task.isScheduled()); + assertTrue(task.isClosed()); + assertTrue(updatedTask.isScheduled()); + assertTrue(updatedTask.mustReschedule()); + assertEquals(1000, updatedTask.getInterval().millis()); + } + + @Override + protected Settings featureFlagSettings() { + return Settings.builder() + .put(super.featureFlagSettings()) + .put(FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL_SETTING.getKey(), true) + .build(); + } + private static String createTestMapping(String type) { return " \"properties\": {\n" + " \"test\": {\n" diff --git a/server/src/test/java/org/opensearch/indices/replication/SegmentReplicatorTests.java b/server/src/test/java/org/opensearch/indices/replication/SegmentReplicatorTests.java new file mode 100644 index 0000000000000..7acee449a1b46 --- /dev/null +++ b/server/src/test/java/org/opensearch/indices/replication/SegmentReplicatorTests.java @@ -0,0 +1,163 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.indices.replication; + +import org.apache.lucene.store.IOContext; +import org.opensearch.OpenSearchCorruptionException; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.lucene.Lucene; +import org.opensearch.common.settings.Settings; +import org.opensearch.core.action.ActionListener; +import org.opensearch.index.engine.NRTReplicationEngineFactory; +import org.opensearch.index.replication.TestReplicationSource; +import org.opensearch.index.shard.IndexShard; +import org.opensearch.index.shard.IndexShardTestCase; +import org.opensearch.index.store.StoreFileMetadata; +import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; +import org.opensearch.indices.replication.common.CopyState; +import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.threadpool.ThreadPool; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.BiConsumer; + +import org.mockito.Mockito; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.when; + +public class SegmentReplicatorTests extends IndexShardTestCase { + + private static final Settings settings = Settings.builder() + .put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) + .build(); + + public void testStartReplicationWithoutSourceFactory() { + ThreadPool threadpool = mock(ThreadPool.class); + ExecutorService mock = mock(ExecutorService.class); + when(threadpool.generic()).thenReturn(mock); + SegmentReplicator segmentReplicator = new SegmentReplicator(threadpool); + + IndexShard shard = mock(IndexShard.class); + segmentReplicator.startReplication(shard); + Mockito.verifyNoInteractions(mock); + } + + public void testStartReplicationRunsSuccessfully() throws Exception { + final IndexShard replica = newStartedShard(false, settings, new NRTReplicationEngineFactory()); + final IndexShard primary = newStartedShard(true, settings, new NRTReplicationEngineFactory()); + + // index and copy segments to replica. 
+ int numDocs = randomIntBetween(10, 20); + for (int i = 0; i < numDocs; i++) { + indexDoc(primary, "_doc", Integer.toString(i)); + } + primary.refresh("test"); + + SegmentReplicator segmentReplicator = spy(new SegmentReplicator(threadPool)); + SegmentReplicationSourceFactory factory = mock(SegmentReplicationSourceFactory.class); + when(factory.get(replica)).thenReturn(new TestReplicationSource() { + @Override + public void getCheckpointMetadata( + long replicationId, + ReplicationCheckpoint checkpoint, + ActionListener listener + ) { + resolveCheckpointListener(listener, primary); + } + + @Override + public void getSegmentFiles( + long replicationId, + ReplicationCheckpoint checkpoint, + List filesToFetch, + IndexShard indexShard, + BiConsumer fileProgressTracker, + ActionListener listener + ) { + try { + Lucene.cleanLuceneIndex(indexShard.store().directory()); + Map segmentMetadataMap = primary.getSegmentMetadataMap(); + for (String file : segmentMetadataMap.keySet()) { + indexShard.store().directory().copyFrom(primary.store().directory(), file, file, IOContext.DEFAULT); + } + listener.onResponse(new GetSegmentFilesResponse(new ArrayList<>(segmentMetadataMap.values()))); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + }); + segmentReplicator.setSourceFactory(factory); + segmentReplicator.startReplication(replica); + assertBusy(() -> assertDocCount(replica, numDocs)); + closeShards(primary, replica); + } + + public void testReplicationFails() throws Exception { + allowShardFailures(); + final IndexShard replica = newStartedShard(false, settings, new NRTReplicationEngineFactory()); + final IndexShard primary = newStartedShard(true, settings, new NRTReplicationEngineFactory()); + + SegmentReplicator segmentReplicator = spy(new SegmentReplicator(threadPool)); + SegmentReplicationSourceFactory factory = mock(SegmentReplicationSourceFactory.class); + when(factory.get(replica)).thenReturn(new TestReplicationSource() { + @Override + public void getCheckpointMetadata( + long replicationId, + ReplicationCheckpoint checkpoint, + ActionListener listener + ) { + resolveCheckpointListener(listener, primary); + } + + @Override + public void getSegmentFiles( + long replicationId, + ReplicationCheckpoint checkpoint, + List filesToFetch, + IndexShard indexShard, + BiConsumer fileProgressTracker, + ActionListener listener + ) { + OpenSearchCorruptionException corruptIndexException = new OpenSearchCorruptionException("test"); + try { + indexShard.store().markStoreCorrupted(corruptIndexException); + } catch (IOException e) { + throw new RuntimeException(e); + } + listener.onFailure(corruptIndexException); + } + }); + // assert shard failure on corruption + AtomicBoolean failureCallbackTriggered = new AtomicBoolean(false); + replica.addShardFailureCallback((ig) -> failureCallbackTriggered.set(true)); + segmentReplicator.setSourceFactory(factory); + segmentReplicator.startReplication(replica); + assertBusy(() -> assertTrue(failureCallbackTriggered.get())); + closeShards(primary, replica); + } + + protected void resolveCheckpointListener(ActionListener listener, IndexShard primary) { + try (final CopyState copyState = new CopyState(primary)) { + listener.onResponse( + new CheckpointInfoResponse(copyState.getCheckpoint(), copyState.getMetadataMap(), copyState.getInfosBytes()) + ); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + +} From be94715e465e438b64d1982c1565c36e1ef2c45d Mon Sep 17 00:00:00 2001 From: rajiv-kv <157019998+rajiv-kv@users.noreply.github.com> 
Date: Thu, 5 Sep 2024 10:42:32 +0530 Subject: [PATCH 03/17] Relax the join validation for nodes which have remote store disabled but remote publication enabled (#15471) (#15675) Signed-off-by: Rajiv Kumar Vaidyanathan --- CHANGELOG.md | 1 + .../coordination/JoinTaskExecutor.java | 45 ++++- .../metadata/RepositoriesMetadata.java | 18 ++ .../cluster/node/DiscoveryNode.java | 4 +- .../remotestore/RemoteStoreNodeAttribute.java | 13 ++ ...ransportClusterManagerNodeActionTests.java | 2 + .../coordination/JoinTaskExecutorTests.java | 157 +++++++++++++++--- .../MetadataCreateIndexServiceTests.java | 9 +- .../allocation/FailedShardsRoutingTests.java | 3 + ...eStoreMigrationAllocationDeciderTests.java | 2 + .../index/remote/RemoteStoreUtilsTests.java | 2 + .../index/shard/IndexShardTestUtils.java | 2 + 12 files changed, 235 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6713403a4e339..47e0d4f8b6556 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -48,6 +48,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add support for pluggable deciders for concurrent search ([#15363](https://github.com/opensearch-project/OpenSearch/pull/15363)) - Add support for comma-separated list of index names to be used with Snapshot Status API ([#15409](https://github.com/opensearch-project/OpenSearch/pull/15409))[SnapshotV2] Snapshot Status API changes (#15409)) - [Remote Publication] Added checksum validation for cluster state behind a cluster setting ([#15218](https://github.com/opensearch-project/OpenSearch/pull/15218)) +- Relax the join validation for Remote State publication ([#15471](https://github.com/opensearch-project/OpenSearch/pull/15471)) - Optimize NodeIndicesStats output behind flag ([#14454](https://github.com/opensearch-project/OpenSearch/pull/14454)) ### Dependencies diff --git a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java index 6c0940bfc4fd2..7a1747743ad1c 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java @@ -69,6 +69,7 @@ import static org.opensearch.cluster.decommission.DecommissionHelper.nodeCommissioned; import static org.opensearch.gateway.GatewayService.STATE_NOT_RECOVERED_BLOCK; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_CLUSTER_PUBLICATION_REPO_NAME_ATTRIBUTES; import static org.opensearch.node.remotestore.RemoteStoreNodeService.CompatibilityMode; import static org.opensearch.node.remotestore.RemoteStoreNodeService.CompatibilityMode.MIXED; import static org.opensearch.node.remotestore.RemoteStoreNodeService.CompatibilityMode.STRICT; @@ -519,7 +520,7 @@ public static void ensureNodesCompatibility( ); } - ensureRemoteStoreNodesCompatibility(joiningNode, currentNodes, metadata); + ensureRemoteRepositoryCompatibility(joiningNode, currentNodes, metadata); } /** @@ -552,6 +553,30 @@ public static void ensureNodeCommissioned(DiscoveryNode node, Metadata metadata) } } + public static void ensureRemoteRepositoryCompatibility(DiscoveryNode joiningNode, DiscoveryNodes currentNodes, Metadata metadata) { + List existingNodes = new ArrayList<>(currentNodes.getNodes().values()); + + boolean isClusterRemoteStoreEnabled = existingNodes.stream().anyMatch(DiscoveryNode::isRemoteStoreNode); + if (isClusterRemoteStoreEnabled || joiningNode.isRemoteStoreNode()) { + 
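// Either the cluster or the joining node is remote store backed, so apply the full remote store attribute compatibility checks.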
ensureRemoteStoreNodesCompatibility(joiningNode, currentNodes, metadata); + } else { + ensureRemoteClusterStateNodesCompatibility(joiningNode, currentNodes); + } + } + + private static void ensureRemoteClusterStateNodesCompatibility(DiscoveryNode joiningNode, DiscoveryNodes currentNodes) { + List existingNodes = new ArrayList<>(currentNodes.getNodes().values()); + + assert existingNodes.isEmpty() == false; + Optional remotePublicationNode = existingNodes.stream() + .filter(DiscoveryNode::isRemoteStatePublicationEnabled) + .findFirst(); + + if (remotePublicationNode.isPresent() && joiningNode.isRemoteStatePublicationEnabled()) { + ensureRepositoryCompatibility(joiningNode, remotePublicationNode.get(), REMOTE_CLUSTER_PUBLICATION_REPO_NAME_ATTRIBUTES); + } + } + /** * The method ensures homogeneity - * 1. The joining node has to be a remote store backed if it's joining a remote store backed cluster. Validates @@ -567,6 +592,7 @@ public static void ensureNodeCommissioned(DiscoveryNode node, Metadata metadata) * needs to be modified. */ private static void ensureRemoteStoreNodesCompatibility(DiscoveryNode joiningNode, DiscoveryNodes currentNodes, Metadata metadata) { + List existingNodes = new ArrayList<>(currentNodes.getNodes().values()); assert existingNodes.isEmpty() == false; @@ -648,6 +674,23 @@ private static void ensureRemoteStoreNodesCompatibility( } } + private static void ensureRepositoryCompatibility(DiscoveryNode joiningNode, DiscoveryNode existingNode, List reposToValidate) { + + RemoteStoreNodeAttribute joiningRemoteStoreNodeAttribute = new RemoteStoreNodeAttribute(joiningNode); + RemoteStoreNodeAttribute existingRemoteStoreNodeAttribute = new RemoteStoreNodeAttribute(existingNode); + + if (existingRemoteStoreNodeAttribute.equalsForRepositories(joiningRemoteStoreNodeAttribute, reposToValidate) == false) { + throw new IllegalStateException( + "a remote store node [" + + joiningNode + + "] is trying to join a remote store cluster with incompatible node attributes in " + + "comparison with existing node [" + + existingNode + + "]" + ); + } + } + public static Collection> addBuiltInJoinValidators( Collection> onJoinValidators ) { diff --git a/server/src/main/java/org/opensearch/cluster/metadata/RepositoriesMetadata.java b/server/src/main/java/org/opensearch/cluster/metadata/RepositoriesMetadata.java index 4b3dc7964a87b..59452e33191d7 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/RepositoriesMetadata.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/RepositoriesMetadata.java @@ -184,6 +184,24 @@ public boolean equalsIgnoreGenerationsWithRepoSkip(@Nullable RepositoriesMetadat .filter(repo -> !reposToSkip.contains(repo.name())) .collect(Collectors.toList()); + return equalsRepository(currentRepositories, otherRepositories); + } + + public boolean equalsIgnoreGenerationsForRepo(@Nullable RepositoriesMetadata other, List reposToValidate) { + if (other == null) { + return false; + } + List currentRepositories = repositories.stream() + .filter(repo -> reposToValidate.contains(repo.name())) + .collect(Collectors.toList()); + List otherRepositories = other.repositories.stream() + .filter(repo -> reposToValidate.contains(repo.name())) + .collect(Collectors.toList()); + + return equalsRepository(currentRepositories, otherRepositories); + } + + public static boolean equalsRepository(List currentRepositories, List otherRepositories) { if (otherRepositories.size() != currentRepositories.size()) { return false; } diff --git 
a/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java b/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java index e71570b36552b..9e9802b1409c2 100644 --- a/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java +++ b/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java @@ -67,6 +67,7 @@ import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_NODE_ATTRIBUTE_KEY_PREFIX; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; /** * A discovery node represents a node that is part of the cluster. @@ -554,7 +555,8 @@ public boolean isSearchNode() { * @return true if the node contains remote store node attributes, false otherwise */ public boolean isRemoteStoreNode() { - return this.getAttributes().keySet().stream().anyMatch(key -> key.startsWith(REMOTE_STORE_NODE_ATTRIBUTE_KEY_PREFIX)); + return this.getAttributes().keySet().stream().anyMatch(key -> key.equals(REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY)) + && this.getAttributes().keySet().stream().anyMatch(key -> key.equals(REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY)); } /** diff --git a/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeAttribute.java b/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeAttribute.java index efaefd530b34d..55971398634c5 100644 --- a/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeAttribute.java +++ b/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeAttribute.java @@ -55,6 +55,11 @@ public class RemoteStoreNodeAttribute { REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY ); + public static List REMOTE_CLUSTER_PUBLICATION_REPO_NAME_ATTRIBUTES = List.of( + REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, + REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY + ); + /** * Creates a new {@link RemoteStoreNodeAttribute} */ @@ -261,6 +266,14 @@ public boolean equalsWithRepoSkip(Object o, List reposToSkip) { return this.getRepositoriesMetadata().equalsIgnoreGenerationsWithRepoSkip(that.getRepositoriesMetadata(), reposToSkip); } + public boolean equalsForRepositories(Object otherNode, List repositoryToValidate) { + if (this == otherNode) return true; + if (otherNode == null || getClass() != otherNode.getClass()) return false; + + RemoteStoreNodeAttribute other = (RemoteStoreNodeAttribute) otherNode; + return this.getRepositoriesMetadata().equalsIgnoreGenerationsForRepo(other.repositoriesMetadata, repositoryToValidate); + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/server/src/test/java/org/opensearch/action/support/clustermanager/TransportClusterManagerNodeActionTests.java b/server/src/test/java/org/opensearch/action/support/clustermanager/TransportClusterManagerNodeActionTests.java index 37e884502b613..3c1c84653b384 100644 --- a/server/src/test/java/org/opensearch/action/support/clustermanager/TransportClusterManagerNodeActionTests.java +++ b/server/src/test/java/org/opensearch/action/support/clustermanager/TransportClusterManagerNodeActionTests.java @@ -85,6 +85,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import static 
org.opensearch.index.remote.RemoteMigrationIndexMetadataUpdaterTests.createIndexMetadataWithRemoteStoreSettings; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; @@ -901,6 +902,7 @@ public void testDontAllowSwitchingCompatibilityModeForClusterWithMultipleVersion private Map getRemoteStoreNodeAttributes() { Map remoteStoreNodeAttributes = new HashMap<>(); + remoteStoreNodeAttributes.put(REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-cluster-repo-1"); remoteStoreNodeAttributes.put(REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-segment-repo-1"); remoteStoreNodeAttributes.put(REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-translog-repo-1"); return remoteStoreNodeAttributes; diff --git a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java index 02cc87d334cc0..e5b8d407af868 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java @@ -689,6 +689,100 @@ public void testJoinClusterWithRemoteStateNodeJoiningRemoteStateCluster() { JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()); } + public void testJoinRemotePublicationClusterWithNonRemoteNodes() { + final DiscoveryNode existingNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + remotePublicationNodeAttributes(), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT) + .nodes(DiscoveryNodes.builder().add(existingNode).localNodeId(existingNode.getId()).build()) + .build(); + + DiscoveryNode joiningNode = newDiscoveryNode(new HashMap<>()); + JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()); + } + + public void testJoinRemotePublicationCluster() { + final DiscoveryNode existingNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + remotePublicationNodeAttributes(), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT) + .nodes(DiscoveryNodes.builder().add(existingNode).localNodeId(existingNode.getId()).build()) + .build(); + + DiscoveryNode joiningNode = newDiscoveryNode(remotePublicationNodeAttributes()); + JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()); + } + + public void testJoinRemotePubClusterWithRemoteStoreNodes() { + final DiscoveryNode existingNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + remotePublicationNodeAttributes(), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT) + .nodes(DiscoveryNodes.builder().add(existingNode).localNodeId(existingNode.getId()).build()) + .build(); + + Map newNodeAttributes = new HashMap<>(); + 
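// A joining node that carries segment and translog repos on top of the publication repos is a full remote store node, so the join must be rejected below.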
newNodeAttributes.putAll(remoteStateNodeAttributes(CLUSTER_STATE_REPO)); + newNodeAttributes.putAll(remoteRoutingTableAttributes(ROUTING_TABLE_REPO)); + newNodeAttributes.putAll(remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO)); + + DiscoveryNode joiningNode = newDiscoveryNode(newNodeAttributes); + Exception e = assertThrows( + IllegalStateException.class, + () -> JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()) + ); + assertTrue(e.getMessage().equals("a remote store node [" + joiningNode + "] is trying to join a non remote store cluster")); + } + + public void testPreventJoinRemotePublicationClusterWithIncompatibleAttributes() { + Map existingNodeAttributes = remotePublicationNodeAttributes(); + Map remoteStoreNodeAttributes = remotePublicationNodeAttributes(); + final DiscoveryNode existingNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + existingNodeAttributes, + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT) + .nodes(DiscoveryNodes.builder().add(existingNode).localNodeId(existingNode.getId()).build()) + .build(); + + for (Map.Entry nodeAttribute : existingNodeAttributes.entrySet()) { + remoteStoreNodeAttributes.put(nodeAttribute.getKey(), null); + DiscoveryNode joiningNode = newDiscoveryNode(remoteStoreNodeAttributes); + Exception e = assertThrows( + IllegalStateException.class, + () -> JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()) + ); + assertTrue( + e.getMessage().equals("joining node [" + joiningNode + "] doesn't have the node attribute [" + nodeAttribute.getKey() + "]") + || e.getMessage() + .equals( + "a remote store node [" + + joiningNode + + "] is trying to join a remote store cluster with incompatible node attributes in comparison with existing node [" + + currentState.getNodes().getNodes().values().stream().findFirst().get() + + "]" + ) + ); + + remoteStoreNodeAttributes.put(nodeAttribute.getKey(), nodeAttribute.getValue()); + } + } + public void testPreventJoinClusterWithRemoteStateNodeJoiningRemoteStoreCluster() { Map existingNodeAttributes = remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO); final DiscoveryNode existingNode = new DiscoveryNode( @@ -706,16 +800,7 @@ public void testPreventJoinClusterWithRemoteStateNodeJoiningRemoteStoreCluster() IllegalStateException.class, () -> JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()) ); - assertTrue( - e.getMessage() - .equals( - "a remote store node [" - + joiningNode - + "] is trying to join a remote store cluster with incompatible node attributes in comparison with existing node [" - + currentState.getNodes().getNodes().values().stream().findFirst().get() - + "]" - ) - ); + assertTrue(e.getMessage().equals("a non remote store node [" + joiningNode + "] is trying to join a remote store cluster")); } public void testPreventJoinClusterWithRemoteStoreNodeJoiningRemoteStateCluster() { @@ -735,16 +820,7 @@ public void testPreventJoinClusterWithRemoteStoreNodeJoiningRemoteStateCluster() IllegalStateException.class, () -> JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()) ); - assertTrue( - e.getMessage() - .equals( - "a remote store node [" - + joiningNode - + "] is trying to join a remote store cluster with incompatible node attributes in comparison with existing node [" - + 
currentState.getNodes().getNodes().values().stream().findFirst().get() - + "]" - ) - ); + assertTrue(e.getMessage().equals("a remote store node [" + joiningNode + "] is trying to join a non remote store cluster")); } public void testUpdatesClusterStateWithSingleNodeCluster() throws Exception { @@ -1159,6 +1235,39 @@ public void testRemoteRoutingTableNodeJoinNodeWithRemoteAndRoutingRepoDifference JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()); } + public void testJoinRemoteStoreClusterWithRemotePublicationNodeInMixedMode() { + final DiscoveryNode remoteStoreNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + + final DiscoveryNode nonRemoteStoreNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + new HashMap<>(), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + + final Settings settings = Settings.builder() + .put(MIGRATION_DIRECTION_SETTING.getKey(), RemoteStoreNodeService.Direction.REMOTE_STORE) + .put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), "mixed") + .build(); + final Settings nodeSettings = Settings.builder().put(REMOTE_STORE_MIGRATION_EXPERIMENTAL, "true").build(); + FeatureFlags.initializeFeatureFlags(nodeSettings); + Metadata metadata = Metadata.builder().persistentSettings(settings).build(); + ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT) + .nodes(DiscoveryNodes.builder().add(remoteStoreNode).add(nonRemoteStoreNode).localNodeId(remoteStoreNode.getId()).build()) + .metadata(metadata) + .build(); + + DiscoveryNode joiningNode = newDiscoveryNode(remotePublicationNodeAttributes()); + JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()); + } + private void validateRepositoryMetadata(ClusterState updatedState, DiscoveryNode existingNode, int expectedRepositories) throws Exception { @@ -1197,6 +1306,7 @@ private DiscoveryNode newDiscoveryNode(Map attributes) { } private static final String SEGMENT_REPO = "segment-repo"; + private static final String TRANSLOG_REPO = "translog-repo"; private static final String CLUSTER_STATE_REPO = "cluster-state-repo"; private static final String COMMON_REPO = "remote-repo"; @@ -1243,6 +1353,13 @@ private Map remoteStoreNodeAttributes(String segmentRepoName, St }; } + private Map remotePublicationNodeAttributes() { + Map existingNodeAttributes = new HashMap<>(); + existingNodeAttributes.putAll(remoteStateNodeAttributes(CLUSTER_STATE_REPO)); + existingNodeAttributes.putAll(remoteRoutingTableAttributes(ROUTING_TABLE_REPO)); + return existingNodeAttributes; + } + private Map remoteStateNodeAttributes(String clusterStateRepo) { String clusterStateRepositoryTypeAttributeKey = String.format( Locale.getDefault(), diff --git a/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java b/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java index bfb143b09881b..5ffe5cedc8286 100644 --- a/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java @@ -169,6 +169,7 @@ import static org.opensearch.indices.IndicesService.CLUSTER_REPLICATION_TYPE_SETTING; import static org.opensearch.indices.ShardLimitValidatorTests.createTestShardLimitService; import 
static org.opensearch.node.Node.NODE_ATTRIBUTES; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.getRemoteStoreTranslogRepo; @@ -1663,7 +1664,12 @@ public void testNewIndexIsRemoteStoreBackedForRemoteStoreDirectionAndMixedMode() null ); - Map missingTranslogAttribute = Map.of(REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-segment-repo-1"); + Map missingTranslogAttribute = Map.of( + REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, + "cluster-state-repo-1", + REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, + "my-segment-repo-1" + ); DiscoveryNodes finalDiscoveryNodes = DiscoveryNodes.builder() .add(nonRemoteClusterManagerNode) @@ -2568,6 +2574,7 @@ private void verifyRemoteStoreIndexSettings( private DiscoveryNode getRemoteNode() { Map attributes = new HashMap<>(); + attributes.put(REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-cluster-rep-1"); attributes.put(REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-segment-repo-1"); attributes.put(REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-translog-repo-1"); return new DiscoveryNode( diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/FailedShardsRoutingTests.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/FailedShardsRoutingTests.java index 5e3b74ee138ab..f8e1c609e6ee8 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/allocation/FailedShardsRoutingTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/allocation/FailedShardsRoutingTests.java @@ -68,6 +68,7 @@ import static org.opensearch.cluster.routing.ShardRoutingState.STARTED; import static org.opensearch.cluster.routing.ShardRoutingState.UNASSIGNED; import static org.opensearch.common.util.FeatureFlags.REMOTE_STORE_MIGRATION_EXPERIMENTAL; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING; @@ -853,6 +854,8 @@ public void testPreferReplicaOnRemoteNodeForPrimaryPromotion() { // add a remote node and start primary shard Map remoteStoreNodeAttributes = Map.of( + REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, + "REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY", REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, "REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_VALUE", REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY, diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteStoreMigrationAllocationDeciderTests.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteStoreMigrationAllocationDeciderTests.java index 5b29922f2400c..e6e81c94e7f32 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteStoreMigrationAllocationDeciderTests.java +++ 
b/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteStoreMigrationAllocationDeciderTests.java @@ -68,6 +68,7 @@ import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REPLICATION_TYPE; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeService.Direction.NONE; import static org.opensearch.node.remotestore.RemoteStoreNodeService.Direction.REMOTE_STORE; import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING; @@ -617,6 +618,7 @@ private DiscoveryNode getRemoteNode() { REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, "REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_VALUE" ); + attributes.put(REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, "REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_VALUE"); return new DiscoveryNode( UUIDs.base64UUID(), buildNewFakeTransportAddress(), diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java index a6db37285fe6f..2a34dd3580948 100644 --- a/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java +++ b/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java @@ -67,6 +67,7 @@ import static org.opensearch.index.store.RemoteSegmentStoreDirectory.MetadataFilenameUtils.SEPARATOR; import static org.opensearch.index.translog.transfer.TranslogTransferMetadata.METADATA_SEPARATOR; import static org.opensearch.indices.RemoteStoreSettings.CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_ENABLED; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; @@ -540,6 +541,7 @@ private RoutingTable createRoutingTableAllShardsStarted( private Map getRemoteStoreNodeAttributes() { Map remoteStoreNodeAttributes = new HashMap<>(); + remoteStoreNodeAttributes.put(REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-cluster-repo-1"); remoteStoreNodeAttributes.put(REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-segment-repo-1"); remoteStoreNodeAttributes.put(REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-translog-repo-1"); return remoteStoreNodeAttributes; diff --git a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestUtils.java b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestUtils.java index abf8f2a4da6c1..50a1751546293 100644 --- a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestUtils.java +++ b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestUtils.java @@ -21,6 +21,7 @@ import java.util.Map; public class IndexShardTestUtils { + public static final String MOCK_STATE_REPO_NAME = "state-test-repo"; public static final String MOCK_SEGMENT_REPO_NAME = "segment-test-repo"; 
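For context on why a cluster state repository attribute now appears in every remote-node test fixture in this patch: a node only counts as remote-store backed when both the cluster state and segment repository attributes are present, rather than any attribute with the remote store prefix. A minimal sketch of the strengthened check, not part of the patch itself; the key strings below are placeholder assumptions standing in for the RemoteStoreNodeAttribute constants:

import java.util.Map;

final class RemoteStoreNodeCheckSketch {
    // Assumed stand-ins for REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY
    // and REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY.
    static final String STATE_REPO_KEY = "remote_store.state.repository";
    static final String SEGMENT_REPO_KEY = "remote_store.segment.repository";

    // Mirrors the intent of DiscoveryNode#isRemoteStoreNode after this patch:
    // both attributes must be present; a prefix match on either alone is no longer enough.
    static boolean isRemoteStoreNode(Map<String, String> attributes) {
        return attributes.containsKey(STATE_REPO_KEY) && attributes.containsKey(SEGMENT_REPO_KEY);
    }
}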
public static final String MOCK_TLOG_REPO_NAME = "tlog-test-repo"; @@ -37,6 +38,7 @@ public static DiscoveryNode getFakeDiscoNode(String id) { public static DiscoveryNode getFakeRemoteEnabledNode(String id) { Map remoteNodeAttributes = new HashMap(); + remoteNodeAttributes.put(RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, MOCK_STATE_REPO_NAME); remoteNodeAttributes.put(RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, MOCK_SEGMENT_REPO_NAME); remoteNodeAttributes.put(RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY, MOCK_TLOG_REPO_NAME); return new DiscoveryNode( From c91f2f158922f4f084a1b2c5dd6ee04466503332 Mon Sep 17 00:00:00 2001 From: Anshu Agarwal Date: Thu, 5 Sep 2024 11:20:48 +0530 Subject: [PATCH 04/17] [Snapshot V2] Support pinned timestamp in delete flow (#15256) (#15722) Signed-off-by: Anshu Agarwal (cherry picked from commit 5bf34d21d8345befbe02181c9b6fd28dd360eb2b) --- .../snapshots/DeleteSnapshotITV2.java | 332 ++++++++++++++++++ .../TransportCleanupRepositoryAction.java | 9 + .../RemoteStorePinnedTimestampService.java | 2 + .../opensearch/repositories/Repository.java | 60 +++- .../repositories/RepositoryData.java | 5 + .../blobstore/BlobStoreRepository.java | 313 ++++++++++++++++- .../snapshots/SnapshotsService.java | 85 ++++- .../blobstore/BlobStoreRepositoryTests.java | 20 +- 8 files changed, 795 insertions(+), 31 deletions(-) create mode 100644 server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotITV2.java diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotITV2.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotITV2.java new file mode 100644 index 0000000000000..02b6ea47172c7 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotITV2.java @@ -0,0 +1,332 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.snapshots; + +import org.opensearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse; +import org.opensearch.action.support.master.AcknowledgedResponse; +import org.opensearch.client.Client; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.common.unit.ByteSizeUnit; +import org.opensearch.index.remote.RemoteStoreEnums; +import org.opensearch.indices.RemoteStoreSettings; +import org.opensearch.node.remotestore.RemoteStorePinnedTimestampService; +import org.opensearch.remotestore.RemoteStoreBaseIntegTestCase; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.repositories.fs.FsRepository; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.nio.file.Path; +import java.util.concurrent.TimeUnit; + +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.lessThan; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +public class DeleteSnapshotITV2 extends AbstractSnapshotIntegTestCase { + + private static final String REMOTE_REPO_NAME = "remote-store-repo-name"; + + public void testDeleteShallowCopyV2() throws Exception { + disableRepoConsistencyCheck("Remote store repository is being used in the test"); + + final Path remoteStoreRepoPath = randomRepoPath(); + internalCluster().startClusterManagerOnlyNode(snapshotV2Settings(remoteStoreRepoPath)); + + internalCluster().startDataOnlyNode(snapshotV2Settings(remoteStoreRepoPath)); + internalCluster().startDataOnlyNode(snapshotV2Settings(remoteStoreRepoPath)); + + String indexName1 = "testindex1"; + String indexName2 = "testindex2"; + String indexName3 = "testindex3"; + String snapshotRepoName = "test-create-snapshot-repo"; + String snapshotName1 = "test-create-snapshot1"; + String snapshotName2 = "test-create-snapshot2"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + + Client client = client(); + + assertAcked( + client.admin() + .cluster() + .preparePutRepository(snapshotRepoName) + .setType(FsRepository.TYPE) + .setSettings( + Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true) + ) + ); + + createIndex(indexName1, getRemoteStoreBackedIndexSettings()); + createIndex(indexName2, getRemoteStoreBackedIndexSettings()); + + final int numDocsInIndex1 = 10; + final int numDocsInIndex2 = 20; + indexRandomDocs(indexName1, numDocsInIndex1); + indexRandomDocs(indexName2, numDocsInIndex2); + ensureGreen(indexName1, indexName2); + + CreateSnapshotResponse createSnapshotResponse = client().admin() + .cluster() + .prepareCreateSnapshot(snapshotRepoName, snapshotName1) + .setWaitForCompletion(true) + .get(); + SnapshotInfo snapshotInfo = createSnapshotResponse.getSnapshotInfo(); + assertThat(snapshotInfo.state(), equalTo(SnapshotState.SUCCESS)); + assertThat(snapshotInfo.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo.successfulShards(), 
equalTo(snapshotInfo.totalShards())); + assertThat(snapshotInfo.snapshotId().getName(), equalTo(snapshotName1)); + + createIndex(indexName3, getRemoteStoreBackedIndexSettings()); + indexRandomDocs(indexName3, 10); + CreateSnapshotResponse createSnapshotResponse2 = client().admin() + .cluster() + .prepareCreateSnapshot(snapshotRepoName, snapshotName2) + .setWaitForCompletion(true) + .get(); + snapshotInfo = createSnapshotResponse2.getSnapshotInfo(); + assertThat(snapshotInfo.state(), equalTo(SnapshotState.SUCCESS)); + assertThat(snapshotInfo.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo.successfulShards(), equalTo(snapshotInfo.totalShards())); + assertThat(snapshotInfo.snapshotId().getName(), equalTo(snapshotName2)); + + assertAcked(client().admin().indices().prepareDelete(indexName1)); + Thread.sleep(100); + + AcknowledgedResponse deleteResponse = client().admin() + .cluster() + .prepareDeleteSnapshot(snapshotRepoName, snapshotName2) + .setSnapshots(snapshotName2) + .get(); + assertTrue(deleteResponse.isAcknowledged()); + + // test delete non-existent snapshot + assertThrows( + SnapshotMissingException.class, + () -> client().admin().cluster().prepareDeleteSnapshot(snapshotRepoName, "random-snapshot").setSnapshots(snapshotName2).get() + ); + + } + + public void testDeleteShallowCopyV2MultipleSnapshots() throws Exception { + disableRepoConsistencyCheck("Remote store repository is being used in the test"); + final Path remoteStoreRepoPath = randomRepoPath(); + + internalCluster().startClusterManagerOnlyNode(snapshotV2Settings(remoteStoreRepoPath)); + internalCluster().startDataOnlyNode(snapshotV2Settings(remoteStoreRepoPath)); + internalCluster().startDataOnlyNode(snapshotV2Settings(remoteStoreRepoPath)); + + String indexName1 = "testindex1"; + String indexName2 = "testindex2"; + String indexName3 = "testindex3"; + String snapshotRepoName = "test-create-snapshot-repo"; + String snapshotName1 = "test-create-snapshot1"; + String snapshotName2 = "test-create-snapshot2"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + + Client client = client(); + + assertAcked( + client.admin() + .cluster() + .preparePutRepository(snapshotRepoName) + .setType(FsRepository.TYPE) + .setSettings( + Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true) + ) + ); + + createIndex(indexName1, getRemoteStoreBackedIndexSettings()); + + createIndex(indexName2, getRemoteStoreBackedIndexSettings()); + + final int numDocsInIndex1 = 10; + final int numDocsInIndex2 = 20; + indexRandomDocs(indexName1, numDocsInIndex1); + indexRandomDocs(indexName2, numDocsInIndex2); + ensureGreen(indexName1, indexName2); + + CreateSnapshotResponse createSnapshotResponse = client().admin() + .cluster() + .prepareCreateSnapshot(snapshotRepoName, snapshotName1) + .setWaitForCompletion(true) + .get(); + SnapshotInfo snapshotInfo = createSnapshotResponse.getSnapshotInfo(); + assertThat(snapshotInfo.state(), equalTo(SnapshotState.SUCCESS)); + assertThat(snapshotInfo.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo.successfulShards(), equalTo(snapshotInfo.totalShards())); + assertThat(snapshotInfo.snapshotId().getName(), 
equalTo(snapshotName1)); + + createIndex(indexName3, getRemoteStoreBackedIndexSettings()); + indexRandomDocs(indexName3, 10); + + CreateSnapshotResponse createSnapshotResponse2 = client().admin() + .cluster() + .prepareCreateSnapshot(snapshotRepoName, snapshotName2) + .setWaitForCompletion(true) + .get(); + snapshotInfo = createSnapshotResponse2.getSnapshotInfo(); + assertThat(snapshotInfo.state(), equalTo(SnapshotState.SUCCESS)); + assertThat(snapshotInfo.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo.successfulShards(), equalTo(snapshotInfo.totalShards())); + assertThat(snapshotInfo.snapshotId().getName(), equalTo(snapshotName2)); + + AcknowledgedResponse deleteResponse = client().admin() + .cluster() + .prepareDeleteSnapshot(snapshotRepoName, snapshotName1, snapshotName2) + .setSnapshots(snapshotName2) + .get(); + assertTrue(deleteResponse.isAcknowledged()); + + // test delete non-existent snapshot + assertThrows( + SnapshotMissingException.class, + () -> client().admin().cluster().prepareDeleteSnapshot(snapshotRepoName, "random-snapshot").setSnapshots(snapshotName2).get() + ); + + } + + public void testRemoteStoreCleanupForDeletedIndexForSnapshotV2() throws Exception { + disableRepoConsistencyCheck("Remote store repository is being used in the test"); + final Path remoteStoreRepoPath = randomRepoPath(); + Settings settings = remoteStoreClusterSettings(REMOTE_REPO_NAME, remoteStoreRepoPath); + settings = Settings.builder() + .put(settings) + .put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_ENABLED.getKey(), true) + .put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING.getKey(), RemoteStoreEnums.PathType.FIXED.toString()) + .build(); + String clusterManagerName = internalCluster().startClusterManagerOnlyNode(settings); + internalCluster().startDataOnlyNode(settings); + final Client clusterManagerClient = internalCluster().clusterManagerClient(); + ensureStableCluster(2); + + RemoteStorePinnedTimestampService remoteStorePinnedTimestampService = internalCluster().getInstance( + RemoteStorePinnedTimestampService.class, + clusterManagerName + ); + remoteStorePinnedTimestampService.rescheduleAsyncUpdatePinnedTimestampTask(TimeValue.timeValueSeconds(1)); + + final String snapshotRepoName = "snapshot-repo-name"; + final Path snapshotRepoPath = randomRepoPath(); + createRepository(snapshotRepoName, "mock", snapshotRepoSettingsForShallowV2(snapshotRepoPath)); + + final String remoteStoreEnabledIndexName = "remote-index-1"; + final Settings remoteStoreEnabledIndexSettings = getRemoteStoreBackedIndexSettings(); + createIndex(remoteStoreEnabledIndexName, remoteStoreEnabledIndexSettings); + indexRandomDocs(remoteStoreEnabledIndexName, 5); + + String indexUUID = client().admin() + .indices() + .prepareGetSettings(remoteStoreEnabledIndexName) + .get() + .getSetting(remoteStoreEnabledIndexName, IndexMetadata.SETTING_INDEX_UUID); + + String numShards = client().admin() + .indices() + .prepareGetSettings(remoteStoreEnabledIndexName) + .get() + .getSetting(remoteStoreEnabledIndexName, IndexMetadata.SETTING_NUMBER_OF_SHARDS); + + logger.info("--> create two remote index shallow snapshots"); + CreateSnapshotResponse createSnapshotResponse = client().admin() + .cluster() + .prepareCreateSnapshot(snapshotRepoName, "snap1") + .setWaitForCompletion(true) + .get(); + SnapshotInfo snapshotInfo1 = createSnapshotResponse.getSnapshotInfo(); + + indexRandomDocs(remoteStoreEnabledIndexName, 25); + + CreateSnapshotResponse createSnapshotResponse2 = client().admin() + .cluster() + 
.prepareCreateSnapshot(snapshotRepoName, "snap2") + .setWaitForCompletion(true) + .get(); + SnapshotInfo snapshotInfo2 = createSnapshotResponse2.getSnapshotInfo(); + assertThat(snapshotInfo2.state(), equalTo(SnapshotState.SUCCESS)); + assertThat(snapshotInfo2.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo2.successfulShards(), equalTo(snapshotInfo2.totalShards())); + assertThat(snapshotInfo2.snapshotId().getName(), equalTo("snap2")); + + // delete remote store index + assertAcked(client().admin().indices().prepareDelete(remoteStoreEnabledIndexName)); + + logger.info("--> delete snapshot 2"); + + Path indexPath = Path.of(String.valueOf(remoteStoreRepoPath), indexUUID); + Path shardPath = Path.of(String.valueOf(indexPath), "0"); + Path segmentsPath = Path.of(String.valueOf(shardPath), "segments"); + + // Get total segments remote store directory file count for deleted index and shard 0 + int segmentFilesCountBeforeDeletingSnapshot1 = RemoteStoreBaseIntegTestCase.getFileCount(segmentsPath); + + RemoteStoreSettings.setPinnedTimestampsLookbackInterval(TimeValue.ZERO); + + AcknowledgedResponse deleteSnapshotResponse = clusterManagerClient.admin() + .cluster() + .prepareDeleteSnapshot(snapshotRepoName, snapshotInfo2.snapshotId().getName()) + .get(); + assertAcked(deleteSnapshotResponse); + + Thread.sleep(5000); + + assertBusy(() -> { + try { + assertThat(RemoteStoreBaseIntegTestCase.getFileCount(segmentsPath), lessThan(segmentFilesCountBeforeDeletingSnapshot1)); + } catch (Exception e) {} + }, 30, TimeUnit.SECONDS); + int segmentFilesCountAfterDeletingSnapshot1 = RemoteStoreBaseIntegTestCase.getFileCount(segmentsPath); + + logger.info("--> delete snapshot 1"); + RemoteStoreSettings.setPinnedTimestampsLookbackInterval(TimeValue.ZERO); + // on snapshot deletion, remote store segment files should get cleaned up for deleted index - `remote-index-1` + deleteSnapshotResponse = clusterManagerClient.admin() + .cluster() + .prepareDeleteSnapshot(snapshotRepoName, snapshotInfo1.snapshotId().getName()) + .get(); + assertAcked(deleteSnapshotResponse); + + // Delete is async. 
Give it time to complete. + assertBusy(() -> { + try { + assertThat(RemoteStoreBaseIntegTestCase.getFileCount(segmentsPath), lessThan(segmentFilesCountAfterDeletingSnapshot1)); + } catch (Exception e) {} + }, 60, TimeUnit.SECONDS); + } + + private Settings snapshotV2Settings(Path remoteStoreRepoPath) { + Settings settings = Settings.builder() + .put(remoteStoreClusterSettings(REMOTE_REPO_NAME, remoteStoreRepoPath)) + .put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_ENABLED.getKey(), true) + .build(); + return settings; + } + + protected Settings.Builder snapshotRepoSettingsForShallowV2(Path path) { + final Settings.Builder settings = Settings.builder(); + settings.put("location", path); + settings.put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), Boolean.TRUE); + settings.put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true); + return settings; + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/repositories/cleanup/TransportCleanupRepositoryAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/repositories/cleanup/TransportCleanupRepositoryAction.java index 65a26037e1cd5..83a63070161e9 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/repositories/cleanup/TransportCleanupRepositoryAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/repositories/cleanup/TransportCleanupRepositoryAction.java @@ -54,6 +54,7 @@ import org.opensearch.common.inject.Inject; import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.index.store.RemoteSegmentStoreDirectoryFactory; import org.opensearch.index.store.lockmanager.RemoteStoreLockManagerFactory; import org.opensearch.indices.RemoteStoreSettings; import org.opensearch.repositories.RepositoriesService; @@ -101,6 +102,8 @@ public final class TransportCleanupRepositoryAction extends TransportClusterMana private final RemoteStoreLockManagerFactory remoteStoreLockManagerFactory; + private final RemoteSegmentStoreDirectoryFactory remoteSegmentStoreDirectoryFactory; + @Override protected String executor() { return ThreadPool.Names.SAME; @@ -128,6 +131,11 @@ public TransportCleanupRepositoryAction( ); this.repositoriesService = repositoriesService; this.snapshotsService = snapshotsService; + this.remoteSegmentStoreDirectoryFactory = new RemoteSegmentStoreDirectoryFactory( + () -> repositoriesService, + threadPool, + remoteStoreSettings.getSegmentsPathFixedPrefix() + ); this.remoteStoreLockManagerFactory = new RemoteStoreLockManagerFactory( () -> repositoriesService, remoteStoreSettings.getSegmentsPathFixedPrefix() @@ -291,6 +299,7 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS repositoryStateId, snapshotsService.minCompatibleVersion(newState.nodes().getMinNodeVersion(), repositoryData, null), remoteStoreLockManagerFactory, + remoteSegmentStoreDirectoryFactory, ActionListener.wrap(result -> after(null, result), e -> after(e, null)) ) ) diff --git a/server/src/main/java/org/opensearch/node/remotestore/RemoteStorePinnedTimestampService.java b/server/src/main/java/org/opensearch/node/remotestore/RemoteStorePinnedTimestampService.java index 782ba5e9a6540..3a7734fc0538f 100644 --- a/server/src/main/java/org/opensearch/node/remotestore/RemoteStorePinnedTimestampService.java +++ b/server/src/main/java/org/opensearch/node/remotestore/RemoteStorePinnedTimestampService.java @@ -12,6 +12,7 @@ import org.apache.logging.log4j.Logger; import 
org.apache.logging.log4j.message.ParameterizedMessage; import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.common.blobstore.BlobContainer; import org.opensearch.common.blobstore.BlobMetadata; import org.opensearch.common.collect.Tuple; @@ -42,6 +43,7 @@ * * @opensearch.internal */ +@ExperimentalApi public class RemoteStorePinnedTimestampService implements Closeable { private static final Logger logger = LogManager.getLogger(RemoteStorePinnedTimestampService.class); private static Tuple> pinnedTimestampsSet = new Tuple<>(-1L, Set.of()); diff --git a/server/src/main/java/org/opensearch/repositories/Repository.java b/server/src/main/java/org/opensearch/repositories/Repository.java index 29d4638f4bfbc..a7c44575465b0 100644 --- a/server/src/main/java/org/opensearch/repositories/Repository.java +++ b/server/src/main/java/org/opensearch/repositories/Repository.java @@ -50,9 +50,11 @@ import org.opensearch.index.mapper.MapperService; import org.opensearch.index.snapshots.IndexShardSnapshotStatus; import org.opensearch.index.snapshots.blobstore.RemoteStoreShardShallowCopySnapshot; +import org.opensearch.index.store.RemoteSegmentStoreDirectoryFactory; import org.opensearch.index.store.Store; import org.opensearch.index.store.lockmanager.RemoteStoreLockManagerFactory; import org.opensearch.indices.recovery.RecoveryState; +import org.opensearch.node.remotestore.RemoteStorePinnedTimestampService; import org.opensearch.snapshots.SnapshotId; import org.opensearch.snapshots.SnapshotInfo; @@ -220,11 +222,59 @@ void deleteSnapshots( /** * Deletes snapshots and releases respective lock files from remote store repository. * - * @param snapshotIds snapshot ids - * @param repositoryStateId the unique id identifying the state of the repository when the snapshot deletion began - * @param repositoryMetaVersion version of the updated repository metadata to write - * @param remoteStoreLockManagerFactory RemoteStoreLockManagerFactory to be used for cleaning up remote store lock files - * @param listener completion listener + * @param snapshotIds snapshot ids + * @param repositoryStateId the unique id identifying the state of the repository when the snapshot deletion began + * @param repositoryMetaVersion version of the updated repository metadata to write + * @param remoteStoreLockManagerFactory RemoteStoreLockManagerFactory to be used for cleaning up remote store lock files + * @param remoteSegmentStoreDirectoryFactory RemoteSegmentStoreDirectoryFactory to be used for cleaning up remote store segment files + * @param remoteStorePinnedTimestampService service for pinning and unpinning of the timestamp + * @param snapshotIdsPinnedTimestampMap map of snapshots ids and the pinned timestamp + * @param isShallowSnapshotV2 true for shallow snapshots v2 + * @param listener completion listener + */ + default void deleteSnapshotsInternal( + Collection snapshotIds, + long repositoryStateId, + Version repositoryMetaVersion, + RemoteStoreLockManagerFactory remoteStoreLockManagerFactory, + RemoteSegmentStoreDirectoryFactory remoteSegmentStoreDirectoryFactory, + RemoteStorePinnedTimestampService remoteStorePinnedTimestampService, + Map snapshotIdsPinnedTimestampMap, + boolean isShallowSnapshotV2, + ActionListener listener + ) { + throw new UnsupportedOperationException(); + } + + /** + * Deletes snapshots and unpin the snapshot timestamp using remoteStorePinnedTimestampService + * + * @param snapshotsWithPinnedTimestamp map of snapshot ids and the 
pinned timestamps + * @param repositoryStateId the unique id identifying the state of the repository when the snapshot deletion began + * @param repositoryMetaVersion version of the updated repository metadata to write + * @param remoteSegmentStoreDirectoryFactory RemoteSegmentStoreDirectoryFactory to be used for cleaning up remote store segment files + * @param remoteStorePinnedTimestampService service for pinning and unpinning of the timestamp + * @param listener completion listener + */ + default void deleteSnapshotsWithPinnedTimestamp( + Map snapshotsWithPinnedTimestamp, + long repositoryStateId, + Version repositoryMetaVersion, + RemoteSegmentStoreDirectoryFactory remoteSegmentStoreDirectoryFactory, + RemoteStorePinnedTimestampService remoteStorePinnedTimestampService, + ActionListener listener + ) { + throw new UnsupportedOperationException(); + } + + /** + * Deletes snapshots and releases respective lock files from remote store repository. + * + * @param snapshotIds snapshot ids + * @param repositoryStateId the unique id identifying the state of the repository when the snapshot deletion began + * @param repositoryMetaVersion version of the updated repository metadata to write + * @param remoteStoreLockManagerFactory RemoteStoreLockManagerFactory to be used for cleaning up remote store lock files + * @param listener completion listener */ default void deleteSnapshotsAndReleaseLockFiles( Collection snapshotIds, diff --git a/server/src/main/java/org/opensearch/repositories/RepositoryData.java b/server/src/main/java/org/opensearch/repositories/RepositoryData.java index b9fb2ba2498e8..5328618f688f4 100644 --- a/server/src/main/java/org/opensearch/repositories/RepositoryData.java +++ b/server/src/main/java/org/opensearch/repositories/RepositoryData.java @@ -112,6 +112,11 @@ public final class RepositoryData { * The indices found in the repository across all snapshots, as a name to {@link IndexId} mapping */ private final Map indices; + + public Map> getIndexSnapshots() { + return indexSnapshots; + } + + /** + * The snapshots that each index belongs to. 
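* Exposed read-only via the getIndexSnapshots() getter added above.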
*/ diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java index 0907326cd8668..7b7606d3b70c7 100644 --- a/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java @@ -114,6 +114,7 @@ import org.opensearch.index.remote.RemoteStorePathStrategy; import org.opensearch.index.remote.RemoteStorePathStrategy.BasePathInput; import org.opensearch.index.remote.RemoteStorePathStrategy.SnapshotShardPathInput; +import org.opensearch.index.remote.RemoteStoreUtils; import org.opensearch.index.snapshots.IndexShardRestoreFailedException; import org.opensearch.index.snapshots.IndexShardSnapshotStatus; import org.opensearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot; @@ -133,6 +134,7 @@ import org.opensearch.indices.RemoteStoreSettings; import org.opensearch.indices.recovery.RecoverySettings; import org.opensearch.indices.recovery.RecoveryState; +import org.opensearch.node.remotestore.RemoteStorePinnedTimestampService; import org.opensearch.repositories.IndexId; import org.opensearch.repositories.IndexMetaDataGenerations; import org.opensearch.repositories.Repository; @@ -189,6 +191,7 @@ import static org.opensearch.index.remote.RemoteStoreEnums.PathHashAlgorithm.FNV_1A_COMPOSITE_1; import static org.opensearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot.FileInfo.canonicalName; import static org.opensearch.repositories.blobstore.ChecksumBlobStoreFormat.SNAPSHOT_ONLY_FORMAT_PARAMS; +import static org.opensearch.snapshots.SnapshotsService.SNAPSHOT_PINNED_TIMESTAMP_DELIMITER; /** * BlobStore - based implementation of Snapshot Repository @@ -993,11 +996,15 @@ public void initializeSnapshot(SnapshotId snapshotId, List indices, Met } } - public void deleteSnapshotsAndReleaseLockFiles( + public void deleteSnapshotsInternal( Collection snapshotIds, long repositoryStateId, Version repositoryMetaVersion, RemoteStoreLockManagerFactory remoteStoreLockManagerFactory, + RemoteSegmentStoreDirectoryFactory remoteSegmentStoreDirectoryFactory, + RemoteStorePinnedTimestampService remoteStorePinnedTimestampService, + Map snapshotIdsPinnedTimestampMap, + boolean isShallowSnapshotV2, ActionListener listener ) { if (isReadOnly()) { @@ -1019,6 +1026,10 @@ protected void doRun() throws Exception { repositoryData, repositoryMetaVersion, remoteStoreLockManagerFactory, + remoteSegmentStoreDirectoryFactory, + remoteStorePinnedTimestampService, + snapshotIdsPinnedTimestampMap, + isShallowSnapshotV2, listener ); } @@ -1031,6 +1042,49 @@ public void onFailure(Exception e) { } } + @Override + public void deleteSnapshotsWithPinnedTimestamp( + Map snapshotIdPinnedTimestampMap, + long repositoryStateId, + Version repositoryMetaVersion, + RemoteSegmentStoreDirectoryFactory remoteSegmentStoreDirectoryFactory, + RemoteStorePinnedTimestampService remoteStorePinnedTimestampService, + ActionListener listener + ) { + deleteSnapshotsInternal( + snapshotIdPinnedTimestampMap.keySet(), + repositoryStateId, + repositoryMetaVersion, + null, // Passing null since no remote store lock files need to be cleaned up. 
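+ // Shallow v2 snapshots clean up remote store segment files through the directory factory below rather than through lock files.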
+ remoteSegmentStoreDirectoryFactory, + remoteStorePinnedTimestampService, + snapshotIdPinnedTimestampMap, + true, // true only for shallow snapshot v2 + listener + ); + } + + @Override + public void deleteSnapshotsAndReleaseLockFiles( + Collection snapshotIds, + long repositoryStateId, + Version repositoryMetaVersion, + RemoteStoreLockManagerFactory remoteStoreLockManagerFactory, + ActionListener listener + ) { + deleteSnapshotsInternal( + snapshotIds, + repositoryStateId, + repositoryMetaVersion, + remoteStoreLockManagerFactory, + null, + null, + Collections.emptyMap(), + false, + listener + ); + } + @Override public void deleteSnapshots( Collection snapshotIds, @@ -1038,11 +1092,15 @@ public void deleteSnapshots( Version repositoryMetaVersion, ActionListener listener ) { - deleteSnapshotsAndReleaseLockFiles( + deleteSnapshotsInternal( snapshotIds, repositoryStateId, repositoryMetaVersion, null, // Passing null since no remote store lock files need to be cleaned up. + null, // Passing null since no remote store segment files need to be cleaned up + null, + Collections.emptyMap(), + false, listener ); } @@ -1107,6 +1165,10 @@ private RepositoryData safeRepositoryData(long repositoryStateId, Map snapshotIdPinnedTimestampMap, + boolean isShallowSnapshotV2, ActionListener listener ) { // We can create map of indexId to ShardInfo based on the old repository data. This is later used in cleanup @@ -1165,29 +1231,59 @@ private void doDeleteShardSnapshots( ); }, listener::onFailure); // Once we have updated the repository, run the clean-ups + final StepListener pinnedTimestampListener = new StepListener<>(); writeUpdatedRepoDataStep.whenComplete(updatedRepoData -> { + if (snapshotIdPinnedTimestampMap == null || snapshotIdPinnedTimestampMap.isEmpty()) { + pinnedTimestampListener.onResponse(updatedRepoData); + } else { + removeSnapshotsPinnedTimestamp( + snapshotIdPinnedTimestampMap, + this, + updatedRepoData, + remoteStorePinnedTimestampService, + pinnedTimestampListener + ); + } + }, listener::onFailure); + + pinnedTimestampListener.whenComplete(updatedRepoData -> { + // Run unreferenced blobs cleanup in parallel to shard-level snapshot deletion final ActionListener afterCleanupsListener = new GroupedActionListener<>( ActionListener.wrap(() -> listener.onResponse(updatedRepoData)), 2 ); + cleanupUnlinkedRootAndIndicesBlobs( snapshotIds, foundIndices, rootBlobs, updatedRepoData, + repositoryData, remoteStoreLockManagerFactory, + remoteSegmentStoreDirectoryFactory, afterCleanupsListener, idToShardInfoMap ); - asyncCleanupUnlinkedShardLevelBlobs( - repositoryData, - snapshotIds, - writeShardMetaDataAndComputeDeletesStep.result(), - remoteStoreLockManagerFactory, - afterCleanupsListener - ); + if (isShallowSnapshotV2) { + cleanUpRemoteStoreFilesForDeletedIndicesV2( + repositoryData, + snapshotIds, + writeShardMetaDataAndComputeDeletesStep.result(), + remoteSegmentStoreDirectoryFactory, + afterCleanupsListener + ); + } else { + asyncCleanupUnlinkedShardLevelBlobs( + repositoryData, + snapshotIds, + writeShardMetaDataAndComputeDeletesStep.result(), + remoteStoreLockManagerFactory, + afterCleanupsListener + ); + } }, listener::onFailure); + } else { // Write the new repository data first (with the removed snapshot), using no shard generations final RepositoryData updatedRepoData = repositoryData.removeSnapshots(snapshotIds, ShardGenerations.EMPTY); @@ -1208,7 +1304,9 @@ private void doDeleteShardSnapshots( foundIndices, rootBlobs, newRepoData, + repositoryData, remoteStoreLockManagerFactory, + 
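// the segment directory factory is threaded through so stale index cleanup can release remote store segment files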
remoteSegmentStoreDirectoryFactory, afterCleanupsListener, idToShardInfoMap ); @@ -1235,6 +1333,90 @@ private void doDeleteShardSnapshots( } } + private void cleanUpRemoteStoreFilesForDeletedIndicesV2( + RepositoryData repositoryData, + Collection snapshotIds, + Collection result, + RemoteSegmentStoreDirectoryFactory remoteSegmentStoreDirectoryFactory, + ActionListener afterCleanupsListener + ) { + try { + Set uniqueIndexIds = new HashSet<>(); + for (ShardSnapshotMetaDeleteResult shardSnapshotMetaDeleteResult : result) { + uniqueIndexIds.add(shardSnapshotMetaDeleteResult.indexId.getId()); + } + // iterate through all the indices and trigger remote store directory cleanup for deleted index segments + for (String indexId : uniqueIndexIds) { + cleanRemoteStoreDirectoryIfNeeded(snapshotIds, indexId, repositoryData, remoteSegmentStoreDirectoryFactory); + } + afterCleanupsListener.onResponse(null); + } catch (Exception e) { + logger.warn("Exception during cleanup of remote directory files for snapshot v2", e); + afterCleanupsListener.onFailure(e); + } + + } + + private void removeSnapshotsPinnedTimestamp( + Map snapshotsWithPinnedTimestamp, + Repository repository, + RepositoryData repositoryData, + RemoteStorePinnedTimestampService remoteStorePinnedTimestampService, + ActionListener pinnedTimestampListener + ) { + // Create a GroupedActionListener to aggregate the results of all unpin operations + GroupedActionListener groupedListener = new GroupedActionListener<>( + ActionListener.wrap( + // This is called once all operations have succeeded + ignored -> pinnedTimestampListener.onResponse(repositoryData), + // This is called if any operation fails + pinnedTimestampListener::onFailure + ), + snapshotsWithPinnedTimestamp.size() + ); + + snapshotsWithPinnedTimestamp.forEach((snapshotId, pinnedTimestamp) -> { + removeSnapshotPinnedTimestamp( + remoteStorePinnedTimestampService, + snapshotId, + repository.getMetadata().name(), + pinnedTimestamp, + groupedListener + ); + }); + } + + private void removeSnapshotPinnedTimestamp( + RemoteStorePinnedTimestampService remoteStorePinnedTimestampService, + SnapshotId snapshotId, + String repository, + long timestampToUnpin, + ActionListener listener + ) { + remoteStorePinnedTimestampService.unpinTimestamp( + timestampToUnpin, + repository + SNAPSHOT_PINNED_TIMESTAMP_DELIMITER + snapshotId.getUUID(), + new ActionListener() { + @Override + public void onResponse(Void unused) { + logger.debug("Timestamp {} unpinned successfully for snapshot {}", timestampToUnpin, snapshotId.getName()); + listener.onResponse(null); + } + + @Override + public void onFailure(Exception e) { + logger.error( + "Failed to unpin timestamp {} for snapshot {} with exception {}", + timestampToUnpin, + snapshotId.getName(), + e + ); + listener.onFailure(e); + } + } + ); + } + /** * Cleans up the indices and data corresponding to all its shards.
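Note on the unpin call above: the entity name passed to unpinTimestamp is the repository name and the snapshot UUID joined by SNAPSHOT_PINNED_TIMESTAMP_DELIMITER, which a later hunk in this series defines as "__" in SnapshotsService. A minimal sketch of that key construction follows; the repository name and UUID values are illustrative, not taken from this patch:

```java
// Sketch: the entity name under which a snapshot's timestamp is pinned,
// mirroring the concatenation in removeSnapshotPinnedTimestamp above.
static String pinnedTimestampEntity(String repositoryName, SnapshotId snapshotId) {
    final String delimiter = "__"; // SnapshotsService.SNAPSHOT_PINNED_TIMESTAMP_DELIMITER
    return repositoryName + delimiter + snapshotId.getUUID();
}

// pinnedTimestampEntity("my-repo", snapshotId) -> "my-repo__P8nqmWt0RqeOdKcXBl1cfQ" (UUID illustrative)
```

Keying on the UUID rather than the snapshot name presumably keeps pins unambiguous if a snapshot name is later reused.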
* @@ -1251,7 +1433,9 @@ private void cleanupUnlinkedRootAndIndicesBlobs( Map foundIndices, Map rootBlobs, RepositoryData updatedRepoData, + RepositoryData oldRepoData, RemoteStoreLockManagerFactory remoteStoreLockManagerFactory, + RemoteSegmentStoreDirectoryFactory remoteSegmentStoreDirectoryFactory, ActionListener listener, Map idToShardInfoMap ) { @@ -1260,7 +1444,9 @@ private void cleanupUnlinkedRootAndIndicesBlobs( foundIndices, rootBlobs, updatedRepoData, + oldRepoData, remoteStoreLockManagerFactory, + remoteSegmentStoreDirectoryFactory, ActionListener.map(listener, ignored -> null), idToShardInfoMap ); @@ -1636,7 +1822,9 @@ private void cleanupStaleBlobs( Map foundIndices, Map rootBlobs, RepositoryData newRepoData, + RepositoryData oldRepoData, RemoteStoreLockManagerFactory remoteStoreLockManagerFactory, + RemoteSegmentStoreDirectoryFactory remoteSegmentStoreDirectoryFactory, ActionListener listener, Map idToShardInfoMap ) { @@ -1665,9 +1853,12 @@ private void cleanupStaleBlobs( } else { Map snapshotShardPaths = getSnapshotShardPaths(); cleanupStaleIndices( + deletedSnapshots, foundIndices, survivingIndexIds, remoteStoreLockManagerFactory, + remoteSegmentStoreDirectoryFactory, + oldRepoData, groupedListener, snapshotShardPaths, idToShardInfoMap @@ -1697,12 +1888,14 @@ private Map getSnapshotShardPaths() { * @param repositoryStateId Current repository state id * @param repositoryMetaVersion version of the updated repository metadata to write * @param remoteStoreLockManagerFactory RemoteStoreLockManagerFactory to be used for cleaning up remote store lock files. + * @param remoteSegmentStoreDirectoryFactory RemoteSegmentStoreDirectoryFactory to be used for cleaning up remote store segments. * @param listener Listener to complete when done */ public void cleanup( long repositoryStateId, Version repositoryMetaVersion, RemoteStoreLockManagerFactory remoteStoreLockManagerFactory, + RemoteSegmentStoreDirectoryFactory remoteSegmentStoreDirectoryFactory, ActionListener listener ) { try { @@ -1735,7 +1928,9 @@ public void cleanup( foundIndices, rootBlobs, repositoryData, + repositoryData, remoteStoreLockManagerFactory, + remoteSegmentStoreDirectoryFactory, ActionListener.map(listener, RepositoryCleanupResult::new), Collections.emptyMap() ), @@ -1826,9 +2021,12 @@ private List cleanupStaleRootFiles( } void cleanupStaleIndices( + Collection deletedSnapshots, Map foundIndices, Set survivingIndexIds, RemoteStoreLockManagerFactory remoteStoreLockManagerFactory, + RemoteSegmentStoreDirectoryFactory remoteSegmentStoreDirectoryFactory, + RepositoryData oldRepoData, GroupedActionListener listener, Map snapshotShardPaths, Map idToShardInfoMap @@ -1856,8 +2054,11 @@ void cleanupStaleIndices( ); for (int i = 0; i < workers; ++i) { executeOneStaleIndexDelete( + deletedSnapshots, staleIndicesToDelete, remoteStoreLockManagerFactory, + remoteSegmentStoreDirectoryFactory, + oldRepoData, groupedListener, snapshotShardPaths, idToShardInfoMap @@ -1892,8 +2093,11 @@ private static boolean isIndexPresent(ClusterService clusterService, String inde * @throws InterruptedException if the thread is interrupted while waiting */ private void executeOneStaleIndexDelete( + Collection deletedSnapshots, BlockingQueue> staleIndicesToDelete, RemoteStoreLockManagerFactory remoteStoreLockManagerFactory, + RemoteSegmentStoreDirectoryFactory remoteSegmentStoreDirectoryFactory, + RepositoryData oldRepoData, GroupedActionListener listener, Map snapshotShardPaths, Map idToShardInfoMap @@ -1924,6 +2128,10 @@ private void 
executeOneStaleIndexDelete( // If there are matchingShardPaths, then we delete them after we have deleted the shard data. deleteResult = deleteResult.add(cleanUpStaleSnapshotShardPathsFile(matchingShardPaths, snapshotShardPaths)); + if (remoteSegmentStoreDirectoryFactory != null) { + cleanRemoteStoreDirectoryIfNeeded(deletedSnapshots, indexSnId, oldRepoData, remoteSegmentStoreDirectoryFactory); + } + // Finally, we delete the [base_path]/indexId folder deleteResult = deleteResult.add(indexEntry.getValue().delete()); // Deleting the index folder logger.debug("[{}] Cleaned up stale index [{}]", metadata.name(), indexSnId); @@ -1945,8 +2153,11 @@ private void executeOneStaleIndexDelete( return DeleteResult.ZERO; } finally { executeOneStaleIndexDelete( + deletedSnapshots, staleIndicesToDelete, remoteStoreLockManagerFactory, + remoteSegmentStoreDirectoryFactory, + oldRepoData, listener, snapshotShardPaths, idToShardInfoMap @@ -1955,6 +2166,90 @@ private void executeOneStaleIndexDelete( })); } + /** + * Cleans up the remote store directory if needed. + *

<p> This method cleans up segments in the remote store directory for deleted indices. + * This cleanup flow is executed only for v2 snapshots. For v1 snapshots, + * the cleanup is done per shard after releasing the lock files. + * </p> + * + * <p> Since this method requires old repository data to fetch index metadata of the deleted index, + * the cleanup won't happen on retries in case of failures. This is because subsequent retries may + * not have access to the older repository data. + * </p>
+ * + * @param deletedSnapshots The snapshots being deleted; cleanup is skipped for any snapshot outside this delete batch + * @param indexSnId The snapshot index id of the index to be cleaned up + * @param oldRepoData The old repository metadata used to fetch the index metadata. + * @param remoteSegmentStoreDirectoryFactory RemoteSegmentStoreDirectoryFactory to be used for cleaning up remote + * store segments + */ + private void cleanRemoteStoreDirectoryIfNeeded( + Collection deletedSnapshots, + String indexSnId, + RepositoryData oldRepoData, + RemoteSegmentStoreDirectoryFactory remoteSegmentStoreDirectoryFactory + ) { + assert (indexSnId != null); + + IndexId indexId = null; + List snapshotIds = Collections.emptyList(); + try { + for (Map.Entry> entry : oldRepoData.getIndexSnapshots().entrySet()) { + indexId = entry.getKey(); + if (indexId != null && indexId.getId().equals(indexSnId)) { + snapshotIds = entry.getValue(); + break; + } + } + if (snapshotIds.isEmpty()) { + logger.info("No snapshots found for indexSnId: {}", indexSnId); + return; + } + for (SnapshotId snapshotId : snapshotIds) { + try { + // skip cleanup for snapshot not present in deleted snapshots list + if (!deletedSnapshots.contains(snapshotId)) { + continue; + } + IndexMetadata prevIndexMetadata = this.getSnapshotIndexMetaData(oldRepoData, snapshotId, indexId); + if (prevIndexMetadata != null && !isIndexPresent(clusterService, prevIndexMetadata.getIndexUUID())) { + String remoteStoreRepository = IndexMetadata.INDEX_REMOTE_TRANSLOG_REPOSITORY_SETTING.get( + prevIndexMetadata.getSettings() + ); + assert (remoteStoreRepository != null); + + RemoteStorePathStrategy remoteStorePathStrategy = RemoteStoreUtils.determineRemoteStorePathStrategy( + prevIndexMetadata + ); + + for (int shardId = 0; shardId < prevIndexMetadata.getNumberOfShards(); shardId++) { + remoteDirectoryCleanupAsync( + remoteSegmentStoreDirectoryFactory, + threadPool, + remoteStoreRepository, + prevIndexMetadata.getIndexUUID(), + new ShardId(Index.UNKNOWN_INDEX_NAME, prevIndexMetadata.getIndexUUID(), shardId), + ThreadPool.Names.REMOTE_PURGE, + remoteStorePathStrategy + ); + } + } + } catch (Exception e) { + logger.warn( + new ParameterizedMessage( + "Exception during cleanup of remote directory for snapshot [{}] deleted index [{}]", + snapshotId, + indexSnId + ), + e + ); + } + } + } catch (Exception e) { + logger.error(new ParameterizedMessage("Exception during the remote directory cleanup for indexSnId [{}]", indexSnId), e); + } + + } + /** * Finds and returns a list of shard paths that match the given index ID.
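To summarize the guards in cleanRemoteStoreDirectoryIfNeeded above: remote segments of a stale index are purged only for snapshots that are part of the current delete batch, whose index metadata is still resolvable from the old repository data, and whose index UUID no longer belongs to a live index in the cluster. A condensed restatement of those checks (a sketch of the existing conditions, not additional logic):

```java
// Sketch: the preconditions under which the method above triggers
// remoteDirectoryCleanupAsync for each shard of a deleted index.
boolean shouldPurgeRemoteSegments(
    SnapshotId snapshotId,
    Collection<SnapshotId> deletedSnapshots,
    IndexMetadata prevIndexMetadata
) {
    return deletedSnapshots.contains(snapshotId)     // snapshot is in this delete batch
        && prevIndexMetadata != null                 // metadata resolvable from old repo data
        && isIndexPresent(clusterService, prevIndexMetadata.getIndexUUID()) == false; // index not live
}
```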
* diff --git a/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java b/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java index 99a09cb863a8e..2265fe519fc78 100644 --- a/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java @@ -92,6 +92,7 @@ import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.index.Index; import org.opensearch.core.index.shard.ShardId; +import org.opensearch.index.store.RemoteSegmentStoreDirectoryFactory; import org.opensearch.index.store.lockmanager.RemoteStoreLockManagerFactory; import org.opensearch.indices.RemoteStoreSettings; import org.opensearch.node.remotestore.RemoteStorePinnedTimestampService; @@ -125,6 +126,7 @@ import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.CountDownLatch; import java.util.concurrent.Executor; import java.util.function.Consumer; import java.util.function.Function; @@ -181,6 +183,8 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus private final RemoteStoreLockManagerFactory remoteStoreLockManagerFactory; + private final RemoteSegmentStoreDirectoryFactory remoteSegmentStoreDirectoryFactory; + private final ThreadPool threadPool; private final Map>>> snapshotCompletionListeners = @@ -227,6 +231,8 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus Setting.Property.Dynamic ); + public static final String SNAPSHOT_PINNED_TIMESTAMP_DELIMITER = "__"; + /** * Setting to specify the maximum number of shards that can be included in the result for the snapshot status * API call. Note that it does not apply to V2-shallow snapshots. @@ -239,7 +245,6 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus Setting.Property.Dynamic ); - private static final String SNAPSHOT_PINNED_TIMESTAMP_DELIMITER = "__"; private volatile int maxConcurrentOperations; public SnapshotsService( @@ -260,6 +265,11 @@ public SnapshotsService( remoteStoreSettings.getSegmentsPathFixedPrefix() ); this.threadPool = transportService.getThreadPool(); + this.remoteSegmentStoreDirectoryFactory = new RemoteSegmentStoreDirectoryFactory( + () -> repositoriesService, + threadPool, + remoteStoreSettings.getSegmentsPathFixedPrefix() + ); this.transportService = transportService; this.remoteStorePinnedTimestampService = remoteStorePinnedTimestampService; @@ -3053,18 +3063,67 @@ private void deleteSnapshotsFromRepository( // the flag. This can be improved by having the info whether there ever were any shallow snapshot present in this repository // or not in RepositoryData. 
// SEE https://github.com/opensearch-project/OpenSearch/issues/8610 - final boolean cleanupRemoteStoreLockFiles = REMOTE_STORE_INDEX_SHALLOW_COPY.get(repository.getMetadata().settings()); - if (cleanupRemoteStoreLockFiles) { - repository.deleteSnapshotsAndReleaseLockFiles( - snapshotIds, - repositoryData.getGenId(), - minCompatibleVersion(minNodeVersion, repositoryData, snapshotIds), - remoteStoreLockManagerFactory, - ActionListener.wrap(updatedRepoData -> { - logger.info("snapshots {} deleted", snapshotIds); - removeSnapshotDeletionFromClusterState(deleteEntry, null, updatedRepoData); - }, ex -> removeSnapshotDeletionFromClusterState(deleteEntry, ex, repositoryData)) - ); + final boolean remoteStoreShallowCopyEnabled = REMOTE_STORE_INDEX_SHALLOW_COPY.get(repository.getMetadata().settings()); + if (remoteStoreShallowCopyEnabled) { + Map snapshotsWithPinnedTimestamp = new ConcurrentHashMap<>(); + List snapshotsWithLockFiles = Collections.synchronizedList(new ArrayList<>()); + + CountDownLatch latch = new CountDownLatch(1); + + threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(() -> { + try { + for (SnapshotId snapshotId : snapshotIds) { + try { + SnapshotInfo snapshotInfo = repository.getSnapshotInfo(snapshotId); + if (snapshotInfo.getPinnedTimestamp() > 0) { + snapshotsWithPinnedTimestamp.put(snapshotId, snapshotInfo.getPinnedTimestamp()); + } else { + snapshotsWithLockFiles.add(snapshotId); + } + } catch (Exception e) { + logger.warn("Failed to get snapshot info for {} with exception {}", snapshotId, e); + removeSnapshotDeletionFromClusterState(deleteEntry, e, repositoryData); + } + } + } finally { + latch.countDown(); + } + }); + try { + latch.await(); + if (snapshotsWithLockFiles.size() > 0) { + repository.deleteSnapshotsAndReleaseLockFiles( + snapshotsWithLockFiles, + repositoryData.getGenId(), + minCompatibleVersion(minNodeVersion, repositoryData, snapshotsWithLockFiles), + remoteStoreLockManagerFactory, + ActionListener.wrap(updatedRepoData -> { + logger.info("snapshots {} deleted", snapshotsWithLockFiles); + removeSnapshotDeletionFromClusterState(deleteEntry, null, updatedRepoData); + }, ex -> removeSnapshotDeletionFromClusterState(deleteEntry, ex, repositoryData)) + ); + } + if (snapshotsWithPinnedTimestamp.size() > 0) { + + repository.deleteSnapshotsWithPinnedTimestamp( + snapshotsWithPinnedTimestamp, + repositoryData.getGenId(), + minCompatibleVersion(minNodeVersion, repositoryData, snapshotsWithPinnedTimestamp.keySet()), + remoteSegmentStoreDirectoryFactory, + remoteStorePinnedTimestampService, + ActionListener.wrap(updatedRepoData -> { + logger.info("snapshots {} deleted", snapshotsWithPinnedTimestamp); + removeSnapshotDeletionFromClusterState(deleteEntry, null, updatedRepoData); + }, ex -> removeSnapshotDeletionFromClusterState(deleteEntry, ex, repositoryData)) + ); + } + + } catch (InterruptedException e) { + logger.error("Interrupted while waiting for snapshot info processing", e); + Thread.currentThread().interrupt(); + removeSnapshotDeletionFromClusterState(deleteEntry, e, repositoryData); + } + } else { repository.deleteSnapshots( snapshotIds, diff --git a/server/src/test/java/org/opensearch/repositories/blobstore/BlobStoreRepositoryTests.java b/server/src/test/java/org/opensearch/repositories/blobstore/BlobStoreRepositoryTests.java index fcc0c5198894f..6540d2f55a349 100644 --- a/server/src/test/java/org/opensearch/repositories/blobstore/BlobStoreRepositoryTests.java +++ b/server/src/test/java/org/opensearch/repositories/blobstore/BlobStoreRepositoryTests.java @@ 
-54,6 +54,7 @@ import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.env.Environment; import org.opensearch.index.remote.RemoteStoreEnums; +import org.opensearch.index.store.RemoteSegmentStoreDirectoryFactory; import org.opensearch.index.store.lockmanager.RemoteStoreLockManager; import org.opensearch.index.store.lockmanager.RemoteStoreLockManagerFactory; import org.opensearch.indices.recovery.RecoverySettings; @@ -74,6 +75,7 @@ import org.opensearch.test.OpenSearchIntegTestCase; import java.nio.file.Path; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; @@ -459,11 +461,18 @@ public void testCleanupStaleIndices() throws Exception { foundIndices.put("stale-index", staleIndexContainer); foundIndices.put("current-index", currentIndexContainer); + List snapshotIds = new ArrayList<>(); + snapshotIds.add(new SnapshotId("snap1", UUIDs.randomBase64UUID())); + snapshotIds.add(new SnapshotId("snap2", UUIDs.randomBase64UUID())); + Set survivingIndexIds = new HashSet<>(); survivingIndexIds.add("current-index"); + RepositoryData repositoryData = generateRandomRepoData(); + // Create a mock RemoteStoreLockManagerFactory RemoteStoreLockManagerFactory mockRemoteStoreLockManagerFactory = mock(RemoteStoreLockManagerFactory.class); + RemoteSegmentStoreDirectoryFactory mockRemoteSegmentStoreDirectoryFactory = mock(RemoteSegmentStoreDirectoryFactory.class); RemoteStoreLockManager mockLockManager = mock(RemoteStoreLockManager.class); when(mockRemoteStoreLockManagerFactory.newLockManager(anyString(), anyString(), anyString(), any())).thenReturn(mockLockManager); @@ -479,9 +488,9 @@ public void testCleanupStaleIndices() throws Exception { // Mock the cleanupStaleIndices method to call our test implementation doAnswer(invocation -> { - Map indices = invocation.getArgument(0); - Set surviving = invocation.getArgument(1); - GroupedActionListener listener = invocation.getArgument(3); + Map indices = invocation.getArgument(1); + Set surviving = invocation.getArgument(2); + GroupedActionListener listener = invocation.getArgument(6); // Simulate the cleanup process DeleteResult result = DeleteResult.ZERO; @@ -494,7 +503,7 @@ public void testCleanupStaleIndices() throws Exception { listener.onResponse(result); return null; - }).when(repository).cleanupStaleIndices(any(), any(), any(), any(), any(), anyMap()); + }).when(repository).cleanupStaleIndices(any(), any(), any(), any(), any(), any(), any(), any(), anyMap()); AtomicReference> resultReference = new AtomicReference<>(); CountDownLatch latch = new CountDownLatch(1); @@ -509,9 +518,12 @@ public void testCleanupStaleIndices() throws Exception { // Call the method we're testing repository.cleanupStaleIndices( + snapshotIds, foundIndices, survivingIndexIds, mockRemoteStoreLockManagerFactory, + null, + repositoryData, listener, mockSnapshotShardPaths, Collections.emptyMap() From 468f120141b6b472a143034fe59c12fed06b4a35 Mon Sep 17 00:00:00 2001 From: Ashish Singh Date: Thu, 5 Sep 2024 12:30:51 +0530 Subject: [PATCH 05/17] Optimise snapshot deletion to speed up snapshot deletion and creation (#15568) (#15724) --------- Signed-off-by: Ashish Singh --- CHANGELOG.md | 1 + .../blobstore/BlobStoreRepository.java | 92 ++++++++++++++++--- .../org/opensearch/threadpool/ThreadPool.java | 7 ++ .../threadpool/ScalingThreadPoolTests.java | 1 + 4 files changed, 87 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 47e0d4f8b6556..886c082053fd6 100644 --- a/CHANGELOG.md +++ 
b/CHANGELOG.md @@ -47,6 +47,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add canRemain method to TargetPoolAllocationDecider to move shards from local to remote pool for hot to warm tiering ([#15010](https://github.com/opensearch-project/OpenSearch/pull/15010)) - Add support for pluggable deciders for concurrent search ([#15363](https://github.com/opensearch-project/OpenSearch/pull/15363)) - Add support for comma-separated list of index names to be used with Snapshot Status API ([#15409](https://github.com/opensearch-project/OpenSearch/pull/15409))[SnapshotV2] Snapshot Status API changes (#15409)) +- Optimise snapshot deletion to speed up snapshot deletion and creation ([#15568](https://github.com/opensearch-project/OpenSearch/pull/15568)) - [Remote Publication] Added checksum validation for cluster state behind a cluster setting ([#15218](https://github.com/opensearch-project/OpenSearch/pull/15218)) - Relax the join validation for Remote State publication ([#15471](https://github.com/opensearch-project/OpenSearch/pull/15471)) - Optimize NodeIndicesStats output behind flag ([#14454](https://github.com/opensearch-project/OpenSearch/pull/14454)) diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java index 7b7606d3b70c7..d696cf329a2a5 100644 --- a/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java @@ -66,6 +66,7 @@ import org.opensearch.common.Nullable; import org.opensearch.common.Numbers; import org.opensearch.common.Priority; +import org.opensearch.common.Randomness; import org.opensearch.common.SetOnce; import org.opensearch.common.UUIDs; import org.opensearch.common.blobstore.BlobContainer; @@ -1460,6 +1461,10 @@ private void asyncCleanupUnlinkedShardLevelBlobs( ActionListener listener ) { final List> filesToDelete = resolveFilesToDelete(oldRepositoryData, snapshotIds, deleteResults); + long startTimeNs = System.nanoTime(); + Randomness.shuffle(filesToDelete); + logger.debug("[{}] shuffled the filesToDelete with timeElapsedNs={}", metadata.name(), (System.nanoTime() - startTimeNs)); + if (filesToDelete.isEmpty()) { listener.onResponse(null); return; @@ -1477,8 +1482,8 @@ private void asyncCleanupUnlinkedShardLevelBlobs( staleFilesToDeleteInBatch.size() ); - // Start as many workers as fit into the snapshot pool at once at the most - final int workers = Math.min(threadPool.info(ThreadPool.Names.SNAPSHOT).getMax(), staleFilesToDeleteInBatch.size()); + // Start as many workers as fit into the snapshot_deletion pool at once at the most + final int workers = Math.min(threadPool.info(ThreadPool.Names.SNAPSHOT_DELETION).getMax(), staleFilesToDeleteInBatch.size()); for (int i = 0; i < workers; ++i) { executeStaleShardDelete(staleFilesToDeleteInBatch, remoteStoreLockManagerFactory, groupedListener); } @@ -1582,7 +1587,7 @@ private void executeStaleShardDelete( if (filesToDelete == null) { return; } - threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(ActionRunnable.wrap(listener, l -> { + threadPool.executor(ThreadPool.Names.SNAPSHOT_DELETION).execute(ActionRunnable.wrap(listener, l -> { try { // filtering files for which remote store lock release and cleanup succeeded, // remaining files for which it failed will be retried in next snapshot delete run. 
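Two optimisations land in this hunk. First, the file list is shuffled before batching, presumably to spread deletes across blob-store key prefixes instead of concentrating them on one partition; the debug log records how cheap the shuffle is. Second, the worker fan-out moves from the snapshot pool to the new snapshot_deletion pool, whose maximum (per the ThreadPool hunk later in this commit) is 4x allocated processors clamped to [64, 256]. A worked sketch of the resulting worker count, assuming boundedBy clamps its first argument into [min, max]; processor and batch counts are illustrative:

```java
// Sketch: how many delete workers start, given node size and batch count.
static int boundedBy(int value, int min, int max) {
    return Math.min(max, Math.max(min, value)); // clamp value into [min, max]
}

int allocatedProcessors = 8;                               // illustrative node size
int poolMax = boundedBy(4 * allocatedProcessors, 64, 256); // 32 clamped up to 64
int batches = 10;                                          // staleFilesToDeleteInBatch.size()
int workers = Math.min(poolMax, batches);                  // 10 workers submitted
```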
@@ -1646,7 +1651,7 @@ private void writeUpdatedShardMetaDataAndComputeDeletes( ActionListener> onAllShardsCompleted ) { - final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT); + final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT_DELETION); final List indices = oldRepositoryData.indicesToUpdateAfterRemovingSnapshot(snapshotIds); if (indices.isEmpty()) { @@ -1836,7 +1841,7 @@ private void cleanupStaleBlobs( listener.onResponse(deleteResult); }, listener::onFailure), 2); - final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT); + final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT_DELETION); final List staleRootBlobs = staleRootBlobs(newRepoData, rootBlobs.keySet()); if (staleRootBlobs.isEmpty()) { groupedListener.onResponse(DeleteResult.ZERO); @@ -2049,7 +2054,7 @@ void cleanupStaleIndices( // Start as many workers as fit into the snapshot pool at once at the most final int workers = Math.min( - threadPool.info(ThreadPool.Names.SNAPSHOT).getMax(), + threadPool.info(ThreadPool.Names.SNAPSHOT_DELETION).getMax(), foundIndices.size() - survivingIndexIds.size() ); for (int i = 0; i < workers; ++i) { @@ -2107,7 +2112,7 @@ private void executeOneStaleIndexDelete( return; } final String indexSnId = indexEntry.getKey(); - threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(ActionRunnable.supply(listener, () -> { + threadPool.executor(ThreadPool.Names.SNAPSHOT_DELETION).execute(ActionRunnable.supply(listener, () -> { try { logger.debug("[{}] Found stale index [{}]. Cleaning it up", metadata.name(), indexSnId); List matchingShardPaths = findMatchingShardPaths(indexSnId, snapshotShardPaths); @@ -2475,9 +2480,10 @@ public void finalizeSnapshot( repositoryUpdatePriority, ActionListener.wrap(newRepoData -> { if (writeShardGens) { - cleanupOldShardGens(existingRepositoryData, updatedRepositoryData); + cleanupOldShardGens(existingRepositoryData, updatedRepositoryData, newRepoData, listener); + } else { + listener.onResponse(newRepoData); } - listener.onResponse(newRepoData); }, onUpdateFailure) ); }, onUpdateFailure), 2 + indices.size()); @@ -2642,7 +2648,12 @@ private void logShardPathsOperationWarning(IndexId indexId, SnapshotId snapshotI } // Delete all old shard gen blobs that aren't referenced any longer as a result from moving to updated repository data - private void cleanupOldShardGens(RepositoryData existingRepositoryData, RepositoryData updatedRepositoryData) { + private void cleanupOldShardGens( + RepositoryData existingRepositoryData, + RepositoryData updatedRepositoryData, + RepositoryData newRepositoryData, + ActionListener listener + ) { final List toDelete = new ArrayList<>(); updatedRepositoryData.shardGenerations() .obsoleteShardGenerations(existingRepositoryData.shardGenerations()) @@ -2651,10 +2662,62 @@ private void cleanupOldShardGens(RepositoryData existingRepositoryData, Reposito (shardId, oldGen) -> toDelete.add(shardPath(indexId, shardId).buildAsString() + INDEX_FILE_PREFIX + oldGen) ) ); + if (toDelete.isEmpty()) { + listener.onResponse(newRepositoryData); + return; + } try { - deleteFromContainer(rootBlobContainer(), toDelete); + AtomicInteger counter = new AtomicInteger(); + Collection> subList = toDelete.stream() + .collect(Collectors.groupingBy(it -> counter.getAndIncrement() / maxShardBlobDeleteBatch)) + .values(); + final BlockingQueue> staleFilesToDeleteInBatch = new LinkedBlockingQueue<>(subList); + logger.info( + "[{}] cleanupOldShardGens toDeleteSize={} groupSize={}", + metadata.name(), + 
toDelete.size(), + staleFilesToDeleteInBatch.size() + ); + final GroupedActionListener groupedListener = new GroupedActionListener<>(ActionListener.wrap(r -> { + logger.info("[{}] completed cleanupOldShardGens", metadata.name()); + listener.onResponse(newRepositoryData); + }, ex -> { + logger.error(new ParameterizedMessage("[{}] exception in cleanupOldShardGens", metadata.name()), ex); + listener.onResponse(newRepositoryData); + }), staleFilesToDeleteInBatch.size()); + + // Start as many workers as fit into the snapshot pool at once at the most + final int workers = Math.min(threadPool.info(ThreadPool.Names.SNAPSHOT_DELETION).getMax(), staleFilesToDeleteInBatch.size()); + for (int i = 0; i < workers; ++i) { + executeOldShardGensCleanup(staleFilesToDeleteInBatch, groupedListener); + } } catch (Exception e) { - logger.warn("Failed to clean up old shard generation blobs", e); + logger.warn(new ParameterizedMessage(" [{}] Failed to clean up old shard generation blobs", metadata.name()), e); + listener.onResponse(newRepositoryData); + } + } + + private void executeOldShardGensCleanup(BlockingQueue> staleFilesToDeleteInBatch, GroupedActionListener listener) + throws InterruptedException { + List filesToDelete = staleFilesToDeleteInBatch.poll(0L, TimeUnit.MILLISECONDS); + if (filesToDelete != null) { + threadPool.executor(ThreadPool.Names.SNAPSHOT_DELETION).execute(ActionRunnable.wrap(listener, l -> { + try { + deleteFromContainer(rootBlobContainer(), filesToDelete); + l.onResponse(null); + } catch (Exception e) { + logger.warn( + () -> new ParameterizedMessage( + "[{}] Failed to delete following blobs during cleanupOldFiles : {}", + metadata.name(), + filesToDelete + ), + e + ); + l.onFailure(e); + } + executeOldShardGensCleanup(staleFilesToDeleteInBatch, listener); + })); } } @@ -2771,10 +2834,11 @@ public long getRemoteDownloadThrottleTimeInNanos() { } protected void assertSnapshotOrGenericThread() { - assert Thread.currentThread().getName().contains('[' + ThreadPool.Names.SNAPSHOT + ']') + assert Thread.currentThread().getName().contains('[' + ThreadPool.Names.SNAPSHOT_DELETION + ']') + || Thread.currentThread().getName().contains('[' + ThreadPool.Names.SNAPSHOT + ']') || Thread.currentThread().getName().contains('[' + ThreadPool.Names.GENERIC + ']') : "Expected current thread [" + Thread.currentThread() - + "] to be the snapshot or generic thread."; + + "] to be the snapshot_deletion or snapshot or generic thread."; } @Override diff --git a/server/src/main/java/org/opensearch/threadpool/ThreadPool.java b/server/src/main/java/org/opensearch/threadpool/ThreadPool.java index 38bb1a7490a58..6176f2e76d760 100644 --- a/server/src/main/java/org/opensearch/threadpool/ThreadPool.java +++ b/server/src/main/java/org/opensearch/threadpool/ThreadPool.java @@ -104,6 +104,7 @@ public static class Names { public static final String REFRESH = "refresh"; public static final String WARMER = "warmer"; public static final String SNAPSHOT = "snapshot"; + public static final String SNAPSHOT_DELETION = "snapshot_deletion"; public static final String FORCE_MERGE = "force_merge"; public static final String FETCH_SHARD_STARTED = "fetch_shard_started"; public static final String FETCH_SHARD_STORE = "fetch_shard_store"; @@ -175,6 +176,7 @@ public static ThreadPoolType fromType(String type) { map.put(Names.REFRESH, ThreadPoolType.SCALING); map.put(Names.WARMER, ThreadPoolType.SCALING); map.put(Names.SNAPSHOT, ThreadPoolType.SCALING); + map.put(Names.SNAPSHOT_DELETION, ThreadPoolType.SCALING); map.put(Names.FORCE_MERGE, 
ThreadPoolType.FIXED); map.put(Names.FETCH_SHARD_STARTED, ThreadPoolType.SCALING); map.put(Names.FETCH_SHARD_STORE, ThreadPoolType.SCALING); @@ -233,6 +235,7 @@ public ThreadPool( final int halfProcMaxAt5 = halfAllocatedProcessorsMaxFive(allocatedProcessors); final int halfProcMaxAt10 = halfAllocatedProcessorsMaxTen(allocatedProcessors); final int genericThreadPoolMax = boundedBy(4 * allocatedProcessors, 128, 512); + final int snapshotDeletionPoolMax = boundedBy(4 * allocatedProcessors, 64, 256); builders.put(Names.GENERIC, new ScalingExecutorBuilder(Names.GENERIC, 4, genericThreadPoolMax, TimeValue.timeValueSeconds(30))); builders.put(Names.WRITE, new FixedExecutorBuilder(settings, Names.WRITE, allocatedProcessors, 10000)); builders.put(Names.GET, new FixedExecutorBuilder(settings, Names.GET, allocatedProcessors, 1000)); @@ -262,6 +265,10 @@ public ThreadPool( builders.put(Names.REFRESH, new ScalingExecutorBuilder(Names.REFRESH, 1, halfProcMaxAt10, TimeValue.timeValueMinutes(5))); builders.put(Names.WARMER, new ScalingExecutorBuilder(Names.WARMER, 1, halfProcMaxAt5, TimeValue.timeValueMinutes(5))); builders.put(Names.SNAPSHOT, new ScalingExecutorBuilder(Names.SNAPSHOT, 1, halfProcMaxAt5, TimeValue.timeValueMinutes(5))); + builders.put( + Names.SNAPSHOT_DELETION, + new ScalingExecutorBuilder(Names.SNAPSHOT_DELETION, 1, snapshotDeletionPoolMax, TimeValue.timeValueMinutes(5)) + ); builders.put( Names.FETCH_SHARD_STARTED, new ScalingExecutorBuilder(Names.FETCH_SHARD_STARTED, 1, 2 * allocatedProcessors, TimeValue.timeValueMinutes(5)) diff --git a/server/src/test/java/org/opensearch/threadpool/ScalingThreadPoolTests.java b/server/src/test/java/org/opensearch/threadpool/ScalingThreadPoolTests.java index d8f04a11fe494..4f59f9688fb7e 100644 --- a/server/src/test/java/org/opensearch/threadpool/ScalingThreadPoolTests.java +++ b/server/src/test/java/org/opensearch/threadpool/ScalingThreadPoolTests.java @@ -148,6 +148,7 @@ private int expectedSize(final String threadPoolName, final int numberOfProcesso sizes.put(ThreadPool.Names.REFRESH, ThreadPool::halfAllocatedProcessorsMaxTen); sizes.put(ThreadPool.Names.WARMER, ThreadPool::halfAllocatedProcessorsMaxFive); sizes.put(ThreadPool.Names.SNAPSHOT, ThreadPool::halfAllocatedProcessorsMaxFive); + sizes.put(ThreadPool.Names.SNAPSHOT_DELETION, n -> ThreadPool.boundedBy(4 * n, 64, 256)); sizes.put(ThreadPool.Names.FETCH_SHARD_STARTED, ThreadPool::twiceAllocatedProcessors); sizes.put(ThreadPool.Names.FETCH_SHARD_STORE, ThreadPool::twiceAllocatedProcessors); sizes.put(ThreadPool.Names.TRANSLOG_TRANSFER, ThreadPool::halfAllocatedProcessors); From dcdc4d2d9704221e3103f4b899dafeea11a3ee6f Mon Sep 17 00:00:00 2001 From: "opensearch-trigger-bot[bot]" <98922864+opensearch-trigger-bot[bot]@users.noreply.github.com> Date: Thu, 5 Sep 2024 14:39:03 +0530 Subject: [PATCH 06/17] Throw CircuitBreakingException for too many shards in snapshot status API call (#15688) (#15719) (cherry picked from commit f33c786a1fbf44ee48e34674ed42fefd6a8d7dbd) Signed-off-by: Lakshya Taragi Signed-off-by: github-actions[bot] Co-authored-by: github-actions[bot] --- .../snapshots/SnapshotStatusApisIT.java | 19 +++++++-------- .../TransportSnapshotsStatusAction.java | 23 +++++++++++++------ 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/SnapshotStatusApisIT.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/SnapshotStatusApisIT.java index 5a043e69e9735..c3214022df663 100644 ---
a/server/src/internalClusterTest/java/org/opensearch/snapshots/SnapshotStatusApisIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/SnapshotStatusApisIT.java @@ -51,6 +51,7 @@ import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.io.IOUtils; import org.opensearch.core.common.Strings; +import org.opensearch.core.common.breaker.CircuitBreakingException; import org.opensearch.core.common.unit.ByteSizeUnit; import org.opensearch.core.rest.RestStatus; import org.opensearch.index.IndexNotFoundException; @@ -602,11 +603,11 @@ public void testSnapshotStatusApiFailureForTooManyShardsAcrossSnapshots() throws // across a single snapshot assertBusy(() -> { - TooManyShardsInSnapshotsStatusException exception = expectThrows( - TooManyShardsInSnapshotsStatusException.class, + CircuitBreakingException exception = expectThrows( + CircuitBreakingException.class, () -> client().admin().cluster().prepareSnapshotStatus(repositoryName).setSnapshots(snapshot1).execute().actionGet() ); - assertEquals(exception.status(), RestStatus.REQUEST_ENTITY_TOO_LARGE); + assertEquals(exception.status(), RestStatus.TOO_MANY_REQUESTS); assertTrue( exception.getMessage().endsWith(" is more than the maximum allowed value of shard count [2] for snapshot status request") ); @@ -614,8 +615,8 @@ public void testSnapshotStatusApiFailureForTooManyShardsAcrossSnapshots() throws // across multiple snapshots assertBusy(() -> { - TooManyShardsInSnapshotsStatusException exception = expectThrows( - TooManyShardsInSnapshotsStatusException.class, + CircuitBreakingException exception = expectThrows( + CircuitBreakingException.class, () -> client().admin() .cluster() .prepareSnapshotStatus(repositoryName) @@ -623,7 +624,7 @@ public void testSnapshotStatusApiFailureForTooManyShardsAcrossSnapshots() throws .execute() .actionGet() ); - assertEquals(exception.status(), RestStatus.REQUEST_ENTITY_TOO_LARGE); + assertEquals(exception.status(), RestStatus.TOO_MANY_REQUESTS); assertTrue( exception.getMessage().endsWith(" is more than the maximum allowed value of shard count [2] for snapshot status request") ); @@ -741,8 +742,8 @@ public void testSnapshotStatusFailuresWithIndexFilter() throws Exception { updateSettingsRequest.persistentSettings(Settings.builder().put(MAX_SHARDS_ALLOWED_IN_STATUS_API.getKey(), 2)); assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); - TooManyShardsInSnapshotsStatusException ex = expectThrows( - TooManyShardsInSnapshotsStatusException.class, + CircuitBreakingException ex = expectThrows( + CircuitBreakingException.class, () -> client().admin() .cluster() .prepareSnapshotStatus(repositoryName) @@ -751,7 +752,7 @@ public void testSnapshotStatusFailuresWithIndexFilter() throws Exception { .execute() .actionGet() ); - assertEquals(ex.status(), RestStatus.REQUEST_ENTITY_TOO_LARGE); + assertEquals(ex.status(), RestStatus.TOO_MANY_REQUESTS); assertTrue(ex.getMessage().endsWith(" is more than the maximum allowed value of shard count [2] for snapshot status request")); logger.info("Reset MAX_SHARDS_ALLOWED_IN_STATUS_API to default value"); diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusAction.java index f2a9b88f790c9..8228cb6301c8c 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusAction.java +++ 
b/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusAction.java @@ -49,6 +49,8 @@ import org.opensearch.common.util.set.Sets; import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.Strings; +import org.opensearch.core.common.breaker.CircuitBreaker; +import org.opensearch.core.common.breaker.CircuitBreakingException; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.util.CollectionUtils; import org.opensearch.core.index.shard.ShardId; @@ -66,7 +68,6 @@ import org.opensearch.snapshots.SnapshotShardsService; import org.opensearch.snapshots.SnapshotState; import org.opensearch.snapshots.SnapshotsService; -import org.opensearch.snapshots.TooManyShardsInSnapshotsStatusException; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.TransportService; @@ -458,13 +459,17 @@ private Map snapshotsInfo( snapshotsInfoMap.put(snapshotId, snapshotInfo); } if (totalShardsAcrossSnapshots > maximumAllowedShardCount && request.indices().length == 0) { - String message = "Total shard count [" + String message = "[" + + repositoryName + + ":" + + String.join(", ", request.snapshots()) + + "]" + + " Total shard count [" + totalShardsAcrossSnapshots + "] is more than the maximum allowed value of shard count [" + maximumAllowedShardCount + "] for snapshot status request"; - - throw new TooManyShardsInSnapshotsStatusException(repositoryName, message, request.snapshots()); + throw new CircuitBreakingException(message, CircuitBreaker.Durability.PERMANENT); } return unmodifiableMap(snapshotsInfoMap); } @@ -520,15 +525,19 @@ private Map snapshotShards( } if (totalShardsAcrossIndices > maximumAllowedShardCount && requestedIndexNames.isEmpty() == false && isV2Snapshot == false) { - String message = "Total shard count [" + String message = "[" + + repositoryName + + ":" + + String.join(", ", request.snapshots()) + + "]" + + " Total shard count [" + totalShardsAcrossIndices + "] across the requested indices [" + requestedIndexNames.stream().collect(Collectors.joining(", ")) + "] is more than the maximum allowed value of shard count [" + maximumAllowedShardCount + "] for snapshot status request"; - - throw new TooManyShardsInSnapshotsStatusException(repositoryName, message, snapshotName); + throw new CircuitBreakingException(message, CircuitBreaker.Durability.PERMANENT); } final Map shardStatus = new HashMap<>(); From 0d032bef1cedb97dca04a6a708b80b694046476c Mon Sep 17 00:00:00 2001 From: "opensearch-trigger-bot[bot]" <98922864+opensearch-trigger-bot[bot]@users.noreply.github.com> Date: Thu, 5 Sep 2024 11:31:04 -0400 Subject: [PATCH 07/17] Fix MacOS assemble workflows (#15741) (#15744) (cherry picked from commit b957eca9306dc8b96fe0bc675404b95ad68d262f) Signed-off-by: Andriy Redko Signed-off-by: github-actions[bot] Co-authored-by: github-actions[bot] --- .github/workflows/assemble.yml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/workflows/assemble.yml b/.github/workflows/assemble.yml index 200d9cf2d788b..294627622a136 100644 --- a/.github/workflows/assemble.yml +++ b/.github/workflows/assemble.yml @@ -30,11 +30,8 @@ jobs: - name: Setup docker (missing on MacOS) id: setup_docker if: runner.os == 'macos' - uses: douglascamata/setup-docker-macos-action@main - continue-on-error: true - with: - upgrade-qemu: true - colima: v0.6.8 + run: | + exit 0; - name: Run Gradle (assemble) if: runner.os == 'macos' && steps.setup_docker.outcome != 'success' run: | 
@@ -48,4 +45,4 @@ jobs: - name: Run Gradle (assemble) if: runner.os == 'macos' && steps.setup_docker.outcome == 'success' run: | - ./gradlew assemble --parallel --no-build-cache -PDISABLE_BUILD_CACHE -Druntime.java=${{ matrix.java }} + exit 0; From 00b2fcce5e25fedd079ae2327bbc4a53e6474f4a Mon Sep 17 00:00:00 2001 From: Ganesh Krishna Ramadurai Date: Thu, 5 Sep 2024 10:18:00 -0700 Subject: [PATCH 08/17] Provide factory for pluggable deciders (#15729) Signed-off-by: Ganesh Ramadurai --- .../main/java/org/opensearch/node/Node.java | 8 +-- .../org/opensearch/plugins/SearchPlugin.java | 10 +-- .../search/DefaultSearchContext.java | 32 +++++---- .../org/opensearch/search/SearchModule.java | 24 +++---- .../org/opensearch/search/SearchService.java | 10 +-- .../deciders/ConcurrentSearchDecision.java | 2 +- ...va => ConcurrentSearchRequestDecider.java} | 36 ++++++---- .../deciders/ConcurrentSearchVisitor.java | 6 +- .../search/DefaultSearchContextTests.java | 70 +++++++++++-------- .../opensearch/search/SearchModuleTests.java | 49 ++++++------- .../java/org/opensearch/node/MockNode.java | 6 +- 11 files changed, 136 insertions(+), 117 deletions(-) rename server/src/main/java/org/opensearch/search/deciders/{ConcurrentSearchDecider.java => ConcurrentSearchRequestDecider.java} (50%) diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index 42b84d8e79d88..26150bdecdcf5 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -233,7 +233,7 @@ import org.opensearch.search.aggregations.support.AggregationUsageService; import org.opensearch.search.backpressure.SearchBackpressureService; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; -import org.opensearch.search.deciders.ConcurrentSearchDecider; +import org.opensearch.search.deciders.ConcurrentSearchRequestDecider; import org.opensearch.search.fetch.FetchPhase; import org.opensearch.search.pipeline.SearchPipelineService; import org.opensearch.search.query.QueryPhase; @@ -1336,7 +1336,7 @@ protected Node( circuitBreakerService, searchModule.getIndexSearcherExecutor(threadPool), taskResourceTrackingService, - searchModule.getConcurrentSearchDeciders() + searchModule.getConcurrentSearchRequestDeciderFactories() ); final List> tasksExecutors = pluginsService.filterPlugins(PersistentTaskPlugin.class) @@ -1986,7 +1986,7 @@ protected SearchService newSearchService( CircuitBreakerService circuitBreakerService, Executor indexSearcherExecutor, TaskResourceTrackingService taskResourceTrackingService, - Collection concurrentSearchDecidersList + Collection concurrentSearchDeciderFactories ) { return new SearchService( clusterService, @@ -2000,7 +2000,7 @@ protected SearchService newSearchService( circuitBreakerService, indexSearcherExecutor, taskResourceTrackingService, - concurrentSearchDecidersList + concurrentSearchDeciderFactories ); } diff --git a/server/src/main/java/org/opensearch/plugins/SearchPlugin.java b/server/src/main/java/org/opensearch/plugins/SearchPlugin.java index 895e6ed2971d8..60cb2184b5ab5 100644 --- a/server/src/main/java/org/opensearch/plugins/SearchPlugin.java +++ b/server/src/main/java/org/opensearch/plugins/SearchPlugin.java @@ -65,7 +65,7 @@ import org.opensearch.search.aggregations.pipeline.MovAvgPipelineAggregator; import org.opensearch.search.aggregations.pipeline.PipelineAggregator; import org.opensearch.search.aggregations.support.ValuesSourceRegistry; -import 
org.opensearch.search.deciders.ConcurrentSearchDecider; +import org.opensearch.search.deciders.ConcurrentSearchRequestDecider; import org.opensearch.search.fetch.FetchSubPhase; import org.opensearch.search.fetch.subphase.highlight.Highlighter; import org.opensearch.search.query.QueryPhaseSearcher; @@ -141,12 +141,12 @@ default Map getHighlighters() { } /** - * Allows plugins to register custom decider for concurrent search - * @return A {@link ConcurrentSearchDecider} + * Allows plugins to register a factory to create custom decider for concurrent search + * @return A {@link ConcurrentSearchRequestDecider.Factory} */ @ExperimentalApi - default ConcurrentSearchDecider getConcurrentSearchDecider() { - return null; + default Optional getConcurrentSearchRequestDeciderFactory() { + return Optional.empty(); } /** diff --git a/server/src/main/java/org/opensearch/search/DefaultSearchContext.java b/server/src/main/java/org/opensearch/search/DefaultSearchContext.java index 4576921b8426e..f148bfb164d7f 100644 --- a/server/src/main/java/org/opensearch/search/DefaultSearchContext.java +++ b/server/src/main/java/org/opensearch/search/DefaultSearchContext.java @@ -72,8 +72,8 @@ import org.opensearch.search.aggregations.SearchContextAggregations; import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.search.collapse.CollapseContext; -import org.opensearch.search.deciders.ConcurrentSearchDecider; import org.opensearch.search.deciders.ConcurrentSearchDecision; +import org.opensearch.search.deciders.ConcurrentSearchRequestDecider; import org.opensearch.search.deciders.ConcurrentSearchVisitor; import org.opensearch.search.dfs.DfsSearchResult; import org.opensearch.search.fetch.FetchPhase; @@ -106,13 +106,14 @@ import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.concurrent.Executor; import java.util.function.Function; import java.util.function.LongSupplier; -import java.util.stream.Collectors; import static org.opensearch.search.SearchService.CARDINALITY_AGGREGATION_PRUNING_THRESHOLD; import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE; @@ -136,7 +137,7 @@ final class DefaultSearchContext extends SearchContext { private final ShardSearchRequest request; private final SearchShardTarget shardTarget; private final LongSupplier relativeTimeSupplier; - private final Collection concurrentSearchDeciders; + private final Collection concurrentSearchDeciderFactories; private SearchType searchType; private final BigArrays bigArrays; private final IndexShard indexShard; @@ -221,7 +222,7 @@ final class DefaultSearchContext extends SearchContext { boolean validate, Executor executor, Function requestToAggReduceContextBuilder, - Collection concurrentSearchDeciders + Collection concurrentSearchDeciderFactories ) throws IOException { this.readerContext = readerContext; this.request = request; @@ -264,7 +265,7 @@ final class DefaultSearchContext extends SearchContext { this.maxAggRewriteFilters = evaluateFilterRewriteSetting(); this.cardinalityAggregationPruningThreshold = evaluateCardinalityAggregationPruningThreshold(); - this.concurrentSearchDeciders = concurrentSearchDeciders; + this.concurrentSearchDeciderFactories = concurrentSearchDeciderFactories; } @Override @@ -928,14 +929,21 @@ public boolean shouldUseConcurrentSearch() { private boolean evaluateAutoMode() { - // filter out deciders that 
want to opt-out of decision-making - final Set filteredDeciders = concurrentSearchDeciders.stream() - .filter(concurrentSearchDecider -> concurrentSearchDecider.canEvaluateForIndex(indexService.getIndexSettings())) - .collect(Collectors.toSet()); + final Set concurrentSearchRequestDeciders = new HashSet<>(); + + // create the ConcurrentSearchRequestDeciders using registered factories + for (ConcurrentSearchRequestDecider.Factory deciderFactory : concurrentSearchDeciderFactories) { + final Optional concurrentSearchRequestDecider = deciderFactory.create( + indexService.getIndexSettings() + ); + concurrentSearchRequestDecider.ifPresent(concurrentSearchRequestDeciders::add); + + } + // evaluate based on concurrent search query visitor - if (filteredDeciders.size() > 0) { + if (concurrentSearchRequestDeciders.size() > 0) { ConcurrentSearchVisitor concurrentSearchVisitor = new ConcurrentSearchVisitor( - filteredDeciders, + concurrentSearchRequestDeciders, indexService.getIndexSettings() ); if (request().source() != null && request().source().query() != null) { @@ -945,7 +953,7 @@ private boolean evaluateAutoMode() { } final List decisions = new ArrayList<>(); - for (ConcurrentSearchDecider decider : filteredDeciders) { + for (ConcurrentSearchRequestDecider decider : concurrentSearchRequestDeciders) { ConcurrentSearchDecision decision = decider.getConcurrentSearchDecision(); if (decision != null) { if (logger.isDebugEnabled()) { diff --git a/server/src/main/java/org/opensearch/search/SearchModule.java b/server/src/main/java/org/opensearch/search/SearchModule.java index e9ed02828b971..b8d3a13e0df20 100644 --- a/server/src/main/java/org/opensearch/search/SearchModule.java +++ b/server/src/main/java/org/opensearch/search/SearchModule.java @@ -255,7 +255,7 @@ import org.opensearch.search.aggregations.pipeline.SumBucketPipelineAggregationBuilder; import org.opensearch.search.aggregations.pipeline.SumBucketPipelineAggregator; import org.opensearch.search.aggregations.support.ValuesSourceRegistry; -import org.opensearch.search.deciders.ConcurrentSearchDecider; +import org.opensearch.search.deciders.ConcurrentSearchRequestDecider; import org.opensearch.search.fetch.FetchPhase; import org.opensearch.search.fetch.FetchSubPhase; import org.opensearch.search.fetch.subphase.ExplainPhase; @@ -334,7 +334,7 @@ public class SearchModule { private final QueryPhaseSearcher queryPhaseSearcher; private final SearchPlugin.ExecutorServiceProvider indexSearcherExecutorProvider; - private final Collection concurrentSearchDeciders; + private final Collection concurrentSearchDeciderFactories; /** * Constructs a new SearchModule object @@ -364,25 +364,23 @@ public SearchModule(Settings settings, List plugins) { queryPhaseSearcher = registerQueryPhaseSearcher(plugins); indexSearcherExecutorProvider = registerIndexSearcherExecutorProvider(plugins); namedWriteables.addAll(SortValue.namedWriteables()); - concurrentSearchDeciders = registerConcurrentSearchDeciders(plugins); + concurrentSearchDeciderFactories = registerConcurrentSearchDeciderFactories(plugins); } - private Collection registerConcurrentSearchDeciders(List plugins) { - List concurrentSearchDeciders = new ArrayList<>(); + private Collection registerConcurrentSearchDeciderFactories(List plugins) { + List concurrentSearchDeciderFactories = new ArrayList<>(); for (SearchPlugin plugin : plugins) { - ConcurrentSearchDecider decider = plugin.getConcurrentSearchDecider(); - if (decider != null) { - concurrentSearchDeciders.add(decider); - } + final Optional deciderFactory 
= plugin.getConcurrentSearchRequestDeciderFactory(); + deciderFactory.ifPresent(concurrentSearchDeciderFactories::add); } - return concurrentSearchDeciders; + return concurrentSearchDeciderFactories; } /** - * Returns the concurrent search deciders that the plugins have registered + * Returns the concurrent search decider factories that the plugins have registered */ - public Collection getConcurrentSearchDeciders() { - return concurrentSearchDeciders; + public Collection getConcurrentSearchRequestDeciderFactories() { + return concurrentSearchDeciderFactories; } public List getNamedWriteables() { diff --git a/server/src/main/java/org/opensearch/search/SearchService.java b/server/src/main/java/org/opensearch/search/SearchService.java index e33a47fe8e178..626f1b574871e 100644 --- a/server/src/main/java/org/opensearch/search/SearchService.java +++ b/server/src/main/java/org/opensearch/search/SearchService.java @@ -105,7 +105,7 @@ import org.opensearch.search.aggregations.pipeline.PipelineAggregator.PipelineTree; import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.search.collapse.CollapseContext; -import org.opensearch.search.deciders.ConcurrentSearchDecider; +import org.opensearch.search.deciders.ConcurrentSearchRequestDecider; import org.opensearch.search.dfs.DfsPhase; import org.opensearch.search.dfs.DfsSearchResult; import org.opensearch.search.fetch.FetchPhase; @@ -358,7 +358,7 @@ public class SearchService extends AbstractLifecycleComponent implements IndexEv private final QueryPhase queryPhase; private final FetchPhase fetchPhase; - private final Collection concurrentSearchDeciders; + private final Collection concurrentSearchDeciderFactories; private volatile long defaultKeepAlive; @@ -404,7 +404,7 @@ public SearchService( CircuitBreakerService circuitBreakerService, Executor indexSearcherExecutor, TaskResourceTrackingService taskResourceTrackingService, - Collection concurrentSearchDeciders + Collection concurrentSearchDeciderFactories ) { Settings settings = clusterService.getSettings(); this.threadPool = threadPool; @@ -460,7 +460,7 @@ public SearchService( allowDerivedField = CLUSTER_ALLOW_DERIVED_FIELD_SETTING.get(settings); clusterService.getClusterSettings().addSettingsUpdateConsumer(CLUSTER_ALLOW_DERIVED_FIELD_SETTING, this::setAllowDerivedField); - this.concurrentSearchDeciders = concurrentSearchDeciders; + this.concurrentSearchDeciderFactories = concurrentSearchDeciderFactories; } private void validateKeepAlives(TimeValue defaultKeepAlive, TimeValue maxKeepAlive) { @@ -1161,7 +1161,7 @@ private DefaultSearchContext createSearchContext(ReaderContext reader, ShardSear validate, indexSearcherExecutor, this::aggReduceContextBuilder, - concurrentSearchDeciders + concurrentSearchDeciderFactories ); // we clone the query shard context here just for rewriting otherwise we // might end up with incorrect state since we are using now() or script services diff --git a/server/src/main/java/org/opensearch/search/deciders/ConcurrentSearchDecision.java b/server/src/main/java/org/opensearch/search/deciders/ConcurrentSearchDecision.java index 2a30413eff9c8..4ac47221856d1 100644 --- a/server/src/main/java/org/opensearch/search/deciders/ConcurrentSearchDecision.java +++ b/server/src/main/java/org/opensearch/search/deciders/ConcurrentSearchDecision.java @@ -13,7 +13,7 @@ import java.util.Collection; /** - * This Class defines the decisions that a {@link ConcurrentSearchDecider#getConcurrentSearchDecision} can return. 
+ * This class defines the decisions that a {@link ConcurrentSearchRequestDecider#getConcurrentSearchDecision} can return. * */ @ExperimentalApi diff --git a/server/src/main/java/org/opensearch/search/deciders/ConcurrentSearchDecider.java b/server/src/main/java/org/opensearch/search/deciders/ConcurrentSearchRequestDecider.java similarity index 50% rename from server/src/main/java/org/opensearch/search/deciders/ConcurrentSearchDecider.java rename to server/src/main/java/org/opensearch/search/deciders/ConcurrentSearchRequestDecider.java index 9c588bb45b4ec..ec40527314454 100644 --- a/server/src/main/java/org/opensearch/search/deciders/ConcurrentSearchDecider.java +++ b/server/src/main/java/org/opensearch/search/deciders/ConcurrentSearchRequestDecider.java @@ -12,17 +12,21 @@ import org.opensearch.index.IndexSettings; import org.opensearch.index.query.QueryBuilder; +import java.util.Optional; + /** - * {@link ConcurrentSearchDecider} allows pluggable way to evaluate if a query in the search request + * {@link ConcurrentSearchRequestDecider} allows pluggable way to evaluate if a query in the search request * can use concurrent segment search using the passed in queryBuilders from query tree and index settings * on a per shard request basis. - * Implementations can also opt out of the evaluation process for certain indices based on the index settings. - * For all the deciders which can evaluate query tree for an index, its evaluateForQuery method - * will be called for each node in the query tree. After traversing of the query tree is completed, the final - * decision from the deciders will be obtained using {@link ConcurrentSearchDecider#getConcurrentSearchDecision} + * Implementations will need to implement the Factory interface that can be used to create the ConcurrentSearchRequestDecider. + * This factory will be called on each shard search request to create the ConcurrentSearchRequestDecider and get the + * concurrent search decision from the created decider on a per-request basis. + * For all the deciders, the evaluateForQuery method will be called for each node in the query tree. + * After traversing of the query tree is completed, the final decision from the deciders will be + * obtained using {@link ConcurrentSearchRequestDecider#getConcurrentSearchDecision} */ @ExperimentalApi -public abstract class ConcurrentSearchDecider { +public abstract class ConcurrentSearchRequestDecider { /** * Evaluate for the passed in queryBuilder node in the query tree of the search request @@ -31,14 +35,6 @@ public abstract class ConcurrentSearchDecider { */ public abstract void evaluateForQuery(QueryBuilder queryBuilder, IndexSettings indexSettings); - /** - * Provides a way for deciders to opt out of decision-making process for certain requests based on - * index settings. - * Return true if interested in decision making for index, - * false, otherwise - */ - public abstract boolean canEvaluateForIndex(IndexSettings indexSettings); - /** * Provide the final decision for concurrent search based on all evaluations * Plugins may need to maintain internal state of evaluations to provide a final decision * */ public abstract ConcurrentSearchDecision getConcurrentSearchDecision(); + /** + * Factory interface that can be implemented to create the ConcurrentSearchRequestDecider object. + * Implementations can use the passed in indexSettings to decide whether to create the decider object or + * return {@link Optional#empty()}.
+ */ + @ExperimentalApi + public interface Factory { + default Optional create(IndexSettings indexSettings) { + return Optional.empty(); + } + } + } diff --git a/server/src/main/java/org/opensearch/search/deciders/ConcurrentSearchVisitor.java b/server/src/main/java/org/opensearch/search/deciders/ConcurrentSearchVisitor.java index 12ba1b2a9cc5f..d1a4fa982dc7e 100644 --- a/server/src/main/java/org/opensearch/search/deciders/ConcurrentSearchVisitor.java +++ b/server/src/main/java/org/opensearch/search/deciders/ConcurrentSearchVisitor.java @@ -19,15 +19,15 @@ /** * Class to traverse the QueryBuilder tree and invoke the - * {@link ConcurrentSearchDecider#evaluateForQuery} at each node of the query tree + * {@link ConcurrentSearchRequestDecider#evaluateForQuery} at each node of the query tree */ @ExperimentalApi public class ConcurrentSearchVisitor implements QueryBuilderVisitor { - private final Set deciders; + private final Set deciders; private final IndexSettings indexSettings; - public ConcurrentSearchVisitor(Set concurrentSearchVisitorDeciders, IndexSettings idxSettings) { + public ConcurrentSearchVisitor(Set concurrentSearchVisitorDeciders, IndexSettings idxSettings) { Objects.requireNonNull(concurrentSearchVisitorDeciders, "Concurrent search deciders cannot be null"); deciders = concurrentSearchVisitorDeciders; indexSettings = idxSettings; diff --git a/server/src/test/java/org/opensearch/search/DefaultSearchContextTests.java b/server/src/test/java/org/opensearch/search/DefaultSearchContextTests.java index 491a0377ab32e..a56b9c860c85c 100644 --- a/server/src/test/java/org/opensearch/search/DefaultSearchContextTests.java +++ b/server/src/test/java/org/opensearch/search/DefaultSearchContextTests.java @@ -76,8 +76,8 @@ import org.opensearch.search.aggregations.MultiBucketConsumerService; import org.opensearch.search.aggregations.SearchContextAggregations; import org.opensearch.search.builder.SearchSourceBuilder; -import org.opensearch.search.deciders.ConcurrentSearchDecider; import org.opensearch.search.deciders.ConcurrentSearchDecision; +import org.opensearch.search.deciders.ConcurrentSearchRequestDecider; import org.opensearch.search.internal.AliasFilter; import org.opensearch.search.internal.LegacyReaderContext; import org.opensearch.search.internal.PitReaderContext; @@ -96,6 +96,8 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.List; +import java.util.Optional; import java.util.UUID; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -988,14 +990,34 @@ protected Engine.Searcher acquireSearcherInternal(String source) { // Case4: multiple deciders are registered and all of them opt out of decision-making // with supported agg query so concurrent path is used - ConcurrentSearchDecider decider1 = mock(ConcurrentSearchDecider.class); - when(decider1.canEvaluateForIndex(any())).thenReturn(false); - ConcurrentSearchDecider decider2 = mock(ConcurrentSearchDecider.class); - when(decider2.canEvaluateForIndex(any())).thenReturn(false); + ConcurrentSearchRequestDecider decider1 = mock(ConcurrentSearchRequestDecider.class); - Collection concurrentSearchDeciders = new ArrayList<>(); - concurrentSearchDeciders.add(decider1); - concurrentSearchDeciders.add(decider2); + ConcurrentSearchRequestDecider decider2 = mock(ConcurrentSearchRequestDecider.class); + + ConcurrentSearchRequestDecider.Factory factory1 = new ConcurrentSearchRequestDecider.Factory() { + @Override + public Optional create(IndexSettings 
indexSettings) { + return Optional.ofNullable(decider1); + } + }; + + ConcurrentSearchRequestDecider.Factory factory2 = new ConcurrentSearchRequestDecider.Factory() { + @Override + public Optional create(IndexSettings indexSettings) { + return Optional.ofNullable(decider2); + } + }; + ConcurrentSearchRequestDecider.Factory factory3 = new ConcurrentSearchRequestDecider.Factory() { + @Override + public Optional create(IndexSettings indexSettings) { + return Optional.empty(); + } + }; + + List concurrentSearchRequestDeciders = new ArrayList<>(); + concurrentSearchRequestDeciders.add(factory1); + concurrentSearchRequestDeciders.add(factory2); + concurrentSearchRequestDeciders.add(factory3); context = new DefaultSearchContext( readerContext, @@ -1011,7 +1033,7 @@ protected Engine.Searcher acquireSearcherInternal(String source) { false, executor, null, - concurrentSearchDeciders + concurrentSearchRequestDeciders ); // create a supported agg operation context.aggregations(mockAggregations); @@ -1025,15 +1047,9 @@ protected Engine.Searcher acquireSearcherInternal(String source) { // Case5: multiple deciders are registered and one of them returns ConcurrentSearchDecision.DecisionStatus.NO // use non-concurrent path even if query contains supported agg - when(decider1.canEvaluateForIndex(any())).thenReturn(true); when(decider1.getConcurrentSearchDecision()).thenReturn( new ConcurrentSearchDecision(ConcurrentSearchDecision.DecisionStatus.NO, "disable concurrent search") ); - when(decider2.canEvaluateForIndex(any())).thenReturn(false); - - concurrentSearchDeciders.clear(); - concurrentSearchDeciders.add(decider1); - concurrentSearchDeciders.add(decider2); // create a source so that query tree is parsed by visitor SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); @@ -1055,7 +1071,7 @@ protected Engine.Searcher acquireSearcherInternal(String source) { false, executor, null, - concurrentSearchDeciders + concurrentSearchRequestDeciders ); // create a supported agg operation @@ -1071,20 +1087,17 @@ protected Engine.Searcher acquireSearcherInternal(String source) { // Case6: multiple deciders are registered and first decider returns ConcurrentSearchDecision.DecisionStatus.YES // while second decider returns ConcurrentSearchDecision.DecisionStatus.NO // use non-concurrent path even if query contains supported agg - when(decider1.canEvaluateForIndex(any())).thenReturn(true); + when(decider1.getConcurrentSearchDecision()).thenReturn( new ConcurrentSearchDecision(ConcurrentSearchDecision.DecisionStatus.YES, "enable concurrent search") ); - when(decider2.canEvaluateForIndex(any())).thenReturn(true); + when(decider2.getConcurrentSearchDecision()).thenReturn( new ConcurrentSearchDecision(ConcurrentSearchDecision.DecisionStatus.NO, "disable concurrent search") ); - concurrentSearchDeciders.clear(); - concurrentSearchDeciders.add(decider1); - concurrentSearchDeciders.add(decider2); - // create a source so that query tree is parsed by visitor + when(shardSearchRequest.source()).thenReturn(sourceBuilder); context = new DefaultSearchContext( readerContext, @@ -1100,7 +1113,7 @@ protected Engine.Searcher acquireSearcherInternal(String source) { false, executor, null, - concurrentSearchDeciders + concurrentSearchRequestDeciders ); // create a supported agg operation @@ -1115,22 +1128,19 @@ protected Engine.Searcher acquireSearcherInternal(String source) { // Case7: multiple deciders are registered and all return ConcurrentSearchDecision.DecisionStatus.NO_OP // but un-supported agg query is present, use 
non-concurrent path - when(decider1.canEvaluateForIndex(any())).thenReturn(true); + when(decider1.getConcurrentSearchDecision()).thenReturn( new ConcurrentSearchDecision(ConcurrentSearchDecision.DecisionStatus.NO_OP, "noop") ); - when(decider2.canEvaluateForIndex(any())).thenReturn(true); + when(decider2.getConcurrentSearchDecision()).thenReturn( new ConcurrentSearchDecision(ConcurrentSearchDecision.DecisionStatus.NO_OP, "noop") ); when(mockAggregations.factories().allFactoriesSupportConcurrentSearch()).thenReturn(false); - concurrentSearchDeciders.clear(); - concurrentSearchDeciders.add(decider1); - concurrentSearchDeciders.add(decider2); - // create a source so that query tree is parsed by visitor + when(shardSearchRequest.source()).thenReturn(sourceBuilder); context = new DefaultSearchContext( readerContext, @@ -1146,7 +1156,7 @@ protected Engine.Searcher acquireSearcherInternal(String source) { false, executor, null, - concurrentSearchDeciders + concurrentSearchRequestDeciders ); // create a supported agg operation diff --git a/server/src/test/java/org/opensearch/search/SearchModuleTests.java b/server/src/test/java/org/opensearch/search/SearchModuleTests.java index b3483b76dee1c..81b7ca8aef30b 100644 --- a/server/src/test/java/org/opensearch/search/SearchModuleTests.java +++ b/server/src/test/java/org/opensearch/search/SearchModuleTests.java @@ -41,6 +41,7 @@ import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.index.IndexSettings; import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.QueryRewriteContext; import org.opensearch.index.query.QueryShardContext; @@ -70,7 +71,7 @@ import org.opensearch.search.aggregations.support.ValuesSourceConfig; import org.opensearch.search.aggregations.support.ValuesSourceRegistry; import org.opensearch.search.aggregations.support.ValuesSourceType; -import org.opensearch.search.deciders.ConcurrentSearchDecider; +import org.opensearch.search.deciders.ConcurrentSearchRequestDecider; import org.opensearch.search.fetch.FetchSubPhase; import org.opensearch.search.fetch.subphase.ExplainPhase; import org.opensearch.search.fetch.subphase.highlight.CustomHighlighter; @@ -508,12 +509,12 @@ public Optional getIndexSearcherExecutorProvider() { expectThrows(IllegalStateException.class, () -> new SearchModule(Settings.EMPTY, searchPlugins)); } - public void testRegisterConcurrentSearchDecidersNoExternalPlugins() { + public void testRegisterConcurrentSearchRequestDecidersNoExternalPlugins() { SearchModule searchModule = new SearchModule(Settings.EMPTY, Collections.emptyList()); - assertEquals(searchModule.getConcurrentSearchDeciders().size(), 0); + assertEquals(searchModule.getConcurrentSearchRequestDeciderFactories().size(), 0); } - public void testRegisterConcurrentSearchDecidersExternalPluginsWithNoDeciders() { + public void testRegisterConcurrentSearchRequestDecidersExternalPluginsWithNoDeciders() { SearchPlugin plugin1 = new SearchPlugin() { @Override public Optional getIndexSearcherExecutorProvider() { @@ -528,10 +529,10 @@ public Optional getIndexSearcherExecutorProvider() { searchPlugins.add(plugin2); SearchModule searchModule = new SearchModule(Settings.EMPTY, searchPlugins); - assertEquals(searchModule.getConcurrentSearchDeciders().size(), 0); + assertEquals(searchModule.getConcurrentSearchRequestDeciderFactories().size(), 0); } - public void 
testRegisterConcurrentSearchDecidersExternalPluginsWithDeciders() { + public void testRegisterConcurrentSearchRequestDecidersExternalPluginsWithDeciders() { SearchPlugin pluginDecider1 = new SearchPlugin() { @Override public Optional getIndexSearcherExecutorProvider() { @@ -539,15 +540,25 @@ public Optional getIndexSearcherExecutorProvider() { } @Override - public ConcurrentSearchDecider getConcurrentSearchDecider() { - return mock(ConcurrentSearchDecider.class); + public Optional getConcurrentSearchRequestDeciderFactory() { + return Optional.of(new ConcurrentSearchRequestDecider.Factory() { + @Override + public Optional create(IndexSettings indexSettings) { + return Optional.of(mock(ConcurrentSearchRequestDecider.class)); + } + }); } }; SearchPlugin pluginDecider2 = new SearchPlugin() { @Override - public ConcurrentSearchDecider getConcurrentSearchDecider() { - return mock(ConcurrentSearchDecider.class); + public Optional getConcurrentSearchRequestDeciderFactory() { + return Optional.of(new ConcurrentSearchRequestDecider.Factory() { + @Override + public Optional create(IndexSettings indexSettings) { + return Optional.of(mock(ConcurrentSearchRequestDecider.class)); + } + }); } }; @@ -556,23 +567,7 @@ public ConcurrentSearchDecider getConcurrentSearchDecider() { searchPlugins.add(pluginDecider2); SearchModule searchModule = new SearchModule(Settings.EMPTY, searchPlugins); - assertEquals(searchModule.getConcurrentSearchDeciders().size(), 2); - } - - public void testRegisterConcurrentSearchDecidersPluginWithNullDecider() { - SearchPlugin pluginWithNullDecider = new SearchPlugin() { - @Override - public ConcurrentSearchDecider getConcurrentSearchDecider() { - return null; - } - }; - - List searchPlugins = new ArrayList<>(); - searchPlugins.add(pluginWithNullDecider); - SearchModule searchModule = new SearchModule(Settings.EMPTY, searchPlugins); - // null decider is filtered out, so 0 deciders - assertEquals(searchModule.getConcurrentSearchDeciders().size(), 0); - + assertEquals(searchModule.getConcurrentSearchRequestDeciderFactories().size(), 2); } private static final String[] NON_DEPRECATED_QUERIES = new String[] { diff --git a/test/framework/src/main/java/org/opensearch/node/MockNode.java b/test/framework/src/main/java/org/opensearch/node/MockNode.java index 09df9b85320f0..97c06962ca2e7 100644 --- a/test/framework/src/main/java/org/opensearch/node/MockNode.java +++ b/test/framework/src/main/java/org/opensearch/node/MockNode.java @@ -57,7 +57,7 @@ import org.opensearch.script.ScriptService; import org.opensearch.search.MockSearchService; import org.opensearch.search.SearchService; -import org.opensearch.search.deciders.ConcurrentSearchDecider; +import org.opensearch.search.deciders.ConcurrentSearchRequestDecider; import org.opensearch.search.fetch.FetchPhase; import org.opensearch.search.query.QueryPhase; import org.opensearch.tasks.TaskResourceTrackingService; @@ -158,7 +158,7 @@ protected SearchService newSearchService( CircuitBreakerService circuitBreakerService, Executor indexSearcherExecutor, TaskResourceTrackingService taskResourceTrackingService, - Collection concurrentSearchDecidersList + Collection concurrentSearchDeciderFactories ) { if (getPluginsService().filterPlugins(MockSearchService.TestPlugin.class).isEmpty()) { return super.newSearchService( @@ -173,7 +173,7 @@ protected SearchService newSearchService( circuitBreakerService, indexSearcherExecutor, taskResourceTrackingService, - concurrentSearchDecidersList + concurrentSearchDeciderFactories ); } return new 
MockSearchService( From bb0a49758df8380eab748aefeafb5a69ae5cf59f Mon Sep 17 00:00:00 2001 From: Harsha Vamsi Kalluri Date: Thu, 5 Sep 2024 11:49:08 -0700 Subject: [PATCH 09/17] [Backport 2.x] Add new cluster setting for keyword indexordocvalues query (#15637) (#15703) --------- Signed-off-by: Harsha Vamsi Kalluri --- CHANGELOG.md | 1 + .../common/settings/ClusterSettings.java | 1 + .../org/opensearch/index/IndexService.java | 22 ++++++- .../index/mapper/KeywordFieldMapper.java | 15 +++++ .../index/query/QueryShardContext.java | 66 ++++++++++++++++++- .../search/DefaultSearchContext.java | 18 ++++- .../org/opensearch/search/SearchService.java | 8 +++ .../search/internal/SearchContext.java | 5 ++ .../index/mapper/KeywordFieldTypeTests.java | 8 +-- .../index/search/NestedHelperTests.java | 10 +-- .../search/DefaultSearchContextTests.java | 20 +++--- .../index/mapper/FieldTypeTestCase.java | 8 ++- 12 files changed, 153 insertions(+), 29 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 886c082053fd6..5a8391dd79d5f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -51,6 +51,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [Remote Publication] Added checksum validation for cluster state behind a cluster setting ([#15218](https://github.com/opensearch-project/OpenSearch/pull/15218)) - Relax the join validation for Remote State publication ([#15471](https://github.com/opensearch-project/OpenSearch/pull/15471)) - Optimize NodeIndicesStats output behind flag ([#14454](https://github.com/opensearch-project/OpenSearch/pull/14454)) +- MultiTermQueries in keyword fields now default to `indexed` approach and gated behind cluster setting ([#15637](https://github.com/opensearch-project/OpenSearch/pull/15637)) ### Dependencies - Bump `netty` from 4.1.111.Final to 4.1.112.Final ([#15081](https://github.com/opensearch-project/OpenSearch/pull/15081)) diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index f0ff79f5c74f6..89ef00e2f30a9 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -551,6 +551,7 @@ public void apply(Settings value, Settings current, Settings previous) { SearchService.MAX_AGGREGATION_REWRITE_FILTERS, SearchService.INDICES_MAX_CLAUSE_COUNT_SETTING, SearchService.CARDINALITY_AGGREGATION_PRUNING_THRESHOLD, + SearchService.KEYWORD_INDEX_OR_DOC_VALUES_ENABLED, CreatePitController.PIT_INIT_KEEP_ALIVE, Node.WRITE_PORTS_FILE_SETTING, Node.NODE_NAME_SETTING, diff --git a/server/src/main/java/org/opensearch/index/IndexService.java b/server/src/main/java/org/opensearch/index/IndexService.java index 6e67c8f83ee4c..83a53a48ec936 100644 --- a/server/src/main/java/org/opensearch/index/IndexService.java +++ b/server/src/main/java/org/opensearch/index/IndexService.java @@ -959,7 +959,8 @@ public QueryShardContext newQueryShardContext( IndexSearcher searcher, LongSupplier nowInMillis, String clusterAlias, - boolean validate + boolean validate, + boolean keywordIndexOrDocValuesEnabled ) { final SearchIndexNameMatcher indexNameMatcher = new SearchIndexNameMatcher( index().getName(), @@ -985,10 +986,27 @@ public QueryShardContext newQueryShardContext( indexNameMatcher, allowExpensiveQueries, valuesSourceRegistry, - validate + validate, + keywordIndexOrDocValuesEnabled ); } + /** + * Creates a new QueryShardContext. + *

+ * Passing a {@code null} {@link IndexSearcher} will return a valid context, however it won't be able to make + * {@link IndexReader}-specific optimizations, such as rewriting containing range queries. + */ + public QueryShardContext newQueryShardContext( + int shardId, + IndexSearcher searcher, + LongSupplier nowInMillis, + String clusterAlias, + boolean validate + ) { + return newQueryShardContext(shardId, searcher, nowInMillis, clusterAlias, validate, false); + } + /** * The {@link ThreadPool} to use for this index. */ diff --git a/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java index 2116ac522b705..11ff601b3fd6d 100644 --- a/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java @@ -392,6 +392,9 @@ public Query termsQuery(List values, QueryShardContext context) { failIfNotIndexedAndNoDocValues(); // has index and doc_values enabled if (isSearchable() && hasDocValues()) { + if (!context.keywordFieldIndexOrDocValuesEnabled()) { + return super.termsQuery(values, context); + } BytesRef[] bytesRefs = new BytesRef[values.size()]; for (int i = 0; i < bytesRefs.length; i++) { bytesRefs[i] = indexedValueForSearch(values.get(i)); @@ -429,6 +432,9 @@ public Query prefixQuery( } failIfNotIndexedAndNoDocValues(); if (isSearchable() && hasDocValues()) { + if (!context.keywordFieldIndexOrDocValuesEnabled()) { + return super.prefixQuery(value, method, caseInsensitive, context); + } Query indexQuery = super.prefixQuery(value, method, caseInsensitive, context); Query dvQuery = super.prefixQuery(value, MultiTermQuery.DOC_VALUES_REWRITE, caseInsensitive, context); return new IndexOrDocValuesQuery(indexQuery, dvQuery); @@ -461,6 +467,9 @@ public Query regexpQuery( } failIfNotIndexedAndNoDocValues(); if (isSearchable() && hasDocValues()) { + if (!context.keywordFieldIndexOrDocValuesEnabled()) { + return super.regexpQuery(value, syntaxFlags, matchFlags, maxDeterminizedStates, method, context); + } Query indexQuery = super.regexpQuery(value, syntaxFlags, matchFlags, maxDeterminizedStates, method, context); Query dvQuery = super.regexpQuery( value, @@ -549,6 +558,9 @@ public Query fuzzyQuery( ); } if (isSearchable() && hasDocValues()) { + if (!context.keywordFieldIndexOrDocValuesEnabled()) { + return super.fuzzyQuery(value, fuzziness, prefixLength, maxExpansions, transpositions, method, context); + } Query indexQuery = super.fuzzyQuery(value, fuzziness, prefixLength, maxExpansions, transpositions, method, context); Query dvQuery = super.fuzzyQuery( value, @@ -591,6 +603,9 @@ public Query wildcardQuery( // wildcard // query text if (isSearchable() && hasDocValues()) { + if (!context.keywordFieldIndexOrDocValuesEnabled()) { + return super.wildcardQuery(value, method, caseInsensitive, true, context); + } Query indexQuery = super.wildcardQuery(value, method, caseInsensitive, true, context); Query dvQuery = super.wildcardQuery(value, MultiTermQuery.DOC_VALUES_REWRITE, caseInsensitive, true, context); return new IndexOrDocValuesQuery(indexQuery, dvQuery); diff --git a/server/src/main/java/org/opensearch/index/query/QueryShardContext.java b/server/src/main/java/org/opensearch/index/query/QueryShardContext.java index 05c07a0f32d5a..cc2d53cebbf69 100644 --- a/server/src/main/java/org/opensearch/index/query/QueryShardContext.java +++ b/server/src/main/java/org/opensearch/index/query/QueryShardContext.java @@ -126,6 +126,7 @@ 
public class QueryShardContext extends QueryRewriteContext { private final ValuesSourceRegistry valuesSourceRegistry; private BitSetProducer parentFilter; private DerivedFieldResolver derivedFieldResolver; + private boolean keywordIndexOrDocValuesEnabled; public QueryShardContext( int shardId, @@ -209,7 +210,55 @@ public QueryShardContext( ), allowExpensiveQueries, valuesSourceRegistry, - validate + validate, + false + ); + } + + public QueryShardContext( + int shardId, + IndexSettings indexSettings, + BigArrays bigArrays, + BitsetFilterCache bitsetFilterCache, + TriFunction, IndexFieldData> indexFieldDataLookup, + MapperService mapperService, + SimilarityService similarityService, + ScriptService scriptService, + NamedXContentRegistry xContentRegistry, + NamedWriteableRegistry namedWriteableRegistry, + Client client, + IndexSearcher searcher, + LongSupplier nowInMillis, + String clusterAlias, + Predicate indexNameMatcher, + BooleanSupplier allowExpensiveQueries, + ValuesSourceRegistry valuesSourceRegistry, + boolean validate, + boolean keywordIndexOrDocValuesEnabled + ) { + this( + shardId, + indexSettings, + bigArrays, + bitsetFilterCache, + indexFieldDataLookup, + mapperService, + similarityService, + scriptService, + xContentRegistry, + namedWriteableRegistry, + client, + searcher, + nowInMillis, + indexNameMatcher, + new Index( + RemoteClusterAware.buildRemoteIndexName(clusterAlias, indexSettings.getIndex().getName()), + indexSettings.getIndex().getUUID() + ), + allowExpensiveQueries, + valuesSourceRegistry, + validate, + keywordIndexOrDocValuesEnabled ); } @@ -232,7 +281,8 @@ public QueryShardContext(QueryShardContext source) { source.fullyQualifiedIndex, source.allowExpensiveQueries, source.valuesSourceRegistry, - source.validate() + source.validate(), + source.keywordIndexOrDocValuesEnabled ); } @@ -254,7 +304,8 @@ private QueryShardContext( Index fullyQualifiedIndex, BooleanSupplier allowExpensiveQueries, ValuesSourceRegistry valuesSourceRegistry, - boolean validate + boolean validate, + boolean keywordIndexOrDocValuesEnabled ) { super(xContentRegistry, namedWriteableRegistry, client, nowInMillis, validate); this.shardId = shardId; @@ -278,6 +329,7 @@ private QueryShardContext( emptyList(), indexSettings.isDerivedFieldAllowed() ); + this.keywordIndexOrDocValuesEnabled = keywordIndexOrDocValuesEnabled; } private void reset() { @@ -425,6 +477,14 @@ public MappedFieldType getDerivedFieldType(String fieldName) { throw new UnsupportedOperationException("Use resolveDerivedFieldType() instead."); } + public boolean keywordFieldIndexOrDocValuesEnabled() { + return keywordIndexOrDocValuesEnabled; + } + + public void setKeywordFieldIndexOrDocValuesEnabled(boolean keywordIndexOrDocValuesEnabled) { + this.keywordIndexOrDocValuesEnabled = keywordIndexOrDocValuesEnabled; + } + public void setAllowUnmappedFields(boolean allowUnmappedFields) { this.allowUnmappedFields = allowUnmappedFields; } diff --git a/server/src/main/java/org/opensearch/search/DefaultSearchContext.java b/server/src/main/java/org/opensearch/search/DefaultSearchContext.java index f148bfb164d7f..d8fb89921213e 100644 --- a/server/src/main/java/org/opensearch/search/DefaultSearchContext.java +++ b/server/src/main/java/org/opensearch/search/DefaultSearchContext.java @@ -121,6 +121,7 @@ import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_ALL; import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_AUTO; import static 
org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_NONE; +import static org.opensearch.search.SearchService.KEYWORD_INDEX_OR_DOC_VALUES_ENABLED; import static org.opensearch.search.SearchService.MAX_AGGREGATION_REWRITE_FILTERS; /** @@ -207,6 +208,7 @@ final class DefaultSearchContext extends SearchContext { private final SetOnce requestShouldUseConcurrentSearch = new SetOnce<>(); private final int maxAggRewriteFilters; private final int cardinalityAggregationPruningThreshold; + private final boolean keywordIndexOrDocValuesEnabled; DefaultSearchContext( ReaderContext readerContext, @@ -257,7 +259,8 @@ final class DefaultSearchContext extends SearchContext { this.searcher, request::nowInMillis, shardTarget.getClusterAlias(), - validate + validate, + evaluateKeywordIndexOrDocValuesEnabled() ); queryBoost = request.indexBoost(); this.lowLevelCancellation = lowLevelCancellation; @@ -265,6 +268,7 @@ final class DefaultSearchContext extends SearchContext { this.maxAggRewriteFilters = evaluateFilterRewriteSetting(); this.cardinalityAggregationPruningThreshold = evaluateCardinalityAggregationPruningThreshold(); + this.keywordIndexOrDocValuesEnabled = evaluateKeywordIndexOrDocValuesEnabled(); this.concurrentSearchDeciderFactories = concurrentSearchDeciderFactories; } @@ -1125,10 +1129,22 @@ public int cardinalityAggregationPruningThreshold() { return cardinalityAggregationPruningThreshold; } + @Override + public boolean keywordIndexOrDocValuesEnabled() { + return keywordIndexOrDocValuesEnabled; + } + private int evaluateCardinalityAggregationPruningThreshold() { if (clusterService != null) { return clusterService.getClusterSettings().get(CARDINALITY_AGGREGATION_PRUNING_THRESHOLD); } return 0; } + + public boolean evaluateKeywordIndexOrDocValuesEnabled() { + if (clusterService != null) { + return clusterService.getClusterSettings().get(KEYWORD_INDEX_OR_DOC_VALUES_ENABLED); + } + return false; + } } diff --git a/server/src/main/java/org/opensearch/search/SearchService.java b/server/src/main/java/org/opensearch/search/SearchService.java index 626f1b574871e..f8783c4b3c2da 100644 --- a/server/src/main/java/org/opensearch/search/SearchService.java +++ b/server/src/main/java/org/opensearch/search/SearchService.java @@ -338,6 +338,13 @@ public class SearchService extends AbstractLifecycleComponent implements IndexEv Property.NodeScope ); + public static final Setting KEYWORD_INDEX_OR_DOC_VALUES_ENABLED = Setting.boolSetting( + "search.keyword_index_or_doc_values_enabled", + false, + Property.Dynamic, + Property.NodeScope + ); + public static final int DEFAULT_SIZE = 10; public static final int DEFAULT_FROM = 0; @@ -1174,6 +1181,7 @@ private DefaultSearchContext createSearchContext(ReaderContext reader, ShardSear context.getIndexSettings().isDerivedFieldAllowed() && allowDerivedField ); context.setDerivedFieldResolver(derivedFieldResolver); + context.setKeywordFieldIndexOrDocValuesEnabled(searchContext.keywordIndexOrDocValuesEnabled()); searchContext.getQueryShardContext().setDerivedFieldResolver(derivedFieldResolver); Rewriteable.rewrite(request.getRewriteable(), context, true); assert searchContext.getQueryShardContext().isCacheable(); diff --git a/server/src/main/java/org/opensearch/search/internal/SearchContext.java b/server/src/main/java/org/opensearch/search/internal/SearchContext.java index fb822bcf39619..4b1b720a1aed7 100644 --- a/server/src/main/java/org/opensearch/search/internal/SearchContext.java +++ b/server/src/main/java/org/opensearch/search/internal/SearchContext.java @@ 
-530,4 +530,9 @@ public int maxAggRewriteFilters() { public int cardinalityAggregationPruningThreshold() { return 0; } + + public boolean keywordIndexOrDocValuesEnabled() { + return false; + } + } diff --git a/server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java index b10035f54a0c0..f291b864beb59 100644 --- a/server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java @@ -136,7 +136,7 @@ public void testTermsQuery() { new TermInSetQuery("field", terms), new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, "field", terms) ); - assertEquals(expected, ft.termsQuery(Arrays.asList("foo", "bar"), null)); + assertEquals(expected, ft.termsQuery(Arrays.asList("foo", "bar"), MOCK_QSC_ENABLE_INDEX_DOC_VALUES)); MappedFieldType onlyIndexed = new KeywordFieldType("field", true, false, Collections.emptyMap()); Query expectedIndex = new TermInSetQuery("field", terms); @@ -225,7 +225,7 @@ public void testRegexpQuery() { new RegexpQuery(new Term("field", "foo.*")), new RegexpQuery(new Term("field", "foo.*"), 0, 0, RegexpQuery.DEFAULT_PROVIDER, 10, MultiTermQuery.DOC_VALUES_REWRITE) ), - ft.regexpQuery("foo.*", 0, 0, 10, MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC) + ft.regexpQuery("foo.*", 0, 0, 10, MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC_ENABLE_INDEX_DOC_VALUES) ); Query indexExpected = new RegexpQuery(new Term("field", "foo.*")); @@ -267,7 +267,7 @@ public void testFuzzyQuery() { new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true), new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true, MultiTermQuery.DOC_VALUES_REWRITE) ), - ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, null, MOCK_QSC) + ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, null, MOCK_QSC_ENABLE_INDEX_DOC_VALUES) ); Query indexExpected = new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true); @@ -308,7 +308,7 @@ public void testWildCardQuery() { MultiTermQuery.DOC_VALUES_REWRITE ) ); - assertEquals(expected, ft.wildcardQuery("foo*", MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC)); + assertEquals(expected, ft.wildcardQuery("foo*", MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC_ENABLE_INDEX_DOC_VALUES)); Query indexExpected = new WildcardQuery(new Term("field", new BytesRef("foo*"))); MappedFieldType onlyIndexed = new KeywordFieldType("field", true, false, Collections.emptyMap()); diff --git a/server/src/test/java/org/opensearch/index/search/NestedHelperTests.java b/server/src/test/java/org/opensearch/index/search/NestedHelperTests.java index 7ffcc0fb7437a..f7f921e824490 100644 --- a/server/src/test/java/org/opensearch/index/search/NestedHelperTests.java +++ b/server/src/test/java/org/opensearch/index/search/NestedHelperTests.java @@ -57,6 +57,8 @@ import java.io.IOException; import java.util.Collections; +import static org.opensearch.index.mapper.FieldTypeTestCase.MOCK_QSC_ENABLE_INDEX_DOC_VALUES; + public class NestedHelperTests extends OpenSearchSingleNodeTestCase { IndexService indexService; @@ -132,28 +134,28 @@ public void testMatchNo() { } public void testTermsQuery() { - Query termsQuery = mapperService.fieldType("foo").termsQuery(Collections.singletonList("bar"), null); + Query termsQuery = mapperService.fieldType("foo").termsQuery(Collections.singletonList("bar"), MOCK_QSC_ENABLE_INDEX_DOC_VALUES); assertFalse(new 
NestedHelper(mapperService).mightMatchNestedDocs(termsQuery)); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested1")); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested2")); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested3")); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested_missing")); - termsQuery = mapperService.fieldType("nested1.foo").termsQuery(Collections.singletonList("bar"), null); + termsQuery = mapperService.fieldType("nested1.foo").termsQuery(Collections.singletonList("bar"), MOCK_QSC_ENABLE_INDEX_DOC_VALUES); assertTrue(new NestedHelper(mapperService).mightMatchNestedDocs(termsQuery)); assertFalse(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested1")); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested2")); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested3")); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested_missing")); - termsQuery = mapperService.fieldType("nested2.foo").termsQuery(Collections.singletonList("bar"), null); + termsQuery = mapperService.fieldType("nested2.foo").termsQuery(Collections.singletonList("bar"), MOCK_QSC_ENABLE_INDEX_DOC_VALUES); assertTrue(new NestedHelper(mapperService).mightMatchNestedDocs(termsQuery)); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested1")); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested2")); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested3")); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested_missing")); - termsQuery = mapperService.fieldType("nested3.foo").termsQuery(Collections.singletonList("bar"), null); + termsQuery = mapperService.fieldType("nested3.foo").termsQuery(Collections.singletonList("bar"), MOCK_QSC_ENABLE_INDEX_DOC_VALUES); assertTrue(new NestedHelper(mapperService).mightMatchNestedDocs(termsQuery)); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested1")); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested2")); diff --git a/server/src/test/java/org/opensearch/search/DefaultSearchContextTests.java b/server/src/test/java/org/opensearch/search/DefaultSearchContextTests.java index a56b9c860c85c..55b30d5068daa 100644 --- a/server/src/test/java/org/opensearch/search/DefaultSearchContextTests.java +++ b/server/src/test/java/org/opensearch/search/DefaultSearchContextTests.java @@ -170,9 +170,8 @@ public void testPreProcess() throws Exception { when(indexCache.query()).thenReturn(queryCache); when(indexService.cache()).thenReturn(indexCache); QueryShardContext queryShardContext = mock(QueryShardContext.class); - when(indexService.newQueryShardContext(eq(shardId.id()), any(), any(), nullable(String.class), anyBoolean())).thenReturn( - queryShardContext - ); + when(indexService.newQueryShardContext(eq(shardId.id()), any(), any(), nullable(String.class), anyBoolean(), anyBoolean())) + .thenReturn(queryShardContext); MapperService mapperService = mock(MapperService.class); when(mapperService.hasNested()).thenReturn(randomBoolean()); when(indexService.mapperService()).thenReturn(mapperService); @@ -503,9 +502,8 @@ public void testClearQueryCancellationsOnClose() throws IOException { IndexService indexService = 
mock(IndexService.class); QueryShardContext queryShardContext = mock(QueryShardContext.class); - when(indexService.newQueryShardContext(eq(shardId.id()), any(), any(), nullable(String.class), anyBoolean())).thenReturn( - queryShardContext - ); + when(indexService.newQueryShardContext(eq(shardId.id()), any(), any(), nullable(String.class), anyBoolean(), anyBoolean())) + .thenReturn(queryShardContext); Settings settings = Settings.builder() .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) @@ -600,9 +598,8 @@ public void testSearchPathEvaluation() throws Exception { IndexService indexService = mock(IndexService.class); QueryShardContext queryShardContext = mock(QueryShardContext.class); - when(indexService.newQueryShardContext(eq(shardId.id()), any(), any(), nullable(String.class), anyBoolean())).thenReturn( - queryShardContext - ); + when(indexService.newQueryShardContext(eq(shardId.id()), any(), any(), nullable(String.class), anyBoolean(), anyBoolean())) + .thenReturn(queryShardContext); IndexMetadata indexMetadata = IndexMetadata.builder("index").settings(settings).build(); IndexSettings indexSettings = new IndexSettings(indexMetadata, Settings.EMPTY); @@ -832,9 +829,8 @@ public void testSearchPathEvaluationWithConcurrentSearchModeAsAuto() throws Exce IndexService indexService = mock(IndexService.class); QueryShardContext queryShardContext = mock(QueryShardContext.class); - when(indexService.newQueryShardContext(eq(shardId.id()), any(), any(), nullable(String.class), anyBoolean())).thenReturn( - queryShardContext - ); + when(indexService.newQueryShardContext(eq(shardId.id()), any(), any(), nullable(String.class), anyBoolean(), anyBoolean())) + .thenReturn(queryShardContext); IndexMetadata indexMetadata = IndexMetadata.builder("index").settings(settings).build(); IndexSettings indexSettings = new IndexSettings(indexMetadata, Settings.EMPTY); diff --git a/test/framework/src/main/java/org/opensearch/index/mapper/FieldTypeTestCase.java b/test/framework/src/main/java/org/opensearch/index/mapper/FieldTypeTestCase.java index 7ed0da8509fab..5d85844f3218d 100644 --- a/test/framework/src/main/java/org/opensearch/index/mapper/FieldTypeTestCase.java +++ b/test/framework/src/main/java/org/opensearch/index/mapper/FieldTypeTestCase.java @@ -46,16 +46,18 @@ /** Base test case for subclasses of MappedFieldType */ public abstract class FieldTypeTestCase extends OpenSearchTestCase { - public static final QueryShardContext MOCK_QSC = createMockQueryShardContext(true); - public static final QueryShardContext MOCK_QSC_DISALLOW_EXPENSIVE = createMockQueryShardContext(false); + public static final QueryShardContext MOCK_QSC = createMockQueryShardContext(true, false); + public static final QueryShardContext MOCK_QSC_DISALLOW_EXPENSIVE = createMockQueryShardContext(false, false); + public static final QueryShardContext MOCK_QSC_ENABLE_INDEX_DOC_VALUES = createMockQueryShardContext(true, true); protected QueryShardContext randomMockShardContext() { return randomFrom(MOCK_QSC, MOCK_QSC_DISALLOW_EXPENSIVE); } - static QueryShardContext createMockQueryShardContext(boolean allowExpensiveQueries) { + static QueryShardContext createMockQueryShardContext(boolean allowExpensiveQueries, boolean keywordIndexOrDocValuesEnabled) { QueryShardContext queryShardContext = mock(QueryShardContext.class); when(queryShardContext.allowExpensiveQueries()).thenReturn(allowExpensiveQueries); + 
when(queryShardContext.keywordFieldIndexOrDocValuesEnabled()).thenReturn(keywordIndexOrDocValuesEnabled); return queryShardContext; } From f85ff03fa8eeb3ec92a8649b1679b1eb4b5df45c Mon Sep 17 00:00:00 2001 From: Rishikesh <62345295+Rishikesh1159@users.noreply.github.com> Date: Thu, 5 Sep 2024 12:31:21 -0700 Subject: [PATCH 10/17] [Backport 2.x] Add support for randomizing Remote Store enabled testing (#13845) * backport #12488 to 2.x branch. Signed-off-by: Rishikesh1159 * Fix indentation. Signed-off-by: Rishikesh1159 --------- Signed-off-by: Rishikesh1159 Signed-off-by: Rishikesh <62345295+Rishikesh1159@users.noreply.github.com> --- .../recovery/ReplicaToPrimaryPromotionIT.java | 7 +++- .../test/OpenSearchIntegTestCase.java | 32 ++++++++++++++++--- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/recovery/ReplicaToPrimaryPromotionIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/recovery/ReplicaToPrimaryPromotionIT.java index 3df4ecff5250c..a2543f0592145 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/recovery/ReplicaToPrimaryPromotionIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/recovery/ReplicaToPrimaryPromotionIT.java @@ -56,6 +56,11 @@ protected int numberOfReplicas() { return 1; } + @Override + public boolean useRandomReplicationStrategy() { + return true; + } + public void testPromoteReplicaToPrimary() throws Exception { final String indexName = randomAlphaOfLength(5).toLowerCase(Locale.ROOT); createIndex(indexName); @@ -65,7 +70,7 @@ public void testPromoteReplicaToPrimary() throws Exception { try (BackgroundIndexer indexer = new BackgroundIndexer(indexName, "_doc", client(), numOfDocs)) { waitForDocs(numOfDocs, indexer); } - refresh(indexName); + refreshAndWaitForReplication(indexName); } assertHitCount(client().prepareSearch(indexName).setSize(0).get(), numOfDocs); diff --git a/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java b/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java index 705610829cc9a..a63abdeb626b7 100644 --- a/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java +++ b/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java @@ -391,6 +391,14 @@ public abstract class OpenSearchIntegTestCase extends OpenSearchTestCase { */ public static final String TESTS_CLUSTER_NAME = "tests.clustername"; + protected static final String REMOTE_BACKED_STORAGE_REPOSITORY_NAME = "test-remote-store-repo"; + + private Path remoteStoreRepositoryPath; + + private ReplicationType randomReplicationType; + + private String randomStorageType; + /** * The lucene_default {@link Codec} is not added to the list as it internally maps to Asserting {@link Codec}. * The override to fetch the {@link CompletionFieldMapper.CompletionFieldType} postings format is not available for this codec. @@ -1985,11 +1993,19 @@ protected Settings nodeSettings(int nodeOrdinal) { builder.put(TelemetrySettings.TRACER_ENABLED_SETTING.getKey(), true); } - // Randomly set a replication strategy for the node. Replication Strategy can still be manually overridden by subclass if needed. + // Randomly set a Replication Strategy and storage type for the node. Both Replication Strategy and Storage Type can still be + // manually overridden by subclass if needed. if (useRandomReplicationStrategy()) { - ReplicationType replicationType = randomBoolean() ? 
ReplicationType.DOCUMENT : ReplicationType.SEGMENT; - logger.info("Randomly using Replication Strategy as {}.", replicationType.toString()); - builder.put(CLUSTER_REPLICATION_TYPE_SETTING.getKey(), replicationType); + if (randomReplicationType.equals(ReplicationType.SEGMENT) && randomStorageType.equals("REMOTE_STORE")) { + logger.info("Randomly using Replication Strategy as {} and Storage Type as {}.", randomReplicationType, randomStorageType); + if (remoteStoreRepositoryPath == null) { + remoteStoreRepositoryPath = randomRepoPath().toAbsolutePath(); + } + builder.put(remoteStoreClusterSettings(REMOTE_BACKED_STORAGE_REPOSITORY_NAME, remoteStoreRepositoryPath)); + } else { + logger.info("Randomly using Replication Strategy as {} and Storage Type as {}.", randomReplicationType, randomStorageType); + builder.put(CLUSTER_REPLICATION_TYPE_SETTING.getKey(), randomReplicationType); + } } return builder.build(); } @@ -2042,6 +2058,14 @@ protected boolean ignoreExternalCluster() { } protected TestCluster buildTestCluster(Scope scope, long seed) throws IOException { + if (useRandomReplicationStrategy()) { + randomReplicationType = randomBoolean() ? ReplicationType.DOCUMENT : ReplicationType.SEGMENT; + if (randomReplicationType.equals(ReplicationType.SEGMENT)) { + randomStorageType = randomBoolean() ? "REMOTE_STORE" : "LOCAL"; + } else { + randomStorageType = "LOCAL"; + } + } String clusterAddresses = System.getProperty(TESTS_CLUSTER); if (Strings.hasLength(clusterAddresses) && ignoreExternalCluster() == false) { if (scope == Scope.TEST) { From f2ecd3e763598e72c604cc1a0c616bae7dda1940 Mon Sep 17 00:00:00 2001 From: "opensearch-trigger-bot[bot]" <98922864+opensearch-trigger-bot[bot]@users.noreply.github.com> Date: Thu, 5 Sep 2024 16:06:03 -0400 Subject: [PATCH 11/17] Bump com.azure:azure-identity from 1.13.0 to 1.13.2 in /plugins/repository-azure (#15578) (#15755) * Bump com.azure:azure-identity in /plugins/repository-azure Bumps [com.azure:azure-identity](https://github.com/Azure/azure-sdk-for-java) from 1.13.0 to 1.13.2. - [Release notes](https://github.com/Azure/azure-sdk-for-java/releases) - [Commits](https://github.com/Azure/azure-sdk-for-java/compare/azure-core_1.13.0...azure-identity_1.13.2) --- updated-dependencies: - dependency-name: com.azure:azure-identity dependency-type: direct:production update-type: version-update:semver-patch ... 
* Updating SHAs * Update changelog --------- (cherry picked from commit e5e8504acdf1339f53dced4c00f5c9cd91b95f2c) Signed-off-by: dependabot[bot] Signed-off-by: github-actions[bot] Co-authored-by: github-actions[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: dependabot[bot] --- CHANGELOG.md | 1 + plugins/repository-azure/build.gradle | 2 +- .../repository-azure/licenses/azure-identity-1.13.0.jar.sha1 | 1 - .../repository-azure/licenses/azure-identity-1.13.2.jar.sha1 | 1 + 4 files changed, 3 insertions(+), 2 deletions(-) delete mode 100644 plugins/repository-azure/licenses/azure-identity-1.13.0.jar.sha1 create mode 100644 plugins/repository-azure/licenses/azure-identity-1.13.2.jar.sha1 diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a8391dd79d5f..4dbc667fcdbbf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -78,6 +78,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `com.netflix.nebula.ospackage-base` from 11.9.1 to 11.10.0 ([#15419](https://github.com/opensearch-project/OpenSearch/pull/15419)) - Bump `org.roaringbitmap:RoaringBitmap` from 1.1.0 to 1.2.1 ([#15423](https://github.com/opensearch-project/OpenSearch/pull/15423)) - Bump `icu4j` from 70.1 to 75.1 ([#15469](https://github.com/opensearch-project/OpenSearch/pull/15469)) +- Bump `com.azure:azure-identity` from 1.13.0 to 1.13.2 ([#15578](https://github.com/opensearch-project/OpenSearch/pull/15578)) ### Changed - Add lower limit for primary and replica batch allocators timeout ([#14979](https://github.com/opensearch-project/OpenSearch/pull/14979)) diff --git a/plugins/repository-azure/build.gradle b/plugins/repository-azure/build.gradle index e5d3bd215f8cb..e3dd8c1bd0dfa 100644 --- a/plugins/repository-azure/build.gradle +++ b/plugins/repository-azure/build.gradle @@ -57,7 +57,7 @@ dependencies { api "io.netty:netty-transport-native-unix-common:${versions.netty}" implementation project(':modules:transport-netty4') api 'com.azure:azure-storage-blob:12.23.0' - api 'com.azure:azure-identity:1.13.0' + api 'com.azure:azure-identity:1.13.2' // Start of transitive dependencies for azure-identity api 'com.microsoft.azure:msal4j-persistence-extension:1.3.0' api "net.java.dev.jna:jna-platform:${versions.jna}" diff --git a/plugins/repository-azure/licenses/azure-identity-1.13.0.jar.sha1 b/plugins/repository-azure/licenses/azure-identity-1.13.0.jar.sha1 deleted file mode 100644 index b59c2a3be5c92..0000000000000 --- a/plugins/repository-azure/licenses/azure-identity-1.13.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -54b44a74636322d06e9dc42d611a9f12a0966790 \ No newline at end of file diff --git a/plugins/repository-azure/licenses/azure-identity-1.13.2.jar.sha1 b/plugins/repository-azure/licenses/azure-identity-1.13.2.jar.sha1 new file mode 100644 index 0000000000000..7c98a9ccba592 --- /dev/null +++ b/plugins/repository-azure/licenses/azure-identity-1.13.2.jar.sha1 @@ -0,0 +1 @@ +50a1daef3eb5c6ab2e1351a3e3f5a7649a8fe464 \ No newline at end of file From d5e86e13ec950fda97e99bd75b1e34d99e10fd45 Mon Sep 17 00:00:00 2001 From: Arpit-Bandejiya Date: Fri, 6 Sep 2024 01:37:24 +0530 Subject: [PATCH 12/17] Schedule reroute after allocator timed out (#15565) (#15749) * Schedule reroute after allocator timed out Signed-off-by: Rishab Nahata (cherry picked from commit 4f50b4d705deaa971d802cd22fb120f75c722517) Co-authored-by: Rishab Nahata --- .../org/opensearch/cluster/ClusterModule.java | 4 + .../allocator/BalancedShardsAllocator.java | 25 ++ 
.../allocation/allocator/ShardsAllocator.java | 3 + .../gateway/ShardsBatchGatewayAllocator.java | 30 +- .../main/java/org/opensearch/node/Node.java | 1 + .../cluster/ClusterModuleTests.java | 13 + ...TimeBoundBalancedShardsAllocatorTests.java | 257 +++++++++++++----- .../gateway/GatewayAllocatorTests.java | 62 ++++- .../TestShardBatchGatewayAllocator.java | 6 +- 9 files changed, 321 insertions(+), 80 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/ClusterModule.java b/server/src/main/java/org/opensearch/cluster/ClusterModule.java index 0d15158f31e34..9e2074c7aacf9 100644 --- a/server/src/main/java/org/opensearch/cluster/ClusterModule.java +++ b/server/src/main/java/org/opensearch/cluster/ClusterModule.java @@ -52,6 +52,7 @@ import org.opensearch.cluster.metadata.RepositoriesMetadata; import org.opensearch.cluster.metadata.WeightedRoutingMetadata; import org.opensearch.cluster.routing.DelayedAllocationService; +import org.opensearch.cluster.routing.RerouteService; import org.opensearch.cluster.routing.allocation.AllocationService; import org.opensearch.cluster.routing.allocation.ExistingShardsAllocator; import org.opensearch.cluster.routing.allocation.allocator.BalancedShardsAllocator; @@ -476,4 +477,7 @@ public void setExistingShardsAllocators(GatewayAllocator gatewayAllocator, Shard allocationService.setExistingShardsAllocators(existingShardsAllocators); } + public void setRerouteServiceForAllocator(RerouteService rerouteService) { + shardsAllocator.setRerouteService(rerouteService); + } } diff --git a/server/src/main/java/org/opensearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java b/server/src/main/java/org/opensearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java index a5193ca602f04..785636fa7ff2a 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java +++ b/server/src/main/java/org/opensearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java @@ -35,6 +35,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.lucene.util.IntroSorter; +import org.opensearch.cluster.routing.RerouteService; import org.opensearch.cluster.routing.RoutingNode; import org.opensearch.cluster.routing.RoutingNodes; import org.opensearch.cluster.routing.ShardMovementStrategy; @@ -49,12 +50,14 @@ import org.opensearch.cluster.routing.allocation.RebalanceParameter; import org.opensearch.cluster.routing.allocation.RoutingAllocation; import org.opensearch.cluster.routing.allocation.ShardAllocationDecision; +import org.opensearch.common.Priority; import org.opensearch.common.inject.Inject; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Setting.Property; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.action.ActionListener; import java.util.HashMap; import java.util.HashSet; @@ -202,6 +205,7 @@ public class BalancedShardsAllocator implements ShardsAllocator { private volatile boolean ignoreThrottleInRestore; private volatile TimeValue allocatorTimeout; private long startTime; + private RerouteService rerouteService; public BalancedShardsAllocator(Settings settings) { this(settings, new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)); @@ -231,6 +235,12 @@ public BalancedShardsAllocator(Settings settings, ClusterSettings clusterSetting 
clusterSettings.addSettingsUpdateConsumer(ALLOCATOR_TIMEOUT_SETTING, this::setAllocatorTimeout); } + @Override + public void setRerouteService(RerouteService rerouteService) { + assert this.rerouteService == null : "RerouteService is already set"; + this.rerouteService = rerouteService; + } + /** * Changes in deprecated setting SHARD_MOVE_PRIMARY_FIRST_SETTING affect value of its replacement setting SHARD_MOVEMENT_STRATEGY_SETTING. */ @@ -342,6 +352,7 @@ public void allocate(RoutingAllocation allocation) { localShardsBalancer.allocateUnassigned(); localShardsBalancer.moveShards(); localShardsBalancer.balance(); + scheduleRerouteIfAllocatorTimedOut(); final ShardsBalancer remoteShardsBalancer = new RemoteShardsBalancer(logger, allocation); remoteShardsBalancer.allocateUnassigned(); @@ -404,6 +415,20 @@ private void failAllocationOfNewPrimaries(RoutingAllocation allocation) { } } + private void scheduleRerouteIfAllocatorTimedOut() { + if (allocatorTimedOut()) { + assert rerouteService != null : "RerouteService not set to schedule reroute after allocator time out"; + rerouteService.reroute( + "reroute after balanced shards allocator timed out", + Priority.HIGH, + ActionListener.wrap( + r -> logger.trace("reroute after balanced shards allocator timed out completed"), + e -> logger.debug("reroute after balanced shards allocator timed out failed", e) + ) + ); + } + } + /** * Returns the currently configured delta threshold */ diff --git a/server/src/main/java/org/opensearch/cluster/routing/allocation/allocator/ShardsAllocator.java b/server/src/main/java/org/opensearch/cluster/routing/allocation/allocator/ShardsAllocator.java index 29e9acca4e6c2..38aafff6ce3e8 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/allocation/allocator/ShardsAllocator.java +++ b/server/src/main/java/org/opensearch/cluster/routing/allocation/allocator/ShardsAllocator.java @@ -32,6 +32,7 @@ package org.opensearch.cluster.routing.allocation.allocator; +import org.opensearch.cluster.routing.RerouteService; import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.cluster.routing.allocation.AllocateUnassignedDecision; import org.opensearch.cluster.routing.allocation.MoveDecision; @@ -73,4 +74,6 @@ public interface ShardsAllocator { * the cluster explain API, then this method should throw a {@code UnsupportedOperationException}. 
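A minimal wiring sketch, assuming the fixtures used by the tests in this patch (settings, clusterSettings, a RoutingAllocation named allocation, and a ClusterState named state); production wiring goes through ClusterModule.setRerouteServiceForAllocator with the BatchedRerouteService created in Node, as the following hunks show.

// RerouteService is a functional interface, so a lambda stub is enough here.
BalancedShardsAllocator allocator = new BalancedShardsAllocator(settings, clusterSettings);
RerouteService rerouteService = (reason, priority, listener) -> {
    assert Priority.HIGH == priority; // follow-up reroutes are scheduled at HIGH priority
    listener.onResponse(state);
};
allocator.setRerouteService(rerouteService); // asserts it is set exactly once
allocator.allocate(allocation); // a timed-out round defers the remaining shards and requests a reroute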
*/ ShardAllocationDecision decideShardAllocation(ShardRouting shard, RoutingAllocation allocation); + + default void setRerouteService(RerouteService rerouteService) {} } diff --git a/server/src/main/java/org/opensearch/gateway/ShardsBatchGatewayAllocator.java b/server/src/main/java/org/opensearch/gateway/ShardsBatchGatewayAllocator.java index d18304ea73ed0..5e2dcbcd70b40 100644 --- a/server/src/main/java/org/opensearch/gateway/ShardsBatchGatewayAllocator.java +++ b/server/src/main/java/org/opensearch/gateway/ShardsBatchGatewayAllocator.java @@ -184,11 +184,11 @@ public void cleanCaches() { // for tests protected ShardsBatchGatewayAllocator() { - this(DEFAULT_SHARD_BATCH_SIZE); + this(DEFAULT_SHARD_BATCH_SIZE, null); } - protected ShardsBatchGatewayAllocator(long batchSize) { - this.rerouteService = null; + protected ShardsBatchGatewayAllocator(long batchSize, RerouteService rerouteService) { + this.rerouteService = rerouteService; this.batchStartedAction = null; this.primaryShardBatchAllocator = null; this.batchStoreAction = null; @@ -297,6 +297,18 @@ public void run() { public void onComplete() { logger.trace("Triggering oncomplete after timeout for [{}] primary shards", timedOutPrimaryShardIds.size()); primaryBatchShardAllocator.allocateUnassignedBatchOnTimeout(timedOutPrimaryShardIds, allocation, true); + if (timedOutPrimaryShardIds.isEmpty() == false) { + logger.trace("scheduling reroute after existing shards allocator timed out for primary shards"); + assert rerouteService != null; + rerouteService.reroute( + "reroute after existing shards allocator timed out", + Priority.HIGH, + ActionListener.wrap( + r -> logger.trace("reroute after existing shards allocator timed out completed"), + e -> logger.debug("reroute after existing shards allocator timed out failed", e) + ) + ); + } } }; } else { @@ -320,6 +332,18 @@ public void run() { public void onComplete() { logger.trace("Triggering oncomplete after timeout for [{}] replica shards", timedOutReplicaShardIds.size()); replicaBatchShardAllocator.allocateUnassignedBatchOnTimeout(timedOutReplicaShardIds, allocation, false); + if (timedOutReplicaShardIds.isEmpty() == false) { + logger.trace("scheduling reroute after existing shards allocator timed out for replica shards"); + assert rerouteService != null; + rerouteService.reroute( + "reroute after existing shards allocator timed out", + Priority.HIGH, + ActionListener.wrap( + r -> logger.trace("reroute after existing shards allocator timed out completed"), + e -> logger.debug("reroute after existing shards allocator timed out failed", e) + ) + ); + } } }; } diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index 26150bdecdcf5..72531b98ec102 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -867,6 +867,7 @@ protected Node( final RerouteService rerouteService = new BatchedRerouteService(clusterService, clusterModule.getAllocationService()::reroute); rerouteServiceReference.set(rerouteService); clusterService.setRerouteService(rerouteService); + clusterModule.setRerouteServiceForAllocator(rerouteService); final RecoverySettings recoverySettings = new RecoverySettings(settings, settingsModule.getClusterSettings()); diff --git a/server/src/test/java/org/opensearch/cluster/ClusterModuleTests.java b/server/src/test/java/org/opensearch/cluster/ClusterModuleTests.java index 97706927ba857..f8240e775cfa5 100644 --- 
a/server/src/test/java/org/opensearch/cluster/ClusterModuleTests.java +++ b/server/src/test/java/org/opensearch/cluster/ClusterModuleTests.java @@ -337,6 +337,19 @@ public void testQueryGroupMetadataRegister() { ); } + public void testRerouteServiceSetForBalancedShardsAllocator() { + ClusterModule clusterModule = new ClusterModule( + Settings.EMPTY, + clusterService, + Collections.emptyList(), + clusterInfoService, + null, + threadContext, + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) + ); + clusterModule.setRerouteServiceForAllocator((reason, priority, listener) -> listener.onResponse(clusterService.state())); + } + private static ClusterPlugin existingShardsAllocatorPlugin(final String allocatorName) { return new ClusterPlugin() { @Override diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/allocator/TimeBoundBalancedShardsAllocatorTests.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/allocator/TimeBoundBalancedShardsAllocatorTests.java index a10c305686638..45a0bd7b18afd 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/allocation/allocator/TimeBoundBalancedShardsAllocatorTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/allocation/allocator/TimeBoundBalancedShardsAllocatorTests.java @@ -8,6 +8,7 @@ package org.opensearch.cluster.routing.allocation.allocator; +import org.opensearch.OpenSearchException; import org.opensearch.Version; import org.opensearch.cluster.ClusterInfo; import org.opensearch.cluster.ClusterName; @@ -17,6 +18,7 @@ import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.RerouteService; import org.opensearch.cluster.routing.RoutingNodes; import org.opensearch.cluster.routing.RoutingTable; import org.opensearch.cluster.routing.ShardRouting; @@ -26,14 +28,20 @@ import org.opensearch.cluster.routing.allocation.decider.AllocationDeciders; import org.opensearch.cluster.routing.allocation.decider.Decision; import org.opensearch.cluster.routing.allocation.decider.SameShardAllocationDecider; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.Priority; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; +import org.opensearch.test.ClusterServiceUtils; +import org.opensearch.threadpool.TestThreadPool; +import org.junit.After; import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Map; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.atomic.AtomicBoolean; import static org.opensearch.cluster.routing.ShardRoutingState.INITIALIZING; import static org.opensearch.cluster.routing.ShardRoutingState.STARTED; @@ -41,26 +49,49 @@ public class TimeBoundBalancedShardsAllocatorTests extends OpenSearchAllocationTestCase { + private TestThreadPool threadPool; + private ClusterService clusterService; + private ClusterState state; + private final DiscoveryNode node1 = newNode("node1", "node1", Collections.singletonMap("zone", "1a")); private final DiscoveryNode node2 = newNode("node2", "node2", Collections.singletonMap("zone", "1b")); private final DiscoveryNode node3 = newNode("node3", "node3", Collections.singletonMap("zone", "1c")); - public void testAllUnassignedShardsAllocatedWhenNoTimeOut() { + @After + @Override + public void tearDown() throws Exception { + super.tearDown(); + if (threadPool != null) { + final boolean 
terminated = terminate(threadPool); + assert terminated; + } + if (clusterService != null) { + clusterService.close(); + } + } + + public void setupStateAndService(Metadata metadata, RoutingTable routingTable) { + state = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) + .metadata(metadata) + .routingTable(routingTable) + .nodes(DiscoveryNodes.builder().add(node1).add(node2).add(node3)) + .build(); + threadPool = new TestThreadPool(getTestName()); + clusterService = ClusterServiceUtils.createClusterService(state, threadPool); + } + + public void testAllUnassignedShardsAllocatedWhenNoTimeOutAndRerouteNotScheduled() { int numberOfIndices = 2; int numberOfShards = 5; int numberOfReplicas = 1; int totalPrimaryCount = numberOfIndices * numberOfShards; int totalShardCount = numberOfIndices * (numberOfShards * (numberOfReplicas + 1)); Settings.Builder settings = Settings.builder(); - // passing total shard count for timed out latch such that no shard times out - BalancedShardsAllocator allocator = new TestBalancedShardsAllocator(settings.build(), new CountDownLatch(totalShardCount)); + // passing sufficiently high count for timeout latch to simulate no time out + BalancedShardsAllocator allocator = new TestBalancedShardsAllocator(settings.build(), new CountDownLatch(Integer.MAX_VALUE)); Metadata metadata = buildMetadata(Metadata.builder(), numberOfIndices, numberOfShards, numberOfReplicas); RoutingTable routingTable = buildRoutingTable(metadata); - ClusterState state = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) - .metadata(metadata) - .routingTable(routingTable) - .nodes(DiscoveryNodes.builder().add(node1).add(node2).add(node3)) - .build(); + setupStateAndService(metadata, routingTable); RoutingAllocation allocation = new RoutingAllocation( yesAllocationDeciders(), new RoutingNodes(state, false), @@ -69,6 +100,18 @@ public void testAllUnassignedShardsAllocatedWhenNoTimeOut() { null, System.nanoTime() ); + AtomicBoolean rerouteScheduled = new AtomicBoolean(false); + final RerouteService rerouteService = (reason, priority, listener) -> { + if (randomBoolean()) { + listener.onFailure(new OpenSearchException("simulated")); + } else { + listener.onResponse(clusterService.state()); + } + assertEquals("reroute after balanced shards allocator timed out", reason); + assertEquals(Priority.HIGH, priority); + rerouteScheduled.compareAndSet(false, true); + }; + allocator.setRerouteService(rerouteService); allocator.allocate(allocation); List initializingShards = allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING); int node1Recoveries = allocation.routingNodes().getInitialPrimariesIncomingRecoveries(node1.getId()); @@ -77,9 +120,10 @@ public void testAllUnassignedShardsAllocatedWhenNoTimeOut() { assertEquals(totalShardCount, initializingShards.size()); assertEquals(0, allocation.routingNodes().unassigned().ignored().size()); assertEquals(totalPrimaryCount, node1Recoveries + node2Recoveries + node3Recoveries); + assertFalse(rerouteScheduled.get()); } - public void testAllUnassignedShardsIgnoredWhenTimedOut() { + public void testAllUnassignedShardsIgnoredWhenTimedOutAndRerouteScheduled() { int numberOfIndices = 2; int numberOfShards = 5; int numberOfReplicas = 1; @@ -89,11 +133,7 @@ public void testAllUnassignedShardsIgnoredWhenTimedOut() { BalancedShardsAllocator allocator = new TestBalancedShardsAllocator(settings.build(), new CountDownLatch(0)); Metadata metadata = buildMetadata(Metadata.builder(), numberOfIndices, 
numberOfShards, numberOfReplicas); RoutingTable routingTable = buildRoutingTable(metadata); - ClusterState state = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) - .metadata(metadata) - .routingTable(routingTable) - .nodes(DiscoveryNodes.builder().add(node1).add(node2).add(node3)) - .build(); + setupStateAndService(metadata, routingTable); RoutingAllocation allocation = new RoutingAllocation( yesAllocationDeciders(), new RoutingNodes(state, false), @@ -102,6 +142,18 @@ public void testAllUnassignedShardsIgnoredWhenTimedOut() { null, System.nanoTime() ); + AtomicBoolean rerouteScheduled = new AtomicBoolean(false); + final RerouteService rerouteService = (reason, priority, listener) -> { + if (randomBoolean()) { + listener.onFailure(new OpenSearchException("simulated")); + } else { + listener.onResponse(clusterService.state()); + } + assertEquals("reroute after balanced shards allocator timed out", reason); + assertEquals(Priority.HIGH, priority); + rerouteScheduled.compareAndSet(false, true); + }; + allocator.setRerouteService(rerouteService); allocator.allocate(allocation); List initializingShards = allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING); int node1Recoveries = allocation.routingNodes().getInitialPrimariesIncomingRecoveries(node1.getId()); @@ -110,9 +162,10 @@ public void testAllUnassignedShardsIgnoredWhenTimedOut() { assertEquals(0, initializingShards.size()); assertEquals(totalShardCount, allocation.routingNodes().unassigned().ignored().size()); assertEquals(0, node1Recoveries + node2Recoveries + node3Recoveries); + assertTrue(rerouteScheduled.get()); } - public void testAllocatePartialPrimaryShardsUntilTimedOut() { + public void testAllocatePartialPrimaryShardsUntilTimedOutAndRerouteScheduled() { int numberOfIndices = 2; int numberOfShards = 5; int numberOfReplicas = 1; @@ -123,11 +176,7 @@ public void testAllocatePartialPrimaryShardsUntilTimedOut() { BalancedShardsAllocator allocator = new TestBalancedShardsAllocator(settings.build(), new CountDownLatch(shardsToAllocate)); Metadata metadata = buildMetadata(Metadata.builder(), numberOfIndices, numberOfShards, numberOfReplicas); RoutingTable routingTable = buildRoutingTable(metadata); - ClusterState state = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) - .metadata(metadata) - .routingTable(routingTable) - .nodes(DiscoveryNodes.builder().add(node1).add(node2).add(node3)) - .build(); + setupStateAndService(metadata, routingTable); RoutingAllocation allocation = new RoutingAllocation( yesAllocationDeciders(), new RoutingNodes(state, false), @@ -136,6 +185,18 @@ public void testAllocatePartialPrimaryShardsUntilTimedOut() { null, System.nanoTime() ); + AtomicBoolean rerouteScheduled = new AtomicBoolean(false); + final RerouteService rerouteService = (reason, priority, listener) -> { + if (randomBoolean()) { + listener.onFailure(new OpenSearchException("simulated")); + } else { + listener.onResponse(clusterService.state()); + } + assertEquals("reroute after balanced shards allocator timed out", reason); + assertEquals(Priority.HIGH, priority); + rerouteScheduled.compareAndSet(false, true); + }; + allocator.setRerouteService(rerouteService); allocator.allocate(allocation); List initializingShards = allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING); int node1Recoveries = allocation.routingNodes().getInitialPrimariesIncomingRecoveries(node1.getId()); @@ -144,9 +205,10 @@ public void 
testAllocatePartialPrimaryShardsUntilTimedOut() { assertEquals(shardsToAllocate, initializingShards.size()); assertEquals(totalShardCount - shardsToAllocate, allocation.routingNodes().unassigned().ignored().size()); assertEquals(shardsToAllocate, node1Recoveries + node2Recoveries + node3Recoveries); + assertTrue(rerouteScheduled.get()); } - public void testAllocateAllPrimaryShardsAndPartialReplicaShardsUntilTimedOut() { + public void testAllocateAllPrimaryShardsAndPartialReplicaShardsUntilTimedOutAndRerouteScheduled() { int numberOfIndices = 2; int numberOfShards = 5; int numberOfReplicas = 1; @@ -158,11 +220,7 @@ public void testAllocateAllPrimaryShardsAndPartialReplicaShardsUntilTimedOut() { BalancedShardsAllocator allocator = new TestBalancedShardsAllocator(settings.build(), new CountDownLatch(shardsToAllocate)); Metadata metadata = buildMetadata(Metadata.builder(), numberOfIndices, numberOfShards, numberOfReplicas); RoutingTable routingTable = buildRoutingTable(metadata); - ClusterState state = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) - .metadata(metadata) - .routingTable(routingTable) - .nodes(DiscoveryNodes.builder().add(node1).add(node2).add(node3)) - .build(); + setupStateAndService(metadata, routingTable); RoutingAllocation allocation = new RoutingAllocation( yesAllocationDeciders(), new RoutingNodes(state, false), @@ -171,6 +229,18 @@ public void testAllocateAllPrimaryShardsAndPartialReplicaShardsUntilTimedOut() { null, System.nanoTime() ); + AtomicBoolean rerouteScheduled = new AtomicBoolean(false); + final RerouteService rerouteService = (reason, priority, listener) -> { + if (randomBoolean()) { + listener.onFailure(new OpenSearchException("simulated")); + } else { + listener.onResponse(clusterService.state()); + } + assertEquals("reroute after balanced shards allocator timed out", reason); + assertEquals(Priority.HIGH, priority); + rerouteScheduled.compareAndSet(false, true); + }; + allocator.setRerouteService(rerouteService); allocator.allocate(allocation); List initializingShards = allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING); int node1Recoveries = allocation.routingNodes().getInitialPrimariesIncomingRecoveries(node1.getId()); @@ -179,20 +249,17 @@ public void testAllocateAllPrimaryShardsAndPartialReplicaShardsUntilTimedOut() { assertEquals(shardsToAllocate, initializingShards.size()); assertEquals(totalShardCount - shardsToAllocate, allocation.routingNodes().unassigned().ignored().size()); assertEquals(numberOfShards * numberOfIndices, node1Recoveries + node2Recoveries + node3Recoveries); + assertTrue(rerouteScheduled.get()); } - public void testAllShardsMoveWhenExcludedAndTimeoutNotBreached() { + public void testAllShardsMoveWhenExcludedAndTimeoutNotBreachedAndRerouteNotScheduled() { int numberOfIndices = 3; int numberOfShards = 5; int numberOfReplicas = 1; int totalShardCount = numberOfIndices * (numberOfShards * (numberOfReplicas + 1)); Metadata metadata = buildMetadata(Metadata.builder(), numberOfIndices, numberOfShards, numberOfReplicas); RoutingTable routingTable = buildRoutingTable(metadata); - ClusterState state = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) - .metadata(metadata) - .routingTable(routingTable) - .nodes(DiscoveryNodes.builder().add(node1).add(node2).add(node3)) - .build(); + setupStateAndService(metadata, routingTable); MockAllocationService allocationService = createAllocationService(); state = applyStartedShardsUntilNoChange(state, 
allocationService); // check all shards allocated @@ -200,8 +267,7 @@ public void testAllShardsMoveWhenExcludedAndTimeoutNotBreached() { assertEquals(totalShardCount, state.getRoutingNodes().shardsWithState(STARTED).size()); int node1ShardCount = state.getRoutingNodes().node("node1").size(); Settings settings = Settings.builder().put("cluster.routing.allocation.exclude.zone", "1a").build(); - int shardsToMove = 10 + 1000; // such that time out is never breached - BalancedShardsAllocator allocator = new TestBalancedShardsAllocator(settings, new CountDownLatch(shardsToMove)); + BalancedShardsAllocator allocator = new TestBalancedShardsAllocator(settings, new CountDownLatch(Integer.MAX_VALUE)); RoutingAllocation allocation = new RoutingAllocation( allocationDecidersForExcludeAPI(settings), new RoutingNodes(state, false), @@ -210,30 +276,39 @@ public void testAllShardsMoveWhenExcludedAndTimeoutNotBreached() { null, System.nanoTime() ); + AtomicBoolean rerouteScheduled = new AtomicBoolean(false); + final RerouteService rerouteService = (reason, priority, listener) -> { + if (randomBoolean()) { + listener.onFailure(new OpenSearchException("simulated")); + } else { + listener.onResponse(clusterService.state()); + } + assertEquals("reroute after balanced shards allocator timed out", reason); + assertEquals(Priority.HIGH, priority); + rerouteScheduled.compareAndSet(false, true); + }; + allocator.setRerouteService(rerouteService); allocator.allocate(allocation); List relocatingShards = allocation.routingNodes().shardsWithState(ShardRoutingState.RELOCATING); assertEquals(node1ShardCount, relocatingShards.size()); + assertFalse(rerouteScheduled.get()); } - public void testNoShardsMoveWhenExcludedAndTimeoutBreached() { + public void testNoShardsMoveWhenExcludedAndTimeoutBreachedAndRerouteScheduled() { int numberOfIndices = 3; int numberOfShards = 5; int numberOfReplicas = 1; int totalShardCount = numberOfIndices * (numberOfShards * (numberOfReplicas + 1)); Metadata metadata = buildMetadata(Metadata.builder(), numberOfIndices, numberOfShards, numberOfReplicas); RoutingTable routingTable = buildRoutingTable(metadata); - ClusterState state = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) - .metadata(metadata) - .routingTable(routingTable) - .nodes(DiscoveryNodes.builder().add(node1).add(node2).add(node3)) - .build(); + setupStateAndService(metadata, routingTable); MockAllocationService allocationService = createAllocationService(); state = applyStartedShardsUntilNoChange(state, allocationService); // check all shards allocated assertEquals(0, state.getRoutingNodes().shardsWithState(INITIALIZING).size()); assertEquals(totalShardCount, state.getRoutingNodes().shardsWithState(STARTED).size()); Settings settings = Settings.builder().put("cluster.routing.allocation.exclude.zone", "1a").build(); - int shardsToMove = 0; // such that time out is never breached + int shardsToMove = 0; // such that time out is breached BalancedShardsAllocator allocator = new TestBalancedShardsAllocator(settings, new CountDownLatch(shardsToMove)); RoutingAllocation allocation = new RoutingAllocation( allocationDecidersForExcludeAPI(settings), @@ -243,23 +318,32 @@ public void testNoShardsMoveWhenExcludedAndTimeoutBreached() { null, System.nanoTime() ); + AtomicBoolean rerouteScheduled = new AtomicBoolean(false); + final RerouteService rerouteService = (reason, priority, listener) -> { + if (randomBoolean()) { + listener.onFailure(new OpenSearchException("simulated")); + } else { + 
listener.onResponse(clusterService.state()); + } + assertEquals("reroute after balanced shards allocator timed out", reason); + assertEquals(Priority.HIGH, priority); + rerouteScheduled.compareAndSet(false, true); + }; + allocator.setRerouteService(rerouteService); allocator.allocate(allocation); List relocatingShards = allocation.routingNodes().shardsWithState(ShardRoutingState.RELOCATING); assertEquals(0, relocatingShards.size()); + assertTrue(rerouteScheduled.get()); } - public void testPartialShardsMoveWhenExcludedAndTimeoutBreached() { + public void testPartialShardsMoveWhenExcludedAndTimeoutBreachedAndRerouteScheduled() { int numberOfIndices = 3; int numberOfShards = 5; int numberOfReplicas = 1; int totalShardCount = numberOfIndices * (numberOfShards * (numberOfReplicas + 1)); Metadata metadata = buildMetadata(Metadata.builder(), numberOfIndices, numberOfShards, numberOfReplicas); RoutingTable routingTable = buildRoutingTable(metadata); - ClusterState state = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) - .metadata(metadata) - .routingTable(routingTable) - .nodes(DiscoveryNodes.builder().add(node1).add(node2).add(node3)) - .build(); + setupStateAndService(metadata, routingTable); MockAllocationService allocationService = createAllocationService(); state = applyStartedShardsUntilNoChange(state, allocationService); // check all shards allocated @@ -279,23 +363,32 @@ public void testPartialShardsMoveWhenExcludedAndTimeoutBreached() { null, System.nanoTime() ); + AtomicBoolean rerouteScheduled = new AtomicBoolean(false); + final RerouteService rerouteService = (reason, priority, listener) -> { + if (randomBoolean()) { + listener.onFailure(new OpenSearchException("simulated")); + } else { + listener.onResponse(clusterService.state()); + } + assertEquals("reroute after balanced shards allocator timed out", reason); + assertEquals(Priority.HIGH, priority); + rerouteScheduled.compareAndSet(false, true); + }; + allocator.setRerouteService(rerouteService); allocator.allocate(allocation); List relocatingShards = allocation.routingNodes().shardsWithState(ShardRoutingState.RELOCATING); assertEquals(shardsToMove / 3, relocatingShards.size()); + assertTrue(rerouteScheduled.get()); } - public void testClusterRebalancedWhenNotTimedOut() { + public void testClusterRebalancedWhenNotTimedOutAndRerouteNotScheduled() { int numberOfIndices = 1; int numberOfShards = 15; int numberOfReplicas = 1; int totalShardCount = numberOfIndices * (numberOfShards * (numberOfReplicas + 1)); Metadata metadata = buildMetadata(Metadata.builder(), numberOfIndices, numberOfShards, numberOfReplicas); RoutingTable routingTable = buildRoutingTable(metadata); - ClusterState state = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) - .metadata(metadata) - .routingTable(routingTable) - .nodes(DiscoveryNodes.builder().add(node1).add(node2).add(node3)) - .build(); + setupStateAndService(metadata, routingTable); MockAllocationService allocationService = createAllocationService( Settings.builder().put("cluster.routing.allocation.exclude.zone", "1a").build() ); // such that no shards are allocated to node1 @@ -306,8 +399,7 @@ public void testClusterRebalancedWhenNotTimedOut() { assertEquals(totalShardCount, state.getRoutingNodes().shardsWithState(STARTED).size()); assertEquals(0, node1ShardCount); Settings newSettings = Settings.builder().put("cluster.routing.allocation.exclude.zone", "").build(); - int shardsToMove = 1000; // such that time out is never breached - 
BalancedShardsAllocator allocator = new TestBalancedShardsAllocator(newSettings, new CountDownLatch(shardsToMove)); + BalancedShardsAllocator allocator = new TestBalancedShardsAllocator(newSettings, new CountDownLatch(Integer.MAX_VALUE)); RoutingAllocation allocation = new RoutingAllocation( allocationDecidersForExcludeAPI(newSettings), new RoutingNodes(state, false), @@ -316,23 +408,32 @@ public void testClusterRebalancedWhenNotTimedOut() { null, System.nanoTime() ); + AtomicBoolean rerouteScheduled = new AtomicBoolean(false); + final RerouteService rerouteService = (reason, priority, listener) -> { + if (randomBoolean()) { + listener.onFailure(new OpenSearchException("simulated")); + } else { + listener.onResponse(clusterService.state()); + } + assertEquals("reroute after balanced shards allocator timed out", reason); + assertEquals(Priority.HIGH, priority); + rerouteScheduled.compareAndSet(false, true); + }; + allocator.setRerouteService(rerouteService); allocator.allocate(allocation); List relocatingShards = allocation.routingNodes().shardsWithState(ShardRoutingState.RELOCATING); assertEquals(totalShardCount / 3, relocatingShards.size()); + assertFalse(rerouteScheduled.get()); } - public void testClusterNotRebalancedWhenTimedOut() { + public void testClusterNotRebalancedWhenTimedOutAndRerouteScheduled() { int numberOfIndices = 1; int numberOfShards = 15; int numberOfReplicas = 1; int totalShardCount = numberOfIndices * (numberOfShards * (numberOfReplicas + 1)); Metadata metadata = buildMetadata(Metadata.builder(), numberOfIndices, numberOfShards, numberOfReplicas); RoutingTable routingTable = buildRoutingTable(metadata); - ClusterState state = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) - .metadata(metadata) - .routingTable(routingTable) - .nodes(DiscoveryNodes.builder().add(node1).add(node2).add(node3)) - .build(); + setupStateAndService(metadata, routingTable); MockAllocationService allocationService = createAllocationService( Settings.builder().put("cluster.routing.allocation.exclude.zone", "1a").build() ); // such that no shards are allocated to node1 @@ -353,23 +454,32 @@ public void testClusterNotRebalancedWhenTimedOut() { null, System.nanoTime() ); + AtomicBoolean rerouteScheduled = new AtomicBoolean(false); + final RerouteService rerouteService = (reason, priority, listener) -> { + if (randomBoolean()) { + listener.onFailure(new OpenSearchException("simulated")); + } else { + listener.onResponse(clusterService.state()); + } + assertEquals("reroute after balanced shards allocator timed out", reason); + assertEquals(Priority.HIGH, priority); + rerouteScheduled.compareAndSet(false, true); + }; + allocator.setRerouteService(rerouteService); allocator.allocate(allocation); List relocatingShards = allocation.routingNodes().shardsWithState(ShardRoutingState.RELOCATING); assertEquals(0, relocatingShards.size()); + assertTrue(rerouteScheduled.get()); } - public void testClusterPartialRebalancedWhenTimedOut() { + public void testClusterPartialRebalancedWhenTimedOutAndRerouteScheduled() { int numberOfIndices = 1; int numberOfShards = 15; int numberOfReplicas = 1; int totalShardCount = numberOfIndices * (numberOfShards * (numberOfReplicas + 1)); Metadata metadata = buildMetadata(Metadata.builder(), numberOfIndices, numberOfShards, numberOfReplicas); RoutingTable routingTable = buildRoutingTable(metadata); - ClusterState state = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) - .metadata(metadata) - 
.routingTable(routingTable) - .nodes(DiscoveryNodes.builder().add(node1).add(node2).add(node3)) - .build(); + setupStateAndService(metadata, routingTable); MockAllocationService allocationService = createAllocationService( Settings.builder().put("cluster.routing.allocation.exclude.zone", "1a").build() ); // such that no shards are allocated to node1 @@ -404,9 +514,22 @@ public Decision canRebalance(ShardRouting shardRouting, RoutingAllocation alloca null, System.nanoTime() ); + AtomicBoolean rerouteScheduled = new AtomicBoolean(false); + final RerouteService rerouteService = (reason, priority, listener) -> { + if (randomBoolean()) { + listener.onFailure(new OpenSearchException("simulated")); + } else { + listener.onResponse(clusterService.state()); + } + assertEquals("reroute after balanced shards allocator timed out", reason); + assertEquals(Priority.HIGH, priority); + rerouteScheduled.compareAndSet(false, true); + }; + allocator.setRerouteService(rerouteService); allocator.allocate(allocation); List relocatingShards = allocation.routingNodes().shardsWithState(ShardRoutingState.RELOCATING); assertEquals(3, relocatingShards.size()); + assertTrue(rerouteScheduled.get()); } public void testAllocatorNeverTimedOutIfValueIsMinusOne() { diff --git a/server/src/test/java/org/opensearch/gateway/GatewayAllocatorTests.java b/server/src/test/java/org/opensearch/gateway/GatewayAllocatorTests.java index c7eae77d6deba..ebc2e59fa5a30 100644 --- a/server/src/test/java/org/opensearch/gateway/GatewayAllocatorTests.java +++ b/server/src/test/java/org/opensearch/gateway/GatewayAllocatorTests.java @@ -10,6 +10,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.opensearch.OpenSearchException; import org.opensearch.Version; import org.opensearch.action.support.nodes.BaseNodeResponse; import org.opensearch.cluster.ClusterInfo; @@ -22,6 +23,7 @@ import org.opensearch.cluster.routing.IndexRoutingTable; import org.opensearch.cluster.routing.IndexShardRoutingTable; import org.opensearch.cluster.routing.RecoverySource; +import org.opensearch.cluster.routing.RerouteService; import org.opensearch.cluster.routing.RoutingNodes; import org.opensearch.cluster.routing.RoutingTable; import org.opensearch.cluster.routing.ShardRouting; @@ -30,6 +32,8 @@ import org.opensearch.cluster.routing.UnassignedInfo; import org.opensearch.cluster.routing.allocation.RoutingAllocation; import org.opensearch.cluster.routing.allocation.decider.AllocationDeciders; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.Priority; import org.opensearch.common.collect.Tuple; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; @@ -37,7 +41,9 @@ import org.opensearch.common.util.set.Sets; import org.opensearch.core.index.shard.ShardId; import org.opensearch.snapshots.SnapshotShardSizeInfo; +import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.gateway.TestShardBatchGatewayAllocator; +import org.opensearch.threadpool.TestThreadPool; import org.junit.Before; import java.util.ArrayList; @@ -47,13 +53,13 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.CountDownLatch; -import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import static org.opensearch.gateway.ShardsBatchGatewayAllocator.PRIMARY_BATCH_ALLOCATOR_TIMEOUT_SETTING; import static org.opensearch.gateway.ShardsBatchGatewayAllocator.PRIMARY_BATCH_ALLOCATOR_TIMEOUT_SETTING_KEY; import static 
org.opensearch.gateway.ShardsBatchGatewayAllocator.REPLICA_BATCH_ALLOCATOR_TIMEOUT_SETTING; import static org.opensearch.gateway.ShardsBatchGatewayAllocator.REPLICA_BATCH_ALLOCATOR_TIMEOUT_SETTING_KEY; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; public class GatewayAllocatorTests extends OpenSearchAllocationTestCase { @@ -426,22 +432,62 @@ public void testReplicaAllocatorTimeout() { assertEquals(-1, REPLICA_BATCH_ALLOCATOR_TIMEOUT_SETTING.get(build).getMillis()); } - public void testCollectTimedOutShards() throws InterruptedException { + public void testCollectTimedOutShardsAndScheduleReroute_Success() throws InterruptedException { createIndexAndUpdateClusterState(2, 5, 2); - CountDownLatch latch = new CountDownLatch(10); - testShardsBatchGatewayAllocator = new TestShardBatchGatewayAllocator(latch); + TestThreadPool threadPool = new TestThreadPool(getTestName()); + ClusterService clusterService = ClusterServiceUtils.createClusterService(clusterState, threadPool); + final CountDownLatch rerouteLatch = new CountDownLatch(2); + final RerouteService rerouteService = (reason, priority, listener) -> { + listener.onResponse(clusterService.state()); + assertThat(rerouteLatch.getCount(), greaterThanOrEqualTo(0L)); + assertEquals("reroute after existing shards allocator timed out", reason); + assertEquals(Priority.HIGH, priority); + rerouteLatch.countDown(); + }; + CountDownLatch timedOutShardsLatch = new CountDownLatch(20); + testShardsBatchGatewayAllocator = new TestShardBatchGatewayAllocator(timedOutShardsLatch, 1000, rerouteService); testShardsBatchGatewayAllocator.setPrimaryBatchAllocatorTimeout(TimeValue.ZERO); testShardsBatchGatewayAllocator.setReplicaBatchAllocatorTimeout(TimeValue.ZERO); BatchRunnableExecutor executor = testShardsBatchGatewayAllocator.allocateAllUnassignedShards(testAllocation, true); executor.run(); - assertTrue(latch.await(1, TimeUnit.MINUTES)); - latch = new CountDownLatch(10); - testShardsBatchGatewayAllocator = new TestShardBatchGatewayAllocator(latch); + assertEquals(timedOutShardsLatch.getCount(), 10); + assertEquals(1, rerouteLatch.getCount()); + executor = testShardsBatchGatewayAllocator.allocateAllUnassignedShards(testAllocation, false); + executor.run(); + assertEquals(timedOutShardsLatch.getCount(), 0); + assertEquals(0, rerouteLatch.getCount()); // the reroute listener completes on success as well, so no listeners leak + final boolean terminated = terminate(threadPool); + assert terminated; + clusterService.close(); + } + + public void testCollectTimedOutShardsAndScheduleReroute_Failure() throws InterruptedException { + createIndexAndUpdateClusterState(2, 5, 2); + TestThreadPool threadPool = new TestThreadPool(getTestName()); + ClusterService clusterService = ClusterServiceUtils.createClusterService(clusterState, threadPool); + final CountDownLatch rerouteLatch = new CountDownLatch(2); + final RerouteService rerouteService = (reason, priority, listener) -> { + listener.onFailure(new OpenSearchException("simulated")); + assertThat(rerouteLatch.getCount(), greaterThanOrEqualTo(0L)); + assertEquals("reroute after existing shards allocator timed out", reason); + assertEquals(Priority.HIGH, priority); + rerouteLatch.countDown(); + }; + CountDownLatch timedOutShardsLatch = new CountDownLatch(20); + testShardsBatchGatewayAllocator = new TestShardBatchGatewayAllocator(timedOutShardsLatch, 1000, rerouteService); testShardsBatchGatewayAllocator.setPrimaryBatchAllocatorTimeout(TimeValue.ZERO); testShardsBatchGatewayAllocator.setReplicaBatchAllocatorTimeout(TimeValue.ZERO); + 
BatchRunnableExecutor executor = testShardsBatchGatewayAllocator.allocateAllUnassignedShards(testAllocation, true); + executor.run(); + assertEquals(timedOutShardsLatch.getCount(), 10); + assertEquals(1, rerouteLatch.getCount()); executor = testShardsBatchGatewayAllocator.allocateAllUnassignedShards(testAllocation, false); executor.run(); - assertTrue(latch.await(1, TimeUnit.MINUTES)); + assertEquals(timedOutShardsLatch.getCount(), 0); + assertEquals(0, rerouteLatch.getCount()); // even with failure it doesn't leak any listeners + final boolean terminated = terminate(threadPool); + assert terminated; + clusterService.close(); } private void createIndexAndUpdateClusterState(int count, int numberOfShards, int numberOfReplicas) { diff --git a/test/framework/src/main/java/org/opensearch/test/gateway/TestShardBatchGatewayAllocator.java b/test/framework/src/main/java/org/opensearch/test/gateway/TestShardBatchGatewayAllocator.java index 156b1d7c620e6..c2ff228a6bf3a 100644 --- a/test/framework/src/main/java/org/opensearch/test/gateway/TestShardBatchGatewayAllocator.java +++ b/test/framework/src/main/java/org/opensearch/test/gateway/TestShardBatchGatewayAllocator.java @@ -10,6 +10,7 @@ import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.RerouteService; import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.cluster.routing.allocation.AllocateUnassignedDecision; import org.opensearch.cluster.routing.allocation.RoutingAllocation; @@ -39,12 +40,13 @@ public TestShardBatchGatewayAllocator() { } - public TestShardBatchGatewayAllocator(CountDownLatch latch) { + public TestShardBatchGatewayAllocator(CountDownLatch latch, long maxBatchSize, RerouteService rerouteService) { + super(maxBatchSize, rerouteService); this.latch = latch; } public TestShardBatchGatewayAllocator(long maxBatchSize) { - super(maxBatchSize); + super(maxBatchSize, null); } Map> knownAllocations = new HashMap<>(); From e853216ab22e50057867e1c0a5ae8bd67a6daf6c Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Thu, 5 Sep 2024 16:43:01 -0400 Subject: [PATCH 13/17] Add 2.17 release notes (#15762) (#15768) Signed-off-by: Andriy Redko --- CHANGELOG.md | 90 +-------------- .../opensearch.release-notes-2.17.0.md | 104 ++++++++++++++++++ 2 files changed, 105 insertions(+), 89 deletions(-) create mode 100644 release-notes/opensearch.release-notes-2.17.0.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 4dbc667fcdbbf..435df7bda62a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,108 +5,20 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [Unreleased 2.x] ### Added -- [Workload Management] Add Settings for Workload Management feature ([#15028](https://github.com/opensearch-project/OpenSearch/pull/15028)) -- Fix for hasInitiatedFetching to fix allocation explain and manual reroute APIs ([#14972](https://github.com/opensearch-project/OpenSearch/pull/14972)) -- [Workload Management] Add queryGroupId to Task ([#14708](https://github.com/opensearch-project/OpenSearch/pull/14708)) -- Add setting to ignore throttling nodes for allocation of unassigned primaries in remote restore ([#14991](https://github.com/opensearch-project/OpenSearch/pull/14991)) -- [Workload Management] Add Delete QueryGroup API Logic ([#14735](https://github.com/opensearch-project/OpenSearch/pull/14735)) -- [Streaming Indexing] Enhance RestClient with a new streaming API support 
([#14437](https://github.com/opensearch-project/OpenSearch/pull/14437)) -- Add basic aggregation support for derived fields ([#14618](https://github.com/opensearch-project/OpenSearch/pull/14618)) -- [Workload Management] Add Create QueryGroup API Logic ([#14680](https://github.com/opensearch-project/OpenSearch/pull/14680)) -- Add ThreadContextPermission for markAsSystemContext and allow core to perform the method ([#15016](https://github.com/opensearch-project/OpenSearch/pull/15016)) -- Add ThreadContextPermission for stashAndMergeHeaders and stashWithOrigin ([#15039](https://github.com/opensearch-project/OpenSearch/pull/15039)) -- [Concurrent Segment Search] Support composite aggregations with scripting ([#15072](https://github.com/opensearch-project/OpenSearch/pull/15072)) -- Add `rangeQuery` and `regexpQuery` for `constant_keyword` field type ([#14711](https://github.com/opensearch-project/OpenSearch/pull/14711)) -- Add took time to request nodes stats ([#15054](https://github.com/opensearch-project/OpenSearch/pull/15054)) -- [Workload Management] Add Get QueryGroup API Logic ([#14709](https://github.com/opensearch-project/OpenSearch/pull/14709)) -- [Workload Management] Add Update QueryGroup API Logic ([#14775](https://github.com/opensearch-project/OpenSearch/pull/14775)) -- [Workload Management] QueryGroup resource tracking framework changes ([#13897](https://github.com/opensearch-project/OpenSearch/pull/13897)) -- Support filtering on a large list encoded by bitmap ([#14774](https://github.com/opensearch-project/OpenSearch/pull/14774)) -- Add slice execution listeners to SearchOperationListener interface ([#15153](https://github.com/opensearch-project/OpenSearch/pull/15153)) -- Make balanced shards allocator timebound ([#15239](https://github.com/opensearch-project/OpenSearch/pull/15239)) -- Add allowlist setting for ingest-geoip and ingest-useragent ([#15325](https://github.com/opensearch-project/OpenSearch/pull/15325)) -- Adding access to noSubMatches and noOverlappingMatches in Hyphenation ([#13895](https://github.com/opensearch-project/OpenSearch/pull/13895)) -- Star tree mapping changes ([#14605](https://github.com/opensearch-project/OpenSearch/pull/14605)) -- Add support for index level max slice count setting for concurrent segment search ([#15336](https://github.com/opensearch-project/OpenSearch/pull/15336)) -- Support cancellation for cat shards and node stats API ([#13966](https://github.com/opensearch-project/OpenSearch/pull/13966)) -- [Streaming Indexing] Introduce bulk HTTP API streaming flavor ([#15381](https://github.com/opensearch-project/OpenSearch/pull/15381)) -- Add support for centralized snapshot creation with pinned timestamp ([#15124](https://github.com/opensearch-project/OpenSearch/pull/15124)) -- Add concurrent search support for Derived Fields ([#15326](https://github.com/opensearch-project/OpenSearch/pull/15326)) -- [Workload Management] Add query group stats constructs ([#15343](https://github.com/opensearch-project/OpenSearch/pull/15343)) -- Add limit on number of processors for Ingest pipeline ([#15465](https://github.com/opensearch-project/OpenSearch/pull/15465)) 
-- Add runAs to Subject interface and introduce IdentityAwarePlugin extension point ([#14630](https://github.com/opensearch-project/OpenSearch/pull/14630)) -- [Workload Management] Add rejection logic for co-ordinator and shard level requests ([#15428](https://github.com/opensearch-project/OpenSearch/pull/15428)) -- Adding translog durability validation in index templates ([#15494](https://github.com/opensearch-project/OpenSearch/pull/15494)) -- [Range Queries] Add new approximateable query framework to short-circuit range queries ([#13788](https://github.com/opensearch-project/OpenSearch/pull/13788)) -- [Workload Management] Add query group level failure tracking ([#15527](https://github.com/opensearch-project/OpenSearch/pull/15527)) -- [Reader Writer Separation] Add experimental search replica shard type to achieve reader writer separation ([#15237](https://github.com/opensearch-project/OpenSearch/pull/15237)) -- Add index creation using the context field ([#15290](https://github.com/opensearch-project/OpenSearch/pull/15290)) -- [Remote Publication] Add remote download stats ([#15291](https://github.com/opensearch-project/OpenSearch/pull/15291)) -- Add support to upload snapshot shard blobs with hashed prefix ([#15426](https://github.com/opensearch-project/OpenSearch/pull/15426)) -- Add prefix support to hashed prefix & infix path types on remote store ([#15557](https://github.com/opensearch-project/OpenSearch/pull/15557)) -- Add canRemain method to TargetPoolAllocationDecider to move shards from local to remote pool for hot to warm tiering ([#15010](https://github.com/opensearch-project/OpenSearch/pull/15010)) -- Add support for pluggable deciders for concurrent search ([#15363](https://github.com/opensearch-project/OpenSearch/pull/15363)) -- Add support for comma-separated list of index names to be used with Snapshot Status API ([#15409](https://github.com/opensearch-project/OpenSearch/pull/15409)) -- Optimise snapshot deletion to speed up snapshot deletion and creation ([#15568](https://github.com/opensearch-project/OpenSearch/pull/15568)) -- [Remote Publication] Added checksum validation for cluster state behind a cluster setting ([#15218](https://github.com/opensearch-project/OpenSearch/pull/15218)) -- Relax the join validation for Remote State publication ([#15471](https://github.com/opensearch-project/OpenSearch/pull/15471)) -- Optimize NodeIndicesStats output behind flag ([#14454](https://github.com/opensearch-project/OpenSearch/pull/14454)) - MultiTermQueries in keyword fields now default to `indexed` approach and gated behind cluster setting ([#15637](https://github.com/opensearch-project/OpenSearch/pull/15637)) ### Dependencies -- Bump `netty` from 4.1.111.Final to 4.1.112.Final ([#15081](https://github.com/opensearch-project/OpenSearch/pull/15081)) -- Bump `org.apache.commons:commons-lang3` from 3.14.0 to 3.15.0 ([#14861](https://github.com/opensearch-project/OpenSearch/pull/14861)) -- OpenJDK Update (July 2024 Patch releases) ([#14998](https://github.com/opensearch-project/OpenSearch/pull/14998)) -- Bump `com.microsoft.azure:msal4j` from 1.16.1 to 1.17.0 ([#14995](https://github.com/opensearch-project/OpenSearch/pull/14995), [#15420](https://github.com/opensearch-project/OpenSearch/pull/15420)) -- Bump `actions/github-script` from 6 to 7 ([#14997](https://github.com/opensearch-project/OpenSearch/pull/14997)) -- Bump `org.tukaani:xz` from 1.9 to 1.10 ([#15110](https://github.com/opensearch-project/OpenSearch/pull/15110)) - 
Bump `org.apache.avro:avro` from 1.11.3 to 1.12.0 in /plugins/repository-hdfs ([#15119](https://github.com/opensearch-project/OpenSearch/pull/15119)) -- Bump `org.bouncycastle:bcpg-fips` from 1.0.7.1 to 2.0.9 ([#15103](https://github.com/opensearch-project/OpenSearch/pull/15103), [#15299](https://github.com/opensearch-project/OpenSearch/pull/15299)) -- Bump `com.azure:azure-core` from 1.49.1 to 1.51.0 ([#15111](https://github.com/opensearch-project/OpenSearch/pull/15111)) -- Bump `org.xerial.snappy:snappy-java` from 1.1.10.5 to 1.1.10.6 ([#15207](https://github.com/opensearch-project/OpenSearch/pull/15207)) -- Bump `com.azure:azure-xml` from 1.0.0 to 1.1.0 ([#15206](https://github.com/opensearch-project/OpenSearch/pull/15206)) -- Bump `reactor` from 3.5.19 to 3.5.20 ([#15262](https://github.com/opensearch-project/OpenSearch/pull/15262)) -- Bump `reactor-netty` from 1.1.21 to 1.1.22 ([#15262](https://github.com/opensearch-project/OpenSearch/pull/15262)) -- Bump `org.apache.kerby:kerb-admin` from 2.0.3 to 2.1.0 ([#15301](https://github.com/opensearch-project/OpenSearch/pull/15301)) -- Bump `com.azure:azure-core-http-netty` from 1.15.1 to 1.15.3 ([#15300](https://github.com/opensearch-project/OpenSearch/pull/15300)) -- Bump `com.gradle.develocity` from 3.17.6 to 3.18 ([#15297](https://github.com/opensearch-project/OpenSearch/pull/15297)) -- Bump `commons-cli:commons-cli` from 1.8.0 to 1.9.0 ([#15298](https://github.com/opensearch-project/OpenSearch/pull/15298)) -- Bump `opentelemetry` from 1.40.0 to 1.41.0 ([#15361](https://github.com/opensearch-project/OpenSearch/pull/15361)) -- Bump `opentelemetry-semconv` from 1.26.0-alpha to 1.27.0-alpha ([#15361](https://github.com/opensearch-project/OpenSearch/pull/15361)) -- Bump `tj-actions/changed-files` from 44 to 45 ([#15422](https://github.com/opensearch-project/OpenSearch/pull/15422)) -- Bump `dnsjava:dnsjava` from 3.6.0 to 3.6.1 ([#15418](https://github.com/opensearch-project/OpenSearch/pull/15418)) -- Bump `com.netflix.nebula.ospackage-base` from 11.9.1 to 11.10.0 ([#15419](https://github.com/opensearch-project/OpenSearch/pull/15419)) -- Bump `org.roaringbitmap:RoaringBitmap` from 1.1.0 to 1.2.1 ([#15423](https://github.com/opensearch-project/OpenSearch/pull/15423)) -- Bump `icu4j` from 70.1 to 75.1 ([#15469](https://github.com/opensearch-project/OpenSearch/pull/15469)) - Bump `com.azure:azure-identity` from 1.13.0 to 1.13.2 ([#15578](https://github.com/opensearch-project/OpenSearch/pull/15578)) ### Changed -- Add lower limit for primary and replica batch allocators timeout ([#14979](https://github.com/opensearch-project/OpenSearch/pull/14979)) -- Optimize regexp-based include/exclude on aggregations when pattern matches prefixes ([#14371](https://github.com/opensearch-project/OpenSearch/pull/14371)) -- Replace and block usages of org.apache.logging.log4j.util.Strings ([#15238](https://github.com/opensearch-project/OpenSearch/pull/15238)) -- Remote publication using minimum node version for backward compatibility ([#15216](https://github.com/opensearch-project/OpenSearch/pull/15216)) ### Deprecated ### Removed -- Remove some unused code in the search backpressure package ([#15518](https://github.com/opensearch-project/OpenSearch/pull/15518)) ### Fixed -- Fix constraint bug which allows more primary shards than average primary shards per index ([#14908](https://github.com/opensearch-project/OpenSearch/pull/14908)) -- Fix NPE when bulk ingest with empty pipeline ([#15033](https://github.com/opensearch-project/OpenSearch/pull/15033)) -- Fix 
missing value of FieldSort for unsigned_long ([#14963](https://github.com/opensearch-project/OpenSearch/pull/14963)) -- Fix delete index template failed when the index template matches a data stream but is unused ([#15080](https://github.com/opensearch-project/OpenSearch/pull/15080)) -- Fix array_index_out_of_bounds_exception when indexing documents with field name containing only dot ([#15126](https://github.com/opensearch-project/OpenSearch/pull/15126)) -- Fixed array field name omission in flat_object function for nested JSON ([#13620](https://github.com/opensearch-project/OpenSearch/pull/13620)) -- Fix incorrect parameter names in MinHash token filter configuration handling ([#15233](https://github.com/opensearch-project/OpenSearch/pull/15233)) -- Fix range aggregation optimization ignoring top level queries ([#15287](https://github.com/opensearch-project/OpenSearch/pull/15287)) -- Fix indexing error when flat_object field is explicitly null ([#15375](https://github.com/opensearch-project/OpenSearch/pull/15375)) -- Fix split response processor not included in allowlist ([#15393](https://github.com/opensearch-project/OpenSearch/pull/15393)) -- Fix unchecked cast in dynamic action map getter ([#15394](https://github.com/opensearch-project/OpenSearch/pull/15394)) -- Fix null values indexed as "null" strings in flat_object field ([#14069](https://github.com/opensearch-project/OpenSearch/pull/14069)) -- Fix terms query on wildcard field returns nothing ([#15607](https://github.com/opensearch-project/OpenSearch/pull/15607)) ### Security -[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.16...2.x +[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.17...2.x diff --git a/release-notes/opensearch.release-notes-2.17.0.md b/release-notes/opensearch.release-notes-2.17.0.md new file mode 100644 index 0000000000000..18b7c9ac7cd68 --- /dev/null +++ b/release-notes/opensearch.release-notes-2.17.0.md @@ -0,0 +1,104 @@ +## 2024-09-17 Version 2.17.0 Release Notes + +## [2.17.0] +### Added +- [Workload Management] Add Settings for Workload Management feature ([#15028](https://github.com/opensearch-project/OpenSearch/pull/15028)) +- Fix for hasInitiatedFetching to fix allocation explain and manual reroute APIs ([#14972](https://github.com/opensearch-project/OpenSearch/pull/14972)) +- [Workload Management] Add queryGroupId to Task ([#14708](https://github.com/opensearch-project/OpenSearch/pull/14708)) +- Add setting to ignore throttling nodes for allocation of unassigned primaries in remote restore ([#14991](https://github.com/opensearch-project/OpenSearch/pull/14991)) +- [Workload Management] Add Delete QueryGroup API Logic ([#14735](https://github.com/opensearch-project/OpenSearch/pull/14735)) +- [Streaming Indexing] Enhance RestClient with a new streaming API support ([#14437](https://github.com/opensearch-project/OpenSearch/pull/14437)) +- Add basic aggregation support for derived fields ([#14618](https://github.com/opensearch-project/OpenSearch/pull/14618)) +- [Workload Management] Add Create QueryGroup API Logic ([#14680](https://github.com/opensearch-project/OpenSearch/pull/14680)) +- Add ThreadContextPermission for markAsSystemContext and allow core to perform the method ([#15016](https://github.com/opensearch-project/OpenSearch/pull/15016)) +- Add ThreadContextPermission for stashAndMergeHeaders and stashWithOrigin 
([#15039](https://github.com/opensearch-project/OpenSearch/pull/15039)) +- [Concurrent Segment Search] Support composite aggregations with scripting ([#15072](https://github.com/opensearch-project/OpenSearch/pull/15072)) +- Add `rangeQuery` and `regexpQuery` for `constant_keyword` field type ([#14711](https://github.com/opensearch-project/OpenSearch/pull/14711)) +- Add took time to request nodes stats ([#15054](https://github.com/opensearch-project/OpenSearch/pull/15054)) +- [Workload Management] Add Get QueryGroup API Logic ([#14709](https://github.com/opensearch-project/OpenSearch/pull/14709)) +- [Workload Management] Add Update QueryGroup API Logic ([#14775](https://github.com/opensearch-project/OpenSearch/pull/14775)) +- [Workload Management] QueryGroup resource tracking framework changes ([#13897](https://github.com/opensearch-project/OpenSearch/pull/13897)) +- Support filtering on a large list encoded by bitmap ([#14774](https://github.com/opensearch-project/OpenSearch/pull/14774)) +- Add slice execution listeners to SearchOperationListener interface ([#15153](https://github.com/opensearch-project/OpenSearch/pull/15153)) +- Make balanced shards allocator timebound ([#15239](https://github.com/opensearch-project/OpenSearch/pull/15239)) +- Add allowlist setting for ingest-geoip and ingest-useragent ([#15325](https://github.com/opensearch-project/OpenSearch/pull/15325)) +- Adding access to noSubMatches and noOverlappingMatches in Hyphenation ([#13895](https://github.com/opensearch-project/OpenSearch/pull/13895)) +- Star tree mapping changes ([#14605](https://github.com/opensearch-project/OpenSearch/pull/14605)) +- Add support for index level max slice count setting for concurrent segment search ([#15336](https://github.com/opensearch-project/OpenSearch/pull/15336)) +- Support cancellation for cat shards and node stats API ([#13966](https://github.com/opensearch-project/OpenSearch/pull/13966)) +- [Streaming Indexing] Introduce bulk HTTP API streaming flavor ([#15381](https://github.com/opensearch-project/OpenSearch/pull/15381)) +- Add support for centralized snapshot creation with pinned timestamp ([#15124](https://github.com/opensearch-project/OpenSearch/pull/15124)) +- Add concurrent search support for Derived Fields ([#15326](https://github.com/opensearch-project/OpenSearch/pull/15326)) +- [Workload Management] Add query group stats constructs ([#15343](https://github.com/opensearch-project/OpenSearch/pull/15343)) +- Add limit on number of processors for Ingest pipeline ([#15465](https://github.com/opensearch-project/OpenSearch/pull/15465)) 
+- Add runAs to Subject interface and introduce IdentityAwarePlugin extension point ([#14630](https://github.com/opensearch-project/OpenSearch/pull/14630)) +- [Workload Management] Add rejection logic for co-ordinator and shard level requests ([#15428](https://github.com/opensearch-project/OpenSearch/pull/15428)) +- Adding translog durability validation in index templates ([#15494](https://github.com/opensearch-project/OpenSearch/pull/15494)) +- [Range Queries] Add new approximateable query framework to short-circuit range queries ([#13788](https://github.com/opensearch-project/OpenSearch/pull/13788)) +- [Workload Management] Add query group level failure tracking ([#15527](https://github.com/opensearch-project/OpenSearch/pull/15527)) +- [Reader Writer Separation] Add experimental search replica shard type to achieve reader writer separation ([#15237](https://github.com/opensearch-project/OpenSearch/pull/15237)) +- Add index creation using the context field ([#15290](https://github.com/opensearch-project/OpenSearch/pull/15290)) +- [Remote Publication] Add remote download stats ([#15291](https://github.com/opensearch-project/OpenSearch/pull/15291)) +- Add support to upload snapshot shard blobs with hashed prefix ([#15426](https://github.com/opensearch-project/OpenSearch/pull/15426)) +- Add prefix support to hashed prefix & infix path types on remote store ([#15557](https://github.com/opensearch-project/OpenSearch/pull/15557)) +- Add canRemain method to TargetPoolAllocationDecider to move shards from local to remote pool for hot to warm tiering ([#15010](https://github.com/opensearch-project/OpenSearch/pull/15010)) +- Add support for pluggable deciders for concurrent search ([#15363](https://github.com/opensearch-project/OpenSearch/pull/15363)) +- Optimise snapshot deletion to speed up snapshot deletion and creation ([#15568](https://github.com/opensearch-project/OpenSearch/pull/15568)) +- [Remote Publication] Added checksum validation for cluster state behind a cluster setting ([#15218](https://github.com/opensearch-project/OpenSearch/pull/15218)) +- Optimize NodeIndicesStats output behind flag ([#14454](https://github.com/opensearch-project/OpenSearch/pull/14454)) +- Add support for comma-separated list of index names to be used with Snapshot Status API ([#15409](https://github.com/opensearch-project/OpenSearch/pull/15409)) +- ClusterManagerTaskThrottler Improvements ([#15508](https://github.com/opensearch-project/OpenSearch/pull/15508)) +- Relax the join validation for Remote State publication ([#15471](https://github.com/opensearch-project/OpenSearch/pull/15471)) +- Reset DiscoveryNodes in all transport node actions request ([#15131](https://github.com/opensearch-project/OpenSearch/pull/15131)) + +### Dependencies +- Bump `netty` from 4.1.111.Final to 4.1.112.Final ([#15081](https://github.com/opensearch-project/OpenSearch/pull/15081)) +- Bump `org.apache.commons:commons-lang3` from 3.14.0 to 3.15.0 ([#14861](https://github.com/opensearch-project/OpenSearch/pull/14861)) +- OpenJDK Update (July 2024 Patch releases) ([#14998](https://github.com/opensearch-project/OpenSearch/pull/14998)) +- Bump `com.microsoft.azure:msal4j` from 1.16.1 to 1.17.0 ([#14995](https://github.com/opensearch-project/OpenSearch/pull/14995), [#15420](https://github.com/opensearch-project/OpenSearch/pull/15420)) +- Bump `actions/github-script` from 6 to 7 ([#14997](https://github.com/opensearch-project/OpenSearch/pull/14997)) +- Bump `org.tukaani:xz` from 1.9 to 1.10 
([#15110](https://github.com/opensearch-project/OpenSearch/pull/15110)) +- Bump `org.apache.avro:avro` from 1.11.3 to 1.12.0 in /plugins/repository-hdfs ([#15119](https://github.com/opensearch-project/OpenSearch/pull/15119)) +- Bump `org.bouncycastle:bcpg-fips` from 1.0.7.1 to 2.0.9 ([#15103](https://github.com/opensearch-project/OpenSearch/pull/15103), [#15299](https://github.com/opensearch-project/OpenSearch/pull/15299)) +- Bump `com.azure:azure-core` from 1.49.1 to 1.51.0 ([#15111](https://github.com/opensearch-project/OpenSearch/pull/15111)) +- Bump `org.xerial.snappy:snappy-java` from 1.1.10.5 to 1.1.10.6 ([#15207](https://github.com/opensearch-project/OpenSearch/pull/15207)) +- Bump `com.azure:azure-xml` from 1.0.0 to 1.1.0 ([#15206](https://github.com/opensearch-project/OpenSearch/pull/15206)) +- Bump `reactor` from 3.5.19 to 3.5.20 ([#15262](https://github.com/opensearch-project/OpenSearch/pull/15262)) +- Bump `reactor-netty` from 1.1.21 to 1.1.22 ([#15262](https://github.com/opensearch-project/OpenSearch/pull/15262)) +- Bump `org.apache.kerby:kerb-admin` from 2.0.3 to 2.1.0 ([#15301](https://github.com/opensearch-project/OpenSearch/pull/15301)) +- Bump `com.azure:azure-core-http-netty` from 1.15.1 to 1.15.3 ([#15300](https://github.com/opensearch-project/OpenSearch/pull/15300)) +- Bump `com.gradle.develocity` from 3.17.6 to 3.18 ([#15297](https://github.com/opensearch-project/OpenSearch/pull/15297)) +- Bump `commons-cli:commons-cli` from 1.8.0 to 1.9.0 ([#15298](https://github.com/opensearch-project/OpenSearch/pull/15298)) +- Bump `opentelemetry` from 1.40.0 to 1.41.0 ([#15361](https://github.com/opensearch-project/OpenSearch/pull/15361)) +- Bump `opentelemetry-semconv` from 1.26.0-alpha to 1.27.0-alpha ([#15361](https://github.com/opensearch-project/OpenSearch/pull/15361)) +- Bump `tj-actions/changed-files` from 44 to 45 ([#15422](https://github.com/opensearch-project/OpenSearch/pull/15422)) +- Bump `dnsjava:dnsjava` from 3.6.0 to 3.6.1 ([#15418](https://github.com/opensearch-project/OpenSearch/pull/15418)) +- Bump `com.netflix.nebula.ospackage-base` from 11.9.1 to 11.10.0 ([#15419](https://github.com/opensearch-project/OpenSearch/pull/15419)) +- Bump `org.roaringbitmap:RoaringBitmap` from 1.1.0 to 1.2.1 ([#15423](https://github.com/opensearch-project/OpenSearch/pull/15423)) +- Bump `icu4j` from 70.1 to 75.1 ([#15469](https://github.com/opensearch-project/OpenSearch/pull/15469)) + +### Changed +- Add lower limit for primary and replica batch allocators timeout ([#14979](https://github.com/opensearch-project/OpenSearch/pull/14979)) +- Optimize regexp-based include/exclude on aggregations when pattern matches prefixes ([#14371](https://github.com/opensearch-project/OpenSearch/pull/14371)) +- Replace and block usages of org.apache.logging.log4j.util.Strings ([#15238](https://github.com/opensearch-project/OpenSearch/pull/15238)) +- Remote publication using minimum node version for backward compatibility ([#15216](https://github.com/opensearch-project/OpenSearch/pull/15216)) + +### Deprecated + +### Removed +- Remove some unused code in the search backpressure package ([#15518](https://github.com/opensearch-project/OpenSearch/pull/15518)) + +### Fixed +- Fix constraint bug which allows more primary shards than average primary shards per index ([#14908](https://github.com/opensearch-project/OpenSearch/pull/14908)) +- Fix NPE when bulk ingest with empty pipeline ([#15033](https://github.com/opensearch-project/OpenSearch/pull/15033)) +- Fix missing value of FieldSort for unsigned_long 
([#14963](https://github.com/opensearch-project/OpenSearch/pull/14963)) +- Fix failure to delete an index template when it matches a data stream but is unused ([#15080](https://github.com/opensearch-project/OpenSearch/pull/15080)) +- Fix array_index_out_of_bounds_exception when indexing documents with field name containing only dot ([#15126](https://github.com/opensearch-project/OpenSearch/pull/15126)) +- Fix array field name omission in flat_object function for nested JSON ([#13620](https://github.com/opensearch-project/OpenSearch/pull/13620)) +- Fix incorrect parameter names in MinHash token filter configuration handling ([#15233](https://github.com/opensearch-project/OpenSearch/pull/15233)) +- Fix range aggregation optimization ignoring top level queries ([#15287](https://github.com/opensearch-project/OpenSearch/pull/15287)) +- Fix indexing error when flat_object field is explicitly null ([#15375](https://github.com/opensearch-project/OpenSearch/pull/15375)) +- Fix split response processor not included in allowlist ([#15393](https://github.com/opensearch-project/OpenSearch/pull/15393)) +- Fix unchecked cast in dynamic action map getter ([#15394](https://github.com/opensearch-project/OpenSearch/pull/15394)) +- Fix null values indexed as "null" strings in flat_object field ([#14069](https://github.com/opensearch-project/OpenSearch/pull/14069)) +- Fix terms query on wildcard field returning nothing ([#15607](https://github.com/opensearch-project/OpenSearch/pull/15607)) From e6862cdc868b20afed6ac23696d5e9ae7df4906e Mon Sep 17 00:00:00 2001 From: "opensearch-trigger-bot[bot]" <98922864+opensearch-trigger-bot[bot]@users.noreply.github.com> Date: Thu, 5 Sep 2024 17:58:14 -0400 Subject: [PATCH 14/17] Adding release notes (#15771) (#15779) (cherry picked from commit e892f23c443ddfbfd82bf0e447636231d34ecd2f) Signed-off-by: Harsha Vamsi Kalluri Signed-off-by: github-actions[bot] Co-authored-by: github-actions[bot] --- release-notes/opensearch.release-notes-2.17.0.md | 1 + 1 file changed, 1 insertion(+) diff --git a/release-notes/opensearch.release-notes-2.17.0.md b/release-notes/opensearch.release-notes-2.17.0.md index 18b7c9ac7cd68..41c7a89ea634c 100644 --- a/release-notes/opensearch.release-notes-2.17.0.md +++ b/release-notes/opensearch.release-notes-2.17.0.md @@ -50,6 +50,7 @@ - ClusterManagerTaskThrottler Improvements ([#15508](https://github.com/opensearch-project/OpenSearch/pull/15508)) - Relax the join validation for Remote State publication ([#15471](https://github.com/opensearch-project/OpenSearch/pull/15471)) - Reset DiscoveryNodes in all transport node actions request ([#15131](https://github.com/opensearch-project/OpenSearch/pull/15131)) +- MultiTermQueries in keyword fields now default to `indexed` approach and gated behind cluster setting ([#15637](https://github.com/opensearch-project/OpenSearch/pull/15637)) ### Dependencies - Bump `netty` from 4.1.111.Final to 4.1.112.Final ([#15081](https://github.com/opensearch-project/OpenSearch/pull/15081)) From 93b0755bd64fb35c61aef0f5b223c30bfd9a84b2 Mon Sep 17 00:00:00 2001 From: "opensearch-trigger-bot[bot]" <98922864+opensearch-trigger-bot[bot]@users.noreply.github.com> Date: Thu, 5 Sep 2024 18:00:25 -0400 Subject: [PATCH 15/17] Fixing MacOS 13 assemble workflows (#15747) (#15773) (cherry picked from commit fe61e4f299d4b832dc232cb223ec946585177f52) Signed-off-by: Andriy Redko Signed-off-by: github-actions[bot] Co-authored-by: github-actions[bot] --- .github/workflows/assemble.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2
deletions(-) diff --git a/.github/workflows/assemble.yml b/.github/workflows/assemble.yml index 294627622a136..b3838b8e5ae97 100644 --- a/.github/workflows/assemble.yml +++ b/.github/workflows/assemble.yml @@ -30,8 +30,11 @@ jobs: - name: Setup docker (missing on MacOS) id: setup_docker if: runner.os == 'macos' + continue-on-error: true run: | - exit 0; + brew install docker colima coreutils + gtimeout 15m colima start + shell: bash - name: Run Gradle (assemble) if: runner.os == 'macos' && steps.setup_docker.outcome != 'success' run: | @@ -45,4 +48,4 @@ jobs: - name: Run Gradle (assemble) if: runner.os == 'macos' && steps.setup_docker.outcome == 'success' run: | - exit 0; + ./gradlew assemble --parallel --no-build-cache -PDISABLE_BUILD_CACHE -Druntime.java=${{ matrix.java }} From 3b8a741a8547c21317449b7d783edd378a8eecb7 Mon Sep 17 00:00:00 2001 From: Harsha Vamsi Kalluri Date: Thu, 5 Sep 2024 15:51:01 -0700 Subject: [PATCH 16/17] Remove unnecessary changelog entry (#15781) Signed-off-by: Harsha Vamsi Kalluri --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 435df7bda62a9..6397b81a5f1c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [Unreleased 2.x] ### Added -- MultiTermQueries in keyword fields now default to `indexed` approach and gated behind cluster setting ([#15637](https://github.com/opensearch-project/OpenSearch/pull/15637)) ### Dependencies - Bump `com.azure:azure-identity` from 1.13.0 to 1.13.2 ([#15578](https://github.com/opensearch-project/OpenSearch/pull/15578)) From 7712bea9d400718ac093dc4db5c95691d631adbb Mon Sep 17 00:00:00 2001 From: Sooraj Sinha <81695996+soosinha@users.noreply.github.com> Date: Fri, 6 Sep 2024 13:26:08 +0530 Subject: [PATCH 17/17] Add dedicated string prefix for remote index metadata and remote routing table (#15575) (#15739) * Hashed prefix for index metadata Signed-off-by: Sooraj Sinha --- .../remote/RemoteStatePublicationIT.java | 49 +++++++++- .../RemoteStoreClusterStateRestoreIT.java | 13 ++- .../remote/RemoteWriteableBlobEntity.java | 11 +++ .../RemoteWriteableEntityBlobStore.java | 2 +- .../common/settings/ClusterSettings.java | 5 + .../remote/RemoteClusterStateService.java | 17 +++- .../remote/RemoteIndexMetadataManager.java | 48 ++++++++++ .../remote/model/RemoteIndexMetadata.java | 30 +++++- .../model/RemoteRoutingTableBlobStore.java | 14 +++ .../RemoteIndexMetadataManagerTests.java | 35 ++++++- .../model/RemoteIndexMetadataTests.java | 91 +++++++++++++++++-- 11 files changed, 298 insertions(+), 17 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteStatePublicationIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteStatePublicationIT.java index 0e6321867a33b..f794eec65cb1b 100644 --- a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteStatePublicationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteStatePublicationIT.java @@ -17,7 +17,11 @@ import org.opensearch.common.settings.Settings; import org.opensearch.common.util.FeatureFlags; import org.opensearch.discovery.DiscoveryStats; +import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata; import org.opensearch.gateway.remote.model.RemoteClusterMetadataManifest; +import org.opensearch.gateway.remote.model.RemoteRoutingTableBlobStore; +import
org.opensearch.index.remote.RemoteStoreEnums.PathHashAlgorithm; +import org.opensearch.index.remote.RemoteStoreEnums.PathType; import org.opensearch.indices.recovery.RecoverySettings; import org.opensearch.remotestore.RemoteStoreBaseIntegTestCase; import org.opensearch.repositories.RepositoriesService; @@ -48,19 +52,27 @@ import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.startsWith; @ClusterScope(scope = Scope.TEST, numDataNodes = 0) public class RemoteStatePublicationIT extends RemoteStoreBaseIntegTestCase { - private static String INDEX_NAME = "test-index"; + private static final String INDEX_NAME = "test-index"; + private static final String REMOTE_STATE_PREFIX = "!"; + private static final String REMOTE_ROUTING_PREFIX = "_"; private boolean isRemoteStateEnabled = true; private String isRemotePublicationEnabled = "true"; + private boolean hasRemoteStateCharPrefix; + private boolean hasRemoteRoutingCharPrefix; @Before public void setup() { asyncUploadMockFsRepo = false; isRemoteStateEnabled = true; isRemotePublicationEnabled = "true"; + hasRemoteStateCharPrefix = randomBoolean(); + hasRemoteRoutingCharPrefix = randomBoolean(); } @Override @@ -84,6 +96,7 @@ protected Settings nodeSettings(int nodeOrdinal) { "node.attr." + REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX, routingTableRepoName ); + return Settings.builder() .put(super.nodeSettings(nodeOrdinal)) .put(REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), isRemoteStateEnabled) @@ -94,6 +107,19 @@ protected Settings nodeSettings(int nodeOrdinal) { RemoteClusterStateService.REMOTE_CLUSTER_STATE_CHECKSUM_VALIDATION_MODE_SETTING.getKey(), RemoteClusterStateService.RemoteClusterStateValidationMode.FAILURE ) + .put( + RemoteClusterStateService.CLUSTER_REMOTE_STORE_STATE_PATH_PREFIX.getKey(), + hasRemoteStateCharPrefix ? REMOTE_STATE_PREFIX : "" + ) + .put( + RemoteRoutingTableBlobStore.CLUSTER_REMOTE_STORE_ROUTING_TABLE_PATH_PREFIX.getKey(), + hasRemoteRoutingCharPrefix ? 
REMOTE_ROUTING_PREFIX : "" ) + .put(RemoteIndexMetadataManager.REMOTE_INDEX_METADATA_PATH_TYPE_SETTING.getKey(), PathType.HASHED_PREFIX.toString()) + .put( + RemoteIndexMetadataManager.REMOTE_INDEX_METADATA_PATH_HASH_ALGO_SETTING.getKey(), + PathHashAlgorithm.FNV_1A_COMPOSITE_1.toString() + ) .build(); } @@ -137,6 +163,27 @@ public void testPublication() throws Exception { Map manifestFiles = getMetadataFiles(repository, RemoteClusterMetadataManifest.MANIFEST); assertTrue(manifestFiles.containsKey(RemoteClusterMetadataManifest.MANIFEST)); + RemoteClusterStateService remoteClusterStateService = internalCluster().getInstance( + RemoteClusterStateService.class, + internalCluster().getClusterManagerName() + ); + ClusterMetadataManifest manifest = remoteClusterStateService.getLatestClusterMetadataManifest( + getClusterState().getClusterName().value(), + getClusterState().metadata().clusterUUID() + ).get(); + assertThat(manifest.getIndices().size(), is(1)); + if (hasRemoteStateCharPrefix) { + for (UploadedIndexMetadata md : manifest.getIndices()) { + assertThat(md.getUploadedFilename(), startsWith(REMOTE_STATE_PREFIX)); + } + } + assertThat(manifest.getIndicesRouting().size(), is(1)); + if (hasRemoteRoutingCharPrefix) { + for (UploadedIndexMetadata md : manifest.getIndicesRouting()) { + assertThat(md.getUploadedFilename(), startsWith(REMOTE_ROUTING_PREFIX)); + } + } + // get settings from each node and verify that it is updated Settings settings = clusterService().getSettings(); logger.info("settings : {}", settings); diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java index 6ec973090883b..d078ba05faa12 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java @@ -328,10 +328,17 @@ public void testFullClusterRestoreManifestFilePointsToInvalidIndexMetadataPathTh internalCluster().stopAllNodes(); // Step - 3 Delete index metadata file in remote try { - Files.move( - segmentRepoPath.resolve(encodeString(clusterName) + "/cluster-state/" + prevClusterUUID + "/index"), - segmentRepoPath.resolve("cluster-state/") + RemoteClusterStateService remoteClusterStateService = internalCluster().getInstance( + RemoteClusterStateService.class, + internalCluster().getClusterManagerName() ); + ClusterMetadataManifest manifest = remoteClusterStateService.getLatestClusterMetadataManifest( + getClusterState().getClusterName().value(), + getClusterState().metadata().clusterUUID() + ).get(); + for (UploadedIndexMetadata md : manifest.getIndices()) { + Files.move(segmentRepoPath.resolve(md.getUploadedFilename()), segmentRepoPath.resolve("cluster-state/")); + } } catch (IOException e) { throw new RuntimeException(e); } }
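The "!" and "_" prefixes above are deliberately odd one-character strings so the startsWith assertions can cheaply prove that uploaded filenames were actually prefixed. For reference, a minimal sketch of assembling the same node settings outside the test harness; the four setting keys are the ones introduced by this patch, while the helper class itself is illustrative only:

```java
import org.opensearch.common.settings.Settings;

// Illustrative helper (not part of the patch): node settings that enable a fixed
// prefix for remote cluster-state and routing-table paths, plus hashed-prefix
// path generation for index metadata.
final class RemotePathPrefixSettingsSketch {
    static Settings build() {
        return Settings.builder()
            .put("cluster.remote_store.state.path.prefix", "!")         // fixed prefix for cluster state blobs
            .put("cluster.remote_store.routing_table.path.prefix", "_") // fixed prefix for routing table blobs
            .put("cluster.remote_store.index_metadata.path_type", "HASHED_PREFIX")
            .put("cluster.remote_store.index_metadata.path_hash_algo", "FNV_1A_COMPOSITE_1")
            .build();
    }
}
```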
diff --git a/server/src/main/java/org/opensearch/common/remote/RemoteWriteableBlobEntity.java b/server/src/main/java/org/opensearch/common/remote/RemoteWriteableBlobEntity.java index f034ce2d1adf1..4d9adb48d2a3a 100644 --- a/server/src/main/java/org/opensearch/common/remote/RemoteWriteableBlobEntity.java +++ b/server/src/main/java/org/opensearch/common/remote/RemoteWriteableBlobEntity.java @@ -63,6 +63,17 @@ public String[] getBlobPathTokens() { public abstract String generateBlobFileName(); + /** + * Generate the blob path for the remote entity by adding a custom prefix. + * This custom prefix may be generated by any of the strategies defined in {@link org.opensearch.index.remote.RemoteStoreEnums}. + * The default implementation returns the same path as passed in the argument. + * @param blobPath The remote path on which the remote entity is to be uploaded + * @return The modified remote path after adding a custom prefix at which the remote entity will be uploaded. + */ + public BlobPath getPrefixedPath(BlobPath blobPath) { + return blobPath; + } + public String clusterUUID() { return clusterUUID; } diff --git a/server/src/main/java/org/opensearch/common/remote/RemoteWriteableEntityBlobStore.java b/server/src/main/java/org/opensearch/common/remote/RemoteWriteableEntityBlobStore.java index baa44a7c9bde9..b5e074874dd38 100644 --- a/server/src/main/java/org/opensearch/common/remote/RemoteWriteableEntityBlobStore.java +++ b/server/src/main/java/org/opensearch/common/remote/RemoteWriteableEntityBlobStore.java @@ -102,7 +102,7 @@ public BlobPath getBlobPathForUpload(final RemoteWriteableBlobEntity<T> obj) { for (String token : obj.getBlobPathParameters().getPathTokens()) { blobPath = blobPath.add(token); } - return blobPath; + return obj.getPrefixedPath(blobPath); } public BlobPath getBlobPathForDownload(final RemoteWriteableBlobEntity<T> obj) { diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index 89ef00e2f30a9..67cf1305d3c92 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -109,6 +109,7 @@ import org.opensearch.gateway.ShardsBatchGatewayAllocator; import org.opensearch.gateway.remote.RemoteClusterStateCleanupManager; import org.opensearch.gateway.remote.RemoteClusterStateService; +import org.opensearch.gateway.remote.RemoteIndexMetadataManager; import org.opensearch.gateway.remote.model.RemoteRoutingTableBlobStore; import org.opensearch.http.HttpTransportSettings; import org.opensearch.index.IndexModule; @@ -738,6 +739,9 @@ public void apply(Settings value, Settings current, Settings previous) { GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING, METADATA_MANIFEST_UPLOAD_TIMEOUT_SETTING, RemoteClusterStateService.REMOTE_STATE_READ_TIMEOUT_SETTING, + RemoteClusterStateService.CLUSTER_REMOTE_STORE_STATE_PATH_PREFIX, + RemoteIndexMetadataManager.REMOTE_INDEX_METADATA_PATH_TYPE_SETTING, + RemoteIndexMetadataManager.REMOTE_INDEX_METADATA_PATH_HASH_ALGO_SETTING, RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING, RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING, @@ -746,6 +750,7 @@ public void apply(Settings value, Settings current, Settings previous) { RemoteRoutingTableBlobStore.REMOTE_ROUTING_TABLE_PATH_TYPE_SETTING, RemoteRoutingTableBlobStore.REMOTE_ROUTING_TABLE_PATH_HASH_ALGO_SETTING, RemoteClusterStateService.REMOTE_CLUSTER_STATE_CHECKSUM_VALIDATION_MODE_SETTING, + RemoteRoutingTableBlobStore.CLUSTER_REMOTE_STORE_ROUTING_TABLE_PATH_PREFIX, AdmissionControlSettings.ADMISSION_CONTROL_TRANSPORT_LAYER_MODE, CpuBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE,
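The default getPrefixedPath above is an identity function, and RemoteWriteableEntityBlobStore now funnels every upload path through it; entities that opt in (RemoteIndexMetadata, later in this patch) override it to prepend an optional fixed string plus a hash. A toy model of the resulting path shape follows; the hash literal is copied from RemoteIndexMetadataTests.testPrefixedPath further down, not computed here:

```java
// Toy model only: the shape of a prefixed upload path under HASHED_PREFIX
// semantics. Real hashing is done by RemoteStoreEnums.PathHashAlgorithm.
final class PrefixedPathShapeSketch {
    static String shape(String fixedPrefix, String hash, String basePath, String indexUuid) {
        return fixedPrefix + hash + "/" + basePath + "/" + indexUuid + "/";
    }

    public static void main(String[] args) {
        // Prints: *410100110100101/test-path/index-uuid/
        System.out.println(shape("*", "410100110100101", "test-path", "index-uuid"));
    }
}
```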
diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index a223bfbe736c3..fe34b68702c41 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -150,6 +150,16 @@ public class RemoteClusterStateService implements Closeable { Setting.Property.NodeScope ); + /** + * Controls the fixed prefix for the cluster state path on remote store. + */ + public static final Setting<String> CLUSTER_REMOTE_STORE_STATE_PATH_PREFIX = Setting.simpleString( + "cluster.remote_store.state.path.prefix", + "", + Property.NodeScope, + Property.Final + ); + /** * Validation mode for cluster state checksum. * None: Validation will be disabled. @@ -211,6 +221,7 @@ public static RemoteClusterStateValidationMode parseString(String mode) { + "indices, coordination metadata updated : [{}], settings metadata updated : [{}], templates metadata " + "updated : [{}], custom metadata updated : [{}], indices routing updated : [{}]"; private final boolean isPublicationEnabled; + private final String remotePathPrefix; // ToXContent Params with gateway mode. // We are using gateway context mode to persist all custom metadata. @@ -252,6 +263,7 @@ public RemoteClusterStateService( this.isPublicationEnabled = FeatureFlags.isEnabled(REMOTE_PUBLICATION_EXPERIMENTAL) && RemoteStoreNodeAttribute.isRemoteStoreClusterStateEnabled(settings) && RemoteStoreNodeAttribute.isRemoteRoutingTableEnabled(settings); + this.remotePathPrefix = CLUSTER_REMOTE_STORE_STATE_PATH_PREFIX.get(settings); this.remoteRoutingTableService = RemoteRoutingTableServiceFactory.getService( repositoriesService, settings, @@ -728,7 +740,10 @@ UploadedMetadataResults writeMetadataInParallel( indexMetadata, clusterState.metadata().clusterUUID(), blobStoreRepository.getCompressor(), - blobStoreRepository.getNamedXContentRegistry() + blobStoreRepository.getNamedXContentRegistry(), + remoteIndexMetadataManager.getPathTypeSetting(), + remoteIndexMetadataManager.getPathHashAlgoSetting(), + remotePathPrefix ), listener );
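writeMetadataInParallel now threads the path type, hash algorithm, and fixed prefix into each uploaded entity. Constructing one of these entities therefore looks roughly like the sketch below; the wrapper method and its parameter set are assumptions for illustration, while the seven-argument constructor is the one added by this patch:

```java
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.core.compress.Compressor;
import org.opensearch.core.xcontent.NamedXContentRegistry;
import org.opensearch.gateway.remote.model.RemoteIndexMetadata;
import org.opensearch.index.remote.RemoteStoreEnums;

// Sketch: build an index-metadata entity whose blob path will carry the
// configured fixed prefix plus a hashed-prefix component.
final class RemoteIndexMetadataFactorySketch {
    static RemoteIndexMetadata create(
        IndexMetadata indexMetadata,
        String clusterUUID,
        Compressor compressor,
        NamedXContentRegistry registry,
        String fixedPrefix // e.g. the value of cluster.remote_store.state.path.prefix
    ) {
        return new RemoteIndexMetadata(
            indexMetadata,
            clusterUUID,
            compressor,
            registry,
            RemoteStoreEnums.PathType.HASHED_PREFIX,
            RemoteStoreEnums.PathHashAlgorithm.FNV_1A_COMPOSITE_1,
            fixedPrefix
        );
    }
}
```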
diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteIndexMetadataManager.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteIndexMetadataManager.java index d1f08a7c2a33d..d8e8ffc68834d 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteIndexMetadataManager.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteIndexMetadataManager.java @@ -20,6 +20,7 @@ import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.gateway.remote.model.RemoteIndexMetadata; import org.opensearch.gateway.remote.model.RemoteReadResult; +import org.opensearch.index.remote.RemoteStoreEnums; import org.opensearch.index.translog.transfer.BlobStoreTransferService; import org.opensearch.repositories.blobstore.BlobStoreRepository; import org.opensearch.threadpool.ThreadPool; @@ -44,11 +45,38 @@ public class RemoteIndexMetadataManager extends AbstractRemoteWritableEntityManager Setting.Property.Deprecated ); + /** + * This setting is used to set the remote index metadata blob store path type strategy. + */ + public static final Setting<RemoteStoreEnums.PathType> REMOTE_INDEX_METADATA_PATH_TYPE_SETTING = new Setting<>( + "cluster.remote_store.index_metadata.path_type", + RemoteStoreEnums.PathType.HASHED_PREFIX.toString(), + RemoteStoreEnums.PathType::parseString, + Setting.Property.NodeScope, + Setting.Property.Dynamic + ); + + /** + * This setting is used to set the remote index metadata blob store path hash algorithm strategy. + * This setting will come into effect if the {@link #REMOTE_INDEX_METADATA_PATH_TYPE_SETTING} + * is either {@code HASHED_PREFIX} or {@code HASHED_INFIX}. + */ + public static final Setting<RemoteStoreEnums.PathHashAlgorithm> REMOTE_INDEX_METADATA_PATH_HASH_ALGO_SETTING = new Setting<>( + "cluster.remote_store.index_metadata.path_hash_algo", + RemoteStoreEnums.PathHashAlgorithm.FNV_1A_BASE64.toString(), + RemoteStoreEnums.PathHashAlgorithm::parseString, + Setting.Property.NodeScope, + Setting.Property.Dynamic + ); + private final Compressor compressor; private final NamedXContentRegistry namedXContentRegistry; private volatile TimeValue indexMetadataUploadTimeout; + private RemoteStoreEnums.PathType pathType; + private RemoteStoreEnums.PathHashAlgorithm pathHashAlgo; + public RemoteIndexMetadataManager( ClusterSettings clusterSettings, String clusterName, @@ -70,7 +98,11 @@ public RemoteIndexMetadataManager( this.namedXContentRegistry = blobStoreRepository.getNamedXContentRegistry(); this.compressor = blobStoreRepository.getCompressor(); this.indexMetadataUploadTimeout = clusterSettings.get(INDEX_METADATA_UPLOAD_TIMEOUT_SETTING); + this.pathType = clusterSettings.get(REMOTE_INDEX_METADATA_PATH_TYPE_SETTING); + this.pathHashAlgo = clusterSettings.get(REMOTE_INDEX_METADATA_PATH_HASH_ALGO_SETTING); clusterSettings.addSettingsUpdateConsumer(INDEX_METADATA_UPLOAD_TIMEOUT_SETTING, this::setIndexMetadataUploadTimeout); + clusterSettings.addSettingsUpdateConsumer(REMOTE_INDEX_METADATA_PATH_TYPE_SETTING, this::setPathTypeSetting); + clusterSettings.addSettingsUpdateConsumer(REMOTE_INDEX_METADATA_PATH_HASH_ALGO_SETTING, this::setPathHashAlgoSetting); } /** @@ -127,4 +159,20 @@ protected ActionListener getWrappedReadListener( ex -> listener.onFailure(new RemoteStateTransferException("Download failed for " + component, remoteEntity, ex)) ); } + + private void setPathTypeSetting(RemoteStoreEnums.PathType pathType) { + this.pathType = pathType; + } + + private void setPathHashAlgoSetting(RemoteStoreEnums.PathHashAlgorithm pathHashAlgo) { + this.pathHashAlgo = pathHashAlgo; + } + + protected RemoteStoreEnums.PathType getPathTypeSetting() { + return pathType; + } + + protected RemoteStoreEnums.PathHashAlgorithm getPathHashAlgoSetting() { + return pathHashAlgo; + } }
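Because both new settings are marked Setting.Property.Dynamic, the update consumers registered above let them change on a live cluster. A hedged sketch of flipping the path type to FIXED at runtime through the standard cluster-settings API; the Client instance and the wrapper class are assumed for illustration:

```java
import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest;
import org.opensearch.client.Client;
import org.opensearch.common.settings.Settings;

// Sketch: dynamically switch remote index-metadata path generation to FIXED;
// RemoteIndexMetadataManager's settings-update consumer picks up the new value.
final class PathTypeUpdateSketch {
    static void switchToFixed(Client client) {
        ClusterUpdateSettingsRequest request = new ClusterUpdateSettingsRequest();
        request.persistentSettings(
            Settings.builder().put("cluster.remote_store.index_metadata.path_type", "FIXED")
        );
        client.admin().cluster().updateSettings(request).actionGet();
    }
}
```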
diff --git a/server/src/main/java/org/opensearch/gateway/remote/model/RemoteIndexMetadata.java b/server/src/main/java/org/opensearch/gateway/remote/model/RemoteIndexMetadata.java index 5308f92c633b1..d9384ac1c1b84 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/model/RemoteIndexMetadata.java +++ b/server/src/main/java/org/opensearch/gateway/remote/model/RemoteIndexMetadata.java @@ -9,6 +9,7 @@ package org.opensearch.gateway.remote.model; import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.blobstore.BlobPath; import org.opensearch.common.io.Streams; import org.opensearch.common.remote.AbstractClusterMetadataWriteableBlobEntity; import org.opensearch.common.remote.BlobPathParameters; @@ -17,6 +18,8 @@ import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata; import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedMetadata; import org.opensearch.gateway.remote.RemoteClusterStateUtils; +import org.opensearch.index.remote.RemoteStoreEnums; +import org.opensearch.index.remote.RemoteStorePathStrategy; import org.opensearch.index.remote.RemoteStoreUtils; import org.opensearch.repositories.blobstore.ChecksumBlobStoreFormat; @@ -41,15 +44,24 @@ public class RemoteIndexMetadata extends AbstractClusterMetadataWriteableBlobEntity<IndexMetadata> public static final String INDEX = "index"; private IndexMetadata indexMetadata; + private RemoteStoreEnums.PathType pathType; + private RemoteStoreEnums.PathHashAlgorithm pathHashAlgo; + private String fixedPrefix; public RemoteIndexMetadata( final IndexMetadata indexMetadata, final String clusterUUID, final Compressor compressor, - final NamedXContentRegistry namedXContentRegistry + final NamedXContentRegistry namedXContentRegistry, + final RemoteStoreEnums.PathType pathType, + final RemoteStoreEnums.PathHashAlgorithm pathHashAlgo, + final String fixedPrefix ) { super(clusterUUID, compressor, namedXContentRegistry); this.indexMetadata = indexMetadata; + this.pathType = pathType; + this.pathHashAlgo = pathHashAlgo; + this.fixedPrefix = fixedPrefix; } public RemoteIndexMetadata( @@ -86,6 +98,22 @@ public String generateBlobFileName() { return blobFileName; } + @Override + public BlobPath getPrefixedPath(BlobPath blobPath) { + if (pathType == null) { + return blobPath; + } + assert pathHashAlgo != null; + return pathType.path( + RemoteStorePathStrategy.BasePathInput.builder() + .fixedPrefix(fixedPrefix) + .basePath(blobPath) + .indexUUID(indexMetadata.getIndexUUID()) + .build(), + pathHashAlgo + ); + } + @Override public UploadedMetadata getUploadedMetadata() { assert blobName != null; diff --git a/server/src/main/java/org/opensearch/gateway/remote/model/RemoteRoutingTableBlobStore.java b/server/src/main/java/org/opensearch/gateway/remote/model/RemoteRoutingTableBlobStore.java index 912861ffd8370..5a869c266fbdc 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/model/RemoteRoutingTableBlobStore.java +++ b/server/src/main/java/org/opensearch/gateway/remote/model/RemoteRoutingTableBlobStore.java @@ -15,6 +15,7 @@ import org.opensearch.common.remote.RemoteWriteableEntityBlobStore; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.Setting.Property; import org.opensearch.gateway.remote.RemoteClusterStateUtils; import org.opensearch.gateway.remote.routingtable.RemoteIndexRoutingTable; import org.opensearch.index.remote.RemoteStoreEnums; @@ -58,8 +59,19 @@ public class RemoteRoutingTableBlobStore + public static final Setting<String> CLUSTER_REMOTE_STORE_ROUTING_TABLE_PATH_PREFIX = Setting.simpleString( + "cluster.remote_store.routing_table.path.prefix", + "", + Property.NodeScope, + Property.Final + ); + private RemoteStoreEnums.PathType pathType; private RemoteStoreEnums.PathHashAlgorithm pathHashAlgo; + private String pathPrefix; public RemoteRoutingTableBlobStore( BlobStoreTransferService blobStoreTransferService, @@ -79,6 +91,7 @@ public RemoteRoutingTableBlobStore( ); this.pathType = clusterSettings.get(REMOTE_ROUTING_TABLE_PATH_TYPE_SETTING); this.pathHashAlgo = clusterSettings.get(REMOTE_ROUTING_TABLE_PATH_HASH_ALGO_SETTING); + this.pathPrefix = clusterSettings.get(CLUSTER_REMOTE_STORE_ROUTING_TABLE_PATH_PREFIX); clusterSettings.addSettingsUpdateConsumer(REMOTE_ROUTING_TABLE_PATH_TYPE_SETTING, this::setPathTypeSetting); clusterSettings.addSettingsUpdateConsumer(REMOTE_ROUTING_TABLE_PATH_HASH_ALGO_SETTING, this::setPathHashAlgoSetting); } @@ -90,6 +103,7 @@ public BlobPath getBlobPathForUpload(final RemoteWriteableBlobEntity(listener, latch) ); latch.await(); @@ -132,7 +136,7 @@ public void testGetAsyncWriteRunnable_IOFailure() throws Exception { remoteIndexMetadataManager.writeAsync( INDEX, - new RemoteIndexMetadata(indexMetadata, "cluster-uuid", compressor, null), + new RemoteIndexMetadata(indexMetadata, "cluster-uuid", compressor, null, null, null, null), new LatchedActionListener<>(listener, latch) ); latch.await(); @@ -182,6 +186,31 @@ public void
testGetAsyncReadRunnable_IOFailure() throws Exception { assertTrue(listener.getFailure() instanceof RemoteStateTransferException); } + public void testRemoteIndexMetadataPathTypeSetting() { + // Assert the default is HASHED_PREFIX + assertEquals(HASHED_PREFIX.toString(), remoteIndexMetadataManager.getPathTypeSetting().toString()); + + Settings newSettings = Settings.builder() + .put("cluster.remote_store.index_metadata.path_type", RemoteStoreEnums.PathType.FIXED.toString()) + .build(); + clusterSettings.applySettings(newSettings); + assertEquals(RemoteStoreEnums.PathType.FIXED.toString(), remoteIndexMetadataManager.getPathTypeSetting().toString()); + } + + public void testRemoteIndexMetadataHashAlgoSetting() { + // Assert the default is FNV_1A_BASE64 + assertEquals(FNV_1A_BASE64.toString(), remoteIndexMetadataManager.getPathHashAlgoSetting().toString()); + + Settings newSettings = Settings.builder() + .put("cluster.remote_store.index_metadata.path_hash_algo", RemoteStoreEnums.PathHashAlgorithm.FNV_1A_COMPOSITE_1.toString()) + .build(); + clusterSettings.applySettings(newSettings); + assertEquals( + RemoteStoreEnums.PathHashAlgorithm.FNV_1A_COMPOSITE_1.toString(), + remoteIndexMetadataManager.getPathHashAlgoSetting().toString() + ); + } + private IndexMetadata getIndexMetadata(String name, @Nullable Boolean writeIndex, String... aliases) { IndexMetadata.Builder builder = IndexMetadata.builder(name) .settings( diff --git a/server/src/test/java/org/opensearch/gateway/remote/model/RemoteIndexMetadataTests.java b/server/src/test/java/org/opensearch/gateway/remote/model/RemoteIndexMetadataTests.java index 7f9c3fdbae91b..9923f23875af0 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/model/RemoteIndexMetadataTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/model/RemoteIndexMetadataTests.java @@ -23,6 +23,8 @@ import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedMetadata; import org.opensearch.gateway.remote.RemoteClusterStateUtils; +import org.opensearch.index.remote.RemoteStoreEnums.PathHashAlgorithm; +import org.opensearch.index.remote.RemoteStoreEnums.PathType; import org.opensearch.index.remote.RemoteStoreUtils; import org.opensearch.index.translog.transfer.BlobStoreTransferService; import org.opensearch.indices.IndicesModule; @@ -93,7 +95,15 @@ public void tearDown() throws Exception { public void testClusterUUID() { IndexMetadata indexMetadata = getIndexMetadata(); - RemoteIndexMetadata remoteObjectForUpload = new RemoteIndexMetadata(indexMetadata, clusterUUID, compressor, namedXContentRegistry); + RemoteIndexMetadata remoteObjectForUpload = new RemoteIndexMetadata( + indexMetadata, + clusterUUID, + compressor, + namedXContentRegistry, + null, + null, + null + ); assertThat(remoteObjectForUpload.clusterUUID(), is(clusterUUID)); RemoteIndexMetadata remoteObjectForDownload = new RemoteIndexMetadata( @@ -107,7 +117,15 @@ public void testClusterUUID() { public void testFullBlobName() { IndexMetadata indexMetadata = getIndexMetadata(); - RemoteIndexMetadata remoteObjectForUpload = new RemoteIndexMetadata(indexMetadata, clusterUUID, compressor, namedXContentRegistry); + RemoteIndexMetadata remoteObjectForUpload = new RemoteIndexMetadata( + indexMetadata, + clusterUUID, + compressor, + namedXContentRegistry, + null, + null, + null + ); assertThat(remoteObjectForUpload.getFullBlobName(), nullValue()); RemoteIndexMetadata remoteObjectForDownload = new RemoteIndexMetadata( @@ -121,7 +139,15 @@ 
public void testFullBlobName() { public void testBlobFileName() { IndexMetadata indexMetadata = getIndexMetadata(); - RemoteIndexMetadata remoteObjectForUpload = new RemoteIndexMetadata(indexMetadata, clusterUUID, compressor, namedXContentRegistry); + RemoteIndexMetadata remoteObjectForUpload = new RemoteIndexMetadata( + indexMetadata, + clusterUUID, + compressor, + namedXContentRegistry, + null, + null, + null + ); assertThat(remoteObjectForUpload.getBlobFileName(), nullValue()); RemoteIndexMetadata remoteObjectForDownload = new RemoteIndexMetadata( @@ -135,7 +161,15 @@ public void testBlobFileName() { public void testBlobPathParameters() { IndexMetadata indexMetadata = getIndexMetadata(); - RemoteIndexMetadata remoteObjectForUpload = new RemoteIndexMetadata(indexMetadata, clusterUUID, compressor, namedXContentRegistry); + RemoteIndexMetadata remoteObjectForUpload = new RemoteIndexMetadata( + indexMetadata, + clusterUUID, + compressor, + namedXContentRegistry, + null, + null, + null + ); BlobPathParameters params = remoteObjectForUpload.getBlobPathParameters(); assertThat(params.getPathTokens(), is(List.of(INDEX, indexMetadata.getIndexUUID()))); assertThat(params.getFilePrefix(), is("metadata")); @@ -143,7 +177,15 @@ public void testBlobPathParameters() { public void testGenerateBlobFileName() { IndexMetadata indexMetadata = getIndexMetadata(); - RemoteIndexMetadata remoteObjectForUpload = new RemoteIndexMetadata(indexMetadata, clusterUUID, compressor, namedXContentRegistry); + RemoteIndexMetadata remoteObjectForUpload = new RemoteIndexMetadata( + indexMetadata, + clusterUUID, + compressor, + namedXContentRegistry, + null, + null, + null + ); String blobFileName = remoteObjectForUpload.generateBlobFileName(); String[] nameTokens = blobFileName.split(RemoteClusterStateUtils.DELIMITER); assertThat(nameTokens[0], is("metadata")); @@ -154,7 +196,15 @@ public void testGenerateBlobFileName() { public void testGetUploadedMetadata() throws IOException { IndexMetadata indexMetadata = getIndexMetadata(); - RemoteIndexMetadata remoteObjectForUpload = new RemoteIndexMetadata(indexMetadata, clusterUUID, compressor, namedXContentRegistry); + RemoteIndexMetadata remoteObjectForUpload = new RemoteIndexMetadata( + indexMetadata, + clusterUUID, + compressor, + namedXContentRegistry, + null, + null, + null + ); assertThrows(AssertionError.class, remoteObjectForUpload::getUploadedMetadata); remoteObjectForUpload.setFullBlobName(new BlobPath().add(TEST_BLOB_PATH)); UploadedMetadata uploadedMetadata = remoteObjectForUpload.getUploadedMetadata(); @@ -163,7 +213,15 @@ public void testGetUploadedMetadata() throws IOException { public void testSerDe() throws IOException { IndexMetadata indexMetadata = getIndexMetadata(); - RemoteIndexMetadata remoteObjectForUpload = new RemoteIndexMetadata(indexMetadata, clusterUUID, compressor, namedXContentRegistry); + RemoteIndexMetadata remoteObjectForUpload = new RemoteIndexMetadata( + indexMetadata, + clusterUUID, + compressor, + namedXContentRegistry, + null, + null, + null + ); try (InputStream inputStream = remoteObjectForUpload.serialize()) { assertThat(inputStream.available(), greaterThan(0)); IndexMetadata readIndexMetadata = remoteObjectForUpload.deserialize(inputStream); @@ -171,6 +229,25 @@ public void testSerDe() throws IOException { } } + public void testPrefixedPath() { + IndexMetadata indexMetadata = getIndexMetadata(); + String fixedPrefix = "*"; + RemoteIndexMetadata remoteObjectForUpload = new RemoteIndexMetadata( + indexMetadata, + clusterUUID, + compressor, 
+ namedXContentRegistry, + PathType.HASHED_PREFIX, + PathHashAlgorithm.FNV_1A_COMPOSITE_1, + fixedPrefix + ); + String testPath = "test-path"; + String expectedPath = fixedPrefix + "410100110100101/test-path/index-uuid/"; + BlobPath prefixedPath = remoteObjectForUpload.getPrefixedPath(BlobPath.cleanPath().add(testPath)); + assertThat(prefixedPath.buildAsString(), is(expectedPath)); + + } + private IndexMetadata getIndexMetadata() { final Index index = new Index("test-index", "index-uuid"); final Settings idxSettings = Settings.builder()