Merge branch 'main' into fixing_expected_doc_order_in_rrf_and_rank_tests
pmpailis authored Oct 4, 2024
2 parents 876c837 + ddfdd40 commit 2a6270e
Showing 21 changed files with 130 additions and 45 deletions.
@@ -12,6 +12,7 @@
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.ByteArrayDataOutput;
 import org.apache.lucene.store.DataOutput;
+import org.elasticsearch.index.codec.tsdb.DocValuesForUtil;
 import org.openjdk.jmh.infra.Blackhole;
 
 import java.io.IOException;
@@ -43,7 +44,7 @@ public void setupInvocation(int bitsPerValue) {
 
     @Override
     public void benchmark(int bitsPerValue, Blackhole bh) throws IOException {
-        forUtil.decode(bitsPerValue, this.dataInput, this.output);
+        DocValuesForUtil.decode(bitsPerValue, this.dataInput, this.output);
        bh.consume(this.output);
     }
 }

5 changes: 5 additions & 0 deletions docs/changelog/113989.yaml
@@ -0,0 +1,5 @@
+pr: 113989
+summary: Add `max_multipart_parts` setting to S3 repository
+area: Snapshot/Restore
+type: enhancement
+issues: []

16 changes: 13 additions & 3 deletions docs/reference/snapshot-restore/repository-s3.asciidoc
@@ -261,9 +261,11 @@ multiple deployments may share the same bucket.
 
 `chunk_size`::
 
-(<<byte-units,byte value>>) Big files can be broken down into chunks during snapshotting if needed.
-Specify the chunk size as a value and unit, for example:
-`1TB`, `1GB`, `10MB`. Defaults to the maximum size of a blob in the S3 which is `5TB`.
+(<<byte-units,byte value>>) The maximum size of object that {es} will write to the repository
+when creating a snapshot. Files which are larger than `chunk_size` will be chunked into several
+smaller objects. {es} may also split a file across multiple objects to satisfy other constraints
+such as the `max_multipart_parts` limit. Defaults to `5TB` which is the
+https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html[maximum size of an object in AWS S3].
 
 `compress`::
 
@@ -292,6 +294,14 @@ include::repository-shared-settings.asciidoc[]
 size allowed by S3. Defaults to `100mb` or `5%` of JVM heap, whichever is
 smaller.
 
+`max_multipart_parts` ::
+
+(<<number,integer>>) The maximum number of parts that {es} will write during a multipart upload
+of a single object. Files which are larger than `buffer_size × max_multipart_parts` will be
+chunked into several smaller objects. {es} may also split a file across multiple objects to
+satisfy other constraints such as the `chunk_size` limit. Defaults to `10000` which is the
+https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html[maximum number of parts in a multipart upload in AWS S3].
+
 `canned_acl`::
 
 The S3 repository supports all

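Taken together, these two settings bound the size of any single object: the effective limit is the smaller of `chunk_size` and `buffer_size * max_multipart_parts`. A minimal standalone sketch of that arithmetic, in plain Java with illustrative names that are not part of this change (sizes are 1024-based, as with the repository's byte-size settings):

    // Hypothetical sketch of the effective object-size limit; not the actual implementation.
    public class EffectiveChunkSize {

        // min(chunk_size, buffer_size * max_multipart_parts), all sizes in bytes
        static long effectiveLimitBytes(long chunkSizeBytes, long bufferSizeBytes, int maxMultipartParts) {
            return Math.min(chunkSizeBytes, bufferSizeBytes * (long) maxMultipartParts);
        }

        public static void main(String[] args) {
            long mb = 1024L * 1024L;      // 1MB
            long tb = 1024L * mb * 1024L; // 1TB

            // chunk_size=5TB (default), buffer_size=100MB, max_multipart_parts=10000 (default)
            long limit = effectiveLimitBytes(5 * tb, 100 * mb, 10_000);
            System.out.println(limit / mb); // prints 1000000: the parts limit is the binding constraint
        }
    }

With a `100MB` buffer, the parts limit caps each object at `1,000,000mb` (roughly `0.95TB`), well below the `5TB` default `chunk_size`; the S3Repository change below applies exactly this minimum before its existing chunk-size/buffer-size check.
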
@@ -11,6 +11,7 @@
 import org.elasticsearch.action.datastreams.GetDataStreamAction;
 import org.elasticsearch.action.support.IndicesOptions;
 import org.elasticsearch.client.internal.node.NodeClient;
+import org.elasticsearch.cluster.metadata.DataStream;
 import org.elasticsearch.cluster.metadata.DataStreamLifecycle;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.util.set.Sets;
@@ -35,14 +36,14 @@ public class RestGetDataStreamsAction extends BaseRestHandler {
         Set.of(
             "name",
             "include_defaults",
-            "timeout",
             "master_timeout",
             IndicesOptions.WildcardOptions.EXPAND_WILDCARDS,
             IndicesOptions.ConcreteTargetOptions.IGNORE_UNAVAILABLE,
             IndicesOptions.WildcardOptions.ALLOW_NO_INDICES,
             IndicesOptions.GatekeeperOptions.IGNORE_THROTTLED,
             "verbose"
-        )
+        ),
+        DataStream.isFailureStoreFeatureFlagEnabled() ? Set.of(IndicesOptions.FailureStoreOptions.FAILURE_STORE) : Set.of()
     )
 );
 

@@ -140,6 +140,11 @@ class S3Repository extends MeteredBlobStoreRepository {
         MAX_FILE_SIZE_USING_MULTIPART
     );
 
+    /**
+     * Maximum number of parts for a multipart upload. (See https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html)
+     */
+    static final Setting<Integer> MAX_MULTIPART_PARTS = Setting.intSetting("max_multipart_parts", 10_000, 1, 10_000);
+
     /**
      * Sets the S3 storage class type for the backup files. Values may be standard, reduced_redundancy,
      * standard_ia, onezone_ia and intelligent_tiering. Defaults to standard.
@@ -253,7 +258,9 @@ class S3Repository extends MeteredBlobStoreRepository {
         }
 
         this.bufferSize = BUFFER_SIZE_SETTING.get(metadata.settings());
-        this.chunkSize = CHUNK_SIZE_SETTING.get(metadata.settings());
+        var maxChunkSize = CHUNK_SIZE_SETTING.get(metadata.settings());
+        var maxPartsNum = MAX_MULTIPART_PARTS.get(metadata.settings());
+        this.chunkSize = objectSizeLimit(maxChunkSize, bufferSize, maxPartsNum);
 
         // Ensure that chunkSize is greater than or equal to bufferSize
         if (this.chunkSize.getBytes() < bufferSize.getBytes()) {
@@ -302,6 +309,20 @@ private static Map<String, String> buildLocation(RepositoryMetadata metadata) {
         return Map.of("base_path", BASE_PATH_SETTING.get(metadata.settings()), "bucket", BUCKET_SETTING.get(metadata.settings()));
     }
 
+    /**
+     * Calculates the S3 object size limit based on two constraints: the maximum object (chunk) size
+     * and the maximum number of parts for a multipart upload.
+     * See https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html
+     *
+     * @param chunkSize   s3 object size
+     * @param bufferSize  s3 multipart upload part size
+     * @param maxPartsNum s3 multipart upload max parts number
+     */
+    private static ByteSizeValue objectSizeLimit(ByteSizeValue chunkSize, ByteSizeValue bufferSize, int maxPartsNum) {
+        var bytes = Math.min(chunkSize.getBytes(), bufferSize.getBytes() * maxPartsNum);
+        return ByteSizeValue.ofBytes(bytes);
+    }
+
     /**
      * Holds a reference to a delayed repository operation {@link Scheduler.Cancellable} so it can be cancelled should the repository be
      * closed concurrently.

@@ -175,4 +175,37 @@ public void testAnalysisFailureDetail() {
         }
     }
 
+    // Ensures that chunkSize is capped by the chunk_size setting when buffer_size * parts_num is bigger
+    public void testChunkSizeLimit() {
+        var meta = new RepositoryMetadata(
+            "dummy-repo",
+            "mock",
+            Settings.builder()
+                .put(S3Repository.BUCKET_SETTING.getKey(), "bucket")
+                .put(S3Repository.CHUNK_SIZE_SETTING.getKey(), "1GB")
+                .put(S3Repository.BUFFER_SIZE_SETTING.getKey(), "100MB")
+                .put(S3Repository.MAX_MULTIPART_PARTS.getKey(), 10_000) // ~1TB
+                .build()
+        );
+        try (var repo = createS3Repo(meta)) {
+            assertEquals(ByteSizeValue.ofGb(1), repo.chunkSize());
+        }
+    }
+
+    // Ensures that chunkSize is capped by buffer_size * parts_num when the chunk_size setting is bigger
+    public void testPartsNumLimit() {
+        var meta = new RepositoryMetadata(
+            "dummy-repo",
+            "mock",
+            Settings.builder()
+                .put(S3Repository.BUCKET_SETTING.getKey(), "bucket")
+                .put(S3Repository.CHUNK_SIZE_SETTING.getKey(), "5TB")
+                .put(S3Repository.BUFFER_SIZE_SETTING.getKey(), "100MB")
+                .put(S3Repository.MAX_MULTIPART_PARTS.getKey(), 10_000)
+                .build()
+        );
+        try (var repo = createS3Repo(meta)) {
+            assertEquals(ByteSizeValue.ofMb(1_000_000), repo.chunkSize());
+        }
+    }
 }

3 changes: 3 additions & 0 deletions muted-tests.yml
@@ -354,6 +354,9 @@ tests:
 - class: org.elasticsearch.xpack.rank.rrf.RRFRetrieverBuilderIT
   method: testRRFWithCollapse
   issue: https://github.com/elastic/elasticsearch/issues/114074
+- class: org.elasticsearch.xpack.inference.TextEmbeddingCrudIT
+  method: testPutE5Small_withPlatformSpecificVariant
+  issue: https://github.com/elastic/elasticsearch/issues/113950
 
 # Examples:
 #

5 changes: 0 additions & 5 deletions rest-api-spec/build.gradle
@@ -57,9 +57,4 @@ tasks.named("precommit").configure {
 tasks.named("yamlRestCompatTestTransform").configure({task ->
   task.skipTest("indices.sort/10_basic/Index Sort", "warning does not exist for compatibility")
   task.skipTest("search/330_fetch_fields/Test search rewrite", "warning does not exist for compatibility")
-  task.skipTest("indices.create/21_synthetic_source_stored/object param - nested object with stored array", "skip test to submit #113690")
-  task.skipTest("indices.create/20_synthetic_source/disabled object", "temporary until backported")
-  task.skipTest("indices.create/20_synthetic_source/disabled object contains array", "temporary until backported")
-  task.skipTest("indices.create/20_synthetic_source/object with dynamic override", "temporary until backported")
-  task.skipTest("indices.create/20_synthetic_source/disabled root object", "temporary until backported")
 })

@@ -67,7 +67,7 @@ public Elasticsearch814Codec() {
      */
     public Elasticsearch814Codec(Zstd814StoredFieldsFormat.Mode mode) {
         super("Elasticsearch814", lucene99Codec);
-        this.storedFieldsFormat = new Zstd814StoredFieldsFormat(mode);
+        this.storedFieldsFormat = mode.getFormat();
     }
 
     @Override

@@ -28,25 +28,27 @@
  */
 public class Elasticsearch816Codec extends CodecService.DeduplicateFieldInfosCodec {
 
+    private static final Lucene912Codec LUCENE_912_CODEC = new Lucene912Codec();
+    private static final PostingsFormat defaultPostingsFormat = new Lucene912PostingsFormat();
+    private static final DocValuesFormat defaultDVFormat = new Lucene90DocValuesFormat();
+    private static final KnnVectorsFormat defaultKnnVectorsFormat = new Lucene99HnswVectorsFormat();
+
     private final StoredFieldsFormat storedFieldsFormat;
 
-    private final PostingsFormat defaultPostingsFormat;
     private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() {
         @Override
         public PostingsFormat getPostingsFormatForField(String field) {
             return Elasticsearch816Codec.this.getPostingsFormatForField(field);
         }
     };
 
-    private final DocValuesFormat defaultDVFormat;
     private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() {
         @Override
         public DocValuesFormat getDocValuesFormatForField(String field) {
             return Elasticsearch816Codec.this.getDocValuesFormatForField(field);
         }
     };
 
-    private final KnnVectorsFormat defaultKnnVectorsFormat;
     private final KnnVectorsFormat knnVectorsFormat = new PerFieldKnnVectorsFormat() {
         @Override
         public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
@@ -64,11 +66,8 @@ public Elasticsearch816Codec() {
      * worse space-efficiency or vice-versa.
      */
     public Elasticsearch816Codec(Zstd814StoredFieldsFormat.Mode mode) {
-        super("Elasticsearch816", new Lucene912Codec());
-        this.storedFieldsFormat = new Zstd814StoredFieldsFormat(mode);
-        this.defaultPostingsFormat = new Lucene912PostingsFormat();
-        this.defaultDVFormat = new Lucene90DocValuesFormat();
-        this.defaultKnnVectorsFormat = new Lucene99HnswVectorsFormat();
+        super("Elasticsearch816", LUCENE_912_CODEC);
+        this.storedFieldsFormat = mode.getFormat();
     }
 
     @Override

@@ -21,17 +21,10 @@ public class DocValuesForUtil {
     private static final int BITS_IN_FIVE_BYTES = 5 * Byte.SIZE;
     private static final int BITS_IN_SIX_BYTES = 6 * Byte.SIZE;
     private static final int BITS_IN_SEVEN_BYTES = 7 * Byte.SIZE;
-    private final int blockSize;
-    private final byte[] encoded;
+    private static final int blockSize = ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE;
+    private final byte[] encoded = new byte[1024];
 
-    public DocValuesForUtil() {
-        this(ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
-    }
-
-    private DocValuesForUtil(int blockSize) {
-        this.blockSize = blockSize;
-        this.encoded = new byte[1024];
-    }
+    public DocValuesForUtil() {}
 
     public static int roundBits(int bitsPerValue) {
         if (bitsPerValue > 24 && bitsPerValue <= 32) {
@@ -74,7 +67,7 @@ private void encodeFiveSixOrSevenBytesPerValue(long[] in, int bitsPerValue, fina
         out.writeBytes(this.encoded, bytesPerValue * in.length);
     }
 
-    public void decode(int bitsPerValue, final DataInput in, long[] out) throws IOException {
+    public static void decode(int bitsPerValue, final DataInput in, long[] out) throws IOException {
         if (bitsPerValue <= 24) {
             ForUtil.decode(bitsPerValue, in, out);
         } else if (bitsPerValue <= 32) {
@@ -88,7 +81,7 @@ public void decode(int bitsPerValue, final DataInput in, long[] out) throws IOEx
         }
     }
 
-    private void decodeFiveSixOrSevenBytesPerValue(int bitsPerValue, final DataInput in, long[] out) throws IOException {
+    private static void decodeFiveSixOrSevenBytesPerValue(int bitsPerValue, final DataInput in, long[] out) throws IOException {
         // NOTE: we expect multibyte values to be written "least significant byte" first
         int bytesPerValue = bitsPerValue / Byte.SIZE;
         long mask = (1L << bitsPerValue) - 1;

@@ -275,7 +275,7 @@ void decodeOrdinals(DataInput in, long[] out, int bitsPerOrd) throws IOException
             Arrays.fill(out, runLen, out.length, v2);
         } else if (encoding == 2) {
             // bit-packed
-            forUtil.decode(bitsPerOrd, in, out);
+            DocValuesForUtil.decode(bitsPerOrd, in, out);
         } else if (encoding == 3) {
             // cycle encoding
             int cycleLength = (int) v1;
@@ -299,7 +299,7 @@ void decode(DataInput in, long[] out) throws IOException {
         final int bitsPerValue = token >>> 3;
 
         if (bitsPerValue != 0) {
-            forUtil.decode(bitsPerValue, in, out);
+            DocValuesForUtil.decode(bitsPerValue, in, out);
         } else {
             Arrays.fill(out, 0L);
         }

@@ -355,7 +355,7 @@ public TermsEnum termsEnum() throws IOException {
         }
     }
 
-    private abstract class BaseSortedSetDocValues extends SortedSetDocValues {
+    private abstract static class BaseSortedSetDocValues extends SortedSetDocValues {
 
         final SortedSetEntry entry;
         final IndexInput data;

@@ -36,7 +36,7 @@ public class ES813FlatVectorFormat extends KnnVectorsFormat {
 
     static final String NAME = "ES813FlatVectorFormat";
 
-    private final FlatVectorsFormat format = new Lucene99FlatVectorsFormat(DefaultFlatVectorScorer.INSTANCE);
+    private static final FlatVectorsFormat format = new Lucene99FlatVectorsFormat(DefaultFlatVectorScorer.INSTANCE);
 
     /**
      * Sole constructor

@@ -49,6 +49,10 @@ public class ES814ScalarQuantizedVectorsFormat extends FlatVectorsFormat {
 
     private static final FlatVectorsFormat rawVectorFormat = new Lucene99FlatVectorsFormat(DefaultFlatVectorScorer.INSTANCE);
 
+    static final FlatVectorsScorer flatVectorScorer = new ESFlatVectorsScorer(
+        new ScalarQuantizedVectorScorer(DefaultFlatVectorScorer.INSTANCE)
+    );
+
     /** The minimum confidence interval */
 
     private static final float MINIMUM_CONFIDENCE_INTERVAL = 0.9f;
@@ -60,7 +64,6 @@ public class ES814ScalarQuantizedVectorsFormat extends FlatVectorsFormat {
      * calculated as `1-1/(vector_dimensions + 1)`
      */
     public final Float confidenceInterval;
-    final FlatVectorsScorer flatVectorScorer;
 
     private final byte bits;
     private final boolean compress;
@@ -83,7 +86,6 @@ public ES814ScalarQuantizedVectorsFormat(Float confidenceInterval, int bits, boo
             throw new IllegalArgumentException("bits must be one of: 4, 7, 8; bits=" + bits);
         }
         this.confidenceInterval = confidenceInterval;
-        this.flatVectorScorer = new ESFlatVectorsScorer(new ScalarQuantizedVectorScorer(DefaultFlatVectorScorer.INSTANCE));
         this.bits = (byte) bits;
         this.compress = compress;
     }

@@ -22,7 +22,7 @@ public class ES815BitFlatVectorFormat extends KnnVectorsFormat {
 
     static final String NAME = "ES815BitFlatVectorFormat";
 
-    private final FlatVectorsFormat format = new ES815BitFlatVectorsFormat();
+    private static final FlatVectorsFormat format = new ES815BitFlatVectorsFormat();
 
     /**
      * Sole constructor

@@ -27,7 +27,7 @@
 
 class ES815BitFlatVectorsFormat extends FlatVectorsFormat {
 
-    private final FlatVectorsFormat delegate = new Lucene99FlatVectorsFormat(FlatBitVectorScorer.INSTANCE);
+    private static final FlatVectorsFormat delegate = new Lucene99FlatVectorsFormat(FlatBitVectorScorer.INSTANCE);
 
     protected ES815BitFlatVectorsFormat() {
         super("ES815BitFlatVectorsFormat");

@@ -30,7 +30,7 @@ public class ES815HnswBitVectorsFormat extends KnnVectorsFormat {
     private final int maxConn;
     private final int beamWidth;
 
-    private final FlatVectorsFormat flatVectorsFormat = new ES815BitFlatVectorsFormat();
+    private static final FlatVectorsFormat flatVectorsFormat = new ES815BitFlatVectorsFormat();
 
     public ES815HnswBitVectorsFormat() {
         this(16, 100);

@@ -52,17 +52,23 @@ public enum Mode {
         BEST_COMPRESSION(3, BEST_COMPRESSION_BLOCK_SIZE, 2048);
 
         final int level, blockSizeInBytes, blockDocCount;
+        final Zstd814StoredFieldsFormat format;
 
         Mode(int level, int blockSizeInBytes, int blockDocCount) {
             this.level = level;
             this.blockSizeInBytes = blockSizeInBytes;
             this.blockDocCount = blockDocCount;
+            this.format = new Zstd814StoredFieldsFormat(this);
         }
+
+        public Zstd814StoredFieldsFormat getFormat() {
+            return format;
+        }
     }
 
     private final Mode mode;
 
-    public Zstd814StoredFieldsFormat(Mode mode) {
+    private Zstd814StoredFieldsFormat(Mode mode) {
         super("ZstdStoredFields814", new ZstdCompressionMode(mode.level), mode.blockSizeInBytes, mode.blockDocCount, 10);
         this.mode = mode;
     }

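The hunk above replaces per-call construction of `Zstd814StoredFieldsFormat` with one shared instance per compression mode: each enum constant eagerly builds its format, `getFormat()` hands it out, and the now-private constructor prevents callers from creating duplicates. A minimal sketch of the same enum-singleton pattern, with hypothetical names rather than the actual ES classes:

    // Hypothetical sketch: each enum constant caches the single instance tied to it.
    enum CompressionMode {
        BEST_SPEED(0),
        BEST_COMPRESSION(3);

        final int level;
        final Compressor compressor; // one shared instance per constant

        CompressionMode(int level) {
            this.level = level;
            // Passing `this` out of an enum constructor is safe here because
            // Compressor only stores the reference.
            this.compressor = new Compressor(this);
        }

        Compressor getCompressor() {
            return compressor;
        }
    }

    class Compressor {
        final CompressionMode mode;

        Compressor(CompressionMode mode) {
            this.mode = mode;
        }
    }

One plausible motivation, consistent with the codec hunks above: `Elasticsearch814Codec` and `Elasticsearch816Codec` now fetch the format via `mode.getFormat()`, so constructing a codec no longer allocates a fresh stored-fields format each time.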