Merge branch 'main' into fixing_expected_doc_order_in_rrf_and_rank_tests
pmpailis authored Oct 4, 2024
2 parents 876c837 + ddfdd40 commit 2a6270e
Showing 21 changed files with 130 additions and 45 deletions.
@@ -12,6 +12,7 @@
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.ByteArrayDataOutput;
 import org.apache.lucene.store.DataOutput;
+import org.elasticsearch.index.codec.tsdb.DocValuesForUtil;
 import org.openjdk.jmh.infra.Blackhole;
 
 import java.io.IOException;
@@ -43,7 +44,7 @@ public void setupInvocation(int bitsPerValue) {
 
     @Override
     public void benchmark(int bitsPerValue, Blackhole bh) throws IOException {
-        forUtil.decode(bitsPerValue, this.dataInput, this.output);
+        DocValuesForUtil.decode(bitsPerValue, this.dataInput, this.output);
        bh.consume(this.output);
     }
 }

5 changes: 5 additions & 0 deletions docs/changelog/113989.yaml
@@ -0,0 +1,5 @@
+pr: 113989
+summary: Add `max_multipart_parts` setting to S3 repository
+area: Snapshot/Restore
+type: enhancement
+issues: []

16 changes: 13 additions & 3 deletions docs/reference/snapshot-restore/repository-s3.asciidoc
@@ -261,9 +261,11 @@ multiple deployments may share the same bucket.
 
 `chunk_size`::
 
-(<<byte-units,byte value>>) Big files can be broken down into chunks during snapshotting if needed.
-Specify the chunk size as a value and unit, for example:
-`1TB`, `1GB`, `10MB`. Defaults to the maximum size of a blob in the S3 which is `5TB`.
+(<<byte-units,byte value>>) The maximum size of object that {es} will write to the repository
+when creating a snapshot. Files which are larger than `chunk_size` will be chunked into several
+smaller objects. {es} may also split a file across multiple objects to satisfy other constraints
+such as the `max_multipart_parts` limit. Defaults to `5TB` which is the
+https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html[maximum size of an object in AWS S3].
 
 `compress`::
 
@@ -292,6 +294,14 @@ include::repository-shared-settings.asciidoc[]
 size allowed by S3. Defaults to `100mb` or `5%` of JVM heap, whichever is
 smaller.
 
+`max_multipart_parts` ::
+
+(<<number,integer>>) The maximum number of parts that {es} will write during a multipart upload
+of a single object. Files which are larger than `buffer_size × max_multipart_parts` will be
+chunked into several smaller objects. {es} may also split a file across multiple objects to
+satisfy other constraints such as the `chunk_size` limit. Defaults to `10000` which is the
+https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html[maximum number of parts in a multipart upload in AWS S3].
+
 `canned_acl`::
 
 The S3 repository supports all

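Taken together, these two settings bound the size of any single object: the effective limit is the smaller of `chunk_size` and `buffer_size * max_multipart_parts`. A minimal standalone sketch of that arithmetic, in plain Java with illustrative names that are not part of this change (sizes are 1024-based, as with the repository's byte-size settings):

    // Hypothetical sketch of the effective object-size limit; not the actual implementation.
    public class EffectiveChunkSize {

        // min(chunk_size, buffer_size * max_multipart_parts), all sizes in bytes
        static long effectiveLimitBytes(long chunkSizeBytes, long bufferSizeBytes, int maxMultipartParts) {
            return Math.min(chunkSizeBytes, bufferSizeBytes * (long) maxMultipartParts);
        }

        public static void main(String[] args) {
            long mb = 1024L * 1024L;      // 1MB
            long tb = 1024L * mb * 1024L; // 1TB

            // chunk_size=5TB (default), buffer_size=100MB, max_multipart_parts=10000 (default)
            long limit = effectiveLimitBytes(5 * tb, 100 * mb, 10_000);
            System.out.println(limit / mb); // prints 1000000: the parts limit is the binding constraint
        }
    }

With a `100MB` buffer, the parts limit caps each object at `1,000,000mb` (roughly `0.95TB`), well below the `5TB` default `chunk_size`; the S3Repository change below applies exactly this minimum before its existing chunk-size/buffer-size check.
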
@@ -11,6 +11,7 @@
 import org.elasticsearch.action.datastreams.GetDataStreamAction;
 import org.elasticsearch.action.support.IndicesOptions;
 import org.elasticsearch.client.internal.node.NodeClient;
+import org.elasticsearch.cluster.metadata.DataStream;
 import org.elasticsearch.cluster.metadata.DataStreamLifecycle;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.util.set.Sets;
@@ -35,14 +36,14 @@ public class RestGetDataStreamsAction extends BaseRestHandler {
         Set.of(
             "name",
             "include_defaults",
-            "timeout",
             "master_timeout",
             IndicesOptions.WildcardOptions.EXPAND_WILDCARDS,
             IndicesOptions.ConcreteTargetOptions.IGNORE_UNAVAILABLE,
             IndicesOptions.WildcardOptions.ALLOW_NO_INDICES,
             IndicesOptions.GatekeeperOptions.IGNORE_THROTTLED,
             "verbose"
-        )
+        ),
+        DataStream.isFailureStoreFeatureFlagEnabled() ? Set.of(IndicesOptions.FailureStoreOptions.FAILURE_STORE) : Set.of()
     )
 );
 

@@ -140,6 +140,11 @@ class S3Repository extends MeteredBlobStoreRepository {
         MAX_FILE_SIZE_USING_MULTIPART
     );
 
+    /**
+     * Maximum number of parts for a multipart upload. (See https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html)
+     */
+    static final Setting<Integer> MAX_MULTIPART_PARTS = Setting.intSetting("max_multipart_parts", 10_000, 1, 10_000);
+
     /**
      * Sets the S3 storage class type for the backup files. Values may be standard, reduced_redundancy,
      * standard_ia, onezone_ia and intelligent_tiering. Defaults to standard.
@@ -253,7 +258,9 @@ class S3Repository extends MeteredBlobStoreRepository {
         }
 
         this.bufferSize = BUFFER_SIZE_SETTING.get(metadata.settings());
-        this.chunkSize = CHUNK_SIZE_SETTING.get(metadata.settings());
+        var maxChunkSize = CHUNK_SIZE_SETTING.get(metadata.settings());
+        var maxPartsNum = MAX_MULTIPART_PARTS.get(metadata.settings());
+        this.chunkSize = objectSizeLimit(maxChunkSize, bufferSize, maxPartsNum);
 
         // Ensure that chunkSize is greater than or equal to bufferSize
         if (this.chunkSize.getBytes() < bufferSize.getBytes()) {
@@ -302,6 +309,20 @@ private static Map<String, String> buildLocation(RepositoryMetadata metadata) {
         return Map.of("base_path", BASE_PATH_SETTING.get(metadata.settings()), "bucket", BUCKET_SETTING.get(metadata.settings()));
     }
 
+    /**
+     * Calculates the S3 object size limit based on two constraints: the maximum object (chunk) size
+     * and the maximum number of parts for a multipart upload.
+     * See https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html
+     *
+     * @param chunkSize   s3 object size
+     * @param bufferSize  s3 multipart upload part size
+     * @param maxPartsNum s3 multipart upload max parts number
+     */
+    private static ByteSizeValue objectSizeLimit(ByteSizeValue chunkSize, ByteSizeValue bufferSize, int maxPartsNum) {
+        var bytes = Math.min(chunkSize.getBytes(), bufferSize.getBytes() * maxPartsNum);
+        return ByteSizeValue.ofBytes(bytes);
+    }
+
     /**
      * Holds a reference to a delayed repository operation {@link Scheduler.Cancellable} so it can be cancelled should the repository be
      * closed concurrently.

@@ -175,4 +175,37 @@ public void testAnalysisFailureDetail() {
         }
     }
 
+    // Ensures that chunkSize is capped by the chunk_size setting when buffer_size * parts_num is bigger
+    public void testChunkSizeLimit() {
+        var meta = new RepositoryMetadata(
+            "dummy-repo",
+            "mock",
+            Settings.builder()
+                .put(S3Repository.BUCKET_SETTING.getKey(), "bucket")
+                .put(S3Repository.CHUNK_SIZE_SETTING.getKey(), "1GB")
+                .put(S3Repository.BUFFER_SIZE_SETTING.getKey(), "100MB")
+                .put(S3Repository.MAX_MULTIPART_PARTS.getKey(), 10_000) // ~1TB
+                .build()
+        );
+        try (var repo = createS3Repo(meta)) {
+            assertEquals(ByteSizeValue.ofGb(1), repo.chunkSize());
+        }
+    }
+
+    // Ensures that chunkSize is capped by buffer_size * parts_num when the chunk_size setting is bigger
+    public void testPartsNumLimit() {
+        var meta = new RepositoryMetadata(
+            "dummy-repo",
+            "mock",
+            Settings.builder()
+                .put(S3Repository.BUCKET_SETTING.getKey(), "bucket")
+                .put(S3Repository.CHUNK_SIZE_SETTING.getKey(), "5TB")
+                .put(S3Repository.BUFFER_SIZE_SETTING.getKey(), "100MB")
+                .put(S3Repository.MAX_MULTIPART_PARTS.getKey(), 10_000)
+                .build()
+        );
+        try (var repo = createS3Repo(meta)) {
+            assertEquals(ByteSizeValue.ofMb(1_000_000), repo.chunkSize());
+        }
+    }
 }

3 changes: 3 additions & 0 deletions muted-tests.yml
@@ -354,6 +354,9 @@ tests:
 - class: org.elasticsearch.xpack.rank.rrf.RRFRetrieverBuilderIT
   method: testRRFWithCollapse
   issue: https://github.com/elastic/elasticsearch/issues/114074
+- class: org.elasticsearch.xpack.inference.TextEmbeddingCrudIT
+  method: testPutE5Small_withPlatformSpecificVariant
+  issue: https://github.com/elastic/elasticsearch/issues/113950
 
 # Examples:
 #

5 changes: 0 additions & 5 deletions rest-api-spec/build.gradle
@@ -57,9 +57,4 @@ tasks.named("precommit").configure {
 tasks.named("yamlRestCompatTestTransform").configure({task ->
   task.skipTest("indices.sort/10_basic/Index Sort", "warning does not exist for compatibility")
   task.skipTest("search/330_fetch_fields/Test search rewrite", "warning does not exist for compatibility")
-  task.skipTest("indices.create/21_synthetic_source_stored/object param - nested object with stored array", "skip test to submit #113690")
-  task.skipTest("indices.create/20_synthetic_source/disabled object", "temporary until backported")
-  task.skipTest("indices.create/20_synthetic_source/disabled object contains array", "temporary until backported")
-  task.skipTest("indices.create/20_synthetic_source/object with dynamic override", "temporary until backported")
-  task.skipTest("indices.create/20_synthetic_source/disabled root object", "temporary until backported")
 })

@@ -67,7 +67,7 @@ public Elasticsearch814Codec() {
      */
     public Elasticsearch814Codec(Zstd814StoredFieldsFormat.Mode mode) {
         super("Elasticsearch814", lucene99Codec);
-        this.storedFieldsFormat = new Zstd814StoredFieldsFormat(mode);
+        this.storedFieldsFormat = mode.getFormat();
     }
 
     @Override

@@ -28,25 +28,27 @@
  */
 public class Elasticsearch816Codec extends CodecService.DeduplicateFieldInfosCodec {
 
+    private static final Lucene912Codec LUCENE_912_CODEC = new Lucene912Codec();
+    private static final PostingsFormat defaultPostingsFormat = new Lucene912PostingsFormat();
+    private static final DocValuesFormat defaultDVFormat = new Lucene90DocValuesFormat();
+    private static final KnnVectorsFormat defaultKnnVectorsFormat = new Lucene99HnswVectorsFormat();
+
     private final StoredFieldsFormat storedFieldsFormat;
 
-    private final PostingsFormat defaultPostingsFormat;
     private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() {
         @Override
         public PostingsFormat getPostingsFormatForField(String field) {
             return Elasticsearch816Codec.this.getPostingsFormatForField(field);
         }
     };
 
-    private final DocValuesFormat defaultDVFormat;
     private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() {
         @Override
         public DocValuesFormat getDocValuesFormatForField(String field) {
             return Elasticsearch816Codec.this.getDocValuesFormatForField(field);
         }
     };
 
-    private final KnnVectorsFormat defaultKnnVectorsFormat;
     private final KnnVectorsFormat knnVectorsFormat = new PerFieldKnnVectorsFormat() {
         @Override
         public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
@@ -64,11 +66,8 @@ public Elasticsearch816Codec() {
      * worse space-efficiency or vice-versa.
      */
     public Elasticsearch816Codec(Zstd814StoredFieldsFormat.Mode mode) {
-        super("Elasticsearch816", new Lucene912Codec());
-        this.storedFieldsFormat = new Zstd814StoredFieldsFormat(mode);
-        this.defaultPostingsFormat = new Lucene912PostingsFormat();
-        this.defaultDVFormat = new Lucene90DocValuesFormat();
-        this.defaultKnnVectorsFormat = new Lucene99HnswVectorsFormat();
+        super("Elasticsearch816", LUCENE_912_CODEC);
+        this.storedFieldsFormat = mode.getFormat();
     }
 
     @Override

@@ -21,17 +21,10 @@ public class DocValuesForUtil {
     private static final int BITS_IN_FIVE_BYTES = 5 * Byte.SIZE;
     private static final int BITS_IN_SIX_BYTES = 6 * Byte.SIZE;
     private static final int BITS_IN_SEVEN_BYTES = 7 * Byte.SIZE;
-    private final int blockSize;
-    private final byte[] encoded;
+    private static final int blockSize = ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE;
+    private final byte[] encoded = new byte[1024];
 
-    public DocValuesForUtil() {
-        this(ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE);
-    }
-
-    private DocValuesForUtil(int blockSize) {
-        this.blockSize = blockSize;
-        this.encoded = new byte[1024];
-    }
+    public DocValuesForUtil() {}
 
     public static int roundBits(int bitsPerValue) {
         if (bitsPerValue > 24 && bitsPerValue <= 32) {
@@ -74,7 +67,7 @@ private void encodeFiveSixOrSevenBytesPerValue(long[] in, int bitsPerValue, fina
         out.writeBytes(this.encoded, bytesPerValue * in.length);
     }
 
-    public void decode(int bitsPerValue, final DataInput in, long[] out) throws IOException {
+    public static void decode(int bitsPerValue, final DataInput in, long[] out) throws IOException {
         if (bitsPerValue <= 24) {
             ForUtil.decode(bitsPerValue, in, out);
         } else if (bitsPerValue <= 32) {
@@ -88,7 +81,7 @@ public void decode(int bitsPerValue, final DataInput in, long[] out) throws IOEx
         }
     }
 
-    private void decodeFiveSixOrSevenBytesPerValue(int bitsPerValue, final DataInput in, long[] out) throws IOException {
+    private static void decodeFiveSixOrSevenBytesPerValue(int bitsPerValue, final DataInput in, long[] out) throws IOException {
         // NOTE: we expect multibyte values to be written "least significant byte" first
         int bytesPerValue = bitsPerValue / Byte.SIZE;
         long mask = (1L << bitsPerValue) - 1;

@@ -275,7 +275,7 @@ void decodeOrdinals(DataInput in, long[] out, int bitsPerOrd) throws IOException
             Arrays.fill(out, runLen, out.length, v2);
         } else if (encoding == 2) {
             // bit-packed
-            forUtil.decode(bitsPerOrd, in, out);
+            DocValuesForUtil.decode(bitsPerOrd, in, out);
         } else if (encoding == 3) {
             // cycle encoding
             int cycleLength = (int) v1;
@@ -299,7 +299,7 @@ void decode(DataInput in, long[] out) throws IOException {
         final int bitsPerValue = token >>> 3;
 
         if (bitsPerValue != 0) {
-            forUtil.decode(bitsPerValue, in, out);
+            DocValuesForUtil.decode(bitsPerValue, in, out);
         } else {
             Arrays.fill(out, 0L);
         }

@@ -355,7 +355,7 @@ public TermsEnum termsEnum() throws IOException {
         }
     }
 
-    private abstract class BaseSortedSetDocValues extends SortedSetDocValues {
+    private abstract static class BaseSortedSetDocValues extends SortedSetDocValues {
 
         final SortedSetEntry entry;
         final IndexInput data;

@@ -36,7 +36,7 @@ public class ES813FlatVectorFormat extends KnnVectorsFormat {
 
     static final String NAME = "ES813FlatVectorFormat";
 
-    private final FlatVectorsFormat format = new Lucene99FlatVectorsFormat(DefaultFlatVectorScorer.INSTANCE);
+    private static final FlatVectorsFormat format = new Lucene99FlatVectorsFormat(DefaultFlatVectorScorer.INSTANCE);
 
     /**
      * Sole constructor

@@ -49,6 +49,10 @@ public class ES814ScalarQuantizedVectorsFormat extends FlatVectorsFormat {
 
     private static final FlatVectorsFormat rawVectorFormat = new Lucene99FlatVectorsFormat(DefaultFlatVectorScorer.INSTANCE);
 
+    static final FlatVectorsScorer flatVectorScorer = new ESFlatVectorsScorer(
+        new ScalarQuantizedVectorScorer(DefaultFlatVectorScorer.INSTANCE)
+    );
+
     /** The minimum confidence interval */
 
     private static final float MINIMUM_CONFIDENCE_INTERVAL = 0.9f;
@@ -60,7 +64,6 @@ public class ES814ScalarQuantizedVectorsFormat extends FlatVectorsFormat {
      * calculated as `1-1/(vector_dimensions + 1)`
      */
     public final Float confidenceInterval;
-    final FlatVectorsScorer flatVectorScorer;
 
     private final byte bits;
     private final boolean compress;
@@ -83,7 +86,6 @@ public ES814ScalarQuantizedVectorsFormat(Float confidenceInterval, int bits, boo
             throw new IllegalArgumentException("bits must be one of: 4, 7, 8; bits=" + bits);
         }
         this.confidenceInterval = confidenceInterval;
-        this.flatVectorScorer = new ESFlatVectorsScorer(new ScalarQuantizedVectorScorer(DefaultFlatVectorScorer.INSTANCE));
         this.bits = (byte) bits;
         this.compress = compress;
     }

@@ -22,7 +22,7 @@ public class ES815BitFlatVectorFormat extends KnnVectorsFormat {
 
     static final String NAME = "ES815BitFlatVectorFormat";
 
-    private final FlatVectorsFormat format = new ES815BitFlatVectorsFormat();
+    private static final FlatVectorsFormat format = new ES815BitFlatVectorsFormat();
 
     /**
      * Sole constructor

@@ -27,7 +27,7 @@
 
 class ES815BitFlatVectorsFormat extends FlatVectorsFormat {
 
-    private final FlatVectorsFormat delegate = new Lucene99FlatVectorsFormat(FlatBitVectorScorer.INSTANCE);
+    private static final FlatVectorsFormat delegate = new Lucene99FlatVectorsFormat(FlatBitVectorScorer.INSTANCE);
 
     protected ES815BitFlatVectorsFormat() {
         super("ES815BitFlatVectorsFormat");

@@ -30,7 +30,7 @@ public class ES815HnswBitVectorsFormat extends KnnVectorsFormat {
     private final int maxConn;
     private final int beamWidth;
 
-    private final FlatVectorsFormat flatVectorsFormat = new ES815BitFlatVectorsFormat();
+    private static final FlatVectorsFormat flatVectorsFormat = new ES815BitFlatVectorsFormat();
 
     public ES815HnswBitVectorsFormat() {
         this(16, 100);

@@ -52,17 +52,23 @@ public enum Mode {
         BEST_COMPRESSION(3, BEST_COMPRESSION_BLOCK_SIZE, 2048);
 
         final int level, blockSizeInBytes, blockDocCount;
+        final Zstd814StoredFieldsFormat format;
 
         Mode(int level, int blockSizeInBytes, int blockDocCount) {
             this.level = level;
             this.blockSizeInBytes = blockSizeInBytes;
             this.blockDocCount = blockDocCount;
+            this.format = new Zstd814StoredFieldsFormat(this);
         }
+
+        public Zstd814StoredFieldsFormat getFormat() {
+            return format;
+        }
     }
 
     private final Mode mode;
 
-    public Zstd814StoredFieldsFormat(Mode mode) {
+    private Zstd814StoredFieldsFormat(Mode mode) {
         super("ZstdStoredFields814", new ZstdCompressionMode(mode.level), mode.blockSizeInBytes, mode.blockDocCount, 10);
         this.mode = mode;
     }

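The hunk above replaces per-call construction of `Zstd814StoredFieldsFormat` with one shared instance per compression mode: each enum constant eagerly builds its format, `getFormat()` hands it out, and the now-private constructor prevents callers from creating duplicates. A minimal sketch of the same enum-singleton pattern, with hypothetical names rather than the actual ES classes:

    // Hypothetical sketch: each enum constant caches the single instance tied to it.
    enum CompressionMode {
        BEST_SPEED(0),
        BEST_COMPRESSION(3);

        final int level;
        final Compressor compressor; // one shared instance per constant

        CompressionMode(int level) {
            this.level = level;
            // Passing `this` out of an enum constructor is safe here because
            // Compressor only stores the reference.
            this.compressor = new Compressor(this);
        }

        Compressor getCompressor() {
            return compressor;
        }
    }

    class Compressor {
        final CompressionMode mode;

        Compressor(CompressionMode mode) {
            this.mode = mode;
        }
    }

One plausible motivation, consistent with the codec hunks above: `Elasticsearch814Codec` and `Elasticsearch816Codec` now fetch the format via `mode.getFormat()`, so constructing a codec no longer allocates a fresh stored-fields format each time.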