From ddfdd40b167b7cb6ddaf682997c44859d62e6379 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Fri, 4 Oct 2024 08:34:24 +0200 Subject: [PATCH] Reduce footprint of codec instances further (#114072) Lots of effectively singleton objects here and fields that can be made static, saves a little more on the per-index overhead and might reveal further simplifications. --- .../codec/tsdb/internal/DecodeBenchmark.java | 3 ++- .../index/codec/Elasticsearch814Codec.java | 2 +- .../index/codec/Elasticsearch816Codec.java | 15 +++++++-------- .../index/codec/tsdb/DocValuesForUtil.java | 17 +++++------------ .../codec/tsdb/ES87TSDBDocValuesEncoder.java | 4 ++-- .../codec/tsdb/ES87TSDBDocValuesProducer.java | 2 +- .../codec/vectors/ES813FlatVectorFormat.java | 2 +- .../ES814ScalarQuantizedVectorsFormat.java | 6 ++++-- .../codec/vectors/ES815BitFlatVectorFormat.java | 2 +- .../vectors/ES815BitFlatVectorsFormat.java | 2 +- .../vectors/ES815HnswBitVectorsFormat.java | 2 +- .../codec/zstd/Zstd814StoredFieldsFormat.java | 8 +++++++- .../index/codec/tsdb/DocValuesForUtilTests.java | 3 +-- 13 files changed, 34 insertions(+), 34 deletions(-) diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/internal/DecodeBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/internal/DecodeBenchmark.java index 284324b3d9206..b8f0a11e21c8f 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/internal/DecodeBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/internal/DecodeBenchmark.java @@ -12,6 +12,7 @@ import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.ByteArrayDataOutput; import org.apache.lucene.store.DataOutput; +import org.elasticsearch.index.codec.tsdb.DocValuesForUtil; import org.openjdk.jmh.infra.Blackhole; import java.io.IOException; @@ -43,7 +44,7 @@ public void setupInvocation(int bitsPerValue) { @Override public void benchmark(int bitsPerValue, Blackhole bh) throws IOException { - forUtil.decode(bitsPerValue, this.dataInput, this.output); + DocValuesForUtil.decode(bitsPerValue, this.dataInput, this.output); bh.consume(this.output); } } diff --git a/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch814Codec.java b/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch814Codec.java index f3d758f4fc8b7..ae372ea8194bc 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch814Codec.java +++ b/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch814Codec.java @@ -67,7 +67,7 @@ public Elasticsearch814Codec() { */ public Elasticsearch814Codec(Zstd814StoredFieldsFormat.Mode mode) { super("Elasticsearch814", lucene99Codec); - this.storedFieldsFormat = new Zstd814StoredFieldsFormat(mode); + this.storedFieldsFormat = mode.getFormat(); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch816Codec.java b/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch816Codec.java index 00711c7ecc306..27ff19a9d8e40 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch816Codec.java +++ b/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch816Codec.java @@ -28,9 +28,13 @@ */ public class Elasticsearch816Codec extends CodecService.DeduplicateFieldInfosCodec { + private static final Lucene912Codec LUCENE_912_CODEC = new Lucene912Codec(); + private static final PostingsFormat defaultPostingsFormat = new Lucene912PostingsFormat(); + private static final DocValuesFormat defaultDVFormat = new Lucene90DocValuesFormat(); + private static final KnnVectorsFormat defaultKnnVectorsFormat = new Lucene99HnswVectorsFormat(); + private final StoredFieldsFormat storedFieldsFormat; - private final PostingsFormat defaultPostingsFormat; private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() { @Override public PostingsFormat getPostingsFormatForField(String field) { @@ -38,7 +42,6 @@ public PostingsFormat getPostingsFormatForField(String field) { } }; - private final DocValuesFormat defaultDVFormat; private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() { @Override public DocValuesFormat getDocValuesFormatForField(String field) { @@ -46,7 +49,6 @@ public DocValuesFormat getDocValuesFormatForField(String field) { } }; - private final KnnVectorsFormat defaultKnnVectorsFormat; private final KnnVectorsFormat knnVectorsFormat = new PerFieldKnnVectorsFormat() { @Override public KnnVectorsFormat getKnnVectorsFormatForField(String field) { @@ -64,11 +66,8 @@ public Elasticsearch816Codec() { * worse space-efficiency or vice-versa. */ public Elasticsearch816Codec(Zstd814StoredFieldsFormat.Mode mode) { - super("Elasticsearch816", new Lucene912Codec()); - this.storedFieldsFormat = new Zstd814StoredFieldsFormat(mode); - this.defaultPostingsFormat = new Lucene912PostingsFormat(); - this.defaultDVFormat = new Lucene90DocValuesFormat(); - this.defaultKnnVectorsFormat = new Lucene99HnswVectorsFormat(); + super("Elasticsearch816", LUCENE_912_CODEC); + this.storedFieldsFormat = mode.getFormat(); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/DocValuesForUtil.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/DocValuesForUtil.java index 671931ac7154a..648913098ff0d 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/DocValuesForUtil.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/DocValuesForUtil.java @@ -21,17 +21,10 @@ public class DocValuesForUtil { private static final int BITS_IN_FIVE_BYTES = 5 * Byte.SIZE; private static final int BITS_IN_SIX_BYTES = 6 * Byte.SIZE; private static final int BITS_IN_SEVEN_BYTES = 7 * Byte.SIZE; - private final int blockSize; - private final byte[] encoded; + private static final int blockSize = ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE; + private final byte[] encoded = new byte[1024]; - public DocValuesForUtil() { - this(ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE); - } - - private DocValuesForUtil(int blockSize) { - this.blockSize = blockSize; - this.encoded = new byte[1024]; - } + public DocValuesForUtil() {} public static int roundBits(int bitsPerValue) { if (bitsPerValue > 24 && bitsPerValue <= 32) { @@ -74,7 +67,7 @@ private void encodeFiveSixOrSevenBytesPerValue(long[] in, int bitsPerValue, fina out.writeBytes(this.encoded, bytesPerValue * in.length); } - public void decode(int bitsPerValue, final DataInput in, long[] out) throws IOException { + public static void decode(int bitsPerValue, final DataInput in, long[] out) throws IOException { if (bitsPerValue <= 24) { ForUtil.decode(bitsPerValue, in, out); } else if (bitsPerValue <= 32) { @@ -88,7 +81,7 @@ public void decode(int bitsPerValue, final DataInput in, long[] out) throws IOEx } } - private void decodeFiveSixOrSevenBytesPerValue(int bitsPerValue, final DataInput in, long[] out) throws IOException { + private static void decodeFiveSixOrSevenBytesPerValue(int bitsPerValue, final DataInput in, long[] out) throws IOException { // NOTE: we expect multibyte values to be written "least significant byte" first int bytesPerValue = bitsPerValue / Byte.SIZE; long mask = (1L << bitsPerValue) - 1; diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java index f152a0b0601a2..4e95ce34dc410 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java @@ -275,7 +275,7 @@ void decodeOrdinals(DataInput in, long[] out, int bitsPerOrd) throws IOException Arrays.fill(out, runLen, out.length, v2); } else if (encoding == 2) { // bit-packed - forUtil.decode(bitsPerOrd, in, out); + DocValuesForUtil.decode(bitsPerOrd, in, out); } else if (encoding == 3) { // cycle encoding int cycleLength = (int) v1; @@ -299,7 +299,7 @@ void decode(DataInput in, long[] out) throws IOException { final int bitsPerValue = token >>> 3; if (bitsPerValue != 0) { - forUtil.decode(bitsPerValue, in, out); + DocValuesForUtil.decode(bitsPerValue, in, out); } else { Arrays.fill(out, 0L); } diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java index a887516e5e7cc..e3c2daddba80e 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java @@ -355,7 +355,7 @@ public TermsEnum termsEnum() throws IOException { } } - private abstract class BaseSortedSetDocValues extends SortedSetDocValues { + private abstract static class BaseSortedSetDocValues extends SortedSetDocValues { final SortedSetEntry entry; final IndexInput data; diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormat.java index 7a8d09c02ba3b..b1e91ad75e9a2 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormat.java @@ -36,7 +36,7 @@ public class ES813FlatVectorFormat extends KnnVectorsFormat { static final String NAME = "ES813FlatVectorFormat"; - private final FlatVectorsFormat format = new Lucene99FlatVectorsFormat(DefaultFlatVectorScorer.INSTANCE); + private static final FlatVectorsFormat format = new Lucene99FlatVectorsFormat(DefaultFlatVectorScorer.INSTANCE); /** * Sole constructor diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java index 4313aa40cf13e..4bf396e8d5ad1 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java @@ -49,6 +49,10 @@ public class ES814ScalarQuantizedVectorsFormat extends FlatVectorsFormat { private static final FlatVectorsFormat rawVectorFormat = new Lucene99FlatVectorsFormat(DefaultFlatVectorScorer.INSTANCE); + static final FlatVectorsScorer flatVectorScorer = new ESFlatVectorsScorer( + new ScalarQuantizedVectorScorer(DefaultFlatVectorScorer.INSTANCE) + ); + /** The minimum confidence interval */ private static final float MINIMUM_CONFIDENCE_INTERVAL = 0.9f; @@ -60,7 +64,6 @@ public class ES814ScalarQuantizedVectorsFormat extends FlatVectorsFormat { * calculated as `1-1/(vector_dimensions + 1)` */ public final Float confidenceInterval; - final FlatVectorsScorer flatVectorScorer; private final byte bits; private final boolean compress; @@ -83,7 +86,6 @@ public ES814ScalarQuantizedVectorsFormat(Float confidenceInterval, int bits, boo throw new IllegalArgumentException("bits must be one of: 4, 7, 8; bits=" + bits); } this.confidenceInterval = confidenceInterval; - this.flatVectorScorer = new ESFlatVectorsScorer(new ScalarQuantizedVectorScorer(DefaultFlatVectorScorer.INSTANCE)); this.bits = (byte) bits; this.compress = compress; } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorFormat.java index 2df0757a8b8ee..af771b6a27f19 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorFormat.java @@ -22,7 +22,7 @@ public class ES815BitFlatVectorFormat extends KnnVectorsFormat { static final String NAME = "ES815BitFlatVectorFormat"; - private final FlatVectorsFormat format = new ES815BitFlatVectorsFormat(); + private static final FlatVectorsFormat format = new ES815BitFlatVectorsFormat(); /** * Sole constructor diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorsFormat.java index f1ae4e3fdeded..5969c9d5db6d7 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorsFormat.java @@ -27,7 +27,7 @@ class ES815BitFlatVectorsFormat extends FlatVectorsFormat { - private final FlatVectorsFormat delegate = new Lucene99FlatVectorsFormat(FlatBitVectorScorer.INSTANCE); + private static final FlatVectorsFormat delegate = new Lucene99FlatVectorsFormat(FlatBitVectorScorer.INSTANCE); protected ES815BitFlatVectorsFormat() { super("ES815BitFlatVectorsFormat"); diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815HnswBitVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815HnswBitVectorsFormat.java index 55271719a4574..5e4656ea94c5b 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815HnswBitVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815HnswBitVectorsFormat.java @@ -30,7 +30,7 @@ public class ES815HnswBitVectorsFormat extends KnnVectorsFormat { private final int maxConn; private final int beamWidth; - private final FlatVectorsFormat flatVectorsFormat = new ES815BitFlatVectorsFormat(); + private static final FlatVectorsFormat flatVectorsFormat = new ES815BitFlatVectorsFormat(); public ES815HnswBitVectorsFormat() { this(16, 100); diff --git a/server/src/main/java/org/elasticsearch/index/codec/zstd/Zstd814StoredFieldsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/zstd/Zstd814StoredFieldsFormat.java index 84871b5c811dd..6aa77b7222696 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/zstd/Zstd814StoredFieldsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/zstd/Zstd814StoredFieldsFormat.java @@ -52,17 +52,23 @@ public enum Mode { BEST_COMPRESSION(3, BEST_COMPRESSION_BLOCK_SIZE, 2048); final int level, blockSizeInBytes, blockDocCount; + final Zstd814StoredFieldsFormat format; Mode(int level, int blockSizeInBytes, int blockDocCount) { this.level = level; this.blockSizeInBytes = blockSizeInBytes; this.blockDocCount = blockDocCount; + this.format = new Zstd814StoredFieldsFormat(this); + } + + public Zstd814StoredFieldsFormat getFormat() { + return format; } } private final Mode mode; - public Zstd814StoredFieldsFormat(Mode mode) { + private Zstd814StoredFieldsFormat(Mode mode) { super("ZstdStoredFields814", new ZstdCompressionMode(mode.level), mode.blockSizeInBytes, mode.blockDocCount, 10); this.mode = mode; } diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesForUtilTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesForUtilTests.java index 72295743608c3..7da5463ea46ff 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesForUtilTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesForUtilTests.java @@ -70,11 +70,10 @@ public void testEncodeDecode() throws IOException { { // decode IndexInput in = d.openInput("test.bin", IOContext.READONCE); - final DocValuesForUtil forUtil = new DocValuesForUtil(); final long[] restored = new long[ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE]; for (int i = 0; i < iterations; ++i) { final int bitsPerValue = in.readByte(); - forUtil.decode(bitsPerValue, in, restored); + DocValuesForUtil.decode(bitsPerValue, in, restored); assertArrayEquals( Arrays.toString(restored), ArrayUtil.copyOfSubArray(