Skip to content

Commit

Permalink
Refactor/bbq format (#117847)
Browse files: browse the repository at this point in the history
* Refactor the BBQ format so that it is contained in its own package

* Fix license headers

* Fix module declaration

* Fix style
  • Loading branch information
benwtrent authored Dec 2, 2024
1 parent c54d4b6 commit 6c2f607
Show file tree
Hide file tree
Showing 15 changed files with 45 additions and 31 deletions.
4 changes: 2 additions & 2 deletions server/src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -457,8 +457,8 @@
org.elasticsearch.index.codec.vectors.ES814HnswScalarQuantizedVectorsFormat,
org.elasticsearch.index.codec.vectors.ES815HnswBitVectorsFormat,
org.elasticsearch.index.codec.vectors.ES815BitFlatVectorFormat,
org.elasticsearch.index.codec.vectors.ES816BinaryQuantizedVectorsFormat,
org.elasticsearch.index.codec.vectors.ES816HnswBinaryQuantizedVectorsFormat;
org.elasticsearch.index.codec.vectors.es816.ES816BinaryQuantizedVectorsFormat,
org.elasticsearch.index.codec.vectors.es816.ES816HnswBinaryQuantizedVectorsFormat;

provides org.apache.lucene.codecs.Codec
with
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,12 @@
*
* Modifications copyright (C) 2024 Elasticsearch B.V.
*/
package org.elasticsearch.index.codec.vectors;
package org.elasticsearch.index.codec.vectors.es816;

import org.apache.lucene.index.ByteVectorValues;
import org.apache.lucene.search.VectorScorer;
import org.apache.lucene.util.VectorUtil;
import org.elasticsearch.index.codec.vectors.BQVectorUtils;

import java.io.IOException;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@
*
* Modifications copyright (C) 2024 Elasticsearch B.V.
*/
package org.elasticsearch.index.codec.vectors;
package org.elasticsearch.index.codec.vectors.es816;

import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.VectorUtil;
import org.elasticsearch.index.codec.vectors.BQSpaceUtils;
import org.elasticsearch.index.codec.vectors.BQVectorUtils;

import static org.apache.lucene.index.VectorSimilarityFunction.COSINE;
import static org.apache.lucene.index.VectorSimilarityFunction.EUCLIDEAN;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
*
* Modifications copyright (C) 2024 Elasticsearch B.V.
*/
package org.elasticsearch.index.codec.vectors;
package org.elasticsearch.index.codec.vectors.es816;

import org.apache.lucene.codecs.hnsw.FlatVectorsScorer;
import org.apache.lucene.index.KnnVectorValues;
Expand All @@ -26,6 +26,8 @@
import org.apache.lucene.util.VectorUtil;
import org.apache.lucene.util.hnsw.RandomVectorScorer;
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
import org.elasticsearch.index.codec.vectors.BQSpaceUtils;
import org.elasticsearch.index.codec.vectors.BQVectorUtils;
import org.elasticsearch.simdvec.ESVectorUtil;

import java.io.IOException;
Expand All @@ -35,10 +37,10 @@
import static org.apache.lucene.index.VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT;

/** Vector scorer over binarized vector values */
public class ES816BinaryFlatVectorsScorer implements FlatVectorsScorer {
class ES816BinaryFlatVectorsScorer implements FlatVectorsScorer {
private final FlatVectorsScorer nonQuantizedDelegate;

public ES816BinaryFlatVectorsScorer(FlatVectorsScorer nonQuantizedDelegate) {
ES816BinaryFlatVectorsScorer(FlatVectorsScorer nonQuantizedDelegate) {
this.nonQuantizedDelegate = nonQuantizedDelegate;
}

Expand Down Expand Up @@ -144,18 +146,18 @@ public RandomVectorScorerSupplier copy() throws IOException {
}

/** A binarized query representing its quantized form along with factors */
public record BinaryQueryVector(byte[] vector, BinaryQuantizer.QueryFactors factors) {}
record BinaryQueryVector(byte[] vector, BinaryQuantizer.QueryFactors factors) {}

/** Vector scorer over binarized vector values */
public static class BinarizedRandomVectorScorer extends RandomVectorScorer.AbstractRandomVectorScorer {
static class BinarizedRandomVectorScorer extends RandomVectorScorer.AbstractRandomVectorScorer {
private final BinaryQueryVector queryVector;
private final BinarizedByteVectorValues targetVectors;
private final VectorSimilarityFunction similarityFunction;

private final float sqrtDimensions;
private final float maxX1;

public BinarizedRandomVectorScorer(
BinarizedRandomVectorScorer(
BinaryQueryVector queryVectors,
BinarizedByteVectorValues targetVectors,
VectorSimilarityFunction similarityFunction
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
*
* Modifications copyright (C) 2024 Elasticsearch B.V.
*/
package org.elasticsearch.index.codec.vectors;
package org.elasticsearch.index.codec.vectors.es816;

import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil;
import org.apache.lucene.codecs.hnsw.FlatVectorsFormat;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
*
* Modifications copyright (C) 2024 Elasticsearch B.V.
*/
package org.elasticsearch.index.codec.vectors;
package org.elasticsearch.index.codec.vectors.es816;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.hnsw.FlatVectorsReader;
Expand All @@ -43,6 +43,7 @@
import org.apache.lucene.util.SuppressForbidden;
import org.apache.lucene.util.hnsw.OrdinalTranslatedKnnCollector;
import org.apache.lucene.util.hnsw.RandomVectorScorer;
import org.elasticsearch.index.codec.vectors.BQVectorUtils;

import java.io.IOException;
import java.util.HashMap;
Expand All @@ -55,7 +56,7 @@
* Copied from Lucene, replace with Lucene's implementation sometime after Lucene 10
*/
@SuppressForbidden(reason = "Lucene classes")
public class ES816BinaryQuantizedVectorsReader extends FlatVectorsReader {
class ES816BinaryQuantizedVectorsReader extends FlatVectorsReader {

private static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(ES816BinaryQuantizedVectorsReader.class);

Expand All @@ -64,7 +65,7 @@ public class ES816BinaryQuantizedVectorsReader extends FlatVectorsReader {
private final FlatVectorsReader rawVectorsReader;
private final ES816BinaryFlatVectorsScorer vectorScorer;

public ES816BinaryQuantizedVectorsReader(
ES816BinaryQuantizedVectorsReader(
SegmentReadState state,
FlatVectorsReader rawVectorsReader,
ES816BinaryFlatVectorsScorer vectorsScorer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
*
* Modifications copyright (C) 2024 Elasticsearch B.V.
*/
package org.elasticsearch.index.codec.vectors;
package org.elasticsearch.index.codec.vectors.es816;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.KnnVectorsReader;
Expand Down Expand Up @@ -48,6 +48,8 @@
import org.apache.lucene.util.hnsw.RandomVectorScorer;
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
import org.elasticsearch.core.SuppressForbidden;
import org.elasticsearch.index.codec.vectors.BQSpaceUtils;
import org.elasticsearch.index.codec.vectors.BQVectorUtils;

import java.io.Closeable;
import java.io.IOException;
Expand All @@ -61,14 +63,14 @@
import static org.apache.lucene.index.VectorSimilarityFunction.EUCLIDEAN;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import static org.apache.lucene.util.RamUsageEstimator.shallowSizeOfInstance;
import static org.elasticsearch.index.codec.vectors.ES816BinaryQuantizedVectorsFormat.BINARIZED_VECTOR_COMPONENT;
import static org.elasticsearch.index.codec.vectors.ES816BinaryQuantizedVectorsFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
import static org.elasticsearch.index.codec.vectors.es816.ES816BinaryQuantizedVectorsFormat.BINARIZED_VECTOR_COMPONENT;
import static org.elasticsearch.index.codec.vectors.es816.ES816BinaryQuantizedVectorsFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;

/**
* Copied from Lucene, replace with Lucene's implementation sometime after Lucene 10
*/
@SuppressForbidden(reason = "Lucene classes")
public class ES816BinaryQuantizedVectorsWriter extends FlatVectorsWriter {
class ES816BinaryQuantizedVectorsWriter extends FlatVectorsWriter {
private static final long SHALLOW_RAM_BYTES_USED = shallowSizeOfInstance(ES816BinaryQuantizedVectorsWriter.class);

private final SegmentWriteState segmentWriteState;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
*
* Modifications copyright (C) 2024 Elasticsearch B.V.
*/
package org.elasticsearch.index.codec.vectors;
package org.elasticsearch.index.codec.vectors.es816;

import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.KnnVectorsReader;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
*
* Modifications copyright (C) 2024 Elasticsearch B.V.
*/
package org.elasticsearch.index.codec.vectors;
package org.elasticsearch.index.codec.vectors.es816;

import org.apache.lucene.codecs.hnsw.FlatVectorsScorer;
import org.apache.lucene.codecs.lucene90.IndexedDISI;
Expand All @@ -29,6 +29,7 @@
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.hnsw.RandomVectorScorer;
import org.apache.lucene.util.packed.DirectMonotonicReader;
import org.elasticsearch.index.codec.vectors.BQVectorUtils;

import java.io.IOException;
import java.nio.ByteBuffer;
Expand All @@ -37,7 +38,7 @@
import static org.elasticsearch.index.codec.vectors.BQVectorUtils.constSqrt;

/** Binarized vector values loaded from off-heap */
public abstract class OffHeapBinarizedVectorValues extends BinarizedByteVectorValues {
abstract class OffHeapBinarizedVectorValues extends BinarizedByteVectorValues {

protected final int dimension;
protected final int size;
Expand Down Expand Up @@ -251,8 +252,8 @@ public static OffHeapBinarizedVectorValues load(
}

/** Dense off-heap binarized vector values */
public static class DenseOffHeapVectorValues extends OffHeapBinarizedVectorValues {
public DenseOffHeapVectorValues(
static class DenseOffHeapVectorValues extends OffHeapBinarizedVectorValues {
DenseOffHeapVectorValues(
int dimension,
int size,
float[] centroid,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@
import org.elasticsearch.index.codec.vectors.ES814HnswScalarQuantizedVectorsFormat;
import org.elasticsearch.index.codec.vectors.ES815BitFlatVectorFormat;
import org.elasticsearch.index.codec.vectors.ES815HnswBitVectorsFormat;
import org.elasticsearch.index.codec.vectors.ES816BinaryQuantizedVectorsFormat;
import org.elasticsearch.index.codec.vectors.ES816HnswBinaryQuantizedVectorsFormat;
import org.elasticsearch.index.codec.vectors.es816.ES816BinaryQuantizedVectorsFormat;
import org.elasticsearch.index.codec.vectors.es816.ES816HnswBinaryQuantizedVectorsFormat;
import org.elasticsearch.index.fielddata.FieldDataContext;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.mapper.ArraySourceValueFetcher;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ org.elasticsearch.index.codec.vectors.ES813Int8FlatVectorFormat
org.elasticsearch.index.codec.vectors.ES814HnswScalarQuantizedVectorsFormat
org.elasticsearch.index.codec.vectors.ES815HnswBitVectorsFormat
org.elasticsearch.index.codec.vectors.ES815BitFlatVectorFormat
org.elasticsearch.index.codec.vectors.ES816BinaryQuantizedVectorsFormat
org.elasticsearch.index.codec.vectors.ES816HnswBinaryQuantizedVectorsFormat
org.elasticsearch.index.codec.vectors.es816.ES816BinaryQuantizedVectorsFormat
org.elasticsearch.index.codec.vectors.es816.ES816HnswBinaryQuantizedVectorsFormat
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@
*
* Modifications copyright (C) 2024 Elasticsearch B.V.
*/
package org.elasticsearch.index.codec.vectors;
package org.elasticsearch.index.codec.vectors.es816;

import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.util.VectorUtil;
import org.elasticsearch.index.codec.vectors.BQSpaceUtils;
import org.elasticsearch.index.codec.vectors.BQVectorUtils;

import java.util.Random;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,15 @@
*
* Modifications copyright (C) 2024 Elasticsearch B.V.
*/
package org.elasticsearch.index.codec.vectors;
package org.elasticsearch.index.codec.vectors.es816;

import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.search.VectorScorer;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.util.VectorUtil;
import org.elasticsearch.common.logging.LogConfigurator;
import org.elasticsearch.index.codec.vectors.BQSpaceUtils;
import org.elasticsearch.index.codec.vectors.BQVectorUtils;

import java.io.IOException;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
*
* Modifications copyright (C) 2024 Elasticsearch B.V.
*/
package org.elasticsearch.index.codec.vectors;
package org.elasticsearch.index.codec.vectors.es816;

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
Expand All @@ -41,6 +41,7 @@
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase;
import org.elasticsearch.common.logging.LogConfigurator;
import org.elasticsearch.index.codec.vectors.BQVectorUtils;

import java.io.IOException;
import java.util.Locale;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
*
* Modifications copyright (C) 2024 Elasticsearch B.V.
*/
package org.elasticsearch.index.codec.vectors;
package org.elasticsearch.index.codec.vectors.es816;

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
Expand Down

0 comments on commit 6c2f607

Please sign in to comment.