Skip to content

Commit

Permalink
Change docValuesSkipIndex from a boolean to an enum. (#13784)
Browse files Browse the repository at this point in the history
At the moment, our skip indexes record min/max ordinal/value per range
of doc IDs. It would be natural to extend them to other pre-aggregated
data such as a sum and value count, which facets could take advantage
of. This change switches `docValuesSkipIndex` from a boolean to an enum
so that we could release such changes in the future in an additive
fashion, by adding constants to this enum and new methods to
`DocValuesSkipper`.
  • Loading branch information
jpountz authored Sep 17, 2024
1 parent 644feeb commit b59a357
Show file tree
Hide file tree
Showing 39 changed files with 214 additions and 108 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
Expand Down Expand Up @@ -209,7 +210,7 @@ private FieldInfo[] readFieldInfos(IndexInput input, int version) throws IOExcep
storePayloads,
indexOptions,
docValuesType,
false,
DocValuesSkipIndexType.NONE,
dvGen,
attributes,
pointDataDimensionCount,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
Expand Down Expand Up @@ -186,7 +187,7 @@ public FieldInfos read(
storePayloads,
indexOptions,
docValuesType,
false,
DocValuesSkipIndexType.NONE,
dvGen,
attributes,
pointDataDimensionCount,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
Expand Down Expand Up @@ -125,8 +126,8 @@ public FieldInfos read(

SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), DOCVALUES_SKIP_INDEX);
boolean docValueSkipper =
Boolean.parseBoolean(readString(DOCVALUES_SKIP_INDEX.length, scratch));
DocValuesSkipIndexType docValueSkipper =
docValuesSkipIndexType(readString(DOCVALUES_SKIP_INDEX.length, scratch));

SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), DOCVALUES_GEN);
Expand Down Expand Up @@ -221,6 +222,10 @@ public DocValuesType docValuesType(String dvType) {
return DocValuesType.valueOf(dvType);
}

/**
 * Converts the textual form of a doc-values skip index type back into its {@link
 * DocValuesSkipIndexType} constant.
 *
 * @param dvSkipIndexType the constant name, passed unchanged to {@code
 *     DocValuesSkipIndexType.valueOf}
 * @return the constant whose name matches {@code dvSkipIndexType}
 */
public DocValuesSkipIndexType docValuesSkipIndexType(String dvSkipIndexType) {
  final DocValuesSkipIndexType parsed = DocValuesSkipIndexType.valueOf(dvSkipIndexType);
  return parsed;
}

/**
 * Converts the textual form of a vector encoding back into its {@link VectorEncoding} value.
 *
 * @param vectorEncoding the name, passed unchanged to {@code VectorEncoding.valueOf}
 * @return whatever {@code VectorEncoding.valueOf} yields for that name
 */
public VectorEncoding vectorEncoding(String vectorEncoding) {
  final VectorEncoding parsed = VectorEncoding.valueOf(vectorEncoding);
  return parsed;
}
Expand Down Expand Up @@ -284,7 +289,7 @@ public void write(
SimpleTextUtil.writeNewline(out);

SimpleTextUtil.write(out, DOCVALUES_SKIP_INDEX);
SimpleTextUtil.write(out, Boolean.toString(fi.hasDocValuesSkipIndex()), scratch);
SimpleTextUtil.write(out, getDocValuesSkipIndexType(fi.docValuesSkipIndexType()), scratch);
SimpleTextUtil.writeNewline(out);

SimpleTextUtil.write(out, DOCVALUES_GEN);
Expand Down Expand Up @@ -355,4 +360,8 @@ public void write(
/**
 * Produces the textual form of a doc-values type, as given by its {@code toString()}.
 *
 * @param type the doc-values type to render; must not be {@code null}
 * @return {@code type.toString()}
 */
private static String getDocValuesType(DocValuesType type) {
  final String text = type.toString();
  return text;
}

/**
 * Produces the textual form of a doc-values skip index type, as given by its {@code
 * toString()}.
 *
 * @param type the skip index type to render; must not be {@code null}
 * @return {@code type.toString()}
 */
private static String getDocValuesSkipIndexType(DocValuesSkipIndexType type) {
  final String text = type.toString();
  return text;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import java.io.IOException;
import java.util.Collections;
import org.apache.lucene.codecs.lucene90.tests.MockTermStateFactory;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;
Expand Down Expand Up @@ -111,7 +112,7 @@ private static FieldInfo getMockFieldInfo(String fieldName, int number) {
true,
IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
DocValuesType.NONE,
false,
DocValuesSkipIndexType.NONE,
-1,
Collections.emptyMap(),
0,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import org.apache.lucene.codecs.uniformsplit.FieldMetadata;
import org.apache.lucene.codecs.uniformsplit.IndexDictionary;
import org.apache.lucene.codecs.uniformsplit.TermBytes;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
Expand Down Expand Up @@ -198,7 +199,7 @@ private static FieldInfo mockFieldInfo(String fieldName, int number) {
true,
IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
DocValuesType.NONE,
false,
DocValuesSkipIndexType.NONE,
-1,
Collections.emptyMap(),
0,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
Expand Down Expand Up @@ -77,7 +78,7 @@ protected DocValuesProducer() {}
/**
* Returns a {@link DocValuesSkipper} for this field. The returned instance need not be
* thread-safe: it will only be used by a single thread. The return value is undefined if {@link
* FieldInfo#hasDocValuesSkipIndex()} doesn't return {@code true}.
* FieldInfo#docValuesSkipIndexType()} returns {@link DocValuesSkipIndexType#NONE}.
*/
public abstract DocValuesSkipper getSkipper(FieldInfo field) throws IOException;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.EmptyDocValuesProducer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
Expand Down Expand Up @@ -143,7 +144,7 @@ public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOExcepti
return DocValues.singleton(valuesProducer.getNumeric(field));
}
};
if (field.hasDocValuesSkipIndex()) {
if (field.docValuesSkipIndexType() != DocValuesSkipIndexType.NONE) {
writeSkipIndex(field, producer);
}
writeValues(field, producer, false);
Expand Down Expand Up @@ -248,7 +249,7 @@ public static SkipAccumulator merge(List<SkipAccumulator> list, int index, int l

private void writeSkipIndex(FieldInfo field, DocValuesProducer valuesProducer)
throws IOException {
assert field.hasDocValuesSkipIndex();
assert field.docValuesSkipIndexType() != DocValuesSkipIndexType.NONE;
final long start = data.getFilePointer();
final SortedNumericDocValues values = valuesProducer.getSortedNumeric(field);
long globalMaxValue = Long.MIN_VALUE;
Expand Down Expand Up @@ -700,7 +701,7 @@ public long cost() {
return DocValues.singleton(sortedOrds);
}
};
if (field.hasDocValuesSkipIndex()) {
if (field.docValuesSkipIndexType() != DocValuesSkipIndexType.NONE) {
writeSkipIndex(field, producer);
}
if (addTypeByte) {
Expand Down Expand Up @@ -873,7 +874,7 @@ public void addSortedNumericField(FieldInfo field, DocValuesProducer valuesProdu

private void doAddSortedNumericField(
FieldInfo field, DocValuesProducer valuesProducer, boolean ords) throws IOException {
if (field.hasDocValuesSkipIndex()) {
if (field.docValuesSkipIndexType() != DocValuesSkipIndexType.NONE) {
writeSkipIndex(field, valuesProducer);
}
if (ords) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
Expand Down Expand Up @@ -191,7 +192,7 @@ private void readFields(IndexInput meta, FieldInfos infos) throws IOException {
throw new CorruptIndexException("Invalid field number: " + fieldNumber, meta);
}
byte type = meta.readByte();
if (info.hasDocValuesSkipIndex()) {
if (info.docValuesSkipIndexType() != DocValuesSkipIndexType.NONE) {
skippers.put(info.number, readDocValueSkipperMeta(meta));
}
if (type == Lucene90DocValuesFormat.NUMERIC) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
Expand Down Expand Up @@ -163,8 +164,6 @@ public FieldInfos read(
boolean isSoftDeletesField = (bits & SOFT_DELETES_FIELD) != 0;
boolean isParentField =
format >= FORMAT_PARENT_FIELD ? (bits & PARENT_FIELD_FIELD) != 0 : false;
boolean hasDocValuesSkipIndex =
format >= FORMAT_DOCVALUE_SKIPPER ? (bits & DOCVALUES_SKIPPER) != 0 : false;

if ((bits & 0xC0) != 0) {
throw new CorruptIndexException(
Expand All @@ -187,6 +186,12 @@ public FieldInfos read(

// DV Types are packed in one byte
final DocValuesType docValuesType = getDocValuesType(input, input.readByte());
final DocValuesSkipIndexType docValuesSkipIndex;
if (format >= FORMAT_DOCVALUE_SKIPPER) {
docValuesSkipIndex = getDocValuesSkipIndexType(input, input.readByte());
} else {
docValuesSkipIndex = DocValuesSkipIndexType.NONE;
}
final long dvGen = input.readLong();
Map<String, String> attributes = input.readMapOfStrings();
// just use the last field's map if it's the same
Expand Down Expand Up @@ -217,7 +222,7 @@ public FieldInfos read(
storePayloads,
indexOptions,
docValuesType,
hasDocValuesSkipIndex,
docValuesSkipIndex,
dvGen,
attributes,
pointDataDimensionCount,
Expand Down Expand Up @@ -270,6 +275,18 @@ private static byte docValuesByte(DocValuesType type) {
}
}

/**
 * Maps a {@link DocValuesSkipIndexType} to the single byte used to represent it: {@code NONE}
 * becomes 0 and {@code RANGE} becomes 1.
 *
 * @param type the skip index type to encode
 * @return the byte code for {@code type}
 * @throws AssertionError if {@code type} is a constant this method does not handle
 */
private static byte docValuesSkipIndexByte(DocValuesSkipIndexType type) {
  final byte encoded;
  switch (type) {
    case NONE:
      encoded = 0;
      break;
    case RANGE:
      encoded = 1;
      break;
    default:
      // Reaching this branch means a constant was added without a byte code being assigned here.
      throw new AssertionError("unhandled DocValuesSkipIndexType: " + type);
  }
  return encoded;
}

private static DocValuesType getDocValuesType(IndexInput input, byte b) throws IOException {
switch (b) {
case 0:
Expand All @@ -289,6 +306,18 @@ private static DocValuesType getDocValuesType(IndexInput input, byte b) throws I
}
}

/**
 * Maps a byte code back to its {@link DocValuesSkipIndexType}: 0 yields {@code NONE} and 1 yields
 * {@code RANGE}. Any other value raises a {@link CorruptIndexException}.
 *
 * @param input the input the byte came from; only passed to the exception on failure
 * @param b the byte code to decode
 * @return the skip index type represented by {@code b}
 */
private static DocValuesSkipIndexType getDocValuesSkipIndexType(IndexInput input, byte b)
    throws IOException {
  if (b == 0) {
    return DocValuesSkipIndexType.NONE;
  }
  if (b == 1) {
    return DocValuesSkipIndexType.RANGE;
  }
  // Every other byte value is rejected.
  throw new CorruptIndexException("invalid docvaluesskipindex byte: " + b, input);
}

private static VectorEncoding getVectorEncoding(IndexInput input, byte b) throws IOException {
if (b < 0 || b >= VectorEncoding.values().length) {
throw new CorruptIndexException("invalid vector encoding: " + b, input);
Expand Down Expand Up @@ -404,13 +433,13 @@ public void write(
if (fi.hasPayloads()) bits |= STORE_PAYLOADS;
if (fi.isSoftDeletesField()) bits |= SOFT_DELETES_FIELD;
if (fi.isParentField()) bits |= PARENT_FIELD_FIELD;
if (fi.hasDocValuesSkipIndex()) bits |= DOCVALUES_SKIPPER;
output.writeByte(bits);

output.writeByte(indexOptionsByte(fi.getIndexOptions()));

// write the DV type as a single byte
output.writeByte(docValuesByte(fi.getDocValuesType()));
output.writeByte(docValuesSkipIndexByte(fi.docValuesSkipIndexType()));
output.writeLong(fi.getDocValuesGen());
output.writeMapOfStrings(fi.attributes());
output.writeVInt(fi.getPointDimensionCount());
Expand Down
11 changes: 6 additions & 5 deletions lucene/core/src/java/org/apache/lucene/document/FieldType.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import java.util.Map;
import java.util.Objects;
import org.apache.lucene.analysis.Analyzer; // javadocs
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexWriterConfig;
Expand All @@ -41,7 +42,7 @@ public class FieldType implements IndexableFieldType {
private IndexOptions indexOptions = IndexOptions.NONE;
private boolean frozen;
private DocValuesType docValuesType = DocValuesType.NONE;
private boolean docValuesSkipIndex;
private DocValuesSkipIndexType docValuesSkipIndex = DocValuesSkipIndexType.NONE;
private int dimensionCount;
private int indexDimensionCount;
private int dimensionNumBytes;
Expand All @@ -61,7 +62,7 @@ public FieldType(IndexableFieldType ref) {
this.omitNorms = ref.omitNorms();
this.indexOptions = ref.indexOptions();
this.docValuesType = ref.docValuesType();
this.docValuesSkipIndex = ref.hasDocValuesSkipIndex();
this.docValuesSkipIndex = ref.docValuesSkipIndexType();
this.dimensionCount = ref.pointDimensionCount();
this.indexDimensionCount = ref.pointIndexDimensionCount();
this.dimensionNumBytes = ref.pointNumBytes();
Expand Down Expand Up @@ -508,7 +509,7 @@ public void setDocValuesType(DocValuesType type) {
}

@Override
public boolean hasDocValuesSkipIndex() {
public DocValuesSkipIndexType docValuesSkipIndexType() {
return docValuesSkipIndex;
}

Expand All @@ -518,7 +519,7 @@ public boolean hasDocValuesSkipIndex() {
* correlate with fields that are part of the index sort, so that values can be expected to be
* clustered in the doc ID space.
*/
public void setDocValuesSkipIndex(boolean docValuesSkipIndex) {
public void setDocValuesSkipIndexType(DocValuesSkipIndexType docValuesSkipIndex) {
checkIfFrozen();
this.docValuesSkipIndex = docValuesSkipIndex;
}
Expand All @@ -531,7 +532,7 @@ public int hashCode() {
result = prime * result + indexDimensionCount;
result = prime * result + dimensionNumBytes;
result = prime * result + ((docValuesType == null) ? 0 : docValuesType.hashCode());
result = prime * result + Boolean.hashCode(docValuesSkipIndex);
result = prime * result + (docValuesSkipIndex == null ? 0 : docValuesSkipIndex.hashCode());
result = prime * result + indexOptions.hashCode();
result = prime * result + (omitNorms ? 1231 : 1237);
result = prime * result + (storeTermVectorOffsets ? 1231 : 1237);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
package org.apache.lucene.document;

import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.Query;
Expand All @@ -42,13 +43,13 @@ public class NumericDocValuesField extends Field {
TYPE.freeze();

INDEXED_TYPE = new FieldType(TYPE);
INDEXED_TYPE.setDocValuesSkipIndex(true);
INDEXED_TYPE.setDocValuesSkipIndexType(DocValuesSkipIndexType.RANGE);
INDEXED_TYPE.freeze();
}

/**
* Creates a new {@link NumericDocValuesField} with the specified 64-bit long value that also
* creates a {@link FieldType#hasDocValuesSkipIndex() skip index}.
* creates a {@link FieldType#docValuesSkipIndexType() skip index}.
*
* @param name field name
* @param value 64-bit long value
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package org.apache.lucene.document;

import java.util.Collection;
import org.apache.lucene.index.DocValuesSkipIndexType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.MultiTermQuery;
Expand Down Expand Up @@ -48,13 +49,13 @@ public class SortedDocValuesField extends Field {
TYPE.freeze();

INDEXED_TYPE = new FieldType(TYPE);
INDEXED_TYPE.setDocValuesSkipIndex(true);
INDEXED_TYPE.setDocValuesSkipIndexType(DocValuesSkipIndexType.RANGE);
INDEXED_TYPE.freeze();
}

/**
* Creates a new {@link SortedDocValuesField} with the specified binary value that also
* creates a {@link FieldType#hasDocValuesSkipIndex() skip index}.
* creates a {@link FieldType#docValuesSkipIndexType() skip index}.
*
* @param name field name
* @param bytes binary content
Expand Down
Loading

0 comments on commit b59a357

Please sign in to comment.