Change docValuesSkipIndex from a boolean to an enum. (#13784)

At the moment, our skip indexes record the min/max ordinal/value per range of doc IDs. It would be natural to extend them to other pre-aggregated data, such as a sum and a value count, which facets could take advantage of. This change switches `docValuesSkipIndex` from a boolean to an enum so that we can release such changes in the future in an additive fashion, by adding constants to this enum and new methods to `DocValuesSkipper`.
apache · Sep 17, 2024 · b59a357 · b59a357
1 parent 644feeb
commit b59a357
Show file tree

Hide file tree

Showing 39 changed files with 214 additions and 108 deletions.
diff --git a/...-codecs/src/java/org/apache/lucene/backward_codecs/lucene60/Lucene60FieldInfosFormat.java b/...-codecs/src/java/org/apache/lucene/backward_codecs/lucene60/Lucene60FieldInfosFormat.java
@@ -24,6 +24,7 @@
 import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.FieldInfosFormat;
 import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.DocValuesSkipIndexType;
 import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
@@ -209,7 +210,7 @@ private FieldInfo[] readFieldInfos(IndexInput input, int version) throws IOExcep
                 storePayloads,
                 indexOptions,
                 docValuesType,
-                false,
+                DocValuesSkipIndexType.NONE,
                 dvGen,
                 attributes,
                 pointDataDimensionCount,

diff --git a/...-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90FieldInfosFormat.java b/...-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90FieldInfosFormat.java
@@ -23,6 +23,7 @@
 import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.FieldInfosFormat;
 import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.DocValuesSkipIndexType;
 import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
@@ -186,7 +187,7 @@ public FieldInfos read(
                     storePayloads,
                     indexOptions,
                     docValuesType,
-                    false,
+                    DocValuesSkipIndexType.NONE,
                     dvGen,
                     attributes,
                     pointDataDimensionCount,

diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosFormat.java
@@ -22,6 +22,7 @@
 import java.util.HashMap;
 import java.util.Map;
 import org.apache.lucene.codecs.FieldInfosFormat;
+import org.apache.lucene.index.DocValuesSkipIndexType;
 import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
@@ -125,8 +126,8 @@ public FieldInfos read(
 
         SimpleTextUtil.readLine(input, scratch);
         assert StringHelper.startsWith(scratch.get(), DOCVALUES_SKIP_INDEX);
-        boolean docValueSkipper =
-            Boolean.parseBoolean(readString(DOCVALUES_SKIP_INDEX.length, scratch));
+        DocValuesSkipIndexType docValueSkipper =
+            docValuesSkipIndexType(readString(DOCVALUES_SKIP_INDEX.length, scratch));
 
         SimpleTextUtil.readLine(input, scratch);
         assert StringHelper.startsWith(scratch.get(), DOCVALUES_GEN);
@@ -221,6 +222,10 @@ public DocValuesType docValuesType(String dvType) {
     return DocValuesType.valueOf(dvType);
   }
 
+  public DocValuesSkipIndexType docValuesSkipIndexType(String dvSkipIndexType) {
+    return DocValuesSkipIndexType.valueOf(dvSkipIndexType);
+  }
+
   public VectorEncoding vectorEncoding(String vectorEncoding) {
     return VectorEncoding.valueOf(vectorEncoding);
   }
@@ -284,7 +289,7 @@ public void write(
         SimpleTextUtil.writeNewline(out);
 
         SimpleTextUtil.write(out, DOCVALUES_SKIP_INDEX);
-        SimpleTextUtil.write(out, Boolean.toString(fi.hasDocValuesSkipIndex()), scratch);
+        SimpleTextUtil.write(out, getDocValuesSkipIndexType(fi.docValuesSkipIndexType()), scratch);
         SimpleTextUtil.writeNewline(out);
 
         SimpleTextUtil.write(out, DOCVALUES_GEN);
@@ -355,4 +360,8 @@ public void write(
   private static String getDocValuesType(DocValuesType type) {
     return type.toString();
   }
+
+  private static String getDocValuesSkipIndexType(DocValuesSkipIndexType type) {
+    return type.toString();
+  }
 }
diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/uniformsplit/TestBlockWriter.java b/lucene/codecs/src/test/org/apache/lucene/codecs/uniformsplit/TestBlockWriter.java
@@ -20,6 +20,7 @@
 import java.io.IOException;
 import java.util.Collections;
 import org.apache.lucene.codecs.lucene90.tests.MockTermStateFactory;
+import org.apache.lucene.index.DocValuesSkipIndexType;
 import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexOptions;
@@ -111,7 +112,7 @@ private static FieldInfo getMockFieldInfo(String fieldName, int number) {
         true,
         IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
         DocValuesType.NONE,
-        false,
+        DocValuesSkipIndexType.NONE,
         -1,
         Collections.emptyMap(),
         0,

diff --git a/.../codecs/src/test/org/apache/lucene/codecs/uniformsplit/sharedterms/TestSTBlockReader.java b/.../codecs/src/test/org/apache/lucene/codecs/uniformsplit/sharedterms/TestSTBlockReader.java
@@ -34,6 +34,7 @@
 import org.apache.lucene.codecs.uniformsplit.FieldMetadata;
 import org.apache.lucene.codecs.uniformsplit.IndexDictionary;
 import org.apache.lucene.codecs.uniformsplit.TermBytes;
+import org.apache.lucene.index.DocValuesSkipIndexType;
 import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
@@ -198,7 +199,7 @@ private static FieldInfo mockFieldInfo(String fieldName, int number) {
         true,
         IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
         DocValuesType.NONE,
-        false,
+        DocValuesSkipIndexType.NONE,
         -1,
         Collections.emptyMap(),
         0,

diff --git a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java
@@ -19,6 +19,7 @@
 import java.io.Closeable;
 import java.io.IOException;
 import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.DocValuesSkipIndexType;
 import org.apache.lucene.index.DocValuesSkipper;
 import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.FieldInfo;
@@ -77,7 +78,7 @@ protected DocValuesProducer() {}
   /**
    * Returns a {@link DocValuesSkipper} for this field. The returned instance need not be
    * thread-safe: it will only be used by a single thread. The return value is undefined if {@link
-   * FieldInfo#hasDocValuesSkipIndex()} doesn't return {@code true}.
+   * FieldInfo#docValuesSkipIndexType()} returns {@link DocValuesSkipIndexType#NONE}.
    */
   public abstract DocValuesSkipper getSkipper(FieldInfo field) throws IOException;
 

diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java
@@ -31,6 +31,7 @@
 import org.apache.lucene.codecs.DocValuesProducer;
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValuesSkipIndexType;
 import org.apache.lucene.index.EmptyDocValuesProducer;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexFileNames;
@@ -143,7 +144,7 @@ public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOExcepti
             return DocValues.singleton(valuesProducer.getNumeric(field));
           }
         };
-    if (field.hasDocValuesSkipIndex()) {
+    if (field.docValuesSkipIndexType() != DocValuesSkipIndexType.NONE) {
       writeSkipIndex(field, producer);
     }
     writeValues(field, producer, false);
@@ -248,7 +249,7 @@ public static SkipAccumulator merge(List<SkipAccumulator> list, int index, int l
 
   private void writeSkipIndex(FieldInfo field, DocValuesProducer valuesProducer)
       throws IOException {
-    assert field.hasDocValuesSkipIndex();
+    assert field.docValuesSkipIndexType() != DocValuesSkipIndexType.NONE;
     final long start = data.getFilePointer();
     final SortedNumericDocValues values = valuesProducer.getSortedNumeric(field);
     long globalMaxValue = Long.MIN_VALUE;
@@ -700,7 +701,7 @@ public long cost() {
             return DocValues.singleton(sortedOrds);
           }
         };
-    if (field.hasDocValuesSkipIndex()) {
+    if (field.docValuesSkipIndexType() != DocValuesSkipIndexType.NONE) {
       writeSkipIndex(field, producer);
     }
     if (addTypeByte) {
@@ -873,7 +874,7 @@ public void addSortedNumericField(FieldInfo field, DocValuesProducer valuesProdu
 
   private void doAddSortedNumericField(
       FieldInfo field, DocValuesProducer valuesProducer, boolean ords) throws IOException {
-    if (field.hasDocValuesSkipIndex()) {
+    if (field.docValuesSkipIndexType() != DocValuesSkipIndexType.NONE) {
       writeSkipIndex(field, valuesProducer);
     }
     if (ords) {

diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java
@@ -27,6 +27,7 @@
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValuesSkipIndexType;
 import org.apache.lucene.index.DocValuesSkipper;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
@@ -191,7 +192,7 @@ private void readFields(IndexInput meta, FieldInfos infos) throws IOException {
         throw new CorruptIndexException("Invalid field number: " + fieldNumber, meta);
       }
       byte type = meta.readByte();
-      if (info.hasDocValuesSkipIndex()) {
+      if (info.docValuesSkipIndexType() != DocValuesSkipIndexType.NONE) {
         skippers.put(info.number, readDocValueSkipperMeta(meta));
       }
       if (type == Lucene90DocValuesFormat.NUMERIC) {

diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene94/Lucene94FieldInfosFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene94/Lucene94FieldInfosFormat.java
@@ -24,6 +24,7 @@
 import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.FieldInfosFormat;
 import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.DocValuesSkipIndexType;
 import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
@@ -163,8 +164,6 @@ public FieldInfos read(
           boolean isSoftDeletesField = (bits & SOFT_DELETES_FIELD) != 0;
           boolean isParentField =
               format >= FORMAT_PARENT_FIELD ? (bits & PARENT_FIELD_FIELD) != 0 : false;
-          boolean hasDocValuesSkipIndex =
-              format >= FORMAT_DOCVALUE_SKIPPER ? (bits & DOCVALUES_SKIPPER) != 0 : false;
 
           if ((bits & 0xC0) != 0) {
             throw new CorruptIndexException(
@@ -187,6 +186,12 @@ public FieldInfos read(
 
           // DV Types are packed in one byte
           final DocValuesType docValuesType = getDocValuesType(input, input.readByte());
+          final DocValuesSkipIndexType docValuesSkipIndex;
+          if (format >= FORMAT_DOCVALUE_SKIPPER) {
+            docValuesSkipIndex = getDocValuesSkipIndexType(input, input.readByte());
+          } else {
+            docValuesSkipIndex = DocValuesSkipIndexType.NONE;
+          }
           final long dvGen = input.readLong();
           Map<String, String> attributes = input.readMapOfStrings();
           // just use the last field's map if its the same
@@ -217,7 +222,7 @@ public FieldInfos read(
                     storePayloads,
                     indexOptions,
                     docValuesType,
-                    hasDocValuesSkipIndex,
+                    docValuesSkipIndex,
                     dvGen,
                     attributes,
                     pointDataDimensionCount,
@@ -270,6 +275,18 @@ private static byte docValuesByte(DocValuesType type) {
     }
   }
 
+  private static byte docValuesSkipIndexByte(DocValuesSkipIndexType type) {
+    switch (type) {
+      case NONE:
+        return 0;
+      case RANGE:
+        return 1;
+      default:
+        // BUG
+        throw new AssertionError("unhandled DocValuesSkipIndexType: " + type);
+    }
+  }
+
   private static DocValuesType getDocValuesType(IndexInput input, byte b) throws IOException {
     switch (b) {
       case 0:
@@ -289,6 +306,18 @@ private static DocValuesType getDocValuesType(IndexInput input, byte b) throws I
     }
   }
 
+  private static DocValuesSkipIndexType getDocValuesSkipIndexType(IndexInput input, byte b)
+      throws IOException {
+    switch (b) {
+      case 0:
+        return DocValuesSkipIndexType.NONE;
+      case 1:
+        return DocValuesSkipIndexType.RANGE;
+      default:
+        throw new CorruptIndexException("invalid docvaluesskipindex byte: " + b, input);
+    }
+  }
+
   private static VectorEncoding getVectorEncoding(IndexInput input, byte b) throws IOException {
     if (b < 0 || b >= VectorEncoding.values().length) {
       throw new CorruptIndexException("invalid vector encoding: " + b, input);
@@ -404,13 +433,13 @@ public void write(
         if (fi.hasPayloads()) bits |= STORE_PAYLOADS;
         if (fi.isSoftDeletesField()) bits |= SOFT_DELETES_FIELD;
         if (fi.isParentField()) bits |= PARENT_FIELD_FIELD;
-        if (fi.hasDocValuesSkipIndex()) bits |= DOCVALUES_SKIPPER;
         output.writeByte(bits);
 
         output.writeByte(indexOptionsByte(fi.getIndexOptions()));
 
         // pack the DV type and hasNorms in one byte
         output.writeByte(docValuesByte(fi.getDocValuesType()));
+        output.writeByte(docValuesSkipIndexByte(fi.docValuesSkipIndexType()));
         output.writeLong(fi.getDocValuesGen());
         output.writeMapOfStrings(fi.attributes());
         output.writeVInt(fi.getPointDimensionCount());

diff --git a/lucene/core/src/java/org/apache/lucene/document/FieldType.java b/lucene/core/src/java/org/apache/lucene/document/FieldType.java
@@ -20,6 +20,7 @@
 import java.util.Map;
 import java.util.Objects;
 import org.apache.lucene.analysis.Analyzer; // javadocs
+import org.apache.lucene.index.DocValuesSkipIndexType;
 import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexWriterConfig;
@@ -41,7 +42,7 @@ public class FieldType implements IndexableFieldType {
   private IndexOptions indexOptions = IndexOptions.NONE;
   private boolean frozen;
   private DocValuesType docValuesType = DocValuesType.NONE;
-  private boolean docValuesSkipIndex;
+  private DocValuesSkipIndexType docValuesSkipIndex = DocValuesSkipIndexType.NONE;
   private int dimensionCount;
   private int indexDimensionCount;
   private int dimensionNumBytes;
@@ -61,7 +62,7 @@ public FieldType(IndexableFieldType ref) {
     this.omitNorms = ref.omitNorms();
     this.indexOptions = ref.indexOptions();
     this.docValuesType = ref.docValuesType();
-    this.docValuesSkipIndex = ref.hasDocValuesSkipIndex();
+    this.docValuesSkipIndex = ref.docValuesSkipIndexType();
     this.dimensionCount = ref.pointDimensionCount();
     this.indexDimensionCount = ref.pointIndexDimensionCount();
     this.dimensionNumBytes = ref.pointNumBytes();
@@ -508,7 +509,7 @@ public void setDocValuesType(DocValuesType type) {
   }
 
   @Override
-  public boolean hasDocValuesSkipIndex() {
+  public DocValuesSkipIndexType docValuesSkipIndexType() {
     return docValuesSkipIndex;
   }
 
@@ -518,7 +519,7 @@ public boolean hasDocValuesSkipIndex() {
    * correlate with fields that are part of the index sort, so that values can be expected to be
    * clustered in the doc ID space.
    */
-  public void setDocValuesSkipIndex(boolean docValuesSkipIndex) {
+  public void setDocValuesSkipIndexType(DocValuesSkipIndexType docValuesSkipIndex) {
     checkIfFrozen();
     this.docValuesSkipIndex = docValuesSkipIndex;
   }
@@ -531,7 +532,7 @@ public int hashCode() {
     result = prime * result + indexDimensionCount;
     result = prime * result + dimensionNumBytes;
     result = prime * result + ((docValuesType == null) ? 0 : docValuesType.hashCode());
-    result = prime * result + Boolean.hashCode(docValuesSkipIndex);
+    result = prime * result + (docValuesSkipIndex == null ? 0 : docValuesSkipIndex.hashCode());
     result = prime * result + indexOptions.hashCode();
     result = prime * result + (omitNorms ? 1231 : 1237);
     result = prime * result + (storeTermVectorOffsets ? 1231 : 1237);

diff --git a/lucene/core/src/java/org/apache/lucene/document/NumericDocValuesField.java b/lucene/core/src/java/org/apache/lucene/document/NumericDocValuesField.java
@@ -16,6 +16,7 @@
  */
 package org.apache.lucene.document;
 
+import org.apache.lucene.index.DocValuesSkipIndexType;
 import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.search.IndexOrDocValuesQuery;
 import org.apache.lucene.search.Query;
@@ -42,13 +43,13 @@ public class NumericDocValuesField extends Field {
     TYPE.freeze();
 
     INDEXED_TYPE = new FieldType(TYPE);
-    INDEXED_TYPE.setDocValuesSkipIndex(true);
+    INDEXED_TYPE.setDocValuesSkipIndexType(DocValuesSkipIndexType.RANGE);
     INDEXED_TYPE.freeze();
   }
 
   /**
    * Creates a new {@link NumericDocValuesField} with the specified 64-bit long value that also
-   * creates a {@link FieldType#hasDocValuesSkipIndex() skip index}.
+   * creates a {@link FieldType#docValuesSkipIndexType() skip index}.
    *
    * @param name field name
    * @param value 64-bit long value

diff --git a/lucene/core/src/java/org/apache/lucene/document/SortedDocValuesField.java b/lucene/core/src/java/org/apache/lucene/document/SortedDocValuesField.java
@@ -17,6 +17,7 @@
 package org.apache.lucene.document;
 
 import java.util.Collection;
+import org.apache.lucene.index.DocValuesSkipIndexType;
 import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.search.IndexOrDocValuesQuery;
 import org.apache.lucene.search.MultiTermQuery;
@@ -48,13 +49,13 @@ public class SortedDocValuesField extends Field {
     TYPE.freeze();
 
     INDEXED_TYPE = new FieldType(TYPE);
-    INDEXED_TYPE.setDocValuesSkipIndex(true);
+    INDEXED_TYPE.setDocValuesSkipIndexType(DocValuesSkipIndexType.RANGE);
     INDEXED_TYPE.freeze();
   }
 
   /**
    * Creates a new {@link SortedDocValuesField} with the specified 64-bit long value that also
-   * creates a {@link FieldType#hasDocValuesSkipIndex() skip index}.
+   * creates a {@link FieldType#docValuesSkipIndexType() skip index}.
    *
    * @param name field name
    * @param bytes binary content