Reusing docValuesReader and refactoring DataCubesConsumer
Signed-off-by: Bharathwaj G <[email protected]>
bharath-techie committed May 7, 2024
1 parent b17258a commit ee62003
Showing 6 changed files with 73 additions and 209 deletions.
19 changes: 9 additions & 10 deletions lucene/core/src/java/org/apache/lucene/codecs/DataCubesFormat.java
@@ -18,7 +18,7 @@

import java.io.IOException;
import org.apache.lucene.index.DataCubesConfig;
import org.apache.lucene.index.DataCubesDocValuesConsumer;
import org.apache.lucene.index.DataCubesConsumer;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.NamedSPILoader;
@@ -43,15 +43,14 @@ public String getName() {
public abstract DataCubesProducer<?> fieldsProducer(SegmentReadState state) throws IOException;

/**
* Returns a {@link org.apache.lucene.index.DataCubesDocValuesConsumer} to write dataCubesValues
* to the index based on docValues NOTE: by the time this call returns, it must hold open any
* files it will need to use; else, those files may be deleted. Additionally, required files may
* be deleted during the execution of this call before there is a chance to open them. Under these
* circumstances an IOException should be thrown by the implementation. IOExceptions are expected
* and will automatically cause a retry of the segment opening logic with the newly revised
* segments
* Returns a {@link DataCubesConsumer} to write dataCubesValues to the index based on docValues.
* NOTE: by the time this call returns, it must hold open any files it will need to use; else,
* those files may be deleted. Additionally, required files may be deleted during the execution of
* this call before there is a chance to open them. Under these circumstances an IOException
* should be thrown by the implementation. IOExceptions are expected and will automatically cause
* a retry of the segment opening logic with the newly revised segments.
*/
public abstract DataCubesDocValuesConsumer fieldsConsumer(
public abstract DataCubesConsumer fieldsConsumer(
SegmentWriteState state, DataCubesConfig dataCubesConfig) throws IOException;

/** A {@code DataCubesFormat} that has nothing indexed */
@@ -63,7 +62,7 @@ public DataCubesProducer<?> fieldsProducer(SegmentReadState state) throws IOException
}

@Override
public DataCubesDocValuesConsumer fieldsConsumer(
public DataCubesConsumer fieldsConsumer(
SegmentWriteState state, DataCubesConfig dataCubesConfig) throws IOException {
throw new UnsupportedOperationException("Attempt to write EMPTY DataCube values");
}
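As a hedged illustration of the contract above, a minimal format implementation might look like the sketch below. SimpleDataCubesFormat, SimpleDataCubesProducer and SimpleDataCubesConsumer are hypothetical names, not part of this change, and the String-name super constructor is assumed from the NamedSPILoader import rather than shown in this diff.

import java.io.IOException;
import org.apache.lucene.index.DataCubesConfig;
import org.apache.lucene.index.DataCubesConsumer;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;

// Hypothetical sketch only; SimpleDataCubesProducer/SimpleDataCubesConsumer
// stand in for real implementations.
public final class SimpleDataCubesFormat extends DataCubesFormat {

  public SimpleDataCubesFormat() {
    super("Simple"); // assumed SPI name, resolved via NamedSPILoader at read time
  }

  @Override
  public DataCubesProducer<?> fieldsProducer(SegmentReadState state) throws IOException {
    // Must open, and hold open, every file it needs before returning; an
    // IOException here is expected to trigger a retry of segment opening.
    return new SimpleDataCubesProducer(state); // hypothetical producer
  }

  @Override
  public DataCubesConsumer fieldsConsumer(SegmentWriteState state, DataCubesConfig dataCubesConfig)
      throws IOException {
    return new SimpleDataCubesConsumer(state, dataCubesConfig); // hypothetical consumer
  }
}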
47 changes: 47 additions & 0 deletions lucene/core/src/java/org/apache/lucene/index/DataCubesConsumer.java
@@ -0,0 +1,47 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;

import java.io.Closeable;
import java.io.IOException;

/**
* Abstract API to write DataCubeIndices
*
* @lucene.experimental
*/
public abstract class DataCubesConsumer implements Closeable {

/** Sole constructor */
public DataCubesConsumer() throws IOException {
super();
}

/** Creates the DataCubes index based on the DocValues and flushes the indices to disk */
public abstract void flush(
SegmentWriteState state,
DataCubesConfig dataCubesConfig,
LeafReader docValuesReader,
Sorter.DocMap sortMap)
throws IOException;

/** Merges in the DataCubes fields from the readers in <code>mergeState</code>. */
public abstract void merge(MergeState mergeState) throws IOException;

@Override
public void close() throws IOException {}
}

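A concrete subclass would hold the write state handed to it by fieldsConsumer and implement flush and merge; below is a skeleton sketch, with SimpleDataCubesConsumer again a hypothetical name.

import java.io.IOException;
import org.apache.lucene.index.DataCubesConfig;
import org.apache.lucene.index.DataCubesConsumer;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Sorter;

// Hypothetical consumer skeleton showing the shape of the new API.
public class SimpleDataCubesConsumer extends DataCubesConsumer {

  public SimpleDataCubesConsumer(SegmentWriteState state, DataCubesConfig config)
      throws IOException {
    // open (and keep open) any index outputs needed for the cube structures
  }

  @Override
  public void flush(SegmentWriteState state, DataCubesConfig dataCubesConfig,
      LeafReader docValuesReader, Sorter.DocMap sortMap) throws IOException {
    // build the cubes from the doc values view; see the reading sketch
    // after the IndexingChain diff below
  }

  @Override
  public void merge(MergeState mergeState) throws IOException {
    // combine per-segment cube structures; see the merge sketch further below
  }

  @Override
  public void close() throws IOException {
    // close any outputs opened in the constructor
  }
}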
lucene/core/src/java/org/apache/lucene/index/DataCubesDocValuesConsumer.java
This file was deleted.

80 changes: 16 additions & 64 deletions lucene/core/src/java/org/apache/lucene/index/IndexingChain.java
@@ -280,22 +280,20 @@ Sorter.DocMap flush(SegmentWriteState state) throws IOException {
infoStream.message(
"IW", TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - t0) + " ms to write norms");
}
SegmentReadState readState =
new SegmentReadState(
state.directory,
state.segmentInfo,
state.fieldInfos,
IOContext.DEFAULT,
state.segmentSuffix);

// This must be called before writeDocValues because this uses docValuesWriter
DataCubesConfig dataCubesConfig = state.segmentInfo.getDataCubesConfig();
t0 = System.nanoTime();
writeDataCubes(state, sortMap, dataCubesConfig);
if (infoStream.isEnabled("IW")) {
infoStream.message(
"IW", TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - t0) + " ms to write dataCubes");
}
SegmentReadState readState =
new SegmentReadState(
state.directory,
state.segmentInfo,
state.fieldInfos,
IOContext.DEFAULT,
state.segmentSuffix);

t0 = System.nanoTime();
writeDocValues(state, sortMap);
@@ -422,69 +420,23 @@ private void writeDataCubes(
SegmentWriteState state, Sorter.DocMap sortMap, DataCubesConfig dataCubesConfig)
throws IOException {
if (dataCubesConfig == null) return;
DataCubesDocValuesConsumer dataCubeDocValuesConsumer = null;
DataCubesConsumer dataCubesConsumer = null;
boolean success = false;
LeafReader docValuesReader = getDocValuesLeafReader();
try {
for (int i = 0; i < fieldHash.length; i++) {
PerField perField = fieldHash[i];
while (perField != null) {
if (perField.docValuesWriter != null) {
if (perField.fieldInfo.getDocValuesType() == DocValuesType.NONE) {
// BUG
throw new AssertionError(
"segment="
+ state.segmentInfo
+ ": field=\""
+ perField.fieldInfo.name
+ "\" has no docValues but wrote them");
}
if (dataCubeDocValuesConsumer == null) {
// lazy init
DataCubesFormat fmt = state.segmentInfo.getCodec().dataCubesFormat();
dataCubeDocValuesConsumer = fmt.fieldsConsumer(state, dataCubesConfig);
}
perField.docValuesWriter.flush(state, sortMap, dataCubeDocValuesConsumer);
} else if (perField.fieldInfo != null
&& perField.fieldInfo.getDocValuesType() != DocValuesType.NONE) {
// BUG
throw new AssertionError(
"segment="
+ state.segmentInfo
+ ": field=\""
+ perField.fieldInfo.name
+ "\" has docValues but did not write them");
}
perField = perField.next;
}
}
// IMPORTANT : This call creates the data cubes structures
if (dataCubeDocValuesConsumer != null) {
dataCubeDocValuesConsumer.flush(dataCubesConfig);
DataCubesFormat fmt = state.segmentInfo.getCodec().dataCubesFormat();
dataCubesConsumer = fmt.fieldsConsumer(state, dataCubesConfig);
// This creates the data cubes structures
if (dataCubesConsumer != null) {
dataCubesConsumer.flush(state, dataCubesConfig, docValuesReader, sortMap);
}

// TODO: catch missing DV fields here? else we have
// null/"" depending on how docs landed in segments?
// but we can't detect all cases, and we should leave
// this behavior undefined. dv is not "schemaless": it's column-stride.
success = true;
} finally {
if (success) {
IOUtils.close(dataCubeDocValuesConsumer);
IOUtils.close(dataCubesConsumer);
} else {
IOUtils.closeWhileHandlingException(dataCubeDocValuesConsumer);
}
}

if (state.fieldInfos.hasDocValues() == false) {
if (dataCubeDocValuesConsumer != null) {
// BUG
throw new AssertionError(
"segment=" + state.segmentInfo + ": fieldInfos has no docValues but wrote them");
IOUtils.closeWhileHandlingException(dataCubesConsumer);
}
} else if (dataCubeDocValuesConsumer == null) {
// BUG
throw new AssertionError(
"segment=" + state.segmentInfo + ": fieldInfos has docValues but did not wrote them");
}
}

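With this refactor, writeDataCubes no longer walks the per-field docValuesWriters itself; it hands the consumer the in-memory doc values view from getDocValuesLeafReader() together with the sort map, which is why it must still run before writeDocValues. A sketch of what a flush implementation might do with those two arguments, assuming a single numeric dimension field named "dim" (hypothetical); real consumers would iterate the fields declared in dataCubesConfig:

// Inside a DataCubesConsumer.flush implementation (sketch, method body excerpt).
SortedNumericDocValues dims = docValuesReader.getSortedNumericDocValues("dim");
if (dims != null) {
  for (int doc = dims.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = dims.nextDoc()) {
    // remap buffered doc IDs when the segment has an index sort
    int newDoc = sortMap == null ? doc : sortMap.oldToNew(doc);
    for (int i = 0; i < dims.docValueCount(); i++) {
      long value = dims.nextValue();
      // accumulate (newDoc, value) into the in-memory cube structure
    }
  }
}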
@@ -294,7 +294,7 @@ private void mergeVectorValues(

private void mergeDataCubes(
SegmentWriteState segmentWriteState, SegmentReadState segmentReadState) throws IOException {
try (DataCubesDocValuesConsumer consumer =
try (DataCubesConsumer consumer =
codec
.dataCubesFormat()
.fieldsConsumer(segmentWriteState, mergeState.segmentInfo.getDataCubesConfig())) {
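On the merge path the consumer receives the whole MergeState. A hedged sketch of a merge implementation that re-reads one dimension from each incoming segment, again using the hypothetical field "dim" and skipping documents dropped by the merge:

// Sketch of DataCubesConsumer.merge (excerpt of the consumer class above).
@Override
public void merge(MergeState mergeState) throws IOException {
  for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
    DocValuesProducer producer = mergeState.docValuesProducers[i];
    FieldInfo fieldInfo = mergeState.fieldInfos[i].fieldInfo("dim"); // hypothetical field
    if (producer == null || fieldInfo == null) continue;
    SortedNumericDocValues dims = producer.getSortedNumeric(fieldInfo);
    MergeState.DocMap docMap = mergeState.docMaps[i]; // maps segment docs to merged docs
    for (int doc = dims.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = dims.nextDoc()) {
      int mergedDoc = docMap.get(doc);
      if (mergedDoc == -1) continue; // deleted in the merged segment
      for (int j = 0; j < dims.docValueCount(); j++) {
        long value = dims.nextValue();
        // fold (mergedDoc, value) into the merged cube structure
      }
    }
  }
}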
@@ -320,8 +320,6 @@ public void checkIntegrity() throws IOException {}

@Override
public DataCubeValues<?> getDataCubeValues(String field) throws IOException {
// TODO : change this
// return reader.getAggregatedDocValues();
return reader.getDataCubeValues(field);
}

