Skip to content

Commit

Permalink
Reusing docValuesReader and refactoring DataCubesConsumer
Browse files Browse the repository at this point in the history
Signed-off-by: Bharathwaj G <[email protected]>
  • Loading branch information
bharath-techie committed May 7, 2024
1 parent b17258a commit fd201f6
Show file tree
Hide file tree
Showing 9 changed files with 77 additions and 218 deletions.
19 changes: 9 additions & 10 deletions lucene/core/src/java/org/apache/lucene/codecs/DataCubesFormat.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

import java.io.IOException;
import org.apache.lucene.index.DataCubesConfig;
import org.apache.lucene.index.DataCubesDocValuesConsumer;
import org.apache.lucene.index.DataCubesConsumer;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.NamedSPILoader;
Expand All @@ -43,15 +43,14 @@ public String getName() {
public abstract DataCubesProducer<?> fieldsProducer(SegmentReadState state) throws IOException;

/**
* Returns a {@link org.apache.lucene.index.DataCubesDocValuesConsumer} to write dataCubesValues
* to the index based on docValues NOTE: by the time this call returns, it must hold open any
* files it will need to use; else, those files may be deleted. Additionally, required files may
* be deleted during the execution of this call before there is a chance to open them. Under these
* circumstances an IOException should be thrown by the implementation. IOExceptions are expected
* and will automatically cause a retry of the segment opening logic with the newly revised
* segments
* Returns a {@link DataCubesConsumer} to write dataCubesValues to the index based on docValues
* NOTE: by the time this call returns, it must hold open any files it will need to use; else,
* those files may be deleted. Additionally, required files may be deleted during the execution of
* this call before there is a chance to open them. Under these circumstances an IOException
* should be thrown by the implementation. IOExceptions are expected and will automatically cause
* a retry of the segment opening logic with the newly revised segments
*/
public abstract DataCubesDocValuesConsumer fieldsConsumer(
public abstract DataCubesConsumer fieldsConsumer(
SegmentWriteState state, DataCubesConfig dataCubesConfig) throws IOException;

/** A {@code DataCubesFormat} that has nothing indexed */
Expand All @@ -63,7 +62,7 @@ public DataCubesProducer<?> fieldsProducer(SegmentReadState state) throws IOExce
}

@Override
public DataCubesDocValuesConsumer fieldsConsumer(
public DataCubesConsumer fieldsConsumer(
SegmentWriteState state, DataCubesConfig dataCubesConfig) throws IOException {
throw new UnsupportedOperationException("Attempt to write EMPTY DataCube values");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ public final ByteVectorValues getByteVectorValues(String field) throws IOExcepti
}

@Override
public DataCubeValues<?> getDataCubeValues(String field) throws IOException {
public final DataCubeValues<?> getDataCubeValues(String field) throws IOException {
ensureOpen();
return getDataCubesProducer().getDataCubeValues(field);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ public String getName() {
/**
* Writes a DataCubeField to a DataOutput
*
* <p>This is used to record index sort information in segment headers
* <p>This is used to record DataCubeField information in segment headers
*/
public abstract void writeDataCubeField(DataCubeField cf, DataOutput out) throws IOException;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;

import java.io.Closeable;
import java.io.IOException;

/**
* Abstract API to write DataCubeIndices
*
* @lucene.experimental
*/
public abstract class DataCubesConsumer implements Closeable {

/** Sole constructor */
public DataCubesConsumer() throws IOException {
super();
}

/** Create the DataCubes index based on the DocValues and flushes the indices to disk */
public abstract void flush(
SegmentWriteState state,
DataCubesConfig dataCubesConfig,
LeafReader docValuesReader,
Sorter.DocMap sortMap)
throws IOException;

/** Merges in the DataCubes fields from the readers in <code>mergeState</code>. */
public abstract void merge(MergeState mergeState) throws IOException;

@Override
public void close() throws IOException {}
}

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -101,11 +101,6 @@ public PointsReader getPointsReader() {
return in.getPointsReader();
}

@Override
public DataCubeValues<?> getDataCubeValues(String field) throws IOException {
return in.getDataCubeValues(field);
}

@Override
public DataCubesProducer<?> getDataCubesProducer() {
return in.getDataCubesProducer();
Expand Down
80 changes: 16 additions & 64 deletions lucene/core/src/java/org/apache/lucene/index/IndexingChain.java
Original file line number Diff line number Diff line change
Expand Up @@ -280,22 +280,20 @@ Sorter.DocMap flush(SegmentWriteState state) throws IOException {
infoStream.message(
"IW", TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - t0) + " ms to write norms");
}
SegmentReadState readState =
new SegmentReadState(
state.directory,
state.segmentInfo,
state.fieldInfos,
IOContext.DEFAULT,
state.segmentSuffix);

// This must be called before writeDocValues because this uses docValuesWriter
DataCubesConfig dataCubesConfig = state.segmentInfo.getDataCubesConfig();
t0 = System.nanoTime();
writeDataCubes(state, sortMap, dataCubesConfig);
if (infoStream.isEnabled("IW")) {
infoStream.message(
"IW", TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - t0) + " ms to write dataCubes");
}
SegmentReadState readState =
new SegmentReadState(
state.directory,
state.segmentInfo,
state.fieldInfos,
IOContext.DEFAULT,
state.segmentSuffix);

t0 = System.nanoTime();
writeDocValues(state, sortMap);
Expand Down Expand Up @@ -422,69 +420,23 @@ private void writeDataCubes(
SegmentWriteState state, Sorter.DocMap sortMap, DataCubesConfig dataCubesConfig)
throws IOException {
if (dataCubesConfig == null) return;
DataCubesDocValuesConsumer dataCubeDocValuesConsumer = null;
DataCubesConsumer dataCubesConsumer = null;
boolean success = false;
LeafReader docValuesReader = getDocValuesLeafReader();
try {
for (int i = 0; i < fieldHash.length; i++) {
PerField perField = fieldHash[i];
while (perField != null) {
if (perField.docValuesWriter != null) {
if (perField.fieldInfo.getDocValuesType() == DocValuesType.NONE) {
// BUG
throw new AssertionError(
"segment="
+ state.segmentInfo
+ ": field=\""
+ perField.fieldInfo.name
+ "\" has no docValues but wrote them");
}
if (dataCubeDocValuesConsumer == null) {
// lazy init
DataCubesFormat fmt = state.segmentInfo.getCodec().dataCubesFormat();
dataCubeDocValuesConsumer = fmt.fieldsConsumer(state, dataCubesConfig);
}
perField.docValuesWriter.flush(state, sortMap, dataCubeDocValuesConsumer);
} else if (perField.fieldInfo != null
&& perField.fieldInfo.getDocValuesType() != DocValuesType.NONE) {
// BUG
throw new AssertionError(
"segment="
+ state.segmentInfo
+ ": field=\""
+ perField.fieldInfo.name
+ "\" has docValues but did not write them");
}
perField = perField.next;
}
}
// IMPORTANT : This call creates the data cubes structures
if (dataCubeDocValuesConsumer != null) {
dataCubeDocValuesConsumer.flush(dataCubesConfig);
DataCubesFormat fmt = state.segmentInfo.getCodec().dataCubesFormat();
dataCubesConsumer = fmt.fieldsConsumer(state, dataCubesConfig);
// This creates the data cubes structures
if (dataCubesConsumer != null) {
dataCubesConsumer.flush(state, dataCubesConfig, docValuesReader, sortMap);
}

// TODO: catch missing DV fields here? else we have
// null/"" depending on how docs landed in segments?
// but we can't detect all cases, and we should leave
// this behavior undefined. dv is not "schemaless": it's column-stride.
success = true;
} finally {
if (success) {
IOUtils.close(dataCubeDocValuesConsumer);
IOUtils.close(dataCubesConsumer);
} else {
IOUtils.closeWhileHandlingException(dataCubeDocValuesConsumer);
}
}

if (state.fieldInfos.hasDocValues() == false) {
if (dataCubeDocValuesConsumer != null) {
// BUG
throw new AssertionError(
"segment=" + state.segmentInfo + ": fieldInfos has no docValues but wrote them");
IOUtils.closeWhileHandlingException(dataCubesConsumer);
}
} else if (dataCubeDocValuesConsumer == null) {
// BUG
throw new AssertionError(
"segment=" + state.segmentInfo + ": fieldInfos has docValues but did not wrote them");
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ private void mergeVectorValues(

private void mergeDataCubes(
SegmentWriteState segmentWriteState, SegmentReadState segmentReadState) throws IOException {
try (DataCubesDocValuesConsumer consumer =
try (DataCubesConsumer consumer =
codec
.dataCubesFormat()
.fieldsConsumer(segmentWriteState, mergeState.segmentInfo.getDataCubesConfig())) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ public FieldsProducer getPostingsReader() {
@Override
public DataCubesProducer<?> getDataCubesProducer() {
reader.ensureOpen();
return readerToDataCubeProducer(reader);
return dataCubeValuesToReader(reader);
}

@Override
Expand Down Expand Up @@ -313,15 +313,13 @@ public void close() {}
};
}

private static DataCubesProducer<Object> readerToDataCubeProducer(final LeafReader reader) {
private static DataCubesProducer<Object> dataCubeValuesToReader(final LeafReader reader) {
return new DataCubesProducer<>() {
@Override
public void checkIntegrity() throws IOException {}

@Override
public DataCubeValues<?> getDataCubeValues(String field) throws IOException {
// TODO : change this
// return reader.getAggregatedDocValues();
return reader.getDataCubeValues(field);
}

Expand Down

0 comments on commit fd201f6

Please sign in to comment.