Skip to content

Commit

Permalink
Star Tree File Formats (opensearch-project#14809)
Browse files Browse the repository at this point in the history
---------

Signed-off-by: Sarthak Aggarwal <[email protected]>
  • Loading branch information
sarthakaggarwal97 authored and dk2k committed Oct 17, 2024
1 parent d596298 commit ecf248a
Show file tree
Hide file tree
Showing 38 changed files with 3,479 additions and 823 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
package org.opensearch.index.codec.composite;

import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.index.compositeindex.datacube.startree.index.CompositeIndexValues;

import java.io.IOException;
import java.util.List;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,36 @@ public class Composite99DocValuesFormat extends DocValuesFormat {
private final DocValuesFormat delegate;
private final MapperService mapperService;

/** Data codec name for Composite Doc Values Format */
public static final String DATA_CODEC_NAME = "Composite99FormatData";

/** Meta codec name for Composite Doc Values Format */
public static final String META_CODEC_NAME = "Composite99FormatMeta";

/** Filename extension for the composite index data */
public static final String DATA_EXTENSION = "cid";

/** Filename extension for the composite index meta */
public static final String META_EXTENSION = "cim";

/** Data doc values codec name for Composite Doc Values Format */
public static final String DATA_DOC_VALUES_CODEC = "Composite99DocValuesData";

/** Meta doc values codec name for Composite Doc Values Format */
public static final String META_DOC_VALUES_CODEC = "Composite99DocValuesMetadata";

/** Filename extension for the composite index data doc values */
public static final String DATA_DOC_VALUES_EXTENSION = "cidvd";

/** Filename extension for the composite index meta doc values */
public static final String META_DOC_VALUES_EXTENSION = "cidvm";

/** Initial version for the Composite90DocValuesFormat */
public static final int VERSION_START = 0;

/** Current version for the Composite90DocValuesFormat */
public static final int VERSION_CURRENT = VERSION_START;

// needed for SPI
public Composite99DocValuesFormat() {
this(new Lucene90DocValuesFormat(), null);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,47 @@

package org.opensearch.index.codec.composite.composite99;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.common.util.io.IOUtils;
import org.opensearch.index.codec.composite.CompositeIndexFieldInfo;
import org.opensearch.index.codec.composite.CompositeIndexReader;
import org.opensearch.index.codec.composite.CompositeIndexValues;
import org.opensearch.index.codec.composite.LuceneDocValuesProducerFactory;
import org.opensearch.index.compositeindex.CompositeIndexMetadata;
import org.opensearch.index.compositeindex.datacube.Metric;
import org.opensearch.index.compositeindex.datacube.MetricStat;
import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.StarTreeMetadata;
import org.opensearch.index.compositeindex.datacube.startree.index.CompositeIndexValues;
import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues;
import org.opensearch.index.mapper.CompositeMappedFieldType;

import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import static org.opensearch.index.compositeindex.CompositeIndexConstants.COMPOSITE_FIELD_MARKER;
import static org.opensearch.index.compositeindex.datacube.startree.fileformats.StarTreeWriter.VERSION_CURRENT;
import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.fullyQualifiedFieldNameForStarTreeDimensionsDocValues;
import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.fullyQualifiedFieldNameForStarTreeMetricsDocValues;
import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.getFieldInfoList;

/**
* Reader for star tree index and star tree doc values from the segments
Expand All @@ -32,11 +57,158 @@
*/
@ExperimentalApi
public class Composite99DocValuesReader extends DocValuesProducer implements CompositeIndexReader {
private DocValuesProducer delegate;
private static final Logger logger = LogManager.getLogger(Composite99DocValuesReader.class);

private final DocValuesProducer delegate;
private IndexInput dataIn;
private ChecksumIndexInput metaIn;
private final Map<String, IndexInput> compositeIndexInputMap = new LinkedHashMap<>();
private final Map<String, CompositeIndexMetadata> compositeIndexMetadataMap = new LinkedHashMap<>();
private final List<String> fields;
private DocValuesProducer compositeDocValuesProducer;
private final List<CompositeIndexFieldInfo> compositeFieldInfos = new ArrayList<>();
private SegmentReadState readState;

public Composite99DocValuesReader(DocValuesProducer producer, SegmentReadState state) throws IOException {
public Composite99DocValuesReader(DocValuesProducer producer, SegmentReadState readState) throws IOException {
this.delegate = producer;
// TODO : read star tree files
this.fields = new ArrayList<>();

String metaFileName = IndexFileNames.segmentFileName(
readState.segmentInfo.name,
readState.segmentSuffix,
Composite99DocValuesFormat.META_EXTENSION
);

String dataFileName = IndexFileNames.segmentFileName(
readState.segmentInfo.name,
readState.segmentSuffix,
Composite99DocValuesFormat.DATA_EXTENSION
);

boolean success = false;
try {

// initialize meta input
dataIn = readState.directory.openInput(dataFileName, readState.context);
CodecUtil.checkIndexHeader(
dataIn,
Composite99DocValuesFormat.DATA_CODEC_NAME,
Composite99DocValuesFormat.VERSION_START,
Composite99DocValuesFormat.VERSION_CURRENT,
readState.segmentInfo.getId(),
readState.segmentSuffix
);

// initialize data input
metaIn = readState.directory.openChecksumInput(metaFileName, readState.context);
Throwable priorE = null;
try {
CodecUtil.checkIndexHeader(
metaIn,
Composite99DocValuesFormat.META_CODEC_NAME,
Composite99DocValuesFormat.VERSION_START,
Composite99DocValuesFormat.VERSION_CURRENT,
readState.segmentInfo.getId(),
readState.segmentSuffix
);

while (true) {

// validate magic marker
long magicMarker = metaIn.readLong();
if (magicMarker == -1) {
break;
} else if (magicMarker < 0) {
throw new CorruptIndexException("Unknown token encountered: " + magicMarker, metaIn);
} else if (COMPOSITE_FIELD_MARKER != magicMarker) {
logger.error("Invalid composite field magic marker");
throw new IOException("Invalid composite field magic marker");
}

int version = metaIn.readVInt();
if (VERSION_CURRENT != version) {
logger.error("Invalid composite field version");
throw new IOException("Invalid composite field version");
}

// construct composite index metadata
String compositeFieldName = metaIn.readString();
CompositeMappedFieldType.CompositeFieldType compositeFieldType = CompositeMappedFieldType.CompositeFieldType.fromName(
metaIn.readString()
);

switch (compositeFieldType) {
case STAR_TREE:
StarTreeMetadata starTreeMetadata = new StarTreeMetadata(
metaIn,
compositeFieldName,
compositeFieldType,
version
);
compositeFieldInfos.add(new CompositeIndexFieldInfo(compositeFieldName, compositeFieldType));

IndexInput starTreeIndexInput = dataIn.slice(
"star-tree data slice for respective star-tree fields",
starTreeMetadata.getDataStartFilePointer(),
starTreeMetadata.getDataLength()
);
compositeIndexInputMap.put(compositeFieldName, starTreeIndexInput);
compositeIndexMetadataMap.put(compositeFieldName, starTreeMetadata);

List<String> dimensionFields = starTreeMetadata.getDimensionFields();

// generating star tree unique fields (fully qualified name for dimension and metrics)
for (String dimensions : dimensionFields) {
fields.add(fullyQualifiedFieldNameForStarTreeDimensionsDocValues(compositeFieldName, dimensions));
}

// adding metric fields
for (Metric metric : starTreeMetadata.getMetrics()) {
for (MetricStat metricStat : metric.getMetrics()) {
fields.add(
fullyQualifiedFieldNameForStarTreeMetricsDocValues(
compositeFieldName,
metric.getField(),
metricStat.getTypeName()
)
);

}
}

break;
default:
throw new CorruptIndexException("Invalid composite field type found in the file", dataIn);
}
}

// populates the dummy list of field infos to fetch doc id set iterators for respective fields.
// the dummy field info is used to fetch the doc id set iterators for respective fields based on field name
FieldInfos fieldInfos = new FieldInfos(getFieldInfoList(fields));
this.readState = new SegmentReadState(readState.directory, readState.segmentInfo, fieldInfos, readState.context);

// initialize star-tree doc values producer

compositeDocValuesProducer = LuceneDocValuesProducerFactory.getDocValuesProducerForCompositeCodec(
Composite99Codec.COMPOSITE_INDEX_CODEC_NAME,
this.readState,
Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC,
Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION,
Composite99DocValuesFormat.META_DOC_VALUES_CODEC,
Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION
);

} catch (Throwable t) {
priorE = t;
} finally {
CodecUtil.checkFooter(metaIn, priorE);
}
success = true;
} finally {
if (success == false) {
IOUtils.closeWhileHandlingException(this);
}
}
}

@Override
Expand Down Expand Up @@ -67,24 +239,63 @@ public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
@Override
public void checkIntegrity() throws IOException {
delegate.checkIntegrity();
// Todo : check integrity of composite index related [star tree] files
CodecUtil.checksumEntireFile(dataIn);
}

@Override
public void close() throws IOException {
delegate.close();
// Todo: close composite index related files [star tree] files
boolean success = false;
try {
IOUtils.close(metaIn, dataIn);
IOUtils.close(compositeDocValuesProducer);
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(metaIn, dataIn);
}
compositeIndexInputMap.clear();
compositeIndexMetadataMap.clear();
fields.clear();
metaIn = null;
dataIn = null;
}
}

@Override
public List<CompositeIndexFieldInfo> getCompositeIndexFields() {
// todo : read from file formats and get the field names.
return new ArrayList<>();
return compositeFieldInfos;
}

@Override
public CompositeIndexValues getCompositeIndexValues(CompositeIndexFieldInfo compositeIndexFieldInfo) throws IOException {
// TODO : read compositeIndexValues [starTreeValues] from star tree files
throw new UnsupportedOperationException();

switch (compositeIndexFieldInfo.getType()) {
case STAR_TREE:
return new StarTreeValues(
compositeIndexMetadataMap.get(compositeIndexFieldInfo.getField()),
compositeIndexInputMap.get(compositeIndexFieldInfo.getField()),
compositeDocValuesProducer,
this.readState
);

default:
throw new CorruptIndexException("Unsupported composite index field type: ", compositeIndexFieldInfo.getType().getName());
}

}

/**
* Returns the sorted numeric doc values for the given sorted numeric field.
* If the sorted numeric field is null, it returns an empty doc id set iterator.
* <p>
* Sorted numeric field can be null for cases where the segment doesn't hold a particular value.
*
* @param sortedNumeric the sorted numeric doc values for a field
* @return empty sorted numeric values if the field is not present, else sortedNumeric
*/
public static SortedNumericDocValues getSortedNumericDocValues(SortedNumericDocValues sortedNumeric) {
return sortedNumeric == null ? DocValues.emptySortedNumeric() : sortedNumeric;
}

}
Loading

0 comments on commit ecf248a

Please sign in to comment.