Skip to content

Commit

Permalink
Complete keyword changes for star tree (#16233)
Browse files Browse the repository at this point in the history
---------

Signed-off-by: Bharathwaj G <[email protected]>
Signed-off-by: bharath-techie <[email protected]>
  • Loading branch information
bharath-techie authored Nov 12, 2024
1 parent b9d9729 commit 7f27ddc
Show file tree
Hide file tree
Showing 45 changed files with 2,120 additions and 307 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Increase segrep pressure checkpoint default limit to 30 ([#16577](https://github.com/opensearch-project/OpenSearch/pull/16577/files))
- Add dynamic setting allowing size > 0 requests to be cached in the request cache ([#16483](https://github.com/opensearch-project/OpenSearch/pull/16483))
- Make IndexStoreListener a pluggable interface ([#16583](https://github.com/opensearch-project/OpenSearch/pull/16583))
- Support for keyword fields in star-tree index ([#16233](https://github.com/opensearch-project/OpenSearch/pull/16233))
- Add a flag in QueryShardContext to differentiate inner hit query ([#16600](https://github.com/opensearch-project/OpenSearch/pull/16600))

### Dependencies
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public class StarTreeMapperIT extends OpenSearchIntegTestCase {
.put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), new ByteSizeValue(512, ByteSizeUnit.MB))
.build();

private static XContentBuilder createMinimalTestMapping(boolean invalidDim, boolean invalidMetric, boolean keywordDim) {
private static XContentBuilder createMinimalTestMapping(boolean invalidDim, boolean invalidMetric, boolean ipdim) {
try {
return jsonBuilder().startObject()
.startObject("composite")
Expand All @@ -68,12 +68,15 @@ private static XContentBuilder createMinimalTestMapping(boolean invalidDim, bool
.endObject()
.startArray("ordered_dimensions")
.startObject()
.field("name", getDim(invalidDim, keywordDim))
.field("name", getDim(invalidDim, ipdim))
.endObject()
.startObject()
.field("name", "keyword_dv")
.endObject()
.endArray()
.startArray("metrics")
.startObject()
.field("name", getDim(invalidMetric, false))
.field("name", getMetric(invalidMetric, false))
.endObject()
.endArray()
.endObject()
Expand All @@ -99,6 +102,10 @@ private static XContentBuilder createMinimalTestMapping(boolean invalidDim, bool
.field("type", "keyword")
.field("doc_values", false)
.endObject()
.startObject("ip")
.field("type", "ip")
.field("doc_values", false)
.endObject()
.endObject()
.endObject();
} catch (IOException e) {
Expand Down Expand Up @@ -356,10 +363,19 @@ private XContentBuilder getMappingWithDuplicateFields(boolean isDuplicateDim, bo
}

private static String getDim(boolean hasDocValues, boolean isKeyword) {
if (hasDocValues) {
return random().nextBoolean() ? "numeric" : "keyword";
} else if (isKeyword) {
return "ip";
}
return "numeric_dv";
}

private static String getMetric(boolean hasDocValues, boolean isKeyword) {
if (hasDocValues) {
return "numeric";
} else if (isKeyword) {
return "keyword";
return "ip";
}
return "numeric_dv";
}
Expand Down Expand Up @@ -398,6 +414,7 @@ public void testValidCompositeIndex() {
assertEquals(expectedTimeUnits.get(i).shortName(), dateDim.getSortedCalendarIntervals().get(i).shortName());
}
assertEquals("numeric_dv", starTreeFieldType.getDimensions().get(1).getField());
assertEquals("keyword_dv", starTreeFieldType.getDimensions().get(2).getField());
assertEquals("numeric_dv", starTreeFieldType.getMetrics().get(0).getField());
List<MetricStat> expectedMetrics = Arrays.asList(MetricStat.VALUE_COUNT, MetricStat.SUM, MetricStat.AVG);
assertEquals(expectedMetrics, starTreeFieldType.getMetrics().get(0).getMetrics());
Expand Down Expand Up @@ -665,10 +682,7 @@ public void testInvalidDimCompositeIndex() {
IllegalArgumentException.class,
() -> prepareCreate(TEST_INDEX).setSettings(settings).setMapping(createMinimalTestMapping(true, false, false)).get()
);
assertEquals(
"Aggregations not supported for the dimension field [numeric] with field type [integer] as part of star tree field",
ex.getMessage()
);
assertTrue(ex.getMessage().startsWith("Aggregations not supported for the dimension field "));
}

public void testMaxDimsCompositeIndex() {
Expand Down Expand Up @@ -734,7 +748,7 @@ public void testUnsupportedDim() {
() -> prepareCreate(TEST_INDEX).setSettings(settings).setMapping(createMinimalTestMapping(false, false, true)).get()
);
assertEquals(
"Failed to parse mapping [_doc]: unsupported field type associated with dimension [keyword] as part of star tree field [startree-1]",
"Failed to parse mapping [_doc]: unsupported field type associated with dimension [ip] as part of star tree field [startree-1]",
ex.getMessage()
);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.apache.lucene.index;

import org.apache.lucene.search.DocIdSetIterator;

/**
* Base wrapper class for DocValuesWriter.
*/
public interface DocValuesWriterWrapper<T extends DocIdSetIterator> {
T getDocValues();
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
*
* @opensearch.experimental
*/
public class SortedNumericDocValuesWriterWrapper {
public class SortedNumericDocValuesWriterWrapper implements DocValuesWriterWrapper<SortedNumericDocValues> {

private final SortedNumericDocValuesWriter sortedNumericDocValuesWriter;
private final SortedNumericDocValuesWriter sortedNumericDocValuesWriterDelegate;

/**
* Sole constructor. Constructs a new {@link SortedNumericDocValuesWriterWrapper} instance.
Expand All @@ -29,7 +29,7 @@ public class SortedNumericDocValuesWriterWrapper {
* @param counter a counter for tracking memory usage
*/
public SortedNumericDocValuesWriterWrapper(FieldInfo fieldInfo, Counter counter) {
sortedNumericDocValuesWriter = new SortedNumericDocValuesWriter(fieldInfo, counter);
sortedNumericDocValuesWriterDelegate = new SortedNumericDocValuesWriter(fieldInfo, counter);
}

/**
Expand All @@ -39,15 +39,16 @@ public SortedNumericDocValuesWriterWrapper(FieldInfo fieldInfo, Counter counter)
* @param value the value to add
*/
public void addValue(int docID, long value) {
sortedNumericDocValuesWriter.addValue(docID, value);
sortedNumericDocValuesWriterDelegate.addValue(docID, value);
}

/**
* Returns the {@link SortedNumericDocValues} instance containing the sorted numeric doc values
*
* @return the {@link SortedNumericDocValues} instance
*/
@Override
public SortedNumericDocValues getDocValues() {
return sortedNumericDocValuesWriter.getDocValues();
return sortedNumericDocValuesWriterDelegate.getDocValues();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.apache.lucene.index;

import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;

/**
* A wrapper class for writing sorted set doc values.
* <p>
* This class provides a convenient way to add sorted set doc values to a field
* and retrieve the corresponding {@link SortedSetDocValues} instance.
*
* @opensearch.experimental
*/
public class SortedSetDocValuesWriterWrapper implements DocValuesWriterWrapper<SortedSetDocValues> {

private final SortedSetDocValuesWriter sortedSetDocValuesWriterDelegate;

/**
* Sole constructor. Constructs a new {@link SortedSetDocValuesWriterWrapper} instance.
*
* @param fieldInfo the field information for the field being written
* @param counter a counter for tracking memory usage
* @param byteBlockPool a byte block pool for allocating byte blocks
* @see SortedSetDocValuesWriter
*/
public SortedSetDocValuesWriterWrapper(FieldInfo fieldInfo, Counter counter, ByteBlockPool byteBlockPool) {
sortedSetDocValuesWriterDelegate = new SortedSetDocValuesWriter(fieldInfo, counter, byteBlockPool);
}

/**
* Adds a bytes ref value to the sorted set doc values for the specified document.
*
* @param docID the document ID
* @param value the value to add
*/
public void addValue(int docID, BytesRef value) {
sortedSetDocValuesWriterDelegate.addValue(docID, value);
}

/**
* Returns the {@link SortedSetDocValues} instance containing the sorted numeric doc values
*
* @return the {@link SortedSetDocValues} instance
*/
@Override
public SortedSetDocValues getDocValues() {
return sortedSetDocValuesWriterDelegate.getDocValues();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
Expand All @@ -40,6 +40,7 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -111,7 +112,7 @@ public Composite912DocValuesReader(DocValuesProducer producer, SegmentReadState
readState.segmentInfo.getId(),
readState.segmentSuffix
);

Map<String, DocValuesType> dimensionFieldTypeMap = new HashMap<>();
while (true) {

// validate magic marker
Expand Down Expand Up @@ -155,13 +156,16 @@ public Composite912DocValuesReader(DocValuesProducer producer, SegmentReadState
compositeIndexInputMap.put(compositeFieldName, starTreeIndexInput);
compositeIndexMetadataMap.put(compositeFieldName, starTreeMetadata);

List<String> dimensionFields = starTreeMetadata.getDimensionFields();

Map<String, DocValuesType> dimensionFieldToDocValuesMap = starTreeMetadata.getDimensionFields();
// generating star tree unique fields (fully qualified name for dimension and metrics)
for (String dimensions : dimensionFields) {
fields.add(fullyQualifiedFieldNameForStarTreeDimensionsDocValues(compositeFieldName, dimensions));
for (Map.Entry<String, DocValuesType> dimensionEntry : dimensionFieldToDocValuesMap.entrySet()) {
String dimName = fullyQualifiedFieldNameForStarTreeDimensionsDocValues(
compositeFieldName,
dimensionEntry.getKey()
);
fields.add(dimName);
dimensionFieldTypeMap.put(dimName, dimensionEntry.getValue());
}

// adding metric fields
for (Metric metric : starTreeMetadata.getMetrics()) {
for (MetricStat metricStat : metric.getBaseMetrics()) {
Expand All @@ -184,7 +188,7 @@ public Composite912DocValuesReader(DocValuesProducer producer, SegmentReadState

// populates the dummy list of field infos to fetch doc id set iterators for respective fields.
// the dummy field info is used to fetch the doc id set iterators for respective fields based on field name
FieldInfos fieldInfos = new FieldInfos(getFieldInfoList(fields));
FieldInfos fieldInfos = new FieldInfos(getFieldInfoList(fields, dimensionFieldTypeMap));
this.readState = new SegmentReadState(
readState.directory,
readState.segmentInfo,
Expand Down Expand Up @@ -291,17 +295,4 @@ public CompositeIndexValues getCompositeIndexValues(CompositeIndexFieldInfo comp

}

/**
* Returns the sorted numeric doc values for the given sorted numeric field.
* If the sorted numeric field is null, it returns an empty doc id set iterator.
* <p>
* Sorted numeric field can be null for cases where the segment doesn't hold a particular value.
*
* @param sortedNumeric the sorted numeric doc values for a field
* @return empty sorted numeric values if the field is not present, else sortedNumeric
*/
public static SortedNumericDocValues getSortedNumericDocValues(SortedNumericDocValues sortedNumeric) {
return sortedNumeric == null ? DocValues.emptySortedNumeric() : sortedNumeric;
}

}
Loading

0 comments on commit 7f27ddc

Please sign in to comment.