Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport 2.x] [Derived Fields] Add support for emitting multiple values in DerivedFieldScripts #13083

Merged
merged 2 commits into from
Apr 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
import org.opensearch.repositories.RepositoriesService;
import org.opensearch.rest.RestController;
import org.opensearch.rest.RestHandler;
import org.opensearch.script.DerivedFieldScript;
import org.opensearch.script.IngestScript;
import org.opensearch.script.ScoreScript;
import org.opensearch.script.ScriptContext;
Expand Down Expand Up @@ -108,6 +109,11 @@ public final class PainlessPlugin extends Plugin implements ScriptPlugin, Extens
ingest.add(WhitelistLoader.loadFromResourceFiles(Whitelist.class, "org.opensearch.ingest.txt"));
map.put(IngestScript.CONTEXT, ingest);

// Functions available to derived fields
List<Whitelist> derived = new ArrayList<>(Whitelist.BASE_WHITELISTS);
derived.add(WhitelistLoader.loadFromResourceFiles(Whitelist.class, "org.opensearch.derived.txt"));
map.put(DerivedFieldScript.CONTEXT, derived);

allowlists = map;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#

# This file contains an allowlist for functions to be used in derived field context

class org.opensearch.script.DerivedFieldScript @no_import {
}

static_import {
void emit(org.opensearch.script.DerivedFieldScript, Object) bound_to org.opensearch.script.ScriptEmitValues$EmitSingle
void emit(org.opensearch.script.DerivedFieldScript, double, double) bound_to org.opensearch.script.ScriptEmitValues$GeoPoint
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.painless;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.memory.MemoryIndex;
import org.opensearch.common.collect.Tuple;
import org.opensearch.common.geo.GeoPoint;
import org.opensearch.common.settings.Settings;
import org.opensearch.index.fielddata.IndexGeoPointFieldData;
import org.opensearch.index.fielddata.IndexNumericFieldData;
import org.opensearch.index.fielddata.LeafGeoPointFieldData;
import org.opensearch.index.fielddata.LeafNumericFieldData;
import org.opensearch.index.fielddata.MultiGeoPointValues;
import org.opensearch.index.fielddata.SortedNumericDoubleValues;
import org.opensearch.index.fielddata.plain.AbstractLeafGeoPointFieldData;
import org.opensearch.index.fielddata.plain.LeafDoubleFieldData;
import org.opensearch.index.mapper.GeoPointFieldMapper.GeoPointFieldType;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.index.mapper.NumberFieldMapper.NumberFieldType;
import org.opensearch.index.mapper.NumberFieldMapper.NumberType;
import org.opensearch.painless.spi.Whitelist;
import org.opensearch.painless.spi.WhitelistLoader;
import org.opensearch.script.DerivedFieldScript;
import org.opensearch.script.ScriptContext;
import org.opensearch.script.ScriptException;
import org.opensearch.search.lookup.LeafSearchLookup;
import org.opensearch.search.lookup.SearchLookup;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyInt;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

public class DerivedFieldScriptTests extends ScriptTestCase {

private static PainlessScriptEngine SCRIPT_ENGINE;

@Override
public void setUp() throws Exception {
super.setUp();

// Adding derived field script to the contexts for the script engine
Map<ScriptContext<?>, List<Whitelist>> contexts = newDefaultContexts();
List<Whitelist> allowlists = new ArrayList<>(Whitelist.BASE_WHITELISTS);
allowlists.add(WhitelistLoader.loadFromResourceFiles(Whitelist.class, "org.opensearch.derived.txt"));
contexts.put(DerivedFieldScript.CONTEXT, allowlists);

SCRIPT_ENGINE = new PainlessScriptEngine(Settings.EMPTY, contexts);
}

@Override
public void tearDown() throws Exception {
super.tearDown();
SCRIPT_ENGINE = null;
}

@Override
protected PainlessScriptEngine getEngine() {
return SCRIPT_ENGINE;
}

private DerivedFieldScript.LeafFactory compile(String expression, SearchLookup lookup) {
DerivedFieldScript.Factory factory = SCRIPT_ENGINE.compile(
"derived_script_test",
expression,
DerivedFieldScript.CONTEXT,
Collections.emptyMap()
);
return factory.newFactory(Collections.emptyMap(), lookup);
}

public void testEmittingDoubleField() throws IOException {
// Mocking field value to be returned
NumberFieldType fieldType = new NumberFieldType("test_double_field", NumberType.DOUBLE);
MapperService mapperService = mock(MapperService.class);
when(mapperService.fieldType("test_double_field")).thenReturn(fieldType);

SortedNumericDoubleValues doubleValues = mock(SortedNumericDoubleValues.class);
when(doubleValues.docValueCount()).thenReturn(1);
when(doubleValues.advanceExact(anyInt())).thenReturn(true);
when(doubleValues.nextValue()).thenReturn(2.718);

LeafNumericFieldData atomicFieldData = mock(LeafDoubleFieldData.class); // SortedNumericDoubleFieldData
when(atomicFieldData.getDoubleValues()).thenReturn(doubleValues);

IndexNumericFieldData fieldData = mock(IndexNumericFieldData.class); // SortedNumericIndexFieldData
when(fieldData.getFieldName()).thenReturn("test_double_field");
when(fieldData.load(any())).thenReturn(atomicFieldData);

SearchLookup lookup = new SearchLookup(mapperService, (ignored, searchLookup) -> fieldData);

// We don't need a real index, just need to construct a LeafReaderContext which cannot be mocked
MemoryIndex index = new MemoryIndex();
LeafReaderContext leafReaderContext = index.createSearcher().getIndexReader().leaves().get(0);

// Execute the script
DerivedFieldScript script = compile("emit(doc['test_double_field'].value)", lookup).newInstance(leafReaderContext);
script.setDocument(1);
script.execute();

List<Object> result = script.getEmittedValues();
assertEquals(List.of(2.718), result);
}

public void testEmittingGeoPoint() throws IOException {
// Mocking field value to be returned
GeoPointFieldType fieldType = new GeoPointFieldType("test_geo_field");
MapperService mapperService = mock(MapperService.class);
when(mapperService.fieldType("test_geo_field")).thenReturn(fieldType);

MultiGeoPointValues geoPointValues = mock(MultiGeoPointValues.class);
when(geoPointValues.docValueCount()).thenReturn(1);
when(geoPointValues.advanceExact(anyInt())).thenReturn(true);
when(geoPointValues.nextValue()).thenReturn(new GeoPoint(5, 8));

LeafGeoPointFieldData atomicFieldData = mock(AbstractLeafGeoPointFieldData.class); // LatLonPointDVLeafFieldData
when(atomicFieldData.getGeoPointValues()).thenReturn(geoPointValues);

IndexGeoPointFieldData fieldData = mock(IndexGeoPointFieldData.class);
when(fieldData.getFieldName()).thenReturn("test_geo_field");
when(fieldData.load(any())).thenReturn(atomicFieldData);

SearchLookup lookup = new SearchLookup(mapperService, (ignored, searchLookup) -> fieldData);

// We don't need a real index, just need to construct a LeafReaderContext which cannot be mocked
MemoryIndex index = new MemoryIndex();
LeafReaderContext leafReaderContext = index.createSearcher().getIndexReader().leaves().get(0);

// Execute the script
DerivedFieldScript script = compile("emit(doc['test_geo_field'].value.getLat(), doc['test_geo_field'].value.getLon())", lookup)
.newInstance(leafReaderContext);
script.setDocument(1);
script.execute();

List<Object> result = script.getEmittedValues();
assertEquals(List.of(new Tuple<>(5.0, 8.0)), result);
}

public void testEmittingMultipleValues() throws IOException {
SearchLookup lookup = mock(SearchLookup.class);

// We don't need a real index, just need to construct a LeafReaderContext which cannot be mocked
MemoryIndex index = new MemoryIndex();
LeafReaderContext leafReaderContext = index.createSearcher().getIndexReader().leaves().get(0);

LeafSearchLookup leafSearchLookup = mock(LeafSearchLookup.class);
when(lookup.getLeafSearchLookup(leafReaderContext)).thenReturn(leafSearchLookup);

// Execute the script
DerivedFieldScript script = compile(
"def l = new ArrayList(); l.add('test'); l.add('multiple'); l.add('values'); for (String x : l) emit(x)",
lookup
).newInstance(leafReaderContext);
script.setDocument(1);
script.execute();

List<Object> result = script.getEmittedValues();
assertEquals(List.of("test", "multiple", "values"), result);
}

public void testExceedingByteSizeLimit() throws IOException {
SearchLookup lookup = mock(SearchLookup.class);

// We don't need a real index, just need to construct a LeafReaderContext which cannot be mocked
MemoryIndex index = new MemoryIndex();
LeafReaderContext leafReaderContext = index.createSearcher().getIndexReader().leaves().get(0);

LeafSearchLookup leafSearchLookup = mock(LeafSearchLookup.class);
when(lookup.getLeafSearchLookup(leafReaderContext)).thenReturn(leafSearchLookup);

// Emitting a large string to exceed the byte size limit
DerivedFieldScript stringScript = compile("for (int i = 0; i < 1024 * 1024; i++) emit('a' + i);", lookup).newInstance(
leafReaderContext
);
expectThrows(ScriptException.class, () -> {
stringScript.setDocument(1);
stringScript.execute();
});

// Emitting an integer to check byte size limit
DerivedFieldScript intScript = compile("for (int i = 0; i < 1024 * 1024; i++) emit(42)", lookup).newInstance(leafReaderContext);
expectThrows(ScriptException.class, "Expected IllegalStateException for exceeding byte size limit", () -> {
intScript.setDocument(1);
intScript.execute();
});

// Emitting a long to check byte size limit
DerivedFieldScript longScript = compile("for (int i = 0; i < 1024 * 1024; i++) emit(1234567890123456789L)", lookup).newInstance(
leafReaderContext
);
expectThrows(ScriptException.class, "Expected IllegalStateException for exceeding byte size limit", () -> {
longScript.setDocument(1);
longScript.execute();
});

// Emitting a double to check byte size limit
DerivedFieldScript doubleScript = compile("for (int i = 0; i < 1024 * 1024; i++) emit(3.14159)", lookup).newInstance(
leafReaderContext
);
expectThrows(ScriptException.class, "Expected IllegalStateException for exceeding byte size limit", () -> {
doubleScript.setDocument(1);
doubleScript.execute();
});

// Emitting a GeoPoint to check byte size limit
DerivedFieldScript geoPointScript = compile("for (int i = 0; i < 1024 * 1024; i++) emit(1.23, 4.56);", lookup).newInstance(
leafReaderContext
);
expectThrows(ScriptException.class, "Expected IllegalStateException for exceeding byte size limit", () -> {
geoPointScript.setDocument(1);
geoPointScript.execute();
});
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ public DerivedFieldValueFetcher(DerivedFieldScript.LeafFactory derivedFieldScrip
@Override
public List<Object> fetchValues(SourceLookup lookup) {
derivedFieldScript.setDocument(lookup.docId());
// TODO: remove List.of() when derivedFieldScript.execute() returns list of objects.
return List.of(derivedFieldScript.execute());
derivedFieldScript.execute();
return derivedFieldScript.getEmittedValues();
}

public void setNextReader(LeafReaderContext context) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,17 @@
package org.opensearch.script;

import org.apache.lucene.index.LeafReaderContext;
import org.opensearch.common.logging.DeprecationLogger;
import org.opensearch.common.collect.Tuple;
import org.opensearch.index.fielddata.ScriptDocValues;
import org.opensearch.search.lookup.LeafSearchLookup;
import org.opensearch.search.lookup.SearchLookup;
import org.opensearch.search.lookup.SourceLookup;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;

Expand All @@ -30,7 +33,7 @@

public static final String[] PARAMETERS = {};
public static final ScriptContext<Factory> CONTEXT = new ScriptContext<>("derived_field", Factory.class);
private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(DynamicMap.class);
private static final int MAX_BYTE_SIZE = 1024 * 1024; // Maximum allowed byte size (1 MB)

private static final Map<String, Function<Object, Object>> PARAMS_FUNCTIONS = Map.of(
"doc",
Expand All @@ -49,16 +52,27 @@
*/
private final LeafSearchLookup leafLookup;

/**
* The field values emitted from the script.
*/
private List<Object> emittedValues;

private int totalByteSize;

public DerivedFieldScript(Map<String, Object> params, SearchLookup lookup, LeafReaderContext leafContext) {
Map<String, Object> parameters = new HashMap<>(params);
this.leafLookup = lookup.getLeafSearchLookup(leafContext);
parameters.putAll(leafLookup.asMap());
this.params = new DynamicMap(parameters, PARAMS_FUNCTIONS);
this.emittedValues = new ArrayList<>();
this.totalByteSize = 0;
}

protected DerivedFieldScript() {
params = null;
leafLookup = null;
public DerivedFieldScript() {
this.params = null;
this.leafLookup = null;
this.emittedValues = new ArrayList<>();
this.totalByteSize = 0;

Check warning on line 75 in server/src/main/java/org/opensearch/script/DerivedFieldScript.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/script/DerivedFieldScript.java#L71-L75

Added lines #L71 - L75 were not covered by tests
}

/**
Expand All @@ -75,14 +89,54 @@
return leafLookup.doc();
}

/**
* Return the emitted values from the script execution.
*/
public List<Object> getEmittedValues() {
return emittedValues;
}

/**
* Set the current document to run the script on next.
* Clears the emittedValues as well since they should be scoped per document.
*/
public void setDocument(int docid) {
this.emittedValues = new ArrayList<>();
this.totalByteSize = 0;
leafLookup.setDocument(docid);
}

public abstract Object execute();
public void addEmittedValue(Object o) {
int byteSize = getObjectByteSize(o);
int newTotalByteSize = totalByteSize + byteSize;
if (newTotalByteSize <= MAX_BYTE_SIZE) {
emittedValues.add(o);
totalByteSize = newTotalByteSize;
} else {
throw new IllegalStateException("Exceeded maximum allowed byte size for emitted values");
}
}

private int getObjectByteSize(Object obj) {
if (obj instanceof String) {
return ((String) obj).getBytes(StandardCharsets.UTF_8).length;
} else if (obj instanceof Integer) {
return Integer.BYTES;
} else if (obj instanceof Long) {
return Long.BYTES;
} else if (obj instanceof Double) {
return Double.BYTES;
} else if (obj instanceof Boolean) {
return Byte.BYTES; // Assuming 1 byte for boolean
} else if (obj instanceof Tuple) {
// Assuming each element in the tuple is a double for GeoPoint case
return Double.BYTES * 2;
} else {
throw new IllegalArgumentException("Unsupported object type passed in emit()");

Check warning on line 135 in server/src/main/java/org/opensearch/script/DerivedFieldScript.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/script/DerivedFieldScript.java#L135

Added line #L135 was not covered by tests
}
}

public void execute() {}

Check warning on line 139 in server/src/main/java/org/opensearch/script/DerivedFieldScript.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/script/DerivedFieldScript.java#L139

Added line #L139 was not covered by tests

/**
* A factory to construct {@link DerivedFieldScript} instances.
Expand All @@ -95,7 +149,6 @@

/**
* A factory to construct stateful {@link DerivedFieldScript} factories for a specific index.
*
* @opensearch.internal
*/
public interface Factory extends ScriptFactory {
Expand Down
Loading
Loading