Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix parseToken array popping #13620

Merged
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Fix missing value of FieldSort for unsigned_long ([#14963](https://github.com/opensearch-project/OpenSearch/pull/14963))
- Fix delete index template failed when the index template matches a data stream but is unused ([#15080](https://github.com/opensearch-project/OpenSearch/pull/15080))
- Fix array_index_out_of_bounds_exception when indexing documents with field name containing only dot ([#15126](https://github.com/opensearch-project/OpenSearch/pull/15126))
- Fixed array field name omission in flat_object function for nested JSON ([#13620](https://github.com/opensearch-project/OpenSearch/pull/13620))

### Security

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
package org.opensearch.common.xcontent;

import org.opensearch.common.xcontent.json.JsonXContent;
import org.opensearch.core.common.Strings;
import org.opensearch.core.common.bytes.BytesReference;
import org.opensearch.core.xcontent.AbstractXContentParser;
import org.opensearch.core.xcontent.DeprecationHandler;
Expand All @@ -23,6 +24,9 @@
import java.math.BigInteger;
import java.nio.CharBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Deque;
import java.util.LinkedList;

/**
* JsonToStringParser is the main parser class to transform JSON into stringFields in a XContentParser
Expand All @@ -32,21 +36,20 @@
*/
public class JsonToStringXContentParser extends AbstractXContentParser {
private final String fieldTypeName;
private XContentParser parser;
private final XContentParser parser;

private ArrayList<String> valueList = new ArrayList<>();
private ArrayList<String> valueAndPathList = new ArrayList<>();
private ArrayList<String> keyList = new ArrayList<>();
private final ArrayList<String> valueList = new ArrayList<>();
private final ArrayList<String> valueAndPathList = new ArrayList<>();
private final ArrayList<String> keyList = new ArrayList<>();

private XContentBuilder builder = XContentBuilder.builder(JsonXContent.jsonXContent);
private final XContentBuilder builder = XContentBuilder.builder(JsonXContent.jsonXContent);

private NamedXContentRegistry xContentRegistry;
private final NamedXContentRegistry xContentRegistry;

private DeprecationHandler deprecationHandler;
private final DeprecationHandler deprecationHandler;

private static final String VALUE_AND_PATH_SUFFIX = "._valueAndPath";
private static final String VALUE_SUFFIX = "._value";
private static final String DOT_SYMBOL = ".";
private static final String EQUAL_SYMBOL = "=";

public JsonToStringXContentParser(
Expand All @@ -63,9 +66,14 @@ public JsonToStringXContentParser(
}

public XContentParser parseObject() throws IOException {
assert currentToken() == Token.START_OBJECT;
parser.nextToken(); // Skip the outer START_OBJECT. Need to return on END_OBJECT.

builder.startObject();
StringBuilder path = new StringBuilder(fieldTypeName);
parseToken(path, null);
LinkedList<String> path = new LinkedList<>(Collections.singleton(fieldTypeName));
while (currentToken() != Token.END_OBJECT) {
parseToken(path);
}
builder.field(this.fieldTypeName, keyList);
builder.field(this.fieldTypeName + VALUE_SUFFIX, valueList);
builder.field(this.fieldTypeName + VALUE_AND_PATH_SUFFIX, valueAndPathList);
Expand All @@ -74,75 +82,55 @@ public XContentParser parseObject() throws IOException {
return JsonXContent.jsonXContent.createParser(this.xContentRegistry, this.deprecationHandler, String.valueOf(jString));
}

private void parseToken(StringBuilder path, String currentFieldName) throws IOException {

while (this.parser.nextToken() != Token.END_OBJECT) {
if (this.parser.currentName() != null) {
currentFieldName = this.parser.currentName();
private void parseToken(Deque<String> path) throws IOException {
if (this.parser.currentToken() == Token.FIELD_NAME) {
String fieldName = this.parser.currentName();
path.addLast(fieldName); // Pushing onto the stack *must* be matched by pop
String parts = fieldName;
while (parts.contains(".")) { // Extract the intermediate keys maybe present in fieldName
int dotPos = parts.indexOf('.');
String part = parts.substring(0, dotPos);
this.keyList.add(part);
parts = parts.substring(dotPos + 1);
}
StringBuilder parsedFields = new StringBuilder();

if (this.parser.currentToken() == Token.FIELD_NAME) {
path.append(DOT_SYMBOL).append(currentFieldName);
int dotIndex = currentFieldName.indexOf(DOT_SYMBOL);
String fieldNameSuffix = currentFieldName;
// The field name may be of the form foo.bar.baz
// If that's the case, each "part" is a key.
while (dotIndex >= 0) {
String fieldNamePrefix = fieldNameSuffix.substring(0, dotIndex);
if (!fieldNamePrefix.isEmpty()) {
this.keyList.add(fieldNamePrefix);
}
fieldNameSuffix = fieldNameSuffix.substring(dotIndex + 1);
dotIndex = fieldNameSuffix.indexOf(DOT_SYMBOL);
}
if (!fieldNameSuffix.isEmpty()) {
this.keyList.add(fieldNameSuffix);
}
} else if (this.parser.currentToken() == Token.START_ARRAY) {
parseToken(path, currentFieldName);
break;
} else if (this.parser.currentToken() == Token.END_ARRAY) {
// skip
} else if (this.parser.currentToken() == Token.START_OBJECT) {
parseToken(path, currentFieldName);
int dotIndex = path.lastIndexOf(DOT_SYMBOL, path.length());

if (dotIndex != -1 && path.length() > currentFieldName.length()) {
path.setLength(path.length() - currentFieldName.length() - 1);
}
} else {
if (!path.toString().contains(currentFieldName)) {
path.append(DOT_SYMBOL).append(currentFieldName);
}
parseValue(parsedFields);
this.valueList.add(parsedFields.toString());
this.valueAndPathList.add(path + EQUAL_SYMBOL + parsedFields);
int dotIndex = path.lastIndexOf(DOT_SYMBOL, path.length());
if (dotIndex != -1 && path.length() > currentFieldName.length()) {
path.setLength(path.length() - currentFieldName.length() - 1);
}
this.keyList.add(parts); // parts has no dot, so either it's the original fieldName or it's the last part
this.parser.nextToken(); // advance to the value of fieldName
parseToken(path); // parse the value for fieldName (which will be an array, an object, or a primitive value)
path.removeLast(); // Here is where we pop fieldName from the stack (since we're done with the value of fieldName)
// Note that whichever other branch we just passed through has already ended with nextToken(), so we
// don't need to call it.
} else if (this.parser.currentToken() == Token.START_ARRAY) {
parser.nextToken();
while (this.parser.currentToken() != Token.END_ARRAY) {
parseToken(path);
}

this.parser.nextToken();
} else if (this.parser.currentToken() == Token.START_OBJECT) {
parser.nextToken();
while (this.parser.currentToken() != Token.END_OBJECT) {
parseToken(path);
}
this.parser.nextToken();
} else if (this.parser.currentToken().isValue()) {
String parsedValue = parseValue();
if (parsedValue != null) {
this.valueList.add(parsedValue);
this.valueAndPathList.add(Strings.collectionToDelimitedString(path, ".") + EQUAL_SYMBOL + parsedValue);
}
this.parser.nextToken();
}
}

private void parseValue(StringBuilder parsedFields) throws IOException {
private String parseValue() throws IOException {
switch (this.parser.currentToken()) {
case VALUE_BOOLEAN:
case VALUE_NUMBER:
case VALUE_STRING:
case VALUE_NULL:
parsedFields.append(this.parser.textOrNull());
break;
return this.parser.textOrNull();
// Handle other token types as needed
case FIELD_NAME:
case VALUE_EMBEDDED_OBJECT:
case END_ARRAY:
case START_ARRAY:
break;
default:
throw new IOException("Unsupported token type [" + parser.currentToken() + "]");
throw new IOException("Unsupported value token type [" + parser.currentToken() + "]");
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -569,7 +569,7 @@ protected void parseCreateField(ParseContext context) throws IOException {
String value = context.externalValue().toString();
parseValueAddFields(context, value, fieldType().name());
} else {
JsonToStringXContentParser JsonToStringParser = new JsonToStringXContentParser(
JsonToStringXContentParser jsonToStringParser = new JsonToStringXContentParser(
NamedXContentRegistry.EMPTY,
DeprecationHandler.IGNORE_DEPRECATIONS,
context.parser(),
Expand All @@ -579,7 +579,7 @@ protected void parseCreateField(ParseContext context) throws IOException {
JsonToStringParser is the main parser class to transform JSON into stringFields in a XContentParser
It reads the JSON object and parsed to a list of string
*/
XContentParser parser = JsonToStringParser.parseObject();
XContentParser parser = jsonToStringParser.parseObject();

XContentParser.Token currentToken;
while ((currentToken = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
public class JsonToStringXContentParserTests extends OpenSearchTestCase {

private String flattenJsonString(String fieldName, String in) throws IOException {
String transformed;
try (
XContentParser parser = JsonXContent.jsonXContent.createParser(
xContentRegistry(),
Expand All @@ -33,10 +32,11 @@ private String flattenJsonString(String fieldName, String in) throws IOException
parser,
fieldName
);
// Skip the START_OBJECT token:
// Point to the first token (should be START_OBJECT)
jsonToStringXContentParser.nextToken();

XContentParser transformedParser = jsonToStringXContentParser.parseObject();
assertSame(XContentParser.Token.END_OBJECT, jsonToStringXContentParser.currentToken());
try (XContentBuilder jsonBuilder = XContentFactory.jsonBuilder()) {
jsonBuilder.copyCurrentStructure(transformedParser);
return jsonBuilder.toString();
Expand Down Expand Up @@ -110,4 +110,57 @@ public void testNestChildObjectWithDotsAndFieldWithDots() throws IOException {
);
}

public void testArrayOfObjects() throws IOException {
String jsonExample = "{"
+ "\"field\": {"
+ " \"detail\": {"
+ " \"foooooooooooo\": ["
+ " {\"name\":\"baz\"},"
+ " {\"name\":\"baz\"}"
+ " ]"
+ " }"
+ "}}";

assertEquals(
"{"
+ "\"flat\":[\"field\",\"detail\",\"foooooooooooo\",\"name\",\"name\"],"
+ "\"flat._value\":[\"baz\",\"baz\"],"
+ "\"flat._valueAndPath\":["
+ "\"flat.field.detail.foooooooooooo.name=baz\","
+ "\"flat.field.detail.foooooooooooo.name=baz\""
+ "]}",
flattenJsonString("flat", jsonExample)
);
}

public void testArraysOfObjectsAndValues() throws IOException {
String jsonExample = "{"
+ "\"field\": {"
+ " \"detail\": {"
+ " \"foooooooooooo\": ["
+ " {\"name\":\"baz\"},"
+ " {\"name\":\"baz\"}"
+ " ]"
+ " },"
+ " \"numbers\" : ["
+ " 1,"
+ " 2,"
+ " 3"
+ " ]"
+ "}}";

assertEquals(
"{"
+ "\"flat\":[\"field\",\"detail\",\"foooooooooooo\",\"name\",\"name\",\"numbers\"],"
+ "\"flat._value\":[\"baz\",\"baz\",\"1\",\"2\",\"3\"],"
+ "\"flat._valueAndPath\":["
+ "\"flat.field.detail.foooooooooooo.name=baz\","
+ "\"flat.field.detail.foooooooooooo.name=baz\","
+ "\"flat.field.numbers=1\","
+ "\"flat.field.numbers=2\","
+ "\"flat.field.numbers=3\""
+ "]}",
flattenJsonString("flat", jsonExample)
);
}
}
Loading