Skip to content

Commit

Permalink
Adjust DotExpandingXContentParser behaviour (elastic#83313)
Browse files Browse the repository at this point in the history
With elastic#79922 we have introduced a parser that expands dots in fields names on the fly, so that the expansion no longer needs to be handled by consumers.

The currentName method of such parser was not returning all the time the expected name, compared to the corresponding parser that receives as input the document with expanded fields.

This commit expands testing and addresses the issues that were found.
  • Loading branch information
javanna authored Jan 31, 2022
1 parent 6152ffd commit 4bf3068
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 48 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,11 @@ private static String[] splitAndValidatePath(String fullFieldPath) {
for (String part : parts) {
// check if the field name contains only whitespace
if (part.isEmpty()) {
throw new IllegalArgumentException("object field cannot contain only whitespace: ['" + fullFieldPath + "']");
throw new IllegalArgumentException("field name cannot contain only whitespace: ['" + fullFieldPath + "']");
}
if (part.isBlank()) {
throw new IllegalArgumentException(
"object field starting or ending with a [.] makes object resolution ambiguous: [" + fullFieldPath + "]"
"field name starting or ending with a [.] makes object resolution ambiguous: [" + fullFieldPath + "]"
);
}
}
Expand All @@ -110,16 +110,17 @@ public static XContentParser expandDots(XContentParser in) throws IOException {
}

private enum State {
PRE,
DURING,
POST
EXPANDING_START_OBJECT,
PARSING_ORIGINAL_CONTENT,
ENDING_EXPANDED_OBJECT
}

final String[] subPaths;
final XContentParser subparser;

int level = 0;
private State state = State.PRE;
private int expandedTokens = 0;
private int innerLevel = -1;
private State state = State.EXPANDING_START_OBJECT;

private DotExpandingXContentParser(XContentParser subparser, XContentParser root, String[] subPaths) {
super(root);
Expand All @@ -129,88 +130,102 @@ private DotExpandingXContentParser(XContentParser subparser, XContentParser root

@Override
public Token nextToken() throws IOException {
if (state == State.PRE) {
level++;
if (level == subPaths.length * 2 - 1) {
state = State.DURING;
return in.currentToken();
if (state == State.EXPANDING_START_OBJECT) {
expandedTokens++;
assert expandedTokens < subPaths.length * 2;
if (expandedTokens == subPaths.length * 2 - 1) {
state = State.PARSING_ORIGINAL_CONTENT;
Token token = in.currentToken();
if (token == Token.START_OBJECT || token == Token.START_ARRAY) {
innerLevel++;
}
return token;
}
if (level % 2 == 0) {
// The expansion consists of adding pairs of START_OBJECT and FIELD_NAME tokens
if (expandedTokens % 2 == 0) {
return Token.FIELD_NAME;
}
return Token.START_OBJECT;
}
if (state == State.DURING) {
if (state == State.PARSING_ORIGINAL_CONTENT) {
Token token = subparser.nextToken();
if (token == Token.START_OBJECT || token == Token.START_ARRAY) {
innerLevel++;
}
if (token == Token.END_OBJECT || token == Token.END_ARRAY) {
innerLevel--;
}
if (token != null) {
return token;
}
state = State.POST;
}
assert state == State.POST;
if (level >= 1) {
level -= 2;
state = State.ENDING_EXPANDED_OBJECT;
}
return level < 0 ? null : Token.END_OBJECT;
assert expandedTokens % 2 == 1;
expandedTokens -= 2;
return expandedTokens < 0 ? null : Token.END_OBJECT;
}

@Override
public Token currentToken() {
if (state == State.PRE) {
return level % 2 == 1 ? Token.START_OBJECT : Token.FIELD_NAME;
}
if (state == State.POST) {
if (level > 1) {
return Token.END_OBJECT;
}
}
return in.currentToken();
return switch (state) {
case EXPANDING_START_OBJECT -> expandedTokens % 2 == 1 ? Token.START_OBJECT : Token.FIELD_NAME;
case ENDING_EXPANDED_OBJECT -> Token.END_OBJECT;
case PARSING_ORIGINAL_CONTENT -> in.currentToken();
};
}

@Override
public String currentName() throws IOException {
if (state == State.DURING) {
return in.currentName();
}
if (state == State.POST) {
if (level <= 1) {
if (state == State.PARSING_ORIGINAL_CONTENT) {
assert expandedTokens == subPaths.length * 2 - 1;
// whenever we are parsing some inner object/array we can easily delegate to the inner parser
// e.g. field.with.dots: { obj:{ parsing here } }
if (innerLevel > 0) {
return in.currentName();
}
Token token = currentToken();
// if we are parsing the outer object/array, only at the start object/array we need to return
// e.g. dots instead of field.with.dots otherwise we can simply delegate to the inner parser
// which will do the right thing
if (innerLevel == 0 && token != Token.START_OBJECT && token != Token.START_ARRAY) {
return in.currentName();
}
throw new IllegalStateException("Can't get current name during END_OBJECT");
// note that innerLevel can be -1 if there are no inner object/array e.g. field.with.dots: value
// as well as while there is and we are parsing their END_OBJECT or END_ARRAY
}
return subPaths[level / 2];
return subPaths[expandedTokens / 2];
}

@Override
public void skipChildren() throws IOException {
if (state == State.PRE) {
if (state == State.EXPANDING_START_OBJECT) {
in.skipChildren();
state = State.POST;
state = State.ENDING_EXPANDED_OBJECT;
}
if (state == State.DURING) {
if (state == State.PARSING_ORIGINAL_CONTENT) {
subparser.skipChildren();
}
}

@Override
public String textOrNull() throws IOException {
if (state == State.PRE) {
if (state == State.EXPANDING_START_OBJECT) {
throw new IllegalStateException("Can't get text on a " + currentToken() + " at " + getTokenLocation());
}
return super.textOrNull();
}

@Override
public Number numberValue() throws IOException {
if (state == State.PRE) {
if (state == State.EXPANDING_START_OBJECT) {
throw new IllegalStateException("Can't get numeric value on a " + currentToken() + " at " + getTokenLocation());
}
return super.numberValue();
}

@Override
public boolean booleanValue() throws IOException {
if (state == State.PRE) {
if (state == State.EXPANDING_START_OBJECT) {
throw new IllegalStateException("Can't get boolean value on a " + currentToken() + " at " + getTokenLocation());
}
return super.booleanValue();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,23 @@

public class DotExpandingXContentParserTests extends ESTestCase {

private void assertXContentMatches(String expected, String actual) throws IOException {
XContentParser inputParser = createParser(JsonXContent.jsonXContent, actual);
private void assertXContentMatches(String dotsExpanded, String withDots) throws IOException {
XContentParser inputParser = createParser(JsonXContent.jsonXContent, withDots);
XContentParser expandedParser = DotExpandingXContentParser.expandDots(inputParser);

XContentBuilder actualOutput = XContentBuilder.builder(JsonXContent.jsonXContent).copyCurrentStructure(expandedParser);
assertEquals(expected, Strings.toString(actualOutput));
assertEquals(dotsExpanded, Strings.toString(actualOutput));

XContentParser expectedParser = createParser(JsonXContent.jsonXContent, dotsExpanded);
XContentParser actualParser = DotExpandingXContentParser.expandDots(createParser(JsonXContent.jsonXContent, withDots));
XContentParser.Token currentToken;
while ((currentToken = actualParser.nextToken()) != null) {
assertEquals(currentToken, expectedParser.nextToken());
assertEquals(expectedParser.currentToken(), actualParser.currentToken());
assertEquals(actualParser.currentToken().name(), expectedParser.currentName(), actualParser.currentName());

}
assertNull(expectedParser.nextToken());
}

public void testEmbeddedObject() throws IOException {
Expand All @@ -33,7 +44,16 @@ public void testEmbeddedObject() throws IOException {
""");
}

public void testEmbeddedArray() throws IOException {
public void testEmbeddedObjects() throws IOException {

assertXContentMatches("""
{"test":{"with":{"dots":{"obj":{"field":"value","array":["value",{"field":"value"}]}}}},"nodots":"value2"}\
""", """
{"test.with.dots":{"obj":{"field":"value","array":["value",{"field":"value"}]}},"nodots":"value2"}\
""");
}

public void testEmbeddedArrayOfValues() throws IOException {

assertXContentMatches("""
{"test":{"with":{"dots":["field","value"]}},"nodots":"value2"}\
Expand All @@ -43,6 +63,26 @@ public void testEmbeddedArray() throws IOException {

}

public void testEmbeddedArrayOfObjects() throws IOException {

assertXContentMatches("""
{"test":{"with":{"dots":[{"field":"value"},{"field":"value"}]}},"nodots":"value2"}\
""", """
{"test.with.dots":[{"field":"value"},{"field":"value"}],"nodots":"value2"}\
""");

}

public void testEmbeddedArrayMixedContent() throws IOException {

assertXContentMatches("""
{"test":{"with":{"dots":["value",{"field":"value"}]}},"nodots":"value2"}\
""", """
{"test.with.dots":["value",{"field":"value"}],"nodots":"value2"}\
""");

}

public void testEmbeddedValue() throws IOException {

assertXContentMatches("""
Expand Down Expand Up @@ -85,6 +125,40 @@ public void testSkipChildren() throws IOException {
assertNull(parser.nextToken());
}

public void testSkipChildrenWithinInnerObject() throws IOException {
XContentParser parser = DotExpandingXContentParser.expandDots(createParser(JsonXContent.jsonXContent, """
{ "test.with.dots" : {"obj" : {"field":"value"}}, "nodots" : "value2" }"""));

parser.nextToken(); // start object
assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken());
assertEquals("test", parser.currentName());
assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken());
assertEquals(XContentParser.Token.START_OBJECT, parser.currentToken());
assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken());
assertEquals("with", parser.currentName());
assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken());
assertEquals(XContentParser.Token.START_OBJECT, parser.currentToken());
assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken());
assertEquals("dots", parser.currentName());
assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken());
assertEquals(XContentParser.Token.START_OBJECT, parser.currentToken());
assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken());
assertEquals("obj", parser.currentName());
assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken());
assertEquals(XContentParser.Token.START_OBJECT, parser.currentToken());
parser.skipChildren();
assertEquals(XContentParser.Token.END_OBJECT, parser.currentToken());
assertEquals(XContentParser.Token.END_OBJECT, parser.nextToken());
assertEquals(XContentParser.Token.END_OBJECT, parser.nextToken());
assertEquals(XContentParser.Token.END_OBJECT, parser.nextToken());
assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken());
assertEquals("nodots", parser.currentName());
assertEquals(XContentParser.Token.VALUE_STRING, parser.nextToken());
assertEquals("value2", parser.text());
assertEquals(XContentParser.Token.END_OBJECT, parser.nextToken());
assertNull(parser.nextToken());
}

public void testNestedExpansions() throws IOException {
assertXContentMatches("""
{"first":{"dot":{"second":{"dot":"value"},"third":"value"}},"nodots":"value"}\
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -440,7 +440,7 @@ public void testDynamicRuntimeObjectFields() {
() -> client().prepareIndex("test").setSource("obj.runtime", "value").get()
);
assertEquals(
"object mapping for [obj.runtime] tried to parse field [obj.runtime] as object, but found a concrete value",
"object mapping for [obj.runtime] tried to parse field [runtime] as object, but found a concrete value",
exception.getMessage()
);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1787,7 +1787,7 @@ public void testDynamicFieldsStartingAndEndingWithDot() throws Exception {
{"top..foo.":{"a":1}}
""")));

assertThat(e.getCause().getMessage(), containsString("object field cannot contain only whitespace: ['top..foo.']"));
assertThat(e.getCause().getMessage(), containsString("field name cannot contain only whitespace: ['top..foo.']"));
}

public void testDynamicFieldsEmptyName() throws Exception {
Expand Down

0 comments on commit 4bf3068

Please sign in to comment.