From 84ef7824b26dace44fe775d66c95bb1bf1c10a41 Mon Sep 17 00:00:00 2001
From: Rob Marrowstone <99764876+rmarrowstone@users.noreply.github.com>
Date: Tue, 17 Sep 2024 17:02:11 -0700
Subject: [PATCH] Add a more efficient PathExtractor Implementation (#28)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The existing implementation loops through every potential search
path at each step of matching. It therefore exhibits exponential time
complexity as the number of extracted fields grows. Some of the
Amazon usage involves extractions of thousands of attributes and
users needed to craft a workaround for the performance hit.
This change adds a more efficient PathExtractor implementation that
supports a narrower set of SearchPaths and combinations thereof, but
avoids the exponential time cost. This is reflected in the benchmark
results and testing on my own toy dataset. The results show that
the relative performance of the new implementation improves as the
number of extracted attributes increases.
It is built with a key assumption that does not hold for the existing
implementation: that users may not register paths that would result
in a single element matching more than one terminal matcher.
I searched for Amazon internal usages and believe that the supported
usage patterns cover any and all usage I could find. I found no usage
of Annotations not on the Top-Level-Values themselves.
Note: I changed the "dom" benchmarks to iterate over Top-Level-Values.
This makes them faster and a fairer comparison target.
```
Control (Iterate over IonValues)
PathExtractorBenchmark.domBinary thrpt 5 5.060 ± 0.075 ops/s
PathExtractorBenchmark.domText thrpt 5 1.172 ± 0.040 ops/s
Benchmark (legacy) Mode Cnt Score Error Units
PathExtractorBenchmark.fullBinary thrpt 5 3.846 ± 0.033 ops/s
PathExtractorBenchmark.fullText thrpt 5 1.094 ± 0.008 ops/s
PathExtractorBenchmark.partialBinary thrpt 5 43.020 ± 2.681 ops/s
PathExtractorBenchmark.partialBinaryNoDom thrpt 5 42.628 ± 2.549 ops/s
PathExtractorBenchmark.partialText thrpt 5 2.461 ± 0.121 ops/s
PathExtractorBenchmark.partialTextNoDom thrpt 5 2.368 ± 0.121 ops/s
Benchmark (strict). Mode Cnt Score Error Units
PathExtractorBenchmark.fullBinary thrpt 5 6.011 ± 0.107 ops/s
PathExtractorBenchmark.fullText thrpt 5 1.214 ± 0.025 ops/s
PathExtractorBenchmark.partialBinary thrpt 5 57.329 ± 13.585 ops/s
PathExtractorBenchmark.partialBinaryNoDom thrpt 5 56.598 ± 2.424 ops/s
PathExtractorBenchmark.partialText thrpt 5 2.430 ± 0.073 ops/s
PathExtractorBenchmark.partialTextNoDom thrpt 5 2.416 ± 0.175 ops/s
```
Co-authored-by: Tyler Gregg
---
README.md | 31 +-
build.gradle | 4 +-
.../benchmarks/PathExtractorBenchmark.java | 20 +-
.../amazon/ionpathextraction/FsmMatcher.java | 43 +++
.../ionpathextraction/FsmMatcherBuilder.java | 281 ++++++++++++++++++
.../ionpathextraction/FsmPathExtractor.java | 153 ++++++++++
.../PathExtractorBuilder.java | 77 ++++-
.../ionpathextraction/PathExtractorImpl.java | 6 +-
.../amazon/ionpathextraction/SearchPath.java | 17 +-
.../UnsupportedPathExpression.java | 24 ++
.../internal/Annotations.java | 29 +-
.../internal/PathExtractorConfig.java | 14 +-
.../internal/Preconditions.java | 38 ++-
.../pathcomponents/Index.java | 4 +
.../pathcomponents/PathComponent.java | 10 +-
.../pathcomponents/Text.java | 6 +-
.../pathcomponents/Wildcard.java | 6 +-
.../ionpathextraction/FsmPathExtractorTest.kt | 33 ++
.../PathExtractorImplTest.kt | 5 +
.../ionpathextraction/PathExtractorTest.kt | 70 ++---
src/test/resources/test-cases.ion | 50 +++-
21 files changed, 852 insertions(+), 69 deletions(-)
create mode 100644 src/main/java/com/amazon/ionpathextraction/FsmMatcher.java
create mode 100644 src/main/java/com/amazon/ionpathextraction/FsmMatcherBuilder.java
create mode 100644 src/main/java/com/amazon/ionpathextraction/FsmPathExtractor.java
create mode 100644 src/main/java/com/amazon/ionpathextraction/UnsupportedPathExpression.java
create mode 100644 src/test/kotlin/com/amazon/ionpathextraction/FsmPathExtractorTest.kt
create mode 100644 src/test/kotlin/com/amazon/ionpathextraction/PathExtractorImplTest.kt
diff --git a/README.md b/README.md
index beb058c..cd03d36 100644
--- a/README.md
+++ b/README.md
@@ -35,7 +35,8 @@ data on reader: {foo: ["foo1", "foo2"] , bar: "myBarValue", bar: A::"annotatedVa
(*) - matches ["foo1", "foo2"], "myBarValue" and A::"annotatedValue"
() - matches {foo: ["foo1", "foo2"] , bar: "myBarValue", bar: A::"annotatedValue"}
(bar) - matches "myBarValue" and A::"annotatedValue"
-(A::bar) - matches A::"annotatedValue"
+(A::*) - matches A::"annotatedValue"
+(A::bar) - matches A::"annotatedValue" (is not supported in "strict" mode, see #Optimization below)
```
The `()` matcher matches all values in the stream but you can also use annotations with it, example:
@@ -63,6 +64,17 @@ PathExtractorBuilder.standard()
see PathExtractorBuilder [javadoc](https://static.javadoc.io/com.amazon.ion/ion-java-path-extraction/1.0.1/com/amazon/ionpathextraction/PathExtractorBuilder.html) for more information on configuration options and search path registration.
+### Optimization
+
+There are two implementations: "strict" and "legacy". The strict implementation is more performant, particularly as the
+number of fields extracted grows. By default `PathExtractorBuilder.build()` will try to build you a strict extractor and
+will fall back to the legacy extractor. You may be explicit that you want a specific implementation by calling
+`PathExtractorBuilder.buildStrict()` or `PathExtractorBuilder.buildLegacy()`.
+
+The strict implementation supports basic paths, with field names, index ordinals, and annotations on top-level-values or
+wildcards. It does not support mixing field names and index ordinals, multiple callbacks on the same path or annotations
+on non-wildcard values. Case-insensitive annotations matching is not supported.
+
### Notification
Each time the `PathExtractor` encounters a value that matches a registered search path it will invoke the respective
callback passing the reader positioned at the current value. See `PathExtractorBuilder#withSearchPath` methods for more
@@ -165,6 +177,7 @@ binary file is ~81M and the text file ~95M. There are four benchmarks types:
1. `partial`: materializes a single struct fields as `IonValue` using a path extractor.a
1. `partialNoDom`: access the java representation directly of a single struct field without materializing an `IonValue`.
+All the path extractor benchmarks are run in "strict" mode.
There is a binary and a text version for all four benchmark types. See the [PathExtractorBenchmark](https://github.com/amzn/ion-java-path-extraction/blob/master/src/jmh/java/com/amazon/ionpathextraction/benchmarks/PathExtractorBenchmark.java) class for
more details.
@@ -173,14 +186,14 @@ Results below, higher is better.
```
Benchmark Mode Cnt Score Error Units
-PathExtractorBenchmark.domBinary thrpt 10 1.128 ± 0.050 ops/s
-PathExtractorBenchmark.domText thrpt 10 0.601 ± 0.019 ops/s
-PathExtractorBenchmark.fullBinary thrpt 10 1.227 ± 0.014 ops/s
-PathExtractorBenchmark.fullText thrpt 10 0.665 ± 0.010 ops/s
-PathExtractorBenchmark.partialBinary thrpt 10 14.912 ± 0.271 ops/s
-PathExtractorBenchmark.partialBinaryNoDom thrpt 10 15.650 ± 0.297 ops/s
-PathExtractorBenchmark.partialText thrpt 10 1.343 ± 0.029 ops/s
-PathExtractorBenchmark.partialTextNoDom thrpt 10 1.307 ± 0.015 ops/s
+PathExtractorBenchmark.domBinary thrpt 5 5.060 ± 0.075 ops/s
+PathExtractorBenchmark.domText thrpt 5 1.172 ± 0.040 ops/s
+PathExtractorBenchmark.fullBinary thrpt 5 6.011 ± 0.107 ops/s
+PathExtractorBenchmark.fullText thrpt 5 1.214 ± 0.025 ops/s
+PathExtractorBenchmark.partialBinary thrpt 5 57.329 ± 13.585 ops/s
+PathExtractorBenchmark.partialBinaryNoDom thrpt 5 56.598 ± 2.424 ops/s
+PathExtractorBenchmark.partialText thrpt 5 2.430 ± 0.073 ops/s
+PathExtractorBenchmark.partialTextNoDom thrpt 5 2.416 ± 0.175 ops/s
```
Using the path extractor has equivalent performance for both text and binary when fully materializing the document and
diff --git a/build.gradle b/build.gradle
index b91801a..e0ecb5e 100644
--- a/build.gradle
+++ b/build.gradle
@@ -83,10 +83,10 @@ jmh {
failOnError = true
// warmup
- warmupIterations = 5
+ warmupIterations = 2
// iterations
- iterations = 10
+ iterations = 5
}
checkstyle {
diff --git a/src/jmh/java/com/amazon/ionpathextraction/benchmarks/PathExtractorBenchmark.java b/src/jmh/java/com/amazon/ionpathextraction/benchmarks/PathExtractorBenchmark.java
index 9eff26a..3d4eb9b 100644
--- a/src/jmh/java/com/amazon/ionpathextraction/benchmarks/PathExtractorBenchmark.java
+++ b/src/jmh/java/com/amazon/ionpathextraction/benchmarks/PathExtractorBenchmark.java
@@ -15,6 +15,7 @@
import com.amazon.ion.IonReader;
import com.amazon.ion.IonSystem;
+import com.amazon.ion.IonValue;
import com.amazon.ion.IonWriter;
import com.amazon.ion.system.IonBinaryWriterBuilder;
import com.amazon.ion.system.IonReaderBuilder;
@@ -28,6 +29,7 @@
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
+import java.util.Iterator;
import java.util.function.Function;
import java.util.stream.Stream;
import org.openjdk.jmh.annotations.Benchmark;
@@ -171,7 +173,14 @@ public Object partialTextNoDom(final ThreadState threadState) {
*/
@Benchmark
public Object domBinary() {
- return DOM_FACTORY.getLoader().load(bytesBinary);
+ IonReader reader = newReader(new ByteArrayInputStream(bytesBinary));
+ // iterating over Top-Level-Values is more apples:apples to path extractor
+ // vs loading all as a datagram
+ Iterator iter = DOM_FACTORY.iterate(reader);
+ while (iter.hasNext()) {
+ iter.next();
+ }
+ return reader;
}
/**
@@ -179,7 +188,14 @@ public Object domBinary() {
*/
@Benchmark
public Object domText() {
- return DOM_FACTORY.getLoader().load(bytesText);
+ IonReader reader = newReader(new ByteArrayInputStream(bytesText));
+ // iterating over Top-Level-Values is more apples:apples to path extractor
+ // vs loading all as a datagram
+ Iterator iter = DOM_FACTORY.iterate(reader);
+ while (iter.hasNext()) {
+ iter.next();
+ }
+ return reader;
}
/**
diff --git a/src/main/java/com/amazon/ionpathextraction/FsmMatcher.java b/src/main/java/com/amazon/ionpathextraction/FsmMatcher.java
new file mode 100644
index 0000000..30d2650
--- /dev/null
+++ b/src/main/java/com/amazon/ionpathextraction/FsmMatcher.java
@@ -0,0 +1,43 @@
+/*
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License").
+ * You may not use this file except in compliance with the License.
+ * A copy of the License is located at:
+ *
+ * http://aws.amazon.com/apache2.0/
+ *
+ * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific
+ * language governing permissions and limitations under the License.
+ */
+
+package com.amazon.ionpathextraction;
+
+import com.amazon.ion.IonReader;
+import java.util.function.BiFunction;
+import java.util.function.Supplier;
+
+/**
+ * Base class for match states in the Finite State Machine matching implementation.
+ */
+abstract class FsmMatcher {
+ /**
+ * Callback for match state. May be null.
+ */
+ BiFunction callback;
+
+ /**
+ * Indicates there are no possible child transitions.
+ */
+ boolean terminal = false;
+
+ /**
+ * Return the child matcher for the given reader context.
+ * Return null if there is no match.
+ *
+ * @param position will be -1 for top-level-values, otherwise will be the position ordinal
+ * of the value in the container, both for sequences and structs.
+ * @param fieldName will be non-null only for struct values.
+ */
+ abstract FsmMatcher transition(String fieldName, int position, Supplier annotations);
+}
diff --git a/src/main/java/com/amazon/ionpathextraction/FsmMatcherBuilder.java b/src/main/java/com/amazon/ionpathextraction/FsmMatcherBuilder.java
new file mode 100644
index 0000000..030dbfb
--- /dev/null
+++ b/src/main/java/com/amazon/ionpathextraction/FsmMatcherBuilder.java
@@ -0,0 +1,281 @@
+/*
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License").
+ * You may not use this file except in compliance with the License.
+ * A copy of the License is located at:
+ *
+ * http://aws.amazon.com/apache2.0/
+ *
+ * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific
+ * language governing permissions and limitations under the License.
+ */
+
+package com.amazon.ionpathextraction;
+
+import com.amazon.ion.IonReader;
+import com.amazon.ionpathextraction.internal.Annotations;
+import com.amazon.ionpathextraction.pathcomponents.Index;
+import com.amazon.ionpathextraction.pathcomponents.PathComponent;
+import com.amazon.ionpathextraction.pathcomponents.Text;
+import com.amazon.ionpathextraction.pathcomponents.Wildcard;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.function.BiFunction;
+import java.util.function.Supplier;
+import java.util.stream.Collectors;
+
+/**
+ * Builds a root FsmMatcher for a set of SearchPaths.
+ *
+ * One key principle in the implementation is to close over as much branching as possible at build time.
+ * For example: for a case-insensitive field lookup, lower case the field names once, at build time.
+ *
+ * The second key principle is that there should be at-most-one Matcher state for a given reader context.
+ * So any combination of different paths which could both be active for the same reader context are disallowed.
+ * For example: allowing a mix of field names and ordinal positions for a given sub-path.
+ *
+ * Beyond that, there are some usage patterns which could be included, such as annotations filtering on
+ * field names or ordinals, but for which there was no observed usage.
+ */
+class FsmMatcherBuilder {
+ private final PathTreeNode root = new PathTreeNode();
+ private final boolean caseInsensitiveAll;
+ private final boolean caseInsensitiveFields;
+
+ FsmMatcherBuilder(final boolean caseInsensitiveAll, final boolean caseInsensitiveFields) {
+ this.caseInsensitiveAll = caseInsensitiveAll;
+ this.caseInsensitiveFields = caseInsensitiveFields;
+ }
+
+ /**
+ * Incorporate the searchPath into the matcher tree to be built.
+ *
+ * @throws UnsupportedPathExpression if the SearchPath is not supported.
+ */
+ void accept(final SearchPath searchPath) {
+ List steps = searchPath.getNormalizedPath();
+ PathTreeNode currentNode = root;
+ for (PathComponent step : steps) {
+ currentNode = currentNode.acceptStep(step);
+ }
+ currentNode.setCallback(searchPath.getCallback());
+ }
+
+ /**
+ * Build the FsmMatcher for the set of paths.
+ *
+ * @throws UnsupportedPathExpression if the combination of SearchPaths is not supported.
+ */
+ FsmMatcher build() {
+ return root.buildMatcher();
+ }
+
+ /**
+ * Mutable builder node to model the path tree before building into a FsmMatcher.
+ */
+ private class PathTreeNode {
+ BiFunction callback;
+ PathTreeNode wildcard;
+ Map annotatedSplats = new HashMap<>();
+ Map fields = new HashMap<>();
+ Map indexes = new HashMap<>();
+
+ /**
+ * Find or create a new PathTreeNode for the child step.
+ *
+ * @return the new or existing node.
+ * @throws UnsupportedPathExpression if the step contains path components that are not supported
+ */
+ private PathTreeNode acceptStep(final PathComponent step) {
+ if (step.hasAnnotations() && caseInsensitiveAll) {
+ throw new UnsupportedPathExpression(
+ "Case Insensitive Matching of Annotations is not yet supported by this matcher.\n"
+ + "Use the legacy matcher or the withMatchFieldNamesCaseInsensitive option instead.");
+ }
+
+ PathTreeNode child;
+ if (step instanceof Wildcard) {
+ if (step.hasAnnotations()) {
+ child = annotatedSplats.computeIfAbsent(step.getAnnotations(), a -> new PathTreeNode());
+ } else {
+ if (wildcard == null) {
+ wildcard = new PathTreeNode();
+ }
+ child = wildcard;
+ }
+ } else {
+ if (step.hasAnnotations()) {
+ // this is not too bad to do, but it takes care to do without impacting the non-annotated case
+ // which is the majority of usage. one would also want to mind the principle to avoid multiple
+ // distinct match paths for a given reader context and only allow either annotated or not
+ // for a given field name or index ordinal.
+ throw new UnsupportedPathExpression("Annotations are only supported on wildcards!");
+ }
+
+ if (step instanceof Text) {
+ String fieldName = caseInsensitiveFields
+ ? ((Text) step).getFieldName().toLowerCase()
+ : ((Text) step).getFieldName();
+ child = fields.computeIfAbsent(fieldName, f -> new PathTreeNode());
+ } else if (step instanceof Index) {
+ child = indexes.computeIfAbsent(((Index) step).getOrdinal(), i -> new PathTreeNode());
+ } else {
+ throw new IllegalArgumentException("step of unknown runtime type: " + step.getClass());
+ }
+ }
+ return child;
+ }
+
+ private void setCallback(final BiFunction callback) {
+ if (this.callback == null) {
+ this.callback = callback;
+ } else {
+ // this would actually be pretty simple to do: just create a ComposedCallback of BiFunctions.
+ throw new UnsupportedPathExpression("Cannot set multiple callbacks for same path!");
+ }
+ }
+
+ private FsmMatcher buildMatcher() {
+ List> matchers = new ArrayList<>();
+ if (wildcard != null) {
+ matchers.add(new SplatMatcher<>(wildcard.buildMatcher(), callback));
+ }
+ if (!annotatedSplats.isEmpty()) {
+ List> children = new ArrayList<>(annotatedSplats.size());
+ List annotations = new ArrayList<>(annotatedSplats.size());
+ for (Map.Entry entry : annotatedSplats.entrySet()) {
+ children.add(entry.getValue().buildMatcher());
+ annotations.add(entry.getKey().getAnnotations());
+ }
+ matchers.add(new AnnotationsMatcher<>(annotations, children));
+ }
+ if (!fields.isEmpty()) {
+ Map> children = fields.entrySet().stream()
+ .collect(Collectors.toMap(Map.Entry::getKey, (e) -> e.getValue().buildMatcher()));
+ FsmMatcher fieldMatcher = caseInsensitiveFields
+ ? new CaseInsensitiveFieldMatcher<>(children, callback)
+ : new FieldMatcher<>(children, callback);
+ matchers.add(fieldMatcher);
+ }
+ if (!indexes.isEmpty()) {
+ Map> children = indexes.entrySet().stream()
+ .collect(Collectors.toMap(Map.Entry::getKey, (e) -> e.getValue().buildMatcher()));
+ matchers.add(new IndexMatcher<>(children, callback));
+ }
+
+ if (matchers.isEmpty()) {
+ return new TerminalMatcher<>(callback);
+ } else if (matchers.size() == 1) {
+ return matchers.get(0);
+ } else {
+ // the main issue with allowing more than one is that it means that any given match context
+ // may produce multiple matches, and search path writers may become reliant on the order
+ // in which callbacks for such cases are called. And in the general case, that might mean
+ // a crazy mix between the different types of matching, which devolves to the for-each loop
+ // we see in the PathExtractorImpl.
+ // That seems like a lot of complexity for a usage pattern of questionable value.
+ // So if you're reading this, and you think "oh this is a silly restriction", then take
+ // the time to understand why it's important to the path writer and reconsider accordingly.
+ throw new UnsupportedPathExpression(
+ "Only one variant of wildcard, annotated wildcard, field names, or ordinals is supported!");
+ }
+ }
+ }
+
+ private static class SplatMatcher extends FsmMatcher {
+ FsmMatcher child;
+
+ SplatMatcher(
+ final FsmMatcher child,
+ final BiFunction callback) {
+ this.child = child;
+ this.callback = callback;
+ }
+
+ @Override
+ FsmMatcher transition(final String fieldName, final int position, final Supplier annotations) {
+ return child;
+ }
+ }
+
+ private static class FieldMatcher extends FsmMatcher {
+ Map> fields;
+
+ FieldMatcher(
+ final Map> fields,
+ final BiFunction callback) {
+ this.fields = fields;
+ this.callback = callback;
+ }
+
+ @Override
+ FsmMatcher transition(final String fieldName, final int position, final Supplier annotations) {
+ return fields.get(fieldName);
+ }
+ }
+
+ private static class CaseInsensitiveFieldMatcher extends FieldMatcher {
+ CaseInsensitiveFieldMatcher(
+ final Map> fields,
+ final BiFunction callback) {
+ super(fields, callback);
+ }
+
+ @Override
+ FsmMatcher transition(final String fieldName, final int position, final Supplier annotations) {
+ return fields.get(fieldName.toLowerCase());
+ }
+ }
+
+ private static class IndexMatcher extends FsmMatcher {
+ Map> indexes;
+
+ IndexMatcher(
+ final Map> indexes,
+ final BiFunction callback) {
+ this.indexes = indexes;
+ this.callback = callback;
+ }
+
+ @Override
+ FsmMatcher transition(final String fieldName, final int position, final Supplier annotations) {
+ return indexes.get(position);
+ }
+ }
+
+ private static class TerminalMatcher extends FsmMatcher {
+ TerminalMatcher(final BiFunction callback) {
+ this.callback = callback;
+ this.terminal = true;
+ }
+
+ @Override
+ FsmMatcher transition(final String fieldName, final int position, final Supplier annotations) {
+ return null;
+ }
+ }
+
+ private static class AnnotationsMatcher extends FsmMatcher {
+ List candidates;
+ List> matchers;
+
+ AnnotationsMatcher(final List candidates, final List> matchers) {
+ this.candidates = candidates;
+ this.matchers = matchers;
+ }
+
+ @Override
+ FsmMatcher transition(final String fieldName, final int position, final Supplier annotations) {
+ for (int i = 0; i < candidates.size(); i++) {
+ if (Arrays.equals(candidates.get(i), annotations.get())) {
+ return matchers.get(i);
+ }
+ }
+ return null;
+ }
+ }
+}
diff --git a/src/main/java/com/amazon/ionpathextraction/FsmPathExtractor.java b/src/main/java/com/amazon/ionpathextraction/FsmPathExtractor.java
new file mode 100644
index 0000000..6038a4d
--- /dev/null
+++ b/src/main/java/com/amazon/ionpathextraction/FsmPathExtractor.java
@@ -0,0 +1,153 @@
+/*
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License").
+ * You may not use this file except in compliance with the License.
+ * A copy of the License is located at:
+ *
+ * http://aws.amazon.com/apache2.0/
+ *
+ * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific
+ * language governing permissions and limitations under the License.
+ */
+
+package com.amazon.ionpathextraction;
+
+import static com.amazon.ionpathextraction.internal.Preconditions.checkArgument;
+import static com.amazon.ionpathextraction.internal.Preconditions.checkState;
+
+import com.amazon.ion.IonReader;
+import com.amazon.ion.IonType;
+import com.amazon.ionpathextraction.internal.PathExtractorConfig;
+import java.util.List;
+import java.util.function.BiFunction;
+
+/**
+ * A PathExtractor modeled as a Finite State Machine.
+ *
+ * Compared to the PathExtractorImpl, this supports a narrower set of
+ * SearchPaths and their combinations, but is more performant, particularly
+ * when a large number of field names are searched for.
+ *
+ * A more comprehensive explanation of the strictness from a user PoV can be
+ * found on the PathExtractorBuilder.buildStrict() API method. Notes on the
+ * 'why' can be found in the FsmMatcherBuilder.
+ */
+class FsmPathExtractor implements PathExtractor {
+ private final FsmMatcher rootMatcher;
+ private final PathExtractorConfig config;
+
+ private FsmPathExtractor(
+ final FsmMatcher rootMatcher,
+ final PathExtractorConfig config) {
+ this.rootMatcher = rootMatcher;
+ this.config = config;
+ }
+
+ static FsmPathExtractor create(
+ final List> searchPaths,
+ final PathExtractorConfig config) {
+ FsmMatcherBuilder builder = new FsmMatcherBuilder<>(
+ config.isMatchCaseInsensitive(),
+ config.isMatchFieldsCaseInsensitive());
+ for (SearchPath path : searchPaths) {
+ builder.accept(path);
+ }
+
+ return new FsmPathExtractor<>(builder.build(), config);
+ }
+
+ @Override
+ public void match(final IonReader reader) {
+ match(reader, null);
+ }
+
+ @Override
+ public void match(final IonReader reader, final T context) {
+ checkArgument(reader.getDepth() == 0 || config.isMatchRelativePaths(),
+ "reader must be at depth zero, it was at: %s", reader.getDepth());
+
+ while (reader.next() != null) {
+ matchCurrentValue(reader, context);
+ }
+ }
+
+ @Override
+ public void matchCurrentValue(final IonReader reader) {
+ matchCurrentValue(reader, null);
+ }
+
+ @Override
+ public void matchCurrentValue(final IonReader reader, final T context) {
+ checkArgument(reader.getDepth() == 0 || config.isMatchRelativePaths(),
+ "reader must be at depth zero, it was at: %s", reader.getDepth());
+ checkArgument(reader.getType() != null,
+ "reader must be positioned at a value; call IonReader.next() first.");
+
+ matchRecursive(reader, rootMatcher, context, -1, reader.getDepth());
+ }
+
+ private int matchRecursive(
+ final IonReader reader,
+ final FsmMatcher matcher,
+ final T context,
+ final int position,
+ final int initialDepth) {
+ FsmMatcher child = matcher.transition(reader.getFieldName(), position, reader::getTypeAnnotations);
+ if (child == null) {
+ return 0;
+ }
+
+ if (child.callback != null) {
+ int stepOut = invokeCallback(reader, child.callback, initialDepth, context);
+ if (stepOut > 0) {
+ return stepOut;
+ }
+ }
+
+ if (IonType.isContainer(reader.getType()) && !child.terminal) {
+ reader.stepIn();
+ int childPos = 0;
+ while (reader.next() != null) {
+ int stepOut = matchRecursive(reader, child, context, childPos++, initialDepth);
+ if (stepOut > 0) {
+ reader.stepOut();
+ return stepOut - 1;
+ }
+ }
+ reader.stepOut();
+ }
+
+ return 0;
+ }
+
+ private int invokeCallback(
+ final IonReader reader,
+ final BiFunction callback,
+ final int initialReaderDepth,
+ final T context) {
+ int previousReaderDepth = reader.getDepth();
+
+ int stepOutTimes = callback.apply(reader, context);
+ int newReaderDepth = reader.getDepth();
+
+ checkState(previousReaderDepth == newReaderDepth,
+ "Reader must be at same depth when returning from callbacks. initial: %s, new: %s",
+ previousReaderDepth,
+ newReaderDepth);
+
+ // we don't allow users to step out the initial reader depth
+ int readerRelativeDepth = reader.getDepth() - initialReaderDepth;
+
+ checkState(stepOutTimes <= readerRelativeDepth,
+ STEP_OUT_TOO_FAR_MSG,
+ stepOutTimes,
+ readerRelativeDepth);
+
+ return stepOutTimes;
+ }
+
+ private static final String STEP_OUT_TOO_FAR_MSG =
+ "Callback return cannot be greater than the reader current relative depth. "
+ + "return: %s, relative reader depth: %s";
+}
diff --git a/src/main/java/com/amazon/ionpathextraction/PathExtractorBuilder.java b/src/main/java/com/amazon/ionpathextraction/PathExtractorBuilder.java
index bd6b597..080269d 100644
--- a/src/main/java/com/amazon/ionpathextraction/PathExtractorBuilder.java
+++ b/src/main/java/com/amazon/ionpathextraction/PathExtractorBuilder.java
@@ -35,6 +35,7 @@ public final class PathExtractorBuilder {
private final List> searchPaths = new ArrayList<>();
private boolean matchRelativePaths;
private boolean matchCaseInsensitive;
+ private boolean matchFieldsCaseInsensitive;
private PathExtractorBuilder() {
}
@@ -48,19 +49,69 @@ public static PathExtractorBuilder standard() {
PathExtractorBuilder builder = new PathExtractorBuilder<>();
builder.matchCaseInsensitive = DEFAULT_CASE_INSENSITIVE;
builder.matchRelativePaths = DEFAULT_MATCH_RELATIVE_PATHS;
+ builder.matchFieldsCaseInsensitive = DEFAULT_CASE_INSENSITIVE;
return builder;
}
/**
* Instantiates a thread safe {@link PathExtractor} configured by this builder.
- *
+ * Attempts to build a "strict" PathExtractor which is much more performant, particularly for extractions with many
+ * field names. Falls back to the "legacy" implementation if the paths registered are incompatible with the "strict"
+ * implementation.
+ *
+ * Use buildStrict to ensure the more optimal implementation is used.
* @return new {@link PathExtractor} instance.
*/
public PathExtractor build() {
- return new PathExtractorImpl<>(searchPaths, new PathExtractorConfig(matchRelativePaths, matchCaseInsensitive));
+ try {
+ return buildStrict();
+ } catch (UnsupportedPathExpression e) {
+ return buildLegacy();
+ }
}
+ /**
+ * Instantiate a "stricter" and more optimized PathExtractor.
+ *
+ * Supports search paths where there is only one "variant" of step type from each parent step, and only one callback
+ * per state.
+ * Annotations matching is only supported on the root or wildcards.
+ * Case insensitivity is supported on field names, not annotations.
+ *
+ * Examples of supported paths (and any combination of the below):
+ * `A::()`
+ * `(foo bar)`
+ * `(foo qux)`
+ * `(spam 0)`
+ * `(spam 1)`
+ * `(quid * quo)`
+ * `(lorem A::B::* ipsum)`
+ *
+ * Examples of unsupported paths:
+ * `(a::foo)` annotations on field names not supported, yet.
+ * `(a::1)` annotations on index ordinals not supported, yet.
+ * `(foo bar) (foo 1) (foo *)` combination of field names, index ordinals or wildcards not supported.
+ * `a::() ()` combination of annotated and non-annotated root (or other wildcard) matching.
+ *
+ * @return new {@link PathExtractor} instance.
+ * @throws UnsupportedPathExpression if any search path or the paths combined, are not supported.
+ */
+ public PathExtractor buildStrict() {
+ return FsmPathExtractor.create(searchPaths,
+ new PathExtractorConfig(matchRelativePaths, matchCaseInsensitive, matchFieldsCaseInsensitive));
+ }
+
+ /**
+ * Instantiate a "legacy" PathExtractor implementation.
+ * The returned PathExtractor is inefficient when a large number of field names is searched,
+ * but a wider variety of search paths are supported.
+ */
+ public PathExtractor buildLegacy() {
+ return new PathExtractorImpl<>(searchPaths,
+ new PathExtractorConfig(matchRelativePaths, matchCaseInsensitive, matchFieldsCaseInsensitive));
+ }
+
/**
* Sets matchRelativePaths config. When true the path extractor will accept readers at any depth, when false the
* reader must be at depth zero.
@@ -78,8 +129,9 @@ public PathExtractorBuilder withMatchRelativePaths(final boolean matchRelativ
}
/**
- * Sets matchCaseInsensitive config. When true the path extractor will match fields ignoring case, when false the
- * path extractor will mach respecting the path components case.
+ * Sets matchCaseInsensitive config. When true the path extractor will match fields _and annotations_ ignoring case.
+ * When false the path extractor will match respecting the path components case.
+ * To set case insensitivity for _only field names_ use the `withMatchFieldNamesCaseInsensitive` builder.
*
*
* defaults to false.
@@ -89,6 +141,23 @@ public PathExtractorBuilder withMatchRelativePaths(final boolean matchRelativ
*/
public PathExtractorBuilder withMatchCaseInsensitive(final boolean matchCaseInsensitive) {
this.matchCaseInsensitive = matchCaseInsensitive;
+ this.matchFieldsCaseInsensitive = matchCaseInsensitive;
+
+ return this;
+ }
+
+ /**
+ * Sets matchFieldNamesCaseInsensitive config. When true the path extractor will match field names ignoring case.
+ * For example: 'Foo' will match 'foo'.
+ *
+ *
+ * defaults to false.
+ *
+ * @param matchCaseInsensitive new config value.
+ * @return builder for chaining.
+ */
+ public PathExtractorBuilder withMatchFieldNamesCaseInsensitive(final boolean matchCaseInsensitive) {
+ this.matchFieldsCaseInsensitive = matchCaseInsensitive;
return this;
}
diff --git a/src/main/java/com/amazon/ionpathextraction/PathExtractorImpl.java b/src/main/java/com/amazon/ionpathextraction/PathExtractorImpl.java
index f7284c1..9cac3c6 100644
--- a/src/main/java/com/amazon/ionpathextraction/PathExtractorImpl.java
+++ b/src/main/java/com/amazon/ionpathextraction/PathExtractorImpl.java
@@ -63,7 +63,7 @@ public void match(final IonReader reader) {
@Override
public void match(final IonReader reader, final T context) {
checkArgument(reader.getDepth() == 0 || config.isMatchRelativePaths(),
- "reader must be at depth zero, it was at:" + reader.getDepth());
+ "reader must be at depth zero, it was at: " + reader.getDepth());
// short circuit when there are zero SearchPaths
if (searchPaths.isEmpty()) {
@@ -83,7 +83,7 @@ public void matchCurrentValue(final IonReader reader) {
@Override
public void matchCurrentValue(final IonReader reader, final T context) {
checkArgument(reader.getDepth() == 0 || config.isMatchRelativePaths(),
- "reader must be at depth zero, it was at:" + reader.getDepth());
+ "reader must be at depth zero, it was at: " + reader.getDepth());
checkArgument(reader.getType() != null,
"reader must be positioned at a value; call IonReader.next() first.");
@@ -189,7 +189,7 @@ private boolean isTerminal(final int pathComponentIndex, final SearchPath sea
private static class Tracker {
private final Deque>> stack;
- private int initialReaderDepth;
+ private final int initialReaderDepth;
Tracker(final int size, final List> searchPaths, final int initialReaderDepth) {
stack = new ArrayDeque<>(size);
diff --git a/src/main/java/com/amazon/ionpathextraction/SearchPath.java b/src/main/java/com/amazon/ionpathextraction/SearchPath.java
index 5e57927..9f04198 100644
--- a/src/main/java/com/amazon/ionpathextraction/SearchPath.java
+++ b/src/main/java/com/amazon/ionpathextraction/SearchPath.java
@@ -17,6 +17,8 @@
import com.amazon.ionpathextraction.internal.Annotations;
import com.amazon.ionpathextraction.internal.MatchContext;
import com.amazon.ionpathextraction.pathcomponents.PathComponent;
+import com.amazon.ionpathextraction.pathcomponents.Wildcard;
+import java.util.ArrayList;
import java.util.List;
import java.util.function.BiFunction;
@@ -46,6 +48,19 @@ int size() {
return pathComponents.size();
}
+ /**
+ * Produces a "normalized" path for the SearchPath.
+ * Basically: the SearchPath has the annotations (or not) for matching top-level-values.
+ * The "normalized" path treats this as an explicit Wildcard step and adds it to the head
+ * of the PathComponents.
+ */
+ List getNormalizedPath() {
+ List normalizedPath = new ArrayList<>(pathComponents.size() + 1);
+ normalizedPath.add(new Wildcard(annotations));
+ normalizedPath.addAll(pathComponents);
+ return normalizedPath;
+ }
+
/**
* Callback to be invoked when the Search Path is matched.
*/
@@ -67,4 +82,4 @@ boolean partialMatchAt(final MatchContext context) {
return false;
}
-}
\ No newline at end of file
+}
diff --git a/src/main/java/com/amazon/ionpathextraction/UnsupportedPathExpression.java b/src/main/java/com/amazon/ionpathextraction/UnsupportedPathExpression.java
new file mode 100644
index 0000000..0806ce6
--- /dev/null
+++ b/src/main/java/com/amazon/ionpathextraction/UnsupportedPathExpression.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License").
+ * You may not use this file except in compliance with the License.
+ * A copy of the License is located at:
+ *
+ * http://aws.amazon.com/apache2.0/
+ *
+ * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific
+ * language governing permissions and limitations under the License.
+ */
+
+package com.amazon.ionpathextraction;
+
+/**
+ * Thrown when trying to build a "strict" PathExtractor if a SearchPath or set of paths is not supported.
+ * A user should rewrite their extraction to match the strictness invariant or use the "legacy" PathExtractor.
+ */
+public class UnsupportedPathExpression extends RuntimeException {
+ public UnsupportedPathExpression(final String msg) {
+ super(msg);
+ }
+}
diff --git a/src/main/java/com/amazon/ionpathextraction/internal/Annotations.java b/src/main/java/com/amazon/ionpathextraction/internal/Annotations.java
index df3e823..1ea737d 100644
--- a/src/main/java/com/amazon/ionpathextraction/internal/Annotations.java
+++ b/src/main/java/com/amazon/ionpathextraction/internal/Annotations.java
@@ -13,6 +13,7 @@
package com.amazon.ionpathextraction.internal;
+import java.util.Arrays;
import java.util.stream.IntStream;
/**
@@ -24,7 +25,9 @@
* Internal only. Not intended for application use.
*
*/
-public class Annotations {
+public final class Annotations {
+
+ public static final Annotations EMPTY = new Annotations(new String[] {});
final String[] rawAnnotations;
@@ -35,6 +38,14 @@ public Annotations(final String[] rawAnnotations) {
this.rawAnnotations = rawAnnotations;
}
+ public String[] getAnnotations() {
+ return rawAnnotations;
+ }
+
+ public boolean hasAnnotations() {
+ return rawAnnotations.length > 0;
+ }
+
/**
* returns true if it matches on the annotations provided.
*/
@@ -51,4 +62,20 @@ private static boolean arrayEquals(final String[] left, final String[] right, fi
return IntStream.range(0, left.length)
.allMatch(i -> ignoreCase ? left[i].equalsIgnoreCase(right[i]) : left[i].equals(right[i]));
}
+
+ @Override
+ public boolean equals(final Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (!(o instanceof Annotations)) {
+ return false;
+ }
+ return Arrays.equals(rawAnnotations, ((Annotations) o).rawAnnotations);
+ }
+
+ @Override
+ public int hashCode() {
+ return Arrays.hashCode(rawAnnotations);
+ }
}
diff --git a/src/main/java/com/amazon/ionpathextraction/internal/PathExtractorConfig.java b/src/main/java/com/amazon/ionpathextraction/internal/PathExtractorConfig.java
index df8951c..42e6bec 100644
--- a/src/main/java/com/amazon/ionpathextraction/internal/PathExtractorConfig.java
+++ b/src/main/java/com/amazon/ionpathextraction/internal/PathExtractorConfig.java
@@ -20,10 +20,18 @@ public final class PathExtractorConfig {
private final boolean matchRelativePaths;
private final boolean matchCaseInsensitive;
+ private final boolean matchFieldsCaseInsensitive;
- public PathExtractorConfig(final boolean matchRelativePaths, final boolean matchCaseInsensitive) {
+ /**
+ * Instantiate a PathExtractorConfig.
+ */
+ public PathExtractorConfig(
+ final boolean matchRelativePaths,
+ final boolean matchCaseInsensitive,
+ final boolean matchFieldsCaseInsensitive) {
this.matchRelativePaths = matchRelativePaths;
this.matchCaseInsensitive = matchCaseInsensitive;
+ this.matchFieldsCaseInsensitive = matchFieldsCaseInsensitive;
}
public boolean isMatchRelativePaths() {
@@ -33,4 +41,8 @@ public boolean isMatchRelativePaths() {
public boolean isMatchCaseInsensitive() {
return matchCaseInsensitive;
}
+
+ public boolean isMatchFieldsCaseInsensitive() {
+ return matchFieldsCaseInsensitive;
+ }
}
diff --git a/src/main/java/com/amazon/ionpathextraction/internal/Preconditions.java b/src/main/java/com/amazon/ionpathextraction/internal/Preconditions.java
index 4ff7594..c42ba3c 100644
--- a/src/main/java/com/amazon/ionpathextraction/internal/Preconditions.java
+++ b/src/main/java/com/amazon/ionpathextraction/internal/Preconditions.java
@@ -28,27 +28,61 @@ public class Preconditions {
/**
* Validates argument, fails if condition is not met.
+ * Prefer {@link #checkArgument(boolean isValid, String messageFormat, Object[] args) } over concatenating
+ * Strings at call-site.
*
* @param isValid if condition is met.
* @param message error message.
* @throws PathExtractionException if not valid.
*/
- public static void checkArgument(final Boolean isValid, final String message) {
+ public static void checkArgument(final boolean isValid, final String message) {
if (!isValid) {
throw new PathExtractionException(message);
}
}
+ /**
+ * Validates argument, fails if condition is not met.
+ * This overload only builds the error message if isValid is false.
+ *
+ * @param isValid if condition is met.
+ * @param messageFormat error message _format_.
+ * @param args arguments to String.format()
+ * @throws PathExtractionException if not valid.
+ */
+ public static void checkArgument(final boolean isValid, final String messageFormat, final Object... args) {
+ if (!isValid) {
+ throw new PathExtractionException(String.format(messageFormat, args));
+ }
+ }
+
/**
* Validates a state, fails if condition is not met.
+ * Prefer {@link #checkState(boolean isValid, String messageFormat, Object[] args) } over concatenating
+ * Strings at call-site.
*
* @param isValid if condition is met.
* @param message error message.
* @throws PathExtractionException if not valid.
*/
- public static void checkState(final Boolean isValid, final String message) {
+ public static void checkState(final boolean isValid, final String message) {
if (!isValid) {
throw new PathExtractionException(message);
}
}
+
+ /**
+ * Validates a state, fails if condition is not met.
+ * This overload only builds the error message if isValid is false.
+ *
+ * @param isValid if condition is met.
+ * @param messageFormat error message _format_.
+ * @param args arguments to String.format()
+ * @throws PathExtractionException if not valid.
+ */
+ public static void checkState(final boolean isValid, final String messageFormat, final Object... args) {
+ if (!isValid) {
+ throw new PathExtractionException(String.format(messageFormat, args));
+ }
+ }
}
diff --git a/src/main/java/com/amazon/ionpathextraction/pathcomponents/Index.java b/src/main/java/com/amazon/ionpathextraction/pathcomponents/Index.java
index 21c5d09..5e03fee 100644
--- a/src/main/java/com/amazon/ionpathextraction/pathcomponents/Index.java
+++ b/src/main/java/com/amazon/ionpathextraction/pathcomponents/Index.java
@@ -41,6 +41,10 @@ public Index(final int ordinal, final String[] annotations) {
this.ordinal = ordinal;
}
+ public Integer getOrdinal() {
+ return ordinal;
+ }
+
@Override
public boolean innerMatches(final MatchContext context) {
return ordinal == context.getReaderContainerIndex();
diff --git a/src/main/java/com/amazon/ionpathextraction/pathcomponents/PathComponent.java b/src/main/java/com/amazon/ionpathextraction/pathcomponents/PathComponent.java
index 3f88a28..bef9cbb 100644
--- a/src/main/java/com/amazon/ionpathextraction/pathcomponents/PathComponent.java
+++ b/src/main/java/com/amazon/ionpathextraction/pathcomponents/PathComponent.java
@@ -29,7 +29,7 @@
*/
public abstract class PathComponent {
- private final Annotations annotations;
+ protected final Annotations annotations;
PathComponent(final Annotations annotations) {
checkArgument(annotations != null, "fieldName cannot be null");
@@ -37,6 +37,14 @@ public abstract class PathComponent {
this.annotations = annotations;
}
+ public Annotations getAnnotations() {
+ return annotations;
+ }
+
+ public boolean hasAnnotations() {
+ return annotations.hasAnnotations();
+ }
+
/**
* Checks if this component matches the current reader position with the given configuration.
*
diff --git a/src/main/java/com/amazon/ionpathextraction/pathcomponents/Text.java b/src/main/java/com/amazon/ionpathextraction/pathcomponents/Text.java
index e597af5..76d6e92 100644
--- a/src/main/java/com/amazon/ionpathextraction/pathcomponents/Text.java
+++ b/src/main/java/com/amazon/ionpathextraction/pathcomponents/Text.java
@@ -46,6 +46,10 @@ public Text(final String fieldName, final String[] annotations) {
this.fieldName = fieldName;
}
+ public String getFieldName() {
+ return fieldName;
+ }
+
@Override
public boolean innerMatches(final MatchContext context) {
final IonReader reader = context.getReader();
@@ -53,7 +57,7 @@ public boolean innerMatches(final MatchContext context) {
return false;
}
- return context.getConfig().isMatchCaseInsensitive()
+ return context.getConfig().isMatchFieldsCaseInsensitive()
? fieldName.equalsIgnoreCase(reader.getFieldName())
: fieldName.equals(reader.getFieldName());
}
diff --git a/src/main/java/com/amazon/ionpathextraction/pathcomponents/Wildcard.java b/src/main/java/com/amazon/ionpathextraction/pathcomponents/Wildcard.java
index c5d0c0a..63525c4 100644
--- a/src/main/java/com/amazon/ionpathextraction/pathcomponents/Wildcard.java
+++ b/src/main/java/com/amazon/ionpathextraction/pathcomponents/Wildcard.java
@@ -32,7 +32,11 @@ public final class Wildcard extends PathComponent {
public static final String TEXT = "*";
public Wildcard(final String[] annotations) {
- super(new Annotations(annotations));
+ this(new Annotations(annotations));
+ }
+
+ public Wildcard(final Annotations annotations) {
+ super(annotations);
}
@Override
diff --git a/src/test/kotlin/com/amazon/ionpathextraction/FsmPathExtractorTest.kt b/src/test/kotlin/com/amazon/ionpathextraction/FsmPathExtractorTest.kt
new file mode 100644
index 0000000..41deb22
--- /dev/null
+++ b/src/test/kotlin/com/amazon/ionpathextraction/FsmPathExtractorTest.kt
@@ -0,0 +1,33 @@
+package com.amazon.ionpathextraction
+
+import org.junit.jupiter.api.assertThrows
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.MethodSource
+
+class FsmPathExtractorTest : PathExtractorTest() {
+ override fun PathExtractorBuilder.buildExtractor(): PathExtractor = buildStrict()
+
+ @ParameterizedTest
+ @MethodSource("testCases")
+ override fun testSearchPaths(testCase: Companion.TestCase) {
+ if (testCase.legacyOnly) {
+ assertThrows {
+ super.testSearchPaths(testCase)
+ }
+ } else {
+ super.testSearchPaths(testCase)
+ }
+ }
+
+ @ParameterizedTest
+ @MethodSource("testCases")
+ override fun testSearchPathsMatchCurrentValue(testCase: Companion.TestCase) {
+ if (testCase.legacyOnly) {
+ assertThrows {
+ super.testSearchPaths(testCase)
+ }
+ } else {
+ super.testSearchPaths(testCase)
+ }
+ }
+}
diff --git a/src/test/kotlin/com/amazon/ionpathextraction/PathExtractorImplTest.kt b/src/test/kotlin/com/amazon/ionpathextraction/PathExtractorImplTest.kt
new file mode 100644
index 0000000..7bb66d0
--- /dev/null
+++ b/src/test/kotlin/com/amazon/ionpathextraction/PathExtractorImplTest.kt
@@ -0,0 +1,5 @@
+package com.amazon.ionpathextraction
+
+class PathExtractorImplTest : PathExtractorTest() {
+ override fun PathExtractorBuilder.buildExtractor(): PathExtractor = buildLegacy()
+}
diff --git a/src/test/kotlin/com/amazon/ionpathextraction/PathExtractorTest.kt b/src/test/kotlin/com/amazon/ionpathextraction/PathExtractorTest.kt
index 6394e05..a847019 100644
--- a/src/test/kotlin/com/amazon/ionpathextraction/PathExtractorTest.kt
+++ b/src/test/kotlin/com/amazon/ionpathextraction/PathExtractorTest.kt
@@ -31,7 +31,7 @@ import java.io.File
import java.util.stream.Stream
import kotlin.test.assertTrue
-class PathExtractorTest {
+abstract class PathExtractorTest {
companion object {
private val ION = IonSystemBuilder.standard().build()
@@ -39,11 +39,15 @@ class PathExtractorTest {
val data: String,
val expected: IonList,
val stepOutNumber: Int,
- val hasMultipleTopLevelValues: Boolean) {
+ val hasMultipleTopLevelValues: Boolean,
+ val legacyOnly: Boolean = false,
+ val caseInsensitive: String = "None") {
override fun toString(): String = "SearchPaths=$searchPaths, " +
"Data=$data, " +
"Expected=$expected, " +
- "StepOutN=$stepOutNumber"
+ "StepOutN=$stepOutNumber" +
+ "Legacy=$legacyOnly" +
+ "CaseInsensitive=$caseInsensitive"
}
private fun IonValue.toText(): String {
@@ -79,7 +83,9 @@ class PathExtractorTest {
struct["data"].toText(),
struct["expected"] as IonList,
struct["stepOutN"]?.let { (it as IonInt).intValue() } ?: 0,
- struct["data"].hasTypeAnnotation("${'$'}datagram")
+ struct["data"].hasTypeAnnotation("${'$'}datagram"),
+ struct.hasTypeAnnotation("legacy"),
+ struct.get("caseInsensitive")?.toText() ?: "None"
)
}.stream()
@@ -100,6 +106,8 @@ class PathExtractorTest {
}
}
+ abstract fun PathExtractorBuilder.buildExtractor(): PathExtractor
+
private val emptyCallback: (IonReader) -> Int = { 0 }
private fun collectToIonList(stepOutN: Int): (IonReader, IonList) -> Int = { reader, out ->
@@ -109,12 +117,18 @@ class PathExtractorTest {
@ParameterizedTest
@MethodSource("testCases")
- fun testSearchPaths(testCase: TestCase) {
+ open fun testSearchPaths(testCase: TestCase) {
val builder = PathExtractorBuilder.standard()
testCase.searchPaths.forEach { builder.withSearchPath(it, collectToIonList(testCase.stepOutNumber)) }
- val extractor = builder.build()
+ when (testCase.caseInsensitive) {
+ "Both" -> builder.withMatchCaseInsensitive(true)
+ "Fields" -> builder.withMatchFieldNamesCaseInsensitive(true)
+ "None" -> Unit
+ else -> throw IllegalArgumentException("Unexpected value for caseInsensitive: ${testCase.caseInsensitive}")
+ }
+ val extractor = builder.buildExtractor()
val out = ION.newEmptyList()
extractor.match(ION.newReader(testCase.data), out)
@@ -124,7 +138,7 @@ class PathExtractorTest {
@ParameterizedTest
@MethodSource("testCases")
- fun testSearchPathsMatchCurrentValue(testCase: TestCase) {
+ open fun testSearchPathsMatchCurrentValue(testCase: TestCase) {
if (testCase.hasMultipleTopLevelValues) {
// For simplicity, skip tests with multiple top-level values. This will be tested via other test methods.
return
@@ -132,7 +146,7 @@ class PathExtractorTest {
val builder = PathExtractorBuilder.standard()
testCase.searchPaths.forEach { builder.withSearchPath(it, collectToIonList(testCase.stepOutNumber)) }
- val extractor = builder.build()
+ val extractor = builder.buildExtractor()
val out = ION.newEmptyList()
val reader = ION.newReader(testCase.data)
@@ -159,7 +173,7 @@ class PathExtractorTest {
timesCallback2Called++
0
}
- .build()
+ .buildExtractor()
api.match(extractor, ION.newReader("{ bar: 1, bar: 2, foo: 3 }"))
@@ -173,11 +187,11 @@ class PathExtractorTest {
fun matchCurrentValueOnlyMatchesCurrentValue() {
val extractor1 = PathExtractorBuilder.standard()
.withSearchPath("(foo)", collectToIonList(0))
- .build()
+ .buildExtractor()
val extractor2 = PathExtractorBuilder.standard()
.withSearchPath("(*)", collectToIonList(1))
.withMatchRelativePaths(true)
- .build()
+ .buildExtractor()
val reader = ION.newReader("{foo: 123, foo: [456]} {bar: [42, 43, 44]} end")
val out = ION.newEmptyList()
@@ -201,7 +215,7 @@ class PathExtractorTest {
fun matchCurrentValueWhenNotPositionedOnValueFails() {
val extractor = PathExtractorBuilder.standard()
.withSearchPath("(foo)") { _ -> 0 }
- .build()
+ .buildExtractor()
val reader = ION.newReader("[{foo: 1}]")
val exception = assertThrows { extractor.matchCurrentValue(reader) }
@@ -213,14 +227,14 @@ class PathExtractorTest {
fun readerAtInvalidDepth(api: API) {
val extractor = PathExtractorBuilder.standard()
.withSearchPath("(foo)") { _ -> 0 }
- .build()
+ .buildExtractor()
val reader = ION.newReader("[{foo: 1}]")
assertTrue(reader.next() != null)
reader.stepIn()
val exception = assertThrows { api.match(extractor, reader) }
- assertEquals("reader must be at depth zero, it was at:1", exception.message)
+ assertEquals("reader must be at depth zero, it was at: 1", exception.message)
}
@ParameterizedTest
@@ -229,7 +243,7 @@ class PathExtractorTest {
val extractor = PathExtractorBuilder.standard()
.withMatchRelativePaths(true)
.withSearchPath("(foo)", collectToIonList(0))
- .build()
+ .buildExtractor()
val reader = ION.newReader("[{foo: 1}]")
assertTrue(reader.next() != null)
@@ -241,30 +255,12 @@ class PathExtractorTest {
assertEquals(ION.singleValue("[1]"), out)
}
- @ParameterizedTest
- @EnumSource(API::class)
- fun caseInsensitive(api: API) {
- val extractor = PathExtractorBuilder.standard()
- .withMatchCaseInsensitive(true)
- .withSearchPath("(foo)", collectToIonList(0))
- .build()
-
- val out = ION.newEmptyList()
- api.match(extractor, ION.newReader("{FOO: 1, foO: 2}{foo: 3}{fOo: 4}{bar: 5}"), out)
-
- if (api == API.MATCH_CURRENT_VALUE) {
- assertEquals(ION.singleValue("[1,2]"), out)
- } else {
- assertEquals(ION.singleValue("[1,2,3,4]"), out)
- }
- }
-
@ParameterizedTest
@EnumSource(API::class)
fun stepOutMoreThanPermitted(api: API) {
val extractor = PathExtractorBuilder.standard()
.withSearchPath("(foo)") { _ -> 200 }
- .build()
+ .buildExtractor()
val exception = assertThrows {
api.match(extractor, ION.newReader("{foo: 1}"))
@@ -281,7 +277,7 @@ class PathExtractorTest {
.withMatchRelativePaths(true)
// even though you could step out twice in reader you can't given the initial reader depth
.withSearchPath("(bar)") { _ -> 2 }
- .build()
+ .buildExtractor()
val newReader = ION.newReader("{foo: {bar: 1}}")
newReader.next()
@@ -314,7 +310,7 @@ class PathExtractorTest {
0
}
}
- }.build()
+ }.buildExtractor()
api.match(extractor, ION.newReader("{foo: {bar: 1}}"))
@@ -390,7 +386,7 @@ class PathExtractorTest {
val actualData = ION.newValue(ionReader1)
assertEquals(value["col2"], actualData)
0
- }.withSearchPath("(col1)") { _ -> 0 }.build()
+ }.withSearchPath("(col1)") { _ -> 0 }.buildExtractor()
extractor.match(ionReader)
}
}
\ No newline at end of file
diff --git a/src/test/resources/test-cases.ion b/src/test/resources/test-cases.ion
index 2409739..459c3ce 100644
--- a/src/test/resources/test-cases.ion
+++ b/src/test/resources/test-cases.ion
@@ -238,13 +238,13 @@ search paths
// Multiple search paths -----------------------------------------------------------------
// all match
-{ searchPaths: [(0), (foo)], data: {bar: 1, foo: 2}, expected: [1, 2] }
+legacy::{ searchPaths: [(0), (foo)], data: {bar: 1, foo: 2}, expected: [1, 2] }
// none match
-{ searchPaths: [(1), [foo]], data: [0], expected: [] }
+legacy::{ searchPaths: [(1), [foo]], data: [0], expected: [] }
// multiple matchers match the same value
-{ searchPaths: [(1), (*)], data: [1, 2, 3], expected: [1, 2, 2, 3] }
+legacy::{ searchPaths: [(1), (*)], data: [1, 2, 3], expected: [1, 2, 2, 3] }
{ searchPaths: [(foo 1), (foo 2)], data: {foo: [0, 1, 2]}, expected: [1, 2] }
@@ -264,7 +264,9 @@ search paths
{ searchPath: (A::'*'), data: [A::1, 2], expected: [A::1] }
{ searchPath: ('$ion_extractor_field'::*), data: {'*': A::1, foo: 2}, expected: [A::1]}
{ searchPath: (A::B::C::*), data: [A::B::C::1, B::A::C::2], expected: [A::B::C::1] }
-{
+legacy::{
+ // annotations on ordinals or fields _could_ be supported in the FSM impl
+ // but usage appears non-existent at time of writing (Sep 2024)
searchPath: (foo A::2 bar),
data: {
foo: [0, 1, A::{bar: 1}],
@@ -272,3 +274,43 @@ search paths
},
expected: [1]
}
+legacy::{
+ searchPath: (f::foo),
+ data: { foo: f::17, foo: F::31, Foo: f::51, Foo: F::67 },
+ expected: [f::17],
+ // default is None but explicit here as it is essential to the test case
+ caseInsensitive: None
+}
+
+// case insensitivity
+{
+ searchPath: (foo),
+ data: $datagram::[{FOO: 1, foO: 2},{foo: 3},{fOo: 4},{bar: 5}],
+ expected: [1, 2, 3, 4],
+ caseInsensitive: Both
+}
+legacy::{
+ // these resolve to the same path for the Fsm Impl
+ searchPaths: [(foo), (Foo)],
+ data: $datagram::[{FOO: 1, foO: 2},{foo: 3},{Foo: 4},{bar: 5}],
+ expected: [1, 1, 2, 2, 3, 3, 4, 4],
+ caseInsensitive: Both
+}
+{
+ searchPath: (foo),
+ data: $datagram::[{FOO: 1, foO: 2},{foo: 3},{Foo: 4},{bar: 5}],
+ expected: [1, 2, 3, 4],
+ caseInsensitive: Fields
+}
+legacy::{
+ searchPath: F::(),
+ data: $datagram::[F::17, g::31, f::51],
+ expected: [F::17, f::51],
+ caseInsensitive: Both
+}
+{
+ searchPath: F::(),
+ data: $datagram::[F::17, g::31, f::51],
+ expected: [F::17],
+ caseInsensitive: Fields
+}