diff --git a/README.md b/README.md
index beb058c..cd03d36 100644
--- a/README.md
+++ b/README.md
@@ -35,7 +35,8 @@ data on reader: {foo: ["foo1", "foo2"] , bar: "myBarValue", bar: A::"annotatedVa
(*) - matches ["foo1", "foo2"], "myBarValue" and A::"annotatedValue"
() - matches {foo: ["foo1", "foo2"] , bar: "myBarValue", bar: A::"annotatedValue"}
(bar) - matches "myBarValue" and A::"annotatedValue"
-(A::bar) - matches A::"annotatedValue"
+(A::*) - matches A::"annotatedValue"
+(A::bar) - matches A::"annotatedValue" (is not supported in "strict" mode, see #Optimization below)
```
The `()` matcher matches all values in the stream but you can also use annotations with it, example:
@@ -63,6 +64,17 @@ PathExtractorBuilder.standard()
see PathExtractorBuilder [javadoc](https://static.javadoc.io/com.amazon.ion/ion-java-path-extraction/1.0.1/com/amazon/ionpathextraction/PathExtractorBuilder.html) for more information on configuration options and search path registration.
+### Optimization
+
+There are two implementations: "strict" and "legacy". The strict implementation is more performant, particularly as the
+number of fields extracted grows. By default `PathExtractorBuilder.build()` will try to build you a strict extractor and
+will fall back to the legacy extractor. You may be explicit that you want a specific implementation by calling
+`PathExtractorBuilder.buildStrict()` or `PathExtractorBuilder.buildLegacy()`.
+
+The strict implementation supports basic paths, with field names, index ordinals, and annotations on top-level-values or
+wildcards. It does not support mixing field names and index ordinals, multiple callbacks on the same path or annotations
+on non-wildcard values. Case-insensitive annotations matching is not supported.
+
### Notification
Each time the `PathExtractor` encounters a value that matches a registered search path it will invoke the respective
callback passing the reader positioned at the current value. See `PathExtractorBuilder#withSearchPath` methods for more
@@ -165,6 +177,7 @@ binary file is ~81M and the text file ~95M. There are four benchmarks types:
1. `partial`: materializes a single struct fields as `IonValue` using a path extractor.a
1. `partialNoDom`: access the java representation directly of a single struct field without materializing an `IonValue`.
+All the path extractor benchmarks are run in "strict" mode.
There is a binary and a text version for all four benchmark types. See the [PathExtractorBenchmark](https://github.com/amzn/ion-java-path-extraction/blob/master/src/jmh/java/com/amazon/ionpathextraction/benchmarks/PathExtractorBenchmark.java) class for
more details.
@@ -173,14 +186,14 @@ Results below, higher is better.
```
Benchmark Mode Cnt Score Error Units
-PathExtractorBenchmark.domBinary thrpt 10 1.128 ± 0.050 ops/s
-PathExtractorBenchmark.domText thrpt 10 0.601 ± 0.019 ops/s
-PathExtractorBenchmark.fullBinary thrpt 10 1.227 ± 0.014 ops/s
-PathExtractorBenchmark.fullText thrpt 10 0.665 ± 0.010 ops/s
-PathExtractorBenchmark.partialBinary thrpt 10 14.912 ± 0.271 ops/s
-PathExtractorBenchmark.partialBinaryNoDom thrpt 10 15.650 ± 0.297 ops/s
-PathExtractorBenchmark.partialText thrpt 10 1.343 ± 0.029 ops/s
-PathExtractorBenchmark.partialTextNoDom thrpt 10 1.307 ± 0.015 ops/s
+PathExtractorBenchmark.domBinary thrpt 5 5.060 ± 0.075 ops/s
+PathExtractorBenchmark.domText thrpt 5 1.172 ± 0.040 ops/s
+PathExtractorBenchmark.fullBinary thrpt 5 6.011 ± 0.107 ops/s
+PathExtractorBenchmark.fullText thrpt 5 1.214 ± 0.025 ops/s
+PathExtractorBenchmark.partialBinary thrpt 5 57.329 ± 13.585 ops/s
+PathExtractorBenchmark.partialBinaryNoDom thrpt 5 56.598 ± 2.424 ops/s
+PathExtractorBenchmark.partialText thrpt 5 2.430 ± 0.073 ops/s
+PathExtractorBenchmark.partialTextNoDom thrpt 5 2.416 ± 0.175 ops/s
```
Using the path extractor has equivalent performance for both text and binary when fully materializing the document and
diff --git a/build.gradle b/build.gradle
index b91801a..e0ecb5e 100644
--- a/build.gradle
+++ b/build.gradle
@@ -83,10 +83,10 @@ jmh {
failOnError = true
// warmup
- warmupIterations = 5
+ warmupIterations = 2
// iterations
- iterations = 10
+ iterations = 5
}
checkstyle {
diff --git a/src/jmh/java/com/amazon/ionpathextraction/benchmarks/PathExtractorBenchmark.java b/src/jmh/java/com/amazon/ionpathextraction/benchmarks/PathExtractorBenchmark.java
index 9eff26a..3d4eb9b 100644
--- a/src/jmh/java/com/amazon/ionpathextraction/benchmarks/PathExtractorBenchmark.java
+++ b/src/jmh/java/com/amazon/ionpathextraction/benchmarks/PathExtractorBenchmark.java
@@ -15,6 +15,7 @@
import com.amazon.ion.IonReader;
import com.amazon.ion.IonSystem;
+import com.amazon.ion.IonValue;
import com.amazon.ion.IonWriter;
import com.amazon.ion.system.IonBinaryWriterBuilder;
import com.amazon.ion.system.IonReaderBuilder;
@@ -28,6 +29,7 @@
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
+import java.util.Iterator;
import java.util.function.Function;
import java.util.stream.Stream;
import org.openjdk.jmh.annotations.Benchmark;
@@ -171,7 +173,14 @@ public Object partialTextNoDom(final ThreadState threadState) {
*/
@Benchmark
public Object domBinary() {
- return DOM_FACTORY.getLoader().load(bytesBinary);
+ IonReader reader = newReader(new ByteArrayInputStream(bytesBinary));
+ // iterating over Top-Level-Values is more apples:apples to path extractor
+ // vs loading all as a datagram
+ Iterator iter = DOM_FACTORY.iterate(reader);
+ while (iter.hasNext()) {
+ iter.next();
+ }
+ return reader;
}
/**
@@ -179,7 +188,14 @@ public Object domBinary() {
*/
@Benchmark
public Object domText() {
- return DOM_FACTORY.getLoader().load(bytesText);
+ IonReader reader = newReader(new ByteArrayInputStream(bytesText));
+ // iterating over Top-Level-Values is more apples:apples to path extractor
+ // vs loading all as a datagram
+ Iterator iter = DOM_FACTORY.iterate(reader);
+ while (iter.hasNext()) {
+ iter.next();
+ }
+ return reader;
}
/**
diff --git a/src/main/java/com/amazon/ionpathextraction/FsmMatcher.java b/src/main/java/com/amazon/ionpathextraction/FsmMatcher.java
new file mode 100644
index 0000000..30d2650
--- /dev/null
+++ b/src/main/java/com/amazon/ionpathextraction/FsmMatcher.java
@@ -0,0 +1,43 @@
+/*
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License").
+ * You may not use this file except in compliance with the License.
+ * A copy of the License is located at:
+ *
+ * http://aws.amazon.com/apache2.0/
+ *
+ * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific
+ * language governing permissions and limitations under the License.
+ */
+
+package com.amazon.ionpathextraction;
+
+import com.amazon.ion.IonReader;
+import java.util.function.BiFunction;
+import java.util.function.Supplier;
+
+/**
+ * Base class for match states in the Finite State Machine matching implementation.
+ */
+abstract class FsmMatcher {
+ /**
+ * Callback for match state. May be null.
+ */
+ BiFunction callback;
+
+ /**
+ * Indicates there are no possible child transitions.
+ */
+ boolean terminal = false;
+
+ /**
+ * Return the child matcher for the given reader context.
+ * Return null if there is no match.
+ *
+ * @param position will be -1 for top-level-values, otherwise will be the position ordinal
+ * of the value in the container, both for sequences and structs.
+ * @param fieldName will be non-null only for struct values.
+ */
+ abstract FsmMatcher transition(String fieldName, int position, Supplier annotations);
+}
diff --git a/src/main/java/com/amazon/ionpathextraction/FsmMatcherBuilder.java b/src/main/java/com/amazon/ionpathextraction/FsmMatcherBuilder.java
new file mode 100644
index 0000000..030dbfb
--- /dev/null
+++ b/src/main/java/com/amazon/ionpathextraction/FsmMatcherBuilder.java
@@ -0,0 +1,281 @@
+/*
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License").
+ * You may not use this file except in compliance with the License.
+ * A copy of the License is located at:
+ *
+ * http://aws.amazon.com/apache2.0/
+ *
+ * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific
+ * language governing permissions and limitations under the License.
+ */
+
+package com.amazon.ionpathextraction;
+
+import com.amazon.ion.IonReader;
+import com.amazon.ionpathextraction.internal.Annotations;
+import com.amazon.ionpathextraction.pathcomponents.Index;
+import com.amazon.ionpathextraction.pathcomponents.PathComponent;
+import com.amazon.ionpathextraction.pathcomponents.Text;
+import com.amazon.ionpathextraction.pathcomponents.Wildcard;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.function.BiFunction;
+import java.util.function.Supplier;
+import java.util.stream.Collectors;
+
+/**
+ * Builds a root FsmMatcher for a set of SearchPaths.
+ *
+ * One key principle in the implementation is to close over as much branching as possible at build time.
+ * For example: for a case-insensitive field lookup, lower case the field names once, at build time.
+ *
+ * The second key principle is that there should be at-most-one Matcher state for a given reader context.
+ * So any combination of different paths which could both be active for the same reader context are disallowed.
+ * For example: allowing a mix of field names and ordinal positions for a given sub-path.
+ *
+ * Beyond that, there are some usage patterns which could be included, such as annotations filtering on
+ * field names or ordinals, but for which there was no observed usage.
+ */
+class FsmMatcherBuilder {
+ private final PathTreeNode root = new PathTreeNode();
+ private final boolean caseInsensitiveAll;
+ private final boolean caseInsensitiveFields;
+
+ FsmMatcherBuilder(final boolean caseInsensitiveAll, final boolean caseInsensitiveFields) {
+ this.caseInsensitiveAll = caseInsensitiveAll;
+ this.caseInsensitiveFields = caseInsensitiveFields;
+ }
+
+ /**
+ * Incorporate the searchPath into the matcher tree to be built.
+ *
+ * @throws UnsupportedPathExpression if the SearchPath is not supported.
+ */
+ void accept(final SearchPath searchPath) {
+ List steps = searchPath.getNormalizedPath();
+ PathTreeNode currentNode = root;
+ for (PathComponent step : steps) {
+ currentNode = currentNode.acceptStep(step);
+ }
+ currentNode.setCallback(searchPath.getCallback());
+ }
+
+ /**
+ * Build the FsmMatcher for the set of paths.
+ *
+ * @throws UnsupportedPathExpression if the combination of SearchPaths is not supported.
+ */
+ FsmMatcher build() {
+ return root.buildMatcher();
+ }
+
+ /**
+ * Mutable builder node to model the path tree before building into a FsmMatcher.
+ */
+ private class PathTreeNode {
+ BiFunction callback;
+ PathTreeNode wildcard;
+ Map annotatedSplats = new HashMap<>();
+ Map fields = new HashMap<>();
+ Map indexes = new HashMap<>();
+
+ /**
+ * Find or create a new PathTreeNode for the child step.
+ *
+ * @return the new or existing node.
+ * @throws UnsupportedPathExpression if the step contains path components that are not supported
+ */
+ private PathTreeNode acceptStep(final PathComponent step) {
+ if (step.hasAnnotations() && caseInsensitiveAll) {
+ throw new UnsupportedPathExpression(
+ "Case Insensitive Matching of Annotations is not yet supported by this matcher.\n"
+ + "Use the legacy matcher or the withMatchFieldNamesCaseInsensitive option instead.");
+ }
+
+ PathTreeNode child;
+ if (step instanceof Wildcard) {
+ if (step.hasAnnotations()) {
+ child = annotatedSplats.computeIfAbsent(step.getAnnotations(), a -> new PathTreeNode());
+ } else {
+ if (wildcard == null) {
+ wildcard = new PathTreeNode();
+ }
+ child = wildcard;
+ }
+ } else {
+ if (step.hasAnnotations()) {
+ // this is not too bad to do, but it takes care to do without impacting the non-annotated case
+ // which is the majority of usage. one would also want to mind the principle to avoid multiple
+ // distinct match paths for a given reader context and only allow either annotated or not
+ // for a given field name or index ordinal.
+ throw new UnsupportedPathExpression("Annotations are only supported on wildcards!");
+ }
+
+ if (step instanceof Text) {
+ String fieldName = caseInsensitiveFields
+ ? ((Text) step).getFieldName().toLowerCase()
+ : ((Text) step).getFieldName();
+ child = fields.computeIfAbsent(fieldName, f -> new PathTreeNode());
+ } else if (step instanceof Index) {
+ child = indexes.computeIfAbsent(((Index) step).getOrdinal(), i -> new PathTreeNode());
+ } else {
+ throw new IllegalArgumentException("step of unknown runtime type: " + step.getClass());
+ }
+ }
+ return child;
+ }
+
+ private void setCallback(final BiFunction callback) {
+ if (this.callback == null) {
+ this.callback = callback;
+ } else {
+ // this would actually be pretty simple to do: just create a ComposedCallback of BiFunctions.
+ throw new UnsupportedPathExpression("Cannot set multiple callbacks for same path!");
+ }
+ }
+
+ private FsmMatcher buildMatcher() {
+ List> matchers = new ArrayList<>();
+ if (wildcard != null) {
+ matchers.add(new SplatMatcher<>(wildcard.buildMatcher(), callback));
+ }
+ if (!annotatedSplats.isEmpty()) {
+ List> children = new ArrayList<>(annotatedSplats.size());
+ List annotations = new ArrayList<>(annotatedSplats.size());
+ for (Map.Entry entry : annotatedSplats.entrySet()) {
+ children.add(entry.getValue().buildMatcher());
+ annotations.add(entry.getKey().getAnnotations());
+ }
+ matchers.add(new AnnotationsMatcher<>(annotations, children));
+ }
+ if (!fields.isEmpty()) {
+ Map> children = fields.entrySet().stream()
+ .collect(Collectors.toMap(Map.Entry::getKey, (e) -> e.getValue().buildMatcher()));
+ FsmMatcher fieldMatcher = caseInsensitiveFields
+ ? new CaseInsensitiveFieldMatcher<>(children, callback)
+ : new FieldMatcher<>(children, callback);
+ matchers.add(fieldMatcher);
+ }
+ if (!indexes.isEmpty()) {
+ Map> children = indexes.entrySet().stream()
+ .collect(Collectors.toMap(Map.Entry::getKey, (e) -> e.getValue().buildMatcher()));
+ matchers.add(new IndexMatcher<>(children, callback));
+ }
+
+ if (matchers.isEmpty()) {
+ return new TerminalMatcher<>(callback);
+ } else if (matchers.size() == 1) {
+ return matchers.get(0);
+ } else {
+ // the main issue with allowing more than one is that it means that any given match context
+ // may produce multiple matches, and search path writers may become reliant on the order
+ // in which callbacks for such cases are called. And in the general case, that might mean
+ // a crazy mix between the different types of matching, which devolves to the for-each loop
+ // we see in the PathExtractorImpl.
+ // That seems like a lot of complexity for a usage pattern of questionable value.
+ // So if you're reading this, and you think "oh this is a silly restriction", then take
+ // the time to understand why it's important to the path writer and reconsider accordingly.
+ throw new UnsupportedPathExpression(
+ "Only one variant of wildcard, annotated wildcard, field names, or ordinals is supported!");
+ }
+ }
+ }
+
+ private static class SplatMatcher extends FsmMatcher {
+ FsmMatcher child;
+
+ SplatMatcher(
+ final FsmMatcher child,
+ final BiFunction callback) {
+ this.child = child;
+ this.callback = callback;
+ }
+
+ @Override
+ FsmMatcher transition(final String fieldName, final int position, final Supplier annotations) {
+ return child;
+ }
+ }
+
+ private static class FieldMatcher extends FsmMatcher {
+ Map> fields;
+
+ FieldMatcher(
+ final Map> fields,
+ final BiFunction callback) {
+ this.fields = fields;
+ this.callback = callback;
+ }
+
+ @Override
+ FsmMatcher transition(final String fieldName, final int position, final Supplier annotations) {
+ return fields.get(fieldName);
+ }
+ }
+
+ private static class CaseInsensitiveFieldMatcher extends FieldMatcher {
+ CaseInsensitiveFieldMatcher(
+ final Map> fields,
+ final BiFunction callback) {
+ super(fields, callback);
+ }
+
+ @Override
+ FsmMatcher transition(final String fieldName, final int position, final Supplier annotations) {
+ return fields.get(fieldName.toLowerCase());
+ }
+ }
+
+ private static class IndexMatcher extends FsmMatcher {
+ Map> indexes;
+
+ IndexMatcher(
+ final Map> indexes,
+ final BiFunction callback) {
+ this.indexes = indexes;
+ this.callback = callback;
+ }
+
+ @Override
+ FsmMatcher transition(final String fieldName, final int position, final Supplier annotations) {
+ return indexes.get(position);
+ }
+ }
+
+ private static class TerminalMatcher extends FsmMatcher {
+ TerminalMatcher(final BiFunction callback) {
+ this.callback = callback;
+ this.terminal = true;
+ }
+
+ @Override
+ FsmMatcher transition(final String fieldName, final int position, final Supplier annotations) {
+ return null;
+ }
+ }
+
+ private static class AnnotationsMatcher extends FsmMatcher {
+ List candidates;
+ List> matchers;
+
+ AnnotationsMatcher(final List candidates, final List> matchers) {
+ this.candidates = candidates;
+ this.matchers = matchers;
+ }
+
+ @Override
+ FsmMatcher transition(final String fieldName, final int position, final Supplier annotations) {
+ for (int i = 0; i < candidates.size(); i++) {
+ if (Arrays.equals(candidates.get(i), annotations.get())) {
+ return matchers.get(i);
+ }
+ }
+ return null;
+ }
+ }
+}
diff --git a/src/main/java/com/amazon/ionpathextraction/FsmPathExtractor.java b/src/main/java/com/amazon/ionpathextraction/FsmPathExtractor.java
new file mode 100644
index 0000000..6038a4d
--- /dev/null
+++ b/src/main/java/com/amazon/ionpathextraction/FsmPathExtractor.java
@@ -0,0 +1,153 @@
+/*
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License").
+ * You may not use this file except in compliance with the License.
+ * A copy of the License is located at:
+ *
+ * http://aws.amazon.com/apache2.0/
+ *
+ * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific
+ * language governing permissions and limitations under the License.
+ */
+
+package com.amazon.ionpathextraction;
+
+import static com.amazon.ionpathextraction.internal.Preconditions.checkArgument;
+import static com.amazon.ionpathextraction.internal.Preconditions.checkState;
+
+import com.amazon.ion.IonReader;
+import com.amazon.ion.IonType;
+import com.amazon.ionpathextraction.internal.PathExtractorConfig;
+import java.util.List;
+import java.util.function.BiFunction;
+
+/**
+ * A PathExtractor modeled as a Finite State Machine.
+ *
+ * Compared to the PathExtractorImpl, this supports a narrower set of
+ * SearchPaths and their combinations, but is more performant, particularly
+ * when a large number of field names are searched for.
+ *
+ * A more comprehensive explanation of the strictness from a user PoV can be
+ * found on the PathExtractorBuilder.buildStrict() API method. Notes on the
+ * 'why' can be found in the FsmMatcherBuilder.
+ */
+class FsmPathExtractor implements PathExtractor {
+ private final FsmMatcher rootMatcher;
+ private final PathExtractorConfig config;
+
+ private FsmPathExtractor(
+ final FsmMatcher rootMatcher,
+ final PathExtractorConfig config) {
+ this.rootMatcher = rootMatcher;
+ this.config = config;
+ }
+
+ static FsmPathExtractor create(
+ final List> searchPaths,
+ final PathExtractorConfig config) {
+ FsmMatcherBuilder builder = new FsmMatcherBuilder<>(
+ config.isMatchCaseInsensitive(),
+ config.isMatchFieldsCaseInsensitive());
+ for (SearchPath path : searchPaths) {
+ builder.accept(path);
+ }
+
+ return new FsmPathExtractor<>(builder.build(), config);
+ }
+
+ @Override
+ public void match(final IonReader reader) {
+ match(reader, null);
+ }
+
+ @Override
+ public void match(final IonReader reader, final T context) {
+ checkArgument(reader.getDepth() == 0 || config.isMatchRelativePaths(),
+ "reader must be at depth zero, it was at: %s", reader.getDepth());
+
+ while (reader.next() != null) {
+ matchCurrentValue(reader, context);
+ }
+ }
+
+ @Override
+ public void matchCurrentValue(final IonReader reader) {
+ matchCurrentValue(reader, null);
+ }
+
+ @Override
+ public void matchCurrentValue(final IonReader reader, final T context) {
+ checkArgument(reader.getDepth() == 0 || config.isMatchRelativePaths(),
+ "reader must be at depth zero, it was at: %s", reader.getDepth());
+ checkArgument(reader.getType() != null,
+ "reader must be positioned at a value; call IonReader.next() first.");
+
+ matchRecursive(reader, rootMatcher, context, -1, reader.getDepth());
+ }
+
+ private int matchRecursive(
+ final IonReader reader,
+ final FsmMatcher matcher,
+ final T context,
+ final int position,
+ final int initialDepth) {
+ FsmMatcher child = matcher.transition(reader.getFieldName(), position, reader::getTypeAnnotations);
+ if (child == null) {
+ return 0;
+ }
+
+ if (child.callback != null) {
+ int stepOut = invokeCallback(reader, child.callback, initialDepth, context);
+ if (stepOut > 0) {
+ return stepOut;
+ }
+ }
+
+ if (IonType.isContainer(reader.getType()) && !child.terminal) {
+ reader.stepIn();
+ int childPos = 0;
+ while (reader.next() != null) {
+ int stepOut = matchRecursive(reader, child, context, childPos++, initialDepth);
+ if (stepOut > 0) {
+ reader.stepOut();
+ return stepOut - 1;
+ }
+ }
+ reader.stepOut();
+ }
+
+ return 0;
+ }
+
+ private int invokeCallback(
+ final IonReader reader,
+ final BiFunction callback,
+ final int initialReaderDepth,
+ final T context) {
+ int previousReaderDepth = reader.getDepth();
+
+ int stepOutTimes = callback.apply(reader, context);
+ int newReaderDepth = reader.getDepth();
+
+ checkState(previousReaderDepth == newReaderDepth,
+ "Reader must be at same depth when returning from callbacks. initial: %s, new: %s",
+ previousReaderDepth,
+ newReaderDepth);
+
+ // we don't allow users to step out the initial reader depth
+ int readerRelativeDepth = reader.getDepth() - initialReaderDepth;
+
+ checkState(stepOutTimes <= readerRelativeDepth,
+ STEP_OUT_TOO_FAR_MSG,
+ stepOutTimes,
+ readerRelativeDepth);
+
+ return stepOutTimes;
+ }
+
+ private static final String STEP_OUT_TOO_FAR_MSG =
+ "Callback return cannot be greater than the reader current relative depth. "
+ + "return: %s, relative reader depth: %s";
+}
diff --git a/src/main/java/com/amazon/ionpathextraction/PathExtractorBuilder.java b/src/main/java/com/amazon/ionpathextraction/PathExtractorBuilder.java
index bd6b597..080269d 100644
--- a/src/main/java/com/amazon/ionpathextraction/PathExtractorBuilder.java
+++ b/src/main/java/com/amazon/ionpathextraction/PathExtractorBuilder.java
@@ -35,6 +35,7 @@ public final class PathExtractorBuilder {
private final List> searchPaths = new ArrayList<>();
private boolean matchRelativePaths;
private boolean matchCaseInsensitive;
+ private boolean matchFieldsCaseInsensitive;
private PathExtractorBuilder() {
}
@@ -48,19 +49,69 @@ public static PathExtractorBuilder standard() {
PathExtractorBuilder builder = new PathExtractorBuilder<>();
builder.matchCaseInsensitive = DEFAULT_CASE_INSENSITIVE;
builder.matchRelativePaths = DEFAULT_MATCH_RELATIVE_PATHS;
+ builder.matchFieldsCaseInsensitive = DEFAULT_CASE_INSENSITIVE;
return builder;
}
/**
* Instantiates a thread safe {@link PathExtractor} configured by this builder.
- *
+ * Attempts to build a "strict" PathExtractor which is much more performant, particularly for extractions with many
+ * field names. Falls back to the "legacy" implementation if the paths registered are incompatible with the "strict"
+ * implementation.
+ *
+ * Use buildStrict to ensure the more optimal implementation is used.
* @return new {@link PathExtractor} instance.
*/
public PathExtractor build() {
- return new PathExtractorImpl<>(searchPaths, new PathExtractorConfig(matchRelativePaths, matchCaseInsensitive));
+ try {
+ return buildStrict();
+ } catch (UnsupportedPathExpression e) {
+ return buildLegacy();
+ }
}
+ /**
+ * Instantiate a "stricter" and more optimized PathExtractor.
+ *
+ * Supports search paths where there is only one "variant" of step type from each parent step, and only one callback
+ * per state.
+ * Annotations matching is only supported on the root or wildcards.
+ * Case insensitivity is supported on field names, not annotations.
+ *
+ * Examples of supported paths (and any combination of the below):
+ * `A::()`
+ * `(foo bar)`
+ * `(foo qux)`
+ * `(spam 0)`
+ * `(spam 1)`
+ * `(quid * quo)`
+ * `(lorem A::B::* ipsum)`
+ *
+ * Examples of unsupported paths:
+ * `(a::foo)` annotations on field names not supported, yet.
+ * `(a::1)` annotations on index ordinals not supported, yet.
+ * `(foo bar) (foo 1) (foo *)` combination of field names, index ordinals or wildcards not supported.
+ * `a::() ()` combination of annotated and non-annotated root (or other wildcard) matching.
+ *
+ * @return new {@link PathExtractor} instance.
+ * @throws UnsupportedPathExpression if any search path or the paths combined, are not supported.
+ */
+ public PathExtractor buildStrict() {
+ return FsmPathExtractor.create(searchPaths,
+ new PathExtractorConfig(matchRelativePaths, matchCaseInsensitive, matchFieldsCaseInsensitive));
+ }
+
+ /**
+ * Instantiate a "legacy" PathExtractor implementation.
+ * The returned PathExtractor is inefficient when a large number of field names is searched,
+ * but a wider variety of search paths are supported.
+ */
+ public PathExtractor buildLegacy() {
+ return new PathExtractorImpl<>(searchPaths,
+ new PathExtractorConfig(matchRelativePaths, matchCaseInsensitive, matchFieldsCaseInsensitive));
+ }
+
/**
* Sets matchRelativePaths config. When true the path extractor will accept readers at any depth, when false the
* reader must be at depth zero.
@@ -78,8 +129,9 @@ public PathExtractorBuilder withMatchRelativePaths(final boolean matchRelativ
}
/**
- * Sets matchCaseInsensitive config. When true the path extractor will match fields ignoring case, when false the
- * path extractor will mach respecting the path components case.
+ * Sets matchCaseInsensitive config. When true the path extractor will match fields _and annotations_ ignoring case.
+ * When false the path extractor will match respecting the path components case.
+ * To set case insensitivity for _only field names_ use the `withMatchFieldNamesCaseInsensitive` builder.
*
*
* defaults to false.
@@ -89,6 +141,23 @@ public PathExtractorBuilder withMatchRelativePaths(final boolean matchRelativ
*/
public PathExtractorBuilder withMatchCaseInsensitive(final boolean matchCaseInsensitive) {
this.matchCaseInsensitive = matchCaseInsensitive;
+ this.matchFieldsCaseInsensitive = matchCaseInsensitive;
+
+ return this;
+ }
+
+ /**
+ * Sets matchFieldNamesCaseInsensitive config. When true the path extractor will match field names ignoring case.
+ * For example: 'Foo' will match 'foo'.
+ *
+ *
+ * defaults to false.
+ *
+ * @param matchCaseInsensitive new config value.
+ * @return builder for chaining.
+ */
+ public PathExtractorBuilder withMatchFieldNamesCaseInsensitive(final boolean matchCaseInsensitive) {
+ this.matchFieldsCaseInsensitive = matchCaseInsensitive;
return this;
}
diff --git a/src/main/java/com/amazon/ionpathextraction/PathExtractorImpl.java b/src/main/java/com/amazon/ionpathextraction/PathExtractorImpl.java
index f7284c1..9cac3c6 100644
--- a/src/main/java/com/amazon/ionpathextraction/PathExtractorImpl.java
+++ b/src/main/java/com/amazon/ionpathextraction/PathExtractorImpl.java
@@ -63,7 +63,7 @@ public void match(final IonReader reader) {
@Override
public void match(final IonReader reader, final T context) {
checkArgument(reader.getDepth() == 0 || config.isMatchRelativePaths(),
- "reader must be at depth zero, it was at:" + reader.getDepth());
+ "reader must be at depth zero, it was at: " + reader.getDepth());
// short circuit when there are zero SearchPaths
if (searchPaths.isEmpty()) {
@@ -83,7 +83,7 @@ public void matchCurrentValue(final IonReader reader) {
@Override
public void matchCurrentValue(final IonReader reader, final T context) {
checkArgument(reader.getDepth() == 0 || config.isMatchRelativePaths(),
- "reader must be at depth zero, it was at:" + reader.getDepth());
+ "reader must be at depth zero, it was at: " + reader.getDepth());
checkArgument(reader.getType() != null,
"reader must be positioned at a value; call IonReader.next() first.");
@@ -189,7 +189,7 @@ private boolean isTerminal(final int pathComponentIndex, final SearchPath sea
private static class Tracker {
private final Deque>> stack;
- private int initialReaderDepth;
+ private final int initialReaderDepth;
Tracker(final int size, final List> searchPaths, final int initialReaderDepth) {
stack = new ArrayDeque<>(size);
diff --git a/src/main/java/com/amazon/ionpathextraction/SearchPath.java b/src/main/java/com/amazon/ionpathextraction/SearchPath.java
index 5e57927..9f04198 100644
--- a/src/main/java/com/amazon/ionpathextraction/SearchPath.java
+++ b/src/main/java/com/amazon/ionpathextraction/SearchPath.java
@@ -17,6 +17,8 @@
import com.amazon.ionpathextraction.internal.Annotations;
import com.amazon.ionpathextraction.internal.MatchContext;
import com.amazon.ionpathextraction.pathcomponents.PathComponent;
+import com.amazon.ionpathextraction.pathcomponents.Wildcard;
+import java.util.ArrayList;
import java.util.List;
import java.util.function.BiFunction;
@@ -46,6 +48,19 @@ int size() {
return pathComponents.size();
}
+ /**
+ * Produces a "normalized" path for the SearchPath.
+ * Basically: the SearchPath has the annotations (or not) for matching top-level-values.
+ * The "normalized" path treats this as an explicit Wildcard step and adds it to the head
+ * of the PathComponents.
+ */
+ List getNormalizedPath() {
+ List normalizedPath = new ArrayList<>(pathComponents.size() + 1);
+ normalizedPath.add(new Wildcard(annotations));
+ normalizedPath.addAll(pathComponents);
+ return normalizedPath;
+ }
+
/**
* Callback to be invoked when the Search Path is matched.
*/
@@ -67,4 +82,4 @@ boolean partialMatchAt(final MatchContext context) {
return false;
}
-}
\ No newline at end of file
+}
diff --git a/src/main/java/com/amazon/ionpathextraction/UnsupportedPathExpression.java b/src/main/java/com/amazon/ionpathextraction/UnsupportedPathExpression.java
new file mode 100644
index 0000000..0806ce6
--- /dev/null
+++ b/src/main/java/com/amazon/ionpathextraction/UnsupportedPathExpression.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License").
+ * You may not use this file except in compliance with the License.
+ * A copy of the License is located at:
+ *
+ * http://aws.amazon.com/apache2.0/
+ *
+ * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific
+ * language governing permissions and limitations under the License.
+ */
+
+package com.amazon.ionpathextraction;
+
+/**
+ * Thrown when trying to build a "strict" PathExtractor if a SearchPath or set of paths is not supported.
+ * A user should rewrite their extraction to match the strictness invariant or use the "legacy" PathExtractor.
+ */
+public class UnsupportedPathExpression extends RuntimeException {
+ public UnsupportedPathExpression(final String msg) {
+ super(msg);
+ }
+}
diff --git a/src/main/java/com/amazon/ionpathextraction/internal/Annotations.java b/src/main/java/com/amazon/ionpathextraction/internal/Annotations.java
index df3e823..1ea737d 100644
--- a/src/main/java/com/amazon/ionpathextraction/internal/Annotations.java
+++ b/src/main/java/com/amazon/ionpathextraction/internal/Annotations.java
@@ -13,6 +13,7 @@
package com.amazon.ionpathextraction.internal;
+import java.util.Arrays;
import java.util.stream.IntStream;
/**
@@ -24,7 +25,9 @@
* Internal only. Not intended for application use.
*
*/
-public class Annotations {
+public final class Annotations {
+
+ public static final Annotations EMPTY = new Annotations(new String[] {});
final String[] rawAnnotations;
@@ -35,6 +38,14 @@ public Annotations(final String[] rawAnnotations) {
this.rawAnnotations = rawAnnotations;
}
+ public String[] getAnnotations() {
+ return rawAnnotations;
+ }
+
+ public boolean hasAnnotations() {
+ return rawAnnotations.length > 0;
+ }
+
/**
* returns true if it matches on the annotations provided.
*/
@@ -51,4 +62,20 @@ private static boolean arrayEquals(final String[] left, final String[] right, fi
return IntStream.range(0, left.length)
.allMatch(i -> ignoreCase ? left[i].equalsIgnoreCase(right[i]) : left[i].equals(right[i]));
}
+
+ @Override
+ public boolean equals(final Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (!(o instanceof Annotations)) {
+ return false;
+ }
+ return Arrays.equals(rawAnnotations, ((Annotations) o).rawAnnotations);
+ }
+
+ @Override
+ public int hashCode() {
+ return Arrays.hashCode(rawAnnotations);
+ }
}
diff --git a/src/main/java/com/amazon/ionpathextraction/internal/PathExtractorConfig.java b/src/main/java/com/amazon/ionpathextraction/internal/PathExtractorConfig.java
index df8951c..42e6bec 100644
--- a/src/main/java/com/amazon/ionpathextraction/internal/PathExtractorConfig.java
+++ b/src/main/java/com/amazon/ionpathextraction/internal/PathExtractorConfig.java
@@ -20,10 +20,18 @@ public final class PathExtractorConfig {
private final boolean matchRelativePaths;
private final boolean matchCaseInsensitive;
+ private final boolean matchFieldsCaseInsensitive;
- public PathExtractorConfig(final boolean matchRelativePaths, final boolean matchCaseInsensitive) {
+ /**
+ * Instantiate a PathExtractorConfig.
+ */
+ public PathExtractorConfig(
+ final boolean matchRelativePaths,
+ final boolean matchCaseInsensitive,
+ final boolean matchFieldsCaseInsensitive) {
this.matchRelativePaths = matchRelativePaths;
this.matchCaseInsensitive = matchCaseInsensitive;
+ this.matchFieldsCaseInsensitive = matchFieldsCaseInsensitive;
}
public boolean isMatchRelativePaths() {
@@ -33,4 +41,8 @@ public boolean isMatchRelativePaths() {
public boolean isMatchCaseInsensitive() {
return matchCaseInsensitive;
}
+
+ public boolean isMatchFieldsCaseInsensitive() {
+ return matchFieldsCaseInsensitive;
+ }
}
diff --git a/src/main/java/com/amazon/ionpathextraction/internal/Preconditions.java b/src/main/java/com/amazon/ionpathextraction/internal/Preconditions.java
index 4ff7594..c42ba3c 100644
--- a/src/main/java/com/amazon/ionpathextraction/internal/Preconditions.java
+++ b/src/main/java/com/amazon/ionpathextraction/internal/Preconditions.java
@@ -28,27 +28,61 @@ public class Preconditions {
/**
* Validates argument, fails if condition is not met.
+ * Prefer {@link #checkArgument(boolean isValid, String messageFormat, Object[] args) } over concatenating
+ * Strings at call-site.
*
* @param isValid if condition is met.
* @param message error message.
* @throws PathExtractionException if not valid.
*/
- public static void checkArgument(final Boolean isValid, final String message) {
+ public static void checkArgument(final boolean isValid, final String message) {
if (!isValid) {
throw new PathExtractionException(message);
}
}
+ /**
+ * Validates argument, fails if condition is not met.
+ * This overload only builds the error message if isValid is false.
+ *
+ * @param isValid if condition is met.
+ * @param messageFormat error message _format_.
+ * @param args arguments to String.format()
+ * @throws PathExtractionException if not valid.
+ */
+ public static void checkArgument(final boolean isValid, final String messageFormat, final Object... args) {
+ if (!isValid) {
+ throw new PathExtractionException(String.format(messageFormat, args));
+ }
+ }
+
/**
* Validates a state, fails if condition is not met.
+ * Prefer {@link #checkState(boolean isValid, String messageFormat, Object[] args) } over concatenating
+ * Strings at call-site.
*
* @param isValid if condition is met.
* @param message error message.
* @throws PathExtractionException if not valid.
*/
- public static void checkState(final Boolean isValid, final String message) {
+ public static void checkState(final boolean isValid, final String message) {
if (!isValid) {
throw new PathExtractionException(message);
}
}
+
+ /**
+ * Validates a state, fails if condition is not met.
+ * This overload only builds the error message if isValid is false.
+ *
+ * @param isValid if condition is met.
+ * @param messageFormat error message _format_.
+ * @param args arguments to String.format()
+ * @throws PathExtractionException if not valid.
+ */
+ public static void checkState(final boolean isValid, final String messageFormat, final Object... args) {
+ if (!isValid) {
+ throw new PathExtractionException(String.format(messageFormat, args));
+ }
+ }
}
diff --git a/src/main/java/com/amazon/ionpathextraction/pathcomponents/Index.java b/src/main/java/com/amazon/ionpathextraction/pathcomponents/Index.java
index 21c5d09..5e03fee 100644
--- a/src/main/java/com/amazon/ionpathextraction/pathcomponents/Index.java
+++ b/src/main/java/com/amazon/ionpathextraction/pathcomponents/Index.java
@@ -41,6 +41,10 @@ public Index(final int ordinal, final String[] annotations) {
this.ordinal = ordinal;
}
+ public Integer getOrdinal() {
+ return ordinal;
+ }
+
@Override
public boolean innerMatches(final MatchContext context) {
return ordinal == context.getReaderContainerIndex();
diff --git a/src/main/java/com/amazon/ionpathextraction/pathcomponents/PathComponent.java b/src/main/java/com/amazon/ionpathextraction/pathcomponents/PathComponent.java
index 3f88a28..bef9cbb 100644
--- a/src/main/java/com/amazon/ionpathextraction/pathcomponents/PathComponent.java
+++ b/src/main/java/com/amazon/ionpathextraction/pathcomponents/PathComponent.java
@@ -29,7 +29,7 @@
*/
public abstract class PathComponent {
- private final Annotations annotations;
+ protected final Annotations annotations;
PathComponent(final Annotations annotations) {
checkArgument(annotations != null, "fieldName cannot be null");
@@ -37,6 +37,14 @@ public abstract class PathComponent {
this.annotations = annotations;
}
+ public Annotations getAnnotations() {
+ return annotations;
+ }
+
+ public boolean hasAnnotations() {
+ return annotations.hasAnnotations();
+ }
+
/**
* Checks if this component matches the current reader position with the given configuration.
*
diff --git a/src/main/java/com/amazon/ionpathextraction/pathcomponents/Text.java b/src/main/java/com/amazon/ionpathextraction/pathcomponents/Text.java
index e597af5..76d6e92 100644
--- a/src/main/java/com/amazon/ionpathextraction/pathcomponents/Text.java
+++ b/src/main/java/com/amazon/ionpathextraction/pathcomponents/Text.java
@@ -46,6 +46,10 @@ public Text(final String fieldName, final String[] annotations) {
this.fieldName = fieldName;
}
+ public String getFieldName() {
+ return fieldName;
+ }
+
@Override
public boolean innerMatches(final MatchContext context) {
final IonReader reader = context.getReader();
@@ -53,7 +57,7 @@ public boolean innerMatches(final MatchContext context) {
return false;
}
- return context.getConfig().isMatchCaseInsensitive()
+ return context.getConfig().isMatchFieldsCaseInsensitive()
? fieldName.equalsIgnoreCase(reader.getFieldName())
: fieldName.equals(reader.getFieldName());
}
diff --git a/src/main/java/com/amazon/ionpathextraction/pathcomponents/Wildcard.java b/src/main/java/com/amazon/ionpathextraction/pathcomponents/Wildcard.java
index c5d0c0a..63525c4 100644
--- a/src/main/java/com/amazon/ionpathextraction/pathcomponents/Wildcard.java
+++ b/src/main/java/com/amazon/ionpathextraction/pathcomponents/Wildcard.java
@@ -32,7 +32,11 @@ public final class Wildcard extends PathComponent {
public static final String TEXT = "*";
public Wildcard(final String[] annotations) {
- super(new Annotations(annotations));
+ this(new Annotations(annotations));
+ }
+
+ public Wildcard(final Annotations annotations) {
+ super(annotations);
}
@Override
diff --git a/src/test/kotlin/com/amazon/ionpathextraction/FsmPathExtractorTest.kt b/src/test/kotlin/com/amazon/ionpathextraction/FsmPathExtractorTest.kt
new file mode 100644
index 0000000..41deb22
--- /dev/null
+++ b/src/test/kotlin/com/amazon/ionpathextraction/FsmPathExtractorTest.kt
@@ -0,0 +1,33 @@
+package com.amazon.ionpathextraction
+
+import org.junit.jupiter.api.assertThrows
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.MethodSource
+
+class FsmPathExtractorTest : PathExtractorTest() {
+ override fun PathExtractorBuilder.buildExtractor(): PathExtractor = buildStrict()
+
+ @ParameterizedTest
+ @MethodSource("testCases")
+ override fun testSearchPaths(testCase: Companion.TestCase) {
+ if (testCase.legacyOnly) {
+ assertThrows {
+ super.testSearchPaths(testCase)
+ }
+ } else {
+ super.testSearchPaths(testCase)
+ }
+ }
+
+ @ParameterizedTest
+ @MethodSource("testCases")
+ override fun testSearchPathsMatchCurrentValue(testCase: Companion.TestCase) {
+ if (testCase.legacyOnly) {
+ assertThrows {
+ super.testSearchPaths(testCase)
+ }
+ } else {
+ super.testSearchPaths(testCase)
+ }
+ }
+}
diff --git a/src/test/kotlin/com/amazon/ionpathextraction/PathExtractorImplTest.kt b/src/test/kotlin/com/amazon/ionpathextraction/PathExtractorImplTest.kt
new file mode 100644
index 0000000..7bb66d0
--- /dev/null
+++ b/src/test/kotlin/com/amazon/ionpathextraction/PathExtractorImplTest.kt
@@ -0,0 +1,5 @@
+package com.amazon.ionpathextraction
+
+class PathExtractorImplTest : PathExtractorTest() {
+ override fun PathExtractorBuilder.buildExtractor(): PathExtractor = buildLegacy()
+}
diff --git a/src/test/kotlin/com/amazon/ionpathextraction/PathExtractorTest.kt b/src/test/kotlin/com/amazon/ionpathextraction/PathExtractorTest.kt
index 6394e05..a847019 100644
--- a/src/test/kotlin/com/amazon/ionpathextraction/PathExtractorTest.kt
+++ b/src/test/kotlin/com/amazon/ionpathextraction/PathExtractorTest.kt
@@ -31,7 +31,7 @@ import java.io.File
import java.util.stream.Stream
import kotlin.test.assertTrue
-class PathExtractorTest {
+abstract class PathExtractorTest {
companion object {
private val ION = IonSystemBuilder.standard().build()
@@ -39,11 +39,15 @@ class PathExtractorTest {
val data: String,
val expected: IonList,
val stepOutNumber: Int,
- val hasMultipleTopLevelValues: Boolean) {
+ val hasMultipleTopLevelValues: Boolean,
+ val legacyOnly: Boolean = false,
+ val caseInsensitive: String = "None") {
override fun toString(): String = "SearchPaths=$searchPaths, " +
"Data=$data, " +
"Expected=$expected, " +
- "StepOutN=$stepOutNumber"
+ "StepOutN=$stepOutNumber" +
+ "Legacy=$legacyOnly" +
+ "CaseInsensitive=$caseInsensitive"
}
private fun IonValue.toText(): String {
@@ -79,7 +83,9 @@ class PathExtractorTest {
struct["data"].toText(),
struct["expected"] as IonList,
struct["stepOutN"]?.let { (it as IonInt).intValue() } ?: 0,
- struct["data"].hasTypeAnnotation("${'$'}datagram")
+ struct["data"].hasTypeAnnotation("${'$'}datagram"),
+ struct.hasTypeAnnotation("legacy"),
+ struct.get("caseInsensitive")?.toText() ?: "None"
)
}.stream()
@@ -100,6 +106,8 @@ class PathExtractorTest {
}
}
+ abstract fun PathExtractorBuilder.buildExtractor(): PathExtractor
+
private val emptyCallback: (IonReader) -> Int = { 0 }
private fun collectToIonList(stepOutN: Int): (IonReader, IonList) -> Int = { reader, out ->
@@ -109,12 +117,18 @@ class PathExtractorTest {
@ParameterizedTest
@MethodSource("testCases")
- fun testSearchPaths(testCase: TestCase) {
+ open fun testSearchPaths(testCase: TestCase) {
val builder = PathExtractorBuilder.standard()
testCase.searchPaths.forEach { builder.withSearchPath(it, collectToIonList(testCase.stepOutNumber)) }
- val extractor = builder.build()
+ when (testCase.caseInsensitive) {
+ "Both" -> builder.withMatchCaseInsensitive(true)
+ "Fields" -> builder.withMatchFieldNamesCaseInsensitive(true)
+ "None" -> Unit
+ else -> throw IllegalArgumentException("Unexpected value for caseInsensitive: ${testCase.caseInsensitive}")
+ }
+ val extractor = builder.buildExtractor()
val out = ION.newEmptyList()
extractor.match(ION.newReader(testCase.data), out)
@@ -124,7 +138,7 @@ class PathExtractorTest {
@ParameterizedTest
@MethodSource("testCases")
- fun testSearchPathsMatchCurrentValue(testCase: TestCase) {
+ open fun testSearchPathsMatchCurrentValue(testCase: TestCase) {
if (testCase.hasMultipleTopLevelValues) {
// For simplicity, skip tests with multiple top-level values. This will be tested via other test methods.
return
@@ -132,7 +146,7 @@ class PathExtractorTest {
val builder = PathExtractorBuilder.standard()
testCase.searchPaths.forEach { builder.withSearchPath(it, collectToIonList(testCase.stepOutNumber)) }
- val extractor = builder.build()
+ val extractor = builder.buildExtractor()
val out = ION.newEmptyList()
val reader = ION.newReader(testCase.data)
@@ -159,7 +173,7 @@ class PathExtractorTest {
timesCallback2Called++
0
}
- .build()
+ .buildExtractor()
api.match(extractor, ION.newReader("{ bar: 1, bar: 2, foo: 3 }"))
@@ -173,11 +187,11 @@ class PathExtractorTest {
fun matchCurrentValueOnlyMatchesCurrentValue() {
val extractor1 = PathExtractorBuilder.standard()
.withSearchPath("(foo)", collectToIonList(0))
- .build()
+ .buildExtractor()
val extractor2 = PathExtractorBuilder.standard()
.withSearchPath("(*)", collectToIonList(1))
.withMatchRelativePaths(true)
- .build()
+ .buildExtractor()
val reader = ION.newReader("{foo: 123, foo: [456]} {bar: [42, 43, 44]} end")
val out = ION.newEmptyList()
@@ -201,7 +215,7 @@ class PathExtractorTest {
fun matchCurrentValueWhenNotPositionedOnValueFails() {
val extractor = PathExtractorBuilder.standard()
.withSearchPath("(foo)") { _ -> 0 }
- .build()
+ .buildExtractor()
val reader = ION.newReader("[{foo: 1}]")
val exception = assertThrows { extractor.matchCurrentValue(reader) }
@@ -213,14 +227,14 @@ class PathExtractorTest {
fun readerAtInvalidDepth(api: API) {
val extractor = PathExtractorBuilder.standard()
.withSearchPath("(foo)") { _ -> 0 }
- .build()
+ .buildExtractor()
val reader = ION.newReader("[{foo: 1}]")
assertTrue(reader.next() != null)
reader.stepIn()
val exception = assertThrows { api.match(extractor, reader) }
- assertEquals("reader must be at depth zero, it was at:1", exception.message)
+ assertEquals("reader must be at depth zero, it was at: 1", exception.message)
}
@ParameterizedTest
@@ -229,7 +243,7 @@ class PathExtractorTest {
val extractor = PathExtractorBuilder.standard()
.withMatchRelativePaths(true)
.withSearchPath("(foo)", collectToIonList(0))
- .build()
+ .buildExtractor()
val reader = ION.newReader("[{foo: 1}]")
assertTrue(reader.next() != null)
@@ -241,30 +255,12 @@ class PathExtractorTest {
assertEquals(ION.singleValue("[1]"), out)
}
- @ParameterizedTest
- @EnumSource(API::class)
- fun caseInsensitive(api: API) {
- val extractor = PathExtractorBuilder.standard()
- .withMatchCaseInsensitive(true)
- .withSearchPath("(foo)", collectToIonList(0))
- .build()
-
- val out = ION.newEmptyList()
- api.match(extractor, ION.newReader("{FOO: 1, foO: 2}{foo: 3}{fOo: 4}{bar: 5}"), out)
-
- if (api == API.MATCH_CURRENT_VALUE) {
- assertEquals(ION.singleValue("[1,2]"), out)
- } else {
- assertEquals(ION.singleValue("[1,2,3,4]"), out)
- }
- }
-
@ParameterizedTest
@EnumSource(API::class)
fun stepOutMoreThanPermitted(api: API) {
val extractor = PathExtractorBuilder.standard()
.withSearchPath("(foo)") { _ -> 200 }
- .build()
+ .buildExtractor()
val exception = assertThrows {
api.match(extractor, ION.newReader("{foo: 1}"))
@@ -281,7 +277,7 @@ class PathExtractorTest {
.withMatchRelativePaths(true)
// even though you could step out twice in reader you can't given the initial reader depth
.withSearchPath("(bar)") { _ -> 2 }
- .build()
+ .buildExtractor()
val newReader = ION.newReader("{foo: {bar: 1}}")
newReader.next()
@@ -314,7 +310,7 @@ class PathExtractorTest {
0
}
}
- }.build()
+ }.buildExtractor()
api.match(extractor, ION.newReader("{foo: {bar: 1}}"))
@@ -390,7 +386,7 @@ class PathExtractorTest {
val actualData = ION.newValue(ionReader1)
assertEquals(value["col2"], actualData)
0
- }.withSearchPath("(col1)") { _ -> 0 }.build()
+ }.withSearchPath("(col1)") { _ -> 0 }.buildExtractor()
extractor.match(ionReader)
}
}
\ No newline at end of file
diff --git a/src/test/resources/test-cases.ion b/src/test/resources/test-cases.ion
index 2409739..459c3ce 100644
--- a/src/test/resources/test-cases.ion
+++ b/src/test/resources/test-cases.ion
@@ -238,13 +238,13 @@ search paths
// Multiple search paths -----------------------------------------------------------------
// all match
-{ searchPaths: [(0), (foo)], data: {bar: 1, foo: 2}, expected: [1, 2] }
+legacy::{ searchPaths: [(0), (foo)], data: {bar: 1, foo: 2}, expected: [1, 2] }
// none match
-{ searchPaths: [(1), [foo]], data: [0], expected: [] }
+legacy::{ searchPaths: [(1), [foo]], data: [0], expected: [] }
// multiple matchers match the same value
-{ searchPaths: [(1), (*)], data: [1, 2, 3], expected: [1, 2, 2, 3] }
+legacy::{ searchPaths: [(1), (*)], data: [1, 2, 3], expected: [1, 2, 2, 3] }
{ searchPaths: [(foo 1), (foo 2)], data: {foo: [0, 1, 2]}, expected: [1, 2] }
@@ -264,7 +264,9 @@ search paths
{ searchPath: (A::'*'), data: [A::1, 2], expected: [A::1] }
{ searchPath: ('$ion_extractor_field'::*), data: {'*': A::1, foo: 2}, expected: [A::1]}
{ searchPath: (A::B::C::*), data: [A::B::C::1, B::A::C::2], expected: [A::B::C::1] }
-{
+legacy::{
+ // annotations on ordinals or fields _could_ be supported in the FSM impl
+ // but usage appears non-existent at time of writing (Sep 2024)
searchPath: (foo A::2 bar),
data: {
foo: [0, 1, A::{bar: 1}],
@@ -272,3 +274,43 @@ search paths
},
expected: [1]
}
+legacy::{
+ searchPath: (f::foo),
+ data: { foo: f::17, foo: F::31, Foo: f::51, Foo: F::67 },
+ expected: [f::17],
+ // default is None but explicit here as it is essential to the test case
+ caseInsensitive: None
+}
+
+// case insensitivity
+{
+ searchPath: (foo),
+ data: $datagram::[{FOO: 1, foO: 2},{foo: 3},{fOo: 4},{bar: 5}],
+ expected: [1, 2, 3, 4],
+ caseInsensitive: Both
+}
+legacy::{
+ // these resolve to the same path for the Fsm Impl
+ searchPaths: [(foo), (Foo)],
+ data: $datagram::[{FOO: 1, foO: 2},{foo: 3},{Foo: 4},{bar: 5}],
+ expected: [1, 1, 2, 2, 3, 3, 4, 4],
+ caseInsensitive: Both
+}
+{
+ searchPath: (foo),
+ data: $datagram::[{FOO: 1, foO: 2},{foo: 3},{Foo: 4},{bar: 5}],
+ expected: [1, 2, 3, 4],
+ caseInsensitive: Fields
+}
+legacy::{
+ searchPath: F::(),
+ data: $datagram::[F::17, g::31, f::51],
+ expected: [F::17, f::51],
+ caseInsensitive: Both
+}
+{
+ searchPath: F::(),
+ data: $datagram::[F::17, g::31, f::51],
+ expected: [F::17],
+ caseInsensitive: Fields
+}