From 9b0edb73c173959f07c5793d3f88f1b7d6f5c9bd Mon Sep 17 00:00:00 2001 From: Matthew Pope Date: Wed, 7 Aug 2024 17:24:03 -0700 Subject: [PATCH] Implementation of loadAllElements with iterative fallback once max depth is reached --- .../com/amazon/ionelement/api/IonUtils.kt | 2 + .../ionelement/impl/IonElementLoaderImpl.kt | 158 ++++++++++++++++-- .../ionelement/impl/StructElementImpl.kt | 2 +- .../ionelement/IonElementLoaderTests.kt | 31 +++- 4 files changed, 177 insertions(+), 16 deletions(-) diff --git a/src/main/kotlin/com/amazon/ionelement/api/IonUtils.kt b/src/main/kotlin/com/amazon/ionelement/api/IonUtils.kt index 3b36d94..81a101a 100644 --- a/src/main/kotlin/com/amazon/ionelement/api/IonUtils.kt +++ b/src/main/kotlin/com/amazon/ionelement/api/IonUtils.kt @@ -42,6 +42,8 @@ public fun IonElement.toIonValue(factory: ValueFactory): IonValue { * Bridge function that converts from the mutable [IonValue] to an [AnyElement]. * * New code that does not need to integrate with uses of the mutable DOM should not use this. + * + * This will fail for IonDatagram if the IonDatagram does not contain exactly one user value. */ public fun IonValue.toIonElement(): AnyElement = this.system.newReader(this).use { reader -> diff --git a/src/main/kotlin/com/amazon/ionelement/impl/IonElementLoaderImpl.kt b/src/main/kotlin/com/amazon/ionelement/impl/IonElementLoaderImpl.kt index 997e269..be046c9 100644 --- a/src/main/kotlin/com/amazon/ionelement/impl/IonElementLoaderImpl.kt +++ b/src/main/kotlin/com/amazon/ionelement/impl/IonElementLoaderImpl.kt @@ -25,9 +25,19 @@ import com.amazon.ion.TextSpan import com.amazon.ion.system.IonReaderBuilder import com.amazon.ionelement.api.* import com.amazon.ionelement.impl.collections.* +import java.util.ArrayDeque +import java.util.ArrayList +import kotlinx.collections.immutable.adapters.ImmutableListAdapter internal class IonElementLoaderImpl(private val options: IonElementLoaderOptions) : IonElementLoader { + // TODO: It seems like some data can be read faster with a recursive approach, but other data is + // faster with an iterative approach. Consider making this configurable. It probably doesn't + // need to be finely-tune-ableā€”just 0 or 100 (i.e. on/off) is probably sufficient. + companion object { + private const val MAX_RECURSION_DEPTH: Int = 100 + } + /** * Catches an [IonException] occurring in [block] and throws an [IonElementLoaderException] with * the current [IonLocation] of the fault, if one is available. Note that depending on the state of the @@ -86,6 +96,10 @@ internal class IonElementLoaderImpl(private val options: IonElementLoaderOptions IonReaderBuilder.standard().build(ionText).use(::loadAllElements) override fun loadCurrentElement(ionReader: IonReader): AnyElement { + return loadCurrentElementRecursively(ionReader) + } + + private fun loadCurrentElementRecursively(ionReader: IonReader): AnyElement { return handleReaderException(ionReader) { val valueType = requireNotNull(ionReader.type) { "The IonReader was not positioned at an element." } @@ -128,26 +142,41 @@ internal class IonElementLoaderImpl(private val options: IonElementLoaderOptions IonType.BLOB -> BlobElementImpl(ionReader.newBytes(), annotations, metas) IonType.LIST -> { ionReader.stepIn() - val list = ListElementImpl(loadAllElements(ionReader).toImmutableListUnsafe(), annotations, metas) + val listContent = ArrayList() + if (ionReader.depth < MAX_RECURSION_DEPTH) { + while (ionReader.next() != null) { + listContent.add(loadCurrentElementRecursively(ionReader)) + } + } else { + loadAllElementsIteratively(ionReader, listContent as MutableList) + } ionReader.stepOut() - list + ListElementImpl(listContent.toImmutableListUnsafe(), annotations, metas) } IonType.SEXP -> { ionReader.stepIn() - val sexp = SexpElementImpl(loadAllElements(ionReader).toImmutableListUnsafe(), annotations, metas) + val sexpContent = ArrayList() + if (ionReader.depth < MAX_RECURSION_DEPTH) { + while (ionReader.next() != null) { + sexpContent.add(loadCurrentElementRecursively(ionReader)) + } + } else { + loadAllElementsIteratively(ionReader, sexpContent as MutableList) + } ionReader.stepOut() - sexp + SexpElementImpl(sexpContent.toImmutableListUnsafe(), annotations, metas) } IonType.STRUCT -> { - val fields = mutableListOf() + val fields = ArrayList() ionReader.stepIn() - while (ionReader.next() != null) { - fields.add( - StructFieldImpl( - ionReader.fieldName, - loadCurrentElement(ionReader) - ) - ) + if (ionReader.depth < MAX_RECURSION_DEPTH) { + while (ionReader.next() != null) { + val fieldName = ionReader.fieldName + val element = loadCurrentElementRecursively(ionReader) + fields.add(StructFieldImpl(fieldName, element)) + } + } else { + loadAllElementsIteratively(ionReader, fields as MutableList) } ionReader.stepOut() StructElementImpl(fields.toImmutableListUnsafe(), annotations, metas) @@ -158,4 +187,109 @@ internal class IonElementLoaderImpl(private val options: IonElementLoaderOptions }.asAnyElement() } } + + private fun loadAllElementsIteratively(ionReader: IonReader, into: MutableList) { + // Intentionally not using a "recycling" stack because we have mutable lists that we are going to wrap as + // ImmutableLists and then forget about the reference to the mutable list. + val openContainerStack = ArrayDeque>() + var elements: MutableList = into + + while (true) { + val valueType = ionReader.next() + + // End of container or input + if (valueType == null) { + // We're at the top (relative to where we started) + if (elements === into) { + return + } else { + ionReader.stepOut() + elements = openContainerStack.pop() + continue + } + } + + // Read a value + val annotations = ionReader.typeAnnotations!!.toImmutableListUnsafe() + + var metas = EMPTY_METAS + if (options.includeLocationMeta) { + val location = ionReader.currentLocation() + if (location != null) metas = location.toMetaContainer() + } + + if (ionReader.isNullValue) { + elements.addContainerElement(ionReader, ionNull(valueType.toElementType(), annotations, metas).asAnyElement()) + continue + } else when (valueType) { + IonType.BOOL -> elements.addContainerElement(ionReader, BoolElementImpl(ionReader.booleanValue(), annotations, metas)) + IonType.INT -> { + val intValue = when (ionReader.integerSize!!) { + IntegerSize.BIG_INTEGER -> { + val bigIntValue = ionReader.bigIntegerValue() + if (bigIntValue !in RANGE_OF_LONG) + BigIntIntElementImpl(bigIntValue, annotations, metas) + else { + LongIntElementImpl(ionReader.longValue(), annotations, metas) + } + } + IntegerSize.LONG, + IntegerSize.INT -> LongIntElementImpl(ionReader.longValue(), annotations, metas) + } + elements.addContainerElement(ionReader, intValue) + } + IonType.FLOAT -> elements.addContainerElement(ionReader, FloatElementImpl(ionReader.doubleValue(), annotations, metas)) + IonType.DECIMAL -> elements.addContainerElement(ionReader, DecimalElementImpl(ionReader.decimalValue(), annotations, metas)) + IonType.TIMESTAMP -> elements.addContainerElement(ionReader, TimestampElementImpl(ionReader.timestampValue(), annotations, metas)) + IonType.STRING -> elements.addContainerElement(ionReader, StringElementImpl(ionReader.stringValue(), annotations, metas)) + IonType.SYMBOL -> elements.addContainerElement(ionReader, SymbolElementImpl(ionReader.stringValue(), annotations, metas)) + IonType.CLOB -> elements.addContainerElement(ionReader, ClobElementImpl(ionReader.newBytes(), annotations, metas)) + IonType.BLOB -> elements.addContainerElement(ionReader, BlobElementImpl(ionReader.newBytes(), annotations, metas)) + IonType.LIST -> { + val listContent = ArrayList() + // `listContent` gets wrapped in an `ImmutableListWrapper` so that we can create a ListElementImpl + // right away. Then, we add child elements to `ListContent`. Technically, this is a violation of the + // contract for `ImmutableListAdapter`, but it is safe to do so here because no reads will occur + // after we are done modifying the backing list. + // Same thing applies for `sexpContent` and `structContent` in their respective branches. + elements.addContainerElement(ionReader, ListElementImpl(ImmutableListAdapter(listContent), annotations, metas)) + ionReader.stepIn() + openContainerStack.push(elements) + elements = listContent as MutableList + } + IonType.SEXP -> { + val sexpContent = ArrayList() + elements.addContainerElement(ionReader, SexpElementImpl(ImmutableListAdapter(sexpContent), annotations, metas)) + ionReader.stepIn() + openContainerStack.push(elements) + elements = sexpContent as MutableList + } + IonType.STRUCT -> { + val structContent = ArrayList() + elements.addContainerElement( + ionReader, + StructElementImpl( + ImmutableListAdapter(structContent), + annotations, + metas + ) + ) + ionReader.stepIn() + openContainerStack.push(elements) + elements = structContent as MutableList + } + IonType.DATAGRAM -> error("IonElementLoaderImpl does not know what to do with IonType.DATAGRAM") + IonType.NULL -> error("IonType.NULL branch should be unreachable") + } + } + } + + private fun MutableList.addContainerElement(ionReader: IonReader, value: AnyElement) { + val fieldName = ionReader.fieldName + if (fieldName != null) { + add(StructFieldImpl(fieldName, value)) + } else { + add(value) + } + } } diff --git a/src/main/kotlin/com/amazon/ionelement/impl/StructElementImpl.kt b/src/main/kotlin/com/amazon/ionelement/impl/StructElementImpl.kt index 6a236b8..38f81e1 100644 --- a/src/main/kotlin/com/amazon/ionelement/impl/StructElementImpl.kt +++ b/src/main/kotlin/com/amazon/ionelement/impl/StructElementImpl.kt @@ -31,7 +31,7 @@ internal class StructElementImpl( ) : AnyElementBase(), StructElement { override val type: ElementType get() = ElementType.STRUCT - override val size = allFields.size + override val size: Int get() = allFields.size // Note that we are not using `by lazy` here because it requires 2 additional allocations and // has been demonstrated to significantly increase memory consumption! diff --git a/src/test/kotlin/com/amazon/ionelement/IonElementLoaderTests.kt b/src/test/kotlin/com/amazon/ionelement/IonElementLoaderTests.kt index ca6e946..ef6bc02 100644 --- a/src/test/kotlin/com/amazon/ionelement/IonElementLoaderTests.kt +++ b/src/test/kotlin/com/amazon/ionelement/IonElementLoaderTests.kt @@ -21,6 +21,7 @@ import com.amazon.ionelement.util.ION import com.amazon.ionelement.util.IonElementLoaderTestCase import com.amazon.ionelement.util.convertToString import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Test import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.MethodSource @@ -45,6 +46,8 @@ class IonElementLoaderTests { // Converting from IonValue to IonElement should result in an IonElement that is equivalent to the // parsed IonElement assertEquals(parsedIonValue.toIonElement(), parsedIonElement) + + assertEquals(tc.expectedElement, parsedIonElement) } companion object { @@ -57,7 +60,7 @@ class IonElementLoaderTests { IonElementLoaderTestCase("1", ionInt(1)), - IonElementLoaderTestCase("existence::42", ionInt(1).withAnnotations("existence")), + IonElementLoaderTestCase("existence::42", ionInt(42).withAnnotations("existence")), IonElementLoaderTestCase("\"some string\"", ionString("some string")), @@ -65,7 +68,7 @@ class IonElementLoaderTests { IonElementLoaderTestCase("[1, 2, 3]", ionListOf(ionInt(1), ionInt(2), ionInt(3))), - IonElementLoaderTestCase("(1 2 3)", ionListOf(ionInt(1), ionInt(2), ionInt(3))), + IonElementLoaderTestCase("(1 2 3)", ionSexpOf(ionInt(1), ionInt(2), ionInt(3))), IonElementLoaderTestCase( "{ foo: 1, bar: 2, bat: 3 }", @@ -74,7 +77,29 @@ class IonElementLoaderTests { "bar" to ionInt(2), "bat" to ionInt(3) ) - ) + ), + + // Nested container cases + IonElementLoaderTestCase("((null.list))", ionSexpOf(ionSexpOf(ionNull(ElementType.LIST)))), + IonElementLoaderTestCase("(1 (2 3))", ionSexpOf(ionInt(1), ionSexpOf(ionInt(2), ionInt(3)))), + IonElementLoaderTestCase("{foo:[1]}", ionStructOf("foo" to ionListOf(ionInt(1)))), + IonElementLoaderTestCase("[{foo:1}]", ionListOf(ionStructOf("foo" to ionInt(1)))), + IonElementLoaderTestCase("{foo:{bar:1}}", ionStructOf("foo" to ionStructOf("bar" to ionInt(1)))), + IonElementLoaderTestCase("{foo:[{}]}", ionStructOf("foo" to ionListOf(ionStructOf(emptyList())))), + IonElementLoaderTestCase("[{}]", ionListOf(ionStructOf(emptyList()))), + IonElementLoaderTestCase("[{}, {}]", ionListOf(ionStructOf(emptyList()), ionStructOf(emptyList()))), + IonElementLoaderTestCase("[{foo:1, bar: 2}]", ionListOf(ionStructOf("foo" to ionInt(1), "bar" to ionInt(2)))), + IonElementLoaderTestCase( + "{foo:[{bar:({})}]}", + ionStructOf("foo" to ionListOf(ionStructOf("bar" to ionSexpOf(ionStructOf(emptyList()))))) + ), ) } + + @Test + fun `regardless of depth, no StackOverflowError is thrown`() { + // Throws StackOverflowError in ion-element@v1.2.0 and prior versions when there's ~4k nested containers + val data = "[".repeat(999999) + "]".repeat(999999) + loadAllElements(data) + } }