-
Notifications
You must be signed in to change notification settings - Fork 7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Make loadAllElements
fall back to an iterative implementation once a certain depth is reached
#103
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,9 +25,19 @@ import com.amazon.ion.TextSpan | |
import com.amazon.ion.system.IonReaderBuilder | ||
import com.amazon.ionelement.api.* | ||
import com.amazon.ionelement.impl.collections.* | ||
import java.util.ArrayDeque | ||
import java.util.ArrayList | ||
import kotlinx.collections.immutable.adapters.ImmutableListAdapter | ||
|
||
internal class IonElementLoaderImpl(private val options: IonElementLoaderOptions) : IonElementLoader { | ||
|
||
// TODO: It seems like some data can be read faster with a recursive approach, but other data is | ||
// faster with an iterative approach. Consider making this configurable. It probably doesn't | ||
// need to be finely-tune-able—just 0 or 100 (i.e. on/off) is probably sufficient. | ||
companion object { | ||
private const val MAX_RECURSION_DEPTH: Int = 100 | ||
} | ||
|
||
/** | ||
* Catches an [IonException] occurring in [block] and throws an [IonElementLoaderException] with | ||
* the current [IonLocation] of the fault, if one is available. Note that depending on the state of the | ||
|
@@ -86,6 +96,10 @@ internal class IonElementLoaderImpl(private val options: IonElementLoaderOptions | |
IonReaderBuilder.standard().build(ionText).use(::loadAllElements) | ||
|
||
override fun loadCurrentElement(ionReader: IonReader): AnyElement { | ||
return loadCurrentElementRecursively(ionReader) | ||
} | ||
|
||
private fun loadCurrentElementRecursively(ionReader: IonReader): AnyElement { | ||
return handleReaderException(ionReader) { | ||
val valueType = requireNotNull(ionReader.type) { "The IonReader was not positioned at an element." } | ||
|
||
|
@@ -128,26 +142,41 @@ internal class IonElementLoaderImpl(private val options: IonElementLoaderOptions | |
IonType.BLOB -> BlobElementImpl(ionReader.newBytes(), annotations, metas) | ||
IonType.LIST -> { | ||
ionReader.stepIn() | ||
val list = ListElementImpl(loadAllElements(ionReader).toImmutableListUnsafe(), annotations, metas) | ||
val listContent = ArrayList<AnyElement>() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder if it would improve performance to pool these using something like IonJava's RecyclingStack. We could get away with having one per depth level right? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm intentionally not using a recycling stack because on L154, we wrap it in an immutable list, and then forget about the underlying mutable reference. ListElementImpl(listContent.toImmutableListUnsafe(), annotations, metas) However, it would be worth investigating whether there is a performance benefit to having a pool of ArrayLists that have a relatively larger capacity, and then copying to an exact-sized immutable list. |
||
if (ionReader.depth < MAX_RECURSION_DEPTH) { | ||
while (ionReader.next() != null) { | ||
listContent.add(loadCurrentElementRecursively(ionReader)) | ||
} | ||
} else { | ||
loadAllElementsIteratively(ionReader, listContent as MutableList<Any>) | ||
} | ||
ionReader.stepOut() | ||
list | ||
ListElementImpl(listContent.toImmutableListUnsafe(), annotations, metas) | ||
} | ||
IonType.SEXP -> { | ||
ionReader.stepIn() | ||
val sexp = SexpElementImpl(loadAllElements(ionReader).toImmutableListUnsafe(), annotations, metas) | ||
val sexpContent = ArrayList<AnyElement>() | ||
if (ionReader.depth < MAX_RECURSION_DEPTH) { | ||
while (ionReader.next() != null) { | ||
sexpContent.add(loadCurrentElementRecursively(ionReader)) | ||
} | ||
} else { | ||
loadAllElementsIteratively(ionReader, sexpContent as MutableList<Any>) | ||
} | ||
ionReader.stepOut() | ||
sexp | ||
SexpElementImpl(sexpContent.toImmutableListUnsafe(), annotations, metas) | ||
} | ||
IonType.STRUCT -> { | ||
val fields = mutableListOf<StructField>() | ||
val fields = ArrayList<StructField>() | ||
ionReader.stepIn() | ||
while (ionReader.next() != null) { | ||
fields.add( | ||
StructFieldImpl( | ||
ionReader.fieldName, | ||
loadCurrentElement(ionReader) | ||
) | ||
) | ||
if (ionReader.depth < MAX_RECURSION_DEPTH) { | ||
while (ionReader.next() != null) { | ||
val fieldName = ionReader.fieldName | ||
val element = loadCurrentElementRecursively(ionReader) | ||
fields.add(StructFieldImpl(fieldName, element)) | ||
} | ||
} else { | ||
loadAllElementsIteratively(ionReader, fields as MutableList<Any>) | ||
} | ||
ionReader.stepOut() | ||
StructElementImpl(fields.toImmutableListUnsafe(), annotations, metas) | ||
|
@@ -158,4 +187,109 @@ internal class IonElementLoaderImpl(private val options: IonElementLoaderOptions | |
}.asAnyElement() | ||
} | ||
} | ||
|
||
private fun loadAllElementsIteratively(ionReader: IonReader, into: MutableList<Any>) { | ||
// Intentionally not using a "recycling" stack because we have mutable lists that we are going to wrap as | ||
// ImmutableLists and then forget about the reference to the mutable list. | ||
val openContainerStack = ArrayDeque<MutableList<Any>>() | ||
var elements: MutableList<Any> = into | ||
|
||
while (true) { | ||
val valueType = ionReader.next() | ||
|
||
// End of container or input | ||
if (valueType == null) { | ||
// We're at the top (relative to where we started) | ||
if (elements === into) { | ||
return | ||
} else { | ||
ionReader.stepOut() | ||
elements = openContainerStack.pop() | ||
continue | ||
} | ||
} | ||
|
||
// Read a value | ||
val annotations = ionReader.typeAnnotations!!.toImmutableListUnsafe() | ||
|
||
var metas = EMPTY_METAS | ||
if (options.includeLocationMeta) { | ||
val location = ionReader.currentLocation() | ||
if (location != null) metas = location.toMetaContainer() | ||
} | ||
|
||
if (ionReader.isNullValue) { | ||
elements.addContainerElement(ionReader, ionNull(valueType.toElementType(), annotations, metas).asAnyElement()) | ||
continue | ||
} else when (valueType) { | ||
IonType.BOOL -> elements.addContainerElement(ionReader, BoolElementImpl(ionReader.booleanValue(), annotations, metas)) | ||
IonType.INT -> { | ||
val intValue = when (ionReader.integerSize!!) { | ||
IntegerSize.BIG_INTEGER -> { | ||
val bigIntValue = ionReader.bigIntegerValue() | ||
if (bigIntValue !in RANGE_OF_LONG) | ||
BigIntIntElementImpl(bigIntValue, annotations, metas) | ||
else { | ||
LongIntElementImpl(ionReader.longValue(), annotations, metas) | ||
} | ||
} | ||
IntegerSize.LONG, | ||
IntegerSize.INT -> LongIntElementImpl(ionReader.longValue(), annotations, metas) | ||
} | ||
elements.addContainerElement(ionReader, intValue) | ||
} | ||
IonType.FLOAT -> elements.addContainerElement(ionReader, FloatElementImpl(ionReader.doubleValue(), annotations, metas)) | ||
IonType.DECIMAL -> elements.addContainerElement(ionReader, DecimalElementImpl(ionReader.decimalValue(), annotations, metas)) | ||
IonType.TIMESTAMP -> elements.addContainerElement(ionReader, TimestampElementImpl(ionReader.timestampValue(), annotations, metas)) | ||
IonType.STRING -> elements.addContainerElement(ionReader, StringElementImpl(ionReader.stringValue(), annotations, metas)) | ||
IonType.SYMBOL -> elements.addContainerElement(ionReader, SymbolElementImpl(ionReader.stringValue(), annotations, metas)) | ||
IonType.CLOB -> elements.addContainerElement(ionReader, ClobElementImpl(ionReader.newBytes(), annotations, metas)) | ||
IonType.BLOB -> elements.addContainerElement(ionReader, BlobElementImpl(ionReader.newBytes(), annotations, metas)) | ||
IonType.LIST -> { | ||
val listContent = ArrayList<AnyElement>() | ||
// `listContent` gets wrapped in an `ImmutableListWrapper` so that we can create a ListElementImpl | ||
// right away. Then, we add child elements to `ListContent`. Technically, this is a violation of the | ||
// contract for `ImmutableListAdapter`, but it is safe to do so here because no reads will occur | ||
// after we are done modifying the backing list. | ||
// Same thing applies for `sexpContent` and `structContent` in their respective branches. | ||
elements.addContainerElement(ionReader, ListElementImpl(ImmutableListAdapter(listContent), annotations, metas)) | ||
ionReader.stepIn() | ||
openContainerStack.push(elements) | ||
elements = listContent as MutableList<Any> | ||
} | ||
IonType.SEXP -> { | ||
val sexpContent = ArrayList<AnyElement>() | ||
elements.addContainerElement(ionReader, SexpElementImpl(ImmutableListAdapter(sexpContent), annotations, metas)) | ||
ionReader.stepIn() | ||
openContainerStack.push(elements) | ||
elements = sexpContent as MutableList<Any> | ||
} | ||
IonType.STRUCT -> { | ||
val structContent = ArrayList<StructField>() | ||
elements.addContainerElement( | ||
ionReader, | ||
StructElementImpl( | ||
ImmutableListAdapter(structContent), | ||
annotations, | ||
metas | ||
) | ||
) | ||
ionReader.stepIn() | ||
openContainerStack.push(elements) | ||
elements = structContent as MutableList<Any> | ||
} | ||
IonType.DATAGRAM -> error("IonElementLoaderImpl does not know what to do with IonType.DATAGRAM") | ||
IonType.NULL -> error("IonType.NULL branch should be unreachable") | ||
} | ||
} | ||
} | ||
|
||
private fun MutableList<Any>.addContainerElement(ionReader: IonReader, value: AnyElement) { | ||
val fieldName = ionReader.fieldName | ||
if (fieldName != null) { | ||
add(StructFieldImpl(fieldName, value)) | ||
} else { | ||
add(value) | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,7 +31,7 @@ internal class StructElementImpl( | |
) : AnyElementBase(), StructElement { | ||
|
||
override val type: ElementType get() = ElementType.STRUCT | ||
override val size = allFields.size | ||
override val size: Int get() = allFields.size | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗺️ Both |
||
|
||
// Note that we are not using `by lazy` here because it requires 2 additional allocations and | ||
// has been demonstrated to significantly increase memory consumption! | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,12 +15,17 @@ | |
|
||
package com.amazon.ionelement | ||
|
||
import com.amazon.ion.Decimal | ||
import com.amazon.ion.system.IonReaderBuilder | ||
import com.amazon.ionelement.api.* | ||
import com.amazon.ionelement.util.INCLUDE_LOCATION_META | ||
import com.amazon.ionelement.util.ION | ||
import com.amazon.ionelement.util.IonElementLoaderTestCase | ||
import com.amazon.ionelement.util.convertToString | ||
import java.math.BigInteger | ||
import org.junit.jupiter.api.Assertions.assertEquals | ||
import org.junit.jupiter.api.Test | ||
import org.junit.jupiter.api.assertThrows | ||
import org.junit.jupiter.params.ParameterizedTest | ||
import org.junit.jupiter.params.provider.MethodSource | ||
|
||
|
@@ -45,6 +50,8 @@ class IonElementLoaderTests { | |
// Converting from IonValue to IonElement should result in an IonElement that is equivalent to the | ||
// parsed IonElement | ||
assertEquals(parsedIonValue.toIonElement(), parsedIonElement) | ||
|
||
assertEquals(tc.expectedElement, parsedIonElement) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗺️ I was surprised to discover that there was previously no assertion that the parsed element was equal to the expected element. 😮 |
||
} | ||
|
||
companion object { | ||
|
@@ -57,15 +64,29 @@ class IonElementLoaderTests { | |
|
||
IonElementLoaderTestCase("1", ionInt(1)), | ||
|
||
IonElementLoaderTestCase("existence::42", ionInt(1).withAnnotations("existence")), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗺️ A couple of test cases were wrong, but otherwise these changes are just adding test cases for every Ion type. |
||
IonElementLoaderTestCase("9223372036854775807", ionInt(Long.MAX_VALUE)), | ||
|
||
IonElementLoaderTestCase("\"some string\"", ionString("some string")), | ||
IonElementLoaderTestCase("9223372036854775808", ionInt(BigInteger.valueOf(Long.MAX_VALUE) + BigInteger.ONE)), | ||
|
||
IonElementLoaderTestCase("existence::42", ionInt(42).withAnnotations("existence")), | ||
|
||
IonElementLoaderTestCase("0.", ionDecimal(Decimal.ZERO)), | ||
|
||
IonElementLoaderTestCase("1e0", ionFloat(1.0)), | ||
|
||
IonElementLoaderTestCase("2019-10-30T04:23:59Z", ionTimestamp("2019-10-30T04:23:59Z")), | ||
|
||
IonElementLoaderTestCase("\"some string\"", ionString("some string")), | ||
|
||
IonElementLoaderTestCase("'some symbol'", ionSymbol("some symbol")), | ||
|
||
IonElementLoaderTestCase("{{\"some clob\"}}", ionClob("some clob".encodeToByteArray())), | ||
|
||
IonElementLoaderTestCase("{{ }}", ionBlob(byteArrayOf())), | ||
|
||
IonElementLoaderTestCase("[1, 2, 3]", ionListOf(ionInt(1), ionInt(2), ionInt(3))), | ||
|
||
IonElementLoaderTestCase("(1 2 3)", ionListOf(ionInt(1), ionInt(2), ionInt(3))), | ||
IonElementLoaderTestCase("(1 2 3)", ionSexpOf(ionInt(1), ionInt(2), ionInt(3))), | ||
|
||
IonElementLoaderTestCase( | ||
"{ foo: 1, bar: 2, bat: 3 }", | ||
|
@@ -74,7 +95,79 @@ class IonElementLoaderTests { | |
"bar" to ionInt(2), | ||
"bat" to ionInt(3) | ||
) | ||
) | ||
), | ||
|
||
// Nested container cases | ||
IonElementLoaderTestCase("((null.list))", ionSexpOf(ionSexpOf(ionNull(ElementType.LIST)))), | ||
IonElementLoaderTestCase("(1 (2 3))", ionSexpOf(ionInt(1), ionSexpOf(ionInt(2), ionInt(3)))), | ||
IonElementLoaderTestCase("{foo:[1]}", ionStructOf("foo" to ionListOf(ionInt(1)))), | ||
IonElementLoaderTestCase("[{foo:1}]", ionListOf(ionStructOf("foo" to ionInt(1)))), | ||
IonElementLoaderTestCase("{foo:{bar:1}}", ionStructOf("foo" to ionStructOf("bar" to ionInt(1)))), | ||
IonElementLoaderTestCase("{foo:[{}]}", ionStructOf("foo" to ionListOf(ionStructOf(emptyList())))), | ||
IonElementLoaderTestCase("[{}]", ionListOf(ionStructOf(emptyList()))), | ||
IonElementLoaderTestCase("[{}, {}]", ionListOf(ionStructOf(emptyList()), ionStructOf(emptyList()))), | ||
IonElementLoaderTestCase("[{foo:1, bar: 2}]", ionListOf(ionStructOf("foo" to ionInt(1), "bar" to ionInt(2)))), | ||
IonElementLoaderTestCase( | ||
"{foo:[{bar:({})}]}", | ||
ionStructOf("foo" to ionListOf(ionStructOf("bar" to ionSexpOf(ionStructOf(emptyList()))))) | ||
), | ||
) | ||
} | ||
|
||
@Test | ||
fun `regardless of depth, no StackOverflowError is thrown`() { | ||
// Throws StackOverflowError in [email protected] and prior versions when there's ~4k nested containers | ||
// Run for every container type to ensure that they all correctly fall back to the iterative impl. | ||
|
||
val listData = "[".repeat(999999) + "]".repeat(999999) | ||
loadAllElements(listData) | ||
|
||
val sexpData = "(".repeat(999999) + ")".repeat(999999) | ||
loadAllElements(sexpData) | ||
|
||
val structData = "{a:".repeat(999999) + "b" + "}".repeat(999999) | ||
loadAllElements(structData) | ||
} | ||
|
||
@ParameterizedTest | ||
@MethodSource("parametersForDemoTest") | ||
fun `deeply nested values should be loaded correctly`(tc: IonElementLoaderTestCase) { | ||
// Wrap everything in many layers of Ion lists so that we can be sure to trigger the iterative fallback. | ||
val nestingLevels = 500 | ||
val textIon = "[".repeat(nestingLevels) + tc.textIon + "]".repeat(nestingLevels) | ||
var expectedElement = tc.expectedElement | ||
repeat(nestingLevels) { expectedElement = ionListOf(expectedElement) } | ||
|
||
val parsedIonValue = ION.singleValue(textIon) | ||
val parsedIonElement = loadSingleElement(textIon, INCLUDE_LOCATION_META) | ||
|
||
// Text generated from both should match | ||
assertEquals(convertToString(parsedIonValue), parsedIonElement.toString()) | ||
|
||
// Converting from IonElement to IonValue results in an IonValue that is equivalent to the parsed IonValue | ||
assertEquals(parsedIonElement.toIonValue(ION), parsedIonValue) | ||
|
||
// Converting from IonValue to IonElement should result in an IonElement that is equivalent to the | ||
// parsed IonElement | ||
assertEquals(parsedIonValue.toIonElement(), parsedIonElement) | ||
|
||
assertEquals(expectedElement, parsedIonElement) | ||
} | ||
|
||
@Test | ||
fun `loadCurrentElement throws exception when not positioned on a value`() { | ||
val reader = IonReaderBuilder.standard().build("foo") | ||
// We do not advance to the first value in the reader. | ||
assertThrows<IllegalArgumentException> { loadCurrentElement(reader) } | ||
} | ||
|
||
@Test | ||
fun `loadSingleElement throws exception when no values in reader`() { | ||
assertThrows<IllegalArgumentException> { loadSingleElement("") } | ||
} | ||
|
||
@Test | ||
fun `loadSingleElement throws exception when more than one values in reader`() { | ||
assertThrows<IllegalArgumentException> { loadSingleElement("a b") } | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🗺️ This was a non-obvious edge case that I ran into while testing my changes, so I figured I'd add a note here.