From 22bee87e54bbb8e24baa83c1487ffca49728dd13 Mon Sep 17 00:00:00 2001 From: Tyler Gregg Date: Tue, 26 Nov 2024 13:04:09 -0800 Subject: [PATCH 1/2] Supports macro-aware transcoding of Ion 1.1 streams. --- pom.xml | 2 +- src/com/amazon/ion/benchmark/Format.java | 13 +- .../benchmark/IonMeasurableWriteTask_1_1.java | 69 +++++ .../amazon/ion/benchmark/IonUtilities.java | 103 ++++++-- .../RecordingMacroAwareIonWriter.java | 249 ++++++++++++++++++ tst/com/amazon/ion/benchmark/OptionsTest.java | 127 ++++++++- .../amazon/ion/benchmark/binaryAllTypes11.10n | Bin 175 -> 173 bytes .../ion/benchmark/binaryMacroInvocations.10n | Bin 0 -> 79 bytes .../ion/benchmark/textMacroInvocations.ion | 42 +++ 9 files changed, 572 insertions(+), 33 deletions(-) create mode 100644 src/com/amazon/ion/benchmark/IonMeasurableWriteTask_1_1.java create mode 100644 src/com/amazon/ion/benchmark/RecordingMacroAwareIonWriter.java create mode 100644 tst/com/amazon/ion/benchmark/binaryMacroInvocations.10n create mode 100644 tst/com/amazon/ion/benchmark/textMacroInvocations.ion diff --git a/pom.xml b/pom.xml index 548cb7f..426f1ba 100644 --- a/pom.xml +++ b/pom.xml @@ -63,7 +63,7 @@ com.amazon.ion ion-java - [1.11.8-SNAPSHOT,] + [1.11.10-SNAPSHOT,] com.fasterxml.jackson.core diff --git a/src/com/amazon/ion/benchmark/Format.java b/src/com/amazon/ion/benchmark/Format.java index 252bcf6..92ba902 100644 --- a/src/com/amazon/ion/benchmark/Format.java +++ b/src/com/amazon/ion/benchmark/Format.java @@ -6,6 +6,7 @@ import java.io.IOException; import java.nio.file.Path; +import static com.amazon.ion.benchmark.IonUtilities.getMinorVersion; import static com.amazon.ion.benchmark.IonUtilities.isFormatHeaderPresent; /** @@ -71,7 +72,7 @@ MeasurableReadTask createReadTask(Path inputPath, ReadOptionsCombination options @Override MeasurableWriteTask createWriteTask(Path inputPath, WriteOptionsCombination options) throws IOException { - return new IonMeasurableWriteTask(inputPath, options); + return IonUtilities.createIonMeasurableWriteTask(inputPath, options); } @Override @@ -85,8 +86,8 @@ Path convert(Path input, Path output, OptionsCombinationBase options) throws IOE Format sourceFormat = classify(input); switch (sourceFormat) { case ION_TEXT: - if (options.limit == Integer.MAX_VALUE) { - // The input is already text and it is not being limited. + if (options.limit == Integer.MAX_VALUE && (getMinorVersion(ION_TEXT, input.toFile()) == options.ionMinorVersion)) { + // The input is already text in the requested minor version, and it is not being limited. return input; } IonUtilities.rewriteIonFile(ION_TEXT, input, output, options, IonUtilities::newTextWriterSupplier); @@ -124,7 +125,7 @@ MeasurableReadTask createReadTask(Path inputPath, ReadOptionsCombination options @Override MeasurableWriteTask createWriteTask(Path inputPath, WriteOptionsCombination options) throws IOException { - return new IonMeasurableWriteTask(inputPath, options); + return IonUtilities.createIonMeasurableWriteTask(inputPath, options); } @Override @@ -140,7 +141,7 @@ Path convert(Path input, Path output, OptionsCombinationBase options) throws IOE case ION_TEXT: case ION_BINARY: // Down-convert to JSON. - IonUtilities.rewriteIonFile(ION_BINARY, input, output, options, IonUtilities::newJsonWriterSupplier); + IonUtilities.rewriteIonFile(sourceFormat, input, output, options, IonUtilities::newJsonWriterSupplier); break; case JSON: if (options.limit == Integer.MAX_VALUE) { @@ -188,7 +189,7 @@ Path convert(Path input, Path output, OptionsCombinationBase options) throws IOE switch (sourceFormat) { case ION_BINARY: case ION_TEXT: - IonUtilities.rewriteIonFile(ION_BINARY, input, output, options, IonUtilities::newCborWriterSupplier); + IonUtilities.rewriteIonFile(sourceFormat, input, output, options, IonUtilities::newCborWriterSupplier); break; case JSON: JacksonUtilities.rewriteJsonToCbor(input, output, options); diff --git a/src/com/amazon/ion/benchmark/IonMeasurableWriteTask_1_1.java b/src/com/amazon/ion/benchmark/IonMeasurableWriteTask_1_1.java new file mode 100644 index 0000000..c310282 --- /dev/null +++ b/src/com/amazon/ion/benchmark/IonMeasurableWriteTask_1_1.java @@ -0,0 +1,69 @@ +package com.amazon.ion.benchmark; + +import com.amazon.ion.MacroAwareIonReader; +import com.amazon.ion.MacroAwareIonWriter; +import com.amazon.ion.impl._Private_IonReaderBuilder; + +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.function.Consumer; + +/** + * A MeasurableWriteTask for writing data in the Ion 1.1+ format (either text or binary) using input data that is + * also in the Ion 1.1+ format. This ensures encoding directives and macro invocations are preserved, providing + * a more accurate measurement of the write performance of the input. In cases where either the input or the output + * is Ion 1.0, {@link IonMeasurableWriteTask} should be used instead, as encoding directives and macro invocations + * cannot be preserved in that case. + */ +public class IonMeasurableWriteTask_1_1 extends MeasurableWriteTask { + + private final IonUtilities.IonWriterSupplier writerBuilder; + + /** + * @param inputPath path to the data to re-write. + * @param options options to use when writing. + * @throws IOException if thrown when handling the options. + */ + IonMeasurableWriteTask_1_1(Path inputPath, WriteOptionsCombination options) throws IOException { + super(inputPath, options); + if (options.format == Format.ION_TEXT) { + writerBuilder = IonUtilities.newTextWriterSupplier(options); + } else if (options.format == Format.ION_BINARY) { + writerBuilder = IonUtilities.newBinaryWriterSupplier(options); + } else { + throw new IllegalStateException("IonFormatWriter is compatible only with ION_TEXT and ION_BINARY"); + } + } + + @Override + void generateWriteInstructionsDom(Consumer> instructionsSink) { + throw new UnsupportedOperationException("Write benchmarking of Ion 1.1 from the DOM is not yet supported."); + } + + @Override + void generateWriteInstructionsStreaming(Consumer> instructionsSink) throws IOException { + if (options.limit != Integer.MAX_VALUE || options.flushPeriod != null) { + throw new UnsupportedOperationException("Benchmarking Ion 1.1 write using --limit or --ion-flush-period is not yet supported."); + } + // TODO support buildMacroAware from InputStream to avoid having to buffer all bytes. + try ( + MacroAwareIonReader reader = ((_Private_IonReaderBuilder) IonUtilities.newReaderBuilderForInput(options)) + .buildMacroAware(Files.readAllBytes(inputFile.toPath())) + ) { + reader.transcodeTo(new RecordingMacroAwareIonWriter(instructionsSink)); + } + } + + @Override + MacroAwareIonWriter newWriter(OutputStream outputStream) throws IOException { + return (MacroAwareIonWriter) writerBuilder.get(outputStream); + } + + @Override + void closeWriter(MacroAwareIonWriter writer) throws IOException { + // Note: this closes the underlying OutputStream. + writer.close(); + } +} diff --git a/src/com/amazon/ion/benchmark/IonUtilities.java b/src/com/amazon/ion/benchmark/IonUtilities.java index 04d4a09..3ebe1d6 100644 --- a/src/com/amazon/ion/benchmark/IonUtilities.java +++ b/src/com/amazon/ion/benchmark/IonUtilities.java @@ -6,10 +6,13 @@ import com.amazon.ion.IonSystem; import com.amazon.ion.IonType; import com.amazon.ion.IonWriter; +import com.amazon.ion.MacroAwareIonReader; +import com.amazon.ion.MacroAwareIonWriter; import com.amazon.ion.OffsetSpan; import com.amazon.ion.SpanProvider; import com.amazon.ion.SymbolTable; import com.amazon.ion.impl._Private_IonConstants; +import com.amazon.ion.impl._Private_IonReaderBuilder; import com.amazon.ion.impl._Private_IonSystem; import com.amazon.ion.impl._Private_IonWriter; import com.amazon.ion.impl.bin.LengthPrefixStrategy; @@ -19,6 +22,7 @@ import com.amazon.ion.system.IonReaderBuilder; import com.amazon.ion.system.IonSystemBuilder; import com.amazon.ion.system.IonTextWriterBuilder; +import com.amazon.ion.system.IonTextWriterBuilder_1_1; import com.amazon.ion.system.SimpleCatalog; import java.io.BufferedInputStream; @@ -28,6 +32,7 @@ import java.io.InputStream; import java.io.OutputStream; import java.nio.channels.FileChannel; +import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardOpenOption; import java.util.ArrayList; @@ -311,7 +316,12 @@ static IonWriterSupplier newCborWriterSupplier(OptionsCombinationBase options) { * @throws IOException if thrown when parsing shared symbol tables. */ static IonWriterSupplier newTextWriterSupplier(OptionsCombinationBase options) throws IOException { - return newTextWriterSupplier(options, IonTextWriterBuilder.standard()); + if (options.ionMinorVersion == 0) { + return newTextWriterSupplier(options, IonTextWriterBuilder.standard()); + } else if (options.ionMinorVersion == 1) { + return newTextWriterSupplier_1_1(options); + } + throw new IllegalStateException(); } /** @@ -325,7 +335,7 @@ static IonWriterSupplier newJsonWriterSupplier(OptionsCombinationBase options) t } /** - * Creates a new IonWriterSupplier for text IonWriters. + * Creates a new IonWriterSupplier for Ion 1.0 text or JSON IonWriters. * @param options the options to use when creating writers. * @param builder the builder to use to construct new writers. * @return a new instance. @@ -335,6 +345,18 @@ private static IonWriterSupplier newTextWriterSupplier(OptionsCombinationBase op return builder.withImports(parseImportsFromFile(options.importsForBenchmarkFile))::build; } + /** + * Creates a new IonWriterSupplier for Ion 1.1 text IonWriters. + * @param options the options to use when creating writers. + * @return a new instance. + * @throws IOException if thrown when parsing shared symbol tables. + */ + private static IonWriterSupplier newTextWriterSupplier_1_1(OptionsCombinationBase options) throws IOException { + IonTextWriterBuilder_1_1 builder = IonEncodingVersion.ION_1_1.textWriterBuilder(); + builder.withImports(parseImportsFromFile(options.importsForBenchmarkFile)); + return builder::build; + } + /** * Create a new IonCatalog populated with SharedSymbolTables read from the given file. * @param importsFile the file containing the shared symbol tables to import. @@ -458,6 +480,23 @@ private static void writeValuesWithOptions( } } + /** + * Rewrite the given Ion 1.1+ file to another Ion 1.1+ stream using the given options. + * @param input an ion 1.1+ file. + * @param options the options to use when re-writing. + * @param writer the writer of the new stream. + * @throws IOException if thrown when reading or writing. + */ + private static void rewriteIon11File(Path input, OptionsCombinationBase options, IonWriter writer) throws IOException{ + if (options.limit != Integer.MAX_VALUE) { + throw new UnsupportedOperationException("Macro-aware transcoding of Ion 1.1 with the --limit option not yet supported."); + } + // TODO add a method to MacroAwareIonReader to write one value at a time so that 'limit' can be used + try (MacroAwareIonReader macroAwareIonReader = ((_Private_IonReaderBuilder) newReaderBuilderForInput(options)).buildMacroAware(Files.readAllBytes(input))) { + macroAwareIonReader.transcodeTo((MacroAwareIonWriter) writer); + } + } + /** * Rewrite the given Ion file using the given options. * @param inputFormat the format of 'input'; must be ION_BINARY, ION_TEXT, or JSON. @@ -475,29 +514,33 @@ static void rewriteIonFile(Format inputFormat, Path input, Path output, OptionsC IonReader reader = null; TranscodeFunction transcodeFunction = STANDARD_TRANSCODE; try { - if ( - options.flushPeriod == null && - options.importsForInputFile == null && - options.importsForBenchmarkFile == null && - options.format == Format.ION_BINARY && - // Minor versions may add new kinds of system values, so it is not possible to maintain system value - // boundaries when downgrading to a previous minor version. - getMinorVersion(inputFormat, input.toFile()) <= options.ionMinorVersion - ) { - // Use system-level reader to preserve the same symbol tables from the input. - writer = writerSupplier.get(options.newOutputStream(outputFile)); - reader = ((_Private_IonSystem) ION_SYSTEM).newSystemReader(options.newInputStream(inputFile)); - if (options.ionMinorVersion > getMinorVersion(inputFormat, input.toFile())) { - // Because symbol IDs will be transferred during the system transcode, *and* this is a format - // upgrade, the symbol IDs need to be transformed to point to the same text in the new format. - transcodeFunction = TRANSCODE_1_0_TO_1_1; - } + int inputMinorVersion = getMinorVersion(inputFormat, input.toFile()); + writer = writerSupplier.get(options.newOutputStream(outputFile)); + if (inputMinorVersion > 0 && options.ionMinorVersion > 0) { + rewriteIon11File(input, options, writer); } else { - // Do not preserve the existing symbol table boundaries. - writer = writerSupplier.get(options.newOutputStream(outputFile)); - reader = newReaderBuilderForInput(options).build(options.newInputStream(inputFile)); + if ( + options.flushPeriod == null && + options.importsForInputFile == null && + options.importsForBenchmarkFile == null && + options.format == Format.ION_BINARY && + // Minor versions may add new kinds of system values, so it is not possible to maintain system value + // boundaries when downgrading to a previous minor version. + inputMinorVersion <= options.ionMinorVersion + ) { + // Use system-level reader to preserve the same symbol tables from the input. + reader = ((_Private_IonSystem) ION_SYSTEM).newSystemReader(options.newInputStream(inputFile)); + if (options.ionMinorVersion > inputMinorVersion) { + // Because symbol IDs will be transferred during the system transcode, *and* this is a format + // upgrade, the symbol IDs need to be transformed to point to the same text in the new format. + transcodeFunction = TRANSCODE_1_0_TO_1_1; + } + } else { + // Do not preserve the existing symbol table boundaries. + reader = newReaderBuilderForInput(options).build(options.newInputStream(inputFile)); + } + writeValuesWithOptions(reader, writer, options, transcodeFunction); } - writeValuesWithOptions(reader, writer, options, transcodeFunction); } finally { if (writer != null) { writer.close(); @@ -557,4 +600,18 @@ static IonSystem ionSystemForInput(OptionsCombinationBase options) throws IOExce } return IonSystemBuilder.standard().withCatalog(newCatalog(options.importsForInputFile)).build(); } + + /** + * Create a MeasurableWriteTask of the appropriate type for the given input and options. + * @param inputPath the input data. + * @param options the benchmark options. + * @return a new MeasurableWriteTask. + * @throws IOException if thrown when trying to classify the input file. + */ + static MeasurableWriteTask createIonMeasurableWriteTask(Path inputPath, WriteOptionsCombination options) throws IOException { + if (options.ionMinorVersion > 0 && getMinorVersion(Format.classify(inputPath), inputPath.toFile()) > 0) { + return new IonMeasurableWriteTask_1_1(inputPath, options); + } + return new IonMeasurableWriteTask(inputPath, options); + } } diff --git a/src/com/amazon/ion/benchmark/RecordingMacroAwareIonWriter.java b/src/com/amazon/ion/benchmark/RecordingMacroAwareIonWriter.java new file mode 100644 index 0000000..9a78993 --- /dev/null +++ b/src/com/amazon/ion/benchmark/RecordingMacroAwareIonWriter.java @@ -0,0 +1,249 @@ +package com.amazon.ion.benchmark; + +import com.amazon.ion.IonReader; +import com.amazon.ion.IonSystem; +import com.amazon.ion.IonType; +import com.amazon.ion.IonValue; +import com.amazon.ion.MacroAwareIonWriter; +import com.amazon.ion.SymbolTable; +import com.amazon.ion.SymbolToken; +import com.amazon.ion.Timestamp; +import com.amazon.ion.impl.macro.Macro; +import com.amazon.ion.impl.macro.MacroRef; +import com.amazon.ion.system.IonSystemBuilder; + +import java.math.BigDecimal; +import java.math.BigInteger; +import java.util.ArrayList; +import java.util.Date; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.function.Consumer; + +/** + * Records MacroAwareIonWriter method calls as WriteInstructions that may be replayed later. + */ +class RecordingMacroAwareIonWriter implements MacroAwareIonWriter { + + private final IonSystem SYSTEM = IonSystemBuilder.standard().build(); + private final Consumer> instructionsSink; + + /** + * @param instructionsSink sink for recorded instructions. + */ + RecordingMacroAwareIonWriter(Consumer> instructionsSink) { + this.instructionsSink = instructionsSink; + } + + @Override + public void startEncodingSegmentWithIonVersionMarker() { + instructionsSink.accept(MacroAwareIonWriter::startEncodingSegmentWithIonVersionMarker); + } + + @Override + public void startEncodingSegmentWithEncodingDirective( + Map newMacros, + boolean isMacroTableAppend, + List newSymbols, + boolean isSymbolTableAppend, + boolean encodingDirectiveAlreadyWritten + ) { + // Note: the collections passed in may be mutated after this method returns. Therefore, we copy them so that + // when replayed they do not have different contents. + Map newMacrosCopy = new LinkedHashMap<>(newMacros); + List listCopy = new ArrayList<>(newSymbols); + instructionsSink.accept( + w -> w.startEncodingSegmentWithEncodingDirective(newMacrosCopy, isMacroTableAppend, listCopy, isSymbolTableAppend, encodingDirectiveAlreadyWritten) + ); + } + + @Override + public void startMacro(Macro macro) { + instructionsSink.accept(w -> w.startMacro(macro)); + } + + @Override + public void startMacro(String s, Macro macro) { + instructionsSink.accept(w -> w.startMacro(s, macro)); + } + + @Override + public void endMacro() { + instructionsSink.accept(MacroAwareIonWriter::endMacro); + } + + @Override + public void startExpressionGroup() { + instructionsSink.accept(MacroAwareIonWriter::startExpressionGroup); + } + + @Override + public void endExpressionGroup() { + instructionsSink.accept(MacroAwareIonWriter::endExpressionGroup); + } + + @Override + public SymbolTable getSymbolTable() { + throw new UnsupportedOperationException("getSymbolTable() should not be called during benchmarking."); + } + + @Override + public void flush() { + instructionsSink.accept(MacroAwareIonWriter::flush); + } + + @Override + public void finish() { + instructionsSink.accept(MacroAwareIonWriter::finish); + } + + @Override + public void close() { + instructionsSink.accept(MacroAwareIonWriter::close); + } + + @Override + public void setFieldName(String name) { + instructionsSink.accept(w -> w.setFieldName(name)); + } + + @Override + public void setFieldNameSymbol(SymbolToken name) { + instructionsSink.accept(w -> w.setFieldNameSymbol(name)); + } + + @Override + public void setTypeAnnotations(String... annotations) { + instructionsSink.accept(w -> w.setTypeAnnotations(annotations)); + } + + @Override + public void setTypeAnnotationSymbols(SymbolToken... annotations) { + instructionsSink.accept(w -> w.setTypeAnnotationSymbols(annotations)); + } + + @Override + public void addTypeAnnotation(String annotation) { + instructionsSink.accept(w -> w.addTypeAnnotation(annotation)); + } + + @Override + public void stepIn(IonType containerType) { + instructionsSink.accept(w-> w.stepIn(containerType)); + } + + @Override + public void stepOut() { + instructionsSink.accept(MacroAwareIonWriter::stepOut); + } + + @Override + public boolean isInStruct() { + throw new UnsupportedOperationException("isInStruct() should not be called during benchmarking."); + } + + @Override + public void writeValue(IonValue value) { + throw new UnsupportedOperationException("writeValue(IonValue) should not be called during benchmarking."); + } + + @Override + public void writeValue(IonReader reader) { + // Note: w -> w.writeValue(reader) is not correct because it does not capture the value at which the reader + // is currently positioned. For now, we use IonValue to achieve this, though it would be more efficient to + // capture a primitive and store an instruction that writes that primitive to the writer directly. + IonValue value = SYSTEM.newValue(reader); + instructionsSink.accept(value::writeTo); + } + + @Override + public void writeValues(IonReader reader) { + throw new UnsupportedOperationException("writeValues() should not be called during benchmarking."); + } + + @Override + public void writeNull() { + instructionsSink.accept(MacroAwareIonWriter::writeNull); + } + + @Override + public void writeNull(IonType type) { + instructionsSink.accept(w -> w.writeNull(type)); + } + + @Override + public void writeBool(boolean value) { + instructionsSink.accept(w -> w.writeBool(value)); + } + + @Override + public void writeInt(long value) { + instructionsSink.accept(w -> w.writeInt(value)); + } + + @Override + public void writeInt(BigInteger value) { + instructionsSink.accept(w -> w.writeInt(value)); + } + + @Override + public void writeFloat(double value) { + instructionsSink.accept(w -> w.writeFloat(value)); + } + + @Override + public void writeDecimal(BigDecimal value) { + instructionsSink.accept(w -> w.writeDecimal(value)); + } + + @Override + public void writeTimestamp(Timestamp value) { + instructionsSink.accept(w -> w.writeTimestamp(value)); + } + + @Override + public void writeTimestampUTC(Date value) { + instructionsSink.accept(w -> w.writeTimestampUTC(value)); + } + + @Override + public void writeSymbol(String content) { + instructionsSink.accept(w -> w.writeSymbol(content)); + } + + @Override + public void writeSymbolToken(SymbolToken content) { + instructionsSink.accept(w -> w.writeSymbolToken(content)); + } + + @Override + public void writeString(String value) { + instructionsSink.accept(w -> w.writeString(value)); + } + + @Override + public void writeClob(byte[] value) { + instructionsSink.accept(w -> w.writeClob(value)); + } + + @Override + public void writeClob(byte[] value, int start, int len) { + instructionsSink.accept(w -> w.writeClob(value, start, len)); + } + + @Override + public void writeBlob(byte[] value) { + instructionsSink.accept(w -> w.writeBlob(value)); + } + + @Override + public void writeBlob(byte[] value, int start, int len) { + instructionsSink.accept(w -> w.writeBlob(value, start, len)); + } + + @Override + public T asFacet(Class facetType) { + throw new UnsupportedOperationException("asFacet() should not be called during benchmarking."); + } +} diff --git a/tst/com/amazon/ion/benchmark/OptionsTest.java b/tst/com/amazon/ion/benchmark/OptionsTest.java index c0e3747..b635609 100644 --- a/tst/com/amazon/ion/benchmark/OptionsTest.java +++ b/tst/com/amazon/ion/benchmark/OptionsTest.java @@ -20,6 +20,7 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -436,9 +437,11 @@ private static void assertImportsEqual(String expectedImportsFile, byte[] bytes) * @param expectedFormat the format of the data that is expected to be tested. * @param isConversionRequired false if the task is expected to be able to read the input file without conversion * or copying; otherwise, false. + * @return the data that was read by the task. This will be different from the data in the input file if the options + * require conversion. * @throws Exception if an unexpected error occurs. */ - private static void assertReadTaskExecutesCorrectly( + private static byte[] assertReadTaskExecutesCorrectly( String inputFileName, ReadOptionsCombination optionsCombination, Format expectedFormat, @@ -484,6 +487,7 @@ private static void assertReadTaskExecutesCorrectly( } // Verify that the original file was not deleted. assertTrue(inputPath.toFile().exists()); + return streamBytes; } /** @@ -495,9 +499,10 @@ private static void assertReadTaskExecutesCorrectly( * @param optionsCombination a combination of write command options. * @param expectedOutputFormat the expected format of the data to be written. * @param expectedIoType the expected IO type. + * @return the data that was written by the task. * @throws Exception if an unexpected error occurs. */ - private static void assertWriteTaskExecutesCorrectly( + private static byte[] assertWriteTaskExecutesCorrectly( String inputFileName, WriteOptionsCombination optionsCombination, Format expectedOutputFormat, @@ -557,6 +562,7 @@ private static void assertWriteTaskExecutesCorrectly( } assertNull(task.currentFile); assertNull(task.currentBuffer); + return outputBytes; } /** @@ -2224,7 +2230,7 @@ private void convertAndVerifySymbolTableBoundaries(int outputMinorVersion) throw reader.next(); } SymbolTable systemSymbolTable = reader.getSymbolTable(); - int firstLocalSid = systemSymbolTable.getMaxId() + 1; + int firstLocalSid = outputMinorVersion == 0 ? systemSymbolTable.getMaxId() + 1 : 1; assertEquals(IonType.STRUCT, reader.getType()); assertEquals("$ion_symbol_table", reader.getTypeAnnotations()[0]); assertEquals(IonType.SYMBOL, reader.next()); @@ -2435,4 +2441,119 @@ public void writeIonWithInlineSymbolsAndDelimitedContainers() throws Exception { writeIonWithInlineSymbolsAndDelimitedContainers("binaryAllTypes.10n"); writeIonWithInlineSymbolsAndDelimitedContainers("binaryAllTypes11.10n"); } + + /** + * Counts the number of times the given substring occurs in the given string. + * @param string the string. + * @param substring the substring. + * @return the number of occurrences. + */ + private static int countOccurrencesOfSubstring(String string, String substring) { + int lastMatchIndex = 0; + int count = 0; + while (lastMatchIndex >= 0) { + lastMatchIndex = string.indexOf(substring, lastMatchIndex); + if (lastMatchIndex >= 0) { + lastMatchIndex += substring.length(); + count++; + } + } + return count; + } + + @Test + public void writeIon11WithMacros() throws Exception { + WriteOptionsCombination optionsCombination = parseSingleOptionsCombination( + "write", + "--format", + "ion_text", + "--ion-minor-version", + "1", + "--io-type", + "buffer", + "binaryMacroInvocations.10n" + ); + byte[] data = assertWriteTaskExecutesCorrectly( + "binaryMacroInvocations.10n", + optionsCombination, + Format.ION_TEXT, + IoType.BUFFER + ); + String ion11Text = new String(data, StandardCharsets.UTF_8); + assertEquals(1, countOccurrencesOfSubstring(ion11Text, "add_symbols")); + assertEquals(2, countOccurrencesOfSubstring(ion11Text, "add_macros")); + assertEquals(1, countOccurrencesOfSubstring(ion11Text, "set_symbols")); + assertEquals(1, countOccurrencesOfSubstring(ion11Text, "set_macros")); + assertEquals(1, countOccurrencesOfSubstring(ion11Text, "(:Pi)")); + assertEquals(2, countOccurrencesOfSubstring(ion11Text, "(:foo)")); + } + + @Test + public void writeIon11WithMacrosToIon10() throws Exception { + WriteOptionsCombination optionsCombination = parseSingleOptionsCombination( + "write", + "--format", + "ion_text", + "--ion-minor-version", + "0", // This forces a conversion that cannot preserve the encoding directives or macro invocations. + "--io-type", + "buffer", + "binaryMacroInvocations.10n" + ); + assertWriteTaskExecutesCorrectly( + "binaryMacroInvocations.10n", + optionsCombination, + Format.ION_TEXT, + IoType.BUFFER + ); + } + + @Test + public void readIon11WithMacros() throws Exception { + ReadOptionsCombination optionsCombination = parseSingleOptionsCombination( + "read", + "--format", + "ion_text", + "--ion-minor-version", + "1", + "--io-type", + "buffer", + "binaryMacroInvocations.10n" + ); + byte[] data = assertReadTaskExecutesCorrectly( + "binaryMacroInvocations.10n", + optionsCombination, + Format.ION_TEXT, + true + ); + String ion11Text = new String(data, StandardCharsets.UTF_8); + assertEquals(1, countOccurrencesOfSubstring(ion11Text, "add_symbols")); + assertEquals(2, countOccurrencesOfSubstring(ion11Text, "add_macros")); + assertEquals(1, countOccurrencesOfSubstring(ion11Text, "set_symbols")); + assertEquals(1, countOccurrencesOfSubstring(ion11Text, "set_macros")); + assertEquals(1, countOccurrencesOfSubstring(ion11Text, "(:Pi)")); + assertEquals(2, countOccurrencesOfSubstring(ion11Text, "(:foo)")); + } + + @Test + public void readIon10ConvertedFromIon11WithMacros() throws Exception { + ReadOptionsCombination optionsCombination = parseSingleOptionsCombination( + "read", + "--format", + "ion_text", + "--ion-minor-version", + "0", // This forces a conversion that cannot preserve the encoding directives or macro invocations. + "--io-type", + "buffer", + "binaryMacroInvocations.10n" + ); + assertReadTaskExecutesCorrectly( + "binaryMacroInvocations.10n", + optionsCombination, + Format.ION_TEXT, + true + ); + } + + // TODO test writing Ion 1.1 binary from Ion 1.1 text input (once supported by IonJava). } diff --git a/tst/com/amazon/ion/benchmark/binaryAllTypes11.10n b/tst/com/amazon/ion/benchmark/binaryAllTypes11.10n index 0ce71bc926f4f4b589c771552055c835a941a546..01b750465f20233cc2cecc10fa6066b4ef9c322e 100644 GIT binary patch delta 85 zcmV-b0IL750j&WD-~j>Z?+XF}kqa9g@apR80P6wk0_y|o1nURu2>qd(#8q1Hi@91F~Uz-vh?s1mOkY1_$L25wfwc8f_(@ delta 110 zcmV-!0FnQ#0j~i<-~j>ZYZ+PnJ z>j3Kk>jLWo>jdiu>j>)x>jmox>k8`&VFgg#Z8m diff --git a/tst/com/amazon/ion/benchmark/binaryMacroInvocations.10n b/tst/com/amazon/ion/benchmark/binaryMacroInvocations.10n new file mode 100644 index 0000000000000000000000000000000000000000..f563c296007a41d63c791d212624dfbc7af7cffc GIT binary patch literal 79 zcmaFB$oT3#4-@00fXol?`4~UF<9+D#=|jnP{nIQT9y&3+=LSnnPRq}SNi%)=FgYo) U=mX Date: Tue, 3 Dec 2024 14:29:10 -0800 Subject: [PATCH 2/2] Adds support for --limit and --ion-flush-period for Ion 1.1 streams. --- .../benchmark/IonMeasurableWriteTask_1_1.java | 14 +-- .../amazon/ion/benchmark/IonUtilities.java | 28 +++-- .../RecordingMacroAwareIonWriter.java | 7 ++ tst/com/amazon/ion/benchmark/OptionsTest.java | 116 ++++++++++++++++++ 4 files changed, 143 insertions(+), 22 deletions(-) diff --git a/src/com/amazon/ion/benchmark/IonMeasurableWriteTask_1_1.java b/src/com/amazon/ion/benchmark/IonMeasurableWriteTask_1_1.java index c310282..64d6439 100644 --- a/src/com/amazon/ion/benchmark/IonMeasurableWriteTask_1_1.java +++ b/src/com/amazon/ion/benchmark/IonMeasurableWriteTask_1_1.java @@ -1,12 +1,9 @@ package com.amazon.ion.benchmark; -import com.amazon.ion.MacroAwareIonReader; import com.amazon.ion.MacroAwareIonWriter; -import com.amazon.ion.impl._Private_IonReaderBuilder; import java.io.IOException; import java.io.OutputStream; -import java.nio.file.Files; import java.nio.file.Path; import java.util.function.Consumer; @@ -44,16 +41,7 @@ void generateWriteInstructionsDom(Consumer @Override void generateWriteInstructionsStreaming(Consumer> instructionsSink) throws IOException { - if (options.limit != Integer.MAX_VALUE || options.flushPeriod != null) { - throw new UnsupportedOperationException("Benchmarking Ion 1.1 write using --limit or --ion-flush-period is not yet supported."); - } - // TODO support buildMacroAware from InputStream to avoid having to buffer all bytes. - try ( - MacroAwareIonReader reader = ((_Private_IonReaderBuilder) IonUtilities.newReaderBuilderForInput(options)) - .buildMacroAware(Files.readAllBytes(inputFile.toPath())) - ) { - reader.transcodeTo(new RecordingMacroAwareIonWriter(instructionsSink)); - } + IonUtilities.rewriteIon11File(inputFile, options, new RecordingMacroAwareIonWriter(instructionsSink)); } @Override diff --git a/src/com/amazon/ion/benchmark/IonUtilities.java b/src/com/amazon/ion/benchmark/IonUtilities.java index 3ebe1d6..abc7e88 100644 --- a/src/com/amazon/ion/benchmark/IonUtilities.java +++ b/src/com/amazon/ion/benchmark/IonUtilities.java @@ -482,18 +482,28 @@ private static void writeValuesWithOptions( /** * Rewrite the given Ion 1.1+ file to another Ion 1.1+ stream using the given options. - * @param input an ion 1.1+ file. + * @param inputFile an ion 1.1+ file. * @param options the options to use when re-writing. * @param writer the writer of the new stream. * @throws IOException if thrown when reading or writing. */ - private static void rewriteIon11File(Path input, OptionsCombinationBase options, IonWriter writer) throws IOException{ - if (options.limit != Integer.MAX_VALUE) { - throw new UnsupportedOperationException("Macro-aware transcoding of Ion 1.1 with the --limit option not yet supported."); - } - // TODO add a method to MacroAwareIonReader to write one value at a time so that 'limit' can be used - try (MacroAwareIonReader macroAwareIonReader = ((_Private_IonReaderBuilder) newReaderBuilderForInput(options)).buildMacroAware(Files.readAllBytes(input))) { - macroAwareIonReader.transcodeTo((MacroAwareIonWriter) writer); + static void rewriteIon11File(File inputFile, OptionsCombinationBase options, IonWriter writer) throws IOException { + try ( + MacroAwareIonReader macroAwareIonReader = ((_Private_IonReaderBuilder) newReaderBuilderForInput(options)) + .buildMacroAware(options.newInputStream(inputFile)) + ) { + macroAwareIonReader.prepareTranscodeTo((MacroAwareIonWriter) writer); + int i = 0; + boolean isUnlimited = options.limit == Integer.MAX_VALUE; + while (isUnlimited || i < options.limit) { + if (!macroAwareIonReader.transcodeNext()) { + break; + } + if (options.flushPeriod != null && i % options.flushPeriod == 0) { + writer.flush(); + } + i++; + } } } @@ -517,7 +527,7 @@ static void rewriteIonFile(Format inputFormat, Path input, Path output, OptionsC int inputMinorVersion = getMinorVersion(inputFormat, input.toFile()); writer = writerSupplier.get(options.newOutputStream(outputFile)); if (inputMinorVersion > 0 && options.ionMinorVersion > 0) { - rewriteIon11File(input, options, writer); + rewriteIon11File(inputFile, options, writer); } else { if ( options.flushPeriod == null && diff --git a/src/com/amazon/ion/benchmark/RecordingMacroAwareIonWriter.java b/src/com/amazon/ion/benchmark/RecordingMacroAwareIonWriter.java index 9a78993..0f85d80 100644 --- a/src/com/amazon/ion/benchmark/RecordingMacroAwareIonWriter.java +++ b/src/com/amazon/ion/benchmark/RecordingMacroAwareIonWriter.java @@ -153,6 +153,13 @@ public void writeValue(IonReader reader) { // Note: w -> w.writeValue(reader) is not correct because it does not capture the value at which the reader // is currently positioned. For now, we use IonValue to achieve this, though it would be more efficient to // capture a primitive and store an instruction that writes that primitive to the writer directly. + if (reader.isInStruct()) { + // The field name must be captured and written manually because the IonValue below is not considered + // a child of a container. + SymbolToken fieldName = reader.getFieldNameSymbol(); + instructionsSink.accept(w -> w.setFieldNameSymbol(fieldName)); + } + // Note: the following includes any annotations on the value. IonValue value = SYSTEM.newValue(reader); instructionsSink.accept(value::writeTo); } diff --git a/tst/com/amazon/ion/benchmark/OptionsTest.java b/tst/com/amazon/ion/benchmark/OptionsTest.java index b635609..ba8d0c6 100644 --- a/tst/com/amazon/ion/benchmark/OptionsTest.java +++ b/tst/com/amazon/ion/benchmark/OptionsTest.java @@ -2488,6 +2488,64 @@ public void writeIon11WithMacros() throws Exception { assertEquals(2, countOccurrencesOfSubstring(ion11Text, "(:foo)")); } + @Test + public void writeIon11WithMacrosAndLimit() throws Exception { + WriteOptionsCombination optionsCombination = parseSingleOptionsCombination( + "write", + "--format", + "ion_text", + "--ion-minor-version", + "1", + "--io-type", + "file", + "--limit", + "2", + "binaryMacroInvocations.10n" + ); + byte[] data = assertWriteTaskExecutesCorrectly( + "binaryMacroInvocations.10n", + optionsCombination, + Format.ION_TEXT, + IoType.FILE + ); + String ion11Text = new String(data, StandardCharsets.UTF_8); + assertEquals(1, countOccurrencesOfSubstring(ion11Text, "add_symbols")); + assertEquals(1, countOccurrencesOfSubstring(ion11Text, "add_macros")); + assertEquals(0, countOccurrencesOfSubstring(ion11Text, "set_symbols")); + assertEquals(0, countOccurrencesOfSubstring(ion11Text, "set_macros")); + assertEquals(1, countOccurrencesOfSubstring(ion11Text, "(:Pi)")); + assertEquals(0, countOccurrencesOfSubstring(ion11Text, "(:foo)")); + } + + @Test + public void writeIon11WithMacrosAndFlushPeriod() throws Exception { + WriteOptionsCombination optionsCombination = parseSingleOptionsCombination( + "write", + "--format", + "ion_text", + "--ion-minor-version", + "1", + "--io-type", + "file", + "--ion-flush-period", + "2", + "binaryMacroInvocations.10n" + ); + byte[] data = assertWriteTaskExecutesCorrectly( + "binaryMacroInvocations.10n", + optionsCombination, + Format.ION_TEXT, + IoType.FILE + ); + String ion11Text = new String(data, StandardCharsets.UTF_8); + assertEquals(1, countOccurrencesOfSubstring(ion11Text, "add_symbols")); + assertEquals(2, countOccurrencesOfSubstring(ion11Text, "add_macros")); + assertEquals(1, countOccurrencesOfSubstring(ion11Text, "set_symbols")); + assertEquals(1, countOccurrencesOfSubstring(ion11Text, "set_macros")); + assertEquals(1, countOccurrencesOfSubstring(ion11Text, "(:Pi)")); + assertEquals(2, countOccurrencesOfSubstring(ion11Text, "(:foo)")); + } + @Test public void writeIon11WithMacrosToIon10() throws Exception { WriteOptionsCombination optionsCombination = parseSingleOptionsCombination( @@ -2535,6 +2593,64 @@ public void readIon11WithMacros() throws Exception { assertEquals(2, countOccurrencesOfSubstring(ion11Text, "(:foo)")); } + @Test + public void readIon11WithMacrosAndLimit() throws Exception { + ReadOptionsCombination optionsCombination = parseSingleOptionsCombination( + "read", + "--format", + "ion_text", + "--ion-minor-version", + "1", + "--io-type", + "file", + "--limit", + "3", + "binaryMacroInvocations.10n" + ); + byte[] data = assertReadTaskExecutesCorrectly( + "binaryMacroInvocations.10n", + optionsCombination, + Format.ION_TEXT, + true + ); + String ion11Text = new String(data, StandardCharsets.UTF_8); + assertEquals(1, countOccurrencesOfSubstring(ion11Text, "add_symbols")); + assertEquals(2, countOccurrencesOfSubstring(ion11Text, "add_macros")); + assertEquals(1, countOccurrencesOfSubstring(ion11Text, "set_symbols")); + assertEquals(0, countOccurrencesOfSubstring(ion11Text, "set_macros")); + assertEquals(1, countOccurrencesOfSubstring(ion11Text, "(:Pi)")); + assertEquals(1, countOccurrencesOfSubstring(ion11Text, "(:foo)")); + } + + @Test + public void readIon11WithMacrosAndFlushPeriod() throws Exception { + ReadOptionsCombination optionsCombination = parseSingleOptionsCombination( + "read", + "--format", + "ion_text", + "--ion-minor-version", + "1", + "--io-type", + "file", + "--ion-flush-period", + "2", + "binaryMacroInvocations.10n" + ); + byte[] data = assertReadTaskExecutesCorrectly( + "binaryMacroInvocations.10n", + optionsCombination, + Format.ION_TEXT, + true + ); + String ion11Text = new String(data, StandardCharsets.UTF_8); + assertEquals(1, countOccurrencesOfSubstring(ion11Text, "add_symbols")); + assertEquals(2, countOccurrencesOfSubstring(ion11Text, "add_macros")); + assertEquals(1, countOccurrencesOfSubstring(ion11Text, "set_symbols")); + assertEquals(1, countOccurrencesOfSubstring(ion11Text, "set_macros")); + assertEquals(1, countOccurrencesOfSubstring(ion11Text, "(:Pi)")); + assertEquals(2, countOccurrencesOfSubstring(ion11Text, "(:foo)")); + } + @Test public void readIon10ConvertedFromIon11WithMacros() throws Exception { ReadOptionsCombination optionsCombination = parseSingleOptionsCombination(