From b182a41e86cbbb4909f8bcc9c9ba94f4741486a6 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Tue, 14 Nov 2023 14:25:06 +0100 Subject: [PATCH] Remove the deprecated OriginalType --- .../parquet/arrow/schema/List3Levels.java | 3 +- .../parquet/schema/LogicalTypeAnnotation.java | 176 ------------------ .../java/org/apache/parquet/schema/Type.java | 7 - .../java/org/apache/parquet/schema/Types.java | 23 +-- .../parquet/parser/TestParquetParser.java | 42 ++++- .../converter/ParquetMetadataConverter.java | 40 ++-- .../TestParquetMetadataConverter.java | 4 - pom.xml | 7 + 8 files changed, 82 insertions(+), 220 deletions(-) diff --git a/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/List3Levels.java b/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/List3Levels.java index cf21cb19b9..bc54cbed0a 100644 --- a/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/List3Levels.java +++ b/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/List3Levels.java @@ -21,6 +21,7 @@ import static org.apache.parquet.schema.Type.Repetition.REPEATED; import org.apache.parquet.schema.GroupType; +import org.apache.parquet.schema.LogicalTypeAnnotation; import org.apache.parquet.schema.OriginalType; import org.apache.parquet.schema.Type; @@ -41,7 +42,7 @@ class List3Levels { * @param list the Parquet List */ public List3Levels(GroupType list) { - if (list.getOriginalType() != OriginalType.LIST || list.getFields().size() != 1) { + if (list.getLogicalTypeAnnotation() != LogicalTypeAnnotation.listType() || list.getFields().size() != 1) { throw new IllegalArgumentException("invalid list type: " + list); } this.list = list; diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java index c4e50f2926..b39f82899b 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java @@ -145,16 +145,6 @@ protected LogicalTypeAnnotation fromString(List params) { protected abstract LogicalTypeAnnotation fromString(List params); } - /** - * Convert this logical type to old logical type representation in parquet-mr (if there's any). - * Those logical type implementations, which don't have a corresponding mapping should return null. - * - * API should be considered private - * - * @return the OriginalType representation of the new logical type, or null if there's none - */ - public abstract OriginalType toOriginalType(); - /** * Visits this logical type with the given visitor * @@ -303,16 +293,6 @@ public static class StringLogicalTypeAnnotation extends LogicalTypeAnnotation { private StringLogicalTypeAnnotation() { } - /** - * API Should be considered private - * - * @return the original type - */ - @Override - public OriginalType toOriginalType() { - return OriginalType.UTF8; - } - @Override public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { return logicalTypeAnnotationVisitor.visit(this); @@ -346,16 +326,6 @@ public static class MapLogicalTypeAnnotation extends LogicalTypeAnnotation { private MapLogicalTypeAnnotation() { } - /** - * API Should be considered private - * - * @return the original type - */ - @Override - public OriginalType toOriginalType() { - return OriginalType.MAP; - } - @Override public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { return logicalTypeAnnotationVisitor.visit(this); @@ -384,16 +354,6 @@ public static class ListLogicalTypeAnnotation extends LogicalTypeAnnotation { private ListLogicalTypeAnnotation() { } - /** - * API Should be considered private - * - * @return the original type - */ - @Override - public OriginalType toOriginalType() { - return OriginalType.LIST; - } - @Override public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { return logicalTypeAnnotationVisitor.visit(this); @@ -422,16 +382,6 @@ public static class EnumLogicalTypeAnnotation extends LogicalTypeAnnotation { private EnumLogicalTypeAnnotation() { } - /** - * API Should be considered private - * - * @return the original type - */ - @Override - public OriginalType toOriginalType() { - return OriginalType.ENUM; - } - @Override public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { return logicalTypeAnnotationVisitor.visit(this); @@ -478,16 +428,6 @@ public int getScale() { return scale; } - /** - * API Should be considered private - * - * @return the original type - */ - @Override - public OriginalType toOriginalType() { - return OriginalType.DECIMAL; - } - @Override public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { return logicalTypeAnnotationVisitor.visit(this); @@ -535,16 +475,6 @@ public static class DateLogicalTypeAnnotation extends LogicalTypeAnnotation { private DateLogicalTypeAnnotation() { } - /** - * API Should be considered private - * - * @return the original type - */ - @Override - public OriginalType toOriginalType() { - return OriginalType.DATE; - } - @Override public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { return logicalTypeAnnotationVisitor.visit(this); @@ -587,23 +517,6 @@ private TimeLogicalTypeAnnotation(boolean isAdjustedToUTC, TimeUnit unit) { this.unit = unit; } - /** - * API Should be considered private - * - * @return the original type - */ - @Override - public OriginalType toOriginalType() { - switch (unit) { - case MILLIS: - return OriginalType.TIME_MILLIS; - case MICROS: - return OriginalType.TIME_MICROS; - default: - return null; - } - } - @Override public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { return logicalTypeAnnotationVisitor.visit(this); @@ -670,23 +583,6 @@ private TimestampLogicalTypeAnnotation(boolean isAdjustedToUTC, TimeUnit unit) { this.unit = unit; } - /** - * API Should be considered private - * - * @return the original type - */ - @Override - public OriginalType toOriginalType() { - switch (unit) { - case MILLIS: - return OriginalType.TIMESTAMP_MILLIS; - case MICROS: - return OriginalType.TIMESTAMP_MICROS; - default: - return null; - } - } - @Override public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { return logicalTypeAnnotationVisitor.visit(this); @@ -760,27 +656,6 @@ private IntLogicalTypeAnnotation(int bitWidth, boolean isSigned) { this.isSigned = isSigned; } - /** - * API Should be considered private - * - * @return the original type - */ - @Override - public OriginalType toOriginalType() { - switch (bitWidth) { - case 8: - return isSigned ? OriginalType.INT_8 : OriginalType.UINT_8; - case 16: - return isSigned ? OriginalType.INT_16 : OriginalType.UINT_16; - case 32: - return isSigned ? OriginalType.INT_32 : OriginalType.UINT_32; - case 64: - return isSigned ? OriginalType.INT_64 : OriginalType.UINT_64; - default: - return null; - } - } - @Override public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { return logicalTypeAnnotationVisitor.visit(this); @@ -836,16 +711,6 @@ public static class JsonLogicalTypeAnnotation extends LogicalTypeAnnotation { private JsonLogicalTypeAnnotation() { } - /** - * API Should be considered private - * - * @return the original type - */ - @Override - public OriginalType toOriginalType() { - return OriginalType.JSON; - } - @Override public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { return logicalTypeAnnotationVisitor.visit(this); @@ -879,16 +744,6 @@ public static class BsonLogicalTypeAnnotation extends LogicalTypeAnnotation { private BsonLogicalTypeAnnotation() { } - /** - * API Should be considered private - * - * @return the original type - */ - @Override - public OriginalType toOriginalType() { - return OriginalType.BSON; - } - @Override public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { return logicalTypeAnnotationVisitor.visit(this); @@ -923,17 +778,6 @@ public static class UUIDLogicalTypeAnnotation extends LogicalTypeAnnotation { private UUIDLogicalTypeAnnotation() { } - /** - * API Should be considered private - * - * @return the original type - */ - @Override - public OriginalType toOriginalType() { - // No OriginalType for UUID - return null; - } - @Override public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { return logicalTypeAnnotationVisitor.visit(this); @@ -963,16 +807,6 @@ public static LogicalTypeAnnotation getInstance() { private IntervalLogicalTypeAnnotation() { } - /** - * API Should be considered private - * - * @return the original type - */ - @Override - public OriginalType toOriginalType() { - return OriginalType.INTERVAL; - } - @Override public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { return logicalTypeAnnotationVisitor.visit(this); @@ -1018,16 +852,6 @@ public static MapKeyValueTypeAnnotation getInstance() { private MapKeyValueTypeAnnotation() { } - /** - * API Should be considered private - * - * @return the original type - */ - @Override - public OriginalType toOriginalType() { - return OriginalType.MAP_KEY_VALUE; - } - @Override public Optional accept(LogicalTypeAnnotationVisitor logicalTypeAnnotationVisitor) { return logicalTypeAnnotationVisitor.visit(this); diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Type.java b/parquet-column/src/main/java/org/apache/parquet/schema/Type.java index 310227ac20..c91daeac66 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/Type.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/Type.java @@ -227,13 +227,6 @@ public LogicalTypeAnnotation getLogicalTypeAnnotation() { return logicalTypeAnnotation; } - /** - * @return the original type (LIST, MAP, ...) - */ - public OriginalType getOriginalType() { - return logicalTypeAnnotation == null ? null : logicalTypeAnnotation.toOriginalType(); - } - /** * @return if this is a primitive type */ diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java index 9a978b5d31..c2f7fff032 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java @@ -329,10 +329,6 @@ public P named(String name) { "[BUG] Parent and return type are null: must override named"); } } - - protected OriginalType getOriginalType () { - return logicalTypeAnnotation == null ? null : logicalTypeAnnotation.toOriginalType(); - } } public abstract static class @@ -441,6 +437,9 @@ protected PrimitiveType build(String name) { } DecimalMetadata meta = decimalMetadata(); + if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) { + this.logicalTypeAnnotation = LogicalTypeAnnotation.fromOriginalType(OriginalType.DECIMAL, meta); + } // validate type annotations and required metadata if (logicalTypeAnnotation != null) { @@ -580,11 +579,7 @@ private Optional checkInt64PrimitiveType(LogicalTypeAnnotation logicalT }).orElseThrow(() -> new IllegalStateException(logicalTypeAnnotation + " can not be applied to a primitive type")); } - if (newLogicalTypeSet) { - return new PrimitiveType(repetition, primitiveType, length, name, logicalTypeAnnotation, id, columnOrder); - } else { - return new PrimitiveType(repetition, primitiveType, length, name, getOriginalType(), meta, id, columnOrder); - } + return new PrimitiveType(repetition, primitiveType, length, name, logicalTypeAnnotation, id, columnOrder); } private static long maxPrecision(int numBytes) { @@ -596,7 +591,6 @@ private static long maxPrecision(int numBytes) { } protected DecimalMetadata decimalMetadata() { - DecimalMetadata meta = null; if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) { LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalType = (LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) logicalTypeAnnotation; if (newLogicalTypeSet) { @@ -617,9 +611,8 @@ protected DecimalMetadata decimalMetadata() { "Invalid DECIMAL scale: %s", this.scale); Preconditions.checkArgument(this.scale <= precision, "Invalid DECIMAL scale: cannot be greater than precision"); - meta = new DecimalMetadata(precision, scale); } - return meta; + return new DecimalMetadata(precision, scale); } } @@ -772,11 +765,7 @@ public THIS addFields(Type... types) { @Override protected GroupType build(String name) { - if (newLogicalTypeSet) { - return new GroupType(repetition, name, logicalTypeAnnotation, fields, id); - } else { - return new GroupType(repetition, name, getOriginalType(), fields, id); - } + return new GroupType(repetition, name, logicalTypeAnnotation, fields, id); } public MapBuilder map( diff --git a/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java b/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java index fa200ab424..5ef66c4806 100644 --- a/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java +++ b/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java @@ -36,6 +36,7 @@ import static org.apache.parquet.schema.OriginalType.*; import static org.apache.parquet.schema.Types.buildMessage; +import org.apache.parquet.schema.LogicalTypeAnnotation; import org.junit.Test; import org.apache.parquet.schema.GroupType; @@ -228,7 +229,7 @@ public void testLISTAnnotation() { } @Test - public void testDecimalFixedAnnotation() { + public void testDecimalFixedAnnotationOriginalType() { String message = "message DecimalMessage {\n" + " required FIXED_LEN_BYTE_ARRAY(4) aDecimal (DECIMAL(9,2));\n" + @@ -247,7 +248,26 @@ public void testDecimalFixedAnnotation() { } @Test - public void testDecimalBinaryAnnotation() { + public void testDecimalFixedAnnotation() { + String message = + "message DecimalMessage {\n" + + " required FIXED_LEN_BYTE_ARRAY(4) aDecimal (DECIMAL(9,2));\n" + + "}\n"; + + MessageType parsed = parseMessageType(message); + MessageType expected = buildMessage() + .required(FIXED_LEN_BYTE_ARRAY).length(4) + .as(LogicalTypeAnnotation.decimalType(2, 9)) + .named("aDecimal") + .named("DecimalMessage"); + + assertEquals(expected, parsed); + MessageType reparsed = parseMessageType(parsed.toString()); + assertEquals(expected, reparsed); + } + + @Test + public void testDecimalBinaryAnnotationOriginalType() { String message = "message DecimalMessage {\n" + " required binary aDecimal (DECIMAL(9,2));\n" + @@ -264,6 +284,24 @@ public void testDecimalBinaryAnnotation() { assertEquals(expected, reparsed); } + @Test + public void testDecimalBinaryAnnotation() { + String message = + "message DecimalMessage {\n" + + " required binary aDecimal (DECIMAL(9,2));\n" + + "}\n"; + + MessageType parsed = parseMessageType(message); + MessageType expected = buildMessage() + .required(BINARY).as(LogicalTypeAnnotation.decimalType(2, 9)) + .named("aDecimal") + .named("DecimalMessage"); + + assertEquals(expected, parsed); + MessageType reparsed = parseMessageType(parsed.toString()); + assertEquals(expected, reparsed); + } + @Test public void testTimeAnnotations() { String message = "message TimeMessage {" + diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java index 09b21538e5..e2e6c2ed56 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java @@ -380,7 +380,7 @@ public Optional visit(LogicalTypeAnnotation.TimeLogicalTypeAnnota case NANOS: return empty(); default: - throw new RuntimeException("Unknown converted type for " + timeLogicalType.toOriginalType()); + throw new RuntimeException("Unknown converted type for " + timeLogicalType); } } @@ -394,7 +394,7 @@ public Optional visit(LogicalTypeAnnotation.TimestampLogicalTypeA case NANOS: return empty(); default: - throw new RuntimeException("Unknown converted type for " + timestampLogicalType.toOriginalType()); + throw new RuntimeException("Unknown converted type for " + timestampLogicalType); } } @@ -411,7 +411,7 @@ public Optional visit(LogicalTypeAnnotation.IntLogicalTypeAnnotat case 64: return of(signed ? ConvertedType.INT_64 : ConvertedType.UINT_64); default: - throw new RuntimeException("Unknown original type " + intLogicalType.toOriginalType()); + throw new RuntimeException("Unknown original type " + intLogicalType); } } @@ -1026,6 +1026,20 @@ Type getType(PrimitiveTypeName type) { } } + private boolean getAdjustToUtc(SchemaElement schemaElement) { + if (schemaElement != null && schemaElement.logicalType != null) { + Object field = schemaElement.logicalType.getFieldValue(); + if (field instanceof TimeType) { + return ((TimeType)field).isAdjustedToUTC; + } + else if (field instanceof TimestampType) { + return ((TimestampType)field).isAdjustedToUTC; + } + } + + return true; + } + // Visible for testing LogicalTypeAnnotation getLogicalTypeAnnotation(ConvertedType type, SchemaElement schemaElement) { switch (type) { @@ -1046,13 +1060,13 @@ LogicalTypeAnnotation getLogicalTypeAnnotation(ConvertedType type, SchemaElement case DATE: return LogicalTypeAnnotation.dateType(); case TIME_MILLIS: - return LogicalTypeAnnotation.timeType(true, LogicalTypeAnnotation.TimeUnit.MILLIS); + return LogicalTypeAnnotation.timeType(getAdjustToUtc(schemaElement), LogicalTypeAnnotation.TimeUnit.MILLIS); case TIME_MICROS: - return LogicalTypeAnnotation.timeType(true, LogicalTypeAnnotation.TimeUnit.MICROS); + return LogicalTypeAnnotation.timeType(getAdjustToUtc(schemaElement), LogicalTypeAnnotation.TimeUnit.MICROS); case TIMESTAMP_MILLIS: - return LogicalTypeAnnotation.timestampType(true, LogicalTypeAnnotation.TimeUnit.MILLIS); + return LogicalTypeAnnotation.timestampType(getAdjustToUtc(schemaElement), LogicalTypeAnnotation.TimeUnit.MILLIS); case TIMESTAMP_MICROS: - return LogicalTypeAnnotation.timestampType(true, LogicalTypeAnnotation.TimeUnit.MICROS); + return LogicalTypeAnnotation.timestampType(getAdjustToUtc(schemaElement), LogicalTypeAnnotation.TimeUnit.MICROS); case INTERVAL: return LogicalTypeAnnotation.IntervalLogicalTypeAnnotation.getInstance(); case INT_8: @@ -1715,15 +1729,15 @@ private void buildChildren(Types.GroupBuilder builder, childBuilder.as(getLogicalTypeAnnotation(schemaElement.logicalType)); } if (schemaElement.isSetConverted_type()) { - OriginalType originalType = getLogicalTypeAnnotation(schemaElement.converted_type, schemaElement).toOriginalType(); - OriginalType newOriginalType = (schemaElement.isSetLogicalType() && getLogicalTypeAnnotation(schemaElement.logicalType) != null) ? - getLogicalTypeAnnotation(schemaElement.logicalType).toOriginalType() : null; - if (!originalType.equals(newOriginalType)) { - if (newOriginalType != null) { + LogicalTypeAnnotation typeAnnotation = getLogicalTypeAnnotation(schemaElement.converted_type, schemaElement); + LogicalTypeAnnotation newTypeAnnotation = (schemaElement.isSetLogicalType() && getLogicalTypeAnnotation(schemaElement.logicalType) != null) ? + getLogicalTypeAnnotation(schemaElement.logicalType) : null; + if (!typeAnnotation.equals(newTypeAnnotation)) { + if (newTypeAnnotation != null) { LOG.warn("Converted type and logical type metadata mismatch (convertedType: {}, logical type: {}). Using value in converted type.", schemaElement.converted_type, schemaElement.logicalType); } - childBuilder.as(originalType); + childBuilder.as(typeAnnotation); } } if (schemaElement.isSetField_id()) { diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java index 3997808cfb..ac30999109 100644 --- a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java +++ b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java @@ -422,10 +422,6 @@ public void testEnumEquivalence() { for (Type type : Type.values()) { assertEquals(type, parquetMetadataConverter.getType(parquetMetadataConverter.getPrimitive(type))); } - for (OriginalType original : OriginalType.values()) { - assertEquals(original, parquetMetadataConverter.getLogicalTypeAnnotation( - parquetMetadataConverter.convertToConvertedType(LogicalTypeAnnotation.fromOriginalType(original, null)), null).toOriginalType()); - } for (ConvertedType converted : ConvertedType.values()) { assertEquals(converted, parquetMetadataConverter.convertToConvertedType(parquetMetadataConverter.getLogicalTypeAnnotation(converted, null))); } diff --git a/pom.xml b/pom.xml index bc3c654618..3e720f436b 100644 --- a/pom.xml +++ b/pom.xml @@ -553,6 +553,13 @@ org.apache.parquet.hadoop.thrift.TBaseWriteSupport#setThriftClass(org.apache.parquet.conf.ParquetConfiguration,java.lang.Class) org.apache.parquet.proto.ProtoParquetReader#builder(org.apache.hadoop.fs.Path,boolean) org.apache.parquet.proto.ProtoParquetReader#builder(org.apache.parquet.io.InputFile,boolean) + + org.apache.parquet.schema.GroupType + org.apache.parquet.schema.LogicalTypeAnnotation + org.apache.parquet.schema.MessageType + org.apache.parquet.schema.PrimitiveType + org.apache.parquet.schema.Type + org.apache.parquet.schema.Types