Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

API: Add UnknownType #12012

Merged
merged 7 commits into from
Jan 24, 2025
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion api/src/main/java/org/apache/iceberg/Schema.java
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,10 @@ public class Schema implements Serializable {

@VisibleForTesting
static final Map<Type.TypeID, Integer> MIN_FORMAT_VERSIONS =
ImmutableMap.of(Type.TypeID.TIMESTAMP_NANO, 3, Type.TypeID.VARIANT, 3);
ImmutableMap.of(
Type.TypeID.TIMESTAMP_NANO, 3,
Type.TypeID.VARIANT, 3,
Type.TypeID.UNKNOWN, 3);

private final StructType struct;
private final int schemaId;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -539,7 +539,8 @@ private static String sanitize(Type type, Object value, long now, int today) {
case FIXED:
case BINARY:
case VARIANT:
// for boolean, uuid, decimal, fixed, variant, and binary, match the string result
case UNKNOWN:
// for boolean, uuid, decimal, fixed, variant, unknown, and binary, match the string result
return sanitizeSimpleString(value.toString());
Fokko marked this conversation as resolved.
Show resolved Hide resolved
}
throw new UnsupportedOperationException(
Expand Down
3 changes: 2 additions & 1 deletion api/src/main/java/org/apache/iceberg/types/Type.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ enum TypeID {
STRUCT(StructLike.class),
LIST(List.class),
MAP(Map.class),
VARIANT(Object.class);
VARIANT(Object.class),
UNKNOWN(Object.class);

private final Class<?> javaClass;

Expand Down
3 changes: 3 additions & 0 deletions api/src/main/java/org/apache/iceberg/types/TypeUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,9 @@ private static int estimateSize(Type type) {
case BINARY:
case VARIANT:
return 80;
case UNKNOWN:
// Consider Unknown as null
return 0;
case DECIMAL:
// 12 (header) + (12 + 12 + 4) (BigInteger) + 4 (scale) = 44 bytes
return 44;
Expand Down
19 changes: 19 additions & 0 deletions api/src/main/java/org/apache/iceberg/types/Types.java
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ private Types() {}
.put(StringType.get().toString(), StringType.get())
.put(UUIDType.get().toString(), UUIDType.get())
.put(BinaryType.get().toString(), BinaryType.get())
.put(UnknownType.get().toString(), UnknownType.get())
.buildOrThrow();

private static final Pattern FIXED = Pattern.compile("fixed\\[\\s*(\\d+)\\s*\\]");
Expand Down Expand Up @@ -447,6 +448,24 @@ public int hashCode() {
}
}

/**
 * Primitive type reported as {@link TypeID#UNKNOWN}.
 *
 * <p>Callers obtain the shared instance through {@link #get()}; its string form is {@code
 * "unknown"}.
 */
public static class UnknownType extends PrimitiveType {
  private static final UnknownType INSTANCE = new UnknownType();

  /** Returns the shared {@code UnknownType} instance. */
  public static UnknownType get() {
    return INSTANCE;
  }

  /** Returns the type name, {@code "unknown"}. */
  @Override
  public String toString() {
    return "unknown";
  }

  /** Returns {@link TypeID#UNKNOWN}. */
  @Override
  public TypeID typeId() {
    return TypeID.UNKNOWN;
  }
}

public static class DecimalType extends PrimitiveType {
public static DecimalType of(int precision, int scale) {
return new DecimalType(precision, scale);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ public class TestPartitionSpecValidation {
NestedField.required(4, "d", Types.TimestampType.withZone()),
NestedField.required(5, "another_d", Types.TimestampType.withZone()),
NestedField.required(6, "s", Types.StringType.get()),
NestedField.required(7, "v", Types.VariantType.get()));
NestedField.required(7, "v", Types.VariantType.get()),
NestedField.required(8, "u", Types.UnknownType.get()));

@Test
public void testMultipleTimestampPartitions() {
Expand Down Expand Up @@ -325,4 +326,15 @@ public void testVariantUnsupported() {
.isInstanceOf(ValidationException.class)
.hasMessage("Cannot partition by non-primitive source field: variant");
}

/** A bucket partition on source field id 8 (the unknown-typed column) must be rejected. */
@Test
public void testUnknownUnsupported() {
  String expectedMessage = "Invalid source type unknown for transform: bucket[5]";

  assertThatThrownBy(
          () -> {
            // add(...) and build() stay inside the callable so either may raise the failure
            PartitionSpec.builderFor(SCHEMA)
                .add(8, 1005, "unknown_partition1", Transforms.bucket(5))
                .build();
          })
      .isInstanceOf(ValidationException.class)
      .hasMessage(expectedMessage);
}
}
3 changes: 2 additions & 1 deletion api/src/test/java/org/apache/iceberg/TestSchema.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ public class TestSchema {
ImmutableList.of(
Types.TimestampNanoType.withoutZone(),
Types.TimestampNanoType.withZone(),
Types.VariantType.get());
Types.VariantType.get(),
Types.UnknownType.get());

private static final Schema INITIAL_DEFAULT_SCHEMA =
new Schema(
Expand Down
14 changes: 14 additions & 0 deletions api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,20 @@ public void testVariantUnsupported() {
assertThat(bucket.canTransform(Types.VariantType.get())).isFalse();
}

@Test
public void testUnknownUnsupported() {
HonahX marked this conversation as resolved.
Show resolved Hide resolved
assertThatThrownBy(() -> Transforms.bucket(Types.UnknownType.get(), 3))
.isInstanceOf(IllegalArgumentException.class)
.hasMessage("Cannot bucket by type: unknown");

Transform<Object, Integer> bucket = Transforms.bucket(3);
assertThatThrownBy(() -> bucket.bind(Types.UnknownType.get()))
.isInstanceOf(IllegalArgumentException.class)
.hasMessage("Cannot bucket by type: unknown");

assertThat(bucket.canTransform(Types.UnknownType.get())).isFalse();
}

private byte[] randomBytes(int length) {
byte[] bytes = new byte[length];
testRandom.nextBytes(bytes);
Expand Down
43 changes: 43 additions & 0 deletions api/src/test/java/org/apache/iceberg/transforms/TestDates.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.iceberg.transforms;

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

import org.apache.iceberg.expressions.Literal;
import org.apache.iceberg.types.Type;
Expand Down Expand Up @@ -267,4 +268,46 @@ public void testDatesReturnType() {
Type dayResultType = day.getResultType(type);
assertThat(dayResultType).isEqualTo(Types.DateType.get());
}

/** The year transform must refuse the unknown type when created, bound, or queried. */
@Test
public void testUnknownUnsupportedYear() {
  Types.UnknownType unknown = Types.UnknownType.get();
  String expectedMessage = "Unsupported type: unknown";

  assertThatThrownBy(() -> Transforms.year(unknown))
      .isInstanceOf(IllegalArgumentException.class)
      .hasMessage(expectedMessage);

  Transform<Object, Integer> unboundYear = Transforms.year();
  assertThatThrownBy(() -> unboundYear.bind(unknown))
      .isInstanceOf(IllegalArgumentException.class)
      .hasMessage(expectedMessage);

  boolean transformable = unboundYear.canTransform(unknown);
  assertThat(transformable).isFalse();
}

/** The month transform must refuse the unknown type when created, bound, or queried. */
@Test
public void testUnknownUnsupportedMonth() {
  Types.UnknownType unknown = Types.UnknownType.get();
  String expectedMessage = "Unsupported type: unknown";

  assertThatThrownBy(() -> Transforms.month(unknown))
      .isInstanceOf(IllegalArgumentException.class)
      .hasMessage(expectedMessage);

  Transform<Object, Integer> unboundMonth = Transforms.month();
  assertThatThrownBy(() -> unboundMonth.bind(unknown))
      .isInstanceOf(IllegalArgumentException.class)
      .hasMessage(expectedMessage);

  boolean transformable = unboundMonth.canTransform(unknown);
  assertThat(transformable).isFalse();
}

/** The day transform must refuse the unknown type when created, bound, or queried. */
@Test
public void testUnknownUnsupportedDay() {
  Types.UnknownType unknown = Types.UnknownType.get();
  String expectedMessage = "Unsupported type: unknown";

  assertThatThrownBy(() -> Transforms.day(unknown))
      .isInstanceOf(IllegalArgumentException.class)
      .hasMessage(expectedMessage);

  Transform<Object, Integer> unboundDay = Transforms.day();
  assertThatThrownBy(() -> unboundDay.bind(unknown))
      .isInstanceOf(IllegalArgumentException.class)
      .hasMessage(expectedMessage);

  boolean transformable = unboundDay.canTransform(unknown);
  assertThat(transformable).isFalse();
}
}
10 changes: 10 additions & 0 deletions api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,16 @@ public void testBigDecimalToHumanString() {
.isEqualTo(decimalString);
}

/** The identity transform renders a null value of the unknown type as the string "null". */
@Test
public void testUnknownToHumanString() {
  Transform<Object, Object> transform = Transforms.identity();
  Types.UnknownType type = Types.UnknownType.get();

  String rendered = transform.toHumanString(type, null);

  assertThat(rendered).as("Should produce \"null\" for null").isEqualTo("null");
}

@Test
public void testVariantUnsupported() {
assertThatThrownBy(() -> Transforms.identity().bind(Types.VariantType.get()))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.iceberg.transforms;

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

import org.apache.iceberg.expressions.Literal;
import org.apache.iceberg.types.Type;
Expand Down Expand Up @@ -633,4 +634,18 @@ public void testTimestampNanosReturnType() {
Type hourResultType = hour.getResultType(type);
assertThat(hourResultType).isEqualTo(Types.IntegerType.get());
}

/** The hour transform must refuse the unknown type when created, bound, or queried. */
@Test
public void testUnknownUnsupported() {
  Types.UnknownType unknown = Types.UnknownType.get();
  String expectedMessage = "Unsupported type: unknown";

  assertThatThrownBy(() -> Transforms.hour(unknown))
      .isInstanceOf(IllegalArgumentException.class)
      .hasMessage(expectedMessage);

  Transform<Object, Integer> unboundHour = Transforms.hour();
  assertThatThrownBy(() -> unboundHour.bind(unknown))
      .isInstanceOf(IllegalArgumentException.class)
      .hasMessage(expectedMessage);

  boolean transformable = unboundHour.canTransform(unknown);
  assertThat(transformable).isFalse();
}
}
14 changes: 14 additions & 0 deletions api/src/test/java/org/apache/iceberg/transforms/TestTruncate.java
Original file line number Diff line number Diff line change
Expand Up @@ -116,4 +116,18 @@ public void testVerifiedIllegalWidth() {
.isInstanceOf(IllegalArgumentException.class)
.hasMessage("Invalid truncate width: 0 (must be > 0)");
}

/**
 * The truncate transform must refuse the unknown type. The two failure paths differ: direct
 * creation raises {@code UnsupportedOperationException}, while binding an unbound transform raises
 * {@code IllegalArgumentException}.
 */
@Test
public void testUnknownUnsupported() {
  Types.UnknownType unknown = Types.UnknownType.get();

  assertThatThrownBy(() -> Transforms.truncate(unknown, 22))
      .isInstanceOf(UnsupportedOperationException.class)
      .hasMessage("Cannot truncate type: unknown");

  Transform<Object, Object> unboundTruncate = Transforms.truncate(22);
  assertThatThrownBy(() -> unboundTruncate.bind(unknown))
      .isInstanceOf(IllegalArgumentException.class)
      .hasMessage("Cannot bind to unsupported type: unknown");

  boolean transformable = unboundTruncate.canTransform(unknown);
  assertThat(transformable).isFalse();
}
}
37 changes: 37 additions & 0 deletions api/src/test/java/org/apache/iceberg/transforms/TestVoid.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg.transforms;

import static org.assertj.core.api.Assertions.assertThat;

import org.apache.iceberg.types.Types;
import org.junit.jupiter.api.Test;

/** Tests for the transform returned by {@code Transforms.alwaysNull()}. */
public class TestVoid {

  /** The always-null transform renders a null value of the unknown type as the string "null". */
  @Test
  public void testUnknownToHumanString() {
    Types.UnknownType unknownType = Types.UnknownType.get();
    // named after the factory it comes from; this is not the identity transform
    Transform<Object, Void> alwaysNull = Transforms.alwaysNull();

    assertThat(alwaysNull.toHumanString(unknownType, null))
        .as("Should produce \"null\" for null")
        .isEqualTo("null");
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,15 @@ public void testVariant() throws Exception {
.isEqualTo(variant);
}

/** Round-trips the unknown type through serialization and expects an equal type back. */
@Test
public void testUnknown() throws Exception {
  Types.UnknownType original = Types.UnknownType.get();

  Type roundTripped = TestHelpers.roundTripSerialize(original);

  assertThat(roundTripped)
      .as("Unknown serialization should be equal to starting type")
      .isEqualTo(original);
}

@Test
public void testSchema() throws Exception {
Schema schema =
Expand Down
4 changes: 2 additions & 2 deletions api/src/test/java/org/apache/iceberg/types/TestTypes.java
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public void fromPrimitiveString() {
assertThat(Types.fromPrimitiveString("Decimal(2,3)")).isEqualTo(Types.DecimalType.of(2, 3));

assertThatExceptionOfType(IllegalArgumentException.class)
.isThrownBy(() -> Types.fromPrimitiveString("Unknown"))
.withMessageContaining("Unknown");
.isThrownBy(() -> Types.fromPrimitiveString("abcdefghij"))
.withMessage("Cannot parse type string to primitive: abcdefghij");
}
}
8 changes: 8 additions & 0 deletions core/src/test/java/org/apache/iceberg/TestSortOrder.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.SortOrderUtil;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestTemplate;
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.api.io.TempDir;
Expand Down Expand Up @@ -342,6 +343,13 @@ public void testVariantUnsupported() {
.hasMessage("Unsupported type for identity: variant");
}

@Test
public void testUnknownSupported() {
Schema v3Schema = new Schema(Types.NestedField.optional(1, "u", Types.UnknownType.get()));

SortOrder.builderFor(v3Schema).withOrderId(10).asc("u").build();
Fokko marked this conversation as resolved.
Show resolved Hide resolved
}

@TestTemplate
public void testPreservingOrderSortedColumnNames() {
SortOrder order =
Expand Down