Skip to content

Commit

Permalink
Address comments
Browse files Browse the repository at this point in the history
  • Loading branch information
ajantha-bhat committed Jan 10, 2025
1 parent 233a00b commit de49dee
Show file tree
Hide file tree
Showing 14 changed files with 661 additions and 439 deletions.
31 changes: 22 additions & 9 deletions .palantir/revapi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1172,27 +1172,40 @@ acceptedBreaks:
\ java.util.function.Consumer<T>)"
justification: "Removing deprecated code"
org.apache.iceberg:iceberg-parquet:
- code: "java.method.abstractMethodAdded"
new: "method org.apache.iceberg.parquet.ParquetValueReaders.PrimitiveReader<?>\
\ org.apache.iceberg.data.parquet.BaseParquetReaders<T>::dateReader(org.apache.parquet.column.ColumnDescriptor)"
justification: "Refactor Parquet reader and writer"
- code: "java.method.abstractMethodAdded"
new: "method org.apache.iceberg.parquet.ParquetValueReaders.PrimitiveReader<?>\
\ org.apache.iceberg.data.parquet.BaseParquetReaders<T>::fixedReader(org.apache.parquet.column.ColumnDescriptor)"
justification: "{Refactor Parquet reader and writer}"
justification: "Refactor Parquet reader and writer"
- code: "java.method.abstractMethodAdded"
new: "method org.apache.iceberg.parquet.ParquetValueReaders.PrimitiveReader<?>\
\ org.apache.iceberg.data.parquet.BaseParquetReaders<T>::int96Reader(org.apache.parquet.column.ColumnDescriptor)"
justification: "{Refactor Parquet reader and writer}"
justification: "Refactor Parquet reader and writer"
- code: "java.method.abstractMethodAdded"
new: "method org.apache.iceberg.parquet.ParquetValueReaders.PrimitiveReader<?>\
\ org.apache.iceberg.data.parquet.BaseParquetReaders<T>::timeReader(org.apache.parquet.column.ColumnDescriptor,\
\ org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit)"
justification: "Refactor Parquet reader and writer"
- code: "java.method.abstractMethodAdded"
new: "method org.apache.iceberg.parquet.ParquetValueReaders.PrimitiveReader<?>\
\ org.apache.iceberg.data.parquet.BaseParquetReaders<T>::timestampReader(org.apache.parquet.column.ColumnDescriptor,\
\ org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit, boolean)"
justification: "Refactor Parquet reader and writer"
- code: "java.method.abstractMethodAdded"
new: "method org.apache.iceberg.parquet.ParquetValueReaders.PrimitiveReader<?>\
\ org.apache.iceberg.data.parquet.BaseParquetReaders<T>::uuidReader(org.apache.parquet.column.ColumnDescriptor)"
justification: "Refactor Parquet reader and writer"
- code: "java.method.abstractMethodAdded"
new: "method org.apache.iceberg.parquet.ParquetValueWriters.PrimitiveWriter<?>\
\ org.apache.iceberg.data.parquet.BaseParquetWriter<T>::fixedWriter(org.apache.parquet.column.ColumnDescriptor)"
justification: "{Refactor Parquet reader and writer}"
- code: "java.method.abstractMethodAdded"
new: "method org.apache.parquet.schema.LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<org.apache.iceberg.parquet.ParquetValueReader<?>>\
\ org.apache.iceberg.data.parquet.BaseParquetReaders<T>::logicalTypeReaderVisitor(org.apache.parquet.column.ColumnDescriptor,\
\ org.apache.iceberg.types.Type.PrimitiveType, org.apache.parquet.schema.PrimitiveType)"
justification: "{Refactor Parquet reader and writer}"
justification: "Refactor Parquet reader and writer"
- code: "java.method.abstractMethodAdded"
new: "method org.apache.parquet.schema.LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<org.apache.iceberg.parquet.ParquetValueWriters.PrimitiveWriter<?>>\
\ org.apache.iceberg.data.parquet.BaseParquetWriter<T>::logicalTypeWriterVisitor(org.apache.parquet.column.ColumnDescriptor)"
justification: "{Refactor Parquet reader and writer}"
justification: "Refactor Parquet reader and writer"
apache-iceberg-0.14.0:
org.apache.iceberg:iceberg-api:
- code: "java.class.defaultSerializationChanged"
Expand Down
57 changes: 57 additions & 0 deletions api/src/test/java/org/apache/iceberg/util/RandomUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,14 @@
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.function.Supplier;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;

Expand Down Expand Up @@ -228,4 +235,54 @@ private static BigInteger randomUnscaled(int precision, Random random) {

return new BigInteger(sb.toString());
}

public static List<Object> generateList(
Random random, Types.ListType list, Supplier<Object> elementResult) {
int numElements = random.nextInt(20);

List<Object> result = Lists.newArrayListWithExpectedSize(numElements);
for (int i = 0; i < numElements; i += 1) {
// return null 5% of the time when the value is optional
if (list.isElementOptional() && random.nextInt(20) == 1) {
result.add(null);
} else {
result.add(elementResult.get());
}
}

return result;
}

public static Map<Object, Object> generateMap(
Random random, Types.MapType map, Supplier<Object> keyResult, Supplier<Object> valueResult) {
int numEntries = random.nextInt(20);

Map<Object, Object> result = Maps.newLinkedHashMap();
Supplier<Object> keyFunc;
if (map.keyType() == Types.StringType.get()) {
keyFunc = () -> keyResult.get().toString();
} else {
keyFunc = keyResult;
}

Set<Object> keySet = Sets.newHashSet();
for (int i = 0; i < numEntries; i += 1) {
Object key = keyFunc.get();
// ensure no collisions
while (keySet.contains(key)) {
key = keyFunc.get();
}

keySet.add(key);

// return null 5% of the time when the value is optional
if (map.isValueOptional() && random.nextInt(20) == 1) {
result.put(key, null);
} else {
result.put(key, valueResult.get());
}
}

return result;
}
}
110 changes: 110 additions & 0 deletions core/src/test/java/org/apache/iceberg/InternalTestHelpers.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg;

import static org.assertj.core.api.Assertions.assertThat;

import java.util.List;
import java.util.Map;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;

public class InternalTestHelpers {

private InternalTestHelpers() {}

public static void assertEquals(Types.StructType struct, StructLike expected, StructLike actual) {
List<Types.NestedField> fields = struct.fields();
for (int i = 0; i < fields.size(); i += 1) {
Type fieldType = fields.get(i).type();

Object expectedValue = expected.get(i, fieldType.typeId().javaClass());
Object actualValue = actual.get(i, fieldType.typeId().javaClass());

assertEquals(fieldType, expectedValue, actualValue);
}
}

public static void assertEquals(Types.ListType list, List<?> expected, List<?> actual) {
Type elementType = list.elementType();

assertThat(actual).as("List size should match").hasSameSizeAs(expected);

for (int i = 0; i < expected.size(); i += 1) {
Object expectedValue = expected.get(i);
Object actualValue = actual.get(i);

assertEquals(elementType, expectedValue, actualValue);
}
}

public static void assertEquals(Types.MapType map, Map<?, ?> expected, Map<?, ?> actual) {
Type valueType = map.valueType();

assertThat(actual).as("Map keys should match").hasSameSizeAs(expected);

for (Object expectedKey : expected.keySet()) {
Object expectedValue = expected.get(expectedKey);
Object actualValue = actual.get(expectedKey);

assertEquals(valueType, expectedValue, actualValue);
}
}

private static void assertEquals(Type type, Object expected, Object actual) {
if (expected == null && actual == null) {
return;
}

switch (type.typeId()) {
case BOOLEAN:
case INTEGER:
case LONG:
case FLOAT:
case DOUBLE:
case STRING:
case DATE:
case TIME:
case TIMESTAMP:
case UUID:
case FIXED:
case BINARY:
case DECIMAL:
assertThat(actual).as("Primitive value should be equal to expected").isEqualTo(expected);
break;
case STRUCT:
assertThat(expected).as("Expected should be a StructLike").isInstanceOf(StructLike.class);
assertThat(actual).as("Actual should be a StructLike").isInstanceOf(StructLike.class);
assertEquals(type.asStructType(), (StructLike) expected, (StructLike) actual);
break;
case LIST:
assertThat(expected).as("Expected should be a List").isInstanceOf(List.class);
assertThat(actual).as("Actual should be a List").isInstanceOf(List.class);
assertEquals(type.asListType(), (List<?>) expected, (List<?>) actual);
break;
case MAP:
assertThat(expected).as("Expected should be a Map").isInstanceOf(Map.class);
assertThat(actual).as("Actual should be a Map").isInstanceOf(Map.class);
assertEquals(type.asMapType(), (Map<?, ?>) expected, (Map<?, ?>) actual);
break;
default:
throw new IllegalArgumentException("Not a supported type: " + type);
}
}
}
104 changes: 104 additions & 0 deletions core/src/test/java/org/apache/iceberg/RandomInternalData.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg;

import java.nio.ByteBuffer;
import java.util.List;
import java.util.Random;
import java.util.UUID;
import java.util.function.Supplier;
import org.apache.iceberg.data.GenericRecord;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.TypeUtil;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.RandomUtil;

public class RandomInternalData {

private RandomInternalData() {}

public static List<StructLike> generate(Schema schema, int numRecords, long seed) {
RandomDataGenerator generator = new RandomDataGenerator(seed);
List<StructLike> records = Lists.newArrayListWithExpectedSize(numRecords);
for (int i = 0; i < numRecords; i += 1) {
records.add((StructLike) TypeUtil.visit(schema, generator));
}

return records;
}

private static class RandomDataGenerator extends TypeUtil.CustomOrderSchemaVisitor<Object> {
private final Random random;

private RandomDataGenerator(long seed) {
this.random = new Random(seed);
}

@Override
public StructLike schema(Schema schema, Supplier<Object> structResult) {
return (StructLike) structResult.get();
}

@Override
public StructLike struct(Types.StructType struct, Iterable<Object> fieldResults) {
StructLike rec = GenericRecord.create(struct);
List<Object> values = Lists.newArrayList(fieldResults);
for (int i = 0; i < values.size(); i += 1) {
rec.set(i, values.get(i));
}

return rec;
}

@Override
public Object field(Types.NestedField field, Supplier<Object> fieldResult) {
// return null 5% of the time when the value is optional
if (field.isOptional() && random.nextInt(20) == 1) {
return null;
}
return fieldResult.get();
}

@Override
public Object list(Types.ListType list, Supplier<Object> elementResult) {
return RandomUtil.generateList(random, list, elementResult);
}

@Override
public Object map(Types.MapType map, Supplier<Object> keyResult, Supplier<Object> valueResult) {
return RandomUtil.generateMap(random, map, keyResult, valueResult);
}

@Override
public Object primitive(Type.PrimitiveType primitive) {
Object result = RandomUtil.generatePrimitive(primitive, random);

switch (primitive.typeId()) {
case FIXED:
case BINARY:
return ByteBuffer.wrap((byte[]) result);
case UUID:
return UUID.nameUUIDFromBytes((byte[]) result);
default:
return result;
}
}
}
}
Loading

0 comments on commit de49dee

Please sign in to comment.