Skip to content

Commit

Permalink
[LI] Remove hive-serde dependency from HiveMetadataPreservingTableOperations (#139)
Browse files Browse the repository at this point in the history
  • Loading branch information
jack-moseley authored and rzhang10 committed Mar 3, 2023
1 parent 6896d13 commit 7dd301f
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 36 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.ReentrantLock;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
Expand All @@ -41,17 +40,15 @@
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.avro.AvroObjectInspectorGenerator;
import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.iceberg.ClientPool;
import org.apache.iceberg.TableMetadata;
import org.apache.iceberg.avro.AvroSchemaUtil;
import org.apache.iceberg.common.DynMethods;
import org.apache.iceberg.exceptions.AlreadyExistsException;
import org.apache.iceberg.exceptions.CommitFailedException;
import org.apache.iceberg.exceptions.CommitStateUnknownException;
import org.apache.iceberg.exceptions.NoSuchTableException;
import org.apache.iceberg.hive.HiveSchemaUtil;
import org.apache.iceberg.hive.HiveTableOperations;
import org.apache.iceberg.hive.MetastoreUtil;
import org.apache.iceberg.io.FileIO;
Expand All @@ -78,6 +75,8 @@ public class HiveMetadataPreservingTableOperations extends HiveTableOperations {
private final String database;
private final String tableName;
private final String fullName;
// Redefine avro.schema.literal to avoid dependency on hive-serde
public static final String AVRO_SCHEMA_LITERAL_KEY = "avro.schema.literal";

private static final DynMethods.UnboundMethod ALTER_TABLE =
DynMethods.builder("alter_table")
Expand Down Expand Up @@ -297,13 +296,7 @@ static boolean fixMismatchedSchema(Table table) {
return false;
}
Schema schema = new Schema.Parser().parse(avroSchemaLiteral);
List<FieldSchema> hiveCols;
try {
hiveCols = getColsFromAvroSchema(schema);
} catch (SerDeException e) {
LOG.error("Failed to get columns from avro schema when checking schema", e);
return false;
}
List<FieldSchema> hiveCols = HiveSchemaUtil.convert(AvroSchemaUtil.toIceberg(schema));

boolean schemaMismatched;
if (table.getSd().getCols().size() != hiveCols.size()) {
Expand Down Expand Up @@ -340,29 +333,10 @@ static boolean fixMismatchedSchema(Table table) {
return schemaMismatched;
}

private static List<FieldSchema> getColsFromAvroSchema(Schema schema) throws SerDeException {
AvroObjectInspectorGenerator avroOI = new AvroObjectInspectorGenerator(schema);
List<String> columnNames = avroOI.getColumnNames();
List<TypeInfo> columnTypes = avroOI.getColumnTypes();
if (columnNames.size() != columnTypes.size()) {
throw new IllegalStateException();
}

return IntStream.range(0, columnNames.size())
.mapToObj(i -> new FieldSchema(columnNames.get(i), columnTypes.get(i).getTypeName(), ""))
.collect(Collectors.toList());
}

private static String getAvroSchemaLiteral(Table table) {
String schemaStr =
table.getParameters().get(AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName());
String schemaStr = table.getParameters().get(AVRO_SCHEMA_LITERAL_KEY);
if (Strings.isNullOrEmpty(schemaStr)) {
schemaStr =
table
.getSd()
.getSerdeInfo()
.getParameters()
.get(AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName());
schemaStr = table.getSd().getSerdeInfo().getParameters().get(AVRO_SCHEMA_LITERAL_KEY);
}
return schemaStr;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,9 @@ public void testFixMismatchedSchema() {

long currentTimeMillis = System.currentTimeMillis();
StorageDescriptor storageDescriptor = new StorageDescriptor();
FieldSchema field1 = new FieldSchema("name", "string", "");
FieldSchema field2 = new FieldSchema("id", "int", "");
FieldSchema field3 = new FieldSchema("nested", "struct<field1:string,field2:string>", "");
FieldSchema field1 = new FieldSchema("name", "string", null);
FieldSchema field2 = new FieldSchema("id", "int", null);
FieldSchema field3 = new FieldSchema("nested", "struct<field1:string,field2:string>", null);
// Set cols with incorrect nested type
storageDescriptor.setCols(
ImmutableList.of(
Expand Down

0 comments on commit 7dd301f

Please sign in to comment.