Skip to content

Commit

Permalink
Start adding hudi test data
Browse files Browse the repository at this point in the history
  • Loading branch information
matthewmturner committed Oct 14, 2024
1 parent d956894 commit 6b1001c
Show file tree
Hide file tree
Showing 16 changed files with 334 additions and 2 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
{
"partitionToWriteStats" : {
"20" : [ {
"fileId" : "76e0556b-390d-4249-b7ad-9059e2bc2cbd-0",
"path" : "20/76e0556b-390d-4249-b7ad-9059e2bc2cbd-0_0-98-141_20240418172802262.parquet",
"cdcStats" : null,
"prevCommit" : "null",
"numWrites" : 1,
"numDeletes" : 0,
"numUpdateWrites" : 0,
"numInserts" : 1,
"totalWriteBytes" : 441161,
"totalWriteErrors" : 0,
"tempPath" : null,
"partitionPath" : "20",
"totalLogRecords" : 0,
"totalLogFilesCompacted" : 0,
"totalLogSizeCompacted" : 0,
"totalUpdatedRecordsCompacted" : 0,
"totalLogBlocks" : 0,
"totalCorruptLogBlock" : 0,
"totalRollbackBlocks" : 0,
"fileSizeInBytes" : 441161,
"minEventTime" : null,
"maxEventTime" : null,
"runtimeStats" : {
"totalScanTime" : 0,
"totalUpsertTime" : 0,
"totalCreateTime" : 107
}
} ],
"10" : [ {
"fileId" : "97de74b1-2a8e-4bb7-874c-0a74e1f42a77-0",
"path" : "10/97de74b1-2a8e-4bb7-874c-0a74e1f42a77-0_1-98-142_20240418172802262.parquet",
"cdcStats" : null,
"prevCommit" : "null",
"numWrites" : 2,
"numDeletes" : 0,
"numUpdateWrites" : 0,
"numInserts" : 2,
"totalWriteBytes" : 441426,
"totalWriteErrors" : 0,
"tempPath" : null,
"partitionPath" : "10",
"totalLogRecords" : 0,
"totalLogFilesCompacted" : 0,
"totalLogSizeCompacted" : 0,
"totalUpdatedRecordsCompacted" : 0,
"totalLogBlocks" : 0,
"totalCorruptLogBlock" : 0,
"totalRollbackBlocks" : 0,
"fileSizeInBytes" : 441426,
"minEventTime" : null,
"maxEventTime" : null,
"runtimeStats" : {
"totalScanTime" : 0,
"totalUpsertTime" : 0,
"totalCreateTime" : 107
}
} ]
},
"compacted" : false,
"extraMetadata" : {
"schema" : "{\"type\":\"record\",\"name\":\"v6_simplekeygen_nonhivestyle_record\",\"namespace\":\"hoodie.v6_simplekeygen_nonhivestyle\",\"fields\":[{\"name\":\"id\",\"type\":[\"null\",\"int\"],\"default\":null},{\"name\":\"name\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"isActive\",\"type\":[\"null\",\"boolean\"],\"default\":null},{\"name\":\"shortField\",\"type\":[\"null\",\"int\"],\"default\":null},{\"name\":\"intField\",\"type\":[\"null\",\"int\"],\"default\":null},{\"name\":\"longField\",\"type\":[\"null\",\"long\"],\"default\":null},{\"name\":\"floatField\",\"type\":[\"null\",\"float\"],\"default\":null},{\"name\":\"doubleField\",\"type\":[\"null\",\"double\"],\"default\":null},{\"name\":\"decimalField\",\"type\":[\"null\",{\"type\":\"fixed\",\"name\":\"fixed\",\"namespace\":\"hoodie.v6_simplekeygen_nonhivestyle.v6_simplekeygen_nonhivestyle_record.decimalField\",\"size\":5,\"logicalType\":\"decimal\",\"precision\":10,\"scale\":5}],\"default\":null},{\"name\":\"dateField\",\"type\":[\"null\",{\"type\":\"int\",\"logicalType\":\"date\"}],\"default\":null},{\"name\":\"timestampField\",\"type\":[\"null\",{\"type\":\"long\",\"logicalType\":\"timestamp-micros\"}],\"default\":null},{\"name\":\"binaryField\",\"type\":[\"null\",\"bytes\"],\"default\":null},{\"name\":\"arrayField\",\"type\":[\"null\",{\"type\":\"array\",\"items\":[\"null\",{\"type\":\"record\",\"name\":\"arrayField\",\"namespace\":\"hoodie.v6_simplekeygen_nonhivestyle.v6_simplekeygen_nonhivestyle_record\",\"fields\":[{\"name\":\"arr_struct_f1\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"arr_struct_f2\",\"type\":[\"null\",\"int\"],\"default\":null}]}]}],\"default\":null},{\"name\":\"mapField\",\"type\":[\"null\",{\"type\":\"map\",\"values\":[\"null\",{\"type\":\"record\",\"name\":\"mapField\",\"namespace\":\"hoodie.v6_simplekeygen_nonhivestyle.v6_simplekeygen_nonhivestyle_record\",\"fields\":[{\"name\":\"map_field_value_struct_f1\",\"type\":[\"null\",\"double\"],\"defau
lt\":null},{\"name\":\"map_field_value_struct_f2\",\"type\":[\"null\",\"boolean\"],\"default\":null}]}]}],\"default\":null},{\"name\":\"structField\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"structField\",\"namespace\":\"hoodie.v6_simplekeygen_nonhivestyle.v6_simplekeygen_nonhivestyle_record\",\"fields\":[{\"name\":\"field1\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"field2\",\"type\":[\"null\",\"int\"],\"default\":null},{\"name\":\"child_struct\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"child_struct\",\"namespace\":\"hoodie.v6_simplekeygen_nonhivestyle.v6_simplekeygen_nonhivestyle_record.structField\",\"fields\":[{\"name\":\"child_field1\",\"type\":[\"null\",\"double\"],\"default\":null},{\"name\":\"child_field2\",\"type\":[\"null\",\"boolean\"],\"default\":null}]}],\"default\":null}]}],\"default\":null},{\"name\":\"byteField\",\"type\":[\"null\",\"int\"],\"default\":null}]}"
},
"operationType" : "UPSERT"
}
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
{
"partitionToWriteStats" : {
"20" : [ {
"fileId" : "",
"path" : null,
"cdcStats" : null,
"prevCommit" : "null",
"numWrites" : 0,
"numDeletes" : 0,
"numUpdateWrites" : 0,
"numInserts" : 1,
"totalWriteBytes" : 0,
"totalWriteErrors" : 0,
"tempPath" : null,
"partitionPath" : null,
"totalLogRecords" : 0,
"totalLogFilesCompacted" : 0,
"totalLogSizeCompacted" : 0,
"totalUpdatedRecordsCompacted" : 0,
"totalLogBlocks" : 0,
"totalCorruptLogBlock" : 0,
"totalRollbackBlocks" : 0,
"fileSizeInBytes" : 0,
"minEventTime" : null,
"maxEventTime" : null,
"runtimeStats" : null
} ],
"10" : [ {
"fileId" : "",
"path" : null,
"cdcStats" : null,
"prevCommit" : "null",
"numWrites" : 0,
"numDeletes" : 0,
"numUpdateWrites" : 0,
"numInserts" : 2,
"totalWriteBytes" : 0,
"totalWriteErrors" : 0,
"tempPath" : null,
"partitionPath" : null,
"totalLogRecords" : 0,
"totalLogFilesCompacted" : 0,
"totalLogSizeCompacted" : 0,
"totalUpdatedRecordsCompacted" : 0,
"totalLogBlocks" : 0,
"totalCorruptLogBlock" : 0,
"totalRollbackBlocks" : 0,
"fileSizeInBytes" : 0,
"minEventTime" : null,
"maxEventTime" : null,
"runtimeStats" : null
} ]
},
"compacted" : false,
"extraMetadata" : { },
"operationType" : "UPSERT"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
{
"partitionToWriteStats" : {
"30" : [ {
"fileId" : "6db57019-98ee-480e-8eb1-fb3de48e1c24-0",
"path" : "30/6db57019-98ee-480e-8eb1-fb3de48e1c24-0_1-119-167_20240418172804498.parquet",
"cdcStats" : null,
"prevCommit" : "null",
"numWrites" : 1,
"numDeletes" : 0,
"numUpdateWrites" : 0,
"numInserts" : 1,
"totalWriteBytes" : 441186,
"totalWriteErrors" : 0,
"tempPath" : null,
"partitionPath" : "30",
"totalLogRecords" : 0,
"totalLogFilesCompacted" : 0,
"totalLogSizeCompacted" : 0,
"totalUpdatedRecordsCompacted" : 0,
"totalLogBlocks" : 0,
"totalCorruptLogBlock" : 0,
"totalRollbackBlocks" : 0,
"fileSizeInBytes" : 441186,
"minEventTime" : null,
"maxEventTime" : null,
"runtimeStats" : {
"totalScanTime" : 0,
"totalUpsertTime" : 0,
"totalCreateTime" : 90
}
} ],
"10" : [ {
"fileId" : "97de74b1-2a8e-4bb7-874c-0a74e1f42a77-0",
"path" : "10/97de74b1-2a8e-4bb7-874c-0a74e1f42a77-0_0-119-166_20240418172804498.parquet",
"cdcStats" : null,
"prevCommit" : "20240418172802262",
"numWrites" : 2,
"numDeletes" : 0,
"numUpdateWrites" : 1,
"numInserts" : 0,
"totalWriteBytes" : 441352,
"totalWriteErrors" : 0,
"tempPath" : null,
"partitionPath" : "10",
"totalLogRecords" : 0,
"totalLogFilesCompacted" : 0,
"totalLogSizeCompacted" : 0,
"totalUpdatedRecordsCompacted" : 0,
"totalLogBlocks" : 0,
"totalCorruptLogBlock" : 0,
"totalRollbackBlocks" : 0,
"fileSizeInBytes" : 441352,
"minEventTime" : null,
"maxEventTime" : null,
"runtimeStats" : {
"totalScanTime" : 0,
"totalUpsertTime" : 103,
"totalCreateTime" : 0
}
} ]
},
"compacted" : false,
"extraMetadata" : {
"schema" : "{\"type\":\"record\",\"name\":\"v6_simplekeygen_nonhivestyle_record\",\"namespace\":\"hoodie.v6_simplekeygen_nonhivestyle\",\"fields\":[{\"name\":\"id\",\"type\":[\"null\",\"int\"],\"default\":null},{\"name\":\"name\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"isActive\",\"type\":[\"null\",\"boolean\"],\"default\":null},{\"name\":\"shortField\",\"type\":[\"null\",\"int\"],\"default\":null},{\"name\":\"intField\",\"type\":[\"null\",\"int\"],\"default\":null},{\"name\":\"longField\",\"type\":[\"null\",\"long\"],\"default\":null},{\"name\":\"floatField\",\"type\":[\"null\",\"float\"],\"default\":null},{\"name\":\"doubleField\",\"type\":[\"null\",\"double\"],\"default\":null},{\"name\":\"decimalField\",\"type\":[\"null\",{\"type\":\"fixed\",\"name\":\"fixed\",\"namespace\":\"hoodie.v6_simplekeygen_nonhivestyle.v6_simplekeygen_nonhivestyle_record.decimalField\",\"size\":5,\"logicalType\":\"decimal\",\"precision\":10,\"scale\":5}],\"default\":null},{\"name\":\"dateField\",\"type\":[\"null\",{\"type\":\"int\",\"logicalType\":\"date\"}],\"default\":null},{\"name\":\"timestampField\",\"type\":[\"null\",{\"type\":\"long\",\"logicalType\":\"timestamp-micros\"}],\"default\":null},{\"name\":\"binaryField\",\"type\":[\"null\",\"bytes\"],\"default\":null},{\"name\":\"arrayField\",\"type\":[\"null\",{\"type\":\"array\",\"items\":[\"null\",{\"type\":\"record\",\"name\":\"arrayField\",\"namespace\":\"hoodie.v6_simplekeygen_nonhivestyle.v6_simplekeygen_nonhivestyle_record\",\"fields\":[{\"name\":\"arr_struct_f1\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"arr_struct_f2\",\"type\":[\"null\",\"int\"],\"default\":null}]}]}],\"default\":null},{\"name\":\"mapField\",\"type\":[\"null\",{\"type\":\"map\",\"values\":[\"null\",{\"type\":\"record\",\"name\":\"mapField\",\"namespace\":\"hoodie.v6_simplekeygen_nonhivestyle.v6_simplekeygen_nonhivestyle_record\",\"fields\":[{\"name\":\"map_field_value_struct_f1\",\"type\":[\"null\",\"double\"],\"defau
lt\":null},{\"name\":\"map_field_value_struct_f2\",\"type\":[\"null\",\"boolean\"],\"default\":null}]}]}],\"default\":null},{\"name\":\"structField\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"structField\",\"namespace\":\"hoodie.v6_simplekeygen_nonhivestyle.v6_simplekeygen_nonhivestyle_record\",\"fields\":[{\"name\":\"field1\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"field2\",\"type\":[\"null\",\"int\"],\"default\":null},{\"name\":\"child_struct\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"child_struct\",\"namespace\":\"hoodie.v6_simplekeygen_nonhivestyle.v6_simplekeygen_nonhivestyle_record.structField\",\"fields\":[{\"name\":\"child_field1\",\"type\":[\"null\",\"double\"],\"default\":null},{\"name\":\"child_field2\",\"type\":[\"null\",\"boolean\"],\"default\":null}]}],\"default\":null}]}],\"default\":null},{\"name\":\"byteField\",\"type\":[\"null\",\"int\"],\"default\":null}]}"
},
"operationType" : "UPSERT"
}
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
{
"partitionToWriteStats" : {
"30" : [ {
"fileId" : "",
"path" : null,
"cdcStats" : null,
"prevCommit" : "null",
"numWrites" : 0,
"numDeletes" : 0,
"numUpdateWrites" : 0,
"numInserts" : 1,
"totalWriteBytes" : 0,
"totalWriteErrors" : 0,
"tempPath" : null,
"partitionPath" : null,
"totalLogRecords" : 0,
"totalLogFilesCompacted" : 0,
"totalLogSizeCompacted" : 0,
"totalUpdatedRecordsCompacted" : 0,
"totalLogBlocks" : 0,
"totalCorruptLogBlock" : 0,
"totalRollbackBlocks" : 0,
"fileSizeInBytes" : 0,
"minEventTime" : null,
"maxEventTime" : null,
"runtimeStats" : null
} ],
"10" : [ {
"fileId" : "",
"path" : null,
"cdcStats" : null,
"prevCommit" : "null",
"numWrites" : 0,
"numDeletes" : 0,
"numUpdateWrites" : 0,
"numInserts" : 0,
"totalWriteBytes" : 0,
"totalWriteErrors" : 0,
"tempPath" : null,
"partitionPath" : null,
"totalLogRecords" : 0,
"totalLogFilesCompacted" : 0,
"totalLogSizeCompacted" : 0,
"totalUpdatedRecordsCompacted" : 0,
"totalLogBlocks" : 0,
"totalCorruptLogBlock" : 0,
"totalRollbackBlocks" : 0,
"fileSizeInBytes" : 0,
"minEventTime" : null,
"maxEventTime" : null,
"runtimeStats" : null
}, {
"fileId" : "97de74b1-2a8e-4bb7-874c-0a74e1f42a77-0",
"path" : null,
"cdcStats" : null,
"prevCommit" : "20240418172802262",
"numWrites" : 0,
"numDeletes" : 0,
"numUpdateWrites" : 1,
"numInserts" : 0,
"totalWriteBytes" : 0,
"totalWriteErrors" : 0,
"tempPath" : null,
"partitionPath" : null,
"totalLogRecords" : 0,
"totalLogFilesCompacted" : 0,
"totalLogSizeCompacted" : 0,
"totalUpdatedRecordsCompacted" : 0,
"totalLogBlocks" : 0,
"totalCorruptLogBlock" : 0,
"totalRollbackBlocks" : 0,
"fileSizeInBytes" : 0,
"minEventTime" : null,
"maxEventTime" : null,
"runtimeStats" : null
} ]
},
"compacted" : false,
"extraMetadata" : { },
"operationType" : "UPSERT"
}
17 changes: 17 additions & 0 deletions data/hudi/v6_simplekeygen_nonhivestyle/.hoodie/hoodie.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#Properties saved on 2024-04-18T22:27:38.493Z
#Thu Apr 18 17:27:38 CDT 2024
hoodie.table.precombine.field=longField
hoodie.datasource.write.drop.partition.columns=false
hoodie.table.partition.fields=byteField
hoodie.table.type=COPY_ON_WRITE
hoodie.archivelog.folder=archived
hoodie.timeline.layout.version=1
hoodie.table.version=6
hoodie.table.recordkey.fields=id
hoodie.database.name=default
hoodie.datasource.write.partitionpath.urlencode=false
hoodie.table.name=v6_simplekeygen_nonhivestyle
hoodie.table.keygenerator.class=org.apache.hudi.keygen.SimpleKeyGenerator
hoodie.datasource.write.hive_style_partitioning=false
hoodie.table.create.schema={"type"\:"record","name"\:"v6_simplekeygen_nonhivestyle_record","namespace"\:"hoodie.v6_simplekeygen_nonhivestyle","fields"\:[{"name"\:"id","type"\:["int","null"]},{"name"\:"name","type"\:["string","null"]},{"name"\:"isActive","type"\:["boolean","null"]},{"name"\:"shortField","type"\:["int","null"]},{"name"\:"intField","type"\:["int","null"]},{"name"\:"longField","type"\:["long","null"]},{"name"\:"floatField","type"\:["float","null"]},{"name"\:"doubleField","type"\:["double","null"]},{"name"\:"decimalField","type"\:[{"type"\:"fixed","name"\:"fixed","namespace"\:"hoodie.v6_simplekeygen_nonhivestyle.v6_simplekeygen_nonhivestyle_record.decimalField","size"\:5,"logicalType"\:"decimal","precision"\:10,"scale"\:5},"null"]},{"name"\:"dateField","type"\:[{"type"\:"int","logicalType"\:"date"},"null"]},{"name"\:"timestampField","type"\:[{"type"\:"long","logicalType"\:"timestamp-micros"},"null"]},{"name"\:"binaryField","type"\:["bytes","null"]},{"name"\:"arrayField","type"\:[{"type"\:"array","items"\:[{"type"\:"record","name"\:"arrayField","namespace"\:"hoodie.v6_simplekeygen_nonhivestyle.v6_simplekeygen_nonhivestyle_record","fields"\:[{"name"\:"arr_struct_f1","type"\:["string","null"]},{"name"\:"arr_struct_f2","type"\:["int","null"]}]},"null"]},"null"]},{"name"\:"mapField","type"\:[{"type"\:"map","values"\:[{"type"\:"record","name"\:"mapField","namespace"\:"hoodie.v6_simplekeygen_nonhivestyle.v6_simplekeygen_nonhivestyle_record","fields"\:[{"name"\:"map_field_value_struct_f1","type"\:["double","null"]},{"name"\:"map_field_value_struct_f2","type"\:["boolean","null"]}]},"null"]},"null"]},{"name"\:"structField","type"\:[{"type"\:"record","name"\:"structField","namespace"\:"hoodie.v6_simplekeygen_nonhivestyle.v6_simplekeygen_nonhivestyle_record","fields"\:[{"name"\:"field1","type"\:["string","null"]},{"name"\:"field2","type"\:["int","null"]},{"name"\:"child_struct","type"\:[{"type"\:"record","name"\:"child_struct","namespace"\:"hoodie.v6_simplekeygen_no
nhivestyle.v6_simplekeygen_nonhivestyle_record.structField","fields"\:[{"name"\:"child_field1","type"\:["double","null"]},{"name"\:"child_field2","type"\:["boolean","null"]}]},"null"]}]},"null"]},{"name"\:"byteField","type"\:["int","null"]}]}
hoodie.table.checksum=616688896
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#partition metadata
#Thu Apr 18 17:28:02 CDT 2024
commitTime=20240418172802262
partitionDepth=1
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#partition metadata
#Thu Apr 18 17:28:02 CDT 2024
commitTime=20240418172802262
partitionDepth=1
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#partition metadata
#Thu Apr 18 17:28:04 CDT 2024
commitTime=20240418172804498
partitionDepth=1
Binary file not shown.
31 changes: 31 additions & 0 deletions tests/extension_cases/hudi.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use crate::extension_cases::TestExecution;

/// Smoke test for the Hudi extension: registers the checked-in
/// `v6_simplekeygen_nonhivestyle` table as an external table and selects
/// every row from it.
#[tokio::test]
async fn test_hudi() {
    // `with_setup` consumes the execution (`self` by value) and hands it back,
    // and `run_and_format` borrows it mutably, so the returned value must be
    // kept in a `mut` binding rather than discarded.
    let mut test_exec = TestExecution::new()
        .with_setup(
            "CREATE EXTERNAL TABLE h STORED AS HUDI LOCATION './data/hudi/v6_simplekeygen_nonhivestyle';",
        )
        .await;

    let output = test_exec.run_and_format("SELECT * FROM h").await;
    // NOTE(review): this expectation looks like a placeholder — replace it with
    // the actual formatted rows once the Hudi table output is confirmed.
    assert_eq!(output, vec![""]);
}
4 changes: 2 additions & 2 deletions tests/extension_cases/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
mod flightsql;
#[cfg(feature = "functions-json")]
mod functions_json;
#[cfg(feature = "hudi")]
mod hudi;
#[cfg(feature = "s3")]
mod s3;

Expand Down Expand Up @@ -53,7 +55,6 @@ impl TestExecution {
}

/// Run the setup SQL query, discarding the result
#[allow(dead_code)]
pub async fn with_setup(self, sql: &str) -> Self {
debug!("Running setup query: {sql}");
let dialect = datafusion::sql::sqlparser::dialect::GenericDialect {};
Expand Down Expand Up @@ -88,7 +89,6 @@ impl TestExecution {

/// Executes `sql` and returns the formatted result as a `Vec<String>`, a shape
/// that is convenient to compare with insta.
///
/// # Panics
///
/// Panics with "Error running query" if running the query returns an error.
#[allow(dead_code)]
pub async fn run_and_format(&mut self, sql: &str) -> Vec<String> {
    let batches = self.run(sql).await.expect("Error running query");
    format_results(&batches)
}
Expand Down

0 comments on commit 6b1001c

Please sign in to comment.