From 604b6e298aad6396a44f5463708164c2b3b35611 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Wed, 20 Nov 2024 16:08:42 -0800 Subject: [PATCH] Revert "Merge branch 'table_changes_scan' of github.com:OussamaSaoudi-db/delta-kernel-rs into table_changes_scan" This reverts commit 3f47e724aed835aa86b7252e290ebb0641cd3dc8, reversing changes made to 1e15b9b68d2cfd81c6dcf8a65844e90c2b08c027. --- CHANGELOG.md | 158 +++++++++--------- Cargo.toml | 2 +- README.md | 2 +- ffi/src/expressions/kernel.rs | 2 +- .../src/engine/parquet_row_group_skipping.rs | 2 +- kernel/src/expressions/scalars.rs | 2 +- kernel/src/lib.rs | 4 +- kernel/src/log_segment/tests.rs | 4 +- kernel/src/scan/mod.rs | 49 ++---- kernel/src/snapshot.rs | 4 +- kernel/tests/read.rs | 2 +- 11 files changed, 107 insertions(+), 124 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7adc6abb..f642260f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,8 @@ # Changelog -## [v0.4.1](https://github.com/delta-io/delta-kernel-rs/tree/v0.4.1/) (2024-10-28) +## [v0.4.1](https://github.com/delta-incubator/delta-kernel-rs/tree/v0.4.1/) (2024-10-28) -[Full Changelog](https://github.com/delta-io/delta-kernel-rs/compare/v0.4.0...v0.4.1) +[Full Changelog](https://github.com/delta-incubator/delta-kernel-rs/compare/v0.4.0...v0.4.1) **API Changes** @@ -15,11 +15,11 @@ column as all-null is unsound, if the schema was not already verified to prove t logical schema actually includes the missing column. We disable it until we can add the necessary validation. [\#435] -[\#435]: https://github.com/delta-io/delta-kernel-rs/pull/435 +[\#435]: https://github.com/delta-incubator/delta-kernel-rs/pull/435 -## [v0.4.0](https://github.com/delta-io/delta-kernel-rs/tree/v0.4.0/) (2024-10-23) +## [v0.4.0](https://github.com/delta-incubator/delta-kernel-rs/tree/v0.4.0/) (2024-10-23) -[Full Changelog](https://github.com/delta-io/delta-kernel-rs/compare/v0.3.1...v0.4.0) +[Full Changelog](https://github.com/delta-incubator/delta-kernel-rs/compare/v0.3.1...v0.4.0) **API Changes** @@ -70,96 +70,96 @@ validation. [\#435] - fixed broken sync engine json parsing and harmonized sync/async json parsing [\#373] - filesystem client now always returns a sorted list [\#344] -[\#331]: https://github.com/delta-io/delta-kernel-rs/pull/331 -[\#332]: https://github.com/delta-io/delta-kernel-rs/pull/332 -[\#334]: https://github.com/delta-io/delta-kernel-rs/pull/334 -[\#335]: https://github.com/delta-io/delta-kernel-rs/pull/335 -[\#336]: https://github.com/delta-io/delta-kernel-rs/pull/336 -[\#337]: https://github.com/delta-io/delta-kernel-rs/pull/337 -[\#339]: https://github.com/delta-io/delta-kernel-rs/pull/339 -[\#340]: https://github.com/delta-io/delta-kernel-rs/pull/340 -[\#342]: https://github.com/delta-io/delta-kernel-rs/pull/342 -[\#343]: https://github.com/delta-io/delta-kernel-rs/pull/343 -[\#344]: https://github.com/delta-io/delta-kernel-rs/pull/344 -[\#347]: https://github.com/delta-io/delta-kernel-rs/pull/347 -[\#354]: https://github.com/delta-io/delta-kernel-rs/pull/354 -[\#357]: https://github.com/delta-io/delta-kernel-rs/pull/357 -[\#360]: https://github.com/delta-io/delta-kernel-rs/pull/360 -[\#362]: https://github.com/delta-io/delta-kernel-rs/pull/362 -[\#364]: https://github.com/delta-io/delta-kernel-rs/pull/364 -[\#366]: https://github.com/delta-io/delta-kernel-rs/pull/366 -[\#369]: https://github.com/delta-io/delta-kernel-rs/pull/369 -[\#373]: https://github.com/delta-io/delta-kernel-rs/pull/373 -[\#374]: https://github.com/delta-io/delta-kernel-rs/pull/374 -[\#381]: https://github.com/delta-io/delta-kernel-rs/pull/381 -[\#383]: https://github.com/delta-io/delta-kernel-rs/pull/383 -[\#384]: https://github.com/delta-io/delta-kernel-rs/pull/384 -[\#385]: https://github.com/delta-io/delta-kernel-rs/pull/385 -[\#386]: https://github.com/delta-io/delta-kernel-rs/pull/386 -[\#395]: https://github.com/delta-io/delta-kernel-rs/pull/395 -[\#398]: https://github.com/delta-io/delta-kernel-rs/pull/398 -[\#399]: https://github.com/delta-io/delta-kernel-rs/pull/399 -[\#401]: https://github.com/delta-io/delta-kernel-rs/pull/401 -[\#402]: https://github.com/delta-io/delta-kernel-rs/pull/402 -[\#409]: https://github.com/delta-io/delta-kernel-rs/pull/409 -[\#413]: https://github.com/delta-io/delta-kernel-rs/pull/413 - - -## [v0.3.1](https://github.com/delta-io/delta-kernel-rs/tree/v0.3.1/) (2024-09-10) - -[Full Changelog](https://github.com/delta-io/delta-kernel-rs/compare/v0.3.0...v0.3.1) +[\#331]: https://github.com/delta-incubator/delta-kernel-rs/pull/331 +[\#332]: https://github.com/delta-incubator/delta-kernel-rs/pull/332 +[\#334]: https://github.com/delta-incubator/delta-kernel-rs/pull/334 +[\#335]: https://github.com/delta-incubator/delta-kernel-rs/pull/335 +[\#336]: https://github.com/delta-incubator/delta-kernel-rs/pull/336 +[\#337]: https://github.com/delta-incubator/delta-kernel-rs/pull/337 +[\#339]: https://github.com/delta-incubator/delta-kernel-rs/pull/339 +[\#340]: https://github.com/delta-incubator/delta-kernel-rs/pull/340 +[\#342]: https://github.com/delta-incubator/delta-kernel-rs/pull/342 +[\#343]: https://github.com/delta-incubator/delta-kernel-rs/pull/343 +[\#344]: https://github.com/delta-incubator/delta-kernel-rs/pull/344 +[\#347]: https://github.com/delta-incubator/delta-kernel-rs/pull/347 +[\#354]: https://github.com/delta-incubator/delta-kernel-rs/pull/354 +[\#357]: https://github.com/delta-incubator/delta-kernel-rs/pull/357 +[\#360]: https://github.com/delta-incubator/delta-kernel-rs/pull/360 +[\#362]: https://github.com/delta-incubator/delta-kernel-rs/pull/362 +[\#364]: https://github.com/delta-incubator/delta-kernel-rs/pull/364 +[\#366]: https://github.com/delta-incubator/delta-kernel-rs/pull/366 +[\#369]: https://github.com/delta-incubator/delta-kernel-rs/pull/369 +[\#373]: https://github.com/delta-incubator/delta-kernel-rs/pull/373 +[\#374]: https://github.com/delta-incubator/delta-kernel-rs/pull/374 +[\#381]: https://github.com/delta-incubator/delta-kernel-rs/pull/381 +[\#383]: https://github.com/delta-incubator/delta-kernel-rs/pull/383 +[\#384]: https://github.com/delta-incubator/delta-kernel-rs/pull/384 +[\#385]: https://github.com/delta-incubator/delta-kernel-rs/pull/385 +[\#386]: https://github.com/delta-incubator/delta-kernel-rs/pull/386 +[\#395]: https://github.com/delta-incubator/delta-kernel-rs/pull/395 +[\#398]: https://github.com/delta-incubator/delta-kernel-rs/pull/398 +[\#399]: https://github.com/delta-incubator/delta-kernel-rs/pull/399 +[\#401]: https://github.com/delta-incubator/delta-kernel-rs/pull/401 +[\#402]: https://github.com/delta-incubator/delta-kernel-rs/pull/402 +[\#409]: https://github.com/delta-incubator/delta-kernel-rs/pull/409 +[\#413]: https://github.com/delta-incubator/delta-kernel-rs/pull/413 + + +## [v0.3.1](https://github.com/delta-incubator/delta-kernel-rs/tree/v0.3.1/) (2024-09-10) + +[Full Changelog](https://github.com/delta-incubator/delta-kernel-rs/compare/v0.3.0...v0.3.1) **API Changes** *Additions* -1. Two new binary expressions: `In` and `NotIn`, as well as a new `Scalar::Array` variant to represent arrays in the expression framework [\#270](https://github.com/delta-io/delta-kernel-rs/pull/270) NOTE: exact API for these expressions is still evolving. +1. Two new binary expressions: `In` and `NotIn`, as well as a new `Scalar::Array` variant to represent arrays in the expression framework [\#270](https://github.com/delta-incubator/delta-kernel-rs/pull/270) NOTE: exact API for these expressions is still evolving. **Implemented enhancements:** -- Enabled more golden table tests [\#301](https://github.com/delta-io/delta-kernel-rs/pull/301) +- Enabled more golden table tests [\#301](https://github.com/delta-incubator/delta-kernel-rs/pull/301) **Fixed bugs:** -- Allow kernel to read tables with invalid `_last_checkpoint` [\#311](https://github.com/delta-io/delta-kernel-rs/pull/311) -- List log files with checkpoint hint when constructing latest snapshot (when version requested is `None`) [\#312](https://github.com/delta-io/delta-kernel-rs/pull/312) -- Fix incorrect offset value when computing list offsets [\#327](https://github.com/delta-io/delta-kernel-rs/pull/327) -- Fix metadata string conversion in default engine arrow conversion [\#328](https://github.com/delta-io/delta-kernel-rs/pull/328) +- Allow kernel to read tables with invalid `_last_checkpoint` [\#311](https://github.com/delta-incubator/delta-kernel-rs/pull/311) +- List log files with checkpoint hint when constructing latest snapshot (when version requested is `None`) [\#312](https://github.com/delta-incubator/delta-kernel-rs/pull/312) +- Fix incorrect offset value when computing list offsets [\#327](https://github.com/delta-incubator/delta-kernel-rs/pull/327) +- Fix metadata string conversion in default engine arrow conversion [\#328](https://github.com/delta-incubator/delta-kernel-rs/pull/328) -## [v0.3.0](https://github.com/delta-io/delta-kernel-rs/tree/v0.3.0/) (2024-08-07) +## [v0.3.0](https://github.com/delta-incubator/delta-kernel-rs/tree/v0.3.0/) (2024-08-07) -[Full Changelog](https://github.com/delta-io/delta-kernel-rs/compare/v0.2.0...v0.3.0) +[Full Changelog](https://github.com/delta-incubator/delta-kernel-rs/compare/v0.2.0...v0.3.0) **API Changes** *Breaking* -1. `delta_kernel::column_mapping` module moved to `delta_kernel::features::column_mapping` [\#222](https://github.com/delta-io/delta-kernel-rs/pull/297) +1. `delta_kernel::column_mapping` module moved to `delta_kernel::features::column_mapping` [\#222](https://github.com/delta-incubator/delta-kernel-rs/pull/297) *Additions* -1. New deletion vector API `row_indexes` (and accompanying FFI) to get row indexes instead of seletion vector of deleted rows. This can be more efficient for sparse DVs. [\#215](https://github.com/delta-io/delta-kernel-rs/pull/215) -2. Typed table features: `ReaderFeatures`, `WriterFeatures` enums and `has_reader_feature`/`has_writer_feature` API [\#222](https://github.com/delta-io/delta-kernel-rs/pull/297) +1. New deletion vector API `row_indexes` (and accompanying FFI) to get row indexes instead of seletion vector of deleted rows. This can be more efficient for sparse DVs. [\#215](https://github.com/delta-incubator/delta-kernel-rs/pull/215) +2. Typed table features: `ReaderFeatures`, `WriterFeatures` enums and `has_reader_feature`/`has_writer_feature` API [\#222](https://github.com/delta-incubator/delta-kernel-rs/pull/297) **Implemented enhancements:** -- Add `--limit` option to example `read-table-multi-threaded` [\#297](https://github.com/delta-io/delta-kernel-rs/pull/297) -- FFI now built with cmake. Move to using the read-test example as an ffi-test. And building on macos. [\#288](https://github.com/delta-io/delta-kernel-rs/pull/288) -- Golden table tests migrated from delta-spark/delta-kernel java [\#295](https://github.com/delta-io/delta-kernel-rs/pull/295) -- Code coverage implemented via [cargo-llvm-cov](https://github.com/taiki-e/cargo-llvm-cov) and reported with [codecov](https://app.codecov.io/github/delta-io/delta-kernel-rs) [\#287](https://github.com/delta-io/delta-kernel-rs/pull/287) -- All tests enabled to run in CI [\#284](https://github.com/delta-io/delta-kernel-rs/pull/284) -- Updated DAT to 0.3 [\#290](https://github.com/delta-io/delta-kernel-rs/pull/290) +- Add `--limit` option to example `read-table-multi-threaded` [\#297](https://github.com/delta-incubator/delta-kernel-rs/pull/297) +- FFI now built with cmake. Move to using the read-test example as an ffi-test. And building on macos. [\#288](https://github.com/delta-incubator/delta-kernel-rs/pull/288) +- Golden table tests migrated from delta-spark/delta-kernel java [\#295](https://github.com/delta-incubator/delta-kernel-rs/pull/295) +- Code coverage implemented via [cargo-llvm-cov](https://github.com/taiki-e/cargo-llvm-cov) and reported with [codecov](https://app.codecov.io/github/delta-incubator/delta-kernel-rs) [\#287](https://github.com/delta-incubator/delta-kernel-rs/pull/287) +- All tests enabled to run in CI [\#284](https://github.com/delta-incubator/delta-kernel-rs/pull/284) +- Updated DAT to 0.3 [\#290](https://github.com/delta-incubator/delta-kernel-rs/pull/290) **Fixed bugs:** -- Evaluate timestamps as "UTC" instead of "+00:00" for timezone [\#295](https://github.com/delta-io/delta-kernel-rs/pull/295) -- Make Map arrow type field naming consistent with parquet field naming [\#299](https://github.com/delta-io/delta-kernel-rs/pull/299) +- Evaluate timestamps as "UTC" instead of "+00:00" for timezone [\#295](https://github.com/delta-incubator/delta-kernel-rs/pull/295) +- Make Map arrow type field naming consistent with parquet field naming [\#299](https://github.com/delta-incubator/delta-kernel-rs/pull/299) -## [v0.2.0](https://github.com/delta-io/delta-kernel-rs/tree/v0.2.0/) (2024-07-17) +## [v0.2.0](https://github.com/delta-incubator/delta-kernel-rs/tree/v0.2.0/) (2024-07-17) -[Full Changelog](https://github.com/delta-io/delta-kernel-rs/compare/v0.1.1...v0.2.0) +[Full Changelog](https://github.com/delta-incubator/delta-kernel-rs/compare/v0.1.1...v0.2.0) **API Changes** @@ -178,32 +178,32 @@ validation. [\#435] **Implemented enhancements:** -- Handle nested structs in `schemaString` (allows reading iceberg compat tables) [\#257](https://github.com/delta-io/delta-kernel-rs/pull/257) -- Expose top level stats in scans [\#227](https://github.com/delta-io/delta-kernel-rs/pull/227) -- Hugely expanded C-FFI example [\#203](https://github.com/delta-io/delta-kernel-rs/pull/203) -- Add `scan_builder` function to `Snapshot` [\#273](https://github.com/delta-io/delta-kernel-rs/pull/273) -- Add `hdfs_native_store` support [\#273](https://github.com/delta-io/delta-kernel-rs/pull/274) -- Proper reading of Parquet files, including only reading requested leaves, type casting, and reordering [\#271](https://github.com/delta-io/delta-kernel-rs/pull/271) -- Allow building the package if you are behind an https proxy [\#282](https://github.com/delta-io/delta-kernel-rs/pull/282) +- Handle nested structs in `schemaString` (allows reading iceberg compat tables) [\#257](https://github.com/delta-incubator/delta-kernel-rs/pull/257) +- Expose top level stats in scans [\#227](https://github.com/delta-incubator/delta-kernel-rs/pull/227) +- Hugely expanded C-FFI example [\#203](https://github.com/delta-incubator/delta-kernel-rs/pull/203) +- Add `scan_builder` function to `Snapshot` [\#273](https://github.com/delta-incubator/delta-kernel-rs/pull/273) +- Add `hdfs_native_store` support [\#273](https://github.com/delta-incubator/delta-kernel-rs/pull/274) +- Proper reading of Parquet files, including only reading requested leaves, type casting, and reordering [\#271](https://github.com/delta-incubator/delta-kernel-rs/pull/271) +- Allow building the package if you are behind an https proxy [\#282](https://github.com/delta-incubator/delta-kernel-rs/pull/282) **Fixed bugs:** -- Don't error if more fields exist than expected in a struct expression [\#267](https://github.com/delta-io/delta-kernel-rs/pull/267) -- Handle cases where the deletion vector length is less than the total number of rows in the chunk [\#276](https://github.com/delta-io/delta-kernel-rs/pull/276) -- Fix partition map indexing if column mapping is in effect [\#278](https://github.com/delta-io/delta-kernel-rs/pull/278) +- Don't error if more fields exist than expected in a struct expression [\#267](https://github.com/delta-incubator/delta-kernel-rs/pull/267) +- Handle cases where the deletion vector length is less than the total number of rows in the chunk [\#276](https://github.com/delta-incubator/delta-kernel-rs/pull/276) +- Fix partition map indexing if column mapping is in effect [\#278](https://github.com/delta-incubator/delta-kernel-rs/pull/278) -## [v0.1.1](https://github.com/delta-io/delta-kernel-rs/tree/v0.1.0/) (2024-06-03) +## [v0.1.1](https://github.com/delta-incubator/delta-kernel-rs/tree/v0.1.0/) (2024-06-03) -[Full Changelog](https://github.com/delta-io/delta-kernel-rs/compare/v0.1.0...v0.1.1) +[Full Changelog](https://github.com/delta-incubator/delta-kernel-rs/compare/v0.1.0...v0.1.1) **Implemented enhancements:** -- Support unary `NOT` and `IsNull` for data skipping [\#231](https://github.com/delta-io/delta-kernel-rs/pull/231) -- Add unary visitors to c ffi [\#247](https://github.com/delta-io/delta-kernel-rs/pull/247) +- Support unary `NOT` and `IsNull` for data skipping [\#231](https://github.com/delta-incubator/delta-kernel-rs/pull/231) +- Add unary visitors to c ffi [\#247](https://github.com/delta-incubator/delta-kernel-rs/pull/247) - Minor other QOL improvements -## [v0.1.0](https://github.com/delta-io/delta-kernel-rs/tree/v0.1.0/) (2024-06-12) +## [v0.1.0](https://github.com/delta-incubator/delta-kernel-rs/tree/v0.1.0/) (2024-06-12) Initial public release \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index 21d26343..6179aed7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,7 @@ edition = "2021" homepage = "https://delta.io" keywords = ["deltalake", "delta", "datalake"] license = "Apache-2.0" -repository = "https://github.com/delta-io/delta-kernel-rs" +repository = "https://github.com/delta-incubator/delta-kernel-rs" readme = "README.md" version = "0.4.0" diff --git a/README.md b/README.md index 421828e0..15472736 100644 --- a/README.md +++ b/README.md @@ -176,7 +176,7 @@ Some design principles which should be considered: [delta-github]: https://github.com/delta-io/delta [java-kernel]: https://github.com/delta-io/delta/tree/master/kernel [rustup]: https://rustup.rs -[architecture.md]: https://github.com/delta-io/delta-kernel-rs/tree/master/architecture.md +[architecture.md]: https://github.com/delta-incubator/delta-kernel-rs/tree/master/architecture.md [dat]: https://github.com/delta-incubator/dat [derive-macros]: https://doc.rust-lang.org/reference/procedural-macros.html [API Docs]: https://docs.rs/delta_kernel/latest/delta_kernel/ diff --git a/ffi/src/expressions/kernel.rs b/ffi/src/expressions/kernel.rs index 53d71296..38c7b39b 100644 --- a/ffi/src/expressions/kernel.rs +++ b/ffi/src/expressions/kernel.rs @@ -53,7 +53,7 @@ type VisitUnaryFn = extern "C" fn(data: *mut c_void, sibling_list_id: usize, chi /// to visitor methods /// TODO: Visit type information in struct field and null. This will likely involve using the schema /// visitor. Note that struct literals are currently in flux, and may change significantly. Here is the relevant -/// issue: https://github.com/delta-io/delta-kernel-rs/issues/412 +/// issue: https://github.com/delta-incubator/delta-kernel-rs/issues/412 #[repr(C)] pub struct EngineExpressionVisitor { /// An opaque engine state pointer diff --git a/kernel/src/engine/parquet_row_group_skipping.rs b/kernel/src/engine/parquet_row_group_skipping.rs index c446121a..e2e58648 100644 --- a/kernel/src/engine/parquet_row_group_skipping.rs +++ b/kernel/src/engine/parquet_row_group_skipping.rs @@ -188,7 +188,7 @@ impl<'a> ParquetStatsProvider for RowGroupFilter<'a> { // actually exists in the table's logical schema, and that any necessary logical to // physical name mapping has been performed. Because we currently lack both the // validation and the name mapping support, we must disable this optimization for the - // time being. See https://github.com/delta-io/delta-kernel-rs/issues/434. + // time being. See https://github.com/delta-incubator/delta-kernel-rs/issues/434. return Some(self.get_parquet_rowcount_stat()).filter(|_| false); }; diff --git a/kernel/src/expressions/scalars.rs b/kernel/src/expressions/scalars.rs index a923f5e4..bbbabe17 100644 --- a/kernel/src/expressions/scalars.rs +++ b/kernel/src/expressions/scalars.rs @@ -30,7 +30,7 @@ impl ArrayData { } #[deprecated( - note = "These fields will be removed eventually and are unstable. See https://github.com/delta-io/delta-kernel-rs/issues/291" + note = "These fields will be removed eventually and are unstable. See https://github.com/delta-incubator/delta-kernel-rs/issues/291" )] pub fn array_elements(&self) -> &[Scalar] { &self.elements diff --git a/kernel/src/lib.rs b/kernel/src/lib.rs index 0142c513..deeeee9c 100644 --- a/kernel/src/lib.rs +++ b/kernel/src/lib.rs @@ -11,8 +11,8 @@ //! [read-table-single-threaded] example (and for a more complex multi-threaded reader see the //! [read-table-multi-threaded] example). //! -//! [read-table-single-threaded]: https://github.com/delta-io/delta-kernel-rs/tree/main/kernel/examples/read-table-single-threaded -//! [read-table-multi-threaded]: https://github.com/delta-io/delta-kernel-rs/tree/main/kernel/examples/read-table-multi-threaded +//! [read-table-single-threaded]: https://github.com/delta-incubator/delta-kernel-rs/tree/main/kernel/examples/read-table-single-threaded +//! [read-table-multi-threaded]: https://github.com/delta-incubator/delta-kernel-rs/tree/main/kernel/examples/read-table-multi-threaded //! //! # Engine traits //! diff --git a/kernel/src/log_segment/tests.rs b/kernel/src/log_segment/tests.rs index ed029b00..a47a05f1 100644 --- a/kernel/src/log_segment/tests.rs +++ b/kernel/src/log_segment/tests.rs @@ -49,7 +49,7 @@ fn test_replay_for_metadata() { // // NOTE: Each checkpoint part is a single-row file -- guaranteed to produce one row group. // - // WARNING: https://github.com/delta-io/delta-kernel-rs/issues/434 -- We currently + // WARNING: https://github.com/delta-incubator/delta-kernel-rs/issues/434 -- We currently // read parts 1 and 5 (4 in all instead of 2) because row group skipping is disabled for // missing columns, but can still skip part 3 because has valid nullcount stats for P&M. assert_eq!(data.len(), 4); @@ -260,7 +260,7 @@ fn build_snapshot_with_bad_checkpoint_hint_fails() { #[ignore] #[test] fn build_snapshot_with_missing_checkpoint_part_no_hint() { - // TODO: Handle checkpoints correctly so that this test passes: https://github.com/delta-io/delta-kernel-rs/issues/497 + // TODO: Handle checkpoints correctly so that this test passes: https://github.com/delta-incubator/delta-kernel-rs/issues/497 // Part 2 of 3 is missing from checkpoint 5. The Snapshot should be made of checkpoint // number 3 and commit files 4 to 7. diff --git a/kernel/src/scan/mod.rs b/kernel/src/scan/mod.rs index f3bc8a38..16151596 100644 --- a/kernel/src/scan/mod.rs +++ b/kernel/src/scan/mod.rs @@ -23,22 +23,14 @@ mod data_skipping; pub mod log_replay; pub mod state; -pub trait Scannable { - type ScanType; - fn build_scan( - self: Arc, - schema: Option, - predicate: Option, - ) -> DeltaResult; -} /// Builder to scan a snapshot of a table. -pub struct ScanBuilder { - scannable: Arc, +pub struct ScanBuilder { + snapshot: Arc, schema: Option, predicate: Option, } -impl std::fmt::Debug for ScanBuilder { +impl std::fmt::Debug for ScanBuilder { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { f.debug_struct("ScanBuilder") .field("schema", &self.schema) @@ -47,11 +39,11 @@ impl std::fmt::Debug for ScanBuilder { } } -impl ScanBuilder { +impl ScanBuilder { /// Create a new [`ScanBuilder`] instance. - pub fn new(scannable: impl Into>) -> Self { + pub fn new(snapshot: impl Into>) -> Self { Self { - scannable: scannable.into(), + snapshot: snapshot.into(), schema: None, predicate: None, } @@ -89,41 +81,32 @@ impl ScanBuilder { self } - pub fn build(self) -> DeltaResult { - self.scannable.build_scan(self.schema, self.predicate) - } -} - -impl Scannable for Snapshot { - type ScanType = Scan; /// Build the [`Scan`]. /// /// This does not scan the table at this point, but does do some work to ensure that the /// provided schema make sense, and to prepare some metadata that the scan will need. The /// [`Scan`] type itself can be used to fetch the files and associated metadata required to /// perform actual data reads. - fn build_scan( - self: Arc, - schema: Option, - predicate: Option, - ) -> DeltaResult { + pub fn build(self) -> DeltaResult { // if no schema is provided, use snapshot's entire schema (e.g. SELECT *) - let logical_schema = schema.unwrap_or_else(|| self.schema().clone().into()); + let logical_schema = self + .schema + .unwrap_or_else(|| self.snapshot.schema().clone().into()); let (all_fields, read_fields, have_partition_cols) = get_state_info( logical_schema.as_ref(), - &self.metadata().partition_columns, - self.column_mapping_mode, + &self.snapshot.metadata().partition_columns, + self.snapshot.column_mapping_mode, )?; let physical_schema = Arc::new(StructType::new(read_fields)); // important! before a read/write to the table we must check it is supported - self.protocol().ensure_read_supported()?; + self.snapshot.protocol().ensure_read_supported()?; Ok(Scan { - snapshot: self, + snapshot: self.snapshot, logical_schema, physical_schema, - predicate, + predicate: self.predicate, all_fields, have_partition_cols, }) @@ -810,7 +793,7 @@ mod tests { // Predicate over a logically valid but physically missing column. No data files should be // returned because the column is inferred to be all-null. // - // WARNING: https://github.com/delta-io/delta-kernel-rs/issues/434 - This + // WARNING: https://github.com/delta-incubator/delta-kernel-rs/issues/434 - This // optimization is currently disabled, so the one data file is still returned. let predicate = Arc::new(column_expr!("missing").lt(1000i64)); let scan = snapshot diff --git a/kernel/src/snapshot.rs b/kernel/src/snapshot.rs index 112fb1cb..874ef8d9 100644 --- a/kernel/src/snapshot.rs +++ b/kernel/src/snapshot.rs @@ -129,12 +129,12 @@ impl Snapshot { } /// Create a [`ScanBuilder`] for an `Arc`. - pub fn scan_builder(self: Arc) -> ScanBuilder { + pub fn scan_builder(self: Arc) -> ScanBuilder { ScanBuilder::new(self) } /// Consume this `Snapshot` to create a [`ScanBuilder`] - pub fn into_scan_builder(self) -> ScanBuilder { + pub fn into_scan_builder(self) -> ScanBuilder { ScanBuilder::new(self) } } diff --git a/kernel/tests/read.rs b/kernel/tests/read.rs index 96ddf842..02024a79 100644 --- a/kernel/tests/read.rs +++ b/kernel/tests/read.rs @@ -1033,7 +1033,7 @@ fn predicate_references_invalid_missing_column() -> Result<(), Box