Skip to content

Commit

Permalink
Revert "Merge branch 'table_changes_scan' of github.com:OussamaSaoudi…
Browse files Browse the repository at this point in the history
…-db/delta-kernel-rs into table_changes_scan"

This reverts commit 3f47e72, reversing
changes made to 1e15b9b.
  • Loading branch information
OussamaSaoudi-db committed Nov 21, 2024
1 parent 3f47e72 commit 604b6e2
Show file tree
Hide file tree
Showing 11 changed files with 107 additions and 124 deletions.
158 changes: 79 additions & 79 deletions CHANGELOG.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ edition = "2021"
homepage = "https://delta.io"
keywords = ["deltalake", "delta", "datalake"]
license = "Apache-2.0"
repository = "https://github.com/delta-io/delta-kernel-rs"
repository = "https://github.com/delta-incubator/delta-kernel-rs"
readme = "README.md"
version = "0.4.0"

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ Some design principles which should be considered:
[delta-github]: https://github.com/delta-io/delta
[java-kernel]: https://github.com/delta-io/delta/tree/master/kernel
[rustup]: https://rustup.rs
[architecture.md]: https://github.com/delta-io/delta-kernel-rs/tree/master/architecture.md
[architecture.md]: https://github.com/delta-incubator/delta-kernel-rs/tree/master/architecture.md
[dat]: https://github.com/delta-incubator/dat
[derive-macros]: https://doc.rust-lang.org/reference/procedural-macros.html
[API Docs]: https://docs.rs/delta_kernel/latest/delta_kernel/
Expand Down
2 changes: 1 addition & 1 deletion ffi/src/expressions/kernel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ type VisitUnaryFn = extern "C" fn(data: *mut c_void, sibling_list_id: usize, chi
/// to visitor methods
/// TODO: Visit type information in struct field and null. This will likely involve using the schema
/// visitor. Note that struct literals are currently in flux, and may change significantly. Here is the relevant
/// issue: https://github.com/delta-io/delta-kernel-rs/issues/412
/// issue: https://github.com/delta-incubator/delta-kernel-rs/issues/412
#[repr(C)]
pub struct EngineExpressionVisitor {
/// An opaque engine state pointer
Expand Down
2 changes: 1 addition & 1 deletion kernel/src/engine/parquet_row_group_skipping.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ impl<'a> ParquetStatsProvider for RowGroupFilter<'a> {
// actually exists in the table's logical schema, and that any necessary logical to
// physical name mapping has been performed. Because we currently lack both the
// validation and the name mapping support, we must disable this optimization for the
// time being. See https://github.com/delta-io/delta-kernel-rs/issues/434.
// time being. See https://github.com/delta-incubator/delta-kernel-rs/issues/434.
return Some(self.get_parquet_rowcount_stat()).filter(|_| false);
};

Expand Down
2 changes: 1 addition & 1 deletion kernel/src/expressions/scalars.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ impl ArrayData {
}

#[deprecated(
note = "These fields will be removed eventually and are unstable. See https://github.com/delta-io/delta-kernel-rs/issues/291"
note = "These fields will be removed eventually and are unstable. See https://github.com/delta-incubator/delta-kernel-rs/issues/291"
)]
pub fn array_elements(&self) -> &[Scalar] {
&self.elements
Expand Down
4 changes: 2 additions & 2 deletions kernel/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
//! [read-table-single-threaded] example (and for a more complex multi-threaded reader see the
//! [read-table-multi-threaded] example).
//!
//! [read-table-single-threaded]: https://github.com/delta-io/delta-kernel-rs/tree/main/kernel/examples/read-table-single-threaded
//! [read-table-multi-threaded]: https://github.com/delta-io/delta-kernel-rs/tree/main/kernel/examples/read-table-multi-threaded
//! [read-table-single-threaded]: https://github.com/delta-incubator/delta-kernel-rs/tree/main/kernel/examples/read-table-single-threaded
//! [read-table-multi-threaded]: https://github.com/delta-incubator/delta-kernel-rs/tree/main/kernel/examples/read-table-multi-threaded
//!
//! # Engine traits
//!
Expand Down
4 changes: 2 additions & 2 deletions kernel/src/log_segment/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ fn test_replay_for_metadata() {
//
// NOTE: Each checkpoint part is a single-row file -- guaranteed to produce one row group.
//
// WARNING: https://github.com/delta-io/delta-kernel-rs/issues/434 -- We currently
// WARNING: https://github.com/delta-incubator/delta-kernel-rs/issues/434 -- We currently
// read parts 1 and 5 (4 in all instead of 2) because row group skipping is disabled for
// missing columns, but can still skip part 3 because has valid nullcount stats for P&M.
assert_eq!(data.len(), 4);
Expand Down Expand Up @@ -260,7 +260,7 @@ fn build_snapshot_with_bad_checkpoint_hint_fails() {
#[ignore]
#[test]
fn build_snapshot_with_missing_checkpoint_part_no_hint() {
// TODO: Handle checkpoints correctly so that this test passes: https://github.com/delta-io/delta-kernel-rs/issues/497
// TODO: Handle checkpoints correctly so that this test passes: https://github.com/delta-incubator/delta-kernel-rs/issues/497

// Part 2 of 3 is missing from checkpoint 5. The Snapshot should be made of checkpoint
// number 3 and commit files 4 to 7.
Expand Down
49 changes: 16 additions & 33 deletions kernel/src/scan/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,22 +23,14 @@ mod data_skipping;
pub mod log_replay;
pub mod state;

pub trait Scannable {
type ScanType;
fn build_scan(
self: Arc<Self>,
schema: Option<SchemaRef>,
predicate: Option<ExpressionRef>,
) -> DeltaResult<Self::ScanType>;
}
/// Builder to scan a snapshot of a table.
pub struct ScanBuilder<T: Scannable> {
scannable: Arc<T>,
pub struct ScanBuilder {
snapshot: Arc<Snapshot>,
schema: Option<SchemaRef>,
predicate: Option<ExpressionRef>,
}

impl<T: Scannable> std::fmt::Debug for ScanBuilder<T> {
impl std::fmt::Debug for ScanBuilder {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
f.debug_struct("ScanBuilder")
.field("schema", &self.schema)
Expand All @@ -47,11 +39,11 @@ impl<T: Scannable> std::fmt::Debug for ScanBuilder<T> {
}
}

impl<T: Scannable> ScanBuilder<T> {
impl ScanBuilder {
/// Create a new [`ScanBuilder`] instance.
pub fn new(scannable: impl Into<Arc<T>>) -> Self {
pub fn new(snapshot: impl Into<Arc<Snapshot>>) -> Self {
Self {
scannable: scannable.into(),
snapshot: snapshot.into(),
schema: None,
predicate: None,
}
Expand Down Expand Up @@ -89,41 +81,32 @@ impl<T: Scannable> ScanBuilder<T> {
self
}

pub fn build(self) -> DeltaResult<T::ScanType> {
self.scannable.build_scan(self.schema, self.predicate)
}
}

impl Scannable for Snapshot {
type ScanType = Scan;
/// Build the [`Scan`].
///
/// This does not scan the table at this point, but does do some work to ensure that the
/// provided schema make sense, and to prepare some metadata that the scan will need. The
/// [`Scan`] type itself can be used to fetch the files and associated metadata required to
/// perform actual data reads.
fn build_scan(
self: Arc<Self>,
schema: Option<SchemaRef>,
predicate: Option<ExpressionRef>,
) -> DeltaResult<Scan> {
pub fn build(self) -> DeltaResult<Scan> {
// if no schema is provided, use snapshot's entire schema (e.g. SELECT *)
let logical_schema = schema.unwrap_or_else(|| self.schema().clone().into());
let logical_schema = self
.schema
.unwrap_or_else(|| self.snapshot.schema().clone().into());
let (all_fields, read_fields, have_partition_cols) = get_state_info(
logical_schema.as_ref(),
&self.metadata().partition_columns,
self.column_mapping_mode,
&self.snapshot.metadata().partition_columns,
self.snapshot.column_mapping_mode,
)?;
let physical_schema = Arc::new(StructType::new(read_fields));

// important! before a read/write to the table we must check it is supported
self.protocol().ensure_read_supported()?;
self.snapshot.protocol().ensure_read_supported()?;

Ok(Scan {
snapshot: self,
snapshot: self.snapshot,
logical_schema,
physical_schema,
predicate,
predicate: self.predicate,
all_fields,
have_partition_cols,
})
Expand Down Expand Up @@ -810,7 +793,7 @@ mod tests {
// Predicate over a logically valid but physically missing column. No data files should be
// returned because the column is inferred to be all-null.
//
// WARNING: https://github.com/delta-io/delta-kernel-rs/issues/434 - This
// WARNING: https://github.com/delta-incubator/delta-kernel-rs/issues/434 - This
// optimization is currently disabled, so the one data file is still returned.
let predicate = Arc::new(column_expr!("missing").lt(1000i64));
let scan = snapshot
Expand Down
4 changes: 2 additions & 2 deletions kernel/src/snapshot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,12 +129,12 @@ impl Snapshot {
}

/// Create a [`ScanBuilder`] for an `Arc<Snapshot>`.
pub fn scan_builder(self: Arc<Self>) -> ScanBuilder<Self> {
pub fn scan_builder(self: Arc<Self>) -> ScanBuilder {
ScanBuilder::new(self)
}

/// Consume this `Snapshot` to create a [`ScanBuilder`]
pub fn into_scan_builder(self) -> ScanBuilder<Self> {
pub fn into_scan_builder(self) -> ScanBuilder {
ScanBuilder::new(self)
}
}
Expand Down
2 changes: 1 addition & 1 deletion kernel/tests/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1033,7 +1033,7 @@ fn predicate_references_invalid_missing_column() -> Result<(), Box<dyn std::erro
// Attempted skipping over a logically valid but physically missing column. We should be able to
// skip the data file because the missing column is inferred to be all-null.
//
// WARNING: https://github.com/delta-io/delta-kernel-rs/issues/434 -- currently disabled.
// WARNING: https://github.com/delta-incubator/delta-kernel-rs/issues/434 -- currently disabled.
//
//let expected = vec![
// "+--------+",
Expand Down

0 comments on commit 604b6e2

Please sign in to comment.