Skip to content

Commit

Permalink
make paths relative again
Browse files Browse the repository at this point in the history
  • Loading branch information
samansmink committed May 7, 2024
1 parent a6f5912 commit 15b4b04
Show file tree
Hide file tree
Showing 7 changed files with 82 additions and 71 deletions.
25 changes: 17 additions & 8 deletions src/functions/deltatable_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "duckdb/planner/operator/logical_get.hpp"
#include "duckdb/main/extension_util.hpp"
#include "duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp"
#include "duckdb/common/local_file_system.hpp"
#include "duckdb/common/types/data_chunk.hpp"
#include "duckdb/parser/expression/constant_expression.hpp"
#include "duckdb/parser/expression/function_expression.hpp"
Expand Down Expand Up @@ -37,7 +38,7 @@ static void visit_callback(ffi::NullableCvoid engine_context, const struct ffi::
// printf("Fetch metadata for %s\n", path_string.c_str());

// First we append the file to our resolved files
context->resolved_files.push_back(DeltaTableSnapshot::CleanPath(path_string));
context->resolved_files.push_back(DeltaTableSnapshot::ToDuckDBPath(path_string));
context->metadata.push_back({});

D_ASSERT(context->resolved_files.size() == context->metadata.size());
Expand Down Expand Up @@ -121,20 +122,32 @@ static ffi::EngineInterfaceBuilder* CreateBuilder(ClientContext &context, const
return builder;
}

DeltaTableSnapshot::DeltaTableSnapshot(ClientContext &context_p, const string &path) : MultiFileList({path}, FileGlobOptions::ALLOW_EMPTY), context(context_p) {
//! Builds a snapshot for the Delta table at `path`.
//! The path is first run through ToDeltaPath and then handed to the MultiFileList base as its
//! single entry, with FileGlobOptions::ALLOW_EMPTY. The client context is kept by reference.
DeltaTableSnapshot::DeltaTableSnapshot(ClientContext &context_p, const string &path)
    : MultiFileList({ToDeltaPath(path)}, FileGlobOptions::ALLOW_EMPTY),
      context(context_p) {
}

//! Returns the first entry of the path list exposed by GetPaths().
string DeltaTableSnapshot::GetPath() {
	const auto &paths = GetPaths();
	return paths[0];
}

string DeltaTableSnapshot::CleanPath(const string &raw_path) {
//! Converts a path into the form stored in the resolved file list:
//! a leading "file://" prefix is stripped, any other input is returned unchanged.
string DeltaTableSnapshot::ToDuckDBPath(const string &raw_path) {
	const string file_scheme = "file://";
	const bool has_file_scheme = StringUtil::StartsWith(raw_path, file_scheme);
	// Drop exactly the prefix characters; everything after them is kept as-is.
	return has_file_scheme ? raw_path.substr(file_scheme.size()) : raw_path;
}

//! Converts a user-supplied table path into the form passed on to the MultiFileList base.
//! Paths starting with "./" are joined onto the current working directory and prefixed with
//! "file://"; every other path is returned unchanged.
string DeltaTableSnapshot::ToDeltaPath(const string &raw_path) {
	// Only the explicit "./" form is rewritten.
	if (!StringUtil::StartsWith(raw_path, "./")) {
		return raw_path;
	}

	LocalFileSystem fs;
	// Strip the leading "./" before joining onto the working directory.
	const string absolute_path = fs.JoinPath(fs.GetWorkingDirectory(), raw_path.substr(2));
	return "file://" + absolute_path;
}

void DeltaTableSnapshot::Bind(vector<LogicalType> &return_types, vector<string> &names) {
if (!initialized) {
InitializeFiles();
Expand Down Expand Up @@ -299,8 +312,7 @@ bool DeltaMultiFileReader::Bind(MultiFileReaderOptions &options, MultiFileList &
bind_data.required_columns.push_back({
"file_row_number",
LogicalType::BIGINT,
file_row_number_enabled,
bind_data.file_row_number_idx // TODO is this even set already?
file_row_number_enabled
});

return true;
Expand Down Expand Up @@ -423,7 +435,6 @@ void DeltaMultiFileReader::FinalizeChunk(ClientContext &context, const MultiFile
const MultiFileReaderData &reader_data, DataChunk &chunk) {
// Base class finalization first
MultiFileReader::FinalizeChunk(context, bind_data, reader_data, chunk);
chunk.Print();

D_ASSERT(reader_data.file_metadata.file_list);

Expand All @@ -433,8 +444,6 @@ void DeltaMultiFileReader::FinalizeChunk(ClientContext &context, const MultiFile

if (metadata.selection_vector.get() && chunk.size() != 0) {
idx_t select_count;
idx_t file_row_number_col_idx;

auto res = reader_data.required_column_map.find("file_row_number");
if (res == reader_data.required_column_map.end()) {
throw InternalException("Failed to find file_row_number column used to apply the deletion vector at");
Expand Down
3 changes: 2 additions & 1 deletion src/include/functions/deltatable_scan.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ struct DeltaFileMetaData {
struct DeltaTableSnapshot : public MultiFileList {
DeltaTableSnapshot(ClientContext &context, const string &path);
string GetPath();
static string CleanPath(const string &raw_path);
static string ToDuckDBPath(const string &raw_path);
static string ToDeltaPath(const string &raw_path);

//! MultiFileList API
public:
Expand Down
12 changes: 6 additions & 6 deletions test/sql/dat/basic_append.test
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ mode skip
# Query the whole table
query II
SELECT letter, number
FROM delta_scan('file:///Users/sam/Development/delta-kernel-testing/delta-kernel-rs/acceptance/tests/dat/out/reader_tests/generated/basic_append/delta')
FROM delta_scan('./delta-kernel-rs/acceptance/tests/dat/out/reader_tests/generated/basic_append/delta')
----
d 4
e 5
Expand All @@ -30,7 +30,7 @@ c 3

query I
SELECT letter
FROM delta_scan('file:///Users/sam/Development/delta-kernel-testing/delta-kernel-rs/acceptance/tests/dat/out/reader_tests/generated/basic_append/delta')
FROM delta_scan('./delta-kernel-rs/acceptance/tests/dat/out/reader_tests/generated/basic_append/delta')
----
d
e
Expand All @@ -40,7 +40,7 @@ c

query I
SELECT number
FROM delta_scan('file:///Users/sam/Development/delta-kernel-testing/delta-kernel-rs/acceptance/tests/dat/out/reader_tests/generated/basic_append/delta')
FROM delta_scan('./delta-kernel-rs/acceptance/tests/dat/out/reader_tests/generated/basic_append/delta')
----
4
5
Expand All @@ -53,7 +53,7 @@ FROM delta_scan('file:///Users/sam/Development/delta-kernel-testing/delta-kernel
# Now we add a filter that filters out one of the files
query II
SELECT letter, number
FROM delta_scan('file:///Users/sam/Development/delta-kernel-testing/delta-kernel-rs/acceptance/tests/dat/out/reader_tests/generated/basic_append/delta')
FROM delta_scan('./delta-kernel-rs/acceptance/tests/dat/out/reader_tests/generated/basic_append/delta')
WHERE number < 2
----
a 1
Expand All @@ -63,7 +63,7 @@ mode unskip
# Now we add a filter that filters out the other file
query II
SELECT letter, number
FROM delta_scan('file:///Users/sam/Development/delta-kernel-testing/delta-kernel-rs/acceptance/tests/dat/out/reader_tests/generated/basic_append/delta')
FROM delta_scan('./delta-kernel-rs/acceptance/tests/dat/out/reader_tests/generated/basic_append/delta')
WHERE number > 4
----
e 5
Expand All @@ -73,6 +73,6 @@ mode skip
# Now we add a filter that filters out all columns
query II
SELECT letter, number
FROM delta_scan('file:///Users/sam/Development/delta-kernel-testing/delta-kernel-rs/acceptance/tests/dat/out/reader_tests/generated/basic_append/delta')
FROM delta_scan('./delta-kernel-rs/acceptance/tests/dat/out/reader_tests/generated/basic_append/delta')
WHERE number > 6
----
4 changes: 2 additions & 2 deletions test/sql/dat/test_custom_delta_scan_param.test
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ require-env DAT_AVAILABLE

query II
SELECT letter, number
FROM delta_scan('file:///Users/sam/Development/delta-kernel-testing/delta-kernel-rs/acceptance/tests/dat/out/reader_tests/generated/basic_append/delta')
FROM delta_scan('./delta-kernel-rs/acceptance/tests/dat/out/reader_tests/generated/basic_append/delta')
----
d 4
e 5
Expand All @@ -29,7 +29,7 @@ mode skip
# Demo delta_file_number parameter (i.e. Delta extension provided)
query III
SELECT letter, number, delta_file_number
FROM delta_scan('file:///Users/sam/Development/delta-kernel-testing/delta-kernel-rs/acceptance/tests/dat/out/reader_tests/generated/basic_append/delta', delta_file_number=1, file_row_number=1)
FROM delta_scan('./delta-kernel-rs/acceptance/tests/dat/out/reader_tests/generated/basic_append/delta', delta_file_number=1, file_row_number=1)
----
d 4 0
e 5 0
Expand Down
62 changes: 30 additions & 32 deletions test/sql/delta_scan_simple_part_generated.test
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ require-env GENERATED_DATA_AVAILABLE
# With a projection and delta constant column
query III
SELECT delta_file_number, part, i
FROM delta_scan('file:///Users/sam/Development/delta-kernel-testing/data/generated/simple_partitioned/delta_lake', delta_file_number=1);
FROM delta_scan('./data/generated/simple_partitioned/delta_lake', delta_file_number=1);
----
0 0 0
0 0 2
Expand All @@ -24,11 +24,9 @@ FROM delta_scan('file:///Users/sam/Development/delta-kernel-testing/data/generat
1 1 7
1 1 9

mode skip

# Simplest case
query II
FROM delta_scan('file:///Users/sam/Development/delta-kernel-testing/data/generated/simple_partitioned/delta_lake');
FROM delta_scan('./data/generated/simple_partitioned/delta_lake/');
----
0 0
2 0
Expand All @@ -44,7 +42,7 @@ FROM delta_scan('file:///Users/sam/Development/delta-kernel-testing/data/generat
# With a projection
query II
SELECT part, i
FROM delta_scan('file:///Users/sam/Development/delta-kernel-testing/data/generated/simple_partitioned/delta_lake');
FROM delta_scan('./data/generated/simple_partitioned/delta_lake');
----
0 0
0 2
Expand All @@ -60,7 +58,7 @@ FROM delta_scan('file:///Users/sam/Development/delta-kernel-testing/data/generat
# With a projection and delta constant column
query III
SELECT delta_file_number, part, i
FROM delta_scan('file:///Users/sam/Development/delta-kernel-testing/data/generated/simple_partitioned/delta_lake', delta_file_number=1);
FROM delta_scan('./data/generated/simple_partitioned/delta_lake', delta_file_number=1);
----
0 0 0
0 0 2
Expand All @@ -76,7 +74,7 @@ FROM delta_scan('file:///Users/sam/Development/delta-kernel-testing/data/generat
# different permutation
query III
SELECT part, delta_file_number, i
FROM delta_scan('file:///Users/sam/Development/delta-kernel-testing/data/generated/simple_partitioned/delta_lake', delta_file_number=1);
FROM delta_scan('./data/generated/simple_partitioned/delta_lake', delta_file_number=1);
----
0 0 0
0 0 2
Expand All @@ -92,7 +90,7 @@ FROM delta_scan('file:///Users/sam/Development/delta-kernel-testing/data/generat
# different permutation again
query III
SELECT part, i, delta_file_number
FROM delta_scan('file:///Users/sam/Development/delta-kernel-testing/data/generated/simple_partitioned/delta_lake', delta_file_number=1);
FROM delta_scan('./data/generated/simple_partitioned/delta_lake', delta_file_number=1);
----
0 0 0
0 2 0
Expand All @@ -107,32 +105,32 @@ FROM delta_scan('file:///Users/sam/Development/delta-kernel-testing/data/generat

# With a projection and both a base multifilereader column and the file_row_number option
query IIII
SELECT filename, part, i, file_row_number
FROM delta_scan('file:///Users/sam/Development/delta-kernel-testing/data/generated/simple_partitioned/delta_lake', file_row_number=1, filename=1);
SELECT parse_filename(filename), part, i, file_row_number
FROM delta_scan('./data/generated/simple_partitioned/delta_lake', file_row_number=1, filename=1);
----
/Users/sam/Development/delta-kernel-testing/data/generated/simple_partitioned/delta_lake/part=0/0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 0 0 0
/Users/sam/Development/delta-kernel-testing/data/generated/simple_partitioned/delta_lake/part=0/0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 0 2 1
/Users/sam/Development/delta-kernel-testing/data/generated/simple_partitioned/delta_lake/part=0/0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 0 4 2
/Users/sam/Development/delta-kernel-testing/data/generated/simple_partitioned/delta_lake/part=0/0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 0 6 3
/Users/sam/Development/delta-kernel-testing/data/generated/simple_partitioned/delta_lake/part=0/0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 0 8 4
/Users/sam/Development/delta-kernel-testing/data/generated/simple_partitioned/delta_lake/part=1/0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 1 1 0
/Users/sam/Development/delta-kernel-testing/data/generated/simple_partitioned/delta_lake/part=1/0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 1 3 1
/Users/sam/Development/delta-kernel-testing/data/generated/simple_partitioned/delta_lake/part=1/0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 1 5 2
/Users/sam/Development/delta-kernel-testing/data/generated/simple_partitioned/delta_lake/part=1/0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 1 7 3
/Users/sam/Development/delta-kernel-testing/data/generated/simple_partitioned/delta_lake/part=1/0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 1 9 4
0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 0 0 0
0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 0 2 1
0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 0 4 2
0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 0 6 3
0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 0 8 4
0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 1 1 0
0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 1 3 1
0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 1 5 2
0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 1 7 3
0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 1 9 4

# Final boss: add the delta_file_number to the mix
query IIIII
SELECT delta_file_number, filename, part, i, file_row_number
FROM delta_scan('file:///Users/sam/Development/delta-kernel-testing/data/generated/simple_partitioned/delta_lake', file_row_number=1, filename=1, delta_file_number=1);
SELECT delta_file_number, parse_filename(filename), part, i, file_row_number
FROM delta_scan('./data/generated/simple_partitioned/delta_lake', file_row_number=1, filename=1, delta_file_number=1);
----
0 /Users/sam/Development/delta-kernel-testing/data/generated/simple_partitioned/delta_lake/part=0/0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 0 0 0
0 /Users/sam/Development/delta-kernel-testing/data/generated/simple_partitioned/delta_lake/part=0/0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 0 2 1
0 /Users/sam/Development/delta-kernel-testing/data/generated/simple_partitioned/delta_lake/part=0/0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 0 4 2
0 /Users/sam/Development/delta-kernel-testing/data/generated/simple_partitioned/delta_lake/part=0/0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 0 6 3
0 /Users/sam/Development/delta-kernel-testing/data/generated/simple_partitioned/delta_lake/part=0/0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 0 8 4
1 /Users/sam/Development/delta-kernel-testing/data/generated/simple_partitioned/delta_lake/part=1/0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 1 1 0
1 /Users/sam/Development/delta-kernel-testing/data/generated/simple_partitioned/delta_lake/part=1/0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 1 3 1
1 /Users/sam/Development/delta-kernel-testing/data/generated/simple_partitioned/delta_lake/part=1/0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 1 5 2
1 /Users/sam/Development/delta-kernel-testing/data/generated/simple_partitioned/delta_lake/part=1/0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 1 7 3
1 /Users/sam/Development/delta-kernel-testing/data/generated/simple_partitioned/delta_lake/part=1/0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 1 9 4
0 0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 0 0 0
0 0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 0 2 1
0 0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 0 4 2
0 0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 0 6 3
0 0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 0 8 4
1 0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 1 1 0
1 0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 1 3 1
1 0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 1 5 2
1 0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 1 7 3
1 0-acbdd600-ea69-4180-81c7-530d09bcfcfe-0.parquet 1 9 4
Loading

0 comments on commit 15b4b04

Please sign in to comment.