Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
samansmink committed Apr 18, 2024
1 parent 69fb300 commit 47edfde
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 24 deletions.
24 changes: 11 additions & 13 deletions src/functions/deltatable_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,21 +109,19 @@ unique_ptr<MultiFileReader> DeltaMultiFileReader::CreateInstance() {
return std::move(make_uniq<DeltaMultiFileReader>());
}

bool DeltaMultiFileReader::Bind(MultiFileReaderOptions &options, MultiFileList &files,
vector<LogicalType> &return_types, vector<string> &names, MultiFileReaderBindData &bind_data) {
auto &delta_table_snapshot = dynamic_cast<DeltaTableSnapshot&>(files);
bool DeltaMultiFileReader::Bind(vector<LogicalType> &return_types, vector<string> &names, MultiFileReaderBindData &bind_data) {

auto &delta_table_snapshot = dynamic_cast<DeltaTableSnapshot&>(*files);
delta_table_snapshot.Bind(return_types, names);

return true;
};

void DeltaMultiFileReader::BindOptions(MultiFileReaderOptions &options, MultiFileList &files,
vector<LogicalType> &return_types, vector<string> &names, MultiFileReaderBindData& bind_data) {
MultiFileReader::BindOptions(options, files, return_types, names, bind_data);
void DeltaMultiFileReader::BindOptions(vector<LogicalType> &return_types, vector<string> &names, MultiFileReaderBindData& bind_data) {
MultiFileReader::BindOptions(return_types, names, bind_data);

//! TODO: this is a hack and should be cleaned up
auto custom_bind_data = make_uniq<DeltaMultiFileReaderBindData>(dynamic_cast<DeltaTableSnapshot&>(files));
auto custom_bind_data = make_uniq<DeltaMultiFileReaderBindData>(dynamic_cast<DeltaTableSnapshot&>(*files));

auto demo_gen_col_opt = options.custom_options.find("delta_file_number");
if (demo_gen_col_opt != options.custom_options.end()) {
Expand All @@ -135,12 +133,12 @@ void DeltaMultiFileReader::BindOptions(MultiFileReaderOptions &options, MultiFil
bind_data.custom_data = std::move(custom_bind_data);
}

void DeltaMultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_options, const MultiFileReaderBindData &options,
void DeltaMultiFileReader::FinalizeBind(const MultiFileReaderBindData &options,
const string &filename, const vector<string> &local_names,
const vector<LogicalType> &global_types, const vector<string> &global_names,
const vector<column_t> &global_column_ids, MultiFileReaderData &reader_data,
ClientContext &context) {
MultiFileReader::FinalizeBind(file_options, options, filename, local_names, global_types, global_names, global_column_ids, reader_data, context);
MultiFileReader::FinalizeBind(options, filename, local_names, global_types, global_names, global_column_ids, reader_data, context);


// The DeltaMultiFileReader specific finalization
Expand All @@ -153,13 +151,13 @@ void DeltaMultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_optio
}
}

unique_ptr<MultiFileList> DeltaMultiFileReader::GetFileList(ClientContext &context, const Value &input, const string &name,
void DeltaMultiFileReader::InitializeFiles(ClientContext &context, const Value &input, const string &name,
FileGlobOptions options) {
if (input.type() != LogicalType::VARCHAR) {
throw BinderException("'delta_scan' only supports single path");
}

return make_uniq<DeltaTableSnapshot>(input.GetValue<string>());
files = make_uniq<DeltaTableSnapshot>(input.GetValue<string>());
}

void DeltaMultiFileReader::FinalizeChunk(ClientContext &context, const MultiFileReaderBindData &bind_data,
Expand Down Expand Up @@ -195,15 +193,15 @@ void DeltaMultiFileReader::FinalizeChunk(ClientContext &context, const MultiFile
}
};

bool DeltaMultiFileReader::ParseOption(const string &key, const Value &val, MultiFileReaderOptions &options, ClientContext &context) {
bool DeltaMultiFileReader::ParseOption(const string &key, const Value &val, ClientContext &context) {
auto loption = StringUtil::Lower(key);

if (loption == "delta_file_number") {
options.custom_options[loption] = val;
return true;
}

return MultiFileReader::ParseOption(key, val, options, context);
return MultiFileReader::ParseOption(key, val, context);
}

DeltaMultiFileReaderBindData::DeltaMultiFileReaderBindData(DeltaTableSnapshot & delta_table_snapshot): current_snapshot(delta_table_snapshot){
Expand Down
19 changes: 8 additions & 11 deletions src/include/functions/deltatable_scan.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,21 +63,19 @@ struct DeltaTableSnapshot : public MultiFileList {

struct DeltaMultiFileReader : public MultiFileReader {
static unique_ptr<MultiFileReader> CreateInstance();
//! Return a DeltaTableSnapshot
unique_ptr<MultiFileList> GetFileList(ClientContext &context, const Value &input, const string &name,
FileGlobOptions options = FileGlobOptions::DISALLOW_EMPTY) override;

//! Override the regular InitializeFiles; instead of producing a MultiFileList, we want a DeltaTableSnapshot
void InitializeFiles(ClientContext &context, const Value &input, const string &name,
FileGlobOptions options) override;

//! Override the regular parquet bind using the MultiFileReader Bind. The result of this bind is what DuckDB's file
//! readers will try to read
bool Bind(MultiFileReaderOptions &options, MultiFileList &files,
vector<LogicalType> &return_types, vector<string> &names, MultiFileReaderBindData &bind_data) override;
bool Bind(vector<LogicalType> &return_types, vector<string> &names, MultiFileReaderBindData &bind_data) override;

//! Override the options bind. (Possibly superfluous: could Bind and BindOptions be merged into a single call?)
void BindOptions(MultiFileReaderOptions &options, MultiFileList &files,
vector<LogicalType> &return_types, vector<string> &names, MultiFileReaderBindData& bind_data) override;
void BindOptions(vector<LogicalType> &return_types, vector<string> &names, MultiFileReaderBindData& bind_data) override;

void FinalizeBind(const MultiFileReaderOptions &file_options, const MultiFileReaderBindData &options,
const string &filename, const vector<string> &local_names,
void FinalizeBind(const MultiFileReaderBindData &options, const string &filename, const vector<string> &local_names,
const vector<LogicalType> &global_types, const vector<string> &global_names,
const vector<column_t> &global_column_ids, MultiFileReaderData &reader_data,
ClientContext &context) override;
Expand All @@ -86,8 +84,7 @@ struct DeltaMultiFileReader : public MultiFileReader {
const MultiFileReaderData &reader_data, DataChunk &chunk, const string &filename) override;

//! Override the ParseOption call to parse delta_scan specific options
bool ParseOption(const string &key, const Value &val, MultiFileReaderOptions &options,
ClientContext &context) override;
bool ParseOption(const string &key, const Value &val, ClientContext &context) override;
};

struct DeltaMultiFileReaderBindData : public CustomMultiFileReaderBindData {
Expand Down

0 comments on commit 47edfde

Please sign in to comment.