Skip to content

Commit

Permalink
explanatory comments
Browse files Browse the repository at this point in the history
  • Loading branch information
parmesant committed Oct 3, 2024
1 parent f36ae17 commit 23fea12
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 2 deletions.
3 changes: 1 addition & 2 deletions server/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,8 @@ arrow-array = { version = "53.0.0" }
arrow-json = "53.0.0"
arrow-ipc = { version = "53.0.0", features = ["zstd"] }
arrow-select = "53.0.0"
# datafusion = { git = "https://github.com/apache/datafusion.git", rev = "a64df83502821f18067fb4ff65dd217815b305c9" }
datafusion = "42.0.0"
object_store = { version = "0.11.0", features = ["cloud", "aws"] } # cannot update object_store as datafusion has not caught up
object_store = { version = "0.11.0", features = ["cloud", "aws"] }
parquet = "53.0.0"
arrow-flight = { version = "53.0.0", features = [ "tls" ] }
tonic = {version = "0.12.1", features = ["tls", "transport", "gzip", "zstd"] }
Expand Down
3 changes: 3 additions & 0 deletions server/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,9 @@ impl Cli {
.help("Set a fixed memory limit for query"),
)
.arg(
// RowGroupSize controls the number of rows stored in each Parquet row group.
// More rows per group = better compression, but HIGHER memory consumption during read/write.
// 1048576 is the default value in DataFusion.
Arg::new(Self::ROW_GROUP_SIZE)
.long(Self::ROW_GROUP_SIZE)
.env("P_PARQUET_ROW_GROUP_SIZE")
Expand Down
11 changes: 11 additions & 0 deletions server/src/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,20 @@ impl Query {
.with_prefer_existing_sort(true)
.with_round_robin_repartition(true);

// For more details, refer to https://datafusion.apache.org/user-guide/configs.html

// Reduce the number of rows read (if possible)
config.options_mut().execution.parquet.enable_page_index = true;

// Enabling pushdown_filters allows DF to push filters as far down in the plan as possible,
// thereby reducing the number of rows decoded
config.options_mut().execution.parquet.pushdown_filters = true;

// Enabling reorder_filters allows DF to choose the order in which filters are evaluated, minimizing the cost of filter evaluation
config.options_mut().execution.parquet.reorder_filters = true;

// Enable StringViewArray
// https://www.influxdata.com/blog/faster-queries-with-stringview-part-one-influxdb/
config
.options_mut()
.execution
Expand Down

0 comments on commit 23fea12

Please sign in to comment.