diff --git a/server/Cargo.toml b/server/Cargo.toml index 1f5b089d..1fce35b0 100644 --- a/server/Cargo.toml +++ b/server/Cargo.toml @@ -15,9 +15,8 @@ arrow-array = { version = "53.0.0" } arrow-json = "53.0.0" arrow-ipc = { version = "53.0.0", features = ["zstd"] } arrow-select = "53.0.0" -# datafusion = { git = "https://github.com/apache/datafusion.git", rev = "a64df83502821f18067fb4ff65dd217815b305c9" } datafusion = "42.0.0" -object_store = { version = "0.11.0", features = ["cloud", "aws"] } # cannot update object_store as datafusion has not caught up +object_store = { version = "0.11.0", features = ["cloud", "aws"] } parquet = "53.0.0" arrow-flight = { version = "53.0.0", features = [ "tls" ] } tonic = {version = "0.12.1", features = ["tls", "transport", "gzip", "zstd"] } diff --git a/server/src/cli.rs b/server/src/cli.rs index 20498346..982a2a76 100644 --- a/server/src/cli.rs +++ b/server/src/cli.rs @@ -421,6 +421,9 @@ impl Cli { .help("Set a fixed memory limit for query"), ) .arg( + // RowGroupSize controls the number of rows present in one row group + // More rows = better compression but higher memory consumption during read/write + // 1048576 is the default value for DataFusion Arg::new(Self::ROW_GROUP_SIZE) .long(Self::ROW_GROUP_SIZE) .env("P_PARQUET_ROW_GROUP_SIZE") diff --git a/server/src/query.rs b/server/src/query.rs index e00b8c96..60a7b90a 100644 --- a/server/src/query.rs +++ b/server/src/query.rs @@ -86,9 +86,20 @@ impl Query { .with_prefer_existing_sort(true) .with_round_robin_repartition(true); + // For more details refer to https://datafusion.apache.org/user-guide/configs.html + + // Reduce the number of rows read (if possible) config.options_mut().execution.parquet.enable_page_index = true; + + // Pushdown filters allows DF to push the filters as far down in the plan as possible + // thus reducing the number of rows decoded config.options_mut().execution.parquet.pushdown_filters = true; + + // Reorder filters allows DF to decide the order of
filters minimizing the cost of filter evaluation config.options_mut().execution.parquet.reorder_filters = true; + + // Enable StringViewArray + // https://www.influxdata.com/blog/faster-queries-with-stringview-part-one-influxdb/ config .options_mut() .execution