explanatory comments

parseablehq · Oct 3, 2024 · 23fea12 · 23fea12
1 parent f36ae17
commit 23fea12
Show file tree

Hide file tree

Showing 3 changed files with 15 additions and 2 deletions.
diff --git a/server/Cargo.toml b/server/Cargo.toml
@@ -15,9 +15,8 @@ arrow-array = { version = "53.0.0" }
 arrow-json = "53.0.0"
 arrow-ipc = { version = "53.0.0", features = ["zstd"] }
 arrow-select = "53.0.0"
-# datafusion = { git = "https://github.com/apache/datafusion.git", rev = "a64df83502821f18067fb4ff65dd217815b305c9" }
 datafusion = "42.0.0"
-object_store = { version = "0.11.0", features = ["cloud", "aws"] } # cannot update object_store as datafusion has not caught up
+object_store = { version = "0.11.0", features = ["cloud", "aws"] }
 parquet = "53.0.0"
 arrow-flight = { version = "53.0.0", features = [ "tls" ] }
 tonic = {version = "0.12.1", features = ["tls", "transport", "gzip", "zstd"] }

diff --git a/server/src/cli.rs b/server/src/cli.rs
@@ -421,6 +421,9 @@ impl Cli {
  .help("Set a fixed memory limit for query"),
  )
  .arg(
+ // RowGroupSize controls the number of rows stored in one row group.
+ // More rows = better compression, but higher memory consumption during read/write.
+ // 1048576 is the default value in DataFusion.
  Arg::new(Self::ROW_GROUP_SIZE)
  .long(Self::ROW_GROUP_SIZE)
  .env("P_PARQUET_ROW_GROUP_SIZE")

diff --git a/server/src/query.rs b/server/src/query.rs
@@ -86,9 +86,20 @@ impl Query {
  .with_prefer_existing_sort(true)
  .with_round_robin_repartition(true);
 
+ // For more details, refer to https://datafusion.apache.org/user-guide/configs.html
+
+ // Reduce the number of rows read (if possible)
  config.options_mut().execution.parquet.enable_page_index = true;
+
+ // Pushdown filters allow DF to push the filters as far down in the plan as possible,
+ // thus reducing the number of rows decoded
  config.options_mut().execution.parquet.pushdown_filters = true;
+
+ // Reorder filters allows DF to choose the order of filters so as to minimize the cost of filter evaluation
  config.options_mut().execution.parquet.reorder_filters = true;
+
+ // Enable StringViewArray
+ // https://www.influxdata.com/blog/faster-queries-with-stringview-part-one-influxdb/
  config
  .options_mut()
  .execution