diff --git a/Cargo.lock b/Cargo.lock
index f58d0c17..716c79b5 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2850,6 +2850,7 @@ dependencies = [
  "pretty-xmlish",
  "serde",
  "serde_with",
+ "test-case",
  "tracing",
  "tracing-subscriber",
  "union-find",
diff --git a/dev_scripts/which_queries_work.sh b/dev_scripts/which_queries_work.sh
index b5bc553c..bf9b4db8 100755
--- a/dev_scripts/which_queries_work.sh
+++ b/dev_scripts/which_queries_work.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 benchmark_name=$1
-USAGE="Usage: $0 [job|tpch]"
+USAGE="Usage: $0 [job|joblight|tpch]"
 
 if [ $# -ne 1 ]; then
     echo >&2 $USAGE
diff --git a/optd-datafusion-repr/Cargo.toml b/optd-datafusion-repr/Cargo.toml
index c17b9d18..efe7cf83 100644
--- a/optd-datafusion-repr/Cargo.toml
+++ b/optd-datafusion-repr/Cargo.toml
@@ -26,3 +26,4 @@ serde = { version = "1.0", features = ["derive"] }
 serde_with = {version = "3.7.0", features = ["json"]}
 bincode = "1.3.3"
 union-find = { git = "https://github.com/Gun9niR/union-find-rs.git", rev = "794821514f7daefcbb8d5f38ef04e62fc18b5665" }
+test-case = "3.3"
diff --git a/optd-datafusion-repr/src/cost/base_cost.rs b/optd-datafusion-repr/src/cost/base_cost.rs
index 922fb825..1ec44f70 100644
--- a/optd-datafusion-repr/src/cost/base_cost.rs
+++ b/optd-datafusion-repr/src/cost/base_cost.rs
@@ -318,6 +318,7 @@ mod tests {
     pub const TABLE1_NAME: &str = "table1";
     pub const TABLE2_NAME: &str = "table2";
     pub const TABLE3_NAME: &str = "table3";
+    pub const TABLE4_NAME: &str = "table4";
 
     // one column is sufficient for all filter selectivity tests
     pub fn create_one_column_cost_model(per_column_stats: TestPerColumnStats) -> TestOptCostModel {
@@ -379,6 +380,49 @@ mod tests {
         )
     }
 
+    /// Create a cost model with four columns, one for each table. Each column has 100 values.
+    pub fn create_four_table_cost_model(
+        tbl1_per_column_stats: TestPerColumnStats,
+        tbl2_per_column_stats: TestPerColumnStats,
+        tbl3_per_column_stats: TestPerColumnStats,
+        tbl4_per_column_stats: TestPerColumnStats,
+    ) -> TestOptCostModel {
+        OptCostModel::new(
+            vec![
+                (
+                    String::from(TABLE1_NAME),
+                    TableStats::new(
+                        100,
+                        vec![(vec![0], tbl1_per_column_stats)].into_iter().collect(),
+                    ),
+                ),
+                (
+                    String::from(TABLE2_NAME),
+                    TableStats::new(
+                        100,
+                        vec![(vec![0], tbl2_per_column_stats)].into_iter().collect(),
+                    ),
+                ),
+                (
+                    String::from(TABLE3_NAME),
+                    TableStats::new(
+                        100,
+                        vec![(vec![0], tbl3_per_column_stats)].into_iter().collect(),
+                    ),
+                ),
+                (
+                    String::from(TABLE4_NAME),
+                    TableStats::new(
+                        100,
+                        vec![(vec![0], tbl4_per_column_stats)].into_iter().collect(),
+                    ),
+                ),
+            ]
+            .into_iter()
+            .collect(),
+        )
+    }
+
     /// We need custom row counts because some join algorithms rely on the row cnt
     pub fn create_two_table_cost_model_custom_row_cnts(
         tbl1_per_column_stats: TestPerColumnStats,
diff --git a/optd-datafusion-repr/src/cost/base_cost/join.rs b/optd-datafusion-repr/src/cost/base_cost/join.rs
index 1c2fc13e..238313ea 100644
--- a/optd-datafusion-repr/src/cost/base_cost/join.rs
+++ b/optd-datafusion-repr/src/cost/base_cost/join.rs
@@ -1,4 +1,4 @@
-use std::ops::ControlFlow;
+use std::collections::HashSet;
 
 use itertools::Itertools;
 use optd_core::{
@@ -6,7 +6,6 @@ use optd_core::{
     cost::Cost,
 };
 use serde::{de::DeserializeOwned, Serialize};
-use union_find::{disjoint_sets::DisjointSets, union_find::UnionFind};
 
 use crate::{
     cost::base_cost::{
@@ -345,115 +344,95 @@ impl<
         selectivity
     }
 
-    /// Given a set of equality predicates P that define N equal columns, find the selectivity of
-    /// the most selective N - 1 predicates that "touches" all the columns.
+    /// Given a set of N columns involved in a multi-equality, find the total selectivity
+    /// of the multi-equality.
     ///
-    /// We solve the problem using MST (Minimum Spanning Tree), where the columns are nodes and the
-    /// predicates are undirected edges. Since all the columns are equal, the graph is connected.
-    fn get_join_selecitivity_from_most_selective_predicates(
+    /// This is a generalization of get_join_selectivity_from_on_col_ref_pair().
+    fn get_join_selectivity_from_most_selective_columns(
         &self,
-        predicates: Vec<EqPredicate>,
-        num_cols: usize,
+        base_col_refs: HashSet<BaseTableColumnRef>,
     ) -> f64 {
-        let mut acc_sel = 1.0;
-        let mut num_picked_predicates = 0;
-        let mut disjoint_sets = DisjointSets::new();
-
-        // Use Kruskal to compute MST.
-        // Step 1: sort predicates by selectivity in ascending order.
-        let mut sorted_predicates = predicates
+        assert!(base_col_refs.len() > 1);
+        let num_base_col_refs = base_col_refs.len();
+        base_col_refs
             .into_iter()
-            .map(|p| {
-                let sel: f64 = self.get_join_selectivity_from_on_col_ref_pair(
-                    &p.left.clone().into(),
-                    &p.right.clone().into(),
-                );
-                (p, sel)
+            .map(|base_col_ref| {
+                match self.get_column_comb_stats(&base_col_ref.table, &[base_col_ref.col_idx]) {
+                    Some(per_col_stats) => per_col_stats.ndistinct,
+                    None => DEFAULT_NUM_DISTINCT,
+                }
             })
-            .sorted_by(|(_, sel1), (_, sel2)| sel1.partial_cmp(sel2).unwrap());
-
-        // Step 2: pick predicates until all columns are "connected" by the predicates.
-        sorted_predicates.try_for_each(|(p, sel)| {
-            if !disjoint_sets.contains(&p.left) {
-                disjoint_sets.make_set(p.left.clone()).unwrap();
-            }
-            if !disjoint_sets.contains(&p.right) {
-                disjoint_sets.make_set(p.right.clone()).unwrap();
-            }
-            if !disjoint_sets.same_set(&p.left, &p.right).unwrap() {
-                acc_sel *= sel;
-                num_picked_predicates += 1;
-                disjoint_sets.union(&p.left, &p.right).unwrap();
-            }
-            if num_picked_predicates == num_cols - 1 {
-                ControlFlow::Break(())
-            } else {
-                ControlFlow::Continue(())
-            }
-        });
-        debug_assert_eq!(
-            num_picked_predicates,
-            num_cols - 1,
-            "we should have picked N - 1 predicates"
-        );
-        debug_assert_eq!(
-            disjoint_sets.num_sets(),
-            1,
-            "all columns should be connected by the predicates"
-        );
-        debug_assert_eq!(
-            disjoint_sets.num_items(),
-            num_cols,
-            "all columns should be connected by the predicates"
-        );
-        acc_sel
+            .map(|ndistinct| 1.0 / ndistinct as f64)
+            .sorted_by(|a, b| {
+                a.partial_cmp(b)
+                    .expect("No floats should be NaN since n-distinct is never 0")
+            })
+            .take(num_base_col_refs - 1)
+            .product()
     }
 
-    /// A predicate set contains "redundant" predicates if some of them can be expressed with the rest.
-    /// E.g. In { A = B, B = C, A = C }, one of the predicates is redundant.
-    /// In this case, we want to pick the most selective predicates that touch all the columns
-    /// that this set of predicates touches.
+    /// A predicate set defines a "multi-equality graph", which is an unweighted undirected graph. The
+    /// nodes are columns while edges are predicates. The old graph is defined by `past_eq_columns`
+    /// while the `predicate` is the new addition to this graph. This unweighted undirected graph
+    /// consists of a number of connected components, where each connected component represents columns
+    /// that are set to be equal to each other. Single nodes not connected to anything are considered
+    /// standalone connected components.
     ///
-    /// If we have N columns that are equal, and the set of equality predicates P that defines the
-    /// equalities (|P| >= N - 1), we pick the N - 1 most selective predicates (denoted P') that
-    /// define the equalities by computing the MST of the graph where the columns are nodes and the
-    /// predicates are edges (see `get_join_selecitivity_from_most_selective_predicates` for
-    /// implementation).
+    /// The selectivity of each connected component of N nodes is equal to the product of 1/ndistinct of
+    /// the N-1 nodes with the highest ndistinct values. You can see this if you imagine that all columns
+    /// being joined are unique columns and that they follow the inclusion principle (every element of the
+    /// smaller tables is present in the larger tables). When these assumptions are not true, the selectivity
+    /// may not be completely accurate. However, it is still fairly accurate.
     ///
-    /// But since child has already picked some predicates which might not be the most selective
-    /// (because it has not seen the most selective ones), when we encounter a potentially more
-    /// selective `predicate` (in the parameter) and a set of previously seen predicates
-    /// `past_eq_columns`, `predicate` produces a selectivity adjustment factor, which is the
-    /// multiplied selectivity of the most selective N - 1 predicate among `past_eq_columns` union
-    /// `predicate` divided by the selectivity of the `past_eq_columns`.
+    /// However, we cannot simply add `predicate` to the multi-equality graph and compute the selectivity of
+    /// the entire connected component, because this would be "double counting" a lot of nodes. The join(s)
+    /// before this join would already have a selectivity value. Thus, we compute the selectivity of the
+    /// join(s) before this join (the first block of the function) and then the selectivity of the connected
+    /// component after this join. The quotient is the "adjustment" factor.
     ///
     /// NOTE: This function modifies `past_eq_columns` by adding `predicate` to it.
-    fn get_join_selectivity_adjustment_from_redundant_predicates(
+    fn get_join_selectivity_adjustment_when_adding_to_multi_equality_graph(
         &self,
-        predicate: EqPredicate,
+        predicate: &EqPredicate,
         past_eq_columns: &mut EqBaseTableColumnSets,
     ) -> f64 {
-        let left = predicate.left.clone();
-        // Compute the selectivity of the most selective N - 1 predicates.
+        // To find the adjustment, we need to know the selectivity of the graph before `predicate` is added.
+        //
+        // There are two cases: (1) adding `predicate` does not change the # of connected components, and
+        // (2) adding `predicate` reduces the # of connected components by 1. Note that columns not involved
+        // in any predicates are considered a part of the graph and are a connected component on their own.
         let children_pred_sel = {
-            let predicates = past_eq_columns.find_predicates_for_eq_column_set(&left);
-            self.get_join_selecitivity_from_most_selective_predicates(
-                predicates,
-                past_eq_columns.num_eq_columns(&left),
-            )
+            if past_eq_columns.is_eq(&predicate.left, &predicate.right) {
+                self.get_join_selectivity_from_most_selective_columns(
+                    past_eq_columns.find_cols_for_eq_column_set(&predicate.left),
+                )
+            } else {
+                let left_sel = if past_eq_columns.contains(&predicate.left) {
+                    self.get_join_selectivity_from_most_selective_columns(
+                        past_eq_columns.find_cols_for_eq_column_set(&predicate.left),
+                    )
+                } else {
+                    1.0
+                };
+                let right_sel = if past_eq_columns.contains(&predicate.right) {
+                    self.get_join_selectivity_from_most_selective_columns(
+                        past_eq_columns.find_cols_for_eq_column_set(&predicate.right),
+                    )
+                } else {
+                    1.0
+                };
+                left_sel * right_sel
+            }
         };
-        // Add predicate to past_eq_columns.
-        past_eq_columns.add_predicate(predicate);
-        // Repeat the same process with the new predicate.
+
+        // Add predicate to past_eq_columns and compute the selectivity of the connected component it creates.
+        past_eq_columns.add_predicate(predicate.clone());
         let new_pred_sel = {
-            let predicates = past_eq_columns.find_predicates_for_eq_column_set(&left);
-            self.get_join_selecitivity_from_most_selective_predicates(
-                predicates,
-                past_eq_columns.num_eq_columns(&left),
-            )
+            let cols = past_eq_columns.find_cols_for_eq_column_set(&predicate.left);
+            self.get_join_selectivity_from_most_selective_columns(cols)
         };
 
-        // Compute division of MSTs as the selectivity.
+        // Compute the adjustment factor.
         new_pred_sel / children_pred_sel
     }
 
@@ -492,14 +471,11 @@ impl<
                     (left_col_ref, right_col_ref)
                 {
                     let predicate = EqPredicate::new(left.clone(), right.clone());
-                    if past_eq_columns.is_eq(left, right) {
-                        return self.get_join_selectivity_adjustment_from_redundant_predicates(
-                            predicate,
+                    return self
+                        .get_join_selectivity_adjustment_when_adding_to_multi_equality_graph(
+                            &predicate,
                             &mut past_eq_columns,
                         );
-                    } else {
-                        past_eq_columns.add_predicate(predicate);
-                    }
                 }
 
                 self.get_join_selectivity_from_on_col_ref_pair(left_col_ref, right_col_ref)
@@ -510,6 +486,8 @@ impl<
 
 #[cfg(test)]
 mod tests {
+    use std::collections::HashSet;
+
     use optd_core::rel_node::Value;
 
     use crate::{
@@ -1131,13 +1109,27 @@ mod tests {
         );
     }
 
-    // Ensure that in `select t1, t2, t3 where t1.a = t2.a and t2.a = t3.a and t1.a = t3.a`,
-    // even if the first join picks the most selective predicate (which should have been discarded,
-    // since we need to ensure the most selective N - 1 predicates are picked), the selectivity is
-    // adjusted in the second join so that the final selectivity is the product of the
-    // selectivities of the 2 most selective redicates.
-    #[test]
-    fn test_inner_redundant_predicate() {
+    /// Test all possible permutations of three-table joins.
+    /// A three-table join consists of at least two joins. `initial_join_on_conds` lists the equality
+    ///   predicates, as pairs of column indices, that are already recorded in `eq_columns` before
+    ///   the final join is evaluated. It must be non-empty and must not contain duplicates.
+    #[test_case::test_case(&[(0, 1)])]
+    #[test_case::test_case(&[(0, 2)])]
+    #[test_case::test_case(&[(1, 2)])]
+    #[test_case::test_case(&[(0, 1), (0, 2)])]
+    #[test_case::test_case(&[(0, 1), (1, 2)])]
+    #[test_case::test_case(&[(0, 2), (1, 2)])]
+    #[test_case::test_case(&[(0, 1), (0, 2), (1, 2)])]
+    fn test_three_table_join_for_initial_join_on_conds(initial_join_on_conds: &[(usize, usize)]) {
+        assert!(
+            !initial_join_on_conds.is_empty(),
+            "initial_join_on_conds should be non-empty"
+        );
+        assert_eq!(
+            initial_join_on_conds.len(),
+            initial_join_on_conds.iter().collect::<HashSet<_>>().len(),
+            "initial_join_on_conds shouldn't contain duplicates"
+        );
         let cost_model = create_three_table_cost_model(
             TestPerColumnStats::new(
                 TestMostCommonValues::empty(),
@@ -1147,50 +1139,175 @@ mod tests {
             ),
             TestPerColumnStats::new(
                 TestMostCommonValues::empty(),
-                4,
+                3,
                 0.0,
                 Some(TestDistribution::empty()),
             ),
             TestPerColumnStats::new(
                 TestMostCommonValues::empty(),
-                5,
+                4,
                 0.0,
                 Some(TestDistribution::empty()),
             ),
         );
-        let col01_sel = 0.25;
-        let col02_sel = 0.2;
-        let col12_sel = 0.2;
-        let col0_base_ref = BaseTableColumnRef {
-            table: String::from(TABLE1_NAME),
-            col_idx: 0,
-        };
-        let col1_base_ref = BaseTableColumnRef {
-            table: String::from(TABLE2_NAME),
-            col_idx: 0,
-        };
-        let col2_base_ref = BaseTableColumnRef {
-            table: String::from(TABLE3_NAME),
-            col_idx: 0,
-        };
-        let col0_ref: ColumnRef = col0_base_ref.clone().into();
-        let col1_ref: ColumnRef = col1_base_ref.clone().into();
-        let col2_ref: ColumnRef = col2_base_ref.clone().into();
+        let col_base_refs = vec![
+            BaseTableColumnRef {
+                table: String::from(TABLE1_NAME),
+                col_idx: 0,
+            },
+            BaseTableColumnRef {
+                table: String::from(TABLE2_NAME),
+                col_idx: 0,
+            },
+            BaseTableColumnRef {
+                table: String::from(TABLE3_NAME),
+                col_idx: 0,
+            },
+        ];
+        let col_refs: Vec<ColumnRef> = col_base_refs
+            .clone()
+            .into_iter()
+            .map(|col_base_ref| col_base_ref.into())
+            .collect();
 
         let mut eq_columns = EqBaseTableColumnSets::new();
-        eq_columns.add_predicate(EqPredicate::new(col0_base_ref, col1_base_ref));
+        for initial_join_on_cond in initial_join_on_conds {
+            eq_columns.add_predicate(EqPredicate::new(
+                col_base_refs[initial_join_on_cond.0].clone(),
+                col_base_refs[initial_join_on_cond.1].clone(),
+            ));
+        }
+        let initial_selectivity = {
+            if initial_join_on_conds.len() == 1 {
+                let initial_join_on_cond = initial_join_on_conds.first().unwrap();
+                if initial_join_on_cond == &(0, 1) {
+                    1.0 / 3.0
+                } else if initial_join_on_cond == &(0, 2) || initial_join_on_cond == &(1, 2) {
+                    1.0 / 4.0
+                } else {
+                    panic!();
+                }
+            } else {
+                1.0 / 12.0
+            }
+        };
         let semantic_correlation = SemanticCorrelation::new(eq_columns);
-        let column_refs = GroupColumnRefs::new_test(
-            vec![col0_ref.clone(), col1_ref.clone(), col2_ref.clone()],
-            Some(semantic_correlation),
-        );
+        let column_refs = GroupColumnRefs::new_test(col_refs, Some(semantic_correlation));
 
+        // Try all join conditions of the final join which would lead to all three tables being joined.
+        let eq0and1 = bin_op(BinOpType::Eq, col_ref(0), col_ref(1));
         let eq0and2 = bin_op(BinOpType::Eq, col_ref(0), col_ref(2));
         let eq1and2 = bin_op(BinOpType::Eq, col_ref(1), col_ref(2));
-        let expr_tree = log_op(LogOpType::And, vec![eq0and2, eq1and2]);
-        assert_approx_eq::assert_approx_eq!(
-            test_get_join_selectivity(&cost_model, false, JoinType::Inner, expr_tree, &column_refs),
-            col02_sel * (col02_sel * col12_sel) / (col01_sel * col12_sel)
+        let and_01_02 = log_op(LogOpType::And, vec![eq0and1.clone(), eq0and2.clone()]);
+        let and_01_12 = log_op(LogOpType::And, vec![eq0and1.clone(), eq1and2.clone()]);
+        let and_02_12 = log_op(LogOpType::And, vec![eq0and2.clone(), eq1and2.clone()]);
+        let and_01_02_12 = log_op(
+            LogOpType::And,
+            vec![eq0and1.clone(), eq0and2.clone(), eq1and2.clone()],
         );
+        let mut join2_expr_trees = vec![and_01_02, and_01_12, and_02_12, and_01_02_12];
+        if initial_join_on_conds.len() == 1 {
+            let initial_join_on_cond = initial_join_on_conds.first().unwrap();
+            if initial_join_on_cond == &(0, 1) {
+                join2_expr_trees.push(eq0and2);
+                join2_expr_trees.push(eq1and2);
+            } else if initial_join_on_cond == &(0, 2) {
+                join2_expr_trees.push(eq0and1);
+                join2_expr_trees.push(eq1and2);
+            } else if initial_join_on_cond == &(1, 2) {
+                join2_expr_trees.push(eq0and1);
+                join2_expr_trees.push(eq0and2);
+            } else {
+                panic!();
+            }
+        }
+        for expr_tree in join2_expr_trees {
+            let overall_selectivity = initial_selectivity
+                * test_get_join_selectivity(
+                    &cost_model,
+                    false,
+                    JoinType::Inner,
+                    expr_tree.clone(),
+                    &column_refs,
+                );
+            assert_approx_eq::assert_approx_eq!(overall_selectivity, 1.0 / 12.0);
+        }
+    }
+
+    #[test]
+    fn test_join_which_connects_two_components_together() {
+        let cost_model = create_four_table_cost_model(
+            TestPerColumnStats::new(
+                TestMostCommonValues::empty(),
+                2,
+                0.0,
+                Some(TestDistribution::empty()),
+            ),
+            TestPerColumnStats::new(
+                TestMostCommonValues::empty(),
+                3,
+                0.0,
+                Some(TestDistribution::empty()),
+            ),
+            TestPerColumnStats::new(
+                TestMostCommonValues::empty(),
+                4,
+                0.0,
+                Some(TestDistribution::empty()),
+            ),
+            TestPerColumnStats::new(
+                TestMostCommonValues::empty(),
+                5,
+                0.0,
+                Some(TestDistribution::empty()),
+            ),
+        );
+        let col_base_refs = vec![
+            BaseTableColumnRef {
+                table: String::from(TABLE1_NAME),
+                col_idx: 0,
+            },
+            BaseTableColumnRef {
+                table: String::from(TABLE2_NAME),
+                col_idx: 0,
+            },
+            BaseTableColumnRef {
+                table: String::from(TABLE3_NAME),
+                col_idx: 0,
+            },
+            BaseTableColumnRef {
+                table: String::from(TABLE4_NAME),
+                col_idx: 0,
+            },
+        ];
+        let col_refs: Vec<ColumnRef> = col_base_refs
+            .clone()
+            .into_iter()
+            .map(|col_base_ref| col_base_ref.into())
+            .collect();
+
+        let mut eq_columns = EqBaseTableColumnSets::new();
+        eq_columns.add_predicate(EqPredicate::new(
+            col_base_refs[0].clone(),
+            col_base_refs[1].clone(),
+        ));
+        eq_columns.add_predicate(EqPredicate::new(
+            col_base_refs[2].clone(),
+            col_base_refs[3].clone(),
+        ));
+        let initial_selectivity = 1.0 / (3.0 * 5.0);
+        let semantic_correlation = SemanticCorrelation::new(eq_columns);
+        let column_refs = GroupColumnRefs::new_test(col_refs, Some(semantic_correlation));
+
+        let eq1and2 = bin_op(BinOpType::Eq, col_ref(1), col_ref(2));
+        let overall_selectivity = initial_selectivity
+            * test_get_join_selectivity(
+                &cost_model,
+                false,
+                JoinType::Inner,
+                eq1and2.clone(),
+                &column_refs,
+            );
+        assert_approx_eq::assert_approx_eq!(overall_selectivity, 1.0 / (3.0 * 4.0 * 5.0));
     }
 }
diff --git a/optd-datafusion-repr/src/properties/column_ref.rs b/optd-datafusion-repr/src/properties/column_ref.rs
index 4893a9ea..08151931 100644
--- a/optd-datafusion-repr/src/properties/column_ref.rs
+++ b/optd-datafusion-repr/src/properties/column_ref.rs
@@ -135,13 +135,17 @@ impl EqBaseTableColumnSets {
         self.eq_predicates.insert(predicate);
     }
 
-    /// Determine if two columns are equal.
+    /// Determine if two columns are in the same set.
     pub fn is_eq(&mut self, left: &BaseTableColumnRef, right: &BaseTableColumnRef) -> bool {
         self.disjoint_eq_col_sets
             .same_set(left, right)
             .unwrap_or(false)
     }
 
+    pub fn contains(&self, base_col_ref: &BaseTableColumnRef) -> bool {
+        self.disjoint_eq_col_sets.contains(base_col_ref)
+    }
+
     /// Get the number of columns that are equal to `col`, including `col` itself.
     pub fn num_eq_columns(&mut self, col: &BaseTableColumnRef) -> usize {
         self.disjoint_eq_col_sets.set_size(col).unwrap()
@@ -165,6 +169,18 @@ impl EqBaseTableColumnSets {
         predicates
     }
 
+    /// Find every column appearing in the predicates that define the equality set `col` belongs to.
+    pub fn find_cols_for_eq_column_set(
+        &mut self,
+        col: &BaseTableColumnRef,
+    ) -> HashSet<BaseTableColumnRef> {
+        let predicates = self.find_predicates_for_eq_column_set(col);
+        predicates
+            .into_iter()
+            .flat_map(|predicate| vec![predicate.left, predicate.right])
+            .collect()
+    }
+
     /// Union two `EqBaseTableColumnSets` to produce a new disjoint sets.
     pub fn union(x: &EqBaseTableColumnSets, y: &EqBaseTableColumnSets) -> EqBaseTableColumnSets {
         let mut eq_col_sets = Self::new();
diff --git a/optd-gungnir/src/stats/hyperloglog.rs b/optd-gungnir/src/stats/hyperloglog.rs
index 840afde5..d49f6f38 100644
--- a/optd-gungnir/src/stats/hyperloglog.rs
+++ b/optd-gungnir/src/stats/hyperloglog.rs
@@ -10,7 +10,7 @@ use optd_core::rel_node::Value;
 use crate::stats::murmur2::murmur_hash;
 use std::{cmp::max, marker::PhantomData};
 
-pub const DEFAULT_PRECISION: u8 = 12;
+pub const DEFAULT_PRECISION: u8 = 16;
 
 /// Trait to transform any object into a stream of bytes.
 pub trait ByteSerializable {
diff --git a/optd-gungnir/src/stats/misragries.rs b/optd-gungnir/src/stats/misragries.rs
index c4b27636..6c31c102 100644
--- a/optd-gungnir/src/stats/misragries.rs
+++ b/optd-gungnir/src/stats/misragries.rs
@@ -9,7 +9,7 @@ use std::{cmp::min, collections::HashMap, hash::Hash};
 
 use itertools::Itertools;
 
-pub const DEFAULT_K_TO_TRACK: u16 = 100;
+pub const DEFAULT_K_TO_TRACK: u16 = 1000;
 
 /// The Misra-Gries structure to approximate the k most frequent elements in
 /// a stream of N elements. It will always identify elements with frequency
diff --git a/optd-perftest/src/datafusion_dbms.rs b/optd-perftest/src/datafusion_dbms.rs
index 8c3ce67a..94371166 100644
--- a/optd-perftest/src/datafusion_dbms.rs
+++ b/optd-perftest/src/datafusion_dbms.rs
@@ -155,7 +155,7 @@ impl DatafusionDBMS {
 
         let mut estcards = vec![];
         for (query_id, sql_fpath) in tpch_kit.get_sql_fpath_ordered_iter(tpch_kit_config)? {
-            println!(
+            log::debug!(
                 "about to evaluate datafusion's estcard for TPC-H Q{}",
                 query_id
             );
@@ -177,9 +177,10 @@ impl DatafusionDBMS {
             } else {
                 "JOB-light"
             };
-            println!(
+            log::debug!(
                 "about to evaluate datafusion's estcard for {} Q{}",
-                benchmark_name, query_id
+                benchmark_name,
+                query_id
             );
             let sql = fs::read_to_string(sql_fpath)?;
             let estcard = self.eval_query_estcard(&sql).await?;
diff --git a/optd-perftest/src/job.rs b/optd-perftest/src/job.rs
index 8fc941fd..c14ad2a2 100644
--- a/optd-perftest/src/job.rs
+++ b/optd-perftest/src/job.rs
@@ -21,10 +21,11 @@ pub const WORKING_JOB_QUERY_IDS: &[&str] = &[
     "33b", "33c",
 ];
 pub const WORKING_JOBLIGHT_QUERY_IDS: &[&str] = &[
-    "1a", "1b", "1d", "2a", "3a", "3b", "3c", "4a", "4b", "4c", "5c", "6a", "6b", "6c", "6d", "6e",
-    "7b", "8a", "8b", "8c", "9b", "10a", "10c", "12a", "12b", "12c", "13a", "14a", "14b", "14c",
-    "15a", "15b", "15c", "16a", "17a", "17b", "17c", "18a", "18c", "19b", "20a", "20b", "20c",
-    "22a", "22b", "22c", "23a", "23b", "24a", "24b", "25a", "26a", "26b", "28a",
+    "1a", "1b", "1c", "1d", "2a", "3a", "3b", "4a", "4b", "4c", "5a", "5b", "5c", "6a", "6b", "6c",
+    "6d", "7a", "7b", "7c", "8a", "8b", "8c", "9a", "9b", "10a", "10b", "10c", "11a", "11b", "11c",
+    "12a", "12b", "12c", "13a", "14a", "14b", "14c", "16a", "17a", "17b", "17c", "18a", "19b",
+    "20a", "20b", "20c", "21a", "21b", "22b", "23b", "24a", "24b", "25a", "26a", "26b", "27a",
+    "27b",
 ];
 
 #[derive(Clone, Debug, Serialize, Deserialize)]
@@ -154,9 +155,9 @@ impl JobKit {
         job_kit_config: &JobKitConfig,
     ) -> io::Result<impl Iterator<Item = (String, PathBuf)>> {
         let queries_dpath = (if job_kit_config.is_light {
-            &self.job_queries_dpath
-        } else {
             &self.joblight_queries_dpath
+        } else {
+            &self.job_queries_dpath
         })
         .clone();
         let sql_fpath_ordered_iter =