Skip to content

Commit

Permalink
fix(cost): return full selectivity for self joins (#201)
Browse files Browse the repository at this point in the history
This change should be merged after #200.

Signed-off-by: Alex Chi <[email protected]>
Co-authored-by: Benjamin Owad <[email protected]>
  • Loading branch information
skyzh and jurplel authored Oct 30, 2024
1 parent 7b530d1 commit 68c697d
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 3 deletions.
4 changes: 4 additions & 0 deletions optd-datafusion-repr/src/cost/base_cost/join.rs
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,10 @@ impl<
predicate: &EqPredicate,
past_eq_columns: &mut EqBaseTableColumnSets,
) -> f64 {
if predicate.left == predicate.right {
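// Self-join: both sides of the equality predicate are identical, so report full
// selectivity (1.0). TODO: confirm that 1.0 is the correct selectivity for this case.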
return 1.0;
}
// To find the adjustment, we need to know the selectivity of the graph before `predicate` is added.
//
// There are two cases: (1) adding `predicate` does not change the # of connected components, and
Expand Down
10 changes: 7 additions & 3 deletions optd-datafusion-repr/src/properties/column_ref.rs
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,9 @@ impl PropertyBuilder<OptRelNodeTyp> for ColumnRefPropertyBuilder {
GroupColumnRefs::new(column_refs, child.output_correlation.clone())
}
// Should account for all physical join types.
OptRelNodeTyp::Join(join_type) | OptRelNodeTyp::RawDepJoin(join_type) | OptRelNodeTyp::DepJoin(join_type)=> {
OptRelNodeTyp::Join(join_type)
| OptRelNodeTyp::RawDepJoin(join_type)
| OptRelNodeTyp::DepJoin(join_type) => {
// Concatenate left and right children column refs.
let column_refs = Self::concat_children_col_refs(&children[0..2]);
// Merge the equal columns of two children as input correlation.
Expand Down Expand Up @@ -465,12 +467,14 @@ impl PropertyBuilder<OptRelNodeTyp> for ColumnRefPropertyBuilder {
GroupColumnRefs::new(column_refs, correlation)
}
OptRelNodeTyp::Constant(_)
| OptRelNodeTyp::ExternColumnRef // TODO Possibly very very wrong---consult cost model team
| OptRelNodeTyp::Func(_)
| OptRelNodeTyp::DataType(_)
| OptRelNodeTyp::Between
| OptRelNodeTyp::Like
| OptRelNodeTyp::InList => GroupColumnRefs::new(vec![ColumnRef::Derived], None),
| OptRelNodeTyp::InList
| OptRelNodeTyp::ExternColumnRef => {
GroupColumnRefs::new(vec![ColumnRef::Derived], None)
}
_ => unimplemented!("Unsupported rel node type {:?}", typ),
}
}
Expand Down
31 changes: 31 additions & 0 deletions optd-sqlplannertest/tests/joins/self-join.planner.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
-- (no id or description)
create table t1(t1v1 int, t1v2 int);
create table t2(t2v1 int, t2v3 int);
insert into t1 values (0, 0), (1, 1), (2, 2);
insert into t2 values (0, 200), (1, 201), (2, 202);

/*
3
3
*/

-- test self join
select * from t1 as a, t1 as b where a.t1v1 = b.t1v1;

/*
LogicalProjection { exprs: [ #0, #1, #2, #3 ] }
└── LogicalFilter
├── cond:Eq
│ ├── #0
│ └── #2
└── LogicalJoin { join_type: Cross, cond: true }
├── LogicalScan { table: t1 }
└── LogicalScan { table: t1 }
PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] }
├── PhysicalScan { table: t1 }
└── PhysicalScan { table: t1 }
0 0 0 0
1 1 1 1
2 2 2 2
*/

13 changes: 13 additions & 0 deletions optd-sqlplannertest/tests/joins/self-join.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Setup: create and populate tables t1 and t2 (only t1 is queried by the case below).
- sql: |
create table t1(t1v1 int, t1v2 int);
create table t2(t2v1 int, t2v3 int);
insert into t1 values (0, 0), (1, 1), (2, 2);
insert into t2 values (0, 200), (1, 201), (2, 202);
tasks:
- execute
# Self-join case: t1 is joined with itself on t1v1. The tasks request the logical and
# physical optd plans and then execute the query.
- sql: |
select * from t1 as a, t1 as b where a.t1v1 = b.t1v1;
desc: test self join
tasks:
- explain:logical_optd,physical_optd
- execute

0 comments on commit 68c697d

Please sign in to comment.