From b08ac1e960bdebc0a8a103caff9bdb0386bbb587 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Wed, 17 Jul 2024 14:23:54 +0200 Subject: [PATCH] Rename "field" to "column" throughout --- docs/architecture.md | 2 +- docs/sql.md | 12 ++-- src/sql/engine/session.rs | 12 ++-- src/sql/execution/execute.rs | 4 +- src/sql/execution/join.rs | 8 +-- src/sql/execution/write.rs | 10 ++-- src/sql/parser/ast.rs | 8 +-- src/sql/parser/parser.rs | 8 +-- src/sql/planner/optimizer.rs | 58 ++++++++++---------- src/sql/planner/plan.rs | 28 +++++----- src/sql/planner/planner.rs | 34 ++++++------ src/sql/testscripts/expressions/literals | 2 +- src/sql/testscripts/queries/aggregate | 4 +- src/sql/testscripts/queries/group_by | 10 ++-- src/sql/testscripts/queries/having | 4 +- src/sql/testscripts/queries/join_inner | 2 +- src/sql/testscripts/queries/limit | 2 +- src/sql/testscripts/queries/offset | 2 +- src/sql/testscripts/queries/order | 18 +++--- src/sql/testscripts/queries/select | 22 ++++---- src/sql/testscripts/queries/where_ | 16 +++--- src/sql/testscripts/writes/delete_where | 2 +- src/sql/testscripts/writes/insert | 2 +- src/sql/testscripts/writes/update | 2 +- src/sql/testscripts/writes/update_index | 2 +- src/sql/testscripts/writes/update_null | 2 +- src/sql/testscripts/writes/update_where | 2 +- src/sql/types/expression.rs | 70 ++++++++++++------------ src/sql/types/value.rs | 6 +- tests/e2e/client.rs | 2 +- 30 files changed, 179 insertions(+), 177 deletions(-) diff --git a/docs/architecture.md b/docs/architecture.md index 66cb9946f..12e94211b 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -479,7 +479,7 @@ ast::Statement::Select{ The parser will interpret the SQL _syntax_, determining the type of query and its parameters, returning an error for any invalid syntax. However, it has no idea if the table `people` -actually exists, or if the field `birthyear` is an integer - that is the job of the planner. +actually exists, or if the column `birthyear` is an integer - that is the job of the planner. Notably, the parser also parses expressions, such as `1 + 2 * 3`. This is non-trivial due to precedence rules, i.e. `2 * 3` should be evaluated first, but not if there are parentheses diff --git a/docs/sql.md b/docs/sql.md index 62e899e86..385ba9e07 100644 --- a/docs/sql.md +++ b/docs/sql.md @@ -57,7 +57,7 @@ The `-` prefix operator can be used to take negative numbers. ### Expressions -Expressions can be used wherever a value is expected, e.g. as `SELECT` fields and `INSERT` values. They are made up of constants, a column references, an operator invocations, and a function calls. +Expressions can be used wherever a value is expected, e.g. as `SELECT` columns nd `INSERT` values. They are made up of constants, a column references, an operator invocations, and a function calls. Column references can either be unqualified, e.g. `name`, or prefixed with the relation identifier separated by `.`, e.g. `person.name`. Unqualified identifiers must be unambiguous. @@ -290,7 +290,7 @@ If column names are given, an identical number of values must be given. If no co * ***`column_name`***: a column to insert into in the given table. Errors if it does not exist. -* ***`expression`***: an expression to insert into the corresponding column. Must be a constant expression, i.e. it cannot refer to table fields. +* ***`expression`***: an expression to insert into the corresponding column. Must be a constant expression, i.e. it cannot refer to table columns. #### Example @@ -337,9 +337,9 @@ RIGHT [ OUTER ] JOIN Fetches rows or expressions, either from table ***`table_name`*** (if given) or generated. -* ***`expression`***: [expression](#expressions) to fetch (can be a simple field name). +* ***`expression`***: [expression](#expressions) to fetch (can be a simple column name). -* ***`output_name`***: output column [identifier](#identifier), defaults to field name (if single field) otherwise nothing (displayed as `?`). +* ***`output_name`***: output column [identifier](#identifier), defaults to column name (if single column) otherwise nothing (displayed as `?`). * ***`table_name`***: table to fetch rows from. @@ -347,11 +347,11 @@ Fetches rows or expressions, either from table ***`table_name`*** (if given) or * ***`predicate`***: only return rows for which this [expression](#expressions) evaluates to `TRUE`. -* ***`group_expr`***: an expression to group aggregates by. Non-aggregate `SELECT` expressions must either reference a field given in `group_expr`, be idential with a `group_expr`, or have an `output_name` that is referenced by a `group_expr` field. +* ***`group_expr`***: an expression to group aggregates by. Non-aggregate `SELECT` expressions must either reference a column given in `group_expr`, be idential with a `group_expr`, or have an `output_name` that is referenced by a `group_expr` column. * ***`having_expr`***: only return aggregate results for which this [expression](#expressions) evaluates to `TRUE`. -* ***`order_expr`***: order rows by this expression (can be a simple field name). +* ***`order_expr`***: order rows by this expression (can be a simple column name). * ***`count`***: maximum number of rows to return. Must be a constant integer expression. diff --git a/src/sql/engine/session.rs b/src/sql/engine/session.rs index e901b878a..59d5d6f0c 100644 --- a/src/sql/engine/session.rs +++ b/src/sql/engine/session.rs @@ -184,17 +184,17 @@ impl TryFrom for Row { } } -/// Extracts the value of the first field in the first row. +/// Extracts the value of the first column in the first row. impl TryFrom for Value { type Error = Error; fn try_from(result: StatementResult) -> Result { let row: Row = result.try_into()?; - row.into_iter().next().ok_or(errdata!("no fields returned")) + row.into_iter().next().ok_or(errdata!("no columns returned")) } } -/// Extracts the first boolean value of the first field in the first row. +/// Extracts the first boolean value of the first column in the first row. impl TryFrom for bool { type Error = Error; @@ -204,7 +204,7 @@ impl TryFrom for bool { } } -/// Extracts the first f64 value of the first field in the first row. +/// Extracts the first f64 value of the first column in the first row. impl TryFrom for f64 { type Error = Error; @@ -214,7 +214,7 @@ impl TryFrom for f64 { } } -/// Extracts the first i64 value of the first field in the first row. +/// Extracts the first i64 value of the first column in the first row. impl TryFrom for i64 { type Error = Error; @@ -224,7 +224,7 @@ impl TryFrom for i64 { } } -/// Extracts the first string value of the first field in the first row. +/// Extracts the first string value of the first column in the first row. impl TryFrom for String { type Error = Error; diff --git a/src/sql/execution/execute.rs b/src/sql/execution/execute.rs index cf06ce11a..4594dd7ca 100644 --- a/src/sql/execution/execute.rs +++ b/src/sql/execution/execute.rs @@ -70,11 +70,11 @@ pub fn execute(node: Node, txn: &impl Transaction) -> Result { Ok(transform::filter(source, predicate)) } - Node::HashJoin { left, left_field, right, right_field, outer } => { + Node::HashJoin { left, left_column, right, right_column, outer } => { let right_size = right.size(); let left = execute(*left, txn)?; let right = execute(*right, txn)?; - join::hash(left, left_field, right, right_field, right_size, outer) + join::hash(left, left_column, right, right_column, right_size, outer) } Node::IndexLookup { table, column, values, alias: _ } => { diff --git a/src/sql/execution/join.rs b/src/sql/execution/join.rs index 65f2ad985..1e17fe44e 100644 --- a/src/sql/execution/join.rs +++ b/src/sql/execution/join.rs @@ -122,9 +122,9 @@ impl Iterator for NestedLoopIterator { /// TODO: add more tests for the multiple match case. pub(super) fn hash( left: Rows, - left_field: usize, + left_column: usize, right: Rows, - right_field: usize, + right_column: usize, right_size: usize, outer: bool, ) -> Result { @@ -132,7 +132,7 @@ pub(super) fn hash( let mut rows = right; let mut right: HashMap> = HashMap::new(); while let Some(row) = rows.next().transpose()? { - let id = row[right_field].clone(); + let id = row[right_column].clone(); right.entry(id).or_default().push(row); } @@ -146,7 +146,7 @@ pub(super) fn hash( return Box::new(std::iter::once(result)); }; // Join the left row with any matching right rows. - match right.get(&row[left_field]) { + match right.get(&row[left_column]) { Some(matches) => Box::new( std::iter::once(row) .cartesian_product(matches.clone()) diff --git a/src/sql/execution/write.rs b/src/sql/execution/write.rs index de68e29af..212ef74cb 100644 --- a/src/sql/execution/write.rs +++ b/src/sql/execution/write.rs @@ -6,7 +6,7 @@ use crate::{errdata, errinput}; use std::collections::{BTreeMap, HashMap}; /// Deletes rows, taking primary keys from the source (i.e. DELETE) using the -/// primary_key field index. Returns the number of rows deleted. +/// primary_key column index. Returns the number of rows deleted. pub(super) fn delete( txn: &impl Transaction, table: String, @@ -51,8 +51,8 @@ pub(super) fn insert( } } - // Fill in the row with default values for missing fields, and map - // source fields to table fields. + // Fill in the row with default values for missing columns, and map + // source columns to table columns. let mut row = Vec::with_capacity(table.columns.len()); for (cidx, column) in table.columns.iter().enumerate() { if column_map.is_none() && cidx < values.len() { @@ -87,8 +87,8 @@ pub(super) fn update( let mut updates = BTreeMap::new(); while let Some(row) = source.next().transpose()? { let mut new = row.clone(); - for (field, expr) in &expressions { - new[*field] = expr.evaluate(Some(&row))?; + for (column, expr) in &expressions { + new[*column] = expr.evaluate(Some(&row))?; } let id = row.into_iter().nth(primary_key).ok_or::(errdata!("short row"))?; updates.insert(id, new); diff --git a/src/sql/parser/ast.rs b/src/sql/parser/ast.rs index 80ecc608f..d59689afd 100644 --- a/src/sql/parser/ast.rs +++ b/src/sql/parser/ast.rs @@ -98,8 +98,8 @@ pub enum Order { /// Expressions. Can be nested. #[derive(Clone, Debug, Eq, Hash, PartialEq)] pub enum Expression { - /// A field reference, with an optional table qualifier. - Field(Option, String), + /// A column reference, optionally qualified with a table name. + Column(Option, String), /// A literal value. Literal(Literal), /// A function call (name and parameters). @@ -217,7 +217,7 @@ impl Expression { Self::Function(_, exprs) => exprs.iter().any(|expr| expr.walk(visitor)), - Self::Literal(_) | Self::Field(_, _) => true, + Self::Literal(_) | Self::Column(_, _) => true, } } @@ -264,7 +264,7 @@ impl Expression { Self::Function(_, exprs) => exprs.iter().for_each(|expr| expr.collect(visitor, c)), - Self::Literal(_) | Self::Field(_, _) => {} + Self::Literal(_) | Self::Column(_, _) => {} } } } diff --git a/src/sql/parser/parser.rs b/src/sql/parser/parser.rs index 459a8003b..38fa78252 100644 --- a/src/sql/parser/parser.rs +++ b/src/sql/parser/parser.rs @@ -510,7 +510,7 @@ impl<'a> Parser<'a> { /// Parses an expression atom. This is either: /// /// * A literal value. - /// * A field name. + /// * A column name. /// * A function call. /// * A parenthesized expression. fn parse_expression_atom(&mut self) -> Result { @@ -544,11 +544,11 @@ impl<'a> Parser<'a> { ast::Expression::Function(name, args) } - // Field name, either qualified as table.field or unqualified. + // Column name, either qualified as table.column or unqualified. Token::Ident(table) if self.next_is(Token::Period) => { - ast::Expression::Field(Some(table), self.next_ident()?) + ast::Expression::Column(Some(table), self.next_ident()?) } - Token::Ident(field) => ast::Expression::Field(None, field), + Token::Ident(column) => ast::Expression::Column(None, column), // Parenthesized expression. Token::OpenParen => { diff --git a/src/sql/planner/optimizer.rs b/src/sql/planner/optimizer.rs index e5f5e32b7..c41ecf45a 100644 --- a/src/sql/planner/optimizer.rs +++ b/src/sql/planner/optimizer.rs @@ -25,7 +25,7 @@ pub(super) fn fold_constants(node: Node) -> Result { // Transforms expressions. let transform = |expr: Expression| { // If the expression is constant, evaluate it. - if !expr.contains(&|e| matches!(e, Expression::Field(_))) { + if !expr.contains(&|e| matches!(e, Expression::Column(_))) { return expr.evaluate(None).map(Expression::Constant); } // If the expression is a logical operator, and one of the sides is @@ -55,7 +55,7 @@ pub(super) fn fold_constants(node: Node) -> Result { // Transform expressions after descending, both to perform the logical // short-circuiting on child expressions that have already been folded, and - // to reduce the quadratic cost when an expression contains a field. + // to reduce the quadratic cost when an expression contains a column. node.transform(&|n| n.transform_expressions(&Ok, &transform), &Ok) } @@ -123,7 +123,7 @@ pub(super) fn push_filters(node: Node) -> Result { for expr in cnf { let (mut ref_left, mut ref_right) = (false, false); expr.walk(&mut |e| { - if let Expression::Field(index) = e { + if let Expression::Column(index) = e { ref_left = ref_left || *index < left.size(); ref_right = ref_right || *index >= left.size(); } @@ -146,32 +146,32 @@ pub(super) fn push_filters(node: Node) -> Result { // commonly happens when joining a foreign key (which is indexed) on a // primary key, and we want to make use of the foreign key index, e.g.: // SELECT m.name, g.name FROM movies m JOIN genres g ON m.genre_id = g.id AND g.id = 7; - let left_lookups: HashMap = push_left // field → push_left index + let left_lookups: HashMap = push_left // column → push_left index .iter() .enumerate() - .filter_map(|(i, expr)| expr.is_field_lookup().map(|field| (field, i))) + .filter_map(|(i, expr)| expr.is_column_lookup().map(|column| (column, i))) .collect(); - let right_lookups: HashMap = push_right // field → push_right index + let right_lookups: HashMap = push_right // column → push_right index .iter() .enumerate() - .filter_map(|(i, expr)| expr.is_field_lookup().map(|field| (field, i))) + .filter_map(|(i, expr)| expr.is_column_lookup().map(|column| (column, i))) .collect(); for expr in &predicate { // Find equijoins. let Expression::Equal(lhs, rhs) = expr else { continue }; - let Expression::Field(l) = lhs.as_ref() else { continue }; - let Expression::Field(r) = rhs.as_ref() else { continue }; + let Expression::Column(l) = lhs.as_ref() else { continue }; + let Expression::Column(r) = rhs.as_ref() else { continue }; // The lhs may be a reference to the right source; swap them. let (l, r) = if l > r { (r, l) } else { (l, r) }; - // Check if either side is a field lookup, and copy it over. + // Check if either side is a column lookup, and copy it over. if let Some(expr) = left_lookups.get(l).map(|i| push_left[*i].clone()) { - push_right.push(expr.replace_field(*l, *r)); + push_right.push(expr.replace_column(*l, *r)); } if let Some(expr) = right_lookups.get(r).map(|i| push_right[*i].clone()) { - push_left.push(expr.replace_field(*r, *l)); + push_left.push(expr.replace_column(*r, *l)); } } @@ -184,11 +184,11 @@ pub(super) fn push_filters(node: Node) -> Result { } if let Some(mut expr) = Expression::and_vec(push_right) { - // Right fields have indexes in the joined row; shift them left. - expr = expr.shift_field(-(left.size() as isize)); + // Right columns have indexes in the joined row; shift them left. + expr = expr.shift_column(-(left.size() as isize)); if let Some(mut expr) = push_into(expr, &mut right) { - // Pushdown failed, undo the field index shift. - expr = expr.shift_field(left.size() as isize); + // Pushdown failed, undo the column index shift. + expr = expr.shift_column(left.size() as isize); predicate.push(expr) } } @@ -222,7 +222,7 @@ pub(super) fn index_lookup(node: Node) -> Result { // Find the first expression that's either a primary key or secondary // index lookup. We could be more clever here, but this is fine. let Some(i) = cnf.iter().enumerate().find_map(|(i, e)| { - e.is_field_lookup() + e.is_column_lookup() .filter(|f| *f == table.primary_key || table.columns[*f].index) .and(Some(i)) }) else { @@ -230,7 +230,7 @@ pub(super) fn index_lookup(node: Node) -> Result { }; // Extract the lookup values and expression from the cnf vector. - let (column, values) = cnf.remove(i).into_field_values().expect("field lookup failed"); + let (column, values) = cnf.remove(i).into_column_values().expect("column lookup failed"); // Build the primary key or secondary index lookup node. if column == table.primary_key { @@ -249,7 +249,7 @@ pub(super) fn index_lookup(node: Node) -> Result { node.transform(&Ok, &|n| Ok(transform(n))) } -/// Uses a hash join instead of a nested loop join for single-field equijoins. +/// Uses a hash join instead of a nested loop join for single-column equijoins. pub(super) fn join_type(node: Node) -> Result { let transform = |node| match node { // We could use a single match if we had deref patterns, but alas. @@ -259,16 +259,16 @@ pub(super) fn join_type(node: Node) -> Result { predicate: Some(Expression::Equal(lhs, rhs)), outer, } => match (*lhs, *rhs) { - (Expression::Field(mut left_field), Expression::Field(mut right_field)) => { - // The LHS field may be a field in the right table; swap them. - if right_field < left_field { - (left_field, right_field) = (right_field, left_field); + (Expression::Column(mut left_column), Expression::Column(mut right_column)) => { + // The LHS column may be a column in the right table; swap them. + if right_column < left_column { + (left_column, right_column) = (right_column, left_column); } - // The NestedLoopJoin predicate uses field indexes in the joined - // row, while the HashJoin uses field indexes for each table - // individually. Adjust the RHS field reference. - right_field -= left.size(); - Node::HashJoin { left, left_field, right, right_field, outer } + // The NestedLoopJoin predicate uses column indexes in the + // joined row, while the HashJoin uses column indexes for each + // table individually. Adjust the RHS column reference. + right_column -= left.size(); + Node::HashJoin { left, left_column, right, right_column, outer } } (lhs, rhs) => { let predicate = Some(Expression::Equal(lhs.into(), rhs.into())); @@ -342,7 +342,7 @@ pub(super) fn short_circuit(node: Node) -> Result { && expressions .iter() .enumerate() - .all(|(i, e)| matches!(e, Expression::Field(f) if i == *f)) => + .all(|(i, e)| matches!(e, Expression::Column(f) if i == *f)) => { *source } diff --git a/src/sql/planner/plan.rs b/src/sql/planner/plan.rs index 34d7a9c73..74190a400 100644 --- a/src/sql/planner/plan.rs +++ b/src/sql/planner/plan.rs @@ -82,15 +82,15 @@ pub enum Node { /// Filters source rows, by only emitting rows for which the predicate /// evaluates to true. Filter { source: Box, predicate: Expression }, - /// Joins the left and right sources on the given fields by building an + /// Joins the left and right sources on the given columns by building an /// in-memory hashmap of the right source and looking up matches for each /// row in the left source. When outer is true (e.g. LEFT JOIN), a left row /// without a right match is emitted anyway, with NULLs for the right row. HashJoin { left: Box, - left_field: usize, + left_column: usize, right: Box, - right_field: usize, + right_column: usize, outer: bool, }, /// Looks up the given values in a secondary index and emits matching rows. @@ -149,11 +149,11 @@ impl Node { Self::Filter { source, predicate } => { Self::Filter { source: transform(source)?, predicate } } - Self::HashJoin { left, left_field, right, right_field, outer } => Self::HashJoin { + Self::HashJoin { left, left_column, right, right_column, outer } => Self::HashJoin { left: transform(left)?, - left_field, + left_column, right: transform(right)?, - right_field, + right_column, outer, }, Self::Limit { source, limit } => Self::Limit { source: transform(source)?, limit }, @@ -252,13 +252,13 @@ impl Node { // Some nodes rearrange columns. Route them to the correct // upstream column where appropriate. Self::Aggregate { source, group_by, .. } => match group_by.get(index) { - Some(Expression::Field(index)) => source.column_label(*index), + Some(Expression::Column(index)) => source.column_label(*index), Some(_) | None => Label::None, }, Self::Projection { source, expressions, aliases } => match aliases.get(index) { Some(Label::None) | None => match expressions.get(index) { - // Unaliased field references route to the source. - Some(Expression::Field(index)) => source.column_label(*index), + // Unaliased column references route to the source. + Some(Expression::Column(index)) => source.column_label(*index), // Unaliased expressions don't have a name. Some(_) | None => Label::None, }, @@ -403,14 +403,14 @@ impl Node { write!(f, "Filter: {}", predicate.format(source))?; source.format(f, prefix, false, true)?; } - Self::HashJoin { left, left_field, right, right_field, outer } => { + Self::HashJoin { left, left_column, right, right_column, outer } => { let kind = if *outer { "outer" } else { "inner" }; - let left_label = match left.column_label(*left_field) { - Label::None => format!("left #{left_field}"), + let left_label = match left.column_label(*left_column) { + Label::None => format!("left #{left_column}"), label => format!("{label}"), }; - let right_label = match right.column_label(*right_field) { - Label::None => format!("right #{right_field}"), + let right_label = match right.column_label(*right_column) { + Label::None => format!("right #{right_column}"), label => format!("{label}"), }; write!(f, "HashJoin: {kind} on {left_label} = {right_label}")?; diff --git a/src/sql/planner/planner.rs b/src/sql/planner/planner.rs index a03e391d7..0fae412e5 100644 --- a/src/sql/planner/planner.rs +++ b/src/sql/planner/planner.rs @@ -414,7 +414,7 @@ impl<'a, C: Catalog> Planner<'a, C> { aggregates } - /// Builds hidden columns for a projection to pass through fields that are + /// Builds hidden columns for a projection to pass through columns that are /// used by downstream nodes. Consider e.g.: /// /// SELECT id FROM table ORDER BY value @@ -440,13 +440,13 @@ impl<'a, C: Catalog> Planner<'a, C> { if let Some(index) = scope.lookup_aggregate(expr) { if child_scope.lookup_aggregate(expr).is_none() { child_scope.add_passthrough(scope, index, true); - hidden.push(Expression::Field(index)); + hidden.push(Expression::Column(index)); return true; } } - // Look for field references that don't exist post-projection. - let ast::Expression::Field(table, column) = expr else { + // Look for column references that don't exist post-projection. + let ast::Expression::Column(table, column) = expr else { return true; }; if child_scope.lookup_column(table.as_deref(), column).is_ok() { @@ -459,7 +459,7 @@ impl<'a, C: Catalog> Planner<'a, C> { }; // Add the hidden column to the projection. child_scope.add_passthrough(scope, index, true); - hidden.push(Expression::Field(index)); + hidden.push(Expression::Column(index)); true }); } @@ -473,7 +473,7 @@ impl<'a, C: Catalog> Planner<'a, C> { // Look up aggregate functions or GROUP BY expressions. These were added // to the parent scope when building the Aggregate node, if any. if let Some(index) = scope.lookup_aggregate(&expr) { - return Ok(Field(index)); + return Ok(Column(index)); } // Helper for building a boxed expression. @@ -489,8 +489,8 @@ impl<'a, C: Catalog> Planner<'a, C> { ast::Literal::Float(f) => Value::Float(f), ast::Literal::String(s) => Value::String(s), }), - ast::Expression::Field(table, name) => { - Field(scope.lookup_column(table.as_deref(), &name)?) + ast::Expression::Column(table, name) => { + Column(scope.lookup_column(table.as_deref(), &name)?) } // Currently, all functions are aggregates, and processed above. // TODO: consider adding some basic functions for fun. @@ -547,7 +547,7 @@ impl<'a, C: Catalog> Planner<'a, C> { /// the plan and used during execution. pub struct Scope { /// The currently visible columns. If empty, only constant expressions can - /// be used (no field references). + /// be used (no column references). columns: Vec