From f6371395f716de10b5487c2401aaacabff870002 Mon Sep 17 00:00:00 2001
From: Eric Kidd <git@randomhacks.net>
Date: Mon, 16 Oct 2023 11:07:59 -0400
Subject: [PATCH] Continue testing string functions

We implement several string-related functions. This required adding
another pass to the transpiler, so we could rewrite the AST.
---
 src/ast.rs                                | 179 ++++++++++++----------
 src/drivers/mod.rs                        |  36 ++++-
 src/drivers/snowflake/mod.rs              |  46 +++++-
 src/drivers/snowflake/rename_functions.rs |  65 ++++++++
 src/drivers/sqlite3/mod.rs                |  40 ++++-
 tests/sql/functions/README.md             |  18 +--
 tests/sql/functions/simple/coalesce.sql   |  24 +++
 tests/sql/functions/simple/regexp.sql     |   1 -
 tests/sql/functions/simple/sha256.sql     |  13 ++
 tests/sql/functions/simple/strings.sql    |  21 +++
 10 files changed, 344 insertions(+), 99 deletions(-)
 create mode 100644 src/drivers/snowflake/rename_functions.rs
 create mode 100644 tests/sql/functions/simple/coalesce.sql
 create mode 100644 tests/sql/functions/simple/sha256.sql
 create mode 100644 tests/sql/functions/simple/strings.sql
diff --git a/src/ast.rs b/src/ast.rs
index f788dd3..6fbdf8e 100644
--- a/src/ast.rs
+++ b/src/ast.rs
@@ -7,7 +7,7 @@
 //! - [`EmitDefault`]: Emit the AST as BigQuery SQL, extremely close to the
 //!   original input. This is optional, and only required if that type's
 //!   [`Emit`] implementation wants to use it.
-//! - [`Drive`]: Provided by the [`derive-visitor`
+//! - [`Drive`] and [`DriveMut`]: Provided by the [`derive-visitor`
 //!   crate](https://github.com/nikis05/derive-visitor). This provides an API to
 //!   traverse the AST generically, using the [`derive_visitor::Visitor`] trait.
 //!   This is honestly deep Rust magic, but it prevents us from needing to
@@ -27,7 +27,7 @@ use codespan_reporting::{
     diagnostic::{Diagnostic, Label},
     files::SimpleFile,
 };
-use derive_visitor::Drive;
+use derive_visitor::{Drive, DriveMut};
 use joinery_macros::{Emit, EmitDefault};
 
 use crate::{
@@ -188,7 +188,7 @@ impl<T: Emit> Emit for Vec<T> {
 }
 
 /// Our basic token type. This is used for all punctuation and keywords.
-#[derive(Clone, Debug, Drive)]
+#[derive(Clone, Debug, Drive, DriveMut)]
 pub struct Token {
     #[drive(skip)]
     pub span: Span,
@@ -342,6 +342,15 @@ impl<T: fmt::Debug> NodeVec<T> {
     }
 }
 
+impl<T: fmt::Debug + Clone> Clone for NodeVec<T> {
+    fn clone(&self) -> Self {
+        NodeVec {
+            nodes: self.nodes.clone(),
+            separators: self.separators.clone(),
+        }
+    }
+}
+
 impl<T: fmt::Debug> Default for NodeVec<T> {
     fn default() -> Self {
         NodeVec {
@@ -360,6 +369,16 @@ impl<'a, T: fmt::Debug> IntoIterator for &'a NodeVec<T> {
     }
 }
 
+// Mutable iterator for `DriveMut`.
+impl<'a, T: fmt::Debug> IntoIterator for &'a mut NodeVec<T> {
+    type Item = &'a mut T;
+    type IntoIter = std::slice::IterMut<'a, T>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.nodes.iter_mut()
+    }
+}
+
 impl<T: fmt::Debug + Emit> Emit for NodeVec<T> {
     fn emit(&self, t: Target, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         for (i, node) in self.nodes.iter().enumerate() {
@@ -384,7 +403,7 @@ impl<T: fmt::Debug + Emit> Emit for NodeVec<T> {
 }
 
 /// An identifier, such as a column name.
-#[derive(Clone, Debug, Drive)]
+#[derive(Clone, Debug, Drive, DriveMut)]
 pub struct Identifier {
     /// Our original token.
     pub token: Token,
@@ -450,7 +469,7 @@ impl Emit for Identifier {
 }
 
 /// A table name.
-#[derive(Clone, Debug, Drive, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, EmitDefault)]
 pub enum TableName {
     ProjectDatasetTable {
         project: Identifier,
@@ -505,7 +524,7 @@ impl Emit for TableName {
 }
 
 /// A table and a column name.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct TableAndColumnName {
     pub table_name: TableName,
     pub dot: Token,
@@ -513,7 +532,7 @@ pub struct TableAndColumnName {
 }
 
 /// An entire SQL program.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct SqlProgram {
     /// Any whitespace that appears before the first statement. This is represented
     /// as a token with an empty `token_str()`.
@@ -525,7 +544,7 @@ pub struct SqlProgram {
 }
 
 /// A statement in our abstract syntax tree.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub enum Statement {
     Query(QueryStatement),
     DeleteFrom(DeleteFromStatement),
@@ -536,7 +555,7 @@ pub enum Statement {
 }
 
 /// A query statement. This exists mainly because it's in the official grammar.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct QueryStatement {
     pub query_expression: QueryExpression,
 }
@@ -547,7 +566,7 @@ pub struct QueryStatement {
 ///
 /// [official grammar]:
 ///     https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#sql_syntax.
-#[derive(Debug, Drive, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, EmitDefault)]
 pub enum QueryExpression {
     SelectExpression(SelectExpression),
     Nested {
@@ -588,7 +607,7 @@ impl Emit for QueryExpression {
 }
 
 /// Common table expressions (CTEs).
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct CommonTableExpression {
     pub name: Identifier,
     pub as_token: Token,
@@ -598,7 +617,7 @@ pub struct CommonTableExpression {
 }
 
 /// Set operators.
-#[derive(Debug, Drive, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, EmitDefault)]
 pub enum SetOperator {
     UnionAll {
         union_token: Token,
@@ -645,7 +664,7 @@ impl Emit for SetOperator {
 }
 
 /// A `SELECT` expression.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct SelectExpression {
     pub select_options: SelectOptions,
     pub select_list: SelectList,
@@ -661,14 +680,14 @@ pub struct SelectExpression {
 }
 
 /// The head of a `SELECT`, including any modifiers.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct SelectOptions {
     pub select_token: Token,
     pub distinct: Option<Distinct>,
 }
 
 /// The `DISTINCT` modifier.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct Distinct {
     pub distinct_token: Token,
 }
@@ -691,13 +710,13 @@ pub struct Distinct {
 ///   select_expression:
 ///     expression [ [ AS ] alias ]
 /// ```
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct SelectList {
     pub items: NodeVec<SelectListItem>,
 }
 
 /// A single item in a `SELECT` list.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub enum SelectListItem {
     /// An expression, optionally with an alias.
     Expression {
@@ -716,7 +735,7 @@ pub enum SelectListItem {
 }
 
 /// An `EXCEPT` clause.
-#[derive(Debug, Drive, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, EmitDefault)]
 pub struct Except {
     pub except_token: Token,
     pub paren1: Token,
@@ -740,7 +759,7 @@ impl Emit for Except {
 }
 
 /// An SQL expression.
-#[derive(Debug, Drive, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, EmitDefault)]
 pub enum Expression {
     Literal {
         token: Token,
@@ -894,7 +913,7 @@ impl Emit for Expression {
 }
 
 /// A literal value.
-#[derive(Debug, Drive)]
+#[derive(Clone, Debug, Drive, DriveMut)]
 pub enum LiteralValue {
     Bool(#[drive(skip)] bool),
     Int64(#[drive(skip)] i64),
@@ -903,7 +922,7 @@ pub enum LiteralValue {
 }
 
 /// An `INTERVAL` expression.
-#[derive(Debug, Drive, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, EmitDefault)]
 pub struct IntervalExpression {
     pub interval_token: Token,
     pub number: Token, // Not even bothering to parse this for now.
@@ -926,7 +945,7 @@ impl Emit for IntervalExpression {
 }
 
 /// A date part in an `INTERVAL` expression.
-#[derive(Debug, Drive, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, EmitDefault)]
 pub struct DatePart {
     pub date_part_token: Token,
 }
@@ -944,7 +963,7 @@ impl Emit for DatePart {
 }
 
 /// A cast expression.
-#[derive(Debug, Drive, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, EmitDefault)]
 pub struct Cast {
     cast_token: Token,
     paren1: Token,
@@ -977,7 +996,7 @@ impl Emit for Cast {
 ///
 /// [official grammar]:
 ///     https://cloud.google.com/bigquery/docs/reference/standard-sql/operators#in_operators
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub enum InValueSet {
     QueryExpression {
         paren1: Token,
@@ -1002,7 +1021,7 @@ pub enum InValueSet {
 ///
 /// Not all combinations of our fields are valid. For example, we can't have
 /// a missing `ARRAY` and a `delim1` of `(`. We'll let the parser handle that.
-#[derive(Debug, Drive, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, EmitDefault)]
 pub struct ArrayExpression {
     pub array_token: Option<Token>,
     pub element_type: Option<ArrayElementType>,
@@ -1035,7 +1054,7 @@ impl Emit for ArrayExpression {
 
 /// An `ARRAY` definition. Either a `SELECT` expression or a list of
 /// expressions.
-#[derive(Debug, Drive, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, EmitDefault)]
 pub enum ArrayDefinition {
     Query(Box<QueryExpression>),
     Elements(NodeVec<Expression>),
@@ -1056,7 +1075,7 @@ impl Emit for ArrayDefinition {
 }
 
 /// A struct expression.
-#[derive(Debug, Drive, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, EmitDefault)]
 pub struct StructExpression {
     pub struct_token: Token,
     pub paren1: Token,
@@ -1080,7 +1099,7 @@ impl Emit for StructExpression {
 }
 
 /// The type of the elements in an `ARRAY` expression.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct ArrayElementType {
     pub lt: Token,
     pub elem_type: DataType,
@@ -1088,7 +1107,7 @@ pub struct ArrayElementType {
 }
 
 /// A `COUNT` expression.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub enum CountExpression {
     CountStar {
         count_token: Token,
@@ -1106,7 +1125,7 @@ pub enum CountExpression {
 }
 
 /// A `CASE WHEN` clause.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct CaseWhenClause {
     pub when_token: Token,
     pub condition: Box<Expression>,
@@ -1115,7 +1134,7 @@ pub struct CaseWhenClause {
 }
 
 /// A `CASE ELSE` clause.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct CaseElseClause {
     pub else_token: Token,
     pub result: Box<Expression>,
@@ -1123,7 +1142,7 @@ pub struct CaseElseClause {
 
 /// `CURRENT_DATE` may appear as either `CURRENT_DATE` or `CURRENT_DATE()`.
 /// And different databases seem to support one or the other or both.
-#[derive(Debug, Drive, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, EmitDefault)]
 pub struct CurrentDate {
     pub current_date_token: Token,
     pub empty_parens: Option<EmptyParens>,
@@ -1140,7 +1159,7 @@ impl Emit for CurrentDate {
 }
 
 /// An empty `()` expression.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct EmptyParens {
     pub paren1: Token,
     pub paren2: Token,
@@ -1149,7 +1168,7 @@ pub struct EmptyParens {
 /// Special "functions" that manipulate dates. These all take a [`DatePart`]
 /// as a final argument. So in Lisp sense, these are special forms or macros,
 /// not ordinary function calls.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct SpecialDateFunctionCall {
     pub function_name: Identifier,
     pub paren1: Token,
@@ -1158,14 +1177,14 @@ pub struct SpecialDateFunctionCall {
 }
 
 /// An expression or a date part.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub enum ExpressionOrDatePart {
     Expression(Expression),
     DatePart(DatePart),
 }
 
 /// A function call.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct FunctionCall {
     pub name: FunctionName,
     pub paren1: Token,
@@ -1175,7 +1194,7 @@ pub struct FunctionCall {
 }
 
 /// A function name.
-#[derive(Debug, Drive, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, EmitDefault)]
 pub enum FunctionName {
     ProjectDatasetFunction {
         project: Identifier,
@@ -1195,6 +1214,14 @@ pub enum FunctionName {
 }
 
 impl FunctionName {
+    pub fn function_identifier(&self) -> &Identifier {
+        match self {
+            FunctionName::ProjectDatasetFunction { function, .. }
+            | FunctionName::DatasetFunction { function, .. }
+            | FunctionName::Function { function } => function,
+        }
+    }
+
     /// Get the unescaped function name, in the original BigQuery form.
     pub fn unescaped_bigquery(&self) -> String {
         match self {
@@ -1219,11 +1246,7 @@ impl Emit for FunctionName {
         match t {
             Target::SQLite3 => {
                 let name = self.unescaped_bigquery();
-                let ws = match self {
-                    FunctionName::ProjectDatasetFunction { function, .. }
-                    | FunctionName::DatasetFunction { function, .. }
-                    | FunctionName::Function { function } => function.token.ws_only(),
-                };
+                let ws = self.function_identifier().token.ws_only();
                 write!(f, "{}{}", SQLite3Ident(&name), t.f(&ws))
             }
             _ => self.emit_default(t, f),
@@ -1236,7 +1259,7 @@ impl Emit for FunctionName {
 /// See the [official grammar][]. We only implement part of this.
 ///
 /// [official grammar]: https://cloud.google.com/bigquery/docs/reference/standard-sql/window-function-calls#syntax
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct OverClause {
     pub over_token: Token,
     pub paren1: Token,
@@ -1247,7 +1270,7 @@ pub struct OverClause {
 }
 
 /// A `PARTITION BY` clause for a window function.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct PartitionBy {
     pub partition_token: Token,
     pub by_token: Token,
@@ -1255,7 +1278,7 @@ pub struct PartitionBy {
 }
 
 /// An `ORDER BY` clause.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct OrderBy {
     pub order_token: Token,
     pub by_token: Token,
@@ -1263,28 +1286,28 @@ pub struct OrderBy {
 }
 
 /// An item in an `ORDER BY` clause.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct OrderByItem {
     pub expression: Expression,
     pub asc_desc: Option<AscDesc>,
 }
 
 /// An `ASC` or `DESC` modifier.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct AscDesc {
     direction: Token,
     nulls_clause: Option<NullsClause>,
 }
 
 /// A `NULLS FIRST` or `NULLS LAST` modifier.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct NullsClause {
     nulls_token: Token,
     first_last_token: Token,
 }
 
 /// A `LIMIT` clause.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct Limit {
     pub limit_token: Token,
     pub value: Box<Expression>,
@@ -1295,14 +1318,14 @@ pub struct Limit {
 /// See the [official grammar][]. We only implement part of this.
 ///
 /// [official grammar]: https://cloud.google.com/bigquery/docs/reference/standard-sql/window-function-calls#def_window_frame
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct WindowFrame {
     pub rows_token: Token,
     pub definition: WindowFrameDefinition,
 }
 
 /// A window frame definition.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub enum WindowFrameDefinition {
     Start(WindowFrameStart),
     Between {
@@ -1314,7 +1337,7 @@ pub enum WindowFrameDefinition {
 }
 
 /// A window frame start. Keep this simple for now.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub enum WindowFrameStart {
     UnboundedPreceding {
         unbounded_token: Token,
@@ -1323,7 +1346,7 @@ pub enum WindowFrameStart {
 }
 
 /// A window frame end. Keep this simple for now.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub enum WindowFrameEnd {
     CurrentRow {
         current_token: Token,
@@ -1332,7 +1355,7 @@ pub enum WindowFrameEnd {
 }
 
 /// Data types.
-#[derive(Debug, Drive, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, EmitDefault)]
 pub enum DataType {
     Bool(Token),
     Bytes(Token),
@@ -1413,14 +1436,14 @@ impl Emit for DataType {
 }
 
 /// A field in a `STRUCT` type.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct StructField {
     pub name: Option<Identifier>,
     pub data_type: DataType,
 }
 
 /// An array index expression.
-#[derive(Debug, Drive, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, EmitDefault)]
 pub struct IndexExpression {
     pub expression: Box<Expression>,
     pub bracket1: Token,
@@ -1445,7 +1468,7 @@ impl Emit for IndexExpression {
 }
 
 /// Different ways to index arrays.
-#[derive(Debug, Drive, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, EmitDefault)]
 pub enum IndexOffset {
     Simple(Box<Expression>),
     Offset {
@@ -1484,14 +1507,14 @@ impl Emit for IndexOffset {
 }
 
 /// An `AS` alias.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct Alias {
     pub as_token: Option<Token>,
     pub ident: Identifier,
 }
 
 /// The `FROM` clause.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct FromClause {
     pub from_token: Token,
     pub from_item: FromItem,
@@ -1499,7 +1522,7 @@ pub struct FromClause {
 }
 
 /// Items which may appear in a `FROM` clause.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub enum FromItem {
     /// A table name, optionally with an alias.
     TableName {
@@ -1525,7 +1548,7 @@ pub enum FromItem {
 }
 
 /// A join operation.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub enum JoinOperation {
     /// A `JOIN` clause.
     ConditionJoin {
@@ -1543,7 +1566,7 @@ pub enum JoinOperation {
 }
 
 /// The type of a join.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub enum JoinType {
     Inner {
         inner_token: Option<Token>,
@@ -1563,7 +1586,7 @@ pub enum JoinType {
 }
 
 /// The condition used for a `JOIN`.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub enum ConditionJoinOperator {
     Using {
         using_token: Token,
@@ -1578,14 +1601,14 @@ pub enum ConditionJoinOperator {
 }
 
 /// A `WHERE` clause.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct WhereClause {
     pub where_token: Token,
     pub expression: Expression,
 }
 
 /// A `GROUP BY` clause.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct GroupBy {
     pub group_token: Token,
     pub by_token: Token,
@@ -1593,14 +1616,14 @@ pub struct GroupBy {
 }
 
 /// A `HAVING` clause.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct Having {
     pub having_token: Token,
     pub expression: Expression,
 }
 
 /// A `QUALIFY` clause.
-#[derive(Debug, Drive, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, EmitDefault)]
 pub struct Qualify {
     pub qualify_token: Token,
     pub expression: Expression,
@@ -1620,7 +1643,7 @@ impl Emit for Qualify {
 }
 
 /// A `DELETE FROM` statement.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct DeleteFromStatement {
     pub delete_token: Token,
     pub from_token: Token,
@@ -1630,7 +1653,7 @@ pub struct DeleteFromStatement {
 }
 
 /// A `INSERT INTO` statement. We only support the `SELECT` version.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct InsertIntoStatement {
     pub insert_token: Token,
     pub into_token: Token,
@@ -1639,7 +1662,7 @@ pub struct InsertIntoStatement {
 }
 
 /// The data to be inserted into a table.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub enum InsertedData {
     /// A `SELECT` statement.
     Select { query: QueryExpression },
@@ -1651,7 +1674,7 @@ pub enum InsertedData {
 }
 
 /// A row in a `VALUES` clause.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct Row {
     pub paren1: Token,
     pub expressions: NodeVec<Expression>,
@@ -1659,7 +1682,7 @@ pub struct Row {
 }
 
 /// A `CREATE TABLE` statement.
-#[derive(Debug, Drive, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, EmitDefault)]
 pub struct CreateTableStatement {
     pub create_token: Token,
     pub or_replace: Option<OrReplace>,
@@ -1683,7 +1706,7 @@ impl Emit for CreateTableStatement {
 }
 
 /// A `CREATE VIEW` statement.
-#[derive(Debug, Drive, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, EmitDefault)]
 pub struct CreateViewStatement {
     pub create_token: Token,
     pub or_replace: Option<OrReplace>,
@@ -1708,7 +1731,7 @@ impl Emit for CreateViewStatement {
 }
 
 /// The `OR REPLACE` modifier.
-#[derive(Debug, Drive, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, EmitDefault)]
 pub struct OrReplace {
     pub or_token: Token,
     pub replace_token: Token,
@@ -1724,13 +1747,13 @@ impl Emit for OrReplace {
 }
 
 /// The `TEMPORARY` modifier.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct Temporary {
     pub temporary_token: Token,
 }
 
 /// The part of a `CREATE TABLE` statement that defines the columns.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub enum CreateTableDefinition {
     /// ( column_definition [, ...] )
     Columns {
@@ -1746,14 +1769,14 @@ pub enum CreateTableDefinition {
 }
 
 /// A column definition.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct ColumnDefinition {
     pub name: Identifier,
     pub data_type: DataType,
 }
 
 /// A `DROP VIEW` statement.
-#[derive(Debug, Drive, Emit, EmitDefault)]
+#[derive(Clone, Debug, Drive, DriveMut, Emit, EmitDefault)]
 pub struct DropViewStatement {
     pub drop_token: Token,
     pub view_token: Token,
diff --git a/src/drivers/mod.rs b/src/drivers/mod.rs
index ed41d7a..fb6580a 100644
--- a/src/drivers/mod.rs
+++ b/src/drivers/mod.rs
@@ -1,11 +1,11 @@
 //! Database drivers.
 
-use std::{collections::VecDeque, fmt, str::FromStr};
+use std::{borrow::Cow, collections::VecDeque, fmt, str::FromStr};
 
 use async_trait::async_trait;
 
 use crate::{
-    ast::{Emit, Target},
+    ast::{self, Emit, Target},
     errors::{format_err, Error, Result},
 };
 
@@ -67,12 +67,26 @@ pub trait Driver: Send + Sync + 'static {
 
     /// Execute a query represented as an AST. This can execute multiple
     /// statements.
-    async fn execute_ast(&mut self, ast: &crate::ast::SqlProgram) -> Result<()> {
-        for statement in &ast.statements {
-            let sql = statement.emit_to_string(self.target());
+    async fn execute_ast(&mut self, ast: &ast::SqlProgram) -> Result<()> {
+        let rewritten = self.rewrite_ast(ast)?;
+        for sql in rewritten.extra_native_sql {
             self.execute_native_sql_statement(&sql).await?;
         }
-        Ok(())
+        let sql = rewritten.ast.emit_to_string(self.target());
+        self.execute_native_sql_statement(&sql).await
+    }
+
+    /// Rewrite an AST to convert function names, etc., into versions that can
+    /// be passed to [`Emitted::emit_to_string`] for this database. This allows
+    /// us to do less database-specific work in [`Emit::emit`], and more in the
+    /// database drivers themselves. This can't change lexical syntax, but it
+    /// can change the structure of the AST.
+    fn rewrite_ast<'ast>(&self, ast: &'ast ast::SqlProgram) -> Result<RewrittenAst<'ast>> {
+        // Default implementation does nothing.
+        Ok(RewrittenAst {
+            extra_native_sql: vec![],
+            ast: Cow::Borrowed(ast),
+        })
     }
 
     /// Drop a table if it exists.
@@ -85,6 +99,16 @@ pub trait Driver: Send + Sync + 'static {
     async fn compare_tables(&mut self, result_table: &str, expected_table: &str) -> Result<()>;
 }
 
+/// The output of [`Driver::rewrite_ast`].
+pub struct RewrittenAst<'a> {
+    /// Extra native SQL statements to execute before the AST. Probably
+    /// temporary UDFs and things like that.
+    pub extra_native_sql: Vec<String>,
+
+    /// The new AST.
+    pub ast: Cow<'a, ast::SqlProgram>,
+}
+
 /// Extensions to [`Driver`] that are not ["object safe"][safe].
 ///
 /// [safe]: https://doc.rust-lang.org/reference/items/traits.html#object-safety
diff --git a/src/drivers/snowflake/mod.rs b/src/drivers/snowflake/mod.rs
index 144c424..c82aa79 100644
--- a/src/drivers/snowflake/mod.rs
+++ b/src/drivers/snowflake/mod.rs
@@ -1,21 +1,24 @@
 //! A Snowflake driver.
 
-use std::{env, fmt, str::FromStr};
+use std::{borrow::Cow, env, fmt, str::FromStr};
 
 use arrow_json::writer::record_batches_to_json_rows;
 use async_trait::async_trait;
+use derive_visitor::DriveMut;
 use once_cell::sync::Lazy;
 use regex::Regex;
 use serde_json::Value;
 use snowflake_api::{QueryResult, SnowflakeApi};
-use tracing::instrument;
+use tracing::{debug, instrument};
 
 use crate::{
-    ast::Target,
+    ast::{self, Emit, Target},
     errors::{format_err, Context, Error, Result},
 };
 
-use super::{sqlite3::SQLite3Ident, Column, Driver, DriverImpl, Locator};
+use super::{sqlite3::SQLite3Ident, Column, Driver, DriverImpl, Locator, RewrittenAst};
+
+mod rename_functions;
 
 /// Locator prefix for Snowflake.
 pub const SNOWFLAKE_LOCATOR_PREFIX: &str = "snowflake:";
@@ -190,6 +193,7 @@ impl Driver for SnowflakeDriver {
 
     #[instrument(skip(self, sql), err)]
     async fn execute_native_sql_statement(&mut self, sql: &str) -> Result<()> {
+        debug!(%sql, "executing SQL");
         self.connection
             .exec(sql)
             .await
@@ -197,6 +201,40 @@ impl Driver for SnowflakeDriver {
         Ok(())
     }
 
+    async fn execute_ast(&mut self, ast: &ast::SqlProgram) -> Result<()> {
+        let rewritten = self.rewrite_ast(ast)?;
+        for sql in rewritten.extra_native_sql {
+            self.execute_native_sql_statement(&sql).await?;
+        }
+
+        // We can only execute one statement at a time.
+        for statement in &rewritten.ast.statements {
+            let sql = statement.emit_to_string(self.target());
+            self.execute_native_sql_statement(&sql).await?;
+        }
+
+        // Reset session to drop `TEMP` tables and UDFs.
+        self.connection
+            .close_session()
+            .await
+            .context("could not end Snowflake session")
+    }
+
+    fn rewrite_ast<'ast>(&self, ast: &'ast ast::SqlProgram) -> Result<RewrittenAst<'ast>> {
+        let mut ast = ast.clone();
+        let mut renamer = rename_functions::RenameFunctions::default();
+        ast.drive_mut(&mut renamer);
+        let extra_native_sql = renamer
+            .udfs
+            .values()
+            .map(|udf| udf.to_sql())
+            .collect::<Vec<_>>();
+        Ok(RewrittenAst {
+            extra_native_sql,
+            ast: Cow::Owned(ast),
+        })
+    }
+
     #[instrument(skip(self))]
     async fn drop_table_if_exists(&mut self, table_name: &str) -> Result<()> {
         self.execute_native_sql_statement(&format!(
diff --git a/src/drivers/snowflake/rename_functions.rs b/src/drivers/snowflake/rename_functions.rs
new file mode 100644
index 0000000..a14ff4f
--- /dev/null
+++ b/src/drivers/snowflake/rename_functions.rs
@@ -0,0 +1,65 @@
+//! A simple tree-walker that renames functions to their Snowflake equivalents.
+
+use std::collections::HashMap;
+
+use derive_visitor::VisitorMut;
+
+use crate::ast::{FunctionName, Identifier};
+
+// A `phf_map!` of BigQuery function names to Snowflake function names. Use
+// this for simple renaming.
+static FUNCTION_NAMES: phf::Map<&'static str, &'static str> = phf::phf_map! {
+    "REGEXP_EXTRACT" => "REGEXP_SUBSTR",
+    "SHA256" => "SHA2_BINARY", // Second argument defaults to SHA256.
+};
+
+/// A Snowflake UDF (user-defined function).
+pub struct Udf {
+    pub decl: &'static str,
+    pub sql: &'static str,
+}
+
+impl Udf {
+    /// Generate the SQL to create this UDF.
+    pub fn to_sql(&self) -> String {
+        format!(
+            "CREATE OR REPLACE TEMP FUNCTION {} AS $$\n{}\n$$\n",
+            self.decl, self.sql
+        )
+    }
+}
+
+/// A `phf_map!` of BigQuery UDF names to Snowflake UDFs. Use this when we
+/// actually need to create a UDF as a helper function.
+static UDFS: phf::Map<&'static str, &'static Udf> = phf::phf_map! {
+    "TO_HEX" => &Udf { decl: "TO_HEX(b BINARY) RETURNS STRING", sql: "HEX_ENCODE(b, 0)" },
+};
+
+#[derive(Default, VisitorMut)]
+#[visitor(FunctionName(enter))]
+pub struct RenameFunctions {
+    // UDFs that we need to create, if we haven't already.
+    pub udfs: HashMap<String, &'static Udf>,
+}
+
+impl RenameFunctions {
+    fn enter_function_name(&mut self, function_name: &mut FunctionName) {
+        if let FunctionName::Function { function } = function_name {
+            let name = function.unescaped_bigquery().to_ascii_uppercase();
+            if let Some(snowflake_name) = FUNCTION_NAMES.get(&name) {
+                // Rename the function.
+                let orig_ident = function_name.function_identifier();
+                *function_name = FunctionName::Function {
+                    function: Identifier {
+                        token: orig_ident.token.with_token_str(snowflake_name),
+                        text: snowflake_name.to_string(),
+                    },
+                };
+            } else if let Some(udf) = UDFS.get(&name) {
+                // We'll need a UDF, so add it to our list it if isn't already
+                // there.
+                self.udfs.insert(name, udf);
+            }
+        }
+    }
+}
diff --git a/src/drivers/sqlite3/mod.rs b/src/drivers/sqlite3/mod.rs
index 9a94f72..29fb35c 100644
--- a/src/drivers/sqlite3/mod.rs
+++ b/src/drivers/sqlite3/mod.rs
@@ -4,7 +4,10 @@ use std::{fmt, str::FromStr, vec};
 
 use async_rusqlite::Connection;
 use async_trait::async_trait;
-use rusqlite::{functions::FunctionFlags, types};
+use rusqlite::{
+    functions::{self, FunctionFlags},
+    types,
+};
 
 use crate::{
     ast::Target,
@@ -102,6 +105,10 @@ impl SQLite3Driver {
             // yet, but it allows us to parse and execute queries that use `UNNEST`.
             register_unnest(conn).expect("failed to register UNNEST");
 
+            // Install real functions, where we have them.
+            conn.create_scalar_function("concat", -1, FunctionFlags::SQLITE_UTF8, func_concat)
+                .expect("failed to create concat function");
+
             // Install some dummy functions that always return NULL.
             let dummy_fns = &[
                 ("array", -1),
@@ -159,6 +166,37 @@ impl Driver for SQLite3Driver {
     }
 }
 
+/// Concatenate a list of values into a string. Mimics BigQuery's `CONCAT`.
+fn func_concat(ctx: &functions::Context<'_>) -> rusqlite::Result<types::Value> {
+    let mut result = String::new();
+    for idx in 0..ctx.len() {
+        match ctx.get_raw(idx) {
+            types::ValueRef::Null => return Ok(types::Value::Null),
+            types::ValueRef::Integer(i) => {
+                result.push_str(&i.to_string());
+            }
+            types::ValueRef::Real(r) => {
+                result.push_str(&r.to_string());
+            }
+            types::ValueRef::Text(s) => {
+                let s = String::from_utf8(s.to_owned()).map_err(|_| {
+                    rusqlite::Error::InvalidFunctionParameterType(idx, types::Type::Text)
+                })?;
+                result.push_str(&s);
+            }
+            types::ValueRef::Blob(_) => {
+                // `CONCAT` should also support being called with _all_ `BINARY`
+                // values, but you can't mix them with other types.
+                return Err(rusqlite::Error::InvalidFunctionParameterType(
+                    idx,
+                    types::Type::Blob,
+                ));
+            }
+        }
+    }
+    Ok(types::Value::Text(result))
+}
+
 #[async_trait]
 impl DriverImpl for SQLite3Driver {
     type Type = String;
diff --git a/tests/sql/functions/README.md b/tests/sql/functions/README.md
index a7a3b2b..6f477c4 100644
--- a/tests/sql/functions/README.md
+++ b/tests/sql/functions/README.md
@@ -9,22 +9,22 @@ Here is a list of functions that are high priorities to implement. You can
 generate your own version of this list by running `joinery parse
 --count-function-calls queries.csv`.
 
-- [ ] REGEXP_REPLACE(_,_,_)
-- [ ] REGEXP_EXTRACT(_,_)
-- [ ] COALESCE(*)
-- [ ] LOWER(_)
-- [ ] TO_HEX(_)
-- [ ] SHA256(_)
+- [x] REGEXP_REPLACE(_,_,_)
+- [x] REGEXP_EXTRACT(_,_)
+- [x] COALESCE(*)
+- [x] LOWER(_)
+- [x] TO_HEX(_)
+- [x] SHA256(_)
 - [ ] LENGTH(_)
-- [ ] CONCAT(*)
-- [ ] TRIM(_)
+- [x] CONCAT(*)
+- [x] TRIM(_)
 - [ ] ARRAY_TO_STRING(_,_)
 - [ ] SUM(_)
 - [ ] FARM_FINGERPRINT(_)
 - [ ] ANY_VALUE(_)
 - [ ] ROW_NUMBER() OVER(..)
 - [ ] COUNTIF(_)
-- [ ] UPPER(_)
+- [x] UPPER(_)
 - [ ] ARRAY_AGG(_)
 - [ ] DATE_TRUNC(_,_) (special)
 - [ ] MIN(_)
diff --git a/tests/sql/functions/simple/coalesce.sql b/tests/sql/functions/simple/coalesce.sql
new file mode 100644
index 0000000..e8d766c
--- /dev/null
+++ b/tests/sql/functions/simple/coalesce.sql
@@ -0,0 +1,24 @@
+-- COALESCE
+
+CREATE OR REPLACE TABLE __result1 AS
+SELECT
+    -- This is not supported on BigQuery.
+    --
+    -- COALESCE() AS coalesce_empty,
+    --
+    -- This is supported by BigQuery, but not by other databases. We can remove
+    -- it in the transpiler if absolutely necessary, but it's probably better to
+    -- make it an error.
+    --
+    -- COALESCE(1) AS coalesce_one,
+    COALESCE(1, 2) AS coalesce_two,
+    COALESCE(NULL, 2) AS coalesce_two_null,
+    COALESCE(NULL, 2, 3) AS coalesce_three_null;
+
+CREATE OR REPLACE TABLE __expected1 (
+    coalesce_two INT64,
+    coalesce_two_null INT64,
+    coalesce_three_null INT64,
+);
+INSERT INTO __expected1 VALUES
+  (1, 2, 2);
diff --git a/tests/sql/functions/simple/regexp.sql b/tests/sql/functions/simple/regexp.sql
index 518153f..d9f99cb 100644
--- a/tests/sql/functions/simple/regexp.sql
+++ b/tests/sql/functions/simple/regexp.sql
@@ -1,4 +1,3 @@
--- pending: snowflake REGEX_EXTRACT needs to be wrapped with REGEXP_SUBSTR
 -- pending: sqlite3 No regex fuctions
 --
 -- REGEXP_REPLACE, REGEXP_EXTRACT
diff --git a/tests/sql/functions/simple/sha256.sql b/tests/sql/functions/simple/sha256.sql
new file mode 100644
index 0000000..a4b9d78
--- /dev/null
+++ b/tests/sql/functions/simple/sha256.sql
@@ -0,0 +1,13 @@
+-- pending: sqlite3 No SHA256 function
+
+CREATE OR REPLACE TABLE __result1 AS
+SELECT
+    to_hex(sha256('hi')) AS hi,
+    to_hex(sha256(null)) AS null_arg;
+
+CREATE OR REPLACE TABLE __expected1 (
+    hi STRING,
+    null_arg STRING,
+);
+INSERT INTO __expected1 VALUES
+  ('8f434346648f6b96df89dda901c5176b10a6d83961dd3c1ac88b59b2dc327aa4', NULL);
diff --git a/tests/sql/functions/simple/strings.sql b/tests/sql/functions/simple/strings.sql
new file mode 100644
index 0000000..521cae1
--- /dev/null
+++ b/tests/sql/functions/simple/strings.sql
@@ -0,0 +1,21 @@
+-- LOWER, UPPER, TRIM, CONCAT
+CREATE OR REPLACE TABLE __result1 AS
+SELECT
+    LOWER('FOO') AS lower,
+    UPPER('foo') AS upper,
+    TRIM('  foo  ') AS trim,
+    CONCAT('foo', 'bar') AS concat,
+    CONCAT('x', 1) concat_casted,
+    CONCAT('x', NULL) concat_null;
+
+CREATE OR REPLACE TABLE __expected1 (
+    lower STRING,
+    upper STRING,
+    trim STRING,
+    concat STRING,
+    concat_casted STRING,
+    concat_null STRING,
+);
+INSERT INTO __expected1 VALUES
+  ('foo', 'FOO', 'foo', 'foobar', 'x1', NULL);
+