From c1359f44559c01d2748737e8c1bfa24c5e08cbd7 Mon Sep 17 00:00:00 2001
From: Krishna Gopal <kgopal@pinterest.com>
Date: Mon, 14 Aug 2023 09:51:29 -0700
Subject: [PATCH] Implement query linter suggestions (#1306)

* Implement Presto SQLGlot Optimizers (#1300)

Create a PrestoOptimizingValidator that extends the PrestoExplainValidator to run validators that provide suggestions to rewrite & optimize the query the query

* Implement UI for query optimization suggestions (#1302)

Create a new tooltip for users to accept query optimization suggestions

* Update querybook version, fix PrestoOptimizingValidator (#1304)

* Fix minor UI suggestions (#1305)
---
 .../docs/changelog/breaking_change.md         |   3 +
 package.json                                  |   2 +-
 .../validation/base_query_validator.py        |  21 +-
 .../validators/base_sqlglot_validator.py      |  62 ++++
 .../validators/presto_optimizing_validator.py | 223 +++++++++++++
 .../test_presto_explain_validator.py          |   8 +-
 .../test_presto_optimizing_validator.py       | 302 ++++++++++++++++++
 .../CodeMirrorTooltip/CodeMirrorTooltip.tsx   |  20 +-
 .../CodeMirrorTooltip/SuggestionTooltip.tsx   |  43 +++
 .../components/QueryEditor/QueryEditor.tsx    | 172 +++++++---
 .../TemplateQueryView/TemplatedQueryView.tsx  |  15 +-
 querybook/webapp/const/queryExecution.ts      |   7 +-
 querybook/webapp/hooks/useDebouncedFn.ts      |  10 +
 .../webapp/lib/codemirror/codemirror-lint.ts  |  17 +-
 .../lib/sql-helper/sql-context-free-linter.ts |   2 +
 .../sql-context-sensitive-linter.ts           |   2 +
 querybook/webapp/lib/sql-helper/sql-lexer.ts  |   1 +
 requirements/parser/sqlglot.txt               |   2 +-
 requirements/test.txt                         |   1 +
 19 files changed, 838 insertions(+), 75 deletions(-)
 create mode 100644 querybook/server/lib/query_analysis/validation/validators/base_sqlglot_validator.py
 create mode 100644 querybook/server/lib/query_analysis/validation/validators/presto_optimizing_validator.py
 create mode 100644 querybook/tests/test_lib/test_query_analysis/test_validation/test_validators/test_presto_optimizing_validator.py
 create mode 100644 querybook/webapp/components/CodeMirrorTooltip/SuggestionTooltip.tsx
 create mode 100644 querybook/webapp/hooks/useDebouncedFn.ts

diff --git a/docs_website/docs/changelog/breaking_change.md b/docs_website/docs/changelog/breaking_change.md
index 9b0a134ed..e42ef7e1e 100644
--- a/docs_website/docs/changelog/breaking_change.md
+++ b/docs_website/docs/changelog/breaking_change.md
@@ -7,6 +7,9 @@ slug: /changelog
 
 Here are the list of breaking changes that you should be aware of when updating Querybook:
 
+## v3.27.0
+Updated properties of `QueryValidationResult` object. `line` and `ch` are replaced with `start_line` and `start_ch` respectively.
+
 ## v3.22.0
 
 Updated the charset of table `data_element` to `utf8mb4`. For those mysql db's default charset is not utf8, please run below sql to update it if needed.
diff --git a/package.json b/package.json
index 415df13ae..4b1582516 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
     "name": "querybook",
-    "version": "3.26.3",
+    "version": "3.27.0",
     "description": "A Big Data Webapp",
     "private": true,
     "scripts": {
diff --git a/querybook/server/lib/query_analysis/validation/base_query_validator.py b/querybook/server/lib/query_analysis/validation/base_query_validator.py
index cdbbe1490..d2ed3a3d2 100644
--- a/querybook/server/lib/query_analysis/validation/base_query_validator.py
+++ b/querybook/server/lib/query_analysis/validation/base_query_validator.py
@@ -21,25 +21,34 @@ class QueryValidationSeverity(Enum):
 class QueryValidationResult(object):
     def __init__(
         self,
-        line: int,  # 0 based
-        ch: int,  # location of the starting token
+        start_line: int,  # 0 based
+        start_ch: int,  # location of the starting token
         severity: QueryValidationSeverity,
         message: str,
         obj_type: QueryValidationResultObjectType = QueryValidationResultObjectType.LINT,
+        end_line: int = None,  # 0 based
+        end_ch: int = None,  # location of the ending token
+        suggestion: str = None,
     ):
         self.type = obj_type
-        self.line = line
-        self.ch = ch
+        self.start_line = start_line
+        self.start_ch = start_ch
+        self.end_line = end_line
+        self.end_ch = end_ch
         self.severity = severity
         self.message = message
+        self.suggestion = suggestion
 
     def to_dict(self):
         return {
             "type": self.type.value,
-            "line": self.line,
-            "ch": self.ch,
+            "start_line": self.start_line,
+            "start_ch": self.start_ch,
+            "end_line": self.end_line,
+            "end_ch": self.end_ch,
             "severity": self.severity.value,
             "message": self.message,
+            "suggestion": self.suggestion,
         }
 
 
diff --git a/querybook/server/lib/query_analysis/validation/validators/base_sqlglot_validator.py b/querybook/server/lib/query_analysis/validation/validators/base_sqlglot_validator.py
new file mode 100644
index 000000000..067f1eef0
--- /dev/null
+++ b/querybook/server/lib/query_analysis/validation/validators/base_sqlglot_validator.py
@@ -0,0 +1,62 @@
+from abc import ABCMeta, abstractmethod
+from typing import List, Tuple
+from sqlglot import Tokenizer
+from sqlglot.tokens import Token
+
+from lib.query_analysis.validation.base_query_validator import (
+    QueryValidationResult,
+    QueryValidationResultObjectType,
+    QueryValidationSeverity,
+)
+
+
+class BaseSQLGlotValidator(metaclass=ABCMeta):
+    @property
+    @abstractmethod
+    def message(self) -> str:
+        raise NotImplementedError()
+
+    @property
+    @abstractmethod
+    def severity(self) -> QueryValidationSeverity:
+        raise NotImplementedError()
+
+    @property
+    @abstractmethod
+    def tokenizer(self) -> Tokenizer:
+        raise NotImplementedError()
+
+    def _tokenize_query(self, query: str) -> List[Token]:
+        return self.tokenizer.tokenize(query)
+
+    def _get_query_coordinate_by_index(self, query: str, index: int) -> Tuple[int, int]:
+        rows = query[: index + 1].splitlines(keepends=False)
+        return len(rows) - 1, len(rows[-1]) - 1
+
+    def _get_query_validation_result(
+        self,
+        query: str,
+        start_index: int,
+        end_index: int,
+        suggestion: str = None,
+        validation_result_object_type=QueryValidationResultObjectType.LINT,
+    ):
+        start_line, start_ch = self._get_query_coordinate_by_index(query, start_index)
+        end_line, end_ch = self._get_query_coordinate_by_index(query, end_index)
+
+        return QueryValidationResult(
+            start_line,
+            start_ch,
+            self.severity,
+            self.message,
+            validation_result_object_type,
+            end_line=end_line,
+            end_ch=end_ch,
+            suggestion=suggestion,
+        )
+
+    @abstractmethod
+    def get_query_validation_results(
+        self, query: str, raw_tokens: List[Token] = None
+    ) -> List[QueryValidationResult]:
+        raise NotImplementedError()
diff --git a/querybook/server/lib/query_analysis/validation/validators/presto_optimizing_validator.py b/querybook/server/lib/query_analysis/validation/validators/presto_optimizing_validator.py
new file mode 100644
index 000000000..5d726a4ff
--- /dev/null
+++ b/querybook/server/lib/query_analysis/validation/validators/presto_optimizing_validator.py
@@ -0,0 +1,223 @@
+from typing import List
+from sqlglot import TokenType, Tokenizer
+from sqlglot.dialects import Trino
+from sqlglot.tokens import Token
+
+from lib.query_analysis.validation.base_query_validator import (
+    BaseQueryValidator,
+    QueryValidationResult,
+    QueryValidationSeverity,
+)
+from lib.query_analysis.validation.validators.presto_explain_validator import (
+    PrestoExplainValidator,
+)
+from lib.query_analysis.validation.validators.base_sqlglot_validator import (
+    BaseSQLGlotValidator,
+)
+
+
+class BasePrestoSQLGlotValidator(BaseSQLGlotValidator):
+    @property
+    def tokenizer(self) -> Tokenizer:
+        return Trino.Tokenizer()
+
+
+class UnionAllValidator(BasePrestoSQLGlotValidator):
+    @property
+    def message(self):
+        return "Using UNION ALL instead of UNION will execute faster"
+
+    @property
+    def severity(self) -> str:
+        return QueryValidationSeverity.WARNING
+
+    def get_query_validation_results(
+        self, query: str, raw_tokens: List[Token] = None
+    ) -> List[QueryValidationResult]:
+        if raw_tokens is None:
+            raw_tokens = self._tokenize_query(query)
+        validation_errors = []
+        for i, token in enumerate(raw_tokens):
+            if token.token_type == TokenType.UNION:
+                if (
+                    i < len(raw_tokens) - 1
+                    and raw_tokens[i + 1].token_type != TokenType.ALL
+                ):
+                    validation_errors.append(
+                        self._get_query_validation_result(
+                            query, token.start, token.end, "UNION ALL"
+                        )
+                    )
+        return validation_errors
+
+
+class ApproxDistinctValidator(BasePrestoSQLGlotValidator):
+    @property
+    def message(self):
+        return (
+            "Using APPROX_DISTINCT(x) instead of COUNT(DISTINCT x) will execute faster"
+        )
+
+    @property
+    def severity(self) -> str:
+        return QueryValidationSeverity.WARNING
+
+    def get_query_validation_results(
+        self, query: str, raw_tokens: List[Token] = None
+    ) -> List[QueryValidationResult]:
+        if raw_tokens is None:
+            raw_tokens = self._tokenize_query(query)
+
+        validation_errors = []
+        for i, token in enumerate(raw_tokens):
+            if (
+                i < len(raw_tokens) - 2
+                and token.token_type == TokenType.VAR
+                and token.text.lower().strip() == "count"
+                and raw_tokens[i + 1].token_type == TokenType.L_PAREN
+                and raw_tokens[i + 2].token_type == TokenType.DISTINCT
+            ):
+                validation_errors.append(
+                    self._get_query_validation_result(
+                        query,
+                        token.start,
+                        raw_tokens[i + 2].end,
+                        "APPROX_DISTINCT(",
+                    )
+                )
+        return validation_errors
+
+
+class RegexpLikeValidator(BasePrestoSQLGlotValidator):
+    @property
+    def message(self):
+        return "Combining multiple LIKEs into one REGEXP_LIKE will execute faster"
+
+    @property
+    def severity(self) -> str:
+        return QueryValidationSeverity.WARNING
+
+    def _get_regexp_like_suggestion(self, column_name: str, like_strings: List[str]):
+        sanitized_like_strings = [
+            like_string.strip("\"'") for like_string in like_strings
+        ]
+        return f"REGEXP_LIKE({column_name}, '{'|'.join(sanitized_like_strings)}')"
+
+    def get_query_validation_results(
+        self, query: str, raw_tokens: List[Token] = None
+    ) -> List[QueryValidationResult]:
+        if raw_tokens is None:
+            raw_tokens = self._tokenize_query(query)
+
+        validation_errors = []
+
+        start_column_token = None
+        like_strings = []
+        token_idx = 0
+        while token_idx < len(raw_tokens) - 2:
+            token_1 = raw_tokens[token_idx]
+            token_2 = raw_tokens[token_idx + 1]
+            token_3 = raw_tokens[token_idx + 2]
+
+            # Check if the next set of three tokens matches a "like" phrase (i.e. <column> LIKE <string>)
+            if (
+                token_1.token_type == TokenType.VAR
+                and (
+                    start_column_token is None
+                    or token_1.text == start_column_token.text
+                )
+                and token_2.token_type == TokenType.LIKE
+                and token_3.token_type == TokenType.STRING
+            ):
+                if start_column_token is None:
+                    start_column_token = raw_tokens[token_idx]
+                like_strings.append(token_3.text)
+                token_idx += 3
+                if (
+                    token_idx == len(raw_tokens)
+                    or raw_tokens[token_idx].token_type != TokenType.OR
+                ):  # No "OR" token following the phrase, so we cannot combine additional phrases
+                    # Check if there are multiple phrases that can be combined
+                    if len(like_strings) > 1:
+                        validation_errors.append(
+                            self._get_query_validation_result(
+                                query,
+                                start_column_token.start,
+                                raw_tokens[token_idx - 1].end,
+                                suggestion=self._get_regexp_like_suggestion(
+                                    start_column_token.text, like_strings
+                                ),
+                            )
+                        )
+                    start_column_token = None
+                    like_strings = []
+
+            # If next tokens do not match the "like" phrase pattern, check if a suggestion can be made if there are previously matched phrases
+            elif start_column_token is not None:
+                if (
+                    len(like_strings) > 1
+                ):  # Check if a validation suggestion can be created
+                    validation_errors.append(
+                        self._get_query_validation_result(
+                            query,
+                            start_column_token.start,
+                            raw_tokens[token_idx - 1].end,
+                            suggestion=self._get_regexp_like_suggestion(
+                                start_column_token.text, like_strings
+                            ),
+                        )
+                    )
+                start_column_token = None
+                like_strings = []
+            token_idx += 1
+
+        return validation_errors
+
+
+class PrestoOptimizingValidator(BaseQueryValidator):
+    def languages(self):
+        return ["presto", "trino"]
+
+    def _get_explain_validator(self):
+        return PrestoExplainValidator("")
+
+    def _get_sqlglot_validators(self) -> List[BaseSQLGlotValidator]:
+        return [
+            UnionAllValidator(),
+            ApproxDistinctValidator(),
+            RegexpLikeValidator(),
+        ]
+
+    def _get_sql_glot_validation_results(
+        self, query: str
+    ) -> List[QueryValidationResult]:
+        validation_suggestions = []
+
+        query_raw_tokens = None
+        for validator in self._get_sqlglot_validators():
+            if query_raw_tokens is None:
+                query_raw_tokens = validator._tokenize_query(query)
+            validation_suggestions.extend(
+                validator.get_query_validation_results(
+                    query, raw_tokens=query_raw_tokens
+                )
+            )
+
+        return validation_suggestions
+
+    def _get_presto_explain_validation_results(
+        self, query: str, uid: int, engine_id: int
+    ) -> List[QueryValidationResult]:
+        return self._get_explain_validator().validate(query, uid, engine_id)
+
+    def validate(
+        self,
+        query: str,
+        uid: int,
+        engine_id: int,
+    ) -> List[QueryValidationResult]:
+        validation_results = [
+            *self._get_presto_explain_validation_results(query, uid, engine_id),
+            *self._get_sql_glot_validation_results(query),
+        ]
+        return validation_results
diff --git a/querybook/tests/test_lib/test_query_analysis/test_validation/test_validators/test_presto_explain_validator.py b/querybook/tests/test_lib/test_query_analysis/test_validation/test_validators/test_presto_explain_validator.py
index ae0328f17..fc910698c 100644
--- a/querybook/tests/test_lib/test_query_analysis/test_validation/test_validators/test_presto_explain_validator.py
+++ b/querybook/tests/test_lib/test_query_analysis/test_validation/test_validators/test_presto_explain_validator.py
@@ -41,11 +41,11 @@ def test_simple(self):
             1, statement_start_locations, error_line=0, error_ch=2, error_msg=""
         )
 
-        self.assertEqual(validation_result.line, 0)
-        self.assertEqual(validation_result.ch, 12)
+        self.assertEqual(validation_result.start_line, 0)
+        self.assertEqual(validation_result.start_ch, 12)
 
         validation_result = self._validator._map_statement_error_to_query(
             2, statement_start_locations, error_line=0, error_ch=5, error_msg=""
         )
-        self.assertEqual(validation_result.line, 1)
-        self.assertEqual(validation_result.ch, 5)
+        self.assertEqual(validation_result.start_line, 1)
+        self.assertEqual(validation_result.start_ch, 5)
diff --git a/querybook/tests/test_lib/test_query_analysis/test_validation/test_validators/test_presto_optimizing_validator.py b/querybook/tests/test_lib/test_query_analysis/test_validation/test_validators/test_presto_optimizing_validator.py
new file mode 100644
index 000000000..0abb7aa51
--- /dev/null
+++ b/querybook/tests/test_lib/test_query_analysis/test_validation/test_validators/test_presto_optimizing_validator.py
@@ -0,0 +1,302 @@
+from typing import List
+from unittest import TestCase
+
+from lib.query_analysis.validation.base_query_validator import (
+    QueryValidationResult,
+    QueryValidationResultObjectType,
+    QueryValidationSeverity,
+)
+from lib.query_analysis.validation.validators.presto_optimizing_validator import (
+    ApproxDistinctValidator,
+    RegexpLikeValidator,
+    UnionAllValidator,
+    PrestoOptimizingValidator,
+)
+
+
+class BaseValidatorTestCase(TestCase):
+    def _verify_query_validation_results(
+        self,
+        validation_results: List[QueryValidationResult],
+        expected_results: List[QueryValidationResult],
+    ):
+        self.assertEqual(len(validation_results), len(expected_results))
+        for i, validation_result in enumerate(validation_results):
+            expected_result = expected_results[i]
+            self.assertEqual(validation_result.to_dict(), expected_result.to_dict())
+
+    def _get_regexp_like_validation_result(
+        self,
+        start_line: int,
+        start_ch: int,
+        end_line: int,
+        end_ch: int,
+        suggestion: str = None,
+    ):
+        return QueryValidationResult(
+            start_line,
+            start_ch,
+            QueryValidationSeverity.WARNING,
+            "Combining multiple LIKEs into one REGEXP_LIKE will execute faster",
+            QueryValidationResultObjectType.LINT,
+            end_line=end_line,
+            end_ch=end_ch,
+            suggestion=suggestion,
+        )
+
+    def _get_union_all_validation_result(
+        self, start_line: int, start_ch: int, end_line: int, end_ch: int
+    ):
+        return QueryValidationResult(
+            start_line,
+            start_ch,
+            QueryValidationSeverity.WARNING,
+            "Using UNION ALL instead of UNION will execute faster",
+            QueryValidationResultObjectType.LINT,
+            end_line=end_line,
+            end_ch=end_ch,
+            suggestion="UNION ALL",
+        )
+
+    def _get_approx_distinct_validation_result(
+        self, start_line: int, start_ch: int, end_line: int, end_ch: int
+    ):
+        return QueryValidationResult(
+            start_line,
+            start_ch,
+            QueryValidationSeverity.WARNING,
+            "Using APPROX_DISTINCT(x) instead of COUNT(DISTINCT x) will execute faster",
+            QueryValidationResultObjectType.LINT,
+            end_line=end_line,
+            end_ch=end_ch,
+            suggestion="APPROX_DISTINCT(",
+        )
+
+
+class UnionAllValidatorTestCase(BaseValidatorTestCase):
+    def setUp(self):
+        self._validator = UnionAllValidator()
+
+    def test_basic_union(self):
+        query = "SELECT * FROM a \nUNION SELECT * FROM b"
+        self._verify_query_validation_results(
+            self._validator.get_query_validation_results(query),
+            [
+                self._get_union_all_validation_result(
+                    1,
+                    0,
+                    1,
+                    4,
+                )
+            ],
+        )
+
+    def test_multiple_unions(self):
+        query = "SELECT * FROM a \nUNION SELECT * FROM b \nUNION SELECT * FROM c"
+        self._verify_query_validation_results(
+            self._validator.get_query_validation_results(query),
+            [
+                self._get_union_all_validation_result(
+                    1,
+                    0,
+                    1,
+                    4,
+                ),
+                self._get_union_all_validation_result(
+                    2,
+                    0,
+                    2,
+                    4,
+                ),
+            ],
+        )
+
+    def test_union_all(self):
+        query = "SELECT * FROM a UNION ALL SELECT * FROM b"
+        self._verify_query_validation_results(
+            self._validator.get_query_validation_results(query), []
+        )
+
+
+class ApproxDistinctValidatorTestCase(BaseValidatorTestCase):
+    def setUp(self):
+        self._validator = ApproxDistinctValidator()
+
+    def test_basic_count_distinct(self):
+        query = "SELECT COUNT(DISTINCT x) FROM a"
+        self._verify_query_validation_results(
+            self._validator.get_query_validation_results(query),
+            [self._get_approx_distinct_validation_result(0, 7, 0, 20)],
+        )
+
+    def test_count_not_followed_by_distinct(self):
+        query = "SELECT \nCOUNT * FROM a"
+        self._verify_query_validation_results(
+            self._validator.get_query_validation_results(query),
+            [],
+        )
+
+    def test_multiple_count_distincts(self):
+        query = (
+            "SELECT \nCOUNT(DISTINCT y) FROM a UNION SELECT \nCOUNT(DISTINCT x) FROM b"
+        )
+        self._verify_query_validation_results(
+            self._validator.get_query_validation_results(query),
+            [
+                self._get_approx_distinct_validation_result(1, 0, 1, 13),
+                self._get_approx_distinct_validation_result(2, 0, 2, 13),
+            ],
+        )
+
+    def test_count_distinct_in_where_clause(self):
+        query = (
+            "SELECT \nCOUNT(DISTINCT a), b FROM table_a WHERE \nCOUNT(DISTINCT a) > 10"
+        )
+        self._verify_query_validation_results(
+            self._validator.get_query_validation_results(query),
+            [
+                self._get_approx_distinct_validation_result(1, 0, 1, 13),
+                self._get_approx_distinct_validation_result(2, 0, 2, 13),
+            ],
+        )
+
+
+class RegexpLikeValidatorTestCase(BaseValidatorTestCase):
+    def setUp(self):
+        self._validator = RegexpLikeValidator()
+
+    def test_basic_combine_case(self):
+        query = "SELECT * from a WHERE \nx LIKE 'foo' OR x LIKE \n'bar'"
+        self._verify_query_validation_results(
+            self._validator.get_query_validation_results(query),
+            [
+                self._get_regexp_like_validation_result(
+                    1, 0, 2, 4, "REGEXP_LIKE(x, 'foo|bar')"
+                )
+            ],
+        )
+
+    def test_and_clause(self):
+        query = "SELECT * from a WHERE \nx LIKE 'foo%' AND x LIKE \n'%bar'"
+        self._verify_query_validation_results(
+            self._validator.get_query_validation_results(query),
+            [],
+        )
+
+    def test_more_than_two_phrases(self):
+        query = "SELECT * from a WHERE \nx LIKE 'foo' OR x LIKE 'bar' OR x LIKE \n'baz'"
+        self._verify_query_validation_results(
+            self._validator.get_query_validation_results(query),
+            [
+                self._get_regexp_like_validation_result(
+                    1, 0, 2, 4, "REGEXP_LIKE(x, 'foo|bar|baz')"
+                )
+            ],
+        )
+
+    def test_different_column_names(self):
+        query = "SELECT * from a WHERE \nx LIKE 'foo' OR y LIKE 'bar'"
+        self._verify_query_validation_results(
+            self._validator.get_query_validation_results(query),
+            [],
+        )
+
+    def test_both_or_and(self):
+        query = (
+            "SELECT * from a WHERE \nx LIKE 'foo' OR x LIKE \n'bar' AND y LIKE 'foo'"
+        )
+        self._verify_query_validation_results(
+            self._validator.get_query_validation_results(query),
+            [
+                self._get_regexp_like_validation_result(
+                    1, 0, 2, 4, "REGEXP_LIKE(x, 'foo|bar')"
+                )
+            ],
+        )
+
+    def test_multiple_suggestions(self):
+        query = "SELECT * from a WHERE \nx LIKE 'foo' OR x LIKE \n'bar' AND \ny LIKE 'foo' OR y LIKE \n'bar'"
+        self._verify_query_validation_results(
+            self._validator.get_query_validation_results(query),
+            [
+                self._get_regexp_like_validation_result(
+                    1, 0, 2, 4, "REGEXP_LIKE(x, 'foo|bar')"
+                ),
+                self._get_regexp_like_validation_result(
+                    3, 0, 4, 4, "REGEXP_LIKE(y, 'foo|bar')"
+                ),
+            ],
+        )
+
+    def test_phrase_not_match(self):
+        query = "SELECT * from a WHERE x LIKE 'foo' OR x = 'bar'"
+        self._verify_query_validation_results(
+            self._validator.get_query_validation_results(query),
+            [],
+        )
+
+
+class PrestoOptimizingValidatorTestCase(BaseValidatorTestCase):
+    def setUp(self):
+        self._validator = PrestoOptimizingValidator("")
+
+    def test_union_and_count_distinct(self):
+        query = "SELECT \nCOUNT( DISTINCT x) from a \nUNION select \ncount(distinct y) from b"
+        self._verify_query_validation_results(
+            self._validator._get_sql_glot_validation_results(query),
+            [
+                self._get_union_all_validation_result(2, 0, 2, 4),
+                self._get_approx_distinct_validation_result(1, 0, 1, 14),
+                self._get_approx_distinct_validation_result(3, 0, 3, 13),
+            ],
+        )
+
+    def test_union_and_regexp_like(self):
+        query = "SELECT * from a WHERE \nx like 'foo' or x like \n'bar' \nUNION select * from b where y like 'foo' AND x like 'bar'"
+        self._verify_query_validation_results(
+            self._validator._get_sql_glot_validation_results(query),
+            [
+                self._get_union_all_validation_result(3, 0, 3, 4),
+                self._get_regexp_like_validation_result(
+                    1, 0, 2, 4, "REGEXP_LIKE(x, 'foo|bar')"
+                ),
+            ],
+        )
+
+    def test_count_distinct_and_regexp_like(self):
+        query = "SELECT \nCOUNT( DISTINCT x) from a WHERE \nx LIKE 'foo' or x like \n'bar' and y like 'foo'"
+        self._verify_query_validation_results(
+            self._validator._get_sql_glot_validation_results(query),
+            [
+                self._get_approx_distinct_validation_result(1, 0, 1, 14),
+                self._get_regexp_like_validation_result(
+                    2, 0, 3, 4, "REGEXP_LIKE(x, 'foo|bar')"
+                ),
+            ],
+        )
+
+    def test_all_errors(self):
+        query = "SELECT \nCOUNT( DISTINCT x) from a WHERE \nx LIKE 'foo' or x like \n'bar' and y like 'foo' \nUNION select * from b"
+        self._verify_query_validation_results(
+            self._validator._get_sql_glot_validation_results(query),
+            [
+                self._get_union_all_validation_result(4, 0, 4, 4),
+                self._get_approx_distinct_validation_result(1, 0, 1, 14),
+                self._get_regexp_like_validation_result(
+                    2, 0, 3, 4, "REGEXP_LIKE(x, 'foo|bar')"
+                ),
+            ],
+        )
+
+    def test_extra_whitespace(self):
+        query = "SELECT \n  COUNT( DISTINCT x) from a WHERE \n\t  x LIKE 'foo' or x like \n'bar' and y like 'foo' \n     UNION select * from b"
+        self._verify_query_validation_results(
+            self._validator._get_sql_glot_validation_results(query),
+            [
+                self._get_union_all_validation_result(4, 5, 4, 9),
+                self._get_approx_distinct_validation_result(1, 2, 1, 16),
+                self._get_regexp_like_validation_result(
+                    2, 3, 3, 4, "REGEXP_LIKE(x, 'foo|bar')"
+                ),
+            ],
+        )
diff --git a/querybook/webapp/components/CodeMirrorTooltip/CodeMirrorTooltip.tsx b/querybook/webapp/components/CodeMirrorTooltip/CodeMirrorTooltip.tsx
index 86ff1e68d..5782b54fd 100644
--- a/querybook/webapp/components/CodeMirrorTooltip/CodeMirrorTooltip.tsx
+++ b/querybook/webapp/components/CodeMirrorTooltip/CodeMirrorTooltip.tsx
@@ -9,6 +9,7 @@ import { IStoreState } from 'redux/store/types';
 
 import { FunctionDocumentationTooltip } from './FunctionDocumentationTooltip';
 import { TableTooltip } from './TableTooltip';
+import { SuggestionTooltip } from './SuggestionTooltip';
 
 import './CodeMirrorTooltip.scss';
 
@@ -16,14 +17,24 @@ export interface ICodeMirrorTooltipProps {
     tableId?: number;
     functionDocumentations?: IFunctionDescription[];
     error?: React.ReactChild;
+    suggestionText?: string;
 
     openTableModal?: () => any;
+    onAcceptSuggestion?: (suggestion: string) => void;
     hide: () => any;
 }
 
 export const CodeMirrorTooltip: React.FunctionComponent<
     ICodeMirrorTooltipProps
-> = ({ tableId, hide, functionDocumentations, error, openTableModal }) => {
+> = ({
+    tableId,
+    hide,
+    functionDocumentations,
+    error,
+    openTableModal,
+    suggestionText,
+    onAcceptSuggestion,
+}) => {
     const { table, schema, columns } = useShallowSelector(
         (state: IStoreState) => {
             const tableFromState = state.dataSources.dataTablesById[tableId];
@@ -81,6 +92,13 @@ export const CodeMirrorTooltip: React.FunctionComponent<
                 <div>{error}</div>
             </div>
         );
+    } else if (suggestionText && onAcceptSuggestion) {
+        contentDOM = (
+            <SuggestionTooltip
+                suggestionText={suggestionText}
+                onAcceptSuggestion={onAcceptSuggestion}
+            />
+        );
     }
 
     return <div className="CodeMirrorTooltip">{contentDOM}</div>;
diff --git a/querybook/webapp/components/CodeMirrorTooltip/SuggestionTooltip.tsx b/querybook/webapp/components/CodeMirrorTooltip/SuggestionTooltip.tsx
new file mode 100644
index 000000000..302f0039a
--- /dev/null
+++ b/querybook/webapp/components/CodeMirrorTooltip/SuggestionTooltip.tsx
@@ -0,0 +1,43 @@
+import React, { useCallback, useMemo } from 'react';
+import { Button } from 'ui/Button/Button';
+
+interface IProps {
+    suggestionText: string;
+    onAcceptSuggestion: (suggestion: string) => void;
+}
+
+const SUGGESTION_MAX_LENGTH = 20;
+
+export const SuggestionTooltip: React.FunctionComponent<IProps> = ({
+    suggestionText,
+    onAcceptSuggestion,
+}) => {
+    const truncatedSuggestion = useMemo(
+        () =>
+            suggestionText.length > SUGGESTION_MAX_LENGTH
+                ? suggestionText.slice(0, SUGGESTION_MAX_LENGTH - 3) + '...'
+                : suggestionText,
+        [suggestionText]
+    );
+
+    const onClick = useCallback(() => {
+        onAcceptSuggestion(suggestionText);
+    }, [onAcceptSuggestion, suggestionText]);
+
+    return (
+        <div className="rich-text-content">
+            <div>
+                Replace with <code>{truncatedSuggestion}</code>
+            </div>
+            <div className="mt8 right-align">
+                <Button
+                    title="Accept"
+                    onClick={onClick}
+                    theme="fill"
+                    color="confirm"
+                    icon="Check"
+                />
+            </div>
+        </div>
+    );
+};
diff --git a/querybook/webapp/components/QueryEditor/QueryEditor.tsx b/querybook/webapp/components/QueryEditor/QueryEditor.tsx
index 78405160f..bc62326e5 100644
--- a/querybook/webapp/components/QueryEditor/QueryEditor.tsx
+++ b/querybook/webapp/components/QueryEditor/QueryEditor.tsx
@@ -22,6 +22,7 @@ import {
 import { useAutoComplete } from 'hooks/queryEditor/useAutoComplete';
 import { useCodeAnalysis } from 'hooks/queryEditor/useCodeAnalysis';
 import { useLint } from 'hooks/queryEditor/useLint';
+import { useDebouncedFn } from 'hooks/useDebouncedFn';
 import CodeMirror, { CodeMirrorKeyMap } from 'lib/codemirror';
 import { SQL_JINJA_MODE } from 'lib/codemirror/codemirror-mode';
 import {
@@ -29,12 +30,7 @@ import {
     ExcludedTriggerKeys,
 } from 'lib/sql-helper/sql-autocompleter';
 import { format, ISQLFormatOptions } from 'lib/sql-helper/sql-formatter';
-import {
-    ILinterWarning,
-    IRange,
-    IToken,
-    TableToken,
-} from 'lib/sql-helper/sql-lexer';
+import { ILinterWarning, IRange, TableToken } from 'lib/sql-helper/sql-lexer';
 import { formatNumber } from 'lib/utils/number';
 import { navigateWithinEnv } from 'lib/utils/query-string';
 import { IconButton } from 'ui/Button/IconButton';
@@ -168,6 +164,14 @@ export const QueryEditor: React.FC<
             return list.length > 0 ? list[0] : null;
         }, [queryAnnotations]);
 
+        const annotationSuggestions: ILinterWarning[] = useMemo(
+            () =>
+                queryAnnotations.filter(
+                    (annotation) => annotation.suggestion != null
+                ),
+            [queryAnnotations]
+        );
+
         const openTableModal = useCallback((tableId: number) => {
             navigateWithinEnv(`/table/${tableId}/`, {
                 isModal: true,
@@ -233,6 +237,26 @@ export const QueryEditor: React.FC<
             [getTableByName, codeAnalysisRef]
         );
 
+        const getSuggestionByPosition = useCallback(
+            (pos: CodeMirror.Position) => {
+                const currLine = pos.line;
+                const currCh = pos.ch;
+
+                for (const suggestion of annotationSuggestions) {
+                    if (
+                        suggestion.from.line <= currLine &&
+                        suggestion.to.line >= currLine &&
+                        suggestion.from.ch <= currCh &&
+                        suggestion.to.ch >= currCh
+                    ) {
+                        return suggestion;
+                    }
+                }
+                return null;
+            },
+            [annotationSuggestions]
+        );
+
         const onOpenTableModal = useCallback(
             (editor: CodeMirror.Editor) => {
                 const pos = editor.getDoc().getCursor();
@@ -290,13 +314,10 @@ export const QueryEditor: React.FC<
 
         const markTextAndShowTooltip = (
             editor: CodeMirror.Editor,
-            pos: CodeMirror.Position,
-            token: IToken,
+            markTextFrom: CodeMirror.Position,
+            markTextTo: CodeMirror.Position,
             tooltipProps: Omit<ICodeMirrorTooltipProps, 'hide'>
         ) => {
-            const markTextFrom = { line: pos.line, ch: token.start };
-            const markTextTo = { line: pos.line, ch: token.end };
-
             markerRef.current = editor
                 .getDoc()
                 .markText(markTextFrom, markTextTo, {
@@ -358,56 +379,101 @@ export const QueryEditor: React.FC<
             [language, functionDocumentationByNameByLanguage]
         );
 
-        const onTextHover = useMemo(
-            () =>
-                debounce(
-                    async (editor: CodeMirror.Editor, node, e, pos, token) => {
-                        if (
-                            markerRef.current == null &&
-                            (token.type === 'variable-2' || token.type == null)
-                        ) {
-                            // Check if token is inside a table
-                            const tokenInsideTable = await isTokenInTable(
-                                pos,
-                                token
+        const onTextHoverLongDebounce = useDebouncedFn(
+            async (editor: CodeMirror.Editor, node, e, pos, token) => {
+                if (
+                    markerRef.current == null &&
+                    (token.type === 'variable-2' || token.type == null)
+                ) {
+                    // Check if token is inside a table
+                    const tokenInsideTable = await isTokenInTable(pos, token);
+                    const markTextFrom = {
+                        line: pos.line,
+                        ch: token.start,
+                    };
+                    const markTextTo = {
+                        line: pos.line,
+                        ch: token.end,
+                    };
+                    if (tokenInsideTable) {
+                        const { tableInfo } = tokenInsideTable;
+                        if (tableInfo) {
+                            markTextAndShowTooltip(
+                                editor,
+                                markTextFrom,
+                                markTextTo,
+                                {
+                                    tableId: tableInfo.id,
+                                    openTableModal: () =>
+                                        openTableModal(tableInfo.id),
+                                }
                             );
-
-                            if (tokenInsideTable) {
-                                const { tableInfo } = tokenInsideTable;
-                                if (tableInfo) {
-                                    markTextAndShowTooltip(editor, pos, token, {
-                                        tableId: tableInfo.id,
-                                        openTableModal: () =>
-                                            openTableModal(tableInfo.id),
-                                    });
-                                } else {
-                                    markTextAndShowTooltip(editor, pos, token, {
-                                        error: 'Table does not exist!',
-                                    });
+                        } else {
+                            markTextAndShowTooltip(
+                                editor,
+                                markTextFrom,
+                                markTextTo,
+                                {
+                                    error: 'Table does not exist!',
                                 }
-                            }
+                            );
+                        }
+                    }
 
-                            const nextChar = editor.getDoc().getLine(pos.line)[
-                                token.end
-                            ];
-                            if (nextChar === '(') {
-                                // if it seems like a function call
-                                const functionDef = matchFunctionWithDefinition(
-                                    token.string
-                                );
-                                if (functionDef) {
-                                    markTextAndShowTooltip(editor, pos, token, {
-                                        functionDocumentations: functionDef,
-                                    });
+                    const nextChar = editor.getDoc().getLine(pos.line)[
+                        token.end
+                    ];
+                    if (nextChar === '(') {
+                        // if it seems like a function call
+                        const functionDef = matchFunctionWithDefinition(
+                            token.string
+                        );
+                        if (functionDef) {
+                            markTextAndShowTooltip(
+                                editor,
+                                markTextFrom,
+                                markTextTo,
+                                {
+                                    functionDocumentations: functionDef,
                                 }
-                            }
+                            );
                         }
-                    },
-                    600
-                ),
+                    }
+                }
+            },
+            600,
             [isTokenInTable, matchFunctionWithDefinition, openTableModal]
         );
 
+        const onTextHoverShortDebounce = useDebouncedFn(
+            (editor: CodeMirror.Editor, node, e, pos, _token) => {
+                if (markerRef.current == null) {
+                    const suggestionAnnotation = getSuggestionByPosition(pos);
+                    if (suggestionAnnotation != null) {
+                        const { suggestion, from, to } = suggestionAnnotation;
+                        markTextAndShowTooltip(editor, from, to, {
+                            onAcceptSuggestion: (suggestion: string) =>
+                                editor.replaceRange(suggestion, from, to),
+                            suggestionText: suggestion,
+                        });
+                    }
+                }
+            },
+            100,
+            [getSuggestionByPosition]
+        );
+
+        const onTextHover = useCallback(
+            async (editor: CodeMirror.Editor, node, e, pos, token) => {
+                // Debounce asynchronous checks with a longer delay (e.g. for requesting table metadata)
+                onTextHoverLongDebounce(editor, node, e, pos, token);
+
+                // Faster checks use a shorter delay
+                onTextHoverShortDebounce(editor, node, e, pos, token);
+            },
+            [onTextHoverLongDebounce, onTextHoverShortDebounce]
+        );
+
         const showAutoCompletion = useMemo(
             () =>
                 debounce((editor: CodeMirror.Editor) => {
diff --git a/querybook/webapp/components/TemplateQueryView/TemplatedQueryView.tsx b/querybook/webapp/components/TemplateQueryView/TemplatedQueryView.tsx
index f572e2ba6..e6738daba 100644
--- a/querybook/webapp/components/TemplateQueryView/TemplatedQueryView.tsx
+++ b/querybook/webapp/components/TemplateQueryView/TemplatedQueryView.tsx
@@ -60,12 +60,18 @@ function useValidateQuery(
             .map((error) => {
                 const token = tokens.find(
                     (token) =>
-                        token.line === error.line && token.start === error.ch
+                        token.line === error.start_line &&
+                        token.start === error.start_ch
                 );
                 if (token) {
                     return {
-                        from: queryPositions[token.line] + token.start,
-                        to: queryPositions[token.line] + token.end,
+                        from: queryPositions[error.start_line] + error.start_ch,
+                        to:
+                            error.end_line && error.end_ch
+                                ? queryPositions[error.end_line] +
+                                  error.end_ch +
+                                  1
+                                : queryPositions[token.line] + token.end,
                         className: 'code-highlight-red',
                     };
                 }
@@ -145,7 +151,8 @@ export const TemplatedQueryView: React.FC<ITemplatedQueryViewProps> = ({
 
             const errorsDOM = queryValidationErrors.map((err, i) => (
                 <p key={i}>
-                    Line: {err.line} Ch: {err.ch}, Message: {err.message}
+                    Line: {err.start_line} Ch: {err.start_ch}, Message:{' '}
+                    {err.message}
                 </p>
             ));
 
diff --git a/querybook/webapp/const/queryExecution.ts b/querybook/webapp/const/queryExecution.ts
index 9b1f632bf..28344278c 100644
--- a/querybook/webapp/const/queryExecution.ts
+++ b/querybook/webapp/const/queryExecution.ts
@@ -145,9 +145,12 @@ export interface IQueryExecutionNotification {
 }
 
 export interface IQueryValidationResult {
-    line: number;
-    ch: number;
+    start_line: number;
+    start_ch: number;
+    end_line: number | null;
+    end_ch: number | null;
     message: string;
     severity: string;
     type: string;
+    suggestion: string | null;
 }
diff --git a/querybook/webapp/hooks/useDebouncedFn.ts b/querybook/webapp/hooks/useDebouncedFn.ts
new file mode 100644
index 000000000..9d11d3855
--- /dev/null
+++ b/querybook/webapp/hooks/useDebouncedFn.ts
@@ -0,0 +1,10 @@
+import { useMemo } from 'react';
+import { debounce } from 'lodash';
+
+export function useDebouncedFn<F extends (...args: any) => any>(
+    fn: F,
+    timeout: number,
+    deps: any[]
+) {
+    return useMemo(() => debounce(fn, timeout), deps);
+}
diff --git a/querybook/webapp/lib/codemirror/codemirror-lint.ts b/querybook/webapp/lib/codemirror/codemirror-lint.ts
index 7af84efc2..5de4b53fc 100644
--- a/querybook/webapp/lib/codemirror/codemirror-lint.ts
+++ b/querybook/webapp/lib/codemirror/codemirror-lint.ts
@@ -17,7 +17,16 @@ export function createSQLLinter(
             );
 
         return validationResults.map((validationError) => {
-            const { type, line, ch, severity, message } = validationError;
+            const {
+                type,
+                start_line: line,
+                start_ch: ch,
+                end_line: endLine,
+                end_ch: endCh,
+                severity,
+                message,
+                suggestion,
+            } = validationError;
 
             const errorToken = cm.getTokenAt({
                 line,
@@ -32,12 +41,13 @@ export function createSQLLinter(
                         line,
                     },
                     to: {
-                        ch: errorToken.end,
-                        line,
+                        ch: endCh != null ? endCh + 1 : errorToken.end,
+                        line: endLine ?? line,
                     },
                     severity,
                     message,
                     type,
+                    suggestion,
                 } as ILinterWarning;
             } else {
                 return {
@@ -52,6 +62,7 @@ export function createSQLLinter(
                     severity,
                     message,
                     type,
+                    suggestion,
                 } as ILinterWarning;
             }
         });
diff --git a/querybook/webapp/lib/sql-helper/sql-context-free-linter.ts b/querybook/webapp/lib/sql-helper/sql-context-free-linter.ts
index 0dbbd7832..c43cb7b1b 100644
--- a/querybook/webapp/lib/sql-helper/sql-context-free-linter.ts
+++ b/querybook/webapp/lib/sql-helper/sql-context-free-linter.ts
@@ -56,6 +56,7 @@ const contextFreeLinterWarningsByLanguage: Record<
                                     line: distinct.line,
                                     ch: distinct.end,
                                 },
+                                suggestion: null,
                             });
                         }
                     }
@@ -98,6 +99,7 @@ const contextFreeLinterWarningsByLanguage: Record<
                                 line: firstToken.line,
                                 ch: firstToken.end,
                             },
+                            suggestion: null,
                         });
                     }
                 }
diff --git a/querybook/webapp/lib/sql-helper/sql-context-sensitive-linter.ts b/querybook/webapp/lib/sql-helper/sql-context-sensitive-linter.ts
index 69f5247d0..5d7703165 100644
--- a/querybook/webapp/lib/sql-helper/sql-context-sensitive-linter.ts
+++ b/querybook/webapp/lib/sql-helper/sql-context-sensitive-linter.ts
@@ -33,6 +33,7 @@ export function getContextSensitiveWarnings(
                         line: table.line,
                         ch: table.end,
                     },
+                    suggestion: null,
                 });
             }
         } else {
@@ -71,6 +72,7 @@ export function getContextSensitiveWarnings(
                         line: table.line,
                         ch: table.end,
                     },
+                    suggestion: null,
                 });
             }
         }
diff --git a/querybook/webapp/lib/sql-helper/sql-lexer.ts b/querybook/webapp/lib/sql-helper/sql-lexer.ts
index fa9cff7c7..d83ce80f5 100644
--- a/querybook/webapp/lib/sql-helper/sql-lexer.ts
+++ b/querybook/webapp/lib/sql-helper/sql-lexer.ts
@@ -177,6 +177,7 @@ export interface ILinterWarning extends IRange {
     type: 'lint' | 'general';
     message: string;
     severity: 'error' | 'warning' | 'info';
+    suggestion: string | null;
 }
 
 export interface ILineage {
diff --git a/requirements/parser/sqlglot.txt b/requirements/parser/sqlglot.txt
index 6989f97a4..1f816ef36 100644
--- a/requirements/parser/sqlglot.txt
+++ b/requirements/parser/sqlglot.txt
@@ -1 +1 @@
-sqlglot==5.1.3
+sqlglot==17.8.0
diff --git a/requirements/test.txt b/requirements/test.txt
index e407f751f..397765b37 100644
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -6,3 +6,4 @@ moto==2.2.4
 -r engine/hive.txt
 -r metastore/glue.txt
 -r exporter/gspread.txt
+-r parser/sqlglot.txt