From c1359f44559c01d2748737e8c1bfa24c5e08cbd7 Mon Sep 17 00:00:00 2001 From: Krishna Gopal Date: Mon, 14 Aug 2023 09:51:29 -0700 Subject: [PATCH] Implement query linter suggestions (#1306) * Implement Presto SQLGlot Optimizers (#1300) Create a PrestoOptimizingValidator that extends the PrestoExplainValidator to run validators that provide suggestions to rewrite & optimize the query the query * Implement UI for query optimization suggestions (#1302) Create a new tooltip for users to accept query optimization suggestions * Update querybook version, fix PrestoOptimizingValidator (#1304) * Fix minor UI suggestions (#1305) --- .../docs/changelog/breaking_change.md | 3 + package.json | 2 +- .../validation/base_query_validator.py | 21 +- .../validators/base_sqlglot_validator.py | 62 ++++ .../validators/presto_optimizing_validator.py | 223 +++++++++++++ .../test_presto_explain_validator.py | 8 +- .../test_presto_optimizing_validator.py | 302 ++++++++++++++++++ .../CodeMirrorTooltip/CodeMirrorTooltip.tsx | 20 +- .../CodeMirrorTooltip/SuggestionTooltip.tsx | 43 +++ .../components/QueryEditor/QueryEditor.tsx | 172 +++++++--- .../TemplateQueryView/TemplatedQueryView.tsx | 15 +- querybook/webapp/const/queryExecution.ts | 7 +- querybook/webapp/hooks/useDebouncedFn.ts | 10 + .../webapp/lib/codemirror/codemirror-lint.ts | 17 +- .../lib/sql-helper/sql-context-free-linter.ts | 2 + .../sql-context-sensitive-linter.ts | 2 + querybook/webapp/lib/sql-helper/sql-lexer.ts | 1 + requirements/parser/sqlglot.txt | 2 +- requirements/test.txt | 1 + 19 files changed, 838 insertions(+), 75 deletions(-) create mode 100644 querybook/server/lib/query_analysis/validation/validators/base_sqlglot_validator.py create mode 100644 querybook/server/lib/query_analysis/validation/validators/presto_optimizing_validator.py create mode 100644 querybook/tests/test_lib/test_query_analysis/test_validation/test_validators/test_presto_optimizing_validator.py create mode 100644 querybook/webapp/components/CodeMirrorTooltip/SuggestionTooltip.tsx create mode 100644 querybook/webapp/hooks/useDebouncedFn.ts diff --git a/docs_website/docs/changelog/breaking_change.md b/docs_website/docs/changelog/breaking_change.md index 9b0a134ed..e42ef7e1e 100644 --- a/docs_website/docs/changelog/breaking_change.md +++ b/docs_website/docs/changelog/breaking_change.md @@ -7,6 +7,9 @@ slug: /changelog Here are the list of breaking changes that you should be aware of when updating Querybook: +## v3.27.0 +Updated properties of `QueryValidationResult` object. `line` and `ch` are replaced with `start_line` and `start_ch` respectively. + ## v3.22.0 Updated the charset of table `data_element` to `utf8mb4`. For those mysql db's default charset is not utf8, please run below sql to update it if needed. diff --git a/package.json b/package.json index 415df13ae..4b1582516 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "querybook", - "version": "3.26.3", + "version": "3.27.0", "description": "A Big Data Webapp", "private": true, "scripts": { diff --git a/querybook/server/lib/query_analysis/validation/base_query_validator.py b/querybook/server/lib/query_analysis/validation/base_query_validator.py index cdbbe1490..d2ed3a3d2 100644 --- a/querybook/server/lib/query_analysis/validation/base_query_validator.py +++ b/querybook/server/lib/query_analysis/validation/base_query_validator.py @@ -21,25 +21,34 @@ class QueryValidationSeverity(Enum): class QueryValidationResult(object): def __init__( self, - line: int, # 0 based - ch: int, # location of the starting token + start_line: int, # 0 based + start_ch: int, # location of the starting token severity: QueryValidationSeverity, message: str, obj_type: QueryValidationResultObjectType = QueryValidationResultObjectType.LINT, + end_line: int = None, # 0 based + end_ch: int = None, # location of the ending token + suggestion: str = None, ): self.type = obj_type - self.line = line - self.ch = ch + self.start_line = start_line + self.start_ch = start_ch + self.end_line = end_line + self.end_ch = end_ch self.severity = severity self.message = message + self.suggestion = suggestion def to_dict(self): return { "type": self.type.value, - "line": self.line, - "ch": self.ch, + "start_line": self.start_line, + "start_ch": self.start_ch, + "end_line": self.end_line, + "end_ch": self.end_ch, "severity": self.severity.value, "message": self.message, + "suggestion": self.suggestion, } diff --git a/querybook/server/lib/query_analysis/validation/validators/base_sqlglot_validator.py b/querybook/server/lib/query_analysis/validation/validators/base_sqlglot_validator.py new file mode 100644 index 000000000..067f1eef0 --- /dev/null +++ b/querybook/server/lib/query_analysis/validation/validators/base_sqlglot_validator.py @@ -0,0 +1,62 @@ +from abc import ABCMeta, abstractmethod +from typing import List, Tuple +from sqlglot import Tokenizer +from sqlglot.tokens import Token + +from lib.query_analysis.validation.base_query_validator import ( + QueryValidationResult, + QueryValidationResultObjectType, + QueryValidationSeverity, +) + + +class BaseSQLGlotValidator(metaclass=ABCMeta): + @property + @abstractmethod + def message(self) -> str: + raise NotImplementedError() + + @property + @abstractmethod + def severity(self) -> QueryValidationSeverity: + raise NotImplementedError() + + @property + @abstractmethod + def tokenizer(self) -> Tokenizer: + raise NotImplementedError() + + def _tokenize_query(self, query: str) -> List[Token]: + return self.tokenizer.tokenize(query) + + def _get_query_coordinate_by_index(self, query: str, index: int) -> Tuple[int, int]: + rows = query[: index + 1].splitlines(keepends=False) + return len(rows) - 1, len(rows[-1]) - 1 + + def _get_query_validation_result( + self, + query: str, + start_index: int, + end_index: int, + suggestion: str = None, + validation_result_object_type=QueryValidationResultObjectType.LINT, + ): + start_line, start_ch = self._get_query_coordinate_by_index(query, start_index) + end_line, end_ch = self._get_query_coordinate_by_index(query, end_index) + + return QueryValidationResult( + start_line, + start_ch, + self.severity, + self.message, + validation_result_object_type, + end_line=end_line, + end_ch=end_ch, + suggestion=suggestion, + ) + + @abstractmethod + def get_query_validation_results( + self, query: str, raw_tokens: List[Token] = None + ) -> List[QueryValidationResult]: + raise NotImplementedError() diff --git a/querybook/server/lib/query_analysis/validation/validators/presto_optimizing_validator.py b/querybook/server/lib/query_analysis/validation/validators/presto_optimizing_validator.py new file mode 100644 index 000000000..5d726a4ff --- /dev/null +++ b/querybook/server/lib/query_analysis/validation/validators/presto_optimizing_validator.py @@ -0,0 +1,223 @@ +from typing import List +from sqlglot import TokenType, Tokenizer +from sqlglot.dialects import Trino +from sqlglot.tokens import Token + +from lib.query_analysis.validation.base_query_validator import ( + BaseQueryValidator, + QueryValidationResult, + QueryValidationSeverity, +) +from lib.query_analysis.validation.validators.presto_explain_validator import ( + PrestoExplainValidator, +) +from lib.query_analysis.validation.validators.base_sqlglot_validator import ( + BaseSQLGlotValidator, +) + + +class BasePrestoSQLGlotValidator(BaseSQLGlotValidator): + @property + def tokenizer(self) -> Tokenizer: + return Trino.Tokenizer() + + +class UnionAllValidator(BasePrestoSQLGlotValidator): + @property + def message(self): + return "Using UNION ALL instead of UNION will execute faster" + + @property + def severity(self) -> str: + return QueryValidationSeverity.WARNING + + def get_query_validation_results( + self, query: str, raw_tokens: List[Token] = None + ) -> List[QueryValidationResult]: + if raw_tokens is None: + raw_tokens = self._tokenize_query(query) + validation_errors = [] + for i, token in enumerate(raw_tokens): + if token.token_type == TokenType.UNION: + if ( + i < len(raw_tokens) - 1 + and raw_tokens[i + 1].token_type != TokenType.ALL + ): + validation_errors.append( + self._get_query_validation_result( + query, token.start, token.end, "UNION ALL" + ) + ) + return validation_errors + + +class ApproxDistinctValidator(BasePrestoSQLGlotValidator): + @property + def message(self): + return ( + "Using APPROX_DISTINCT(x) instead of COUNT(DISTINCT x) will execute faster" + ) + + @property + def severity(self) -> str: + return QueryValidationSeverity.WARNING + + def get_query_validation_results( + self, query: str, raw_tokens: List[Token] = None + ) -> List[QueryValidationResult]: + if raw_tokens is None: + raw_tokens = self._tokenize_query(query) + + validation_errors = [] + for i, token in enumerate(raw_tokens): + if ( + i < len(raw_tokens) - 2 + and token.token_type == TokenType.VAR + and token.text.lower().strip() == "count" + and raw_tokens[i + 1].token_type == TokenType.L_PAREN + and raw_tokens[i + 2].token_type == TokenType.DISTINCT + ): + validation_errors.append( + self._get_query_validation_result( + query, + token.start, + raw_tokens[i + 2].end, + "APPROX_DISTINCT(", + ) + ) + return validation_errors + + +class RegexpLikeValidator(BasePrestoSQLGlotValidator): + @property + def message(self): + return "Combining multiple LIKEs into one REGEXP_LIKE will execute faster" + + @property + def severity(self) -> str: + return QueryValidationSeverity.WARNING + + def _get_regexp_like_suggestion(self, column_name: str, like_strings: List[str]): + sanitized_like_strings = [ + like_string.strip("\"'") for like_string in like_strings + ] + return f"REGEXP_LIKE({column_name}, '{'|'.join(sanitized_like_strings)}')" + + def get_query_validation_results( + self, query: str, raw_tokens: List[Token] = None + ) -> List[QueryValidationResult]: + if raw_tokens is None: + raw_tokens = self._tokenize_query(query) + + validation_errors = [] + + start_column_token = None + like_strings = [] + token_idx = 0 + while token_idx < len(raw_tokens) - 2: + token_1 = raw_tokens[token_idx] + token_2 = raw_tokens[token_idx + 1] + token_3 = raw_tokens[token_idx + 2] + + # Check if the next set of three tokens matches a "like" phrase (i.e. LIKE ) + if ( + token_1.token_type == TokenType.VAR + and ( + start_column_token is None + or token_1.text == start_column_token.text + ) + and token_2.token_type == TokenType.LIKE + and token_3.token_type == TokenType.STRING + ): + if start_column_token is None: + start_column_token = raw_tokens[token_idx] + like_strings.append(token_3.text) + token_idx += 3 + if ( + token_idx == len(raw_tokens) + or raw_tokens[token_idx].token_type != TokenType.OR + ): # No "OR" token following the phrase, so we cannot combine additional phrases + # Check if there are multiple phrases that can be combined + if len(like_strings) > 1: + validation_errors.append( + self._get_query_validation_result( + query, + start_column_token.start, + raw_tokens[token_idx - 1].end, + suggestion=self._get_regexp_like_suggestion( + start_column_token.text, like_strings + ), + ) + ) + start_column_token = None + like_strings = [] + + # If next tokens do not match the "like" phrase pattern, check if a suggestion can be made if there are previously matched phrases + elif start_column_token is not None: + if ( + len(like_strings) > 1 + ): # Check if a validation suggestion can be created + validation_errors.append( + self._get_query_validation_result( + query, + start_column_token.start, + raw_tokens[token_idx - 1].end, + suggestion=self._get_regexp_like_suggestion( + start_column_token.text, like_strings + ), + ) + ) + start_column_token = None + like_strings = [] + token_idx += 1 + + return validation_errors + + +class PrestoOptimizingValidator(BaseQueryValidator): + def languages(self): + return ["presto", "trino"] + + def _get_explain_validator(self): + return PrestoExplainValidator("") + + def _get_sqlglot_validators(self) -> List[BaseSQLGlotValidator]: + return [ + UnionAllValidator(), + ApproxDistinctValidator(), + RegexpLikeValidator(), + ] + + def _get_sql_glot_validation_results( + self, query: str + ) -> List[QueryValidationResult]: + validation_suggestions = [] + + query_raw_tokens = None + for validator in self._get_sqlglot_validators(): + if query_raw_tokens is None: + query_raw_tokens = validator._tokenize_query(query) + validation_suggestions.extend( + validator.get_query_validation_results( + query, raw_tokens=query_raw_tokens + ) + ) + + return validation_suggestions + + def _get_presto_explain_validation_results( + self, query: str, uid: int, engine_id: int + ) -> List[QueryValidationResult]: + return self._get_explain_validator().validate(query, uid, engine_id) + + def validate( + self, + query: str, + uid: int, + engine_id: int, + ) -> List[QueryValidationResult]: + validation_results = [ + *self._get_presto_explain_validation_results(query, uid, engine_id), + *self._get_sql_glot_validation_results(query), + ] + return validation_results diff --git a/querybook/tests/test_lib/test_query_analysis/test_validation/test_validators/test_presto_explain_validator.py b/querybook/tests/test_lib/test_query_analysis/test_validation/test_validators/test_presto_explain_validator.py index ae0328f17..fc910698c 100644 --- a/querybook/tests/test_lib/test_query_analysis/test_validation/test_validators/test_presto_explain_validator.py +++ b/querybook/tests/test_lib/test_query_analysis/test_validation/test_validators/test_presto_explain_validator.py @@ -41,11 +41,11 @@ def test_simple(self): 1, statement_start_locations, error_line=0, error_ch=2, error_msg="" ) - self.assertEqual(validation_result.line, 0) - self.assertEqual(validation_result.ch, 12) + self.assertEqual(validation_result.start_line, 0) + self.assertEqual(validation_result.start_ch, 12) validation_result = self._validator._map_statement_error_to_query( 2, statement_start_locations, error_line=0, error_ch=5, error_msg="" ) - self.assertEqual(validation_result.line, 1) - self.assertEqual(validation_result.ch, 5) + self.assertEqual(validation_result.start_line, 1) + self.assertEqual(validation_result.start_ch, 5) diff --git a/querybook/tests/test_lib/test_query_analysis/test_validation/test_validators/test_presto_optimizing_validator.py b/querybook/tests/test_lib/test_query_analysis/test_validation/test_validators/test_presto_optimizing_validator.py new file mode 100644 index 000000000..0abb7aa51 --- /dev/null +++ b/querybook/tests/test_lib/test_query_analysis/test_validation/test_validators/test_presto_optimizing_validator.py @@ -0,0 +1,302 @@ +from typing import List +from unittest import TestCase + +from lib.query_analysis.validation.base_query_validator import ( + QueryValidationResult, + QueryValidationResultObjectType, + QueryValidationSeverity, +) +from lib.query_analysis.validation.validators.presto_optimizing_validator import ( + ApproxDistinctValidator, + RegexpLikeValidator, + UnionAllValidator, + PrestoOptimizingValidator, +) + + +class BaseValidatorTestCase(TestCase): + def _verify_query_validation_results( + self, + validation_results: List[QueryValidationResult], + expected_results: List[QueryValidationResult], + ): + self.assertEqual(len(validation_results), len(expected_results)) + for i, validation_result in enumerate(validation_results): + expected_result = expected_results[i] + self.assertEqual(validation_result.to_dict(), expected_result.to_dict()) + + def _get_regexp_like_validation_result( + self, + start_line: int, + start_ch: int, + end_line: int, + end_ch: int, + suggestion: str = None, + ): + return QueryValidationResult( + start_line, + start_ch, + QueryValidationSeverity.WARNING, + "Combining multiple LIKEs into one REGEXP_LIKE will execute faster", + QueryValidationResultObjectType.LINT, + end_line=end_line, + end_ch=end_ch, + suggestion=suggestion, + ) + + def _get_union_all_validation_result( + self, start_line: int, start_ch: int, end_line: int, end_ch: int + ): + return QueryValidationResult( + start_line, + start_ch, + QueryValidationSeverity.WARNING, + "Using UNION ALL instead of UNION will execute faster", + QueryValidationResultObjectType.LINT, + end_line=end_line, + end_ch=end_ch, + suggestion="UNION ALL", + ) + + def _get_approx_distinct_validation_result( + self, start_line: int, start_ch: int, end_line: int, end_ch: int + ): + return QueryValidationResult( + start_line, + start_ch, + QueryValidationSeverity.WARNING, + "Using APPROX_DISTINCT(x) instead of COUNT(DISTINCT x) will execute faster", + QueryValidationResultObjectType.LINT, + end_line=end_line, + end_ch=end_ch, + suggestion="APPROX_DISTINCT(", + ) + + +class UnionAllValidatorTestCase(BaseValidatorTestCase): + def setUp(self): + self._validator = UnionAllValidator() + + def test_basic_union(self): + query = "SELECT * FROM a \nUNION SELECT * FROM b" + self._verify_query_validation_results( + self._validator.get_query_validation_results(query), + [ + self._get_union_all_validation_result( + 1, + 0, + 1, + 4, + ) + ], + ) + + def test_multiple_unions(self): + query = "SELECT * FROM a \nUNION SELECT * FROM b \nUNION SELECT * FROM c" + self._verify_query_validation_results( + self._validator.get_query_validation_results(query), + [ + self._get_union_all_validation_result( + 1, + 0, + 1, + 4, + ), + self._get_union_all_validation_result( + 2, + 0, + 2, + 4, + ), + ], + ) + + def test_union_all(self): + query = "SELECT * FROM a UNION ALL SELECT * FROM b" + self._verify_query_validation_results( + self._validator.get_query_validation_results(query), [] + ) + + +class ApproxDistinctValidatorTestCase(BaseValidatorTestCase): + def setUp(self): + self._validator = ApproxDistinctValidator() + + def test_basic_count_distinct(self): + query = "SELECT COUNT(DISTINCT x) FROM a" + self._verify_query_validation_results( + self._validator.get_query_validation_results(query), + [self._get_approx_distinct_validation_result(0, 7, 0, 20)], + ) + + def test_count_not_followed_by_distinct(self): + query = "SELECT \nCOUNT * FROM a" + self._verify_query_validation_results( + self._validator.get_query_validation_results(query), + [], + ) + + def test_multiple_count_distincts(self): + query = ( + "SELECT \nCOUNT(DISTINCT y) FROM a UNION SELECT \nCOUNT(DISTINCT x) FROM b" + ) + self._verify_query_validation_results( + self._validator.get_query_validation_results(query), + [ + self._get_approx_distinct_validation_result(1, 0, 1, 13), + self._get_approx_distinct_validation_result(2, 0, 2, 13), + ], + ) + + def test_count_distinct_in_where_clause(self): + query = ( + "SELECT \nCOUNT(DISTINCT a), b FROM table_a WHERE \nCOUNT(DISTINCT a) > 10" + ) + self._verify_query_validation_results( + self._validator.get_query_validation_results(query), + [ + self._get_approx_distinct_validation_result(1, 0, 1, 13), + self._get_approx_distinct_validation_result(2, 0, 2, 13), + ], + ) + + +class RegexpLikeValidatorTestCase(BaseValidatorTestCase): + def setUp(self): + self._validator = RegexpLikeValidator() + + def test_basic_combine_case(self): + query = "SELECT * from a WHERE \nx LIKE 'foo' OR x LIKE \n'bar'" + self._verify_query_validation_results( + self._validator.get_query_validation_results(query), + [ + self._get_regexp_like_validation_result( + 1, 0, 2, 4, "REGEXP_LIKE(x, 'foo|bar')" + ) + ], + ) + + def test_and_clause(self): + query = "SELECT * from a WHERE \nx LIKE 'foo%' AND x LIKE \n'%bar'" + self._verify_query_validation_results( + self._validator.get_query_validation_results(query), + [], + ) + + def test_more_than_two_phrases(self): + query = "SELECT * from a WHERE \nx LIKE 'foo' OR x LIKE 'bar' OR x LIKE \n'baz'" + self._verify_query_validation_results( + self._validator.get_query_validation_results(query), + [ + self._get_regexp_like_validation_result( + 1, 0, 2, 4, "REGEXP_LIKE(x, 'foo|bar|baz')" + ) + ], + ) + + def test_different_column_names(self): + query = "SELECT * from a WHERE \nx LIKE 'foo' OR y LIKE 'bar'" + self._verify_query_validation_results( + self._validator.get_query_validation_results(query), + [], + ) + + def test_both_or_and(self): + query = ( + "SELECT * from a WHERE \nx LIKE 'foo' OR x LIKE \n'bar' AND y LIKE 'foo'" + ) + self._verify_query_validation_results( + self._validator.get_query_validation_results(query), + [ + self._get_regexp_like_validation_result( + 1, 0, 2, 4, "REGEXP_LIKE(x, 'foo|bar')" + ) + ], + ) + + def test_multiple_suggestions(self): + query = "SELECT * from a WHERE \nx LIKE 'foo' OR x LIKE \n'bar' AND \ny LIKE 'foo' OR y LIKE \n'bar'" + self._verify_query_validation_results( + self._validator.get_query_validation_results(query), + [ + self._get_regexp_like_validation_result( + 1, 0, 2, 4, "REGEXP_LIKE(x, 'foo|bar')" + ), + self._get_regexp_like_validation_result( + 3, 0, 4, 4, "REGEXP_LIKE(y, 'foo|bar')" + ), + ], + ) + + def test_phrase_not_match(self): + query = "SELECT * from a WHERE x LIKE 'foo' OR x = 'bar'" + self._verify_query_validation_results( + self._validator.get_query_validation_results(query), + [], + ) + + +class PrestoOptimizingValidatorTestCase(BaseValidatorTestCase): + def setUp(self): + self._validator = PrestoOptimizingValidator("") + + def test_union_and_count_distinct(self): + query = "SELECT \nCOUNT( DISTINCT x) from a \nUNION select \ncount(distinct y) from b" + self._verify_query_validation_results( + self._validator._get_sql_glot_validation_results(query), + [ + self._get_union_all_validation_result(2, 0, 2, 4), + self._get_approx_distinct_validation_result(1, 0, 1, 14), + self._get_approx_distinct_validation_result(3, 0, 3, 13), + ], + ) + + def test_union_and_regexp_like(self): + query = "SELECT * from a WHERE \nx like 'foo' or x like \n'bar' \nUNION select * from b where y like 'foo' AND x like 'bar'" + self._verify_query_validation_results( + self._validator._get_sql_glot_validation_results(query), + [ + self._get_union_all_validation_result(3, 0, 3, 4), + self._get_regexp_like_validation_result( + 1, 0, 2, 4, "REGEXP_LIKE(x, 'foo|bar')" + ), + ], + ) + + def test_count_distinct_and_regexp_like(self): + query = "SELECT \nCOUNT( DISTINCT x) from a WHERE \nx LIKE 'foo' or x like \n'bar' and y like 'foo'" + self._verify_query_validation_results( + self._validator._get_sql_glot_validation_results(query), + [ + self._get_approx_distinct_validation_result(1, 0, 1, 14), + self._get_regexp_like_validation_result( + 2, 0, 3, 4, "REGEXP_LIKE(x, 'foo|bar')" + ), + ], + ) + + def test_all_errors(self): + query = "SELECT \nCOUNT( DISTINCT x) from a WHERE \nx LIKE 'foo' or x like \n'bar' and y like 'foo' \nUNION select * from b" + self._verify_query_validation_results( + self._validator._get_sql_glot_validation_results(query), + [ + self._get_union_all_validation_result(4, 0, 4, 4), + self._get_approx_distinct_validation_result(1, 0, 1, 14), + self._get_regexp_like_validation_result( + 2, 0, 3, 4, "REGEXP_LIKE(x, 'foo|bar')" + ), + ], + ) + + def test_extra_whitespace(self): + query = "SELECT \n COUNT( DISTINCT x) from a WHERE \n\t x LIKE 'foo' or x like \n'bar' and y like 'foo' \n UNION select * from b" + self._verify_query_validation_results( + self._validator._get_sql_glot_validation_results(query), + [ + self._get_union_all_validation_result(4, 5, 4, 9), + self._get_approx_distinct_validation_result(1, 2, 1, 16), + self._get_regexp_like_validation_result( + 2, 3, 3, 4, "REGEXP_LIKE(x, 'foo|bar')" + ), + ], + ) diff --git a/querybook/webapp/components/CodeMirrorTooltip/CodeMirrorTooltip.tsx b/querybook/webapp/components/CodeMirrorTooltip/CodeMirrorTooltip.tsx index 86ff1e68d..5782b54fd 100644 --- a/querybook/webapp/components/CodeMirrorTooltip/CodeMirrorTooltip.tsx +++ b/querybook/webapp/components/CodeMirrorTooltip/CodeMirrorTooltip.tsx @@ -9,6 +9,7 @@ import { IStoreState } from 'redux/store/types'; import { FunctionDocumentationTooltip } from './FunctionDocumentationTooltip'; import { TableTooltip } from './TableTooltip'; +import { SuggestionTooltip } from './SuggestionTooltip'; import './CodeMirrorTooltip.scss'; @@ -16,14 +17,24 @@ export interface ICodeMirrorTooltipProps { tableId?: number; functionDocumentations?: IFunctionDescription[]; error?: React.ReactChild; + suggestionText?: string; openTableModal?: () => any; + onAcceptSuggestion?: (suggestion: string) => void; hide: () => any; } export const CodeMirrorTooltip: React.FunctionComponent< ICodeMirrorTooltipProps -> = ({ tableId, hide, functionDocumentations, error, openTableModal }) => { +> = ({ + tableId, + hide, + functionDocumentations, + error, + openTableModal, + suggestionText, + onAcceptSuggestion, +}) => { const { table, schema, columns } = useShallowSelector( (state: IStoreState) => { const tableFromState = state.dataSources.dataTablesById[tableId]; @@ -81,6 +92,13 @@ export const CodeMirrorTooltip: React.FunctionComponent<
{error}
); + } else if (suggestionText && onAcceptSuggestion) { + contentDOM = ( + + ); } return
{contentDOM}
; diff --git a/querybook/webapp/components/CodeMirrorTooltip/SuggestionTooltip.tsx b/querybook/webapp/components/CodeMirrorTooltip/SuggestionTooltip.tsx new file mode 100644 index 000000000..302f0039a --- /dev/null +++ b/querybook/webapp/components/CodeMirrorTooltip/SuggestionTooltip.tsx @@ -0,0 +1,43 @@ +import React, { useCallback, useMemo } from 'react'; +import { Button } from 'ui/Button/Button'; + +interface IProps { + suggestionText: string; + onAcceptSuggestion: (suggestion: string) => void; +} + +const SUGGESTION_MAX_LENGTH = 20; + +export const SuggestionTooltip: React.FunctionComponent = ({ + suggestionText, + onAcceptSuggestion, +}) => { + const truncatedSuggestion = useMemo( + () => + suggestionText.length > SUGGESTION_MAX_LENGTH + ? suggestionText.slice(0, SUGGESTION_MAX_LENGTH - 3) + '...' + : suggestionText, + [suggestionText] + ); + + const onClick = useCallback(() => { + onAcceptSuggestion(suggestionText); + }, [onAcceptSuggestion, suggestionText]); + + return ( +
+
+ Replace with {truncatedSuggestion} +
+
+
+
+ ); +}; diff --git a/querybook/webapp/components/QueryEditor/QueryEditor.tsx b/querybook/webapp/components/QueryEditor/QueryEditor.tsx index 78405160f..bc62326e5 100644 --- a/querybook/webapp/components/QueryEditor/QueryEditor.tsx +++ b/querybook/webapp/components/QueryEditor/QueryEditor.tsx @@ -22,6 +22,7 @@ import { import { useAutoComplete } from 'hooks/queryEditor/useAutoComplete'; import { useCodeAnalysis } from 'hooks/queryEditor/useCodeAnalysis'; import { useLint } from 'hooks/queryEditor/useLint'; +import { useDebouncedFn } from 'hooks/useDebouncedFn'; import CodeMirror, { CodeMirrorKeyMap } from 'lib/codemirror'; import { SQL_JINJA_MODE } from 'lib/codemirror/codemirror-mode'; import { @@ -29,12 +30,7 @@ import { ExcludedTriggerKeys, } from 'lib/sql-helper/sql-autocompleter'; import { format, ISQLFormatOptions } from 'lib/sql-helper/sql-formatter'; -import { - ILinterWarning, - IRange, - IToken, - TableToken, -} from 'lib/sql-helper/sql-lexer'; +import { ILinterWarning, IRange, TableToken } from 'lib/sql-helper/sql-lexer'; import { formatNumber } from 'lib/utils/number'; import { navigateWithinEnv } from 'lib/utils/query-string'; import { IconButton } from 'ui/Button/IconButton'; @@ -168,6 +164,14 @@ export const QueryEditor: React.FC< return list.length > 0 ? list[0] : null; }, [queryAnnotations]); + const annotationSuggestions: ILinterWarning[] = useMemo( + () => + queryAnnotations.filter( + (annotation) => annotation.suggestion != null + ), + [queryAnnotations] + ); + const openTableModal = useCallback((tableId: number) => { navigateWithinEnv(`/table/${tableId}/`, { isModal: true, @@ -233,6 +237,26 @@ export const QueryEditor: React.FC< [getTableByName, codeAnalysisRef] ); + const getSuggestionByPosition = useCallback( + (pos: CodeMirror.Position) => { + const currLine = pos.line; + const currCh = pos.ch; + + for (const suggestion of annotationSuggestions) { + if ( + suggestion.from.line <= currLine && + suggestion.to.line >= currLine && + suggestion.from.ch <= currCh && + suggestion.to.ch >= currCh + ) { + return suggestion; + } + } + return null; + }, + [annotationSuggestions] + ); + const onOpenTableModal = useCallback( (editor: CodeMirror.Editor) => { const pos = editor.getDoc().getCursor(); @@ -290,13 +314,10 @@ export const QueryEditor: React.FC< const markTextAndShowTooltip = ( editor: CodeMirror.Editor, - pos: CodeMirror.Position, - token: IToken, + markTextFrom: CodeMirror.Position, + markTextTo: CodeMirror.Position, tooltipProps: Omit ) => { - const markTextFrom = { line: pos.line, ch: token.start }; - const markTextTo = { line: pos.line, ch: token.end }; - markerRef.current = editor .getDoc() .markText(markTextFrom, markTextTo, { @@ -358,56 +379,101 @@ export const QueryEditor: React.FC< [language, functionDocumentationByNameByLanguage] ); - const onTextHover = useMemo( - () => - debounce( - async (editor: CodeMirror.Editor, node, e, pos, token) => { - if ( - markerRef.current == null && - (token.type === 'variable-2' || token.type == null) - ) { - // Check if token is inside a table - const tokenInsideTable = await isTokenInTable( - pos, - token + const onTextHoverLongDebounce = useDebouncedFn( + async (editor: CodeMirror.Editor, node, e, pos, token) => { + if ( + markerRef.current == null && + (token.type === 'variable-2' || token.type == null) + ) { + // Check if token is inside a table + const tokenInsideTable = await isTokenInTable(pos, token); + const markTextFrom = { + line: pos.line, + ch: token.start, + }; + const markTextTo = { + line: pos.line, + ch: token.end, + }; + if (tokenInsideTable) { + const { tableInfo } = tokenInsideTable; + if (tableInfo) { + markTextAndShowTooltip( + editor, + markTextFrom, + markTextTo, + { + tableId: tableInfo.id, + openTableModal: () => + openTableModal(tableInfo.id), + } ); - - if (tokenInsideTable) { - const { tableInfo } = tokenInsideTable; - if (tableInfo) { - markTextAndShowTooltip(editor, pos, token, { - tableId: tableInfo.id, - openTableModal: () => - openTableModal(tableInfo.id), - }); - } else { - markTextAndShowTooltip(editor, pos, token, { - error: 'Table does not exist!', - }); + } else { + markTextAndShowTooltip( + editor, + markTextFrom, + markTextTo, + { + error: 'Table does not exist!', } - } + ); + } + } - const nextChar = editor.getDoc().getLine(pos.line)[ - token.end - ]; - if (nextChar === '(') { - // if it seems like a function call - const functionDef = matchFunctionWithDefinition( - token.string - ); - if (functionDef) { - markTextAndShowTooltip(editor, pos, token, { - functionDocumentations: functionDef, - }); + const nextChar = editor.getDoc().getLine(pos.line)[ + token.end + ]; + if (nextChar === '(') { + // if it seems like a function call + const functionDef = matchFunctionWithDefinition( + token.string + ); + if (functionDef) { + markTextAndShowTooltip( + editor, + markTextFrom, + markTextTo, + { + functionDocumentations: functionDef, } - } + ); } - }, - 600 - ), + } + } + }, + 600, [isTokenInTable, matchFunctionWithDefinition, openTableModal] ); + const onTextHoverShortDebounce = useDebouncedFn( + (editor: CodeMirror.Editor, node, e, pos, _token) => { + if (markerRef.current == null) { + const suggestionAnnotation = getSuggestionByPosition(pos); + if (suggestionAnnotation != null) { + const { suggestion, from, to } = suggestionAnnotation; + markTextAndShowTooltip(editor, from, to, { + onAcceptSuggestion: (suggestion: string) => + editor.replaceRange(suggestion, from, to), + suggestionText: suggestion, + }); + } + } + }, + 100, + [getSuggestionByPosition] + ); + + const onTextHover = useCallback( + async (editor: CodeMirror.Editor, node, e, pos, token) => { + // Debounce asynchronous checks with a longer delay (e.g. for requesting table metadata) + onTextHoverLongDebounce(editor, node, e, pos, token); + + // Faster checks use a shorter delay + onTextHoverShortDebounce(editor, node, e, pos, token); + }, + [onTextHoverLongDebounce, onTextHoverShortDebounce] + ); + const showAutoCompletion = useMemo( () => debounce((editor: CodeMirror.Editor) => { diff --git a/querybook/webapp/components/TemplateQueryView/TemplatedQueryView.tsx b/querybook/webapp/components/TemplateQueryView/TemplatedQueryView.tsx index f572e2ba6..e6738daba 100644 --- a/querybook/webapp/components/TemplateQueryView/TemplatedQueryView.tsx +++ b/querybook/webapp/components/TemplateQueryView/TemplatedQueryView.tsx @@ -60,12 +60,18 @@ function useValidateQuery( .map((error) => { const token = tokens.find( (token) => - token.line === error.line && token.start === error.ch + token.line === error.start_line && + token.start === error.start_ch ); if (token) { return { - from: queryPositions[token.line] + token.start, - to: queryPositions[token.line] + token.end, + from: queryPositions[error.start_line] + error.start_ch, + to: + error.end_line && error.end_ch + ? queryPositions[error.end_line] + + error.end_ch + + 1 + : queryPositions[token.line] + token.end, className: 'code-highlight-red', }; } @@ -145,7 +151,8 @@ export const TemplatedQueryView: React.FC = ({ const errorsDOM = queryValidationErrors.map((err, i) => (

- Line: {err.line} Ch: {err.ch}, Message: {err.message} + Line: {err.start_line} Ch: {err.start_ch}, Message:{' '} + {err.message}

)); diff --git a/querybook/webapp/const/queryExecution.ts b/querybook/webapp/const/queryExecution.ts index 9b1f632bf..28344278c 100644 --- a/querybook/webapp/const/queryExecution.ts +++ b/querybook/webapp/const/queryExecution.ts @@ -145,9 +145,12 @@ export interface IQueryExecutionNotification { } export interface IQueryValidationResult { - line: number; - ch: number; + start_line: number; + start_ch: number; + end_line: number | null; + end_ch: number | null; message: string; severity: string; type: string; + suggestion: string | null; } diff --git a/querybook/webapp/hooks/useDebouncedFn.ts b/querybook/webapp/hooks/useDebouncedFn.ts new file mode 100644 index 000000000..9d11d3855 --- /dev/null +++ b/querybook/webapp/hooks/useDebouncedFn.ts @@ -0,0 +1,10 @@ +import { useMemo } from 'react'; +import { debounce } from 'lodash'; + +export function useDebouncedFn any>( + fn: F, + timeout: number, + deps: any[] +) { + return useMemo(() => debounce(fn, timeout), deps); +} diff --git a/querybook/webapp/lib/codemirror/codemirror-lint.ts b/querybook/webapp/lib/codemirror/codemirror-lint.ts index 7af84efc2..5de4b53fc 100644 --- a/querybook/webapp/lib/codemirror/codemirror-lint.ts +++ b/querybook/webapp/lib/codemirror/codemirror-lint.ts @@ -17,7 +17,16 @@ export function createSQLLinter( ); return validationResults.map((validationError) => { - const { type, line, ch, severity, message } = validationError; + const { + type, + start_line: line, + start_ch: ch, + end_line: endLine, + end_ch: endCh, + severity, + message, + suggestion, + } = validationError; const errorToken = cm.getTokenAt({ line, @@ -32,12 +41,13 @@ export function createSQLLinter( line, }, to: { - ch: errorToken.end, - line, + ch: endCh != null ? endCh + 1 : errorToken.end, + line: endLine ?? line, }, severity, message, type, + suggestion, } as ILinterWarning; } else { return { @@ -52,6 +62,7 @@ export function createSQLLinter( severity, message, type, + suggestion, } as ILinterWarning; } }); diff --git a/querybook/webapp/lib/sql-helper/sql-context-free-linter.ts b/querybook/webapp/lib/sql-helper/sql-context-free-linter.ts index 0dbbd7832..c43cb7b1b 100644 --- a/querybook/webapp/lib/sql-helper/sql-context-free-linter.ts +++ b/querybook/webapp/lib/sql-helper/sql-context-free-linter.ts @@ -56,6 +56,7 @@ const contextFreeLinterWarningsByLanguage: Record< line: distinct.line, ch: distinct.end, }, + suggestion: null, }); } } @@ -98,6 +99,7 @@ const contextFreeLinterWarningsByLanguage: Record< line: firstToken.line, ch: firstToken.end, }, + suggestion: null, }); } } diff --git a/querybook/webapp/lib/sql-helper/sql-context-sensitive-linter.ts b/querybook/webapp/lib/sql-helper/sql-context-sensitive-linter.ts index 69f5247d0..5d7703165 100644 --- a/querybook/webapp/lib/sql-helper/sql-context-sensitive-linter.ts +++ b/querybook/webapp/lib/sql-helper/sql-context-sensitive-linter.ts @@ -33,6 +33,7 @@ export function getContextSensitiveWarnings( line: table.line, ch: table.end, }, + suggestion: null, }); } } else { @@ -71,6 +72,7 @@ export function getContextSensitiveWarnings( line: table.line, ch: table.end, }, + suggestion: null, }); } } diff --git a/querybook/webapp/lib/sql-helper/sql-lexer.ts b/querybook/webapp/lib/sql-helper/sql-lexer.ts index fa9cff7c7..d83ce80f5 100644 --- a/querybook/webapp/lib/sql-helper/sql-lexer.ts +++ b/querybook/webapp/lib/sql-helper/sql-lexer.ts @@ -177,6 +177,7 @@ export interface ILinterWarning extends IRange { type: 'lint' | 'general'; message: string; severity: 'error' | 'warning' | 'info'; + suggestion: string | null; } export interface ILineage { diff --git a/requirements/parser/sqlglot.txt b/requirements/parser/sqlglot.txt index 6989f97a4..1f816ef36 100644 --- a/requirements/parser/sqlglot.txt +++ b/requirements/parser/sqlglot.txt @@ -1 +1 @@ -sqlglot==5.1.3 +sqlglot==17.8.0 diff --git a/requirements/test.txt b/requirements/test.txt index e407f751f..397765b37 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -6,3 +6,4 @@ moto==2.2.4 -r engine/hive.txt -r metastore/glue.txt -r exporter/gspread.txt +-r parser/sqlglot.txt