From a897954afcef35d39feef3a4760a21b999cbf2fd Mon Sep 17 00:00:00 2001 From: Mayur Singal <39544459+ulixius9@users.noreply.github.com> Date: Mon, 8 May 2023 15:18:51 +0530 Subject: [PATCH] Improve filtering for lineage query (#11457) --- .../ingestion/source/database/bigquery/lineage.py | 5 ++++- .../ingestion/source/database/clickhouse/lineage.py | 5 ++++- .../metadata/ingestion/source/database/mssql/lineage.py | 9 ++++++++- .../ingestion/source/database/postgres/lineage.py | 4 +++- .../ingestion/source/database/redshift/lineage.py | 6 ++++-- .../metadata/ingestion/source/database/redshift/usage.py | 8 ++++---- .../ingestion/source/database/snowflake/lineage.py | 5 ++++- .../ingestion/source/database/vertica/lineage.py | 8 +++++++- 8 files changed, 38 insertions(+), 12 deletions(-) diff --git a/ingestion/src/metadata/ingestion/source/database/bigquery/lineage.py b/ingestion/src/metadata/ingestion/source/database/bigquery/lineage.py index 51fd0eff82e2..884528930a0c 100644 --- a/ingestion/src/metadata/ingestion/source/database/bigquery/lineage.py +++ b/ingestion/src/metadata/ingestion/source/database/bigquery/lineage.py @@ -27,5 +27,8 @@ class BigqueryLineageSource(BigqueryQueryParserSource, LineageSource): sql_stmt = BIGQUERY_STATEMENT filters = """ - AND statement_type IN ("INSERT", "MERGE", "CREATE_TABLE_AS_SELECT", "UPDATE") + AND ( + statement_type IN ("MERGE", "CREATE_TABLE_AS_SELECT", "UPDATE") + OR (statement_type = "INSERT" and UPPER(query) like '%%INSERT%%INTO%%SELECT%%') + ) """ diff --git a/ingestion/src/metadata/ingestion/source/database/clickhouse/lineage.py b/ingestion/src/metadata/ingestion/source/database/clickhouse/lineage.py index 1bca5462e5b8..0871c0e3984e 100644 --- a/ingestion/src/metadata/ingestion/source/database/clickhouse/lineage.py +++ b/ingestion/src/metadata/ingestion/source/database/clickhouse/lineage.py @@ -29,7 +29,10 @@ class ClickhouseLineageSource(ClickhouseQueryParserSource, LineageSource): sql_stmt = CLICKHOUSE_SQL_STATEMENT filters = """ - and query_kind in ('Create', 'Insert') + and ( + query_kind='Create' + or (query_kind='Insert' and query ilike '%%insert%%into%%select%%') + ) """ database_field = "" diff --git a/ingestion/src/metadata/ingestion/source/database/mssql/lineage.py b/ingestion/src/metadata/ingestion/source/database/mssql/lineage.py index 86069d0fa69e..08d93166dea2 100644 --- a/ingestion/src/metadata/ingestion/source/database/mssql/lineage.py +++ b/ingestion/src/metadata/ingestion/source/database/mssql/lineage.py @@ -19,4 +19,11 @@ class MssqlLineageSource(MssqlQueryParserSource, LineageSource): sql_stmt = MSSQL_SQL_STATEMENT - filters = "" # No filtering in the queries + filters = """ + AND ( + lower(t.text) LIKE '%%select%%into%%' + OR lower(t.text) LIKE '%%insert%%into%%select%%' + OR lower(t.text) LIKE '%%update%%' + OR lower(t.text) LIKE '%%merge%%' + ) + """ diff --git a/ingestion/src/metadata/ingestion/source/database/postgres/lineage.py b/ingestion/src/metadata/ingestion/source/database/postgres/lineage.py index afc36491f4ef..4dd420186807 100644 --- a/ingestion/src/metadata/ingestion/source/database/postgres/lineage.py +++ b/ingestion/src/metadata/ingestion/source/database/postgres/lineage.py @@ -34,7 +34,9 @@ class PostgresLineageSource(PostgresQueryParserSource, LineageSource): filters = """ AND ( s.query ILIKE '%%create table%%as%%select%%' - OR s.query ILIKE '%%insert%%' + OR s.query ILIKE '%%insert%%into%%select%%' + OR s.query ILIKE '%%update%%' + OR s.query ILIKE '%%merge%%' ) """ diff --git a/ingestion/src/metadata/ingestion/source/database/redshift/lineage.py b/ingestion/src/metadata/ingestion/source/database/redshift/lineage.py index a6da6c270994..d496cb1768c8 100644 --- a/ingestion/src/metadata/ingestion/source/database/redshift/lineage.py +++ b/ingestion/src/metadata/ingestion/source/database/redshift/lineage.py @@ -40,8 +40,10 @@ class RedshiftLineageSource(RedshiftQueryParserSource, LineageSource): filters = """ AND ( - querytxt ILIKE '%%create table%%as%%select%%' - OR querytxt ILIKE '%%insert%%' + querytxt ILIKE '%%create%%table%%as%%select%%' + OR querytxt ILIKE '%%insert%%into%%select%%' + OR querytxt ILIKE '%%update%%' + OR querytxt ILIKE '%%merge%%' ) """ diff --git a/ingestion/src/metadata/ingestion/source/database/redshift/usage.py b/ingestion/src/metadata/ingestion/source/database/redshift/usage.py index 781779cec1d4..5b790d1f2c33 100644 --- a/ingestion/src/metadata/ingestion/source/database/redshift/usage.py +++ b/ingestion/src/metadata/ingestion/source/database/redshift/usage.py @@ -20,10 +20,10 @@ class RedshiftUsageSource(RedshiftQueryParserSource, UsageSource): filters = """ - AND querytxt NOT ILIKE 'fetch %%' - AND querytxt NOT ILIKE 'padb_fetch_sample: %%' - AND querytxt NOT ILIKE 'Undoing %% transactions on table %% with current xid%%' - AND querytxt NOT ILIKE '%%create table%%as%%select%%' + AND querytxt NOT ILIKE 'fetch%%' + AND querytxt NOT ILIKE 'padb_fetch_sample:%%' + AND querytxt NOT ILIKE 'Undoing%%transactions%%on%%table%%with%%current%%xid%%' + AND querytxt NOT ILIKE '%%create%%table%%as%%select%%' AND querytxt NOT ILIKE '%%insert%%' """ diff --git a/ingestion/src/metadata/ingestion/source/database/snowflake/lineage.py b/ingestion/src/metadata/ingestion/source/database/snowflake/lineage.py index 11c2c5f1cb67..e4e8ea085109 100644 --- a/ingestion/src/metadata/ingestion/source/database/snowflake/lineage.py +++ b/ingestion/src/metadata/ingestion/source/database/snowflake/lineage.py @@ -27,5 +27,8 @@ class SnowflakeLineageSource(SnowflakeQueryParserSource, LineageSource): sql_stmt = SNOWFLAKE_SQL_STATEMENT filters = """ - AND QUERY_TYPE IN ('INSERT', 'MERGE', 'UPDATE','CREATE_TABLE_AS_SELECT') + AND ( + QUERY_TYPE IN ('MERGE', 'UPDATE','CREATE_TABLE_AS_SELECT') + OR (QUERY_TYPE = 'INSERT' and query_text ILIKE '%%insert%%into%%select%%') + ) """ diff --git a/ingestion/src/metadata/ingestion/source/database/vertica/lineage.py b/ingestion/src/metadata/ingestion/source/database/vertica/lineage.py index 8b3d3996d8a9..89108542b263 100644 --- a/ingestion/src/metadata/ingestion/source/database/vertica/lineage.py +++ b/ingestion/src/metadata/ingestion/source/database/vertica/lineage.py @@ -24,7 +24,13 @@ class VerticaLineageSource(VerticaQueryParserSource, LineageSource): sql_stmt = VERTICA_SQL_STATEMENT - filters = "AND query_type in ('INSERT', 'UPDATE', 'QUERY', 'DDL')" + filters = """ + AND ( + query_type in ('UPDATE', 'DDL') + OR ( query_type IN ('INSERT','QUERY') and p.query ilike '%%INSERT%%INTO%%SELECT%%') + OR ( query_type = 'QUERY' and p.query not ilike '%%INSERT%%INTO%%') + ) + """ database_field = "DBNAME()"