Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(ingest/redshift): fix syntax error in temp sql #11661

Merged
merged 1 commit into from
Oct 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -40,25 +40,25 @@ def report_redshift_failure(
error_message = str(e).lower()
if "permission denied" in error_message:
if "svv_table_info" in error_message:
report.report_failure(
report.failure(
title="Permission denied",
message="Failed to extract metadata due to insufficient permission to access 'svv_table_info' table. Please ensure the provided database user has access.",
exc=e,
)
elif "svl_user_info" in error_message:
report.report_failure(
report.failure(
title="Permission denied",
message="Failed to extract metadata due to insufficient permission to access 'svl_user_info' table. Please ensure the provided database user has access.",
exc=e,
)
else:
report.report_failure(
report.failure(
title="Permission denied",
message="Failed to extract metadata due to insufficient permissions.",
exc=e,
)
else:
report.report_failure(
report.failure(
title="Failed to extract some metadata",
message="Failed to extract some metadata from Redshift.",
exc=e,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -569,8 +569,7 @@ def temp_table_ddl_query(start_time: datetime, end_time: datetime) -> str:

end_time_str: str = end_time.strftime(redshift_datetime_format)

return rf"""\
-- DataHub Redshift Source temp table DDL query
return rf"""-- DataHub Redshift Source temp table DDL query
select
*
from (
Expand Down Expand Up @@ -645,7 +644,7 @@ def temp_table_ddl_query(start_time: datetime, end_time: datetime) -> str:
)
where
rn = 1
"""
"""

# Add this join to the sql query for more metrics on completed queries
# LEFT JOIN svl_query_metrics_summary sqms ON ss.query = sqms.query
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def mock_stl_insert_table_cursor(cursor: MagicMock) -> None:

query_vs_cursor_mocker = {
(
"\\\n-- DataHub Redshift Source temp table DDL query\nselect\n *\nfrom (\n select\n session_id,\n transaction_id,\n start_time,\n userid,\n REGEXP_REPLACE(REGEXP_SUBSTR(REGEXP_REPLACE(query_text,'\\\\\\\\n','\\\\n'), '(CREATE(?:[\\\\n\\\\s\\\\t]+(?:temp|temporary))?(?:[\\\\n\\\\s\\\\t]+)table(?:[\\\\n\\\\s\\\\t]+)[^\\\\n\\\\s\\\\t()-]+)', 0, 1, 'ipe'),'[\\\\n\\\\s\\\\t]+',' ',1,'p') as create_command,\n query_text,\n row_number() over (\n partition by session_id, TRIM(query_text)\n order by start_time desc\n ) rn\n from (\n select\n pid as session_id,\n xid as transaction_id,\n starttime as start_time,\n type,\n query_text,\n userid\n from (\n select\n starttime,\n pid,\n xid,\n type,\n userid,\n LISTAGG(case\n when LEN(RTRIM(text)) = 0 then text\n else RTRIM(text)\n end,\n '') within group (\n order by sequence\n ) as query_text\n from\n SVL_STATEMENTTEXT\n where\n type in ('DDL', 'QUERY')\n AND starttime >= '2024-01-01 12:00:00'\n AND starttime < '2024-01-10 12:00:00'\n AND sequence < 290\n group by\n starttime,\n pid,\n xid,\n type,\n userid\n order by\n starttime,\n pid,\n xid,\n type,\n userid\n asc\n )\n where\n type in ('DDL', 'QUERY')\n )\n where\n (create_command ilike 'create temp table %'\n or create_command ilike 'create temporary table %'\n -- we want to get all the create table statements and not just temp tables if non temp table is created and dropped in the same transaction\n or create_command ilike 'create table %')\n -- Redshift creates temp tables with the following names: volt_tt_%. We need to filter them out.\n and query_text not ilike 'CREATE TEMP TABLE volt_tt_%'\n and create_command not like 'CREATE TEMP TABLE volt_tt_'\n -- We need to filter out our query and it was not possible earlier when we did not have any comment in the query\n and query_text not ilike '%https://stackoverflow.com/questions/72770890/redshift-result-size-exceeds-listagg-limit-on-svl-statementtext%'\n\n)\nwhere\n rn = 1\n "
"-- DataHub Redshift Source temp table DDL query\nselect\n *\nfrom (\n select\n session_id,\n transaction_id,\n start_time,\n userid,\n REGEXP_REPLACE(REGEXP_SUBSTR(REGEXP_REPLACE(query_text,'\\\\\\\\n','\\\\n'), '(CREATE(?:[\\\\n\\\\s\\\\t]+(?:temp|temporary))?(?:[\\\\n\\\\s\\\\t]+)table(?:[\\\\n\\\\s\\\\t]+)[^\\\\n\\\\s\\\\t()-]+)', 0, 1, 'ipe'),'[\\\\n\\\\s\\\\t]+',' ',1,'p') as create_command,\n query_text,\n row_number() over (\n partition by session_id, TRIM(query_text)\n order by start_time desc\n ) rn\n from (\n select\n pid as session_id,\n xid as transaction_id,\n starttime as start_time,\n type,\n query_text,\n userid\n from (\n select\n starttime,\n pid,\n xid,\n type,\n userid,\n LISTAGG(case\n when LEN(RTRIM(text)) = 0 then text\n else RTRIM(text)\n end,\n '') within group (\n order by sequence\n ) as query_text\n from\n SVL_STATEMENTTEXT\n where\n type in ('DDL', 'QUERY')\n AND starttime >= '2024-01-01 12:00:00'\n AND starttime < '2024-01-10 12:00:00'\n AND sequence < 290\n group by\n starttime,\n pid,\n xid,\n type,\n userid\n order by\n starttime,\n pid,\n xid,\n type,\n userid\n asc\n )\n where\n type in ('DDL', 'QUERY')\n )\n where\n (create_command ilike 'create temp table %'\n or create_command ilike 'create temporary table %'\n -- we want to get all the create table statements and not just temp tables if non temp table is created and dropped in the same transaction\n or create_command ilike 'create table %')\n -- Redshift creates temp tables with the following names: volt_tt_%. We need to filter them out.\n and query_text not ilike 'CREATE TEMP TABLE volt_tt_%'\n and create_command not like 'CREATE TEMP TABLE volt_tt_'\n -- We need to filter out our query and it was not possible earlier when we did not have any comment in the query\n and query_text not ilike '%https://stackoverflow.com/questions/72770890/redshift-result-size-exceeds-listagg-limit-on-svl-statementtext%'\n\n)\nwhere\n rn = 1\n"
): mock_temp_table_cursor,
"select * from test_collapse_temp_lineage": mock_stl_insert_table_cursor,
}
Expand Down
Loading