diff --git a/pipelines/ocr_radar/dump_db_radar/flows.py b/pipelines/ocr_radar/dump_db_radar/flows.py index 8105a7f..7c1c661 100644 --- a/pipelines/ocr_radar/dump_db_radar/flows.py +++ b/pipelines/ocr_radar/dump_db_radar/flows.py @@ -15,9 +15,7 @@ ) from pipelines.constants import constants -from pipelines.ocr_radar.dump_db_radar.schedules import ( - ocr_radar_monthly_update_schedule, -) +from pipelines.ocr_radar.dump_db_radar.schedules import ocr_radar_schedule dump_sql_ocr_radar_flow = deepcopy(dump_sql_flow) dump_sql_ocr_radar_flow.state_handlers = [handler_inject_bd_credentials, handler_initialize_sentry] @@ -42,4 +40,4 @@ dump_sql_ocr_radar_flow, default_parameters=ocr_radar_default_parameters ) -dump_sql_ocr_radar_flow.schedule = ocr_radar_monthly_update_schedule +dump_sql_ocr_radar_flow.schedule = ocr_radar_schedule diff --git a/pipelines/ocr_radar/dump_db_radar/schedules.py b/pipelines/ocr_radar/dump_db_radar/schedules.py index 81608ef..68883ed 100644 --- a/pipelines/ocr_radar/dump_db_radar/schedules.py +++ b/pipelines/ocr_radar/dump_db_radar/schedules.py @@ -19,23 +19,154 @@ # ##################################### -ocr_radar_queries = { +# automatic generated using https://jupyter.dados.rio/lab/tree/bases/rj-cetrio/ocr_radar/generate_queries.ipynb +ocr_radar_equipamento_queries = { "equipamento": { "materialize_after_dump": True, "biglake_table": True, "materialization_mode": "prod", "dump_mode": "overwrite", - "execute_query": """ - SELECT - * - FROM [DBOCR_2024].[dbo].[Equipamento] - """, + "execute_query": "SELECT * FROM [DBOCR_2024].[dbo].[Equipamento]", + "interval": timedelta(days=7), + }, +} + +ocr_radar_equipamento_clocks = generate_dump_db_schedules( + interval=timedelta(days=7), + start_date=datetime(2024, 7, 9, 22, 30, tzinfo=pytz.timezone("America/Sao_Paulo")), + labels=[ + constants.RJ_CETRIO_AGENT_LABEL.value, + ], + db_database="DBOCR_2024", + db_host="10.39.64.50", + db_port="1433", + db_type="sql_server", + dataset_id="ocr_radar", + infisical_secret_path="/db-ocr-radar", + table_parameters=ocr_radar_equipamento_queries, +) + +ocr_radar_queries = { + "readings_2024_01": { + "partition_columns": "Data", + "partition_date_format": "%Y-%m-%d", + "materialize_after_dump": False, + "biglake_table": True, + "materialization_mode": "prod", + "dump_mode": "append", + "execute_query": "SELECT * FROM [DBOCR_2024].[dbo].[OCR_012024]", + }, + "readings_2024_02": { + "partition_columns": "Data", + "partition_date_format": "%Y-%m-%d", + "materialize_after_dump": False, + "biglake_table": True, + "materialization_mode": "prod", + "dump_mode": "append", + "execute_query": "SELECT * FROM [DBOCR_2024].[dbo].[OCR_022024]", + }, + "readings_2024_03": { + "partition_columns": "Data", + "partition_date_format": "%Y-%m-%d", + "materialize_after_dump": False, + "biglake_table": True, + "materialization_mode": "prod", + "dump_mode": "append", + "execute_query": "SELECT * FROM [DBOCR_2024].[dbo].[OCR_032024]", + }, + "readings_2024_04": { + "partition_columns": "Data", + "partition_date_format": "%Y-%m-%d", + "materialize_after_dump": False, + "biglake_table": True, + "materialization_mode": "prod", + "dump_mode": "append", + "execute_query": "SELECT * FROM [DBOCR_2024].[dbo].[OCR_042024]", + }, + "readings_2024_05": { + "partition_columns": "Data", + "partition_date_format": "%Y-%m-%d", + "materialize_after_dump": False, + "biglake_table": True, + "materialization_mode": "prod", + "dump_mode": "append", + "execute_query": "SELECT * FROM [DBOCR_2024].[dbo].[OCR_052024]", + }, + "readings_2024_06": { + "partition_columns": "Data", + "partition_date_format": "%Y-%m-%d", + "materialize_after_dump": False, + "biglake_table": True, + "materialization_mode": "prod", + "dump_mode": "append", + "execute_query": "SELECT * FROM [DBOCR_2024].[dbo].[OCR_062024]", + }, + "readings_2024_07": { + "partition_columns": "Data", + "partition_date_format": "%Y-%m-%d", + "materialize_after_dump": False, + "biglake_table": True, + "materialization_mode": "prod", + "dump_mode": "append", + "execute_query": "SELECT * FROM [DBOCR_2024].[dbo].[OCR_072024]", + "start_date": datetime(2024, 8, 2, 22, 30, tzinfo=pytz.timezone("America/Sao_Paulo")), + }, + "readings_2024_08": { + "partition_columns": "Data", + "partition_date_format": "%Y-%m-%d", + "materialize_after_dump": False, + "biglake_table": True, + "materialization_mode": "prod", + "dump_mode": "append", + "execute_query": "SELECT * FROM [DBOCR_2024].[dbo].[OCR_082024]", + "start_date": datetime(2024, 9, 2, 22, 30, tzinfo=pytz.timezone("America/Sao_Paulo")), + }, + "readings_2024_09": { + "partition_columns": "Data", + "partition_date_format": "%Y-%m-%d", + "materialize_after_dump": False, + "biglake_table": True, + "materialization_mode": "prod", + "dump_mode": "append", + "execute_query": "SELECT * FROM [DBOCR_2024].[dbo].[OCR_092024]", + "start_date": datetime(2024, 10, 2, 22, 30, tzinfo=pytz.timezone("America/Sao_Paulo")), + }, + "readings_2024_10": { + "partition_columns": "Data", + "partition_date_format": "%Y-%m-%d", + "materialize_after_dump": False, + "biglake_table": True, + "materialization_mode": "prod", + "dump_mode": "append", + "execute_query": "SELECT * FROM [DBOCR_2024].[dbo].[OCR_102024]", + "start_date": datetime(2024, 11, 2, 22, 30, tzinfo=pytz.timezone("America/Sao_Paulo")), + }, + "readings_2024_11": { + "partition_columns": "Data", + "partition_date_format": "%Y-%m-%d", + "materialize_after_dump": False, + "biglake_table": True, + "materialization_mode": "prod", + "dump_mode": "append", + "execute_query": "SELECT * FROM [DBOCR_2024].[dbo].[OCR_112024]", + "start_date": datetime(2024, 12, 2, 22, 30, tzinfo=pytz.timezone("America/Sao_Paulo")), + }, + "readings_2024_12": { + "partition_columns": "Data", + "partition_date_format": "%Y-%m-%d", + "materialize_after_dump": False, + "biglake_table": True, + "materialization_mode": "prod", + "dump_mode": "append", + "execute_query": "SELECT * FROM [DBOCR_2024].[dbo].[OCR_122024]", + "start_date": datetime(2025, 1, 2, 22, 30, tzinfo=pytz.timezone("America/Sao_Paulo")), }, } ocr_radar_clocks = generate_dump_db_schedules( - interval=timedelta(days=100), - start_date=datetime(2022, 11, 9, 22, 30, tzinfo=pytz.timezone("America/Sao_Paulo")), + interval=timedelta(days=365 * 5), + start_date=datetime(2024, 7, 9, 22, 30, tzinfo=pytz.timezone("America/Sao_Paulo")), + runs_interval_minutes=7 * 60 * 24, labels=[ constants.RJ_CETRIO_AGENT_LABEL.value, ], @@ -48,4 +179,5 @@ table_parameters=ocr_radar_queries, ) -ocr_radar_monthly_update_schedule = Schedule(clocks=untuple(ocr_radar_clocks)) + +ocr_radar_schedule = Schedule(clocks=untuple(ocr_radar_clocks + ocr_radar_equipamento_clocks)) diff --git a/poetry.lock b/poetry.lock index bbdb969..e472f45 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. [[package]] name = "agate" @@ -2647,8 +2647,8 @@ sgrc = ["aiohttp (>=3.8.5,<4.0.0)", "pendulum (>=2.1.2,<3.0.0)", "pytz (>=2023.3 [package.source] type = "git" url = "https://github.com/prefeitura-rio/prefeitura-rio" -reference = "6ece88c9daca181336c56789dbec0688d21c4c74" -resolved_reference = "6ece88c9daca181336c56789dbec0688d21c4c74" +reference = "38fda48678d515f4977b8b9fadc38fd4650094d3" +resolved_reference = "38fda48678d515f4977b8b9fadc38fd4650094d3" [[package]] name = "proto-plus" @@ -4250,4 +4250,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.11" -content-hash = "48dc67496b48b552dc2903c7938bd54e27d5b7a801ae63b2cec90c6d1e982524" +content-hash = "704271bbdab355d17c2342993bf0bfbb5c4cf343a0d29ca620e3a63193e3cf93" diff --git a/pyproject.toml b/pyproject.toml index 7a3923e..7f0848a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ python = ">=3.10,<3.11" dbt-bigquery = "^1.6.1" google-cloud-storage = "^2.10.0" prefect = "1.4.1" -prefeitura-rio = { git = "https://github.com/prefeitura-rio/prefeitura-rio", rev = "6ece88c9daca181336c56789dbec0688d21c4c74", extras = [ +prefeitura-rio = { git = "https://github.com/prefeitura-rio/prefeitura-rio", rev = "38fda48678d515f4977b8b9fadc38fd4650094d3", extras = [ "pipelines", "pipelines-templates", "actions", diff --git a/queries/models/ocr_radar/equipamento.sql b/queries/models/ocr_radar/equipamento.sql index a388dbc..6efc6b0 100644 --- a/queries/models/ocr_radar/equipamento.sql +++ b/queries/models/ocr_radar/equipamento.sql @@ -6,4 +6,5 @@ SELECT CAST(longitude AS FLOAT64) as longitude, CAST(logradouro AS STRING) as logradouro, CAST(sentido AS STRING) as sentido + CAST(sentido AS INT64) as velofisc FROM `rj-cetrio.ocr_radar_staging.equipamento` \ No newline at end of file