Skip to content

Commit

Permalink
chore: add historical data
Browse files Browse the repository at this point in the history
  • Loading branch information
d116626 committed Jul 9, 2024
1 parent e9a0f57 commit b368df3
Show file tree
Hide file tree
Showing 5 changed files with 149 additions and 18 deletions.
6 changes: 2 additions & 4 deletions pipelines/ocr_radar/dump_db_radar/flows.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,7 @@
)

from pipelines.constants import constants
from pipelines.ocr_radar.dump_db_radar.schedules import (
ocr_radar_monthly_update_schedule,
)
from pipelines.ocr_radar.dump_db_radar.schedules import ocr_radar_schedule

# Work on an independent copy of the shared dump_sql_flow template so the
# settings applied below do not mutate the template object itself.
dump_sql_ocr_radar_flow = deepcopy(dump_sql_flow)
# State handlers attached to this flow copy: BD credentials injection and Sentry initialization.
dump_sql_ocr_radar_flow.state_handlers = [handler_inject_bd_credentials, handler_initialize_sentry]
Expand All @@ -42,4 +40,4 @@
dump_sql_ocr_radar_flow, default_parameters=ocr_radar_default_parameters
)

dump_sql_ocr_radar_flow.schedule = ocr_radar_monthly_update_schedule
dump_sql_ocr_radar_flow.schedule = ocr_radar_schedule
150 changes: 141 additions & 9 deletions pipelines/ocr_radar/dump_db_radar/schedules.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,23 +19,154 @@
#
#####################################

ocr_radar_queries = {
# automatic generated using https://jupyter.dados.rio/lab/tree/bases/rj-cetrio/ocr_radar/generate_queries.ipynb
ocr_radar_equipamento_queries = {
"equipamento": {
"materialize_after_dump": True,
"biglake_table": True,
"materialization_mode": "prod",
"dump_mode": "overwrite",
"execute_query": """
SELECT
*
FROM [DBOCR_2024].[dbo].[Equipamento]
""",
"execute_query": "SELECT * FROM [DBOCR_2024].[dbo].[Equipamento]",
"interval": timedelta(days=7),
},
}

# Clocks for the "equipamento" table dump (see ocr_radar_equipamento_queries),
# generated with a 7-day interval against the DBOCR_2024 SQL Server database.
ocr_radar_equipamento_clocks = generate_dump_db_schedules(
    interval=timedelta(days=7),
    # pytz zones must be attached with localize(): passing one as tzinfo= to
    # datetime() selects the zone's historical LMT offset (-03:06 for
    # Sao Paulo) instead of the -03:00 civil offset.
    start_date=pytz.timezone("America/Sao_Paulo").localize(datetime(2024, 7, 9, 22, 30)),
    labels=[
        constants.RJ_CETRIO_AGENT_LABEL.value,
    ],
    db_database="DBOCR_2024",
    db_host="10.39.64.50",
    db_port="1433",
    db_type="sql_server",
    dataset_id="ocr_radar",
    infisical_secret_path="/db-ocr-radar",
    table_parameters=ocr_radar_equipamento_queries,
)

# Timezone shared by every per-table start date below.
_OCR_RADAR_TZ = pytz.timezone("America/Sao_Paulo")


def _ocr_radar_readings_parameters(month):
    """Return the dump parameters for the 2024 monthly readings table.

    Args:
        month: Month number (1-12) selecting the source table
            ``[DBOCR_2024].[dbo].[OCR_<MM>2024]``.

    Returns:
        A dict of table parameters. Months 7-12 additionally carry a
        ``start_date`` set to day 2 of the following month at 22:30
        (America/Sao_Paulo); months 1-6 have no ``start_date`` key.
    """
    parameters = {
        "partition_columns": "Data",
        "partition_date_format": "%Y-%m-%d",
        "materialize_after_dump": False,
        "biglake_table": True,
        "materialization_mode": "prod",
        "dump_mode": "append",
        "execute_query": f"SELECT * FROM [DBOCR_2024].[dbo].[OCR_{month:02d}2024]",
    }
    if month >= 7:
        # December rolls over to January of the next year.
        year, next_month = (2025, 1) if month == 12 else (2024, month + 1)
        # localize() is required with pytz: tzinfo=<pytz zone> in the datetime
        # constructor would attach the LMT offset (-03:06) instead of -03:00.
        # NOTE(review): presumably start_date overrides the schedule-wide start
        # for this table inside generate_dump_db_schedules — confirm.
        parameters["start_date"] = _OCR_RADAR_TZ.localize(datetime(year, next_month, 2, 22, 30))
    return parameters


# One entry per 2024 monthly readings table, keyed "readings_2024_01" .. "readings_2024_12".
ocr_radar_queries = {
    f"readings_2024_{month:02d}": _ocr_radar_readings_parameters(month) for month in range(1, 13)
}

ocr_radar_clocks = generate_dump_db_schedules(
interval=timedelta(days=100),
start_date=datetime(2022, 11, 9, 22, 30, tzinfo=pytz.timezone("America/Sao_Paulo")),
interval=timedelta(days=365 * 5),
start_date=datetime(2024, 7, 9, 22, 30, tzinfo=pytz.timezone("America/Sao_Paulo")),
runs_interval_minutes=7 * 60 * 24,
labels=[
constants.RJ_CETRIO_AGENT_LABEL.value,
],
Expand All @@ -48,4 +179,5 @@
table_parameters=ocr_radar_queries,
)

ocr_radar_monthly_update_schedule = Schedule(clocks=untuple(ocr_radar_clocks))

# Single schedule combining the monthly readings clocks with the equipamento clocks.
ocr_radar_schedule = Schedule(clocks=untuple(ocr_radar_clocks + ocr_radar_equipamento_clocks))
8 changes: 4 additions & 4 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ python = ">=3.10,<3.11"
dbt-bigquery = "^1.6.1"
google-cloud-storage = "^2.10.0"
prefect = "1.4.1"
prefeitura-rio = { git = "https://github.com/prefeitura-rio/prefeitura-rio", rev = "6ece88c9daca181336c56789dbec0688d21c4c74", extras = [
prefeitura-rio = { git = "https://github.com/prefeitura-rio/prefeitura-rio", rev = "38fda48678d515f4977b8b9fadc38fd4650094d3", extras = [
"pipelines",
"pipelines-templates",
"actions",
Expand Down
1 change: 1 addition & 0 deletions queries/models/ocr_radar/equipamento.sql
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ SELECT
CAST(longitude AS FLOAT64) as longitude,
CAST(logradouro AS STRING) as logradouro,
CAST(sentido AS STRING) as sentido,
-- NOTE(review): assumes the staging table exposes a velofisc column — confirm.
CAST(velofisc AS INT64) as velofisc
FROM `rj-cetrio.ocr_radar_staging.equipamento`

0 comments on commit b368df3

Please sign in to comment.