-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #10 from prefeitura-rio/staging/migrate_smfp_flows
Staging/migrate smfp flows
- Loading branch information
Showing
8 changed files
with
197 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# -*- coding: utf-8 -*- | ||
from pipelines.receita_federal_cnpj.dump_db_porte_empresa.flows import * # noqa |
Empty file.
44 changes: 44 additions & 0 deletions
44
pipelines/receita_federal_cnpj/dump_db_porte_empresa/flows.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
# -*- coding: utf-8 -*- | ||
""" | ||
Database dumping flows | ||
""" | ||
|
||
from copy import deepcopy | ||
|
||
from prefect.run_configs import KubernetesRun | ||
from prefect.storage import GCS | ||
from prefeitura_rio.pipelines_templates.dump_db.flows import flow as dump_sql_flow | ||
from prefeitura_rio.pipelines_utils.prefect import set_default_parameters | ||
from prefeitura_rio.pipelines_utils.state_handlers import ( | ||
handler_initialize_sentry, | ||
handler_inject_bd_credentials, | ||
) | ||
|
||
from pipelines.constants import constants | ||
from pipelines.receita_federal_cnpj.dump_db_porte_empresa.schedules import ( | ||
porte_empresa_schedule, | ||
) | ||
|
||
# Start from a deep copy of the shared SQL dump template so that the
# customisations below do not touch the template object itself.
porte_empresa_flow = deepcopy(dump_sql_flow)
porte_empresa_flow.name = "SMFP: CNPJ porte_empresa - Ingerir tabelas de banco SQL"
porte_empresa_flow.state_handlers = [
    handler_inject_bd_credentials,
    handler_initialize_sentry,
]
porte_empresa_flow.storage = GCS(constants.GCS_FLOWS_BUCKET.value)
porte_empresa_flow.run_config = KubernetesRun(
    image=constants.DOCKER_IMAGE.value,
    labels=[constants.RJ_SMFP_AGENT_LABEL.value],
)

# Connection and dataset defaults applied to the copied flow's parameters.
porte_empresa_default_parameters = {
    "db_database": "SDI",
    "db_host": "10.70.1.34",
    "db_port": "1433",
    "db_type": "sql_server",
    "dataset_id": "porte_empresa",
    "infisical_secret_path": "/db-porte-empresa",
}
porte_empresa_flow = set_default_parameters(
    porte_empresa_flow,
    default_parameters=porte_empresa_default_parameters,
)

# Attach the schedule defined in the sibling ``schedules`` module.
porte_empresa_flow.schedule = porte_empresa_schedule
54 changes: 54 additions & 0 deletions
54
pipelines/receita_federal_cnpj/dump_db_porte_empresa/schedules.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
# -*- coding: utf-8 -*- | ||
""" | ||
Schedules for the database dump pipeline | ||
""" | ||
|
||
from datetime import datetime, timedelta | ||
|
||
import pytz | ||
from prefect.schedules import Schedule | ||
from prefeitura_rio.pipelines_utils.io import untuple_clocks as untuple | ||
from prefeitura_rio.pipelines_utils.prefect import generate_dump_db_schedules | ||
|
||
from pipelines.constants import constants | ||
|
||
##################################### | ||
# | ||
# Porte Empresa Schedules | ||
# | ||
##################################### | ||
|
||
# Per-table dump settings, keyed by table id; the whole mapping is handed to
# ``generate_dump_db_schedules`` as ``table_parameters``.
# NOTE(review): the meaning of each option is defined by the dump_db template
# in ``prefeitura_rio`` — confirm there before changing a value.
porte_empresa_queries = {
    "situacao_cadastral": {
        "materialize_after_dump": True,
        "biglake_table": True,
        "materialization_mode": "prod",
        # Date column used for partitioning, with the format of its values.
        "partition_columns": "dt_SituacaoCadastral",
        "partition_date_format": "%Y-%m-%d",
        "dump_mode": "append",
        "lower_bound_date": "current_month",
        # Query executed against the source database.
        "execute_query": """
SELECT
CNPJ_basico, CNPJ_ordem, CNPJ_dv, RazaoSocial,
cd_PorteEmpresa, cd_SituacaoCadastral, dt_SituacaoCadastral
FROM SDI.ReceitaFederal.Vw_PorteEmpresa_Sigma
""",
    },
}
|
||
# Weekly clocks for every table declared in ``porte_empresa_queries``.
porte_empresa_clocks = generate_dump_db_schedules(
    interval=timedelta(days=7),
    # A pytz zone has to be attached with ``localize``: passing it through the
    # ``tzinfo=`` argument of ``datetime`` binds the zone's local-mean-time
    # offset instead of the regular -03:00 offset for this date.
    start_date=pytz.timezone("America/Sao_Paulo").localize(datetime(2022, 10, 30, 23, 0)),
    labels=[
        constants.RJ_SMFP_AGENT_LABEL.value,
    ],
    db_database="SDI",
    db_host="10.70.1.34",
    db_port="1433",
    db_type="sql_server",
    dataset_id="porte_empresa",
    infisical_secret_path="/db-porte-empresa",
    table_parameters=porte_empresa_queries,
)

# Schedule consumed by the flow module; ``untuple`` is applied to the generated
# clocks before they are handed to ``Schedule``.
porte_empresa_schedule = Schedule(clocks=untuple(porte_empresa_clocks))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
{{
    config(
        materialized='table',
        partition_by={
            "field": "data_particao",
            "data_type": "date",
            "granularity": "month",
        }
    )
}}
-- Typed view over the staging table: every column is cast explicitly and
-- renamed to snake_case. The table is partitioned by month on data_particao.
SELECT
    SAFE_CAST(cnpj_basico AS STRING) AS cnpj_basico,
    SAFE_CAST(cnpj_ordem AS STRING) AS cnpj_ordem,
    SAFE_CAST(cnpj_dv AS STRING) AS cnpj_dv,
    SAFE_CAST(razaosocial AS STRING) AS razao_social,
    -- The code columns may carry a trailing ".0"; strip it before casting.
    SAFE_CAST(REGEXP_REPLACE(cd_porteempresa, r'\.0$', '') AS STRING) AS id_porte_empresa,
    SAFE_CAST(REGEXP_REPLACE(cd_situacaocadastral, r'\.0$', '') AS STRING) AS id_situacao_cadastral,
    SAFE_CAST(dt_situacaocadastral AS DATE) AS data_situacao_cadastral,
    SAFE_CAST(data_particao AS DATE) AS data_particao
FROM `rj-smfp.porte_empresa_staging.situacao_cadastral`