Skip to content

Commit

Permalink
Merge branch 'main' into staging/br_sicar
Browse files Browse the repository at this point in the history
  • Loading branch information
folhesgabriel authored Oct 8, 2024
2 parents 338e47c + 3724d02 commit a8ddd2a
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 8 deletions.
4 changes: 4 additions & 0 deletions pipelines/utils/crawler_camara_dados_abertos/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ class constants(Enum):
ANO_ATUAL = (datetime.now()).year
ANO_ANTERIOR = (ANO_ATUAL - 1)

HEADERS = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
}

TABLES_INPUT_PATH = {
# ! - > Proposição
"proposicao_microdados": f"/tmp/input/proposicoes-{ANO_ATUAL}.csv",
Expand Down
8 changes: 5 additions & 3 deletions pipelines/utils/crawler_camara_dados_abertos/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def save_data(table_id: str) -> str:
df = download_and_read_data(table_id)
if not os.path.exists(f'{constants_camara.OUTPUT_PATH.value}{table_id}'):
os.makedirs(f'{constants_camara.OUTPUT_PATH.value}{table_id}')
log(f'testando : {constants_camara.OUTPUT_PATH.value}{table_id}')

output_path = constants_camara.TABLES_OUTPUT_PATH.value[table_id]

if table_id == "proposicao_microdados":
Expand Down Expand Up @@ -74,11 +74,13 @@ def save_data(table_id: str) -> str:
retry_delay=timedelta(seconds=constants.TASK_RETRY_DELAY.value),
)
def check_if_url_is_valid(table_id: str) -> bool:
    """Check which yearly URL of ``table_id`` currently answers with HTTP 200.

    The current-year URL (``TABLES_URL``) is probed first; the previous-year
    URL (``TABLES_URL_ANO_ANTERIOR``) is only probed when the first one does
    not answer with status 200.

    Args:
        table_id: Key used to look up the table URLs in ``constants_camara``.

    Returns:
        ``True`` when the current-year URL answers with status 200,
        ``False`` when only the previous-year URL does.

    Raises:
        ValueError: If neither URL answers with status 200.
        requests.exceptions.RequestException: If a request fails or times
            out; the error propagates to the caller.
    """
    # Without a timeout ``requests`` may wait forever on an unresponsive
    # server, which would block the task instead of failing it.
    timeout_seconds = 60
    headers = constants_camara.HEADERS.value

    current_url = constants_camara.TABLES_URL.value[table_id]
    current_response = requests.get(
        current_url, headers=headers, timeout=timeout_seconds
    )
    if current_response.status_code == 200:
        log("URL is valid")
        log(current_url)
        return True

    # Looked up only after the current-year probe fails, as in the original.
    previous_url = constants_camara.TABLES_URL_ANO_ANTERIOR.value[table_id]
    previous_response = requests.get(
        previous_url, headers=headers, timeout=timeout_seconds
    )
    if previous_response.status_code == 200:
        log("Table is not available in the current year only in the previous year")
        log(previous_url)
        return False

    raise ValueError("URL is not valid")
8 changes: 3 additions & 5 deletions pipelines/utils/crawler_camara_dados_abertos/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

# ----------------------------------------------------------------------------------- > Universal
def download_table_despesa(table_id:str) -> None:

http_response = urlopen(constants_camara.TABLES_URL.value[table_id])
zipfile = ZipFile(BytesIO(http_response.read()))
zipfile.extractall(path=constants_camara.INPUT_PATH.value)
Expand All @@ -34,13 +35,10 @@ def download_all_table(table_id: str) -> None:

url = constants_camara.TABLES_URL.value[table_id]
input_path = constants_camara.TABLES_INPUT_PATH.value[table_id]
headers = {
"Content-Type": "application/json;charset=UTF-8",
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko)"
}


log(f"Downloading {table_id} from {url}")
response = requests.get(url, headers=headers)
response = requests.get(url, headers=constants_camara.HEADERS.value)
if response.status_code == 200:
with open(input_path, "wb") as f:
f.write(response.content)
Expand Down

0 comments on commit a8ddd2a

Please sign in to comment.