From be5a58e2e4ea3c5e9faa97bd09cd002d6dd2ce96 Mon Sep 17 00:00:00 2001 From: trevineju Date: Wed, 5 Jun 2024 17:25:14 -0300 Subject: [PATCH 1/2] Adiciona novos 8 raspadores --- data_collection/gazette/spiders/ba/ba_candeias.py | 11 +++++++++++ .../gazette/spiders/mg/mg_carmo_do_rio_claro.py | 11 +++++++++++ data_collection/gazette/spiders/mg/mg_juatuba.py | 11 +++++++++++ data_collection/gazette/spiders/mt/mt_cotriguacu.py | 11 +++++++++++ .../gazette/spiders/pr/pr_santo_antonio_do_paraiso.py | 11 +++++++++++ data_collection/gazette/spiders/sp/sp_iracemapolis.py | 11 +++++++++++ data_collection/gazette/spiders/sp/sp_itaporanga.py | 11 +++++++++++ data_collection/gazette/spiders/sp/sp_potirendaba.py | 11 +++++++++++ 8 files changed, 88 insertions(+) create mode 100644 data_collection/gazette/spiders/ba/ba_candeias.py create mode 100644 data_collection/gazette/spiders/mg/mg_carmo_do_rio_claro.py create mode 100644 data_collection/gazette/spiders/mg/mg_juatuba.py create mode 100644 data_collection/gazette/spiders/mt/mt_cotriguacu.py create mode 100644 data_collection/gazette/spiders/pr/pr_santo_antonio_do_paraiso.py create mode 100644 data_collection/gazette/spiders/sp/sp_iracemapolis.py create mode 100644 data_collection/gazette/spiders/sp/sp_itaporanga.py create mode 100644 data_collection/gazette/spiders/sp/sp_potirendaba.py diff --git a/data_collection/gazette/spiders/ba/ba_candeias.py b/data_collection/gazette/spiders/ba/ba_candeias.py new file mode 100644 index 000000000..13e856dfa --- /dev/null +++ b/data_collection/gazette/spiders/ba/ba_candeias.py @@ -0,0 +1,11 @@ +from datetime import date + +from gazette.spiders.base.instar import BaseInstarSpider + + +class BaCandeiasSpider(BaseInstarSpider): + TERRITORY_ID = "2906501" + name = "ba_candeias" + allowed_domains = ["candeias.mg.gov.br"] + base_url = "https://www.candeias.mg.gov.br/portal/diario-oficial" + start_date = date(2017, 2, 8) diff --git a/data_collection/gazette/spiders/mg/mg_carmo_do_rio_claro.py b/data_collection/gazette/spiders/mg/mg_carmo_do_rio_claro.py new file mode 100644 index 000000000..7be486bcd --- /dev/null +++ b/data_collection/gazette/spiders/mg/mg_carmo_do_rio_claro.py @@ -0,0 +1,11 @@ +from datetime import date + +from gazette.spiders.base.instar import BaseInstarSpider + + +class MgCarmoDoRioClaroSpider(BaseInstarSpider): + TERRITORY_ID = "3114402" + name = "mg_carmo_do_rio_claro" + allowed_domains = ["carmodorioclaro.mg.gov.br"] + base_url = "https://www.carmodorioclaro.mg.gov.br/portal/diario-oficial" + start_date = date(2021, 5, 14) diff --git a/data_collection/gazette/spiders/mg/mg_juatuba.py b/data_collection/gazette/spiders/mg/mg_juatuba.py new file mode 100644 index 000000000..79ac9da99 --- /dev/null +++ b/data_collection/gazette/spiders/mg/mg_juatuba.py @@ -0,0 +1,11 @@ +from datetime import date + +from gazette.spiders.base.instar import BaseInstarSpider + + +class MgJuatubaSpider(BaseInstarSpider): + TERRITORY_ID = "3136652" + name = "mg_juatuba" + allowed_domains = ["juatuba.mg.gov.br"] + base_url = "https://www.juatuba.mg.gov.br/portal/diario-oficial" + start_date = date(2016, 1, 5) diff --git a/data_collection/gazette/spiders/mt/mt_cotriguacu.py b/data_collection/gazette/spiders/mt/mt_cotriguacu.py new file mode 100644 index 000000000..b104bea21 --- /dev/null +++ b/data_collection/gazette/spiders/mt/mt_cotriguacu.py @@ -0,0 +1,11 @@ +from datetime import date + +from gazette.spiders.base.instar import BaseInstarSpider + + +class MtCotriguacuSpider(BaseInstarSpider): + TERRITORY_ID = "5103379" + name = "mt_cotriguacu" + allowed_domains = ["cotriguacu.mt.gov.br"] + base_url = "https://www.cotriguacu.mt.gov.br/portal/diario-oficial" + start_date = date(2023, 11, 17) diff --git a/data_collection/gazette/spiders/pr/pr_santo_antonio_do_paraiso.py b/data_collection/gazette/spiders/pr/pr_santo_antonio_do_paraiso.py new file mode 100644 index 000000000..acbf7296c --- /dev/null +++ b/data_collection/gazette/spiders/pr/pr_santo_antonio_do_paraiso.py @@ -0,0 +1,11 @@ +from datetime import date + +from gazette.spiders.base.instar import BaseInstarSpider + + +class PrSantoAntonioDoParaisoSpider(BaseInstarSpider): + TERRITORY_ID = "4124301" + name = "pr_santo_antonio_do_paraiso" + allowed_domains = ["pmsantoantoniodoparaiso.pr.gov.br"] + base_url = "https://www.pmsantoantoniodoparaiso.pr.gov.br/portal/diario-oficial" + start_date = date(2012, 12, 27) diff --git a/data_collection/gazette/spiders/sp/sp_iracemapolis.py b/data_collection/gazette/spiders/sp/sp_iracemapolis.py new file mode 100644 index 000000000..e693ceeb0 --- /dev/null +++ b/data_collection/gazette/spiders/sp/sp_iracemapolis.py @@ -0,0 +1,11 @@ +from datetime import date + +from gazette.spiders.base.instar import BaseInstarSpider + + +class SpIracemapolisSpider(BaseInstarSpider): + TERRITORY_ID = "3521408" + name = "sp_iracemapolis" + allowed_domains = ["iracemapolis.sp.gov.br"] + base_url = "https://www.iracemapolis.sp.gov.br/portal/diario-oficial" + start_date = date(2017, 1, 2) diff --git a/data_collection/gazette/spiders/sp/sp_itaporanga.py b/data_collection/gazette/spiders/sp/sp_itaporanga.py new file mode 100644 index 000000000..fe605bb75 --- /dev/null +++ b/data_collection/gazette/spiders/sp/sp_itaporanga.py @@ -0,0 +1,11 @@ +from datetime import date + +from gazette.spiders.base.instar import BaseInstarSpider + + +class SpItaporangaSpider(BaseInstarSpider): + TERRITORY_ID = "3522802" + name = "sp_itaporanga" + allowed_domains = ["itaporanga.sp.gov.br"] + base_url = "https://www.itaporanga.sp.gov.br/portal/diario-oficial" + start_date = date(2009, 4, 16) diff --git a/data_collection/gazette/spiders/sp/sp_potirendaba.py b/data_collection/gazette/spiders/sp/sp_potirendaba.py new file mode 100644 index 000000000..953674c55 --- /dev/null +++ b/data_collection/gazette/spiders/sp/sp_potirendaba.py @@ -0,0 +1,11 @@ +from datetime import date + +from gazette.spiders.base.instar import BaseInstarSpider + + +class SpPotirendabaSpider(BaseInstarSpider): + TERRITORY_ID = "3540804" + name = "sp_potirendaba" + allowed_domains = ["potirendaba.sp.gov.br"] + base_url = "https://www.potirendaba.sp.gov.br/portal/diario-oficial" + start_date = date(2024, 1, 21) From 0e5a1b4b7fac931241eb15291831c10ab973bf5f Mon Sep 17 00:00:00 2001 From: Juliana Trevine <44185775+trevineju@users.noreply.github.com> Date: Wed, 2 Oct 2024 17:54:49 -0300 Subject: [PATCH 2/2] Remove Candeias-BA incorreto MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit O site é do município de Candeias no estado de Minas Gerais, não da Bahia. Signed-off-by: Juliana Trevine <44185775+trevineju@users.noreply.github.com> --- data_collection/gazette/spiders/ba/ba_candeias.py | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 data_collection/gazette/spiders/ba/ba_candeias.py diff --git a/data_collection/gazette/spiders/ba/ba_candeias.py b/data_collection/gazette/spiders/ba/ba_candeias.py deleted file mode 100644 index 13e856dfa..000000000 --- a/data_collection/gazette/spiders/ba/ba_candeias.py +++ /dev/null @@ -1,11 +0,0 @@ -from datetime import date - -from gazette.spiders.base.instar import BaseInstarSpider - - -class BaCandeiasSpider(BaseInstarSpider): - TERRITORY_ID = "2906501" - name = "ba_candeias" - allowed_domains = ["candeias.mg.gov.br"] - base_url = "https://www.candeias.mg.gov.br/portal/diario-oficial" - start_date = date(2017, 2, 8)