Skip to content

Commit

Permalink
#1113 Cabo Frio passa a usar base instar a partir de set/2024
Browse files Browse the repository at this point in the history
  • Loading branch information
slfabio committed Sep 8, 2024
1 parent 6aa3ee6 commit b59c298
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 11 deletions.
4 changes: 3 additions & 1 deletion data_collection/gazette/spiders/base/instar.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@


class BaseInstarSpider(BaseGazetteSpider):
power = "executive_legislative"

def start_requests(self):
page = 1
start_date = self.start_date.strftime("%d-%m-%Y")
Expand Down Expand Up @@ -58,7 +60,7 @@ def parse(self, response, page, start_date, end_date):
date=gazette_date,
edition_number=edition_number,
is_extra_edition=False,
power="executive_legislative",
power=self.power,
)

yield scrapy.Request(
Expand Down
8 changes: 5 additions & 3 deletions data_collection/gazette/spiders/base/ptio.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@

class BasePtioSpider(BaseGazetteSpider):
def start_requests(self):
yield scrapy.Request(self.BASE_URL)
yield scrapy.Request(url=self.base_url, callback=self.ptio_parse)

def parse(self, response):
def ptio_parse(self, response):
for gazette_div in response.xpath("//div[@class='edicoes']"):
raw_gazete_date = gazette_div.xpath(
".//div[@class='data-caderno hidden-phone']/text()"
Expand Down Expand Up @@ -41,4 +41,6 @@ def parse(self, response):
"//ul[@class='paginacao']//a[@class='proximo']/@href"
)
if next_page:
yield scrapy.Request(response.urljoin(next_page.get()))
yield scrapy.Request(
response.urljoin(next_page.get()), callback=self.ptio_parse
)
2 changes: 1 addition & 1 deletion data_collection/gazette/spiders/rj/rj_areal.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ class RjArealSpider(BasePtioSpider):
name = "rj_areal"
TERRITORY_ID = "3300225"
allowed_domains = ["portaldatransparencia.com.br"]
BASE_URL = "http://rj.portaldatransparencia.com.br/prefeitura/areal/"
base_url = "http://rj.portaldatransparencia.com.br/prefeitura/areal/"
start_date = date(2006, 8, 1)
33 changes: 29 additions & 4 deletions data_collection/gazette/spiders/rj/rj_cabo_frio.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,36 @@
from datetime import date
from copy import copy
from datetime import date, timedelta

from gazette.spiders.base.instar import BaseInstarSpider
from gazette.spiders.base.ptio import BasePtioSpider


class RjCaboFrioSpider(BasePtioSpider):
class RjCaboFrioSpider(BaseInstarSpider, BasePtioSpider):
name = "rj_cabo_frio"
TERRITORY_ID = "3300704"
allowed_domains = ["portaldatransparencia.com.br"]
BASE_URL = "http://rj.portaldatransparencia.com.br/prefeitura/cabofrio/"
allowed_domains = [
"portaldatransparencia.com.br",
"cabofrio.instartecnologia.com.br",
]
base_url = "https://www.cabofrio.instartecnologia.com.br/portal/diario-oficial"
start_date = date(2020, 7, 29)
power = "executive"

def start_requests(self):
    """Dispatch the initial requests to the base spider matching the date range.

    Ranges starting after 2024-08-31 are delegated entirely to
    ``BaseInstarSpider.start_requests``. Ranges ending on or before that
    date are delegated entirely to ``BasePtioSpider.start_requests`` with
    ``base_url`` pointed at the portaldatransparencia address. A range
    that straddles the date is split: a shallow copy of this spider capped
    at 2024-08-31 is handed to the Ptio base, then a second shallow copy
    starting on the following day is handed to the Instar base.
    """
    legacy_url = "http://rj.portaldatransparencia.com.br/prefeitura/cabofrio/"
    cutover = date(2024, 8, 31)

    # Whole range is after the cutover: only the Instar base is needed.
    if self.start_date > cutover:
        yield from BaseInstarSpider.start_requests(self)
        return

    # Whole range is on or before the cutover: only the Ptio base is
    # needed, so the instance itself is repointed at the old URL.
    if self.end_date <= cutover:
        self.base_url = legacy_url
        yield from BasePtioSpider.start_requests(self)
        return

    # Range straddles the cutover. Work on shallow copies so the date
    # bounds and URL given to one base do not leak into the other.
    legacy_spider = copy(self)
    legacy_spider.end_date = cutover
    legacy_spider.base_url = legacy_url
    yield from BasePtioSpider.start_requests(legacy_spider)

    instar_spider = copy(self)
    instar_spider.start_date = cutover + timedelta(days=1)
    yield from BaseInstarSpider.start_requests(instar_spider)
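2 changes: 1 addition & 1 deletion data_collection/gazette/spiders/rj/rj_comendador_levy_gasparian.py
Original file line number Diff line number Diff line change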
Expand Up @@ -7,7 +7,7 @@ class RjComendadorLevyGasparianSpider(BasePtioSpider):
name = "rj_comendador_levy_gasparian"
TERRITORY_ID = "3300951"
allowed_domains = ["portaldatransparencia.com.br"]
BASE_URL = (
base_url = (
"http://rj.portaldatransparencia.com.br/prefeitura/comendadorlevygasparian/"
)
start_date = date(2013, 11, 26)
2 changes: 1 addition & 1 deletion data_collection/gazette/spiders/rj/rj_sapucaia.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ class RjSapucaiaSpider(BasePtioSpider):
name = "rj_sapucaia"
TERRITORY_ID = "3305406"
allowed_domains = ["portaldatransparencia.com.br"]
BASE_URL = "http://rj.portaldatransparencia.com.br/prefeitura/sapucaia/"
base_url = "http://rj.portaldatransparencia.com.br/prefeitura/sapucaia/"
start_date = date(2019, 1, 16)

0 comments on commit b59c298

Please sign in to comment.