Skip to content

Commit

Permalink
feat: create date filter for start_date and end_date
Browse files: browse the repository at this point in the history
  • Loading branch information
talesmota committed May 9, 2024
1 parent ed2f0d5 commit 1b734d3
Showing 1 changed file with 13 additions and 6 deletions.
19 changes: 13 additions & 6 deletions data_collection/gazette/spiders/pr/pr_guaratuba.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,12 @@ def _extract_label_edition_number(self, link_selector):
def parse(self, response):
map_date_edition = dict()
for line in response.css(".table.table-striped tr"):
td = line.css("td")
if len(td) == 0 or td.get() and td.get().find("Data da Publicação") != -1:
td_selector = line.css("td")
if (
len(td_selector) == 0
or td_selector.get()
and td_selector.get().find("Data da Publicação") != -1
):
continue
else:
_gazzete_date = dateparser.parse(
Expand All @@ -51,13 +55,16 @@ def parse(self, response):
gazzete_date = _gazzete_date.date()
pdf_links = []
edition_number, is_extra_edition = None, False
for _a in line.css("td")[1].css("a"):
pdf_relative_link = _a.css("::attr(href)").get()
for link_selector in line.css("td")[1].css("a"):
pdf_relative_link = link_selector.css("::attr(href)").get()
pdf_links.append(self.base_url + pdf_relative_link)
label = _a.css("::text").get().strip()

label = link_selector.css("::text").get().strip()

if not label:
label, _edition_number = self._extract_label_edition_number(_a)
label, _edition_number = self._extract_label_edition_number(
link_selector
)
else:
_edition_number = self._parse_edition_number(label)

Expand Down

0 comments on commit 1b734d3

Please sign in to comment.