Skip to content

Commit

Permalink
Fix player url extract xpath (#69)
Browse files Browse the repository at this point in the history
  • Loading branch information
dcaribou authored Aug 22, 2023
1 parent 6e5b788 commit 029afc2
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 2 deletions.
5 changes: 4 additions & 1 deletion tfmkt/spiders/competitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,14 @@ def parse_competitions(self, response, base):
"""Parse competitions from the country competitions page.
@url https://www.transfermarkt.co.uk/wettbewerbe/national/wettbewerbe/157
@returns items 3 3
@returns items 2 2
@cb_kwargs {"base": {"href": "some_href/3", "type": "competition", "parent": {}, "country_id": 1, "country_name": "n", "country_code": "CC"}}
@scrapes type href parent country_id country_name country_code competition_type
"""

# Uncommenting the two lines below opens a Scrapy shell with the context of this request
# when you run the crawler. This is useful for developing new extractors.

# inspect_response(response, self)
# exit(1)

Expand Down
5 changes: 4 additions & 1 deletion tfmkt/spiders/players.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ def parse(self, response, parent):
@cb_kwargs {"parent": "dummy"}
"""

# Uncommenting the two lines below opens a Scrapy shell with the context of this request
# when you run the crawler. This is useful for developing new extractors.

# inspect_response(response, self)
# exit(1)

Expand All @@ -24,7 +27,7 @@ def parse(self, response, parent):

players_table = players_table[0]

player_hrefs = players_table.xpath('//table[@class="inline-table"]/tr[1]/td[2]/div[1]/span/a/@href').getall()
player_hrefs = players_table.xpath('//table[@class="inline-table"]//tr[@class="data-link"]/@data-link').getall()

for href in player_hrefs:

Expand Down

0 comments on commit 029afc2

Please sign in to comment.