Fix player url extract xpath (#69)

dcaribou · Aug 22, 2023 · 029afc2 · 029afc2
1 parent 6e5b788
commit 029afc2
Show file tree

Hide file tree

Showing 2 changed files with 8 additions and 2 deletions.
diff --git a/tfmkt/spiders/competitions.py b/tfmkt/spiders/competitions.py
@@ -70,11 +70,14 @@ def parse_competitions(self, response, base):
     """Parse competitions from the country competitions page.
 
     @url https://www.transfermarkt.co.uk/wettbewerbe/national/wettbewerbe/157
-    @returns items 3 3
+    @returns items 2 2
     @cb_kwargs {"base": {"href": "some_href/3", "type": "competition", "parent": {}, "country_id": 1, "country_name": "n", "country_code": "CC"}}
     @scrapes type href parent country_id country_name country_code competition_type
     """
 
+    # Uncommenting the two lines below opens a Scrapy shell with the context of this request
+    # when you run the crawler, which is useful for developing new extractors.
+
     # inspect_response(response, self)
     # exit(1)
 

diff --git a/tfmkt/spiders/players.py b/tfmkt/spiders/players.py
@@ -16,6 +16,9 @@ def parse(self, response, parent):
         @cb_kwargs {"parent": "dummy"}
       """
 
+      # Uncommenting the two lines below opens a Scrapy shell with the context of this request
+      # when you run the crawler, which is useful for developing new extractors.
+
       # inspect_response(response, self)
       # exit(1)
 
@@ -24,7 +27,7 @@ def parse(self, response, parent):
 
       players_table = players_table[0]
 
-      player_hrefs = players_table.xpath('//table[@class="inline-table"]/tr[1]/td[2]/div[1]/span/a/@href').getall()
+      player_hrefs = players_table.xpath('//table[@class="inline-table"]//tr[@class="data-link"]/@data-link').getall()
 
       for href in player_hrefs: