Skip to content

Commit

Permalink
Scrape managers per games (#44)
Browse files Browse the repository at this point in the history
  • Loading branch information
dcaribou authored Oct 12, 2022
1 parent 97d38c3 commit 25620ce
Showing 1 changed file with 16 additions and 2 deletions.
18 changes: 16 additions & 2 deletions tfmkt/spiders/games.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,10 @@ def extract_game_urls(self, response, base):
def parse_game(self, response, base):
"""Parse games and fixtures page. From this page follow to each game page.
@url https://www.transfermarkt.co.uk/caykur-rizespor_fenerbahce-sk/index/spielbericht/3426662
@url https://www.transfermarkt.co.uk/spielbericht/index/spielbericht/3098550
@returns items 1 1
@cb_kwargs {"base": {"href": "some_href/3", "type": "league", "parent": {}}}
@scrapes type href parent game_id result matchday date stadium attendance
@scrapes type href parent game_id result matchday date stadium attendance home_manager away_manager
"""

# inspect_response(response, self)
Expand Down Expand Up @@ -111,6 +111,11 @@ def parse_game(self, response, base):

result = self.safe_strip(result_box.css('div.sb-endstand::text').get())

# extract from line-ups "box"
manager_names = response.xpath(
"//tr[(contains(td/b/text(),'Manager')) or (contains(td/div/text(),'Manager'))]/td[2]/a/text()"
).getall()

item = {
**base,
'type': 'game',
Expand All @@ -132,6 +137,15 @@ def parse_game(self, response, base):
'attendance': attendance,
'referee': referee
}

if len(manager_names) == 2:
home_manager_name, away_manager_name = manager_names
item["home_manager"] = {
'name': home_manager_name
}
item["away_manager"] = {
'name': away_manager_name
}

yield item

0 comments on commit 25620ce

Please sign in to comment.