Skip to content

Commit

Permalink
feat: add lineups info
Browse files Browse the repository at this point in the history
  • Loading branch information
LarchLiu committed Sep 28, 2023
1 parent fad50d5 commit 043ceb9
Showing 1 changed file with 127 additions and 1 deletion.
128 changes: 127 additions & 1 deletion tfmkt/spiders/games.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,100 @@ def extract_game_events(self, response, event_type):

return events

def parse_lineups(self, response, base):
    """Parse lineups.

    Reads the 'Starting Line-up' and 'Substitutes' boxes of the line-ups
    page and appends one dict per player (``number``, ``href``, ``name``,
    ``team_captain``, ``position``) to ``item['lineups']`` for each club.
    The first table of each box is attributed to the home club, every
    further table to the away club.

    For each club that has a starting line-up table, the formation is
    finalised: a formation text already stored on the item is reduced to
    the part after its first colon; otherwise a "defenders-midfielders-
    forwards" string is derived from the player positions, or ``None``
    when those do not add up to ten outfield players.

    ``base`` must carry the partially built game item under ``'item'``,
    with ``item['lineups']`` already holding the ``home_club`` and
    ``away_club`` entries (``formation``, ``starting_lineup``,
    ``substitutes``). The completed item is yielded once.

    @url https://www.transfermarkt.co.uk/spielbericht/aufstellung/spielbericht/3098550
    @returns items 1 1
    @cb_kwargs {"base": {"item": "game info"}}
    @scrapes starting lineup substitutes
    """

    section_xpath = (
        "//div[./h2[contains(@class, 'content-box-headline')]"
        " and normalize-space(./h2/text()) = '{title}']"
        "//div[@class='responsive-table']"
    )
    rows_xpath = "./table[@class = 'items']//tr"

    def extract_players(table):
        """Return the player dicts found in one line-up table."""
        rows = table.xpath(rows_xpath)
        players = []
        # Each player spans three consecutive rows: shirt number, player
        # link, position. A trailing incomplete group is ignored.
        for start in range(0, len(rows) - 2, 3):
            number_row = rows[start]
            player_row = rows[start + 1]
            position_row = rows[start + 2]

            # The position cell may have no text node at all; keep the
            # position as None in that case instead of crashing.
            position_text = position_row.xpath("./td/text()").get()
            if position_text is None:
                position = None
            else:
                position = self.safe_strip(position_text.split(',')[0])

            captain_marker = player_row.xpath("./td/span/@title").get()
            players.append({
                'number': number_row.xpath(
                    "./td/div[@class = 'rn_nummer']/text()"
                ).get(),
                'href': player_row.xpath("./td/a/@href").get(),
                'name': player_row.xpath("./td/a/@title").get(),
                'team_captain': 1 if captain_marker else 0,
                'position': position,
            })
        return players

    def derive_formation(players):
        """Build a 'D-M-F' formation from positions, or None if unclear."""
        defenders = midfielders = forwards = 0
        for player in players:
            position = player['position']
            if not position:
                continue
            if "Back" in position:
                defenders += 1
            elif "Midfield" in position:
                midfielders += 1
            elif any(
                role in position
                for role in ("Winger", "Forward", "Striker")
            ):
                forwards += 1
        # Only trust the derived formation when all ten outfield players
        # were classified.
        if defenders + midfielders + forwards != 10:
            return None
        return f"{defenders}-{midfielders}-{forwards}"

    item = base['item']
    lineups = item['lineups']

    starting_tables = response.xpath(
        section_xpath.format(title='Starting Line-up')
    )
    substitutes_tables = response.xpath(
        section_xpath.format(title='Substitutes')
    )

    for index, table in enumerate(starting_tables):
        club = lineups['home_club' if index == 0 else 'away_club']
        starters = extract_players(table)
        club['starting_lineup'].extend(starters)

        reported = club['formation']
        if reported:
            # Keep only the value after the first colon of the stored
            # formation text; if there is no colon, keep the text as is.
            parts = reported.split(':')
            chosen = parts[1] if len(parts) > 1 else reported
            club['formation'] = chosen.strip()
        else:
            club['formation'] = derive_formation(starters)

    for index, table in enumerate(substitutes_tables):
        club = lineups['home_club' if index == 0 else 'away_club']
        club['substitutes'].extend(extract_players(table))

    item['lineups'] = lineups

    yield item

def parse_game(self, response, base):
"""Parse games and fixutres page. From this page follow to each game page.
Expand Down Expand Up @@ -210,6 +304,38 @@ def parse_game(self, response, base):
item["away_manager"] = {
'name': away_manager_name
}

lineups_url = base['href'].replace('index', 'aufstellung')
lineups_elements = response.xpath(
f".//div[./h2/@class = 'content-box-headline' and normalize-space(./h2/text()) = 'Line-Ups']/div[contains(@class, 'columns')]"
)
home_linup = lineups_elements[0]
away_linup = lineups_elements[1]

home_formation = self.safe_strip(home_linup.xpath("./div[@class = 'row']/div/text()").get())
away_formation = self.safe_strip(away_linup.xpath("./div[@class = 'row']/div/text()").get())

lineups = {
'href': lineups_url,
'home_club': {
'formation': home_formation,
'starting_lineup': [],
'substitutes': []
},
'away_club': {
'formation': away_formation,
'starting_lineup': [],
'substitutes': []
}
}

item['lineups'] = lineups

yield item
cb_kwargs = {
'base': {
'item': item
}
}

yield response.follow(lineups_url, self.parse_lineups, cb_kwargs=cb_kwargs)

0 comments on commit 043ceb9

Please sign in to comment.