From b8802eceb2281254d4b11dfd342e282a36c5b91e Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Fri, 21 Jul 2023 22:27:07 +0200 Subject: [PATCH 01/15] Refactor Sportec.event deserializer to read all metadata in a separate method. This way the metadata parsing for Sportec can be shared between the event- and trackingdata Deserializers --- kloppy/_providers/sportec.py | 26 ++- .../serializers/event/sportec/__init__.py | 2 +- .../serializers/event/sportec/deserializer.py | 156 +++++++++++++----- .../tracking/sportec/deserializer.py | 0 4 files changed, 133 insertions(+), 51 deletions(-) create mode 100644 kloppy/infra/serializers/tracking/sportec/deserializer.py diff --git a/kloppy/_providers/sportec.py b/kloppy/_providers/sportec.py index 9426fcde..0d5ecd48 100644 --- a/kloppy/_providers/sportec.py +++ b/kloppy/_providers/sportec.py @@ -3,13 +3,14 @@ from kloppy.config import get_config from kloppy.domain import EventDataset, EventFactory from kloppy.infra.serializers.event.sportec import ( - SportecEventDeserializer, - SportecInputs, + SportecEventDataDeserializer, + SportecEvenDataInputs, ) from kloppy.io import open_as_file +from kloppy.utils import deprecated -def load( +def load_event( event_data: str, meta_data: str, event_types: Optional[List[str]] = None, @@ -27,7 +28,7 @@ def load( event_factory: """ - serializer = SportecEventDeserializer( + serializer = SportecEventDataDeserializer( event_types=event_types, coordinate_system=coordinates, event_factory=event_factory or get_config("event_factory"), @@ -36,5 +37,20 @@ def load( meta_data ) as meta_data_fp: return serializer.deserialize( - SportecInputs(event_data=event_data_fp, meta_data=meta_data_fp) + SportecEvenDataInputs( + event_data=event_data_fp, meta_data=meta_data_fp + ) ) + + +@deprecated("sportec.load_event should be used") +def load( + event_data: str, + meta_data: str, + event_types: Optional[List[str]] = None, + coordinates: Optional[str] = None, + event_factory: Optional[EventFactory] = None, +) -> EventDataset: + return load_event( + event_data, meta_data, event_types, coordinates, event_factory + ) diff --git a/kloppy/infra/serializers/event/sportec/__init__.py b/kloppy/infra/serializers/event/sportec/__init__.py index d6faa3f4..00e8063b 100644 --- a/kloppy/infra/serializers/event/sportec/__init__.py +++ b/kloppy/infra/serializers/event/sportec/__init__.py @@ -1 +1 @@ -from .deserializer import SportecEventDeserializer, SportecInputs +from .deserializer import SportecEventDataDeserializer, SportecEvenDataInputs diff --git a/kloppy/infra/serializers/event/sportec/deserializer.py b/kloppy/infra/serializers/event/sportec/deserializer.py index 0f08c543..03b0868b 100644 --- a/kloppy/infra/serializers/event/sportec/deserializer.py +++ b/kloppy/infra/serializers/event/sportec/deserializer.py @@ -1,5 +1,5 @@ from collections import OrderedDict -from typing import Tuple, Dict, List, NamedTuple, IO +from typing import Dict, List, NamedTuple, IO import logging from dateutil.parser import parse from lxml import objectify @@ -12,9 +12,6 @@ BallState, DatasetFlag, Orientation, - PassEvent, - ShotEvent, - GenericEvent, PassResult, ShotResult, EventType, @@ -29,12 +26,7 @@ BodyPartQualifier, BodyPart, Qualifier, - BallOutEvent, - RecoveryEvent, - SubstitutionEvent, - CardEvent, CardType, - FoulCommittedEvent, AttackingDirection, ) from kloppy.exceptions import DeserializationError @@ -74,6 +66,106 @@ def _team_from_xml_elm(team_elm) -> Team: return team +SPORTEC_FPS = 25 + + +class SportecMetadata(NamedTuple): + score: Score + teams: List[Team] + periods: List[Period] + x_max: float + y_max: float + fps: int + + +def _sportec_metadata_from_xml_elm(match_root) -> SportecMetadata: + """ + Load metadata from Sportec XML element. This part is shared between event- and tracking data. + In the future this might move to a common.sportec package that provides functionality for both + deserializers. + """ + x_max = float(match_root.MatchInformation.Environment.attrib["PitchX"]) + y_max = float(match_root.MatchInformation.Environment.attrib["PitchY"]) + + team_path = objectify.ObjectPath("PutDataRequest.MatchInformation.Teams") + team_elms = list(team_path.find(match_root).iterchildren("Team")) + + home_team = away_team = None + for team_elm in team_elms: + if team_elm.attrib["Role"] == "home": + home_team = _team_from_xml_elm(team_elm) + elif team_elm.attrib["Role"] == "guest": + away_team = _team_from_xml_elm(team_elm) + else: + raise DeserializationError( + f"Unknown side: {team_elm.attrib['Role']}" + ) + + if not home_team: + raise DeserializationError("Home team is missing from metadata") + if not away_team: + raise DeserializationError("Away team is missing from metadata") + + (home_score, away_score,) = match_root.MatchInformation.General.attrib[ + "Result" + ].split(":") + score = Score(home=int(home_score), away=int(away_score)) + teams = [home_team, away_team] + + if len(home_team.players) == 0 or len(away_team.players) == 0: + raise DeserializationError("LineUp incomplete") + + # The periods can be rebuild from event data. Therefore, the periods attribute + # from the metadata can be ignored. It is required for tracking data. + other_game_information = ( + match_root.MatchInformation.OtherGameInformation.attrib + ) + periods = [ + Period( + id=1, + start_timestamp=10_000 / SPORTEC_FPS, + end_timestamp=10_000 / SPORTEC_FPS + + float(other_game_information["TotalTimeFirstHalf"]) / 1000, + ), + Period( + id=2, + start_timestamp=100_000 / SPORTEC_FPS, + end_timestamp=100_000 / SPORTEC_FPS + + float(other_game_information["TotalTimeSecondHalf"]) / 1000, + ), + ] + + if "TotalTimeFirstHalfExtra" in other_game_information: + # Add two periods for extra time. + periods.extend( + [ + Period( + id=3, + start_timestamp=200_000 / SPORTEC_FPS, + end_timestamp=200_000 / SPORTEC_FPS + + float(other_game_information["TotalTimeFirstHalfExtra"]) + / 1000, + ), + Period( + id=4, + start_timestamp=250_000 / SPORTEC_FPS, + end_timestamp=250_000 / SPORTEC_FPS + + float(other_game_information["TotalTimeSecondHalfExtra"]) + / 1000, + ), + ] + ) + + return SportecMetadata( + score=score, + teams=teams, + periods=periods, + x_max=x_max, + y_max=y_max, + fps=SPORTEC_FPS, + ) + + def _event_chain_from_xml_elm(event_elm): chain = OrderedDict() current_elm = event_elm @@ -259,55 +351,29 @@ def _parse_coordinates(event_attributes: Dict) -> Point: ) -class SportecInputs(NamedTuple): +class SportecEvenDataInputs(NamedTuple): meta_data: IO[bytes] event_data: IO[bytes] -class SportecEventDeserializer(EventDataDeserializer[SportecInputs]): +class SportecEventDataDeserializer( + EventDataDeserializer[SportecEvenDataInputs] +): @property def provider(self) -> Provider: return Provider.SPORTEC - def deserialize(self, inputs: SportecInputs) -> EventDataset: + def deserialize(self, inputs: SportecEvenDataInputs) -> EventDataset: with performance_logging("load data", logger=logger): match_root = objectify.fromstring(inputs.meta_data.read()) event_root = objectify.fromstring(inputs.event_data.read()) with performance_logging("parse data", logger=logger): - x_max = float( - match_root.MatchInformation.Environment.attrib["PitchX"] - ) - y_max = float( - match_root.MatchInformation.Environment.attrib["PitchY"] - ) - - transformer = self.get_transformer(length=x_max, width=y_max) - - team_path = objectify.ObjectPath( - "PutDataRequest.MatchInformation.Teams" + sportec_metadata = _sportec_metadata_from_xml_elm(match_root) + teams = home_team, away_team = sportec_metadata.teams + transformer = self.get_transformer( + length=sportec_metadata.x_max, width=sportec_metadata.y_max ) - team_elms = list(team_path.find(match_root).iterchildren("Team")) - - for team_elm in team_elms: - if team_elm.attrib["Role"] == "home": - home_team = _team_from_xml_elm(team_elm) - elif team_elm.attrib["Role"] == "guest": - away_team = _team_from_xml_elm(team_elm) - else: - raise DeserializationError( - f"Unknown side: {team_elm.attrib['Role']}" - ) - - ( - home_score, - away_score, - ) = match_root.MatchInformation.General.attrib["Result"].split(":") - score = Score(home=int(home_score), away=int(away_score)) - teams = [home_team, away_team] - - if len(home_team.players) == 0 or len(away_team.players) == 0: - raise DeserializationError("LineUp incomplete") periods = [] period_id = 0 @@ -518,7 +584,7 @@ def deserialize(self, inputs: SportecInputs) -> EventDataset: teams=teams, periods=periods, pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions, - score=score, + score=sportec_metadata.score, frame_rate=None, orientation=orientation, flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM), diff --git a/kloppy/infra/serializers/tracking/sportec/deserializer.py b/kloppy/infra/serializers/tracking/sportec/deserializer.py new file mode 100644 index 00000000..e69de29b From 459df20b86209a69360c3350224ab16b36f3c0bc Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Sat, 22 Jul 2023 11:41:45 +0200 Subject: [PATCH 02/15] Add failing test for Sportec tracking data --- kloppy/_providers/sportec.py | 40 +- .../serializers/tracking/sportec/__init__.py | 0 .../tracking/sportec/deserializer.py | 64 +++ kloppy/sportec.py | 2 +- kloppy/tests/files/sportec_positional.xml | 512 ++++++++++++++++++ kloppy/tests/test_sportec.py | 55 +- 6 files changed, 662 insertions(+), 11 deletions(-) create mode 100644 kloppy/infra/serializers/tracking/sportec/__init__.py create mode 100644 kloppy/tests/files/sportec_positional.xml diff --git a/kloppy/_providers/sportec.py b/kloppy/_providers/sportec.py index 0d5ecd48..1dcf6812 100644 --- a/kloppy/_providers/sportec.py +++ b/kloppy/_providers/sportec.py @@ -1,18 +1,22 @@ from typing import Optional, List from kloppy.config import get_config -from kloppy.domain import EventDataset, EventFactory +from kloppy.domain import EventDataset, EventFactory, TrackingDataset from kloppy.infra.serializers.event.sportec import ( SportecEventDataDeserializer, SportecEvenDataInputs, ) -from kloppy.io import open_as_file +from kloppy.infra.serializers.tracking.sportec.deserializer import ( + SportecTrackingDataSerializer, + SportecTrackingDataInputs, +) +from kloppy.io import open_as_file, FileLike from kloppy.utils import deprecated def load_event( - event_data: str, - meta_data: str, + event_data: FileLike, + meta_data: FileLike, event_types: Optional[List[str]] = None, coordinates: Optional[str] = None, event_factory: Optional[EventFactory] = None, @@ -43,10 +47,34 @@ def load_event( ) +def load_tracking( + meta_data: FileLike, + raw_data: FileLike, + sample_rate: Optional[float] = None, + limit: Optional[int] = None, + coordinates: Optional[str] = None, + only_alive: Optional[bool] = True, +) -> TrackingDataset: + deserializer = SportecTrackingDataSerializer( + sample_rate=sample_rate, + limit=limit, + coordinate_system=coordinates, + only_alive=only_alive, + ) + with open_as_file(meta_data) as meta_data_fp, open_as_file( + raw_data + ) as raw_data_fp: + return deserializer.deserialize( + inputs=SportecTrackingDataInputs( + meta_data=meta_data_fp, raw_data=raw_data_fp + ) + ) + + @deprecated("sportec.load_event should be used") def load( - event_data: str, - meta_data: str, + event_data: FileLike, + meta_data: FileLike, event_types: Optional[List[str]] = None, coordinates: Optional[str] = None, event_factory: Optional[EventFactory] = None, diff --git a/kloppy/infra/serializers/tracking/sportec/__init__.py b/kloppy/infra/serializers/tracking/sportec/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/kloppy/infra/serializers/tracking/sportec/deserializer.py b/kloppy/infra/serializers/tracking/sportec/deserializer.py index e69de29b..e4d165a2 100644 --- a/kloppy/infra/serializers/tracking/sportec/deserializer.py +++ b/kloppy/infra/serializers/tracking/sportec/deserializer.py @@ -0,0 +1,64 @@ +import json +import logging +from typing import Tuple, Dict, NamedTuple, Optional, Union, IO + +from lxml import objectify + +from kloppy.domain import ( + TrackingDataset, + DatasetFlag, + AttackingDirection, + Frame, + Point, + Point3D, + Team, + BallState, + Period, + Provider, + Orientation, + attacking_direction_from_frame, + Metadata, + Ground, + Player, + build_coordinate_system, + Provider, + PlayerData, +) + +from kloppy.utils import Readable, performance_logging + +from ..deserializer import TrackingDataDeserializer + +logger = logging.getLogger(__name__) + + +class SportecTrackingDataInputs(NamedTuple): + meta_data: IO[bytes] + raw_data: IO[bytes] + + +class SportecTrackingDataSerializer(TrackingDataDeserializer): + @property + def provider(self) -> Provider: + return Provider.SPORTEC + + def __init__( + self, + limit: Optional[int] = None, + sample_rate: Optional[float] = None, + coordinate_system: Optional[Union[str, Provider]] = None, + only_alive: Optional[bool] = True, + ): + super().__init__(limit, sample_rate, coordinate_system) + self.only_alive = only_alive + + def deserialize( + self, inputs: SportecTrackingDataInputs + ) -> TrackingDataset: + return TrackingDataset( + records=[], + metadata=None, + ) + + def serialize(self, dataset: TrackingDataset) -> Tuple[str, str]: + raise NotImplementedError diff --git a/kloppy/sportec.py b/kloppy/sportec.py index 27cf35f6..79595791 100644 --- a/kloppy/sportec.py +++ b/kloppy/sportec.py @@ -1 +1 @@ -from ._providers.sportec import load +from ._providers.sportec import load, load_event, load_tracking diff --git a/kloppy/tests/files/sportec_positional.xml b/kloppy/tests/files/sportec_positional.xml new file mode 100644 index 00000000..548cbad2 --- /dev/null +++ b/kloppy/tests/files/sportec_positional.xml @@ -0,0 +1,512 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/kloppy/tests/test_sportec.py b/kloppy/tests/test_sportec.py index c33b789e..9f08aaeb 100644 --- a/kloppy/tests/test_sportec.py +++ b/kloppy/tests/test_sportec.py @@ -11,6 +11,8 @@ SetPieceType, BodyPart, DatasetType, + BallState, + Point3D, ) from kloppy import sportec @@ -23,12 +25,18 @@ class TestSportecEvent: def event_data(self, base_dir) -> str: return base_dir / "files/sportec_events.xml" + @pytest.fixture + def raw_data(self, base_dir) -> str: + return base_dir / "files/sportec_positional.xml" + @pytest.fixture def meta_data(self, base_dir) -> str: return base_dir / "files/sportec_meta.xml" - def test_correct_deserialization(self, event_data: Path, meta_data: Path): - dataset = sportec.load( + def test_correct_event_data_deserialization( + self, event_data: Path, meta_data: Path + ): + dataset = sportec.load_event( event_data=event_data, meta_data=meta_data, coordinates="sportec" ) @@ -69,9 +77,48 @@ def test_correct_deserialization(self, event_data: Path, meta_data: Path): assert dataset.events[0].coordinates == Point(56.41, 68.0) - def test_correct_normalized_deserialization( + def test_correct_normalized_event_data_deserialization( self, event_data: Path, meta_data: Path ): - dataset = sportec.load(event_data=event_data, meta_data=meta_data) + dataset = sportec.load_event( + event_data=event_data, meta_data=meta_data + ) assert dataset.events[0].coordinates == Point(0.5640999999999999, 1) + + def test_load_tracking_data(self, raw_data: Path, meta_data: Path): + dataset = sportec.load_tracking( + raw_data=raw_data, meta_data=meta_data, coordinates="sportec" + ) + + home_team, away_team = dataset.metadata.teams + + assert dataset.frames[0].ball_owning_team == away_team + assert dataset.frames[0].ball_state == BallState.DEAD + assert dataset.frames[0].ball_coordinates == Point3D( + x=2.69, y=0.26, z=0.06 + ) + + assert dataset.frames[1].ball_owning_team == home_team + assert dataset.frames[1].ball_state == BallState.ALIVE + + player_lilian = away_team.get_player_by_id("DFL-OBJ-002G3I") + player_data = dataset.frames[0].players_data[player_lilian] + + assert player_data.coordinates == Point(x=0.35, y=-25.26) + + # We don't load distance right now as it doesn't + # work together with `sample_rate`: "The distance covered from the previous frame in cm" + assert player_data.distance is None + + # Appears first in 27th frame + player_bensebaini = away_team.get_player_by_id("DFL-OBJ-002G5S") + assert player_bensebaini not in dataset.frames[0].players_data + assert player_bensebaini in dataset.frames[26] + + # Contains all 3 players + assert len(dataset.frames[35].players_data) == 3 + + assert dataset.metadata.provider == Provider.SPORTEC + assert dataset.dataset_type == DatasetType.TRACKING + assert len(dataset.metadata.periods) == 2 From ced42fc68064530735472d4ca41cf591cc9a2bcb Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Sat, 22 Jul 2023 12:01:39 +0200 Subject: [PATCH 03/15] Load metadata --- .../tracking/sportec/deserializer.py | 32 ++++++++++++++++- kloppy/tests/test_sportec.py | 34 +++++++++++++------ 2 files changed, 55 insertions(+), 11 deletions(-) diff --git a/kloppy/infra/serializers/tracking/sportec/deserializer.py b/kloppy/infra/serializers/tracking/sportec/deserializer.py index e4d165a2..e3727602 100644 --- a/kloppy/infra/serializers/tracking/sportec/deserializer.py +++ b/kloppy/infra/serializers/tracking/sportec/deserializer.py @@ -28,6 +28,7 @@ from kloppy.utils import Readable, performance_logging from ..deserializer import TrackingDataDeserializer +from ...event.sportec.deserializer import _sportec_metadata_from_xml_elm logger = logging.getLogger(__name__) @@ -55,9 +56,38 @@ def __init__( def deserialize( self, inputs: SportecTrackingDataInputs ) -> TrackingDataset: + with performance_logging("load data", logger=logger): + match_root = objectify.fromstring(inputs.meta_data.read()) + + with performance_logging("parse data", logger=logger): + sportec_metadata = _sportec_metadata_from_xml_elm(match_root) + teams = sportec_metadata.teams + periods = sportec_metadata.periods + transformer = self.get_transformer( + length=sportec_metadata.x_max, width=sportec_metadata.y_max + ) + + orientation = ( + Orientation.FIXED_HOME_AWAY + if periods[0].attacking_direction == AttackingDirection.HOME_AWAY + else Orientation.FIXED_AWAY_HOME + ) + + metadata = Metadata( + teams=teams, + periods=periods, + pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions, + score=sportec_metadata.score, + frame_rate=sportec_metadata.fps, + orientation=orientation, + provider=Provider.SPORTEC, + flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE, + coordinate_system=transformer.get_to_coordinate_system(), + ) + return TrackingDataset( records=[], - metadata=None, + metadata=metadata, ) def serialize(self, dataset: TrackingDataset) -> Tuple[str, str]: diff --git a/kloppy/tests/test_sportec.py b/kloppy/tests/test_sportec.py index 9f08aaeb..b5750c86 100644 --- a/kloppy/tests/test_sportec.py +++ b/kloppy/tests/test_sportec.py @@ -18,17 +18,13 @@ from kloppy import sportec -class TestSportecEvent: +class TestSportecEventData: """""" @pytest.fixture def event_data(self, base_dir) -> str: return base_dir / "files/sportec_events.xml" - @pytest.fixture - def raw_data(self, base_dir) -> str: - return base_dir / "files/sportec_positional.xml" - @pytest.fixture def meta_data(self, base_dir) -> str: return base_dir / "files/sportec_meta.xml" @@ -86,11 +82,33 @@ def test_correct_normalized_event_data_deserialization( assert dataset.events[0].coordinates == Point(0.5640999999999999, 1) - def test_load_tracking_data(self, raw_data: Path, meta_data: Path): + +class TestSportecTrackingData: + """ + Tests for loading Sportec tracking data. + """ + + @pytest.fixture + def raw_data(self, base_dir) -> str: + return base_dir / "files/sportec_positional.xml" + + @pytest.fixture + def meta_data(self, base_dir) -> str: + return base_dir / "files/sportec_meta.xml" + + def test_load_metadata(self, raw_data: Path, meta_data: Path): dataset = sportec.load_tracking( raw_data=raw_data, meta_data=meta_data, coordinates="sportec" ) + assert dataset.metadata.provider == Provider.SPORTEC + assert dataset.dataset_type == DatasetType.TRACKING + assert len(dataset.metadata.periods) == 2 + + def test_load_frames(self, raw_data: Path, meta_data: Path): + dataset = sportec.load_tracking( + raw_data=raw_data, meta_data=meta_data, coordinates="sportec" + ) home_team, away_team = dataset.metadata.teams assert dataset.frames[0].ball_owning_team == away_team @@ -118,7 +136,3 @@ def test_load_tracking_data(self, raw_data: Path, meta_data: Path): # Contains all 3 players assert len(dataset.frames[35].players_data) == 3 - - assert dataset.metadata.provider == Provider.SPORTEC - assert dataset.dataset_type == DatasetType.TRACKING - assert len(dataset.metadata.periods) == 2 From a82fe5fe09c10826559409ea2ade15fb21aa1d42 Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Sat, 22 Jul 2023 14:27:28 +0200 Subject: [PATCH 04/15] WIP: Add implementation for loading the data --- .../tracking/sportec/deserializer.py | 153 +++++++++++++++++- kloppy/tests/test_sportec.py | 9 +- 2 files changed, 157 insertions(+), 5 deletions(-) diff --git a/kloppy/infra/serializers/tracking/sportec/deserializer.py b/kloppy/infra/serializers/tracking/sportec/deserializer.py index e3727602..473d4987 100644 --- a/kloppy/infra/serializers/tracking/sportec/deserializer.py +++ b/kloppy/infra/serializers/tracking/sportec/deserializer.py @@ -1,6 +1,7 @@ import json import logging -from typing import Tuple, Dict, NamedTuple, Optional, Union, IO +from collections import defaultdict +from typing import Tuple, Dict, NamedTuple, Optional, Union, IO, Literal from lxml import objectify @@ -32,6 +33,76 @@ logger = logging.getLogger(__name__) +PERIOD_ID_TO_GAME_SECTION = { + 1: "firstHalf", + 2: "secondHalf", + 3: "firstHalfExtra", + 4: "secondHalfExtra", +} + + +def _read_section_data(data_root, period: Period) -> dict: + """ + Read all data for a single period from data_root. + + Output format: + { + 10_000: { + ('BALL', 'DFL-OBJ-0000XT'): { + 'x': 20.92, + 'y': 2.84, + 'z': 0.08, + 'speed': 4.91, + 'ballPossession': 2, + 'ballStatus': 1 + }, + ('DFL-CLU-000004', 'DFL-OBJ-002G3I'): { + 'x': 0.35, + 'y': -25.26, + 'speed': 0.00, + }, + [....] + }, + 10_001: { + ... + } + } + """ + + game_section = PERIOD_ID_TO_GAME_SECTION[period.id] + frame_sets = data_root.findall( + f"Positions/FrameSet[@GameSection='{game_section}']" + ) + + raw_frames = defaultdict(dict) + for frame_set in frame_sets: + key = ( + "ball" + if frame_set.attrib["TeamId"] == "BALL" + else frame_set.attrib["PersonId"] + ) + for frame in frame_set.iterchildren("Frame"): + attr = frame.attrib + frame_id = int(attr["N"]) + + object_data = { + "x": float(attr["X"]), + "y": float(attr["Y"]), + "speed": float(attr["S"]), + } + if key == "ball": + object_data.update( + { + "z": float(attr["Z"]), + "possession": int(attr["BallPossession"]), + "state": int(attr["BallStatus"]), + } + ) + + raw_frames[frame_id][key] = object_data + + return raw_frames + class SportecTrackingDataInputs(NamedTuple): meta_data: IO[bytes] @@ -58,15 +129,91 @@ def deserialize( ) -> TrackingDataset: with performance_logging("load data", logger=logger): match_root = objectify.fromstring(inputs.meta_data.read()) + data_root = objectify.fromstring(inputs.raw_data.read()) - with performance_logging("parse data", logger=logger): + with performance_logging("parse metadata", logger=logger): sportec_metadata = _sportec_metadata_from_xml_elm(match_root) - teams = sportec_metadata.teams + teams = home_team, away_team = sportec_metadata.teams periods = sportec_metadata.periods transformer = self.get_transformer( length=sportec_metadata.x_max, width=sportec_metadata.y_max ) + with performance_logging("parse raw data", logger=None): + + def _iter(): + player_map = {} + for player in home_team.players: + player_map[player.player_id] = player + for player in away_team.players: + player_map[player.player_id] = player + + sample = 1.0 / self.sample_rate + + for period in periods: + raw_frames = _read_section_data(data_root, period) + + # Since python 3.6 dict keep insertion order + for i, (frame_id, frame_data) in enumerate( + raw_frames.items() + ): + if "ball" not in frame_data: + # Frames without ball data are corrupt. + print(frame_id, frame_data) + continue + + ball_data = frame_data["ball"] + if self.only_alive and ball_data["state"] != 1: + continue + + if i % sample == 0: + yield Frame( + frame_id=frame_id, + timestamp=(frame_id / sportec_metadata.fps) + - period.start_timestamp, + ball_owning_team=home_team + if ball_data["possession"] == 1 + else away_team, + ball_state=BallState.ALIVE + if ball_data["state"] == 1 + else BallState.DEAD, + period=period, + players_data={ + player_map[player_id]: PlayerData( + coordinates=Point( + x=raw_player_data["x"], + y=raw_player_data["y"], + ), + speed=raw_player_data["speed"], + ) + for player_id, raw_player_data in frame_data.items() + if player_id != "ball" + }, + other_data={}, + ball_coordinates=Point3D( + x=ball_data["x"], + y=ball_data["y"], + z=ball_data["z"], + ), + ) + + frames = [] + for n, frame in enumerate(_iter()): + frame = transformer.transform_frame(frame) + + frames.append(frame) + + if not frame.period.attacking_direction_set: + frame.period.set_attacking_direction( + attacking_direction=attacking_direction_from_frame( + frame + ) + ) + + if self.limit and n >= self.limit: + break + + print(len(frames)) orientation = ( Orientation.FIXED_HOME_AWAY if periods[0].attacking_direction == AttackingDirection.HOME_AWAY diff --git a/kloppy/tests/test_sportec.py b/kloppy/tests/test_sportec.py index b5750c86..4b3058a5 100644 --- a/kloppy/tests/test_sportec.py +++ b/kloppy/tests/test_sportec.py @@ -33,7 +33,9 @@ def test_correct_event_data_deserialization( self, event_data: Path, meta_data: Path ): dataset = sportec.load_event( - event_data=event_data, meta_data=meta_data, coordinates="sportec" + event_data=event_data, + meta_data=meta_data, + coordinates="sportec", ) assert dataset.metadata.provider == Provider.SPORTEC @@ -107,7 +109,10 @@ def test_load_metadata(self, raw_data: Path, meta_data: Path): def test_load_frames(self, raw_data: Path, meta_data: Path): dataset = sportec.load_tracking( - raw_data=raw_data, meta_data=meta_data, coordinates="sportec" + raw_data=raw_data, + meta_data=meta_data, + coordinates="sportec", + only_alive=False, ) home_team, away_team = dataset.metadata.teams From df58a8707b793efe274ba9218f877183cc71d159 Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Sat, 22 Jul 2023 15:28:05 +0200 Subject: [PATCH 05/15] Change implementation a bit: keep original XML attribute as long as possible and only convert string attributes to float when constructing a Frame. When only_alive or sample_rate are set this will prevent lots of unnecessary type convertions --- .../tracking/sportec/deserializer.py | 72 ++++++++----------- kloppy/tests/test_sportec.py | 2 +- 2 files changed, 32 insertions(+), 42 deletions(-) diff --git a/kloppy/infra/serializers/tracking/sportec/deserializer.py b/kloppy/infra/serializers/tracking/sportec/deserializer.py index 473d4987..324ba957 100644 --- a/kloppy/infra/serializers/tracking/sportec/deserializer.py +++ b/kloppy/infra/serializers/tracking/sportec/deserializer.py @@ -48,18 +48,22 @@ def _read_section_data(data_root, period: Period) -> dict: Output format: { 10_000: { - ('BALL', 'DFL-OBJ-0000XT'): { - 'x': 20.92, - 'y': 2.84, - 'z': 0.08, - 'speed': 4.91, - 'ballPossession': 2, - 'ballStatus': 1 + 'ball': { + 'N': "10000", + 'X': 20.92, + 'Y': 2.84, + 'Z': 0.08, + 'S': 4.91, + 'BallPossession': "2", + 'BallStatus': "1" + [...] }, - ('DFL-CLU-000004', 'DFL-OBJ-002G3I'): { - 'x': 0.35, - 'y': -25.26, - 'speed': 0.00, + 'DFL-OBJ-002G3I': { + 'N': "10000", + 'X': "0.35", + 'Y': "-25.26", + 'S': "0.00", + [...] }, [....] }, @@ -84,22 +88,7 @@ def _read_section_data(data_root, period: Period) -> dict: for frame in frame_set.iterchildren("Frame"): attr = frame.attrib frame_id = int(attr["N"]) - - object_data = { - "x": float(attr["X"]), - "y": float(attr["Y"]), - "speed": float(attr["S"]), - } - if key == "ball": - object_data.update( - { - "z": float(attr["Z"]), - "possession": int(attr["BallPossession"]), - "state": int(attr["BallStatus"]), - } - ) - - raw_frames[frame_id][key] = object_data + raw_frames[frame_id][key] = attr return raw_frames @@ -139,7 +128,7 @@ def deserialize( length=sportec_metadata.x_max, width=sportec_metadata.y_max ) - with performance_logging("parse raw data", logger=None): + with performance_logging("parse raw data", logger=logger): def _iter(): player_map = {} @@ -153,17 +142,19 @@ def _iter(): for period in periods: raw_frames = _read_section_data(data_root, period) - # Since python 3.6 dict keep insertion order + # Since python 3.6 dict keep insertion order. Don't need to sort + # on frame ID as it's already sorted. + # Ball FrameSet is always first and contains ALL frame ids. This + # makes sure even with substitutes the data is on order. for i, (frame_id, frame_data) in enumerate( raw_frames.items() ): if "ball" not in frame_data: # Frames without ball data are corrupt. - print(frame_id, frame_data) continue ball_data = frame_data["ball"] - if self.only_alive and ball_data["state"] != 1: + if self.only_alive and ball_data["BallStatus"] != "1": continue if i % sample == 0: @@ -172,28 +163,28 @@ def _iter(): timestamp=(frame_id / sportec_metadata.fps) - period.start_timestamp, ball_owning_team=home_team - if ball_data["possession"] == 1 + if ball_data["BallPossession"] == '1' else away_team, ball_state=BallState.ALIVE - if ball_data["state"] == 1 + if ball_data["BallStatus"] == '1' else BallState.DEAD, period=period, players_data={ player_map[player_id]: PlayerData( coordinates=Point( - x=raw_player_data["x"], - y=raw_player_data["y"], + x=float(raw_player_data["X"]), + y=float(raw_player_data["Y"]), ), - speed=raw_player_data["speed"], + speed=float(raw_player_data["S"]), ) for player_id, raw_player_data in frame_data.items() if player_id != "ball" }, other_data={}, ball_coordinates=Point3D( - x=ball_data["x"], - y=ball_data["y"], - z=ball_data["z"], + x=float(ball_data["X"]), + y=float(ball_data["Y"]), + z=float(ball_data["Z"]), ), ) @@ -213,7 +204,6 @@ def _iter(): if self.limit and n >= self.limit: break - print(len(frames)) orientation = ( Orientation.FIXED_HOME_AWAY if periods[0].attacking_direction == AttackingDirection.HOME_AWAY @@ -233,7 +223,7 @@ def _iter(): ) return TrackingDataset( - records=[], + records=frames, metadata=metadata, ) diff --git a/kloppy/tests/test_sportec.py b/kloppy/tests/test_sportec.py index 4b3058a5..dd38cf4f 100644 --- a/kloppy/tests/test_sportec.py +++ b/kloppy/tests/test_sportec.py @@ -137,7 +137,7 @@ def test_load_frames(self, raw_data: Path, meta_data: Path): # Appears first in 27th frame player_bensebaini = away_team.get_player_by_id("DFL-OBJ-002G5S") assert player_bensebaini not in dataset.frames[0].players_data - assert player_bensebaini in dataset.frames[26] + assert player_bensebaini in dataset.frames[26].players_data # Contains all 3 players assert len(dataset.frames[35].players_data) == 3 From 5fff03762dad47649885bada6e2803e249a944bf Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Sat, 22 Jul 2023 15:31:54 +0200 Subject: [PATCH 06/15] Code formatting + only alive frames test --- kloppy/domain/models/common.py | 3 +++ .../infra/serializers/tracking/sportec/deserializer.py | 4 ++-- kloppy/tests/test_sportec.py | 10 ++++++++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/kloppy/domain/models/common.py b/kloppy/domain/models/common.py index 4b60b893..aa329951 100644 --- a/kloppy/domain/models/common.py +++ b/kloppy/domain/models/common.py @@ -838,6 +838,9 @@ def __iter__(self): def __getitem__(self, item): return self.records[item] + def __len__(self): + return len(self.records) + def __post_init__(self): for i, record in enumerate(self.records): record.set_refs( diff --git a/kloppy/infra/serializers/tracking/sportec/deserializer.py b/kloppy/infra/serializers/tracking/sportec/deserializer.py index 324ba957..e9f1f463 100644 --- a/kloppy/infra/serializers/tracking/sportec/deserializer.py +++ b/kloppy/infra/serializers/tracking/sportec/deserializer.py @@ -163,10 +163,10 @@ def _iter(): timestamp=(frame_id / sportec_metadata.fps) - period.start_timestamp, ball_owning_team=home_team - if ball_data["BallPossession"] == '1' + if ball_data["BallPossession"] == "1" else away_team, ball_state=BallState.ALIVE - if ball_data["BallStatus"] == '1' + if ball_data["BallStatus"] == "1" else BallState.DEAD, period=period, players_data={ diff --git a/kloppy/tests/test_sportec.py b/kloppy/tests/test_sportec.py index dd38cf4f..1fa3f8c7 100644 --- a/kloppy/tests/test_sportec.py +++ b/kloppy/tests/test_sportec.py @@ -141,3 +141,13 @@ def test_load_frames(self, raw_data: Path, meta_data: Path): # Contains all 3 players assert len(dataset.frames[35].players_data) == 3 + assert len(dataset) == 202 + + def test_load_only_alive_frames(self, raw_data: Path, meta_data: Path): + dataset = sportec.load_tracking( + raw_data=raw_data, + meta_data=meta_data, + coordinates="sportec", + only_alive=True, + ) + assert len(dataset) == 199 From 2034364d5e226042ed4c39eb721f9ddb68977269 Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Sat, 22 Jul 2023 15:36:31 +0200 Subject: [PATCH 07/15] Small fixes --- .../serializers/event/sportec/deserializer.py | 4 ++-- .../serializers/tracking/sportec/deserializer.py | 16 ++++++---------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/kloppy/infra/serializers/event/sportec/deserializer.py b/kloppy/infra/serializers/event/sportec/deserializer.py index 03b0868b..911633db 100644 --- a/kloppy/infra/serializers/event/sportec/deserializer.py +++ b/kloppy/infra/serializers/event/sportec/deserializer.py @@ -78,7 +78,7 @@ class SportecMetadata(NamedTuple): fps: int -def _sportec_metadata_from_xml_elm(match_root) -> SportecMetadata: +def sportec_metadata_from_xml_elm(match_root) -> SportecMetadata: """ Load metadata from Sportec XML element. This part is shared between event- and tracking data. In the future this might move to a common.sportec package that provides functionality for both @@ -369,7 +369,7 @@ def deserialize(self, inputs: SportecEvenDataInputs) -> EventDataset: event_root = objectify.fromstring(inputs.event_data.read()) with performance_logging("parse data", logger=logger): - sportec_metadata = _sportec_metadata_from_xml_elm(match_root) + sportec_metadata = sportec_metadata_from_xml_elm(match_root) teams = home_team, away_team = sportec_metadata.teams transformer = self.get_transformer( length=sportec_metadata.x_max, width=sportec_metadata.y_max diff --git a/kloppy/infra/serializers/tracking/sportec/deserializer.py b/kloppy/infra/serializers/tracking/sportec/deserializer.py index e9f1f463..ec4aeb18 100644 --- a/kloppy/infra/serializers/tracking/sportec/deserializer.py +++ b/kloppy/infra/serializers/tracking/sportec/deserializer.py @@ -1,7 +1,6 @@ -import json import logging from collections import defaultdict -from typing import Tuple, Dict, NamedTuple, Optional, Union, IO, Literal +from typing import Tuple, NamedTuple, Optional, Union, IO from lxml import objectify @@ -12,24 +11,21 @@ Frame, Point, Point3D, - Team, BallState, Period, - Provider, Orientation, attacking_direction_from_frame, Metadata, - Ground, - Player, - build_coordinate_system, Provider, PlayerData, ) -from kloppy.utils import Readable, performance_logging +from kloppy.utils import performance_logging from ..deserializer import TrackingDataDeserializer -from ...event.sportec.deserializer import _sportec_metadata_from_xml_elm +from kloppy.infra.serializers.event.sportec.deserializer import ( + sportec_metadata_from_xml_elm, +) logger = logging.getLogger(__name__) @@ -121,7 +117,7 @@ def deserialize( data_root = objectify.fromstring(inputs.raw_data.read()) with performance_logging("parse metadata", logger=logger): - sportec_metadata = _sportec_metadata_from_xml_elm(match_root) + sportec_metadata = sportec_metadata_from_xml_elm(match_root) teams = home_team, away_team = sportec_metadata.teams periods = sportec_metadata.periods transformer = self.get_transformer( From 05494134fb5ebe362d60781c588ae220519bb624 Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Sat, 22 Jul 2023 15:41:01 +0200 Subject: [PATCH 08/15] Fix some naming + imports --- kloppy/_providers/sportec.py | 6 +++--- kloppy/infra/serializers/tracking/sportec/__init__.py | 4 ++++ kloppy/infra/serializers/tracking/sportec/deserializer.py | 5 +---- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/kloppy/_providers/sportec.py b/kloppy/_providers/sportec.py index 1dcf6812..e9826d8d 100644 --- a/kloppy/_providers/sportec.py +++ b/kloppy/_providers/sportec.py @@ -6,8 +6,8 @@ SportecEventDataDeserializer, SportecEvenDataInputs, ) -from kloppy.infra.serializers.tracking.sportec.deserializer import ( - SportecTrackingDataSerializer, +from kloppy.infra.serializers.tracking.sportec import ( + SportecTrackingDataDeserializer, SportecTrackingDataInputs, ) from kloppy.io import open_as_file, FileLike @@ -55,7 +55,7 @@ def load_tracking( coordinates: Optional[str] = None, only_alive: Optional[bool] = True, ) -> TrackingDataset: - deserializer = SportecTrackingDataSerializer( + deserializer = SportecTrackingDataDeserializer( sample_rate=sample_rate, limit=limit, coordinate_system=coordinates, diff --git a/kloppy/infra/serializers/tracking/sportec/__init__.py b/kloppy/infra/serializers/tracking/sportec/__init__.py index e69de29b..245f9471 100644 --- a/kloppy/infra/serializers/tracking/sportec/__init__.py +++ b/kloppy/infra/serializers/tracking/sportec/__init__.py @@ -0,0 +1,4 @@ +from .deserializer import ( + SportecTrackingDataDeserializer, + SportecTrackingDataInputs, +) diff --git a/kloppy/infra/serializers/tracking/sportec/deserializer.py b/kloppy/infra/serializers/tracking/sportec/deserializer.py index ec4aeb18..fc4bdbfa 100644 --- a/kloppy/infra/serializers/tracking/sportec/deserializer.py +++ b/kloppy/infra/serializers/tracking/sportec/deserializer.py @@ -94,7 +94,7 @@ class SportecTrackingDataInputs(NamedTuple): raw_data: IO[bytes] -class SportecTrackingDataSerializer(TrackingDataDeserializer): +class SportecTrackingDataDeserializer(TrackingDataDeserializer): @property def provider(self) -> Provider: return Provider.SPORTEC @@ -222,6 +222,3 @@ def _iter(): records=frames, metadata=metadata, ) - - def serialize(self, dataset: TrackingDataset) -> Tuple[str, str]: - raise NotImplementedError From 468f54f4a332e627aa3dae884200d786125869a6 Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Sat, 22 Jul 2023 15:51:39 +0200 Subject: [PATCH 09/15] Fix for floating issues with timestamp --- .../serializers/tracking/sportec/deserializer.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/kloppy/infra/serializers/tracking/sportec/deserializer.py b/kloppy/infra/serializers/tracking/sportec/deserializer.py index fc4bdbfa..a687336b 100644 --- a/kloppy/infra/serializers/tracking/sportec/deserializer.py +++ b/kloppy/infra/serializers/tracking/sportec/deserializer.py @@ -156,8 +156,15 @@ def _iter(): if i % sample == 0: yield Frame( frame_id=frame_id, - timestamp=(frame_id / sportec_metadata.fps) - - period.start_timestamp, + timestamp=( + ( + frame_id + # Do subtraction with integers to prevent floating errors + - period.start_timestamp + * sportec_metadata.fps + ) + / sportec_metadata.fps + ), ball_owning_team=home_team if ball_data["BallPossession"] == "1" else away_team, From 64aea81bf9b8ed1b7982005ef9747ef3d2f3a969 Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Mon, 24 Jul 2023 21:14:58 +0200 Subject: [PATCH 10/15] Typo fix --- kloppy/_providers/sportec.py | 4 ++-- kloppy/infra/serializers/event/sportec/__init__.py | 2 +- kloppy/infra/serializers/event/sportec/deserializer.py | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/kloppy/_providers/sportec.py b/kloppy/_providers/sportec.py index e9826d8d..2aff6625 100644 --- a/kloppy/_providers/sportec.py +++ b/kloppy/_providers/sportec.py @@ -4,7 +4,7 @@ from kloppy.domain import EventDataset, EventFactory, TrackingDataset from kloppy.infra.serializers.event.sportec import ( SportecEventDataDeserializer, - SportecEvenDataInputs, + SportecEventDataInputs, ) from kloppy.infra.serializers.tracking.sportec import ( SportecTrackingDataDeserializer, @@ -41,7 +41,7 @@ def load_event( meta_data ) as meta_data_fp: return serializer.deserialize( - SportecEvenDataInputs( + SportecEventDataInputs( event_data=event_data_fp, meta_data=meta_data_fp ) ) diff --git a/kloppy/infra/serializers/event/sportec/__init__.py b/kloppy/infra/serializers/event/sportec/__init__.py index 00e8063b..1d3665c1 100644 --- a/kloppy/infra/serializers/event/sportec/__init__.py +++ b/kloppy/infra/serializers/event/sportec/__init__.py @@ -1 +1 @@ -from .deserializer import SportecEventDataDeserializer, SportecEvenDataInputs +from .deserializer import SportecEventDataDeserializer, SportecEventDataInputs diff --git a/kloppy/infra/serializers/event/sportec/deserializer.py b/kloppy/infra/serializers/event/sportec/deserializer.py index 911633db..631d33d8 100644 --- a/kloppy/infra/serializers/event/sportec/deserializer.py +++ b/kloppy/infra/serializers/event/sportec/deserializer.py @@ -351,19 +351,19 @@ def _parse_coordinates(event_attributes: Dict) -> Point: ) -class SportecEvenDataInputs(NamedTuple): +class SportecEventDataInputs(NamedTuple): meta_data: IO[bytes] event_data: IO[bytes] class SportecEventDataDeserializer( - EventDataDeserializer[SportecEvenDataInputs] + EventDataDeserializer[SportecEventDataInputs] ): @property def provider(self) -> Provider: return Provider.SPORTEC - def deserialize(self, inputs: SportecEvenDataInputs) -> EventDataset: + def deserialize(self, inputs: SportecEventDataInputs) -> EventDataset: with performance_logging("load data", logger=logger): match_root = objectify.fromstring(inputs.meta_data.read()) event_root = objectify.fromstring(inputs.event_data.read()) From bb536e99a623fcfbe8a665f835cc3f7b23da1d67 Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Tue, 25 Jul 2023 19:57:07 +0200 Subject: [PATCH 11/15] Add missing ball speed --- kloppy/domain/services/transformers/attribute.py | 1 + kloppy/infra/serializers/tracking/sportec/deserializer.py | 1 + kloppy/tests/test_helpers.py | 2 ++ kloppy/tests/test_sportec.py | 1 + 4 files changed, 5 insertions(+) diff --git a/kloppy/domain/services/transformers/attribute.py b/kloppy/domain/services/transformers/attribute.py index 09bdcf02..8bdff050 100644 --- a/kloppy/domain/services/transformers/attribute.py +++ b/kloppy/domain/services/transformers/attribute.py @@ -274,6 +274,7 @@ def __call__(self, frame: Frame) -> Dict[str, Any]: ball_z=getattr(frame.ball_coordinates, "z", None) if frame.ball_coordinates else None, + ball_speed=frame.ball_speed, ) for player, player_data in frame.players_data.items(): row.update( diff --git a/kloppy/infra/serializers/tracking/sportec/deserializer.py b/kloppy/infra/serializers/tracking/sportec/deserializer.py index a687336b..c8126382 100644 --- a/kloppy/infra/serializers/tracking/sportec/deserializer.py +++ b/kloppy/infra/serializers/tracking/sportec/deserializer.py @@ -189,6 +189,7 @@ def _iter(): y=float(ball_data["Y"]), z=float(ball_data["Z"]), ), + ball_speed=float(ball_data["S"]), ) frames = [] diff --git a/kloppy/tests/test_helpers.py b/kloppy/tests/test_helpers.py index d320b6fa..c687ed2a 100644 --- a/kloppy/tests/test_helpers.py +++ b/kloppy/tests/test_helpers.py @@ -296,6 +296,7 @@ def test_to_pandas(self): "ball_x": {0: 100, 1: 0}, "ball_y": {0: -50, 1: 50}, "ball_z": {0: 0, 1: 1}, + "ball_speed": {0: None, 1: None}, "home_1_x": {0: None, 1: 15.0}, "home_1_y": {0: None, 1: 35.0}, "home_1_d": {0: None, 1: 0.03}, @@ -348,6 +349,7 @@ def test_to_pandas_additional_columns(self): "ball_x": [100, 0], "ball_y": [-50, 50], "ball_z": [0, 1], + "ball_speed": [None, None], "match": ["test", "test"], "bonus_column": [11, 12], "home_1_x": [None, 15], diff --git a/kloppy/tests/test_sportec.py b/kloppy/tests/test_sportec.py index 1fa3f8c7..d1127906 100644 --- a/kloppy/tests/test_sportec.py +++ b/kloppy/tests/test_sportec.py @@ -121,6 +121,7 @@ def test_load_frames(self, raw_data: Path, meta_data: Path): assert dataset.frames[0].ball_coordinates == Point3D( x=2.69, y=0.26, z=0.06 ) + assert dataset.frames[1].ball_speed == 65.59 assert dataset.frames[1].ball_owning_team == home_team assert dataset.frames[1].ball_state == BallState.ALIVE From 8787ac5dd183502e336a2cafd6d2a8efe73f9804 Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Wed, 26 Jul 2023 16:29:26 +0200 Subject: [PATCH 12/15] Some providers - like Sportec - use different CoordinateSystems for event vs tracking data. Therefore, Provider alone cannot be used to identify a CoordinateSystem. Use DatasetType to determine correct CoordinateSystem. Also refactor construction of DatasetTransformer into DatasetTransformerBuilder, and use it from both TrackingDataDeserializer as EventDataDeserializer. --- kloppy/domain/models/common.py | 74 +++++++++++++------ kloppy/domain/services/__init__.py | 2 +- .../domain/services/transformers/__init__.py | 2 +- .../domain/services/transformers/dataset.py | 54 ++++++++++++++ .../infra/serializers/event/deserializer.py | 30 ++------ .../serializers/tracking/deserializer.py | 27 ++----- 6 files changed, 122 insertions(+), 67 deletions(-) diff --git a/kloppy/domain/models/common.py b/kloppy/domain/models/common.py index aa329951..dabad000 100644 --- a/kloppy/domain/models/common.py +++ b/kloppy/domain/models/common.py @@ -17,6 +17,7 @@ Iterable, ) + if sys.version_info >= (3, 8): from typing import Literal else: @@ -518,7 +519,7 @@ def pitch_dimensions(self) -> PitchDimensions: @dataclass -class SportecCoordinateSystem(CoordinateSystem): +class SportecEventDataCoordinateSystem(CoordinateSystem): @property def provider(self) -> Provider: return Provider.SPORTEC @@ -541,6 +542,30 @@ def pitch_dimensions(self) -> PitchDimensions: ) +@dataclass +class SportecTrackingDataCoordinateSystem(CoordinateSystem): + @property + def provider(self) -> Provider: + return Provider.SPORTEC + + @property + def origin(self) -> Origin: + return Origin.CENTER + + @property + def vertical_orientation(self) -> VerticalOrientation: + return VerticalOrientation.BOTTOM_TO_TOP + + @property + def pitch_dimensions(self) -> PitchDimensions: + return PitchDimensions( + x_dim=Dimension(-self.length / 2, self.length / 2), + y_dim=Dimension(-self.width / 2, self.width / 2), + length=self.length, + width=self.width, + ) + + @dataclass class StatsBombCoordinateSystem(CoordinateSystem): @property @@ -654,7 +679,27 @@ def pitch_dimensions(self) -> PitchDimensions: ) -def build_coordinate_system(provider: Provider, **kwargs): +class DatasetType(Enum): + """ + DatasetType + + Attributes: + TRACKING (DatasetType): + EVENT (DatasetType): + CODE (DatasetType): + """ + + TRACKING = "TRACKING" + EVENT = "EVENT" + CODE = "CODE" + + def __repr__(self): + return self.value + + +def build_coordinate_system( + provider: Provider, dataset_type: DatasetType = DatasetType.EVENT, **kwargs +): if provider == Provider.TRACAB: return TracabCoordinateSystem(normalized=False, **kwargs) @@ -668,7 +713,12 @@ def build_coordinate_system(provider: Provider, **kwargs): return OptaCoordinateSystem(normalized=False, **kwargs) if provider == Provider.SPORTEC: - return SportecCoordinateSystem(normalized=False, **kwargs) + if dataset_type == DatasetType.TRACKING: + return SportecTrackingDataCoordinateSystem( + normalized=False, **kwargs + ) + else: + return SportecEventDataCoordinateSystem(normalized=False, **kwargs) if provider == Provider.STATSBOMB: return StatsBombCoordinateSystem(normalized=False, **kwargs) @@ -795,24 +845,6 @@ class Metadata: attributes: Optional[Dict] = field(default_factory=dict, compare=False) -class DatasetType(Enum): - """ - DatasetType - - Attributes: - TRACKING (DatasetType): - EVENT (DatasetType): - CODE (DatasetType): - """ - - TRACKING = "TRACKING" - EVENT = "EVENT" - CODE = "CODE" - - def __repr__(self): - return self.value - - T = TypeVar("T", bound="DataRecord") diff --git a/kloppy/domain/services/__init__.py b/kloppy/domain/services/__init__.py index 3e3884d0..404a7f8c 100644 --- a/kloppy/domain/services/__init__.py +++ b/kloppy/domain/services/__init__.py @@ -2,7 +2,7 @@ from kloppy.domain import AttackingDirection, Frame, Ground -from .transformers import DatasetTransformer +from .transformers import DatasetTransformer, DatasetTransformerBuilder from .event_factory import EventFactory, create_event # NOT YET: from .enrichers import TrackingPossessionEnricher diff --git a/kloppy/domain/services/transformers/__init__.py b/kloppy/domain/services/transformers/__init__.py index 41ba032f..92dac6b3 100644 --- a/kloppy/domain/services/transformers/__init__.py +++ b/kloppy/domain/services/transformers/__init__.py @@ -1 +1 @@ -from .dataset import DatasetTransformer +from .dataset import DatasetTransformer, DatasetTransformerBuilder diff --git a/kloppy/domain/services/transformers/dataset.py b/kloppy/domain/services/transformers/dataset.py index c8407655..96126065 100644 --- a/kloppy/domain/services/transformers/dataset.py +++ b/kloppy/domain/services/transformers/dataset.py @@ -1,4 +1,5 @@ from dataclasses import fields, replace + from kloppy.domain.models.tracking import PlayerData from typing import Union, Optional @@ -15,6 +16,9 @@ Team, TrackingDataset, CoordinateSystem, + Provider, + build_coordinate_system, + DatasetType, ) from kloppy.domain.models.event import Event from kloppy.exceptions import KloppyError @@ -431,3 +435,53 @@ def transform_dataset( ) else: raise KloppyError("Unknown Dataset type") + + +class DatasetTransformerBuilder: + def __init__( + self, to_coordinate_system: Optional[Union[str, Provider]] = None + ): + from kloppy.config import get_config + + if not to_coordinate_system: + to_coordinate_system = get_config("coordinate_system") + + if not to_coordinate_system: + to_coordinate_system = Provider.KLOPPY + + to_dataset_type = None + if isinstance(to_coordinate_system, str): + if ":" in to_coordinate_system: + provider_name, dataset_type_name = to_coordinate_system.split( + ":" + ) + to_coordinate_system = Provider[provider_name.upper()] + to_dataset_type = DatasetType[dataset_type_name.upper()] + else: + to_coordinate_system = Provider[to_coordinate_system.upper()] + + self.to_coordinate_system = to_coordinate_system + self.to_dataset_type = to_dataset_type + + def build( + self, + length: float, + width: float, + provider: Provider, + dataset_type: DatasetType, + ): + from_coordinate_system = build_coordinate_system( + provider, length=length, width=width, dataset_type=dataset_type + ) + + to_coordinate_system = build_coordinate_system( + self.to_coordinate_system, + length=length, + width=width, + dataset_type=self.to_dataset_type or dataset_type, + ) + + return DatasetTransformer( + from_coordinate_system=from_coordinate_system, + to_coordinate_system=to_coordinate_system, + ) diff --git a/kloppy/infra/serializers/event/deserializer.py b/kloppy/infra/serializers/event/deserializer.py index cec06b42..ce80705b 100644 --- a/kloppy/infra/serializers/event/deserializer.py +++ b/kloppy/infra/serializers/event/deserializer.py @@ -1,15 +1,15 @@ from abc import ABC, abstractmethod from typing import Optional, List, Generic, TypeVar, Union -from kloppy.config import get_config from kloppy.domain import ( EventDataset, Event, EventType, DatasetTransformer, Provider, - build_coordinate_system, EventFactory, + DatasetType, + DatasetTransformerBuilder, ) T = TypeVar("T") @@ -32,13 +32,7 @@ def __init__( for event_type in event_types ] - if not coordinate_system: - coordinate_system = get_config("coordinate_system") - - if isinstance(coordinate_system, str): - coordinate_system = Provider[coordinate_system.upper()] - - self.coordinate_system = coordinate_system + self.transformer_builder = DatasetTransformerBuilder(coordinate_system) if not event_factory: event_factory = EventFactory() @@ -50,23 +44,13 @@ def should_include_event(self, event: Event) -> bool: return event.event_type in self.event_types def get_transformer( - self, length: float, width: float + self, length: float, width: float, provider: Optional[Provider] = None ) -> DatasetTransformer: - from_coordinate_system = build_coordinate_system( - self.provider, - length=length, - width=width, - ) - - to_coordinate_system = build_coordinate_system( - self.coordinate_system, + return self.transformer_builder.build( length=length, width=width, - ) - - return DatasetTransformer( - from_coordinate_system=from_coordinate_system, - to_coordinate_system=to_coordinate_system, + provider=provider or self.provider, + dataset_type=DatasetType.EVENT, ) @property diff --git a/kloppy/infra/serializers/tracking/deserializer.py b/kloppy/infra/serializers/tracking/deserializer.py index cc370cba..0635566c 100644 --- a/kloppy/infra/serializers/tracking/deserializer.py +++ b/kloppy/infra/serializers/tracking/deserializer.py @@ -4,8 +4,9 @@ from kloppy.domain import ( Provider, TrackingDataset, - build_coordinate_system, DatasetTransformer, + DatasetTransformerBuilder, + DatasetType, ) T = TypeVar("T") @@ -26,32 +27,16 @@ def __init__( sample_rate = 1.0 self.sample_rate = sample_rate - if not coordinate_system: - coordinate_system = Provider.KLOPPY - - if isinstance(coordinate_system, str): - coordinate_system = Provider[coordinate_system.upper()] - - self.coordinate_system = coordinate_system + self.transformer_builder = DatasetTransformerBuilder(coordinate_system) def get_transformer( self, length: float, width: float, provider: Optional[Provider] = None ) -> DatasetTransformer: - from_coordinate_system = build_coordinate_system( - provider or self.provider, - length=length, - width=width, - ) - - to_coordinate_system = build_coordinate_system( - self.coordinate_system, + return self.transformer_builder.build( length=length, width=width, - ) - - return DatasetTransformer( - from_coordinate_system=from_coordinate_system, - to_coordinate_system=to_coordinate_system, + provider=provider or self.provider, + dataset_type=DatasetType.TRACKING, ) @property From cad6474ccea4528f8b3f492bb809857503ded8b6 Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Thu, 27 Jul 2023 15:55:47 +0200 Subject: [PATCH 13/15] Make sure frames are always sorted on timestamp, instead of occurance in input data --- .../tracking/sportec/deserializer.py | 2 +- kloppy/tests/files/sportec_positional.xml | 58 +++++++++---------- kloppy/tests/test_sportec.py | 12 ++++ 3 files changed, 42 insertions(+), 30 deletions(-) diff --git a/kloppy/infra/serializers/tracking/sportec/deserializer.py b/kloppy/infra/serializers/tracking/sportec/deserializer.py index c8126382..cfdee0c6 100644 --- a/kloppy/infra/serializers/tracking/sportec/deserializer.py +++ b/kloppy/infra/serializers/tracking/sportec/deserializer.py @@ -143,7 +143,7 @@ def _iter(): # Ball FrameSet is always first and contains ALL frame ids. This # makes sure even with substitutes the data is on order. for i, (frame_id, frame_data) in enumerate( - raw_frames.items() + sorted(raw_frames.items()) ): if "ball" not in frame_data: # Frames without ball data are corrupt. diff --git a/kloppy/tests/files/sportec_positional.xml b/kloppy/tests/files/sportec_positional.xml index 548cbad2..8e85056d 100644 --- a/kloppy/tests/files/sportec_positional.xml +++ b/kloppy/tests/files/sportec_positional.xml @@ -270,6 +270,35 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -479,34 +508,5 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/kloppy/tests/test_sportec.py b/kloppy/tests/test_sportec.py index d1127906..95951cf7 100644 --- a/kloppy/tests/test_sportec.py +++ b/kloppy/tests/test_sportec.py @@ -116,6 +116,7 @@ def test_load_frames(self, raw_data: Path, meta_data: Path): ) home_team, away_team = dataset.metadata.teams + assert dataset.frames[0].timestamp == 0.0 assert dataset.frames[0].ball_owning_team == away_team assert dataset.frames[0].ball_state == BallState.DEAD assert dataset.frames[0].ball_coordinates == Point3D( @@ -144,6 +145,17 @@ def test_load_frames(self, raw_data: Path, meta_data: Path): assert len(dataset.frames[35].players_data) == 3 assert len(dataset) == 202 + second_period = dataset.metadata.periods[1] + for frame in dataset: + if frame.period == second_period: + assert ( + frame.timestamp == 0 + ), "First frame must start at timestamp 0.0" + break + else: + # No data found in second half + assert False + def test_load_only_alive_frames(self, raw_data: Path, meta_data: Path): dataset = sportec.load_tracking( raw_data=raw_data, From d8eeb5ee9b256ded7e5cd68cd6aecc649cb71dd0 Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Sat, 29 Jul 2023 14:48:34 +0200 Subject: [PATCH 14/15] Use 'constants' for Sportec starting frame ids for each half + code formatting improvement --- .../domain/services/transformers/dataset.py | 6 ++++- .../serializers/event/sportec/deserializer.py | 27 +++++++++++++------ .../tracking/sportec/deserializer.py | 2 +- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/kloppy/domain/services/transformers/dataset.py b/kloppy/domain/services/transformers/dataset.py index 96126065..3ddb1d46 100644 --- a/kloppy/domain/services/transformers/dataset.py +++ b/kloppy/domain/services/transformers/dataset.py @@ -471,7 +471,11 @@ def build( dataset_type: DatasetType, ): from_coordinate_system = build_coordinate_system( - provider, length=length, width=width, dataset_type=dataset_type + # This comment forces black to keep the arguments as multi-line + provider, + length=length, + width=width, + dataset_type=dataset_type, ) to_coordinate_system = build_coordinate_system( diff --git a/kloppy/infra/serializers/event/sportec/deserializer.py b/kloppy/infra/serializers/event/sportec/deserializer.py index 631d33d8..87cabab9 100644 --- a/kloppy/infra/serializers/event/sportec/deserializer.py +++ b/kloppy/infra/serializers/event/sportec/deserializer.py @@ -68,6 +68,12 @@ def _team_from_xml_elm(team_elm) -> Team: SPORTEC_FPS = 25 +"""Sportec uses fixed starting frame ids for each half""" +SPORTEC_FIRST_HALF_STARTING_FRAME_ID = 10_000 +SPORTEC_SECOND_HALF_STARTING_FRAME_ID = 100_000 +SPORTEC_FIRST_EXTRA_HALF_STARTING_FRAME_ID = 200_000 +SPORTEC_SECOND_EXTRA_HALF_STARTING_FRAME_ID = 250_000 + class SportecMetadata(NamedTuple): score: Score @@ -123,14 +129,15 @@ def sportec_metadata_from_xml_elm(match_root) -> SportecMetadata: periods = [ Period( id=1, - start_timestamp=10_000 / SPORTEC_FPS, - end_timestamp=10_000 / SPORTEC_FPS + start_timestamp=SPORTEC_FIRST_HALF_STARTING_FRAME_ID / SPORTEC_FPS, + end_timestamp=SPORTEC_FIRST_HALF_STARTING_FRAME_ID / SPORTEC_FPS + float(other_game_information["TotalTimeFirstHalf"]) / 1000, ), Period( id=2, - start_timestamp=100_000 / SPORTEC_FPS, - end_timestamp=100_000 / SPORTEC_FPS + start_timestamp=SPORTEC_SECOND_HALF_STARTING_FRAME_ID + / SPORTEC_FPS, + end_timestamp=SPORTEC_SECOND_HALF_STARTING_FRAME_ID / SPORTEC_FPS + float(other_game_information["TotalTimeSecondHalf"]) / 1000, ), ] @@ -141,15 +148,19 @@ def sportec_metadata_from_xml_elm(match_root) -> SportecMetadata: [ Period( id=3, - start_timestamp=200_000 / SPORTEC_FPS, - end_timestamp=200_000 / SPORTEC_FPS + start_timestamp=SPORTEC_FIRST_EXTRA_HALF_STARTING_FRAME_ID + / SPORTEC_FPS, + end_timestamp=SPORTEC_FIRST_EXTRA_HALF_STARTING_FRAME_ID + / SPORTEC_FPS + float(other_game_information["TotalTimeFirstHalfExtra"]) / 1000, ), Period( id=4, - start_timestamp=250_000 / SPORTEC_FPS, - end_timestamp=250_000 / SPORTEC_FPS + start_timestamp=SPORTEC_SECOND_EXTRA_HALF_STARTING_FRAME_ID + / SPORTEC_FPS, + end_timestamp=SPORTEC_SECOND_EXTRA_HALF_STARTING_FRAME_ID + / SPORTEC_FPS + float(other_game_information["TotalTimeSecondHalfExtra"]) / 1000, ), diff --git a/kloppy/infra/serializers/tracking/sportec/deserializer.py b/kloppy/infra/serializers/tracking/sportec/deserializer.py index cfdee0c6..45b05b1f 100644 --- a/kloppy/infra/serializers/tracking/sportec/deserializer.py +++ b/kloppy/infra/serializers/tracking/sportec/deserializer.py @@ -1,6 +1,6 @@ import logging from collections import defaultdict -from typing import Tuple, NamedTuple, Optional, Union, IO +from typing import NamedTuple, Optional, Union, IO from lxml import objectify From edfb9350c570fffcfc3ce4ebb9f66effdea963a3 Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Sun, 30 Jul 2023 21:54:18 +0200 Subject: [PATCH 15/15] Update sportec docs --- docs/getting-started/sportec.ipynb | 284 +++++++++++++++++++++++++++-- 1 file changed, 268 insertions(+), 16 deletions(-) diff --git a/docs/getting-started/sportec.ipynb b/docs/getting-started/sportec.ipynb index 399f8bb4..51227d12 100644 --- a/docs/getting-started/sportec.ipynb +++ b/docs/getting-started/sportec.ipynb @@ -7,13 +7,13 @@ "source": [ "# Sportec\n", "\n", - "## Load local files" + "## Load local event files" ] }, { "cell_type": "code", "execution_count": 1, - "id": "e9adb7cb", + "id": "4f6455fb", "metadata": {}, "outputs": [ { @@ -77,7 +77,7 @@ " 38.71\n", " DFL-OBJ-0000ZS\n", " KICK_OFF\n", - " NaN\n", + " None\n", " \n", " \n", " 1\n", @@ -97,8 +97,8 @@ " NaN\n", " NaN\n", " DFL-OBJ-002G3I\n", - " NaN\n", - " NaN\n", + " None\n", + " None\n", " \n", " \n", " 2\n", @@ -119,7 +119,7 @@ " 28.58\n", " DFL-OBJ-0027B9\n", " THROW_IN\n", - " NaN\n", + " None\n", " \n", " \n", " 3\n", @@ -138,8 +138,8 @@ " 28.58\n", " NaN\n", " NaN\n", - " NaN\n", - " NaN\n", + " None\n", + " None\n", " RIGHT_FOOT\n", " \n", " \n", @@ -160,8 +160,8 @@ " NaN\n", " NaN\n", " None\n", - " NaN\n", - " NaN\n", + " None\n", + " None\n", " \n", " \n", "\n", @@ -190,11 +190,11 @@ "4 8.72 4.21 NaN NaN \n", "\n", " receiver_player_id set_piece_type body_part_type \n", - "0 DFL-OBJ-0000ZS KICK_OFF NaN \n", - "1 DFL-OBJ-002G3I NaN NaN \n", - "2 DFL-OBJ-0027B9 THROW_IN NaN \n", - "3 NaN NaN RIGHT_FOOT \n", - "4 None NaN NaN " + "0 DFL-OBJ-0000ZS KICK_OFF None \n", + "1 DFL-OBJ-002G3I None None \n", + "2 DFL-OBJ-0027B9 THROW_IN None \n", + "3 None None RIGHT_FOOT \n", + "4 None None None " ] }, "execution_count": 1, @@ -205,7 +205,7 @@ "source": [ "from kloppy import sportec\n", "\n", - "dataset = sportec.load(\n", + "dataset = sportec.load_event(\n", " event_data=\"../../kloppy/tests/files/sportec_events.xml\",\n", " meta_data=\"../../kloppy/tests/files/sportec_meta.xml\",\n", " \n", @@ -216,6 +216,258 @@ "\n", "dataset.to_df().head()" ] + }, + { + "cell_type": "markdown", + "id": "81989fc6", + "metadata": {}, + "source": [ + "# Load local tracking files" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "958f17ee", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
period_idtimestampframe_idball_stateball_owning_team_idball_xball_yball_zball_speedDFL-OBJ-002G3I_x...DFL-OBJ-002G3I_dDFL-OBJ-002G3I_sDFL-OBJ-002G5S_xDFL-OBJ-002G5S_yDFL-OBJ-002G5S_dDFL-OBJ-002G5S_sDFL-OBJ-002FVJ_xDFL-OBJ-002FVJ_yDFL-OBJ-002FVJ_dDFL-OBJ-002FVJ_s
010.0010000deadDFL-CLU-0000042.690.260.060.000.35...None0.00NaNNaNNoneNaNNaNNaNNoneNaN
110.0410001aliveDFL-CLU-00000A3.410.260.0865.590.34...None1.74NaNNaNNoneNaNNaNNaNNoneNaN
210.0810002aliveDFL-CLU-0000044.220.330.0965.160.32...None1.76NaNNaNNoneNaNNaNNaNNoneNaN
310.1210003aliveDFL-CLU-0000045.020.380.0974.340.31...None1.78NaNNaNNoneNaNNaNNaNNoneNaN
410.1610004aliveDFL-CLU-0000045.790.440.0873.580.29...None1.80NaNNaNNoneNaNNaNNaNNoneNaN
\n", + "

5 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " period_id timestamp frame_id ball_state ball_owning_team_id ball_x \\\n", + "0 1 0.00 10000 dead DFL-CLU-000004 2.69 \n", + "1 1 0.04 10001 alive DFL-CLU-00000A 3.41 \n", + "2 1 0.08 10002 alive DFL-CLU-000004 4.22 \n", + "3 1 0.12 10003 alive DFL-CLU-000004 5.02 \n", + "4 1 0.16 10004 alive DFL-CLU-000004 5.79 \n", + "\n", + " ball_y ball_z ball_speed DFL-OBJ-002G3I_x ... DFL-OBJ-002G3I_d \\\n", + "0 0.26 0.06 0.00 0.35 ... None \n", + "1 0.26 0.08 65.59 0.34 ... None \n", + "2 0.33 0.09 65.16 0.32 ... None \n", + "3 0.38 0.09 74.34 0.31 ... None \n", + "4 0.44 0.08 73.58 0.29 ... None \n", + "\n", + " DFL-OBJ-002G3I_s DFL-OBJ-002G5S_x DFL-OBJ-002G5S_y DFL-OBJ-002G5S_d \\\n", + "0 0.00 NaN NaN None \n", + "1 1.74 NaN NaN None \n", + "2 1.76 NaN NaN None \n", + "3 1.78 NaN NaN None \n", + "4 1.80 NaN NaN None \n", + "\n", + " DFL-OBJ-002G5S_s DFL-OBJ-002FVJ_x DFL-OBJ-002FVJ_y DFL-OBJ-002FVJ_d \\\n", + "0 NaN NaN NaN None \n", + "1 NaN NaN NaN None \n", + "2 NaN NaN NaN None \n", + "3 NaN NaN NaN None \n", + "4 NaN NaN NaN None \n", + "\n", + " DFL-OBJ-002FVJ_s \n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "\n", + "[5 rows x 21 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from kloppy import sportec\n", + "\n", + "\n", + "dataset = sportec.load_tracking(\n", + " raw_data=\"../../kloppy/tests/files/sportec_positional.xml\",\n", + " meta_data=\"../../kloppy/tests/files/sportec_meta.xml\",\n", + " \n", + " # Optional arguments\n", + " coordinates=\"sportec\",\n", + " only_alive=False\n", + ")\n", + "\n", + "dataset.to_df().head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "366f1126", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {