diff --git a/data/raw/manual_motion_linking.json b/data/raw/manual_motion_linking.json index be3545a..961af12 100644 --- a/data/raw/manual_motion_linking.json +++ b/data/raw/manual_motion_linking.json @@ -98,5 +98,17 @@ { "decision_gid": "uk.org.publicwhip/debate/2023-04-19d.336.0", "motion_gid": "uk.org.publicwhip/debate/2023-04-19d.323.2.7" + }, + { + "decision_gid": "uk.org.publicwhip/debate/2024-10-09c.415.0", + "motion": { + "date": "2024-10-09", + "gid": "uk.org.publicwhip/debate/2024-10-09c.332.0.2", + "speech_id": "uk.org.publicwhip/debate/2024-10-09c.332.0", + "motion_lines": [ + "[Reasoned amendment - opposing a second reading]" + ], + "motion_title": "Renters' Rights Bill: Reasoned Amendment to Second Reading" + } } ] \ No newline at end of file diff --git a/src/parl_motion_detector/mapper.py b/src/parl_motion_detector/mapper.py index ee4aa75..5a6fc52 100644 --- a/src/parl_motion_detector/mapper.py +++ b/src/parl_motion_detector/mapper.py @@ -11,7 +11,7 @@ import rich from mysoc_validator import Transcript from mysoc_validator.models.transcripts import Chamber -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, TypeAdapter from parl_motion_detector.detector import PhraseDetector @@ -28,10 +28,31 @@ DEBUG: bool = False +class ManualLink(BaseModel): + decision_gid: str + motion_gid: str + + +class ManualText(BaseModel): + decision_gid: str + motion: Motion + + +ManualInfo = ManualLink | ManualText + + @lru_cache def get_manual_connections(data_dir: Path) -> dict[str, str]: - data = json.loads(Path(data_dir, "raw", "manual_motion_linking.json").read_text()) - return {x["motion_gid"]: x["decision_gid"] for x in data} + data = Path(data_dir, "raw", "manual_motion_linking.json").read_text() + items = TypeAdapter(list[ManualLink | ManualText]).validate_json(data) + return {x.motion_gid: x.decision_gid for x in items if isinstance(x, ManualLink)} + + +@lru_cache +def get_manual_text(data_dir: Path) -> dict[str, Motion]: + data = Path(data_dir, "raw", "manual_motion_linking.json").read_text() + items = TypeAdapter(list[ManualLink | ManualText]).validate_json(data) + return {x.decision_gid: x.motion for x in items if isinstance(x, ManualText)} amendment_be_made = PhraseDetector(criteria=["That the amendment be made."]) @@ -687,6 +708,13 @@ def assign_manual(self): elif len(mdecision) == 0: raise ValueError(f"Manual lookup failed to find {mdecision_gid}") + # when motions are just missing, sometimes we specify the whole thing by hand + manual_motions = get_manual_text(self.data_dir) + for decision in decisions: + if decision.gid in manual_motions: + motion = manual_motions[decision.gid] + self.assign_motion_decision(motion, decision, "manual text") + def assigned_gids(self): division_gids = [x.gid for x in self.division_assignments] agreement_gids = [x.gid for x in self.agreement_assignments] diff --git a/src/parl_motion_detector/motions.py b/src/parl_motion_detector/motions.py index 3a2bffe..80bfabe 100644 --- a/src/parl_motion_detector/motions.py +++ b/src/parl_motion_detector/motions.py @@ -62,11 +62,11 @@ class Flag(StrEnum): class Motion(BaseModel): date: str motion_title: str = "" - major_heading_id: str - minor_heading_id: str + major_heading_id: str = "" + minor_heading_id: str = "" major_heading_title: str = "" minor_heading_title: str = "" - speech_start_pid: str + speech_start_pid: str = "" speech_id: str final_speech_id: str = "" end_reason: str = ""