From e52d3419dd21c03ef2fe8f850a65bc1dddf364c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Augustyniak?= Date: Mon, 3 Jun 2024 09:34:19 +0000 Subject: [PATCH 1/7] add data to dvc --- data/datasets/en/.gitignore | 2 ++ data/datasets/en/en_judgements_dataset.dvc | 6 ++++++ data/datasets/en/england_wales_data_refined_7.jsonl.dvc | 5 +++++ 3 files changed, 13 insertions(+) create mode 100644 data/datasets/en/.gitignore create mode 100644 data/datasets/en/en_judgements_dataset.dvc create mode 100644 data/datasets/en/england_wales_data_refined_7.jsonl.dvc diff --git a/data/datasets/en/.gitignore b/data/datasets/en/.gitignore new file mode 100644 index 0000000..639bb05 --- /dev/null +++ b/data/datasets/en/.gitignore @@ -0,0 +1,2 @@ +/england_wales_data_refined_7.jsonl +/en_judgements_dataset diff --git a/data/datasets/en/en_judgements_dataset.dvc b/data/datasets/en/en_judgements_dataset.dvc new file mode 100644 index 0000000..3cd2b3a --- /dev/null +++ b/data/datasets/en/en_judgements_dataset.dvc @@ -0,0 +1,6 @@ +outs: +- md5: ccf9486ab2d4b38836e50d06d5a080d9.dir + size: 168746581 + nfiles: 3 + hash: md5 + path: en_judgements_dataset diff --git a/data/datasets/en/england_wales_data_refined_7.jsonl.dvc b/data/datasets/en/england_wales_data_refined_7.jsonl.dvc new file mode 100644 index 0000000..a6b2666 --- /dev/null +++ b/data/datasets/en/england_wales_data_refined_7.jsonl.dvc @@ -0,0 +1,5 @@ +outs: +- md5: 0634fad494a0ec6837834d1b8fd28f5e + size: 172557440 + hash: md5 + path: england_wales_data_refined_7.jsonl From 187284381a07bbb32368b9cd2e35419b3ef39e13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Augustyniak?= Date: Mon, 3 Jun 2024 09:36:13 +0000 Subject: [PATCH 2/7] fix lint --- .../england_wales/00_download_judgements.py | 35 +-- .../england_wales/01_extract_jsonl_refined.py | 225 ++++++++++++------ 2 files changed, 170 insertions(+), 90 deletions(-) diff --git a/scripts/england_wales/00_download_judgements.py b/scripts/england_wales/00_download_judgements.py index d33bb85..8413938 100644 --- a/scripts/england_wales/00_download_judgements.py +++ b/scripts/england_wales/00_download_judgements.py @@ -1,16 +1,17 @@ -import requests -from bs4 import BeautifulSoup -import pandas as pd -from multiprocessing import Pool import os import time +from multiprocessing import Pool + +import pandas as pd +import requests +from bs4 import BeautifulSoup from tqdm import tqdm # Define the base URL base_url = "https://caselaw.nationalarchives.gov.uk/judgments/advanced_search?query=&court=ewca%2Fcrim&order=date&per_page=50&page=" num_pages = 124 output_folder = "dump" -csv_file = 'judgments.csv' +csv_file = "judgments.csv" # Ensure the output directory exists os.makedirs(output_folder, exist_ok=True) @@ -20,15 +21,15 @@ def scrape_page(page_number): url = base_url + str(page_number) response = requests.get(url) - soup = BeautifulSoup(response.text, 'html.parser') + soup = BeautifulSoup(response.text, "html.parser") results = [] - for li in soup.select('ul.judgment-listing__list > li'): - title_tag = li.find('a') - date_tag = li.find('time') + for li in soup.select("ul.judgment-listing__list > li"): + title_tag = li.find("a") + date_tag = li.find("time") if title_tag and date_tag: - href = title_tag['href'] + href = title_tag["href"] title = title_tag.text.strip() date = date_tag.text.strip() link = "https://caselaw.nationalarchives.gov.uk" + href @@ -40,13 +41,13 @@ def scrape_page(page_number): # Download XML files def download_xml(data): title, link, date, sno = data - date_formatted = pd.to_datetime(date).strftime('%Y_%m_%d') + date_formatted = pd.to_datetime(date).strftime("%Y_%m_%d") xml_url = link + "/data.xml" file_name = f"{date_formatted}-{sno}.xml" file_path = os.path.join(output_folder, file_name) response = requests.get(xml_url) - with open(file_path, 'wb') as file: + with open(file_path, "wb") as file: file.write(response.content) time.sleep(1) # Pause to avoid blocking IP address @@ -54,7 +55,7 @@ def download_xml(data): # Initialize CSV file if not os.path.exists(csv_file): - pd.DataFrame(columns=['Title', 'Link', 'Date', 'SNo']).to_csv(csv_file, index=False) + pd.DataFrame(columns=["Title", "Link", "Date", "SNo"]).to_csv(csv_file, index=False) # Scrape all pages and process data incrementally sno = 1 @@ -62,12 +63,14 @@ def download_xml(data): results = scrape_page(page) # Add serial number to each result - results_with_sno = [(title, link, date, sno + i) for i, (title, link, date) in enumerate(results)] + results_with_sno = [ + (title, link, date, sno + i) for i, (title, link, date) in enumerate(results) + ] sno += len(results) # Save results to CSV incrementally - df = pd.DataFrame(results_with_sno, columns=['Title', 'Link', 'Date', 'SNo']) - df.to_csv(csv_file, mode='a', header=False, index=False) + df = pd.DataFrame(results_with_sno, columns=["Title", "Link", "Date", "SNo"]) + df.to_csv(csv_file, mode="a", header=False, index=False) # Download XML files with Pool() as pool: diff --git a/scripts/england_wales/01_extract_jsonl_refined.py b/scripts/england_wales/01_extract_jsonl_refined.py index a1389ce..7604b17 100644 --- a/scripts/england_wales/01_extract_jsonl_refined.py +++ b/scripts/england_wales/01_extract_jsonl_refined.py @@ -1,16 +1,24 @@ -import os import json -from bs4 import BeautifulSoup -from tqdm import tqdm +import os import re from multiprocessing import Pool +from bs4 import BeautifulSoup +from tqdm import tqdm + + def extract_appeal_type(text): patterns = [ - (r'appeal\s+against\s+\S+\s+sentence\s+or\s+\S+\s+conviction', 'conviction_sentence'), - (r'appeal\s+against\s+\S+\s+conviction\s+or\s+\S+\s+sentence', 'conviction_sentence'), - (r'appeal\s+against\s+\S+\s+conviction', 'conviction'), - (r'appeal\s+against\s+\S+\s+sentence', 'sentence') + ( + r"appeal\s+against\s+\S+\s+sentence\s+or\s+\S+\s+conviction", + "conviction_sentence", + ), + ( + r"appeal\s+against\s+\S+\s+conviction\s+or\s+\S+\s+sentence", + "conviction_sentence", + ), + (r"appeal\s+against\s+\S+\s+conviction", "conviction"), + (r"appeal\s+against\s+\S+\s+sentence", "sentence"), ] for pattern, appeal_type in patterns: @@ -21,10 +29,10 @@ def extract_appeal_type(text): def extract_appeal_outcome(text): outcome_patterns = { - 'granted': r'appeal\s+is\s+granted', - 'dismissed': r'appeal\s+is\s+dismissed', - 'refused': r'appeal\s+is\s+refused', - 'allowed': r'appeal\s+is\s+allowed' + "granted": r"appeal\s+is\s+granted", + "dismissed": r"appeal\s+is\s+dismissed", + "refused": r"appeal\s+is\s+refused", + "allowed": r"appeal\s+is\s+allowed", } for outcome, pattern in outcome_patterns.items(): @@ -32,137 +40,196 @@ def extract_appeal_outcome(text): return outcome return None + def extract_and_clean_judges(paragraphs): judges = [] for para in paragraphs: text = para.get_text(strip=True) - if re.search(r'\bJustice\b|\bJudge\b|\bSIR\b|\bHonour\b|\bHHJ\b', text, re.IGNORECASE): + if re.search( + r"\bJustice\b|\bJudge\b|\bSIR\b|\bHonour\b|\bHHJ\b", text, re.IGNORECASE + ): # Remove text within parentheses - cleaned_text = re.sub(r'\([^)]*\)', '', text).strip() + cleaned_text = re.sub(r"\([^)]*\)", "", text).strip() # Remove dashes and any text following them - cleaned_text = re.sub(r'-.*', '', cleaned_text).strip() + cleaned_text = re.sub(r"-.*", "", cleaned_text).strip() # Check for specific keywords and ensure it's not empty or unwanted text - if cleaned_text and 'Royal Courts of Justice' not in cleaned_text and cleaned_text != "THE LORD CHIEF JUSTICE OF ENGLAND AND WALES": + if ( + cleaned_text + and "Royal Courts of Justice" not in cleaned_text + and cleaned_text != "THE LORD CHIEF JUSTICE OF ENGLAND AND WALES" + ): judges.append(cleaned_text) return judges + def categorize_court(court_name): - if 'SUPREME_COURT' in court_name: - return 'supreme_court' + if "SUPREME_COURT" in court_name: + return "supreme_court" elif "HIGH_COURT" in court_name and "ADMINISTRATIVE_COURT" in court_name: - return 'high_court_administrative_court' - elif 'HIGH_COURT' in court_name and 'DIVISIONAL_COURT' in court_name: - return 'high_court_division_court' - elif 'HIGH_COURT' in court_name: - return 'high_court' - elif 'CIVIL_AND_CRIMINAL' in court_name: - return 'civil_criminal_court' - elif 'MARTIAL' in court_name: - return 'martial_court' - elif 'DIVISIONAL_COURT' in court_name: - return 'division_court' + return "high_court_administrative_court" + elif "HIGH_COURT" in court_name and "DIVISIONAL_COURT" in court_name: + return "high_court_division_court" + elif "HIGH_COURT" in court_name: + return "high_court" + elif "CIVIL_AND_CRIMINAL" in court_name: + return "civil_criminal_court" + elif "MARTIAL" in court_name: + return "martial_court" + elif "DIVISIONAL_COURT" in court_name: + return "division_court" else: - return 'crown_court' + return "crown_court" + def extract_information_from_xml(xml_content, file_name): - soup = BeautifulSoup(xml_content, 'xml') # Using 'xml' parser for handling namespaces + soup = BeautifulSoup( + xml_content, "xml" + ) # Using 'xml' parser for handling namespaces # Extract required fields - _id = soup.find('uk:hash').text if soup.find('uk:hash') else None - citation = soup.find('uk:cite').text if soup.find('uk:cite') else None - signature = citation.split('] ')[1] if citation else None # Removing the year part + _id = soup.find("uk:hash").text if soup.find("uk:hash") else None + citation = soup.find("uk:cite").text if soup.find("uk:cite") else None + signature = citation.split("] ")[1] if citation else None # Removing the year part if signature: - signature = signature.replace(' ', '_') - hearing_date = soup.find('hearingdate').text if soup.find('hearingdate') else None + signature = signature.replace(" ", "_") + hearing_date = soup.find("hearingdate").text if soup.find("hearingdate") else None date = hearing_date.strip() if hearing_date else None - publication_date = soup.find('FRBRdate', {'name': 'judgment'})['date'] if soup.find('FRBRdate', - {'name': 'judgment'}) else None + publication_date = ( + soup.find("FRBRdate", {"name": "judgment"})["date"] + if soup.find("FRBRdate", {"name": "judgment"}) + else None + ) - court_type_tags = soup.find_all('courtType') + court_type_tags = soup.find_all("courtType") # Use a set to collect unique court types unique_court_types = set( - re.sub(r'\([^)]*\)', '', tag.get_text(strip=True)).replace(' ', '_') for tag in court_type_tags) + re.sub(r"\([^)]*\)", "", tag.get_text(strip=True)).replace(" ", "_") + for tag in court_type_tags + ) # Join the unique court types court_type_ = "_".join(unique_court_types) - court_type_ = re.sub(r'_+', '_', court_type_).strip('_') + court_type_ = re.sub(r"_+", "_", court_type_).strip("_") # Categorize the combined court types court_type = categorize_court(court_type_) # Get the excerpt - header_text = soup.header.get_text(separator=' ', strip=True) if soup.header else "" + header_text = soup.header.get_text(separator=" ", strip=True) if soup.header else "" excerpt = header_text[:500] # Get the full content of the header and judgment body as text - header_content = soup.header.get_text(separator='\n', strip=True) if soup.header else "" - judgment_body_content = soup.find('judgmentBody').get_text(separator='\n', strip=True) if soup.find( - 'judgmentBody') else "" + header_content = ( + soup.header.get_text(separator="\n", strip=True) if soup.header else "" + ) + judgment_body_content = ( + soup.find("judgmentBody").get_text(separator="\n", strip=True) + if soup.find("judgmentBody") + else "" + ) content = header_content + "\n" + judgment_body_content # Get the judges list # Get the judges list from TLCPerson elements - judges = [judge['showAs'] for judge in soup.find_all('TLCPerson') if 'showAs' in judge.attrs and re.search(r'\bJustice\b|\bJudge\b|\bSIR\b|\bHonour\b|\bHHJ\b', judge['showAs'], re.IGNORECASE)] + judges = [ + judge["showAs"] + for judge in soup.find_all("TLCPerson") + if "showAs" in judge.attrs + and re.search( + r"\bJustice\b|\bJudge\b|\bSIR\b|\bHonour\b|\bHHJ\b", + judge["showAs"], + re.IGNORECASE, + ) + ] # Filter judges using regex criteria - judges = [judge for judge in judges if - re.search(r'\bJustice\b|\bJudge\b|\bSIR\b|\bHonour\b|\bHHJ\b', judge, re.IGNORECASE)] + judges = [ + judge + for judge in judges + if re.search( + r"\bJustice\b|\bJudge\b|\bSIR\b|\bHonour\b|\bHHJ\b", judge, re.IGNORECASE + ) + ] # If no judges found, get text from elements if not judges: - judges = [judge.get_text(strip=True) for judge in soup.find_all('judge')] + judges = [judge.get_text(strip=True) for judge in soup.find_all("judge")] # If no judges found, use regex to extract them from header content if not judges and soup.header: # Extract all

tags - paragraphs = soup.header.find_all('p') + paragraphs = soup.header.find_all("p") judges = extract_and_clean_judges(paragraphs) # If still no judges found, look for text in

tags with style="text-align:center" if not judges: - centered_paragraphs = soup.find_all('p', style=lambda x: x and 'text-align:center' in x) + centered_paragraphs = soup.find_all( + "p", style=lambda x: x and "text-align:center" in x + ) judges.extend(extract_and_clean_judges(centered_paragraphs)) # If still no judges found, look for text in

tags with style="text-align:right" if not judges: - right_aligned_paragraphs = soup.find_all('p', style=lambda x: x and 'text-align:right' in x) + right_aligned_paragraphs = soup.find_all( + "p", style=lambda x: x and "text-align:right" in x + ) judges.extend(extract_and_clean_judges(right_aligned_paragraphs)) # Filter judges using regex criteria - judges = [judge for judge in judges if - re.search(r'\bJustice\b|\bJudge\b|\bSIR\b|\bHonour\b|\bHHJ\b', judge, re.IGNORECASE)] + judges = [ + judge + for judge in judges + if re.search( + r"\bJustice\b|\bJudge\b|\bSIR\b|\bHonour\b|\bHHJ\b", judge, re.IGNORECASE + ) + ] # Extract URIs - xml_uri = soup.find('FRBRManifestation').find('FRBRuri')['value'] if soup.find('FRBRManifestation') and soup.find('FRBRManifestation').find('FRBRuri') else None - uri = soup.find('FRBRWork').find('FRBRuri')['value'] if soup.find('FRBRWork') and soup.find('FRBRWork').find('FRBRuri') else None + xml_uri = ( + soup.find("FRBRManifestation").find("FRBRuri")["value"] + if soup.find("FRBRManifestation") + and soup.find("FRBRManifestation").find("FRBRuri") + else None + ) + uri = ( + soup.find("FRBRWork").find("FRBRuri")["value"] + if soup.find("FRBRWork") and soup.find("FRBRWork").find("FRBRuri") + else None + ) # Extract legislation texts - legislation_tags = soup.find_all('ref', {'uk:type': 'legislation'}) + legislation_tags = soup.find_all("ref", {"uk:type": "legislation"}) legislation_texts = set(tag.get_text() for tag in legislation_tags) - legislation_list = list(legislation_texts) # Convert set to list to remove duplicates + legislation_list = list( + legislation_texts + ) # Convert set to list to remove duplicates # Extract case references - case_tags = soup.find_all('ref', {'uk:type': 'case'}) + case_tags = soup.find_all("ref", {"uk:type": "case"}) case_references = set(tag.get_text() for tag in case_tags) - case_references_list = list(case_references) # Convert set to list to remove duplicates + case_references_list = list( + case_references + ) # Convert set to list to remove duplicates # Extract case numbers case_numbers = set() - docket_number_tags = soup.find_all('docketNumber') + docket_number_tags = soup.find_all("docketNumber") for tag in docket_number_tags: case_numbers.add(tag.get_text()) # Extract case numbers from

tags containing "Case No:" - cover_text_tags = soup.find_all('p', class_='CoverText') - case_no_pattern = re.compile(r'Case No:\s*(.*)') + cover_text_tags = soup.find_all("p", class_="CoverText") + case_no_pattern = re.compile(r"Case No:\s*(.*)") for tag in cover_text_tags: match = case_no_pattern.search(tag.get_text()) if match: - case_numbers.update([num.strip() for num in match.group(1).split(',')]) + case_numbers.update([num.strip() for num in match.group(1).split(",")]) # If no case numbers found, look for text in

tags with style="text-align:right" if not case_numbers: - right_aligned_paragraphs = soup.find_all('p', style=lambda x: x and 'text-align:right' in x) - case_no_pattern = re.compile(r'\b\d{4}/\d{4}/\w+\b|\d{6}') + right_aligned_paragraphs = soup.find_all( + "p", style=lambda x: x and "text-align:right" in x + ) + case_no_pattern = re.compile(r"\b\d{4}/\d{4}/\w+\b|\d{6}") for tag in right_aligned_paragraphs: matches = case_no_pattern.findall(tag.get_text()) case_numbers.update(matches) @@ -193,24 +260,34 @@ def null_if_empty(value): "appeal_type": null_if_empty(appeal_type), "appeal_outcome": null_if_empty(appeal_outcome), "xml_uri": null_if_empty(xml_uri), - "uri": null_if_empty(uri) + "uri": null_if_empty(uri), } + def process_file(file_path): - with open(file_path, 'r', encoding='utf-8') as xml_file: + with open(file_path, "r", encoding="utf-8") as xml_file: xml_content = xml_file.read() file_name = os.path.basename(file_path) return extract_information_from_xml(xml_content, file_name) + def process_directory(directory_path, output_file): - xml_files = [os.path.join(directory_path, f) for f in os.listdir(directory_path) if f.endswith('.xml')] + xml_files = [ + os.path.join(directory_path, f) + for f in os.listdir(directory_path) + if f.endswith(".xml") + ] + + with Pool() as pool, open(output_file, "w") as jsonl_file: + for judgment_data in tqdm( + pool.imap(process_file, xml_files), + total=len(xml_files), + desc="Processing XML files", + ): + jsonl_file.write(json.dumps(judgment_data) + "\n") - with Pool() as pool, open(output_file, 'w') as jsonl_file: - for judgment_data in tqdm(pool.imap(process_file, xml_files), total=len(xml_files), - desc="Processing XML files"): - jsonl_file.write(json.dumps(judgment_data) + '\n') -directory_path = '/home/stirunag/work/github/ML4-legal-documents/judgements_xml/dump/' -output_file = '/home/stirunag/work/github/ML4-legal-documents/judgements_xml/england_wales_data_refined_7.jsonl' +directory_path = "/home/stirunag/work/github/ML4-legal-documents/judgements_xml/dump/" +output_file = "/home/stirunag/work/github/ML4-legal-documents/judgements_xml/england_wales_data_refined_7.jsonl" process_directory(directory_path, output_file) From cf7438897d7b8adcba7206890cfa3d79183a6e64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Augustyniak?= Date: Mon, 3 Jun 2024 09:46:31 +0000 Subject: [PATCH 3/7] fix nbdev --- .../01_Analyze_En_Judgements_Texts.ipynb | 209 ++++-------------- .../england-wales/02_Analyse_En_Dataset.ipynb | 137 ++++++------ 2 files changed, 110 insertions(+), 236 deletions(-) diff --git a/nbs/Data/england-wales/01_Analyze_En_Judgements_Texts.ipynb b/nbs/Data/england-wales/01_Analyze_En_Judgements_Texts.ipynb index 55ff4ad..b40a886 100644 --- a/nbs/Data/england-wales/01_Analyze_En_Judgements_Texts.ipynb +++ b/nbs/Data/england-wales/01_Analyze_En_Judgements_Texts.ipynb @@ -10,35 +10,35 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "6b666da3-f393-4d88-8036-e818937d2305", "metadata": {}, "outputs": [], "source": [ - "import os\n", "import json\n", "import string\n", - "from datasets import Dataset, DatasetDict, load_dataset, load_from_disk\n", - "import pandas as pd\n", + "from datasets import Dataset, DatasetDict, load_from_disk\n", "import seaborn as sns\n", - "import matplotlib.pyplot as plt" + "import matplotlib.pyplot as plt\n", + "\n", + "from juddges.settings import DATA_PATH" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "27d73a94-5cd3-4820-938c-a827b8c34bd0", "metadata": {}, "outputs": [], "source": [ - "path_ = '/home/stirunag/work/github/ML4-legal-documents/judgements_xml/'\n", - "jsonl_file = path_+'england_wales_data_refined_7.jsonl'\n", - "dataset_path = path_+'en_judgements_dataset'" + "path_ = DATA_PATH / \"datasets\" / \"en\"\n", + "jsonl_file = path_ / \"england_wales_data_refined_7.jsonl\"\n", + "dataset_path = path_ / \"en_judgements_dataset\"" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "c2851986-f950-4a21-b3e1-7ce58f6fa4a4", "metadata": {}, "outputs": [ @@ -58,6 +58,7 @@ } ], "source": [ + "#| eval: false\n", "data = []\n", "with open(jsonl_file, 'r') as file:\n", " for line in file:\n", @@ -72,18 +73,19 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "c1f37c21-de73-48ee-8cc3-8f4f2d4ce735", "metadata": {}, "outputs": [], "source": [ + "#| eval: false\n", "# Load the dataset from disk\n", "ds = load_from_disk(dataset_path)" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "c49a038b-3bd5-4124-89c2-a019c364fd22", "metadata": {}, "outputs": [ @@ -107,7 +109,7 @@ "0" ] }, - "execution_count": 7, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -133,7 +135,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "4314d158-2396-4a1c-9538-456be68c3441", "metadata": {}, "outputs": [ @@ -238,7 +240,7 @@ "4 allowed 10352 1879 1793 " ] }, - "execution_count": 8, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -256,7 +258,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "c6bb139f-4340-45b4-a277-43da9d31a8f7", "metadata": {}, "outputs": [ @@ -266,7 +268,7 @@ "[Text(0.5, 1.0, '#tokens distribution')]" ] }, - "execution_count": 9, + "execution_count": null, "metadata": {}, "output_type": "execute_result" }, @@ -293,7 +295,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "e2d55b29-5e3a-4b80-827f-9a12e9ff4b97", "metadata": {}, "outputs": [ @@ -316,7 +318,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "e1051c95-4339-4a5e-bb1a-559ea811c5ec", "metadata": {}, "outputs": [ @@ -339,7 +341,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "id": "c4a68318-5880-4d5f-9690-80235ed0bfe4", "metadata": {}, "outputs": [ @@ -362,7 +364,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "id": "ef0ec395-bd03-47bf-84b2-7adf338595f3", "metadata": {}, "outputs": [ @@ -372,7 +374,7 @@ "" ] }, - "execution_count": 26, + "execution_count": null, "metadata": {}, "output_type": "execute_result" }, @@ -398,7 +400,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "id": "06f8c2b2-8f87-4876-b58c-a164c3412c31", "metadata": {}, "outputs": [ @@ -408,7 +410,7 @@ "" ] }, - "execution_count": 29, + "execution_count": null, "metadata": {}, "output_type": "execute_result" }, @@ -434,7 +436,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "id": "1de5e68f-8ae4-4a67-bdd1-c84146d2475e", "metadata": {}, "outputs": [ @@ -444,7 +446,7 @@ "" ] }, - "execution_count": 30, + "execution_count": null, "metadata": {}, "output_type": "execute_result" }, @@ -478,7 +480,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "id": "08c70fdc-0b03-4983-8da9-8d065161d3e7", "metadata": {}, "outputs": [ @@ -497,131 +499,10 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, "id": "0af8c3ba-aa89-4e1a-bfcb-65b618c4559e", "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "9f849bfbaab840c7883c4e321f589d87", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "tokenizer_config.json: 0%| | 0.00/418 [00:00 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (7729 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (4093 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (968 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (2180 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (1937 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (2857 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (8490 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (17735 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (22812 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (3021 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (2964 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (1604 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (2726 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (3342 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (3668 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (4760 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (14217 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (1346 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (5781 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (12451 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (11813 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (6959 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (2493 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (3168 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (12022 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (3316 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (6039 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (5440 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (14833 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (3606 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (4197 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (3538 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (4618 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (3974 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (14842 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (3610 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (6583 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (2124 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (9074 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (11635 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (7935 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (4170 > 512). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (3503 > 512). Running this sequence through the model will result in indexing errors\n" - ] - } - ], + "outputs": [], "source": [ "# | eval: false\n", "tokenizer = AutoTokenizer.from_pretrained(\"intfloat/multilingual-e5-large\")\n", @@ -634,7 +515,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "id": "6f822fae-f91c-4ee1-a114-97a021bf1e81", "metadata": {}, "outputs": [], @@ -647,18 +528,19 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": null, "id": "3c059b5a-5c25-4381-aad7-d69ef0b90320", "metadata": {}, "outputs": [], "source": [ + "#| eval: false\n", "num_tokens = [item['num_tokens'] for item in tokenized]\n", "filtered_tokens = [token for token in num_tokens if token <= 40000]" ] }, { "cell_type": "code", - "execution_count": 49, + "execution_count": null, "id": "cdac696f-056a-4b12-a48e-ac8f8dac9eeb", "metadata": {}, "outputs": [ @@ -668,7 +550,7 @@ "" ] }, - "execution_count": 49, + "execution_count": null, "metadata": {}, "output_type": "execute_result" }, @@ -690,7 +572,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": null, "id": "c890ee73", "metadata": {}, "outputs": [ @@ -706,6 +588,7 @@ } ], "source": [ + "# | eval: false\n", "# Plot the box plot\n", "plt.figure(figsize=(6, 6))\n", "sns.boxplot(filtered_tokens)\n", @@ -723,21 +606,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python (ml4legal)", + "display_name": "python3", "language": "python", - "name": "myenv" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.9" + "name": "python3" } }, "nbformat": 4, diff --git a/nbs/Data/england-wales/02_Analyse_En_Dataset.ipynb b/nbs/Data/england-wales/02_Analyse_En_Dataset.ipynb index 8e6748b..25da298 100644 --- a/nbs/Data/england-wales/02_Analyse_En_Dataset.ipynb +++ b/nbs/Data/england-wales/02_Analyse_En_Dataset.ipynb @@ -10,32 +10,34 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "initial_id", "metadata": {}, "outputs": [], "source": [ "# | eval: false\n", "import polars as pl\n", - "from datasets import Dataset, DatasetDict, load_dataset, load_from_disk" + "from datasets import load_from_disk\n", + "\n", + "from juddges.settings import DATA_PATH" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "c8a2c7d4858169a2", "metadata": {}, "outputs": [], "source": [ "# | eval: false\n", - "path_ = '/home/stirunag/work/github/ML4-legal-documents/judgements_xml/'\n", - "dataset_path = path_+'en_judgements_dataset'\n", + "path_ = DATA_PATH / \"datasets\" / \"en\"\n", + "dataset_path = path_ / \"en_judgements_dataset\"\n", "ds = load_from_disk(dataset_path)" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "dd788638-6a7d-4f31-bfed-8845eb4cfbd0", "metadata": {}, "outputs": [ @@ -48,29 +50,31 @@ "})" ] }, - "execution_count": 10, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "# | eval: false\n", "ds" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "167b28d4-1e8a-4bf3-a2f3-bea277fb448f", "metadata": {}, "outputs": [], "source": [ + "# | eval: false\n", "df = ds.to_pandas()\n", "pl_df = pl.DataFrame(df)" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "9e3c70ac", "metadata": {}, "outputs": [ @@ -88,28 +92,34 @@ "│ ab0224364 ┆ [2008] ┆ EWCA_Crim ┆ null ┆ … ┆ null ┆ null ┆ https://c ┆ https:// │\n", "│ e4cf6562c ┆ EWCA Crim ┆ _2952 ┆ ┆ ┆ ┆ ┆ aselaw.na ┆ caselaw. │\n", "│ 82f8861d5 ┆ 2952 ┆ ┆ ┆ ┆ ┆ ┆ tionalarc ┆ national │\n", - "│ 268… ┆ ┆ ┆ ┆ ┆ ┆ ┆ hiv… ┆ archiv… │\n", + "│ 268d4… ┆ ┆ ┆ ┆ ┆ ┆ ┆ hives… ┆ archives │\n", + "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ … │\n", "│ d4630d932 ┆ [2006] ┆ EWCA_Crim ┆ null ┆ … ┆ convictio ┆ null ┆ https://c ┆ https:// │\n", "│ 58ea51ecf ┆ EWCA Crim ┆ _3187 ┆ ┆ ┆ n ┆ ┆ aselaw.na ┆ caselaw. │\n", "│ f4bc40154 ┆ 3187 ┆ ┆ ┆ ┆ ┆ ┆ tionalarc ┆ national │\n", - "│ 43b… ┆ ┆ ┆ ┆ ┆ ┆ ┆ hiv… ┆ archiv… │\n", + "│ 43b4e… ┆ ┆ ┆ ┆ ┆ ┆ ┆ hives… ┆ archives │\n", + "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ … │\n", "│ 37183a714 ┆ [2012] ┆ EWCA_Crim ┆ null ┆ … ┆ null ┆ null ┆ https://c ┆ https:// │\n", "│ b626cfe98 ┆ EWCA Crim ┆ _1840 ┆ ┆ ┆ ┆ ┆ aselaw.na ┆ caselaw. │\n", "│ 081ac0250 ┆ 1840 ┆ ┆ ┆ ┆ ┆ ┆ tionalarc ┆ national │\n", - "│ c80… ┆ ┆ ┆ ┆ ┆ ┆ ┆ hiv… ┆ archiv… │\n", + "│ c804f… ┆ ┆ ┆ ┆ ┆ ┆ ┆ hives… ┆ archives │\n", + "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ … │\n", "│ b41933b19 ┆ [2014] ┆ EWCA_Crim ┆ null ┆ … ┆ null ┆ null ┆ https://c ┆ https:// │\n", "│ 505ab8767 ┆ EWCA Crim ┆ _1730 ┆ ┆ ┆ ┆ ┆ aselaw.na ┆ caselaw. │\n", "│ ce30faf8d ┆ 1730 ┆ ┆ ┆ ┆ ┆ ┆ tionalarc ┆ national │\n", - "│ b95… ┆ ┆ ┆ ┆ ┆ ┆ ┆ hiv… ┆ archiv… │\n", + "│ b9524… ┆ ┆ ┆ ┆ ┆ ┆ ┆ hives… ┆ archives │\n", + "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ … │\n", "│ 418382a2a ┆ [2018] ┆ EWCA_Crim ┆ null ┆ … ┆ null ┆ allowed ┆ https://c ┆ https:// │\n", "│ 6c0c32d3d ┆ EWCA Crim ┆ _2189 ┆ ┆ ┆ ┆ ┆ aselaw.na ┆ caselaw. │\n", "│ 2bd4cb7b3 ┆ 2189 ┆ ┆ ┆ ┆ ┆ ┆ tionalarc ┆ national │\n", - "│ 9e1… ┆ ┆ ┆ ┆ ┆ ┆ ┆ hiv… ┆ archiv… │\n", + "│ 9e1ba… ┆ ┆ ┆ ┆ ┆ ┆ ┆ hives… ┆ archives │\n", + "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ … │\n", "└───────────┴───────────┴───────────┴───────────┴───┴───────────┴───────────┴───────────┴──────────┘\n" ] } ], "source": [ + "# | eval: false\n", "pl_df = pl_df.with_columns([\n", " pl.col(\"date\").cast(pl.Utf8),\n", " pl.col(\"publicationDate\").cast(pl.Utf8),\n", @@ -133,7 +143,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "35e65fe2dd9a4bce", "metadata": {}, "outputs": [ @@ -147,9 +157,8 @@ " white-space: pre-wrap;\n", "}\n", "\n", - "shape: (9, 18)
statistic_idcitationsignaturedatepublicationDatetypeexcerptcontentjudgescaseNumberscitation_referenceslegislationfile_nameappeal_typeappeal_outcomexml_uriuri
strstrstrstrstrstrstrstrstrf64f64f64f64strstrstrstrstr
"count""6154""6154""6154""0""6154""6154""6058""6154"6115.04934.01392.01826.0"6154""834""1368""6154""6154"
"null_count""0""0""0""6154""0""0""96""0"39.01220.04762.04328.0"0""5320""4786""0""0"
"mean"nullnullnullnull"2013-10-13 09:46:09.320766"nullnullnullnullnullnullnullnullnullnullnullnull
"std"nullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnull
"min""001d3b389f60bfd101c581fe8f1a9a…"[2003] EWCA Crim 1""EWCA_(Crim)_1478"null"2003-01-04 00:00:00"null"********REPORTING RESTRICTIONS…"\n", - "2020] EWCA Crim 570\n", - "No: 20190…nullnullnullnull"2003_01_04-1.xml"nullnull"https://caselaw.nationalarchiv…"https://caselaw.nationalarchiv…
"25%"nullnullnullnull"2008-06-11 00:00:00"nullnullnullnullnullnullnullnullnullnullnullnull
"50%"nullnullnullnull"2012-11-29 00:00:00"nullnullnullnullnullnullnullnullnullnullnullnull
"75%"nullnullnullnull"2019-06-07 00:00:00"nullnullnullnullnullnullnullnullnullnullnullnull
"max""ffffb6552ad89849b5d2767708b5c2…"[2024] EWCA Crim 99""Ewca_Crim_664"null"2024-05-22 00:00:00"null"…WARNING: reporting restrictio…"…WARNING: reporting restrictio…nullnullnullnull"2024_05_22-6154.xml"nullnull"https://caselaw.nationalarchiv…"https://caselaw.nationalarchiv…
" + "shape: (9, 18)
statistic_idcitationsignaturedatepublicationDatetypeexcerptcontentjudgescaseNumberscitation_referenceslegislationfile_nameappeal_typeappeal_outcomexml_uriuri
strstrstrstrstrstrstrstrstrf64f64f64f64strstrstrstrstr
"count""6154""6154""6154""0""6154""6154""6058""6154"6115.04934.01392.01826.0"6154""834""1368""6154""6154"
"null_count""0""0""0""6154""0""0""96""0"39.01220.04762.04328.0"0""5320""4786""0""0"
"mean"nullnullnullnull"2013-10-13 09:…nullnullnullnullnullnullnullnullnullnullnullnull
"std"nullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnullnull
"min""001d3b389f60bf…"[2003] EWCA Cr…"EWCA_(Crim)_14…null"2003-01-04 00:…null"********REPORT…"\n", + "2020] EWCA Cr…nullnullnullnull"2003_01_04-1.x…nullnull"https://casela…"https://casela…
"25%"nullnullnullnull"2008-06-11 00:…nullnullnullnullnullnullnullnullnullnullnullnull
"50%"nullnullnullnull"2012-11-29 00:…nullnullnullnullnullnullnullnullnullnullnullnull
"75%"nullnullnullnull"2019-06-07 00:…nullnullnullnullnullnullnullnullnullnullnullnull
"max""ffffb6552ad898…"[2024] EWCA Cr…"Ewca_Crim_664"null"2024-05-22 00:…null"…WARNING: repo…"…WARNING: repo…nullnullnullnull"2024_05_22-615…nullnull"https://casela…"https://casela…
" ], "text/plain": [ "shape: (9, 18)\n", @@ -167,18 +176,20 @@ "│ min ┆ 001d3b389 ┆ [2003] ┆ EWCA_(Cri ┆ … ┆ null ┆ null ┆ https://c ┆ https:// │\n", "│ ┆ f60bfd101 ┆ EWCA Crim ┆ m)_1478 ┆ ┆ ┆ ┆ aselaw.na ┆ caselaw. │\n", "│ ┆ c581fe8f1 ┆ 1 ┆ ┆ ┆ ┆ ┆ tionalarc ┆ national │\n", - "│ ┆ a9a… ┆ ┆ ┆ ┆ ┆ ┆ hiv… ┆ archiv… │\n", + "│ ┆ a9a4d… ┆ ┆ ┆ ┆ ┆ ┆ hives… ┆ archives │\n", + "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ … │\n", "│ 25% ┆ null ┆ null ┆ null ┆ … ┆ null ┆ null ┆ null ┆ null │\n", "│ 50% ┆ null ┆ null ┆ null ┆ … ┆ null ┆ null ┆ null ┆ null │\n", "│ 75% ┆ null ┆ null ┆ null ┆ … ┆ null ┆ null ┆ null ┆ null │\n", "│ max ┆ ffffb6552 ┆ [2024] ┆ Ewca_Crim ┆ … ┆ null ┆ null ┆ https://c ┆ https:// │\n", "│ ┆ ad89849b5 ┆ EWCA Crim ┆ _664 ┆ ┆ ┆ ┆ aselaw.na ┆ caselaw. │\n", "│ ┆ d2767708b ┆ 99 ┆ ┆ ┆ ┆ ┆ tionalarc ┆ national │\n", - "│ ┆ 5c2… ┆ ┆ ┆ ┆ ┆ ┆ hiv… ┆ archiv… │\n", + "│ ┆ 5c261… ┆ ┆ ┆ ┆ ┆ ┆ hives… ┆ archives │\n", + "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ … │\n", "└───────────┴───────────┴───────────┴───────────┴───┴───────────┴───────────┴───────────┴──────────┘" ] }, - "execution_count": 22, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -190,7 +201,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "ab23ff37327a377a", "metadata": {}, "outputs": [ @@ -204,7 +215,7 @@ " white-space: pre-wrap;\n", "}\n", "\n", - "shape: (7, 2)
typecount
catu32
"crown_court"5472
"supreme_court"660
"martial_court"11
"high_court_administrative_cour…2
"high_court_division_court"7
"civil_criminal_court"1
"division_court"1
" + "shape: (7, 2)
typecount
catu32
"crown_court"5472
"supreme_court"660
"martial_court"11
"high_court_adm…2
"high_court_div…7
"civil_criminal…1
"division_court…1
" ], "text/plain": [ "shape: (7, 2)\n", @@ -216,14 +227,14 @@ "│ crown_court ┆ 5472 │\n", "│ supreme_court ┆ 660 │\n", "│ martial_court ┆ 11 │\n", - "│ high_court_administrative_cour… ┆ 2 │\n", + "│ high_court_administrative_court ┆ 2 │\n", "│ high_court_division_court ┆ 7 │\n", "│ civil_criminal_court ┆ 1 │\n", "│ division_court ┆ 1 │\n", "└─────────────────────────────────┴───────┘" ] }, - "execution_count": 23, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -235,7 +246,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "11883338-0a71-48ff-8699-6a4dd8cc085d", "metadata": {}, "outputs": [ @@ -249,7 +260,7 @@ " white-space: pre-wrap;\n", "}\n", "\n", - "shape: (3, 2)
appeal_typecount
catu32
"conviction"496
"sentence"338
null5320
" + "shape: (3, 2)
appeal_typecount
catu32
"conviction"496
null5320
"sentence"338
" ], "text/plain": [ "shape: (3, 2)\n", @@ -259,23 +270,24 @@ "│ cat ┆ u32 │\n", "╞═════════════╪═══════╡\n", "│ conviction ┆ 496 │\n", - "│ sentence ┆ 338 │\n", "│ null ┆ 5320 │\n", + "│ sentence ┆ 338 │\n", "└─────────────┴───────┘" ] }, - "execution_count": 24, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "# | eval: false\n", "pl_df[\"appeal_type\"].value_counts()" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "id": "0cdfb0a9-c761-44c5-8fa0-17508df966e9", "metadata": {}, "outputs": [ @@ -289,7 +301,7 @@ " white-space: pre-wrap;\n", "}\n", "\n", - "shape: (5, 2)
appeal_outcomecount
catu32
"allowed"697
"refused"65
"dismissed"586
null4786
"granted"20
" + "shape: (5, 2)
appeal_outcomecount
catu32
null4786
"granted"20
"dismissed"586
"refused"65
"allowed"697
" ], "text/plain": [ "shape: (5, 2)\n", @@ -298,26 +310,27 @@ "│ --- ┆ --- │\n", "│ cat ┆ u32 │\n", "╞════════════════╪═══════╡\n", - "│ allowed ┆ 697 │\n", - "│ refused ┆ 65 │\n", - "│ dismissed ┆ 586 │\n", "│ null ┆ 4786 │\n", "│ granted ┆ 20 │\n", + "│ dismissed ┆ 586 │\n", + "│ refused ┆ 65 │\n", + "│ allowed ┆ 697 │\n", "└────────────────┴───────┘" ] }, - "execution_count": 25, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "# | eval: false\n", "pl_df[\"appeal_outcome\"].value_counts()" ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "id": "11446c299cdf1700", "metadata": {}, "outputs": [ @@ -338,7 +351,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": null, "id": "891ffbad", "metadata": {}, "outputs": [ @@ -348,13 +361,13 @@ "" ] }, - "execution_count": 38, + "execution_count": null, "metadata": {}, "output_type": "execute_result" }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjoAAAGzCAYAAAAmH71NAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAzC0lEQVR4nO3de3hU1b3/8c8kIYGEJFwSAuEWBcQGJBxDoFhQKNEYKRW8FKuVQBFUBguN1ULPqRFvoFYK6ihaK2jPURFrsRW5iQgi9BBABIyiIDdJSIJKAkECSdbvD3+Z45AEkskkk6x5v55nnoe99p69v3vNrvl077VmHMYYIwAAAAsF+bsAAACAhkLQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYi6ADAACsRdAB0GQ4HA5NnTrV32V45f7775fD4fBoS0hI0Pjx4xv82Pv375fD4dCiRYvcbePHj1fr1q0b/NiVHA6H7r///kY7HlBbBB3gHBYtWiSHw1Hj69///re/S6y3V155RfPmzfP6/WPGjNEvf/lLSZIxRm3btvX4g4u6eeedd5psYGjKtQE1CfF3AUBz8MADD+iCCy6o0t6zZ08/VONbr7zyinbt2qXp06d79f7Nmzfr3nvvlSR9+umnOnbsmH784x/7sMLma/fu3QoKqtv/n3znnXfkcrnqFCi6d++u7777Ti1atKhjhXVzrtq+++47hYTwJwVND1clUAvp6ekaMGCAv8s4r7KyMlVUVCg0NLRRjvfVV18pNzfXHWw2bdqk6Oho9e7du1GO742SkhJFREQ0yrHCwsIadP8//LxbtmzZoMc6H38fH6gJj64AH8jKylJQUJDWrFnj0T558mSFhobq448/drcdPnxYEydOVHx8vMLCwnTBBRfozjvv1OnTp93bHDt2TNOnT1fXrl0VFhamnj176tFHH1VFRYV7m8pxGX/60580b9489ejRQ2FhYcrJydH7778vh8OhxYsX6w9/+IM6duyoiIgI/fznP9ehQ4fc+xg2bJiWLVumAwcOuB/HJSQknPNcS0tLdfToUR09elRr165VixYt1LVrVx09elTr169Xv3799PXXX+vo0aMe9VZUVGj+/Pm65JJL1LJlS8XGxurqq6/Wli1bqhxj6dKl6tu3r8LCwtSnTx+tWLHCY/2BAwc0ZcoU9e7dW61atVL79u114403av/+/R7bVT56XLdunaZMmaIOHTqoS5cukqTjx49r+vTpSkhIUFhYmDp06KArr7xS27ZtO+f5S9KGDRuUkpKili1bqkePHnruueeq3e7sMTpnzpzRrFmz1KtXL7Vs2VLt27fXkCFDtHr1aknfj6txuVyS5PGIVDr3513dGJ1KX375pdLS0hQREaH4+Hg98MADMsa411deK++//77H+87e57lqq2w7+07PRx99pPT0dEVFRal169YaMWJElce9lZ/Rhx9+qMzMTMXGxioiIkJjxoxRYWFh9R8AUAfc0QFqoaioSEePHvVoczgcat++vSTpv/7rv/Svf/1LEydO1M6dOxUZGamVK1fqL3/5ix588EElJSVJknJzczVw4EAdO3ZMkydP1sUXX6zDhw/rjTfe0MmTJxUaGqqTJ0/qiiuu0OHDh3X77berW7du2rhxo2bOnKm8vLwq42kWLlyoU6dOafLkyQoLC1O7du107NgxSdLDDz8sh8Oh3//+9yooKNC8efOUmpqq7du3q1WrVvrP//xPFRUV6auvvtKf//xnSTrvANZXX31VEyZM8Gjr3Lmzx3JsbKwkad++fe7gNHHiRC1atEjp6em67bbbVFZWpg8++ED//ve/Pe6WbdiwQW+++aamTJmiyMhIPfnkk7r++ut18OBBd39nZ2dr48aNuummm9SlSxft379fzz77rIYNG6acnByFh4d71DNlyhTFxsbqvvvuU0lJiSTpjjvu0BtvvKGpU6cqMTFRX3/9tTZs2KBPP/1Ul156aY3nv3PnTl111VWKjY3V/fffr7KyMmVlZSkuLu6c/SZ9P2B59uzZuu222zRw4EAVFxdry5Yt2rZtm6688krdfvvtys3N1erVq/W3v/2t2n1U93n/MFD+UHl5ua6++mr9+Mc/1mOPPaYVK1YoKytLZWVleuCBB85b7w/VprYf+uSTTzR06FBFRUXp3nvvVYsWLfTcc89p2LBhWrdunQYNGuSx/V133aW2bdsqKytL+/fv17x58zR16lQtXry4TnUCVRgANVq4cKGRVO0rLCzMY9udO3ea0NBQc9ttt5lvv/3WdO7c2QwYMMCcOXPGvc24ceNMUFCQyc7OrnKsiooKY4wxDz74oImIiDCff/65x/oZM2aY4OBgc/DgQWOMMfv27TOSTFRUlCkoKPDYdu3atUaS6dy5sykuLna3v/7660aSmT9/vrtt5MiRpnv37rXuk9zcXLN69WqzevVq0717dzNu3DizevVq8+qrrxpJ5sknn3Sv/+6774wxxrz33ntGkvnNb35T43kbY4wkExoaavbs2eNu+/jjj40k89RTT7nbTp48WWU/mzZtMpLMyy+/7G6r/PyGDBliysrKPLaPjo42Tqez1uddafTo0aZly5bmwIED7racnBwTHBxszv5Pavfu3U1GRoZ7OSkpyYwcOfKc+3c6nVX2Y8y5P+/KdQsXLnS3ZWRkGEnmrrvucrdVVFSYkSNHmtDQUFNYWGiM+b9rZe3atefdZ021GfP9Z5eVleVeHj16tAkNDTV79+51t+Xm5prIyEhz+eWXu9sqP6PU1FSPa+G3v/2tCQ4ONseOHav2eEBt8egKqAWXy6XVq1d7vJYvX+6xTd++fTVr1iy98MILSktL09GjR/XSSy+5B2hWVFRo6dKlGjVqVLXjfSofAyxZskRDhw5V27Zt3Y+Ijh49qtTUVJWXl2v9+vUe77v++uvdd1DONm7cOEVGRrqXb7jhBnXq1EnvvPOO133RqVMnpaamasCAATp06JBuueUWpaamKiQkRC1bttTkyZOVmpqq1NRU97iNv//973I4HMrKyqrxvCulpqaqR48e7uV+/fopKipKX375pbutVatW7n+fOXNGX3/9tXr27Kk2bdpU++hp0qRJCg4O9mhr06aN/vd//1e5ubm1Pvfy8nKtXLlSo0ePVrdu3dztP/rRj5SWlnbe97dp00affPKJvvjii1of82zn+ryr88Pp+pXT90+fPq13333X6xrOp7y8XKtWrdLo0aN14YUXuts7deqkm2++WRs2bFBxcbHHeyZPnuxxLQwdOlTl5eU6cOBAg9WJwMCjK6AWBg4cWKvByPfcc49ee+01bd68WY888ogSExPd6woLC1VcXKy+ffuecx9ffPGFduzYUeMfs4KCAo/l6maDVerVq5fHssPhUM+ePauMZamtM2fOqKioSJK0cuVKBQUF6eKLL9bRo0e1cuVK/cd//IeOHz+u48ePKzo62j0LaO/evYqPj1e7du3Oe4wfBohKbdu21bfffute/u677zR79mwtXLhQhw8f9hhzUlnfD1XXR4899pgyMjLUtWtXJScn65prrtG4ceM8/jCfrbCwUN99912VfpWk3r17nzdAPvDAA7r22mt10UUXqW/fvrr66qt16623ql+/fud83/nOpSZBQUFVzueiiy6SJK+vgdooLCzUyZMnqx2U/qMf/UgVFRU6dOiQ+vTp424/+3Nv27atJHl87oA3CDqAD3355Zfu/7e+c+dOr/ZRUVGhK6+80j1l+2yVf6gq/fDuRkP78MMPNXz4cI+27t27eyxXBrS1a9dq2LBhdT7G2XdeKv0wzNx1111auHChpk+frsGDBys6OloOh0M33XRTteNVquujX/ziFxo6dKj+8Y9/aNWqVXr88cf16KOP6s0331R6enqd666Nyy+/XHv37tVbb72lVatW6YUXXtCf//xnLViwQLfddlut9uHrz/vsO2qVysvLfXqc86nN5w54g6AD+EhFRYXGjx+vqKgoTZ8+XY888ohuuOEGXXfddZK+DwBRUVHatWvXOffTo0cPnThxQqmpqfWu6exHJMYY7dmzx+MOQk1/6KqTlJTkniF055136sc//rEyMjJUVFSkG264QfPnz3ffxaocgC19f04rV67UN998U6u7OufzxhtvKCMjQ0888YS77dSpU+5B2LXVqVMnTZkyRVOmTFFBQYEuvfRSPfzwwzUGndjYWLVq1araR0+7d++u1THbtWunCRMmaMKECTpx4oQuv/xy3X///e6gU5fP43wqKir05ZdfeoTjzz//XJLcg8Qr75yc3XfVPTKqbW2xsbEKDw+vtk8+++wzBQUFqWvXrrXaF1BfjNEBfGTu3LnauHGjnn/+eT344IO67LLLdOedd7pnawUFBWn06NH617/+Ve2U6sr/5/qLX/xCmzZt0sqVK6tsc+zYMZWVldW6ppdfflnHjx93L7/xxhvKy8vz+EMeERFR7eOe6rRt21apqakaMmSIDh48qOuvv16pqamKiIhQcHCwJk6c6B6fU/kHVPp+XIkxRrNmzarxvOsiODi4yvueeuqpWt+FKC8vr3LOHTp0UHx8vEpLS8953LS0NC1dulQHDx50t3/66afVfl5n+/rrrz2WW7durZ49e3ocs/I7fuoa2mry9NNPu/9tjNHTTz+tFi1aaMSIEZK+vyMXHBxcZezXM888U2Vfta0tODhYV111ld566y2PR2T5+fl65ZVXNGTIEEVFRXl5RkDdcEcHqIXly5frs88+q9J+2WWX6cILL9Snn36qP/7xjxo/frxGjRol6fvvB+nfv7+mTJmi119/XZL0yCOPaNWqVbriiis0efJk/ehHP1JeXp6WLFmiDRs2qE2bNrrnnnv0z3/+Uz/72c80fvx4JScnq6SkRDt37tQbb7yh/fv3KyYmplZ1t2vXTkOGDNGECROUn5+vefPmqWfPnpo0aZJ7m+TkZC1evFiZmZlKSUlR69at3edQky1btuj06dO67LLLJEkbN25Uv379avwivuHDh+vWW2/Vk08+qS+++EJXX321Kioq9MEHH2j48OF1/n2rn/3sZ/rb3/6m6OhoJSYmatOmTXr33Xfd08/P5/jx4+rSpYtuuOEGJSUlqXXr1nr33XeVnZ3tcZeoOrNmzdKKFSs0dOhQTZkyRWVlZXrqqafUp08f7dix45zvTUxM1LBhw5ScnKx27dppy5Yt7inulZKTkyVJv/nNb5SWlqbg4GDddNNNtTqvs7Vs2VIrVqxQRkaGBg0apOXLl2vZsmX6wx/+4H7EGB0drRtvvFFPPfWUHA6HevToobfffrvKWLC61vbQQw9p9erVGjJkiKZMmaKQkBA999xzKi0t1WOPPebV+QBe8dt8L6AZONf0cv3/qbdlZWUmJSXFdOnSpcpU2Pnz5xtJZvHixe62AwcOmHHjxpnY2FgTFhZmLrzwQuN0Ok1paal7m+PHj5uZM2eanj17mtDQUBMTE2Muu+wy86c//cmcPn3aGPN/038ff/zxKnVXThl+9dVXzcyZM02HDh1Mq1atzMiRIz2mRRtjzIkTJ8zNN99s2rRpYyTVaqr5nDlzTI8ePdzLqamp552qXVZWZh5//HFz8cUXm9DQUBMbG2vS09PN1q1b3dtIqnY/Z0/T/vbbb82ECRNMTEyMad26tUlLSzOfffZZle0qP7+zp/OXlpaae+65xyQlJZnIyEgTERFhkpKSzDPPPHPeczfGmHXr1pnk5GQTGhpqLrzwQrNgwQKTlZV13unlDz30kBk4cKBp06aNadWqlbn44ovNww8/7P5MK/vprrvuMrGxscbhcLj3ea7Pu6bp5REREWbv3r3mqquuMuHh4SYuLs5kZWWZ8vJyj/cXFhaa66+/3oSHh5u2bdua22+/3ezatavKPmuqzZiq08uNMWbbtm0mLS3NtG7d2oSHh5vhw4ebjRs3emxT02dU07R3oK4cxjDSC7DN+++/r+HDh2vJkiW64YYb/F0OAPgNY3QAAIC1CDoAAMBaBB0AAGAtxugAAABrcUcHAABYi6ADAACsFfBfGFhRUaHc3FxFRkb69KvXAQBAwzHG6Pjx44qPj1dQUM33bQI+6OTm5vKbKwAANFOHDh1Sly5dalwfsEHH5XLJ5XK5fzfo0KFD/PYKAADNRHFxsbp27arIyMhzbhfws66Ki4sVHR2toqIigg4AAM1Ebf9+MxgZAABYi6ADAACsRdABAADWCtig43K5lJiYqJSUFH+XAgAAGgiDkRmMDABAs8NgZAAAEPAIOgAAwFoEHQAAYK2ADToMRgYAwH4MRmYwMgAAzQ6DkQEAQMAj6AAAAGsRdAAAgLVC/F0AAABonhJmLDvvNvvnjGyESmoWsHd0mHUFAID9AjboOJ1O5eTkKDs729+lAACABhKwQQcAANiPoAMAAKxF0AEAANYi6AAAAGsRdAAAgLUCNugwvRwAAPsFbNBhejkAAPYL2KADAADsR9ABAADWIugAAABrEXQAAIC1CDoAAMBaBB0AAGCtgA06fI8OAAD2C9igw/foAABgv4ANOgAAwH4EHQAAYC2CDgAAsBZBBwAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgrYANOvzWFQAA9gvYoMNvXQEAYL+ADToAAMB+BB0AAGAtgg4AALAWQQcAAFiLoAMAAKxF0AEAANYi6AAAAGsRdAAAgLUIOgAAwFoEHQAAYC2CDgAAsBZBBwAAWIugAwAArBWwQcflcikxMVEpKSn+LgUAADSQgA06TqdTOTk5ys7O9ncpAACggQRs0AEAAPYj6AAAAGsRdAAAgLUIOgAAwFoEHQAAYC2CDgAAsBZBBwAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYi6ADAACsRdABAADWIugAAABrEXQAAIC1mn3QOXbsmAYMGKD+/furb9+++stf/uLvkgAAQBMR4u8C6isyMlLr169XeHi4SkpK1LdvX1133XVq3769v0sDAAB+1uzv6AQHBys8PFySVFpaKmOMjDF+rgoAADQFfg8669ev16hRoxQfHy+Hw6GlS5dW2cblcikhIUEtW7bUoEGDtHnzZo/1x44dU1JSkrp06aJ77rlHMTExjVQ9AABoyvwedEpKSpSUlCSXy1Xt+sWLFyszM1NZWVnatm2bkpKSlJaWpoKCAvc2bdq00ccff6x9+/bplVdeUX5+fmOVDwAAmjC/B5309HQ99NBDGjNmTLXr586dq0mTJmnChAlKTEzUggULFB4erhdffLHKtnFxcUpKStIHH3xQ4/FKS0tVXFzs8QIAAHbye9A5l9OnT2vr1q1KTU11twUFBSk1NVWbNm2SJOXn5+v48eOSpKKiIq1fv169e/eucZ+zZ89WdHS0+9W1a9eGPQkAAOA3TTroHD16VOXl5YqLi/Noj4uL05EjRyRJBw4c0NChQ5WUlKShQ4fqrrvu0iWXXFLjPmfOnKmioiL369ChQw16DgAAwH+a/fTygQMHavv27bXePiwsTGFhYQ1XEAAAaDKa9B2dmJgYBQcHVxlcnJ+fr44dO/qpKgAA0Fw06aATGhqq5ORkrVmzxt1WUVGhNWvWaPDgwfXat8vlUmJiolJSUupbJgAAaKL8/ujqxIkT2rNnj3t537592r59u9q1a6du3bopMzNTGRkZGjBggAYOHKh58+appKREEyZMqNdxnU6nnE6niouLFR0dXd/TAAAATZDfg86WLVs0fPhw93JmZqYkKSMjQ4sWLdLYsWNVWFio++67T0eOHFH//v21YsWKKgOUAQAAzuYwAf57CZV3dIqKihQVFeXvcgAAaDYSZiw77zb754xskGPX9u93kx6jAwAAUB8BG3QYjAwAgP0CNug4nU7l5OQoOzvb36UAAIAGErBBBwAA2I+gAwAArEXQAQAA1grYoMNgZAAA7BewQYfByAAA2C9ggw4AALAfQQcAAFiLoAMAAKxF0AEAANYK2KDDrCsAAOwXsEGHWVcAANgvYIMOAACwH0EHAABYi6ADAACsRdABAADWIugAAABrBWzQYXo5AAD2C9igw/RyAADsF7BBBwAA2I+gAwAArEXQAQAA1iLoAAAAaxF0AACAtQg6AADAWgEbdPgeHQAA7BewQYfv0QEAwH4BG3QAAID9CDoAAMBaBB0AAGAtgg4AALAWQQcAAFiLoAMAAKxF0AEAANYi6AAAAGsRdAAAgLUCNujwExAAANgvYIMOPwEBAID9AjboAAAA+xF0AACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYi6ADAACsRdABAADWIugAAABrEXQAAIC1CDoAAMBaBB0AAGAtgg4AALAWQQcAAFgrYIOOy+VSYmKiUlJS/F0KAABoIAEbdJxOp3JycpSdne3vUgAAQAMJ2KADAADsR9ABAADW8irofPnll76uAwAAwOe8Cjo9e/bU8OHD9d///d86deqUr2sCAADwCa+CzrZt29SvXz9lZmaqY8eOuv3227V582Zf1wYAAFAvXgWd/v37a/78+crNzdWLL76ovLw8DRkyRH379tXcuXNVWFjo6zoBAADqrF6DkUNCQnTddddpyZIlevTRR7Vnzx797ne/U9euXTVu3Djl5eX5qk4AAIA6q1fQ2bJli6ZMmaJOnTpp7ty5+t3vfqe9e/dq9erVys3N1bXXXuurOgEAAOosxJs3zZ07VwsXLtTu3bt1zTXX6OWXX9Y111yjoKDvc9MFF1ygRYsWKSEhwZe1AgAA1IlXQefZZ5/Vr3/9a40fP16dOnWqdpsOHTror3/9a72KAwAAqA+vgs4XX3xx3m1CQ0OVkZHhze4BAAB8wqsxOgsXLtSSJUuqtC9ZskQvvfRSvYsCAADwBa+CzuzZsxUTE1OlvUOHDnrkkUfqXRQAAIAveBV0Dh48qAsuuKBKe/fu3XXw4MF6FwUAAOALXgWdDh06aMeOHVXaP/74Y7Vv377eRQEAAPiCV0Hnl7/8pX7zm99o7dq1Ki8vV3l5ud577z1NmzZNN910k69rBAAA8IpXs64efPBB7d+/XyNGjFBIyPe7qKio0Lhx4xijAwAAmgyvgk5oaKgWL16sBx98UB9//LFatWqlSy65RN27d/d1fQAAAF7zKuhUuuiii3TRRRf5qhYAAACf8irolJeXa9GiRVqzZo0KCgpUUVHhsf69997zSXG1cejQId16660qKChQSEiI/vjHP+rGG29stOMDAICmy6ugM23aNC1atEgjR45U37595XA4fF1XrYWEhGjevHnq37+/jhw5ouTkZF1zzTWKiIjwW00AAKBp8CrovPbaa3r99dd1zTXX+LqeOuvUqZP797Y6duyomJgYffPNNwQdAADg3fTy0NBQ9ezZ0ycFrF+/XqNGjVJ8fLwcDoeWLl1aZRuXy6WEhAS1bNlSgwYN0ubNm6vd19atW1VeXq6uXbv6pDYAANC8eRV07r77bs2fP1/GmHoXUFJSoqSkJLlcrmrXL168WJmZmcrKytK2bduUlJSktLQ0FRQUeGz3zTffaNy4cXr++efrXRMAALCDV4+uNmzYoLVr12r58uXq06ePWrRo4bH+zTffrPW+0tPTlZ6eXuP6uXPnatKkSZowYYIkacGCBVq2bJlefPFFzZgxQ5JUWlqq0aNHa8aMGbrsssvOebzS0lKVlpa6l4uLi2tdKwAAaF68Cjpt2rTRmDFjfF1LFadPn9bWrVs1c+ZMd1tQUJBSU1O1adMmSZIxRuPHj9dPf/pT3Xrrrefd5+zZszVr1qwGqxkAADQdXgWdhQsX+rqOah09elTl5eWKi4vzaI+Li9Nnn30mSfrwww+1ePFi9evXzz2+529/+5suueSSavc5c+ZMZWZmupeLi4sZ0wMAgKW8/sLAsrIyvf/++9q7d69uvvlmRUZGKjc3V1FRUWrdurUvazynIUOGVPken3MJCwtTWFhYA1YEAACaCq+CzoEDB3T11Vfr4MGDKi0t1ZVXXqnIyEg9+uijKi0t1YIFC3xSXExMjIKDg5Wfn+/Rnp+fr44dO/rkGAAAwF5ezbqaNm2aBgwYoG+//VatWrVyt48ZM0Zr1qzxWXGhoaFKTk722GdFRYXWrFmjwYMH12vfLpdLiYmJSklJqW+ZAACgifLqjs4HH3ygjRs3KjQ01KM9ISFBhw8frtO+Tpw4oT179riX9+3bp+3bt6tdu3bq1q2bMjMzlZGRoQEDBmjgwIGaN2+eSkpK3LOwvOV0OuV0OlVcXKzo6Oh67QsAADRNXgWdiooKlZeXV2n/6quvFBkZWad9bdmyRcOHD3cvVw4UzsjI0KJFizR27FgVFhbqvvvu05EjR9S/f3+tWLGiygBlAACAszmMF9/6N3bsWEVHR+v5559XZGSkduzYodjYWF177bXq1q1bo83K8oXKOzpFRUWKiorydzkAADQbCTOWnXeb/XNGNsixa/v326s7Ok888YTS0tKUmJioU6dO6eabb9YXX3yhmJgYvfrqq14X3ZhcLpdcLle1d6YAAIAdvLqjI30/vfy1117Tjh07dOLECV166aW65ZZbPAYnNwfc0QEAwDvW3tGRpJCQEP3qV7/y9u0AAAANzqug8/LLL59z/bhx47wqBgAAwJe8CjrTpk3zWD5z5oxOnjyp0NBQhYeHE3QAAECT4NUXBn777bcerxMnTmj37t0aMmRIsxmMDAAA7OdV0KlOr169NGfOnCp3e5oqvhkZAAD7+SzoSN8PUM7NzfXlLhuM0+lUTk6OsrOz/V0KAABoIF6N0fnnP//psWyMUV5enp5++mn95Cc/8UlhAAAA9eVV0Bk9erTHssPhUGxsrH7605/qiSee8EVdAAAA9eb1b10BAAA0dT4do9OcMBgZAAD7eXVHp/IXxmtj7ty53hyiwTmdTjmdTvdXSAMAAPt4FXQ++ugjffTRRzpz5ox69+4tSfr8888VHBysSy+91L2dw+HwTZUAAABe8CrojBo1SpGRkXrppZfUtm1bSd9/ieCECRM0dOhQ3X333T4tEgAAwBtejdF54oknNHv2bHfIkaS2bdvqoYceYtYVAABoMrwKOsXFxSosLKzSXlhYqOPHj9e7KAAAAF/wKuiMGTNGEyZM0JtvvqmvvvpKX331lf7+979r4sSJuu6663xdIwAAgFe8GqOzYMEC/e53v9PNN9+sM2fOfL+jkBBNnDhRjz/+uE8LBAAA8JZXQSc8PFzPPPOMHn/8ce3du1eS1KNHD0VERPi0uIbkcrnkcrlUXl7u71IAAEADqdcXBubl5SkvL0+9evVSRESEjDG+qqvB8aOeAADYz6ug8/XXX2vEiBG66KKLdM011ygvL0+SNHHiRKaWAwCAJsOroPPb3/5WLVq00MGDBxUeHu5uHzt2rFasWOGz4gAAAOrDqzE6q1at0sqVK9WlSxeP9l69eunAgQM+KQwAAKC+vLqjU1JS4nEnp9I333yjsLCwehcFAADgC14FnaFDh+rll192LzscDlVUVOixxx7T8OHDfVYcAABAfXj16Oqxxx7TiBEjtGXLFp0+fVr33nuvPvnkE33zzTf68MMPfV0jAACAV7y6o9O3b199/vnnGjJkiK699lqVlJTouuuu00cffaQePXr4usYG4XK5lJiYqJSUFH+XAgAAGojD1PHLb86cOaOrr75aCxYsUK9evRqqrkZTXFys6OhoFRUVKSoqyt/lAADQbCTMWHbebfbPGdkgx67t3+8639Fp0aKFduzYUa/iAAAAGoNXj65+9atf6a9//auvawEAAPAprwYjl5WV6cUXX9S7776r5OTkKr9xNXfuXJ8UBwAAUB91CjpffvmlEhIStGvXLl166aWSpM8//9xjG4fD4bvqAAAA6qFOQadXr17Ky8vT2rVrJX3/kw9PPvmk4uLiGqQ4AACA+qjTGJ2zJ2gtX75cJSUlPi0IAADAV7wajFypjjPTAQAAGlWdgo7D4agyBocxOQAAoKmq0xgdY4zGjx/v/uHOU6dO6Y477qgy6+rNN9/0XYUAAABeqlPQycjI8Fj+1a9+5dNiGpPL5ZLL5VJ5ebm/SwEAAA2kzj8BYRt+AgIAAO9Y+RMQAAAAzQVBBwAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYi6ADAACsRdABAADWIugAAABrEXQAAIC1AjbouFwuJSYmKiUlxd+lAACABhKwQcfpdConJ0fZ2dn+LgUAADSQgA06AADAfgQdAABgLYIOAACwFkEHAABYi6ADAACsRdABAADWIugAAABrEXQAAIC1CDoAAMBaBB0AAGAtgg4AALAWQQcAAFiLoAMAAKxF0AEAANYi6AAAAGsRdAAAgLUIOgAAwFoEHQAAYC2CDgAAsJYVQWfMmDFq27atbrjhBn+XAgAAmhArgs60adP08ssv+7sMAADQxFgRdIYNG6bIyEh/lwEAAJoYvwed9evXa9SoUYqPj5fD4dDSpUurbONyuZSQkKCWLVtq0KBB2rx5c+MXCgAAmh2/B52SkhIlJSXJ5XJVu37x4sXKzMxUVlaWtm3bpqSkJKWlpamgoKCRKwUAAM1NiL8LSE9PV3p6eo3r586dq0mTJmnChAmSpAULFmjZsmV68cUXNWPGjDofr7S0VKWlpe7l4uLiuhcNAACaBb/f0TmX06dPa+vWrUpNTXW3BQUFKTU1VZs2bfJqn7Nnz1Z0dLT71bVrV1+VCwAAmpgmHXSOHj2q8vJyxcXFebTHxcXpyJEj7uXU1FTdeOONeuedd9SlS5dzhqCZM2eqqKjI/Tp06FCD1Q8AAPzL74+ufOHdd9+t9bZhYWEKCwtrwGoAAEBT0aTv6MTExCg4OFj5+fke7fn5+erYsaOfqgIAAM1Fkw46oaGhSk5O1po1a9xtFRUVWrNmjQYPHlyvfbtcLiUmJiolJaW+ZQIAgCbK74+uTpw4oT179riX9+3bp+3bt6tdu3bq1q2bMjMzlZGRoQEDBmjgwIGaN2+eSkpK3LOwvOV0OuV0OlVcXKzo6Oj6ngYAAGiC/B50tmzZouHDh7uXMzMzJUkZGRlatGiRxo4dq8LCQt133306cuSI+vfvrxUrVlQZoAwAAHA2hzHG+LsIf6q8o1NUVKSoqCh/lwMAQLORMGPZebfZP2dkgxy7tn+/m/QYnYbEGB0AAOwXsEHH6XQqJydH2dnZ/i4FAAA0kIANOgAAwH4EHQAAYC2CDgAAsFbABh0GIwMAYL+ADToMRgYAwH4BG3QAAID9CDoAAMBaBB0AAGAtgg4AALBWwAYdZl0BAGC/gA06zLoCAMB+ARt0AACA/Qg6AADAWgQdAABgLYIOAACwFkEHAABYK2CDDtPLAQCwX8AGHaaXAwBgv4ANOgAAwH4EHQAAYC2CDgAAsBZBBwAAWIugAwAArEXQAQAA1iLoAAAAawVs0OELAwEAsF/ABh2+MBAAAPsFbNABAAD2I+gAAABrEXQAAIC1CDoAAMBaBB0AAGAtgg4AALAWQQcAAFiLoAMAAKxF0AEAANYK2KDDT0AAAGC/gA06/AQEAAD2C9igAwAA7EfQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYi6ADAACsRdABAADWIugAAABrEXQAAIC1CDoAAMBaBB0AAGCtEH8X4C8ul0sul0vl5eX+LgUBKGHGsvNus3/OyEaoBADsFrB3dJxOp3JycpSdne3vUgAAQAMJ2KADAADsR9ABAADWIugAAABrEXQAAIC1CDoAAMBaBB0AAGAtgg4AALAWQQcAAFiLoAMAAKxF0AEAANYi6AAAAGsRdAAAgLUIOgAAwFoEHQAAYC2CDgAAsBZBBwAAWIugAwAArEXQAQAA1iLoAAAAa1kRdN5++2317t1bvXr10gsvvODvcgAAQBMR4u8C6qusrEyZmZlau3atoqOjlZycrDFjxqh9+/b+Lg0AAPhZs7+js3nzZvXp00edO3dW69atlZ6erlWrVvm7LAAA0AT4PeisX79eo0aNUnx8vBwOh5YuXVplG5fLpYSEBLVs2VKDBg3S5s2b3etyc3PVuXNn93Lnzp11+PDhxigdAAA0cX4POiUlJUpKSpLL5ap2/eLFi5WZmamsrCxt27ZNSUlJSktLU0FBgVfHKy0tVXFxsccLAADYye9jdNLT05Wenl7j+rlz52rSpEmaMGGCJGnBggVatmyZXnzxRc2YMUPx8fEed3AOHz6sgQMH1ri/2bNna9asWb47gXNImLHsvNvsnzOyESqBrbjGmh8+M9QH10/d+f2OzrmcPn1aW7duVWpqqrstKChIqamp2rRpkyRp4MCB2rVrlw4fPqwTJ05o+fLlSktLq3GfM2fOVFFRkft16NChBj8PAADgH36/o3MuR48eVXl5ueLi4jza4+Li9Nlnn0mSQkJC9MQTT2j48OGqqKjQvffee84ZV2FhYQoLC2vQugEAQNPQpINObf385z/Xz3/+c3+XAQAAmpgm/egqJiZGwcHBys/P92jPz89Xx44d/VQVAABoLpp00AkNDVVycrLWrFnjbquoqNCaNWs0ePDgeu3b5XIpMTFRKSkp9S0TAAA0UX5/dHXixAnt2bPHvbxv3z5t375d7dq1U7du3ZSZmamMjAwNGDBAAwcO1Lx581RSUuKeheUtp9Mpp9Op4uJiRUdH1/c0AABAE+T3oLNlyxYNHz7cvZyZmSlJysjI0KJFizR27FgVFhbqvvvu05EjR9S/f3+tWLGiygBlAACAs/k96AwbNkzGmHNuM3XqVE2dOrWRKgIAALZo0mN0GhJjdAAAsF/ABh2n06mcnBxlZ2f7uxQAANBAAjboAAAA+xF0AACAtQg6AADAWgEbdBiMDACA/QI26DAYGQAA+/n9e3T8rfI7fIqLi32+74rSk+fdpiGOi6bPV9cG11jzw2eG+mhq148/66nc7/m+i89hzreF5b766it17drV32UAAAAvHDp0SF26dKlxfcAHnYqKCuXm5ioyMlIOh+Oc2xYXF6tr1646dOiQoqKiGqnCwEV/Nz76vHHR342PPm9cDdnfxhgdP35c8fHxCgqqeSROwD+6CgoKOmcSrE5UVBT/A2lE9Hfjo88bF/3d+OjzxtVQ/V2bH+UO2MHIAADAfgQdAABgLYJOHYSFhSkrK0thYWH+LiUg0N+Njz5vXPR346PPG1dT6O+AH4wMAADsxR0dAABgLYIOAACwFkEHAABYi6ADAACsRdABAADWCvigM3v2bKWkpCgyMlIdOnTQ6NGjtXv3bo9thg0bJofD4fG64447PLY5ePCgRo4cqfDwcHXo0EH33HOPysrKGvNUmoVnn31W/fr1c39L5uDBg7V8+XL3+lOnTsnpdKp9+/Zq3bq1rr/+euXn53vsg76um/P1Odd3w5ozZ44cDoemT5/ubuM6bzjV9TfXuG/df//9Vfrz4osvdq9vatd3wP8ExLp16+R0OpWSkqKysjL94Q9/0FVXXaWcnBxFRES4t5s0aZIeeOAB93J4eLj73+Xl5Ro5cqQ6duyojRs3Ki8vT+PGjVOLFi30yCOPNOr5NHVdunTRnDlz1KtXLxlj9NJLL+naa6/VRx99pD59+ui3v/2tli1bpiVLlig6OlpTp07Vddddpw8//FASfe2N8/W5xPXdULKzs/Xcc8+pX79+Hu1c5w2jpv6WuMZ9rU+fPnr33XfdyyEh/xcnmtz1beChoKDASDLr1q1zt11xxRVm2rRpNb7nnXfeMUFBQebIkSPutmeffdZERUWZ0tLShizXCm3btjUvvPCCOXbsmGnRooVZsmSJe92nn35qJJlNmzYZY+hrX6nsc2O4vhvK8ePHTa9evczq1as9+pjrvGHU1N/GcI37WlZWlklKSqp2XVO8vgP+0dXZioqKJEnt2rXzaP+f//kfxcTEqG/fvpo5c6ZOnjzpXrdp0yZdcskliouLc7elpaWpuLhYn3zySeMU3gyVl5frtddeU0lJiQYPHqytW7fqzJkzSk1NdW9z8cUXq1u3btq0aZMk+rq+zu7zSlzfvud0OjVy5EiP61kS13kDqam/K3GN+9YXX3yh+Ph4XXjhhbrlllt08OBBSU3z+g74R1c/VFFRoenTp+snP/mJ+vbt626/+eab1b17d8XHx2vHjh36/e9/r927d+vNN9+UJB05csTjA5PkXj5y5EjjnUAzsXPnTg0ePFinTp1S69at9Y9//EOJiYnavn27QkND1aZNG4/t4+Li3P1IX3unpj6XuL4bwmuvvaZt27YpOzu7yrojR45wnfvYufpb4hr3tUGDBmnRokXq3bu38vLyNGvWLA0dOlS7du1qktc3QecHnE6ndu3apQ0bNni0T5482f3vSy65RJ06ddKIESO0d+9e9ejRo7HLbPZ69+6t7du3q6ioSG+88YYyMjK0bt06f5dltZr6PDExkevbxw4dOqRp06Zp9erVatmypb/LsV5t+ptr3LfS09Pd/+7Xr58GDRqk7t276/XXX1erVq38WFn1eHT1/02dOlVvv/221q5dqy5dupxz20GDBkmS9uzZI0nq2LFjlRHllcsdO3ZsgGqbt9DQUPXs2VPJycmaPXu2kpKSNH/+fHXs2FGnT5/WsWPHPLbPz8939yN97Z2a+rw6XN/1s3XrVhUUFOjSSy9VSEiIQkJCtG7dOj355JMKCQlRXFwc17kPna+/y8vLq7yHa9y32rRpo4suukh79uxpkv8dD/igY4zR1KlT9Y9//EPvvfeeLrjggvO+Z/v27ZKkTp06SZIGDx6snTt3qqCgwL3N6tWrFRUV5X48gJpVVFSotLRUycnJatGihdasWeNet3v3bh08eNA9noS+9o3KPq8O13f9jBgxQjt37tT27dvdrwEDBuiWW25x/5vr3HfO19/BwcFV3sM17lsnTpzQ3r171alTp6b533GfD29uZu68804THR1t3n//fZOXl+d+nTx50hhjzJ49e8wDDzxgtmzZYvbt22feeustc+GFF5rLL7/cvY+ysjLTt29fc9VVV5nt27ebFStWmNjYWDNz5kx/nVaTNWPGDLNu3Tqzb98+s2PHDjNjxgzjcDjMqlWrjDHG3HHHHaZbt27mvffeM1u2bDGDBw82gwcPdr+fvq67c/U513fjOHvWD9d5w/phf3ON+97dd99t3n//fbNv3z7z4YcfmtTUVBMTE2MKCgqMMU3v+g74oCOp2tfChQuNMcYcPHjQXH755aZdu3YmLCzM9OzZ09xzzz2mqKjIYz/79+836enpplWrViYmJsbcfffd5syZM344o6bt17/+tenevbsJDQ01sbGxZsSIEe6QY4wx3333nZkyZYpp27atCQ8PN2PGjDF5eXke+6Cv6+Zcfc713TjODjpc5w3rh/3NNe57Y8eONZ06dTKhoaGmc+fOZuzYsWbPnj3u9U3t+nYYY4zv7xMBAAD4X8CP0QEAAPYi6AAAAGsRdAAAgLUIOgAAwFoEHQAAYC2CDgAAsBZBBwAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtf4fiVInC+FH654AAAAASUVORK5CYII=", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjoAAAGzCAYAAAAmH71NAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAzC0lEQVR4nO3de3hU1b3/8c8kIYGEJFwSAuEWBcQGJBxDoFhQKNEYKRW8FKuVQBFUBguN1ULPqRFvoFYK6ihaK2jPURFrsRW5iQgi9BBABIyiIDdJSIJKAkECSdbvD3+Z45AEkskkk6x5v55nnoe99p69v3vNrvl077VmHMYYIwAAAAsF+bsAAACAhkLQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYi6ADAACsRdAB0GQ4HA5NnTrV32V45f7775fD4fBoS0hI0Pjx4xv82Pv375fD4dCiRYvcbePHj1fr1q0b/NiVHA6H7r///kY7HlBbBB3gHBYtWiSHw1Hj69///re/S6y3V155RfPmzfP6/WPGjNEvf/lLSZIxRm3btvX4g4u6eeedd5psYGjKtQE1CfF3AUBz8MADD+iCCy6o0t6zZ08/VONbr7zyinbt2qXp06d79f7Nmzfr3nvvlSR9+umnOnbsmH784x/7sMLma/fu3QoKqtv/n3znnXfkcrnqFCi6d++u7777Ti1atKhjhXVzrtq+++47hYTwJwVND1clUAvp6ekaMGCAv8s4r7KyMlVUVCg0NLRRjvfVV18pNzfXHWw2bdqk6Oho9e7du1GO742SkhJFREQ0yrHCwsIadP8//LxbtmzZoMc6H38fH6gJj64AH8jKylJQUJDWrFnj0T558mSFhobq448/drcdPnxYEydOVHx8vMLCwnTBBRfozjvv1OnTp93bHDt2TNOnT1fXrl0VFhamnj176tFHH1VFRYV7m8pxGX/60580b9489ejRQ2FhYcrJydH7778vh8OhxYsX6w9/+IM6duyoiIgI/fznP9ehQ4fc+xg2bJiWLVumAwcOuB/HJSQknPNcS0tLdfToUR09elRr165VixYt1LVrVx09elTr169Xv3799PXXX+vo0aMe9VZUVGj+/Pm65JJL1LJlS8XGxurqq6/Wli1bqhxj6dKl6tu3r8LCwtSnTx+tWLHCY/2BAwc0ZcoU9e7dW61atVL79u114403av/+/R7bVT56XLdunaZMmaIOHTqoS5cukqTjx49r+vTpSkhIUFhYmDp06KArr7xS27ZtO+f5S9KGDRuUkpKili1bqkePHnruueeq3e7sMTpnzpzRrFmz1KtXL7Vs2VLt27fXkCFDtHr1aknfj6txuVyS5PGIVDr3513dGJ1KX375pdLS0hQREaH4+Hg98MADMsa411deK++//77H+87e57lqq2w7+07PRx99pPT0dEVFRal169YaMWJElce9lZ/Rhx9+qMzMTMXGxioiIkJjxoxRYWFh9R8AUAfc0QFqoaioSEePHvVoczgcat++vSTpv/7rv/Svf/1LEydO1M6dOxUZGamVK1fqL3/5ix588EElJSVJknJzczVw4EAdO3ZMkydP1sUXX6zDhw/rjTfe0MmTJxUaGqqTJ0/qiiuu0OHDh3X77berW7du2rhxo2bOnKm8vLwq42kWLlyoU6dOafLkyQoLC1O7du107NgxSdLDDz8sh8Oh3//+9yooKNC8efOUmpqq7du3q1WrVvrP//xPFRUV6auvvtKf//xnSTrvANZXX31VEyZM8Gjr3Lmzx3JsbKwkad++fe7gNHHiRC1atEjp6em67bbbVFZWpg8++ED//ve/Pe6WbdiwQW+++aamTJmiyMhIPfnkk7r++ut18OBBd39nZ2dr48aNuummm9SlSxft379fzz77rIYNG6acnByFh4d71DNlyhTFxsbqvvvuU0lJiSTpjjvu0BtvvKGpU6cqMTFRX3/9tTZs2KBPP/1Ul156aY3nv3PnTl111VWKjY3V/fffr7KyMmVlZSkuLu6c/SZ9P2B59uzZuu222zRw4EAVFxdry5Yt2rZtm6688krdfvvtys3N1erVq/W3v/2t2n1U93n/MFD+UHl5ua6++mr9+Mc/1mOPPaYVK1YoKytLZWVleuCBB85b7w/VprYf+uSTTzR06FBFRUXp3nvvVYsWLfTcc89p2LBhWrdunQYNGuSx/V133aW2bdsqKytL+/fv17x58zR16lQtXry4TnUCVRgANVq4cKGRVO0rLCzMY9udO3ea0NBQc9ttt5lvv/3WdO7c2QwYMMCcOXPGvc24ceNMUFCQyc7OrnKsiooKY4wxDz74oImIiDCff/65x/oZM2aY4OBgc/DgQWOMMfv27TOSTFRUlCkoKPDYdu3atUaS6dy5sykuLna3v/7660aSmT9/vrtt5MiRpnv37rXuk9zcXLN69WqzevVq0717dzNu3DizevVq8+qrrxpJ5sknn3Sv/+6774wxxrz33ntGkvnNb35T43kbY4wkExoaavbs2eNu+/jjj40k89RTT7nbTp48WWU/mzZtMpLMyy+/7G6r/PyGDBliysrKPLaPjo42Tqez1uddafTo0aZly5bmwIED7racnBwTHBxszv5Pavfu3U1GRoZ7OSkpyYwcOfKc+3c6nVX2Y8y5P+/KdQsXLnS3ZWRkGEnmrrvucrdVVFSYkSNHmtDQUFNYWGiM+b9rZe3atefdZ021GfP9Z5eVleVeHj16tAkNDTV79+51t+Xm5prIyEhz+eWXu9sqP6PU1FSPa+G3v/2tCQ4ONseOHav2eEBt8egKqAWXy6XVq1d7vJYvX+6xTd++fTVr1iy98MILSktL09GjR/XSSy+5B2hWVFRo6dKlGjVqVLXjfSofAyxZskRDhw5V27Zt3Y+Ijh49qtTUVJWXl2v9+vUe77v++uvdd1DONm7cOEVGRrqXb7jhBnXq1EnvvPOO133RqVMnpaamasCAATp06JBuueUWpaamKiQkRC1bttTkyZOVmpqq1NRU97iNv//973I4HMrKyqrxvCulpqaqR48e7uV+/fopKipKX375pbutVatW7n+fOXNGX3/9tXr27Kk2bdpU++hp0qRJCg4O9mhr06aN/vd//1e5ubm1Pvfy8nKtXLlSo0ePVrdu3dztP/rRj5SWlnbe97dp00affPKJvvjii1of82zn+ryr88Pp+pXT90+fPq13333X6xrOp7y8XKtWrdLo0aN14YUXuts7deqkm2++WRs2bFBxcbHHeyZPnuxxLQwdOlTl5eU6cOBAg9WJwMCjK6AWBg4cWKvByPfcc49ee+01bd68WY888ogSExPd6woLC1VcXKy+ffuecx9ffPGFduzYUeMfs4KCAo/l6maDVerVq5fHssPhUM+ePauMZamtM2fOqKioSJK0cuVKBQUF6eKLL9bRo0e1cuVK/cd//IeOHz+u48ePKzo62j0LaO/evYqPj1e7du3Oe4wfBohKbdu21bfffute/u677zR79mwtXLhQhw8f9hhzUlnfD1XXR4899pgyMjLUtWtXJScn65prrtG4ceM8/jCfrbCwUN99912VfpWk3r17nzdAPvDAA7r22mt10UUXqW/fvrr66qt16623ql+/fud83/nOpSZBQUFVzueiiy6SJK+vgdooLCzUyZMnqx2U/qMf/UgVFRU6dOiQ+vTp424/+3Nv27atJHl87oA3CDqAD3355Zfu/7e+c+dOr/ZRUVGhK6+80j1l+2yVf6gq/fDuRkP78MMPNXz4cI+27t27eyxXBrS1a9dq2LBhdT7G2XdeKv0wzNx1111auHChpk+frsGDBys6OloOh0M33XRTteNVquujX/ziFxo6dKj+8Y9/aNWqVXr88cf16KOP6s0331R6enqd666Nyy+/XHv37tVbb72lVatW6YUXXtCf//xnLViwQLfddlut9uHrz/vsO2qVysvLfXqc86nN5w54g6AD+EhFRYXGjx+vqKgoTZ8+XY888ohuuOEGXXfddZK+DwBRUVHatWvXOffTo0cPnThxQqmpqfWu6exHJMYY7dmzx+MOQk1/6KqTlJTkniF055136sc//rEyMjJUVFSkG264QfPnz3ffxaocgC19f04rV67UN998U6u7OufzxhtvKCMjQ0888YS77dSpU+5B2LXVqVMnTZkyRVOmTFFBQYEuvfRSPfzwwzUGndjYWLVq1araR0+7d++u1THbtWunCRMmaMKECTpx4oQuv/xy3X///e6gU5fP43wqKir05ZdfeoTjzz//XJLcg8Qr75yc3XfVPTKqbW2xsbEKDw+vtk8+++wzBQUFqWvXrrXaF1BfjNEBfGTu3LnauHGjnn/+eT344IO67LLLdOedd7pnawUFBWn06NH617/+Ve2U6sr/5/qLX/xCmzZt0sqVK6tsc+zYMZWVldW6ppdfflnHjx93L7/xxhvKy8vz+EMeERFR7eOe6rRt21apqakaMmSIDh48qOuvv16pqamKiIhQcHCwJk6c6B6fU/kHVPp+XIkxRrNmzarxvOsiODi4yvueeuqpWt+FKC8vr3LOHTp0UHx8vEpLS8953LS0NC1dulQHDx50t3/66afVfl5n+/rrrz2WW7durZ49e3ocs/I7fuoa2mry9NNPu/9tjNHTTz+tFi1aaMSIEZK+vyMXHBxcZezXM888U2Vfta0tODhYV111ld566y2PR2T5+fl65ZVXNGTIEEVFRXl5RkDdcEcHqIXly5frs88+q9J+2WWX6cILL9Snn36qP/7xjxo/frxGjRol6fvvB+nfv7+mTJmi119/XZL0yCOPaNWqVbriiis0efJk/ehHP1JeXp6WLFmiDRs2qE2bNrrnnnv0z3/+Uz/72c80fvx4JScnq6SkRDt37tQbb7yh/fv3KyYmplZ1t2vXTkOGDNGECROUn5+vefPmqWfPnpo0aZJ7m+TkZC1evFiZmZlKSUlR69at3edQky1btuj06dO67LLLJEkbN25Uv379avwivuHDh+vWW2/Vk08+qS+++EJXX321Kioq9MEHH2j48OF1/n2rn/3sZ/rb3/6m6OhoJSYmatOmTXr33Xfd08/P5/jx4+rSpYtuuOEGJSUlqXXr1nr33XeVnZ3tcZeoOrNmzdKKFSs0dOhQTZkyRWVlZXrqqafUp08f7dix45zvTUxM1LBhw5ScnKx27dppy5Yt7inulZKTkyVJv/nNb5SWlqbg4GDddNNNtTqvs7Vs2VIrVqxQRkaGBg0apOXLl2vZsmX6wx/+4H7EGB0drRtvvFFPPfWUHA6HevToobfffrvKWLC61vbQQw9p9erVGjJkiKZMmaKQkBA999xzKi0t1WOPPebV+QBe8dt8L6AZONf0cv3/qbdlZWUmJSXFdOnSpcpU2Pnz5xtJZvHixe62AwcOmHHjxpnY2FgTFhZmLrzwQuN0Ok1paal7m+PHj5uZM2eanj17mtDQUBMTE2Muu+wy86c//cmcPn3aGPN/038ff/zxKnVXThl+9dVXzcyZM02HDh1Mq1atzMiRIz2mRRtjzIkTJ8zNN99s2rRpYyTVaqr5nDlzTI8ePdzLqamp552qXVZWZh5//HFz8cUXm9DQUBMbG2vS09PN1q1b3dtIqnY/Z0/T/vbbb82ECRNMTEyMad26tUlLSzOfffZZle0qP7+zp/OXlpaae+65xyQlJZnIyEgTERFhkpKSzDPPPHPeczfGmHXr1pnk5GQTGhpqLrzwQrNgwQKTlZV13unlDz30kBk4cKBp06aNadWqlbn44ovNww8/7P5MK/vprrvuMrGxscbhcLj3ea7Pu6bp5REREWbv3r3mqquuMuHh4SYuLs5kZWWZ8vJyj/cXFhaa66+/3oSHh5u2bdua22+/3ezatavKPmuqzZiq08uNMWbbtm0mLS3NtG7d2oSHh5vhw4ebjRs3emxT02dU07R3oK4cxjDSC7DN+++/r+HDh2vJkiW64YYb/F0OAPgNY3QAAIC1CDoAAMBaBB0AAGAtxugAAABrcUcHAABYi6ADAACsFfBfGFhRUaHc3FxFRkb69KvXAQBAwzHG6Pjx44qPj1dQUM33bQI+6OTm5vKbKwAANFOHDh1Sly5dalwfsEHH5XLJ5XK5fzfo0KFD/PYKAADNRHFxsbp27arIyMhzbhfws66Ki4sVHR2toqIigg4AAM1Ebf9+MxgZAABYi6ADAACsRdABAADWCtig43K5lJiYqJSUFH+XAgAAGgiDkRmMDABAs8NgZAAAEPAIOgAAwFoEHQAAYK2ADToMRgYAwH4MRmYwMgAAzQ6DkQEAQMAj6AAAAGsRdAAAgLVC/F0AAABonhJmLDvvNvvnjGyESmoWsHd0mHUFAID9AjboOJ1O5eTkKDs729+lAACABhKwQQcAANiPoAMAAKxF0AEAANYi6AAAAGsRdAAAgLUCNugwvRwAAPsFbNBhejkAAPYL2KADAADsR9ABAADWIugAAABrEXQAAIC1CDoAAMBaBB0AAGCtgA06fI8OAAD2C9igw/foAABgv4ANOgAAwH4EHQAAYC2CDgAAsBZBBwAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgrYANOvzWFQAA9gvYoMNvXQEAYL+ADToAAMB+BB0AAGAtgg4AALAWQQcAAFiLoAMAAKxF0AEAANYi6AAAAGsRdAAAgLUIOgAAwFoEHQAAYC2CDgAAsBZBBwAAWIugAwAArBWwQcflcikxMVEpKSn+LgUAADSQgA06TqdTOTk5ys7O9ncpAACggQRs0AEAAPYj6AAAAGsRdAAAgLUIOgAAwFoEHQAAYC2CDgAAsBZBBwAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYi6ADAACsRdABAADWIugAAABrEXQAAIC1mn3QOXbsmAYMGKD+/furb9+++stf/uLvkgAAQBMR4u8C6isyMlLr169XeHi4SkpK1LdvX1133XVq3769v0sDAAB+1uzv6AQHBys8PFySVFpaKmOMjDF+rgoAADQFfg8669ev16hRoxQfHy+Hw6GlS5dW2cblcikhIUEtW7bUoEGDtHnzZo/1x44dU1JSkrp06aJ77rlHMTExjVQ9AABoyvwedEpKSpSUlCSXy1Xt+sWLFyszM1NZWVnatm2bkpKSlJaWpoKCAvc2bdq00ccff6x9+/bplVdeUX5+fmOVDwAAmjC/B5309HQ99NBDGjNmTLXr586dq0mTJmnChAlKTEzUggULFB4erhdffLHKtnFxcUpKStIHH3xQ4/FKS0tVXFzs8QIAAHbye9A5l9OnT2vr1q1KTU11twUFBSk1NVWbNm2SJOXn5+v48eOSpKKiIq1fv169e/eucZ+zZ89WdHS0+9W1a9eGPQkAAOA3TTroHD16VOXl5YqLi/Noj4uL05EjRyRJBw4c0NChQ5WUlKShQ4fqrrvu0iWXXFLjPmfOnKmioiL369ChQw16DgAAwH+a/fTygQMHavv27bXePiwsTGFhYQ1XEAAAaDKa9B2dmJgYBQcHVxlcnJ+fr44dO/qpKgAA0Fw06aATGhqq5ORkrVmzxt1WUVGhNWvWaPDgwfXat8vlUmJiolJSUupbJgAAaKL8/ujqxIkT2rNnj3t537592r59u9q1a6du3bopMzNTGRkZGjBggAYOHKh58+appKREEyZMqNdxnU6nnE6niouLFR0dXd/TAAAATZDfg86WLVs0fPhw93JmZqYkKSMjQ4sWLdLYsWNVWFio++67T0eOHFH//v21YsWKKgOUAQAAzuYwAf57CZV3dIqKihQVFeXvcgAAaDYSZiw77zb754xskGPX9u93kx6jAwAAUB8BG3QYjAwAgP0CNug4nU7l5OQoOzvb36UAAIAGErBBBwAA2I+gAwAArEXQAQAA1grYoMNgZAAA7BewQYfByAAA2C9ggw4AALAfQQcAAFiLoAMAAKxF0AEAANYK2KDDrCsAAOwXsEGHWVcAANgvYIMOAACwH0EHAABYi6ADAACsRdABAADWIugAAABrBWzQYXo5AAD2C9igw/RyAADsF7BBBwAA2I+gAwAArEXQAQAA1iLoAAAAaxF0AACAtQg6AADAWgEbdPgeHQAA7BewQYfv0QEAwH4BG3QAAID9CDoAAMBaBB0AAGAtgg4AALAWQQcAAFiLoAMAAKxF0AEAANYi6AAAAGsRdAAAgLUCNujwExAAANgvYIMOPwEBAID9AjboAAAA+xF0AACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYi6ADAACsRdABAADWIugAAABrEXQAAIC1CDoAAMBaBB0AAGAtgg4AALAWQQcAAFgrYIOOy+VSYmKiUlJS/F0KAABoIAEbdJxOp3JycpSdne3vUgAAQAMJ2KADAADsR9ABAADW8irofPnll76uAwAAwOe8Cjo9e/bU8OHD9d///d86deqUr2sCAADwCa+CzrZt29SvXz9lZmaqY8eOuv3227V582Zf1wYAAFAvXgWd/v37a/78+crNzdWLL76ovLw8DRkyRH379tXcuXNVWFjo6zoBAADqrF6DkUNCQnTddddpyZIlevTRR7Vnzx797ne/U9euXTVu3Djl5eX5qk4AAIA6q1fQ2bJli6ZMmaJOnTpp7ty5+t3vfqe9e/dq9erVys3N1bXXXuurOgEAAOosxJs3zZ07VwsXLtTu3bt1zTXX6OWXX9Y111yjoKDvc9MFF1ygRYsWKSEhwZe1AgAA1IlXQefZZ5/Vr3/9a40fP16dOnWqdpsOHTror3/9a72KAwAAqA+vgs4XX3xx3m1CQ0OVkZHhze4BAAB8wqsxOgsXLtSSJUuqtC9ZskQvvfRSvYsCAADwBa+CzuzZsxUTE1OlvUOHDnrkkUfqXRQAAIAveBV0Dh48qAsuuKBKe/fu3XXw4MF6FwUAAOALXgWdDh06aMeOHVXaP/74Y7Vv377eRQEAAPiCV0Hnl7/8pX7zm99o7dq1Ki8vV3l5ud577z1NmzZNN910k69rBAAA8IpXs64efPBB7d+/XyNGjFBIyPe7qKio0Lhx4xijAwAAmgyvgk5oaKgWL16sBx98UB9//LFatWqlSy65RN27d/d1fQAAAF7zKuhUuuiii3TRRRf5qhYAAACf8irolJeXa9GiRVqzZo0KCgpUUVHhsf69997zSXG1cejQId16660qKChQSEiI/vjHP+rGG29stOMDAICmy6ugM23aNC1atEgjR45U37595XA4fF1XrYWEhGjevHnq37+/jhw5ouTkZF1zzTWKiIjwW00AAKBp8CrovPbaa3r99dd1zTXX+LqeOuvUqZP797Y6duyomJgYffPNNwQdAADg3fTy0NBQ9ezZ0ycFrF+/XqNGjVJ8fLwcDoeWLl1aZRuXy6WEhAS1bNlSgwYN0ubNm6vd19atW1VeXq6uXbv6pDYAANC8eRV07r77bs2fP1/GmHoXUFJSoqSkJLlcrmrXL168WJmZmcrKytK2bduUlJSktLQ0FRQUeGz3zTffaNy4cXr++efrXRMAALCDV4+uNmzYoLVr12r58uXq06ePWrRo4bH+zTffrPW+0tPTlZ6eXuP6uXPnatKkSZowYYIkacGCBVq2bJlefPFFzZgxQ5JUWlqq0aNHa8aMGbrsssvOebzS0lKVlpa6l4uLi2tdKwAAaF68Cjpt2rTRmDFjfF1LFadPn9bWrVs1c+ZMd1tQUJBSU1O1adMmSZIxRuPHj9dPf/pT3Xrrrefd5+zZszVr1qwGqxkAADQdXgWdhQsX+rqOah09elTl5eWKi4vzaI+Li9Nnn30mSfrwww+1ePFi9evXzz2+529/+5suueSSavc5c+ZMZWZmupeLi4sZ0wMAgKW8/sLAsrIyvf/++9q7d69uvvlmRUZGKjc3V1FRUWrdurUvazynIUOGVPken3MJCwtTWFhYA1YEAACaCq+CzoEDB3T11Vfr4MGDKi0t1ZVXXqnIyEg9+uijKi0t1YIFC3xSXExMjIKDg5Wfn+/Rnp+fr44dO/rkGAAAwF5ezbqaNm2aBgwYoG+//VatWrVyt48ZM0Zr1qzxWXGhoaFKTk722GdFRYXWrFmjwYMH12vfLpdLiYmJSklJqW+ZAACgifLqjs4HH3ygjRs3KjQ01KM9ISFBhw8frtO+Tpw4oT179riX9+3bp+3bt6tdu3bq1q2bMjMzlZGRoQEDBmjgwIGaN2+eSkpK3LOwvOV0OuV0OlVcXKzo6Oh67QsAADRNXgWdiooKlZeXV2n/6quvFBkZWad9bdmyRcOHD3cvVw4UzsjI0KJFizR27FgVFhbqvvvu05EjR9S/f3+tWLGiygBlAACAszmMF9/6N3bsWEVHR+v5559XZGSkduzYodjYWF177bXq1q1bo83K8oXKOzpFRUWKiorydzkAADQbCTOWnXeb/XNGNsixa/v326s7Ok888YTS0tKUmJioU6dO6eabb9YXX3yhmJgYvfrqq14X3ZhcLpdcLle1d6YAAIAdvLqjI30/vfy1117Tjh07dOLECV166aW65ZZbPAYnNwfc0QEAwDvW3tGRpJCQEP3qV7/y9u0AAAANzqug8/LLL59z/bhx47wqBgAAwJe8CjrTpk3zWD5z5oxOnjyp0NBQhYeHE3QAAECT4NUXBn777bcerxMnTmj37t0aMmRIsxmMDAAA7OdV0KlOr169NGfOnCp3e5oqvhkZAAD7+SzoSN8PUM7NzfXlLhuM0+lUTk6OsrOz/V0KAABoIF6N0fnnP//psWyMUV5enp5++mn95Cc/8UlhAAAA9eVV0Bk9erTHssPhUGxsrH7605/qiSee8EVdAAAA9eb1b10BAAA0dT4do9OcMBgZAAD7eXVHp/IXxmtj7ty53hyiwTmdTjmdTvdXSAMAAPt4FXQ++ugjffTRRzpz5ox69+4tSfr8888VHBysSy+91L2dw+HwTZUAAABe8CrojBo1SpGRkXrppZfUtm1bSd9/ieCECRM0dOhQ3X333T4tEgAAwBtejdF54oknNHv2bHfIkaS2bdvqoYceYtYVAABoMrwKOsXFxSosLKzSXlhYqOPHj9e7KAAAAF/wKuiMGTNGEyZM0JtvvqmvvvpKX331lf7+979r4sSJuu6663xdIwAAgFe8GqOzYMEC/e53v9PNN9+sM2fOfL+jkBBNnDhRjz/+uE8LBAAA8JZXQSc8PFzPPPOMHn/8ce3du1eS1KNHD0VERPi0uIbkcrnkcrlUXl7u71IAAEADqdcXBubl5SkvL0+9evVSRESEjDG+qqvB8aOeAADYz6ug8/XXX2vEiBG66KKLdM011ygvL0+SNHHiRKaWAwCAJsOroPPb3/5WLVq00MGDBxUeHu5uHzt2rFasWOGz4gAAAOrDqzE6q1at0sqVK9WlSxeP9l69eunAgQM+KQwAAKC+vLqjU1JS4nEnp9I333yjsLCwehcFAADgC14FnaFDh+rll192LzscDlVUVOixxx7T8OHDfVYcAABAfXj16Oqxxx7TiBEjtGXLFp0+fVr33nuvPvnkE33zzTf68MMPfV0jAACAV7y6o9O3b199/vnnGjJkiK699lqVlJTouuuu00cffaQePXr4usYG4XK5lJiYqJSUFH+XAgAAGojD1PHLb86cOaOrr75aCxYsUK9evRqqrkZTXFys6OhoFRUVKSoqyt/lAADQbCTMWHbebfbPGdkgx67t3+8639Fp0aKFduzYUa/iAAAAGoNXj65+9atf6a9//auvawEAAPAprwYjl5WV6cUXX9S7776r5OTkKr9xNXfuXJ8UBwAAUB91CjpffvmlEhIStGvXLl166aWSpM8//9xjG4fD4bvqAAAA6qFOQadXr17Ky8vT2rVrJX3/kw9PPvmk4uLiGqQ4AACA+qjTGJ2zJ2gtX75cJSUlPi0IAADAV7wajFypjjPTAQAAGlWdgo7D4agyBocxOQAAoKmq0xgdY4zGjx/v/uHOU6dO6Y477qgy6+rNN9/0XYUAAABeqlPQycjI8Fj+1a9+5dNiGpPL5ZLL5VJ5ebm/SwEAAA2kzj8BYRt+AgIAAO9Y+RMQAAAAzQVBBwAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYi6ADAACsRdABAADWIugAAABrEXQAAIC1AjbouFwuJSYmKiUlxd+lAACABhKwQcfpdConJ0fZ2dn+LgUAADSQgA06AADAfgQdAABgLYIOAACwFkEHAABYi6ADAACsRdABAADWIugAAABrEXQAAIC1CDoAAMBaBB0AAGAtgg4AALAWQQcAAFiLoAMAAKxF0AEAANYi6AAAAGsRdAAAgLUIOgAAwFoEHQAAYC2CDgAAsJYVQWfMmDFq27atbrjhBn+XAgAAmhArgs60adP08ssv+7sMAADQxFgRdIYNG6bIyEh/lwEAAJoYvwed9evXa9SoUYqPj5fD4dDSpUurbONyuZSQkKCWLVtq0KBB2rx5c+MXCgAAmh2/B52SkhIlJSXJ5XJVu37x4sXKzMxUVlaWtm3bpqSkJKWlpamgoKCRKwUAAM1NiL8LSE9PV3p6eo3r586dq0mTJmnChAmSpAULFmjZsmV68cUXNWPGjDofr7S0VKWlpe7l4uLiuhcNAACaBb/f0TmX06dPa+vWrUpNTXW3BQUFKTU1VZs2bfJqn7Nnz1Z0dLT71bVrV1+VCwAAmpgmHXSOHj2q8vJyxcXFebTHxcXpyJEj7uXU1FTdeOONeuedd9SlS5dzhqCZM2eqqKjI/Tp06FCD1Q8AAPzL74+ufOHdd9+t9bZhYWEKCwtrwGoAAEBT0aTv6MTExCg4OFj5+fke7fn5+erYsaOfqgIAAM1Fkw46oaGhSk5O1po1a9xtFRUVWrNmjQYPHlyvfbtcLiUmJiolJaW+ZQIAgCbK74+uTpw4oT179riX9+3bp+3bt6tdu3bq1q2bMjMzlZGRoQEDBmjgwIGaN2+eSkpK3LOwvOV0OuV0OlVcXKzo6Oj6ngYAAGiC/B50tmzZouHDh7uXMzMzJUkZGRlatGiRxo4dq8LCQt133306cuSI+vfvrxUrVlQZoAwAAHA2hzHG+LsIf6q8o1NUVKSoqCh/lwMAQLORMGPZebfZP2dkgxy7tn+/m/QYnYbEGB0AAOwXsEHH6XQqJydH2dnZ/i4FAAA0kIANOgAAwH4EHQAAYC2CDgAAsFbABh0GIwMAYL+ADToMRgYAwH4BG3QAAID9CDoAAMBaBB0AAGAtgg4AALBWwAYdZl0BAGC/gA06zLoCAMB+ARt0AACA/Qg6AADAWgQdAABgLYIOAACwFkEHAABYK2CDDtPLAQCwX8AGHaaXAwBgv4ANOgAAwH4EHQAAYC2CDgAAsBZBBwAAWIugAwAArEXQAQAA1iLoAAAAawVs0OELAwEAsF/ABh2+MBAAAPsFbNABAAD2I+gAAABrEXQAAIC1CDoAAMBaBB0AAGAtgg4AALAWQQcAAFiLoAMAAKxF0AEAANYK2KDDT0AAAGC/gA06/AQEAAD2C9igAwAA7EfQAQAA1iLoAAAAaxF0AACAtQg6AADAWgQdAABgLYIOAACwFkEHAABYi6ADAACsRdABAADWIugAAABrEXQAAIC1CDoAAMBaBB0AAGCtEH8X4C8ul0sul0vl5eX+LgUBKGHGsvNus3/OyEaoBADsFrB3dJxOp3JycpSdne3vUgAAQAMJ2KADAADsR9ABAADWIugAAABrEXQAAIC1CDoAAMBaBB0AAGAtgg4AALAWQQcAAFiLoAMAAKxF0AEAANYi6AAAAGsRdAAAgLUIOgAAwFoEHQAAYC2CDgAAsBZBBwAAWIugAwAArEXQAQAA1iLoAAAAa1kRdN5++2317t1bvXr10gsvvODvcgAAQBMR4u8C6qusrEyZmZlau3atoqOjlZycrDFjxqh9+/b+Lg0AAPhZs7+js3nzZvXp00edO3dW69atlZ6erlWrVvm7LAAA0AT4PeisX79eo0aNUnx8vBwOh5YuXVplG5fLpYSEBLVs2VKDBg3S5s2b3etyc3PVuXNn93Lnzp11+PDhxigdAAA0cX4POiUlJUpKSpLL5ap2/eLFi5WZmamsrCxt27ZNSUlJSktLU0FBgVfHKy0tVXFxsccLAADYye9jdNLT05Wenl7j+rlz52rSpEmaMGGCJGnBggVatmyZXnzxRc2YMUPx8fEed3AOHz6sgQMH1ri/2bNna9asWb47gXNImLHsvNvsnzOyESqBrbjGmh8+M9QH10/d+f2OzrmcPn1aW7duVWpqqrstKChIqamp2rRpkyRp4MCB2rVrlw4fPqwTJ05o+fLlSktLq3GfM2fOVFFRkft16NChBj8PAADgH36/o3MuR48eVXl5ueLi4jza4+Li9Nlnn0mSQkJC9MQTT2j48OGqqKjQvffee84ZV2FhYQoLC2vQugEAQNPQpINObf385z/Xz3/+c3+XAQAAmpgm/egqJiZGwcHBys/P92jPz89Xx44d/VQVAABoLpp00AkNDVVycrLWrFnjbquoqNCaNWs0ePDgeu3b5XIpMTFRKSkp9S0TAAA0UX5/dHXixAnt2bPHvbxv3z5t375d7dq1U7du3ZSZmamMjAwNGDBAAwcO1Lx581RSUuKeheUtp9Mpp9Op4uJiRUdH1/c0AABAE+T3oLNlyxYNHz7cvZyZmSlJysjI0KJFizR27FgVFhbqvvvu05EjR9S/f3+tWLGiygBlAACAs/k96AwbNkzGmHNuM3XqVE2dOrWRKgIAALZo0mN0GhJjdAAAsF/ABh2n06mcnBxlZ2f7uxQAANBAAjboAAAA+xF0AACAtQg6AADAWgEbdBiMDACA/QI26DAYGQAA+/n9e3T8rfI7fIqLi32+74rSk+fdpiGOi6bPV9cG11jzw2eG+mhq148/66nc7/m+i89hzreF5b766it17drV32UAAAAvHDp0SF26dKlxfcAHnYqKCuXm5ioyMlIOh+Oc2xYXF6tr1646dOiQoqKiGqnCwEV/Nz76vHHR342PPm9cDdnfxhgdP35c8fHxCgqqeSROwD+6CgoKOmcSrE5UVBT/A2lE9Hfjo88bF/3d+OjzxtVQ/V2bH+UO2MHIAADAfgQdAABgLYJOHYSFhSkrK0thYWH+LiUg0N+Njz5vXPR346PPG1dT6O+AH4wMAADsxR0dAABgLYIOAACwFkEHAABYi6ADAACsRdABAADWCvigM3v2bKWkpCgyMlIdOnTQ6NGjtXv3bo9thg0bJofD4fG64447PLY5ePCgRo4cqfDwcHXo0EH33HOPysrKGvNUmoVnn31W/fr1c39L5uDBg7V8+XL3+lOnTsnpdKp9+/Zq3bq1rr/+euXn53vsg76um/P1Odd3w5ozZ44cDoemT5/ubuM6bzjV9TfXuG/df//9Vfrz4osvdq9vatd3wP8ExLp16+R0OpWSkqKysjL94Q9/0FVXXaWcnBxFRES4t5s0aZIeeOAB93J4eLj73+Xl5Ro5cqQ6duyojRs3Ki8vT+PGjVOLFi30yCOPNOr5NHVdunTRnDlz1KtXLxlj9NJLL+naa6/VRx99pD59+ui3v/2tli1bpiVLlig6OlpTp07Vddddpw8//FASfe2N8/W5xPXdULKzs/Xcc8+pX79+Hu1c5w2jpv6WuMZ9rU+fPnr33XfdyyEh/xcnmtz1beChoKDASDLr1q1zt11xxRVm2rRpNb7nnXfeMUFBQebIkSPutmeffdZERUWZ0tLShizXCm3btjUvvPCCOXbsmGnRooVZsmSJe92nn35qJJlNmzYZY+hrX6nsc2O4vhvK8ePHTa9evczq1as9+pjrvGHU1N/GcI37WlZWlklKSqp2XVO8vgP+0dXZioqKJEnt2rXzaP+f//kfxcTEqG/fvpo5c6ZOnjzpXrdp0yZdcskliouLc7elpaWpuLhYn3zySeMU3gyVl5frtddeU0lJiQYPHqytW7fqzJkzSk1NdW9z8cUXq1u3btq0aZMk+rq+zu7zSlzfvud0OjVy5EiP61kS13kDqam/K3GN+9YXX3yh+Ph4XXjhhbrlllt08OBBSU3z+g74R1c/VFFRoenTp+snP/mJ+vbt626/+eab1b17d8XHx2vHjh36/e9/r927d+vNN9+UJB05csTjA5PkXj5y5EjjnUAzsXPnTg0ePFinTp1S69at9Y9//EOJiYnavn27QkND1aZNG4/t4+Li3P1IX3unpj6XuL4bwmuvvaZt27YpOzu7yrojR45wnfvYufpb4hr3tUGDBmnRokXq3bu38vLyNGvWLA0dOlS7du1qktc3QecHnE6ndu3apQ0bNni0T5482f3vSy65RJ06ddKIESO0d+9e9ejRo7HLbPZ69+6t7du3q6ioSG+88YYyMjK0bt06f5dltZr6PDExkevbxw4dOqRp06Zp9erVatmypb/LsV5t+ptr3LfS09Pd/+7Xr58GDRqk7t276/XXX1erVq38WFn1eHT1/02dOlVvv/221q5dqy5dupxz20GDBkmS9uzZI0nq2LFjlRHllcsdO3ZsgGqbt9DQUPXs2VPJycmaPXu2kpKSNH/+fHXs2FGnT5/WsWPHPLbPz8939yN97Z2a+rw6XN/1s3XrVhUUFOjSSy9VSEiIQkJCtG7dOj355JMKCQlRXFwc17kPna+/y8vLq7yHa9y32rRpo4suukh79uxpkv8dD/igY4zR1KlT9Y9//EPvvfeeLrjggvO+Z/v27ZKkTp06SZIGDx6snTt3qqCgwL3N6tWrFRUV5X48gJpVVFSotLRUycnJatGihdasWeNet3v3bh08eNA9noS+9o3KPq8O13f9jBgxQjt37tT27dvdrwEDBuiWW25x/5vr3HfO19/BwcFV3sM17lsnTpzQ3r171alTp6b533GfD29uZu68804THR1t3n//fZOXl+d+nTx50hhjzJ49e8wDDzxgtmzZYvbt22feeustc+GFF5rLL7/cvY+ysjLTt29fc9VVV5nt27ebFStWmNjYWDNz5kx/nVaTNWPGDLNu3Tqzb98+s2PHDjNjxgzjcDjMqlWrjDHG3HHHHaZbt27mvffeM1u2bDGDBw82gwcPdr+fvq67c/U513fjOHvWD9d5w/phf3ON+97dd99t3n//fbNv3z7z4YcfmtTUVBMTE2MKCgqMMU3v+g74oCOp2tfChQuNMcYcPHjQXH755aZdu3YmLCzM9OzZ09xzzz2mqKjIYz/79+836enpplWrViYmJsbcfffd5syZM344o6bt17/+tenevbsJDQ01sbGxZsSIEe6QY4wx3333nZkyZYpp27atCQ8PN2PGjDF5eXke+6Cv6+Zcfc713TjODjpc5w3rh/3NNe57Y8eONZ06dTKhoaGmc+fOZuzYsWbPnj3u9U3t+nYYY4zv7xMBAAD4X8CP0QEAAPYi6AAAAGsRdAAAgLUIOgAAwFoEHQAAYC2CDgAAsBZBBwAAWIugAwAArEXQAQAA1iLoAAAAaxF0AACAtf4fiVInC+FH654AAAAASUVORK5CYII=", "text/plain": [ "

" ] @@ -372,7 +385,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "id": "82355195-bcd0-47fe-9cea-8feb680dd650", "metadata": {}, "outputs": [ @@ -386,38 +399,39 @@ " white-space: pre-wrap;\n", "}\n", "\n", - "shape: (6_154,)
excerpt
str
"No. 2008/03296/A9 2008/03350/A…
"Neutral Citation Number: [2006…
"Neutral Citation Number: [2012…
"Neutral Citation Number: [2014…
"No: 201802356 A2 Neutral Citat…
"Neutral Citation Number: [2018…
"Case No: 2002/04091/D1 Neutral…
"Neutral Citation Number: [2010…
"Case No: 200305991 D2 Neutral …
"2017/05382/B1 Neutral Citation…
" + "shape: (6_154,)
excerpt
str
"No. 2008/03296…
"Neutral Citati…
"Neutral Citati…
"Neutral Citati…
"No: 201802356 …
"Neutral Citati…
"Case No: 2002/…
"Neutral Citati…
"Case No: 20030…
"2017/05382/B1 …
" ], "text/plain": [ "shape: (6_154,)\n", "Series: 'excerpt' [str]\n", "[\n", - "\t\"No. 2008/03296/A9 2008/03350/A…\n", - "\t\"Neutral Citation Number: [2006…\n", - "\t\"Neutral Citation Number: [2012…\n", - "\t\"Neutral Citation Number: [2014…\n", - "\t\"No: 201802356 A2 Neutral Citat…\n", + "\t\"No. 2008/03296…\n", + "\t\"Neutral Citati…\n", + "\t\"Neutral Citati…\n", + "\t\"Neutral Citati…\n", + "\t\"No: 201802356 …\n", "\t…\n", - "\t\"Neutral Citation Number: [2018…\n", - "\t\"Case No: 2002/04091/D1 Neutral…\n", - "\t\"Neutral Citation Number: [2010…\n", - "\t\"Case No: 200305991 D2 Neutral …\n", - "\t\"2017/05382/B1 Neutral Citation…\n", + "\t\"Neutral Citati…\n", + "\t\"Case No: 2002/…\n", + "\t\"Neutral Citati…\n", + "\t\"Case No: 20030…\n", + "\t\"2017/05382/B1 …\n", "]" ] }, - "execution_count": 32, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "# | eval: false\n", "pl_df[\"excerpt\"]" ] }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, "id": "bb009db3", "metadata": {}, "outputs": [ @@ -438,12 +452,13 @@ "Name: excerpt, Length: 6154, dtype: float64" ] }, - "execution_count": 33, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "# | eval: false\n", "pl_df[\"excerpt\"].str.strip_chars().str.len_chars().to_pandas()" ] }, @@ -458,21 +473,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "python3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.9" } }, "nbformat": 4, From 5aaa57d92c84982cc3c80f0254a4715c41689d50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Augustyniak?= Date: Mon, 3 Jun 2024 09:47:13 +0000 Subject: [PATCH 4/7] add more raw data to dvc --- data/datasets/en/.gitignore | 2 ++ data/datasets/en/csv.dvc | 6 ++++++ data/datasets/en/xml.dvc | 6 ++++++ 3 files changed, 14 insertions(+) create mode 100644 data/datasets/en/csv.dvc create mode 100644 data/datasets/en/xml.dvc diff --git a/data/datasets/en/.gitignore b/data/datasets/en/.gitignore index 639bb05..8cf34f2 100644 --- a/data/datasets/en/.gitignore +++ b/data/datasets/en/.gitignore @@ -1,2 +1,4 @@ /england_wales_data_refined_7.jsonl /en_judgements_dataset +/xml +/csv diff --git a/data/datasets/en/csv.dvc b/data/datasets/en/csv.dvc new file mode 100644 index 0000000..4363988 --- /dev/null +++ b/data/datasets/en/csv.dvc @@ -0,0 +1,6 @@ +outs: +- md5: 9dd651ab42dcab35b1431c4163a041ba.dir + size: 583602 + nfiles: 1 + hash: md5 + path: csv diff --git a/data/datasets/en/xml.dvc b/data/datasets/en/xml.dvc new file mode 100644 index 0000000..96fd344 --- /dev/null +++ b/data/datasets/en/xml.dvc @@ -0,0 +1,6 @@ +outs: +- md5: 9203a565235f9431cc3beda483b5f727.dir + size: 75196782 + nfiles: 1 + hash: md5 + path: xml From 7367300c6622d1488e37fba633dcb47c98e5c591 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Augustyniak?= Date: Mon, 3 Jun 2024 09:58:01 +0000 Subject: [PATCH 5/7] fix nbdev --- nbs/Data/england-wales/01_Analyze_En_Judgements_Texts.ipynb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nbs/Data/england-wales/01_Analyze_En_Judgements_Texts.ipynb b/nbs/Data/england-wales/01_Analyze_En_Judgements_Texts.ipynb index b40a886..717cf10 100644 --- a/nbs/Data/england-wales/01_Analyze_En_Judgements_Texts.ipynb +++ b/nbs/Data/england-wales/01_Analyze_En_Judgements_Texts.ipynb @@ -15,6 +15,7 @@ "metadata": {}, "outputs": [], "source": [ + "# | eval: false\n", "import json\n", "import string\n", "from datasets import Dataset, DatasetDict, load_from_disk\n", @@ -31,6 +32,7 @@ "metadata": {}, "outputs": [], "source": [ + "# | eval: false\n", "path_ = DATA_PATH / \"datasets\" / \"en\"\n", "jsonl_file = path_ / \"england_wales_data_refined_7.jsonl\"\n", "dataset_path = path_ / \"en_judgements_dataset\"" From e87b20795c8a96772f31193cf4c2ea664a46f722 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Augustyniak?= Date: Mon, 3 Jun 2024 10:37:28 +0000 Subject: [PATCH 6/7] fix ruff --- .../england_wales/01_extract_jsonl_refined.py | 47 +++++-------------- 1 file changed, 12 insertions(+), 35 deletions(-) diff --git a/scripts/england_wales/01_extract_jsonl_refined.py b/scripts/england_wales/01_extract_jsonl_refined.py index 7604b17..e4289d5 100644 --- a/scripts/england_wales/01_extract_jsonl_refined.py +++ b/scripts/england_wales/01_extract_jsonl_refined.py @@ -45,9 +45,7 @@ def extract_and_clean_judges(paragraphs): judges = [] for para in paragraphs: text = para.get_text(strip=True) - if re.search( - r"\bJustice\b|\bJudge\b|\bSIR\b|\bHonour\b|\bHHJ\b", text, re.IGNORECASE - ): + if re.search(r"\bJustice\b|\bJudge\b|\bSIR\b|\bHonour\b|\bHHJ\b", text, re.IGNORECASE): # Remove text within parentheses cleaned_text = re.sub(r"\([^)]*\)", "", text).strip() # Remove dashes and any text following them @@ -82,9 +80,7 @@ def categorize_court(court_name): def extract_information_from_xml(xml_content, file_name): - soup = BeautifulSoup( - xml_content, "xml" - ) # Using 'xml' parser for handling namespaces + soup = BeautifulSoup(xml_content, "xml") # Using 'xml' parser for handling namespaces # Extract required fields _id = soup.find("uk:hash").text if soup.find("uk:hash") else None @@ -119,9 +115,7 @@ def extract_information_from_xml(xml_content, file_name): excerpt = header_text[:500] # Get the full content of the header and judgment body as text - header_content = ( - soup.header.get_text(separator="\n", strip=True) if soup.header else "" - ) + header_content = soup.header.get_text(separator="\n", strip=True) if soup.header else "" judgment_body_content = ( soup.find("judgmentBody").get_text(separator="\n", strip=True) if soup.find("judgmentBody") @@ -145,9 +139,7 @@ def extract_information_from_xml(xml_content, file_name): judges = [ judge for judge in judges - if re.search( - r"\bJustice\b|\bJudge\b|\bSIR\b|\bHonour\b|\bHHJ\b", judge, re.IGNORECASE - ) + if re.search(r"\bJustice\b|\bJudge\b|\bSIR\b|\bHonour\b|\bHHJ\b", judge, re.IGNORECASE) ] # If no judges found, get text from elements @@ -162,32 +154,25 @@ def extract_information_from_xml(xml_content, file_name): # If still no judges found, look for text in

tags with style="text-align:center" if not judges: - centered_paragraphs = soup.find_all( - "p", style=lambda x: x and "text-align:center" in x - ) + centered_paragraphs = soup.find_all("p", style=lambda x: x and "text-align:center" in x) judges.extend(extract_and_clean_judges(centered_paragraphs)) # If still no judges found, look for text in

tags with style="text-align:right" if not judges: - right_aligned_paragraphs = soup.find_all( - "p", style=lambda x: x and "text-align:right" in x - ) + right_aligned_paragraphs = soup.find_all("p", style=lambda x: x and "text-align:right" in x) judges.extend(extract_and_clean_judges(right_aligned_paragraphs)) # Filter judges using regex criteria judges = [ judge for judge in judges - if re.search( - r"\bJustice\b|\bJudge\b|\bSIR\b|\bHonour\b|\bHHJ\b", judge, re.IGNORECASE - ) + if re.search(r"\bJustice\b|\bJudge\b|\bSIR\b|\bHonour\b|\bHHJ\b", judge, re.IGNORECASE) ] # Extract URIs xml_uri = ( soup.find("FRBRManifestation").find("FRBRuri")["value"] - if soup.find("FRBRManifestation") - and soup.find("FRBRManifestation").find("FRBRuri") + if soup.find("FRBRManifestation") and soup.find("FRBRManifestation").find("FRBRuri") else None ) uri = ( @@ -199,16 +184,12 @@ def extract_information_from_xml(xml_content, file_name): # Extract legislation texts legislation_tags = soup.find_all("ref", {"uk:type": "legislation"}) legislation_texts = set(tag.get_text() for tag in legislation_tags) - legislation_list = list( - legislation_texts - ) # Convert set to list to remove duplicates + legislation_list = list(legislation_texts) # Convert set to list to remove duplicates # Extract case references case_tags = soup.find_all("ref", {"uk:type": "case"}) case_references = set(tag.get_text() for tag in case_tags) - case_references_list = list( - case_references - ) # Convert set to list to remove duplicates + case_references_list = list(case_references) # Convert set to list to remove duplicates # Extract case numbers case_numbers = set() @@ -226,9 +207,7 @@ def extract_information_from_xml(xml_content, file_name): # If no case numbers found, look for text in

tags with style="text-align:right" if not case_numbers: - right_aligned_paragraphs = soup.find_all( - "p", style=lambda x: x and "text-align:right" in x - ) + right_aligned_paragraphs = soup.find_all("p", style=lambda x: x and "text-align:right" in x) case_no_pattern = re.compile(r"\b\d{4}/\d{4}/\w+\b|\d{6}") for tag in right_aligned_paragraphs: matches = case_no_pattern.findall(tag.get_text()) @@ -273,9 +252,7 @@ def process_file(file_path): def process_directory(directory_path, output_file): xml_files = [ - os.path.join(directory_path, f) - for f in os.listdir(directory_path) - if f.endswith(".xml") + os.path.join(directory_path, f) for f in os.listdir(directory_path) if f.endswith(".xml") ] with Pool() as pool, open(output_file, "w") as jsonl_file: From c7ceac46b23f35ddbb5384d86f063b920b31846c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Augustyniak?= Date: Mon, 3 Jun 2024 10:40:39 +0000 Subject: [PATCH 7/7] reformat --- ...gements_Texts.ipynb => 03_Analyze_En_Judgements_Texts.ipynb} | 0 .../02_Analyse_En_Dataset.ipynb => 04_Analyse_En_Dataset.ipynb} | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename nbs/Data/{england-wales/01_Analyze_En_Judgements_Texts.ipynb => 03_Analyze_En_Judgements_Texts.ipynb} (100%) rename nbs/Data/{england-wales/02_Analyse_En_Dataset.ipynb => 04_Analyse_En_Dataset.ipynb} (99%) diff --git a/nbs/Data/england-wales/01_Analyze_En_Judgements_Texts.ipynb b/nbs/Data/03_Analyze_En_Judgements_Texts.ipynb similarity index 100% rename from nbs/Data/england-wales/01_Analyze_En_Judgements_Texts.ipynb rename to nbs/Data/03_Analyze_En_Judgements_Texts.ipynb diff --git a/nbs/Data/england-wales/02_Analyse_En_Dataset.ipynb b/nbs/Data/04_Analyse_En_Dataset.ipynb similarity index 99% rename from nbs/Data/england-wales/02_Analyse_En_Dataset.ipynb rename to nbs/Data/04_Analyse_En_Dataset.ipynb index 25da298..dd7c26c 100644 --- a/nbs/Data/england-wales/02_Analyse_En_Dataset.ipynb +++ b/nbs/Data/04_Analyse_En_Dataset.ipynb @@ -5,7 +5,7 @@ "id": "a98d226c", "metadata": {}, "source": [ - "# Analyse Polish Dataset\n" + "# Analyse England and Wales Dataset\n" ] }, {