Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH semi-automatic parser #141

Merged
merged 3 commits into from
Nov 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions jsonParser/semi-automatic/converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import aspose.pdf as pdf
import os
import click
from glob import glob

@click.command()
@click.option('--input', '-i', help='Input path with PDF files', required=True)
def main(input:str):
    """Convert every PDF file directly inside a folder to a CSV file.

    Each ``<name>.pdf`` found in the folder is converted to ``<name>.csv``
    written next to it.

    Args:
        input: Path of the folder that contains the PDF files.

    Raises:
        FileNotFoundError: If ``input`` is not an existing folder.
    """
    if not os.path.isdir(input):
        raise FileNotFoundError(f"'{input}' is not a folder or does not exist")

    # Sorted so the conversion order (and the log output) is deterministic.
    for file_pdf in sorted(glob(os.path.join(input, "*.pdf"))):
        # Swap only the trailing extension: str.replace would also rewrite
        # any other ".pdf" occurrence in the path (e.g. in a folder name).
        output_file = os.path.splitext(file_pdf)[0] + ".csv"
        print(f"Converting {file_pdf} to {output_file}...")
        convert_PDF_to_CSV(file_pdf, output_file)

def convert_PDF_to_CSV(infile:str, outfile:str):
    """Save the PDF document at ``infile`` as a CSV file at ``outfile``.

    Args:
        infile: Path of the PDF document to load.
        outfile: Path the converted CSV output is saved to.
    """
    # Open the source PDF through the Aspose binding imported as `pdf`.
    source_doc = pdf.Document(infile)

    # Excel save options switched to the CSV output format.
    csv_options = pdf.ExcelSaveOptions()
    csv_options.format = pdf.ExcelSaveOptions.ExcelFormat.CSV

    # Write the document out using those options.
    source_doc.save(outfile, csv_options)

    print("Rendering process completed")

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
66 changes: 52 additions & 14 deletions jsonParser/semi-automatic/parse_election_results.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,39 @@
import csv
import json
import click
import os
import csv
from typing import List
from glob import glob

@click.command()
@click.option('--input', '-i', help='Input CSV file or path with CSV files', required=True)
@click.option('--output', '-o', help='Output JSON file or folder', required=True)
def main(input:str, output:str):
    """Create a JSON file from one CSV file, or from every CSV file in a folder.

    When ``input`` is a folder, ``output`` is treated as a folder too: every
    ``<name>.csv`` found is written to ``<output>/<name>.json``. Otherwise
    ``input`` and ``output`` are used as single file paths.

    Args:
        input: Path of a CSV file, or of a folder that contains CSV files.
        output: Path of the JSON file to write, or of the destination folder
            when ``input`` is a folder.

    Raises:
        FileNotFoundError: If ``input`` does not exist, or if it is a folder
            in which no CSV file is found.
    """
    check_file_exists(input)

    if os.path.isdir(input):
        # Look directly inside the folder first; only when nothing is there
        # fall back to its immediate sub-folders.
        csv_paths = sorted(glob(os.path.join(input, "*.csv")))
        if not csv_paths:
            csv_paths = sorted(glob(os.path.join(input, "*", "*.csv")))
        if not csv_paths:
            raise FileNotFoundError(f"No CSV files found in '{input}'")

        # Make sure the destination folder exists before any file is written.
        os.makedirs(output, exist_ok=True)

        # basename/splitext work with any path separator and only touch the
        # trailing extension, unlike split("/") + replace(".csv", ".json").
        # NOTE(review): CSV files sharing a name in different sub-folders
        # still map to the same JSON path - confirm this cannot happen.
        jobs = [
            (
                csv_path,
                os.path.join(
                    output,
                    os.path.splitext(os.path.basename(csv_path))[0] + ".json",
                ),
            )
            for csv_path in csv_paths
        ]
    else:
        jobs = [(input, output)]

    # Distinct loop names so the `input`/`output` arguments are not shadowed.
    for csv_path, json_path in jobs:
        print(f"Creating JSON file '{json_path}' from CSV file '{csv_path}'...")
        create_json_file(csv_path, json_path)
        print("#"*50)

def create_json_file(input:str, output:str) -> None:
# Read the CSV file
with open(input, 'r', encoding='utf-8-sig') as csv_file:
csv_reader = csv.reader(csv_file, delimiter=';')
csv_reader = csv.reader(csv_file, delimiter=',')
rows_list = create_list(csv_reader)

# Create the dictionary that will contain the data
Expand Down Expand Up @@ -44,13 +66,12 @@ def main(input:str, output:str):
# Save the JSON data to a file
with open(output, 'w', encoding='utf-8') as json_file:
json_file.write(json_output)

def check_file_exists(file_path:str) -> None:
    """Check that ``file_path`` is an existing file or an existing folder.

    Args:
        file_path: Path to validate.

    Raises:
        FileNotFoundError: If the path is neither a file nor a folder.
    """
    # Single guard covering both accepted kinds of path; the older file-only
    # check is subsumed by it.
    if not (os.path.isfile(file_path) or os.path.isdir(file_path)):
        raise FileNotFoundError(f"File '{file_path}' not found")

def create_list(csv_reader:List[str]) -> List[str]:

"""Create a list of rows from the CSV file

Args:
Expand All @@ -60,10 +81,24 @@ def create_list(csv_reader:List[str]) -> List[str]:
list: The list of rows of the CSV file
"""
rows_list = []
for row in csv_reader:
# Replace \xa0 with a space in the entire row
row = [cell.replace('\xa0', ' ') for cell in row]
rows_list.append(row)
for line in csv_reader:
for i in range(len(line)):
line[i] = line[i].replace("\"", "").replace("\n", "").replace("\xa0", " ").replace("Evaluation Only. Created with Aspose.PDF. Copyright 2002-2023 Aspose Pty Ltd.", "")
if len(line) == 1 and (line[0] == "" or line[0] == "VOTI DI LISTA" or line[0] == "BIENNIO 2023/2025" or "ELEZIONI RAPPRESENTANTI" in line[0]):
continue
if line[0] == "VOTI DI LISTA" or line[0] == "BIENNIO 2023/2025" or "ELEZIONI RAPPRESENTANTI" in line[0]:
continue
line = [x.strip() for x in line if x.strip() != "" and "aequo" not in x.strip()]
if len(line) == 0:
continue
if "DIPARTIMENTO" in line[0]:
l = " ".join(line)
line = [l.split("-")[0].strip()]
if len(line) > 1:
if line[1] in line[0]:
line[0] = line[0].replace(line[1], "")
line.append(line[1])
rows_list.append(line)
return rows_list

def get_name_and_seats(rows_list:list, data:dict) -> list:
Expand All @@ -77,8 +112,10 @@ def get_name_and_seats(rows_list:list, data:dict) -> list:
list: The list of rows of the CSV file
"""
row = rows_list[0]
print(row)
data["dipartimento"] = str(row[0])
row = rows_list[1]
print(row)
data["seggi_da_assegnare"] = row[1]
rows_list = rows_list[4:]
return rows_list
Expand All @@ -100,6 +137,7 @@ def get_list_information(rows_list: list, data: dict) -> list:
if row[0].strip() == "TOTALE":
data["liste"].append({"totale": int(row[1].strip())})
break
print(row)
lista = {
"nome": str(row[0].strip()),
"seggi": {
Expand Down Expand Up @@ -142,7 +180,7 @@ def get_votation_information(rows_list:list, data:dict) -> list:
elif row[0].strip() == "VOTANTI":
data["votanti"] = {
"totali": int(row[1].strip()),
"percentuale": float(row[4].strip().replace(",", ".")),
"percentuale": float(row[3].strip().replace(",", ".")),
"seggio_n_telematico": int(row[-1].strip())
}
elif row[0].strip() == "TOTALE ELETTORI AVENTI DIRITTO":
Expand Down Expand Up @@ -176,10 +214,10 @@ def get_candidates_information(rows_list:list, data:dict) -> None:
"lista": list_name,
"voti": {
"totali": int(row[1].strip()),
"seggio_telematico": int(row[-4].strip())
"seggio_telematico": int(row[-1].strip())
}
}
if "ELETTO" in row[3].strip():
if "ELETTO" in row:
data["eletti"].append(candidate)
else:
data["non_eletti"].append(candidate)
Expand Down
119 changes: 119 additions & 0 deletions src/data/2023-2025/Corso di Laurea/Beni_culturali.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
{
"schede": {
"bianche": {
"totali": 5,
"seggio_n_telematico": 5
},
"nulle": {
"totali": 0
},
"contestate": {
"totali": 0
}
},
"liste": [
{
"nome": "Beni Culturali Disum",
"seggi": {
"seggi_pieni": "9",
"resti": "0,0",
"seggi_ai_resti": "0",
"seggi_totali": "9"
},
"voti": {
"totali": "127",
"seggio_telematico": "127"
}
},
{
"totale": 127
}
],
"eletti": [
{
"nominativo": "SANFILIPPO Lorenzo",
"lista": "Beni Culturali Disum",
"voti": {
"totali": 34,
"seggio_telematico": 34
}
},
{
"nominativo": "PETRALIA Salvatore Giuseppe",
"lista": "Beni Culturali Disum",
"voti": {
"totali": 34,
"seggio_telematico": 34
}
},
{
"nominativo": "CUSUMANO Francesca Maria",
"lista": "Beni Culturali Disum",
"voti": {
"totali": 13,
"seggio_telematico": 13
}
},
{
"nominativo": "CINO Anastasia",
"lista": "Beni Culturali Disum",
"voti": {
"totali": 2,
"seggio_telematico": 2
}
},
{
"nominativo": "RUGGIERI Edith Maria Gae",
"lista": "Beni Culturali Disum",
"voti": {
"totali": 5,
"seggio_telematico": 5
}
},
{
"nominativo": "DI STEFANO Agnese Pia",
"lista": "Beni Culturali Disum",
"voti": {
"totali": 2,
"seggio_telematico": 2
}
},
{
"nominativo": "ASSENNATO Concetta Ambra",
"lista": "Beni Culturali Disum",
"voti": {
"totali": 8,
"seggio_telematico": 8
}
},
{
"nominativo": "LO PIERO William",
"lista": "Beni Culturali Disum",
"voti": {
"totali": 16,
"seggio_telematico": 16
}
},
{
"nominativo": "BENTIVEGNA Riccardo",
"lista": "Beni Culturali Disum",
"voti": {
"totali": 9,
"seggio_telematico": 9
}
}
],
"non_eletti": [],
"dipartimento": "BENI CULTURALI-L-1-Laurea Triennale (D.M.270/2004)",
"seggi_da_assegnare": "9",
"quoziente": 14.111,
"votanti": {
"totali": 132,
"percentuale": 21.09,
"seggio_n_telematico": 132
},
"elettori": {
"totali": 626,
"seggio_n_telematico": 626
}
}
Loading