diff --git a/grader.py b/grader.py index a708e10..6fbae25 100644 --- a/grader.py +++ b/grader.py @@ -9,7 +9,7 @@ import requests -from utility import execute_system_call, find_emails, extract_link, unzip +from utility import execute_system_call, find_emails, extract_link, unzip, remove_duplicates NULL_EMAIL = 'null___@null__.___' @@ -227,7 +227,7 @@ def grade(self, hw_str='hw00', output_file='grades.csv'): """ # create a file to store the grades with open(output_file, 'w', encoding='utf-8') as grades_file: - grades_file.write('Name, ID, Email, Language, Score\n') + grades_file.write('Name,ID,Email,Language,Score\n') # Unzip the submission file if not os.path.exists(self.submission_dir): print('Unzipping the submission file ...') @@ -257,4 +257,10 @@ def grade(self, hw_str='hw00', output_file='grades.csv'): cnt_passes, email, student_code = self.grade_exception_file(hw_str, student_dir) data.append( (cnt_passes, email, student_code)) self.output(grades_file, i, student_info, data) + + grades_file.close() + print(f'\nGrades saved in {output_file}\n') + print('Cleaning up ...') + remove_duplicates(output_file) + print('============== Grading completed! 
def remove_duplicates(csv_file):
    """
    Remove rows with a duplicated ID from a CSV file, keeping the best score.

    The file is read with pandas, filtered, and written back in place.
    For every ID that occurs more than once, only the row with the highest
    ``Score`` is kept; when several rows share that highest score, the one
    that appears first in the file wins.  Surviving rows keep their original
    relative order.

    Rows whose ID is empty are never considered duplicates of one another,
    so all of them are preserved.  IDs are compared as text after trimming
    surrounding whitespace.

    Args:
        csv_file: Path of the CSV file to clean.  Its header row must
            contain the ``ID`` and ``Score`` columns.

    Raises:
        KeyError: If the ``ID`` or ``Score`` column is missing.
    """
    # Read every field as plain text so that rewriting the file only drops
    # rows and never alters values (type inference would strip leading zeros
    # from IDs, turn integer IDs into floats, or blank out text like "NA").
    df = pd.read_csv(csv_file, dtype=str, keep_default_na=False)

    ids = df['ID'].str.strip()
    # Scores that cannot be parsed become NaN and therefore rank last.
    scores = pd.to_numeric(df['Score'].str.strip(), errors='coerce')

    # Rank rows from highest to lowest score.  Sorting the negated scores in
    # ascending order with a stable algorithm keeps file order among ties.
    ranked_ids = ids.loc[(-scores).sort_values(kind='mergesort').index]

    # For each non-empty ID, every row ranked after the best one is redundant.
    redundant = ranked_ids.ne('') & ranked_ids.duplicated()
    df = df.drop(index=redundant[redundant].index)

    # Write the filtered rows back to the same file.
    df.to_csv(csv_file, index=False)