From 65b40a59412295deb1db06cee8a15549b54f2797 Mon Sep 17 00:00:00 2001 From: Frank Schneider Date: Tue, 30 Jul 2024 13:04:15 +0200 Subject: [PATCH] Quality of Life improvements --- scoring/performance_profile.py | 26 ++++++++++++++------------ scoring/score_submissions.py | 7 ++----- scoring/scoring_utils.py | 2 +- 3 files changed, 17 insertions(+), 18 deletions(-) diff --git a/scoring/performance_profile.py b/scoring/performance_profile.py index 31106f057..391a07927 100644 --- a/scoring/performance_profile.py +++ b/scoring/performance_profile.py @@ -26,20 +26,19 @@ the dictionary of submissions. """ import itertools -import logging import operator import os import re -from absl import logging import matplotlib as mpl import matplotlib.pyplot as plt import numpy as np import pandas as pd +from absl import logging from tabulate import tabulate -from algorithmic_efficiency.workloads.workloads import get_base_workload_name import algorithmic_efficiency.workloads.workloads as workloads_registry +from algorithmic_efficiency.workloads.workloads import get_base_workload_name from scoring import scoring_utils WORKLOADS = workloads_registry.WORKLOADS @@ -184,10 +183,10 @@ def get_workloads_time_to_target(submission, if strict: raise ValueError( f'Expecting {NUM_BASE_WORKLOADS + NUM_VARIANT_WORKLOADS} workloads ' - f'but found {num_workloads} workloads.') + f'but found {num_workloads} workloads for {submission_name}.') logging.warning( f'Expecting {NUM_BASE_WORKLOADS + NUM_VARIANT_WORKLOADS} workloads ' - f'but found {num_workloads} workloads.') + f'but found {num_workloads} workloads for {submission_name}.') # For each workload get submission time get the submission times to target. for workload, group in submission.groupby('workload'): @@ -198,11 +197,13 @@ def get_workloads_time_to_target(submission, num_studies = len(group.groupby('study')) if num_studies != NUM_STUDIES: if strict: - raise ValueError(f'Expecting {NUM_STUDIES} trials for workload ' - f'{workload} but found {num_studies} trials.') + raise ValueError(f'Expecting {NUM_STUDIES} studies for workload ' + f'{workload} but found {num_studies} studies ' + f'for {submission_name}.') else: - logging.warning(f'Expecting {NUM_STUDIES} trials for workload ' - f'{workload} but found {num_studies} trials.') + logging.warning(f'Expecting {NUM_STUDIES} studies for workload ' + f'{workload} but found {num_studies} studies ' + f'for {submission_name}.') # For each study check trials for study, group in group.groupby('study'): @@ -213,11 +214,13 @@ def get_workloads_time_to_target(submission, if strict: raise ValueError( f'In Study {study}: Expecting {NUM_TRIALS} trials for workload ' - f'{workload} but found {num_trials} trials.') + f'{workload} but found {num_trials} trials ' + f'for {submission_name}.') else: logging.warning( f'In Study {study}: Expecting {NUM_TRIALS} trials for workload ' - f'{workload} but found {num_trials} trials.') + f'{workload} but found {num_trials} trials ' + f'for {submission_name}.') # Get trial and time index that reaches target trial_idx, time_idx = get_best_trial_index( @@ -309,7 +312,6 @@ def compute_performance_profiles(submissions, df[df.apply(lambda x: x > 4 * best_scores, axis=1)] = np.inf # For each held-out workload if variant target was not hit set submission to inf - framework = None for workload in df.keys(): if workload not in BASE_WORKLOADS: # If variants do not have finite score set base_workload score to inf diff --git a/scoring/score_submissions.py b/scoring/score_submissions.py index bbc23a1fc..bc2340029 100644 --- a/scoring/score_submissions.py +++ b/scoring/score_submissions.py @@ -12,18 +12,15 @@ --compute_performance_profiles """ -import json import operator import os import pickle -from absl import app -from absl import flags -from absl import logging import numpy as np import pandas as pd import performance_profile import scoring_utils +from absl import app, flags, logging from tabulate import tabulate flags.DEFINE_string( @@ -136,7 +133,7 @@ def get_submission_summary(df, include_test_split=True): return df -def compute_leaderboard_score(df, normalize=False): +def compute_leaderboard_score(df, normalize=True): """Compute leaderboard score by taking integral of performance profile. Args: diff --git a/scoring/scoring_utils.py b/scoring/scoring_utils.py index 0dd997ab9..fd8b0b2c3 100644 --- a/scoring/scoring_utils.py +++ b/scoring/scoring_utils.py @@ -4,8 +4,8 @@ import os import re -from absl import logging import pandas as pd +from absl import logging import algorithmic_efficiency.workloads.workloads as workloads_registry