Merge pull request #776 from fsschneider/scoring_QoL
Scoring: Quality of Life improvements
priyakasimbeg authored Jul 30, 2024
2 parents ec434a9 + 9b6c845 commit 38554d1
Showing 3 changed files with 14 additions and 13 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -29,7 +29,7 @@
 > [!IMPORTANT]
 > Submitters are no longer required to self-report results.
 > We are currently in the process of evaluating and scoring received submissions.
-> We are aiming to release results by July 15th 2024.
+> Results coming soon!
 > For other key dates please see [Call for Submissions](CALL_FOR_SUBMISSIONS.md).
 ## Table of Contents <!-- omit from toc -->
22 changes: 12 additions & 10 deletions scoring/performance_profile.py
@@ -26,7 +26,6 @@
   the dictionary of submissions.
 """
 import itertools
-import logging
 import operator
 import os
 import re
@@ -184,10 +183,10 @@ def get_workloads_time_to_target(submission,
     if strict:
       raise ValueError(
           f'Expecting {NUM_BASE_WORKLOADS + NUM_VARIANT_WORKLOADS} workloads '
-          f'but found {num_workloads} workloads.')
+          f'but found {num_workloads} workloads for {submission_name}.')
     logging.warning(
         f'Expecting {NUM_BASE_WORKLOADS + NUM_VARIANT_WORKLOADS} workloads '
-        f'but found {num_workloads} workloads.')
+        f'but found {num_workloads} workloads for {submission_name}.')

   # For each workload, get the submission times to target.
   for workload, group in submission.groupby('workload'):
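
These checks all follow the same shape, now with the submission name threaded into each message: raise in strict mode, otherwise log a warning and carry on. Below is a minimal sketch of that pattern as a standalone helper; `_check_count` is hypothetical and not part of this repository, and stdlib `logging` stands in for whatever logger the module actually uses:

```python
import logging


def _check_count(actual, expected, what, submission_name, strict=False):
  """Raise in strict mode, otherwise warn, when a count does not match."""
  if actual == expected:
    return
  msg = (f'Expecting {expected} {what} '
         f'but found {actual} {what} for {submission_name}.')
  if strict:
    raise ValueError(msg)
  logging.warning(msg)
```

The same helper would cover the workload, study, and trial checks in this file, e.g. `_check_count(num_workloads, NUM_BASE_WORKLOADS + NUM_VARIANT_WORKLOADS, 'workloads', submission_name, strict)`.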
@@ -198,11 +197,13 @@
     num_studies = len(group.groupby('study'))
     if num_studies != NUM_STUDIES:
       if strict:
-        raise ValueError(f'Expecting {NUM_STUDIES} trials for workload '
-                         f'{workload} but found {num_studies} trials.')
+        raise ValueError(f'Expecting {NUM_STUDIES} studies for workload '
+                         f'{workload} but found {num_studies} studies '
+                         f'for {submission_name}.')
       else:
-        logging.warning(f'Expecting {NUM_STUDIES} trials for workload '
-                        f'{workload} but found {num_studies} trials.')
+        logging.warning(f'Expecting {NUM_STUDIES} studies for workload '
+                        f'{workload} but found {num_studies} studies '
+                        f'for {submission_name}.')

     # For each study, check the trials.
     for study, group in group.groupby('study'):
@@ -213,11 +214,13 @@
       if strict:
         raise ValueError(
             f'In Study {study}: Expecting {NUM_TRIALS} trials for workload '
-            f'{workload} but found {num_trials} trials.')
+            f'{workload} but found {num_trials} trials '
+            f'for {submission_name}.')
       else:
         logging.warning(
             f'In Study {study}: Expecting {NUM_TRIALS} trials for workload '
-            f'{workload} but found {num_trials} trials.')
+            f'{workload} but found {num_trials} trials '
+            f'for {submission_name}.')

       # Get the trial and time index that reaches the target.
       trial_idx, time_idx = get_best_trial_index(
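
The `get_best_trial_index` call is truncated by the diff view. For orientation only, here is one plausible reading of such a lookup, assuming a 2-D array of validation metrics indexed by trial and time; this is a guess at the idea, not the repository's implementation:

```python
import numpy as np


def get_best_trial_index(metrics, target, higher_is_better=True):
  """Return (trial_idx, time_idx) of the trial that reaches `target` first.

  metrics: 2-D array of shape [num_trials, num_steps] of validation scores.
  If no trial ever reaches the target, time_idx equals num_steps (a sentinel
  the caller must check for).
  """
  hits = metrics >= target if higher_is_better else metrics <= target
  # First time index at which each trial hits the target; num_steps if never.
  first_hit = np.where(hits.any(axis=1), hits.argmax(axis=1), metrics.shape[1])
  trial_idx = int(first_hit.argmin())
  return trial_idx, int(first_hit[trial_idx])
```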
@@ -309,7 +312,6 @@ def compute_performance_profiles(submissions,
   df[df.apply(lambda x: x > 4 * best_scores, axis=1)] = np.inf

   # For each held-out workload: if the variant target was not hit, set the submission's score to inf.
-  framework = None
   for workload in df.keys():
     if workload not in BASE_WORKLOADS:
       # If the variants do not have a finite score, set the base_workload score to inf.
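
The deleted `framework = None` was a dead assignment. The loop that follows implements the rule in the comments: a submission that misses a held-out variant's target forfeits its score on the corresponding base workload. A minimal sketch of that rule, assuming `df` has one row per submission and one column of scores per workload; `get_base_workload` is a hypothetical helper for illustration:

```python
import numpy as np


def gate_base_workloads(df, base_workloads, get_base_workload):
  """Void a base-workload score wherever its variant score is not finite."""
  for workload in df.keys():
    if workload in base_workloads:
      continue
    base = get_base_workload(workload)
    # Keep the base score only where the variant score is finite.
    df[base] = df[base].where(np.isfinite(df[workload]), np.inf)
  return df
```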
3 changes: 1 addition & 2 deletions scoring/score_submissions.py
@@ -12,7 +12,6 @@
   --compute_performance_profiles
 """

-import json
 import operator
 import os
 import pickle
@@ -136,7 +135,7 @@ def get_submission_summary(df, include_test_split=True):
   return df


-def compute_leaderboard_score(df, normalize=False):
+def compute_leaderboard_score(df, normalize=True):
   """Compute leaderboard score by taking integral of performance profile.

   Args:
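Flipping the default to `normalize=True` changes the reported numbers but, for a fixed tau range, not the ranking, since every submission is divided by the same constant. As a sketch of what "integral of the performance profile" means here, assuming a DataFrame with one row per submission and one column per tau value (an illustration of the idea, not the repository's exact code):

```python
import numpy as np
import pandas as pd


def leaderboard_score(perf_df: pd.DataFrame, normalize: bool = True) -> pd.Series:
  """Area under each submission's performance-profile curve over tau."""
  taus = perf_df.columns.astype(float)
  # Trapezoidal integral of the fraction of workloads solved within ratio tau.
  scores = perf_df.apply(lambda row: np.trapz(row.values, x=taus), axis=1)
  if normalize:
    # Dividing by the tau range maps scores into [0, 1], making runs with
    # different tau grids comparable.
    scores = scores / (taus.max() - taus.min())
  return scores.sort_values(ascending=False)
```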
