Skip to content

Commit

Permalink
collect results from workdirs run at different timestamps
Browse files Browse the repository at this point in the history
  • Loading branch information
chandramouli-sastry committed Oct 9, 2023
1 parent 4131232 commit e7abddb
Showing 1 changed file with 41 additions and 33 deletions.
74 changes: 41 additions & 33 deletions scoring/scoring_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import glob
import json
import os
import re
Expand All @@ -9,6 +10,7 @@
# Regex for a line of the form "<prefix> Metrics: {<payload>}".
# Group 1 captures the prefix, group 2 the brace-delimited payload.
METRICS_LINE_REGEX = r'(.*) Metrics: ({.*})'
# Regex for trial directory names such as "trial_1"; group 1 captures the
# numeric index. Raw string so that `\d` is not treated as an (invalid)
# string escape sequence.
TRIAL_DIR_REGEX = r'trial_(\d+)'
# Name of the per-trial CSV file that holds the evaluation measurements.
MEASUREMENTS_FILENAME = 'eval_measurements.csv'
# Regex for a directory-name suffix: a dash, four digits, then five "-NN"
# groups (presumably -YYYY-MM-DD-HH-MM-SS -- confirm against the code that
# names the work directories).
TIMESTAMP = r"-\d{4}(-\d{2}){5}"


#### File IO helper functions ###
Expand Down Expand Up @@ -137,7 +139,9 @@ def get_experiment_df(experiment_dir):
scoring.compute_performance_profiles.
Args:
experiment_dir: path to experiment directory containing
results for workloads.
results for workloads. Measurements from experiments
sharing the same prefix but different timestamps are
collected together.
The directory structure is assumed to be:
+ experiment_dir
+ <workload>
Expand All @@ -148,38 +152,42 @@ def get_experiment_df(experiment_dir):
df: DataFrame where indices are trials, columns are
metric names and values are lists.
e.g.:
+----+-----------+---------+--------------------+--------------------+
| | workload | trial | validation/accuracy| score |
|----+-----------+---------+--------------------+--------------------|
| 0 | mnist_jax | trial_1 | [0.0911, 0.0949] | [10.6396, 10.6464] |
+----+-----------+---------+--------------------+--------------------+
+----+-----------+-----------------------------+--------------------+--------------------+
| | workload | trial | validation/accuracy| score |
|----+-----------+-----------------------------+--------------------+--------------------|
| 0 | mnist_jax | (trial_1, <experiment_dir>) | [0.0911, 0.0949] | [10.6396, 10.6464] |
+----+-----------+-----------------------------+--------------------+--------------------+
"""
df = pd.DataFrame()
workload_dirs = os.listdir(experiment_dir)
for workload in workload_dirs:
data = {
'workload': workload,
}
trial_dirs = [
t for t in os.listdir(os.path.join(experiment_dir, workload))
if re.match(TRIAL_DIR_REGEX, t)
]
for trial in trial_dirs:
eval_measurements_filepath = os.path.join(
experiment_dir,
workload,
trial,
MEASUREMENTS_FILENAME,
)
try:
trial_df = pd.read_csv(eval_measurements_filepath)
except FileNotFoundError as e:
logging.info(f'Could not read {eval_measurements_filepath}')
continue
data['trial'] = trial
for column in trial_df.columns:
values = trial_df[column].to_numpy()
data[column] = values
trial_df = pd.DataFrame([data])
df = pd.concat([df, trial_df], ignore_index=True)
paths = filter(
lambda x: re.match(experiment_dir + TIMESTAMP, x) or x == experiment_dir,
glob.glob(f"{experiment_dir}*"))
for experiment_dir in list(paths):
workload_dirs = os.listdir(experiment_dir)
for workload in workload_dirs:
data = {
'workload': workload,
}
trial_dirs = [
t for t in os.listdir(os.path.join(experiment_dir, workload))
if re.match(TRIAL_DIR_REGEX, t)
]
for trial in trial_dirs:
eval_measurements_filepath = os.path.join(
experiment_dir,
workload,
trial,
MEASUREMENTS_FILENAME,
)
try:
trial_df = pd.read_csv(eval_measurements_filepath)
except FileNotFoundError as e:
logging.info(f'Could not read {eval_measurements_filepath}')
continue
data['trial'] = (trial, experiment_dir)
for column in trial_df.columns:
values = trial_df[column].to_numpy()
data[column] = values
trial_df = pd.DataFrame([data])
df = pd.concat([df, trial_df], ignore_index=True)
return df

0 comments on commit e7abddb

Please sign in to comment.