Skip to content

Commit

Permalink
add logging of numerical results in experiments
Browse files Browse the repository at this point in the history
  • Loading branch information
jaivardhankapoor committed Feb 5, 2024
1 parent cfd3cb9 commit 2a9ce77
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 22 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -148,4 +148,6 @@ figures/

.idea/

secrets.py
secrets.py

results/
1 change: 1 addition & 0 deletions configs/conf_default.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@

exp_log_name: "default" # optional but recommended
data: "random"
experiments: ["ScaleDimKL"]
n: 10000
Expand Down
33 changes: 24 additions & 9 deletions labproject/experiments.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import torch
from metrics import sliced_wasserstein_distance, gaussian_kl_divergence
from plotting import plot_scaling_metric_dimensionality
import pickle


class Experiment:
Expand All @@ -9,32 +10,46 @@ def __init__(self):

def run_experiment(self, metric, dataset1, dataset2):
    """Run the experiment; abstract hook that always raises in the base class.

    NOTE(review): the ``ScaleDim`` override in this file accepts only
    ``(dataset1, dataset2)`` and has no ``metric`` parameter -- confirm
    which signature is the intended contract.

    Raises:
        NotImplementedError: always; subclasses must override this method.
    """
    raise NotImplementedError("Subclasses must implement this method")

def plot_experiment(self):
    """Plot the experiment's results; abstract hook that always raises here.

    NOTE(review): the ``ScaleDim`` override in this file takes
    ``(dimensionality, distances, dataset_name)`` while this base method
    takes no arguments -- confirm which signature is intended.

    Raises:
        NotImplementedError: always; subclasses must override this method.
    """
    raise NotImplementedError("Subclasses must implement this method")


def log_results(self, results, log_path):
    """Persist experiment results; abstract hook that always raises here.

    Args:
        results: The object to be saved by the subclass implementation.
        log_path: Destination handed to the subclass implementation.

    Raises:
        NotImplementedError: always; subclasses must override this method.
    """
    raise NotImplementedError("Subclasses must implement this method")


class ScaleDim(Experiment):
    """Evaluate a metric on two datasets restricted to a growing number of columns.

    For every ``d`` in ``self.dimensionality`` the metric is applied to the
    first ``d`` columns (second axis) of both datasets.
    """

    def __init__(self, metric_name, metric_fn, min_dim=1, max_dim=1000, step=100):
        """Store the metric and build the list of dimensionalities to probe.

        Args:
            metric_name: Label passed on to the plotting helper.
            metric_fn: Callable invoked as ``metric_fn(slice1, slice2)``.
            min_dim: First dimensionality evaluated (inclusive).
            max_dim: Upper bound of the sweep (exclusive, as in ``range``).
            step: Increment between consecutive dimensionalities.
        """
        self.metric_name = metric_name
        self.metric_fn = metric_fn
        self.dimensionality = list(range(min_dim, max_dim, step))
        super().__init__()

    def run_experiment(self, dataset1, dataset2):
        """Compute the metric for each dimensionality.

        Args:
            dataset1: Object supporting ``[:, :d]`` indexing.
            dataset2: Object supporting ``[:, :d]`` indexing.

        Returns:
            A ``(dimensionality, distances)`` tuple: the list of probed
            dimensionalities and the metric value obtained for each one.
        """
        # NOTE(review): for array-like inputs a slice beyond the last column
        # presumably yields all columns rather than failing, so values of d
        # larger than the data width would repeat the same result -- confirm.
        distances = [
            self.metric_fn(dataset1[:, :d], dataset2[:, :d])
            for d in self.dimensionality
        ]
        return self.dimensionality, distances

    def plot_experiment(self, dimensionality, distances, dataset_name):
        """Forward the results to ``plot_scaling_metric_dimensionality``.

        The helper is called exactly once per invocation; a second identical
        call would only repeat the plotting work.
        """
        plot_scaling_metric_dimensionality(
            dimensionality, distances, self.metric_name, dataset_name
        )

    def log_results(self, results, log_path):
        """Pickle ``results`` to ``log_path``, overwriting any existing file.

        The parent directory of ``log_path`` must already exist.
        """
        with open(log_path, "wb") as f:
            pickle.dump(results, f)


class ScaleDimKL(ScaleDim):
    """Dimensionality sweep that uses ``gaussian_kl_divergence`` as the metric.

    The sweep starts at two dimensions; the upper bound and step are the
    ``ScaleDim`` defaults.
    """

    def __init__(self):
        super().__init__(
            metric_name="KL",
            metric_fn=gaussian_kl_divergence,
            min_dim=2,
        )



class ScaleDimSW(ScaleDim):
    """Dimensionality sweep that uses ``sliced_wasserstein_distance`` as the metric.

    All sweep bounds are the ``ScaleDim`` defaults.
    """

    def __init__(self):
        # Initialise the parent exactly once; repeating the call would only
        # rebuild the same attributes.
        super().__init__("Sliced Wasserstein", sliced_wasserstein_distance)
44 changes: 32 additions & 12 deletions labproject/run_default.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,61 @@

from labproject.utils import set_seed, get_cfg
from labproject.data import get_dataset
from labproject.experiments import *


import time
import datetime
import os


def get_log_path(cfg):
    """Build the pickle path for the numerical results of the current run.

    The file name is ``<exp_log_name>_<timestamp>.pkl`` when the config
    carries a non-empty ``exp_log_name`` and ``<timestamp>.pkl`` otherwise,
    where the timestamp is the local time formatted as
    ``YYYY-MM-DD_HH-MM-SS``. The file lives under
    ``results/<cfg.running_user>/``.

    Import this function in a ``run_{name}.py`` script and call it to obtain
    the path; it does not create any directory.

    Args:
        cfg: Config object supporting ``"key" in cfg`` and attribute access.
            Must provide ``running_user``; ``exp_log_name`` is optional.

    Returns:
        The relative path of the results file as a string.
    """
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

    # A missing, null or empty name falls back to the bare timestamp instead
    # of failing on string concatenation.
    name = cfg.exp_log_name if "exp_log_name" in cfg else None
    exp_log_name = f"{name}_{timestamp}" if name else timestamp

    # Join the components individually so the platform separator is used.
    return os.path.join("results", str(cfg.running_user), f"{exp_log_name}.pkl")


if __name__ == "__main__":
    # Entry point: run every configured experiment once, save its numerical
    # results, then plot them.
    print("Running experiments...")
    cfg = get_cfg()
    seed = cfg.seed

    set_seed(seed)
    print(f"Seed: {seed}")
    print(f"Experiments: {cfg.experiments}")
    print(f"Data: {cfg.data}")

    dataset_fn = get_dataset(cfg.data)

    for exp_name in cfg.experiments:
        # Experiment classes are looked up by name among the names pulled in
        # by the star import of labproject.experiments.
        experiment = globals()[exp_name]()
        time_start = time.time()
        dataset1 = dataset_fn(cfg.n, cfg.d)
        dataset2 = dataset_fn(cfg.n, cfg.d)

        # Run the experiment exactly once; the timing covers data generation
        # plus the metric computation.
        output = experiment.run_experiment(dataset1=dataset1, dataset2=dataset2)
        time_end = time.time()
        print(f"Experiment {exp_name} finished in {time_end - time_start}")

        # Save the numbers before plotting so they survive a plotting failure.
        # NOTE(review): the path depends only on cfg and a one-second
        # timestamp, so two experiments finishing within the same second
        # would write to the same file -- confirm whether exp_name should be
        # part of the file name.
        log_path = get_log_path(cfg)
        os.makedirs(os.path.dirname(log_path), exist_ok=True)
        experiment.log_results(output, log_path)
        print(f"Numerical results saved to {log_path}")

        experiment.plot_experiment(*output, cfg.data)
        print(f"Plots saved to {cfg.data}.png")

    print("Finished running experiments.")

0 comments on commit 2a9ce77

Please sign in to comment.