add logging of numerical results in experiments

mackelab · Feb 5, 2024 · 2a9ce77 · 2a9ce77
1 parent cfd3cb9
commit 2a9ce77
Show file tree

Hide file tree

Showing 4 changed files with 60 additions and 22 deletions.
diff --git a/.gitignore b/.gitignore
@@ -148,4 +148,6 @@ figures/
 
 .idea/
 
-secrets.py
+secrets.py
+
+results/
diff --git a/configs/conf_default.yaml b/configs/conf_default.yaml
@@ -1,4 +1,5 @@
 
+exp_log_name: "default" # optional but recommended; a timestamp is appended to form the results file name
 data: "random" 
 experiments: ["ScaleDimKL"]
 n: 10000

diff --git a/labproject/experiments.py b/labproject/experiments.py
@@ -1,6 +1,7 @@
 import torch
 from metrics import sliced_wasserstein_distance, gaussian_kl_divergence
 from plotting import plot_scaling_metric_dimensionality
+import pickle
 
 
 class Experiment:
@@ -9,32 +10,46 @@ def __init__(self):
 
     def run_experiment(self, metric, dataset1, dataset2):
         raise NotImplementedError("Subclasses must implement this method")
-    
+
     def plot_experiment(self):
         raise NotImplementedError("Subclasses must implement this method")
-
+
+    def log_results(self, results, log_path):
+        raise NotImplementedError("Subclasses must implement this method")
+
 
 class ScaleDim(Experiment):
-    
+
     def __init__(self, metric_name, metric_fn, min_dim=1, max_dim=1000, step=100):
         self.metric_name = metric_name
         self.metric_fn = metric_fn
         self.dimensionality = list(range(min_dim, max_dim, step))
         super().__init__()
-    
+
     def run_experiment(self, dataset1, dataset2):
         distances = []
         for d in self.dimensionality:
             distances.append(self.metric_fn(dataset1[:, :d], dataset2[:, :d]))
         return self.dimensionality, distances
-    
+
     def plot_experiment(self, dimensionality, distances, dataset_name):
-        plot_scaling_metric_dimensionality(dimensionality, distances, self.metric_name, dataset_name)
-
+        plot_scaling_metric_dimensionality(
+            dimensionality, distances, self.metric_name, dataset_name
+        )
+
+    def log_results(self, results, log_path):
+        """
+        Save the results to the file at log_path using pickle.
+        """
+        with open(log_path, "wb") as f:
+            pickle.dump(results, f)
+
+
 class ScaleDimKL(ScaleDim):
     def __init__(self):
         super().__init__("KL", gaussian_kl_divergence, min_dim=2)
-
+
+
 class ScaleDimSW(ScaleDim):
     def __init__(self):
-        super().__init__("Sliced Wasserstein", sliced_wasserstein_distance)
+        super().__init__("Sliced Wasserstein", sliced_wasserstein_distance)
diff --git a/labproject/run_default.py b/labproject/run_default.py
@@ -1,41 +1,61 @@
-
 from labproject.utils import set_seed, get_cfg
 from labproject.data import get_dataset
 from labproject.experiments import *
 
 
 import time
+import datetime
+import os
+
+
+def get_log_path(cfg):
+    """
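+    Get the log path (a .pkl file) used to save the numerical results of the current experiment run.
+    The file is results/{cfg.running_user}/{exp_log_name}_{timestamp}.pkl; without cfg.exp_log_name the file name is just {timestamp}.pkl.
+    Import this function in the run_{name}.py file and call it to get the log path.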
+    """
+
+    # current time; formatted below into the timestamp part of the file name
+    now = datetime.datetime.now()
+    if "exp_log_name" not in cfg:
+        exp_log_name = now.strftime("%Y-%m-%d_%H-%M-%S")
+    else:
+        exp_log_name = cfg.exp_log_name
+        # add datetime to the name
+        exp_log_name = exp_log_name + "_" + now.strftime("%Y-%m-%d_%H-%M-%S")
+    log_path = os.path.join(f"results/{cfg.running_user}/{exp_log_name}.pkl")
+    return log_path
 
 
 if __name__ == "__main__":
-
-
 
     print("Running experiments...")
     cfg = get_cfg()
     seed = cfg.seed
-    
+
     set_seed(seed)
     print(f"Seed: {seed}")
     print(f"Experiments: {cfg.experiments}")
     print(f"Data: {cfg.data}")
-    
+
     dataset_fn = get_dataset(cfg.data)
-
-
+
     for exp_name in cfg.experiments:
         experiment = globals()[exp_name]()
         time_start = time.time()
         dataset1 = dataset_fn(cfg.n, cfg.d)
         dataset2 = dataset_fn(cfg.n, cfg.d)
 
-        output = experiment.run_experiment(
-                dataset1=dataset1, dataset2=dataset2
-            )
+        output = experiment.run_experiment(dataset1=dataset1, dataset2=dataset2)
         time_end = time.time()
         print(f"Experiment {exp_name} finished in {time_end - time_start}")
-        experiment.plot_experiment(*output, cfg.data)
-
 
+        log_path = get_log_path(cfg)
+        os.makedirs(os.path.dirname(log_path), exist_ok=True)
+        experiment.log_results(output, log_path)
+        print(f"Numerical results saved to {log_path}")
+
+        experiment.plot_experiment(*output, cfg.data)
+        print(f"Plots saved to {cfg.data}.png")
 
     print("Finished running experiments.")