
Commit: example update
TheEimer committed May 30, 2024
1 parent bbcb884 commit 98c7b39
Showing 8 changed files with 5,239 additions and 10 deletions.
9 changes: 8 additions & 1 deletion examples/Readme.md
@@ -6,10 +6,17 @@ We provide three different categories of examples:
3. Running a reactive schedule based on the gradient history

We use 'hydra' as a command line interface for these experiments; you'll find the corresponding configurations (including some variations on the algorithms and environments) in the 'configs' directory.
The "hypersweeper_tuning" and "schedules" notebooks can help you run these examples and inspect their results.

## 1. Black-Box HPO

We use the 'hypersweeper' package to demonstrate how ARLBench can be used for black-box HPO. Since it's hydra-based, we simply set up a script which takes a configuration, runs it and returns the evaluation reward at the end. You can try a single run like this:
We use the 'hypersweeper' package to demonstrate how ARLBench can be used for black-box HPO. Since it's hydra-based, we simply set up a script which takes a configuration, runs it and returns the evaluation reward at the end. First, use pip to install the hypersweeper:

```bash
pip install hypersweeper
```

Then you can try a single run of ARLBench:

```bash
python run_arlbench.py
```
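`run_arlbench.py` is this hydra-based script. The following is only a rough sketch of what such a script could look like: the `env.reset()`/`env.step(...)` calls and the `reward_mean` objective mirror the schedule examples further down, while `make_env`, the config name, and the single training step are assumptions rather than the script's actual contents.

```python
"""Hypothetical sketch of a hydra-based objective script, not the real run_arlbench.py."""
import hydra
from omegaconf import DictConfig


def make_env(cfg: DictConfig):
    """Placeholder: the actual ARLBench environment construction is not shown here."""
    raise NotImplementedError("Build the ARLBench environment from cfg here.")


@hydra.main(version_base=None, config_path="configs", config_name="smac")  # config name is a guess
def execute(cfg: DictConfig) -> float:
    env = make_env(cfg)

    _ = env.reset()
    # One training segment; the objectives dict contains the mean evaluation reward.
    _, objectives, _, _, _ = env.step(cfg.hp_config)

    # The return value is what the sweeper optimizes; whether it is maximized or
    # minimized is controlled by the sweeper configuration (see `maximize` in smac.yaml).
    return float(objectives["reward_mean"])


if __name__ == "__main__":
    execute()
```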
4 changes: 2 additions & 2 deletions examples/configs/epsilon_heuristic.yaml
@@ -4,9 +4,9 @@ defaults:

hydra:
run:
dir: results/${algorithm}_${environment.name}/${autorl.seed}
dir: results/heuristic_schedule_${algorithm}_${environment.name}/${autorl.seed}
sweep:
dir: results/${algorithm}_${environment.name}/${autorl.seed}
dir: results/heuristic_schedule_${algorithm}_${environment.name}/${autorl.seed}
job:
chdir: true

4 changes: 2 additions & 2 deletions examples/configs/gradient_lr.yaml
@@ -4,9 +4,9 @@ defaults:

hydra:
run:
dir: results/${algorithm}_${environment.name}/${autorl.seed}
dir: results/reactive_schedule_${algorithm}_${environment.name}/${autorl.seed}
sweep:
dir: results/${algorithm}_${environment.name}/${autorl.seed}
dir: results/reactive_schedule_${algorithm}_${environment.name}/${autorl.seed}
job:
chdir: true

2 changes: 1 addition & 1 deletion examples/configs/smac.yaml
@@ -31,7 +31,7 @@ hydra:
n_workers: 1
output_directory: ${hydra.sweep.dir}
seeds: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
maximize: true
maximize: false
run:
dir: results/smac/${algorithm}_${autorl.env_name}/${smac_seed}/${seed}
sweep:
4,994 changes: 4,994 additions & 0 deletions examples/hypersweeper_tuning.ipynb

Large diffs are not rendered by default.

9 changes: 9 additions & 0 deletions examples/run_heuristic_schedule.py
@@ -9,6 +9,7 @@
import sys
import traceback
from typing import TYPE_CHECKING
import json

import hydra
import jax
@@ -27,16 +28,24 @@ def run(cfg: DictConfig, logger: logging.Logger):

# Reset environment and run for 10 steps
_ = env.reset()

rewards = []
epsilons = []
for _ in range(10):
epsilons.append(cfg.hp_config.initial_epsilon)
# The objectives are configured to return the mean reward
_, objectives, _, _, _ = env.step(cfg.hp_config)
if objectives["reward_mean"] > 30 and cfg.hp_config.initial_epsilon > 0.7:
# We can change epsilon by changing which config we run in the next step
cfg.hp_config.target_epsilon = 0.7
cfg.hp_config.initial_epsilon = 0.7
logger.info("Agent reached performance threshold, decreasing epsilon to 0.7")
rewards.append(float(objectives["reward_mean"]))

logger.info(f"Training finished with a total reward of {objectives['reward_mean']}")
output = {"rewards": rewards, "epsilons": epsilons}
with open("output.json", "w") as f:
json.dump(output, f)

@hydra.main(version_base=None, config_path="configs", config_name="epsilon_heuristic")
def execute(cfg: DictConfig):
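Since the heuristic-schedule script now writes its per-step results to `output.json` in the hydra run directory, they can be inspected afterwards, for example from the `schedules` notebook. A minimal sketch of reading that file (assuming you point it at the run directory configured in `epsilon_heuristic.yaml`):

```python
import json

# e.g. results/heuristic_schedule_<algorithm>_<environment>/<seed>/output.json
with open("output.json") as f:
    output = json.load(f)

# Mean reward and the epsilon in effect for each of the 10 schedule steps.
for step, (reward, eps) in enumerate(zip(output["rewards"], output["epsilons"])):
    print(f"step {step}: reward_mean={reward:.2f}, epsilon={eps}")
```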
13 changes: 9 additions & 4 deletions examples/run_reactive_schedule.py
@@ -9,6 +9,7 @@
import sys
import traceback
from typing import TYPE_CHECKING
import json

import hydra
import jax
@@ -32,16 +33,15 @@ def run(cfg: DictConfig, logger: logging.Logger):

# define a tolerance for the gradient norm
tolerance = 1e-4
rewards = []
lrs = []
for i in range(100):

lrs.append(cfg.hp_config.learning_rate)
# Statistics here contain the number of steps and gradient information
statistics, objectives, te, tr, _ = env.step(cfg.hp_config)
grad_norm, _ = statistics["grad_info"]

# If grad norm doesn't change much, spike the learning rate
if last_grad_norm is not None:
print(i)
print(abs(grad_norm - last_grad_norm))
if last_grad_norm is not None and abs(grad_norm - last_grad_norm) < tolerance:
last_lr = cfg.hp_config.learning_rate
cfg.hp_config.learning_rate *= 10
@@ -54,7 +54,12 @@
spiked = False
logger.info(f"Resetting learning rate to {cfg.hp_config.learning_rate}")
last_grad_norm = grad_norm
rewards.append(float(objectives["reward_mean"]))

logger.info(f"Training finished with a total reward of {objectives['reward_mean']}")
output = {"rewards": rewards, "lr": lrs}
with open("output.json", "w") as f:
json.dump(output, f)


@hydra.main(version_base=None, config_path="configs", config_name="gradient_lr")
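The reactive-schedule script writes an analogous `output.json`, with the learning rates stored under the key `lr`. A small sketch of how the resulting schedule could be visualised, assuming matplotlib is installed:

```python
import json

import matplotlib.pyplot as plt

with open("output.json") as f:
    output = json.load(f)

fig, (ax_lr, ax_rew) = plt.subplots(2, 1, sharex=True)
ax_lr.plot(output["lr"])        # learning rate used at each step
ax_lr.set_yscale("log")
ax_lr.set_ylabel("learning rate")
ax_rew.plot(output["rewards"])  # mean evaluation reward per step
ax_rew.set_ylabel("reward_mean")
ax_rew.set_xlabel("step")
plt.show()
```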
214 changes: 214 additions & 0 deletions examples/schedules.ipynb

Large diffs are not rendered by default.
