Fix RLlib PPO example.
Gamenot committed Jan 4, 2024
1 parent f412151 commit 70c4750
Showing 2 changed files with 3 additions and 4 deletions.
2 changes: 1 addition & 1 deletion examples/e12_rllib/ppo_example.py
@@ -129,7 +129,7 @@ def main(
             enable_tf1_exec_eagerly=True,
         )
         .training(
-            lr=[[0, 1e-3], [1e3, 5e-4], [1e5, 1e-4], [1e7, 5e-5], [1e8, 1e-5]],
+            lr_schedule=[[0, 1e-3], [1e3, 5e-4], [1e5, 1e-4], [1e7, 5e-5], [1e8, 1e-5]],
             train_batch_size=train_batch_size,
         )
         .multi_agent(
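Context for the change above: in RLlib's algorithm config, lr takes a single float, while a piecewise schedule of [timestep, learning_rate] pairs is passed as lr_schedule. A minimal sketch of the corrected call, using an assumed environment and batch size instead of the example's SMARTS setup:

    from ray.rllib.algorithms.ppo import PPOConfig

    config = (
        PPOConfig()
        .environment(env="CartPole-v1")  # placeholder; the example builds a SMARTS env
        .framework("tf2")
        .training(
            # Learning rate decays stepwise as training timesteps accumulate.
            lr_schedule=[[0, 1e-3], [1e3, 5e-4], [1e5, 1e-4], [1e7, 5e-5], [1e8, 1e-5]],
            train_batch_size=2000,  # the example passes this in as a variable
        )
    )
    algo = config.build()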
5 changes: 2 additions & 3 deletions examples/e12_rllib/ppo_pbt_example.py
@@ -225,7 +225,7 @@ def main(
         scheduler=pbt,
         max_concurrent_trials=4,
     )
-    trainable = "PG"
+    trainable = "PPO"
     if resume_training:
         tuner = tune.Tuner.restore(
             str(experiment_dir),
@@ -246,8 +246,7 @@
 
     # Get the best checkpoint corresponding to the best result.
     best_checkpoint = best_result.checkpoint
-
-    best_logdir = Path(best_result.log_dir)
+    best_logdir = Path(best_checkpoint.path)
     model_path = best_logdir
 
     copy_tree(str(model_path), save_model_path, overwrite=True)
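The two changes above do related cleanup in the PBT example: the Tuner is restored with the "PPO" trainable, matching the PPO config it runs, and the best model's directory is resolved from the checkpoint itself (Checkpoint.path) rather than the result's log directory. A minimal sketch of that flow, with an assumed experiment path and metric name:

    from pathlib import Path
    from ray import tune

    trainable = "PPO"  # must match the algorithm the experiment was built with
    tuner = tune.Tuner.restore("/tmp/ray_results/smarts_ppo_pbt", trainable=trainable)  # assumed path
    results = tuner.fit()

    # Pick the best trial and resolve its checkpoint directory on disk.
    best_result = results.get_best_result(metric="episode_reward_mean", mode="max")
    best_checkpoint = best_result.checkpoint
    best_logdir = Path(best_checkpoint.path)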
