[RLlib] Fix 3 test cases that broke in move to revert PPO to old API stack. (ray-project#40788)
miqdigital · Oct 30, 2023 · 83785ab · 83785ab
1 parent afdcdd2
commit 83785ab
Show file tree

Hide file tree

Showing 3 changed files with 7 additions and 3 deletions.
diff --git a/rllib/examples/action_masking.py b/rllib/examples/action_masking.py
@@ -109,7 +109,10 @@ def get_cli_args():
         )
         # We need to disable preprocessing of observations, because preprocessing
         # would flatten the observation dict of the environment.
-        .experimental(_disable_preprocessor_api=True)
+        .experimental(
+            _enable_new_api_stack=True,
+            _disable_preprocessor_api=True,
+        )
         .framework(args.framework)
         .resources(
             # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.

diff --git a/rllib/examples/env/action_mask_env.py b/rllib/examples/env/action_mask_env.py
@@ -30,7 +30,8 @@ def step(self, action):
         # Check whether action is valid.
         if not self.valid_actions[action]:
             raise ValueError(
-                f"Invalid action sent to env! " f"valid_actions={self.valid_actions}"
+                f"Invalid action ({action}) sent to env! "
+                f"valid_actions={self.valid_actions}"
             )
         obs, rew, done, truncated, info = super().step(action)
         self._fix_action_mask(obs)

diff --git a/rllib/examples/learner/train_w_bc_finetune_w_ppo.py b/rllib/examples/learner/train_w_bc_finetune_w_ppo.py
@@ -117,7 +117,7 @@ def train_ppo_agent_from_checkpointed_module(
 
     config = (
         PPOConfig()
-        .training()
+        .experimental(_enable_new_api_stack=True)
         .rl_module(rl_module_spec=module_spec_from_ckpt)
         .environment(GYM_ENV_NAME)
         .debugging(seed=0)