Commit

[RLlib] Revert PPO back to old API stack (by default). New stack and PPO not ready yet on several features. (ray-project#40706)
sven1977 authored Oct 27, 2023
1 parent 310409f commit eabd18e
Showing 98 changed files with 456 additions and 576 deletions.
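
The practical upshot for users: after this commit, PPO runs on the old API stack unless the new stack is switched on explicitly, and the opt-in is a single experimental flag rather than two per-component flags. A minimal sketch of that opt-in, assuming Ray's `PPOConfig` API at the time of this commit (import path, flag, and builder calls are taken from the diffs below):

    from ray.rllib.algorithms.ppo import PPOConfig

    # After this commit: one flag opts into the new RLModule/Learner stack.
    config = (
        PPOConfig()
        .experimental(_enable_new_api_stack=True)
        .environment("CartPole-v1")
        .framework("torch")
    )
    # The documentation previously spelled this opt-in with two separate flags,
    # which this commit removes:
    #   .rl_module(_enable_rl_module_api=True).training(_enable_learner_api=True)
    # Leaving the experimental flag out now yields the old API stack by default.

    algo = config.build()
    print(algo.train())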
3 changes: 1 addition & 2 deletions doc/source/rllib/doc_code/catalog_guide.py
@@ -113,10 +113,9 @@ def __init__(self, *args, **kwargs):

 config = (
     PPOConfig()
+    .experimental(_enable_new_api_stack=True)
     .environment("CartPole-v1")
     .framework("torch")
-    .rl_module(_enable_rl_module_api=True)
-    .training(_enable_learner_api=True)
 )

 # Specify the catalog to use for the PPORLModule.
28 changes: 11 additions & 17 deletions doc/source/rllib/doc_code/rlmodule_guide.py
@@ -12,10 +12,9 @@

 config = (
     PPOConfig()
+    .experimental(_enable_new_api_stack=True)
     .framework("torch")
     .environment("CartPole-v1")
-    .rl_module(_enable_rl_module_api=True)
-    .training(_enable_learner_api=True)
 )

 algorithm = config.build()
@@ -81,15 +80,12 @@

 config = (
     BCConfigTest()
+    .experimental(_enable_new_api_stack=True)
     .environment("CartPole-v1")
     .rl_module(
-        _enable_rl_module_api=True,
         rl_module_spec=SingleAgentRLModuleSpec(module_class=DiscreteBCTorchModule),
     )
-    .training(
-        model={"fcnet_hiddens": [32, 32]},
-        _enable_learner_api=True,
-    )
+    .training(model={"fcnet_hiddens": [32, 32]})
 )

 algo = config.build()
@@ -107,17 +103,14 @@

 config = (
     BCConfigTest()
+    .experimental(_enable_new_api_stack=True)
     .environment(MultiAgentCartPole, env_config={"num_agents": 2})
     .rl_module(
-        _enable_rl_module_api=True,
         rl_module_spec=MultiAgentRLModuleSpec(
             module_specs=SingleAgentRLModuleSpec(module_class=DiscreteBCTorchModule)
         ),
     )
-    .training(
-        model={"fcnet_hiddens": [32, 32]},
-        _enable_learner_api=True,
-    )
+    .training(model={"fcnet_hiddens": [32, 32]})
 )
 # __pass-specs-to-configs-ma-end__

@@ -410,7 +403,11 @@ def setup(self):
 from ray.rllib.algorithms.ppo.torch.ppo_torch_rl_module import PPOTorchRLModule
 from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec

-config = PPOConfig().environment("CartPole-v1")
+config = (
+    PPOConfig()
+    # Enable the new API stack (RLModule and Learner APIs).
+    .experimental(_enable_new_api_stack=True).environment("CartPole-v1")
+)
 env = gym.make("CartPole-v1")
 # Create an RL Module that we would like to checkpoint
 module_spec = SingleAgentRLModuleSpec(
@@ -437,10 +434,7 @@ def setup(self):
 )

 # Train with the checkpointed RL Module
-config.rl_module(
-    rl_module_spec=module_to_load_spec,
-    _enable_rl_module_api=True,
-)
+config.rl_module(rl_module_spec=module_to_load_spec)
 algo = config.build()
 algo.train()
 # __checkpointing-end__
6 changes: 2 additions & 4 deletions doc/source/rllib/rllib-learner.rst
@@ -56,14 +56,13 @@ arguments in the :py:class:`~ray.rllib.algorithms.algorithm_config.AlgorithmConf

 config = (
     PPOConfig()
+    .experimental(_enable_new_api_stack=True)
     .resources(
         num_gpus_per_learner_worker=0, # Set this to 1 to enable GPU training.
         num_cpus_per_learner_worker=1,
         num_learner_workers=0 # Set this to greater than 0 to allow for DDP style
                               # updates.
     )
-    .training(_enable_learner_api=True)
-    .rl_module(_enable_rl_module_api=True)
 )

 .. testcode::
@@ -77,8 +76,7 @@
 .. note::

     This features is in alpha. If you migrate to this algorithm, enable the feature by
-    setting `_enable_learner_api` and `_enable_rl_module_api` flags in the
-    `AlgorithmConfig`.
+    via `AlgorithmConfig.experimental(_enable_new_api_stack=True)`.

 The following algorithms support :py:class:`~ray.rllib.core.learner.learner.Learner` out of the box. Implement
 an algorithm with a custom :py:class:`~ray.rllib.core.learner.learner.Learner` to leverage this API for other algorithms.
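
As context for the note above, here is a sketch (not part of the commit) of how a user would now enable the Learner API and scale it out; the experimental flag and the `resources()` option names are taken from the diff above, and the worker counts are illustrative only:

    from ray.rllib.algorithms.ppo import PPOConfig

    config = (
        PPOConfig()
        # One switch now enables both the RLModule and Learner APIs.
        .experimental(_enable_new_api_stack=True)
        .environment("CartPole-v1")
        .resources(
            num_learner_workers=2,          # >0 allows DDP-style distributed updates
            num_gpus_per_learner_worker=0,  # set to 1 to put each Learner on a GPU
            num_cpus_per_learner_worker=1,
        )
    )
    algo = config.build()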
2 changes: 1 addition & 1 deletion doc/source/rllib/rllib-rlmodule.rst
@@ -62,7 +62,7 @@ RL Module is a neural network container that implements three public methods: :p
 Enabling RL Modules in the Configuration
 ----------------------------------------

-Enable RL Modules by setting the ``_enable_rl_module_api`` flag to ``True`` in the configuration object.
+Enable RL Modules via our configuration object: ``AlgorithmConfig.experimental(_enable_new_api_stack=True)``.

 .. literalinclude:: doc_code/rlmodule_guide.py
     :language: python
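
To make the new wording concrete, a sketch combining the experimental flag with an explicit RLModule spec; the class names and the `rl_module_spec` argument come from the `rlmodule_guide.py` diffs above, and pinning PPO's default Torch module explicitly is purely illustrative:

    from ray.rllib.algorithms.ppo import PPOConfig
    from ray.rllib.algorithms.ppo.torch.ppo_torch_rl_module import PPOTorchRLModule
    from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec

    config = (
        PPOConfig()
        .experimental(_enable_new_api_stack=True)  # enables RLModule + Learner
        .environment("CartPole-v1")
        .framework("torch")
        # Optional: pass a custom spec; without it, PPO picks its default RLModule.
        .rl_module(rl_module_spec=SingleAgentRLModuleSpec(module_class=PPOTorchRLModule))
    )
    algo = config.build()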
2 changes: 1 addition & 1 deletion rllib/algorithms/a3c/a3c.py
@@ -70,7 +70,7 @@ def __init__(self, algo_class=None):
 self.lambda_ = 1.0

 self.grad_clip = 40.0
-# Note: Only when using _enable_learner_api=True can the clipping mode be
+# Note: Only when using _enable_new_api_stack=True can the clipping mode be
 # configured by the user. On the old API stack, RLlib will always clip by
 # global_norm, no matter the value of `grad_clip_by`.
 self.grad_clip_by = "global_norm"
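
The reworded comment only names the flag; as an illustration (an assumption, not part of this commit), the user-facing side of that remark would look roughly like this, with `grad_clip`/`grad_clip_by` passed through `AlgorithmConfig.training()`:

    from ray.rllib.algorithms.ppo import PPOConfig

    config = (
        PPOConfig()
        .experimental(_enable_new_api_stack=True)  # grad_clip_by only takes effect here
        .environment("CartPole-v1")
        # On the old API stack, RLlib clips by global_norm regardless of grad_clip_by.
        .training(grad_clip=40.0, grad_clip_by="global_norm")
    )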
18 changes: 9 additions & 9 deletions rllib/algorithms/algorithm.py
@@ -747,7 +747,7 @@ def setup(self, config: AlgorithmConfig) -> None:
 method_config["type"] = method_type

 self.learner_group = None
-if self.config._enable_learner_api:
+if self.config._enable_new_api_stack:
     # TODO (Kourosh): This is an interim solution where policies and modules
     # co-exist. In this world we have both policy_map and MARLModule that need
     # to be consistent with one another. To make a consistent parity between
@@ -1636,7 +1636,7 @@ def training_step(self) -> ResultDict:
 # cases should use the multi-GPU optimizer, even if only using 1 GPU).
 # TODO: (sven) rename MultiGPUOptimizer into something more
 # meaningful.
-if self.config._enable_learner_api:
+if self.config._enable_new_api_stack:
     is_module_trainable = self.workers.local_worker().is_policy_to_train
     self.learner_group.set_is_module_trainable(is_module_trainable)
     train_results = self.learner_group.update(train_batch)
@@ -1658,7 +1658,7 @@
 # TODO (Kourosh): figure out how we are going to sync MARLModule
 # weights to MARLModule weights under the policy_map objects?
 from_worker_or_trainer = None
-if self.config._enable_learner_api:
+if self.config._enable_new_api_stack:
     from_worker_or_trainer = self.learner_group
 self.workers.sync_weights(
     from_worker_or_learner_group=from_worker_or_trainer,
@@ -2107,7 +2107,7 @@ def add_policy(

 # If learner API is enabled, we need to also add the underlying module
 # to the learner group.
-if self.config._enable_learner_api:
+if self.config._enable_new_api_stack:
     policy = self.get_policy(policy_id)
     module = policy.model
     self.learner_group.add_module(
@@ -2296,7 +2296,7 @@ def save_checkpoint(self, checkpoint_dir: str) -> None:
 policy_states = state["worker"].pop("policy_states", {})

 # Add RLlib checkpoint version.
-if self.config._enable_learner_api:
+if self.config._enable_new_api_stack:
     state["checkpoint_version"] = CHECKPOINT_VERSION_LEARNER
 else:
     state["checkpoint_version"] = CHECKPOINT_VERSION
@@ -2331,7 +2331,7 @@ def save_checkpoint(self, checkpoint_dir: str) -> None:
 policy.export_checkpoint(policy_dir, policy_state=policy_state)

 # if we are using the learner API, save the learner group state
-if self.config._enable_learner_api:
+if self.config._enable_new_api_stack:
     learner_state_dir = os.path.join(checkpoint_dir, "learner")
     self.learner_group.save_state(learner_state_dir)

@@ -2343,7 +2343,7 @@ def load_checkpoint(self, checkpoint_dir: str) -> None:
 checkpoint_info = get_checkpoint_info(checkpoint_dir)
 checkpoint_data = Algorithm._checkpoint_info_to_algorithm_state(checkpoint_info)
 self.__setstate__(checkpoint_data)
-if self.config._enable_learner_api:
+if self.config._enable_new_api_stack:
     learner_state_dir = os.path.join(checkpoint_dir, "learner")
     self.learner_group.load_state(learner_state_dir)

@@ -2392,7 +2392,7 @@ def default_resource_request(
 eval_cf.freeze()

 # resources for the driver of this trainable
-if cf._enable_learner_api:
+if cf._enable_new_api_stack:
     if cf.num_learner_workers == 0:
         # in this case local_worker only does sampling and training is done on
         # local learner worker

# resources for remote learner workers
learner_bundles = []
if cf._enable_learner_api and cf.num_learner_workers > 0:
if cf._enable_new_api_stack and cf.num_learner_workers > 0:
learner_bundles = cls._get_learner_bundles(cf)

bundles = [driver] + rollout_bundles + evaluation_bundles + learner_bundles
(Diffs for the remaining 92 changed files are not shown here.)
