Commit

[RLlib] Revert PPO back to old API stack (by default). New stack and PPO not ready yet on several features. (ray-project#40706)
sven1977 authored Oct 27, 2023
1 parent 310409f commit eabd18e
Showing 98 changed files with 456 additions and 576 deletions.
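
The practical upshot for users: after this commit, PPO runs on the old API stack unless the new stack is switched on explicitly, and the opt-in is a single experimental flag rather than two per-component flags. A minimal sketch of that opt-in, assuming Ray's `PPOConfig` API at the time of this commit (import path, flag, and builder calls are taken from the diffs below):

    from ray.rllib.algorithms.ppo import PPOConfig

    # After this commit: one flag opts into the new RLModule/Learner stack.
    config = (
        PPOConfig()
        .experimental(_enable_new_api_stack=True)
        .environment("CartPole-v1")
        .framework("torch")
    )
    # The documentation previously spelled this opt-in with two separate flags,
    # which this commit removes:
    #   .rl_module(_enable_rl_module_api=True).training(_enable_learner_api=True)
    # Leaving the experimental flag out now yields the old API stack by default.

    algo = config.build()
    print(algo.train())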
3 changes: 1 addition & 2 deletions doc/source/rllib/doc_code/catalog_guide.py
@@ -113,10 +113,9 @@ def __init__(self, *args, **kwargs):

 config = (
     PPOConfig()
+    .experimental(_enable_new_api_stack=True)
     .environment("CartPole-v1")
     .framework("torch")
-    .rl_module(_enable_rl_module_api=True)
-    .training(_enable_learner_api=True)
 )

 # Specify the catalog to use for the PPORLModule.
28 changes: 11 additions & 17 deletions doc/source/rllib/doc_code/rlmodule_guide.py
@@ -12,10 +12,9 @@

 config = (
     PPOConfig()
+    .experimental(_enable_new_api_stack=True)
     .framework("torch")
     .environment("CartPole-v1")
-    .rl_module(_enable_rl_module_api=True)
-    .training(_enable_learner_api=True)
 )

 algorithm = config.build()
@@ -81,15 +80,12 @@

 config = (
     BCConfigTest()
+    .experimental(_enable_new_api_stack=True)
     .environment("CartPole-v1")
     .rl_module(
-        _enable_rl_module_api=True,
         rl_module_spec=SingleAgentRLModuleSpec(module_class=DiscreteBCTorchModule),
     )
-    .training(
-        model={"fcnet_hiddens": [32, 32]},
-        _enable_learner_api=True,
-    )
+    .training(model={"fcnet_hiddens": [32, 32]})
 )

 algo = config.build()
@@ -107,17 +103,14 @@

 config = (
     BCConfigTest()
+    .experimental(_enable_new_api_stack=True)
     .environment(MultiAgentCartPole, env_config={"num_agents": 2})
     .rl_module(
-        _enable_rl_module_api=True,
         rl_module_spec=MultiAgentRLModuleSpec(
             module_specs=SingleAgentRLModuleSpec(module_class=DiscreteBCTorchModule)
         ),
     )
-    .training(
-        model={"fcnet_hiddens": [32, 32]},
-        _enable_learner_api=True,
-    )
+    .training(model={"fcnet_hiddens": [32, 32]})
 )
 # __pass-specs-to-configs-ma-end__

@@ -410,7 +403,11 @@ def setup(self):
 from ray.rllib.algorithms.ppo.torch.ppo_torch_rl_module import PPOTorchRLModule
 from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec

-config = PPOConfig().environment("CartPole-v1")
+config = (
+    PPOConfig()
+    # Enable the new API stack (RLModule and Learner APIs).
+    .experimental(_enable_new_api_stack=True).environment("CartPole-v1")
+)
 env = gym.make("CartPole-v1")
 # Create an RL Module that we would like to checkpoint
 module_spec = SingleAgentRLModuleSpec(
@@ -437,10 +434,7 @@ def setup(self):
 )

 # Train with the checkpointed RL Module
-config.rl_module(
-    rl_module_spec=module_to_load_spec,
-    _enable_rl_module_api=True,
-)
+config.rl_module(rl_module_spec=module_to_load_spec)
 algo = config.build()
 algo.train()
 # __checkpointing-end__
6 changes: 2 additions & 4 deletions doc/source/rllib/rllib-learner.rst
@@ -56,14 +56,13 @@ arguments in the :py:class:`~ray.rllib.algorithms.algorithm_config.AlgorithmConf

 config = (
     PPOConfig()
+    .experimental(_enable_new_api_stack=True)
     .resources(
         num_gpus_per_learner_worker=0, # Set this to 1 to enable GPU training.
         num_cpus_per_learner_worker=1,
         num_learner_workers=0 # Set this to greater than 0 to allow for DDP style
                               # updates.
     )
-    .training(_enable_learner_api=True)
-    .rl_module(_enable_rl_module_api=True)
 )

 .. testcode::
@@ -77,8 +76,7 @@
 .. note::

     This features is in alpha. If you migrate to this algorithm, enable the feature by
-    setting `_enable_learner_api` and `_enable_rl_module_api` flags in the
-    `AlgorithmConfig`.
+    via `AlgorithmConfig.experimental(_enable_new_api_stack=True)`.

 The following algorithms support :py:class:`~ray.rllib.core.learner.learner.Learner` out of the box. Implement
 an algorithm with a custom :py:class:`~ray.rllib.core.learner.learner.Learner` to leverage this API for other algorithms.
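
As context for the note above, here is a sketch (not part of the commit) of how a user would now enable the Learner API and scale it out; the experimental flag and the `resources()` option names are taken from the diff above, and the worker counts are illustrative only:

    from ray.rllib.algorithms.ppo import PPOConfig

    config = (
        PPOConfig()
        # One switch now enables both the RLModule and Learner APIs.
        .experimental(_enable_new_api_stack=True)
        .environment("CartPole-v1")
        .resources(
            num_learner_workers=2,          # >0 allows DDP-style distributed updates
            num_gpus_per_learner_worker=0,  # set to 1 to put each Learner on a GPU
            num_cpus_per_learner_worker=1,
        )
    )
    algo = config.build()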
2 changes: 1 addition & 1 deletion doc/source/rllib/rllib-rlmodule.rst
@@ -62,7 +62,7 @@ RL Module is a neural network container that implements three public methods: :p
 Enabling RL Modules in the Configuration
 ----------------------------------------

-Enable RL Modules by setting the ``_enable_rl_module_api`` flag to ``True`` in the configuration object.
+Enable RL Modules via our configuration object: ``AlgorithmConfig.experimental(_enable_new_api_stack=True)``.

 .. literalinclude:: doc_code/rlmodule_guide.py
     :language: python
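
To make the new wording concrete, a sketch combining the experimental flag with an explicit RLModule spec; the class names and the `rl_module_spec` argument come from the `rlmodule_guide.py` diffs above, and pinning PPO's default Torch module explicitly is purely illustrative:

    from ray.rllib.algorithms.ppo import PPOConfig
    from ray.rllib.algorithms.ppo.torch.ppo_torch_rl_module import PPOTorchRLModule
    from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec

    config = (
        PPOConfig()
        .experimental(_enable_new_api_stack=True)  # enables RLModule + Learner
        .environment("CartPole-v1")
        .framework("torch")
        # Optional: pass a custom spec; without it, PPO picks its default RLModule.
        .rl_module(rl_module_spec=SingleAgentRLModuleSpec(module_class=PPOTorchRLModule))
    )
    algo = config.build()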
2 changes: 1 addition & 1 deletion rllib/algorithms/a3c/a3c.py
@@ -70,7 +70,7 @@ def __init__(self, algo_class=None):
 self.lambda_ = 1.0

 self.grad_clip = 40.0
-# Note: Only when using _enable_learner_api=True can the clipping mode be
+# Note: Only when using _enable_new_api_stack=True can the clipping mode be
 # configured by the user. On the old API stack, RLlib will always clip by
 # global_norm, no matter the value of `grad_clip_by`.
 self.grad_clip_by = "global_norm"
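
The reworded comment only names the flag; as an illustration (an assumption, not part of this commit), the user-facing side of that remark would look roughly like this, with `grad_clip`/`grad_clip_by` passed through `AlgorithmConfig.training()`:

    from ray.rllib.algorithms.ppo import PPOConfig

    config = (
        PPOConfig()
        .experimental(_enable_new_api_stack=True)  # grad_clip_by only takes effect here
        .environment("CartPole-v1")
        # On the old API stack, RLlib clips by global_norm regardless of grad_clip_by.
        .training(grad_clip=40.0, grad_clip_by="global_norm")
    )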
18 changes: 9 additions & 9 deletions rllib/algorithms/algorithm.py
@@ -747,7 +747,7 @@ def setup(self, config: AlgorithmConfig) -> None:
 method_config["type"] = method_type

 self.learner_group = None
-if self.config._enable_learner_api:
+if self.config._enable_new_api_stack:
     # TODO (Kourosh): This is an interim solution where policies and modules
     # co-exist. In this world we have both policy_map and MARLModule that need
     # to be consistent with one another. To make a consistent parity between
@@ -1636,7 +1636,7 @@ def training_step(self) -> ResultDict:
 # cases should use the multi-GPU optimizer, even if only using 1 GPU).
 # TODO: (sven) rename MultiGPUOptimizer into something more
 # meaningful.
-if self.config._enable_learner_api:
+if self.config._enable_new_api_stack:
     is_module_trainable = self.workers.local_worker().is_policy_to_train
     self.learner_group.set_is_module_trainable(is_module_trainable)
     train_results = self.learner_group.update(train_batch)
@@ -1658,7 +1658,7 @@
 # TODO (Kourosh): figure out how we are going to sync MARLModule
 # weights to MARLModule weights under the policy_map objects?
 from_worker_or_trainer = None
-if self.config._enable_learner_api:
+if self.config._enable_new_api_stack:
     from_worker_or_trainer = self.learner_group
 self.workers.sync_weights(
     from_worker_or_learner_group=from_worker_or_trainer,
@@ -2107,7 +2107,7 @@ def add_policy(

 # If learner API is enabled, we need to also add the underlying module
 # to the learner group.
-if self.config._enable_learner_api:
+if self.config._enable_new_api_stack:
     policy = self.get_policy(policy_id)
     module = policy.model
     self.learner_group.add_module(
@@ -2296,7 +2296,7 @@ def save_checkpoint(self, checkpoint_dir: str) -> None:
 policy_states = state["worker"].pop("policy_states", {})

 # Add RLlib checkpoint version.
-if self.config._enable_learner_api:
+if self.config._enable_new_api_stack:
     state["checkpoint_version"] = CHECKPOINT_VERSION_LEARNER
 else:
     state["checkpoint_version"] = CHECKPOINT_VERSION
@@ -2331,7 +2331,7 @@ def save_checkpoint(self, checkpoint_dir: str) -> None:
 policy.export_checkpoint(policy_dir, policy_state=policy_state)

 # if we are using the learner API, save the learner group state
-if self.config._enable_learner_api:
+if self.config._enable_new_api_stack:
     learner_state_dir = os.path.join(checkpoint_dir, "learner")
     self.learner_group.save_state(learner_state_dir)

@@ -2343,7 +2343,7 @@ def load_checkpoint(self, checkpoint_dir: str) -> None:
 checkpoint_info = get_checkpoint_info(checkpoint_dir)
 checkpoint_data = Algorithm._checkpoint_info_to_algorithm_state(checkpoint_info)
 self.__setstate__(checkpoint_data)
-if self.config._enable_learner_api:
+if self.config._enable_new_api_stack:
     learner_state_dir = os.path.join(checkpoint_dir, "learner")
     self.learner_group.load_state(learner_state_dir)

@@ -2392,7 +2392,7 @@ def default_resource_request(
 eval_cf.freeze()

 # resources for the driver of this trainable
-if cf._enable_learner_api:
+if cf._enable_new_api_stack:
     if cf.num_learner_workers == 0:
         # in this case local_worker only does sampling and training is done on
         # local learner worker

# resources for remote learner workers
learner_bundles = []
if cf._enable_learner_api and cf.num_learner_workers > 0:
if cf._enable_new_api_stack and cf.num_learner_workers > 0:
learner_bundles = cls._get_learner_bundles(cf)

bundles = [driver] + rollout_bundles + evaluation_bundles + learner_bundles
(Diffs for the remaining 92 changed files are not shown here.)
