From d2bde6bf1bed913dd4f71015cc11bbf8df037ef0 Mon Sep 17 00:00:00 2001
From: Matteo Bettini
Date: Sat, 25 Nov 2023 15:08:38 +0000
Subject: [PATCH] Docs

---
 benchmarl/algorithms/common.py                | 16 ++++++++--------
 benchmarl/algorithms/iddpg.py                 | 12 ++++++++++++
 benchmarl/algorithms/ippo.py                  | 17 +++++++++++++++++
 benchmarl/algorithms/iql.py                   | 11 +++++++++++
 benchmarl/algorithms/isac.py                  | 18 ++++++++++++++++++
 .../_templates/autosummary/class_private.rst  |  9 +++++++++
 docs/source/modules/algorithms.rst            |  4 ++--
 7 files changed, 77 insertions(+), 10 deletions(-)
 create mode 100644 docs/source/_templates/autosummary/class_private.rst

diff --git a/benchmarl/algorithms/common.py b/benchmarl/algorithms/common.py
index b5f5f3c6..3702b75d 100644
--- a/benchmarl/algorithms/common.py
+++ b/benchmarl/algorithms/common.py
@@ -32,7 +32,7 @@ class Algorithm(ABC):
     This should be overridden by implemented algorithms
     and all abstract methods should be implemented.
 
-    Args: 
+    Args:
         experiment (Experiment): the experiment class
     """
 
@@ -104,14 +104,13 @@ def _check_specs(self):
     def get_loss_and_updater(self, group: str) -> Tuple[LossModule, TargetNetUpdater]:
         """
         Get the LossModule and TargetNetUpdater for a specific group.
-        This function calls the abstract self._get_loss() which needs to be implemented.
+        This function calls the abstract :class:`~benchmarl.algorithms.Algorithm._get_loss()` which needs to be implemented.
         The function will cache the output at the first call and return the cached values in future calls.
 
         Args:
             group (str): agent group of the loss and updater
 
         Returns: LossModule and TargetNetUpdater for the group
-
         """
         if group not in self._losses_and_updaters.keys():
             action_space = self.action_spec[group, "action"]
@@ -144,7 +143,7 @@ def get_replay_buffer(
     ) -> ReplayBuffer:
         """
         Get the ReplayBuffer for a specific group.
-        This function will check self.on_policy and create the buffer accordingly
+        This function will check ``self.on_policy`` and create the buffer accordingly
 
         Args:
             group (str): agent group of the loss and updater
@@ -165,7 +164,7 @@ def get_policy_for_loss(self, group: str) -> TensorDictModule:
         """
         Get the non-explorative policy for a specific group loss.
-        This function calls the abstract self._get_policy_for_loss() which needs to be implemented.
+        This function calls the abstract :class:`~benchmarl.algorithms.Algorithm._get_policy_for_loss()` which needs to be implemented.
         The function will cache the output at the first call and return the cached values in future calls.
 
         Args:
@@ -192,7 +191,7 @@ def get_policy_for_collection(self) -> TensorDictSequential:
         """
         Get the explorative policy for all groups together.
-        This function calls the abstract self._get_policy_for_collection() which needs to be implemented.
+        This function calls the abstract :class:`~benchmarl.algorithms.Algorithm._get_policy_for_collection()` which needs to be implemented.
         The function will cache the output at the first call and return the cached values in future calls.
 
         Returns: TensorDictSequential representing all explorative policies
@@ -217,7 +216,7 @@ def get_parameters(self, group: str) -> Dict[str, Iterable]:
         """
         Get the dictionary mapping loss names to the relative parameters to optimize for a given group.
-        This function calls the abstract self._get_parameters() which needs to be implemented.
+        This function calls the abstract :class:`~benchmarl.algorithms.Algorithm._get_parameters()` which needs to be implemented.
 
         Returns: a dictionary mapping loss names to a parameters' list
         """
@@ -332,6 +331,7 @@ class AlgorithmConfig:
     def get_algorithm(self, experiment) -> Algorithm:
         """
         Main function to turn the config into the associated algorithm
+
         Args:
             experiment (Experiment): the experiment class
 
@@ -361,7 +361,7 @@ def get_from_yaml(cls, path: Optional[str] = None):
         Args:
             path (str, optional): The full path of the yaml file to load from. If None, it will default to
-                benchmarl/conf/algorithm/self.associated_class().__name__
+                ``benchmarl/conf/algorithm/self.associated_class().__name__``
 
         Returns: the loaded AlgorithmConfig
         """
diff --git a/benchmarl/algorithms/iddpg.py b/benchmarl/algorithms/iddpg.py
index 2bf1657c..48fb1646 100644
--- a/benchmarl/algorithms/iddpg.py
+++ b/benchmarl/algorithms/iddpg.py
@@ -19,6 +19,16 @@
 
 
 class Iddpg(Algorithm):
+    """Same as :class:`~benchmarl.algorithms.Maddpg` (from `https://arxiv.org/abs/1706.02275 <https://arxiv.org/abs/1706.02275>`__) but with decentralized critics.
+
+    Args:
+        share_param_critic (bool): Whether to share the parameters of the critics within agent groups
+        loss_function (str): loss function for the value discrepancy. Can be one of "l1", "l2" or "smooth_l1".
+        delay_value (bool): whether to separate the target value networks from the value networks used for
+            data collection.
+
+    """
+
     def __init__(
         self, share_param_critic: bool, loss_function: str, delay_value: bool, **kwargs
     ):
@@ -227,6 +237,8 @@ def get_value_module(self, group: str) -> TensorDictModule:
 
 @dataclass
 class IddpgConfig(AlgorithmConfig):
+    """Configuration dataclass for :class:`~benchmarl.algorithms.Iddpg`."""
+
     share_param_critic: bool = MISSING
     loss_function: str = MISSING
     delay_value: bool = MISSING
diff --git a/benchmarl/algorithms/ippo.py b/benchmarl/algorithms/ippo.py
index f7190630..08bd4845 100644
--- a/benchmarl/algorithms/ippo.py
+++ b/benchmarl/algorithms/ippo.py
@@ -22,6 +22,21 @@
 
 
 class Ippo(Algorithm):
+    """Independent PPO (from `https://arxiv.org/abs/2011.09533 <https://arxiv.org/abs/2011.09533>`__).
+
+    Args:
+        share_param_critic (bool): Whether to share the parameters of the critics within agent groups
+        clip_epsilon (scalar): weight clipping threshold in the clipped PPO loss equation.
+        entropy_coef (scalar): entropy multiplier when computing the total loss.
+        critic_coef (scalar): critic loss multiplier when computing the total loss.
+        loss_critic_type (str): loss function for the value discrepancy.
+            Can be one of "l1", "l2" or "smooth_l1".
+        lmbda (float): The GAE lambda
+        scale_mapping (str): positive mapping function to be used with the std.
+            choices: "softplus", "exp", "relu", "biased_softplus_1";
+
+    """
+
     def __init__(
         self,
         share_param_critic: bool,
@@ -270,6 +285,8 @@ def get_critic(self, group: str) -> TensorDictModule:
 
 @dataclass
 class IppoConfig(AlgorithmConfig):
+    """Configuration dataclass for :class:`~benchmarl.algorithms.Ippo`."""
+
     share_param_critic: bool = MISSING
     clip_epsilon: float = MISSING
     entropy_coef: float = MISSING
diff --git a/benchmarl/algorithms/iql.py b/benchmarl/algorithms/iql.py
index 8838c8fa..a56dedc7 100644
--- a/benchmarl/algorithms/iql.py
+++ b/benchmarl/algorithms/iql.py
@@ -18,6 +18,15 @@
 
 
 class Iql(Algorithm):
+    """Independent Q Learning (from `https://www.semanticscholar.org/paper/Multi-Agent-Reinforcement-Learning%3A-Independent-Tan/59de874c1e547399b695337bcff23070664fa66e <https://www.semanticscholar.org/paper/Multi-Agent-Reinforcement-Learning%3A-Independent-Tan/59de874c1e547399b695337bcff23070664fa66e>`__).
+
+    Args:
+        loss_function (str): loss function for the value discrepancy.
+            Can be one of "l1", "l2" or "smooth_l1".
+        delay_value (bool): whether to separate the target value networks from the value networks used for
+            data collection.
+
+    """
+
     def __init__(self, delay_value: bool, loss_function: str, **kwargs):
         super().__init__(**kwargs)
 
@@ -175,6 +184,8 @@ def process_batch(self, group: str, batch: TensorDictBase) -> TensorDictBase:
 
 @dataclass
 class IqlConfig(AlgorithmConfig):
+    """Configuration dataclass for :class:`~benchmarl.algorithms.Iql`."""
+
     delay_value: bool = MISSING
     loss_function: str = MISSING
 
diff --git a/benchmarl/algorithms/isac.py b/benchmarl/algorithms/isac.py
index 20df1ac1..61f75c21 100644
--- a/benchmarl/algorithms/isac.py
+++ b/benchmarl/algorithms/isac.py
@@ -26,6 +26,24 @@
 
 
 class Isac(Algorithm):
+    """Independent Soft Actor Critic.
+
+    Args:
+        share_param_critic (bool): Whether to share the parameters of the critics within agent groups
+        num_qvalue_nets (integer): number of Q-Value networks used.
+        loss_function (str): loss function to be used with
+            the value function loss.
+        delay_qvalue (bool): whether to separate the target Q value networks from the Q value networks used for data collection.
+        target_entropy (float or str): target entropy for the stochastic policy. If "auto", it is computed from the action spec.
+        discrete_target_entropy_weight (float): weight for the target entropy term when actions are discrete.
+        alpha_init (float): initial value of the entropy multiplier alpha.
+        min_alpha (float): minimum value of alpha.
+        max_alpha (float): maximum value of alpha.
+        fixed_alpha (bool): if ``True``, alpha is kept fixed at its initial value, otherwise it is optimized to match the target entropy.
+        scale_mapping (str): positive mapping function to be used with the std.
+
+    """
+
     def __init__(
         self,
         share_param_critic: bool,
diff --git a/docs/source/_templates/autosummary/class_private.rst b/docs/source/_templates/autosummary/class_private.rst
new file mode 100644
index 00000000..e9f2f9de
--- /dev/null
+++ b/docs/source/_templates/autosummary/class_private.rst
@@ -0,0 +1,9 @@
+{{ fullname | escape | underline }}
+
+.. currentmodule:: {{ module }}
+
+.. autoclass:: {{ objname }}
+   :show-inheritance:
+   :members:
+   :undoc-members:
+   :private-members:
diff --git a/docs/source/modules/algorithms.rst b/docs/source/modules/algorithms.rst
index 005a3dda..cba959e8 100644
--- a/docs/source/modules/algorithms.rst
+++ b/docs/source/modules/algorithms.rst
@@ -15,7 +15,7 @@ Common
 .. autosummary::
    :nosignatures:
    :toctree: ../generated
-   :template: autosummary/class.rst
+   :template: autosummary/class_private.rst
 
    Algorithm
    AlgorithmConfig
@@ -26,7 +26,7 @@ Algorithms
 .. autosummary::
    :nosignatures:
    :toctree: ../generated
-   :template: autosummary/class.rst
+   :template: autosummary/class_private.rst
 
    {% for name in benchmarl.algorithms.classes %}
    {{ name }}
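
For reference, below is a minimal usage sketch of the config-to-algorithm flow these docstrings document: an AlgorithmConfig is loaded via get_from_yaml() and handed to an Experiment, which calls get_algorithm(self) on it internally. This assumes the BenchMARL API at this commit; the VMAS task and MLP model used here are illustrative choices, not part of this patch.

# Sketch, assuming BenchMARL and the VMAS backend are installed.
from benchmarl.algorithms import IppoConfig
from benchmarl.environments import VmasTask
from benchmarl.experiment import Experiment, ExperimentConfig
from benchmarl.models.mlp import MlpConfig

# With no path argument, get_from_yaml() loads the defaults from
# benchmarl/conf/algorithm/<associated_class_name>.yaml
algorithm_config = IppoConfig.get_from_yaml()

experiment = Experiment(
    task=VmasTask.BALANCE.get_from_yaml(),
    algorithm_config=algorithm_config,  # Experiment turns this into an Ippo instance
    model_config=MlpConfig.get_from_yaml(),
    critic_model_config=MlpConfig.get_from_yaml(),
    seed=0,
    config=ExperimentConfig.get_from_yaml(),
)
experiment.run()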