facebookresearch · matteobettini · Nov 25, 2023 · Oct 21, 2023 · Oct 21, 2023 · Oct 21, 2023
diff --git a/.gitignore b/.gitignore
@@ -4,7 +4,10 @@
 **/outputs/
 **/multirun/
 
-
+# Docs
+docs/output/
+docs/source/generated/
+docs/build/
 
 # Byte-compiled / optimized / DLL files
 __pycache__/

diff --git a/.readthedocs.yaml b/.readthedocs.yaml
@@ -0,0 +1,31 @@
+# .readthedocs.yaml
+# Read the Docs configuration file
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+# Required
+version: 2
+
+# Set the OS, Python version and other tools you might need
+build:
+  os: ubuntu-22.04
+  tools:
+    python: "3.10"
+
+# Build documentation in the "docs/" directory with Sphinx
+sphinx:
+   fail_on_warning: true
+   configuration: docs/source/conf.py
+
+# Optionally build your docs in additional formats such as PDF and ePub
+formats:
+    - epub
+
+# Optional but recommended, declare the Python requirements required
+# to build your documentation
+# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
+python:
+    install:
+    - requirements: docs/requirements.txt
+    # Install our python package before building the docs
+    - method: pip
+      path: .
diff --git a/README.md b/README.md
@@ -1,19 +1,20 @@
-![BenchMARL](https://github.com/matteobettini/vmas-media/blob/main/media/benchmarl.png?raw=true)
+![BenchMARL](https://raw.githubusercontent.com/matteobettini/benchmarl_sphinx_theme/master/benchmarl_sphinx_theme/static/img/benchmarl.png?raw=true)
 
 
 # BenchMARL
 [![tests](https://github.com/facebookresearch/BenchMARL/actions/workflows/unit_tests.yml/badge.svg)](test)
 [![codecov](https://codecov.io/github/facebookresearch/BenchMARL/coverage.svg?branch=main)](https://codecov.io/gh/facebookresearch/BenchMARL)
+[![Documentation Status](https://readthedocs.org/projects/benchmarl/badge/?version=latest)](https://benchmarl.readthedocs.io/en/latest/?badge=latest)
 [![Python](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10-blue.svg)](https://www.python.org/downloads/)
 <a href="https://pypi.org/project/benchmarl"><img src="https://img.shields.io/pypi/v/benchmarl" alt="pypi version"></a>
 [![Downloads](https://static.pepy.tech/personalized-badge/benchmarl?period=total&units=international_system&left_color=grey&right_color=blue&left_text=Downloads)](https://pepy.tech/project/benchmarl)
+[![Discord Shield](https://dcbadge.vercel.app/api/server/jEEWCn6T3p?style=flat)](https://discord.gg/jEEWCn6T3p)
 
 ```bash
 python benchmarl/run.py algorithm=mappo task=vmas/balance
 ```
 
 
-
 [![Examples](https://img.shields.io/badge/Examples-blue.svg)](examples) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/facebookresearch/BenchMARL/blob/main/notebooks/run.ipynb)
 [![Static Badge](https://img.shields.io/badge/Benchmarks-Wandb-yellow)](https://wandb.ai/matteobettini/benchmarl-public/reportlist)
 
@@ -58,6 +59,7 @@ the domain and want to easily take a picture of the landscape.
   * [Reporting and plotting](#reporting-and-plotting)
   * [Extending](#extending)
   * [Configuring](#configuring)
+    + [Experiment](#experiment)
     + [Algorithm](#algorithm)
     + [Task](#task)
     + [Model](#model)
@@ -280,10 +282,9 @@ Currently available ones are:
 
 In the following, we report a table of the results:
 
-| **<p align="center">Environment</p>** | **<p align="center">Sample efficiency curves (all tasks)</p>**                            | **<p align="center">Performance profile</p>**                                             | **<p align="center">Aggregate scores</p>**                                                |
-|---------------------------------------|-------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------|
-| VMAS                                  | <img src="https://drive.google.com/uc?export=view&id=1fzfFn0q54gsALRAwmqD1hRTqQIadGPoE"/> | <img src="https://drive.google.com/uc?export=view&id=151pSR2sBluSpWiYxtq3jNX0tfE0vgAuR"/> | <img src="https://drive.google.com/uc?export=view&id=1q2So9V6sL8NHMtj6vL-S3KyzZi11Vfia"/> |
-
+| **<p align="center">Environment</p>** | **<p align="center">Sample efficiency curves (all tasks)</p>**                                                                                                                        | **<p align="center">Performance profile</p>**                                                                                                                               | **<p align="center">Aggregate scores</p>**                                                                                                                        |
+|---------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| VMAS                                  | <img src="https://raw.githubusercontent.com/matteobettini/benchmarl_sphinx_theme/master/benchmarl_sphinx_theme/static/img/benchmarks/vmas/environemnt_sample_efficiency_curves.png"/> | <img src="https://raw.githubusercontent.com/matteobettini/benchmarl_sphinx_theme/master/benchmarl_sphinx_theme/static/img/benchmarks/vmas/performance_profile_figure.png"/> | <img src="https://raw.githubusercontent.com/matteobettini/benchmarl_sphinx_theme/master/benchmarl_sphinx_theme/static/img/benchmarks/vmas/aggregate_scores.png"/> |
 
 ## Reporting and plotting
 
@@ -295,9 +296,9 @@ your benchmarks.  No more struggling with matplotlib and latex!
 
 [![Example](https://img.shields.io/badge/Example-blue.svg)](examples/plotting)
 
-![aggregate_scores](https://drive.google.com/uc?export=view&id=1q2So9V6sL8NHMtj6vL-S3KyzZi11Vfia)
-![sample_efficiancy](https://drive.google.com/uc?export=view&id=1fzfFn0q54gsALRAwmqD1hRTqQIadGPoE)
-![performace_profile](https://drive.google.com/uc?export=view&id=151pSR2sBluSpWiYxtq3jNX0tfE0vgAuR)
+![aggregate_scores](https://raw.githubusercontent.com/matteobettini/benchmarl_sphinx_theme/master/benchmarl_sphinx_theme/static/img/benchmarks/vmas/aggregate_scores.png)
+![sample_efficiency](https://raw.githubusercontent.com/matteobettini/benchmarl_sphinx_theme/master/benchmarl_sphinx_theme/static/img/benchmarks/vmas/environemnt_sample_efficiency_curves.png)
+![performance_profile](https://raw.githubusercontent.com/matteobettini/benchmarl_sphinx_theme/master/benchmarl_sphinx_theme/static/img/benchmarks/vmas/performance_profile_figure.png)
 
 
 ## Extending
@@ -322,7 +323,6 @@ in the script itself or via [hydra](https://hydra.cc/docs/intro/).
 We suggest to read the hydra documentation
 to get familiar with all its functionalities. 
 
-The project can be configured either the script itself or via hydra. 
 Each component in the project has a corresponding yaml configuration in the BenchMARL 
 [conf tree](benchmarl/conf). 
 Components' configurations are loaded from these files into python dataclasses that act 
@@ -333,8 +333,7 @@ You can also directly load and validate configuration yaml files without using h
 
 ### Experiment
 
-Experiment configurations are in [`benchmarl/conf/config.yaml`](benchmarl/conf/config.yaml),
-with the experiment hyperparameters in [`benchmarl/conf/experiment`](benchmarl/conf/experiment).
+Experiment configurations are in [`benchmarl/conf/config.yaml`](benchmarl/conf/config.yaml).
 Running custom experiments is extremely simplified by the [Hydra](https://hydra.cc/) configurations.
 The default configuration for the library is contained in the [`benchmarl/conf`](benchmarl/conf) folder.
 

diff --git a/benchmarl/__init__.py b/benchmarl/__init__.py
@@ -4,13 +4,22 @@
 #  LICENSE file in the root directory of this source tree.
 #
 
+
+__version__ = "0.0.4"
+
 import importlib
 
+import benchmarl.algorithms
+import benchmarl.benchmark
+import benchmarl.environments
+import benchmarl.experiment
+import benchmarl.models
+
 _has_hydra = importlib.util.find_spec("hydra") is not None
 
 if _has_hydra:
 
-    def load_hydra_schemas():
+    def _load_hydra_schemas():
         from hydra.core.config_store import ConfigStore
 
         from benchmarl.algorithms import algorithm_config_registry
@@ -28,4 +37,4 @@ def load_hydra_schemas():
         for task_schema_name, task_schema in _task_class_registry.items():
             cs.store(name=task_schema_name, group="task", node=task_schema)
 
-    load_hydra_schemas()
+    _load_hydra_schemas()
diff --git a/benchmarl/algorithms/__init__.py b/benchmarl/algorithms/__init__.py
@@ -4,6 +4,7 @@
 #  LICENSE file in the root directory of this source tree.
 #
 
+from .common import Algorithm, AlgorithmConfig
 from .iddpg import Iddpg, IddpgConfig
 from .ippo import Ippo, IppoConfig
 from .iql import Iql, IqlConfig
@@ -14,6 +15,27 @@
 from .qmix import Qmix, QmixConfig
 from .vdn import Vdn, VdnConfig
 
+classes = [
+    "Iddpg",
+    "IddpgConfig",
+    "Ippo",
+    "IppoConfig",
+    "Iql",
+    "IqlConfig",
+    "Isac",
+    "IsacConfig",
+    "Maddpg",
+    "MaddpgConfig",
+    "Mappo",
+    "MappoConfig",
+    "Masac",
+    "MasacConfig",
+    "Qmix",
+    "QmixConfig",
+    "Vdn",
+    "VdnConfig",
+]
+
 # A registry mapping "algoname" to its config dataclass
 # This is used to aid loading of algorithms from yaml
 algorithm_config_registry = {

diff --git a/benchmarl/algorithms/common.py b/benchmarl/algorithms/common.py
@@ -23,7 +23,7 @@
 from torchrl.objectives.utils import HardUpdate, SoftUpdate, TargetNetUpdater
 
 from benchmarl.models.common import ModelConfig
-from benchmarl.utils import DEVICE_TYPING, read_yaml_config
+from benchmarl.utils import _read_yaml_config, DEVICE_TYPING
 
 
 class Algorithm(ABC):
@@ -32,7 +32,7 @@ class Algorithm(ABC):
     This should be overridden by implemented algorithms
     and all abstract methods should be implemented.
 
-     Args:
+    Args:
         experiment (Experiment): the experiment class
     """
 
@@ -104,14 +104,13 @@ def _check_specs(self):
     def get_loss_and_updater(self, group: str) -> Tuple[LossModule, TargetNetUpdater]:
         """
         Get the LossModule and TargetNetUpdater for a specific group.
-        This function calls the abstract self._get_loss() which needs to be implemented.
+        This function calls the abstract :meth:`~benchmarl.algorithms.Algorithm._get_loss()` which needs to be implemented.
         The function will cache the output at the first call and return the cached values in future calls.
 
         Args:
             group (str): agent group of the loss and updater
 
         Returns: LossModule and TargetNetUpdater for the group
-
         """
         if group not in self._losses_and_updaters.keys():
             action_space = self.action_spec[group, "action"]
@@ -144,7 +143,7 @@ def get_replay_buffer(
     ) -> ReplayBuffer:
         """
         Get the ReplayBuffer for a specific group.
-        This function will check self.on_policy and create the buffer accordingly
+        This function will check ``self.on_policy`` and create the buffer accordingly
 
         Args:
             group (str): agent group of the loss and updater
@@ -165,7 +164,7 @@ def get_replay_buffer(
     def get_policy_for_loss(self, group: str) -> TensorDictModule:
         """
         Get the non-explorative policy for a specific group loss.
-        This function calls the abstract self._get_policy_for_loss() which needs to be implemented.
+        This function calls the abstract :meth:`~benchmarl.algorithms.Algorithm._get_policy_for_loss()` which needs to be implemented.
         The function will cache the output at the first call and return the cached values in future calls.
 
         Args:
@@ -192,7 +191,7 @@ def get_policy_for_loss(self, group: str) -> TensorDictModule:
     def get_policy_for_collection(self) -> TensorDictSequential:
         """
         Get the explorative policy for all groups together.
-        This function calls the abstract self._get_policy_for_collection() which needs to be implemented.
+        This function calls the abstract :meth:`~benchmarl.algorithms.Algorithm._get_policy_for_collection()` which needs to be implemented.
         The function will cache the output at the first call and return the cached values in future calls.
 
         Returns: TensorDictSequential representing all explorative policies
@@ -217,7 +216,7 @@ def get_policy_for_collection(self) -> TensorDictSequential:
     def get_parameters(self, group: str) -> Dict[str, Iterable]:
         """
         Get the dictionary mapping loss names to the relative parameters to optimize for a given group.
-        This function calls the abstract self._get_parameters() which needs to be implemented.
+        This function calls the abstract :meth:`~benchmarl.algorithms.Algorithm._get_parameters()` which needs to be implemented.
 
         Returns: a dictionary mapping loss names to a parameters' list
         """
@@ -323,13 +322,16 @@ class AlgorithmConfig:
     Dataclass representing an algorithm configuration.
     This should be overridden by implemented algorithms.
     Implementors should:
-     1. add configuration parameters for their algorithm
-     2. implement all abstract methods
+
+        1. add configuration parameters for their algorithm
+        2. implement all abstract methods
+
     """
 
     def get_algorithm(self, experiment) -> Algorithm:
         """
         Main function to turn the config into the associated algorithm
+
         Args:
             experiment (Experiment): the experiment class
 
@@ -349,7 +351,7 @@ def _load_from_yaml(name: str) -> Dict[str, Any]:
             / "algorithm"
             / f"{name.lower()}.yaml"
         )
-        return read_yaml_config(str(yaml_path.resolve()))
+        return _read_yaml_config(str(yaml_path.resolve()))
 
     @classmethod
     def get_from_yaml(cls, path: Optional[str] = None):
@@ -359,7 +361,7 @@ def get_from_yaml(cls, path: Optional[str] = None):
         Args:
             path (str, optional): The full path of the yaml file to load from.
                 If None, it will default to
-                benchmarl/conf/algorithm/self.associated_class().__name__
+                ``benchmarl/conf/algorithm/self.associated_class().__name__``
 
         Returns: the loaded AlgorithmConfig
         """
@@ -370,7 +372,7 @@ def get_from_yaml(cls, path: Optional[str] = None):
                 )
             )
         else:
-            return cls(**read_yaml_config(path))
+            return cls(**_read_yaml_config(path))
 
     @staticmethod
     @abstractmethod

diff --git a/benchmarl/algorithms/iddpg.py b/benchmarl/algorithms/iddpg.py
@@ -19,6 +19,16 @@
 
 
 class Iddpg(Algorithm):
+    """Same as :class:`~benchmarl.algorithms.Maddpg` (from `https://arxiv.org/abs/1706.02275 <https://arxiv.org/abs/1706.02275>`__) but with decentralized critics.
+
+    Args:
+        share_param_critic (bool): Whether to share the parameters of the critics within agent groups
+        loss_function (str): loss function for the value discrepancy. Can be one of "l1", "l2" or "smooth_l1".
+        delay_value (bool): whether to separate the target value networks from the value networks used for
+            data collection.
+
+    """
+
     def __init__(
         self, share_param_critic: bool, loss_function: str, delay_value: bool, **kwargs
     ):
@@ -227,6 +237,8 @@ def get_value_module(self, group: str) -> TensorDictModule:
 
 @dataclass
 class IddpgConfig(AlgorithmConfig):
+    """Configuration dataclass for :class:`~benchmarl.algorithms.Iddpg`."""
+
     share_param_critic: bool = MISSING
     loss_function: str = MISSING
     delay_value: bool = MISSING

diff --git a/benchmarl/algorithms/ippo.py b/benchmarl/algorithms/ippo.py
@@ -22,6 +22,21 @@
 
 
 class Ippo(Algorithm):
+    """Independent PPO (from `https://arxiv.org/abs/2011.09533 <https://arxiv.org/abs/2011.09533>`__).
+
+    Args:
+        share_param_critic (bool): Whether to share the parameters of the critics within agent groups
+        clip_epsilon (scalar): weight clipping threshold in the clipped PPO loss equation.
+        entropy_coef (scalar): entropy multiplier when computing the total loss.
+        critic_coef (scalar): critic loss multiplier when computing the total loss.
+        loss_critic_type (str): loss function for the value discrepancy.
+            Can be one of "l1", "l2" or "smooth_l1".
+        lmbda (float): The GAE lambda
+        scale_mapping (str): positive mapping function to be used with the std.
+            choices: "softplus", "exp", "relu", "biased_softplus_1".
+
+    """
+
     def __init__(
         self,
         share_param_critic: bool,
@@ -270,6 +285,8 @@ def get_critic(self, group: str) -> TensorDictModule:
 
 @dataclass
 class IppoConfig(AlgorithmConfig):
+    """Configuration dataclass for :class:`~benchmarl.algorithms.Ippo`."""
+
     share_param_critic: bool = MISSING
     clip_epsilon: float = MISSING
     entropy_coef: float = MISSING

diff --git a/benchmarl/algorithms/iql.py b/benchmarl/algorithms/iql.py
@@ -18,6 +18,15 @@
 
 
 class Iql(Algorithm):
+    """Independent Q Learning (from `https://www.semanticscholar.org/paper/Multi-Agent-Reinforcement-Learning%3A-Independent-Tan/59de874c1e547399b695337bcff23070664fa66e <https://www.semanticscholar.org/paper/Multi-Agent-Reinforcement-Learning%3A-Independent-Tan/59de874c1e547399b695337bcff23070664fa66e>`__).
+
+    Args:
+        loss_function (str): loss function for the value discrepancy. Can be one of "l1", "l2" or "smooth_l1".
+        delay_value (bool): whether to separate the target value networks from the value networks used for
+            data collection.
+
+    """
+
     def __init__(self, delay_value: bool, loss_function: str, **kwargs):
         super().__init__(**kwargs)
 
@@ -175,6 +184,8 @@ def process_batch(self, group: str, batch: TensorDictBase) -> TensorDictBase:
 
 @dataclass
 class IqlConfig(AlgorithmConfig):
+    """Configuration dataclass for :class:`~benchmarl.algorithms.Iql`."""
+
     delay_value: bool = MISSING
     loss_function: str = MISSING