diff --git a/torchrl/envs/libs/vmas.py b/torchrl/envs/libs/vmas.py index 339b981a742..1a5d0e2ce15 100644 --- a/torchrl/envs/libs/vmas.py +++ b/torchrl/envs/libs/vmas.py @@ -115,13 +115,21 @@ class VmasWrapper(_EnvWrapper): env (``vmas.simulator.environment.environment.Environment``): the vmas environment to wrap. Keyword Args: - num_envs (int): Number of vectorized simulation environments. - device (torch.device, optional): Device for simulation. - continuous_actions (bool, optional): Weather to use continuous actions. Defaults to ``True``. - max_steps (int, optional): Maximum number of steps in each vectorized environment after which done is returned. - Defaults to ``None`` (no truncation). + num_envs (int): Number of vectorized simulation environments. VMAS performs vectorized simulations using PyTorch. + This argument indicates the number of vectorized environments that should be simulated in a batch. It will also + determine the batch size of the environment. + device (torch.device, optional): Device for simulation. Defaults to the default device. All the tensors created by VMAS + will be placed on this device. + continuous_actions (bool, optional): Whether to use continuous actions. Defaults to ``True``. If ``False``, actions + will be discrete. The number of actions and their size will depend on the chosen scenario. + See the VMAS repository for more info. + max_steps (int, optional): Horizon of the task. Defaults to ``None`` (infinite horizon). Each VMAS scenario can + be terminating or not. If ``max_steps`` is specified, + the scenario is also terminated (and the ``"terminated"`` flag is set) whenever this horizon is reached. + Unlike gym's ``TimeLimit`` transform or torchrl's :class:`~torchrl.envs.transforms.StepCounter`, + this argument will not set the ``"truncated"`` entry in the tensordict. categorical_actions (bool, optional): if the environment actions are discrete, whether to transform - them to categorical or one-hot. 
+ them to categorical or one-hot. Defaults to ``True``. group_map (MarlGroupMapType or Dict[str, List[str]], optional): how to group agents in tensordicts for input/output. By default, if the agent names follow the ``"<name>_<int>"`` convention, they will be grouped by ``"<name>"``. If they do not follow this convention, they will be all put @@ -627,22 +635,38 @@ class VmasEnv(VmasWrapper): Paper: https://arxiv.org/abs/2207.03530 Args: - scenario (str or vmas.simulator.scenario.BaseScenario): the vmas environment to build. + scenario (str or vmas.simulator.scenario.BaseScenario): the vmas scenario to build. + Must be one of :attr:`~.available_envs`. For a description and rendering of available scenarios see + `the README <https://github.com/proroklab/VectorizedMultiAgentSimulator>`__. + Keyword Args: - num_envs (int): Number of vectorized simulation environments. - device (torch.device, optional): Device for simulation. - continuous_actions (bool, optional): Weather to use continuous actions. Defaults to ``True``. - max_steps (int, optional): Maximum number of steps in each vectorized environment after which done is returned. - Defaults to ``None`` (no truncation). + num_envs (int): Number of vectorized simulation environments. VMAS performs vectorized simulations using PyTorch. + This argument indicates the number of vectorized environments that should be simulated in a batch. It will also + determine the batch size of the environment. + device (torch.device, optional): Device for simulation. Defaults to the default device. All the tensors created by VMAS + will be placed on this device. + continuous_actions (bool, optional): Whether to use continuous actions. Defaults to ``True``. If ``False``, actions + will be discrete. The number of actions and their size will depend on the chosen scenario. + See the VMAS repository for more info. + max_steps (int, optional): Horizon of the task. Defaults to ``None`` (infinite horizon). Each VMAS scenario can + be terminating or not. 
If ``max_steps`` is specified, + the scenario is also terminated (and the ``"terminated"`` flag is set) whenever this horizon is reached. + Unlike gym's ``TimeLimit`` transform or torchrl's :class:`~torchrl.envs.transforms.StepCounter`, + this argument will not set the ``"truncated"`` entry in the tensordict. categorical_actions (bool, optional): if the environment actions are discrete, whether to transform - them to categorical or one-hot. + them to categorical or one-hot. Defaults to ``True``. group_map (MarlGroupMapType or Dict[str, List[str]], optional): how to group agents in tensordicts for input/output. By default, if the agent names follow the ``"<name>_<int>"`` convention, they will be grouped by ``"<name>"``. If they do not follow this convention, they will be all put in one group named ``"agents"``. Otherwise, a group map can be specified or selected from some premade options. See :class:`~torchrl.envs.utils.MarlGroupMapType` for more info. + **kwargs (Dict, optional): These are additional arguments that can be passed to the VMAS scenario constructor + (e.g., number of agents, reward sparsity). The available arguments will vary based on the chosen scenario. + To see the available arguments for a specific scenario, see the constructor in its file from + `the scenario folder <https://github.com/proroklab/VectorizedMultiAgentSimulator/tree/main/vmas/scenarios>`__. + Attributes: group_map (Dict[str, List[str]]): how to group agents in tensordicts for