From c328f982d738c231553d38b07a8279ecc0c830d2 Mon Sep 17 00:00:00 2001 From: Bam4d Date: Wed, 10 Mar 2021 19:49:51 +0000 Subject: [PATCH 01/45] working on conditional action distributions --- .../rllib_single_agent_conditional_actions.py | 63 +++++++++++++ .../agents/global_average_pooling_agent.py | 2 + .../torch/conditional_actions/__init__.py | 0 .../conditional_action_exploration.py | 91 +++++++++++++++++++ .../conditional_action_mixin.py | 66 ++++++++++++++ .../conditional_action_policy_trainer.py | 26 ++++++ python/griddly/util/rllib/wrappers/core.py | 5 +- 7 files changed, 251 insertions(+), 2 deletions(-) create mode 100644 python/examples/rllib/rllib_single_agent_conditional_actions.py create mode 100644 python/griddly/util/rllib/torch/conditional_actions/__init__.py create mode 100644 python/griddly/util/rllib/torch/conditional_actions/conditional_action_exploration.py create mode 100644 python/griddly/util/rllib/torch/conditional_actions/conditional_action_mixin.py create mode 100644 python/griddly/util/rllib/torch/conditional_actions/conditional_action_policy_trainer.py diff --git a/python/examples/rllib/rllib_single_agent_conditional_actions.py b/python/examples/rllib/rllib_single_agent_conditional_actions.py new file mode 100644 index 000000000..0a7ed1ae4 --- /dev/null +++ b/python/examples/rllib/rllib_single_agent_conditional_actions.py @@ -0,0 +1,63 @@ +import os +import sys + +import ray +from ray import tune +from ray.rllib.models import ModelCatalog +from ray.tune.registry import register_env + +from griddly import gd +from griddly.util.rllib.torch import GAPAgent +from griddly.util.rllib.torch.conditional_actions.conditional_action_policy_trainer import ConditionalActionImpalaTrainer +from griddly.util.rllib.wrappers.core import RLlibEnv + +if __name__ == '__main__': + sep = os.pathsep + os.environ['PYTHONPATH'] = sep.join(sys.path) + + ray.init(num_gpus=1, local_mode=True) + + env_name = "ray-griddly-env" + + register_env(env_name, RLlibEnv) + ModelCatalog.register_custom_model("GAP", GAPAgent) + + max_training_steps = 100000000 + + config = { + 'framework': 'torch', + 'num_workers': 1, + 'num_envs_per_worker': 1, + + 'model': { + 'custom_model': 'GAP', + 'custom_model_config': {} + }, + 'env': env_name, + 'env_config': { + 'record_video_config': { + 'frequency': 100000 + }, + + 'conditional_action_sampling': True, + 'invalid_action_masking': True, + 'random_level_on_reset': True, + 'yaml_file': 'Single-Player/GVGAI/clusters_partially_observable.yaml', + 'global_observer_type': gd.ObserverType.SPRITE_2D, + 'max_steps': 1000, + }, + 'entropy_coeff_schedule': [ + [0, 0.01], + [max_training_steps, 0.0] + ], + 'lr_schedule': [ + [0, 0.005], + [max_training_steps, 0.0] + ] + } + + stop = { + "timesteps_total": max_training_steps, + } + + result = tune.run(ConditionalActionImpalaTrainer, config=config, stop=stop) diff --git a/python/griddly/util/rllib/torch/agents/global_average_pooling_agent.py b/python/griddly/util/rllib/torch/agents/global_average_pooling_agent.py index 2929508bb..d2249abcb 100644 --- a/python/griddly/util/rllib/torch/agents/global_average_pooling_agent.py +++ b/python/griddly/util/rllib/torch/agents/global_average_pooling_agent.py @@ -1,4 +1,5 @@ import numpy as np +from gym.spaces import Dict from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 from torch import nn @@ -32,6 +33,7 @@ def __init__(self, obs_space, action_space, num_outputs, model_config, name): nn.Module.__init__(self) self._num_objects = obs_space.shape[2] + self._num_actions = 
num_outputs self.network = nn.Sequential( diff --git a/python/griddly/util/rllib/torch/conditional_actions/__init__.py b/python/griddly/util/rllib/torch/conditional_actions/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_exploration.py b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_exploration.py new file mode 100644 index 000000000..341ab2879 --- /dev/null +++ b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_exploration.py @@ -0,0 +1,91 @@ +import torch +from gym.spaces import Discrete, MultiDiscrete +from ray.rllib.models.torch.torch_action_dist import TorchCategorical, TorchMultiCategorical +from torch.distributions import Categorical +import numpy as np + +class TorchConditionalMaskingExploration(): + + def __init__(self, model, dist_inputs, valid_action_trees, explore=False): + self._valid_action_trees = valid_action_trees + + self._num_inputs = dist_inputs.shape[0] + if isinstance(model.action_space, Discrete): + self._action_space_shape = [model.action_space.n] + elif isinstance(model.action_space, MultiDiscrete): + self._action_space_shape = model.action_space.nvec + + self._num_action_logits = np.sum(self._action_space_shape) + self._num_action_parts = len(self._action_space_shape) + + self._explore = explore + + self._inputs_split = dist_inputs.split(tuple(self._action_space_shape), dim=1) + + def _mask_and_sample(self, options, logits): + + mask = torch.zeros([logits.shape[0]]) + mask[options] = 1 + + logits += torch.log(mask) + dist = Categorical(logits=logits) + sampled = dist.sample() + logp = dist.log_prob(sampled) + + return sampled, logits, logp, mask + + def get_actions_and_mask(self): + + actions = torch.zeros([self._num_inputs, self._num_action_parts]) + masked_logits = torch.zeros([self._num_inputs, self._num_action_logits]) + mask = torch.zeros([self._num_inputs, self._num_action_logits]) + logp_sums = torch.zeros([self._num_inputs]) + + if self._valid_action_trees is not None: + + for i in range(self._num_inputs): + if len(self._valid_action_trees) >= 1: + + subtree = self._valid_action_trees[i] + subtree_options = list(subtree.keys()) + + # In the case there are no available actions for the player + if len(subtree_options) == 0: + subtree = {} + for _ in range(self._num_action_parts): + subtree[0] = {} + subtree_options = [0] + + logp_parts = torch.zeros([self._num_action_parts]) + mask_offset = 0 + for a in range(self._num_action_parts): + dist_part = self._inputs_split[a] + sampled, masked_part_logits, logp, mask_part = self._mask_and_sample(subtree_options, dist_part[i]) + + # Set the action and the mask for each part of the action + actions[i, a] = sampled + masked_logits[i, mask_offset:mask_offset + self._action_space_shape[a]] = masked_part_logits + mask[i, mask_offset:mask_offset + self._action_space_shape[a]] = mask_part + + logp_parts[a] = logp + + if mask_part.sum() == 0: + raise RuntimeError('mask calculated incorrectly') + + mask_offset += self._action_space_shape[a] + + if isinstance(subtree, dict): + subtree = subtree[int(sampled)] + if isinstance(subtree, dict): + subtree_options = list(subtree.keys()) + else: + # Leaf nodes with action_id list + subtree_options = subtree + + logp_sums[i] = torch.sum(logp_parts) + + # if its a discrete then flatten the space + if self._num_action_parts == 1: + actions = actions.flatten() + + return actions, masked_logits, logp_sums, mask \ No newline at end of file diff 
--git a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_mixin.py b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_mixin.py new file mode 100644 index 000000000..ebe5b8baa --- /dev/null +++ b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_mixin.py @@ -0,0 +1,66 @@ +import numpy as np +import torch +from ray.rllib import Policy, SampleBatch +from ray.rllib.utils import override +from ray.rllib.utils.torch_ops import convert_to_non_torch_type + +from griddly.util.rllib.torch.conditional_actions.conditional_action_exploration import TorchConditionalMaskingExploration + + +class ConditionalActionMixin: + + @override(Policy) + def compute_actions_from_input_dict( + self, + input_dict, + explore=None, + timestep = None, + **kwargs): + + explore = explore if explore is not None else self.config["explore"] + timestep = timestep if timestep is not None else self.global_timestep + + with torch.no_grad(): + # Pass lazy (torch) tensor dict to Model as `input_dict`. + input_dict = self._lazy_tensor_dict(input_dict) + # Pack internal state inputs into (separate) list. + state_batches = [ + input_dict[k] for k in input_dict.keys() if "state_in" in k[:8] + ] + # Calculate RNN sequence lengths. + seq_lens = np.array([1] * len(input_dict["obs"])) \ + if state_batches else None + + self._is_recurrent = state_batches is not None and state_batches != [] + + # Switch to eval mode. + self.model.eval() + + dist_inputs, state_out = self.model(input_dict, state_batches, + seq_lens) + + infos = input_dict[SampleBatch.INFOS] if SampleBatch.INFOS in input_dict else {} + + valid_action_trees = infos[0]['valid_action_trees'] if isinstance(infos, np.ndarray) and 'valid_action_trees' in infos[0] else None + + exploration = TorchConditionalMaskingExploration( + self.model, + dist_inputs, + valid_action_trees, + explore, + ) + + actions, masked_logits, logp, mask = exploration.get_actions_and_mask() + + input_dict[SampleBatch.ACTIONS] = actions + + extra_fetches = { + SampleBatch.ACTION_DIST_INPUTS: dist_inputs, + SampleBatch.ACTION_PROB: torch.exp(logp.float()), + SampleBatch.ACTION_LOGP: logp + } + + # Update our global timestep by the batch size. 
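# A note on the `valid_action_trees` consumed above (illustrative sketch; the
# example values are hypothetical, not taken from the patch): each player's tree
# is a nested dict keyed by x, then y, then action-type index, with the list of
# valid action_ids at the leaves, e.g.
#
#   example_tree = {2: {3: {0: [1, 2]}}}  # unit at (2, 3): action type 0, id 1 or 2
#
# TorchConditionalMaskingExploration descends one level of this dict per part of
# the MultiDiscrete action, masking each part's logits to the keys (or leaf ids)
# available at that level before sampling.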
+ self.global_timestep += len(input_dict[SampleBatch.CUR_OBS]) + + return convert_to_non_torch_type((actions, state_out, extra_fetches)) diff --git a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_policy_trainer.py b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_policy_trainer.py new file mode 100644 index 000000000..440b7360a --- /dev/null +++ b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_policy_trainer.py @@ -0,0 +1,26 @@ +from ray.rllib.agents.impala import ImpalaTrainer +from ray.rllib.agents.impala.vtrace_torch_policy import VTraceTorchPolicy +from ray.rllib.policy.torch_policy import LearningRateSchedule, EntropyCoeffSchedule + +from griddly.util.rllib.torch.conditional_actions.conditional_action_mixin import ConditionalActionMixin + +def setup_mixins(policy, obs_space, action_space, config): + ConditionalActionMixin.__init__(policy) + EntropyCoeffSchedule.__init__(policy, config["entropy_coeff"], + config["entropy_coeff_schedule"]) + LearningRateSchedule.__init__(policy, config["lr"], config["lr_schedule"]) + +ConditionalActionVTraceTorchPolicy = VTraceTorchPolicy.with_updates( + name="ConditionalMaskingVTraceTorchPolicy", + before_init=setup_mixins, + mixins=[LearningRateSchedule, EntropyCoeffSchedule, ConditionalActionMixin] +) + +def get_vtrace_policy_class(config): + if config['framework'] == 'torch': + return ConditionalActionVTraceTorchPolicy + else: + raise NotImplementedError('Tensorflow not supported') + +ConditionalActionImpalaTrainer = ImpalaTrainer.with_updates(default_policy=ConditionalActionVTraceTorchPolicy, + get_policy_class=get_vtrace_policy_class) \ No newline at end of file diff --git a/python/griddly/util/rllib/wrappers/core.py b/python/griddly/util/rllib/wrappers/core.py index 3c96ee659..d504b57c7 100644 --- a/python/griddly/util/rllib/wrappers/core.py +++ b/python/griddly/util/rllib/wrappers/core.py @@ -58,6 +58,7 @@ def __init__(self, env_config): super().__init__(**env_config) self.invalid_action_masking = env_config.get('invalid_action_masking', False) + self.conditional_action_sampling = env_config.get('conditional_action_sampling', False) self._record_video_config = env_config.get('record_video_config', None) self._random_level_on_reset = env_config.get('random_level_on_reset', False) @@ -149,7 +150,7 @@ def reset(self, **kwargs): observation = super().reset(**kwargs) self.set_transform() - if self.invalid_action_masking: + if self.conditional_action_sampling: self.last_valid_action_trees = self._build_valid_action_trees() return self._transform(observation) @@ -161,7 +162,7 @@ def step(self, action): self._env_steps += 1 - if self.invalid_action_masking: + if self.conditional_action_sampling: self.last_valid_action_trees = self._build_valid_action_trees() info['valid_action_trees'] = self.last_valid_action_trees From 8f62d4fa37a9ea19e261828d75f4dac9126b5432 Mon Sep 17 00:00:00 2001 From: Bam4d Date: Thu, 11 Mar 2021 20:17:07 +0000 Subject: [PATCH 02/45] building tree in c++ side for speed --- bindings/python.cpp | 1 + bindings/wrapper/GameWrapper.cpp | 74 +++++- python/examples/rllib/rllib_multi_agent.py | 2 +- python/examples/rllib/rllib_single_agent.py | 2 +- .../rllib_single_agent_conditional_actions.py | 18 +- .../conditional_action_exploration.py | 27 +- .../conditional_action_mixin.py | 10 +- .../conditional_action_policy_trainer.py | 11 +- .../griddly/util/rllib/wrappers/__init__.py | 0 python/griddly/util/rllib/wrappers/core.py | 236 ------------------ 10 files changed, 
108 insertions(+), 273 deletions(-)
 delete mode 100644 python/griddly/util/rllib/wrappers/__init__.py
 delete mode 100644 python/griddly/util/rllib/wrappers/core.py

diff --git a/bindings/python.cpp b/bindings/python.cpp
index ce84016ff..a5fde9813 100644
--- a/bindings/python.cpp
+++ b/bindings/python.cpp
@@ -56,6 +56,7 @@ PYBIND11_MODULE(python_griddly, m) {
   // Get available actions for objects in the current game
   game_process.def("get_available_actions", &Py_GameWrapper::getAvailableActionNames);
   game_process.def("get_available_action_ids", &Py_GameWrapper::getAvailableActionIds);
+  game_process.def("build_valid_action_trees", &Py_GameWrapper::buildValidActionTrees);

   // Width and height of the game grid
   game_process.def("get_width", &Py_GameWrapper::getWidth);
diff --git a/bindings/wrapper/GameWrapper.cpp b/bindings/wrapper/GameWrapper.cpp
index 2b9510f20..da04ae768 100644
--- a/bindings/wrapper/GameWrapper.cpp
+++ b/bindings/wrapper/GameWrapper.cpp
@@ -39,8 +39,65 @@ class Py_GameWrapper {
     return player;
   }

-  uint32_t getNumPlayers() const {
-    return gameProcess_->getNumPlayers();
+  std::vector<py::dict> buildValidActionTrees() const {
+
+    std::vector<py::dict> valid_action_trees;
+    auto externalActionNames = gdyFactory_->getExternalActionNames();
+    for (int playerId = 1; playerId <= playerCount_; playerId++) {
+      py::dict valid_action_tree;
+      for (auto actionNamesAtLocation : gameProcess_->getAvailableActionNames(playerId)) {
+        auto location = actionNamesAtLocation.first;
+        auto actionNames = actionNamesAtLocation.second;
+
+        for (auto actionName : actionNames) {
+          auto& treePtr = valid_action_tree;
+          auto actionInputsDefinitions = gdyFactory_->getActionInputsDefinitions();
+          if (actionInputsDefinitions.find(actionName) != actionInputsDefinitions.end()) {
+            auto locationVec = glm::ivec2{location[0], location[1]};
+            auto actionIdsForName = gameProcess_->getAvailableActionIdsAtLocation(locationVec, actionName);
+
+            if (actionIdsForName.size() > 0) {
+              if (gdyFactory_->getAvatarObject().length() == 0) {
+                auto py_x = py::cast(locationVec[0]);
+                auto py_y = py::cast(locationVec[1]);
+                if(!treePtr.contains(py_x)) {
+                  treePtr[py_x] = py::dict();
+                }
+
+                treePtr = treePtr[py_x];
+
+                if(!treePtr.contains(py_y)) {
+                  treePtr[py_y] = py::dict();
+                }
+
+                treePtr = treePtr[py_y];
+              }
+
+              if (externalActionNames.size() > 1) {
+                auto py_actionName = py::cast(actionName);
+                if(!treePtr.contains(py_actionName)) {
+                  treePtr[py_actionName] = py::dict();
+                }
+
+                treePtr = treePtr[py_actionName];
+              }
+
+              for(auto id : actionIdsForName) {
+                auto py_id = py::cast(id);
+                treePtr[py_id] = py::dict();
+              }
+
+              auto py_nop = py::cast(0);
+              treePtr[py_nop] = py::dict();
+
+            }
+          }
+        }
+      }
+      valid_action_trees.push_back(valid_action_tree);
+    }
+
+    return valid_action_trees;
   }

   py::dict getAvailableActionNames(int playerId) const {
@@ -106,8 +163,6 @@ class Py_GameWrapper {
   }

   py::tuple stepParallel(py::buffer stepArray) {
-
-
     auto stepArrayInfo = stepArray.request();
     if (stepArrayInfo.format != "l" && stepArrayInfo.format != "i") {
       auto error = fmt::format("Invalid data type {0}, must be an integer.", stepArrayInfo.format);
@@ -130,7 +185,7 @@
     }

     auto externalActionNames = gdyFactory_->getExternalActionNames();
-
+
     std::vector<int32_t> playerRewards;
     bool terminated;
     py::dict info;
@@ -138,7 +193,7 @@
     for (int p = 0; p < playerSize; p++) {
       std::string actionName;
       std::vector<int32_t> actionArray;
-      auto pStr = (int32_t *)stepArrayInfo.ptr + p * playerStride;
+      auto pStr = (int32_t*)stepArrayInfo.ptr + p * playerStride;

       bool lastPlayer = p == (playerSize - 1);
@@ -173,7 +228,7 @@
       auto playerStepResult = players_[p]->stepSingle(actionName, actionArray, lastPlayer);

       playerRewards.push_back(playerStepResult[0].cast<int32_t>());
-      if(lastPlayer) {
+      if (lastPlayer) {
         terminated = playerStepResult[1].cast<bool>();
         info = playerStepResult[2];
       }
@@ -253,7 +308,6 @@
   }

   py::dict getGlobalVariables(std::vector<std::string> variables) const {
-
     py::dict py_globalVariables;
     auto globalVariables = gameProcess_->getGrid()->getGlobalVariables();
@@ -262,7 +316,7 @@
       auto globalVariableMap = globalVariables[variableNameIt];

-      for(auto playerVariableIt : globalVariableMap) {
+      for (auto playerVariableIt : globalVariableMap) {
         resolvedGlobalVariableMap.insert({playerVariableIt.first, *playerVariableIt.second});
       }
@@ -280,7 +334,7 @@
     py::dict py_event;

     py::dict rewards;
-    for (auto& reward: historyEvent.rewards) {
+    for (auto& reward : historyEvent.rewards) {
       rewards[py::cast(reward.first)] = reward.second;
     }
diff --git a/python/examples/rllib/rllib_multi_agent.py b/python/examples/rllib/rllib_multi_agent.py
index 0b80a9ca3..1560a127f 100644
--- a/python/examples/rllib/rllib_multi_agent.py
+++ b/python/examples/rllib/rllib_multi_agent.py
@@ -10,7 +10,7 @@
 from griddly import gd
 from griddly.util.rllib.torch.agents.conv_agent import SimpleConvAgent
-from griddly.util.rllib.wrappers.core import RLlibMultiAgentWrapper, RLlibEnv
+from griddly.util.rllib.env.core import RLlibMultiAgentWrapper, RLlibEnv

 if __name__ == '__main__':
     sep = os.pathsep
diff --git a/python/examples/rllib/rllib_single_agent.py b/python/examples/rllib/rllib_single_agent.py
index c9a603505..eda9d41e3 100644
--- a/python/examples/rllib/rllib_single_agent.py
+++ b/python/examples/rllib/rllib_single_agent.py
@@ -9,7 +9,7 @@
 from griddly import gd
 from griddly.util.rllib.torch import GAPAgent
-from griddly.util.rllib.wrappers.core import RLlibEnv
+from griddly.util.rllib.env.core import RLlibEnv

 if __name__ == '__main__':
     sep = os.pathsep
diff --git a/python/examples/rllib/rllib_single_agent_conditional_actions.py b/python/examples/rllib/rllib_single_agent_conditional_actions.py
index 0a7ed1ae4..8e1920751 100644
--- a/python/examples/rllib/rllib_single_agent_conditional_actions.py
+++ b/python/examples/rllib/rllib_single_agent_conditional_actions.py
@@ -9,25 +9,25 @@
 from griddly import gd
 from griddly.util.rllib.torch import GAPAgent
 from griddly.util.rllib.torch.conditional_actions.conditional_action_policy_trainer import ConditionalActionImpalaTrainer
-from griddly.util.rllib.wrappers.core import RLlibEnv
+from griddly.util.rllib.env.core import RLlibEnv

 if __name__ == '__main__':
     sep = os.pathsep
     os.environ['PYTHONPATH'] = sep.join(sys.path)

-    ray.init(num_gpus=1, local_mode=True)
+    ray.init(num_gpus=1)

     env_name = "ray-griddly-env"

     register_env(env_name, RLlibEnv)
     ModelCatalog.register_custom_model("GAP", GAPAgent)

-    max_training_steps = 100000000
+    max_training_steps = 5000000

     config = {
         'framework': 'torch',
-        'num_workers': 1,
-        'num_envs_per_worker': 1,
+        'num_workers': 6,
+        'num_envs_per_worker': 2,

         'model': {
             'custom_model': 'GAP',
             'custom_model_config': {}
@@ -39,8 +39,8 @@
                 'frequency': 100000
             },

-            'conditional_action_sampling': True,
-            'invalid_action_masking': True,
+            'invalid_action_masking': tune.grid_search([True, False]),
+            'generate_valid_action_trees': tune.grid_search([True, False]),
             'random_level_on_reset': True,
             'yaml_file': 'Single-Player/GVGAI/clusters_partially_observable.yaml',
'global_observer_type': gd.ObserverType.SPRITE_2D, @@ -53,7 +53,9 @@ 'lr_schedule': [ [0, 0.005], [max_training_steps, 0.0] - ] + ], + + } stop = { diff --git a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_exploration.py b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_exploration.py index 341ab2879..4f6a513d0 100644 --- a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_exploration.py +++ b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_exploration.py @@ -6,7 +6,7 @@ class TorchConditionalMaskingExploration(): - def __init__(self, model, dist_inputs, valid_action_trees, explore=False): + def __init__(self, model, dist_inputs, valid_action_trees, explore=False, invalid_action_masking=False): self._valid_action_trees = valid_action_trees self._num_inputs = dist_inputs.shape[0] @@ -18,16 +18,21 @@ def __init__(self, model, dist_inputs, valid_action_trees, explore=False): self._num_action_logits = np.sum(self._action_space_shape) self._num_action_parts = len(self._action_space_shape) + self._invalid_action_masking = invalid_action_masking + self._explore = explore self._inputs_split = dist_inputs.split(tuple(self._action_space_shape), dim=1) def _mask_and_sample(self, options, logits): - mask = torch.zeros([logits.shape[0]]) + #if self._invalid_action_masking: + mask = torch.zeros([logits.shape[0]]).to(logits.device) mask[options] = 1 - logits += torch.log(mask) + #else: + # mask = torch.ones([logits.shape[0]]) + dist = Categorical(logits=logits) sampled = dist.sample() logp = dist.log_prob(sampled) @@ -51,10 +56,11 @@ def get_actions_and_mask(self): # In the case there are no available actions for the player if len(subtree_options) == 0: - subtree = {} + build_tree = subtree for _ in range(self._num_action_parts): - subtree[0] = {} - subtree_options = [0] + build_tree[0] = {} + build_tree = build_tree[0] + subtree_options = list(subtree.keys()) logp_parts = torch.zeros([self._num_action_parts]) mask_offset = 0 @@ -74,13 +80,8 @@ def get_actions_and_mask(self): mask_offset += self._action_space_shape[a] - if isinstance(subtree, dict): - subtree = subtree[int(sampled)] - if isinstance(subtree, dict): - subtree_options = list(subtree.keys()) - else: - # Leaf nodes with action_id list - subtree_options = subtree + subtree = subtree[int(sampled)] + subtree_options = list(subtree.keys()) logp_sums[i] = torch.sum(logp_parts) diff --git a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_mixin.py b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_mixin.py index ebe5b8baa..199d3f825 100644 --- a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_mixin.py +++ b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_mixin.py @@ -41,13 +41,21 @@ def compute_actions_from_input_dict( infos = input_dict[SampleBatch.INFOS] if SampleBatch.INFOS in input_dict else {} - valid_action_trees = infos[0]['valid_action_trees'] if isinstance(infos, np.ndarray) and 'valid_action_trees' in infos[0] else None + valid_action_trees = [] + for info in infos: + if isinstance(info, dict) and 'valid_action_tree' in info: + valid_action_trees.append(info['valid_action_tree']) + else: + valid_action_trees.append({}) + + invalid_action_masking = self.config["env_config"].get("invalid_action_masking", False) exploration = TorchConditionalMaskingExploration( self.model, dist_inputs, valid_action_trees, explore, + invalid_action_masking, ) actions, 
masked_logits, logp, mask = exploration.get_actions_and_mask() diff --git a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_policy_trainer.py b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_policy_trainer.py index 440b7360a..441da7d74 100644 --- a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_policy_trainer.py +++ b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_policy_trainer.py @@ -1,26 +1,31 @@ -from ray.rllib.agents.impala import ImpalaTrainer +from ray.rllib.agents import with_common_config +from ray.rllib.agents.impala import ImpalaTrainer, DEFAULT_CONFIG as IMPALA_CONFIG from ray.rllib.agents.impala.vtrace_torch_policy import VTraceTorchPolicy from ray.rllib.policy.torch_policy import LearningRateSchedule, EntropyCoeffSchedule from griddly.util.rllib.torch.conditional_actions.conditional_action_mixin import ConditionalActionMixin + def setup_mixins(policy, obs_space, action_space, config): ConditionalActionMixin.__init__(policy) EntropyCoeffSchedule.__init__(policy, config["entropy_coeff"], config["entropy_coeff_schedule"]) LearningRateSchedule.__init__(policy, config["lr"], config["lr_schedule"]) + ConditionalActionVTraceTorchPolicy = VTraceTorchPolicy.with_updates( - name="ConditionalMaskingVTraceTorchPolicy", + name="ConditionalActionVTraceTorchPolicy", before_init=setup_mixins, mixins=[LearningRateSchedule, EntropyCoeffSchedule, ConditionalActionMixin] ) + def get_vtrace_policy_class(config): if config['framework'] == 'torch': return ConditionalActionVTraceTorchPolicy else: raise NotImplementedError('Tensorflow not supported') + ConditionalActionImpalaTrainer = ImpalaTrainer.with_updates(default_policy=ConditionalActionVTraceTorchPolicy, - get_policy_class=get_vtrace_policy_class) \ No newline at end of file + get_policy_class=get_vtrace_policy_class) diff --git a/python/griddly/util/rllib/wrappers/__init__.py b/python/griddly/util/rllib/wrappers/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/python/griddly/util/rllib/wrappers/core.py b/python/griddly/util/rllib/wrappers/core.py deleted file mode 100644 index d504b57c7..000000000 --- a/python/griddly/util/rllib/wrappers/core.py +++ /dev/null @@ -1,236 +0,0 @@ -from uuid import uuid1 -from collections import defaultdict -from enum import Enum -from typing import Tuple - -import gym -from gym.spaces import Dict -from ray.rllib import MultiAgentEnv -from ray.rllib.utils.typing import MultiAgentDict - -from griddly import GymWrapper -import numpy as np - -from griddly.RenderTools import VideoRecorder - - -class RecordingState(Enum): - NOT_RECORDING = 1 - WAITING_FOR_EPISODE_START = 2 - BEFORE_RECORDING = 3 - RECORDING = 4 - - -class RLlibEnv(GymWrapper): - """ - Wraps a Griddly environment for compatibility with RLLib. 
- - Use the `env_config` in the rllib config to provide Griddly Environment Parameters - - Example: - - Firstly register the RLlibWrapper using rllib's - - env_name = "my_env_name" - - register_env(env_name, RLlibWrapper) - - you can then configure it - - rllib_config = { - 'env_config': { - 'yaml_file': 'Single-Player/GVGAI/butterflies.yaml', - 'level": 6, - 'player_observer_type': gd.ObserverType.SPRITE_2D, - 'global_observer_type': gd.ObserverType.ISOMETRIC, - 'max_steps': 1000, - }, - # Other configuration options - } - - Create the rllib trainer using this config: - - trainer = ImpalaTrainer(rllib_config, env=env_name) - - """ - - def __init__(self, env_config): - super().__init__(**env_config) - - self.invalid_action_masking = env_config.get('invalid_action_masking', False) - self.conditional_action_sampling = env_config.get('conditional_action_sampling', False) - self._record_video_config = env_config.get('record_video_config', None) - self._random_level_on_reset = env_config.get('random_level_on_reset', False) - - super().reset() - - self._recording_state = None - self._env_steps = 0 - - if self._record_video_config is not None: - self._recording_state = RecordingState.BEFORE_RECORDING - self._record_frequency = self._record_video_config.get('frequency', 1000) - - self.set_transform() - - def _get_player_action_tree(self, player_id): - - valid_action_tree = defaultdict(lambda: defaultdict(lambda: defaultdict(defaultdict))) - for location, action_names in self.game.get_available_actions(player_id).items(): - for action_name, action_ids in self.game.get_available_action_ids(location, list(action_names)).items(): - if len(action_ids) > 0: - valid_action_tree[location[0]][location[1]][self.action_names.index(action_name)] = action_ids - return valid_action_tree - - def _build_valid_action_trees(self): - player_valid_action_trees = [] - - if self.player_count > 0: - for p in range(self.player_count): - player_valid_action_trees.append(self._get_player_action_tree(p + 1)) - - else: - player_valid_action_trees.append(self._get_player_action_tree(1)) - - return player_valid_action_trees - - def _transform(self, observation): - - if self.player_count > 1: - transformed_obs = [obs.transpose(1, 2, 0).astype(np.float) for obs in observation] - else: - transformed_obs = observation.transpose(1, 2, 0).astype(np.float) - - return transformed_obs - - def _after_step(self, observation, reward, done, info): - if self._recording_state is not None: - if self._recording_state is RecordingState.NOT_RECORDING and self._env_steps % self._record_frequency == 0: - self._recording_state = RecordingState.WAITING_FOR_EPISODE_START - - if self._recording_state == RecordingState.BEFORE_RECORDING: - global_obs = self.render(observer='global', mode='rgb_array') - self._global_recorder = VideoRecorder() - self._global_recorder.start(f'global_video_{uuid1()}_{self._env_steps}.mp4', global_obs.shape) - self._recording_state = RecordingState.RECORDING - - if self._recording_state == RecordingState.RECORDING: - global_obs = self.render(observer='global', mode='rgb_array') - self._global_recorder.add_frame(global_obs) - if done: - self._recording_state = RecordingState.NOT_RECORDING - self._global_recorder.close() - - if self._recording_state == RecordingState.WAITING_FOR_EPISODE_START: - if done: - self._recording_state = RecordingState.BEFORE_RECORDING - - def set_transform(self): - """ - Create the transform for rllib based on the observation space - """ - - if self.player_count > 1: - self.observation_space = 
self.observation_space[0] - self.action_space = self.action_space[0] - - self.observation_space = gym.spaces.Box( - self.observation_space.low.transpose((1, 2, 0)).astype(np.float), - self.observation_space.high.transpose((1, 2, 0)).astype(np.float), - dtype=np.float, - ) - - self.height = self.observation_space.shape[0] - self.width = self.observation_space.shape[1] - - def reset(self, **kwargs): - - if self._random_level_on_reset: - kwargs['level_id'] = np.random.choice(self.level_count) - observation = super().reset(**kwargs) - self.set_transform() - - if self.conditional_action_sampling: - self.last_valid_action_trees = self._build_valid_action_trees() - - return self._transform(observation) - - def step(self, action): - observation, reward, done, info = super().step(action) - - self._after_step(observation, reward, done, info) - - self._env_steps += 1 - - if self.conditional_action_sampling: - self.last_valid_action_trees = self._build_valid_action_trees() - info['valid_action_trees'] = self.last_valid_action_trees - - return self._transform(observation), reward, done, info - - def render(self, mode='human', observer=0): - return super().render(mode, observer='global') - - -class RLlibMultiAgentWrapper(gym.Wrapper, MultiAgentEnv): - - def __init__(self, env, env_config): - super().__init__(env) - - self._player_done_variable = env_config.get('player_done_variable', None) - - # Used to keep track of agents that are active in the environment - self._active_agents = set() - - assert self.player_count > 1, 'RLlibMultiAgentWrapper can only be used with environments that have multiple agents' - - def _to_multi_agent_map(self, data): - return {a: data[a - 1] for a in self._active_agents} - - def reset(self, **kwargs): - obs = super().reset(**kwargs) - self._active_agents.update([a + 1 for a in range(self.player_count)]) - return self._to_multi_agent_map(obs) - - def _resolve_player_done_variable(self): - resolved_variables = self.game.get_global_variable([self._player_done_variable]) - return resolved_variables[self._player_done_variable] - - def step(self, action_dict: MultiAgentDict): - actions_array = np.zeros((self.player_count, *self.action_space.shape)) - for agent_id, action in action_dict.items(): - actions_array[agent_id - 1] = action - - obs, reward, all_done, info = super().step(actions_array) - - done_map = {'__all__': all_done} - - if self._player_done_variable is not None: - griddly_players_done = self._resolve_player_done_variable() - - for agent_id in self._active_agents: - done_map[agent_id] = griddly_players_done[agent_id] == 1 or all_done - else: - for p in range(self.player_count): - done_map[p] = False - - if self.invalid_action_masking: - info_map = self._to_multi_agent_map([ - {'valid_action_tree': valid_action_tree} for valid_action_tree in info['valid_action_trees'] - ]) - else: - info_map = self._to_multi_agent_map(defaultdict(dict)) - - obs_map = self._to_multi_agent_map(obs) - reward_map = self._to_multi_agent_map(reward) - - # Finally remove any agent ids that are done - for agent_id, is_done in done_map.items(): - if is_done: - self._active_agents.discard(agent_id) - - assert len(obs_map) == len(reward_map) - assert len(obs_map) == len(done_map) - 1 - assert len(obs_map) == len(info_map) - - return obs_map, reward_map, done_map, info_map From cc9ee0367cb07d7ce3b346f7b31fd705a694f3cc Mon Sep 17 00:00:00 2001 From: Bam4d Date: Tue, 16 Mar 2021 15:10:56 +0000 Subject: [PATCH 03/45] experiments and associated code for action space work --- .gitignore | 3 + 
bindings/wrapper/GameWrapper.cpp | 80 ++--
 .../clusters_po.yaml | 310 +++++++++++++
 .../clusters_po_with_push.yaml | 330 ++++++++++++++
 ...rs_po_with_push_separate_colors_units.yaml | 0
 ...clusters_po_with_push_seperate_colors.yaml | 409 ++++++++++++++++++
 .../clusters_po_with_push_units.yaml | 0
 .../rllib_conditional_actions.py | 81 ++++
 python/examples/rllib/rllib_single_agent.py | 2 +-
 .../rllib_single_agent_conditional_actions.py | 28 +-
 python/griddly/GymWrapper.py | 3 +
 python/griddly/RenderTools.py | 1 +
 python/griddly/util/rllib/callbacks.py | 58 +++
 .../conditional_action_exploration.py | 79 +++-
 .../conditional_action_mixin.py | 57 ++-
 .../conditional_action_policy_trainer.py | 3 +-
 16 files changed, 1361 insertions(+), 83 deletions(-)
 create mode 100644 python/examples/experiments/conditional_action_spaces/clusters_po.yaml
 create mode 100644 python/examples/experiments/conditional_action_spaces/clusters_po_with_push.yaml
 create mode 100644 python/examples/experiments/conditional_action_spaces/clusters_po_with_push_separate_colors_units.yaml
 create mode 100644 python/examples/experiments/conditional_action_spaces/clusters_po_with_push_seperate_colors.yaml
 create mode 100644 python/examples/experiments/conditional_action_spaces/clusters_po_with_push_units.yaml
 create mode 100644 python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py
 create mode 100644 python/griddly/util/rllib/callbacks.py

diff --git a/.gitignore b/.gitignore
index fdba0dac0..2531ed319 100644
--- a/.gitignore
+++ b/.gitignore
@@ -137,3 +137,6 @@ rules.ninja

 # misc
 bin/
+
+# wandb
+wandb/
diff --git a/bindings/wrapper/GameWrapper.cpp b/bindings/wrapper/GameWrapper.cpp
index da04ae768..39ff52cd5 100644
--- a/bindings/wrapper/GameWrapper.cpp
+++ b/bindings/wrapper/GameWrapper.cpp
@@ -7,6 +7,30 @@
 #include "StepPlayerWrapper.cpp"

 namespace griddly {
+
+class ValidActionNode {
+ public:
+  std::unordered_map<uint32_t, std::shared_ptr<ValidActionNode>> children;
+
+  bool contains(uint32_t value) {
+    return children.find(value) != children.end();
+  }
+
+  void add(uint32_t value) {
+    children[value] = std::shared_ptr<ValidActionNode>(new ValidActionNode());
+  }
+
+  static py::dict toPyDict(std::shared_ptr<ValidActionNode> node) {
+    py::dict py_dict;
+    for(auto child: node->children) {
+      py_dict[py::cast(child.first)] = toPyDict(child.second);
+    }
+
+    return py_dict;
+  }
+};
+
+
 class Py_GameWrapper {
  public:
  Py_GameWrapper(ObserverType globalObserverType, std::shared_ptr<GDYFactory> gdyFactory)
@@ -39,62 +63,68 @@ class Py_GameWrapper {
     return player;
   }

+  const uint32_t getActionTypeId(std::string actionName) const {
+    auto actionNames = gdyFactory_->getExternalActionNames();
+    for(int i = 0; i < actionNames.size(); i++) {
+      if(actionNames[i] == actionName) {
+        return i;
+      }
+    }
+    throw std::runtime_error("unregistered action");
+  }
+
   std::vector<py::dict> buildValidActionTrees() const {
     std::vector<py::dict> valid_action_trees;
     auto externalActionNames = gdyFactory_->getExternalActionNames();
     for (int playerId = 1; playerId <= playerCount_; playerId++) {
-      py::dict valid_action_tree;
+      std::shared_ptr<ValidActionNode> node = std::shared_ptr<ValidActionNode>(new ValidActionNode());
       for (auto actionNamesAtLocation : gameProcess_->getAvailableActionNames(playerId)) {
         auto location = actionNamesAtLocation.first;
         auto actionNames = actionNamesAtLocation.second;

         for (auto actionName : actionNames) {
-          auto& treePtr = valid_action_tree;
+          std::shared_ptr<ValidActionNode> treePtr = node;
           auto actionInputsDefinitions = gdyFactory_->getActionInputsDefinitions();
           if (actionInputsDefinitions.find(actionName) != actionInputsDefinitions.end()) {
             auto locationVec = glm::ivec2{location[0], location[1]};
             auto actionIdsForName = gameProcess_->getAvailableActionIdsAtLocation(locationVec, actionName);

             if
(actionIdsForName.size() > 0) { - if (gdyFactory_->getAvatarObject().length() == 0) { - auto py_x = py::cast(locationVec[0]); - auto py_y = py::cast(locationVec[1]); - if(!treePtr.contains(py_x)) { - treePtr[py_x] = py::dict(); - } + // if (gdyFactory_->getAvatarObject().length() == 0) { + // auto py_x = py::cast(locationVec[0]); + // auto py_y = py::cast(locationVec[1]); + // if(!treePtr.contains(py_x)) { + // (*treePtr)[py_x] = py::dict(); + // } - treePtr = treePtr[py_x]; + // treePtr = treePtr[py_x]; - if(!treePtr.contains(py_y)) { - treePtr[py_y] = py::dict(); - } + // if(!treePtr.contains(py_y)) { + // treePtr[py_y] = py::dict(); + // } - treePtr = treePtr[py_y]; - } + // treePtr = treePtr[py_y]; + // } if (externalActionNames.size() > 1) { - auto py_actionName = py::cast(actionName); - if(!treePtr.contains(py_actionName)) { - treePtr[py_actionName] = py::dict(); + auto actionTypeId = getActionTypeId(actionName); + if(!treePtr->contains(actionTypeId)) { + treePtr->add(actionTypeId); } - treePtr = treePtr[py_actionName]; + treePtr = treePtr->children[actionTypeId]; } for(auto id : actionIdsForName) { - auto py_id = py::cast(id); - treePtr[py_id] = py::dict(); + treePtr->add(id); } - - auto py_nop = py::cast(0); - treePtr[py_nop] = py::dict(); - + treePtr->add(0); } } } } - valid_action_trees.push_back(valid_action_tree); + valid_action_trees.push_back(ValidActionNode::toPyDict(node)); } return valid_action_trees; diff --git a/python/examples/experiments/conditional_action_spaces/clusters_po.yaml b/python/examples/experiments/conditional_action_spaces/clusters_po.yaml new file mode 100644 index 000000000..3d8b70722 --- /dev/null +++ b/python/examples/experiments/conditional_action_spaces/clusters_po.yaml @@ -0,0 +1,310 @@ +Version: "0.1" +Environment: + Name: Partially Observable Clusters + Description: Cluster the coloured objects together by pushing them against the static coloured blocks. + Observers: + Sprite2D: + TileSize: 24 + BackgroundTile: oryx/oryx_fantasy/floor1-2.png + Variables: + - Name: box_count + InitialValue: 0 + Player: + Observer: + RotateWithAvatar: true + TrackAvatar: true + Height: 5 + Width: 5 + OffsetX: 0 + OffsetY: 2 + AvatarObject: avatar # The player can only control a single avatar in the game + Termination: + Win: + - eq: [box_count, 0] + Lose: + - eq: [broken_box:count, 1] + - eq: [avatar:count, 0] + Levels: + - | + w w w w w w w w w w w w w + w . . . . . . . . . . . w + w . . 1 1 . . . 2 . 2 . w + w . . . . 1 . . . . . . w + w . . . a . . . . . 2 . w + w . . . . . . . h . . . w + w . . . . 1 . . . . b . w + w . . . . . . 1 . . . . w + w . . . . . . . . A . . w + w w w w w w w w w w w w w + - | + w w w w w w w w w w w w w + w . . . . . . . . . . . w + w . . 1 . . 2 . c 3 . . w + w . . . . h . . h . . . w + w . . . 2 . . 3 . . 1 . w + w . . . . b . . h . . . w + w . . 3 . . . 2 . . 1 . w + w . . h . h . . . a . . w + w . . . . . A . . . . . w + w w w w w w w w w w w w w + - | + w w w w w w w w w w w w w + w . . a . . b . . c . . w + w . . . . . . . . . . . w + w . . . . . . . . . . . w + w h h h h h . h h h h h w + w . . . . h . h . . . . w + w . 1 2 . h . h . 1 3 . w + w . 3 . . . . . . . 2 . w + w . . . . . A . . . . . w + w w w w w w w w w w w w w + - | + w w w w w w w w w w w w w + w . . . . . . . . . . . w + w . . . 1 . 2 . . c . . w + w . . . . . 3 . . 3 . . w + w . . a . 2 . . . h . . w + w . . . . h h . 3 . . . w + w . . 1 . . . . . 2 . . w + w . . . . . 1 . . b . . w + w . . . . . A . . . . . 
w + w w w w w w w w w w w w w + - | + w w w w w w w w w w w w w + w . . . . . . . . . . . w + w . . . . . . 1 . . . . w + w . . h . . b . . h . . w + w . . . . 1 . . . . . . w + w . . 3 . . . . 2 . . . w + w . . . a . h . . c . . w + w . . . . 3 . . . . 2 . w + w . . . . . A . . . . . w + w w w w w w w w w w w w w + +Actions: + + # A simple action to count the number of boxes in the game at the start + # Not currently a way to do complex things in termination conditions like combine multiple conditions + - Name: box_counter + InputMapping: + Internal: true + Inputs: + 1: + Description: "The only action here is to increment the box count" + Behaviours: + - Src: + Object: [blue_box, red_box, green_box] + Commands: + - incr: box_count + Dst: + Object: [blue_box, red_box, green_box] + + # Define the move action + - Name: move + InputMapping: + Inputs: + 1: + Description: Rotate left + OrientationVector: [-1, 0] + 2: + Description: Move forwards + OrientationVector: [0, -1] + VectorToDest: [0, -1] + 3: + Description: Rotate right + OrientationVector: [1, 0] + Relative: true + Behaviours: + + # Avatar rotates + - Src: + Object: avatar + Commands: + - rot: _dir + Dst: + Object: avatar + + # Avatar and boxes can move into empty space + - Src: + Object: [avatar, blue_box, green_box, red_box] + Commands: + - mov: _dest + Dst: + Object: _empty + + # Boxes can be pushed by the avatar + - Src: + Object: avatar + Commands: + - mov: _dest + Dst: + Object: [blue_box, green_box, red_box] + Commands: + - cascade: _dest + + # When boxes are pushed against the blocks they change + - Src: + Object: blue_box + Commands: + - change_to: blue_block + - reward: 1 + - decr: box_count + Dst: + Object: blue_block + - Src: + Object: red_box + Commands: + - reward: 1 + - change_to: red_block + - decr: box_count + Dst: + Object: red_block + - Src: + Object: green_box + Commands: + - reward: 1 + - change_to: green_block + - decr: box_count + Dst: + Object: green_block + + # Boxes break if they hit the spikes + - Src: + Object: [blue_box, green_box, red_box] + Commands: + - change_to: broken_box + - reward: -1 + Dst: + Object: spike + + # Avatar dies if it hits the spikes + - Src: + Object: avatar + Commands: + - remove: true + - reward: -1 + Dst: + Object: spike + +Objects: + - Name: avatar + MapCharacter: A + Observers: + Sprite2D: + - Image: gvgai/oryx/knight1.png + Block2D: + - Shape: triangle + Color: [0.0, 1.0, 0.0] + Scale: 0.8 + + - Name: wall + MapCharacter: w + Observers: + Sprite2D: + - TilingMode: WALL_16 + Image: + - oryx/oryx_fantasy/wall1-0.png + - oryx/oryx_fantasy/wall1-1.png + - oryx/oryx_fantasy/wall1-2.png + - oryx/oryx_fantasy/wall1-3.png + - oryx/oryx_fantasy/wall1-4.png + - oryx/oryx_fantasy/wall1-5.png + - oryx/oryx_fantasy/wall1-6.png + - oryx/oryx_fantasy/wall1-7.png + - oryx/oryx_fantasy/wall1-8.png + - oryx/oryx_fantasy/wall1-9.png + - oryx/oryx_fantasy/wall1-10.png + - oryx/oryx_fantasy/wall1-11.png + - oryx/oryx_fantasy/wall1-12.png + - oryx/oryx_fantasy/wall1-13.png + - oryx/oryx_fantasy/wall1-14.png + - oryx/oryx_fantasy/wall1-15.png + Block2D: + - Shape: square + Color: [0.5, 0.5, 0.5] + Scale: 0.9 + + - Name: spike + MapCharacter: h + Observers: + Sprite2D: + - Image: gvgai/oryx/spike2.png + Block2D: + - Shape: triangle + Color: [0.9, 0.1, 0.1] + Scale: 0.5 + + - Name: red_box + MapCharacter: "2" + InitialActions: + - Action: box_counter + ActionId: 1 + Observers: + Sprite2D: + - Image: gvgai/newset/blockR.png + Block2D: + - Shape: square + Color: [0.5, 0.2, 0.2] + Scale: 0.5 + - Name: 
red_block + MapCharacter: b + Observers: + Sprite2D: + - Image: gvgai/newset/blockR2.png + Block2D: + - Shape: square + Color: [1.0, 0.0, 0.0] + Scale: 1.0 + + - Name: green_box + MapCharacter: "3" + InitialActions: + - Action: box_counter + ActionId: 1 + Observers: + Sprite2D: + - Image: gvgai/newset/blockG.png + Block2D: + - Shape: square + Color: [0.2, 0.5, 0.2] + Scale: 0.5 + - Name: green_block + MapCharacter: c + Observers: + Sprite2D: + - Image: gvgai/newset/blockG2.png + Block2D: + - Shape: square + Color: [0.0, 1.0, 0.0] + Scale: 1.0 + + - Name: blue_box + MapCharacter: "1" + InitialActions: + - Action: box_counter + ActionId: 1 + Observers: + Sprite2D: + - Image: gvgai/newset/blockB.png + Block2D: + - Shape: square + Color: [0.2, 0.2, 0.5] + Scale: 0.5 + - Name: blue_block + MapCharacter: a + Observers: + Sprite2D: + - Image: gvgai/newset/blockB2.png + Block2D: + - Shape: square + Color: [0.0, 0.0, 1.0] + Scale: 1.0 + + - Name: broken_box + Observers: + Sprite2D: + - Image: gvgai/newset/block3.png + Block2D: + - Shape: triangle + Color: [1.0, 0.0, 1.0] + Scale: 1.0 diff --git a/python/examples/experiments/conditional_action_spaces/clusters_po_with_push.yaml b/python/examples/experiments/conditional_action_spaces/clusters_po_with_push.yaml new file mode 100644 index 000000000..9904e87e7 --- /dev/null +++ b/python/examples/experiments/conditional_action_spaces/clusters_po_with_push.yaml @@ -0,0 +1,330 @@ +Version: "0.1" +Environment: + Name: Partially Observable Clusters + Description: Cluster the coloured objects together by pushing them against the static coloured blocks. + Observers: + Sprite2D: + TileSize: 24 + BackgroundTile: oryx/oryx_fantasy/floor1-2.png + Variables: + - Name: box_count + InitialValue: 0 + Player: + Observer: + RotateWithAvatar: true + TrackAvatar: true + Height: 5 + Width: 5 + OffsetX: 0 + OffsetY: 2 + AvatarObject: avatar # The player can only control a single avatar in the game + Termination: + Win: + - eq: [box_count, 0] + Lose: + - eq: [broken_box:count, 1] + - eq: [avatar:count, 0] + Levels: + - | + w w w w w w w w w w w w w + w . . . . . . . . . . . w + w . . 1 1 . . . 2 . 2 . w + w . . . . 1 . . . . . . w + w . . . a . . . . . 2 . w + w . . . . . . . h . . . w + w . . . . 1 . . . . b . w + w . . . . . . 1 . . . . w + w . . . . . . . . A . . w + w w w w w w w w w w w w w + - | + w w w w w w w w w w w w w + w . . . . . . . . . . . w + w . . 1 . . 2 . c 3 . . w + w . . . . h . . h . . . w + w . . . 2 . . 3 . . 1 . w + w . . . . b . . h . . . w + w . . 3 . . . 2 . . 1 . w + w . . h . h . . . a . . w + w . . . . . A . . . . . w + w w w w w w w w w w w w w + - | + w w w w w w w w w w w w w + w . . a . . b . . c . . w + w . . . . . . . . . . . w + w . . . . . . . . . . . w + w h h h h h . h h h h h w + w . . . . h . h . . . . w + w . 1 2 . h . h . 1 3 . w + w . 3 . . . . . . . 2 . w + w . . . . . A . . . . . w + w w w w w w w w w w w w w + - | + w w w w w w w w w w w w w + w . . . . . . . . . . . w + w . . . 1 . 2 . . c . . w + w . . . . . 3 . . 3 . . w + w . . a . 2 . . . h . . w + w . . . . h h . 3 . . . w + w . . 1 . . . . . 2 . . w + w . . . . . 1 . . b . . w + w . . . . . A . . . . . w + w w w w w w w w w w w w w + - | + w w w w w w w w w w w w w + w . . . . . . . . . . . w + w . . . . . . 1 . . . . w + w . . h . . b . . h . . w + w . . . . 1 . . . . . . w + w . . 3 . . . . 2 . . . w + w . . . a . h . . c . . w + w . . . . 3 . . . . 2 . w + w . . . . . A . . . . . 
w + w w w w w w w w w w w w w + +Actions: + + # A simple action to count the number of boxes in the game at the start + # Not currently a way to do complex things in termination conditions like combine multiple conditions + - Name: box_counter + InputMapping: + Internal: true + Inputs: + 1: + Description: "The only action here is to increment the box count" + Behaviours: + - Src: + Object: [blue_box, red_box, green_box] + Commands: + - incr: box_count + Dst: + Object: [blue_box, red_box, green_box] + + # Define the move action + - Name: move + InputMapping: + Inputs: + 1: + Description: Rotate left + OrientationVector: [-1, 0] + 2: + Description: Move forwards + OrientationVector: [0, -1] + VectorToDest: [0, -1] + 3: + Description: Rotate right + OrientationVector: [1, 0] + Relative: true + Behaviours: + + # Avatar rotates + - Src: + Object: avatar + Commands: + - rot: _dir + Dst: + Object: avatar + + # Avatar can move into empty space + - Src: + Object: avatar + Commands: + - mov: _dest + Dst: + Object: _empty + + # Avatar dies if it hits the spikes + - Src: + Object: avatar + Commands: + - remove: true + - reward: -1 + Dst: + Object: spike + + + - Name: push + InputMapping: + Inputs: + 1: + Description: Push Forwards + OrientationVector: [ 0, -1 ] + VectorToDest: [ 0, -1 ] + Relative: true + Behaviours: + + # Boxes can be pushed by the avatar + - Src: + Object: avatar + Commands: + - mov: _dest + Dst: + Object: [blue_box, green_box, red_box] + Commands: + - cascade: _dest + + # Boxes break if they hit the spikes + - Src: + Object: [ blue_box, green_box, red_box ] + Commands: + - change_to: broken_box + - reward: -1 + Dst: + Object: spike + + # Boxes can pushed into empty space + - Src: + Object: [blue_box, green_box, red_box] + Commands: + - mov: _dest + Dst: + Object: _empty + + # When boxes are pushed against the blocks they change + - Src: + Object: blue_box + Commands: + - change_to: blue_block + - reward: 1 + - decr: box_count + Dst: + Object: blue_block + - Src: + Object: red_box + Commands: + - reward: 1 + - change_to: red_block + - decr: box_count + Dst: + Object: red_block + - Src: + Object: green_box + Commands: + - reward: 1 + - change_to: green_block + - decr: box_count + Dst: + Object: green_block + + +Objects: + - Name: avatar + MapCharacter: A + Observers: + Sprite2D: + - Image: gvgai/oryx/knight1.png + Block2D: + - Shape: triangle + Color: [0.0, 1.0, 0.0] + Scale: 0.8 + + - Name: wall + MapCharacter: w + Observers: + Sprite2D: + - TilingMode: WALL_16 + Image: + - oryx/oryx_fantasy/wall1-0.png + - oryx/oryx_fantasy/wall1-1.png + - oryx/oryx_fantasy/wall1-2.png + - oryx/oryx_fantasy/wall1-3.png + - oryx/oryx_fantasy/wall1-4.png + - oryx/oryx_fantasy/wall1-5.png + - oryx/oryx_fantasy/wall1-6.png + - oryx/oryx_fantasy/wall1-7.png + - oryx/oryx_fantasy/wall1-8.png + - oryx/oryx_fantasy/wall1-9.png + - oryx/oryx_fantasy/wall1-10.png + - oryx/oryx_fantasy/wall1-11.png + - oryx/oryx_fantasy/wall1-12.png + - oryx/oryx_fantasy/wall1-13.png + - oryx/oryx_fantasy/wall1-14.png + - oryx/oryx_fantasy/wall1-15.png + Block2D: + - Shape: square + Color: [0.5, 0.5, 0.5] + Scale: 0.9 + + - Name: spike + MapCharacter: h + Observers: + Sprite2D: + - Image: gvgai/oryx/spike2.png + Block2D: + - Shape: triangle + Color: [0.9, 0.1, 0.1] + Scale: 0.5 + + - Name: red_box + MapCharacter: "2" + InitialActions: + - Action: box_counter + ActionId: 1 + Observers: + Sprite2D: + - Image: gvgai/newset/blockR.png + Block2D: + - Shape: square + Color: [0.5, 0.2, 0.2] + Scale: 0.5 + - Name: red_block + 
MapCharacter: b + Observers: + Sprite2D: + - Image: gvgai/newset/blockR2.png + Block2D: + - Shape: square + Color: [1.0, 0.0, 0.0] + Scale: 1.0 + + - Name: green_box + MapCharacter: "3" + InitialActions: + - Action: box_counter + ActionId: 1 + Observers: + Sprite2D: + - Image: gvgai/newset/blockG.png + Block2D: + - Shape: square + Color: [0.2, 0.5, 0.2] + Scale: 0.5 + - Name: green_block + MapCharacter: c + Observers: + Sprite2D: + - Image: gvgai/newset/blockG2.png + Block2D: + - Shape: square + Color: [0.0, 1.0, 0.0] + Scale: 1.0 + + - Name: blue_box + MapCharacter: "1" + InitialActions: + - Action: box_counter + ActionId: 1 + Observers: + Sprite2D: + - Image: gvgai/newset/blockB.png + Block2D: + - Shape: square + Color: [0.2, 0.2, 0.5] + Scale: 0.5 + - Name: blue_block + MapCharacter: a + Observers: + Sprite2D: + - Image: gvgai/newset/blockB2.png + Block2D: + - Shape: square + Color: [0.0, 0.0, 1.0] + Scale: 1.0 + + - Name: broken_box + Observers: + Sprite2D: + - Image: gvgai/newset/block3.png + Block2D: + - Shape: triangle + Color: [1.0, 0.0, 1.0] + Scale: 1.0 diff --git a/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_separate_colors_units.yaml b/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_separate_colors_units.yaml new file mode 100644 index 000000000..e69de29bb diff --git a/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_seperate_colors.yaml b/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_seperate_colors.yaml new file mode 100644 index 000000000..bb173e3bc --- /dev/null +++ b/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_seperate_colors.yaml @@ -0,0 +1,409 @@ +Version: "0.1" +Environment: + Name: Partially Observable Clusters + Description: Cluster the coloured objects together by pushing them against the static coloured blocks. + Observers: + Sprite2D: + TileSize: 24 + BackgroundTile: oryx/oryx_fantasy/floor1-2.png + Variables: + - Name: box_count + InitialValue: 0 + Player: + Observer: + RotateWithAvatar: true + TrackAvatar: true + Height: 5 + Width: 5 + OffsetX: 0 + OffsetY: 2 + AvatarObject: avatar # The player can only control a single avatar in the game + Termination: + Win: + - eq: [box_count, 0] + Lose: + - eq: [broken_box:count, 1] + - eq: [avatar:count, 0] + Levels: + - | + w w w w w w w w w w w w w + w . . . . . . . . . . . w + w . . 1 1 . . . 2 . 2 . w + w . . . . 1 . . . . . . w + w . . . a . . . . . 2 . w + w . . . . . . . h . . . w + w . . . . 1 . . . . b . w + w . . . . . . 1 . . . . w + w . . . . . . . . A . . w + w w w w w w w w w w w w w + - | + w w w w w w w w w w w w w + w . . . . . . . . . . . w + w . . 1 . . 2 . c 3 . . w + w . . . . h . . h . . . w + w . . . 2 . . 3 . . 1 . w + w . . . . b . . h . . . w + w . . 3 . . . 2 . . 1 . w + w . . h . h . . . a . . w + w . . . . . A . . . . . w + w w w w w w w w w w w w w + - | + w w w w w w w w w w w w w + w . . a . . b . . c . . w + w . . . . . . . . . . . w + w . . . . . . . . . . . w + w h h h h h . h h h h h w + w . . . . h . h . . . . w + w . 1 2 . h . h . 1 3 . w + w . 3 . . . . . . . 2 . w + w . . . . . A . . . . . w + w w w w w w w w w w w w w + - | + w w w w w w w w w w w w w + w . . . . . . . . . . . w + w . . . 1 . 2 . . c . . w + w . . . . . 3 . . 3 . . w + w . . a . 2 . . . h . . w + w . . . . h h . 3 . . . w + w . . 1 . . . . . 2 . . w + w . . . . . 1 . . b . . w + w . . . . . A . . . . . w + w w w w w w w w w w w w w + - | + w w w w w w w w w w w w w + w . 
. . . . . . . . . . w + w . . . . . . 1 . . . . w + w . . h . . b . . h . . w + w . . . . 1 . . . . . . w + w . . 3 . . . . 2 . . . w + w . . . a . h . . c . . w + w . . . . 3 . . . . 2 . w + w . . . . . A . . . . . w + w w w w w w w w w w w w w + +Actions: + + # A simple action to count the number of boxes in the game at the start + # Not currently a way to do complex things in termination conditions like combine multiple conditions + - Name: box_counter + InputMapping: + Internal: true + Inputs: + 1: + Description: "The only action here is to increment the box count" + Behaviours: + - Src: + Object: [blue_box, red_box, green_box] + Commands: + - incr: box_count + Dst: + Object: [blue_box, red_box, green_box] + + # Define the move action + - Name: move + InputMapping: + Inputs: + 1: + Description: Rotate left + OrientationVector: [-1, 0] + 2: + Description: Move forwards + OrientationVector: [0, -1] + VectorToDest: [0, -1] + 3: + Description: Rotate right + OrientationVector: [1, 0] + Relative: true + Behaviours: + + # Avatar rotates + - Src: + Object: avatar + Commands: + - rot: _dir + Dst: + Object: avatar + + # Avatar can move into empty space + - Src: + Object: avatar + Commands: + - mov: _dest + Dst: + Object: _empty + + + # Avatar dies if it hits the spikes + - Src: + Object: avatar + Commands: + - remove: true + - reward: -1 + Dst: + Object: spike + + + - Name: push_blue + InputMapping: + Inputs: + 1: + Description: Push Blue + OrientationVector: [ 0, -1 ] + VectorToDest: [ 0, -1 ] + Relative: true + Behaviours: + + # Boxes can be pushed by the avatar + - Src: + Object: avatar + Commands: + - mov: _dest + Dst: + Object: blue_box + Commands: + - cascade: _dest + + # Boxes break if they are pushed into the spikes + - Src: + Object: blue_box + Commands: + - change_to: broken_box + - reward: -1 + Dst: + Object: spike + + # Boxes can pushed into empty space + - Src: + Object: blue_box + Commands: + - mov: _dest + Dst: + Object: _empty + + # When boxes are pushed against the blocks they change + - Src: + Object: blue_box + Commands: + - change_to: blue_block + - reward: 1 + - decr: box_count + Dst: + Object: blue_block + + - Name: push_red + InputMapping: + Inputs: + 1: + Description: Push Red + OrientationVector: [ 0, -1 ] + VectorToDest: [ 0, -1 ] + Relative: true + Behaviours: + + # Boxes can be pushed by the avatar + - Src: + Object: avatar + Commands: + - mov: _dest + Dst: + Object: red_box + Commands: + - cascade: _dest + + # Boxes break if they are pushed into the spikes + - Src: + Object: red_box + Commands: + - change_to: broken_box + - reward: -1 + Dst: + Object: spike + + # Boxes can pushed into empty space + - Src: + Object: red_box + Commands: + - mov: _dest + Dst: + Object: _empty + + # When boxes are pushed against the blocks they change + - Src: + Object: red_box + Commands: + - reward: 1 + - change_to: red_block + - decr: box_count + Dst: + Object: red_block + + - Name: push_green + InputMapping: + Inputs: + 1: + Description: Push Green + OrientationVector: [ 0, -1 ] + VectorToDest: [ 0, -1 ] + Relative: true + Behaviours: + + # Boxes can be pushed by the avatar + - Src: + Object: avatar + Commands: + - mov: _dest + Dst: + Object: green_box + Commands: + - cascade: _dest + + # Boxes break if they are pushed into the spikes + - Src: + Object: green_box + Commands: + - change_to: broken_box + - reward: -1 + Dst: + Object: spike + + # Boxes can pushed into empty space + - Src: + Object: green_box + Commands: + - mov: _dest + Dst: + Object: _empty + + # When boxes are pushed 
+
+Objects:
+  - Name: avatar
+    MapCharacter: A
+    Observers:
+      Sprite2D:
+        - Image: gvgai/oryx/knight1.png
+      Block2D:
+        - Shape: triangle
+          Color: [0.0, 1.0, 0.0]
+          Scale: 0.8
+
+  - Name: wall
+    MapCharacter: w
+    Observers:
+      Sprite2D:
+        - TilingMode: WALL_16
+          Image:
+            - oryx/oryx_fantasy/wall1-0.png
+            - oryx/oryx_fantasy/wall1-1.png
+            - oryx/oryx_fantasy/wall1-2.png
+            - oryx/oryx_fantasy/wall1-3.png
+            - oryx/oryx_fantasy/wall1-4.png
+            - oryx/oryx_fantasy/wall1-5.png
+            - oryx/oryx_fantasy/wall1-6.png
+            - oryx/oryx_fantasy/wall1-7.png
+            - oryx/oryx_fantasy/wall1-8.png
+            - oryx/oryx_fantasy/wall1-9.png
+            - oryx/oryx_fantasy/wall1-10.png
+            - oryx/oryx_fantasy/wall1-11.png
+            - oryx/oryx_fantasy/wall1-12.png
+            - oryx/oryx_fantasy/wall1-13.png
+            - oryx/oryx_fantasy/wall1-14.png
+            - oryx/oryx_fantasy/wall1-15.png
+      Block2D:
+        - Shape: square
+          Color: [0.5, 0.5, 0.5]
+          Scale: 0.9
+
+  - Name: spike
+    MapCharacter: h
+    Observers:
+      Sprite2D:
+        - Image: gvgai/oryx/spike2.png
+      Block2D:
+        - Shape: triangle
+          Color: [0.9, 0.1, 0.1]
+          Scale: 0.5
+
+  - Name: red_box
+    MapCharacter: "2"
+    InitialActions:
+      - Action: box_counter
+        ActionId: 1
+    Observers:
+      Sprite2D:
+        - Image: gvgai/newset/blockR.png
+      Block2D:
+        - Shape: square
+          Color: [0.5, 0.2, 0.2]
+          Scale: 0.5
+  - Name: red_block
+    MapCharacter: b
+    Observers:
+      Sprite2D:
+        - Image: gvgai/newset/blockR2.png
+      Block2D:
+        - Shape: square
+          Color: [1.0, 0.0, 0.0]
+          Scale: 1.0
+
+  - Name: green_box
+    MapCharacter: "3"
+    InitialActions:
+      - Action: box_counter
+        ActionId: 1
+    Observers:
+      Sprite2D:
+        - Image: gvgai/newset/blockG.png
+      Block2D:
+        - Shape: square
+          Color: [0.2, 0.5, 0.2]
+          Scale: 0.5
+  - Name: green_block
+    MapCharacter: c
+    Observers:
+      Sprite2D:
+        - Image: gvgai/newset/blockG2.png
+      Block2D:
+        - Shape: square
+          Color: [0.0, 1.0, 0.0]
+          Scale: 1.0
+
+  - Name: blue_box
+    MapCharacter: "1"
+    InitialActions:
+      - Action: box_counter
+        ActionId: 1
+    Observers:
+      Sprite2D:
+        - Image: gvgai/newset/blockB.png
+      Block2D:
+        - Shape: square
+          Color: [0.2, 0.2, 0.5]
+          Scale: 0.5
+  - Name: blue_block
+    MapCharacter: a
+    Observers:
+      Sprite2D:
+        - Image: gvgai/newset/blockB2.png
+      Block2D:
+        - Shape: square
+          Color: [0.0, 0.0, 1.0]
+          Scale: 1.0
+
+  - Name: broken_box
+    Observers:
+      Sprite2D:
+        - Image: gvgai/newset/block3.png
+      Block2D:
+        - Shape: triangle
+          Color: [1.0, 0.0, 1.0]
+          Scale: 1.0
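The GDY above can be sanity-checked outside of RLlib with the plain gym wrapper before launching a full training run. A minimal sketch, assuming the YAML file sits in the working directory (the observer choice is illustrative):

    from griddly import GymWrapper, gd

    env = GymWrapper('clusters_po_with_push_seperate_colors.yaml',
                     global_observer_type=gd.ObserverType.SPRITE_2D)
    env.reset()
    # one random step; the action space covers move plus the three push actions
    obs, reward, done, info = env.step(env.action_space.sample())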
diff --git a/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_units.yaml b/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_units.yaml
new file mode 100644
index 000000000..e69de29bb
diff --git a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py
new file mode 100644
index 000000000..789bc4c6c
--- /dev/null
+++ b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py
@@ -0,0 +1,81 @@
+import os
+import sys
+
+import ray
+from ray import tune
+from ray.rllib.models import ModelCatalog
+from ray.tune.integration.wandb import WandbLoggerCallback
+from ray.tune.registry import register_env
+
+from griddly import gd
+from griddly.util.rllib.env.core import RLlibEnv
+# from griddly.util.rllib.callbacks import GriddlyCallbacks
+from griddly.util.rllib.torch import GAPAgent
+from griddly.util.rllib.torch.conditional_actions.conditional_action_policy_trainer import \
+    ConditionalActionImpalaTrainer
+
+if __name__ == '__main__':
+    sep = os.pathsep
+    os.environ['PYTHONPATH'] = sep.join(sys.path)
+
+    yaml_file = os.path.realpath('clusters_po_with_push_seperate_colors.yaml')
+
+    ray.init(num_gpus=1)
+    # ray.init(num_gpus=1, local_mode=True)
+
+    env_name = "ray-griddly-env"
+
+    register_env(env_name, RLlibEnv)
+    ModelCatalog.register_custom_model("GAP", GAPAgent)
+
+    wandbLoggerCallback = WandbLoggerCallback(
+        project='conditional_actions',
+        api_key_file='~/.wandb_rc'
+    )
+
+    max_training_steps = 5000000
+
+    config = {
+        'framework': 'torch',
+        'num_workers': 4,
+        'num_envs_per_worker': 4,
+
+        # 'callbacks': GriddlyCallbacks,
+
+        'model': {
+            'custom_model': 'GAP',
+            'custom_model_config': {}
+        },
+        'env': env_name,
+        'env_config': {
+            'record_video_config': {
+                'frequency': 100000,
+                'directory': 'videos'
+            },
+
+            'allow_nop': tune.grid_search([True, False]),
+            'invalid_action_masking': tune.grid_search(['none', 'conditional', 'collapsed']),
+            # 'invalid_action_masking': 'collapsed',
+            # 'allow_nop': False,
+            'generate_valid_action_trees': tune.grid_search([True, False]),
+            'random_level_on_reset': True,
+            'yaml_file': yaml_file,
+            'global_observer_type': gd.ObserverType.SPRITE_2D,
+            'max_steps': 1000,
+        },
+        'entropy_coeff_schedule': [
+            [0, 0.01],
+            [max_training_steps, 0.0]
+        ],
+        'lr_schedule': [
+            [0, 0.0005],
+            [max_training_steps, 0.0]
+        ],
+
+    }
+
+    stop = {
+        "timesteps_total": max_training_steps,
+    }
+
+    result = tune.run(ConditionalActionImpalaTrainer, config=config, stop=stop, callbacks=[wandbLoggerCallback])
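Note that the grid search in this config multiplies out quickly: two values for allow_nop, three for invalid_action_masking and two for generate_valid_action_trees give 2 x 3 x 2 = 12 tune trials, each running to max_training_steps timesteps.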
diff --git a/python/examples/rllib/rllib_single_agent.py b/python/examples/rllib/rllib_single_agent.py
index eda9d41e3..de96b6589 100644
--- a/python/examples/rllib/rllib_single_agent.py
+++ b/python/examples/rllib/rllib_single_agent.py
@@ -49,7 +49,7 @@
             [max_training_steps, 0.0]
         ],
         'lr_schedule': [
-            [0, 0.005],
+            [0, 0.0005],
             [max_training_steps, 0.0]
         ]
     }
diff --git a/python/examples/rllib/rllib_single_agent_conditional_actions.py b/python/examples/rllib/rllib_single_agent_conditional_actions.py
index 8e1920751..a7d0b4a61 100644
--- a/python/examples/rllib/rllib_single_agent_conditional_actions.py
+++ b/python/examples/rllib/rllib_single_agent_conditional_actions.py
@@ -16,18 +16,19 @@
     os.environ['PYTHONPATH'] = sep.join(sys.path)
 
     ray.init(num_gpus=1)
+    #ray.init(num_gpus=1, local_mode=True)
 
     env_name = "ray-griddly-env"
 
     register_env(env_name, RLlibEnv)
     ModelCatalog.register_custom_model("GAP", GAPAgent)
 
-    max_training_steps = 5000000
+    max_training_steps = 2000000
 
     config = {
         'framework': 'torch',
-        'num_workers': 6,
-        'num_envs_per_worker': 2,
+        'num_workers': 8,
+        'num_envs_per_worker': 4,
 
         'model': {
             'custom_model': 'GAP',
@@ -39,21 +40,22 @@
                 'frequency': 100000
             },
 
-            'invalid_action_masking': tune.grid_search([True, False]),
-            'generate_valid_action_trees': tune.grid_search([True, False]),
+            'allow_nop': tune.grid_search([True, False]),
+            'invalid_action_masking': tune.grid_search(['none', 'conditional']),
+            'generate_valid_action_trees': True,
             'random_level_on_reset': True,
             'yaml_file': 'Single-Player/GVGAI/clusters_partially_observable.yaml',
             'global_observer_type': gd.ObserverType.SPRITE_2D,
             'max_steps': 1000,
         },
-        'entropy_coeff_schedule': [
-            [0, 0.01],
-            [max_training_steps, 0.0]
-        ],
-        'lr_schedule': [
-            [0, 0.005],
-            [max_training_steps, 0.0]
-        ],
+        #'entropy_coeff_schedule': [
+        #    [0, 0.01],
+        #    [max_training_steps, 0.0]
+        #],
+        #'lr_schedule': [
+        #    [0, 0.005],
+        #    [max_training_steps, 0.0]
+        #],
     }
diff --git a/python/griddly/GymWrapper.py b/python/griddly/GymWrapper.py
index 1879b82b2..d4922f659 100644
--- a/python/griddly/GymWrapper.py
+++ b/python/griddly/GymWrapper.py
@@ -39,6 +39,7 @@ def __init__(self, yaml_file=None, level=0, global_observer_type=gd.ObserverType
 
             if level is not None:
                 self.game.load_level(level)
+                self.level_id = level
 
         # if we are loading a copy of the game
         elif gdy is not None and game is not None:
@@ -150,8 +151,10 @@ def reset(self, level_id=None, level_string=None, global_observations=False):
 
         if level_string is not None:
             self.game.load_level_string(level_string)
+            self.level_id = 'custom'
         elif level_id is not None:
             self.game.load_level(level_id)
+            self.level_id = level_id
 
         self.game.reset()
diff --git a/python/griddly/RenderTools.py b/python/griddly/RenderTools.py
index 6fe4714bd..73029561d 100644
--- a/python/griddly/RenderTools.py
+++ b/python/griddly/RenderTools.py
@@ -83,6 +83,7 @@ def start(self, output_file, observation_shape, fps=30):
         :param fps:
         :return:
         """
+        self.output_file = output_file
         self._image_encoder = ImageEncoder(output_file, observation_shape, fps, fps)
 
     def add_frame(self, observation):
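The recorder that gains the output_file attribute above has a small API; a rough usage sketch with a dummy frame (shapes and filename are illustrative):

    import numpy as np
    from griddly.RenderTools import VideoRecorder

    frame = np.zeros((128, 128, 3), dtype=np.uint8)   # stand-in for a global observation
    recorder = VideoRecorder()
    recorder.start('episode.mp4', frame.shape, fps=30)
    recorder.add_frame(frame)             # called once per step while recording
    recorder.close()
    print(recorder.output_file)           # readable afterwards thanks to the change above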
diff --git a/python/griddly/util/rllib/callbacks.py b/python/griddly/util/rllib/callbacks.py
new file mode 100644
index 000000000..b1c0587f1
--- /dev/null
+++ b/python/griddly/util/rllib/callbacks.py
@@ -0,0 +1,58 @@
+from typing import Optional, Dict
+
+from ray.rllib import Policy, SampleBatch, BaseEnv
+from ray.rllib.agents.callbacks import DefaultCallbacks
+from ray.rllib.evaluation import MultiAgentEpisode
+from ray.rllib.utils.typing import AgentID, PolicyID
+
+
+# from wandb import Video
+
+
+class GriddlyCallbacks(DefaultCallbacks):
+
+    def __init__(self, legacy_callbacks_dict: Dict[str, callable] = None):
+        super().__init__(legacy_callbacks_dict)
+
+        # self._videos = {}
+
+    def on_episode_start(self, *, worker: "RolloutWorker", base_env: BaseEnv, policies: Dict[PolicyID, Policy],
+                         episode: MultiAgentEpisode, env_index: Optional[int] = None, **kwargs) -> None:
+        super().on_episode_start(worker=worker, base_env=base_env, policies=policies, episode=episode,
+                                 env_index=env_index, **kwargs)
+
+    def on_episode_step(self, *, worker: "RolloutWorker", base_env: BaseEnv, episode: MultiAgentEpisode,
+                        env_index: Optional[int] = None, **kwargs) -> None:
+        super().on_episode_step(worker=worker, base_env=base_env, episode=episode, env_index=env_index, **kwargs)
+
+    def on_episode_end(self, *, worker: "RolloutWorker", base_env: BaseEnv, policies: Dict[PolicyID, Policy],
+                       episode: MultiAgentEpisode, env_index: Optional[int] = None, **kwargs) -> None:
+        super().on_episode_end(worker=worker, base_env=base_env, policies=policies, episode=episode,
+                               env_index=env_index, **kwargs)
+        # if not worker.multiagent:
+        #     info = episode.last_info_for()
+        #     if 'video' in info:
+        #         video_info = info['video']
+        #         self._videos[video_info['level']] = video_info['path']
+
+    def on_postprocess_trajectory(self, *, worker: "RolloutWorker", episode: MultiAgentEpisode, agent_id: AgentID,
+                                  policy_id: PolicyID, policies: Dict[PolicyID, Policy],
+                                  postprocessed_batch: SampleBatch, original_batches: Dict[AgentID, SampleBatch],
+                                  **kwargs) -> None:
+        super().on_postprocess_trajectory(worker=worker, episode=episode, agent_id=agent_id, policy_id=policy_id,
+                                          policies=policies, postprocessed_batch=postprocessed_batch,
+                                          original_batches=original_batches, **kwargs)
+
+    def on_sample_end(self, *, worker: "RolloutWorker", samples: SampleBatch, **kwargs) -> None:
+        super().on_sample_end(worker=worker, samples=samples, **kwargs)
+
+    def on_learn_on_batch(self, *, policy: Policy, train_batch: SampleBatch, **kwargs) -> None:
+        # TODO: extract any video from infos when this API is updated
+        super().on_learn_on_batch(policy=policy, train_batch=train_batch, **kwargs)
+
+    def on_train_result(self, *, trainer, result: dict, **kwargs) -> None:
+        super().on_train_result(trainer=trainer, result=result, **kwargs)
+
+        # for level, path in self._videos.items():
+        #     result[f'level_{level}'] = Video(path)
+        #     del self._videos[level]
diff --git a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_exploration.py b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_exploration.py
index 4f6a513d0..be92f86a1 100644
--- a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_exploration.py
+++ b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_exploration.py
@@ -1,12 +1,13 @@
+from collections import defaultdict
+
 import torch
 from gym.spaces import Discrete, MultiDiscrete
-from ray.rllib.models.torch.torch_action_dist import TorchCategorical, TorchMultiCategorical
 from torch.distributions import Categorical
 import numpy as np
 
 
 class TorchConditionalMaskingExploration():
 
-    def __init__(self, model, dist_inputs, valid_action_trees, explore=False, invalid_action_masking=False):
+    def __init__(self, model, dist_inputs, valid_action_trees, explore=False, invalid_action_masking='none', allow_nop=False):
         self._valid_action_trees = valid_action_trees
 
         self._num_inputs = dist_inputs.shape[0]
@@ -19,25 +20,68 @@ def __init__(self, model, dist_inputs, valid_action_trees, explore=False, invali
         self._num_action_parts = len(self._action_space_shape)
 
         self._invalid_action_masking = invalid_action_masking
+        self._allow_nop = allow_nop
         self._explore = explore
 
         self._inputs_split = dist_inputs.split(tuple(self._action_space_shape), dim=1)
 
-    def _mask_and_sample(self, options, logits):
+    def _mask_and_sample(self, options, logits, is_parameters=False):
 
-        #if self._invalid_action_masking:
         mask = torch.zeros([logits.shape[0]]).to(logits.device)
         mask[options] = 1
-        logits += torch.log(mask)
-        #else:
-        #    mask = torch.ones([logits.shape[0]])
 
-        dist = Categorical(logits=logits)
+        if is_parameters:
+            if not self._allow_nop and len(options) > 1:
+                mask[0] = 0
+
+        masked_logits = logits + torch.log(mask)
+
+        dist = Categorical(logits=masked_logits)
         sampled = dist.sample()
-        logp = dist.log_prob(sampled)
 
-        return sampled, logits, logp, mask
+        if self._invalid_action_masking != 'none':
+            logp = dist.log_prob(sampled)
+            out_logits = masked_logits
+        else:
+            mask = torch.ones([logits.shape[0]])
+            dist = Categorical(logits=logits)
+            logp = dist.log_prob(sampled)
+            out_logits = logits
+
+        return sampled, out_logits, logp, mask
+
+    def _merge_all_branches(self, tree):
+        all_nodes = {}
+        merged_tree = {}
+        for k, v in tree.items():
+            v = self._merge_all_branches(v)
+            all_nodes.update(v)
+
+        for k in tree.keys():
+            merged_tree[k] = all_nodes
+
+        return merged_tree
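_merge_all_branches is what the 'collapsed' mode uses: at every depth the available options become the union of the options across all sibling branches, so each part of the action is masked independently and the conditionality between parts is deliberately discarded. A small worked example with hypothetical tree values:

    tree = {1: {0: {3: {}}},
            2: {1: {4: {}}}}
    # _merge_all_branches(tree) ->
    # {1: {0: {3: {}}, 1: {4: {}}},
    #  2: {0: {3: {}}, 1: {4: {}}}}
    # both action types now appear to accept both parameter branches
    # (branches that share a key overwrite each other in the union)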
+    def _process_valid_action_tree(self, valid_action_tree):
+        subtree = valid_action_tree
+        subtree_options = list(subtree.keys())
+
+        # In the case there are no available actions for the player
+        if len(subtree_options) == 0:
+            build_tree = subtree
+            for _ in range(self._num_action_parts):
+                build_tree[0] = {}
+                build_tree = build_tree[0]
+            subtree_options = list(subtree.keys())
+
+        # If we want very basic action masking where parameterized masks are superimposed we use this
+        if self._invalid_action_masking == 'collapsed':
+            subtree = self._merge_all_branches(valid_action_tree)
+            subtree_options = list(subtree.keys())
+
+        return subtree, subtree_options
 
     def get_actions_and_mask(self):
 
@@ -51,22 +95,15 @@ def get_actions_and_mask(self):
 
         for i in range(self._num_inputs):
             if len(self._valid_action_trees) >= 1:
 
-                subtree = self._valid_action_trees[i]
-                subtree_options = list(subtree.keys())
-
-                # In the case there are no available actions for the player
-                if len(subtree_options) == 0:
-                    build_tree = subtree
-                    for _ in range(self._num_action_parts):
-                        build_tree[0] = {}
-                        build_tree = build_tree[0]
-                    subtree_options = list(subtree.keys())
+                subtree, subtree_options = self._process_valid_action_tree(self._valid_action_trees[i])
 
                 logp_parts = torch.zeros([self._num_action_parts])
                 mask_offset = 0
                 for a in range(self._num_action_parts):
+
                     dist_part = self._inputs_split[a]
-                    sampled, masked_part_logits, logp, mask_part = self._mask_and_sample(subtree_options, dist_part[i])
+                    is_parameters = a==(self._num_action_parts-1)
+                    sampled, masked_part_logits, logp, mask_part = self._mask_and_sample(subtree_options, dist_part[i], is_parameters)
 
                     # Set the action and the mask for each part of the action
                     actions[i, a] = sampled
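The masking in _mask_and_sample relies on log(0) being -inf, which drives the softmax probability of masked options to exactly zero. A self-contained illustration:

    import torch
    from torch.distributions import Categorical

    logits = torch.tensor([1.0, 2.0, 3.0])
    mask = torch.tensor([0.0, 1.0, 1.0])              # option 0 is invalid
    dist = Categorical(logits=logits + torch.log(mask))
    # dist.probs -> tensor([0.0000, 0.2689, 0.7311]); option 0 can never be sampled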
diff --git a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_mixin.py b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_mixin.py
index 199d3f825..5a3c09277 100644
--- a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_mixin.py
+++ b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_mixin.py
@@ -39,31 +39,46 @@ def compute_actions_from_input_dict(
 
             dist_inputs, state_out = self.model(input_dict, state_batches, seq_lens)
 
-            infos = input_dict[SampleBatch.INFOS] if SampleBatch.INFOS in input_dict else {}
-
-            valid_action_trees = []
-            for info in infos:
-                if isinstance(info, dict) and 'valid_action_tree' in info:
-                    valid_action_trees.append(info['valid_action_tree'])
-                else:
-                    valid_action_trees.append({})
-
-            invalid_action_masking = self.config["env_config"].get("invalid_action_masking", False)
-
-            exploration = TorchConditionalMaskingExploration(
-                self.model,
-                dist_inputs,
-                valid_action_trees,
-                explore,
-                invalid_action_masking,
-            )
-
-            actions, masked_logits, logp, mask = exploration.get_actions_and_mask()
+            generate_valid_action_trees = self.config['env_config'].get('generate_valid_action_trees', False)
+            invalid_action_masking = self.config["env_config"].get("invalid_action_masking", 'none')
+            allow_nop = self.config["env_config"].get("allow_nop", False)
+
+            if generate_valid_action_trees:
+                infos = input_dict[SampleBatch.INFOS] if SampleBatch.INFOS in input_dict else {}
+
+                valid_action_trees = []
+                for info in infos:
+                    if isinstance(info, dict) and 'valid_action_tree' in info:
+                        valid_action_trees.append(info['valid_action_tree'])
+                    else:
+                        valid_action_trees.append({})
+
+                exploration = TorchConditionalMaskingExploration(
+                    self.model,
+                    dist_inputs,
+                    valid_action_trees,
+                    explore,
+                    invalid_action_masking,
+                    allow_nop
+                )
+
+                actions, masked_logits, logp, mask = exploration.get_actions_and_mask()
+            else:
+                action_dist = self.dist_class(dist_inputs, self.model)
+
+                # Get the exploration action from the forward results.
+                actions, logp = \
+                    self.exploration.get_exploration_action(
+                        action_distribution=action_dist,
+                        timestep=timestep,
+                        explore=explore)
+
+                masked_logits = dist_inputs
 
             input_dict[SampleBatch.ACTIONS] = actions
 
             extra_fetches = {
-                SampleBatch.ACTION_DIST_INPUTS: dist_inputs,
+                SampleBatch.ACTION_DIST_INPUTS: masked_logits,
                 SampleBatch.ACTION_PROB: torch.exp(logp.float()),
                 SampleBatch.ACTION_LOGP: logp
             }
diff --git a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_policy_trainer.py b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_policy_trainer.py
index 441da7d74..20d82ead6 100644
--- a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_policy_trainer.py
+++ b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_policy_trainer.py
@@ -1,5 +1,4 @@
-from ray.rllib.agents import with_common_config
-from ray.rllib.agents.impala import ImpalaTrainer, DEFAULT_CONFIG as IMPALA_CONFIG
+from ray.rllib.agents.impala import ImpalaTrainer
 from ray.rllib.agents.impala.vtrace_torch_policy import VTraceTorchPolicy
 from ray.rllib.policy.torch_policy import LearningRateSchedule, EntropyCoeffSchedule

From 684be8b235c4d6ff486df003fc3655fa78ba276c Mon Sep 17 00:00:00 2001
From: Bam4d
Date: Tue, 16 Mar 2021 15:20:44 +0000
Subject: [PATCH 04/45] add template arguments for centos build

---
 bindings/wrapper/GameWrapper.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bindings/wrapper/GameWrapper.cpp b/bindings/wrapper/GameWrapper.cpp
index 39ff52cd5..4a74ca412 100644
--- a/bindings/wrapper/GameWrapper.cpp
+++ b/bindings/wrapper/GameWrapper.cpp
@@ -380,8 +380,8 @@ class Py_GameWrapper {
           py_event["SourceObjectPlayerId"] = historyEvent.sourceObjectPlayerId;
           py_event["DestinationObjectPlayerId"] = historyEvent.destinationObjectPlayerId;
 
-          py_event["SourceLocation"] = std::array{historyEvent.sourceLocation.x, historyEvent.sourceLocation.y};
-          py_event["DestinationLocation"] = std::array{historyEvent.destLocation.x, historyEvent.destLocation.y};
+          py_event["SourceLocation"] = std::array<int32_t, 2>{historyEvent.sourceLocation.x, historyEvent.sourceLocation.y};
+          py_event["DestinationLocation"] = std::array<int32_t, 2>{historyEvent.destLocation.x, historyEvent.destLocation.y};
 
           py_events.push_back(py_event);
         }

From 71168fb92bff15bce6ea4bae8dc201084ecc88da Mon Sep 17 00:00:00 2001
From: Bam4d
Date: Tue, 16 Mar 2021 19:36:50 +0000
Subject: [PATCH 05/45] shrinking networks a bit for simplicity

---
 .../rllib_baseline.py                          | 84 +++++++++++++++++++
 .../rllib_conditional_actions.py               |  2 +-
 .../util/rllib/torch/agents/conv_agent.py      |  9 +-
 .../agents/global_average_pooling_agent.py     |  9 +-
 4 files changed, 89 insertions(+), 15 deletions(-)
 create mode 100644 python/examples/experiments/conditional_action_spaces/rllib_baseline.py
diff --git a/python/examples/experiments/conditional_action_spaces/rllib_baseline.py b/python/examples/experiments/conditional_action_spaces/rllib_baseline.py
new file mode 100644
index 000000000..9b7aa7a98
--- /dev/null
+++ b/python/examples/experiments/conditional_action_spaces/rllib_baseline.py
@@ -0,0 +1,84 @@
+import os
+import sys
+
+import ray
+from ray import tune
+from ray.rllib.models import ModelCatalog
+from ray.tune.integration.wandb import WandbLoggerCallback
+from ray.tune.registry import register_env
+
+from griddly import gd
+from griddly.util.rllib.env.core import RLlibEnv
+from griddly.util.rllib.torch.agents.conv_agent import SimpleConvAgent
+# from griddly.util.rllib.callbacks import GriddlyCallbacks
+from griddly.util.rllib.torch.conditional_actions.conditional_action_policy_trainer import \
+    ConditionalActionImpalaTrainer
+
+if __name__ == '__main__':
+    sep = os.pathsep
+    os.environ['PYTHONPATH'] = sep.join(sys.path)
+
+    yaml_files = [
+        os.path.realpath('clusters_po.yaml'),
+        os.path.realpath('clusters_po_with_push.yaml'),
+        os.path.realpath('clusters_po_with_push_seperate_colors.yaml')
+    ]
+
+    ray.init(num_gpus=1)
+    # ray.init(num_gpus=1, local_mode=True)
+
+    env_name = "ray-griddly-env"
+
+    register_env(env_name, RLlibEnv)
+    ModelCatalog.register_custom_model("SimpleConv", SimpleConvAgent)
+
+    wandbLoggerCallback = WandbLoggerCallback(
+        project='conditional_actions',
+        group='baseline',
+        api_key_file='~/.wandb_rc'
+    )
+
+    max_training_steps = 5000000
+
+    config = {
+        'framework': 'torch',
+        'num_workers': 8,
+        'num_envs_per_worker': 4,
+
+        # 'callbacks': GriddlyCallbacks,
+
+        'model': {
+            'custom_model': 'SimpleConv',
+            'custom_model_config': {}
+        },
+        'env': env_name,
+        'env_config': {
+            'record_video_config': {
+                'frequency': 100000,
+                'directory': 'baseline_videos'
+            },
+
+            # Put this here so it shows up in wandb
+            'generate_valid_action_trees': False,
+            'random_level_on_reset': True,
+            'yaml_file': tune.grid_search(yaml_files),
+            'global_observer_type': gd.ObserverType.SPRITE_2D,
+            'max_steps': 1000,
+        },
+        'entropy_coeff_schedule': [
+            [0, 0.01],
+            [max_training_steps, 0.0]
+        ],
+        'lr_schedule': [
+            [0, 0.0005],
+            [max_training_steps, 0.0]
+        ],
+
+    }
+
+    stop = {
+        "timesteps_total": max_training_steps,
+    }
+
+    result = tune.run(ConditionalActionImpalaTrainer, config=config, stop=stop, callbacks=[wandbLoggerCallback])
diff --git a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py
index 789bc4c6c..79f5a0b85 100644
--- a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py
+++ b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py
@@ -18,7 +18,7 @@
     sep = os.pathsep
     os.environ['PYTHONPATH'] = sep.join(sys.path)
 
-    yaml_file = os.path.realpath('clusters_po_with_push_seperate_colors.yaml')
+    yaml_files = os.path.realpath('clusters_po_with_push_seperate_colors.yaml')
 
     ray.init(num_gpus=1)
     # ray.init(num_gpus=1, local_mode=True)
diff --git a/python/griddly/util/rllib/torch/agents/conv_agent.py b/python/griddly/util/rllib/torch/agents/conv_agent.py
index 52dccf80a..ff1ddd9b4 100644
--- a/python/griddly/util/rllib/torch/agents/conv_agent.py
+++ b/python/griddly/util/rllib/torch/agents/conv_agent.py
@@ -23,22 +23,17 @@ def __init__(self, obs_space, action_space, num_outputs, model_config, name):
             nn.ReLU(),
             layer_init(nn.Conv2d(32, 64, 3, padding=1)),
             nn.ReLU(),
-            layer_init(nn.Conv2d(64, 64, 3, padding=1)),
-            nn.ReLU(),
-            layer_init(nn.Conv2d(64, 64, 3, padding=1)),
-            nn.ReLU(),
             nn.Flatten(),
             layer_init(nn.Linear(linear_flatten, 1024)),
             nn.ReLU(),
             layer_init(nn.Linear(1024, 512)),
             nn.ReLU(),
-            layer_init(nn.Linear(512, 512))
         )
 
         self._actor_head = nn.Sequential(
-            layer_init(nn.Linear(512, 512), std=0.01),
+            layer_init(nn.Linear(512, 256), std=0.01),
             nn.ReLU(),
-            layer_init(nn.Linear(512, self._num_actions), std=0.01)
+            layer_init(nn.Linear(256, self._num_actions), std=0.01)
         )
 
         self._critic_head = nn.Sequential(
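For a sense of scale, each 3x3, 64-channel convolution removed here held 64 * 64 * 9 + 64 = 36,928 parameters and the dropped Linear(512, 512) another 512 * 512 + 512 = 262,656, so together with halving the actor head this trims roughly 340k parameters from the network.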
diff --git a/python/griddly/util/rllib/torch/agents/global_average_pooling_agent.py b/python/griddly/util/rllib/torch/agents/global_average_pooling_agent.py
index d2249abcb..8fab31c26 100644
--- a/python/griddly/util/rllib/torch/agents/global_average_pooling_agent.py
+++ b/python/griddly/util/rllib/torch/agents/global_average_pooling_agent.py
@@ -41,22 +41,17 @@ def __init__(self, obs_space, action_space, num_outputs, model_config, name):
             nn.ReLU(),
             layer_init(nn.Conv2d(32, 64, 3, padding=1)),
             nn.ReLU(),
-            layer_init(nn.Conv2d(64, 64, 3, padding=1)),
-            nn.ReLU(),
-            layer_init(nn.Conv2d(64, 64, 3, padding=1)),
-            nn.ReLU(),
             GlobalAvePool(2048),
             layer_init(nn.Linear(2048, 1024)),
             nn.ReLU(),
             layer_init(nn.Linear(1024, 512)),
             nn.ReLU(),
-            layer_init(nn.Linear(512, 512))
        )
 
         self._actor_head = nn.Sequential(
-            layer_init(nn.Linear(512, 512), std=0.01),
+            layer_init(nn.Linear(512, 256), std=0.01),
             nn.ReLU(),
-            layer_init(nn.Linear(512, self._num_actions), std=0.01)
+            layer_init(nn.Linear(256, self._num_actions), std=0.01)
         )
 
         self._critic_head = nn.Sequential(
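The GlobalAvePool stage is what makes the GAP agent tolerant of different level sizes: it averages the convolutional feature map over its spatial dimensions, so grids of any width and height reduce to a fixed-length vector. A rough equivalent of the pooling step (assuming the module averages over H and W):

    import torch

    x = torch.randn(1, 64, 7, 9)      # N, C, H, W out of the conv stack
    pooled = x.mean(dim=(2, 3))       # -> shape (1, 64), independent of H and W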
From 923a4893d26d586a1bd7a22186f59b22d21fe445 Mon Sep 17 00:00:00 2001
From: Bam4d
Date: Thu, 18 Mar 2021 08:12:26 +0000
Subject: [PATCH 06/45] some work on fixes for videos

---
 python/.gitignore                              |   3 +
 .../rllib_baseline.py                          |   8 +-
 .../rllib_conditional_actions.py               |  30 +--
 python/examples/rllib/rllib_multi_agent.py     |   2 +-
 python/examples/rllib/rllib_single_agent.py    |   2 +-
 .../rllib_single_agent_conditional_actions.py  |   2 +-
 python/griddly/RenderTools.py                  |   1 +
 python/griddly/util/rllib/callbacks.py         |  36 +--
 .../util/rllib/environment/__init__.py        |   0
 python/griddly/util/rllib/environment/core.py  | 239 ++++++++++++++++++
 .../conditional_action_mixin.py                |   1 +
 11 files changed, 286 insertions(+), 38 deletions(-)
 create mode 100644 python/griddly/util/rllib/environment/__init__.py
 create mode 100644 python/griddly/util/rllib/environment/core.py

diff --git a/python/.gitignore b/python/.gitignore
index bfed1620f..5391f8bed 100644
--- a/python/.gitignore
+++ b/python/.gitignore
@@ -143,3 +143,6 @@ griddly/resources/
 
 # Hacky stuff
 scratchpad/
+
+# Video folders
+.video/
diff --git a/python/examples/experiments/conditional_action_spaces/rllib_baseline.py b/python/examples/experiments/conditional_action_spaces/rllib_baseline.py
index 9b7aa7a98..f023a7b5f 100644
--- a/python/examples/experiments/conditional_action_spaces/rllib_baseline.py
+++ b/python/examples/experiments/conditional_action_spaces/rllib_baseline.py
@@ -8,7 +8,8 @@
 from ray.tune.registry import register_env
 
 from griddly import gd
-from griddly.util.rllib.env.core import RLlibEnv
+from griddly.util.rllib.environment.core import RLlibEnv
+from griddly.util.rllib.torch import GAPAgent
 from griddly.util.rllib.torch.agents.conv_agent import SimpleConvAgent
 # from griddly.util.rllib.callbacks import GriddlyCallbacks
 from griddly.util.rllib.torch.conditional_actions.conditional_action_policy_trainer import \
@@ -32,6 +33,7 @@
 
     register_env(env_name, RLlibEnv)
     ModelCatalog.register_custom_model("SimpleConv", SimpleConvAgent)
+    ModelCatalog.register_custom_model("GAP", GAPAgent)
 
     wandbLoggerCallback = WandbLoggerCallback(
         project='conditional_actions',
@@ -39,7 +41,7 @@
         api_key_file='~/.wandb_rc'
     )
 
-    max_training_steps = 5000000
+    max_training_steps = 20000000
 
     config = {
         'framework': 'torch',
@@ -49,7 +51,7 @@
         # 'callbacks': GriddlyCallbacks,
 
         'model': {
-            'custom_model': 'SimpleConv',
+            'custom_model': tune.grid_search(['SimpleConv', 'GAP']),
             'custom_model_config': {}
         },
         'env': env_name,
diff --git a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py
index 79f5a0b85..62d355312 100644
--- a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py
+++ b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py
@@ -8,9 +8,9 @@
 from ray.tune.registry import register_env
 
 from griddly import gd
-from griddly.util.rllib.env.core import RLlibEnv
-# from griddly.util.rllib.callbacks import GriddlyCallbacks
-from griddly.util.rllib.torch import GAPAgent
+from griddly.util.rllib.callbacks import GriddlyCallbacks
+from griddly.util.rllib.environment.core import RLlibEnv
+from griddly.util.rllib.torch.agents.conv_agent import SimpleConvAgent
 from griddly.util.rllib.torch.conditional_actions.conditional_action_policy_trainer import \
     ConditionalActionImpalaTrainer
 
@@ -18,46 +18,46 @@
     sep = os.pathsep
     os.environ['PYTHONPATH'] = sep.join(sys.path)
 
-    yaml_files = os.path.realpath('clusters_po_with_push_seperate_colors.yaml')
+    yaml_file = os.path.realpath('clusters_po.yaml')
 
     ray.init(num_gpus=1)
-    # ray.init(num_gpus=1, local_mode=True)
+    #ray.init(num_gpus=1, local_mode=True)
 
     env_name = "ray-griddly-env"
 
     register_env(env_name, RLlibEnv)
-    ModelCatalog.register_custom_model("GAP", GAPAgent)
+    ModelCatalog.register_custom_model("SimpleConv", SimpleConvAgent)
 
     wandbLoggerCallback = WandbLoggerCallback(
         project='conditional_actions',
         api_key_file='~/.wandb_rc'
     )
 
-    max_training_steps = 5000000
+    max_training_steps = 1000000
 
     config = {
         'framework': 'torch',
-        'num_workers': 4,
-        'num_envs_per_worker': 4,
+        'num_workers': 1,
+        'num_envs_per_worker': 1,
 
-        # 'callbacks': GriddlyCallbacks,
+        'callbacks': GriddlyCallbacks,
 
         'model': {
-            'custom_model': 'GAP',
+            'custom_model': 'SimpleConv',
             'custom_model_config': {}
         },
         'env': env_name,
         'env_config': {
             'record_video_config': {
-                'frequency': 100000,
+                'frequency': 1000,
                 'directory': 'videos'
             },
 
-            'allow_nop': tune.grid_search([True, False]),
-            'invalid_action_masking': tune.grid_search(['none', 'conditional', 'collapsed']),
+            #'allow_nop': tune.grid_search([True, False]),
+            #'invalid_action_masking': tune.grid_search(['none', 'conditional', 'collapsed']),
             # 'invalid_action_masking': 'collapsed',
             # 'allow_nop': False,
-            'generate_valid_action_trees': tune.grid_search([True, False]),
+            'generate_valid_action_trees': True,
             'random_level_on_reset': True,
             'yaml_file': yaml_file,
             'global_observer_type': gd.ObserverType.SPRITE_2D,
diff --git a/python/examples/rllib/rllib_multi_agent.py b/python/examples/rllib/rllib_multi_agent.py
index 1560a127f..713910dbc 100644
--- a/python/examples/rllib/rllib_multi_agent.py
+++ b/python/examples/rllib/rllib_multi_agent.py
@@ -10,7 +10,7 @@
 
 from griddly import gd
 from griddly.util.rllib.torch.agents.conv_agent import SimpleConvAgent
-from griddly.util.rllib.env.core import RLlibMultiAgentWrapper, RLlibEnv
+from griddly.util.rllib.environment.core import RLlibMultiAgentWrapper, RLlibEnv
 
 if __name__ == '__main__':
     sep = os.pathsep
diff --git a/python/examples/rllib/rllib_single_agent.py b/python/examples/rllib/rllib_single_agent.py
index de96b6589..c69697a81 100644
--- a/python/examples/rllib/rllib_single_agent.py
+++ b/python/examples/rllib/rllib_single_agent.py
@@ -9,7 +9,7 @@
 
 from griddly import gd
 from griddly.util.rllib.torch import GAPAgent
-from griddly.util.rllib.env.core import RLlibEnv
+from griddly.util.rllib.environment.core import RLlibEnv
 
 if __name__ == '__main__':
     sep = os.pathsep
diff --git a/python/examples/rllib/rllib_single_agent_conditional_actions.py b/python/examples/rllib/rllib_single_agent_conditional_actions.py
index a7d0b4a61..1075d8f1b 100644
--- a/python/examples/rllib/rllib_single_agent_conditional_actions.py
+++ b/python/examples/rllib/rllib_single_agent_conditional_actions.py
@@ -9,7 +9,7 @@
 from griddly import gd
 from griddly.util.rllib.torch import GAPAgent
 from griddly.util.rllib.torch.conditional_actions.conditional_action_policy_trainer import ConditionalActionImpalaTrainer
-from griddly.util.rllib.env.core import RLlibEnv
+from griddly.util.rllib.environment.core import RLlibEnv
 
 if __name__ == '__main__':
     sep = os.pathsep
diff --git a/python/griddly/RenderTools.py b/python/griddly/RenderTools.py
index 73029561d..b95386afd 100644
--- a/python/griddly/RenderTools.py
+++ b/python/griddly/RenderTools.py
@@ -1,5 +1,6 @@
 import imageio
 from gym.wrappers.monitoring.video_recorder import ImageEncoder
+import os
 
 
 class RenderWindow():
diff --git a/python/griddly/util/rllib/callbacks.py b/python/griddly/util/rllib/callbacks.py
index b1c0587f1..ab7ce09e0 100644
--- a/python/griddly/util/rllib/callbacks.py
+++ b/python/griddly/util/rllib/callbacks.py
@@ -4,9 +4,7 @@
 from ray.rllib.agents.callbacks import DefaultCallbacks
 from ray.rllib.evaluation import MultiAgentEpisode
 from ray.rllib.utils.typing import AgentID, PolicyID
-
-
-# from wandb import Video
+from wandb import Video
 
 
 class GriddlyCallbacks(DefaultCallbacks):
@@ -14,8 +12,6 @@ class GriddlyCallbacks(DefaultCallbacks):
     def __init__(self, legacy_callbacks_dict: Dict[str, callable] = None):
         super().__init__(legacy_callbacks_dict)
 
-        # self._videos = {}
-
     def on_episode_start(self, *, worker: "RolloutWorker", base_env: BaseEnv, policies: Dict[PolicyID, Policy],
                          episode: MultiAgentEpisode, env_index: Optional[int] = None, **kwargs) -> None:
         super().on_episode_start(worker=worker, base_env=base_env, policies=policies, episode=episode,
@@ -29,11 +25,14 @@ def on_episode_end(self, *, worker: "RolloutWorker", base_env: BaseEnv, policies
                        episode: MultiAgentEpisode, env_index: Optional[int] = None, **kwargs) -> None:
         super().on_episode_end(worker=worker, base_env=base_env, policies=policies, episode=episode,
                                env_index=env_index, **kwargs)
-        # if not worker.multiagent:
-        #     info = episode.last_info_for()
-        #     if 'video' in info:
-        #         video_info = info['video']
-        #         self._videos[video_info['level']] = video_info['path']
+        if not worker.multiagent:
+            info = episode.last_info_for()
+            if 'video' in info:
+                level = info['video']['level']
+                path = info['video']['path']
+                print(f'creating video with path: {path}')
+                episode.media['video_test'] = 'here is some test data'
+                episode.media[f'level_{level}'] = Video(path)
 
     def on_postprocess_trajectory(self, *, worker: "RolloutWorker", episode: MultiAgentEpisode, agent_id: AgentID,
                                   policy_id: PolicyID, policies: Dict[PolicyID, Policy],
@@ -46,13 +45,16 @@ def on_sample_end(self, *, worker: "RolloutWorker", samples: SampleBatch, **kwar
     def on_sample_end(self, *, worker: "RolloutWorker", samples: SampleBatch, **kwargs) -> None:
         super().on_sample_end(worker=worker, samples=samples, **kwargs)
 
-    def on_learn_on_batch(self, *, policy: Policy, train_batch: SampleBatch, **kwargs) -> None:
-        # TODO: extract any video from infos when this API is updated
-        super().on_learn_on_batch(policy=policy, train_batch=train_batch, **kwargs)
+    def on_learn_on_batch(self, *, policy: Policy, train_batch: SampleBatch, result: dict, **kwargs) -> None:
+        pass
+        # Loop through the 'info' keys looking for 'video'
+        # for info_dict in train_batch[SampleBatch.INFOS]:
+        #     if 'video' in info_dict:
+        #         level = info_dict['video']['level']
+        #         path = info_dict['video']['path']
+        #         print(f'creating video with path: {path}')
+        #         result['video_test'] = 1
+        #         result[f'level_{level}'] = Video(path)
 
     def on_train_result(self, *, trainer, result: dict, **kwargs) -> None:
         super().on_train_result(trainer=trainer, result=result, **kwargs)
-
-        # for level, path in self._videos.items():
-        #     result[f'level_{level}'] = Video(path)
-        #     del self._videos[level]
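The contract between the environment and these callbacks is a small dict that RLlibEnv places in the step info once a recording finishes, for example:

    info['video'] = {
        'level': 0,                                           # level that was recorded
        'path': '/videos/global_video_<uuid>_0_100000.mp4'    # illustrative path
    }

on_episode_end reads it back via episode.last_info_for() and wraps the file in a wandb Video object so the clip is logged alongside the training metrics.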
diff --git a/python/griddly/util/rllib/environment/__init__.py b/python/griddly/util/rllib/environment/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/python/griddly/util/rllib/environment/core.py b/python/griddly/util/rllib/environment/core.py
new file mode 100644
index 000000000..097f6c7b0
--- /dev/null
+++ b/python/griddly/util/rllib/environment/core.py
@@ -0,0 +1,239 @@
+import os
+from collections import defaultdict
+from enum import Enum
+from uuid import uuid1
+
+import gym
+import numpy as np
+from ray.rllib import MultiAgentEnv
+from ray.rllib.utils.typing import MultiAgentDict
+
+from griddly import GymWrapper
+from griddly.RenderTools import VideoRecorder
+
+
+class RecordingState(Enum):
+    NOT_RECORDING = 1
+    WAITING_FOR_EPISODE_START = 2
+    BEFORE_RECORDING = 3
+    RECORDING = 4
+
+
+class RLlibEnv(GymWrapper):
+    """
+    Wraps a Griddly environment for compatibility with RLLib.
+
+    Use the `env_config` in the rllib config to provide Griddly Environment Parameters
+
+    Example:
+
+    Firstly register the RLlibWrapper using rllib's
+
+    env_name = "my_env_name"
+
+    register_env(env_name, RLlibWrapper)
+
+    you can then configure it
+
+    rllib_config = {
+        'env_config': {
+            'yaml_file': 'Single-Player/GVGAI/butterflies.yaml',
+            'level': 6,
+            'player_observer_type': gd.ObserverType.SPRITE_2D,
+            'global_observer_type': gd.ObserverType.ISOMETRIC,
+            'max_steps': 1000,
+        },
+        # Other configuration options
+    }
+
+    Create the rllib trainer using this config:
+
+    trainer = ImpalaTrainer(rllib_config, env=env_name)
+
+    """
+
+    def __init__(self, env_config):
+        super().__init__(**env_config)
+
+        self.generate_valid_action_trees = env_config.get('generate_valid_action_trees', False)
+        self._record_video_config = env_config.get('record_video_config', None)
+        self._random_level_on_reset = env_config.get('random_level_on_reset', False)
+
+        super().reset()
+
+        self._recording_state = None
+        self._env_steps = 0
+
+        if self._record_video_config is not None:
+            self._recording_state = RecordingState.BEFORE_RECORDING
+            self._video_frequency = self._record_video_config.get('frequency', 1000)
+            self._video_directory = os.path.realpath(self._record_video_config.get('directory', '.'))
+            os.makedirs(self._video_directory, exist_ok=True)
+
+        self.set_transform()
+
+    def _transform(self, observation):
+
+        if self.player_count > 1:
+            transformed_obs = [obs.transpose(1, 2, 0).astype(np.float) for obs in observation]
+        else:
+            transformed_obs = observation.transpose(1, 2, 0).astype(np.float)
+
+        return transformed_obs
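Griddly emits observations channels-first (C, H, W) while the models above expect channels-last floats, which is all _transform does:

    import numpy as np

    obs_chw = np.zeros((3, 10, 12), dtype=np.uint8)          # C, H, W from Griddly
    obs_hwc = obs_chw.transpose(1, 2, 0).astype(np.float)    # -> (10, 12, 3)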
+    def _after_step(self, observation, reward, done, info):
+        extra_info = {}
+        if self._recording_state is not None:
+            if self._recording_state is RecordingState.NOT_RECORDING and self._env_steps % self._video_frequency == 0:
+                self._recording_state = RecordingState.WAITING_FOR_EPISODE_START
+
+            if self._recording_state == RecordingState.BEFORE_RECORDING:
+                global_obs = self.render(observer='global', mode='rgb_array')
+                self._global_recorder = VideoRecorder()
+
+                video_filename = os.path.join(
+                    self._video_directory,
+                    f'global_video_{uuid1()}_{self.level_id}_{self._env_steps}.mp4'
+                )
+
+                self._global_recorder.start(video_filename, global_obs.shape)
+                self._recording_state = RecordingState.RECORDING
+
+            if self._recording_state == RecordingState.RECORDING:
+                global_obs = self.render(observer='global', mode='rgb_array')
+                self._global_recorder.add_frame(global_obs)
+                if done:
+                    self._recording_state = RecordingState.NOT_RECORDING
+                    self._global_recorder.close()
+
+                    print(f'finished recording {self._global_recorder.output_file}')
+
+                    extra_info['video'] = {
+                        'level': self.level_id,
+                        'path': self._global_recorder.output_file
+                    }
+
+            if self._recording_state == RecordingState.WAITING_FOR_EPISODE_START:
+                if done:
+                    self._recording_state = RecordingState.BEFORE_RECORDING
+
+        return extra_info
+
+    def set_transform(self):
+        """
+        Create the transform for rllib based on the observation space
+        """
+
+        if self.player_count > 1:
+            self.observation_space = self.observation_space[0]
+            self.action_space = self.action_space[0]
+
+        self.observation_space = gym.spaces.Box(
+            self.observation_space.low.transpose((1, 2, 0)).astype(np.float),
+            self.observation_space.high.transpose((1, 2, 0)).astype(np.float),
+            dtype=np.float,
+        )
+
+        self.height = self.observation_space.shape[0]
+        self.width = self.observation_space.shape[1]
+
+    def _get_valid_action_trees(self):
+        valid_action_trees = self.game.build_valid_action_trees()
+        if self.player_count == 1:
+            return valid_action_trees[0]
+        return valid_action_trees
+
+    def reset(self, **kwargs):
+
+        if self._random_level_on_reset:
+            kwargs['level_id'] = np.random.choice(self.level_count)
+        observation = super().reset(**kwargs)
+        self.set_transform()
+
+        if self.generate_valid_action_trees:
+            self.last_valid_action_trees = self._get_valid_action_trees()
+
+        return self._transform(observation)
+
+    def step(self, action):
+        observation, reward, done, info = super().step(action)
+
+        extra_info = self._after_step(observation, reward, done, info)
+
+        if 'video' in extra_info:
+            info['video'] = extra_info['video']
+
+        self._env_steps += 1
+
+        if self.generate_valid_action_trees:
+            self.last_valid_action_trees = self._get_valid_action_trees()
+            info['valid_action_tree'] = self.last_valid_action_trees
+
+        return self._transform(observation), reward, done, info
+
+    def render(self, mode='human', observer=0):
+        return super().render(mode, observer='global')
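The recording logic above is a four-state machine: BEFORE_RECORDING starts a recorder on the next step, RECORDING appends a global-observer frame every step until the episode ends, NOT_RECORDING waits for the step counter to cross the next multiple of the configured frequency, and WAITING_FOR_EPISODE_START then holds off until the current episode finishes, so a recording always starts from a reset.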
+
+
+class RLlibMultiAgentWrapper(gym.Wrapper, MultiAgentEnv):
+
+    def __init__(self, env, env_config):
+        super().__init__(env)
+
+        self._player_done_variable = env_config.get('player_done_variable', None)
+
+        # Used to keep track of agents that are active in the environment
+        self._active_agents = set()
+
+        assert self.player_count > 1, 'RLlibMultiAgentWrapper can only be used with environments that have multiple agents'
+
+    def _to_multi_agent_map(self, data):
+        return {a: data[a - 1] for a in self._active_agents}
+
+    def reset(self, **kwargs):
+        obs = super().reset(**kwargs)
+        self._active_agents.update([a + 1 for a in range(self.player_count)])
+        return self._to_multi_agent_map(obs)
+
+    def _resolve_player_done_variable(self):
+        resolved_variables = self.game.get_global_variable([self._player_done_variable])
+        return resolved_variables[self._player_done_variable]
+
+    def step(self, action_dict: MultiAgentDict):
+        actions_array = np.zeros((self.player_count, *self.action_space.shape))
+        for agent_id, action in action_dict.items():
+            actions_array[agent_id - 1] = action
+
+        obs, reward, all_done, info = super().step(actions_array)
+
+        done_map = {'__all__': all_done}
+
+        if self._player_done_variable is not None:
+            griddly_players_done = self._resolve_player_done_variable()
+
+            for agent_id in self._active_agents:
+                done_map[agent_id] = griddly_players_done[agent_id] == 1 or all_done
+        else:
+            for p in range(self.player_count):
+                done_map[p] = False
+
+        if self.generate_valid_action_trees:
+            info_map = self._to_multi_agent_map([
+                {'valid_action_tree': valid_action_tree} for valid_action_tree in info['valid_action_trees']
+            ])
+        else:
+            info_map = self._to_multi_agent_map(defaultdict(dict))
+
+        obs_map = self._to_multi_agent_map(obs)
+        reward_map = self._to_multi_agent_map(reward)
+
+        # Finally remove any agent ids that are done
+        for agent_id, is_done in done_map.items():
+            if is_done:
+                self._active_agents.discard(agent_id)
+
+        assert len(obs_map) == len(reward_map)
+        assert len(obs_map) == len(done_map) - 1
+        assert len(obs_map) == len(info_map)
+
+        return obs_map, reward_map, done_map, info_map
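Griddly players are 1-indexed while the underlying observation and reward lists are 0-indexed; _to_multi_agent_map bridges the two:

    active_agents = {1, 2, 3}
    rewards = [0.0, 1.0, -1.0]
    reward_map = {a: rewards[a - 1] for a in active_agents}
    # -> {1: 0.0, 2: 1.0, 3: -1.0}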
diff --git a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_mixin.py b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_mixin.py
index 5a3c09277..c9ee92568 100644
--- a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_mixin.py
+++ b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_mixin.py
@@ -50,6 +50,7 @@ def compute_actions_from_input_dict(
                 for info in infos:
                     if isinstance(info, dict) and 'valid_action_tree' in info:
                         valid_action_trees.append(info['valid_action_tree'])
+                        print('valid_action_tree_found')
                     else:
                         valid_action_trees.append({})
 

From 9c73bb8458c8fd403e4b127dc61a54e8ab033f7b Mon Sep 17 00:00:00 2001
From: Bam4d
Date: Thu, 18 Mar 2021 16:25:40 +0000
Subject: [PATCH 07/45] removing some code and updating other examples

---
 .../conditional_action_spaces/rllib_baseline.py |  1 -
 .../rllib_conditional_actions.py                | 16 ++++++++++------
 python/examples/rllib/rllib_multi_agent.py      |  3 ++-
 python/examples/rllib/rllib_single_agent.py     |  3 ++-
 .../rllib_single_agent_conditional_actions.py   |  5 +++--
 python/griddly/util/rllib/callbacks.py          | 10 ----------
 python/griddly/util/rllib/environment/core.py   |  2 --
 .../conditional_action_mixin.py                 |  1 -
 8 files changed, 17 insertions(+), 24 deletions(-)

diff --git a/python/examples/experiments/conditional_action_spaces/rllib_baseline.py b/python/examples/experiments/conditional_action_spaces/rllib_baseline.py
index f023a7b5f..a89f160c8 100644
--- a/python/examples/experiments/conditional_action_spaces/rllib_baseline.py
+++ b/python/examples/experiments/conditional_action_spaces/rllib_baseline.py
@@ -33,7 +33,6 @@
 
     register_env(env_name, RLlibEnv)
     ModelCatalog.register_custom_model("SimpleConv", SimpleConvAgent)
-    ModelCatalog.register_custom_model("GAP", GAPAgent)
 
     wandbLoggerCallback = WandbLoggerCallback(
         project='conditional_actions',
diff --git a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py
index 62d355312..002e40308 100644
--- a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py
+++ b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py
@@ -18,7 +18,11 @@
     sep = os.pathsep
     os.environ['PYTHONPATH'] = sep.join(sys.path)
 
-    yaml_file = os.path.realpath('clusters_po.yaml')
+    yaml_files = [
+        os.path.realpath('clusters_po.yaml'),
+        os.path.realpath('clusters_po_with_push.yaml'),
+        os.path.realpath('clusters_po_with_push_seperate_colors.yaml')
+    ]
 
     ray.init(num_gpus=1)
     #ray.init(num_gpus=1, local_mode=True)
@@ -37,7 +41,7 @@
     config = {
         'framework': 'torch',
-        'num_workers': 1,
+        'num_workers': 8,
         'num_envs_per_worker': 1,
 
         'callbacks': GriddlyCallbacks,
@@ -49,17 +53,17 @@
         'env': env_name,
         'env_config': {
             'record_video_config': {
-                'frequency': 1000,
+                'frequency': 100000,
                 'directory': 'videos'
             },
 
-            #'allow_nop': tune.grid_search([True, False]),
-            #'invalid_action_masking': tune.grid_search(['none', 'conditional', 'collapsed']),
+            'allow_nop': tune.grid_search([True, False]),
+            'invalid_action_masking': tune.grid_search(['none', 'conditional', 'collapsed']),
             # 'invalid_action_masking': 'collapsed',
             # 'allow_nop': False,
             'generate_valid_action_trees': True,
             'random_level_on_reset': True,
-            'yaml_file': yaml_file,
+            'yaml_file': tune.grid_search(yaml_files),
             'global_observer_type': gd.ObserverType.SPRITE_2D,
             'max_steps': 1000,
         },
diff --git a/python/examples/rllib/rllib_multi_agent.py b/python/examples/rllib/rllib_multi_agent.py
index 713910dbc..efa82c5a7 100644
--- a/python/examples/rllib/rllib_multi_agent.py
+++ b/python/examples/rllib/rllib_multi_agent.py
@@ -49,7 +49,8 @@ def _create_env(env_config):
         # 'player_done_variable': 'player_done',
 
         'record_video_config': {
-            'frequency': 20000  # number of rollouts
+            'frequency': 20000,  # number of rollouts
+            'directory': 'videos'
         },
 
         'random_level_on_reset': True,
diff --git a/python/examples/rllib/rllib_single_agent.py b/python/examples/rllib/rllib_single_agent.py
index c69697a81..4f6b47a1d 100644
--- a/python/examples/rllib/rllib_single_agent.py
+++ b/python/examples/rllib/rllib_single_agent.py
@@ -36,7 +36,8 @@
         'env': env_name,
         'env_config': {
             'record_video_config': {
-                'frequency': 100000
+                'frequency': 100000,
+                'directory': 'videos'
             },
 
             'random_level_on_reset': True,
diff --git a/python/examples/rllib/rllib_single_agent_conditional_actions.py b/python/examples/rllib/rllib_single_agent_conditional_actions.py
index 1075d8f1b..b2ff4efca 100644
--- a/python/examples/rllib/rllib_single_agent_conditional_actions.py
+++ b/python/examples/rllib/rllib_single_agent_conditional_actions.py
@@ -23,7 +23,7 @@
     register_env(env_name, RLlibEnv)
     ModelCatalog.register_custom_model("GAP", GAPAgent)
 
-    max_training_steps = 2000000
+    max_training_steps = 20000000
 
     config = {
         'framework': 'torch',
@@ -37,7 +37,8 @@
         'env': env_name,
         'env_config': {
             'record_video_config': {
-                'frequency': 100000
+                'frequency': 100000,
+                'directory': 'videos'
             },
 
             'allow_nop': tune.grid_search([True, False]),
diff --git a/python/griddly/util/rllib/callbacks.py b/python/griddly/util/rllib/callbacks.py
index ab7ce09e0..354c90c3f 100644
--- a/python/griddly/util/rllib/callbacks.py
+++ b/python/griddly/util/rllib/callbacks.py
@@ -30,8 +30,6 @@ def on_episode_end(self, *, worker: "RolloutWorker", base_env: BaseEnv, policies
             if 'video' in info:
                 level = info['video']['level']
                 path = info['video']['path']
-                print(f'creating video with path: {path}')
-                episode.media['video_test'] = 'here is some test data'
                 episode.media[f'level_{level}'] = Video(path)
 
     def on_postprocess_trajectory(self, *, worker: "RolloutWorker", episode: MultiAgentEpisode, agent_id: AgentID,
                                   policy_id: PolicyID, policies: Dict[PolicyID, Policy],
@@ -47,13 +45,6 @@ def on_learn_on_batch(self, *, policy: Policy, train_batch: SampleBatch, result:
     def on_learn_on_batch(self, *, policy: Policy, train_batch: SampleBatch, result: dict, **kwargs) -> None:
         pass
-        # Loop through the 'info' keys looking for 'video'
-        # for info_dict in train_batch[SampleBatch.INFOS]:
-        #     if 'video' in info_dict:
-        #         level = info_dict['video']['level']
-        #         path = info_dict['video']['path']
-        #         print(f'creating video with path: {path}')
-        #         result['video_test'] = 1
-        #         result[f'level_{level}'] = Video(path)
 
     def on_train_result(self, *, trainer, result: dict, **kwargs) -> None:
         super().on_train_result(trainer=trainer, result=result, **kwargs)
diff --git a/python/griddly/util/rllib/environment/core.py b/python/griddly/util/rllib/environment/core.py
index 097f6c7b0..86672c862 100644
--- a/python/griddly/util/rllib/environment/core.py
+++ b/python/griddly/util/rllib/environment/core.py
@@ -106,8 +106,6 @@ def _after_step(self, observation, reward, done, info):
                     self._recording_state = RecordingState.NOT_RECORDING
                     self._global_recorder.close()
 
-                    print(f'finished recording {self._global_recorder.output_file}')
-
                     extra_info['video'] = {
                         'level': self.level_id,
diff --git a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_mixin.py b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_mixin.py
index c9ee92568..5a3c09277 100644
--- a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_mixin.py
+++ b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_mixin.py
@@ -50,7 +50,6 @@
                 for info in infos:
                     if isinstance(info, dict) and 'valid_action_tree' in info:
                         valid_action_trees.append(info['valid_action_tree'])
-                        print('valid_action_tree_found')
                     else:
                         valid_action_trees.append({})
From 28a107dcf1d2210b1a7b00cbfc6c8c3f5684bf7c Mon Sep 17 00:00:00 2001
From: Bam4d
Date: Sat, 20 Mar 2021 15:18:57 +0000
Subject: [PATCH 08/45] added more options for running/testing on cluster

---
 .../rllib_conditional_actions.py       | 45 ++++++++++++++-----
 python/griddly/util/rllib/callbacks.py | 35 +---------------
 2 files changed, 36 insertions(+), 44 deletions(-)

diff --git a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py
index 002e40308..1ac7e9cc0 100644
--- a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py
+++ b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py
@@ -14,7 +14,28 @@
 from griddly.util.rllib.torch.conditional_actions.conditional_action_policy_trainer import \
     ConditionalActionImpalaTrainer
 
+import argparse
+
+parser = argparse.ArgumentParser(description='Run experiments')
+
+parser.add_argument('--root-directory', default=os.path.expanduser("~/ray_results"), help='root directory for all data associated with the run')
+parser.add_argument('--num-gpus', default=1, type=int, help='Number of GPUs to make available')
+
+parser.add_argument('--num-workers', default=8, type=int, help='Number of workers')
+parser.add_argument('--num-envs-per-worker', default=2, type=int, help='Number of environments per worker')
+parser.add_argument('--num-gpus-per-worker', default=0, type=float, help='Number of GPUs per worker')
+parser.add_argument('--num-cpus-per-worker', default=1, type=float, help='Number of CPUs per worker')
+parser.add_argument('--max-training-steps', default=20000000, type=int, help='Maximum number of training steps')
+
+parser.add_argument('--video-directory', default='videos', help='directory of video')
+parser.add_argument('--video-frequency', type=int, default=10000, help='Frequency of videos')
+
+parser.add_argument('--lr', type=float, default=0.0005, help='learning rate')
+
 if __name__ == '__main__':
+
+    args = parser.parse_args()
+
     sep = os.pathsep
     os.environ['PYTHONPATH'] = sep.join(sys.path)
@@ -24,8 +45,8 @@
         os.path.realpath('clusters_po_with_push_seperate_colors.yaml')
     ]
 
-    ray.init(num_gpus=1)
-    #ray.init(num_gpus=1, local_mode=True)
+    ray.init(num_gpus=args.num_gpus)
+    #ray.init(num_gpus=args.num_gpus, local_mode=True)
 
     env_name = "ray-griddly-env"
 
@@ -58,14 +79,16 @@
-    max_training_steps = 1000000
+    max_training_steps = args.max_training_steps
 
     config = {
         'framework': 'torch',
-        'num_workers': 8,
-        'num_envs_per_worker': 1,
+        'num_workers': args.num_workers,
+        'num_envs_per_worker': args.num_envs_per_worker,
+        'num_gpus_per_worker': float(args.num_gpus_per_worker),
+        'num_cpus_per_worker': args.num_cpus_per_worker,
 
         'callbacks': GriddlyCallbacks,
 
@@ -76,12 +99,12 @@
         'env': env_name,
         'env_config': {
             'record_video_config': {
-                'frequency': 100000,
-                'directory': 'videos'
+                'frequency': args.video_frequency,
+                'directory': os.path.join(args.root_directory, args.video_directory)
             },
 
-            'allow_nop': tune.grid_search([True, False]),
-            'invalid_action_masking': tune.grid_search(['none', 'conditional', 'collapsed']),
+            #'allow_nop': tune.grid_search([True, False]),
+            #'invalid_action_masking': tune.grid_search(['none', 'conditional', 'collapsed']),
             # 'invalid_action_masking': 'collapsed',
             # 'allow_nop': False,
             'generate_valid_action_trees': True,
@@ -95,7 +118,7 @@
         'lr_schedule': [
-            [0, 0.0005],
+            [0, args.lr],
             [max_training_steps, 0.0]
         ],
 
@@ -105,4 +128,4 @@
     stop = {
         "timesteps_total": max_training_steps,
     }
 
-    result = tune.run(ConditionalActionImpalaTrainer, config=config, stop=stop, callbacks=[wandbLoggerCallback])
+    result = tune.run(ConditionalActionImpalaTrainer, local_dir=args.root_directory, config=config, stop=stop, callbacks=[wandbLoggerCallback])
diff --git a/python/griddly/util/rllib/callbacks.py b/python/griddly/util/rllib/callbacks.py
index 354c90c3f..dfb1865fe 100644
--- a/python/griddly/util/rllib/callbacks.py
+++ b/python/griddly/util/rllib/callbacks.py
@@ -1,50 +1,19 @@
 from typing import Optional, Dict
 
-from ray.rllib import Policy, SampleBatch, BaseEnv
+from ray.rllib import Policy, BaseEnv
 from ray.rllib.agents.callbacks import DefaultCallbacks
 from ray.rllib.evaluation import MultiAgentEpisode
-from ray.rllib.utils.typing import AgentID, PolicyID
+from ray.rllib.utils.typing import PolicyID
 from wandb import Video
 
 
 class GriddlyCallbacks(DefaultCallbacks):
 
-    def __init__(self, legacy_callbacks_dict: Dict[str, callable] = None):
-        super().__init__(legacy_callbacks_dict)
-
-    def on_episode_start(self, *, worker: "RolloutWorker", base_env: BaseEnv, policies: Dict[PolicyID, Policy],
-                         episode: MultiAgentEpisode, env_index: Optional[int] = None, **kwargs) -> None:
-        super().on_episode_start(worker=worker, base_env=base_env, policies=policies, episode=episode,
-                                 env_index=env_index, **kwargs)
-
-    def on_episode_step(self, *, worker: "RolloutWorker", base_env: BaseEnv, episode: MultiAgentEpisode,
-                        env_index: Optional[int] = None, **kwargs) -> None:
-        super().on_episode_step(worker=worker, base_env=base_env, episode=episode, env_index=env_index, **kwargs)
-
     def on_episode_end(self, *, worker: "RolloutWorker", base_env: BaseEnv, policies: Dict[PolicyID, Policy],
                        episode: MultiAgentEpisode, env_index: Optional[int] = None, **kwargs) -> None:
-        super().on_episode_end(worker=worker, base_env=base_env, policies=policies, episode=episode,
-                               env_index=env_index, **kwargs)
         if not worker.multiagent:
             info = episode.last_info_for()
             if 'video' in info:
                 level = info['video']['level']
                 path = info['video']['path']
                 episode.media[f'level_{level}'] = Video(path)
-
-    def on_postprocess_trajectory(self, *, worker: "RolloutWorker", episode: MultiAgentEpisode, agent_id: AgentID,
-                                  policy_id: PolicyID, policies: Dict[PolicyID, Policy],
-                                  postprocessed_batch: SampleBatch, original_batches: Dict[AgentID, SampleBatch],
-                                  **kwargs) -> None:
-        super().on_postprocess_trajectory(worker=worker, episode=episode, agent_id=agent_id, policy_id=policy_id,
-                                          policies=policies, postprocessed_batch=postprocessed_batch,
-                                          original_batches=original_batches, **kwargs)
-
-    def on_sample_end(self, *, worker: "RolloutWorker", samples: SampleBatch, **kwargs) -> None:
-        super().on_sample_end(worker=worker, samples=samples, **kwargs)
-
-    def on_learn_on_batch(self, *, policy: Policy, train_batch: SampleBatch, result: dict, **kwargs) -> None:
-        pass
-
-    def on_train_result(self, *, trainer, result: dict, **kwargs) -> None:
-        super().on_train_result(trainer=trainer, result=result, **kwargs)
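With this patch the experiment is driven from the command line; an illustrative invocation using only the flags defined above:

    python rllib_conditional_actions.py \
        --root-directory ~/experiments \
        --num-gpus 1 --num-workers 8 --num-envs-per-worker 2 \
        --max-training-steps 20000000 --lr 0.0005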
From 05abd17ee4dea3400c8eb16601a0cc220c88bd6d Mon Sep 17 00:00:00 2001
From: Bam4d
Date: Sat, 20 Mar 2021 15:45:46 +0000
Subject: [PATCH 09/45] don't ever need dashboard

---
 .../conditional_action_spaces/rllib_conditional_actions.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py
index 1ac7e9cc0..554a2386e 100644
--- a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py
+++ b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py
@@ -45,8 +45,8 @@
         os.path.realpath('clusters_po_with_push_seperate_colors.yaml')
     ]
 
-    ray.init(num_gpus=args.num_gpus)
-    #ray.init(num_gpus=args.num_gpus, local_mode=True)
+    ray.init(include_dashboard=False, num_gpus=args.num_gpus)
+    #ray.init(include_dashboard=False, num_gpus=args.num_gpus, local_mode=True)

From 10f5873e1e7f5065924cfc1195aea776bcf4c928 Mon Sep 17 00:00:00 2001
From: Bam4d
Date: Sat, 20 Mar 2021 15:53:25 +0000
Subject: [PATCH 10/45] turn these tune hyperparams back on

---
 .../conditional_action_spaces/rllib_conditional_actions.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py
index 554a2386e..d416d00a6 100644
--- a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py
+++ b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py
@@ -80,8 +80,8 @@
 
-            #'allow_nop': tune.grid_search([True, False]),
-            #'invalid_action_masking': tune.grid_search(['none', 'conditional', 'collapsed']),
+            'allow_nop': tune.grid_search([True, False]),
+            'invalid_action_masking': tune.grid_search(['none', 'conditional', 'collapsed']),
             # 'invalid_action_masking': 'collapsed',
             # 'allow_nop': False,
             'generate_valid_action_trees': True,
From da337feb7b0f0e471d56f66bf0612dc2a972c53f Mon Sep 17 00:00:00 2001
From: Bam4d
Date: Sat, 20 Mar 2021 16:06:45 +0000
Subject: [PATCH 11/45] option for turning off videos

---
 .../rllib_conditional_actions.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py
index d416d00a6..957790a45 100644
--- a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py
+++ b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py
@@ -18,7 +18,8 @@
 
 parser = argparse.ArgumentParser(description='Run experiments')
 
-parser.add_argument('--root-directory', default=os.path.expanduser("~/ray_results"), help='root directory for all data associated with the run')
+parser.add_argument('--root-directory', default=os.path.expanduser("~/ray_results"),
+                    help='root directory for all data associated with the run')
 parser.add_argument('--num-gpus', default=1, type=int, help='Number of GPUs to make available')
 
 parser.add_argument('--num-workers', default=8, type=int, help='Number of workers')
@@ -27,6 +28,7 @@
 parser.add_argument('--num-cpus-per-worker', default=1, type=float, help='Number of CPUs per worker')
 parser.add_argument('--max-training-steps', default=20000000, type=int, help='Maximum number of training steps')
 
+parser.add_argument('--capture-video', action='store_true', help='enable video capture')
 parser.add_argument('--video-directory', default='videos', help='directory of video')
 parser.add_argument('--video-frequency', type=int, default=10000, help='Frequency of videos')
 
@@ -46,7 +48,7 @@
     ]
 
     ray.init(include_dashboard=False, num_gpus=args.num_gpus)
-    #ray.init(include_dashboard=False, num_gpus=args.num_gpus, local_mode=True)
+    # ray.init(include_dashboard=False, num_gpus=args.num_gpus, local_mode=True)
 
     env_name = "ray-griddly-env"
 
@@ -75,10 +77,6 @@
         },
         'env': env_name,
         'env_config': {
-            'record_video_config': {
-                'frequency': args.video_frequency,
-                'directory': os.path.join(args.root_directory, args.video_directory)
-            },
 
             'allow_nop': tune.grid_search([True, False]),
             'invalid_action_masking': tune.grid_search(['none', 'conditional', 'collapsed']),
@@ -101,8 +99,15 @@
 
     }
 
+    if args.capture_video:
+        config['env_config']['record_video_config'] = {
+            'frequency': args.video_frequency,
+            'directory': os.path.join(args.root_directory, args.video_directory)
+        }
+
     stop = {
         "timesteps_total": max_training_steps,
     }
 
-    result = tune.run(ConditionalActionImpalaTrainer, local_dir=args.root_directory, config=config, stop=stop, callbacks=[wandbLoggerCallback])
+    result = tune.run(ConditionalActionImpalaTrainer, local_dir=args.root_directory, config=config, stop=stop,
+                      callbacks=[wandbLoggerCallback])
b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py index d416d00a6..957790a45 100644 --- a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py +++ b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py @@ -18,7 +18,8 @@ parser = argparse.ArgumentParser(description='Run experiments') -parser.add_argument('--root-directory', default=os.path.expanduser("~/ray_results"), help='root directory for all data associated with the run') +parser.add_argument('--root-directory', default=os.path.expanduser("~/ray_results"), + help='root directory for all data associated with the run') parser.add_argument('--num-gpus', default=1, type=int, help='Number of GPUs to make available') parser.add_argument('--num-workers', default=8, type=int, help='Number of workers') @@ -27,6 +28,7 @@ parser.add_argument('--num-cpus-per-worker', default=1, type=float, help='Number of gpus per worker') parser.add_argument('--max-training-steps', default=20000000, type=int, help='Number of workers') +parser.add_argument('--capture-video', action='store_true', help='enable video capture') parser.add_argument('--video-directory', default='videos', help='directory of video') parser.add_argument('--video-frequency', type=int, default=10000, help='Frequency of videos') @@ -46,7 +48,7 @@ ] ray.init(include_dashboard=False, num_gpus=args.num_gpus) - #ray.init(include_dashboard=False, num_gpus=args.num_gpus, local_mode=True) + # ray.init(include_dashboard=False, num_gpus=args.num_gpus, local_mode=True) env_name = "ray-griddly-env" @@ -75,10 +77,6 @@ }, 'env': env_name, 'env_config': { - 'record_video_config': { - 'frequency': args.video_frequency, - 'directory': os.path.join(args.root_directory, args.video_directory) - }, 'allow_nop': tune.grid_search([True, False]), 'invalid_action_masking': tune.grid_search(['none', 'conditional', 'collapsed']), @@ -101,8 +99,15 @@ } + if args.capture_video: + config['env_config']['record_video_config'] = { + 'frequency': args.video_frequency, + 'directory': os.path.join(args.root_directory, args.video_directory) + } + stop = { "timesteps_total": max_training_steps, } - result = tune.run(ConditionalActionImpalaTrainer, local_dir=args.root_directory, config=config, stop=stop, callbacks=[wandbLoggerCallback]) + result = tune.run(ConditionalActionImpalaTrainer, local_dir=args.root_directory, config=config, stop=stop, + callbacks=[wandbLoggerCallback]) From d771199bfdaf97924b956b20ea2adab7e60f75ba Mon Sep 17 00:00:00 2001 From: Bam4d Date: Sun, 21 Mar 2021 11:45:15 +0000 Subject: [PATCH 12/45] adding in option for wandb default directory and set level with argument --- .../conditional_action_spaces/rllib_baseline.py | 4 ++-- .../rllib_conditional_actions.py | 11 +++-------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/python/examples/experiments/conditional_action_spaces/rllib_baseline.py b/python/examples/experiments/conditional_action_spaces/rllib_baseline.py index a89f160c8..46a86849f 100644 --- a/python/examples/experiments/conditional_action_spaces/rllib_baseline.py +++ b/python/examples/experiments/conditional_action_spaces/rllib_baseline.py @@ -26,8 +26,8 @@ ] - ray.init(num_gpus=1) - # ray.init(num_gpus=1, local_mode=True) + #ray.init(num_gpus=1) + ray.init(num_gpus=1, local_mode=True) env_name = "ray-griddly-env" diff --git a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py 
b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py
index 957790a45..4cf15a7bd 100644
--- a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py
+++ b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py
@@ -41,12 +41,6 @@
     sep = os.pathsep
     os.environ['PYTHONPATH'] = sep.join(sys.path)

-    yaml_files = [
-        os.path.realpath('clusters_po.yaml'),
-        os.path.realpath('clusters_po_with_push.yaml'),
-        os.path.realpath('clusters_po_with_push_seperate_colors.yaml')
-    ]
-
     ray.init(include_dashboard=False, num_gpus=args.num_gpus)
     # ray.init(include_dashboard=False, num_gpus=args.num_gpus, local_mode=True)

@@ -57,7 +51,8 @@

     wandbLoggerCallback = WandbLoggerCallback(
         project='conditional_actions',
-        api_key_file='~/.wandb_rc'
+        api_key_file='~/.wandb_rc',
+        dir=os.path.join(args.root_directory, 'wandb')
     )

     max_training_steps = args.max_training_steps

@@ -84,7 +79,7 @@
             # 'allow_nop': False,
             'generate_valid_action_trees': True,
             'random_level_on_reset': True,
-            'yaml_file': tune.grid_search(yaml_files),
+            'yaml_file': args.yaml_file,
             'global_observer_type': gd.ObserverType.SPRITE_2D,
             'max_steps': 1000,
         },

From 899e1e687cc723d93d31f5c59a6c37b97ea8b68d Mon Sep 17 00:00:00 2001
From: Bam4d
Date: Sun, 21 Mar 2021 11:50:58 +0000
Subject: [PATCH 13/45] yaml file

---
 .../conditional_action_spaces/rllib_conditional_actions.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py
index 4cf15a7bd..58f8e585e 100644
--- a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py
+++ b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py
@@ -18,6 +18,8 @@

 parser = argparse.ArgumentParser(description='Run experiments')

+parser.add_argument('--yaml-file', help='YAML file containing GDY for the game')
+
 parser.add_argument('--root-directory', default=os.path.expanduser("~/ray_results"),
                     help='root directory for all data associated with the run')
 parser.add_argument('--num-gpus', default=1, type=int, help='Number of GPUs to make available')

From 8723b6dad1466f51e6941b7f295e4f78f2a13fc8 Mon Sep 17 00:00:00 2001
From: Bam4d
Date: Sun, 21 Mar 2021 11:56:38 +0000
Subject: [PATCH 14/45] fixing wandb data dir

---
 .../conditional_action_spaces/rllib_conditional_actions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py
index 58f8e585e..0353f15e2 100644
--- a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py
+++ b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py
@@ -54,7 +54,7 @@
     wandbLoggerCallback = WandbLoggerCallback(
         project='conditional_actions',
         api_key_file='~/.wandb_rc',
-        dir=os.path.join(args.root_directory, 'wandb')
+        dir=args.root_directory
     )

     max_training_steps = args.max_training_steps

From 5d3538b9aab7e2adde0db9fce98077f112e76e9b Mon Sep 17 00:00:00 2001
From: Bam4d
Date: Mon, 22 Mar 2021 13:44:56 +0000
Subject: [PATCH 15/45] vtrace masking option

---
 .../clusters_po_with_push_units.yaml | 330 ++++++++++++++++++
 .../rllib_conditional_actions.py | 9 +-
 python/griddly/util/rllib/callbacks.py | 13 +-
 .../conditional_action_mixin.py | 3 +-
.../conditional_action_policy_trainer.py | 93 ++++- 5 files changed, 436 insertions(+), 12 deletions(-) diff --git a/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_units.yaml b/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_units.yaml index e69de29bb..9904e87e7 100644 --- a/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_units.yaml +++ b/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_units.yaml @@ -0,0 +1,330 @@ +Version: "0.1" +Environment: + Name: Partially Observable Clusters + Description: Cluster the coloured objects together by pushing them against the static coloured blocks. + Observers: + Sprite2D: + TileSize: 24 + BackgroundTile: oryx/oryx_fantasy/floor1-2.png + Variables: + - Name: box_count + InitialValue: 0 + Player: + Observer: + RotateWithAvatar: true + TrackAvatar: true + Height: 5 + Width: 5 + OffsetX: 0 + OffsetY: 2 + AvatarObject: avatar # The player can only control a single avatar in the game + Termination: + Win: + - eq: [box_count, 0] + Lose: + - eq: [broken_box:count, 1] + - eq: [avatar:count, 0] + Levels: + - | + w w w w w w w w w w w w w + w . . . . . . . . . . . w + w . . 1 1 . . . 2 . 2 . w + w . . . . 1 . . . . . . w + w . . . a . . . . . 2 . w + w . . . . . . . h . . . w + w . . . . 1 . . . . b . w + w . . . . . . 1 . . . . w + w . . . . . . . . A . . w + w w w w w w w w w w w w w + - | + w w w w w w w w w w w w w + w . . . . . . . . . . . w + w . . 1 . . 2 . c 3 . . w + w . . . . h . . h . . . w + w . . . 2 . . 3 . . 1 . w + w . . . . b . . h . . . w + w . . 3 . . . 2 . . 1 . w + w . . h . h . . . a . . w + w . . . . . A . . . . . w + w w w w w w w w w w w w w + - | + w w w w w w w w w w w w w + w . . a . . b . . c . . w + w . . . . . . . . . . . w + w . . . . . . . . . . . w + w h h h h h . h h h h h w + w . . . . h . h . . . . w + w . 1 2 . h . h . 1 3 . w + w . 3 . . . . . . . 2 . w + w . . . . . A . . . . . w + w w w w w w w w w w w w w + - | + w w w w w w w w w w w w w + w . . . . . . . . . . . w + w . . . 1 . 2 . . c . . w + w . . . . . 3 . . 3 . . w + w . . a . 2 . . . h . . w + w . . . . h h . 3 . . . w + w . . 1 . . . . . 2 . . w + w . . . . . 1 . . b . . w + w . . . . . A . . . . . w + w w w w w w w w w w w w w + - | + w w w w w w w w w w w w w + w . . . . . . . . . . . w + w . . . . . . 1 . . . . w + w . . h . . b . . h . . w + w . . . . 1 . . . . . . w + w . . 3 . . . . 2 . . . w + w . . . a . h . . c . . w + w . . . . 3 . . . . 2 . w + w . . . . . A . . . . . 
w + w w w w w w w w w w w w w + +Actions: + + # A simple action to count the number of boxes in the game at the start + # Not currently a way to do complex things in termination conditions like combine multiple conditions + - Name: box_counter + InputMapping: + Internal: true + Inputs: + 1: + Description: "The only action here is to increment the box count" + Behaviours: + - Src: + Object: [blue_box, red_box, green_box] + Commands: + - incr: box_count + Dst: + Object: [blue_box, red_box, green_box] + + # Define the move action + - Name: move + InputMapping: + Inputs: + 1: + Description: Rotate left + OrientationVector: [-1, 0] + 2: + Description: Move forwards + OrientationVector: [0, -1] + VectorToDest: [0, -1] + 3: + Description: Rotate right + OrientationVector: [1, 0] + Relative: true + Behaviours: + + # Avatar rotates + - Src: + Object: avatar + Commands: + - rot: _dir + Dst: + Object: avatar + + # Avatar can move into empty space + - Src: + Object: avatar + Commands: + - mov: _dest + Dst: + Object: _empty + + # Avatar dies if it hits the spikes + - Src: + Object: avatar + Commands: + - remove: true + - reward: -1 + Dst: + Object: spike + + + - Name: push + InputMapping: + Inputs: + 1: + Description: Push Forwards + OrientationVector: [ 0, -1 ] + VectorToDest: [ 0, -1 ] + Relative: true + Behaviours: + + # Boxes can be pushed by the avatar + - Src: + Object: avatar + Commands: + - mov: _dest + Dst: + Object: [blue_box, green_box, red_box] + Commands: + - cascade: _dest + + # Boxes break if they hit the spikes + - Src: + Object: [ blue_box, green_box, red_box ] + Commands: + - change_to: broken_box + - reward: -1 + Dst: + Object: spike + + # Boxes can pushed into empty space + - Src: + Object: [blue_box, green_box, red_box] + Commands: + - mov: _dest + Dst: + Object: _empty + + # When boxes are pushed against the blocks they change + - Src: + Object: blue_box + Commands: + - change_to: blue_block + - reward: 1 + - decr: box_count + Dst: + Object: blue_block + - Src: + Object: red_box + Commands: + - reward: 1 + - change_to: red_block + - decr: box_count + Dst: + Object: red_block + - Src: + Object: green_box + Commands: + - reward: 1 + - change_to: green_block + - decr: box_count + Dst: + Object: green_block + + +Objects: + - Name: avatar + MapCharacter: A + Observers: + Sprite2D: + - Image: gvgai/oryx/knight1.png + Block2D: + - Shape: triangle + Color: [0.0, 1.0, 0.0] + Scale: 0.8 + + - Name: wall + MapCharacter: w + Observers: + Sprite2D: + - TilingMode: WALL_16 + Image: + - oryx/oryx_fantasy/wall1-0.png + - oryx/oryx_fantasy/wall1-1.png + - oryx/oryx_fantasy/wall1-2.png + - oryx/oryx_fantasy/wall1-3.png + - oryx/oryx_fantasy/wall1-4.png + - oryx/oryx_fantasy/wall1-5.png + - oryx/oryx_fantasy/wall1-6.png + - oryx/oryx_fantasy/wall1-7.png + - oryx/oryx_fantasy/wall1-8.png + - oryx/oryx_fantasy/wall1-9.png + - oryx/oryx_fantasy/wall1-10.png + - oryx/oryx_fantasy/wall1-11.png + - oryx/oryx_fantasy/wall1-12.png + - oryx/oryx_fantasy/wall1-13.png + - oryx/oryx_fantasy/wall1-14.png + - oryx/oryx_fantasy/wall1-15.png + Block2D: + - Shape: square + Color: [0.5, 0.5, 0.5] + Scale: 0.9 + + - Name: spike + MapCharacter: h + Observers: + Sprite2D: + - Image: gvgai/oryx/spike2.png + Block2D: + - Shape: triangle + Color: [0.9, 0.1, 0.1] + Scale: 0.5 + + - Name: red_box + MapCharacter: "2" + InitialActions: + - Action: box_counter + ActionId: 1 + Observers: + Sprite2D: + - Image: gvgai/newset/blockR.png + Block2D: + - Shape: square + Color: [0.5, 0.2, 0.2] + Scale: 0.5 + - Name: red_block + 
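+    # Static target block: pushing a red_box into this object converts the box to a
+    # red_block, gives a +1 reward and decrements box_count (see the push action above).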
MapCharacter: b + Observers: + Sprite2D: + - Image: gvgai/newset/blockR2.png + Block2D: + - Shape: square + Color: [1.0, 0.0, 0.0] + Scale: 1.0 + + - Name: green_box + MapCharacter: "3" + InitialActions: + - Action: box_counter + ActionId: 1 + Observers: + Sprite2D: + - Image: gvgai/newset/blockG.png + Block2D: + - Shape: square + Color: [0.2, 0.5, 0.2] + Scale: 0.5 + - Name: green_block + MapCharacter: c + Observers: + Sprite2D: + - Image: gvgai/newset/blockG2.png + Block2D: + - Shape: square + Color: [0.0, 1.0, 0.0] + Scale: 1.0 + + - Name: blue_box + MapCharacter: "1" + InitialActions: + - Action: box_counter + ActionId: 1 + Observers: + Sprite2D: + - Image: gvgai/newset/blockB.png + Block2D: + - Shape: square + Color: [0.2, 0.2, 0.5] + Scale: 0.5 + - Name: blue_block + MapCharacter: a + Observers: + Sprite2D: + - Image: gvgai/newset/blockB2.png + Block2D: + - Shape: square + Color: [0.0, 0.0, 1.0] + Scale: 1.0 + + - Name: broken_box + Observers: + Sprite2D: + - Image: gvgai/newset/block3.png + Block2D: + - Shape: triangle + Color: [1.0, 0.0, 1.0] + Scale: 1.0 diff --git a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py index 0353f15e2..ffc7e3c28 100644 --- a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py +++ b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py @@ -24,8 +24,8 @@ help='root directory for all data associated with the run') parser.add_argument('--num-gpus', default=1, type=int, help='Number of GPUs to make available') -parser.add_argument('--num-workers', default=8, type=int, help='Number of workers') -parser.add_argument('--num-envs-per-worker', default=2, type=int, help='Number of workers') +parser.add_argument('--num-workers', default=11, type=int, help='Number of workers') +parser.add_argument('--num-envs-per-worker', default=5, type=int, help='Number of workers') parser.add_argument('--num-gpus-per-worker', default=0, type=float, help='Number of gpus per worker') parser.add_argument('--num-cpus-per-worker', default=1, type=float, help='Number of gpus per worker') parser.add_argument('--max-training-steps', default=20000000, type=int, help='Number of workers') @@ -44,7 +44,7 @@ os.environ['PYTHONPATH'] = sep.join(sys.path) ray.init(include_dashboard=False, num_gpus=args.num_gpus) - # ray.init(include_dashboard=False, num_gpus=args.num_gpus, local_mode=True) + #ray.init(include_dashboard=False, num_gpus=1, local_mode=True) env_name = "ray-griddly-env" @@ -77,12 +77,13 @@ 'allow_nop': tune.grid_search([True, False]), 'invalid_action_masking': tune.grid_search(['none', 'conditional', 'collapsed']), + 'vtrace_masking': tune.grid_search([True, False]), # 'invalid_action_masking': 'collapsed', # 'allow_nop': False, 'generate_valid_action_trees': True, 'random_level_on_reset': True, 'yaml_file': args.yaml_file, - 'global_observer_type': gd.ObserverType.SPRITE_2D, + 'global_observer_type': gd.ObserverType.VECTOR, 'max_steps': 1000, }, 'entropy_coeff_schedule': [ diff --git a/python/griddly/util/rllib/callbacks.py b/python/griddly/util/rllib/callbacks.py index dfb1865fe..37767b835 100644 --- a/python/griddly/util/rllib/callbacks.py +++ b/python/griddly/util/rllib/callbacks.py @@ -11,9 +11,10 @@ class GriddlyCallbacks(DefaultCallbacks): def on_episode_end(self, *, worker: "RolloutWorker", base_env: BaseEnv, policies: Dict[PolicyID, Policy], episode: MultiAgentEpisode, env_index: Optional[int] = 
None, **kwargs) -> None: - if not worker.multiagent: - info = episode.last_info_for() - if 'video' in info: - level = info['video']['level'] - path = info['video']['path'] - episode.media[f'level_{level}'] = Video(path) + if worker.worker_index == 0 and env_index == 0: + if not worker.multiagent: + info = episode.last_info_for() + if 'video' in info: + level = info['video']['level'] + path = info['video']['path'] + episode.media[f'level_{level}'] = Video(path) diff --git a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_mixin.py b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_mixin.py index 5a3c09277..9d714fa45 100644 --- a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_mixin.py +++ b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_mixin.py @@ -80,7 +80,8 @@ def compute_actions_from_input_dict( extra_fetches = { SampleBatch.ACTION_DIST_INPUTS: masked_logits, SampleBatch.ACTION_PROB: torch.exp(logp.float()), - SampleBatch.ACTION_LOGP: logp + SampleBatch.ACTION_LOGP: logp, + 'invalid_action_mask': mask } # Update our global timestep by the batch size. diff --git a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_policy_trainer.py b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_policy_trainer.py index 20d82ead6..20252f4e9 100644 --- a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_policy_trainer.py +++ b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_policy_trainer.py @@ -1,10 +1,100 @@ +import gym +import numpy as np +import torch +from ray.rllib import SampleBatch from ray.rllib.agents.impala import ImpalaTrainer -from ray.rllib.agents.impala.vtrace_torch_policy import VTraceTorchPolicy +from ray.rllib.agents.impala.vtrace_tf_policy import build_vtrace_loss +from ray.rllib.agents.impala.vtrace_torch_policy import VTraceTorchPolicy, VTraceLoss, make_time_major +from ray.rllib.models.torch.torch_action_dist import TorchCategorical from ray.rllib.policy.torch_policy import LearningRateSchedule, EntropyCoeffSchedule +from tensorflow import sequence_mask from griddly.util.rllib.torch.conditional_actions.conditional_action_mixin import ConditionalActionMixin +def build_invalid_masking_vtrace_loss(policy, model, dist_class, train_batch): + + if not policy.config['env_config'].get('vtrace_masking', False): + return build_vtrace_loss(policy, model, dist_class, train_batch) + + model_out, _ = model.from_batch(train_batch) + + if isinstance(policy.action_space, gym.spaces.Discrete): + is_multidiscrete = False + output_hidden_shape = [policy.action_space.n] + elif isinstance(policy.action_space, gym.spaces.MultiDiscrete): + is_multidiscrete = True + output_hidden_shape = policy.action_space.nvec.astype(np.int32) + else: + is_multidiscrete = False + output_hidden_shape = 1 + + def _make_time_major(*args, **kw): + return make_time_major(policy, train_batch.get("seq_lens"), *args, + **kw) + + actions = train_batch[SampleBatch.ACTIONS] + dones = train_batch[SampleBatch.DONES] + rewards = train_batch[SampleBatch.REWARDS] + behaviour_action_logp = train_batch[SampleBatch.ACTION_LOGP] + behaviour_logits = train_batch[SampleBatch.ACTION_DIST_INPUTS] + + invalid_action_mask = train_batch['invalid_action_mask'] + + if 'seq_lens' in train_batch: + max_seq_len = policy.config['rollout_fragment_length'] + mask_orig = sequence_mask(train_batch["seq_lens"], max_seq_len) + mask = torch.reshape(mask_orig, [-1]) + else: + mask = 
torch.ones_like(rewards) + + model_out += torch.log(invalid_action_mask) + action_dist = dist_class(model_out, model) + + if isinstance(output_hidden_shape, (list, tuple, np.ndarray)): + unpacked_behaviour_logits = torch.split( + behaviour_logits, list(output_hidden_shape), dim=1) + unpacked_outputs = torch.split( + model_out, list(output_hidden_shape), dim=1) + else: + unpacked_behaviour_logits = torch.chunk( + behaviour_logits, output_hidden_shape, dim=1) + unpacked_outputs = torch.chunk(model_out, output_hidden_shape, dim=1) + values = model.value_function() + + # Prepare actions for loss. + loss_actions = actions if is_multidiscrete else torch.unsqueeze( + actions, dim=1) + + # Inputs are reshaped from [B * T] => [T - 1, B] for V-trace calc. + policy.loss = VTraceLoss( + actions=_make_time_major(loss_actions, drop_last=True), + actions_logp=_make_time_major( + action_dist.logp(actions), drop_last=True), + actions_entropy=_make_time_major( + action_dist.entropy(), drop_last=True), + dones=_make_time_major(dones, drop_last=True), + behaviour_action_logp=_make_time_major( + behaviour_action_logp, drop_last=True), + behaviour_logits=_make_time_major( + unpacked_behaviour_logits, drop_last=True), + target_logits=_make_time_major(unpacked_outputs, drop_last=True), + discount=policy.config["gamma"], + rewards=_make_time_major(rewards, drop_last=True), + values=_make_time_major(values, drop_last=True), + bootstrap_value=_make_time_major(values)[-1], + dist_class=TorchCategorical if is_multidiscrete else dist_class, + model=model, + valid_mask=_make_time_major(mask, drop_last=True), + config=policy.config, + vf_loss_coeff=policy.config["vf_loss_coeff"], + entropy_coeff=policy.entropy_coeff, + clip_rho_threshold=policy.config["vtrace_clip_rho_threshold"], + clip_pg_rho_threshold=policy.config["vtrace_clip_pg_rho_threshold"]) + + return policy.loss.total_loss + + def setup_mixins(policy, obs_space, action_space, config): ConditionalActionMixin.__init__(policy) EntropyCoeffSchedule.__init__(policy, config["entropy_coeff"], @@ -14,6 +104,7 @@ def setup_mixins(policy, obs_space, action_space, config): ConditionalActionVTraceTorchPolicy = VTraceTorchPolicy.with_updates( name="ConditionalActionVTraceTorchPolicy", + loss_fn=build_invalid_masking_vtrace_loss, before_init=setup_mixins, mixins=[LearningRateSchedule, EntropyCoeffSchedule, ConditionalActionMixin] ) From 9f88c0f91d9a48e36504389c9d605e82d136079d Mon Sep 17 00:00:00 2001 From: Bam4d Date: Mon, 22 Mar 2021 13:46:45 +0000 Subject: [PATCH 16/45] restrict cpus to same number of workers --- .../conditional_action_spaces/rllib_conditional_actions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py index ffc7e3c28..097d6a5bd 100644 --- a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py +++ b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py @@ -43,7 +43,7 @@ sep = os.pathsep os.environ['PYTHONPATH'] = sep.join(sys.path) - ray.init(include_dashboard=False, num_gpus=args.num_gpus) + ray.init(include_dashboard=False, num_gpus=args.num_gpus, num_cpus=args.num_workers) #ray.init(include_dashboard=False, num_gpus=1, local_mode=True) env_name = "ray-griddly-env" From 2a5784c2bb6f56d337dc803d17dd6bfe17a66a57 Mon Sep 17 00:00:00 2001 From: Bam4d Date: Mon, 22 Mar 2021 14:23:56 +0000 Subject: [PATCH 17/45] num 
cpus has to be independent of num workers as one cpu is used for impala --- .../conditional_action_spaces/rllib_conditional_actions.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py index 097d6a5bd..5d91fdcb8 100644 --- a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py +++ b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py @@ -22,7 +22,8 @@ parser.add_argument('--root-directory', default=os.path.expanduser("~/ray_results"), help='root directory for all data associated with the run') -parser.add_argument('--num-gpus', default=1, type=int, help='Number of GPUs to make available') +parser.add_argument('--num-gpus', default=1, type=int, help='Number of GPUs to make available to ray.') +parser.add_argument('--num-cpus', default=8, type=int, help='Number of CPUs to make available to ray.') parser.add_argument('--num-workers', default=11, type=int, help='Number of workers') parser.add_argument('--num-envs-per-worker', default=5, type=int, help='Number of workers') @@ -43,7 +44,7 @@ sep = os.pathsep os.environ['PYTHONPATH'] = sep.join(sys.path) - ray.init(include_dashboard=False, num_gpus=args.num_gpus, num_cpus=args.num_workers) + ray.init(include_dashboard=False, num_gpus=args.num_gpus, num_cpus=args.num_cpus) #ray.init(include_dashboard=False, num_gpus=1, local_mode=True) env_name = "ray-griddly-env" From eebfdd6b0bee91ef5909b02e7b6ad31bd0ee1235 Mon Sep 17 00:00:00 2001 From: Bam4d Date: Mon, 22 Mar 2021 16:33:03 +0000 Subject: [PATCH 18/45] invalid action mask fixes --- .../conditional_action_spaces/rllib_conditional_actions.py | 6 +++--- .../conditional_action_policy_trainer.py | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py index 5d91fdcb8..b9ff11e4e 100644 --- a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py +++ b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py @@ -25,7 +25,7 @@ parser.add_argument('--num-gpus', default=1, type=int, help='Number of GPUs to make available to ray.') parser.add_argument('--num-cpus', default=8, type=int, help='Number of CPUs to make available to ray.') -parser.add_argument('--num-workers', default=11, type=int, help='Number of workers') +parser.add_argument('--num-workers', default=7, type=int, help='Number of workers') parser.add_argument('--num-envs-per-worker', default=5, type=int, help='Number of workers') parser.add_argument('--num-gpus-per-worker', default=0, type=float, help='Number of gpus per worker') parser.add_argument('--num-cpus-per-worker', default=1, type=float, help='Number of gpus per worker') @@ -45,7 +45,7 @@ os.environ['PYTHONPATH'] = sep.join(sys.path) ray.init(include_dashboard=False, num_gpus=args.num_gpus, num_cpus=args.num_cpus) - #ray.init(include_dashboard=False, num_gpus=1, local_mode=True) + #ray.init(include_dashboard=False, num_gpus=1, num_cpus=2, local_mode=True) env_name = "ray-griddly-env" @@ -84,7 +84,7 @@ 'generate_valid_action_trees': True, 'random_level_on_reset': True, 'yaml_file': args.yaml_file, - 'global_observer_type': gd.ObserverType.VECTOR, + 'global_observer_type': gd.ObserverType.SPRITE_2D, 'max_steps': 
1000, }, 'entropy_coeff_schedule': [ diff --git a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_policy_trainer.py b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_policy_trainer.py index 20252f4e9..9bb3ffcd9 100644 --- a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_policy_trainer.py +++ b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_policy_trainer.py @@ -13,7 +13,6 @@ def build_invalid_masking_vtrace_loss(policy, model, dist_class, train_batch): - if not policy.config['env_config'].get('vtrace_masking', False): return build_vtrace_loss(policy, model, dist_class, train_batch) From afcbd48f443636c98143f7ad32ea2fce15fd8d0b Mon Sep 17 00:00:00 2001 From: Bam4d Date: Mon, 22 Mar 2021 19:20:51 +0000 Subject: [PATCH 19/45] video frequency too high --- .../conditional_action_spaces/rllib_conditional_actions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py index b9ff11e4e..aca66deba 100644 --- a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py +++ b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py @@ -33,7 +33,7 @@ parser.add_argument('--capture-video', action='store_true', help='enable video capture') parser.add_argument('--video-directory', default='videos', help='directory of video') -parser.add_argument('--video-frequency', type=int, default=10000, help='Frequency of videos') +parser.add_argument('--video-frequency', type=int, default=1000000, help='Frequency of videos') parser.add_argument('--lr', type=float, default=0.0005, help='learning rate') From 94b3af036691bd6512b4334f1ceb5f4a5143d620 Mon Sep 17 00:00:00 2001 From: Bam4d Date: Tue, 23 Mar 2021 10:39:41 +0000 Subject: [PATCH 20/45] vtrace masking not in grid-search --- .../clusters_po_with_push_units.yaml | 192 ++++++------------ .../rllib_conditional_actions.py | 2 +- 2 files changed, 58 insertions(+), 136 deletions(-) diff --git a/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_units.yaml b/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_units.yaml index 9904e87e7..879a28681 100644 --- a/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_units.yaml +++ b/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_units.yaml @@ -9,15 +9,6 @@ Environment: Variables: - Name: box_count InitialValue: 0 - Player: - Observer: - RotateWithAvatar: true - TrackAvatar: true - Height: 5 - Width: 5 - OffsetX: 0 - OffsetY: 2 - AvatarObject: avatar # The player can only control a single avatar in the game Termination: Win: - eq: [box_count, 0] @@ -26,60 +17,60 @@ Environment: - eq: [avatar:count, 0] Levels: - | - w w w w w w w w w w w w w - w . . . . . . . . . . . w - w . . 1 1 . . . 2 . 2 . w - w . . . . 1 . . . . . . w - w . . . a . . . . . 2 . w - w . . . . . . . h . . . w - w . . . . 1 . . . . b . w - w . . . . . . 1 . . . . w - w . . . . . . . . A . . w - w w w w w w w w w w w w w + w w w w w w w w w w w w w + w . . . . . . . . . . . w + w . . b1 b1 . . . r1 . r1 . w + w . . . . b1 . . . . . . w + w . . . B . . . . . r1 . w + w . . . . . . . x . . . w + w . . . . b1 . . . . R . w + w . . . . . . b1 . . . . w + w . . . . . . . . . . . w + w w w w w w w w w w w w w - | - w w w w w w w w w w w w w - w . 
. . . . . . . . . . w - w . . 1 . . 2 . c 3 . . w - w . . . . h . . h . . . w - w . . . 2 . . 3 . . 1 . w - w . . . . b . . h . . . w - w . . 3 . . . 2 . . 1 . w - w . . h . h . . . a . . w - w . . . . . A . . . . . w - w w w w w w w w w w w w w + w w w w w w w w w w w w w + w . . . . . . . . . . . w + w . . b1 . . r1 . G g1 . . w + w . . . . x . . x . . . w + w . . . r1 . . g1 . . b1 . w + w . . . . b . . h . . . w + w . . g1 . . . r1 . . b1 . w + w . . x . x . . . B . . w + w . . . . . . . . . . . w + w w w w w w w w w w w w w - | - w w w w w w w w w w w w w - w . . a . . b . . c . . w - w . . . . . . . . . . . w - w . . . . . . . . . . . w - w h h h h h . h h h h h w - w . . . . h . h . . . . w - w . 1 2 . h . h . 1 3 . w - w . 3 . . . . . . . 2 . w - w . . . . . A . . . . . w - w w w w w w w w w w w w w + w w w w w w w w w w w w w + w . . B . . R . . G . . w + w . . . . . . . . . . . w + w . . . . . . . . . . . w + w x x x x x . x x x x x w + w . . . . x . x . . . . w + w . b1 r1 . x . x . b1 g1 . w + w . g1 . . . . . . . r1 . w + w . . . . . . . . . . . w + w w w w w w w w w w w w w - | - w w w w w w w w w w w w w - w . . . . . . . . . . . w - w . . . 1 . 2 . . c . . w - w . . . . . 3 . . 3 . . w - w . . a . 2 . . . h . . w - w . . . . h h . 3 . . . w - w . . 1 . . . . . 2 . . w - w . . . . . 1 . . b . . w - w . . . . . A . . . . . w - w w w w w w w w w w w w w + w w w w w w w w w w w w w + w . . . . . . . . . . . w + w . . . b1 . r1 . . G . . w + w . . . . . g1 . . g1 . . w + w . . B . r1 . . . x . . w + w . . . . x x . g1 . . . w + w . . b1 . . . . . r1 . . w + w . . . . . b1 . . R . . w + w . . . . . . . . . . . w + w w w w w w w w w w w w w - | - w w w w w w w w w w w w w - w . . . . . . . . . . . w - w . . . . . . 1 . . . . w - w . . h . . b . . h . . w - w . . . . 1 . . . . . . w - w . . 3 . . . . 2 . . . w - w . . . a . h . . c . . w - w . . . . 3 . . . . 2 . w - w . . . . . A . . . . . w - w w w w w w w w w w w w w + w w w w w w w w w w w w w + w . . . . . . . . . . . w + w . . . . . . b1 . . . . w + w . . x . . R . . h . . w + w . . . . b1 . . . . . . w + w . . g1 . . . . r1 . . . w + w . . . B . x . . G . . w + w . . . . g1 . . . . r1 . w + w . . . . . . . . . . . 
w + w w w w w w w w w w w w w Actions: @@ -99,69 +90,9 @@ Actions: Dst: Object: [blue_box, red_box, green_box] - # Define the move action - - Name: move - InputMapping: - Inputs: - 1: - Description: Rotate left - OrientationVector: [-1, 0] - 2: - Description: Move forwards - OrientationVector: [0, -1] - VectorToDest: [0, -1] - 3: - Description: Rotate right - OrientationVector: [1, 0] - Relative: true - Behaviours: - - # Avatar rotates - - Src: - Object: avatar - Commands: - - rot: _dir - Dst: - Object: avatar - - # Avatar can move into empty space - - Src: - Object: avatar - Commands: - - mov: _dest - Dst: - Object: _empty - - # Avatar dies if it hits the spikes - - Src: - Object: avatar - Commands: - - remove: true - - reward: -1 - Dst: - Object: spike - - - Name: push - InputMapping: - Inputs: - 1: - Description: Push Forwards - OrientationVector: [ 0, -1 ] - VectorToDest: [ 0, -1 ] - Relative: true Behaviours: - # Boxes can be pushed by the avatar - - Src: - Object: avatar - Commands: - - mov: _dest - Dst: - Object: [blue_box, green_box, red_box] - Commands: - - cascade: _dest - # Boxes break if they hit the spikes - Src: Object: [ blue_box, green_box, red_box ] @@ -207,15 +138,6 @@ Actions: Objects: - - Name: avatar - MapCharacter: A - Observers: - Sprite2D: - - Image: gvgai/oryx/knight1.png - Block2D: - - Shape: triangle - Color: [0.0, 1.0, 0.0] - Scale: 0.8 - Name: wall MapCharacter: w @@ -245,7 +167,7 @@ Objects: Scale: 0.9 - Name: spike - MapCharacter: h + MapCharacter: x Observers: Sprite2D: - Image: gvgai/oryx/spike2.png @@ -255,7 +177,7 @@ Objects: Scale: 0.5 - Name: red_box - MapCharacter: "2" + MapCharacter: r InitialActions: - Action: box_counter ActionId: 1 @@ -267,7 +189,7 @@ Objects: Color: [0.5, 0.2, 0.2] Scale: 0.5 - Name: red_block - MapCharacter: b + MapCharacter: R Observers: Sprite2D: - Image: gvgai/newset/blockR2.png @@ -277,7 +199,7 @@ Objects: Scale: 1.0 - Name: green_box - MapCharacter: "3" + MapCharacter: g InitialActions: - Action: box_counter ActionId: 1 @@ -289,7 +211,7 @@ Objects: Color: [0.2, 0.5, 0.2] Scale: 0.5 - Name: green_block - MapCharacter: c + MapCharacter: G Observers: Sprite2D: - Image: gvgai/newset/blockG2.png @@ -299,7 +221,7 @@ Objects: Scale: 1.0 - Name: blue_box - MapCharacter: "1" + MapCharacter: b InitialActions: - Action: box_counter ActionId: 1 @@ -311,7 +233,7 @@ Objects: Color: [0.2, 0.2, 0.5] Scale: 0.5 - Name: blue_block - MapCharacter: a + MapCharacter: B Observers: Sprite2D: - Image: gvgai/newset/blockB2.png diff --git a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py index aca66deba..d96526562 100644 --- a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py +++ b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py @@ -78,7 +78,7 @@ 'allow_nop': tune.grid_search([True, False]), 'invalid_action_masking': tune.grid_search(['none', 'conditional', 'collapsed']), - 'vtrace_masking': tune.grid_search([True, False]), + 'vtrace_masking': False, # 'invalid_action_masking': 'collapsed', # 'allow_nop': False, 'generate_valid_action_trees': True, From d7950ed25f1ce7c07abfa00d26363ab889d4595e Mon Sep 17 00:00:00 2001 From: Bam4d Date: Tue, 23 Mar 2021 10:40:55 +0000 Subject: [PATCH 21/45] allow not not in grid-search --- .../conditional_action_spaces/rllib_conditional_actions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py index d96526562..2e6d37260 100644 --- a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py +++ b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py @@ -76,7 +76,7 @@ 'env': env_name, 'env_config': { - 'allow_nop': tune.grid_search([True, False]), + 'allow_nop': True, 'invalid_action_masking': tune.grid_search(['none', 'conditional', 'collapsed']), 'vtrace_masking': False, # 'invalid_action_masking': 'collapsed', From a10958c2ce76fe70f6f90451278af289ce6662a7 Mon Sep 17 00:00:00 2001 From: Bam4d Date: Tue, 23 Mar 2021 10:49:15 +0000 Subject: [PATCH 22/45] add vtrace masking and no-ops as command flags --- .../conditional_action_spaces/rllib_conditional_actions.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py index 2e6d37260..4cebac626 100644 --- a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py +++ b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py @@ -35,6 +35,9 @@ parser.add_argument('--video-directory', default='videos', help='directory of video') parser.add_argument('--video-frequency', type=int, default=1000000, help='Frequency of videos') +parser.add_argument('--allow-nop', action='store_true', default=False, help='allow NOP actions in action tree') +parser.add_argument('--vtrace-masking', action='store_true', default=False, help='use masks in vtrace calculations') + parser.add_argument('--lr', type=float, default=0.0005, help='learning rate') if __name__ == '__main__': @@ -76,9 +79,9 @@ 'env': env_name, 'env_config': { - 'allow_nop': True, + 'allow_nop': args.allow_nop, 'invalid_action_masking': tune.grid_search(['none', 'conditional', 'collapsed']), - 'vtrace_masking': False, + 'vtrace_masking': args.vtrace_masking, # 'invalid_action_masking': 'collapsed', # 'allow_nop': False, 'generate_valid_action_trees': True, From b3440a0a6ccf4b5d83d4d1c8ffdbe1bebd398d04 Mon Sep 17 00:00:00 2001 From: Bam4d Date: Tue, 23 Mar 2021 11:02:25 +0000 Subject: [PATCH 23/45] add seed for consistency --- .../conditional_action_spaces/rllib_conditional_actions.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py index 4cebac626..aff2108a0 100644 --- a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py +++ b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py @@ -38,6 +38,8 @@ parser.add_argument('--allow-nop', action='store_true', default=False, help='allow NOP actions in action tree') parser.add_argument('--vtrace-masking', action='store_true', default=False, help='use masks in vtrace calculations') +parser.add_argument('--seed', type=int, default=69420, help='seed for experiments') + parser.add_argument('--lr', type=float, default=0.0005, help='learning rate') if __name__ == '__main__': @@ -65,6 +67,7 @@ config = { 'framework': 'torch', + 'seed': args.seed, 'num_workers': args.num_workers, 'num_envs_per_worker': args.num_envs_per_worker, 'num_gpus_per_worker': 
float(args.num_gpus_per_worker), From edf7d771c533c8bce8d69a2ec2f323ec26ac488a Mon Sep 17 00:00:00 2001 From: Bam4d Date: Wed, 24 Mar 2021 20:44:34 +0000 Subject: [PATCH 24/45] allowing the x+y values to be placed in trees, making rts-style clusters --- bindings/wrapper/GameWrapper.cpp | 24 +- ...rs_po_with_push_separate_colors_units.yaml | 304 ++++++++++++++++++ .../clusters_po_with_push_units.yaml | 46 +-- .../rllib_baseline.py | 76 +++-- .../rllib_conditional_actions.py | 10 +- python/griddly/util/rllib/callbacks.py | 2 +- .../conditional_action_exploration.py | 30 +- .../conditional_action_mixin.py | 11 +- .../conditional_action_policy_trainer.py | 2 +- 9 files changed, 425 insertions(+), 80 deletions(-) diff --git a/bindings/wrapper/GameWrapper.cpp b/bindings/wrapper/GameWrapper.cpp index 4a74ca412..de3f0175b 100644 --- a/bindings/wrapper/GameWrapper.cpp +++ b/bindings/wrapper/GameWrapper.cpp @@ -91,21 +91,21 @@ class Py_GameWrapper { auto actionIdsForName = gameProcess_->getAvailableActionIdsAtLocation(locationVec, actionName); if (actionIdsForName.size() > 0) { - // if (gdyFactory_->getAvatarObject().length() == 0) { - // auto py_x = py::cast(locationVec[0]); - // auto py_y = py::cast(locationVec[1]); - // if(!treePtr.contains(py_x)) { - // (*treePtr)[py_x] = py::dict(); - // } + if (gdyFactory_->getAvatarObject().length() == 0) { + auto py_x = locationVec[0]; + auto py_y = locationVec[1]; + if(!treePtr->contains(py_x)) { + treePtr->add(py_x); + } - // treePtr = treePtr[py_x]; + treePtr = treePtr->children[py_x]; - // if(!treePtr.contains(py_y)) { - // treePtr[py_y] = py::dict(); - // } + if(!treePtr->contains(py_y)) { + treePtr->add(py_y); + } - // treePtr = treePtr[py_y]; - // } + treePtr = treePtr->children[py_y]; + } if (externalActionNames.size() > 1) { auto actionTypeId = getActionTypeId(actionName); diff --git a/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_separate_colors_units.yaml b/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_separate_colors_units.yaml index e69de29bb..1fba6490c 100644 --- a/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_separate_colors_units.yaml +++ b/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_separate_colors_units.yaml @@ -0,0 +1,304 @@ +Version: "0.1" +Environment: + Name: Partially Observable Clusters + Description: Cluster the coloured objects together by pushing them against the static coloured blocks. + Observers: + Sprite2D: + TileSize: 24 + BackgroundTile: oryx/oryx_fantasy/floor1-2.png + Variables: + - Name: box_count + InitialValue: 0 + PerPlayer: true + - Name: broken_boxes + InitialValue: 0 + PerPlayer: true + Player: + Count: 1 + Termination: + Win: + - eq: [ box_count, 0 ] + Lose: + - eq: [ broken_boxes, 1 ] + Levels: + - | + w w w w w w w w w w w w w + w . . . . . . . . . . . w + w . . b1 b1 . . . r1 . r1 . w + w . . . . b1 . . . . . . w + w . . . B . . . . . r1 . w + w . . . . . . . x . . . w + w . . . . b1 . . . . R . w + w . . . . . . b1 . . . . w + w . . . . . . . . . . . w + w w w w w w w w w w w w w + - | + w w w w w w w w w w w w w + w . . . . . . . . . . . w + w . . b1 . . r1 . G g1 . . w + w . . . . x . . x . . . w + w . . . r1 . . g1 . . b1 . w + w . . . . b . . x . . . w + w . . g1 . . . r1 . . b1 . w + w . . x . x . . . B . . w + w . . . . . . . . . . . w + w w w w w w w w w w w w w + - | + w w w w w w w w w w w w w + w . . B . . R . . G . . w + w . . . . . . . . . . . w + w . . . . . . . . . . . 
w + w x x x x x . x x x x x w + w . . . . x . x . . . . w + w . b1 r1 . x . x . b1 g1 . w + w . g1 . . . . . . . r1 . w + w . . . . . . . . . . . w + w w w w w w w w w w w w w + - | + w w w w w w w w w w w w w + w . . . . . . . . . . . w + w . . . b1 . r1 . . G . . w + w . . . . . g1 . . g1 . . w + w . . B . r1 . . . x . . w + w . . . . x x . g1 . . . w + w . . b1 . . . . . r1 . . w + w . . . . . b1 . . R . . w + w . . . . . . . . . . . w + w w w w w w w w w w w w w + - | + w w w w w w w w w w w w w + w . . . . . . . . . . . w + w . . . . . . b1 . . . . w + w . . x . . R . . x . . w + w . . . . b1 . . . . . . w + w . . g1 . . . . r1 . . . w + w . . . B . x . . G . . w + w . . . . g1 . . . . r1 . w + w . . . . . . . . . . . w + w w w w w w w w w w w w w + +Actions: + + # A simple action to count the number of boxes in the game at the start + # Not currently a way to do complex things in termination conditions like combine multiple conditions + - Name: box_counter + InputMapping: + Internal: true + Inputs: + 1: + Description: "The only action here is to increment the box count" + Behaviours: + - Src: + Object: [ blue_box, red_box, green_box ] + Commands: + - incr: box_count + Dst: + Object: [ blue_box, red_box, green_box ] + + - Name: push_blue + Behaviours: + + # Boxes break if they are pushed into the spikes + - Src: + Object: blue_box + Commands: + - incr: broken_boxes + - change_to: broken_box + - reward: -1 + Dst: + Object: spike + + # Boxes can pushed into empty space + - Src: + Object: blue_box + Commands: + - mov: _dest + Dst: + Object: _empty + + # When boxes are pushed against the blocks they change + - Src: + Object: blue_box + Commands: + - change_to: blue_block + - reward: 1 + - decr: box_count + Dst: + Object: blue_block + + - Name: push_red + Behaviours: + + # Boxes break if they are pushed into the spikes + - Src: + Object: red_box + Commands: + - incr: broken_boxes + - change_to: broken_box + - reward: -1 + Dst: + Object: spike + + # Boxes can pushed into empty space + - Src: + Object: red_box + Commands: + - mov: _dest + Dst: + Object: _empty + + # When boxes are pushed against the blocks they change + - Src: + Object: red_box + Commands: + - reward: 1 + - change_to: red_block + - decr: box_count + Dst: + Object: red_block + + - Name: push_green + Behaviours: + + # Boxes break if they are pushed into the spikes + - Src: + Object: green_box + Commands: + - incr: broken_boxes + - change_to: broken_box + - reward: -1 + Dst: + Object: spike + + # Boxes can pushed into empty space + - Src: + Object: green_box + Commands: + - mov: _dest + Dst: + Object: _empty + + # When boxes are pushed against the blocks they change + - Src: + Object: green_box + Commands: + - reward: 1 + - change_to: green_block + - decr: box_count + Dst: + Object: green_block + + +Objects: + + - Name: wall + MapCharacter: w + Observers: + Sprite2D: + - TilingMode: WALL_16 + Image: + - oryx/oryx_fantasy/wall1-0.png + - oryx/oryx_fantasy/wall1-1.png + - oryx/oryx_fantasy/wall1-2.png + - oryx/oryx_fantasy/wall1-3.png + - oryx/oryx_fantasy/wall1-4.png + - oryx/oryx_fantasy/wall1-5.png + - oryx/oryx_fantasy/wall1-6.png + - oryx/oryx_fantasy/wall1-7.png + - oryx/oryx_fantasy/wall1-8.png + - oryx/oryx_fantasy/wall1-9.png + - oryx/oryx_fantasy/wall1-10.png + - oryx/oryx_fantasy/wall1-11.png + - oryx/oryx_fantasy/wall1-12.png + - oryx/oryx_fantasy/wall1-13.png + - oryx/oryx_fantasy/wall1-14.png + - oryx/oryx_fantasy/wall1-15.png + Block2D: + - Shape: square + Color: [ 0.5, 0.5, 0.5 ] + Scale: 0.9 + + - Name: spike + 
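+    # Hazard object: the push_* behaviours above change any box pushed onto a spike
+    # into a broken_box and increment broken_boxes, triggering the Lose condition.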
MapCharacter: x + Observers: + Sprite2D: + - Image: gvgai/oryx/spike2.png + Block2D: + - Shape: triangle + Color: [ 0.9, 0.1, 0.1 ] + Scale: 0.5 + + - Name: red_box + MapCharacter: r + InitialActions: + - Action: box_counter + ActionId: 1 + Observers: + Sprite2D: + - Image: gvgai/newset/blockR.png + Block2D: + - Shape: square + Color: [ 0.5, 0.2, 0.2 ] + Scale: 0.5 + - Name: red_block + MapCharacter: R + Observers: + Sprite2D: + - Image: gvgai/newset/blockR2.png + Block2D: + - Shape: square + Color: [ 1.0, 0.0, 0.0 ] + Scale: 1.0 + + - Name: green_box + MapCharacter: g + InitialActions: + - Action: box_counter + ActionId: 1 + Observers: + Sprite2D: + - Image: gvgai/newset/blockG.png + Block2D: + - Shape: square + Color: [ 0.2, 0.5, 0.2 ] + Scale: 0.5 + - Name: green_block + MapCharacter: G + Observers: + Sprite2D: + - Image: gvgai/newset/blockG2.png + Block2D: + - Shape: square + Color: [ 0.0, 1.0, 0.0 ] + Scale: 1.0 + + - Name: blue_box + MapCharacter: b + InitialActions: + - Action: box_counter + ActionId: 1 + Observers: + Sprite2D: + - Image: gvgai/newset/blockB.png + Block2D: + - Shape: square + Color: [ 0.2, 0.2, 0.5 ] + Scale: 0.5 + - Name: blue_block + MapCharacter: B + Observers: + Sprite2D: + - Image: gvgai/newset/blockB2.png + Block2D: + - Shape: square + Color: [ 0.0, 0.0, 1.0 ] + Scale: 1.0 + + - Name: broken_box + Observers: + Sprite2D: + - Image: gvgai/newset/block3.png + Block2D: + - Shape: triangle + Color: [ 1.0, 0.0, 1.0 ] + Scale: 1.0 diff --git a/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_units.yaml b/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_units.yaml index 879a28681..4a00e3a8c 100644 --- a/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_units.yaml +++ b/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_units.yaml @@ -9,12 +9,17 @@ Environment: Variables: - Name: box_count InitialValue: 0 + PerPlayer: true + - Name: broken_boxes + InitialValue: 0 + PerPlayer: true + Player: + Count: 1 Termination: Win: - - eq: [box_count, 0] + - eq: [ box_count, 0 ] Lose: - - eq: [broken_box:count, 1] - - eq: [avatar:count, 0] + - eq: [ broken_boxes, 1 ] Levels: - | w w w w w w w w w w w w w @@ -33,7 +38,7 @@ Environment: w . . b1 . . r1 . G g1 . . w w . . . . x . . x . . . w w . . . r1 . . g1 . . b1 . w - w . . . . b . . h . . . w + w . . . . b . . x . . . w w . . g1 . . . r1 . . b1 . w w . . x . x . . . B . . w w . . . . . . . . . . . w @@ -64,7 +69,7 @@ Environment: w w w w w w w w w w w w w w . . . . . . . . . . . w w . . . . . . b1 . . . . w - w . . x . . R . . h . . w + w . . x . . R . . x . . w w . . . . b1 . . . . . . w w . . g1 . . . . r1 . . . w w . . . B . x . . G . . 
w @@ -84,11 +89,11 @@ Actions: Description: "The only action here is to increment the box count" Behaviours: - Src: - Object: [blue_box, red_box, green_box] + Object: [ blue_box, red_box, green_box ] Commands: - incr: box_count Dst: - Object: [blue_box, red_box, green_box] + Object: [ blue_box, red_box, green_box ] - Name: push Behaviours: @@ -97,6 +102,7 @@ Actions: - Src: Object: [ blue_box, green_box, red_box ] Commands: + - incr: broken_boxes - change_to: broken_box - reward: -1 Dst: @@ -104,7 +110,7 @@ Actions: # Boxes can pushed into empty space - Src: - Object: [blue_box, green_box, red_box] + Object: [ blue_box, green_box, red_box ] Commands: - mov: _dest Dst: @@ -116,7 +122,7 @@ Actions: Commands: - change_to: blue_block - reward: 1 - - decr: box_count + - decr: box_count Dst: Object: blue_block - Src: @@ -124,7 +130,7 @@ Actions: Commands: - reward: 1 - change_to: red_block - - decr: box_count + - decr: box_count Dst: Object: red_block - Src: @@ -132,7 +138,7 @@ Actions: Commands: - reward: 1 - change_to: green_block - - decr: box_count + - decr: box_count Dst: Object: green_block @@ -163,7 +169,7 @@ Objects: - oryx/oryx_fantasy/wall1-15.png Block2D: - Shape: square - Color: [0.5, 0.5, 0.5] + Color: [ 0.5, 0.5, 0.5 ] Scale: 0.9 - Name: spike @@ -173,7 +179,7 @@ Objects: - Image: gvgai/oryx/spike2.png Block2D: - Shape: triangle - Color: [0.9, 0.1, 0.1] + Color: [ 0.9, 0.1, 0.1 ] Scale: 0.5 - Name: red_box @@ -186,7 +192,7 @@ Objects: - Image: gvgai/newset/blockR.png Block2D: - Shape: square - Color: [0.5, 0.2, 0.2] + Color: [ 0.5, 0.2, 0.2 ] Scale: 0.5 - Name: red_block MapCharacter: R @@ -195,7 +201,7 @@ Objects: - Image: gvgai/newset/blockR2.png Block2D: - Shape: square - Color: [1.0, 0.0, 0.0] + Color: [ 1.0, 0.0, 0.0 ] Scale: 1.0 - Name: green_box @@ -208,7 +214,7 @@ Objects: - Image: gvgai/newset/blockG.png Block2D: - Shape: square - Color: [0.2, 0.5, 0.2] + Color: [ 0.2, 0.5, 0.2 ] Scale: 0.5 - Name: green_block MapCharacter: G @@ -217,7 +223,7 @@ Objects: - Image: gvgai/newset/blockG2.png Block2D: - Shape: square - Color: [0.0, 1.0, 0.0] + Color: [ 0.0, 1.0, 0.0 ] Scale: 1.0 - Name: blue_box @@ -230,7 +236,7 @@ Objects: - Image: gvgai/newset/blockB.png Block2D: - Shape: square - Color: [0.2, 0.2, 0.5] + Color: [ 0.2, 0.2, 0.5 ] Scale: 0.5 - Name: blue_block MapCharacter: B @@ -239,7 +245,7 @@ Objects: - Image: gvgai/newset/blockB2.png Block2D: - Shape: square - Color: [0.0, 0.0, 1.0] + Color: [ 0.0, 0.0, 1.0 ] Scale: 1.0 - Name: broken_box @@ -248,5 +254,5 @@ Objects: - Image: gvgai/newset/block3.png Block2D: - Shape: triangle - Color: [1.0, 0.0, 1.0] + Color: [ 1.0, 0.0, 1.0 ] Scale: 1.0 diff --git a/python/examples/experiments/conditional_action_spaces/rllib_baseline.py b/python/examples/experiments/conditional_action_spaces/rllib_baseline.py index 46a86849f..e95c20c7a 100644 --- a/python/examples/experiments/conditional_action_spaces/rllib_baseline.py +++ b/python/examples/experiments/conditional_action_spaces/rllib_baseline.py @@ -1,3 +1,4 @@ +import argparse import os import sys @@ -8,6 +9,7 @@ from ray.tune.registry import register_env from griddly import gd +from griddly.util.rllib.callbacks import GriddlyCallbacks from griddly.util.rllib.environment.core import RLlibEnv from griddly.util.rllib.torch import GAPAgent from griddly.util.rllib.torch.agents.conv_agent import SimpleConvAgent @@ -15,19 +17,38 @@ from griddly.util.rllib.torch.conditional_actions.conditional_action_policy_trainer import \ ConditionalActionImpalaTrainer +parser = 
argparse.ArgumentParser(description='Run experiments')
+
+parser.add_argument('--yaml-file', help='YAML file containing GDY for the game')
+
+parser.add_argument('--root-directory', default=os.path.expanduser("~/ray_results"),
+                    help='root directory for all data associated with the run')
+parser.add_argument('--num-gpus', default=1, type=int, help='Number of GPUs to make available to ray.')
+parser.add_argument('--num-cpus', default=8, type=int, help='Number of CPUs to make available to ray.')
+
+parser.add_argument('--num-workers', default=7, type=int, help='Number of workers')
+parser.add_argument('--num-envs-per-worker', default=5, type=int, help='Number of environments per worker')
+parser.add_argument('--num-gpus-per-worker', default=0, type=float, help='Number of gpus per worker')
+parser.add_argument('--num-cpus-per-worker', default=1, type=float, help='Number of cpus per worker')
+parser.add_argument('--max-training-steps', default=20000000, type=int, help='Maximum number of training steps')
+
+parser.add_argument('--capture-video', action='store_true', help='enable video capture')
+parser.add_argument('--video-directory', default='videos', help='directory for videos')
+parser.add_argument('--video-frequency', type=int, default=1000000, help='Frequency of videos')
+
+parser.add_argument('--seed', type=int, default=69420, help='seed for experiments')
+
+parser.add_argument('--lr', type=float, default=0.0005, help='learning rate')
+
 if __name__ == '__main__':
-    sep = os.pathsep
-    os.environ['PYTHONPATH'] = sep.join(sys.path)

-    yaml_files = [
-        os.path.realpath('clusters_po.yaml'),
-        os.path.realpath('clusters_po_with_push.yaml'),
-        os.path.realpath('clusters_po_with_push_seperate_colors.yaml')
-    ]
+    args = parser.parse_args()

+    sep = os.pathsep
+    os.environ['PYTHONPATH'] = sep.join(sys.path)

-    #ray.init(num_gpus=1)
-    ray.init(num_gpus=1, local_mode=True)
+    ray.init(include_dashboard=False, num_gpus=args.num_gpus, num_cpus=args.num_cpus)
+    #ray.init(include_dashboard=False, num_gpus=1, num_cpus=args.num_cpus, local_mode=True)

     env_name = "ray-griddly-env"

@@ -36,34 +57,32 @@

     wandbLoggerCallback = WandbLoggerCallback(
         project='conditional_actions',
-        group='baseline',
-        api_key_file='~/.wandb_rc'
+        api_key_file='~/.wandb_rc',
+        dir=args.root_directory
     )

-    max_training_steps = 20000000
+    max_training_steps = args.max_training_steps
+
     config = {
         'framework': 'torch',
-        'num_workers': 8,
-        'num_envs_per_worker': 4,
+        'seed': args.seed,
+        'num_workers': args.num_workers,
+        'num_envs_per_worker': args.num_envs_per_worker,
+        'num_gpus_per_worker': float(args.num_gpus_per_worker),
+        'num_cpus_per_worker': args.num_cpus_per_worker,

-        # 'callbacks': GriddlyCallbacks,
+        'callbacks': GriddlyCallbacks,

         'model': {
-            'custom_model': tune.grid_search(['SimpleConv', 'GAP']),
+            'custom_model': 'SimpleConv',
             'custom_model_config': {}
         },
         'env': env_name,
         'env_config': {
-            'record_video_config': {
-                'frequency': 100000,
-                'directory': 'baseline_videos'
-            },
-
-            # Put this here so it shows up in wandb
             'generate_valid_action_trees': False,
             'random_level_on_reset': True,
-            'yaml_file': tune.grid_search(yaml_files),
+            'yaml_file': args.yaml_file,
             'global_observer_type': gd.ObserverType.SPRITE_2D,
             'max_steps': 1000,
         },
@@ -72,14 +91,21 @@
             [max_training_steps, 0.0]
         ],
         'lr_schedule': [
-            [0, 0.0005],
+            [0, args.lr],
             [max_training_steps, 0.0]
         ],
     }

+    if args.capture_video:
+        real_video_frequency = args.video_frequency/(args.num_envs_per_worker*args.num_workers)
+        config['env_config']['record_video_config'] = {
+            'frequency': real_video_frequency,
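+            # The recorder counts steps per environment instance, so the requested global
+            # frequency is divided by (num_envs_per_worker * num_workers): with the defaults
+            # above, 1000000 / (5 * 7) ~= 28571 steps between videos in each env.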
'directory': os.path.join(args.root_directory, args.video_directory) + } stop = { "timesteps_total": max_training_steps, } - result = tune.run(ConditionalActionImpalaTrainer, config=config, stop=stop, callbacks=[wandbLoggerCallback]) + result = tune.run(ConditionalActionImpalaTrainer, local_dir=args.root_directory, config=config, stop=stop, + callbacks=[wandbLoggerCallback]) diff --git a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py index aff2108a0..4e1729590 100644 --- a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py +++ b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py @@ -50,7 +50,7 @@ os.environ['PYTHONPATH'] = sep.join(sys.path) ray.init(include_dashboard=False, num_gpus=args.num_gpus, num_cpus=args.num_cpus) - #ray.init(include_dashboard=False, num_gpus=1, num_cpus=2, local_mode=True) + #ray.init(include_dashboard=False, num_gpus=1, num_cpus=args.num_cpus, local_mode=True) env_name = "ray-griddly-env" @@ -83,10 +83,9 @@ 'env_config': { 'allow_nop': args.allow_nop, - 'invalid_action_masking': tune.grid_search(['none', 'conditional', 'collapsed']), + #'invalid_action_masking': tune.grid_search(['none', 'conditional', 'collapsed']), 'vtrace_masking': args.vtrace_masking, - # 'invalid_action_masking': 'collapsed', - # 'allow_nop': False, + 'invalid_action_masking': 'conditional', 'generate_valid_action_trees': True, 'random_level_on_reset': True, 'yaml_file': args.yaml_file, @@ -105,8 +104,9 @@ } if args.capture_video: + real_video_frequency = args.video_frequency / (args.num_envs_per_worker * args.num_workers) config['env_config']['record_video_config'] = { - 'frequency': args.video_frequency, + 'frequency': real_video_frequency, 'directory': os.path.join(args.root_directory, args.video_directory) } diff --git a/python/griddly/util/rllib/callbacks.py b/python/griddly/util/rllib/callbacks.py index 37767b835..c46e85824 100644 --- a/python/griddly/util/rllib/callbacks.py +++ b/python/griddly/util/rllib/callbacks.py @@ -11,7 +11,7 @@ class GriddlyCallbacks(DefaultCallbacks): def on_episode_end(self, *, worker: "RolloutWorker", base_env: BaseEnv, policies: Dict[PolicyID, Policy], episode: MultiAgentEpisode, env_index: Optional[int] = None, **kwargs) -> None: - if worker.worker_index == 0 and env_index == 0: + if env_index == 0: if not worker.multiagent: info = episode.last_info_for() if 'video' in info: diff --git a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_exploration.py b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_exploration.py index be92f86a1..dfa00e7ac 100644 --- a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_exploration.py +++ b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_exploration.py @@ -101,24 +101,28 @@ def get_actions_and_mask(self): mask_offset = 0 for a in range(self._num_action_parts): - dist_part = self._inputs_split[a] - is_parameters = a==(self._num_action_parts-1) - sampled, masked_part_logits, logp, mask_part = self._mask_and_sample(subtree_options, dist_part[i], is_parameters) + try: + dist_part = self._inputs_split[a] + is_parameters = a==(self._num_action_parts-1) + sampled, masked_part_logits, logp, mask_part = self._mask_and_sample(subtree_options, dist_part[i], is_parameters) - # Set the action and the mask for each part of the action - actions[i, a] = sampled - 
masked_logits[i, mask_offset:mask_offset + self._action_space_shape[a]] = masked_part_logits - mask[i, mask_offset:mask_offset + self._action_space_shape[a]] = mask_part + # Set the action and the mask for each part of the action + actions[i, a] = sampled + masked_logits[i, mask_offset:mask_offset + self._action_space_shape[a]] = masked_part_logits + mask[i, mask_offset:mask_offset + self._action_space_shape[a]] = mask_part - logp_parts[a] = logp + logp_parts[a] = logp - if mask_part.sum() == 0: - raise RuntimeError('mask calculated incorrectly') + if mask_part.sum() == 0: + raise RuntimeError('mask calculated incorrectly') - mask_offset += self._action_space_shape[a] + mask_offset += self._action_space_shape[a] + + subtree = subtree[int(sampled)] + subtree_options = list(subtree.keys()) + except ValueError as e: + print(e) - subtree = subtree[int(sampled)] - subtree_options = list(subtree.keys()) logp_sums[i] = torch.sum(logp_parts) diff --git a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_mixin.py b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_mixin.py index 9d714fa45..21afd5c8a 100644 --- a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_mixin.py +++ b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_mixin.py @@ -43,6 +43,8 @@ def compute_actions_from_input_dict( invalid_action_masking = self.config["env_config"].get("invalid_action_masking", 'none') allow_nop = self.config["env_config"].get("allow_nop", False) + extra_fetches = {} + if generate_valid_action_trees: infos = input_dict[SampleBatch.INFOS] if SampleBatch.INFOS in input_dict else {} @@ -63,6 +65,10 @@ def compute_actions_from_input_dict( ) actions, masked_logits, logp, mask = exploration.get_actions_and_mask() + + extra_fetches.update({ + 'invalid_action_mask': mask + }) else: action_dist = self.dist_class(dist_inputs, self.model) @@ -77,12 +83,11 @@ def compute_actions_from_input_dict( input_dict[SampleBatch.ACTIONS] = actions - extra_fetches = { + extra_fetches.update({ SampleBatch.ACTION_DIST_INPUTS: masked_logits, SampleBatch.ACTION_PROB: torch.exp(logp.float()), SampleBatch.ACTION_LOGP: logp, - 'invalid_action_mask': mask - } + }) # Update our global timestep by the batch size. 
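# A sketch of how the fetches assembled above travel with the rollout (the
# variable names below are illustrative, not part of this patch): anything
# placed in `extra_fetches` is written into the sample batch next to the
# sampled actions, so the mask that shaped each action can be recovered
# later, for example in a postprocessing step:
#
#     mask = sample_batch['invalid_action_mask']    # 0/1 entry per flat logit
#     logp = sample_batch[SampleBatch.ACTION_LOGP]  # summed per-part log-prob
#     assert mask.shape[0] == logp.shape[0]         # one mask row per action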
self.global_timestep += len(input_dict[SampleBatch.CUR_OBS]) diff --git a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_policy_trainer.py b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_policy_trainer.py index 9bb3ffcd9..c3e4f9407 100644 --- a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_policy_trainer.py +++ b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_policy_trainer.py @@ -3,7 +3,7 @@ import torch from ray.rllib import SampleBatch from ray.rllib.agents.impala import ImpalaTrainer -from ray.rllib.agents.impala.vtrace_tf_policy import build_vtrace_loss +from ray.rllib.agents.impala.vtrace_torch_policy import build_vtrace_loss from ray.rllib.agents.impala.vtrace_torch_policy import VTraceTorchPolicy, VTraceLoss, make_time_major from ray.rllib.models.torch.torch_action_dist import TorchCategorical from ray.rllib.policy.torch_policy import LearningRateSchedule, EntropyCoeffSchedule From e06ab42578a87c372f9982840296e6bafda62dc2 Mon Sep 17 00:00:00 2001 From: Bam4d Date: Thu, 25 Mar 2021 11:53:20 +0000 Subject: [PATCH 25/45] frequency needs to be an integer value for modulo to work --- .../experiments/conditional_action_spaces/rllib_baseline.py | 2 +- .../conditional_action_spaces/rllib_conditional_actions.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/examples/experiments/conditional_action_spaces/rllib_baseline.py b/python/examples/experiments/conditional_action_spaces/rllib_baseline.py index e95c20c7a..7ef20d001 100644 --- a/python/examples/experiments/conditional_action_spaces/rllib_baseline.py +++ b/python/examples/experiments/conditional_action_spaces/rllib_baseline.py @@ -97,7 +97,7 @@ } if args.capture_video: - real_video_frequency = args.video_frequency/(args.num_envs_per_worker*args.num_workers) + real_video_frequency = int(args.video_frequency / (args.num_envs_per_worker * args.num_workers)) config['env_config']['record_video_config'] = { 'frequency': real_video_frequency, 'directory': os.path.join(args.root_directory, args.video_directory) diff --git a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py index 4e1729590..323ee6f4c 100644 --- a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py +++ b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py @@ -104,7 +104,7 @@ } if args.capture_video: - real_video_frequency = args.video_frequency / (args.num_envs_per_worker * args.num_workers) + real_video_frequency = int(args.video_frequency / (args.num_envs_per_worker * args.num_workers)) config['env_config']['record_video_config'] = { 'frequency': real_video_frequency, 'directory': os.path.join(args.root_directory, args.video_directory) From 7264a14f3114ab3c596c72d8fadd5f1acb1e917c Mon Sep 17 00:00:00 2001 From: Bam4d Date: Sat, 27 Mar 2021 11:51:12 +0000 Subject: [PATCH 26/45] some fixes for flat representation --- ...lusters_po_with_push_separate_colors.yaml} | 0 .../rllib_baseline_flat.py | 270 ++++++++++++++++++ python/griddly/util/rllib/environment/core.py | 4 +- 3 files changed, 272 insertions(+), 2 deletions(-) rename python/examples/experiments/conditional_action_spaces/{clusters_po_with_push_seperate_colors.yaml => clusters_po_with_push_separate_colors.yaml} (100%) create mode 100644 python/examples/experiments/conditional_action_spaces/rllib_baseline_flat.py diff --git 
a/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_seperate_colors.yaml b/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_separate_colors.yaml
similarity index 100%
rename from python/examples/experiments/conditional_action_spaces/clusters_po_with_push_seperate_colors.yaml
rename to python/examples/experiments/conditional_action_spaces/clusters_po_with_push_separate_colors.yaml
diff --git a/python/examples/experiments/conditional_action_spaces/rllib_baseline_flat.py b/python/examples/experiments/conditional_action_spaces/rllib_baseline_flat.py
new file mode 100644
index 000000000..b0ebd7f9c
--- /dev/null
+++ b/python/examples/experiments/conditional_action_spaces/rllib_baseline_flat.py
@@ -0,0 +1,270 @@
+import argparse
+import os
+import sys
+
+import gym
+import numpy as np
+import ray
+import torch
+from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
+from torch import nn
+from gym.spaces import MultiDiscrete, Dict, Box
+from ray import tune
+from ray.rllib.agents.impala import ImpalaTrainer
+from ray.rllib.models import ModelCatalog
+from ray.tune.integration.wandb import WandbLoggerCallback
+from ray.tune.registry import register_env
+
+from griddly import gd
+from griddly.util.rllib.callbacks import GriddlyCallbacks
+from griddly.util.rllib.environment.core import RLlibEnv
+from griddly.util.rllib.torch.agents.common import layer_init
+from griddly.util.rllib.torch.agents.conv_agent import SimpleConvAgent
+
+parser = argparse.ArgumentParser(description='Run experiments')
+
+parser.add_argument('--yaml-file', help='YAML file containing GDY for the game')
+
+parser.add_argument('--root-directory', default=os.path.expanduser("~/ray_results"),
+                    help='root directory for all data associated with the run')
+parser.add_argument('--num-gpus', default=1, type=int, help='Number of GPUs to make available to ray.')
+parser.add_argument('--num-cpus', default=8, type=int, help='Number of CPUs to make available to ray.')
+
+parser.add_argument('--num-workers', default=7, type=int, help='Number of workers')
+parser.add_argument('--num-envs-per-worker', default=5, type=int, help='Number of environments per worker')
+parser.add_argument('--num-gpus-per-worker', default=0, type=float, help='Number of gpus per worker')
+parser.add_argument('--num-cpus-per-worker', default=1, type=float, help='Number of cpus per worker')
+parser.add_argument('--max-training-steps', default=20000000, type=int, help='Maximum number of training steps')
+
+parser.add_argument('--capture-video', action='store_true', help='enable video capture')
+parser.add_argument('--video-directory', default='videos', help='directory for videos')
+parser.add_argument('--video-frequency', type=int, default=1000000, help='Frequency of videos')
+
+parser.add_argument('--seed', type=int, default=69420, help='seed for experiments')
+
+parser.add_argument('--lr', type=float, default=0.0005, help='learning rate')
+
+
+class FlatActionWrapper(gym.Wrapper):
+
+    def __init__(self, env):
+        super().__init__(env)
+
+        self._num_action_parts = 1
+        self._action_params_offset = 0
+        if not self.has_avatar:
+            self._num_action_parts += 1
+            self._action_params_offset = 1
+
+
+        self._action_splits = np.zeros(self._num_action_parts)
+
+        self._total_position_params = 0
+        if not self.has_avatar:
+            self._action_splits[0] = self.width*self.height
+            self._total_position_params += self.width*self.height
+
+        self._action_logit_offsets = {}
+
+        total_action_params = 0
+        for i, action_name in enumerate(self.env.action_names):
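            # Layout sketch for the flat space being built here (the action
            # names are the game's own, used illustratively): with no avatar,
            # the first `width * height` logits pick a source cell, then each
            # action's ids follow back to back, so for a game with `move` and
            # `push` the flat vector looks like
            #
            #     [ cell 0 .. cell W*H-1 | move ids | push ids ]
            #
            # and `_action_logit_offsets[name]` records where each id block
            # starts inside that vector.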
+            self._action_logit_offsets[action_name] = total_action_params + self._total_position_params
+            total_action_params += self.num_action_ids[action_name]
+
+        self._action_splits[self._action_params_offset] = total_action_params
+
+        self._total_actions = int(np.sum(self._action_splits))
+
+        self.action_space = MultiDiscrete(self._action_splits)
+        self.observation_space = Dict({
+            'obs': self.observation_space,
+            'mask': Box(0, 1, shape=(self._total_actions,)),
+        })
+
+    def _get_flat_mask(self):
+        flat_mask = np.zeros(self._total_actions)
+        for location, action_names in self.env.game.get_available_actions(1).items():
+            if not self.has_avatar:
+                flat_location = self.width * location[1] + location[0]
+                flat_mask[flat_location] = 1
+            for action_name, action_ids in self.env.game.get_available_action_ids(location, list(action_names)).items():
+                mask_offset = self._action_logit_offsets[action_name]
+                flat_mask[mask_offset:mask_offset + self.num_action_ids[action_name]][action_ids] = 1
+        return flat_mask
+
+    def _to_griddly_action(self, action):
+        # convert the flat action back to Griddly's tree-based format
+
+        griddly_action = []
+        action_ptr = 0
+        if not self.has_avatar:
+            x = action[action_ptr] % self.width
+            griddly_action.append(x)
+            y = int(action[action_ptr] / self.width)
+            griddly_action.append(y)
+            action_ptr += 1
+
+        if self.action_count > 0:
+            action_type_id = 0
+            action_param_id = 0
+            for action_name in self.action_names:
+                action_offset_after_position = (self._action_logit_offsets[action_name] - self._total_position_params)
+                next_offset = action_offset_after_position + self.num_action_ids[action_name]
+                if next_offset > action[action_ptr]:
+                    action_param_id = action[action_ptr] - action_offset_after_position
+                    break
+                action_type_id += 1
+
+            griddly_action.append(action_type_id)
+            griddly_action.append(action_param_id)
+        else:
+            griddly_action.append(action[action_ptr])
+
+        return griddly_action
+
+    def reset(self, **kwargs):
+
+        obs = super().reset(**kwargs)
+
+        observations = {
+            'obs': obs,
+            'mask': self._get_flat_mask()
+        }
+
+        return observations
+
+    def step(self, action):
+        griddly_action = self._to_griddly_action(action)
+
+        obs, reward, done, info = super().step(griddly_action)
+
+        observations = {
+            'obs': obs,
+            'mask': self._get_flat_mask()
+        }
+
+        return observations, reward, done, info
+
+
+class SimpleConvFlatAgent(TorchModelV2, nn.Module):
+
+    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
+        super().__init__(obs_space, action_space, num_outputs, model_config, name)
+        nn.Module.__init__(self)
+
+        self._num_objects = obs_space.original_space['obs'].shape[2]
+        self._num_actions = num_outputs
+
+        linear_flatten = np.prod(obs_space.original_space['obs'].shape[:2]) * 64
+
+        self.network = nn.Sequential(
+            layer_init(nn.Conv2d(self._num_objects, 32, 3, padding=1)),
+            nn.ReLU(),
+            layer_init(nn.Conv2d(32, 64, 3, padding=1)),
+            nn.ReLU(),
+            nn.Flatten(),
+            layer_init(nn.Linear(linear_flatten, 1024)),
+            nn.ReLU(),
+            layer_init(nn.Linear(1024, 512)),
+            nn.ReLU(),
+        )
+
+        self._actor_head = nn.Sequential(
+            layer_init(nn.Linear(512, 256), std=0.01),
+            nn.ReLU(),
+            layer_init(nn.Linear(256, self._num_actions), std=0.01)
+        )
+
+        self._critic_head = nn.Sequential(
+            layer_init(nn.Linear(512, 1), std=0.01)
+        )
+
+    def forward(self, input_dict, state, seq_lens):
+        obs_transformed = input_dict['obs']['obs'].permute(0, 3, 1, 2)
+        mask = input_dict['obs']['mask']
+        network_output = self.network(obs_transformed)
+        value = self._critic_head(network_output)
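        # Masking note for the logits computed below: log(mask) is 0 where
        # mask == 1 and -inf where mask == 0, so adding it removes invalid
        # actions from the softmax; clamping with torch.finfo().min rather
        # than keeping a true -inf helps avoid NaNs when the distribution
        # and its gradients are evaluated.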
self._value = value.reshape(-1) + logits = self._actor_head(network_output) + + logits += torch.maximum(torch.log(mask), torch.tensor(torch.finfo().min)) + + return logits, state + + def value_function(self): + return self._value + + +if __name__ == '__main__': + + args = parser.parse_args() + + sep = os.pathsep + os.environ['PYTHONPATH'] = sep.join(sys.path) + + ray.init(include_dashboard=False, num_gpus=args.num_gpus, num_cpus=args.num_cpus) + #ray.init(include_dashboard=False, num_gpus=1, num_cpus=args.num_cpus, local_mode=True) + env_name = "ray-griddly-env" + + + def _create_env(env_config): + env = RLlibEnv(env_config) + return FlatActionWrapper(env) + + + register_env(env_name, _create_env) + ModelCatalog.register_custom_model("SimpleConv", SimpleConvFlatAgent) + + wandbLoggerCallback = WandbLoggerCallback( + project='conditional_actions', + api_key_file='~/.wandb_rc', + dir=args.root_directory + ) + + max_training_steps = args.max_training_steps + + config = { + 'framework': 'torch', + 'seed': args.seed, + 'num_workers': args.num_workers, + 'num_envs_per_worker': args.num_envs_per_worker, + 'num_gpus_per_worker': float(args.num_gpus_per_worker), + 'num_cpus_per_worker': args.num_cpus_per_worker, + + 'callbacks': GriddlyCallbacks, + + 'model': { + 'custom_model': 'SimpleConv', + 'custom_model_config': {} + }, + 'env': env_name, + 'env_config': { + 'generate_valid_action_trees': False, + 'random_level_on_reset': True, + 'yaml_file': args.yaml_file, + 'global_observer_type': gd.ObserverType.SPRITE_2D, + 'max_steps': 1000, + }, + 'entropy_coeff_schedule': [ + [0, 0.01], + [max_training_steps, 0.0] + ], + 'lr_schedule': [ + [0, args.lr], + [max_training_steps, 0.0] + ], + + } + if args.capture_video: + real_video_frequency = int(args.video_frequency / (args.num_envs_per_worker * args.num_workers)) + config['env_config']['record_video_config'] = { + 'frequency': real_video_frequency, + 'directory': os.path.join(args.root_directory, args.video_directory) + } + + stop = { + "timesteps_total": max_training_steps, + } + + result = tune.run(ImpalaTrainer, local_dir=args.root_directory, config=config, stop=stop, + callbacks=[wandbLoggerCallback]) diff --git a/python/griddly/util/rllib/environment/core.py b/python/griddly/util/rllib/environment/core.py index 86672c862..fe6723ccf 100644 --- a/python/griddly/util/rllib/environment/core.py +++ b/python/griddly/util/rllib/environment/core.py @@ -132,8 +132,8 @@ def set_transform(self): dtype=np.float, ) - self.height = self.observation_space.shape[0] - self.width = self.observation_space.shape[1] + self.height = self.observation_space.shape[1] + self.width = self.observation_space.shape[0] def _get_valid_action_trees(self): valid_action_trees = self.game.build_valid_action_trees() From f36c3acb606ccb3054262b988332a17ac1fda642 Mon Sep 17 00:00:00 2001 From: Bam4d Date: Mon, 29 Mar 2021 09:15:50 +0100 Subject: [PATCH 27/45] fixing bug with action trees not synchronized with observations during policy rollouts --- .../rllib_baseline_flat.py | 6 ++-- .../rllib_conditional_actions.py | 7 ++-- python/griddly/util/rllib/environment/core.py | 2 +- .../conditional_action_exploration.py | 32 +++++++++---------- src/Griddly/Core/Grid.cpp | 8 ++--- 5 files changed, 27 insertions(+), 28 deletions(-) diff --git a/python/examples/experiments/conditional_action_spaces/rllib_baseline_flat.py b/python/examples/experiments/conditional_action_spaces/rllib_baseline_flat.py index b0ebd7f9c..afda50765 100644 --- 
a/python/examples/experiments/conditional_action_spaces/rllib_baseline_flat.py +++ b/python/examples/experiments/conditional_action_spaces/rllib_baseline_flat.py @@ -56,13 +56,12 @@ def __init__(self, env): self._num_action_parts += 1 self._action_params_offset = 1 - self._action_splits = np.zeros(self._num_action_parts) self._total_position_params = 0 if not self.has_avatar: - self._action_splits[0] = self.width*self.height - self._total_position_params += self.width*self.height + self._action_splits[0] = self.width * self.height + self._total_position_params += self.width * self.height self._action_logit_offsets = {} @@ -202,6 +201,7 @@ def value_function(self): sep = os.pathsep os.environ['PYTHONPATH'] = sep.join(sys.path) + ray.init(include_dashboard=False, num_gpus=args.num_gpus, num_cpus=args.num_cpus) #ray.init(include_dashboard=False, num_gpus=1, num_cpus=args.num_cpus, local_mode=True) env_name = "ray-griddly-env" diff --git a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py index 323ee6f4c..80700db63 100644 --- a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py +++ b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py @@ -49,8 +49,8 @@ sep = os.pathsep os.environ['PYTHONPATH'] = sep.join(sys.path) - ray.init(include_dashboard=False, num_gpus=args.num_gpus, num_cpus=args.num_cpus) - #ray.init(include_dashboard=False, num_gpus=1, num_cpus=args.num_cpus, local_mode=True) + #ray.init(include_dashboard=False, num_gpus=args.num_gpus, num_cpus=args.num_cpus) + ray.init(include_dashboard=False, num_gpus=1, num_cpus=args.num_cpus, local_mode=True) env_name = "ray-griddly-env" @@ -87,7 +87,8 @@ 'vtrace_masking': args.vtrace_masking, 'invalid_action_masking': 'conditional', 'generate_valid_action_trees': True, - 'random_level_on_reset': True, + #'random_level_on_reset': True, + 'level': 0, 'yaml_file': args.yaml_file, 'global_observer_type': gd.ObserverType.SPRITE_2D, 'max_steps': 1000, diff --git a/python/griddly/util/rllib/environment/core.py b/python/griddly/util/rllib/environment/core.py index fe6723ccf..602fa5f16 100644 --- a/python/griddly/util/rllib/environment/core.py +++ b/python/griddly/util/rllib/environment/core.py @@ -165,7 +165,7 @@ def step(self, action): if self.generate_valid_action_trees: self.last_valid_action_trees = self._get_valid_action_trees() - info['valid_action_tree'] = self.last_valid_action_trees + info['valid_action_tree'] = dict(self.last_valid_action_trees) return self._transform(observation), reward, done, info diff --git a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_exploration.py b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_exploration.py index dfa00e7ac..5d092055a 100644 --- a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_exploration.py +++ b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_exploration.py @@ -7,7 +7,7 @@ class TorchConditionalMaskingExploration(): - def __init__(self, model, dist_inputs, valid_action_trees, explore=False, invalid_action_masking='none', allow_nop=False): + def __init__(self, model, dist_inputs, valid_action_trees, explore=False, invalid_action_masking='conditional', allow_nop=False): self._valid_action_trees = valid_action_trees self._num_inputs = dist_inputs.shape[0] @@ -26,32 +26,36 @@ def __init__(self, model, dist_inputs, 
valid_action_trees, explore=False, invali self._inputs_split = dist_inputs.split(tuple(self._action_space_shape), dim=1) + self._full_tree = self._fill_node(self._action_space_shape,0) + def _mask_and_sample(self, options, logits, is_parameters=False): mask = torch.zeros([logits.shape[0]]).to(logits.device) mask[options] = 1 if is_parameters: - if not self._allow_nop and len(options) > 1: + if not self._allow_nop: mask[0] = 0 masked_logits = logits + torch.log(mask) dist = Categorical(logits=masked_logits) sampled = dist.sample() + logp = dist.log_prob(sampled) + out_logits = masked_logits - if self._invalid_action_masking != 'none': - logp = dist.log_prob(sampled) - out_logits = masked_logits - else: - mask = torch.ones([logits.shape[0]]) - dist = Categorical(logits=logits) - logp = dist.log_prob(sampled) - out_logits = logits + if not self._allow_nop and is_parameters: + assert sampled != 0 return sampled, out_logits, logp, mask + def _fill_node(self, keys, pos): + if pos < len(keys): + return {k: self._fill_node(keys, pos + 1) for k in np.arange(keys[pos])} + else: + return {} + def _merge_all_branches(self, tree): all_nodes = {} merged_tree = {} @@ -70,10 +74,7 @@ def _process_valid_action_tree(self, valid_action_tree): # In the case there are no available actions for the player if len(subtree_options) == 0: - build_tree = subtree - for _ in range(self._num_action_parts): - build_tree[0] = {} - build_tree = build_tree[0] + subtree = self._full_tree subtree_options = list(subtree.keys()) # If we want very basic action masking where parameterized masks are superimposed we use this @@ -113,9 +114,6 @@ def get_actions_and_mask(self): logp_parts[a] = logp - if mask_part.sum() == 0: - raise RuntimeError('mask calculated incorrectly') - mask_offset += self._action_space_shape[a] subtree = subtree[int(sampled)] diff --git a/src/Griddly/Core/Grid.cpp b/src/Griddly/Core/Grid.cpp index aed52577b..d482285ea 100644 --- a/src/Griddly/Core/Grid.cpp +++ b/src/Griddly/Core/Grid.cpp @@ -143,19 +143,19 @@ std::unordered_map Grid::executeAction(uint32_t playerId, std } if (sourceObject == nullptr) { - spdlog::debug("Cannot perform action on empty space."); + spdlog::warn("Cannot perform action on empty space. ({0},{1})", action->getSourceLocation()[0], action->getSourceLocation()[1]); return {}; } auto sourceObjectPlayerId = sourceObject->getPlayerId(); if (playerId != 0 && sourceObjectPlayerId != playerId) { - spdlog::debug("Cannot perform action on object not owned by player. Object owner {0}, Player owner {1}", sourceObjectPlayerId, playerId); + spdlog::warn("Cannot perform action on object not owned by player. 
Object owner {0}, Player owner {1}", sourceObjectPlayerId, playerId); return {}; } if (playerId != 0 && sourceObject->isPlayerAvatar() && playerAvatars_.find(playerId) == playerAvatars_.end()) { - spdlog::debug("Avatar for player {0} has been removed, action will be ignored.", playerId); + spdlog::warn("Avatar for player {0} has been removed, action will be ignored.", playerId); return {}; } @@ -176,7 +176,7 @@ std::unordered_map Grid::executeAction(uint32_t playerId, std return rewardAccumulator; } else { - spdlog::debug("Cannot perform action={0} on object={1}", action->getActionName(), sourceObject->getObjectName()); + spdlog::warn("Cannot perform action={0} on object={1}", action->getActionName(), sourceObject->getObjectName()); return {}; } } From 232b286396e5b7c6083b53c3ef3cdb3d05c5b14b Mon Sep 17 00:00:00 2001 From: Bam4d Date: Mon, 29 Mar 2021 10:01:05 +0100 Subject: [PATCH 28/45] automatically use nops if we have no infos (straight after reset etc) --- .../rllib_conditional_actions.py | 14 +++++++------- .../conditional_action_exploration.py | 12 ++++++++---- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py index 80700db63..5f0987735 100644 --- a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py +++ b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py @@ -18,7 +18,7 @@ parser = argparse.ArgumentParser(description='Run experiments') -parser.add_argument('--yaml-file', help='YAML file condining GDY for the game') +parser.add_argument('--yaml-file', help='YAML file containing GDY for the game') parser.add_argument('--root-directory', default=os.path.expanduser("~/ray_results"), help='root directory for all data associated with the run') @@ -49,8 +49,8 @@ sep = os.pathsep os.environ['PYTHONPATH'] = sep.join(sys.path) - #ray.init(include_dashboard=False, num_gpus=args.num_gpus, num_cpus=args.num_cpus) - ray.init(include_dashboard=False, num_gpus=1, num_cpus=args.num_cpus, local_mode=True) + ray.init(include_dashboard=False, num_gpus=args.num_gpus, num_cpus=args.num_cpus) + #ray.init(include_dashboard=False, num_gpus=1, num_cpus=args.num_cpus, local_mode=True) env_name = "ray-griddly-env" @@ -83,12 +83,12 @@ 'env_config': { 'allow_nop': args.allow_nop, - #'invalid_action_masking': tune.grid_search(['none', 'conditional', 'collapsed']), + 'invalid_action_masking': tune.grid_search(['conditional', 'collapsed']), 'vtrace_masking': args.vtrace_masking, - 'invalid_action_masking': 'conditional', + #'invalid_action_masking': 'conditional', 'generate_valid_action_trees': True, - #'random_level_on_reset': True, - 'level': 0, + #'level': 0, + 'random_level_on_reset': True, 'yaml_file': args.yaml_file, 'global_observer_type': gd.ObserverType.SPRITE_2D, 'max_steps': 1000, diff --git a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_exploration.py b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_exploration.py index 5d092055a..f6231c486 100644 --- a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_exploration.py +++ b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_exploration.py @@ -34,7 +34,7 @@ def _mask_and_sample(self, options, logits, is_parameters=False): mask[options] = 1 if is_parameters: - if not self._allow_nop: + if not self._allow_nop and len(options) 
> 1: mask[0] = 0 masked_logits = logits + torch.log(mask) @@ -44,8 +44,8 @@ def _mask_and_sample(self, options, logits, is_parameters=False): logp = dist.log_prob(sampled) out_logits = masked_logits - if not self._allow_nop and is_parameters: - assert sampled != 0 + # if not self._allow_nop and is_parameters: + # assert sampled != 0 return sampled, out_logits, logp, mask @@ -74,7 +74,11 @@ def _process_valid_action_tree(self, valid_action_tree): # In the case there are no available actions for the player if len(subtree_options) == 0: - subtree = self._full_tree + #subtree = self._full_tree + build_tree = subtree + for _ in range(self._num_action_parts): + build_tree[0] = {} + build_tree = build_tree[0] subtree_options = list(subtree.keys()) # If we want very basic action masking where parameterized masks are superimposed we use this From f4a37db0ed268f74544cc48904459a44d58ef5eb Mon Sep 17 00:00:00 2001 From: Bam4d Date: Mon, 29 Mar 2021 16:32:29 +0100 Subject: [PATCH 29/45] fixing unit clusters games --- .../clusters_po_with_push_separate_colors_units.yaml | 2 +- .../conditional_action_spaces/clusters_po_with_push_units.yaml | 2 +- .../torch/conditional_actions/conditional_action_exploration.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_separate_colors_units.yaml b/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_separate_colors_units.yaml index 1fba6490c..bb932fc35 100644 --- a/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_separate_colors_units.yaml +++ b/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_separate_colors_units.yaml @@ -38,7 +38,7 @@ Environment: w . . b1 . . r1 . G g1 . . w w . . . . x . . x . . . w w . . . r1 . . g1 . . b1 . w - w . . . . b . . x . . . w + w . . . . R . . x . . . w w . . g1 . . . r1 . . b1 . w w . . x . x . . . B . . w w . . . . . . . . . . . w diff --git a/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_units.yaml b/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_units.yaml index 4a00e3a8c..cae2d9a80 100644 --- a/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_units.yaml +++ b/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_units.yaml @@ -38,7 +38,7 @@ Environment: w . . b1 . . r1 . G g1 . . w w . . . . x . . x . . . w w . . . r1 . . g1 . . b1 . w - w . . . . b . . x . . . w + w . . . . R . . x . . . w w . . g1 . . . r1 . . b1 . w w . . x . x . . . B . . w w . . . . . . . . . . . 
w diff --git a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_exploration.py b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_exploration.py index f6231c486..dc8836f9c 100644 --- a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_exploration.py +++ b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_exploration.py @@ -26,7 +26,7 @@ def __init__(self, model, dist_inputs, valid_action_trees, explore=False, invali self._inputs_split = dist_inputs.split(tuple(self._action_space_shape), dim=1) - self._full_tree = self._fill_node(self._action_space_shape,0) + #self._full_tree = self._fill_node(self._action_space_shape,0) def _mask_and_sample(self, options, logits, is_parameters=False): From 541ef74bbb2810c1725a82432fe9bb844921deec Mon Sep 17 00:00:00 2001 From: Bam4d Date: Mon, 29 Mar 2021 19:15:47 +0100 Subject: [PATCH 30/45] full tree for reset mask --- .../conditional_action_exploration.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_exploration.py b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_exploration.py index dc8836f9c..164442adb 100644 --- a/python/griddly/util/rllib/torch/conditional_actions/conditional_action_exploration.py +++ b/python/griddly/util/rllib/torch/conditional_actions/conditional_action_exploration.py @@ -26,7 +26,7 @@ def __init__(self, model, dist_inputs, valid_action_trees, explore=False, invali self._inputs_split = dist_inputs.split(tuple(self._action_space_shape), dim=1) - #self._full_tree = self._fill_node(self._action_space_shape,0) + self._full_tree = self._fill_node(self._action_space_shape,0) def _mask_and_sample(self, options, logits, is_parameters=False): @@ -34,7 +34,7 @@ def _mask_and_sample(self, options, logits, is_parameters=False): mask[options] = 1 if is_parameters: - if not self._allow_nop and len(options) > 1: + if not self._allow_nop: mask[0] = 0 masked_logits = logits + torch.log(mask) @@ -74,11 +74,11 @@ def _process_valid_action_tree(self, valid_action_tree): # In the case there are no available actions for the player if len(subtree_options) == 0: - #subtree = self._full_tree - build_tree = subtree - for _ in range(self._num_action_parts): - build_tree[0] = {} - build_tree = build_tree[0] + subtree = self._full_tree + # build_tree = subtree + # for _ in range(self._num_action_parts): + # build_tree[0] = {} + # build_tree = build_tree[0] subtree_options = list(subtree.keys()) # If we want very basic action masking where parameterized masks are superimposed we use this From f7339e9c721b38809000d507d4ac30480fd9d221 Mon Sep 17 00:00:00 2001 From: Bam4d Date: Tue, 30 Mar 2021 20:57:29 +0100 Subject: [PATCH 31/45] changing wandb naming, removing warn lines in Grid.cpp --- .../rllib_baseline.py | 16 ++++++++++++---- .../rllib_baseline_flat.py | 19 +++++++++++++------ src/Griddly/Core/Grid.cpp | 8 ++++---- 3 files changed, 29 insertions(+), 14 deletions(-) diff --git a/python/examples/experiments/conditional_action_spaces/rllib_baseline.py b/python/examples/experiments/conditional_action_spaces/rllib_baseline.py index 7ef20d001..d982350e0 100644 --- a/python/examples/experiments/conditional_action_spaces/rllib_baseline.py +++ b/python/examples/experiments/conditional_action_spaces/rllib_baseline.py @@ -11,7 +11,6 @@ from griddly import gd from griddly.util.rllib.callbacks import GriddlyCallbacks from 
griddly.util.rllib.environment.core import RLlibEnv -from griddly.util.rllib.torch import GAPAgent from griddly.util.rllib.torch.agents.conv_agent import SimpleConvAgent # from griddly.util.rllib.callbacks import GriddlyCallbacks from griddly.util.rllib.torch.conditional_actions.conditional_action_policy_trainer import \ @@ -20,6 +19,7 @@ parser = argparse.ArgumentParser(description='Run experiments') parser.add_argument('--yaml-file', help='YAML file condining GDY for the game') +parser.add_argument('--experiment-name', default='unknown', help='name of the experiment') parser.add_argument('--root-directory', default=os.path.expanduser("~/ray_results"), help='root directory for all data associated with the run') @@ -56,7 +56,7 @@ ModelCatalog.register_custom_model("SimpleConv", SimpleConvAgent) wandbLoggerCallback = WandbLoggerCallback( - project='conditional_actions', + project='conditional_action_trees', api_key_file='~/.wandb_rc', dir=args.root_directory ) @@ -107,5 +107,13 @@ "timesteps_total": max_training_steps, } - result = tune.run(ConditionalActionImpalaTrainer, local_dir=args.root_directory, config=config, stop=stop, - callbacks=[wandbLoggerCallback]) + trial_name_creator = lambda trial: f'baseline-{args.experiment_name}' + + result = tune.run( + ConditionalActionImpalaTrainer, + local_dir=args.root_directory, + config=config, + stop=stop, + callbacks=[wandbLoggerCallback], + trial_name_creator=trial_name_creator + ) diff --git a/python/examples/experiments/conditional_action_spaces/rllib_baseline_flat.py b/python/examples/experiments/conditional_action_spaces/rllib_baseline_flat.py index afda50765..b8549d9ec 100644 --- a/python/examples/experiments/conditional_action_spaces/rllib_baseline_flat.py +++ b/python/examples/experiments/conditional_action_spaces/rllib_baseline_flat.py @@ -19,11 +19,11 @@ from griddly.util.rllib.callbacks import GriddlyCallbacks from griddly.util.rllib.environment.core import RLlibEnv from griddly.util.rllib.torch.agents.common import layer_init -from griddly.util.rllib.torch.agents.conv_agent import SimpleConvAgent parser = argparse.ArgumentParser(description='Run experiments') parser.add_argument('--yaml-file', help='YAML file condining GDY for the game') +parser.add_argument('--experiment-name', default='unknown', help='name of the experiment') parser.add_argument('--root-directory', default=os.path.expanduser("~/ray_results"), help='root directory for all data associated with the run') @@ -201,9 +201,8 @@ def value_function(self): sep = os.pathsep os.environ['PYTHONPATH'] = sep.join(sys.path) - ray.init(include_dashboard=False, num_gpus=args.num_gpus, num_cpus=args.num_cpus) - #ray.init(include_dashboard=False, num_gpus=1, num_cpus=args.num_cpus, local_mode=True) + # ray.init(include_dashboard=False, num_gpus=1, num_cpus=args.num_cpus, local_mode=True) env_name = "ray-griddly-env" @@ -216,7 +215,7 @@ def _create_env(env_config): ModelCatalog.register_custom_model("SimpleConv", SimpleConvFlatAgent) wandbLoggerCallback = WandbLoggerCallback( - project='conditional_actions', + project='conditional_action_trees', api_key_file='~/.wandb_rc', dir=args.root_directory ) @@ -266,5 +265,13 @@ def _create_env(env_config): "timesteps_total": max_training_steps, } - result = tune.run(ImpalaTrainer, local_dir=args.root_directory, config=config, stop=stop, - callbacks=[wandbLoggerCallback]) + trial_name_creator = lambda trial: f'baseline-flat-{args.experiment_name}' + + result = tune.run( + ImpalaTrainer, + local_dir=args.root_directory, + config=config, + 
stop=stop, + callbacks=[wandbLoggerCallback], + trial_name_creator=trial_name_creator + ) diff --git a/src/Griddly/Core/Grid.cpp b/src/Griddly/Core/Grid.cpp index d482285ea..d207d101e 100644 --- a/src/Griddly/Core/Grid.cpp +++ b/src/Griddly/Core/Grid.cpp @@ -143,19 +143,19 @@ std::unordered_map Grid::executeAction(uint32_t playerId, std } if (sourceObject == nullptr) { - spdlog::warn("Cannot perform action on empty space. ({0},{1})", action->getSourceLocation()[0], action->getSourceLocation()[1]); + spdlog::debug("Cannot perform action on empty space. ({0},{1})", action->getSourceLocation()[0], action->getSourceLocation()[1]); return {}; } auto sourceObjectPlayerId = sourceObject->getPlayerId(); if (playerId != 0 && sourceObjectPlayerId != playerId) { - spdlog::warn("Cannot perform action on object not owned by player. Object owner {0}, Player owner {1}", sourceObjectPlayerId, playerId); + spdlog::debug("Cannot perform action on object not owned by player. Object owner {0}, Player owner {1}", sourceObjectPlayerId, playerId); return {}; } if (playerId != 0 && sourceObject->isPlayerAvatar() && playerAvatars_.find(playerId) == playerAvatars_.end()) { - spdlog::warn("Avatar for player {0} has been removed, action will be ignored.", playerId); + spdlog::debug("Avatar for player {0} has been removed, action will be ignored.", playerId); return {}; } @@ -176,7 +176,7 @@ std::unordered_map Grid::executeAction(uint32_t playerId, std return rewardAccumulator; } else { - spdlog::warn("Cannot perform action={0} on object={1}", action->getActionName(), sourceObject->getObjectName()); + spdlog::debug("Cannot perform action={0} on object={1}", action->getActionName(), sourceObject->getObjectName()); return {}; } } From 90f55261594e0b3e7391b44c37f8be44fce61d8d Mon Sep 17 00:00:00 2001 From: Bam4d Date: Thu, 1 Apr 2021 10:10:06 +0100 Subject: [PATCH 32/45] better naming for conditional action experiments --- .../conditional_action_spaces/rllib_baseline.py | 1 - .../rllib_conditional_actions.py | 15 ++++++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/python/examples/experiments/conditional_action_spaces/rllib_baseline.py b/python/examples/experiments/conditional_action_spaces/rllib_baseline.py index d982350e0..d29c7647c 100644 --- a/python/examples/experiments/conditional_action_spaces/rllib_baseline.py +++ b/python/examples/experiments/conditional_action_spaces/rllib_baseline.py @@ -12,7 +12,6 @@ from griddly.util.rllib.callbacks import GriddlyCallbacks from griddly.util.rllib.environment.core import RLlibEnv from griddly.util.rllib.torch.agents.conv_agent import SimpleConvAgent -# from griddly.util.rllib.callbacks import GriddlyCallbacks from griddly.util.rllib.torch.conditional_actions.conditional_action_policy_trainer import \ ConditionalActionImpalaTrainer diff --git a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py index 5f0987735..dcfcf8de6 100644 --- a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py +++ b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py @@ -19,6 +19,7 @@ parser = argparse.ArgumentParser(description='Run experiments') parser.add_argument('--yaml-file', help='YAML file containing GDY for the game') +parser.add_argument('--experiment-name', default='unknown', help='name of the experiment') parser.add_argument('--root-directory', default=os.path.expanduser("~/ray_results"), 
help='root directory for all data associated with the run') @@ -58,7 +59,7 @@ ModelCatalog.register_custom_model("SimpleConv", SimpleConvAgent) wandbLoggerCallback = WandbLoggerCallback( - project='conditional_actions', + project='conditional_action_trees', api_key_file='~/.wandb_rc', dir=args.root_directory ) @@ -115,5 +116,13 @@ "timesteps_total": max_training_steps, } - result = tune.run(ConditionalActionImpalaTrainer, local_dir=args.root_directory, config=config, stop=stop, - callbacks=[wandbLoggerCallback]) + trial_name_creator = lambda trial: f'CAT-{args.experiment_name}' + + result = tune.run( + ConditionalActionImpalaTrainer, + local_dir=args.root_directory, + config=config, + stop=stop, + callbacks=[wandbLoggerCallback], + trial_name_creator=trial_name_creator + ) From e3290b072ab8c2aa2b55f7baaebc0bdd32211380 Mon Sep 17 00:00:00 2001 From: Bam4d Date: Thu, 1 Apr 2021 12:50:08 +0100 Subject: [PATCH 33/45] add invalid_action_masking choice --- .../conditional_action_spaces/rllib_conditional_actions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py index dcfcf8de6..a6b07b851 100644 --- a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py +++ b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py @@ -116,7 +116,7 @@ "timesteps_total": max_training_steps, } - trial_name_creator = lambda trial: f'CAT-{args.experiment_name}' + trial_name_creator = lambda trial: f'CAT-{args.experiment_name}-{trial.config["env_config"]["invalid_action_masking"]}' result = tune.run( ConditionalActionImpalaTrainer, From e030ed02a8c6b7219714668aba0ed148aa1366d0 Mon Sep 17 00:00:00 2001 From: Bam4d Date: Sun, 11 Apr 2021 16:06:14 +0100 Subject: [PATCH 34/45] removing code from here and putting in a submodule so its easy to digest --- .../clusters_po.yaml | 310 ------------- .../clusters_po_with_push.yaml | 330 -------------- ...clusters_po_with_push_separate_colors.yaml | 409 ------------------ ...rs_po_with_push_separate_colors_units.yaml | 304 ------------- .../clusters_po_with_push_units.yaml | 258 ----------- .../rllib_baseline.py | 118 ----- .../rllib_baseline_flat.py | 277 ------------ .../rllib_conditional_actions.py | 128 ------ python/requirements.txt | 3 +- 9 files changed, 2 insertions(+), 2135 deletions(-) delete mode 100644 python/examples/experiments/conditional_action_spaces/clusters_po.yaml delete mode 100644 python/examples/experiments/conditional_action_spaces/clusters_po_with_push.yaml delete mode 100644 python/examples/experiments/conditional_action_spaces/clusters_po_with_push_separate_colors.yaml delete mode 100644 python/examples/experiments/conditional_action_spaces/clusters_po_with_push_separate_colors_units.yaml delete mode 100644 python/examples/experiments/conditional_action_spaces/clusters_po_with_push_units.yaml delete mode 100644 python/examples/experiments/conditional_action_spaces/rllib_baseline.py delete mode 100644 python/examples/experiments/conditional_action_spaces/rllib_baseline_flat.py delete mode 100644 python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py diff --git a/python/examples/experiments/conditional_action_spaces/clusters_po.yaml b/python/examples/experiments/conditional_action_spaces/clusters_po.yaml deleted file mode 100644 index 3d8b70722..000000000 --- 
a/python/examples/experiments/conditional_action_spaces/clusters_po.yaml +++ /dev/null @@ -1,310 +0,0 @@ -Version: "0.1" -Environment: - Name: Partially Observable Clusters - Description: Cluster the coloured objects together by pushing them against the static coloured blocks. - Observers: - Sprite2D: - TileSize: 24 - BackgroundTile: oryx/oryx_fantasy/floor1-2.png - Variables: - - Name: box_count - InitialValue: 0 - Player: - Observer: - RotateWithAvatar: true - TrackAvatar: true - Height: 5 - Width: 5 - OffsetX: 0 - OffsetY: 2 - AvatarObject: avatar # The player can only control a single avatar in the game - Termination: - Win: - - eq: [box_count, 0] - Lose: - - eq: [broken_box:count, 1] - - eq: [avatar:count, 0] - Levels: - - | - w w w w w w w w w w w w w - w . . . . . . . . . . . w - w . . 1 1 . . . 2 . 2 . w - w . . . . 1 . . . . . . w - w . . . a . . . . . 2 . w - w . . . . . . . h . . . w - w . . . . 1 . . . . b . w - w . . . . . . 1 . . . . w - w . . . . . . . . A . . w - w w w w w w w w w w w w w - - | - w w w w w w w w w w w w w - w . . . . . . . . . . . w - w . . 1 . . 2 . c 3 . . w - w . . . . h . . h . . . w - w . . . 2 . . 3 . . 1 . w - w . . . . b . . h . . . w - w . . 3 . . . 2 . . 1 . w - w . . h . h . . . a . . w - w . . . . . A . . . . . w - w w w w w w w w w w w w w - - | - w w w w w w w w w w w w w - w . . a . . b . . c . . w - w . . . . . . . . . . . w - w . . . . . . . . . . . w - w h h h h h . h h h h h w - w . . . . h . h . . . . w - w . 1 2 . h . h . 1 3 . w - w . 3 . . . . . . . 2 . w - w . . . . . A . . . . . w - w w w w w w w w w w w w w - - | - w w w w w w w w w w w w w - w . . . . . . . . . . . w - w . . . 1 . 2 . . c . . w - w . . . . . 3 . . 3 . . w - w . . a . 2 . . . h . . w - w . . . . h h . 3 . . . w - w . . 1 . . . . . 2 . . w - w . . . . . 1 . . b . . w - w . . . . . A . . . . . w - w w w w w w w w w w w w w - - | - w w w w w w w w w w w w w - w . . . . . . . . . . . w - w . . . . . . 1 . . . . w - w . . h . . b . . h . . w - w . . . . 1 . . . . . . w - w . . 3 . . . . 2 . . . w - w . . . a . h . . c . . w - w . . . . 3 . . . . 2 . w - w . . . . . A . . . . . 
w - w w w w w w w w w w w w w - -Actions: - - # A simple action to count the number of boxes in the game at the start - # Not currently a way to do complex things in termination conditions like combine multiple conditions - - Name: box_counter - InputMapping: - Internal: true - Inputs: - 1: - Description: "The only action here is to increment the box count" - Behaviours: - - Src: - Object: [blue_box, red_box, green_box] - Commands: - - incr: box_count - Dst: - Object: [blue_box, red_box, green_box] - - # Define the move action - - Name: move - InputMapping: - Inputs: - 1: - Description: Rotate left - OrientationVector: [-1, 0] - 2: - Description: Move forwards - OrientationVector: [0, -1] - VectorToDest: [0, -1] - 3: - Description: Rotate right - OrientationVector: [1, 0] - Relative: true - Behaviours: - - # Avatar rotates - - Src: - Object: avatar - Commands: - - rot: _dir - Dst: - Object: avatar - - # Avatar and boxes can move into empty space - - Src: - Object: [avatar, blue_box, green_box, red_box] - Commands: - - mov: _dest - Dst: - Object: _empty - - # Boxes can be pushed by the avatar - - Src: - Object: avatar - Commands: - - mov: _dest - Dst: - Object: [blue_box, green_box, red_box] - Commands: - - cascade: _dest - - # When boxes are pushed against the blocks they change - - Src: - Object: blue_box - Commands: - - change_to: blue_block - - reward: 1 - - decr: box_count - Dst: - Object: blue_block - - Src: - Object: red_box - Commands: - - reward: 1 - - change_to: red_block - - decr: box_count - Dst: - Object: red_block - - Src: - Object: green_box - Commands: - - reward: 1 - - change_to: green_block - - decr: box_count - Dst: - Object: green_block - - # Boxes break if they hit the spikes - - Src: - Object: [blue_box, green_box, red_box] - Commands: - - change_to: broken_box - - reward: -1 - Dst: - Object: spike - - # Avatar dies if it hits the spikes - - Src: - Object: avatar - Commands: - - remove: true - - reward: -1 - Dst: - Object: spike - -Objects: - - Name: avatar - MapCharacter: A - Observers: - Sprite2D: - - Image: gvgai/oryx/knight1.png - Block2D: - - Shape: triangle - Color: [0.0, 1.0, 0.0] - Scale: 0.8 - - - Name: wall - MapCharacter: w - Observers: - Sprite2D: - - TilingMode: WALL_16 - Image: - - oryx/oryx_fantasy/wall1-0.png - - oryx/oryx_fantasy/wall1-1.png - - oryx/oryx_fantasy/wall1-2.png - - oryx/oryx_fantasy/wall1-3.png - - oryx/oryx_fantasy/wall1-4.png - - oryx/oryx_fantasy/wall1-5.png - - oryx/oryx_fantasy/wall1-6.png - - oryx/oryx_fantasy/wall1-7.png - - oryx/oryx_fantasy/wall1-8.png - - oryx/oryx_fantasy/wall1-9.png - - oryx/oryx_fantasy/wall1-10.png - - oryx/oryx_fantasy/wall1-11.png - - oryx/oryx_fantasy/wall1-12.png - - oryx/oryx_fantasy/wall1-13.png - - oryx/oryx_fantasy/wall1-14.png - - oryx/oryx_fantasy/wall1-15.png - Block2D: - - Shape: square - Color: [0.5, 0.5, 0.5] - Scale: 0.9 - - - Name: spike - MapCharacter: h - Observers: - Sprite2D: - - Image: gvgai/oryx/spike2.png - Block2D: - - Shape: triangle - Color: [0.9, 0.1, 0.1] - Scale: 0.5 - - - Name: red_box - MapCharacter: "2" - InitialActions: - - Action: box_counter - ActionId: 1 - Observers: - Sprite2D: - - Image: gvgai/newset/blockR.png - Block2D: - - Shape: square - Color: [0.5, 0.2, 0.2] - Scale: 0.5 - - Name: red_block - MapCharacter: b - Observers: - Sprite2D: - - Image: gvgai/newset/blockR2.png - Block2D: - - Shape: square - Color: [1.0, 0.0, 0.0] - Scale: 1.0 - - - Name: green_box - MapCharacter: "3" - InitialActions: - - Action: box_counter - ActionId: 1 - Observers: - Sprite2D: - - Image: 
gvgai/newset/blockG.png - Block2D: - - Shape: square - Color: [0.2, 0.5, 0.2] - Scale: 0.5 - - Name: green_block - MapCharacter: c - Observers: - Sprite2D: - - Image: gvgai/newset/blockG2.png - Block2D: - - Shape: square - Color: [0.0, 1.0, 0.0] - Scale: 1.0 - - - Name: blue_box - MapCharacter: "1" - InitialActions: - - Action: box_counter - ActionId: 1 - Observers: - Sprite2D: - - Image: gvgai/newset/blockB.png - Block2D: - - Shape: square - Color: [0.2, 0.2, 0.5] - Scale: 0.5 - - Name: blue_block - MapCharacter: a - Observers: - Sprite2D: - - Image: gvgai/newset/blockB2.png - Block2D: - - Shape: square - Color: [0.0, 0.0, 1.0] - Scale: 1.0 - - - Name: broken_box - Observers: - Sprite2D: - - Image: gvgai/newset/block3.png - Block2D: - - Shape: triangle - Color: [1.0, 0.0, 1.0] - Scale: 1.0 diff --git a/python/examples/experiments/conditional_action_spaces/clusters_po_with_push.yaml b/python/examples/experiments/conditional_action_spaces/clusters_po_with_push.yaml deleted file mode 100644 index 9904e87e7..000000000 --- a/python/examples/experiments/conditional_action_spaces/clusters_po_with_push.yaml +++ /dev/null @@ -1,330 +0,0 @@ -Version: "0.1" -Environment: - Name: Partially Observable Clusters - Description: Cluster the coloured objects together by pushing them against the static coloured blocks. - Observers: - Sprite2D: - TileSize: 24 - BackgroundTile: oryx/oryx_fantasy/floor1-2.png - Variables: - - Name: box_count - InitialValue: 0 - Player: - Observer: - RotateWithAvatar: true - TrackAvatar: true - Height: 5 - Width: 5 - OffsetX: 0 - OffsetY: 2 - AvatarObject: avatar # The player can only control a single avatar in the game - Termination: - Win: - - eq: [box_count, 0] - Lose: - - eq: [broken_box:count, 1] - - eq: [avatar:count, 0] - Levels: - - | - w w w w w w w w w w w w w - w . . . . . . . . . . . w - w . . 1 1 . . . 2 . 2 . w - w . . . . 1 . . . . . . w - w . . . a . . . . . 2 . w - w . . . . . . . h . . . w - w . . . . 1 . . . . b . w - w . . . . . . 1 . . . . w - w . . . . . . . . A . . w - w w w w w w w w w w w w w - - | - w w w w w w w w w w w w w - w . . . . . . . . . . . w - w . . 1 . . 2 . c 3 . . w - w . . . . h . . h . . . w - w . . . 2 . . 3 . . 1 . w - w . . . . b . . h . . . w - w . . 3 . . . 2 . . 1 . w - w . . h . h . . . a . . w - w . . . . . A . . . . . w - w w w w w w w w w w w w w - - | - w w w w w w w w w w w w w - w . . a . . b . . c . . w - w . . . . . . . . . . . w - w . . . . . . . . . . . w - w h h h h h . h h h h h w - w . . . . h . h . . . . w - w . 1 2 . h . h . 1 3 . w - w . 3 . . . . . . . 2 . w - w . . . . . A . . . . . w - w w w w w w w w w w w w w - - | - w w w w w w w w w w w w w - w . . . . . . . . . . . w - w . . . 1 . 2 . . c . . w - w . . . . . 3 . . 3 . . w - w . . a . 2 . . . h . . w - w . . . . h h . 3 . . . w - w . . 1 . . . . . 2 . . w - w . . . . . 1 . . b . . w - w . . . . . A . . . . . w - w w w w w w w w w w w w w - - | - w w w w w w w w w w w w w - w . . . . . . . . . . . w - w . . . . . . 1 . . . . w - w . . h . . b . . h . . w - w . . . . 1 . . . . . . w - w . . 3 . . . . 2 . . . w - w . . . a . h . . c . . w - w . . . . 3 . . . . 2 . w - w . . . . . A . . . . . 
w - w w w w w w w w w w w w w - -Actions: - - # A simple action to count the number of boxes in the game at the start - # Not currently a way to do complex things in termination conditions like combine multiple conditions - - Name: box_counter - InputMapping: - Internal: true - Inputs: - 1: - Description: "The only action here is to increment the box count" - Behaviours: - - Src: - Object: [blue_box, red_box, green_box] - Commands: - - incr: box_count - Dst: - Object: [blue_box, red_box, green_box] - - # Define the move action - - Name: move - InputMapping: - Inputs: - 1: - Description: Rotate left - OrientationVector: [-1, 0] - 2: - Description: Move forwards - OrientationVector: [0, -1] - VectorToDest: [0, -1] - 3: - Description: Rotate right - OrientationVector: [1, 0] - Relative: true - Behaviours: - - # Avatar rotates - - Src: - Object: avatar - Commands: - - rot: _dir - Dst: - Object: avatar - - # Avatar can move into empty space - - Src: - Object: avatar - Commands: - - mov: _dest - Dst: - Object: _empty - - # Avatar dies if it hits the spikes - - Src: - Object: avatar - Commands: - - remove: true - - reward: -1 - Dst: - Object: spike - - - - Name: push - InputMapping: - Inputs: - 1: - Description: Push Forwards - OrientationVector: [ 0, -1 ] - VectorToDest: [ 0, -1 ] - Relative: true - Behaviours: - - # Boxes can be pushed by the avatar - - Src: - Object: avatar - Commands: - - mov: _dest - Dst: - Object: [blue_box, green_box, red_box] - Commands: - - cascade: _dest - - # Boxes break if they hit the spikes - - Src: - Object: [ blue_box, green_box, red_box ] - Commands: - - change_to: broken_box - - reward: -1 - Dst: - Object: spike - - # Boxes can pushed into empty space - - Src: - Object: [blue_box, green_box, red_box] - Commands: - - mov: _dest - Dst: - Object: _empty - - # When boxes are pushed against the blocks they change - - Src: - Object: blue_box - Commands: - - change_to: blue_block - - reward: 1 - - decr: box_count - Dst: - Object: blue_block - - Src: - Object: red_box - Commands: - - reward: 1 - - change_to: red_block - - decr: box_count - Dst: - Object: red_block - - Src: - Object: green_box - Commands: - - reward: 1 - - change_to: green_block - - decr: box_count - Dst: - Object: green_block - - -Objects: - - Name: avatar - MapCharacter: A - Observers: - Sprite2D: - - Image: gvgai/oryx/knight1.png - Block2D: - - Shape: triangle - Color: [0.0, 1.0, 0.0] - Scale: 0.8 - - - Name: wall - MapCharacter: w - Observers: - Sprite2D: - - TilingMode: WALL_16 - Image: - - oryx/oryx_fantasy/wall1-0.png - - oryx/oryx_fantasy/wall1-1.png - - oryx/oryx_fantasy/wall1-2.png - - oryx/oryx_fantasy/wall1-3.png - - oryx/oryx_fantasy/wall1-4.png - - oryx/oryx_fantasy/wall1-5.png - - oryx/oryx_fantasy/wall1-6.png - - oryx/oryx_fantasy/wall1-7.png - - oryx/oryx_fantasy/wall1-8.png - - oryx/oryx_fantasy/wall1-9.png - - oryx/oryx_fantasy/wall1-10.png - - oryx/oryx_fantasy/wall1-11.png - - oryx/oryx_fantasy/wall1-12.png - - oryx/oryx_fantasy/wall1-13.png - - oryx/oryx_fantasy/wall1-14.png - - oryx/oryx_fantasy/wall1-15.png - Block2D: - - Shape: square - Color: [0.5, 0.5, 0.5] - Scale: 0.9 - - - Name: spike - MapCharacter: h - Observers: - Sprite2D: - - Image: gvgai/oryx/spike2.png - Block2D: - - Shape: triangle - Color: [0.9, 0.1, 0.1] - Scale: 0.5 - - - Name: red_box - MapCharacter: "2" - InitialActions: - - Action: box_counter - ActionId: 1 - Observers: - Sprite2D: - - Image: gvgai/newset/blockR.png - Block2D: - - Shape: square - Color: [0.5, 0.2, 0.2] - Scale: 0.5 - - Name: red_block - 
MapCharacter: b - Observers: - Sprite2D: - - Image: gvgai/newset/blockR2.png - Block2D: - - Shape: square - Color: [1.0, 0.0, 0.0] - Scale: 1.0 - - - Name: green_box - MapCharacter: "3" - InitialActions: - - Action: box_counter - ActionId: 1 - Observers: - Sprite2D: - - Image: gvgai/newset/blockG.png - Block2D: - - Shape: square - Color: [0.2, 0.5, 0.2] - Scale: 0.5 - - Name: green_block - MapCharacter: c - Observers: - Sprite2D: - - Image: gvgai/newset/blockG2.png - Block2D: - - Shape: square - Color: [0.0, 1.0, 0.0] - Scale: 1.0 - - - Name: blue_box - MapCharacter: "1" - InitialActions: - - Action: box_counter - ActionId: 1 - Observers: - Sprite2D: - - Image: gvgai/newset/blockB.png - Block2D: - - Shape: square - Color: [0.2, 0.2, 0.5] - Scale: 0.5 - - Name: blue_block - MapCharacter: a - Observers: - Sprite2D: - - Image: gvgai/newset/blockB2.png - Block2D: - - Shape: square - Color: [0.0, 0.0, 1.0] - Scale: 1.0 - - - Name: broken_box - Observers: - Sprite2D: - - Image: gvgai/newset/block3.png - Block2D: - - Shape: triangle - Color: [1.0, 0.0, 1.0] - Scale: 1.0 diff --git a/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_separate_colors.yaml b/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_separate_colors.yaml deleted file mode 100644 index bb173e3bc..000000000 --- a/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_separate_colors.yaml +++ /dev/null @@ -1,409 +0,0 @@ -Version: "0.1" -Environment: - Name: Partially Observable Clusters - Description: Cluster the coloured objects together by pushing them against the static coloured blocks. - Observers: - Sprite2D: - TileSize: 24 - BackgroundTile: oryx/oryx_fantasy/floor1-2.png - Variables: - - Name: box_count - InitialValue: 0 - Player: - Observer: - RotateWithAvatar: true - TrackAvatar: true - Height: 5 - Width: 5 - OffsetX: 0 - OffsetY: 2 - AvatarObject: avatar # The player can only control a single avatar in the game - Termination: - Win: - - eq: [box_count, 0] - Lose: - - eq: [broken_box:count, 1] - - eq: [avatar:count, 0] - Levels: - - | - w w w w w w w w w w w w w - w . . . . . . . . . . . w - w . . 1 1 . . . 2 . 2 . w - w . . . . 1 . . . . . . w - w . . . a . . . . . 2 . w - w . . . . . . . h . . . w - w . . . . 1 . . . . b . w - w . . . . . . 1 . . . . w - w . . . . . . . . A . . w - w w w w w w w w w w w w w - - | - w w w w w w w w w w w w w - w . . . . . . . . . . . w - w . . 1 . . 2 . c 3 . . w - w . . . . h . . h . . . w - w . . . 2 . . 3 . . 1 . w - w . . . . b . . h . . . w - w . . 3 . . . 2 . . 1 . w - w . . h . h . . . a . . w - w . . . . . A . . . . . w - w w w w w w w w w w w w w - - | - w w w w w w w w w w w w w - w . . a . . b . . c . . w - w . . . . . . . . . . . w - w . . . . . . . . . . . w - w h h h h h . h h h h h w - w . . . . h . h . . . . w - w . 1 2 . h . h . 1 3 . w - w . 3 . . . . . . . 2 . w - w . . . . . A . . . . . w - w w w w w w w w w w w w w - - | - w w w w w w w w w w w w w - w . . . . . . . . . . . w - w . . . 1 . 2 . . c . . w - w . . . . . 3 . . 3 . . w - w . . a . 2 . . . h . . w - w . . . . h h . 3 . . . w - w . . 1 . . . . . 2 . . w - w . . . . . 1 . . b . . w - w . . . . . A . . . . . w - w w w w w w w w w w w w w - - | - w w w w w w w w w w w w w - w . . . . . . . . . . . w - w . . . . . . 1 . . . . w - w . . h . . b . . h . . w - w . . . . 1 . . . . . . w - w . . 3 . . . . 2 . . . w - w . . . a . h . . c . . w - w . . . . 3 . . . . 2 . w - w . . . . . A . . . . . 
w - w w w w w w w w w w w w w - -Actions: - - # A simple action to count the number of boxes in the game at the start - # Not currently a way to do complex things in termination conditions like combine multiple conditions - - Name: box_counter - InputMapping: - Internal: true - Inputs: - 1: - Description: "The only action here is to increment the box count" - Behaviours: - - Src: - Object: [blue_box, red_box, green_box] - Commands: - - incr: box_count - Dst: - Object: [blue_box, red_box, green_box] - - # Define the move action - - Name: move - InputMapping: - Inputs: - 1: - Description: Rotate left - OrientationVector: [-1, 0] - 2: - Description: Move forwards - OrientationVector: [0, -1] - VectorToDest: [0, -1] - 3: - Description: Rotate right - OrientationVector: [1, 0] - Relative: true - Behaviours: - - # Avatar rotates - - Src: - Object: avatar - Commands: - - rot: _dir - Dst: - Object: avatar - - # Avatar can move into empty space - - Src: - Object: avatar - Commands: - - mov: _dest - Dst: - Object: _empty - - - # Avatar dies if it hits the spikes - - Src: - Object: avatar - Commands: - - remove: true - - reward: -1 - Dst: - Object: spike - - - - Name: push_blue - InputMapping: - Inputs: - 1: - Description: Push Blue - OrientationVector: [ 0, -1 ] - VectorToDest: [ 0, -1 ] - Relative: true - Behaviours: - - # Boxes can be pushed by the avatar - - Src: - Object: avatar - Commands: - - mov: _dest - Dst: - Object: blue_box - Commands: - - cascade: _dest - - # Boxes break if they are pushed into the spikes - - Src: - Object: blue_box - Commands: - - change_to: broken_box - - reward: -1 - Dst: - Object: spike - - # Boxes can pushed into empty space - - Src: - Object: blue_box - Commands: - - mov: _dest - Dst: - Object: _empty - - # When boxes are pushed against the blocks they change - - Src: - Object: blue_box - Commands: - - change_to: blue_block - - reward: 1 - - decr: box_count - Dst: - Object: blue_block - - - Name: push_red - InputMapping: - Inputs: - 1: - Description: Push Red - OrientationVector: [ 0, -1 ] - VectorToDest: [ 0, -1 ] - Relative: true - Behaviours: - - # Boxes can be pushed by the avatar - - Src: - Object: avatar - Commands: - - mov: _dest - Dst: - Object: red_box - Commands: - - cascade: _dest - - # Boxes break if they are pushed into the spikes - - Src: - Object: red_box - Commands: - - change_to: broken_box - - reward: -1 - Dst: - Object: spike - - # Boxes can pushed into empty space - - Src: - Object: red_box - Commands: - - mov: _dest - Dst: - Object: _empty - - # When boxes are pushed against the blocks they change - - Src: - Object: red_box - Commands: - - reward: 1 - - change_to: red_block - - decr: box_count - Dst: - Object: red_block - - - Name: push_green - InputMapping: - Inputs: - 1: - Description: Push Green - OrientationVector: [ 0, -1 ] - VectorToDest: [ 0, -1 ] - Relative: true - Behaviours: - - # Boxes can be pushed by the avatar - - Src: - Object: avatar - Commands: - - mov: _dest - Dst: - Object: green_box - Commands: - - cascade: _dest - - # Boxes break if they are pushed into the spikes - - Src: - Object: green_box - Commands: - - change_to: broken_box - - reward: -1 - Dst: - Object: spike - - # Boxes can pushed into empty space - - Src: - Object: green_box - Commands: - - mov: _dest - Dst: - Object: _empty - - # When boxes are pushed against the blocks they change - - Src: - Object: green_box - Commands: - - reward: 1 - - change_to: green_block - - decr: box_count - Dst: - Object: green_block - - -Objects: - - Name: avatar - MapCharacter: A - 
Observers: - Sprite2D: - - Image: gvgai/oryx/knight1.png - Block2D: - - Shape: triangle - Color: [0.0, 1.0, 0.0] - Scale: 0.8 - - - Name: wall - MapCharacter: w - Observers: - Sprite2D: - - TilingMode: WALL_16 - Image: - - oryx/oryx_fantasy/wall1-0.png - - oryx/oryx_fantasy/wall1-1.png - - oryx/oryx_fantasy/wall1-2.png - - oryx/oryx_fantasy/wall1-3.png - - oryx/oryx_fantasy/wall1-4.png - - oryx/oryx_fantasy/wall1-5.png - - oryx/oryx_fantasy/wall1-6.png - - oryx/oryx_fantasy/wall1-7.png - - oryx/oryx_fantasy/wall1-8.png - - oryx/oryx_fantasy/wall1-9.png - - oryx/oryx_fantasy/wall1-10.png - - oryx/oryx_fantasy/wall1-11.png - - oryx/oryx_fantasy/wall1-12.png - - oryx/oryx_fantasy/wall1-13.png - - oryx/oryx_fantasy/wall1-14.png - - oryx/oryx_fantasy/wall1-15.png - Block2D: - - Shape: square - Color: [0.5, 0.5, 0.5] - Scale: 0.9 - - - Name: spike - MapCharacter: h - Observers: - Sprite2D: - - Image: gvgai/oryx/spike2.png - Block2D: - - Shape: triangle - Color: [0.9, 0.1, 0.1] - Scale: 0.5 - - - Name: red_box - MapCharacter: "2" - InitialActions: - - Action: box_counter - ActionId: 1 - Observers: - Sprite2D: - - Image: gvgai/newset/blockR.png - Block2D: - - Shape: square - Color: [0.5, 0.2, 0.2] - Scale: 0.5 - - Name: red_block - MapCharacter: b - Observers: - Sprite2D: - - Image: gvgai/newset/blockR2.png - Block2D: - - Shape: square - Color: [1.0, 0.0, 0.0] - Scale: 1.0 - - - Name: green_box - MapCharacter: "3" - InitialActions: - - Action: box_counter - ActionId: 1 - Observers: - Sprite2D: - - Image: gvgai/newset/blockG.png - Block2D: - - Shape: square - Color: [0.2, 0.5, 0.2] - Scale: 0.5 - - Name: green_block - MapCharacter: c - Observers: - Sprite2D: - - Image: gvgai/newset/blockG2.png - Block2D: - - Shape: square - Color: [0.0, 1.0, 0.0] - Scale: 1.0 - - - Name: blue_box - MapCharacter: "1" - InitialActions: - - Action: box_counter - ActionId: 1 - Observers: - Sprite2D: - - Image: gvgai/newset/blockB.png - Block2D: - - Shape: square - Color: [0.2, 0.2, 0.5] - Scale: 0.5 - - Name: blue_block - MapCharacter: a - Observers: - Sprite2D: - - Image: gvgai/newset/blockB2.png - Block2D: - - Shape: square - Color: [0.0, 0.0, 1.0] - Scale: 1.0 - - - Name: broken_box - Observers: - Sprite2D: - - Image: gvgai/newset/block3.png - Block2D: - - Shape: triangle - Color: [1.0, 0.0, 1.0] - Scale: 1.0 diff --git a/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_separate_colors_units.yaml b/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_separate_colors_units.yaml deleted file mode 100644 index bb932fc35..000000000 --- a/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_separate_colors_units.yaml +++ /dev/null @@ -1,304 +0,0 @@ -Version: "0.1" -Environment: - Name: Partially Observable Clusters - Description: Cluster the coloured objects together by pushing them against the static coloured blocks. - Observers: - Sprite2D: - TileSize: 24 - BackgroundTile: oryx/oryx_fantasy/floor1-2.png - Variables: - - Name: box_count - InitialValue: 0 - PerPlayer: true - - Name: broken_boxes - InitialValue: 0 - PerPlayer: true - Player: - Count: 1 - Termination: - Win: - - eq: [ box_count, 0 ] - Lose: - - eq: [ broken_boxes, 1 ] - Levels: - - | - w w w w w w w w w w w w w - w . . . . . . . . . . . w - w . . b1 b1 . . . r1 . r1 . w - w . . . . b1 . . . . . . w - w . . . B . . . . . r1 . w - w . . . . . . . x . . . w - w . . . . b1 . . . . R . w - w . . . . . . b1 . . . . w - w . . . . . . . . . . . 
w - w w w w w w w w w w w w w - - | - w w w w w w w w w w w w w - w . . . . . . . . . . . w - w . . b1 . . r1 . G g1 . . w - w . . . . x . . x . . . w - w . . . r1 . . g1 . . b1 . w - w . . . . R . . x . . . w - w . . g1 . . . r1 . . b1 . w - w . . x . x . . . B . . w - w . . . . . . . . . . . w - w w w w w w w w w w w w w - - | - w w w w w w w w w w w w w - w . . B . . R . . G . . w - w . . . . . . . . . . . w - w . . . . . . . . . . . w - w x x x x x . x x x x x w - w . . . . x . x . . . . w - w . b1 r1 . x . x . b1 g1 . w - w . g1 . . . . . . . r1 . w - w . . . . . . . . . . . w - w w w w w w w w w w w w w - - | - w w w w w w w w w w w w w - w . . . . . . . . . . . w - w . . . b1 . r1 . . G . . w - w . . . . . g1 . . g1 . . w - w . . B . r1 . . . x . . w - w . . . . x x . g1 . . . w - w . . b1 . . . . . r1 . . w - w . . . . . b1 . . R . . w - w . . . . . . . . . . . w - w w w w w w w w w w w w w - - | - w w w w w w w w w w w w w - w . . . . . . . . . . . w - w . . . . . . b1 . . . . w - w . . x . . R . . x . . w - w . . . . b1 . . . . . . w - w . . g1 . . . . r1 . . . w - w . . . B . x . . G . . w - w . . . . g1 . . . . r1 . w - w . . . . . . . . . . . w - w w w w w w w w w w w w w - -Actions: - - # A simple action to count the number of boxes in the game at the start - # Not currently a way to do complex things in termination conditions like combine multiple conditions - - Name: box_counter - InputMapping: - Internal: true - Inputs: - 1: - Description: "The only action here is to increment the box count" - Behaviours: - - Src: - Object: [ blue_box, red_box, green_box ] - Commands: - - incr: box_count - Dst: - Object: [ blue_box, red_box, green_box ] - - - Name: push_blue - Behaviours: - - # Boxes break if they are pushed into the spikes - - Src: - Object: blue_box - Commands: - - incr: broken_boxes - - change_to: broken_box - - reward: -1 - Dst: - Object: spike - - # Boxes can pushed into empty space - - Src: - Object: blue_box - Commands: - - mov: _dest - Dst: - Object: _empty - - # When boxes are pushed against the blocks they change - - Src: - Object: blue_box - Commands: - - change_to: blue_block - - reward: 1 - - decr: box_count - Dst: - Object: blue_block - - - Name: push_red - Behaviours: - - # Boxes break if they are pushed into the spikes - - Src: - Object: red_box - Commands: - - incr: broken_boxes - - change_to: broken_box - - reward: -1 - Dst: - Object: spike - - # Boxes can pushed into empty space - - Src: - Object: red_box - Commands: - - mov: _dest - Dst: - Object: _empty - - # When boxes are pushed against the blocks they change - - Src: - Object: red_box - Commands: - - reward: 1 - - change_to: red_block - - decr: box_count - Dst: - Object: red_block - - - Name: push_green - Behaviours: - - # Boxes break if they are pushed into the spikes - - Src: - Object: green_box - Commands: - - incr: broken_boxes - - change_to: broken_box - - reward: -1 - Dst: - Object: spike - - # Boxes can pushed into empty space - - Src: - Object: green_box - Commands: - - mov: _dest - Dst: - Object: _empty - - # When boxes are pushed against the blocks they change - - Src: - Object: green_box - Commands: - - reward: 1 - - change_to: green_block - - decr: box_count - Dst: - Object: green_block - - -Objects: - - - Name: wall - MapCharacter: w - Observers: - Sprite2D: - - TilingMode: WALL_16 - Image: - - oryx/oryx_fantasy/wall1-0.png - - oryx/oryx_fantasy/wall1-1.png - - oryx/oryx_fantasy/wall1-2.png - - oryx/oryx_fantasy/wall1-3.png - - oryx/oryx_fantasy/wall1-4.png - - 
oryx/oryx_fantasy/wall1-5.png - - oryx/oryx_fantasy/wall1-6.png - - oryx/oryx_fantasy/wall1-7.png - - oryx/oryx_fantasy/wall1-8.png - - oryx/oryx_fantasy/wall1-9.png - - oryx/oryx_fantasy/wall1-10.png - - oryx/oryx_fantasy/wall1-11.png - - oryx/oryx_fantasy/wall1-12.png - - oryx/oryx_fantasy/wall1-13.png - - oryx/oryx_fantasy/wall1-14.png - - oryx/oryx_fantasy/wall1-15.png - Block2D: - - Shape: square - Color: [ 0.5, 0.5, 0.5 ] - Scale: 0.9 - - - Name: spike - MapCharacter: x - Observers: - Sprite2D: - - Image: gvgai/oryx/spike2.png - Block2D: - - Shape: triangle - Color: [ 0.9, 0.1, 0.1 ] - Scale: 0.5 - - - Name: red_box - MapCharacter: r - InitialActions: - - Action: box_counter - ActionId: 1 - Observers: - Sprite2D: - - Image: gvgai/newset/blockR.png - Block2D: - - Shape: square - Color: [ 0.5, 0.2, 0.2 ] - Scale: 0.5 - - Name: red_block - MapCharacter: R - Observers: - Sprite2D: - - Image: gvgai/newset/blockR2.png - Block2D: - - Shape: square - Color: [ 1.0, 0.0, 0.0 ] - Scale: 1.0 - - - Name: green_box - MapCharacter: g - InitialActions: - - Action: box_counter - ActionId: 1 - Observers: - Sprite2D: - - Image: gvgai/newset/blockG.png - Block2D: - - Shape: square - Color: [ 0.2, 0.5, 0.2 ] - Scale: 0.5 - - Name: green_block - MapCharacter: G - Observers: - Sprite2D: - - Image: gvgai/newset/blockG2.png - Block2D: - - Shape: square - Color: [ 0.0, 1.0, 0.0 ] - Scale: 1.0 - - - Name: blue_box - MapCharacter: b - InitialActions: - - Action: box_counter - ActionId: 1 - Observers: - Sprite2D: - - Image: gvgai/newset/blockB.png - Block2D: - - Shape: square - Color: [ 0.2, 0.2, 0.5 ] - Scale: 0.5 - - Name: blue_block - MapCharacter: B - Observers: - Sprite2D: - - Image: gvgai/newset/blockB2.png - Block2D: - - Shape: square - Color: [ 0.0, 0.0, 1.0 ] - Scale: 1.0 - - - Name: broken_box - Observers: - Sprite2D: - - Image: gvgai/newset/block3.png - Block2D: - - Shape: triangle - Color: [ 1.0, 0.0, 1.0 ] - Scale: 1.0 diff --git a/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_units.yaml b/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_units.yaml deleted file mode 100644 index cae2d9a80..000000000 --- a/python/examples/experiments/conditional_action_spaces/clusters_po_with_push_units.yaml +++ /dev/null @@ -1,258 +0,0 @@ -Version: "0.1" -Environment: - Name: Partially Observable Clusters - Description: Cluster the coloured objects together by pushing them against the static coloured blocks. - Observers: - Sprite2D: - TileSize: 24 - BackgroundTile: oryx/oryx_fantasy/floor1-2.png - Variables: - - Name: box_count - InitialValue: 0 - PerPlayer: true - - Name: broken_boxes - InitialValue: 0 - PerPlayer: true - Player: - Count: 1 - Termination: - Win: - - eq: [ box_count, 0 ] - Lose: - - eq: [ broken_boxes, 1 ] - Levels: - - | - w w w w w w w w w w w w w - w . . . . . . . . . . . w - w . . b1 b1 . . . r1 . r1 . w - w . . . . b1 . . . . . . w - w . . . B . . . . . r1 . w - w . . . . . . . x . . . w - w . . . . b1 . . . . R . w - w . . . . . . b1 . . . . w - w . . . . . . . . . . . w - w w w w w w w w w w w w w - - | - w w w w w w w w w w w w w - w . . . . . . . . . . . w - w . . b1 . . r1 . G g1 . . w - w . . . . x . . x . . . w - w . . . r1 . . g1 . . b1 . w - w . . . . R . . x . . . w - w . . g1 . . . r1 . . b1 . w - w . . x . x . . . B . . w - w . . . . . . . . . . . w - w w w w w w w w w w w w w - - | - w w w w w w w w w w w w w - w . . B . . R . . G . . w - w . . . . . . . . . . . w - w . . . . . . . . . . . w - w x x x x x . x x x x x w - w . 
. . . x . x . . . . w - w . b1 r1 . x . x . b1 g1 . w - w . g1 . . . . . . . r1 . w - w . . . . . . . . . . . w - w w w w w w w w w w w w w - - | - w w w w w w w w w w w w w - w . . . . . . . . . . . w - w . . . b1 . r1 . . G . . w - w . . . . . g1 . . g1 . . w - w . . B . r1 . . . x . . w - w . . . . x x . g1 . . . w - w . . b1 . . . . . r1 . . w - w . . . . . b1 . . R . . w - w . . . . . . . . . . . w - w w w w w w w w w w w w w - - | - w w w w w w w w w w w w w - w . . . . . . . . . . . w - w . . . . . . b1 . . . . w - w . . x . . R . . x . . w - w . . . . b1 . . . . . . w - w . . g1 . . . . r1 . . . w - w . . . B . x . . G . . w - w . . . . g1 . . . . r1 . w - w . . . . . . . . . . . w - w w w w w w w w w w w w w - -Actions: - - # A simple action to count the number of boxes in the game at the start - # Not currently a way to do complex things in termination conditions like combine multiple conditions - - Name: box_counter - InputMapping: - Internal: true - Inputs: - 1: - Description: "The only action here is to increment the box count" - Behaviours: - - Src: - Object: [ blue_box, red_box, green_box ] - Commands: - - incr: box_count - Dst: - Object: [ blue_box, red_box, green_box ] - - - Name: push - Behaviours: - - # Boxes break if they hit the spikes - - Src: - Object: [ blue_box, green_box, red_box ] - Commands: - - incr: broken_boxes - - change_to: broken_box - - reward: -1 - Dst: - Object: spike - - # Boxes can pushed into empty space - - Src: - Object: [ blue_box, green_box, red_box ] - Commands: - - mov: _dest - Dst: - Object: _empty - - # When boxes are pushed against the blocks they change - - Src: - Object: blue_box - Commands: - - change_to: blue_block - - reward: 1 - - decr: box_count - Dst: - Object: blue_block - - Src: - Object: red_box - Commands: - - reward: 1 - - change_to: red_block - - decr: box_count - Dst: - Object: red_block - - Src: - Object: green_box - Commands: - - reward: 1 - - change_to: green_block - - decr: box_count - Dst: - Object: green_block - - -Objects: - - - Name: wall - MapCharacter: w - Observers: - Sprite2D: - - TilingMode: WALL_16 - Image: - - oryx/oryx_fantasy/wall1-0.png - - oryx/oryx_fantasy/wall1-1.png - - oryx/oryx_fantasy/wall1-2.png - - oryx/oryx_fantasy/wall1-3.png - - oryx/oryx_fantasy/wall1-4.png - - oryx/oryx_fantasy/wall1-5.png - - oryx/oryx_fantasy/wall1-6.png - - oryx/oryx_fantasy/wall1-7.png - - oryx/oryx_fantasy/wall1-8.png - - oryx/oryx_fantasy/wall1-9.png - - oryx/oryx_fantasy/wall1-10.png - - oryx/oryx_fantasy/wall1-11.png - - oryx/oryx_fantasy/wall1-12.png - - oryx/oryx_fantasy/wall1-13.png - - oryx/oryx_fantasy/wall1-14.png - - oryx/oryx_fantasy/wall1-15.png - Block2D: - - Shape: square - Color: [ 0.5, 0.5, 0.5 ] - Scale: 0.9 - - - Name: spike - MapCharacter: x - Observers: - Sprite2D: - - Image: gvgai/oryx/spike2.png - Block2D: - - Shape: triangle - Color: [ 0.9, 0.1, 0.1 ] - Scale: 0.5 - - - Name: red_box - MapCharacter: r - InitialActions: - - Action: box_counter - ActionId: 1 - Observers: - Sprite2D: - - Image: gvgai/newset/blockR.png - Block2D: - - Shape: square - Color: [ 0.5, 0.2, 0.2 ] - Scale: 0.5 - - Name: red_block - MapCharacter: R - Observers: - Sprite2D: - - Image: gvgai/newset/blockR2.png - Block2D: - - Shape: square - Color: [ 1.0, 0.0, 0.0 ] - Scale: 1.0 - - - Name: green_box - MapCharacter: g - InitialActions: - - Action: box_counter - ActionId: 1 - Observers: - Sprite2D: - - Image: gvgai/newset/blockG.png - Block2D: - - Shape: square - Color: [ 0.2, 0.5, 0.2 ] - Scale: 0.5 - - Name: green_block - 
MapCharacter: G - Observers: - Sprite2D: - - Image: gvgai/newset/blockG2.png - Block2D: - - Shape: square - Color: [ 0.0, 1.0, 0.0 ] - Scale: 1.0 - - - Name: blue_box - MapCharacter: b - InitialActions: - - Action: box_counter - ActionId: 1 - Observers: - Sprite2D: - - Image: gvgai/newset/blockB.png - Block2D: - - Shape: square - Color: [ 0.2, 0.2, 0.5 ] - Scale: 0.5 - - Name: blue_block - MapCharacter: B - Observers: - Sprite2D: - - Image: gvgai/newset/blockB2.png - Block2D: - - Shape: square - Color: [ 0.0, 0.0, 1.0 ] - Scale: 1.0 - - - Name: broken_box - Observers: - Sprite2D: - - Image: gvgai/newset/block3.png - Block2D: - - Shape: triangle - Color: [ 1.0, 0.0, 1.0 ] - Scale: 1.0 diff --git a/python/examples/experiments/conditional_action_spaces/rllib_baseline.py b/python/examples/experiments/conditional_action_spaces/rllib_baseline.py deleted file mode 100644 index d29c7647c..000000000 --- a/python/examples/experiments/conditional_action_spaces/rllib_baseline.py +++ /dev/null @@ -1,118 +0,0 @@ -import argparse -import os -import sys - -import ray -from ray import tune -from ray.rllib.models import ModelCatalog -from ray.tune.integration.wandb import WandbLoggerCallback -from ray.tune.registry import register_env - -from griddly import gd -from griddly.util.rllib.callbacks import GriddlyCallbacks -from griddly.util.rllib.environment.core import RLlibEnv -from griddly.util.rllib.torch.agents.conv_agent import SimpleConvAgent -from griddly.util.rllib.torch.conditional_actions.conditional_action_policy_trainer import \ - ConditionalActionImpalaTrainer - -parser = argparse.ArgumentParser(description='Run experiments') - -parser.add_argument('--yaml-file', help='YAML file condining GDY for the game') -parser.add_argument('--experiment-name', default='unknown', help='name of the experiment') - -parser.add_argument('--root-directory', default=os.path.expanduser("~/ray_results"), - help='root directory for all data associated with the run') -parser.add_argument('--num-gpus', default=1, type=int, help='Number of GPUs to make available to ray.') -parser.add_argument('--num-cpus', default=8, type=int, help='Number of CPUs to make available to ray.') - -parser.add_argument('--num-workers', default=7, type=int, help='Number of workers') -parser.add_argument('--num-envs-per-worker', default=5, type=int, help='Number of workers') -parser.add_argument('--num-gpus-per-worker', default=0, type=float, help='Number of gpus per worker') -parser.add_argument('--num-cpus-per-worker', default=1, type=float, help='Number of gpus per worker') -parser.add_argument('--max-training-steps', default=20000000, type=int, help='Number of workers') - -parser.add_argument('--capture-video', action='store_true', help='enable video capture') -parser.add_argument('--video-directory', default='videos', help='directory of video') -parser.add_argument('--video-frequency', type=int, default=1000000, help='Frequency of videos') - -parser.add_argument('--seed', type=int, default=69420, help='seed for experiments') - -parser.add_argument('--lr', type=float, default=0.0005, help='learning rate') - -if __name__ == '__main__': - - args = parser.parse_args() - - sep = os.pathsep - os.environ['PYTHONPATH'] = sep.join(sys.path) - - ray.init(include_dashboard=False, num_gpus=args.num_gpus, num_cpus=args.num_cpus) - #ray.init(include_dashboard=False, num_gpus=1, num_cpus=args.num_cpus, local_mode=True) - - env_name = "ray-griddly-env" - - register_env(env_name, RLlibEnv) - ModelCatalog.register_custom_model("SimpleConv", SimpleConvAgent) 
- - wandbLoggerCallback = WandbLoggerCallback( - project='conditional_action_trees', - api_key_file='~/.wandb_rc', - dir=args.root_directory - ) - - max_training_steps = args.max_training_steps - - - config = { - 'framework': 'torch', - 'seed': args.seed, - 'num_workers': args.num_workers, - 'num_envs_per_worker': args.num_envs_per_worker, - 'num_gpus_per_worker': float(args.num_gpus_per_worker), - 'num_cpus_per_worker': args.num_cpus_per_worker, - - 'callbacks': GriddlyCallbacks, - - 'model': { - 'custom_model': 'SimpleConv', - 'custom_model_config': {} - }, - 'env': env_name, - 'env_config': { - 'generate_valid_action_trees': False, - 'random_level_on_reset': True, - 'yaml_file': args.yaml_file, - 'global_observer_type': gd.ObserverType.SPRITE_2D, - 'max_steps': 1000, - }, - 'entropy_coeff_schedule': [ - [0, 0.01], - [max_training_steps, 0.0] - ], - 'lr_schedule': [ - [0, args.lr], - [max_training_steps, 0.0] - ], - - } - if args.capture_video: - real_video_frequency = int(args.video_frequency / (args.num_envs_per_worker * args.num_workers)) - config['env_config']['record_video_config'] = { - 'frequency': real_video_frequency, - 'directory': os.path.join(args.root_directory, args.video_directory) - } - - stop = { - "timesteps_total": max_training_steps, - } - - trial_name_creator = lambda trial: f'baseline-{args.experiment_name}' - - result = tune.run( - ConditionalActionImpalaTrainer, - local_dir=args.root_directory, - config=config, - stop=stop, - callbacks=[wandbLoggerCallback], - trial_name_creator=trial_name_creator - ) diff --git a/python/examples/experiments/conditional_action_spaces/rllib_baseline_flat.py b/python/examples/experiments/conditional_action_spaces/rllib_baseline_flat.py deleted file mode 100644 index b8549d9ec..000000000 --- a/python/examples/experiments/conditional_action_spaces/rllib_baseline_flat.py +++ /dev/null @@ -1,277 +0,0 @@ -import argparse -import os -import sys - -import gym -import numpy as np -import ray -import torch -from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 -from torch import nn -from gym.spaces import MultiDiscrete, Dict, Box -from ray import tune -from ray.rllib.agents.impala import ImpalaTrainer -from ray.rllib.models import ModelCatalog -from ray.tune.integration.wandb import WandbLoggerCallback -from ray.tune.registry import register_env - -from griddly import gd -from griddly.util.rllib.callbacks import GriddlyCallbacks -from griddly.util.rllib.environment.core import RLlibEnv -from griddly.util.rllib.torch.agents.common import layer_init - -parser = argparse.ArgumentParser(description='Run experiments') - -parser.add_argument('--yaml-file', help='YAML file condining GDY for the game') -parser.add_argument('--experiment-name', default='unknown', help='name of the experiment') - -parser.add_argument('--root-directory', default=os.path.expanduser("~/ray_results"), - help='root directory for all data associated with the run') -parser.add_argument('--num-gpus', default=1, type=int, help='Number of GPUs to make available to ray.') -parser.add_argument('--num-cpus', default=8, type=int, help='Number of CPUs to make available to ray.') - -parser.add_argument('--num-workers', default=7, type=int, help='Number of workers') -parser.add_argument('--num-envs-per-worker', default=5, type=int, help='Number of workers') -parser.add_argument('--num-gpus-per-worker', default=0, type=float, help='Number of gpus per worker') -parser.add_argument('--num-cpus-per-worker', default=1, type=float, help='Number of gpus per worker') 
-parser.add_argument('--max-training-steps', default=20000000, type=int, help='Number of workers') - -parser.add_argument('--capture-video', action='store_true', help='enable video capture') -parser.add_argument('--video-directory', default='videos', help='directory of video') -parser.add_argument('--video-frequency', type=int, default=1000000, help='Frequency of videos') - -parser.add_argument('--seed', type=int, default=69420, help='seed for experiments') - -parser.add_argument('--lr', type=float, default=0.0005, help='learning rate') - - -class FlatActionWrapper(gym.Wrapper): - - def __init__(self, env): - super().__init__(env) - - self._num_action_parts = 1 - self._action_params_offset = 0 - if not self.has_avatar: - self._num_action_parts += 1 - self._action_params_offset = 1 - - self._action_splits = np.zeros(self._num_action_parts) - - self._total_position_params = 0 - if not self.has_avatar: - self._action_splits[0] = self.width * self.height - self._total_position_params += self.width * self.height - - self._action_logit_offsets = {} - - total_action_params = 0 - for i, action_name in enumerate(self.env.action_names): - self._action_logit_offsets[action_name] = total_action_params + self._total_position_params - total_action_params += self.num_action_ids[action_name] - - self._action_splits[self._action_params_offset] = total_action_params - - self._total_actions = int(np.sum(self._action_splits)) - - self.action_space = MultiDiscrete(self._action_splits) - self.observation_space = Dict({ - 'obs': self.observation_space, - 'mask': Box(0, 1, shape=(self._total_actions,)), - }) - - def _get_flat_mask(self): - flat_mask = np.zeros(self._total_actions) - for location, action_names in self.env.game.get_available_actions(1).items(): - if not self.has_avatar: - flat_location = self.width * location[1] + location[0] - flat_mask[flat_location] = 1 - for action_name, action_ids in self.env.game.get_available_action_ids(location, list(action_names)).items(): - mask_offset = self._action_logit_offsets[action_name] - flat_mask[mask_offset:mask_offset + self.num_action_ids[action_name]][action_ids] = 1 - return flat_mask - - def _to_griddly_action(self, action): - # convert the flat action back to Griddly's tree based format - - griddly_action = [] - action_ptr = 0 - if not self.has_avatar: - x = action[action_ptr] % self.width - griddly_action.append(x) - y = int(action[action_ptr] / self.width) - griddly_action.append(y) - action_ptr += 1 - - if self.action_count > 0: - action_type_id = 0 - action_param_id = 0 - for action_name in self.action_names: - action_offset_after_position = (self._action_logit_offsets[action_name] - self._total_position_params) - next_offset = action_offset_after_position + self.num_action_ids[action_name] - if next_offset > action[action_ptr]: - action_param_id = action[action_ptr] - action_offset_after_position - break - action_type_id += 1 - - griddly_action.append(action_type_id) - griddly_action.append(action_param_id) - else: - griddly_action.append(action[action_ptr]) - - return griddly_action - - def reset(self, **kwargs): - - obs = super().reset(**kwargs) - - observations = { - 'obs': obs, - 'mask': self._get_flat_mask() - } - - return observations - - def step(self, action): - griddly_action = self._to_griddly_action(action) - - obs, reward, info, done = super().step(griddly_action) - - observations = { - 'obs': obs, - 'mask': self._get_flat_mask() - } - - return observations, reward, info, done - - -class SimpleConvFlatAgent(TorchModelV2, nn.Module): - - 
def __init__(self, obs_space, action_space, num_outputs, model_config, name): - super().__init__(obs_space, action_space, num_outputs, model_config, name) - nn.Module.__init__(self) - - self._num_objects = obs_space.original_space['obs'].shape[2] - self._num_actions = num_outputs - - linear_flatten = np.prod(obs_space.original_space['obs'].shape[:2]) * 64 - - self.network = nn.Sequential( - layer_init(nn.Conv2d(self._num_objects, 32, 3, padding=1)), - nn.ReLU(), - layer_init(nn.Conv2d(32, 64, 3, padding=1)), - nn.ReLU(), - nn.Flatten(), - layer_init(nn.Linear(linear_flatten, 1024)), - nn.ReLU(), - layer_init(nn.Linear(1024, 512)), - nn.ReLU(), - ) - - self._actor_head = nn.Sequential( - layer_init(nn.Linear(512, 256), std=0.01), - nn.ReLU(), - layer_init(nn.Linear(256, self._num_actions), std=0.01) - ) - - self._critic_head = nn.Sequential( - layer_init(nn.Linear(512, 1), std=0.01) - ) - - def forward(self, input_dict, state, seq_lens): - obs_transformed = input_dict['obs']['obs'].permute(0, 3, 1, 2) - mask = input_dict['obs']['mask'] - network_output = self.network(obs_transformed) - value = self._critic_head(network_output) - self._value = value.reshape(-1) - logits = self._actor_head(network_output) - - logits += torch.maximum(torch.log(mask), torch.tensor(torch.finfo().min)) - - return logits, state - - def value_function(self): - return self._value - - -if __name__ == '__main__': - - args = parser.parse_args() - - sep = os.pathsep - os.environ['PYTHONPATH'] = sep.join(sys.path) - - ray.init(include_dashboard=False, num_gpus=args.num_gpus, num_cpus=args.num_cpus) - # ray.init(include_dashboard=False, num_gpus=1, num_cpus=args.num_cpus, local_mode=True) - env_name = "ray-griddly-env" - - - def _create_env(env_config): - env = RLlibEnv(env_config) - return FlatActionWrapper(env) - - - register_env(env_name, _create_env) - ModelCatalog.register_custom_model("SimpleConv", SimpleConvFlatAgent) - - wandbLoggerCallback = WandbLoggerCallback( - project='conditional_action_trees', - api_key_file='~/.wandb_rc', - dir=args.root_directory - ) - - max_training_steps = args.max_training_steps - - config = { - 'framework': 'torch', - 'seed': args.seed, - 'num_workers': args.num_workers, - 'num_envs_per_worker': args.num_envs_per_worker, - 'num_gpus_per_worker': float(args.num_gpus_per_worker), - 'num_cpus_per_worker': args.num_cpus_per_worker, - - 'callbacks': GriddlyCallbacks, - - 'model': { - 'custom_model': 'SimpleConv', - 'custom_model_config': {} - }, - 'env': env_name, - 'env_config': { - 'generate_valid_action_trees': False, - 'random_level_on_reset': True, - 'yaml_file': args.yaml_file, - 'global_observer_type': gd.ObserverType.SPRITE_2D, - 'max_steps': 1000, - }, - 'entropy_coeff_schedule': [ - [0, 0.01], - [max_training_steps, 0.0] - ], - 'lr_schedule': [ - [0, args.lr], - [max_training_steps, 0.0] - ], - - } - if args.capture_video: - real_video_frequency = int(args.video_frequency / (args.num_envs_per_worker * args.num_workers)) - config['env_config']['record_video_config'] = { - 'frequency': real_video_frequency, - 'directory': os.path.join(args.root_directory, args.video_directory) - } - - stop = { - "timesteps_total": max_training_steps, - } - - trial_name_creator = lambda trial: f'baseline-flat-{args.experiment_name}' - - result = tune.run( - ImpalaTrainer, - local_dir=args.root_directory, - config=config, - stop=stop, - callbacks=[wandbLoggerCallback], - trial_name_creator=trial_name_creator - ) diff --git 
a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py b/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py deleted file mode 100644 index a6b07b851..000000000 --- a/python/examples/experiments/conditional_action_spaces/rllib_conditional_actions.py +++ /dev/null @@ -1,128 +0,0 @@ -import os -import sys - -import ray -from ray import tune -from ray.rllib.models import ModelCatalog -from ray.tune.integration.wandb import WandbLoggerCallback -from ray.tune.registry import register_env - -from griddly import gd -from griddly.util.rllib.callbacks import GriddlyCallbacks -from griddly.util.rllib.environment.core import RLlibEnv -from griddly.util.rllib.torch.agents.conv_agent import SimpleConvAgent -from griddly.util.rllib.torch.conditional_actions.conditional_action_policy_trainer import \ - ConditionalActionImpalaTrainer - -import argparse - -parser = argparse.ArgumentParser(description='Run experiments') - -parser.add_argument('--yaml-file', help='YAML file containing GDY for the game') -parser.add_argument('--experiment-name', default='unknown', help='name of the experiment') - -parser.add_argument('--root-directory', default=os.path.expanduser("~/ray_results"), - help='root directory for all data associated with the run') -parser.add_argument('--num-gpus', default=1, type=int, help='Number of GPUs to make available to ray.') -parser.add_argument('--num-cpus', default=8, type=int, help='Number of CPUs to make available to ray.') - -parser.add_argument('--num-workers', default=7, type=int, help='Number of workers') -parser.add_argument('--num-envs-per-worker', default=5, type=int, help='Number of workers') -parser.add_argument('--num-gpus-per-worker', default=0, type=float, help='Number of gpus per worker') -parser.add_argument('--num-cpus-per-worker', default=1, type=float, help='Number of gpus per worker') -parser.add_argument('--max-training-steps', default=20000000, type=int, help='Number of workers') - -parser.add_argument('--capture-video', action='store_true', help='enable video capture') -parser.add_argument('--video-directory', default='videos', help='directory of video') -parser.add_argument('--video-frequency', type=int, default=1000000, help='Frequency of videos') - -parser.add_argument('--allow-nop', action='store_true', default=False, help='allow NOP actions in action tree') -parser.add_argument('--vtrace-masking', action='store_true', default=False, help='use masks in vtrace calculations') - -parser.add_argument('--seed', type=int, default=69420, help='seed for experiments') - -parser.add_argument('--lr', type=float, default=0.0005, help='learning rate') - -if __name__ == '__main__': - - args = parser.parse_args() - - sep = os.pathsep - os.environ['PYTHONPATH'] = sep.join(sys.path) - - ray.init(include_dashboard=False, num_gpus=args.num_gpus, num_cpus=args.num_cpus) - #ray.init(include_dashboard=False, num_gpus=1, num_cpus=args.num_cpus, local_mode=True) - - env_name = "ray-griddly-env" - - register_env(env_name, RLlibEnv) - ModelCatalog.register_custom_model("SimpleConv", SimpleConvAgent) - - wandbLoggerCallback = WandbLoggerCallback( - project='conditional_action_trees', - api_key_file='~/.wandb_rc', - dir=args.root_directory - ) - - max_training_steps = args.max_training_steps - - config = { - 'framework': 'torch', - 'seed': args.seed, - 'num_workers': args.num_workers, - 'num_envs_per_worker': args.num_envs_per_worker, - 'num_gpus_per_worker': float(args.num_gpus_per_worker), - 'num_cpus_per_worker': 
args.num_cpus_per_worker, - - 'callbacks': GriddlyCallbacks, - - 'model': { - 'custom_model': 'SimpleConv', - 'custom_model_config': {} - }, - 'env': env_name, - 'env_config': { - - 'allow_nop': args.allow_nop, - 'invalid_action_masking': tune.grid_search(['conditional', 'collapsed']), - 'vtrace_masking': args.vtrace_masking, - #'invalid_action_masking': 'conditional', - 'generate_valid_action_trees': True, - #'level': 0, - 'random_level_on_reset': True, - 'yaml_file': args.yaml_file, - 'global_observer_type': gd.ObserverType.SPRITE_2D, - 'max_steps': 1000, - }, - 'entropy_coeff_schedule': [ - [0, 0.01], - [max_training_steps, 0.0] - ], - 'lr_schedule': [ - [0, args.lr], - [max_training_steps, 0.0] - ], - - } - - if args.capture_video: - real_video_frequency = int(args.video_frequency / (args.num_envs_per_worker * args.num_workers)) - config['env_config']['record_video_config'] = { - 'frequency': real_video_frequency, - 'directory': os.path.join(args.root_directory, args.video_directory) - } - - stop = { - "timesteps_total": max_training_steps, - } - - trial_name_creator = lambda trial: f'CAT-{args.experiment_name}-{trial.config["env_config"]["invalid_action_masking"]}' - - result = tune.run( - ConditionalActionImpalaTrainer, - local_dir=args.root_directory, - config=config, - stop=stop, - callbacks=[wandbLoggerCallback], - trial_name_creator=trial_name_creator - ) diff --git a/python/requirements.txt b/python/requirements.txt index bcc842968..1fb1c3aad 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -5,4 +5,5 @@ imageio>=2.9.0 pygame>=2.0.0 matplotlib>=3.3.3 pyglet -pytest>=6.2.1 \ No newline at end of file +pytest>=6.2.1 + From 61d2a992630b350fe33a95d7f968909efd0a34da Mon Sep 17 00:00:00 2001 From: Bam4d Date: Sun, 11 Apr 2021 16:08:08 +0100 Subject: [PATCH 35/45] adding code to submodule to keep this repo cleaner --- .gitmodules | 3 +++ python/examples/experiments/conditional-action-trees | 1 + 2 files changed, 4 insertions(+) create mode 160000 python/examples/experiments/conditional-action-trees diff --git a/.gitmodules b/.gitmodules index d4a3170ab..d5ee31d48 100644 --- a/.gitmodules +++ b/.gitmodules @@ -10,3 +10,6 @@ path = libs/glm url = https://github.com/g-truc/glm.git ignore = dirty +[submodule "python/examples/experiments/conditional-action-trees"] + path = python/examples/experiments/conditional-action-trees + url = git@github.com:Bam4d/conditional-action-trees.git diff --git a/python/examples/experiments/conditional-action-trees b/python/examples/experiments/conditional-action-trees new file mode 160000 index 000000000..e36bc5144 --- /dev/null +++ b/python/examples/experiments/conditional-action-trees @@ -0,0 +1 @@ +Subproject commit e36bc5144ec7becbe60cd24599a9db65e37de715 From 34f4c596ad180911fdeb4c4ca86aa74af8de6d6f Mon Sep 17 00:00:00 2001 From: Bam4d Date: Sun, 11 Apr 2021 16:09:57 +0100 Subject: [PATCH 36/45] info -> debug --- src/Griddly/Core/Observers/Vulkan/VulkanDevice.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Griddly/Core/Observers/Vulkan/VulkanDevice.cpp b/src/Griddly/Core/Observers/Vulkan/VulkanDevice.cpp index 43a82b50d..a5cda36ac 100644 --- a/src/Griddly/Core/Observers/Vulkan/VulkanDevice.cpp +++ b/src/Griddly/Core/Observers/Vulkan/VulkanDevice.cpp @@ -150,11 +150,11 @@ void VulkanDevice::initRenderMode(RenderMode mode) { switch (mode) { case SHAPES: - spdlog::info("Render mode set to SHAPES. Will only load shape render pipeline."); + spdlog::debug("Render mode set to SHAPES. 
Will only load shape render pipeline."); shapeBuffers_ = createShapeBuffers(); break; case SPRITES: - spdlog::info("Render mode set to SPRITES. Will load both shape and sprite render pipelines."); + spdlog::debug("Render mode set to SPRITES. Will load both shape and sprite render pipelines."); spriteShapeBuffer_ = createSpriteShapeBuffer(); break; } @@ -801,12 +801,12 @@ std::vector VulkanDevice::getSupportedPhysicalDevices( } if (deviceSelection.order == DeviceSelectionOrder::PCI_BUS_ID) { - spdlog::info("Sorting devices by PCI_BUS_ID ascending"); + spdlog::debug("Sorting devices by PCI_BUS_ID ascending"); std::sort(physicalDeviceInfoList.begin(), physicalDeviceInfoList.end(), [](const VulkanPhysicalDeviceInfo& a, const VulkanPhysicalDeviceInfo& b) -> bool { return a.pciBusId < b.pciBusId; }); } for (auto& physicalDeviceInfo : physicalDeviceInfoList) { - spdlog::info("Device {0}, isGpu {1}, PCI bus: {2}, isSupported {3}.", physicalDeviceInfo.deviceName, physicalDeviceInfo.isGpu, physicalDeviceInfo.pciBusId, physicalDeviceInfo.isSupported); + spdlog::debug("Device {0}, isGpu {1}, PCI bus: {2}, isSupported {3}.", physicalDeviceInfo.deviceName, physicalDeviceInfo.isGpu, physicalDeviceInfo.pciBusId, physicalDeviceInfo.isSupported); if (physicalDeviceInfo.isGpu) { physicalDeviceInfo.gpuIdx = gpuIdx++; @@ -815,7 +815,7 @@ std::vector VulkanDevice::getSupportedPhysicalDevices( if (physicalDeviceInfo.isSupported) { if (physicalDeviceInfo.isGpu && limitGpuUsage) { if (allowedGpuIdx.find(physicalDeviceInfo.gpuIdx) != allowedGpuIdx.end()) { - spdlog::info("GPU Device {0}, Id: {1}, PCI bus: {2} -> Visible", physicalDeviceInfo.deviceName, physicalDeviceInfo.gpuIdx, physicalDeviceInfo.pciBusId); + spdlog::debug("GPU Device {0}, Id: {1}, PCI bus: {2} -> Visible", physicalDeviceInfo.deviceName, physicalDeviceInfo.gpuIdx, physicalDeviceInfo.pciBusId); supportedPhysicalDeviceList.push_back(physicalDeviceInfo); } } else { @@ -852,7 +852,7 @@ VulkanPhysicalDeviceInfo VulkanDevice::getPhysicalDeviceInfo(VkPhysicalDevice& p auto deviceName = deviceProperties.deviceName; - spdlog::info("Device found {0}, PCI Bus: {1}. checking for Vulkan support...", deviceName, devicePCIBusInfo.pciBus); + spdlog::debug("Device found {0}, PCI Bus: {1}. 
checking for Vulkan support...", deviceName, devicePCIBusInfo.pciBus); bool isGpu = deviceProperties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU; bool isSupported = hasQueueFamilySupport(physicalDevice, queueFamilyIndices); From a13474a25bbb23069536c9eb4877828654ae192b Mon Sep 17 00:00:00 2001 From: Bam4d Date: Sun, 11 Apr 2021 16:15:40 +0100 Subject: [PATCH 37/45] remove example here as it will be in a separate repository --- .../rllib_single_agent_conditional_actions.py | 68 ------------------- 1 file changed, 68 deletions(-) delete mode 100644 python/examples/rllib/rllib_single_agent_conditional_actions.py diff --git a/python/examples/rllib/rllib_single_agent_conditional_actions.py b/python/examples/rllib/rllib_single_agent_conditional_actions.py deleted file mode 100644 index b2ff4efca..000000000 --- a/python/examples/rllib/rllib_single_agent_conditional_actions.py +++ /dev/null @@ -1,68 +0,0 @@ -import os -import sys - -import ray -from ray import tune -from ray.rllib.models import ModelCatalog -from ray.tune.registry import register_env - -from griddly import gd -from griddly.util.rllib.torch import GAPAgent -from griddly.util.rllib.torch.conditional_actions.conditional_action_policy_trainer import ConditionalActionImpalaTrainer -from griddly.util.rllib.environment.core import RLlibEnv - -if __name__ == '__main__': - sep = os.pathsep - os.environ['PYTHONPATH'] = sep.join(sys.path) - - ray.init(num_gpus=1) - #ray.init(num_gpus=1, local_mode=True) - - env_name = "ray-griddly-env" - - register_env(env_name, RLlibEnv) - ModelCatalog.register_custom_model("GAP", GAPAgent) - - max_training_steps = 20000000 - - config = { - 'framework': 'torch', - 'num_workers': 8, - 'num_envs_per_worker': 4, - - 'model': { - 'custom_model': 'GAP', - 'custom_model_config': {} - }, - 'env': env_name, - 'env_config': { - 'record_video_config': { - 'frequency': 100000, - 'directory': 'videos' - }, - - 'allow_nop': tune.grid_search([True, False]), - 'invalid_action_masking': tune.grid_search(['none', 'conditional']), - 'generate_valid_action_trees': True, - 'random_level_on_reset': True, - 'yaml_file': 'Single-Player/GVGAI/clusters_partially_observable.yaml', - 'global_observer_type': gd.ObserverType.SPRITE_2D, - 'max_steps': 1000, - }, - #'entropy_coeff_schedule': [ - # [0, 0.01], - # [max_training_steps, 0.0] - #], - #'lr_schedule': [ - # [0, 0.005], - # [max_training_steps, 0.0] - #], - - - } - - stop = { - "timesteps_total": max_training_steps, - } - - result = tune.run(ConditionalActionImpalaTrainer, config=config, stop=stop) From 11b064829bd896ea87ddb0b121cf8944186af9de Mon Sep 17 00:00:00 2001 From: Bam4d Date: Sun, 11 Apr 2021 17:37:28 +0100 Subject: [PATCH 38/45] try to fix windows/mac compile --- python/examples/experiments/conditional-action-trees | 2 +- src/Griddly/Core/Grid.hpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/examples/experiments/conditional-action-trees b/python/examples/experiments/conditional-action-trees index e36bc5144..0e8d6ffae 160000 --- a/python/examples/experiments/conditional-action-trees +++ b/python/examples/experiments/conditional-action-trees @@ -1 +1 @@ -Subproject commit e36bc5144ec7becbe60cd24599a9db65e37de715 +Subproject commit 0e8d6ffae5636d2dbbbbd95ed7a0fbe8771a8c54 diff --git a/src/Griddly/Core/Grid.hpp b/src/Griddly/Core/Grid.hpp index 3fb4fb31e..22e36cdbb 100644 --- a/src/Griddly/Core/Grid.hpp +++ b/src/Griddly/Core/Grid.hpp @@ -35,8 +35,8 @@ struct GridEvent { uint32_t sourceObjectPlayerId = 0; uint32_t
destinationObjectPlayerId = 0; - glm::vec2 sourceLocation; - glm::vec2 destLocation; + glm::ivec2 sourceLocation; + glm::ivec2 destLocation; }; struct GlobalVariableDefinition { From bf3407ac1017db1cc71498f09b2ad925f3f26065 Mon Sep 17 00:00:00 2001 From: Bam4d Date: Sun, 11 Apr 2021 17:52:40 +0100 Subject: [PATCH 39/45] try to fix windows/mac compile --- bindings/wrapper/GameWrapper.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bindings/wrapper/GameWrapper.cpp b/bindings/wrapper/GameWrapper.cpp index de3f0175b..77d8d9e46 100644 --- a/bindings/wrapper/GameWrapper.cpp +++ b/bindings/wrapper/GameWrapper.cpp @@ -380,8 +380,8 @@ class Py_GameWrapper { py_event["SourceObjectPlayerId"] = historyEvent.sourceObjectPlayerId; py_event["DestinationObjectPlayerId"] = historyEvent.destinationObjectPlayerId; - py_event["SourceLocation"] = std::array<uint32_t, 2>{historyEvent.sourceLocation.x, historyEvent.sourceLocation.y}; - py_event["DestinationLocation"] = std::array<uint32_t, 2>{historyEvent.destLocation.x, historyEvent.destLocation.y}; + py_event["SourceLocation"] = std::array<uint32_t, 2>{(uint32_t)historyEvent.sourceLocation.x, (uint32_t)historyEvent.sourceLocation.y}; + py_event["DestinationLocation"] = std::array<uint32_t, 2>{(uint32_t)historyEvent.destLocation.x, (uint32_t)historyEvent.destLocation.y}; py_events.push_back(py_event); } From 25f07cd369d07ecf8e1b44bcf7df24da9076e197 Mon Sep 17 00:00:00 2001 From: Bam4d Date: Mon, 12 Apr 2021 08:28:24 +0100 Subject: [PATCH 40/45] fixing submodule --- .gitmodules | 5 +++++ python/examples/experiments/conditional-action-trees | 1 + 2 files changed, 6 insertions(+) create mode 160000 python/examples/experiments/conditional-action-trees diff --git a/.gitmodules b/.gitmodules index d4a3170ab..c7835f09c 100644 --- a/.gitmodules +++ b/.gitmodules @@ -10,3 +10,8 @@ path = libs/glm url = https://github.com/g-truc/glm.git ignore = dirty +[submodule "python/examples/experiments/conditional-action-trees"] + path=python/examples/experiments/conditional-action-trees + url=https://github.com/Bam4d/conditional-action-trees + ignore = dirty + diff --git a/python/examples/experiments/conditional-action-trees b/python/examples/experiments/conditional-action-trees new file mode 160000 index 000000000..0e8d6ffae --- /dev/null +++ b/python/examples/experiments/conditional-action-trees @@ -0,0 +1 @@ +Subproject commit 0e8d6ffae5636d2dbbbbd95ed7a0fbe8771a8c54 From 3309a70a298bd97f0ffde24ad58782cc2d5a8a48 Mon Sep 17 00:00:00 2001 From: Bam4d Date: Mon, 12 Apr 2021 09:46:16 +0100 Subject: [PATCH 41/45] added tests for building conditional action trees --- python/tests/cat_test.py | 107 ++++++++++++++++++ python/tests/gdy/test_CAT_depth_1.yaml | 26 +++++ python/tests/gdy/test_CAT_depth_2.yaml | 35 ++++++ python/tests/gdy/test_CAT_depth_3.yaml | 25 ++++ python/tests/gdy/test_CAT_depth_4.yaml | 34 ++++++ .../tests/gdy/test_CAT_depth_4_2_players.yaml | 34 ++++++ 6 files changed, 261 insertions(+) create mode 100644 python/tests/cat_test.py create mode 100644 python/tests/gdy/test_CAT_depth_1.yaml create mode 100644 python/tests/gdy/test_CAT_depth_2.yaml create mode 100644 python/tests/gdy/test_CAT_depth_3.yaml create mode 100644 python/tests/gdy/test_CAT_depth_4.yaml create mode 100644 python/tests/gdy/test_CAT_depth_4_2_players.yaml diff --git a/python/tests/cat_test.py b/python/tests/cat_test.py new file mode 100644 index 000000000..4d3fa4236 --- /dev/null +++ b/python/tests/cat_test.py @@ -0,0 +1,107 @@ +import numpy as np +import gym +import pytest +from griddly import GymWrapperFactory, gd +
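+# A note on the tree shapes asserted in the tests below:
+# env.game.build_valid_action_trees() returns one nested dictionary per player.
+# The levels are keyed in the order [x, y, action_type, action_id]; levels that
+# are fixed for a given game collapse away (the location levels when an
+# AvatarObject is controlled, the action_type level when only one action is
+# defined), and action id 0 (the NOP action) appears alongside the valid ids
+# at the leaves.
+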
+@pytest.fixture +def test_name(request): + return request.node.name + + +def build_test_env(test_name, yaml_file): + wrapper_factory = GymWrapperFactory() + + wrapper_factory.build_gym_from_yaml( + test_name, + yaml_file, + global_observer_type=gd.ObserverType.VECTOR, + player_observer_type=gd.ObserverType.VECTOR, + ) + + env = gym.make(f'GDY-{test_name}-v0') + env.reset() + return env + + +def test_CAT_depth_1(test_name): + + env = build_test_env( + test_name, + "tests/gdy/test_CAT_depth_1.yaml" + ) + + valid_action_trees = env.game.build_valid_action_trees() + + assert len(valid_action_trees) == 1 + assert set(valid_action_trees[0].keys()) == {0, 1, 2, 3} + +def test_CAT_depth_2(test_name): + + env = build_test_env( + test_name, + "tests/gdy/test_CAT_depth_2.yaml" + ) + + valid_action_trees = env.game.build_valid_action_trees() + + assert len(valid_action_trees) == 1 + assert set(valid_action_trees[0].keys()) == {0, 1} + + assert set(valid_action_trees[0][0].keys()) == {0, 1, 2, 3} + assert set(valid_action_trees[0][1].keys()) == {0, 4} + +def test_CAT_depth_3(test_name): + + env = build_test_env( + test_name, + "tests/gdy/test_CAT_depth_3.yaml" + ) + + valid_action_trees = env.game.build_valid_action_trees() + + assert len(valid_action_trees) == 1 + assert set(valid_action_trees[0].keys()) == {1} + assert set(valid_action_trees[0][1].keys()) == {1} + assert set(valid_action_trees[0][1][1].keys()) == {0, 1, 2, 3} + +def test_CAT_depth_4(test_name): + + env = build_test_env( + test_name, + "tests/gdy/test_CAT_depth_4.yaml" + ) + + valid_action_trees = env.game.build_valid_action_trees() + + assert len(valid_action_trees) == 1 + assert set(valid_action_trees[0].keys()) == {1} + assert set(valid_action_trees[0][1].keys()) == {1} + assert set(valid_action_trees[0][1][1].keys()) == {0, 1} + + assert set(valid_action_trees[0][1][1][0].keys()) == {0, 1, 2, 3} + assert set(valid_action_trees[0][1][1][1].keys()) == {0, 4} + + +def test_CAT_depth_4_2_players(test_name): + env = build_test_env( + test_name, + "tests/gdy/test_CAT_depth_4_2_players.yaml" + ) + + valid_action_trees = env.game.build_valid_action_trees() + + assert len(valid_action_trees) == 2 + assert set(valid_action_trees[0].keys()) == {1} + assert set(valid_action_trees[0][1].keys()) == {1} + assert set(valid_action_trees[0][1][1].keys()) == {0, 1} + + assert set(valid_action_trees[0][1][1][0].keys()) == {0, 1, 2, 3} + assert set(valid_action_trees[0][1][1][1].keys()) == {0, 4} + + assert set(valid_action_trees[1].keys()) == {3} + assert set(valid_action_trees[1][3].keys()) == {1} + assert set(valid_action_trees[1][3][1].keys()) == {0, 1} + + assert set(valid_action_trees[1][3][1][0].keys()) == {0, 1, 2, 3} + assert set(valid_action_trees[1][3][1][1].keys()) == {0, 4} \ No newline at end of file diff --git a/python/tests/gdy/test_CAT_depth_1.yaml b/python/tests/gdy/test_CAT_depth_1.yaml new file mode 100644 index 000000000..cf28e7278 --- /dev/null +++ b/python/tests/gdy/test_CAT_depth_1.yaml @@ -0,0 +1,26 @@ +Version: "0.1" +Environment: + Player: + AvatarObject: avatar + Levels: + - | + . . . + . a . + . b . 
+ +Actions: + - Name: move + Behaviours: + - Src: + Object: avatar + Commands: + - mov: _dest + Dst: + Object: _empty + +Objects: + - Name: avatar + MapCharacter: a + - Name: wall + MapCharacter: b + diff --git a/python/tests/gdy/test_CAT_depth_2.yaml b/python/tests/gdy/test_CAT_depth_2.yaml new file mode 100644 index 000000000..68bbdc366 --- /dev/null +++ b/python/tests/gdy/test_CAT_depth_2.yaml @@ -0,0 +1,35 @@ +Version: "0.1" +Environment: + Player: + AvatarObject: avatar + Levels: + - | + . . . + . a . + . b . + +Actions: + - Name: move + Behaviours: + - Src: + Object: avatar + Commands: + - mov: _dest + Dst: + Object: _empty + + - Name: move2 + Behaviours: + - Src: + Object: avatar + Commands: + - reward: 1 + Dst: + Object: wall + +Objects: + - Name: avatar + MapCharacter: a + - Name: wall + MapCharacter: b + diff --git a/python/tests/gdy/test_CAT_depth_3.yaml b/python/tests/gdy/test_CAT_depth_3.yaml new file mode 100644 index 000000000..8ac6f21dc --- /dev/null +++ b/python/tests/gdy/test_CAT_depth_3.yaml @@ -0,0 +1,25 @@ +Version: "0.1" +Environment: + Player: + Count: 1 + Levels: + - | + . . . + . a1 . + . b . + +Actions: + - Name: move + Behaviours: + - Src: + Object: avatar + Commands: + - mov: _dest + Dst: + Object: _empty + +Objects: + - Name: avatar + MapCharacter: a + - Name: wall + MapCharacter: b \ No newline at end of file diff --git a/python/tests/gdy/test_CAT_depth_4.yaml b/python/tests/gdy/test_CAT_depth_4.yaml new file mode 100644 index 000000000..743b3f419 --- /dev/null +++ b/python/tests/gdy/test_CAT_depth_4.yaml @@ -0,0 +1,34 @@ +Version: "0.1" +Environment: + Player: + Count: 1 + Levels: + - | + . . . + . a1 . + . b . + +Actions: + - Name: move + Behaviours: + - Src: + Object: avatar + Commands: + - mov: _dest + Dst: + Object: _empty + + - Name: move2 + Behaviours: + - Src: + Object: avatar + Commands: + - reward: 1 + Dst: + Object: wall + +Objects: + - Name: avatar + MapCharacter: a + - Name: wall + MapCharacter: b \ No newline at end of file diff --git a/python/tests/gdy/test_CAT_depth_4_2_players.yaml b/python/tests/gdy/test_CAT_depth_4_2_players.yaml new file mode 100644 index 000000000..80ddb26c0 --- /dev/null +++ b/python/tests/gdy/test_CAT_depth_4_2_players.yaml @@ -0,0 +1,34 @@ +Version: "0.1" +Environment: + Player: + Count: 2 + Levels: + - | + . . . . . + . a1 . a2 . + . b . b . 
+
+Actions:
+  - Name: move
+    Behaviours:
+      - Src:
+          Object: avatar
+          Commands:
+            - mov: _dest
+        Dst:
+          Object: _empty
+
+  - Name: move2
+    Behaviours:
+      - Src:
+          Object: avatar
+          Commands:
+            - reward: 1
+        Dst:
+          Object: wall
+
+Objects:
+  - Name: avatar
+    MapCharacter: a
+  - Name: wall
+    MapCharacter: b
\ No newline at end of file
From d3eafbf59119b275e6048d1fe11c7577219ea3f9 Mon Sep 17 00:00:00 2001
From: Bam4d <chrisbam4d@gmail.com>
Date: Mon, 12 Apr 2021 10:34:37 +0100
Subject: [PATCH 42/45] adding a bit of docs and some debug lines for future

---
 bindings/wrapper/GameWrapper.cpp             |  8 ++++++++
 docs/getting-started/action spaces/index.rst | 10 +++++++++-
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/bindings/wrapper/GameWrapper.cpp b/bindings/wrapper/GameWrapper.cpp
index 77d8d9e46..65d0b43a0 100644
--- a/bindings/wrapper/GameWrapper.cpp
+++ b/bindings/wrapper/GameWrapper.cpp
@@ -77,19 +77,27 @@ class Py_GameWrapper {
     std::vector<py::dict> valid_action_trees;
     auto externalActionNames = gdyFactory_->getExternalActionNames();
+    spdlog::debug("Building tree, {0} actions", externalActionNames.size());
     for (int playerId = 1; playerId <= playerCount_; playerId++) {
       std::shared_ptr<ValidActionNode> node = std::shared_ptr<ValidActionNode>(new ValidActionNode());
       for (auto actionNamesAtLocation : gameProcess_->getAvailableActionNames(playerId)) {
         auto location = actionNamesAtLocation.first;
         auto actionNames = actionNamesAtLocation.second;
+
+        for (auto actionName : actionNames) {
+
+          spdlog::debug("[{0}] available at location [{1}, {2}]", actionName, location.x, location.y);
+
           std::shared_ptr<ValidActionNode> treePtr = node;
           auto actionInputsDefinitions = gdyFactory_->getActionInputsDefinitions();
           if (actionInputsDefinitions.find(actionName) != actionInputsDefinitions.end()) {
             auto locationVec = glm::ivec2{location[0], location[1]};
             auto actionIdsForName = gameProcess_->getAvailableActionIdsAtLocation(locationVec, actionName);
 
+            spdlog::debug("{0} action ids available", actionIdsForName.size());
+
             if (actionIdsForName.size() > 0) {
               if (gdyFactory_->getAvatarObject().length() == 0) {
                 auto py_x = locationVec[0];
diff --git a/docs/getting-started/action spaces/index.rst b/docs/getting-started/action spaces/index.rst
index 86ecacae3..6353a4dce 100644
--- a/docs/getting-started/action spaces/index.rst
+++ b/docs/getting-started/action spaces/index.rst
@@ -119,7 +119,15 @@ In order to easily support games with large action spaces such as RTS games, sev
 
 .. seealso:: A Closer Look at Action Masking in Policy Gradient Algorithms: https://arxiv.org/abs/2006.14171
 
-
+Valid Action Trees
+------------------
+
+Valid action trees are the building blocks of Conditional Action Trees: they allow masks to be applied to a complex action space iteratively, with each action component masked conditionally on the components selected before it.
+
+:env.game.build_valid_action_trees():
+    Returns a valid action tree for the current state, one per player.
+
+.. seealso:: You can find several examples of Conditional Action Trees being used with Griddly and RLlib here: https://github.com/Bam4d/conditional-action-trees
 ********
 Examples
 ********
From 2b5b1cab8f67b49491db99685aa3b7e1ad9ac31a Mon Sep 17 00:00:00 2001
From: Bam4d <chrisbam4d@gmail.com>
Date: Mon, 12 Apr 2021 17:15:44 +0100
Subject: [PATCH 43/45] fixing basic network docs

---
 docs/rllib/intro/index.rst | 25 ++++++++-----------------
 1 file changed, 8 insertions(+), 17 deletions(-)

diff --git a/docs/rllib/intro/index.rst b/docs/rllib/intro/index.rst
index 698cd381b..b5cae6d73 100644
--- a/docs/rllib/intro/index.rst
+++ b/docs/rllib/intro/index.rst
@@ -117,9 +117,9 @@ SimpleConvAgent
 
   .. code-block::
 
      class SimpleConvAgent(TorchModelV2, nn.Module):
-      """
-      Simple Convolution agent that calculates the required linear output layer
-      """
+          """
+          Simple Convolution agent that calculates the required linear output layer
+          """
 
          def __init__(self, obs_space, action_space, num_outputs, model_config, name):
              super().__init__(obs_space, action_space, num_outputs, model_config, name)
 
              self._num_objects = obs_space.shape[2]
              self._num_actions = num_outputs
 
              linear_flatten = np.prod(obs_space.shape[:2])*64
 
              self.network = nn.Sequential(
                  layer_init(nn.Conv2d(self._num_objects, 32, 3, padding=1)),
                  nn.ReLU(),
                  layer_init(nn.Conv2d(32, 64, 3, padding=1)),
                  nn.ReLU(),
-                 layer_init(nn.Conv2d(64, 64, 3, padding=1)),
-                 nn.ReLU(),
-                 layer_init(nn.Conv2d(64, 64, 3, padding=1)),
-                 nn.ReLU(),
                  nn.Flatten(),
                  layer_init(nn.Linear(linear_flatten, 1024)),
                  nn.ReLU(),
                  layer_init(nn.Linear(1024, 512)),
                  nn.ReLU(),
-                 layer_init(nn.Linear(512, 512))
              )
 
              self._actor_head = nn.Sequential(
-                 layer_init(nn.Linear(512, 512), std=0.01),
+                 layer_init(nn.Linear(512, 256), std=0.01),
                  nn.ReLU(),
-                 layer_init(nn.Linear(512, self._num_actions), std=0.01)
+                 layer_init(nn.Linear(256, self._num_actions), std=0.01)
              )
 
              self._critic_head = nn.Sequential(
@@ -214,6 +209,7 @@ GAPAgent
              nn.Module.__init__(self)
 
              self._num_objects = obs_space.shape[2]
+             self._num_actions = num_outputs
 
              self.network = nn.Sequential(
                  layer_init(nn.Conv2d(self._num_objects, 32, 3, padding=1)),
                  nn.ReLU(),
                  layer_init(nn.Conv2d(32, 64, 3, padding=1)),
                  nn.ReLU(),
-                 layer_init(nn.Conv2d(64, 64, 3, padding=1)),
-                 nn.ReLU(),
-                 layer_init(nn.Conv2d(64, 64, 3, padding=1)),
-                 nn.ReLU(),
                  GlobalAvePool(2048),
                  layer_init(nn.Linear(2048, 1024)),
                  nn.ReLU(),
                  layer_init(nn.Linear(1024, 512)),
                  nn.ReLU(),
-                 layer_init(nn.Linear(512, 512))
              )
 
              self._actor_head = nn.Sequential(
-                 layer_init(nn.Linear(512, 512), std=0.01),
+                 layer_init(nn.Linear(512, 256), std=0.01),
                  nn.ReLU(),
-                 layer_init(nn.Linear(512, self._num_actions), std=0.01)
+                 layer_init(nn.Linear(256, self._num_actions), std=0.01)
              )
 
              self._critic_head = nn.Sequential(
From d64d5700f2efe6da72b3963132b2d2245f56cfea Mon Sep 17 00:00:00 2001
From: Bam4d <chrisbam4d@gmail.com>
Date: Thu, 15 Apr 2021 10:26:39 +0100
Subject: [PATCH 44/45] updated experiment repo

---
 python/examples/experiments/conditional-action-trees | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/examples/experiments/conditional-action-trees b/python/examples/experiments/conditional-action-trees
index 0e8d6ffae..0c1cc7e39 160000
--- a/python/examples/experiments/conditional-action-trees
+++ b/python/examples/experiments/conditional-action-trees
@@ -1 +1 @@
-Subproject commit 0e8d6ffae5636d2dbbbbd95ed7a0fbe8771a8c54
+Subproject commit 0c1cc7e39e3024d0538064c629142abe130b0a3d
From 794cd14c77c73bfb7fc729d5f9ce7f419bb85622 Mon Sep 17 00:00:00 2001
From: Bam4d <chrisbam4d@gmail.com>
Date: Thu, 15 Apr 2021 10:28:13 +0100
Subject: [PATCH 45/45] bumping version numbers

---
 .github/ISSUE_TEMPLATE/bug_report.md | 2 +-
 CMakeLists.txt                       | 2 +-
 bindings/python.cpp                  | 2 +-
 docs/conf.py                         | 2 +-
 python/setup.py                      | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
index 0cd31c1d4..1cfb6bf66 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -24,7 +24,7 @@ If applicable, add screenshots to help explain your problem.
 
 **Desktop (please complete the following information):**
  - OS: [e.g. mac/linux/windows]
- - Version [e.g. 1.0.0]
+ - Version [e.g. 1.0.1]
 
 **Additional context**
 Add any other context about the problem here.
diff --git a/CMakeLists.txt b/CMakeLists.txt index 270aa3a82..140f1ca16 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.10.0) -project(Griddly VERSION 1.0.0) +project(Griddly VERSION 1.0.1) set(BINARY ${CMAKE_PROJECT_NAME}) diff --git a/bindings/python.cpp b/bindings/python.cpp index a5fde9813..a6677cd8f 100644 --- a/bindings/python.cpp +++ b/bindings/python.cpp @@ -12,7 +12,7 @@ namespace griddly { PYBIND11_MODULE(python_griddly, m) { m.doc() = "Griddly python bindings"; - m.attr("version") = "1.0.0"; + m.attr("version") = "1.0.1"; #ifndef NDEBUG spdlog::set_level(spdlog::level::debug); diff --git a/docs/conf.py b/docs/conf.py index 47a2bb244..074e63f9a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -22,7 +22,7 @@ author = 'Chris Bamford' # The full version, including alpha/beta/rc tags -release = '1.0.0' +release = '1.0.1' # -- General configuration --------------------------------------------------- diff --git a/python/setup.py b/python/setup.py index 430fb88d1..4f2178df1 100644 --- a/python/setup.py +++ b/python/setup.py @@ -71,7 +71,7 @@ def griddly_package_data(config='Debug'): setup( name='griddly', - version="1.0.0", + version="1.0.1", author_email="chrisbam4d@gmail.com", description="Griddly Python Libraries", long_description=long_description,
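
A short, self-contained sketch of how the valid action trees asserted in the unit tests above can be consumed. It builds one of the test environments, fetches the per-player trees with env.game.build_valid_action_trees(), and samples a valid action by walking a tree one component at a time: the iterative masking behaviour that the Valid Action Trees docs added in PATCH 42 describe. The nested-dict node layout is inferred from the test assertions; the environment name, the uniform random choice at each level, and the leaf test are illustrative assumptions, not Griddly's actual sampling code.

import random

import gym

from griddly import GymWrapperFactory, gd

if __name__ == '__main__':
    wrapper_factory = GymWrapperFactory()

    # Any of the test GDY files above will work here
    wrapper_factory.build_gym_from_yaml(
        'ValidActionTreeSketch',
        'tests/gdy/test_CAT_depth_4.yaml',
        global_observer_type=gd.ObserverType.VECTOR,
        player_observer_type=gd.ObserverType.VECTOR,
    )

    env = gym.make('GDY-ValidActionTreeSketch-v0')
    env.reset()

    # One tree per player; each node maps a valid value for the current action
    # component to the sub-tree of values that remain valid after choosing it.
    valid_action_trees = env.game.build_valid_action_trees()

    for player_index, tree in enumerate(valid_action_trees):
        action = []
        node = tree
        while isinstance(node, dict) and len(node) > 0:
            # The keys of the current node are exactly the mask of valid values
            # for this component, conditioned on the components chosen so far.
            component = random.choice(list(node.keys()))
            action.append(component)
            node = node[component]
        print(f'player {player_index + 1}: sampled valid action {action}')

Replacing the uniform choice with a masked sample over the policy logits at each level yields the conditional sampling the docs describe: each level of the tree masks exactly one component of the composite action.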