From f5d5641958f131a740989b7478fe953f73f110fa Mon Sep 17 00:00:00 2001 From: Mohith Sakthivel Date: Wed, 14 Oct 2020 08:52:04 +0530 Subject: [PATCH 1/2] Fix wrappers bug --- wrappers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wrappers.py b/wrappers.py index 6862705..aae77b5 100644 --- a/wrappers.py +++ b/wrappers.py @@ -343,7 +343,7 @@ def __getattr__(self, name): def observation_space(self): spaces = self._env.observation_space.spaces assert 'reward' not in spaces - spaces['reward'] = gym.spaces.Box(-np.inf, np.inf, dtype=np.float32) + spaces['reward'] = gym.spaces.Box(-np.inf, np.inf, (), dtype=np.float32) return gym.spaces.Dict(spaces) def step(self, action): From 4c540477527614c4dd55015f3dce1c4c82977428 Mon Sep 17 00:00:00 2001 From: Mohith Sakthivel Date: Mon, 26 Oct 2020 19:44:43 +0530 Subject: [PATCH 2/2] Convert obs dtype with reset --- wrappers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/wrappers.py b/wrappers.py index aae77b5..41503ce 100644 --- a/wrappers.py +++ b/wrappers.py @@ -176,6 +176,7 @@ def step(self, action): def reset(self): obs = self._env.reset() + obs = {k: self._convert(v) for k, v in obs.items()} transition = obs.copy() transition['action'] = np.zeros(self._env.action_space.shape) transition['reward'] = 0.0