From b18a31c6372d860f741ed135d08f65e86ae2b5f9 Mon Sep 17 00:00:00 2001
From: Lucas Alegre
Date: Sun, 27 Oct 2024 15:33:16 -0300
Subject: [PATCH] hotfix walker2d energy cost

---
 mo_gymnasium/envs/mujoco/walker2d_v4.py | 2 +-
 mo_gymnasium/envs/mujoco/walker2d_v5.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/mo_gymnasium/envs/mujoco/walker2d_v4.py b/mo_gymnasium/envs/mujoco/walker2d_v4.py
index efa87bf..e4a7493 100644
--- a/mo_gymnasium/envs/mujoco/walker2d_v4.py
+++ b/mo_gymnasium/envs/mujoco/walker2d_v4.py
@@ -26,7 +26,7 @@ def __init__(self, **kwargs):
     def step(self, action):
         observation, reward, terminated, truncated, info = super().step(action)
         velocity = info["x_velocity"]
-        neg_energy_cost = info["reward_ctrl"] / self._ctrl_cost_weight
+        neg_energy_cost = -np.sum(np.square(action))
 
         vec_reward = np.array([velocity, neg_energy_cost], dtype=np.float32)
 
diff --git a/mo_gymnasium/envs/mujoco/walker2d_v5.py b/mo_gymnasium/envs/mujoco/walker2d_v5.py
index 5b036db..cfe4327 100644
--- a/mo_gymnasium/envs/mujoco/walker2d_v5.py
+++ b/mo_gymnasium/envs/mujoco/walker2d_v5.py
@@ -29,9 +29,9 @@ def __init__(self, **kwargs):
     def step(self, action):
         observation, reward, terminated, truncated, info = super().step(action)
         velocity = info["x_velocity"]
-        energy = -np.sum(np.square(action))
+        neg_energy_cost = info["reward_ctrl"] / self._ctrl_cost_weight
 
-        vec_reward = np.array([velocity, energy], dtype=np.float32)
+        vec_reward = np.array([velocity, neg_energy_cost], dtype=np.float32)
 
         vec_reward += self.healthy_reward  # All objectives are penalyzed when the agent falls
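
Note (not part of the patch): assuming the base environment defines the control cost as reward_ctrl = -ctrl_cost_weight * sum(action**2), as Gymnasium's Walker2d does, the two expressions swapped above recover the same unweighted negative energy term; the standalone sketch below checks that with an example action and an assumed default weight of 1e-3.

# Sketch only, not part of the patch. Assumes the base environment reports
# info["reward_ctrl"] = -ctrl_cost_weight * np.sum(np.square(action)).
import numpy as np

ctrl_cost_weight = 1e-3  # assumed default control-cost weight
action = np.array([0.4, -0.2, 0.7, 0.1, -0.5, 0.3])  # example 6-dim Walker2d action

# What the base env would report in info["reward_ctrl"] under the assumption above.
reward_ctrl = -ctrl_cost_weight * np.sum(np.square(action))

neg_energy_from_info = reward_ctrl / ctrl_cost_weight  # formulation used in walker2d_v5 after the patch
neg_energy_from_action = -np.sum(np.square(action))    # formulation used in walker2d_v4 after the patch

# Both paths yield the same (unweighted) negative energy objective.
assert np.isclose(neg_energy_from_info, neg_energy_from_action)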