Skip to content

Commit

Permalink
hotfix walker2d energy cost
Browse files Browse the repository at this point in the history
  • Loading branch information
LucasAlegre committed Oct 27, 2024
1 parent 311f378 commit b18a31c
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 3 deletions.
2 changes: 1 addition & 1 deletion mo_gymnasium/envs/mujoco/walker2d_v4.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def __init__(self, **kwargs):
def step(self, action):
observation, reward, terminated, truncated, info = super().step(action)
velocity = info["x_velocity"]
neg_energy_cost = info["reward_ctrl"] / self._ctrl_cost_weight
neg_energy_cost = -np.sum(np.square(action))

vec_reward = np.array([velocity, neg_energy_cost], dtype=np.float32)

Expand Down
4 changes: 2 additions & 2 deletions mo_gymnasium/envs/mujoco/walker2d_v5.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ def __init__(self, **kwargs):
def step(self, action):
observation, reward, terminated, truncated, info = super().step(action)
velocity = info["x_velocity"]
energy = -np.sum(np.square(action))
neg_energy_cost = info["reward_ctrl"] / self._ctrl_cost_weight

vec_reward = np.array([velocity, energy], dtype=np.float32)
vec_reward = np.array([velocity, neg_energy_cost], dtype=np.float32)

vec_reward += self.healthy_reward  # All objectives are penalized when the agent falls

Expand Down

0 comments on commit b18a31c

Please sign in to comment.