Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rendering and arm movement fixes #474

Merged
merged 3 commits on Apr 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion docs/CNAME

This file was deleted.

14 changes: 8 additions & 6 deletions metaworld/envs/mujoco/sawyer_xyz/sawyer_xyz_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,13 +172,8 @@ def __init__(
np.array([+1, +1, +1, +1]),
dtype=np.float64,
)

# Technically these observation lengths are different between v1 and v2,
# but we handle that elsewhere and just stick with v2 numbers here
self._obs_obj_max_len = 14

self._set_task_called = False

self.hand_init_pos = None # OVERRIDE ME
self._target_pos = None # OVERRIDE ME
self._random_reset_space = None # OVERRIDE ME
Expand All @@ -189,6 +184,8 @@ def __init__(
# doesn't seem to matter (it will only effect frame-stacking for the
# very first observation)

self.init_qpos = np.copy(self.data.qpos)
self.init_qvel = np.copy(self.data.qvel)
self._prev_obs = self._get_curr_obs_combined_no_goal()

EzPickle.__init__(
Expand Down Expand Up @@ -538,10 +535,15 @@ def evaluate_state(self, obs, action):
# V1 environments don't have to implement it
raise NotImplementedError

def reset_model(self):
qpos = self.init_qpos
qvel = self.init_qvel
self.set_state(qpos, qvel)

def reset(self, seed=None, options=None):
self.curr_path_length = 0
self.reset_model()
obs, info = super().reset()
mujoco.mj_forward(self.model, self.data)
self._prev_obs = obs[:18].copy()
obs[18:36] = self._prev_obs
obs = np.float64(obs)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ def reset_model(self):
mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_SITE, "goal")
]
self._set_obj_xyz(self.obj_init_pos)
self.model.site("goal").pos = self._target_pos
return self._get_obs()

def compute_reward(self, action, obs):
Expand Down
2 changes: 1 addition & 1 deletion metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_box_close_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def reset_model(self):
mujoco.mj_step(self.model, self.data)

self._set_obj_xyz(self.obj_init_pos)

self.model.site("goal").pos = self._target_pos
return self._get_obs()

@staticmethod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ def reset_model(self):
] = pos_machine

self._target_pos = pos_mug_goal
self.model.site("mug_goal").pos = self._target_pos
return self._get_obs()

def compute_reward(self, action, obs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ def reset_model(self):
] = pos_machine

self._target_pos = pos_mug_goal
self.model.site("coffee_goal").pos = self._target_pos
return self._get_obs()

def compute_reward(self, action, obs):
Expand Down
1 change: 1 addition & 0 deletions metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_dial_turn_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ def reset_model(self):
mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "dial")
] = self.obj_init_pos
self.dial_push_position = self._get_pos_objects() + np.array([0.05, 0.02, 0.09])
self.model.site("goal").pos = self._target_pos
mujoco.mj_forward(self.model, self.data)
return self._get_obs()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def reset_model(self):

# keep the door open after resetting initial positions
self._set_obj_xyz(-1.5708)

self.model.site("goal").pos = self._target_pos
return self._get_obs()

@_assert_task_is_set
Expand Down
2 changes: 1 addition & 1 deletion metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_door_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def reset_model(self):
self.data.geom("handle").xpos[:-1] - self._target_pos[:-1]
)
self.target_reward = 1000 * self.maxPullDist + 1000 * 2

self.model.site("goal").pos = self._target_pos
return self._get_obs()

@staticmethod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def reset_model(self):
# Pull drawer out all the way and mark its starting position
self._set_obj_xyz(-self.maxDist)
self.obj_init_pos = self._get_pos_objects()

self.model.site("goal").pos = self._target_pos
return self._get_obs()

def compute_reward(self, action, obs):
Expand Down
3 changes: 1 addition & 2 deletions metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_drawer_open_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,7 @@ def reset_model(self):
self._target_pos = self.obj_init_pos + np.array(
[0.0, -0.16 - self.maxDist, 0.09]
)
mujoco.mj_forward(self.model, self.data)

self.model.site("goal").pos = self._target_pos
return self._get_obs()

def compute_reward(self, action, obs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def reset_model(self):
[-self._handle_length, 0.0, 0.125]
)
mujoco.mj_forward(self.model, self.data)
self.model.site("goal_close").pos = self._target_pos
return self._get_obs()

def _reset_hand(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def reset_model(self):
self._target_pos = self.obj_init_pos + np.array(
[+self._handle_length, 0.0, 0.125]
)
mujoco.mj_forward(self.model, self.data)
self.model.site("goal_open").pos = self._target_pos
return self._get_obs()

def _reset_hand(self):
Expand Down
1 change: 0 additions & 1 deletion metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_hammer_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@ def reset_model(self):
self.nail_init_pos = self._get_site_pos("nailHead")
self.obj_init_pos = self.hammer_init_pos.copy()
self._set_hammer_xyz(self.hammer_init_pos)

return self._get_obs()

@staticmethod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ def reset_model(self):
self._target_pos = goal_pos[-3:]

self._set_obj_xyz(self.obj_init_pos)
self.model.site("goal").pos = self._target_pos
return self._get_obs()

def compute_reward(self, action, obs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@ def evaluate_state(self, obs, action):
object_grasped,
in_place,
) = self.compute_reward(action, obs)

info = {
"success": float(target_to_obj <= self.TARGET_RADIUS),
"near_object": float(tcp_to_obj <= 0.05),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def reset_model(self):
self._target_pos = self.obj_init_pos + np.array(
[0.12, 0.0, 0.25 + self.LEVER_RADIUS]
)
mujoco.mj_forward(self.model, self.data)
self.model.site("goal").pos = self._target_pos
return self._get_obs()

def compute_reward(self, action, obs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ def reset_model(self):
mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "box")
] = pos_box
self._target_pos = pos_box + np.array([0.03, 0.0, 0.13])
self.model.site("goal").pos = self._target_pos
return self._get_obs()

def compute_reward(self, action, obs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def reset_model(self):
self.obj_init_pos = self._get_site_pos("pegEnd")

self._target_pos = pos_plug + np.array([0.15, 0.0, 0.0])

self.model.site("goal").pos = self._target_pos
return self._get_obs()

def compute_reward(self, action, obs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def reset_model(self):
self.obj_init_pos = pos_obj
self._set_obj_xyz(self.obj_init_pos)
self._target_pos = pos_goal

self.model.site("goal").pos = self._target_pos
return self._get_obs()

def compute_reward(self, action, obs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def reset_model(self):
self.init_right_pad = self.get_body_com("rightpad")

self._set_obj_xyz(self.obj_init_pos)

self.model.site("goal").pos = self._target_pos
return self._get_obs()

def _gripper_caging_reward(self, action, obj_position):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def reset_model(self):
self.obj_init_pos = goal_pos[:3]

self._set_obj_xyz(self.obj_init_pos)

self.model.site("goal").pos = self._target_pos
return self._get_obs()

def compute_reward(self, action, obs):
Expand Down
2 changes: 1 addition & 1 deletion metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_push_back_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def reset_model(self):
self.obj_init_pos = np.concatenate((goal_pos[:2], [self.obj_init_pos[-1]]))

self._set_obj_xyz(self.obj_init_pos)

self.model.site("goal").pos = self._target_pos
return self._get_obs()

def _gripper_caging_reward(self, action, obj_position, obj_radius):
Expand Down
2 changes: 1 addition & 1 deletion metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_push_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def reset_model(self):
self.obj_init_pos = np.concatenate((goal_pos[:2], [self.obj_init_pos[-1]]))

self._set_obj_xyz(self.obj_init_pos)

self.model.site("goal").pos = self._target_pos
return self._get_obs()

def compute_reward(self, action, obs):
Expand Down
1 change: 1 addition & 0 deletions metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_push_wall_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ def reset_model(self):
self.obj_init_pos = np.concatenate((goal_pos[:2], [self.obj_init_pos[-1]]))

self._set_obj_xyz(self.obj_init_pos)
self.model.site("goal").pos = self._target_pos
return self._get_obs()

def compute_reward(self, action, obs):
Expand Down
4 changes: 2 additions & 2 deletions metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_reach_v2.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import mujoco
import numpy as np
from gymnasium.spaces import Box
from scipy.spatial.transform import Rotation
Expand Down Expand Up @@ -113,7 +112,8 @@ def reset_model(self):
self._target_pos = goal_pos[-3:]
self.obj_init_pos = goal_pos[:3]
self._set_obj_xyz(self.obj_init_pos)
mujoco.mj_forward(self.model, self.data)

self._set_pos_site("goal", self._target_pos)
return self._get_obs()

def compute_reward(self, actions, obs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def reset_model(self):
self.obj_init_pos = goal_pos[:3]

self._set_obj_xyz(self.obj_init_pos)

self._set_pos_site("goal", self._target_pos)
return self._get_obs()

def compute_reward(self, actions, obs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def reset_model(self):
)

self._set_obj_xyz(self.obj_init_pos)

self._set_pos_site("goal", self._target_pos)
return self._get_obs()

def compute_reward(self, action, obs):
Expand Down
2 changes: 1 addition & 1 deletion metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_soccer_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def reset_model(self):
self.maxPushDist = np.linalg.norm(
self.obj_init_pos[:2] - np.array(self._target_pos)[:2]
)

self._set_pos_site("goal", self._target_pos)
return self._get_obs()

def _gripper_caging_reward(self, action, obj_position, obj_radius):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def reset_model(self):
self._set_stick_xyz(self.stick_init_pos)
self._set_obj_xyz(self.obj_init_qpos)
self.obj_init_pos = self.get_body_com("object").copy()

self._set_pos_site("goal", self._target_pos)
return self._get_obs()

def _stick_is_inserted(self, handle, end_of_stick):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def reset_model(self):
self._set_stick_xyz(self.stick_init_pos)
self._set_obj_xyz(self.obj_init_qpos)
self.obj_init_pos = self.get_body_com("object").copy()

self._set_pos_site("goal", self._target_pos)
return self._get_obs()

def _gripper_caging_reward(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def reset_model(self):
self.maxPushDist = np.linalg.norm(
self.obj_init_pos[:2] - np.array(self._target_pos)[:2]
)

self._set_pos_site("goal", self._target_pos)
return self._get_obs()

def _gripper_caging_reward(self, action, obj_position, obj_radius):
Expand Down
2 changes: 1 addition & 1 deletion metaworld/envs/mujoco/sawyer_xyz/v2/sawyer_sweep_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def reset_model(self):
self.get_body_com("obj")[:-1] - self._target_pos[:-1]
)
self.target_reward = 1000 * self.maxPushDist + 1000 * 2

self._set_pos_site("goal", self._target_pos)
return self._get_obs()

def _gripper_caging_reward(self, action, obj_position, obj_radius):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def reset_model(self):
[0.2, 0.0, 0.0]
)
self.data.joint("window_slide").qpos = 0.2
mujoco.mj_forward(self.model, self.data)
self._set_pos_site("goal", self._target_pos)
return self._get_obs()

def _reset_hand(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def reset_model(self):

self.window_handle_pos_init = self._get_pos_objects()
self.data.joint("window_slide").qpos = 0.0
mujoco.mj_forward(self.model, self.data)
self._set_pos_site("goal", self._target_pos)
return self._get_obs()

def compute_reward(self, actions, obs):
Expand Down
Loading