From 7b861a8bbdde325208da7f0eed8e6eb0f894cb6c Mon Sep 17 00:00:00 2001 From: Charles xu Date: Wed, 22 Nov 2023 18:06:34 -0800 Subject: [PATCH] added environment --- .gitignore | 2 +- robot_infra/env/__init__.py | 5 + robot_infra/env/franka_bin_pick_env.py | 300 ++++++++++++++++ robot_infra/env/franka_cable_env.py | 247 +++++++++++++ robot_infra/env/franka_pcb_env.py | 130 +++++++ robot_infra/env/franka_robotiq_env.py | 472 +++++++++++++++++++++++++ robot_infra/env/wrappers.py | 208 +++++++++++ 7 files changed, 1363 insertions(+), 1 deletion(-) create mode 100644 robot_infra/env/__init__.py create mode 100644 robot_infra/env/franka_bin_pick_env.py create mode 100644 robot_infra/env/franka_cable_env.py create mode 100644 robot_infra/env/franka_pcb_env.py create mode 100644 robot_infra/env/franka_robotiq_env.py create mode 100644 robot_infra/env/wrappers.py diff --git a/.gitignore b/.gitignore index 47ff034..7eec5c1 100644 --- a/.gitignore +++ b/.gitignore @@ -105,7 +105,7 @@ celerybeat.pid # Environments .env .venv -env/ +# env/ venv/ ENV/ env.bak/ diff --git a/robot_infra/env/__init__.py b/robot_infra/env/__init__.py new file mode 100644 index 0000000..06558f0 --- /dev/null +++ b/robot_infra/env/__init__.py @@ -0,0 +1,5 @@ +from env.franka_robotiq_env import FrankaRobotiq +from env.franka_pcb_env import PCBEnv +from env.franka_cable_env import RouteCableEnv +from env.franka_cable_env import ResetCableEnv +from env.franka_bin_pick_env import BinPickEnv \ No newline at end of file diff --git a/robot_infra/env/franka_bin_pick_env.py b/robot_infra/env/franka_bin_pick_env.py new file mode 100644 index 0000000..462f5a4 --- /dev/null +++ b/robot_infra/env/franka_bin_pick_env.py @@ -0,0 +1,300 @@ +import gym +from gym import spaces +import numpy as np +from franka_robotiq_env import FrankaRobotiq +import time +from scipy.spatial.transform import Rotation +import requests +import copy +import cv2 +from camera.video_capture import VideoCapture +from camera.rs_capture import 
RSCapture +import queue + +class BinPickEnv(FrankaRobotiq): + def __init__(self): + super().__init__() + # Bouding box + self.xyz_bounding_box = gym.spaces.Box( + np.array((0.44, -0.12, 0.04)), np.array((0.53, 0.12, 0.1)), dtype=np.float64 + ) + self.rpy_bounding_box = gym.spaces.Box( + # np.array((np.pi-0.001, 0-0.001, np.pi/4)), + # np.array((np.pi+0.001, 0+0.001, 3*np.pi/4)), + np.array((np.pi-0.001, 0-0.001, 0-0.01)), + np.array((np.pi+0.001, 0+0.001, 0+0.01)), + dtype=np.float64, + ) + self.inner_box = gym.spaces.Box( + np.array([0.44, -0.04, 0.04]), + np.array([0.53, 0.04, 0.08]), + dtype=np.float64 + ) + self.drop_box = gym.spaces.Box( + np.array([0.44, -0.04]), + np.array([0.53, 0.04]), + dtype=np.float64 + ) + ## Action/Observation Space + self.action_space = gym.spaces.Box( + np.array((-0.03, -0.03, -0.03, -0.05, -0.05, -0.2, -1)), + np.array((0.03, 0.03, 0.03, 0.05, 0.05, 0.2, 1)), + ) + # enable gripper in observation space + self.observation_space['state_observation']['gripper_pose'] = spaces.Box(-np.inf, np.inf, shape=(1,)) + self.centerpos = copy.deepcopy(self.resetpos) + self.centerpos[:3] = np.mean((self.xyz_bounding_box.high, self.xyz_bounding_box.low), axis=0) #np.array([0.55,-0.05,0.09]) + self.centerpos[2] += 0.01 + self.resetpos = copy.deepcopy(self.centerpos) + self.resetpos[3:] = self.euler_2_quat(np.pi, 0., 0) + + def go_to_rest(self, jpos=False): + count = 0 + requests.post(self.url + "precision_mode") + if jpos: + restp_new = copy.deepcopy(self.currpos) + restp_new[2] = 0.3 + dp = restp_new - self.currpos + count_1 = 0 + self._send_pos_command(self.currpos) + requests.post(self.url + "precision_mode") + while ( + (np.linalg.norm(dp[:3]) > 0.03 or np.linalg.norm(dp[3:]) > 0.04) + ) and count_1 < 50: + if np.linalg.norm(dp[3:]) > 0.05: + dp[3:] = 0.05 * dp[3:] / np.linalg.norm(dp[3:]) + if np.linalg.norm(dp[:3]) > 0.03: + dp[:3] = 0.03 * dp[:3] / np.linalg.norm(dp[:3]) + self._send_pos_command(self.currpos + dp) + time.sleep(0.1) + 
self.update_currpos() + dp = restp_new - self.currpos + count_1 += 1 + + print("JOINT RESET") + requests.post(self.url + "jointreset") + else: + # print("RESET") + self.update_currpos() + restp = copy.deepcopy(self.resetpos[:]) + if self.randomreset: + restp[:2] += np.random.uniform(-0.005, 0.005, (2,)) + restp[2] += np.random.uniform(-0.005, 0.005, (1,)) + # restyaw += np.random.uniform(-np.pi / 6, np.pi / 6) + # restp[3:] = self.euler_2_quat(np.pi, 0, restyaw) + + restp_new = copy.deepcopy(restp) + restp_new[2] = 0.13 #cable + dp = restp_new - self.currpos + + height = np.zeros_like(self.resetpos) + height[2] = 0.02 + while count < 10: + self._send_pos_command(self.currpos + height) + time.sleep(0.1) + self.update_currpos() + count += 1 + + count = 0 + while count < 200 and ( + np.linalg.norm(dp[:3]) > 0.01 or np.linalg.norm(dp[3:]) > 0.03 + ): + if np.linalg.norm(dp[3:]) > 0.02: + dp[3:] = 0.05 * dp[3:] / np.linalg.norm(dp[3:]) + if np.linalg.norm(dp[:3]) > 0.02: + dp[:3] = 0.02 * dp[:3] / np.linalg.norm(dp[:3]) + self._send_pos_command(self.currpos + dp) + time.sleep(0.1) + self.update_currpos() + dp = restp_new - self.currpos + count += 1 + + dp = restp - self.currpos + count = 0 + while count < 20 and ( + np.linalg.norm(dp[:3]) > 0.01 or np.linalg.norm(dp[3:]) > 0.01 + ): + if np.linalg.norm(dp[3:]) > 0.05: + dp[3:] = 0.05 * dp[3:] / np.linalg.norm(dp[3:]) + if np.linalg.norm(dp[:3]) > 0.02: + dp[:3] = 0.02 * dp[:3] / np.linalg.norm(dp[:3]) + self._send_pos_command(self.currpos + dp) + time.sleep(0.1) + self.update_currpos() + dp = restp - self.currpos + count += 1 + requests.post(self.url + "peg_compliance_mode") + return count < 50 + + def get_im(self): + images = {} + for key, cap in self.cap.items(): + try: + rgb = cap.read() + # images[key] = cv2.resize(rgb, self.observation_space['image_observation'][key].shape[:2][::-1]) + if key == 'wrist_1': + # cropped_rgb = rgb[ 100:400, 50:350, :] + cropped_rgb = rgb[:, 80:560, :] + if key == 'wrist_2': + # 
cropped_rgb = rgb[ 50:350, 200:500, :] #150:450 + cropped_rgb = rgb[:, 80:560, :] + # if key == 'side_1': + # cropped_rgb = rgb[150:330, 230:410, :] + + images[key] = cv2.resize(cropped_rgb, self.observation_space['image_observation'][key].shape[:2][::-1]) + # images[key] = cv2.resize(rgb, self.observation_space['image_observation'][key].shape[:2][::-1]) + images[key + "_full"] = rgb + # images[f"{key}_depth"] = depth + except queue.Empty: + input(f'{key} camera frozen. Check connect, then press enter to relaunch...') + cap.close() + # if key == 'side_1': + # cap = RSCapture(name='side_1', serial_number='128422270679', depth=True) + # elif key == 'side_2': + # cap = RSCapture(name='side_2', serial_number='127122270146', depth=True) + if key == 'wrist_1': + cap = RSCapture(name='wrist_1', serial_number='130322274175', depth=False) + elif key == 'wrist_2': + # cap = RSCapture(name='wrist_2', serial_number='127122270572', depth=False) + cap = RSCapture(name='wrist_2', serial_number='127122270572', depth=False) + elif key == 'side_1': + cap = RSCapture(name='side_1', serial_number='128422272758', depth=False) + else: + raise KeyError + self.cap[key] = VideoCapture(cap) + return self.get_im() + + self.img_queue.put(images) + return images + + def clip_safety_box(self, pose): + pose[:3] = np.clip( + pose[:3], self.xyz_bounding_box.low, self.xyz_bounding_box.high + ) + + euler = Rotation.from_quat(pose[3:]).as_euler("xyz") + old_sign = np.sign(euler[0]) + euler[0] = ( + np.clip( + euler[0] * old_sign, + self.rpy_bounding_box.low[0], + self.rpy_bounding_box.high[0], + ) + * old_sign + ) + euler[1:] = np.clip( + euler[1:], self.rpy_bounding_box.low[1:], self.rpy_bounding_box.high[1:] + ) + pose[3:] = Rotation.from_euler("xyz", euler).as_quat() + + # Clip xyz to inner box + if self.inner_box.contains(pose[:3]): + print(f'Command: {pose[:3]}') + pose[:3] = self.intersect_line_bbox(self.currpos[:3], pose[:3], self.inner_box.low, self.inner_box.high) + print(f'Clipped: 
{pose[:3]}') + + return pose + + def intersect_line_bbox(self, p1, p2, bbox_min, bbox_max): + # Define the parameterized line segment + # P(t) = p1 + t(p2 - p1) + tmin = 0 + tmax = 1 + + for i in range(3): + if p1[i] < bbox_min[i] and p2[i] < bbox_min[i]: + return None + if p1[i] > bbox_max[i] and p2[i] > bbox_max[i]: + return None + + # For each axis (x, y, z), compute t values at the intersection points + if abs(p2[i] - p1[i]) > 1e-10: # To prevent division by zero + t1 = (bbox_min[i] - p1[i]) / (p2[i] - p1[i]) + t2 = (bbox_max[i] - p1[i]) / (p2[i] - p1[i]) + + # Ensure t1 is smaller than t2 + if t1 > t2: + t1, t2 = t2, t1 + + tmin = max(tmin, t1) + tmax = min(tmax, t2) + + if tmin > tmax: + return None + + # Compute the intersection point using the t value + intersection = p1 + tmin * (p2 - p1) + + return intersection + + def step(self, action): + start_time = time.time() + action = np.clip(action, self.action_space.low, self.action_space.high) + if self.actionnoise > 0: + a = action[:3] + np.random.uniform( + -self.actionnoise, self.actionnoise, (3,) + ) + else: + a = action[:3] + + self.nextpos = self.currpos.copy() + self.nextpos[:3] = self.nextpos[:3] + a + + ### GET ORIENTATION FROM ACTION + self.nextpos[3:] = ( + Rotation.from_euler("xyz", action[3:6]) + * Rotation.from_quat(self.currpos[3:]) + ).as_quat() + + gripper = action[-1] + if gripper > 0: + if not self.drop_box.contains(self.currpos[:2]): + gripper = (self.currgrip + 1) % 2 + self.set_gripper(gripper) + + self._send_pos_command(self.clip_safety_box(self.nextpos)) + + self.curr_path_length += 1 + dl = time.time() - start_time + + time.sleep(max(0, (1.0 / self.hz) - dl)) + + self.update_currpos() + ob = self._get_obs() + obs_xyz = ob['state_observation']['tcp_pose'][:3] + obs_rpy = ob['state_observation']['tcp_pose'][3:] + reward = 0 + done = self.curr_path_length >= 40 #100 + # if not self.xyz_bounding_box.contains(obs_xyz) or not self.rpy_bounding_box.contains(obs_rpy): + # # print('Truncated: 
Bouding Box') + # print("xyz: ", self.xyz_bounding_box.contains(obs_xyz), obs_xyz) + # print("rortate: ", self.rpy_bounding_box.contains(obs_rpy), obs_rpy) + # return ob, 0, True, True, {} + return ob, int(reward), done, done, {} + + def reset(self, jpos=False, gripper=None, require_input=False): + self.cycle_count += 1 + if self.cycle_count % 1500 == 0: + self.cycle_count = 0 + jpos=True + + success = self.go_to_rest(jpos=jpos) + self.update_currpos() + self.curr_path_length = 0 + self.recover() + if jpos == True: + self.go_to_rest(jpos=False) + self.update_currpos() + self.recover() + + if require_input: + input("Reset Environment, Press Enter Once Complete: ") + # print("RESET COMPLETE") + requests.post(self.url + "open") + self.currgrip = 0 + time.sleep(1) + + self.update_currpos() + # self.last_quat = self.currpos[3:] + o = self._get_obs() + return o, {} \ No newline at end of file diff --git a/robot_infra/env/franka_cable_env.py b/robot_infra/env/franka_cable_env.py new file mode 100644 index 0000000..ad0d81f --- /dev/null +++ b/robot_infra/env/franka_cable_env.py @@ -0,0 +1,247 @@ +import gym +from gym import spaces +import numpy as np +from franka_robotiq_env import FrankaRobotiq +import time +from scipy.spatial.transform import Rotation +import requests +import copy +import cv2 +from camera.video_capture import VideoCapture +from camera.rs_capture import RSCapture +import queue + +class RouteCableEnv(FrankaRobotiq): + def __init__(self): + super().__init__() + # Bouding box + self.xyz_bounding_box = gym.spaces.Box( + np.array((0.51, -0.1, 0.04)), np.array((0.59, 0, 0.12)), dtype=np.float64 + ) + self.rpy_bounding_box = gym.spaces.Box( + np.array((np.pi-0.001, 0-0.001, np.pi/4)), + np.array((np.pi+0.001, 0+0.001, 3*np.pi/4)), + dtype=np.float64, + ) + ## Action/Observation Space + self.action_space = gym.spaces.Box( + np.array((-0.02, -0.02, -0.02, -0.05, -0.05, -0.1, -1)), + np.array((0.02, 0.02, 0.02, 0.05, 0.05, 0.1, 1)), + ) + # enable gripper in 
observation space + self.observation_space['state_observation']['gripper_pose'] = spaces.Box(-np.inf, np.inf, shape=(1,)) + # [0.48012088982197254,-0.07218941280725254,0.11078303293108258,0.6995269546628874,0.7134059993136379,0.028532587996196627,0.029996854262000595] + self.resetpos[:3] = np.array([0.55,-0.05,0.09]) + self.resetpos[3:] = self.euler_2_quat(np.pi, 0.03, np.pi/2) + + def go_to_rest(self, jpos=False): + count = 0 + requests.post(self.url + "precision_mode") + if jpos: + restp_new = copy.deepcopy(self.currpos) + restp_new[2] = 0.3 + dp = restp_new - self.currpos + count_1 = 0 + self._send_pos_command(self.currpos) + requests.post(self.url + "precision_mode") + while ( + (np.linalg.norm(dp[:3]) > 0.03 or np.linalg.norm(dp[3:]) > 0.04) + ) and count_1 < 50: + if np.linalg.norm(dp[3:]) > 0.05: + dp[3:] = 0.05 * dp[3:] / np.linalg.norm(dp[3:]) + if np.linalg.norm(dp[:3]) > 0.03: + dp[:3] = 0.03 * dp[:3] / np.linalg.norm(dp[:3]) + self._send_pos_command(self.currpos + dp) + time.sleep(0.1) + self.update_currpos() + dp = restp_new - self.currpos + count_1 += 1 + + print("JOINT RESET") + requests.post(self.url + "jointreset") + else: + # print("RESET") + self.update_currpos() + restp = copy.deepcopy(self.resetpos[:]) + if self.randomreset: + restp[:2] += np.random.uniform(-0.005, 0.005, (2,)) + restp[2] += np.random.uniform(-0.005, 0.005, (1,)) + # restyaw += np.random.uniform(-np.pi / 6, np.pi / 6) + # restp[3:] = self.euler_2_quat(np.pi, 0, restyaw) + + restp_new = copy.deepcopy(restp) + restp_new[2] = 0.15 #cable + dp = restp_new - self.currpos + + height = np.zeros_like(self.resetpos) + height[2] = 0.02 + while count < 10: + self._send_pos_command(self.currpos + height) + time.sleep(0.1) + self.update_currpos() + count += 1 + + count = 0 + while count < 200 and ( + np.linalg.norm(dp[:3]) > 0.01 or np.linalg.norm(dp[3:]) > 0.03 + ): + if np.linalg.norm(dp[3:]) > 0.02: + dp[3:] = 0.05 * dp[3:] / np.linalg.norm(dp[3:]) + if np.linalg.norm(dp[:3]) > 0.02: + 
dp[:3] = 0.02 * dp[:3] / np.linalg.norm(dp[:3]) + self._send_pos_command(self.currpos + dp) + time.sleep(0.1) + self.update_currpos() + dp = restp_new - self.currpos + count += 1 + + dp = restp - self.currpos + count = 0 + while count < 20 and ( + np.linalg.norm(dp[:3]) > 0.01 or np.linalg.norm(dp[3:]) > 0.01 + ): + if np.linalg.norm(dp[3:]) > 0.05: + dp[3:] = 0.05 * dp[3:] / np.linalg.norm(dp[3:]) + if np.linalg.norm(dp[:3]) > 0.02: + dp[:3] = 0.02 * dp[:3] / np.linalg.norm(dp[:3]) + self._send_pos_command(self.currpos + dp) + time.sleep(0.1) + self.update_currpos() + dp = restp - self.currpos + count += 1 + requests.post(self.url + "peg_compliance_mode") + return count < 50 + + def get_im(self): + images = {} + for key, cap in self.cap.items(): + try: + rgb = cap.read() + # images[key] = cv2.resize(rgb, self.observation_space['image_observation'][key].shape[:2][::-1]) + if key == 'wrist_1': + # cropped_rgb = rgb[ 100:400, 50:350, :] + cropped_rgb = rgb[:, 80:560, :] + if key == 'wrist_2': + # cropped_rgb = rgb[ 50:350, 200:500, :] #150:450 + cropped_rgb = rgb[:, 80:560, :] + # if key == 'side_1': + # cropped_rgb = rgb[150:330, 230:410, :] + + images[key] = cv2.resize(cropped_rgb, self.observation_space['image_observation'][key].shape[:2][::-1]) + # images[key] = cv2.resize(rgb, self.observation_space['image_observation'][key].shape[:2][::-1]) + images[key + "_full"] = rgb + # images[f"{key}_depth"] = depth + except queue.Empty: + input(f'{key} camera frozen. 
Check connect, then press enter to relaunch...') + cap.close() + # if key == 'side_1': + # cap = RSCapture(name='side_1', serial_number='128422270679', depth=True) + # elif key == 'side_2': + # cap = RSCapture(name='side_2', serial_number='127122270146', depth=True) + if key == 'wrist_1': + cap = RSCapture(name='wrist_1', serial_number='130322274175', depth=False) + elif key == 'wrist_2': + # cap = RSCapture(name='wrist_2', serial_number='127122270572', depth=False) + cap = RSCapture(name='wrist_2', serial_number='127122270572', depth=False) + elif key == 'side_1': + cap = RSCapture(name='side_1', serial_number='128422272758', depth=False) + else: + raise KeyError + self.cap[key] = VideoCapture(cap) + return self.get_im() + + self.img_queue.put(images) + return images + + def step(self, action): + start_time = time.time() + action = np.clip(action, self.action_space.low, self.action_space.high) + if self.actionnoise > 0: + a = action[:3] + np.random.uniform( + -self.actionnoise, self.actionnoise, (3,) + ) + else: + a = action[:3] + + self.nextpos = self.currpos.copy() + self.nextpos[:3] = self.nextpos[:3] + a + + ### GET ORIENTATION FROM ACTION + self.nextpos[3:] = ( + Rotation.from_euler("xyz", action[3:6]) + * Rotation.from_quat(self.currpos[3:]) + ).as_quat() + + self._send_pos_command(self.clip_safety_box(self.nextpos)) + # only change the gripper if the action is above a threshold, either open or close + if len(action) == 7: + if action[-1] > 0.8: + self.set_gripper(1) + elif action[-1] < -0.8: + self.set_gripper(0) + + self.curr_path_length += 1 + dl = time.time() - start_time + + time.sleep(max(0, (1.0 / self.hz) - dl)) + + self.update_currpos() + ob = self._get_obs() + obs_xyz = ob['state_observation']['tcp_pose'][:3] + obs_rpy = ob['state_observation']['tcp_pose'][3:] + reward = 0 + done = self.curr_path_length >= 30 #100 + # if not self.xyz_bounding_box.contains(obs_xyz) or not self.rpy_bounding_box.contains(obs_rpy): + # # print('Truncated: Bouding Box') 
+ # print("xyz: ", self.xyz_bounding_box.contains(obs_xyz), obs_xyz) + # print("rortate: ", self.rpy_bounding_box.contains(obs_rpy), obs_rpy) + # return ob, 0, True, True, {} + return ob, int(reward), done, done, {} + + def reset(self, jpos=False, gripper=None, require_input=False): + self.cycle_count += 1 + if self.cycle_count % 1500 == 0: + self.cycle_count = 0 + jpos=True + # requests.post(self.url + "reset_gripper") + # time.sleep(3) + # self.set_gripper(self.start_gripper, block=False) + self.currgrip = self.start_gripper + + success = self.go_to_rest(jpos=jpos) + self.update_currpos() + self.curr_path_length = 0 + self.recover() + if jpos == True: + self.go_to_rest(jpos=False) + self.update_currpos() + self.recover() + + if require_input: + input("Reset Environment, Press Enter Once Complete: ") + # print("RESET COMPLETE") + self.update_currpos() + # self.last_quat = self.currpos[3:] + o = self._get_obs() + return o, {} + + +class ResetCableEnv(FrankaRobotiq): + def __init__(self): + super().__init__() + # Bouding box + self.xyz_bounding_box = gym.spaces.Box( + np.array((0.62, 0.0, 0.05)), np.array((0.71, 0.08, 0.3)), dtype=np.float64 + ) + self.rpy_bounding_box = gym.spaces.Box( + np.array((np.pi-0.1, -0.1, 1.35)), + np.array((np.pi+0.1, 0.1, 1.7)), + dtype=np.float64, + ) + ## Action/Observation Space + self.action_space = gym.spaces.Box( + np.array((-0.02, -0.02, -0.02, -0.05, -0.05, -0.05, 0 - 1e-8)), + np.array((0.02, 0.02, 0.02, 0.05, 0.05, 0.05, 1 + 1e-8)), + ) + # self.resetpos[:3] = np.array([0.645, 0.17, 0.07]) + # self.resetpos[3:] = self.euler_2_quat(np.pi, 0.03, 0) \ No newline at end of file diff --git a/robot_infra/env/franka_pcb_env.py b/robot_infra/env/franka_pcb_env.py new file mode 100644 index 0000000..df0d716 --- /dev/null +++ b/robot_infra/env/franka_pcb_env.py @@ -0,0 +1,130 @@ +import gym +from gym import spaces +import numpy as np +# from franka.scripts.spacemouse_teleop import SpaceMouseExpert +import time +from franka_robotiq_env 
import FrankaRobotiq +import copy +import requests + +class PCBEnv(FrankaRobotiq): + def __init__(self): + + super().__init__() + self._TARGET_POSE = [0.6479450830785974,0.17181947852969695,0.056419218166284224, 3.1415, 0.0, 0.0 ] + self._REWARD_THRESHOLD = [0.005, 0.005, 0.0006, 0.03, 0.03, 0.05] + self.observation_space = spaces.Dict( + { + "state_observation": spaces.Dict( + { + "tcp_pose": spaces.Box(-np.inf, np.inf, shape=(6,)), # xyz + euler + "tcp_vel": spaces.Box(-np.inf, np.inf, shape=(6,)), + "tcp_force": spaces.Box(-np.inf, np.inf, shape=(3,)), + "tcp_torque": spaces.Box(-np.inf, np.inf, shape=(3,)), + } + ), + "image_observation": spaces.Dict( + { + "wrist_1": spaces.Box(0, 255, shape=(128, 128, 3), dtype=np.uint8), + "wrist_1_full": spaces.Box(0, 255, shape=(480, 640, 3), dtype=np.uint8), + "wrist_2": spaces.Box(0, 255, shape=(128, 128, 3), dtype=np.uint8), + "wrist_2_full": spaces.Box(0, 255, shape=(480, 640, 3), dtype=np.uint8), + } + ), + } + ) + self.action_space = gym.spaces.Box( + np.array((-0.01, -0.01, -0.01, -0.05, -0.05, -0.05)), + np.array((0.01, 0.01, 0.01, 0.05, 0.05, 0.05)) + ) + self.xyz_bounding_box = gym.spaces.Box( + np.array((0.62, 0.15, 0.03)), + np.array((0.67, 0.19, 0.09)), + dtype=np.float64 + ) + self.rpy_bounding_box = gym.spaces.Box( + np.array((np.pi-0.15, -0.05, -0.1)), + np.array((np.pi+0.1, 0.15, 0.1)), + dtype=np.float64 + ) + self.resetpos[:3] = np.array([0.645, 0.17, 0.07]) + self.resetpos[3:] = self.euler_2_quat(np.pi, 0.03, 0) + self.episodes = 1 + self.randomreset = False + + def _get_state(self): + state = super()._get_state() + state.pop('gripper_pose') + return state + + def go_to_rest(self, jpos=False): + count = 0 + if self.currpos[2] < 0.06: + restp_new = copy.deepcopy(self.currpos) + restp_new[2] += 0.02 + dp = restp_new - self.currpos + while count < 200 and ( + np.linalg.norm(dp[:3]) > 0.01 or np.linalg.norm(dp[3:]) > 0.03 + ): + if np.linalg.norm(dp[3:]) > 0.02: + dp[3:] = 0.05 * dp[3:] / 
np.linalg.norm(dp[3:]) + if np.linalg.norm(dp[:3]) > 0.02: + dp[:3] = 0.02 * dp[:3] / np.linalg.norm(dp[:3]) + self._send_pos_command(self.currpos + dp) + time.sleep(0.1) + self.update_currpos() + dp = restp_new - self.currpos + count += 1 + + + requests.post(self.url + "precision_mode") + if jpos: + restp_new = copy.deepcopy(self.currpos) + restp_new[2] = 0.2 + dp = restp_new - self.currpos + count_1 = 0 + self._send_pos_command(self.currpos) + requests.post(self.url + "precision_mode") + while ( + (np.linalg.norm(dp[:3]) > 0.03 or np.linalg.norm(dp[3:]) > 0.04) + ) and count_1 < 50: + if np.linalg.norm(dp[3:]) > 0.05: + dp[3:] = 0.05 * dp[3:] / np.linalg.norm(dp[3:]) + if np.linalg.norm(dp[:3]) > 0.03: + dp[:3] = 0.03 * dp[:3] / np.linalg.norm(dp[:3]) + self._send_pos_command(self.currpos + dp) + time.sleep(0.1) + self.update_currpos() + dp = restp_new - self.currpos + count_1 += 1 + + print("JOINT RESET") + requests.post(self.url + "jointreset") + else: + # print("RESET") + restp = copy.deepcopy(self.resetpos[:]) + if self.randomreset: + restp[:2] += np.random.uniform(-0.005, 0.005, (2,)) + restp[2] += np.random.uniform(-0.005, 0.005, (1,)) + # restyaw += np.random.uniform(-np.pi / 6, np.pi / 6) + # restp[3:] = self.euler_2_quat(np.pi, 0, restyaw) + + restp_new = copy.deepcopy(restp) + restp_new[2] = 0.07 #PCB + self.update_currpos() + dp = restp_new - self.currpos + while count < 200 and ( + np.linalg.norm(dp[:3]) > 0.005 or np.linalg.norm(dp[3:]) > 0.03 + ): + if np.linalg.norm(dp[3:]) > 0.02: + dp[3:] = 0.05 * dp[3:] / np.linalg.norm(dp[3:]) + if np.linalg.norm(dp[:3]) > 0.02: + dp[:3] = 0.02 * dp[:3] / np.linalg.norm(dp[:3]) + self._send_pos_command(self.currpos + dp) + time.sleep(0.1) + self.update_currpos() + dp = restp_new - self.currpos + count += 1 + + requests.post(self.url + "pcb_compliance_mode") + return count < 200 \ No newline at end of file diff --git a/robot_infra/env/franka_robotiq_env.py b/robot_infra/env/franka_robotiq_env.py new file mode 
100644 index 0000000..0a7540e --- /dev/null +++ b/robot_infra/env/franka_robotiq_env.py @@ -0,0 +1,472 @@ +"""Gym Interface for Franka""" +import numpy as np +import gym +from pyquaternion import Quaternion +import cv2 +import copy +from scipy.spatial.transform import Rotation +import time +import requests +from gym import core, spaces +from camera.video_capture import VideoCapture +from camera.rs_capture import RSCapture +import queue +from PIL import Image +from queue import Queue +import threading +import os + +class ImageDisplayer(threading.Thread): + def __init__(self, queue): + threading.Thread.__init__(self) + self.queue = queue + self.stop_signal = False + self.daemon = True # make this a daemon thread + + self.video = [] + + video_dir = '/home/undergrad/franka_fwbw_pick_screw_vice_ckpts' + os.makedirs(video_dir, exist_ok=True) + uuid = time.strftime("%Y%m%d-%H%M%S") + self.wrist1 = cv2.VideoWriter(os.path.join(video_dir, f'wrist_1_{uuid}.mp4'), cv2.VideoWriter_fourcc(*'mp4v'), 24, (640, 480)) + self.wrist2 = cv2.VideoWriter(os.path.join(video_dir, f'wrist_2_{uuid}.mp4'), cv2.VideoWriter_fourcc(*'mp4v'), 24, (640, 480)) + self.frame_counter = 0 + + def run(self): + while True: + img_array = self.queue.get() # retrieve an image from the queue + if img_array is None: # None is our signal to exit + break + # pair1 = np.concatenate([img_array['wrist_1_full'], img_array['wrist_2_full']], axis=0) + pair1 = np.concatenate([img_array['wrist_1'], img_array['wrist_2']], axis=0) + # pair1 = np.concatenate([img_array['wrist_1'], img_array['wrist_2'], img_array['side_1']], axis=0) + # pair2 = np.concatenate([img_array['side_2_full'], img_array['side_1_full']], axis=0) + # concatenated = np.concatenate([pair1, pair2], axis=1) + cv2.imshow('wrist', pair1/255.) 
+ cv2.waitKey(1) + + self.wrist1.write(img_array['wrist_1_full']) + self.wrist2.write(img_array['wrist_2_full']) + self.frame_counter += 1 + if self.frame_counter == 400: + self.wrist1.release() + self.wrist2.release() + + +class FrankaRobotiq(gym.Env): + def __init__( + self, + randomReset=False, + hz=10, + start_gripper=0, + ): + + self._TARGET_POSE = [0.6636488814118523,0.05388642290645651,0.09439445897864279, 3.1339503, 0.009167, 1.5550434] + self._REWARD_THRESHOLD = [0.01, 0.01, 0.01, 0.2, 0.2, 0.2] + self.resetpos = np.zeros(7) + + self.resetpos[:3] = self._TARGET_POSE[:3] + self.resetpos[2] += 0.07 + self.resetpos[3:] = self.euler_2_quat(self._TARGET_POSE[3], self._TARGET_POSE[4], self._TARGET_POSE[5]) + + self.currpos = self.resetpos.copy() + self.currvel = np.zeros((6,)) + self.q = np.zeros((7,)) + self.dq = np.zeros((7,)) + self.currforce = np.zeros((3,)) + self.currtorque = np.zeros((3,)) + self.currjacobian = np.zeros((6, 7)) + self.start_gripper = start_gripper + self.currgrip = self.start_gripper #start_gripper + self.lastsent = time.time() + self.randomreset = randomReset + self.actionnoise = 0 + self.hz = hz + + ## NUC + self.ip = "127.0.0.1" + self.url = "http://" + self.ip + ":5000/" + + # Bouding box + self.xyz_bounding_box = gym.spaces.Box( + np.array((0.62, 0.0, 0.05)), np.array((0.71, 0.08, 0.3)), dtype=np.float64 + ) + self.rpy_bounding_box = gym.spaces.Box( + np.array((np.pi-0.1, -0.1, 1.35)), + np.array((np.pi+0.1, 0.1, 1.7)), + dtype=np.float64, + ) + ## Action/Observation Space + self.action_space = gym.spaces.Box( + np.array((-0.02, -0.02, -0.02, -0.05, -0.05, -0.05, 0 - 1e-8)), + np.array((0.02, 0.02, 0.02, 0.05, 0.05, 0.05, 1 + 1e-8)), + ) + + self.observation_space = spaces.Dict( + { + "state_observation": spaces.Dict( + { + # "tcp_pose": spaces.Box(-np.inf, np.inf, shape=(7,)), # xyz + quat + "tcp_pose": spaces.Box(-np.inf, np.inf, shape=(6,)), # xyz + euler + "tcp_vel": spaces.Box(-np.inf, np.inf, shape=(6,)), + "gripper_pose": 
spaces.Box(-1, 1, shape=(1,)), + # "q": spaces.Box(-np.inf, np.inf, shape=(7,)), + # "dq": spaces.Box(-np.inf, np.inf, shape=(7,)), + "tcp_force": spaces.Box(-np.inf, np.inf, shape=(3,)), + "tcp_torque": spaces.Box(-np.inf, np.inf, shape=(3,)), + # "jacobian": spaces.Box(-np.inf, np.inf, shape=((6, 7))), + } + ), + "image_observation": spaces.Dict( + { + "wrist_1": spaces.Box(0, 255, shape=(128, 128, 3), dtype=np.uint8), + "wrist_1_full": spaces.Box(0, 255, shape=(480, 640, 3), dtype=np.uint8), + "wrist_2": spaces.Box(0, 255, shape=(128, 128, 3), dtype=np.uint8), + "wrist_2_full": spaces.Box(0, 255, shape=(480, 640, 3), dtype=np.uint8), + # "side_1": spaces.Box(0, 255, shape=(128, 128, 3), dtype=np.uint8), + # "side_1_full": spaces.Box(0, 255, shape=(480, 640, 3), dtype=np.uint8), + } + ), + } + ) + self.cycle_count = 0 + self.cap_wrist_1 = VideoCapture(RSCapture(name='wrist_1', serial_number='130322274175', depth=False)) + self.cap_wrist_2 = VideoCapture(RSCapture(name='wrist_2', serial_number='127122270572', depth=False)) + # self.cap_side_1 = VideoCapture(RSCapture(name='side_1', serial_number='128422272758', depth=False)) + + # self.cap_side_1 = VideoCapture( + # RSCapture(name="side_1", serial_number="128422270679", depth=True) + # ) + # self.cap_side_2 = VideoCapture( + # RSCapture(name="side_2", serial_number="127122270146", depth=True) + # ) + self.cap = { + # "side_1": self.cap_side_1, + # "side_2": self.cap_side_2, + "wrist_1": self.cap_wrist_1, + "wrist_2": self.cap_wrist_2, + } + + self.img_queue = queue.Queue() + self.displayer = ImageDisplayer(self.img_queue) + self.displayer.start() + print("Initialized Franka") + + def recover(self): + requests.post(self.url + "clearerr") + + def _send_pos_command(self, pos): + self.recover() + arr = np.array(pos).astype(np.float32) + data = {"arr": arr.tolist()} + requests.post(self.url + "pose", json=data) + + def update_currpos(self): + ps = requests.post(self.url + "getstate").json() + self.currpos[:] = 
np.array(ps["pose"]) + self.currvel[:] = np.array(ps["vel"]) + + self.currforce[:] = np.array(ps["force"]) + self.currtorque[:] = np.array(ps["torque"]) + self.currjacobian[:] = np.reshape(np.array(ps["jacobian"]), (6, 7)) + + self.q[:] = np.array(ps["q"]) + self.dq[:] = np.array(ps["dq"]) + + def set_gripper(self, position, block=True): + if position == 1: + st = 'close' + elif position == 0: + st = 'open' + else: + raise ValueError(f'Gripper position {position} not supported') + + ### IMPORTANT, IF FRANKA GRIPPER GETS OPEN/CLOSE COMMANDS TOO QUICKLY IT WILL FREEZE + ### THIS MAKES SURE CONSECUTIVE GRIPPER CHANGES ONLY HAPPEN 1 SEC APART + now = time.time() + delta = now - self.lastsent + if delta >= 1: + requests.post(self.url + st) + self.lastsent = time.time() + self.currgrip = position + # time.sleep(max(0, 1.5 - delta)) + + + def clip_safety_box(self, pose): + pose[:3] = np.clip( + pose[:3], self.xyz_bounding_box.low, self.xyz_bounding_box.high + ) + euler = Rotation.from_quat(pose[3:]).as_euler("xyz") + old_sign = np.sign(euler[0]) + euler[0] = ( + np.clip( + euler[0] * old_sign, + self.rpy_bounding_box.low[0], + self.rpy_bounding_box.high[0], + ) + * old_sign + ) + euler[1:] = np.clip( + euler[1:], self.rpy_bounding_box.low[1:], self.rpy_bounding_box.high[1:] + ) + + pose[3:] = Rotation.from_euler("xyz", euler).as_quat() + + return pose + + def move_to_pos(self, pos): + start_time = time.time() + self._send_pos_command(self.clip_safety_box(pos)) + dl = time.time() - start_time + time.sleep(max(0, (1.0 / self.hz) - dl)) + self.update_currpos() + obs = self._get_obs() + return obs + + def step(self, action): + start_time = time.time() + action = np.clip(action, self.action_space.low, self.action_space.high) + if self.actionnoise > 0: + a = action[:3] + np.random.uniform( + -self.actionnoise, self.actionnoise, (3,) + ) + else: + a = action[:3] + + self.nextpos = self.currpos.copy() + self.nextpos[:3] = self.nextpos[:3] + a + + ### GET ORIENTATION FROM ACTION + 
self.nextpos[3:] = ( + Rotation.from_euler("xyz", action[3:6]) + * Rotation.from_quat(self.currpos[3:]) + ).as_quat() + + # self.nextpos = self.clip_safety_box(self.nextpos) + # self._send_pos_command(self.nextpos) + self._send_pos_command(self.clip_safety_box(self.nextpos)) + # self.set_gripper(action[-1]) + + self.curr_path_length += 1 + dl = time.time() - start_time + + time.sleep(max(0, (1.0 / self.hz) - dl)) + + self.update_currpos() + ob = self._get_obs() + obs_xyz = ob['state_observation']['tcp_pose'][:3] + # obs_rpy = self.quat_2_euler(ob['state_observation']['tcp_pose'][3:7]) + obs_rpy = ob['state_observation']['tcp_pose'][3:] + reward = self.binary_reward_tcp(ob['state_observation']['tcp_pose']) + done = self.curr_path_length >= 100 + # if not self.xyz_bounding_box.contains(obs_xyz) or not self.rpy_bounding_box.contains(obs_rpy): + # # print('Truncated: Bouding Box') + # # print("xyz: ", self.xyz_bounding_box.contains(obs_xyz), obs_xyz) + # # print("rortate: ", self.rpy_bounding_box.contains(obs_rpy), obs_rpy) + # return ob, 0, True, True, {} + # return ob, int(reward), done or reward, done, {} + return ob, int(reward), done, done, {} + + + def binary_reward_tcp(self, current_pose,): + # euler_angles = np.abs(R.from_quat(current_pose[3:]).as_euler("xyz")) + euler_angles = np.abs(current_pose[3:]) + current_pose = np.hstack([current_pose[:3],euler_angles]) + delta = np.abs(current_pose - self._TARGET_POSE) + if np.all(delta < self._REWARD_THRESHOLD): + return True + else: + # print(f'Goal not reached, the difference is {delta}, the desired threshold is {_REWARD_THRESHOLD}') + return False + + def get_im(self): + images = {} + for key, cap in self.cap.items(): + try: + rgb = cap.read() + # images[key] = cv2.resize(rgb, self.observation_space['image_observation'][key].shape[:2][::-1]) + # if key == 'wrist_1': + # cropped_rgb = rgb[ 0:300, 150:450, :] + # if key == 'wrist_2': + # cropped_rgb = rgb[ 50:350, 150:450, :] + if key == 'wrist_1': + cropped_rgb = 
def get_im(self):
    """Read one frame from every camera in ``self.cap``.

    For each camera key the result holds the frame cropped to columns
    80..559 and resized to that key's ``image_observation`` space, plus the
    untouched frame under ``"<key>_full"``. The dict is also pushed onto
    ``self.img_queue``.

    If a camera read raises ``queue.Empty`` the operator is prompted, the
    camera is relaunched, and the whole capture is restarted.

    Returns:
        Dict mapping camera keys (and ``"<key>_full"``) to image arrays.

    Raises:
        KeyError: If a frozen camera has a key with no known relaunch
            configuration.
    """
    images = {}
    for key, cap in self.cap.items():
        try:
            rgb = cap.read()
        except queue.Empty:
            input(f'{key} camera frozen. Check connect, then press enter to relaunch...')
            cap.close()
            if key == 'wrist_1':
                cap = RSCapture(name='wrist_1', serial_number='130322274175', depth=False)
            elif key == 'wrist_2':
                cap = RSCapture(name='wrist_2', serial_number='127122270572', depth=False)
            else:
                raise KeyError(key)
            self.cap[key] = VideoCapture(cap)
            # Start over so every image in the result comes from the same pass.
            return self.get_im()

        # Both wrist cameras use this same crop. Applying it to every key
        # avoids reusing a previous camera's crop (or hitting an unbound
        # name) when ``self.cap`` contains another key.
        cropped_rgb = rgb[:, 80:560, :]
        target_shape = self.observation_space['image_observation'][key].shape
        # cv2.resize takes (width, height), hence the reversed leading dims.
        images[key] = cv2.resize(cropped_rgb, target_shape[:2][::-1])
        images[key + "_full"] = rgb

    self.img_queue.put(images)
    return images


def _get_state(self):
    """Collect the proprioceptive part of the observation.

    Returns:
        Dict with ``tcp_pose`` (xyz followed by the three values from
        ``quat_2_euler``), ``tcp_vel``, ``gripper_pose``, ``tcp_force`` and
        ``tcp_torque``. Apart from ``tcp_pose`` the values are the live
        attributes, not copies.
    """
    tcp_pose = np.concatenate(
        (self.currpos[:3], self.quat_2_euler(self.currpos[3:]))
    )
    return {
        "tcp_pose": tcp_pose,
        "tcp_vel": self.currvel,
        "gripper_pose": self.currgrip,
        "tcp_force": self.currforce,
        "tcp_torque": self.currtorque,
    }


def _get_obs(self):
    """Return a deep-copied observation with image and state sub-dicts."""
    images = self.get_im()
    state_observation = self._get_state()
    # Deep copy so later state updates cannot alter a returned observation.
    return copy.deepcopy(
        dict(image_observation=images, state_observation=state_observation)
    )
def go_to_rest(self, jpos=False):
    """Drive the arm to its reset pose, optionally via a joint reset.

    With ``jpos`` truthy the arm is lifted to z = 0.3 at its current xy and
    orientation and a ``jointreset`` request is posted. Otherwise the arm
    moves above the reset pose (z = 0.2) and then descends onto it; when
    ``self.randomreset`` is set the reset position is jittered by up to
    +/-0.005 per axis.

    Args:
        jpos: Perform the joint-reset sequence instead of the Cartesian one.

    Returns:
        ``True`` for the joint-reset sequence. For the Cartesian sequence,
        ``True`` if the final approach ended within tolerance before its
        step budget ran out, else ``False``. (The previous
        ``count < 50`` check could never be false.)
    """
    norm = np.linalg.norm

    def limit_norm(vec, limit):
        """Scale ``vec`` in place so that its norm does not exceed ``limit``."""
        size = norm(vec)
        if size > limit:
            vec[:] = limit * vec / size

    def servo_to(target, max_steps, pos_tol, rot_tol, max_pos_step, max_rot_step):
        """Step toward ``target``; True if it ended within both tolerances."""

        def off_target(dp):
            return norm(dp[:3]) > pos_tol or norm(dp[3:]) > rot_tol

        for _ in range(max_steps):
            dp = target - self.currpos
            if not off_target(dp):
                return True
            # dp[3:] is a raw component-wise difference of the orientation
            # entries, used only as a bounded step direction.
            limit_norm(dp[3:], max_rot_step)
            limit_norm(dp[:3], max_pos_step)
            self._send_pos_command(self.currpos + dp)
            time.sleep(0.1)
            self.update_currpos()
        return not off_target(target - self.currpos)

    requests.post(self.url + "precision_mode")

    if jpos:
        lift_target = copy.deepcopy(self.currpos)
        lift_target[2] = 0.3
        self._send_pos_command(self.currpos)
        requests.post(self.url + "precision_mode")
        servo_to(lift_target, 50, 0.03, 0.04, 0.03, 0.05)

        print("JOINT RESET")
        requests.post(self.url + "jointreset")
        converged = True
    else:
        self.update_currpos()
        rest_target = copy.deepcopy(self.resetpos[:])
        if self.randomreset:
            # One 3-vector draw consumes the same random stream as the
            # former (2,) + (1,) draws, without assigning a 1-element array
            # to a scalar slot.
            rest_target[:3] += np.random.uniform(-0.005, 0.005, (3,))

        # First move above the reset pose, then descend onto it.
        hover_target = copy.deepcopy(rest_target)
        hover_target[2] = 0.2  # PEG
        # The rotation step limit used to test "> 0.02" but rescale to 0.05,
        # which enlarged small steps; it now matches the other loops.
        servo_to(hover_target, 200, 0.01, 0.03, 0.02, 0.05)
        converged = servo_to(rest_target, 20, 0.01, 0.01, 0.02, 0.05)

    # NOTE(review): the collapsed source does not show whether this call was
    # nested inside the Cartesian branch or ran for both branches -- confirm.
    requests.post(self.url + "peg_compliance_mode")
    return converged
def reset(self, jpos=False, gripper=None, require_input=False):
    """Return the robot to its rest pose and produce the first observation.

    Every 150th call forces a joint reset regardless of ``jpos``. After a
    joint reset a regular Cartesian reset is performed as well.

    Args:
        jpos: Request a joint reset for this call.
        gripper: Accepted for interface compatibility; not used here.
        require_input: If true, wait for the operator to press Enter before
            the observation is taken.

    Returns:
        ``(observation, info)`` where ``info`` is an empty dict.
    """
    self.cycle_count += 1
    if self.cycle_count % 150 == 0:
        self.cycle_count = 0
        jpos = True

    # No gripper command is sent here; only the cached state is reset.
    self.currgrip = self.start_gripper

    self.go_to_rest(jpos=jpos)
    self.update_currpos()
    self.curr_path_length = 0
    self.recover()

    # Same truthiness test go_to_rest applies to ``jpos``.
    if jpos:
        self.go_to_rest(jpos=False)
        self.update_currpos()
        self.recover()

    if require_input:
        input("Reset Environment, Press Enter Once Complete: ")

    self.update_currpos()
    return self._get_obs(), {}


def quat_2_euler(self, quat):
    """Return ``(yaw + pi, pitch, roll)`` for a quaternion.

    Args:
        quat: A ``Quaternion`` instance, or a value accepted by the
            ``Quaternion`` constructor.
            NOTE(review): ``Quaternion`` is imported outside this view
            (presumably pyquaternion) -- confirm its element order matches
            what callers pass in.
    """
    if not isinstance(quat, Quaternion):
        quat = Quaternion(quat)
    yaw, pitch, roll = quat.yaw_pitch_roll
    return yaw + np.pi, pitch, roll


def euler_2_quat(self, yaw=np.pi / 2, pitch=0.0, roll=np.pi):
    """Build quaternion elements from yaw/pitch/roll angles.

    The yaw used internally is ``pi - yaw``, mirroring the ``+ pi`` offset
    applied by ``quat_2_euler``. The rotation matrix is composed as
    ``Rz(yaw) @ Ry(pitch) @ Rx(roll)``.

    Returns:
        ``Quaternion(matrix=...).elements`` for that rotation matrix.
    """
    yaw = np.pi - yaw
    cos_y, sin_y = np.cos(yaw), np.sin(yaw)
    cos_p, sin_p = np.cos(pitch), np.sin(pitch)
    cos_r, sin_r = np.cos(roll), np.sin(roll)

    yaw_matrix = np.array(
        [
            [cos_y, -sin_y, 0.0],
            [sin_y, cos_y, 0.0],
            [0, 0, 1.0],
        ]
    )
    pitch_matrix = np.array(
        [
            [cos_p, 0.0, sin_p],
            [0.0, 1.0, 0.0],
            [-sin_p, 0, cos_p],
        ]
    )
    roll_matrix = np.array(
        [
            [1.0, 0, 0],
            [0, cos_r, -sin_r],
            [0, sin_r, cos_r],
        ]
    )
    rot_mat = yaw_matrix.dot(pitch_matrix.dot(roll_matrix))
    return Quaternion(matrix=rot_mat).elements


def close_camera(self):
    """Close every camera capture held in ``self.cap``.

    The captures are kept in the ``self.cap`` dict (``get_im`` reads them
    from there and replaces entries on relaunch), so they are closed from
    that dict rather than from separate per-camera attributes.
    """
    for cap in self.cap.values():
        cap.close()
class ProxyEnv(Env):
    """Pass-through environment that forwards the env API to a wrapped env.

    Attributes not found on the proxy itself are looked up on the wrapped
    environment via ``__getattr__``.
    """

    def __init__(self, wrapped_env):
        self._wrapped_env = wrapped_env
        self.action_space = self._wrapped_env.action_space
        self.observation_space = self._wrapped_env.observation_space

    @property
    def wrapped_env(self):
        """The environment this proxy forwards to."""
        return self._wrapped_env

    def reset(self, **kwargs):
        return self._wrapped_env.reset(**kwargs)

    def step(self, action):
        return self._wrapped_env.step(action)

    def render(self, *args, **kwargs):
        return self._wrapped_env.render(*args, **kwargs)

    @property
    def horizon(self):
        return self._wrapped_env.horizon

    def terminate(self):
        """Call ``terminate`` on the wrapped env if it defines one."""
        if hasattr(self.wrapped_env, "terminate"):
            self.wrapped_env.terminate()

    def seed(self, _seed):
        return self.wrapped_env.seed(_seed)

    def __getattr__(self, attr):
        """Resolve attributes missing on the proxy from the wrapped env.

        Python only calls this after normal attribute lookup has failed.
        """
        if attr == '_wrapped_env':
            # Reached when the instance dict has not been populated yet
            # (e.g. while unpickling); delegating would recurse forever.
            raise AttributeError(attr)
        if attr == 'planner':
            return self._planner
        # 'set_vf' used to be answered with ``self.set_vf``, which re-enters
        # this method without end; it is now delegated like any other name.
        return getattr(self._wrapped_env, attr)

    def __getstate__(self):
        """
        This is useful to override in case the wrapped env has some funky
        __getstate__ that doesn't play well with overriding __getattr__.

        The main problematic case is/was gym's EzPickle serialization scheme.
        :return:
        """
        return self.__dict__

    def __setstate__(self, state):
        self.__dict__.update(state)

    def __str__(self):
        return '{}({})'.format(type(self).__name__, self.wrapped_env)
+ ), + } + ) + + def step(self, action): + a = np.zeros(self._wrapped_env.action_space.shape) + a[:6] = copy.deepcopy(action) + a[6] = 1 + return self._wrapped_env.step(a) + +class SpacemouseIntervention(ProxyEnv): + def __init__(self, env, gripper_enabled=False): + ProxyEnv.__init__(self, env) + self._wrapped_env = env + self.action_space = self._wrapped_env.action_space + self.gripper_enabled = gripper_enabled + if self.gripper_enabled: + assert self.action_space.shape == (7,) # maybe not so elegant + self.observation_space = self._wrapped_env.observation_space + self.expert = SpaceMouseExpert( + xyz_dims=3, + xyz_remap=[0, 1, 2], + xyz_scale=200, + rot_scale=200, + all_angles=True + ) + self.last_intervene = 0 + + def expert_action(self, action): + ''' + Input: + - action: policy action + Output: + - action: spacemouse action if nonezero; else, policy action + ''' + controller_a, _, left, right = self.expert.get_action() + expert_a = np.zeros((6,)) + if self.gripper_enabled: + expert_a = np.zeros((7,)) + expert_a[-1] = np.random.uniform(-1, 0) + + expert_a[:3] = controller_a[:3] # XYZ + expert_a[3] = controller_a[4] # Roll + expert_a[4] = controller_a[5] # Pitch + expert_a[5] = -controller_a[6] # Yaw + + if self.gripper_enabled: + if left: + expert_a[6] = np.random.uniform(0, 1) + self.last_intervene = time.time() + + if np.linalg.norm(expert_a[:6]) > 0.001: + self.last_intervene = time.time() + else: + if np.linalg.norm(expert_a) > 0.001: + self.last_intervene = time.time() + + if time.time() - self.last_intervene < 0.5: + return expert_a, left, right + return action, left, right + + def step(self, action): + expert_action, left, right = self.expert_action(action) + o, r, done, truncated, env_info = self._wrapped_env.step(expert_action) + env_info['expert_action'] = expert_action + env_info['right'] = right + return o, r, done, truncated, env_info + +class FourDoFWrapper(gym.ActionWrapper): + def __init__(self, env: Env): + super().__init__(env) + + def 
class FourDoFWrapper(gym.ActionWrapper):
    """Reduce an action to four values: its first three and its last."""

    def action(self, action):
        reduced = np.zeros(4)
        reduced[:3] = action[:3]
        reduced[-1] = action[-1]
        return reduced


class TwoCameraFrankaWrapper(gym.ObservationWrapper):
    """Flatten the state dict and expose the two wrist images at top level."""

    def __init__(self, env):
        # Initialise through gym's own wrapper machinery; this class does
        # not derive from ProxyEnv, so ProxyEnv.__init__ is not its
        # initialiser.
        super().__init__(env)
        # Kept because the previous initialiser also set this attribute.
        self._wrapped_env = env
        self.observation_space = spaces.Dict(
            {
                "state": spaces.flatten_space(self.env.observation_space['state_observation']),
                "wrist_1": spaces.Box(0, 255, shape=(128, 128, 3), dtype=np.uint8),
                "wrist_2": spaces.Box(0, 255, shape=(128, 128, 3), dtype=np.uint8),
            }
        )

    def observation(self, obs):
        """Convert a nested env observation into the flat three-key dict."""
        return {
            'state': spaces.flatten(
                self.env.observation_space['state_observation'],
                obs['state_observation'],
            ),
            'wrist_1': obs['image_observation']['wrist_1'][..., ::-1],  # flip color channel
            'wrist_2': obs['image_observation']['wrist_2'][..., ::-1],  # flip color channel
        }


class ResetFreeWrapper(gym.Wrapper):
    """Shift the reset position along y depending on the active task."""

    def __init__(self, env):
        super().__init__(env)
        self.task_id = 0  # 0: place into silver bin, 1: place into brown bin

    def reset(self, task_id=0, **kwargs):
        """Select a task, move the reset position accordingly, and reset.

        Args:
            task_id: ``0`` offsets the reset y-coordinate by +0.1 from the
                centre position; any other value offsets it by -0.1.
            **kwargs: Forwarded to the wrapped environment's ``reset``.

        Returns:
            Whatever the wrapped environment's ``reset`` returns.
        """
        self.task_id = task_id
        print(f'reset to task {self.task_id}')
        y_offset = 0.1 if self.task_id == 0 else -0.1
        # ``resetpos`` / ``centerpos`` resolve on the wrapped env; the item
        # assignment modifies the env's array in place.
        self.resetpos[1] = self.centerpos[1] + y_offset
        return self.env.reset(**kwargs)