You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
I'm trying to evaluate an Octo-based policy in several SimplerEnv (`simpler_env`) environments in parallel to speed up evaluation. I am using Python's built-in multiprocessing module. A minimal code example is below:
import numpy as np
from multiprocessing import Process, Pipe
import simpler_env
def worker(remote, parent_remote, env_name):
    """Serve one environment inside a child process, driven over a pipe.

    The loop receives ``(cmd, data)`` tuples on ``remote`` and reacts to:

    * ``'step'``  -- calls ``env.step(data)`` and sends back the 5-tuple
      ``(obs, reward, success, truncated, info)``.
    * ``'reset'`` -- calls ``env.reset()`` and sends back ``(obs, reset_info)``.
    * ``'close'`` -- leaves the loop.

    Any other command is ignored and no reply is sent.

    Args:
        remote: This process's end of the pipe.
        parent_remote: The parent's end of the same pipe; it is closed
            immediately because this process never uses it.
        env_name: Name passed to ``simpler_env.make``.
    """
    parent_remote.close()  # Close the parent end of the pipe
    env = simpler_env.make(env_name)
    try:
        while True:
            try:
                cmd, data = remote.recv()
            except EOFError:
                # The other end of the pipe is gone; nothing more to serve.
                break
            if cmd == 'step':
                # No auto-reset here: the caller decides when to reset.
                obs, reward, success, truncated, info = env.step(data)
                remote.send((obs, reward, success, truncated, info))
            elif cmd == 'reset':
                obs, reset_info = env.reset()
                remote.send((obs, reset_info))
            elif cmd == 'close':
                break
    finally:
        # Release the environment and the pipe on every exit path,
        # including exceptions raised by env.step()/env.reset().
        try:
            env.close()
        finally:
            remote.close()
class ParallelEnvs:
    """Run several copies of one environment, each in its own process.

    Every environment lives in a child process running ``worker`` and is
    controlled through a dedicated pipe. ``step`` and ``reset`` fan a command
    out to all workers, then gather the replies in worker order.
    """

    def __init__(self, env_name, num_envs, start_method=None):
        """Start ``num_envs`` worker processes for ``env_name``.

        Args:
            env_name: Environment name forwarded to each worker.
            num_envs: Number of worker processes to start (must be >= 1).
            start_method: Optional multiprocessing start method (``'fork'``,
                ``'spawn'`` or ``'forkserver'``). ``None`` keeps the platform
                default.
                NOTE(review): GPU/renderer state created in the parent before
                the workers start is presumably not safe to inherit through
                ``fork``; trying ``'spawn'`` or ``'forkserver'`` may help with
                device-lost errors in the workers -- confirm on the target
                machine.

        Raises:
            ValueError: If ``num_envs`` is smaller than 1.
        """
        import multiprocessing

        if num_envs < 1:
            raise ValueError(f"num_envs must be >= 1, got {num_envs}")
        ctx = multiprocessing.get_context(start_method)
        self.num_envs = num_envs
        # One pipe per worker: `remotes` stay in this process,
        # `work_remotes` are handed to the children.
        self.remotes, self.work_remotes = zip(
            *[ctx.Pipe() for _ in range(num_envs)]
        )
        self.processes = [
            ctx.Process(target=worker, args=(work_remote, remote, env_name))
            for (work_remote, remote) in zip(self.work_remotes, self.remotes)
        ]
        for p in self.processes:
            p.start()
        for work_remote in self.work_remotes:
            work_remote.close()  # Close the worker end in the main process

    def step(self, actions):
        """Step every environment once.

        Args:
            actions: One action per environment, in worker order.

        Returns:
            A 5-tuple ``(obs, reward, success, truncated, info)`` where each
            element is a tuple holding one entry per environment.

        Raises:
            ValueError: If the number of actions differs from the number of
                environments (too few would otherwise block forever waiting
                for a reply that was never requested).
        """
        actions = list(actions)
        if len(actions) != len(self.remotes):
            raise ValueError(
                f"expected {len(self.remotes)} actions, got {len(actions)}"
            )
        for remote, action in zip(self.remotes, actions):
            remote.send(('step', action))
        results = [remote.recv() for remote in self.remotes]
        obs, reward, success, truncated, info = zip(*results)
        return obs, reward, success, truncated, info

    def reset(self):
        """Reset every environment.

        Returns:
            ``(obs, reset_info)`` where each element is a tuple holding one
            entry per environment.
        """
        for remote in self.remotes:
            remote.send(('reset', None))
        results = [remote.recv() for remote in self.remotes]
        # Each reply is an (obs, reset_info) pair; transpose the list of
        # pairs into a pair of per-env tuples, mirroring step().
        obs, reset_info = zip(*results)
        return obs, reset_info

    def close(self):
        """Ask all workers to shut down, wait for them, and close the pipes."""
        for remote in self.remotes:
            try:
                remote.send(('close', None))
            except OSError:
                # The worker already exited and its pipe end is gone;
                # there is nobody left to notify.
                pass
        for p in self.processes:
            p.join()
        for remote in self.remotes:
            remote.close()
if __name__ == "__main__":
    # Demo driver: run random actions through several parallel environments.
    env_name = "google_robot_pick_coke_can"
    num_envs = 4  # how many environments run side by side
    num_steps = 100

    # A local environment instance, used only to sample actions.
    dummy_env = simpler_env.make(env_name)

    # Spin up the worker processes, then bring every environment to its
    # initial state.
    envs = ParallelEnvs(env_name, num_envs)
    obs, reset_info = envs.reset()
    breakpoint()

    for step in range(num_steps):
        # Stand-in for a policy: one random action per environment.
        actions = []
        for _ in range(num_envs):
            actions.append(dummy_env.action_space.sample())

        # Advance all environments by one step.
        obs, reward, success, truncated, info = envs.step(actions)
        # NOTE(review): the original indentation was lost; this breakpoint is
        # assumed to sit inside the loop -- confirm.
        breakpoint()

    # Shut the workers down.
    envs.close()
But I got the following error when initializing multiple environments:
File "/user/fine-tune/test.py", line 15, in worker [15/1901]
env = simpler_env.make(env_name)
File "/SimplerEnv/simpler_env/__init__.py", line 78, in make
env = gym.make(env_name, obs_mode="rgbd", **kwargs)
File "/opt/conda/lib/python3.10/site-packages/gymnasium/envs/registration.py", line 802, in make
env = env_creator(**env_spec_kwargs)
File "/SimplerEnv/ManiSkill2_real2sim/mani_skill2_real2sim/utils/registration.py", line 92, in make
env = env_spec.make(**kwargs)
File "/SimplerEnv/ManiSkill2_real2sim/mani_skill2_real2sim/utils/registration.py", line 34, in make
return self.cls(**_kwargs)
File "/SimplerEnv/ManiSkill2_real2sim/mani_skill2_real2sim/envs/custom_scenes/grasp_single_in_scene.py", line 630, in __init__
super().__init__(**kwargs)
File "/SimplerEnv/ManiSkill2_real2sim/mani_skill2_real2sim/envs/custom_scenes/grasp_single_in_scene.py", line 540, in __init__
super().__init__(**kwargs)
File "/SimplerEnv/ManiSkill2_real2sim/mani_skill2_real2sim/envs/custom_scenes/grasp_single_in_scene.py", line 64, in __init__
super().__init__(**kwargs)
File "/SimplerEnv/ManiSkill2_real2sim/mani_skill2_real2sim/envs/custom_scenes/base_env.py", line 134, in __init__
super().__init__(**kwargs)
File "/SimplerEnv/ManiSkill2_real2sim/mani_skill2_real2sim/envs/sapien_env.py", line 188, in __init__
obs, _ = self.reset(seed=2022, options=dict(reconfigure=True))
File "/SimplerEnv/ManiSkill2_real2sim/mani_skill2_real2sim/envs/custom_scenes/grasp_single_in_scene.py", line 585, in reset
obs, info = super().reset(seed=self._episode_seed, options=options)
File "/SimplerEnv/ManiSkill2_real2sim/mani_skill2_real2sim/envs/custom_scenes/grasp_single_in_scene.py", line 135, in reset
obs, info = super().reset(seed=self._episode_seed, options=options)
File "/SimplerEnv/ManiSkill2_real2sim/mani_skill2_real2sim/envs/custom_scenes/base_env.py", line 228, in reset
obs, info = super().reset(seed=seed, options=options)
File "/SimplerEnv/ManiSkill2_real2sim/mani_skill2_real2sim/envs/sapien_env.py", line 488, in reset
return self.get_obs(), {}
File "/SimplerEnv/ManiSkill2_real2sim/mani_skill2_real2sim/envs/custom_scenes/base_env.py", line 350, in get_obs
obs = super().get_obs()
File "/SimplerEnv/ManiSkill2_real2sim/mani_skill2_real2sim/envs/sapien_env.py", line 265, in get_obs
return self._get_obs_images()
File "/SimplerEnv/ManiSkill2_real2sim/mani_skill2_real2sim/envs/sapien_env.py", line 312, in _get_obs_images
self.take_picture()
File "/SimplerEnv/ManiSkill2_real2sim/mani_skill2_real2sim/envs/sapien_env.py", line 289, in take_picture
cam.take_picture()
File "/SimplerEnv/ManiSkill2_real2sim/mani_skill2_real2sim/sensors/camera.py", line 187, in take_picture
self.camera.take_picture()
RuntimeError: vk::Device::waitForFences: ErrorDeviceLost
Could you help me figure out what the issue is here? Or, what is the right way to parallelize SimplerEnv environments? Thanks for your help!
Yes, only the WidowX robot. We don't have a GPU-parallelized implementation of the Google Robot's controller. I know it's possible, but I currently don't have time to tackle that problem.
Hi,
I'm trying to evaluate an octo-based policy in several
simpler
environments in parallel to speed up evaluation. I am using Python's built-in multiprocessing module. A minimal code example is below: But I got the following error when initializing multiple environments:
Could you help me figure out what the issue is here? Or, what is the right way to parallelize
simpler
environments? Thanks for your help! P.S. This link may be relevant to my issue here.
The text was updated successfully, but these errors were encountered: