dataset_states_to_obs.py with camera depths #99

Open · wants to merge 22 commits into base: master
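Overview: this PR threads a new camera_depths option through EnvRobosuite, converts the rendered depth buffers to metric depth, saves per-camera intrinsics/extrinsics alongside the usual observations, and adds a PointNet-based "spatial" encoder config for consuming the resulting point clouds. A minimal sketch of how the new kwarg might be exercised — the call below mirrors the create_for_data_processing() diff further down, but the concrete argument values (and the updated dataset_states_to_obs.py call site) are assumptions, since the script itself is not shown in this diff:

from robomimic.envs.env_robosuite import EnvRobosuite

# Hypothetical usage: "Lift" stands in for whatever env_name the source hdf5 specifies.
env = EnvRobosuite.create_for_data_processing(
    env_name="Lift",
    camera_names=["agentview"],
    camera_height=84,
    camera_width=84,
    reward_shaping=False,
    camera_depths=True,  # new kwarg, consumed via kwargs['camera_depths'] below
)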
10 changes: 10 additions & 0 deletions robomimic/config/base_config.py
@@ -272,6 +272,16 @@ def observation_config(self):
        self.observation.encoder.scan.core_kwargs = Config()  # See models/obs_core.py for important kwargs to set and defaults used
        self.observation.encoder.scan.core_kwargs.do_not_lock_keys()

        # =============== Spatial default encoder (pointnet) ===============
        self.observation.encoder.spatial = deepcopy(self.observation.encoder.rgb)

        # Spatial: modify the core class + kwargs; otherwise this is the same as the rgb encoder
        self.observation.encoder.spatial.core_class = "SpatialCore"  # Default core class (PointNet-based) used to process this modality
        # self.observation.encoder.spatial.core_class = "SpatialTest"  # alternative core class
        # self.observation.encoder.spatial.core_class = "SparseTransformer"  # alternative core class
        self.observation.encoder.spatial.core_kwargs = Config()  # See models/obs_core.py for important kwargs to set and defaults used
        self.observation.encoder.spatial.core_kwargs.do_not_lock_keys()

    def meta_config(self):
        """
        This function populates the `config.meta` attribute of the config. This portion of the config
20 changes: 19 additions & 1 deletion robomimic/envs/env_robosuite.py
@@ -7,7 +7,9 @@
import numpy as np
from copy import deepcopy

import mujoco
import robosuite
from robosuite.utils.camera_utils import get_real_depth_map, get_camera_extrinsic_matrix, get_camera_intrinsic_matrix
try:
    # this is needed for ensuring robosuite can find the additional mimicgen environments (see https://mimicgen.github.io)
    import mimicgen_envs
@@ -70,7 +72,7 @@ def __init__(
            ignore_done=True,
            use_object_obs=True,
            use_camera_obs=use_image_obs,
            camera_depths=kwargs.get('camera_depths', False),  # was hardcoded to False; .get keeps that default when the kwarg is absent
        )
        kwargs.update(update_kwargs)

@@ -201,9 +203,21 @@ def get_observation(self, di=None):
                ret[k] = di[k][::-1]
                if self.postprocess_visual_obs:
                    ret[k] = ObsUtils.process_obs(obs=ret[k], obs_key=k)
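            # depth observations: flip vertically like rgb, then convert the normalized z-buffer to metric depth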
            if (k in ObsUtils.OBS_KEYS_TO_MODALITIES) and ObsUtils.key_is_obs_modality(key=k, obs_modality="depth"):
                ret[k] = di[k][::-1]
                ret[k] = get_real_depth_map(self.env.sim, ret[k])
                if self.postprocess_visual_obs:
                    ret[k] = ObsUtils.process_obs(obs=ret[k], obs_key=k)

        # "object" key contains object information
        ret["object"] = np.array(di["object-state"])

        # save camera intrinsics (3x3 pinhole matrix) and extrinsics (4x4 camera-to-world pose)
        for cam_idx, camera_name in enumerate(self.env.camera_names):
            cam_height = self.env.camera_heights[cam_idx]
            cam_width = self.env.camera_widths[cam_idx]
            ret[f'{camera_name}_extrinsic'] = get_camera_extrinsic_matrix(self.env.sim, camera_name)
            ret[f'{camera_name}_intrinsic'] = get_camera_intrinsic_matrix(self.env.sim, camera_name, cam_height, cam_width)

        if self._is_v1:
            for robot in self.env.robots:
@@ -357,6 +371,8 @@ def create_for_data_processing(
        image_modalities = list(camera_names)
        depth_modalities = []  # stays empty unless depth rendering is enabled below
        if is_v1:
            image_modalities = ["{}_image".format(cn) for cn in camera_names]
            if kwargs.get('camera_depths', False):
                depth_modalities = ["{}_depth".format(cn) for cn in camera_names]
        elif has_camera:
            # v0.3 only had support for one image, and it was named "rgb"
            assert len(image_modalities) == 1
@@ -367,6 +383,8 @@
"rgb": image_modalities,
}
}
if kwargs['camera_depths']:
obs_modality_specs['obs']['depth'] = depth_modalities
ObsUtils.initialize_obs_utils_with_obs_specs(obs_modality_specs)

        # note that @postprocess_visual_obs is False since this env's images will be written to a dataset
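Since get_observation now emits {camera}_depth, {camera}_intrinsic, and {camera}_extrinsic per camera, each frame can be back-projected into a world-frame point cloud for the spatial encoder. A minimal numpy sketch — the helper below is illustrative, not part of this PR, and it assumes robosuite's convention that get_camera_extrinsic_matrix returns a camera-to-world homogeneous transform:

import numpy as np

def depth_to_world_points(depth, intrinsic, extrinsic):
    """Back-project a metric depth map (H, W, 1) into world-frame points (H*W, 3)."""
    H, W = depth.shape[:2]
    u, v = np.meshgrid(np.arange(W), np.arange(H))  # pixel grid
    z = depth.reshape(H, W)
    # pinhole model: recover camera-frame x/y from pixel coordinates and depth
    x = (u - intrinsic[0, 2]) * z / intrinsic[0, 0]
    y = (v - intrinsic[1, 2]) * z / intrinsic[1, 1]
    pts_cam = np.stack([x, y, z, np.ones_like(z)], axis=-1).reshape(-1, 4)
    return (pts_cam @ extrinsic.T)[:, :3]  # homogeneous transform into the world frame

# usage with the keys written by get_observation above:
# pts = depth_to_world_points(obs["agentview_depth"], obs["agentview_intrinsic"], obs["agentview_extrinsic"])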
233 changes: 233 additions & 0 deletions robomimic/models/base_nets.py
@@ -15,6 +15,9 @@
from torchvision import transforms
from torchvision import models as vision_models

import robomimic.utils.tensor_utils as TensorUtils


@@ -1109,3 +1112,233 @@ def forward(self, x):
            # weighted mean-pooling
            return torch.sum(x * self.agg_weight, dim=1)
        raise Exception("unexpected agg type: {}".format(self.agg_type))

class Conv1dBNReLU(Module):
    """Applies a 1D convolution over an input signal composed of several input planes,
    optionally followed by batch normalization and ReLU activation.
    """

    def __init__(
        self, in_channels, out_channels, kernel_size, relu=True, bn=True, **kwargs
    ):
        super().__init__()

        self.in_channels = in_channels
        self.out_channels = out_channels

        self.conv = nn.Conv1d(
            in_channels, out_channels, kernel_size, bias=(not bn), **kwargs
        )
        self.bn = nn.BatchNorm1d(out_channels) if bn else None
        self.relu = nn.ReLU(inplace=True) if relu else None

    def forward(self, x):
        x = self.conv(x)
        if self.bn is not None:
            x = self.bn(x)
        if self.relu is not None:
            x = self.relu(x)
        return x


class Conv2dBNReLU(Module):
    """Applies a 2D convolution (optionally with batch normalization and ReLU activation)
    over an input signal composed of several input planes.
    """

    def __init__(
        self, in_channels, out_channels, kernel_size, relu=True, bn=True, **kwargs
    ):
        super().__init__()

        self.in_channels = in_channels
        self.out_channels = out_channels

        self.conv = nn.Conv2d(
            in_channels, out_channels, kernel_size, bias=(not bn), **kwargs
        )
        self.bn = nn.BatchNorm2d(out_channels) if bn else None
        self.relu = nn.ReLU(inplace=True) if relu else None

    def forward(self, x):
        x = self.conv(x)
        if self.bn is not None:
            x = self.bn(x)
        if self.relu is not None:
            x = self.relu(x)
        return x


class LinearBNReLU(Module):
    """Applies a linear transformation to the incoming data,
    optionally followed by batch normalization and ReLU activation.
    """

    def __init__(self, in_channels, out_channels, relu=True, bn=True):
        super().__init__()

        self.in_channels = in_channels
        self.out_channels = out_channels

        self.fc = nn.Linear(in_channels, out_channels, bias=(not bn))
        self.bn = nn.BatchNorm1d(out_channels) if bn else None
        self.relu = nn.ReLU(inplace=True) if relu else None

    def forward(self, x):
        x = self.fc(x)
        if self.bn is not None:
            x = self.bn(x)
        if self.relu is not None:
            x = self.relu(x)
        return x

def mlp_bn_relu(in_channels, out_channels_list):
    """Stack of Linear + BatchNorm1d + ReLU blocks for flat (B, C) features."""
    c_in = in_channels
    layers = []
    for c_out in out_channels_list:
        layers.append(LinearBNReLU(c_in, c_out, relu=True, bn=True))
        c_in = c_out
    return nn.Sequential(*layers)


def mlp_relu(in_channels, out_channels_list):
    """Stack of Linear + ReLU blocks (no batch norm)."""
    c_in = in_channels
    layers = []
    for c_out in out_channels_list:
        layers.append(LinearBNReLU(c_in, c_out, relu=True, bn=False))
        c_in = c_out
    return nn.Sequential(*layers)


def mlp1d_bn_relu(in_channels, out_channels_list):
    """Stack of pointwise (kernel size 1) Conv1d + BatchNorm1d + ReLU blocks for (B, C, N) inputs."""
    c_in = in_channels
    layers = []
    for c_out in out_channels_list:
        layers.append(Conv1dBNReLU(c_in, c_out, 1, relu=True))
        c_in = c_out
    return nn.Sequential(*layers)


def mlp1d_relu(in_channels, out_channels_list):
    """Stack of pointwise Conv1d + ReLU blocks (no batch norm)."""
    c_in = in_channels
    layers = []
    for c_out in out_channels_list:
        layers.append(Conv1dBNReLU(c_in, c_out, 1, relu=True, bn=False))
        c_in = c_out
    return nn.Sequential(*layers)


def mlp2d_bn_relu(in_channels, out_channels_list):
    """Stack of pointwise (1x1) Conv2d + BatchNorm2d + ReLU blocks for (B, C, H, W) inputs."""
    c_in = in_channels
    layers = []
    for c_out in out_channels_list:
        layers.append(Conv2dBNReLU(c_in, c_out, 1, relu=True))
        c_in = c_out
    return nn.Sequential(*layers)


def mlp2d_relu(in_channels, out_channels_list):
    """Stack of pointwise 1x1 Conv2d + ReLU blocks (no batch norm)."""
    c_in = in_channels
    layers = []
    for c_out in out_channels_list:
        layers.append(Conv2dBNReLU(c_in, c_out, 1, relu=True, bn=False))
        c_in = c_out
    return nn.Sequential(*layers)
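A quick shape check for the factories above (illustrative usage, not part of the PR):

# mlp1d_* variants act pointwise on (B, C, N) tensors; mlp_* variants act on flat (B, C) features
mlp = mlp1d_bn_relu(3, (64, 128))
x = torch.rand(4, 3, 100)  # (B, C, N)
print(mlp(x).shape)        # torch.Size([4, 128, 100])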

class PointNet(Module):
    """PointNet for classification.
    Notes:
        1. The original implementation includes dropout for global MLPs.
        2. The original implementation decays the BN momentum.
    """

    def __init__(
        self,
        in_channels=3,
        local_channels=(64, 64, 64, 128, 1024),
        global_channels=(512, 256),
        use_bn=True,
    ):
        super().__init__()

        self.in_channels = in_channels
        self.out_channels = (local_channels + global_channels)[-1]
        self.use_bn = use_bn

        if use_bn:
            self.mlp_local = mlp1d_bn_relu(in_channels, local_channels)
            self.mlp_global = mlp_bn_relu(local_channels[-1], global_channels)
        else:
            self.mlp_local = mlp1d_relu(in_channels, local_channels)
            self.mlp_global = mlp_relu(local_channels[-1], global_channels)

        self.reset_parameters()

    def forward_internal(self, points, points_feature=None, points_mask=None) -> dict:
        # points: [B, 3, N]; points_feature: [B, C, N]; points_mask: [B, N]
        if points_feature is not None:
            input_feature = torch.cat([points, points_feature], dim=1)
        else:
            input_feature = points

        local_feature = self.mlp_local(input_feature)
        if points_mask is not None:
            local_feature = torch.where(
                points_mask.unsqueeze(1), local_feature, torch.zeros_like(local_feature)
            )
        # symmetric max-pool over the point dimension gives a permutation-invariant global feature
        global_feature, max_indices = torch.max(local_feature, 2)
        output_feature = self.mlp_global(global_feature)

        return {"feature": output_feature, "max_indices": max_indices}

    def forward(self, feats_points):
        # feats_points: [B, 3 + C, N] -- xyz channels first, then per-point features
        points = feats_points[:, :3, :]
        if feats_points.shape[1] > 3:
            points_feature = feats_points[:, 3:, :]
        else:
            points_feature = None
        return self.forward_internal(points, points_feature)['feature']

    def reset_parameters(self):
        for name, module in self.named_modules():
            if isinstance(module, (nn.Linear, nn.Conv1d, nn.Conv2d)):
                if module.bias is not None:
                    nn.init.zeros_(module.bias)
            if isinstance(module, (nn.BatchNorm1d, nn.BatchNorm2d)):
                module.momentum = 0.01

class PointMLP(Module):
    def __init__(
        self,
    ):
        super().__init__()

        self.model = nn.Sequential(
            nn.Linear(3 * 100 * 2, 512),  # input layer is hardcoded for exactly N = 200 points
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 256),
            nn.ReLU(),
            nn.Linear(256, 256),
            nn.ReLU(),
            nn.Linear(256, 256),
        )
        print('WARNING: PointMLP is only for testing purposes')

    def forward(self, points):
        # points: [B, 3 + C, N]; only the xyz channels are used
        points = points[:, :3, :]
        B, _, N = points.shape
        assert N == 200, "PointMLP's input layer is hardcoded for N = 200 points"
        points = points.reshape(B, 3 * 100 * 2)
        return self.model(points)

def test_pointnet():
    feat_points = torch.rand(2, 3 + 1024, 1000)  # (B, 3 + C, N)
    point_net = PointNet(3 + 1024)  # in_channels must cover xyz plus the 1024 feature channels
    out = point_net(feat_points)
    print(out.shape)  # torch.Size([2, 256])

if __name__ == '__main__':
    test_pointnet()
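The config diff above points core_class at "SpatialCore", but robomimic/models/obs_core.py is not part of this diff. A minimal sketch of what such a core might look like, assuming robomimic's EncoderCore interface (subclasses self-register with ObsUtils); the class body here is an assumption, not the PR's actual implementation:

from robomimic.models.obs_core import EncoderCore
from robomimic.models.base_nets import PointNet

class SpatialCore(EncoderCore):
    """Hypothetical encoder core wrapping PointNet for point-cloud ("spatial") observations."""

    def __init__(self, input_shape, **pointnet_kwargs):
        super(SpatialCore, self).__init__(input_shape=input_shape)
        # input_shape is assumed to be (3 + C, N): xyz plus per-point feature channels
        self.nets = PointNet(in_channels=input_shape[0], **pointnet_kwargs)

    def output_shape(self, input_shape=None):
        return [self.nets.out_channels]

    def forward(self, inputs):
        return self.nets(inputs)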