2 changes: 2 additions & 0 deletions apps/rendering_modes/balanced.kit
@@ -1,3 +1,5 @@
rtx.sdg.force.disableColorRender=false
Contributor: Should this be set to False only if rendering is not "rgb" in the TiledCamera? Wondering whether the logic belongs here or in the camera class.
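A minimal sketch of what moving that decision into the camera class could look like (an illustration only: the settings key mirrors rtx.sdg.force.disableColorRender from these .kit files, and the helper name and placement are hypothetical):

```python
import carb.settings


def configure_color_render(data_types: list[str]) -> None:
    """Hypothetical helper: disable the color render pass only when no RGB-type
    output is requested, instead of hard-coding the flag in each rendering-mode .kit file."""
    needs_color = any(dt in ("rgb", "rgba") for dt in data_types)
    carb.settings.get_settings().set("/rtx/sdg/force/disableColorRender", not needs_color)
```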


rtx.translucency.enabled = false

rtx.reflections.enabled = false
2 changes: 2 additions & 0 deletions apps/rendering_modes/performance.kit
@@ -1,3 +1,5 @@
rtx.sdg.force.disableColorRender=true # change to false for RGB baselines

rtx.translucency.enabled = false

rtx.reflections.enabled = false
2 changes: 2 additions & 0 deletions apps/rendering_modes/quality.kit
@@ -1,3 +1,5 @@
rtx.sdg.force.disableColorRender=false

rtx.translucency.enabled = true

rtx.reflections.enabled = true
38 changes: 37 additions & 1 deletion source/isaaclab/isaaclab/sensors/camera/tiled_camera.py
@@ -189,12 +189,28 @@ def _initialize_impl(self):
)
self._render_product_paths = [rp.path]

rep.AnnotatorRegistry.register_annotator_from_aov(
Contributor: Please add comments on what these are and why they are needed.

aov="DiffuseAlbedoSD", output_data_type=np.uint8, output_channels=4
)
rep.AnnotatorRegistry.register_annotator_from_aov(
aov="SimpleShadingSD", output_data_type=np.uint8, output_channels=4
Contributor: Should output_channels be 3 directly? Slicing down to 3 channels is done later anyway.

Contributor: Is this only for Isaac Sim 5.1?

)
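In response to the request above for explanatory comments, one possible annotated version of these registrations (a sketch only; the rationale is inferred from how the buffers are handled later in this diff):

```python
import numpy as np
import omni.replicator.core as rep

# Expose the renderer's DiffuseAlbedoSD and SimpleShadingSD AOVs as Replicator
# annotators so the tiled camera can request them like any other data type.
# Both AOVs arrive as 4-channel (RGBA) uint8 tiles; the alpha channel is
# dropped later in _update_buffers_impl before the per-environment reshape.
rep.AnnotatorRegistry.register_annotator_from_aov(
    aov="DiffuseAlbedoSD", output_data_type=np.uint8, output_channels=4
)
rep.AnnotatorRegistry.register_annotator_from_aov(
    aov="SimpleShadingSD", output_data_type=np.uint8, output_channels=4
)
```

Once registered, the new outputs are requested through the `diffuse_albedo` and `simple_shading` entries of the camera's `data_types` list, which is presumably how the Shadow Hand environment variants later in this PR select them.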
# Define the annotators based on requested data types
self._annotators = dict()
for annotator_type in self.cfg.data_types:
if annotator_type == "rgba" or annotator_type == "rgb":
annotator = rep.AnnotatorRegistry.get_annotator("rgb", device=self.device, do_array_copy=False)
self._annotators["rgba"] = annotator
elif annotator_type == "diffuse_albedo":
annotator = rep.AnnotatorRegistry.get_annotator(
"DiffuseAlbedoSD", device=self.device, do_array_copy=False
)
self._annotators["diffuse_albedo"] = annotator
elif annotator_type == "simple_shading":
annotator = rep.AnnotatorRegistry.get_annotator(
"SimpleShadingSD", device=self.device, do_array_copy=False
)
self._annotators["simple_shading"] = annotator
elif annotator_type == "depth" or annotator_type == "distance_to_image_plane":
# keep depth for backwards compatibility
annotator = rep.AnnotatorRegistry.get_annotator(
@@ -254,13 +270,16 @@ def _update_buffers_impl(self, env_ids: Sequence[int]):
else:
tiled_data_buffer = tiled_data_buffer.to(device=self.device)

# process data for different segmentation types
# process data for different segmentation types and custom annotators
# Note: Replicator returns raw buffers of dtype uint32 for segmentation types
# so we need to convert them to uint8 4 channel images for colorized types
# Note: Custom annotators (diffuse_albedo, simple_shading) also return 4 channel data
if (
(data_type == "semantic_segmentation" and self.cfg.colorize_semantic_segmentation)
or (data_type == "instance_segmentation_fast" and self.cfg.colorize_instance_segmentation)
or (data_type == "instance_id_segmentation_fast" and self.cfg.colorize_instance_id_segmentation)
or data_type == "diffuse_albedo"
or data_type == "simple_shading"
):
tiled_data_buffer = wp.array(
ptr=tiled_data_buffer.ptr, shape=(*tiled_data_buffer.shape, 4), dtype=wp.uint8, device=self.device
@@ -271,6 +290,13 @@ def _update_buffers_impl(self, env_ids: Sequence[int]):
if data_type == "motion_vectors":
tiled_data_buffer = tiled_data_buffer[:, :, :2].contiguous()

# For diffuse albedo, keep only the first three channels (RGB)
if data_type == "diffuse_albedo":
tiled_data_buffer = tiled_data_buffer[:, :, :3].contiguous()
# For simple shading, keep only the first three channels (RGB)
if data_type == "simple_shading":
tiled_data_buffer = tiled_data_buffer[:, :, :3].contiguous()
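A standalone illustration of the pointer reinterpretation described in the note above (assumptions: a CPU buffer, little-endian byte order, and toy pixel values; the real code performs the same reinterpretation on the Replicator buffer in place):

```python
import numpy as np
import warp as wp

wp.init()

# One packed uint32 per pixel, as Replicator returns for colorized segmentation
# and for the custom RGBA-style annotators.
packed = wp.array(np.array([[0xFF0000FF, 0xFF00FF00]], dtype=np.uint32), device="cpu")

# View the same memory as 4 uint8 channels per pixel, then drop the alpha channel.
rgba = wp.array(ptr=packed.ptr, shape=(*packed.shape, 4), dtype=wp.uint8, device="cpu")
rgb = rgba.numpy()[:, :, :3]  # [[[255, 0, 0], [0, 255, 0]]] on little-endian hosts
```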

wp.launch(
kernel=reshape_tiled_image,
dim=(self._view.count, self.cfg.height, self.cfg.width),
@@ -347,6 +373,16 @@ def _create_buffers(self):
if "rgb" in self.cfg.data_types:
# RGB is the first 3 channels of RGBA
data_dict["rgb"] = data_dict["rgba"][..., :3]
if "diffuse_albedo" in self.cfg.data_types:
data_dict["diffuse_albedo"] = torch.zeros(
(self._view.count, self.cfg.height, self.cfg.width, 4), device=self.device, dtype=torch.uint8
).contiguous()
data_dict["diffuse_albedo"] = data_dict["diffuse_albedo"][..., :3]
Comment on lines +376 to +380
Contributor: style: Buffer is created with 4 channels, then immediately sliced to 3 channels. Consider initializing with 3 channels directly for clarity.

Suggested change:
Current:
if "diffuse_albedo" in self.cfg.data_types:
data_dict["diffuse_albedo"] = torch.zeros(
(self._view.count, self.cfg.height, self.cfg.width, 4), device=self.device, dtype=torch.uint8
).contiguous()
data_dict["diffuse_albedo"] = data_dict["diffuse_albedo"][..., :3]
Suggested:
if "diffuse_albedo" in self.cfg.data_types:
data_dict["diffuse_albedo"] = torch.zeros(
(self._view.count, self.cfg.height, self.cfg.width, 3), device=self.device, dtype=torch.uint8
).contiguous()

if "simple_shading" in self.cfg.data_types:
data_dict["simple_shading"] = torch.zeros(
(self._view.count, self.cfg.height, self.cfg.width, 4), device=self.device, dtype=torch.uint8
).contiguous()
data_dict["simple_shading"] = data_dict["simple_shading"][..., :3]
Comment on lines +381 to +385
Contributor: style: Buffer is created with 4 channels, then immediately sliced to 3 channels. Consider initializing with 3 channels directly for clarity.

Suggested change:
Current:
if "simple_shading" in self.cfg.data_types:
data_dict["simple_shading"] = torch.zeros(
(self._view.count, self.cfg.height, self.cfg.width, 4), device=self.device, dtype=torch.uint8
).contiguous()
data_dict["simple_shading"] = data_dict["simple_shading"][..., :3]
Suggested:
if "simple_shading" in self.cfg.data_types:
data_dict["simple_shading"] = torch.zeros(
(self._view.count, self.cfg.height, self.cfg.width, 3), device=self.device, dtype=torch.uint8
).contiguous()

if "distance_to_image_plane" in self.cfg.data_types:
data_dict["distance_to_image_plane"] = torch.zeros(
(self._view.count, self.cfg.height, self.cfg.width, 1), device=self.device, dtype=torch.float32
Changes to the Shadow Hand vision task environment registrations (file path not captured in this view)
@@ -64,6 +64,63 @@
},
)


gym.register(
id="Isaac-Repose-Cube-Shadow-Segmentation-Direct-v0",
entry_point=f"{__name__}.shadow_hand_vision_env:ShadowHandVisionEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.shadow_hand_vision_env:ShadowHandVisionSegmentationEnvCfg",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:ShadowHandVisionFFPPORunnerCfg",
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_vision_cfg.yaml",
},
)


gym.register(
id="Isaac-Repose-Cube-Shadow-RGB-Direct-v0",
entry_point=f"{__name__}.shadow_hand_vision_env:ShadowHandVisionEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.shadow_hand_vision_env:ShadowHandVisionRGBEnvCfg",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:ShadowHandVisionFFPPORunnerCfg",
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_vision_cfg.yaml",
},
)

gym.register(
id="Isaac-Repose-Cube-Shadow-DiffuseAlbedo-Direct-v0",
entry_point=f"{__name__}.shadow_hand_vision_env:ShadowHandVisionEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.shadow_hand_vision_env:ShadowHandVisionDiffuseAlbedoEnvCfg",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:ShadowHandVisionFFPPORunnerCfg",
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_vision_cfg.yaml",
},
)

gym.register(
id="Isaac-Repose-Cube-Shadow-SimpleShading-Direct-v0",
entry_point=f"{__name__}.shadow_hand_vision_env:ShadowHandVisionEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.shadow_hand_vision_env:ShadowHandVisionSimpleShadingEnvCfg",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:ShadowHandVisionFFPPORunnerCfg",
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_vision_cfg.yaml",
},
)

gym.register(
id="Isaac-Repose-Cube-Shadow-Depth-Direct-v0",
entry_point=f"{__name__}.shadow_hand_vision_env:ShadowHandVisionEnv",
disable_env_checker=True,
kwargs={
"env_cfg_entry_point": f"{__name__}.shadow_hand_vision_env:ShadowHandVisionDepthEnvCfg",
"rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:ShadowHandVisionFFPPORunnerCfg",
"rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_vision_cfg.yaml",
},
)

gym.register(
id="Isaac-Repose-Cube-Shadow-Vision-Direct-Play-v0",
entry_point=f"{__name__}.shadow_hand_vision_env:ShadowHandVisionEnv",
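A hedged sketch of how one of the new registrations could be exercised (assumptions: the simulation app has already been launched via AppLauncher, and parse_env_cfg is the standard Isaac Lab helper; none of this is shown in the PR):

```python
import gymnasium as gym

import isaaclab_tasks  # noqa: F401  (importing this runs the gym.register calls above)
from isaaclab_tasks.utils import parse_env_cfg

# Build the env-cfg variant bound to the chosen ID and create the environment.
task = "Isaac-Repose-Cube-Shadow-DiffuseAlbedo-Direct-v0"
env_cfg = parse_env_cfg(task, num_envs=16)
env = gym.make(task, cfg=env_cfg)
```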
Changes to the FeatureExtractor module used by the Shadow Hand vision environment (file path not captured in this view)
@@ -16,9 +16,9 @@
class FeatureExtractorNetwork(nn.Module):
"""CNN architecture used to regress keypoint positions of the in-hand cube from image data."""

def __init__(self):
def __init__(self, num_channel):
super().__init__()
num_channel = 7
self.num_channel = num_channel
self.cnn = nn.Sequential(
nn.Conv2d(num_channel, 16, kernel_size=6, stride=2, padding=0),
nn.ReLU(),
@@ -45,8 +45,11 @@ def __init__(self):

def forward(self, x):
x = x.permute(0, 3, 1, 2)
x[:, 0:3, :, :] = self.data_transforms(x[:, 0:3, :, :])
x[:, 4:7, :, :] = self.data_transforms(x[:, 4:7, :, :])
if self.num_channel == 7:
x[:, 0:3, :, :] = self.data_transforms(x[:, 0:3, :, :])
x[:, 4:7, :, :] = self.data_transforms(x[:, 4:7, :, :])
elif self.num_channel == 3:
x[:, 0:3, :, :] = self.data_transforms(x[:, 0:3, :, :])
cnn_x = self.cnn(x)
out = self.linear(cnn_x.view(-1, 128))
return out
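A small sketch of the two channel layouts the constructor change supports (the 7-channel composition is inferred from the concatenation in step() further down; the 3-channel case is an assumption for single-modality inputs such as diffuse albedo or simple shading):

```python
# Uses the FeatureExtractorNetwork class defined above.
# 7 channels: rgb (3) + depth (1) + segmentation (3), normalized per modality in forward().
net_multi = FeatureExtractorNetwork(num_channel=7)

# 3 channels: a single image modality, e.g. diffuse_albedo or simple_shading.
net_single = FeatureExtractorNetwork(num_channel=3)
```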
@@ -65,6 +68,8 @@ class FeatureExtractorCfg:
write_image_to_file: bool = False
"""If True, the images from the camera sensor are written to file. Default is False."""

num_channel: int = 7
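"""Number of input channels for the feature extractor network. Default is 7 (rgb + depth + segmentation)."""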


class FeatureExtractor:
"""Class for extracting features from image data.
@@ -86,7 +91,7 @@ def __init__(self, cfg: FeatureExtractorCfg, device: str, log_dir: str | None =
self.device = device

# Feature extractor model
self.feature_extractor = FeatureExtractorNetwork()
self.feature_extractor = FeatureExtractorNetwork(self.cfg.num_channel)
self.feature_extractor.to(self.device)

self.step_count = 0
@@ -112,8 +117,13 @@ def __init__(self, cfg: FeatureExtractorCfg, device: str, log_dir: str | None =
self.feature_extractor.eval()

def _preprocess_images(
self, rgb_img: torch.Tensor, depth_img: torch.Tensor, segmentation_img: torch.Tensor
) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
self,
rgb_img: torch.Tensor,
depth_img: torch.Tensor,
segmentation_img: torch.Tensor,
albedo_img: torch.Tensor | None = None,
simple_shading_img: torch.Tensor | None = None,
) -> tuple[torch.Tensor | None, torch.Tensor | None, torch.Tensor | None, torch.Tensor | None, torch.Tensor | None]:
"""Preprocesses the input images.

Args:
@@ -122,33 +132,63 @@ def _preprocess_images(
segmentation_img (torch.Tensor): Segmentation image tensor. Shape: (N, H, W, 3)

Returns:
tuple[torch.Tensor, torch.Tensor, torch.Tensor]: Preprocessed RGB, depth, and segmentation
tuple[torch.Tensor | None, torch.Tensor | None, torch.Tensor | None, torch.Tensor | None, torch.Tensor | None]:
Contributor: We don't do type annotations in docstrings since they are harder to maintain. Please remove instances of that everywhere.

Preprocessed RGB, depth, segmentation, albedo, and simple shading images
"""
rgb_img = rgb_img / 255.0
if rgb_img is not None:
rgb_img = rgb_img / 255.0
# process depth image
depth_img[depth_img == float("inf")] = 0
depth_img /= 5.0
depth_img /= torch.max(depth_img)
if depth_img is not None:
depth_img[depth_img == float("inf")] = 0
depth_img /= 5.0
depth_img /= torch.max(depth_img)
# process segmentation image
segmentation_img = segmentation_img / 255.0
mean_tensor = torch.mean(segmentation_img, dim=(1, 2), keepdim=True)
segmentation_img -= mean_tensor
return rgb_img, depth_img, segmentation_img

def _save_images(self, rgb_img: torch.Tensor, depth_img: torch.Tensor, segmentation_img: torch.Tensor):
if segmentation_img is not None:
segmentation_img = segmentation_img / 255.0
mean_tensor = torch.mean(segmentation_img, dim=(1, 2), keepdim=True)
segmentation_img -= mean_tensor
# process albedo image
if albedo_img is not None:
albedo_img = albedo_img / 255.0
# process simple shading image
if simple_shading_img is not None:
simple_shading_img = simple_shading_img / 255.0
return rgb_img, depth_img, segmentation_img, albedo_img, simple_shading_img

def _save_images(
self,
rgb_img: torch.Tensor | None,
depth_img: torch.Tensor | None,
segmentation_img: torch.Tensor | None,
albedo_img: torch.Tensor | None,
simple_shading_img: torch.Tensor | None,
):
"""Writes image buffers to file.

Args:
rgb_img (torch.Tensor): RGB image tensor. Shape: (N, H, W, 3).
depth_img (torch.Tensor): Depth image tensor. Shape: (N, H, W, 1).
segmentation_img (torch.Tensor): Segmentation image tensor. Shape: (N, H, W, 3).
"""
save_images_to_file(rgb_img, "shadow_hand_rgb.png")
save_images_to_file(depth_img, "shadow_hand_depth.png")
save_images_to_file(segmentation_img, "shadow_hand_segmentation.png")
if rgb_img is not None:
save_images_to_file(rgb_img, "shadow_hand_rgb.png")
if depth_img is not None:
save_images_to_file(depth_img, "shadow_hand_depth.png")
if segmentation_img is not None:
save_images_to_file(segmentation_img, "shadow_hand_segmentation.png")
if albedo_img is not None:
save_images_to_file(albedo_img, "shadow_hand_diffuse_albedo.png")
if simple_shading_img is not None:
save_images_to_file(simple_shading_img, "shadow_hand_simple_shading.png")

def step(
self, rgb_img: torch.Tensor, depth_img: torch.Tensor, segmentation_img: torch.Tensor, gt_pose: torch.Tensor
self,
rgb_img: torch.Tensor = None,
depth_img: torch.Tensor = None,
segmentation_img: torch.Tensor = None,
albedo_img: torch.Tensor = None,
simple_shading_img: torch.Tensor = None,
gt_pose: torch.Tensor = None,
) -> tuple[torch.Tensor, torch.Tensor]:
"""Extracts the features using the images and trains the model if the train flag is set to True.

@@ -162,15 +202,28 @@ def step(
tuple[torch.Tensor, torch.Tensor]: Pose loss and predicted pose.
"""

rgb_img, depth_img, segmentation_img = self._preprocess_images(rgb_img, depth_img, segmentation_img)
rgb_img, depth_img, segmentation_img, albedo_img, simple_shading_img = self._preprocess_images(
rgb_img, depth_img, segmentation_img, albedo_img, simple_shading_img
)

if self.cfg.write_image_to_file:
self._save_images(rgb_img, depth_img, segmentation_img)
self._save_images(rgb_img, depth_img, segmentation_img, albedo_img, simple_shading_img)

if self.cfg.train:
with torch.enable_grad():
with torch.inference_mode(False):
img_input = torch.cat((rgb_img, depth_img, segmentation_img), dim=-1)
if rgb_img is not None and depth_img is not None and segmentation_img is not None:
img_input = torch.cat((rgb_img, depth_img, segmentation_img), dim=-1)
elif albedo_img is not None:
img_input = albedo_img
elif simple_shading_img is not None:
img_input = simple_shading_img
elif rgb_img is not None:
img_input = rgb_img
elif depth_img is not None:
img_input = depth_img
elif segmentation_img is not None:
img_input = segmentation_img
self.optimizer.zero_grad()

predicted_pose = self.feature_extractor(img_input)
Expand All @@ -189,6 +242,17 @@ def step(

return pose_loss, predicted_pose
else:
img_input = torch.cat((rgb_img, depth_img, segmentation_img), dim=-1)
if albedo_img is not None:
img_input = albedo_img
elif simple_shading_img is not None:
img_input = simple_shading_img
elif rgb_img is not None and depth_img is not None and segmentation_img is not None:
img_input = torch.cat((rgb_img, depth_img, segmentation_img), dim=-1)
elif rgb_img is not None:
img_input = rgb_img
elif depth_img is not None:
img_input = depth_img
elif segmentation_img is not None:
img_input = segmentation_img
predicted_pose = self.feature_extractor(img_input)
return None, predicted_pose
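The two branches above duplicate the input-selection logic and use different priority orders (the training branch prefers the rgb + depth + segmentation stack, the inference branch prefers albedo). One way it could be factored, shown as a refactoring sketch rather than as part of the PR:

```python
import torch


def select_feature_input(rgb_img, depth_img, segmentation_img, albedo_img, simple_shading_img):
    """Hypothetical helper mirroring the branches above, with a single priority order."""
    if albedo_img is not None:
        return albedo_img
    if simple_shading_img is not None:
        return simple_shading_img
    if rgb_img is not None and depth_img is not None and segmentation_img is not None:
        return torch.cat((rgb_img, depth_img, segmentation_img), dim=-1)
    for img in (rgb_img, depth_img, segmentation_img):
        if img is not None:
            return img
    raise ValueError("at least one image modality must be provided")
```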