Skip to content

Commit

Permalink
adding agent radius to config for obstacle maps; using 5% margin to i…
Browse files Browse the repository at this point in the history
…dentify edge detection; making gripper cam level; only using gripper cam for semantic value; adding gripper cam panning at the beginning of episode; only using all camera for point clouds for the first 10 steps to reduce noise during journey; allowing obstacle map inputs to update obstacles and/or update explored area; updating obstacle height range used for reality experiments; making inferred depth range farther (10m); adding target object to GDINO caption if doesn't already exist in it
  • Loading branch information
naokiyokoyamabd committed Sep 14, 2023
1 parent 4db07d8 commit 3f8f23c
Show file tree
Hide file tree
Showing 7 changed files with 133 additions and 54 deletions.
5 changes: 4 additions & 1 deletion config/experiments/reality.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,13 @@ policy:
exploration_thresh: 0.0
obstacle_map_area_threshold: 1.5 # in square meters
text_prompt: "Seems like there is a target_object ahead."
min_obstacle_height: 0.3
max_obstacle_height: 1.5
agent_radius: 0.2

env:
max_body_cam_depth: 3.5
max_gripper_cam_depth: 5.0
max_gripper_cam_depth: 10.0
max_lin_dist: 0.2
max_ang_dist: 0.523599
time_step: 0.7
Expand Down
11 changes: 10 additions & 1 deletion zsos/mapping/object_point_cloud_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,4 +304,13 @@ def too_offset(mask: np.ndarray) -> bool:
third = mask.shape[1] // 3

# Check if the entire bounding rectangle is in the left or right third of the mask
return x + w <= third or x >= 2 * third
if x + w <= third:
# Check if the leftmost point is at the edge of the image
# return x == 0
return x <= int(0.05 * mask.shape[1])
elif x >= 2 * third:
# Check if the rightmost point is at the edge of the image
# return x + w == mask.shape[1]
return x + w >= int(0.95 * mask.shape[1])
else:
return False
60 changes: 34 additions & 26 deletions zsos/mapping/obstacle_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ def update_map(
fx: float,
fy: float,
topdown_fov: float,
explore: bool = True,
update_obstacles: bool = True,
):
"""
Adds all obstacles from the current view to the map. Also updates the area
Expand All @@ -75,34 +77,40 @@ def update_map(
fy (float): The focal length of the camera in the y direction.
topdown_fov (float): The field of view of the depth camera projected onto
the topdown map.
explore (bool): Whether to update the explored area.
update_obstacles (bool): Whether to update the obstacle map.
"""
if self._hole_area_thresh == -1:
filled_depth = depth.copy()
filled_depth[depth == 0] = 1.0
else:
filled_depth = fill_small_holes(depth, self._hole_area_thresh)
scaled_depth = filled_depth * (max_depth - min_depth) + min_depth
mask = scaled_depth < max_depth
point_cloud_camera_frame = get_point_cloud(scaled_depth, mask, fx, fy)
point_cloud_episodic_frame = transform_points(
tf_camera_to_episodic, point_cloud_camera_frame
)
obstacle_cloud = filter_points_by_height(
point_cloud_episodic_frame, self._min_height, self._max_height
)

# Populate topdown map with obstacle locations
xy_points = obstacle_cloud[:, :2]
pixel_points = self._xy_to_px(xy_points)
self._map[pixel_points[:, 1], pixel_points[:, 0]] = 1
if update_obstacles:
if self._hole_area_thresh == -1:
filled_depth = depth.copy()
filled_depth[depth == 0] = 1.0
else:
filled_depth = fill_small_holes(depth, self._hole_area_thresh)
scaled_depth = filled_depth * (max_depth - min_depth) + min_depth
mask = scaled_depth < max_depth
point_cloud_camera_frame = get_point_cloud(scaled_depth, mask, fx, fy)
point_cloud_episodic_frame = transform_points(
tf_camera_to_episodic, point_cloud_camera_frame
)
obstacle_cloud = filter_points_by_height(
point_cloud_episodic_frame, self._min_height, self._max_height
)

# Update the navigable area, which is an inverse of the obstacle map after a
# dilation operation to accommodate the robot's radius.
self._navigable_map = 1 - cv2.dilate(
self._map.astype(np.uint8),
self._navigable_kernel,
iterations=1,
).astype(bool)
# Populate topdown map with obstacle locations
xy_points = obstacle_cloud[:, :2]
pixel_points = self._xy_to_px(xy_points)
self._map[pixel_points[:, 1], pixel_points[:, 0]] = 1

# Update the navigable area, which is an inverse of the obstacle map after a
# dilation operation to accommodate the robot's radius.
self._navigable_map = 1 - cv2.dilate(
self._map.astype(np.uint8),
self._navigable_kernel,
iterations=1,
).astype(bool)

if not explore:
return

# Update the explored area
agent_xy_location = tf_camera_to_episodic[:2, 3]
Expand Down
1 change: 1 addition & 0 deletions zsos/policy/base_objectnav_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,7 @@ class ZSOSConfig:
vqa_prompt: str = "Is this "
coco_threshold: float = 0.6
non_coco_threshold: float = 0.4
agent_radius: float = 0.18

@classmethod
@property
Expand Down
58 changes: 49 additions & 9 deletions zsos/policy/reality_policies.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from dataclasses import dataclass
from typing import Any, Dict, Union
from typing import Any, Dict, List, Union

import numpy as np
import torch
Expand All @@ -11,6 +11,8 @@
from zsos.policy.base_objectnav_policy import BaseObjectNavPolicy, ZSOSConfig
from zsos.policy.itm_policy import ITMPolicyV2

INITIAL_ARM_YAWS = np.deg2rad([-90, -60, -30, 0, 30, 60, 90, 0]).tolist()


class RealityMixin:
"""
Expand All @@ -26,6 +28,7 @@ class RealityMixin:
"chair . table . tv . laptop . microwave . toaster . sink . refrigerator . book"
" . clock . vase . scissors . teddy bear . hair drier . toothbrush ."
)
_initial_yaws: List = INITIAL_ARM_YAWS.copy()

def __init__(self: BaseObjectNavPolicy, *args: Any, **kwargs: Any) -> None:
super().__init__(sync_explored_areas=True, *args, **kwargs)
Expand All @@ -42,13 +45,17 @@ def from_config(cls, config: DictConfig, *args_unused, **kwargs_unused):
return cls(**kwargs)

def act(
self: BaseObjectNavPolicy,
self: Union["RealityMixin", BaseObjectNavPolicy],
observations: Dict[str, Any],
rnn_hidden_states: Any,
prev_actions: Any,
masks: Tensor,
deterministic=False,
) -> Dict[str, Any]:
if observations["objectgoal"] not in self._non_coco_caption:
self._non_coco_caption = (
observations["objectgoal"] + " . " + self._non_coco_caption
)
parent_cls: BaseObjectNavPolicy = super() # type: ignore
action: Tensor = parent_cls.act(
observations, rnn_hidden_states, prev_actions, masks, deterministic
Expand All @@ -58,11 +65,22 @@ def act(
# is the linear velocity and the second element is the angular velocity. We
# convert this numpy array to a dictionary with keys "angular" and "linear" so
# that it can be passed to the Spot robot.
action_dict = {
"angular": action[0][0].item(),
"linear": action[0][1].item(),
"info": self._policy_info,
}
if self._done_initializing:
action_dict = {
"angular": action[0][0].item(),
"linear": action[0][1].item(),
"arm_yaw": -1,
"info": self._policy_info,
}
else:
action_dict = {
"angular": 0,
"linear": 0,
"arm_yaw": action[0][0].item(),
"info": self._policy_info,
}

self._done_initializing = len(self._initial_yaws) == 0

return action_dict

Expand All @@ -74,7 +92,12 @@ def get_action(
def _reset(self: BaseObjectNavPolicy) -> None:
parent_cls: BaseObjectNavPolicy = super() # type: ignore
parent_cls._reset()
self._done_initializing = True
self._initial_yaws = INITIAL_ARM_YAWS.copy()
self._done_initializing = False

def _initialize(self) -> Tensor:
yaw = self._initial_yaws.pop(0)
return torch.tensor([[yaw]], dtype=torch.float32)

def _cache_observations(
self: Union["RealityMixin", BaseObjectNavPolicy], observations: Dict[str, Any]
Expand All @@ -88,7 +111,7 @@ def _cache_observations(
return

self._obstacle_map: ObstacleMap
for obs_map_data in observations["obstacle_map_depths"]:
for obs_map_data in observations["obstacle_map_depths"][:-1]:
depth, tf, min_depth, max_depth, fx, fy, topdown_fov = obs_map_data
self._obstacle_map.update_map(
depth,
Expand All @@ -98,7 +121,24 @@ def _cache_observations(
fx,
fy,
topdown_fov,
explore=False,
)

_, tf, min_depth, max_depth, fx, fy, topdown_fov = observations[
"obstacle_map_depths"
][-1]
self._obstacle_map.update_map(
None,
tf,
min_depth,
max_depth,
fx,
fy,
topdown_fov,
explore=True,
update_obstacles=False,
)

self._obstacle_map.update_agent_traj(
observations["robot_xy"], observations["robot_heading"]
)
Expand Down
50 changes: 33 additions & 17 deletions zsos/reality/objectnav_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,24 +14,24 @@

LEFT_CROP = 124
RIGHT_CROP = 60
NOMINAL_ARM_POSE = np.deg2rad([0, -170, 120, 0, 75, 0])
NOMINAL_ARM_POSE = np.deg2rad([0, -170, 120, 0, 55, 0])

VALUE_MAP_CAMS = [
SpotCamIds.BACK_FISHEYE,
SpotCamIds.BACK_DEPTH_IN_VISUAL_FRAME,
SpotCamIds.LEFT_FISHEYE,
SpotCamIds.LEFT_DEPTH_IN_VISUAL_FRAME,
SpotCamIds.RIGHT_FISHEYE,
SpotCamIds.RIGHT_DEPTH_IN_VISUAL_FRAME,
# SpotCamIds.BACK_FISHEYE,
# SpotCamIds.BACK_DEPTH_IN_VISUAL_FRAME,
# SpotCamIds.LEFT_FISHEYE,
# SpotCamIds.LEFT_DEPTH_IN_VISUAL_FRAME,
# SpotCamIds.RIGHT_FISHEYE,
# SpotCamIds.RIGHT_DEPTH_IN_VISUAL_FRAME,
SpotCamIds.HAND_COLOR,
]

POINT_CLOUD_CAMS = [
SpotCamIds.RIGHT_DEPTH_IN_VISUAL_FRAME,
SpotCamIds.FRONTLEFT_DEPTH,
SpotCamIds.FRONTRIGHT_DEPTH,
SpotCamIds.BACK_DEPTH_IN_VISUAL_FRAME,
SpotCamIds.LEFT_DEPTH_IN_VISUAL_FRAME,
SpotCamIds.RIGHT_DEPTH_IN_VISUAL_FRAME,
]

ALL_CAMS = list(set(VALUE_MAP_CAMS + POINT_CLOUD_CAMS))
Expand Down Expand Up @@ -95,9 +95,17 @@ def step(self, action: Dict[str, np.ndarray]) -> Tuple[Dict, float, bool, Dict]:
cv2.imshow("Visualization", cv2.resize(vis_img, (0, 0), fx=0.5, fy=0.5))
cv2.waitKey(1)

if "gripper_camera_pan" in action:
self._pan_gripper_camera(action["gripper_camera_pan"])
return super().step(action)
if action["arm_yaw"] == -1:
return super().step(action)

new_pose = np.array(NOMINAL_ARM_POSE)
new_pose[0] = action["arm_yaw"]
self.robot.set_arm_joints(new_pose, travel_time=0.5)
time.sleep(0.75)
done = False
self._num_steps += 1

return self._get_obs(), 0.0, done, {} # not using info dict yet

def _get_obs(self) -> Dict[str, np.ndarray]:
robot_xy, robot_heading = self._get_gps(), self._get_compass()
Expand Down Expand Up @@ -174,7 +182,11 @@ def _get_camera_obs(self):
# Obstacle map output is a list of (depth, tf, min_depth, max_depth, fx, fy,
# topdown_fov) for each of the cameras in POINT_CLOUD_CAMS
obstacle_map_depths = []
for src in POINT_CLOUD_CAMS:
if self._num_steps <= 10:
srcs = POINT_CLOUD_CAMS.copy()
else:
srcs = POINT_CLOUD_CAMS[:2]
for src in srcs:
depth = cam_data[src]["image"]
fx, fy = cam_data[src]["fx"], cam_data[src]["fy"]
tf = cam_data[src]["tf_camera_to_global"]
Expand All @@ -185,6 +197,15 @@ def _get_camera_obs(self):
src_data = (depth, tf, min_depth, self._max_body_cam_depth, fx, fy, fov)
obstacle_map_depths.append(src_data)

tf = cam_data[SpotCamIds.HAND_COLOR]["tf_camera_to_global"]
fx, fy = (
cam_data[SpotCamIds.HAND_COLOR]["fx"],
cam_data[SpotCamIds.HAND_COLOR]["fy"],
)
fov = get_fov(fx, cam_data[src]["image"].shape[1])
src_data = (None, tf, min_depth, self._max_body_cam_depth, fx, fy, fov)
obstacle_map_depths.append(src_data)

# Value map output is a list of (rgb, depth, tf_camera_to_episodic, min_depth,
# max_depth, fov) for each of the cameras in VALUE_MAP_CAMS
value_map_rgbd = []
Expand Down Expand Up @@ -231,8 +252,3 @@ def _get_compass(self) -> float:
global_yaw = self.robot.xy_yaw[1]
episodic_yaw = wrap_heading(global_yaw - self.episodic_start_yaw)
return episodic_yaw

def _pan_gripper_camera(self, yaw):
new_pose = np.array(NOMINAL_ARM_POSE)
new_pose[0] = yaw[0]
self.robot.set_arm_joints(new_pose, travel_time=self.time_step * 0.9)
2 changes: 2 additions & 0 deletions zsos/reality/pointnav_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def __init__(
self._max_ang_dist = max_ang_dist
self._time_step = time_step
self._cmd_id = None
self._num_steps = 0

def reset(self, goal: Any, relative=True, *args, **kwargs) -> Dict[str, np.ndarray]:
assert isinstance(goal, np.ndarray)
Expand Down Expand Up @@ -70,6 +71,7 @@ def step(self, action: Dict[str, np.ndarray]) -> Tuple[Dict, float, bool, Dict]:
blocking=False,
)
done = ang_dist == 0.0 and lin_dist == 0.0
self._num_steps += 1
return self._get_obs(), 0.0, done, {} # not using info dict yet

def _compute_velocities(self, action: Dict[str, np.ndarray]) -> Tuple[float, float]:
Expand Down

0 comments on commit 3f8f23c

Please sign in to comment.