Vlfm rename #11

Merged 2 commits on Oct 5, 2023. Changes from all commits are shown below.
67 changes: 51 additions & 16 deletions README.md
@@ -1,23 +1,62 @@
-# llm-object-search
+# Vision-Language Frontier Maps

-## Installation
+## 1. Installation

Create the conda environment:
```bash
-conda_env_name=zsos # 'zero-shot object search'
+conda_env_name=vlfm
conda create -n $conda_env_name python=3.9 -y &&
-conda activate $conda_env_name &&
+conda activate $conda_env_name
```
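A quick way to confirm the environment was created correctly (plain conda usage, nothing repo-specific):
```bash
# Sanity check: the env should exist and provide Python 3.9.
conda env list | grep vlfm
python --version
```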

-# Mamba is used for much, much faster installation.
-conda install mamba -y -c conda-forge &&
-mamba install \
-  habitat-sim=0.2.4 headless pytorch pytorch-cuda \
-  transformers \
-  -c aihabitat -c pytorch -c huggingface \
-  -c nvidia -c conda-forge -y
Install all the dependencies:
```bash
pip install -e .[habitat]
git clone [email protected]:WongKinYiu/yolov7.git # if using YOLOv7
```
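If the editable install succeeded, a minimal import check (assuming the package is importable as `vlfm`, per the rename in this PR):
```bash
# Should print the installed package path without raising ImportError.
python -c "import vlfm; print(vlfm.__file__)"
```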

-Then, follow the instructions in [readmes/installing_habitat.md](readmes/installing_habitat.md) to install Habitat and relevant datasets.

## 2. Downloading the HM3D dataset
First, set the following variables; they are only needed during installation (no need to add them to your `.bashrc`):
```bash
MATTERPORT_TOKEN_ID=<FILL IN FROM YOUR ACCOUNT INFO IN MATTERPORT>
MATTERPORT_TOKEN_SECRET=<FILL IN FROM YOUR ACCOUNT INFO IN MATTERPORT>
DATA_DIR=</path/to/vlfm/data>

# Link to the HM3D ObjectNav episodes dataset, listed here:
# https://github.com/facebookresearch/habitat-lab/blob/main/DATASETS.md#task-datasets
# From the above page, locate the link to the HM3D ObjectNav dataset and
# verify that it matches the URL assigned below.
HM3D_OBJECTNAV=https://dl.fbaipublicfiles.com/habitat/data/datasets/objectnav/hm3d/v2/objectnav_hm3d_v2.zip
```
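A small guard against typos before running the downloads below (plain bash; uses indirect expansion, nothing repo-specific):
```bash
# Warn about any variable that was left unset above.
for v in MATTERPORT_TOKEN_ID MATTERPORT_TOKEN_SECRET DATA_DIR HM3D_OBJECTNAV; do
  [ -n "${!v}" ] || echo "WARNING: $v is not set"
done
```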

### Download the scene and episode datasets
*Ensure that the correct conda environment is activated!*
```bash
# Download HM3D 3D scans (scenes_dataset)
python -m habitat_sim.utils.datasets_download \
--username $MATTERPORT_TOKEN_ID --password $MATTERPORT_TOKEN_SECRET \
--uids hm3d_train_v0.2 \
--data-path $DATA_DIR &&
python -m habitat_sim.utils.datasets_download \
--username $MATTERPORT_TOKEN_ID --password $MATTERPORT_TOKEN_SECRET \
--uids hm3d_val_v0.2 \
--data-path $DATA_DIR &&

# Download HM3D ObjectNav dataset episodes
wget $HM3D_OBJECTNAV &&
unzip objectnav_hm3d_v2.zip &&
mkdir -p $DATA_DIR/datasets/objectnav/hm3d &&
mv objectnav_hm3d_v2 $DATA_DIR/datasets/objectnav/hm3d/v2 &&
rm objectnav_hm3d_v2.zip
```
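A rough check of the resulting layout (the episode path follows from the `mv` above; the scene location is an assumption, since habitat-sim may nest downloads under `versioned_data/` with symlinks):
```bash
ls "$DATA_DIR/datasets/objectnav/hm3d/v2"  # should contain the episode splits
ls "$DATA_DIR"                             # look for the HM3D scenes downloaded by habitat-sim
```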


## 3. Downloading weights for various models
The weights for MobileSAM, GroundingDINO, YOLOv7, and PointNav must be saved to the `data/` directory. The weights can be downloaded from the following links:
- `mobile_sam.pt`: https://github.com/ChaoningZhang/MobileSAM
- `groundingdino_swint_ogc.pth`: https://github.com/IDEA-Research/GroundingDINO
- `yolov7-e6e.pt`: https://github.com/WongKinYiu/yolov7
- `pointnav_weights.pth`:
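
Once downloaded, a quick way to confirm everything landed in the right place (filenames assumed from the list above; the `pointnav_weights.pth` link was not captured here):
```bash
ls -l data/mobile_sam.pt data/groundingdino_swint_ogc.pth \
      data/yolov7-e6e.pt data/pointnav_weights.pth
```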

### Installing GroundingDINO
To install GroundingDINO, you will need `CUDA_HOME` set as an environment variable. If you are using conda and would like to install a version of CUDA that is compatible with the one used to compile your PyTorch build, you can run the following commands to install CUDA and set `CUDA_HOME`:
@@ -37,7 +76,3 @@
ln -s ${CONDA_PREFIX}/lib/python3.9/site-packages/nvidia/cublas/include/* ${CONDA_PREFIX}/include/ &&
ln -s ${CONDA_PREFIX}/lib/python3.9/site-packages/nvidia/cusolver/include/* ${CONDA_PREFIX}/include/ &&
export CUDA_HOME=${CONDA_PREFIX}
```
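Before building GroundingDINO's CUDA extensions, it is worth verifying that the toolkit and PyTorch agree (standard CUDA/PyTorch introspection only):
```bash
echo $CUDA_HOME
$CUDA_HOME/bin/nvcc --version                        # toolkit version
python -c "import torch; print(torch.version.cuda)"  # version PyTorch was built with
```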

-### TODO
-1. Add instructions for installing `frontier_exploration`
-2. Add instructions for installing FastChat
@@ -28,7 +28,7 @@ habitat:

habitat_baselines:
  evaluate: True
-  eval_ckpt_path_dir: dummy_policy.pth
+  eval_ckpt_path_dir: data/dummy_policy.pth
  num_environments: 1
  load_resume_state_config: False

@@ -37,7 +37,7 @@ habitat_baselines:
  video_dir: "video_dir"
  test_episode_count: -1
  checkpoint_folder: "data/new_checkpoints"
-  trainer_name: "ddppo"
+  trainer_name: "vlfm"
  num_updates: 270000
  log_interval: 10
  num_checkpoints: 100
@@ -51,7 +51,7 @@ habitat_baselines:
  rl:

    policy:
-      name: "PointNavResNetPolicy"
+      name: "HabitatITMPolicyV2"

    ppo:
      # ppo params
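Since `eval_ckpt_path_dir` now points at `data/dummy_policy.pth`, a stub checkpoint can be generated with the helper this PR uses in `test/test_setup.py` (one-liner assumed from that test's usage):
```bash
mkdir -p data
python -c "from vlfm.utils.generate_dummy_policy import save_dummy_policy; save_dummy_policy('data/dummy_policy.pth')"
```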
22 changes: 11 additions & 11 deletions pyproject.toml
@@ -5,9 +5,9 @@
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

[project]
name = "zsos"
name = "vlfm"
version = "0.1"
description = "Zero shot object search"
description = "Vision-Language Frontier Maps"
authors = [
{name = "Naoki Yokoyama", email = "[email protected]"},
]
@@ -16,11 +16,14 @@
requires-python = ">=3.9"
dependencies = [
    "torch >= 1.10.1",
    "numpy >= 1.22.4",
-    "frontier_exploration @ git+https://github.com/naokiyokoyama/frontier_exploration.git",
-    "transformers == 4.26.0", # higher versions break BLIP-2
    "flask >= 2.3.2",
    "gym >=0.22.0,<0.23.1",
-    "salesforce-lavis >= 1.0.2",
+    "seaborn >= 0.12.2", # required by yolov7
+    "open3d >= 0.17.0",
+    "transformers == 4.26.0", # higher versions break BLIP-2
+    "salesforce-lavis >= 1.0.2", # for BLIP-2
+    "frontier_exploration @ git+https://github.com/naokiyokoyama/frontier_exploration.git",
+    "mobile_sam @ git+https://github.com/ChaoningZhang/MobileSAM.git",
+    "depth_camera_filtering @ git+https://github.com/naokiyokoyama/depth_camera_filtering",
]

[project.optional-dependencies]
@@ -34,16 +37,13 @@
habitat = [
    "habitat-baselines == 0.2.420230405",
    "habitat-lab == 0.2.420230405",
]
-reality = [
-    "depth_camera_filtering @ git+https://github.com/naokiyokoyama/depth_camera_filtering",
-]

[project.urls]
"Homepage" = "theaiinstitute.com"
"GitHub" = "https://github.com/bdaiinstitute/llm-object-search"
"GitHub" = "https://github.com/bdaiinstitute/vlfm"

[tool.setuptools]
packages = ["zsos", "config"]
packages = ["vlfm", "config"]

[tool.ruff]
# Enable pycodestyle (`E`), Pyflakes (`F`), and import sorting (`I`)
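After `pip install -e .[habitat]`, the VCS-pinned dependencies above can be spot-checked (package names assumed to match the pyproject entries):
```bash
pip show frontier_exploration mobile_sam depth_camera_filtering | grep -E "^(Name|Version)"
```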
50 changes: 0 additions & 50 deletions readmes/installing_habitat.md

This file was deleted.

29 changes: 0 additions & 29 deletions scripts/launch_llm_server.sh

This file was deleted.

17 changes: 7 additions & 10 deletions scripts/launch_vlm_servers.sh
@@ -1,18 +1,15 @@
#!/usr/bin/env bash
# Copyright [2023] Boston Dynamics AI Institute, Inc.

-# Ensure you have 'export OS_PYTHON=<PATH_TO_PYTHON>' in your .bashrc, where
+# Ensure you have 'export VLFM_PYTHON=<PATH_TO_PYTHON>' in your .bashrc, where
# <PATH_TO_PYTHON> is the path to the python executable for your conda env
# (e.g., PATH_TO_PYTHON=`conda activate <env_name> && which python`)

# A 30-second sleep is added after each command so that, if the user re-attaches
# to the tmux session within 30 seconds of running this script, any errors that
# occurred are still visible.

-export OS_PYTHON=${OS_PYTHON:-`which python`}
+export VLFM_PYTHON=${VLFM_PYTHON:-`which python`}
export MOBILE_SAM_CHECKPOINT=${MOBILE_SAM_CHECKPOINT:-data/mobile_sam.pt}
export GROUNDING_DINO_CONFIG=${GROUNDING_DINO_CONFIG:-GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py}
export GROUNDING_DINO_WEIGHTS=${GROUNDING_DINO_WEIGHTS:-data/groundingdino_swint_ogc.pth}
-export CLASSES_PATH=${CLASSES_PATH:-zsos/vlm/classes.txt}
+export CLASSES_PATH=${CLASSES_PATH:-vlfm/vlm/classes.txt}
export GROUNDING_DINO_PORT=${GROUNDING_DINO_PORT:-12181}
export BLIP2ITM_PORT=${BLIP2ITM_PORT:-12182}
export SAM_PORT=${SAM_PORT:-12183}
@@ -31,10 +28,10 @@
tmux split-window -h -t ${session_name}:0.0
tmux split-window -h -t ${session_name}:0.2

# Run commands in each pane
tmux send-keys -t ${session_name}:0.0 "${OS_PYTHON} -m zsos.vlm.grounding_dino --port ${GROUNDING_DINO_PORT}" C-m
tmux send-keys -t ${session_name}:0.1 "${OS_PYTHON} -m zsos.vlm.blip2itm --port ${BLIP2ITM_PORT}" C-m
tmux send-keys -t ${session_name}:0.2 "${OS_PYTHON} -m zsos.vlm.sam --port ${SAM_PORT}" C-m
tmux send-keys -t ${session_name}:0.3 "${OS_PYTHON} -m zsos.vlm.yolov7 --port ${YOLOV7_PORT}" C-m
tmux send-keys -t ${session_name}:0.0 "${VLFM_PYTHON} -m vlfm.vlm.grounding_dino --port ${GROUNDING_DINO_PORT}" C-m
tmux send-keys -t ${session_name}:0.1 "${VLFM_PYTHON} -m vlfm.vlm.blip2itm --port ${BLIP2ITM_PORT}" C-m
tmux send-keys -t ${session_name}:0.2 "${VLFM_PYTHON} -m vlfm.vlm.sam --port ${SAM_PORT}" C-m
tmux send-keys -t ${session_name}:0.3 "${VLFM_PYTHON} -m vlfm.vlm.yolov7 --port ${YOLOV7_PORT}" C-m

# Attach to the tmux session to view the windows
echo "Created tmux session '${session_name}'. You must wait up to 90 seconds for the model weights to finish being loaded."
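A sketch of how this script might be used end to end (the tmux session name is defined inside the script and was not captured here, so a placeholder is shown):
```bash
export VLFM_PYTHON=$(conda run -n vlfm which python)  # assumes the conda env from the README
bash scripts/launch_vlm_servers.sh
tmux attach -t <session_name>  # use the name printed by the script
```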
13 changes: 2 additions & 11 deletions test/test_setup.py
@@ -1,25 +1,16 @@
import os

-import torch
from habitat_baselines.common.baseline_registry import baseline_registry  # noqa

-from zsos.run import get_config
+from vlfm.utils.generate_dummy_policy import save_dummy_policy


def test_load_and_save_config():
    if not os.path.exists("build"):
        os.makedirs("build")

-    # Save a dummy state_dict using torch.save
-    config = get_config("config/experiments/llm_objectnav_hm3d.yaml")
-    dummy_dict = {
-        "config": config,
-        "extra_state": {"step": 0},
-        "state_dict": {},
-    }
-
    filename = "build/dummy_policy.pth"
-    torch.save(dummy_dict, filename)
+    save_dummy_policy(filename)

    # Get the file size of the saved checkpoint
    file_size = os.path.getsize(filename)
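With the dummy-policy helper in place, the tests can be run as usual (pytest assumed as the runner; it is not pinned in the captured pyproject excerpt):
```bash
pytest test/test_setup.py test/test_visualization.py -q
```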
2 changes: 1 addition & 1 deletion test/test_visualization.py
@@ -2,7 +2,7 @@

import cv2

-from zsos.utils.visualization import generate_text_image
+from vlfm.utils.visualization import generate_text_image


def test_visualization():
File renamed without changes.
2 changes: 1 addition & 1 deletion zsos/mapping/base_map.py → vlfm/mapping/base_map.py
@@ -2,7 +2,7 @@

import numpy as np

-from zsos.mapping.traj_visualizer import TrajectoryVisualizer
+from vlfm.mapping.traj_visualizer import TrajectoryVisualizer


class BaseMap:
@@ -2,7 +2,7 @@

import numpy as np

-from zsos.vlm.blip2itm import BLIP2ITMClient
+from vlfm.vlm.blip2itm import BLIP2ITMClient


class Frontier:
@@ -4,7 +4,7 @@
import numpy as np
import open3d as o3d

-from zsos.utils.geometry_utils import (
+from vlfm.utils.geometry_utils import (
    extract_yaw,
    get_point_cloud,
    transform_points,
8 changes: 4 additions & 4 deletions zsos/mapping/obstacle_map.py → vlfm/mapping/obstacle_map.py
@@ -5,10 +5,10 @@

from frontier_exploration.frontier_detection import detect_frontier_waypoints
from frontier_exploration.utils.fog_of_war import reveal_fog_of_war
-from zsos.mapping.base_map import BaseMap
-from zsos.mapping.value_map import JSON_PATH, KWARGS_JSON
-from zsos.utils.geometry_utils import extract_yaw, get_point_cloud, transform_points
-from zsos.utils.img_utils import fill_small_holes
+from vlfm.mapping.base_map import BaseMap
+from vlfm.mapping.value_map import JSON_PATH, KWARGS_JSON
+from vlfm.utils.geometry_utils import extract_yaw, get_point_cloud, transform_points
+from vlfm.utils.img_utils import fill_small_holes


class ObstacleMap(BaseMap):
File renamed without changes.
6 changes: 3 additions & 3 deletions zsos/mapping/value_map.py → vlfm/mapping/value_map.py
@@ -10,9 +10,9 @@
import cv2
import numpy as np

-from zsos.mapping.base_map import BaseMap
-from zsos.utils.geometry_utils import extract_yaw, get_rotation_matrix
-from zsos.utils.img_utils import (
+from vlfm.mapping.base_map import BaseMap
+from vlfm.utils.geometry_utils import extract_yaw, get_rotation_matrix
+from vlfm.utils.img_utils import (
    monochannel_to_inferno_rgb,
    pixel_value_within_radius,
    place_img_in_img,
@@ -17,7 +17,7 @@
from hydra.core.config_store import ConfigStore
from omegaconf import DictConfig

-from zsos.obs_transformers.utils import image_resize
+from vlfm.obs_transformers.utils import image_resize


@baseline_registry.register_obs_transformer()
File renamed without changes.
File renamed without changes.