Skip to content

Commit

Permalink
interface: add Nerfstudio and RTMV importer (#1047)
Browse files Browse the repository at this point in the history
Co-authored-by: Mihai Cernea <[email protected]>
  • Loading branch information
cdcseacave and MihaiCernea authored Aug 18, 2023
1 parent a84072d commit a5707ae
Show file tree
Hide file tree
Showing 5 changed files with 266 additions and 14 deletions.
254 changes: 247 additions & 7 deletions apps/InterfaceMVSNet/InterfaceMVSNet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@

#include "../../libs/MVS/Common.h"
#include "../../libs/MVS/Scene.h"
#define JSON_NOEXCEPTION
#include "../../libs/IO/json.hpp"
#include <boost/program_options.hpp>

using namespace MVS;
Expand All @@ -44,6 +46,8 @@ using namespace MVS;
#define MVSNET_CAMERAS_FOLDER _T("cams")
#define MVSNET_IMAGES_EXT _T(".jpg")
#define MVSNET_CAMERAS_NAME _T("_cam.txt")
#define RTMV_CAMERAS_EXT _T(".json")
#define NERFSTUDIO_TRANSFORMS _T("transforms.json")


// S T R U C T S ///////////////////////////////////////////////////
Expand Down Expand Up @@ -217,17 +221,14 @@ void ImageListParse(const LPSTR* argv, Matrix3x4& P)
// |--xxx.jpg
// |--xxx.jpg
// ....
// |--xxx.jpg
// |--cams
// |--xxx_cam.txt
// |--xxx_cam.txt
// ....
// |--xxx_cam.txt
// |--render_cams
// |--xxx_cam.txt
// |--xxx_cam.txt
// ....
// |--xxx_cam.txt
//
// where the camera parameter of one image stored in a cam.txt file contains the camera
// extrinsic E = [R|t], intrinsic K and the depth range:
Expand All @@ -243,12 +244,11 @@ void ImageListParse(const LPSTR* argv, Matrix3x4& P)
// K20 K21 K22
//
// DEPTH_MIN DEPTH_INTERVAL (DEPTH_NUM DEPTH_MAX)
bool ParseSceneMVSNet(Scene& scene)
bool ParseSceneMVSNet(Scene& scene, const std::filesystem::path& path)
{
#if defined(_SUPPORT_CPP17) && (!defined(__GNUC__) || (__GNUC__ > 7))
String strPath(MAKE_PATH_FULL(WORKING_FOLDER_FULL, OPT::strInputFileName));
String strPath(path.string());
Util::ensureValidFolderPath(strPath);
const std::filesystem::path path(static_cast<std::string&>(strPath));
IIndex prevPlatformID = NO_ID;
for (const std::filesystem::directory_entry& entry : std::filesystem::directory_iterator(path / MVSNET_IMAGES_FOLDER)) {
if (entry.path().extension() != MVSNET_IMAGES_EXT)
Expand Down Expand Up @@ -335,6 +335,246 @@ bool ParseSceneMVSNet(Scene& scene)
#endif // _SUPPORT_CPP17
}

// Nerfstudio scene format: https://docs.nerf.studio/quickstart/data_conventions.html
// |--sceneX
//    |--images
//       |--xxx.jpg
//       |--xxx.jpg
//       ....
//    |--outputs (optional)
//       |--depthxxxx.exr
//       |--normalxxxx.exr
//       ....
//    |--transforms.json
//
// Parse a scene stored in Nerfstudio format: a single shared pinhole camera plus one
// camera-to-world transform per frame, all described in transforms.json; optionally
// import the rendered depth/normal-maps and export them as OpenMVS .dmap files.
// Returns false if the JSON is empty, the camera model is unsupported, the camera has
// non-zero distortion, or fewer than 2 images were registered.
bool ParseSceneNerfstudio(Scene& scene, const std::filesystem::path& path)
{
	const nlohmann::json data = nlohmann::json::parse(std::ifstream((path / NERFSTUDIO_TRANSFORMS).string()));
	if (data.empty())
		return false;
	// parse camera: all frames share one platform/camera in this format
	const cv::Size resolution(data["w"].get<uint32_t>(), data["h"].get<uint32_t>());
	const IIndex platformID = scene.platforms.size();
	Platform& platform = scene.platforms.emplace_back();
	Platform::Camera& camera = platform.cameras.emplace_back();
	camera.K = KMatrix::IDENTITY;
	camera.R = RMatrix::IDENTITY;
	camera.C = CMatrix::ZERO;
	camera.K(0,0) = data["fl_x"].get<REAL>();
	camera.K(1,1) = data["fl_y"].get<REAL>();
	camera.K(0,2) = data["cx"].get<REAL>();
	camera.K(1,2) = data["cy"].get<REAL>();
	const String cameraModel = data["camera_model"].get<std::string>();
	if (cameraModel == "SIMPLE_PINHOLE") {
	} else
	// check ZERO radial distortion for all "PERSPECTIVE" type cameras,
	// as OpenMVS expects undistorted images
	if (cameraModel == "PINHOLE" || cameraModel == "SIMPLE_RADIAL" || cameraModel == "RADIAL" || cameraModel == "OPENCV") {
		const REAL k1 = data["k1"].get<REAL>();
		const REAL k2 = data["k2"].get<REAL>();
		const REAL p1 = data["p1"].get<REAL>();
		const REAL p2 = data["p2"].get<REAL>();
		if (k1 != 0 || k2 != 0 || p1 != 0 || p2 != 0) {
			VERBOSE("error: radial distortion not supported");
			return false;
		}
	} else {
		VERBOSE("error: camera model not supported");
		return false;
	}
	// parse images
	const nlohmann::json& frames = data["frames"];
	for (const nlohmann::json& frame: frames) {
		// set image
		// frames expected to be ordered in JSON
		const IIndex imageID = scene.images.size();
		const String strFileName((path / frame["file_path"].get<std::string>()).string());
		Image& imageData = scene.images.AddEmpty();
		imageData.platformID = platformID;
		imageData.cameraID = 0; // only one camera per platform supported by this format
		imageData.poseID = NO_ID;
		imageData.ID = imageID;
		imageData.name = strFileName;
		ASSERT(Util::isFullPath(imageData.name));
		// set image resolution
		imageData.width = resolution.width;
		imageData.height = resolution.height;
		imageData.scale = 1;
		// load camera pose
		imageData.poseID = platform.poses.size();
		Platform::Pose& pose = platform.poses.AddEmpty();
		const auto Ps = frame["transform_matrix"].get<std::vector<std::vector<double>>>();
		Eigen::Matrix4d P{
			{Ps[0][0], Ps[0][1], Ps[0][2], Ps[0][3]},
			{Ps[1][0], Ps[1][1], Ps[1][2], Ps[1][3]},
			{Ps[2][0], Ps[2][1], Ps[2][2], Ps[2][3]},
			{Ps[3][0], Ps[3][1], Ps[3][2], Ps[3][3]}
		};
		// revert nerfstudio conversion:
		// convert from COLMAP's camera coordinate system (OpenCV) to ours (OpenGL)
		// c2w[0:3, 1:3] *= -1
		// c2w = c2w[np.array([1, 0, 2, 3]), :]
		// c2w[2, :] *= -1
		P.row(2) *= -1;
		P.row(0).swap(P.row(1));
		P.col(2) *= -1;
		P.col(1) *= -1;
		// set camera pose: R is world-to-camera rotation, C is the camera center
		pose.R = P.topLeftCorner<3, 3>().transpose().eval();
		pose.R.EnforceOrthogonality();
		pose.C = P.topRightCorner<3, 1>().eval();
		imageData.camera = platform.GetCamera(imageData.cameraID, imageData.poseID);
		// try reading depth-map and normal-map (optional "outputs" folder);
		// if either is missing, skip the .dmap export for this frame only
		DepthMap depthMap; {
			const String depthPath((path.parent_path() / String::FormatString("outputs/depth%04u.exr", imageID).c_str()).string());
			const cv::Mat imgDepthMap = cv::imread(depthPath, cv::IMREAD_UNCHANGED);
			if (imgDepthMap.empty()) {
				VERBOSE("Unable to load depthmap %s.", depthPath.c_str());
				continue;
			}
			imgDepthMap.convertTo(depthMap, CV_32FC1);
		}
		NormalMap normalMap; {
			const String normalPath((path.parent_path() / String::FormatString("outputs/normal%04u.exr", imageID).c_str()).string());
			const cv::Mat imgNormalMap = cv::imread(normalPath, cv::IMREAD_UNCHANGED);
			if (imgNormalMap.empty()) {
				VERBOSE("Unable to load normalMap %s.", normalPath.c_str());
				continue;
			}
			imgNormalMap.convertTo(normalMap, CV_32FC3);
		}
		const ConfidenceMap confMap;
		const ViewsMap viewsMap;
		const IIndexArr IDs = {imageID};
		double dMin, dMax;
		cv::minMaxIdx(depthMap, &dMin, &dMax, NULL, NULL, depthMap > 0);
		const String dmapPath((path.parent_path() / String::FormatString("depth%04u.dmap", imageID).c_str()).string());
		if (!ExportDepthDataRaw(dmapPath,
			imageData.name, IDs, resolution,
			camera.K, pose.R, pose.C,
			(float)dMin, (float)dMax,
			depthMap, normalMap, confMap, viewsMap))
		{
			// pass the raw C string: a non-trivial String object must not go
			// through the variadic VERBOSE call (aborts at runtime)
			VERBOSE("Unable to save dmap: %s", dmapPath.c_str());
			continue;
		}
	}
	if (scene.images.size() < 2)
		return false;
	scene.nCalibratedImages = (unsigned)scene.images.size();
	return true;
}

// RTMV scene format: http://www.cs.umd.edu/~mmeshry/projects/rtmv
// |--sceneX
//    |--xxx.jpg
//    |--xxx.json
//    ....
//
// Parse a scene stored in RTMV format: one JSON file per image, each describing the
// intrinsics and the camera-to-world pose of the sibling .jpg with the same stem.
// Consecutive images sharing (nearly) identical intrinsics are grouped on one platform.
// Returns false if fewer than 2 images were registered.
bool ParseSceneRTMV(Scene& scene, const std::filesystem::path& path)
{
#if defined(_SUPPORT_CPP17) && (!defined(__GNUC__) || (__GNUC__ > 7))
	IIndex prevPlatformID = NO_ID;
	for (const std::filesystem::directory_entry& entry : std::filesystem::directory_iterator(path)) {
		if (entry.path().extension() != RTMV_CAMERAS_EXT)
			continue;
		// parse camera
		const std::string strFileName((path / entry.path().stem()).string());
		const nlohmann::json dataCamera = nlohmann::json::parse(std::ifstream(strFileName+RTMV_CAMERAS_EXT));
		if (dataCamera.empty())
			continue;
		const nlohmann::json& data = dataCamera["camera_data"];
		const cv::Size resolution(data["width"].get<uint32_t>(), data["height"].get<uint32_t>());
		// set platform: reuse the previous platform if the intrinsics match,
		// otherwise start a new one
		Matrix3x3 K = Matrix3x3::IDENTITY;
		K(0,0) = data["intrinsics"]["fx"].get<REAL>();
		K(1,1) = data["intrinsics"]["fy"].get<REAL>();
		K(0,2) = data["intrinsics"]["cx"].get<REAL>();
		K(1,2) = data["intrinsics"]["cy"].get<REAL>();
		IIndex platformID;
		if (prevPlatformID == NO_ID || !K.IsEqual(scene.platforms[prevPlatformID].cameras[0].K, 1e-3)) {
			prevPlatformID = platformID = scene.platforms.size();
			Platform& platform = scene.platforms.emplace_back();
			Platform::Camera& camera = platform.cameras.emplace_back();
			camera.K = K;
			camera.R = RMatrix::IDENTITY;
			camera.C = CMatrix::ZERO;
		} else {
			platformID = prevPlatformID;
		}
		Platform& platform = scene.platforms[platformID];
		// set image
		const IIndex imageID = scene.images.size();
		Image& imageData = scene.images.AddEmpty();
		imageData.platformID = platformID;
		imageData.cameraID = 0; // only one camera per platform supported by this format
		imageData.poseID = NO_ID;
		imageData.ID = imageID;
		imageData.name = strFileName+".jpg";
		ASSERT(Util::isFullPath(imageData.name));
		// set image resolution
		imageData.width = resolution.width;
		imageData.height = resolution.height;
		imageData.scale = 1;
		// load camera pose: "cam2world" is stored column-major, so build P transposed
		imageData.poseID = platform.poses.size();
		Platform::Pose& pose = platform.poses.AddEmpty();
		const auto Ps = data["cam2world"].get<std::vector<std::vector<double>>>();
		Eigen::Matrix4d P{
			{Ps[0][0], Ps[1][0], Ps[2][0], Ps[3][0]},
			{Ps[0][1], Ps[1][1], Ps[2][1], Ps[3][1]},
			{Ps[0][2], Ps[1][2], Ps[2][2], Ps[3][2]},
			{Ps[0][3], Ps[1][3], Ps[2][3], Ps[3][3]}
		};
		// apply the same transforms as nerfstudio converter
		P.row(2) *= -1;
		P.row(0).swap(P.row(1));
		P.col(2) *= -1;
		P.col(1) *= -1;
		// set camera pose: R is world-to-camera rotation, C is the camera center
		pose.R = P.topLeftCorner<3, 3>().transpose().eval();
		pose.R.EnforceOrthogonality();
		pose.C = P.topRightCorner<3, 1>().eval();
		imageData.camera = platform.GetCamera(imageData.cameraID, imageData.poseID);
	}
	if (scene.images.size() < 2)
		return false;
	scene.nCalibratedImages = (unsigned)scene.images.size();
	return true;
#else
	// fixed: the message previously claimed "MVSNet format" inside the RTMV parser
	VERBOSE("error: C++17 is required to parse RTMV format");
	return false;
#endif // _SUPPORT_CPP17
}

// Detect the scene format in the input folder and dispatch to the matching importer:
// a transforms.json file selects Nerfstudio, any other .json file selects RTMV,
// and a folder without .json files falls back to the MVSNet layout.
bool ParseScene(Scene& scene)
{
#if defined(_SUPPORT_CPP17) && (!defined(__GNUC__) || (__GNUC__ > 7))
	String strPath(MAKE_PATH_FULL(WORKING_FOLDER_FULL, OPT::strInputFileName));
	Util::ensureValidFolderPath(strPath);
	const std::filesystem::path path(static_cast<const std::string&>(strPath));
	// scan the folder once for camera JSON files
	bool bAnyJson(false), bTransformsJson(false);
	for (const std::filesystem::directory_entry& entry : std::filesystem::directory_iterator(path)) {
		if (entry.path().extension() != RTMV_CAMERAS_EXT)
			continue;
		bAnyJson = true;
		if (entry.path().filename() == NERFSTUDIO_TRANSFORMS) {
			bTransformsJson = true;
			break; // transforms.json is decisive; stop scanning
		}
	}
	if (bTransformsJson)
		return ParseSceneNerfstudio(scene, path);
	if (bAnyJson)
		return ParseSceneRTMV(scene, path);
	return ParseSceneMVSNet(scene, path);
#else
	VERBOSE("error: C++17 is required to parse MVSNet format");
	return false;
#endif // _SUPPORT_CPP17
}

} // unnamed namespace

int main(int argc, LPCTSTR* argv)
Expand All @@ -352,7 +592,7 @@ int main(int argc, LPCTSTR* argv)
Scene scene(OPT::nMaxThreads);

// convert data from MVSNet format to OpenMVS
if (!ParseSceneMVSNet(scene))
if (!ParseScene(scene))
return EXIT_FAILURE;

// write OpenMVS input data
Expand Down
1 change: 1 addition & 0 deletions apps/InterfacePolycam/InterfacePolycam.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

#include "../../libs/MVS/Common.h"
#include "../../libs/MVS/Scene.h"
#define JSON_NOEXCEPTION
#include "../../libs/IO/json.hpp"
#include <boost/program_options.hpp>

Expand Down
12 changes: 8 additions & 4 deletions libs/MVS/PointCloud.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -335,10 +335,14 @@ bool PointCloud::Load(const String& fileName)
colors[v] = vertex.c;
if (!normals.empty())
normals[v] = vertex.n;
if (!pointViews.empty())
pointViews[v].CopyOfRemove(ViewArr(vertex.views.num, vertex.views.pIndices));
if (!pointWeights.empty())
pointWeights[v].CopyOfRemove(WeightArr(vertex.views.num, vertex.views.pWeights));
if (!pointViews.empty()) {
ViewArr pv(vertex.views.num, vertex.views.pIndices);
pointViews[v].CopyOfRemove(pv);
}
if (!pointWeights.empty()){
WeightArr pw(vertex.views.num, vertex.views.pWeights);
pointWeights[v].CopyOfRemove(pw);
}
}
} else {
ply.get_other_element();
Expand Down
4 changes: 2 additions & 2 deletions libs/MVS/Scene.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1799,7 +1799,7 @@ bool Scene::ComputeTowerCylinder(Point2f& centerPoint, float& fRadius, float& fR
// calculate tower radius as median distance from tower center to cameras
FloatArr cameraDistancesToMiddle(cameras2D.size());
FOREACH (camIdx, cameras2D)
cameraDistancesToMiddle[camIdx] = norm(cameras2D[camIdx] - centerPoint);
cameraDistancesToMiddle[camIdx] = (float)norm(cameras2D[camIdx] - centerPoint);
const float fMedianDistance = cameraDistancesToMiddle.GetMedian();
fRadius = MAXF(0.2f, (fMedianDistance - 1.f) / 3.f);
// get the average of top 85 to 95% of the highest distances to center
Expand Down Expand Up @@ -1882,7 +1882,7 @@ PointCloud Scene::BuildTowerMesh(const PointCloud& origPointCloud, const Point2f
} else {
cList<FloatArr> sliceDistances(nTargetCircles);
for (const Point3f& P : origPointCloud.points) {
const float d(norm(Point2f(P.x, P.y) - centerPoint));
const float d((float)norm(Point2f(P.x, P.y) - centerPoint));
if (d <= fROIRadius) {
const float fIdx((zMax - P.z) * nTargetDensity);
int bIdx(FLOOR2INT(fIdx));
Expand Down
9 changes: 8 additions & 1 deletion vcpkg.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "openmvs",
"version": "2.1.0",
"version": "2.2.0",
"description": "OpenMVS: open Multi-View Stereo reconstruction library",
"homepage": "https://cdcseacave.github.io/openMVS",
"dependencies": [
Expand All @@ -17,6 +17,13 @@
"glew",
"glfw3",
"libpng",
{
"name": "opencv",
"features": [
"eigen",
"openexr"
]
},
"opencv",
"opengl",
"tiff",
Expand Down

0 comments on commit a5707ae

Please sign in to comment.