From a5707ae843222cf0a51a617c67dafb88cdfd2e4b Mon Sep 17 00:00:00 2001 From: cDc Date: Fri, 18 Aug 2023 10:54:32 +0300 Subject: [PATCH] interface: add Nerfstudio and RTMV importer (#1047) Co-authored-by: Mihai Cernea --- apps/InterfaceMVSNet/InterfaceMVSNet.cpp | 254 ++++++++++++++++++++- apps/InterfacePolycam/InterfacePolycam.cpp | 1 + libs/MVS/PointCloud.cpp | 12 +- libs/MVS/Scene.cpp | 4 +- vcpkg.json | 9 +- 5 files changed, 266 insertions(+), 14 deletions(-) diff --git a/apps/InterfaceMVSNet/InterfaceMVSNet.cpp b/apps/InterfaceMVSNet/InterfaceMVSNet.cpp index d258559d3..9cc9b9217 100644 --- a/apps/InterfaceMVSNet/InterfaceMVSNet.cpp +++ b/apps/InterfaceMVSNet/InterfaceMVSNet.cpp @@ -31,6 +31,8 @@ #include "../../libs/MVS/Common.h" #include "../../libs/MVS/Scene.h" +#define JSON_NOEXCEPTION +#include "../../libs/IO/json.hpp" #include using namespace MVS; @@ -44,6 +46,8 @@ using namespace MVS; #define MVSNET_CAMERAS_FOLDER _T("cams") #define MVSNET_IMAGES_EXT _T(".jpg") #define MVSNET_CAMERAS_NAME _T("_cam.txt") +#define RTMV_CAMERAS_EXT _T(".json") +#define NERFSTUDIO_TRANSFORMS _T("transforms.json") // S T R U C T S /////////////////////////////////////////////////// @@ -217,17 +221,14 @@ void ImageListParse(const LPSTR* argv, Matrix3x4& P) // |--xxx.jpg // |--xxx.jpg // .... -// |--xxx.jpg // |--cams // |--xxx_cam.txt // |--xxx_cam.txt // .... -// |--xxx_cam.txt // |--render_cams // |--xxx_cam.txt // |--xxx_cam.txt // .... 
-// |--xxx_cam.txt // // where the camera parameter of one image stored in a cam.txt file contains the camera // extrinsic E = [R|t], intrinsic K and the depth range: @@ -243,12 +244,11 @@ void ImageListParse(const LPSTR* argv, Matrix3x4& P) // K20 K21 K22 // // DEPTH_MIN DEPTH_INTERVAL (DEPTH_NUM DEPTH_MAX) -bool ParseSceneMVSNet(Scene& scene) +bool ParseSceneMVSNet(Scene& scene, const std::filesystem::path& path) { #if defined(_SUPPORT_CPP17) && (!defined(__GNUC__) || (__GNUC__ > 7)) - String strPath(MAKE_PATH_FULL(WORKING_FOLDER_FULL, OPT::strInputFileName)); + String strPath(path.string()); Util::ensureValidFolderPath(strPath); - const std::filesystem::path path(static_cast(strPath)); IIndex prevPlatformID = NO_ID; for (const std::filesystem::directory_entry& entry : std::filesystem::directory_iterator(path / MVSNET_IMAGES_FOLDER)) { if (entry.path().extension() != MVSNET_IMAGES_EXT) continue; @@ -335,6 +335,246 @@ bool ParseSceneMVSNet(Scene& scene) #endif // _SUPPORT_CPP17 } +// Nerfstudio scene format: https://docs.nerf.studio/quickstart/data_conventions.html +// |--sceneX +// |--images +// |--xxx.jpg +// |--xxx.jpg +// .... +// |--outputs (optional) +// |--depthxxxx.exr +// |--normalxxxx.exr +// ....
+// |--transforms.json +bool ParseSceneNerfstudio(Scene& scene, const std::filesystem::path& path) +{ + const nlohmann::json data = nlohmann::json::parse(std::ifstream((path / NERFSTUDIO_TRANSFORMS).string())); + if (data.empty()) + return false; + // parse camera + const cv::Size resolution(data["w"].get<int>(), data["h"].get<int>()); + const IIndex platformID = scene.platforms.size(); + Platform& platform = scene.platforms.emplace_back(); + Platform::Camera& camera = platform.cameras.emplace_back(); + camera.K = KMatrix::IDENTITY; + camera.R = RMatrix::IDENTITY; + camera.C = CMatrix::ZERO; + camera.K(0,0) = data["fl_x"].get<REAL>(); + camera.K(1,1) = data["fl_y"].get<REAL>(); + camera.K(0,2) = data["cx"].get<REAL>(); + camera.K(1,2) = data["cy"].get<REAL>(); + const String cameraModel = data["camera_model"].get<std::string>(); + if (cameraModel == "SIMPLE_PINHOLE") { + } else + // check ZERO radial distortion for all "PERSPECTIVE" type cameras + if (cameraModel == "PINHOLE" || cameraModel == "SIMPLE_RADIAL" || cameraModel == "RADIAL" || cameraModel == "OPENCV") { + const REAL k1 = data["k1"].get<REAL>(); + const REAL k2 = data["k2"].get<REAL>(); + const REAL p1 = data["p1"].get<REAL>(); + const REAL p2 = data["p2"].get<REAL>(); + if (k1 != 0 || k2 != 0 || p1 != 0 || p2 != 0) { + VERBOSE("error: radial distortion not supported"); + return false; + } + } else { + VERBOSE("error: camera model not supported"); + return false; + } + // parse images + const nlohmann::json& frames = data["frames"]; + for (const nlohmann::json& frame: frames) { + // set image + // frames expected to be ordered in JSON + const IIndex imageID = scene.images.size(); + const String strFileName((path / frame["file_path"].get<std::string>()).string()); + Image& imageData = scene.images.AddEmpty(); + imageData.platformID = platformID; + imageData.cameraID = 0; // only one camera per platform supported by this format + imageData.poseID = NO_ID; + imageData.ID = imageID; + imageData.name = strFileName; + ASSERT(Util::isFullPath(imageData.name)); + // set image resolution + 
imageData.width = resolution.width; + imageData.height = resolution.height; + imageData.scale = 1; + // load camera pose + imageData.poseID = platform.poses.size(); + Platform::Pose& pose = platform.poses.AddEmpty(); + const auto Ps = frame["transform_matrix"].get<std::vector<std::vector<double>>>(); + Eigen::Matrix4d P{ + {Ps[0][0], Ps[0][1], Ps[0][2], Ps[0][3]}, + {Ps[1][0], Ps[1][1], Ps[1][2], Ps[1][3]}, + {Ps[2][0], Ps[2][1], Ps[2][2], Ps[2][3]}, + {Ps[3][0], Ps[3][1], Ps[3][2], Ps[3][3]} + }; + // revert nerfstudio conversion: + // convert from COLMAP's camera coordinate system (OpenCV) to ours (OpenGL) + // c2w[0:3, 1:3] *= -1 + // c2w = c2w[np.array([1, 0, 2, 3]), :] + // c2w[2, :] *= -1 + P.row(2) *= -1; + P.row(0).swap(P.row(1)); + P.col(2) *= -1; + P.col(1) *= -1; + // set camera pose + pose.R = P.topLeftCorner<3, 3>().transpose().eval(); + pose.R.EnforceOrthogonality(); + pose.C = P.topRightCorner<3, 1>().eval(); + imageData.camera = platform.GetCamera(imageData.cameraID, imageData.poseID); + // try reading depth-map and normal-map + DepthMap depthMap; { + const String depthPath((path.parent_path() / String::FormatString("outputs/depth%04u.exr", imageID).c_str()).string()); + const cv::Mat imgDepthMap = cv::imread(depthPath, cv::IMREAD_UNCHANGED); + if (imgDepthMap.empty()) { + VERBOSE("Unable to load depthmap %s.", depthPath.c_str()); + continue; + } + imgDepthMap.convertTo(depthMap, CV_32FC1); + } + NormalMap normalMap; { + const String normalPath((path.parent_path() / String::FormatString("outputs/normal%04u.exr", imageID).c_str()).string()); + const cv::Mat imgNormalMap = cv::imread(normalPath, cv::IMREAD_UNCHANGED); + if (imgNormalMap.empty()) { + VERBOSE("Unable to load normalMap %s.", normalPath.c_str()); + continue; + } + imgNormalMap.convertTo(normalMap, CV_32FC3); + } + const ConfidenceMap confMap; + const ViewsMap viewsMap; + const IIndexArr IDs = {imageID}; + double dMin, dMax; + cv::minMaxIdx(depthMap, &dMin, &dMax, NULL, NULL, depthMap > 0); + const String
dmapPath((path.parent_path() / String::FormatString("depth%04u.dmap", imageID).c_str()).string()); + if (!ExportDepthDataRaw(dmapPath, + imageData.name, IDs, resolution, + camera.K, pose.R, pose.C, + (float)dMin, (float)dMax, + depthMap, normalMap, confMap, viewsMap)) + { + VERBOSE("Unable to save dmap: %s", dmapPath.c_str()); + continue; + } + } + if (scene.images.size() < 2) + return false; + scene.nCalibratedImages = (unsigned)scene.images.size(); + return true; +} + +// RTMV scene format: http://www.cs.umd.edu/~mmeshry/projects/rtmv +// |--sceneX +// |--xxx.jpg +// |--xxx.json +// .... +bool ParseSceneRTMV(Scene& scene, const std::filesystem::path& path) +{ + #if defined(_SUPPORT_CPP17) && (!defined(__GNUC__) || (__GNUC__ > 7)) + IIndex prevPlatformID = NO_ID; + for (const std::filesystem::directory_entry& entry : std::filesystem::directory_iterator(path)) { + if (entry.path().extension() != RTMV_CAMERAS_EXT) + continue; + // parse camera + const std::string strFileName((path / entry.path().stem()).string()); + const nlohmann::json dataCamera = nlohmann::json::parse(std::ifstream(strFileName+RTMV_CAMERAS_EXT)); + if (dataCamera.empty()) + continue; + const nlohmann::json& data = dataCamera["camera_data"]; + const cv::Size resolution(data["width"].get<int>(), data["height"].get<int>()); + // set platform + Matrix3x3 K = Matrix3x3::IDENTITY; + K(0,0) = data["intrinsics"]["fx"].get<REAL>(); + K(1,1) = data["intrinsics"]["fy"].get<REAL>(); + K(0,2) = data["intrinsics"]["cx"].get<REAL>(); + K(1,2) = data["intrinsics"]["cy"].get<REAL>(); + IIndex platformID; + if (prevPlatformID == NO_ID || !K.IsEqual(scene.platforms[prevPlatformID].cameras[0].K, 1e-3)) { + prevPlatformID = platformID = scene.platforms.size(); + Platform& platform = scene.platforms.emplace_back(); + Platform::Camera& camera = platform.cameras.emplace_back(); + camera.K = K; + camera.R = RMatrix::IDENTITY; + camera.C = CMatrix::ZERO; + } else { + platformID = prevPlatformID; + } + Platform& platform = scene.platforms[platformID]; + // set
image + const IIndex imageID = scene.images.size(); + Image& imageData = scene.images.AddEmpty(); + imageData.platformID = platformID; + imageData.cameraID = 0; // only one camera per platform supported by this format + imageData.poseID = NO_ID; + imageData.ID = imageID; + imageData.name = strFileName+".jpg"; + ASSERT(Util::isFullPath(imageData.name)); + // set image resolution + imageData.width = resolution.width; + imageData.height = resolution.height; + imageData.scale = 1; + // load camera pose + imageData.poseID = platform.poses.size(); + Platform::Pose& pose = platform.poses.AddEmpty(); + const auto Ps = data["cam2world"].get<std::vector<std::vector<double>>>(); + Eigen::Matrix4d P{ + {Ps[0][0], Ps[1][0], Ps[2][0], Ps[3][0]}, + {Ps[0][1], Ps[1][1], Ps[2][1], Ps[3][1]}, + {Ps[0][2], Ps[1][2], Ps[2][2], Ps[3][2]}, + {Ps[0][3], Ps[1][3], Ps[2][3], Ps[3][3]} + }; + // apply the same transforms as nerfstudio converter + P.row(2) *= -1; + P.row(0).swap(P.row(1)); + P.col(2) *= -1; + P.col(1) *= -1; + // set camera pose + pose.R = P.topLeftCorner<3, 3>().transpose().eval(); + pose.R.EnforceOrthogonality(); + pose.C = P.topRightCorner<3, 1>().eval(); + imageData.camera = platform.GetCamera(imageData.cameraID, imageData.poseID); + } + if (scene.images.size() < 2) + return false; + scene.nCalibratedImages = (unsigned)scene.images.size(); + return true; + #else + VERBOSE("error: C++17 is required to parse RTMV format"); + return false; + #endif // _SUPPORT_CPP17 +} + +bool ParseScene(Scene& scene) +{ + #if defined(_SUPPORT_CPP17) && (!defined(__GNUC__) || (__GNUC__ > 7)) + String strPath(MAKE_PATH_FULL(WORKING_FOLDER_FULL, OPT::strInputFileName)); + Util::ensureValidFolderPath(strPath); + const std::filesystem::path path(static_cast<const std::string&>(strPath)); + enum Type { + MVSNet = 0, + NERFSTUDIO, + RTMV, + } sceneType = MVSNet; + for (const std::filesystem::directory_entry& entry : std::filesystem::directory_iterator(path)) { + if (entry.path().extension() == RTMV_CAMERAS_EXT) { + if (entry.path().filename() == 
NERFSTUDIO_TRANSFORMS) { + sceneType = NERFSTUDIO; + break; + } + sceneType = RTMV; + } + } + switch (sceneType) { + case NERFSTUDIO: return ParseSceneNerfstudio(scene, path); + case RTMV: return ParseSceneRTMV(scene, path); + default: return ParseSceneMVSNet(scene, path); + } + #else + VERBOSE("error: C++17 is required to parse MVSNet format"); + return false; + #endif // _SUPPORT_CPP17 +} + } // unnamed namespace int main(int argc, LPCTSTR* argv) @@ -352,7 +592,7 @@ int main(int argc, LPCTSTR* argv) Scene scene(OPT::nMaxThreads); // convert data from MVSNet format to OpenMVS - if (!ParseSceneMVSNet(scene)) + if (!ParseScene(scene)) return EXIT_FAILURE; // write OpenMVS input data diff --git a/apps/InterfacePolycam/InterfacePolycam.cpp b/apps/InterfacePolycam/InterfacePolycam.cpp index c0b42fb7a..31c532e9a 100644 --- a/apps/InterfacePolycam/InterfacePolycam.cpp +++ b/apps/InterfacePolycam/InterfacePolycam.cpp @@ -31,6 +31,7 @@ #include "../../libs/MVS/Common.h" #include "../../libs/MVS/Scene.h" +#define JSON_NOEXCEPTION #include "../../libs/IO/json.hpp" #include diff --git a/libs/MVS/PointCloud.cpp b/libs/MVS/PointCloud.cpp index 15689a058..c62920961 100644 --- a/libs/MVS/PointCloud.cpp +++ b/libs/MVS/PointCloud.cpp @@ -335,10 +335,14 @@ bool PointCloud::Load(const String& fileName) colors[v] = vertex.c; if (!normals.empty()) normals[v] = vertex.n; - if (!pointViews.empty()) - pointViews[v].CopyOfRemove(ViewArr(vertex.views.num, vertex.views.pIndices)); - if (!pointWeights.empty()) - pointWeights[v].CopyOfRemove(WeightArr(vertex.views.num, vertex.views.pWeights)); + if (!pointViews.empty()) { + ViewArr pv(vertex.views.num, vertex.views.pIndices); + pointViews[v].CopyOfRemove(pv); + } + if (!pointWeights.empty()){ + WeightArr pw(vertex.views.num, vertex.views.pWeights); + pointWeights[v].CopyOfRemove(pw); + } } } else { ply.get_other_element(); diff --git a/libs/MVS/Scene.cpp b/libs/MVS/Scene.cpp index 74e84675d..4943cf4af 100644 --- a/libs/MVS/Scene.cpp +++ 
b/libs/MVS/Scene.cpp @@ -1799,7 +1799,7 @@ bool Scene::ComputeTowerCylinder(Point2f& centerPoint, float& fRadius, float& fR // calculate tower radius as median distance from tower center to cameras FloatArr cameraDistancesToMiddle(cameras2D.size()); FOREACH (camIdx, cameras2D) - cameraDistancesToMiddle[camIdx] = norm(cameras2D[camIdx] - centerPoint); + cameraDistancesToMiddle[camIdx] = (float)norm(cameras2D[camIdx] - centerPoint); const float fMedianDistance = cameraDistancesToMiddle.GetMedian(); fRadius = MAXF(0.2f, (fMedianDistance - 1.f) / 3.f); // get the average of top 85 to 95% of the highest distances to center @@ -1882,7 +1882,7 @@ PointCloud Scene::BuildTowerMesh(const PointCloud& origPointCloud, const Point2f } else { cList sliceDistances(nTargetCircles); for (const Point3f& P : origPointCloud.points) { - const float d(norm(Point2f(P.x, P.y) - centerPoint)); + const float d((float)norm(Point2f(P.x, P.y) - centerPoint)); if (d <= fROIRadius) { const float fIdx((zMax - P.z) * nTargetDensity); int bIdx(FLOOR2INT(fIdx)); diff --git a/vcpkg.json b/vcpkg.json index a74f701ea..0c1542b1a 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -1,6 +1,6 @@ { "name": "openmvs", - "version": "2.1.0", + "version": "2.2.0", "description": "OpenMVS: open Multi-View Stereo reconstruction library", "homepage": "https://cdcseacave.github.io/openMVS", "dependencies": [ @@ -17,6 +17,13 @@ "glew", "glfw3", "libpng", + { + "name": "opencv", + "features": [ + "eigen", + "openexr" + ] + }, "opencv", "opengl", "tiff",