Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

interface: nerfstudio and RTMV importer #1047

Merged
merged 9 commits into from
Aug 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
254 changes: 247 additions & 7 deletions apps/InterfaceMVSNet/InterfaceMVSNet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@

#include "../../libs/MVS/Common.h"
#include "../../libs/MVS/Scene.h"
#define JSON_NOEXCEPTION
#include "../../libs/IO/json.hpp"
#include <boost/program_options.hpp>

using namespace MVS;
Expand All @@ -44,6 +46,8 @@
#define MVSNET_CAMERAS_FOLDER _T("cams")
#define MVSNET_IMAGES_EXT _T(".jpg")
#define MVSNET_CAMERAS_NAME _T("_cam.txt")
#define RTMV_CAMERAS_EXT _T(".json")
#define NERFSTUDIO_TRANSFORMS _T("transforms.json")


// S T R U C T S ///////////////////////////////////////////////////
Expand Down Expand Up @@ -217,17 +221,14 @@
// |--xxx.jpg
// |--xxx.jpg
// ....
// |--xxx.jpg
// |--cams
// |--xxx_cam.txt
// |--xxx_cam.txt
// ....
// |--xxx_cam.txt
// |--render_cams
// |--xxx_cam.txt
// |--xxx_cam.txt
// ....
// |--xxx_cam.txt
//
// where the camera parameter of one image stored in a cam.txt file contains the camera
// extrinsic E = [R|t], intrinsic K and the depth range:
Expand All @@ -243,12 +244,11 @@
// K20 K21 K22
//
// DEPTH_MIN DEPTH_INTERVAL (DEPTH_NUM DEPTH_MAX)
bool ParseSceneMVSNet(Scene& scene)
bool ParseSceneMVSNet(Scene& scene, const std::filesystem::path& path)
{
#if defined(_SUPPORT_CPP17) && (!defined(__GNUC__) || (__GNUC__ > 7))
String strPath(MAKE_PATH_FULL(WORKING_FOLDER_FULL, OPT::strInputFileName));
String strPath(path.string());
Util::ensureValidFolderPath(strPath);
const std::filesystem::path path(static_cast<std::string&>(strPath));
IIndex prevPlatformID = NO_ID;
for (const std::filesystem::directory_entry& entry : std::filesystem::directory_iterator(path / MVSNET_IMAGES_FOLDER)) {
if (entry.path().extension() != MVSNET_IMAGES_EXT)
Expand Down Expand Up @@ -335,6 +335,246 @@
#endif // _SUPPORT_CPP17
}

// Nerfstudio scene format: https://docs.nerf.studio
// |--sceneX
//    |--images
//       |--xxx.jpg
//       |--xxx.jpg
//       ....
//    |--outputs (optional)
//       |--depthxxxx.exr
//       |--normalxxxx.exr
//       ....
//    |--transforms.json
//
// Parse a nerfstudio scene rooted at 'path' into 'scene': one shared platform/camera
// (intrinsics from transforms.json), one image+pose per JSON frame; optionally export
// per-image depth/normal maps (EXR) as OpenMVS .dmap files.
// Returns false if transforms.json is missing/empty, the camera model is unsupported,
// distortion is non-zero, or fewer than 2 images were imported.
bool ParseSceneNerfstudio(Scene& scene, const std::filesystem::path& path)
{
	const nlohmann::json data = nlohmann::json::parse(std::ifstream((path / NERFSTUDIO_TRANSFORMS).string()));
	if (data.empty())
		return false;
	// parse camera: all frames share a single platform and camera
	const cv::Size resolution(data["w"].get<uint32_t>(), data["h"].get<uint32_t>());
	const IIndex platformID = scene.platforms.size();
	Platform& platform = scene.platforms.emplace_back();
	Platform::Camera& camera = platform.cameras.emplace_back();
	camera.K = KMatrix::IDENTITY;
	camera.R = RMatrix::IDENTITY;
	camera.C = CMatrix::ZERO;
	camera.K(0,0) = data["fl_x"].get<REAL>();
	camera.K(1,1) = data["fl_y"].get<REAL>();
	camera.K(0,2) = data["cx"].get<REAL>();
	camera.K(1,2) = data["cy"].get<REAL>();
	const String cameraModel = data["camera_model"].get<std::string>();
	if (cameraModel == "SIMPLE_PINHOLE") {
		// no distortion parameters to validate
	} else
	// check ZERO radial distortion for all "PERSPECTIVE" type cameras
	if (cameraModel == "PINHOLE" || cameraModel == "SIMPLE_RADIAL" || cameraModel == "RADIAL" || cameraModel == "OPENCV") {
		const REAL k1 = data["k1"].get<REAL>();
		const REAL k2 = data["k2"].get<REAL>();
		const REAL p1 = data["p1"].get<REAL>();
		const REAL p2 = data["p2"].get<REAL>();
		if (k1 != 0 || k2 != 0 || p1 != 0 || p2 != 0) {
			VERBOSE("error: radial distortion not supported");
			return false;
		}
	} else {
		VERBOSE("error: camera model not supported");
		return false;
	}
	// parse images
	const nlohmann::json& frames = data["frames"];
	for (const nlohmann::json& frame: frames) {
		// set image
		// frames expected to be ordered in JSON
		const IIndex imageID = scene.images.size();
		const String strFileName((path / frame["file_path"].get<std::string>()).string());
		Image& imageData = scene.images.AddEmpty();
		imageData.platformID = platformID;
		imageData.cameraID = 0; // only one camera per platform supported by this format
		imageData.poseID = NO_ID;
		imageData.ID = imageID;
		imageData.name = strFileName;
		ASSERT(Util::isFullPath(imageData.name));
		// set image resolution
		imageData.width = resolution.width;
		imageData.height = resolution.height;
		imageData.scale = 1;
		// load camera pose (camera-to-world, row-major in JSON)
		imageData.poseID = platform.poses.size();
		Platform::Pose& pose = platform.poses.AddEmpty();
		const auto Ps = frame["transform_matrix"].get<std::vector<std::vector<double>>>();
		Eigen::Matrix4d P{
			{Ps[0][0], Ps[0][1], Ps[0][2], Ps[0][3]},
			{Ps[1][0], Ps[1][1], Ps[1][2], Ps[1][3]},
			{Ps[2][0], Ps[2][1], Ps[2][2], Ps[2][3]},
			{Ps[3][0], Ps[3][1], Ps[3][2], Ps[3][3]}
		};
		// revert nerfstudio conversion:
		// convert from COLMAP's camera coordinate system (OpenCV) to ours (OpenGL)
		// c2w[0:3, 1:3] *= -1
		// c2w = c2w[np.array([1, 0, 2, 3]), :]
		// c2w[2, :] *= -1
		P.row(2) *= -1;
		P.row(0).swap(P.row(1));
		P.col(2) *= -1;
		P.col(1) *= -1;
		// set camera pose
		pose.R = P.topLeftCorner<3, 3>().transpose().eval();
		pose.R.EnforceOrthogonality();
		pose.C = P.topRightCorner<3, 1>().eval();
		imageData.camera = platform.GetCamera(imageData.cameraID, imageData.poseID);
		// try reading depth-map and normal-map; skip .dmap export if either is missing
		DepthMap depthMap; {
			const String depthPath((path.parent_path() / String::FormatString("outputs/depth%04u.exr", imageID).c_str()).string());
			const cv::Mat imgDepthMap = cv::imread(depthPath, cv::IMREAD_UNCHANGED);
			if (imgDepthMap.empty()) {
				VERBOSE("Unable to load depthmap %s.", depthPath.c_str());
				continue;
			}
			imgDepthMap.convertTo(depthMap, CV_32FC1);
		}
		NormalMap normalMap; {
			const String normalPath((path.parent_path() / String::FormatString("outputs/normal%04u.exr", imageID).c_str()).string());
			const cv::Mat imgNormalMap = cv::imread(normalPath, cv::IMREAD_UNCHANGED);
			if (imgNormalMap.empty()) {
				VERBOSE("Unable to load normalMap %s.", normalPath.c_str());
				continue;
			}
			imgNormalMap.convertTo(normalMap, CV_32FC3);
		}
		const ConfidenceMap confMap;
		const ViewsMap viewsMap;
		const IIndexArr IDs = {imageID};
		double dMin, dMax;
		// depth range over valid (positive) depths only
		cv::minMaxIdx(depthMap, &dMin, &dMax, nullptr, nullptr, depthMap > 0);
		const String dmapPath((path.parent_path() / String::FormatString("depth%04u.dmap", imageID).c_str()).string());
		if (!ExportDepthDataRaw(dmapPath,
			imageData.name, IDs, resolution,
			camera.K, pose.R, pose.C,
			(float)dMin, (float)dMax,
			depthMap, normalMap, confMap, viewsMap))
		{
			// pass the C string: a non-trivial String through a variadic call is UB
			VERBOSE("Unable to save dmap: %s", dmapPath.c_str());
			continue;
		}
	}
	if (scene.images.size() < 2)
		return false;
	scene.nCalibratedImages = (unsigned)scene.images.size();
	return true;
}

// RTMV scene format: http://www.cs.umd.edu/~mmeshry/projects/rtmv
// |--sceneX
//    |--xxx.jpg
//    |--xxx.json
//    ....
//
// Parse an RTMV scene rooted at 'path' into 'scene': each per-image JSON carries
// its own intrinsics ("camera_data"/"intrinsics") and a column-major "cam2world"
// pose; consecutive images with (numerically) equal intrinsics share a platform.
// Returns false if fewer than 2 images were imported.
bool ParseSceneRTMV(Scene& scene, const std::filesystem::path& path)
{
#if defined(_SUPPORT_CPP17) && (!defined(__GNUC__) || (__GNUC__ > 7))
	IIndex prevPlatformID = NO_ID;
	for (const std::filesystem::directory_entry& entry : std::filesystem::directory_iterator(path)) {
		if (entry.path().extension() != RTMV_CAMERAS_EXT)
			continue;
		// parse camera
		const std::string strFileName((path / entry.path().stem()).string());
		const nlohmann::json dataCamera = nlohmann::json::parse(std::ifstream(strFileName+RTMV_CAMERAS_EXT));
		if (dataCamera.empty())
			continue;
		const nlohmann::json& data = dataCamera["camera_data"];
		const cv::Size resolution(data["width"].get<uint32_t>(), data["height"].get<uint32_t>());
		// set platform: reuse the previous one if the intrinsics did not change
		Matrix3x3 K = Matrix3x3::IDENTITY;
		K(0,0) = data["intrinsics"]["fx"].get<REAL>();
		K(1,1) = data["intrinsics"]["fy"].get<REAL>();
		K(0,2) = data["intrinsics"]["cx"].get<REAL>();
		K(1,2) = data["intrinsics"]["cy"].get<REAL>();
		IIndex platformID;
		if (prevPlatformID == NO_ID || !K.IsEqual(scene.platforms[prevPlatformID].cameras[0].K, 1e-3)) {
			prevPlatformID = platformID = scene.platforms.size();
			Platform& platform = scene.platforms.emplace_back();
			Platform::Camera& camera = platform.cameras.emplace_back();
			camera.K = K;
			camera.R = RMatrix::IDENTITY;
			camera.C = CMatrix::ZERO;
		} else {
			platformID = prevPlatformID;
		}
		Platform& platform = scene.platforms[platformID];
		// set image
		const IIndex imageID = scene.images.size();
		Image& imageData = scene.images.AddEmpty();
		imageData.platformID = platformID;
		imageData.cameraID = 0; // only one camera per platform supported by this format
		imageData.poseID = NO_ID;
		imageData.ID = imageID;
		imageData.name = strFileName+".jpg";
		ASSERT(Util::isFullPath(imageData.name));
		// set image resolution
		imageData.width = resolution.width;
		imageData.height = resolution.height;
		imageData.scale = 1;
		// load camera pose ("cam2world" stored column-major: transpose while reading)
		imageData.poseID = platform.poses.size();
		Platform::Pose& pose = platform.poses.AddEmpty();
		const auto Ps = data["cam2world"].get<std::vector<std::vector<double>>>();
		Eigen::Matrix4d P{
			{Ps[0][0], Ps[1][0], Ps[2][0], Ps[3][0]},
			{Ps[0][1], Ps[1][1], Ps[2][1], Ps[3][1]},
			{Ps[0][2], Ps[1][2], Ps[2][2], Ps[3][2]},
			{Ps[0][3], Ps[1][3], Ps[2][3], Ps[3][3]}
		};
		// apply the same transforms as nerfstudio converter
		P.row(2) *= -1;
		P.row(0).swap(P.row(1));
		P.col(2) *= -1;
		P.col(1) *= -1;
		// set camera pose
		pose.R = P.topLeftCorner<3, 3>().transpose().eval();
		pose.R.EnforceOrthogonality();
		pose.C = P.topRightCorner<3, 1>().eval();
		imageData.camera = platform.GetCamera(imageData.cameraID, imageData.poseID);
	}
	if (scene.images.size() < 2)
		return false;
	scene.nCalibratedImages = (unsigned)scene.images.size();
	return true;
#else
	VERBOSE("error: C++17 is required to parse RTMV format");
	return false;
#endif // _SUPPORT_CPP17
}

// Detect the input scene format and dispatch to the matching parser.
// Detection scans the input folder for JSON files: a file named
// 'transforms.json' selects nerfstudio, any other '.json' selects RTMV,
// otherwise the MVSNet layout is assumed.
bool ParseScene(Scene& scene)
{
#if defined(_SUPPORT_CPP17) && (!defined(__GNUC__) || (__GNUC__ > 7))
	String strPath(MAKE_PATH_FULL(WORKING_FOLDER_FULL, OPT::strInputFileName));
	Util::ensureValidFolderPath(strPath);
	const std::filesystem::path path(static_cast<const std::string&>(strPath));
	enum Type {
		MVSNet = 0,
		NERFSTUDIO,
		RTMV,
	} sceneType = MVSNet;
	for (const std::filesystem::directory_entry& entry : std::filesystem::directory_iterator(path)) {
		if (entry.path().extension() == RTMV_CAMERAS_EXT) {
			if (entry.path().filename() == NERFSTUDIO_TRANSFORMS) {
				// transforms.json is definitive: stop scanning
				sceneType = NERFSTUDIO;
				break;
			}
			// keep scanning: a transforms.json found later overrides RTMV
			sceneType = RTMV;
		}
	}
	switch (sceneType) {
	case NERFSTUDIO: return ParseSceneNerfstudio(scene, path);
	case RTMV: return ParseSceneRTMV(scene, path);
	default: return ParseSceneMVSNet(scene, path);
	}
#else
	VERBOSE("error: C++17 is required to parse the input scene format");
	return false;
#endif // _SUPPORT_CPP17
}

} // unnamed namespace

int main(int argc, LPCTSTR* argv)
Expand All @@ -352,7 +592,7 @@
Scene scene(OPT::nMaxThreads);

// convert data from MVSNet format to OpenMVS
if (!ParseSceneMVSNet(scene))
if (!ParseScene(scene))
return EXIT_FAILURE;

// write OpenMVS input data
Expand Down
1 change: 1 addition & 0 deletions apps/InterfacePolycam/InterfacePolycam.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

#include "../../libs/MVS/Common.h"
#include "../../libs/MVS/Scene.h"
#define JSON_NOEXCEPTION
#include "../../libs/IO/json.hpp"
#include <boost/program_options.hpp>

Expand Down
12 changes: 8 additions & 4 deletions libs/MVS/PointCloud.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -335,10 +335,14 @@ bool PointCloud::Load(const String& fileName)
colors[v] = vertex.c;
if (!normals.empty())
normals[v] = vertex.n;
if (!pointViews.empty())
pointViews[v].CopyOfRemove(ViewArr(vertex.views.num, vertex.views.pIndices));
if (!pointWeights.empty())
pointWeights[v].CopyOfRemove(WeightArr(vertex.views.num, vertex.views.pWeights));
if (!pointViews.empty()) {
ViewArr pv(vertex.views.num, vertex.views.pIndices);
pointViews[v].CopyOfRemove(pv);
}
if (!pointWeights.empty()){
WeightArr pw(vertex.views.num, vertex.views.pWeights);
pointWeights[v].CopyOfRemove(pw);
}
}
} else {
ply.get_other_element();
Expand Down
4 changes: 2 additions & 2 deletions libs/MVS/Scene.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1799,7 +1799,7 @@ bool Scene::ComputeTowerCylinder(Point2f& centerPoint, float& fRadius, float& fR
// calculate tower radius as median distance from tower center to cameras
FloatArr cameraDistancesToMiddle(cameras2D.size());
FOREACH (camIdx, cameras2D)
cameraDistancesToMiddle[camIdx] = norm(cameras2D[camIdx] - centerPoint);
cameraDistancesToMiddle[camIdx] = (float)norm(cameras2D[camIdx] - centerPoint);
const float fMedianDistance = cameraDistancesToMiddle.GetMedian();
fRadius = MAXF(0.2f, (fMedianDistance - 1.f) / 3.f);
// get the average of top 85 to 95% of the highest distances to center
Expand Down Expand Up @@ -1882,7 +1882,7 @@ PointCloud Scene::BuildTowerMesh(const PointCloud& origPointCloud, const Point2f
} else {
cList<FloatArr> sliceDistances(nTargetCircles);
for (const Point3f& P : origPointCloud.points) {
const float d(norm(Point2f(P.x, P.y) - centerPoint));
const float d((float)norm(Point2f(P.x, P.y) - centerPoint));
if (d <= fROIRadius) {
const float fIdx((zMax - P.z) * nTargetDensity);
int bIdx(FLOOR2INT(fIdx));
Expand Down
9 changes: 8 additions & 1 deletion vcpkg.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "openmvs",
"version": "2.1.0",
"version": "2.2.0",
"description": "OpenMVS: open Multi-View Stereo reconstruction library",
"homepage": "https://cdcseacave.github.io/openMVS",
"dependencies": [
Expand All @@ -17,6 +17,13 @@
"glew",
"glfw3",
"libpng",
{
  "name": "opencv",
  "features": [
    "eigen",
    "openexr"
  ]
},
"opengl",
"tiff",
Expand Down
Loading