From 09217774ac1b13be21642ed1479a8914bfa35457 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ga=C5=82ecki?= Date: Thu, 28 Dec 2023 15:50:00 +0100 Subject: [PATCH 1/9] Minor refactor of arb debug facilities --- src/ArbConnectivity.hpp | 18 +-- src/ArbLattice.cpp | 255 +++++++++++++++++++++++----------------- src/ArbLattice.hpp | 2 + 3 files changed, 156 insertions(+), 119 deletions(-) diff --git a/src/ArbConnectivity.hpp b/src/ArbConnectivity.hpp index 0932a1df0..0ea356f99 100644 --- a/src/ArbConnectivity.hpp +++ b/src/ArbConnectivity.hpp @@ -30,17 +30,17 @@ struct ArbLatticeConnectivity { zones.reserve(getLocalSize()); } - void dump(std::string filename) { + void dump(std::string filename) const { FILE* f; - f = fopen(filename.c_str(),"w"); - fprintf(f,"idx_og,idx"); - for (size_t q=0;q& setting_zones, pugi::xml_node arb_node, MPI_Comm comm_) : LatticeBase(ZONESETTINGS, ZONE_MAX, num_snaps_, units_), comm(comm_) { +ArbLattice::ArbLattice(size_t num_snaps_, const UnitEnv& units_, const std::map& setting_zones, pugi::xml_node arb_node, MPI_Comm comm_) + : LatticeBase(ZONESETTINGS, ZONE_MAX, num_snaps_, units_), comm(comm_) { initialize(num_snaps_, setting_zones, arb_node); } @@ -36,84 +36,25 @@ void ArbLattice::initialize(size_t num_snaps_, const std::map& const std::string cxn_path = name_attr.value(); readFromCxn(cxn_path); global_node_dist = computeInitialNodeDist(connect.num_nodes_global, mpitools::MPI_Size(comm)); - if (debug_name.size() != 0) { - connect.dump(formatAsString("%s_P%02d_conn_before.csv", debug_name, rank)); - } + debugDumpConnect("conn_before"); partition(); - if (debug_name.size() != 0) { - connect.dump(formatAsString("%s_P%02d_conn_after.csv", debug_name, rank)); - } - if (connect.getLocalSize() == 0) throw std::runtime_error{"At least one MPI rank has an empty partition, please use fewer MPI ranks"}; // Realistically, this should never happen + debugDumpConnect("conn_after"); + if (connect.getLocalSize() == 0) + throw std::runtime_error{"At least one MPI rank has an empty partition, please use fewer MPI ranks"}; // Realistically, this should never happen computeGhostNodes(); computeLocalPermutation(); allocDeviceMemory(); initDeviceData(arb_node, setting_zones); local_bounding_box = getLocalBoundingBox(); vtu_geom = makeVTUGeom(); - if (debug_name.size() != 0) { - std::string filename; - size_t i; - FILE* f; - - filename = formatAsString("%s_P%02d_loc_perm.csv", debug_name, rank); - f = fopen(filename.c_str(),"w"); - fprintf(f,"rank,globalIdx,idx\n"); - i = connect.chunk_begin; - for (const auto& idx : local_permutation) { - fprintf(f, "%d,%ld,%d\n", rank, i, idx); - i++; - } - assert(i == connect.chunk_end); - i = getLocalSize(); - for (const auto& gidx : ghost_nodes) { - fprintf(f, "%d,%ld,%ld\n", rank, gidx, i); - i++; - } - fprintf(f, "%d,%ld,%ld\n", rank, (long int) -1, i); - i++; - printf("i:%ld snaps_pitch: %ld\n", i, sizes.snaps_pitch); fflush(stdout); - assert(i <= sizes.snaps_pitch); - fclose(f); - - - filename = formatAsString("%s_P%02d.vtu", debug_name, rank); - const auto& [num_cells, num_points, coords, verts] = getVTUGeom(); - VtkFileOut vtu_file(filename, num_cells, num_points, coords.get(), verts.get(), MPMD.local, true, false); - { - std::vector< size_t > tab1(getLocalSize()); - std::vector< int > tab2(getLocalSize()); - std::vector< size_t > tab3(getLocalSize()); - for (size_t node = 0; node != connect.getLocalSize(); ++node) { - auto i = local_permutation.at(node); - tab1[i] = node + connect.chunk_begin; - tab2[i] = rank; - tab3[i] = connect.og_index[node]; - } - vtu_file.writeField("globalId", tab1.data()); - vtu_file.writeField("globalIdRank", tab2.data()); - vtu_file.writeField("globalIdOg", tab3.data()); - } - { - std::vector< signed long int > tab1(getLocalSize()*Q); - std::vector< int > tab2(getLocalSize()*Q); - for (size_t node = 0; node != connect.getLocalSize(); ++node) { - auto i = local_permutation.at(node); - for (size_t q = 0; q != Q; ++q) { - const auto nbr = connect.neighbor(q, node); - tab1[i * Q + q] = nbr; - const int owner = std::distance(global_node_dist.cbegin(), std::upper_bound(global_node_dist.cbegin(), global_node_dist.cend(), nbr)) - 1; - tab2[i * Q + q] = owner; - } - } - vtu_file.writeField("neighbour", tab1.data(), Q); - vtu_file.writeField("neighbourRank", tab2.data(), Q); - } - vtu_file.writeFooters(); - } + debugDumpVTU(); initCommManager(); initContainer(); - debug1("Initialized arbitrary lattice with: border nodes=%lu; interior nodes=%lu; ghost nodes=%lu", sizes.border_nodes, getLocalSize() - sizes.border_nodes, ghost_nodes.size()); + debug1("Initialized arbitrary lattice with: border nodes=%lu; interior nodes=%lu; ghost nodes=%lu", + sizes.border_nodes, + getLocalSize() - sizes.border_nodes, + ghost_nodes.size()); } int ArbLattice::reinitialize(size_t num_snaps_, const std::map& setting_zones, pugi::xml_node arb_node) { @@ -138,7 +79,9 @@ void ArbLattice::readFromCxn(const std::string& cxn_path) { // Open file + utils for error reporting std::fstream file(cxn_path, std::ios_base::in); - const auto wrap_err_msg = [&](std::string msg) { return "Error while reading "s.append(cxn_path.c_str()).append(" on MPI rank ").append(std::to_string(comm_rank)).append(": ").append(msg); }; + const auto wrap_err_msg = [&](std::string msg) { + return "Error while reading "s.append(cxn_path.c_str()).append(" on MPI rank ").append(std::to_string(comm_rank)).append(": ").append(msg); + }; const auto check_file_ok = [&](const std::string& err_message = "Unknown error") { if (!file) throw std::ios_base::failure(wrap_err_msg(err_message)); }; @@ -189,7 +132,12 @@ void ArbLattice::readFromCxn(const std::string& cxn_path) { const auto prov_it = std::find(q_provided.cbegin(), q_provided.cend(), req); if (prov_it == q_provided.cend()) { const auto [x, y, z] = req; - const auto err_msg = "The arbitrary lattice file does not provide the required direction: ["s.append(std::to_string(x)).append(", ").append(std::to_string(y)).append(", ").append(std::to_string(z)).append("]"); + const auto err_msg = "The arbitrary lattice file does not provide the required direction: ["s.append(std::to_string(x)) + .append(", ") + .append(std::to_string(y)) + .append(", ") + .append(std::to_string(z)) + .append("]"); throw std::runtime_error(wrap_err_msg(err_msg)); } req_prov_perm[i++] = std::distance(q_provided.cbegin(), prov_it); @@ -253,7 +201,10 @@ void ArbLattice::readFromCxn(const std::string& cxn_path) { void ArbLattice::partition() { if (mpitools::MPI_Size(comm) == 1) return; - const auto zero_dir_ind = std::distance(Model_m::offset_directions.cbegin(), std::find(Model_m::offset_directions.cbegin(), Model_m::offset_directions.cend(), OffsetDir{0, 0, 0})); // Note: the behavior is still correct even if (0,0,0) is not an offset direction + const auto zero_dir_ind = std::distance(Model_m::offset_directions.cbegin(), + std::find(Model_m::offset_directions.cbegin(), + Model_m::offset_directions.cend(), + OffsetDir{0, 0, 0})); // Note: the behavior is still correct even if (0,0,0) is not an offset direction const auto offset_dir_wgts = std::vector(Model_m::offset_direction_weights.begin(), Model_m::offset_direction_weights.end()); auto [dist, log] = partitionArbLattice(connect, offset_dir_wgts, zero_dir_ind, comm); for (const auto& [type, msg] : log) switch (type) { @@ -280,7 +231,7 @@ void ArbLattice::computeGhostNodes() { } void ArbLattice::computeLocalPermutation() { - std::vector< size_t > lids; // globalIdx - chunk_begin of elements + std::vector lids; // globalIdx - chunk_begin of elements lids.resize(connect.getLocalSize()); std::iota(lids.begin(), lids.end(), 0); const auto is_border_node = [&](int lid) { @@ -300,10 +251,7 @@ void ArbLattice::computeLocalPermutation() { std::sort(interior_begin, lids.end(), by_zyx); local_permutation.resize(connect.getLocalSize()); size_t i = 0; - for (const auto& lid : lids) { - local_permutation[lid] = i; - i++; - } + for (const auto& lid : lids) local_permutation[lid] = i++; } void ArbLattice::allocDeviceMemory() { @@ -322,7 +270,8 @@ std::vector ArbLattice::parseBrushFromXml(pugi::xml_n std::vector retval; for (auto node = arb_node.first_child(); node; node = node.next_sibling()) { // Requested node type - const auto ntf_iter = std::find_if(model->nodetypeflags.cbegin(), model->nodetypeflags.cend(), [name = node.name()](const auto& ntf) { return ntf.name == name; }); + const auto ntf_iter = + std::find_if(model->nodetypeflags.cbegin(), model->nodetypeflags.cend(), [name = node.name()](const auto& ntf) { return ntf.name == name; }); if (ntf_iter == model->nodetypeflags.cend()) throw std::runtime_error{formatAsString("Unknown node type: %s", node.name())}; // Determine what kind of node we're parsing and update the brush accordingly @@ -333,12 +282,16 @@ std::vector ArbLattice::parseBrushFromXml(pugi::xml_n const flag_t value = ntf_iter->flag | (zone_attr ? (setting_zones.at(zone_attr.value()) << model->settingzones.shift) : 0); const std::string group_name = group_attr.value(); const auto label_iter = label_to_ind_map.find(group_name); - if (label_iter == label_to_ind_map.end()) throw std::runtime_error{formatAsString("The required label %s is missing from the .cxn file", group_name)}; + if (label_iter == label_to_ind_map.end()) + throw std::runtime_error{formatAsString("The required label %s is missing from the .cxn file", group_name)}; const auto label = label_iter->second; - const auto has_label = [label](Span labels, std::array) { return std::find(labels.begin(), labels.end(), label) != labels.end(); }; + const auto has_label = [label](Span labels, std::array) { + return std::find(labels.begin(), labels.end(), label) != labels.end(); + }; retval.push_back(NodeTypeBrush{has_label, mask, value}); } else - throw std::runtime_error{std::string("The ArbitraryLattice XML node contains an incorrectly specified child named ") + node.name()}; // TODO: implement other node types, e.g. + throw std::runtime_error{std::string("The ArbitraryLattice XML node contains an incorrectly specified child named ") + + node.name()}; // TODO: implement other node types, e.g. } return retval; } @@ -347,7 +300,9 @@ void ArbLattice::computeNodeTypesOnHost(pugi::xml_node arb_node, const std::map< const auto local_sz = connect.getLocalSize(); const auto zone_sizes = Span(connect.zones_per_node.get(), local_sz); auto zone_offsets = std::vector(local_sz); - std::transform_exclusive_scan(zone_sizes.begin(), zone_sizes.end(), zone_offsets.begin(), size_t{0}, std::plus{}, [](auto label) -> size_t { return label; }); // labels are stored as a short type, we need to cast it to size_t before computing the scan + std::transform_exclusive_scan(zone_sizes.begin(), zone_sizes.end(), zone_offsets.begin(), size_t{0}, std::plus{}, [](auto label) -> size_t { + return label; + }); // labels are stored as a short type, we need to cast it to size_t before computing the scan const auto brushes = parseBrushFromXml(arb_node, setting_zones); node_types_host = std::pmr::vector(local_sz, &global_pinned_resource); for (size_t i = 0; i != local_sz; ++i) { @@ -446,11 +401,13 @@ int ArbLattice::fullLatticePos(double pos) const { lbRegion ArbLattice::getLocalBoundingBox() const { const auto local_sz = connect.getLocalSize(); - const Span x(connect.coords.get(), local_sz), y(std::next(connect.coords.get(), local_sz), local_sz), z(std::next(connect.coords.get(), 2 * local_sz), local_sz); + const Span x(connect.coords.get(), local_sz), y(std::next(connect.coords.get(), local_sz), local_sz), + z(std::next(connect.coords.get(), 2 * local_sz), local_sz); const auto [minx_it, maxx_it] = std::minmax_element(x.begin(), x.end()); const auto [miny_it, maxy_it] = std::minmax_element(y.begin(), y.end()); const auto [minz_it, maxz_it] = std::minmax_element(z.begin(), z.end()); - const int x_min = fullLatticePos(*minx_it), x_max = fullLatticePos(*maxx_it), y_min = fullLatticePos(*miny_it), y_max = fullLatticePos(*maxy_it), z_min = fullLatticePos(*minz_it), z_max = fullLatticePos(*maxz_it); + const int x_min = fullLatticePos(*minx_it), x_max = fullLatticePos(*maxx_it), y_min = fullLatticePos(*miny_it), y_max = fullLatticePos(*maxy_it), + z_min = fullLatticePos(*minz_it), z_max = fullLatticePos(*maxz_it); return lbRegion(x_min, y_min, z_min, x_max - x_min + 1, y_max - y_min + 1, z_max - z_min + 1); } @@ -463,7 +420,14 @@ ArbLattice::ArbVTUGeom ArbLattice::makeVTUGeom() const { const auto get_bb_verts = [&](unsigned node) { const double x = connect.coord(0, node), y = connect.coord(1, node), z = connect.coord(2, node); const int posx = fullLatticePos(x), posy = fullLatticePos(y), posz = fullLatticePos(z); - static constexpr std::array offsets = {std::array{0, 0, 0}, std::array{1, 0, 0}, std::array{1, 1, 0}, std::array{0, 1, 0}, std::array{0, 0, 1}, std::array{1, 0, 1}, std::array{1, 1, 1}, std::array{0, 1, 1}}; // We need a specific ordering to agree with the vtu spec + static constexpr std::array offsets = {std::array{0, 0, 0}, + std::array{1, 0, 0}, + std::array{1, 1, 0}, + std::array{0, 1, 0}, + std::array{0, 0, 1}, + std::array{1, 0, 1}, + std::array{1, 1, 1}, + std::array{0, 1, 1}}; // We need a specific ordering to agree with the vtu spec std::array retval{}; std::transform(offsets.begin(), offsets.end(), retval.begin(), [&](const auto& ofs) { const auto [dx, dy, dz] = ofs; @@ -482,7 +446,10 @@ ArbLattice::ArbVTUGeom ArbLattice::makeVTUGeom() const { return retval; }); - ArbVTUGeom retval{connect.getLocalSize(), full_to_red_map.size(), std::make_unique(full_to_red_map.size() * 3), std::make_unique(connect.getLocalSize() * 8)}; + ArbVTUGeom retval{connect.getLocalSize(), + full_to_red_map.size(), + std::make_unique(full_to_red_map.size() * 3), + std::make_unique(connect.getLocalSize() * 8)}; // Iterating across the entire bounding box is a bit hairy, but saves memory compared to the alternative (and we only do it once) for (Index vx = sx; vx != nx + sx; ++vx) for (Index vy = sy; vy != ny + sy; ++vy) @@ -531,8 +498,8 @@ void ArbLattice::getQuantity(int quant, real_t* host_tab, real_t scale) { void ArbLattice::initCommManager() { if (mpitools::MPI_Size(comm) == 1) return; int rank = mpitools::MPI_Rank(comm); - const auto& field_table = Model_m::field_streaming_table; - using NodeFieldP = std::array; // Node + field index. We can be a bit wasteful with storing both as 64b, since the number of border nodes is relatively small + const auto& field_table = Model_m::field_streaming_table; + using NodeFieldP = std::array; // Node + field index std::map> needed_fields; // in_nbrs to required N-F pairs, **we need it to be sorted** for (size_t node = 0; node != connect.getLocalSize(); ++node) { for (size_t q = 0; q != Q; ++q) { @@ -550,7 +517,8 @@ void ArbLattice::initCommManager() { vec.erase(std::unique(vec.begin(), vec.end()), vec.end()); comm_manager.in_nbrs.emplace_back(id, vec.size()); } - size_t recv_buf_size = std::transform_reduce(comm_manager.in_nbrs.cbegin(), comm_manager.in_nbrs.cend(), size_t{0}, std::plus{}, [](auto p) { return p.second; }); + const size_t recv_buf_size = + std::transform_reduce(comm_manager.in_nbrs.cbegin(), comm_manager.in_nbrs.cend(), size_t{0}, std::plus{}, [](auto p) { return p.second; }); comm_manager.recv_buf_host = std::pmr::vector(recv_buf_size, &global_pinned_resource); comm_manager.recv_buf_device = cudaMakeUnique(recv_buf_size); comm_manager.unpack_inds = cudaMakeUnique(recv_buf_size); @@ -574,11 +542,12 @@ void ArbLattice::initCommManager() { if (sz != 0) comm_manager.out_nbrs.emplace_back(out_id, sz); ++out_id; } - size_t send_buf_size = std::transform_reduce(comm_manager.out_nbrs.cbegin(), comm_manager.out_nbrs.cend(), size_t{0}, std::plus{}, [](auto p) { return p.second; }); + size_t send_buf_size = + std::transform_reduce(comm_manager.out_nbrs.cbegin(), comm_manager.out_nbrs.cend(), size_t{0}, std::plus{}, [](auto p) { return p.second; }); comm_manager.send_buf_host = std::pmr::vector(send_buf_size, &global_pinned_resource); comm_manager.send_buf_device = cudaMakeUnique(send_buf_size); comm_manager.pack_inds = cudaMakeUnique(send_buf_size); - + std::map> requested_fields; for (const auto& [id, sz] : comm_manager.out_nbrs) { auto& rf = requested_fields[id]; @@ -586,16 +555,8 @@ void ArbLattice::initCommManager() { } std::vector reqs; reqs.reserve(requested_fields.size() + needed_fields.size()); - for ( auto& [id, rf] : requested_fields) { - MPI_Request req; - MPI_Irecv(rf.data(), rf.size() * 2, mpitools::getMPIType(), id, 0, comm, &req); - reqs.push_back(req); - } - for (const auto& [id, nf] : needed_fields) { - MPI_Request req; - MPI_Isend(nf.data(), nf.size() * 2, mpitools::getMPIType(), id, 0, comm, &req); - reqs.push_back(req); - } + for (auto& [id, rf] : requested_fields) MPI_Irecv(rf.data(), rf.size() * 2, mpitools::getMPIType(), id, 0, comm, &reqs.emplace_back()); + for (const auto& [id, nf] : needed_fields) MPI_Isend(nf.data(), nf.size() * 2, mpitools::getMPIType(), id, 0, comm, &reqs.emplace_back()); MPI_Waitall(reqs.size(), reqs.data(), MPI_STATUSES_IGNORE); std::pmr::vector pack_inds_host(send_buf_size, &global_pinned_resource); auto pack_ind_iter = pack_inds_host.begin(); @@ -619,8 +580,8 @@ void ArbLattice::initCommManager() { size_t i; FILE* f; filename = formatAsString("%s_P%02d_pack.csv", debug_name, rank); - f = fopen(filename.c_str(),"w"); - fprintf(f,"rank,id,globalIdx,field,idx\n"); + f = fopen(filename.c_str(), "w"); + fprintf(f, "rank,id,globalIdx,field,idx\n"); i = 0; for (const auto& [id, nfps] : requested_fields) { for (const auto& [node, field] : nfps) { @@ -633,8 +594,8 @@ void ArbLattice::initCommManager() { assert(i == pack_inds_host.size()); fclose(f); filename = formatAsString("%s_P%02d_unpack.csv", debug_name, rank); - f = fopen(filename.c_str(),"w"); - fprintf(f,"rank,id,globalIdx,field,idx\n"); + f = fopen(filename.c_str(), "w"); + fprintf(f, "rank,id,globalIdx,field,idx\n"); i = 0; for (const auto& [id, nfps] : needed_fields) { for (const auto& [node, field] : nfps) { @@ -646,9 +607,7 @@ void ArbLattice::initCommManager() { } assert(i == unpack_inds_host.size()); fclose(f); - } - } void ArbLattice::communicateBorder() { @@ -670,14 +629,22 @@ void ArbLattice::communicateBorder() { void ArbLattice::MPIStream_A() { if (mpitools::MPI_Size(comm) == 1) return; launcher.pack(outStream); - CudaMemcpyAsync(comm_manager.send_buf_host.data(), comm_manager.send_buf_device.get(), comm_manager.send_buf_host.size() * sizeof(storage_t), CudaMemcpyDeviceToHost, outStream); + CudaMemcpyAsync(comm_manager.send_buf_host.data(), + comm_manager.send_buf_device.get(), + comm_manager.send_buf_host.size() * sizeof(storage_t), + CudaMemcpyDeviceToHost, + outStream); } void ArbLattice::MPIStream_B() { if (mpitools::MPI_Size(comm) == 1) return; CudaStreamSynchronize(outStream); communicateBorder(); - CudaMemcpyAsync(comm_manager.recv_buf_device.get(), comm_manager.recv_buf_host.data(), comm_manager.recv_buf_host.size() * sizeof(storage_t), CudaMemcpyHostToDevice, inStream); + CudaMemcpyAsync(comm_manager.recv_buf_device.get(), + comm_manager.recv_buf_host.data(), + comm_manager.recv_buf_host.size() * sizeof(storage_t), + CudaMemcpyHostToDevice, + inStream); launcher.unpack(inStream); CudaStreamSynchronize(inStream); } @@ -716,3 +683,71 @@ void ArbLattice::clearAdjoint() { zSet.ClearGrad(); } /// TODO section end + +void ArbLattice::debugDumpConnect(const std::string& name) const { + if (debug_name.size() != 0) connect.dump(formatAsString("%s_P%02d_%s.csv", debug_name, mpitools::MPI_Rank(comm), name)); +} + +void ArbLattice::debugDumpVTU() const { + if (debug_name.size() != 0) { + std::string filename; + size_t i; + FILE* f; + + const int rank = mpitools::MPI_Rank(comm); + filename = formatAsString("%s_P%02d_loc_perm.csv", debug_name, rank); + f = fopen(filename.c_str(), "w"); + fprintf(f, "rank,globalIdx,idx\n"); + i = connect.chunk_begin; + for (const auto& idx : local_permutation) { + fprintf(f, "%d,%ld,%d\n", rank, i, idx); + i++; + } + assert(i == connect.chunk_end); + i = getLocalSize(); + for (const auto& gidx : ghost_nodes) { + fprintf(f, "%d,%ld,%ld\n", rank, gidx, i); + i++; + } + fprintf(f, "%d,%ld,%ld\n", rank, (long int)-1, i); + i++; + printf("i:%ld snaps_pitch: %ld\n", i, sizes.snaps_pitch); + fflush(stdout); + assert(i <= sizes.snaps_pitch); + fclose(f); + + filename = formatAsString("%s_P%02d.vtu", debug_name, rank); + const auto& [num_cells, num_points, coords, verts] = getVTUGeom(); + VtkFileOut vtu_file(filename, num_cells, num_points, coords.get(), verts.get(), MPMD.local, true, false); + { + std::vector tab1(getLocalSize()); + std::vector tab2(getLocalSize()); + std::vector tab3(getLocalSize()); + for (size_t node = 0; node != connect.getLocalSize(); ++node) { + auto i = local_permutation.at(node); + tab1[i] = node + connect.chunk_begin; + tab2[i] = rank; + tab3[i] = connect.og_index[node]; + } + vtu_file.writeField("globalId", tab1.data()); + vtu_file.writeField("globalIdRank", tab2.data()); + vtu_file.writeField("globalIdOg", tab3.data()); + } + { + std::vector tab1(getLocalSize() * Q); + std::vector tab2(getLocalSize() * Q); + for (size_t node = 0; node != connect.getLocalSize(); ++node) { + auto i = local_permutation.at(node); + for (size_t q = 0; q != Q; ++q) { + const auto nbr = connect.neighbor(q, node); + tab1[i * Q + q] = nbr; + const int owner = std::distance(global_node_dist.cbegin(), std::upper_bound(global_node_dist.cbegin(), global_node_dist.cend(), nbr)) - 1; + tab2[i * Q + q] = owner; + } + } + vtu_file.writeField("neighbour", tab1.data(), Q); + vtu_file.writeField("neighbourRank", tab2.data(), Q); + } + vtu_file.writeFooters(); + } +} diff --git a/src/ArbLattice.hpp b/src/ArbLattice.hpp index 00e60d630..05328df89 100644 --- a/src/ArbLattice.hpp +++ b/src/ArbLattice.hpp @@ -137,6 +137,8 @@ class ArbLattice : public LatticeBase { ArbVTUGeom makeVTUGeom() const; /// Compute VTU geometry void communicateBorder(); /// Send and receive border values in snap (overlapped with interior computation) unsigned lookupLocalGhostIndex(ArbLatticeConnectivity::Index gid) const; /// For a given ghost gid, look up its local id + void debugDumpConnect(const std::string& name) const; /// Dump connectivity info for debug purposes + void debugDumpVTU() const; /// Dump VTU info info for debug purposes }; #endif // ARBLATTICE_HPP From e2e599dad6a9d9d6e5a458d8fdaed91dff97aea2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ga=C5=82ecki?= Date: Thu, 28 Dec 2023 15:54:17 +0100 Subject: [PATCH 2/9] Allow the user to initialize NVFLAGS --- src/configure.ac | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/configure.ac b/src/configure.ac index 91c155190..d9f2d2fc2 100644 --- a/src/configure.ac +++ b/src/configure.ac @@ -194,11 +194,9 @@ AC_ARG_WITH([cpp-flags], [privide additionals flags for the compiler]), [CONF_CPPFLAGS="$withval"]) -NVFLAGS="" - if test "x${COMPILER_BINDIR}" != "x" then - NVFLAGS="-ccbin=${COMPILER_BINDIR} " + NVFLAGS="${NVFLAGS} -ccbin=${COMPILER_BINDIR} " fi if test -z "$CXX" From c61a63ced0addf16e76f228f59520ea7801c1196 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ga=C5=82ecki?= Date: Thu, 28 Dec 2023 18:04:35 +0100 Subject: [PATCH 3/9] toArb optimizations - edges between bulk nodes in the connectivity graph are now ignored (consequential for iteration communication volume and partitioning quality) - toArb memory requirement w.r.t. bounding box size reduced by half (now just 1 bit per voxel) - minor refactoring --- src/toArb.cpp | 120 ++++++++++++++++++++++++++------------------------ 1 file changed, 63 insertions(+), 57 deletions(-) diff --git a/src/toArb.cpp b/src/toArb.cpp index e9b3cbb2e..7244fd721 100644 --- a/src/toArb.cpp +++ b/src/toArb.cpp @@ -6,18 +6,18 @@ #include #include -static long int linPos(long int x, long int y, long int z, long int nx, long int ny) { +static long linPos(long x, long y, long z, long nx, long ny) { return x + nx * y + nx * ny * z; } // i mod m, assuming i is greater than -m and less than 2*m -static long int fastModSingleWrap(long int i, long int m) { +static long fastModSingleWrap(long i, long m) { if (i < 0) return i + m; if (i >= m) return i - m; return i; } -static long int linPosBoundschecked(long int x, long int y, long int z, long int nx, long int ny, long int nz) { +static long linPosBoundschecked(long x, long y, long z, long nx, long ny, long nz) { return linPos(fastModSingleWrap(x, nx), fastModSingleWrap(y, ny), fastModSingleWrap(z, nz), nx, ny); } @@ -25,9 +25,9 @@ static long int linPosBoundschecked(long int x, long int y, long int z, long int static auto makeBulkBmp(const Geometry& geo, big_flag_t bulk_mask, big_flag_t bulk_flag) -> std::vector { const auto nx = geo.totalregion.nx, ny = geo.totalregion.ny, nz = geo.totalregion.nz; std::vector retval(nx * ny * nz); - for (long int z = 0; z < nz; z++) - for (long int y = 0; y < ny; y++) - for (long int x = 0; x < nx; x++) + for (long z = 0; z < nz; z++) + for (long y = 0; y < ny; y++) + for (long x = 0; x < nx; x++) if ((geo.geom[geo.region.offset(x, y, z)] & bulk_mask) == bulk_flag) { const auto lin_pos = linPos(x, y, z, nx, ny); retval[lin_pos] = true; @@ -35,42 +35,32 @@ static auto makeBulkBmp(const Geometry& geo, big_flag_t bulk_mask, big_flag_t bu return retval; } -// Mark void lattice nodes, i.e., bulk nodes which have all bulk neighbors -static auto makeVoidBmp(const lbRegion& region, const std::vector& bulk_bmp) -> std::vector { - const auto nx = region.nx, ny = region.ny, nz = region.nz; - std::vector retval(nx * ny * nz); - for (long int z = 0; z < nz; z++) - for (long int y = 0; y < ny; y++) - for (long int x = 0; x < nx; x++) { - const auto lin_pos = linPos(x, y, z, nx, ny); - if (!bulk_bmp[lin_pos]) continue; // Non-bulk nodes always stay - retval[lin_pos] = std::all_of(Model_m::offset_directions.begin(), Model_m::offset_directions.end(), [&](const auto& ofs_dir) -> bool { - const auto [dx, dy, dz] = ofs_dir; - const auto lin_pos_offset = linPosBoundschecked(x + dx, y + dy, z + dz, nx, ny, nz); - return bulk_bmp[lin_pos_offset]; - }); - } - return retval; -} - // Map from full Cartesian lattice linear index to arbitrary lattice index -static auto makeArbLatticeIndexMap(const lbRegion& region, const std::vector& void_bmp) -> std::unordered_map { - const auto nx = region.nx, ny = region.ny, nz = region.nz; - std::unordered_map retval(void_bmp.size() - std::count(void_bmp.begin(), void_bmp.end(), true)); - long int index = 0; - for (long int z = 0; z < nz; z++) - for (long int y = 0; y < ny; y++) - for (long int x = 0; x < nx; x++) { - const auto lin_pos = linPos(x, y, z, nx, ny); - if (!void_bmp[lin_pos]) retval.emplace(lin_pos, index++); +static auto makeArbLatticeIndexMap(const lbRegion& region, const std::vector& bulk_bmp) -> std::unordered_map { + const long nx = region.nx, ny = region.ny, nz = region.nz; + const auto is_void = [&](long x, long y, long z, long lin_pos) { + if (!bulk_bmp[lin_pos]) return false; // non-bulk nodes always stay + return std::all_of(Model_m::offset_directions.begin(), Model_m::offset_directions.end(), [&](const auto& ofs_dir) -> bool { + const auto [dx, dy, dz] = ofs_dir; + const auto lin_pos_offset = linPosBoundschecked(x + dx, y + dy, z + dz, nx, ny, nz); + return bulk_bmp[lin_pos_offset]; + }); + }; + std::unordered_map retval; + retval.max_load_factor(.5); + for (long index = 0, lin_pos = 0, z = 0; z < nz; ++z) + for (long y = 0; y < ny; ++y) + for (long x = 0; x < nx; ++x) { + if (!is_void(x, y, z, lin_pos)) retval.emplace(lin_pos, index++); + ++lin_pos; } return retval; } -static int writeArbLatticeHeader(std::fstream& file, size_t n_nodes, double underlying_grid_size, const Model& model, const std::map& zone_map) { +static int writeArbLatticeHeader(std::fstream& file, size_t n_nodes, double grid_size, const Model& model, const std::map& zone_map) { file << "OFFSET_DIRECTIONS " << Model_m::offset_directions.size() << '\n'; for (const auto [x, y, z] : Model_m::offset_directions) file << x << ' ' << y << ' ' << z << '\n'; - file << "GRID_SIZE " << underlying_grid_size << '\n'; + file << "GRID_SIZE " << grid_size << '\n'; file << "NODE_LABELS " << model.nodetypeflags.size() + zone_map.size() << '\n'; for (const auto& ntf : model.nodetypeflags) file << ntf.name << '\n'; for (const auto& [name, zf] : zone_map) file << "_Z_" << name << '\n'; @@ -78,13 +68,24 @@ static int writeArbLatticeHeader(std::fstream& file, size_t n_nodes, double unde return file.good() ? EXIT_SUCCESS : EXIT_FAILURE; } -static int writeArbLatticeNodes(const Geometry& geo, const Model& model, const std::map& zone_map, const std::unordered_map& lin_to_arb_index_map, const std::vector& void_bmp, std::fstream& file, double spacing) { - const auto nx = geo.totalregion.nx, ny = geo.totalregion.ny, nz = geo.totalregion.nz; - for (long int z = 0; z < nz; z++) - for (long int y = 0; y < ny; y++) - for (long int x = 0; x < nx; x++) { - const auto lin_pos = linPos(x, y, z, nx, ny); - if (void_bmp[lin_pos]) continue; +static int writeArbLatticeNodes(const Geometry& geo, + const Model& model, + const std::map& zone_map, + const std::unordered_map& lin_to_arb_index_map, + const std::vector& bulk_bmp, + std::fstream& file, + double spacing) { + const long nx = geo.totalregion.nx, ny = geo.totalregion.ny, nz = geo.totalregion.nz; + const auto get_nbr_id = [&](long my_pos, long nbr_pos) -> long { + if (my_pos != nbr_pos && bulk_bmp[my_pos] && bulk_bmp[nbr_pos]) return -1; // ignore edges between bulk nodes + const auto nbr_it = lin_to_arb_index_map.find(nbr_pos); + return nbr_it != lin_to_arb_index_map.end() ? nbr_it->second : -1; + }; + for (long lin_pos = 0, z = 0; z < nz; z++) + for (long y = 0; y < ny; y++) + for (long x = 0; x < nx; x++) { + const auto current_lin_pos = lin_pos++; + if (lin_to_arb_index_map.find(current_lin_pos) == lin_to_arb_index_map.end()) continue; // void node // Coordinates const double x_coord = (static_cast(x) + .5) * spacing; @@ -96,14 +97,18 @@ static int writeArbLatticeNodes(const Geometry& geo, const Model& model, const s for (const auto [dx, dy, dz] : Model_m::offset_directions) { const auto lin_pos_offset = linPosBoundschecked(x + dx, y + dy, z + dz, nx, ny, nz); const auto nbr_it = lin_to_arb_index_map.find(lin_pos_offset); - file << (nbr_it != lin_to_arb_index_map.end() ? nbr_it->second : -1) << ' '; + file << get_nbr_id(current_lin_pos, lin_pos_offset) << ' '; } // Groups & zones const auto flag = geo.geom[geo.region.offset(x, y, z)]; const int zone_flag = (flag & model.settingzones.flag) >> model.settingzones.shift; - const size_t n_groups = std::count_if(model.nodetypeflags.cbegin(), model.nodetypeflags.cend(), [flag](const auto& ntf) { return ntf.flag != 0 && (flag & ntf.group_flag) == ntf.flag; }); - const size_t n_zones = zone_flag == 0 ? 0 : std::count_if(zone_map.begin(), zone_map.end(), [zone_flag](const auto& zone_map_entry) { return zone_map_entry.second == zone_flag; }); + const size_t n_groups = std::count_if(model.nodetypeflags.cbegin(), model.nodetypeflags.cend(), [flag](const auto& ntf) { + return ntf.flag != 0 && (flag & ntf.group_flag) == ntf.flag; + }); + const size_t n_zones = zone_flag == 0 ? 0 : std::count_if(zone_map.begin(), zone_map.end(), [zone_flag](const auto& zone_map_entry) { + return zone_map_entry.second == zone_flag; + }); file << n_groups + n_zones << ' '; size_t gz_ind = 0; for (const auto& ntf : model.nodetypeflags) { @@ -115,9 +120,7 @@ static int writeArbLatticeNodes(const Geometry& geo, const Model& model, const s if (zone_flag == zf) file << gz_ind << ' '; ++gz_ind; } - file << '\n'; - if (!file.good()) break; // Fail early } @@ -125,15 +128,20 @@ static int writeArbLatticeNodes(const Geometry& geo, const Model& model, const s return file.good() ? EXIT_SUCCESS : EXIT_FAILURE; } -static int writeArbLattice(const Geometry& geo, const Model& model, const std::map& zone_map, const std::unordered_map& lin_to_arb_index_map, const std::vector& void_bmp, const std::string& filename, double spacing) { +static int writeArbLattice(const Geometry& geo, + const Model& model, + const std::map& zone_map, + const std::unordered_map& lin_to_arb_index_map, + const std::vector& bulk_bmp, + const std::string& filename, + double spacing) { std::fstream file(filename, std::ios_base::out); if (!file.good()) { ERROR("Failed to open .cxn file for writing"); return EXIT_FAILURE; } if (writeArbLatticeHeader(file, lin_to_arb_index_map.size(), spacing, model, zone_map)) return EXIT_FAILURE; - if (writeArbLatticeNodes(geo, model, zone_map, lin_to_arb_index_map, void_bmp, file, spacing)) return EXIT_FAILURE; - return EXIT_SUCCESS; + return writeArbLatticeNodes(geo, model, zone_map, lin_to_arb_index_map, bulk_bmp, file, spacing); } static int writeArbXml(const Solver& solver, const Geometry& geo, const Model& model, const std::string& cxn_path) { @@ -171,8 +179,8 @@ static int writeArbXml(const Solver& solver, const Geometry& geo, const Model& m return restartfile.save_file(filename.c_str()) ? EXIT_SUCCESS : EXIT_FAILURE; } -static long int liRegionSize(const lbRegion& region) { - const long int nx = region.nx, ny = region.ny, nz = region.nz; +static long liRegionSize(const lbRegion& region) { + const long nx = region.nx, ny = region.ny, nz = region.nz; return nx * ny * nz; } @@ -186,14 +194,12 @@ int toArbitrary(const Solver& solver, const Geometry& geo, const Model& model) { bulk_flag = ntf.flag; bulk_mask = ntf.group_flag; } - auto bulk_bmp = makeBulkBmp(geo, bulk_mask, bulk_flag); - const auto void_bmp = makeVoidBmp(geo.totalregion, bulk_bmp); - bulk_bmp = {}; // Explicitly free memory for subsequent stages - const auto id_map = makeArbLatticeIndexMap(geo.totalregion, void_bmp); + const auto bulk_bmp = makeBulkBmp(geo, bulk_mask, bulk_flag); + const auto id_map = makeArbLatticeIndexMap(geo.totalregion, bulk_bmp); output("Interior size: %lu / %li", id_map.size(), liRegionSize(geo.totalregion)); const auto filename = solver.outGlobalFile("ARB", ".cxn"); const double spacing = 1 / solver.units.alt("m"); output("Writing arbitrary lattice data to %s...", filename.c_str()); - if (writeArbLattice(geo, model, solver.setting_zones, id_map, void_bmp, filename, spacing)) return EXIT_FAILURE; + if (writeArbLattice(geo, model, solver.setting_zones, id_map, bulk_bmp, filename, spacing)) return EXIT_FAILURE; return writeArbXml(solver, geo, model, filename); } From 4e5a626e4d536161b457a9e6c12e332c06bb6912 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ga=C5=82ecki?= Date: Fri, 29 Dec 2023 12:40:21 +0100 Subject: [PATCH 4/9] Implemented failcheck handler for arbitrary grid --- src/Handlers/cbFailcheck.cpp | 168 +++++++++++++++-------------------- 1 file changed, 73 insertions(+), 95 deletions(-) diff --git a/src/Handlers/cbFailcheck.cpp b/src/Handlers/cbFailcheck.cpp index 135f84667..f05c16e75 100644 --- a/src/Handlers/cbFailcheck.cpp +++ b/src/Handlers/cbFailcheck.cpp @@ -2,102 +2,80 @@ std::string cbFailcheck::xmlname = "Failcheck"; -int cbFailcheck::Init () { - Callback::Init(); - currentlyactive = false; - const auto lattice = solver->getCartLattice(); - reg.dx = lattice->getLocalRegion().dx; - reg.dy = lattice->getLocalRegion().dy; - reg.dz = lattice->getLocalRegion().dz; - - - - pugi::xml_attribute attr = node.attribute("dx"); - if (attr) { - reg.dx = solver->units.alt(attr.value()); - } - attr = node.attribute("dy"); - if (attr) { - reg.dy = solver->units.alt(attr.value()); - } - attr = node.attribute("dz"); - if (attr) { - reg.dz = solver->units.alt(attr.value()); - } - - - attr = node.attribute("nx"); - if (attr) { - reg.nx = solver->units.alt(attr.value()); - } - attr = node.attribute("ny"); - if (attr) { - reg.ny = solver->units.alt(attr.value()); - } - attr = node.attribute("nz"); - if (attr) { - reg.nz = solver->units.alt(attr.value()); - } - - return 0; - } - - -int cbFailcheck::DoIt () { - Callback::DoIt(); - if (currentlyactive) return 0; - currentlyactive = true; - int ret = 0; - int fin; - fin = false; - - pugi::xml_attribute comp = node.attribute("what"); - - name_set components; - if(comp){ - components.add_from_string(comp.value(),','); - } else { - components.add_from_string("all",','); - } - - for (const Model::Quantity& it : solver->lattice->model->quantities) { - if (it.isAdjoint) continue; - if (components.in(it.name)) { - int comp = 1; - if (it.isVector) comp = 3; - real_t* tmp = new real_t[reg.size()*comp]; - solver->getCartLattice()->GetQuantity(it.id, reg, tmp, 1); - bool cond = false; - for (int k = 0; k < reg.size()*comp; k++){ - cond = cond || (std::isnan(tmp[k])); - } - delete[] tmp; - MPI_Allreduce(&cond,&fin,1,MPI_INT,MPI_LOR,MPMD.local); - - if(fin ){ - notice("Checking %s discovered NaN", it.name.c_str()); - break; - } - } - +int cbFailcheck::Init() { + Callback::Init(); + currentlyactive = false; + const auto init_cart = [&](const Lattice* lattice) { + reg.dx = lattice->getLocalRegion().dx; + reg.dy = lattice->getLocalRegion().dy; + reg.dz = lattice->getLocalRegion().dz; + const auto set_if_present = [&](const char* name, auto& value) { + const auto attribute = node.attribute(name); + if (attribute) value = solver->units.alt(attribute.value()); + }; + set_if_present("dx", reg.dx); + set_if_present("dy", reg.dy); + set_if_present("dz", reg.dz); + set_if_present("nx", reg.nx); + set_if_present("ny", reg.ny); + set_if_present("nz", reg.nz); + return EXIT_SUCCESS; + }; + const auto init_arb = [&](const Lattice*) { return EXIT_SUCCESS; }; + return std::visit(OverloadSet{init_cart, init_arb}, solver->getLatticeVariant()); +} + +int cbFailcheck::DoIt() { + Callback::DoIt(); + if (currentlyactive) return EXIT_SUCCESS; + currentlyactive = true; + + name_set components; + const auto comp = node.attribute("what"); + components.add_from_string(comp ? comp.value() : "all", ','); + + const auto check_for_nans = [&](const Model::Quantity& quantity) -> bool { + if (!components.in(quantity.name) || quantity.isAdjoint) return false; + + const auto get_quantity_vec = [&](int quant_id, int n_comps) -> std::vector { + const auto get_from_cart = [&](Lattice* lattice) { + std::vector retval(reg.size() * n_comps); + lattice->GetQuantity(quant_id, reg, retval.data(), 1.); + return retval; + }; + const auto get_from_arb = [&](Lattice* lattice) { + std::vector retval(lattice->getLocalSize() * n_comps); + lattice->getQuantity(quant_id, retval.data(), 1.); + return retval; + }; + return std::visit(OverloadSet{get_from_cart, get_from_arb}, solver->getLatticeVariant()); + }; + + const int n_comps = quantity.isVector ? 3 : 1; + const auto values = get_quantity_vec(quantity.id, n_comps); + int has_nans = std::any_of(values.begin(), values.end(), [](auto v) { return std::isnan(v); }); + MPI_Allreduce(MPI_IN_PLACE, &has_nans, 1, MPI_INT, MPI_LOR, MPMD.local); + if (has_nans) notice("Discovered NaN values in %s", quantity.name.c_str()); + return has_nans; + }; + + // Note: std::any_of would break early, we want to print all quantities which have NaN values, hence std::transform_reduce + const auto& quants = solver->lattice->model->quantities; + if (std::transform_reduce(quants.begin(), quants.end(), false, std::logical_or{}, check_for_nans)) { + notice("NaN value discovered. Executing final actions from the Failcheck element before full stop...\n"); + for (pugi::xml_node par = node.first_child(); par; par = par.next_sibling()) { + Handler hand(par, solver); + if (hand) hand.DoIt(); } - if (fin) { - notice("NaN value discovered. Executing final actions from the Failcheck element before full stop...\n"); - for (pugi::xml_node par = node.first_child(); par; par = par.next_sibling()) { - Handler hand(par, solver); - if (hand) hand.DoIt(); - } - notice("Stopping due to Nan value\n"); - ret = ITERATION_STOP; - } - return ret; - } - - -int cbFailcheck::Finish () { - return Callback::Finish(); - } + notice("Stopping due to NaN value\n"); + return ITERATION_STOP; + } + return EXIT_SUCCESS; +} +int cbFailcheck::Finish() { + return Callback::Finish(); +} // Register the handler (basing on xmlname) in the Handler Factory -template class HandlerFactory::Register< GenericAsk< cbFailcheck > >; +template class HandlerFactory::Register >; From 6bf10c4de6f8bfae1690e4e36febcc8d4ee3482a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ga=C5=82ecki?= Date: Fri, 29 Dec 2023 18:16:54 +0100 Subject: [PATCH 5/9] Strategy selection for computing the local permutation --- src/ArbLattice.cpp | 55 +++++++++++++++++++++++++++++++++++----------- src/ArbLattice.hpp | 5 +++-- 2 files changed, 45 insertions(+), 15 deletions(-) diff --git a/src/ArbLattice.cpp b/src/ArbLattice.cpp index 52fd5d08b..f67276664 100644 --- a/src/ArbLattice.cpp +++ b/src/ArbLattice.cpp @@ -42,7 +42,7 @@ void ArbLattice::initialize(size_t num_snaps_, const std::map& if (connect.getLocalSize() == 0) throw std::runtime_error{"At least one MPI rank has an empty partition, please use fewer MPI ranks"}; // Realistically, this should never happen computeGhostNodes(); - computeLocalPermutation(); + computeLocalPermutation(arb_node, setting_zones); allocDeviceMemory(); initDeviceData(arb_node, setting_zones); local_bounding_box = getLocalBoundingBox(); @@ -230,9 +230,41 @@ void ArbLattice::computeGhostNodes() { std::sort(ghost_nodes.begin(), ghost_nodes.end()); } -void ArbLattice::computeLocalPermutation() { - std::vector lids; // globalIdx - chunk_begin of elements - lids.resize(connect.getLocalSize()); +enum struct PermStrategy { None, Type, Coords, Both }; + +std::function ArbLattice::makePermCompare(pugi::xml_node arb_node, const std::map& setting_zones) { + // Note: copies of these closures will outlive the function call, careful with the captures (capture by copy) + const auto get_zyx = [this](int lid) { return std::array{connect.coord(2, lid), connect.coord(1, lid), connect.coord(0, lid)}; }; + const auto get_nt = [this](int lid) { return node_types_host.at(lid); }; + const auto get_nt_zyx = [get_zyx, get_nt](int lid) { return std::make_pair(get_nt(lid), get_zyx(lid)); }; + constexpr auto wrap_projection_as_comparison = [](const auto& proj) { return [proj](int lid1, int lid2) { return proj(lid1) < proj(lid2); }; }; + + using namespace std::string_view_literals; + static const auto strat_map = std::unordered_map{{"none"sv, PermStrategy::None}, + {"type"sv, PermStrategy::Type}, + {"coords"sv, PermStrategy::Coords}, + {"default"sv, PermStrategy::Both}, + {""sv, PermStrategy::Both}}; + const std::string_view strat_str = arb_node.attribute("permutation").value(); + const auto enum_it = strat_map.find(strat_str); + if (enum_it == strat_map.end()) throw std::runtime_error{"Unrecognized permutation strategy"}; + const auto strat = enum_it->second; + if (strat == PermStrategy::Type || strat == PermStrategy::Both) computeNodeTypesOnHost(arb_node, setting_zones, /*permute*/ false); + switch (strat) { + case PermStrategy::None: // Use initial ordering + return std::less{}; + case PermStrategy::Type: // Sort by node type only + return wrap_projection_as_comparison(get_nt); + case PermStrategy::Coords: // Sort by coordinates only + return wrap_projection_as_comparison(get_zyx); + case PermStrategy::Both: // Sort by node type, then coordinates + return wrap_projection_as_comparison(get_nt_zyx); + } + return {}; // avoid compiler warning +} + +void ArbLattice::computeLocalPermutation(pugi::xml_node arb_node, const std::map& setting_zones) { + std::vector lids(connect.getLocalSize()); // globalIdx - chunk_begin of elements std::iota(lids.begin(), lids.end(), 0); const auto is_border_node = [&](int lid) { for (size_t q = 0; q != Q; ++q) { @@ -243,12 +275,9 @@ void ArbLattice::computeLocalPermutation() { }; const auto interior_begin = std::stable_partition(lids.begin(), lids.end(), is_border_node); sizes.border_nodes = static_cast(std::distance(lids.begin(), interior_begin)); - const auto by_zyx = [&](int lid1, int lid2) { // Sort by z, then y, then x -> this should help with coalesced memory access - const auto get_zyx = [&](int lid) { return std::array{connect.coord(2, lid), connect.coord(1, lid), connect.coord(0, lid)}; }; - return get_zyx(lid1) < get_zyx(lid2); - }; - std::sort(lids.begin(), interior_begin, by_zyx); - std::sort(interior_begin, lids.end(), by_zyx); + const auto compare = makePermCompare(arb_node, setting_zones); + std::sort(lids.begin(), interior_begin, compare); + std::sort(interior_begin, lids.end(), compare); local_permutation.resize(connect.getLocalSize()); size_t i = 0; for (const auto& lid : lids) local_permutation[lid] = i++; @@ -296,7 +325,7 @@ std::vector ArbLattice::parseBrushFromXml(pugi::xml_n return retval; } -void ArbLattice::computeNodeTypesOnHost(pugi::xml_node arb_node, const std::map& setting_zones) { +void ArbLattice::computeNodeTypesOnHost(pugi::xml_node arb_node, const std::map& setting_zones, bool permute) { const auto local_sz = connect.getLocalSize(); const auto zone_sizes = Span(connect.zones_per_node.get(), local_sz); auto zone_offsets = std::vector(local_sz); @@ -310,7 +339,7 @@ void ArbLattice::computeNodeTypesOnHost(pugi::xml_node arb_node, const std::map< const auto point = std::array{connect.coord(0, i), connect.coord(1, i), connect.coord(2, i)}; for (const auto& [pred, mask, val] : brushes) if (pred(labels, point)) { - auto& dest = node_types_host[local_permutation[i]]; + auto& dest = node_types_host[permute ? local_permutation[i] : i]; dest = (dest & ~mask) | val; } } @@ -356,7 +385,7 @@ std::pmr::vector ArbLattice::computeNeighbors() const { void ArbLattice::initDeviceData(pugi::xml_node arb_node, const std::map& setting_zones) { fillWithStorageNaNAsync(snaps_device.get(), sizes.snaps_pitch * sizes.snaps * NF, inStream); - computeNodeTypesOnHost(arb_node, setting_zones); + computeNodeTypesOnHost(arb_node, setting_zones, /*permute*/ true); copyVecToDeviceAsync(node_types_device.get(), node_types_host, inStream); const auto nbrs = computeNeighbors(); copyVecToDeviceAsync(neighbors_device.get(), nbrs, inStream); diff --git a/src/ArbLattice.hpp b/src/ArbLattice.hpp index 05328df89..2c2c2c0e1 100644 --- a/src/ArbLattice.hpp +++ b/src/ArbLattice.hpp @@ -122,11 +122,12 @@ class ArbLattice : public LatticeBase { void initialize(size_t num_snaps_, const std::map& setting_zones, pugi::xml_node arb_node); /// Init based on args void readFromCxn(const std::string& cxn_path); /// Read the lattice info from a .cxn file void partition(); /// Repartition the lattice, if ParMETIS is not present this is a noop - void computeLocalPermutation(); /// Compute the local permutation, see comment at the top + std::function makePermCompare(pugi::xml_node arb_node, const std::map& setting_zones); /// Make type-erased comparison operator for computing the local permutation, according to the strategy specified in the xml file + void computeLocalPermutation(pugi::xml_node arb_node, const std::map& setting_zones); /// Compute the local permutation, see comment at the top void computeGhostNodes(); /// Retrieve GIDs of ghost nodes from the connectivity info structure void allocDeviceMemory(); /// Allocate required device memory std::vector parseBrushFromXml(pugi::xml_node arb_node, const std::map& setting_zones) const; /// Parse the arbitrary lattice XML to determine the brush sequence to be applied to each node - void computeNodeTypesOnHost(pugi::xml_node arb_node, const std::map& setting_zones); /// Compute the node types to be stored on the device + void computeNodeTypesOnHost(pugi::xml_node arb_node, const std::map& setting_zones, bool permute); /// Compute the node types to be stored on the device, `permute` enables better code reuse std::pmr::vector computeCoords() const; /// Compute the coordinates 2D array to be stored on the device std::pmr::vector computeNeighbors() const; /// Compute the neighbors 2D array to be stored on the device void initDeviceData(pugi::xml_node arb_node, const std::map& setting_zones); /// Initialize data residing in device memory From 68eade25c266073c72121a15d32f5da015c641cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ga=C5=82ecki?= Date: Sun, 31 Dec 2023 11:10:15 +0100 Subject: [PATCH 6/9] Default permutation strategy changed to "coords" --- src/ArbLattice.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/ArbLattice.cpp b/src/ArbLattice.cpp index f67276664..4c98e4481 100644 --- a/src/ArbLattice.cpp +++ b/src/ArbLattice.cpp @@ -230,8 +230,6 @@ void ArbLattice::computeGhostNodes() { std::sort(ghost_nodes.begin(), ghost_nodes.end()); } -enum struct PermStrategy { None, Type, Coords, Both }; - std::function ArbLattice::makePermCompare(pugi::xml_node arb_node, const std::map& setting_zones) { // Note: copies of these closures will outlive the function call, careful with the captures (capture by copy) const auto get_zyx = [this](int lid) { return std::array{connect.coord(2, lid), connect.coord(1, lid), connect.coord(0, lid)}; }; @@ -239,16 +237,18 @@ std::function ArbLattice::makePermCompare(pugi::xml_node arb_nod const auto get_nt_zyx = [get_zyx, get_nt](int lid) { return std::make_pair(get_nt(lid), get_zyx(lid)); }; constexpr auto wrap_projection_as_comparison = [](const auto& proj) { return [proj](int lid1, int lid2) { return proj(lid1) < proj(lid2); }; }; - using namespace std::string_view_literals; - static const auto strat_map = std::unordered_map{{"none"sv, PermStrategy::None}, - {"type"sv, PermStrategy::Type}, - {"coords"sv, PermStrategy::Coords}, - {"default"sv, PermStrategy::Both}, - {""sv, PermStrategy::Both}}; + enum struct PermStrategy { None, Type, Coords, Both }; + static const std::unordered_map strat_map = {{"none", PermStrategy::None}, + {"type", PermStrategy::Type}, + {"coords", PermStrategy::Coords}, + {"both", PermStrategy::Both}, + {"", PermStrategy::Coords}}; // "" is the default const std::string_view strat_str = arb_node.attribute("permutation").value(); const auto enum_it = strat_map.find(strat_str); - if (enum_it == strat_map.end()) throw std::runtime_error{"Unrecognized permutation strategy"}; + if (enum_it == strat_map.end()) + throw std::runtime_error{"Unknown permutation strategy for ArbitraryLattice, valid values are: none, type, coords (default), both"}; const auto strat = enum_it->second; + if (strat == PermStrategy::Type || strat == PermStrategy::Both) computeNodeTypesOnHost(arb_node, setting_zones, /*permute*/ false); switch (strat) { case PermStrategy::None: // Use initial ordering From 660cb830f5df3fddc7f55099ad84a5fb40d0387f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ga=C5=82ecki?= Date: Sun, 31 Dec 2023 12:14:41 +0100 Subject: [PATCH 7/9] BIN handler for arb lattice --- src/Handlers/cbBIN.cpp | 27 +++++++++------------ src/vtkLattice.cpp | 55 +++++++++++++++++++++++++++++++++--------- src/vtkLattice.h | 1 + 3 files changed, 57 insertions(+), 26 deletions(-) diff --git a/src/Handlers/cbBIN.cpp b/src/Handlers/cbBIN.cpp index e3c01a0e8..94b6521cf 100644 --- a/src/Handlers/cbBIN.cpp +++ b/src/Handlers/cbBIN.cpp @@ -3,21 +3,18 @@ std::string cbBIN::xmlname = "BIN"; #include "../HandlerFactory.h" #include "../vtkLattice.h" -int cbBIN::Init () { - Callback::Init(); - pugi::xml_attribute attr = node.attribute("name"); - nm = "BIN"; - if (attr) nm = attr.value(); - return 0; - } - - -int cbBIN::DoIt () { - Callback::DoIt(); - const auto filename = solver->outIterFile(nm, ""); - return binWriteLattice(filename, *solver->getCartLattice(), solver->units); - }; +int cbBIN::Init() { + Callback::Init(); + pugi::xml_attribute attr = node.attribute("name"); + nm = attr ? attr.value() : "BIN"; + return 0; +} +int cbBIN::DoIt() { + Callback::DoIt(); + const auto filename = solver->outIterFile(nm, ""); + return std::visit([&](const auto lattice_ptr) { return binWriteLattice(filename, *lattice_ptr, solver->units); }, solver->getLatticeVariant()); +}; // Register the handler (basing on xmlname) in the Handler Factory -template class HandlerFactory::Register< GenericAsk< cbBIN > >; +template class HandlerFactory::Register >; diff --git a/src/vtkLattice.cpp b/src/vtkLattice.cpp index bfe97fb85..14b6331ce 100644 --- a/src/vtkLattice.cpp +++ b/src/vtkLattice.cpp @@ -12,7 +12,22 @@ int vtkWriteLattice(const std::string& filename, CartLattice& lattice, const Uni const lbRegion& local_reg = lattice.getLocalRegion(); const lbRegion reg = local_reg.intersect(total_output_reg); size_t size = reg.size(); - myprint(1, -1, "Writing region %dx%dx%d + %d,%d,%d (size %d) from %dx%dx%d + %d,%d,%d", reg.nx, reg.ny, reg.nz, reg.dx, reg.dy, reg.dz, size, local_reg.nx, local_reg.ny, local_reg.nz, local_reg.dx, local_reg.dy, local_reg.dz); + myprint(1, + -1, + "Writing region %dx%dx%d + %d,%d,%d (size %d) from %dx%dx%d + %d,%d,%d", + reg.nx, + reg.ny, + reg.nz, + reg.dx, + reg.dy, + reg.dz, + size, + local_reg.nx, + local_reg.ny, + local_reg.nz, + local_reg.dx, + local_reg.dy, + local_reg.dz); vtkFileOut vtkFile(MPMD.local); if (vtkFile.Open(filename.c_str())) return -1; @@ -50,8 +65,10 @@ int vtkWriteLattice(const std::string& filename, CartLattice& lattice, const Uni int vtuWriteLattice(const std::string& filename, ArbLattice& lattice, const UnitEnv& units, const name_set& what) { try { const auto& [num_cells, num_points, coords, verts] = lattice.getVTUGeom(); - const bool has_scalars = std::find_if(lattice.model->quantities.begin(), lattice.model->quantities.end(), [](const auto& q) { return !q.isVector; }) != lattice.model->quantities.end(); - const bool has_vectors = std::find_if(lattice.model->quantities.begin(), lattice.model->quantities.end(), [](const auto& q) { return q.isVector; }) != lattice.model->quantities.end(); + const bool has_scalars = std::find_if(lattice.model->quantities.begin(), lattice.model->quantities.end(), [](const auto& q) { return !q.isVector; }) != + lattice.model->quantities.end(); + const bool has_vectors = std::find_if(lattice.model->quantities.begin(), lattice.model->quantities.end(), [](const auto& q) { return q.isVector; }) != + lattice.model->quantities.end(); VtkFileOut vtu_file(filename, num_cells, num_points, coords.get(), verts.get(), MPMD.local, has_scalars, has_vectors); const auto node_types_view = lattice.getNodeTypes(); @@ -84,23 +101,39 @@ int vtuWriteLattice(const std::string& filename, ArbLattice& lattice, const Unit int binWriteLattice(const std::string& filename, CartLattice& lattice, const UnitEnv& units) { const lbRegion& reg = lattice.getLocalRegion(); - FILE* f; - int size = reg.size(); + const int size = reg.size(); + const auto tmp = std::make_unique(lattice.getLocalSize() * 3); // Allocate for vector quantities and reuse for (const Model::Quantity& it : lattice.model->quantities) { - int comp = 1; - if (it.isVector) comp = 3; - auto tmp = std::make_unique(size * comp); + const int comp = it.isVector ? 3 : 1; lattice.GetQuantity(it.id, reg, tmp.get(), 1); const auto fn = formatAsString("%s.%s.bin", filename, it.name); - f = fopen(fn.c_str(), "w"); + auto f = fopen(fn.c_str(), "w"); if (f == NULL) { ERROR("Cannot open file: %s\n", fn.c_str()); - return -1; + return EXIT_FAILURE; } fwrite(tmp.get(), sizeof(real_t) * comp, size, f); fclose(f); } - return 0; + return EXIT_SUCCESS; +} + +int binWriteLattice(const std::string& filename, ArbLattice& lattice, const UnitEnv& units) { + const size_t size = lattice.getLocalSize(); + const auto tmp = std::make_unique(size * 3); // Allocate for vector quantities and reuse + for (const Model::Quantity& it : lattice.model->quantities) { + const int comp = it.isVector ? 3 : 1; + lattice.getQuantity(it.id, tmp.get(), 1.); + const auto fn = formatAsString("%s.%s.bin", filename, it.name); + auto f = fopen(fn.c_str(), "w"); + if (f == NULL) { + ERROR("Cannot open file: %s\n", fn.c_str()); + return EXIT_FAILURE; + } + fwrite(tmp.get(), sizeof(real_t) * comp, size, f); + fclose(f); + } + return EXIT_SUCCESS; } inline int txtWriteElement(FILE* f, float tmp) { diff --git a/src/vtkLattice.h b/src/vtkLattice.h index b25bcf013..38fcd3150 100644 --- a/src/vtkLattice.h +++ b/src/vtkLattice.h @@ -14,6 +14,7 @@ int vtkWriteLattice(const std::string& filename, CartLattice& lattice, const UnitEnv&, const name_set& s, const lbRegion& region); int vtuWriteLattice(const std::string& filename, ArbLattice& lattice, const UnitEnv&, const name_set& s); int binWriteLattice(const std::string& filename, CartLattice& lattice, const UnitEnv& units); +int binWriteLattice(const std::string& filename, ArbLattice& lattice, const UnitEnv& units); int txtWriteLattice(const std::string& filename, CartLattice& lattice, const UnitEnv&, const name_set& s, int type); void screenDumpLattice(const CartLattice& lattice); int initMean(const std::string& filename); From 5d7635b0aec5dc67857978444bede0047d1e3b90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ga=C5=82ecki?= Date: Sun, 31 Dec 2023 15:24:28 +0100 Subject: [PATCH 8/9] Added TXT handler for arb lattice --- src/ArbLattice.hpp | 2 ++ src/Handlers/cbTXT.cpp | 43 +++++++++-------------- src/vtkLattice.cpp | 77 ++++++++++++++++++++++++++++++++++++++++-- src/vtkLattice.h | 1 + 4 files changed, 93 insertions(+), 30 deletions(-) diff --git a/src/ArbLattice.hpp b/src/ArbLattice.hpp index 2c2c2c0e1..9c7083bbd 100644 --- a/src/ArbLattice.hpp +++ b/src/ArbLattice.hpp @@ -85,6 +85,8 @@ class ArbLattice : public LatticeBase { void getQuantity(int quant, real_t* host_tab, real_t scale); /// Write GPU data to \p host_tab const ArbVTUGeom& getVTUGeom() const { return vtu_geom; } Span getNodeTypes() const { return {node_types_host.data(), node_types_host.size()}; } /// Get host view of node types (permuted) + const ArbLatticeConnectivity& getConnectivity() const { return connect; } + const std::vector& getLocalPermutation() const { return local_permutation; } protected: ArbLatticeLauncher launcher; /// Launcher responsible for running CUDA kernels on the lattice diff --git a/src/Handlers/cbTXT.cpp b/src/Handlers/cbTXT.cpp index 509751d8a..3e4948a04 100644 --- a/src/Handlers/cbTXT.cpp +++ b/src/Handlers/cbTXT.cpp @@ -3,33 +3,22 @@ std::string cbTXT::xmlname = "TXT"; #include "../HandlerFactory.h" #include "../vtkLattice.h" -int cbTXT::Init () { - Callback::Init(); - pugi::xml_attribute attr = node.attribute("name"); - nm = "TXT"; - if (attr) nm = attr.value(); - attr = node.attribute("what"); - if (attr) { - s.add_from_string(attr.value(),','); - } else { - s.add_from_string("all",','); - } - gzip = false; - attr = node.attribute("gzip"); - if (attr) gzip = attr.as_bool(); - txt_type = 0; - if (gzip) txt_type = 1; - return 0; - } - - -int cbTXT::DoIt () { - Callback::DoIt(); - const auto filename = solver->outIterFile(nm, ""); - auto& lattice = *solver->getCartLattice(); - return txtWriteLattice(filename.c_str(), lattice, solver->units, s, txt_type); - }; +int cbTXT::Init() { + Callback::Init(); + auto attr = node.attribute("name"); + nm = attr ? attr.value() : "TXT"; + attr = node.attribute("what"); + s.add_from_string(attr ? attr.value() : "all", ','); + gzip = node.attribute("gzip").as_bool(); + txt_type = gzip ? 1 : 0; + return EXIT_SUCCESS; +} +int cbTXT::DoIt() { + Callback::DoIt(); + const auto filename = solver->outIterFile(nm, ""); + return std::visit([&](const auto lattice_ptr) { return txtWriteLattice(filename, *lattice_ptr, solver->units, s, txt_type); }, solver->getLatticeVariant()); +}; // Register the handler (basing on xmlname) in the Handler Factory -template class HandlerFactory::Register< GenericAsk< cbTXT > >; +template class HandlerFactory::Register >; diff --git a/src/vtkLattice.cpp b/src/vtkLattice.cpp index 14b6331ce..63e8034bb 100644 --- a/src/vtkLattice.cpp +++ b/src/vtkLattice.cpp @@ -182,6 +182,7 @@ int txtWriteLattice(const std::string& filename, CartLattice& lattice, const Uni fclose(f); } + auto tmp = std::make_unique(size * 3); for (const Model::Quantity& it : lattice.model->quantities) { if (what.in(it.name)) { const auto fn = formatAsString("%s_%s.txt", filename, it.name); @@ -202,9 +203,7 @@ int txtWriteLattice(const std::string& filename, CartLattice& lattice, const Uni ERROR("Cannot open file: %s\n", fn.c_str()); return -1; } - double v = units.alt(it.unit); - auto tmp = std::make_unique(size); - lattice.GetQuantity(it.id, reg, tmp.get(), 1 / v); + lattice.GetQuantity(it.id, reg, tmp.get(), 1. / units.alt(it.unit)); txtWriteField(f, tmp.get(), reg.nx, size); fclose(f); } @@ -212,3 +211,75 @@ int txtWriteLattice(const std::string& filename, CartLattice& lattice, const Uni return 0; } + +int txtWriteLattice(const std::string& filename, ArbLattice& lattice, const UnitEnv& units, const name_set& what, int type) { + const size_t size = lattice.getLocalSize(); + if (D_MPI_RANK == 0) { + const auto fn = formatAsString("%s_info.txt", filename); + FILE* f = fopen(fn.c_str(), "w"); + if (f == NULL) { + ERROR("Cannot open file: %s\n", fn.c_str()); + return EXIT_FAILURE; + } + fprintf(f, "dx: %lg\n", 1 / units.alt("m")); + fprintf(f, "dt: %lg\n", 1 / units.alt("s")); + fprintf(f, "dm: %lg\n", 1 / units.alt("kg")); + fprintf(f, "dT: %lg\n", 1 / units.alt("K")); + fprintf(f, "size: %lu\n", size); + fclose(f); + } + + const auto open_out_stream = [&](const std::string& fn) -> FILE* { + switch (type) { + case 0: + return fopen(fn.c_str(), "w"); + case 1: { + const auto com = formatAsString("gzip > %s.gz", fn); + return popen(com.c_str(), "w"); + } + default: + ERROR("Unknown type in txtWriteLattice\n"); + return nullptr; + } + }; + const auto tmp = std::make_unique(size * 3); // allocate for vector quantity and reuse + const auto write_tmp_to_file = [&](const std::string& suffix, bool is_vector) { + const auto fn = formatAsString("%s_%s.txt", filename, suffix); + auto f = open_out_stream(fn); + if (!f) { + ERROR("Cannot open file: %s\n", fn.c_str()); + return EXIT_FAILURE; + } + for (size_t i = 0, n = 0; n != size; ++n) { + if (is_vector) { + txtWriteElement(f, tmp[i++]); + fprintf(f, " "); + txtWriteElement(f, tmp[i++]); + fprintf(f, " "); + txtWriteElement(f, tmp[i++]); + fprintf(f, "\n"); + } else { + txtWriteElement(f, tmp[n]); + fprintf(f, "\n"); + } + } + fclose(f); + return EXIT_SUCCESS; + }; + + for (const auto& quantity : lattice.model->quantities) { + if (what.in(quantity.name)) { + lattice.getQuantity(quantity.id, tmp.get(), 1. / units.alt(quantity.unit)); + if (write_tmp_to_file(quantity.name, quantity.isVector)) return EXIT_FAILURE; + } + } + + for (size_t i = 0; i != size; ++i) { + const auto perm_ind = lattice.getLocalPermutation()[i]; + for (size_t dim = 0; dim != 3; ++dim) { + const size_t dest_ind = 3 * perm_ind + dim; + tmp[dest_ind] = lattice.getConnectivity().coord(dim, i); + } + } + return write_tmp_to_file("coords", true); +} diff --git a/src/vtkLattice.h b/src/vtkLattice.h index 38fcd3150..5834110fc 100644 --- a/src/vtkLattice.h +++ b/src/vtkLattice.h @@ -16,6 +16,7 @@ int vtuWriteLattice(const std::string& filename, ArbLattice& lattice, const Unit int binWriteLattice(const std::string& filename, CartLattice& lattice, const UnitEnv& units); int binWriteLattice(const std::string& filename, ArbLattice& lattice, const UnitEnv& units); int txtWriteLattice(const std::string& filename, CartLattice& lattice, const UnitEnv&, const name_set& s, int type); +int txtWriteLattice(const std::string& filename, ArbLattice& lattice, const UnitEnv&, const name_set& s, int type); void screenDumpLattice(const CartLattice& lattice); int initMean(const std::string& filename); int writeMean(const std::string& filename, const CartLattice& lattice, int, int iter, double); From e02f0717347ec2ad596ef2b120741bb1f8edb970 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ga=C5=82ecki?= Date: Sun, 31 Dec 2023 17:32:03 +0100 Subject: [PATCH 9/9] Added facilities for saving/loading the solution to/from binary files, with corresponding handlers --- src/ArbLattice.cpp | 73 ++++++++++++--- src/ArbLattice.hpp | 1 + src/Handlers/acLoadMemoryDump.cpp | 31 +++---- src/Handlers/cbSaveBinary.cpp | 52 +++++------ src/Handlers/cbSaveCheckpoint.cpp | 146 ++++++++++++++---------------- src/Handlers/cbSaveMemoryDump.cpp | 47 +++++----- src/Lists.h.Rt | 8 ++ 7 files changed, 198 insertions(+), 160 deletions(-) diff --git a/src/ArbLattice.cpp b/src/ArbLattice.cpp index 4c98e4481..8d7fd8e51 100644 --- a/src/ArbLattice.cpp +++ b/src/ArbLattice.cpp @@ -678,22 +678,73 @@ void ArbLattice::MPIStream_B() { CudaStreamSynchronize(inStream); } -/// TODO section -int ArbLattice::loadComp(const std::string& filename, const std::string& comp) { - throw std::runtime_error{"UNIMPLEMENTED"}; - return -1; +static int saveImpl(const std::string& filename, const storage_t* device_ptr, size_t size) { + std::pmr::vector tab(size); + CudaMemcpy(tab.data(), device_ptr, size * sizeof(storage_t), CudaMemcpyDeviceToHost); + auto file = fopen(filename.c_str(), "wb"); + if (!file) { + const auto err_msg = std::string("Failed to open ") + filename + " for writing"; + ERROR(err_msg.c_str()); + return EXIT_FAILURE; + } + const auto n_written = fwrite(tab.data(), sizeof(storage_t), size, file); + fclose(file); + if (n_written != size) { + const auto err_msg = std::string("Error writing to ") + filename; + ERROR(err_msg.c_str()); + return EXIT_FAILURE; + } + return EXIT_SUCCESS; } -int ArbLattice::saveComp(const std::string& filename, const std::string& comp) const { - throw std::runtime_error{"UNIMPLEMENTED"}; - return -1; + +static int loadImpl(const std::string& filename, storage_t* device_ptr, size_t size) { + auto file = fopen(filename.c_str(), "rb"); + if (!file) { + const auto err_msg = std::string("Failed to open ") + filename + " for reading"; + ERROR(err_msg.c_str()); + return EXIT_FAILURE; + } + std::pmr::vector tab(size); + const auto n_read = fread(tab.data(), sizeof(storage_t), size, file); + fclose(file); + if (n_read != size) { + const auto err_msg = std::string("Error reading from ") + filename; + ERROR(err_msg.c_str()); + return EXIT_FAILURE; + } + CudaMemcpy(device_ptr, tab.data(), size * sizeof(storage_t), CudaMemcpyHostToDevice); + return EXIT_SUCCESS; } + +void ArbLattice::savePrimal(const std::string& filename, int snap_ind) const { + if (saveImpl(filename, getSnapPtr(snap_ind), sizes.snaps_pitch * NF)) throw std::runtime_error{"savePrimal failed"}; +} + int ArbLattice::loadPrimal(const std::string& filename, int snap_ind) { - throw std::runtime_error{"UNIMPLEMENTED"}; - return -1; + return loadImpl(filename, getSnapPtr(snap_ind), sizes.snaps_pitch * NF); } -void ArbLattice::savePrimal(const std::string& filename, int snap_ind) const { - throw std::runtime_error{"UNIMPLEMENTED"}; + +int ArbLattice::loadComp(const std::string& filename, const std::string& comp) { + const int comp_ind = Model_m::lookupFieldIndexByName(comp); + if (comp_ind == -1) { + const auto err_msg = std::string("ArbLattice::loadComp called with unknown component: ") + comp; + error(err_msg.c_str()); + return EXIT_FAILURE; + } + return loadImpl(filename, std::next(getSnapPtr(Snap), comp_ind * sizes.snaps_pitch), sizes.snaps_pitch); } + +int ArbLattice::saveComp(const std::string& filename, const std::string& comp) const { + const int comp_ind = Model_m::lookupFieldIndexByName(comp); + if (comp_ind == -1) { + const auto err_msg = std::string("ArbLattice::saveComp called with unknown component: ") + comp; + error(err_msg.c_str()); + return EXIT_FAILURE; + } + return saveImpl(filename, std::next(getSnapPtr(Snap), comp_ind * sizes.snaps_pitch), sizes.snaps_pitch); +} + +/// TODO section #ifdef ADJOINT int ArbLattice::loadAdj(const std::string& filename, int asnap_ind) { throw std::runtime_error{"UNIMPLEMENTED"}; diff --git a/src/ArbLattice.hpp b/src/ArbLattice.hpp index 9c7083bbd..681113b80 100644 --- a/src/ArbLattice.hpp +++ b/src/ArbLattice.hpp @@ -107,6 +107,7 @@ class ArbLattice : public LatticeBase { }; storage_t* getSnapPtr(int snap_ind); /// Get device pointer to the specified snap (somewhere within the total snap allocation) + const storage_t* getSnapPtr(int snap_ind) const { return const_cast(this)->getSnapPtr(snap_ind); } #ifdef ADJOINT storage_t* getAdjointSnapPtr(int snap_ind); /// Get device pointer to the specified adjoint snap, snap_ind must be 0 or 1 #endif diff --git a/src/Handlers/acLoadMemoryDump.cpp b/src/Handlers/acLoadMemoryDump.cpp index d2238ac2c..b7468aaf9 100644 --- a/src/Handlers/acLoadMemoryDump.cpp +++ b/src/Handlers/acLoadMemoryDump.cpp @@ -2,25 +2,20 @@ std::string acLoadMemoryDump::xmlname = "LoadMemoryDump"; #include "../HandlerFactory.h" -int acLoadMemoryDump::Init () { - Action::Init(); - pugi::xml_attribute attr = node.attribute("file"); - if (!attr) { - attr = node.attribute("filename"); - if (!attr) { - error("No file specified in LoadMemoryDump\n"); - return -1; - } - } - pugi::xml_attribute attr2= node.attribute("comp"); - if (attr2) { - error("Depreceted API call. Use LoadBinary with comp parameter"); +int acLoadMemoryDump::Init() { + Action::Init(); + pugi::xml_attribute attr = node.attribute("file"); + if (!attr) { + attr = node.attribute("filename"); + if (!attr) { + error("No file specified in LoadMemoryDump\n"); + return EXIT_FAILURE; } - const auto lattice = solver->getCartLattice(); - lattice->loadSolution(attr.value()); - return 0; + } + if (node.attribute("comp")) error("Deprecated API call. Use LoadBinary with comp parameter"); + solver->lattice->loadSolution(attr.value()); + return EXIT_SUCCESS; } - // Register the handler (basing on xmlname) in the Handler Factory -template class HandlerFactory::Register< GenericAsk< acLoadMemoryDump > >; +template class HandlerFactory::Register >; diff --git a/src/Handlers/cbSaveBinary.cpp b/src/Handlers/cbSaveBinary.cpp index b0704ef26..39d253498 100644 --- a/src/Handlers/cbSaveBinary.cpp +++ b/src/Handlers/cbSaveBinary.cpp @@ -2,36 +2,30 @@ std::string cbSaveBinary::xmlname = "SaveBinary"; #include "../HandlerFactory.h" -int cbSaveBinary::Init () { - Callback::Init(); - pugi::xml_attribute attr = node.attribute("file"); - if (!attr) { - attr = node.attribute("filename"); - if (!attr) { - fn = solver->outIterFile("Save", ""); - } else { - fn = attr.value(); - } - } else { - fn = ((std::string) solver->outpath) + "_" + attr.value(); +int cbSaveBinary::Init() { + Callback::Init(); + auto attr = node.attribute("file"); + if (!attr) { + attr = node.attribute("filename"); + if (!attr) { + fn = solver->outIterFile("Save", ""); + } else { + fn = attr.value(); } - return 0; - } - - -int cbSaveBinary::DoIt () { - Callback::DoIt(); - pugi::xml_attribute attr = node.attribute("comp"); - const auto lattice = solver->getCartLattice(); - if (attr) { - lattice->saveComp(fn, attr.value()); - } else { - lattice->saveSolution(fn); - //error("Missing comp attribute in SaveBinary"); - } - return 0; - }; + } else { + fn = solver->outpath + "_" + attr.value(); + } + return 0; +} +int cbSaveBinary::DoIt() { + Callback::DoIt(); + const auto attr = node.attribute("comp"); + if (attr) solver->lattice->saveComp(fn, attr.value()); + else + solver->lattice->saveSolution(fn); + return EXIT_SUCCESS; +}; // Register the handler (basing on xmlname) in the Handler Factory -template class HandlerFactory::Register< GenericAsk< cbSaveBinary > >; +template class HandlerFactory::Register >; diff --git a/src/Handlers/cbSaveCheckpoint.cpp b/src/Handlers/cbSaveCheckpoint.cpp index c4f38c9cc..50585060c 100644 --- a/src/Handlers/cbSaveCheckpoint.cpp +++ b/src/Handlers/cbSaveCheckpoint.cpp @@ -2,9 +2,9 @@ std::string cbSaveCheckpoint::xmlname = "SaveCheckpoint"; #include "../HandlerFactory.h" -int cbSaveCheckpoint::Init () { - Callback::Init(); - /* +int cbSaveCheckpoint::Init() { + Callback::Init(); + /* Initialisation of handler checks for keep attribute inside of SaveCheckpoint. Keep attribute can be used to save the last "x" number of checkpoints, or specify @@ -12,92 +12,86 @@ int cbSaveCheckpoint::Init () { DEFAULT behaviour is to return only the most recent checkpoint. */ - pugi::xml_attribute attr = node.attribute("keep"); - if (attr) { - if (std::string(attr.value()) == "all"){ - // Look for the keyword all and use keep = 0 as flag to store all - keep = 0; - } else { - // If the attr value is not the string all, assume it is an int - keep = attr.as_int(); - if ( keep < 0) { - // Check the user hasn't set keep to a negative value - error("Keeping a negative no. of chckpnts not allowed, returning default behaviour."); - keep = 1; - } - } - } else{ - keep = 1; - } - - return 0; - } + pugi::xml_attribute attr = node.attribute("keep"); + if (attr) { + if (std::string(attr.value()) == "all") { + // Look for the keyword all and use keep = 0 as flag to store all + keep = 0; + } else { + // If the attr value is not the string all, assume it is an int + keep = attr.as_int(); + if (keep < 0) { + // Check the user hasn't set keep to a negative value + error("Keeping a negative no. of chckpnts not allowed, returning default behaviour."); + keep = 1; + } + } + } else { + keep = 1; + } + return 0; +} -int cbSaveCheckpoint::DoIt () { - Callback::DoIt(); - /* +int cbSaveCheckpoint::DoIt() { + Callback::DoIt(); + /* Here we saveSolution to a _x.pri file where x is the MPI rank. If keep == 0, then we save all solutions. Otherwise, we check the size of the queue; less than keep then save file, else delete the first set into the queue */ - output("writing checkpoint"); - const auto lattice = solver->getCartLattice(); - const auto filename = solver->outIterCollectiveFile("checkpoint", ""); - const auto restartFile = solver->outIterCollectiveFile("restart", ".xml"); - auto fileStr = lattice->saveSolution(filename); - std::string restStr; - if (D_MPI_RANK == 0 ) { - writeRestartFile(filename.c_str(), restartFile.c_str()); - restStr = restartFile; - } - if (keep != 0){ - myqueue.push( fileStr ); - myqueue_rst.push( restStr ); - if (myqueue.size() > (size_t) keep) { - // myqueue should only ever reach the size of keep - fileStr = myqueue.front(); - int rm_result = remove( fileStr.c_str() ); //Takes char - if (rm_result != 0) error("Checkpoint file was not deleted: %s",fileStr.c_str()); - myqueue.pop(); - - if (D_MPI_RANK == 0 ) { - restStr = myqueue_rst.front(); - rm_result = remove( restStr.c_str() ); - if (rm_result != 0) error("Restart file was not deleted: %s",restStr.c_str()); - myqueue_rst.pop(); - } - } - } + output("writing checkpoint"); + const auto filename = solver->outIterCollectiveFile("checkpoint", ""); + const auto restartFile = solver->outIterCollectiveFile("restart", ".xml"); + auto fileStr = solver->lattice->saveSolution(filename); + std::string restStr; + if (D_MPI_RANK == 0) { + writeRestartFile(filename.c_str(), restartFile.c_str()); + restStr = restartFile; + } + if (keep != 0) { + myqueue.push(fileStr); + myqueue_rst.push(restStr); + if (myqueue.size() > (size_t)keep) { + // myqueue should only ever reach the size of keep + fileStr = myqueue.front(); + int rm_result = remove(fileStr.c_str()); //Takes char + if (rm_result != 0) error("Checkpoint file was not deleted: %s", fileStr.c_str()); + myqueue.pop(); - return 0; - }; + if (D_MPI_RANK == 0) { + restStr = myqueue_rst.front(); + rm_result = remove(restStr.c_str()); + if (rm_result != 0) error("Restart file was not deleted: %s", restStr.c_str()); + myqueue_rst.pop(); + } + } + } -int cbSaveCheckpoint::writeRestartFile( const char * fn, const char * rf ) { + return 0; +}; - pugi::xml_document restartfile; - for (pugi::xml_node n = solver->configfile.first_child(); n; n = n.next_sibling()){ - restartfile.append_copy(n); - } +int cbSaveCheckpoint::writeRestartFile(const char* fn, const char* rf) { + pugi::xml_document restartfile; + for (pugi::xml_node n = solver->configfile.first_child(); n; n = n.next_sibling()) { restartfile.append_copy(n); } - pugi::xml_node n1 = restartfile.child("CLBConfig").child("LoadBinary"); - if (!n1){ - // If it doesn't exist, create it before solve - n1 = restartfile.child("CLBConfig").child("Solve"); - pugi::xml_node n2 = restartfile.child("CLBConfig").insert_child_before("LoadBinary", n1); - n2.append_attribute("file").set_value(fn); - } else { - // If it does exist, remove it and replace it with up to date file string - n1.remove_attribute(n1.attribute("file")); - n1.append_attribute("file").set_value(fn); - } + pugi::xml_node n1 = restartfile.child("CLBConfig").child("LoadBinary"); + if (!n1) { + // If it doesn't exist, create it before solve + n1 = restartfile.child("CLBConfig").child("Solve"); + pugi::xml_node n2 = restartfile.child("CLBConfig").insert_child_before("LoadBinary", n1); + n2.append_attribute("file").set_value(fn); + } else { + // If it does exist, remove it and replace it with up to date file string + n1.remove_attribute(n1.attribute("file")); + n1.append_attribute("file").set_value(fn); + } - restartfile.save_file( rf ); + restartfile.save_file(rf); - - return 0; + return 0; } // Register the handler (basing on xmlname) in the Handler Factory -template class HandlerFactory::Register< GenericAsk< cbSaveCheckpoint > >; +template class HandlerFactory::Register >; diff --git a/src/Handlers/cbSaveMemoryDump.cpp b/src/Handlers/cbSaveMemoryDump.cpp index f92df2aab..80655d41b 100644 --- a/src/Handlers/cbSaveMemoryDump.cpp +++ b/src/Handlers/cbSaveMemoryDump.cpp @@ -2,33 +2,28 @@ std::string cbSaveMemoryDump::xmlname = "SaveMemoryDump"; #include "../HandlerFactory.h" -int cbSaveMemoryDump::Init () { - Callback::Init(); - pugi::xml_attribute attr = node.attribute("file"); - if (!attr) { - attr = node.attribute("filename"); - if (!attr) { - fn = solver->outIterFile("Save", ""); - } else { - fn = attr.value(); - } - } else { - fn = solver->outpath + "_" + attr.value(); +int cbSaveMemoryDump::Init() { + Callback::Init(); + pugi::xml_attribute attr = node.attribute("file"); + if (!attr) { + attr = node.attribute("filename"); + if (!attr) { + fn = solver->outIterFile("Save", ""); + } else { + fn = attr.value(); } - return 0; - } - - -int cbSaveMemoryDump::DoIt () { - Callback::DoIt(); - pugi::xml_attribute attr= node.attribute("comp"); - if (attr) { - error("Depreceted API call. Use SaveBinary with comp parameter"); - } - solver->getCartLattice()->saveSolution(fn); - return 0; - }; + } else { + fn = solver->outpath + "_" + attr.value(); + } + return 0; +} +int cbSaveMemoryDump::DoIt() { + Callback::DoIt(); + if (node.attribute("comp")) error("Deprecated API call. Use SaveBinary with comp parameter"); + solver->lattice->saveSolution(fn); + return 0; +}; // Register the handler (basing on xmlname) in the Handler Factory -template class HandlerFactory::Register< GenericAsk< cbSaveMemoryDump > >; +template class HandlerFactory::Register >; diff --git a/src/Lists.h.Rt b/src/Lists.h.Rt index 8be6a844a..b92a1173b 100644 --- a/src/Lists.h.Rt +++ b/src/Lists.h.Rt @@ -283,6 +283,14 @@ class Model_m : public Model { public: Model_m(); + // Lookup density index by name + static int lookupFieldIndexByName(const std::string& name) { + if (name == "") return ; + return -1; + } + // Offset directions (Q) stored as constexpr array of int triplets; for compile-time computations requiring Q static constexpr auto offset_directions = model_details::makeOffsetDirections(); /// Array of offset directions static constexpr size_t Q = offset_directions.size(); /// Number of offset directions