Local node permutation for arb lattice
kubagalecki committed Nov 22, 2023
1 parent 0080478 commit eef7fbd
Showing 4 changed files with 103 additions and 16 deletions.
1 change: 1 addition & 0 deletions src/ArbConnectivity.hpp
@@ -29,6 +29,7 @@ struct ArbLatticeConnectivity {
}

size_t getLocalSize() const { return chunk_end - chunk_begin; }
bool isGhost(Index nbr) const { return nbr != -1 && (nbr < static_cast<Index>(chunk_begin) || nbr >= static_cast<Index>(chunk_end)); }

double& coord(size_t dim, size_t local_node_ind) { return coords[local_node_ind + dim * getLocalSize()]; }
double coord(size_t dim, size_t local_node_ind) const { return coords[local_node_ind + dim * getLocalSize()]; }
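The isGhost predicate added above treats a neighbor GID as a ghost node when it is present (not -1) but lies outside the locally owned interval [chunk_begin, chunk_end). A minimal standalone sketch of the same check; the stub struct, its member values, and the Index alias below are illustrative stand-ins, not the real ArbLatticeConnectivity:

```cpp
#include <cassert>
#include <cstdint>

// Stand-in for the relevant members of ArbLatticeConnectivity (assumed types).
struct ConnectivityStub {
    using Index = std::int64_t;
    std::size_t chunk_begin = 100, chunk_end = 200;  // this rank owns GIDs [100, 200)

    // Mirrors the committed predicate: -1 means "no neighbor in this direction",
    // any present neighbor outside the owned interval is a ghost node.
    bool isGhost(Index nbr) const {
        return nbr != -1 && (nbr < static_cast<Index>(chunk_begin) || nbr >= static_cast<Index>(chunk_end));
    }
};

int main() {
    ConnectivityStub c;
    assert(!c.isGhost(-1));   // missing neighbor, not a ghost
    assert(!c.isGhost(150));  // owned locally
    assert(c.isGhost(42));    // owned by a lower rank -> ghost
    assert(c.isGhost(250));   // owned by a higher rank -> ghost
}
```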
77 changes: 70 additions & 7 deletions src/ArbLattice.cpp
@@ -1,10 +1,27 @@
#include "ArbLattice.hpp"

#include <algorithm>
#include <array>
#include <cassert>
#include <cmath>
#include <fstream>
#include <numeric>
#include <optional>
#include <unordered_set>

#include "PartitionArbLattice.hpp"
#include "UtilTypes.hpp"
#include "mpitools.hpp"

ArbLattice::ArbLattice(size_t num_snaps_, const UnitEnv& units_, const std::map<std::string, int>& zone_map, const std::string& cxn_path, MPI_Comm comm_) : LatticeBase(ZONESETTINGS, ZONE_MAX, units_), num_snaps(num_snaps_), comm(comm_) {
readFromCxn(zone_map, cxn_path);
global_node_dist = computeInitialNodeDist(connect.num_nodes_global, mpitools::MPI_Size(comm));
partition();
if (connect.getLocalSize() == 0) throw std::runtime_error{"At least one MPI rank has an empty partition, please use fewer MPI ranks"}; // Realistically, this should never happen
computeGhostNodes();
computeLocalPermutation();
local_bounding_box = getLocalBoundingBox();
}
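computeInitialNodeDist is not part of this diff, so purely as an illustration, an even block split in the ParMETIS vtxdist style (rank r owns GIDs [dist[r], dist[r+1]), matching the description of global_node_dist in the header below) might look like the following sketch; the helper name and its exact splitting rule are assumptions:

```cpp
#include <cassert>
#include <vector>

// Hypothetical helper (not the project's computeInitialNodeDist): split
// num_nodes_global nodes into comm_size contiguous chunks and return the
// ParMETIS-style offsets, so rank r owns GIDs [dist[r], dist[r+1]).
std::vector<long> evenNodeDist(long num_nodes_global, int comm_size) {
    std::vector<long> dist(comm_size + 1, 0);
    const long base = num_nodes_global / comm_size, rem = num_nodes_global % comm_size;
    for (int r = 0; r < comm_size; ++r)
        dist[r + 1] = dist[r] + base + (r < rem ? 1 : 0);  // the first `rem` ranks get one extra node
    return dist;
}

int main() {
    const auto dist = evenNodeDist(10, 4);
    assert((dist == std::vector<long>{0, 3, 6, 8, 10}));  // rank 0 owns [0,3), rank 1 owns [3,6), ...
}
```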

std::unordered_map<std::string, int> ArbLattice::makeGroupZoneMap(const std::map<std::string, int>& zone_map) const {
std::unordered_map<std::string, int> retval;
@@ -14,15 +31,13 @@ std::unordered_map<std::string, int> ArbLattice::makeGroupZoneMap(const std::map
return retval;
}

void ArbLattice::readFromCxn(const std::map<std::string, int>& zone_map, const std::string& cxn_path, MPI_Comm comm) {
void ArbLattice::readFromCxn(const std::map<std::string, int>& zone_map, const std::string& cxn_path) {
using namespace std::string_literals;
using namespace std::string_view_literals;

const auto gz_map = makeGroupZoneMap(zone_map);

int comm_rank{}, comm_size{};
MPI_Comm_rank(comm, &comm_rank);
MPI_Comm_size(comm, &comm_size);
const int comm_rank = mpitools::MPI_Rank(comm), comm_size = mpitools::MPI_Size(comm);

// Open file + utils for error reporting
std::fstream file(cxn_path, std::ios_base::in);
@@ -168,12 +183,14 @@ void ArbLattice::readFromCxn(const std::map<std::string, int>& zone_map, const s
check_file_ok("Failed to read node data");
}
});
}

if (comm_size == 1) return;
void ArbLattice::partition() {
if (mpitools::MPI_Size(comm) == 1) return;

const auto zero_dir_ind = std::distance(Model_m::offset_directions.cbegin(), std::find(Model_m::offset_directions.cbegin(), Model_m::offset_directions.cend(), std::array{0, 0, 0})); // Note: the behavior is still correct even if (0,0,0) is not an offset direction
const auto zero_dir_ind = std::distance(Model_m::offset_directions.cbegin(), std::find(Model_m::offset_directions.cbegin(), Model_m::offset_directions.cend(), OffsetDir{0, 0, 0})); // Note: the behavior is still correct even if (0,0,0) is not an offset direction
const auto offset_dir_wgts = std::vector(Model_m::offset_direction_weights.begin(), Model_m::offset_direction_weights.end());
const auto [dist, log] = partitionArbLattice(connect, offset_dir_wgts, zero_dir_ind, comm);
auto [dist, log] = partitionArbLattice(connect, offset_dir_wgts, zero_dir_ind, comm);
for (const auto& [type, msg] : log) switch (type) {
case PartOutput::MsgType::Notice:
NOTICE(msg.c_str());
@@ -184,4 +201,50 @@ void ArbLattice::readFromCxn(const std::map<std::string, int>& zone_map, const s
case PartOutput::MsgType::Error:
throw std::runtime_error(msg);
}
global_node_dist = std::move(dist);
}

void ArbLattice::computeGhostNodes() {
std::unordered_set<long> ghosts;
const Span all_nbrs(connect.nbrs.get(), Q * connect.getLocalSize());
for (auto nbr : all_nbrs)
if (connect.isGhost(nbr)) ghosts.insert(nbr);
ghost_nodes.reserve(ghosts.size());
std::copy(ghosts.cbegin(), ghosts.cend(), std::back_inserter(ghost_nodes));
std::sort(ghost_nodes.begin(), ghost_nodes.end());
}

void ArbLattice::computeLocalPermutation() {
local_permutation.resize(connect.getLocalSize());
std::iota(local_permutation.begin(), local_permutation.end(), 0);
const auto is_border_node = [&](int lid) {
for (size_t q = 0; q != Q; ++q) {
const auto nbr = connect.neighbor(q, lid);
if (connect.isGhost(nbr)) return true;
}
return false;
};
const auto interior_begin = std::stable_partition(local_permutation.begin(), local_permutation.end(), is_border_node);
num_border_nodes = static_cast<size_t>(std::distance(local_permutation.begin(), interior_begin));
const auto by_zyx = [&](int lid1, int lid2) { // Sort by z, then y, then x -> this should help with coalesced memory access
const auto get_zyx = [&](int lid) { return std::array{connect.coord(2, lid), connect.coord(1, lid), connect.coord(0, lid)}; };
return get_zyx(lid1) < get_zyx(lid2);
};
std::sort(local_permutation.begin(), interior_begin, by_zyx);
std::sort(interior_begin, local_permutation.end(), by_zyx);
}
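To make the effect of computeLocalPermutation concrete, here is a self-contained toy version of the same two steps, a stable partition that moves border nodes to the front followed by a (z, y, x) sort of each half; the node coordinates and border flags below are made up for illustration:

```cpp
#include <algorithm>
#include <array>
#include <cstdio>
#include <numeric>
#include <vector>

int main() {
    // Made-up per-node data: 6 owned nodes with (x, y, z) coordinates and a border flag.
    const std::vector<std::array<double, 3>> xyz = {{0, 0, 1}, {1, 0, 0}, {0, 1, 0}, {1, 1, 1}, {0, 0, 0}, {1, 0, 1}};
    const std::vector<int> is_border = {1, 0, 1, 0, 0, 1};

    std::vector<unsigned> perm(xyz.size());
    std::iota(perm.begin(), perm.end(), 0u);

    // Step 1: border nodes first, preserving relative order (as in the committed code).
    const auto interior_begin = std::stable_partition(perm.begin(), perm.end(), [&](unsigned lid) { return is_border[lid] != 0; });

    // Step 2: sort each half by z, then y, then x.
    const auto by_zyx = [&](unsigned a, unsigned b) {
        const auto key = [&](unsigned lid) { return std::array{xyz[lid][2], xyz[lid][1], xyz[lid][0]}; };
        return key(a) < key(b);
    };
    std::sort(perm.begin(), interior_begin, by_zyx);
    std::sort(interior_begin, perm.end(), by_zyx);

    for (unsigned lid : perm) std::printf("%u ", lid);  // prints: 2 0 5 4 1 3
    std::printf("\n");
}
```

The stable partition keeps the counts consistent with the layout described in the header: border nodes occupy [0, B) and interior nodes [B, B + I).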

int ArbLattice::fullLatticePos(double pos) const {
const auto retval = std::lround(pos / connect.grid_size - .5);
assert(retval <= std::numeric_limits<int>::max() && retval >= std::numeric_limits<int>::min());
return static_cast<int>(retval);
}
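For intuition about the rounding in fullLatticePos: assuming node coordinates are cell centers at (i + 0.5) * grid_size (a convention this diff does not spell out), a node at pos = 1.25 with grid_size = 0.5 maps to lround(1.25 / 0.5 - 0.5) = lround(2.0) = 2, i.e. cell index 2 of the enclosing Cartesian lattice; the assert only guards against the long result overflowing int.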

lbRegion ArbLattice::getLocalBoundingBox() const {
const auto x = Span(connect.coords.get(), connect.getLocalSize()), y = Span(std::next(connect.coords.get(), connect.getLocalSize()), connect.getLocalSize()), z = Span(std::next(connect.coords.get(), 2 * connect.getLocalSize()), connect.getLocalSize());
const auto [minx_it, maxx_it] = std::minmax_element(x.begin(), x.end());
const auto [miny_it, maxy_it] = std::minmax_element(y.begin(), y.end());
const auto [minz_it, maxz_it] = std::minmax_element(z.begin(), z.end());
const double x_min = *minx_it, x_max = *maxx_it, y_min = *miny_it, y_max = *maxy_it, z_min = *minz_it, z_max = *maxz_it;
return lbRegion(x_min, y_min, z_min, x_max - x_min, y_max - y_min, z_max - z_min);
}
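The three Spans in getLocalBoundingBox rely on the structure-of-arrays layout declared in ArbConnectivity.hpp (coord(dim, local_node_ind) = coords[local_node_ind + dim * getLocalSize()]): all x coordinates first, then all y, then all z. A minimal sketch of that layout using a raw pointer, since the project's Span type is not shown in this diff:

```cpp
#include <algorithm>
#include <cstdio>
#include <memory>

int main() {
    const std::size_t n = 4;  // pretend local size
    // SoA layout: coords[lid + dim * n], dim 0 = x, 1 = y, 2 = z.
    auto coords = std::make_unique<double[]>(3 * n);
    for (std::size_t lid = 0; lid < n; ++lid) {
        coords[lid + 0 * n] = 0.5 + lid;        // x
        coords[lid + 1 * n] = 1.0 + 0.5 * lid;  // y
        coords[lid + 2 * n] = 2.5;              // z
    }
    // The y "column" is the contiguous block [coords + n, coords + 2n), which is
    // exactly what Span(std::next(connect.coords.get(), N), N) views in the committed code.
    const double* y = coords.get() + n;
    const auto [y_min, y_max] = std::minmax_element(y, y + n);
    std::printf("y in [%g, %g]\n", *y_min, *y_max);  // y in [1, 2.5]
}
```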
39 changes: 31 additions & 8 deletions src/ArbLattice.hpp
@@ -1,23 +1,41 @@
#ifndef ARBLATTICE_HPP
#define ARBLATTICE_HPP

#include <cstdint>
#include <map>
#include <string>

#include "ArbConnectivity.hpp"
#include "LatticeBase.hpp"
#include "Lists.h"
#include "Region.h"

// Note on node numbering: We have 2 indexing schemes - a global and a local one.
// Globally, nodes are numbered such that consecutive ranks own monotonically increasing intervals (e.g. rank 0 owns nodes [0, n_0), rank 1 owns nodes [n_0, n_1), etc.). This is required for ParMETIS, but it is also a convenient scheme in general.
// When communicating between ranks, only this numbering is used.
// Locally, nodes are permuted such that:
// - -1 is a valid index, signifying that a neighbor is not present in a given direction
// - Owned boundary nodes (i.e. nodes which have ghost nodes among their neighbors) occupy the initial B indices, starting at 0. Their numbering within [0, B) is arbitrary w.r.t. the global scheme
// - Owned interior nodes occupy the indices [B, B + I), where I is the number of interior nodes. Again, their order within that interval is arbitrary
// - Ghost nodes (i.e. nodes neighboring owned nodes, but not owned by the current rank) occupy indices [B + I, B + I + G), where G is the number of ghost nodes. **Their numbering within that interval corresponds to the global numbering scheme (important for optimal packing)**
// Note that the GPU is only aware of the local numbering scheme, which uses 32b indexing, saving memory bandwidth. Local indexing can additionally put "similar" nodes next to each other, minimizing branching and promoting coalesced memory access.
// Naming: GID == global ID; LID == local ID

class ArbLattice : public LatticeBase {
public:
static constexpr size_t Q = Model_m::offset_directions.size();

private:
ArbLatticeConnectivity connect;
size_t num_snaps;
ArbLatticeConnectivity connect; /// Lattice connectivity info
std::vector<long> global_node_dist; /// Node distribution (in the ParMETIS sense), describing the GID node intervals owned by each process (identical in all ranks)
std::vector<long> ghost_nodes; /// Sorted GIDs of ghost nodes
size_t num_border_nodes; /// Number of border nodes, i.e., nodes which have a ghost neighbor
lbRegion local_bounding_box; /// The bounding box of the local region (the arbitrary lattice is a subset of a full Cartesian lattice; we can use this for some optimizations)
size_t num_snaps; /// Number of snaps to hold
MPI_Comm comm; /// Communicator associated with the lattice
std::vector<unsigned> local_permutation; /// The permutation of owned nodes w.r.t. the global indexing scheme, see comment at the top

public:
ArbLattice(size_t num_snaps_, const UnitEnv& units, const std::map<std::string, int>& zone_map, const std::string& cxn_path, MPI_Comm comm) : LatticeBase(ZONESETTINGS, ZONE_MAX, units), num_snaps(num_snaps_) { readFromCxn(zone_map, cxn_path, comm); }
static constexpr size_t Q = Model_m::offset_directions.size();

ArbLattice(size_t num_snaps_, const UnitEnv& units_, const std::map<std::string, int>& zone_map, const std::string& cxn_path, MPI_Comm comm_);

size_t getLocalSize() const final { return connect.chunk_end - connect.chunk_begin; }
size_t getGlobalSize() const final { return connect.num_nodes_global; }
@@ -26,8 +44,13 @@ class ArbLattice : public LatticeBase {
void IterateTill(int, int) final {}

private:
std::unordered_map<std::string, int> makeGroupZoneMap(const std::map<std::string, int>& zone_map) const;
void readFromCxn(const std::map<std::string, int>& zone_map, const std::string& cxn_path, MPI_Comm comm);
std::unordered_map<std::string, int> makeGroupZoneMap(const std::map<std::string, int>& zone_map) const; /// Translate the groups and zones into a single indexing scheme for the purpose of uniquely mapping IDs read from the .cxn file
void readFromCxn(const std::map<std::string, int>& zone_map, const std::string& cxn_path); /// Read the lattice info from a .cxn file
void partition(); /// Repartition the lattice, if ParMETIS is not present this is a noop
void computeLocalPermutation(); /// Compute the local permutation, see comment at the top
void computeGhostNodes(); /// Retrieve GIDs of ghost nodes from the connectivity info structure
int fullLatticePos(double pos) const; /// Compute the position (in terms of lattice offsets) of a node, assuming the arbitrary lattice is a subset of a Cartesian lattice
lbRegion getLocalBoundingBox() const; /// Compute local bounding box, assuming the arbitrary lattice is a subset of a Cartesian lattice
};

#endif // ARBLATTICE_HPP
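The local index layout described in the comment at the top of ArbLattice.hpp can be summarized with a small classification sketch; B, I, and G denote the border, interior, and ghost node counts, and the function and enum names below are hypothetical, for illustration only:

```cpp
#include <cassert>
#include <cstddef>

// Illustrative only: classify a local index according to the layout described above.
enum class NodeKind { Border, Interior, Ghost };

NodeKind classifyLid(std::size_t lid, std::size_t B, std::size_t I, std::size_t G) {
    assert(lid < B + I + G);
    if (lid < B) return NodeKind::Border;        // owned, has at least one ghost neighbor
    if (lid < B + I) return NodeKind::Interior;  // owned, all neighbors owned locally
    return NodeKind::Ghost;                      // owned by another rank, GID-ordered within [B+I, B+I+G)
}

int main() {
    constexpr std::size_t B = 3, I = 5, G = 2;  // made-up counts
    assert(classifyLid(0, B, I, G) == NodeKind::Border);
    assert(classifyLid(4, B, I, G) == NodeKind::Interior);
    assert(classifyLid(9, B, I, G) == NodeKind::Ghost);
}
```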
2 changes: 1 addition & 1 deletion src/Lists.h.Rt
@@ -217,7 +217,7 @@ field_ind = 0
for (f in rows(Fields)) {
dirs = expand.grid(dx=f$minx:f$maxx, dy=f$miny:f$maxy, dz=f$minz:f$maxz)
for (d in rows(dirs)) {
cat(paste0(" set_field_dir(", field_ind, ", ", d$dx, ", ", d$dy,", ", d$dz, ");\n"))
cat(paste0(" set_field_dir(", field_ind, ", ", d$dx, ", ", d$dy,", ", d$dz, ");\n"))
}
field_ind = field_ind + 1
}
