Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Solid Container #435

Merged
merged 35 commits into from
Aug 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
86a190d
Small cleanup of BallTree
llaniewski May 23, 2023
6d61fb3
Changing to new nomenclature
llaniewski May 24, 2023
cfc2f51
Moving stuff to solid finder
llaniewski May 24, 2023
576151f
Progressing SolidContainer agnostic implementation
llaniewski May 26, 2023
94106a1
Working version with particle iterators
llaniewski May 31, 2023
fba806e
Adding cache version to SolidTree
llaniewski May 31, 2023
4bcdb69
Adding SolidGrid - not working
llaniewski May 31, 2023
84e44bf
Working SolidGrid
llaniewski May 31, 2023
df0babc
Hiding solid container building behind computation
llaniewski Jun 1, 2023
b4807f2
Fixing offset
Jun 1, 2023
8a7b457
cooperative_groups
Jun 5, 2023
bddbb28
Working non-cache grid iterator
llaniewski Jun 6, 2023
ad1b579
Some cleanup of atomics
llaniewski Jun 6, 2023
662f9c7
Unifying naming for atomic reduce
llaniewski Jun 6, 2023
b7c1416
Fixing warning about no return value
llaniewski Jun 6, 2023
be45bb6
Cleanup of cross.h
llaniewski Jun 6, 2023
655dc8f
Correcting auto return type
llaniewski Jun 6, 2023
2499bc7
Adding opportunistic sums
llaniewski Jun 6, 2023
999121f
Adding solid container options to configure
llaniewski Jun 6, 2023
b14ced3
Fixing the no-particle case
llaniewski Jun 13, 2023
d85387a
Merge remote-tracking branch 'origin/feature/chrono' into feature/fas…
llaniewski Jun 26, 2023
86b5800
Merge remote-tracking branch 'cfdgo/tests/particles' into feature/fas…
llaniewski Jul 11, 2023
54d47ac
Adding OPP functions as an option (disabled by default)
llaniewski Jul 12, 2023
6d8ff43
Adding OPP functions as an option in Particles.hpp
llaniewski Jul 12, 2023
b3fa4d1
Merge branch 'feature/fastdem' of github.com:llaniewski/TCLB into fea…
llaniewski Jul 12, 2023
c7fa4e5
Fixing error in R2I_caster causing wrong results on older GPUs
llaniewski Jul 14, 2023
f548205
Merge remote-tracking branch 'cfdgo/develop' into feature/fastdem
llaniewski Jul 31, 2023
56cf675
Moving the tests submodule to tests/external to make space for unit t…
llaniewski Jul 31, 2023
30eff8b
Adding margin
llaniewski Jul 13, 2023
ffc4570
Adding particle.margin as Dynamics.R option
llaniewski Jul 31, 2023
c5bda0e
Fixing paranoid error for float version
llaniewski Jul 31, 2023
872cd30
Adding tests of solid indexer
llaniewski Aug 1, 2023
ef471c6
Adding coverage options to solid tests
llaniewski Aug 1, 2023
23c590a
Adding non-sync particle iterator
llaniewski Aug 2, 2023
cd760c9
Moving d3q27_cumulant_part to fastdem
llaniewski Aug 2, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions .github/workflows/other_tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Workflow: builds and runs the stand-alone test suites found under tests/<name>
# with gcc coverage instrumentation, then uploads the coverage reports to Codecov.
name: TESTS

on: [ push, pull_request ]

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      # Let every matrix entry finish even if another one fails.
      fail-fast: false
      matrix:
        # Each entry is a directory name under tests/.
        test:
          - solid
    steps:
      - name: Git checkout
        uses: actions/checkout@v3
        with:
          submodules: true
      - name: Install dependencies
        uses: ./.github/actions/install
        with:
          essentials: true
          r: false
          rdep: false
          cuda: false
          hip: false
          openmpi: false
          lcov: true
      - name: Compile
        shell: bash
        run: |
          cd tests/${{ matrix.test }}
          make ADD_FLAGS="-fprofile-arcs -ftest-coverage"
      - name: Run
        shell: bash
        run: |
          cd tests/${{ matrix.test }}
          make run
      - name: Gather coverage data
        uses: ./.github/actions/coverage
        id: coverage
      - name: Send coverage data
        uses: codecov/codecov-action@v3
        # Skip the upload when the coverage step produced no reports.
        if: steps.coverage.outputs.reports != ''
        with:
          files: ${{ steps.coverage.outputs.reports }}
          # The matrix defines only `test`; `matrix.model` does not exist here and
          # would expand to an empty string, leaving the upload without a flag.
          flags: ${{ matrix.test }}
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
[submodule "TCLB_tests"]
path = tests
path = tests/external
url = ../../CFD-GO/TCLB_tests.git
2 changes: 1 addition & 1 deletion models/flow/d3q27_cumulant_part/Dynamics.c.Rt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
?>

CudaDeviceFunction void CalcF(){
for (SyncParticleIterator p(X,Y,Z); p; ++p) {
for (auto p : SyncParticleIterator(X,Y,Z)) {
if ((NodeType & NODE_BOUNDARY) == 0) {
real_t d = getRho();
vector_t u;
Expand Down
2 changes: 1 addition & 1 deletion models/particles/d3q27_PSM/Dynamics.c.Rt
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,7 @@ CudaDeviceFunction void CalcF() {
C( uP, 0)
C( sol, 0)
?>
for (SyncParticleIterator p(X,Y,Z); p; ++p) {
for (auto p : SyncParticleIterator(X,Y,Z)) {

real_t dist = sqrt(p.diff.x*p.diff.x + p.diff.y*p.diff.y + p.diff.z*p.diff.z);

Expand Down
55 changes: 0 additions & 55 deletions src/BallTree.h

This file was deleted.

3 changes: 3 additions & 0 deletions src/Consts.h.Rt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
#define X_BLOCK 32
#endif

#define PART_MAR <?%f PartMargin ?>
#define PART_MAR_BOX <?%f max(0,PartMargin-0.5) ?>

<?R
big_hex = function(x,bits=16) {
ret = ""
Expand Down
12 changes: 6 additions & 6 deletions src/Handlers/acRemoteForceInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,12 @@ int acRemoteForceInterface::ConnectRemoteForceInterface(std::string integrator_)
double py = solver->lattice->py;
double pz = solver->lattice->pz;
solver->lattice->RFI.DeclareSimpleBox(
px + reg.dx,
px + reg.dx + reg.nx,
py + reg.dy,
py + reg.dy + reg.ny,
pz + reg.dz,
pz + reg.dz + reg.nz);
px + reg.dx - PART_MAR_BOX,
px + reg.dx + reg.nx + PART_MAR_BOX,
py + reg.dy - PART_MAR_BOX,
py + reg.dy + reg.ny + PART_MAR_BOX,
pz + reg.dz - PART_MAR_BOX,
pz + reg.dz + reg.nz + PART_MAR_BOX);
}
MPI_Barrier(MPMD.local);
solver->lattice->RFI.Connect(MPMD.work,inter.work);
Expand Down
1 change: 1 addition & 0 deletions src/Handlers/cbRunR.cpp.Rt
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,7 @@ class rInfo: public rWrapper {
return ret;
}
ERROR("R: Not implemented!");
return ret;
}

Rcpp::CharacterVector Names() {
Expand Down
42 changes: 19 additions & 23 deletions src/Lattice.cu.Rt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
#include "Lattice.h"
#include <mpi.h>
#include <assert.h>
#include "BallTree.hpp"
#include "SolidTree.hpp"
#include "SolidGrid.hpp"

#ifdef ENABLE_NVPROF
#include <nvToolsExt.h>
Expand Down Expand Up @@ -147,11 +148,10 @@ Lattice::Lattice(lbRegion _region, MPIInfo mpi_, int ns):region(_region), mpi(mp
<?R } ?>
ZoneIter = 0;
particle_data_size_max = 0;
SC.InitFinder(container->solidfinder);
container->particle_data = NULL;
container->particle_data_size = 0;
container->balltree_data = NULL;
balltree_data_size_max = 0;
BT.balls = &RFI;
SC.balls = &RFI;
RFI.name = "TCLB";
}

Expand Down Expand Up @@ -451,13 +451,7 @@ void Lattice::<?%s FunName ?>(int tab0, int tab1, int iter_type)
for (stage in rows(action_stages)) {
?>
DEBUG_PROF_PUSH("<?%s stage$name ?>");
//---------------- STAGE: <?%s stage$name ?> ----------------------- <?R
if (old_stage_level > 0) {
for (m in NonEmptyMargin) { ?>
container->in.<?%s m$name ?> = Snaps[tab1].<?%s m$name ?>; <?R
}
}
old_stage_level = old_stage_level + 1 ?>
//---------------- STAGE: <?%s stage$name ?> -----------------------

<?R if (stage$fixedPoint) { ?> for (int fix=0; fix<100; fix++) { <?R } ?>
<?R if (stage$first_particle) { ?>
Expand Down Expand Up @@ -486,18 +480,19 @@ void Lattice::<?%s FunName ?>(int tab0, int tab1, int iter_type)
CudaMemcpyAsync(container->particle_data, RFI.Particles(), RFI.mem_size(), CudaMemcpyHostToDevice, kernelStream);
}
DEBUG_PROF_PUSH("Tree Build");
BT.Build();
SC.Build();
DEBUG_PROF_POP();
if (BT.size() > balltree_data_size_max) {
if (container->balltree_data != NULL) CudaFree(container->balltree_data);
balltree_data_size_max = BT.size();
CudaMalloc(&container->balltree_data, BT.mem_size());
}
container->balltree_data_size = BT.size();
if (BT.mem_size() > 0) {
CudaMemcpyAsync(container->balltree_data, BT.Tree(), BT.mem_size(), CudaMemcpyHostToDevice, kernelStream);
}
SC.CopyToGPU(container->solidfinder, kernelStream);
<?R } ?>
<?R if (old_stage_level > 0) { ?>
MPIStream_B();
CudaDeviceSynchronize();
<?R for (m in NonEmptyMargin) { ?>
container->in.<?%s m$name ?> = Snaps[tab1].<?%s m$name ?>; <?R
}
}
old_stage_level = old_stage_level + 1
?>
container->CopyToConst();
DEBUG_PROF_PUSH("Calculation");
switch(iter_type & ITER_INTEG){
Expand Down Expand Up @@ -527,7 +522,6 @@ void Lattice::<?%s FunName ?>(int tab0, int tab1, int iter_type)
break;
}

MPIStream_B();
DEBUG_PROF_POP();
<?R if (stage$last_particle) { ?>
if (RFI.mem_size() > 0) {
Expand All @@ -549,10 +543,12 @@ void Lattice::<?%s FunName ?>(int tab0, int tab1, int iter_type)
DEBUG_PROF_POP();
RFI.SendForces();
<?R } ?>
CudaDeviceSynchronize();

<?R if (stage$fixedPoint) { ?> } // for(fix) <?R } ?>
DEBUG_PROF_POP();
<?R } ?>
MPIStream_B();
CudaDeviceSynchronize();
Snap = tab1;
MarkIteration();
updateAllSamples();
Expand Down
10 changes: 3 additions & 7 deletions src/Lattice.h.Rt
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,8 @@
#include "ZoneSettings.h"
#include "SyntheticTurbulence.h"
#include "Sampler.h"
#include "RemoteForceInterface.h"
#include "BallTree.h"
#include "pinned_allocator.hpp"
#include "SolidContainer.h"


class lbRegion;
class LatticeContainer;
Expand Down Expand Up @@ -76,12 +75,9 @@ public:
lbRegion region; ///< Local lattice region
real_t px, py, pz;
MPIInfo mpi; ///< MPI information
typedef rfi::RemoteForceInterface< rfi::ForceCalculator, rfi::RotParticle, rfi::ArrayOfStructures, real_t, pinned_allocator<real_t> > rfi_t;
rfi_t RFI;
typedef BallTree< rfi_t > balltree_t;
balltree_t BT;
solidcontainer_t SC;
size_t particle_data_size_max;
size_t balltree_data_size_max;
char snapFileName[STRING_LEN];
Lattice (lbRegion region, MPIInfo, int);
~Lattice ();
Expand Down
2 changes: 1 addition & 1 deletion src/LatticeAccess.inc.cpp.Rt
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ template <class N> CudaDeviceFunction void LatticeContainer::getType(N & node)
} else if (access == "add") { ?>
<?%s m ?>[<?R C(off,float=FALSE) ?>] += <?%s d ?>; <?R
} else if (access == "atomicadd") { ?>
atomicAddP(&( <?%s m ?>[<?R C(off,float=FALSE) ?>] ),<?%s d ?>); <?R
CudaAtomicAdd(&( <?%s m ?>[<?R C(off,float=FALSE) ?>] ),<?%s d ?>); <?R
} else stop("Unknown access type in field.access.one");
}

Expand Down
7 changes: 2 additions & 5 deletions src/LatticeContainer.h.Rt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
File defining LatticeContainer and some additional CUDA functions
*/
#include "SyntheticTurbulence.h"
#include "BallTree.h"
#include "SolidContainer.h"

#ifndef SETTINGS_H
<?R
Expand Down Expand Up @@ -47,8 +47,7 @@ class LatticeContainer {
cut_t* Q;
size_t particle_data_size;
real_t* particle_data;
size_t balltree_data_size;
tr_elem* balltree_data;
solidcontainer_t::finder_t solidfinder;
real_t * Globals; ///< Pointer to the GPU table to store the calculated values of Globals
int dx, dy, dz; ///< Offset of the region to calculate in the interior kernel run
int nx, ny, nz; ///< Size of the Lattice region
Expand Down Expand Up @@ -140,10 +139,8 @@ class LatticeContainer {
void Color( uchar4 *optr );
template<class N> inline void RunBorderT(CudaStream_t);
template<class N> inline void RunInteriorT(CudaStream_t);
template<class N> inline void RunParticlesT(CudaStream_t);
template < eOperationType I, eCalculateGlobals G, eStage S > void RunBorder(CudaStream_t);
template < eOperationType I, eCalculateGlobals G, eStage S > void RunInterior(CudaStream_t);
template < eOperationType I, eCalculateGlobals G, eStage S > void RunParticles(CudaStream_t);

void CopyToConst();
void WaitAll();
Expand Down
Loading