From d2fc8a27f407e8382aabc8c41a053844bbd76ac8 Mon Sep 17 00:00:00 2001 From: Markus Battarbee Date: Tue, 29 Aug 2023 16:54:49 +0300 Subject: [PATCH 1/3] ARCH fixes to LUMI-G compilation, squelch some warnings, added NOMAD makefile for hackathon --- MAKE/Makefile.NOMAD | 89 +++++++++++++++++++++++++ arch/arch_device_hip.h | 56 ++++++++-------- datareduction/datareductionoperator.cpp | 86 ++++++++++++------------ 3 files changed, 158 insertions(+), 73 deletions(-) create mode 100644 MAKE/Makefile.NOMAD diff --git a/MAKE/Makefile.NOMAD b/MAKE/Makefile.NOMAD new file mode 100644 index 000000000..5e061c556 --- /dev/null +++ b/MAKE/Makefile.NOMAD @@ -0,0 +1,89 @@ +CMP = hipcc +LNK = hipcc + +# Modules loaded (after a clean shell, no module purging, loaded one by one rather than on one line) +# module load LUMI/22.08 +# module load partition/G +# module load cpeAMD +# module load rocm/5.3.3 + +#======== Vectorization ========== +#Set vector backend type for vlasov solvers, sets precision and length. +#Options: +# AVX: VEC4D_AGNER, VEC4F_AGNER, VEC8F_AGNER +# AVX512: VEC8D_AGNER, VEC16F_AGNER +# Fallback: VECTORCLASS = VEC_FALLBACK_GENERIC (Defaults to VECL8) +VECTORCLASS = VEC_FALLBACK_GENERIC + +#===== Vector Lengths ==== +# Default for VEC_FALLBACK_GENERIC is WID=4, VECL=8 +WID=8 +VECL=64 +# WID=4 +# VECL=16 + +# Compile with GPU support (USE_HIP or USE_CUDA) +USE_HIP=1 + +#======= Compiler and compilation flags ========= +# NOTES on compiler flags: +# CXXFLAGS is for compiler flags, they are always used +# MATHFLAGS are for special math etc. flags, these are only applied on solver functions +# LDFLAGS flags for linker +# Important note: Do not edit COMPFLAGS in this file! + +CXXFLAGS += -g -O3 -x hip --amdgpu-target=gfx90a:xnack+ -std=c++17 -funroll-loops -fopenmp -fgpu-rdc -I. -Ihip -Iomp -D__HIP_PLATFORM_AMD__ -I${CRAY_MPICH_DIR}/include -W -Wall -Wno-unused-parameter -Wno-unused-result -Wno-unused-function -Wno-unused-variable -Wno-unknown-pragmas -Wno-deprecated-register -Wno-unused-but-set-variable + +testpackage: CXXFLAGS = -g -O2 -x hip --amdgpu-target=gfx90a:xnack+ -std=c++17 -fopenmp -fgpu-rdc -I. -Ihip -Iomp -D__HIP_PLATFORM_AMD__ -I${CRAY_MPICH_DIR}/include -fgpu-sanitize -W -Wall -Wno-unused-parameter -Wno-unused-result -Wno-unused-function -Wno-unused-variable -Wno-unknown-pragmas -Wno-deprecated-register -Wno-unused-but-set-variable + +LDFLAGS = -fopenmp --hip-link -lrt -lgomp -lpthread -fgpu-rdc -D__HIP_PLATFORM_AMD__ -L${CRAY_MPICH_DIR}/lib ${PE_MPICH_GTL_DIR_amd_gfx90a} --amdgpu-target=gfx90a:xnack+ +LIB_MPI = -lmpi ${PE_MPICH_GTL_LIBS_amd_gfx90a} + +# -fgpu-rdc # relocatable device code, needed for the velocity mesh +# -fgpu-sanitize + +#======== PAPI ========== +#Add PAPI_MEM define to use papi to report memory consumption? +CXXFLAGS += -DPAPI_MEM +testpackage: CXXFLAGS += -DPAPI_MEM + +#======== Allocator ========= +#Use jemalloc instead of system malloc to reduce memory fragmentation? 
https://github.com/jemalloc/jemalloc +#Configure jemalloc with --with-jemalloc-prefix=je_ when installing it +#Note: jemalloc not supported with GPUs +#CXXFLAGS += -DUSE_JEMALLOC -DJEMALLOC_NO_DEMANGLE +#testpackage: CXXFLAGS += -DUSE_JEMALLOC -DJEMALLOC_NO_DEMANGLE + +#-DNO_WRITE_AT_ALL: Define to disable write at all to +# avoid memleak (much slower IO) + +#======== Libraries =========== + +LIBRARY_PREFIX = /projappl/project_465000538/libraries + +# Compiled libraries +INC_BOOST = -isystem $(LIBRARY_PREFIX)/boost/include +LIB_BOOST = -L$(LIBRARY_PREFIX)/boost/lib -lboost_program_options -Wl,-rpath=$(LIBRARY_PREFIX)/boost/lib + +INC_ZOLTAN = -isystem $(LIBRARY_PREFIX)/zoltan/include +LIB_ZOLTAN = -L$(LIBRARY_PREFIX)/zoltan/lib -lzoltan -Wl,-rpath=$(LIBRARY_PREFIX)/zoltan/lib + +#INC_JEMALLOC = -I$(LIBRARY_PREFIX)/jemalloc/include +#LIB_JEMALLOC = -L$(LIBRARY_PREFIX)/jemalloc/lib -ljemalloc -Wl,-rpath=$(LIBRARY_PREFIX)/jemalloc/lib + +INC_PAPI = -isystem $(LIBRARY_PREFIX)/papi/include +LIB_PAPI = -lpapi -L$(LIBRARY_PREFIX)/papi/lib -Wl,-rpath=$(LIBRARY_PREFIX)/papi/lib + +INC_VLSV = -isystem $(LIBRARY_PREFIX)/vlsv +LIB_VLSV = -L$(LIBRARY_PREFIX)/vlsv -lvlsv -Wl,-rpath=$(LIBRARY_PREFIX)/vlsv + +INC_PROFILE = -isystem $(LIBRARY_PREFIX)/phiprof/include -D_ROCTX -I${ROCM_PATH}/include +LIB_PROFILE = -L$(LIBRARY_PREFIX)/phiprof/lib -lphiprof -lgfortran -Wl,-rpath=$(LIBRARY_PREFIX)/phiprof/lib -Wl,-rpath=${ROCM_PATH}/lib -lroctx64 -lroctracer64 + +# Header libraries + +INC_FSGRID = -I$(LIBRARY_PREFIX)/fsgrid/ +INC_EIGEN = -isystem $(LIBRARY_PREFIX)/eigen/ +INC_DCCRG = -I$(LIBRARY_PREFIX)/dccrg/ +# INC_VECTORCLASS = -I$(LIBRARY_PREFIX_HEADERS)/vectorclass/ # not used with GPU +INC_HASHINATOR = -isystem $(LIBRARY_PREFIX)/hashinator/ diff --git a/arch/arch_device_hip.h b/arch/arch_device_hip.h index 8497f4c88..d4f8365d2 100644 --- a/arch/arch_device_hip.h +++ b/arch/arch_device_hip.h @@ -102,11 +102,7 @@ #endif #define FULL_MASK 0xffffffffffffffff -#ifdef ARCH_MAIN - hipStream_t stream[64]; -#else - extern hipStream_t stream[]; -#endif +extern hipStream_t gpuStreamList[]; /* Define the HIP error checking macro */ #define CHK_ERR(err) (hip_error(err, __FILE__, __LINE__)) @@ -153,16 +149,16 @@ class buf { public: void syncDeviceData(void){ - CHK_ERR(hipMemcpyAsync(d_ptr, ptr, bytes, hipMemcpyHostToDevice, stream[thread_id])); + CHK_ERR(hipMemcpyAsync(d_ptr, ptr, bytes, hipMemcpyHostToDevice, gpuStreamList[thread_id])); } void syncHostData(void){ - CHK_ERR(hipMemcpyAsync(ptr, d_ptr, bytes, hipMemcpyDeviceToHost, stream[thread_id])); + CHK_ERR(hipMemcpyAsync(ptr, d_ptr, bytes, hipMemcpyDeviceToHost, gpuStreamList[thread_id])); } buf(T * const _ptr, uint _bytes) : ptr(_ptr), bytes(_bytes) { thread_id = omp_get_thread_num(); - CHK_ERR(hipMallocAsync(&d_ptr, bytes, stream[thread_id])); + CHK_ERR(hipMallocAsync(&d_ptr, bytes, gpuStreamList[thread_id])); syncDeviceData(); } @@ -173,7 +169,7 @@ class buf { if(!is_copy){ // syncHostData(); #ifdef __HIP_DEVICE_COMPILE__ - hipFreeAsync(d_ptr, stream[thread_id]); + hipFreeAsync(d_ptr, gpuStreamList[thread_id]); #endif } } @@ -204,7 +200,7 @@ __host__ __forceinline__ static void* allocate(size_t bytes) { void* ptr; const uint thread_id = omp_get_thread_num(); device_mempool_check(UINT64_MAX); - CHK_ERR(hipMallocAsync(&ptr, bytes, stream[thread_id])); + CHK_ERR(hipMallocAsync(&ptr, bytes, gpuStreamList[thread_id])); return ptr; } @@ -219,7 +215,7 @@ __host__ __forceinline__ static void* allocate(size_t bytes, hipStream_t stream) template __host__ __forceinline__ 
static void free(T* ptr) { const uint thread_id = omp_get_thread_num(); - CHK_ERR(hipFreeAsync(ptr, stream[thread_id])); + CHK_ERR(hipFreeAsync(ptr, gpuStreamList[thread_id])); } template @@ -230,7 +226,7 @@ __host__ __forceinline__ static void free(T* ptr, hipStream_t stream) { template __forceinline__ static void memcpy_h2d(T* dst, T* src, size_t bytes){ const uint thread_id = omp_get_thread_num(); - CHK_ERR(hipMemcpyAsync(dst, src, bytes, hipMemcpyHostToDevice, stream[thread_id])); + CHK_ERR(hipMemcpyAsync(dst, src, bytes, hipMemcpyHostToDevice, gpuStreamList[thread_id])); } template @@ -242,7 +238,7 @@ __forceinline__ static void memcpy_h2d(T* dst, T* src, size_t bytes, hipStream_t template __forceinline__ static void memcpy_d2h(T* dst, T* src, size_t bytes){ const uint thread_id = omp_get_thread_num(); - CHK_ERR(hipMemcpyAsync(dst, src, bytes, hipMemcpyDeviceToHost, stream[thread_id])); + CHK_ERR(hipMemcpyAsync(dst, src, bytes, hipMemcpyDeviceToHost, gpuStreamList[thread_id])); } template @@ -387,45 +383,45 @@ __forceinline__ static void parallel_reduce_driver(const uint (&limits)[NDim], L /* Create a device buffer for the reduction results */ T* d_buf; - CHK_ERR(hipMallocAsync(&d_buf, n_reductions*sizeof(T), stream[thread_id])); - CHK_ERR(hipMemcpyAsync(d_buf, sum, n_reductions*sizeof(T), hipMemcpyHostToDevice, stream[thread_id])); + CHK_ERR(hipMallocAsync(&d_buf, n_reductions*sizeof(T), gpuStreamList[thread_id])); + CHK_ERR(hipMemcpyAsync(d_buf, sum, n_reductions*sizeof(T), hipMemcpyHostToDevice, gpuStreamList[thread_id])); /* Create a device buffer to transfer the initial values to device */ T* d_const_buf; - CHK_ERR(hipMallocAsync(&d_const_buf, n_reductions*sizeof(T), stream[thread_id])); - CHK_ERR(hipMemcpyAsync(d_const_buf, d_buf, n_reductions*sizeof(T), hipMemcpyDeviceToDevice, stream[thread_id])); + CHK_ERR(hipMallocAsync(&d_const_buf, n_reductions*sizeof(T), gpuStreamList[thread_id])); + CHK_ERR(hipMemcpyAsync(d_const_buf, d_buf, n_reductions*sizeof(T), hipMemcpyDeviceToDevice, gpuStreamList[thread_id])); /* Create a device buffer to transfer the loop limits of each dimension to device */ uint* d_limits; - CHK_ERR(hipMallocAsync(&d_limits, NDim*sizeof(uint), stream[thread_id])); - CHK_ERR(hipMemcpyAsync(d_limits, limits, NDim*sizeof(uint), hipMemcpyHostToDevice,stream[thread_id])); + CHK_ERR(hipMallocAsync(&d_limits, NDim*sizeof(uint), gpuStreamList[thread_id])); + CHK_ERR(hipMemcpyAsync(d_limits, limits, NDim*sizeof(uint), hipMemcpyHostToDevice,gpuStreamList[thread_id])); /* Call the reduction kernel with different arguments depending * on if the number of reductions is known at the compile time */ - T* d_thread_data_dynamic; + T* d_thread_data_dynamic=0; // initialized to zero to suppress uninitialized-use warning if(NReduStatic == 0) { /* Get the cub temp storage size for the dynamic shared memory kernel argument */ constexpr auto cub_temp_storage_type_size = sizeof(typename hipcub::BlockReduce::TempStorage); /* Allocate memory for the thread data values */ - CHK_ERR(hipMallocAsync(&d_thread_data_dynamic, n_reductions * blocksize * gridsize * sizeof(T), stream[thread_id])); + CHK_ERR(hipMallocAsync(&d_thread_data_dynamic, n_reductions * blocksize * gridsize * sizeof(T), gpuStreamList[thread_id])); /* Call the kernel (the number of reductions not known at compile time) */ - reduction_kernel<<>>(loop_body, d_const_buf, d_buf, d_limits, n_total, n_reductions, d_thread_data_dynamic); + reduction_kernel<<>>(loop_body, d_const_buf, d_buf, d_limits, n_total, n_reductions, 
d_thread_data_dynamic); /* Synchronize and free the thread data allocation */ - CHK_ERR(hipStreamSynchronize(stream[thread_id])); - CHK_ERR(hipFreeAsync(d_thread_data_dynamic, stream[thread_id])); + CHK_ERR(hipStreamSynchronize(gpuStreamList[thread_id])); + CHK_ERR(hipFreeAsync(d_thread_data_dynamic, gpuStreamList[thread_id])); } else{ /* Call the kernel (the number of reductions known at compile time) */ - reduction_kernel<<>>(loop_body, d_const_buf, d_buf, d_limits, n_total, n_reductions, d_thread_data_dynamic); + reduction_kernel<<>>(loop_body, d_const_buf, d_buf, d_limits, n_total, n_reductions, d_thread_data_dynamic); /* Synchronize after kernel call */ - CHK_ERR(hipStreamSynchronize(stream[thread_id])); + CHK_ERR(hipStreamSynchronize(gpuStreamList[thread_id])); } /* Copy the results back to host and free the allocated memory back to pool*/ - CHK_ERR(hipMemcpyAsync(sum, d_buf, n_reductions*sizeof(T), hipMemcpyDeviceToHost, stream[thread_id])); - CHK_ERR(hipFreeAsync(d_buf, stream[thread_id])); - CHK_ERR(hipFreeAsync(d_const_buf, stream[thread_id])); - CHK_ERR(hipFreeAsync(d_limits, stream[thread_id])); + CHK_ERR(hipMemcpyAsync(sum, d_buf, n_reductions*sizeof(T), hipMemcpyDeviceToHost, gpuStreamList[thread_id])); + CHK_ERR(hipFreeAsync(d_buf, gpuStreamList[thread_id])); + CHK_ERR(hipFreeAsync(d_const_buf, gpuStreamList[thread_id])); + CHK_ERR(hipFreeAsync(d_limits, gpuStreamList[thread_id])); } } diff --git a/datareduction/datareductionoperator.cpp b/datareduction/datareductionoperator.cpp index 94b2b33bf..6bfe3b180 100644 --- a/datareduction/datareductionoperator.cpp +++ b/datareduction/datareductionoperator.cpp @@ -855,17 +855,17 @@ namespace DRO { const Real VY = block_parameters[BlockParams::VYCRD] + (j + HALF) * block_parameters[BlockParams::DVY]; const Real VZ = block_parameters[BlockParams::VZCRD] + (k + HALF) * block_parameters[BlockParams::DVZ]; // Compare the distance of the velocity cell from the center of the maxwellian distribution to the radius of the maxwellian distribution - if(calculateNonthermal == true && - ( (thermalV[0] - VX) * (thermalV[0] - VX) - + (thermalV[1] - VY) * (thermalV[1] - VY) - + (thermalV[2] - VZ) * (thermalV[2] - VZ) ) - > thermalRadius*thermalRadius - || - calculateNonthermal == false && - ( (thermalV[0] - VX) * (thermalV[0] - VX) - + (thermalV[1] - VY) * (thermalV[1] - VY) - + (thermalV[2] - VZ) * (thermalV[2] - VZ) ) - <= thermalRadius*thermalRadius ) { + if (((calculateNonthermal == true) && + (( (thermalV[0] - VX) * (thermalV[0] - VX) + + (thermalV[1] - VY) * (thermalV[1] - VY) + + (thermalV[2] - VZ) * (thermalV[2] - VZ) ) + > thermalRadius*thermalRadius)) + || + ((calculateNonthermal == false) && + (( (thermalV[0] - VX) * (thermalV[0] - VX) + + (thermalV[1] - VY) * (thermalV[1] - VY) + + (thermalV[2] - VZ) * (thermalV[2] - VZ) ) + <= thermalRadius*thermalRadius) )) { //The velocity cell is a part of the nonthermal/thermal population: lsum[0] += block_data[n * SIZE_VELBLOCK + cellIndex(i,j,k)] * DV3; } @@ -916,17 +916,17 @@ namespace DRO { const Real VY = block_parameters[BlockParams::VYCRD] + (j + HALF) * block_parameters[BlockParams::DVY]; const Real VZ = block_parameters[BlockParams::VZCRD] + (k + HALF) * block_parameters[BlockParams::DVZ]; // Calculate the distance of the velocity cell from the center of the maxwellian distribution and compare it to the approximate radius of the maxwellian distribution - if(calculateNonthermal == true && - ( (thermalV[0] - VX) * (thermalV[0] - VX) - + (thermalV[1] - VY) * (thermalV[1] - VY) - + (thermalV[2] 
- VZ) * (thermalV[2] - VZ) ) - > thermalRadius*thermalRadius + if (((calculateNonthermal == true) && + (( (thermalV[0] - VX) * (thermalV[0] - VX) + + (thermalV[1] - VY) * (thermalV[1] - VY) + + (thermalV[2] - VZ) * (thermalV[2] - VZ) ) + > thermalRadius*thermalRadius)) || - calculateNonthermal == false && - ( (thermalV[0] - VX) * (thermalV[0] - VX) - + (thermalV[1] - VY) * (thermalV[1] - VY) - + (thermalV[2] - VZ) * (thermalV[2] - VZ) ) - <= thermalRadius*thermalRadius ) { + ((calculateNonthermal == false) && + (( (thermalV[0] - VX) * (thermalV[0] - VX) + + (thermalV[1] - VY) * (thermalV[1] - VY) + + (thermalV[2] - VZ) * (thermalV[2] - VZ) ) + <= thermalRadius*thermalRadius) )) { // Add the value of the coordinates and multiply by the AVGS value of the velocity cell and the volume of the velocity cell lsum[0] += block_data[n * SIZE_VELBLOCK + cellIndex(i,j,k)]*VX*DV3; lsum[1] += block_data[n * SIZE_VELBLOCK + cellIndex(i,j,k)]*VY*DV3; @@ -987,17 +987,17 @@ namespace DRO { const Real VY = block_parameters[BlockParams::VYCRD] + (j + HALF) * block_parameters[BlockParams::DVY]; const Real VZ = block_parameters[BlockParams::VZCRD] + (k + HALF) * block_parameters[BlockParams::DVZ]; // Calculate the distance of the velocity cell from the center of the maxwellian distribution and compare it to the approximate radius of the maxwellian distribution - if(calculateNonthermal == true && - ( (thermalV[0] - VX) * (thermalV[0] - VX) - + (thermalV[1] - VY) * (thermalV[1] - VY) - + (thermalV[2] - VZ) * (thermalV[2] - VZ) ) - > thermalRadius*thermalRadius - || - calculateNonthermal == false && - ( (thermalV[0] - VX) * (thermalV[0] - VX) - + (thermalV[1] - VY) * (thermalV[1] - VY) - + (thermalV[2] - VZ) * (thermalV[2] - VZ) ) - <= thermalRadius*thermalRadius ) { + if (((calculateNonthermal == true) && + (( (thermalV[0] - VX) * (thermalV[0] - VX) + + (thermalV[1] - VY) * (thermalV[1] - VY) + + (thermalV[2] - VZ) * (thermalV[2] - VZ) ) + > thermalRadius*thermalRadius)) + || + ((calculateNonthermal == false) && + (( (thermalV[0] - VX) * (thermalV[0] - VX) + + (thermalV[1] - VY) * (thermalV[1] - VY) + + (thermalV[2] - VZ) * (thermalV[2] - VZ) ) + <= thermalRadius*thermalRadius ))) { lsum[0] += block_data[n * SIZE_VELBLOCK + cellIndex(i,j,k)] * (VX - averageVX) * (VX - averageVX) * DV3; lsum[1] += block_data[n * SIZE_VELBLOCK + cellIndex(i,j,k)] * (VY - averageVY) * (VY - averageVY) * DV3; lsum[2] += block_data[n * SIZE_VELBLOCK + cellIndex(i,j,k)] * (VZ - averageVZ) * (VZ - averageVZ) * DV3; @@ -1053,17 +1053,17 @@ namespace DRO { const Real VY = block_parameters[BlockParams::VYCRD] + (j + HALF) * block_parameters[BlockParams::DVY]; const Real VZ = block_parameters[BlockParams::VZCRD] + (k + HALF) * block_parameters[BlockParams::DVZ]; // Calculate the distance of the velocity cell from the center of the maxwellian distribution and compare it to the approximate radius of the maxwellian distribution - if(calculateNonthermal == true && - ( (thermalV[0] - VX) * (thermalV[0] - VX) - + (thermalV[1] - VY) * (thermalV[1] - VY) - + (thermalV[2] - VZ) * (thermalV[2] - VZ) ) - > thermalRadius*thermalRadius - || - calculateNonthermal == false && - ( (thermalV[0] - VX) * (thermalV[0] - VX) - + (thermalV[1] - VY) * (thermalV[1] - VY) - + (thermalV[2] - VZ) * (thermalV[2] - VZ) ) - <= thermalRadius*thermalRadius ) { + if (((calculateNonthermal == true) && + (( (thermalV[0] - VX) * (thermalV[0] - VX) + + (thermalV[1] - VY) * (thermalV[1] - VY) + + (thermalV[2] - VZ) * (thermalV[2] - VZ) ) + > thermalRadius*thermalRadius)) + || + 
((calculateNonthermal == false) && + (( (thermalV[0] - VX) * (thermalV[0] - VX) + + (thermalV[1] - VY) * (thermalV[1] - VY) + + (thermalV[2] - VZ) * (thermalV[2] - VZ) ) + <= thermalRadius*thermalRadius ))) { lsum[0] += block_data[n * SIZE_VELBLOCK + cellIndex(i,j,k)] * (VX - averageVX) * (VY - averageVY) * DV3; lsum[1] += block_data[n * SIZE_VELBLOCK + cellIndex(i,j,k)] * (VZ - averageVZ) * (VX - averageVX) * DV3; lsum[2] += block_data[n * SIZE_VELBLOCK + cellIndex(i,j,k)] * (VY - averageVY) * (VZ - averageVZ) * DV3; From b5a8f802f6ebad912e1be68064cd03668592cef7 Mon Sep 17 00:00:00 2001 From: Markus Battarbee Date: Thu, 31 Aug 2023 16:33:39 +0300 Subject: [PATCH 2/3] Use the boost module on lumi, and if WID==8, halve the block counts. Testpackage config updates from threadsafe_randomgen PR. --- MAKE/Makefile.NOMAD | 14 ++- MAKE/Makefile.lumi_hipcc | 4 +- object_wrapper.cpp | 21 ++++ parameters.cpp | 7 +- parameters.h | 2 + .../tests/Flowthrough_amr/Flowthrough_amr.cfg | 22 ++-- .../Flowthrough_trans_periodic.cfg | 24 ++-- .../Flowthrough_x_inflow_y_outflow.cfg | 22 ++-- .../Flowthrough_x_inflow_y_outflow_acc.cfg | 22 ++-- .../tests/Ionosphere_small/Fluctuations.cfg | 18 +-- .../Ionosphere_small/Ionosphere_small.cfg | 18 +-- .../Magnetosphere_3D_small.cfg | 24 ++-- .../Magnetosphere_polar_small.cfg | 24 ++-- .../Magnetosphere_small.cfg | 24 ++-- .../Selfgen_Waves_Periodic.cfg | 2 +- .../acctest_1_maxw_500k_30kms_1deg.cfg | 2 +- .../acctest_2_maxw_500k_100k_20kms_10deg.cfg | 2 +- .../acctest_3_substeps/acctest_3_substeps.cfg | 2 +- .../acctest_4_helium/acctest_4_helium.cfg | 2 +- .../acctest_5_proton_antiproton.cfg | 2 +- .../tests/restart_read/restart_read.cfg | 22 ++-- .../tests/restart_write/restart_write.cfg | 22 ++-- .../test_fp_fsolver_only_3D.cfg | 6 +- .../test_fp_substeps/test_fp_substeps.cfg | 6 +- ...transtest_2_maxw_500k_100k_20kms_20x20.cfg | 4 +- .../tests/transtest_amr/transtest_amr.cfg | 105 ------------------ 26 files changed, 174 insertions(+), 249 deletions(-) delete mode 100644 testpackage/tests/transtest_amr/transtest_amr.cfg diff --git a/MAKE/Makefile.NOMAD b/MAKE/Makefile.NOMAD index 5e061c556..d777c4464 100644 --- a/MAKE/Makefile.NOMAD +++ b/MAKE/Makefile.NOMAD @@ -6,6 +6,7 @@ LNK = hipcc # module load partition/G # module load cpeAMD # module load rocm/5.3.3 +# module load Boost/1.79.0-cpeAMD-22.08 #======== Vectorization ========== #Set vector backend type for vlasov solvers, sets precision and length. @@ -19,8 +20,8 @@ VECTORCLASS = VEC_FALLBACK_GENERIC # Default for VEC_FALLBACK_GENERIC is WID=4, VECL=8 WID=8 VECL=64 -# WID=4 -# VECL=16 +#WID=4 +#VECL=16 # Compile with GPU support (USE_HIP or USE_CUDA) USE_HIP=1 @@ -36,7 +37,7 @@ CXXFLAGS += -g -O3 -x hip --amdgpu-target=gfx90a:xnack+ -std=c++17 -funroll-loop testpackage: CXXFLAGS = -g -O2 -x hip --amdgpu-target=gfx90a:xnack+ -std=c++17 -fopenmp -fgpu-rdc -I. 
-Ihip -Iomp -D__HIP_PLATFORM_AMD__ -I${CRAY_MPICH_DIR}/include -fgpu-sanitize -W -Wall -Wno-unused-parameter -Wno-unused-result -Wno-unused-function -Wno-unused-variable -Wno-unknown-pragmas -Wno-deprecated-register -Wno-unused-but-set-variable -LDFLAGS = -fopenmp --hip-link -lrt -lgomp -lpthread -fgpu-rdc -D__HIP_PLATFORM_AMD__ -L${CRAY_MPICH_DIR}/lib ${PE_MPICH_GTL_DIR_amd_gfx90a} --amdgpu-target=gfx90a:xnack+ +LDFLAGS = -fopenmp --hip-link -lrt -lpthread -fgpu-rdc -D__HIP_PLATFORM_AMD__ -L${CRAY_MPICH_DIR}/lib ${PE_MPICH_GTL_DIR_amd_gfx90a} --amdgpu-target=gfx90a:xnack+ LIB_MPI = -lmpi ${PE_MPICH_GTL_LIBS_amd_gfx90a} # -fgpu-rdc # relocatable device code, needed for the velocity mesh @@ -59,11 +60,12 @@ testpackage: CXXFLAGS += -DPAPI_MEM #======== Libraries =========== -LIBRARY_PREFIX = /projappl/project_465000538/libraries +LIBRARY_PREFIX = /scratch/project_465000538/vlasiator/libraries # Compiled libraries -INC_BOOST = -isystem $(LIBRARY_PREFIX)/boost/include -LIB_BOOST = -L$(LIBRARY_PREFIX)/boost/lib -lboost_program_options -Wl,-rpath=$(LIBRARY_PREFIX)/boost/lib +#INC_BOOST = -isystem $(LIBRARY_PREFIX)/boost/include +#LIB_BOOST = -L$(LIBRARY_PREFIX)/boost/lib -lboost_program_options -Wl,-rpath=$(LIBRARY_PREFIX)/boost/lib +LIB_BOOST = -lboost_program_options INC_ZOLTAN = -isystem $(LIBRARY_PREFIX)/zoltan/include LIB_ZOLTAN = -L$(LIBRARY_PREFIX)/zoltan/lib -lzoltan -Wl,-rpath=$(LIBRARY_PREFIX)/zoltan/lib diff --git a/MAKE/Makefile.lumi_hipcc b/MAKE/Makefile.lumi_hipcc index 4d013c762..0784582dc 100644 --- a/MAKE/Makefile.lumi_hipcc +++ b/MAKE/Makefile.lumi_hipcc @@ -34,9 +34,9 @@ USE_HIP=1 CXXFLAGS += -g -O3 -x hip --amdgpu-target=gfx90a:xnack+ -std=c++17 -funroll-loops -fopenmp -fgpu-rdc -I. -Ihip -Iomp -D__HIP_PLATFORM_AMD__ -I${CRAY_MPICH_DIR}/include -W -Wall -Wno-unused-parameter -Wno-unused-result -Wno-unused-function -Wno-unused-variable -Wno-unknown-pragmas -Wno-deprecated-register -Wno-unused-but-set-variable -testpackage: CXXFLAGS = -g -O2 -fopenmp -x hip --amdgpu-target=gfx90a:xnack+ -I. -Ihip -Iomp -D__HIP_PLATFORM_AMD__ -I${CRAY_MPICH_DIR}/include -funroll-loops -std=c++17 -fgpu-sanitize +testpackage: CXXFLAGS = -g -O2 -x hip --amdgpu-target=gfx90a:xnack+ -std=c++17 -fopenmp -fgpu-rdc -I. 
-Ihip -Iomp -D__HIP_PLATFORM_AMD__ -I${CRAY_MPICH_DIR}/include -fgpu-sanitize -W -Wall -Wno-unused-parameter -Wno-unused-result -Wno-unused-function -Wno-unused-variable -Wno-unknown-pragmas -Wno-deprecated-register -Wno-unused-but-set-variable -LDFLAGS = -fopenmp --hip-link -lrt -lgomp -lpthread -fgpu-rdc -D__HIP_PLATFORM_AMD__ -L${CRAY_MPICH_DIR}/lib ${PE_MPICH_GTL_DIR_amd_gfx90a} --amdgpu-target=gfx90a:xnack+ +LDFLAGS = -fopenmp --hip-link -lrt -lpthread -fgpu-rdc -D__HIP_PLATFORM_AMD__ -L${CRAY_MPICH_DIR}/lib ${PE_MPICH_GTL_DIR_amd_gfx90a} --amdgpu-target=gfx90a:xnack+ LIB_MPI = -lmpi ${PE_MPICH_GTL_LIBS_amd_gfx90a} # -fgpu-rdc # relocatable device code, needed for the velocity mesh diff --git a/object_wrapper.cpp b/object_wrapper.cpp index 157011a56..cbc0c7743 100644 --- a/object_wrapper.cpp +++ b/object_wrapper.cpp @@ -154,6 +154,27 @@ bool ObjectWrapper::getPopulationParameters() { std::cerr << errormsg; } + /* Special handling of GPU version with WID=8; halve the number of blocks */ + #ifdef USE_GPU + int myRank; + MPI_Comm_rank(MPI_COMM_WORLD,&myRank); + if ((WID==8 && P::adaptGPUWID)) { + // First verify that we can halve the values + if ( (vMesh.gridLength[0]%2==0) && (vMesh.gridLength[1]%2==0) && (vMesh.gridLength[2]%2==0)) { + vMesh.gridLength[0] /= 2; + vMesh.gridLength[1] /= 2; + vMesh.gridLength[2] /= 2; + if(myRank==MASTER_RANK) { + std::cerr<<" Note: Using GPU mode with WID=8; Halving velocity block counts per dimension. Deactivate with parameter adaptGPUWID=false."< P::localCells; +bool P::adaptGPUWID = true; + vector P::systemWriteName; vector P::systemWritePath; vector P::systemWriteTimeInterval; @@ -446,7 +448,9 @@ bool P::addParameters() { RP::add("AMR.box_center_z", "z coordinate of the center of the box that is refined (for testing)", 0.0); RP::add("AMR.transShortPencils", "if true, use one-cell pencils", false); RP::addComposing("AMR.filterpasses", string("AMR filter passes for each individual refinement level")); - + + RP::add("adaptGPUWID", "if true, will halve velocity block counts if GPU is in use and WID==8", true); + RP::add("fieldtracing.fieldLineTracer", "Field line tracing method to use for coupling ionosphere and magnetosphere (options are: Euler, BS)", std::string("Euler")); RP::add("fieldtracing.tracer_max_allowed_error", "Maximum allowed error for the adaptive field line tracers ", 1000); RP::add("fieldtracing.tracer_max_attempts", "Maximum allowed attempts for the adaptive field line tracers", 100); @@ -685,6 +689,7 @@ void Parameters::getParameters() { RP::get("AMR.box_center_z", P::amrBoxCenterZ); RP::get("AMR.transShortPencils", P::amrTransShortPencils); RP::get("AMR.filterpasses", P::blurPassString); + RP::get("adaptGPUWID", P::adaptGPUWID); // If we are in an AMR run we need to set up the filtering scheme. if (P::amrMaxSpatialRefLevel>0){ diff --git a/parameters.h b/parameters.h index a296b24c8..bf2e530c1 100644 --- a/parameters.h +++ b/parameters.h @@ -72,6 +72,8 @@ struct Parameters { static bool meshRepartitioned; /*!< If true, mesh was repartitioned on this time step.*/ static std::vector localCells; /*!< Cached copy of spatial cell IDs on this process.*/ + static bool adaptGPUWID; /*!< If true, GPU runs with WID=8 use halved velocity block counts.*/ + static uint diagnosticInterval; static std::vector systemWriteName; /*!< Names for the different classes of grid output*/ static std::vector systemWritePath; /*!< Save this series in this location. 
Default is ./ */ diff --git a/testpackage/tests/Flowthrough_amr/Flowthrough_amr.cfg b/testpackage/tests/Flowthrough_amr/Flowthrough_amr.cfg index fa7cf0f4e..3a3fa751b 100644 --- a/testpackage/tests/Flowthrough_amr/Flowthrough_amr.cfg +++ b/testpackage/tests/Flowthrough_amr/Flowthrough_amr.cfg @@ -31,15 +31,15 @@ t_max = 182.0 dt = 2.0 [proton_vspace] -vx_min = -2e6 -vx_max = +2e6 -vy_min = -2e6 -vy_max = +2e6 -vz_min = -2e6 -vz_max = +2e6 -vx_length = 15 -vy_length = 15 -vz_length = 15 +vx_min = -1.92e6 +vx_max = +1.92e6 +vy_min = -1.92e6 +vy_max = +1.92e6 +vz_min = -1.92e6 +vz_max = +1.92e6 +vx_length = 16 +vy_length = 16 +vz_length = 16 [io] write_initial_state = 1 @@ -102,8 +102,8 @@ VX0 = 1e5 VY0 = 0 VZ0 = 0 -nSpaceSamples = 2 -nVelocitySamples = 2 +nSpaceSamples = 1 +nVelocitySamples = 1 [loadBalance] algorithm = RCB diff --git a/testpackage/tests/Flowthrough_trans_periodic/Flowthrough_trans_periodic.cfg b/testpackage/tests/Flowthrough_trans_periodic/Flowthrough_trans_periodic.cfg index 02a72a846..6742e540e 100644 --- a/testpackage/tests/Flowthrough_trans_periodic/Flowthrough_trans_periodic.cfg +++ b/testpackage/tests/Flowthrough_trans_periodic/Flowthrough_trans_periodic.cfg @@ -26,7 +26,7 @@ output = populations_vg_v output = vg_boundarytype output = vg_rank output = populations_vg_blocks -output = populations_vg_blocks +output = populations_vg_nonmaxwellianity diagnostic = populations_vg_blocks [gridbuilder] @@ -48,15 +48,15 @@ mass_units = PROTON charge = 1 [proton_vspace] -vx_min = -600000.0 -vx_max = +600000.0 -vy_min = -600000.0 -vy_max = +600000.0 -vz_min = -600000.0 -vz_max = +600000.0 -vx_length = 15 -vy_length = 15 -vz_length = 15 +vx_min = -640000.0 +vx_max = +640000.0 +vy_min = -640000.0 +vy_max = +640000.0 +vz_min = -640000.0 +vz_max = +640000.0 +vx_length = 32 +vy_length = 32 +vz_length = 32 [proton_sparse] minValue = 1.0e-15 @@ -80,8 +80,8 @@ rho = 1000000.0 VX0 = 4e5 VY0 = 4e5 VZ0 = 4e5 -nSpaceSamples = 2 -nVelocitySamples = 2 +nSpaceSamples = 1 +nVelocitySamples = 1 [bailout] velocity_space_wall_block_margin = 0 diff --git a/testpackage/tests/Flowthrough_x_inflow_y_outflow/Flowthrough_x_inflow_y_outflow.cfg b/testpackage/tests/Flowthrough_x_inflow_y_outflow/Flowthrough_x_inflow_y_outflow.cfg index b25de181f..5d0861f41 100644 --- a/testpackage/tests/Flowthrough_x_inflow_y_outflow/Flowthrough_x_inflow_y_outflow.cfg +++ b/testpackage/tests/Flowthrough_x_inflow_y_outflow/Flowthrough_x_inflow_y_outflow.cfg @@ -47,15 +47,15 @@ mass_units = PROTON charge = 1 [proton_vspace] -vx_min = -600000.0 -vx_max = +600000.0 -vy_min = -600000.0 -vy_max = +600000.0 -vz_min = -600000.0 -vz_max = +600000.0 -vx_length = 15 -vy_length = 15 -vz_length = 15 +vx_min = -640000.0 +vx_max = +640000.0 +vy_min = -640000.0 +vy_max = +640000.0 +vz_min = -640000.0 +vz_max = +640000.0 +vx_length = 32 +vy_length = 32 +vz_length = 32 [proton_sparse] minValue = 1.0e-15 @@ -101,8 +101,8 @@ rho = 1000000.0 VX0 = 4e5 VY0 = 0 VZ0 = 0 -nSpaceSamples = 2 -nVelocitySamples = 2 +nSpaceSamples = 1 +nVelocitySamples = 1 [bailout] velocity_space_wall_block_margin = 0 diff --git a/testpackage/tests/Flowthrough_x_inflow_y_outflow_acc/Flowthrough_x_inflow_y_outflow_acc.cfg b/testpackage/tests/Flowthrough_x_inflow_y_outflow_acc/Flowthrough_x_inflow_y_outflow_acc.cfg index c089ef51f..7eb5198a9 100644 --- a/testpackage/tests/Flowthrough_x_inflow_y_outflow_acc/Flowthrough_x_inflow_y_outflow_acc.cfg +++ b/testpackage/tests/Flowthrough_x_inflow_y_outflow_acc/Flowthrough_x_inflow_y_outflow_acc.cfg @@ -47,15 +47,15 @@ 
mass_units = PROTON charge = 1 [proton_vspace] -vx_min = -600000.0 -vx_max = +600000.0 -vy_min = -600000.0 -vy_max = +600000.0 -vz_min = -600000.0 -vz_max = +600000.0 -vx_length = 15 -vy_length = 15 -vz_length = 15 +vx_min = -640000.0 +vx_max = +640000.0 +vy_min = -640000.0 +vy_max = +640000.0 +vz_min = -640000.0 +vz_max = +640000.0 +vx_length = 32 +vy_length = 32 +vz_length = 32 [proton_sparse] minValue = 1.0e-15 @@ -101,8 +101,8 @@ rho = 1000000.0 VX0 = 4e5 VY0 = 0 VZ0 = 0 -nSpaceSamples = 2 -nVelocitySamples = 2 +nSpaceSamples = 1 +nVelocitySamples = 1 [bailout] velocity_space_wall_block_margin = 0 diff --git a/testpackage/tests/Ionosphere_small/Fluctuations.cfg b/testpackage/tests/Ionosphere_small/Fluctuations.cfg index d3092ff10..f89cb3162 100644 --- a/testpackage/tests/Ionosphere_small/Fluctuations.cfg +++ b/testpackage/tests/Ionosphere_small/Fluctuations.cfg @@ -47,15 +47,15 @@ z_max = 1.5e5 dt = 2.05e-05 [proton_vspace] -vx_min = -4.0e6 -vx_max = +4.0e6 -vy_min = -4.0e6 -vy_max = +4.0e6 -vz_min = -4.0e6 -vz_max = +4.0e6 -vx_length = 67 # 100 km/s resolution - more isn't needed for protons -vy_length = 67 -vz_length = 67 +vx_min = -3.96e6 +vx_max = +3.96e6 +vy_min = -3.96e6 +vy_max = +3.96e6 +vz_min = -3.96e6 +vz_max = +3.96e6 +vx_length = 66 # 30 km/s resolution +vy_length = 66 +vz_length = 66 [proton_sparse] minValue = 1.0e-16 #minValue = 0e-14 diff --git a/testpackage/tests/Ionosphere_small/Ionosphere_small.cfg b/testpackage/tests/Ionosphere_small/Ionosphere_small.cfg index 00d1c3316..029b1a877 100644 --- a/testpackage/tests/Ionosphere_small/Ionosphere_small.cfg +++ b/testpackage/tests/Ionosphere_small/Ionosphere_small.cfg @@ -37,15 +37,15 @@ max_spatial_level = 1 refine_radius = 2.2e8 #about 35 RE [proton_vspace] -vx_min = -4.02e6 -vx_max = +4.02e6 -vy_min = -4.02e6 -vy_max = +4.02e6 -vz_min = -4.02e6 -vz_max = +4.02e6 -vx_length = 67 -vy_length = 67 -vz_length = 67 +vx_min = -3.96e6 +vx_max = +3.96e6 +vy_min = -3.96e6 +vy_max = +3.96e6 +vz_min = -3.96e6 +vz_max = +3.96e6 +vx_length = 66 # 30 km/s resolution +vy_length = 66 +vz_length = 66 [proton_sparse] minValue = 1.0e-15 diff --git a/testpackage/tests/Magnetosphere_3D_small/Magnetosphere_3D_small.cfg b/testpackage/tests/Magnetosphere_3D_small/Magnetosphere_3D_small.cfg index 83b52c3bd..27c353aa2 100644 --- a/testpackage/tests/Magnetosphere_3D_small/Magnetosphere_3D_small.cfg +++ b/testpackage/tests/Magnetosphere_3D_small/Magnetosphere_3D_small.cfg @@ -37,15 +37,15 @@ max_spatial_level = 1 refine_radius = 2.2e8 #about 35 RE [proton_vspace] -vx_min = -4.02e6 -vx_max = +4.02e6 -vy_min = -4.02e6 -vy_max = +4.02e6 -vz_min = -4.02e6 -vz_max = +4.02e6 -vx_length = 67 -vy_length = 67 -vz_length = 67 +vx_min = -3.96e6 +vx_max = +3.96e6 +vy_min = -3.96e6 +vy_max = +3.96e6 +vz_min = -3.96e6 +vz_max = +3.96e6 +vx_length = 66 # 30 km/s resolution +vy_length = 66 +vz_length = 66 [proton_sparse] minValue = 1.0e-15 @@ -81,16 +81,16 @@ periodic_y = no periodic_z = no boundary = Outflow boundary = Maxwellian -boundary = Conductingsphere +boundary = Copysphere -[conductingsphere] +[copysphere] centerX = 0.0 centerY = 0.0 centerZ = 0.0 radius = 70e6 precedence = 2 -[proton_conductingsphere] +[proton_copysphere] rho = 1.0e6 T = 0.5e6 diff --git a/testpackage/tests/Magnetosphere_polar_small/Magnetosphere_polar_small.cfg b/testpackage/tests/Magnetosphere_polar_small/Magnetosphere_polar_small.cfg index a3a64c807..a1d7a9b3d 100644 --- a/testpackage/tests/Magnetosphere_polar_small/Magnetosphere_polar_small.cfg +++ 
b/testpackage/tests/Magnetosphere_polar_small/Magnetosphere_polar_small.cfg @@ -37,15 +37,15 @@ t_max = 20.05 [proton_vspace] -vx_min = -2.0e6 -vx_max = +2.0e6 -vy_min = -2.0e6 -vy_max = +2.0e6 -vz_min = -2.0e6 -vz_max = +2.0e6 -vx_length = 25 -vy_length = 25 -vz_length = 25 +vx_min = -2.08e6 +vx_max = +2.08e6 +vy_min = -2.08e6 +vy_max = +2.08e6 +vz_min = -2.08e6 +vz_max = +2.08e6 +vx_length = 26 # 40 km/s resolution +vy_length = 26 +vz_length = 26 [proton_sparse] minValue = 1.0e-15 @@ -87,9 +87,9 @@ periodic_y = yes periodic_z = no boundary = Outflow boundary = Maxwellian -boundary = Conductingsphere +boundary = Copysphere -[conductingsphere] +[copysphere] centerX = 0.0 centerY = 0.0 centerZ = 0.0 @@ -97,7 +97,7 @@ geometry = 2 radius = 50.0e6 precedence = 2 -[proton_conductingsphere] +[proton_copysphere] rho = 1.0e6 T = 0.5e6 VX0 = 0.0 diff --git a/testpackage/tests/Magnetosphere_small/Magnetosphere_small.cfg b/testpackage/tests/Magnetosphere_small/Magnetosphere_small.cfg index 35f3bc5ca..621e45826 100644 --- a/testpackage/tests/Magnetosphere_small/Magnetosphere_small.cfg +++ b/testpackage/tests/Magnetosphere_small/Magnetosphere_small.cfg @@ -33,15 +33,15 @@ t_max = 10.1 #timestep_max = 100 [proton_vspace] -vx_min = -2.0e6 -vx_max = +2.0e6 -vy_min = -2.0e6 -vy_max = +2.0e6 -vz_min = -2.0e6 -vz_max = +2.0e6 -vx_length = 25 -vy_length = 25 -vz_length = 25 +vx_min = -2.08e6 +vx_max = +2.08e6 +vy_min = -2.08e6 +vy_max = +2.08e6 +vz_min = -2.08e6 +vz_max = +2.08e6 +vx_length = 26 # 40 km/s resolution +vy_length = 26 +vz_length = 26 [proton_sparse] minValue = 1.0e-15 @@ -77,16 +77,16 @@ periodic_y = no periodic_z = yes boundary = Outflow boundary = Maxwellian -boundary = Conductingsphere +boundary = Copysphere -[conductingsphere] +[copysphere] centerX = 0.0 centerY = 0.0 centerZ = 0.0 radius = 38.2e6 precedence = 2 -[proton_conductingsphere] +[proton_copysphere] rho = 1.0e6 T=100000.0 diff --git a/testpackage/tests/Selfgen_Waves_Periodic/Selfgen_Waves_Periodic.cfg b/testpackage/tests/Selfgen_Waves_Periodic/Selfgen_Waves_Periodic.cfg index 3eac7f946..ee1a11d3d 100644 --- a/testpackage/tests/Selfgen_Waves_Periodic/Selfgen_Waves_Periodic.cfg +++ b/testpackage/tests/Selfgen_Waves_Periodic/Selfgen_Waves_Periodic.cfg @@ -77,7 +77,7 @@ magYPertAbsAmp = 0 magZPertAbsAmp = 0 densityModel = testcase -nVelocitySamples = 3 +nVelocitySamples = 1 [proton_MultiPeak] n = 2 diff --git a/testpackage/tests/acctest_1_maxw_500k_30kms_1deg/acctest_1_maxw_500k_30kms_1deg.cfg b/testpackage/tests/acctest_1_maxw_500k_30kms_1deg/acctest_1_maxw_500k_30kms_1deg.cfg index 4e73cbbdb..53610b1cc 100644 --- a/testpackage/tests/acctest_1_maxw_500k_30kms_1deg/acctest_1_maxw_500k_30kms_1deg.cfg +++ b/testpackage/tests/acctest_1_maxw_500k_30kms_1deg/acctest_1_maxw_500k_30kms_1deg.cfg @@ -113,7 +113,7 @@ lambda = 10 magXPertAbsAmp = 0 magYPertAbsAmp = 0 magZPertAbsAmp = 0 -nVelocitySamples = 4 +nVelocitySamples = 1 [proton_MultiPeak] n = 1 diff --git a/testpackage/tests/acctest_2_maxw_500k_100k_20kms_10deg/acctest_2_maxw_500k_100k_20kms_10deg.cfg b/testpackage/tests/acctest_2_maxw_500k_100k_20kms_10deg/acctest_2_maxw_500k_100k_20kms_10deg.cfg index 346331a9a..ee5eaa432 100644 --- a/testpackage/tests/acctest_2_maxw_500k_100k_20kms_10deg/acctest_2_maxw_500k_100k_20kms_10deg.cfg +++ b/testpackage/tests/acctest_2_maxw_500k_100k_20kms_10deg/acctest_2_maxw_500k_100k_20kms_10deg.cfg @@ -77,7 +77,7 @@ Bz = 1.1135233442526334e-10 magXPertAbsAmp = 0 magYPertAbsAmp = 0 magZPertAbsAmp = 0 -nVelocitySamples = 3 +nVelocitySamples = 1 
[proton_MultiPeak] n = 2 diff --git a/testpackage/tests/acctest_3_substeps/acctest_3_substeps.cfg b/testpackage/tests/acctest_3_substeps/acctest_3_substeps.cfg index 4381c1f90..303640837 100644 --- a/testpackage/tests/acctest_3_substeps/acctest_3_substeps.cfg +++ b/testpackage/tests/acctest_3_substeps/acctest_3_substeps.cfg @@ -81,7 +81,7 @@ Bz = 1.1135233442526334e-10 magXPertAbsAmp = 0 magYPertAbsAmp = 0 magZPertAbsAmp = 0 -nVelocitySamples = 3 +nVelocitySamples = 1 [proton_MultiPeak] n = 2 diff --git a/testpackage/tests/acctest_4_helium/acctest_4_helium.cfg b/testpackage/tests/acctest_4_helium/acctest_4_helium.cfg index ff8b4d8c8..fc93ac49a 100644 --- a/testpackage/tests/acctest_4_helium/acctest_4_helium.cfg +++ b/testpackage/tests/acctest_4_helium/acctest_4_helium.cfg @@ -80,7 +80,7 @@ lambda = 10 magXPertAbsAmp = 0 magYPertAbsAmp = 0 magZPertAbsAmp = 0 -nVelocitySamples = 4 +nVelocitySamples = 1 [helium_MultiPeak] n = 1 diff --git a/testpackage/tests/acctest_5_proton_antiproton/acctest_5_proton_antiproton.cfg b/testpackage/tests/acctest_5_proton_antiproton/acctest_5_proton_antiproton.cfg index 8134fb6a3..f8c3d23e2 100644 --- a/testpackage/tests/acctest_5_proton_antiproton/acctest_5_proton_antiproton.cfg +++ b/testpackage/tests/acctest_5_proton_antiproton/acctest_5_proton_antiproton.cfg @@ -97,7 +97,7 @@ Bz = 1.1135233442526334e-10 magXPertAbsAmp = 0 magYPertAbsAmp = 0 magZPertAbsAmp = 0 -nVelocitySamples = 3 +nVelocitySamples = 1 [proton_MultiPeak] n = 2 diff --git a/testpackage/tests/restart_read/restart_read.cfg b/testpackage/tests/restart_read/restart_read.cfg index ff3295fed..225e22b76 100644 --- a/testpackage/tests/restart_read/restart_read.cfg +++ b/testpackage/tests/restart_read/restart_read.cfg @@ -50,15 +50,15 @@ mass_units = PROTON charge = 1 [proton_vspace] -vx_min = -600000.0 -vx_max = +600000.0 -vy_min = -600000.0 -vy_max = +600000.0 -vz_min = -600000.0 -vz_max = +600000.0 -vx_length = 15 -vy_length = 15 -vz_length = 15 +vx_min = -640000.0 +vx_max = +640000.0 +vy_min = -640000.0 +vy_max = +640000.0 +vz_min = -640000.0 +vz_max = +640000.0 +vx_length = 32 +vy_length = 32 +vz_length = 32 [proton_sparse] minValue = 1.0e-15 @@ -104,8 +104,8 @@ rho = 1000000.0 VX0 = 4e5 VY0 = 0 VZ0 = 0 -nSpaceSamples = 2 -nVelocitySamples = 2 +nSpaceSamples = 1 +nVelocitySamples = 1 [bailout] velocity_space_wall_block_margin = 0 diff --git a/testpackage/tests/restart_write/restart_write.cfg b/testpackage/tests/restart_write/restart_write.cfg index dbe9170eb..06105607e 100644 --- a/testpackage/tests/restart_write/restart_write.cfg +++ b/testpackage/tests/restart_write/restart_write.cfg @@ -53,15 +53,15 @@ mass_units = PROTON charge = 1 [proton_vspace] -vx_min = -600000.0 -vx_max = +600000.0 -vy_min = -600000.0 -vy_max = +600000.0 -vz_min = -600000.0 -vz_max = +600000.0 -vx_length = 15 -vy_length = 15 -vz_length = 15 +vx_min = -640000.0 +vx_max = +640000.0 +vy_min = -640000.0 +vy_max = +640000.0 +vz_min = -640000.0 +vz_max = +640000.0 +vx_length = 32 +vy_length = 32 +vz_length = 32 [proton_sparse] minValue = 1.0e-15 @@ -107,8 +107,8 @@ rho = 1000000.0 VX0 = 4e5 VY0 = 0 VZ0 = 0 -nSpaceSamples = 2 -nVelocitySamples = 2 +nSpaceSamples = 1 +nVelocitySamples = 1 [bailout] velocity_space_wall_block_margin = 0 diff --git a/testpackage/tests/test_fp_fsolver_only_3D/test_fp_fsolver_only_3D.cfg b/testpackage/tests/test_fp_fsolver_only_3D/test_fp_fsolver_only_3D.cfg index a9fbf8333..979936d54 100644 --- a/testpackage/tests/test_fp_fsolver_only_3D/test_fp_fsolver_only_3D.cfg +++ 
b/testpackage/tests/test_fp_fsolver_only_3D/test_fp_fsolver_only_3D.cfg @@ -45,9 +45,9 @@ vy_min = -2.0e4 vy_max = +2.0e4 vz_min = -2.0e4 vz_max = +2.0e4 -vx_length = 9 -vy_length = 9 -vz_length = 9 +vx_length = 10 +vy_length = 10 +vz_length = 10 [proton_sparse] minValue = 1.0e-15 diff --git a/testpackage/tests/test_fp_substeps/test_fp_substeps.cfg b/testpackage/tests/test_fp_substeps/test_fp_substeps.cfg index 72f8c2fae..10638e883 100644 --- a/testpackage/tests/test_fp_substeps/test_fp_substeps.cfg +++ b/testpackage/tests/test_fp_substeps/test_fp_substeps.cfg @@ -45,9 +45,9 @@ vy_min = -2.0e4 vy_max = +2.0e4 vz_min = -2.0e4 vz_max = +2.0e4 -vx_length = 9 -vy_length = 9 -vz_length = 9 +vx_length = 10 +vy_length = 10 +vz_length = 10 [proton_sparse] minValue = 1.0e-15 diff --git a/testpackage/tests/transtest_2_maxw_500k_100k_20kms_20x20/transtest_2_maxw_500k_100k_20kms_20x20.cfg b/testpackage/tests/transtest_2_maxw_500k_100k_20kms_20x20/transtest_2_maxw_500k_100k_20kms_20x20.cfg index 4c4be9db7..00773fc06 100644 --- a/testpackage/tests/transtest_2_maxw_500k_100k_20kms_20x20/transtest_2_maxw_500k_100k_20kms_20x20.cfg +++ b/testpackage/tests/transtest_2_maxw_500k_100k_20kms_20x20/transtest_2_maxw_500k_100k_20kms_20x20.cfg @@ -44,7 +44,7 @@ vz_max = +2.0e6 vx_length = 50 vy_length = 50 vz_length = 50 -max_refinement_level = 0 + [proton_sparse] minValue = 1.0e-16 @@ -77,7 +77,7 @@ magXPertAbsAmp = 0 magYPertAbsAmp = 0 magZPertAbsAmp = 0 -nVelocitySamples = 3 +nVelocitySamples = 1 [proton_MultiPeak] n = 1 diff --git a/testpackage/tests/transtest_amr/transtest_amr.cfg b/testpackage/tests/transtest_amr/transtest_amr.cfg deleted file mode 100644 index bb065566a..000000000 --- a/testpackage/tests/transtest_amr/transtest_amr.cfg +++ /dev/null @@ -1,105 +0,0 @@ -dynamic_timestep = 1 -project = testAmr -ParticlePopulations = proton -propagate_field = 0 -propagate_vlasov_acceleration = 0 -propagate_vlasov_translation = 1 - -[proton_properties] -mass = 1 -mass_units = PROTON -charge = 1 - -[io] -diagnostic_write_interval = 1 -write_initial_state = 1 - -system_write_t_interval = 180.0 -system_write_file_name = fullf -system_write_distribution_stride = 1 -system_write_distribution_xline_stride = 0 -system_write_distribution_yline_stride = 0 -system_write_distribution_zline_stride = 0 - -[AMR] -max_spatial_level = 2 -box_half_width_x = 1 -box_half_width_y = 1 -box_half_width_z = 1 -box_center_x = 1.0e6 -box_center_y = 1.0e6 -box_center_z = 1.0e6 - -[gridbuilder] -x_length = 8 -y_length = 8 -z_length = 8 -x_min = -1.0e6 -x_max = 1.0e6 -y_min = -1.0e6 -y_max = 1.0e6 -z_min = -1.0e6 -z_max = 1.0e6 -t_max = 181.0 - -[proton_vspace] -vx_min = -2.0e6 -vx_max = +2.0e6 -vy_min = -2.0e6 -vy_max = +2.0e6 -vz_min = -2.0e6 -vz_max = +2.0e6 -vx_length = 1 -vy_length = 1 -vz_length = 1 -max_refinement_level = 1 -[proton_sparse] -minValue = 1.0e-16 - -[boundaries] -periodic_x = yes -periodic_y = yes -periodic_z = yes - -[variables] -output = populations_vg_rho -output = fg_b -output = vg_pressure -output = populations_vg_v -output = fg_e -output = vg_rank -output = populations_vg_blocks -#output = populations_vg_acceleration_subcycles - -diagnostic = populations_vg_blocks -#diagnostic = vg_pressure -#diagnostic = populations_vg_rho -#diagnostic = populations_vg_rho_loss_adjust - -[testAmr] -#magnitude of 1.82206867e-10 gives a period of 360s, useful for testing... 
-Bx = 1.2e-10 -By = 0.8e-10 -Bz = 1.1135233442526334e-10 -magXPertAbsAmp = 0 -magYPertAbsAmp = 0 -magZPertAbsAmp = 0 -densityModel = uniform -nVelocitySamples = 3 - -[proton_testAmr] -n = 1 -Vx = 5e5 -Vy = 5e5 -Vz = 0.0 -Tx = 500000.0 -Ty = 500000.0 -Tz = 500000.0 -rho = 1.0e6 -rhoPertAbsAmp = 1.0e5 - -[loadBalance] -algorithm = RCB - -[bailout] -velocity_space_wall_block_margin = 0 From 10f2c7724e4d85e6d11e78bab3b03bd1e2d52468 Mon Sep 17 00:00:00 2001 From: Markus Battarbee Date: Thu, 31 Aug 2023 17:05:42 +0300 Subject: [PATCH 3/3] Fixed compilation of vlsvdiff on GPU systems --- Makefile | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index cd7cbc8bc..983510c4c 100644 --- a/Makefile +++ b/Makefile @@ -335,6 +335,7 @@ DEPS_PARTICLES = particles/particles.h particles/particles.cpp particles/field.h readparameters.h version.h particles/scenario.h particles/histogram.h OBJS_PARTICLES = particles/physconst.o particles/particles.o particles/readfields.o particles/particleparameters.o particles/distribution.o readparameters.o version.o particles/scenario.o particles/histogram.o +# todo: verify compilation and working of tools other than vlsvdiff vlsvextract: ${DEPS_VLSVREADER} ${DEPS_VLSVREADERINTERFACE} tools/vlsvextract.h tools/vlsvextract.cpp ${OBJS_VLSVREADER} ${OBJS_VLSVREADERINTERFACE} ${CMP} ${CXXFLAGS} ${FLAGS} -c tools/vlsvextract.cpp ${INC_BOOST} ${INC_DCCRG} ${INC_EIGEN} ${INC_VLSV} -I$(CURDIR) ${LNK} -o vlsvextract_${FP_PRECISION} vlsvextract.o ${OBJS_VLSVREADERINTERFACE} ${LIB_BOOST} ${LIB_DCCRG} ${LIB_VLSV} ${LDFLAGS} @@ -343,9 +344,10 @@ vlsv2silo: ${DEPS_VLSVREADERINTERFACE} tools/vlsv2silo.cpp ${OBJS_VLSVREADERIN ${CMP} ${CXXFLAGS} ${FLAGS} -c tools/vlsv2silo.cpp ${INC_SILO} ${INC_VLSV} -I$(CURDIR) ${LNK} -o vlsv2silo_${FP_PRECISION} vlsv2silo.o ${OBJS_VLSVREADERINTERFACE} ${LIB_SILO} ${LIB_VLSV} ${LDFLAGS} -vlsvdiff: ${DEPS_VLSVREADERINTERFACE} tools/vlsvdiff.cpp ${OBJS_VLSVREADEREXTRA} ${OBJS_VLSVREADERINTERFACE} - ${CMP} ${CXXEXTRAFLAGS} ${FLAGS} -c tools/vlsvdiff.cpp ${INC_VLSV} -I$(CURDIR) - ${LNK} -o vlsvdiff_${FP_PRECISION} vlsvdiff.o ${OBJS_VLSVREADERINTERFACE} ${LIB_VLSV} ${LDFLAGS} +vlsvdiff: tools/vlsvdiff.cpp + @echo [CC] $< + $(SILENT)$(CMP) $(CXXEXTRAFLAGS) ${MATHFLAGS} ${FLAGS} -c tools/vlsvdiff.cpp ${INC_DCCRG} ${INC_VLSV} ${INC_FSGRID} + $(SILENT)${LNK} ${LDFLAGS} -o vlsvdiff_${FP_PRECISION} vlsvdiff.o ${OBJS_VLSVREADERINTERFACE} ${LIB_VLSV} ${LIBS} vlsvreaderinterface.o: tools/vlsvreaderinterface.h tools/vlsvreaderinterface.cpp ${CMP} ${CXXFLAGS} ${FLAGS} -c tools/vlsvreaderinterface.cpp ${INC_VLSV} -I$(CURDIR)
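
Note on the explicit grouping added to datareductionoperator.cpp in PATCH 1/3: the original conditions relied on && binding more tightly than ||, which made the intent (nonthermal cells outside the thermal sphere, or thermal cells inside it) easy to misread and produced compiler warnings; the patch only adds parentheses, the selection logic is unchanged. The following is a minimal standalone sketch of that selection test, not code from the patch: the helper name isInSelectedPopulation, the use of plain double in place of Vlasiator's Real, and the bulk velocity and radius values in main() are all invented for illustration.

#include <cstdio>

// Hypothetical helper (not part of the patch): true when the velocity cell at
// (VX,VY,VZ) belongs to the requested population -- farther than thermalRadius
// from the thermal bulk velocity when selecting the nonthermal population,
// within (or at) that radius when selecting the thermal population.
static bool isInSelectedPopulation(bool calculateNonthermal,
                                   const double thermalV[3], double thermalRadius,
                                   double VX, double VY, double VZ) {
   const double dist2 = (thermalV[0] - VX) * (thermalV[0] - VX)
                      + (thermalV[1] - VY) * (thermalV[1] - VY)
                      + (thermalV[2] - VZ) * (thermalV[2] - VZ);
   return calculateNonthermal ? (dist2 > thermalRadius * thermalRadius)
                              : (dist2 <= thermalRadius * thermalRadius);
}

int main(void) {
   const double thermalV[3] = {0.0, 0.0, 0.0}; // example bulk velocity at the origin
   const double thermalRadius = 4.0e5;         // example radius, 400 km/s
   // A cell 500 km/s away from the bulk velocity counts as nonthermal, not thermal:
   std::printf("nonthermal: %d\n", isInSelectedPopulation(true,  thermalV, thermalRadius, 5.0e5, 0.0, 0.0));
   std::printf("thermal:    %d\n", isInSelectedPopulation(false, thermalV, thermalRadius, 5.0e5, 0.0, 0.0));
   return 0;
}

The reducers in the patch keep the distance expression inline inside the velocity-cell loops; this sketch only spells out the grouping that the added parentheses make explicit.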