diff --git a/src/CartLatticeLauncher.hpp.Rt b/src/CartLatticeLauncher.hpp.Rt index a8fa47f8b..0fbad39ff 100644 --- a/src/CartLatticeLauncher.hpp.Rt +++ b/src/CartLatticeLauncher.hpp.Rt @@ -153,13 +153,13 @@ public: template < eOperationType I, eCalculateGlobals G, eStage S > void CartLatticeLauncher::RunInterior(CudaStream_t stream) const { const CartInteriorExecutor< I, G, S > executor(container, data); - LaunchExecutorNoWait(executor, stream); + LaunchExecutorAsync(executor, stream); } template < eOperationType I, eCalculateGlobals G, eStage S > void CartLatticeLauncher::RunBorder(CudaStream_t stream) const { const CartBorderExecutor< I, G, S > executor(container, data); - LaunchExecutorNoWait(executor, stream); + LaunchExecutorAsync(executor, stream); } diff --git a/src/GetThreads.h b/src/GetThreads.h index 79524f365..0e2f7e8fc 100644 --- a/src/GetThreads.h +++ b/src/GetThreads.h @@ -98,7 +98,7 @@ void LaunchExecutor(const EX& executor) { } template -void LaunchExecutorNoWait(const EX& executor, CudaStream_t stream) { +void LaunchExecutorAsync(const EX& executor, CudaStream_t stream) { const auto exec_params = ComputeLaunchParams(executor); - CudaKernelRunNoWait(Kernel< EX >, exec_params.blx, exec_params.thr, stream, executor); + CudaKernelRunAsync(Kernel< EX >, exec_params.blx, exec_params.thr, stream, executor); } diff --git a/src/cross.h b/src/cross.h index 77c0b1855..5d5b424e7 100644 --- a/src/cross.h +++ b/src/cross.h @@ -56,16 +56,16 @@ #ifndef CROSS_HIP #define CudaKernelRun(a__,b__,c__,...) a__<<>>(__VA_ARGS__); HANDLE_ERROR( cudaDeviceSynchronize()); HANDLE_ERROR( cudaGetLastError() ) #ifdef CROSS_SYNC - #define CudaKernelRunNoWait(a__,b__,c__,e__,...) a__<<>>(__VA_ARGS__); HANDLE_ERROR( cudaDeviceSynchronize()); HANDLE_ERROR( cudaGetLastError() ); + #define CudaKernelRunAsync(a__,b__,c__,e__,...) a__<<>>(__VA_ARGS__); HANDLE_ERROR( cudaDeviceSynchronize()); HANDLE_ERROR( cudaGetLastError() ); #else - #define CudaKernelRunNoWait(a__,b__,c__,e__,...) 
a__<<>>(__VA_ARGS__); + #define CudaKernelRunAsync(a__,b__,c__,e__,...) a__<<>>(__VA_ARGS__); #endif #else #define CudaKernelRun(a__,b__,c__,...) a__<<>>(__VA_ARGS__); HANDLE_ERROR( hipDeviceSynchronize()); HANDLE_ERROR( hipGetLastError() ) #ifdef CROSS_SYNC - #define CudaKernelRunNoWait(a__,b__,c__,e__,...) a__<<>>(__VA_ARGS__); HANDLE_ERROR( hipDeviceSynchronize()); HANDLE_ERROR( hipGetLastError() ); + #define CudaKernelRunAsync(a__,b__,c__,e__,...) a__<<>>(__VA_ARGS__); HANDLE_ERROR( hipDeviceSynchronize()); HANDLE_ERROR( hipGetLastError() ); #else - #define CudaKernelRunNoWait(a__,b__,c__,e__,...) a__<<>>(__VA_ARGS__); + #define CudaKernelRunAsync(a__,b__,c__,e__,...) a__<<>>(__VA_ARGS__); #endif #endif #define CudaBlock blockIdx @@ -302,7 +302,7 @@ } template - inline void CudaKernelRunNoWait(F &&func, const dim3& blocks, const dim3& threads, CudaStream_t stream, P &&... args) { + inline void CudaKernelRunAsync(F &&func, const dim3& blocks, const dim3& threads, CudaStream_t stream, P &&... args) { CPUKernelRun(func, blocks, std::forward

<P>(args)...); }