Skip to content

Commit

Permalink
Renamed NoWait to Async
Browse files Browse the repository at this point in the history
  • Loading branch information
kubagalecki committed Oct 21, 2023
1 parent ef50874 commit 0890776
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 9 deletions.
4 changes: 2 additions & 2 deletions src/CartLatticeLauncher.hpp.Rt
Original file line number Diff line number Diff line change
Expand Up @@ -153,13 +153,13 @@ public:
// Builds a CartInteriorExecutor< I, G, S > from `container` and `data` and passes it,
// together with `stream`, to the executor launch helper.
// NOTE(review): this text is a rendered commit diff ("Renamed NoWait to Async"), so the
// LaunchExecutorNoWait / LaunchExecutorAsync lines below look like the removed/added
// pair of a single changed line rather than two separate launches -- confirm against
// the real file before relying on this.
template < eOperationType I, eCalculateGlobals G, eStage S >
void CartLatticeLauncher::RunInterior(CudaStream_t stream) const {
const CartInteriorExecutor< I, G, S > executor(container, data);
LaunchExecutorNoWait(executor, stream);
LaunchExecutorAsync(executor, stream);
}

// Builds a CartBorderExecutor< I, G, S > from `container` and `data` and passes it,
// together with `stream`, to the executor launch helper.
// NOTE(review): this text is a rendered commit diff ("Renamed NoWait to Async"), so the
// LaunchExecutorNoWait / LaunchExecutorAsync lines below look like the removed/added
// pair of a single changed line rather than two separate launches -- confirm against
// the real file before relying on this.
template < eOperationType I, eCalculateGlobals G, eStage S >
void CartLatticeLauncher::RunBorder(CudaStream_t stream) const {
const CartBorderExecutor< I, G, S > executor(container, data);
LaunchExecutorNoWait(executor, stream);
LaunchExecutorAsync(executor, stream);
}


Expand Down
4 changes: 2 additions & 2 deletions src/GetThreads.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ void LaunchExecutor(const EX& executor) {
}

// Obtains launch parameters for `executor` from ComputeLaunchParams and starts
// Kernel< EX > with them on `stream`, passing `executor` as the kernel argument.
// NOTE(review): this text is a rendered commit diff ("Renamed NoWait to Async"); the two
// signature lines and the two launch lines below look like before/after pairs of the
// rename, so only the *Async line of each pair is expected in the real file -- confirm.
template<class EX>
void LaunchExecutorNoWait(const EX& executor, CudaStream_t stream) {
void LaunchExecutorAsync(const EX& executor, CudaStream_t stream) {
const auto exec_params = ComputeLaunchParams(executor);
// exec_params.blx and exec_params.thr are handed over as the blocks and threads arguments.
CudaKernelRunNoWait(Kernel< EX >, exec_params.blx, exec_params.thr, stream, executor);
CudaKernelRunAsync(Kernel< EX >, exec_params.blx, exec_params.thr, stream, executor);
}
10 changes: 5 additions & 5 deletions src/cross.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,16 +56,16 @@
// Kernel-launch wrapper macros. The first branch (CROSS_HIP not defined) uses the cuda*
// runtime calls, the second branch uses the hip* equivalents.
// Macro arguments: a__ = kernel, b__ = grid size, c__ = block size, e__ = stream,
// remaining arguments = kernel arguments.
// NOTE(review): this text is a rendered commit diff ("Renamed NoWait to Async"); each
// CudaKernelRunNoWait / CudaKernelRunAsync pair looks like the before/after of one
// renamed line -- confirm against the real file.
// NOTE(review): every macro here expands to more than one statement (or ends in its own
// ';'), so using one as the body of an unbraced if/else would not behave as a single
// statement.
#ifndef CROSS_HIP
// Launches the kernel without a stream argument, then synchronizes the whole device and
// checks the last error.
#define CudaKernelRun(a__,b__,c__,...) a__<<<b__,c__>>>(__VA_ARGS__); HANDLE_ERROR( cudaDeviceSynchronize()); HANDLE_ERROR( cudaGetLastError() )
#ifdef CROSS_SYNC
// CROSS_SYNC build: the stream argument e__ is accepted but not used in the launch; the
// launch is followed by a device synchronize and an error check.
#define CudaKernelRunNoWait(a__,b__,c__,e__,...) a__<<<b__,c__>>>(__VA_ARGS__); HANDLE_ERROR( cudaDeviceSynchronize()); HANDLE_ERROR( cudaGetLastError() );
#define CudaKernelRunAsync(a__,b__,c__,e__,...) a__<<<b__,c__>>>(__VA_ARGS__); HANDLE_ERROR( cudaDeviceSynchronize()); HANDLE_ERROR( cudaGetLastError() );
#else
// Launches on stream e__ with 0 as the third launch-configuration argument and does not
// synchronize afterwards.
// NOTE(review): no cudaGetLastError() check follows the launch here, so a failed launch
// is not reported at this point.
#define CudaKernelRunNoWait(a__,b__,c__,e__,...) a__<<<b__,c__,0,e__>>>(__VA_ARGS__);
#define CudaKernelRunAsync(a__,b__,c__,e__,...) a__<<<b__,c__,0,e__>>>(__VA_ARGS__);
#endif
#else
// HIP variant of CudaKernelRun: launch, hipDeviceSynchronize, then hipGetLastError check.
#define CudaKernelRun(a__,b__,c__,...) a__<<<b__,c__>>>(__VA_ARGS__); HANDLE_ERROR( hipDeviceSynchronize()); HANDLE_ERROR( hipGetLastError() )
#ifdef CROSS_SYNC
// CROSS_SYNC build: the stream argument e__ is accepted but not used in the launch; the
// launch is followed by a device synchronize and an error check.
#define CudaKernelRunNoWait(a__,b__,c__,e__,...) a__<<<b__,c__>>>(__VA_ARGS__); HANDLE_ERROR( hipDeviceSynchronize()); HANDLE_ERROR( hipGetLastError() );
#define CudaKernelRunAsync(a__,b__,c__,e__,...) a__<<<b__,c__>>>(__VA_ARGS__); HANDLE_ERROR( hipDeviceSynchronize()); HANDLE_ERROR( hipGetLastError() );
#else
// Launches on stream e__ with 0 as the third launch-configuration argument and does not
// synchronize afterwards.
// NOTE(review): no hipGetLastError() check follows the launch here, so a failed launch
// is not reported at this point.
#define CudaKernelRunNoWait(a__,b__,c__,e__,...) a__<<<b__,c__,0,e__>>>(__VA_ARGS__);
#define CudaKernelRunAsync(a__,b__,c__,e__,...) a__<<<b__,c__,0,e__>>>(__VA_ARGS__);
#endif
#endif
// CudaBlock is an alias for the built-in blockIdx.
#define CudaBlock blockIdx
Expand Down Expand Up @@ -302,7 +302,7 @@
}

// Function-template form of the stream-based kernel launch: forwards `func`, `blocks`
// and the perfectly-forwarded `args` to CPUKernelRun. `threads` and `stream` are
// accepted but not used in the body.
// NOTE(review): presumably this is the counterpart used when the GPU launch macros are
// not compiled in -- confirm against the surrounding preprocessor guards.
// NOTE(review): this text is a rendered commit diff ("Renamed NoWait to Async"); the two
// signature lines below look like the before/after pair of the rename, so only the
// CudaKernelRunAsync line is expected in the real file -- confirm.
template <typename F, typename ...P>
inline void CudaKernelRunNoWait(F &&func, const dim3& blocks, const dim3& threads, CudaStream_t stream, P &&... args) {
inline void CudaKernelRunAsync(F &&func, const dim3& blocks, const dim3& threads, CudaStream_t stream, P &&... args) {
CPUKernelRun(func, blocks, std::forward<P>(args)...);
}

Expand Down

0 comments on commit 0890776

Please sign in to comment.