diff --git a/libcudacxx/codegen/CMakeLists.txt b/libcudacxx/codegen/CMakeLists.txt index b0df1b5a98..3477f988af 100644 --- a/libcudacxx/codegen/CMakeLists.txt +++ b/libcudacxx/codegen/CMakeLists.txt @@ -19,8 +19,8 @@ target_compile_features( add_dependencies(libcudacxx.atomics.codegen codegen) -set(atomic_generated_output "${libcudacxx_BINARY_DIR}/codegen/atomic_cuda_generated.h") -set(atomic_install_location "${libcudacxx_SOURCE_DIR}/include/cuda/std/detail/libcxx/include/support/atomic") +set(atomic_generated_output "${libcudacxx_BINARY_DIR}/codegen/cuda_ptx_generated.h") +set(atomic_install_location "${libcudacxx_SOURCE_DIR}/include/cuda/std/__atomic/functions") add_custom_target( libcudacxx.atomics.codegen.execute @@ -32,13 +32,13 @@ add_dependencies(libcudacxx.atomics.codegen libcudacxx.atomics.codegen.execute) add_custom_target( libcudacxx.atomics.codegen.install - COMMAND ${CMAKE_COMMAND} -E copy "${atomic_generated_output}" "${atomic_install_location}/atomic_cuda_generated.h" - BYPRODUCTS "${atomic_install_location}/atomic_cuda_generated.h" + COMMAND ${CMAKE_COMMAND} -E copy "${atomic_generated_output}" "${atomic_install_location}/cuda_ptx_generated.h" + BYPRODUCTS "${atomic_install_location}/cuda_ptx_generated.h" ) add_dependencies(libcudacxx.atomics.codegen.install libcudacxx.atomics.codegen.execute) add_test( NAME libcudacxx.atomics.codegen.diff - COMMAND ${CMAKE_COMMAND} -E compare_files "${atomic_install_location}/atomic_cuda_generated.h" "${atomic_generated_output}" + COMMAND ${CMAKE_COMMAND} -E compare_files "${atomic_install_location}/cuda_ptx_generated.h" "${atomic_generated_output}" ) diff --git a/libcudacxx/codegen/codegen.cpp b/libcudacxx/codegen/codegen.cpp index 77d96a92d9..b7111c44d7 100644 --- a/libcudacxx/codegen/codegen.cpp +++ b/libcudacxx/codegen/codegen.cpp @@ -66,7 +66,7 @@ int main() std::vector cv_qualifier{"volatile ", ""}; - std::ofstream out("atomic_cuda_generated.h"); + std::ofstream out("cuda_ptx_generated.h"); out << R"XXX(//===----------------------------------------------------------------------===// // @@ -78,8 +78,36 @@ int main() // //===----------------------------------------------------------------------===// -// This is a autogenerated file, we want to ensure that it contains exactly the contentes we want to generate +// This is an autogenerated file, we want to ensure that it contains exactly the contents we want to generate // clang-format off + +#ifndef _LIBCUDACXX___ATOMIC_FUNCTIONS_CUDA_PTX_GENERATED_H +#define _LIBCUDACXX___ATOMIC_FUNCTIONS_CUDA_PTX_GENERATED_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include + +#include +#include +#include + +#include +#include + +_LIBCUDACXX_BEGIN_NAMESPACE_STD + +#if defined(_CCCL_CUDA_COMPILER) + )XXX"; auto scopenametag = [&](auto scope) { @@ -302,11 +330,11 @@ int main() { out << "template = 0>\n"; out << "_CCCL_DEVICE bool __atomic_compare_exchange_cuda(" << cv - << "_Type *__ptr, _Type *__expected, const _Type *__desired, bool, int __success_memorder, int " + << "_Type *__ptr, _Type *__expected, const _Type __desired, bool, int __success_memorder, int " "__failure_memorder, " << scopenametag(s.first) << ") {\n"; out << " uint" << sz << "_t __tmp = 0, __old = 0, __old_tmp;\n"; - out << " memcpy(&__tmp, __desired, " << sz / 8 << ");\n"; + out << " 
memcpy(&__tmp, &__desired, " << sz / 8 << ");\n"; out << " memcpy(&__old, __expected, " << sz / 8 << ");\n"; out << " __old_tmp = __old;\n"; out << " NV_DISPATCH_TARGET(\n"; @@ -503,6 +531,9 @@ int main() } } + out << "\n#endif // defined(_CCCL_CUDA_COMPILER)\n"; + out << "\n_LIBCUDACXX_END_NAMESPACE_STD\n"; + out << "\n#endif // _LIBCUDACXX___ATOMIC_FUNCTIONS_CUDA_PTX_GENERATED_H\n"; out << "\n// clang-format on\n"; return 0; diff --git a/libcudacxx/examples/rtc_example.cpp b/libcudacxx/examples/rtc_example.cpp index 08ce22adf2..513e580584 100644 --- a/libcudacxx/examples/rtc_example.cpp +++ b/libcudacxx/examples/rtc_example.cpp @@ -50,11 +50,11 @@ template static constexpr T min(T a, T b) { return a < b ? a : b; } struct trie { struct ref { - cuda::std::atomic ptr = ATOMIC_VAR_INIT(nullptr); + cuda::std::atomic ptr = LIBCUDACXX_ATOMIC_VAR_INIT(nullptr); // the flag will protect against multiple pointer updates - cuda::std::atomic_flag flag = ATOMIC_FLAG_INIT; + cuda::std::atomic_flag flag = LIBCUDACXX_ATOMIC_FLAG_INIT; } next[26]; - cuda::std::atomic count = ATOMIC_VAR_INIT(0); + cuda::std::atomic count = LIBCUDACXX_ATOMIC_VAR_INIT(0); }; __host__ __device__ int index_of(char c) { diff --git a/libcudacxx/examples/trie.cu b/libcudacxx/examples/trie.cu index b4b7a7a5f1..3a16fdceeb 100644 --- a/libcudacxx/examples/trie.cu +++ b/libcudacxx/examples/trie.cu @@ -36,11 +36,11 @@ struct trie { struct ref { - cuda::atomic ptr = ATOMIC_VAR_INIT(nullptr); + cuda::atomic ptr = LIBCUDACXX_ATOMIC_VAR_INIT(nullptr); // the flag will protect against multiple pointer updates - cuda::std::atomic_flag flag = ATOMIC_FLAG_INIT; + cuda::std::atomic_flag flag = LIBCUDACXX_ATOMIC_FLAG_INIT; } next[26]; - cuda::std::atomic count = ATOMIC_VAR_INIT(0); + cuda::std::atomic count = LIBCUDACXX_ATOMIC_VAR_INIT(0); }; __host__ __device__ int index_of(char c) { diff --git a/libcudacxx/examples/trie_mt.cpp b/libcudacxx/examples/trie_mt.cpp index 22fdb68499..2e2a46df29 100644 --- a/libcudacxx/examples/trie_mt.cpp +++ b/libcudacxx/examples/trie_mt.cpp @@ -36,11 +36,11 @@ struct trie { struct ref { - std::atomic ptr = ATOMIC_VAR_INIT(nullptr); + std::atomic ptr = LIBCUDACXX_ATOMIC_VAR_INIT(nullptr); // the flag will protect against multiple pointer updates - std::atomic_flag flag = ATOMIC_VAR_INIT(0); + std::atomic_flag flag = LIBCUDACXX_ATOMIC_VAR_INIT(0); } next[26]; - std::atomic count = ATOMIC_VAR_INIT(0); + std::atomic count = LIBCUDACXX_ATOMIC_VAR_INIT(0); }; int index_of(char c) { diff --git a/libcudacxx/include/cuda/atomic b/libcudacxx/include/cuda/atomic index 3c9e76cb1d..06dd1c785c 100644 --- a/libcudacxx/include/cuda/atomic +++ b/libcudacxx/include/cuda/atomic @@ -11,6 +11,14 @@ #ifndef _CUDA_ATOMIC #define _CUDA_ATOMIC -#include +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header #endif // _CUDA_ATOMIC diff --git a/libcudacxx/include/cuda/std/__atomic/api/common.h b/libcudacxx/include/cuda/std/__atomic/api/common.h new file mode 100644 index 0000000000..e3f8c7c3e6 --- /dev/null +++ b/libcudacxx/include/cuda/std/__atomic/api/common.h @@ -0,0 +1,192 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. 
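// [Editorial aside, not part of the diff] The example changes above swap the
// deprecated C ATOMIC_VAR_INIT/ATOMIC_FLAG_INIT macros for library-provided
// spellings. A minimal usage sketch mirroring the trie examples, assuming the
// renamed macros keep the old plain-initialization semantics (the __node type
// below is hypothetical):
#include <cuda/std/atomic>

struct __node
{
  cuda::std::atomic<__node*> next  = LIBCUDACXX_ATOMIC_VAR_INIT(nullptr);
  cuda::std::atomic_flag     flag  = LIBCUDACXX_ATOMIC_FLAG_INIT;   // guards pointer updates
  cuda::std::atomic<int>     count = LIBCUDACXX_ATOMIC_VAR_INIT(0);
};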
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef __LIBCUDACXX___ATOMIC_API_COMMON_H +#define __LIBCUDACXX___ATOMIC_API_COMMON_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include + +// API definitions for the base atomic implementation +#define _LIBCUDACXX_ATOMIC_COMMON_IMPL(_CONST, _VOLATILE) \ + _CCCL_HOST_DEVICE inline bool is_lock_free() const _VOLATILE noexcept \ + { \ + return _LIBCUDACXX_ATOMIC_IS_LOCK_FREE(sizeof(_Tp)); \ + } \ + _CCCL_HOST_DEVICE inline void store(_Tp __d, memory_order __m = memory_order_seq_cst) \ + _CONST _VOLATILE noexcept _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) \ + { \ + __atomic_store_dispatch(&__a, __d, __m, _Sco{}); \ + } \ + _CCCL_HOST_DEVICE inline _Tp load(memory_order __m = memory_order_seq_cst) \ + const _VOLATILE noexcept _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) \ + { \ + return __atomic_load_dispatch(&__a, __m, _Sco{}); \ + } \ + _CCCL_HOST_DEVICE inline operator _Tp() const _VOLATILE noexcept \ + { \ + return load(); \ + } \ + _CCCL_HOST_DEVICE inline _Tp exchange(_Tp __d, memory_order __m = memory_order_seq_cst) _CONST _VOLATILE noexcept \ + { \ + return __atomic_exchange_dispatch(&__a, __d, __m, _Sco{}); \ + } \ + _CCCL_HOST_DEVICE inline bool compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) \ + _CONST _VOLATILE noexcept _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) \ + { \ + return __atomic_compare_exchange_weak_dispatch(&__a, &__e, __d, __s, __f, _Sco{}); \ + } \ + _CCCL_HOST_DEVICE inline bool compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) \ + _CONST _VOLATILE noexcept _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) \ + { \ + return __atomic_compare_exchange_strong_dispatch(&__a, &__e, __d, __s, __f, _Sco{}); \ + } \ + _CCCL_HOST_DEVICE inline bool compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) \ + _CONST _VOLATILE noexcept \ + { \ + if (memory_order_acq_rel == __m) \ + return __atomic_compare_exchange_weak_dispatch(&__a, &__e, __d, __m, memory_order_acquire, _Sco{}); \ + else if (memory_order_release == __m) \ + return __atomic_compare_exchange_weak_dispatch(&__a, &__e, __d, __m, memory_order_relaxed, _Sco{}); \ + else \ + return __atomic_compare_exchange_weak_dispatch(&__a, &__e, __d, __m, __m, _Sco{}); \ + } \ + _CCCL_HOST_DEVICE inline bool compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) \ + _CONST _VOLATILE noexcept \ + { \ + if (memory_order_acq_rel == __m) \ + return __atomic_compare_exchange_strong_dispatch(&__a, &__e, __d, __m, memory_order_acquire, _Sco{}); \ + else if (memory_order_release == __m) \ + return __atomic_compare_exchange_strong_dispatch(&__a, &__e, __d, __m, memory_order_relaxed, _Sco{}); \ + else \ + return __atomic_compare_exchange_strong_dispatch(&__a, &__e, __d, __m, __m, _Sco{}); \ + } \ + _CCCL_HOST_DEVICE inline void wait(_Tp __v, memory_order __m = memory_order_seq_cst) const _VOLATILE noexcept \ + { \ + __atomic_wait(&__a, __v, __m, _Sco{}); \ + } \ + _CCCL_HOST_DEVICE inline void notify_one() _CONST 
_VOLATILE noexcept \ + { \ + __atomic_notify_one(&__a, _Sco{}); \ + } \ + _CCCL_HOST_DEVICE inline void notify_all() _CONST _VOLATILE noexcept \ + { \ + __atomic_notify_all(&__a, _Sco{}); \ + } + +// API definitions for arithmetic atomics +#define _LIBCUDACXX_ATOMIC_ARITHMETIC_IMPL(_CONST, _VOLATILE) \ + _CCCL_HOST_DEVICE inline _Tp fetch_add(_Tp __op, memory_order __m = memory_order_seq_cst) _CONST _VOLATILE noexcept \ + { \ + return __atomic_fetch_add_dispatch(&__a, __op, __m, _Sco{}); \ + } \ + _CCCL_HOST_DEVICE inline _Tp fetch_sub(_Tp __op, memory_order __m = memory_order_seq_cst) _CONST _VOLATILE noexcept \ + { \ + return __atomic_fetch_sub_dispatch(&__a, __op, __m, _Sco{}); \ + } \ + _CCCL_HOST_DEVICE inline _Tp operator++(int) _CONST _VOLATILE noexcept \ + { \ + return fetch_add(_Tp(1)); \ + } \ + _CCCL_HOST_DEVICE inline _Tp operator--(int) _CONST _VOLATILE noexcept \ + { \ + return fetch_sub(_Tp(1)); \ + } \ + _CCCL_HOST_DEVICE inline _Tp operator++() _CONST _VOLATILE noexcept \ + { \ + return fetch_add(_Tp(1)) + _Tp(1); \ + } \ + _CCCL_HOST_DEVICE inline _Tp operator--() _CONST _VOLATILE noexcept \ + { \ + return fetch_sub(_Tp(1)) - _Tp(1); \ + } \ + _CCCL_HOST_DEVICE inline _Tp operator+=(_Tp __op) _CONST _VOLATILE noexcept \ + { \ + return fetch_add(__op) + __op; \ + } \ + _CCCL_HOST_DEVICE inline _Tp operator-=(_Tp __op) _CONST _VOLATILE noexcept \ + { \ + return fetch_sub(__op) - __op; \ + } + +// API definitions for bitwise atomics +#define _LIBCUDACXX_ATOMIC_BITWISE_IMPL(_CONST, _VOLATILE) \ + _CCCL_HOST_DEVICE inline _Tp fetch_and(_Tp __op, memory_order __m = memory_order_seq_cst) _CONST _VOLATILE noexcept \ + { \ + return __atomic_fetch_and_dispatch(&__a, __op, __m, _Sco{}); \ + } \ + _CCCL_HOST_DEVICE inline _Tp fetch_or(_Tp __op, memory_order __m = memory_order_seq_cst) _CONST _VOLATILE noexcept \ + { \ + return __atomic_fetch_or_dispatch(&__a, __op, __m, _Sco{}); \ + } \ + _CCCL_HOST_DEVICE inline _Tp fetch_xor(_Tp __op, memory_order __m = memory_order_seq_cst) _CONST _VOLATILE noexcept \ + { \ + return __atomic_fetch_xor_dispatch(&__a, __op, __m, _Sco{}); \ + } \ + _CCCL_HOST_DEVICE inline _Tp operator&=(_Tp __op) _CONST _VOLATILE noexcept \ + { \ + return fetch_and(__op) & __op; \ + } \ + _CCCL_HOST_DEVICE inline _Tp operator|=(_Tp __op) _CONST _VOLATILE noexcept \ + { \ + return fetch_or(__op) | __op; \ + } \ + _CCCL_HOST_DEVICE inline _Tp operator^=(_Tp __op) _CONST _VOLATILE noexcept \ + { \ + return fetch_xor(__op) ^ __op; \ + } + +// API definitions for atomics with pointers +#define _LIBCUDACXX_ATOMIC_POINTER_IMPL(_CONST, _VOLATILE) \ + _CCCL_HOST_DEVICE inline _Tp fetch_add(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) \ + _CONST _VOLATILE noexcept \ + { \ + return __atomic_fetch_add_dispatch(&__a, __op, __m, __thread_scope_system_tag{}); \ + } \ + _CCCL_HOST_DEVICE inline _Tp fetch_sub(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) \ + _CONST _VOLATILE noexcept \ + { \ + return __atomic_fetch_sub_dispatch(&__a, __op, __m, __thread_scope_system_tag{}); \ + } \ + _CCCL_HOST_DEVICE inline _Tp operator++(int) _CONST _VOLATILE noexcept \ + { \ + return fetch_add(1); \ + } \ + _CCCL_HOST_DEVICE inline _Tp operator--(int) _CONST _VOLATILE noexcept \ + { \ + return fetch_sub(1); \ + } \ + _CCCL_HOST_DEVICE inline _Tp operator++() _CONST _VOLATILE noexcept \ + { \ + return fetch_add(1) + 1; \ + } \ + _CCCL_HOST_DEVICE inline _Tp operator--() _CONST _VOLATILE noexcept \ + { \ + return fetch_sub(1) - 1; \ + } \ + _CCCL_HOST_DEVICE inline _Tp 
operator+=(ptrdiff_t __op) _CONST _VOLATILE noexcept \ + { \ + return fetch_add(__op) + __op; \ + } \ + _CCCL_HOST_DEVICE inline _Tp operator-=(ptrdiff_t __op) _CONST _VOLATILE noexcept \ + { \ + return fetch_sub(__op) - __op; \ + } + +#endif // __LIBCUDACXX___ATOMIC_API_COMMON_H diff --git a/libcudacxx/include/cuda/std/__atomic/api/owned.h b/libcudacxx/include/cuda/std/__atomic/api/owned.h new file mode 100644 index 0000000000..fdbc8baac2 --- /dev/null +++ b/libcudacxx/include/cuda/std/__atomic/api/owned.h @@ -0,0 +1,134 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef __LIBCUDACXX___ATOMIC_API_OWNED_H +#define __LIBCUDACXX___ATOMIC_API_OWNED_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include +#include +#include +#include +#include + +_LIBCUDACXX_BEGIN_NAMESPACE_STD + +template +struct __atomic_common +{ + _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_common(_Tp __v) + : __a(__v) + {} + + constexpr inline __atomic_common() = default; + + __atomic_storage_t<_Tp> __a; + +#if defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) + static constexpr bool is_always_lock_free = _LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE(sizeof(_Tp), 0); +#endif // defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) + + _LIBCUDACXX_ATOMIC_COMMON_IMPL(, ) + _LIBCUDACXX_ATOMIC_COMMON_IMPL(, volatile) +}; + +template +struct __atomic_arithmetic +{ + _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_arithmetic(_Tp __v) + : __a(__v) + {} + + constexpr inline __atomic_arithmetic() = default; + + __atomic_storage_t<_Tp> __a; + +#if defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) + static constexpr bool is_always_lock_free = _LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE(sizeof(_Tp), 0); +#endif // defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) + + _LIBCUDACXX_ATOMIC_COMMON_IMPL(, ) + _LIBCUDACXX_ATOMIC_COMMON_IMPL(, volatile) + + _LIBCUDACXX_ATOMIC_ARITHMETIC_IMPL(, ) + _LIBCUDACXX_ATOMIC_ARITHMETIC_IMPL(, volatile) +}; + +template +struct __atomic_bitwise +{ + _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_bitwise(_Tp __v) + : __a(__v) + {} + + constexpr inline __atomic_bitwise() = default; + + __atomic_storage_t<_Tp> __a; + +#if defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) + static constexpr bool is_always_lock_free = _LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE(sizeof(_Tp), 0); +#endif // defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) + + _LIBCUDACXX_ATOMIC_COMMON_IMPL(, ) + _LIBCUDACXX_ATOMIC_COMMON_IMPL(, volatile) + + _LIBCUDACXX_ATOMIC_ARITHMETIC_IMPL(, ) + _LIBCUDACXX_ATOMIC_ARITHMETIC_IMPL(, volatile) + + _LIBCUDACXX_ATOMIC_BITWISE_IMPL(, ) + _LIBCUDACXX_ATOMIC_BITWISE_IMPL(, volatile) +}; + +template +struct __atomic_pointer +{ + _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_pointer(_Tp __v) + : __a(__v) + {} + + constexpr inline __atomic_pointer() = default; + + __atomic_storage_t<_Tp> __a; + +#if defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) + 
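// [Editorial note, not part of the header] The single-order compare_exchange
// overloads in the common API macro above derive the failure order from the
// success order because the failure order may not be release or acq_rel.
// Restated as a standalone helper (hypothetical name, for illustration only):
_CCCL_HOST_DEVICE constexpr memory_order __failure_order_for(memory_order __m)
{
  return __m == memory_order_acq_rel ? memory_order_acquire
       : __m == memory_order_release ? memory_order_relaxed
                                     : __m;
}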
static constexpr bool is_always_lock_free = _LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE(sizeof(_Tp), 0); +#endif // defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) + + _LIBCUDACXX_ATOMIC_COMMON_IMPL(, ) + _LIBCUDACXX_ATOMIC_COMMON_IMPL(, volatile) + + _LIBCUDACXX_ATOMIC_POINTER_IMPL(, ) + _LIBCUDACXX_ATOMIC_POINTER_IMPL(, volatile) +}; + +template +using __atomic_impl = + _If::value, + __atomic_pointer<_Tp, __scope_to_tag<_Sco>>, + _If::value, + __atomic_arithmetic<_Tp, __scope_to_tag<_Sco>>, + _If::value, + __atomic_bitwise<_Tp, __scope_to_tag<_Sco>>, + __atomic_common<_Tp, __scope_to_tag<_Sco>>>>>; + +_LIBCUDACXX_END_NAMESPACE_STD + +#endif // __LIBCUDACXX___ATOMIC_API_OWNED_H diff --git a/libcudacxx/include/cuda/std/__atomic/api/reference.h b/libcudacxx/include/cuda/std/__atomic/api/reference.h new file mode 100644 index 0000000000..eeba3a6746 --- /dev/null +++ b/libcudacxx/include/cuda/std/__atomic/api/reference.h @@ -0,0 +1,114 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef __LIBCUDACXX___ATOMIC_API_REFERENCE_H +#define __LIBCUDACXX___ATOMIC_API_REFERENCE_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include +#include +#include +#include +#include + +_LIBCUDACXX_BEGIN_NAMESPACE_STD + +template +struct __atomic_ref_common +{ + _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_ref_common(_Tp& __v) + : __a(&__v) + {} + + __atomic_ref_storage<_Tp> __a; + +#if defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) + static constexpr bool is_always_lock_free = _LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE(sizeof(_Tp), 0); +#endif // defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) + + _LIBCUDACXX_ATOMIC_COMMON_IMPL(const, ) +}; + +template +struct __atomic_ref_arithmetic +{ + _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_ref_arithmetic(_Tp& __v) + : __a(&__v) + {} + + __atomic_ref_storage<_Tp> __a; + +#if defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) + static constexpr bool is_always_lock_free = _LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE(sizeof(_Tp), 0); +#endif // defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) + + _LIBCUDACXX_ATOMIC_COMMON_IMPL(const, ) + _LIBCUDACXX_ATOMIC_ARITHMETIC_IMPL(const, ) +}; + +template +struct __atomic_ref_bitwise +{ + _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_ref_bitwise(_Tp& __v) + : __a(&__v) + {} + + __atomic_ref_storage<_Tp> __a; + +#if defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) + static constexpr bool is_always_lock_free = _LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE(sizeof(_Tp), 0); +#endif // defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) + + _LIBCUDACXX_ATOMIC_COMMON_IMPL(const, ) + _LIBCUDACXX_ATOMIC_ARITHMETIC_IMPL(const, ) + _LIBCUDACXX_ATOMIC_BITWISE_IMPL(const, ) +}; + +template +struct __atomic_ref_pointer +{ + _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_ref_pointer(_Tp& __v) + : __a(&__v) + {} + + __atomic_ref_storage<_Tp> __a; + +#if defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) 
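// [Editorial sketch, not part of the header] The reference-based structs above
// instantiate the shared API macros with _CONST = const: the wrapper object may
// be const-qualified, yet operations still modify the referenced object,
// matching std::atomic_ref semantics. Assuming the public cuda::std::atomic_ref
// builds on these bases, usage looks roughly like this (__bump is a
// hypothetical helper):
#include <cuda/std/atomic>

__host__ __device__ int __bump(int& __counter)
{
  const cuda::std::atomic_ref<int> __ref{__counter};
  return __ref.fetch_add(1, cuda::std::memory_order_relaxed); // mutates __counter
}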
+ static constexpr bool is_always_lock_free = _LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE(sizeof(_Tp), 0); +#endif // defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) + + _LIBCUDACXX_ATOMIC_COMMON_IMPL(const, ) + _LIBCUDACXX_ATOMIC_POINTER_IMPL(const, ) +}; + +template +using __atomic_ref_impl = + _If::value, + __atomic_ref_pointer<_Tp, __scope_to_tag<_Sco>>, + _If::value, + __atomic_ref_arithmetic<_Tp, __scope_to_tag<_Sco>>, + _If::value, + __atomic_ref_bitwise<_Tp, __scope_to_tag<_Sco>>, + __atomic_ref_common<_Tp, __scope_to_tag<_Sco>>>>>; + +_LIBCUDACXX_END_NAMESPACE_STD + +#endif // __LIBCUDACXX___ATOMIC_API_REFERENCE_H diff --git a/libcudacxx/include/cuda/std/__atomic/functions.h b/libcudacxx/include/cuda/std/__atomic/functions.h new file mode 100644 index 0000000000..76cea325ce --- /dev/null +++ b/libcudacxx/include/cuda/std/__atomic/functions.h @@ -0,0 +1,33 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef __LIBCUDACXX___ATOMIC_FUNCTIONS_H +#define __LIBCUDACXX___ATOMIC_FUNCTIONS_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include + +// Device atomics +#include +#include + +// Host atomics +#include + +#endif // __LIBCUDACXX___ATOMIC_FUNCTIONS_H diff --git a/libcudacxx/include/cuda/std/__atomic/functions/cuda_ptx_derived.h b/libcudacxx/include/cuda/std/__atomic/functions/cuda_ptx_derived.h new file mode 100644 index 0000000000..13f534b905 --- /dev/null +++ b/libcudacxx/include/cuda/std/__atomic/functions/cuda_ptx_derived.h @@ -0,0 +1,203 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. 
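// [Editorial note, not part of the diff] __atomic_impl and __atomic_ref_impl
// above select the API surface by type. The exact _If conditions are elided in
// this diff; the split assumed here is: pointers get the pointer ops, integral
// and floating-point types get the arithmetic ops, integral types additionally
// get the bitwise ops, and everything else falls back to the common
// load/store/exchange/CAS set. In user terms (illustrative kernel only):
#include <cuda/std/atomic>

__global__ void __api_surface_examples(cuda::std::atomic<int>* __i,
                                       cuda::std::atomic<float>* __f,
                                       cuda::std::atomic<int*>* __p)
{
  __i->fetch_add(1);    // arithmetic set
  __i->fetch_or(0x2);   // bitwise set (integral types only)
  __f->fetch_add(1.0f); // arithmetic set; no fetch_or for floating point
  __p->fetch_add(1);    // pointer set: advances by one element (sizeof(int) bytes)
}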
+// +//===----------------------------------------------------------------------===// + +#ifndef __LIBCUDACXX___ATOMIC_FUNCTIONS_DERIVED_H +#define __LIBCUDACXX___ATOMIC_FUNCTIONS_DERIVED_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include + +_LIBCUDACXX_BEGIN_NAMESPACE_STD + +#if defined(_CCCL_CUDA_COMPILER) + +template = 0> +_CCCL_DEVICE bool __atomic_compare_exchange_cuda( + _Tp volatile* __ptr, _Tp* __expected, const _Tp __desired, bool, int __success_memorder, int __failure_memorder, _Sco) +{ + auto const __aligned = (uint32_t*) ((intptr_t) __ptr & ~(sizeof(uint32_t) - 1)); + auto const __offset = uint32_t((intptr_t) __ptr & (sizeof(uint32_t) - 1)) * 8; + auto const __mask = ((1 << sizeof(_Tp) * 8) - 1) << __offset; + + uint32_t __old = *__expected << __offset; + uint32_t __old_value; + while (1) + { + __old_value = (__old & __mask) >> __offset; + if (__old_value != *__expected) + { + break; + } + uint32_t const __attempt = (__old & ~__mask) | (*__desired << __offset); + if (__atomic_compare_exchange_cuda( + __aligned, &__old, &__attempt, true, __success_memorder, __failure_memorder, _Sco{})) + { + return true; + } + } + *__expected = __old_value; + return false; +} + +template = 0> +_CCCL_DEVICE void __atomic_exchange_cuda(_Tp volatile* __ptr, _Tp* __val, _Tp* __ret, int __memorder, _Sco) +{ + _Tp __expected = __atomic_load_n_cuda(__ptr, __ATOMIC_RELAXED, _Sco{}); + while (!__atomic_compare_exchange_cuda(__ptr, &__expected, __val, true, __memorder, __memorder, _Sco{})) + ; + *__ret = __expected; +} + +template = 0> +_CCCL_DEVICE _Tp __atomic_fetch_add_cuda(_Tp volatile* __ptr, _Up __val, int __memorder, _Sco) +{ + _Tp __expected = __atomic_load_n_cuda(__ptr, __ATOMIC_RELAXED, _Sco{}); + _Tp __desired = __expected + __val; + while (!__atomic_compare_exchange_cuda(__ptr, &__expected, __desired, true, __memorder, __memorder, _Sco{})) + { + __desired = __expected + __val; + } + return __expected; +} + +template ::value, int> = 0> +_CCCL_DEVICE _Tp __atomic_fetch_max_cuda(_Tp volatile* __ptr, _Up __val, int __memorder, _Sco) +{ + _Tp __expected = __atomic_load_n_cuda(__ptr, __ATOMIC_RELAXED, _Sco{}); + _Tp __desired = __expected > __val ? __expected : __val; + + while (__desired == __val + && !__atomic_compare_exchange_cuda(__ptr, &__expected, __desired, true, __memorder, __memorder, _Sco{})) + { + __desired = __expected > __val ? __expected : __val; + } + + return __expected; +} + +template ::value, int> = 0> +_CCCL_DEVICE _Tp __atomic_fetch_min_cuda(_Tp volatile* __ptr, _Up __val, int __memorder, _Sco) +{ + _Tp __expected = __atomic_load_n_cuda(__ptr, __ATOMIC_RELAXED, _Sco{}); + _Tp __desired = __expected < __val ? __expected : __val; + + while (__desired == __val + && !__atomic_compare_exchange_cuda(__ptr, &__expected, __desired, true, __memorder, __memorder, _Sco{})) + { + __desired = __expected < __val ? 
__expected : __val; + } + + return __expected; +} + +template = 0> +_CCCL_DEVICE _Tp __atomic_fetch_sub_cuda(_Tp volatile* __ptr, _Up __val, int __memorder, _Sco) +{ + _Tp __expected = __atomic_load_n_cuda(__ptr, __ATOMIC_RELAXED, _Sco{}); + _Tp __desired = __expected - __val; + while (!__atomic_compare_exchange_cuda(__ptr, &__expected, __desired, true, __memorder, __memorder, _Sco{})) + { + __desired = __expected - __val; + } + return __expected; +} + +template = 0> +_CCCL_DEVICE _Tp __atomic_fetch_and_cuda(_Tp volatile* __ptr, _Up __val, int __memorder, _Sco) +{ + _Tp __expected = __atomic_load_n_cuda(__ptr, __ATOMIC_RELAXED, _Sco{}); + _Tp __desired = __expected & __val; + while (!__atomic_compare_exchange_cuda(__ptr, &__expected, __desired, true, __memorder, __memorder, _Sco{})) + { + __desired = __expected & __val; + } + return __expected; +} + +template = 0> +_CCCL_DEVICE _Tp __atomic_fetch_xor_cuda(_Tp volatile* __ptr, _Up __val, int __memorder, _Sco) +{ + _Tp __expected = __atomic_load_n_cuda(__ptr, __ATOMIC_RELAXED, _Sco{}); + _Tp __desired = __expected ^ __val; + while (!__atomic_compare_exchange_cuda(__ptr, &__expected, __desired, true, __memorder, __memorder, _Sco{})) + { + __desired = __expected ^ __val; + } + return __expected; +} + +template = 0> +_CCCL_DEVICE _Tp __atomic_fetch_or_cuda(_Tp volatile* __ptr, _Up __val, int __memorder, _Sco) +{ + _Tp __expected = __atomic_load_n_cuda(__ptr, __ATOMIC_RELAXED, _Sco{}); + _Tp __desired = __expected | __val; + while (!__atomic_compare_exchange_cuda(__ptr, &__expected, __desired, true, __memorder, __memorder, _Sco{})) + { + __desired = __expected | __val; + } + return __expected; +} + +template +_CCCL_DEVICE _Tp __atomic_load_n_cuda(const _Tp volatile* __ptr, int __memorder, _Sco) +{ + _Tp __ret; + __atomic_load_cuda(__ptr, &__ret, __memorder, _Sco{}); + return __ret; +} + +template +_CCCL_DEVICE void __atomic_store_n_cuda(_Tp volatile* __ptr, _Tp __val, int __memorder, _Sco) +{ + __atomic_store_cuda(__ptr, &__val, __memorder, _Sco{}); +} + +template +_CCCL_DEVICE bool __atomic_compare_exchange_n_cuda( + _Tp volatile* __ptr, _Tp* __expected, _Tp __desired, bool __weak, int __success_memorder, int __failure_memorder, _Sco) +{ + return __atomic_compare_exchange_cuda( + __ptr, __expected, __desired, __weak, __success_memorder, __failure_memorder, _Sco{}); +} + +template +_CCCL_DEVICE _Tp __atomic_exchange_n_cuda(_Tp volatile* __ptr, _Tp __val, int __memorder, _Sco) +{ + _Tp __ret; + __atomic_exchange_cuda(__ptr, &__val, &__ret, __memorder, _Sco{}); + return __ret; +} + +_CCCL_DEVICE static inline void __atomic_signal_fence_cuda(int) +{ + asm volatile("" ::: "memory"); +} + +#endif // defined(_CCCL_CUDA_COMPILER) + +_LIBCUDACXX_END_NAMESPACE_STD + +#endif // __LIBCUDACXX___ATOMIC_FUNCTIONS_DERIVED_H diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/support/atomic/atomic_cuda_generated.h b/libcudacxx/include/cuda/std/__atomic/functions/cuda_ptx_generated.h similarity index 99% rename from libcudacxx/include/cuda/std/detail/libcxx/include/support/atomic/atomic_cuda_generated.h rename to libcudacxx/include/cuda/std/__atomic/functions/cuda_ptx_generated.h index 648de27352..6d3ad940f3 100644 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/support/atomic/atomic_cuda_generated.h +++ b/libcudacxx/include/cuda/std/__atomic/functions/cuda_ptx_generated.h @@ -8,8 +8,36 @@ // //===----------------------------------------------------------------------===// -// This is a autogenerated file, we want to ensure that it contains 
exactly the contentes we want to generate +// This is an autogenerated file, we want to ensure that it contains exactly the contents we want to generate // clang-format off + +#ifndef _LIBCUDACXX___ATOMIC_FUNCTIONS_CUDA_PTX_GENERATED_H +#define _LIBCUDACXX___ATOMIC_FUNCTIONS_CUDA_PTX_GENERATED_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include + +#include +#include +#include + +#include +#include + +_LIBCUDACXX_BEGIN_NAMESPACE_STD + +#if defined(_CCCL_CUDA_COMPILER) + static inline _CCCL_DEVICE void __cuda_membar_block() { asm volatile("membar.cta;":::"memory"); } static inline _CCCL_DEVICE void __cuda_fence_acq_rel_block() { asm volatile("fence.acq_rel.cta;":::"memory"); } static inline _CCCL_DEVICE void __cuda_fence_sc_block() { asm volatile("fence.sc.cta;":::"memory"); } @@ -249,9 +277,9 @@ template static inli template static inline _CCCL_DEVICE void __cuda_compare_exchange_release_32_block(_CUDA_A __ptr, _CUDA_B& __dst, _CUDA_C __cmp, _CUDA_D __op) { asm volatile("atom.cas.release.cta.b32 %0,[%1],%2,%3;" : "=r"(__dst) : "l"(__ptr),"r"(__cmp),"r"(__op) : "memory"); } template static inline _CCCL_DEVICE void __cuda_compare_exchange_volatile_32_block(_CUDA_A __ptr, _CUDA_B& __dst, _CUDA_C __cmp, _CUDA_D __op) { asm volatile("atom.cas.cta.b32 %0,[%1],%2,%3;" : "=r"(__dst) : "l"(__ptr),"r"(__cmp),"r"(__op) : "memory"); } template = 0> -_CCCL_DEVICE bool __atomic_compare_exchange_cuda(volatile _Type *__ptr, _Type *__expected, const _Type *__desired, bool, int __success_memorder, int __failure_memorder, __thread_scope_block_tag) { +_CCCL_DEVICE bool __atomic_compare_exchange_cuda(volatile _Type *__ptr, _Type *__expected, const _Type __desired, bool, int __success_memorder, int __failure_memorder, __thread_scope_block_tag) { uint32_t __tmp = 0, __old = 0, __old_tmp; - memcpy(&__tmp, __desired, 4); + memcpy(&__tmp, &__desired, 4); memcpy(&__old, __expected, 4); __old_tmp = __old; NV_DISPATCH_TARGET( @@ -283,9 +311,9 @@ _CCCL_DEVICE bool __atomic_compare_exchange_cuda(volatile _Type *__ptr, _Type *_ return __ret; } template = 0> -_CCCL_DEVICE bool __atomic_compare_exchange_cuda(_Type *__ptr, _Type *__expected, const _Type *__desired, bool, int __success_memorder, int __failure_memorder, __thread_scope_block_tag) { +_CCCL_DEVICE bool __atomic_compare_exchange_cuda(_Type *__ptr, _Type *__expected, const _Type __desired, bool, int __success_memorder, int __failure_memorder, __thread_scope_block_tag) { uint32_t __tmp = 0, __old = 0, __old_tmp; - memcpy(&__tmp, __desired, 4); + memcpy(&__tmp, &__desired, 4); memcpy(&__old, __expected, 4); __old_tmp = __old; NV_DISPATCH_TARGET( @@ -1156,9 +1184,9 @@ template static inli template static inline _CCCL_DEVICE void __cuda_compare_exchange_release_64_block(_CUDA_A __ptr, _CUDA_B& __dst, _CUDA_C __cmp, _CUDA_D __op) { asm volatile("atom.cas.release.cta.b64 %0,[%1],%2,%3;" : "=l"(__dst) : "l"(__ptr),"l"(__cmp),"l"(__op) : "memory"); } template static inline _CCCL_DEVICE void __cuda_compare_exchange_volatile_64_block(_CUDA_A __ptr, _CUDA_B& __dst, _CUDA_C __cmp, _CUDA_D __op) { asm volatile("atom.cas.cta.b64 %0,[%1],%2,%3;" : "=l"(__dst) : "l"(__ptr),"l"(__cmp),"l"(__op) : "memory"); } template = 0> -_CCCL_DEVICE bool __atomic_compare_exchange_cuda(volatile _Type *__ptr, _Type *__expected, const _Type *__desired, bool, 
int __success_memorder, int __failure_memorder, __thread_scope_block_tag) { +_CCCL_DEVICE bool __atomic_compare_exchange_cuda(volatile _Type *__ptr, _Type *__expected, const _Type __desired, bool, int __success_memorder, int __failure_memorder, __thread_scope_block_tag) { uint64_t __tmp = 0, __old = 0, __old_tmp; - memcpy(&__tmp, __desired, 8); + memcpy(&__tmp, &__desired, 8); memcpy(&__old, __expected, 8); __old_tmp = __old; NV_DISPATCH_TARGET( @@ -1190,9 +1218,9 @@ _CCCL_DEVICE bool __atomic_compare_exchange_cuda(volatile _Type *__ptr, _Type *_ return __ret; } template = 0> -_CCCL_DEVICE bool __atomic_compare_exchange_cuda(_Type *__ptr, _Type *__expected, const _Type *__desired, bool, int __success_memorder, int __failure_memorder, __thread_scope_block_tag) { +_CCCL_DEVICE bool __atomic_compare_exchange_cuda(_Type *__ptr, _Type *__expected, const _Type __desired, bool, int __success_memorder, int __failure_memorder, __thread_scope_block_tag) { uint64_t __tmp = 0, __old = 0, __old_tmp; - memcpy(&__tmp, __desired, 8); + memcpy(&__tmp, &__desired, 8); memcpy(&__old, __expected, 8); __old_tmp = __old; NV_DISPATCH_TARGET( @@ -2426,9 +2454,9 @@ template static inli template static inline _CCCL_DEVICE void __cuda_compare_exchange_release_32_device(_CUDA_A __ptr, _CUDA_B& __dst, _CUDA_C __cmp, _CUDA_D __op) { asm volatile("atom.cas.release.gpu.b32 %0,[%1],%2,%3;" : "=r"(__dst) : "l"(__ptr),"r"(__cmp),"r"(__op) : "memory"); } template static inline _CCCL_DEVICE void __cuda_compare_exchange_volatile_32_device(_CUDA_A __ptr, _CUDA_B& __dst, _CUDA_C __cmp, _CUDA_D __op) { asm volatile("atom.cas.gpu.b32 %0,[%1],%2,%3;" : "=r"(__dst) : "l"(__ptr),"r"(__cmp),"r"(__op) : "memory"); } template = 0> -_CCCL_DEVICE bool __atomic_compare_exchange_cuda(volatile _Type *__ptr, _Type *__expected, const _Type *__desired, bool, int __success_memorder, int __failure_memorder, __thread_scope_device_tag) { +_CCCL_DEVICE bool __atomic_compare_exchange_cuda(volatile _Type *__ptr, _Type *__expected, const _Type __desired, bool, int __success_memorder, int __failure_memorder, __thread_scope_device_tag) { uint32_t __tmp = 0, __old = 0, __old_tmp; - memcpy(&__tmp, __desired, 4); + memcpy(&__tmp, &__desired, 4); memcpy(&__old, __expected, 4); __old_tmp = __old; NV_DISPATCH_TARGET( @@ -2460,9 +2488,9 @@ _CCCL_DEVICE bool __atomic_compare_exchange_cuda(volatile _Type *__ptr, _Type *_ return __ret; } template = 0> -_CCCL_DEVICE bool __atomic_compare_exchange_cuda(_Type *__ptr, _Type *__expected, const _Type *__desired, bool, int __success_memorder, int __failure_memorder, __thread_scope_device_tag) { +_CCCL_DEVICE bool __atomic_compare_exchange_cuda(_Type *__ptr, _Type *__expected, const _Type __desired, bool, int __success_memorder, int __failure_memorder, __thread_scope_device_tag) { uint32_t __tmp = 0, __old = 0, __old_tmp; - memcpy(&__tmp, __desired, 4); + memcpy(&__tmp, &__desired, 4); memcpy(&__old, __expected, 4); __old_tmp = __old; NV_DISPATCH_TARGET( @@ -3333,9 +3361,9 @@ template static inli template static inline _CCCL_DEVICE void __cuda_compare_exchange_release_64_device(_CUDA_A __ptr, _CUDA_B& __dst, _CUDA_C __cmp, _CUDA_D __op) { asm volatile("atom.cas.release.gpu.b64 %0,[%1],%2,%3;" : "=l"(__dst) : "l"(__ptr),"l"(__cmp),"l"(__op) : "memory"); } template static inline _CCCL_DEVICE void __cuda_compare_exchange_volatile_64_device(_CUDA_A __ptr, _CUDA_B& __dst, _CUDA_C __cmp, _CUDA_D __op) { asm volatile("atom.cas.gpu.b64 %0,[%1],%2,%3;" : "=l"(__dst) : "l"(__ptr),"l"(__cmp),"l"(__op) : "memory"); } template = 0> 
-_CCCL_DEVICE bool __atomic_compare_exchange_cuda(volatile _Type *__ptr, _Type *__expected, const _Type *__desired, bool, int __success_memorder, int __failure_memorder, __thread_scope_device_tag) { +_CCCL_DEVICE bool __atomic_compare_exchange_cuda(volatile _Type *__ptr, _Type *__expected, const _Type __desired, bool, int __success_memorder, int __failure_memorder, __thread_scope_device_tag) { uint64_t __tmp = 0, __old = 0, __old_tmp; - memcpy(&__tmp, __desired, 8); + memcpy(&__tmp, &__desired, 8); memcpy(&__old, __expected, 8); __old_tmp = __old; NV_DISPATCH_TARGET( @@ -3367,9 +3395,9 @@ _CCCL_DEVICE bool __atomic_compare_exchange_cuda(volatile _Type *__ptr, _Type *_ return __ret; } template = 0> -_CCCL_DEVICE bool __atomic_compare_exchange_cuda(_Type *__ptr, _Type *__expected, const _Type *__desired, bool, int __success_memorder, int __failure_memorder, __thread_scope_device_tag) { +_CCCL_DEVICE bool __atomic_compare_exchange_cuda(_Type *__ptr, _Type *__expected, const _Type __desired, bool, int __success_memorder, int __failure_memorder, __thread_scope_device_tag) { uint64_t __tmp = 0, __old = 0, __old_tmp; - memcpy(&__tmp, __desired, 8); + memcpy(&__tmp, &__desired, 8); memcpy(&__old, __expected, 8); __old_tmp = __old; NV_DISPATCH_TARGET( @@ -4603,9 +4631,9 @@ template static inli template static inline _CCCL_DEVICE void __cuda_compare_exchange_release_32_system(_CUDA_A __ptr, _CUDA_B& __dst, _CUDA_C __cmp, _CUDA_D __op) { asm volatile("atom.cas.release.sys.b32 %0,[%1],%2,%3;" : "=r"(__dst) : "l"(__ptr),"r"(__cmp),"r"(__op) : "memory"); } template static inline _CCCL_DEVICE void __cuda_compare_exchange_volatile_32_system(_CUDA_A __ptr, _CUDA_B& __dst, _CUDA_C __cmp, _CUDA_D __op) { asm volatile("atom.cas.sys.b32 %0,[%1],%2,%3;" : "=r"(__dst) : "l"(__ptr),"r"(__cmp),"r"(__op) : "memory"); } template = 0> -_CCCL_DEVICE bool __atomic_compare_exchange_cuda(volatile _Type *__ptr, _Type *__expected, const _Type *__desired, bool, int __success_memorder, int __failure_memorder, __thread_scope_system_tag) { +_CCCL_DEVICE bool __atomic_compare_exchange_cuda(volatile _Type *__ptr, _Type *__expected, const _Type __desired, bool, int __success_memorder, int __failure_memorder, __thread_scope_system_tag) { uint32_t __tmp = 0, __old = 0, __old_tmp; - memcpy(&__tmp, __desired, 4); + memcpy(&__tmp, &__desired, 4); memcpy(&__old, __expected, 4); __old_tmp = __old; NV_DISPATCH_TARGET( @@ -4637,9 +4665,9 @@ _CCCL_DEVICE bool __atomic_compare_exchange_cuda(volatile _Type *__ptr, _Type *_ return __ret; } template = 0> -_CCCL_DEVICE bool __atomic_compare_exchange_cuda(_Type *__ptr, _Type *__expected, const _Type *__desired, bool, int __success_memorder, int __failure_memorder, __thread_scope_system_tag) { +_CCCL_DEVICE bool __atomic_compare_exchange_cuda(_Type *__ptr, _Type *__expected, const _Type __desired, bool, int __success_memorder, int __failure_memorder, __thread_scope_system_tag) { uint32_t __tmp = 0, __old = 0, __old_tmp; - memcpy(&__tmp, __desired, 4); + memcpy(&__tmp, &__desired, 4); memcpy(&__old, __expected, 4); __old_tmp = __old; NV_DISPATCH_TARGET( @@ -5510,9 +5538,9 @@ template static inli template static inline _CCCL_DEVICE void __cuda_compare_exchange_release_64_system(_CUDA_A __ptr, _CUDA_B& __dst, _CUDA_C __cmp, _CUDA_D __op) { asm volatile("atom.cas.release.sys.b64 %0,[%1],%2,%3;" : "=l"(__dst) : "l"(__ptr),"l"(__cmp),"l"(__op) : "memory"); } template static inline _CCCL_DEVICE void __cuda_compare_exchange_volatile_64_system(_CUDA_A __ptr, _CUDA_B& __dst, _CUDA_C __cmp, _CUDA_D __op) { 
asm volatile("atom.cas.sys.b64 %0,[%1],%2,%3;" : "=l"(__dst) : "l"(__ptr),"l"(__cmp),"l"(__op) : "memory"); } template = 0> -_CCCL_DEVICE bool __atomic_compare_exchange_cuda(volatile _Type *__ptr, _Type *__expected, const _Type *__desired, bool, int __success_memorder, int __failure_memorder, __thread_scope_system_tag) { +_CCCL_DEVICE bool __atomic_compare_exchange_cuda(volatile _Type *__ptr, _Type *__expected, const _Type __desired, bool, int __success_memorder, int __failure_memorder, __thread_scope_system_tag) { uint64_t __tmp = 0, __old = 0, __old_tmp; - memcpy(&__tmp, __desired, 8); + memcpy(&__tmp, &__desired, 8); memcpy(&__old, __expected, 8); __old_tmp = __old; NV_DISPATCH_TARGET( @@ -5544,9 +5572,9 @@ _CCCL_DEVICE bool __atomic_compare_exchange_cuda(volatile _Type *__ptr, _Type *_ return __ret; } template = 0> -_CCCL_DEVICE bool __atomic_compare_exchange_cuda(_Type *__ptr, _Type *__expected, const _Type *__desired, bool, int __success_memorder, int __failure_memorder, __thread_scope_system_tag) { +_CCCL_DEVICE bool __atomic_compare_exchange_cuda(_Type *__ptr, _Type *__expected, const _Type __desired, bool, int __success_memorder, int __failure_memorder, __thread_scope_system_tag) { uint64_t __tmp = 0, __old = 0, __old_tmp; - memcpy(&__tmp, __desired, 8); + memcpy(&__tmp, &__desired, 8); memcpy(&__old, __expected, 8); __old_tmp = __old; NV_DISPATCH_TARGET( @@ -6542,4 +6570,10 @@ _CCCL_DEVICE _Type* __atomic_fetch_sub_cuda(_Type **__ptr, ptrdiff_t __val, int return __ret; } +#endif // defined(_CCCL_CUDA_COMPILER) + +_LIBCUDACXX_END_NAMESPACE_STD + +#endif // _LIBCUDACXX___ATOMIC_FUNCTIONS_CUDA_PTX_GENERATED_H + // clang-format on diff --git a/libcudacxx/include/cuda/std/__atomic/functions/host.h b/libcudacxx/include/cuda/std/__atomic/functions/host.h new file mode 100644 index 0000000000..59dc6bd093 --- /dev/null +++ b/libcudacxx/include/cuda/std/__atomic/functions/host.h @@ -0,0 +1,250 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. 
+// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCUDACXX___ATOMICS_FUNCTIONS_HOST_H +#define _LIBCUDACXX___ATOMICS_FUNCTIONS_HOST_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include +#include +#include + +_LIBCUDACXX_BEGIN_NAMESPACE_STD + +_CCCL_DIAG_PUSH +_CCCL_DIAG_SUPPRESS_CLANG("-Watomic-alignment") + +#if !defined(_CCCL_COMPILER_NVRTC) + +template +struct __atomic_alignment_wrapper +{ + _CCCL_ALIGNAS(sizeof(_Tp)) _Tp __atom; +}; + +template +__atomic_alignment_wrapper<__remove_cv_t<_Tp>>& __atomic_auto_align(_Tp* __a) +{ + using __aligned_t = __atomic_alignment_wrapper<__remove_cv_t<_Tp>>; + return *reinterpret_cast<__aligned_t*>(__a); +}; +template +const __atomic_alignment_wrapper<__remove_cv_t<_Tp>>& __atomic_auto_align(const _Tp* __a) +{ + using __aligned_t = const __atomic_alignment_wrapper<__remove_cv_t<_Tp>>; + return *reinterpret_cast<__aligned_t*>(__a); +}; +template +volatile __atomic_alignment_wrapper<__remove_cv_t<_Tp>>& __atomic_auto_align(volatile _Tp* __a) +{ + using __aligned_t = volatile __atomic_alignment_wrapper<__remove_cv_t<_Tp>>; + return *reinterpret_cast<__aligned_t*>(__a); +}; +template +const volatile __atomic_alignment_wrapper<__remove_cv_t<_Tp>>& __atomic_auto_align(const volatile _Tp* __a) +{ + using __aligned_t = const volatile __atomic_alignment_wrapper<__remove_cv_t<_Tp>>; + return *reinterpret_cast<__aligned_t*>(__a); +}; + +// Guard ifdef for lock free query in case it is assigned elsewhere (MSVC/CUDA) +inline void __atomic_thread_fence_host(memory_order __order) +{ + __atomic_thread_fence(__atomic_order_to_int(__order)); +} + +inline void __atomic_signal_fence_host(memory_order __order) +{ + __atomic_signal_fence(__atomic_order_to_int(__order)); +} + +template +inline void __atomic_store_host(_Tp* __a, _Up __val, memory_order __order) +{ + __atomic_store( + &__atomic_auto_align<_Tp>(__a), &__atomic_auto_align<__remove_cv_t<_Tp>>(&__val), __atomic_order_to_int(__order)); +} + +template +inline auto __atomic_load_host(_Tp* __a, memory_order __order) -> __remove_cv_t<_Tp> +{ + __remove_cv_t<_Tp> __ret; + __atomic_load( + &__atomic_auto_align<_Tp>(__a), &__atomic_auto_align<__remove_cv_t<_Tp>>(&__ret), __atomic_order_to_int(__order)); + return __ret; +} + +template +inline auto __atomic_exchange_host(_Tp* __a, _Up __val, memory_order __order) -> __remove_cv_t<_Tp> +{ + __remove_cv_t<_Tp> __ret; + __atomic_exchange(&__atomic_auto_align<_Tp>(__a), + &__atomic_auto_align<__remove_cv_t<_Tp>>(&__val), + &__atomic_auto_align<__remove_cv_t<_Tp>>(&__ret), + __atomic_order_to_int(__order)); + return __ret; +} + +template +inline bool __atomic_compare_exchange_strong_host( + _Tp* __a, _Up* __expected, _Up __value, memory_order __success, memory_order __failure) +{ + return __atomic_compare_exchange( + &__atomic_auto_align<_Tp>(__a), + &__atomic_auto_align<__remove_cv_t<_Tp>>(__expected), + &__atomic_auto_align<__remove_cv_t<_Tp>>(&__value), + false, + __atomic_order_to_int(__success), + __atomic_failure_order_to_int(__failure)); +} + +template +inline bool __atomic_compare_exchange_weak_host( + _Tp* __a, _Up* __expected, _Up __value, memory_order __success, memory_order __failure) +{ + return __atomic_compare_exchange( + &__atomic_auto_align<_Tp>(__a), + 
&__atomic_auto_align<__remove_cv_t<_Tp>>(__expected), + &__atomic_auto_align<__remove_cv_t<_Tp>>(&__value), + true, + __atomic_order_to_int(__success), + __atomic_failure_order_to_int(__failure)); +} + +template +struct __atomic_ptr_skip +{ + static constexpr auto __skip = 1; +}; + +template +struct __atomic_ptr_skip<_Tp*> +{ + static constexpr auto __skip = sizeof(_Tp); +}; + +// FIXME: Haven't figured out what the spec says about using arrays with +// atomic_fetch_add. Force a failure rather than creating bad behavior. +template +struct __atomic_ptr_skip<_Tp[]> +{}; +template +struct __atomic_ptr_skip<_Tp[n]> +{}; + +template +using __atomic_ptr_skip_t = __atomic_ptr_skip<__remove_cvref_t<_Tp>>; + +template ::value, int> = 0> +inline __remove_cv_t<_Tp> __atomic_fetch_add_host(_Tp* __a, _Td __delta, memory_order __order) +{ + constexpr auto __skip_v = __atomic_ptr_skip_t<_Tp>::__skip; + return __atomic_fetch_add(__a, __delta * __skip_v, __atomic_order_to_int(__order)); +} + +template ::value, int> = 0> +inline __remove_cv_t<_Tp> __atomic_fetch_add_host(_Tp* __a, _Td __delta, memory_order __order) +{ + auto __expected = __atomic_load_host(__a, memory_order_relaxed); + auto __desired = __expected + __delta; + + while (!__atomic_compare_exchange_strong_host(__a, &__expected, __desired, __order, __order)) + { + __desired = __expected + __delta; + } + + return __expected; +} + +template ::value, int> = 0> +inline __remove_cv_t<_Tp> __atomic_fetch_sub_host(_Tp* __a, _Td __delta, memory_order __order) +{ + constexpr auto __skip_v = __atomic_ptr_skip_t<_Tp>::__skip; + return __atomic_fetch_sub(__a, __delta * __skip_v, __atomic_order_to_int(__order)); +} + +template ::value, int> = 0> +inline __remove_cv_t<_Tp> __atomic_fetch_sub_host(_Tp* __a, _Td __delta, memory_order __order) +{ + auto __expected = __atomic_load_host(__a, memory_order_relaxed); + auto __desired = __expected - __delta; + + while (!__atomic_compare_exchange_strong_host(__a, &__expected, __desired, __order, __order)) + { + __desired = __expected - __delta; + } + + return __expected; +} + +template +inline __remove_cv_t<_Tp> __atomic_fetch_and_host(_Tp* __a, _Td __pattern, memory_order __order) +{ + return __atomic_fetch_and(__a, __pattern, __atomic_order_to_int(__order)); +} + +template +inline __remove_cv_t<_Tp> __atomic_fetch_or_host(_Tp* __a, _Td __pattern, memory_order __order) +{ + return __atomic_fetch_or(__a, __pattern, __atomic_order_to_int(__order)); +} + +template +inline __remove_cv_t<_Tp> __atomic_fetch_xor_host(_Tp* __a, _Td __pattern, memory_order __order) +{ + return __atomic_fetch_xor(__a, __pattern, __atomic_order_to_int(__order)); +} + +template +inline __remove_cv_t<_Tp> __atomic_fetch_max_host(_Tp* __a, _Td __val, memory_order __order) +{ + auto __expected = __atomic_load_host(__a, memory_order_relaxed); + auto __desired = __expected > __val ? __expected : __val; + + while (__desired == __val && !__atomic_compare_exchange_strong_host(__a, &__expected, __desired, __order, __order)) + { + __desired = __expected > __val ? __expected : __val; + } + + return __expected; +} + +template +inline __remove_cv_t<_Tp> __atomic_fetch_min_host(_Tp* __a, _Td __val, memory_order __order) +{ + auto __expected = __atomic_load_host(__a, memory_order_relaxed); + auto __desired = __expected < __val ? __expected : __val; + + while (__desired == __val && !__atomic_compare_exchange_strong_host(__a, &__expected, __desired, __order, __order)) + { + __desired = __expected < __val ? 
__expected : __val; + } + + return __expected; +} + +#endif // !defined(_CCCL_COMPILER_NVRTC) + +_CCCL_DIAG_POP + +_LIBCUDACXX_END_NAMESPACE_STD + +#endif // _LIBCUDACXX___ATOMICS_FUNCTIONS_HOST_H diff --git a/libcudacxx/include/cuda/std/__atomic/order.h b/libcudacxx/include/cuda/std/__atomic/order.h new file mode 100644 index 0000000000..935efab757 --- /dev/null +++ b/libcudacxx/include/cuda/std/__atomic/order.h @@ -0,0 +1,156 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef __LIBCUDACXX___ATOMIC_ORDER_H +#define __LIBCUDACXX___ATOMIC_ORDER_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include + +_LIBCUDACXX_BEGIN_NAMESPACE_STD + +#define _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) \ + _LIBCUDACXX_DIAGNOSE_WARNING( \ + __m == memory_order_consume || __m == memory_order_acquire || __m == memory_order_acq_rel, \ + "memory order argument to atomic operation is invalid") + +#define _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) \ + _LIBCUDACXX_DIAGNOSE_WARNING(__m == memory_order_release || __m == memory_order_acq_rel, \ + "memory order argument to atomic operation is invalid") + +#define _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__m, __f) \ + _LIBCUDACXX_DIAGNOSE_WARNING(__f == memory_order_release || __f == memory_order_acq_rel, \ + "memory order argument to atomic operation is invalid") + +#ifndef __ATOMIC_RELAXED +# define __ATOMIC_RELAXED 0 +# define __ATOMIC_CONSUME 1 +# define __ATOMIC_ACQUIRE 2 +# define __ATOMIC_RELEASE 3 +# define __ATOMIC_ACQ_REL 4 +# define __ATOMIC_SEQ_CST 5 +#endif //__ATOMIC_RELAXED + +// Figure out what the underlying type for `memory_order` would be if it were +// declared as an unscoped enum (accounting for -fshort-enums). Use this result +// to pin the underlying type in C++20. 
+enum __legacy_memory_order +{ + __mo_relaxed, + __mo_consume, + __mo_acquire, + __mo_release, + __mo_acq_rel, + __mo_seq_cst +}; + +using __memory_order_underlying_t = underlying_type<__legacy_memory_order>::type; + +#if _CCCL_STD_VER >= 2020 + +enum class memory_order : __memory_order_underlying_t +{ + relaxed = __mo_relaxed, + consume = __mo_consume, + acquire = __mo_acquire, + release = __mo_release, + acq_rel = __mo_acq_rel, + seq_cst = __mo_seq_cst +}; + +inline constexpr auto memory_order_relaxed = memory_order::relaxed; +inline constexpr auto memory_order_consume = memory_order::consume; +inline constexpr auto memory_order_acquire = memory_order::acquire; +inline constexpr auto memory_order_release = memory_order::release; +inline constexpr auto memory_order_acq_rel = memory_order::acq_rel; +inline constexpr auto memory_order_seq_cst = memory_order::seq_cst; + +#else // ^^^ C++20 ^^^ / vvv C++17 vvv + +typedef enum memory_order +{ + memory_order_relaxed = __mo_relaxed, + memory_order_consume = __mo_consume, + memory_order_acquire = __mo_acquire, + memory_order_release = __mo_release, + memory_order_acq_rel = __mo_acq_rel, + memory_order_seq_cst = __mo_seq_cst, +} memory_order; + +#endif // _CCCL_STD_VER >= 2020 + +_CCCL_HOST_DEVICE inline int __stronger_order_cuda(int __a, int __b) +{ + int const __max = __a > __b ? __a : __b; + if (__max != __ATOMIC_RELEASE) + { + return __max; + } + constexpr int __xform[] = {__ATOMIC_RELEASE, __ATOMIC_ACQ_REL, __ATOMIC_ACQ_REL, __ATOMIC_RELEASE}; + return __xform[__a < __b ? __a : __b]; +} + +_CCCL_HOST_DEVICE inline constexpr int __atomic_order_to_int(memory_order __order) +{ + // Avoid switch statement to make this a constexpr. + return __order == memory_order_relaxed + ? __ATOMIC_RELAXED + : (__order == memory_order_acquire + ? __ATOMIC_ACQUIRE + : (__order == memory_order_release + ? __ATOMIC_RELEASE + : (__order == memory_order_seq_cst + ? __ATOMIC_SEQ_CST + : (__order == memory_order_acq_rel ? __ATOMIC_ACQ_REL : __ATOMIC_CONSUME)))); +} + +_CCCL_HOST_DEVICE inline constexpr int __atomic_failure_order_to_int(memory_order __order) +{ + // Avoid switch statement to make this a constexpr. + return __order == memory_order_relaxed + ? __ATOMIC_RELAXED + : (__order == memory_order_acquire + ? __ATOMIC_ACQUIRE + : (__order == memory_order_release + ? __ATOMIC_RELAXED + : (__order == memory_order_seq_cst + ? __ATOMIC_SEQ_CST + : (__order == memory_order_acq_rel ? 
__ATOMIC_ACQUIRE : __ATOMIC_CONSUME)))); +} + +static_assert((is_same::type, __memory_order_underlying_t>::value), + "unexpected underlying type for std::memory_order"); + +_LIBCUDACXX_END_NAMESPACE_STD + +_LIBCUDACXX_BEGIN_NAMESPACE_CUDA + +using memory_order = _CUDA_VSTD::memory_order; + +_LIBCUDACXX_INLINE_VAR constexpr memory_order memory_order_relaxed = _CUDA_VSTD::memory_order_relaxed; +_LIBCUDACXX_INLINE_VAR constexpr memory_order memory_order_consume = _CUDA_VSTD::memory_order_consume; +_LIBCUDACXX_INLINE_VAR constexpr memory_order memory_order_acquire = _CUDA_VSTD::memory_order_acquire; +_LIBCUDACXX_INLINE_VAR constexpr memory_order memory_order_release = _CUDA_VSTD::memory_order_release; +_LIBCUDACXX_INLINE_VAR constexpr memory_order memory_order_acq_rel = _CUDA_VSTD::memory_order_acq_rel; +_LIBCUDACXX_INLINE_VAR constexpr memory_order memory_order_seq_cst = _CUDA_VSTD::memory_order_seq_cst; + +_LIBCUDACXX_END_NAMESPACE_CUDA + +#endif // __LIBCUDACXX___ATOMIC_ORDER_H diff --git a/libcudacxx/include/cuda/std/__atomic/platform.h b/libcudacxx/include/cuda/std/__atomic/platform.h new file mode 100644 index 0000000000..6367e20234 --- /dev/null +++ b/libcudacxx/include/cuda/std/__atomic/platform.h @@ -0,0 +1,89 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef __LIBCUDACXX___ATOMIC_PLATFORM_H +#define __LIBCUDACXX___ATOMIC_PLATFORM_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#if defined(_CCCL_COMPILER_MSVC) +# include +#endif + +#if defined(__CLANG_ATOMIC_BOOL_LOCK_FREE) +# define LIBCUDACXX_ATOMIC_BOOL_LOCK_FREE __CLANG_ATOMIC_BOOL_LOCK_FREE +# define LIBCUDACXX_ATOMIC_CHAR_LOCK_FREE __CLANG_ATOMIC_CHAR_LOCK_FREE +# define LIBCUDACXX_ATOMIC_CHAR16_T_LOCK_FREE __CLANG_ATOMIC_CHAR16_T_LOCK_FREE +# define LIBCUDACXX_ATOMIC_CHAR32_T_LOCK_FREE __CLANG_ATOMIC_CHAR32_T_LOCK_FREE +# define LIBCUDACXX_ATOMIC_WCHAR_T_LOCK_FREE __CLANG_ATOMIC_WCHAR_T_LOCK_FREE +# define LIBCUDACXX_ATOMIC_SHORT_LOCK_FREE __CLANG_ATOMIC_SHORT_LOCK_FREE +# define LIBCUDACXX_ATOMIC_INT_LOCK_FREE __CLANG_ATOMIC_INT_LOCK_FREE +# define LIBCUDACXX_ATOMIC_LONG_LOCK_FREE __CLANG_ATOMIC_LONG_LOCK_FREE +# define LIBCUDACXX_ATOMIC_LLONG_LOCK_FREE __CLANG_ATOMIC_LLONG_LOCK_FREE +# define LIBCUDACXX_ATOMIC_POINTER_LOCK_FREE __CLANG_ATOMIC_POINTER_LOCK_FREE +#elif defined(__GCC_ATOMIC_BOOL_LOCK_FREE) +# define LIBCUDACXX_ATOMIC_BOOL_LOCK_FREE __GCC_ATOMIC_BOOL_LOCK_FREE +# define LIBCUDACXX_ATOMIC_CHAR_LOCK_FREE __GCC_ATOMIC_CHAR_LOCK_FREE +# define LIBCUDACXX_ATOMIC_CHAR16_T_LOCK_FREE __GCC_ATOMIC_CHAR16_T_LOCK_FREE +# define LIBCUDACXX_ATOMIC_CHAR32_T_LOCK_FREE __GCC_ATOMIC_CHAR32_T_LOCK_FREE +# define LIBCUDACXX_ATOMIC_WCHAR_T_LOCK_FREE __GCC_ATOMIC_WCHAR_T_LOCK_FREE +# define LIBCUDACXX_ATOMIC_SHORT_LOCK_FREE __GCC_ATOMIC_SHORT_LOCK_FREE +# define LIBCUDACXX_ATOMIC_INT_LOCK_FREE __GCC_ATOMIC_INT_LOCK_FREE +# define 
LIBCUDACXX_ATOMIC_LONG_LOCK_FREE __GCC_ATOMIC_LONG_LOCK_FREE +# define LIBCUDACXX_ATOMIC_LLONG_LOCK_FREE __GCC_ATOMIC_LLONG_LOCK_FREE +# define LIBCUDACXX_ATOMIC_POINTER_LOCK_FREE __GCC_ATOMIC_POINTER_LOCK_FREE +#else // !defined(__CLANG_ATOMIC_BOOL_LOCK_FREE) && !defined(__GCC_ATOMIC_BOOL_LOCK_FREE) +# define LIBCUDACXX_ATOMIC_BOOL_LOCK_FREE 2 +# define LIBCUDACXX_ATOMIC_CHAR_LOCK_FREE 2 +# define LIBCUDACXX_ATOMIC_CHAR16_T_LOCK_FREE 2 +# define LIBCUDACXX_ATOMIC_CHAR32_T_LOCK_FREE 2 +# define LIBCUDACXX_ATOMIC_WCHAR_T_LOCK_FREE 2 +# define LIBCUDACXX_ATOMIC_SHORT_LOCK_FREE 2 +# define LIBCUDACXX_ATOMIC_INT_LOCK_FREE 2 +# define LIBCUDACXX_ATOMIC_LONG_LOCK_FREE 2 +# define LIBCUDACXX_ATOMIC_LLONG_LOCK_FREE 2 +# define LIBCUDACXX_ATOMIC_POINTER_LOCK_FREE 2 +#endif + +#define _LIBCUDACXX_ATOMIC_IS_LOCK_FREE(size) (size <= 8) + +_LIBCUDACXX_BEGIN_NAMESPACE_STD + +#if defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) +template +struct __atomic_is_always_lock_free +{ + enum + { + __value = _LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE(sizeof(_Tp), 0) + }; +}; +#else +template +struct __atomic_is_always_lock_free +{ + enum + { + __value = sizeof(_Tp) <= 8 + }; +}; +#endif // defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) + +_LIBCUDACXX_END_NAMESPACE_STD + +#endif // __LIBCUDACXX___ATOMIC_PLATFORM_H diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/support/atomic/atomic_msvc.h b/libcudacxx/include/cuda/std/__atomic/platform/msvc_to_builtins.h similarity index 92% rename from libcudacxx/include/cuda/std/detail/libcxx/include/support/atomic/atomic_msvc.h rename to libcudacxx/include/cuda/std/__atomic/platform/msvc_to_builtins.h index 53cd9cd4d7..8afa9756ef 100644 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/support/atomic/atomic_msvc.h +++ b/libcudacxx/include/cuda/std/__atomic/platform/msvc_to_builtins.h @@ -1,4 +1,3 @@ -// -*- C++ -*- //===----------------------------------------------------------------------===// // // Part of libcu++, the C++ Standard Library for your entire system, @@ -9,28 +8,42 @@ // //===----------------------------------------------------------------------===// -#ifndef _MSC_VER -# error "This file is only for CL.EXE's benefit" -#endif +#ifndef __LIBCUDACXX___ATOMIC_PLATFORM_MSVC_H +#define __LIBCUDACXX___ATOMIC_PLATFORM_MSVC_H -#define _LIBCUDACXX_COMPILER_BARRIER() _ReadWriteBarrier() +#include -#if defined(_M_ARM) || defined(_M_ARM64) -# define _LIBCUDACXX_MEMORY_BARRIER() __dmb(0xB) // inner shared data memory barrier -# define _LIBCUDACXX_COMPILER_OR_MEMORY_BARRIER() _LIBCUDACXX_MEMORY_BARRIER() -#elif defined(_M_IX86) || defined(_M_X64) -# define _LIBCUDACXX_MEMORY_BARRIER() __faststorefence() +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#if defined(_CCCL_COMPILER_MSVC) + +# include +# include + +# include + +_LIBCUDACXX_BEGIN_NAMESPACE_STD + +# define _LIBCUDACXX_COMPILER_BARRIER() _ReadWriteBarrier() + +# if defined(_M_ARM) || defined(_M_ARM64) +# define _LIBCUDACXX_MEMORY_BARRIER() __dmb(0xB) // inner shared data memory barrier +# define _LIBCUDACXX_COMPILER_OR_MEMORY_BARRIER() _LIBCUDACXX_MEMORY_BARRIER() +# elif defined(_M_IX86) || defined(_M_X64) +# define _LIBCUDACXX_MEMORY_BARRIER() __faststorefence() // x86/x64 hardware only emits memory barriers inside _Interlocked intrinsics -# define _LIBCUDACXX_COMPILER_OR_MEMORY_BARRIER() 
_LIBCUDACXX_COMPILER_BARRIER() -#else // ^^^ x86/x64 / unsupported hardware vvv -# error Unsupported hardware -#endif // hardware +# define _LIBCUDACXX_COMPILER_OR_MEMORY_BARRIER() _LIBCUDACXX_COMPILER_BARRIER() +# else // ^^^ x86/x64 / unsupported hardware vvv +# error Unsupported hardware +# endif // hardware // MSVC Does not have compiler intrinsics for lock-free checking -#ifndef _LIBCUDACXX_ATOMIC_IS_LOCK_FREE -# define _LIBCUDACXX_ATOMIC_IS_LOCK_FREE(__x) (__x <= 8) -#endif - inline int __stronger_order_msvc(int __a, int __b) { int const __max = __a > __b ? __a : __b; @@ -64,41 +77,41 @@ using _enable_if_sized_as = typename enable_if::typ template = 0> void __atomic_load_relaxed(const volatile _Type* __ptr, _Type* __ret) { -#ifdef _LIBCUDACXX_MSVC_HAS_NO_ISO_INTRIN +# ifdef _LIBCUDACXX_MSVC_HAS_NO_ISO_INTRIN __int8 __tmp = *(const volatile __int8*) __ptr; -#else +# else __int8 __tmp = __iso_volatile_load8((const volatile __int8*) __ptr); -#endif +# endif *__ret = reinterpret_cast<_Type&>(__tmp); } template = 0> void __atomic_load_relaxed(const volatile _Type* __ptr, _Type* __ret) { -#ifdef _LIBCUDACXX_MSVC_HAS_NO_ISO_INTRIN +# ifdef _LIBCUDACXX_MSVC_HAS_NO_ISO_INTRIN __int16 __tmp = *(const volatile __int16*) __ptr; -#else +# else __int16 __tmp = __iso_volatile_load16((const volatile __int16*) __ptr); -#endif +# endif *__ret = reinterpret_cast<_Type&>(__tmp); } template = 0> void __atomic_load_relaxed(const volatile _Type* __ptr, _Type* __ret) { -#ifdef _LIBCUDACXX_MSVC_HAS_NO_ISO_INTRIN +# ifdef _LIBCUDACXX_MSVC_HAS_NO_ISO_INTRIN __int32 __tmp = *(const volatile __int32*) __ptr; -#else +# else __int32 __tmp = __iso_volatile_load32((const volatile __int32*) __ptr); -#endif +# endif *__ret = reinterpret_cast<_Type&>(__tmp); } template = 0> void __atomic_load_relaxed(const volatile _Type* __ptr, _Type* __ret) { -#ifdef _LIBCUDACXX_MSVC_HAS_NO_ISO_INTRIN +# ifdef _LIBCUDACXX_MSVC_HAS_NO_ISO_INTRIN __int64 __tmp = *(const volatile __int64*) __ptr; -#else +# else __int64 __tmp = __iso_volatile_load64((const volatile __int64*) __ptr); -#endif +# endif *__ret = reinterpret_cast<_Type&>(__tmp); } @@ -128,45 +141,45 @@ void __atomic_store_relaxed(volatile _Type* __ptr, _Type* __val) { auto __t = reinterpret_cast<__int8*>(__val); auto __d = reinterpret_cast(__ptr); -#ifdef _LIBCUDACXX_MSVC_HAS_NO_ISO_INTRIN +# ifdef _LIBCUDACXX_MSVC_HAS_NO_ISO_INTRIN (void) _InterlockedExchange8(__d, *__t); -#else +# else __iso_volatile_store8(__d, *__t); -#endif +# endif } template = 0> void __atomic_store_relaxed(volatile _Type* __ptr, _Type* __val) { auto __t = reinterpret_cast<__int16*>(__val); auto __d = reinterpret_cast(__ptr); -#ifdef _LIBCUDACXX_MSVC_HAS_NO_ISO_INTRIN +# ifdef _LIBCUDACXX_MSVC_HAS_NO_ISO_INTRIN (void) _InterlockedExchange16(__d, *__t); -#else +# else __iso_volatile_store16(__d, *__t); -#endif +# endif } template = 0> void __atomic_store_relaxed(volatile _Type* __ptr, _Type* __val) { auto __t = reinterpret_cast<__int32*>(__val); auto __d = reinterpret_cast(__ptr); -#ifdef _LIBCUDACXX_MSVC_HAS_NO_ISO_INTRIN +# ifdef _LIBCUDACXX_MSVC_HAS_NO_ISO_INTRIN // int cannot be converted to long?... 
(void) _InterlockedExchange(reinterpret_cast(__d), *__t); -#else +# else __iso_volatile_store32(__d, *__t); -#endif +# endif } template = 0> void __atomic_store_relaxed(volatile _Type* __ptr, _Type* __val) { auto __t = reinterpret_cast<__int64*>(__val); auto __d = reinterpret_cast(__ptr); -#ifdef _LIBCUDACXX_MSVC_HAS_NO_ISO_INTRIN +# ifdef _LIBCUDACXX_MSVC_HAS_NO_ISO_INTRIN (void) _InterlockedExchange64(__d, *__t); -#else +# else __iso_volatile_store64(__d, *__t); -#endif +# endif } template @@ -622,4 +635,8 @@ _Type __atomic_fetch_min(_Type volatile* __ptr, _Delta __val, int __memorder) return __expected; } -#include +_LIBCUDACXX_END_NAMESPACE_STD + +#endif // defined(_CCCL_COMPILER_MSVC) + +#endif // __LIBCUDACXX___ATOMIC_PLATFORM_MSVC_H diff --git a/libcudacxx/include/cuda/std/__atomic/scopes.h b/libcudacxx/include/cuda/std/__atomic/scopes.h new file mode 100644 index 0000000000..70af777d5c --- /dev/null +++ b/libcudacxx/include/cuda/std/__atomic/scopes.h @@ -0,0 +1,99 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef __LIBCUDACXX___ATOMIC_SCOPES_H +#define __LIBCUDACXX___ATOMIC_SCOPES_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +_LIBCUDACXX_BEGIN_NAMESPACE_STD + +// REMEMBER CHANGES TO THESE ARE ABI BREAKING +// TODO: Space values out for potential new scopes +#ifndef __ATOMIC_BLOCK +# define __ATOMIC_SYSTEM 0 // 0 indicates default +# define __ATOMIC_DEVICE 1 +# define __ATOMIC_BLOCK 2 +# define __ATOMIC_THREAD 10 +#endif //__ATOMIC_BLOCK + +enum thread_scope +{ + thread_scope_system = __ATOMIC_SYSTEM, + thread_scope_device = __ATOMIC_DEVICE, + thread_scope_block = __ATOMIC_BLOCK, + thread_scope_thread = __ATOMIC_THREAD +}; + +struct __thread_scope_thread_tag +{}; +struct __thread_scope_block_tag +{}; +struct __thread_scope_device_tag +{}; +struct __thread_scope_system_tag +{}; + +template +struct __scope_enum_to_tag +{}; +/* This would be the implementation once an actual thread-scope backend exists. 
+template<> struct __scope_enum_to_tag<(int)thread_scope_thread> { + using type = __thread_scope_thread_tag; }; +Until then: */ +template <> +struct __scope_enum_to_tag<(int) thread_scope_thread> +{ + using __tag = __thread_scope_block_tag; +}; +template <> +struct __scope_enum_to_tag<(int) thread_scope_block> +{ + using __tag = __thread_scope_block_tag; +}; +template <> +struct __scope_enum_to_tag<(int) thread_scope_device> +{ + using __tag = __thread_scope_device_tag; +}; +template <> +struct __scope_enum_to_tag<(int) thread_scope_system> +{ + using __tag = __thread_scope_system_tag; +}; + +template +using __scope_to_tag = typename __scope_enum_to_tag<_Scope>::__tag; + +_LIBCUDACXX_END_NAMESPACE_STD + +_LIBCUDACXX_BEGIN_NAMESPACE_CUDA + +using _CUDA_VSTD::thread_scope; +using _CUDA_VSTD::thread_scope_block; +using _CUDA_VSTD::thread_scope_device; +using _CUDA_VSTD::thread_scope_system; +using _CUDA_VSTD::thread_scope_thread; + +using _CUDA_VSTD::__thread_scope_block_tag; +using _CUDA_VSTD::__thread_scope_device_tag; +using _CUDA_VSTD::__thread_scope_system_tag; + +_LIBCUDACXX_END_NAMESPACE_CUDA + +#endif // __LIBCUDACXX___ATOMIC_SCOPES_H diff --git a/libcudacxx/include/cuda/std/__atomic/types.h b/libcudacxx/include/cuda/std/__atomic/types.h new file mode 100644 index 0000000000..4b58ba4901 --- /dev/null +++ b/libcudacxx/include/cuda/std/__atomic/types.h @@ -0,0 +1,49 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef __LIBCUDACXX___ATOMIC_TYPES_H +#define __LIBCUDACXX___ATOMIC_TYPES_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include +#include +#include + +_LIBCUDACXX_BEGIN_NAMESPACE_STD + +template +struct __atomic_traits +{ + static constexpr bool __atomic_requires_lock = !__atomic_is_always_lock_free<_Tp>::__value; + static constexpr bool __atomic_requires_small = sizeof(_Tp) < 4; + static constexpr bool __atomic_supports_reference = + __atomic_is_always_lock_free<_Tp>::__value && (sizeof(_Tp) >= 4 && sizeof(_Tp) <= 8); +}; + +template +using __atomic_storage_t = + _If<__atomic_traits<_Tp>::__atomic_requires_small, + __atomic_small_storage<_Tp>, + _If<__atomic_traits<_Tp>::__atomic_requires_lock, __atomic_locked_storage<_Tp>, __atomic_storage<_Tp>>>; + +_LIBCUDACXX_END_NAMESPACE_STD + +#endif // __LIBCUDACXX___ATOMIC_TYPES_H diff --git a/libcudacxx/include/cuda/std/__atomic/types/base.h b/libcudacxx/include/cuda/std/__atomic/types/base.h new file mode 100644 index 0000000000..ecee01eb15 --- /dev/null +++ b/libcudacxx/include/cuda/std/__atomic/types/base.h @@ -0,0 +1,239 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCUDACXX___ATOMIC_TYPES_BASE_H +#define _LIBCUDACXX___ATOMIC_TYPES_BASE_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include + +_LIBCUDACXX_BEGIN_NAMESPACE_STD + +template +struct __atomic_storage +{ + using __underlying_t = _Tp; + static constexpr __atomic_tag __tag = __atomic_tag::__atomic_base_tag; + +#if !defined(_CCCL_COMPILER_GCC) || (__GNUC__ >= 5) + static_assert(_CCCL_TRAIT(is_trivially_copyable, _Tp), + "std::atomic requires that 'Tp' be a trivially copyable type"); +#endif + + _CCCL_ALIGNAS(sizeof(_Tp)) _Tp __a_value; + + constexpr explicit __atomic_storage() noexcept = default; + + _CCCL_HOST_DEVICE constexpr explicit inline __atomic_storage(_Tp value) noexcept + : __a_value(value) + {} + + _CCCL_HOST_DEVICE inline auto get() noexcept -> __underlying_t* + { + return &__a_value; + } + _CCCL_HOST_DEVICE inline auto get() const noexcept -> const __underlying_t* + { + return &__a_value; + } + _CCCL_HOST_DEVICE inline auto get() volatile noexcept -> volatile __underlying_t* + { + return &__a_value; + } + _CCCL_HOST_DEVICE inline auto get() const volatile noexcept -> const volatile __underlying_t* + { + return &__a_value; + } +}; + +_CCCL_HOST_DEVICE inline void __atomic_thread_fence_dispatch(memory_order __order) +{ + NV_DISPATCH_TARGET( + NV_IS_DEVICE, + (__atomic_thread_fence_cuda(static_cast<__memory_order_underlying_t>(__order), __thread_scope_system_tag());), + NV_IS_HOST, + (__atomic_thread_fence_host(__order);)) +} + +_CCCL_HOST_DEVICE inline void __atomic_signal_fence_dispatch(memory_order __order) +{ + NV_DISPATCH_TARGET(NV_IS_DEVICE, + (__atomic_signal_fence_cuda(static_cast<__memory_order_underlying_t>(__order));), + NV_IS_HOST, + (__atomic_signal_fence_host(__order);)) +} + +template = 0> +_CCCL_HOST_DEVICE inline void __atomic_init_dispatch(_Sto* __a, _Up __val) +{ + __atomic_assign_volatile(__a->get(), __val); +} + +template = 0> +_CCCL_HOST_DEVICE inline void __atomic_store_dispatch(_Sto* __a, _Up __val, memory_order __order, _Sco = {}) +{ + NV_DISPATCH_TARGET( + NV_IS_DEVICE, + (__atomic_store_n_cuda(__a->get(), __val, static_cast<__memory_order_underlying_t>(__order), _Sco{});), + NV_IS_HOST, + (__atomic_store_host(__a->get(), __val, __order);)) +} + +template = 0> +_CCCL_HOST_DEVICE inline auto __atomic_load_dispatch(const _Sto* __a, memory_order __order, _Sco = {}) + -> __atomic_underlying_t<_Sto> +{ + NV_DISPATCH_TARGET( + NV_IS_DEVICE, + (return __atomic_load_n_cuda(__a->get(), static_cast<__memory_order_underlying_t>(__order), _Sco{});), + NV_IS_HOST, + (return __atomic_load_host(__a->get(), __order);)) +} + +template = 0> +_CCCL_HOST_DEVICE inline auto __atomic_exchange_dispatch(_Sto* __a, _Up __value, memory_order __order, _Sco = {}) + -> __atomic_underlying_t<_Sto> +{ + NV_DISPATCH_TARGET( + NV_IS_DEVICE, + (return __atomic_exchange_n_cuda(__a->get(), __value, static_cast<__memory_order_underlying_t>(__order), _Sco{});), + NV_IS_HOST, + (return __atomic_exchange_host(__a->get(), __value, __order);)) +} + +template = 0> +_CCCL_HOST_DEVICE inline bool 
__atomic_compare_exchange_strong_dispatch( + _Sto* __a, _Up* __expected, _Up __val, memory_order __success, memory_order __failure, _Sco = {}) +{ + bool __result = false; + NV_DISPATCH_TARGET( + NV_IS_DEVICE, + (__result = __atomic_compare_exchange_cuda( + __a->get(), + __expected, + __val, + false, + static_cast<__memory_order_underlying_t>(__success), + static_cast<__memory_order_underlying_t>(__failure), + _Sco{});), + NV_IS_HOST, + (__result = __atomic_compare_exchange_strong_host(__a->get(), __expected, __val, __success, __failure);)) + return __result; +} + +template = 0> +_CCCL_HOST_DEVICE inline bool __atomic_compare_exchange_weak_dispatch( + _Sto* __a, _Up* __expected, _Up __val, memory_order __success, memory_order __failure, _Sco = {}) +{ + bool __result = false; + NV_DISPATCH_TARGET( + NV_IS_DEVICE, + (__result = __atomic_compare_exchange_cuda( + __a->get(), + __expected, + __val, + true, + static_cast<__memory_order_underlying_t>(__success), + static_cast<__memory_order_underlying_t>(__failure), + _Sco{});), + NV_IS_HOST, + (__result = __atomic_compare_exchange_weak_host(__a->get(), __expected, __val, __success, __failure);)) + return __result; +} + +template = 0> +_CCCL_HOST_DEVICE inline auto __atomic_fetch_add_dispatch(_Sto* __a, _Up __delta, memory_order __order, _Sco = {}) + -> __atomic_underlying_t<_Sto> +{ + NV_DISPATCH_TARGET( + NV_IS_DEVICE, + (return __atomic_fetch_add_cuda(__a->get(), __delta, static_cast<__memory_order_underlying_t>(__order), _Sco{});), + NV_IS_HOST, + (return __atomic_fetch_add_host(__a->get(), __delta, __order);)) +} + +template = 0> +_CCCL_HOST_DEVICE inline auto __atomic_fetch_sub_dispatch(_Sto* __a, _Up __delta, memory_order __order, _Sco = {}) + -> __atomic_underlying_t<_Sto> +{ + NV_DISPATCH_TARGET( + NV_IS_DEVICE, + (return __atomic_fetch_sub_cuda(__a->get(), __delta, static_cast<__memory_order_underlying_t>(__order), _Sco{});), + NV_IS_HOST, + (return __atomic_fetch_sub_host(__a->get(), __delta, __order);)) +} + +template = 0> +_CCCL_HOST_DEVICE inline auto __atomic_fetch_and_dispatch(_Sto* __a, _Up __pattern, memory_order __order, _Sco = {}) + -> __atomic_underlying_t<_Sto> +{ + NV_DISPATCH_TARGET( + NV_IS_DEVICE, + (return __atomic_fetch_and_cuda(__a->get(), __pattern, static_cast<__memory_order_underlying_t>(__order), _Sco{});), + NV_IS_HOST, + (return __atomic_fetch_and_host(__a->get(), __pattern, __order);)) +} + +template = 0> +_CCCL_HOST_DEVICE inline auto __atomic_fetch_or_dispatch(_Sto* __a, _Up __pattern, memory_order __order, _Sco = {}) + -> __atomic_underlying_t<_Sto> +{ + NV_DISPATCH_TARGET( + NV_IS_DEVICE, + (return __atomic_fetch_or_cuda(__a->get(), __pattern, static_cast<__memory_order_underlying_t>(__order), _Sco{});), + NV_IS_HOST, + (return __atomic_fetch_or_host(__a->get(), __pattern, __order);)) +} + +template = 0> +_CCCL_HOST_DEVICE inline auto __atomic_fetch_xor_dispatch(_Sto* __a, _Up __pattern, memory_order __order, _Sco = {}) + -> __atomic_underlying_t<_Sto> +{ + NV_DISPATCH_TARGET( + NV_IS_DEVICE, + (return __atomic_fetch_xor_cuda(__a->get(), __pattern, static_cast<__memory_order_underlying_t>(__order), _Sco{});), + NV_IS_HOST, + (return __atomic_fetch_xor_host(__a->get(), __pattern, __order);)) +} + +template = 0> +_CCCL_HOST_DEVICE inline auto __atomic_fetch_max_dispatch(_Sto* __a, _Up __val, memory_order __order, _Sco = {}) + -> __atomic_underlying_t<_Sto> +{ + NV_IF_TARGET( + NV_IS_DEVICE, + (return __atomic_fetch_max_cuda(__a->get(), __val, static_cast<__memory_order_underlying_t>(__order), _Sco{});), + 
(return __atomic_fetch_max_host(__a->get(), __val, __order);)) +} + +template = 0> +_CCCL_HOST_DEVICE inline auto __atomic_fetch_min_dispatch(_Sto* __a, _Up __val, memory_order __order, _Sco = {}) + -> __atomic_underlying_t<_Sto> +{ + NV_IF_TARGET( + NV_IS_DEVICE, + (return __atomic_fetch_min_cuda(__a->get(), __val, static_cast<__memory_order_underlying_t>(__order), _Sco{});), + (return __atomic_fetch_min_host(__a->get(), __val, __order);)) +} + +_LIBCUDACXX_END_NAMESPACE_STD + +#endif // _LIBCUDACXX___ATOMIC_TYPES_BASE_H diff --git a/libcudacxx/include/cuda/std/__atomic/types/common.h b/libcudacxx/include/cuda/std/__atomic/types/common.h new file mode 100644 index 0000000000..9a44fe7034 --- /dev/null +++ b/libcudacxx/include/cuda/std/__atomic/types/common.h @@ -0,0 +1,100 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCUDACXX___ATOMIC_TYPES_COMMON_H +#define _LIBCUDACXX___ATOMIC_TYPES_COMMON_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include +#include +#include + +_LIBCUDACXX_BEGIN_NAMESPACE_STD + +enum class __atomic_tag +{ + __atomic_base_tag, + __atomic_locked_tag, + __atomic_small_tag, +}; + +// Helpers to SFINAE on the tag inside the storage object +template +using __atomic_storage_is_base = __enable_if_t<__atomic_tag::__atomic_base_tag == __remove_cvref_t<_Sto>::__tag, int>; +template +using __atomic_storage_is_locked = + __enable_if_t<__atomic_tag::__atomic_locked_tag == __remove_cvref_t<_Sto>::__tag, int>; +template +using __atomic_storage_is_small = __enable_if_t<__atomic_tag::__atomic_small_tag == __remove_cvref_t<_Sto>::__tag, int>; + +template +using __atomic_underlying_t = typename _Tp::__underlying_t; +template +using __atomic_underlying_remove_cv_t = __remove_cv_t; + +// [atomics.types.generic]p1 guarantees _Tp is trivially copyable. Because +// the default operator= in an object is not volatile, a byte-by-byte copy +// is required. 
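+// For example, with a class-type _Tp the implicitly-declared operator= is not
+// volatile-qualified, so `*__a_value = __val;` would not compile for a
+// volatile destination; the volatile overload below therefore copies the
+// object one char at a time through volatile char pointers.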
+template +_CCCL_HOST_DEVICE __enable_if_t<_CCCL_TRAIT(is_assignable, _Tp&, _Tv)> +__atomic_assign_volatile(_Tp* __a_value, _Tv const& __val) +{ + *__a_value = __val; +} + +template +_CCCL_HOST_DEVICE __enable_if_t<_CCCL_TRAIT(is_assignable, _Tp&, _Tv)> +__atomic_assign_volatile(_Tp volatile* __a_value, _Tv volatile const& __val) +{ + volatile char* __to = reinterpret_cast(__a_value); + volatile char* __end = __to + sizeof(_Tp); + volatile const char* __from = reinterpret_cast(&__val); + while (__to != __end) + { + *__to++ = *__from++; + } +} + +_CCCL_HOST_DEVICE inline int __atomic_memcmp(void const* __lhs, void const* __rhs, size_t __count) +{ + NV_DISPATCH_TARGET( + NV_IS_DEVICE, + (auto __lhs_c = reinterpret_cast(__lhs); + auto __rhs_c = reinterpret_cast(__rhs); + while (__count--) { + auto const __lhs_v = *__lhs_c++; + auto const __rhs_v = *__rhs_c++; + if (__lhs_v < __rhs_v) + { + return -1; + } + if (__lhs_v > __rhs_v) + { + return 1; + } + } return 0;), + NV_IS_HOST, + (return memcmp(__lhs, __rhs, __count);)) +} + +_LIBCUDACXX_END_NAMESPACE_STD + +#endif // _LIBCUDACXX___ATOMIC_TYPES_COMMON_H diff --git a/libcudacxx/include/cuda/std/__atomic/types/locked.h b/libcudacxx/include/cuda/std/__atomic/types/locked.h new file mode 100644 index 0000000000..1fc5103d2a --- /dev/null +++ b/libcudacxx/include/cuda/std/__atomic/types/locked.h @@ -0,0 +1,221 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCUDACXX___ATOMIC_TYPES_LOCKED_H +#define _LIBCUDACXX___ATOMIC_TYPES_LOCKED_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include +#include +#include + +_LIBCUDACXX_BEGIN_NAMESPACE_STD + +// Locked atomics must override the dispatch to be able to implement RMW primitives around the embedded lock. 
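+// Each dispatch overload for this storage follows the same pattern: acquire
+// __a_lock, perform the plain read/modify/write via __atomic_assign_volatile,
+// then release the lock. The caller's memory_order argument is not forwarded;
+// ordering comes from the acquire/release semantics of the lock itself.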
+template +struct __atomic_locked_storage +{ + using __underlying_t = _Tp; + static constexpr __atomic_tag __tag = __atomic_tag::__atomic_locked_tag; + + _Tp __a_value; + mutable __atomic_storage<_LIBCUDACXX_ATOMIC_FLAG_TYPE> __a_lock; + + explicit constexpr __atomic_locked_storage() noexcept = default; + + _CCCL_HOST_DEVICE constexpr explicit inline __atomic_locked_storage(_Tp value) noexcept + : __a_value(value) + , __a_lock{} + {} + + template + _CCCL_HOST_DEVICE inline void __lock(_Sco) const volatile noexcept + { + while (1 == __atomic_exchange_dispatch(&__a_lock, _LIBCUDACXX_ATOMIC_FLAG_TYPE(true), memory_order_acquire, _Sco{})) + /*spin*/; + } + template + _CCCL_HOST_DEVICE inline void __lock(_Sco) const noexcept + { + while (1 == __atomic_exchange_dispatch(&__a_lock, _LIBCUDACXX_ATOMIC_FLAG_TYPE(true), memory_order_acquire, _Sco{})) + /*spin*/; + } + template + _CCCL_HOST_DEVICE inline void __unlock(_Sco) const volatile noexcept + { + __atomic_store_dispatch(&__a_lock, _LIBCUDACXX_ATOMIC_FLAG_TYPE(false), memory_order_release, _Sco{}); + } + template + _CCCL_HOST_DEVICE inline void __unlock(_Sco) const noexcept + { + __atomic_store_dispatch(&__a_lock, _LIBCUDACXX_ATOMIC_FLAG_TYPE(false), memory_order_release, _Sco{}); + } +}; + +template = 0> +_CCCL_HOST_DEVICE inline void __atomic_init_dispatch(_Sto* __a, _Up __val) +{ + __atomic_assign_volatile(&__a->__a_value, __val); +} + +template = 0> +_CCCL_HOST_DEVICE inline void __atomic_store_dispatch(_Sto* __a, _Up __val, memory_order, _Sco = {}) +{ + __a->__lock(_Sco{}); + __atomic_assign_volatile(&__a->__a_value, __val); + __a->__unlock(_Sco{}); +} + +template = 0> +_CCCL_HOST_DEVICE inline auto __atomic_load_dispatch(const _Sto* __a, memory_order __order, _Sco = {}) + -> __atomic_underlying_t<_Sto> +{ + using _Tp = __atomic_underlying_t<_Sto>; + _Tp __old; + __a->__lock(_Sco{}); + __atomic_assign_volatile(&__old, __a->__a_value); + __a->__unlock(_Sco{}); + return __old; +} + +template = 0> +_CCCL_HOST_DEVICE inline auto __atomic_exchange_dispatch(_Sto* __a, _Up __value, memory_order, _Sco = {}) + -> __atomic_underlying_t<_Sto> +{ + using _Tp = __atomic_underlying_t<_Sto>; + _Tp __old; + __a->__lock(_Sco{}); + __atomic_assign_volatile(&__old, __a->__a_value); + __atomic_assign_volatile(&__a->__a_value, __value); + __a->__unlock(_Sco{}); + return __old; +} + +template = 0> +_CCCL_HOST_DEVICE inline bool __atomic_compare_exchange_strong_dispatch( + _Sto* __a, _Up* __expected, _Up __value, memory_order, memory_order, _Sco = {}) +{ + using _Tp = __atomic_underlying_t<_Sto>; + _Tp __temp; + __a->__lock(_Sco{}); + __atomic_assign_volatile(&__temp, __a->__a_value); + bool __ret = __temp == *__expected; + if (__ret) + { + __atomic_assign_volatile(&__a->__a_value, __value); + } + else + { + __atomic_assign_volatile(__expected, __a->__a_value); + } + __a->__unlock(_Sco{}); + return __ret; +} + +template = 0> +_CCCL_HOST_DEVICE inline bool +__atomic_compare_exchange_weak_dispatch(_Sto* __a, _Up* __expected, _Up __value, memory_order, memory_order, _Sco = {}) +{ + using _Tp = __atomic_underlying_t<_Sto>; + _Tp __temp; + __a->__lock(_Sco{}); + __atomic_assign_volatile(&__temp, __a->__a_value); + bool __ret = __temp == *__expected; + if (__ret) + { + __atomic_assign_volatile(&__a->__a_value, __value); + } + else + { + __atomic_assign_volatile(__expected, __a->__a_value); + } + __a->__unlock(_Sco{}); + return __ret; +} + +template = 0> +_CCCL_HOST_DEVICE inline auto __atomic_fetch_add_dispatch(_Sto* __a, _Up __delta, memory_order, _Sco = {}) + -> 
__atomic_underlying_t<_Sto> +{ + using _Tp = __atomic_underlying_t<_Sto>; + _Tp __old; + __a->__lock(_Sco{}); + __atomic_assign_volatile(&__old, __a->__a_value); + __atomic_assign_volatile(&__a->__a_value, _Tp(__old + __delta)); + __a->__unlock(_Sco{}); + return __old; +} + +template = 0> +_CCCL_HOST_DEVICE inline auto __atomic_fetch_sub_dispatch(_Sto* __a, _Up __delta, memory_order, _Sco = {}) + -> __atomic_underlying_t<_Sto> +{ + using _Tp = __atomic_underlying_t<_Sto>; + _Tp __old; + __a->__lock(_Sco{}); + __atomic_assign_volatile(&__old, __a->__a_value); + __atomic_assign_volatile(&__a->__a_value, _Tp(__old - __delta)); + __a->__unlock(_Sco{}); + return __old; +} + +template = 0> +_CCCL_HOST_DEVICE inline auto __atomic_fetch_and_dispatch(_Sto* __a, _Up __pattern, memory_order, _Sco = {}) + -> __atomic_underlying_t<_Sto> +{ + using _Tp = __atomic_underlying_t<_Sto>; + _Tp __old; + __a->__lock(_Sco{}); + __atomic_assign_volatile(&__old, __a->__a_value); + __atomic_assign_volatile(&__a->__a_value, _Tp(__old & __pattern)); + __a->__unlock(_Sco{}); + return __old; +} + +template = 0> +_CCCL_HOST_DEVICE inline auto __atomic_fetch_or_dispatch(_Sto* __a, _Up __pattern, memory_order, _Sco = {}) + -> __atomic_underlying_t<_Sto> +{ + using _Tp = __atomic_underlying_t<_Sto>; + _Tp __old; + __a->__lock(_Sco{}); + __atomic_assign_volatile(&__old, __a->__a_value); + __atomic_assign_volatile(&__a->__a_value, _Tp(__old | __pattern)); + __a->__unlock(_Sco{}); + return __old; +} + +template = 0> +_CCCL_HOST_DEVICE inline auto __atomic_fetch_xor_dispatch(_Sto* __a, _Up __pattern, memory_order, _Sco = {}) + -> __atomic_underlying_t<_Sto> +{ + using _Tp = __atomic_underlying_t<_Sto>; + _Tp __old; + __a->__lock(_Sco{}); + __atomic_assign_volatile(&__old, __a->__a_value); + __atomic_assign_volatile(&__a->__a_value, _Tp(__old ^ __pattern)); + __a->__unlock(_Sco{}); + return __old; +} + +_LIBCUDACXX_END_NAMESPACE_STD + +#endif // _LIBCUDACXX___ATOMIC_TYPES_LOCKED_H diff --git a/libcudacxx/include/cuda/std/__atomic/types/reference.h b/libcudacxx/include/cuda/std/__atomic/types/reference.h new file mode 100644 index 0000000000..a83c8e5832 --- /dev/null +++ b/libcudacxx/include/cuda/std/__atomic/types/reference.h @@ -0,0 +1,69 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. 
+// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCUDACXX___ATOMIC_TYPES_REFERENCE_H +#define _LIBCUDACXX___ATOMIC_TYPES_REFERENCE_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include + +_LIBCUDACXX_BEGIN_NAMESPACE_STD + +// Reference is compatible with __atomic_base_tag and uses the default dispatch +template +struct __atomic_ref_storage +{ + using __underlying_t = _Tp; + static constexpr __atomic_tag __tag = __atomic_tag::__atomic_base_tag; + +#if !defined(_CCCL_COMPILER_GCC) || (__GNUC__ >= 5) + static_assert(_CCCL_TRAIT(is_trivially_copyable, _Tp), + "std::atomic_ref requires that 'Tp' be a trivially copyable type"); +#endif + + _Tp* __a_value; + + __atomic_ref_storage() = delete; + + _CCCL_HOST_DEVICE constexpr explicit inline __atomic_ref_storage(_Tp* value) noexcept + : __a_value(value) + {} + + _CCCL_HOST_DEVICE inline auto get() noexcept -> __underlying_t* + { + return __a_value; + } + _CCCL_HOST_DEVICE inline auto get() const noexcept -> __underlying_t* + { + return __a_value; + } + _CCCL_HOST_DEVICE inline auto get() volatile noexcept -> volatile __underlying_t* + { + return __a_value; + } + _CCCL_HOST_DEVICE inline auto get() const volatile noexcept -> volatile __underlying_t* + { + return __a_value; + } +}; + +_LIBCUDACXX_END_NAMESPACE_STD + +#endif // _LIBCUDACXX___ATOMIC_TYPES_REFERENCE_H diff --git a/libcudacxx/include/cuda/std/__atomic/types/small.h b/libcudacxx/include/cuda/std/__atomic/types/small.h new file mode 100644 index 0000000000..8f38df5bb0 --- /dev/null +++ b/libcudacxx/include/cuda/std/__atomic/types/small.h @@ -0,0 +1,222 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. 
+// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCUDACXX___ATOMIC_TYPES_SMALL_H +#define _LIBCUDACXX___ATOMIC_TYPES_SMALL_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include +#include +#include +#include +#include + +_LIBCUDACXX_BEGIN_NAMESPACE_STD + +// manipulated by PTX without any performance overhead +template +using __atomic_small_proxy_t = _If<_CCCL_TRAIT(is_signed, _Tp), int32_t, uint32_t>; + +// Arithmetic conversions to/from proxy types +template = 0> +_CCCL_HOST_DEVICE constexpr __atomic_small_proxy_t<_Tp> __atomic_small_to_32(_Tp __val) +{ + return static_cast<__atomic_small_proxy_t<_Tp>>(__val); +} + +template = 0> +_CCCL_HOST_DEVICE constexpr inline _Tp __atomic_small_from_32(__atomic_small_proxy_t<_Tp> __val) +{ + return static_cast<_Tp>(__val); +} + +// Non-arithmetic conversion to/from proxy types +template = 0> +_CCCL_HOST_DEVICE inline __atomic_small_proxy_t<_Tp> __atomic_small_to_32(_Tp __val) +{ + __atomic_small_proxy_t<_Tp> __temp{}; + memcpy(&__temp, &__val, sizeof(_Tp)); + return __temp; +} + +template = 0> +_CCCL_HOST_DEVICE inline _Tp __atomic_small_from_32(__atomic_small_proxy_t<_Tp> __val) +{ + _Tp __temp{}; + memcpy(&__temp, &__val, sizeof(_Tp)); + return __temp; +} + +template +struct __atomic_small_storage +{ + using __underlying_t = _Tp; + using __proxy_t = __atomic_small_proxy_t<_Tp>; + static constexpr __atomic_tag __tag = __atomic_tag::__atomic_small_tag; + + _CCCL_HOST_DEVICE constexpr inline explicit __atomic_small_storage() noexcept + : __a_value{__proxy_t{}} {}; + + _CCCL_HOST_DEVICE constexpr inline explicit __atomic_small_storage(_Tp __value) noexcept + : __a_value{__atomic_small_to_32(__value)} + {} + + __atomic_storage<__proxy_t> __a_value; +}; + +template = 0> +_CCCL_HOST_DEVICE inline void __atomic_init_dispatch(_Sto* __a, _Up __val) +{ + __atomic_init_dispatch(&__a->__a_value, __atomic_small_to_32(__val)); +} + +template = 0> +_CCCL_HOST_DEVICE inline void __atomic_store_dispatch(_Sto* __a, _Up __val, memory_order __order, _Sco = {}) +{ + __atomic_store_dispatch(&__a->__a_value, __atomic_small_to_32(__val), __order, _Sco{}); +} + +template = 0> +_CCCL_HOST_DEVICE inline auto __atomic_load_dispatch(const _Sto* __a, memory_order __order, _Sco = {}) + -> __atomic_underlying_t<_Sto> +{ + using _Tp = __atomic_underlying_t<_Sto>; + return __atomic_small_from_32<_Tp>(__atomic_load_dispatch(&__a->__a_value, __order, _Sco{})); +} + +template = 0> +_CCCL_HOST_DEVICE inline auto __atomic_exchange_dispatch(_Sto* __a, _Up __value, memory_order __order, _Sco = {}) + -> __atomic_underlying_t<_Sto> +{ + using _Tp = __atomic_underlying_t<_Sto>; + return __atomic_small_from_32<_Tp>( + __atomic_exchange_dispatch(&__a->__a_value, __atomic_small_to_32(__value), __order, _Sco{})); +} + +template = 0> +_CCCL_HOST_DEVICE inline bool __atomic_compare_exchange_weak_dispatch( + _Sto* __a, _Up* __expected, _Up __value, memory_order __success, memory_order __failure, _Sco = {}) +{ + using _Tp = __atomic_underlying_t<_Sto>; + auto __temp_expected = __atomic_small_to_32(*__expected); + auto const __ret = __atomic_compare_exchange_weak_dispatch( + &__a->__a_value, &__temp_expected, __atomic_small_to_32(__value), __success, __failure, _Sco{}); + auto const __actual = 
__atomic_small_from_32<_Tp>(__temp_expected); + constexpr auto __mask = static_cast((1u << (8 * sizeof(_Tp))) - 1); + if (!__ret) + { + if (0 == __atomic_memcmp(&__actual, __expected, sizeof(_Tp))) + { + __atomic_fetch_and_dispatch(&__a->__a_value, __mask, memory_order_relaxed, _Sco{}); + } + else + { + *__expected = __actual; + } + } + return __ret; +} + +template = 0> +_CCCL_HOST_DEVICE inline bool __atomic_compare_exchange_strong_dispatch( + _Sto* __a, _Up* __expected, _Up __value, memory_order __success, memory_order __failure, _Sco = {}) +{ + using _Tp = __atomic_underlying_t<_Sto>; + auto const __old = *__expected; + while (1) + { + if (__atomic_compare_exchange_weak_dispatch(__a, __expected, __value, __success, __failure, _Sco{})) + { + return true; + } + if (0 != __atomic_memcmp(&__old, __expected, sizeof(_Tp))) + { + return false; + } + } +} + +template = 0> +_CCCL_HOST_DEVICE inline auto __atomic_fetch_add_dispatch(_Sto* __a, _Up __delta, memory_order __order, _Sco = {}) + -> __atomic_underlying_t<_Sto> +{ + using _Tp = __atomic_underlying_t<_Sto>; + return __atomic_small_from_32<_Tp>( + __atomic_fetch_add_dispatch(&__a->__a_value, __atomic_small_to_32(__delta), __order, _Sco{})); +} + +template = 0> +_CCCL_HOST_DEVICE inline auto __atomic_fetch_sub_dispatch(_Sto* __a, _Up __delta, memory_order __order, _Sco = {}) + -> __atomic_underlying_t<_Sto> +{ + using _Tp = __atomic_underlying_t<_Sto>; + return __atomic_small_from_32<_Tp>( + __atomic_fetch_sub_dispatch(&__a->__a_value, __atomic_small_to_32(__delta), __order, _Sco{})); +} + +template = 0> +_CCCL_HOST_DEVICE inline auto __atomic_fetch_and_dispatch(_Sto* __a, _Up __pattern, memory_order __order, _Sco = {}) + -> __atomic_underlying_t<_Sto> +{ + using _Tp = __atomic_underlying_t<_Sto>; + return __atomic_small_from_32<_Tp>( + __atomic_fetch_and_dispatch(&__a->__a_value, __atomic_small_to_32(__pattern), __order, _Sco{})); +} + +template = 0> +_CCCL_HOST_DEVICE inline auto __atomic_fetch_or_dispatch(_Sto* __a, _Up __pattern, memory_order __order, _Sco = {}) + -> __atomic_underlying_t<_Sto> +{ + using _Tp = __atomic_underlying_t<_Sto>; + return __atomic_small_from_32<_Tp>( + __atomic_fetch_or_dispatch(&__a->__a_value, __atomic_small_to_32(__pattern), __order, _Sco{})); +} + +template = 0> +_CCCL_HOST_DEVICE inline auto __atomic_fetch_xor_dispatch(_Sto* __a, _Up __pattern, memory_order __order, _Sco = {}) + -> __atomic_underlying_t<_Sto> +{ + using _Tp = __atomic_underlying_t<_Sto>; + return __atomic_small_from_32<_Tp>( + __atomic_fetch_xor_dispatch(&__a->__a_value, __atomic_small_to_32(__pattern), __order, _Sco{})); +} + +template = 0> +_CCCL_HOST_DEVICE inline auto __atomic_fetch_max_dispatch(_Sto* __a, _Up __val, memory_order __order, _Sco = {}) + -> __atomic_underlying_t<_Sto> +{ + using _Tp = __atomic_underlying_t<_Sto>; + return __atomic_small_from_32<_Tp>( + __atomic_fetch_max_dispatch(&__a->__a_value, __atomic_small_to_32(__val), __order, _Sco{})); +} + +template = 0> +_CCCL_HOST_DEVICE inline auto __atomic_fetch_min_dispatch(_Sto* __a, _Up __val, memory_order __order, _Sco = {}) + -> __atomic_underlying_t<_Sto> +{ + using _Tp = __atomic_underlying_t<_Sto>; + return __atomic_small_from_32<_Tp>( + __atomic_fetch_min_dispatch(&__a->__a_value, __atomic_small_to_32(__val), __order, _Sco{})); +} + +_LIBCUDACXX_END_NAMESPACE_STD + +#endif // _LIBCUDACXX___ATOMIC_TYPES_SMALL_H diff --git a/libcudacxx/include/cuda/std/__atomic/wait/notify_wait.h b/libcudacxx/include/cuda/std/__atomic/wait/notify_wait.h new file mode 100644 index 
0000000000..29130ee244 --- /dev/null +++ b/libcudacxx/include/cuda/std/__atomic/wait/notify_wait.h @@ -0,0 +1,90 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCUDACXX___ATOMIC_WAIT_NOTIFY_WAIT_H +#define _LIBCUDACXX___ATOMIC_WAIT_NOTIFY_WAIT_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include + +_LIBCUDACXX_BEGIN_NAMESPACE_STD + +extern "C" _CCCL_DEVICE void __atomic_try_wait_unsupported_before_SM_70__(); + +template +_LIBCUDACXX_INLINE_VISIBILITY void +__atomic_try_wait_slow(_Tp const volatile* __a, __atomic_underlying_remove_cv_t<_Tp> __val, memory_order __order, _Sco) +{ + NV_DISPATCH_TARGET(NV_PROVIDES_SM_70, __atomic_try_wait_slow_fallback(__a, __val, __order, _Sco{}); + , NV_IS_HOST, __atomic_try_wait_slow_fallback(__a, __val, __order, _Sco{}); + , NV_ANY_TARGET, __atomic_try_wait_unsupported_before_SM_70__();); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY void __atomic_notify_one(_Tp const volatile*, _Sco) +{ + NV_DISPATCH_TARGET(NV_PROVIDES_SM_70, , NV_IS_HOST, , NV_ANY_TARGET, __atomic_try_wait_unsupported_before_SM_70__();); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY void __atomic_notify_all(_Tp const volatile*, _Sco) +{ + NV_DISPATCH_TARGET(NV_PROVIDES_SM_70, , NV_IS_HOST, , NV_ANY_TARGET, __atomic_try_wait_unsupported_before_SM_70__();); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY bool __nonatomic_compare_equal(_Tp const& __lhs, _Tp const& __rhs) +{ +#if defined(_CCCL_CUDA_COMPILER) + return __lhs == __rhs; +#else + return memcmp(&__lhs, &__rhs, sizeof(_Tp)) == 0; +#endif +} + +template +_LIBCUDACXX_INLINE_VISIBILITY void __atomic_wait( + _Tp const volatile* __a, __atomic_underlying_remove_cv_t<_Tp> const __val, memory_order __order, _Sco = {}) +{ + for (int __i = 0; __i < _LIBCUDACXX_POLLING_COUNT; ++__i) + { + if (!__nonatomic_compare_equal(__atomic_load_dispatch(__a, __order, _Sco{}), __val)) + { + return; + } + if (__i < 12) + { + __libcpp_thread_yield_processor(); + } + else + { + __libcpp_thread_yield(); + } + } + while (__nonatomic_compare_equal(__atomic_load_dispatch(__a, __order, _Sco{}), __val)) + { + __atomic_try_wait_slow(__a, __val, __order, _Sco{}); + } +} + +_LIBCUDACXX_END_NAMESPACE_STD + +#endif // _LIBCUDACXX___ATOMIC_WAIT_NOTIFY_WAIT_H diff --git a/libcudacxx/include/cuda/std/__atomic/wait/polling.h b/libcudacxx/include/cuda/std/__atomic/wait/polling.h new file mode 100644 index 0000000000..8fe5f24b6d --- /dev/null +++ b/libcudacxx/include/cuda/std/__atomic/wait/polling.h @@ -0,0 +1,61 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCUDACXX___ATOMIC_WAIT_POLLING_H +#define _LIBCUDACXX___ATOMIC_WAIT_POLLING_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include +#include + +_LIBCUDACXX_BEGIN_NAMESPACE_STD + +template +struct __atomic_poll_tester +{ + using __underlying_t = __atomic_underlying_remove_cv_t<_Tp>; + + _Tp const volatile* __atom; + __underlying_t __val; + memory_order __order; + + _CCCL_HOST_DEVICE __atomic_poll_tester(_Tp const volatile* __a, __underlying_t __v, memory_order __o) + : __atom(__a) + , __val(__v) + , __order(__o) + {} + + _CCCL_HOST_DEVICE bool operator()() const + { + return !(__atomic_load_dispatch(__atom, __order, _Sco{}) == __val); + } +}; + +template +_CCCL_HOST_DEVICE void __atomic_try_wait_slow_fallback( + _Tp const volatile* __a, __atomic_underlying_remove_cv_t<_Tp> __val, memory_order __order, _Sco) +{ + __libcpp_thread_poll_with_backoff(__atomic_poll_tester<_Tp, _Sco>(__a, __val, __order)); +} + +_LIBCUDACXX_END_NAMESPACE_STD + +#endif // _LIBCUDACXX___ATOMIC_WAIT_POLLING_H diff --git a/libcudacxx/include/cuda/std/__cuda/atomic.h b/libcudacxx/include/cuda/std/__cuda/atomic.h index c75d0c54da..d45a12c155 100644 --- a/libcudacxx/include/cuda/std/__cuda/atomic.h +++ b/libcudacxx/include/cuda/std/__cuda/atomic.h @@ -21,231 +21,90 @@ # pragma system_header #endif // no system header -_LIBCUDACXX_BEGIN_NAMESPACE_CUDA - -using std::__detail::thread_scope; -using std::__detail::thread_scope_block; -using std::__detail::thread_scope_device; -using std::__detail::thread_scope_system; -using std::__detail::thread_scope_thread; - -namespace __detail -{ -using std::__detail::__thread_scope_block_tag; -using std::__detail::__thread_scope_device_tag; -using std::__detail::__thread_scope_system_tag; -} // namespace __detail - -using memory_order = std::memory_order; +#include -constexpr memory_order memory_order_relaxed = std::memory_order_relaxed; -constexpr memory_order memory_order_consume = std::memory_order_consume; -constexpr memory_order memory_order_acquire = std::memory_order_acquire; -constexpr memory_order memory_order_release = std::memory_order_release; -constexpr memory_order memory_order_acq_rel = std::memory_order_acq_rel; -constexpr memory_order memory_order_seq_cst = std::memory_order_seq_cst; +_LIBCUDACXX_BEGIN_NAMESPACE_CUDA // atomic template -struct atomic : public std::__atomic_base<_Tp, _Sco> +struct atomic : public _CUDA_VSTD::__atomic_impl<_Tp, _Sco> { - typedef std::__atomic_base<_Tp, _Sco> __base; + using value_type = _Tp; constexpr atomic() noexcept = default; - _CCCL_HOST_DEVICE constexpr atomic(_Tp __d) noexcept - : __base(__d) - {} - - _CCCL_HOST_DEVICE _Tp operator=(_Tp __d) volatile noexcept - { - __base::store(__d); - return __d; - } - _CCCL_HOST_DEVICE _Tp operator=(_Tp __d) noexcept - { - __base::store(__d); - return __d; - } - - _CCCL_HOST_DEVICE _Tp fetch_max(const _Tp& __op, memory_order __m = memory_order_seq_cst) volatile noexcept - { - return std::__detail::__cxx_atomic_fetch_max(&this->__a_, __op, __m); - } - _CCCL_HOST_DEVICE _Tp fetch_min(const _Tp& __op, memory_order 
__m = memory_order_seq_cst) volatile noexcept - { - return std::__detail::__cxx_atomic_fetch_min(&this->__a_, __op, __m); - } -}; - -// atomic - -template -struct atomic<_Tp*, _Sco> : public std::__atomic_base<_Tp*, _Sco> -{ - typedef std::__atomic_base<_Tp*, _Sco> __base; - - constexpr atomic() noexcept = default; - _CCCL_HOST_DEVICE constexpr atomic(_Tp* __d) noexcept - : __base(__d) + _LIBCUDACXX_INLINE_VISIBILITY constexpr atomic(_Tp __d) noexcept + : _CUDA_VSTD::__atomic_impl<_Tp, _Sco>(__d) {} - _CCCL_HOST_DEVICE _Tp* operator=(_Tp* __d) volatile noexcept + atomic(const atomic&) = delete; + atomic& operator=(const atomic&) = delete; + atomic& operator=(const atomic&) volatile = delete; + + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator=(_Tp __d) volatile noexcept { - __base::store(__d); + this->store(__d); return __d; } - _CCCL_HOST_DEVICE _Tp* operator=(_Tp* __d) noexcept + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator=(_Tp __d) noexcept { - __base::store(__d); + this->store(__d); return __d; } - _CCCL_HOST_DEVICE _Tp* fetch_add(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) volatile noexcept - { - return __cxx_atomic_fetch_add(&this->__a_, __op, __m); - } - _CCCL_HOST_DEVICE _Tp* fetch_add(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) noexcept - { - return __cxx_atomic_fetch_add(&this->__a_, __op, __m); - } - _CCCL_HOST_DEVICE _Tp* fetch_sub(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) volatile noexcept + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_max(const _Tp& __op, memory_order __m = memory_order_seq_cst) noexcept { - return __cxx_atomic_fetch_sub(&this->__a_, __op, __m); + return _CUDA_VSTD::__atomic_fetch_max_dispatch(&this->__a, __op, __m, _CUDA_VSTD::__scope_to_tag<_Sco>{}); } - _CCCL_HOST_DEVICE _Tp* fetch_sub(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) noexcept + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_max(const _Tp& __op, memory_order __m = memory_order_seq_cst) volatile noexcept { - return __cxx_atomic_fetch_sub(&this->__a_, __op, __m); + return _CUDA_VSTD::__atomic_fetch_max_dispatch(&this->__a, __op, __m, _CUDA_VSTD::__scope_to_tag<_Sco>{}); } - _CCCL_HOST_DEVICE _Tp* operator++(int) volatile noexcept - { - return fetch_add(1); - } - _CCCL_HOST_DEVICE _Tp* operator++(int) noexcept - { - return fetch_add(1); - } - _CCCL_HOST_DEVICE _Tp* operator--(int) volatile noexcept - { - return fetch_sub(1); - } - _CCCL_HOST_DEVICE _Tp* operator--(int) noexcept - { - return fetch_sub(1); - } - _CCCL_HOST_DEVICE _Tp* operator++() volatile noexcept - { - return fetch_add(1) + 1; - } - _CCCL_HOST_DEVICE _Tp* operator++() noexcept - { - return fetch_add(1) + 1; - } - _CCCL_HOST_DEVICE _Tp* operator--() volatile noexcept - { - return fetch_sub(1) - 1; - } - _CCCL_HOST_DEVICE _Tp* operator--() noexcept - { - return fetch_sub(1) - 1; - } - _CCCL_HOST_DEVICE _Tp* operator+=(ptrdiff_t __op) volatile noexcept - { - return fetch_add(__op) + __op; - } - _CCCL_HOST_DEVICE _Tp* operator+=(ptrdiff_t __op) noexcept - { - return fetch_add(__op) + __op; - } - _CCCL_HOST_DEVICE _Tp* operator-=(ptrdiff_t __op) volatile noexcept + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_min(const _Tp& __op, memory_order __m = memory_order_seq_cst) noexcept { - return fetch_sub(__op) - __op; + return _CUDA_VSTD::__atomic_fetch_min_dispatch(&this->__a, __op, __m, _CUDA_VSTD::__scope_to_tag<_Sco>{}); } - _CCCL_HOST_DEVICE _Tp* operator-=(ptrdiff_t __op) noexcept + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_min(const _Tp& __op, memory_order __m = memory_order_seq_cst) volatile noexcept { - 
return fetch_sub(__op) - __op; + return _CUDA_VSTD::__atomic_fetch_min_dispatch(&this->__a, __op, __m, _CUDA_VSTD::__scope_to_tag<_Sco>{}); } }; // atomic_ref template -struct atomic_ref : public std::__atomic_base_ref<_Tp, _Sco> +struct atomic_ref : public _CUDA_VSTD::__atomic_ref_impl<_Tp, _Sco> { - typedef std::__atomic_base_ref<_Tp, _Sco> __base; - - _CCCL_HOST_DEVICE constexpr atomic_ref(_Tp& __d) noexcept - : __base(__d) - {} + using value_type = _Tp; - _CCCL_HOST_DEVICE _Tp operator=(_Tp __d) const noexcept - { - __base::store(__d); - return __d; - } + static constexpr size_t required_alignment = sizeof(_Tp); - _CCCL_HOST_DEVICE _Tp fetch_max(const _Tp& __op, memory_order __m = memory_order_seq_cst) const noexcept - { - return std::__detail::__cxx_atomic_fetch_max(&this->__a_, __op, __m); - } + static constexpr bool is_always_lock_free = sizeof(_Tp) <= 8; - _CCCL_HOST_DEVICE _Tp fetch_min(const _Tp& __op, memory_order __m = memory_order_seq_cst) const noexcept - { - return std::__detail::__cxx_atomic_fetch_min(&this->__a_, __op, __m); - } -}; - -// atomic_ref - -template -struct atomic_ref<_Tp*, _Sco> : public std::__atomic_base_ref<_Tp*, _Sco> -{ - typedef std::__atomic_base_ref<_Tp*, _Sco> __base; - - _CCCL_HOST_DEVICE constexpr atomic_ref(_Tp*& __d) noexcept - : __base(__d) + _LIBCUDACXX_INLINE_VISIBILITY explicit atomic_ref(_Tp& __ref) + : _CUDA_VSTD::__atomic_ref_impl<_Tp, _Sco>(__ref) {} - _CCCL_HOST_DEVICE _Tp* operator=(_Tp* __d) const noexcept + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator=(_Tp __v) const noexcept { - __base::store(__d); - return __d; + this->store(__v); + return __v; } - _CCCL_HOST_DEVICE _Tp* fetch_add(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) const noexcept - { - return __cxx_atomic_fetch_add(&this->__a_, __op, __m); - } - _CCCL_HOST_DEVICE _Tp* fetch_sub(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) const noexcept - { - return __cxx_atomic_fetch_sub(&this->__a_, __op, __m); - } + atomic_ref(const atomic_ref&) noexcept = default; + atomic_ref& operator=(const atomic_ref&) = delete; + atomic_ref& operator=(const atomic_ref&) const = delete; - _CCCL_HOST_DEVICE _Tp* operator++(int) const noexcept + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_max(const _Tp& __op, memory_order __m = memory_order_seq_cst) const noexcept { - return fetch_add(1); + return _CUDA_VSTD::__atomic_fetch_max_dispatch(&this->__a, __op, __m, _CUDA_VSTD::__scope_to_tag<_Sco>{}); } - _CCCL_HOST_DEVICE _Tp* operator--(int) const noexcept - { - return fetch_sub(1); - } - _CCCL_HOST_DEVICE _Tp* operator++() const noexcept - { - return fetch_add(1) + 1; - } - _CCCL_HOST_DEVICE _Tp* operator--() const noexcept - { - return fetch_sub(1) - 1; - } - _CCCL_HOST_DEVICE _Tp* operator+=(ptrdiff_t __op) const noexcept - { - return fetch_add(__op) + __op; - } - _CCCL_HOST_DEVICE _Tp* operator-=(ptrdiff_t __op) const noexcept + + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_min(const _Tp& __op, memory_order __m = memory_order_seq_cst) const noexcept { - return fetch_sub(__op) - __op; + return _CUDA_VSTD::__atomic_fetch_min_dispatch(&this->__a, __op, __m, _CUDA_VSTD::__scope_to_tag<_Sco>{}); } }; @@ -256,25 +115,25 @@ atomic_thread_fence(memory_order __m, thread_scope _Scope = thread_scope::thread NV_IS_DEVICE, (switch (_Scope) { case thread_scope::thread_scope_system: - std::__detail::__atomic_thread_fence_cuda((int) __m, __detail::__thread_scope_system_tag()); + _CUDA_VSTD::__atomic_thread_fence_cuda((int) __m, __thread_scope_system_tag{}); break; case thread_scope::thread_scope_device: 
- std::__detail::__atomic_thread_fence_cuda((int) __m, __detail::__thread_scope_device_tag()); + _CUDA_VSTD::__atomic_thread_fence_cuda((int) __m, __thread_scope_device_tag{}); break; case thread_scope::thread_scope_block: - std::__detail::__atomic_thread_fence_cuda((int) __m, __detail::__thread_scope_block_tag()); + _CUDA_VSTD::__atomic_thread_fence_cuda((int) __m, __thread_scope_block_tag{}); break; // Atomics scoped to themselves do not require fencing case thread_scope::thread_scope_thread: break; }), NV_IS_HOST, - ((void) _Scope; std::atomic_thread_fence(__m);)) + ((void) _Scope; _CUDA_VSTD::atomic_thread_fence(__m);)) } inline _CCCL_HOST_DEVICE void atomic_signal_fence(memory_order __m) { - std::atomic_signal_fence(__m); + _CUDA_VSTD::atomic_signal_fence(__m); } _LIBCUDACXX_END_NAMESPACE_CUDA diff --git a/libcudacxx/include/cuda/std/__cuda/atomic_prelude.h b/libcudacxx/include/cuda/std/__cuda/atomic_prelude.h deleted file mode 100644 index 4e43fb4481..0000000000 --- a/libcudacxx/include/cuda/std/__cuda/atomic_prelude.h +++ /dev/null @@ -1,64 +0,0 @@ -// -*- C++ -*- -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. -// -//===----------------------------------------------------------------------===// - -#ifndef _LIBCUDACXX___CUDA_ATOMIC_PRELUDE_H -#define _LIBCUDACXX___CUDA_ATOMIC_PRELUDE_H - -#include - -#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) -# pragma GCC system_header -#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) -# pragma clang system_header -#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) -# pragma system_header -#endif // no system header - -#ifndef _CCCL_COMPILER_NVRTC -# include // TRANSITION: Fix transitive includes - -# include -static_assert(ATOMIC_BOOL_LOCK_FREE == 2, ""); -static_assert(ATOMIC_CHAR_LOCK_FREE == 2, ""); -static_assert(ATOMIC_CHAR16_T_LOCK_FREE == 2, ""); -static_assert(ATOMIC_CHAR32_T_LOCK_FREE == 2, ""); -static_assert(ATOMIC_WCHAR_T_LOCK_FREE == 2, ""); -static_assert(ATOMIC_SHORT_LOCK_FREE == 2, ""); -static_assert(ATOMIC_INT_LOCK_FREE == 2, ""); -static_assert(ATOMIC_LONG_LOCK_FREE == 2, ""); -static_assert(ATOMIC_LLONG_LOCK_FREE == 2, ""); -static_assert(ATOMIC_POINTER_LOCK_FREE == 2, ""); -# undef ATOMIC_BOOL_LOCK_FREE -# undef ATOMIC_BOOL_LOCK_FREE -# undef ATOMIC_CHAR_LOCK_FREE -# undef ATOMIC_CHAR16_T_LOCK_FREE -# undef ATOMIC_CHAR32_T_LOCK_FREE -# undef ATOMIC_WCHAR_T_LOCK_FREE -# undef ATOMIC_SHORT_LOCK_FREE -# undef ATOMIC_INT_LOCK_FREE -# undef ATOMIC_LONG_LOCK_FREE -# undef ATOMIC_LLONG_LOCK_FREE -# undef ATOMIC_POINTER_LOCK_FREE -# undef ATOMIC_FLAG_INIT -# undef ATOMIC_VAR_INIT -#endif // _CCCL_COMPILER_NVRTC - -// pre-define lock free query for heterogeneous compatibility -#ifndef _LIBCUDACXX_ATOMIC_IS_LOCK_FREE -# define _LIBCUDACXX_ATOMIC_IS_LOCK_FREE(__x) (__x <= 8) -#endif - -#ifndef _CCCL_COMPILER_NVRTC -# include - -# include -#endif // _CCCL_COMPILER_NVRTC - -#endif // _LIBCUDACXX___CUDA_ATOMIC_PRELUDE_H diff --git a/libcudacxx/include/cuda/std/__cuda/barrier.h b/libcudacxx/include/cuda/std/__cuda/barrier.h index b116540607..8533501ae1 100644 --- a/libcudacxx/include/cuda/std/__cuda/barrier.h +++ b/libcudacxx/include/cuda/std/__cuda/barrier.h @@ -25,6 +25,7 @@ # pragma system_header #endif // no system 
header +#include #include // _CUDA_VSTD::__void_t #include // _LIBCUDACXX_UNREACHABLE @@ -124,7 +125,7 @@ _LIBCUDACXX_BEGIN_NAMESPACE_CUDA template <> class barrier : public __block_scope_barrier_base { - using __barrier_base = _CUDA_VSTD::__barrier_base<_CUDA_VSTD::__empty_completion, (int) thread_scope_block>; + using __barrier_base = _CUDA_VSTD::__barrier_base<_CUDA_VSTD::__empty_completion, thread_scope_block>; __barrier_base __barrier; _CCCL_DEVICE friend inline _CUDA_VSTD::uint64_t* diff --git a/libcudacxx/include/cuda/std/atomic b/libcudacxx/include/cuda/std/atomic index 8b7e696e93..8ae9efb420 100644 --- a/libcudacxx/include/cuda/std/atomic +++ b/libcudacxx/include/cuda/std/atomic @@ -1,18 +1,32 @@ -//===----------------------------------------------------------------------===// +// -*- C++ -*- +//===--------------------------- atomic -----------------------------------===// // -// Part of libcu++, the C++ Standard Library for your entire system, -// under the Apache License v2.0 with LLVM Exceptions. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. // //===----------------------------------------------------------------------===// #ifndef _CUDA_STD_ATOMIC #define _CUDA_STD_ATOMIC +// clang-format off + #include +#ifdef _LIBCUDACXX_HAS_NO_THREADS +# error is not supported on this single threaded system +#endif +#ifdef _LIBCUDACXX_HAS_NO_ATOMIC_HEADER +# error is not implemented +#endif +#ifdef _LIBCUDACXX_UNSUPPORTED_THREAD_API +# error " is not supported on this system" +#endif +#ifdef kill_dependency +# error C++ standard library is incompatible with +#endif + #if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) # pragma GCC system_header #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) @@ -21,9 +35,803 @@ # pragma system_header #endif // no system header +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +// clang-format on + _CCCL_PUSH_MACROS -#include +_LIBCUDACXX_BEGIN_NAMESPACE_STD + +template +_LIBCUDACXX_INLINE_VISIBILITY _Tp kill_dependency(_Tp __y) noexcept +{ + return __y; +} + +// atomic +template +struct atomic : public __atomic_impl<_Tp> +{ + using value_type = _Tp; + + _LIBCUDACXX_INLINE_VISIBILITY constexpr atomic() noexcept + : __atomic_impl<_Tp>() + {} + + _LIBCUDACXX_INLINE_VISIBILITY constexpr atomic(_Tp __d) noexcept + : __atomic_impl<_Tp>(__d) + {} + + atomic(const atomic&) = delete; + atomic& operator=(const atomic&) = delete; + atomic& operator=(const atomic&) volatile = delete; + + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator=(_Tp __d) volatile noexcept + { + this->store(__d); + return __d; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator=(_Tp __d) noexcept + { + this->store(__d); + return __d; + } +}; + +// atomic_ref +template +struct atomic_ref : public __atomic_ref_impl<_Tp> +{ + using value_type = _Tp; + + static constexpr size_t required_alignment = sizeof(_Tp); + + static constexpr bool is_always_lock_free = sizeof(_Tp) <= 8; + + _LIBCUDACXX_INLINE_VISIBILITY explicit atomic_ref(_Tp& __ref) + : __atomic_ref_impl<_Tp>(__ref) + {} + + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator=(_Tp __v) const noexcept + { + this->store(__v); + return __v; + } + + atomic_ref(const atomic_ref&) noexcept = default; + atomic_ref& operator=(const atomic_ref&) = delete; + atomic_ref& 
operator=(const atomic_ref&) const = delete; +}; + +// atomic_is_lock_free + +template +_LIBCUDACXX_INLINE_VISIBILITY bool atomic_is_lock_free(const volatile atomic<_Tp>* __o) noexcept +{ + return __o->is_lock_free(); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY bool atomic_is_lock_free(const atomic<_Tp>* __o) noexcept +{ + return __o->is_lock_free(); +} + +// atomic_init + +template +_LIBCUDACXX_INLINE_VISIBILITY void atomic_init(volatile atomic<_Tp>* __o, _Tp __d) noexcept +{ + __atomic_init_dispatch(&__o->__a, __d); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY void atomic_init(atomic<_Tp>* __o, _Tp __d) noexcept +{ + __atomic_init_dispatch(&__o->__a, __d); +} + +// atomic_store + +template +_LIBCUDACXX_INLINE_VISIBILITY void atomic_store(volatile atomic<_Tp>* __o, _Tp __d) noexcept +{ + __o->store(__d); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY void atomic_store(atomic<_Tp>* __o, _Tp __d) noexcept +{ + __o->store(__d); +} + +// atomic_store_explicit + +template +_LIBCUDACXX_INLINE_VISIBILITY void atomic_store_explicit(volatile atomic<_Tp>* __o, _Tp __d, memory_order __m) noexcept + _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) +{ + __o->store(__d, __m); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY void atomic_store_explicit(atomic<_Tp>* __o, _Tp __d, memory_order __m) noexcept + _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) +{ + __o->store(__d, __m); +} + +// atomic_load + +template +_LIBCUDACXX_INLINE_VISIBILITY _Tp atomic_load(const volatile atomic<_Tp>* __o) noexcept +{ + return __o->load(); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY _Tp atomic_load(const atomic<_Tp>* __o) noexcept +{ + return __o->load(); +} + +// atomic_load_explicit + +template +_LIBCUDACXX_INLINE_VISIBILITY _Tp atomic_load_explicit(const volatile atomic<_Tp>* __o, memory_order __m) noexcept + _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) +{ + return __o->load(__m); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY _Tp atomic_load_explicit(const atomic<_Tp>* __o, memory_order __m) noexcept + _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) +{ + return __o->load(__m); +} + +// atomic_exchange + +template +_LIBCUDACXX_INLINE_VISIBILITY _Tp atomic_exchange(volatile atomic<_Tp>* __o, _Tp __d) noexcept +{ + return __o->exchange(__d); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY _Tp atomic_exchange(atomic<_Tp>* __o, _Tp __d) noexcept +{ + return __o->exchange(__d); +} + +// atomic_exchange_explicit + +template +_LIBCUDACXX_INLINE_VISIBILITY _Tp atomic_exchange_explicit(volatile atomic<_Tp>* __o, _Tp __d, memory_order __m) noexcept +{ + return __o->exchange(__d, __m); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY _Tp atomic_exchange_explicit(atomic<_Tp>* __o, _Tp __d, memory_order __m) noexcept +{ + return __o->exchange(__d, __m); +} + +// atomic_compare_exchange_weak + +template +_LIBCUDACXX_INLINE_VISIBILITY bool atomic_compare_exchange_weak(volatile atomic<_Tp>* __o, _Tp* __e, _Tp __d) noexcept +{ + return __o->compare_exchange_weak(*__e, __d); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY bool atomic_compare_exchange_weak(atomic<_Tp>* __o, _Tp* __e, _Tp __d) noexcept +{ + return __o->compare_exchange_weak(*__e, __d); +} + +// atomic_compare_exchange_strong + +template +_LIBCUDACXX_INLINE_VISIBILITY bool atomic_compare_exchange_strong(volatile atomic<_Tp>* __o, _Tp* __e, _Tp __d) noexcept +{ + return __o->compare_exchange_strong(*__e, __d); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY bool atomic_compare_exchange_strong(atomic<_Tp>* __o, _Tp* __e, _Tp __d) noexcept +{ + return __o->compare_exchange_strong(*__e, __d); +} + +// 
atomic_compare_exchange_weak_explicit + +template +_LIBCUDACXX_INLINE_VISIBILITY bool atomic_compare_exchange_weak_explicit( + volatile atomic<_Tp>* __o, _Tp* __e, _Tp __d, memory_order __s, memory_order __f) noexcept + _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) +{ + return __o->compare_exchange_weak(*__e, __d, __s, __f); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY bool +atomic_compare_exchange_weak_explicit(atomic<_Tp>* __o, _Tp* __e, _Tp __d, memory_order __s, memory_order __f) noexcept + _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) +{ + return __o->compare_exchange_weak(*__e, __d, __s, __f); +} + +// atomic_compare_exchange_strong_explicit + +template +_LIBCUDACXX_INLINE_VISIBILITY bool atomic_compare_exchange_strong_explicit( + volatile atomic<_Tp>* __o, _Tp* __e, _Tp __d, memory_order __s, memory_order __f) noexcept + _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) +{ + return __o->compare_exchange_strong(*__e, __d, __s, __f); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY bool atomic_compare_exchange_strong_explicit( + atomic<_Tp>* __o, _Tp* __e, _Tp __d, memory_order __s, memory_order __f) noexcept + _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) +{ + return __o->compare_exchange_strong(*__e, __d, __s, __f); +} + +// atomic_wait + +template +_LIBCUDACXX_INLINE_VISIBILITY void +atomic_wait(const volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __v) noexcept +{ + return __o->wait(__v); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY void atomic_wait(const atomic<_Tp>* __o, typename atomic<_Tp>::value_type __v) noexcept +{ + return __o->wait(__v); +} + +// atomic_wait_explicit + +template +_LIBCUDACXX_INLINE_VISIBILITY void +atomic_wait_explicit(const volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __v, memory_order __m) noexcept + _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) +{ + return __o->wait(__v, __m); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY void +atomic_wait_explicit(const atomic<_Tp>* __o, typename atomic<_Tp>::value_type __v, memory_order __m) noexcept + _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) +{ + return __o->wait(__v, __m); +} + +// atomic_notify_one + +template +_LIBCUDACXX_INLINE_VISIBILITY void atomic_notify_one(volatile atomic<_Tp>* __o) noexcept +{ + __o->notify_one(); +} +template +_LIBCUDACXX_INLINE_VISIBILITY void atomic_notify_one(atomic<_Tp>* __o) noexcept +{ + __o->notify_one(); +} + +// atomic_notify_one + +template +_LIBCUDACXX_INLINE_VISIBILITY void atomic_notify_all(volatile atomic<_Tp>* __o) noexcept +{ + __o->notify_all(); +} +template +_LIBCUDACXX_INLINE_VISIBILITY void atomic_notify_all(atomic<_Tp>* __o) noexcept +{ + __o->notify_all(); +} + +// atomic_fetch_add + +template +_LIBCUDACXX_INLINE_VISIBILITY + __enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp> + atomic_fetch_add(volatile atomic<_Tp>* __o, _Tp __op) noexcept +{ + return __o->fetch_add(__op); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY + __enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp> + atomic_fetch_add(atomic<_Tp>* __o, _Tp __op) noexcept +{ + return __o->fetch_add(__op); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY _Tp* atomic_fetch_add(volatile atomic<_Tp*>* __o, ptrdiff_t __op) noexcept +{ + return __o->fetch_add(__op); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY _Tp* atomic_fetch_add(atomic<_Tp*>* __o, ptrdiff_t __op) noexcept +{ + return __o->fetch_add(__op); +} + +// atomic_fetch_add_explicit + +template 
+_LIBCUDACXX_INLINE_VISIBILITY + __enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp> + atomic_fetch_add_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept +{ + return __o->fetch_add(__op, __m); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY + __enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp> + atomic_fetch_add_explicit(atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept +{ + return __o->fetch_add(__op, __m); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY _Tp* +atomic_fetch_add_explicit(volatile atomic<_Tp*>* __o, ptrdiff_t __op, memory_order __m) noexcept +{ + return __o->fetch_add(__op, __m); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY _Tp* +atomic_fetch_add_explicit(atomic<_Tp*>* __o, ptrdiff_t __op, memory_order __m) noexcept +{ + return __o->fetch_add(__op, __m); +} + +// atomic_fetch_sub + +template +_LIBCUDACXX_INLINE_VISIBILITY + __enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp> + atomic_fetch_sub(volatile atomic<_Tp>* __o, _Tp __op) noexcept +{ + return __o->fetch_sub(__op); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY + __enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp> + atomic_fetch_sub(atomic<_Tp>* __o, _Tp __op) noexcept +{ + return __o->fetch_sub(__op); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY _Tp* atomic_fetch_sub(volatile atomic<_Tp*>* __o, ptrdiff_t __op) noexcept +{ + return __o->fetch_sub(__op); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY _Tp* atomic_fetch_sub(atomic<_Tp*>* __o, ptrdiff_t __op) noexcept +{ + return __o->fetch_sub(__op); +} + +// atomic_fetch_sub_explicit + +template +_LIBCUDACXX_INLINE_VISIBILITY + __enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp> + atomic_fetch_sub_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept +{ + return __o->fetch_sub(__op, __m); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY + __enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp> + atomic_fetch_sub_explicit(atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept +{ + return __o->fetch_sub(__op, __m); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY _Tp* +atomic_fetch_sub_explicit(volatile atomic<_Tp*>* __o, ptrdiff_t __op, memory_order __m) noexcept +{ + return __o->fetch_sub(__op, __m); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY _Tp* +atomic_fetch_sub_explicit(atomic<_Tp*>* __o, ptrdiff_t __op, memory_order __m) noexcept +{ + return __o->fetch_sub(__op, __m); +} + +// atomic_fetch_and + +template +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> +atomic_fetch_and(volatile atomic<_Tp>* __o, _Tp __op) noexcept +{ + return __o->fetch_and(__op); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> +atomic_fetch_and(atomic<_Tp>* __o, _Tp __op) noexcept +{ + return __o->fetch_and(__op); +} + +// atomic_fetch_and_explicit + +template +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> +atomic_fetch_and_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept +{ + return __o->fetch_and(__op, __m); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> +atomic_fetch_and_explicit(atomic<_Tp>* __o, _Tp __op, 
memory_order __m) noexcept +{ + return __o->fetch_and(__op, __m); +} + +// atomic_fetch_or + +template +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> +atomic_fetch_or(volatile atomic<_Tp>* __o, _Tp __op) noexcept +{ + return __o->fetch_or(__op); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> +atomic_fetch_or(atomic<_Tp>* __o, _Tp __op) noexcept +{ + return __o->fetch_or(__op); +} + +// atomic_fetch_or_explicit + +template +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> +atomic_fetch_or_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept +{ + return __o->fetch_or(__op, __m); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> +atomic_fetch_or_explicit(atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept +{ + return __o->fetch_or(__op, __m); +} + +// atomic_fetch_xor + +template +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> +atomic_fetch_xor(volatile atomic<_Tp>* __o, _Tp __op) noexcept +{ + return __o->fetch_xor(__op); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> +atomic_fetch_xor(atomic<_Tp>* __o, _Tp __op) noexcept +{ + return __o->fetch_xor(__op); +} + +// atomic_fetch_xor_explicit + +template +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> +atomic_fetch_xor_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept +{ + return __o->fetch_xor(__op, __m); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> +atomic_fetch_xor_explicit(atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept +{ + return __o->fetch_xor(__op, __m); +} + +// flag type and operations + +struct atomic_flag +{ + __atomic_storage_t<_LIBCUDACXX_ATOMIC_FLAG_TYPE> __a; + + _LIBCUDACXX_INLINE_VISIBILITY bool test(memory_order __m = memory_order_seq_cst) const volatile noexcept + { + return _LIBCUDACXX_ATOMIC_FLAG_TYPE(true) == __atomic_load_dispatch(&__a, __m, __thread_scope_system_tag{}); + } + _LIBCUDACXX_INLINE_VISIBILITY bool test(memory_order __m = memory_order_seq_cst) const noexcept + { + return _LIBCUDACXX_ATOMIC_FLAG_TYPE(true) == __atomic_load_dispatch(&__a, __m, __thread_scope_system_tag{}); + } + + _LIBCUDACXX_INLINE_VISIBILITY bool test_and_set(memory_order __m = memory_order_seq_cst) volatile noexcept + { + return __atomic_exchange_dispatch(&__a, _LIBCUDACXX_ATOMIC_FLAG_TYPE(true), __m, __thread_scope_system_tag{}); + } + _LIBCUDACXX_INLINE_VISIBILITY bool test_and_set(memory_order __m = memory_order_seq_cst) noexcept + { + return __atomic_exchange_dispatch(&__a, _LIBCUDACXX_ATOMIC_FLAG_TYPE(true), __m, __thread_scope_system_tag{}); + } + _LIBCUDACXX_INLINE_VISIBILITY void clear(memory_order __m = memory_order_seq_cst) volatile noexcept + { + __atomic_store_dispatch(&__a, _LIBCUDACXX_ATOMIC_FLAG_TYPE(false), __m, __thread_scope_system_tag{}); + } + _LIBCUDACXX_INLINE_VISIBILITY void clear(memory_order __m = memory_order_seq_cst) noexcept + { + __atomic_store_dispatch(&__a, _LIBCUDACXX_ATOMIC_FLAG_TYPE(false), __m, __thread_scope_system_tag{}); + } + + _LIBCUDACXX_INLINE_VISIBILITY void + wait(_LIBCUDACXX_ATOMIC_FLAG_TYPE __v, memory_order __m = memory_order_seq_cst) const volatile noexcept + { + __atomic_wait(&__a, __v, __m, __thread_scope_system_tag{}); + } + _LIBCUDACXX_INLINE_VISIBILITY void + 
wait(_LIBCUDACXX_ATOMIC_FLAG_TYPE __v, memory_order __m = memory_order_seq_cst) const noexcept + { + __atomic_wait(&__a, __v, __m, __thread_scope_system_tag{}); + } + _LIBCUDACXX_INLINE_VISIBILITY void notify_one() volatile noexcept + { + __atomic_notify_one(&__a, __thread_scope_system_tag{}); + } + _LIBCUDACXX_INLINE_VISIBILITY void notify_one() noexcept + { + __atomic_notify_one(&__a, __thread_scope_system_tag{}); + } + _LIBCUDACXX_INLINE_VISIBILITY void notify_all() volatile noexcept + { + __atomic_notify_all(&__a, __thread_scope_system_tag{}); + } + _LIBCUDACXX_INLINE_VISIBILITY void notify_all() noexcept + { + __atomic_notify_all(&__a, __thread_scope_system_tag{}); + } + + atomic_flag() noexcept = default; + + _LIBCUDACXX_INLINE_VISIBILITY constexpr atomic_flag(bool __b) noexcept + : __a(__b) + {} // EXTENSION + + atomic_flag(const atomic_flag&) = delete; + atomic_flag& operator=(const atomic_flag&) = delete; + atomic_flag& operator=(const atomic_flag&) volatile = delete; +}; + +inline _LIBCUDACXX_INLINE_VISIBILITY bool atomic_flag_test(const volatile atomic_flag* __o) noexcept +{ + return __o->test(); +} + +inline _LIBCUDACXX_INLINE_VISIBILITY bool atomic_flag_test(const atomic_flag* __o) noexcept +{ + return __o->test(); +} + +inline _LIBCUDACXX_INLINE_VISIBILITY bool +atomic_flag_test_explicit(const volatile atomic_flag* __o, memory_order __m) noexcept +{ + return __o->test(__m); +} + +inline _LIBCUDACXX_INLINE_VISIBILITY bool atomic_flag_test_explicit(const atomic_flag* __o, memory_order __m) noexcept +{ + return __o->test(__m); +} + +inline _LIBCUDACXX_INLINE_VISIBILITY bool atomic_flag_test_and_set(volatile atomic_flag* __o) noexcept +{ + return __o->test_and_set(); +} + +inline _LIBCUDACXX_INLINE_VISIBILITY bool atomic_flag_test_and_set(atomic_flag* __o) noexcept +{ + return __o->test_and_set(); +} + +inline _LIBCUDACXX_INLINE_VISIBILITY bool +atomic_flag_test_and_set_explicit(volatile atomic_flag* __o, memory_order __m) noexcept +{ + return __o->test_and_set(__m); +} + +inline _LIBCUDACXX_INLINE_VISIBILITY bool atomic_flag_test_and_set_explicit(atomic_flag* __o, memory_order __m) noexcept +{ + return __o->test_and_set(__m); +} + +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_clear(volatile atomic_flag* __o) noexcept +{ + __o->clear(); +} + +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_clear(atomic_flag* __o) noexcept +{ + __o->clear(); +} + +inline _LIBCUDACXX_INLINE_VISIBILITY void +atomic_flag_clear_explicit(volatile atomic_flag* __o, memory_order __m) noexcept +{ + __o->clear(__m); +} + +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_clear_explicit(atomic_flag* __o, memory_order __m) noexcept +{ + __o->clear(__m); +} + +#if !defined(__CUDA_MINIMUM_ARCH__) || __CUDA_MINIMUM_ARCH__ >= 700 + +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_wait(const volatile atomic_flag* __o, bool __v) noexcept +{ + __o->wait(__v); +} + +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_wait(const atomic_flag* __o, bool __v) noexcept +{ + __o->wait(__v); +} + +inline _LIBCUDACXX_INLINE_VISIBILITY void +atomic_flag_wait_explicit(const volatile atomic_flag* __o, bool __v, memory_order __m) noexcept +{ + __o->wait(__v, __m); +} + +inline _LIBCUDACXX_INLINE_VISIBILITY void +atomic_flag_wait_explicit(const atomic_flag* __o, bool __v, memory_order __m) noexcept +{ + __o->wait(__v, __m); +} + +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_notify_one(volatile atomic_flag* __o) noexcept +{ + __o->notify_one(); +} + +inline _LIBCUDACXX_INLINE_VISIBILITY void 
atomic_flag_notify_one(atomic_flag* __o) noexcept +{ + __o->notify_one(); +} + +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_notify_all(volatile atomic_flag* __o) noexcept +{ + __o->notify_all(); +} + +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_notify_all(atomic_flag* __o) noexcept +{ + __o->notify_all(); +} + +#endif + +// fences + +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_thread_fence(memory_order __m) noexcept +{ + __atomic_thread_fence_dispatch(__m); +} + +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_signal_fence(memory_order __m) noexcept +{ + __atomic_signal_fence_dispatch(__m); +} + +// Atomics for standard typedef types + +typedef atomic atomic_bool; +typedef atomic atomic_char; +typedef atomic atomic_schar; +typedef atomic atomic_uchar; +typedef atomic atomic_short; +typedef atomic atomic_ushort; +typedef atomic atomic_int; +typedef atomic atomic_uint; +typedef atomic atomic_long; +typedef atomic atomic_ulong; +typedef atomic atomic_llong; +typedef atomic atomic_ullong; +typedef atomic atomic_char16_t; +typedef atomic atomic_char32_t; +typedef atomic atomic_wchar_t; + +typedef atomic atomic_int_least8_t; +typedef atomic atomic_uint_least8_t; +typedef atomic atomic_int_least16_t; +typedef atomic atomic_uint_least16_t; +typedef atomic atomic_int_least32_t; +typedef atomic atomic_uint_least32_t; +typedef atomic atomic_int_least64_t; +typedef atomic atomic_uint_least64_t; + +typedef atomic atomic_int_fast8_t; +typedef atomic atomic_uint_fast8_t; +typedef atomic atomic_int_fast16_t; +typedef atomic atomic_uint_fast16_t; +typedef atomic atomic_int_fast32_t; +typedef atomic atomic_uint_fast32_t; +typedef atomic atomic_int_fast64_t; +typedef atomic atomic_uint_fast64_t; + +typedef atomic atomic_int8_t; +typedef atomic atomic_uint8_t; +typedef atomic atomic_int16_t; +typedef atomic atomic_uint16_t; +typedef atomic atomic_int32_t; +typedef atomic atomic_uint32_t; +typedef atomic atomic_int64_t; +typedef atomic atomic_uint64_t; + +typedef atomic atomic_intptr_t; +typedef atomic atomic_uintptr_t; +typedef atomic atomic_size_t; +typedef atomic atomic_ptrdiff_t; +typedef atomic atomic_intmax_t; +typedef atomic atomic_uintmax_t; + +static_assert(LIBCUDACXX_ATOMIC_INT_LOCK_FREE, "This library assumes atomic is lock-free."); + +typedef atomic atomic_signed_lock_free; +typedef atomic atomic_unsigned_lock_free; + +#define LIBCUDACXX_ATOMIC_FLAG_INIT \ + { \ + false \ + } +#define LIBCUDACXX_ATOMIC_VAR_INIT(__v) \ + { \ + __v \ + } + +_LIBCUDACXX_END_NAMESPACE_STD _CCCL_POP_MACROS diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/__threading_support b/libcudacxx/include/cuda/std/detail/libcxx/include/__threading_support index d6889e1822..9f5fbe9255 100644 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/__threading_support +++ b/libcudacxx/include/cuda/std/detail/libcxx/include/__threading_support @@ -20,7 +20,6 @@ # pragma system_header #endif // no system header -#include #include #include #include diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/atomic b/libcudacxx/include/cuda/std/detail/libcxx/include/atomic deleted file mode 100644 index 5656afa683..0000000000 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/atomic +++ /dev/null @@ -1,2999 +0,0 @@ -// -*- C++ -*- -//===--------------------------- atomic -----------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
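A minimal usage sketch of the surface the new <cuda/std/atomic> header declares above: the atomic/atomic_ref class templates, the atomic_* typedefs, atomic_flag, and the renamed LIBCUDACXX_ATOMIC_FLAG_INIT / LIBCUDACXX_ATOMIC_VAR_INIT macros. Only the library names come from the header; the struct and function names below are invented for the example, and the __host__ __device__ annotation assumes a CUDA compiler.

#include <cuda/std/atomic>

struct counters
{
  // brace-initialization through the renamed macros instead of ATOMIC_VAR_INIT / ATOMIC_FLAG_INIT
  cuda::std::atomic_int  hits = LIBCUDACXX_ATOMIC_VAR_INIT(0);
  cuda::std::atomic_flag once = LIBCUDACXX_ATOMIC_FLAG_INIT;
};

__host__ __device__ void record_hit(counters& c)
{
  c.hits.fetch_add(1, cuda::std::memory_order_relaxed); // or the free function: cuda::std::atomic_fetch_add(&c.hits, 1)
  if (!c.once.test_and_set(cuda::std::memory_order_acq_rel))
  {
    // first caller through performs any one-time work here
  }
}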
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef _LIBCUDACXX_ATOMIC -#define _LIBCUDACXX_ATOMIC - -/* - atomic synopsis - -namespace std -{ - -// feature test macro - -#define __cpp_lib_atomic_is_always_lock_free // as specified by SG10 - - // order and consistency - - enum memory_order: unspecified // enum class in C++20 - { - relaxed, - consume, // load-consume - acquire, // load-acquire - release, // store-release - acq_rel, // store-release load-acquire - seq_cst // store-release load-acquire - }; - - inline constexpr auto memory_order_relaxed = memory_order::relaxed; - inline constexpr auto memory_order_consume = memory_order::consume; - inline constexpr auto memory_order_acquire = memory_order::acquire; - inline constexpr auto memory_order_release = memory_order::release; - inline constexpr auto memory_order_acq_rel = memory_order::acq_rel; - inline constexpr auto memory_order_seq_cst = memory_order::seq_cst; - -template T kill_dependency(T y) noexcept; - -// lock-free property - -#define ATOMIC_BOOL_LOCK_FREE unspecified -#define ATOMIC_CHAR_LOCK_FREE unspecified -#define ATOMIC_CHAR16_T_LOCK_FREE unspecified -#define ATOMIC_CHAR32_T_LOCK_FREE unspecified -#define ATOMIC_WCHAR_T_LOCK_FREE unspecified -#define ATOMIC_SHORT_LOCK_FREE unspecified -#define ATOMIC_INT_LOCK_FREE unspecified -#define ATOMIC_LONG_LOCK_FREE unspecified -#define ATOMIC_LLONG_LOCK_FREE unspecified -#define ATOMIC_POINTER_LOCK_FREE unspecified - -// flag type and operations - -typedef struct atomic_flag -{ - bool test_and_set(memory_order m = memory_order_seq_cst) volatile noexcept; - bool test_and_set(memory_order m = memory_order_seq_cst) noexcept; - void clear(memory_order m = memory_order_seq_cst) volatile noexcept; - void clear(memory_order m = memory_order_seq_cst) noexcept; - atomic_flag() noexcept = default; - atomic_flag(const atomic_flag&) = delete; - atomic_flag& operator=(const atomic_flag&) = delete; - atomic_flag& operator=(const atomic_flag&) volatile = delete; -} atomic_flag; - -bool - atomic_flag_test_and_set(volatile atomic_flag* obj) noexcept; - -bool - atomic_flag_test_and_set(atomic_flag* obj) noexcept; - -bool - atomic_flag_test_and_set_explicit(volatile atomic_flag* obj, - memory_order m) noexcept; - -bool - atomic_flag_test_and_set_explicit(atomic_flag* obj, memory_order m) noexcept; - -void - atomic_flag_clear(volatile atomic_flag* obj) noexcept; - -void - atomic_flag_clear(atomic_flag* obj) noexcept; - -void - atomic_flag_clear_explicit(volatile atomic_flag* obj, memory_order m) noexcept; - -void - atomic_flag_clear_explicit(atomic_flag* obj, memory_order m) noexcept; - -#define ATOMIC_FLAG_INIT see below -#define ATOMIC_VAR_INIT(value) see below - -template -struct atomic -{ - static constexpr bool is_always_lock_free; - bool is_lock_free() const volatile noexcept; - bool is_lock_free() const noexcept; - void store(T desr, memory_order m = memory_order_seq_cst) volatile noexcept; - void store(T desr, memory_order m = memory_order_seq_cst) noexcept; - T load(memory_order m = memory_order_seq_cst) const volatile noexcept; - T load(memory_order m = memory_order_seq_cst) const noexcept; - operator T() const volatile noexcept; - operator T() const noexcept; - T exchange(T desr, memory_order m = memory_order_seq_cst) volatile noexcept; - T exchange(T desr, memory_order m = memory_order_seq_cst) noexcept; - bool compare_exchange_weak(T& expc, T desr, - memory_order s, memory_order f) 
volatile noexcept; - bool compare_exchange_weak(T& expc, T desr, memory_order s, memory_order f) noexcept; - bool compare_exchange_strong(T& expc, T desr, - memory_order s, memory_order f) volatile noexcept; - bool compare_exchange_strong(T& expc, T desr, - memory_order s, memory_order f) noexcept; - bool compare_exchange_weak(T& expc, T desr, - memory_order m = memory_order_seq_cst) volatile noexcept; - bool compare_exchange_weak(T& expc, T desr, - memory_order m = memory_order_seq_cst) noexcept; - bool compare_exchange_strong(T& expc, T desr, - memory_order m = memory_order_seq_cst) volatile noexcept; - bool compare_exchange_strong(T& expc, T desr, - memory_order m = memory_order_seq_cst) noexcept; - - atomic() noexcept = default; - constexpr atomic(T desr) noexcept; - atomic(const atomic&) = delete; - atomic& operator=(const atomic&) = delete; - atomic& operator=(const atomic&) volatile = delete; - T operator=(T) volatile noexcept; - T operator=(T) noexcept; -}; - -template <> -struct atomic -{ - static constexpr bool is_always_lock_free; - bool is_lock_free() const volatile noexcept; - bool is_lock_free() const noexcept; - void store(integral desr, memory_order m = memory_order_seq_cst) volatile noexcept; - void store(integral desr, memory_order m = memory_order_seq_cst) noexcept; - integral load(memory_order m = memory_order_seq_cst) const volatile noexcept; - integral load(memory_order m = memory_order_seq_cst) const noexcept; - operator integral() const volatile noexcept; - operator integral() const noexcept; - integral exchange(integral desr, - memory_order m = memory_order_seq_cst) volatile noexcept; - integral exchange(integral desr, memory_order m = memory_order_seq_cst) noexcept; - bool compare_exchange_weak(integral& expc, integral desr, - memory_order s, memory_order f) volatile noexcept; - bool compare_exchange_weak(integral& expc, integral desr, - memory_order s, memory_order f) noexcept; - bool compare_exchange_strong(integral& expc, integral desr, - memory_order s, memory_order f) volatile noexcept; - bool compare_exchange_strong(integral& expc, integral desr, - memory_order s, memory_order f) noexcept; - bool compare_exchange_weak(integral& expc, integral desr, - memory_order m = memory_order_seq_cst) volatile noexcept; - bool compare_exchange_weak(integral& expc, integral desr, - memory_order m = memory_order_seq_cst) noexcept; - bool compare_exchange_strong(integral& expc, integral desr, - memory_order m = memory_order_seq_cst) volatile noexcept; - bool compare_exchange_strong(integral& expc, integral desr, - memory_order m = memory_order_seq_cst) noexcept; - - integral - fetch_add(integral op, memory_order m = memory_order_seq_cst) volatile noexcept; - integral fetch_add(integral op, memory_order m = memory_order_seq_cst) noexcept; - integral - fetch_sub(integral op, memory_order m = memory_order_seq_cst) volatile noexcept; - integral fetch_sub(integral op, memory_order m = memory_order_seq_cst) noexcept; - integral - fetch_and(integral op, memory_order m = memory_order_seq_cst) volatile noexcept; - integral fetch_and(integral op, memory_order m = memory_order_seq_cst) noexcept; - integral - fetch_or(integral op, memory_order m = memory_order_seq_cst) volatile noexcept; - integral fetch_or(integral op, memory_order m = memory_order_seq_cst) noexcept; - integral - fetch_xor(integral op, memory_order m = memory_order_seq_cst) volatile noexcept; - integral fetch_xor(integral op, memory_order m = memory_order_seq_cst) noexcept; - - atomic() noexcept = default; - constexpr 
atomic(integral desr) noexcept; - atomic(const atomic&) = delete; - atomic& operator=(const atomic&) = delete; - atomic& operator=(const atomic&) volatile = delete; - integral operator=(integral desr) volatile noexcept; - integral operator=(integral desr) noexcept; - - integral operator++(int) volatile noexcept; - integral operator++(int) noexcept; - integral operator--(int) volatile noexcept; - integral operator--(int) noexcept; - integral operator++() volatile noexcept; - integral operator++() noexcept; - integral operator--() volatile noexcept; - integral operator--() noexcept; - integral operator+=(integral op) volatile noexcept; - integral operator+=(integral op) noexcept; - integral operator-=(integral op) volatile noexcept; - integral operator-=(integral op) noexcept; - integral operator&=(integral op) volatile noexcept; - integral operator&=(integral op) noexcept; - integral operator|=(integral op) volatile noexcept; - integral operator|=(integral op) noexcept; - integral operator^=(integral op) volatile noexcept; - integral operator^=(integral op) noexcept; -}; - -template -struct atomic -{ - static constexpr bool is_always_lock_free; - bool is_lock_free() const volatile noexcept; - bool is_lock_free() const noexcept; - void store(T* desr, memory_order m = memory_order_seq_cst) volatile noexcept; - void store(T* desr, memory_order m = memory_order_seq_cst) noexcept; - T* load(memory_order m = memory_order_seq_cst) const volatile noexcept; - T* load(memory_order m = memory_order_seq_cst) const noexcept; - operator T*() const volatile noexcept; - operator T*() const noexcept; - T* exchange(T* desr, memory_order m = memory_order_seq_cst) volatile noexcept; - T* exchange(T* desr, memory_order m = memory_order_seq_cst) noexcept; - bool compare_exchange_weak(T*& expc, T* desr, - memory_order s, memory_order f) volatile noexcept; - bool compare_exchange_weak(T*& expc, T* desr, - memory_order s, memory_order f) noexcept; - bool compare_exchange_strong(T*& expc, T* desr, - memory_order s, memory_order f) volatile noexcept; - bool compare_exchange_strong(T*& expc, T* desr, - memory_order s, memory_order f) noexcept; - bool compare_exchange_weak(T*& expc, T* desr, - memory_order m = memory_order_seq_cst) volatile noexcept; - bool compare_exchange_weak(T*& expc, T* desr, - memory_order m = memory_order_seq_cst) noexcept; - bool compare_exchange_strong(T*& expc, T* desr, - memory_order m = memory_order_seq_cst) volatile noexcept; - bool compare_exchange_strong(T*& expc, T* desr, - memory_order m = memory_order_seq_cst) noexcept; - T* fetch_add(ptrdiff_t op, memory_order m = memory_order_seq_cst) volatile noexcept; - T* fetch_add(ptrdiff_t op, memory_order m = memory_order_seq_cst) noexcept; - T* fetch_sub(ptrdiff_t op, memory_order m = memory_order_seq_cst) volatile noexcept; - T* fetch_sub(ptrdiff_t op, memory_order m = memory_order_seq_cst) noexcept; - - atomic() noexcept = default; - constexpr atomic(T* desr) noexcept; - atomic(const atomic&) = delete; - atomic& operator=(const atomic&) = delete; - atomic& operator=(const atomic&) volatile = delete; - - T* operator=(T*) volatile noexcept; - T* operator=(T*) noexcept; - T* operator++(int) volatile noexcept; - T* operator++(int) noexcept; - T* operator--(int) volatile noexcept; - T* operator--(int) noexcept; - T* operator++() volatile noexcept; - T* operator++() noexcept; - T* operator--() volatile noexcept; - T* operator--() noexcept; - T* operator+=(ptrdiff_t op) volatile noexcept; - T* operator+=(ptrdiff_t op) noexcept; - T* 
operator-=(ptrdiff_t op) volatile noexcept; - T* operator-=(ptrdiff_t op) noexcept; -}; - - -template - bool - atomic_is_lock_free(const volatile atomic* obj) noexcept; - -template - bool - atomic_is_lock_free(const atomic* obj) noexcept; - -template - void - atomic_init(volatile atomic* obj, T desr) noexcept; - -template - void - atomic_init(atomic* obj, T desr) noexcept; - -template - void - atomic_store(volatile atomic* obj, T desr) noexcept; - -template - void - atomic_store(atomic* obj, T desr) noexcept; - -template - void - atomic_store_explicit(volatile atomic* obj, T desr, memory_order m) noexcept; - -template - void - atomic_store_explicit(atomic* obj, T desr, memory_order m) noexcept; - -template - T - atomic_load(const volatile atomic* obj) noexcept; - -template - T - atomic_load(const atomic* obj) noexcept; - -template - T - atomic_load_explicit(const volatile atomic* obj, memory_order m) noexcept; - -template - T - atomic_load_explicit(const atomic* obj, memory_order m) noexcept; - -template - T - atomic_exchange(volatile atomic* obj, T desr) noexcept; - -template - T - atomic_exchange(atomic* obj, T desr) noexcept; - -template - T - atomic_exchange_explicit(volatile atomic* obj, T desr, memory_order m) noexcept; - -template - T - atomic_exchange_explicit(atomic* obj, T desr, memory_order m) noexcept; - -template - bool - atomic_compare_exchange_weak(volatile atomic* obj, T* expc, T desr) noexcept; - -template - bool - atomic_compare_exchange_weak(atomic* obj, T* expc, T desr) noexcept; - -template - bool - atomic_compare_exchange_strong(volatile atomic* obj, T* expc, T desr) noexcept; - -template - bool - atomic_compare_exchange_strong(atomic* obj, T* expc, T desr) noexcept; - -template - bool - atomic_compare_exchange_weak_explicit(volatile atomic* obj, T* expc, - T desr, - memory_order s, memory_order f) noexcept; - -template - bool - atomic_compare_exchange_weak_explicit(atomic* obj, T* expc, T desr, - memory_order s, memory_order f) noexcept; - -template - bool - atomic_compare_exchange_strong_explicit(volatile atomic* obj, - T* expc, T desr, - memory_order s, memory_order f) noexcept; - -template - bool - atomic_compare_exchange_strong_explicit(atomic* obj, T* expc, - T desr, - memory_order s, memory_order f) noexcept; - -template - Integral - atomic_fetch_add(volatile atomic* obj, Integral op) noexcept; - -template - Integral - atomic_fetch_add(atomic* obj, Integral op) noexcept; - -template - Integral - atomic_fetch_add_explicit(volatile atomic* obj, Integral op, - memory_order m) noexcept; -template - Integral - atomic_fetch_add_explicit(atomic* obj, Integral op, - memory_order m) noexcept; -template - Integral - atomic_fetch_sub(volatile atomic* obj, Integral op) noexcept; - -template - Integral - atomic_fetch_sub(atomic* obj, Integral op) noexcept; - -template - Integral - atomic_fetch_sub_explicit(volatile atomic* obj, Integral op, - memory_order m) noexcept; -template - Integral - atomic_fetch_sub_explicit(atomic* obj, Integral op, - memory_order m) noexcept; -template - Integral - atomic_fetch_and(volatile atomic* obj, Integral op) noexcept; - -template - Integral - atomic_fetch_and(atomic* obj, Integral op) noexcept; - -template - Integral - atomic_fetch_and_explicit(volatile atomic* obj, Integral op, - memory_order m) noexcept; -template - Integral - atomic_fetch_and_explicit(atomic* obj, Integral op, - memory_order m) noexcept; -template - Integral - atomic_fetch_or(volatile atomic* obj, Integral op) noexcept; - -template - Integral - atomic_fetch_or(atomic* 
obj, Integral op) noexcept; - -template - Integral - atomic_fetch_or_explicit(volatile atomic* obj, Integral op, - memory_order m) noexcept; -template - Integral - atomic_fetch_or_explicit(atomic* obj, Integral op, - memory_order m) noexcept; -template - Integral - atomic_fetch_xor(volatile atomic* obj, Integral op) noexcept; - -template - Integral - atomic_fetch_xor(atomic* obj, Integral op) noexcept; - -template - Integral - atomic_fetch_xor_explicit(volatile atomic* obj, Integral op, - memory_order m) noexcept; -template - Integral - atomic_fetch_xor_explicit(atomic* obj, Integral op, - memory_order m) noexcept; - -template - T* - atomic_fetch_add(volatile atomic* obj, ptrdiff_t op) noexcept; - -template - T* - atomic_fetch_add(atomic* obj, ptrdiff_t op) noexcept; - -template - T* - atomic_fetch_add_explicit(volatile atomic* obj, ptrdiff_t op, - memory_order m) noexcept; -template - T* - atomic_fetch_add_explicit(atomic* obj, ptrdiff_t op, memory_order m) noexcept; - -template - T* - atomic_fetch_sub(volatile atomic* obj, ptrdiff_t op) noexcept; - -template - T* - atomic_fetch_sub(atomic* obj, ptrdiff_t op) noexcept; - -template - T* - atomic_fetch_sub_explicit(volatile atomic* obj, ptrdiff_t op, - memory_order m) noexcept; -template - T* - atomic_fetch_sub_explicit(atomic* obj, ptrdiff_t op, memory_order m) noexcept; - -// Atomics for standard typedef types - -typedef atomic atomic_bool; -typedef atomic atomic_char; -typedef atomic atomic_schar; -typedef atomic atomic_uchar; -typedef atomic atomic_short; -typedef atomic atomic_ushort; -typedef atomic atomic_int; -typedef atomic atomic_uint; -typedef atomic atomic_long; -typedef atomic atomic_ulong; -typedef atomic atomic_llong; -typedef atomic atomic_ullong; -typedef atomic atomic_char16_t; -typedef atomic atomic_char32_t; -typedef atomic atomic_wchar_t; - -typedef atomic atomic_int_least8_t; -typedef atomic atomic_uint_least8_t; -typedef atomic atomic_int_least16_t; -typedef atomic atomic_uint_least16_t; -typedef atomic atomic_int_least32_t; -typedef atomic atomic_uint_least32_t; -typedef atomic atomic_int_least64_t; -typedef atomic atomic_uint_least64_t; - -typedef atomic atomic_int_fast8_t; -typedef atomic atomic_uint_fast8_t; -typedef atomic atomic_int_fast16_t; -typedef atomic atomic_uint_fast16_t; -typedef atomic atomic_int_fast32_t; -typedef atomic atomic_uint_fast32_t; -typedef atomic atomic_int_fast64_t; -typedef atomic atomic_uint_fast64_t; - -typedef atomic atomic_int8_t; -typedef atomic atomic_uint8_t; -typedef atomic atomic_int16_t; -typedef atomic atomic_uint16_t; -typedef atomic atomic_int32_t; -typedef atomic atomic_uint32_t; -typedef atomic atomic_int64_t; -typedef atomic atomic_uint64_t; - -typedef atomic atomic_intptr_t; -typedef atomic atomic_uintptr_t; -typedef atomic atomic_size_t; -typedef atomic atomic_ptrdiff_t; -typedef atomic atomic_intmax_t; -typedef atomic atomic_uintmax_t; - -// fences - -void atomic_thread_fence(memory_order m) noexcept; -void atomic_signal_fence(memory_order m) noexcept; - -} // std - -*/ - -#include - -#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) -# pragma GCC system_header -#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) -# pragma clang system_header -#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) -# pragma system_header -#endif // no system header - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include // all public C++ headers provide the assertion handler -#include -#include -#include -#include -#include - 
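The synopsis above documents the compare_exchange interface, including the separate success and failure memory orders. A hedged sketch of the canonical retry loop against that interface follows; atomic_fetch_max_sketch is an invented helper, not a library function.

#include <cuda/std/atomic>

__host__ __device__ inline int atomic_fetch_max_sketch(cuda::std::atomic<int>& a, int v)
{
  int expected = a.load(cuda::std::memory_order_relaxed);
  // Retry until v is installed or the stored value is already >= v;
  // compare_exchange_weak refreshes 'expected' with the current value on failure.
  while (expected < v
         && !a.compare_exchange_weak(expected, v,
                                     cuda::std::memory_order_relaxed,   // success order
                                     cuda::std::memory_order_relaxed))  // failure order
  {
  }
  return expected; // the previously stored value, or a value already >= v
}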
-_CCCL_PUSH_MACROS - -#ifdef _LIBCUDACXX_HAS_NO_THREADS -# error is not supported on this single threaded system -#endif -#ifdef _LIBCUDACXX_HAS_NO_ATOMIC_HEADER -# error is not implemented -#endif -#ifdef _LIBCUDACXX_UNSUPPORTED_THREAD_API -# error " is not supported on this system" -#endif -#ifdef kill_dependency -# error C++ standard library is incompatible with -#endif - -#define _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) \ - _LIBCUDACXX_DIAGNOSE_WARNING( \ - __m == memory_order_consume || __m == memory_order_acquire || __m == memory_order_acq_rel, \ - "memory order argument to atomic operation is invalid") - -#define _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) \ - _LIBCUDACXX_DIAGNOSE_WARNING(__m == memory_order_release || __m == memory_order_acq_rel, \ - "memory order argument to atomic operation is invalid") - -#define _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__m, __f) \ - _LIBCUDACXX_DIAGNOSE_WARNING(__f == memory_order_release || __f == memory_order_acq_rel, \ - "memory order argument to atomic operation is invalid") - -#if defined(_LIBCUDACXX_HAS_MSVC_ATOMIC_IMPL) -# include -#endif - -#if !defined(_CCCL_COMPILER_NVRTC) -# include -#endif - -#if !defined(__CLANG_ATOMIC_BOOL_LOCK_FREE) && !defined(__GCC_ATOMIC_BOOL_LOCK_FREE) -# define ATOMIC_BOOL_LOCK_FREE 2 -# define ATOMIC_CHAR_LOCK_FREE 2 -# define ATOMIC_CHAR16_T_LOCK_FREE 2 -# define ATOMIC_CHAR32_T_LOCK_FREE 2 -# define ATOMIC_WCHAR_T_LOCK_FREE 2 -# define ATOMIC_SHORT_LOCK_FREE 2 -# define ATOMIC_INT_LOCK_FREE 2 -# define ATOMIC_LONG_LOCK_FREE 2 -# define ATOMIC_LLONG_LOCK_FREE 2 -# define ATOMIC_POINTER_LOCK_FREE 2 -#endif //! defined(__CLANG_ATOMIC_BOOL_LOCK_FREE) && !defined(__GCC_ATOMIC_BOOL_LOCK_FREE) - -#ifndef __ATOMIC_RELAXED -# define __ATOMIC_RELAXED 0 -# define __ATOMIC_CONSUME 1 -# define __ATOMIC_ACQUIRE 2 -# define __ATOMIC_RELEASE 3 -# define __ATOMIC_ACQ_REL 4 -# define __ATOMIC_SEQ_CST 5 -#endif //__ATOMIC_RELAXED - -_LIBCUDACXX_BEGIN_NAMESPACE_STD - -// Figure out what the underlying type for `memory_order` would be if it were -// declared as an unscoped enum (accounting for -fshort-enums). Use this result -// to pin the underlying type in C++20. 
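The comment above describes pinning memory_order's underlying type by first declaring an unscoped twin enum and reusing its underlying type for the scoped C++20 enum. The same trick in a generic, stand-alone form (all names invented for illustration) looks like this:

#include <type_traits>

// Unscoped enum: its underlying type is whatever the ABI (and -fshort-enums) chooses.
enum legacy_color { legacy_red, legacy_green, legacy_blue };

using color_underlying_t = std::underlying_type<legacy_color>::type;

// Scoped enum pinned to that same type, so switching to 'enum class' cannot change the ABI.
enum class color : color_underlying_t { red = legacy_red, green = legacy_green, blue = legacy_blue };

static_assert(std::is_same<std::underlying_type<color>::type, color_underlying_t>::value,
              "scoped enum keeps the legacy underlying type");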
-enum __legacy_memory_order -{ - __mo_relaxed, - __mo_consume, - __mo_acquire, - __mo_release, - __mo_acq_rel, - __mo_seq_cst -}; - -typedef underlying_type<__legacy_memory_order>::type __memory_order_underlying_t; - -#if _CCCL_STD_VER > 2017 - -enum class memory_order : __memory_order_underlying_t -{ - relaxed = __mo_relaxed, - consume = __mo_consume, - acquire = __mo_acquire, - release = __mo_release, - acq_rel = __mo_acq_rel, - seq_cst = __mo_seq_cst -}; - -inline constexpr auto memory_order_relaxed = memory_order::relaxed; -inline constexpr auto memory_order_consume = memory_order::consume; -inline constexpr auto memory_order_acquire = memory_order::acquire; -inline constexpr auto memory_order_release = memory_order::release; -inline constexpr auto memory_order_acq_rel = memory_order::acq_rel; -inline constexpr auto memory_order_seq_cst = memory_order::seq_cst; - -#else - -typedef enum memory_order -{ - memory_order_relaxed = __mo_relaxed, - memory_order_consume = __mo_consume, - memory_order_acquire = __mo_acquire, - memory_order_release = __mo_release, - memory_order_acq_rel = __mo_acq_rel, - memory_order_seq_cst = __mo_seq_cst, -} memory_order; - -#endif // _CCCL_STD_VER > 2017 - -template -_LIBCUDACXX_INLINE_VISIBILITY bool __cxx_nonatomic_compare_equal(_Tp const& __lhs, _Tp const& __rhs) -{ -#if defined(_CCCL_CUDA_COMPILER) - return __lhs == __rhs; -#else - return memcmp(&__lhs, &__rhs, sizeof(_Tp)) == 0; -#endif -} - -static_assert((is_same::type, __memory_order_underlying_t>::value), - "unexpected underlying type for std::memory_order"); - -#if defined(_LIBCUDACXX_HAS_GCC_ATOMIC_IMP) || defined(_LIBCUDACXX_ATOMIC_ONLY_USE_BUILTINS) - -// [atomics.types.generic]p1 guarantees _Tp is trivially copyable. Because -// the default operator= in an object is not volatile, a byte-by-byte copy -// is required. -template -_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value> -__cxx_atomic_assign_volatile(_Tp& __a_value, _Tv const& __val) -{ - __a_value = __val; -} -template -_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value> -__cxx_atomic_assign_volatile(_Tp volatile& __a_value, _Tv volatile const& __val) -{ - volatile char* __to = reinterpret_cast(&__a_value); - volatile char* __end = __to + sizeof(_Tp); - volatile const char* __from = reinterpret_cast(&__val); - while (__to != __end) - { - *__to++ = *__from++; - } -} - -#endif - -// Headers are wrapped like so: (cuda::std::|std::)detail -namespace __detail -{ -#if defined(_LIBCUDACXX_HAS_CUDA_ATOMIC_EXT) -# include -#endif - -#if defined(_LIBCUDACXX_HAS_CUDA_ATOMIC_IMPL) -# include -#elif defined(_LIBCUDACXX_HAS_MSVC_ATOMIC_IMPL) -# include -#elif defined(_LIBCUDACXX_HAS_GCC_ATOMIC_IMP) -# include -#elif defined(_LIBCUDACXX_HAS_C_ATOMIC_IMP) -// TODO: Maybe support C11 atomics? 
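The "(cuda::std::|std::)__detail" wrapping noted above pairs a preprocessor-selected backend with using-declarations that re-export the chosen entry points (the using block appears just below). A generic sketch of that pattern, with every name invented and the builtin-based branch assuming a GCC/Clang-style __atomic_fetch_add builtin:

namespace mylib {
namespace detail {
#if defined(MYLIB_USE_BUILTIN_ATOMICS)
// backend A: compiler builtins
inline int fetch_add_backend(int* p, int v) { return __atomic_fetch_add(p, v, __ATOMIC_SEQ_CST); }
#else
// backend B: single-threaded fallback
inline int fetch_add_backend(int* p, int v) { int old = *p; *p += v; return old; }
#endif
} // namespace detail

using detail::fetch_add_backend; // callers spell it mylib::fetch_add_backend regardless of backend
} // namespace mylib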
-// #include -#endif // _LIBCUDACXX_HAS_GCC_ATOMIC_IMP, _LIBCUDACXX_HAS_C_ATOMIC_IMP -} // namespace __detail - -using __detail::__cxx_atomic_base_impl; -using __detail::__cxx_atomic_compare_exchange_strong; -using __detail::__cxx_atomic_compare_exchange_weak; -using __detail::__cxx_atomic_exchange; -using __detail::__cxx_atomic_fetch_add; -using __detail::__cxx_atomic_fetch_and; -using __detail::__cxx_atomic_fetch_or; -using __detail::__cxx_atomic_fetch_sub; -using __detail::__cxx_atomic_fetch_xor; -using __detail::__cxx_atomic_load; -using __detail::__cxx_atomic_ref_base_impl; -using __detail::__cxx_atomic_signal_fence; -using __detail::__cxx_atomic_store; -using __detail::__cxx_atomic_thread_fence; - -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp kill_dependency(_Tp __y) noexcept -{ - return __y; -} - -#if defined(__CLANG_ATOMIC_BOOL_LOCK_FREE) -# define ATOMIC_BOOL_LOCK_FREE __CLANG_ATOMIC_BOOL_LOCK_FREE -# define ATOMIC_CHAR_LOCK_FREE __CLANG_ATOMIC_CHAR_LOCK_FREE -# define ATOMIC_CHAR16_T_LOCK_FREE __CLANG_ATOMIC_CHAR16_T_LOCK_FREE -# define ATOMIC_CHAR32_T_LOCK_FREE __CLANG_ATOMIC_CHAR32_T_LOCK_FREE -# define ATOMIC_WCHAR_T_LOCK_FREE __CLANG_ATOMIC_WCHAR_T_LOCK_FREE -# define ATOMIC_SHORT_LOCK_FREE __CLANG_ATOMIC_SHORT_LOCK_FREE -# define ATOMIC_INT_LOCK_FREE __CLANG_ATOMIC_INT_LOCK_FREE -# define ATOMIC_LONG_LOCK_FREE __CLANG_ATOMIC_LONG_LOCK_FREE -# define ATOMIC_LLONG_LOCK_FREE __CLANG_ATOMIC_LLONG_LOCK_FREE -# define ATOMIC_POINTER_LOCK_FREE __CLANG_ATOMIC_POINTER_LOCK_FREE -#elif defined(__GCC_ATOMIC_BOOL_LOCK_FREE) -# define ATOMIC_BOOL_LOCK_FREE __GCC_ATOMIC_BOOL_LOCK_FREE -# define ATOMIC_CHAR_LOCK_FREE __GCC_ATOMIC_CHAR_LOCK_FREE -# define ATOMIC_CHAR16_T_LOCK_FREE __GCC_ATOMIC_CHAR16_T_LOCK_FREE -# define ATOMIC_CHAR32_T_LOCK_FREE __GCC_ATOMIC_CHAR32_T_LOCK_FREE -# define ATOMIC_WCHAR_T_LOCK_FREE __GCC_ATOMIC_WCHAR_T_LOCK_FREE -# define ATOMIC_SHORT_LOCK_FREE __GCC_ATOMIC_SHORT_LOCK_FREE -# define ATOMIC_INT_LOCK_FREE __GCC_ATOMIC_INT_LOCK_FREE -# define ATOMIC_LONG_LOCK_FREE __GCC_ATOMIC_LONG_LOCK_FREE -# define ATOMIC_LLONG_LOCK_FREE __GCC_ATOMIC_LLONG_LOCK_FREE -# define ATOMIC_POINTER_LOCK_FREE __GCC_ATOMIC_POINTER_LOCK_FREE -#endif - -#ifdef _LIBCUDACXX_ATOMIC_ONLY_USE_BUILTINS - -template -struct __cxx_atomic_lock_impl -{ - _LIBCUDACXX_INLINE_VISIBILITY __cxx_atomic_lock_impl() noexcept - : __a_value() - , __a_lock(0) - {} - _LIBCUDACXX_INLINE_VISIBILITY constexpr explicit __cxx_atomic_lock_impl(_Tp value) noexcept - : __a_value(value) - , __a_lock(0) - {} - - _Tp __a_value; - mutable __cxx_atomic_base_impl<_LIBCUDACXX_ATOMIC_FLAG_TYPE, _Sco> __a_lock; - - _LIBCUDACXX_INLINE_VISIBILITY void __lock() const volatile - { - while (1 == __cxx_atomic_exchange(&__a_lock, _LIBCUDACXX_ATOMIC_FLAG_TYPE(true), memory_order_acquire)) - /*spin*/; - } - _LIBCUDACXX_INLINE_VISIBILITY void __lock() const - { - while (1 == __cxx_atomic_exchange(&__a_lock, _LIBCUDACXX_ATOMIC_FLAG_TYPE(true), memory_order_acquire)) - /*spin*/; - } - _LIBCUDACXX_INLINE_VISIBILITY void __unlock() const volatile - { - __cxx_atomic_store(&__a_lock, _LIBCUDACXX_ATOMIC_FLAG_TYPE(false), memory_order_release); - } - _LIBCUDACXX_INLINE_VISIBILITY void __unlock() const - { - __cxx_atomic_store(&__a_lock, _LIBCUDACXX_ATOMIC_FLAG_TYPE(false), memory_order_release); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp __read() const volatile - { - __lock(); - _Tp __old; - __cxx_atomic_assign_volatile(__old, __a_value); - __unlock(); - return __old; - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp __read() const - { - __lock(); - _Tp 
__old = __a_value; - __unlock(); - return __old; - } -}; - -template -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_init(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __val) -{ - __cxx_atomic_assign_volatile(__a->__a_value, __val); -} -template -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_init(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __val) -{ - __a->__a_value = __val; -} - -template -_LIBCUDACXX_INLINE_VISIBILITY void -__cxx_atomic_store(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __val, memory_order) -{ - __a->__lock(); - __cxx_atomic_assign_volatile(__a->__a_value, __val); - __a->__unlock(); -} -template -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_store(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __val, memory_order) -{ - __a->__lock(); - __a->__a_value = __val; - __a->__unlock(); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp __cxx_atomic_load(const volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, memory_order) -{ - return __a->__read(); -} -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp __cxx_atomic_load(const __cxx_atomic_lock_impl<_Tp, _Sco>* __a, memory_order) -{ - return __a->__read(); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp -__cxx_atomic_exchange(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __value, memory_order) -{ - __a->__lock(); - _Tp __old; - __cxx_atomic_assign_volatile(__old, __a->__a_value); - __cxx_atomic_assign_volatile(__a->__a_value, __value); - __a->__unlock(); - return __old; -} -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp -__cxx_atomic_exchange(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __value, memory_order) -{ - __a->__lock(); - _Tp __old = __a->__a_value; - __a->__a_value = __value; - __a->__unlock(); - return __old; -} - -template -_LIBCUDACXX_INLINE_VISIBILITY bool __cxx_atomic_compare_exchange_strong( - volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp* __expected, _Tp __value, memory_order, memory_order) -{ - __a->__lock(); - _Tp __temp; - __cxx_atomic_assign_volatile(__temp, __a->__a_value); - bool __ret = __temp == *__expected; - if (__ret) - { - __cxx_atomic_assign_volatile(__a->__a_value, __value); - } - else - { - __cxx_atomic_assign_volatile(*__expected, __a->__a_value); - } - __a->__unlock(); - return __ret; -} -template -_LIBCUDACXX_INLINE_VISIBILITY bool __cxx_atomic_compare_exchange_strong( - __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp* __expected, _Tp __value, memory_order, memory_order) -{ - __a->__lock(); - bool __ret = __a->__a_value == *__expected; - if (__ret) - { - __a->__a_value = __value; - } - else - { - *__expected = __a->__a_value; - } - __a->__unlock(); - return __ret; -} - -template -_LIBCUDACXX_INLINE_VISIBILITY bool __cxx_atomic_compare_exchange_weak( - volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp* __expected, _Tp __value, memory_order, memory_order) -{ - __a->__lock(); - _Tp __temp; - __cxx_atomic_assign_volatile(__temp, __a->__a_value); - bool __ret = __temp == *__expected; - if (__ret) - { - __cxx_atomic_assign_volatile(__a->__a_value, __value); - } - else - { - __cxx_atomic_assign_volatile(*__expected, __a->__a_value); - } - __a->__unlock(); - return __ret; -} -template -_LIBCUDACXX_INLINE_VISIBILITY bool __cxx_atomic_compare_exchange_weak( - __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp* __expected, _Tp __value, memory_order, memory_order) -{ - __a->__lock(); - bool __ret = __a->__a_value == *__expected; - if (__ret) - { - __a->__a_value = __value; - } - else - { - *__expected = __a->__a_value; - } - __a->__unlock(); - return __ret; -} - -template 
-_LIBCUDACXX_INLINE_VISIBILITY _Tp -__cxx_atomic_fetch_add(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Td __delta, memory_order) -{ - __a->__lock(); - _Tp __old; - __cxx_atomic_assign_volatile(__old, __a->__a_value); - __cxx_atomic_assign_volatile(__a->__a_value, _Tp(__old + __delta)); - __a->__unlock(); - return __old; -} -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp -__cxx_atomic_fetch_add(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Td __delta, memory_order) -{ - __a->__lock(); - _Tp __old = __a->__a_value; - __a->__a_value += __delta; - __a->__unlock(); - return __old; -} - -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp* -__cxx_atomic_fetch_add(volatile __cxx_atomic_lock_impl<_Tp*, _Sco>* __a, ptrdiff_t __delta, memory_order) -{ - __a->__lock(); - _Tp* __old; - __cxx_atomic_assign_volatile(__old, __a->__a_value); - __cxx_atomic_assign_volatile(__a->__a_value, __old + __delta); - __a->__unlock(); - return __old; -} -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp* -__cxx_atomic_fetch_add(__cxx_atomic_lock_impl<_Tp*, _Sco>* __a, ptrdiff_t __delta, memory_order) -{ - __a->__lock(); - _Tp* __old = __a->__a_value; - __a->__a_value += __delta; - __a->__unlock(); - return __old; -} - -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp -__cxx_atomic_fetch_sub(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Td __delta, memory_order) -{ - __a->__lock(); - _Tp __old; - __cxx_atomic_assign_volatile(__old, __a->__a_value); - __cxx_atomic_assign_volatile(__a->__a_value, _Tp(__old - __delta)); - __a->__unlock(); - return __old; -} -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp -__cxx_atomic_fetch_sub(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Td __delta, memory_order) -{ - __a->__lock(); - _Tp __old = __a->__a_value; - __a->__a_value -= __delta; - __a->__unlock(); - return __old; -} - -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp -__cxx_atomic_fetch_and(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __pattern, memory_order) -{ - __a->__lock(); - _Tp __old; - __cxx_atomic_assign_volatile(__old, __a->__a_value); - __cxx_atomic_assign_volatile(__a->__a_value, _Tp(__old & __pattern)); - __a->__unlock(); - return __old; -} -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp -__cxx_atomic_fetch_and(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __pattern, memory_order) -{ - __a->__lock(); - _Tp __old = __a->__a_value; - __a->__a_value &= __pattern; - __a->__unlock(); - return __old; -} - -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp -__cxx_atomic_fetch_or(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __pattern, memory_order) -{ - __a->__lock(); - _Tp __old; - __cxx_atomic_assign_volatile(__old, __a->__a_value); - __cxx_atomic_assign_volatile(__a->__a_value, _Tp(__old | __pattern)); - __a->__unlock(); - return __old; -} -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp -__cxx_atomic_fetch_or(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __pattern, memory_order) -{ - __a->__lock(); - _Tp __old = __a->__a_value; - __a->__a_value |= __pattern; - __a->__unlock(); - return __old; -} - -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp -__cxx_atomic_fetch_xor(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __pattern, memory_order) -{ - __a->__lock(); - _Tp __old; - __cxx_atomic_assign_volatile(__old, __a->__a_value); - __cxx_atomic_assign_volatile(__a->__a_value, _Tp(__old ^ __pattern)); - __a->__unlock(); - return __old; -} -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp -__cxx_atomic_fetch_xor(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __pattern, memory_order) -{ - __a->__lock(); - _Tp __old = __a->__a_value; - __a->__a_value ^= 
__pattern; - __a->__unlock(); - return __old; -} - -# if defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) - -template -struct __cxx_is_always_lock_free -{ - enum - { - __value = _LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE(sizeof(_Tp), 0) - }; -}; - -# else - -template -struct __cxx_is_always_lock_free -{ - enum - { - __value = sizeof(_Tp) <= 8 - }; -}; - -# endif // defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) - -template -struct __cxx_atomic_impl_conditional -{ - using type = __conditional_t<__cxx_is_always_lock_free<_Tp>::__value, - __cxx_atomic_base_impl<_Tp, _Sco>, - __cxx_atomic_lock_impl<_Tp, _Sco>>; -}; - -template ::type> -#else -template > -#endif //_LIBCUDACXX_ATOMIC_ONLY_USE_BUILTINS -struct __cxx_atomic_impl : public _Base -{ - __cxx_atomic_impl() noexcept = default; - _LIBCUDACXX_INLINE_VISIBILITY constexpr explicit __cxx_atomic_impl(_Tp value) noexcept - : _Base(value) - {} -}; - -template -_LIBCUDACXX_INLINE_VISIBILITY __cxx_atomic_impl<_Tp, _Sco>* __cxx_atomic_rebind(_Tp* __inst) -{ - static_assert(sizeof(__cxx_atomic_impl<_Tp, _Sco>) == sizeof(_Tp), ""); - static_assert(alignof(__cxx_atomic_impl<_Tp, _Sco>) == alignof(_Tp), ""); - return (__cxx_atomic_impl<_Tp, _Sco>*) __inst; -} - -template -using __cxx_atomic_ref_impl = __cxx_atomic_ref_base_impl<_Tp, _Sco>; - -#ifdef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE - -template , int _Sco = _Ty::__sco> -struct __cxx_atomic_poll_tester -{ - _Ty const volatile* __a; - _Tp __val; - memory_order __order; - - _LIBCUDACXX_INLINE_VISIBILITY __cxx_atomic_poll_tester(_Ty const volatile* __a_, _Tp __val_, memory_order __order_) - : __a(__a_) - , __val(__val_) - , __order(__order_) - {} - - _LIBCUDACXX_INLINE_VISIBILITY bool operator()() const - { - return !(__cxx_atomic_load(__a, __order) == __val); - } -}; - -template , int _Sco = _Ty::__sco> -_LIBCUDACXX_INLINE_VISIBILITY void -__cxx_atomic_try_wait_slow_fallback(_Ty const volatile* __a, _Tp __val, memory_order __order) -{ - __libcpp_thread_poll_with_backoff(__cxx_atomic_poll_tester<_Ty>(__a, __val, __order)); -} - -#endif - -#ifdef _LIBCUDACXX_HAS_PLATFORM_WAIT - -template ::__value, int> = 1> -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_all(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a) -{ -# ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE - auto* const __c = __libcpp_contention_state(__a); - __cxx_atomic_fetch_add(__cxx_atomic_rebind<_Sco>(&__c->__version), (__libcpp_platform_wait_t) 1, memory_order_relaxed); - __cxx_atomic_thread_fence(memory_order_seq_cst); - if (0 != __cxx_atomic_exchange(__cxx_atomic_rebind<_Sco>(&__c->__waiters), (ptrdiff_t) 0, memory_order_relaxed)) - { - __libcpp_platform_wake(&__c->__version, true); - } -# endif -} -template ::__value, int> = 1> -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_one(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a) -{ - __cxx_atomic_notify_all(__a); -} -template , - int _Sco = _Ty::__sco, - __enable_if_t::__value, int> = 1> -_LIBCUDACXX_INLINE_VISIBILITY void -__cxx_atomic_try_wait_slow(_Ty const volatile* __a, _Tp const __val, memory_order __order) -{ -# ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE - auto* const __c = __libcpp_contention_state(__a); - __cxx_atomic_store(__cxx_atomic_rebind<_Sco>(&__c->__waiters), (ptrdiff_t) 1, memory_order_relaxed); - __cxx_atomic_thread_fence(memory_order_seq_cst); - auto const __version = __cxx_atomic_load(__cxx_atomic_rebind<_Sco>(&__c->__version), memory_order_relaxed); - if (!__cxx_nonatomic_compare_equal(__cxx_atomic_load(__a, __order), __val)) - { - return; - } - if 
(sizeof(__libcpp_platform_wait_t) < 8) - { - constexpr timespec __timeout = {2, 0}; // Hedge on rare 'int version' aliasing. - __libcpp_platform_wait(&__c->__version, __version, &__timeout); - } - else - { - __libcpp_platform_wait(&__c->__version, __version, nullptr); - } -# else - __cxx_atomic_try_wait_slow_fallback(__a, __val, __order); -# endif // _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE -} - -template ::__value, int> = 1> -_LIBCUDACXX_INLINE_VISIBILITY void -__cxx_atomic_try_wait_slow(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a, _Tp __val, memory_order) -{ -# ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE - auto* const __c = __libcpp_contention_state(__a); - __cxx_atomic_fetch_add(__cxx_atomic_rebind<_Sco>(&__c->__waiters), (ptrdiff_t) 1, memory_order_relaxed); - __cxx_atomic_thread_fence(memory_order_seq_cst); -# endif - __libcpp_platform_wait((_Tp*) __a, __val, nullptr); -# ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE - __cxx_atomic_fetch_sub(__cxx_atomic_rebind<_Sco>(&__c->__waiters), (ptrdiff_t) 1, memory_order_relaxed); -# endif -} -template ::__value, int> = 1> -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_all(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a) -{ -# ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE - auto* const __c = __libcpp_contention_state(__a); - __cxx_atomic_thread_fence(memory_order_seq_cst); - if (0 != __cxx_atomic_load(__cxx_atomic_rebind<_Sco>(&__c->__waiters), memory_order_relaxed)) -# endif - __libcpp_platform_wake((_Tp*) __a, true); -} -template ::__value, int> = 1> -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_one(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a) -{ -# ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE - auto* const __c = __libcpp_contention_state(__a); - __cxx_atomic_thread_fence(memory_order_seq_cst); - if (0 != __cxx_atomic_load(__cxx_atomic_rebind<_Sco>(&__c->__waiters), memory_order_relaxed)) -# endif - __libcpp_platform_wake((_Tp*) __a, false); -} - -#elif !defined(_LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE) - -template -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_all(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a) -{ - auto* const __c = __libcpp_contention_state(__a); - __cxx_atomic_thread_fence(memory_order_seq_cst); - if (0 == __cxx_atomic_load(__cxx_atomic_rebind<_Sco>(&__c->__credit), memory_order_relaxed)) - { - return; - } - if (0 != __cxx_atomic_exchange(__cxx_atomic_rebind<_Sco>(&__c->__credit), (ptrdiff_t) 0, memory_order_relaxed)) - { - __libcpp_mutex_lock(&__c->__mutex); - __libcpp_mutex_unlock(&__c->__mutex); - __libcpp_condvar_broadcast(&__c->__condvar); - } -} -template -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_one(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a) -{ - __cxx_atomic_notify_all(__a); -} -template -_LIBCUDACXX_INLINE_VISIBILITY void -__cxx_atomic_try_wait_slow(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a, _Tp const __val, memory_order __order) -{ - auto* const __c = __libcpp_contention_state(__a); - __libcpp_mutex_lock(&__c->__mutex); - __cxx_atomic_store(__cxx_atomic_rebind<_Sco>(&__c->__credit), (ptrdiff_t) 1, memory_order_relaxed); - __cxx_atomic_thread_fence(memory_order_seq_cst); - if (__cxx_nonatomic_compare_equal(__cxx_atomic_load(__a, __order), __val)) - { - __libcpp_condvar_wait(&__c->__condvar, &__c->__mutex); - } - __libcpp_mutex_unlock(&__c->__mutex); -} - -#else - -template -struct __atomic_wait_and_notify_supported -# if defined(__CUDA_MINIMUM_ARCH__) && __CUDA_MINIMUM_ARCH__ < 700 - : false_type -# else - : true_type -# 
endif -{}; - -template > -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_try_wait_slow(_Ty const volatile* __a, _Tp __val, memory_order __order) -{ - static_assert(__atomic_wait_and_notify_supported<_Tp>::value, "atomic wait operations are unsupported on Pascal"); - __cxx_atomic_try_wait_slow_fallback(__a, __val, __order); -} - -template > -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_one(_Ty const volatile*) -{ - static_assert(__atomic_wait_and_notify_supported<_Tp>::value, - "atomic notify-one operations are unsupported on Pascal"); -} - -template > -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_all(_Ty const volatile*) -{ - static_assert(__atomic_wait_and_notify_supported<_Tp>::value, - "atomic notify-all operations are unsupported on Pascal"); -} - -#endif // _LIBCUDACXX_HAS_PLATFORM_WAIT || !defined(_LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE) - -template > -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_wait(_Ty const volatile* __a, _Tp const __val, memory_order __order) -{ - for (int __i = 0; __i < _LIBCUDACXX_POLLING_COUNT; ++__i) - { - if (!__cxx_nonatomic_compare_equal(__cxx_atomic_load(__a, __order), __val)) - { - return; - } - if (__i < 12) - { - __libcpp_thread_yield_processor(); - } - else - { - __libcpp_thread_yield(); - } - } - while (__cxx_nonatomic_compare_equal(__cxx_atomic_load(__a, __order), __val)) - { - __cxx_atomic_try_wait_slow(__a, __val, __order); - } -} - -template -struct __atomic_base_storage -{ - mutable _Storage __a_; - - __atomic_base_storage() = default; - __atomic_base_storage(const __atomic_base_storage&) = default; - __atomic_base_storage(__atomic_base_storage&&) = default; - - __atomic_base_storage& operator=(const __atomic_base_storage&) = default; - __atomic_base_storage& operator=(__atomic_base_storage&&) = default; - - _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_base_storage(_Storage&& __a) noexcept - : __a_(_CUDA_VSTD::forward<_Storage>(__a)) - {} -}; - -template -struct __atomic_base_core : public __atomic_base_storage<_Tp, _Storage> -{ - __atomic_base_core() = default; - __atomic_base_core(const __atomic_base_core&) = delete; - __atomic_base_core(__atomic_base_core&&) = delete; - - __atomic_base_core& operator=(const __atomic_base_core&) = delete; - __atomic_base_core& operator=(__atomic_base_core&&) = delete; - - _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_base_core(_Storage&& __a) noexcept - : __atomic_base_storage<_Tp, _Storage>(_CUDA_VSTD::forward<_Storage>(__a)) - {} - -#if defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) - static constexpr bool is_always_lock_free = _LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE(sizeof(_Tp), 0); -#endif // defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) - - _LIBCUDACXX_INLINE_VISIBILITY bool is_lock_free() const volatile noexcept - { - return _LIBCUDACXX_ATOMIC_IS_LOCK_FREE(sizeof(_Tp)); - } - _LIBCUDACXX_INLINE_VISIBILITY bool is_lock_free() const noexcept - { - return static_cast<__atomic_base_core const volatile*>(this)->is_lock_free(); - } - _LIBCUDACXX_INLINE_VISIBILITY - - void - store(_Tp __d, memory_order __m = memory_order_seq_cst) volatile noexcept _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) - { - __cxx_atomic_store(&this->__a_, __d, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY void store(_Tp __d, memory_order __m = memory_order_seq_cst) noexcept - _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) - { - __cxx_atomic_store(&this->__a_, __d, __m); - } - - _LIBCUDACXX_INLINE_VISIBILITY _Tp load(memory_order __m = memory_order_seq_cst) const volatile noexcept - _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) 
- { - return __cxx_atomic_load(&this->__a_, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp load(memory_order __m = memory_order_seq_cst) const noexcept - _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) - { - return __cxx_atomic_load(&this->__a_, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY operator _Tp() const volatile noexcept - { - return load(); - } - _LIBCUDACXX_INLINE_VISIBILITY operator _Tp() const noexcept - { - return load(); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp exchange(_Tp __d, memory_order __m = memory_order_seq_cst) volatile noexcept - { - return __cxx_atomic_exchange(&this->__a_, __d, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp exchange(_Tp __d, memory_order __m = memory_order_seq_cst) noexcept - { - return __cxx_atomic_exchange(&this->__a_, __d, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY bool - compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) volatile noexcept - _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) - { - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __s, __f); - } - _LIBCUDACXX_INLINE_VISIBILITY bool - compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) noexcept - _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) - { - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __s, __f); - } - _LIBCUDACXX_INLINE_VISIBILITY bool - compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) volatile noexcept - _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) - { - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __s, __f); - } - _LIBCUDACXX_INLINE_VISIBILITY bool - compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) noexcept - _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) - { - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __s, __f); - } - _LIBCUDACXX_INLINE_VISIBILITY bool - compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) volatile noexcept - { - if (memory_order_acq_rel == __m) - { - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_acquire); - } - else if (memory_order_release == __m) - { - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_relaxed); - } - else - { - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, __m); - } - } - _LIBCUDACXX_INLINE_VISIBILITY bool - compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) noexcept - { - if (memory_order_acq_rel == __m) - { - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_acquire); - } - else if (memory_order_release == __m) - { - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_relaxed); - } - else - { - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, __m); - } - } - _LIBCUDACXX_INLINE_VISIBILITY bool - compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) volatile noexcept - { - if (memory_order_acq_rel == __m) - { - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_acquire); - } - else if (memory_order_release == __m) - { - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_relaxed); - } - else - { - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, __m); - } - } - _LIBCUDACXX_INLINE_VISIBILITY bool - compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) 
noexcept - { - if (memory_order_acq_rel == __m) - { - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_acquire); - } - else if (memory_order_release == __m) - { - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_relaxed); - } - else - { - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, __m); - } - } - - _LIBCUDACXX_INLINE_VISIBILITY void wait(_Tp __v, memory_order __m = memory_order_seq_cst) const volatile noexcept - { - __cxx_atomic_wait(&this->__a_, __v, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY void wait(_Tp __v, memory_order __m = memory_order_seq_cst) const noexcept - { - __cxx_atomic_wait(&this->__a_, __v, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY void notify_one() volatile noexcept - { - __cxx_atomic_notify_one(&this->__a_); - } - _LIBCUDACXX_INLINE_VISIBILITY void notify_one() noexcept - { - __cxx_atomic_notify_one(&this->__a_); - } - _LIBCUDACXX_INLINE_VISIBILITY void notify_all() volatile noexcept - { - __cxx_atomic_notify_all(&this->__a_); - } - _LIBCUDACXX_INLINE_VISIBILITY void notify_all() noexcept - { - __cxx_atomic_notify_all(&this->__a_); - } -}; - -template -struct __atomic_base_core<_Tp, true, _Storage> : public __atomic_base_storage<_Tp, _Storage> -{ - __atomic_base_core() = default; - __atomic_base_core(const __atomic_base_core&) = default; - __atomic_base_core(__atomic_base_core&&) = default; - - __atomic_base_core& operator=(const __atomic_base_core&) = default; - __atomic_base_core& operator=(__atomic_base_core&&) = default; - - _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_base_core(_Storage&& __a) noexcept - : __atomic_base_storage<_Tp, _Storage>(_CUDA_VSTD::forward<_Storage>(__a)) - {} - -#if defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) - static constexpr bool is_always_lock_free = _LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE(sizeof(_Tp), 0); -#endif // defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) - - _LIBCUDACXX_INLINE_VISIBILITY bool is_lock_free() const volatile noexcept - { - return _LIBCUDACXX_ATOMIC_IS_LOCK_FREE(sizeof(_Tp)); - } - _LIBCUDACXX_INLINE_VISIBILITY bool is_lock_free() const noexcept - { - return static_cast<__atomic_base_core const volatile*>(this)->is_lock_free(); - } - _LIBCUDACXX_INLINE_VISIBILITY - - void - store(_Tp __d, memory_order __m = memory_order_seq_cst) const volatile noexcept - _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) - { - __cxx_atomic_store(&this->__a_, __d, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY void store(_Tp __d, memory_order __m = memory_order_seq_cst) const noexcept - _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) - { - __cxx_atomic_store(&this->__a_, __d, __m); - } - - _LIBCUDACXX_INLINE_VISIBILITY _Tp load(memory_order __m = memory_order_seq_cst) const volatile noexcept - _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) - { - return __cxx_atomic_load(&this->__a_, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp load(memory_order __m = memory_order_seq_cst) const noexcept - _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) - { - return __cxx_atomic_load(&this->__a_, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY operator _Tp() const volatile noexcept - { - return load(); - } - _LIBCUDACXX_INLINE_VISIBILITY operator _Tp() const noexcept - { - return load(); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp exchange(_Tp __d, memory_order __m = memory_order_seq_cst) const volatile noexcept - { - return __cxx_atomic_exchange(&this->__a_, __d, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp exchange(_Tp __d, memory_order __m = memory_order_seq_cst) const noexcept - { - 
return __cxx_atomic_exchange(&this->__a_, __d, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY bool compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) const - volatile noexcept _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) - { - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __s, __f); - } - _LIBCUDACXX_INLINE_VISIBILITY bool - compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) const noexcept - _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) - { - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __s, __f); - } - _LIBCUDACXX_INLINE_VISIBILITY bool compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) const - volatile noexcept _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) - { - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __s, __f); - } - _LIBCUDACXX_INLINE_VISIBILITY bool - compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) const noexcept - _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) - { - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __s, __f); - } - _LIBCUDACXX_INLINE_VISIBILITY bool - compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) const volatile noexcept - { - if (memory_order_acq_rel == __m) - { - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_acquire); - } - else if (memory_order_release == __m) - { - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_relaxed); - } - else - { - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, __m); - } - } - _LIBCUDACXX_INLINE_VISIBILITY bool - compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) const noexcept - { - if (memory_order_acq_rel == __m) - { - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_acquire); - } - else if (memory_order_release == __m) - { - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_relaxed); - } - else - { - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, __m); - } - } - _LIBCUDACXX_INLINE_VISIBILITY bool - compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) const volatile noexcept - { - if (memory_order_acq_rel == __m) - { - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_acquire); - } - else if (memory_order_release == __m) - { - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_relaxed); - } - else - { - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, __m); - } - } - _LIBCUDACXX_INLINE_VISIBILITY bool - compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) const noexcept - { - if (memory_order_acq_rel == __m) - { - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_acquire); - } - else if (memory_order_release == __m) - { - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_relaxed); - } - else - { - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, __m); - } - } - - _LIBCUDACXX_INLINE_VISIBILITY void wait(_Tp __v, memory_order __m = memory_order_seq_cst) const volatile noexcept - { - __cxx_atomic_wait(&this->__a_, __v, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY void wait(_Tp __v, memory_order __m = memory_order_seq_cst) const noexcept 
- { - __cxx_atomic_wait(&this->__a_, __v, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY void notify_one() const volatile noexcept - { - __cxx_atomic_notify_one(&this->__a_); - } - _LIBCUDACXX_INLINE_VISIBILITY void notify_one() const noexcept - { - __cxx_atomic_notify_one(&this->__a_); - } - _LIBCUDACXX_INLINE_VISIBILITY void notify_all() const volatile noexcept - { - __cxx_atomic_notify_all(&this->__a_); - } - _LIBCUDACXX_INLINE_VISIBILITY void notify_all() const noexcept - { - __cxx_atomic_notify_all(&this->__a_); - } -}; - -template -struct __atomic_base_arithmetic : public __atomic_base_core<_Tp, _Cq, _Storage> -{ - __atomic_base_arithmetic() = default; - __atomic_base_arithmetic(const __atomic_base_arithmetic&) = delete; - __atomic_base_arithmetic(__atomic_base_arithmetic&&) = delete; - - __atomic_base_arithmetic& operator=(const __atomic_base_arithmetic&) = delete; - __atomic_base_arithmetic& operator=(__atomic_base_arithmetic&&) = delete; - - _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_base_arithmetic(_Storage&& __a) noexcept - : __atomic_base_core<_Tp, _Cq, _Storage>(_CUDA_VSTD::forward<_Storage>(__a)) - {} - - _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_add(_Tp __op, memory_order __m = memory_order_seq_cst) volatile noexcept - { - return __cxx_atomic_fetch_add(&this->__a_, __op, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_add(_Tp __op, memory_order __m = memory_order_seq_cst) noexcept - { - return __cxx_atomic_fetch_add(&this->__a_, __op, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_sub(_Tp __op, memory_order __m = memory_order_seq_cst) volatile noexcept - { - return __cxx_atomic_fetch_sub(&this->__a_, __op, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_sub(_Tp __op, memory_order __m = memory_order_seq_cst) noexcept - { - return __cxx_atomic_fetch_sub(&this->__a_, __op, __m); - } - - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator++(int) volatile noexcept - { - return fetch_add(_Tp(1)); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator++(int) noexcept - { - return fetch_add(_Tp(1)); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator--(int) volatile noexcept - { - return fetch_sub(_Tp(1)); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator--(int) noexcept - { - return fetch_sub(_Tp(1)); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator++() volatile noexcept - { - return fetch_add(_Tp(1)) + _Tp(1); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator++() noexcept - { - return fetch_add(_Tp(1)) + _Tp(1); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator--() volatile noexcept - { - return fetch_sub(_Tp(1)) - _Tp(1); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator--() noexcept - { - return fetch_sub(_Tp(1)) - _Tp(1); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator+=(_Tp __op) volatile noexcept - { - return fetch_add(__op) + __op; - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator+=(_Tp __op) noexcept - { - return fetch_add(__op) + __op; - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator-=(_Tp __op) volatile noexcept - { - return fetch_sub(__op) - __op; - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator-=(_Tp __op) noexcept - { - return fetch_sub(__op) - __op; - } -}; - -template -struct __atomic_base_arithmetic<_Tp, true, _Storage> : public __atomic_base_core<_Tp, true, _Storage> -{ - __atomic_base_arithmetic() = default; - __atomic_base_arithmetic(const __atomic_base_arithmetic&) = default; - __atomic_base_arithmetic(__atomic_base_arithmetic&&) = default; - - __atomic_base_arithmetic& operator=(const __atomic_base_arithmetic&) = default; - __atomic_base_arithmetic& 
operator=(__atomic_base_arithmetic&&) = default; - - _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_base_arithmetic(_Storage&& __a) noexcept - : __atomic_base_core<_Tp, true, _Storage>(_CUDA_VSTD::forward<_Storage>(__a)) - {} - - _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_add(_Tp __op, memory_order __m = memory_order_seq_cst) const volatile noexcept - { - return __cxx_atomic_fetch_add(&this->__a_, __op, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_add(_Tp __op, memory_order __m = memory_order_seq_cst) const noexcept - { - return __cxx_atomic_fetch_add(&this->__a_, __op, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_sub(_Tp __op, memory_order __m = memory_order_seq_cst) const volatile noexcept - { - return __cxx_atomic_fetch_sub(&this->__a_, __op, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_sub(_Tp __op, memory_order __m = memory_order_seq_cst) const noexcept - { - return __cxx_atomic_fetch_sub(&this->__a_, __op, __m); - } - - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator++(int) const volatile noexcept - { - return fetch_add(_Tp(1)); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator++(int) const noexcept - { - return fetch_add(_Tp(1)); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator--(int) const volatile noexcept - { - return fetch_sub(_Tp(1)); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator--(int) const noexcept - { - return fetch_sub(_Tp(1)); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator++() const volatile noexcept - { - return fetch_add(_Tp(1)) + _Tp(1); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator++() const noexcept - { - return fetch_add(_Tp(1)) + _Tp(1); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator--() const volatile noexcept - { - return fetch_sub(_Tp(1)) - _Tp(1); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator--() const noexcept - { - return fetch_sub(_Tp(1)) - _Tp(1); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator+=(_Tp __op) const volatile noexcept - { - return fetch_add(__op) + __op; - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator+=(_Tp __op) const noexcept - { - return fetch_add(__op) + __op; - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator-=(_Tp __op) const volatile noexcept - { - return fetch_sub(__op) - __op; - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator-=(_Tp __op) const noexcept - { - return fetch_sub(__op) - __op; - } -}; - -template -struct __atomic_base_bitwise : public __atomic_base_arithmetic<_Tp, _Cq, _Storage> -{ - __atomic_base_bitwise() = default; - __atomic_base_bitwise(const __atomic_base_bitwise&) = delete; - __atomic_base_bitwise(__atomic_base_bitwise&&) = delete; - - __atomic_base_bitwise& operator=(const __atomic_base_bitwise&) = delete; - __atomic_base_bitwise& operator=(__atomic_base_bitwise&&) = delete; - - _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_base_bitwise(_Storage&& __a) noexcept - : __atomic_base_arithmetic<_Tp, _Cq, _Storage>(_CUDA_VSTD::forward<_Storage>(__a)) - {} - - _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_and(_Tp __op, memory_order __m = memory_order_seq_cst) volatile noexcept - { - return __cxx_atomic_fetch_and(&this->__a_, __op, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_and(_Tp __op, memory_order __m = memory_order_seq_cst) noexcept - { - return __cxx_atomic_fetch_and(&this->__a_, __op, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_or(_Tp __op, memory_order __m = memory_order_seq_cst) volatile noexcept - { - return __cxx_atomic_fetch_or(&this->__a_, __op, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_or(_Tp __op, memory_order __m = memory_order_seq_cst) noexcept - { - return 
__cxx_atomic_fetch_or(&this->__a_, __op, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_xor(_Tp __op, memory_order __m = memory_order_seq_cst) volatile noexcept - { - return __cxx_atomic_fetch_xor(&this->__a_, __op, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_xor(_Tp __op, memory_order __m = memory_order_seq_cst) noexcept - { - return __cxx_atomic_fetch_xor(&this->__a_, __op, __m); - } - - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator&=(_Tp __op) volatile noexcept - { - return fetch_and(__op) & __op; - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator&=(_Tp __op) noexcept - { - return fetch_and(__op) & __op; - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator|=(_Tp __op) volatile noexcept - { - return fetch_or(__op) | __op; - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator|=(_Tp __op) noexcept - { - return fetch_or(__op) | __op; - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator^=(_Tp __op) volatile noexcept - { - return fetch_xor(__op) ^ __op; - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator^=(_Tp __op) noexcept - { - return fetch_xor(__op) ^ __op; - } -}; - -template -struct __atomic_base_bitwise<_Tp, true, _Storage> : public __atomic_base_arithmetic<_Tp, true, _Storage> -{ - __atomic_base_bitwise() = default; - __atomic_base_bitwise(const __atomic_base_bitwise&) = default; - __atomic_base_bitwise(__atomic_base_bitwise&&) = default; - - __atomic_base_bitwise& operator=(const __atomic_base_bitwise&) = default; - __atomic_base_bitwise& operator=(__atomic_base_bitwise&&) = default; - - _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_base_bitwise(_Storage&& __a) noexcept - : __atomic_base_arithmetic<_Tp, true, _Storage>(_CUDA_VSTD::forward<_Storage>(__a)) - {} - - _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_and(_Tp __op, memory_order __m = memory_order_seq_cst) const volatile noexcept - { - return __cxx_atomic_fetch_and(&this->__a_, __op, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_and(_Tp __op, memory_order __m = memory_order_seq_cst) const noexcept - { - return __cxx_atomic_fetch_and(&this->__a_, __op, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_or(_Tp __op, memory_order __m = memory_order_seq_cst) const volatile noexcept - { - return __cxx_atomic_fetch_or(&this->__a_, __op, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_or(_Tp __op, memory_order __m = memory_order_seq_cst) const noexcept - { - return __cxx_atomic_fetch_or(&this->__a_, __op, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_xor(_Tp __op, memory_order __m = memory_order_seq_cst) const volatile noexcept - { - return __cxx_atomic_fetch_xor(&this->__a_, __op, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_xor(_Tp __op, memory_order __m = memory_order_seq_cst) const noexcept - { - return __cxx_atomic_fetch_xor(&this->__a_, __op, __m); - } - - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator&=(_Tp __op) const volatile noexcept - { - return fetch_and(__op) & __op; - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator&=(_Tp __op) const noexcept - { - return fetch_and(__op) & __op; - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator|=(_Tp __op) const volatile noexcept - { - return fetch_or(__op) | __op; - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator|=(_Tp __op) const noexcept - { - return fetch_or(__op) | __op; - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator^=(_Tp __op) const volatile noexcept - { - return fetch_xor(__op) ^ __op; - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator^=(_Tp __op) const noexcept - { - return fetch_xor(__op) ^ __op; - } -}; - -template -using __atomic_select_base = - __conditional_t::value, - 
__atomic_base_arithmetic<_Tp, _Cq, _Storage>, - __conditional_t::value, - __atomic_base_bitwise<_Tp, _Cq, _Storage>, - __atomic_base_core<_Tp, _Cq, _Storage>>>; - -template >> -struct __atomic_base : public _Base -{ - __atomic_base() = default; - __atomic_base(const __atomic_base&) = delete; - __atomic_base(__atomic_base&&) = delete; - - __atomic_base& operator=(const __atomic_base&) = delete; - __atomic_base& operator=(__atomic_base&&) = delete; - - _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_base(const _Tp& __a) noexcept - : _Base(__cxx_atomic_impl<_Tp, _Sco>(__a)) - {} -}; - -template >> -struct __atomic_base_ref : public _Base -{ - __atomic_base_ref() = default; - __atomic_base_ref(const __atomic_base_ref&) = default; - __atomic_base_ref(__atomic_base_ref&&) = default; - - __atomic_base_ref& operator=(const __atomic_base_ref&) = default; - __atomic_base_ref& operator=(__atomic_base_ref&&) = default; - - _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_base_ref(_Tp& __a) noexcept - : _Base(__cxx_atomic_ref_impl<_Tp, _Sco>(__a)) - {} -}; - -#if defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) -template -constexpr bool __atomic_base_core<_Tp, _Cq, _Storage>::is_always_lock_free; -#endif - -// atomic -template -struct atomic : public __atomic_base<_Tp> -{ - typedef __atomic_base<_Tp> __base; - using value_type = _Tp; - - atomic() noexcept = default; - _LIBCUDACXX_INLINE_VISIBILITY constexpr atomic(_Tp __d) noexcept - : __base(__d) - {} - - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator=(_Tp __d) volatile noexcept - { - __base::store(__d); - return __d; - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator=(_Tp __d) noexcept - { - __base::store(__d); - return __d; - } -}; - -// atomic - -template -struct atomic<_Tp*> : public __atomic_base<_Tp*> -{ - typedef __atomic_base<_Tp*> __base; - using value_type = _Tp*; - - atomic() noexcept = default; - _LIBCUDACXX_INLINE_VISIBILITY constexpr atomic(_Tp* __d) noexcept - : __base(__d) - {} - - _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator=(_Tp* __d) volatile noexcept - { - __base::store(__d); - return __d; - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator=(_Tp* __d) noexcept - { - __base::store(__d); - return __d; - } - - _LIBCUDACXX_INLINE_VISIBILITY _Tp* fetch_add(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) volatile noexcept - { - return __cxx_atomic_fetch_add(&this->__a_, __op, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp* fetch_add(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) noexcept - { - return __cxx_atomic_fetch_add(&this->__a_, __op, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp* fetch_sub(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) volatile noexcept - { - return __cxx_atomic_fetch_sub(&this->__a_, __op, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp* fetch_sub(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) noexcept - { - return __cxx_atomic_fetch_sub(&this->__a_, __op, __m); - } - - _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator++(int) volatile noexcept - { - return fetch_add(1); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator++(int) noexcept - { - return fetch_add(1); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator--(int) volatile noexcept - { - return fetch_sub(1); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator--(int) noexcept - { - return fetch_sub(1); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator++() volatile noexcept - { - return fetch_add(1) + 1; - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator++() noexcept - { - return fetch_add(1) + 1; - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp* 
operator--() volatile noexcept - { - return fetch_sub(1) - 1; - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator--() noexcept - { - return fetch_sub(1) - 1; - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator+=(ptrdiff_t __op) volatile noexcept - { - return fetch_add(__op) + __op; - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator+=(ptrdiff_t __op) noexcept - { - return fetch_add(__op) + __op; - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator-=(ptrdiff_t __op) volatile noexcept - { - return fetch_sub(__op) - __op; - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator-=(ptrdiff_t __op) noexcept - { - return fetch_sub(__op) - __op; - } -}; - -// atomic_ref - -template -struct atomic_ref : public __atomic_base_ref<_Tp> -{ - typedef __atomic_base_ref<_Tp> __base; - using value_type = _Tp; - - static constexpr size_t required_alignment = sizeof(_Tp); - - static constexpr bool is_always_lock_free = sizeof(_Tp) <= 8; - - _LIBCUDACXX_INLINE_VISIBILITY explicit atomic_ref(_Tp& __ref) - : __base(__ref) - {} - - _LIBCUDACXX_INLINE_VISIBILITY _Tp operator=(_Tp __v) const volatile noexcept - { - __base::store(__v); - return __v; - } -}; - -// atomic_ref - -template -struct atomic_ref<_Tp*> : public __atomic_base_ref<_Tp*> -{ - typedef __atomic_base_ref<_Tp*> __base; - using value_type = _Tp*; - - static constexpr size_t required_alignment = sizeof(_Tp*); - - static constexpr bool is_always_lock_free = sizeof(_Tp*) <= 8; - - _LIBCUDACXX_INLINE_VISIBILITY explicit atomic_ref(_Tp*& __ref) - : __base(__ref) - {} - - _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator=(_Tp* __v) const noexcept - { - __base::store(__v); - return __v; - } - - _LIBCUDACXX_INLINE_VISIBILITY _Tp* fetch_add(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) const noexcept - { - return __cxx_atomic_fetch_add(&this->__a_, __op, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp* fetch_sub(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) const noexcept - { - return __cxx_atomic_fetch_sub(&this->__a_, __op, __m); - } - - _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator++(int) const noexcept - { - return fetch_add(1); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator--(int) const noexcept - { - return fetch_sub(1); - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator++() const noexcept - { - return fetch_add(1) + 1; - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator--() const noexcept - { - return fetch_sub(1) - 1; - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator+=(ptrdiff_t __op) const noexcept - { - return fetch_add(__op) + __op; - } - _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator-=(ptrdiff_t __op) const noexcept - { - return fetch_sub(__op) - __op; - } -}; - -// atomic_is_lock_free - -template -_LIBCUDACXX_INLINE_VISIBILITY bool atomic_is_lock_free(const volatile atomic<_Tp>* __o) noexcept -{ - return __o->is_lock_free(); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY bool atomic_is_lock_free(const atomic<_Tp>* __o) noexcept -{ - return __o->is_lock_free(); -} - -// atomic_init - -template -_LIBCUDACXX_INLINE_VISIBILITY void atomic_init(volatile atomic<_Tp>* __o, _Tp __d) noexcept -{ - __cxx_atomic_init(&__o->__a_, __d); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY void atomic_init(atomic<_Tp>* __o, _Tp __d) noexcept -{ - __cxx_atomic_init(&__o->__a_, __d); -} - -// atomic_store - -template -_LIBCUDACXX_INLINE_VISIBILITY void atomic_store(volatile atomic<_Tp>* __o, _Tp __d) noexcept -{ - __o->store(__d); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY void atomic_store(atomic<_Tp>* __o, _Tp __d) noexcept -{ - __o->store(__d); -} - -// 
atomic_store_explicit - -template -_LIBCUDACXX_INLINE_VISIBILITY void atomic_store_explicit(volatile atomic<_Tp>* __o, _Tp __d, memory_order __m) noexcept - _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) -{ - __o->store(__d, __m); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY void atomic_store_explicit(atomic<_Tp>* __o, _Tp __d, memory_order __m) noexcept - _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) -{ - __o->store(__d, __m); -} - -// atomic_load - -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp atomic_load(const volatile atomic<_Tp>* __o) noexcept -{ - return __o->load(); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp atomic_load(const atomic<_Tp>* __o) noexcept -{ - return __o->load(); -} - -// atomic_load_explicit - -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp atomic_load_explicit(const volatile atomic<_Tp>* __o, memory_order __m) noexcept - _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) -{ - return __o->load(__m); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp atomic_load_explicit(const atomic<_Tp>* __o, memory_order __m) noexcept - _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) -{ - return __o->load(__m); -} - -// atomic_exchange - -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp atomic_exchange(volatile atomic<_Tp>* __o, _Tp __d) noexcept -{ - return __o->exchange(__d); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp atomic_exchange(atomic<_Tp>* __o, _Tp __d) noexcept -{ - return __o->exchange(__d); -} - -// atomic_exchange_explicit - -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp atomic_exchange_explicit(volatile atomic<_Tp>* __o, _Tp __d, memory_order __m) noexcept -{ - return __o->exchange(__d, __m); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp atomic_exchange_explicit(atomic<_Tp>* __o, _Tp __d, memory_order __m) noexcept -{ - return __o->exchange(__d, __m); -} - -// atomic_compare_exchange_weak - -template -_LIBCUDACXX_INLINE_VISIBILITY bool atomic_compare_exchange_weak(volatile atomic<_Tp>* __o, _Tp* __e, _Tp __d) noexcept -{ - return __o->compare_exchange_weak(*__e, __d); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY bool atomic_compare_exchange_weak(atomic<_Tp>* __o, _Tp* __e, _Tp __d) noexcept -{ - return __o->compare_exchange_weak(*__e, __d); -} - -// atomic_compare_exchange_strong - -template -_LIBCUDACXX_INLINE_VISIBILITY bool atomic_compare_exchange_strong(volatile atomic<_Tp>* __o, _Tp* __e, _Tp __d) noexcept -{ - return __o->compare_exchange_strong(*__e, __d); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY bool atomic_compare_exchange_strong(atomic<_Tp>* __o, _Tp* __e, _Tp __d) noexcept -{ - return __o->compare_exchange_strong(*__e, __d); -} - -// atomic_compare_exchange_weak_explicit - -template -_LIBCUDACXX_INLINE_VISIBILITY bool atomic_compare_exchange_weak_explicit( - volatile atomic<_Tp>* __o, _Tp* __e, _Tp __d, memory_order __s, memory_order __f) noexcept - _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) -{ - return __o->compare_exchange_weak(*__e, __d, __s, __f); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY bool -atomic_compare_exchange_weak_explicit(atomic<_Tp>* __o, _Tp* __e, _Tp __d, memory_order __s, memory_order __f) noexcept - _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) -{ - return __o->compare_exchange_weak(*__e, __d, __s, __f); -} - -// atomic_compare_exchange_strong_explicit - -template -_LIBCUDACXX_INLINE_VISIBILITY bool atomic_compare_exchange_strong_explicit( - volatile atomic<_Tp>* __o, _Tp* __e, _Tp __d, memory_order __s, memory_order __f) noexcept - _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) -{ - return __o->compare_exchange_strong(*__e, __d, __s, 
__f); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY bool atomic_compare_exchange_strong_explicit( - atomic<_Tp>* __o, _Tp* __e, _Tp __d, memory_order __s, memory_order __f) noexcept - _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) -{ - return __o->compare_exchange_strong(*__e, __d, __s, __f); -} - -// atomic_wait - -template -_LIBCUDACXX_INLINE_VISIBILITY void -atomic_wait(const volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __v) noexcept -{ - return __o->wait(__v); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY void atomic_wait(const atomic<_Tp>* __o, typename atomic<_Tp>::value_type __v) noexcept -{ - return __o->wait(__v); -} - -// atomic_wait_explicit - -template -_LIBCUDACXX_INLINE_VISIBILITY void -atomic_wait_explicit(const volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __v, memory_order __m) noexcept - _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) -{ - return __o->wait(__v, __m); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY void -atomic_wait_explicit(const atomic<_Tp>* __o, typename atomic<_Tp>::value_type __v, memory_order __m) noexcept - _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) -{ - return __o->wait(__v, __m); -} - -// atomic_notify_one - -template -_LIBCUDACXX_INLINE_VISIBILITY void atomic_notify_one(volatile atomic<_Tp>* __o) noexcept -{ - __o->notify_one(); -} -template -_LIBCUDACXX_INLINE_VISIBILITY void atomic_notify_one(atomic<_Tp>* __o) noexcept -{ - __o->notify_one(); -} - -// atomic_notify_one - -template -_LIBCUDACXX_INLINE_VISIBILITY void atomic_notify_all(volatile atomic<_Tp>* __o) noexcept -{ - __o->notify_all(); -} -template -_LIBCUDACXX_INLINE_VISIBILITY void atomic_notify_all(atomic<_Tp>* __o) noexcept -{ - __o->notify_all(); -} - -// atomic_fetch_add - -template -_LIBCUDACXX_INLINE_VISIBILITY - __enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp> - atomic_fetch_add(volatile atomic<_Tp>* __o, _Tp __op) noexcept -{ - return __o->fetch_add(__op); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY - __enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp> - atomic_fetch_add(atomic<_Tp>* __o, _Tp __op) noexcept -{ - return __o->fetch_add(__op); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp* atomic_fetch_add(volatile atomic<_Tp*>* __o, ptrdiff_t __op) noexcept -{ - return __o->fetch_add(__op); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp* atomic_fetch_add(atomic<_Tp*>* __o, ptrdiff_t __op) noexcept -{ - return __o->fetch_add(__op); -} - -// atomic_fetch_add_explicit - -template -_LIBCUDACXX_INLINE_VISIBILITY - __enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp> - atomic_fetch_add_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept -{ - return __o->fetch_add(__op, __m); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY - __enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp> - atomic_fetch_add_explicit(atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept -{ - return __o->fetch_add(__op, __m); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp* -atomic_fetch_add_explicit(volatile atomic<_Tp*>* __o, ptrdiff_t __op, memory_order __m) noexcept -{ - return __o->fetch_add(__op, __m); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp* -atomic_fetch_add_explicit(atomic<_Tp*>* __o, ptrdiff_t __op, memory_order __m) noexcept -{ - return __o->fetch_add(__op, __m); -} - -// atomic_fetch_sub - -template 
-_LIBCUDACXX_INLINE_VISIBILITY - __enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp> - atomic_fetch_sub(volatile atomic<_Tp>* __o, _Tp __op) noexcept -{ - return __o->fetch_sub(__op); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY - __enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp> - atomic_fetch_sub(atomic<_Tp>* __o, _Tp __op) noexcept -{ - return __o->fetch_sub(__op); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp* atomic_fetch_sub(volatile atomic<_Tp*>* __o, ptrdiff_t __op) noexcept -{ - return __o->fetch_sub(__op); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp* atomic_fetch_sub(atomic<_Tp*>* __o, ptrdiff_t __op) noexcept -{ - return __o->fetch_sub(__op); -} - -// atomic_fetch_sub_explicit - -template -_LIBCUDACXX_INLINE_VISIBILITY - __enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp> - atomic_fetch_sub_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept -{ - return __o->fetch_sub(__op, __m); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY - __enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp> - atomic_fetch_sub_explicit(atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept -{ - return __o->fetch_sub(__op, __m); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp* -atomic_fetch_sub_explicit(volatile atomic<_Tp*>* __o, ptrdiff_t __op, memory_order __m) noexcept -{ - return __o->fetch_sub(__op, __m); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp* -atomic_fetch_sub_explicit(atomic<_Tp*>* __o, ptrdiff_t __op, memory_order __m) noexcept -{ - return __o->fetch_sub(__op, __m); -} - -// atomic_fetch_and - -template -_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> -atomic_fetch_and(volatile atomic<_Tp>* __o, _Tp __op) noexcept -{ - return __o->fetch_and(__op); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> -atomic_fetch_and(atomic<_Tp>* __o, _Tp __op) noexcept -{ - return __o->fetch_and(__op); -} - -// atomic_fetch_and_explicit - -template -_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> -atomic_fetch_and_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept -{ - return __o->fetch_and(__op, __m); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> -atomic_fetch_and_explicit(atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept -{ - return __o->fetch_and(__op, __m); -} - -// atomic_fetch_or - -template -_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> -atomic_fetch_or(volatile atomic<_Tp>* __o, _Tp __op) noexcept -{ - return __o->fetch_or(__op); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> -atomic_fetch_or(atomic<_Tp>* __o, _Tp __op) noexcept -{ - return __o->fetch_or(__op); -} - -// atomic_fetch_or_explicit - -template -_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> -atomic_fetch_or_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept -{ - return __o->fetch_or(__op, __m); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> -atomic_fetch_or_explicit(atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept -{ - return __o->fetch_or(__op, __m); 
-} - -// atomic_fetch_xor - -template -_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> -atomic_fetch_xor(volatile atomic<_Tp>* __o, _Tp __op) noexcept -{ - return __o->fetch_xor(__op); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> -atomic_fetch_xor(atomic<_Tp>* __o, _Tp __op) noexcept -{ - return __o->fetch_xor(__op); -} - -// atomic_fetch_xor_explicit - -template -_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> -atomic_fetch_xor_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept -{ - return __o->fetch_xor(__op, __m); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> -atomic_fetch_xor_explicit(atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept -{ - return __o->fetch_xor(__op, __m); -} - -// flag type and operations - -typedef struct atomic_flag -{ - __cxx_atomic_impl<_LIBCUDACXX_ATOMIC_FLAG_TYPE, 0> __a_; - - _LIBCUDACXX_INLINE_VISIBILITY bool test(memory_order __m = memory_order_seq_cst) const volatile noexcept - { - return _LIBCUDACXX_ATOMIC_FLAG_TYPE(true) == __cxx_atomic_load(&__a_, __m); - } - _LIBCUDACXX_INLINE_VISIBILITY bool test(memory_order __m = memory_order_seq_cst) const noexcept - { - return _LIBCUDACXX_ATOMIC_FLAG_TYPE(true) == __cxx_atomic_load(&__a_, __m); - } - - _LIBCUDACXX_INLINE_VISIBILITY bool test_and_set(memory_order __m = memory_order_seq_cst) volatile noexcept - { - return __cxx_atomic_exchange(&__a_, _LIBCUDACXX_ATOMIC_FLAG_TYPE(true), __m); - } - _LIBCUDACXX_INLINE_VISIBILITY bool test_and_set(memory_order __m = memory_order_seq_cst) noexcept - { - return __cxx_atomic_exchange(&__a_, _LIBCUDACXX_ATOMIC_FLAG_TYPE(true), __m); - } - _LIBCUDACXX_INLINE_VISIBILITY void clear(memory_order __m = memory_order_seq_cst) volatile noexcept - { - __cxx_atomic_store(&__a_, _LIBCUDACXX_ATOMIC_FLAG_TYPE(false), __m); - } - _LIBCUDACXX_INLINE_VISIBILITY void clear(memory_order __m = memory_order_seq_cst) noexcept - { - __cxx_atomic_store(&__a_, _LIBCUDACXX_ATOMIC_FLAG_TYPE(false), __m); - } - -#if !defined(__CUDA_MINIMUM_ARCH__) || __CUDA_MINIMUM_ARCH__ >= 700 - _LIBCUDACXX_INLINE_VISIBILITY void wait(bool __v, memory_order __m = memory_order_seq_cst) const volatile noexcept - { - __cxx_atomic_wait(&__a_, _LIBCUDACXX_ATOMIC_FLAG_TYPE(__v), __m); - } - _LIBCUDACXX_INLINE_VISIBILITY void wait(bool __v, memory_order __m = memory_order_seq_cst) const noexcept - { - __cxx_atomic_wait(&__a_, _LIBCUDACXX_ATOMIC_FLAG_TYPE(__v), __m); - } - _LIBCUDACXX_INLINE_VISIBILITY void notify_one() volatile noexcept - { - __cxx_atomic_notify_one(&__a_); - } - _LIBCUDACXX_INLINE_VISIBILITY void notify_one() noexcept - { - __cxx_atomic_notify_one(&__a_); - } - _LIBCUDACXX_INLINE_VISIBILITY void notify_all() volatile noexcept - { - __cxx_atomic_notify_all(&__a_); - } - _LIBCUDACXX_INLINE_VISIBILITY void notify_all() noexcept - { - __cxx_atomic_notify_all(&__a_); - } -#endif - - atomic_flag() noexcept = default; - - _LIBCUDACXX_INLINE_VISIBILITY constexpr atomic_flag(bool __b) noexcept - : __a_(__b) - {} // EXTENSION - - atomic_flag(const atomic_flag&) = delete; - atomic_flag& operator=(const atomic_flag&) = delete; - atomic_flag& operator=(const atomic_flag&) volatile = delete; -} atomic_flag; - -inline _LIBCUDACXX_INLINE_VISIBILITY bool atomic_flag_test(const volatile atomic_flag* __o) noexcept -{ - return __o->test(); -} - -inline _LIBCUDACXX_INLINE_VISIBILITY bool atomic_flag_test(const 
atomic_flag* __o) noexcept -{ - return __o->test(); -} - -inline _LIBCUDACXX_INLINE_VISIBILITY bool -atomic_flag_test_explicit(const volatile atomic_flag* __o, memory_order __m) noexcept -{ - return __o->test(__m); -} - -inline _LIBCUDACXX_INLINE_VISIBILITY bool atomic_flag_test_explicit(const atomic_flag* __o, memory_order __m) noexcept -{ - return __o->test(__m); -} - -inline _LIBCUDACXX_INLINE_VISIBILITY bool atomic_flag_test_and_set(volatile atomic_flag* __o) noexcept -{ - return __o->test_and_set(); -} - -inline _LIBCUDACXX_INLINE_VISIBILITY bool atomic_flag_test_and_set(atomic_flag* __o) noexcept -{ - return __o->test_and_set(); -} - -inline _LIBCUDACXX_INLINE_VISIBILITY bool -atomic_flag_test_and_set_explicit(volatile atomic_flag* __o, memory_order __m) noexcept -{ - return __o->test_and_set(__m); -} - -inline _LIBCUDACXX_INLINE_VISIBILITY bool atomic_flag_test_and_set_explicit(atomic_flag* __o, memory_order __m) noexcept -{ - return __o->test_and_set(__m); -} - -inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_clear(volatile atomic_flag* __o) noexcept -{ - __o->clear(); -} - -inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_clear(atomic_flag* __o) noexcept -{ - __o->clear(); -} - -inline _LIBCUDACXX_INLINE_VISIBILITY void -atomic_flag_clear_explicit(volatile atomic_flag* __o, memory_order __m) noexcept -{ - __o->clear(__m); -} - -inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_clear_explicit(atomic_flag* __o, memory_order __m) noexcept -{ - __o->clear(__m); -} - -#if !defined(__CUDA_MINIMUM_ARCH__) || __CUDA_MINIMUM_ARCH__ >= 700 - -inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_wait(const volatile atomic_flag* __o, bool __v) noexcept -{ - __o->wait(__v); -} - -inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_wait(const atomic_flag* __o, bool __v) noexcept -{ - __o->wait(__v); -} - -inline _LIBCUDACXX_INLINE_VISIBILITY void -atomic_flag_wait_explicit(const volatile atomic_flag* __o, bool __v, memory_order __m) noexcept -{ - __o->wait(__v, __m); -} - -inline _LIBCUDACXX_INLINE_VISIBILITY void -atomic_flag_wait_explicit(const atomic_flag* __o, bool __v, memory_order __m) noexcept -{ - __o->wait(__v, __m); -} - -inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_notify_one(volatile atomic_flag* __o) noexcept -{ - __o->notify_one(); -} - -inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_notify_one(atomic_flag* __o) noexcept -{ - __o->notify_one(); -} - -inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_notify_all(volatile atomic_flag* __o) noexcept -{ - __o->notify_all(); -} - -inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_notify_all(atomic_flag* __o) noexcept -{ - __o->notify_all(); -} - -#endif - -// fences - -inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_thread_fence(memory_order __m) noexcept -{ - __cxx_atomic_thread_fence(__m); -} - -inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_signal_fence(memory_order __m) noexcept -{ - __cxx_atomic_signal_fence(__m); -} - -// Atomics for standard typedef types - -typedef atomic atomic_bool; -typedef atomic atomic_char; -typedef atomic atomic_schar; -typedef atomic atomic_uchar; -typedef atomic atomic_short; -typedef atomic atomic_ushort; -typedef atomic atomic_int; -typedef atomic atomic_uint; -typedef atomic atomic_long; -typedef atomic atomic_ulong; -typedef atomic atomic_llong; -typedef atomic atomic_ullong; -typedef atomic atomic_char16_t; -typedef atomic atomic_char32_t; -typedef atomic atomic_wchar_t; - -typedef atomic atomic_int_least8_t; -typedef atomic atomic_uint_least8_t; 
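// [Illustrative aside, not part of the diff] A minimal host-side sketch exercising the
// atomic_flag and fence entry points listed in this deleted block; the bool constructor
// used below is the EXTENSION constructor shown above, everything else is standard API:
//   cuda::std::atomic_flag f{false};
//   if (!f.test_and_set(cuda::std::memory_order_acquire)) { /* this thread won the race */ }
//   cuda::std::atomic_thread_fence(cuda::std::memory_order_seq_cst);
//   f.clear(cuda::std::memory_order_release);
//   f.notify_one(); // flag wait()/notify_*() are guarded to SM 7.0+ on device, per the #if above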
-typedef atomic atomic_int_least16_t; -typedef atomic atomic_uint_least16_t; -typedef atomic atomic_int_least32_t; -typedef atomic atomic_uint_least32_t; -typedef atomic atomic_int_least64_t; -typedef atomic atomic_uint_least64_t; - -typedef atomic atomic_int_fast8_t; -typedef atomic atomic_uint_fast8_t; -typedef atomic atomic_int_fast16_t; -typedef atomic atomic_uint_fast16_t; -typedef atomic atomic_int_fast32_t; -typedef atomic atomic_uint_fast32_t; -typedef atomic atomic_int_fast64_t; -typedef atomic atomic_uint_fast64_t; - -typedef atomic atomic_int8_t; -typedef atomic atomic_uint8_t; -typedef atomic atomic_int16_t; -typedef atomic atomic_uint16_t; -typedef atomic atomic_int32_t; -typedef atomic atomic_uint32_t; -typedef atomic atomic_int64_t; -typedef atomic atomic_uint64_t; - -typedef atomic atomic_intptr_t; -typedef atomic atomic_uintptr_t; -typedef atomic atomic_size_t; -typedef atomic atomic_ptrdiff_t; -typedef atomic atomic_intmax_t; -typedef atomic atomic_uintmax_t; - -static_assert(ATOMIC_INT_LOCK_FREE, "This library assumes atomic is lock-free."); - -typedef atomic atomic_signed_lock_free; -typedef atomic atomic_unsigned_lock_free; - -#define ATOMIC_FLAG_INIT \ - { \ - false \ - } -#define ATOMIC_VAR_INIT(__v) \ - { \ - __v \ - } - -_LIBCUDACXX_END_NAMESPACE_STD - -#include -_CCCL_POP_MACROS - -#endif // _LIBCUDACXX_ATOMIC diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/barrier b/libcudacxx/include/cuda/std/detail/libcxx/include/barrier index c2308aeb88..58e0e2d240 100644 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/barrier +++ b/libcudacxx/include/cuda/std/detail/libcxx/include/barrier @@ -79,17 +79,17 @@ template class alignas(64) __barrier_base { ptrdiff_t __expected; - __atomic_base __expected_adjustment; + __atomic_impl __expected_adjustment; _CompletionF __completion; using __phase_t = uint8_t; - __atomic_base<__phase_t, _Sco> __phase; + __atomic_impl<__phase_t, _Sco> __phase; struct alignas(64) __state_t { struct { - __atomic_base<__phase_t, _Sco> __phase = ATOMIC_VAR_INIT(0); + __atomic_impl<__phase_t, _Sco> __phase = LIBCUDACXX_ATOMIC_VAR_INIT(0); } __tickets[64]; }; ::std::vector<__state_t> __state; @@ -263,12 +263,12 @@ _LIBCUDACXX_INLINE_VISIBILITY bool __call_try_wait_parity(const _Barrier& __b, b return __b.__try_wait_parity(__parity); } -template +template class __barrier_base { - _LIBCUDACXX_BARRIER_ALIGNMENTS __atomic_base __expected, __arrived; + _LIBCUDACXX_BARRIER_ALIGNMENTS __atomic_impl __expected, __arrived; _LIBCUDACXX_BARRIER_ALIGNMENTS _CompletionF __completion; - _LIBCUDACXX_BARRIER_ALIGNMENTS __atomic_base __phase; + _LIBCUDACXX_BARRIER_ALIGNMENTS __atomic_impl __phase; public: using arrival_token = bool; @@ -321,7 +321,7 @@ public: __completion(); __arrived.store(__new_expected, memory_order_relaxed); __phase.store(!__old_phase, memory_order_release); - __cxx_atomic_notify_all(&__phase.__a_); + __atomic_notify_all(&__phase.__a, __scope_to_tag<_Sco>{}); } return __old_phase; } @@ -345,7 +345,7 @@ public: } }; -template +template class __barrier_base<__empty_completion, _Sco> { static constexpr uint64_t __expected_unit = 1ull; @@ -354,7 +354,7 @@ class __barrier_base<__empty_completion, _Sco> static constexpr uint64_t __phase_bit = 1ull << 63; static constexpr uint64_t __arrived_mask = (__phase_bit - 1) & ~__expected_mask; - _LIBCUDACXX_BARRIER_ALIGNMENTS __atomic_base __phase_arrived_expected; + _LIBCUDACXX_BARRIER_ALIGNMENTS __atomic_impl __phase_arrived_expected; public: using arrival_token = uint64_t; @@ -457,6 +457,7 @@ 
public: _LIBCUDACXX_END_NAMESPACE_STD #include + _CCCL_POP_MACROS #endif //_LIBCUDACXX_BARRIER diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/latch b/libcudacxx/include/cuda/std/detail/libcxx/include/latch index 1272091737..26442e8283 100644 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/latch +++ b/libcudacxx/include/cuda/std/detail/libcxx/include/latch @@ -48,9 +48,11 @@ namespace std # pragma system_header #endif // no system header +#include #include #include // all public C++ headers provide the assertion handler #include +#include _CCCL_PUSH_MACROS @@ -66,10 +68,10 @@ _LIBCUDACXX_BEGIN_NAMESPACE_STD # define _LIBCUDACXX_LATCH_ALIGNMENT #endif -template +template class __latch_base { - _LIBCUDACXX_LATCH_ALIGNMENT __atomic_base __counter; + _LIBCUDACXX_LATCH_ALIGNMENT __atomic_impl __counter; public: inline _LIBCUDACXX_INLINE_VISIBILITY constexpr explicit __latch_base(ptrdiff_t __expected) @@ -123,6 +125,7 @@ using latch = __latch_base<>; _LIBCUDACXX_END_NAMESPACE_STD #include + _CCCL_POP_MACROS #endif //_LIBCUDACXX_LATCH diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/semaphore b/libcudacxx/include/cuda/std/detail/libcxx/include/semaphore index 6f2f3f9c12..74b421d903 100644 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/semaphore +++ b/libcudacxx/include/cuda/std/detail/libcxx/include/semaphore @@ -68,7 +68,7 @@ _CCCL_PUSH_MACROS _LIBCUDACXX_BEGIN_NAMESPACE_STD -template +template class __atomic_semaphore_base { _LIBCUDACXX_INLINE_VISIBILITY bool __fetch_sub_if_slow(ptrdiff_t __old) @@ -119,7 +119,7 @@ class __atomic_semaphore_base }, __rel_time); } - __atomic_base __count; + __atomic_impl __count; public: _LIBCUDACXX_INLINE_VISIBILITY static constexpr ptrdiff_t max() noexcept @@ -191,7 +191,7 @@ public: #ifndef _LIBCUDACXX_USE_NATIVE_SEMAPHORES -template +template class __atomic_semaphore_base<_Sco, 1> { _LIBCUDACXX_INLINE_VISIBILITY bool __acquire_slow_timed(chrono::nanoseconds const& __rel_time) @@ -202,7 +202,7 @@ class __atomic_semaphore_base<_Sco, 1> }, __rel_time); } - __atomic_base __available; + __atomic_impl __available; public: _LIBCUDACXX_INLINE_VISIBILITY static constexpr ptrdiff_t max() noexcept @@ -269,7 +269,7 @@ public: #else -template +template class __sem_semaphore_base { _LIBCUDACXX_INLINE_VISIBILITY bool __backfill(bool __success) @@ -278,81 +278,83 @@ class __sem_semaphore_base if (__success) { auto const __back_amount = __backbuffer.fetch_sub(2, memory_order_acquire); - bool const __post_one = __back_amount > 0; - bool const __post_two = __back_amount > 1; - auto const __success = (!__post_one || __libcpp_semaphore_post(&__semaphore)) - && (!__post_two || __libcpp_semaphore_post(&__semaphore)); - _LIBCUDACXX_ASSERT(__success, ""); - if (!__post_one || !__post_two) - { - __backbuffer.fetch_add(!__post_one ? 2 : 1, memory_order_relaxed); - } } -# endif - return __success; + bool const __post_one = __back_amount > 0; + bool const __post_two = __back_amount > 1; + auto const __success = + (!__post_one || __libcpp_semaphore_post(&__semaphore)) && (!__post_two || __libcpp_semaphore_post(&__semaphore)); + _LIBCUDACXX_ASSERT(__success, ""); + if (!__post_one || !__post_two) + { + __backbuffer.fetch_add(!__post_one ? 
2 : 1, memory_order_relaxed); + } } +# endif + return __success; +} - _LIBCUDACXX_INLINE_VISIBILITY bool __try_acquire_fast() - { +_LIBCUDACXX_INLINE_VISIBILITY bool +__try_acquire_fast() +{ # ifndef _LIBCUDACXX_HAS_NO_SEMAPHORE_FRONT_BUFFER - ptrdiff_t __old; - __libcpp_thread_poll_with_backoff( - [&]() { - __old = __frontbuffer.load(memory_order_relaxed); - return 0 != (__old >> 32); - }, - chrono::microseconds(5)); + ptrdiff_t __old; + __libcpp_thread_poll_with_backoff( + [&]() { + __old = __frontbuffer.load(memory_order_relaxed); + return 0 != (__old >> 32); + }, + chrono::microseconds(5)); - // always steal if you can - while (__old >> 32) - { - if (__frontbuffer.compare_exchange_weak(__old, __old - (1ll << 32), memory_order_acquire)) - { - return true; - } - } - // record we're waiting - __old = __frontbuffer.fetch_add(1ll, memory_order_release); - // ALWAYS steal if you can! - while (__old >> 32) + // always steal if you can + while (__old >> 32) + { + if (__frontbuffer.compare_exchange_weak(__old, __old - (1ll << 32), memory_order_acquire)) { - if (__frontbuffer.compare_exchange_weak(__old, __old - (1ll << 32), memory_order_acquire)) - { - break; - } + return true; } - // not going to wait after all - if (__old >> 32) + } + // record we're waiting + __old = __frontbuffer.fetch_add(1ll, memory_order_release); + // ALWAYS steal if you can! + while (__old >> 32) + { + if (__frontbuffer.compare_exchange_weak(__old, __old - (1ll << 32), memory_order_acquire)) { - return __try_done(true); + break; } -# endif - // the wait has begun... - return false; } - - _LIBCUDACXX_INLINE_VISIBILITY bool __try_done(bool __success) + // not going to wait after all + if (__old >> 32) { + return __try_done(true); + } +# endif + // the wait has begun... + return false; +} + +_LIBCUDACXX_INLINE_VISIBILITY bool __try_done(bool __success) +{ # ifndef _LIBCUDACXX_HAS_NO_SEMAPHORE_FRONT_BUFFER - // record we're NOT waiting - __frontbuffer.fetch_sub(1ll, memory_order_release); + // record we're NOT waiting + __frontbuffer.fetch_sub(1ll, memory_order_release); # endif - return __backfill(__success); - } + return __backfill(__success); +} - _LIBCUDACXX_INLINE_VISIBILITY void __release_slow(ptrdiff_t __post_amount) - { +_LIBCUDACXX_INLINE_VISIBILITY void __release_slow(ptrdiff_t __post_amount) +{ # ifndef _LIBCUDACXX_HAS_NO_SEMAPHORE_BACK_BUFFER - bool const __post_one = __post_amount > 0; - bool const __post_two = __post_amount > 1; - if (__post_amount > 2) - { - __backbuffer.fetch_add(__post_amount - 2, memory_order_acq_rel); - } - auto const __success = - (!__post_one || __libcpp_semaphore_post(&__semaphore)) && (!__post_two || __libcpp_semaphore_post(&__semaphore)); - _LIBCUDACXX_ASSERT(__success, ""); + bool const __post_one = __post_amount > 0; + bool const __post_two = __post_amount > 1; + if (__post_amount > 2) + { + __backbuffer.fetch_add(__post_amount - 2, memory_order_acq_rel); + } + auto const __success = + (!__post_one || __libcpp_semaphore_post(&__semaphore)) && (!__post_two || __libcpp_semaphore_post(&__semaphore)); + _LIBCUDACXX_ASSERT(__success, ""); # else for (; __post_amount; --__post_amount) { @@ -360,101 +362,102 @@ class __sem_semaphore_base _LIBCUDACXX_ASSERT(__success, ""); } # endif - } +} - __libcpp_semaphore_t __semaphore; +__libcpp_semaphore_t __semaphore; # ifndef _LIBCUDACXX_HAS_NO_SEMAPHORE_FRONT_BUFFER - __atomic_base __frontbuffer; +__atomic_impl __frontbuffer; # endif # ifndef _LIBCUDACXX_HAS_NO_SEMAPHORE_BACK_BUFFER - __atomic_base __backbuffer; +__atomic_impl __backbuffer; # 
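// Illustrative sketch (hypothetical names) of the "front buffer" trick used by
// __sem_semaphore_base above: one 64-bit atomic packs the available count in
// the upper 32 bits and the number of recorded waiters in the lower 32 bits,
// so an uncontended acquire is a single compare-exchange that never touches
// the OS semaphore. The real code additionally polls with backoff first.
#include <atomic>
#include <cstdint>

inline bool try_acquire_fast(std::atomic<std::int64_t>& front) {
  std::int64_t old = front.load(std::memory_order_relaxed);
  while ((old >> 32) > 0) {   // upper half: units currently available
    if (front.compare_exchange_weak(old, old - (std::int64_t{1} << 32),
                                    std::memory_order_acquire,
                                    std::memory_order_relaxed)) {
      return true;            // stole one unit without blocking
    }
    // CAS failure reloaded `old`; re-check and retry.
  }
  return false;               // nothing available: caller takes the slow path
}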
endif public: - static constexpr ptrdiff_t max() noexcept - { - return _LIBCUDACXX_SEMAPHORE_MAX; - } +static constexpr ptrdiff_t max() noexcept +{ + return _LIBCUDACXX_SEMAPHORE_MAX; +} - _LIBCUDACXX_INLINE_VISIBILITY __sem_semaphore_base(ptrdiff_t __count = 0) - : __semaphore() +_LIBCUDACXX_INLINE_VISIBILITY __sem_semaphore_base(ptrdiff_t __count = 0) + : __semaphore() # ifndef _LIBCUDACXX_HAS_NO_SEMAPHORE_FRONT_BUFFER - , __frontbuffer(__count << 32) + , __frontbuffer(__count << 32) # endif # ifndef _LIBCUDACXX_HAS_NO_SEMAPHORE_BACK_BUFFER - , __backbuffer(0) + , __backbuffer(0) # endif - { - _LIBCUDACXX_ASSERT(__count <= max(), ""); - auto const __success = +{ + _LIBCUDACXX_ASSERT(__count <= max(), ""); + auto const __success = # ifndef _LIBCUDACXX_HAS_NO_SEMAPHORE_FRONT_BUFFER - __libcpp_semaphore_init(&__semaphore, 0); + __libcpp_semaphore_init(&__semaphore, 0); # else __libcpp_semaphore_init(&__semaphore, __count); # endif - _LIBCUDACXX_ASSERT(__success, ""); - } + _LIBCUDACXX_ASSERT(__success, ""); +} - _LIBCUDACXX_INLINE_VISIBILITY ~__sem_semaphore_base() - { +_LIBCUDACXX_INLINE_VISIBILITY ~__sem_semaphore_base() +{ # ifndef _LIBCUDACXX_HAS_NO_SEMAPHORE_FRONT_BUFFER - _LIBCUDACXX_ASSERT(0 == (__frontbuffer.load(memory_order_relaxed) & ~0u), ""); + _LIBCUDACXX_ASSERT(0 == (__frontbuffer.load(memory_order_relaxed) & ~0u), ""); # endif - auto const __success = __libcpp_semaphore_destroy(&__semaphore); - _LIBCUDACXX_ASSERT(__success, ""); - } + auto const __success = __libcpp_semaphore_destroy(&__semaphore); + _LIBCUDACXX_ASSERT(__success, ""); +} - __sem_semaphore_base(const __sem_semaphore_base&) = delete; - __sem_semaphore_base& operator=(const __sem_semaphore_base&) = delete; +__sem_semaphore_base(const __sem_semaphore_base&) = delete; +__sem_semaphore_base& operator=(const __sem_semaphore_base&) = delete; - _LIBCUDACXX_INLINE_VISIBILITY void release(ptrdiff_t __update = 1) - { +_LIBCUDACXX_INLINE_VISIBILITY void release(ptrdiff_t __update = 1) +{ # ifndef _LIBCUDACXX_HAS_NO_SEMAPHORE_FRONT_BUFFER - // boldly assume the semaphore is taken but uncontended - ptrdiff_t __old = 0; - // try to fast-release as long as it's uncontended - while (0 == (__old & ~0ul)) - { - if (__frontbuffer.compare_exchange_weak(__old, __old + (__update << 32), memory_order_acq_rel)) - { - return; - } - } -# endif - // slow-release it is - __release_slow(__update); - } - - _LIBCUDACXX_INLINE_VISIBILITY void acquire() + // boldly assume the semaphore is taken but uncontended + ptrdiff_t __old = 0; + // try to fast-release as long as it's uncontended + while (0 == (__old & ~0ul)) { - if (!__try_acquire_fast()) + if (__frontbuffer.compare_exchange_weak(__old, __old + (__update << 32), memory_order_acq_rel)) { - __try_done(__libcpp_semaphore_wait(&__semaphore)); + return; } } +# endif + // slow-release it is + __release_slow(__update); +} - _LIBCUDACXX_INLINE_VISIBILITY bool try_acquire() noexcept +_LIBCUDACXX_INLINE_VISIBILITY void acquire() +{ + if (!__try_acquire_fast()) { - return try_acquire_for(chrono::nanoseconds(0)); + __try_done(__libcpp_semaphore_wait(&__semaphore)); } +} - template - _LIBCUDACXX_INLINE_VISIBILITY bool try_acquire_until(chrono::time_point const& __abs_time) - { - auto const current = max(Clock::now(), __abs_time); - return try_acquire_for(chrono::duration_cast(__abs_time - current)); - } +_LIBCUDACXX_INLINE_VISIBILITY bool try_acquire() noexcept +{ + return try_acquire_for(chrono::nanoseconds(0)); +} - template - _LIBCUDACXX_INLINE_VISIBILITY bool try_acquire_for(chrono::duration 
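// Companion sketch to the release() fast path shown above (illustrative names,
// and a simplified reading of the packed word): while the low half of the
// word records no waiters, a release can simply add `update << 32`; once
// waiters are recorded, the slow path posts the OS semaphore instead.
#include <atomic>
#include <cstdint>

inline bool try_release_fast(std::atomic<std::int64_t>& front, std::int64_t update) {
  std::int64_t old = 0;                           // optimistically assume no waiters
  while ((old & 0xffffffffll) == 0) {             // low half: recorded waiters
    if (front.compare_exchange_weak(old, old + (update << 32),
                                    std::memory_order_acq_rel,
                                    std::memory_order_relaxed)) {
      return true;                                // nobody was waiting; done
    }
  }
  return false;                                   // waiters present: use the slow path
}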
const& __rel_time) - { - return __try_acquire_fast() || __try_done(__libcpp_semaphore_wait_timed(&__semaphore, __rel_time)); - } -}; +template +_LIBCUDACXX_INLINE_VISIBILITY bool try_acquire_until(chrono::time_point const& __abs_time) +{ + auto const current = max(Clock::now(), __abs_time); + return try_acquire_for(chrono::duration_cast(__abs_time - current)); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY bool try_acquire_for(chrono::duration const& __rel_time) +{ + return __try_acquire_fast() || __try_done(__libcpp_semaphore_wait_timed(&__semaphore, __rel_time)); +} +} +; #endif //_LIBCUDACXX_HAS_NO_SEMAPHORES -template +template using __semaphore_base = #ifdef _LIBCUDACXX_USE_NATIVE_SEMAPHORES __conditional_t<__least_max_value <= __sem_semaphore_base<_Sco>::max(), @@ -466,13 +469,13 @@ using __semaphore_base = ; template -class counting_semaphore : public __semaphore_base<__least_max_value, 0> +class counting_semaphore : public __semaphore_base<__least_max_value, thread_scope_system> { - static_assert(__least_max_value <= __semaphore_base<__least_max_value, 0>::max(), ""); + static_assert(__least_max_value <= __semaphore_base<__least_max_value, thread_scope_system>::max(), ""); public: _LIBCUDACXX_INLINE_VISIBILITY constexpr counting_semaphore(ptrdiff_t __count = 0) - : __semaphore_base<__least_max_value, 0>(__count) + : __semaphore_base<__least_max_value, thread_scope_system>(__count) {} ~counting_semaphore() = default; diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/support/atomic/atomic_base.h b/libcudacxx/include/cuda/std/detail/libcxx/include/support/atomic/atomic_base.h deleted file mode 100644 index 65be5cfd97..0000000000 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/support/atomic/atomic_base.h +++ /dev/null @@ -1,246 +0,0 @@ -// -*- C++ -*- -//===----------------------------------------------------------------------===// -// -// Part of libcu++, the C++ Standard Library for your entire system, -// under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. -// -//===----------------------------------------------------------------------===// - -#ifndef _LIBCUDACXX_ATOMIC_BASE_H -#define _LIBCUDACXX_ATOMIC_BASE_H - -#include - -// Guard ifdef for lock free query in case it is assigned elsewhere (MSVC/CUDA) -#ifndef _LIBCUDACXX_ATOMIC_IS_LOCK_FREE -# define _LIBCUDACXX_ATOMIC_IS_LOCK_FREE(__x) __atomic_is_lock_free(__x, 0) -#endif - -_LIBCUDACXX_INLINE_VISIBILITY inline constexpr int __cxx_atomic_order_to_int(memory_order __order) -{ - // Avoid switch statement to make this a constexpr. - return __order == memory_order_relaxed - ? __ATOMIC_RELAXED - : (__order == memory_order_acquire - ? __ATOMIC_ACQUIRE - : (__order == memory_order_release - ? __ATOMIC_RELEASE - : (__order == memory_order_seq_cst - ? __ATOMIC_SEQ_CST - : (__order == memory_order_acq_rel ? __ATOMIC_ACQ_REL : __ATOMIC_CONSUME)))); -} - -_LIBCUDACXX_INLINE_VISIBILITY inline constexpr int __cxx_atomic_failure_order_to_int(memory_order __order) -{ - // Avoid switch statement to make this a constexpr. - return __order == memory_order_relaxed - ? __ATOMIC_RELAXED - : (__order == memory_order_acquire - ? __ATOMIC_ACQUIRE - : (__order == memory_order_release - ? __ATOMIC_RELAXED - : (__order == memory_order_seq_cst - ? __ATOMIC_SEQ_CST - : (__order == memory_order_acq_rel ? 
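// Minimal sketch of the selection idiom behind the __semaphore_base alias
// above: pick one backend or another at compile time by comparing the
// requested ceiling against a threshold. The class names and the threshold
// here are placeholders, not the library's.
#include <cstddef>
#include <type_traits>

template <std::ptrdiff_t LeastMaxValue>
struct counting_backend { /* general counting implementation */ };

struct binary_backend { /* specialized single-slot implementation */ };

template <std::ptrdiff_t LeastMaxValue>
using semaphore_backend =
    std::conditional_t<(LeastMaxValue <= 1),
                       binary_backend,
                       counting_backend<LeastMaxValue>>;

static_assert(std::is_same_v<semaphore_backend<1>, binary_backend>);
static_assert(std::is_same_v<semaphore_backend<256>, counting_backend<256>>);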
__ATOMIC_ACQUIRE : __ATOMIC_CONSUME)))); -} - -template -inline void __cxx_atomic_init(volatile _Tp* __a, _Up __val) -{ - auto __a_tmp = __cxx_get_underlying_atomic(__cxx_atomic_unwrap(__a)); - __cxx_atomic_assign_volatile(*__a_tmp, __val); -} - -template -inline void __cxx_atomic_init(_Tp* __a, _Up __val) -{ - auto __a_tmp = __cxx_get_underlying_atomic(__cxx_atomic_unwrap(__a)); - *__a_tmp = __val; -} - -inline void __cxx_atomic_thread_fence(memory_order __order) -{ - __atomic_thread_fence(__cxx_atomic_order_to_int(__order)); -} - -inline void __cxx_atomic_signal_fence(memory_order __order) -{ - __atomic_signal_fence(__cxx_atomic_order_to_int(__order)); -} - -template -inline void __cxx_atomic_store(_Tp* __a, _Up __val, memory_order __order) -{ - auto __v_temp = __cxx_atomic_wrap_to_base(__a, __val); - __atomic_store(__cxx_atomic_unwrap(__a), &__v_temp, __cxx_atomic_order_to_int(__order)); -} - -template -inline auto __cxx_atomic_load(const _Tp* __a, memory_order __order) -> __cxx_atomic_underlying_t<_Tp> -{ - auto __ret = __cxx_atomic_base_temporary(__a); - __atomic_load(__cxx_atomic_unwrap(__a), &__ret, __cxx_atomic_order_to_int(__order)); - return *__cxx_get_underlying_atomic(&__ret); -} - -template -inline auto __cxx_atomic_exchange(_Tp* __a, _Up __val, memory_order __order) -> __cxx_atomic_underlying_t<_Tp> -{ - auto __v_temp = __cxx_atomic_wrap_to_base(__a, __val); - auto __ret = __cxx_atomic_base_temporary(__a); - __atomic_exchange(__cxx_atomic_unwrap(__a), &__v_temp, &__ret, __cxx_atomic_order_to_int(__order)); - return *__cxx_get_underlying_atomic(&__ret); -} - -template -inline bool __cxx_atomic_compare_exchange_strong( - _Tp* __a, _Up* __expected, _Up __value, memory_order __success, memory_order __failure) -{ - (void) __expected; - return __atomic_compare_exchange( - __cxx_get_underlying_atomic(__cxx_atomic_unwrap(__a)), - __expected, - &__value, - false, - __cxx_atomic_order_to_int(__success), - __cxx_atomic_failure_order_to_int(__failure)); -} - -template -inline bool __cxx_atomic_compare_exchange_weak( - _Tp* __a, _Up* __expected, _Up __value, memory_order __success, memory_order __failure) -{ - (void) __expected; - return __atomic_compare_exchange( - __cxx_get_underlying_atomic(__cxx_atomic_unwrap(__a)), - __expected, - &__value, - true, - __cxx_atomic_order_to_int(__success), - __cxx_atomic_failure_order_to_int(__failure)); -} - -template -struct __atomic_ptr_inc -{ - enum - { - value = 1 - }; -}; - -template -struct __atomic_ptr_inc<_Tp*> -{ - enum - { - value = sizeof(_Tp) - }; -}; - -// FIXME: Haven't figured out what the spec says about using arrays with -// atomic_fetch_add. Force a failure rather than creating bad behavior. 
-template -struct __atomic_ptr_inc<_Tp[]> -{}; -template -struct __atomic_ptr_inc<_Tp[n]> -{}; - -template >::value, int> = 0> -inline auto __cxx_atomic_fetch_add(_Tp* __a, _Td __delta, memory_order __order) -> __cxx_atomic_underlying_t<_Tp> -{ - constexpr auto __skip_v = __atomic_ptr_inc<__cxx_atomic_underlying_t<_Tp>>::value; - auto __a_tmp = __cxx_get_underlying_atomic(__cxx_atomic_unwrap(__a)); - return __atomic_fetch_add(__a_tmp, __delta * __skip_v, __cxx_atomic_order_to_int(__order)); -} - -template >::value, int> = 0> -inline auto __cxx_atomic_fetch_add(_Tp* __a, _Td __delta, memory_order __order) -> __cxx_atomic_underlying_t<_Tp> -{ - auto __expected = __cxx_atomic_load(__a, memory_order_relaxed); - auto __desired = __expected + __delta; - - while (!__cxx_atomic_compare_exchange_strong(__a, &__expected, __desired, __order, __order)) - { - __desired = __expected + __delta; - } - - return __expected; -} - -template >::value, int> = 0> -inline auto __cxx_atomic_fetch_sub(_Tp* __a, _Td __delta, memory_order __order) -> __cxx_atomic_underlying_t<_Tp> -{ - constexpr auto __skip_v = __atomic_ptr_inc<__cxx_atomic_underlying_t<_Tp>>::value; - auto __a_tmp = __cxx_get_underlying_atomic(__cxx_atomic_unwrap(__a)); - return __atomic_fetch_sub(__a_tmp, __delta * __skip_v, __cxx_atomic_order_to_int(__order)); -} - -template >::value, int> = 0> -inline auto __cxx_atomic_fetch_sub(_Tp* __a, _Td __delta, memory_order __order) -> __cxx_atomic_underlying_t<_Tp> -{ - auto __expected = __cxx_atomic_load(__a, memory_order_relaxed); - auto __desired = __expected - __delta; - - while (!__cxx_atomic_compare_exchange_strong(__a, &__expected, __desired, __order, __order)) - { - __desired = __expected - __delta; - } - - return __expected; -} - -template -inline auto __cxx_atomic_fetch_and(_Tp* __a, _Td __pattern, memory_order __order) -> __cxx_atomic_underlying_t<_Tp> -{ - auto __a_tmp = __cxx_get_underlying_atomic(__cxx_atomic_unwrap(__a)); - return __atomic_fetch_and(__a_tmp, __pattern, __cxx_atomic_order_to_int(__order)); -} - -template -inline auto __cxx_atomic_fetch_or(_Tp* __a, _Td __pattern, memory_order __order) -> __cxx_atomic_underlying_t<_Tp> -{ - auto __a_tmp = __cxx_get_underlying_atomic(__cxx_atomic_unwrap(__a)); - return __atomic_fetch_or(__a_tmp, __pattern, __cxx_atomic_order_to_int(__order)); -} - -template -inline auto __cxx_atomic_fetch_xor(_Tp* __a, _Td __pattern, memory_order __order) -> __cxx_atomic_underlying_t<_Tp> -{ - auto __a_tmp = __cxx_get_underlying_atomic(__cxx_atomic_unwrap(__a)); - return __atomic_fetch_xor(__a_tmp, __pattern, __cxx_atomic_order_to_int(__order)); -} - -template -inline auto __cxx_atomic_fetch_max(_Tp* __a, _Td __val, memory_order __order) -> __cxx_atomic_underlying_t<_Tp> -{ - auto __expected = __cxx_atomic_load(__a, memory_order_relaxed); - auto __desired = __expected > __val ? __expected : __val; - - while (__desired == __val && !__cxx_atomic_compare_exchange_strong(__a, &__expected, __desired, __order, __order)) - { - __desired = __expected > __val ? __expected : __val; - } - - return __expected; -} - -template -inline auto __cxx_atomic_fetch_min(_Tp* __a, _Td __val, memory_order __order) -> __cxx_atomic_underlying_t<_Tp> -{ - auto __expected = __cxx_atomic_load(__a, memory_order_relaxed); - auto __desired = __expected < __val ? __expected : __val; - - while (__desired == __val && !__cxx_atomic_compare_exchange_strong(__a, &__expected, __desired, __order, __order)) - { - __desired = __expected < __val ? 
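// Sketch of the compare-exchange fallback that the deleted __cxx_atomic_fetch_add
// above uses for types without a native atomic add (e.g. floating point):
// load once, then retry a CAS until the addition lands. Written here with
// std::atomic rather than the __atomic_* builtins, purely for illustration.
#include <atomic>

template <class T>
T cas_fetch_add(std::atomic<T>& a, T delta, std::memory_order order) {
  T expected = a.load(std::memory_order_relaxed);
  T desired  = expected + delta;
  // On failure, compare_exchange_strong refreshes `expected`, so recompute.
  while (!a.compare_exchange_strong(expected, desired, order, std::memory_order_relaxed)) {
    desired = expected + delta;
  }
  return expected;   // the value observed immediately before the update
}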
__expected : __val; - } - - return __expected; -} - -#endif // _LIBCUDACXX_ATOMIC_BASE_H diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/support/atomic/atomic_c11.h b/libcudacxx/include/cuda/std/detail/libcxx/include/support/atomic/atomic_c11.h deleted file mode 100644 index 1e5c55d243..0000000000 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/support/atomic/atomic_c11.h +++ /dev/null @@ -1,241 +0,0 @@ -// -*- C++ -*- -//===----------------------------------------------------------------------===// -// -// Part of libcu++, the C++ Standard Library for your entire system, -// under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. -// -//===----------------------------------------------------------------------===// - -// Atomics for C11 - -template -struct __cxx_atomic_base_impl -{ - _LIBCUDACXX_INLINE_VISIBILITY __cxx_atomic_base_impl() noexcept = default; - - constexpr explicit __cxx_atomic_base_impl(_Tp value) noexcept - : __a_value(value) - {} - _LIBCUDACXX_DISABLE_EXTENSION_WARNING _Atomic(_Tp) __a_value; -}; - -#ifndef _LIBCUDACXX_ATOMIC_IS_LOCK_FREE -# define _LIBCUDACXX_ATOMIC_IS_LOCK_FREE(__x) __c11_atomic_is_lock_free(__x, 0) -#endif - -_LIBCUDACXX_INLINE_VISIBILITY inline void __cxx_atomic_thread_fence(memory_order __order) noexcept -{ - __c11_atomic_thread_fence(static_cast<__memory_order_underlying_t>(__order)); -} - -_LIBCUDACXX_INLINE_VISIBILITY inline void __cxx_atomic_signal_fence(memory_order __order) noexcept -{ - __c11_atomic_signal_fence(static_cast<__memory_order_underlying_t>(__order)); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_init(__cxx_atomic_base_impl<_Tp> volatile* __a, _Tp __val) noexcept -{ - __c11_atomic_init(&__a->__a_value, __val); -} -template -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_init(__cxx_atomic_base_impl<_Tp>* __a, _Tp __val) noexcept -{ - __c11_atomic_init(&__a->__a_value, __val); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY void -__cxx_atomic_store(__cxx_atomic_base_impl<_Tp> volatile* __a, _Tp __val, memory_order __order) noexcept -{ - __c11_atomic_store(&__a->__a_value, __val, static_cast<__memory_order_underlying_t>(__order)); -} -template -_LIBCUDACXX_INLINE_VISIBILITY void -__cxx_atomic_store(__cxx_atomic_base_impl<_Tp>* __a, _Tp __val, memory_order __order) noexcept -{ - __c11_atomic_store(&__a->__a_value, __val, static_cast<__memory_order_underlying_t>(__order)); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp -__cxx_atomic_load(__cxx_atomic_base_impl<_Tp> const volatile* __a, memory_order __order) noexcept -{ - using __ptr_type = typename remove_const__a_value)>::type*; - return __c11_atomic_load(const_cast<__ptr_type>(&__a->__a_value), static_cast<__memory_order_underlying_t>(__order)); -} -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp -__cxx_atomic_load(__cxx_atomic_base_impl<_Tp> const* __a, memory_order __order) noexcept -{ - using __ptr_type = typename remove_const__a_value)>::type*; - return __c11_atomic_load(const_cast<__ptr_type>(&__a->__a_value), static_cast<__memory_order_underlying_t>(__order)); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp -__cxx_atomic_exchange(__cxx_atomic_base_impl<_Tp> volatile* __a, _Tp __value, memory_order __order) noexcept -{ - return __c11_atomic_exchange(&__a->__a_value, __value, static_cast<__memory_order_underlying_t>(__order)); -} -template 
-_LIBCUDACXX_INLINE_VISIBILITY _Tp -__cxx_atomic_exchange(__cxx_atomic_base_impl<_Tp>* __a, _Tp __value, memory_order __order) noexcept -{ - return __c11_atomic_exchange(&__a->__a_value, __value, static_cast<__memory_order_underlying_t>(__order)); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY bool __cxx_atomic_compare_exchange_strong( - __cxx_atomic_base_impl<_Tp> volatile* __a, - _Tp* __expected, - _Tp __value, - memory_order __success, - memory_order __failure) noexcept -{ - return __c11_atomic_compare_exchange_strong( - &__a->__a_value, - __expected, - __value, - static_cast<__memory_order_underlying_t>(__success), - static_cast<__memory_order_underlying_t>(__failure)); -} -template -_LIBCUDACXX_INLINE_VISIBILITY bool __cxx_atomic_compare_exchange_strong( - __cxx_atomic_base_impl<_Tp>* __a, - _Tp* __expected, - _Tp __value, - memory_order __success, - memory_order __failure) noexcept -{ - return __c11_atomic_compare_exchange_strong( - &__a->__a_value, - __expected, - __value, - static_cast<__memory_order_underlying_t>(__success), - static_cast<__memory_order_underlying_t>(__failure)); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY bool __cxx_atomic_compare_exchange_weak( - __cxx_atomic_base_impl<_Tp> volatile* __a, - _Tp* __expected, - _Tp __value, - memory_order __success, - memory_order __failure) noexcept -{ - return __c11_atomic_compare_exchange_weak( - &__a->__a_value, - __expected, - __value, - static_cast<__memory_order_underlying_t>(__success), - static_cast<__memory_order_underlying_t>(__failure)); -} -template -_LIBCUDACXX_INLINE_VISIBILITY bool __cxx_atomic_compare_exchange_weak( - __cxx_atomic_base_impl<_Tp>* __a, - _Tp* __expected, - _Tp __value, - memory_order __success, - memory_order __failure) noexcept -{ - return __c11_atomic_compare_exchange_weak( - &__a->__a_value, - __expected, - __value, - static_cast<__memory_order_underlying_t>(__success), - static_cast<__memory_order_underlying_t>(__failure)); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp -__cxx_atomic_fetch_add(__cxx_atomic_base_impl<_Tp> volatile* __a, _Tp __delta, memory_order __order) noexcept -{ - return __c11_atomic_fetch_add(&__a->__a_value, __delta, static_cast<__memory_order_underlying_t>(__order)); -} -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp -__cxx_atomic_fetch_add(__cxx_atomic_base_impl<_Tp>* __a, _Tp __delta, memory_order __order) noexcept -{ - return __c11_atomic_fetch_add(&__a->__a_value, __delta, static_cast<__memory_order_underlying_t>(__order)); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp* -__cxx_atomic_fetch_add(__cxx_atomic_base_impl<_Tp*> volatile* __a, ptrdiff_t __delta, memory_order __order) noexcept -{ - return __c11_atomic_fetch_add(&__a->__a_value, __delta, static_cast<__memory_order_underlying_t>(__order)); -} -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp* -__cxx_atomic_fetch_add(__cxx_atomic_base_impl<_Tp*>* __a, ptrdiff_t __delta, memory_order __order) noexcept -{ - return __c11_atomic_fetch_add(&__a->__a_value, __delta, static_cast<__memory_order_underlying_t>(__order)); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp -__cxx_atomic_fetch_sub(__cxx_atomic_base_impl<_Tp> volatile* __a, _Tp __delta, memory_order __order) noexcept -{ - return __c11_atomic_fetch_sub(&__a->__a_value, __delta, static_cast<__memory_order_underlying_t>(__order)); -} -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp -__cxx_atomic_fetch_sub(__cxx_atomic_base_impl<_Tp>* __a, _Tp __delta, memory_order __order) noexcept -{ - return __c11_atomic_fetch_sub(&__a->__a_value, __delta, 
static_cast<__memory_order_underlying_t>(__order)); -} -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp* -__cxx_atomic_fetch_sub(__cxx_atomic_base_impl<_Tp*> volatile* __a, ptrdiff_t __delta, memory_order __order) noexcept -{ - return __c11_atomic_fetch_sub(&__a->__a_value, __delta, static_cast<__memory_order_underlying_t>(__order)); -} -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp* -__cxx_atomic_fetch_sub(__cxx_atomic_base_impl<_Tp*>* __a, ptrdiff_t __delta, memory_order __order) noexcept -{ - return __c11_atomic_fetch_sub(&__a->__a_value, __delta, static_cast<__memory_order_underlying_t>(__order)); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp -__cxx_atomic_fetch_and(__cxx_atomic_base_impl<_Tp> volatile* __a, _Tp __pattern, memory_order __order) noexcept -{ - return __c11_atomic_fetch_and(&__a->__a_value, __pattern, static_cast<__memory_order_underlying_t>(__order)); -} -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp -__cxx_atomic_fetch_and(__cxx_atomic_base_impl<_Tp>* __a, _Tp __pattern, memory_order __order) noexcept -{ - return __c11_atomic_fetch_and(&__a->__a_value, __pattern, static_cast<__memory_order_underlying_t>(__order)); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp -__cxx_atomic_fetch_or(__cxx_atomic_base_impl<_Tp> volatile* __a, _Tp __pattern, memory_order __order) noexcept -{ - return __c11_atomic_fetch_or(&__a->__a_value, __pattern, static_cast<__memory_order_underlying_t>(__order)); -} -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp -__cxx_atomic_fetch_or(__cxx_atomic_base_impl<_Tp>* __a, _Tp __pattern, memory_order __order) noexcept -{ - return __c11_atomic_fetch_or(&__a->__a_value, __pattern, static_cast<__memory_order_underlying_t>(__order)); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp -__cxx_atomic_fetch_xor(__cxx_atomic_base_impl<_Tp> volatile* __a, _Tp __pattern, memory_order __order) noexcept -{ - return __c11_atomic_fetch_xor(&__a->__a_value, __pattern, static_cast<__memory_order_underlying_t>(__order)); -} -template -_LIBCUDACXX_INLINE_VISIBILITY _Tp -__cxx_atomic_fetch_xor(__cxx_atomic_base_impl<_Tp>* __a, _Tp __pattern, memory_order __order) noexcept -{ - return __c11_atomic_fetch_xor(&__a->__a_value, __pattern, static_cast<__memory_order_underlying_t>(__order)); -} diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/support/atomic/atomic_cuda.h b/libcudacxx/include/cuda/std/detail/libcxx/include/support/atomic/atomic_cuda.h deleted file mode 100644 index b6fa9a16fd..0000000000 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/support/atomic/atomic_cuda.h +++ /dev/null @@ -1,787 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of libcu++, the C++ Standard Library for your entire system, -// under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. -// -//===----------------------------------------------------------------------===// - -#if defined(__CUDA_MINIMUM_ARCH__) \ - && ((!defined(_CCCL_COMPILER_MSVC) && __CUDA_MINIMUM_ARCH__ < 600) \ - || (defined(_CCCL_COMPILER_MSVC) && __CUDA_MINIMUM_ARCH__ < 700)) -# error "CUDA atomics are only supported for sm_60 and up on *nix and sm_70 and up on Windows." -#endif - -inline _CCCL_HOST_DEVICE int __stronger_order_cuda(int __a, int __b) -{ - int const __max = __a > __b ? 
__a : __b; - if (__max != __ATOMIC_RELEASE) - { - return __max; - } - static int const __xform[] = {__ATOMIC_RELEASE, __ATOMIC_ACQ_REL, __ATOMIC_ACQ_REL, __ATOMIC_RELEASE}; - return __xform[__a < __b ? __a : __b]; -} - -// pre-define lock free query for heterogeneous compatibility -#ifndef _LIBCUDACXX_ATOMIC_IS_LOCK_FREE -# define _LIBCUDACXX_ATOMIC_IS_LOCK_FREE(__x) (__x <= 8) -#endif - -// Wrap host atomic implementations into a sub-namespace -namespace __host -{ -#if defined(_CCCL_COMPILER_MSVC) -# include -#elif defined(_LIBCUDACXX_HAS_GCC_ATOMIC_IMP) -# include -#elif defined(_LIBCUDACXX_HAS_C11_ATOMIC_IMP) -// TODO -// # include -#elif defined(_CCCL_COMPILER_NVRTC) -# include -#endif -} // namespace __host - -using __host::__cxx_atomic_underlying_t; - -#include -#include - -_CCCL_HOST_DEVICE inline void __cxx_atomic_thread_fence(memory_order __order) -{ - NV_DISPATCH_TARGET( - NV_IS_DEVICE, - (__atomic_thread_fence_cuda(static_cast<__memory_order_underlying_t>(__order), __thread_scope_system_tag());), - NV_IS_HOST, - (__host::__cxx_atomic_thread_fence(__order);)) -} - -_CCCL_HOST_DEVICE inline void __cxx_atomic_signal_fence(memory_order __order) -{ - NV_DISPATCH_TARGET(NV_IS_DEVICE, - (__atomic_signal_fence_cuda(static_cast<__memory_order_underlying_t>(__order));), - NV_IS_HOST, - (__host::__cxx_atomic_signal_fence(__order);)) -} - -template -struct __cxx_atomic_base_heterogeneous_impl -{ - __cxx_atomic_base_heterogeneous_impl() noexcept = default; - - _CCCL_HOST_DEVICE constexpr explicit __cxx_atomic_base_heterogeneous_impl(_Tp __value) - : __a_value(__value) - {} - - using __underlying_t = _Tp; - static constexpr int __sco = _Sco; - - __host::__cxx_atomic_base_impl<_Tp, _Sco> __a_value; -}; - -template -struct __cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, true> -{ - __cxx_atomic_base_heterogeneous_impl() noexcept = default; - - static_assert(sizeof(_Tp) >= 4, "atomic_ref does not support 1 or 2 byte types"); - static_assert(sizeof(_Tp) <= 8, "atomic_ref does not support types larger than 8 bytes"); - - _CCCL_HOST_DEVICE constexpr explicit __cxx_atomic_base_heterogeneous_impl(_Tp& __value) - : __a_value(__value) - {} - - using __underlying_t = _Tp; - static constexpr int __sco = _Sco; - - __host::__cxx_atomic_ref_base_impl<_Tp, _Sco> __a_value; -}; - -template -_CCCL_HOST_DEVICE constexpr _Tp* -__cxx_get_underlying_device_atomic(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref>* __a) noexcept -{ - return __cxx_get_underlying_atomic(&__a->__a_value); -} - -template -_CCCL_HOST_DEVICE constexpr volatile _Tp* -__cxx_get_underlying_device_atomic(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a) noexcept -{ - return __cxx_get_underlying_atomic(&__a->__a_value); -} - -template -_CCCL_HOST_DEVICE constexpr const _Tp* -__cxx_get_underlying_device_atomic(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> const* __a) noexcept -{ - return __cxx_get_underlying_atomic(&__a->__a_value); -} - -template -_CCCL_HOST_DEVICE constexpr const volatile _Tp* -__cxx_get_underlying_device_atomic(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> const volatile* __a) noexcept -{ - return __cxx_get_underlying_atomic(&__a->__a_value); -} - -template -using __cxx_atomic_small_to_32 = __conditional_t::value, int32_t, uint32_t>; - -// Arithmetic conversions to/from proxy types -template ::value, int> = 0> -constexpr _CCCL_HOST_DEVICE inline __cxx_atomic_small_to_32<_Tp> __cxx_small_to_32(_Tp __val) -{ - return static_cast<__cxx_atomic_small_to_32<_Tp>>(__val); -} - -template 
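// Illustrative check of the "stronger of two orders" rule implemented by
// __stronger_order_cuda above: the numerically larger order wins, except that
// pairing release with an acquire-ish order must widen to acq_rel. The
// constants mirror the __ATOMIC_* values the builtins use.
constexpr int RELAXED = 0, CONSUME = 1, ACQUIRE = 2, RELEASE = 3, ACQ_REL = 4, SEQ_CST = 5;

constexpr int stronger_order(int a, int b) {
  int const max = a > b ? a : b;
  if (max != RELEASE) {
    return max;                                    // no mixing problem
  }
  // max == RELEASE: the result depends on what it is paired with.
  constexpr int xform[] = {RELEASE, ACQ_REL, ACQ_REL, RELEASE};
  return xform[a < b ? a : b];
}

static_assert(stronger_order(ACQUIRE, RELEASE) == ACQ_REL, "acquire + release widens to acq_rel");
static_assert(stronger_order(RELAXED, RELEASE) == RELEASE, "relaxed + release stays release");
static_assert(stronger_order(SEQ_CST, RELEASE) == SEQ_CST, "seq_cst dominates");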
::value, int> = 0> -constexpr _CCCL_HOST_DEVICE inline _Tp __cxx_small_from_32(__cxx_atomic_small_to_32<_Tp> __val) -{ - return static_cast<_Tp>(__val); -} - -// Non-arithmetic conversion to/from proxy types -template ::value, int> = 0> -_CCCL_HOST_DEVICE inline __cxx_atomic_small_to_32<_Tp> __cxx_small_to_32(_Tp __val) -{ - __cxx_atomic_small_to_32<_Tp> __temp{}; - memcpy(&__temp, &__val, sizeof(_Tp)); - return __temp; -} - -template ::value, int> = 0> -_CCCL_HOST_DEVICE inline _Tp __cxx_small_from_32(__cxx_atomic_small_to_32<_Tp> __val) -{ - _Tp __temp{}; - memcpy(&__temp, &__val, sizeof(_Tp)); - return __temp; -} - -template -struct __cxx_atomic_base_small_impl -{ - __cxx_atomic_base_small_impl() noexcept = default; - _CCCL_HOST_DEVICE constexpr explicit __cxx_atomic_base_small_impl(_Tp __value) - : __a_value(__cxx_small_to_32(__value)) - {} - - using __underlying_t = _Tp; - static constexpr int __sco = _Sco; - - __cxx_atomic_base_heterogeneous_impl<__cxx_atomic_small_to_32<_Tp>, _Sco, false> __a_value; -}; - -template -using __cxx_atomic_base_impl = - __conditional_t, - __cxx_atomic_base_heterogeneous_impl<_Tp, _Sco>>; - -template -using __cxx_atomic_ref_base_impl = __cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, true>; - -template -_CCCL_HOST_DEVICE void __cxx_atomic_init(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref>* __a, _Tp __val) -{ - alignas(_Tp) auto __tmp = __val; - __cxx_atomic_assign_volatile(*__cxx_get_underlying_device_atomic(__a), __tmp); -} - -template -_CCCL_HOST_DEVICE void __cxx_atomic_init(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a, _Tp __val) -{ - alignas(_Tp) auto __tmp = __val; - __cxx_atomic_assign_volatile(*__cxx_get_underlying_device_atomic(__a), __tmp); -} - -template -_CCCL_HOST_DEVICE void -__cxx_atomic_store(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref>* __a, _Tp __val, memory_order __order) -{ - alignas(_Tp) auto __tmp = __val; - NV_DISPATCH_TARGET( - NV_IS_DEVICE, - (__atomic_store_n_cuda(__cxx_get_underlying_device_atomic(__a), - __tmp, - static_cast<__memory_order_underlying_t>(__order), - __scope_tag<_Sco>());), - NV_IS_HOST, - (__host::__cxx_atomic_store(&__a->__a_value, __tmp, __order);)) -} - -template -_CCCL_HOST_DEVICE void -__cxx_atomic_store(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a, _Tp __val, memory_order __order) -{ - alignas(_Tp) auto __tmp = __val; - NV_DISPATCH_TARGET( - NV_IS_DEVICE, - (__atomic_store_n_cuda(__cxx_get_underlying_device_atomic(__a), - __tmp, - static_cast<__memory_order_underlying_t>(__order), - __scope_tag<_Sco>());), - NV_IS_HOST, - (__host::__cxx_atomic_store(&__a->__a_value, __tmp, __order);)) -} - -template -_CCCL_HOST_DEVICE _Tp -__cxx_atomic_load(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> const* __a, memory_order __order) -{ - NV_DISPATCH_TARGET( - NV_IS_DEVICE, - (return __atomic_load_n_cuda(__cxx_get_underlying_device_atomic(__a), - static_cast<__memory_order_underlying_t>(__order), - __scope_tag<_Sco>());), - NV_IS_HOST, - (return __host::__cxx_atomic_load(&__a->__a_value, __order);)) -} - -template -_CCCL_HOST_DEVICE _Tp -__cxx_atomic_load(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> const volatile* __a, memory_order __order) -{ - NV_DISPATCH_TARGET( - NV_IS_DEVICE, - (return __atomic_load_n_cuda(__cxx_get_underlying_device_atomic(__a), - static_cast<__memory_order_underlying_t>(__order), - __scope_tag<_Sco>());), - NV_IS_HOST, - (return __host::__cxx_atomic_load(&__a->__a_value, __order);)) -} - -template -_CCCL_HOST_DEVICE 
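// Sketch of the 1- and 2-byte proxy scheme above (hypothetical helper names):
// values too small for the hardware's atomic word are widened to a 32-bit
// proxy -- by integral conversion for arithmetic types, by memcpy otherwise --
// before the real atomic operation runs, and narrowed back afterwards.
// Assumes T is trivially copyable and default-constructible.
#include <cstdint>
#include <cstring>
#include <type_traits>

template <class T>
using proxy32_t = std::conditional_t<std::is_signed_v<T>, std::int32_t, std::uint32_t>;

template <class T>
proxy32_t<T> to_proxy32(T value) {
  static_assert(sizeof(T) <= 4, "only sub-word types need the proxy");
  proxy32_t<T> proxy{};
  std::memcpy(&proxy, &value, sizeof(T));   // copy the payload into the wider word
  return proxy;
}

template <class T>
T from_proxy32(proxy32_t<T> proxy) {
  T value{};
  std::memcpy(&value, &proxy, sizeof(T));   // recover the original representation
  return value;
}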
_Tp -__cxx_atomic_exchange(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref>* __a, _Tp __val, memory_order __order) -{ - alignas(_Tp) auto __tmp = __val; - NV_DISPATCH_TARGET( - NV_IS_DEVICE, - (return __atomic_exchange_n_cuda(__cxx_get_underlying_device_atomic(__a), - __tmp, - static_cast<__memory_order_underlying_t>(__order), - __scope_tag<_Sco>());), - NV_IS_HOST, - (return __host::__cxx_atomic_exchange(&__a->__a_value, __tmp, __order);)) -} - -template -_CCCL_HOST_DEVICE _Tp __cxx_atomic_exchange( - __cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a, _Tp __val, memory_order __order) -{ - alignas(_Tp) auto __tmp = __val; - NV_DISPATCH_TARGET( - NV_IS_DEVICE, - (return __atomic_exchange_n_cuda(__cxx_get_underlying_device_atomic(__a), - __tmp, - static_cast<__memory_order_underlying_t>(__order), - __scope_tag<_Sco>());), - NV_IS_HOST, - (return __host::__cxx_atomic_exchange(&__a->__a_value, __tmp, __order);)) -} - -template -_CCCL_HOST_DEVICE bool __cxx_atomic_compare_exchange_strong( - __cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref>* __a, - _Tp* __expected, - _Tp __val, - memory_order __success, - memory_order __failure) -{ - alignas(_Tp) auto __tmp = *__expected; - bool __result = false; - NV_DISPATCH_TARGET( - NV_IS_DEVICE, - (alignas(_Tp) auto __tmp_v = __val; - __result = __atomic_compare_exchange_cuda( - __cxx_get_underlying_device_atomic(__a), - &__tmp, - &__tmp_v, - false, - static_cast<__memory_order_underlying_t>(__success), - static_cast<__memory_order_underlying_t>(__failure), - __scope_tag<_Sco>());), - NV_IS_HOST, - (__result = __host::__cxx_atomic_compare_exchange_strong(&__a->__a_value, &__tmp, __val, __success, __failure);)) - *__expected = __tmp; - return __result; -} - -template -_CCCL_HOST_DEVICE bool __cxx_atomic_compare_exchange_strong( - __cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a, - _Tp* __expected, - _Tp __val, - memory_order __success, - memory_order __failure) -{ - alignas(_Tp) auto __tmp = *__expected; - bool __result = false; - NV_DISPATCH_TARGET( - NV_IS_DEVICE, - (alignas(_Tp) auto __tmp_v = __val; - __result = __atomic_compare_exchange_cuda( - __cxx_get_underlying_device_atomic(__a), - &__tmp, - &__tmp_v, - false, - static_cast<__memory_order_underlying_t>(__success), - static_cast<__memory_order_underlying_t>(__failure), - __scope_tag<_Sco>());), - NV_IS_HOST, - (__result = __host::__cxx_atomic_compare_exchange_strong(&__a->__a_value, &__tmp, __val, __success, __failure);)) - *__expected = __tmp; - return __result; -} - -template -_CCCL_HOST_DEVICE bool __cxx_atomic_compare_exchange_weak( - __cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref>* __a, - _Tp* __expected, - _Tp __val, - memory_order __success, - memory_order __failure) -{ - alignas(_Tp) auto __tmp = *__expected; - bool __result = false; - NV_DISPATCH_TARGET( - NV_IS_DEVICE, - (alignas(_Tp) auto __tmp_v = __val; - __result = __atomic_compare_exchange_cuda( - __cxx_get_underlying_device_atomic(__a), - &__tmp, - &__tmp_v, - true, - static_cast<__memory_order_underlying_t>(__success), - static_cast<__memory_order_underlying_t>(__failure), - __scope_tag<_Sco>());), - NV_IS_HOST, - (__result = __host::__cxx_atomic_compare_exchange_weak(&__a->__a_value, &__tmp, __val, __success, __failure);)) - *__expected = __tmp; - return __result; -} - -template -_CCCL_HOST_DEVICE bool __cxx_atomic_compare_exchange_weak( - __cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a, - _Tp* __expected, - _Tp __val, - memory_order __success, - memory_order 
__failure) -{ - alignas(_Tp) auto __tmp = *__expected; - bool __result = false; - NV_DISPATCH_TARGET( - NV_IS_DEVICE, - (alignas(_Tp) auto __tmp_v = __val; - __result = __atomic_compare_exchange_cuda( - __cxx_get_underlying_device_atomic(__a), - &__tmp, - &__tmp_v, - true, - static_cast<__memory_order_underlying_t>(__success), - static_cast<__memory_order_underlying_t>(__failure), - __scope_tag<_Sco>());), - NV_IS_HOST, - (__result = __host::__cxx_atomic_compare_exchange_weak(&__a->__a_value, &__tmp, __val, __success, __failure);)) - *__expected = __tmp; - return __result; -} - -template -_CCCL_HOST_DEVICE _Tp -__cxx_atomic_fetch_add(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref>* __a, _Tp __delta, memory_order __order) -{ - NV_DISPATCH_TARGET( - NV_IS_DEVICE, - (return __atomic_fetch_add_cuda(__cxx_get_underlying_device_atomic(__a), - __delta, - static_cast<__memory_order_underlying_t>(__order), - __scope_tag<_Sco>());), - NV_IS_HOST, - (return __host::__cxx_atomic_fetch_add(&__a->__a_value, __delta, __order);)) -} - -template -_CCCL_HOST_DEVICE _Tp __cxx_atomic_fetch_add( - __cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a, _Tp __delta, memory_order __order) -{ - NV_DISPATCH_TARGET( - NV_IS_DEVICE, - (return __atomic_fetch_add_cuda(__cxx_get_underlying_device_atomic(__a), - __delta, - static_cast<__memory_order_underlying_t>(__order), - __scope_tag<_Sco>());), - NV_IS_HOST, - (return __host::__cxx_atomic_fetch_add(&__a->__a_value, __delta, __order);)) -} - -template -_CCCL_HOST_DEVICE _Tp* __cxx_atomic_fetch_add( - __cxx_atomic_base_heterogeneous_impl<_Tp*, _Sco, _Ref>* __a, ptrdiff_t __delta, memory_order __order) -{ - NV_DISPATCH_TARGET( - NV_IS_DEVICE, - (return __atomic_fetch_add_cuda(__cxx_get_underlying_device_atomic(__a), - __delta, - static_cast<__memory_order_underlying_t>(__order), - __scope_tag<_Sco>());), - NV_IS_HOST, - (return __host::__cxx_atomic_fetch_add(&__a->__a_value, __delta, __order);)) -} - -template -_CCCL_HOST_DEVICE _Tp* __cxx_atomic_fetch_add( - __cxx_atomic_base_heterogeneous_impl<_Tp*, _Sco, _Ref> volatile* __a, ptrdiff_t __delta, memory_order __order) -{ - NV_DISPATCH_TARGET( - NV_IS_DEVICE, - (return __atomic_fetch_add_cuda(__cxx_get_underlying_device_atomic(__a), - __delta, - static_cast<__memory_order_underlying_t>(__order), - __scope_tag<_Sco>());), - NV_IS_HOST, - (return __host::__cxx_atomic_fetch_add(&__a->__a_value, __delta, __order);)) -} - -template -_CCCL_HOST_DEVICE _Tp -__cxx_atomic_fetch_sub(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref>* __a, _Tp __delta, memory_order __order) -{ - NV_DISPATCH_TARGET( - NV_IS_DEVICE, - (return __atomic_fetch_sub_cuda(__cxx_get_underlying_device_atomic(__a), - __delta, - static_cast<__memory_order_underlying_t>(__order), - __scope_tag<_Sco>());), - NV_IS_HOST, - (return __host::__cxx_atomic_fetch_sub(&__a->__a_value, __delta, __order);)) -} - -template -_CCCL_HOST_DEVICE _Tp __cxx_atomic_fetch_sub( - __cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a, _Tp __delta, memory_order __order) -{ - NV_DISPATCH_TARGET( - NV_IS_DEVICE, - (return __atomic_fetch_sub_cuda(__cxx_get_underlying_device_atomic(__a), - __delta, - static_cast<__memory_order_underlying_t>(__order), - __scope_tag<_Sco>());), - NV_IS_HOST, - (return __host::__cxx_atomic_fetch_sub(&__a->__a_value, __delta, __order);)) -} - -template -_CCCL_HOST_DEVICE _Tp* __cxx_atomic_fetch_sub( - __cxx_atomic_base_heterogeneous_impl<_Tp*, _Sco, _Ref>* __a, ptrdiff_t __delta, memory_order __order) -{ - 
NV_DISPATCH_TARGET( - NV_IS_DEVICE, - (return __atomic_fetch_sub_cuda(__cxx_get_underlying_device_atomic(__a), - __delta, - static_cast<__memory_order_underlying_t>(__order), - __scope_tag<_Sco>());), - NV_IS_HOST, - (return __host::__cxx_atomic_fetch_sub(&__a->__a_value, __delta, __order);)) -} - -template -_CCCL_HOST_DEVICE _Tp* __cxx_atomic_fetch_sub( - __cxx_atomic_base_heterogeneous_impl<_Tp*, _Sco, _Ref> volatile* __a, ptrdiff_t __delta, memory_order __order) -{ - NV_DISPATCH_TARGET( - NV_IS_DEVICE, - (return __atomic_fetch_sub_cuda(__cxx_get_underlying_device_atomic(__a), - __delta, - static_cast<__memory_order_underlying_t>(__order), - __scope_tag<_Sco>());), - NV_IS_HOST, - (return __host::__cxx_atomic_fetch_sub(&__a->__a_value, __delta, __order);)) -} - -template -_CCCL_HOST_DEVICE _Tp -__cxx_atomic_fetch_and(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref>* __a, _Tp __pattern, memory_order __order) -{ - NV_DISPATCH_TARGET( - NV_IS_DEVICE, - (return __atomic_fetch_and_cuda(__cxx_get_underlying_device_atomic(__a), - __pattern, - static_cast<__memory_order_underlying_t>(__order), - __scope_tag<_Sco>());), - NV_IS_HOST, - (return __host::__cxx_atomic_fetch_and(&__a->__a_value, __pattern, __order);)) -} - -template -_CCCL_HOST_DEVICE _Tp __cxx_atomic_fetch_and( - __cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a, _Tp __pattern, memory_order __order) -{ - NV_DISPATCH_TARGET( - NV_IS_DEVICE, - (return __atomic_fetch_and_cuda(__cxx_get_underlying_device_atomic(__a), - __pattern, - static_cast<__memory_order_underlying_t>(__order), - __scope_tag<_Sco>());), - NV_IS_HOST, - (return __host::__cxx_atomic_fetch_and(&__a->__a_value, __pattern, __order);)) -} - -template -_CCCL_HOST_DEVICE _Tp -__cxx_atomic_fetch_or(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref>* __a, _Tp __pattern, memory_order __order) -{ - NV_DISPATCH_TARGET( - NV_IS_DEVICE, - (return __atomic_fetch_or_cuda(__cxx_get_underlying_device_atomic(__a), - __pattern, - static_cast<__memory_order_underlying_t>(__order), - __scope_tag<_Sco>());), - NV_IS_HOST, - (return __host::__cxx_atomic_fetch_or(&__a->__a_value, __pattern, __order);)) -} - -template -_CCCL_HOST_DEVICE _Tp __cxx_atomic_fetch_or( - __cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a, _Tp __pattern, memory_order __order) -{ - NV_DISPATCH_TARGET( - NV_IS_DEVICE, - (return __atomic_fetch_or_cuda(__cxx_get_underlying_device_atomic(__a), - __pattern, - static_cast<__memory_order_underlying_t>(__order), - __scope_tag<_Sco>());), - NV_IS_HOST, - (return __host::__cxx_atomic_fetch_or(&__a->__a_value, __pattern, __order);)) -} - -template -_CCCL_HOST_DEVICE _Tp -__cxx_atomic_fetch_xor(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref>* __a, _Tp __pattern, memory_order __order) -{ - NV_DISPATCH_TARGET( - NV_IS_DEVICE, - (return __atomic_fetch_xor_cuda(__cxx_get_underlying_device_atomic(__a), - __pattern, - static_cast<__memory_order_underlying_t>(__order), - __scope_tag<_Sco>());), - NV_IS_HOST, - (return __host::__cxx_atomic_fetch_xor(&__a->__a_value, __pattern, __order);)) -} - -template -_CCCL_HOST_DEVICE _Tp __cxx_atomic_fetch_xor( - __cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a, _Tp __pattern, memory_order __order) -{ - NV_DISPATCH_TARGET( - NV_IS_DEVICE, - (return __atomic_fetch_xor_cuda(__cxx_get_underlying_device_atomic(__a), - __pattern, - static_cast<__memory_order_underlying_t>(__order), - __scope_tag<_Sco>());), - NV_IS_HOST, - (return __host::__cxx_atomic_fetch_xor(&__a->__a_value, 
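// Hedged sketch (hypothetical helper, CUDA C++) of the host/device dispatch
// shape used by every operation in the deleted atomic_cuda.h: device code
// forwards to a device-side helper, host code to the host backend. The real
// code dispatches with NV_DISPATCH_TARGET from <nv/target>; a plain
// __CUDA_ARCH__ split is shown here for brevity. Compile with nvcc; the host
// branch assumes a GCC/Clang host compiler.
__host__ __device__ inline int fetch_add_system(int* ptr, int value)
{
#if defined(__CUDA_ARCH__)
  return atomicAdd_system(ptr, value);                    // device path, sm_60 or newer
#else
  return __atomic_fetch_add(ptr, value, __ATOMIC_SEQ_CST); // host path via compiler builtin
#endif
}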
__pattern, __order);)) -} - -template -_CCCL_HOST_DEVICE _Tp -__cxx_atomic_fetch_max(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref>* __a, _Delta __val, memory_order __order) -{ - NV_IF_TARGET( - NV_IS_DEVICE, - (return __atomic_fetch_max_cuda(__cxx_get_underlying_device_atomic(__a), - __val, - static_cast<__memory_order_underlying_t>(__order), - __scope_tag<_Sco>());), - (return __host::__cxx_atomic_fetch_max(&__a->__a_value, __val, __order);)) -} - -template -_CCCL_HOST_DEVICE _Tp __cxx_atomic_fetch_max( - __cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a, _Delta __val, memory_order __order) -{ - NV_IF_TARGET( - NV_IS_DEVICE, - (return __atomic_fetch_max_cuda(__cxx_get_underlying_device_atomic(__a), - __val, - static_cast<__memory_order_underlying_t>(__order), - __scope_tag<_Sco>());), - (return __host::__cxx_atomic_fetch_max(&__a->__a_value, __val, __order);)) -} - -template -_CCCL_HOST_DEVICE _Tp -__cxx_atomic_fetch_min(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref>* __a, _Delta __val, memory_order __order) -{ - NV_IF_TARGET( - NV_IS_DEVICE, - (return __atomic_fetch_min_cuda(__cxx_get_underlying_device_atomic(__a), - __val, - static_cast<__memory_order_underlying_t>(__order), - __scope_tag<_Sco>());), - (return __host::__cxx_atomic_fetch_min(&__a->__a_value, __val, __order);)) -} - -template -_CCCL_HOST_DEVICE _Tp __cxx_atomic_fetch_min( - __cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a, _Delta __val, memory_order __order) -{ - NV_IF_TARGET( - NV_IS_DEVICE, - (return __atomic_fetch_min_cuda(__cxx_get_underlying_device_atomic(__a), - __val, - static_cast<__memory_order_underlying_t>(__order), - __scope_tag<_Sco>());), - (return __host::__cxx_atomic_fetch_min(&__a->__a_value, __val, __order);)) -} - -template -_CCCL_HOST_DEVICE inline void __cxx_atomic_init(__cxx_atomic_base_small_impl<_Tp, _Sco> volatile* __a, _Tp __val) -{ - __cxx_atomic_init(&__a->__a_value, __cxx_small_to_32(__val)); -} - -template -_CCCL_HOST_DEVICE inline void -__cxx_atomic_store(__cxx_atomic_base_small_impl<_Tp, _Sco> volatile* __a, _Tp __val, memory_order __order) -{ - __cxx_atomic_store(&__a->__a_value, __cxx_small_to_32(__val), __order); -} - -template -_CCCL_HOST_DEVICE inline _Tp -__cxx_atomic_load(__cxx_atomic_base_small_impl<_Tp, _Sco> const volatile* __a, memory_order __order) -{ - return __cxx_small_from_32<_Tp>(__cxx_atomic_load(&__a->__a_value, __order)); -} - -template -_CCCL_HOST_DEVICE inline _Tp -__cxx_atomic_exchange(__cxx_atomic_base_small_impl<_Tp, _Sco> volatile* __a, _Tp __value, memory_order __order) -{ - return __cxx_small_from_32<_Tp>(__cxx_atomic_exchange(&__a->__a_value, __cxx_small_to_32(__value), __order)); -} -_CCCL_HOST_DEVICE inline int __cuda_memcmp(void const* __lhs, void const* __rhs, size_t __count) -{ - NV_DISPATCH_TARGET( - NV_IS_DEVICE, - (auto __lhs_c = reinterpret_cast(__lhs); - auto __rhs_c = reinterpret_cast(__rhs); - while (__count--) { - auto const __lhs_v = *__lhs_c++; - auto const __rhs_v = *__rhs_c++; - if (__lhs_v < __rhs_v) - { - return -1; - } - if (__lhs_v > __rhs_v) - { - return 1; - } - } return 0;), - NV_IS_HOST, - (return memcmp(__lhs, __rhs, __count);)) -} - -template -_CCCL_HOST_DEVICE inline bool __cxx_atomic_compare_exchange_weak( - __cxx_atomic_base_small_impl<_Tp, _Sco> volatile* __a, - _Tp* __expected, - _Tp __value, - memory_order __success, - memory_order __failure) -{ - auto __temp = __cxx_small_to_32(*__expected); - auto const __ret = - __cxx_atomic_compare_exchange_weak(&__a->__a_value, &__temp, 
__cxx_small_to_32(__value), __success, __failure); - auto const __actual = __cxx_small_from_32<_Tp>(__temp); - constexpr auto __mask = static_cast((1u << (8 * sizeof(_Tp))) - 1); - if (!__ret) - { - if (0 == __cuda_memcmp(&__actual, __expected, sizeof(_Tp))) - { - __cxx_atomic_fetch_and(&__a->__a_value, __mask, memory_order_relaxed); - } - else - { - *__expected = __actual; - } - } - return __ret; -} - -template -_CCCL_HOST_DEVICE inline bool __cxx_atomic_compare_exchange_strong( - __cxx_atomic_base_small_impl<_Tp, _Sco> volatile* __a, - _Tp* __expected, - _Tp __value, - memory_order __success, - memory_order __failure) -{ - auto const __old = *__expected; - while (1) - { - if (__cxx_atomic_compare_exchange_weak(__a, __expected, __value, __success, __failure)) - { - return true; - } - if (0 != __cuda_memcmp(&__old, __expected, sizeof(_Tp))) - { - return false; - } - } -} - -template -_CCCL_HOST_DEVICE inline _Tp -__cxx_atomic_fetch_add(__cxx_atomic_base_small_impl<_Tp, _Sco> volatile* __a, _Tp __delta, memory_order __order) -{ - return __cxx_small_from_32<_Tp>(__cxx_atomic_fetch_add(&__a->__a_value, __cxx_small_to_32(__delta), __order)); -} - -template -_CCCL_HOST_DEVICE inline _Tp -__cxx_atomic_fetch_sub(__cxx_atomic_base_small_impl<_Tp, _Sco> volatile* __a, _Tp __delta, memory_order __order) -{ - return __cxx_small_from_32<_Tp>(__cxx_atomic_fetch_sub(&__a->__a_value, __cxx_small_to_32(__delta), __order)); -} - -template -_CCCL_HOST_DEVICE inline _Tp -__cxx_atomic_fetch_and(__cxx_atomic_base_small_impl<_Tp, _Sco> volatile* __a, _Tp __pattern, memory_order __order) -{ - return __cxx_small_from_32<_Tp>(__cxx_atomic_fetch_and(&__a->__a_value, __cxx_small_to_32(__pattern), __order)); -} - -template -_CCCL_HOST_DEVICE inline _Tp -__cxx_atomic_fetch_or(__cxx_atomic_base_small_impl<_Tp, _Sco> volatile* __a, _Tp __pattern, memory_order __order) -{ - return __cxx_small_from_32<_Tp>(__cxx_atomic_fetch_or(&__a->__a_value, __cxx_small_to_32(__pattern), __order)); -} - -template -_CCCL_HOST_DEVICE inline _Tp -__cxx_atomic_fetch_xor(__cxx_atomic_base_small_impl<_Tp, _Sco> volatile* __a, _Tp __pattern, memory_order __order) -{ - return __cxx_small_from_32<_Tp>(__cxx_atomic_fetch_xor(&__a->__a_value, __cxx_small_to_32(__pattern), __order)); -} - -template -_CCCL_HOST_DEVICE inline _Tp -__cxx_atomic_fetch_max(__cxx_atomic_base_small_impl<_Tp, _Sco> volatile* __a, _Delta __val, memory_order __order) -{ - return __cxx_small_from_32<_Tp>(__cxx_atomic_fetch_max(&__a->__a_value, __cxx_small_to_32(__val), __order)); -} - -template -_CCCL_HOST_DEVICE inline _Tp -__cxx_atomic_fetch_min(__cxx_atomic_base_small_impl<_Tp, _Sco> volatile* __a, _Delta __val, memory_order __order) -{ - return __cxx_small_from_32<_Tp>(__cxx_atomic_fetch_min(&__a->__a_value, __cxx_small_to_32(__val), __order)); -} diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/support/atomic/atomic_cuda_derived.h b/libcudacxx/include/cuda/std/detail/libcxx/include/support/atomic/atomic_cuda_derived.h deleted file mode 100644 index 891b0ffe1c..0000000000 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/support/atomic/atomic_cuda_derived.h +++ /dev/null @@ -1,190 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of libcu++, the C++ Standard Library for your entire system, -// under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. -// -//===----------------------------------------------------------------------===// - -template ::type = 0> -bool _CCCL_DEVICE __atomic_compare_exchange_cuda( - _Type volatile* __ptr, - _Type* __expected, - const _Type* __desired, - bool, - int __success_memorder, - int __failure_memorder, - _Scope __s) -{ - auto const __aligned = (uint32_t*) ((intptr_t) __ptr & ~(sizeof(uint32_t) - 1)); - auto const __offset = uint32_t((intptr_t) __ptr & (sizeof(uint32_t) - 1)) * 8; - auto const __mask = ((1 << sizeof(_Type) * 8) - 1) << __offset; - - uint32_t __old = *__expected << __offset; - uint32_t __old_value; - while (1) - { - __old_value = (__old & __mask) >> __offset; - if (__old_value != *__expected) - { - break; - } - uint32_t const __attempt = (__old & ~__mask) | (*__desired << __offset); - if (__atomic_compare_exchange_cuda(__aligned, &__old, &__attempt, true, __success_memorder, __failure_memorder, __s)) - { - return true; - } - } - *__expected = __old_value; - return false; -} - -template ::type = 0> -void _CCCL_DEVICE __atomic_exchange_cuda(_Type volatile* __ptr, _Type* __val, _Type* __ret, int __memorder, _Scope __s) -{ - _Type __expected = __atomic_load_n_cuda(__ptr, __ATOMIC_RELAXED, __s); - while (!__atomic_compare_exchange_cuda(__ptr, &__expected, __val, true, __memorder, __memorder, __s)) - ; - *__ret = __expected; -} - -template ::type = 0> -_Type _CCCL_DEVICE __atomic_fetch_add_cuda(_Type volatile* __ptr, _Delta __val, int __memorder, _Scope __s) -{ - _Type __expected = __atomic_load_n_cuda(__ptr, __ATOMIC_RELAXED, __s); - _Type __desired = __expected + __val; - while (!__atomic_compare_exchange_cuda(__ptr, &__expected, &__desired, true, __memorder, __memorder, __s)) - { - __desired = __expected + __val; - } - return __expected; -} - -template < - class _Type, - class _Delta, - class _Scope, - typename _CUDA_VSTD::enable_if::value, int>::type = 0> -_Type _CCCL_HOST_DEVICE __atomic_fetch_max_cuda(_Type volatile* __ptr, _Delta __val, int __memorder, _Scope __s) -{ - _Type __expected = __atomic_load_n_cuda(__ptr, __ATOMIC_RELAXED, __s); - _Type __desired = __expected > __val ? __expected : __val; - - while (__desired == __val - && !__atomic_compare_exchange_cuda(__ptr, &__expected, &__desired, true, __memorder, __memorder, __s)) - { - __desired = __expected > __val ? __expected : __val; - } - - return __expected; -} - -template < - class _Type, - class _Delta, - class _Scope, - typename _CUDA_VSTD::enable_if::value, int>::type = 0> -_Type _CCCL_HOST_DEVICE __atomic_fetch_min_cuda(_Type volatile* __ptr, _Delta __val, int __memorder, _Scope __s) -{ - _Type __expected = __atomic_load_n_cuda(__ptr, __ATOMIC_RELAXED, __s); - _Type __desired = __expected < __val ? __expected : __val; - - while (__desired == __val - && !__atomic_compare_exchange_cuda(__ptr, &__expected, &__desired, true, __memorder, __memorder, __s)) - { - __desired = __expected < __val ? 
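// Illustrative host-side re-statement (hypothetical name) of the sub-word CAS
// emulation deleted above: a 1-byte compare-exchange is carried out on the
// enclosing aligned 32-bit word, shifting and masking so the neighbouring
// bytes are preserved. Assumes offset_bytes < 4.
#include <atomic>
#include <cstdint>

inline bool byte_cas(std::atomic<std::uint32_t>& word, unsigned offset_bytes,
                     std::uint8_t& expected, std::uint8_t desired)
{
  unsigned const shift      = offset_bytes * 8;
  std::uint32_t const mask  = std::uint32_t{0xff} << shift;

  std::uint32_t old = word.load(std::memory_order_relaxed);
  for (;;) {
    std::uint8_t const old_byte = static_cast<std::uint8_t>((old & mask) >> shift);
    if (old_byte != expected) {
      expected = old_byte;            // report the value actually observed
      return false;
    }
    std::uint32_t const attempt = (old & ~mask) | (std::uint32_t{desired} << shift);
    if (word.compare_exchange_weak(old, attempt, std::memory_order_acq_rel,
                                   std::memory_order_relaxed)) {
      return true;                    // only our byte changed
    }
    // CAS failure refreshed `old`; re-extract the byte and try again.
  }
}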
__expected : __val; - } - - return __expected; -} - -template ::type = 0> -_Type _CCCL_DEVICE __atomic_fetch_sub_cuda(_Type volatile* __ptr, _Delta __val, int __memorder, _Scope __s) -{ - _Type __expected = __atomic_load_n_cuda(__ptr, __ATOMIC_RELAXED, __s); - _Type __desired = __expected - __val; - while (!__atomic_compare_exchange_cuda(__ptr, &__expected, &__desired, true, __memorder, __memorder, __s)) - { - __desired = __expected - __val; - } - return __expected; -} - -template ::type = 0> -_Type _CCCL_DEVICE __atomic_fetch_and_cuda(_Type volatile* __ptr, _Delta __val, int __memorder, _Scope __s) -{ - _Type __expected = __atomic_load_n_cuda(__ptr, __ATOMIC_RELAXED, __s); - _Type __desired = __expected & __val; - while (!__atomic_compare_exchange_cuda(__ptr, &__expected, &__desired, true, __memorder, __memorder, __s)) - { - __desired = __expected & __val; - } - return __expected; -} - -template ::type = 0> -_Type _CCCL_DEVICE __atomic_fetch_xor_cuda(_Type volatile* __ptr, _Delta __val, int __memorder, _Scope __s) -{ - _Type __expected = __atomic_load_n_cuda(__ptr, __ATOMIC_RELAXED, __s); - _Type __desired = __expected ^ __val; - while (!__atomic_compare_exchange_cuda(__ptr, &__expected, &__desired, true, __memorder, __memorder, __s)) - { - __desired = __expected ^ __val; - } - return __expected; -} - -template ::type = 0> -_Type _CCCL_DEVICE __atomic_fetch_or_cuda(_Type volatile* __ptr, _Delta __val, int __memorder, _Scope __s) -{ - _Type __expected = __atomic_load_n_cuda(__ptr, __ATOMIC_RELAXED, __s); - _Type __desired = __expected | __val; - while (!__atomic_compare_exchange_cuda(__ptr, &__expected, &__desired, true, __memorder, __memorder, __s)) - { - __desired = __expected | __val; - } - return __expected; -} - -template -_Type _CCCL_DEVICE __atomic_load_n_cuda(const _Type volatile* __ptr, int __memorder, _Scope __s) -{ - _Type __ret; - __atomic_load_cuda(__ptr, &__ret, __memorder, __s); - return __ret; -} - -template -void _CCCL_DEVICE __atomic_store_n_cuda(_Type volatile* __ptr, _Type __val, int __memorder, _Scope __s) -{ - __atomic_store_cuda(__ptr, &__val, __memorder, __s); -} - -template -bool _CCCL_DEVICE __atomic_compare_exchange_n_cuda( - _Type volatile* __ptr, - _Type* __expected, - _Type __desired, - bool __weak, - int __success_memorder, - int __failure_memorder, - _Scope __s) -{ - return __atomic_compare_exchange_cuda( - __ptr, __expected, &__desired, __weak, __success_memorder, __failure_memorder, __s); -} - -template -_Type _CCCL_DEVICE __atomic_exchange_n_cuda(_Type volatile* __ptr, _Type __val, int __memorder, _Scope __s) -{ - _Type __ret; - __atomic_exchange_cuda(__ptr, &__val, &__ret, __memorder, __s); - return __ret; -} - -static inline _CCCL_DEVICE void __atomic_signal_fence_cuda(int) -{ - asm volatile("" ::: "memory"); -} diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/support/atomic/atomic_gcc.h b/libcudacxx/include/cuda/std/detail/libcxx/include/support/atomic/atomic_gcc.h deleted file mode 100644 index 8d5d7967cb..0000000000 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/support/atomic/atomic_gcc.h +++ /dev/null @@ -1,17 +0,0 @@ -// -*- C++ -*- -//===----------------------------------------------------------------------===// -// -// Part of libcu++, the C++ Standard Library for your entire system, -// under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. -// -//===----------------------------------------------------------------------===// - -#ifndef _LIBCUDACXX_ATOMIC_GCC_H -#define _LIBCUDACXX_ATOMIC_GCC_H - -#include - -#endif // _LIBCUDACXX_ATOMIC_GCC_H diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/support/atomic/atomic_nvrtc.h b/libcudacxx/include/cuda/std/detail/libcxx/include/support/atomic/atomic_nvrtc.h deleted file mode 100644 index 129b088081..0000000000 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/support/atomic/atomic_nvrtc.h +++ /dev/null @@ -1,17 +0,0 @@ -// -*- C++ -*- -//===----------------------------------------------------------------------===// -// -// Part of libcu++, the C++ Standard Library for your entire system, -// under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. -// -//===----------------------------------------------------------------------===// - -#ifndef _LIBCUDACXX_ATOMIC_NVRTC_H -#define _LIBCUDACXX_ATOMIC_NVRTC_H - -#include - -#endif // _LIBCUDACXX_ATOMIC_NVRTC_H diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/support/atomic/atomic_scopes.h b/libcudacxx/include/cuda/std/detail/libcxx/include/support/atomic/atomic_scopes.h deleted file mode 100644 index 9a035b1e4d..0000000000 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/support/atomic/atomic_scopes.h +++ /dev/null @@ -1,67 +0,0 @@ -#ifndef __LIBCUDACXX_ATOMIC_SCOPES_H -#define __LIBCUDACXX_ATOMIC_SCOPES_H - -// REMEMBER CHANGES TO THESE ARE ABI BREAKING -// TODO: Space values out for potential new scopes -#ifndef __ATOMIC_BLOCK -# define __ATOMIC_SYSTEM 0 // 0 indicates default -# define __ATOMIC_DEVICE 1 -# define __ATOMIC_BLOCK 2 -# define __ATOMIC_THREAD 10 -#endif //__ATOMIC_BLOCK - -enum thread_scope -{ - thread_scope_system = __ATOMIC_SYSTEM, - thread_scope_device = __ATOMIC_DEVICE, - thread_scope_block = __ATOMIC_BLOCK, - thread_scope_thread = __ATOMIC_THREAD -}; - -#define _LIBCUDACXX_ATOMIC_SCOPE_TYPE ::cuda::thread_scope -#define _LIBCUDACXX_ATOMIC_SCOPE_DEFAULT ::cuda::thread_scope::system - -struct __thread_scope_thread_tag -{}; -struct __thread_scope_block_tag -{}; -struct __thread_scope_device_tag -{}; -struct __thread_scope_system_tag -{}; - -template -struct __scope_enum_to_tag -{}; -/* This would be the implementation once an actual thread-scope backend exists. 
-template<> struct __scope_enum_to_tag<(int)thread_scope_thread> { - using type = __thread_scope_thread_tag; }; -Until then: */ -template <> -struct __scope_enum_to_tag<(int) thread_scope_thread> -{ - using type = __thread_scope_block_tag; -}; -template <> -struct __scope_enum_to_tag<(int) thread_scope_block> -{ - using type = __thread_scope_block_tag; -}; -template <> -struct __scope_enum_to_tag<(int) thread_scope_device> -{ - using type = __thread_scope_device_tag; -}; -template <> -struct __scope_enum_to_tag<(int) thread_scope_system> -{ - using type = __thread_scope_system_tag; -}; - -template -_LIBCUDACXX_INLINE_VISIBILITY auto constexpr __scope_tag() -> typename __scope_enum_to_tag<_Scope>::type -{ - return typename __scope_enum_to_tag<_Scope>::type(); -} - -#endif // __LIBCUDACXX_ATOMIC_SCOPES_H diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/support/atomic/cxx_atomic.h b/libcudacxx/include/cuda/std/detail/libcxx/include/support/atomic/cxx_atomic.h deleted file mode 100644 index a4212f44a7..0000000000 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/support/atomic/cxx_atomic.h +++ /dev/null @@ -1,180 +0,0 @@ -// -*- C++ -*- -//===----------------------------------------------------------------------===// -// -// Part of libcu++, the C++ Standard Library for your entire system, -// under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. -// -//===----------------------------------------------------------------------===// - -#ifndef _LIBCUDACXX_CXX_ATOMIC_H -#define _LIBCUDACXX_CXX_ATOMIC_H - -template -struct __cxx_atomic_base_impl -{ - using __underlying_t = _Tp; - using __temporary_t = __cxx_atomic_base_impl<_Tp, _Sco>; - using __wrap_t = __cxx_atomic_base_impl<_Tp, _Sco>; - - static constexpr int __sco = _Sco; - -#if !defined(_CCCL_COMPILER_GCC) || (__GNUC__ >= 5) - static_assert(is_trivially_copyable<_Tp>::value, "std::atomic requires that 'Tp' be a trivially copyable type"); -#endif - - constexpr __cxx_atomic_base_impl() noexcept = default; - constexpr __cxx_atomic_base_impl(__cxx_atomic_base_impl&&) noexcept = default; - _LIBCUDACXX_INLINE_VISIBILITY constexpr explicit __cxx_atomic_base_impl(_Tp value) noexcept - : __a_value(value) - {} - - __cxx_atomic_base_impl& operator=(const __cxx_atomic_base_impl&) noexcept = default; - - _CCCL_ALIGNAS(sizeof(_Tp)) _Tp __a_value; -}; - -template -_LIBCUDACXX_INLINE_VISIBILITY constexpr _Tp* __cxx_get_underlying_atomic(__cxx_atomic_base_impl<_Tp, _Sco>* __a) noexcept -{ - return &__a->__a_value; -} -template -_LIBCUDACXX_INLINE_VISIBILITY constexpr volatile _Tp* -__cxx_get_underlying_atomic(__cxx_atomic_base_impl<_Tp, _Sco> volatile* __a) noexcept -{ - return &__a->__a_value; -} -template -_LIBCUDACXX_INLINE_VISIBILITY constexpr const _Tp* -__cxx_get_underlying_atomic(__cxx_atomic_base_impl<_Tp, _Sco> const* __a) noexcept -{ - return &__a->__a_value; -} -template -_LIBCUDACXX_INLINE_VISIBILITY constexpr const volatile _Tp* -__cxx_get_underlying_atomic(__cxx_atomic_base_impl<_Tp, _Sco> const volatile* __a) noexcept -{ - return &__a->__a_value; -} -template -_LIBCUDACXX_INLINE_VISIBILITY constexpr __cxx_atomic_base_impl<_Tp, _Sco>* -__cxx_atomic_unwrap(__cxx_atomic_base_impl<_Tp, _Sco>* __a) noexcept -{ - return __a; -} -template -_LIBCUDACXX_INLINE_VISIBILITY constexpr volatile __cxx_atomic_base_impl<_Tp, _Sco>* 
-__cxx_atomic_unwrap(__cxx_atomic_base_impl<_Tp, _Sco> volatile* __a) noexcept -{ - return __a; -} -template -_LIBCUDACXX_INLINE_VISIBILITY constexpr const __cxx_atomic_base_impl<_Tp, _Sco>* -__cxx_atomic_unwrap(__cxx_atomic_base_impl<_Tp, _Sco> const* __a) noexcept -{ - return __a; -} -template -_LIBCUDACXX_INLINE_VISIBILITY constexpr const volatile __cxx_atomic_base_impl<_Tp, _Sco>* -__cxx_atomic_unwrap(__cxx_atomic_base_impl<_Tp, _Sco> const volatile* __a) noexcept -{ - return __a; -} - -template -struct __cxx_atomic_ref_base_impl -{ - using __underlying_t = _Tp; - using __temporary_t = _Tp; - using __wrap_t = _Tp; - - static constexpr int __sco = _Sco; - -#if !defined(_CCCL_COMPILER_GCC) || (__GNUC__ >= 5) - static_assert(is_trivially_copyable<_Tp>::value, - "std::atomic_ref requires that 'Tp' be a trivially copyable type"); -#endif - - constexpr __cxx_atomic_ref_base_impl() noexcept = delete; - constexpr __cxx_atomic_ref_base_impl(__cxx_atomic_ref_base_impl&&) noexcept = default; - constexpr __cxx_atomic_ref_base_impl(const __cxx_atomic_ref_base_impl&) noexcept = default; - _LIBCUDACXX_INLINE_VISIBILITY constexpr explicit __cxx_atomic_ref_base_impl(_Tp& value) noexcept - : __a_value(&value) - {} - - _Tp* __a_value; -}; - -template -_LIBCUDACXX_INLINE_VISIBILITY constexpr _Tp* -__cxx_get_underlying_atomic(__cxx_atomic_ref_base_impl<_Tp, _Sco>* __a) noexcept -{ - return __a->__a_value; -} -template -_LIBCUDACXX_INLINE_VISIBILITY constexpr volatile _Tp* -__cxx_get_underlying_atomic(__cxx_atomic_ref_base_impl<_Tp, _Sco> volatile* __a) noexcept -{ - return __a->__a_value; -} -template -_LIBCUDACXX_INLINE_VISIBILITY constexpr const _Tp* -__cxx_get_underlying_atomic(__cxx_atomic_ref_base_impl<_Tp, _Sco> const* __a) noexcept -{ - return __a->__a_value; -} -template -_LIBCUDACXX_INLINE_VISIBILITY constexpr const volatile _Tp* -__cxx_get_underlying_atomic(__cxx_atomic_ref_base_impl<_Tp, _Sco> const volatile* __a) noexcept -{ - return __a->__a_value; -} -template -_LIBCUDACXX_INLINE_VISIBILITY constexpr _Tp* __cxx_atomic_unwrap(__cxx_atomic_ref_base_impl<_Tp, _Sco>* __a) noexcept -{ - return __cxx_get_underlying_atomic(__a); -} -template -_LIBCUDACXX_INLINE_VISIBILITY constexpr volatile _Tp* -__cxx_atomic_unwrap(__cxx_atomic_ref_base_impl<_Tp, _Sco> volatile* __a) noexcept -{ - return __cxx_get_underlying_atomic(__a); -} -template -_LIBCUDACXX_INLINE_VISIBILITY constexpr const _Tp* -__cxx_atomic_unwrap(__cxx_atomic_ref_base_impl<_Tp, _Sco> const* __a) noexcept -{ - return __cxx_get_underlying_atomic(__a); -} -template -_LIBCUDACXX_INLINE_VISIBILITY constexpr const volatile _Tp* -__cxx_atomic_unwrap(__cxx_atomic_ref_base_impl<_Tp, _Sco> const volatile* __a) noexcept -{ - return __cxx_get_underlying_atomic(__a); -} - -template -_LIBCUDACXX_INLINE_VISIBILITY constexpr _Tp* __cxx_get_underlying_atomic(_Tp* __a) noexcept -{ - return __a; -} - -template -_LIBCUDACXX_INLINE_VISIBILITY constexpr auto __cxx_atomic_wrap_to_base(_Tp*, _Up __val) noexcept -> - typename _Tp::__wrap_t -{ - return typename _Tp::__wrap_t(__val); -} -template -_LIBCUDACXX_INLINE_VISIBILITY constexpr auto __cxx_atomic_base_temporary(_Tp*) noexcept -> typename _Tp::__temporary_t -{ - return typename _Tp::__temporary_t(); -} - -template -using __cxx_atomic_underlying_t = typename _Tp::__underlying_t; - -#endif //_LIBCUDACXX_CXX_ATOMIC_H diff --git a/libcudacxx/test/libcudacxx/cuda/annotated_ptr/utils.h b/libcudacxx/test/libcudacxx/cuda/annotated_ptr/utils.h index 5eddfd442d..588bbedb4f 100644 --- 
a/libcudacxx/test/libcudacxx/cuda/annotated_ptr/utils.h +++ b/libcudacxx/test/libcudacxx/cuda/annotated_ptr/utils.h @@ -14,6 +14,7 @@ #endif #include +#include #if defined(DEBUG) # define DPRINTF(...) \ diff --git a/libcudacxx/test/libcudacxx/cuda/atomics/atomic.ext/atomic_fetch.fail.cpp b/libcudacxx/test/libcudacxx/cuda/atomics/atomic.ext/atomic_fetch.fail.cpp index e2d73258c9..2a855a6223 100644 --- a/libcudacxx/test/libcudacxx/cuda/atomics/atomic.ext/atomic_fetch.fail.cpp +++ b/libcudacxx/test/libcudacxx/cuda/atomics/atomic.ext/atomic_fetch.fail.cpp @@ -9,9 +9,9 @@ // UNSUPPORTED: libcpp-has-no-threads, pre-sm-60 // UNSUPPORTED: windows && pre-sm-70 -// +// -#include +#include #include #include diff --git a/libcudacxx/test/libcudacxx/cuda/atomics/atomic.ext/atomic_fetch_max.pass.cpp b/libcudacxx/test/libcudacxx/cuda/atomics/atomic.ext/atomic_fetch_max.pass.cpp index 2c83f5d66e..3818fc3ab7 100644 --- a/libcudacxx/test/libcudacxx/cuda/atomics/atomic.ext/atomic_fetch_max.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/atomics/atomic.ext/atomic_fetch_max.pass.cpp @@ -9,9 +9,9 @@ // UNSUPPORTED: libcpp-has-no-threads, pre-sm-60 // UNSUPPORTED: windows && pre-sm-70 -// +// -#include +#include #include #include diff --git a/libcudacxx/test/libcudacxx/cuda/atomics/atomic.ext/atomic_fetch_min.pass.cpp b/libcudacxx/test/libcudacxx/cuda/atomics/atomic.ext/atomic_fetch_min.pass.cpp index 05920744c6..4a5c9dfef2 100644 --- a/libcudacxx/test/libcudacxx/cuda/atomics/atomic.ext/atomic_fetch_min.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/atomics/atomic.ext/atomic_fetch_min.pass.cpp @@ -9,9 +9,9 @@ // UNSUPPORTED: libcpp-has-no-threads, pre-sm-60 // UNSUPPORTED: windows && pre-sm-70 -// +// -#include +#include #include #include diff --git a/libcudacxx/test/libcudacxx/cuda/atomics/atomic.ext/atomic_helpers.h b/libcudacxx/test/libcudacxx/cuda/atomics/atomic.ext/atomic_helpers.h index ae3ac2ec5e..cc54eda725 100644 --- a/libcudacxx/test/libcudacxx/cuda/atomics/atomic.ext/atomic_helpers.h +++ b/libcudacxx/test/libcudacxx/cuda/atomics/atomic.ext/atomic_helpers.h @@ -9,7 +9,7 @@ #ifndef ATOMIC_HELPERS_H #define ATOMIC_HELPERS_H -#include +#include #include #include "test_macros.h" diff --git a/libcudacxx/test/libcudacxx/cuda/bad_atomic_alignment.pass.cpp b/libcudacxx/test/libcudacxx/cuda/bad_atomic_alignment.pass.cpp index d0566c3a14..e4a099ac6c 100644 --- a/libcudacxx/test/libcudacxx/cuda/bad_atomic_alignment.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/bad_atomic_alignment.pass.cpp @@ -37,8 +37,11 @@ struct TestFn A& t = *sel.construct(); cuda::std::atomic_init(&t, key{1, 2}); auto r = t.load(); + auto d = key{5, 5}; t.store(r); (void) t.exchange(r); + (void) t.compare_exchange_weak(r, d, cuda::memory_order_seq_cst, cuda::memory_order_seq_cst); + (void) t.compare_exchange_strong(d, r, cuda::memory_order_seq_cst, cuda::memory_order_seq_cst); } { struct alignas(8) key @@ -51,8 +54,11 @@ struct TestFn A& t = *sel.construct(); cuda::std::atomic_init(&t, key{1, 2}); auto r = t.load(); + auto d = key{5, 5}; t.store(r); (void) t.exchange(r); + (void) t.compare_exchange_weak(r, d, cuda::memory_order_seq_cst, cuda::memory_order_seq_cst); + (void) t.compare_exchange_strong(d, r, cuda::memory_order_seq_cst, cuda::memory_order_seq_cst); } } }; diff --git a/libcudacxx/test/libcudacxx/cuda/pipeline_group_concept.h b/libcudacxx/test/libcudacxx/cuda/pipeline_group_concept.h index 2410abea06..83d08371d5 100644 --- a/libcudacxx/test/libcudacxx/cuda/pipeline_group_concept.h +++ 
b/libcudacxx/test/libcudacxx/cuda/pipeline_group_concept.h @@ -13,6 +13,7 @@ // TODO: Remove pointless comparison suppression when compiler fixes short-circuiting #include +#include #include "test_macros.h" diff --git a/libcudacxx/test/libcudacxx/heterogeneous/helpers.h b/libcudacxx/test/libcudacxx/heterogeneous/helpers.h index 7691912558..3b6759d61c 100644 --- a/libcudacxx/test/libcudacxx/heterogeneous/helpers.h +++ b/libcudacxx/test/libcudacxx/heterogeneous/helpers.h @@ -81,10 +81,6 @@ __host__ inline std::vector& host_threads() __host__ inline void sync_host_threads() { -#ifdef DEBUG_TESTERS - printf("%s\n", __PRETTY_FUNCTION__); - fflush(stdout); -#endif for (auto&& thread : host_threads()) { thread.join(); @@ -100,10 +96,6 @@ __host__ inline std::vector& device_threads() __host__ inline void sync_device_threads() { -#ifdef DEBUG_TESTERS - printf("%s\n", __PRETTY_FUNCTION__); - fflush(stdout); -#endif for (auto&& thread : device_threads()) { thread.join(); @@ -217,14 +209,14 @@ template void device_initialize(T& object) { #ifdef DEBUG_TESTERS - printf("%s\n", __PRETTY_FUNCTION__); + printf(" %s\n", __PRETTY_FUNCTION__); fflush(stdout); #endif auto kernel_launcher = [&object](cudaStream_t stream) { constexpr auto tc = threadcount_trait::value; #ifdef DEBUG_TESTERS - printf("%i device init threads launched\r\n", (int) tc); + printf(" %i device init threads launched\r\n", (int) tc); fflush(stdout); #endif initialization_kernel<<<1, tc, 0, stream>>>(object); @@ -234,10 +226,6 @@ void device_initialize(T& object) if (!async_initialize_trait::value) { -#ifdef DEBUG_TESTERS - printf("init not async, synchronizing\r\n"); - fflush(stdout); -#endif HETEROGENEOUS_SAFE_CALL(cudaDeviceSynchronize()); sync_all(); } @@ -247,14 +235,14 @@ template void device_validate(T& object) { #ifdef DEBUG_TESTERS - printf("%s\n", __PRETTY_FUNCTION__); + printf(" %s\n", __PRETTY_FUNCTION__); fflush(stdout); #endif auto kernel_launcher = [&object](cudaStream_t stream) { constexpr auto tc = threadcount_trait::value; #ifdef DEBUG_TESTERS - printf("%i device validate threads launched\r\n", (int) tc); + printf(" %i device validate threads launched\r\n", (int) tc); fflush(stdout); #endif validation_kernel<<<1, tc, 0, stream>>>(object); @@ -264,10 +252,6 @@ void device_validate(T& object) if (!async_validate_trait::value) { -#ifdef DEBUG_TESTERS - printf("validate not async, synchronizing\r\n"); - fflush(stdout); -#endif HETEROGENEOUS_SAFE_CALL(cudaDeviceSynchronize()); sync_all(); } @@ -277,13 +261,13 @@ template void host_initialize(T& object) { #ifdef DEBUG_TESTERS - printf("%s\n", __PRETTY_FUNCTION__); + printf(" %s\n", __PRETTY_FUNCTION__); fflush(stdout); #endif constexpr auto tc = threadcount_trait::value; #ifdef DEBUG_TESTERS - printf("%i host init threads launched\r\n", (int) tc); + printf(" %i host init threads launched\r\n", (int) tc); fflush(stdout); #endif @@ -296,10 +280,6 @@ void host_initialize(T& object) if (!async_initialize_trait::value) { -#ifdef DEBUG_TESTERS - printf("init not async, synchronizing\r\n"); - fflush(stdout); -#endif HETEROGENEOUS_SAFE_CALL(cudaDeviceSynchronize()); sync_all(); } @@ -309,13 +289,13 @@ template void host_validate(T& object) { #ifdef DEBUG_TESTERS - printf("%s\n", __PRETTY_FUNCTION__); + printf(" %s\n", __PRETTY_FUNCTION__); fflush(stdout); #endif constexpr auto tc = threadcount_trait::value; #ifdef DEBUG_TESTERS - printf("%i host validate threads launched\r\n", (int) tc); + printf(" %i host validate threads launched\r\n", (int) tc); fflush(stdout); #endif @@ -328,10 
+308,6 @@ void host_validate(T& object) if (!async_initialize_trait::value) { -#ifdef DEBUG_TESTERS - printf("validate not async, synchronizing\r\n"); - fflush(stdout); -#endif HETEROGENEOUS_SAFE_CALL(cudaDeviceSynchronize()); sync_all(); } @@ -396,7 +372,7 @@ template testers, Args... args) // ex: type_list using initial_launcher_list = append_n>, host_launcher>; +#ifdef DEBUG_TESTERS + printf("Launching %zd permutations\r\n", sizeof...(Testers)); + fflush(stdout); +#endif permute_tests(test_harness, initial_launcher_list{}); } @@ -652,10 +632,18 @@ void validate_pinned(Args... args) { using list_t = typename validate_list::type; list_t list0; +#ifdef DEBUG_TESTERS + printf("%s\n", "Launching permuted H/D tests"); + fflush(stdout); +#endif validate_device_dynamic(list0, args...); if (check_managed_memory_support(is_tester_list_async::value)) { +#ifdef DEBUG_TESTERS + printf("%s\n", "Launching mixed H/D tests"); + fflush(stdout); +#endif typename validate_list::type list1; validate_managed(list1, args...); } diff --git a/libcudacxx/test/libcudacxx/std/atomics/atomics.flag/init.pass.cpp b/libcudacxx/test/libcudacxx/std/atomics/atomics.flag/init.pass.cpp index 72090475a4..9bf8624f67 100644 --- a/libcudacxx/test/libcudacxx/std/atomics/atomics.flag/init.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.flag/init.pass.cpp @@ -14,7 +14,7 @@ // struct atomic_flag -// atomic_flag() = ATOMIC_FLAG_INIT; +// atomic_flag() = LIBCUDACXX_ATOMIC_FLAG_INIT; #include #include @@ -24,9 +24,9 @@ int main(int, char**) { NV_DISPATCH_TARGET(NV_IS_HOST, - (cuda::std::atomic_flag f = ATOMIC_FLAG_INIT; assert(f.test_and_set() == 0);), + (cuda::std::atomic_flag f = LIBCUDACXX_ATOMIC_FLAG_INIT; assert(f.test_and_set() == 0);), NV_PROVIDES_SM_70, - (cuda::std::atomic_flag f = ATOMIC_FLAG_INIT; assert(f.test_and_set() == 0);)) + (cuda::std::atomic_flag f = LIBCUDACXX_ATOMIC_FLAG_INIT; assert(f.test_and_set() == 0);)) return 0; } diff --git a/libcudacxx/test/libcudacxx/std/atomics/atomics.lockfree/isalwayslockfree.pass.cpp b/libcudacxx/test/libcudacxx/std/atomics/atomics.lockfree/isalwayslockfree.pass.cpp index 9e5e9d41e7..7ec8db0973 100644 --- a/libcudacxx/test/libcudacxx/std/atomics/atomics.lockfree/isalwayslockfree.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.lockfree/isalwayslockfree.pass.cpp @@ -38,7 +38,7 @@ __host__ __device__ void checkAlwaysLockFree() } // FIXME: This separate test is needed to work around llvm.org/PR31864 -// which causes ATOMIC_LLONG_LOCK_FREE to be defined as '1' in 32-bit builds +// which causes LIBCUDACXX_ATOMIC_LLONG_LOCK_FREE to be defined as '1' in 32-bit builds // even though __atomic_always_lock_free returns true for the same type. 
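The hunk above renames ATOMIC_LLONG_LOCK_FREE to LIBCUDACXX_ATOMIC_LLONG_LOCK_FREE while keeping the invariant that the macro and the is_always_lock_free trait agree. Before the workaround constant that follows, here is a minimal, hedged sketch of that invariant outside the test harness; the include and the helper name check_llong_macro_consistency are illustrative assumptions, not part of the patch.

#include <cuda/std/atomic>

// Hypothetical helper mirroring checkLongLongTypes() from this test:
// a macro value of 2 means "always lock-free", so it must match the trait.
__host__ __device__ void check_llong_macro_consistency()
{
  static_assert(cuda::std::atomic<long long>::is_always_lock_free
                  == (2 == LIBCUDACXX_ATOMIC_LLONG_LOCK_FREE), "");
  static_assert(cuda::std::atomic<unsigned long long>::is_always_lock_free
                  == (2 == LIBCUDACXX_ATOMIC_LLONG_LOCK_FREE), "");
}
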
constexpr bool NeedWorkaroundForPR31864 = #if defined(__clang__) @@ -53,8 +53,8 @@ template __host__ __device__ void checkLongLongTypes() { - static_assert(cuda::std::atomic::is_always_lock_free == (2 == ATOMIC_LLONG_LOCK_FREE), ""); - static_assert(cuda::std::atomic::is_always_lock_free == (2 == ATOMIC_LLONG_LOCK_FREE), ""); + static_assert(cuda::std::atomic::is_always_lock_free == (2 == LIBCUDACXX_ATOMIC_LLONG_LOCK_FREE), ""); + static_assert(cuda::std::atomic::is_always_lock_free == (2 == LIBCUDACXX_ATOMIC_LLONG_LOCK_FREE), ""); } // Used to make the calls to __atomic_always_lock_free dependent on a template @@ -74,7 +74,7 @@ __host__ __device__ void checkLongLongTypes() constexpr bool ExpectLockFree = __atomic_always_lock_free(getSizeOf(), 0); static_assert(cuda::std::atomic::is_always_lock_free == ExpectLockFree, ""); static_assert(cuda::std::atomic::is_always_lock_free == ExpectLockFree, ""); - static_assert((0 != ATOMIC_LLONG_LOCK_FREE) == ExpectLockFree, ""); + static_assert((0 != LIBCUDACXX_ATOMIC_LLONG_LOCK_FREE) == ExpectLockFree, ""); } __host__ __device__ void run() @@ -143,22 +143,23 @@ __host__ __device__ void run() }); // C macro and static constexpr must be consistent. - static_assert(cuda::std::atomic::is_always_lock_free == (2 == ATOMIC_BOOL_LOCK_FREE), ""); - static_assert(cuda::std::atomic::is_always_lock_free == (2 == ATOMIC_CHAR_LOCK_FREE), ""); - static_assert(cuda::std::atomic::is_always_lock_free == (2 == ATOMIC_CHAR_LOCK_FREE), ""); - static_assert(cuda::std::atomic::is_always_lock_free == (2 == ATOMIC_CHAR_LOCK_FREE), ""); - static_assert(cuda::std::atomic::is_always_lock_free == (2 == ATOMIC_CHAR16_T_LOCK_FREE), ""); - static_assert(cuda::std::atomic::is_always_lock_free == (2 == ATOMIC_CHAR32_T_LOCK_FREE), ""); - static_assert(cuda::std::atomic::is_always_lock_free == (2 == ATOMIC_WCHAR_T_LOCK_FREE), ""); - static_assert(cuda::std::atomic::is_always_lock_free == (2 == ATOMIC_SHORT_LOCK_FREE), ""); - static_assert(cuda::std::atomic::is_always_lock_free == (2 == ATOMIC_SHORT_LOCK_FREE), ""); - static_assert(cuda::std::atomic::is_always_lock_free == (2 == ATOMIC_INT_LOCK_FREE), ""); - static_assert(cuda::std::atomic::is_always_lock_free == (2 == ATOMIC_INT_LOCK_FREE), ""); - static_assert(cuda::std::atomic::is_always_lock_free == (2 == ATOMIC_LONG_LOCK_FREE), ""); - static_assert(cuda::std::atomic::is_always_lock_free == (2 == ATOMIC_LONG_LOCK_FREE), ""); + static_assert(cuda::std::atomic::is_always_lock_free == (2 == LIBCUDACXX_ATOMIC_BOOL_LOCK_FREE), ""); + static_assert(cuda::std::atomic::is_always_lock_free == (2 == LIBCUDACXX_ATOMIC_CHAR_LOCK_FREE), ""); + static_assert(cuda::std::atomic::is_always_lock_free == (2 == LIBCUDACXX_ATOMIC_CHAR_LOCK_FREE), ""); + static_assert(cuda::std::atomic::is_always_lock_free == (2 == LIBCUDACXX_ATOMIC_CHAR_LOCK_FREE), ""); + static_assert(cuda::std::atomic::is_always_lock_free == (2 == LIBCUDACXX_ATOMIC_CHAR16_T_LOCK_FREE), ""); + static_assert(cuda::std::atomic::is_always_lock_free == (2 == LIBCUDACXX_ATOMIC_CHAR32_T_LOCK_FREE), ""); + static_assert(cuda::std::atomic::is_always_lock_free == (2 == LIBCUDACXX_ATOMIC_WCHAR_T_LOCK_FREE), ""); + static_assert(cuda::std::atomic::is_always_lock_free == (2 == LIBCUDACXX_ATOMIC_SHORT_LOCK_FREE), ""); + static_assert(cuda::std::atomic::is_always_lock_free == (2 == LIBCUDACXX_ATOMIC_SHORT_LOCK_FREE), ""); + static_assert(cuda::std::atomic::is_always_lock_free == (2 == LIBCUDACXX_ATOMIC_INT_LOCK_FREE), ""); + static_assert(cuda::std::atomic::is_always_lock_free == (2 == 
LIBCUDACXX_ATOMIC_INT_LOCK_FREE), ""); + static_assert(cuda::std::atomic::is_always_lock_free == (2 == LIBCUDACXX_ATOMIC_LONG_LOCK_FREE), ""); + static_assert(cuda::std::atomic::is_always_lock_free == (2 == LIBCUDACXX_ATOMIC_LONG_LOCK_FREE), ""); checkLongLongTypes(); - static_assert(cuda::std::atomic::is_always_lock_free == (2 == ATOMIC_POINTER_LOCK_FREE), ""); - static_assert(cuda::std::atomic::is_always_lock_free == (2 == ATOMIC_POINTER_LOCK_FREE), ""); + static_assert(cuda::std::atomic::is_always_lock_free == (2 == LIBCUDACXX_ATOMIC_POINTER_LOCK_FREE), ""); + static_assert( + cuda::std::atomic::is_always_lock_free == (2 == LIBCUDACXX_ATOMIC_POINTER_LOCK_FREE), ""); } int main(int, char**) diff --git a/libcudacxx/test/libcudacxx/std/atomics/atomics.lockfree/lockfree.pass.cpp b/libcudacxx/test/libcudacxx/std/atomics/atomics.lockfree/lockfree.pass.cpp index 1ca3afd2f7..f0853813ad 100644 --- a/libcudacxx/test/libcudacxx/std/atomics/atomics.lockfree/lockfree.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.lockfree/lockfree.pass.cpp @@ -29,16 +29,26 @@ int main(int, char**) { - assert(ATOMIC_BOOL_LOCK_FREE == 0 || ATOMIC_BOOL_LOCK_FREE == 1 || ATOMIC_BOOL_LOCK_FREE == 2); - assert(ATOMIC_CHAR_LOCK_FREE == 0 || ATOMIC_CHAR_LOCK_FREE == 1 || ATOMIC_CHAR_LOCK_FREE == 2); - assert(ATOMIC_CHAR16_T_LOCK_FREE == 0 || ATOMIC_CHAR16_T_LOCK_FREE == 1 || ATOMIC_CHAR16_T_LOCK_FREE == 2); - assert(ATOMIC_CHAR32_T_LOCK_FREE == 0 || ATOMIC_CHAR32_T_LOCK_FREE == 1 || ATOMIC_CHAR32_T_LOCK_FREE == 2); - assert(ATOMIC_WCHAR_T_LOCK_FREE == 0 || ATOMIC_WCHAR_T_LOCK_FREE == 1 || ATOMIC_WCHAR_T_LOCK_FREE == 2); - assert(ATOMIC_SHORT_LOCK_FREE == 0 || ATOMIC_SHORT_LOCK_FREE == 1 || ATOMIC_SHORT_LOCK_FREE == 2); - assert(ATOMIC_INT_LOCK_FREE == 0 || ATOMIC_INT_LOCK_FREE == 1 || ATOMIC_INT_LOCK_FREE == 2); - assert(ATOMIC_LONG_LOCK_FREE == 0 || ATOMIC_LONG_LOCK_FREE == 1 || ATOMIC_LONG_LOCK_FREE == 2); - assert(ATOMIC_LLONG_LOCK_FREE == 0 || ATOMIC_LLONG_LOCK_FREE == 1 || ATOMIC_LLONG_LOCK_FREE == 2); - assert(ATOMIC_POINTER_LOCK_FREE == 0 || ATOMIC_POINTER_LOCK_FREE == 1 || ATOMIC_POINTER_LOCK_FREE == 2); + assert(LIBCUDACXX_ATOMIC_BOOL_LOCK_FREE == 0 || LIBCUDACXX_ATOMIC_BOOL_LOCK_FREE == 1 + || LIBCUDACXX_ATOMIC_BOOL_LOCK_FREE == 2); + assert(LIBCUDACXX_ATOMIC_CHAR_LOCK_FREE == 0 || LIBCUDACXX_ATOMIC_CHAR_LOCK_FREE == 1 + || LIBCUDACXX_ATOMIC_CHAR_LOCK_FREE == 2); + assert(LIBCUDACXX_ATOMIC_CHAR16_T_LOCK_FREE == 0 || LIBCUDACXX_ATOMIC_CHAR16_T_LOCK_FREE == 1 + || LIBCUDACXX_ATOMIC_CHAR16_T_LOCK_FREE == 2); + assert(LIBCUDACXX_ATOMIC_CHAR32_T_LOCK_FREE == 0 || LIBCUDACXX_ATOMIC_CHAR32_T_LOCK_FREE == 1 + || LIBCUDACXX_ATOMIC_CHAR32_T_LOCK_FREE == 2); + assert(LIBCUDACXX_ATOMIC_WCHAR_T_LOCK_FREE == 0 || LIBCUDACXX_ATOMIC_WCHAR_T_LOCK_FREE == 1 + || LIBCUDACXX_ATOMIC_WCHAR_T_LOCK_FREE == 2); + assert(LIBCUDACXX_ATOMIC_SHORT_LOCK_FREE == 0 || LIBCUDACXX_ATOMIC_SHORT_LOCK_FREE == 1 + || LIBCUDACXX_ATOMIC_SHORT_LOCK_FREE == 2); + assert(LIBCUDACXX_ATOMIC_INT_LOCK_FREE == 0 || LIBCUDACXX_ATOMIC_INT_LOCK_FREE == 1 + || LIBCUDACXX_ATOMIC_INT_LOCK_FREE == 2); + assert(LIBCUDACXX_ATOMIC_LONG_LOCK_FREE == 0 || LIBCUDACXX_ATOMIC_LONG_LOCK_FREE == 1 + || LIBCUDACXX_ATOMIC_LONG_LOCK_FREE == 2); + assert(LIBCUDACXX_ATOMIC_LLONG_LOCK_FREE == 0 || LIBCUDACXX_ATOMIC_LLONG_LOCK_FREE == 1 + || LIBCUDACXX_ATOMIC_LLONG_LOCK_FREE == 2); + assert(LIBCUDACXX_ATOMIC_POINTER_LOCK_FREE == 0 || LIBCUDACXX_ATOMIC_POINTER_LOCK_FREE == 1 + || LIBCUDACXX_ATOMIC_POINTER_LOCK_FREE == 2); return 0; } diff --git 
a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/address.pass.cpp b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/address.pass.cpp index 37bfc73300..74dc6f8515 100644 --- a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/address.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/address.pass.cpp @@ -68,6 +68,7 @@ // T* operator-=(ptrdiff_t op); // }; +#include #include #include #include diff --git a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/address_ref.pass.cpp b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/address_ref.pass.cpp index 0cae7e53a6..376ca94e19 100644 --- a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/address_ref.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/address_ref.pass.cpp @@ -68,6 +68,7 @@ // T* operator-=(ptrdiff_t op); // }; +#include #include #include #include diff --git a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/address_ref_constness.pass.cpp b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/address_ref_constness.pass.cpp index 9108280b80..9adc1d390b 100644 --- a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/address_ref_constness.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/address_ref_constness.pass.cpp @@ -68,6 +68,7 @@ // T* operator-=(ptrdiff_t op); // }; +#include #include #include #include diff --git a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/atomic_copyable.pass.cpp b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/atomic_copyable.pass.cpp index 3650b84f07..a9486a5dcd 100644 --- a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/atomic_copyable.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/atomic_copyable.pass.cpp @@ -15,6 +15,7 @@ // +#include #include #include #include diff --git a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/bool.pass.cpp b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/bool.pass.cpp index 6dc016dabf..131d3677d1 100644 --- a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/bool.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/bool.pass.cpp @@ -51,6 +51,7 @@ // // typedef atomic atomic_bool; +#include #include #include diff --git a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/cstdint_typedefs.pass.cpp b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/cstdint_typedefs.pass.cpp index 6105a54918..13b1afe169 100644 --- a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/cstdint_typedefs.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/cstdint_typedefs.pass.cpp @@ -36,6 +36,7 @@ // typedef atomic atomic_intmax_t; // typedef atomic atomic_uintmax_t; +#include #include #include #include diff --git a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/enum_class.pass.cpp b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/enum_class.pass.cpp index 1904c53206..adc43d32a5 100644 --- a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/enum_class.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/enum_class.pass.cpp @@ -49,6 +49,7 @@ // T operator=(T) noexcept; // }; +#include #include #include diff --git a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/floating_point.pass.cpp 
b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/floating_point.pass.cpp index f000d0e69a..28145c99bf 100644 --- a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/floating_point.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/floating_point.pass.cpp @@ -72,6 +72,7 @@ // floating_point operator-=(floating_point op); // }; +#include #include #include diff --git a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/floating_point_ref.pass.cpp b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/floating_point_ref.pass.cpp index c790be5b6a..ce25bc45d3 100644 --- a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/floating_point_ref.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/floating_point_ref.pass.cpp @@ -72,6 +72,7 @@ // floating_point operator-=(floating_point op); // }; +#include #include #include diff --git a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/floating_point_ref_constness.pass.cpp b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/floating_point_ref_constness.pass.cpp index 6ff9981471..7c5dae71a9 100644 --- a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/floating_point_ref_constness.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/floating_point_ref_constness.pass.cpp @@ -12,6 +12,7 @@ // +#include #include #include diff --git a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral.pass.cpp b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral.pass.cpp index 272cedff26..ed53c53c57 100644 --- a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral.pass.cpp @@ -86,6 +86,7 @@ // integral operator^=(integral op); // }; +#include #include #include diff --git a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral_ref.pass.cpp b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral_ref.pass.cpp index b685255e02..56153f3664 100644 --- a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral_ref.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral_ref.pass.cpp @@ -86,6 +86,7 @@ // integral operator^=(integral op); // }; +#include #include #include diff --git a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral_ref_constness.pass.cpp b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral_ref_constness.pass.cpp index 2b20eb7841..b237c862a5 100644 --- a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral_ref_constness.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral_ref_constness.pass.cpp @@ -86,6 +86,7 @@ // integral operator^=(integral op); // }; +#include #include #include diff --git a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_var_init.pass.cpp b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_var_init.pass.cpp index a3acff9845..d81e4d11e9 100644 --- a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_var_init.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_var_init.pass.cpp @@ -12,7 +12,7 @@ // -// #define ATOMIC_VAR_INIT(value) +// #define LIBCUDACXX_ATOMIC_VAR_INIT(value) 
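The comment above names the macro under test in atomic_var_init.pass.cpp, now spelled LIBCUDACXX_ATOMIC_VAR_INIT, and the init.pass.cpp hunk earlier makes the matching change to LIBCUDACXX_ATOMIC_FLAG_INIT. A short usage sketch of both renamed initializers follows, assuming <cuda/std/atomic> provides them as this patch intends; the struct node, the int value type, and demo() are illustrative only.

#include <cuda/std/atomic>

// Usage sketch of the renamed initializer macros from this patch.
// 'node', 'demo', and the int value type are assumptions for illustration.
struct node
{
  cuda::std::atomic<int> count = LIBCUDACXX_ATOMIC_VAR_INIT(0);
  cuda::std::atomic_flag taken = LIBCUDACXX_ATOMIC_FLAG_INIT;
};

__host__ __device__ void demo()
{
  cuda::std::atomic<int> v = LIBCUDACXX_ATOMIC_VAR_INIT(5);
  bool ok = (v.load() == 5); // the test below asserts the same condition
  (void) ok;
}
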
#include #include @@ -22,7 +22,7 @@ int main(int, char**) { - cuda::std::atomic v = ATOMIC_VAR_INIT(5); + cuda::std::atomic v = LIBCUDACXX_ATOMIC_VAR_INIT(5); assert(v == 5); return 0; diff --git a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.operations/atomics.types.operations.req/ctor.pass.cpp b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.operations/atomics.types.operations.req/ctor.pass.cpp index 050bb36e72..b033b1ff83 100644 --- a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.operations/atomics.types.operations.req/ctor.pass.cpp +++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.operations/atomics.types.operations.req/ctor.pass.cpp @@ -20,6 +20,7 @@ #define _LIBCUDACXX_DISABLE_DEPRECATION_WARNINGS +#include #include #include #include @@ -62,7 +63,7 @@ struct TestFunc #if !defined(_GNUC_VER) || _GNUC_VER >= 409 // TODO: Figure out why this is failing with GCC 4.8.2 on CentOS 7 only. { - constexpr Atomic a = ATOMIC_VAR_INIT(t); + constexpr Atomic a = LIBCUDACXX_ATOMIC_VAR_INIT(t); assert(a == t); } #endif diff --git a/libcudacxx/test/utils/libcudacxx/test/format.py b/libcudacxx/test/utils/libcudacxx/test/format.py index f2b6f478fb..3a58447989 100644 --- a/libcudacxx/test/utils/libcudacxx/test/format.py +++ b/libcudacxx/test/utils/libcudacxx/test/format.py @@ -74,6 +74,10 @@ def getTestsInDirectory(self, testSuite, path_in_suite, yield lit.Test.Test(testSuite, path_in_suite + (filename,), localConfig) + def getTestsForPath(self, testSuite, path_in_suite, + litConfig, localConfig): + yield lit.Test.Test(testSuite, path_in_suite, localConfig) + def execute(self, test, lit_config): while True: try:
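Stepping back from the lit format.py change above: earlier in this patch, bad_atomic_alignment.pass.cpp gains compare_exchange_weak/strong calls on over-aligned aggregates. A hedged sketch of that scenario follows; the key layout, the function name, and the use of cuda::std::memory_order_seq_cst (the test itself spells the orders as cuda::memory_order_seq_cst) are assumptions for illustration, not the test code.

#include <cuda/atomic>

// Sketch only: an aggregate whose alignment exceeds its data, so the atomic
// backend must tolerate padding bits in the new compare-exchange coverage.
struct alignas(8) key
{
  int a; // 4 bytes of payload inside an 8-byte-aligned, 8-byte-sized object
};

__host__ __device__ void exercise_padded_cas()
{
  cuda::std::atomic<key> t;
  cuda::std::atomic_init(&t, key{1});
  key expected = t.load();
  key desired{5};
  (void) t.compare_exchange_weak(expected, desired,
                                 cuda::std::memory_order_seq_cst,
                                 cuda::std::memory_order_seq_cst);
  (void) t.compare_exchange_strong(expected, desired,
                                   cuda::std::memory_order_seq_cst,
                                   cuda::std::memory_order_seq_cst);
}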