Skip to content

Commit

Permalink
Rework builtin handling (NVIDIA#2461)
Browse files Browse the repository at this point in the history
* Move builtin detection to its own file

* Try to reenable more builtins

* Address review comments
  • Loading branch information
miscco authored and fbusato committed Oct 2, 2024
1 parent 2b8de16 commit caff9bc
Show file tree
Hide file tree
Showing 212 changed files with 1,204 additions and 1,103 deletions.
13 changes: 2 additions & 11 deletions cudax/include/cuda/experimental/__async/meta.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -534,13 +534,13 @@ using __m_at = _Ts...[__v<_Np>];
template <size_t _Np, class... _Ts>
using __m_at_c = _Ts...[_Np];

#elif __has_builtin(__type_pack_element)
#elif defined(_CCCL_BUILTIN_TYPE_PACK_ELEMENT)

template <bool>
struct __m_at_
{
template <class _Np, class... _Ts>
using __f = __type_pack_element<__v<_Np>, _Ts...>;
using __f = _CCCL_BUILTIN_TYPE_PACK_ELEMENT(__v<_Np>, _Ts...);
};

template <class _Np, class... _Ts>
Expand Down Expand Up @@ -675,18 +675,9 @@ struct __mconcat_into_q
};

// The following must be super-fast to compile, so use an intrinsic directly if it is available
#if defined(_LIBCUDACXX_IS_BASE_OF) && !defined(_LIBCUDACXX_USE_IS_BASE_OF_FALLBACK)

template <class _Set, class... _Ty>
_CCCL_INLINE_VAR constexpr bool __mset_contains = (_LIBCUDACXX_IS_BASE_OF(__mtype<_Ty>, _Set) && ...);

#else

template <class _Set, class... _Ty>
_CCCL_INLINE_VAR constexpr bool __mset_contains = (_CUDA_VSTD::is_base_of_v<__mtype<_Ty>, _Set> && ...);

#endif

namespace __set
{
template <class... _Ts>
Expand Down
21 changes: 4 additions & 17 deletions cudax/include/cuda/experimental/__async/type_traits.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -30,26 +30,13 @@

namespace cuda::experimental::__async
{
#if __has_builtin(__remove_reference)

template <class _Ty>
using __remove_ref_t = __remove_reference(_Ty);

#elif __has_builtin(__remove_reference_t)

template <class _Ty>
using __remove_ref_t = __remove_reference_t(_Ty);

#else

template <class _Ty>
using __remove_ref_t = _CUDA_VSTD::remove_reference_t<_Ty>;

#endif
using __remove_ref_t = _CUDA_VSTD::__libcpp_remove_reference_t<_Ty>;

//////////////////////////////////////////////////////////////////////////////////////////////////
// __decay_t: An efficient implementation for ::std::decay
#if __has_builtin(__decay)
#if defined(_CCCL_BUILTIN_DECAY)

template <class _Ty>
using __decay_t = __decay(_Ty);
Expand All @@ -59,7 +46,7 @@ using __decay_t = __decay(_Ty);
// template <class _Ty>
// using __decay_t = _CUDA_VSTD::decay_t<_Ty>;

#else
#else // ^^^ _CCCL_BUILTIN_DECAY ^^^ / vvv !_CCCL_BUILTIN_DECAY vvv

struct __decay_object
{
Expand Down Expand Up @@ -141,7 +128,7 @@ inline __decay_void __mdecay<void const>;
template <class _Ty>
using __decay_t = typename decltype(__mdecay<_Ty>)::template __f<_Ty>;

#endif
#endif // _CCCL_BUILTIN_DECAY

//////////////////////////////////////////////////////////////////////////////////////////////////
// __copy_cvref_t: For copying cvref from one type to another
Expand Down
1 change: 1 addition & 0 deletions libcudacxx/include/cuda/__cccl_config
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#define _CUDA__CCCL_CONFIG

#include <cuda/std/__cccl/attributes.h> // IWYU pragma: export
#include <cuda/std/__cccl/builtin.h> // IWYU pragma: export
#include <cuda/std/__cccl/compiler.h> // IWYU pragma: export
#include <cuda/std/__cccl/diagnostic.h> // IWYU pragma: export
#include <cuda/std/__cccl/dialect.h> // IWYU pragma: export
Expand Down
8 changes: 4 additions & 4 deletions libcudacxx/include/cuda/std/__algorithm/copy.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,13 +91,13 @@ template <class _Tp, class _Up>
_LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 bool __constexpr_tail_overlap(_Tp* __first, _Up* __needle, _Tp* __last)
{
_LIBCUDACXX_UNUSED_VAR(__last);
#if __has_builtin(__builtin_constant_p) || defined(_CCCL_COMPILER_GCC)
#if defined(_CCCL_BUILTIN_CONSTANT_P)
NV_IF_ELSE_TARGET(NV_IS_HOST,
(return __builtin_constant_p(__first < __needle) && __first < __needle;),
(return _CCCL_BUILTIN_CONSTANT_P(__first < __needle) && __first < __needle;),
(return __constexpr_tail_overlap_fallback(__first, __needle, __last);))
#else
#else // ^^^ _CCCL_BUILTIN_CONSTANT_P ^^^ / vvv !_CCCL_BUILTIN_CONSTANT_P vvv
return __constexpr_tail_overlap_fallback(__first, __needle, __last);
#endif
#endif // !_CCCL_BUILTIN_CONSTANT_P
}

template <class _AlgPolicy,
Expand Down
18 changes: 9 additions & 9 deletions libcudacxx/include/cuda/std/__bit/bit_cast.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,16 @@

_LIBCUDACXX_BEGIN_NAMESPACE_STD

#if defined(_LIBCUDACXX_BIT_CAST)
#if defined(_CCCL_BUILTIN_BIT_CAST)
# define _LIBCUDACXX_CONSTEXPR_BIT_CAST constexpr
#else // ^^^ _LIBCUDACXX_BIT_CAST ^^^ / vvv !_LIBCUDACXX_BIT_CAST vvv
#else // ^^^ _CCCL_BUILTIN_BIT_CAST ^^^ / vvv !_CCCL_BUILTIN_BIT_CAST vvv
# define _LIBCUDACXX_CONSTEXPR_BIT_CAST
# if defined(_CCCL_COMPILER_GCC) && __GNUC__ >= 8
// GCC starting with GCC8 warns about our extended floating point types having protected data members
_CCCL_DIAG_PUSH
_CCCL_DIAG_SUPPRESS_GCC("-Wclass-memaccess")
# endif // _CCCL_COMPILER_GCC >= 8
#endif // !_LIBCUDACXX_BIT_CAST
#endif // !_CCCL_BUILTIN_BIT_CAST

template <
class _To,
Expand All @@ -48,23 +48,23 @@ template <
__enable_if_t<_CCCL_TRAIT(is_trivially_copyable, _From) || _CCCL_TRAIT(__is_extended_floating_point, _From), int> = 0>
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI _LIBCUDACXX_CONSTEXPR_BIT_CAST _To bit_cast(const _From& __from) noexcept
{
#if defined(_LIBCUDACXX_BIT_CAST)
return _LIBCUDACXX_BIT_CAST(_To, __from);
#else // ^^^ _LIBCUDACXX_BIT_CAST ^^^ / vvv !_LIBCUDACXX_BIT_CAST vvv
#if defined(_CCCL_BUILTIN_BIT_CAST)
return _CCCL_BUILTIN_BIT_CAST(_To, __from);
#else // ^^^ _CCCL_BUILTIN_BIT_CAST ^^^ / vvv !_CCCL_BUILTIN_BIT_CAST vvv
static_assert(_CCCL_TRAIT(is_trivially_default_constructible, _To),
"The compiler does not support __builtin_bit_cast, so bit_cast additionally requires the destination "
"type to be trivially constructible");
_To __temp;
_CUDA_VSTD::memcpy(&__temp, &__from, sizeof(_To));
return __temp;
#endif // !_LIBCUDACXX_BIT_CAST
#endif // !_CCCL_BUILTIN_BIT_CAST
}

#if !defined(_LIBCUDACXX_BIT_CAST)
#if !defined(_CCCL_BUILTIN_BIT_CAST)
# if defined(_CCCL_COMPILER_GCC) && __GNUC__ >= 8
_CCCL_DIAG_POP
# endif // _CCCL_COMPILER_GCC >= 8
#endif // !_LIBCUDACXX_BIT_CAST
#endif // !_CCCL_BUILTIN_BIT_CAST

_LIBCUDACXX_END_NAMESPACE_STD

Expand Down
4 changes: 2 additions & 2 deletions libcudacxx/include/cuda/std/__bit/clz.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ _LIBCUDACXX_HIDE_FROM_ABI constexpr int __libcpp_clz(uint32_t __x)
}
return 32; // Undefined Behavior.
}
# endif // _LIBCUDACXX_IS_CONSTANT_EVALUATED && !defined(__CUDA_ARCH__)
# endif // _CCCL_BUILTIN_IS_CONSTANT_EVALUATED && !defined(__CUDA_ARCH__)

return __binary_clz32(static_cast<uint64_t>(__x), 0);
}
Expand Down Expand Up @@ -141,7 +141,7 @@ _LIBCUDACXX_HIDE_FROM_ABI constexpr int __libcpp_clz(uint64_t __x)
# endif
return 64; // Undefined Behavior.
}
# endif // _LIBCUDACXX_IS_CONSTANT_EVALUATED && !defined(__CUDA_ARCH__)
# endif // _CCCL_BUILTIN_IS_CONSTANT_EVALUATED && !defined(__CUDA_ARCH__)

return __binary_clz64(static_cast<uint64_t>(__x));
}
Expand Down
4 changes: 2 additions & 2 deletions libcudacxx/include/cuda/std/__bit/ctz.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ _LIBCUDACXX_HIDE_FROM_ABI constexpr int __libcpp_ctz(uint32_t __x)
}
return 32;
}
# endif // _LIBCUDACXX_IS_CONSTANT_EVALUATED && !defined(__CUDA_ARCH__)
# endif // _CCCL_BUILTIN_IS_CONSTANT_EVALUATED && !defined(__CUDA_ARCH__)

return __binary_ctz32(static_cast<uint64_t>(__x), 0);
}
Expand Down Expand Up @@ -143,7 +143,7 @@ _LIBCUDACXX_HIDE_FROM_ABI constexpr int __libcpp_ctz(uint64_t __x)
# endif
return 64;
}
# endif // _LIBCUDACXX_IS_CONSTANT_EVALUATED && !defined(__CUDA_ARCH__)
# endif // _CCCL_BUILTIN_IS_CONSTANT_EVALUATED && !defined(__CUDA_ARCH__)

return __binary_ctz64(__x);
}
Expand Down
Loading

0 comments on commit caff9bc

Please sign in to comment.