Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework builtin handling #2461

Merged
merged 3 commits into from
Oct 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 2 additions & 11 deletions cudax/include/cuda/experimental/__async/meta.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -534,13 +534,13 @@ using __m_at = _Ts...[__v<_Np>];
template <size_t _Np, class... _Ts>
using __m_at_c = _Ts...[_Np];

#elif __has_builtin(__type_pack_element)
#elif defined(_CCCL_BUILTIN_TYPE_PACK_ELEMENT)

template <bool>
struct __m_at_
{
template <class _Np, class... _Ts>
using __f = __type_pack_element<__v<_Np>, _Ts...>;
using __f = _CCCL_BUILTIN_TYPE_PACK_ELEMENT(__v<_Np>, _Ts...);
};

template <class _Np, class... _Ts>
Expand Down Expand Up @@ -675,18 +675,9 @@ struct __mconcat_into_q
};

// The following must be super-fast to compile, so use an intrinsic directly if it is available
#if defined(_LIBCUDACXX_IS_BASE_OF) && !defined(_LIBCUDACXX_USE_IS_BASE_OF_FALLBACK)

template <class _Set, class... _Ty>
_CCCL_INLINE_VAR constexpr bool __mset_contains = (_LIBCUDACXX_IS_BASE_OF(__mtype<_Ty>, _Set) && ...);

#else

template <class _Set, class... _Ty>
_CCCL_INLINE_VAR constexpr bool __mset_contains = (_CUDA_VSTD::is_base_of_v<__mtype<_Ty>, _Set> && ...);

#endif

Comment on lines -678 to -689
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As the comment above says, this is a particularly hot piece of meta-programming, so the direct use of the intrinsic here is intentional.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no difference between what was there and what is now.

is_base_of_v is defined directly in terms of the compiler builtin, without going through is_base_of::value, so both branches of the conditional are exactly the same.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instantiating a variable template is not free. This instantiates N more variable templates than using the intrinsic directly does.

namespace __set
{
template <class... _Ts>
Expand Down
21 changes: 4 additions & 17 deletions cudax/include/cuda/experimental/__async/type_traits.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -30,26 +30,13 @@

namespace cuda::experimental::__async
{
#if __has_builtin(__remove_reference)

template <class _Ty>
using __remove_ref_t = __remove_reference(_Ty);

#elif __has_builtin(__remove_reference_t)

template <class _Ty>
using __remove_ref_t = __remove_reference_t(_Ty);

#else

template <class _Ty>
using __remove_ref_t = _CUDA_VSTD::remove_reference_t<_Ty>;

#endif
using __remove_ref_t = _CUDA_VSTD::__libcpp_remove_reference_t<_Ty>;

//////////////////////////////////////////////////////////////////////////////////////////////////
// __decay_t: An efficient implementation for ::std::decay
#if __has_builtin(__decay)
#if defined(_CCCL_BUILTIN_DECAY)

template <class _Ty>
using __decay_t = __decay(_Ty);
Expand All @@ -59,7 +46,7 @@ using __decay_t = __decay(_Ty);
// template <class _Ty>
// using __decay_t = _CUDA_VSTD::decay_t<_Ty>;

#else
#else // ^^^ _CCCL_BUILTIN_DECAY ^^^ / vvv !_CCCL_BUILTIN_DECAY vvv

struct __decay_object
{
Expand Down Expand Up @@ -141,7 +128,7 @@ inline __decay_void __mdecay<void const>;
template <class _Ty>
using __decay_t = typename decltype(__mdecay<_Ty>)::template __f<_Ty>;

#endif
#endif // _CCCL_BUILTIN_DECAY

//////////////////////////////////////////////////////////////////////////////////////////////////
// __copy_cvref_t: For copying cvref from one type to another
Expand Down
1 change: 1 addition & 0 deletions libcudacxx/include/cuda/__cccl_config
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#define _CUDA__CCCL_CONFIG

#include <cuda/std/__cccl/attributes.h> // IWYU pragma: export
#include <cuda/std/__cccl/builtin.h> // IWYU pragma: export
#include <cuda/std/__cccl/compiler.h> // IWYU pragma: export
#include <cuda/std/__cccl/diagnostic.h> // IWYU pragma: export
#include <cuda/std/__cccl/dialect.h> // IWYU pragma: export
Expand Down
8 changes: 4 additions & 4 deletions libcudacxx/include/cuda/std/__algorithm/copy.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,13 +91,13 @@ template <class _Tp, class _Up>
_LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 bool __constexpr_tail_overlap(_Tp* __first, _Up* __needle, _Tp* __last)
{
_LIBCUDACXX_UNUSED_VAR(__last);
#if __has_builtin(__builtin_constant_p) || defined(_CCCL_COMPILER_GCC)
#if defined(_CCCL_BUILTIN_CONSTANT_P)
NV_IF_ELSE_TARGET(NV_IS_HOST,
(return __builtin_constant_p(__first < __needle) && __first < __needle;),
(return _CCCL_BUILTIN_CONSTANT_P(__first < __needle) && __first < __needle;),
(return __constexpr_tail_overlap_fallback(__first, __needle, __last);))
#else
#else // ^^^ _CCCL_BUILTIN_CONSTANT_P ^^^ / vvv !_CCCL_BUILTIN_CONSTANT_P vvv
return __constexpr_tail_overlap_fallback(__first, __needle, __last);
#endif
#endif // !_CCCL_BUILTIN_CONSTANT_P
}

template <class _AlgPolicy,
Expand Down
18 changes: 9 additions & 9 deletions libcudacxx/include/cuda/std/__bit/bit_cast.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,16 @@

_LIBCUDACXX_BEGIN_NAMESPACE_STD

#if defined(_LIBCUDACXX_BIT_CAST)
#if defined(_CCCL_BUILTIN_BIT_CAST)
# define _LIBCUDACXX_CONSTEXPR_BIT_CAST constexpr
#else // ^^^ _LIBCUDACXX_BIT_CAST ^^^ / vvv !_LIBCUDACXX_BIT_CAST vvv
#else // ^^^ _CCCL_BUILTIN_BIT_CAST ^^^ / vvv !_CCCL_BUILTIN_BIT_CAST vvv
# define _LIBCUDACXX_CONSTEXPR_BIT_CAST
# if defined(_CCCL_COMPILER_GCC) && __GNUC__ >= 8
// GCC starting with GCC8 warns about our extended floating point types having protected data members
_CCCL_DIAG_PUSH
_CCCL_DIAG_SUPPRESS_GCC("-Wclass-memaccess")
# endif // _CCCL_COMPILER_GCC >= 8
#endif // !_LIBCUDACXX_BIT_CAST
#endif // !_CCCL_BUILTIN_BIT_CAST

template <
class _To,
Expand All @@ -48,23 +48,23 @@ template <
__enable_if_t<_CCCL_TRAIT(is_trivially_copyable, _From) || _CCCL_TRAIT(__is_extended_floating_point, _From), int> = 0>
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI _LIBCUDACXX_CONSTEXPR_BIT_CAST _To bit_cast(const _From& __from) noexcept
{
#if defined(_LIBCUDACXX_BIT_CAST)
return _LIBCUDACXX_BIT_CAST(_To, __from);
#else // ^^^ _LIBCUDACXX_BIT_CAST ^^^ / vvv !_LIBCUDACXX_BIT_CAST vvv
#if defined(_CCCL_BUILTIN_BIT_CAST)
return _CCCL_BUILTIN_BIT_CAST(_To, __from);
#else // ^^^ _CCCL_BUILTIN_BIT_CAST ^^^ / vvv !_CCCL_BUILTIN_BIT_CAST vvv
static_assert(_CCCL_TRAIT(is_trivially_default_constructible, _To),
"The compiler does not support __builtin_bit_cast, so bit_cast additionally requires the destination "
"type to be trivially constructible");
_To __temp;
_CUDA_VSTD::memcpy(&__temp, &__from, sizeof(_To));
return __temp;
#endif // !_LIBCUDACXX_BIT_CAST
#endif // !_CCCL_BUILTIN_BIT_CAST
}

#if !defined(_LIBCUDACXX_BIT_CAST)
#if !defined(_CCCL_BUILTIN_BIT_CAST)
# if defined(_CCCL_COMPILER_GCC) && __GNUC__ >= 8
_CCCL_DIAG_POP
# endif // _CCCL_COMPILER_GCC >= 8
#endif // !_LIBCUDACXX_BIT_CAST
#endif // !_CCCL_BUILTIN_BIT_CAST

_LIBCUDACXX_END_NAMESPACE_STD

Expand Down
4 changes: 2 additions & 2 deletions libcudacxx/include/cuda/std/__bit/clz.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ _LIBCUDACXX_HIDE_FROM_ABI constexpr int __libcpp_clz(uint32_t __x)
}
return 32; // Undefined Behavior.
}
# endif // _LIBCUDACXX_IS_CONSTANT_EVALUATED && !defined(__CUDA_ARCH__)
# endif // _CCCL_BUILTIN_IS_CONSTANT_EVALUATED && !defined(__CUDA_ARCH__)

return __binary_clz32(static_cast<uint64_t>(__x), 0);
}
Expand Down Expand Up @@ -141,7 +141,7 @@ _LIBCUDACXX_HIDE_FROM_ABI constexpr int __libcpp_clz(uint64_t __x)
# endif
return 64; // Undefined Behavior.
}
# endif // _LIBCUDACXX_IS_CONSTANT_EVALUATED && !defined(__CUDA_ARCH__)
# endif // _CCCL_BUILTIN_IS_CONSTANT_EVALUATED && !defined(__CUDA_ARCH__)

return __binary_clz64(static_cast<uint64_t>(__x));
}
Expand Down
4 changes: 2 additions & 2 deletions libcudacxx/include/cuda/std/__bit/ctz.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ _LIBCUDACXX_HIDE_FROM_ABI constexpr int __libcpp_ctz(uint32_t __x)
}
return 32;
}
# endif // _LIBCUDACXX_IS_CONSTANT_EVALUATED && !defined(__CUDA_ARCH__)
# endif // _CCCL_BUILTIN_IS_CONSTANT_EVALUATED && !defined(__CUDA_ARCH__)

return __binary_ctz32(static_cast<uint64_t>(__x), 0);
}
Expand Down Expand Up @@ -143,7 +143,7 @@ _LIBCUDACXX_HIDE_FROM_ABI constexpr int __libcpp_ctz(uint64_t __x)
# endif
return 64;
}
# endif // _LIBCUDACXX_IS_CONSTANT_EVALUATED && !defined(__CUDA_ARCH__)
# endif // _CCCL_BUILTIN_IS_CONSTANT_EVALUATED && !defined(__CUDA_ARCH__)

return __binary_ctz64(__x);
}
Expand Down
Loading
Loading