We currently do not expose any meow_t alias until C++14 (#2763)

This forces us to duplicate a ton of code with `__meow_t` for no benefit at all. We already backport half the standard, so let's just do the right thing and use those aliases whenever possible. While we are at it, also actually use the `_CCCL_NODEBUG_ALIAS` attribute when possible.
NVIDIA · Nov 12, 2024 · 87f7246 · 87f7246
1 parent 16ac5be
commit 87f7246
Show file tree

Hide file tree

Showing 245 changed files with 1,319 additions and 1,497 deletions.
diff --git a/c2h/include/c2h/catch2_test_helper.h b/c2h/include/c2h/catch2_test_helper.h
@@ -193,11 +193,11 @@ auto BitwiseEqualsRange(const Range& range) -> CustomEqualsRangeMatcher<Range, b
 #include <cuda/std/tuple>
 _LIBCUDACXX_BEGIN_NAMESPACE_STD
 template <size_t N, typename... T>
-__enable_if_t<(N == sizeof...(T))> print_elem(::std::ostream&, const tuple<T...>&)
+enable_if_t<(N == sizeof...(T))> print_elem(::std::ostream&, const tuple<T...>&)
 {}
 
 template <size_t N, typename... T>
-__enable_if_t<(N < sizeof...(T))> print_elem(::std::ostream& os, const tuple<T...>& tup)
+enable_if_t<(N < sizeof...(T))> print_elem(::std::ostream& os, const tuple<T...>& tup)
 {
   _CCCL_IF_CONSTEXPR (N != 0)
   {

diff --git a/cub/benchmarks/bench/transform/babelstream.h b/cub/benchmarks/bench/transform/babelstream.h
@@ -91,7 +91,7 @@ struct narrowing_error : std::runtime_error
 
 // from C++ GSL
 // implementation insipired by: https://github.com/microsoft/GSL/blob/main/include/gsl/narrow
-template <typename DstT, typename SrcT, ::cuda::std::__enable_if_t<::cuda::std::is_arithmetic<SrcT>::value, int> = 0>
+template <typename DstT, typename SrcT, ::cuda::std::enable_if_t<::cuda::std::is_arithmetic<SrcT>::value, int> = 0>
 constexpr DstT narrow(SrcT value)
 {
   constexpr bool is_different_signedness = ::cuda::std::is_signed<SrcT>::value != ::cuda::std::is_signed<DstT>::value;

diff --git a/cub/cub/detail/choose_offset.cuh b/cub/cub/detail/choose_offset.cuh
@@ -148,7 +148,7 @@ using choose_signed_offset_t = typename choose_signed_offset<NumItemsT>::type;
 template <typename... Iter>
 struct common_iterator_value
 {
-  using type = ::cuda::std::__common_type_t<::cuda::std::__iter_value_type<Iter>...>;
+  using type = ::cuda::std::common_type_t<::cuda::std::__iter_value_type<Iter>...>;
 };
 template <typename... Iter>
 using common_iterator_value_t = typename common_iterator_value<Iter...>::type;

diff --git a/cub/cub/detail/type_traits.cuh b/cub/cub/detail/type_traits.cuh
@@ -50,7 +50,7 @@ _CCCL_SUPPRESS_DEPRECATED_PUSH
 _CCCL_SUPPRESS_DEPRECATED_POP
 #include <cuda/std/type_traits>
 
-#define _CUB_TEMPLATE_REQUIRES(...) ::cuda::std::__enable_if_t<(__VA_ARGS__)>* = nullptr
+#define _CUB_TEMPLATE_REQUIRES(...) ::cuda::std::enable_if_t<(__VA_ARGS__)>* = nullptr
 
 CUB_NAMESPACE_BEGIN
 namespace detail

diff --git a/cub/cub/device/dispatch/dispatch_merge.cuh b/cub/cub/device/dispatch/dispatch_merge.cuh
@@ -40,7 +40,7 @@ class choose_merge_agent
                                     && sizeof(typename fallback_agent_t::TempStorage) <= max_smem_per_block;
 
 public:
-  using type = ::cuda::std::__conditional_t<use_fallback, fallback_agent_t, default_agent_t>;
+  using type = ::cuda::std::conditional_t<use_fallback, fallback_agent_t, default_agent_t>;
 };
 
 // Computes the merge path intersections at equally wide intervals. The approach is outlined in the paper:

diff --git a/cub/cub/device/dispatch/dispatch_transform.cuh b/cub/cub/device/dispatch/dispatch_transform.cuh
@@ -147,7 +147,7 @@ _CCCL_DEVICE _CCCL_FORCEINLINE void prefetch_tile(const T* addr, int tile_size)
 // TODO(miscco): we should probably constrain It to not be a contiguous iterator in C++17 (and change the overload
 // above to accept any contiguous iterator)
 // overload for any iterator that is not a pointer, do nothing
-template <int, typename It, ::cuda::std::__enable_if_t<!::cuda::std::is_pointer<It>::value, int> = 0>
+template <int, typename It, ::cuda::std::enable_if_t<!::cuda::std::is_pointer<It>::value, int> = 0>
 _CCCL_DEVICE _CCCL_FORCEINLINE void prefetch_tile(It, int)
 {}
 
@@ -232,20 +232,20 @@ _CCCL_DEVICE _CCCL_FORCEINLINE auto poor_apply(F&& f, Tuple&& t)
   -> decltype(poor_apply_impl(
     ::cuda::std::forward<F>(f),
     ::cuda::std::forward<Tuple>(t),
-    ::cuda::std::make_index_sequence<::cuda::std::tuple_size<::cuda::std::__libcpp_remove_reference_t<Tuple>>::value>{}))
+    ::cuda::std::make_index_sequence<::cuda::std::tuple_size<::cuda::std::remove_reference_t<Tuple>>::value>{}))
 {
   return poor_apply_impl(
     ::cuda::std::forward<F>(f),
     ::cuda::std::forward<Tuple>(t),
-    ::cuda::std::make_index_sequence<::cuda::std::tuple_size<::cuda::std::__libcpp_remove_reference_t<Tuple>>::value>{});
+    ::cuda::std::make_index_sequence<::cuda::std::tuple_size<::cuda::std::remove_reference_t<Tuple>>::value>{});
 }
 
 // mult must be a power of 2
 template <typename Integral>
 _CCCL_HOST_DEVICE _CCCL_FORCEINLINE constexpr auto round_up_to_po2_multiple(Integral x, Integral mult) -> Integral
 {
 #if _CCCL_STD_VER > 2011
-  _CCCL_ASSERT(::cuda::std::has_single_bit(static_cast<::cuda::std::__make_unsigned_t<Integral>>(mult)), "");
+  _CCCL_ASSERT(::cuda::std::has_single_bit(static_cast<::cuda::std::make_unsigned_t<Integral>>(mult)), "");
 #endif // _CCCL_STD_VER > 2011
   return (x + mult - 1) & ~(mult - 1);
 }
@@ -544,15 +544,15 @@ using needs_aligned_ptr_t =
                              >;
 
 #ifdef _CUB_HAS_TRANSFORM_UBLKCP
-template <Algorithm Alg, typename It, ::cuda::std::__enable_if_t<needs_aligned_ptr_t<Alg>::value, int> = 0>
+template <Algorithm Alg, typename It, ::cuda::std::enable_if_t<needs_aligned_ptr_t<Alg>::value, int> = 0>
 _CCCL_DEVICE _CCCL_FORCEINLINE auto select_kernel_arg(
   ::cuda::std::integral_constant<Algorithm, Alg>, kernel_arg<It>&& arg) -> aligned_base_ptr<value_t<It>>&&
 {
   return ::cuda::std::move(arg.aligned_ptr);
 }
 #endif // _CUB_HAS_TRANSFORM_UBLKCP
 
-template <Algorithm Alg, typename It, ::cuda::std::__enable_if_t<!needs_aligned_ptr_t<Alg>::value, int> = 0>
+template <Algorithm Alg, typename It, ::cuda::std::enable_if_t<!needs_aligned_ptr_t<Alg>::value, int> = 0>
 _CCCL_DEVICE _CCCL_FORCEINLINE auto
 select_kernel_arg(::cuda::std::integral_constant<Algorithm, Alg>, kernel_arg<It>&& arg) -> It&&
 {

diff --git a/cub/cub/thread/thread_reduce.cuh b/cub/cub/thread/thread_reduce.cuh
@@ -131,9 +131,9 @@ ThreadReduceSequential(const Input& input, ReductionOp reduction_op)
 /// Specialization for DPX reduction
 template <typename Input, typename ReductionOp>
 _CCCL_NODISCARD _CCCL_DEVICE _CCCL_FORCEINLINE auto
-ThreadReduceDpx(const Input& input, ReductionOp reduction_op) -> ::cuda::std::__remove_cvref_t<decltype(input[0])>
+ThreadReduceDpx(const Input& input, ReductionOp reduction_op) -> ::cuda::std::remove_cvref_t<decltype(input[0])>
 {
-  using T              = ::cuda::std::__remove_cvref_t<decltype(input[0])>;
+  using T              = ::cuda::std::remove_cvref_t<decltype(input[0])>;
   constexpr int length = detail::static_size<Input>();
   T array[length];
 #  pragma unroll
@@ -153,7 +153,7 @@ ThreadReduceDpx(const Input& input, ReductionOp reduction_op) -> ::cuda::std::__
 // DPX/Sequential dispatch
 template <typename Input,
           typename ReductionOp,
-          typename ValueT = ::cuda::std::__remove_cvref_t<decltype(::cuda::std::declval<Input>()[0])>,
+          typename ValueT = ::cuda::std::remove_cvref_t<decltype(::cuda::std::declval<Input>()[0])>,
           typename AccumT = ::cuda::std::__accumulator_t<ReductionOp, ValueT>,
           _CUB_TEMPLATE_REQUIRES(enable_dpx_reduction<Input, ReductionOp, AccumT>())>
 _CCCL_NODISCARD _CCCL_DEVICE _CCCL_FORCEINLINE AccumT ThreadReduce(const Input& input, ReductionOp reduction_op)
@@ -170,7 +170,7 @@ _CCCL_NODISCARD _CCCL_DEVICE _CCCL_FORCEINLINE AccumT ThreadReduce(const Input&
 
 template <typename Input,
           typename ReductionOp,
-          typename ValueT = ::cuda::std::__remove_cvref_t<decltype(::cuda::std::declval<Input>()[0])>,
+          typename ValueT = ::cuda::std::remove_cvref_t<decltype(::cuda::std::declval<Input>()[0])>,
           typename AccumT = ::cuda::std::__accumulator_t<ReductionOp, ValueT>,
           _CUB_TEMPLATE_REQUIRES(!enable_dpx_reduction<Input, ReductionOp, AccumT>())>
 _CCCL_NODISCARD _CCCL_DEVICE _CCCL_FORCEINLINE AccumT ThreadReduce(const Input& input, ReductionOp reduction_op)
@@ -213,7 +213,7 @@ template <typename Input,
           typename ReductionOp,
           typename PrefixT,
 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
-          typename ValueT = ::cuda::std::__remove_cvref_t<decltype(::cuda::std::declval<Input>()[0])>,
+          typename ValueT = ::cuda::std::remove_cvref_t<decltype(::cuda::std::declval<Input>()[0])>,
 #endif // !DOXYGEN_SHOULD_SKIP_THIS
           typename AccumT = ::cuda::std::__accumulator_t<ReductionOp, ValueT, PrefixT>>
 _CCCL_NODISCARD _CCCL_DEVICE _CCCL_FORCEINLINE AccumT

diff --git a/cub/cub/util_type.cuh b/cub/cub/util_type.cuh
@@ -100,7 +100,7 @@ using value_t =
   typename std::iterator_traits<Iterator>::value_type;
 #  endif // defined(_CCCL_COMPILER_NVRTC)
 
-template <typename It, typename FallbackT, bool = ::cuda::std::is_void<::cuda::std::__remove_pointer_t<It>>::value>
+template <typename It, typename FallbackT, bool = ::cuda::std::is_void<::cuda::std::remove_pointer_t<It>>::value>
 struct non_void_value_impl
 {
   using type = FallbackT;

diff --git a/cub/test/catch2_test_device_adjacent_difference_substract_left.cu b/cub/test/catch2_test_device_adjacent_difference_substract_left.cu
@@ -163,7 +163,7 @@ C2H_TEST("DeviceAdjacentDifference::SubtractLeftCopy works with pointers", "[dev
 template <class T>
 struct cust_diff
 {
-  template <class T2, cuda::std::__enable_if_t<cuda::std::is_same<T, T2>::value, int> = 0>
+  template <class T2, cuda::std::enable_if_t<cuda::std::is_same<T, T2>::value, int> = 0>
   __host__ __device__ constexpr T2 operator()(const T2& lhs, const T2& rhs) const noexcept
   {
     return lhs - rhs;

diff --git a/cub/test/catch2_test_device_adjacent_difference_substract_right.cu b/cub/test/catch2_test_device_adjacent_difference_substract_right.cu
@@ -92,7 +92,7 @@ C2H_TEST("DeviceAdjacentDifference::SubtractRightCopy does not change the input"
 template <class T>
 struct ref_diff
 {
-  template <class T2, cuda::std::__enable_if_t<cuda::std::is_same<T, T2>::value, int> = 0>
+  template <class T2, cuda::std::enable_if_t<cuda::std::is_same<T, T2>::value, int> = 0>
   __host__ __device__ constexpr T2 operator()(const T2& lhs, const T2& rhs) const noexcept
   {
     return rhs - lhs;

diff --git a/cudax/include/cuda/experimental/__async/cpos.cuh b/cudax/include/cuda/experimental/__async/cpos.cuh
@@ -44,13 +44,13 @@ struct scheduler_t
 {};
 
 template <class _Ty>
-using __sender_concept_t = typename __remove_ref_t<_Ty>::sender_concept;
+using __sender_concept_t = typename _CUDA_VSTD::remove_reference_t<_Ty>::sender_concept;
 
 template <class _Ty>
-using __receiver_concept_t = typename __remove_ref_t<_Ty>::receiver_concept;
+using __receiver_concept_t = typename _CUDA_VSTD::remove_reference_t<_Ty>::receiver_concept;
 
 template <class _Ty>
-using __scheduler_concept_t = typename __remove_ref_t<_Ty>::scheduler_concept;
+using __scheduler_concept_t = typename _CUDA_VSTD::remove_reference_t<_Ty>::scheduler_concept;
 
 template <class _Ty>
 inline constexpr bool __is_sender = __type_valid_v<__sender_concept_t, _Ty>;

diff --git a/cudax/include/cuda/experimental/__async/meta.cuh b/cudax/include/cuda/experimental/__async/meta.cuh
@@ -94,7 +94,7 @@ template <class... _What>
 struct _ERROR : __merror_base
 {
   template <class...>
-  using __call _LIBCUDACXX_NODEBUG_TYPE = _ERROR;
+  using __call _CCCL_NODEBUG_ALIAS = _ERROR;
 
   _ERROR operator+();
 
@@ -138,14 +138,14 @@ template <bool _Error>
 struct __type_self_or_error_with_
 {
   template <class _Ty, class... _With>
-  using __call _LIBCUDACXX_NODEBUG_TYPE = _Ty;
+  using __call _CCCL_NODEBUG_ALIAS = _Ty;
 };
 
 template <>
 struct __type_self_or_error_with_<true>
 {
   template <class _Ty, class... _With>
-  using __call _LIBCUDACXX_NODEBUG_TYPE = decltype(__declval<_Ty&>().with(__declval<_ERROR<_With...>&>()));
+  using __call _CCCL_NODEBUG_ALIAS = decltype(__declval<_Ty&>().with(__declval<_ERROR<_With...>&>()));
 };
 
 template <class _Ty, class... _With>
@@ -162,7 +162,7 @@ struct __type_try__<false>
   using __call_q = _Fn<_Ts...>;
 
   template <class _Fn, class... _Ts>
-  using __call _LIBCUDACXX_NODEBUG_TYPE = typename _Fn::template __call<_Ts...>;
+  using __call _CCCL_NODEBUG_ALIAS = typename _Fn::template __call<_Ts...>;
 };
 
 template <>
@@ -172,7 +172,7 @@ struct __type_try__<true>
   using __call_q = __type_find_error<_Ts...>;
 
   template <class _Fn, class... _Ts>
-  using __call _LIBCUDACXX_NODEBUG_TYPE = __type_find_error<_Fn, _Ts...>;
+  using __call _CCCL_NODEBUG_ALIAS = __type_find_error<_Fn, _Ts...>;
 };
 
 template <class _Fn, class... _Ts>
@@ -187,7 +187,7 @@ template <class _Fn>
 struct __type_try
 {
   template <class... _Ts>
-  using __call _LIBCUDACXX_NODEBUG_TYPE = __type_try_call<_Fn, _Ts...>;
+  using __call _CCCL_NODEBUG_ALIAS = __type_try_call<_Fn, _Ts...>;
 };
 
 template <template <class...> class _Fn, class... _Default>
@@ -198,7 +198,7 @@ template <template <class...> class _Fn>
 struct __type_try_quote<_Fn>
 {
   template <class... _Ts>
-  using __call _LIBCUDACXX_NODEBUG_TYPE =
+  using __call _CCCL_NODEBUG_ALIAS =
     typename __type_try__<__type_contains_error<_Ts...>>::template __call_q<_Fn, _Ts...>;
 };
 
@@ -207,7 +207,7 @@ template <template <class...> class _Fn, class _Default>
 struct __type_try_quote<_Fn, _Default>
 {
   template <class... _Ts>
-  using __call _LIBCUDACXX_NODEBUG_TYPE =
+  using __call _CCCL_NODEBUG_ALIAS =
     typename _CUDA_VSTD::_If<__type_valid_v<_Fn, _Ts...>, //
                              __type_try_quote<_Fn>,
                              _CUDA_VSTD::__type_always<_Default>>::template __call<_Ts...>;
@@ -230,20 +230,20 @@ template <template <class...> class _Second, template <class...> class _First>
 struct __type_compose_quote
 {
   template <class... _Ts>
-  using __call _LIBCUDACXX_NODEBUG_TYPE = _Second<_First<_Ts...>>;
+  using __call _CCCL_NODEBUG_ALIAS = _Second<_First<_Ts...>>;
 };
 
 struct __type_count
 {
   template <class... _Ts>
-  using __call _LIBCUDACXX_NODEBUG_TYPE = _CUDA_VSTD::integral_constant<size_t, sizeof...(_Ts)>;
+  using __call _CCCL_NODEBUG_ALIAS = _CUDA_VSTD::integral_constant<size_t, sizeof...(_Ts)>;
 };
 
 template <template <class...> class _Continuation>
 struct __type_concat_into_quote
 {
   template <class... _Args>
-  using __call _LIBCUDACXX_NODEBUG_TYPE =
+  using __call _CCCL_NODEBUG_ALIAS =
     _CUDA_VSTD::__type_call1<_CUDA_VSTD::__type_concat<_CUDA_VSTD::__as_type_list<_Args>...>,
                              _CUDA_VSTD::__type_quote<_Continuation>>;
 };
@@ -252,7 +252,7 @@ template <class _Ty>
 struct __type_self_or
 {
   template <class _Uy = _Ty>
-  using __call _LIBCUDACXX_NODEBUG_TYPE = _Uy;
+  using __call _CCCL_NODEBUG_ALIAS = _Uy;
 };
 } // namespace cuda::experimental::__async
 

diff --git a/cudax/include/cuda/experimental/__async/type_traits.cuh b/cudax/include/cuda/experimental/__async/type_traits.cuh
@@ -35,9 +35,6 @@
 namespace cuda::experimental::__async
 {
 
-template <class _Ty>
-using __remove_ref_t = _CUDA_VSTD::__libcpp_remove_reference_t<_Ty>;
-
 //////////////////////////////////////////////////////////////////////////////////////////////////
 // __decay_t: An efficient implementation for ::std::decay
 #if defined(_CCCL_BUILTIN_DECAY)

diff --git a/cudax/include/cuda/experimental/__hierarchy/hierarchy_dimensions.cuh b/cudax/include/cuda/experimental/__hierarchy/hierarchy_dimensions.cuh
@@ -125,12 +125,12 @@ struct get_level_helper
 
 template <typename QueryLevel, typename Hierarchy>
 _CCCL_INLINE_VAR constexpr bool has_level =
-  detail::has_level_helper<QueryLevel, ::cuda::std::__remove_cvref_t<Hierarchy>>::value;
+  detail::has_level_helper<QueryLevel, ::cuda::std::remove_cvref_t<Hierarchy>>::value;
 
 template <typename QueryLevel, typename Hierarchy>
 _CCCL_INLINE_VAR constexpr bool has_level_or_unit =
-  detail::has_level_helper<QueryLevel, ::cuda::std::__remove_cvref_t<Hierarchy>>::value
-  || detail::has_unit<QueryLevel, ::cuda::std::__remove_cvref_t<Hierarchy>>::value;
+  detail::has_level_helper<QueryLevel, ::cuda::std::remove_cvref_t<Hierarchy>>::value
+  || detail::has_unit<QueryLevel, ::cuda::std::remove_cvref_t<Hierarchy>>::value;
 
 namespace detail
 {

diff --git a/docs/repo.toml b/docs/repo.toml
@@ -436,7 +436,7 @@ doxygen_predefined = [
   "_LIBCUDACXX_AND=&&",
   "_LIBCUDACXX_EAT_REST(x)=",
   "_LIBCUDACXX_GLOBAL_CONSTANT=inline",
-  "_LIBCUDACXX_REQUIRES(x)= ::cuda::std::__enable_if_t<x, int> = 0>",
+  "_LIBCUDACXX_REQUIRES(x)= ::cuda::std::enable_if_t<x, int> = 0>",
   "_LIBCUDACXX_TEMPLATE(x)=template<x, ",
   "_LIBCUDACXX_TRAILING_REQUIRES(x)=-> x _LIBCUDACXX_EAT_REST",
   "LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE=",

diff --git a/libcudacxx/include/cuda/__cmath/ceil_div.h b/libcudacxx/include/cuda/__cmath/ceil_div.h
@@ -39,12 +39,12 @@ _LIBCUDACXX_BEGIN_NAMESPACE_CUDA
 //! @pre \p __b must be positive
 template <class _Tp,
           class _Up,
-          _CUDA_VSTD::__enable_if_t<_CCCL_TRAIT(_CUDA_VSTD::is_unsigned, _Tp), int> = 0,
-          _CUDA_VSTD::__enable_if_t<_CCCL_TRAIT(_CUDA_VSTD::is_integral, _Up), int> = 0>
+          _CUDA_VSTD::enable_if_t<_CCCL_TRAIT(_CUDA_VSTD::is_unsigned, _Tp), int> = 0,
+          _CUDA_VSTD::enable_if_t<_CCCL_TRAIT(_CUDA_VSTD::is_integral, _Up), int> = 0>
 _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 _Tp ceil_div(const _Tp __a, const _Up __b) noexcept
 {
   _CCCL_ASSERT(__b > _Up(0), "cuda::ceil_div: b must be positive");
-  using _UCommon   = _CUDA_VSTD::__make_unsigned_t<_CUDA_VSTD::__common_type_t<_Tp, _Up>>;
+  using _UCommon   = _CUDA_VSTD::make_unsigned_t<_CUDA_VSTD::common_type_t<_Tp, _Up>>;
   const auto __res = static_cast<_UCommon>(__a) / static_cast<_UCommon>(__b);
   return static_cast<_Tp>(__res + (__res * static_cast<_UCommon>(__b) != static_cast<_UCommon>(__a)));
 }
@@ -56,13 +56,13 @@ _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 _Tp ceil_div(con
 //! @pre \p __b must be positive
 template <class _Tp,
           class _Up,
-          _CUDA_VSTD::__enable_if_t<_CCCL_TRAIT(_CUDA_VSTD::is_signed, _Tp), int>   = 0,
-          _CUDA_VSTD::__enable_if_t<_CCCL_TRAIT(_CUDA_VSTD::is_integral, _Up), int> = 0>
+          _CUDA_VSTD::enable_if_t<_CCCL_TRAIT(_CUDA_VSTD::is_signed, _Tp), int>   = 0,
+          _CUDA_VSTD::enable_if_t<_CCCL_TRAIT(_CUDA_VSTD::is_integral, _Up), int> = 0>
 _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 _Tp ceil_div(const _Tp __a, const _Up __b) noexcept
 {
   _CCCL_ASSERT(__a >= _Tp(0), "cuda::ceil_div: a must be non negative");
   _CCCL_ASSERT(__b > _Up(0), "cuda::ceil_div: b must be positive");
-  using _UCommon = _CUDA_VSTD::__make_unsigned_t<_CUDA_VSTD::__common_type_t<_Tp, _Up>>;
+  using _UCommon = _CUDA_VSTD::make_unsigned_t<_CUDA_VSTD::common_type_t<_Tp, _Up>>;
   // Due to the precondition `__a >= 0` we can safely cast to unsigned without danger of overflowing
   return static_cast<_Tp>((static_cast<_UCommon>(__a) + static_cast<_UCommon>(__b) - 1) / static_cast<_UCommon>(__b));
 }
@@ -74,11 +74,11 @@ _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 _Tp ceil_div(con
 //! @pre \p __b must be positive
 template <class _Tp,
           class _Up,
-          _CUDA_VSTD::__enable_if_t<_CCCL_TRAIT(_CUDA_VSTD::is_integral, _Tp), int> = 0,
-          _CUDA_VSTD::__enable_if_t<_CCCL_TRAIT(_CUDA_VSTD::is_enum, _Up), int>     = 0>
+          _CUDA_VSTD::enable_if_t<_CCCL_TRAIT(_CUDA_VSTD::is_integral, _Tp), int> = 0,
+          _CUDA_VSTD::enable_if_t<_CCCL_TRAIT(_CUDA_VSTD::is_enum, _Up), int>     = 0>
 _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 _Tp ceil_div(const _Tp __a, const _Up __b) noexcept
 {
-  return ::cuda::ceil_div(__a, static_cast<_CUDA_VSTD::__underlying_type_t<_Up>>(__b));
+  return ::cuda::ceil_div(__a, static_cast<_CUDA_VSTD::underlying_type_t<_Up>>(__b));
 }
 
 _LIBCUDACXX_END_NAMESPACE_CUDA

diff --git a/libcudacxx/include/cuda/__functional/maximum.h b/libcudacxx/include/cuda/__functional/maximum.h
@@ -42,7 +42,7 @@ struct _CCCL_TYPE_VISIBILITY_DEFAULT maximum<void>
 {
   _CCCL_EXEC_CHECK_DISABLE
   template <class _T1, class _T2>
-  _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr _CUDA_VSTD::__common_type_t<_T1, _T2>
+  _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr _CUDA_VSTD::common_type_t<_T1, _T2>
   operator()(const _T1& __lhs, const _T2& __rhs) const noexcept(noexcept((__lhs < __rhs) ? __rhs : __lhs))
   {
     return (__lhs < __rhs) ? __rhs : __lhs;

diff --git a/libcudacxx/include/cuda/__functional/minimum.h b/libcudacxx/include/cuda/__functional/minimum.h
@@ -42,7 +42,7 @@ struct _CCCL_TYPE_VISIBILITY_DEFAULT minimum<void>
 {
   _CCCL_EXEC_CHECK_DISABLE
   template <class _T1, class _T2>
-  _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr _CUDA_VSTD::__common_type_t<_T1, _T2>
+  _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr _CUDA_VSTD::common_type_t<_T1, _T2>
   operator()(const _T1& __lhs, const _T2& __rhs) const noexcept(noexcept((__lhs < __rhs) ? __lhs : __rhs))
   {
     return (__lhs < __rhs) ? __lhs : __rhs;