Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix device/host call issues in CUDA (when not using relaxed constexpr workaround) #360

Merged
merged 13 commits into from
Oct 4, 2024
Merged
3 changes: 3 additions & 0 deletions include/experimental/__p0009_bits/layout_left.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -237,10 +237,12 @@ class layout_left::mapping {

// Not really public, but currently needed to implement fully constexpr useable submdspan:
template<size_t N, class SizeType, size_t ... E, size_t ... Idx>
MDSPAN_INLINE_FUNCTION
constexpr index_type __get_stride(MDSPAN_IMPL_STANDARD_NAMESPACE::extents<SizeType, E...>,std::integer_sequence<size_t, Idx...>) const {
return _MDSPAN_FOLD_TIMES_RIGHT((Idx<N? __extents.template __extent<Idx>():1),1);
}
template<size_t N>
MDSPAN_INLINE_FUNCTION
constexpr index_type __stride() const noexcept {
return __get_stride<N>(__extents, std::make_index_sequence<extents_type::rank()>());
}
Expand All @@ -255,6 +257,7 @@ class layout_left::mapping {
SliceSpecifiers... slices) const;

template<class... SliceSpecifiers>
MDSPAN_INLINE_FUNCTION
friend constexpr auto submdspan_mapping(
const mapping& src, SliceSpecifiers... slices) {
return src.submdspan_mapping_impl(slices...);
Expand Down
3 changes: 3 additions & 0 deletions include/experimental/__p0009_bits/layout_right.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,10 +234,12 @@ class layout_right::mapping {

// Not really public, but currently needed to implement fully constexpr useable submdspan:
template<size_t N, class SizeType, size_t ... E, size_t ... Idx>
MDSPAN_INLINE_FUNCTION
constexpr index_type __get_stride(MDSPAN_IMPL_STANDARD_NAMESPACE::extents<SizeType, E...>,std::integer_sequence<size_t, Idx...>) const {
return _MDSPAN_FOLD_TIMES_RIGHT((Idx>N? __extents.template __extent<Idx>():1),1);
}
template<size_t N>
MDSPAN_INLINE_FUNCTION
constexpr index_type __stride() const noexcept {
return __get_stride<N>(__extents, std::make_index_sequence<extents_type::rank()>());
}
Expand All @@ -252,6 +254,7 @@ class layout_right::mapping {
SliceSpecifiers... slices) const;

template<class... SliceSpecifiers>
MDSPAN_INLINE_FUNCTION
friend constexpr auto submdspan_mapping(
const mapping& src, SliceSpecifiers... slices) {
return src.submdspan_mapping_impl(slices...);
Expand Down
26 changes: 16 additions & 10 deletions include/experimental/__p0009_bits/layout_stride.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,6 @@ struct layout_stride {
}

template<class IntegralType>
MDSPAN_INLINE_FUNCTION
static constexpr const __strides_storage_t fill_strides(const std::array<IntegralType,extents_type::rank()>& s) {
return __strides_storage_t{static_cast<index_type>(s[Idxs])...};
}
Expand All @@ -206,7 +205,7 @@ struct layout_stride {
class IntegralType,
// The is_convertible condition is added to make sfinae valid
// the extents_type::rank() > 0 is added to avoid use of non-standard zero length c-array
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// the extents_type::rank() > 0 is added to avoid use of non-standard zero length c-array

This part of the comment is no longer needed.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are users of mdspan which are not Kokkos users and rely on C++14.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

but yeah good observation I just pushed something to remove the comments

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// the extents_type::rank() > 0 is added to avoid use of non-standard zero length c-array

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

removed

(std::is_convertible<IntegralType, typename extents_type::index_type>::value && (extents_type::rank() > 0))
(std::is_convertible<IntegralType, typename extents_type::index_type>::value)
)
MDSPAN_INLINE_FUNCTION
// despite the requirement some compilers still complain about zero length array during parsing
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment seems out-of-date now.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

removed

Expand All @@ -218,7 +217,6 @@ struct layout_stride {

#ifdef __cpp_lib_span
template<class IntegralType>
MDSPAN_INLINE_FUNCTION
static constexpr const __strides_storage_t fill_strides(const std::span<IntegralType,extents_type::rank()>& s) {
return __strides_storage_t{static_cast<index_type>(s[Idxs])...};
}
Expand All @@ -242,10 +240,13 @@ struct layout_stride {
// Can't use defaulted parameter in the __deduction_workaround template because of a bug in MSVC warning C4348.
using __impl = __deduction_workaround<std::make_index_sequence<Extents::rank()>>;

MDSPAN_FUNCTION
static constexpr __strides_storage_t strides_storage(detail::with_rank<0>) {
return {};
}

template <std::size_t N>
MDSPAN_FUNCTION
static constexpr __strides_storage_t strides_storage(detail::with_rank<N>) {
__strides_storage_t s{};

Expand Down Expand Up @@ -273,7 +274,7 @@ struct layout_stride {

//--------------------------------------------------------------------------------

MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr mapping() noexcept
MDSPAN_INLINE_FUNCTION constexpr mapping() noexcept
#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS)
: __members{
#else
Expand All @@ -299,7 +300,6 @@ struct layout_stride {
_MDSPAN_TRAIT(std::is_nothrow_constructible, typename Extents::index_type, const std::remove_const_t<IntegralTypes>&)
)
)
MDSPAN_INLINE_FUNCTION
constexpr
mapping(
extents_type const& e,
Expand Down Expand Up @@ -333,8 +333,7 @@ struct layout_stride {
// MSVC 19.32 does not like using index_type here, requires the typename Extents::index_type
// error C2641: cannot deduce template arguments for 'MDSPAN_IMPL_STANDARD_NAMESPACE::layout_stride::mapping'
_MDSPAN_TRAIT(std::is_convertible, const std::remove_const_t<IntegralTypes>&, typename Extents::index_type) &&
_MDSPAN_TRAIT(std::is_nothrow_constructible, typename Extents::index_type, const std::remove_const_t<IntegralTypes>&) &&
(Extents::rank() > 0)
_MDSPAN_TRAIT(std::is_nothrow_constructible, typename Extents::index_type, const std::remove_const_t<IntegralTypes>&)
)
)
MDSPAN_INLINE_FUNCTION
Expand All @@ -345,7 +344,7 @@ struct layout_stride {
// despite the requirement some compilers still complain about zero length array during parsing
// making it length 1 now, but since the thing can't be instantiated due to requirement the actual
// instantiation of strides_storage will not fail despite mismatching length
IntegralTypes (&s)[extents_type::rank()>0?extents_type::rank():1]
const IntegralTypes (&s)[extents_type::rank()>0?extents_type::rank():1]
) noexcept
#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS)
: __members{
Expand Down Expand Up @@ -379,7 +378,6 @@ struct layout_stride {
_MDSPAN_TRAIT(std::is_nothrow_constructible, typename Extents::index_type, const std::remove_const_t<IntegralTypes>&)
)
)
MDSPAN_INLINE_FUNCTION
constexpr
mapping(
extents_type const& e,
Expand Down Expand Up @@ -476,7 +474,8 @@ struct layout_stride {
MDSPAN_INLINE_FUNCTION
constexpr index_type required_span_size() const noexcept {
index_type span_size = 1;
for(unsigned r = 0; r < extents_type::rank(); r++) {
// using int here to avoid warning about pointless comparison to 0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suppose you mean the rank 0 case

for(int r = 0; r < static_cast<int>(extents_type::rank()); r++) {
// Return early if any of the extents are zero
if(extents().extent(r)==0) return 0;
span_size += ( static_cast<index_type>(extents().extent(r) - 1 ) * __strides_storage()[r]);
Expand Down Expand Up @@ -509,15 +508,18 @@ struct layout_stride {
MDSPAN_INLINE_FUNCTION static constexpr bool is_unique() noexcept { return true; }

private:
MDSPAN_INLINE_FUNCTION
constexpr bool exhaustive_for_nonzero_span_size() const
{
return required_span_size() == __get_size(extents(), std::make_index_sequence<extents_type::rank()>());
}

MDSPAN_INLINE_FUNCTION
constexpr bool is_exhaustive_impl(detail::with_rank<0>) const
{
return true;
}
MDSPAN_INLINE_FUNCTION
constexpr bool is_exhaustive_impl(detail::with_rank<1>) const
{
if (required_span_size() != static_cast<index_type>(0)) {
Expand All @@ -526,6 +528,7 @@ struct layout_stride {
return stride(0) == 1;
}
template <std::size_t N>
MDSPAN_INLINE_FUNCTION
constexpr bool is_exhaustive_impl(detail::with_rank<N>) const
{
if (required_span_size() != static_cast<index_type>(0)) {
Expand Down Expand Up @@ -627,6 +630,7 @@ struct layout_stride {
SliceSpecifiers... slices) const;

template<class... SliceSpecifiers>
MDSPAN_INLINE_FUNCTION
friend constexpr auto submdspan_mapping(
const mapping& src, SliceSpecifiers... slices) {
return src.submdspan_mapping_impl(slices...);
Expand All @@ -637,10 +641,12 @@ struct layout_stride {
namespace detail {

template <class Layout, class Extents, class Mapping>
MDSPAN_INLINE_FUNCTION
constexpr void validate_strides(with_rank<0>, Layout, const Extents&, const Mapping&)
{}

template <std::size_t N, class Layout, class Extents, class Mapping>
MDSPAN_INLINE_FUNCTION
constexpr void validate_strides(with_rank<N>, Layout, const Extents& ext, const Mapping& other)
{
static_assert(std::is_same<typename Mapping::layout_type, layout_stride>::value &&
Expand Down
108 changes: 108 additions & 0 deletions include/experimental/__p0009_bits/utility.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

#include <cstddef>
#include <type_traits>
#include <array>
#include <utility>

namespace MDSPAN_IMPL_STANDARD_NAMESPACE {
namespace detail {
Expand Down Expand Up @@ -64,6 +66,112 @@ constexpr struct
}
} stride;

template<class T>
MDSPAN_INLINE_FUNCTION
constexpr void maybe_unused_variable(const T&) {}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function seems unused.

Suggested change
template<class T>
MDSPAN_INLINE_FUNCTION
constexpr void maybe_unused_variable(const T&) {}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

removed



// same as std::integral_constant but with __host__ __device__ annotations on
// the implicit conversion function and the call operator
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need/want conversion from to the std:: counterpart?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

because there are places where we pass in std::integral_constant, and std::integral_constant is the standard approved way to pass certain ways to certain functions. For example submdspan(a, std::integral_constant<int, 1>()) is fine in device code.

template <class T, T v>
struct integral_constant {
using value_type = T;
using type = integral_constant<T, v>;

MDSPAN_INLINE_FUNCTION_DEFAULTED
constexpr integral_constant() = default;

MDSPAN_INLINE_FUNCTION_DEFAULTED
constexpr integral_constant(std::integral_constant<T,v>) {};
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DEFAULTED seems inappropriate. Do you need the extra annotations at all here?

Suggested change
MDSPAN_INLINE_FUNCTION_DEFAULTED
constexpr integral_constant(std::integral_constant<T,v>) {};
MDSPAN_INLINE_FUNCTION
constexpr integral_constant(std::integral_constant<T,v>) {};

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't be host device in the 1st place

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes it can. Nothing in this calls device functions. Constructor of std::integral_constant is defaulted, so it works too. The only thing which doesn't work of std::integral_constant is the conversion operator to its scaler type.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then what Daniel said

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(minus the inline)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah clear bug (funnily enough one of the primary bugs we had elsewhere)


static constexpr T value = v;
MDSPAN_INLINE_FUNCTION constexpr operator value_type() const noexcept {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Prefer MDSPAN_FUNCTION here and below

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

return value;
}
MDSPAN_INLINE_FUNCTION constexpr value_type operator()() const noexcept {
return value;
}
MDSPAN_INLINE_FUNCTION constexpr operator std::integral_constant<T,v>() const noexcept {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No way that can be annotated host device

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Of course it works:

https://godbolt.org/z/bb8qdT31Y

We can use any standard library stuff as long as we don't call non-defaulted member functions. And we don't here.

return std::integral_constant<T,v>{};
}
};

// The tuple implementation only comes in play when using capabilities
// such as submdspan which require C++17 anyway
#if MDSPAN_HAS_CXX_17
template<class T, size_t Idx>
struct tuple_member {
using type = T;
static constexpr size_t idx = Idx;
T val;
MDSPAN_FUNCTION constexpr T& get() { return val; }
MDSPAN_FUNCTION constexpr const T& get() const { return val; }
};

template<size_t SearchIdx, size_t Idx, class T>
struct tuple_idx_matcher {
using type = tuple_member<T, Idx>;
template<class Other>
MDSPAN_FUNCTION
constexpr auto operator + (Other v) const {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using + seems a little weird. What | or || be slightly better?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we just need something which expands.

if constexpr (Idx == SearchIdx) { return *this; }
else { return v; }
}
};

template<class IdxSeq, class ... Elements>
struct tuple_impl;

template<size_t ... Idx, class ... Elements>
struct tuple_impl<std::index_sequence<Idx...>, Elements...>: public tuple_member<Elements, Idx> ... {

MDSPAN_FUNCTION
constexpr tuple_impl(Elements ... vals):tuple_member<Elements, Idx>{vals}... {}

template<size_t N>
MDSPAN_FUNCTION
constexpr auto& get() {
using base_t = decltype((tuple_idx_matcher<N, Idx, Elements>() + ...) );
return base_t::type::get();
}
template<size_t N>
MDSPAN_FUNCTION
constexpr const auto& get() const {
using base_t = decltype((tuple_idx_matcher<N, Idx, Elements>() + ...) );
return base_t::type::get();
}
};

// A simple tuple-like class for representing slices internally and is compatible with device code
// This doesn't support type access since we don't need it
// This is not meant as an external API
template<class ... Elements>
struct tuple: public tuple_impl<decltype(std::make_index_sequence<sizeof...(Elements)>()), Elements...> {
MDSPAN_FUNCTION
constexpr tuple(Elements ... vals):tuple_impl<decltype(std::make_index_sequence<sizeof...(Elements)>()), Elements ...>(vals ...) {}
};

template<size_t Idx, class ... Args>
MDSPAN_FUNCTION
constexpr auto& get(tuple<Args...>& vals) { return vals.template get<Idx>(); }

template<size_t Idx, class ... Args>
MDSPAN_FUNCTION
constexpr const auto& get(const tuple<Args...>& vals) { return vals.template get<Idx>(); }

template<class ... Elements>
tuple(Elements ...) -> tuple<Elements...>;
#endif

template<class T, size_t ... Idx>
constexpr auto c_array_to_std(std::index_sequence<Idx...>, const T(&values)[sizeof...(Idx)]) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would prefer calling it to_std_array

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also thinking you could should write a for loop instead of a fold expression

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

got rid of the function for now, not really needed for what we do.

return std::array<T, sizeof...(Idx)>{values[Idx]...};
}
template<class T, size_t N>
constexpr auto c_array_to_std(const T(&values)[N]) {
return c_array_to_std(std::make_index_sequence<N>(), values);
}

} // namespace detail

constexpr struct mdspan_non_standard_tag {
Expand Down
Loading
Loading