-
Notifications
You must be signed in to change notification settings - Fork 69
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix device/host call issues in CUDA (when not using relaxed constexpr workaround) #360
Changes from 10 commits
a0c6886
56d8424
00969d8
aaa2b87
37283cd
97c1813
0fcdad7
9f9a555
43d0595
b0c2452
e818f4d
f86bfe8
f5017f3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -197,7 +197,6 @@ struct layout_stride { | |||
} | ||||
|
||||
template<class IntegralType> | ||||
MDSPAN_INLINE_FUNCTION | ||||
static constexpr const __strides_storage_t fill_strides(const std::array<IntegralType,extents_type::rank()>& s) { | ||||
return __strides_storage_t{static_cast<index_type>(s[Idxs])...}; | ||||
} | ||||
|
@@ -206,7 +205,7 @@ struct layout_stride { | |||
class IntegralType, | ||||
// The is_convertible condition is added to make sfinae valid | ||||
// the extents_type::rank() > 0 is added to avoid use of non-standard zero length c-array | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. removed |
||||
(std::is_convertible<IntegralType, typename extents_type::index_type>::value && (extents_type::rank() > 0)) | ||||
(std::is_convertible<IntegralType, typename extents_type::index_type>::value) | ||||
) | ||||
MDSPAN_INLINE_FUNCTION | ||||
// despite the requirement some compilers still complain about zero length array during parsing | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This comment seems out-of-date now. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. removed |
||||
|
@@ -218,7 +217,6 @@ struct layout_stride { | |||
|
||||
#ifdef __cpp_lib_span | ||||
template<class IntegralType> | ||||
MDSPAN_INLINE_FUNCTION | ||||
static constexpr const __strides_storage_t fill_strides(const std::span<IntegralType,extents_type::rank()>& s) { | ||||
return __strides_storage_t{static_cast<index_type>(s[Idxs])...}; | ||||
} | ||||
|
@@ -242,10 +240,13 @@ struct layout_stride { | |||
// Can't use defaulted parameter in the __deduction_workaround template because of a bug in MSVC warning C4348. | ||||
using __impl = __deduction_workaround<std::make_index_sequence<Extents::rank()>>; | ||||
|
||||
MDSPAN_FUNCTION | ||||
static constexpr __strides_storage_t strides_storage(detail::with_rank<0>) { | ||||
return {}; | ||||
} | ||||
|
||||
template <std::size_t N> | ||||
MDSPAN_FUNCTION | ||||
static constexpr __strides_storage_t strides_storage(detail::with_rank<N>) { | ||||
__strides_storage_t s{}; | ||||
|
||||
|
@@ -273,7 +274,7 @@ struct layout_stride { | |||
|
||||
//-------------------------------------------------------------------------------- | ||||
|
||||
MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr mapping() noexcept | ||||
MDSPAN_INLINE_FUNCTION constexpr mapping() noexcept | ||||
#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) | ||||
: __members{ | ||||
#else | ||||
|
@@ -299,7 +300,6 @@ struct layout_stride { | |||
_MDSPAN_TRAIT(std::is_nothrow_constructible, typename Extents::index_type, const std::remove_const_t<IntegralTypes>&) | ||||
) | ||||
) | ||||
MDSPAN_INLINE_FUNCTION | ||||
constexpr | ||||
mapping( | ||||
extents_type const& e, | ||||
|
@@ -333,8 +333,7 @@ struct layout_stride { | |||
// MSVC 19.32 does not like using index_type here, requires the typename Extents::index_type | ||||
// error C2641: cannot deduce template arguments for 'MDSPAN_IMPL_STANDARD_NAMESPACE::layout_stride::mapping' | ||||
_MDSPAN_TRAIT(std::is_convertible, const std::remove_const_t<IntegralTypes>&, typename Extents::index_type) && | ||||
_MDSPAN_TRAIT(std::is_nothrow_constructible, typename Extents::index_type, const std::remove_const_t<IntegralTypes>&) && | ||||
(Extents::rank() > 0) | ||||
_MDSPAN_TRAIT(std::is_nothrow_constructible, typename Extents::index_type, const std::remove_const_t<IntegralTypes>&) | ||||
) | ||||
) | ||||
MDSPAN_INLINE_FUNCTION | ||||
|
@@ -345,7 +344,7 @@ struct layout_stride { | |||
// despite the requirement some compilers still complain about zero length array during parsing | ||||
// making it length 1 now, but since the thing can't be instantiated due to requirement the actual | ||||
// instantiation of strides_storage will not fail despite mismatching length | ||||
IntegralTypes (&s)[extents_type::rank()>0?extents_type::rank():1] | ||||
const IntegralTypes (&s)[extents_type::rank()>0?extents_type::rank():1] | ||||
) noexcept | ||||
#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) | ||||
: __members{ | ||||
|
@@ -379,7 +378,6 @@ struct layout_stride { | |||
_MDSPAN_TRAIT(std::is_nothrow_constructible, typename Extents::index_type, const std::remove_const_t<IntegralTypes>&) | ||||
) | ||||
) | ||||
MDSPAN_INLINE_FUNCTION | ||||
constexpr | ||||
mapping( | ||||
extents_type const& e, | ||||
|
@@ -476,7 +474,8 @@ struct layout_stride { | |||
MDSPAN_INLINE_FUNCTION | ||||
constexpr index_type required_span_size() const noexcept { | ||||
index_type span_size = 1; | ||||
for(unsigned r = 0; r < extents_type::rank(); r++) { | ||||
// using int here to avoid warning about pointless comparison to 0 | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I suppose you mean the rank 0 case |
||||
for(int r = 0; r < static_cast<int>(extents_type::rank()); r++) { | ||||
// Return early if any of the extents are zero | ||||
if(extents().extent(r)==0) return 0; | ||||
span_size += ( static_cast<index_type>(extents().extent(r) - 1 ) * __strides_storage()[r]); | ||||
|
@@ -509,15 +508,18 @@ struct layout_stride { | |||
MDSPAN_INLINE_FUNCTION static constexpr bool is_unique() noexcept { return true; } | ||||
|
||||
private: | ||||
MDSPAN_INLINE_FUNCTION | ||||
constexpr bool exhaustive_for_nonzero_span_size() const | ||||
{ | ||||
return required_span_size() == __get_size(extents(), std::make_index_sequence<extents_type::rank()>()); | ||||
} | ||||
|
||||
MDSPAN_INLINE_FUNCTION | ||||
constexpr bool is_exhaustive_impl(detail::with_rank<0>) const | ||||
{ | ||||
return true; | ||||
} | ||||
MDSPAN_INLINE_FUNCTION | ||||
constexpr bool is_exhaustive_impl(detail::with_rank<1>) const | ||||
{ | ||||
if (required_span_size() != static_cast<index_type>(0)) { | ||||
|
@@ -526,6 +528,7 @@ struct layout_stride { | |||
return stride(0) == 1; | ||||
} | ||||
template <std::size_t N> | ||||
MDSPAN_INLINE_FUNCTION | ||||
constexpr bool is_exhaustive_impl(detail::with_rank<N>) const | ||||
{ | ||||
if (required_span_size() != static_cast<index_type>(0)) { | ||||
|
@@ -627,6 +630,7 @@ struct layout_stride { | |||
SliceSpecifiers... slices) const; | ||||
|
||||
template<class... SliceSpecifiers> | ||||
MDSPAN_INLINE_FUNCTION | ||||
friend constexpr auto submdspan_mapping( | ||||
const mapping& src, SliceSpecifiers... slices) { | ||||
return src.submdspan_mapping_impl(slices...); | ||||
|
@@ -637,10 +641,12 @@ struct layout_stride { | |||
namespace detail { | ||||
|
||||
template <class Layout, class Extents, class Mapping> | ||||
MDSPAN_INLINE_FUNCTION | ||||
constexpr void validate_strides(with_rank<0>, Layout, const Extents&, const Mapping&) | ||||
{} | ||||
|
||||
template <std::size_t N, class Layout, class Extents, class Mapping> | ||||
MDSPAN_INLINE_FUNCTION | ||||
constexpr void validate_strides(with_rank<N>, Layout, const Extents& ext, const Mapping& other) | ||||
{ | ||||
static_assert(std::is_same<typename Mapping::layout_type, layout_stride>::value && | ||||
|
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -2,6 +2,8 @@ | |||||||||
|
||||||||||
#include <cstddef> | ||||||||||
#include <type_traits> | ||||||||||
#include <array> | ||||||||||
#include <utility> | ||||||||||
|
||||||||||
namespace MDSPAN_IMPL_STANDARD_NAMESPACE { | ||||||||||
namespace detail { | ||||||||||
|
@@ -64,6 +66,112 @@ constexpr struct | |||||||||
} | ||||||||||
} stride; | ||||||||||
|
||||||||||
template<class T> | ||||||||||
MDSPAN_INLINE_FUNCTION | ||||||||||
constexpr void maybe_unused_variable(const T&) {} | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This function seems unused.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. removed |
||||||||||
|
||||||||||
|
||||||||||
// same as std::integral_constant but with __host__ __device__ annotations on | ||||||||||
// the implicit conversion function and the call operator | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do we need/want conversion from to the std:: counterpart? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. because there are places where we pass in std::integral_constant, and std::integral_constant is the standard approved way to pass certain ways to certain functions. For example |
||||||||||
template <class T, T v> | ||||||||||
struct integral_constant { | ||||||||||
using value_type = T; | ||||||||||
using type = integral_constant<T, v>; | ||||||||||
|
||||||||||
MDSPAN_INLINE_FUNCTION_DEFAULTED | ||||||||||
constexpr integral_constant() = default; | ||||||||||
|
||||||||||
MDSPAN_INLINE_FUNCTION_DEFAULTED | ||||||||||
constexpr integral_constant(std::integral_constant<T,v>) {}; | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can't be host device in the 1st place There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes it can. Nothing in this calls device functions. Constructor of std::integral_constant is defaulted, so it works too. The only thing which doesn't work of std::integral_constant is the conversion operator to its scaler type. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Then what Daniel said There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (minus the inline) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah clear bug (funnily enough one of the primary bugs we had elsewhere) |
||||||||||
|
||||||||||
static constexpr T value = v; | ||||||||||
MDSPAN_INLINE_FUNCTION constexpr operator value_type() const noexcept { | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Prefer There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||||||||||
return value; | ||||||||||
} | ||||||||||
MDSPAN_INLINE_FUNCTION constexpr value_type operator()() const noexcept { | ||||||||||
return value; | ||||||||||
} | ||||||||||
MDSPAN_INLINE_FUNCTION constexpr operator std::integral_constant<T,v>() const noexcept { | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No way that can be annotated host device There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Of course it works: https://godbolt.org/z/bb8qdT31Y We can use any standard library stuff as long as we don't call non-defaulted member functions. And we don't here. |
||||||||||
return std::integral_constant<T,v>{}; | ||||||||||
} | ||||||||||
}; | ||||||||||
|
||||||||||
// The tuple implementation only comes in play when using capabilities | ||||||||||
// such as submdspan which require C++17 anyway | ||||||||||
#if MDSPAN_HAS_CXX_17 | ||||||||||
template<class T, size_t Idx> | ||||||||||
struct tuple_member { | ||||||||||
using type = T; | ||||||||||
static constexpr size_t idx = Idx; | ||||||||||
T val; | ||||||||||
MDSPAN_FUNCTION constexpr T& get() { return val; } | ||||||||||
MDSPAN_FUNCTION constexpr const T& get() const { return val; } | ||||||||||
}; | ||||||||||
|
||||||||||
template<size_t SearchIdx, size_t Idx, class T> | ||||||||||
struct tuple_idx_matcher { | ||||||||||
using type = tuple_member<T, Idx>; | ||||||||||
template<class Other> | ||||||||||
MDSPAN_FUNCTION | ||||||||||
constexpr auto operator + (Other v) const { | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we just need something which expands. |
||||||||||
if constexpr (Idx == SearchIdx) { return *this; } | ||||||||||
else { return v; } | ||||||||||
} | ||||||||||
}; | ||||||||||
|
||||||||||
template<class IdxSeq, class ... Elements> | ||||||||||
struct tuple_impl; | ||||||||||
|
||||||||||
template<size_t ... Idx, class ... Elements> | ||||||||||
struct tuple_impl<std::index_sequence<Idx...>, Elements...>: public tuple_member<Elements, Idx> ... { | ||||||||||
|
||||||||||
MDSPAN_FUNCTION | ||||||||||
constexpr tuple_impl(Elements ... vals):tuple_member<Elements, Idx>{vals}... {} | ||||||||||
|
||||||||||
template<size_t N> | ||||||||||
MDSPAN_FUNCTION | ||||||||||
constexpr auto& get() { | ||||||||||
using base_t = decltype((tuple_idx_matcher<N, Idx, Elements>() + ...) ); | ||||||||||
return base_t::type::get(); | ||||||||||
} | ||||||||||
template<size_t N> | ||||||||||
MDSPAN_FUNCTION | ||||||||||
constexpr const auto& get() const { | ||||||||||
using base_t = decltype((tuple_idx_matcher<N, Idx, Elements>() + ...) ); | ||||||||||
return base_t::type::get(); | ||||||||||
} | ||||||||||
}; | ||||||||||
|
||||||||||
// A simple tuple-like class for representing slices internally and is compatible with device code | ||||||||||
// This doesn't support type access since we don't need it | ||||||||||
// This is not meant as an external API | ||||||||||
template<class ... Elements> | ||||||||||
struct tuple: public tuple_impl<decltype(std::make_index_sequence<sizeof...(Elements)>()), Elements...> { | ||||||||||
MDSPAN_FUNCTION | ||||||||||
constexpr tuple(Elements ... vals):tuple_impl<decltype(std::make_index_sequence<sizeof...(Elements)>()), Elements ...>(vals ...) {} | ||||||||||
}; | ||||||||||
|
||||||||||
template<size_t Idx, class ... Args> | ||||||||||
MDSPAN_FUNCTION | ||||||||||
constexpr auto& get(tuple<Args...>& vals) { return vals.template get<Idx>(); } | ||||||||||
|
||||||||||
template<size_t Idx, class ... Args> | ||||||||||
MDSPAN_FUNCTION | ||||||||||
constexpr const auto& get(const tuple<Args...>& vals) { return vals.template get<Idx>(); } | ||||||||||
|
||||||||||
template<class ... Elements> | ||||||||||
tuple(Elements ...) -> tuple<Elements...>; | ||||||||||
#endif | ||||||||||
|
||||||||||
template<class T, size_t ... Idx> | ||||||||||
constexpr auto c_array_to_std(std::index_sequence<Idx...>, const T(&values)[sizeof...(Idx)]) { | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would prefer calling it There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also thinking you could should write a for loop instead of a fold expression There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. got rid of the function for now, not really needed for what we do. |
||||||||||
return std::array<T, sizeof...(Idx)>{values[Idx]...}; | ||||||||||
} | ||||||||||
template<class T, size_t N> | ||||||||||
constexpr auto c_array_to_std(const T(&values)[N]) { | ||||||||||
return c_array_to_std(std::make_index_sequence<N>(), values); | ||||||||||
} | ||||||||||
|
||||||||||
} // namespace detail | ||||||||||
|
||||||||||
constexpr struct mdspan_non_standard_tag { | ||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This part of the comment is no longer needed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are users of mdspan which are not Kokkos users and rely on C++14.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
but yeah good observation I just pushed something to remove the comments