From fda2d8a3c2549f0e9ffacb3491c5198cf865cda1 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Tue, 19 Nov 2024 15:46:57 -0500 Subject: [PATCH] Pass the Execution Space as TT template parameter Defaults to Host execution so existing code is not affected. Properly set by make_tt. We cannot query TT::derivedT for flags because derivedT is still an incomplete type at the time TT is instantiated. For now pass the Space as template parameter. We need to find a different way if we want to have multiple implementations of a task. Signed-off-by: Joseph Schuchart --- ttg/ttg/madness/fwd.h | 4 ++- ttg/ttg/madness/ttg.h | 5 ++-- ttg/ttg/make_tt.h | 7 +---- ttg/ttg/parsec/fwd.h | 4 ++- ttg/ttg/parsec/task.h | 12 ++++---- ttg/ttg/parsec/ttg.h | 70 +++++++++++++++++-------------------------- 6 files changed, 43 insertions(+), 59 deletions(-) diff --git a/ttg/ttg/madness/fwd.h b/ttg/ttg/madness/fwd.h index 6bee1a832..99a864870 100644 --- a/ttg/ttg/madness/fwd.h +++ b/ttg/ttg/madness/fwd.h @@ -8,7 +8,9 @@ namespace ttg_madness { - template > + template , + ttg::ExecutionSpace Space = ttg::ExecutionSpace::Host> class TT; /// \internal the OG name diff --git a/ttg/ttg/madness/ttg.h b/ttg/ttg/madness/ttg.h index d84f29dcc..7884e8865 100644 --- a/ttg/ttg/madness/ttg.h +++ b/ttg/ttg/madness/ttg.h @@ -184,8 +184,9 @@ namespace ttg_madness { /// values /// flowing into this TT; a const type indicates nonmutating (read-only) use, nonconst type /// indicates mutating use (e.g. the corresponding input can be used as scratch, moved-from, etc.) 
- template - class TT : public ttg::TTBase, public ::madness::WorldObject> { + template + class TT : public ttg::TTBase, public ::madness::WorldObject> { + static_assert(Space == ttg::ExecutionSpace::Host, "MADNESS backend only supports Host Execution Space"); static_assert(ttg::meta::is_typelist_v, "The fourth template for ttg::TT must be a ttg::typelist containing the input types"); using input_tuple_type = ttg::meta::typelist_to_tuple_t; diff --git a/ttg/ttg/make_tt.h b/ttg/ttg/make_tt.h index d3912576a..c69492159 100644 --- a/ttg/ttg/make_tt.h +++ b/ttg/ttg/make_tt.h @@ -17,7 +17,7 @@ class CallableWrapTT : public TT< keyT, output_terminalsT, CallableWrapTT, - ttg::typelist> { + ttg::typelist, space> { using baseT = typename CallableWrapTT::ttT; using input_values_tuple_type = typename baseT::input_values_tuple_type; @@ -44,11 +44,6 @@ class CallableWrapTT void; #endif // TTG_HAVE_COROUTINE -public: - static constexpr bool have_cuda_op = (space == ttg::ExecutionSpace::CUDA); - static constexpr bool have_hip_op = (space == ttg::ExecutionSpace::HIP); - static constexpr bool have_level_zero_op = (space == ttg::ExecutionSpace::L0); - protected: template diff --git a/ttg/ttg/parsec/fwd.h b/ttg/ttg/parsec/fwd.h index de7996962..8b07eaf4a 100644 --- a/ttg/ttg/parsec/fwd.h +++ b/ttg/ttg/parsec/fwd.h @@ -10,7 +10,9 @@ extern "C" struct parsec_context_s; namespace ttg_parsec { - template > + template , + ttg::ExecutionSpace Space = ttg::ExecutionSpace::Host> class TT; /// \internal the OG name diff --git a/ttg/ttg/parsec/task.h b/ttg/ttg/parsec/task.h index f29ca8ecb..65712dec1 100644 --- a/ttg/ttg/parsec/task.h +++ b/ttg/ttg/parsec/task.h @@ -252,9 +252,9 @@ namespace ttg_parsec { template parsec_hook_return_t invoke_op() { if constexpr (Space == ttg::ExecutionSpace::Host) { - return TT::template static_op(&this->parsec_task); + return TT::static_op(&this->parsec_task); } else { - return TT::template device_static_op(&this->parsec_task); + return 
TT::device_static_op(&this->parsec_task); } } @@ -263,7 +263,7 @@ namespace ttg_parsec { if constexpr (Space == ttg::ExecutionSpace::Host) { return PARSEC_HOOK_RETURN_DONE; } else { - return TT::template device_static_evaluate(&this->parsec_task); + return TT::device_static_evaluate(&this->parsec_task); } } @@ -310,9 +310,9 @@ namespace ttg_parsec { template parsec_hook_return_t invoke_op() { if constexpr (Space == ttg::ExecutionSpace::Host) { - return TT::template static_op(&this->parsec_task); + return TT::static_op(&this->parsec_task); } else { - return TT::template device_static_op(&this->parsec_task); + return TT::device_static_op(&this->parsec_task); } } @@ -321,7 +321,7 @@ namespace ttg_parsec { if constexpr (Space == ttg::ExecutionSpace::Host) { return PARSEC_HOOK_RETURN_DONE; } else { - return TT::template device_static_evaluate(&this->parsec_task); + return TT::device_static_evaluate(&this->parsec_task); } } diff --git a/ttg/ttg/parsec/ttg.h b/ttg/ttg/parsec/ttg.h index 47ab79068..e9abd4afd 100644 --- a/ttg/ttg/parsec/ttg.h +++ b/ttg/ttg/parsec/ttg.h @@ -514,8 +514,9 @@ namespace ttg_parsec { #endif // TTG_USE_USER_TERMDET } - template > - void register_tt_profiling(const TT *t) { + template , ttg::ExecutionSpace Space> + void register_tt_profiling(const TT *t) { #if defined(PARSEC_PROF_TRACE) std::stringstream ss; build_composite_name_rec(t->ttg_ptr(), ss); @@ -1180,7 +1181,7 @@ namespace ttg_parsec { } // namespace detail - template + template class TT : public ttg::TTBase, detail::ParsecTTBase { private: /// preconditions @@ -1217,29 +1218,17 @@ namespace ttg_parsec { public: /// @return true if derivedT::have_cuda_op exists and is defined to true static constexpr bool derived_has_cuda_op() { - if constexpr (ttg::meta::is_detected_v) { - return derivedT::have_cuda_op; - } else { - return false; - } + return Space == ttg::ExecutionSpace::CUDA; } /// @return true if derivedT::have_hip_op exists and is defined to true static constexpr bool 
derived_has_hip_op() { - if constexpr (ttg::meta::is_detected_v) { - return derivedT::have_hip_op; - } else { - return false; - } + return Space == ttg::ExecutionSpace::HIP; } /// @return true if derivedT::have_hip_op exists and is defined to true static constexpr bool derived_has_level_zero_op() { - if constexpr (ttg::meta::is_detected_v) { - return derivedT::have_level_zero_op; - } else { - return false; - } + return Space == ttg::ExecutionSpace::L0; } /// @return true if the TT supports device execution @@ -1354,18 +1343,17 @@ namespace ttg_parsec { /// dispatches a call to derivedT::op /// @return void if called a synchronous function, or ttg::coroutine_handle<> if called a coroutine (if non-null, /// points to the suspended coroutine) - template + template auto op(Args &&...args) { derivedT *derived = static_cast(this); - //if constexpr (Space == ttg::ExecutionSpace::Host) { - using return_type = decltype(derived->op(std::forward(args)...)); - if constexpr (std::is_same_v) { - derived->op(std::forward(args)...); - return; - } - else { - return derived->op(std::forward(args)...); - } + using return_type = decltype(derived->op(std::forward(args)...)); + if constexpr (std::is_same_v) { + derived->op(std::forward(args)...); + return; + } + else { + return derived->op(std::forward(args)...); + } } template @@ -1418,7 +1406,6 @@ namespace ttg_parsec { /** * Submit callback called by PaRSEC once all input transfers have completed. 
*/ - template static int device_static_submit(parsec_device_gpu_module_t *gpu_device, parsec_gpu_task_t *gpu_task, parsec_gpu_exec_stream_t *gpu_stream) { @@ -1464,7 +1451,7 @@ namespace ttg_parsec { #endif // defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) && defined(TTG_HAVE_CUDA) /* Here we call back into the coroutine again after the transfers have completed */ - static_op(&task->parsec_task); + static_op(&task->parsec_task); ttg::device::detail::reset_current(); @@ -1494,7 +1481,6 @@ namespace ttg_parsec { return rc; } - template static parsec_hook_return_t device_static_evaluate(parsec_task_t* parsec_task) { task_t *task = (task_t*)parsec_task; @@ -1509,7 +1495,7 @@ namespace ttg_parsec { gpu_task->task_type = 0; // user task gpu_task->last_data_check_epoch = 0; // used internally gpu_task->pushout = 0; - gpu_task->submit = &TT::device_static_submit; + gpu_task->submit = &TT::device_static_submit; // one way to force the task device // currently this will probably break all of PaRSEC if this hint @@ -1527,7 +1513,7 @@ namespace ttg_parsec { task->dev_ptr->task_class = *task->parsec_task.task_class; // first invocation of the coroutine to get the coroutine handle - static_op(parsec_task); + static_op(parsec_task); /* when we come back here, the flows in gpu_task are set (see register_device_memory) */ @@ -1577,7 +1563,6 @@ namespace ttg_parsec { } - template static parsec_hook_return_t device_static_op(parsec_task_t* parsec_task) { static_assert(derived_has_device_op()); @@ -1649,7 +1634,6 @@ namespace ttg_parsec { } #endif // TTG_HAVE_DEVICE - template static parsec_hook_return_t static_op(parsec_task_t *parsec_task) { task_t *task = (task_t*)parsec_task; @@ -1675,14 +1659,14 @@ namespace ttg_parsec { if constexpr (!ttg::meta::is_void_v && !ttg::meta::is_empty_tuple_v) { auto input = make_tuple_of_ref_from_array(task, std::make_index_sequence{}); - TTG_PROCESS_TT_OP_RETURN(suspended_task_address, task->coroutine_id, baseobj->template op(task->key, std::move(input), 
obj->output_terminals)); + TTG_PROCESS_TT_OP_RETURN(suspended_task_address, task->coroutine_id, baseobj->op(task->key, std::move(input), obj->output_terminals)); } else if constexpr (!ttg::meta::is_void_v && ttg::meta::is_empty_tuple_v) { - TTG_PROCESS_TT_OP_RETURN(suspended_task_address, task->coroutine_id, baseobj->template op(task->key, obj->output_terminals)); + TTG_PROCESS_TT_OP_RETURN(suspended_task_address, task->coroutine_id, baseobj->op(task->key, obj->output_terminals)); } else if constexpr (ttg::meta::is_void_v && !ttg::meta::is_empty_tuple_v) { auto input = make_tuple_of_ref_from_array(task, std::make_index_sequence{}); - TTG_PROCESS_TT_OP_RETURN(suspended_task_address, task->coroutine_id, baseobj->template op(std::move(input), obj->output_terminals)); + TTG_PROCESS_TT_OP_RETURN(suspended_task_address, task->coroutine_id, baseobj->op(std::move(input), obj->output_terminals)); } else if constexpr (ttg::meta::is_void_v && ttg::meta::is_empty_tuple_v) { - TTG_PROCESS_TT_OP_RETURN(suspended_task_address, task->coroutine_id, baseobj->template op(obj->output_terminals)); + TTG_PROCESS_TT_OP_RETURN(suspended_task_address, task->coroutine_id, baseobj->op(obj->output_terminals)); } else { ttg::abort(); } @@ -1758,7 +1742,6 @@ namespace ttg_parsec { return PARSEC_HOOK_RETURN_DONE; } - template static parsec_hook_return_t static_op_noarg(parsec_task_t *parsec_task) { task_t *task = static_cast(parsec_task); @@ -1774,9 +1757,9 @@ namespace ttg_parsec { assert(detail::parsec_ttg_caller == NULL); detail::parsec_ttg_caller = task; if constexpr (!ttg::meta::is_void_v) { - TTG_PROCESS_TT_OP_RETURN(suspended_task_address, task->coroutine_id, baseobj->template op(task->key, obj->output_terminals)); + TTG_PROCESS_TT_OP_RETURN(suspended_task_address, task->coroutine_id, baseobj->op(task->key, obj->output_terminals)); } else if constexpr (ttg::meta::is_void_v) { - TTG_PROCESS_TT_OP_RETURN(suspended_task_address, task->coroutine_id, baseobj->template 
op(obj->output_terminals)); + TTG_PROCESS_TT_OP_RETURN(suspended_task_address, task->coroutine_id, baseobj->op(obj->output_terminals)); } else // unreachable ttg:: abort(); detail::parsec_ttg_caller = NULL; @@ -4330,7 +4313,7 @@ namespace ttg_parsec { void make_executable() override { world.impl().register_tt_profiling(this); register_static_op_function(); - ttg::TTBase::make_executable(); + ::ttg::TTBase::make_executable(); } /// keymap accessor @@ -4376,6 +4359,7 @@ namespace ttg_parsec { return ttg::device::Device(dm(key), ttg::ExecutionSpace::L0); } else { throw std::runtime_error("Unknown device type!"); + return ttg::device::Device{}; } }; }