diff --git a/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp b/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp index fe1c639c02c2..58add66b1777 100644 --- a/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_MultiVector_decl.hpp @@ -562,7 +562,7 @@ namespace Tpetra { /// around with multiple memory spaces. MultiVector (const Teuchos::RCP& map, const typename dual_view_type::t_dev& d_view); - + /// \brief Expert mode constructor, that takes a Kokkos::DualView /// of the MultiVector's data and the "original" /// Kokkos::DualView of the data, and returns a MultiVector that @@ -841,6 +841,9 @@ namespace Tpetra { /// that one thread might win; it's that the value might get /// messed up.) /// + /// GPU synchronization semantics: Will sync if last access was on device. + /// Will not sync if last access was on host. + /// /// \param gblRow [in] Global row index of the entry to modify. /// This must be a valid global row index on the calling /// process with respect to the MultiVector's Map. @@ -878,6 +881,9 @@ namespace Tpetra { /// that one thread might win; it's that the value might get /// messed up.) /// + /// GPU synchronization semantics: Will sync if last access was on device. + /// Will not sync if last access was on host. + /// /// \param gblRow [in] Global row index of the entry to modify. /// This must be a valid global row index on the calling /// process with respect to the MultiVector's Map. diff --git a/packages/tpetra/core/test/MultiVector/MultiVector_UnitTests.cpp b/packages/tpetra/core/test/MultiVector/MultiVector_UnitTests.cpp index 74c6bb72bc08..3413d302a991 100644 --- a/packages/tpetra/core/test/MultiVector/MultiVector_UnitTests.cpp +++ b/packages/tpetra/core/test/MultiVector/MultiVector_UnitTests.cpp @@ -16,6 +16,7 @@ #include "Teuchos_DefaultSerialComm.hpp" #include "Teuchos_SerialDenseMatrix.hpp" #include "Teuchos_TypeNameTraits.hpp" +#include "Tpetra_Details_ExecutionSpaces.hpp" #include // FINISH: add test for MultiVector with a node containing zero local entries @@ -853,8 +854,8 @@ namespace { { tmv2x2.multiply(NO_TRANS,CONJ_TRANS,S1,tmv2x3,tmv2x3,S0); sdm2x2.multiply(NO_TRANS,CONJ_TRANS,S1,sdm2x3,sdm2x3,S0); - { - auto tmpView = tmv2x2.get1dView(); sdmView = arrayView(sdm2x2.values(),sdm2x2.numRows()*sdm2x2.numCols()); + { + auto tmpView = tmv2x2.get1dView(); sdmView = arrayView(sdm2x2.values(),sdm2x2.numRows()*sdm2x2.numCols()); TEST_COMPARE_FLOATING_ARRAYS(tmpView,sdmView,testingTol() * errorTolSlack); } } @@ -868,8 +869,8 @@ namespace { tmv2x2.multiply(CONJ_TRANS,NO_TRANS,S1,tmv3x2,tmv3x2,S0); Kokkos::fence (); sdm2x2.multiply(CONJ_TRANS,NO_TRANS,S1,sdm3x2,sdm3x2,S0); - { - auto tmpView = tmv2x2.get1dView(); sdmView = arrayView(sdm2x2.values(),sdm2x2.numRows()*sdm2x2.numCols()); + { + auto tmpView = tmv2x2.get1dView(); sdmView = arrayView(sdm2x2.values(),sdm2x2.numRows()*sdm2x2.numCols()); TEST_COMPARE_FLOATING_ARRAYS(tmpView,sdmView,testingTol() * errorTolSlack); } } @@ -883,8 +884,8 @@ namespace { tmv3x3.multiply(CONJ_TRANS,CONJ_TRANS,S1,tmv2x3,tmv3x2,S0); Kokkos::fence (); sdm3x3.multiply(CONJ_TRANS,CONJ_TRANS,S1,sdm2x3,sdm3x2,S0); - { - auto tmpView = tmv3x3.get1dView(); sdmView = arrayView(sdm3x3.values(),sdm3x3.numRows()*sdm3x3.numCols()); + { + auto tmpView = tmv3x3.get1dView(); sdmView = arrayView(sdm3x3.values(),sdm3x3.numRows()*sdm3x3.numCols()); TEST_COMPARE_FLOATING_ARRAYS(tmpView,sdmView,testingTol() * errorTolSlack); } } @@ -2854,7 +2855,7 @@ namespace { bjview[i] *= as(2); } } - + lclSuccess = success ? 1 : 0; gblSuccess = 0; // output argument reduceAll (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess)); @@ -4317,10 +4318,10 @@ namespace { const size_t numLclRows = 10; const size_t numVecs = 3; - /// KJ : release local object, this workflow is problematic. - /// a user create a device view and hand it to tpetra. + /// KJ : release local object, this workflow is problematic. + /// a user create a device view and hand it to tpetra. /// tpetra now has unmatched referecne count for host and device view - /// as the local device view is alive. this is the case that we do not want + /// as the local device view is alive. this is the case that we do not want /// to encourage users. typename dual_view_type::t_dev X_lcl ("X_lcl", numLclRows, numVecs); @@ -4346,13 +4347,13 @@ namespace { RCP map = rcp (new map_type (INVALID, numLclRows, indexBase, comm)); - /// KJ : release local object, this workflow is problematic. - /// a user create a device view and hand it to tpetra. + /// KJ : release local object, this workflow is problematic. + /// a user create a device view and hand it to tpetra. /// tpetra now has unmatched referecne count for host and device view - /// as the local device view is alive. this is the case that we do not want + /// as the local device view is alive. this is the case that we do not want /// to encourage users. MV X_gbl (map, X_lcl); - + { lclSuccess = success ? 1 : 0; gblSuccess = 0; // output argument @@ -4438,7 +4439,7 @@ namespace { // We modified on device above, and we're about to modify on host // now, so we need to sync to host first. auto X_host = X_gbl.getLocalViewHost(Tpetra::Access::ReadWrite); - + { lclSuccess = success ? 1 : 0; gblSuccess = 0; // output argument @@ -4451,7 +4452,7 @@ namespace { os << "Proc " << comm->getRank () << ": checkpoint 5" << std::endl; std::cerr << os.str (); } - + Kokkos::deep_copy (X_host, THREE); { lclSuccess = success ? 1 : 0; @@ -5184,7 +5185,7 @@ namespace { const device_view x_d = x.getLocalViewDevice(Tpetra::Access::ReadWrite); - host_view y_h = create_mirror_view(x_d); + host_view y_h = create_mirror_view(x_d); size_t correct_count; // Check to see if we'll be deep_copy-ing between memory spaces @@ -5196,42 +5197,42 @@ namespace { } - // Stop / Start (reset first to clear counts from previous unit test calls) - Tpetra::Details::DeepCopyCounter::reset(); + // Stop / Start (reset first to clear counts from previous unit test calls) + Tpetra::Details::DeepCopyCounter::reset(); Tpetra::Details::DeepCopyCounter::start(); Kokkos::deep_copy(y_h,x_d); - Tpetra::Details::DeepCopyCounter::stop(); - size_t count = Tpetra::Details::DeepCopyCounter::get_count_different_space(); + Tpetra::Details::DeepCopyCounter::stop(); + size_t count = Tpetra::Details::DeepCopyCounter::get_count_different_space(); TEST_EQUALITY(count,correct_count); // Reset / get_count (should be zero now) - Tpetra::Details::DeepCopyCounter::reset(); - count = Tpetra::Details::DeepCopyCounter::get_count_different_space(); + Tpetra::Details::DeepCopyCounter::reset(); + count = Tpetra::Details::DeepCopyCounter::get_count_different_space(); TEST_EQUALITY(count,0); // Second Stop / Start (should have the original count) Tpetra::Details::DeepCopyCounter::start(); Kokkos::deep_copy(y_h,x_d); - Tpetra::Details::DeepCopyCounter::stop(); - count = Tpetra::Details::DeepCopyCounter::get_count_different_space(); + Tpetra::Details::DeepCopyCounter::stop(); + count = Tpetra::Details::DeepCopyCounter::get_count_different_space(); TEST_EQUALITY(count,correct_count); // This guy should not get counted, since the counter is stopped Kokkos::deep_copy(y_h,x_d); - count = Tpetra::Details::DeepCopyCounter::get_count_different_space(); + count = Tpetra::Details::DeepCopyCounter::get_count_different_space(); TEST_EQUALITY(count,correct_count); // Third Second Stop / Start (should have double the original count) Tpetra::Details::DeepCopyCounter::start(); Kokkos::deep_copy(y_h,x_d); - Tpetra::Details::DeepCopyCounter::stop(); - count = Tpetra::Details::DeepCopyCounter::get_count_different_space(); + Tpetra::Details::DeepCopyCounter::stop(); + count = Tpetra::Details::DeepCopyCounter::get_count_different_space(); TEST_EQUALITY(count,2*correct_count); - + } @@ -5244,45 +5245,45 @@ namespace { // Global fences size_t global_correct_count=1; - // Stop / Start (reset first to clear counts from previous unit test calls) - Tpetra::Details::FenceCounter::reset(); + // Stop / Start (reset first to clear counts from previous unit test calls) + Tpetra::Details::FenceCounter::reset(); Tpetra::Details::FenceCounter::start(); Kokkos::fence(); - Tpetra::Details::FenceCounter::stop(); - size_t global_count = Tpetra::Details::FenceCounter::get_count_global(space); - size_t instance_count = Tpetra::Details::FenceCounter::get_count_instance(space); + Tpetra::Details::FenceCounter::stop(); + size_t global_count = Tpetra::Details::FenceCounter::get_count_global(space); + size_t instance_count = Tpetra::Details::FenceCounter::get_count_instance(space); TEST_EQUALITY(global_count,global_correct_count); TEST_EQUALITY(instance_count,0); // Reset / get_count (should be zero now) - Tpetra::Details::FenceCounter::reset(); - global_count =Tpetra::Details::FenceCounter::get_count_global(space); - instance_count = Tpetra::Details::FenceCounter::get_count_instance(space); + Tpetra::Details::FenceCounter::reset(); + global_count =Tpetra::Details::FenceCounter::get_count_global(space); + instance_count = Tpetra::Details::FenceCounter::get_count_instance(space); TEST_EQUALITY(global_count,0); TEST_EQUALITY(instance_count,0); // Second Stop / Start (should have the original count) Tpetra::Details::FenceCounter::start(); Kokkos::fence(); - Tpetra::Details::FenceCounter::stop(); - global_count =Tpetra::Details::FenceCounter::get_count_global(space); - instance_count = Tpetra::Details::FenceCounter::get_count_instance(space); + Tpetra::Details::FenceCounter::stop(); + global_count =Tpetra::Details::FenceCounter::get_count_global(space); + instance_count = Tpetra::Details::FenceCounter::get_count_instance(space); TEST_EQUALITY(global_count,global_correct_count); TEST_EQUALITY(instance_count,0); // This guy should not get counted, since the counter is stopped Kokkos::fence(); - global_count =Tpetra::Details::FenceCounter::get_count_global(space); - instance_count = Tpetra::Details::FenceCounter::get_count_instance(space); + global_count =Tpetra::Details::FenceCounter::get_count_global(space); + instance_count = Tpetra::Details::FenceCounter::get_count_instance(space); TEST_EQUALITY(global_count,global_correct_count); TEST_EQUALITY(instance_count,0); // Third Second Stop / Start (should have double the original count) Tpetra::Details::FenceCounter::start(); Kokkos::fence(); - Tpetra::Details::FenceCounter::stop(); - global_count =Tpetra::Details::FenceCounter::get_count_global(space); - instance_count = Tpetra::Details::FenceCounter::get_count_instance(space); + Tpetra::Details::FenceCounter::stop(); + global_count =Tpetra::Details::FenceCounter::get_count_global(space); + instance_count = Tpetra::Details::FenceCounter::get_count_instance(space); TEST_EQUALITY(global_count,2*global_correct_count); TEST_EQUALITY(instance_count,0); @@ -5290,54 +5291,49 @@ namespace { // Instance Fences size_t instance_correct_count = 1; - // Stop / Start (reset first to clear counts from previous unit test calls) - Tpetra::Details::FenceCounter::reset(); + // Stop / Start (reset first to clear counts from previous unit test calls) + Tpetra::Details::FenceCounter::reset(); Tpetra::Details::FenceCounter::start(); exec_space.fence(); - Tpetra::Details::FenceCounter::stop(); - global_count =Tpetra::Details::FenceCounter::get_count_global(space); - instance_count = Tpetra::Details::FenceCounter::get_count_instance(space); + Tpetra::Details::FenceCounter::stop(); + global_count =Tpetra::Details::FenceCounter::get_count_global(space); + instance_count = Tpetra::Details::FenceCounter::get_count_instance(space); TEST_EQUALITY(global_count,0); TEST_EQUALITY(instance_count,instance_correct_count); // Reset / get_count (should be zero now) - Tpetra::Details::FenceCounter::reset(); - global_count =Tpetra::Details::FenceCounter::get_count_global(space); - instance_count = Tpetra::Details::FenceCounter::get_count_instance(space); + Tpetra::Details::FenceCounter::reset(); + global_count =Tpetra::Details::FenceCounter::get_count_global(space); + instance_count = Tpetra::Details::FenceCounter::get_count_instance(space); TEST_EQUALITY(global_count,0); TEST_EQUALITY(instance_count,0); // Second Stop / Start (should have the original count) Tpetra::Details::FenceCounter::start(); - exec_space.fence(); - Tpetra::Details::FenceCounter::stop(); - global_count =Tpetra::Details::FenceCounter::get_count_global(space); - instance_count = Tpetra::Details::FenceCounter::get_count_instance(space); + exec_space.fence(); + Tpetra::Details::FenceCounter::stop(); + global_count =Tpetra::Details::FenceCounter::get_count_global(space); + instance_count = Tpetra::Details::FenceCounter::get_count_instance(space); TEST_EQUALITY(global_count,0); TEST_EQUALITY(instance_count,instance_correct_count); // This guy should not get counted, since the counter is stopped - exec_space.fence(); - global_count =Tpetra::Details::FenceCounter::get_count_global(space); - instance_count = Tpetra::Details::FenceCounter::get_count_instance(space); + exec_space.fence(); + global_count =Tpetra::Details::FenceCounter::get_count_global(space); + instance_count = Tpetra::Details::FenceCounter::get_count_instance(space); TEST_EQUALITY(global_count,0); TEST_EQUALITY(instance_count,instance_correct_count); // Third Second Stop / Start (should have double the original count) Tpetra::Details::FenceCounter::start(); - exec_space.fence(); - Tpetra::Details::FenceCounter::stop(); - global_count =Tpetra::Details::FenceCounter::get_count_global(space); - instance_count = Tpetra::Details::FenceCounter::get_count_instance(space); + exec_space.fence(); + Tpetra::Details::FenceCounter::stop(); + global_count =Tpetra::Details::FenceCounter::get_count_global(space); + instance_count = Tpetra::Details::FenceCounter::get_count_instance(space); TEST_EQUALITY(global_count,0); - TEST_EQUALITY(instance_count,2*instance_correct_count); + TEST_EQUALITY(instance_count,2*instance_correct_count); } - - - - - #ifdef KOKKOS_ENABLE_OPENMP TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL( MultiVector, OpenMP_ThreadedSum, LO , GO , Scalar , Node ) { // Restrict to OpenMPNode and disable in debug mode (weird things happen w/ GCC 8.3.0 since RCP's @@ -5366,6 +5362,103 @@ namespace { #endif + + TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL( MultiVector, syncTest_replaceGlobalValues, LO , GO , Scalar , Node ) { + using namespace Tpetra::Details; + typedef Tpetra::Map map_type; + typedef Tpetra::MultiVector MV; + typedef typename MV::impl_scalar_type IST; + IST IST_one = Teuchos::ScalarTraits::one(); + Scalar S_one = Teuchos::ScalarTraits::one(); + + // Make vector + RCP > comm = Tpetra::getDefaultComm (); + RCP map = rcp (new map_type (100, 0, comm)); + MV x(map, 1); + + // Names and such + auto exec_space = typename Node::execution_space(); + const std::string space = exec_space.name(); + const bool is_gpu = Spaces::is_gpu_exec_space(); + size_t global_count, instance_count; + + // NOTE: Global fences (aka Kokkos::fence()) fence both the host and device execution space, + // so our counts there get doubled. + + /***********************************************************************/ + // Device-then-replace (impl_scalar_type) + out<<"*** Device-then-replace impl_scalar_type ***"<getGlobalElement(0),0,IST_one); + Tpetra::Details::FenceCounter::stop(); + global_count = Tpetra::Details::FenceCounter::get_count_global(space); + instance_count = Tpetra::Details::FenceCounter::get_count_instance(space); + TEST_EQUALITY(global_count,2*is_gpu); + TEST_EQUALITY(instance_count,0); + } + + /***********************************************************************/ + // Host-then-replace (impl_scalar_type) + out<<"*** Host-then-replace impl_scalar_type ***"<getGlobalElement(0),0,IST_one); + Tpetra::Details::FenceCounter::stop(); + global_count = Tpetra::Details::FenceCounter::get_count_global(space); + instance_count = Tpetra::Details::FenceCounter::get_count_instance(space); + TEST_EQUALITY(global_count,0); + TEST_EQUALITY(instance_count,0); + } + + /***********************************************************************/ + // Device-then-replace (Scalar) + out<<"*** Device-then-replace Scalar ***"<getGlobalElement(0),0,S_one); + Tpetra::Details::FenceCounter::stop(); + global_count = Tpetra::Details::FenceCounter::get_count_global(space); + instance_count = Tpetra::Details::FenceCounter::get_count_instance(space); + TEST_EQUALITY(global_count,2*is_gpu); + TEST_EQUALITY(instance_count,0); + } + + /***********************************************************************/ + // Host-then-replace (Scalar) + out<<"*** Host-then-replace Scalar ***"<getGlobalElement(0),0,S_one); + Tpetra::Details::FenceCounter::stop(); + global_count = Tpetra::Details::FenceCounter::get_count_global(space); + instance_count = Tpetra::Details::FenceCounter::get_count_instance(space); + TEST_EQUALITY(global_count,0); + TEST_EQUALITY(instance_count,0); + } + + } + + // // INSTANTIATIONS // @@ -5411,7 +5504,9 @@ namespace { TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT( MultiVector, Swap, LO, GO, SCALAR, NODE ) \ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT( MultiVector, DualViewRefcountCheck, LO, GO, SCALAR, NODE ) \ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT( MultiVector, CopyCounterCheck, LO, GO, SCALAR, NODE ) \ - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT( MultiVector, FenceCounterCheck, LO, GO, SCALAR, NODE ) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT( MultiVector, FenceCounterCheck, LO, GO, SCALAR, NODE ) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT( MultiVector, syncTest_replaceGlobalValues, LO, GO, SCALAR, NODE ) + #ifdef KOKKOS_ENABLE_OPENMP // Add special test for OpenMP