A few modifications to simplify some of the GPU related code and pote…

…ntially fix some bugs...
LLNL · Jan 3, 2024 · c049c83 · c049c83
1 parent a1e9424
commit c049c83
Show file tree

Hide file tree

Showing 11 changed files with 121 additions and 126 deletions.
diff --git a/src/mechanics_driver.cpp b/src/mechanics_driver.cpp
@@ -171,8 +171,7 @@ int main(int argc, char *argv[])
    else if (toml_opt.rtmodel == RTModel::GPU) {
 #if defined(RAJA_ENABLE_CUDA) 
       device_config = "raja-cuda";
-#endif
-#if defined(RAJA_ENABLE_HIP)
+#elif defined(RAJA_ENABLE_HIP)
       device_config = "raja-hip";
 #endif
    }

diff --git a/src/mechanics_ecmech.cpp b/src/mechanics_ecmech.cpp
@@ -107,7 +107,7 @@ void kernel_postprocessing(const int npts, const int nstatev, const double dt, c
                            const double* stress_svec_p_array, const double* vol_ratio_array,
                            const double* eng_int_array, const double* beg_state_vars_array,
                            double* state_vars_array, double* stress_array,
-                           double* ddsdde_array)
+                           double* ddsdde_array, Assembly assembly)
 {
    const int ind_int_eng = nstatev - ecmech::ne;
    const int ind_pl_work = ecmech::evptn::iHistA_flowStr;
@@ -151,7 +151,12 @@ void kernel_postprocessing(const int npts, const int nstatev, const double dt, c
          stress[2] += stress_mean;
       }); // end of npts loop
 
-   MFEM_FORALL(i_pts, npts, {
+   // No need to transpose this if running on the GPU and doing EA
+   if ((assembly == Assembly::EA) and mfem::Device::Allows(Backend::DEVICE_MASK)) { return; }
+   else
+   {
+      // std::cout << "rotate tan stiffness mat" << std::endl;
+      MFEM_FORALL(i_pts, npts, {
          // ExaCMech saves this in Row major, so we need to get out the transpose.
          // The good thing is we can do this all in place no problem.
          double* ddsdde = &(ddsdde_array[i_pts * ecmech::nsvec * ecmech::nsvec]);
@@ -163,6 +168,7 @@ void kernel_postprocessing(const int npts, const int nstatev, const double dt, c
             }
          }
       });
+   }
 } // end of post-processing func
 
 // The different CPU, OpenMP, and GPU kernels aren't needed here, since they're
@@ -247,6 +253,6 @@ void ExaCMechModel::ModelSetup(const int nqpts, const int nelems, const int /*sp
    CALI_MARK_BEGIN("ecmech_postprocessing");
    kernel_postprocessing(npts, nstatev, dt, dEff, stress_svec_p_array_data,
                          vol_ratio_array_data, eng_int_array_data, state_vars_beg, state_vars_array,
-                         stress_array, ddsdde_array);
+                         stress_array, ddsdde_array, assembly);
    CALI_MARK_END("ecmech_postprocessing");
 } // End of ModelSetup function
diff --git a/src/mechanics_ecmech.hpp b/src/mechanics_ecmech.hpp
@@ -43,9 +43,9 @@ class ExaCMechModel : public ExaModel
                     mfem::QuadratureFunction *_q_matVars1,
                     mfem::ParGridFunction* _beg_coords, mfem::ParGridFunction* _end_coords,
                     mfem::Vector *_props, int _nProps, int _nStateVars, double _temp_k,
-                    ecmech::ExecutionStrategy _accel, bool _PA) :
+                    ecmech::ExecutionStrategy _accel, Assembly _assembly) :
          ExaModel(_q_stress0, _q_stress1, _q_matGrad, _q_matVars0, _q_matVars1,
-                  _beg_coords, _end_coords, _props, _nProps, _nStateVars, _PA),
+                  _beg_coords, _end_coords, _props, _nProps, _nStateVars, _assembly),
          temp_k(_temp_k), accel(_accel)
       {
          // First find the total number of points that we're dealing with so nelems * nqpts
@@ -128,10 +128,10 @@ class ECMechXtalModel : public ExaCMechModel
                       mfem::QuadratureFunction *_q_matVars1,
                       mfem::ParGridFunction* _beg_coords, mfem::ParGridFunction* _end_coords,
                       mfem::Vector *_props, int _nProps, int _nStateVars, double _temp_k,
-                      ecmech::ExecutionStrategy _accel, bool _PA) :
+                      ecmech::ExecutionStrategy _accel, Assembly _assembly) :
          ExaCMechModel(_q_stress0, _q_stress1, _q_matGrad, _q_matVars0, _q_matVars1,
                        _beg_coords, _end_coords, _props, _nProps, _nStateVars, _temp_k,
-                       _accel, _PA)
+                       _accel, _assembly)
       {
          // For FCC material models we have the following state variables
          // and their number of components
@@ -248,32 +248,32 @@ class ECMechXtalModel : public ExaCMechModel
       /// MFEM_FORALL requiring it to be public
       void init_state_vars(mfem::QuadratureFunction *_q_matVars0, std::vector<double> hist_init)
       {
-	 mfem::Vector histInit(ecmechXtal::numHist, mfem::Device::GetMemoryType());
-	 histInit.UseDevice(true); histInit.HostReadWrite();
-	 assert(hist_init.size() == ecmechXtal::numHist);
+         mfem::Vector histInit(ecmechXtal::numHist, mfem::Device::GetMemoryType());
+         histInit.UseDevice(true); histInit.HostReadWrite();
+         assert(hist_init.size() == ecmechXtal::numHist);
 
          for (uint i = 0; i < hist_init.size(); i++) {
-	    histInit(i) = hist_init.at(i);
+            histInit(i) = hist_init.at(i);
          }
 
-	 const double* histInit_vec = histInit.Read(); 
+         const double* histInit_vec = histInit.Read(); 
 
          double* state_vars = _q_matVars0->ReadWrite();
 
          int qf_size = (_q_matVars0->Size()) / (_q_matVars0->GetVDim());
 
          int vdim = _q_matVars0->GetVDim();
 
-	 const int ind_dp_eff_ = ind_dp_eff;
-	 const int ind_eql_pl_strain_ = ind_eql_pl_strain;
-	 const int ind_pl_work_ = ind_pl_work;
-	 const int ind_num_evals_ = ind_num_evals;
-	 const int ind_hardness_ = ind_hardness;
-	 const int ind_vols_ = ind_vols;
-	 const int ind_int_eng_ = ind_int_eng;
-	 const int ind_dev_elas_strain_ = ind_dev_elas_strain;
-	 const int ind_gdot_ = ind_gdot;
-	 const int nslip = num_slip;
+         const int ind_dp_eff_ = ind_dp_eff;
+         const int ind_eql_pl_strain_ = ind_eql_pl_strain;
+         const int ind_pl_work_ = ind_pl_work;
+         const int ind_num_evals_ = ind_num_evals;
+         const int ind_hardness_ = ind_hardness;
+         const int ind_vols_ = ind_vols;
+         const int ind_int_eng_ = ind_int_eng;
+         const int ind_dev_elas_strain_ = ind_dev_elas_strain;
+         const int ind_gdot_ = ind_gdot;
+         const int nslip = num_slip;
 
          mfem::MFEM_FORALL(i, qf_size, {
             const int ind = i * vdim;
@@ -301,14 +301,14 @@ class ECMechXtalModel : public ExaCMechModel
       // We're re-using our deformation gradient quadrature function for this
       // calculation which is why we use a 9 dim QF rather than a 6 dim QF
       virtual void calcDpMat(mfem::QuadratureFunction &DpMat) const override {
-	 auto slip_geom = mat_model->getSlipGeom();
+         auto slip_geom = mat_model->getSlipGeom();
          const int ind_slip = ind_gdot;
-	 const int ind_quats_ = ind_quats;
+         const int ind_quats_ = ind_quats;
          const int npts = DpMat.GetSpace()->GetSize();
          auto gdot = mfem::Reshape(matVars1->Read(), matVars1->GetVDim(), npts);
          auto d_dpmat = mfem::Reshape(DpMat.Write(), 3, 3, npts);
 
-	 static constexpr const int nslip = ecmechXtal::nslip;
+         static constexpr const int nslip = ecmechXtal::nslip;
 
          MFEM_ASSERT(DpMat.GetVDim() == 9, "DpMat needs to have a vdim of 9");
 

diff --git a/src/mechanics_kernels.hpp b/src/mechanics_kernels.hpp
@@ -70,7 +70,7 @@ void ComputeVolAvgTensor(const mfem::ParFiniteElementSpace* fes,
             el_vol = vol_sum.get();
         }
     }
-    #if defined(RAJA_ENABLE_OPENMP)
+#if defined(RAJA_ENABLE_OPENMP)
     if (class_device == RTModel::OPENMP) {
         const double* qf_data = qf->HostRead();
         const double* wts_data = wts.HostRead();
@@ -86,41 +86,31 @@ void ComputeVolAvgTensor(const mfem::ParFiniteElementSpace* fes,
             el_vol = vol_sum.get();
         }
     }
-    #endif
-    #if defined(RAJA_ENABLE_CUDA)
-    if (class_device == RTModel::GPU) {
-        const double* qf_data = qf->Read();
-        const double* wts_data = wts.Read();
-        for (int j = 0; j < size; j++) {
-            RAJA::ReduceSum<RAJA::cuda_reduce, double> cuda_sum(0.0);
-            RAJA::ReduceSum<RAJA::cuda_reduce, double> vol_sum(0.0);
-            RAJA::forall<RAJA::cuda_exec<1024> >(default_range, [ = ] RAJA_DEVICE(int i_npts){
-                const double* val = &(qf_data[i_npts * size]);
-                cuda_sum += wts_data[i_npts] * val[j];
-                vol_sum += wts_data[i_npts];
-            });
-            data[j] = cuda_sum.get();
-            el_vol = vol_sum.get();
-        }
-    }
-    #endif
-    #if defined(RAJA_ENABLE_HIP)
+#endif
+#if defined(RAJA_ENABLE_CUDA) || defined(RAJA_ENABLE_HIP)
     if (class_device == RTModel::GPU) {
         const double* qf_data = qf->Read();
         const double* wts_data = wts.Read();
+#if defined(RAJA_ENABLE_CUDA)
+        using gpu_reduce = RAJA::cuda_reduce;
+        using gpu_policy = RAJA::cuda_exec<1024>;
+#else
+        using gpu_reduce = RAJA::hip_reduce;
+        using gpu_policy = RAJA::hip_exec<1024>;
+#endif
         for (int j = 0; j < size; j++) {
-            RAJA::ReduceSum<RAJA::hip_reduce, double> hip_sum(0.0);
-            RAJA::ReduceSum<RAJA::hip_reduce, double> vol_sum(0.0);
-            RAJA::forall<RAJA::hip_exec<1024> >(default_range, [ = ] RAJA_DEVICE(int i_npts){
+            RAJA::ReduceSum<RAJA::gpu_reduce, double> gpu_sum(0.0);
+            RAJA::ReduceSum<RAJA::gpu_reduce, double> vol_sum(0.0);
+            RAJA::forall<gpu_policy>(default_range, [ = ] RAJA_DEVICE(int i_npts){
                 const double* val = &(qf_data[i_npts * size]);
-                hip_sum += wts_data[i_npts] * val[j];
+                gpu_sum += wts_data[i_npts] * val[j];
                 vol_sum += wts_data[i_npts];
             });
-            data[j] = hip_sum.get();
+            data[j] = gpu_sum.get();
             el_vol = vol_sum.get();
         }
     }
-    #endif
+#endif
 
     for (int i = 0; i < size; i++) {
         tensor[i] = data[i];

diff --git a/src/mechanics_model.cpp b/src/mechanics_model.cpp
@@ -129,6 +129,29 @@ void computeDefGrad(QuadratureFunction *qf, ParFiniteElementSpace *fes,
    return;
 }
 
+ExaModel::ExaModel(mfem::QuadratureFunction *q_stress0, mfem::QuadratureFunction *q_stress1,
+                   mfem::QuadratureFunction *q_matGrad, mfem::QuadratureFunction *q_matVars0,
+                   mfem::QuadratureFunction *q_matVars1,
+                   mfem::ParGridFunction* _beg_coords, mfem::ParGridFunction* _end_coords,
+                   mfem::Vector *props, int nProps, int nStateVars, Assembly _assembly) :
+         numProps(nProps), numStateVars(nStateVars),
+         beg_coords(_beg_coords),
+         end_coords(_end_coords),
+         stress0(q_stress0),
+         stress1(q_stress1),
+         matGrad(q_matGrad),
+         matVars0(q_matVars0),
+         matVars1(q_matVars1),
+         matProps(props),
+         assembly(_assembly)
+      {
+         if (assembly == Assembly::PA) {
+            int npts = q_matGrad->Size() / q_matGrad->GetVDim();
+            matGradPA.SetSize(81 * npts, mfem::Device::GetMemoryType());
+            matGradPA.UseDevice(true);
+         }
+      }
+
 // This method sets the end time step stress to the beginning step
 // and then returns the internal data pointer of the end time step
 // array.

diff --git a/src/mechanics_model.hpp b/src/mechanics_model.hpp
@@ -1,6 +1,8 @@
 #ifndef MECHANICS_MODEL
 #define MECHANICS_MODEL
 
+#include "option_types.hpp"
+
 #include "mfem.hpp"
 
 #include <utility>
@@ -57,7 +59,7 @@ class ExaModel
       // the same at all quadrature points. That is, the material properties are
       // constant and not dependent on space
       mfem::Vector *matProps;
-      bool PA;
+      Assembly assembly;
       // Temporary fix just to make sure things work
       mfem::Vector matGradPA;
 
@@ -69,24 +71,7 @@ class ExaModel
                mfem::QuadratureFunction *q_matGrad, mfem::QuadratureFunction *q_matVars0,
                mfem::QuadratureFunction *q_matVars1,
                mfem::ParGridFunction* _beg_coords, mfem::ParGridFunction* _end_coords,
-               mfem::Vector *props, int nProps, int nStateVars, bool _PA) :
-         numProps(nProps), numStateVars(nStateVars),
-         beg_coords(_beg_coords),
-         end_coords(_end_coords),
-         stress0(q_stress0),
-         stress1(q_stress1),
-         matGrad(q_matGrad),
-         matVars0(q_matVars0),
-         matVars1(q_matVars1),
-         matProps(props),
-         PA(_PA)
-      {
-         if (_PA) {
-            int npts = q_matGrad->Size() / q_matGrad->GetVDim();
-            matGradPA.SetSize(81 * npts, mfem::Device::GetMemoryType());
-            matGradPA.UseDevice(true);
-         }
-      }
+               mfem::Vector *props, int nProps, int nStateVars, Assembly _assembly);
 
       virtual ~ExaModel() { }
 

diff --git a/src/mechanics_operator.cpp b/src/mechanics_operator.cpp
@@ -46,18 +46,13 @@ NonlinearMechOperator::NonlinearMechOperator(ParFiniteElementSpace &fes,
 
    assembly = options.assembly;
 
-   bool partial_assembly = false;
-   if (assembly == Assembly::PA) {
-      partial_assembly = true;
-   }
-
    if (options.mech_type == MechType::UMAT) {
       // Our class will initialize our deformation gradients and
       // our local shape function gradients which are taken with respect
       // to our initial mesh when 1st created.
       model = new AbaqusUmatModel(&q_sigma0, &q_sigma1, &q_matGrad, &q_matVars0, &q_matVars1,
                                   &q_kinVars0, &beg_crds, &end_crds,
-                                  &matProps, options.nProps, nStateVars, &fes, partial_assembly);
+                                  &matProps, options.nProps, nStateVars, &fes, assembly);
 
       // Add the user defined integrator
       if (options.integ_type == IntegrationType::FULL) {
@@ -94,7 +89,7 @@ NonlinearMechOperator::NonlinearMechOperator(ParFiniteElementSpace &fes,
             model = new VoceFCCModel(&q_sigma0, &q_sigma1, &q_matGrad, &q_matVars0, &q_matVars1,
                                      &beg_crds, &end_crds,
                                      &matProps, options.nProps, nStateVars, options.temp_k, accel,
-                                     partial_assembly);
+                                     assembly);
 
             // Add the user defined integrator
             if (options.integ_type == IntegrationType::FULL) {
@@ -111,7 +106,7 @@ NonlinearMechOperator::NonlinearMechOperator(ParFiniteElementSpace &fes,
             model = new VoceNLFCCModel(&q_sigma0, &q_sigma1, &q_matGrad, &q_matVars0, &q_matVars1,
                                        &beg_crds, &end_crds,
                                        &matProps, options.nProps, nStateVars, options.temp_k, accel,
-                                       partial_assembly);
+                                       assembly);
 
             // Add the user defined integrator
             if (options.integ_type == IntegrationType::FULL) {
@@ -128,7 +123,7 @@ NonlinearMechOperator::NonlinearMechOperator(ParFiniteElementSpace &fes,
             model = new KinKMBalDDFCCModel(&q_sigma0, &q_sigma1, &q_matGrad, &q_matVars0, &q_matVars1,
                                            &beg_crds, &end_crds,
                                            &matProps, options.nProps, nStateVars, options.temp_k, accel,
-                                           partial_assembly);
+                                           assembly);
 
             // Add the user defined integrator
             if (options.integ_type == IntegrationType::FULL) {
@@ -147,7 +142,7 @@ NonlinearMechOperator::NonlinearMechOperator(ParFiniteElementSpace &fes,
             model = new KinKMBalDDHCPModel(&q_sigma0, &q_sigma1, &q_matGrad, &q_matVars0, &q_matVars1,
                                            &beg_crds, &end_crds,
                                            &matProps, options.nProps, nStateVars, options.temp_k, accel,
-                                           partial_assembly);
+                                           assembly);
 
             // Add the user defined integrator
             if (options.integ_type == IntegrationType::FULL) {
@@ -167,7 +162,7 @@ NonlinearMechOperator::NonlinearMechOperator(ParFiniteElementSpace &fes,
             model = new VoceBCCModel(&q_sigma0, &q_sigma1, &q_matGrad, &q_matVars0, &q_matVars1,
                                      &beg_crds, &end_crds,
                                      &matProps, options.nProps, nStateVars, options.temp_k, accel,
-                                     partial_assembly);
+                                     assembly);
 
             // Add the user defined integrator
             if (options.integ_type == IntegrationType::FULL) {
@@ -184,7 +179,7 @@ NonlinearMechOperator::NonlinearMechOperator(ParFiniteElementSpace &fes,
             model = new VoceNLBCCModel(&q_sigma0, &q_sigma1, &q_matGrad, &q_matVars0, &q_matVars1,
                                        &beg_crds, &end_crds,
                                        &matProps, options.nProps, nStateVars, options.temp_k, accel,
-                                       partial_assembly);
+                                       assembly);
 
             // Add the user defined integrator
             if (options.integ_type == IntegrationType::FULL) {
@@ -201,7 +196,7 @@ NonlinearMechOperator::NonlinearMechOperator(ParFiniteElementSpace &fes,
             model = new KinKMbalDDBCCModel(&q_sigma0, &q_sigma1, &q_matGrad, &q_matVars0, &q_matVars1,
                                            &beg_crds, &end_crds,
                                            &matProps, options.nProps, nStateVars, options.temp_k, accel,
-                                           partial_assembly);
+                                           assembly);
 
             // Add the user defined integrator
             if (options.integ_type == IntegrationType::FULL) {

diff --git a/src/mechanics_umat.hpp b/src/mechanics_umat.hpp
@@ -62,12 +62,12 @@ class AbaqusUmatModel : public ExaModel
                       mfem::QuadratureFunction *_q_matVars1, mfem::QuadratureFunction *_q_defGrad0,
                       mfem::ParGridFunction* _beg_coords, mfem::ParGridFunction* _end_coords,
                       mfem::Vector *_props, int _nProps,
-                      int _nStateVars, mfem::ParFiniteElementSpace* fes, bool _PA) :
+                      int _nStateVars, mfem::ParFiniteElementSpace* fes, Assembly _assembly) :
          ExaModel(_q_stress0,
                   _q_stress1, _q_matGrad, _q_matVars0,
                   _q_matVars1,
                   _beg_coords, _end_coords,
-                  _props, _nProps, _nStateVars, _PA), loc_fes(fes),
+                  _props, _nProps, _nStateVars, _assembly), loc_fes(fes),
          defGrad0(_q_defGrad0)
       {
          init_loc_sf_grads(fes);