Skip to content

Commit

Permalink
A few modifications to simplify some of the GPU related code and pote…
Browse files Browse the repository at this point in the history
…ntially fix some bugs...
  • Loading branch information
rcarson3 committed Jan 3, 2024
1 parent a1e9424 commit c049c83
Show file tree
Hide file tree
Showing 11 changed files with 121 additions and 126 deletions.
3 changes: 1 addition & 2 deletions src/mechanics_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,8 +171,7 @@ int main(int argc, char *argv[])
else if (toml_opt.rtmodel == RTModel::GPU) {
#if defined(RAJA_ENABLE_CUDA)
device_config = "raja-cuda";
#endif
#if defined(RAJA_ENABLE_HIP)
#elif defined(RAJA_ENABLE_HIP)
device_config = "raja-hip";
#endif
}
Expand Down
12 changes: 9 additions & 3 deletions src/mechanics_ecmech.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ void kernel_postprocessing(const int npts, const int nstatev, const double dt, c
const double* stress_svec_p_array, const double* vol_ratio_array,
const double* eng_int_array, const double* beg_state_vars_array,
double* state_vars_array, double* stress_array,
double* ddsdde_array)
double* ddsdde_array, Assembly assembly)
{
const int ind_int_eng = nstatev - ecmech::ne;
const int ind_pl_work = ecmech::evptn::iHistA_flowStr;
Expand Down Expand Up @@ -151,7 +151,12 @@ void kernel_postprocessing(const int npts, const int nstatev, const double dt, c
stress[2] += stress_mean;
}); // end of npts loop

MFEM_FORALL(i_pts, npts, {
// No need to transpose this if running on the GPU and doing EA
if ((assembly == Assembly::EA) and mfem::Device::Allows(Backend::DEVICE_MASK)) { return; }
else
{
// std::cout << "rotate tan stiffness mat" << std::endl;
MFEM_FORALL(i_pts, npts, {
// ExaCMech saves this in Row major, so we need to get out the transpose.
// The good thing is we can do this all in place no problem.
double* ddsdde = &(ddsdde_array[i_pts * ecmech::nsvec * ecmech::nsvec]);
Expand All @@ -163,6 +168,7 @@ void kernel_postprocessing(const int npts, const int nstatev, const double dt, c
}
}
});
}
} // end of post-processing func

// The different CPU, OpenMP, and GPU kernels aren't needed here, since they're
Expand Down Expand Up @@ -247,6 +253,6 @@ void ExaCMechModel::ModelSetup(const int nqpts, const int nelems, const int /*sp
CALI_MARK_BEGIN("ecmech_postprocessing");
kernel_postprocessing(npts, nstatev, dt, dEff, stress_svec_p_array_data,
vol_ratio_array_data, eng_int_array_data, state_vars_beg, state_vars_array,
stress_array, ddsdde_array);
stress_array, ddsdde_array, assembly);
CALI_MARK_END("ecmech_postprocessing");
} // End of ModelSetup function
44 changes: 22 additions & 22 deletions src/mechanics_ecmech.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ class ExaCMechModel : public ExaModel
mfem::QuadratureFunction *_q_matVars1,
mfem::ParGridFunction* _beg_coords, mfem::ParGridFunction* _end_coords,
mfem::Vector *_props, int _nProps, int _nStateVars, double _temp_k,
ecmech::ExecutionStrategy _accel, bool _PA) :
ecmech::ExecutionStrategy _accel, Assembly _assembly) :
ExaModel(_q_stress0, _q_stress1, _q_matGrad, _q_matVars0, _q_matVars1,
_beg_coords, _end_coords, _props, _nProps, _nStateVars, _PA),
_beg_coords, _end_coords, _props, _nProps, _nStateVars, _assembly),
temp_k(_temp_k), accel(_accel)
{
// First find the total number of points that we're dealing with so nelems * nqpts
Expand Down Expand Up @@ -128,10 +128,10 @@ class ECMechXtalModel : public ExaCMechModel
mfem::QuadratureFunction *_q_matVars1,
mfem::ParGridFunction* _beg_coords, mfem::ParGridFunction* _end_coords,
mfem::Vector *_props, int _nProps, int _nStateVars, double _temp_k,
ecmech::ExecutionStrategy _accel, bool _PA) :
ecmech::ExecutionStrategy _accel, Assembly _assembly) :
ExaCMechModel(_q_stress0, _q_stress1, _q_matGrad, _q_matVars0, _q_matVars1,
_beg_coords, _end_coords, _props, _nProps, _nStateVars, _temp_k,
_accel, _PA)
_accel, _assembly)
{
// For FCC material models we have the following state variables
// and their number of components
Expand Down Expand Up @@ -248,32 +248,32 @@ class ECMechXtalModel : public ExaCMechModel
/// MFEM_FORALL requiring it to be public
void init_state_vars(mfem::QuadratureFunction *_q_matVars0, std::vector<double> hist_init)
{
mfem::Vector histInit(ecmechXtal::numHist, mfem::Device::GetMemoryType());
histInit.UseDevice(true); histInit.HostReadWrite();
assert(hist_init.size() == ecmechXtal::numHist);
mfem::Vector histInit(ecmechXtal::numHist, mfem::Device::GetMemoryType());
histInit.UseDevice(true); histInit.HostReadWrite();
assert(hist_init.size() == ecmechXtal::numHist);

for (uint i = 0; i < hist_init.size(); i++) {
histInit(i) = hist_init.at(i);
histInit(i) = hist_init.at(i);
}

const double* histInit_vec = histInit.Read();
const double* histInit_vec = histInit.Read();

double* state_vars = _q_matVars0->ReadWrite();

int qf_size = (_q_matVars0->Size()) / (_q_matVars0->GetVDim());

int vdim = _q_matVars0->GetVDim();

const int ind_dp_eff_ = ind_dp_eff;
const int ind_eql_pl_strain_ = ind_eql_pl_strain;
const int ind_pl_work_ = ind_pl_work;
const int ind_num_evals_ = ind_num_evals;
const int ind_hardness_ = ind_hardness;
const int ind_vols_ = ind_vols;
const int ind_int_eng_ = ind_int_eng;
const int ind_dev_elas_strain_ = ind_dev_elas_strain;
const int ind_gdot_ = ind_gdot;
const int nslip = num_slip;
const int ind_dp_eff_ = ind_dp_eff;
const int ind_eql_pl_strain_ = ind_eql_pl_strain;
const int ind_pl_work_ = ind_pl_work;
const int ind_num_evals_ = ind_num_evals;
const int ind_hardness_ = ind_hardness;
const int ind_vols_ = ind_vols;
const int ind_int_eng_ = ind_int_eng;
const int ind_dev_elas_strain_ = ind_dev_elas_strain;
const int ind_gdot_ = ind_gdot;
const int nslip = num_slip;

mfem::MFEM_FORALL(i, qf_size, {
const int ind = i * vdim;
Expand Down Expand Up @@ -301,14 +301,14 @@ class ECMechXtalModel : public ExaCMechModel
// We're re-using our deformation gradient quadrature function for this
// calculation which is why we use a 9 dim QF rather than a 6 dim QF
virtual void calcDpMat(mfem::QuadratureFunction &DpMat) const override {
auto slip_geom = mat_model->getSlipGeom();
auto slip_geom = mat_model->getSlipGeom();
const int ind_slip = ind_gdot;
const int ind_quats_ = ind_quats;
const int ind_quats_ = ind_quats;
const int npts = DpMat.GetSpace()->GetSize();
auto gdot = mfem::Reshape(matVars1->Read(), matVars1->GetVDim(), npts);
auto d_dpmat = mfem::Reshape(DpMat.Write(), 3, 3, npts);

static constexpr const int nslip = ecmechXtal::nslip;
static constexpr const int nslip = ecmechXtal::nslip;

MFEM_ASSERT(DpMat.GetVDim() == 9, "DpMat needs to have a vdim of 9");

Expand Down
42 changes: 16 additions & 26 deletions src/mechanics_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ void ComputeVolAvgTensor(const mfem::ParFiniteElementSpace* fes,
el_vol = vol_sum.get();
}
}
#if defined(RAJA_ENABLE_OPENMP)
#if defined(RAJA_ENABLE_OPENMP)
if (class_device == RTModel::OPENMP) {
const double* qf_data = qf->HostRead();
const double* wts_data = wts.HostRead();
Expand All @@ -86,41 +86,31 @@ void ComputeVolAvgTensor(const mfem::ParFiniteElementSpace* fes,
el_vol = vol_sum.get();
}
}
#endif
#if defined(RAJA_ENABLE_CUDA)
if (class_device == RTModel::GPU) {
const double* qf_data = qf->Read();
const double* wts_data = wts.Read();
for (int j = 0; j < size; j++) {
RAJA::ReduceSum<RAJA::cuda_reduce, double> cuda_sum(0.0);
RAJA::ReduceSum<RAJA::cuda_reduce, double> vol_sum(0.0);
RAJA::forall<RAJA::cuda_exec<1024> >(default_range, [ = ] RAJA_DEVICE(int i_npts){
const double* val = &(qf_data[i_npts * size]);
cuda_sum += wts_data[i_npts] * val[j];
vol_sum += wts_data[i_npts];
});
data[j] = cuda_sum.get();
el_vol = vol_sum.get();
}
}
#endif
#if defined(RAJA_ENABLE_HIP)
#endif
#if defined(RAJA_ENABLE_CUDA) || defined(RAJA_ENABLE_HIP)
if (class_device == RTModel::GPU) {
const double* qf_data = qf->Read();
const double* wts_data = wts.Read();
#if defined(RAJA_ENABLE_CUDA)
using gpu_reduce = RAJA::cuda_reduce;
using gpu_policy = RAJA::cuda_exec<1024>;
#else
using gpu_reduce = RAJA::hip_reduce;
using gpu_policy = RAJA::hip_exec<1024>;
#endif
for (int j = 0; j < size; j++) {
RAJA::ReduceSum<RAJA::hip_reduce, double> hip_sum(0.0);
RAJA::ReduceSum<RAJA::hip_reduce, double> vol_sum(0.0);
RAJA::forall<RAJA::hip_exec<1024> >(default_range, [ = ] RAJA_DEVICE(int i_npts){
RAJA::ReduceSum<RAJA::gpu_reduce, double> gpu_sum(0.0);
RAJA::ReduceSum<RAJA::gpu_reduce, double> vol_sum(0.0);
RAJA::forall<gpu_policy>(default_range, [ = ] RAJA_DEVICE(int i_npts){
const double* val = &(qf_data[i_npts * size]);
hip_sum += wts_data[i_npts] * val[j];
gpu_sum += wts_data[i_npts] * val[j];
vol_sum += wts_data[i_npts];
});
data[j] = hip_sum.get();
data[j] = gpu_sum.get();
el_vol = vol_sum.get();
}
}
#endif
#endif

for (int i = 0; i < size; i++) {
tensor[i] = data[i];
Expand Down
23 changes: 23 additions & 0 deletions src/mechanics_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,29 @@ void computeDefGrad(QuadratureFunction *qf, ParFiniteElementSpace *fes,
return;
}

ExaModel::ExaModel(mfem::QuadratureFunction *q_stress0, mfem::QuadratureFunction *q_stress1,
mfem::QuadratureFunction *q_matGrad, mfem::QuadratureFunction *q_matVars0,
mfem::QuadratureFunction *q_matVars1,
mfem::ParGridFunction* _beg_coords, mfem::ParGridFunction* _end_coords,
mfem::Vector *props, int nProps, int nStateVars, Assembly _assembly) :
numProps(nProps), numStateVars(nStateVars),
beg_coords(_beg_coords),
end_coords(_end_coords),
stress0(q_stress0),
stress1(q_stress1),
matGrad(q_matGrad),
matVars0(q_matVars0),
matVars1(q_matVars1),
matProps(props),
assembly(_assembly)
{
if (assembly == Assembly::PA) {
int npts = q_matGrad->Size() / q_matGrad->GetVDim();
matGradPA.SetSize(81 * npts, mfem::Device::GetMemoryType());
matGradPA.UseDevice(true);
}
}

// This method sets the end time step stress to the beginning step
// and then returns the internal data pointer of the end time step
// array.
Expand Down
23 changes: 4 additions & 19 deletions src/mechanics_model.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#ifndef MECHANICS_MODEL
#define MECHANICS_MODEL

#include "option_types.hpp"

#include "mfem.hpp"

#include <utility>
Expand Down Expand Up @@ -57,7 +59,7 @@ class ExaModel
// the same at all quadrature points. That is, the material properties are
// constant and not dependent on space
mfem::Vector *matProps;
bool PA;
Assembly assembly;
// Temporary fix just to make sure things work
mfem::Vector matGradPA;

Expand All @@ -69,24 +71,7 @@ class ExaModel
mfem::QuadratureFunction *q_matGrad, mfem::QuadratureFunction *q_matVars0,
mfem::QuadratureFunction *q_matVars1,
mfem::ParGridFunction* _beg_coords, mfem::ParGridFunction* _end_coords,
mfem::Vector *props, int nProps, int nStateVars, bool _PA) :
numProps(nProps), numStateVars(nStateVars),
beg_coords(_beg_coords),
end_coords(_end_coords),
stress0(q_stress0),
stress1(q_stress1),
matGrad(q_matGrad),
matVars0(q_matVars0),
matVars1(q_matVars1),
matProps(props),
PA(_PA)
{
if (_PA) {
int npts = q_matGrad->Size() / q_matGrad->GetVDim();
matGradPA.SetSize(81 * npts, mfem::Device::GetMemoryType());
matGradPA.UseDevice(true);
}
}
mfem::Vector *props, int nProps, int nStateVars, Assembly _assembly);

virtual ~ExaModel() { }

Expand Down
21 changes: 8 additions & 13 deletions src/mechanics_operator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,18 +46,13 @@ NonlinearMechOperator::NonlinearMechOperator(ParFiniteElementSpace &fes,

assembly = options.assembly;

bool partial_assembly = false;
if (assembly == Assembly::PA) {
partial_assembly = true;
}

if (options.mech_type == MechType::UMAT) {
// Our class will initialize our deformation gradients and
// our local shape function gradients which are taken with respect
// to our initial mesh when 1st created.
model = new AbaqusUmatModel(&q_sigma0, &q_sigma1, &q_matGrad, &q_matVars0, &q_matVars1,
&q_kinVars0, &beg_crds, &end_crds,
&matProps, options.nProps, nStateVars, &fes, partial_assembly);
&matProps, options.nProps, nStateVars, &fes, assembly);

// Add the user defined integrator
if (options.integ_type == IntegrationType::FULL) {
Expand Down Expand Up @@ -94,7 +89,7 @@ NonlinearMechOperator::NonlinearMechOperator(ParFiniteElementSpace &fes,
model = new VoceFCCModel(&q_sigma0, &q_sigma1, &q_matGrad, &q_matVars0, &q_matVars1,
&beg_crds, &end_crds,
&matProps, options.nProps, nStateVars, options.temp_k, accel,
partial_assembly);
assembly);

// Add the user defined integrator
if (options.integ_type == IntegrationType::FULL) {
Expand All @@ -111,7 +106,7 @@ NonlinearMechOperator::NonlinearMechOperator(ParFiniteElementSpace &fes,
model = new VoceNLFCCModel(&q_sigma0, &q_sigma1, &q_matGrad, &q_matVars0, &q_matVars1,
&beg_crds, &end_crds,
&matProps, options.nProps, nStateVars, options.temp_k, accel,
partial_assembly);
assembly);

// Add the user defined integrator
if (options.integ_type == IntegrationType::FULL) {
Expand All @@ -128,7 +123,7 @@ NonlinearMechOperator::NonlinearMechOperator(ParFiniteElementSpace &fes,
model = new KinKMBalDDFCCModel(&q_sigma0, &q_sigma1, &q_matGrad, &q_matVars0, &q_matVars1,
&beg_crds, &end_crds,
&matProps, options.nProps, nStateVars, options.temp_k, accel,
partial_assembly);
assembly);

// Add the user defined integrator
if (options.integ_type == IntegrationType::FULL) {
Expand All @@ -147,7 +142,7 @@ NonlinearMechOperator::NonlinearMechOperator(ParFiniteElementSpace &fes,
model = new KinKMBalDDHCPModel(&q_sigma0, &q_sigma1, &q_matGrad, &q_matVars0, &q_matVars1,
&beg_crds, &end_crds,
&matProps, options.nProps, nStateVars, options.temp_k, accel,
partial_assembly);
assembly);

// Add the user defined integrator
if (options.integ_type == IntegrationType::FULL) {
Expand All @@ -167,7 +162,7 @@ NonlinearMechOperator::NonlinearMechOperator(ParFiniteElementSpace &fes,
model = new VoceBCCModel(&q_sigma0, &q_sigma1, &q_matGrad, &q_matVars0, &q_matVars1,
&beg_crds, &end_crds,
&matProps, options.nProps, nStateVars, options.temp_k, accel,
partial_assembly);
assembly);

// Add the user defined integrator
if (options.integ_type == IntegrationType::FULL) {
Expand All @@ -184,7 +179,7 @@ NonlinearMechOperator::NonlinearMechOperator(ParFiniteElementSpace &fes,
model = new VoceNLBCCModel(&q_sigma0, &q_sigma1, &q_matGrad, &q_matVars0, &q_matVars1,
&beg_crds, &end_crds,
&matProps, options.nProps, nStateVars, options.temp_k, accel,
partial_assembly);
assembly);

// Add the user defined integrator
if (options.integ_type == IntegrationType::FULL) {
Expand All @@ -201,7 +196,7 @@ NonlinearMechOperator::NonlinearMechOperator(ParFiniteElementSpace &fes,
model = new KinKMbalDDBCCModel(&q_sigma0, &q_sigma1, &q_matGrad, &q_matVars0, &q_matVars1,
&beg_crds, &end_crds,
&matProps, options.nProps, nStateVars, options.temp_k, accel,
partial_assembly);
assembly);

// Add the user defined integrator
if (options.integ_type == IntegrationType::FULL) {
Expand Down
4 changes: 2 additions & 2 deletions src/mechanics_umat.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,12 @@ class AbaqusUmatModel : public ExaModel
mfem::QuadratureFunction *_q_matVars1, mfem::QuadratureFunction *_q_defGrad0,
mfem::ParGridFunction* _beg_coords, mfem::ParGridFunction* _end_coords,
mfem::Vector *_props, int _nProps,
int _nStateVars, mfem::ParFiniteElementSpace* fes, bool _PA) :
int _nStateVars, mfem::ParFiniteElementSpace* fes, Assembly _assembly) :
ExaModel(_q_stress0,
_q_stress1, _q_matGrad, _q_matVars0,
_q_matVars1,
_beg_coords, _end_coords,
_props, _nProps, _nStateVars, _PA), loc_fes(fes),
_props, _nProps, _nStateVars, _assembly), loc_fes(fes),
defGrad0(_q_defGrad0)
{
init_loc_sf_grads(fes);
Expand Down
Loading

0 comments on commit c049c83

Please sign in to comment.