Skip to content

Commit

Permalink
Merge pull request #190 from issp-center-dev/fix_test40
Browse files Browse the repository at this point in the history
Bug Fix: Out-of-memory in MPI+Canonical+Sz/N-changing excitation
  • Loading branch information
k-yoshimi authored Dec 3, 2024
2 parents 51f3b87 + 522fa44 commit 8639a32
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 31 deletions.
19 changes: 11 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ The distribution of the program package and the source codes for HPhi follow GNU

[“Quantum lattice model solver HΦ”, M. Kawamura, K. Yoshimi, T. Misawa, Y. Yamaji, S. Todo, and N. Kawashima, Computer Physics Communications 217, 180 (2017).](https://doi.org/10.1016/j.cpc.2017.04.006)

[“Update of HΦ: Newly added functions and methods in versions 2 and 3”, K. Ido, M. Kawamura, Y. Motoyama, K. Yoshimi, Y. Yamaji, S. Todo, N. Kawashima, and T. Misawa, arXiv:2307.13222.](https://arxiv.org/abs/2307.13222)
[“Update of HΦ: Newly added functions and methods in versions 2 and 3”, K. Ido, M. Kawamura, Y. Motoyama, K. Yoshimi, Y. Yamaji, S. Todo, N. Kawashima, and T. Misawa, Comput. Phys. Commun. 298, 109093 (2024).](https://doi.org/10.1016/j.cpc.2024.109093)

Bibtex:

Expand All @@ -47,13 +47,16 @@ url = {https://www.sciencedirect.com/science/article/pii/S0010465517301200},
author = {Mitsuaki Kawamura and Kazuyoshi Yoshimi and Takahiro Misawa and Youhei Yamaji and Synge Todo and Naoki Kawashima}
}

@misc{ido2023update,
title={Update of $\mathcal{H}\Phi$: Newly added functions and methods in versions 2 and 3},
author={Kota Ido and Mitsuaki Kawamura and Yuichi Motoyama and Kazuyoshi Yoshimi and Youhei Yamaji and Synge Todo and Naoki Kawashima and Takahiro Misawa},
year={2023},
eprint={2307.13222},
archivePrefix={arXiv},
primaryClass={cond-mat.str-el}
@article{ido2024update,
title = {Update of $\mathcal{H}\Phi$: Newly added functions and methods in versions 2 and 3},
author = {Kota Ido and Mitsuaki Kawamura and Yuichi Motoyama and Kazuyoshi Yoshimi and Youhei Yamaji and Synge Todo and Naoki Kawashima and Takahiro Misawa},
journal = {Computer Physics Communications},
volume = {298},
pages = {109093},
year = {2024},
issn = {0010-4655},
doi = {https://doi.org/10.1016/j.cpc.2024.109093},
url = {https://www.sciencedirect.com/science/article/pii/S001046552400016X}
}


Expand Down
18 changes: 9 additions & 9 deletions src/mltplyMPIHubbard.c
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ double complex child_CisAjt_MPIdouble(
long unsigned int *list_2_2_target//!<[in]
) {
#ifdef MPI
int mask1, mask2, state1, state2, ierr, origin, bitdiff, Fsgn;
int mask1, mask2, state1, state2, ierr, origin, bitdiff, Fsgn, only_send = 0;
unsigned long int idim_max_buf, j, ioff;
MPI_Status statusMPI;
double complex trans, dmv;
Expand All @@ -164,9 +164,7 @@ double complex child_CisAjt_MPIdouble(
}/*if (state1 == 0 && state2 == mask2)*/
else if (state1 == mask1 && state2 == 0) {
trans = -(double) Fsgn * conj(tmp_trans);
if (X->Large.mode == M_CORR|| X->Large.mode == M_CALCSPEC) {
trans = 0;
}
if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) only_send = 1;
}/*if (state1 == mask1 && state2 == 0)*/
else return 0;

Expand All @@ -184,6 +182,8 @@ double complex child_CisAjt_MPIdouble(
v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0,
MPI_COMM_WORLD, &statusMPI);
if (ierr != 0) exitMPI(-1);

if (only_send == 1)return 0;

if (X->Large.mode == M_MLTPLY|| X->Large.mode == M_CALCSPEC) {
#pragma omp parallel for default(none) private(j, dmv, ioff) \
Expand Down Expand Up @@ -363,7 +363,7 @@ double complex child_general_hopp_MPIdouble(
double complex *tmp_v1//!<[in] v0 = H v1
) {
#ifdef MPI
int mask1, mask2, state1, state2, ierr, origin, bitdiff, Fsgn;
int mask1, mask2, state1, state2, ierr, origin, bitdiff, Fsgn, only_send = 0;
unsigned long int idim_max_buf, j, ioff;
MPI_Status statusMPI;
double complex trans, dmv, dam_pr;
Expand All @@ -385,7 +385,7 @@ double complex child_general_hopp_MPIdouble(
}
else if (state1 == mask1 && state2 == 0) {
trans = -(double) Fsgn * conj(tmp_trans);
if (X->Large.mode == M_CORR|| X->Large.mode == M_CALCSPEC) trans = 0;
if (X->Large.mode == M_CORR|| X->Large.mode == M_CALCSPEC) only_send = 1;
}
else return 0;

Expand All @@ -402,6 +402,8 @@ double complex child_general_hopp_MPIdouble(
MPI_COMM_WORLD, &statusMPI);
if (ierr != 0) exitMPI(-1);

if (only_send == 1)return 0;

dam_pr = 0.0;
#pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv, Fsgn, ioff) \
firstprivate(idim_max_buf, trans, X) shared(list_2_1, list_2_2, list_1buf, v1buf, tmp_v1, tmp_v0)
Expand Down Expand Up @@ -506,9 +508,7 @@ double complex child_general_hopp_MPIsingle(
else if (state2 == 0) {
state1check = mask1;
trans = -(double) Fsgn * conj(tmp_trans);
if (X->Large.mode == M_CORR|| X->Large.mode == M_CALCSPEC) {
trans = 0;
}
if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) return 0;
}
else return 0;

Expand Down
17 changes: 13 additions & 4 deletions src/mltplyMPIHubbardCore.c
Original file line number Diff line number Diff line change
Expand Up @@ -922,7 +922,7 @@ double complex child_CisAjtCkuAlv_Hubbard_MPI(
double complex dam_pr = 0.0;
unsigned long int i_max = X->Check.idim_max;
unsigned long int idim_max_buf;
int iCheck, ierr, Fsgn;
int iCheck, ierr, Fsgn, only_send = 0;
unsigned long int isite1, isite2, isite3, isite4;
unsigned long int tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4;
unsigned long int j, Adiff, Bdiff;
Expand Down Expand Up @@ -959,7 +959,7 @@ double complex child_CisAjtCkuAlv_Hubbard_MPI(
tmp_isite2 = X->Def.OrgTpow[2 * org_isite3 + org_ispin3];
tmp_isite1 = X->Def.OrgTpow[2 * org_isite4 + org_ispin4];
iFlgHermite = TRUE;
if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) tmp_V = 0;
if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) only_send = 1;
}/*if (iCheck == TRUE)*/
else return 0.0;
}/*if (iCheck == FALSE)*/
Expand Down Expand Up @@ -1019,6 +1019,9 @@ firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0)
v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0,
MPI_COMM_WORLD, &statusMPI);
if (ierr != 0) exitMPI(-1);

if (only_send == 1) return 0;

if (org_isite1 + 1 > X->Def.Nsite && org_isite2 + 1 > X->Def.Nsite
&& org_isite3 + 1 > X->Def.Nsite && org_isite4 + 1 > X->Def.Nsite)
{
Expand Down Expand Up @@ -1135,7 +1138,7 @@ double complex child_CisAjtCkuAku_Hubbard_MPI(
double complex dam_pr = 0.0;
unsigned long int i_max = X->Check.idim_max;
unsigned long int idim_max_buf, ioff;
int iCheck, ierr, Fsgn;
int iCheck, ierr, Fsgn, only_send = 0;
unsigned long int isite1, isite2, isite3;
unsigned long int tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4;
unsigned long int j, Asum, Adiff;
Expand Down Expand Up @@ -1169,13 +1172,15 @@ double complex child_CisAjtCkuAku_Hubbard_MPI(
Asum = tmp_isite3 + tmp_isite4;
if (tmp_isite4 > tmp_isite3) Adiff = tmp_isite4 - tmp_isite3 * 2;
else Adiff = tmp_isite3 - tmp_isite4 * 2;
if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) tmp_V = 0;
if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) only_send = 1;
//printf("tmp_isite1=%ld, tmp_isite2=%ld, Adiff=%ld\n", tmp_isite1, tmp_isite2, Adiff);
}/*if (iCheck == TRUE)*/
else return 0.0;
}/*if (iCheck == FALSE)*/

if (myrank == origin) {// only k is in PE

if (only_send == 1) return 0;
//for hermite
#pragma omp parallel default(none) reduction(+:dam_pr) \
firstprivate(i_max, Asum, Adiff, isite1, isite2, tmp_V, X) private(j) shared(tmp_v0, tmp_v1)
Expand Down Expand Up @@ -1207,6 +1212,8 @@ firstprivate(i_max, Asum, Adiff, isite1, isite2, tmp_V, X) private(j) shared(tmp
MPI_COMM_WORLD, &statusMPI);
if (ierr != 0) exitMPI(-1);

if (only_send == 1) return 0;

#pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv, ioff, tmp_off, Fsgn, Adiff) \
firstprivate(idim_max_buf, tmp_V, X, tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4, org_rankbit, isite3) \
shared(v1buf, tmp_v1, tmp_v0, list_1buf, list_2_1, list_2_2, origin, org_isite3, myrank, isite1, isite2, org_isite1, org_isite2)
Expand Down Expand Up @@ -1593,6 +1600,7 @@ double complex child_Cis_MPI(

if (state2 == mask2) {
trans = 0;
return 0;
}
else if (state2 == 0) {
trans = (double)Fsgn * tmp_trans;
Expand Down Expand Up @@ -1673,6 +1681,7 @@ double complex child_Ajt_MPI(

if (state2 == 0) {
trans = 0;
return 0;
}
else if (state2 == mask2) {
trans = (double)Fsgn * tmp_trans;
Expand Down
16 changes: 8 additions & 8 deletions src/mltplyMPISpin.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ double complex child_general_int_spin_MPIdouble(
double complex *tmp_v1//!<[in] Vector to be producted
) {
#ifdef MPI
int mask1, mask2, state1, state2, ierr, origin;
int mask1, mask2, state1, state2, ierr, origin, only_send = 0;
unsigned long int idim_max_buf, j, ioff;
MPI_Status statusMPI;
double complex Jint, dmv, dam_pr;
Expand All @@ -88,9 +88,7 @@ double complex child_general_int_spin_MPIdouble(
}
else if (state1 == org_ispin1 && state2 == org_ispin3) {
Jint = conj(tmp_J);
if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) {
Jint = 0;
}
if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) only_send = 1;
}
else return 0;

Expand All @@ -104,6 +102,8 @@ double complex child_general_int_spin_MPIdouble(
v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, MPI_COMM_WORLD, &statusMPI);
if (ierr != 0) exitMPI(-1);

if (only_send == 1) return 0;

dam_pr = 0.0;
if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) {
#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, dmv, ioff) \
Expand Down Expand Up @@ -232,7 +232,7 @@ double complex child_general_int_spin_MPIsingle(
double complex *tmp_v1//!<[in] Vector to be producted
) {
#ifdef MPI
int mask2, state2, ierr, origin;
int mask2, state2, ierr, origin, only_send = 0;
unsigned long int mask1, idim_max_buf, j, ioff, state1, jreal, state1check;
MPI_Status statusMPI;
double complex Jint, dmv, dam_pr;
Expand All @@ -250,9 +250,7 @@ double complex child_general_int_spin_MPIsingle(
else if (state2 == org_ispin3) {
state1check = (unsigned long int) org_ispin1;
Jint = conj(tmp_J);
if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) {
Jint = 0;
}
if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) only_send = 1;
}
else return 0;

Expand All @@ -268,6 +266,8 @@ double complex child_general_int_spin_MPIsingle(
v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0,
MPI_COMM_WORLD, &statusMPI);
if (ierr != 0) exitMPI(-1);

if (only_send == 1) return 0;
/*
Index in the intra PE
*/
Expand Down
4 changes: 2 additions & 2 deletions test/lanczos_spin_kagome.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1372,7 +1372,7 @@ cat > reference.dat <<EOF
EOF
paste output/zvo_cisajscktalt.dat reference.dat > paste3.dat
diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($9-$19)*($9-$19)+($10-$20)*($10-$20))}
END{printf "%8.6f", diff}' paste3.dat`
END{printf "%7.5f", diff}' paste3.dat`

test "${diff}" = "0.000000"
test "${diff}" = "0.00000"
exit $?

0 comments on commit 8639a32

Please sign in to comment.