From c470af1c4f09c0eec7756be1b4fd7b6aeceba0e9 Mon Sep 17 00:00:00 2001 From: mitsuaki1987 Date: Fri, 15 Feb 2019 17:16:57 +0900 Subject: [PATCH 01/50] Backup --- src/CalcSpectrum.c | 8 +- src/CalcSpectrumByLanczos.c | 2 +- src/CalcSpectrumByTPQ.c | 2 +- src/Lanczos_EigenValue.c | 6 +- src/PairEx.c | 12 +- src/PairExHubbard.c | 30 ++-- src/PairExSpin.c | 52 +++---- src/SingleEx.c | 8 +- src/SingleExHubbard.c | 24 ++-- src/diagonalcalc.c | 50 +++---- src/include/CalcSpectrum.h | 4 +- src/include/CalcSpectrumByLanczos.h | 2 +- src/include/CalcSpectrumByTPQ.h | 2 +- src/include/Lanczos_EigenValue.h | 6 +- src/include/PairEx.h | 4 +- src/include/PairExHubbard.h | 8 +- src/include/PairExSpin.h | 24 ++-- src/include/SingleEx.h | 4 +- src/include/SingleExHubbard.h | 8 +- src/include/diagonalcalc.h | 4 +- src/include/mltply.h | 2 +- src/include/mltplyHubbard.h | 36 ++--- src/include/mltplyHubbardCore.h | 70 +++++----- src/include/mltplyMPIBoost.h | 4 +- src/include/mltplyMPIHubbard.h | 32 ++--- src/include/mltplyMPIHubbardCore.h | 76 +++++------ src/include/mltplyMPISpin.h | 44 +++--- src/include/mltplyMPISpinCore.h | 144 ++++++++++---------- src/include/mltplySpin.h | 34 ++--- src/include/mltplySpinCore.h | 32 ++--- src/mltply.c | 12 +- src/mltplyHubbard.c | 106 +++++++-------- src/mltplyHubbardCore.c | 204 ++++++++++------------------ src/mltplyMPIBoost.c | 14 +- src/mltplyMPIHubbard.c | 48 +++---- src/mltplyMPIHubbardCore.c | 132 +++++++++--------- src/mltplyMPISpin.c | 76 +++++------ src/mltplyMPISpinCore.c | 160 +++++++++++----------- src/mltplySpin.c | 120 ++++++++-------- src/mltplySpinCore.c | 32 ++--- 40 files changed, 789 insertions(+), 849 deletions(-) diff --git a/src/CalcSpectrum.c b/src/CalcSpectrum.c index 9d077a541..4fec982e7 100644 --- a/src/CalcSpectrum.c +++ b/src/CalcSpectrum.c @@ -313,8 +313,8 @@ int CalcSpectrum( int GetExcitedState ( struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + 
double complex **tmp_v1 ) { if(X->Def.NSingleExcitationOperator > 0 && X->Def.NPairExcitationOperator > 0){ @@ -324,12 +324,12 @@ int GetExcitedState if(X->Def.NSingleExcitationOperator > 0){ - if(GetSingleExcitedState(X,tmp_v0, tmp_v1)!=TRUE){ + if(GetSingleExcitedState(X,nstate,tmp_v0, tmp_v1)!=TRUE){ return FALSE; } } else if(X->Def.NPairExcitationOperator >0){ - if(GetPairExcitedState(X,tmp_v0, tmp_v1)!=TRUE){ + if(GetPairExcitedState(X,nstate,tmp_v0, tmp_v1)!=TRUE){ return FALSE; } } diff --git a/src/CalcSpectrumByLanczos.c b/src/CalcSpectrumByLanczos.c index 24fad869d..f4f1e1cac 100644 --- a/src/CalcSpectrumByLanczos.c +++ b/src/CalcSpectrumByLanczos.c @@ -42,7 +42,7 @@ /// \author Kazuyoshi Yoshimi (The University of Tokyo) int CalcSpectrumByLanczos( struct EDMainCalStruct *X, - double complex *tmp_v1, + double complex **tmp_v1, double dnorm, int Nomega, double complex *dcSpectrum, diff --git a/src/CalcSpectrumByTPQ.c b/src/CalcSpectrumByTPQ.c index 5afbc993a..9052ae6b2 100644 --- a/src/CalcSpectrumByTPQ.c +++ b/src/CalcSpectrumByTPQ.c @@ -129,7 +129,7 @@ int GetCalcSpectrumTPQ(double complex dcomega, double dtemp, double dspecifichea /// \author Kazuyoshi Yoshimi (The University of Tokyo) int CalcSpectrumByTPQ( struct EDMainCalStruct *X, - double complex *tmp_v1, + double complex **tmp_v1, double dnorm, int Nomega, double complex *dcSpectrum, diff --git a/src/Lanczos_EigenValue.c b/src/Lanczos_EigenValue.c index cdad2f793..74e83f751 100644 --- a/src/Lanczos_EigenValue.c +++ b/src/Lanczos_EigenValue.c @@ -325,7 +325,7 @@ int Lanczos_GetTridiagonalMatrixComponents( struct BindStruct *X, double *_alpha, double *_beta, - double complex *tmp_v1, + double complex **tmp_v1, unsigned long int *liLanczos_step ) { @@ -465,7 +465,7 @@ int ReadInitialVector(struct BindStruct *X, double complex* _v0, double complex /// \author Kazuyoshi Yoshimi (The University of Tokyo) int OutputLanczosVector(struct BindStruct *X, double complex* tmp_v0, - double complex *tmp_v1, + 
double complex **tmp_v1, unsigned long int liLanczosStp_vec){ char sdt[D_FileNameMax]; FILE *fp; @@ -496,7 +496,7 @@ int OutputLanczosVector(struct BindStruct *X, /// Output: Large.iv. /// \param tmp_v0 [out] The initial vector whose components are zero. /// \param tmp_v1 [out] The initial vector whose components are randomly given when initial_mode=1, otherwise, iv-th component is only given. -void SetInitialVector(struct BindStruct *X, double complex* tmp_v0, double complex *tmp_v1) { +void SetInitialVector(struct BindStruct *X, double complex* tmp_v0, double complex **tmp_v1) { int iproc; long int i, iv, i_max; unsigned long int i_max_tmp, sum_i_max; diff --git a/src/PairEx.c b/src/PairEx.c index 5d64e29b4..be68604c9 100644 --- a/src/PairEx.c +++ b/src/PairEx.c @@ -46,8 +46,8 @@ int GetPairExcitedState ( struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ) { int iret; @@ -73,21 +73,21 @@ int GetPairExcitedState switch(X->Def.iCalcModel){ case HubbardGC: - iret=GetPairExcitedStateHubbardGC(X, tmp_v0, tmp_v1); + iret=GetPairExcitedStateHubbardGC(X, nstate, tmp_v0, tmp_v1); break; case KondoGC: case Hubbard: case Kondo: - iret=GetPairExcitedStateHubbard(X, tmp_v0, tmp_v1); + iret=GetPairExcitedStateHubbard(X, nstate, tmp_v0, tmp_v1); break; case Spin: // for the Sz-conserved spin system - iret =GetPairExcitedStateSpin(X, tmp_v0, tmp_v1); + iret =GetPairExcitedStateSpin(X, nstate, tmp_v0, tmp_v1); break; case SpinGC: - iret=GetPairExcitedStateSpinGC(X,tmp_v0, tmp_v1); + iret=GetPairExcitedStateSpinGC(X,nstate,tmp_v0, tmp_v1); break; default: diff --git a/src/PairExHubbard.c b/src/PairExHubbard.c index a14fe378c..c4a2b1fcb 100644 --- a/src/PairExHubbard.c +++ b/src/PairExHubbard.c @@ -36,8 +36,8 @@ /// \version 1.2 int GetPairExcitedStateHubbardGC( struct BindStruct *X,/**< [inout] define list to get and put information of calculation*/ - double complex *tmp_v0, /**< [out] Result 
v0 = H v1*/ - double complex *tmp_v1 /**< [in] v0 = H v1*/ + int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + double complex **tmp_v1 /**< [in] v0 = H v1*/ ){ @@ -102,7 +102,7 @@ int GetPairExcitedStateHubbardGC( } else { X_GC_child_general_hopp_MPIsingle(org_isite2 - 1, org_sigma2, org_isite1 - 1, org_sigma1, - -conj(tmp_trans), X, tmp_v0, tmp_v1); + -conj(tmp_trans), X, nstate, tmp_v0, tmp_v1); } } else { @@ -111,7 +111,7 @@ int GetPairExcitedStateHubbardGC( isite1=X->Def.Tpow[2 * org_isite1 - 2 + org_sigma1]; #pragma omp parallel for default(none) private(j) firstprivate(i_max,X,isite1, tmp_trans) shared(tmp_v0, tmp_v1) for(j=1;j<=i_max;j++){ - GC_AisCis(j, tmp_v0, tmp_v1, X, isite1, -tmp_trans); + GC_AisCis(j, nstate, tmp_v0, tmp_v1, X, isite1, -tmp_trans); } } else { @@ -136,8 +136,8 @@ int GetPairExcitedStateHubbardGC( /// \version 1.2 int GetPairExcitedStateHubbard( struct BindStruct *X,/**< [inout] define list to get and put information of calculation*/ - double complex *tmp_v0, /**< [out] Result v0 = H v1*/ - double complex *tmp_v1 /**< [in] v0 = H v1*/ + int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + double complex **tmp_v1 /**< [in] v0 = H v1*/ ){ long unsigned int i,j, idim_maxMPI; long unsigned int irght,ilft,ihfbit; @@ -163,7 +163,7 @@ int GetPairExcitedStateHubbard( X->Large.mode=M_CALCSPEC; // X->Large.mode = M_MLTPLY; - double complex *tmp_v1bufOrg; + double complex **tmp_v1bufOrg; //set size #ifdef MPI idim_maxMPI = MaxMPI_li(X->Check.idim_maxOrg); @@ -187,16 +187,16 @@ int GetPairExcitedStateHubbard( if (org_isite1 > X->Def.Nsite && org_isite2 > X->Def.Nsite) { - X_child_CisAjt_MPIdouble(org_isite1-1, org_sigma1, org_isite2-1, org_sigma2, -tmp_trans, X, tmp_v0, tmp_v1, tmp_v1bufOrg, list_1_org, list_1buf_org, list_2_1, list_2_2); + X_child_CisAjt_MPIdouble(org_isite1-1, org_sigma1, org_isite2-1, org_sigma2, -tmp_trans, X, nstate, tmp_v0, tmp_v1, tmp_v1bufOrg, list_1_org, list_1buf_org, list_2_1, 
list_2_2); } else if (org_isite2 > X->Def.Nsite || org_isite1 > X->Def.Nsite) { if(org_isite1 < org_isite2) { - X_child_CisAjt_MPIsingle(org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, -tmp_trans, X, tmp_v0, + X_child_CisAjt_MPIsingle(org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, -tmp_trans, X, nstate, tmp_v0, tmp_v1, tmp_v1bufOrg, list_1_org, list_1buf_org, list_2_1, list_2_2); } else{ - X_child_CisAjt_MPIsingle(org_isite2 - 1, org_sigma2, org_isite1 - 1, org_sigma1, -conj(tmp_trans), X, tmp_v0, + X_child_CisAjt_MPIsingle(org_isite2 - 1, org_sigma2, org_isite1 - 1, org_sigma1, -conj(tmp_trans), X, nstate, tmp_v0, tmp_v1, tmp_v1bufOrg, list_1_org, list_1buf_org, list_2_1, list_2_2); } } else{ @@ -231,15 +231,15 @@ int GetPairExcitedStateHubbard( } } else{ - X_child_general_hopp_MPIdouble(org_isite1-1, org_sigma1, org_isite2-1, org_sigma2, -tmp_trans, X, tmp_v0, tmp_v1); + X_child_general_hopp_MPIdouble(org_isite1-1, org_sigma1, org_isite2-1, org_sigma2, -tmp_trans, X, nstate, tmp_v0, tmp_v1); } } else if (org_isite2 > X->Def.Nsite || org_isite1 > X->Def.Nsite){ if(org_isite1 < org_isite2){ - X_child_general_hopp_MPIsingle(org_isite1-1, org_sigma1,org_isite2-1, org_sigma2, -tmp_trans, X, tmp_v0, tmp_v1); + X_child_general_hopp_MPIsingle(org_isite1-1, org_sigma1,org_isite2-1, org_sigma2, -tmp_trans, X, nstate, tmp_v0, tmp_v1); } else{ - X_child_general_hopp_MPIsingle(org_isite2-1, org_sigma2, org_isite1-1, org_sigma1, -conj(tmp_trans), X, tmp_v0, tmp_v1); + X_child_general_hopp_MPIsingle(org_isite2-1, org_sigma2, org_isite1-1, org_sigma1, -conj(tmp_trans), X, nstate, tmp_v0, tmp_v1); } } else{ @@ -249,7 +249,7 @@ int GetPairExcitedStateHubbard( if(org_isite1==org_isite2 && org_sigma1==org_sigma2){ is = X->Def.Tpow[2 * org_isite1 - 2 + org_sigma1]; if( X->Def.PairExcitationOperator[i][4]==0) { -#pragma omp parallel for default(none) shared(list_1, tmp_v0, tmp_v1) firstprivate(i_max, is, tmp_trans) private(num1, ibit) +#pragma omp parallel for 
default(none) shared(list_1, nstate, tmp_v0, tmp_v1) firstprivate(i_max, is, tmp_trans) private(num1, ibit) for (j = 1; j <= i_max; j++) { ibit = list_1[j] & is; num1 = ibit / is; @@ -257,7 +257,7 @@ int GetPairExcitedStateHubbard( } } else{ -#pragma omp parallel for default(none) shared(list_1, tmp_v0, tmp_v1) firstprivate(i_max, is, tmp_trans) private(num1, ibit) +#pragma omp parallel for default(none) shared(list_1, nstate, tmp_v0, tmp_v1) firstprivate(i_max, is, tmp_trans) private(num1, ibit) for (j = 1; j <= i_max; j++) { ibit = list_1[j] & is; num1 = (1-ibit / is); diff --git a/src/PairExSpin.c b/src/PairExSpin.c index b4adbc9ab..351427038 100644 --- a/src/PairExSpin.c +++ b/src/PairExSpin.c @@ -34,17 +34,17 @@ /// \version 1.2 int GetPairExcitedStateSpinGC( struct BindStruct *X,/**< [in,out] define list to get and put information of calculation*/ - double complex *tmp_v0, /**< [out] Result v0 = H v1*/ - double complex *tmp_v1 /**< [in] v0 = H v1*/ + int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + double complex **tmp_v1 /**< [in] v0 = H v1*/ ){ int iret=0; if (X->Def.iFlgGeneralSpin == FALSE) { - iret=GetPairExcitedStateHalfSpinGC(X, tmp_v0, tmp_v1); + iret=GetPairExcitedStateHalfSpinGC(X, nstate, tmp_v0, tmp_v1); } else{ - iret=GetPairExcitedStateGeneralSpinGC(X, tmp_v0, tmp_v1); + iret=GetPairExcitedStateGeneralSpinGC(X, nstate, tmp_v0, tmp_v1); } return iret; } @@ -61,8 +61,8 @@ int GetPairExcitedStateSpinGC( /// \version 1.2 int GetPairExcitedStateHalfSpinGC( struct BindStruct *X,/**< [in,out] define list to get and put information of calculation*/ - double complex *tmp_v0, /**< [out] Result v0 = H v1*/ - double complex *tmp_v1 /**< [in] v0 = H v1*/ + int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + double complex **tmp_v1 /**< [in] v0 = H v1*/ ){ long unsigned int i,j; @@ -85,15 +85,15 @@ int GetPairExcitedStateHalfSpinGC( if(org_isite1 > X->Def.Nsite){ if(org_sigma1==org_sigma2){ // longitudinal magnetic field 
if(X->Def.PairExcitationOperator[i][4]==0) { - X_GC_child_CisAis_spin_MPIdouble(org_isite1 - 1, org_sigma1, tmp_trans, X, tmp_v0, tmp_v1); + X_GC_child_CisAis_spin_MPIdouble(org_isite1 - 1, org_sigma1, tmp_trans, X, nstate, tmp_v0, tmp_v1); } else{ - X_GC_child_AisCis_spin_MPIdouble(org_isite1 - 1, org_sigma1, -tmp_trans, X, tmp_v0, tmp_v1); + X_GC_child_AisCis_spin_MPIdouble(org_isite1 - 1, org_sigma1, -tmp_trans, X, nstate, tmp_v0, tmp_v1); } } else{ // transverse magnetic field //fprintf(stdoutMPI, "Debug: test, org_isite1=%d, org_sigma1=%d, orgsima_2=%d\n", org_isite1, org_sigma1, org_sigma2); - X_GC_child_CisAit_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, tmp_trans, X, tmp_v0, tmp_v1); + X_GC_child_CisAit_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, tmp_trans, X, nstate, tmp_v0, tmp_v1); } }else{ isite1 = X->Def.Tpow[org_isite1-1]; @@ -143,8 +143,8 @@ int GetPairExcitedStateHalfSpinGC( /// \version 1.2 int GetPairExcitedStateGeneralSpinGC( struct BindStruct *X,/**< [in,out] define list to get and put information of calculation*/ - double complex *tmp_v0, /**< [out] Result v0 = H v1*/ - double complex *tmp_v1 /**< [in] v0 = H v1*/ + int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + double complex **tmp_v1 /**< [in] v0 = H v1*/ ) { long unsigned int i, j; @@ -167,14 +167,14 @@ int GetPairExcitedStateGeneralSpinGC( if(org_sigma1==org_sigma2){ if(X->Def.PairExcitationOperator[i][4]==0) { // longitudinal magnetic field - X_GC_child_CisAis_GeneralSpin_MPIdouble(org_isite1 - 1, org_sigma1, tmp_trans, X, tmp_v0, tmp_v1); + X_GC_child_CisAis_GeneralSpin_MPIdouble(org_isite1 - 1, org_sigma1, tmp_trans, X, nstate, tmp_v0, tmp_v1); } else{ - X_GC_child_AisCis_GeneralSpin_MPIdouble(org_isite1 - 1, org_sigma1, -tmp_trans, X, tmp_v0, tmp_v1); + X_GC_child_AisCis_GeneralSpin_MPIdouble(org_isite1 - 1, org_sigma1, -tmp_trans, X, nstate, tmp_v0, tmp_v1); } }else{ // transverse magnetic field - 
X_GC_child_CisAit_GeneralSpin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, tmp_trans, X, tmp_v0, tmp_v1); + X_GC_child_CisAit_GeneralSpin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, tmp_trans, X, nstate, tmp_v0, tmp_v1); } } else{//org_isite1 <= X->Def.Nsite @@ -225,16 +225,16 @@ int GetPairExcitedStateGeneralSpinGC( /// \version 1.2 int GetPairExcitedStateSpin( struct BindStruct *X,/**< [in,out] define list to get and put information of calculation*/ - double complex *tmp_v0, /**< [out] Result v0 = H v1*/ - double complex *tmp_v1 /**< [in] v0 = H v1*/ + int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + double complex **tmp_v1 /**< [in] v0 = H v1*/ ){ int iret=0; if (X->Def.iFlgGeneralSpin == FALSE) { - iret=GetPairExcitedStateHalfSpin(X, tmp_v0, tmp_v1); + iret=GetPairExcitedStateHalfSpin(X, nstate, tmp_v0, tmp_v1); } else{ - iret=GetPairExcitedStateGeneralSpin(X, tmp_v0, tmp_v1); + iret=GetPairExcitedStateGeneralSpin(X, nstate, tmp_v0, tmp_v1); } return iret; } @@ -250,8 +250,8 @@ int GetPairExcitedStateSpin( /// \version 1.2 int GetPairExcitedStateHalfSpin( struct BindStruct *X,/**< [in,out] define list to get and put information of calculation*/ - double complex *tmp_v0, /**< [out] Result v0 = H v1*/ - double complex *tmp_v1 /**< [in] v0 = H v1*/ + int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + double complex **tmp_v1 /**< [in] v0 = H v1*/ ) { @@ -268,7 +268,7 @@ int GetPairExcitedStateHalfSpin( i_max = X->Check.idim_maxOrg; - double complex *tmp_v1bufOrg; + double complex **tmp_v1bufOrg; //set size #ifdef MPI idim_maxMPI = MaxMPI_li(X->Check.idim_maxOrg); @@ -321,7 +321,7 @@ int GetPairExcitedStateHalfSpin( } } else { //org_sigma1 != org_sigma2 // for the canonical case if (org_isite1 > X->Def.Nsite) {//For MPI - X_child_CisAit_spin_MPIdouble(org_isite1-1, org_sigma2, tmp_trans, X, tmp_v0, tmp_v1, tmp_v1bufOrg, i_max, X->Def.Tpow,list_1_org, list_1buf_org, list_2_1, list_2_2, X->Large.irght, 
X->Large.ilft,X->Large.ihfbit); + X_child_CisAit_spin_MPIdouble(org_isite1-1, org_sigma2, tmp_trans, X, nstate, tmp_v0, tmp_v1, tmp_v1bufOrg, i_max, X->Def.Tpow,list_1_org, list_1buf_org, list_2_1, list_2_2, X->Large.irght, X->Large.ilft,X->Large.ihfbit); } else { isite1 = X->Def.Tpow[org_isite1 - 1]; @@ -352,8 +352,8 @@ int GetPairExcitedStateHalfSpin( /// \version 1.2 int GetPairExcitedStateGeneralSpin( struct BindStruct *X,/**< [in,out] define list to get and put information of calculation*/ - double complex *tmp_v0, /**< [out] Result v0 = H v1*/ - double complex *tmp_v1 /**< [in] v0 = H v1*/ + int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + double complex **tmp_v1 /**< [in] v0 = H v1*/ ) { @@ -367,7 +367,7 @@ int GetPairExcitedStateGeneralSpin( int tmp_sgn, num1; i_max = X->Check.idim_maxOrg; - double complex *tmp_v1bufOrg; + double complex **tmp_v1bufOrg; //set size #ifdef MPI idim_maxMPI = MaxMPI_li(X->Check.idim_maxOrg); @@ -403,7 +403,7 @@ int GetPairExcitedStateGeneralSpin( } }//org_sigma1=org_sigma2 else {//org_sigma1 != org_sigma2 - X_child_CisAit_GeneralSpin_MPIdouble(org_isite1 - 1, org_sigma1, org_sigma2, tmp_trans, X, tmp_v0, + X_child_CisAit_GeneralSpin_MPIdouble(org_isite1 - 1, org_sigma1, org_sigma2, tmp_trans, X, nstate, tmp_v0, tmp_v1, tmp_v1bufOrg, i_max, list_1_org, list_1buf_org, X->Large.ihfbit); } diff --git a/src/SingleEx.c b/src/SingleEx.c index 4f79d3404..7f1999c0a 100644 --- a/src/SingleEx.c +++ b/src/SingleEx.c @@ -29,8 +29,8 @@ Target System: Hubbard, Kondo */ int GetSingleExcitedState( struct BindStruct *X,//!Def.iCalcModel) { case HubbardGC: - iret = GetSingleExcitedStateHubbardGC(X, tmp_v0, tmp_v1); + iret = GetSingleExcitedStateHubbardGC(X, nstate, tmp_v0, tmp_v1); break; case KondoGC: case Hubbard: case Kondo: - iret = GetSingleExcitedStateHubbard(X, tmp_v0, tmp_v1); + iret = GetSingleExcitedStateHubbard(X, nstate, tmp_v0, tmp_v1); break; case Spin: diff --git a/src/SingleExHubbard.c b/src/SingleExHubbard.c 
index 47ea9dc00..8664ccd31 100644 --- a/src/SingleExHubbard.c +++ b/src/SingleExHubbard.c @@ -33,8 +33,8 @@ */ int GetSingleExcitedStateHubbard( struct BindStruct *X,//!Def.NSingleExcitationOperator == 0) { return TRUE; } - double complex *tmp_v1bufOrg; + double complex **tmp_v1bufOrg; //set size #ifdef MPI idim_maxMPI = MaxMPI_li(X->Check.idim_maxOrg); @@ -63,7 +63,7 @@ int GetSingleExcitedStateHubbard( is1_spin = X->Def.Tpow[2 * org_isite + ispin]; if (itype == 1) { if (org_isite >= X->Def.Nsite) { - X_Cis_MPI(org_isite, ispin, tmpphi, tmp_v0, tmp_v1, tmp_v1bufOrg, idim_max, \ + X_Cis_MPI(org_isite, ispin, tmpphi, nstate, tmp_v0, tmp_v1, tmp_v1bufOrg, idim_max, \ X->Def.Tpow, list_1_org, list_1buf_org, list_2_1, list_2_2, \ X->Large.irght, X->Large.ilft, X->Large.ihfbit); } @@ -78,7 +78,7 @@ int GetSingleExcitedStateHubbard( } else if (itype == 0) { if (org_isite >= X->Def.Nsite) { - X_Ajt_MPI(org_isite, ispin, tmpphi, tmp_v0, tmp_v1, tmp_v1bufOrg, \ + X_Ajt_MPI(org_isite, ispin, tmpphi, nstate, tmp_v0, tmp_v1, tmp_v1bufOrg, \ idim_max, X->Def.Tpow, list_1_org, list_1buf_org, \ list_2_1, list_2_2, X->Large.irght, X->Large.ilft, X->Large.ihfbit); } @@ -106,8 +106,8 @@ int GetSingleExcitedStateHubbard( */ int GetSingleExcitedStateHubbardGC( struct BindStruct *X,//!Def.NSingleExcitationOperator == 0) { return TRUE; } - double complex *tmp_v1bufOrg; + double complex **tmp_v1bufOrg; //set size #ifdef MPI idim_maxMPI = MaxMPI_li(X->Check.idim_maxOrg); @@ -136,27 +136,27 @@ int GetSingleExcitedStateHubbardGC( tmpphi = X->Def.ParaSingleExcitationOperator[i]; if (itype == 1) { if (org_isite >= X->Def.Nsite) { - X_GC_Cis_MPI(org_isite, ispin, tmpphi, tmp_v0, tmp_v1, idim_max, tmp_v1bufOrg, X->Def.Tpow); + X_GC_Cis_MPI(org_isite, ispin, tmpphi, nstate, tmp_v0, tmp_v1, idim_max, tmp_v1bufOrg, X->Def.Tpow); } else { #pragma omp parallel for default(none) shared(tmp_v0, tmp_v1, X) \ firstprivate(idim_max, tmpphi, org_isite, ispin) private(j, is1_spin, tmp_off) for (j = 1; j <= 
idim_max; j++) { is1_spin = X->Def.Tpow[2 * org_isite + ispin]; - GC_Cis(j, tmp_v0, tmp_v1, is1_spin, tmpphi, &tmp_off); + GC_Cis(j, nstate, tmp_v0, tmp_v1, is1_spin, tmpphi, &tmp_off); }/*for (j = 1; j <= idim_max; j++)*/ } } else if (itype == 0) { if (org_isite >= X->Def.Nsite) { - X_GC_Ajt_MPI(org_isite, ispin, tmpphi, tmp_v0, tmp_v1, idim_max, tmp_v1bufOrg, X->Def.Tpow); + X_GC_Ajt_MPI(org_isite, ispin, tmpphi, nstate, tmp_v0, tmp_v1, idim_max, tmp_v1bufOrg, X->Def.Tpow); } else { #pragma omp parallel for default(none) shared(tmp_v0, tmp_v1, X) \ firstprivate(idim_max, tmpphi, org_isite, ispin) private(j, is1_spin, tmp_off) for (j = 1; j <= idim_max; j++) { is1_spin = X->Def.Tpow[2 * org_isite + ispin]; - GC_Ajt(j, tmp_v0, tmp_v1, is1_spin, tmpphi, &tmp_off); + GC_Ajt(j, nstate, tmp_v0, tmp_v1, is1_spin, tmpphi, &tmp_off); }/*for (j = 1; j <= idim_max; j++)*/ } } diff --git a/src/diagonalcalc.c b/src/diagonalcalc.c index 58275d9c7..3a6ee7157 100644 --- a/src/diagonalcalc.c +++ b/src/diagonalcalc.c @@ -45,8 +45,8 @@ int SetDiagonalTETransfer( double dtmp_V, long unsigned int spin, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); int SetDiagonalTEInterAll( @@ -56,8 +56,8 @@ int SetDiagonalTEInterAll( long unsigned int isigma2, double dtmp_V, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); int SetDiagonalTEChemi( @@ -65,8 +65,8 @@ int SetDiagonalTEChemi( long unsigned int spin, double dtmp_V, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); /** @@ -197,8 +197,8 @@ int diagonalcalcForTE ( const int _istep, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ) { long unsigned int i; @@ -211,7 +211,7 @@ int 
diagonalcalcForTE isite1 = X->Def.TETransferDiagonal[_istep][i][0] + 1; A_spin = X->Def.TETransferDiagonal[_istep][i][1]; tmp_V = X->Def.ParaTETransferDiagonal[_istep][i]; - SetDiagonalTETransfer(isite1, tmp_V, A_spin, X, tmp_v0, tmp_v1); + SetDiagonalTETransfer(isite1, tmp_V, A_spin, X, nstate, tmp_v0, tmp_v1); } } else if (X->Def.NTEInterAllDiagonal[_istep] >0) { @@ -223,7 +223,7 @@ int diagonalcalcForTE B_spin = X->Def.TEInterAllDiagonal[_istep][i][3]; tmp_V = X->Def.ParaTEInterAllDiagonal[_istep][i]; - if (SetDiagonalTEInterAll(isite1, isite2, A_spin, B_spin, tmp_V, X, tmp_v0, tmp_v1) != 0) { + if (SetDiagonalTEInterAll(isite1, isite2, A_spin, B_spin, tmp_V, X, nstate, tmp_v0, tmp_v1) != 0) { return -1; } } @@ -233,7 +233,7 @@ int diagonalcalcForTE isite1 = X->Def.TEChemi[_istep][i] + 1; A_spin = X->Def.SpinTEChemi[_istep][i]; tmp_V = -X->Def.ParaTEChemi[_istep][i]; - if (SetDiagonalTEChemi(isite1, A_spin, tmp_V, X, tmp_v0, tmp_v1) != 0) { + if (SetDiagonalTEChemi(isite1, A_spin, tmp_V, X, nstate, tmp_v0, tmp_v1) != 0) { return -1; } } @@ -1435,8 +1435,8 @@ int SetDiagonalTEInterAll( long unsigned int isigma2, double dtmp_V, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ){ long unsigned int is1_spin; long unsigned int is2_spin; @@ -1764,8 +1764,8 @@ int SetDiagonalTEChemi( long unsigned int spin, double dtmp_V, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ){ long unsigned int is1_up; long unsigned int num1; @@ -1855,7 +1855,7 @@ firstprivate(i_max, dtmp_V) private(j) is1 = X->Def.Tpow[2*isite1-1]; } -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1) private(num1, ibit1) +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, nstate, tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1) 
private(num1, ibit1) for(j = 1;j <= i_max;j++){ ibit1 = list_1[j]&is1; @@ -1868,7 +1868,7 @@ firstprivate(i_max, dtmp_V) private(j) case SpinGC: if(X->Def.iFlgGeneralSpin==FALSE){ is1_up = X->Def.Tpow[isite1-1]; -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1_up, spin) private(num1) +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, nstate, tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1_up, spin) private(num1) for(j = 1;j <= i_max;j++){ num1=(((j-1)& is1_up)/is1_up)^(1-spin); tmp_v0[j] += dtmp_V * num1*tmp_v1[j]; @@ -1890,7 +1890,7 @@ firstprivate(i_max, dtmp_V) private(j) case Spin: if(X->Def.iFlgGeneralSpin==FALSE){ is1_up = X->Def.Tpow[isite1-1]; -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1_up, spin) private(num1) +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, nstate, tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1_up, spin) private(num1) for(j = 1;j <= i_max;j++){ num1=((list_1[j]& is1_up)/is1_up)^(1-spin); tmp_v0[j] += dtmp_V * num1*tmp_v1[j]; @@ -1942,8 +1942,8 @@ int SetDiagonalTETransfer double dtmp_V, long unsigned int spin, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ){ long unsigned int is1_up; long unsigned int ibit1_up; @@ -2011,7 +2011,7 @@ int SetDiagonalTETransfer } else { is1 = X->Def.Tpow[2 * isite1 - 1]; } -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, tmp_v0, tmp_v1) \ +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, nstate, tmp_v0, tmp_v1) \ firstprivate(i_max, dtmp_V, is1) private(num1, ibit1) for (j = 1; j <= i_max; j++) { ibit1 = (j - 1) & is1; @@ -2029,7 +2029,7 @@ int SetDiagonalTETransfer } else { is1 = X->Def.Tpow[2 * isite1 - 1]; } -#pragma omp parallel for default(none) 
reduction(+:dam_pr) shared(list_1, tmp_v0, tmp_v1) \ +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, nstate, tmp_v0, tmp_v1) \ firstprivate(i_max, dtmp_V, is1) private(num1, ibit1) for (j = 1; j <= i_max; j++) { ibit1 = list_1[j] & is1; @@ -2042,7 +2042,7 @@ int SetDiagonalTETransfer case SpinGC: if (X->Def.iFlgGeneralSpin == FALSE) { is1_up = X->Def.Tpow[isite1 - 1]; -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, tmp_v0, tmp_v1) \ +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, nstate, tmp_v0, tmp_v1) \ firstprivate(i_max, dtmp_V, is1_up, spin) private(num1, ibit1_up) for (j = 1; j <= i_max; j++) { ibit1_up = (((j - 1) & is1_up) / is1_up) ^ (1 - spin); @@ -2065,7 +2065,7 @@ int SetDiagonalTETransfer case Spin: if (X->Def.iFlgGeneralSpin == FALSE) { is1_up = X->Def.Tpow[isite1 - 1]; -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, tmp_v0, tmp_v1)\ +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, nstate, tmp_v0, tmp_v1)\ firstprivate(i_max, dtmp_V, is1_up, spin) private(num1, ibit1_up) for (j = 1; j <= i_max; j++) { ibit1_up = ((list_1[j] & is1_up) / is1_up) ^ (1 - spin); @@ -2073,7 +2073,7 @@ int SetDiagonalTETransfer dam_pr += dtmp_V * ibit1_up * conj(tmp_v1[j]) * tmp_v1[j]; } } else { -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, tmp_v0, tmp_v1)\ +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, nstate, tmp_v0, tmp_v1)\ firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) for (j = 1; j <= i_max; j++) { num1 = BitCheckGeneral(list_1[j], isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); diff --git a/src/include/CalcSpectrum.h b/src/include/CalcSpectrum.h index c85223cd2..166bd4191 100644 --- a/src/include/CalcSpectrum.h +++ b/src/include/CalcSpectrum.h @@ -22,8 +22,8 @@ int CalcSpectrum( int GetExcitedState( struct BindStruct *X, - double complex *tmp_v0, - double 
complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); diff --git a/src/include/CalcSpectrumByLanczos.h b/src/include/CalcSpectrumByLanczos.h index a483e672a..2a7724e2b 100644 --- a/src/include/CalcSpectrumByLanczos.h +++ b/src/include/CalcSpectrumByLanczos.h @@ -18,7 +18,7 @@ int CalcSpectrumByLanczos( struct EDMainCalStruct *X, - double complex *tmp_v1, + double complex **tmp_v1, double norm, int Nomega, double complex *dcSpectrum, diff --git a/src/include/CalcSpectrumByTPQ.h b/src/include/CalcSpectrumByTPQ.h index b5f71bad5..3b9e4ab79 100644 --- a/src/include/CalcSpectrumByTPQ.h +++ b/src/include/CalcSpectrumByTPQ.h @@ -18,7 +18,7 @@ int CalcSpectrumByTPQ( struct EDMainCalStruct *X, - double complex *tmp_v1, + double complex **tmp_v1, double norm, int Nomega, double complex *dcSpectrum, diff --git a/src/include/Lanczos_EigenValue.h b/src/include/Lanczos_EigenValue.h index b94075295..0bd0605ad 100644 --- a/src/include/Lanczos_EigenValue.h +++ b/src/include/Lanczos_EigenValue.h @@ -17,11 +17,11 @@ int Lanczos_EigenValue(struct BindStruct *X); int Lanczos_GetTridiagonalMatrixComponents(struct BindStruct *X, double *alpha, double *beta, double complex *_v1, unsigned long int *Lanczos_step); -int ReadInitialVector(struct BindStruct *X, double complex* tmp_v0, double complex *tmp_v1, unsigned long int *liLanczosStp_vec); +int ReadInitialVector(struct BindStruct *X, double complex* tmp_v0, double complex **tmp_v1, unsigned long int *liLanczosStp_vec); -int OutputLanczosVector(struct BindStruct *X, double complex* tmp_v0, double complex *tmp_v1, unsigned long int liLanczosStp_vec); +int OutputLanczosVector(struct BindStruct *X, double complex* tmp_v0, double complex **tmp_v1, unsigned long int liLanczosStp_vec); -void SetInitialVector(struct BindStruct *X, double complex* tmp_v0, double complex *tmp_v1); +void SetInitialVector(struct BindStruct *X, double complex* tmp_v0, double complex **tmp_v1); int ReadTMComponents( struct BindStruct *X, 
diff --git a/src/include/PairEx.h b/src/include/PairEx.h index b7d99f741..a8cccccba 100644 --- a/src/include/PairEx.h +++ b/src/include/PairEx.h @@ -19,6 +19,6 @@ int GetPairExcitedState ( struct BindStruct *X, - double complex *tmp_v0, /**< [out] Result v0 = H v1*/ - double complex *tmp_v1 /**< [in] v0 = H v1*/ + int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + double complex **tmp_v1 /**< [in] v0 = H v1*/ ); diff --git a/src/include/PairExHubbard.h b/src/include/PairExHubbard.h index fa6c4e797..50c9c5c9e 100644 --- a/src/include/PairExHubbard.h +++ b/src/include/PairExHubbard.h @@ -18,13 +18,13 @@ int GetPairExcitedStateHubbardGC( struct BindStruct *X, - double complex *tmp_v0, /**< [out] Result v0 = H v1*/ - double complex *tmp_v1 /**< [in] v0 = H v1*/ + int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + double complex **tmp_v1 /**< [in] v0 = H v1*/ ); int GetPairExcitedStateHubbard( struct BindStruct *X, - double complex *tmp_v0, /**< [out] Result v0 = H v1*/ - double complex *tmp_v1 /**< [in] v0 = H v1*/ + int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + double complex **tmp_v1 /**< [in] v0 = H v1*/ ); diff --git a/src/include/PairExSpin.h b/src/include/PairExSpin.h index bfa110673..1339f2d45 100644 --- a/src/include/PairExSpin.h +++ b/src/include/PairExSpin.h @@ -18,43 +18,43 @@ int GetPairExcitedStateSpinGC( struct BindStruct *X, - double complex *tmp_v0, /**< [out] Result v0 = H v1*/ - double complex *tmp_v1 /**< [in] v0 = H v1*/ + int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + double complex **tmp_v1 /**< [in] v0 = H v1*/ ); int GetPairExcitedStateHalfSpinGC( struct BindStruct *X, - double complex *tmp_v0, /**< [out] Result v0 = H v1*/ - double complex *tmp_v1 /**< [in] v0 = H v1*/ + int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + double complex **tmp_v1 /**< [in] v0 = H v1*/ ); int GetPairExcitedStateGeneralSpinGC( struct BindStruct *X, - double complex 
*tmp_v0, /**< [out] Result v0 = H v1*/ - double complex *tmp_v1 /**< [in] v0 = H v1*/ + int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + double complex **tmp_v1 /**< [in] v0 = H v1*/ ); int GetPairExcitedStateSpin( struct BindStruct *X, - double complex *tmp_v0, /**< [out] Result v0 = H v1*/ - double complex *tmp_v1 /**< [in] v0 = H v1*/ + int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + double complex **tmp_v1 /**< [in] v0 = H v1*/ ); int GetPairExcitedStateHalfSpin( struct BindStruct *X, - double complex *tmp_v0, /**< [out] Result v0 = H v1*/ - double complex *tmp_v1 /**< [in] v0 = H v1*/ + int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + double complex **tmp_v1 /**< [in] v0 = H v1*/ ); int GetPairExcitedStateGeneralSpin( struct BindStruct *X, - double complex *tmp_v0, /**< [out] Result v0 = H v1*/ - double complex *tmp_v1 /**< [in] v0 = H v1*/ + int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + double complex **tmp_v1 /**< [in] v0 = H v1*/ ); diff --git a/src/include/SingleEx.h b/src/include/SingleEx.h index 0c940c853..73c514521 100644 --- a/src/include/SingleEx.h +++ b/src/include/SingleEx.h @@ -20,6 +20,6 @@ int GetSingleExcitedState ( struct BindStruct *X, - double complex *tmp_v0, /**< [out] Result v0 = H v1*/ - double complex *tmp_v1 /**< [in] v0 = H v1*/ + int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + double complex **tmp_v1 /**< [in] v0 = H v1*/ ); diff --git a/src/include/SingleExHubbard.h b/src/include/SingleExHubbard.h index cc067a68d..ef7b84c37 100644 --- a/src/include/SingleExHubbard.h +++ b/src/include/SingleExHubbard.h @@ -19,13 +19,13 @@ int GetSingleExcitedStateHubbard ( struct BindStruct *X, - double complex *tmp_v0, /**< [out] Result v0 = H v1*/ - double complex *tmp_v1 /**< [in] v0 = H v1*/ + int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + double complex **tmp_v1 /**< [in] v0 = H v1*/ ); int GetSingleExcitedStateHubbardGC 
( struct BindStruct *X, - double complex *tmp_v0, /**< [out] Result v0 = H v1*/ - double complex *tmp_v1 /**< [in] v0 = H v1*/ + int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + double complex **tmp_v1 /**< [in] v0 = H v1*/ ); diff --git a/src/include/diagonalcalc.h b/src/include/diagonalcalc.h index 9d0a91b94..d4244f415 100644 --- a/src/include/diagonalcalc.h +++ b/src/include/diagonalcalc.h @@ -65,7 +65,7 @@ int SetDiagonalInterAll int diagonalcalcForTE( const int _istep, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); diff --git a/src/include/mltply.h b/src/include/mltply.h index f973fd23f..e12b7b32b 100644 --- a/src/include/mltply.h +++ b/src/include/mltply.h @@ -19,6 +19,6 @@ #include "Common.h" -int mltply(struct BindStruct *X, double complex *tmp_v0,double complex *tmp_v1); +int mltply(struct BindStruct *X, int nstate, double complex **tmp_v0,double complex **tmp_v1); #endif /* HPHI_MLTPLY_H */ diff --git a/src/include/mltplyHubbard.h b/src/include/mltplyHubbard.h index 55fb74ef5..d0bc0534f 100644 --- a/src/include/mltplyHubbard.h +++ b/src/include/mltplyHubbard.h @@ -19,30 +19,30 @@ #include "Common.h" -int mltplyHubbard(struct BindStruct *X, double complex *tmp_v0,double complex *tmp_v1); +int mltplyHubbard(struct BindStruct *X, int nstate, double complex **tmp_v0,double complex **tmp_v1); -int mltplyHubbardGC(struct BindStruct *X, double complex *tmp_v0,double complex *tmp_v1); +int mltplyHubbardGC(struct BindStruct *X, int nstate, double complex **tmp_v0,double complex **tmp_v1); double complex GC_child_general_hopp ( - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X, double complex trans ); double complex GC_child_general_int( - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct 
BindStruct *X ); double complex child_general_int ( - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X ); @@ -57,36 +57,36 @@ double complex child_general_hopp double complex child_exchange ( - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X ); double complex child_pairhopp ( - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X ); double complex GC_child_exchange ( - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X ); double complex GC_child_pairlift ( - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X ); double complex GC_child_pairhopp ( - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X ); diff --git a/src/include/mltplyHubbardCore.h b/src/include/mltplyHubbardCore.h index 9aca29d15..55800e672 100644 --- a/src/include/mltplyHubbardCore.h +++ b/src/include/mltplyHubbardCore.h @@ -22,8 +22,8 @@ double complex child_pairhopp_element ( long unsigned int j, - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X, long unsigned int *tmp_off ); @@ -31,8 +31,8 @@ double complex child_pairhopp_element double complex GC_child_exchange_element ( long unsigned int j, - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X, long unsigned int *tmp_off ); @@ -40,8 +40,8 @@ double complex GC_child_exchange_element double complex GC_child_pairhopp_element ( long unsigned int j, - double complex 
*tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X, long unsigned int *tmp_off ); @@ -49,8 +49,8 @@ double complex GC_child_pairhopp_element double complex child_exchange_element ( long unsigned int j, - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X, long unsigned int *tmp_off ); @@ -61,8 +61,8 @@ double complex child_CisAisCisAis_element long unsigned int isite1, long unsigned int isite3, double complex tmp_V, - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X, long unsigned int *tmp_off ); @@ -76,8 +76,8 @@ double complex child_CisAisCjtAku_element long unsigned int Bsum, long unsigned int Bdiff, double complex tmp_V, - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X, long unsigned int *tmp_off ); @@ -91,8 +91,8 @@ double complex child_CisAjtCkuAku_element long unsigned int Asum, long unsigned int Adiff, double complex tmp_V, - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X, long unsigned int *tmp_off ); @@ -109,8 +109,8 @@ double complex child_CisAjtCkuAlv_element long unsigned int Bsum, long unsigned int Bdiff, double complex tmp_V, - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X, long unsigned int *tmp_off_2 ); @@ -121,8 +121,8 @@ double complex GC_child_CisAisCisAis_element long unsigned int isite1, long unsigned int isite3, double complex tmp_V, - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X, long unsigned int *tmp_off ); @@ -136,8 +136,8 @@ double complex 
GC_child_CisAisCjtAku_element long unsigned int Bsum, long unsigned int Bdiff, double complex tmp_V, - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X, long unsigned int *tmp_off ); @@ -151,8 +151,8 @@ double complex GC_child_CisAjtCkuAku_element long unsigned int Asum, long unsigned int Adiff, double complex tmp_V, - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X, long unsigned int *tmp_off ); @@ -169,8 +169,8 @@ double complex GC_child_CisAjtCkuAlv_element long unsigned int Bsum, long unsigned int Bdiff, double complex tmp_V, - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X, long unsigned int *tmp_off_2 ); @@ -180,7 +180,7 @@ double complex GC_CisAis ( long unsigned int j, double complex *tmp_v0, - double complex *tmp_v1, + double complex **tmp_v1, struct BindStruct *X, long unsigned int is1_spin, double complex tmp_trans @@ -188,8 +188,8 @@ double complex GC_CisAis double complex GC_AisCis( long unsigned int j, - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X, long unsigned int is1_spin, double complex tmp_trans @@ -230,7 +230,7 @@ double complex CisAjt ( long unsigned int j, double complex *tmp_v0, - double complex *tmp_v1, + double complex **tmp_v1, struct BindStruct *X, long unsigned int is1_spin, long unsigned int is2_spin, @@ -244,7 +244,7 @@ double complex GC_CisAjt ( long unsigned int j, double complex *tmp_v0, - double complex *tmp_v1, + double complex **tmp_v1, struct BindStruct *X, long unsigned int is1_spin, long unsigned int is2_spin, @@ -296,8 +296,8 @@ int child_exchange_GetInfo double complex GC_Ajt ( long unsigned int j, - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double 
complex **tmp_v0, + double complex **tmp_v1, long unsigned int is1_spin, double complex tmp_V, long unsigned int *tmp_off @@ -306,8 +306,8 @@ double complex GC_Ajt double complex GC_Cis ( long unsigned int j, - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, long unsigned int is1_spin, double complex tmp_V, long unsigned int *tmp_off @@ -318,8 +318,8 @@ double complex GC_Cis double complex GC_Ajt ( long unsigned int j, - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, long unsigned int is1_spin, double complex tmp_V, long unsigned int *tmp_off diff --git a/src/include/mltplyMPIBoost.h b/src/include/mltplyMPIBoost.h index 82403308c..5857fd4c3 100644 --- a/src/include/mltplyMPIBoost.h +++ b/src/include/mltplyMPIBoost.h @@ -24,8 +24,8 @@ void child_general_int_spin_MPIBoost ( struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, double complex *tmp_v2, double complex *tmp_v3 ); diff --git a/src/include/mltplyMPIHubbard.h b/src/include/mltplyMPIHubbard.h index 06a49cf6a..bfdcfb85d 100644 --- a/src/include/mltplyMPIHubbard.h +++ b/src/include/mltplyMPIHubbard.h @@ -25,8 +25,8 @@ void GC_child_general_hopp_MPIdouble ( unsigned long int itrans, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_GC_child_general_hopp_MPIdouble @@ -37,16 +37,16 @@ double complex X_GC_child_general_hopp_MPIdouble int org_ispin2, double complex tmp_trans, struct BindStruct *X , - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); void GC_child_general_hopp_MPIsingle ( unsigned long int itrans, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + 
double complex **tmp_v1 ); double complex X_GC_child_general_hopp_MPIsingle @@ -57,8 +57,8 @@ double complex X_GC_child_general_hopp_MPIsingle int org_ispin2, double complex tmp_trans, struct BindStruct *X , - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); @@ -66,8 +66,8 @@ void child_general_hopp_MPIdouble ( unsigned long int itrans, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_child_general_hopp_MPIdouble @@ -78,16 +78,16 @@ double complex X_child_general_hopp_MPIdouble int org_ispin2, double complex tmp_trans, struct BindStruct *X , - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); void child_general_hopp_MPIsingle ( unsigned long int itrans, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_child_general_hopp_MPIsingle @@ -98,6 +98,6 @@ double complex X_child_general_hopp_MPIsingle int org_ispin2, double complex tmp_trans, struct BindStruct *X , - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); diff --git a/src/include/mltplyMPIHubbardCore.h b/src/include/mltplyMPIHubbardCore.h index ed970de7e..4b8fd8d01 100644 --- a/src/include/mltplyMPIHubbardCore.h +++ b/src/include/mltplyMPIHubbardCore.h @@ -83,8 +83,8 @@ double complex X_GC_child_CisAisCjtAjt_Hubbard_MPI int org_ispin3, double complex tmp_V, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_GC_child_CisAjtCkuAlv_Hubbard_MPI @@ -99,8 +99,8 @@ double complex X_GC_child_CisAjtCkuAlv_Hubbard_MPI int isigma4, double complex tmp_V, struct BindStruct *X, - double complex *tmp_v0, - 
double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_GC_child_CisAjtCkuAku_Hubbard_MPI @@ -113,8 +113,8 @@ double complex X_GC_child_CisAjtCkuAku_Hubbard_MPI int isigma3, double complex tmp_V, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_GC_child_CisAisCjtAku_Hubbard_MPI @@ -127,8 +127,8 @@ double complex X_GC_child_CisAisCjtAku_Hubbard_MPI int isigma4, double complex tmp_V, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_GC_child_CisAis_Hubbard_MPI @@ -137,8 +137,8 @@ double complex X_GC_child_CisAis_Hubbard_MPI int org_ispin1, double complex tmp_V, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_GC_child_CisAjt_Hubbard_MPI @@ -149,8 +149,8 @@ double complex X_GC_child_CisAjt_Hubbard_MPI int org_ispin2, double complex tmp_V, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_child_CisAisCjtAjt_Hubbard_MPI @@ -161,8 +161,8 @@ double complex X_child_CisAisCjtAjt_Hubbard_MPI int org_ispin3, double complex tmp_V, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_child_CisAjtCkuAlv_Hubbard_MPI @@ -177,8 +177,8 @@ double complex X_child_CisAjtCkuAlv_Hubbard_MPI int isigma4, double complex tmp_V, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_child_CisAjtCkuAku_Hubbard_MPI @@ -191,8 +191,8 @@ double complex X_child_CisAjtCkuAku_Hubbard_MPI int isigma3, double complex 
tmp_V, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_child_CisAisCjtAku_Hubbard_MPI @@ -205,8 +205,8 @@ double complex X_child_CisAisCjtAku_Hubbard_MPI int isigma4, double complex tmp_V, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_child_CisAis_Hubbard_MPI @@ -215,8 +215,8 @@ double complex X_child_CisAis_Hubbard_MPI int org_ispin1, double complex tmp_V, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_child_CisAjt_MPIdouble @@ -227,8 +227,8 @@ double complex X_child_CisAjt_MPIdouble int org_ispin2, double complex tmp_trans, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, double complex *v1buf, long unsigned int *list_1_org, long unsigned int *list_1buf_org, @@ -244,8 +244,8 @@ double complex X_child_CisAjt_MPIsingle int org_ispin2, double complex tmp_trans, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, double complex *v1buf, long unsigned int *list_1_org, long unsigned int *list_1buf_org, @@ -259,10 +259,10 @@ double complex X_GC_Cis_MPI int org_isite, int org_ispin, double complex tmp_trans, - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, unsigned long int idim_max, - double complex *tmp_v1buf, + double complex **tmp_v1buf, unsigned long int *Tpow ); @@ -271,10 +271,10 @@ double complex X_GC_Ajt_MPI int org_isite, int org_ispin, double complex tmp_trans, - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, unsigned long 
int idim_max, - double complex *tmp_v1buf, + double complex **tmp_v1buf, long unsigned int *Tpow ); @@ -283,9 +283,9 @@ double complex X_Cis_MPI int org_isite, unsigned int org_ispin, double complex tmp_trans, - double complex *tmp_v0, - double complex *tmp_v1, - double complex *tmp_v1buf, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, + double complex **tmp_v1buf, unsigned long int idim_max, long unsigned int *Tpow, long unsigned int *list_1_org, @@ -302,9 +302,9 @@ double complex X_Ajt_MPI int org_isite, unsigned int org_ispin, double complex tmp_trans, - double complex *tmp_v0, - double complex *tmp_v1, - double complex *tmp_v1buf, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, + double complex **tmp_v1buf, unsigned long int idim_max, long unsigned int *Tpow, long unsigned int *list_1_org, diff --git a/src/include/mltplyMPISpin.h b/src/include/mltplyMPISpin.h index 641e85c2e..7f005b322 100644 --- a/src/include/mltplyMPISpin.h +++ b/src/include/mltplyMPISpin.h @@ -25,8 +25,8 @@ void child_general_int_spin_MPIdouble ( unsigned long int i_int, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_child_general_int_spin_MPIdouble @@ -39,8 +39,8 @@ double complex X_child_general_int_spin_MPIdouble int org_ispin4, double complex tmp_J, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); @@ -49,16 +49,16 @@ double complex X_child_general_int_spin_TotalS_MPIdouble int org_isite1, int org_isite3, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); void child_general_int_spin_MPIsingle ( unsigned long int i_int, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double 
complex X_child_general_int_spin_MPIsingle @@ -71,54 +71,54 @@ double complex X_child_general_int_spin_MPIsingle int org_ispin4, double complex tmp_J, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); void GC_child_general_int_spin_MPIdouble ( unsigned long int i_int, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); void GC_child_general_int_spin_MPIsingle ( unsigned long int i_int, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); void GC_child_general_int_GeneralSpin_MPIdouble ( unsigned long int i_int, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); void GC_child_general_int_GeneralSpin_MPIsingle ( unsigned long int i_int, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); void child_general_int_GeneralSpin_MPIdouble ( unsigned long int i_int, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); void child_general_int_GeneralSpin_MPIsingle ( unsigned long int i_int, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); diff --git a/src/include/mltplyMPISpinCore.h b/src/include/mltplyMPISpinCore.h index 38620d04b..bb550e5d0 100644 --- a/src/include/mltplyMPISpinCore.h +++ b/src/include/mltplyMPISpinCore.h @@ -30,8 +30,8 @@ double complex X_GC_child_CisAisCjuAjv_GeneralSpin_MPIdouble int org_ispin4, double complex tmp_J, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double 
complex **tmp_v1 ); double complex X_GC_child_CisAitCjuAju_GeneralSpin_MPIdouble @@ -43,8 +43,8 @@ double complex X_GC_child_CisAitCjuAju_GeneralSpin_MPIdouble int org_ispin3, double complex tmp_J, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_GC_child_CisAitCjuAjv_GeneralSpin_MPIdouble @@ -57,8 +57,8 @@ double complex X_GC_child_CisAitCjuAjv_GeneralSpin_MPIdouble int org_ispin4, double complex tmp_J, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); //general spin - single @@ -71,8 +71,8 @@ double complex X_GC_child_CisAisCjuAjv_GeneralSpin_MPIsingle int org_ispin4, double complex tmp_J, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_GC_child_CisAitCjuAju_GeneralSpin_MPIsingle @@ -84,8 +84,8 @@ double complex X_GC_child_CisAitCjuAju_GeneralSpin_MPIsingle int org_ispin3, double complex tmp_J, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_GC_child_CisAitCjuAjv_GeneralSpin_MPIsingle @@ -98,8 +98,8 @@ double complex X_GC_child_CisAitCjuAjv_GeneralSpin_MPIsingle int org_ispin4, double complex tmp_J, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_GC_child_CisAit_GeneralSpin_MPIdouble @@ -109,8 +109,8 @@ double complex X_GC_child_CisAit_GeneralSpin_MPIdouble int org_ispin2, double complex tmp_trans, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_GC_child_CisAis_GeneralSpin_MPIdouble @@ -119,8 +119,8 @@ double complex 
X_GC_child_CisAis_GeneralSpin_MPIdouble int org_ispin1, double complex tmp_J, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_GC_child_AisCis_GeneralSpin_MPIdouble @@ -129,8 +129,8 @@ double complex X_GC_child_AisCis_GeneralSpin_MPIdouble int org_ispin1, double complex tmp_J, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_GC_child_CisAisCjuAju_GeneralSpin_MPIdouble @@ -141,8 +141,8 @@ double complex X_GC_child_CisAisCjuAju_GeneralSpin_MPIdouble int org_ispin3, double complex tmp_trans, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_GC_child_CisAisCjuAju_GeneralSpin_MPIsingle @@ -153,8 +153,8 @@ double complex X_GC_child_CisAisCjuAju_GeneralSpin_MPIsingle int org_ispin3, double complex tmp_trans, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_child_CisAit_GeneralSpin_MPIdouble @@ -164,9 +164,9 @@ double complex X_child_CisAit_GeneralSpin_MPIdouble int org_ispin2, double complex tmp_trans, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1, - double complex *tmp_v1buf, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, + double complex **tmp_v1buf, unsigned long int idim_max, long unsigned int *list_1_org, long unsigned int *list_1buf_org, @@ -184,8 +184,8 @@ double complex X_GC_child_CisAitCiuAiv_spin_MPIdouble int org_ispin4, double complex tmp_J, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_GC_child_CisAisCjuAjv_spin_MPIdouble @@ -197,8 +197,8 @@ double complex 
X_GC_child_CisAisCjuAjv_spin_MPIdouble int org_ispin4, double complex tmp_J, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_GC_child_CisAitCjuAju_spin_MPIdouble @@ -210,8 +210,8 @@ double complex X_GC_child_CisAitCjuAju_spin_MPIdouble int org_ispin3, double complex tmp_J, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_GC_child_CisAisCjuAju_spin_MPIdouble @@ -222,8 +222,8 @@ double complex X_GC_child_CisAisCjuAju_spin_MPIdouble int org_ispin3, double complex tmp_J, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_GC_child_CisAitCiuAiv_spin_MPIsingle @@ -236,8 +236,8 @@ double complex X_GC_child_CisAitCiuAiv_spin_MPIsingle int org_ispin4, double complex tmp_J, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_GC_child_CisAisCjuAjv_spin_MPIsingle @@ -249,8 +249,8 @@ double complex X_GC_child_CisAisCjuAjv_spin_MPIsingle int org_ispin4, double complex tmp_J, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_GC_child_CisAitCjuAju_spin_MPIsingle @@ -262,8 +262,8 @@ double complex X_GC_child_CisAitCjuAju_spin_MPIsingle int org_ispin3, double complex tmp_J, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_GC_child_CisAisCjuAju_spin_MPIsingle @@ -274,8 +274,8 @@ double complex X_GC_child_CisAisCjuAju_spin_MPIsingle int org_ispin3, double complex tmp_J, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int 
nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_GC_child_CisAisCjuAju_spin_MPIsingle @@ -286,8 +286,8 @@ double complex X_GC_child_CisAisCjuAju_spin_MPIsingle int org_ispin3, double complex tmp_J, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_GC_child_CisAit_spin_MPIdouble @@ -297,8 +297,8 @@ double complex X_GC_child_CisAit_spin_MPIdouble int org_ispin2, double complex tmp_trans, struct BindStruct *X , - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_GC_child_CisAis_spin_MPIdouble @@ -307,8 +307,8 @@ double complex X_GC_child_CisAis_spin_MPIdouble int org_ispin1, double complex tmp_trans, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_GC_child_AisCis_spin_MPIdouble @@ -317,8 +317,8 @@ double complex X_GC_child_AisCis_spin_MPIdouble int org_ispin1, double complex tmp_trans, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_child_CisAit_spin_MPIdouble @@ -327,9 +327,9 @@ double complex X_child_CisAit_spin_MPIdouble int org_ispin2, double complex tmp_trans, struct BindStruct *X /**< [inout]*/, - double complex *tmp_v0 /**< [out] Result v0 = H v1*/, - double complex *tmp_v1, /**< [in] v0 = H v1*/ - double complex *tmp_v1buf, + int nstate, double complex **tmp_v0 /**< [out] Result v0 = H v1*/, + double complex **tmp_v1, /**< [in] v0 = H v1*/ + double complex **tmp_v1buf, unsigned long int idim_max, long unsigned int *Tpow, long unsigned int *list_1_org, @@ -349,8 +349,8 @@ double complex X_child_CisAisCjuAju_GeneralSpin_MPIdouble int org_ispin3, double complex tmp_J, struct BindStruct *X, - double complex *tmp_v0, - double 
complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_child_CisAitCjuAjv_GeneralSpin_MPIdouble @@ -363,8 +363,8 @@ double complex X_child_CisAitCjuAjv_GeneralSpin_MPIdouble int org_ispin4, double complex tmp_J, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); //general spin - single @@ -376,8 +376,8 @@ double complex X_child_CisAisCjuAju_GeneralSpin_MPIsingle int org_ispin3, double complex tmp_J, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); double complex X_child_CisAitCjuAjv_GeneralSpin_MPIsingle @@ -390,54 +390,54 @@ double complex X_child_CisAitCjuAjv_GeneralSpin_MPIsingle int org_ispin4, double complex tmp_J, struct BindStruct *X, - double complex *tmp_v0, - double complex *tmp_v1 + int nstate, double complex **tmp_v0, + double complex **tmp_v1 ); void GC_child_CisAisCjuAjv_spin_MPIdouble ( unsigned long int i_int /**< [in] Interaction ID*/, struct BindStruct *X /**< [inout]*/, - double complex *tmp_v0 /**< [out] Result v0 = H v1*/, - double complex *tmp_v1 /**< [in] v0 = H v1*/ + int nstate, double complex **tmp_v0 /**< [out] Result v0 = H v1*/, + double complex **tmp_v1 /**< [in] v0 = H v1*/ ); void GC_child_CisAitCjuAju_spin_MPIdouble ( unsigned long int i_int /**< [in] Interaction ID*/, struct BindStruct *X /**< [inout]*/, - double complex *tmp_v0 /**< [out] Result v0 = H v1*/, - double complex *tmp_v1 /**< [in] v0 = H v1*/ + int nstate, double complex **tmp_v0 /**< [out] Result v0 = H v1*/, + double complex **tmp_v1 /**< [in] v0 = H v1*/ ); void GC_child_CisAitCiuAiv_spin_MPIdouble ( unsigned long int i_int /**< [in] Interaction ID*/, struct BindStruct *X /**< [inout]*/, - double complex *tmp_v0 /**< [out] Result v0 = H v1*/, - double complex *tmp_v1 /**< [in] v0 = H v1*/ + int nstate, double complex **tmp_v0 /**< [out] 
Result v0 = H v1*/, + double complex **tmp_v1 /**< [in] v0 = H v1*/ ); void GC_child_CisAisCjuAjv_spin_MPIsingle ( unsigned long int i_int /**< [in] Interaction ID*/, struct BindStruct *X /**< [inout]*/, - double complex *tmp_v0 /**< [out] Result v0 = H v1*/, - double complex *tmp_v1 /**< [in] v0 = H v1*/ + int nstate, double complex **tmp_v0 /**< [out] Result v0 = H v1*/, + double complex **tmp_v1 /**< [in] v0 = H v1*/ ); void GC_child_CisAitCjuAju_spin_MPIsingle ( unsigned long int i_int /**< [in] Interaction ID*/, struct BindStruct *X /**< [inout]*/, - double complex *tmp_v0 /**< [out] Result v0 = H v1*/, - double complex *tmp_v1 /**< [in] v0 = H v1*/ + int nstate, double complex **tmp_v0 /**< [out] Result v0 = H v1*/, + double complex **tmp_v1 /**< [in] v0 = H v1*/ ); void GC_child_CisAitCiuAiv_spin_MPIsingle ( unsigned long int i_int /**< [in] Interaction ID*/, struct BindStruct *X /**< [inout]*/, - double complex *tmp_v0 /**< [out] Result v0 = H v1*/, - double complex *tmp_v1 /**< [in] v0 = H v1*/ + int nstate, double complex **tmp_v0 /**< [out] Result v0 = H v1*/, + double complex **tmp_v1 /**< [in] v0 = H v1*/ ); diff --git a/src/include/mltplySpin.h b/src/include/mltplySpin.h index 5a0ccc9b9..dabab0a60 100644 --- a/src/include/mltplySpin.h +++ b/src/include/mltplySpin.h @@ -19,54 +19,54 @@ #include "Common.h" -int mltplySpin(struct BindStruct *X, double complex *tmp_v0,double complex *tmp_v1); +int mltplySpin(struct BindStruct *X, int nstate, double complex **tmp_v0,double complex **tmp_v1); -int mltplyHalfSpin(struct BindStruct *X, double complex *tmp_v0,double complex *tmp_v1); +int mltplyHalfSpin(struct BindStruct *X, int nstate, double complex **tmp_v0,double complex **tmp_v1); -int mltplyGeneralSpin(struct BindStruct *X, double complex *tmp_v0,double complex *tmp_v1); +int mltplyGeneralSpin(struct BindStruct *X, int nstate, double complex **tmp_v0,double complex **tmp_v1); -int mltplySpinGC(struct BindStruct *X, double complex *tmp_v0,double complex 
*tmp_v1); +int mltplySpinGC(struct BindStruct *X, int nstate, double complex **tmp_v0,double complex **tmp_v1); -int mltplyHalfSpinGC(struct BindStruct *X, double complex *tmp_v0,double complex *tmp_v1); +int mltplyHalfSpinGC(struct BindStruct *X, int nstate, double complex **tmp_v0,double complex **tmp_v1); -int mltplyGeneralSpinGC(struct BindStruct *X, double complex *tmp_v0,double complex *tmp_v1); +int mltplyGeneralSpinGC(struct BindStruct *X, int nstate, double complex **tmp_v0,double complex **tmp_v1); -int mltplySpinGCBoost(struct BindStruct *X, double complex *tmp_v0,double complex *tmp_v1); +int mltplySpinGCBoost(struct BindStruct *X, int nstate, double complex **tmp_v0,double complex **tmp_v1); double complex GC_child_general_int_spin ( - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X ); double complex child_general_int_spin ( - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X ); double complex GC_child_exchange_spin ( - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X ); double complex child_exchange_spin ( - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X ); double complex GC_child_pairlift_spin ( - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X ); diff --git a/src/include/mltplySpinCore.h b/src/include/mltplySpinCore.h index 1090ab01e..a46318e0b 100644 --- a/src/include/mltplySpinCore.h +++ b/src/include/mltplySpinCore.h @@ -66,8 +66,8 @@ double complex child_CisAisCisAis_spin_element long unsigned int org_sigma2, long unsigned int org_sigma4, double complex tmp_V, - double complex *tmp_v0, - double complex *tmp_v1, 
+ int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X ); @@ -79,8 +79,8 @@ double complex child_CisAisCitAiu_spin_element long unsigned int isA_up, long unsigned int isB_up, double complex tmp_V, - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X, long unsigned int *tmp_off ); @@ -93,8 +93,8 @@ double complex child_CisAitCiuAiu_spin_element long unsigned int isA_up, long unsigned int isB_up, double complex tmp_V, - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X, long unsigned int *tmp_off ); @@ -107,8 +107,8 @@ double complex child_CisAitCiuAiv_spin_element long unsigned int isA_up, long unsigned int isB_up, double complex tmp_V, - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X, long unsigned int *tmp_off_2 ); @@ -123,8 +123,8 @@ double complex GC_child_CisAisCisAis_spin_element long unsigned int org_sigma2, long unsigned int org_sigma4, double complex tmp_V, - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X ); @@ -136,8 +136,8 @@ double complex GC_child_CisAisCitAiu_spin_element long unsigned int isA_up, long unsigned int isB_up, double complex tmp_V, - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X, long unsigned int *tmp_off ); @@ -150,8 +150,8 @@ double complex GC_child_CisAitCiuAiu_spin_element long unsigned int isA_up, long unsigned int isB_up, double complex tmp_V, - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X, long unsigned int *tmp_off ); @@ -164,8 +164,8 @@ double complex 
GC_child_CisAitCiuAiv_spin_element long unsigned int isA_up, long unsigned int isB_up, double complex tmp_V, - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X, long unsigned int *tmp_off_2 ); diff --git a/src/mltply.c b/src/mltply.c index bd72ef1d7..bc5ebb9b8 100644 --- a/src/mltply.c +++ b/src/mltply.c @@ -53,7 +53,7 @@ * @author Takahiro Misawa (The University of Tokyo) * @author Kazuyoshi Yoshimi (The University of Tokyo) */ -int mltply(struct BindStruct *X, double complex *tmp_v0,double complex *tmp_v1) { +int mltply(struct BindStruct *X, int nstate, double complex **tmp_v0,double complex **tmp_v1) { long unsigned int j=0; long unsigned int irght=0; long unsigned int ilft=0; @@ -101,25 +101,25 @@ int mltply(struct BindStruct *X, double complex *tmp_v0,double complex *tmp_v1) } X->Large.prdct += dam_pr; StopTimer(100); - if (X->Def.iCalcType == TimeEvolution) diagonalcalcForTE(step_i, X, tmp_v0, tmp_v1); + if (X->Def.iCalcType == TimeEvolution) diagonalcalcForTE(step_i, X, nstate, tmp_v0, tmp_v1); switch (X->Def.iCalcModel) { case HubbardGC: - mltplyHubbardGC(X, tmp_v0, tmp_v1); + mltplyHubbardGC(X, nstate, tmp_v0, tmp_v1); break; case KondoGC: case Hubbard: case Kondo: - mltplyHubbard(X, tmp_v0, tmp_v1); + mltplyHubbard(X, nstate, tmp_v0, tmp_v1); break; case Spin: - mltplySpin(X, tmp_v0, tmp_v1); + mltplySpin(X, nstate, tmp_v0, tmp_v1); break; case SpinGC: - mltplySpinGC(X, tmp_v0, tmp_v1); + mltplySpinGC(X, nstate, tmp_v0, tmp_v1); break; default: diff --git a/src/mltplyHubbard.c b/src/mltplyHubbard.c index 24b868625..3a4d49ef8 100644 --- a/src/mltplyHubbard.c +++ b/src/mltplyHubbard.c @@ -149,8 +149,8 @@ Other */ int mltplyHubbard( struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] Result vector - double complex *tmp_v1//!<[in] Input producted vector + int nstate, double complex **tmp_v0,//!<[inout] Result vector + double complex **tmp_v1//!<[in] Input 
producted vector ){ long unsigned int i; long unsigned int isite1, isite2, sigma1, sigma2; @@ -175,17 +175,17 @@ int mltplyHubbard( if (X->Def.EDGeneralTransfer[i][0] + 1 > X->Def.Nsite && X->Def.EDGeneralTransfer[i][2] + 1 > X->Def.Nsite) { StartTimer(311); - child_general_hopp_MPIdouble(i, X, tmp_v0, tmp_v1); + child_general_hopp_MPIdouble(i, X, nstate, tmp_v0, tmp_v1); StopTimer(311); } else if (X->Def.EDGeneralTransfer[i][2] + 1 > X->Def.Nsite) { StartTimer(312); - child_general_hopp_MPIsingle(i, X, tmp_v0, tmp_v1); + child_general_hopp_MPIsingle(i, X, nstate, tmp_v0, tmp_v1); StopTimer(312); } else if (X->Def.EDGeneralTransfer[i][0] + 1 > X->Def.Nsite) { StartTimer(312); - child_general_hopp_MPIsingle(i + 1, X, tmp_v0, tmp_v1); + child_general_hopp_MPIsingle(i + 1, X, nstate, tmp_v0, tmp_v1); StopTimer(312); } else { @@ -235,20 +235,20 @@ int mltplyHubbard( if (ibitsite1 == ibitsite2 && ibitsite3 == ibitsite4) { dam_pr += X_child_CisAisCjtAjt_Hubbard_MPI(isite1 - 1, sigma1, isite3 - 1, sigma3, - tmp_V, X, tmp_v0, tmp_v1); + tmp_V, X, nstate, tmp_v0, tmp_v1); } else if (ibitsite1 == ibitsite2 && ibitsite3 != ibitsite4) { dam_pr += X_child_CisAisCjtAku_Hubbard_MPI(isite1 - 1, sigma1, isite3 - 1, sigma3, isite4 - 1, sigma4, - tmp_V, X, tmp_v0, tmp_v1); + tmp_V, X, nstate, tmp_v0, tmp_v1); } else if (ibitsite1 != ibitsite2 && ibitsite3 == ibitsite4) { dam_pr += X_child_CisAjtCkuAku_Hubbard_MPI(isite1 - 1, sigma1, isite2 - 1, sigma2, - isite3 - 1, sigma3, tmp_V, X, tmp_v0, tmp_v1); + isite3 - 1, sigma3, tmp_V, X, nstate, tmp_v0, tmp_v1); } else if (ibitsite1 != ibitsite2 && ibitsite3 != ibitsite4) { dam_pr += X_child_CisAjtCkuAlv_Hubbard_MPI(isite1 - 1, sigma1, isite2 - 1, sigma2, - isite3 - 1, sigma3, isite4 - 1, sigma4, tmp_V, X, tmp_v0, tmp_v1); + isite3 - 1, sigma3, isite4 - 1, sigma4, tmp_V, X, nstate, tmp_v0, tmp_v1); } StopTimer(321); } @@ -292,7 +292,7 @@ int mltplyHubbard( dam_pr = X_child_CisAjtCkuAlv_Hubbard_MPI( X->Def.PairHopping[i][0], sigma1, 
X->Def.PairHopping[i][1], sigma1, X->Def.PairHopping[i][0], sigma2, X->Def.PairHopping[i][1], sigma2, - X->Def.ParaPairHopping[i], X, tmp_v0, tmp_v1); + X->Def.ParaPairHopping[i], X, nstate, tmp_v0, tmp_v1); StopTimer(331); } else { @@ -322,7 +322,7 @@ int mltplyHubbard( dam_pr = X_child_CisAjtCkuAlv_Hubbard_MPI( X->Def.ExchangeCoupling[i][0], sigma1, X->Def.ExchangeCoupling[i][1], sigma1, X->Def.ExchangeCoupling[i][1], sigma2, X->Def.ExchangeCoupling[i][0], sigma2, - X->Def.ParaExchangeCoupling[i], X, tmp_v0, tmp_v1); + X->Def.ParaExchangeCoupling[i], X, nstate, tmp_v0, tmp_v1); StopTimer(341); } else { @@ -346,8 +346,8 @@ int mltplyHubbard( */ int mltplyHubbardGC( struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] Result vector - double complex *tmp_v1//!<[in] Input producted vector + int nstate, double complex **tmp_v0,//!<[inout] Result vector + double complex **tmp_v1//!<[in] Input producted vector ){ long unsigned int i; long unsigned int isite1, isite2, sigma1, sigma2; @@ -372,17 +372,17 @@ int mltplyHubbardGC( if (X->Def.EDGeneralTransfer[i][0] + 1 > X->Def.Nsite && X->Def.EDGeneralTransfer[i][2] + 1 > X->Def.Nsite) { StartTimer(211); - GC_child_general_hopp_MPIdouble(i, X, tmp_v0, tmp_v1); + GC_child_general_hopp_MPIdouble(i, X, nstate, tmp_v0, tmp_v1); StopTimer(211); } else if (X->Def.EDGeneralTransfer[i][2] + 1 > X->Def.Nsite){ StartTimer(212); - GC_child_general_hopp_MPIsingle(i, X, tmp_v0, tmp_v1); + GC_child_general_hopp_MPIsingle(i, X, nstate, tmp_v0, tmp_v1); StopTimer(212); } else if (X->Def.EDGeneralTransfer[i][0] + 1 > X->Def.Nsite) { StartTimer(212); - GC_child_general_hopp_MPIsingle(i+1, X, tmp_v0, tmp_v1); + GC_child_general_hopp_MPIsingle(i+1, X, nstate, tmp_v0, tmp_v1); StopTimer(212); } else { @@ -429,16 +429,16 @@ int mltplyHubbardGC( ibitsite4 = X->Def.OrgTpow[2 * isite4 - 2 + sigma4]; if (ibitsite1 == ibitsite2 && ibitsite3 == ibitsite4) dam_pr = X_GC_child_CisAisCjtAjt_Hubbard_MPI( - isite1 - 1, sigma1, isite3 - 1, 
sigma3, tmp_V, X, tmp_v0, tmp_v1); + isite1 - 1, sigma1, isite3 - 1, sigma3, tmp_V, X, nstate, tmp_v0, tmp_v1); else if (ibitsite1 == ibitsite2 && ibitsite3 != ibitsite4) dam_pr = X_GC_child_CisAisCjtAku_Hubbard_MPI( - isite1 - 1, sigma1, isite3 - 1, sigma3, isite4 - 1, sigma4, tmp_V, X, tmp_v0, tmp_v1); + isite1 - 1, sigma1, isite3 - 1, sigma3, isite4 - 1, sigma4, tmp_V, X, nstate, tmp_v0, tmp_v1); else if (ibitsite1 != ibitsite2 && ibitsite3 == ibitsite4) dam_pr = X_GC_child_CisAjtCkuAku_Hubbard_MPI( - isite1 - 1, sigma1, isite2 - 1, sigma2, isite3 - 1, sigma3, tmp_V, X, tmp_v0, tmp_v1); + isite1 - 1, sigma1, isite2 - 1, sigma2, isite3 - 1, sigma3, tmp_V, X, nstate, tmp_v0, tmp_v1); else if (ibitsite1 != ibitsite2 && ibitsite3 != ibitsite4) dam_pr = X_GC_child_CisAjtCkuAlv_Hubbard_MPI( - isite1 - 1, sigma1, isite2 - 1, sigma2, isite3 - 1, sigma3, isite4 - 1, sigma4, tmp_V, X, tmp_v0, tmp_v1); + isite1 - 1, sigma1, isite2 - 1, sigma2, isite3 - 1, sigma3, isite4 - 1, sigma4, tmp_V, X, nstate, tmp_v0, tmp_v1); StopTimer(221); }//InterPE else{ @@ -480,7 +480,7 @@ int mltplyHubbardGC( dam_pr = X_GC_child_CisAjtCkuAlv_Hubbard_MPI( X->Def.PairHopping[i][0], sigma1, X->Def.PairHopping[i][1], sigma1, X->Def.PairHopping[i][0], sigma2, X->Def.PairHopping[i][1], sigma2, - X->Def.ParaPairHopping[i], X, tmp_v0, tmp_v1); + X->Def.ParaPairHopping[i], X, nstate, tmp_v0, tmp_v1); StopTimer(231); } else { @@ -510,7 +510,7 @@ int mltplyHubbardGC( dam_pr = X_GC_child_CisAjtCkuAlv_Hubbard_MPI( X->Def.ExchangeCoupling[i][0], sigma1, X->Def.ExchangeCoupling[i][1], sigma1, X->Def.ExchangeCoupling[i][1], sigma2, X->Def.ExchangeCoupling[i][0], sigma2, - X->Def.ParaExchangeCoupling[i], X, tmp_v0, tmp_v1); + X->Def.ParaExchangeCoupling[i], X, nstate, tmp_v0, tmp_v1); StopTimer(241); } else { @@ -538,8 +538,8 @@ int mltplyHubbardGC( @author Kazuyoshi Yoshimi (The University of Tokyo) */ double complex child_pairhopp( - double complex *tmp_v0,//!<[inout] Result vector - double complex 
*tmp_v1,//!<[in] Input producted vector + int nstate, double complex **tmp_v0,//!<[inout] Result vector + double complex **tmp_v1,//!<[in] Input producted vector struct BindStruct *X//!<[inout] ) { long int j; @@ -549,7 +549,7 @@ double complex child_pairhopp( #pragma omp parallel for default(none) reduction(+:dam_pr) firstprivate(i_max, X,off) private(j) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) - dam_pr += child_pairhopp_element(j, tmp_v0, tmp_v1, X, &off); + dam_pr += child_pairhopp_element(j, nstate, tmp_v0, tmp_v1, X, &off); return dam_pr; }/*double complex child_pairhopp*/ /** @@ -559,8 +559,8 @@ double complex child_pairhopp( @author Kazuyoshi Yoshimi (The University of Tokyo) */ double complex child_exchange( - double complex *tmp_v0,//!<[inout] Result vector - double complex *tmp_v1,//!<[in] Input producted vector + int nstate, double complex **tmp_v0,//!<[inout] Result vector + double complex **tmp_v1,//!<[in] Input producted vector struct BindStruct *X//!<[inout] ) { long int j; @@ -570,7 +570,7 @@ double complex child_exchange( #pragma omp parallel for default(none) reduction(+:dam_pr) firstprivate(i_max, X,off) private(j) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) - dam_pr += child_exchange_element(j, tmp_v0, tmp_v1, X, &off); + dam_pr += child_exchange_element(j, nstate, tmp_v0, tmp_v1, X, &off); return dam_pr; }/*double complex child_exchange*/ /** @@ -580,8 +580,8 @@ double complex child_exchange( @author Kazuyoshi Yoshimi (The University of Tokyo) */ double complex child_general_hopp( - double complex *tmp_v0,//!<[inout] Result vector - double complex *tmp_v1,//!<[in] Input producted vector + int nstate, double complex **tmp_v0,//!<[inout] Result vector + double complex **tmp_v1,//!<[in] Input producted vector struct BindStruct *X,//!<[inout] double complex trans//!<[in] Hopping integral ) { @@ -597,7 +597,7 @@ double complex child_general_hopp( #pragma omp parallel for default(none) reduction(+:dam_pr) \ 
firstprivate(i_max,X,Asum,Adiff,isite1,isite2,trans) private(j) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) - dam_pr += CisAjt(j, tmp_v0, tmp_v1, X, isite1, isite2, Asum, Adiff, trans) * trans; + dam_pr += CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite1, isite2, Asum, Adiff, trans) * trans; return dam_pr; }/*double complex child_general_hopp*/ /** @@ -607,8 +607,8 @@ firstprivate(i_max,X,Asum,Adiff,isite1,isite2,trans) private(j) shared(tmp_v0, t @author Kazuyoshi Yoshimi (The University of Tokyo) */ double complex GC_child_general_hopp( - double complex *tmp_v0,//!<[inout] Result vector - double complex *tmp_v1,//!<[in] Input producted vector + int nstate, double complex **tmp_v0,//!<[inout] Result vector + double complex **tmp_v1,//!<[in] Input producted vector struct BindStruct *X,//!<[inout] double complex trans//!<[in] Hopping integral ) { @@ -626,13 +626,13 @@ double complex GC_child_general_hopp( #pragma omp parallel for default(none) reduction(+:dam_pr) \ private(j) firstprivate(i_max,X,isite1, trans) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) - dam_pr += GC_CisAis(j, tmp_v0, tmp_v1, X, isite1, trans) * trans; + dam_pr += GC_CisAis(j, nstate, tmp_v0, tmp_v1, X, isite1, trans) * trans; }/*if (isite1 == isite2)*/ else { #pragma omp parallel for default(none) reduction(+:dam_pr) \ firstprivate(i_max,X,Asum,Adiff,isite1,isite2,trans) private(j,tmp_off) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) - dam_pr += GC_CisAjt(j, tmp_v0, tmp_v1, X, isite1, isite2, Asum, Adiff, trans, &tmp_off) * trans; + dam_pr += GC_CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite1, isite2, Asum, Adiff, trans, &tmp_off) * trans; } return dam_pr; }/*double complex GC_child_general_hopp*/ @@ -643,8 +643,8 @@ firstprivate(i_max,X,Asum,Adiff,isite1,isite2,trans) private(j,tmp_off) shared(t @author Kazuyoshi Yoshimi (The University of Tokyo) */ double complex child_general_int( - double complex *tmp_v0,//!<[inout] Result vector - double complex *tmp_v1,//!<[in] Input producted 
vector + int nstate, double complex **tmp_v0,//!<[inout] Result vector + double complex **tmp_v1,//!<[in] Input producted vector struct BindStruct *X//!<[inout] ) { double complex dam_pr, tmp_V; @@ -677,24 +677,24 @@ shared(tmp_v0, tmp_v1) if (isite1 == isite2 && isite3 == isite4) { #pragma omp for for (j = 1; j <= i_max; j++) - dam_pr += child_CisAisCisAis_element(j, isite1, isite3, tmp_V, tmp_v0, tmp_v1, X, &tmp_off); + dam_pr += child_CisAisCisAis_element(j, isite1, isite3, tmp_V, nstate, tmp_v0, tmp_v1, X, &tmp_off); }/*if (isite1 == isite2 && isite3 == isite4)*/ else if (isite1 == isite2 && isite3 != isite4) { #pragma omp for for (j = 1; j <= i_max; j++) dam_pr += child_CisAisCjtAku_element( - j, isite1, isite3, isite4, Bsum, Bdiff, tmp_V, tmp_v0, tmp_v1, X, &tmp_off); + j, isite1, isite3, isite4, Bsum, Bdiff, tmp_V, nstate, tmp_v0, tmp_v1, X, &tmp_off); }/*if (isite1 == isite2 && isite3 != isite4)*/ else if (isite1 != isite2 && isite3 == isite4) { #pragma omp for for (j = 1; j <= i_max; j++) - dam_pr += child_CisAjtCkuAku_element(j, isite1, isite2, isite3, Asum, Adiff, tmp_V, tmp_v0, tmp_v1, X, &tmp_off); + dam_pr += child_CisAjtCkuAku_element(j, isite1, isite2, isite3, Asum, Adiff, tmp_V, nstate, tmp_v0, tmp_v1, X, &tmp_off); }/*if (isite1 != isite2 && isite3 == isite4)*/ else if (isite1 != isite2 && isite3 != isite4) { #pragma omp for for (j = 1; j <= i_max; j++) dam_pr += child_CisAjtCkuAlv_element( - j, isite1, isite2, isite3, isite4, Asum, Adiff, Bsum, Bdiff, tmp_V, tmp_v0, tmp_v1, X, &tmp_off_2); + j, isite1, isite2, isite3, isite4, Asum, Adiff, Bsum, Bdiff, tmp_V, nstate, tmp_v0, tmp_v1, X, &tmp_off_2); }/*if (isite1 != isite2 && isite3 != isite4)*/ }/*End of parallel region*/ return dam_pr; @@ -706,8 +706,8 @@ shared(tmp_v0, tmp_v1) @author Kazuyoshi Yoshimi (The University of Tokyo) */ double complex GC_child_general_int( - double complex *tmp_v0,//!<[inout] Result vector - double complex *tmp_v1,//!<[in] Input producted vector + int nstate, double 
complex **tmp_v0,//!<[inout] Result vector + double complex **tmp_v1,//!<[in] Input producted vector struct BindStruct *X//!<[inout] ) { double complex dam_pr, tmp_V; @@ -738,24 +738,24 @@ shared(tmp_v0, tmp_v1) if (isite1 == isite2 && isite3 == isite4) { #pragma omp for for (j = 1; j <= i_max; j++) - dam_pr += GC_child_CisAisCisAis_element(j, isite1, isite3, tmp_V, tmp_v0, tmp_v1, X, &tmp_off); + dam_pr += GC_child_CisAisCisAis_element(j, isite1, isite3, tmp_V, nstate, tmp_v0, tmp_v1, X, &tmp_off); }/*if (isite1 == isite2 && isite3 == isite4)*/ else if (isite1 == isite2 && isite3 != isite4) { #pragma omp for for (j = 1; j <= i_max; j++) - dam_pr += GC_child_CisAisCjtAku_element(j, isite1, isite3, isite4, Bsum, Bdiff, tmp_V, tmp_v0, tmp_v1, X, &tmp_off); + dam_pr += GC_child_CisAisCjtAku_element(j, isite1, isite3, isite4, Bsum, Bdiff, tmp_V, nstate, tmp_v0, tmp_v1, X, &tmp_off); }/*if (isite1 == isite2 && isite3 != isite4)*/ else if (isite1 != isite2 && isite3 == isite4) { #pragma omp for for (j = 1; j <= i_max; j++) dam_pr += GC_child_CisAjtCkuAku_element( - j, isite1, isite2, isite3, Asum, Adiff, tmp_V, tmp_v0, tmp_v1, X, &tmp_off); + j, isite1, isite2, isite3, Asum, Adiff, tmp_V, nstate, tmp_v0, tmp_v1, X, &tmp_off); }/*if (isite1 != isite2 && isite3 == isite4)*/ else if (isite1 != isite2 && isite3 != isite4) { #pragma omp for for (j = 1; j <= i_max; j++) dam_pr += GC_child_CisAjtCkuAlv_element( - j, isite1, isite2, isite3, isite4, Asum, Adiff, Bsum, Bdiff, tmp_V, tmp_v0, tmp_v1, X, &tmp_off_2); + j, isite1, isite2, isite3, isite4, Asum, Adiff, Bsum, Bdiff, tmp_V, nstate, tmp_v0, tmp_v1, X, &tmp_off_2); }/*if (isite1 != isite2 && isite3 != isite4)*/ }/*End of parallel region*/ return dam_pr; @@ -767,8 +767,8 @@ shared(tmp_v0, tmp_v1) @author Kazuyoshi Yoshimi (The University of Tokyo) */ double complex GC_child_pairhopp( - double complex *tmp_v0,//!<[inout] Result vector - double complex *tmp_v1,//!<[in] Input producted vector + int nstate, double complex 
**tmp_v0,//!<[inout] Result vector + double complex **tmp_v1,//!<[in] Input producted vector struct BindStruct *X//!<[inout] ) { long int j; @@ -778,7 +778,7 @@ double complex GC_child_pairhopp( #pragma omp parallel for default(none) reduction(+:dam_pr) firstprivate(i_max,X,off) private(j) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) - dam_pr += GC_child_pairhopp_element(j, tmp_v0, tmp_v1, X, &off); + dam_pr += GC_child_pairhopp_element(j, nstate, tmp_v0, tmp_v1, X, &off); return dam_pr; }/*double complex GC_child_pairhopp*/ @@ -789,8 +789,8 @@ double complex GC_child_pairhopp( @author Kazuyoshi Yoshimi (The University of Tokyo) */ double complex GC_child_exchange( - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X ) { long int j; @@ -801,7 +801,7 @@ double complex GC_child_exchange( #pragma omp parallel for default(none) \ reduction(+:dam_pr) firstprivate(i_max, X,off) private(j) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) - dam_pr += GC_child_exchange_element(j, tmp_v0, tmp_v1, X, &off); + dam_pr += GC_child_exchange_element(j, nstate, tmp_v0, tmp_v1, X, &off); return dam_pr; }/*double complex GC_child_exchange*/ /******************************************************************************/ diff --git a/src/mltplyHubbardCore.c b/src/mltplyHubbardCore.c index 5b4c97cdc..d4cc16fea 100644 --- a/src/mltplyHubbardCore.c +++ b/src/mltplyHubbardCore.c @@ -21,6 +21,8 @@ #include "mltplyCommon.h" #include "mltplyHubbardCore.h" +void zaxpy_(int *n, double complex *a, double complex *x, int *incx, double complex *y, int *incy); + /******************************************************************************/ //[s] GetInfo functions /******************************************************************************/ @@ -230,10 +232,10 @@ int child_exchange_GetInfo( @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ 
-double complex GC_CisAis( +void GC_CisAis( long unsigned int j,//!<[in] Index of element of wavefunction - double complex *tmp_v0,//!<[inout] Result vector - double complex *tmp_v1,//!<[in] Input producted vector + int nstate, double complex **tmp_v0,//!<[inout] Result vector + double complex **tmp_v1,//!<[in] Input producted vector struct BindStruct *X,//!<[inout] long unsigned int is1_spin,//!<[in] Mask for occupation of @f$(i \sigma)@f$ double complex tmp_trans//!<[in] Transfer integral @@ -241,26 +243,22 @@ double complex GC_CisAis( long unsigned int A_ibit_tmp; long unsigned int list_1_j; double complex dmv; - double complex dam_pr; + int one = 1; list_1_j = j - 1; A_ibit_tmp = (list_1_j & is1_spin) / is1_spin; - dmv = tmp_v1[j] * A_ibit_tmp; - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { - tmp_v0[j] += dmv * tmp_trans; - }/*if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC)*/ - dam_pr = dmv * conj(tmp_v1[j]); - return dam_pr; + dmv = tmp_trans * A_ibit_tmp; + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); }/*double complex GC_CisAis*/ /** @brief Operation of @f$t c_{i\sigma} c_{i\sigma}^\dagger@f$ (Grandcanonical) @return Fragment of @f$\langle v_1|{\hat H}|v_1\rangle@f$ @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_AisCis( +void GC_AisCis( long unsigned int j,//!<[in] Index of element of wavefunction - double complex *tmp_v0,//!<[inout] Result vector - double complex *tmp_v1,//!<[in] Input producted vector + int nstate, double complex **tmp_v0,//!<[inout] Result vector + double complex **tmp_v1,//!<[in] Input producted vector struct BindStruct *X,//!<[inout] long unsigned int is1_spin,//!<[in] Mask for occupation of @f$(i \sigma)@f$ double complex tmp_trans//!<[in] Transfer integral @@ -268,16 +266,12 @@ double complex GC_AisCis( long unsigned int A_ibit_tmp; long unsigned int list_1_j; double complex dmv; - double complex dam_pr; + int one = 1; list_1_j = j - 1; A_ibit_tmp = 
(list_1_j & is1_spin) / is1_spin; - dmv = tmp_v1[j] * (1 - A_ibit_tmp); - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { - tmp_v0[j] += dmv * tmp_trans; - }/*if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC)*/ - dam_pr = dmv * conj(tmp_v1[j]); - return dam_pr; + dmv = tmp_trans * (1 - A_ibit_tmp); + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); }/*double complex GC_AisCis*/ /** @brief @f$c_{is}\\dagger c_{is}@f$ term in Hubbard (canonical) @@ -302,10 +296,10 @@ int X_CisAis( @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex CisAjt( +void CisAjt( long unsigned int j,//!<[in] Index of wavefunction - double complex *tmp_v0,//!<[inout] @f$v_0 = H v_1@f$ - double complex *tmp_v1,//!<[in] Vector to be producted + int nstate, double complex **tmp_v0,//!<[inout] @f$v_0 = H v_1@f$ + double complex **tmp_v1,//!<[in] Vector to be producted struct BindStruct *X,//!<[inout] long unsigned int is1_spin,//!<[in] Mask for occupation of (is) long unsigned int is2_spin,//!<[in] Mask for occupation of (jt) @@ -316,7 +310,8 @@ double complex CisAjt( long unsigned int ibit_tmp_1, ibit_tmp_2; long unsigned int bit, iexchg, off; int sgn; - double complex dmv, dam_pr; + double complex dmv; + int one = 1; ibit_tmp_1 = (list_1[j] & is1_spin); ibit_tmp_2 = (list_1[j] & is2_spin); @@ -326,23 +321,13 @@ double complex CisAjt( iexchg = list_1[j] ^ sum_spin; if(GetOffComp(list_2_1, list_2_2, iexchg, X->Large.irght, X->Large.ilft, X->Large.ihfbit, &off)==FALSE){ - return 0; - } -/* - if(X->Large.mode==M_CORR){ - fprintf(stdout, "DEBUG-1: myrank=%d, org=%d, bit=%d, iexchg=%d, list_1[%d]=%d\n", - myrank, list_1[j], bit, iexchg, off, list_1[off]); - } -*/ - dmv = sgn * tmp_v1[j]; - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply - tmp_v0[off] += tmp_V * dmv; + return; } - dam_pr = dmv * conj(tmp_v1[off]); - return dam_pr; + dmv = sgn * tmp_V; + 
zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[off][0], &one); } else { - return 0; + return; } } /** @@ -351,10 +336,10 @@ double complex CisAjt( @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_CisAjt( +void GC_CisAjt( long unsigned int j,//!<[in] Index of wavefunction - double complex *tmp_v0,//!<[in] @f$v_0 = H v_1@f$ - double complex *tmp_v1,//!<[in]Vector to be producted + int nstate, double complex **tmp_v0,//!<[in] @f$v_0 = H v_1@f$ + double complex **tmp_v1,//!<[in]Vector to be producted struct BindStruct *X,//!<[inout] long unsigned int is1_spin,//!<[in] Mask for occupation of (is) long unsigned int is2_spin,//!<[in] Mask for occupation of (jt) @@ -367,7 +352,7 @@ double complex GC_CisAjt( long unsigned int ibit_tmp_1, ibit_tmp_2; long unsigned int bit; int sgn; - double complex dmv, dam_pr; + double complex dmv; list_1_j = j - 1; ibit_tmp_1 = (list_1_j & is1_spin); @@ -383,8 +368,6 @@ double complex GC_CisAjt( if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply tmp_v0[list_1_off + 1] += dmv * tmp_V; } - dam_pr = dmv * conj(tmp_v1[list_1_off + 1]); - return dam_pr; } else { return 0; @@ -469,10 +452,10 @@ int X_GC_CisAjt( @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex child_exchange_element( +void child_exchange_element( long unsigned int j,//!<[in] Index of initial wavefunction - double complex *tmp_v0,//!<[inout] @f$v_0 = H v_1@f$ - double complex *tmp_v1,//!<[in] Vector to be producted + int nstate, double complex **tmp_v0,//!<[inout] @f$v_0 = H v_1@f$ + double complex **tmp_v1,//!<[in] Vector to be producted struct BindStruct *X,//!<[inout] long unsigned int *tmp_off//!<[off] Index of wavefunction of final state ) { @@ -489,7 +472,6 @@ double complex child_exchange_element( long unsigned int ihfbit = X->Large.ihfbit; double complex tmp_J = X->Large.tmp_J; int mode = 
X->Large.mode; - double complex dam_pr = 0; ibit1_up = list_1[j] & is1_up; ibit2_up = list_1[j] & is2_up; @@ -507,7 +489,6 @@ double complex child_exchange_element( if (mode == M_MLTPLY) { tmp_v0[off] += dmv; } - dam_pr += dmv * conj(tmp_v1[off]); } else if (ibit1_up != 0 && ibit1_down == 0 && ibit2_up == 0 && ibit2_down != 0) { iexchg = list_1[j] - (is1_up + is2_down); @@ -520,9 +501,7 @@ double complex child_exchange_element( if (mode == M_MLTPLY) { tmp_v0[off] += dmv; } - dam_pr += dmv * conj(tmp_v1[off]); } - return dam_pr; }/*double complex child_exchange_element*/ /** @brief Compute pairhopp term of canonical Hubbard system @@ -530,10 +509,10 @@ double complex child_exchange_element( @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex child_pairhopp_element( +void child_pairhopp_element( long unsigned int j,//!<[in] Index of initial wavefunction - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1,//!<[in] Wavefunction to be multiplied + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1,//!<[in] Wavefunction to be multiplied struct BindStruct *X,//!<[inout] long unsigned int *tmp_off//!<[out] Index of final wavefunction ) { @@ -550,7 +529,6 @@ double complex child_pairhopp_element( long unsigned int ihfbit = X->Large.ihfbit; double complex tmp_J = X->Large.tmp_J; int mode = X->Large.mode; - double complex dam_pr = 0; ibit1_up = list_1[j] & is1_up; ibit2_up = list_1[j] & is2_up; @@ -569,9 +547,7 @@ double complex child_pairhopp_element( if (mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { tmp_v0[off] += dmv; } - dam_pr += dmv * conj(tmp_v1[off]); } - return dam_pr; }/*double complex child_pairhopp_element*/ /** @brief Compute exchange term of grandcanonical Hubbard system @@ -579,10 +555,10 @@ double complex child_pairhopp_element( @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The 
University of Tokyo) */ -double complex GC_child_exchange_element( +void GC_child_exchange_element( long unsigned int j,//!<[in] Index of initial wavefunction - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1,//!<[in] Wavefunction to be multiplied + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1,//!<[in] Wavefunction to be multiplied struct BindStruct *X,//!<[inout] long unsigned int *tmp_off//!<[out] Index of final wavefunction ) { @@ -596,7 +572,6 @@ double complex GC_child_exchange_element( long unsigned int list_1_j, list_1_off; double complex tmp_J = X->Large.tmp_J; int mode = X->Large.mode; - double complex dam_pr = 0; list_1_j = j - 1; ibit1_up = list_1_j & is1_up; @@ -615,7 +590,6 @@ double complex GC_child_exchange_element( if (mode == M_MLTPLY) { tmp_v0[list_1_off + 1] += dmv; } - dam_pr += dmv * conj(tmp_v1[list_1_off + 1]); } else if (ibit1_up != 0 && ibit1_down == 0 && ibit2_up == 0 && ibit2_down != 0) { iexchg = list_1_j - (is1_up + is2_down); @@ -627,9 +601,7 @@ double complex GC_child_exchange_element( if (mode == M_MLTPLY) { tmp_v0[list_1_off + 1] += dmv; } - dam_pr += dmv * conj(tmp_v1[list_1_off + 1]); } - return dam_pr; }/*double complex GC_child_exchange_element*/ /** @brief Compute pairhopp term of grandcanonical Hubbard system @@ -637,10 +609,10 @@ double complex GC_child_exchange_element( @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_child_pairhopp_element( +void GC_child_pairhopp_element( long unsigned int j,//!<[in] Index of initial wavefunction - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1,//!<[in] Wavefunction to be multiplied + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1,//!<[in] Wavefunction to be multiplied struct BindStruct *X,//!<[inout] long unsigned int *tmp_off//!<[out] Index of 
final wavefunction ) { @@ -655,7 +627,6 @@ double complex GC_child_pairhopp_element( double complex tmp_J = X->Large.tmp_J; int mode = X->Large.mode; - double complex dam_pr = 0 + 0 * I; list_1_j = j - 1; ibit1_up = list_1_j & is1_up; @@ -675,9 +646,7 @@ double complex GC_child_pairhopp_element( if (mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { tmp_v0[list_1_off + 1] += dmv; } - dam_pr += dmv * conj(tmp_v1[list_1_off + 1]); } - return dam_pr; } /** @brief Compute @f$c_{is}^\dagger c_{is} c_{is}^\dagger c_{is}@f$ @@ -686,27 +655,24 @@ term of canonical Hubbard system @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex child_CisAisCisAis_element( +void child_CisAisCisAis_element( long unsigned int j,//!<[in] Index of initial wavefunction long unsigned int isite1,//!<[in] Site 1 long unsigned int isite3,//!<[in] Site 3 double complex tmp_V,//!<[in] Coupling constant - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1,//!<[in] Wavefunction to be multiplied + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1,//!<[in] Wavefunction to be multiplied struct BindStruct *X,//!<[inout] long unsigned int *tmp_off//!<[out] Index of final wavefunction ) { int tmp_sgn; double complex dmv; - double complex dam_pr = 0 + 0 * I; tmp_sgn = X_CisAis(list_1[j], X, isite3); tmp_sgn *= X_CisAis(list_1[j], X, isite1); dmv = tmp_V * tmp_v1[j] * tmp_sgn; if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply tmp_v0[j] += dmv; } - dam_pr = conj(tmp_v1[j]) * dmv; - return dam_pr; }/*double complex child_CisAisCisAis_element*/ /** @brief Compute @f$c_{is}^\dagger c_{is} c_{jt}^\dagger c_{ku}@f$ @@ -715,7 +681,7 @@ term of canonical Hubbard system @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex child_CisAisCjtAku_element( +void 
child_CisAisCjtAku_element( long unsigned int j,//!<[in] Index of initial wavefunction long unsigned int isite1,//!<[in] Site 1 long unsigned int isite3,//!<[in] Site 3 @@ -723,14 +689,13 @@ double complex child_CisAisCjtAku_element( long unsigned int Bsum,//!<[in] Bit mask for hopping long unsigned int Bdiff,//!<[in] Bit mask for Fermion sign double complex tmp_V,//!<[in] Coupling constant - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1,//!<[in] Wavefunction to be multiplied + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1,//!<[in] Wavefunction to be multiplied struct BindStruct *X,//!<[inout] long unsigned int *tmp_off//!<[out] Index of final wavefunction ) { int tmp_sgn; double complex dmv; - double complex dam_pr = 0 + 0 * I; tmp_sgn = X_CisAjt(list_1[j], X, isite3, isite4, Bsum, Bdiff, tmp_off); if (tmp_sgn != 0) { tmp_sgn *= X_CisAis(list_1[*tmp_off], X, isite1); @@ -739,10 +704,8 @@ double complex child_CisAisCjtAku_element( if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply tmp_v0[*tmp_off] += dmv; } - dam_pr = conj(tmp_v1[*tmp_off]) * dmv; } } - return dam_pr; }/*double complex child_CisAisCjtAku_element*/ /** @brief Compute @f$c_{is}^\dagger c_{jt} c_{ku}^\dagger c_{ku}@f$ @@ -751,7 +714,7 @@ term of canonical Hubbard system @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex child_CisAjtCkuAku_element( +void child_CisAjtCkuAku_element( long unsigned int j,//!<[in] Index of initial wavefunction long unsigned int isite1,//!<[in] Site 1 long unsigned int isite2,//!<[in] Site 2 @@ -759,15 +722,13 @@ double complex child_CisAjtCkuAku_element( long unsigned int Asum,//!<[in] Bit mask for hopping long unsigned int Adiff,//!<[in] Bit mask for Fermion sign double complex tmp_V,//!<[in] Coupling constant - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex 
*tmp_v1,//!<[in] Wavefunction to be multiplied + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1,//!<[in] Wavefunction to be multiplied struct BindStruct *X,//!<[inout] long unsigned int *tmp_off//!<[out] Index of final wavefunction ) { int tmp_sgn; double complex dmv; - double complex dam_pr; - dam_pr = 0; tmp_sgn = X_CisAis(list_1[j], X, isite3); if (tmp_sgn != 0) { tmp_sgn *= X_CisAjt(list_1[j], X, isite1, isite2, Asum, Adiff, tmp_off); @@ -776,10 +737,8 @@ double complex child_CisAjtCkuAku_element( if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply tmp_v0[*tmp_off] += dmv; } - dam_pr = conj(tmp_v1[*tmp_off]) * dmv; } } - return dam_pr; }/*double complex child_CisAjtCkuAku_element*/ /** @brief Compute @f$c_{is}^\dagger c_{jt} c_{ku}^\dagger c_{lv}@f$ @@ -788,7 +747,7 @@ term of canonical Hubbard system @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex child_CisAjtCkuAlv_element( +void child_CisAjtCkuAlv_element( long unsigned int j,//!<[in] Index of initial wavefunction long unsigned int isite1,//!<[in] Site 1 long unsigned int isite2,//!<[in] Site 2 @@ -799,8 +758,8 @@ double complex child_CisAjtCkuAlv_element( long unsigned int Bsum,//!<[in] Bit mask for hopping long unsigned int Bdiff,//!<[in] Bit mask for Fermion sign double complex tmp_V,//!<[in] Coupling constant - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1,//!<[in] Wavefunction to be multiplied + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1,//!<[in] Wavefunction to be multiplied struct BindStruct *X,//!<[inout] long unsigned int *tmp_off_2//!<[out] Index of final wavefunction ) { @@ -808,7 +767,6 @@ double complex child_CisAjtCkuAlv_element( long unsigned int tmp_off_1; double complex dmv; - double complex dam_pr = 0; tmp_sgn = X_GC_CisAjt(list_1[j], X, isite3, isite4, 
Bsum, Bdiff, &tmp_off_1); if (tmp_sgn != 0) { @@ -818,10 +776,8 @@ double complex child_CisAjtCkuAlv_element( if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply tmp_v0[*tmp_off_2] += dmv; } - dam_pr = conj(tmp_v1[*tmp_off_2]) * dmv; } } - return dam_pr; }/*double complex child_CisAjtCkuAlv_element*/ //[s] Grand Canonical /** @@ -831,19 +787,18 @@ term of grandcanonical Hubbard system @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_child_CisAisCisAis_element( +void GC_child_CisAisCisAis_element( long unsigned int j,//!<[in] Index of initial wavefunction long unsigned int isite1,//!<[in] Site 1 long unsigned int isite3,//!<[in] Site 3 double complex tmp_V,//!<[in] Coupling constant - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1,//!<[in] Wavefunction to be multiplied + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1,//!<[in] Wavefunction to be multiplied struct BindStruct *X,//!<[inout] long unsigned int *tmp_off//!<[out] Index of final wavefunction ) { int tmp_sgn; double complex dmv; - double complex dam_pr = 0; tmp_sgn = X_CisAis(j - 1, X, isite3); tmp_sgn *= X_CisAis(j - 1, X, isite1); if (tmp_sgn != 0) { @@ -851,9 +806,7 @@ double complex GC_child_CisAisCisAis_element( if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply tmp_v0[j] += dmv; } - dam_pr = conj(tmp_v1[j]) * dmv; } - return dam_pr; }/*double complex GC_child_CisAisCisAis_element*/ /** @brief Compute @f$c_{is}^\dagger c_{is} c_{jt}^\dagger c_{ku}@f$ @@ -862,7 +815,7 @@ term of grandcanonical Hubbard system @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_child_CisAisCjtAku_element( +void GC_child_CisAisCjtAku_element( long unsigned int j,//!<[in] Index of initial wavefunction long unsigned int isite1,//!<[in] Site 1 
long unsigned int isite3,//!<[in] Site 3 @@ -870,14 +823,13 @@ double complex GC_child_CisAisCjtAku_element( long unsigned int Bsum,//!<[in] Bit mask for hopping long unsigned int Bdiff,//!<[in] Bit mask for Fermion sign double complex tmp_V,//!<[in] Coupling constant - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1,//!<[in] Wavefunction to be multiplied + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1,//!<[in] Wavefunction to be multiplied struct BindStruct *X,//!<[inout] long unsigned int *tmp_off//!<[out] Index of final wavefunction ) { int tmp_sgn; double complex dmv; - double complex dam_pr = 0 + 0 * I; tmp_sgn = X_GC_CisAjt((j - 1), X, isite3, isite4, Bsum, Bdiff, tmp_off); if (tmp_sgn != 0) { tmp_sgn *= X_CisAis(*tmp_off, X, isite1); @@ -886,10 +838,8 @@ double complex GC_child_CisAisCjtAku_element( if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply tmp_v0[*tmp_off + 1] += dmv; } - dam_pr = conj(tmp_v1[*tmp_off + 1]) * dmv; } } - return dam_pr; }/*double complex GC_child_CisAisCjtAku_element*/ /** @brief Compute @f$c_{is}^\dagger c_{jt} c_{ku}^\dagger c_{ku}@f$ @@ -898,7 +848,7 @@ term of grandcanonical Hubbard system @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_child_CisAjtCkuAku_element( +void GC_child_CisAjtCkuAku_element( long unsigned int j,//!<[in] Index of initial wavefunction long unsigned int isite1,//!<[in] Site 1 long unsigned int isite2,//!<[in] Site 2 @@ -906,14 +856,13 @@ double complex GC_child_CisAjtCkuAku_element( long unsigned int Asum,//!<[in] Bit mask for hopping long unsigned int Adiff,//!<[in] Bit mask for Fermion sign double complex tmp_V,//!<[in] Coupling constant - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1,//!<[in] Wavefunction to be multiplied + int nstate, double complex **tmp_v0,//!<[inout] Resulting 
wavefunction + double complex **tmp_v1,//!<[in] Wavefunction to be multiplied struct BindStruct *X,//!<[inout] long unsigned int *tmp_off//!<[out] Index of final wavefunction ) { int tmp_sgn; double complex dmv; - double complex dam_pr = 0 + 0 * I; tmp_sgn = X_CisAis((j - 1), X, isite3); if (tmp_sgn != 0) { tmp_sgn *= X_GC_CisAjt((j - 1), X, isite1, isite2, Asum, Adiff, tmp_off); @@ -922,10 +871,8 @@ double complex GC_child_CisAjtCkuAku_element( if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply tmp_v0[*tmp_off + 1] += dmv; } - dam_pr = conj(tmp_v1[*tmp_off + 1]) * dmv; }/*if (tmp_sgn != 0)*/ }/*if (tmp_sgn != 0)*/ - return dam_pr; }/*double complex GC_child_CisAjtCkuAku_element*/ /** @brief Compute @f$c_{is}^\dagger c_{jt} c_{ku}^\dagger c_{lv}@f$ @@ -934,7 +881,7 @@ term of grandcanonical Hubbard system @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_child_CisAjtCkuAlv_element( +void GC_child_CisAjtCkuAlv_element( long unsigned int j,//!<[in] Index of initial wavefunction long unsigned int isite1,//!<[in] Site 1 long unsigned int isite2,//!<[in] Site 2 @@ -945,15 +892,14 @@ double complex GC_child_CisAjtCkuAlv_element( long unsigned int Bsum,//!<[in] Bit mask for hopping long unsigned int Bdiff,//!<[in] Bit mask for Fermion sign double complex tmp_V,//!<[in] Coupling constant - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1,//!<[in] Wavefunction to be multiplied + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1,//!<[in] Wavefunction to be multiplied struct BindStruct *X,//!<[inout] long unsigned int *tmp_off_2//!<[out] Index of final wavefunction ) { int tmp_sgn; long unsigned int tmp_off_1; double complex dmv; - double complex dam_pr = 0 + 0 * I; tmp_sgn = X_GC_CisAjt((j - 1), X, isite3, isite4, Bsum, Bdiff, &tmp_off_1); if (tmp_sgn != 0) { @@ -963,10 +909,8 @@ double complex 
GC_child_CisAjtCkuAlv_element( if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply tmp_v0[*tmp_off_2 + 1] += dmv; } - dam_pr = conj(tmp_v1[*tmp_off_2 + 1]) * dmv; } } - return dam_pr; }/*double complex GC_child_CisAjtCkuAlv_element*/ //[e] Grand Canonical /** @@ -977,10 +921,10 @@ term of grandcanonical Hubbard system @author Kazuyoshi Yoshimi (The University of Tokyo) @author Youhei Yamaji (The University of Tokyo) */ -double complex GC_Cis( +void GC_Cis( long unsigned int j,//!<[in] Index of initial wavefunction - double complex *tmp_v0,//!<[in] Resulting wavefunction - double complex *tmp_v1,//!<[in] Wavefunction to be multiplied + int nstate, double complex **tmp_v0,//!<[in] Resulting wavefunction + double complex **tmp_v1,//!<[in] Wavefunction to be multiplied long unsigned int is1_spin,//!<[in] Bit mask double complex tmp_V,//!<[in] Coupling constant long unsigned int *tmp_off//!<[in] Index of final wavefunction @@ -989,7 +933,7 @@ double complex GC_Cis( long unsigned int ibit_tmp_1; long unsigned int bit; int sgn, ipsgn; - double complex dmv, dam_pr; + double complex dmv; list_1_j = j - 1; @@ -1013,8 +957,6 @@ double complex GC_Cis( //if (X->Large.mode == M_MLTPLY) { // for multply tmp_v0[list_1_off + 1] += dmv * tmp_V; //} - dam_pr = dmv * conj(tmp_v1[list_1_off + 1]); - return dam_pr; } else { return 0; @@ -1028,10 +970,10 @@ term of grandcanonical Hubbard system @author Kazuyoshi Yoshimi (The University of Tokyo) @author Youhei Yamaji (The University of Tokyo) */ -double complex GC_Ajt( +void GC_Ajt( long unsigned int j,//!<[in] Index of initial wavefunction - double complex *tmp_v0,//!<[in] Resulting wavefunction - double complex *tmp_v1,//!<[in] Wavefunction to be multiplied + int nstate, double complex **tmp_v0,//!<[in] Resulting wavefunction + double complex **tmp_v1,//!<[in] Wavefunction to be multiplied long unsigned int is1_spin,//!<[in] Bit mask double complex tmp_V,//!<[in] Coupling constant long unsigned int 
*tmp_off//!<[in] Index of final wavefunction @@ -1040,7 +982,7 @@ double complex GC_Ajt( long unsigned int ibit_tmp_1; long unsigned int bit; int sgn, ipsgn; - double complex dmv, dam_pr; + double complex dmv; list_1_j = j - 1; @@ -1063,8 +1005,6 @@ double complex GC_Ajt( //if (X->Large.mode == M_MLTPLY) { // for multply tmp_v0[list_1_off + 1] += dmv * tmp_V; //} - dam_pr = dmv * conj(tmp_v1[list_1_off + 1]); - return dam_pr; } else { return 0; @@ -1132,7 +1072,7 @@ term of canonical Hubbard system @author Kazuyoshi Yoshimi (The University of Tokyo) @author Youhei Yamaji (The University of Tokyo) */ -double complex X_Ajt( +void X_Ajt( long unsigned int j,//!<[in] Index of initial wavefunction long unsigned int is1_spin,//!<[in] Bit mask long unsigned int *tmp_off,//!<[out] Index of final wavefunction diff --git a/src/mltplyMPIBoost.c b/src/mltplyMPIBoost.c index 51c63ce67..b390ef875 100644 --- a/src/mltplyMPIBoost.c +++ b/src/mltplyMPIBoost.c @@ -35,8 +35,8 @@ void zgemm_(char *TRANSA, char *TRANSB, int *M, int *N, int *K, double complex * */ void child_general_int_spin_MPIBoost( struct BindStruct *X /**< [inout]*/, - double complex *tmp_v0 /**< [out] Result v0 = H v1*/, - double complex *tmp_v1 /**< [in] v0 = H v1*/, + int nstate, double complex **tmp_v0 /**< [out] Result v0 = H v1*/, + double complex **tmp_v1 /**< [in] v0 = H v1*/, double complex *tmp_v2 /**< [inout] bufffer*/, double complex *tmp_v3 /**< [inout] bufffer*/ ) @@ -245,7 +245,7 @@ void child_general_int_spin_MPIBoost( iomp=i_max/(int)pow(2.0,ishift1+ishift2+ishift3+ishift4+ishift5+2); #pragma omp parallel default(none) private(arrayx,arrayz,arrayw,ell4,ell5,ell6,m0,Ipart1,TRANSA,TRANSB,M,N,K,LDA,LDB,LDC,ALPHA,BETA) \ - shared(matJL,matI,iomp,i_max,myrank,ishift1,ishift2,ishift3,ishift4,ishift5,pow4,pow5,pow41,pow51,tmp_v0,tmp_v1,tmp_v3) + shared(matJL,matI,iomp,i_max,myrank,ishift1,ishift2,ishift3,ishift4,ishift5,pow4,pow5,pow41,pow51,nstate,tmp_v0,tmp_v1,tmp_v3) { arrayx = 
cd_1d_allocate(64*((int)pow(2.0,ishift4+ishift5-1))); @@ -356,7 +356,7 @@ void child_general_int_spin_MPIBoost( if(pivot_flag==1){ iomp=i_max/(int)pow(2.0,X->Boost.ishift_nspin); #pragma omp parallel for default(none) private(ell4,ell5,ell6,m0,Ipart1,TRANSA,TRANSB,M,N,K,LDA,LDB,LDC,ALPHA,BETA) \ - firstprivate(iomp) shared(i_max,ishift1,ishift2,ishift3,ishift4,ishift5,pow4,pow5,pow41,pow51,X,tmp_v0,tmp_v1) + firstprivate(iomp) shared(i_max,ishift1,ishift2,ishift3,ishift4,ishift5,pow4,pow5,pow41,pow51,X,nstate,tmp_v0,tmp_v1) for(ell5 = 0; ell5 < iomp; ell5++ ){ for(ell4 = 0; ell4 < (int)pow(2.0,X->Boost.ishift_nspin); ell4++){ tmp_v0[(1 + ell5+(i_max/(int)pow(2.0,X->Boost.ishift_nspin))*ell4)] = tmp_v1[(1 + ell4+((int)pow(2.0,X->Boost.ishift_nspin))*ell5)]; @@ -373,7 +373,7 @@ void child_general_int_spin_MPIBoost( } else{ #pragma omp parallel for default(none) private(ell4) \ - shared(i_max,tmp_v0,tmp_v1,tmp_v3) + shared(i_max,nstate,tmp_v0,tmp_v1,tmp_v3) for(ell4 = 0; ell4 < i_max; ell4++ ){ tmp_v0[1 + ell4] = tmp_v1[1 + ell4]; tmp_v1[1 + ell4] = tmp_v3[1 + ell4]; @@ -392,7 +392,7 @@ void child_general_int_spin_MPIBoost( iomp=(int)pow(2.0,X->Boost.W0)/nproc; #pragma omp parallel for default(none) private(ell4,ell5,ell6) \ - firstprivate(iomp) shared(i_max,X,nproc,tmp_v0,tmp_v1,tmp_v2,tmp_v3) + firstprivate(iomp) shared(i_max,X,nproc,nstate,tmp_v0,tmp_v1,tmp_v2,tmp_v3) //for(ell4 = 0; ell4 < (int)pow(2.0,X->Boost.W0)/nproc; ell4++ ){ for(ell4 = 0; ell4 < iomp; ell4++ ){ for(ell5 = 0; ell5 < nproc; ell5++ ){ @@ -409,7 +409,7 @@ void child_general_int_spin_MPIBoost( /* dam_pr= X_child_general_int_spin_MPIBoost ( - matJ, X, tmp_v0, tmp_v1); + matJ, X, nstate, tmp_v0, tmp_v1); X->Large.prdct += dam_pr; */ diff --git a/src/mltplyMPIHubbard.c b/src/mltplyMPIHubbard.c index 4ef12ebaf..daf18bf00 100644 --- a/src/mltplyMPIHubbard.c +++ b/src/mltplyMPIHubbard.c @@ -33,15 +33,15 @@ void GC_child_general_hopp_MPIdouble ( unsigned long int itrans,//!<[in] Transfer ID struct 
BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[out] Result v0 = H v1 - double complex *tmp_v1//!<[in] v0 = H v1 + int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 + double complex **tmp_v1//!<[in] v0 = H v1 ){ #ifdef MPI double complex dam_pr = 0; dam_pr = X_GC_child_general_hopp_MPIdouble( X->Def.EDGeneralTransfer[itrans][0], X->Def.EDGeneralTransfer[itrans][1], X->Def.EDGeneralTransfer[itrans][2], X->Def.EDGeneralTransfer[itrans][3], - X->Def.EDParaGeneralTransfer[itrans], X, tmp_v0, tmp_v1); + X->Def.EDParaGeneralTransfer[itrans], X, nstate, tmp_v0, tmp_v1); X->Large.prdct += dam_pr; #endif }/*void GC_child_general_hopp_MPIdouble*/ @@ -58,8 +58,8 @@ double complex X_GC_child_general_hopp_MPIdouble( int org_ispin2,//!<[in] @f$\sigma_2@f$ of @f$c_{i_1 \sigma_1}^\dagger c_{i_2 \sigma_2}@f$ double complex tmp_trans,//!<[in] Transfer @f$t@f$ struct BindStruct *X,//!< [inout] - double complex *tmp_v0,//!< [out] Result v0 = H v1 - double complex *tmp_v1 //!< [in] v0 = H v1 + int nstate, double complex **tmp_v0,//!< [out] Result v0 = H v1 + double complex **tmp_v1 //!< [in] v0 = H v1 ) { #ifdef MPI int mask1, mask2, state1, state2, ierr, origin, bitdiff, Fsgn; @@ -134,8 +134,8 @@ double complex X_child_CisAjt_MPIdouble( int org_ispin2,//!<[in] @f$\sigma_2@f$ of @f$c_{i_1 \sigma_1}^\dagger c_{i_2 \sigma_2}@f$ double complex tmp_trans,//!<[in] Transfer @f$t@f$ struct BindStruct *X,//!< [inout] - double complex *tmp_v0,//!< [out] Result v0 = H v1 - double complex *tmp_v1,//!< [in] v0 = H v1 + int nstate, double complex **tmp_v0,//!< [out] Result v0 = H v1 + double complex **tmp_v1,//!< [in] v0 = H v1 double complex *v1buf,//!<[in] long unsigned int *list_1_org,//!<[in] long unsigned int *list_1buf_org,//!<[in] @@ -211,15 +211,15 @@ double complex X_child_CisAjt_MPIdouble( void GC_child_general_hopp_MPIsingle( unsigned long int itrans,//!<[in] Transfer ID struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[out] Result v0 = H v1 - double 
complex *tmp_v1//!<[in] v0 = H v1 + int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 + double complex **tmp_v1//!<[in] v0 = H v1 ){ #ifdef MPI double complex dam_pr=0; dam_pr=X_GC_child_general_hopp_MPIsingle( X->Def.EDGeneralTransfer[itrans][0], X->Def.EDGeneralTransfer[itrans][1], X->Def.EDGeneralTransfer[itrans][2], X->Def.EDGeneralTransfer[itrans][3], - X->Def.EDParaGeneralTransfer[itrans], X, tmp_v0, tmp_v1 ); + X->Def.EDParaGeneralTransfer[itrans], X, nstate, tmp_v0, tmp_v1 ); X->Large.prdct += dam_pr; #endif }/*void GC_child_general_hopp_MPIsingle*/ @@ -237,8 +237,8 @@ double complex X_GC_child_general_hopp_MPIsingle( int org_ispin2,//!<[in] Spin 2 double complex tmp_trans,//!<[in] Hopping integral struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[out] Result v0 = H v1 - double complex *tmp_v1//!<[in] v0 = H v1 + int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 + double complex **tmp_v1//!<[in] v0 = H v1 ) { #ifdef MPI int mask2, state1, state2, ierr, origin, bit2diff, Fsgn; @@ -334,15 +334,15 @@ double complex X_GC_child_general_hopp_MPIsingle( void child_general_hopp_MPIdouble( unsigned long int itrans,//!<[in] Transfer ID struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[out] Result v0 = H v1 - double complex *tmp_v1//!<[in] v0 = H v1 + int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 + double complex **tmp_v1//!<[in] v0 = H v1 ){ #ifdef MPI double complex dam_pr; dam_pr =X_child_general_hopp_MPIdouble( X->Def.EDGeneralTransfer[itrans][0], X->Def.EDGeneralTransfer[itrans][1], X->Def.EDGeneralTransfer[itrans][2], X->Def.EDGeneralTransfer[itrans][3], - X->Def.EDParaGeneralTransfer[itrans], X, tmp_v0, tmp_v1); + X->Def.EDParaGeneralTransfer[itrans], X, nstate, tmp_v0, tmp_v1); X->Large.prdct += dam_pr; #endif }/*void child_general_hopp_MPIdouble*/ @@ -359,8 +359,8 @@ double complex X_child_general_hopp_MPIdouble( int org_ispin2,//!<[in] Spin 2 double complex tmp_trans,//!<[in] Hopping 
integral struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[out] Result v0 = H v1 - double complex *tmp_v1//!<[in] v0 = H v1 + int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 + double complex **tmp_v1//!<[in] v0 = H v1 ) { #ifdef MPI int mask1, mask2, state1, state2, ierr, origin, bitdiff, Fsgn; @@ -439,15 +439,15 @@ double complex X_child_general_hopp_MPIdouble( void child_general_hopp_MPIsingle( unsigned long int itrans,//!<[in] Transfer ID struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[out] Result v0 = H v1 - double complex *tmp_v1//!<[in] v0 = H v1 + int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 + double complex **tmp_v1//!<[in] v0 = H v1 ){ #ifdef MPI double complex dam_pr; dam_pr =X_child_general_hopp_MPIsingle( X->Def.EDGeneralTransfer[itrans][0], X->Def.EDGeneralTransfer[itrans][1], X->Def.EDGeneralTransfer[itrans][2], X->Def.EDGeneralTransfer[itrans][3], - X->Def.EDParaGeneralTransfer[itrans], X, tmp_v0, tmp_v1); + X->Def.EDParaGeneralTransfer[itrans], X, nstate, tmp_v0, tmp_v1); X->Large.prdct += dam_pr; #endif }/*void child_general_hopp_MPIsingle*/ @@ -464,8 +464,8 @@ double complex X_child_general_hopp_MPIsingle( int org_ispin2,//!<[in] Spin 2 double complex tmp_trans,//!<[in] Hopping integral struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[out] Result v0 = H v1 - double complex *tmp_v1//!<[in] v0 = H v1 + int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 + double complex **tmp_v1//!<[in] v0 = H v1 ) { #ifdef MPI int mask2, state2, ierr, origin, bit2diff, Fsgn; @@ -576,8 +576,8 @@ double complex X_child_CisAjt_MPIsingle( int org_ispin2,//!<[in] Spin 2 double complex tmp_trans,//!<[in] Hopping integral struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[out] Result v0 = H v1 - double complex *tmp_v1,//!<[in] v0 = H v1 + int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 + double complex **tmp_v1,//!<[in] v0 = H v1 double complex *v1buf,//!<[in] 
Buffer for sendrecv of wavefunction long unsigned int *list_1_org,//!<[in] Similler to ::list_1 long unsigned int *list_1buf_org,//!<[in] Similler to ::list_1buf diff --git a/src/mltplyMPIHubbardCore.c b/src/mltplyMPIHubbardCore.c index 7ea072539..129849101 100644 --- a/src/mltplyMPIHubbardCore.c +++ b/src/mltplyMPIHubbardCore.c @@ -267,8 +267,8 @@ double complex X_GC_child_CisAisCjtAjt_Hubbard_MPI( int org_ispin3,//!<[in] Spin 3 double complex tmp_V,//!<[in] Coupling constant struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1//!<[inout] Initial wavefunction + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1//!<[inout] Initial wavefunction ) { #ifdef MPI double complex dam_pr = 0.0; @@ -284,7 +284,7 @@ double complex X_GC_child_CisAisCjtAjt_Hubbard_MPI( return 0.0; } -#pragma omp parallel reduction(+:dam_pr) default(none) shared(org_isite1, org_ispin1, org_isite3, org_ispin3, tmp_v0, tmp_v1) \ +#pragma omp parallel reduction(+:dam_pr) default(none) shared(org_isite1, org_ispin1, org_isite3, org_ispin3, nstate, tmp_v0, tmp_v1) \ firstprivate(i_max, tmp_V, X) private(dmv, j, tmp_off, tmp_ispin1) { @@ -349,8 +349,8 @@ double complex X_GC_child_CisAjtCkuAku_Hubbard_MPI( int org_ispin3,//!<[in] Spin 3 double complex tmp_V,//!<[in] Coupling constant struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1//!<[inout] Initial wavefunction + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1//!<[inout] Initial wavefunction ) { #ifdef MPI double complex dam_pr = 0.0; @@ -408,12 +408,12 @@ firstprivate(i_max,X,Asum,Adiff,isite1,isite2, tmp_V) private(j,tmp_off) shared( { #pragma omp for for (j = 1; j <= i_max; j++) - dam_pr += GC_CisAjt(j, tmp_v0, tmp_v1, X, isite2, isite1, Asum, Adiff, tmp_V, &tmp_off); + dam_pr += GC_CisAjt(j, nstate, tmp_v0, tmp_v1, X, 
isite2, isite1, Asum, Adiff, tmp_V, &tmp_off); if (X->Large.mode != M_CORR) { #pragma omp for for (j = 1; j <= i_max; j++) - dam_pr += GC_CisAjt(j, tmp_v0, tmp_v1, X, isite1, isite2, Asum, Adiff, tmp_V, &tmp_off); + dam_pr += GC_CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite1, isite2, Asum, Adiff, tmp_V, &tmp_off); }/*if (X->Large.mode != M_CORR)*/ }/*End of paralle region*/ return dam_pr; @@ -429,7 +429,7 @@ firstprivate(i_max,X,Asum,Adiff,isite1,isite2, tmp_V) private(j,tmp_off) shared( if (ierr != 0) exitMPI(-1); #pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv, tmp_off, Fsgn, org_rankbit, Adiff) \ -shared(v1buf, tmp_v1, tmp_v0, myrank, origin, isite3, org_isite3, isite1, isite2, org_isite2, org_isite1) \ +shared(v1buf, tmp_v1, nstate, tmp_v0, myrank, origin, isite3, org_isite3, isite1, isite2, org_isite2, org_isite1) \ firstprivate(idim_max_buf, tmp_V, X, tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4) { if (org_isite1 + 1 > X->Def.Nsite && org_isite2 + 1 > X->Def.Nsite) { @@ -520,14 +520,14 @@ double complex X_GC_child_CisAisCjtAku_Hubbard_MPI( int org_ispin4,//!<[in] Spin 4 double complex tmp_V,//!<[in] Coupling constant struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1//!<[inout] Initial wavefunction + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1//!<[inout] Initial wavefunction ) { #ifdef MPI double complex dam_pr = 0; dam_pr = X_GC_child_CisAjtCkuAku_Hubbard_MPI( org_isite4, org_ispin4, org_isite3, org_ispin3, - org_isite1, org_ispin1, conj(tmp_V), X, tmp_v0, tmp_v1); + org_isite1, org_ispin1, conj(tmp_V), X, nstate, tmp_v0, tmp_v1); return conj(dam_pr); #else return 0.0; @@ -549,8 +549,8 @@ double complex X_GC_child_CisAjtCkuAlv_Hubbard_MPI( int org_ispin4,//!<[in] Spin 4 double complex tmp_V,//!<[in] Coupling constant struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double 
complex *tmp_v1//!<[inout] Initial wavefunction + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1//!<[inout] Initial wavefunction ) { #ifdef MPI double complex dam_pr = 0; @@ -603,9 +603,9 @@ double complex X_GC_child_CisAjtCkuAlv_Hubbard_MPI( if (myrank == origin) { if (isite1 == isite4 && isite2 == isite3) { // CisAjvCjvAis =Cis(1-njv)Ais=nis-nisnjv //calc nis - dam_pr = X_GC_child_CisAis_Hubbard_MPI(org_isite1, org_ispin1, tmp_V, X, tmp_v0, tmp_v1); + dam_pr = X_GC_child_CisAis_Hubbard_MPI(org_isite1, org_ispin1, tmp_V, X, nstate, tmp_v0, tmp_v1); //calc -nisniv - dam_pr -= X_GC_child_CisAisCjtAjt_Hubbard_MPI(org_isite1, org_ispin1, org_isite3, org_ispin3, tmp_V, X, tmp_v0, tmp_v1); + dam_pr -= X_GC_child_CisAisCjtAjt_Hubbard_MPI(org_isite1, org_ispin1, org_isite3, org_ispin3, tmp_V, X, nstate, tmp_v0, tmp_v1); }/*if (isite1 == isite4 && isite2 == isite3)*/ else if (isite2 == isite3) { // CisAjvCjvAku= Cis(1-njv)Aku=-CisAkunjv+CisAku: j is in PE //calc CisAku @@ -615,28 +615,28 @@ double complex X_GC_child_CisAjtCkuAlv_Hubbard_MPI( #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_off) \ firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0) for (j = 1; j <= i_max; j++) - dam_pr += GC_CisAjt(j - 1, tmp_v0, tmp_v1, X, isite1, isite4, (isite1 + isite4), Adiff, tmp_V, &tmp_off); + dam_pr += GC_CisAjt(j - 1, nstate, tmp_v0, tmp_v1, X, isite1, isite4, (isite1 + isite4), Adiff, tmp_V, &tmp_off); //calc -CisAku njv dam_pr -= X_GC_child_CisAjtCkuAku_Hubbard_MPI(org_isite1, org_ispin1, org_isite4, org_ispin4, - org_isite2, org_ispin2, tmp_V, X, tmp_v0, tmp_v1); + org_isite2, org_ispin2, tmp_V, X, nstate, tmp_v0, tmp_v1); if (X->Large.mode != M_CORR) { //for hermite #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_off) \ firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0) for (j = 1; j <= i_max; j++) - dam_pr += GC_CisAjt(j - 1, 
tmp_v0, tmp_v1, X, isite4, isite1, (isite1 + isite4), Adiff, tmp_V, &tmp_off); + dam_pr += GC_CisAjt(j - 1, nstate, tmp_v0, tmp_v1, X, isite4, isite1, (isite1 + isite4), Adiff, tmp_V, &tmp_off); //calc -njvCkuAis dam_pr -= X_GC_child_CisAisCjtAku_Hubbard_MPI(org_isite2, org_ispin2, org_isite4, org_ispin4, - org_isite1, org_ispin1, tmp_V, X, tmp_v0, tmp_v1); + org_isite1, org_ispin1, tmp_V, X, nstate, tmp_v0, tmp_v1); }/*if (X->Large.mode != M_CORR)*/ }/*if (isite2 == isite3)*/ else {// CisAjtCkuAis = -CisAisCkuAjt: i is in PE dam_pr = -X_GC_child_CisAisCjtAku_Hubbard_MPI(org_isite1, org_ispin1, org_isite3, org_ispin3, - org_isite2, org_ispin2, tmp_V, X, tmp_v0, tmp_v1); + org_isite2, org_ispin2, tmp_V, X, nstate, tmp_v0, tmp_v1); if (X->Large.mode != M_CORR) { //for hermite dam_pr += -X_GC_child_CisAisCjtAku_Hubbard_MPI(org_isite1, org_ispin1, org_isite2, org_ispin2, - org_isite3, org_ispin3, tmp_V, X, tmp_v0, tmp_v1); + org_isite3, org_ispin3, tmp_V, X, nstate, tmp_v0, tmp_v1); }/*if (X->Large.mode != M_CORR)*/ }/*if (isite2 != isite3)*/ return dam_pr; @@ -723,8 +723,8 @@ double complex X_GC_child_CisAis_Hubbard_MPI( int org_ispin1,//!<[in] Spin 1 double complex tmp_V,//!<[in] Coupling constant struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1//!<[inout] Initial wavefunction + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1//!<[inout] Initial wavefunction ) { #ifdef MPI double complex dam_pr = 0.0; @@ -799,18 +799,18 @@ double complex X_GC_child_CisAjt_Hubbard_MPI( int org_ispin2,//!<[in] Spin 2 double complex tmp_trans,//!<[in] Coupling constant struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1//!<[inout] Initial wavefunction + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1//!<[inout] Initial wavefunction ) { #ifdef MPI double 
complex dam_pr = 0.0; // MPI_Status statusMPI; if (org_isite1 + 1 > X->Def.Nsite && org_isite2 + 1 > X->Def.Nsite) { - dam_pr = X_GC_child_general_hopp_MPIdouble(org_isite1, org_ispin1, org_isite2, org_ispin2, tmp_trans, X, tmp_v0, tmp_v1); + dam_pr = X_GC_child_general_hopp_MPIdouble(org_isite1, org_ispin1, org_isite2, org_ispin2, tmp_trans, X, nstate, tmp_v0, tmp_v1); } else if (org_isite1 + 1 > X->Def.Nsite || org_isite2 + 1 > X->Def.Nsite) { - dam_pr = X_GC_child_general_hopp_MPIsingle(org_isite1, org_ispin1, org_isite2, org_ispin2, tmp_trans, X, tmp_v0, tmp_v1); + dam_pr = X_GC_child_general_hopp_MPIsingle(org_isite1, org_ispin1, org_isite2, org_ispin2, tmp_trans, X, nstate, tmp_v0, tmp_v1); } else { //error message will be added. @@ -833,8 +833,8 @@ double complex X_child_CisAisCjtAjt_Hubbard_MPI( int org_ispin3,//!<[in] Spin 3 double complex tmp_V,//!<[in] Coupling constant struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1//!<[inout] Initial wavefunction + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1//!<[inout] Initial wavefunction ) { #ifdef MPI double complex dam_pr = 0.0; @@ -915,8 +915,8 @@ double complex X_child_CisAjtCkuAlv_Hubbard_MPI( int org_ispin4,//!<[in] Spin 4 double complex tmp_V,//!<[in] Coupling constant struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1//!<[inout] Initial wavefunction + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1//!<[inout] Initial wavefunction ) { #ifdef MPI double complex dam_pr = 0; @@ -966,9 +966,9 @@ double complex X_child_CisAjtCkuAlv_Hubbard_MPI( if (myrank == origin) { if (isite1 == isite4 && isite2 == isite3) { // CisAjvCjvAis =Cis(1-njv)Ais=nis-nisnjv //calc nis - dam_pr = X_child_CisAis_Hubbard_MPI(org_isite1, org_ispin1, tmp_V, X, tmp_v0, tmp_v1); + dam_pr = 
X_child_CisAis_Hubbard_MPI(org_isite1, org_ispin1, tmp_V, X, nstate, tmp_v0, tmp_v1); //calc -nisniv - dam_pr -= X_child_CisAisCjtAjt_Hubbard_MPI(org_isite1, org_ispin1, org_isite3, org_ispin3, tmp_V, X, tmp_v0, tmp_v1); + dam_pr -= X_child_CisAisCjtAjt_Hubbard_MPI(org_isite1, org_ispin1, org_isite3, org_ispin3, tmp_V, X, nstate, tmp_v0, tmp_v1); }/* if (isite1 == isite4 && isite2 == isite3)*/ else if (isite2 == isite3) { // CisAjvCjvAku= Cis(1-njv)Aku=-CisAkunjv+CisAku: j is in PE if (isite4 > isite1) Adiff = isite4 - isite1 * 2; @@ -976,30 +976,30 @@ double complex X_child_CisAjtCkuAlv_Hubbard_MPI( //calc CisAku #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_off) \ -firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0, list_1) +firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, nstate, tmp_v0, list_1) for (j = 1; j <= i_max; j++) - dam_pr += CisAjt(j, tmp_v0, tmp_v1, X, isite1, isite4, (isite1 + isite4), Adiff, tmp_V); + dam_pr += CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite1, isite4, (isite1 + isite4), Adiff, tmp_V); //calc -CisAku njv dam_pr -= X_child_CisAjtCkuAku_Hubbard_MPI(org_isite1, org_ispin1, org_isite4, org_ispin4, - org_isite2, org_ispin2, tmp_V, X, tmp_v0, tmp_v1); + org_isite2, org_ispin2, tmp_V, X, nstate, tmp_v0, tmp_v1); if (X->Large.mode != M_CORR) { //for hermite #pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_off) \ firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0) for (j = 1; j <= i_max; j++) - dam_pr += CisAjt(j, tmp_v0, tmp_v1, X, isite4, isite1, (isite1 + isite4), Adiff, tmp_V); + dam_pr += CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite4, isite1, (isite1 + isite4), Adiff, tmp_V); //calc -njvCkuAis - dam_pr -= X_child_CisAisCjtAku_Hubbard_MPI(org_isite2, org_ispin2, org_isite4, org_ispin4, org_isite1, org_ispin1, tmp_V, X, tmp_v0, tmp_v1); + dam_pr -= X_child_CisAisCjtAku_Hubbard_MPI(org_isite2, org_ispin2, org_isite4, org_ispin4, 
org_isite1, org_ispin1, tmp_V, X, nstate, tmp_v0, tmp_v1); }/*if (X->Large.mode != M_CORR)*/ }/*if (isite2 == isite3)*/ else {// CisAjtCkuAis = -CisAisCkuAjt: i is in PE - dam_pr = -X_child_CisAisCjtAku_Hubbard_MPI(org_isite1, org_ispin1, org_isite3, org_ispin3, org_isite2, org_ispin2, tmp_V, X, tmp_v0, tmp_v1); + dam_pr = -X_child_CisAisCjtAku_Hubbard_MPI(org_isite1, org_ispin1, org_isite3, org_ispin3, org_isite2, org_ispin2, tmp_V, X, nstate, tmp_v0, tmp_v1); if (X->Large.mode != M_CORR) //for hermite: CisAkuCjtAis=-CisAisCjtAku dam_pr = -X_child_CisAisCjtAku_Hubbard_MPI(org_isite1, org_ispin1, org_isite2, org_ispin2, - org_isite3, org_ispin3, tmp_V, X, tmp_v0, tmp_v1); + org_isite3, org_ispin3, tmp_V, X, nstate, tmp_v0, tmp_v1); }/*if (isite2 != isite3)*/ return dam_pr; }//myrank =origin @@ -1038,7 +1038,7 @@ firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0) }/*if (iFlgHermite == TRUE)*/ dam_pr = 0; #pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv, ioff) \ -firstprivate(idim_max_buf, tmp_V, X) shared(v1buf, tmp_v1, tmp_v0, list_2_1, list_2_2, list_1buf) +firstprivate(idim_max_buf, tmp_V, X) shared(v1buf, tmp_v1, nstate, tmp_v0, list_2_1, list_2_2, list_1buf) { if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { #pragma omp for @@ -1073,7 +1073,7 @@ firstprivate(idim_max_buf, tmp_V, X) shared(v1buf, tmp_v1, tmp_v0, list_2_1, lis #pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv, tmp_off, Fsgn, ioff) \ firstprivate(myrank, idim_max_buf, tmp_V, X, tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4, org_rankbit, \ org_isite1, org_ispin1, org_isite2, org_ispin2, org_isite3, org_ispin3, org_isite4, org_ispin4) \ -shared(v1buf, tmp_v1, tmp_v0, list_1buf, list_2_1, list_2_2) +shared(v1buf, tmp_v1, nstate, tmp_v0, list_1buf, list_2_1, list_2_2) { if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { #pragma omp for @@ -1127,8 +1127,8 @@ double complex X_child_CisAjtCkuAku_Hubbard_MPI( 
int org_ispin3,//!<[in] Spin 3 double complex tmp_V,//!<[in] Coupling constant struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1//!<[inout] Initial wavefunction + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1//!<[inout] Initial wavefunction ) { #ifdef MPI double complex dam_pr = 0.0; @@ -1183,12 +1183,12 @@ firstprivate(i_max, Asum, Adiff, isite1, isite2, tmp_V, X) private(j) shared(tmp { #pragma omp for for (j = 1; j <= i_max; j++) - dam_pr += CisAjt(j, tmp_v0, tmp_v1, X, isite1, isite2, Asum, Adiff, tmp_V); + dam_pr += CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite1, isite2, Asum, Adiff, tmp_V); if (X->Large.mode != M_CORR) { #pragma omp for for (j = 1; j <= i_max; j++) - dam_pr += CisAjt(j, tmp_v0, tmp_v1, X, isite2, isite1, Asum, Adiff, tmp_V); + dam_pr += CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite2, isite1, Asum, Adiff, tmp_V); }/*if (X->Large.mode != M_CORR)*/ }/*End of parallel region*/ return dam_pr; @@ -1210,7 +1210,7 @@ firstprivate(i_max, Asum, Adiff, isite1, isite2, tmp_V, X) private(j) shared(tmp #pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv, ioff, tmp_off, Fsgn, Adiff) \ firstprivate(idim_max_buf, tmp_V, X, tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4, org_rankbit, isite3) \ -shared(v1buf, tmp_v1, tmp_v0, list_1buf, list_2_1, list_2_2, origin, org_isite3, myrank, isite1, isite2, org_isite1, org_isite2) +shared(v1buf, tmp_v1, nstate, tmp_v0, list_1buf, list_2_1, list_2_2, origin, org_isite3, myrank, isite1, isite2, org_isite1, org_isite2) { if (org_isite1 + 1 > X->Def.Nsite && org_isite2 + 1 > X->Def.Nsite) { @@ -1316,15 +1316,15 @@ double complex X_child_CisAisCjtAku_Hubbard_MPI( int org_ispin4,//!<[in] Spin 4 double complex tmp_V,//!<[in] Coupling constant struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1//!<[inout] Initial wavefunction + int 
nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1//!<[inout] Initial wavefunction ) { #ifdef MPI double complex dam_pr = 0; dam_pr = X_child_CisAjtCkuAku_Hubbard_MPI( org_isite4, org_ispin4, org_isite3, org_ispin3, - org_isite1, org_ispin1, conj(tmp_V), X, tmp_v0, tmp_v1); + org_isite1, org_ispin1, conj(tmp_V), X, nstate, tmp_v0, tmp_v1); return conj(dam_pr); #else @@ -1337,8 +1337,8 @@ double complex X_child_CisAis_Hubbard_MPI( int org_ispin1,//!<[in] Spin 1 double complex tmp_V,//!<[in] Coupling constant struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1//!<[inout] Initial wavefunction + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1//!<[inout] Initial wavefunction ) { #ifdef MPI double complex dam_pr = 0.0; @@ -1413,10 +1413,10 @@ double complex X_GC_Cis_MPI( int org_isite,//!<[in] Site i int org_ispin,//!<[in] Spin s double complex tmp_trans,//!<[in] Coupling constant//!<[in] - double complex *tmp_v0,//!<[out] Result v0 += H v1*/, - double complex *tmp_v1,//!<[in] v0 += H v1*/, + int nstate, double complex **tmp_v0,//!<[out] Result v0 += H v1*/, + double complex **tmp_v1,//!<[in] v0 += H v1*/, unsigned long int idim_max,//!<[in] Similar to CheckList::idim_max - double complex *tmp_v1buf,//!<[in] buffer for wavefunction + double complex **tmp_v1buf,//!<[in] buffer for wavefunction unsigned long int *Tpow//!<[in] Similar to DefineList::Tpow ) { #ifdef MPI @@ -1481,10 +1481,10 @@ double complex X_GC_Ajt_MPI( int org_isite,//!<[in] Site j int org_ispin,//!<[in] Spin t double complex tmp_trans,//!<[in] Coupling constant//!<[in] - double complex *tmp_v0,//!<[out] Result v0 += H v1*/, - double complex *tmp_v1,//!<[in] v0 += H v1*/, + int nstate, double complex **tmp_v0,//!<[out] Result v0 += H v1*/, + double complex **tmp_v1,//!<[in] v0 += H v1*/, unsigned long int idim_max,//!<[in] Similar to 
CheckList::idim_max - double complex *tmp_v1buf,//!<[in] buffer for wavefunction + double complex **tmp_v1buf,//!<[in] buffer for wavefunction unsigned long int *Tpow//!<[in] Similar to DefineList::Tpow ) { #ifdef MPI @@ -1543,9 +1543,9 @@ double complex X_Cis_MPI( int org_isite,//!<[in] Site i unsigned int org_ispin,//!<[in] Spin s double complex tmp_trans,//!<[in] Coupling constant - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1,//!<[inout] Initial wavefunction - double complex *tmp_v1buf,//!<[in] buffer for wavefunction + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1,//!<[inout] Initial wavefunction + double complex **tmp_v1buf,//!<[in] buffer for wavefunction unsigned long int idim_max,//!<[in] Similar to CheckList::idim_max long unsigned int *Tpow,//!<[in] Similar to DefineList::Tpow long unsigned int *list_1_org,//!<[in] Similar to ::list_1 @@ -1601,7 +1601,7 @@ double complex X_Cis_MPI( dam_pr = 0.0; #pragma omp parallel for default(none) private(j, dmv) \ firstprivate(idim_max_buf, trans, ioff, _irght, _ilft, _ihfbit, list_2_1_target, list_2_2_target) \ -shared(tmp_v1buf, tmp_v1, tmp_v0, list_1buf_org) +shared(tmp_v1buf, tmp_v1, nstate, tmp_v0, list_1buf_org) for (j = 1; j <= idim_max_buf; j++) {//idim_max_buf -> original GetOffComp(list_2_1_target, list_2_2_target, list_1buf_org[j], _irght, _ilft, _ihfbit, &ioff); @@ -1622,9 +1622,9 @@ double complex X_Ajt_MPI( int org_isite,//!<[in] Site j unsigned int org_ispin,//!<[in] Spin t double complex tmp_trans,//!<[in] Coupling constant - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1,//!<[inout] Initial wavefunction - double complex *tmp_v1buf,//!<[in] buffer for wavefunction + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1,//!<[inout] Initial wavefunction + double complex **tmp_v1buf,//!<[in] buffer for wavefunction unsigned long int 
idim_max,//!<[in] Similar to CheckList::idim_max long unsigned int *Tpow,//!<[in] Similar to DefineList::Tpow long unsigned int *list_1_org,//!<[in] Similar to ::list_1 @@ -1679,7 +1679,7 @@ double complex X_Ajt_MPI( dam_pr = 0.0; #pragma omp parallel for default(none) private(j, dmv) \ firstprivate(idim_max_buf, trans, ioff, _irght, _ilft, _ihfbit, list_2_1_target, list_2_2_target) \ -shared(tmp_v1buf, tmp_v1, tmp_v0, list_1buf_org) +shared(tmp_v1buf, tmp_v1, nstate, tmp_v0, list_1buf_org) for (j = 1; j <= idim_max_buf; j++) { GetOffComp(list_2_1_target, list_2_2_target, list_1buf_org[j], _irght, _ilft, _ihfbit, &ioff); diff --git a/src/mltplyMPISpin.c b/src/mltplyMPISpin.c index 2125659c0..f55bbaca8 100644 --- a/src/mltplyMPISpin.c +++ b/src/mltplyMPISpin.c @@ -35,8 +35,8 @@ void child_general_int_spin_MPIdouble( unsigned long int i_int,//!<[in] Interaction ID struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[out] Result v0 = H v1 - double complex *tmp_v1//!<[in] v0 = H v1 + int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 + double complex **tmp_v1//!<[in] v0 = H v1 ){ #ifdef MPI double complex dam_pr = 0; @@ -44,7 +44,7 @@ void child_general_int_spin_MPIdouble( (int)X->Def.InterAll_OffDiagonal[i_int][0], (int)X->Def.InterAll_OffDiagonal[i_int][1], (int)X->Def.InterAll_OffDiagonal[i_int][3], (int)X->Def.InterAll_OffDiagonal[i_int][4], (int)X->Def.InterAll_OffDiagonal[i_int][5], (int)X->Def.InterAll_OffDiagonal[i_int][7], - X->Def.ParaInterAll_OffDiagonal[i_int], X, tmp_v0, tmp_v1); + X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); /** Add @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ to LargeList::prdct @@ -67,8 +67,8 @@ double complex X_child_general_int_spin_MPIdouble( int org_ispin4,//!<[in] Spin 4 double complex tmp_J,//!<[in] Copupling constatnt struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ - double complex *tmp_v1//!<[in] Vector to be producted + int nstate, 
double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ + double complex **tmp_v1//!<[in] Vector to be producted ) { #ifdef MPI int mask1, mask2, state1, state2, ierr, origin; @@ -141,8 +141,8 @@ double complex X_child_general_int_spin_TotalS_MPIdouble( int org_isite1,//!<[in] site 1 int org_isite3,//!<[in] site 3 struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ - double complex *tmp_v1//!<[in] Vector to be producted + int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ + double complex **tmp_v1//!<[in] Vector to be producted ){ #ifdef MPI int mask1, mask2, num1_up, num2_up, ierr, origin; @@ -196,8 +196,8 @@ double complex X_child_general_int_spin_TotalS_MPIdouble( void child_general_int_spin_MPIsingle( unsigned long int i_int,//!<[in] Interaction ID struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[out] Result v0 = H v1 - double complex *tmp_v1//!<[in] v0 = H v1 + int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 + double complex **tmp_v1//!<[in] v0 = H v1 ){ #ifdef MPI double complex dam_pr = 0; @@ -206,7 +206,7 @@ void child_general_int_spin_MPIsingle( (int)X->Def.InterAll_OffDiagonal[i_int][0], (int)X->Def.InterAll_OffDiagonal[i_int][1], (int)X->Def.InterAll_OffDiagonal[i_int][3], (int)X->Def.InterAll_OffDiagonal[i_int][4], (int)X->Def.InterAll_OffDiagonal[i_int][5], (int)X->Def.InterAll_OffDiagonal[i_int][7], - X->Def.ParaInterAll_OffDiagonal[i_int], X, tmp_v0, tmp_v1); + X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); /** Add @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ to LargeList::prdct @@ -228,8 +228,8 @@ double complex X_child_general_int_spin_MPIsingle( int org_ispin4,//!<[in] Spin 4 double complex tmp_J,//!<[in] Copupling constatnt struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ - double complex *tmp_v1//!<[in] Vector to be producted + int nstate, double complex 
**tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ + double complex **tmp_v1//!<[in] Vector to be producted ) { #ifdef MPI int mask2, state2, ierr, origin; @@ -342,19 +342,19 @@ shared(list_2_1, list_2_2, list_1buf, v1buf, tmp_v1, tmp_v0) void GC_child_general_int_spin_MPIdouble( unsigned long int i_int,//!<[in] Interaction ID struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[out] Result v0 = H v1 - double complex *tmp_v1//!<[in] v0 = H v1 + int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 + double complex **tmp_v1//!<[in] v0 = H v1 ){ if (X->Def.InterAll_OffDiagonal[i_int][1] == X->Def.InterAll_OffDiagonal[i_int][3] && X->Def.InterAll_OffDiagonal[i_int][5] != X->Def.InterAll_OffDiagonal[i_int][7]) { - GC_child_CisAisCjuAjv_spin_MPIdouble(i_int, X, tmp_v0, tmp_v1); + GC_child_CisAisCjuAjv_spin_MPIdouble(i_int, X, nstate, tmp_v0, tmp_v1); } else if (X->Def.InterAll_OffDiagonal[i_int][1] != X->Def.InterAll_OffDiagonal[i_int][3] && X->Def.InterAll_OffDiagonal[i_int][5] == X->Def.InterAll_OffDiagonal[i_int][7]) { - GC_child_CisAitCjuAju_spin_MPIdouble(i_int, X, tmp_v0, tmp_v1); + GC_child_CisAitCjuAju_spin_MPIdouble(i_int, X, nstate, tmp_v0, tmp_v1); } else { - GC_child_CisAitCiuAiv_spin_MPIdouble(i_int, X, tmp_v0, tmp_v1); + GC_child_CisAitCiuAiv_spin_MPIdouble(i_int, X, nstate, tmp_v0, tmp_v1); } }/*void GC_child_general_int_spin_MPIdouble*/ /** @@ -365,19 +365,19 @@ void GC_child_general_int_spin_MPIdouble( void GC_child_general_int_spin_MPIsingle( unsigned long int i_int,//!<[in] Interaction ID struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[out] Result v0 = H v1 - double complex *tmp_v1//!<[in] v0 = H v1 + int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 + double complex **tmp_v1//!<[in] v0 = H v1 ){ if (X->Def.InterAll_OffDiagonal[i_int][1] == X->Def.InterAll_OffDiagonal[i_int][3] && X->Def.InterAll_OffDiagonal[i_int][5] != X->Def.InterAll_OffDiagonal[i_int][7]) { - GC_child_CisAisCjuAjv_spin_MPIsingle(i_int, 
X, tmp_v0, tmp_v1); + GC_child_CisAisCjuAjv_spin_MPIsingle(i_int, X, nstate, tmp_v0, tmp_v1); } else if (X->Def.InterAll_OffDiagonal[i_int][1] != X->Def.InterAll_OffDiagonal[i_int][3] && X->Def.InterAll_OffDiagonal[i_int][5] == X->Def.InterAll_OffDiagonal[i_int][7]) { - GC_child_CisAitCjuAju_spin_MPIsingle(i_int, X, tmp_v0, tmp_v1); + GC_child_CisAitCjuAju_spin_MPIsingle(i_int, X, nstate, tmp_v0, tmp_v1); } else { - GC_child_CisAitCiuAiv_spin_MPIsingle(i_int, X, tmp_v0, tmp_v1); + GC_child_CisAitCiuAiv_spin_MPIsingle(i_int, X, nstate, tmp_v0, tmp_v1); } }/*void GC_child_general_int_spin_MPIsingle*/ /** @@ -388,8 +388,8 @@ void GC_child_general_int_spin_MPIsingle( void GC_child_general_int_GeneralSpin_MPIdouble( unsigned long int i_int,//!<[in] Interaction ID struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[out] Result v0 = H v1 - double complex *tmp_v1//!<[in] v0 = H v1 + int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 + double complex **tmp_v1//!<[in] v0 = H v1 ){ #ifdef MPI double complex dam_pr; @@ -400,21 +400,21 @@ void GC_child_general_int_GeneralSpin_MPIdouble( dam_pr = X_GC_child_CisAisCjuAjv_GeneralSpin_MPIdouble( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][4], X->Def.InterAll_OffDiagonal[i_int][5], - X->Def.InterAll_OffDiagonal[i_int][7], X->Def.ParaInterAll_OffDiagonal[i_int], X, tmp_v0, tmp_v1); + X->Def.InterAll_OffDiagonal[i_int][7], X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); } else if (X->Def.InterAll_OffDiagonal[i_int][1] != X->Def.InterAll_OffDiagonal[i_int][3] && X->Def.InterAll_OffDiagonal[i_int][5] == X->Def.InterAll_OffDiagonal[i_int][7]) { dam_pr = X_GC_child_CisAitCjuAju_GeneralSpin_MPIdouble( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][3], X->Def.InterAll_OffDiagonal[i_int][4], - X->Def.InterAll_OffDiagonal[i_int][5], 
X->Def.ParaInterAll_OffDiagonal[i_int], X, tmp_v0, tmp_v1); + X->Def.InterAll_OffDiagonal[i_int][5], X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); } else { dam_pr = X_GC_child_CisAitCjuAjv_GeneralSpin_MPIdouble( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][3], X->Def.InterAll_OffDiagonal[i_int][4], X->Def.InterAll_OffDiagonal[i_int][5], X->Def.InterAll_OffDiagonal[i_int][7], - X->Def.ParaInterAll_OffDiagonal[i_int], X, tmp_v0, tmp_v1); + X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); } X->Large.prdct += dam_pr; #endif @@ -427,8 +427,8 @@ void GC_child_general_int_GeneralSpin_MPIdouble( void GC_child_general_int_GeneralSpin_MPIsingle( unsigned long int i_int,//!<[in] Interaction ID struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[out] Result v0 = H v1 - double complex *tmp_v1//!<[in] v0 = H v1 + int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 + double complex **tmp_v1//!<[in] v0 = H v1 ){ #ifdef MPI double complex dam_pr; @@ -438,21 +438,21 @@ void GC_child_general_int_GeneralSpin_MPIsingle( dam_pr = X_GC_child_CisAisCjuAjv_GeneralSpin_MPIsingle( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][4], X->Def.InterAll_OffDiagonal[i_int][5], - X->Def.InterAll_OffDiagonal[i_int][7], X->Def.ParaInterAll_OffDiagonal[i_int], X, tmp_v0, tmp_v1); + X->Def.InterAll_OffDiagonal[i_int][7], X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); } else if (X->Def.InterAll_OffDiagonal[i_int][1] != X->Def.InterAll_OffDiagonal[i_int][3] && X->Def.InterAll_OffDiagonal[i_int][5] == X->Def.InterAll_OffDiagonal[i_int][7]) { dam_pr = X_GC_child_CisAitCjuAju_GeneralSpin_MPIsingle( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][3], X->Def.InterAll_OffDiagonal[i_int][4], - X->Def.InterAll_OffDiagonal[i_int][5], 
X->Def.ParaInterAll_OffDiagonal[i_int], X, tmp_v0, tmp_v1); + X->Def.InterAll_OffDiagonal[i_int][5], X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); } else { dam_pr = X_GC_child_CisAitCjuAjv_GeneralSpin_MPIsingle( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][3], X->Def.InterAll_OffDiagonal[i_int][4], X->Def.InterAll_OffDiagonal[i_int][5], X->Def.InterAll_OffDiagonal[i_int][7], - X->Def.ParaInterAll_OffDiagonal[i_int], X, tmp_v0, tmp_v1); + X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); } X->Large.prdct += dam_pr; @@ -466,15 +466,15 @@ void GC_child_general_int_GeneralSpin_MPIsingle( void child_general_int_GeneralSpin_MPIdouble( unsigned long int i_int,//!<[in] Interaction ID struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[out] Result v0 = H v1 - double complex *tmp_v1//!<[in] v0 = H v1 + int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 + double complex **tmp_v1//!<[in] v0 = H v1 ){ double complex dam_pr; dam_pr = X_child_CisAitCjuAjv_GeneralSpin_MPIdouble( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][3], X->Def.InterAll_OffDiagonal[i_int][4], X->Def.InterAll_OffDiagonal[i_int][5], X->Def.InterAll_OffDiagonal[i_int][7], - X->Def.ParaInterAll_OffDiagonal[i_int], X, tmp_v0, tmp_v1); + X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); X->Large.prdct += dam_pr; }/*void GC_child_general_int_spin_MPIdouble*/ @@ -486,8 +486,8 @@ void child_general_int_GeneralSpin_MPIdouble( void child_general_int_GeneralSpin_MPIsingle( unsigned long int i_int,//!<[in] Interaction ID struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[out] Result v0 = H v1 - double complex *tmp_v1//!<[in] v0 = H v1 + int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 + double complex **tmp_v1//!<[in] v0 = H v1 ){ double complex dam_pr; @@ -495,7 +495,7 @@ void 
child_general_int_GeneralSpin_MPIsingle( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][3], X->Def.InterAll_OffDiagonal[i_int][4], X->Def.InterAll_OffDiagonal[i_int][5], X->Def.InterAll_OffDiagonal[i_int][7], - X->Def.ParaInterAll_OffDiagonal[i_int], X, tmp_v0, tmp_v1); + X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); X->Large.prdct += dam_pr; }/*void GC_child_general_int_spin_MPIsingle*/ diff --git a/src/mltplyMPISpinCore.c b/src/mltplyMPISpinCore.c index 053e8c9cd..d5258598d 100644 --- a/src/mltplyMPISpinCore.c +++ b/src/mltplyMPISpinCore.c @@ -79,8 +79,8 @@ General two body term: void GC_child_CisAitCiuAiv_spin_MPIdouble( unsigned long int i_int /**< [in] Interaction ID*/, struct BindStruct *X /**< [inout]*/, - double complex *tmp_v0 /**< [out] Result v0 = H v1*/, - double complex *tmp_v1 /**< [in] v0 = H v1*/) + int nstate, double complex **tmp_v0 /**< [out] Result v0 = H v1*/, + double complex **tmp_v1 /**< [in] v0 = H v1*/) { #ifdef MPI double complex dam_pr; @@ -88,7 +88,7 @@ void GC_child_CisAitCiuAiv_spin_MPIdouble( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][3], X->Def.InterAll_OffDiagonal[i_int][4], X->Def.InterAll_OffDiagonal[i_int][5], X->Def.InterAll_OffDiagonal[i_int][7], - X->Def.ParaInterAll_OffDiagonal[i_int],X, tmp_v0, tmp_v1); + X->Def.ParaInterAll_OffDiagonal[i_int],X, nstate, tmp_v0, tmp_v1); X->Large.prdct += dam_pr; #endif }/*void GC_child_CisAitCiuAiv_spin_MPIdouble*/ @@ -109,8 +109,8 @@ double complex X_GC_child_CisAitCiuAiv_spin_MPIdouble( int org_ispin4,//!<[in] spin v double complex tmp_J,//!<[in] Copupling constatnt struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ - double complex *tmp_v1//!<[in] Vector to be producted + int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ + double complex **tmp_v1//!<[in] 
Vector to be producted ) { #ifdef MPI int mask1, mask2, state1, state2, ierr, origin; @@ -125,7 +125,7 @@ double complex X_GC_child_CisAitCiuAiv_spin_MPIdouble( } else { if (org_ispin1 == org_ispin4 && org_ispin2 == org_ispin3) { //CisAitCitAis=CisAis - dam_pr = X_GC_child_CisAis_spin_MPIdouble(org_isite1, org_ispin1, tmp_J, X, tmp_v0, tmp_v1); + dam_pr = X_GC_child_CisAis_spin_MPIdouble(org_isite1, org_ispin1, tmp_J, X, nstate, tmp_v0, tmp_v1); return (dam_pr); } else { //CisAitCisAit=0 @@ -191,15 +191,15 @@ double complex X_GC_child_CisAitCiuAiv_spin_MPIdouble( void GC_child_CisAisCjuAjv_spin_MPIdouble( unsigned long int i_int /**< [in] Interaction ID*/, struct BindStruct *X /**< [inout]*/, - double complex *tmp_v0 /**< [out] Result v0 = H v1*/, - double complex *tmp_v1 /**< [in] v0 = H v1*/ + int nstate, double complex **tmp_v0 /**< [out] Result v0 = H v1*/, + double complex **tmp_v1 /**< [in] v0 = H v1*/ ){ #ifdef MPI double complex dam_pr; dam_pr = X_GC_child_CisAisCjuAjv_spin_MPIdouble( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][4], X->Def.InterAll_OffDiagonal[i_int][5], - X->Def.InterAll_OffDiagonal[i_int][7], X->Def.ParaInterAll_OffDiagonal[i_int], X, tmp_v0, tmp_v1); + X->Def.InterAll_OffDiagonal[i_int][7], X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); X->Large.prdct += dam_pr; #endif }/*void GC_child_CisAitCiuAiv_spin_MPIdouble*/ @@ -217,8 +217,8 @@ double complex X_GC_child_CisAisCjuAjv_spin_MPIdouble( int org_ispin4,//!<[in] Spin 4 double complex tmp_J,//!<[in] Copupling constatnt struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ - double complex *tmp_v1//!<[in] Vector to be producted + int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ + double complex **tmp_v1//!<[in] Vector to be producted ) { #ifdef MPI int mask1, mask2, state2, ierr; @@ -287,8 +287,8 @@ double complex 
X_GC_child_CisAisCjuAjv_spin_MPIdouble( void GC_child_CisAitCjuAju_spin_MPIdouble( unsigned long int i_int,//!<[in] Interaction ID struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[out] Result v0 = H v1 - double complex *tmp_v1//!<[in] v0 = H v1 + int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 + double complex **tmp_v1//!<[in] v0 = H v1 ) { #ifdef MPI @@ -296,7 +296,7 @@ void GC_child_CisAitCjuAju_spin_MPIdouble( dam_pr = X_GC_child_CisAitCjuAju_spin_MPIdouble( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][3], X->Def.InterAll_OffDiagonal[i_int][4], - X->Def.InterAll_OffDiagonal[i_int][5], X->Def.ParaInterAll_OffDiagonal[i_int], X, tmp_v0, tmp_v1); + X->Def.InterAll_OffDiagonal[i_int][5], X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); X->Large.prdct += dam_pr; #endif }/*void GC_child_CisAitCiuAiv_spin_MPIdouble*/ @@ -314,8 +314,8 @@ double complex X_GC_child_CisAitCjuAju_spin_MPIdouble( int org_ispin3,//!<[in] Spin 3 double complex tmp_J,//!<[in] Copupling constatnt struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ - double complex *tmp_v1//!<[in] Vector to be producted + int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ + double complex **tmp_v1//!<[in] Vector to be producted ) { #ifdef MPI int mask1, mask2, state1, ierr, num1; @@ -401,8 +401,8 @@ double complex X_GC_child_CisAisCjuAju_spin_MPIdouble( int org_ispin3,//!<[in] Spin 3 double complex tmp_J,//!<[in] Copupling constatnt struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ - double complex *tmp_v1//!<[in] Vector to be producted + int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ + double complex **tmp_v1//!<[in] Vector to be producted ){ #ifdef MPI long unsigned int mask1, mask2, num1,num2; @@ -452,8 +452,8 @@ double complex 
X_GC_child_CisAisCjuAju_spin_MPIsingle( int org_ispin3,//!<[in] Spin 3 double complex tmp_J,//!<[in] Copupling constatnt struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ - double complex *tmp_v1//!<[in] Vector to be producted + int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ + double complex **tmp_v1//!<[in] Vector to be producted ) { #ifdef MPI long unsigned int mask1, mask2, num1, num2; @@ -500,8 +500,8 @@ double complex X_GC_child_CisAisCjuAju_spin_MPIsingle( void GC_child_CisAitCiuAiv_spin_MPIsingle( unsigned long int i_int,//!<[in] Interaction ID struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[out] Result v0 = H v1 - double complex *tmp_v1//!<[in] v0 = H v1 + int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 + double complex **tmp_v1//!<[in] v0 = H v1 ){ #ifdef MPI double complex dam_pr; @@ -509,7 +509,7 @@ void GC_child_CisAitCiuAiv_spin_MPIsingle( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][3], X->Def.InterAll_OffDiagonal[i_int][4], X->Def.InterAll_OffDiagonal[i_int][5], X->Def.InterAll_OffDiagonal[i_int][7], - X->Def.ParaInterAll_OffDiagonal[i_int], X, tmp_v0, tmp_v1); + X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); X->Large.prdct += dam_pr; #endif }/*void GC_child_CisAitCiuAiv_spin_MPIsingle*/ @@ -528,8 +528,8 @@ double complex X_GC_child_CisAitCiuAiv_spin_MPIsingle( int org_ispin4,//!<[in] Spin 4 double complex tmp_J,//!<[in] Copupling constatnt struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ - double complex *tmp_v1//!<[in] Vector to be producted + int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ + double complex **tmp_v1//!<[in] Vector to be producted ) { #ifdef MPI int mask2, state2, ierr, origin; @@ -608,15 +608,15 @@ double complex X_GC_child_CisAitCiuAiv_spin_MPIsingle( void 
GC_child_CisAisCjuAjv_spin_MPIsingle( unsigned long int i_int,//!<[in] Interaction ID struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[out] Result v0 = H v1 - double complex *tmp_v1//!<[in] v0 = H v1 + int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 + double complex **tmp_v1//!<[in] v0 = H v1 ){ #ifdef MPI double complex dam_pr; dam_pr =X_GC_child_CisAisCjuAjv_spin_MPIsingle( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][4], X->Def.InterAll_OffDiagonal[i_int][5], - X->Def.InterAll_OffDiagonal[i_int][7], X->Def.ParaInterAll_OffDiagonal[i_int], X, tmp_v0, tmp_v1); + X->Def.InterAll_OffDiagonal[i_int][7], X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); X->Large.prdct += dam_pr; #endif }/*void GC_child_CisAisCjuAjv_spin_MPIsingle*/ @@ -634,8 +634,8 @@ double complex X_GC_child_CisAisCjuAjv_spin_MPIsingle( int org_ispin4,//!<[in] Spin 2 double complex tmp_J,//!<[in] Copupling constatnt struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ - double complex *tmp_v1//!<[in] Vector to be producted + int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ + double complex **tmp_v1//!<[in] Vector to be producted ) { #ifdef MPI int mask2, state2, ierr, origin; @@ -713,15 +713,15 @@ double complex X_GC_child_CisAisCjuAjv_spin_MPIsingle( void GC_child_CisAitCjuAju_spin_MPIsingle( unsigned long int i_int,//!<[in] Interaction ID struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[out] Result v0 = H v1 - double complex *tmp_v1//!<[in] v0 = H v1 + int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 + double complex **tmp_v1//!<[in] v0 = H v1 ){ #ifdef MPI double complex dam_pr; dam_pr =X_GC_child_CisAitCjuAju_spin_MPIsingle( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][3], X->Def.InterAll_OffDiagonal[i_int][4], - 
X->Def.InterAll_OffDiagonal[i_int][5], X->Def.ParaInterAll_OffDiagonal[i_int], X, tmp_v0, tmp_v1); + X->Def.InterAll_OffDiagonal[i_int][5], X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); X->Large.prdct += dam_pr; #endif }/*void GC_child_CisAisCjuAjv_spin_MPIsingle*/ @@ -739,8 +739,8 @@ double complex X_GC_child_CisAitCjuAju_spin_MPIsingle( int org_ispin3,//!<[in] Spin 3 double complex tmp_J,//!<[in] Copupling constatnt struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ - double complex *tmp_v1//!<[in] Vector to be producted + int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ + double complex **tmp_v1//!<[in] Vector to be producted ) { #ifdef MPI int mask2, state2; @@ -831,8 +831,8 @@ double complex X_GC_child_CisAisCjuAjv_GeneralSpin_MPIdouble( int org_ispin4,//!<[in] Spin 4 double complex tmp_J,//!<[in] Copupling constatnt struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ - double complex *tmp_v1//!<[in] Vector to be producted + int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ + double complex **tmp_v1//!<[in] Vector to be producted ) { #ifdef MPI unsigned long int off, j; @@ -906,8 +906,8 @@ double complex X_GC_child_CisAitCjuAju_GeneralSpin_MPIdouble( int org_ispin3,//!<[in] Spin 3 double complex tmp_J,//!<[in] Copupling constatnt struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ - double complex *tmp_v1//!<[in] Vector to be producted + int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ + double complex **tmp_v1//!<[in] Vector to be producted ) { #ifdef MPI unsigned long int j, off; @@ -983,8 +983,8 @@ double complex X_GC_child_CisAitCjuAjv_GeneralSpin_MPIdouble( int org_ispin4,//!<[in] Spin 4 double complex tmp_J,//!<[in] Coupling constant struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] Resulting 
wavefunction - double complex *tmp_v1//!<[in] Input wavefunction + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1//!<[in] Input wavefunction ) { #ifdef MPI unsigned long int tmp_off, off, j; @@ -996,7 +996,7 @@ double complex X_GC_child_CisAitCjuAjv_GeneralSpin_MPIdouble( if (org_isite1 == org_isite3 && org_ispin1 == org_ispin4 && org_ispin2 == org_ispin3) { //cisaitcitais=cisais && cisaitcitais =cisais - dam_pr = X_GC_child_CisAis_GeneralSpin_MPIdouble(org_isite1, org_ispin1, tmp_J, X, tmp_v0, tmp_v1); + dam_pr = X_GC_child_CisAis_GeneralSpin_MPIdouble(org_isite1, org_ispin1, tmp_J, X, nstate, tmp_v0, tmp_v1); return (dam_pr); } //cisaitcisait @@ -1072,8 +1072,8 @@ double complex X_GC_child_CisAisCjuAju_GeneralSpin_MPIdouble( int org_ispin3,//!<[in] Spin 3 double complex tmp_J,//!<[in] Coupling constant struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1//!<[in] Input wavefunction + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1//!<[in] Input wavefunction ) { #ifdef MPI unsigned long int j, num1; @@ -1127,8 +1127,8 @@ double complex X_GC_child_CisAit_GeneralSpin_MPIdouble( int org_ispin2,//!<[in] Spin 2 double complex tmp_trans,//!<[in] Coupling constant struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1//!<[in] Input wavefunction + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1//!<[in] Input wavefunction ) { #ifdef MPI unsigned long int off, j; @@ -1190,8 +1190,8 @@ double complex X_GC_child_CisAis_GeneralSpin_MPIdouble( int org_ispin1,//!<[in] Spin 1 double complex tmp_trans,//!<[in] Coupling constant struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1//!<[in] Input wavefunction + int nstate, double complex 
**tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1//!<[in] Input wavefunction ) { #ifdef MPI unsigned long int j, num1; @@ -1240,8 +1240,8 @@ double complex X_GC_child_AisCis_GeneralSpin_MPIdouble( int org_ispin1,//!<[in] Spin 1 double complex tmp_trans,//!<[in] Coupling constant struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1//!<[in] Input wavefunction + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1//!<[in] Input wavefunction ) { #ifdef MPI unsigned long int j, num1; @@ -1291,9 +1291,9 @@ double complex X_child_CisAit_GeneralSpin_MPIdouble( int org_ispin2,//!<[in] Spin 2 double complex tmp_trans,//!<[in] Coupling constant struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1,//!<[in] Input wavefunction - double complex *tmp_v1buf,//!<[inout] buffer for wavefunction + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1,//!<[in] Input wavefunction + double complex **tmp_v1buf,//!<[inout] buffer for wavefunction unsigned long int idim_max,//!<[in] Similar to CheckList::idim_max long unsigned int *list_1_org,//!<[in] Similar to ::list_1 long unsigned int *list_1buf_org,//!<[in] Similar to ::list_1buf @@ -1367,8 +1367,8 @@ double complex X_GC_child_CisAisCjuAjv_GeneralSpin_MPIsingle( int org_ispin4,//!<[in] Spin 4 double complex tmp_J,//!<[in] Coupling constant struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1//!<[in] Input wavefunction + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1//!<[in] Input wavefunction ){ #ifdef MPI unsigned long int off, j, num1; @@ -1446,8 +1446,8 @@ double complex X_GC_child_CisAitCjuAju_GeneralSpin_MPIsingle( int org_ispin3,//!<[in] Spin 3 double complex tmp_J,//!<[in] Coupling 
constant struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1//!<[in] Input wavefunction + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1//!<[in] Input wavefunction ){ #ifdef MPI unsigned long int num1, j, off; @@ -1520,8 +1520,8 @@ double complex X_GC_child_CisAitCjuAjv_GeneralSpin_MPIsingle( int org_ispin4,//!<[in] Spin 4 double complex tmp_J,//!<[in] Coupling constant struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1//!<[in] Input wavefunction + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1//!<[in] Input wavefunction ){ #ifdef MPI unsigned long int off, j; @@ -1602,8 +1602,8 @@ double complex X_GC_child_CisAisCjuAju_GeneralSpin_MPIsingle( int org_ispin3,//!<[in] Spin 3 double complex tmp_J,//!<[in] Coupling constant struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1//!<[in] Input wavefunction + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1//!<[in] Input wavefunction ){ #ifdef MPI unsigned long int j, num1; @@ -1658,8 +1658,8 @@ double complex X_child_CisAitCjuAjv_GeneralSpin_MPIdouble( int org_ispin4,//!<[in] Spin 4 double complex tmp_J,//!<[in] Coupling constant struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1//!<[in] Input wavefunction + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1//!<[in] Input wavefunction ){ #ifdef MPI unsigned long int tmp_off, off, j, idim_max_buf; @@ -1752,8 +1752,8 @@ double complex X_child_CisAisCjuAju_GeneralSpin_MPIdouble( int org_ispin3,//!<[in] Spin 3 double complex tmp_J,//!<[in] Coupling constant struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] 
Resulting wavefunction - double complex *tmp_v1//!<[in] Input wavefunction + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1//!<[in] Input wavefunction ) { #ifdef MPI unsigned long int j, num1; @@ -1821,8 +1821,8 @@ double complex X_child_CisAisCjuAju_GeneralSpin_MPIsingle( int org_ispin3,//!<[in] Spin 3 double complex tmp_J,//!<[in] Coupling constant struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1//!<[in] Input wavefunction + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1//!<[in] Input wavefunction ) { #ifdef MPI @@ -1879,8 +1879,8 @@ double complex X_child_CisAitCjuAjv_GeneralSpin_MPIsingle( int org_ispin4,//!<[in] Spin 4 double complex tmp_J,//!<[in] Coupling constant struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] Resulting wavefunction - double complex *tmp_v1//!<[in] Input wavefunction + int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + double complex **tmp_v1//!<[in] Input wavefunction ){ #ifdef MPI unsigned long int tmp_off, off, j, idim_max_buf; @@ -1973,8 +1973,8 @@ double complex X_GC_child_CisAit_spin_MPIdouble( int org_ispin2,//!<[in] Spin 2 double complex tmp_trans,//!<[in] Coupling constant struct BindStruct *X /**< [inout]*/, - double complex *tmp_v0 /**< [out] Result v0 = H v1*/, - double complex *tmp_v1 /**< [in] v0 = H v1*/) + int nstate, double complex **tmp_v0 /**< [out] Result v0 = H v1*/, + double complex **tmp_v1 /**< [in] v0 = H v1*/) { #ifdef MPI int mask1, state1, ierr, origin; @@ -2046,9 +2046,9 @@ double complex X_child_CisAit_spin_MPIdouble( int org_ispin2,//!<[in] Spin 2 double complex tmp_trans,//!<[in] Coupling constant struct BindStruct *X /**< [inout]*/, - double complex *tmp_v0 /**< [out] Result v0 = H v1*/, - double complex *tmp_v1, /**< [in] v0 = H v1*/ - double complex *tmp_v1buf,//!<[in] buffer for wavefunction + 
int nstate, double complex **tmp_v0 /**< [out] Result v0 = H v1*/, + double complex **tmp_v1, /**< [in] v0 = H v1*/ + double complex **tmp_v1buf,//!<[in] buffer for wavefunction unsigned long int idim_max,//!<[in] Similar to CheckList::idim_max long unsigned int *Tpow,//!<[in] Similar to DefineList::Tpow long unsigned int *list_1_org,//!<[in] Similar to ::list_1 @@ -2124,8 +2124,8 @@ double complex X_GC_child_CisAis_spin_MPIdouble( int org_ispin1,//!<[in] Spin 1 double complex tmp_trans,//!<[in] Coupling constant struct BindStruct *X /**< [inout]*/, - double complex *tmp_v0 /**< [out] Result v0 = H v1*/, - double complex *tmp_v1 /**< [in] v0 = H v1*/ + int nstate, double complex **tmp_v0 /**< [out] Result v0 = H v1*/, + double complex **tmp_v1 /**< [in] v0 = H v1*/ ){ #ifdef MPI long unsigned int j; @@ -2136,7 +2136,7 @@ double complex X_GC_child_CisAis_spin_MPIdouble( ibit1 = (((unsigned long int)myrank& mask1)/mask1)^(1-org_ispin1); dam_pr = 0.0; -#pragma omp parallel reduction(+:dam_pr)default(none) shared(tmp_v1, tmp_v0, ibit1) \ +#pragma omp parallel reduction(+:dam_pr)default(none) shared(tmp_v1, nstate, tmp_v0, ibit1) \ firstprivate(X, tmp_trans) private(j) { if (ibit1 != 0) { @@ -2171,8 +2171,8 @@ double complex X_GC_child_AisCis_spin_MPIdouble( int org_ispin1,//!<[in] Spin 1 double complex tmp_trans,//!<[in] Coupling constant struct BindStruct *X /**< [inout]*/, - double complex *tmp_v0 /**< [out] Result v0 = H v1*/, - double complex *tmp_v1 /**< [in] v0 = H v1*/ + int nstate, double complex **tmp_v0 /**< [out] Result v0 = H v1*/, + double complex **tmp_v1 /**< [in] v0 = H v1*/ ){ #ifdef MPI long unsigned int j; @@ -2183,7 +2183,7 @@ double complex X_GC_child_AisCis_spin_MPIdouble( ibit1 = (((unsigned long int)myrank& mask1) / mask1) ^ (1 - org_ispin1); dam_pr = 0.0; -#pragma omp parallel reduction(+:dam_pr)default(none) shared(tmp_v1, tmp_v0, ibit1) \ +#pragma omp parallel reduction(+:dam_pr)default(none) shared(tmp_v1, nstate, tmp_v0, ibit1) \ 
firstprivate(X, tmp_trans) private(j) { if (ibit1 == 0) { diff --git a/src/mltplySpin.c b/src/mltplySpin.c index ff5a46f79..c65f08621 100644 --- a/src/mltplySpin.c +++ b/src/mltplySpin.c @@ -174,14 +174,14 @@ General on-site term */ int mltplySpin( struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] Result vector - double complex *tmp_v1//!<[in] Input producted vector + int nstate, double complex **tmp_v0,//!<[inout] Result vector + double complex **tmp_v1//!<[in] Input producted vector ) { int iret=0; if (X->Def.iFlgGeneralSpin == FALSE) - iret = mltplyHalfSpin(X, tmp_v0, tmp_v1); + iret = mltplyHalfSpin(X, nstate, tmp_v0, tmp_v1); else - iret = mltplyGeneralSpin(X, tmp_v0, tmp_v1); + iret = mltplyGeneralSpin(X, nstate, tmp_v0, tmp_v1); return iret; }/*int mltplySpin*/ /** @@ -191,8 +191,8 @@ int mltplySpin( */ int mltplyHalfSpin( struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] Result vector - double complex *tmp_v1//!<[in] Input producted vector + int nstate, double complex **tmp_v0,//!<[inout] Result vector + double complex **tmp_v1//!<[in] Input producted vector ) { long unsigned int i; long unsigned int isite1, isite2, sigma1, sigma2; @@ -215,17 +215,17 @@ int mltplyHalfSpin( if (X->Def.InterAll_OffDiagonal[i][0] + 1 > X->Def.Nsite && X->Def.InterAll_OffDiagonal[i][4] + 1 > X->Def.Nsite) { StartTimer(411); - child_general_int_spin_MPIdouble(i, X, tmp_v0, tmp_v1); + child_general_int_spin_MPIdouble(i, X, nstate, tmp_v0, tmp_v1); StopTimer(411); } else if (X->Def.InterAll_OffDiagonal[i][4] + 1 > X->Def.Nsite) { StartTimer(412); - child_general_int_spin_MPIsingle(i, X, tmp_v0, tmp_v1); + child_general_int_spin_MPIsingle(i, X, nstate, tmp_v0, tmp_v1); StopTimer(412); } else if (X->Def.InterAll_OffDiagonal[i][0] + 1 > X->Def.Nsite) { StartTimer(413); - child_general_int_spin_MPIsingle(i + 1, X, tmp_v0, tmp_v1); + child_general_int_spin_MPIsingle(i + 1, X, nstate, tmp_v0, tmp_v1); StopTimer(413); } else { @@ -259,7 +259,7 @@ 
int mltplyHalfSpin( dam_pr = X_child_general_int_spin_MPIdouble( X->Def.ExchangeCoupling[i][0], sigma1, sigma2, X->Def.ExchangeCoupling[i][1], sigma2, sigma1, - X->Def.ParaExchangeCoupling[i], X, tmp_v0, tmp_v1); + X->Def.ParaExchangeCoupling[i], X, nstate, tmp_v0, tmp_v1); StopTimer(421); } else if (X->Def.ExchangeCoupling[i][1] + 1 > X->Def.Nsite) { @@ -267,7 +267,7 @@ int mltplyHalfSpin( dam_pr = X_child_general_int_spin_MPIsingle( X->Def.ExchangeCoupling[i][0], sigma1, sigma2, X->Def.ExchangeCoupling[i][1], sigma2, sigma1, - X->Def.ParaExchangeCoupling[i], X, tmp_v0, tmp_v1); + X->Def.ParaExchangeCoupling[i], X, nstate, tmp_v0, tmp_v1); StopTimer(422); } else if (X->Def.ExchangeCoupling[i][0] + 1 > X->Def.Nsite) { @@ -275,7 +275,7 @@ int mltplyHalfSpin( dam_pr = X_child_general_int_spin_MPIsingle( X->Def.ExchangeCoupling[i][1], sigma2, sigma1, X->Def.ExchangeCoupling[i][0], sigma1, sigma2, - conj(X->Def.ParaExchangeCoupling[i]), X, tmp_v0, tmp_v1); + conj(X->Def.ParaExchangeCoupling[i]), X, nstate, tmp_v0, tmp_v1); StopTimer(423); } else { @@ -298,8 +298,8 @@ int mltplyHalfSpin( */ int mltplyGeneralSpin( struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] Result vector - double complex *tmp_v1//!<[in] Input producted vector + int nstate, double complex **tmp_v0,//!<[inout] Result vector + double complex **tmp_v1//!<[in] Input producted vector ){ long unsigned int j; long unsigned int i; @@ -333,17 +333,17 @@ int mltplyGeneralSpin( if (X->Def.InterAll_OffDiagonal[i][0] + 1 > X->Def.Nsite && X->Def.InterAll_OffDiagonal[i][4] + 1 > X->Def.Nsite) { StartTimer(411); - child_general_int_GeneralSpin_MPIdouble(i, X, tmp_v0, tmp_v1); + child_general_int_GeneralSpin_MPIdouble(i, X, nstate, tmp_v0, tmp_v1); StopTimer(411); } else if (X->Def.InterAll_OffDiagonal[i][4] + 1 > X->Def.Nsite) { StartTimer(412); - child_general_int_GeneralSpin_MPIsingle(i, X, tmp_v0, tmp_v1); + child_general_int_GeneralSpin_MPIsingle(i, X, nstate, tmp_v0, tmp_v1); 
StopTimer(412); } else if (X->Def.InterAll_OffDiagonal[i][0] + 1 > X->Def.Nsite) { StartTimer(412); - child_general_int_GeneralSpin_MPIsingle(i + 1, X, tmp_v0, tmp_v1); + child_general_int_GeneralSpin_MPIsingle(i + 1, X, nstate, tmp_v0, tmp_v1); StopTimer(412); } else { @@ -392,19 +392,19 @@ int mltplyGeneralSpin( */ int mltplySpinGC( struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] Result vector - double complex *tmp_v1//!<[in] Input producted vector + int nstate, double complex **tmp_v0,//!<[inout] Result vector + double complex **tmp_v1//!<[in] Input producted vector ) { int iret=0; if (X->Def.iFlgGeneralSpin == FALSE) - iret = mltplyHalfSpinGC(X, tmp_v0, tmp_v1); + iret = mltplyHalfSpinGC(X, nstate, tmp_v0, tmp_v1); else - iret = mltplyGeneralSpinGC(X, tmp_v0, tmp_v1); + iret = mltplyGeneralSpinGC(X, nstate, tmp_v0, tmp_v1); if(iret != 0) return iret; if(X->Boost.flgBoost == 1) - iret = mltplySpinGCBoost(X, tmp_v0, tmp_v1); + iret = mltplySpinGCBoost(X, nstate, tmp_v0, tmp_v1); return iret; }/*int mltplySpinGC*/ @@ -415,8 +415,8 @@ int mltplySpinGC( */ int mltplyHalfSpinGC( struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] Result vector - double complex *tmp_v1//!<[in] Input producted vector + int nstate, double complex **tmp_v0,//!<[inout] Result vector + double complex **tmp_v1//!<[in] Input producted vector ) { long unsigned int j; long unsigned int i; @@ -451,7 +451,7 @@ int mltplyHalfSpinGC( dam_pr += X_GC_child_CisAit_spin_MPIdouble( X->Def.EDGeneralTransfer[i][0], X->Def.EDGeneralTransfer[i][1], X->Def.EDGeneralTransfer[i][3], -X->Def.EDParaGeneralTransfer[i], - X, tmp_v0, tmp_v1); + X, nstate, tmp_v0, tmp_v1); StopTimer(511); } }/*if(X->Def.EDGeneralTransfer[i][0]+1 > X->Def.Nsite)*/ @@ -501,17 +501,17 @@ shared(tmp_v0, tmp_v1) if (X->Def.InterAll_OffDiagonal[i][0] + 1 > X->Def.Nsite && X->Def.InterAll_OffDiagonal[i][4] + 1 > X->Def.Nsite) { StartTimer(521); - GC_child_general_int_spin_MPIdouble(i, X, tmp_v0, 
tmp_v1); + GC_child_general_int_spin_MPIdouble(i, X, nstate, tmp_v0, tmp_v1); StopTimer(521); } else if (X->Def.InterAll_OffDiagonal[i][4] + 1 > X->Def.Nsite) { StartTimer(522); - GC_child_general_int_spin_MPIsingle(i, X, tmp_v0, tmp_v1); + GC_child_general_int_spin_MPIsingle(i, X, nstate, tmp_v0, tmp_v1); StopTimer(522); } else if (X->Def.InterAll_OffDiagonal[i][0] + 1 > X->Def.Nsite) { StartTimer(522); - GC_child_general_int_spin_MPIsingle(i + 1, X, tmp_v0, tmp_v1); + GC_child_general_int_spin_MPIsingle(i + 1, X, nstate, tmp_v0, tmp_v1); StopTimer(522); } else { @@ -545,7 +545,7 @@ shared(tmp_v0, tmp_v1) dam_pr = X_GC_child_CisAitCiuAiv_spin_MPIdouble( X->Def.ExchangeCoupling[i][0], sigma1, sigma2, X->Def.ExchangeCoupling[i][1], sigma2, sigma1, - X->Def.ParaExchangeCoupling[i], X, tmp_v0, tmp_v1); + X->Def.ParaExchangeCoupling[i], X, nstate, tmp_v0, tmp_v1); StopTimer(531); } else if (X->Def.ExchangeCoupling[i][1] + 1 > X->Def.Nsite) { @@ -553,7 +553,7 @@ shared(tmp_v0, tmp_v1) dam_pr=X_GC_child_CisAitCiuAiv_spin_MPIsingle( X->Def.ExchangeCoupling[i][0], sigma1, sigma2, X->Def.ExchangeCoupling[i][1], sigma2, sigma1, - X->Def.ParaExchangeCoupling[i], X, tmp_v0, tmp_v1); + X->Def.ParaExchangeCoupling[i], X, nstate, tmp_v0, tmp_v1); StopTimer(532); } else if (X->Def.ExchangeCoupling[i][0] + 1 > X->Def.Nsite) { @@ -561,7 +561,7 @@ shared(tmp_v0, tmp_v1) dam_pr=X_GC_child_CisAitCiuAiv_spin_MPIsingle( X->Def.ExchangeCoupling[i][1], sigma2, sigma1, X->Def.ExchangeCoupling[i][0], sigma1, sigma2, - conj(X->Def.ParaExchangeCoupling[i]), X, tmp_v0, tmp_v1); + conj(X->Def.ParaExchangeCoupling[i]), X, nstate, tmp_v0, tmp_v1); StopTimer(532); } else { @@ -585,7 +585,7 @@ shared(tmp_v0, tmp_v1) dam_pr = X_GC_child_CisAitCiuAiv_spin_MPIdouble( X->Def.PairLiftCoupling[i][0], sigma1, sigma2, X->Def.PairLiftCoupling[i][1], sigma1, sigma2, - X->Def.ParaPairLiftCoupling[i], X, tmp_v0, tmp_v1); + X->Def.ParaPairLiftCoupling[i], X, nstate, tmp_v0, tmp_v1); StopTimer(541); } else if 
(X->Def.PairLiftCoupling[i][1] + 1 > X->Def.Nsite) { @@ -593,7 +593,7 @@ shared(tmp_v0, tmp_v1) dam_pr = X_GC_child_CisAitCiuAiv_spin_MPIsingle( X->Def.PairLiftCoupling[i][0], sigma1, sigma2, X->Def.PairLiftCoupling[i][1], sigma1, sigma2, - X->Def.ParaPairLiftCoupling[i], X, tmp_v0, tmp_v1); + X->Def.ParaPairLiftCoupling[i], X, nstate, tmp_v0, tmp_v1); StopTimer(542); } else if (X->Def.PairLiftCoupling[i][0] + 1 > X->Def.Nsite) { @@ -601,7 +601,7 @@ shared(tmp_v0, tmp_v1) dam_pr = X_GC_child_CisAitCiuAiv_spin_MPIsingle( X->Def.PairLiftCoupling[i][1], sigma1, sigma2, X->Def.PairLiftCoupling[i][0], sigma1, sigma2, - conj(X->Def.ParaPairLiftCoupling[i]), X, tmp_v0, tmp_v1); + conj(X->Def.ParaPairLiftCoupling[i]), X, nstate, tmp_v0, tmp_v1); StopTimer(542); } else { @@ -624,8 +624,8 @@ shared(tmp_v0, tmp_v1) */ int mltplyGeneralSpinGC( struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] Result vector - double complex *tmp_v1//!<[in] Input producted vector + int nstate, double complex **tmp_v0,//!<[inout] Result vector + double complex **tmp_v1//!<[in] Input producted vector ) { long unsigned int j; long unsigned int i; @@ -662,7 +662,7 @@ int mltplyGeneralSpinGC( if (sigma1 != sigma2) { if (isite1 > X->Def.Nsite) { dam_pr = X_GC_child_CisAit_GeneralSpin_MPIdouble( - isite1 - 1, sigma1, sigma2, tmp_trans, X, tmp_v0, tmp_v1); + isite1 - 1, sigma1, sigma2, tmp_trans, X, nstate, tmp_v0, tmp_v1); X->Large.prdct += dam_pr; }/*if (isite1 > X->Def.Nsite)*/ else { @@ -709,17 +709,17 @@ shared(tmp_v0, tmp_v1) if (X->Def.InterAll_OffDiagonal[i][0] + 1 > X->Def.Nsite && X->Def.InterAll_OffDiagonal[i][4] + 1 > X->Def.Nsite) { StartTimer(521); - GC_child_general_int_GeneralSpin_MPIdouble(i, X, tmp_v0, tmp_v1); + GC_child_general_int_GeneralSpin_MPIdouble(i, X, nstate, tmp_v0, tmp_v1); StopTimer(521); } else if (X->Def.InterAll_OffDiagonal[i][4] + 1 > X->Def.Nsite) { StartTimer(522); - GC_child_general_int_GeneralSpin_MPIsingle(i, X, tmp_v0, tmp_v1); + 
GC_child_general_int_GeneralSpin_MPIsingle(i, X, nstate, tmp_v0, tmp_v1); StopTimer(522); } else if (X->Def.InterAll_OffDiagonal[i][0] + 1 > X->Def.Nsite) { StartTimer(522); - GC_child_general_int_GeneralSpin_MPIsingle(i + 1, X, tmp_v0, tmp_v1); + GC_child_general_int_GeneralSpin_MPIsingle(i + 1, X, nstate, tmp_v0, tmp_v1); StopTimer(522); } else { @@ -821,8 +821,8 @@ shared(tmp_v0, tmp_v1) */ int mltplySpinGCBoost( struct BindStruct *X,//!<[inout] - double complex *tmp_v0,//!<[inout] Result vector - double complex *tmp_v1//!<[in] Input producted vector + int nstate, double complex **tmp_v0,//!<[inout] Result vector + double complex **tmp_v1//!<[in] Input producted vector ) { long unsigned int j; @@ -841,7 +841,7 @@ int mltplySpinGCBoost( tmp_v2 = cd_1d_allocate(i_max+1); tmp_v3 = cd_1d_allocate(i_max+1); - child_general_int_spin_MPIBoost(X, tmp_v0, tmp_v1, tmp_v2, tmp_v3); + child_general_int_spin_MPIBoost(X, nstate, tmp_v0, tmp_v1, tmp_v2, tmp_v3); dam_pr = 0.0; #pragma omp parallel for default(none) reduction(+:dam_pr) \ private(j) shared(tmp_v1,tmp_v0) firstprivate(i_max) @@ -868,8 +868,8 @@ private(j) shared(tmp_v1,tmp_v0) firstprivate(i_max) @author Kazuyoshi Yoshimi (The University of Tokyo) */ double complex child_exchange_spin( - double complex *tmp_v0,//!<[inout] Result vector - double complex *tmp_v1,//!<[in] Input producted vector + int nstate, double complex **tmp_v0,//!<[inout] Result vector + double complex **tmp_v1,//!<[in] Input producted vector struct BindStruct *X//!<[inout] ) { long unsigned int j; @@ -880,7 +880,7 @@ double complex child_exchange_spin( #pragma omp parallel for default(none) reduction(+:dam_pr) \ firstprivate(i_max, X,off) private(j) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) - dam_pr += child_exchange_spin_element(j, tmp_v0, tmp_v1, X, &off); + dam_pr += child_exchange_spin_element(j, nstate, tmp_v0, tmp_v1, X, &off); return dam_pr; }/*double complex child_exchange_spin*/ /** @@ -890,8 +890,8 @@ firstprivate(i_max, 
X,off) private(j) shared(tmp_v0, tmp_v1) @author Kazuyoshi Yoshimi (The University of Tokyo) */ double complex GC_child_exchange_spin( - double complex *tmp_v0,//!<[inout] Result vector - double complex *tmp_v1,//!<[in] Input producted vector + int nstate, double complex **tmp_v0,//!<[inout] Result vector + double complex **tmp_v1,//!<[in] Input producted vector struct BindStruct *X//!<[inout] ) { long unsigned int j; @@ -902,7 +902,7 @@ double complex GC_child_exchange_spin( #pragma omp parallel for default(none) reduction(+:dam_pr) \ firstprivate(i_max, X,off) private(j) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) - dam_pr += GC_child_exchange_spin_element(j, tmp_v0, tmp_v1, X, &off); + dam_pr += GC_child_exchange_spin_element(j, nstate, tmp_v0, tmp_v1, X, &off); return dam_pr; }/*double complex GC_child_exchange_spin*/ /** @@ -912,8 +912,8 @@ firstprivate(i_max, X,off) private(j) shared(tmp_v0, tmp_v1) @author Kazuyoshi Yoshimi (The University of Tokyo) */ double complex GC_child_pairlift_spin( - double complex *tmp_v0,//!<[inout] Result vector - double complex *tmp_v1,//!<[in] Input producted vector + int nstate, double complex **tmp_v0,//!<[inout] Result vector + double complex **tmp_v1,//!<[in] Input producted vector struct BindStruct *X//!<[inout] ) { long int j; @@ -924,7 +924,7 @@ double complex GC_child_pairlift_spin( #pragma omp parallel for default(none) reduction(+:dam_pr) \ firstprivate(i_max, X,off) private(j) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) - dam_pr += GC_child_pairlift_spin_element(j, tmp_v0, tmp_v1, X, &off); + dam_pr += GC_child_pairlift_spin_element(j, nstate, tmp_v0, tmp_v1, X, &off); return dam_pr; }/*double complex GC_child_pairlift_spin*/ /** @@ -934,8 +934,8 @@ firstprivate(i_max, X,off) private(j) shared(tmp_v0, tmp_v1) @author Kazuyoshi Yoshimi (The University of Tokyo) */ double complex child_general_int_spin( - double complex *tmp_v0,//!<[inout] Result vector - double complex *tmp_v1,//!<[in] Input producted 
vector + int nstate, double complex **tmp_v0,//!<[inout] Result vector + double complex **tmp_v1,//!<[in] Input producted vector struct BindStruct *X//!<[inout] ) { double complex dam_pr, tmp_V, dmv; @@ -972,8 +972,8 @@ firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V) shared(t @author Kazuyoshi Yoshimi (The University of Tokyo) */ double complex GC_child_general_int_spin( - double complex *tmp_v0,//!<[inout] Result vector - double complex *tmp_v1,//!<[in] Input producted vector + int nstate, double complex **tmp_v0,//!<[inout] Result vector + double complex **tmp_v1,//!<[in] Input producted vector struct BindStruct *X//!<[inout] ) { double complex dam_pr, tmp_V; @@ -1003,25 +1003,25 @@ firstprivate(i_max,X,isA_up,isB_up,org_sigma1,org_sigma2,org_sigma3,org_sigma4,t #pragma omp for for (j = 1; j <= i_max; j++) dam_pr += GC_child_CisAisCisAis_spin_element( - j, isA_up, isB_up, org_sigma2, org_sigma4, tmp_V, tmp_v0, tmp_v1, X); + j, isA_up, isB_up, org_sigma2, org_sigma4, tmp_V, nstate, tmp_v0, tmp_v1, X); } else if (org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4) { #pragma omp for for (j = 1; j <= i_max; j++) dam_pr += GC_child_CisAisCitAiu_spin_element( - j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, tmp_v0, tmp_v1, X, &tmp_off); + j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, tmp_v0, tmp_v1, X, &tmp_off); } else if (org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4) { #pragma omp for for (j = 1; j <= i_max; j++) dam_pr += GC_child_CisAitCiuAiu_spin_element( - j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, tmp_v0, tmp_v1, X, &tmp_off); + j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, tmp_v0, tmp_v1, X, &tmp_off); } else if (org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4) { #pragma omp for for (j = 1; j <= i_max; j++) dam_pr += GC_child_CisAitCiuAiv_spin_element( - j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, tmp_v0, tmp_v1, X, &tmp_off); + j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, 
nstate, tmp_v0, tmp_v1, X, &tmp_off); } }/*End of parallel region*/ return dam_pr; diff --git a/src/mltplySpinCore.c b/src/mltplySpinCore.c index 4e4a97f2e..827d92c08 100644 --- a/src/mltplySpinCore.c +++ b/src/mltplySpinCore.c @@ -272,8 +272,8 @@ int X_child_exchange_spin_element( */ double complex child_exchange_spin_element( long unsigned int j,//!<[in] Index of initial wavefunction - double complex *tmp_v0,//!<[out] Resulting wavefunction - double complex *tmp_v1,//!<[in] Wavefunction to be multiplied + int nstate, double complex **tmp_v0,//!<[out] Resulting wavefunction + double complex **tmp_v1,//!<[in] Wavefunction to be multiplied struct BindStruct *X,//!<[inout] long unsigned int *tmp_off//!<[out] Index of final wavefunction ) { @@ -313,8 +313,8 @@ double complex child_exchange_spin_element( */ double complex GC_child_exchange_spin_element( long unsigned int j,//!<[in] Index of initial wavefunction - double complex *tmp_v0,//!<[out] Resulting wavefunction - double complex *tmp_v1,//!<[in] Wavefunction to be multiplied + int nstate, double complex **tmp_v0,//!<[out] Resulting wavefunction + double complex **tmp_v1,//!<[in] Wavefunction to be multiplied struct BindStruct *X,//!<[inout] long unsigned int *tmp_off//!<[out] Index of final wavefunction ) { @@ -351,8 +351,8 @@ double complex GC_child_exchange_spin_element( */ double complex GC_child_pairlift_spin_element( long unsigned int j,//!<[in] Index of initial wavefunction - double complex *tmp_v0,//!<[out] Resulting wavefunction - double complex *tmp_v1,//!<[in] Wavefunction to be multiplied + int nstate, double complex **tmp_v0,//!<[out] Resulting wavefunction + double complex **tmp_v1,//!<[in] Wavefunction to be multiplied struct BindStruct *X,//!<[inout] long unsigned int *tmp_off//!<[out] Index of final wavefunction ) { @@ -395,8 +395,8 @@ double complex child_CisAisCisAis_spin_element( long unsigned int org_sigma2,//!<[in] Target for spin 1 long unsigned int org_sigma4,//!<[in] Target for spin 2 
double complex tmp_V,//!<[in] Coupling constatnt - double complex *tmp_v0,//!<[in] Resulting wavefunction - double complex *tmp_v1,//!<[in] Wavefunction to be multiplied + int nstate, double complex **tmp_v0,//!<[in] Resulting wavefunction + double complex **tmp_v1,//!<[in] Wavefunction to be multiplied struct BindStruct *X//!<[inout] ) { int tmp_sgn; @@ -430,8 +430,8 @@ double complex GC_child_CisAisCisAis_spin_element( long unsigned int org_sigma2,//!<[in] Target for spin 1 long unsigned int org_sigma4,//!<[in] Target for spin 2 double complex tmp_V,//!<[in] Coupling constatnt - double complex *tmp_v0,//!<[in] Resulting wavefunction - double complex *tmp_v1,//!<[in] Wavefunction to be multiplied + int nstate, double complex **tmp_v0,//!<[in] Resulting wavefunction + double complex **tmp_v1,//!<[in] Wavefunction to be multiplied struct BindStruct *X//!<[inout] ) { int tmp_sgn; @@ -463,8 +463,8 @@ double complex GC_child_CisAisCitAiu_spin_element( long unsigned int isA_up,//!<[in] Bit mask for spin 1 long unsigned int isB_up,//!<[in] Bit mask for spin 2 double complex tmp_V,//!<[in] Coupling constatnt - double complex *tmp_v0,//!<[in] Resulting wavefunction - double complex *tmp_v1,//!<[in] Wavefunction to be multiplied + int nstate, double complex **tmp_v0,//!<[in] Resulting wavefunction + double complex **tmp_v1,//!<[in] Wavefunction to be multiplied struct BindStruct *X,//!<[inout] long unsigned int *tmp_off//!<[out] Index of final wavefunction ) { @@ -498,8 +498,8 @@ double complex GC_child_CisAitCiuAiu_spin_element( long unsigned int isA_up,//!<[in] Bit mask for spin 1 long unsigned int isB_up,//!<[in] Bit mask for spin 2 double complex tmp_V,//!<[in] Coupling constatnt - double complex *tmp_v0,//!<[in] Resulting wavefunction - double complex *tmp_v1,//!<[in] Wavefunction to be multiplied + int nstate, double complex **tmp_v0,//!<[in] Resulting wavefunction + double complex **tmp_v1,//!<[in] Wavefunction to be multiplied struct BindStruct *X,//!<[inout] long 
unsigned int *tmp_off//!<[out] Index of final wavefunction ) { @@ -533,8 +533,8 @@ double complex GC_child_CisAitCiuAiv_spin_element( long unsigned int isA_up,//!<[in] Bit mask for spin 1 long unsigned int isB_up,//!<[in] Bit mask for spin 2 double complex tmp_V,//!<[in] Coupling constatnt - double complex *tmp_v0,//!<[in] Resulting wavefunction - double complex *tmp_v1,//!<[in] Wavefunction to be multiplied + int nstate, double complex **tmp_v0,//!<[in] Resulting wavefunction + double complex **tmp_v1,//!<[in] Wavefunction to be multiplied struct BindStruct *X,//!<[inout] long unsigned int *tmp_off_2//!<[out] Index of final wavefunction ) { From 94b427e2a38cd5188bee949b14b9f15e91b4b023 Mon Sep 17 00:00:00 2001 From: mitsuaki1987 Date: Tue, 26 Feb 2019 00:31:18 +0900 Subject: [PATCH 02/50] Backup --- src/include/global.h | 6 +- src/include/mltplyCommon.h | 2 + src/include/mltplyHubbard.h | 18 +- src/include/mltplyHubbardCore.h | 44 +- src/include/mltplyMPIHubbard.h | 8 +- src/include/mltplyMPIHubbardCore.h | 34 +- src/include/mltplyMPISpin.h | 6 +- src/include/mltplyMPISpinCore.h | 58 +-- src/include/mltplySpin.h | 10 +- src/include/mltplySpinCore.h | 22 +- src/mltply.c | 6 +- src/mltplyHubbard.c | 156 +++---- src/mltplyHubbardCore.c | 134 +++--- src/mltplyMPIBoost.c | 12 - src/mltplyMPIHubbard.c | 217 +++------- src/mltplyMPIHubbardCore.c | 664 +++++++++-------------------- src/mltplyMPISpin.c | 142 ++---- src/mltplyMPISpinCore.c | 526 +++++++---------------- src/mltplySpin.c | 176 +++----- src/mltplySpinCore.c | 111 ++--- 20 files changed, 752 insertions(+), 1600 deletions(-) diff --git a/src/include/global.h b/src/include/global.h index bb01e2a25..ce038002a 100644 --- a/src/include/global.h +++ b/src/include/global.h @@ -31,10 +31,10 @@ #define ITINERANT 0 #define LOCSPIN 1 -double complex *v0; /**< A vector after multiplying Hamiltonian, @f$ v_0 = H v_1@f$.*/ -double complex *v1; /**< A vector before multiplying Hamiltonian, @f$ v_0 = H v_1@f$.*/ +double 
complex **v0; /**< A vector after multiplying Hamiltonian, @f$ v_0 = H v_1@f$.*/ +double complex **v1; /**< A vector before multiplying Hamiltonian, @f$ v_0 = H v_1@f$.*/ double complex *v2; /**< A temporary vector for time evolution calculation, @f$ v2 = H*v1 = H^coef |psi(t)>@f$.*/ -double complex *v1buf; /**< A temporary vector for MPI. */ +double complex **v1buf; /**< A temporary vector for MPI. */ //[s] For calcSpectrum double complex *v1Org; /**< An input vector to calculate spectrum function.*/ diff --git a/src/include/mltplyCommon.h b/src/include/mltplyCommon.h index 1cb467d16..55974e3a2 100644 --- a/src/include/mltplyCommon.h +++ b/src/include/mltplyCommon.h @@ -24,4 +24,6 @@ #define M_TOTALS 4 #define M_CALCSPEC 4 +void zaxpy_(int *n, double complex *a, double complex *x, int *incx, double complex *y, int *incy); + #endif /* HPHI_MLTPLYCOMMON_H */ diff --git a/src/include/mltplyHubbard.h b/src/include/mltplyHubbard.h index d0bc0534f..48a12dc23 100644 --- a/src/include/mltplyHubbard.h +++ b/src/include/mltplyHubbard.h @@ -23,7 +23,7 @@ int mltplyHubbard(struct BindStruct *X, int nstate, double complex **tmp_v0,doub int mltplyHubbardGC(struct BindStruct *X, int nstate, double complex **tmp_v0,double complex **tmp_v1); -double complex GC_child_general_hopp +void GC_child_general_hopp ( int nstate, double complex **tmp_v0, double complex **tmp_v1, @@ -32,14 +32,14 @@ double complex GC_child_general_hopp ); -double complex GC_child_general_int( +void GC_child_general_int( int nstate, double complex **tmp_v0, double complex **tmp_v1, struct BindStruct *X ); -double complex child_general_int +void child_general_int ( int nstate, double complex **tmp_v0, double complex **tmp_v1, @@ -47,7 +47,7 @@ double complex child_general_int ); -double complex child_general_hopp +void child_general_hopp ( double complex *tmp_v0, double complex *tmp_v1, @@ -55,35 +55,35 @@ double complex child_general_hopp double complex trans ); -double complex child_exchange +void 
child_exchange ( int nstate, double complex **tmp_v0, double complex **tmp_v1, struct BindStruct *X ); -double complex child_pairhopp +void child_pairhopp ( int nstate, double complex **tmp_v0, double complex **tmp_v1, struct BindStruct *X ); -double complex GC_child_exchange +void GC_child_exchange ( int nstate, double complex **tmp_v0, double complex **tmp_v1, struct BindStruct *X ); -double complex GC_child_pairlift +void GC_child_pairlift ( int nstate, double complex **tmp_v0, double complex **tmp_v1, struct BindStruct *X ); -double complex GC_child_pairhopp +void GC_child_pairhopp ( int nstate, double complex **tmp_v0, double complex **tmp_v1, diff --git a/src/include/mltplyHubbardCore.h b/src/include/mltplyHubbardCore.h index 55800e672..752231bed 100644 --- a/src/include/mltplyHubbardCore.h +++ b/src/include/mltplyHubbardCore.h @@ -19,7 +19,7 @@ #include "Common.h" -double complex child_pairhopp_element +void child_pairhopp_element ( long unsigned int j, int nstate, double complex **tmp_v0, @@ -28,7 +28,7 @@ double complex child_pairhopp_element long unsigned int *tmp_off ); -double complex GC_child_exchange_element +void GC_child_exchange_element ( long unsigned int j, int nstate, double complex **tmp_v0, @@ -37,7 +37,7 @@ double complex GC_child_exchange_element long unsigned int *tmp_off ); -double complex GC_child_pairhopp_element +void GC_child_pairhopp_element ( long unsigned int j, int nstate, double complex **tmp_v0, @@ -46,7 +46,7 @@ double complex GC_child_pairhopp_element long unsigned int *tmp_off ); -double complex child_exchange_element +void child_exchange_element ( long unsigned int j, int nstate, double complex **tmp_v0, @@ -55,7 +55,7 @@ double complex child_exchange_element long unsigned int *tmp_off ); -double complex child_CisAisCisAis_element +void child_CisAisCisAis_element ( long unsigned int j, long unsigned int isite1, @@ -67,7 +67,7 @@ double complex child_CisAisCisAis_element long unsigned int *tmp_off ); -double complex 
child_CisAisCjtAku_element +void child_CisAisCjtAku_element ( long unsigned int j, long unsigned int isite1, @@ -82,7 +82,7 @@ double complex child_CisAisCjtAku_element long unsigned int *tmp_off ); -double complex child_CisAjtCkuAku_element +void child_CisAjtCkuAku_element ( long unsigned int j, long unsigned int isite1, @@ -97,7 +97,7 @@ double complex child_CisAjtCkuAku_element long unsigned int *tmp_off ); -double complex child_CisAjtCkuAlv_element +void child_CisAjtCkuAlv_element ( long unsigned int j, long unsigned int isite1, @@ -115,7 +115,7 @@ double complex child_CisAjtCkuAlv_element long unsigned int *tmp_off_2 ); //[s]Grand canonical -double complex GC_child_CisAisCisAis_element +void GC_child_CisAisCisAis_element ( long unsigned int j, long unsigned int isite1, @@ -127,7 +127,7 @@ double complex GC_child_CisAisCisAis_element long unsigned int *tmp_off ); -double complex GC_child_CisAisCjtAku_element +void GC_child_CisAisCjtAku_element ( long unsigned int j, long unsigned int isite1, @@ -142,7 +142,7 @@ double complex GC_child_CisAisCjtAku_element long unsigned int *tmp_off ); -double complex GC_child_CisAjtCkuAku_element +void GC_child_CisAjtCkuAku_element ( long unsigned int j, long unsigned int isite1, @@ -157,7 +157,7 @@ double complex GC_child_CisAjtCkuAku_element long unsigned int *tmp_off ); -double complex GC_child_CisAjtCkuAlv_element +void GC_child_CisAjtCkuAlv_element ( long unsigned int j, long unsigned int isite1, @@ -176,7 +176,7 @@ double complex GC_child_CisAjtCkuAlv_element ); //[e]Grand canonical -double complex GC_CisAis +void GC_CisAis ( long unsigned int j, double complex *tmp_v0, @@ -186,7 +186,7 @@ double complex GC_CisAis double complex tmp_trans ); -double complex GC_AisCis( +void GC_AisCis( long unsigned int j, int nstate, double complex **tmp_v0, double complex **tmp_v1, @@ -226,10 +226,10 @@ int X_GC_CisAjt ); -double complex CisAjt +void CisAjt ( long unsigned int j, - double complex *tmp_v0, + int nstate, double complex 
*tmp_v0, double complex **tmp_v1, struct BindStruct *X, long unsigned int is1_spin, @@ -240,10 +240,10 @@ double complex CisAjt ); -double complex GC_CisAjt +void GC_CisAjt ( long unsigned int j, - double complex *tmp_v0, + int nstate, double complex *tmp_v0, double complex **tmp_v1, struct BindStruct *X, long unsigned int is1_spin, @@ -293,7 +293,7 @@ int child_exchange_GetInfo ); -double complex GC_Ajt +void GC_Ajt ( long unsigned int j, int nstate, double complex **tmp_v0, @@ -303,7 +303,7 @@ double complex GC_Ajt long unsigned int *tmp_off ); -double complex GC_Cis +void GC_Cis ( long unsigned int j, int nstate, double complex **tmp_v0, @@ -315,7 +315,7 @@ double complex GC_Cis -double complex GC_Ajt +void GC_Ajt ( long unsigned int j, int nstate, double complex **tmp_v0, @@ -340,7 +340,7 @@ int X_Cis -double complex X_Ajt +void X_Ajt ( long unsigned int j, long unsigned int is1_spin, diff --git a/src/include/mltplyMPIHubbard.h b/src/include/mltplyMPIHubbard.h index bfdcfb85d..52764520d 100644 --- a/src/include/mltplyMPIHubbard.h +++ b/src/include/mltplyMPIHubbard.h @@ -29,7 +29,7 @@ void GC_child_general_hopp_MPIdouble double complex **tmp_v1 ); -double complex X_GC_child_general_hopp_MPIdouble +void X_GC_child_general_hopp_MPIdouble ( int org_isite1, int org_ispin1, @@ -49,7 +49,7 @@ void GC_child_general_hopp_MPIsingle double complex **tmp_v1 ); -double complex X_GC_child_general_hopp_MPIsingle +void X_GC_child_general_hopp_MPIsingle ( int org_isite1, int org_ispin1, @@ -70,7 +70,7 @@ void child_general_hopp_MPIdouble double complex **tmp_v1 ); -double complex X_child_general_hopp_MPIdouble +void X_child_general_hopp_MPIdouble ( int org_isite1, int org_ispin1, @@ -90,7 +90,7 @@ void child_general_hopp_MPIsingle double complex **tmp_v1 ); -double complex X_child_general_hopp_MPIsingle +void X_child_general_hopp_MPIsingle ( int org_isite1, int org_ispin1, diff --git a/src/include/mltplyMPIHubbardCore.h b/src/include/mltplyMPIHubbardCore.h index 
4b8fd8d01..249caf6a5 100644 --- a/src/include/mltplyMPIHubbardCore.h +++ b/src/include/mltplyMPIHubbardCore.h @@ -75,7 +75,7 @@ int GetSgnInterAll unsigned long int *offbit ); -double complex X_GC_child_CisAisCjtAjt_Hubbard_MPI +void X_GC_child_CisAisCjtAjt_Hubbard_MPI ( int org_isite1, int org_ispin1, @@ -87,7 +87,7 @@ double complex X_GC_child_CisAisCjtAjt_Hubbard_MPI double complex **tmp_v1 ); -double complex X_GC_child_CisAjtCkuAlv_Hubbard_MPI +void X_GC_child_CisAjtCkuAlv_Hubbard_MPI ( int isite1, int isigma1, @@ -103,7 +103,7 @@ double complex X_GC_child_CisAjtCkuAlv_Hubbard_MPI double complex **tmp_v1 ); -double complex X_GC_child_CisAjtCkuAku_Hubbard_MPI +void X_GC_child_CisAjtCkuAku_Hubbard_MPI ( int isite1, int isigma1, @@ -117,7 +117,7 @@ double complex X_GC_child_CisAjtCkuAku_Hubbard_MPI double complex **tmp_v1 ); -double complex X_GC_child_CisAisCjtAku_Hubbard_MPI +void X_GC_child_CisAisCjtAku_Hubbard_MPI ( int isite1, int isigma1, @@ -131,7 +131,7 @@ double complex X_GC_child_CisAisCjtAku_Hubbard_MPI double complex **tmp_v1 ); -double complex X_GC_child_CisAis_Hubbard_MPI +void X_GC_child_CisAis_Hubbard_MPI ( int org_isite1, int org_ispin1, @@ -141,7 +141,7 @@ double complex X_GC_child_CisAis_Hubbard_MPI double complex **tmp_v1 ); -double complex X_GC_child_CisAjt_Hubbard_MPI +void X_GC_child_CisAjt_Hubbard_MPI ( int org_isite1, int org_ispin1, @@ -153,7 +153,7 @@ double complex X_GC_child_CisAjt_Hubbard_MPI double complex **tmp_v1 ); -double complex X_child_CisAisCjtAjt_Hubbard_MPI +void X_child_CisAisCjtAjt_Hubbard_MPI ( int org_isite1, int org_ispin1, @@ -165,7 +165,7 @@ double complex X_child_CisAisCjtAjt_Hubbard_MPI double complex **tmp_v1 ); -double complex X_child_CisAjtCkuAlv_Hubbard_MPI +void X_child_CisAjtCkuAlv_Hubbard_MPI ( int isite1, int isigma1, @@ -181,7 +181,7 @@ double complex X_child_CisAjtCkuAlv_Hubbard_MPI double complex **tmp_v1 ); -double complex X_child_CisAjtCkuAku_Hubbard_MPI +void X_child_CisAjtCkuAku_Hubbard_MPI ( int 
isite1, int isigma1, @@ -195,7 +195,7 @@ double complex X_child_CisAjtCkuAku_Hubbard_MPI double complex **tmp_v1 ); -double complex X_child_CisAisCjtAku_Hubbard_MPI +void X_child_CisAisCjtAku_Hubbard_MPI ( int isite1, int isigma1, @@ -209,7 +209,7 @@ double complex X_child_CisAisCjtAku_Hubbard_MPI double complex **tmp_v1 ); -double complex X_child_CisAis_Hubbard_MPI +void X_child_CisAis_Hubbard_MPI ( int org_isite1, int org_ispin1, @@ -219,7 +219,7 @@ double complex X_child_CisAis_Hubbard_MPI double complex **tmp_v1 ); -double complex X_child_CisAjt_MPIdouble +void X_child_CisAjt_MPIdouble ( int org_isite1, int org_ispin1, @@ -236,7 +236,7 @@ double complex X_child_CisAjt_MPIdouble long unsigned int *list_2_2_target ); -double complex X_child_CisAjt_MPIsingle +void X_child_CisAjt_MPIsingle ( int org_isite1, int org_ispin1, @@ -254,7 +254,7 @@ double complex X_child_CisAjt_MPIsingle ); -double complex X_GC_Cis_MPI +void X_GC_Cis_MPI ( int org_isite, int org_ispin, @@ -266,7 +266,7 @@ double complex X_GC_Cis_MPI unsigned long int *Tpow ); -double complex X_GC_Ajt_MPI +void X_GC_Ajt_MPI ( int org_isite, int org_ispin, @@ -278,7 +278,7 @@ double complex X_GC_Ajt_MPI long unsigned int *Tpow ); -double complex X_Cis_MPI +void X_Cis_MPI ( int org_isite, unsigned int org_ispin, @@ -297,7 +297,7 @@ double complex X_Cis_MPI long unsigned int _ihfbit ); -double complex X_Ajt_MPI +void X_Ajt_MPI ( int org_isite, unsigned int org_ispin, diff --git a/src/include/mltplyMPISpin.h b/src/include/mltplyMPISpin.h index 7f005b322..54c98b850 100644 --- a/src/include/mltplyMPISpin.h +++ b/src/include/mltplyMPISpin.h @@ -29,7 +29,7 @@ void child_general_int_spin_MPIdouble double complex **tmp_v1 ); -double complex X_child_general_int_spin_MPIdouble +void X_child_general_int_spin_MPIdouble ( int org_isite1, int org_ispin1, @@ -44,7 +44,7 @@ double complex X_child_general_int_spin_MPIdouble ); -double complex X_child_general_int_spin_TotalS_MPIdouble +void 
X_child_general_int_spin_TotalS_MPIdouble ( int org_isite1, int org_isite3, @@ -61,7 +61,7 @@ void child_general_int_spin_MPIsingle double complex **tmp_v1 ); -double complex X_child_general_int_spin_MPIsingle +void X_child_general_int_spin_MPIsingle ( int org_isite1, int org_ispin1, diff --git a/src/include/mltplyMPISpinCore.h b/src/include/mltplyMPISpinCore.h index bb550e5d0..734d2545e 100644 --- a/src/include/mltplyMPISpinCore.h +++ b/src/include/mltplyMPISpinCore.h @@ -21,7 +21,7 @@ #include #include "struct.h" -double complex X_GC_child_CisAisCjuAjv_GeneralSpin_MPIdouble +void X_GC_child_CisAisCjuAjv_GeneralSpin_MPIdouble ( int org_isite1, int org_ispin1, @@ -34,7 +34,7 @@ double complex X_GC_child_CisAisCjuAjv_GeneralSpin_MPIdouble double complex **tmp_v1 ); -double complex X_GC_child_CisAitCjuAju_GeneralSpin_MPIdouble +void X_GC_child_CisAitCjuAju_GeneralSpin_MPIdouble ( int org_isite1, int org_ispin1, @@ -47,7 +47,7 @@ double complex X_GC_child_CisAitCjuAju_GeneralSpin_MPIdouble double complex **tmp_v1 ); -double complex X_GC_child_CisAitCjuAjv_GeneralSpin_MPIdouble +void X_GC_child_CisAitCjuAjv_GeneralSpin_MPIdouble ( int org_isite1, int org_ispin1, @@ -62,7 +62,7 @@ double complex X_GC_child_CisAitCjuAjv_GeneralSpin_MPIdouble ); //general spin - single -double complex X_GC_child_CisAisCjuAjv_GeneralSpin_MPIsingle +void X_GC_child_CisAisCjuAjv_GeneralSpin_MPIsingle ( int org_isite1, int org_ispin1, @@ -75,7 +75,7 @@ double complex X_GC_child_CisAisCjuAjv_GeneralSpin_MPIsingle double complex **tmp_v1 ); -double complex X_GC_child_CisAitCjuAju_GeneralSpin_MPIsingle +void X_GC_child_CisAitCjuAju_GeneralSpin_MPIsingle ( int org_isite1, int org_ispin1, @@ -88,7 +88,7 @@ double complex X_GC_child_CisAitCjuAju_GeneralSpin_MPIsingle double complex **tmp_v1 ); -double complex X_GC_child_CisAitCjuAjv_GeneralSpin_MPIsingle +void X_GC_child_CisAitCjuAjv_GeneralSpin_MPIsingle ( int org_isite1, int org_ispin1, @@ -102,7 +102,7 @@ double complex 
X_GC_child_CisAitCjuAjv_GeneralSpin_MPIsingle double complex **tmp_v1 ); -double complex X_GC_child_CisAit_GeneralSpin_MPIdouble +void X_GC_child_CisAit_GeneralSpin_MPIdouble ( int org_isite1, int org_ispin1, @@ -113,7 +113,7 @@ double complex X_GC_child_CisAit_GeneralSpin_MPIdouble double complex **tmp_v1 ); -double complex X_GC_child_CisAis_GeneralSpin_MPIdouble +void X_GC_child_CisAis_GeneralSpin_MPIdouble ( int org_isite1, int org_ispin1, @@ -123,7 +123,7 @@ double complex X_GC_child_CisAis_GeneralSpin_MPIdouble double complex **tmp_v1 ); -double complex X_GC_child_AisCis_GeneralSpin_MPIdouble +void X_GC_child_AisCis_GeneralSpin_MPIdouble ( int org_isite1, int org_ispin1, @@ -133,7 +133,7 @@ double complex X_GC_child_AisCis_GeneralSpin_MPIdouble double complex **tmp_v1 ); -double complex X_GC_child_CisAisCjuAju_GeneralSpin_MPIdouble +void X_GC_child_CisAisCjuAju_GeneralSpin_MPIdouble ( int org_isite1, int org_ispin1, @@ -145,7 +145,7 @@ double complex X_GC_child_CisAisCjuAju_GeneralSpin_MPIdouble double complex **tmp_v1 ); -double complex X_GC_child_CisAisCjuAju_GeneralSpin_MPIsingle +void X_GC_child_CisAisCjuAju_GeneralSpin_MPIsingle ( int org_isite1, int org_ispin1, @@ -157,7 +157,7 @@ double complex X_GC_child_CisAisCjuAju_GeneralSpin_MPIsingle double complex **tmp_v1 ); -double complex X_child_CisAit_GeneralSpin_MPIdouble +void X_child_CisAit_GeneralSpin_MPIdouble ( int org_isite1, int org_ispin1, @@ -174,7 +174,7 @@ double complex X_child_CisAit_GeneralSpin_MPIdouble ); -double complex X_GC_child_CisAitCiuAiv_spin_MPIdouble +void X_GC_child_CisAitCiuAiv_spin_MPIdouble ( int org_isite1, int org_ispin1, @@ -188,7 +188,7 @@ double complex X_GC_child_CisAitCiuAiv_spin_MPIdouble double complex **tmp_v1 ); -double complex X_GC_child_CisAisCjuAjv_spin_MPIdouble +void X_GC_child_CisAisCjuAjv_spin_MPIdouble ( int org_isite1, int org_ispin1, @@ -201,7 +201,7 @@ double complex X_GC_child_CisAisCjuAjv_spin_MPIdouble double complex **tmp_v1 ); -double complex 
X_GC_child_CisAitCjuAju_spin_MPIdouble +void X_GC_child_CisAitCjuAju_spin_MPIdouble ( int org_isite1, int org_ispin1, @@ -214,7 +214,7 @@ double complex X_GC_child_CisAitCjuAju_spin_MPIdouble double complex **tmp_v1 ); -double complex X_GC_child_CisAisCjuAju_spin_MPIdouble +void X_GC_child_CisAisCjuAju_spin_MPIdouble ( int org_isite1, int org_ispin1, @@ -226,7 +226,7 @@ double complex X_GC_child_CisAisCjuAju_spin_MPIdouble double complex **tmp_v1 ); -double complex X_GC_child_CisAitCiuAiv_spin_MPIsingle +void X_GC_child_CisAitCiuAiv_spin_MPIsingle ( int org_isite1, int org_ispin1, @@ -240,7 +240,7 @@ double complex X_GC_child_CisAitCiuAiv_spin_MPIsingle double complex **tmp_v1 ); -double complex X_GC_child_CisAisCjuAjv_spin_MPIsingle +void X_GC_child_CisAisCjuAjv_spin_MPIsingle ( int org_isite1, int org_ispin1, @@ -253,7 +253,7 @@ double complex X_GC_child_CisAisCjuAjv_spin_MPIsingle double complex **tmp_v1 ); -double complex X_GC_child_CisAitCjuAju_spin_MPIsingle +void X_GC_child_CisAitCjuAju_spin_MPIsingle ( int org_isite1, int org_ispin1, @@ -266,7 +266,7 @@ double complex X_GC_child_CisAitCjuAju_spin_MPIsingle double complex **tmp_v1 ); -double complex X_GC_child_CisAisCjuAju_spin_MPIsingle +void X_GC_child_CisAisCjuAju_spin_MPIsingle ( int org_isite1, int org_ispin1, @@ -278,7 +278,7 @@ double complex X_GC_child_CisAisCjuAju_spin_MPIsingle double complex **tmp_v1 ); -double complex X_GC_child_CisAisCjuAju_spin_MPIsingle +void X_GC_child_CisAisCjuAju_spin_MPIsingle ( int org_isite1, int org_ispin1, @@ -290,7 +290,7 @@ double complex X_GC_child_CisAisCjuAju_spin_MPIsingle double complex **tmp_v1 ); -double complex X_GC_child_CisAit_spin_MPIdouble +void X_GC_child_CisAit_spin_MPIdouble ( int org_isite1, int org_ispin1, @@ -301,7 +301,7 @@ double complex X_GC_child_CisAit_spin_MPIdouble double complex **tmp_v1 ); -double complex X_GC_child_CisAis_spin_MPIdouble +void X_GC_child_CisAis_spin_MPIdouble ( int org_isite1, int org_ispin1, @@ -311,7 +311,7 @@ double 
complex X_GC_child_CisAis_spin_MPIdouble double complex **tmp_v1 ); -double complex X_GC_child_AisCis_spin_MPIdouble +void X_GC_child_AisCis_spin_MPIdouble ( int org_isite1, int org_ispin1, @@ -321,7 +321,7 @@ double complex X_GC_child_AisCis_spin_MPIdouble double complex **tmp_v1 ); -double complex X_child_CisAit_spin_MPIdouble +void X_child_CisAit_spin_MPIdouble ( int org_isite1, int org_ispin2, @@ -341,7 +341,7 @@ double complex X_child_CisAit_spin_MPIdouble long unsigned int _ihfbit ); -double complex X_child_CisAisCjuAju_GeneralSpin_MPIdouble +void X_child_CisAisCjuAju_GeneralSpin_MPIdouble ( int org_isite1, int org_ispin1, @@ -353,7 +353,7 @@ double complex X_child_CisAisCjuAju_GeneralSpin_MPIdouble double complex **tmp_v1 ); -double complex X_child_CisAitCjuAjv_GeneralSpin_MPIdouble +void X_child_CisAitCjuAjv_GeneralSpin_MPIdouble ( int org_isite1, int org_ispin1, @@ -368,7 +368,7 @@ double complex X_child_CisAitCjuAjv_GeneralSpin_MPIdouble ); //general spin - single -double complex X_child_CisAisCjuAju_GeneralSpin_MPIsingle +void X_child_CisAisCjuAju_GeneralSpin_MPIsingle ( int org_isite1, int org_ispin1, @@ -380,7 +380,7 @@ double complex X_child_CisAisCjuAju_GeneralSpin_MPIsingle double complex **tmp_v1 ); -double complex X_child_CisAitCjuAjv_GeneralSpin_MPIsingle +void X_child_CisAitCjuAjv_GeneralSpin_MPIsingle ( int org_isite1, int org_ispin1, diff --git a/src/include/mltplySpin.h b/src/include/mltplySpin.h index dabab0a60..94c2fef4e 100644 --- a/src/include/mltplySpin.h +++ b/src/include/mltplySpin.h @@ -34,14 +34,14 @@ int mltplyGeneralSpinGC(struct BindStruct *X, int nstate, double complex **tmp_v int mltplySpinGCBoost(struct BindStruct *X, int nstate, double complex **tmp_v0,double complex **tmp_v1); -double complex GC_child_general_int_spin +void GC_child_general_int_spin ( int nstate, double complex **tmp_v0, double complex **tmp_v1, struct BindStruct *X ); -double complex child_general_int_spin +void child_general_int_spin ( int nstate, double 
complex **tmp_v0, double complex **tmp_v1, @@ -49,21 +49,21 @@ double complex child_general_int_spin ); -double complex GC_child_exchange_spin +void GC_child_exchange_spin ( int nstate, double complex **tmp_v0, double complex **tmp_v1, struct BindStruct *X ); -double complex child_exchange_spin +void child_exchange_spin ( int nstate, double complex **tmp_v0, double complex **tmp_v1, struct BindStruct *X ); -double complex GC_child_pairlift_spin +void GC_child_pairlift_spin ( int nstate, double complex **tmp_v0, double complex **tmp_v1, diff --git a/src/include/mltplySpinCore.h b/src/include/mltplySpinCore.h index a46318e0b..0e621f4a4 100644 --- a/src/include/mltplySpinCore.h +++ b/src/include/mltplySpinCore.h @@ -19,7 +19,7 @@ #include "Common.h" -double complex child_exchange_spin_element +void child_exchange_spin_element ( long unsigned int j, double complex *tmp_v0, @@ -28,7 +28,7 @@ double complex child_exchange_spin_element long unsigned int *tmp_off ); -double complex GC_child_pairlift_spin_element +void GC_child_pairlift_spin_element ( long unsigned int j, double complex *tmp_v0, @@ -37,7 +37,7 @@ double complex GC_child_pairlift_spin_element long unsigned int *tmp_off ); -double complex GC_child_exchange_spin_element +void GC_child_exchange_spin_element ( long unsigned int j, double complex *tmp_v0, @@ -58,7 +58,7 @@ int X_child_exchange_spin_element ); //[s]Spin -double complex child_CisAisCisAis_spin_element +void child_CisAisCisAis_spin_element ( long unsigned int j, long unsigned int isA_up, @@ -71,7 +71,7 @@ double complex child_CisAisCisAis_spin_element struct BindStruct *X ); -double complex child_CisAisCitAiu_spin_element +void child_CisAisCitAiu_spin_element ( long unsigned int j, long unsigned int org_sigma2, @@ -85,7 +85,7 @@ double complex child_CisAisCitAiu_spin_element long unsigned int *tmp_off ); -double complex child_CisAitCiuAiu_spin_element +void child_CisAitCiuAiu_spin_element ( long unsigned int j, long unsigned int org_sigma2, @@ -99,7 
+99,7 @@ double complex child_CisAitCiuAiu_spin_element long unsigned int *tmp_off ); -double complex child_CisAitCiuAiv_spin_element +void child_CisAitCiuAiv_spin_element ( long unsigned int j, long unsigned int org_sigma2, @@ -115,7 +115,7 @@ double complex child_CisAitCiuAiv_spin_element //[e]Spin //[s]GC Spin -double complex GC_child_CisAisCisAis_spin_element +void GC_child_CisAisCisAis_spin_element ( long unsigned int j, long unsigned int isA_up, @@ -128,7 +128,7 @@ double complex GC_child_CisAisCisAis_spin_element struct BindStruct *X ); -double complex GC_child_CisAisCitAiu_spin_element +void GC_child_CisAisCitAiu_spin_element ( long unsigned int j, long unsigned int org_sigma2, @@ -142,7 +142,7 @@ double complex GC_child_CisAisCitAiu_spin_element long unsigned int *tmp_off ); -double complex GC_child_CisAitCiuAiu_spin_element +void GC_child_CisAitCiuAiu_spin_element ( long unsigned int j, long unsigned int org_sigma2, @@ -156,7 +156,7 @@ double complex GC_child_CisAitCiuAiu_spin_element long unsigned int *tmp_off ); -double complex GC_child_CisAitCiuAiv_spin_element +void GC_child_CisAitCiuAiv_spin_element ( long unsigned int j, long unsigned int org_sigma2, diff --git a/src/mltply.c b/src/mltply.c index bc5ebb9b8..ff24b4840 100644 --- a/src/mltply.c +++ b/src/mltply.c @@ -59,14 +59,12 @@ int mltply(struct BindStruct *X, int nstate, double complex **tmp_v0,double comp long unsigned int ilft=0; long unsigned int ihfbit=0; - double complex dam_pr; long unsigned int i_max; StartTimer(1); i_max = X->Check.idim_max; X->Large.prdct = 0.0; - dam_pr = 0.0; if(i_max!=0){ if (X->Def.iFlgGeneralSpin == FALSE) { @@ -94,12 +92,10 @@ int mltply(struct BindStruct *X, int nstate, double complex **tmp_v0,double comp X->Large.mode = M_MLTPLY; StartTimer(100); -#pragma omp parallel for default(none) reduction(+:dam_pr) firstprivate(i_max) shared(tmp_v0, tmp_v1, list_Diagonal) +#pragma omp parallel for default(none) firstprivate(i_max) shared(tmp_v0, tmp_v1, list_Diagonal) for 
(j = 1; j <= i_max; j++) { tmp_v0[j] += (list_Diagonal[j]) * tmp_v1[j]; - dam_pr += (list_Diagonal[j]) * conj(tmp_v1[j]) * tmp_v1[j]; } - X->Large.prdct += dam_pr; StopTimer(100); if (X->Def.iCalcType == TimeEvolution) diagonalcalcForTE(step_i, X, nstate, tmp_v0, tmp_v1); diff --git a/src/mltplyHubbard.c b/src/mltplyHubbard.c index 3a4d49ef8..ed2d3b534 100644 --- a/src/mltplyHubbard.c +++ b/src/mltplyHubbard.c @@ -157,7 +157,6 @@ int mltplyHubbard( long unsigned int isite3, isite4, sigma3, sigma4; long unsigned int ibitsite1, ibitsite2, ibitsite3, ibitsite4; - double complex dam_pr; double complex tmp_trans; /*[s] For InterAll */ double complex tmp_V; @@ -201,8 +200,7 @@ int mltplyHubbard( } tmp_trans = -X->Def.EDParaGeneralTransfer[idx]; X->Large.tmp_trans = tmp_trans; - dam_pr = child_general_hopp(tmp_v0, tmp_v1, X, tmp_trans); - X->Large.prdct += dam_pr; + child_general_hopp(nstate, tmp_v0, tmp_v1, X, tmp_trans); } StopTimer(313); } @@ -224,7 +222,6 @@ int mltplyHubbard( sigma4 = X->Def.InterAll_OffDiagonal[i][7]; tmp_V = X->Def.ParaInterAll_OffDiagonal[i]; - dam_pr = 0.0; if (CheckPE(isite1 - 1, X) == TRUE || CheckPE(isite2 - 1, X) == TRUE || CheckPE(isite3 - 1, X) == TRUE || CheckPE(isite4 - 1, X) == TRUE) { StartTimer(321); @@ -233,21 +230,21 @@ int mltplyHubbard( ibitsite3 = X->Def.OrgTpow[2 * isite3 - 2 + sigma3]; ibitsite4 = X->Def.OrgTpow[2 * isite4 - 2 + sigma4]; if (ibitsite1 == ibitsite2 && ibitsite3 == ibitsite4) { - dam_pr += X_child_CisAisCjtAjt_Hubbard_MPI(isite1 - 1, sigma1, + X_child_CisAisCjtAjt_Hubbard_MPI(isite1 - 1, sigma1, isite3 - 1, sigma3, tmp_V, X, nstate, tmp_v0, tmp_v1); } else if (ibitsite1 == ibitsite2 && ibitsite3 != ibitsite4) { - dam_pr += X_child_CisAisCjtAku_Hubbard_MPI(isite1 - 1, sigma1, + X_child_CisAisCjtAku_Hubbard_MPI(isite1 - 1, sigma1, isite3 - 1, sigma3, isite4 - 1, sigma4, tmp_V, X, nstate, tmp_v0, tmp_v1); } else if (ibitsite1 != ibitsite2 && ibitsite3 == ibitsite4) { - dam_pr += 
X_child_CisAjtCkuAku_Hubbard_MPI(isite1 - 1, sigma1, isite2 - 1, sigma2, + X_child_CisAjtCkuAku_Hubbard_MPI(isite1 - 1, sigma1, isite2 - 1, sigma2, isite3 - 1, sigma3, tmp_V, X, nstate, tmp_v0, tmp_v1); } else if (ibitsite1 != ibitsite2 && ibitsite3 != ibitsite4) { - dam_pr += X_child_CisAjtCkuAlv_Hubbard_MPI(isite1 - 1, sigma1, isite2 - 1, sigma2, + X_child_CisAjtCkuAlv_Hubbard_MPI(isite1 - 1, sigma1, isite2 - 1, sigma2, isite3 - 1, sigma3, isite4 - 1, sigma4, tmp_V, X, nstate, tmp_v0, tmp_v1); } StopTimer(321); @@ -269,11 +266,10 @@ int mltplyHubbard( child_general_int_GetInfo(i, X, isite1, isite2, isite3, isite4, sigma1, sigma2, sigma3, sigma4, tmp_V); - dam_pr += child_general_int(tmp_v0, tmp_v1, X); + child_general_int(nstate, tmp_v0, tmp_v1, X); }/*for (ihermite = 0; ihermite < 2; ihermite++)*/ StopTimer(322); } - X->Large.prdct += dam_pr; }/*for (i = 0; i < X->Def.NInterAll_OffDiagonal; i+=2)*/ StopTimer(320); /** @@ -283,13 +279,12 @@ int mltplyHubbard( for (i = 0; i < X->Def.NPairHopping; i +=2) { sigma1=0; sigma2=1; - dam_pr = 0.0; if ( X->Def.PairHopping[i][0] + 1 > X->Def.Nsite || X->Def.PairHopping[i][1] + 1 > X->Def.Nsite) { StartTimer(331); - dam_pr = X_child_CisAjtCkuAlv_Hubbard_MPI( + X_child_CisAjtCkuAlv_Hubbard_MPI( X->Def.PairHopping[i][0], sigma1, X->Def.PairHopping[i][1], sigma1, X->Def.PairHopping[i][0], sigma2, X->Def.PairHopping[i][1], sigma2, X->Def.ParaPairHopping[i], X, nstate, tmp_v0, tmp_v1); @@ -300,11 +295,10 @@ int mltplyHubbard( for (ihermite = 0; ihermite<2; ihermite++) { idx = i + ihermite; child_pairhopp_GetInfo(idx, X); - dam_pr += child_pairhopp(tmp_v0, tmp_v1, X); + child_pairhopp(nstate, tmp_v0, tmp_v1, X); }/*for (ihermite = 0; ihermite<2; ihermite++)*/ StopTimer(332); } - X->Large.prdct += dam_pr; }/*for (i = 0; i < X->Def.NPairHopping; i += 2)*/ StopTimer(330); /** @@ -314,12 +308,11 @@ int mltplyHubbard( for (i = 0; i < X->Def.NExchangeCoupling; i ++) { sigma1 = 0; sigma2 = 1; - dam_pr=0.0; if 
(X->Def.ExchangeCoupling[i][0] + 1 > X->Def.Nsite || X->Def.ExchangeCoupling[i][1] + 1 > X->Def.Nsite) { StartTimer(341); - dam_pr = X_child_CisAjtCkuAlv_Hubbard_MPI( + X_child_CisAjtCkuAlv_Hubbard_MPI( X->Def.ExchangeCoupling[i][0], sigma1, X->Def.ExchangeCoupling[i][1], sigma1, X->Def.ExchangeCoupling[i][1], sigma2, X->Def.ExchangeCoupling[i][0], sigma2, X->Def.ParaExchangeCoupling[i], X, nstate, tmp_v0, tmp_v1); @@ -328,10 +321,9 @@ int mltplyHubbard( else { StartTimer(342); child_exchange_GetInfo(i, X); - dam_pr = child_exchange(tmp_v0, tmp_v1, X); + child_exchange(nstate, tmp_v0, tmp_v1, X); StopTimer(342); } - X->Large.prdct += dam_pr; }/*for (i = 0; i < X->Def.NExchangeCoupling; i ++)*/ StopTimer(340); @@ -354,7 +346,6 @@ int mltplyHubbardGC( long unsigned int isite3, isite4, sigma3, sigma4; long unsigned int ibitsite1, ibitsite2, ibitsite3, ibitsite4; - double complex dam_pr; double complex tmp_trans; /*[s] For InterAll */ double complex tmp_V; @@ -397,8 +388,7 @@ int mltplyHubbardGC( return -1; } tmp_trans = -X->Def.EDParaGeneralTransfer[idx]; - dam_pr = GC_child_general_hopp(tmp_v0, tmp_v1, X, tmp_trans); - X->Large.prdct += dam_pr; + GC_child_general_hopp(nstate, tmp_v0, tmp_v1, X, tmp_trans); } StopTimer(213); } @@ -428,22 +418,21 @@ int mltplyHubbardGC( ibitsite3 = X->Def.OrgTpow[2 * isite3 - 2 + sigma3]; ibitsite4 = X->Def.OrgTpow[2 * isite4 - 2 + sigma4]; if (ibitsite1 == ibitsite2 && ibitsite3 == ibitsite4) - dam_pr = X_GC_child_CisAisCjtAjt_Hubbard_MPI( + X_GC_child_CisAisCjtAjt_Hubbard_MPI( isite1 - 1, sigma1, isite3 - 1, sigma3, tmp_V, X, nstate, tmp_v0, tmp_v1); else if (ibitsite1 == ibitsite2 && ibitsite3 != ibitsite4) - dam_pr = X_GC_child_CisAisCjtAku_Hubbard_MPI( + X_GC_child_CisAisCjtAku_Hubbard_MPI( isite1 - 1, sigma1, isite3 - 1, sigma3, isite4 - 1, sigma4, tmp_V, X, nstate, tmp_v0, tmp_v1); else if (ibitsite1 != ibitsite2 && ibitsite3 == ibitsite4) - dam_pr = X_GC_child_CisAjtCkuAku_Hubbard_MPI( + X_GC_child_CisAjtCkuAku_Hubbard_MPI( 
isite1 - 1, sigma1, isite2 - 1, sigma2, isite3 - 1, sigma3, tmp_V, X, nstate, tmp_v0, tmp_v1); else if (ibitsite1 != ibitsite2 && ibitsite3 != ibitsite4) - dam_pr = X_GC_child_CisAjtCkuAlv_Hubbard_MPI( + X_GC_child_CisAjtCkuAlv_Hubbard_MPI( isite1 - 1, sigma1, isite2 - 1, sigma2, isite3 - 1, sigma3, isite4 - 1, sigma4, tmp_V, X, nstate, tmp_v0, tmp_v1); StopTimer(221); }//InterPE else{ StartTimer(222); - dam_pr=0.0; for(ihermite=0; ihermite<2; ihermite++){ idx=i+ihermite; isite1 = X->Def.InterAll_OffDiagonal[idx][0] + 1; @@ -458,11 +447,10 @@ int mltplyHubbardGC( child_general_int_GetInfo(i, X, isite1, isite2, isite3, isite4, sigma1, sigma2, sigma3, sigma4, tmp_V); - dam_pr += GC_child_general_int(tmp_v0, tmp_v1, X); + GC_child_general_int(nstate, tmp_v0, tmp_v1, X); }/*for(ihermite=0; ihermite<2; ihermite++)*/ StopTimer(222); } - X->Large.prdct += dam_pr; }/*for (i = 0; i < X->Def.NInterAll_OffDiagonal; i+=2)*/ StopTimer(220); /** @@ -472,12 +460,11 @@ int mltplyHubbardGC( for (i = 0; i < X->Def.NPairHopping; i +=2) { sigma1 = 0; sigma2 = 1; - dam_pr = 0.0; if ( X->Def.PairHopping[i][0] + 1 > X->Def.Nsite || X->Def.PairHopping[i][1] + 1 > X->Def.Nsite) { StartTimer(231); - dam_pr = X_GC_child_CisAjtCkuAlv_Hubbard_MPI( + X_GC_child_CisAjtCkuAlv_Hubbard_MPI( X->Def.PairHopping[i][0], sigma1, X->Def.PairHopping[i][1], sigma1, X->Def.PairHopping[i][0], sigma2, X->Def.PairHopping[i][1], sigma2, X->Def.ParaPairHopping[i], X, nstate, tmp_v0, tmp_v1); @@ -488,11 +475,10 @@ int mltplyHubbardGC( for (ihermite = 0; ihermite<2; ihermite++) { idx = i + ihermite; child_pairhopp_GetInfo(idx, X); - dam_pr += GC_child_pairhopp(tmp_v0, tmp_v1, X); + GC_child_pairhopp(nstate, tmp_v0, tmp_v1, X); }/*for (ihermite = 0; ihermite<2; ihermite++)*/ StopTimer(232); } - X->Large.prdct += dam_pr; }/*for (i = 0; i < X->Def.NPairHopping; i += 2)*/ StopTimer(230); /** @@ -502,12 +488,11 @@ int mltplyHubbardGC( for (i = 0; i < X->Def.NExchangeCoupling; i++) { sigma1=0; sigma2=1; - dam_pr=0.0; if 
( X->Def.ExchangeCoupling[i][0] + 1 > X->Def.Nsite || X->Def.ExchangeCoupling[i][1] + 1 > X->Def.Nsite) { StartTimer(241); - dam_pr = X_GC_child_CisAjtCkuAlv_Hubbard_MPI( + X_GC_child_CisAjtCkuAlv_Hubbard_MPI( X->Def.ExchangeCoupling[i][0], sigma1, X->Def.ExchangeCoupling[i][1], sigma1, X->Def.ExchangeCoupling[i][1], sigma2, X->Def.ExchangeCoupling[i][0], sigma2, X->Def.ParaExchangeCoupling[i], X, nstate, tmp_v0, tmp_v1); @@ -516,10 +501,9 @@ int mltplyHubbardGC( else { StartTimer(242); child_exchange_GetInfo(i, X); - dam_pr = GC_child_exchange(tmp_v0, tmp_v1, X); + GC_child_exchange(nstate, tmp_v0, tmp_v1, X); StopTimer(242); } - X->Large.prdct += dam_pr; }/*for (i = 0; i < X->Def.NExchangeCoupling; i++)*/ StopTimer(240); @@ -533,11 +517,10 @@ int mltplyHubbardGC( /** @brief Compute pairhopp term (canonical) -@return Fragment of @f$\langle v_1|{\hat H}|v_1\rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex child_pairhopp( +void child_pairhopp( int nstate, double complex **tmp_v0,//!<[inout] Result vector double complex **tmp_v1,//!<[in] Input producted vector struct BindStruct *X//!<[inout] @@ -545,20 +528,18 @@ double complex child_pairhopp( long int j; long unsigned int i_max = X->Large.i_max; long unsigned int off = 0; - double complex dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) firstprivate(i_max, X,off) private(j) shared(tmp_v0, tmp_v1) +#pragma omp parallel for default(none) firstprivate(i_max, X,off) private(j) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) - dam_pr += child_pairhopp_element(j, nstate, tmp_v0, tmp_v1, X, &off); - return dam_pr; + child_pairhopp_element(j, nstate, tmp_v0, tmp_v1, X, &off); + return; }/*double complex child_pairhopp*/ /** @brief Compute Exchange term (canonical) in single process -@return Fragment of @f$\langle v_1|{\hat H}|v_1\rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi 
(The University of Tokyo) */ -double complex child_exchange( +void child_exchange( int nstate, double complex **tmp_v0,//!<[inout] Result vector double complex **tmp_v1,//!<[in] Input producted vector struct BindStruct *X//!<[inout] @@ -566,20 +547,18 @@ double complex child_exchange( long int j; long unsigned int i_max = X->Large.i_max; long unsigned int off = 0; - double complex dam_pr = 0; -#pragma omp parallel for default(none) reduction(+:dam_pr) firstprivate(i_max, X,off) private(j) shared(tmp_v0, tmp_v1) +#pragma omp parallel for default(none) firstprivate(i_max, X,off) private(j) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) - dam_pr += child_exchange_element(j, nstate, tmp_v0, tmp_v1, X, &off); - return dam_pr; + child_exchange_element(j, nstate, tmp_v0, tmp_v1, X, &off); + return; }/*double complex child_exchange*/ /** @brief Compute hopping (canonical) -@return Fragment of @f$\langle v_1|{\hat H}|v_1\rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex child_general_hopp( +void child_general_hopp( int nstate, double complex **tmp_v0,//!<[inout] Result vector double complex **tmp_v1,//!<[in] Input producted vector struct BindStruct *X,//!<[inout] @@ -587,26 +566,24 @@ double complex child_general_hopp( ) { long unsigned int j, isite1, isite2, Asum, Adiff; long unsigned int i_max = X->Large.i_max; - double complex dam_pr = 0; isite1 = X->Large.is1_spin; isite2 = X->Large.is2_spin; Asum = X->Large.isA_spin; Adiff = X->Large.A_spin; //fprintf(stdout, "DEBUG, isite1=%ld, isite2=%ld, Asum=%ld, Adiff=%ld \n", isite1, isite2, Asum, Adiff); -#pragma omp parallel for default(none) reduction(+:dam_pr) \ +#pragma omp parallel for default(none) \ firstprivate(i_max,X,Asum,Adiff,isite1,isite2,trans) private(j) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) - dam_pr += CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite1, isite2, Asum, Adiff, trans) * trans; - return dam_pr; + CisAjt(j, 
nstate, tmp_v0, tmp_v1, X, isite1, isite2, Asum, Adiff, trans) * trans; + return; }/*double complex child_general_hopp*/ /** @brief Commpute hopping term (grandcanonical) -@return Fragment of @f$\langle v_1|{\hat H}|v_1\rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_child_general_hopp( +void GC_child_general_hopp( int nstate, double complex **tmp_v0,//!<[inout] Result vector double complex **tmp_v1,//!<[in] Input producted vector struct BindStruct *X,//!<[inout] @@ -615,7 +592,6 @@ double complex GC_child_general_hopp( long unsigned int j, isite1, isite2, Asum, Adiff; long unsigned int tmp_off; long unsigned int i_max = X->Large.i_max; - double complex dam_pr = 0; isite1 = X->Large.is1_spin; isite2 = X->Large.is2_spin; @@ -623,31 +599,30 @@ double complex GC_child_general_hopp( Adiff = X->Large.A_spin; if (isite1 == isite2) { -#pragma omp parallel for default(none) reduction(+:dam_pr) \ +#pragma omp parallel for default(none) \ private(j) firstprivate(i_max,X,isite1, trans) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) - dam_pr += GC_CisAis(j, nstate, tmp_v0, tmp_v1, X, isite1, trans) * trans; + GC_CisAis(j, nstate, tmp_v0, tmp_v1, X, isite1, trans) * trans; }/*if (isite1 == isite2)*/ else { -#pragma omp parallel for default(none) reduction(+:dam_pr) \ +#pragma omp parallel for default(none) \ firstprivate(i_max,X,Asum,Adiff,isite1,isite2,trans) private(j,tmp_off) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) - dam_pr += GC_CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite1, isite2, Asum, Adiff, trans, &tmp_off) * trans; + GC_CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite1, isite2, Asum, Adiff, trans, &tmp_off) * trans; } - return dam_pr; + return; }/*double complex GC_child_general_hopp*/ /** @brief Compute inter-all term (canonical) -@return Fragment of @f$\langle v_1|{\hat H}|v_1\rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The 
University of Tokyo) */ -double complex child_general_int( +void child_general_int( int nstate, double complex **tmp_v0,//!<[inout] Result vector double complex **tmp_v1,//!<[in] Input producted vector struct BindStruct *X//!<[inout] ) { - double complex dam_pr, tmp_V; + double complex tmp_V; long unsigned int j, i_max; long unsigned int isite1, isite2, isite3, isite4; long unsigned int Asum, Bsum, Adiff, Bdiff; @@ -667,9 +642,8 @@ double complex child_general_int( Bdiff = X->Large.B_spin; tmp_V = X->Large.tmp_V; - dam_pr = 0.0; -#pragma omp parallel default(none) reduction(+:dam_pr) \ +#pragma omp parallel default(none) \ private(j, tmp_off, tmp_off_2) \ firstprivate(i_max, X, isite1, isite2, isite3, isite4, Asum, Bsum, Adiff, Bdiff, tmp_V) \ shared(tmp_v0, tmp_v1) @@ -677,40 +651,39 @@ shared(tmp_v0, tmp_v1) if (isite1 == isite2 && isite3 == isite4) { #pragma omp for for (j = 1; j <= i_max; j++) - dam_pr += child_CisAisCisAis_element(j, isite1, isite3, tmp_V, nstate, tmp_v0, tmp_v1, X, &tmp_off); + child_CisAisCisAis_element(j, isite1, isite3, tmp_V, nstate, tmp_v0, tmp_v1, X, &tmp_off); }/*if (isite1 == isite2 && isite3 == isite4)*/ else if (isite1 == isite2 && isite3 != isite4) { #pragma omp for for (j = 1; j <= i_max; j++) - dam_pr += child_CisAisCjtAku_element( + child_CisAisCjtAku_element( j, isite1, isite3, isite4, Bsum, Bdiff, tmp_V, nstate, tmp_v0, tmp_v1, X, &tmp_off); }/*if (isite1 == isite2 && isite3 != isite4)*/ else if (isite1 != isite2 && isite3 == isite4) { #pragma omp for for (j = 1; j <= i_max; j++) - dam_pr += child_CisAjtCkuAku_element(j, isite1, isite2, isite3, Asum, Adiff, tmp_V, nstate, tmp_v0, tmp_v1, X, &tmp_off); + child_CisAjtCkuAku_element(j, isite1, isite2, isite3, Asum, Adiff, tmp_V, nstate, tmp_v0, tmp_v1, X, &tmp_off); }/*if (isite1 != isite2 && isite3 == isite4)*/ else if (isite1 != isite2 && isite3 != isite4) { #pragma omp for for (j = 1; j <= i_max; j++) - dam_pr += child_CisAjtCkuAlv_element( + child_CisAjtCkuAlv_element( j, 
isite1, isite2, isite3, isite4, Asum, Adiff, Bsum, Bdiff, tmp_V, nstate, tmp_v0, tmp_v1, X, &tmp_off_2); }/*if (isite1 != isite2 && isite3 != isite4)*/ }/*End of parallel region*/ - return dam_pr; + return; }/*double complex child_general_int*/ /** @brief Compute inter-all term (canonical) -@return Fragment of @f$\langle v_1|{\hat H}|v_1\rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_child_general_int( +void GC_child_general_int( int nstate, double complex **tmp_v0,//!<[inout] Result vector double complex **tmp_v1,//!<[in] Input producted vector struct BindStruct *X//!<[inout] ) { - double complex dam_pr, tmp_V; + double complex tmp_V; long unsigned int j, i_max; long unsigned int isite1, isite2, isite3, isite4; long unsigned int Asum, Bsum, Adiff, Bdiff; @@ -729,44 +702,42 @@ double complex GC_child_general_int( Bdiff = X->Large.B_spin; tmp_V = X->Large.tmp_V; - dam_pr = 0.0; -#pragma omp parallel default(none) reduction(+:dam_pr) private(j) \ +#pragma omp parallel default(none) private(j) \ firstprivate(i_max, X, isite1, isite2, isite4, isite3, Asum, Bsum, Adiff, Bdiff, tmp_off, tmp_off_2, tmp_V) \ shared(tmp_v0, tmp_v1) { if (isite1 == isite2 && isite3 == isite4) { #pragma omp for for (j = 1; j <= i_max; j++) - dam_pr += GC_child_CisAisCisAis_element(j, isite1, isite3, tmp_V, nstate, tmp_v0, tmp_v1, X, &tmp_off); + GC_child_CisAisCisAis_element(j, isite1, isite3, tmp_V, nstate, tmp_v0, tmp_v1, X, &tmp_off); }/*if (isite1 == isite2 && isite3 == isite4)*/ else if (isite1 == isite2 && isite3 != isite4) { #pragma omp for for (j = 1; j <= i_max; j++) - dam_pr += GC_child_CisAisCjtAku_element(j, isite1, isite3, isite4, Bsum, Bdiff, tmp_V, nstate, tmp_v0, tmp_v1, X, &tmp_off); + GC_child_CisAisCjtAku_element(j, isite1, isite3, isite4, Bsum, Bdiff, tmp_V, nstate, tmp_v0, tmp_v1, X, &tmp_off); }/*if (isite1 == isite2 && isite3 != isite4)*/ else if (isite1 != isite2 && isite3 == isite4) 
{ #pragma omp for for (j = 1; j <= i_max; j++) - dam_pr += GC_child_CisAjtCkuAku_element( + GC_child_CisAjtCkuAku_element( j, isite1, isite2, isite3, Asum, Adiff, tmp_V, nstate, tmp_v0, tmp_v1, X, &tmp_off); }/*if (isite1 != isite2 && isite3 == isite4)*/ else if (isite1 != isite2 && isite3 != isite4) { #pragma omp for for (j = 1; j <= i_max; j++) - dam_pr += GC_child_CisAjtCkuAlv_element( + GC_child_CisAjtCkuAlv_element( j, isite1, isite2, isite3, isite4, Asum, Adiff, Bsum, Bdiff, tmp_V, nstate, tmp_v0, tmp_v1, X, &tmp_off_2); }/*if (isite1 != isite2 && isite3 != isite4)*/ }/*End of parallel region*/ - return dam_pr; + return; }/*double complex GC_child_general_int*/ /** @brief Compute pairhopp term (grandcanonical) -@return Fragment of @f$\langle v_1|{\hat H}|v_1\rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_child_pairhopp( +void GC_child_pairhopp( int nstate, double complex **tmp_v0,//!<[inout] Result vector double complex **tmp_v1,//!<[in] Input producted vector struct BindStruct *X//!<[inout] @@ -774,21 +745,19 @@ double complex GC_child_pairhopp( long int j; long unsigned int i_max = X->Large.i_max; long unsigned int off = 0; - double complex dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) firstprivate(i_max,X,off) private(j) shared(tmp_v0, tmp_v1) +#pragma omp parallel for default(none) firstprivate(i_max,X,off) private(j) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) - dam_pr += GC_child_pairhopp_element(j, nstate, tmp_v0, tmp_v1, X, &off); + GC_child_pairhopp_element(j, nstate, tmp_v0, tmp_v1, X, &off); - return dam_pr; + return; }/*double complex GC_child_pairhopp*/ /** @brief Compute Exchange term (grandcanonical) in single process -@return Fragment of @f$\langle v_1|{\hat H}|v_1\rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_child_exchange( +void 
GC_child_exchange( int nstate, double complex **tmp_v0, double complex **tmp_v1, struct BindStruct *X @@ -796,13 +765,12 @@ double complex GC_child_exchange( long int j; long unsigned int i_max = X->Large.i_max; long unsigned int off = 0; - double complex dam_pr = 0.0; #pragma omp parallel for default(none) \ -reduction(+:dam_pr) firstprivate(i_max, X,off) private(j) shared(tmp_v0, tmp_v1) + firstprivate(i_max, X,off) private(j) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) - dam_pr += GC_child_exchange_element(j, nstate, tmp_v0, tmp_v1, X, &off); - return dam_pr; + GC_child_exchange_element(j, nstate, tmp_v0, tmp_v1, X, &off); + return; }/*double complex GC_child_exchange*/ /******************************************************************************/ //[e] child functions diff --git a/src/mltplyHubbardCore.c b/src/mltplyHubbardCore.c index d4cc16fea..81586fa3a 100644 --- a/src/mltplyHubbardCore.c +++ b/src/mltplyHubbardCore.c @@ -21,8 +21,6 @@ #include "mltplyCommon.h" #include "mltplyHubbardCore.h" -void zaxpy_(int *n, double complex *a, double complex *x, int *incx, double complex *y, int *incy); - /******************************************************************************/ //[s] GetInfo functions /******************************************************************************/ @@ -228,7 +226,6 @@ int child_exchange_GetInfo( /******************************************************************************/ /** @brief Operation of @f$t c_{i\sigma}^\dagger c_{i\sigma}@f$ (Grandcanonical) -@return Fragment of @f$\langle v_1|{\hat H}|v_1\rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ @@ -252,7 +249,6 @@ void GC_CisAis( }/*double complex GC_CisAis*/ /** @brief Operation of @f$t c_{i\sigma} c_{i\sigma}^\dagger@f$ (Grandcanonical) -@return Fragment of @f$\langle v_1|{\hat H}|v_1\rangle@f$ @author Kazuyoshi Yoshimi (The University of Tokyo) */ void GC_AisCis( @@ -358,16 +354,15 @@ void 
GC_CisAjt( ibit_tmp_1 = (list_1_j & is1_spin); ibit_tmp_2 = (list_1_j & is2_spin); *tmp_off = 0; + int one = 1; if (ibit_tmp_1 == 0 && ibit_tmp_2 != 0) { bit = list_1_j & diff_spin; SgnBit(bit, &sgn); // Fermion sign list_1_off = list_1_j ^ sum_spin; *tmp_off = list_1_off; - dmv = sgn * tmp_v1[j]; - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply - tmp_v0[list_1_off + 1] += dmv * tmp_V; - } + dmv = sgn * tmp_V; + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[list_1_off + 1][0], &one); } else { return 0; @@ -472,6 +467,7 @@ void child_exchange_element( long unsigned int ihfbit = X->Large.ihfbit; double complex tmp_J = X->Large.tmp_J; int mode = X->Large.mode; + int one = 1; ibit1_up = list_1[j] & is1_up; ibit2_up = list_1[j] & is2_up; @@ -485,10 +481,8 @@ void child_exchange_element( return 0; } *tmp_off = off; - dmv = tmp_J * tmp_v1[j]; - if (mode == M_MLTPLY) { - tmp_v0[off] += dmv; - } + dmv = tmp_J; + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[off][0], &one); } else if (ibit1_up != 0 && ibit1_down == 0 && ibit2_up == 0 && ibit2_down != 0) { iexchg = list_1[j] - (is1_up + is2_down); @@ -497,15 +491,12 @@ void child_exchange_element( return 0; } *tmp_off = off; - dmv = tmp_J * tmp_v1[j]; - if (mode == M_MLTPLY) { - tmp_v0[off] += dmv; - } + dmv = tmp_J; + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[off][0], &one); } }/*double complex child_exchange_element*/ /** @brief Compute pairhopp term of canonical Hubbard system -@return Fragment of @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ @@ -529,6 +520,7 @@ void child_pairhopp_element( long unsigned int ihfbit = X->Large.ihfbit; double complex tmp_J = X->Large.tmp_J; int mode = X->Large.mode; + int one = 1; ibit1_up = list_1[j] & is1_up; ibit2_up = list_1[j] & is2_up; @@ -543,15 +535,12 @@ void child_pairhopp_element( return 0; } *tmp_off = off; - dmv = tmp_J * 
tmp_v1[j]; - if (mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { - tmp_v0[off] += dmv; - } + dmv = tmp_J; + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[off][0], &one); } }/*double complex child_pairhopp_element*/ /** @brief Compute exchange term of grandcanonical Hubbard system -@return Fragment of @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ @@ -572,6 +561,7 @@ void GC_child_exchange_element( long unsigned int list_1_j, list_1_off; double complex tmp_J = X->Large.tmp_J; int mode = X->Large.mode; + int one = 1; list_1_j = j - 1; ibit1_up = list_1_j & is1_up; @@ -586,10 +576,8 @@ void GC_child_exchange_element( list_1_off = iexchg; *tmp_off = list_1_off; - dmv = tmp_J * tmp_v1[j]; - if (mode == M_MLTPLY) { - tmp_v0[list_1_off + 1] += dmv; - } + dmv = tmp_J; + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[list_1_off + 1][0], &one); } else if (ibit1_up != 0 && ibit1_down == 0 && ibit2_up == 0 && ibit2_down != 0) { iexchg = list_1_j - (is1_up + is2_down); @@ -597,15 +585,12 @@ void GC_child_exchange_element( list_1_off = iexchg; *tmp_off = list_1_off; - dmv = tmp_J * tmp_v1[j]; - if (mode == M_MLTPLY) { - tmp_v0[list_1_off + 1] += dmv; - } + dmv = tmp_J; + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[list_1_off + 1][0], &one); } }/*double complex GC_child_exchange_element*/ /** @brief Compute pairhopp term of grandcanonical Hubbard system -@return Fragment of @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ @@ -626,6 +611,7 @@ void GC_child_pairhopp_element( long unsigned int list_1_j, list_1_off; double complex tmp_J = X->Large.tmp_J; int mode = X->Large.mode; + int one = 1; list_1_j = j - 1; @@ -642,16 +628,13 @@ void GC_child_pairhopp_element( iexchg += (is1_up + is1_down); list_1_off = iexchg; *tmp_off = list_1_off; - dmv = tmp_J * 
tmp_v1[j]; - if (mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { - tmp_v0[list_1_off + 1] += dmv; - } + dmv = tmp_J; + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[list_1_off + 1][0], &one); } } /** @brief Compute @f$c_{is}^\dagger c_{is} c_{is}^\dagger c_{is}@f$ term of canonical Hubbard system -@return Fragment of @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ @@ -667,17 +650,15 @@ void child_CisAisCisAis_element( ) { int tmp_sgn; double complex dmv; + int one = 1; tmp_sgn = X_CisAis(list_1[j], X, isite3); tmp_sgn *= X_CisAis(list_1[j], X, isite1); - dmv = tmp_V * tmp_v1[j] * tmp_sgn; - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply - tmp_v0[j] += dmv; - } + dmv = tmp_V * tmp_sgn; + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); }/*double complex child_CisAisCisAis_element*/ /** @brief Compute @f$c_{is}^\dagger c_{is} c_{jt}^\dagger c_{ku}@f$ term of canonical Hubbard system -@return Fragment of @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ @@ -696,21 +677,19 @@ void child_CisAisCjtAku_element( ) { int tmp_sgn; double complex dmv; + int one = 1; tmp_sgn = X_CisAjt(list_1[j], X, isite3, isite4, Bsum, Bdiff, tmp_off); if (tmp_sgn != 0) { tmp_sgn *= X_CisAis(list_1[*tmp_off], X, isite1); if (tmp_sgn != 0) { - dmv = tmp_V * tmp_v1[j] * tmp_sgn; - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply - tmp_v0[*tmp_off] += dmv; - } + dmv = tmp_V * tmp_sgn; + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[*tmp_off][0], &one); } } }/*double complex child_CisAisCjtAku_element*/ /** @brief Compute @f$c_{is}^\dagger c_{jt} c_{ku}^\dagger c_{ku}@f$ term of canonical Hubbard system -@return Fragment of @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ @author Takahiro Misawa (The 
University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ @@ -729,21 +708,19 @@ void child_CisAjtCkuAku_element( ) { int tmp_sgn; double complex dmv; + int one = 1; tmp_sgn = X_CisAis(list_1[j], X, isite3); if (tmp_sgn != 0) { tmp_sgn *= X_CisAjt(list_1[j], X, isite1, isite2, Asum, Adiff, tmp_off); if (tmp_sgn != 0) { - dmv = tmp_V * tmp_v1[j] * tmp_sgn; - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply - tmp_v0[*tmp_off] += dmv; - } + dmv = tmp_V * tmp_sgn; + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[*tmp_off][0], &one); } } }/*double complex child_CisAjtCkuAku_element*/ /** @brief Compute @f$c_{is}^\dagger c_{jt} c_{ku}^\dagger c_{lv}@f$ term of canonical Hubbard system -@return Fragment of @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ @@ -765,6 +742,7 @@ void child_CisAjtCkuAlv_element( ) { int tmp_sgn; long unsigned int tmp_off_1; + int one = 1; double complex dmv; tmp_sgn = X_GC_CisAjt(list_1[j], X, isite3, isite4, Bsum, Bdiff, &tmp_off_1); @@ -772,10 +750,8 @@ void child_CisAjtCkuAlv_element( if (tmp_sgn != 0) { tmp_sgn *= X_CisAjt(tmp_off_1, X, isite1, isite2, Asum, Adiff, tmp_off_2); if (tmp_sgn != 0) { - dmv = tmp_V * tmp_v1[j] * tmp_sgn; - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply - tmp_v0[*tmp_off_2] += dmv; - } + dmv = tmp_V * tmp_sgn; + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[*tmp_off_2][0], &one); } } }/*double complex child_CisAjtCkuAlv_element*/ @@ -783,7 +759,6 @@ void child_CisAjtCkuAlv_element( /** @brief Compute @f$c_{is}^\dagger c_{is} c_{is}^\dagger c_{is}@f$ term of grandcanonical Hubbard system -@return Fragment of @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ @@ -799,19 +774,17 @@ void GC_child_CisAisCisAis_element( ) { int 
tmp_sgn; double complex dmv; + int one = 1; tmp_sgn = X_CisAis(j - 1, X, isite3); tmp_sgn *= X_CisAis(j - 1, X, isite1); if (tmp_sgn != 0) { - dmv = tmp_V * tmp_v1[j] * tmp_sgn; - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply - tmp_v0[j] += dmv; - } + dmv = tmp_V * tmp_sgn; + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); } }/*double complex GC_child_CisAisCisAis_element*/ /** @brief Compute @f$c_{is}^\dagger c_{is} c_{jt}^\dagger c_{ku}@f$ term of grandcanonical Hubbard system -@return Fragment of @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ @@ -830,21 +803,19 @@ void GC_child_CisAisCjtAku_element( ) { int tmp_sgn; double complex dmv; + int one = 1; tmp_sgn = X_GC_CisAjt((j - 1), X, isite3, isite4, Bsum, Bdiff, tmp_off); if (tmp_sgn != 0) { tmp_sgn *= X_CisAis(*tmp_off, X, isite1); if (tmp_sgn != 0) { - dmv = tmp_V * tmp_v1[j] * tmp_sgn; - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply - tmp_v0[*tmp_off + 1] += dmv; - } + dmv = tmp_V * tmp_sgn; + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[*tmp_off + 1][0], &one); } } }/*double complex GC_child_CisAisCjtAku_element*/ /** @brief Compute @f$c_{is}^\dagger c_{jt} c_{ku}^\dagger c_{ku}@f$ term of grandcanonical Hubbard system -@return Fragment of @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ @@ -863,21 +834,19 @@ void GC_child_CisAjtCkuAku_element( ) { int tmp_sgn; double complex dmv; + int one = 1; tmp_sgn = X_CisAis((j - 1), X, isite3); if (tmp_sgn != 0) { tmp_sgn *= X_GC_CisAjt((j - 1), X, isite1, isite2, Asum, Adiff, tmp_off); if (tmp_sgn != 0) { - dmv = tmp_V * tmp_v1[j] * tmp_sgn; - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply - tmp_v0[*tmp_off + 1] += dmv; - } + dmv = tmp_V * 
tmp_sgn; + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[*tmp_off + 1][0], &one); }/*if (tmp_sgn != 0)*/ }/*if (tmp_sgn != 0)*/ }/*double complex GC_child_CisAjtCkuAku_element*/ /** @brief Compute @f$c_{is}^\dagger c_{jt} c_{ku}^\dagger c_{lv}@f$ term of grandcanonical Hubbard system -@return Fragment of @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ @@ -900,15 +869,14 @@ void GC_child_CisAjtCkuAlv_element( int tmp_sgn; long unsigned int tmp_off_1; double complex dmv; + int one = 1; tmp_sgn = X_GC_CisAjt((j - 1), X, isite3, isite4, Bsum, Bdiff, &tmp_off_1); if (tmp_sgn != 0) { tmp_sgn *= X_GC_CisAjt(tmp_off_1, X, isite1, isite2, Asum, Adiff, tmp_off_2); if (tmp_sgn != 0) { - dmv = tmp_V * tmp_v1[j] * tmp_sgn; - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply - tmp_v0[*tmp_off_2 + 1] += dmv; - } + dmv = tmp_V * tmp_sgn; + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[*tmp_off_2 + 1][0], &one); } } }/*double complex GC_child_CisAjtCkuAlv_element*/ @@ -916,7 +884,6 @@ void GC_child_CisAjtCkuAlv_element( /** @brief Compute @f$c_{is}^\dagger@f$ term of grandcanonical Hubbard system -@return Fragment of @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) @author Youhei Yamaji (The University of Tokyo) @@ -934,6 +901,7 @@ void GC_Cis( long unsigned int bit; int sgn, ipsgn; double complex dmv; + int one = 1; list_1_j = j - 1; @@ -953,10 +921,8 @@ void GC_Cis( #endif list_1_off = list_1_j | is1_spin; // OR *tmp_off = list_1_off; - dmv = ipsgn * sgn * tmp_v1[j]; - //if (X->Large.mode == M_MLTPLY) { // for multply - tmp_v0[list_1_off + 1] += dmv * tmp_V; - //} + dmv = ipsgn * sgn * tmp_V; + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[list_1_off + 1][0], &one); } else { return 0; @@ -965,7 +931,6 @@ void GC_Cis( /** @brief Compute 
@f$c_{jt}@f$ term of grandcanonical Hubbard system -@return Fragment of @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) @author Youhei Yamaji (The University of Tokyo) @@ -983,6 +948,7 @@ void GC_Ajt( long unsigned int bit; int sgn, ipsgn; double complex dmv; + int one = 1; list_1_j = j - 1; @@ -1001,10 +967,8 @@ void GC_Ajt( #endif list_1_off = list_1_j ^ is1_spin; *tmp_off = list_1_off; - dmv = ipsgn * sgn * tmp_v1[j]; - //if (X->Large.mode == M_MLTPLY) { // for multply - tmp_v0[list_1_off + 1] += dmv * tmp_V; - //} + dmv = ipsgn * sgn * tmp_V; + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[list_1_off + 1][0], &one); } else { return 0; diff --git a/src/mltplyMPIBoost.c b/src/mltplyMPIBoost.c index b390ef875..7cc314d70 100644 --- a/src/mltplyMPIBoost.c +++ b/src/mltplyMPIBoost.c @@ -43,7 +43,6 @@ void child_general_int_spin_MPIBoost( { #ifdef MPI - //double complex dam_pr = 0; // MPI_Status statusMPI; // int ierr; @@ -406,17 +405,6 @@ void child_general_int_spin_MPIBoost( }/* loop for iloop */ -/* - dam_pr= X_child_general_int_spin_MPIBoost - ( - matJ, X, nstate, tmp_v0, tmp_v1); - - X->Large.prdct += dam_pr; -*/ -// c_free1(arrayz, (int)pow(2.0, 16)); -// c_free1(arrayx, (int)pow(2.0, 16)); -// c_free1(arrayw, (int)pow(2.0, 16)); - free_cd_2d_allocate(vecJ); free_cd_2d_allocate(matJ); free_cd_2d_allocate(matJ2); diff --git a/src/mltplyMPIHubbard.c b/src/mltplyMPIHubbard.c index daf18bf00..59cd76173 100644 --- a/src/mltplyMPIHubbard.c +++ b/src/mltplyMPIHubbard.c @@ -37,12 +37,10 @@ void GC_child_general_hopp_MPIdouble double complex **tmp_v1//!<[in] v0 = H v1 ){ #ifdef MPI - double complex dam_pr = 0; - dam_pr = X_GC_child_general_hopp_MPIdouble( + X_GC_child_general_hopp_MPIdouble( X->Def.EDGeneralTransfer[itrans][0], X->Def.EDGeneralTransfer[itrans][1], X->Def.EDGeneralTransfer[itrans][2], X->Def.EDGeneralTransfer[itrans][3], 
X->Def.EDParaGeneralTransfer[itrans], X, nstate, tmp_v0, tmp_v1); - X->Large.prdct += dam_pr; #endif }/*void GC_child_general_hopp_MPIdouble*/ /** @@ -51,7 +49,7 @@ When both site1 and site2 are in the inter process region. @author Mitsuaki Kawamura (The University of Tokyo) @return fragment of @f$\langle v_1|{\hat H}|v_1\rangle@f$ */ -double complex X_GC_child_general_hopp_MPIdouble( +void X_GC_child_general_hopp_MPIdouble( int org_isite1,//!<[in] @f$i_1@f$ of @f$c_{i_1 \sigma_1}^\dagger c_{i_2 \sigma_2}@f$ int org_ispin1,//!<[in] @f$\sigma_1@f$ of @f$c_{i_1 \sigma_1}^\dagger c_{i_2 \sigma_2}@f$ int org_isite2,//!<[in] @f$i_2@f$ of @f$c_{i_1 \sigma_1}^\dagger c_{i_2 \sigma_2}@f$ @@ -65,7 +63,8 @@ double complex X_GC_child_general_hopp_MPIdouble( int mask1, mask2, state1, state2, ierr, origin, bitdiff, Fsgn; unsigned long int idim_max_buf, j; MPI_Status statusMPI; - double complex trans, dmv, dam_pr; + double complex trans; + int one = 1; mask1 = (int)X->Def.Tpow[2 * org_isite1 + org_ispin1]; mask2 = (int)X->Def.Tpow[2 * org_isite2 + org_ispin2]; @@ -85,7 +84,7 @@ double complex X_GC_child_general_hopp_MPIdouble( trans = -(double)Fsgn * conj(tmp_trans); if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) trans = 0.0; }/*if (state1 == mask1 && state2 == 0)*/ - else return 0.0; + else return; ierr = MPI_Sendrecv(&X->Check.idim_max, 1, MPI_UNSIGNED_LONG, origin, 0, &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0, @@ -96,29 +95,14 @@ double complex X_GC_child_general_hopp_MPIdouble( MPI_COMM_WORLD, &statusMPI); if (ierr != 0) exitMPI(-1); - dam_pr = 0.0; -#pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv) \ +#pragma omp parallel default(none) private(j, dmv) \ firstprivate(idim_max_buf, trans, X) shared(v1buf, tmp_v1, tmp_v0) { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { #pragma omp for - for (j = 1; j <= idim_max_buf; j++) { - dmv = trans * v1buf[j]; - tmp_v0[j] += dmv; - dam_pr += conj(tmp_v1[j]) * dmv; - }/*for (j = 
1; j <= idim_max_buf; j++)*/ - }/*if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC)*/ - else { -#pragma omp for - for (j = 1; j <= idim_max_buf; j++) { - dmv = trans * v1buf[j]; - dam_pr += conj(tmp_v1[j]) * dmv; - }/*for (j = 1; j <= idim_max_buf; j++)*/ - } + for (j = 1; j <= idim_max_buf; j++) { + zaxpy_(&nstate, &trans, &v1buf[j][0], &one, &tmp_v0[j][0], &one); + }/*for (j = 1; j <= idim_max_buf; j++)*/ }/*End of parallel region*/ - return (dam_pr); -#else - return 0.0; #endif }/*void GC_child_general_hopp_MPIdouble*/ /** @@ -127,7 +111,7 @@ When both site1 and site2 are in the inter process region. @author Mitsuaki Kawamura (The University of Tokyo) @return fragment of @f$\langle v_1|{\hat H}|v_1\rangle@f$ */ -double complex X_child_CisAjt_MPIdouble( +void X_child_CisAjt_MPIdouble( int org_isite1,//!<[in] @f$i_1@f$ of @f$c_{i_1 \sigma_1}^\dagger c_{i_2 \sigma_2}@f$ int org_ispin1,//!<[in] @f$\sigma_1@f$ of @f$c_{i_1 \sigma_1}^\dagger c_{i_2 \sigma_2}@f$ int org_isite2,//!<[in] @f$i_2@f$ of @f$c_{i_1 \sigma_1}^\dagger c_{i_2 \sigma_2}@f$ @@ -147,6 +131,7 @@ double complex X_child_CisAjt_MPIdouble( unsigned long int idim_max_buf, j, ioff; MPI_Status statusMPI; double complex trans, dmv; + int one = 1; mask1 = (int) X->Def.Tpow[2 * org_isite1 + org_ispin1]; mask2 = (int) X->Def.Tpow[2 * org_isite2 + org_ispin2]; @@ -168,7 +153,7 @@ double complex X_child_CisAjt_MPIdouble( trans = 0; } }/*if (state1 == mask1 && state2 == 0)*/ - else return 0; + else return; ierr = MPI_Sendrecv(&X->Check.idim_maxOrg, 1, MPI_UNSIGNED_LONG, origin, 0, &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0, @@ -190,17 +175,11 @@ double complex X_child_CisAjt_MPIdouble( firstprivate(idim_max_buf, trans, X, list_2_1_target, list_2_2_target, list_1buf_org) \ shared(v1buf, tmp_v0) for (j = 1; j <= idim_max_buf; j++){ - dmv = trans * v1buf[j]; GetOffComp(list_2_1_target, list_2_2_target, list_1buf_org[j], X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); - tmp_v0[ioff] += dmv; + 
zaxpy_(&nstate, &trans, &v1buf[j][0], &one, &tmp_v0[ioff][0], &one); }/*for (j = 1; j <= idim_max_buf; j++)*/ }/*if (X->Large.mode == M_MLTPLY|| X->Large.mode == M_CALCSPEC)*/ - else return 0.0; - - return 1.0; -#else - return 0.0; #endif }/*void child_CisAjt_MPIdouble*/ /** @@ -215,12 +194,10 @@ void GC_child_general_hopp_MPIsingle( double complex **tmp_v1//!<[in] v0 = H v1 ){ #ifdef MPI - double complex dam_pr=0; - dam_pr=X_GC_child_general_hopp_MPIsingle( + X_GC_child_general_hopp_MPIsingle( X->Def.EDGeneralTransfer[itrans][0], X->Def.EDGeneralTransfer[itrans][1], X->Def.EDGeneralTransfer[itrans][2], X->Def.EDGeneralTransfer[itrans][3], X->Def.EDParaGeneralTransfer[itrans], X, nstate, tmp_v0, tmp_v1 ); - X->Large.prdct += dam_pr; #endif }/*void GC_child_general_hopp_MPIsingle*/ /** @@ -230,7 +207,7 @@ void GC_child_general_hopp_MPIsingle( @author Mitsuaki Kawamura (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex X_GC_child_general_hopp_MPIsingle( +void X_GC_child_general_hopp_MPIsingle( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite2,//!<[in] Site 2 @@ -244,7 +221,8 @@ double complex X_GC_child_general_hopp_MPIsingle( int mask2, state1, state2, ierr, origin, bit2diff, Fsgn; unsigned long int idim_max_buf, j, mask1, state1check, bit1diff, ioff; MPI_Status statusMPI; - double complex trans, dmv, dam_pr; + double complex trans, dmv; + int one = 1; /* Prepare index in the inter PE */ @@ -283,47 +261,25 @@ double complex X_GC_child_general_hopp_MPIsingle( bit1diff = X->Def.Tpow[2 * X->Def.Nsite - 1] * 2 - mask1 * 2; - dam_pr = 0.0; -#pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv, state1, Fsgn, ioff) \ +#pragma omp parallel default(none) private(j, dmv, state1, Fsgn, ioff) \ firstprivate(idim_max_buf, trans, X, mask1, state1check, bit1diff) shared(v1buf, tmp_v1, tmp_v0) { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { #pragma omp for - for (j = 0; j < 
idim_max_buf; j++) { + for (j = 0; j < idim_max_buf; j++) { - state1 = j & mask1; + state1 = j & mask1; - if (state1 == state1check) { - - SgnBit(j & bit1diff, &Fsgn); - ioff = j ^ mask1; - - dmv = (double)Fsgn * trans * v1buf[j + 1]; - tmp_v0[ioff + 1] += dmv; - dam_pr += conj(tmp_v1[ioff + 1]) * dmv; - }/*if (state1 == state1check)*/ - }/*for (j = 0; j < idim_max_buf; j++)*/ - }/*if (X->Large.mode == M_MLTPLY|| X->Large.mode == M_CALCSPEC)*/ - else { -#pragma omp for - for (j = 0; j < idim_max_buf; j++) { - - state1 = j & mask1; + if (state1 == state1check) { - if (state1 == state1check) { + SgnBit(j & bit1diff, &Fsgn); + ioff = j ^ mask1; - SgnBit(j & bit1diff, &Fsgn); - ioff = j ^ mask1; + dmv = (double)Fsgn * trans; + zaxpy_(&nstate, &dmv, &v1buf[j + 1][0], &one, &tmp_v0[ioff + 1][0], &one); + }/*if (state1 == state1check)*/ + }/*for (j = 0; j < idim_max_buf; j++)*/ - dmv = (double)Fsgn * trans * v1buf[j + 1]; - dam_pr += conj(tmp_v1[ioff + 1]) * dmv; - }/*for (j = 0; j < idim_max_buf; j++)*/ - }/*for (j = 0; j < idim_max_buf; j++)*/ - }/*if (! 
(X->Large.mode == M_MLTPLY|| X->Large.mode == M_CALCSPEC))*/ }/*End of parallel region*/ - return (dam_pr); -#else - return 0.0; #endif }/*void GC_child_general_hopp_MPIsingle*/ /** @@ -338,12 +294,10 @@ void child_general_hopp_MPIdouble( double complex **tmp_v1//!<[in] v0 = H v1 ){ #ifdef MPI - double complex dam_pr; - dam_pr =X_child_general_hopp_MPIdouble( + X_child_general_hopp_MPIdouble( X->Def.EDGeneralTransfer[itrans][0], X->Def.EDGeneralTransfer[itrans][1], X->Def.EDGeneralTransfer[itrans][2], X->Def.EDGeneralTransfer[itrans][3], X->Def.EDParaGeneralTransfer[itrans], X, nstate, tmp_v0, tmp_v1); - X->Large.prdct += dam_pr; #endif }/*void child_general_hopp_MPIdouble*/ /** @@ -352,7 +306,7 @@ void child_general_hopp_MPIdouble( @return @f$\langle v_1|{\hat H}_{\rm this}|v_1\rangle@f$ @author Mitsuaki Kawamura (The University of Tokyo) */ -double complex X_child_general_hopp_MPIdouble( +void X_child_general_hopp_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite2,//!<[in] Site 2 @@ -366,7 +320,8 @@ double complex X_child_general_hopp_MPIdouble( int mask1, mask2, state1, state2, ierr, origin, bitdiff, Fsgn; unsigned long int idim_max_buf, j, ioff; MPI_Status statusMPI; - double complex trans, dmv, dam_pr; + double complex trans, dmv; + int one = 1; mask1 = (int) X->Def.Tpow[2 * org_isite1 + org_ispin1]; mask2 = (int) X->Def.Tpow[2 * org_isite2 + org_ispin2]; @@ -387,7 +342,7 @@ double complex X_child_general_hopp_MPIdouble( trans = -(double) Fsgn * conj(tmp_trans); if (X->Large.mode == M_CORR|| X->Large.mode == M_CALCSPEC) trans = 0; } - else return 0; + else return; ierr = MPI_Sendrecv(&X->Check.idim_max, 1, MPI_UNSIGNED_LONG, origin, 0, &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0, @@ -402,8 +357,7 @@ double complex X_child_general_hopp_MPIdouble( MPI_COMM_WORLD, &statusMPI); if (ierr != 0) exitMPI(-1); - dam_pr = 0.0; -#pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv, Fsgn, ioff) \ +#pragma omp 
parallel default(none) private(j, dmv, Fsgn, ioff) \ firstprivate(idim_max_buf, trans, X) shared(list_2_1, list_2_2, list_1buf, v1buf, tmp_v1, tmp_v0) { if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { @@ -411,9 +365,7 @@ double complex X_child_general_hopp_MPIdouble( for (j = 1; j <= idim_max_buf; j++) { GetOffComp(list_2_1, list_2_2, list_1buf[j], X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); - dmv = trans * v1buf[j]; - tmp_v0[ioff] += dmv; - dam_pr += conj(tmp_v1[ioff]) * dmv; + zaxpy_(&nstate, &dmv, &v1buf[j][0], &one, &tmp_v0[ioff][0], &one); }/*for (j = 1; j <= idim_max_buf; j++)*/ }/*if (X->Large.mode == M_MLTPLY|| X->Large.mode == M_CALCSPEC)*/ else { @@ -421,14 +373,10 @@ double complex X_child_general_hopp_MPIdouble( for (j = 1; j <= idim_max_buf; j++) { GetOffComp(list_2_1, list_2_2, list_1buf[j], X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); - dmv = trans * v1buf[j]; - dam_pr += conj(tmp_v1[ioff]) * dmv; + zaxpy_(&nstate, &trans, &v1buf[j][0], &one, &tmp_v0[ioff][0], &one); }/*for (j = 1; j <= idim_max_buf; j++)*/ }/*for (j = 1; j <= idim_max_buf; j++)*/ }/*End of parallel region*/ - return (dam_pr); -#else - return 0.0; #endif }/*void child_general_hopp_MPIdouble*/ /** @@ -443,12 +391,10 @@ void child_general_hopp_MPIsingle( double complex **tmp_v1//!<[in] v0 = H v1 ){ #ifdef MPI - double complex dam_pr; - dam_pr =X_child_general_hopp_MPIsingle( + X_child_general_hopp_MPIsingle( X->Def.EDGeneralTransfer[itrans][0], X->Def.EDGeneralTransfer[itrans][1], X->Def.EDGeneralTransfer[itrans][2], X->Def.EDGeneralTransfer[itrans][3], X->Def.EDParaGeneralTransfer[itrans], X, nstate, tmp_v0, tmp_v1); - X->Large.prdct += dam_pr; #endif }/*void child_general_hopp_MPIsingle*/ /** @@ -457,7 +403,7 @@ void child_general_hopp_MPIsingle( @return @f$\langle v_1|{\hat H}_{\rm this}|v_1\rangle@f$ @author Mitsuaki Kawamura (The University of Tokyo) */ -double complex X_child_general_hopp_MPIsingle( +void X_child_general_hopp_MPIsingle( int 
org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite2,//!<[in] Site 2 @@ -471,7 +417,8 @@ double complex X_child_general_hopp_MPIsingle( int mask2, state2, ierr, origin, bit2diff, Fsgn; unsigned long int mask1, state1, idim_max_buf, j, state1check, bit1diff, ioff, jreal; MPI_Status statusMPI; - double complex trans, dmv, dam_pr; + double complex trans, dmv; + int one = 1; /* Prepare index in the inter PE */ @@ -514,53 +461,25 @@ double complex X_child_general_hopp_MPIsingle( bit1diff = X->Def.Tpow[2 * X->Def.Nsite - 1] * 2 - mask1 * 2; - dam_pr = 0.0; -#pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv, Fsgn, ioff, jreal, state1) \ +#pragma omp parallel default(none) private(j, dmv, Fsgn, ioff, jreal, state1) \ firstprivate(idim_max_buf, trans, X, mask1, state1check, bit1diff, myrank) shared(list_1, list_2_1, list_2_2, list_1buf, v1buf, tmp_v1, tmp_v0) { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { #pragma omp for - for (j = 1; j <= idim_max_buf; j++) { + for (j = 1; j <= idim_max_buf; j++) { - jreal = list_1buf[j]; - state1 = jreal & mask1; + jreal = list_1buf[j]; + state1 = jreal & mask1; - if (state1 == state1check) { - SgnBit(jreal & bit1diff, &Fsgn); - GetOffComp(list_2_1, list_2_2, jreal ^ mask1, + if (state1 == state1check) { + SgnBit(jreal & bit1diff, &Fsgn); + GetOffComp(list_2_1, list_2_2, jreal ^ mask1, X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); - dmv = (double)Fsgn * trans * v1buf[j]; - tmp_v0[ioff] += dmv; - dam_pr += conj(tmp_v1[ioff]) * dmv; - }/*if (state1 == state1check)*/ - }/*for (j = 1; j <= idim_max_buf; j++)*/ - }/*if (X->Large.mode == M_MLTPLY|| X->Large.mode == M_CALCSPEC)*/ - else { -#pragma omp for - for (j = 1; j <= idim_max_buf; j++) { - - jreal = list_1buf[j]; - state1 = jreal & mask1; - - if (state1 == state1check) { - SgnBit(jreal & bit1diff, &Fsgn); - GetOffComp(list_2_1, list_2_2, jreal ^ mask1, - X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); -/* - 
if(X->Large.mode==M_CORR){ - printf("DEBUG: myrank=%d, org=%d, bit=%d, iexchg=%d, ioff=%d, list_1=%d\n", myrank, jreal, state1, jreal ^ mask1, ioff, list_1[ioff]); - } -*/ - dmv = (double)Fsgn * trans * v1buf[j]; - dam_pr += conj(tmp_v1[ioff]) * dmv; - }/*if (state1 == state1check)*/ - }/*for (j = 1; j <= idim_max_buf; j++)*/ - }/*if (! (X->Large.mode == M_MLTPLY|| X->Large.mode == M_CALCSPEC))*/ + dmv = (double)Fsgn * trans; + zaxpy_(&nstate, &dmv, &v1buf[j][0], &one, &tmp_v0[ioff][0], &one); + }/*if (state1 == state1check)*/ + }/*for (j = 1; j <= idim_max_buf; j++)*/ }/*End of parallel region*/ - return (dam_pr); -#else - return 0.0; #endif }/*double complex child_general_hopp_MPIsingle*/ /** @@ -569,7 +488,7 @@ double complex X_child_general_hopp_MPIsingle( @return @f$\langle v_1|{\hat H}_{\rm this}|v_1\rangle@f$ @author Mitsuaki Kawamura (The University of Tokyo) */ -double complex X_child_CisAjt_MPIsingle( +void X_child_CisAjt_MPIsingle( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite2,//!<[in] Site 2 @@ -589,6 +508,7 @@ double complex X_child_CisAjt_MPIsingle( unsigned long int mask1, state1, idim_max_buf, j, state1check, bit1diff, ioff, jreal; MPI_Status statusMPI; double complex trans, dmv; + int one = 1; /* Prepare index in the inter PE */ @@ -628,27 +548,20 @@ double complex X_child_CisAjt_MPIsingle( bit1diff = X->Def.Tpow[2 * X->Def.Nsite - 1] * 2 - mask1 * 2; - if (X->Large.mode == M_MLTPLY|| X->Large.mode == M_CALCSPEC) { #pragma omp parallel for default(none) private(j, dmv, Fsgn, ioff, jreal, state1) \ firstprivate(idim_max_buf, trans, X, mask1, state1check, bit1diff,list_2_1_target, list_2_2_target, list_1buf_org, list_1) shared(v1buf, tmp_v0) - for (j = 1; j <= idim_max_buf; j++) { - jreal = list_1buf_org[j]; - state1 = jreal & mask1; - if (state1 == state1check) { - SgnBit(jreal & bit1diff, &Fsgn); - GetOffComp(list_2_1_target, list_2_2_target, jreal ^ mask1, - X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); - 
if(ioff !=0){ - dmv = (double) Fsgn * trans * v1buf[j]; - tmp_v0[ioff] += dmv; - }/*if(ioff !=0)*/ - }/*if (state1 == state1check)*/ - }/*for (j = 1; j <= idim_max_buf; j++)*/ - }/*if (X->Large.mode == M_MLTPLY|| X->Large.mode == M_CALCSPEC)*/ - else return 0; - - return 1; -#else - return 0.0; + for (j = 1; j <= idim_max_buf; j++) { + jreal = list_1buf_org[j]; + state1 = jreal & mask1; + if (state1 == state1check) { + SgnBit(jreal & bit1diff, &Fsgn); + GetOffComp(list_2_1_target, list_2_2_target, jreal ^ mask1, + X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); + if (ioff != 0) { + dmv = (double)Fsgn * trans; + zaxpy_(&nstate, &dmv, &v1buf[j][0], &one, &tmp_v0[ioff][0], &one); + }/*if(ioff !=0)*/ + }/*if (state1 == state1check)*/ + }/*for (j = 1; j <= idim_max_buf; j++)*/ #endif }/*double complex child_general_hopp_MPIsingle*/ diff --git a/src/mltplyMPIHubbardCore.c b/src/mltplyMPIHubbardCore.c index 129849101..6e3d5ec86 100644 --- a/src/mltplyMPIHubbardCore.c +++ b/src/mltplyMPIHubbardCore.c @@ -258,9 +258,8 @@ int GetSgnInterAll( /** @brief Compute @f$c_{is}^\dagger c_{is} c_{jt}^\dagger c_{jt}@f$ term of grandcanonical Hubbard system -@return Fragment of @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ */ -double complex X_GC_child_CisAisCjtAjt_Hubbard_MPI( +void X_GC_child_CisAisCjtAjt_Hubbard_MPI( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite3,//!<[in] Site 3 @@ -271,66 +270,42 @@ double complex X_GC_child_CisAisCjtAjt_Hubbard_MPI( double complex **tmp_v1//!<[inout] Initial wavefunction ) { #ifdef MPI - double complex dam_pr = 0.0; int iCheck; unsigned long int tmp_ispin1; unsigned long int i_max = X->Check.idim_max; unsigned long int tmp_off, j; double complex dmv; -// MPI_Status statusMPI; + int one = 1; + // MPI_Status statusMPI; iCheck=CheckBit_PairPE(org_isite1, org_ispin1, org_isite3, org_ispin3, X, (long unsigned int) myrank); if(iCheck != TRUE){ return 0.0; } -#pragma omp parallel reduction(+:dam_pr) default(none) 
shared(org_isite1, org_ispin1, org_isite3, org_ispin3, nstate, tmp_v0, tmp_v1) \ +#pragma omp parallel default(none) shared(org_isite1, org_ispin1, org_isite3, org_ispin3, nstate, tmp_v0, tmp_v1) \ firstprivate(i_max, tmp_V, X) private(dmv, j, tmp_off, tmp_ispin1) { if (org_isite1 + 1 > X->Def.Nsite && org_isite3 + 1 > X->Def.Nsite) { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { -#pragma omp for - for (j = 1; j <= i_max; j++) { - dmv = tmp_v1[j] * tmp_V; - tmp_v0[j] += dmv; - dam_pr += conj(tmp_v1[j]) * dmv; - }/*for (j = 1; j <= i_max; j++)*/ - }/*if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC)*/ - else { #pragma omp for - for (j = 1; j <= i_max; j++) { - dmv = tmp_v1[j] * tmp_V; - dam_pr += conj(tmp_v1[j]) * dmv; - }/*for (j = 1; j <= i_max; j++)*/ - }/*if (!(X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC))*/ + for (j = 1; j <= i_max; j++) { + zaxpy_(&nstate, &tmp_V, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); + }/*for (j = 1; j <= i_max; j++)*/ }/*if (org_isite1 + 1 > X->Def.Nsite && org_isite3 + 1 > X->Def.Nsite)*/ else if (org_isite1 + 1 > X->Def.Nsite || org_isite3 + 1 > X->Def.Nsite) { if (org_isite1 > org_isite3) tmp_ispin1 = X->Def.Tpow[2 * org_isite3 + org_ispin3]; else tmp_ispin1 = X->Def.Tpow[2 * org_isite1 + org_ispin1]; - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply -#pragma omp for - for (j = 1; j <= i_max; j++) { - if (CheckBit_Ajt(tmp_ispin1, j - 1, &tmp_off) == TRUE) { - dmv = tmp_v1[j] * tmp_V; - tmp_v0[j] += dmv; - dam_pr += conj(tmp_v1[j]) * dmv; - } - }/*for (j = 1; j <= i_max; j++)*/ - }/*if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC)*/ - else { #pragma omp for - for (j = 1; j <= i_max; j++) { - if (CheckBit_Ajt(tmp_ispin1, j - 1, &tmp_off) == TRUE) { - dmv = tmp_v1[j] * tmp_V; - dam_pr += conj(tmp_v1[j]) * dmv; - } - }/*for (j = 1; j <= i_max; j++)*/ - }/*if (!(X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC))*/ + for (j = 1; j <= i_max; 
j++) { + if (CheckBit_Ajt(tmp_ispin1, j - 1, &tmp_off) == TRUE) { + zaxpy_(&nstate, &tmp_V, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); + } + }/*for (j = 1; j <= i_max; j++)*/ } }/*End of parallel region*/ - return dam_pr; + return; #else return 0.0; #endif @@ -338,9 +313,8 @@ double complex X_GC_child_CisAisCjtAjt_Hubbard_MPI( /** @brief Compute @f$c_{is}^\dagger c_{jt} c_{ku}^\dagger c_{ku}@f$ term of grandcanonical Hubbard system -@return Fragment of @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ */ -double complex X_GC_child_CisAjtCkuAku_Hubbard_MPI( +void X_GC_child_CisAjtCkuAku_Hubbard_MPI( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite2,//!<[in] Site 2 @@ -353,7 +327,6 @@ double complex X_GC_child_CisAjtCkuAku_Hubbard_MPI( double complex **tmp_v1//!<[inout] Initial wavefunction ) { #ifdef MPI - double complex dam_pr = 0.0; unsigned long int i_max = X->Check.idim_max; unsigned long int idim_max_buf; int iCheck, ierr, Fsgn; @@ -364,6 +337,7 @@ double complex X_GC_child_CisAjtCkuAku_Hubbard_MPI( unsigned long int origin, tmp_off; unsigned long int org_rankbit; MPI_Status statusMPI; + int one = 1; iCheck = CheckBit_InterAllPE(org_isite1, org_ispin1, org_isite2, org_ispin2, org_isite3, org_ispin3, org_isite3, org_ispin3, X, (long unsigned int) myrank, &origin); isite1 = X->Def.Tpow[2 * org_isite1 + org_ispin1]; @@ -403,20 +377,20 @@ double complex X_GC_child_CisAjtCkuAku_Hubbard_MPI( if (CheckBit_Ajt(isite3, myrank, &tmp_off) == FALSE) return 0.0; -#pragma omp parallel default(none) reduction(+:dam_pr) \ +#pragma omp parallel default(none) \ firstprivate(i_max,X,Asum,Adiff,isite1,isite2, tmp_V) private(j,tmp_off) shared(tmp_v0, tmp_v1) { #pragma omp for for (j = 1; j <= i_max; j++) - dam_pr += GC_CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite2, isite1, Asum, Adiff, tmp_V, &tmp_off); + GC_CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite2, isite1, Asum, Adiff, tmp_V, &tmp_off); if (X->Large.mode != M_CORR) { #pragma omp for for (j = 1; j <= i_max; 
j++) - dam_pr += GC_CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite1, isite2, Asum, Adiff, tmp_V, &tmp_off); + GC_CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite1, isite2, Asum, Adiff, tmp_V, &tmp_off); }/*if (X->Large.mode != M_CORR)*/ }/*End of paralle region*/ - return dam_pr; + return; }//myrank =origin else { ierr = MPI_Sendrecv(&X->Check.idim_max, 1, MPI_UNSIGNED_LONG, origin, 0, @@ -428,7 +402,7 @@ firstprivate(i_max,X,Asum,Adiff,isite1,isite2, tmp_V) private(j,tmp_off) shared( MPI_COMM_WORLD, &statusMPI); if (ierr != 0) exitMPI(-1); -#pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv, tmp_off, Fsgn, org_rankbit, Adiff) \ +#pragma omp parallel default(none) private(j, dmv, tmp_off, Fsgn, org_rankbit, Adiff) \ shared(v1buf, tmp_v1, nstate, tmp_v0, myrank, origin, isite3, org_isite3, isite1, isite2, org_isite2, org_isite1) \ firstprivate(idim_max_buf, tmp_V, X, tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4) { @@ -439,69 +413,33 @@ firstprivate(idim_max_buf, tmp_V, X, tmp_isite1, tmp_isite2, tmp_isite3, tmp_isi tmp_V *= Fsgn; if (org_isite3 + 1 > X->Def.Nsite) { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { #pragma omp for - for (j = 1; j <= idim_max_buf; j++) { - dmv = tmp_V * v1buf[j]; - tmp_v0[j] += dmv; - dam_pr += conj(tmp_v1[j]) * dmv; - }/*for (j = 1; j <= idim_max_buf; j++)*/ - } - else { -#pragma omp for - for (j = 1; j <= idim_max_buf; j++) { - dmv = tmp_V * v1buf[j]; - dam_pr += conj(tmp_v1[j]) * dmv; - }/*for (j = 1; j <= idim_max_buf; j++)*/ - } + for (j = 1; j <= idim_max_buf; j++) { + zaxpy_(&nstate, &tmp_V, &v1buf[j][0], &one, &tmp_v0[j][0], &one); + }/*for (j = 1; j <= idim_max_buf; j++)*/ } else { //org_isite3 <= X->Def.Nsite - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { -#pragma omp for - for (j = 1; j <= idim_max_buf; j++) { - if (CheckBit_Ajt(isite3, j - 1, &tmp_off) == TRUE) { - dmv = tmp_V * v1buf[j]; - tmp_v0[j] += dmv; - dam_pr += conj(tmp_v1[j]) * dmv; - } - }/*for (j = 1; j <= 
idim_max_buf; j++)*/ - } - else { #pragma omp for - for (j = 1; j <= idim_max_buf; j++) { - if (CheckBit_Ajt(isite3, j - 1, &tmp_off) == TRUE) { - dmv = tmp_V * v1buf[j]; - dam_pr += conj(tmp_v1[j]) * dmv; - } - }/*for (j = 1; j <= idim_max_buf; j++)*/ - } + for (j = 1; j <= idim_max_buf; j++) { + if (CheckBit_Ajt(isite3, j - 1, &tmp_off) == TRUE) { + zaxpy_(&nstate, &tmp_V, &v1buf[j][0], &one, &tmp_v0[j][0], &one); + } + }/*for (j = 1; j <= idim_max_buf; j++)*/ } }/*if (org_isite1 + 1 > X->Def.Nsite && org_isite2 + 1 > X->Def.Nsite)*/ else { org_rankbit = X->Def.OrgTpow[2 * X->Def.Nsite] * origin; - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { -#pragma omp for - for (j = 1; j <= idim_max_buf; j++) { - if (GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn, X, (j - 1) + org_rankbit, &tmp_off) == TRUE) { - dmv = tmp_V * v1buf[j] * Fsgn; - tmp_v0[tmp_off + 1] += dmv; - dam_pr += conj(tmp_v1[tmp_off + 1]) * dmv; - } - }/*for (j = 1; j <= idim_max_buf; j++)*/ - }/*if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC)*/ - else { #pragma omp for - for (j = 1; j <= idim_max_buf; j++) { - if (GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn, X, (j - 1) + org_rankbit, &tmp_off) == TRUE) { - dmv = tmp_V * v1buf[j] * Fsgn; - dam_pr += conj(tmp_v1[tmp_off + 1]) * dmv; - } - }/*for (j = 1; j <= idim_max_buf; j++)*/ - } + for (j = 1; j <= idim_max_buf; j++) { + if (GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn, X, (j - 1) + org_rankbit, &tmp_off) == TRUE) { + dmv = tmp_V * Fsgn; + zaxpy_(&nstate, &dmv, &v1buf[j][0], &one, &tmp_v0[tmp_off + 1][0], &one); + } + }/*for (j = 1; j <= idim_max_buf; j++)*/ } }/*End of parallel region*/ }/*myrank != origin*/ - return dam_pr; + return; #else return 0.0; #endif @@ -509,9 +447,8 @@ firstprivate(idim_max_buf, tmp_V, X, tmp_isite1, tmp_isite2, tmp_isite3, tmp_isi /** @brief Compute @f$c_{is}^\dagger c_{is} c_{jt}^\dagger c_{ku}@f$ term of grandcanonical 
Hubbard system -@return Fragment of @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ */ -double complex X_GC_child_CisAisCjtAku_Hubbard_MPI( +void X_GC_child_CisAisCjtAku_Hubbard_MPI( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite3,//!<[in] Site 3 @@ -524,21 +461,16 @@ double complex X_GC_child_CisAisCjtAku_Hubbard_MPI( double complex **tmp_v1//!<[inout] Initial wavefunction ) { #ifdef MPI - double complex dam_pr = 0; - dam_pr = X_GC_child_CisAjtCkuAku_Hubbard_MPI( + X_GC_child_CisAjtCkuAku_Hubbard_MPI( org_isite4, org_ispin4, org_isite3, org_ispin3, org_isite1, org_ispin1, conj(tmp_V), X, nstate, tmp_v0, tmp_v1); - return conj(dam_pr); -#else - return 0.0; #endif }/*double complex X_GC_child_CisAisCjtAku_Hubbard_MPI*/ /** @brief Compute @f$c_{is}^\dagger c_{jt} c_{ku}^\dagger c_{lv}@f$ term of grandcanonical Hubbard system -@return Fragment of @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ */ -double complex X_GC_child_CisAjtCkuAlv_Hubbard_MPI( +void X_GC_child_CisAjtCkuAlv_Hubbard_MPI( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite2,//!<[in] Site 2 @@ -553,7 +485,6 @@ double complex X_GC_child_CisAjtCkuAlv_Hubbard_MPI( double complex **tmp_v1//!<[inout] Initial wavefunction ) { #ifdef MPI - double complex dam_pr = 0; unsigned long int i_max = X->Check.idim_max; unsigned long int idim_max_buf; int iCheck, ierr, Fsgn; @@ -565,6 +496,7 @@ double complex X_GC_child_CisAjtCkuAlv_Hubbard_MPI( unsigned long int org_rankbit; int iFlgHermite = FALSE; MPI_Status statusMPI; + int one = 1; iCheck = CheckBit_InterAllPE(org_isite1, org_ispin1, org_isite2, org_ispin2, org_isite3, org_ispin3, org_isite4, org_ispin4, @@ -603,43 +535,43 @@ double complex X_GC_child_CisAjtCkuAlv_Hubbard_MPI( if (myrank == origin) { if (isite1 == isite4 && isite2 == isite3) { // CisAjvCjvAis =Cis(1-njv)Ais=nis-nisnjv //calc nis - dam_pr = X_GC_child_CisAis_Hubbard_MPI(org_isite1, org_ispin1, tmp_V, X, nstate, tmp_v0, tmp_v1); + 
X_GC_child_CisAis_Hubbard_MPI(org_isite1, org_ispin1, tmp_V, X, nstate, tmp_v0, tmp_v1); //calc -nisniv - dam_pr -= X_GC_child_CisAisCjtAjt_Hubbard_MPI(org_isite1, org_ispin1, org_isite3, org_ispin3, tmp_V, X, nstate, tmp_v0, tmp_v1); + X_GC_child_CisAisCjtAjt_Hubbard_MPI(org_isite1, org_ispin1, org_isite3, org_ispin3, -tmp_V, X, nstate, tmp_v0, tmp_v1); }/*if (isite1 == isite4 && isite2 == isite3)*/ else if (isite2 == isite3) { // CisAjvCjvAku= Cis(1-njv)Aku=-CisAkunjv+CisAku: j is in PE //calc CisAku if (isite4 > isite1) Adiff = isite4 - isite1 * 2; else Adiff = isite1 - isite4 * 2; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_off) \ +#pragma omp parallel for default(none) private(j, tmp_off) \ firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0) for (j = 1; j <= i_max; j++) - dam_pr += GC_CisAjt(j - 1, nstate, tmp_v0, tmp_v1, X, isite1, isite4, (isite1 + isite4), Adiff, tmp_V, &tmp_off); + GC_CisAjt(j - 1, nstate, tmp_v0, tmp_v1, X, isite1, isite4, (isite1 + isite4), Adiff, tmp_V, &tmp_off); //calc -CisAku njv - dam_pr -= X_GC_child_CisAjtCkuAku_Hubbard_MPI(org_isite1, org_ispin1, org_isite4, org_ispin4, - org_isite2, org_ispin2, tmp_V, X, nstate, tmp_v0, tmp_v1); + X_GC_child_CisAjtCkuAku_Hubbard_MPI(org_isite1, org_ispin1, org_isite4, org_ispin4, + org_isite2, org_ispin2, -tmp_V, X, nstate, tmp_v0, tmp_v1); if (X->Large.mode != M_CORR) { //for hermite -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_off) \ +#pragma omp parallel for default(none) private(j, tmp_off) \ firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0) for (j = 1; j <= i_max; j++) - dam_pr += GC_CisAjt(j - 1, nstate, tmp_v0, tmp_v1, X, isite4, isite1, (isite1 + isite4), Adiff, tmp_V, &tmp_off); + GC_CisAjt(j - 1, nstate, tmp_v0, tmp_v1, X, isite4, isite1, (isite1 + isite4), Adiff, tmp_V, &tmp_off); //calc -njvCkuAis - dam_pr -= X_GC_child_CisAisCjtAku_Hubbard_MPI(org_isite2, org_ispin2, 
org_isite4, org_ispin4, - org_isite1, org_ispin1, tmp_V, X, nstate, tmp_v0, tmp_v1); + X_GC_child_CisAisCjtAku_Hubbard_MPI(org_isite2, org_ispin2, org_isite4, org_ispin4, + org_isite1, org_ispin1, -tmp_V, X, nstate, tmp_v0, tmp_v1); }/*if (X->Large.mode != M_CORR)*/ }/*if (isite2 == isite3)*/ else {// CisAjtCkuAis = -CisAisCkuAjt: i is in PE - dam_pr = -X_GC_child_CisAisCjtAku_Hubbard_MPI(org_isite1, org_ispin1, org_isite3, org_ispin3, - org_isite2, org_ispin2, tmp_V, X, nstate, tmp_v0, tmp_v1); + X_GC_child_CisAisCjtAku_Hubbard_MPI(org_isite1, org_ispin1, org_isite3, org_ispin3, + org_isite2, org_ispin2, -tmp_V, X, nstate, tmp_v0, tmp_v1); if (X->Large.mode != M_CORR) { //for hermite - dam_pr += -X_GC_child_CisAisCjtAku_Hubbard_MPI(org_isite1, org_ispin1, org_isite2, org_ispin2, - org_isite3, org_ispin3, tmp_V, X, nstate, tmp_v0, tmp_v1); + X_GC_child_CisAisCjtAku_Hubbard_MPI(org_isite1, org_ispin1, org_isite2, org_ispin2, + org_isite3, org_ispin3, -tmp_V, X, nstate, tmp_v0, tmp_v1); }/*if (X->Large.mode != M_CORR)*/ }/*if (isite2 != isite3)*/ - return dam_pr; + return; }//myrank =origin else { ierr = MPI_Sendrecv(&X->Check.idim_max, 1, MPI_UNSIGNED_LONG, origin, 0, @@ -669,46 +601,23 @@ firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0) Fsgn *= X_GC_CisAjt(tmp_off2, X, isite1, isite2, (isite1 + isite2), Adiff, &tmp_off); tmp_V *= Fsgn; }/*if (iFlgHermite == TRUE)*/ - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, dmv) firstprivate(idim_max_buf, tmp_V, X) shared(v1buf, tmp_v1, tmp_v0) - for (j = 1; j <= idim_max_buf; j++) { - dmv = tmp_V * v1buf[j]; - tmp_v0[j] += dmv; - dam_pr += conj(tmp_v1[j]) * dmv; - }/*for (j = 1; j <= idim_max_buf; j++)*/ - }/*if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC)*/ - else { -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, dmv) firstprivate(idim_max_buf, tmp_V, X) shared(v1buf, 
tmp_v1, tmp_v0) - for (j = 1; j <= idim_max_buf; j++) { - dmv = tmp_V * v1buf[j]; - dam_pr += conj(tmp_v1[j]) * dmv; - }/*for (j = 1; j <= idim_max_buf; j++)*/ - }/*if (! (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC))*/ +#pragma omp parallel for default(none) private(j, dmv) firstprivate(idim_max_buf, tmp_V, X) shared(v1buf, tmp_v1, tmp_v0) + for (j = 1; j <= idim_max_buf; j++) { + zaxpy_(&nstate, &tmp_V, &v1buf[j][0], &one, &tmp_v0[j][0], &one); + }/*for (j = 1; j <= idim_max_buf; j++)*/ } else { org_rankbit = X->Def.OrgTpow[2 * X->Def.Nsite] * origin; - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, dmv, tmp_off, Fsgn) firstprivate(idim_max_buf, tmp_V, X, tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4, org_rankbit) shared(v1buf, tmp_v1, tmp_v0) - for (j = 1; j <= idim_max_buf; j++) { - if (GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn, X, (j - 1) + org_rankbit, &tmp_off) == TRUE) { - dmv = tmp_V * v1buf[j] * Fsgn; - tmp_v0[tmp_off + 1] += dmv; - dam_pr += conj(tmp_v1[tmp_off + 1]) * dmv; - } - }/*for (j = 1; j <= idim_max_buf; j++)*/ - }/*if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC)*/ - else { -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, dmv, tmp_off, Fsgn) firstprivate(idim_max_buf, tmp_V, X, tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4, org_rankbit) shared(v1buf, tmp_v1, tmp_v0) - for (j = 1; j <= idim_max_buf; j++) { - if (GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn, X, (j - 1) + org_rankbit, &tmp_off) == TRUE) { - dmv = tmp_V * v1buf[j] * Fsgn; - dam_pr += conj(tmp_v1[tmp_off + 1]) * dmv; - } - }/*for (j = 1; j <= idim_max_buf; j++)*/ - }/*if (! 
(X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC))*/ +#pragma omp parallel for default(none) private(j, dmv, tmp_off, Fsgn) firstprivate(idim_max_buf, tmp_V, X, tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4, org_rankbit) shared(v1buf, tmp_v1, tmp_v0) + for (j = 1; j <= idim_max_buf; j++) { + if (GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn, X, (j - 1) + org_rankbit, &tmp_off) == TRUE) { + dmv = tmp_V * Fsgn; + zaxpy_(&nstate, &dmv, &v1buf[j][0], &one, &tmp_v0[tmp_off + 1][0], &one); + } + }/*for (j = 1; j <= idim_max_buf; j++)*/ } }/*myrank != origin*/ - return dam_pr; + return; #else return 0.0; #endif @@ -716,9 +625,8 @@ firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0) /** @brief Compute @f$c_{is}^\dagger c_{is}@f$ term of grandcanonical Hubbard system -@return Fragment of @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ */ -double complex X_GC_child_CisAis_Hubbard_MPI( +void X_GC_child_CisAis_Hubbard_MPI( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 double complex tmp_V,//!<[in] Coupling constant @@ -727,62 +635,38 @@ double complex X_GC_child_CisAis_Hubbard_MPI( double complex **tmp_v1//!<[inout] Initial wavefunction ) { #ifdef MPI - double complex dam_pr = 0.0; unsigned long int i_max = X->Check.idim_max; unsigned long int j, isite1, tmp_off; double complex dmv; -// MPI_Status statusMPI; + int one = 1; + // MPI_Status statusMPI; isite1 = X->Def.Tpow[2 * org_isite1 + org_ispin1]; if (org_isite1 + 1 > X->Def.Nsite) { if (CheckBit_Ajt(isite1, (unsigned long int) myrank, &tmp_off) == FALSE) return 0.0; -#pragma omp parallel reduction(+:dam_pr) default(none) shared(tmp_v0, tmp_v1) \ +#pragma omp parallel default(none) shared(tmp_v0, tmp_v1) \ firstprivate(i_max, tmp_V, X) private(dmv, j, tmp_off) { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply -#pragma omp for - for (j = 1; j <= i_max; j++) { - dmv = tmp_v1[j] * tmp_V; - tmp_v0[j] += dmv; - dam_pr += 
conj(tmp_v1[j]) * dmv; - }/*for (j = 1; j <= i_max; j++)*/ - }/*if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC)*/ - else { #pragma omp for - for (j = 1; j <= i_max; j++) { - dmv = tmp_v1[j] * tmp_V; - dam_pr += conj(tmp_v1[j]) * dmv; - }/*for (j = 1; j <= i_max; j++)*/ - }/*if (! (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC))*/ + for (j = 1; j <= i_max; j++) { + zaxpy_(&nstate, &tmp_V, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); + }/*for (j = 1; j <= i_max; j++)*/ }/*End of parallel region*/ }/*if (org_isite1 + 1 > X->Def.Nsite)*/ else { -#pragma omp parallel reduction(+:dam_pr) default(none) shared(tmp_v0, tmp_v1) \ +#pragma omp parallel default(none) shared(tmp_v0, tmp_v1) \ firstprivate(i_max, tmp_V, X, isite1) private(dmv, j, tmp_off) { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply -#pragma omp for - for (j = 1; j <= i_max; j++) { - if (CheckBit_Ajt(isite1, j - 1, &tmp_off) == TRUE) { - dmv = tmp_v1[j] * tmp_V; - tmp_v0[j] += dmv; - dam_pr += conj(tmp_v1[j]) * dmv; - }/*if (CheckBit_Ajt(isite1, j - 1, &tmp_off) == TRUE)*/ - }/*for (j = 1; j <= i_max; j++)*/ - }/*if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC)*/ - else { #pragma omp for - for (j = 1; j <= i_max; j++) { - if (CheckBit_Ajt(isite1, j - 1, &tmp_off) == TRUE) { - dmv = tmp_v1[j] * tmp_V; - dam_pr += conj(tmp_v1[j]) * dmv; - }/*if (CheckBit_Ajt(isite1, j - 1, &tmp_off) == TRUE)*/ - }/*for (j = 1; j <= i_max; j++)*/ - }/*if (! 
(X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC))*/ + for (j = 1; j <= i_max; j++) { + if (CheckBit_Ajt(isite1, j - 1, &tmp_off) == TRUE) { + zaxpy_(&nstate, &tmp_V, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); + }/*if (CheckBit_Ajt(isite1, j - 1, &tmp_off) == TRUE)*/ + }/*for (j = 1; j <= i_max; j++)*/ }/*End of parallel region*/ }/*if (org_isite1 + 1 <= X->Def.Nsite)*/ - return dam_pr; + return; #else return 0.0; #endif @@ -790,9 +674,8 @@ double complex X_GC_child_CisAis_Hubbard_MPI( /** @brief Compute @f$c_{is}^\dagger c_{jt}@f$ term of grandcanonical Hubbard system -@return Fragment of @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ */ -double complex X_GC_child_CisAjt_Hubbard_MPI( +void X_GC_child_CisAjt_Hubbard_MPI( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite2,//!<[in] Site 2 @@ -803,20 +686,19 @@ double complex X_GC_child_CisAjt_Hubbard_MPI( double complex **tmp_v1//!<[inout] Initial wavefunction ) { #ifdef MPI - double complex dam_pr = 0.0; // MPI_Status statusMPI; if (org_isite1 + 1 > X->Def.Nsite && org_isite2 + 1 > X->Def.Nsite) { - dam_pr = X_GC_child_general_hopp_MPIdouble(org_isite1, org_ispin1, org_isite2, org_ispin2, tmp_trans, X, nstate, tmp_v0, tmp_v1); + X_GC_child_general_hopp_MPIdouble(org_isite1, org_ispin1, org_isite2, org_ispin2, tmp_trans, X, nstate, tmp_v0, tmp_v1); } else if (org_isite1 + 1 > X->Def.Nsite || org_isite2 + 1 > X->Def.Nsite) { - dam_pr = X_GC_child_general_hopp_MPIsingle(org_isite1, org_ispin1, org_isite2, org_ispin2, tmp_trans, X, nstate, tmp_v0, tmp_v1); + X_GC_child_general_hopp_MPIsingle(org_isite1, org_ispin1, org_isite2, org_ispin2, tmp_trans, X, nstate, tmp_v0, tmp_v1); } else { //error message will be added. 
exitMPI(-1); } - return dam_pr; + return; #else return 0.0; #endif @@ -824,9 +706,8 @@ double complex X_GC_child_CisAjt_Hubbard_MPI( /** @brief Compute @f$c_{is}^\dagger c_{is} c_{jt}^\dagger c_{jt}@f$ term of canonical Hubbard system -@return Fragment of @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ */ -double complex X_child_CisAisCjtAjt_Hubbard_MPI( +void X_child_CisAisCjtAjt_Hubbard_MPI( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite3,//!<[in] Site 3 @@ -837,74 +718,46 @@ double complex X_child_CisAisCjtAjt_Hubbard_MPI( double complex **tmp_v1//!<[inout] Initial wavefunction ) { #ifdef MPI - double complex dam_pr = 0.0; int iCheck; unsigned long int tmp_ispin1; unsigned long int i_max = X->Check.idim_max; unsigned long int tmp_off, j; double complex dmv; -// MPI_Status statusMPI; + int one = 1; + // MPI_Status statusMPI; iCheck = CheckBit_PairPE(org_isite1, org_ispin1, org_isite3, org_ispin3, X, (long unsigned int) myrank); if (iCheck != TRUE) return 0.0; -#pragma omp parallel reduction(+:dam_pr) default(none) \ +#pragma omp parallel default(none) \ shared(tmp_v0, tmp_v1, list_1, org_isite1, org_ispin1, org_isite3, org_ispin3) \ firstprivate(i_max, tmp_V, X, tmp_ispin1) private(dmv, j, tmp_off) { if (org_isite1 + 1 > X->Def.Nsite && org_isite3 + 1 > X->Def.Nsite) { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply -#pragma omp for - for (j = 1; j <= i_max; j++) { - dmv = tmp_v1[j] * tmp_V; - tmp_v0[j] += dmv; - dam_pr += conj(tmp_v1[j]) * dmv; - }/*for (j = 1; j <= i_max; j++)*/ - }/*if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC)*/ - else { #pragma omp for - for (j = 1; j <= i_max; j++) { - dmv = tmp_v1[j] * tmp_V; - dam_pr += conj(tmp_v1[j]) * dmv; - }/*for (j = 1; j <= i_max; j++)*/ - }/*if (! 
(X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC))*/ + for (j = 1; j <= i_max; j++) { + zaxpy_(&nstate, &tmp_V, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); + }/*for (j = 1; j <= i_max; j++)*/ }/*if (org_isite1 + 1 > X->Def.Nsite && org_isite3 + 1 > X->Def.Nsite)*/ else if (org_isite1 + 1 > X->Def.Nsite || org_isite3 + 1 > X->Def.Nsite) { if (org_isite1 > org_isite3) tmp_ispin1 = X->Def.Tpow[2 * org_isite3 + org_ispin3]; else tmp_ispin1 = X->Def.Tpow[2 * org_isite1 + org_ispin1]; - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { -#pragma omp for - for (j = 1; j <= i_max; j++) { - if (CheckBit_Ajt(tmp_ispin1, list_1[j], &tmp_off) == TRUE) { - dmv = tmp_v1[j] * tmp_V; - tmp_v0[j] += dmv; - dam_pr += conj(tmp_v1[j]) * dmv; - } - }/*for (j = 1; j <= i_max; j++)*/ - }/*if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC)*/ - else { #pragma omp for - for (j = 1; j <= i_max; j++) { - if (CheckBit_Ajt(tmp_ispin1, list_1[j], &tmp_off) == TRUE) { - dmv = tmp_v1[j] * tmp_V; - dam_pr += conj(tmp_v1[j]) * dmv; - } - }/*for (j = 1; j <= i_max; j++)*/ - }/*if (! 
(X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC))*/ + for (j = 1; j <= i_max; j++) { + if (CheckBit_Ajt(tmp_ispin1, list_1[j], &tmp_off) == TRUE) { + zaxpy_(&nstate, &tmp_V, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); + } + }/*for (j = 1; j <= i_max; j++)*/ }/*if (org_isite1 + 1 > X->Def.Nsite || org_isite3 + 1 > X->Def.Nsite)*/ }/*End of parallel region*/ - return dam_pr; -#else - return 0.0; #endif }/*double complex X_child_CisAisCjtAjt_Hubbard_MPI*/ /** @brief Compute @f$c_{is}^\dagger c_{jt} c_{ku}^\dagger c_{lv}@f$ term of canonical Hubbard system -@return Fragment of @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ */ -double complex X_child_CisAjtCkuAlv_Hubbard_MPI( +void X_child_CisAjtCkuAlv_Hubbard_MPI( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite2,//!<[in] Site 2 @@ -919,7 +772,6 @@ double complex X_child_CisAjtCkuAlv_Hubbard_MPI( double complex **tmp_v1//!<[inout] Initial wavefunction ) { #ifdef MPI - double complex dam_pr = 0; unsigned long int i_max = X->Check.idim_max; unsigned long int idim_max_buf; int iCheck, ierr, Fsgn; @@ -931,6 +783,7 @@ double complex X_child_CisAjtCkuAlv_Hubbard_MPI( unsigned long int org_rankbit, ioff; int iFlgHermite = FALSE; MPI_Status statusMPI; + int one = 1; iCheck = CheckBit_InterAllPE(org_isite1, org_ispin1, org_isite2, org_ispin2, org_isite3, org_ispin3, org_isite4, org_ispin4, @@ -966,42 +819,44 @@ double complex X_child_CisAjtCkuAlv_Hubbard_MPI( if (myrank == origin) { if (isite1 == isite4 && isite2 == isite3) { // CisAjvCjvAis =Cis(1-njv)Ais=nis-nisnjv //calc nis - dam_pr = X_child_CisAis_Hubbard_MPI(org_isite1, org_ispin1, tmp_V, X, nstate, tmp_v0, tmp_v1); + X_child_CisAis_Hubbard_MPI(org_isite1, org_ispin1, tmp_V, X, nstate, tmp_v0, tmp_v1); //calc -nisniv - dam_pr -= X_child_CisAisCjtAjt_Hubbard_MPI(org_isite1, org_ispin1, org_isite3, org_ispin3, tmp_V, X, nstate, tmp_v0, tmp_v1); + X_child_CisAisCjtAjt_Hubbard_MPI(org_isite1, org_ispin1, org_isite3, org_ispin3, -tmp_V, X, 
nstate, tmp_v0, tmp_v1); }/* if (isite1 == isite4 && isite2 == isite3)*/ else if (isite2 == isite3) { // CisAjvCjvAku= Cis(1-njv)Aku=-CisAkunjv+CisAku: j is in PE if (isite4 > isite1) Adiff = isite4 - isite1 * 2; else Adiff = isite1 - isite4 * 2; //calc CisAku -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_off) \ +#pragma omp parallel for default(none) private(j, tmp_off) \ firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, nstate, tmp_v0, list_1) for (j = 1; j <= i_max; j++) - dam_pr += CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite1, isite4, (isite1 + isite4), Adiff, tmp_V); + CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite1, isite4, (isite1 + isite4), Adiff, tmp_V); //calc -CisAku njv - dam_pr -= X_child_CisAjtCkuAku_Hubbard_MPI(org_isite1, org_ispin1, org_isite4, org_ispin4, - org_isite2, org_ispin2, tmp_V, X, nstate, tmp_v0, tmp_v1); + X_child_CisAjtCkuAku_Hubbard_MPI(org_isite1, org_ispin1, org_isite4, org_ispin4, + org_isite2, org_ispin2, -tmp_V, X, nstate, tmp_v0, tmp_v1); if (X->Large.mode != M_CORR) { //for hermite -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_off) \ +#pragma omp parallel for default(none) private(j, tmp_off) \ firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0) for (j = 1; j <= i_max; j++) - dam_pr += CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite4, isite1, (isite1 + isite4), Adiff, tmp_V); + CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite4, isite1, (isite1 + isite4), Adiff, tmp_V); //calc -njvCkuAis - dam_pr -= X_child_CisAisCjtAku_Hubbard_MPI(org_isite2, org_ispin2, org_isite4, org_ispin4, org_isite1, org_ispin1, tmp_V, X, nstate, tmp_v0, tmp_v1); + X_child_CisAisCjtAku_Hubbard_MPI(org_isite2, org_ispin2, org_isite4, org_ispin4, + org_isite1, org_ispin1, -tmp_V, X, nstate, tmp_v0, tmp_v1); }/*if (X->Large.mode != M_CORR)*/ }/*if (isite2 == isite3)*/ else {// CisAjtCkuAis = -CisAisCkuAjt: i is in PE - dam_pr = -X_child_CisAisCjtAku_Hubbard_MPI(org_isite1, 
org_ispin1, org_isite3, org_ispin3, org_isite2, org_ispin2, tmp_V, X, nstate, tmp_v0, tmp_v1); + X_child_CisAisCjtAku_Hubbard_MPI(org_isite1, org_ispin1, org_isite3, org_ispin3, + org_isite2, org_ispin2, -tmp_V, X, nstate, tmp_v0, tmp_v1); if (X->Large.mode != M_CORR) //for hermite: CisAkuCjtAis=-CisAisCjtAku - dam_pr = -X_child_CisAisCjtAku_Hubbard_MPI(org_isite1, org_ispin1, org_isite2, org_ispin2, - org_isite3, org_ispin3, tmp_V, X, nstate, tmp_v0, tmp_v1); + X_child_CisAisCjtAku_Hubbard_MPI(org_isite1, org_ispin1, org_isite2, org_ispin2, + org_isite3, org_ispin3, -tmp_V, X, nstate, tmp_v0, tmp_v1); }/*if (isite2 != isite3)*/ - return dam_pr; + return; }//myrank =origin else { ierr = MPI_Sendrecv(&X->Check.idim_max, 1, MPI_UNSIGNED_LONG, origin, 0, @@ -1036,79 +891,44 @@ firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0) Fsgn *= X_GC_CisAjt(tmp_off2, X, isite1, isite2, (isite1 + isite2), Adiff, &tmp_off); tmp_V *= Fsgn; }/*if (iFlgHermite == TRUE)*/ - dam_pr = 0; -#pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv, ioff) \ +#pragma omp parallel default(none) private(j, dmv, ioff) \ firstprivate(idim_max_buf, tmp_V, X) shared(v1buf, tmp_v1, nstate, tmp_v0, list_2_1, list_2_2, list_1buf) { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { -#pragma omp for - for (j = 1; j <= idim_max_buf; j++) { - if (GetOffComp(list_2_1, list_2_2, list_1buf[j], - X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff) == TRUE) - { - dmv = tmp_V * v1buf[j]; - tmp_v0[ioff] += dmv; - dam_pr += conj(tmp_v1[ioff]) * dmv; - } - }/*for (j = 1; j <= idim_max_buf; j++)*/ - }/*if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC)*/ - else { #pragma omp for - for (j = 1; j <= idim_max_buf; j++) { - if (GetOffComp(list_2_1, list_2_2, list_1buf[j], - X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff) == TRUE) - { - dmv = tmp_V * v1buf[j]; - dam_pr += conj(tmp_v1[ioff]) * dmv; - } - }/*for (j = 1; j <= idim_max_buf; j++)*/ - 
}/*if (! (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC))*/ + for (j = 1; j <= idim_max_buf; j++) { + if (GetOffComp(list_2_1, list_2_2, list_1buf[j], + X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff) == TRUE) + { + zaxpy_(&nstate, &tmp_V, &v1buf[j][0], &one, &tmp_v0[ioff][0], &one); + } + }/*for (j = 1; j <= idim_max_buf; j++)*/ }/*End of parallel region*/ }//org_isite1+1 > X->Def.Nsite && org_isite2+1 > X->Def.Nsite // && org_isite3+1 > X->Def.Nsite && org_isite4+1 > X->Def.Nsite else { org_rankbit = X->Def.OrgTpow[2 * X->Def.Nsite] * origin; - dam_pr = 0; -#pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv, tmp_off, Fsgn, ioff) \ +#pragma omp parallel default(none) private(j, dmv, tmp_off, Fsgn, ioff) \ firstprivate(myrank, idim_max_buf, tmp_V, X, tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4, org_rankbit, \ org_isite1, org_ispin1, org_isite2, org_ispin2, org_isite3, org_ispin3, org_isite4, org_ispin4) \ shared(v1buf, tmp_v1, nstate, tmp_v0, list_1buf, list_2_1, list_2_2) { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { -#pragma omp for - for (j = 1; j <= idim_max_buf; j++) { - if (GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn, X, - list_1buf[j] + org_rankbit, &tmp_off) == TRUE) - { - if (GetOffComp(list_2_1, list_2_2, tmp_off, X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff) == TRUE) - { - dmv = tmp_V * v1buf[j] * Fsgn; - tmp_v0[ioff] += dmv; - dam_pr += conj(tmp_v1[ioff]) * dmv; - } - } - }/*for (j = 1; j <= idim_max_buf; j++)*/ - }/*if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC)*/ - else { #pragma omp for - for (j = 1; j <= idim_max_buf; j++) { - if (GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn, X, - list_1buf[j] + org_rankbit, &tmp_off) == TRUE) + for (j = 1; j <= idim_max_buf; j++) { + if (GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn, X, + list_1buf[j] + org_rankbit, &tmp_off) == TRUE) + { + if 
(GetOffComp(list_2_1, list_2_2, tmp_off, X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff) == TRUE) { - if (GetOffComp(list_2_1, list_2_2, tmp_off, - X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff) == TRUE) - { - dmv = tmp_V * v1buf[j] * Fsgn; - dam_pr += conj(tmp_v1[ioff]) * dmv; - } + dmv = tmp_V * Fsgn; + zaxpy_(&nstate, &dmv, &v1buf[j][0], &one, &tmp_v0[ioff][0], &one); } - }/*for (j = 1; j <= idim_max_buf; j++)*/ - }/*if (! (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC))*/ + } + }/*for (j = 1; j <= idim_max_buf; j++)*/ }/*End of parallel region*/ } }/*if (myrank != origin)*/ - return dam_pr; + return; #else return 0.0; #endif @@ -1116,9 +936,8 @@ shared(v1buf, tmp_v1, nstate, tmp_v0, list_1buf, list_2_1, list_2_2) /** @brief Compute @f$c_{is}^\dagger c_{jt} c_{ku}^\dagger c_{ku}@f$ term of canonical Hubbard system -@return Fragment of @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ */ -double complex X_child_CisAjtCkuAku_Hubbard_MPI( +void X_child_CisAjtCkuAku_Hubbard_MPI( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite2,//!<[in] Site 2 @@ -1131,7 +950,6 @@ double complex X_child_CisAjtCkuAku_Hubbard_MPI( double complex **tmp_v1//!<[inout] Initial wavefunction ) { #ifdef MPI - double complex dam_pr = 0.0; unsigned long int i_max = X->Check.idim_max; unsigned long int idim_max_buf, ioff; int iCheck, ierr, Fsgn; @@ -1142,6 +960,7 @@ double complex X_child_CisAjtCkuAku_Hubbard_MPI( unsigned long int origin, tmp_off; unsigned long int org_rankbit; MPI_Status statusMPI; + int one = 1; //printf("Deubg0-0: org_isite1=%d, org_ispin1=%d, org_isite2=%d, org_ispin2=%d, org_isite3=%d, org_ispin3=%d\n", org_isite1, org_ispin1,org_isite2, org_ispin2,org_isite3, org_ispin3); iCheck = CheckBit_InterAllPE(org_isite1, org_ispin1, org_isite2, org_ispin2, org_isite3, org_ispin3, org_isite3, org_ispin3, X, (long unsigned int) myrank, &origin); //printf("iCheck=%d, myrank=%d, origin=%d\n", iCheck, myrank, origin); @@ -1178,20 
+997,20 @@ double complex X_child_CisAjtCkuAku_Hubbard_MPI( if (myrank == origin) {// only k is in PE //for hermite -#pragma omp parallel default(none) reduction(+:dam_pr) \ +#pragma omp parallel default(none) \ firstprivate(i_max, Asum, Adiff, isite1, isite2, tmp_V, X) private(j) shared(tmp_v0, tmp_v1) { #pragma omp for for (j = 1; j <= i_max; j++) - dam_pr += CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite1, isite2, Asum, Adiff, tmp_V); + CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite1, isite2, Asum, Adiff, tmp_V); if (X->Large.mode != M_CORR) { #pragma omp for for (j = 1; j <= i_max; j++) - dam_pr += CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite2, isite1, Asum, Adiff, tmp_V); + CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite2, isite1, Asum, Adiff, tmp_V); }/*if (X->Large.mode != M_CORR)*/ }/*End of parallel region*/ - return dam_pr; + return; }//myrank =origin else { ierr = MPI_Sendrecv(&X->Check.idim_max, 1, MPI_UNSIGNED_LONG, origin, 0, @@ -1208,7 +1027,7 @@ firstprivate(i_max, Asum, Adiff, isite1, isite2, tmp_V, X) private(j) shared(tmp MPI_COMM_WORLD, &statusMPI); if (ierr != 0) exitMPI(-1); -#pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv, ioff, tmp_off, Fsgn, Adiff) \ +#pragma omp parallel default(none) private(j, dmv, ioff, tmp_off, Fsgn, Adiff) \ firstprivate(idim_max_buf, tmp_V, X, tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4, org_rankbit, isite3) \ shared(v1buf, tmp_v1, nstate, tmp_v0, list_1buf, list_2_1, list_2_2, origin, org_isite3, myrank, isite1, isite2, org_isite1, org_isite2) { @@ -1220,94 +1039,46 @@ shared(v1buf, tmp_v1, nstate, tmp_v0, list_1buf, list_2_1, list_2_2, origin, org tmp_V *= Fsgn; if (org_isite3 + 1 > X->Def.Nsite) { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { #pragma omp for - for (j = 1; j <= idim_max_buf; j++) { - dmv = tmp_V * v1buf[j]; - GetOffComp(list_2_1, list_2_2, list_1buf[j], - X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); - tmp_v0[ioff] += dmv; - dam_pr += conj(tmp_v1[ioff]) * 
dmv; - }/*for (j = 1; j <= idim_max_buf; j++)*/ - } - else { - #pragma omp for - for (j = 1; j <= idim_max_buf; j++) { - dmv = tmp_V * v1buf[j]; - GetOffComp(list_2_1, list_2_2, list_1buf[j], X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); - dam_pr += conj(tmp_v1[ioff]) * dmv; - }/*for (j = 1; j <= idim_max_buf; j++)*/ - } + for (j = 1; j <= idim_max_buf; j++) { + GetOffComp(list_2_1, list_2_2, list_1buf[j], + X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); + zaxpy_(&nstate, &tmp_V, &v1buf[j][0], &one, &tmp_v0[ioff][0], &one); + }/*for (j = 1; j <= idim_max_buf; j++)*/ }/*if (org_isite3 + 1 > X->Def.Nsite)*/ else { //org_isite3 <= X->Def.Nsite - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { -#pragma omp for - for (j = 1; j <= idim_max_buf; j++) { - if (CheckBit_Ajt(isite3, list_1buf[j], &tmp_off) == TRUE) { - dmv = tmp_V * v1buf[j]; - GetOffComp(list_2_1, list_2_2, list_1buf[j], - X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); - tmp_v0[ioff] += dmv; - dam_pr += conj(tmp_v1[ioff]) * dmv; - } - }/*for (j = 1; j <= idim_max_buf; j++)*/ - }/*if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC)*/ - else { - #pragma omp for - for (j = 1; j <= idim_max_buf; j++) { - if (CheckBit_Ajt(isite3, list_1buf[j], &tmp_off) == TRUE) { - //printf("calc\n"); - dmv = tmp_V * v1buf[j]; - GetOffComp(list_2_1, list_2_2, list_1buf[j], - X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); - dam_pr += conj(tmp_v1[ioff]) * dmv; - } - }/*for (j = 1; j <= idim_max_buf; j++)*/ - } - }/*if (org_isite3 + 1 <= X->Def.Nsite)*/ - }/*if (org_isite1 + 1 > X->Def.Nsite && org_isite2 + 1 > X->Def.Nsite)*/ - else { - org_rankbit = X->Def.OrgTpow[2 * X->Def.Nsite] * origin; - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { #pragma omp for for (j = 1; j <= idim_max_buf; j++) { - if (GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn, X, - list_1buf[j] + org_rankbit, &tmp_off) == TRUE) { - dmv = tmp_V * v1buf[j] * 
Fsgn; - GetOffComp(list_2_1, list_2_2, tmp_off, + if (CheckBit_Ajt(isite3, list_1buf[j], &tmp_off) == TRUE) { + GetOffComp(list_2_1, list_2_2, list_1buf[j], X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); - tmp_v0[ioff] += dmv; - dam_pr += conj(tmp_v1[ioff]) * dmv; + zaxpy_(&nstate, &tmp_V, &v1buf[j][0], &one, &tmp_v0[ioff][0], &one); } }/*for (j = 1; j <= idim_max_buf; j++)*/ - }/*if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC)*/ - else { + }/*if (org_isite3 + 1 <= X->Def.Nsite)*/ + }/*if (org_isite1 + 1 > X->Def.Nsite && org_isite2 + 1 > X->Def.Nsite)*/ + else { + org_rankbit = X->Def.OrgTpow[2 * X->Def.Nsite] * origin; #pragma omp for - for (j = 1; j <= idim_max_buf; j++) { - if (GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn, X, - list_1buf[j] + org_rankbit, &tmp_off) == TRUE) - { - dmv = tmp_V * v1buf[j] * Fsgn; - GetOffComp(list_2_1, list_2_2, tmp_off, - X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); - dam_pr += conj(tmp_v1[ioff]) * dmv; - } - }/*for (j = 1; j <= idim_max_buf; j++)*/ - }/*if (! 
(X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC))*/ + for (j = 1; j <= idim_max_buf; j++) { + if (GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn, X, + list_1buf[j] + org_rankbit, &tmp_off) == TRUE) { + dmv = tmp_V * Fsgn; + GetOffComp(list_2_1, list_2_2, tmp_off, + X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); + zaxpy_(&nstate, &dmv, &v1buf[j][0], &one, &tmp_v0[ioff][0], &one); + } + }/*for (j = 1; j <= idim_max_buf; j++)*/ } }/*End of parallel region*/ }/*if (myrank != origin)*/ - return dam_pr; -#else - return 0.0; #endif }/*double complex X_child_CisAjtCkuAku_Hubbard_MPI*/ /** @brief Compute @f$c_{is}^\dagger c_{is} c_{jt}^\dagger c_{ku}@f$ term of canonical Hubbard system -@return Fragment of @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ */ -double complex X_child_CisAisCjtAku_Hubbard_MPI( +void X_child_CisAisCjtAku_Hubbard_MPI( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite3,//!<[in] Site 3 @@ -1320,19 +1091,13 @@ double complex X_child_CisAisCjtAku_Hubbard_MPI( double complex **tmp_v1//!<[inout] Initial wavefunction ) { #ifdef MPI - double complex dam_pr = 0; - - dam_pr = X_child_CisAjtCkuAku_Hubbard_MPI( + X_child_CisAjtCkuAku_Hubbard_MPI( org_isite4, org_ispin4, org_isite3, org_ispin3, org_isite1, org_ispin1, conj(tmp_V), X, nstate, tmp_v0, tmp_v1); - - return conj(dam_pr); -#else - return 0.0; #endif }/*double complex X_child_CisAisCjtAku_Hubbard_MPI*/ -double complex X_child_CisAis_Hubbard_MPI( +void X_child_CisAis_Hubbard_MPI( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 double complex tmp_V,//!<[in] Coupling constant @@ -1341,10 +1106,10 @@ double complex X_child_CisAis_Hubbard_MPI( double complex **tmp_v1//!<[inout] Initial wavefunction ) { #ifdef MPI - double complex dam_pr = 0.0; unsigned long int i_max = X->Check.idim_max; unsigned long int j, isite1, tmp_off; double complex dmv; + int one = 1; // MPI_Status statusMPI; isite1 = X->Def.Tpow[2 * org_isite1 + 
org_ispin1]; @@ -1352,54 +1117,27 @@ double complex X_child_CisAis_Hubbard_MPI( if (CheckBit_Ajt(isite1, (unsigned long int) myrank, &tmp_off) == FALSE) return 0.0; -#pragma omp parallel reduction(+:dam_pr) default(none) shared(tmp_v0, tmp_v1) \ +#pragma omp parallel default(none) shared(tmp_v0, tmp_v1) \ firstprivate(i_max, tmp_V, X) private(dmv, j, tmp_off) { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply -#pragma omp for - for (j = 1; j <= i_max; j++) { - dmv = tmp_v1[j] * tmp_V; - tmp_v0[j] += dmv; - dam_pr += conj(tmp_v1[j]) * dmv; - }/*for (j = 1; j <= i_max; j++)*/ - } - else { #pragma omp for - for (j = 1; j <= i_max; j++) { - dmv = tmp_v1[j] * tmp_V; - dam_pr += conj(tmp_v1[j]) * dmv; - }/*for (j = 1; j <= i_max; j++)*/ - } + for (j = 1; j <= i_max; j++) { + zaxpy_(&nstate, &tmp_V, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); + }/*for (j = 1; j <= i_max; j++)*/ }/*End of parallel*/ }/*if (org_isite1 + 1 > X->Def.Nsite)*/ else { -#pragma omp parallel reduction(+:dam_pr) default(none) shared(tmp_v0, tmp_v1, list_1) \ +#pragma omp parallel default(none) shared(tmp_v0, tmp_v1, list_1) \ firstprivate(i_max, tmp_V, X, isite1) private(dmv, j, tmp_off) { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply -#pragma omp for - for (j = 1; j <= i_max; j++) { - if (X_CisAis(list_1[j], X, isite1) != 0) { - dmv = tmp_v1[j] * tmp_V; - tmp_v0[j] += dmv; - dam_pr += conj(tmp_v1[j]) * dmv; - }/*if (X_CisAis(list_1[j], X, isite1) != 0)*/ - }/*for (j = 1; j <= i_max; j++)*/ - }/*if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC)*/ - else { #pragma omp for - for (j = 1; j <= i_max; j++) { - if (X_CisAis(list_1[j], X, isite1) != 0) { - dmv = tmp_v1[j] * tmp_V; - dam_pr += conj(tmp_v1[j]) * dmv; - }/*if (X_CisAis(list_1[j], X, isite1) != 0)*/ - }/*for (j = 1; j <= i_max; j++)*/ - } + for (j = 1; j <= i_max; j++) { + if (X_CisAis(list_1[j], X, isite1) != 0) { + zaxpy_(&nstate, &tmp_V, &tmp_v1[j][0], &one, 
&tmp_v0[j][0], &one); + }/*if (X_CisAis(list_1[j], X, isite1) != 0)*/ + }/*for (j = 1; j <= i_max; j++)*/ }/*End of parallel region*/ }/*if (org_isite1 + 1 <= X->Def.Nsite)*/ - return dam_pr; -#else - return 0.0; #endif }/*double complex X_child_CisAis_Hubbard_MPI*/ /** @@ -1409,7 +1147,7 @@ double complex X_child_CisAis_Hubbard_MPI( @author Kazuyoshi Yoshimi (The University of Tokyo) @author Youhei Yamaji (The University of Tokyo) */ -double complex X_GC_Cis_MPI( +void X_GC_Cis_MPI( int org_isite,//!<[in] Site i int org_ispin,//!<[in] Spin s double complex tmp_trans,//!<[in] Coupling constant//!<[in] @@ -1423,7 +1161,8 @@ double complex X_GC_Cis_MPI( int mask2, state2, ierr, origin, bit2diff, Fsgn; unsigned long int idim_max_buf, j; MPI_Status statusMPI; - double complex trans, dmv, dam_pr; + double complex trans, dmv; + int one = 1; // org_isite >= Nsite mask2 = (int)Tpow[2 * org_isite + org_ispin]; @@ -1457,17 +1196,11 @@ double complex X_GC_Cis_MPI( } else return 0; - dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, dmv) \ +#pragma omp parallel for default(none) private(j, dmv) \ firstprivate(idim_max_buf, trans) shared(tmp_v1buf, tmp_v1, tmp_v0) for (j = 0; j < idim_max_buf; j++) { - dmv = trans * tmp_v1buf[j + 1]; - tmp_v0[j + 1] += dmv; - dam_pr += conj(tmp_v1[j + 1]) * dmv; + zaxpy_(&nstate, &trans, &tmp_v1buf[j + 1][0], &one, &tmp_v0[j + 1][0], &one); } - return (dam_pr); -#else - return 0.0; #endif }/*double complex X_GC_Cis_MPI*/ /** @@ -1477,7 +1210,7 @@ double complex X_GC_Cis_MPI( @author Kazuyoshi Yoshimi (The University of Tokyo) @author Youhei Yamaji (The University of Tokyo) */ -double complex X_GC_Ajt_MPI( +void X_GC_Ajt_MPI( int org_isite,//!<[in] Site j int org_ispin,//!<[in] Spin t double complex tmp_trans,//!<[in] Coupling constant//!<[in] @@ -1491,7 +1224,8 @@ double complex X_GC_Ajt_MPI( int mask2, state2, ierr, origin, bit2diff, Fsgn; unsigned long int idim_max_buf, j; MPI_Status statusMPI; - double 
complex trans, dmv, dam_pr; + double complex trans, dmv; + int one = 1; // org_isite >= Nsite mask2 = (int)Tpow[2 * org_isite + org_ispin]; @@ -1521,15 +1255,12 @@ double complex X_GC_Ajt_MPI( else if (state2 == mask2) trans = (double)Fsgn * tmp_trans; else return 0; - dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, dmv) \ +#pragma omp parallel for default(none) private(j, dmv) \ firstprivate(idim_max_buf, trans) shared(tmp_v1buf, tmp_v1, tmp_v0) for (j = 0; j < idim_max_buf; j++) { - dmv = trans * tmp_v1buf[j + 1]; - tmp_v0[j + 1] += dmv; - dam_pr += conj(tmp_v1[j + 1]) * dmv; + zaxpy_(&nstate, &trans, &tmp_v1buf[j + 1][0], &one, &tmp_v0[j + 1][0], &one); } - return (dam_pr); + return; #else return 0.0; #endif @@ -1537,9 +1268,8 @@ firstprivate(idim_max_buf, trans) shared(tmp_v1buf, tmp_v1, tmp_v0) /** @brief Compute @f$c_{is}^\dagger@f$ term of canonical Hubbard system -@return Fragment of @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ */ -double complex X_Cis_MPI( +void X_Cis_MPI( int org_isite,//!<[in] Site i unsigned int org_ispin,//!<[in] Spin s double complex tmp_trans,//!<[in] Coupling constant @@ -1560,7 +1290,8 @@ double complex X_Cis_MPI( int mask2, state2, ierr, origin, bit2diff, Fsgn; unsigned long int idim_max_buf, j, ioff; MPI_Status statusMPI; - double complex trans, dmv, dam_pr; + double complex trans, dmv; + int one = 1; // org_isite >= Nsite mask2 = (int)Tpow[2 * org_isite + org_ispin]; @@ -1598,17 +1329,15 @@ double complex X_Cis_MPI( } else return 0; - dam_pr = 0.0; #pragma omp parallel for default(none) private(j, dmv) \ firstprivate(idim_max_buf, trans, ioff, _irght, _ilft, _ihfbit, list_2_1_target, list_2_2_target) \ shared(tmp_v1buf, tmp_v1, nstate, tmp_v0, list_1buf_org) for (j = 1; j <= idim_max_buf; j++) {//idim_max_buf -> original GetOffComp(list_2_1_target, list_2_2_target, list_1buf_org[j], _irght, _ilft, _ihfbit, &ioff); - dmv = trans * tmp_v1buf[j]; - tmp_v0[ioff] += dmv; + zaxpy_(&nstate, 
&trans, &tmp_v1buf[j][0], &one, &tmp_v0[ioff][0], &one); }/*for (j = 1; j <= idim_max_buf; j++)*/ - return (dam_pr); + return; #else return 0.0; #endif @@ -1616,9 +1345,8 @@ shared(tmp_v1buf, tmp_v1, nstate, tmp_v0, list_1buf_org) /** @brief Compute @f$c_{jt}@f$ term of canonical Hubbard system -@return Fragment of @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ */ -double complex X_Ajt_MPI( +void X_Ajt_MPI( int org_isite,//!<[in] Site j unsigned int org_ispin,//!<[in] Spin t double complex tmp_trans,//!<[in] Coupling constant @@ -1639,7 +1367,8 @@ double complex X_Ajt_MPI( int mask2, state2, ierr, origin, bit2diff, Fsgn; unsigned long int idim_max_buf, j, ioff; MPI_Status statusMPI; - double complex trans, dmv, dam_pr; + double complex trans, dmv; + int one = 1; // org_isite >= Nsite mask2 = (int)Tpow[2 * org_isite + org_ispin]; @@ -1676,18 +1405,13 @@ double complex X_Ajt_MPI( } else return 0; - dam_pr = 0.0; #pragma omp parallel for default(none) private(j, dmv) \ firstprivate(idim_max_buf, trans, ioff, _irght, _ilft, _ihfbit, list_2_1_target, list_2_2_target) \ shared(tmp_v1buf, tmp_v1, nstate, tmp_v0, list_1buf_org) for (j = 1; j <= idim_max_buf; j++) { GetOffComp(list_2_1_target, list_2_2_target, list_1buf_org[j], _irght, _ilft, _ihfbit, &ioff); - dmv = trans * tmp_v1buf[j]; - tmp_v0[ioff] += dmv; + zaxpy_(&nstate, &trans, &tmp_v1buf[j][0], &one, &tmp_v0[ioff][0], &one); } - return (dam_pr); -#else - return 0.0; #endif }/*double complex X_Ajt_MPI*/ diff --git a/src/mltplyMPISpin.c b/src/mltplyMPISpin.c index f55bbaca8..53cd71b75 100644 --- a/src/mltplyMPISpin.c +++ b/src/mltplyMPISpin.c @@ -39,8 +39,7 @@ void child_general_int_spin_MPIdouble( double complex **tmp_v1//!<[in] v0 = H v1 ){ #ifdef MPI - double complex dam_pr = 0; - dam_pr = X_child_general_int_spin_MPIdouble( + X_child_general_int_spin_MPIdouble( (int)X->Def.InterAll_OffDiagonal[i_int][0], (int)X->Def.InterAll_OffDiagonal[i_int][1], (int)X->Def.InterAll_OffDiagonal[i_int][3], 
(int)X->Def.InterAll_OffDiagonal[i_int][4], (int)X->Def.InterAll_OffDiagonal[i_int][5], (int)X->Def.InterAll_OffDiagonal[i_int][7], @@ -49,7 +48,6 @@ void child_general_int_spin_MPIdouble( Add @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ to LargeList::prdct */ - X->Large.prdct += dam_pr; #endif }/*void child_general_int_spin_MPIdouble*/ /** @@ -58,7 +56,7 @@ void child_general_int_spin_MPIdouble( @return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ @author Mitsuaki Kawamura (The University of Tokyo) */ -double complex X_child_general_int_spin_MPIdouble( +void X_child_general_int_spin_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_ispin2,//!<[in] Spin 2 @@ -74,7 +72,8 @@ double complex X_child_general_int_spin_MPIdouble( int mask1, mask2, state1, state2, ierr, origin; unsigned long int idim_max_buf, j, ioff; MPI_Status statusMPI; - double complex Jint, dmv, dam_pr; + double complex Jint, dmv; + int one = 1; mask1 = (int)X->Def.Tpow[org_isite1]; mask2 = (int)X->Def.Tpow[org_isite3]; @@ -104,31 +103,13 @@ double complex X_child_general_int_spin_MPIdouble( v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, MPI_COMM_WORLD, &statusMPI); if (ierr != 0) exitMPI(-1); - dam_pr = 0.0; - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, dmv, ioff) \ +#pragma omp parallel for default(none) private(j, dmv, ioff) \ firstprivate(idim_max_buf, Jint, X) shared(list_2_1, list_2_2, list_1buf, v1buf, tmp_v1, tmp_v0) - for (j = 1; j <= idim_max_buf; j++) { - GetOffComp(list_2_1, list_2_2, list_1buf[j], - X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); - dmv = Jint * v1buf[j]; - tmp_v0[ioff] += dmv; - dam_pr += conj(tmp_v1[ioff]) * dmv; - }/*for (j = 1; j <= idim_max_buf; j++)*/ - }/*if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC)*/ - else { -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, dmv, ioff) \ - 
firstprivate(idim_max_buf, Jint, X) shared(list_2_1, list_2_2, list_1buf, v1buf, tmp_v1, tmp_v0) - for (j = 1; j <= idim_max_buf; j++) { - GetOffComp(list_2_1, list_2_2, list_1buf[j], + for (j = 1; j <= idim_max_buf; j++) { + GetOffComp(list_2_1, list_2_2, list_1buf[j], X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); - dmv = Jint * v1buf[j]; - dam_pr += conj(tmp_v1[ioff]) * dmv; - }/*for (j = 1; j <= idim_max_buf; j++)*/ - }/*if (! (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC))*/ - return dam_pr; -#else - return 0.0; + zaxpy_(&nstate, &Jint, &v1buf[j][0], &one, &tmp_v0[ioff][0], &one); + }/*for (j = 1; j <= idim_max_buf; j++)*/ #endif }/*double complex X_child_general_int_spin_MPIdouble*/ /** @@ -137,7 +118,7 @@ double complex X_child_general_int_spin_MPIdouble( @return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ @author Mitsuaki Kawamura (The University of Tokyo) */ -double complex X_child_general_int_spin_TotalS_MPIdouble( +void X_child_general_int_spin_TotalS_MPIdouble( int org_isite1,//!<[in] site 1 int org_isite3,//!<[in] site 3 struct BindStruct *X,//!<[inout] @@ -148,7 +129,7 @@ double complex X_child_general_int_spin_TotalS_MPIdouble( int mask1, mask2, num1_up, num2_up, ierr, origin; unsigned long int idim_max_buf, j, ioff, ibit_tmp; MPI_Status statusMPI; - double complex dmv, dam_pr; + double complex dmv; mask1 = (int)X->Def.Tpow[org_isite1]; mask2 = (int)X->Def.Tpow[org_isite3]; @@ -173,16 +154,14 @@ double complex X_child_general_int_spin_TotalS_MPIdouble( MPI_COMM_WORLD, &statusMPI); if (ierr != 0) exitMPI(-1); - dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, dmv, ioff) \ +#pragma omp parallel for default(none) private(j, dmv, ioff) \ firstprivate(idim_max_buf, X) shared(list_2_1, list_2_2, list_1buf, v1buf, tmp_v1, tmp_v0) for (j = 1; j <= idim_max_buf; j++) { GetOffComp(list_2_1, list_2_2, list_1buf[j], X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); dmv = 0.5 * v1buf[j]; - dam_pr += 
conj(tmp_v1[ioff]) * dmv; }/*for (j = 1; j <= idim_max_buf; j++)*/ - return dam_pr; + return; #else return 0.0; #endif @@ -200,9 +179,8 @@ void child_general_int_spin_MPIsingle( double complex **tmp_v1//!<[in] v0 = H v1 ){ #ifdef MPI - double complex dam_pr = 0; - dam_pr = X_child_general_int_spin_MPIsingle( + X_child_general_int_spin_MPIsingle( (int)X->Def.InterAll_OffDiagonal[i_int][0], (int)X->Def.InterAll_OffDiagonal[i_int][1], (int)X->Def.InterAll_OffDiagonal[i_int][3], (int)X->Def.InterAll_OffDiagonal[i_int][4], (int)X->Def.InterAll_OffDiagonal[i_int][5], (int)X->Def.InterAll_OffDiagonal[i_int][7], @@ -211,7 +189,6 @@ void child_general_int_spin_MPIsingle( Add @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ to LargeList::prdct */ - X->Large.prdct += dam_pr; #endif }/*void child_general_int_spin_MPIsingle*/ /* @@ -219,7 +196,7 @@ void child_general_int_spin_MPIsingle( site 3 is in the inter process region @return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ -double complex X_child_general_int_spin_MPIsingle( +void X_child_general_int_spin_MPIsingle( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_ispin2,//!<[in] Spin 2 @@ -235,7 +212,8 @@ double complex X_child_general_int_spin_MPIsingle( int mask2, state2, ierr, origin; unsigned long int mask1, idim_max_buf, j, ioff, state1, jreal, state1check; MPI_Status statusMPI; - double complex Jint, dmv, dam_pr; + double complex Jint, dmv; + int one = 1; /* Prepare index in the inter PE */ @@ -273,65 +251,19 @@ double complex X_child_general_int_spin_MPIsingle( */ mask1 = X->Def.Tpow[org_isite1]; - dam_pr = 0.0; - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, dmv, ioff, jreal, state1) \ +#pragma omp parallel for default(none) private(j, dmv, ioff, jreal, state1) \ firstprivate(idim_max_buf, Jint, X, mask1, state1check, org_isite1) \ shared(list_2_1, list_2_2, list_1buf, v1buf, tmp_v1, tmp_v0) - for (j = 1; j 
<= idim_max_buf; j++) { - - jreal = list_1buf[j]; - - state1 = (jreal & mask1) / mask1; - if (state1 == state1check) { - GetOffComp(list_2_1, list_2_2, jreal ^ mask1, - X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); + for (j = 1; j <= idim_max_buf; j++) { + jreal = list_1buf[j]; - dmv = Jint * v1buf[j]; - tmp_v0[ioff] += dmv; - dam_pr += conj(tmp_v1[ioff]) * dmv; - } + state1 = (jreal & mask1) / mask1; + if (state1 == state1check) { + GetOffComp(list_2_1, list_2_2, jreal ^ mask1, + X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); + zaxpy_(&nstate, &Jint, &v1buf[j][0], &one, &tmp_v0[ioff][0], &one); } } - else if (X->Large.mode == M_TOTALS) { -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, dmv, ioff, jreal, state1) \ -firstprivate(idim_max_buf, Jint, X, mask1, state1check, org_isite1) \ -shared(list_2_1, list_2_2, list_1buf, v1buf, tmp_v1, tmp_v0) - for (j = 1; j <= idim_max_buf; j++) { - - jreal = list_1buf[j]; - - state1 = (jreal & mask1) / mask1; - if (state1 == state1check) { - GetOffComp(list_2_1, list_2_2, jreal ^ mask1, - X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); - - dmv = Jint * v1buf[j]; - dmv = 0.5 * v1buf[j]; - dam_pr += conj(tmp_v1[ioff]) * dmv; - }/*if (state1 == state1check)*/ - }/*for (j = 1; j <= idim_max_buf; j++)*/ - }/*if (X->Large.mode == M_TOTALS)*/ - else { -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, dmv, ioff, jreal, state1) \ -firstprivate(idim_max_buf, Jint, X, mask1, state1check, org_isite1) \ -shared(list_2_1, list_2_2, list_1buf, v1buf, tmp_v1, tmp_v0) - for (j = 1; j <= idim_max_buf; j++) { - - jreal = list_1buf[j]; - - state1 = (jreal & mask1) / mask1; - if (state1 == state1check) { - GetOffComp(list_2_1, list_2_2, jreal ^ mask1, - X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); - dmv = Jint * v1buf[j]; - dam_pr += conj(tmp_v1[ioff]) * dmv; - }/*if (state1 == state1check)*/ - }/*for (j = 1; j <= idim_max_buf; j++)*/ - } - return dam_pr; -#else - 
return 0.0; #endif }/*double complex X_child_general_int_spin_MPIsingle*/ /** @@ -392,31 +324,29 @@ void GC_child_general_int_GeneralSpin_MPIdouble( double complex **tmp_v1//!<[in] v0 = H v1 ){ #ifdef MPI - double complex dam_pr; // MPI_Status statusMPI; if (X->Def.InterAll_OffDiagonal[i_int][1] == X->Def.InterAll_OffDiagonal[i_int][3] && X->Def.InterAll_OffDiagonal[i_int][5] != X->Def.InterAll_OffDiagonal[i_int][7]) { - dam_pr = X_GC_child_CisAisCjuAjv_GeneralSpin_MPIdouble( + X_GC_child_CisAisCjuAjv_GeneralSpin_MPIdouble( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][4], X->Def.InterAll_OffDiagonal[i_int][5], X->Def.InterAll_OffDiagonal[i_int][7], X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); } else if (X->Def.InterAll_OffDiagonal[i_int][1] != X->Def.InterAll_OffDiagonal[i_int][3] && X->Def.InterAll_OffDiagonal[i_int][5] == X->Def.InterAll_OffDiagonal[i_int][7]) { - dam_pr = X_GC_child_CisAitCjuAju_GeneralSpin_MPIdouble( + X_GC_child_CisAitCjuAju_GeneralSpin_MPIdouble( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][3], X->Def.InterAll_OffDiagonal[i_int][4], X->Def.InterAll_OffDiagonal[i_int][5], X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); } else { - dam_pr = X_GC_child_CisAitCjuAjv_GeneralSpin_MPIdouble( + X_GC_child_CisAitCjuAjv_GeneralSpin_MPIdouble( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][3], X->Def.InterAll_OffDiagonal[i_int][4], X->Def.InterAll_OffDiagonal[i_int][5], X->Def.InterAll_OffDiagonal[i_int][7], X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); } - X->Large.prdct += dam_pr; #endif }/*void GC_child_general_int_spin_MPIdouble*/ /** @@ -431,31 +361,29 @@ void GC_child_general_int_GeneralSpin_MPIsingle( double complex **tmp_v1//!<[in] v0 = H v1 ){ #ifdef MPI - double complex dam_pr; if 
(X->Def.InterAll_OffDiagonal[i_int][1] == X->Def.InterAll_OffDiagonal[i_int][3] && X->Def.InterAll_OffDiagonal[i_int][5] != X->Def.InterAll_OffDiagonal[i_int][7]) { - dam_pr = X_GC_child_CisAisCjuAjv_GeneralSpin_MPIsingle( + X_GC_child_CisAisCjuAjv_GeneralSpin_MPIsingle( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][4], X->Def.InterAll_OffDiagonal[i_int][5], X->Def.InterAll_OffDiagonal[i_int][7], X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); } else if (X->Def.InterAll_OffDiagonal[i_int][1] != X->Def.InterAll_OffDiagonal[i_int][3] && X->Def.InterAll_OffDiagonal[i_int][5] == X->Def.InterAll_OffDiagonal[i_int][7]) { - dam_pr = X_GC_child_CisAitCjuAju_GeneralSpin_MPIsingle( + X_GC_child_CisAitCjuAju_GeneralSpin_MPIsingle( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][3], X->Def.InterAll_OffDiagonal[i_int][4], X->Def.InterAll_OffDiagonal[i_int][5], X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); } else { - dam_pr = X_GC_child_CisAitCjuAjv_GeneralSpin_MPIsingle( + X_GC_child_CisAitCjuAjv_GeneralSpin_MPIsingle( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][3], X->Def.InterAll_OffDiagonal[i_int][4], X->Def.InterAll_OffDiagonal[i_int][5], X->Def.InterAll_OffDiagonal[i_int][7], X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); } - X->Large.prdct += dam_pr; #endif }/*void GC_child_general_int_spin_MPIsingle*/ /** @@ -469,13 +397,11 @@ void child_general_int_GeneralSpin_MPIdouble( int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 double complex **tmp_v1//!<[in] v0 = H v1 ){ - double complex dam_pr; - dam_pr = X_child_CisAitCjuAjv_GeneralSpin_MPIdouble( + X_child_CisAitCjuAjv_GeneralSpin_MPIdouble( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], 
X->Def.InterAll_OffDiagonal[i_int][3], X->Def.InterAll_OffDiagonal[i_int][4], X->Def.InterAll_OffDiagonal[i_int][5], X->Def.InterAll_OffDiagonal[i_int][7], X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); - X->Large.prdct += dam_pr; }/*void GC_child_general_int_spin_MPIdouble*/ /** @@ -489,13 +415,11 @@ void child_general_int_GeneralSpin_MPIsingle( int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 double complex **tmp_v1//!<[in] v0 = H v1 ){ - double complex dam_pr; - dam_pr = X_child_CisAitCjuAjv_GeneralSpin_MPIsingle( + X_child_CisAitCjuAjv_GeneralSpin_MPIsingle( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][3], X->Def.InterAll_OffDiagonal[i_int][4], X->Def.InterAll_OffDiagonal[i_int][5], X->Def.InterAll_OffDiagonal[i_int][7], X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); - X->Large.prdct += dam_pr; }/*void GC_child_general_int_spin_MPIsingle*/ diff --git a/src/mltplyMPISpinCore.c b/src/mltplyMPISpinCore.c index d5258598d..7889e685b 100644 --- a/src/mltplyMPISpinCore.c +++ b/src/mltplyMPISpinCore.c @@ -83,13 +83,11 @@ void GC_child_CisAitCiuAiv_spin_MPIdouble( double complex **tmp_v1 /**< [in] v0 = H v1*/) { #ifdef MPI - double complex dam_pr; - dam_pr = X_GC_child_CisAitCiuAiv_spin_MPIdouble( + X_GC_child_CisAitCiuAiv_spin_MPIdouble( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][3], X->Def.InterAll_OffDiagonal[i_int][4], X->Def.InterAll_OffDiagonal[i_int][5], X->Def.InterAll_OffDiagonal[i_int][7], X->Def.ParaInterAll_OffDiagonal[i_int],X, nstate, tmp_v0, tmp_v1); - X->Large.prdct += dam_pr; #endif }/*void GC_child_CisAitCiuAiv_spin_MPIdouble*/ /** @@ -100,7 +98,7 @@ void GC_child_CisAitCiuAiv_spin_MPIdouble( @author Kazuyoshi Yoshimi (The University of Tokyo) @author Mitsuaki Kawamura (The University of Tokyo) */ -double complex X_GC_child_CisAitCiuAiv_spin_MPIdouble( +void 
X_GC_child_CisAitCiuAiv_spin_MPIdouble( int org_isite1,//!<[in] site i int org_ispin1,//!<[in] spin s int org_ispin2,//!<[in] spin t @@ -116,7 +114,7 @@ double complex X_GC_child_CisAitCiuAiv_spin_MPIdouble( int mask1, mask2, state1, state2, ierr, origin; unsigned long int idim_max_buf, j; MPI_Status statusMPI; - double complex Jint, dmv, dam_pr; + double complex Jint, dmv; mask1 = (int)X->Def.Tpow[org_isite1]; mask2 = (int)X->Def.Tpow[org_isite3]; @@ -125,8 +123,8 @@ double complex X_GC_child_CisAitCiuAiv_spin_MPIdouble( } else { if (org_ispin1 == org_ispin4 && org_ispin2 == org_ispin3) { //CisAitCitAis=CisAis - dam_pr = X_GC_child_CisAis_spin_MPIdouble(org_isite1, org_ispin1, tmp_J, X, nstate, tmp_v0, tmp_v1); - return (dam_pr); + X_GC_child_CisAis_spin_MPIdouble(org_isite1, org_ispin1, tmp_J, X, nstate, tmp_v0, tmp_v1); + return; } else { //CisAitCisAit=0 return 0.0; @@ -158,29 +156,15 @@ double complex X_GC_child_CisAitCiuAiv_spin_MPIdouble( MPI_COMM_WORLD, &statusMPI); if (ierr != 0) exitMPI(-1); - dam_pr = 0.0; -#pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv) \ +#pragma omp parallel default(none) private(j, dmv) \ firstprivate(idim_max_buf, Jint, X) shared(v1buf, tmp_v1, tmp_v0) { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { #pragma omp for - for (j = 1; j <= idim_max_buf; j++) { - dmv = Jint * v1buf[j]; - tmp_v0[j] += dmv; - dam_pr += conj(tmp_v1[j]) * dmv; - }/*for (j = 1; j <= idim_max_buf; j++)*/ - } - else { -#pragma omp for - for (j = 1; j <= idim_max_buf; j++) { - dmv = Jint * v1buf[j]; - dam_pr += conj(tmp_v1[j]) * dmv; - }/*for (j = 1; j <= idim_max_buf; j++)*/ - } + for (j = 1; j <= idim_max_buf; j++) { + dmv = Jint * v1buf[j]; + tmp_v0[j] += dmv; + }/*for (j = 1; j <= idim_max_buf; j++)*/ }/*End of parallel region*/ - return dam_pr; -#else - return 0.0; #endif }/*void GC_child_CisAitCiuAiv_spin_MPIdouble*/ /** @@ -195,12 +179,10 @@ void GC_child_CisAisCjuAjv_spin_MPIdouble( double complex **tmp_v1 /**< 
[in] v0 = H v1*/ ){ #ifdef MPI - double complex dam_pr; - dam_pr = X_GC_child_CisAisCjuAjv_spin_MPIdouble( + X_GC_child_CisAisCjuAjv_spin_MPIdouble( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][4], X->Def.InterAll_OffDiagonal[i_int][5], X->Def.InterAll_OffDiagonal[i_int][7], X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); - X->Large.prdct += dam_pr; #endif }/*void GC_child_CisAitCiuAiv_spin_MPIdouble*/ /** @@ -209,7 +191,7 @@ void GC_child_CisAisCjuAjv_spin_MPIdouble( @return @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex X_GC_child_CisAisCjuAjv_spin_MPIdouble( +void X_GC_child_CisAisCjuAjv_spin_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite3,//!<[in] Site 3 @@ -225,7 +207,7 @@ double complex X_GC_child_CisAisCjuAjv_spin_MPIdouble( long int origin, num1; unsigned long int idim_max_buf, j; MPI_Status statusMPI; - double complex Jint, dmv, dam_pr; + double complex Jint, dmv; if (org_isite1 == org_isite3 && org_ispin1 == org_ispin4) {//CisAisCitAis return 0.0; @@ -256,27 +238,12 @@ double complex X_GC_child_CisAisCjuAjv_spin_MPIdouble( MPI_COMM_WORLD, &statusMPI); if (ierr != 0) exitMPI(-1); - dam_pr = 0.0; - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, dmv) \ +#pragma omp parallel for default(none) private(j, dmv) \ firstprivate(idim_max_buf, Jint, X) shared(v1buf, tmp_v1, tmp_v0) - for (j = 1; j <= idim_max_buf; j++) { - dmv = Jint * v1buf[j]; - tmp_v0[j] += dmv; - dam_pr += conj(tmp_v1[j]) * dmv; - } + for (j = 1; j <= idim_max_buf; j++) { + dmv = Jint * v1buf[j]; + tmp_v0[j] += dmv; } - else { -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, dmv) \ - firstprivate(idim_max_buf, Jint, X) shared(v1buf, tmp_v1, tmp_v0) - for (j = 1; j <= idim_max_buf; j++) { - 
dmv = Jint * v1buf[j]; - dam_pr += conj(tmp_v1[j]) * dmv; - } - } - return (dam_pr); -#else - return 0.0; #endif }/*double complex X_GC_child_CisAisCjuAjv_spin_MPIdouble*/ /** @@ -292,12 +259,10 @@ void GC_child_CisAitCjuAju_spin_MPIdouble( ) { #ifdef MPI - double complex dam_pr; - dam_pr = X_GC_child_CisAitCjuAju_spin_MPIdouble( + X_GC_child_CisAitCjuAju_spin_MPIdouble( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][3], X->Def.InterAll_OffDiagonal[i_int][4], X->Def.InterAll_OffDiagonal[i_int][5], X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); - X->Large.prdct += dam_pr; #endif }/*void GC_child_CisAitCiuAiv_spin_MPIdouble*/ /** @@ -306,7 +271,7 @@ void GC_child_CisAitCjuAju_spin_MPIdouble( @return @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex X_GC_child_CisAitCjuAju_spin_MPIdouble( +void X_GC_child_CisAitCjuAju_spin_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_ispin2,//!<[in] Spin 2 @@ -322,7 +287,7 @@ double complex X_GC_child_CisAitCjuAju_spin_MPIdouble( long int origin; unsigned long int idim_max_buf, j; MPI_Status statusMPI; - double complex Jint, dmv, dam_pr; + double complex Jint, dmv; if (org_isite1 == org_isite3 && org_ispin1 == org_ispin3) {//cisaitcisais return 0.0; @@ -363,29 +328,15 @@ double complex X_GC_child_CisAitCjuAju_spin_MPIdouble( MPI_COMM_WORLD, &statusMPI); if (ierr != 0) exitMPI(-1); - dam_pr = 0.0; -#pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv) \ +#pragma omp parallel default(none) private(j, dmv) \ firstprivate(idim_max_buf, Jint, X) shared(v1buf, tmp_v1, tmp_v0) { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { #pragma omp for - for (j = 1; j <= idim_max_buf; j++) { - dmv = Jint * v1buf[j]; - tmp_v0[j] += dmv; - dam_pr += conj(tmp_v1[j]) * dmv; - }/*for (j = 1; j <= idim_max_buf; j++)*/ - } - else { 
-#pragma omp for - for (j = 1; j <= idim_max_buf; j++) { - dmv = Jint * v1buf[j]; - dam_pr += conj(tmp_v1[j]) * dmv; - }/*for (j = 1; j <= idim_max_buf; j++)*/ - } + for (j = 1; j <= idim_max_buf; j++) { + dmv = Jint * v1buf[j]; + tmp_v0[j] += dmv; + }/*for (j = 1; j <= idim_max_buf; j++)*/ }/*End of parallel region*/ - return (dam_pr); -#else - return 0.0; #endif }/*double complex X_GC_child_CisAisCjuAjv_spin_MPIdouble*/ /** @@ -394,7 +345,7 @@ double complex X_GC_child_CisAitCjuAju_spin_MPIdouble( @return @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex X_GC_child_CisAisCjuAju_spin_MPIdouble( +void X_GC_child_CisAisCjuAju_spin_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite3,//!<[in] Site 3 @@ -408,35 +359,21 @@ double complex X_GC_child_CisAisCjuAju_spin_MPIdouble( long unsigned int mask1, mask2, num1,num2; unsigned long int j; // MPI_Status statusMPI; - double complex dmv, dam_pr; + double complex dmv; mask1 = (int)X->Def.Tpow[org_isite1]; mask2 = (int)X->Def.Tpow[org_isite3]; num1 = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, mask1, org_ispin1); num2 = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, mask2, org_ispin3); - dam_pr = 0.0; -#pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv) \ +#pragma omp parallel default(none) private(j, dmv) \ firstprivate(tmp_J, X, num1, num2) shared(tmp_v1, tmp_v0) { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { -#pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - dmv = num1*num2*tmp_v1[j] * tmp_J; - tmp_v0[j] += dmv; - dam_pr += conj(tmp_v1[j]) * dmv; - }/*for (j = 1; j <= X->Check.idim_max; j++) */ - } - else { #pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - dmv = num1 * num2 * tmp_v1[j] * tmp_J; - dam_pr += conj(tmp_v1[j]) * dmv; - }/*for (j = 1; j <= X->Check.idim_max; j++)*/ - } + for (j = 1; j <= X->Check.idim_max; j++) { + dmv = num1 * 
num2*tmp_v1[j] * tmp_J; + tmp_v0[j] += dmv; + }/*for (j = 1; j <= X->Check.idim_max; j++) */ }/*End of parallel region*/ - return(dam_pr); -#else - return 0.0; #endif }/*double complex X_GC_child_CisAisCjuAju_spin_MPIdouble*/ /** @@ -445,7 +382,7 @@ double complex X_GC_child_CisAisCjuAju_spin_MPIdouble( @return @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex X_GC_child_CisAisCjuAju_spin_MPIsingle( +void X_GC_child_CisAisCjuAju_spin_MPIsingle( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite3,//!<[in] Site 3 @@ -459,37 +396,22 @@ double complex X_GC_child_CisAisCjuAju_spin_MPIsingle( long unsigned int mask1, mask2, num1, num2; unsigned long int j; // MPI_Status statusMPI; - double complex Jint, dmv, dam_pr; + double complex Jint, dmv; Jint = tmp_J; mask1 = (int)X->Def.Tpow[org_isite1]; mask2 = (int)X->Def.Tpow[org_isite3]; num2 = X_SpinGC_CisAis((unsigned long int) myrank + 1, X, mask2, org_ispin3); - dam_pr = 0.0; -#pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv, num1) \ +#pragma omp parallel default(none) private(j, dmv, num1) \ firstprivate(Jint, X, num2, mask1, org_ispin1) shared(tmp_v1, tmp_v0) { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { -#pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - num1 = X_SpinGC_CisAis(j, X, mask1, org_ispin1); - dmv = Jint * num1 * num2 * tmp_v1[j]; - tmp_v0[j] += dmv; - dam_pr += conj(tmp_v1[j]) * dmv; - }/*for (j = 1; j <= X->Check.idim_max; j++)*/ - } - else { #pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - num1 = X_SpinGC_CisAis(j, X, mask1, org_ispin1); - dmv = Jint * num1 * num2 * tmp_v1[j]; - dam_pr += conj(tmp_v1[j]) * dmv; - }/*for (j = 1; j <= X->Check.idim_max; j++)*/ - } + for (j = 1; j <= X->Check.idim_max; j++) { + num1 = X_SpinGC_CisAis(j, X, mask1, org_ispin1); + dmv = Jint * num1 * num2 * tmp_v1[j]; + tmp_v0[j] += dmv; + }/*for (j = 1; j <= 
X->Check.idim_max; j++)*/ }/*End of parallel region*/ - return (dam_pr); -#else - return 0.0; #endif }/*double complex X_GC_child_CisAisCjuAju_spin_MPIdouble*/ /** @@ -504,13 +426,11 @@ void GC_child_CisAitCiuAiv_spin_MPIsingle( double complex **tmp_v1//!<[in] v0 = H v1 ){ #ifdef MPI - double complex dam_pr; - dam_pr =X_GC_child_CisAitCiuAiv_spin_MPIsingle( + X_GC_child_CisAitCiuAiv_spin_MPIsingle( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][3], X->Def.InterAll_OffDiagonal[i_int][4], X->Def.InterAll_OffDiagonal[i_int][5], X->Def.InterAll_OffDiagonal[i_int][7], X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); - X->Large.prdct += dam_pr; #endif }/*void GC_child_CisAitCiuAiv_spin_MPIsingle*/ /** @@ -519,7 +439,7 @@ void GC_child_CisAitCiuAiv_spin_MPIsingle( @return @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ @author Mitsuaki Kawamura (The University of Tokyo) */ -double complex X_GC_child_CisAitCiuAiv_spin_MPIsingle( +void X_GC_child_CisAitCiuAiv_spin_MPIsingle( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_ispin2,//!<[in] Spin 2 @@ -535,7 +455,7 @@ double complex X_GC_child_CisAitCiuAiv_spin_MPIsingle( int mask2, state2, ierr, origin; unsigned long int mask1, idim_max_buf, j, ioff, state1, state1check; MPI_Status statusMPI; - double complex Jint, dmv, dam_pr; + double complex Jint, dmv; /* Prepare index in the inter PE */ @@ -569,35 +489,18 @@ double complex X_GC_child_CisAitCiuAiv_spin_MPIsingle( */ mask1 = X->Def.Tpow[org_isite1]; - dam_pr = 0.0; -#pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv, state1, ioff) \ +#pragma omp parallel default(none) private(j, dmv, state1, ioff) \ firstprivate(idim_max_buf, Jint, X, state1check, mask1) shared(v1buf, tmp_v1, tmp_v0) { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { #pragma omp for - for (j = 0; j < idim_max_buf; j++) { - state1 = X_SpinGC_CisAit(j + 1, X, mask1, 
state1check, &ioff); - if (state1 != 0) { - dmv = Jint * v1buf[j + 1]; - tmp_v0[ioff + 1] += dmv; - dam_pr += conj(tmp_v1[ioff + 1]) * dmv; - }/*if (state1 != 0)*/ - }/*for (j = 0; j < idim_max_buf; j++)*/ - } - else { -#pragma omp for - for (j = 0; j < idim_max_buf; j++) { - state1 = X_SpinGC_CisAit(j + 1, X, mask1, state1check, &ioff); - if (state1 != 0) { - dmv = Jint * v1buf[j + 1]; - dam_pr += conj(tmp_v1[ioff + 1]) * dmv; - }/*if (state1 != 0)*/ - }/*for (j = 0; j < idim_max_buf; j++)*/ - } + for (j = 0; j < idim_max_buf; j++) { + state1 = X_SpinGC_CisAit(j + 1, X, mask1, state1check, &ioff); + if (state1 != 0) { + dmv = Jint * v1buf[j + 1]; + tmp_v0[ioff + 1] += dmv; + }/*if (state1 != 0)*/ + }/*for (j = 0; j < idim_max_buf; j++)*/ }/*End of parallel region*/ - return (dam_pr); -#else - return 0.0; #endif }/*void GC_child_CisAitCiuAiv_spin_MPIsingle*/ /** @@ -612,12 +515,10 @@ void GC_child_CisAisCjuAjv_spin_MPIsingle( double complex **tmp_v1//!<[in] v0 = H v1 ){ #ifdef MPI - double complex dam_pr; - dam_pr =X_GC_child_CisAisCjuAjv_spin_MPIsingle( + X_GC_child_CisAisCjuAjv_spin_MPIsingle( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][4], X->Def.InterAll_OffDiagonal[i_int][5], X->Def.InterAll_OffDiagonal[i_int][7], X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); - X->Large.prdct += dam_pr; #endif }/*void GC_child_CisAisCjuAjv_spin_MPIsingle*/ /** @@ -626,7 +527,7 @@ void GC_child_CisAisCjuAjv_spin_MPIsingle( @return @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex X_GC_child_CisAisCjuAjv_spin_MPIsingle( +void X_GC_child_CisAisCjuAjv_spin_MPIsingle( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 2 int org_isite3,//!<[in] Site 1 @@ -641,7 +542,7 @@ double complex X_GC_child_CisAisCjuAjv_spin_MPIsingle( int mask2, state2, ierr, origin; unsigned long int mask1, idim_max_buf, j, state1, 
state1check; MPI_Status statusMPI; - double complex Jint, dmv, dam_pr; + double complex Jint, dmv; /* Prepare index in the inter PE */ @@ -674,35 +575,18 @@ double complex X_GC_child_CisAisCjuAjv_spin_MPIsingle( */ mask1 = X->Def.Tpow[org_isite1]; - dam_pr = 0.0; -#pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv, state1) \ +#pragma omp parallel default(none) private(j, dmv, state1) \ firstprivate(idim_max_buf, Jint, X, state1check, mask1) shared(v1buf, tmp_v1, tmp_v0) { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { -#pragma omp for - for (j = 0; j < idim_max_buf; j++) { - state1 = (j & mask1) / mask1; - if (state1 == state1check) { - dmv = Jint * v1buf[j + 1]; - tmp_v0[j + 1] += dmv; - dam_pr += conj(tmp_v1[j + 1]) * dmv; - }/*if (state1 == state1check)*/ - }/*for (j = 0; j < idim_max_buf; j++)*/ - } - else { #pragma omp for - for (j = 0; j < idim_max_buf; j++) { - state1 = (j & mask1) / mask1; - if (state1 == state1check) { - dmv = Jint * v1buf[j + 1]; - dam_pr += conj(tmp_v1[j + 1]) * dmv; - }/*if (state1 == state1check)*/ - }/*for (j = 0; j < idim_max_buf; j++)*/ - } + for (j = 0; j < idim_max_buf; j++) { + state1 = (j & mask1) / mask1; + if (state1 == state1check) { + dmv = Jint * v1buf[j + 1]; + tmp_v0[j + 1] += dmv; + }/*if (state1 == state1check)*/ + }/*for (j = 0; j < idim_max_buf; j++)*/ }/*End of parallel region*/ - return (dam_pr); -#else - return 0.0; #endif }/*void GC_child_CisAitCiuAiv_spin_MPIsingle*/ /** @@ -717,12 +601,10 @@ void GC_child_CisAitCjuAju_spin_MPIsingle( double complex **tmp_v1//!<[in] v0 = H v1 ){ #ifdef MPI - double complex dam_pr; - dam_pr =X_GC_child_CisAitCjuAju_spin_MPIsingle( + X_GC_child_CisAitCjuAju_spin_MPIsingle( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][3], X->Def.InterAll_OffDiagonal[i_int][4], X->Def.InterAll_OffDiagonal[i_int][5], X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); - 
X->Large.prdct += dam_pr; #endif }/*void GC_child_CisAisCjuAjv_spin_MPIsingle*/ /** @@ -731,7 +613,7 @@ void GC_child_CisAitCjuAju_spin_MPIsingle( @return @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex X_GC_child_CisAitCjuAju_spin_MPIsingle( +void X_GC_child_CisAitCjuAju_spin_MPIsingle( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_ispin2,//!<[in] Spin 2 @@ -746,7 +628,7 @@ double complex X_GC_child_CisAitCjuAju_spin_MPIsingle( int mask2, state2; unsigned long int mask1, j, ioff, state1, state1check; //MPI_Status statusMPI; - double complex Jint, dmv, dam_pr; + double complex Jint, dmv; /* Prepare index in the inter PE */ @@ -763,59 +645,23 @@ double complex X_GC_child_CisAitCjuAju_spin_MPIsingle( mask1 = (int)X->Def.Tpow[org_isite1]; - dam_pr = 0.0; -#pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv, state1, ioff) \ +#pragma omp parallel default(none) private(j, dmv, state1, ioff) \ firstprivate(Jint, X, state1check, mask1) shared(tmp_v1, tmp_v0) { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { -#pragma omp for - for (j = 0; j < X->Check.idim_max; j++) { - - state1 = (j & mask1) / mask1; - ioff = j ^ mask1; - if (state1 == state1check) { - dmv = Jint * tmp_v1[j + 1]; - } - else { - dmv = conj(Jint) * tmp_v1[j + 1]; - } - tmp_v0[ioff + 1] += dmv; - dam_pr += conj(tmp_v1[ioff + 1]) * dmv; - }/*for (j = 0; j < X->Check.idim_max; j++)*/ - } - else if (X->Large.mode == M_CORR) { #pragma omp for - for (j = 0; j < X->Check.idim_max; j++) { + for (j = 0; j < X->Check.idim_max; j++) { - state1 = (j & mask1) / mask1; - ioff = j ^ mask1; - if (state1 == state1check) { - dmv = Jint * tmp_v1[j + 1]; - } - else { - dmv = 0.0; - } - dam_pr += conj(tmp_v1[ioff + 1]) * dmv; - }/*for (j = 0; j < X->Check.idim_max; j++)*/ - } - else { -#pragma omp for - for (j = 0; j < X->Check.idim_max; j++) { - state1 = (j & mask1) / mask1; - ioff = j ^ mask1; - if 
(state1 == state1check) { - dmv = Jint * tmp_v1[j + 1]; - } - else { - dmv = conj(Jint) * tmp_v1[j + 1]; - } - dam_pr += conj(tmp_v1[ioff + 1]) * dmv; - }/*for (j = 0; j < X->Check.idim_max; j++)*/ - } + state1 = (j & mask1) / mask1; + ioff = j ^ mask1; + if (state1 == state1check) { + dmv = Jint * tmp_v1[j + 1]; + } + else { + dmv = conj(Jint) * tmp_v1[j + 1]; + } + tmp_v0[ioff + 1] += dmv; + }/*for (j = 0; j < X->Check.idim_max; j++)*/ }/*End of parallel region*/ - return (dam_pr); -#else - return 0.0; #endif }/*void GC_child_CisAitCiuAiv_spin_MPIsingle*/ /** @@ -823,7 +669,7 @@ double complex X_GC_child_CisAitCjuAju_spin_MPIsingle( When both site1 and site3 are in the inter process region. @return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ -double complex X_GC_child_CisAisCjuAjv_GeneralSpin_MPIdouble( +void X_GC_child_CisAisCjuAjv_GeneralSpin_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite3,//!<[in] Site 3 @@ -837,7 +683,7 @@ double complex X_GC_child_CisAisCjuAjv_GeneralSpin_MPIdouble( #ifdef MPI unsigned long int off, j; int origin, ierr; - double complex tmp_V, dmv, dam_pr; + double complex tmp_V, dmv; MPI_Status statusMPI; int ihermite = TRUE; if (org_isite1 == org_isite3 && org_ispin1 == org_ispin4) {//cisaisciuais=0 && cisaiucisais=0 @@ -868,8 +714,7 @@ double complex X_GC_child_CisAisCjuAjv_GeneralSpin_MPIdouble( MPI_COMM_WORLD, &statusMPI); if (ierr != 0) exitMPI(-1); - dam_pr = 0.0; -#pragma omp parallel default(none) reduction(+:dam_pr) firstprivate(X, tmp_V) \ +#pragma omp parallel default(none) firstprivate(X, tmp_V) \ private(j, dmv) shared (tmp_v0, tmp_v1, v1buf) { if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { @@ -877,18 +722,16 @@ private(j, dmv) shared (tmp_v0, tmp_v1, v1buf) for (j = 1; j <= X->Check.idim_max; j++) { dmv = v1buf[j] * tmp_V; tmp_v0[j] += dmv; - dam_pr += conj(tmp_v1[j]) * dmv; } } else { #pragma omp for for (j = 1; j <= X->Check.idim_max; j++) { dmv = v1buf[j] * 
tmp_V; - dam_pr += conj(tmp_v1[j]) * dmv; } } }/*End of parallel region*/ - return dam_pr; + return; #else return 0.0; #endif @@ -898,7 +741,7 @@ private(j, dmv) shared (tmp_v0, tmp_v1, v1buf) When both site1 and site3 are in the inter process region. @return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ -double complex X_GC_child_CisAitCjuAju_GeneralSpin_MPIdouble( +void X_GC_child_CisAitCjuAju_GeneralSpin_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_ispin2,//!<[in] Spin 2 @@ -912,7 +755,7 @@ double complex X_GC_child_CisAitCjuAju_GeneralSpin_MPIdouble( #ifdef MPI unsigned long int j, off; int origin, ierr; - double complex tmp_V, dmv, dam_pr; + double complex tmp_V, dmv; MPI_Status statusMPI; if (org_isite1 == org_isite3 && org_ispin1 == org_ispin3) {//cisaitcisais=0 && cisaiscitais=0 @@ -944,8 +787,7 @@ double complex X_GC_child_CisAitCjuAju_GeneralSpin_MPIdouble( MPI_COMM_WORLD, &statusMPI); if (ierr != 0) exitMPI(-1); - dam_pr = 0.0; -#pragma omp parallel default(none) reduction(+:dam_pr) firstprivate(X, tmp_V) private(j, dmv) \ +#pragma omp parallel default(none) firstprivate(X, tmp_V) private(j, dmv) \ shared (tmp_v0, tmp_v1, v1buf) { if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { @@ -953,18 +795,16 @@ shared (tmp_v0, tmp_v1, v1buf) for (j = 1; j <= X->Check.idim_max; j++) { dmv = v1buf[j] * tmp_V; tmp_v0[j] += dmv; - dam_pr += conj(tmp_v1[j]) * dmv; } } else { #pragma omp for for (j = 1; j <= X->Check.idim_max; j++) { dmv = v1buf[j] * tmp_V; - dam_pr += conj(tmp_v1[j]) * dmv; } } }/*End of parallel region*/ - return dam_pr; + return; #else return 0.0; #endif @@ -974,7 +814,7 @@ shared (tmp_v0, tmp_v1, v1buf) grandcanonical general spin system when both site is in the inter process region @return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ -double complex X_GC_child_CisAitCjuAjv_GeneralSpin_MPIdouble( +void X_GC_child_CisAitCjuAjv_GeneralSpin_MPIdouble( int org_isite1,//!<[in] Site 1 int 
org_ispin1,//!<[in] Spin 1 int org_ispin2,//!<[in] Spin 2 @@ -989,15 +829,15 @@ double complex X_GC_child_CisAitCjuAjv_GeneralSpin_MPIdouble( #ifdef MPI unsigned long int tmp_off, off, j; int origin, ierr, ihermite; - double complex tmp_V, dmv, dam_pr; + double complex tmp_V, dmv; MPI_Status statusMPI; ihermite = TRUE; if (org_isite1 == org_isite3 && org_ispin1 == org_ispin4 && org_ispin2 == org_ispin3) { //cisaitcitais=cisais && cisaitcitais =cisais - dam_pr = X_GC_child_CisAis_GeneralSpin_MPIdouble(org_isite1, org_ispin1, tmp_J, X, nstate, tmp_v0, tmp_v1); - return (dam_pr); + X_GC_child_CisAis_GeneralSpin_MPIdouble(org_isite1, org_ispin1, tmp_J, X, nstate, tmp_v0, tmp_v1); + return; } //cisaitcisait if (GetOffCompGeneralSpin((unsigned long int) myrank, org_isite1 + 1, org_ispin1, org_ispin2, @@ -1035,8 +875,7 @@ double complex X_GC_child_CisAitCjuAjv_GeneralSpin_MPIdouble( MPI_COMM_WORLD, &statusMPI); if (ierr != 0) exitMPI(-1); - dam_pr = 0.0; -#pragma omp parallel default(none) reduction(+:dam_pr) firstprivate(X, tmp_V) private(j, dmv) \ +#pragma omp parallel default(none) firstprivate(X, tmp_V) private(j, dmv) \ shared (tmp_v0, tmp_v1, v1buf) { if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { @@ -1044,18 +883,16 @@ double complex X_GC_child_CisAitCjuAjv_GeneralSpin_MPIdouble( for (j = 1; j <= X->Check.idim_max; j++) { dmv = v1buf[j] * tmp_V; tmp_v0[j] += dmv; - dam_pr += conj(tmp_v1[j]) * dmv; } } else { #pragma omp for for (j = 1; j <= X->Check.idim_max; j++) { dmv = v1buf[j] * tmp_V; - dam_pr += conj(tmp_v1[j]) * dmv; } } }/*End of parallel region*/ - return dam_pr; + return; #else return 0.0; #endif @@ -1065,7 +902,7 @@ double complex X_GC_child_CisAitCjuAjv_GeneralSpin_MPIdouble( grandcanonical general spin system when both site is in the inter process region @return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ -double complex X_GC_child_CisAisCjuAju_GeneralSpin_MPIdouble( +void X_GC_child_CisAisCjuAju_GeneralSpin_MPIdouble( int 
org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite3,//!<[in] Site 3 @@ -1077,7 +914,7 @@ double complex X_GC_child_CisAisCjuAju_GeneralSpin_MPIdouble( ) { #ifdef MPI unsigned long int j, num1; - double complex tmp_V, dmv, dam_pr; + double complex tmp_V, dmv; //MPI_Status statusMPI; num1 = BitCheckGeneral((unsigned long int) myrank, org_isite1 + 1, org_ispin1, X->Def.SiteToBit, X->Def.Tpow); @@ -1091,8 +928,7 @@ double complex X_GC_child_CisAisCjuAju_GeneralSpin_MPIdouble( } else return 0.0; - dam_pr = 0.0; -#pragma omp parallel default(none) reduction(+:dam_pr) firstprivate(X, tmp_V) private(j, dmv) \ +#pragma omp parallel default(none) firstprivate(X, tmp_V) private(j, dmv) \ shared (tmp_v0, tmp_v1) { if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { @@ -1100,18 +936,16 @@ shared (tmp_v0, tmp_v1) for (j = 1; j <= X->Check.idim_max; j++) { dmv = tmp_v1[j] * tmp_V; tmp_v0[j] += dmv; - dam_pr += conj(tmp_v1[j]) * dmv; }/*for (j = 1; j <= X->Check.idim_max; j++)*/ } else { #pragma omp for for (j = 1; j <= X->Check.idim_max; j++) { dmv = tmp_v1[j] * tmp_V; - dam_pr += conj(tmp_v1[j]) * dmv; }/*for (j = 1; j <= X->Check.idim_max; j++)*/ } }/*End of parallel region*/ - return dam_pr; + return; #else return 0.0; #endif @@ -1121,7 +955,7 @@ shared (tmp_v0, tmp_v1) grandcanonical general spin system when both site is in the inter process region @return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ -double complex X_GC_child_CisAit_GeneralSpin_MPIdouble( +void X_GC_child_CisAit_GeneralSpin_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_ispin2,//!<[in] Spin 2 @@ -1133,7 +967,7 @@ double complex X_GC_child_CisAit_GeneralSpin_MPIdouble( #ifdef MPI unsigned long int off, j; int origin, ierr; - double complex tmp_V, dmv, dam_pr; + double complex tmp_V, dmv; MPI_Status statusMPI; if (GetOffCompGeneralSpin((unsigned long int) myrank, org_isite1 + 1, org_ispin1, org_ispin2, @@ -1155,8 +989,7 @@ double complex 
X_GC_child_CisAit_GeneralSpin_MPIdouble( MPI_COMM_WORLD, &statusMPI); if (ierr != 0) exitMPI(-1); - dam_pr = 0.0; -#pragma omp parallel default(none) reduction(+:dam_pr) firstprivate(X, tmp_V) private(j, dmv) \ +#pragma omp parallel default(none) firstprivate(X, tmp_V) private(j, dmv) \ shared (tmp_v0, tmp_v1, v1buf) { if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { @@ -1164,18 +997,16 @@ shared (tmp_v0, tmp_v1, v1buf) for (j = 1; j <= X->Check.idim_max; j++) { dmv = v1buf[j] * tmp_V; tmp_v0[j] += dmv; - dam_pr += conj(tmp_v1[j]) * dmv; }/*for (j = 1; j <= X->Check.idim_max; j++)*/ } else { #pragma omp for for (j = 1; j <= X->Check.idim_max; j++) { dmv = v1buf[j] * tmp_V; - dam_pr += conj(tmp_v1[j]) * dmv; }/*for (j = 1; j <= X->Check.idim_max; j++)*/ } }/*End of parallel region*/ - return dam_pr; + return; #else return 0.0; #endif @@ -1185,7 +1016,7 @@ shared (tmp_v0, tmp_v1, v1buf) grandcanonical general spin system when both site is in the inter process region @return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ -double complex X_GC_child_CisAis_GeneralSpin_MPIdouble( +void X_GC_child_CisAis_GeneralSpin_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 double complex tmp_trans,//!<[in] Coupling constant @@ -1195,7 +1026,7 @@ double complex X_GC_child_CisAis_GeneralSpin_MPIdouble( ) { #ifdef MPI unsigned long int j, num1; - double complex tmp_V, dmv, dam_pr; + double complex tmp_V, dmv; //MPI_Status statusMPI; num1 = BitCheckGeneral((unsigned long int) myrank, @@ -1205,8 +1036,7 @@ double complex X_GC_child_CisAis_GeneralSpin_MPIdouble( } else return 0.0; - dam_pr = 0.0; -#pragma omp parallel default(none) reduction(+:dam_pr) firstprivate(X, tmp_V) private(j, dmv) \ +#pragma omp parallel default(none) firstprivate(X, tmp_V) private(j, dmv) \ shared (tmp_v0, tmp_v1) { if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { @@ -1214,18 +1044,16 @@ shared (tmp_v0, tmp_v1) for (j = 1; j <= X->Check.idim_max; j++) { 
dmv = tmp_v1[j] * tmp_V; tmp_v0[j] += dmv; - dam_pr += conj(tmp_v1[j]) * dmv; }/*for (j = 1; j <= X->Check.idim_max; j++)*/ } else { #pragma omp for for (j = 1; j <= X->Check.idim_max; j++) { dmv = tmp_v1[j] * tmp_V; - dam_pr += conj(tmp_v1[j]) * dmv; }/*for (j = 1; j <= X->Check.idim_max; j++)*/ } }/*End of parallel region*/ - return dam_pr; + return; #else return 0.0; #endif @@ -1235,7 +1063,7 @@ shared (tmp_v0, tmp_v1) grandcanonical general spin system when both site is in the inter process region @return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ -double complex X_GC_child_AisCis_GeneralSpin_MPIdouble( +void X_GC_child_AisCis_GeneralSpin_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 double complex tmp_trans,//!<[in] Coupling constant @@ -1245,7 +1073,7 @@ double complex X_GC_child_AisCis_GeneralSpin_MPIdouble( ) { #ifdef MPI unsigned long int j, num1; - double complex tmp_V, dmv, dam_pr; + double complex tmp_V, dmv; //MPI_Status statusMPI; num1 = BitCheckGeneral((unsigned long int) myrank, @@ -1255,8 +1083,7 @@ double complex X_GC_child_AisCis_GeneralSpin_MPIdouble( } else return 0.0; - dam_pr = 0.0; -#pragma omp parallel default(none) reduction(+:dam_pr) firstprivate(X, tmp_V) private(j, dmv) \ +#pragma omp parallel default(none) firstprivate(X, tmp_V) private(j, dmv) \ shared (tmp_v0, tmp_v1) { if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { @@ -1264,18 +1091,16 @@ shared (tmp_v0, tmp_v1) for (j = 1; j <= X->Check.idim_max; j++) { dmv = tmp_v1[j] * tmp_V; tmp_v0[j] += dmv; - dam_pr += conj(tmp_v1[j]) * dmv; }/*for (j = 1; j <= X->Check.idim_max; j++)*/ } else { #pragma omp for for (j = 1; j <= X->Check.idim_max; j++) { dmv = tmp_v1[j] * tmp_V; - dam_pr += conj(tmp_v1[j]) * dmv; }/*for (j = 1; j <= X->Check.idim_max; j++)*/ } }/*End of Parallel region*/ - return dam_pr; + return; #else return 0.0; #endif @@ -1285,7 +1110,7 @@ shared (tmp_v0, tmp_v1) canonical general spin system when both site is in the inter 
process region @return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ -double complex X_child_CisAit_GeneralSpin_MPIdouble( +void X_child_CisAit_GeneralSpin_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_ispin2,//!<[in] Spin 2 @@ -1359,7 +1184,7 @@ shared (tmp_v0, tmp_v1, v1buf) grandcanonical general spin system when one of these site is in the inter process region @return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ -double complex X_GC_child_CisAisCjuAjv_GeneralSpin_MPIsingle( +void X_GC_child_CisAisCjuAjv_GeneralSpin_MPIsingle( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite3,//!<[in] Site 3 @@ -1373,7 +1198,7 @@ double complex X_GC_child_CisAisCjuAjv_GeneralSpin_MPIsingle( #ifdef MPI unsigned long int off, j, num1; int origin, ierr, isite, IniSpin; - double complex tmp_V, dmv, dam_pr; + double complex tmp_V, dmv; MPI_Status statusMPI; if (GetOffCompGeneralSpin((unsigned long int)myrank, @@ -1402,8 +1227,7 @@ double complex X_GC_child_CisAisCjuAjv_GeneralSpin_MPIsingle( MPI_COMM_WORLD, &statusMPI); if (ierr != 0) exitMPI(-1); - dam_pr = 0.0; -#pragma omp parallel default(none) reduction(+:dam_pr) firstprivate(X, tmp_V, isite, IniSpin) \ +#pragma omp parallel default(none) firstprivate(X, tmp_V, isite, IniSpin) \ private(j, dmv, num1) shared (tmp_v0, tmp_v1, v1buf) { if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { @@ -1413,7 +1237,6 @@ private(j, dmv, num1) shared (tmp_v0, tmp_v1, v1buf) if (num1 != 0) { dmv = v1buf[j] * tmp_V; tmp_v0[j] += dmv; - dam_pr += conj(tmp_v1[j]) * dmv; }/*if (num1 != 0)*/ }/*for (j = 1; j <= X->Check.idim_max; j++)*/ } @@ -1423,12 +1246,11 @@ private(j, dmv, num1) shared (tmp_v0, tmp_v1, v1buf) num1 = BitCheckGeneral(j - 1, isite, IniSpin, X->Def.SiteToBit, X->Def.Tpow); if (num1 != 0) { dmv = v1buf[j] * tmp_V; - dam_pr += conj(tmp_v1[j]) * dmv; }/*if (num1 != 0)*/ }/*for (j = 1; j <= X->Check.idim_max; j++)*/ } }/*End of parallel region*/ - return 
dam_pr; + return; #else return 0.0; #endif @@ -1438,7 +1260,7 @@ private(j, dmv, num1) shared (tmp_v0, tmp_v1, v1buf) grandcanonical general spin system when one of these site is in the inter process region @return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ -double complex X_GC_child_CisAitCjuAju_GeneralSpin_MPIsingle( +void X_GC_child_CisAitCjuAju_GeneralSpin_MPIsingle( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_ispin2,//!<[in] Spin 2 @@ -1452,7 +1274,7 @@ double complex X_GC_child_CisAitCjuAju_GeneralSpin_MPIsingle( #ifdef MPI unsigned long int num1, j, off; int isite, IniSpin, FinSpin; - double complex tmp_V, dmv, dam_pr; + double complex tmp_V, dmv; //MPI_Status statusMPI; num1 = BitCheckGeneral((unsigned long int)myrank, @@ -1465,8 +1287,7 @@ double complex X_GC_child_CisAitCjuAju_GeneralSpin_MPIsingle( } else return 0.0; - dam_pr = 0.0; -#pragma omp parallel default(none) reduction(+:dam_pr) \ +#pragma omp parallel default(none) \ firstprivate(X, tmp_V, isite, IniSpin, FinSpin) private(j, dmv, num1, off) \ shared (tmp_v0, tmp_v1, v1buf) { @@ -1478,14 +1299,12 @@ shared (tmp_v0, tmp_v1, v1buf) { dmv = tmp_v1[j] * tmp_V; tmp_v0[off + 1] += dmv; - dam_pr += conj(tmp_v1[off + 1]) * dmv; } else if (GetOffCompGeneralSpin(j - 1, isite, FinSpin, IniSpin, &off, X->Def.SiteToBit, X->Def.Tpow) == TRUE) { dmv = tmp_v1[j] * conj(tmp_V); tmp_v0[off + 1] += dmv; - dam_pr += conj(tmp_v1[off + 1]) * dmv; } }/*for (j = 1; j <= X->Check.idim_max; j++)*/ } @@ -1496,12 +1315,11 @@ shared (tmp_v0, tmp_v1, v1buf) X->Def.SiteToBit, X->Def.Tpow) == TRUE) { dmv = tmp_v1[j] * tmp_V; - dam_pr += conj(tmp_v1[off + 1]) * dmv; } }/*for (j = 1; j <= X->Check.idim_max; j++)*/ } }/*End of parallel region*/ - return dam_pr; + return; #else return 0.0; #endif @@ -1511,7 +1329,7 @@ shared (tmp_v0, tmp_v1, v1buf) grandcanonical general spin system when one of these site is in the inter process region @return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ -double 
complex X_GC_child_CisAitCjuAjv_GeneralSpin_MPIsingle( +void X_GC_child_CisAitCjuAjv_GeneralSpin_MPIsingle( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_ispin2,//!<[in] Spin 2 @@ -1526,7 +1344,7 @@ double complex X_GC_child_CisAitCjuAjv_GeneralSpin_MPIsingle( #ifdef MPI unsigned long int off, j; int origin, ierr, isite, IniSpin, FinSpin; - double complex tmp_V, dmv, dam_pr; + double complex tmp_V, dmv; MPI_Status statusMPI; if (GetOffCompGeneralSpin((unsigned long int)myrank, @@ -1557,8 +1375,7 @@ double complex X_GC_child_CisAitCjuAjv_GeneralSpin_MPIsingle( MPI_COMM_WORLD, &statusMPI); if (ierr != 0) exitMPI(-1); - dam_pr = 0.0; -#pragma omp parallel default(none) reduction(+:dam_pr) \ +#pragma omp parallel default(none) \ firstprivate(X, tmp_V, isite, IniSpin, FinSpin) private(j, dmv, off) shared (tmp_v0, tmp_v1, v1buf) { if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { @@ -1569,7 +1386,6 @@ firstprivate(X, tmp_V, isite, IniSpin, FinSpin) private(j, dmv, off) shared (tmp { dmv = v1buf[j] * tmp_V; tmp_v0[off + 1] += dmv; - dam_pr += conj(tmp_v1[off + 1]) * dmv; } }/*for (j = 1; j <= X->Check.idim_max; j++)*/ } @@ -1580,12 +1396,11 @@ firstprivate(X, tmp_V, isite, IniSpin, FinSpin) private(j, dmv, off) shared (tmp X->Def.SiteToBit, X->Def.Tpow) == TRUE) { dmv = v1buf[j] * tmp_V; - dam_pr += conj(tmp_v1[off + 1]) * dmv; } }/*for (j = 1; j <= X->Check.idim_max; j++)*/ } }/*End of parallel region*/ - return dam_pr; + return; #else return 0.0; #endif @@ -1595,7 +1410,7 @@ firstprivate(X, tmp_V, isite, IniSpin, FinSpin) private(j, dmv, off) shared (tmp grandcanonical general spin system when one of these site is in the inter process region @return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ -double complex X_GC_child_CisAisCjuAju_GeneralSpin_MPIsingle( +void X_GC_child_CisAisCjuAju_GeneralSpin_MPIsingle( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite3,//!<[in] Site 3 @@ -1607,7 +1422,7 @@ double 
complex X_GC_child_CisAisCjuAju_GeneralSpin_MPIsingle( ){ #ifdef MPI unsigned long int j, num1; - double complex tmp_V, dmv, dam_pr; + double complex tmp_V, dmv; //MPI_Status statusMPI; num1 = BitCheckGeneral((unsigned long int)myrank, org_isite3+1, org_ispin3, X->Def.SiteToBit, X->Def.Tpow); @@ -1616,8 +1431,7 @@ double complex X_GC_child_CisAisCjuAju_GeneralSpin_MPIsingle( } else return 0.0; - dam_pr = 0.0; -#pragma omp parallel default(none) reduction(+:dam_pr) \ +#pragma omp parallel default(none) \ firstprivate(X, tmp_V, org_isite1, org_ispin1) private(j, dmv, num1) shared (tmp_v0, tmp_v1) { if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { @@ -1627,7 +1441,6 @@ firstprivate(X, tmp_V, org_isite1, org_ispin1) private(j, dmv, num1) shared (tmp dmv = tmp_v1[j] * tmp_V * num1; tmp_v0[j] += dmv; - dam_pr += conj(tmp_v1[j]) * dmv; }/*for (j = 1; j <= X->Check.idim_max; j++)*/ } else { @@ -1635,11 +1448,10 @@ firstprivate(X, tmp_V, org_isite1, org_ispin1) private(j, dmv, num1) shared (tmp for (j = 1; j <= X->Check.idim_max; j++) { num1 = BitCheckGeneral(j - 1, org_isite1 + 1, org_ispin1, X->Def.SiteToBit, X->Def.Tpow); dmv = tmp_v1[j] * tmp_V * num1; - dam_pr += conj(tmp_v1[j]) * dmv; }/*for (j = 1; j <= X->Check.idim_max; j++)*/ } }/*End of parallel region*/ - return dam_pr; + return; #else return 0.0; #endif @@ -1649,7 +1461,7 @@ firstprivate(X, tmp_V, org_isite1, org_ispin1) private(j, dmv, num1) shared (tmp canonical general spin system when both sites are in the inter process region @return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ -double complex X_child_CisAitCjuAjv_GeneralSpin_MPIdouble( +void X_child_CisAitCjuAjv_GeneralSpin_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_ispin2,//!<[in] Spin 2 @@ -1664,7 +1476,7 @@ double complex X_child_CisAitCjuAjv_GeneralSpin_MPIdouble( #ifdef MPI unsigned long int tmp_off, off, j, idim_max_buf; int origin, ierr; - double complex tmp_V, dmv, dam_pr; + double complex 
tmp_V, dmv; MPI_Status statusMPI; int ihermite=TRUE; @@ -1713,8 +1525,7 @@ double complex X_child_CisAitCjuAjv_GeneralSpin_MPIdouble( MPI_COMM_WORLD, &statusMPI); if (ierr != 0) exitMPI(-1); - dam_pr = 0.0; -#pragma omp parallel default(none) reduction(+:dam_pr) firstprivate(X, tmp_V, idim_max_buf) \ +#pragma omp parallel default(none) firstprivate(X, tmp_V, idim_max_buf) \ private(j, dmv, off) shared (tmp_v0, tmp_v1, list_1buf, v1buf) { if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { @@ -1723,7 +1534,6 @@ private(j, dmv, off) shared (tmp_v0, tmp_v1, list_1buf, v1buf) ConvertToList1GeneralSpin(list_1buf[j], X->Check.sdim, &off); dmv = v1buf[j] * tmp_V; tmp_v0[off] += dmv; - dam_pr += conj(tmp_v1[off]) * dmv; }/*for (j = 1; j <= idim_max_buf; j++)*/ } else { @@ -1731,11 +1541,10 @@ private(j, dmv, off) shared (tmp_v0, tmp_v1, list_1buf, v1buf) for (j = 1; j <= idim_max_buf; j++) { ConvertToList1GeneralSpin(list_1buf[j], X->Check.sdim, &off); dmv = v1buf[j] * tmp_V; - dam_pr += conj(tmp_v1[off]) * dmv; }/*for (j = 1; j <= idim_max_buf; j++)*/ } }/*End of parallel region*/ - return dam_pr; + return; #else return 0.0; #endif @@ -1745,7 +1554,7 @@ private(j, dmv, off) shared (tmp_v0, tmp_v1, list_1buf, v1buf) canonical general spin system when both sites are in the inter process region @return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ -double complex X_child_CisAisCjuAju_GeneralSpin_MPIdouble( +void X_child_CisAisCjuAju_GeneralSpin_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite3,//!<[in] Site 3 @@ -1757,7 +1566,7 @@ double complex X_child_CisAisCjuAju_GeneralSpin_MPIdouble( ) { #ifdef MPI unsigned long int j, num1; - double complex tmp_V, dmv, dam_pr; + double complex tmp_V, dmv; if (org_isite1 == org_isite3 && org_ispin1 == org_ispin3) { num1 = BitCheckGeneral((unsigned long int) myrank, org_isite1 + 1, org_ispin1, X->Def.SiteToBit, X->Def.Tpow); @@ -1784,8 +1593,7 @@ double complex 
X_child_CisAisCjuAju_GeneralSpin_MPIdouble( return 0.0; } } - dam_pr = 0.0; -#pragma omp parallel default(none) reduction(+:dam_pr) firstprivate(X, tmp_V) private(j, dmv) \ +#pragma omp parallel default(none) firstprivate(X, tmp_V) private(j, dmv) \ shared (tmp_v0, tmp_v1) { if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { @@ -1793,18 +1601,16 @@ shared (tmp_v0, tmp_v1) for (j = 1; j <= X->Check.idim_max; j++) { dmv = tmp_v1[j] * tmp_V; tmp_v0[j] += dmv; - dam_pr += conj(tmp_v1[j]) * dmv; }/*for (j = 1; j <= X->Check.idim_max; j++)*/ } else { #pragma omp for for (j = 1; j <= X->Check.idim_max; j++) { dmv = tmp_v1[j] * tmp_V; - dam_pr += conj(tmp_v1[j]) * dmv; }/*for (j = 1; j <= X->Check.idim_max; j++)*/ } }/*End of parallel region*/ - return dam_pr; + return; #else return 0.0; #endif @@ -1814,7 +1620,7 @@ shared (tmp_v0, tmp_v1) canonical general spin system when one of these sites is in the inter process region @return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ -double complex X_child_CisAisCjuAju_GeneralSpin_MPIsingle( +void X_child_CisAisCjuAju_GeneralSpin_MPIsingle( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite3,//!<[in] Site 3 @@ -1827,7 +1633,7 @@ double complex X_child_CisAisCjuAju_GeneralSpin_MPIsingle( { #ifdef MPI unsigned long int j, num1; - double complex tmp_V, dmv, dam_pr; + double complex tmp_V, dmv; //MPI_Status statusMPI; num1 = BitCheckGeneral((unsigned long int) myrank, org_isite3 + 1, org_ispin3, X->Def.SiteToBit, X->Def.Tpow); @@ -1836,8 +1642,7 @@ double complex X_child_CisAisCjuAju_GeneralSpin_MPIsingle( } else return 0.0; - dam_pr = 0.0; -#pragma omp parallel default(none) reduction(+:dam_pr) \ +#pragma omp parallel default(none) \ firstprivate(X, tmp_V, org_isite1, org_ispin1) private(j, dmv, num1) shared (tmp_v0, tmp_v1, list_1) { if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { @@ -1847,7 +1652,6 @@ firstprivate(X, tmp_V, org_isite1, org_ispin1) private(j, dmv, num1) 
shared (tmp dmv = tmp_v1[j] * tmp_V * num1; tmp_v0[j] += dmv; - dam_pr += conj(tmp_v1[j]) * dmv; }/*for (j = 1; j <= X->Check.idim_max; j++)*/ } else { @@ -1856,11 +1660,10 @@ firstprivate(X, tmp_V, org_isite1, org_ispin1) private(j, dmv, num1) shared (tmp num1 = BitCheckGeneral(list_1[j], org_isite1 + 1, org_ispin1, X->Def.SiteToBit, X->Def.Tpow); dmv = tmp_v1[j] * tmp_V * num1; - dam_pr += conj(tmp_v1[j]) * dmv; }/*for (j = 1; j <= X->Check.idim_max; j++)*/ } }/*End of parallel region*/ - return dam_pr; + return; #else return 0.0; #endif @@ -1870,7 +1673,7 @@ firstprivate(X, tmp_V, org_isite1, org_ispin1) private(j, dmv, num1) shared (tmp canonical general spin system when one of these sites is in the inter process region @return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ -double complex X_child_CisAitCjuAjv_GeneralSpin_MPIsingle( +void X_child_CisAitCjuAjv_GeneralSpin_MPIsingle( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_ispin2,//!<[in] Spin 2 @@ -1885,7 +1688,7 @@ double complex X_child_CisAitCjuAjv_GeneralSpin_MPIsingle( #ifdef MPI unsigned long int tmp_off, off, j, idim_max_buf; int origin, ierr, isite, IniSpin, FinSpin; - double complex tmp_V, dmv, dam_pr; + double complex tmp_V, dmv; MPI_Status statusMPI; if (GetOffCompGeneralSpin((unsigned long int)myrank, @@ -1923,8 +1726,7 @@ double complex X_child_CisAitCjuAjv_GeneralSpin_MPIsingle( MPI_COMM_WORLD, &statusMPI); if (ierr != 0) exitMPI(-1); - dam_pr = 0.0; -#pragma omp parallel default(none) reduction(+:dam_pr) \ +#pragma omp parallel default(none) \ firstprivate(X, tmp_V, idim_max_buf, IniSpin, FinSpin, isite) \ private(j, dmv, off, tmp_off) shared (tmp_v0, tmp_v1, list_1buf, v1buf) { @@ -1938,7 +1740,6 @@ private(j, dmv, off, tmp_off) shared (tmp_v0, tmp_v1, list_1buf, v1buf) ConvertToList1GeneralSpin(tmp_off, X->Check.sdim, &off); dmv = v1buf[j] * tmp_V; tmp_v0[off] += dmv; - dam_pr += conj(tmp_v1[off]) * dmv; } }/*for (j = 1; j <= idim_max_buf; j++)*/ } @@ -1951,12 
+1752,11 @@ private(j, dmv, off, tmp_off) shared (tmp_v0, tmp_v1, list_1buf, v1buf) { ConvertToList1GeneralSpin(tmp_off, X->Check.sdim, &off); dmv = v1buf[j] * tmp_V; - dam_pr += conj(tmp_v1[off]) * dmv; } }/*for (j = 1; j <= idim_max_buf; j++)*/ } }/*End of parallel region*/ - return dam_pr; + return; #else return 0.0; #endif @@ -1967,7 +1767,7 @@ private(j, dmv, off, tmp_off) shared (tmp_v0, tmp_v1, list_1buf, v1buf) @return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex X_GC_child_CisAit_spin_MPIdouble( +void X_GC_child_CisAit_spin_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_ispin2,//!<[in] Spin 2 @@ -1980,7 +1780,7 @@ double complex X_GC_child_CisAit_spin_MPIdouble( int mask1, state1, ierr, origin; unsigned long int idim_max_buf, j; MPI_Status statusMPI; - double complex trans, dmv, dam_pr; + double complex trans, dmv; mask1 = (int)X->Def.Tpow[org_isite1]; origin = myrank ^ mask1; @@ -2010,8 +1810,7 @@ double complex X_GC_child_CisAit_spin_MPIdouble( MPI_COMM_WORLD, &statusMPI); if (ierr != 0) exitMPI(-1); - dam_pr = 0.0; -#pragma omp parallel default(none) reduction(+:dam_pr) private(j, dmv) \ +#pragma omp parallel default(none) private(j, dmv) \ firstprivate(idim_max_buf, trans, X) shared(v1buf, tmp_v1, tmp_v0) { if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { @@ -2019,18 +1818,16 @@ firstprivate(idim_max_buf, trans, X) shared(v1buf, tmp_v1, tmp_v0) for (j = 1; j <= X->Check.idim_max; j++) { dmv = trans * v1buf[j]; tmp_v0[j] += dmv; - dam_pr += conj(tmp_v1[j]) * dmv; }/*for (j = 1; j <= X->Check.idim_max; j++)*/ } else { #pragma omp for for (j = 1; j <= X->Check.idim_max; j++) { dmv = trans * v1buf[j]; - dam_pr += conj(tmp_v1[j]) * dmv; }/*for (j = 1; j <= X->Check.idim_max; j++)*/ } }/*End of parallel region*/ - return (dam_pr); + return; #else return 0.0; #endif @@ -2041,7 +1838,7 @@ firstprivate(idim_max_buf, trans, X) shared(v1buf, 
tmp_v1, tmp_v0) @return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex X_child_CisAit_spin_MPIdouble( +void X_child_CisAit_spin_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin2,//!<[in] Spin 2 double complex tmp_trans,//!<[in] Coupling constant @@ -2119,7 +1916,7 @@ shared(v1buf, tmp_v0) @return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex X_GC_child_CisAis_spin_MPIdouble( +void X_GC_child_CisAis_spin_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 double complex tmp_trans,//!<[in] Coupling constant @@ -2131,12 +1928,10 @@ double complex X_GC_child_CisAis_spin_MPIdouble( long unsigned int j; int mask1; int ibit1; - double complex dam_pr; mask1 = (int)X->Def.Tpow[org_isite1]; ibit1 = (((unsigned long int)myrank& mask1)/mask1)^(1-org_ispin1); - dam_pr = 0.0; -#pragma omp parallel reduction(+:dam_pr)default(none) shared(tmp_v1, nstate, tmp_v0, ibit1) \ +#pragma omp parallel default(none) shared(tmp_v1, nstate, tmp_v0, ibit1) \ firstprivate(X, tmp_trans) private(j) { if (ibit1 != 0) { @@ -2144,18 +1939,16 @@ double complex X_GC_child_CisAis_spin_MPIdouble( #pragma omp for for (j = 1; j <= X->Check.idim_max; j++) { tmp_v0[j] += tmp_v1[j] * tmp_trans; - dam_pr += tmp_trans * conj(tmp_v1[j]) * tmp_v1[j]; }/*for (j = 1; j <= X->Check.idim_max; j++)*/ } else { #pragma omp for for (j = 1; j <= X->Check.idim_max; j++) { - dam_pr += tmp_trans * conj(tmp_v1[j]) * tmp_v1[j]; }/*for (j = 1; j <= X->Check.idim_max; j++)*/ } }/*if (ibit1 != 0)*/ }/*End of parallel region*/ - return dam_pr; + return; #else return 0.0; #endif @@ -2166,7 +1959,7 @@ double complex X_GC_child_CisAis_spin_MPIdouble( @return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex X_GC_child_AisCis_spin_MPIdouble( +void X_GC_child_AisCis_spin_MPIdouble( int 
org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 double complex tmp_trans,//!<[in] Coupling constant @@ -2178,31 +1971,20 @@ double complex X_GC_child_AisCis_spin_MPIdouble( long unsigned int j; int mask1; int ibit1; - double complex dam_pr; mask1 = (int)X->Def.Tpow[org_isite1]; ibit1 = (((unsigned long int)myrank& mask1) / mask1) ^ (1 - org_ispin1); - dam_pr = 0.0; -#pragma omp parallel reduction(+:dam_pr)default(none) shared(tmp_v1, nstate, tmp_v0, ibit1) \ +#pragma omp parallel default(none) shared(tmp_v1, nstate, tmp_v0, ibit1) \ firstprivate(X, tmp_trans) private(j) { if (ibit1 == 0) { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply -#pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - tmp_v0[j] += tmp_v1[j] * tmp_trans; - dam_pr += tmp_trans * conj(tmp_v1[j]) * tmp_v1[j]; - }/*for (j = 1; j <= X->Check.idim_max; j++)*/ - } - else { #pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - dam_pr += tmp_trans * conj(tmp_v1[j]) * tmp_v1[j]; - }/*for (j = 1; j <= X->Check.idim_max; j++)*/ - } + for (j = 1; j <= X->Check.idim_max; j++) { + tmp_v0[j] += tmp_v1[j] * tmp_trans; + }/*for (j = 1; j <= X->Check.idim_max; j++)*/ }/*if (ibit1 == 0)*/ }/*End of parallel region*/ - return dam_pr; + return; #else return 0.0; #endif diff --git a/src/mltplySpin.c b/src/mltplySpin.c index c65f08621..e9da2fb21 100644 --- a/src/mltplySpin.c +++ b/src/mltplySpin.c @@ -198,7 +198,6 @@ int mltplyHalfSpin( long unsigned int isite1, isite2, sigma1, sigma2; long unsigned int sigma3, sigma4; - double complex dam_pr; /*[s] For InterAll */ double complex tmp_V; /*[e] For InterAll */ @@ -240,8 +239,7 @@ int mltplyHalfSpin( sigma4 = X->Def.InterAll_OffDiagonal[idx][7]; tmp_V = X->Def.ParaInterAll_OffDiagonal[idx]; child_general_int_spin_GetInfo(X, isite1, isite2, sigma1, sigma2, sigma3, sigma4, tmp_V); - dam_pr = child_general_int_spin(tmp_v0, tmp_v1, X); - X->Large.prdct += dam_pr; + child_general_int_spin(nstate, tmp_v0, tmp_v1, 
X); }/*for (ihermite = 0; ihermite<2; ihermite++)*/ StopTimer(414); } @@ -256,7 +254,7 @@ int mltplyHalfSpin( if (X->Def.ExchangeCoupling[i][0] + 1 > X->Def.Nsite && X->Def.ExchangeCoupling[i][1] + 1 > X->Def.Nsite) { StartTimer(421); - dam_pr = X_child_general_int_spin_MPIdouble( + X_child_general_int_spin_MPIdouble( X->Def.ExchangeCoupling[i][0], sigma1, sigma2, X->Def.ExchangeCoupling[i][1], sigma2, sigma1, X->Def.ParaExchangeCoupling[i], X, nstate, tmp_v0, tmp_v1); @@ -264,7 +262,7 @@ int mltplyHalfSpin( } else if (X->Def.ExchangeCoupling[i][1] + 1 > X->Def.Nsite) { StartTimer(422); - dam_pr = X_child_general_int_spin_MPIsingle( + X_child_general_int_spin_MPIsingle( X->Def.ExchangeCoupling[i][0], sigma1, sigma2, X->Def.ExchangeCoupling[i][1], sigma2, sigma1, X->Def.ParaExchangeCoupling[i], X, nstate, tmp_v0, tmp_v1); @@ -272,7 +270,7 @@ int mltplyHalfSpin( } else if (X->Def.ExchangeCoupling[i][0] + 1 > X->Def.Nsite) { StartTimer(423); - dam_pr = X_child_general_int_spin_MPIsingle( + X_child_general_int_spin_MPIsingle( X->Def.ExchangeCoupling[i][1], sigma2, sigma1, X->Def.ExchangeCoupling[i][0], sigma1, sigma2, conj(X->Def.ParaExchangeCoupling[i]), X, nstate, tmp_v0, tmp_v1); @@ -281,10 +279,9 @@ int mltplyHalfSpin( else { StartTimer(424); child_exchange_spin_GetInfo(i, X); - dam_pr = child_exchange_spin(tmp_v0, tmp_v1, X); + child_exchange_spin(nstate, tmp_v0, tmp_v1, X); StopTimer(424); } - X->Large.prdct += dam_pr; }/*for (i = 0; i < X->Def.NExchangeCoupling; i += 2)*/ StopTimer(420); @@ -310,11 +307,10 @@ int mltplyGeneralSpin( long unsigned int isite1, isite2, sigma1, sigma2; long unsigned int sigma3, sigma4; - double complex dam_pr; long int tmp_sgn; /*[s] For InterAll */ double complex tmp_V; - double complex dmv=0; + int one = 1; /*[e] For InterAll */ long unsigned int i_max; @@ -357,9 +353,8 @@ int mltplyGeneralSpin( sigma3 = X->Def.InterAll_OffDiagonal[idx][5]; sigma4 = X->Def.InterAll_OffDiagonal[idx][7]; tmp_V = X->Def.ParaInterAll_OffDiagonal[idx]; 
- dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) \ - private(j, tmp_sgn, dmv, off, tmp_off, tmp_off2) \ +#pragma omp parallel for default(none) \ + private(j, tmp_sgn, off, tmp_off, tmp_off2) \ firstprivate(i_max, isite1, isite2, sigma1, sigma2, sigma3, sigma4, X, tmp_V, ihfbit) \ shared(tmp_v0, tmp_v1, list_1, list_2_1, list_2_2) for (j = 1; j <= i_max; j++) { @@ -368,15 +363,10 @@ int mltplyGeneralSpin( tmp_sgn = GetOffCompGeneralSpin(tmp_off, isite1, sigma2, sigma1, &tmp_off2, X->Def.SiteToBit, X->Def.Tpow); if (tmp_sgn == TRUE) { ConvertToList1GeneralSpin(tmp_off2, ihfbit, &off); - dmv = tmp_v1[j] * tmp_V; - if (X->Large.mode == M_MLTPLY) { // for multply - tmp_v0[off] += dmv; - } - dam_pr += conj(tmp_v1[off]) * dmv; + zaxpy_(&nstate, &tmp_V, &tmp_v1[j][0], &one, &tmp_v0[off][0], &one); } }/*if (tmp_sgn == TRUE)*/ }/*for (j = 1; j <= i_max; j++)*/ - X->Large.prdct += dam_pr; }/*for (ihermite = 0; ihermite < 2; ihermite++)*/ StopTimer(413); } @@ -424,11 +414,11 @@ int mltplyHalfSpinGC( long unsigned int is1_spin = 0; long unsigned int isite1, isite2, sigma1, sigma2; long unsigned int sigma3, sigma4; - double complex dam_pr; double complex tmp_trans; long int tmp_sgn; /*[s] For InterAll */ double complex tmp_V; + int one = 1; /*[e] For InterAll */ long unsigned int i_max; @@ -442,13 +432,12 @@ int mltplyHalfSpinGC( StartTimer(510); for (i = 0; i < X->Def.EDNTransfer; i+=2 ) { if(X->Def.EDGeneralTransfer[i][0]+1 > X->Def.Nsite){ - dam_pr=0; if(X->Def.EDGeneralTransfer[i][1]==X->Def.EDGeneralTransfer[i][3]){ fprintf(stderr, "Transverse_OffDiagonal component is illegal.\n"); } else{ StartTimer(511); - dam_pr += X_GC_child_CisAit_spin_MPIdouble( + X_GC_child_CisAit_spin_MPIdouble( X->Def.EDGeneralTransfer[i][0], X->Def.EDGeneralTransfer[i][1], X->Def.EDGeneralTransfer[i][3], -X->Def.EDParaGeneralTransfer[i], X, nstate, tmp_v0, tmp_v1); @@ -457,7 +446,6 @@ int mltplyHalfSpinGC( }/*if(X->Def.EDGeneralTransfer[i][0]+1 > X->Def.Nsite)*/ else{ 
StartTimer(512); - dam_pr = 0; for(ihermite=0; ihermite<2; ihermite++){ idx=i+ihermite; isite1 = X->Def.EDGeneralTransfer[idx][0] + 1; @@ -476,21 +464,19 @@ int mltplyHalfSpinGC( // longitudinal magnetic field (considerd in diagonalcalc.c) // transverse magnetic field is1_spin = X->Def.Tpow[isite1 - 1]; -#pragma omp parallel for default(none) reduction(+:dam_pr) \ +#pragma omp parallel for default(none) \ private(j, tmp_sgn) firstprivate(i_max, is1_spin, sigma2, X,off, tmp_trans) \ shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) { tmp_sgn = X_SpinGC_CisAit(j, X, is1_spin, sigma2, &off); if(tmp_sgn !=0){ - tmp_v0[off+1] += tmp_v1[j]*tmp_trans; - dam_pr += tmp_trans * conj(tmp_v1[off + 1]) * tmp_v1[j]; + zaxpy_(&nstate, &tmp_trans, &tmp_v1[j][0], &one, &tmp_v0[off + 1][0], &one); }/*if(tmp_sgn !=0)*/ }/*for (j = 1; j <= i_max; j++)*/ }//sigma1 != sigma2 }/*for(ihermite=0; ihermite<2; ihermite++)*/ StopTimer(512); } - X->Large.prdct += dam_pr; }/*for (i = 0; i < X->Def.EDNTransfer; i+=2 )*/ StopTimer(510); /** @@ -526,8 +512,7 @@ shared(tmp_v0, tmp_v1) sigma4 = X->Def.InterAll_OffDiagonal[idx][7]; tmp_V = X->Def.ParaInterAll_OffDiagonal[idx]; child_general_int_spin_GetInfo(X, isite1, isite2, sigma1, sigma2, sigma3, sigma4, tmp_V); - dam_pr = GC_child_general_int_spin(tmp_v0, tmp_v1, X); - X->Large.prdct += dam_pr; + GC_child_general_int_spin(nstate, tmp_v0, tmp_v1, X); } StopTimer(523); } @@ -542,7 +527,7 @@ shared(tmp_v0, tmp_v1) if (X->Def.ExchangeCoupling[i][0] + 1 > X->Def.Nsite && X->Def.ExchangeCoupling[i][1] + 1 > X->Def.Nsite){ StartTimer(531); - dam_pr = X_GC_child_CisAitCiuAiv_spin_MPIdouble( + X_GC_child_CisAitCiuAiv_spin_MPIdouble( X->Def.ExchangeCoupling[i][0], sigma1, sigma2, X->Def.ExchangeCoupling[i][1], sigma2, sigma1, X->Def.ParaExchangeCoupling[i], X, nstate, tmp_v0, tmp_v1); @@ -550,7 +535,7 @@ shared(tmp_v0, tmp_v1) } else if (X->Def.ExchangeCoupling[i][1] + 1 > X->Def.Nsite) { StartTimer(532); - dam_pr=X_GC_child_CisAitCiuAiv_spin_MPIsingle( 
+ X_GC_child_CisAitCiuAiv_spin_MPIsingle( X->Def.ExchangeCoupling[i][0], sigma1, sigma2, X->Def.ExchangeCoupling[i][1], sigma2, sigma1, X->Def.ParaExchangeCoupling[i], X, nstate, tmp_v0, tmp_v1); @@ -558,7 +543,7 @@ shared(tmp_v0, tmp_v1) } else if (X->Def.ExchangeCoupling[i][0] + 1 > X->Def.Nsite) { StartTimer(532); - dam_pr=X_GC_child_CisAitCiuAiv_spin_MPIsingle( + X_GC_child_CisAitCiuAiv_spin_MPIsingle( X->Def.ExchangeCoupling[i][1], sigma2, sigma1, X->Def.ExchangeCoupling[i][0], sigma1, sigma2, conj(X->Def.ParaExchangeCoupling[i]), X, nstate, tmp_v0, tmp_v1); @@ -567,10 +552,9 @@ shared(tmp_v0, tmp_v1) else { StartTimer(533); child_exchange_spin_GetInfo(i, X); - dam_pr = GC_child_exchange_spin(tmp_v0, tmp_v1, X); + GC_child_exchange_spin(nstate, tmp_v0, tmp_v1, X); StopTimer(533); } - X->Large.prdct += dam_pr; }/* for (i = 0; i < X->Def.NExchangeCoupling; i ++) */ StopTimer(530); /** @@ -582,7 +566,7 @@ shared(tmp_v0, tmp_v1) if (X->Def.PairLiftCoupling[i][0] + 1 > X->Def.Nsite && X->Def.PairLiftCoupling[i][1] + 1 > X->Def.Nsite) { StartTimer(541); - dam_pr = X_GC_child_CisAitCiuAiv_spin_MPIdouble( + X_GC_child_CisAitCiuAiv_spin_MPIdouble( X->Def.PairLiftCoupling[i][0], sigma1, sigma2, X->Def.PairLiftCoupling[i][1], sigma1, sigma2, X->Def.ParaPairLiftCoupling[i], X, nstate, tmp_v0, tmp_v1); @@ -590,7 +574,7 @@ shared(tmp_v0, tmp_v1) } else if (X->Def.PairLiftCoupling[i][1] + 1 > X->Def.Nsite) { StartTimer(542); - dam_pr = X_GC_child_CisAitCiuAiv_spin_MPIsingle( + X_GC_child_CisAitCiuAiv_spin_MPIsingle( X->Def.PairLiftCoupling[i][0], sigma1, sigma2, X->Def.PairLiftCoupling[i][1], sigma1, sigma2, X->Def.ParaPairLiftCoupling[i], X, nstate, tmp_v0, tmp_v1); @@ -598,7 +582,7 @@ shared(tmp_v0, tmp_v1) } else if (X->Def.PairLiftCoupling[i][0] + 1 > X->Def.Nsite) { StartTimer(542); - dam_pr = X_GC_child_CisAitCiuAiv_spin_MPIsingle( + X_GC_child_CisAitCiuAiv_spin_MPIsingle( X->Def.PairLiftCoupling[i][1], sigma1, sigma2, X->Def.PairLiftCoupling[i][0], sigma1, sigma2, 
conj(X->Def.ParaPairLiftCoupling[i]), X, nstate, tmp_v0, tmp_v1); @@ -607,10 +591,9 @@ shared(tmp_v0, tmp_v1) else { StartTimer(543); child_pairlift_spin_GetInfo(i, X); - dam_pr = GC_child_pairlift_spin(tmp_v0, tmp_v1, X); + GC_child_pairlift_spin(nstate, tmp_v0, tmp_v1, X); StopTimer(543); } - X->Large.prdct += dam_pr; }/*for (i = 0; i < X->Def.NPairLiftCoupling; i += 2)*/ StopTimer(540); @@ -633,13 +616,12 @@ int mltplyGeneralSpinGC( long unsigned int tmp_off = 0; long unsigned int isite1, isite2, sigma1, sigma2; long unsigned int sigma3, sigma4; - double complex dam_pr; double complex tmp_trans; long int tmp_sgn; double num1 = 0; /*[s] For InterAll */ double complex tmp_V; - double complex dmv=0; + int one = 1; /*[e] For InterAll */ long unsigned int i_max; @@ -657,13 +639,11 @@ int mltplyGeneralSpinGC( sigma1 = X->Def.EDGeneralTransfer[i][1]; sigma2 = X->Def.EDGeneralTransfer[i][3]; tmp_trans = -X->Def.EDParaGeneralTransfer[idx]; - dam_pr = 0.0; if (isite1 == isite2) { if (sigma1 != sigma2) { if (isite1 > X->Def.Nsite) { - dam_pr = X_GC_child_CisAit_GeneralSpin_MPIdouble( + X_GC_child_CisAit_GeneralSpin_MPIdouble( isite1 - 1, sigma1, sigma2, tmp_trans, X, nstate, tmp_v0, tmp_v1); - X->Large.prdct += dam_pr; }/*if (isite1 > X->Def.Nsite)*/ else { for (ihermite = 0; ihermite<2; ihermite++) { @@ -675,19 +655,16 @@ int mltplyGeneralSpinGC( tmp_trans = -X->Def.EDParaGeneralTransfer[idx]; // transverse magnetic field - dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) \ +#pragma omp parallel for default(none) \ private(j, tmp_sgn, num1) firstprivate(i_max, isite1, sigma1, sigma2, X, off, tmp_trans) \ shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) { num1 = GetOffCompGeneralSpin( j - 1, isite1, sigma2, sigma1, &off, X->Def.SiteToBit, X->Def.Tpow); if (num1 != 0) { // for multply - tmp_v0[off + 1] += tmp_v1[j] * tmp_trans; - dam_pr += conj(tmp_v1[off + 1]) * tmp_v1[j] * tmp_trans; + zaxpy_(&nstate, &tmp_trans, &tmp_v1[j][0], &one, 
&tmp_v0[off + 1][0], &one); }/*if (num1 != 0)*/ }/*for (j = 1; j <= i_max; j++)*/ - X->Large.prdct += dam_pr; }/*for (ihermite = 0; ihermite<2; ihermite++)*/ } }// sigma1 != sigma2 @@ -734,7 +711,6 @@ shared(tmp_v0, tmp_v1) sigma4 = X->Def.InterAll_OffDiagonal[idx][7]; tmp_V = X->Def.ParaInterAll_OffDiagonal[idx]; - dam_pr = 0.0; if (sigma1 == sigma2) { if (sigma3 == sigma4) { fprintf(stderr, "InterAll_OffDiagonal component is illegal.\n"); @@ -742,8 +718,8 @@ shared(tmp_v0, tmp_v1) }/*if (sigma3 == sigma4)*/ else { //sigma3=sigma4 term is considerd as a diagonal term. -#pragma omp parallel for default(none) reduction(+:dam_pr) \ - private(j, tmp_sgn, dmv, off) \ +#pragma omp parallel for default(none) \ + private(j, tmp_sgn, off) \ firstprivate(i_max, isite1, isite2, sigma1, sigma3, sigma4, X, tmp_V) \ shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) { @@ -752,20 +728,16 @@ shared(tmp_v0, tmp_v1) if (tmp_sgn == TRUE) { tmp_sgn = BitCheckGeneral(off, isite1, sigma1, X->Def.SiteToBit, X->Def.Tpow); if (tmp_sgn == TRUE) { - dmv = tmp_v1[j] * tmp_V; - if (X->Large.mode == M_MLTPLY) { // for multply - tmp_v0[off + 1] += dmv; - } - dam_pr += conj(tmp_v1[off + 1]) * dmv; - }/*if (tmp_sgn == TRUE)*/ + zaxpy_(&nstate, &tmp_V, &tmp_v1[j][0], &one, &tmp_v0[off + 1][0], &one); + }/*if (tmp_sgn == TRUE)*/ }/*if (tmp_sgn == TRUE)*/ }/*for (j = 1; j <= i_max; j++)*/ } }/*if (sigma1 == sigma2)*/ else if (sigma3 == sigma4) { //sigma1=sigma2 term is considerd as a diagonal term. 
-#pragma omp parallel for default(none) reduction(+:dam_pr) \ - private(j, tmp_sgn, dmv, off, tmp_off) \ +#pragma omp parallel for default(none) \ + private(j, tmp_sgn, off, tmp_off) \ firstprivate(i_max, isite1, isite2, sigma1, sigma2, sigma3, sigma4, X, tmp_V) \ shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) { @@ -774,18 +746,14 @@ shared(tmp_v0, tmp_v1) tmp_sgn = GetOffCompGeneralSpin( j - 1, isite1, sigma2, sigma1, &off, X->Def.SiteToBit, X->Def.Tpow); if (tmp_sgn == TRUE) { - dmv = tmp_v1[j] * tmp_V; - if (X->Large.mode == M_MLTPLY) { // for multply - tmp_v0[off + 1] += dmv; - } - dam_pr += conj(tmp_v1[off + 1]) * dmv; + zaxpy_(&nstate, &tmp_V, &tmp_v1[j][0], &one, &tmp_v0[off + 1][0], &one); }/*if (tmp_sgn == TRUE)*/ }/*if (tmp_sgn == TRUE)*/ }/*for (j = 1; j <= i_max; j++)*/ }/*else if (sigma3 == sigma4)*/ else { -#pragma omp parallel for default(none) reduction(+:dam_pr) \ - private(j, tmp_sgn, dmv, off, tmp_off) \ +#pragma omp parallel for default(none) \ + private(j, tmp_sgn, off, tmp_off) \ firstprivate(i_max, isite1, isite2, sigma1, sigma2, sigma3, sigma4, X, tmp_V) \ shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) { @@ -795,16 +763,11 @@ shared(tmp_v0, tmp_v1) tmp_sgn = GetOffCompGeneralSpin( tmp_off, isite1, sigma2, sigma1, &off, X->Def.SiteToBit, X->Def.Tpow); if (tmp_sgn == TRUE) { - dmv = tmp_v1[j] * tmp_V; - if (X->Large.mode == M_MLTPLY) { // for multply - tmp_v0[off + 1] += dmv; - } - dam_pr += conj(tmp_v1[off + 1]) * dmv; + zaxpy_(&nstate, &tmp_V, &tmp_v1[j][0], &one, &tmp_v0[off + 1][0], &one); }/*if (tmp_sgn == TRUE)*/ }/*if (tmp_sgn == TRUE)*/ }/*for (j = 1; j <= i_max; j++)*/ } - X->Large.prdct += dam_pr; } StopTimer(523); } @@ -827,8 +790,6 @@ int mltplySpinGCBoost( { long unsigned int j; - double complex dam_pr; - /* SpinGCBoost */ double complex* tmp_v2; double complex* tmp_v3; @@ -842,12 +803,6 @@ int mltplySpinGCBoost( tmp_v3 = cd_1d_allocate(i_max+1); child_general_int_spin_MPIBoost(X, nstate, tmp_v0, tmp_v1, tmp_v2, tmp_v3); 
- dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) \ -private(j) shared(tmp_v1,tmp_v0) firstprivate(i_max) - for(j=1;j<=i_max;j++) - dam_pr += conj(tmp_v1[j])*tmp_v0[j]; // = - X->Large.prdct += dam_pr; /* SpinGCBoost */ free_cd_1d_allocate(tmp_v2); @@ -863,11 +818,10 @@ private(j) shared(tmp_v1,tmp_v0) firstprivate(i_max) /** @brief Compute exchange term of spin Hamiltonian (canonical) -@return Fragment of @f$\langle v_1|{\hat H}|v_1\rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex child_exchange_spin( +void child_exchange_spin( int nstate, double complex **tmp_v0,//!<[inout] Result vector double complex **tmp_v1,//!<[in] Input producted vector struct BindStruct *X//!<[inout] @@ -875,21 +829,18 @@ double complex child_exchange_spin( long unsigned int j; long unsigned int i_max = X->Large.i_max; long unsigned int off = 0; - double complex dam_pr = 0; -#pragma omp parallel for default(none) reduction(+:dam_pr) \ +#pragma omp parallel for default(none) \ firstprivate(i_max, X,off) private(j) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) - dam_pr += child_exchange_spin_element(j, nstate, tmp_v0, tmp_v1, X, &off); - return dam_pr; + child_exchange_spin_element(j, nstate, tmp_v0, tmp_v1, X, &off); }/*double complex child_exchange_spin*/ /** @brief Compute exchange term of spin Hamiltonian (grandcanonical) -@return Fragment of @f$\langle v_1|{\hat H}|v_1\rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_child_exchange_spin( +void GC_child_exchange_spin( int nstate, double complex **tmp_v0,//!<[inout] Result vector double complex **tmp_v1,//!<[in] Input producted vector struct BindStruct *X//!<[inout] @@ -897,21 +848,18 @@ double complex GC_child_exchange_spin( long unsigned int j; long unsigned int i_max = X->Large.i_max; long unsigned int off = 0; - double complex dam_pr = 0; 
-#pragma omp parallel for default(none) reduction(+:dam_pr) \ +#pragma omp parallel for default(none) \ firstprivate(i_max, X,off) private(j) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) - dam_pr += GC_child_exchange_spin_element(j, nstate, tmp_v0, tmp_v1, X, &off); - return dam_pr; + GC_child_exchange_spin_element(j, nstate, tmp_v0, tmp_v1, X, &off); }/*double complex GC_child_exchange_spin*/ /** @brief Compute pair-lift term of spin Hamiltonian (grandcanonical) -@return Fragment of @f$\langle v_1|{\hat H}|v_1\rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_child_pairlift_spin( +void GC_child_pairlift_spin( int nstate, double complex **tmp_v0,//!<[inout] Result vector double complex **tmp_v1,//!<[in] Input producted vector struct BindStruct *X//!<[inout] @@ -919,31 +867,29 @@ double complex GC_child_pairlift_spin( long int j; long unsigned int i_max = X->Large.i_max; long unsigned int off = 0; - double complex dam_pr = 0; -#pragma omp parallel for default(none) reduction(+:dam_pr) \ +#pragma omp parallel for default(none) \ firstprivate(i_max, X,off) private(j) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) - dam_pr += GC_child_pairlift_spin_element(j, nstate, tmp_v0, tmp_v1, X, &off); - return dam_pr; + GC_child_pairlift_spin_element(j, nstate, tmp_v0, tmp_v1, X, &off); }/*double complex GC_child_pairlift_spin*/ /** @brief Compute Inter-All term of spin Hamiltonian (canonical) -@return Fragment of @f$\langle v_1|{\hat H}|v_1\rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex child_general_int_spin( +void child_general_int_spin( int nstate, double complex **tmp_v0,//!<[inout] Result vector double complex **tmp_v1,//!<[in] Input producted vector struct BindStruct *X//!<[inout] ) { - double complex dam_pr, tmp_V, dmv; + double complex tmp_V, dmv; long unsigned int j, i_max; long unsigned int 
org_sigma2, org_sigma4; long unsigned int isA_up, isB_up; long unsigned int tmp_off = 0; int tmp_sgn; + int one = 1; i_max = X->Large.i_max; org_sigma2 = X->Large.is2_spin; @@ -951,32 +897,28 @@ double complex child_general_int_spin( tmp_V = X->Large.tmp_V; isA_up = X->Large.is1_up; isB_up = X->Large.is2_up; - dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_sgn, dmv) \ +#pragma omp parallel for default(none) private(j, tmp_sgn, dmv) \ firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V) shared(tmp_v1, tmp_v0) for (j = 1; j <= i_max; j++) { tmp_sgn = X_child_exchange_spin_element(j, X, isA_up, isB_up, org_sigma2, org_sigma4, &tmp_off); if (tmp_sgn != 0) { - dmv = tmp_v1[j] * tmp_sgn * tmp_V; - tmp_v0[tmp_off] += dmv; - dam_pr += conj(tmp_v1[tmp_off]) * dmv; + dmv = tmp_sgn * tmp_V; + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[tmp_off][0], &one); }/*if (tmp_sgn != 0)*/ }/*for (j = 1; j <= i_max; j++)*/ - return dam_pr; }/*double complex child_general_int_spin*/ /** @brief Compute Inter-All term of spin Hamiltonian (grandcanonical) -@return Fragment of @f$\langle v_1|{\hat H}|v_1\rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_child_general_int_spin( +void GC_child_general_int_spin( int nstate, double complex **tmp_v0,//!<[inout] Result vector double complex **tmp_v1,//!<[in] Input producted vector struct BindStruct *X//!<[inout] ) { - double complex dam_pr, tmp_V; + double complex tmp_V; long unsigned int j, i_max; long unsigned int org_isite1, org_isite2; long unsigned int org_sigma1, org_sigma2, org_sigma3, org_sigma4; @@ -991,40 +933,38 @@ double complex GC_child_general_int_spin( org_sigma3 = X->Large.is3_spin; org_sigma4 = X->Large.is4_spin; tmp_V = X->Large.tmp_V; - dam_pr = 0.0; isA_up = X->Def.Tpow[org_isite1 - 1]; isB_up = X->Def.Tpow[org_isite2 - 1]; -#pragma omp parallel default(none) reduction(+:dam_pr) 
\ +#pragma omp parallel default(none) \ private(j) shared(tmp_v0, tmp_v1) \ firstprivate(i_max,X,isA_up,isB_up,org_sigma1,org_sigma2,org_sigma3,org_sigma4,tmp_off, tmp_V) { if (org_sigma1 == org_sigma2 && org_sigma3 == org_sigma4) { //diagonal #pragma omp for for (j = 1; j <= i_max; j++) - dam_pr += GC_child_CisAisCisAis_spin_element( + GC_child_CisAisCisAis_spin_element( j, isA_up, isB_up, org_sigma2, org_sigma4, tmp_V, nstate, tmp_v0, tmp_v1, X); } else if (org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4) { #pragma omp for for (j = 1; j <= i_max; j++) - dam_pr += GC_child_CisAisCitAiu_spin_element( + GC_child_CisAisCitAiu_spin_element( j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, tmp_v0, tmp_v1, X, &tmp_off); } else if (org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4) { #pragma omp for for (j = 1; j <= i_max; j++) - dam_pr += GC_child_CisAitCiuAiu_spin_element( + GC_child_CisAitCiuAiu_spin_element( j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, tmp_v0, tmp_v1, X, &tmp_off); } else if (org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4) { #pragma omp for for (j = 1; j <= i_max; j++) - dam_pr += GC_child_CisAitCiuAiv_spin_element( + GC_child_CisAitCiuAiv_spin_element( j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, tmp_v0, tmp_v1, X, &tmp_off); } }/*End of parallel region*/ - return dam_pr; }/*double complex GC_child_general_int_spin*/ /******************************************************************************/ //[e] child functions diff --git a/src/mltplySpinCore.c b/src/mltplySpinCore.c index 827d92c08..d46f774f5 100644 --- a/src/mltplySpinCore.c +++ b/src/mltplySpinCore.c @@ -266,11 +266,10 @@ int X_child_exchange_spin_element( }/*int X_child_exchange_spin_element*/ /** @brief Multiply Hamiltonian of exchange term of canonical spin system -@return @f$\langle v_1 | H_{\rm this}| v_1 \rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex 
child_exchange_spin_element( +void child_exchange_spin_element( long unsigned int j,//!<[in] Index of initial wavefunction int nstate, double complex **tmp_v0,//!<[out] Resulting wavefunction double complex **tmp_v1,//!<[in] Wavefunction to be multiplied @@ -278,7 +277,6 @@ double complex child_exchange_spin_element( long unsigned int *tmp_off//!<[out] Index of final wavefunction ) { long unsigned int off; - double complex dmv; long unsigned int iexchg; long unsigned int is_up = X->Large.isA_spin; long unsigned int irght = X->Large.irght; @@ -286,81 +284,67 @@ double complex child_exchange_spin_element( long unsigned int ihfbit = X->Large.ihfbit; double complex tmp_J = X->Large.tmp_J; int mode = X->Large.mode; - double complex dam_pr = 0; long unsigned int ibit_tmp; + int one = 1; ibit_tmp = (list_1[j] & is_up); if (ibit_tmp == 0 || ibit_tmp == is_up) { - return dam_pr; + return; } else { iexchg = list_1[j] ^ is_up; GetOffComp(list_2_1, list_2_2, iexchg, irght, ilft, ihfbit, &off); *tmp_off = off; - dmv = tmp_J * tmp_v1[j]; - if (mode == M_MLTPLY) { - tmp_v0[off] += dmv; - } - dam_pr += dmv * conj(tmp_v1[off]); - return dam_pr; + zaxpy_(&nstate, &tmp_J, &tmp_v1[j][0], &one, &tmp_v0[off][0], &one); } }/*double complex child_exchange_spin_element*/ /** @brief Multiply Hamiltonian of exchange term of grandcanonical spin system -@return @f$\langle v_1 | H_{\rm this}| v_1 \rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_child_exchange_spin_element( +void GC_child_exchange_spin_element( long unsigned int j,//!<[in] Index of initial wavefunction int nstate, double complex **tmp_v0,//!<[out] Resulting wavefunction double complex **tmp_v1,//!<[in] Wavefunction to be multiplied struct BindStruct *X,//!<[inout] long unsigned int *tmp_off//!<[out] Index of final wavefunction ) { - double complex dmv; long unsigned int is_up = X->Large.isA_spin; double complex tmp_J = X->Large.tmp_J; int mode 
= X->Large.mode; long unsigned int list_1_j, list_1_off; + int one = 1; - double complex dam_pr = 0; list_1_j = j - 1; long unsigned int ibit_tmp; ibit_tmp = (list_1_j & is_up); if (ibit_tmp == 0 || ibit_tmp == is_up) { - return dam_pr; + return; } else { list_1_off = list_1_j ^ is_up; *tmp_off = list_1_off; - dmv = tmp_J * tmp_v1[j]; - if (mode == M_MLTPLY) { - tmp_v0[list_1_off + 1] += dmv; - } - dam_pr += dmv * conj(tmp_v1[list_1_off + 1]); - return dam_pr; + zaxpy_(&nstate, &tmp_J, &tmp_v1[j][0], &one, &tmp_v0[list_1_off + 1][0], &one); } }/*double complex GC_child_exchange_spin_element*/ /** @brief Multiply Hamiltonian of pairlift term of grandcanonical spin system -@return @f$\langle v_1 | H_{\rm this}| v_1 \rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_child_pairlift_spin_element( +void GC_child_pairlift_spin_element( long unsigned int j,//!<[in] Index of initial wavefunction int nstate, double complex **tmp_v0,//!<[out] Resulting wavefunction double complex **tmp_v1,//!<[in] Wavefunction to be multiplied struct BindStruct *X,//!<[inout] long unsigned int *tmp_off//!<[out] Index of final wavefunction ) { - double complex dmv; long unsigned int is_up = X->Large.isA_spin; double complex tmp_J = X->Large.tmp_J; int mode = X->Large.mode; - double complex dam_pr = 0; + int one = 1; long unsigned int list_1_off; long unsigned int list_1_j = j - 1; long unsigned int ibit_tmp; @@ -369,26 +353,17 @@ double complex GC_child_pairlift_spin_element( if (ibit_tmp == 0 || ibit_tmp == is_up) { list_1_off = list_1_j ^ is_up; //Change: ++ -> -- or -- -> ++ *tmp_off = list_1_off; - dmv = tmp_J * tmp_v1[j];//* ibit_tmp; - if (mode == M_MLTPLY) { - tmp_v0[list_1_off + 1] += dmv; - } - dam_pr += dmv * conj(tmp_v1[list_1_off + 1]); - return dam_pr; - } - else { - return dam_pr; + zaxpy_(&nstate, &tmp_J, &tmp_v1[j][0], &one, &tmp_v0[list_1_off + 1][0], &one); } }/*double complex 
GC_child_pairlift_spin_element*/ //[s]Spin /** @brief Compute @f$c_{is}^\dagger c_{is} c_{is}^\dagger c_{is}@f$ term of canonical spsin system -@return Fragment of @f$\langle v_1 | H_{\rm this}|v_1 \rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex child_CisAisCisAis_spin_element( +void child_CisAisCisAis_spin_element( long unsigned int j,//!<[in] Index of initial wavefunction long unsigned int isA_up,//!<[in] Bit mask for spin 1 long unsigned int isB_up,//!<[in] Bit mask for spin 2 @@ -401,16 +376,12 @@ double complex child_CisAisCisAis_spin_element( ) { int tmp_sgn; double complex dmv; - double complex dam_pr = 0; + int one = 1; tmp_sgn = X_Spin_CisAis(j, X, isB_up, org_sigma4); tmp_sgn *= X_Spin_CisAis(j, X, isA_up, org_sigma2); - dmv = tmp_v1[j] * tmp_sgn * tmp_V; - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply - tmp_v0[j] += dmv; - } - dam_pr = conj(tmp_v1[j]) * dmv; - return dam_pr; + dmv = tmp_sgn * tmp_V; + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); }/*double complex child_CisAisCisAis_spin_element*/ //[e]Spin @@ -419,11 +390,10 @@ double complex child_CisAisCisAis_spin_element( /** @brief Compute @f$c_{is}^\dagger c_{is} c_{is}^\dagger c_{is}@f$ term of grandcanonical spsin system -@return Fragment of @f$\langle v_1 | H_{\rm this}|v_1 \rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_child_CisAisCisAis_spin_element( +void GC_child_CisAisCisAis_spin_element( long unsigned int j,//!<[in] Index of initial wavefunction long unsigned int isA_up,//!<[in] Bit mask for spin 1 long unsigned int isB_up,//!<[in] Bit mask for spin 2 @@ -436,27 +406,22 @@ double complex GC_child_CisAisCisAis_spin_element( ) { int tmp_sgn; double complex dmv = 0; - double complex dam_pr = 0; + int one = 1; tmp_sgn = X_SpinGC_CisAis(j, X, isB_up, org_sigma4); tmp_sgn *= 
X_SpinGC_CisAis(j, X, isA_up, org_sigma2); if (tmp_sgn != 0) { - dmv = tmp_v1[j] * tmp_sgn * tmp_V; - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply - tmp_v0[j] += dmv; - } - dam_pr = conj(tmp_v1[j]) * dmv; + dmv = tmp_sgn * tmp_V; + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); } - return dam_pr; }/*double complex GC_child_CisAisCisAis_spin_element*/ /** @brief Compute @f$c_{is}^\dagger c_{is} c_{it}^\dagger c_{iu}@f$ term of grandcanonical spsin system -@return Fragment of @f$\langle v_1 | H_{\rm this}|v_1 \rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_child_CisAisCitAiu_spin_element( +void GC_child_CisAisCitAiu_spin_element( long unsigned int j,//!<[in] Index of initial wavefunction long unsigned int org_sigma2,//!<[in] Target for spin 1 long unsigned int org_sigma4,//!<[in] Target for spin 2 @@ -470,28 +435,23 @@ double complex GC_child_CisAisCitAiu_spin_element( ) { int tmp_sgn; double complex dmv; - double complex dam_pr = 0 + 0 * I; + int one = 1; tmp_sgn = X_SpinGC_CisAit(j, X, isB_up, org_sigma4, tmp_off); if (tmp_sgn != 0) { tmp_sgn *= X_SpinGC_CisAis((*tmp_off + 1), X, isA_up, org_sigma2); if (tmp_sgn != 0) { - dmv = tmp_v1[j] * tmp_sgn * tmp_V; - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply - tmp_v0[*tmp_off + 1] += dmv; - } - dam_pr = conj(tmp_v1[*tmp_off + 1]) * dmv; + dmv = tmp_sgn * tmp_V; + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[*tmp_off + 1][0], &one); }/*if (tmp_sgn != 0)*/ }/*if (tmp_sgn != 0)*/ - return dam_pr; }/*double complex GC_child_CisAisCitAiu_spin_element*/ /** @brief Compute @f$c_{is}^\dagger c_{it} c_{iu}^\dagger c_{iu}@f$ term of grandcanonical spsin system -@return Fragment of @f$\langle v_1 | H_{\rm this}|v_1 \rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex 
GC_child_CisAitCiuAiu_spin_element( +void GC_child_CisAitCiuAiu_spin_element( long unsigned int j,//!<[in] Index of initial wavefunction long unsigned int org_sigma2,//!<[in] Target for spin 1 long unsigned int org_sigma4,//!<[in] Target for spin 2 @@ -505,28 +465,23 @@ double complex GC_child_CisAitCiuAiu_spin_element( ) { int tmp_sgn; double complex dmv; - double complex dam_pr = 0 + 0 * I; + int one = 1; tmp_sgn = X_SpinGC_CisAis(j, X, isB_up, org_sigma4); if (tmp_sgn != 0) { tmp_sgn *= X_SpinGC_CisAit(j, X, isA_up, org_sigma2, tmp_off); if (tmp_sgn != 0) { - dmv = tmp_v1[j] * tmp_sgn * tmp_V; - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply - tmp_v0[*tmp_off + 1] += dmv; - } - dam_pr = conj(tmp_v1[*tmp_off + 1]) * dmv; + dmv = tmp_sgn * tmp_V; + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[*tmp_off + 1][0], &one); }/*if (tmp_sgn != 0)*/ }/*if (tmp_sgn != 0)*/ - return dam_pr; }/*double complex GC_child_CisAitCiuAiu_spin_element*/ /** @brief Compute @f$c_{is}^\dagger c_{it} c_{iu}^\dagger c_{iv}@f$ term of grandcanonical spsin system -@return Fragment of @f$\langle v_1 | H_{\rm this}|v_1 \rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_child_CisAitCiuAiv_spin_element( +void GC_child_CisAitCiuAiv_spin_element( long unsigned int j,//!<[in] Index of initial wavefunction long unsigned int org_sigma2,//!<[in] Target for spin 1 long unsigned int org_sigma4,//!<[in] Target for spin 2 @@ -541,18 +496,14 @@ double complex GC_child_CisAitCiuAiv_spin_element( int tmp_sgn; long unsigned int tmp_off_1; double complex dmv; - double complex dam_pr = 0 + 0 * I; + int one = 1; tmp_sgn = X_SpinGC_CisAit(j, X, isB_up, org_sigma4, &tmp_off_1); if (tmp_sgn != 0) { tmp_sgn *= X_SpinGC_CisAit((tmp_off_1 + 1), X, isA_up, org_sigma2, tmp_off_2); if (tmp_sgn != 0) { - dmv = tmp_v1[j] * tmp_sgn * tmp_V; - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) 
{ // for multply - tmp_v0[*tmp_off_2 + 1] += dmv; - } - dam_pr = conj(tmp_v1[*tmp_off_2 + 1]) * dmv; + dmv = tmp_sgn * tmp_V; + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[*tmp_off_2 + 1][0], &one); }/*if (tmp_sgn != 0)*/ }/*if (tmp_sgn != 0)*/ - return dam_pr; }/*double complex GC_child_CisAitCiuAiv_spin_element*/ //[e]GC Spin From 259bda00863429d1ed6d49540975bea6364bed00 Mon Sep 17 00:00:00 2001 From: mitsuaki1987 Date: Sun, 3 Mar 2019 00:58:38 +0900 Subject: [PATCH 03/50] Backup --- src/CG_EigenVector.c | 2 +- src/CalcByLOBPCG.c | 323 ++++++------ src/CalcSpectrumByBiCG.c | 4 +- src/Lanczos_EigenValue.c | 8 +- src/Lanczos_EigenVector.c | 4 +- src/Multiply.c | 4 +- src/PowerLanczos.c | 4 +- src/common/setmemory.c | 34 ++ src/common/setmemory.h | 4 + src/expec_cisajs.c | 833 ++++++++++++++++--------------- src/expec_energy_flct.c | 2 +- src/include/mltplyCommon.h | 4 +- src/include/wrapperMPI.h | 10 +- src/mltply.c | 49 +- src/mltplyHubbardCore.c | 3 - src/mltplyMPIBoost.c | 1 - src/mltplyMPIHubbard.c | 165 ++----- src/mltplyMPIHubbardCore.c | 330 +++---------- src/mltplyMPISpin.c | 77 +-- src/mltplyMPISpinCore.c | 979 ++++++++----------------------------- src/wrapperMPI.c | 181 +++++++ 21 files changed, 1222 insertions(+), 1799 deletions(-) diff --git a/src/CG_EigenVector.c b/src/CG_EigenVector.c index 6d0158ef1..6cfa44cb0 100644 --- a/src/CG_EigenVector.c +++ b/src/CG_EigenVector.c @@ -143,7 +143,7 @@ int CG_EigenVector(struct BindStruct *X/**<[inout]*/){ y[j]=(-Eig+eps_CG)*vg[j]; //y = -E*p } StartTimer(4401); - mltply(X,y,vg); // y += H*p + mltply(X,1,y,vg); // y += H*p StopTimer(4401); // (H-E)p=y finish! rp=0.0; diff --git a/src/CalcByLOBPCG.c b/src/CalcByLOBPCG.c index a08dacd3e..0d656a55e 100644 --- a/src/CalcByLOBPCG.c +++ b/src/CalcByLOBPCG.c @@ -154,7 +154,7 @@ If this is resuterting run, read from files. 
*/ static void Initialize_wave( struct BindStruct *X,//!<[inout] - double complex **wave//!<[out] [exct][CheckList::idim_max] initial eigenvector + double complex **wave//!<[out] [CheckList::idim_max][exct] initial eigenvector ) { FILE *fp; @@ -165,7 +165,7 @@ static void Initialize_wave( long int idim, iv, i_max; unsigned long int i_max_tmp, sum_i_max; int mythread; - double dnorm; + double *dnorm; /* For DSFMT */ @@ -198,7 +198,8 @@ static void Initialize_wave( fprintf(stderr, "Error: Invalid restart file.\n"); exitMPI(-1); } - byte_size = fread(wave[ie], sizeof(complex double), X->Check.idim_max + 1, fp); + byte_size = fread(&v0[0][0], sizeof(complex double), X->Check.idim_max + 1, fp); + for (idim = 1; idim <= i_max; idim++) wave[idim][ie] = v0[idim][0]; fclose(fp); } }/*for (ie = 0; ie < X->Def.k_exct; ie++)*/ @@ -229,7 +230,7 @@ static void Initialize_wave( fprintf(stdoutMPI, " initial_mode=%d normal: iv = %ld i_max=%ld k_exct =%d\n\n", initial_mode, iv, i_max, X->Def.k_exct); #pragma omp parallel for default(none) private(idim) shared(wave,i_max,ie) - for (idim = 1; idim <= i_max; idim++) wave[ie][idim] = 0.0; + for (idim = 1; idim <= i_max; idim++) wave[idim][ie] = 0.0; sum_i_max = 0; for (iproc = 0; iproc < nproc; iproc++) { @@ -238,10 +239,10 @@ static void Initialize_wave( if (sum_i_max <= iv && iv < sum_i_max + i_max_tmp) { if (myrank == iproc) { - wave[ie][iv - sum_i_max + 1] = 1.0; + wave[iv - sum_i_max + 1][ie] = 1.0; if (X->Def.iInitialVecType == 0) { - wave[ie][iv - sum_i_max + 1] += 1.0*I; - wave[ie][iv - sum_i_max + 1] /= sqrt(2.0); + wave[iv - sum_i_max + 1][ie] += 1.0*I; + wave[iv - sum_i_max + 1][ie] /= sqrt(2.0); } }/*if (myrank == iproc)*/ }/*if (sum_i_max <= iv && iv < sum_i_max + i_max_tmp)*/ @@ -273,22 +274,23 @@ static void Initialize_wave( if (X->Def.iInitialVecType == 0) { #pragma omp for for (idim = 1; idim <= i_max; idim++) - wave[ie][idim] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5) + 2.0*(dsfmt_genrand_close_open(&dsfmt) - 
0.5)*I; + wave[idim][ie] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5) + 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5)*I; } else { #pragma omp for for (idim = 1; idim <= i_max; idim++) - wave[ie][idim] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5); + wave[idim][ie] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5); } }/*for (ie = 0; ie < X->Def.k_exct; ie++)*/ }/*#pragma omp parallel*/ - for (ie = 0; ie < X->Def.k_exct; ie++) { - dnorm = sqrt(creal(VecProdMPI(i_max, wave[ie], wave[ie]))); + dnorm = d_1d_allocate(X->Def.k_exct); + NormMPI_dv(i_max, X->Def.k_exct, wave, dnorm); #pragma omp parallel for default(none) shared(i_max,wave,dnorm,ie) private(idim) - for (idim = 1; idim <= i_max; idim++) wave[ie][idim] /= dnorm; - } + for (idim = 1; idim <= i_max; idim++) + for (ie = 0; ie < X->Def.k_exct; ie++) wave[idim][ie] /= dnorm[ie]; + free_d_1d_allocate(dnorm); }/*else if(initial_mode==1)*/ }/*static void Initialize_wave*/ /** @@ -335,51 +337,55 @@ int LOBPCG_Main( FILE *fp; int iconv = -1; long int idim, i_max; - int ii, jj, ie, je, nsub, stp, mythread, nsub_cut; - double complex ***wxp/*[0] w, [1] x, [2] p of Ref.1*/, + int ii, jj, ie, nsub, stp, nsub_cut; + double complex ***wxp/*[0] w, [1] x, [2] p of Ref.1*/, ***hwxp/*[0] h*w, [1] h*x, [2] h*p of Ref.1*/, - *hsub, *ovlp /*Subspace Hamiltonian and Overlap*/, - **work; - double *eig, dnorm, eps_LOBPCG, eigabs_max, preshift, precon, dnormmax, *eigsub; + ****hsub, ****ovlp; /*Subspace Hamiltonian and Overlap*/ + double *eig, *dnorm, eps_LOBPCG, eigabs_max, *preshift, precon, dnormmax, *eigsub; int do_precon = 0;//If = 1, use preconditioning (experimental) + char tN = 'N', tC = 'C'; + double complex one = 1.0, zero = 0.0; nsub = 3 * X->Def.k_exct; eig = d_1d_allocate(X->Def.k_exct); + dnorm = d_1d_allocate(X->Def.k_exct); eigsub = d_1d_allocate(nsub); - hsub = cd_1d_allocate(nsub*nsub); - ovlp = cd_1d_allocate(nsub*nsub); - work = cd_2d_allocate(nthreads, nsub); + hsub = cd_2d_allocate(3, X->Def.k_exct, 3, 
X->Def.k_exct); + ovlp = cd_4d_allocate(3, X->Def.k_exct, 3, X->Def.k_exct); i_max = X->Check.idim_max; - free(v0); free(v1); free(vg); - wxp = cd_3d_allocate(3, X->Def.k_exct, X->Check.idim_max + 1); - hwxp = cd_3d_allocate(3, X->Def.k_exct, X->Check.idim_max + 1); + wxp = cd_3d_allocate(3, X->Check.idim_max + 1, X->Def.k_exct); + hwxp = cd_3d_allocate(3, X->Check.idim_max + 1, X->Def.k_exct); /**@brief
  • Set initial guess of wavefunction: @f${\bf x}=@f$initial guess
  • */ - Initialize_wave(X, wxp[1]); + Initialize_wave(X, &wxp[1]); + free(v0); TimeKeeper(X, cFileNameTimeKeep, cLanczos_EigenValueStart, "a"); - for (ie = 0; ie < X->Def.k_exct; ie++) - for (idim = 1; idim <= i_max; idim++) hwxp[1][ie][idim] = 0.0; - for (ie = 0; ie < X->Def.k_exct; ie++) - mltply(X, hwxp[1][ie], wxp[1][ie]); + for (idim = 1; idim <= i_max; idim++) + for (ie = 0; ie < X->Def.k_exct; ie++) + hwxp[1][idim][ie] = 0.0; + mltply(X, X->Def.k_exct, hwxp[1], wxp[1]); stp = 1; TimeKeeperWithStep(X, cFileNameTimeKeep, cLanczos_EigenValueStep, "a", 0); - for (ie = 0; ie < X->Def.k_exct; ie++){ - for (idim = 1; idim <= i_max; idim++) wxp[2][ie][idim] = 0.0; - for (idim = 1; idim <= i_max; idim++) hwxp[2][ie][idim] = 0.0; - - eig[ie] = creal(VecProdMPI(i_max, wxp[1][ie], hwxp[1][ie])); - }/*for (ie = 0; ie < X->Def.k_exct; ie++)*/ + for (ie = 0; ie < X->Def.k_exct; ie++) eig[ie] = 0.0; + for (idim = 1; idim <= i_max; idim++) { + for (ie = 0; ie < X->Def.k_exct; ie++) { + wxp[2][idim][ie] = 0.0; + hwxp[2][idim][ie] = 0.0; + } + eig[ie] += conj(wxp[1][idim][ie]) * hwxp[1][idim][ie]; + } + SumMPI_dv(X->Def.k_exct, eig); sprintf(sdt_2, cFileNameLanczosStep, X->Def.CDataFileHead); childfopenMPI(sdt_2, "w", &fp); @@ -406,38 +412,44 @@ int LOBPCG_Main(
  • @b DO each eigenvector
    • */ + /**@brief +
    • Compute residual vectors: @f${\bf w}={\bf X}-\mu {\bf x}@f$
    • + */ +#pragma omp parallel for default(none) shared(i_max,wxp,hwxp,eig) private(idim,ie) reduction(+:dnorm) + for (idim = 1; idim <= i_max; idim++) { + for (ie = 0; ie < X->Def.k_exct; ie++) { + wxp[0][ie][idim] = hwxp[1][idim][ie] - eig[ie] * wxp[1][idim][ie]; + } + } + NormMPI_dv(i_max, X->Def.k_exct, wxp[0], dnorm); + dnormmax = 0.0; - for (ie = 0; ie < X->Def.k_exct; ie++) { - /**@brief -
    • Compute residual vectors: @f${\bf w}={\bf X}-\mu {\bf x}@f$
    • - */ -#pragma omp parallel for default(none) shared(i_max,wxp,hwxp,eig,ie) private(idim) - for (idim = 1; idim <= i_max; idim++) - wxp[0][ie][idim] = hwxp[1][ie][idim] - eig[ie] * wxp[1][ie][idim]; - dnorm = sqrt(creal(VecProdMPI(i_max, wxp[0][ie], wxp[0][ie]))); - if (dnorm > dnormmax) dnormmax = dnorm; - - if (stp /= 1) { - /**@brief -
    • Preconditioning (Point Jacobi): @f${\bf w}={\hat T}^{-1} {\bf w}@f$
    • - */ - if (do_precon == 1) { - preshift = calc_preshift(eig[ie], dnorm, eps_LOBPCG); + for (ie = 0; ie < X->Def.k_exct; ie++) + if (dnorm[ie] > dnormmax) dnormmax = dnorm[ie]; + /**@brief +
    • Preconditioning (Point Jacobi): @f${\bf w}={\hat T}^{-1} {\bf w}@f$
    • + */ + if (stp /= 1) { + if (do_precon == 1) { + for (ie = 0; ie < X->Def.k_exct; ie++) + preshift[ie] = calc_preshift(eig[ie], dnorm[ie], eps_LOBPCG); #pragma omp parallel for default(none) shared(wxp,ie,list_Diagonal,preshift,i_max,eps_LOBPCG) private(idim,precon) - for (idim = 1; idim <= i_max; idim++) { - precon = list_Diagonal[idim] - preshift; - if(fabs(precon) > eps_LOBPCG) wxp[0][ie][idim] /= precon; + for (idim = 1; idim <= i_max; idim++) { + for (ie = 0; ie < X->Def.k_exct; ie++){ + precon = list_Diagonal[idim] - preshift[ie]; + if (fabs(precon) > eps_LOBPCG) wxp[0][idim][ie] /= precon; } - }/*if(do_precon == 1)*/ - /**@brief + } + }/*if(do_precon == 1)*/ + /**@brief
    • Normalize residual vector: @f${\bf w}={\bf w}/|w|@f$ - */ - dnorm = sqrt(creal(VecProdMPI(i_max, wxp[0][ie], wxp[0][ie]))); + */ + NormMPI_dv(i_max, X->Def.k_exct, wxp[0], dnorm); #pragma omp parallel for default(none) shared(i_max,wxp,dnorm,ie) private(idim) - for (idim = 1; idim <= i_max; idim++) wxp[0][ie][idim] /= dnorm; - }/*if (stp /= 1)*/ - - }/*for (ie = 0; ie < X->Def.k_exct; ie++)*/ + for (idim = 1; idim <= i_max; idim++) + for (ie = 0; ie < X->Def.k_exct; ie++) + wxp[0][idim][ie] /= dnorm[ie]; + }/*if (stp /= 1)*/ /**@brief
  • @b END @b DO each eigenvector
  • @@ -462,14 +474,10 @@ int LOBPCG_Main( /**@brief
  • @f${\bf W}={\hat H}{\bf w}@f$
  • */ -#pragma omp parallel default(none) shared(hwxp,i_max,X) private(idim,ie) - { -#pragma omp for nowait - for (ie = 0; ie < X->Def.k_exct; ie++) - for (idim = 1; idim <= i_max; idim++) hwxp[0][ie][idim] = 0.0; - } - for (ie = 0; ie < X->Def.k_exct; ie++) - mltply(X, hwxp[0][ie], wxp[0][ie]); +#pragma omp parallel for default(none) shared(hwxp,i_max,X) private(idim,ie) + for (idim = 1; idim <= i_max; idim++) + for (ie = 0; ie < X->Def.k_exct; ie++) hwxp[0][ie][idim] = 0.0; + mltply(X, X->Def.k_exct, hwxp[0], wxp[0]); TimeKeeperWithStep(X, cFileNameTimeKeep, cLanczos_EigenValueStep, "a", stp); /**@brief @@ -479,102 +487,85 @@ int LOBPCG_Main( */ for (ii = 0; ii < 3; ii++) { - for (ie = 0; ie < X->Def.k_exct; ie++){ - for (jj = 0; jj < 3; jj++) { - for (je = 0; je < X->Def.k_exct; je++){ - hsub[je + jj*X->Def.k_exct + ie * nsub + ii * nsub*X->Def.k_exct] - = VecProdMPI(i_max, wxp[jj][je], hwxp[ii][ie]); - ovlp[je + jj*X->Def.k_exct + ie * nsub + ii * nsub*X->Def.k_exct] - = VecProdMPI(i_max, wxp[jj][je], wxp[ii][ie]); - }/*for (je = 0; je < X->Def.k_exct; je++)*/ - }/*for (jj = 0; jj < 3; jj++)*/ - }/*for (ie = 0; ie < X->Def.k_exct; ie++)*/ - }/*for (ii = 0; ii < 3; ii++)*/ + for (jj = 0; jj < 3; jj++) { + zgemm_(&tN, &tC, &X->Def.k_exct, &X->Def.k_exct, &i_max, &one, + &wxp[ii][1][0], &X->Def.k_exct, &wxp[jj][1][0], &X->Def.k_exct, &zero, &ovlp[jj][0][ii][0], &nsub); + zgemm_(&tN, &tC, &X->Def.k_exct, &X->Def.k_exct, &i_max, &one, + &wxp[ii][1][0], &X->Def.k_exct, &hwxp[jj][1][0], &X->Def.k_exct, &zero, &hsub[jj][0][ii][0], &nsub); + } + } + SumMPI_cv(nsub*nsub, ovlp); + SumMPI_cv(nsub*nsub, hsub); + for (ie = 0; ie < X->Def.k_exct; ie++) - eig[ie] = - creal(hsub[ie + 1 * X->Def.k_exct + ie * nsub + 1 * nsub*X->Def.k_exct]); + eig[ie] = creal(hsub[1][ie][1][ie]); /**@brief
  • Subspace diagonalization with Lowdin's orthogonalization for the generalized eigenvalue problem: @f${\hat H}_{\rm sub}{\bf v}={\hat O}\mu_{\rm sub}{\bf v}@f$, @f${\bf v}=(\alpha, \beta, \gamma)@f$
  • */ - nsub_cut = diag_ovrp(nsub, hsub, ovlp, eigsub); + nsub_cut = diag_ovrp(nsub, &hsub[0][0][0][0], &ovlp[0][0][0][0], eigsub); /**@brief
  • Update @f$\mu=(\mu+\mu_{\rm sub})/2@f$
  • */ for (ie = 0; ie < X->Def.k_exct; ie++) eig[ie] = 0.5 * (eig[ie] + eigsub[ie]); - -#pragma omp parallel default(none) shared(i_max,X,wxp,hwxp,hsub,nsub,work) private(idim,ie,je,jj,mythread) - { -#if defined(_OPENMP) - mythread = omp_get_thread_num(); -#else - mythread = 0; -#endif - -#pragma omp for - for (idim = 1; idim <= i_max; idim++) { - /**@brief -
  • @f${\bf x}=\alpha {\bf w}+\beta {\bf x}+\gamma {\bf p}@f$, - Normalize @f${\bf x}@f$
  • - */ - for (ie = 0; ie < X->Def.k_exct; ie++) { - work[mythread][ie] = 0.0; - for (jj = 0; jj < 3; jj++) - for (je = 0; je < X->Def.k_exct; je++) - work[mythread][ie] += wxp[jj][je][idim] * hsub[je + jj *X->Def.k_exct + nsub*ie]; - } - for (ie = 0; ie < X->Def.k_exct; ie++) wxp[1][ie][idim] = work[mythread][ie]; - /**@brief -
  • @f${\bf X}=\alpha {\bf W}+\beta {\bf X}+\gamma {\bf P}@f$, - Normalize @f${\bf X}@f$
  • - */ - for (ie = 0; ie < X->Def.k_exct; ie++) { - work[mythread][ie] = 0.0; - for (jj = 0; jj < 3; jj++) - for (je = 0; je < X->Def.k_exct; je++) - work[mythread][ie] += hwxp[jj][je][idim] * hsub[je + jj *X->Def.k_exct + nsub*ie]; - } - for (ie = 0; ie < X->Def.k_exct; ie++) hwxp[1][ie][idim] = work[mythread][ie]; - /**@brief -
  • @f${\bf p}=\alpha {\bf w}+\gamma {\bf p}@f$, - Normalize @f${\bf p}@f$
  • - */ - for (ie = 0; ie < X->Def.k_exct; ie++) { - work[mythread][ie] = 0.0; - for (jj = 0; jj < 3; jj += 2) { - for (je = 0; je < X->Def.k_exct; je++) - work[mythread][ie] += wxp[jj][je][idim] * hsub[je + jj *X->Def.k_exct + nsub*ie]; - } - } - for (ie = 0; ie < X->Def.k_exct; ie++) wxp[2][ie][idim] = work[mythread][ie]; - /**@brief -
  • @f${\bf P}=\alpha {\bf W}+\gamma {\bf P}@f$, - Normalize @f${\bf P}@f$
  • - */ - for (ie = 0; ie < X->Def.k_exct; ie++) { - work[mythread][ie] = 0.0; - for (jj = 0; jj < 3; jj += 2) - for (je = 0; je < X->Def.k_exct; je++) - work[mythread][ie] += hwxp[jj][je][idim] * hsub[je + jj *X->Def.k_exct + nsub*ie]; - } - for (ie = 0; ie < X->Def.k_exct; ie++) hwxp[2][ie][idim] = work[mythread][ie]; - - }/*for (idim = 1; idim <= i_max; idim++)*/ - }/*pragma omp parallel*/ + /**@brief +
  • @f${\bf x}=\alpha {\bf w}+\beta {\bf x}+\gamma {\bf p}@f$, + Normalize @f${\bf x}@f$
  • + */ + for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++)v1buf[idim][ie] = 0.0; + for (ii = 0; ii < 3; ii++) { + zgemm_(&tC, &tN, &X->Def.k_exct, &i_max, &X->Def.k_exct, &one, + &hsub[0][0][ii][0], &nsub, &wxp[ii][1][0], &X->Def.k_exct, &one, &v1buf[1][0], &X->Def.k_exct); + } + for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++) + wxp[1][idim][ie] = v1buf[idim][ie]; + /**@brief +
  • @f${\bf X}=\alpha {\bf W}+\beta {\bf X}+\gamma {\bf P}@f$, + Normalize @f${\bf X}@f$
  • + */ + for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++)v1buf[idim][ie] = 0.0; + for (ii = 0; ii < 3; ii++) { + zgemm_(&tC, &tN, &X->Def.k_exct, &i_max, &X->Def.k_exct, &one, + &hsub[0][0][ii][0], &nsub, &hwxp[ii][1][0], &X->Def.k_exct, &one, &v1buf[1][0], &X->Def.k_exct); + } + for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++) + hwxp[1][idim][ie] = v1buf[idim][ie]; + /**@brief +
  • @f${\bf p}=\alpha {\bf w}+\gamma {\bf p}@f$, + Normalize @f${\bf p}@f$
  • + */ + for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++)v1buf[idim][ie] = 0.0; + for (ii = 0; ii < 3; ii += 2) { + zgemm_(&tC, &tN, &X->Def.k_exct, &i_max, &X->Def.k_exct, &one, + &hsub[0][0][ii][0], &nsub, &wxp[ii][1][0], &X->Def.k_exct, &one, &v1buf[1][0], &X->Def.k_exct); + } + for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++) + wxp[2][idim][ie] = v1buf[idim][ie]; + /**@brief +
  • @f${\bf P}=\alpha {\bf W}+\gamma {\bf P}@f$, + Normalize @f${\bf P}@f$
  • + */ + for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++)v1buf[idim][ie] = 0.0; + for (ii = 0; ii < 3; ii += 2) { + zgemm_(&tC, &tN, &X->Def.k_exct, &i_max, &X->Def.k_exct, &one, + &hsub[0][0][ii][0], &nsub, &hwxp[ii][1][0], &X->Def.k_exct, &one, &v1buf[1][0], &X->Def.k_exct); + } + for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++) + hwxp[2][idim][ie] = v1buf[idim][ie]; /**@brief
  • Normalize @f${\bf x}@f$ and @f${\bf p}@f$, and scale @f${\bf X}@f$ and @f${\bf P}@f$ by the same factors
  • */ for (ii = 1; ii < 3; ii++) { - for (ie = 0; ie < X->Def.k_exct; ie++) { - dnorm = sqrt(creal(VecProdMPI(i_max, wxp[ii][ie], wxp[ii][ie]))); + NormMPI_dv(i_max, X->Def.k_exct, wxp[ii], dnorm); #pragma omp parallel for default(none) shared(i_max,wxp,hwxp,dnorm,ie,ii) private(idim) - for (idim = 1; idim <= i_max; idim++) { - wxp[ii][ie][idim] /= dnorm; - hwxp[ii][ie][idim] /= dnorm; - } - }/* for (ie = 0; ie < X->Def.k_exct; ie++)*/ + for (idim = 1; idim <= i_max; idim++) { + for (ie = 0; ie < X->Def.k_exct; ie++) { + wxp[ii][idim][ie] /= dnorm[ie]; + hwxp[ii][idim][ie] /= dnorm[ie]; + }/* for (ie = 0; ie < X->Def.k_exct; ie++)*/ + } }/*for (ii = 1; ii < 3; ii++)*/ }/*for (stp = 1; stp <= X->Def.Lanczos_max; stp++)*/ @@ -591,10 +582,10 @@ int LOBPCG_Main( fprintf(stdoutMPI, "%s", cLogLanczos_EigenValueEnd); free_d_1d_allocate(eig); + free_d_1d_allocate(dnorm); free_d_1d_allocate(eigsub); - free_cd_1d_allocate(hsub); - free_cd_1d_allocate(ovlp); - free_cd_2d_allocate(work); + free_cd_4d_allocate(hsub); + free_cd_4d_allocate(ovlp); free_cd_3d_allocate(hwxp); /**@brief
  • Output resulting vectors for restart
  • @@ -610,20 +601,16 @@ int LOBPCG_Main(
  • Just move wxp[1] into ::L_vec. The latter must start from index 0 (the same as FullDiag)
*/ - L_vec = cd_2d_allocate(X->Def.k_exct, X->Check.idim_max + 1); -#pragma omp parallel default(none) shared(i_max,wxp,L_vec,X) private(idim,ie) - { - for (ie = 0; ie < X->Def.k_exct; ie++) { -#pragma omp for nowait - for (idim = 0; idim < i_max; idim++) - L_vec[ie][idim] = wxp[1][ie][idim + 1]; - }/*for (ie = 0; ie < X->Def.k_exct; ie++)*/ - }/*X->Def.k_exct, X->Check.idim_max + 1);*/ + L_vec = cd_2d_allocate(X->Check.idim_max + 1, X->Def.k_exct); +#pragma omp parallel for default(none) shared(i_max,wxp,L_vec,X) private(idim,ie) + for (idim = 0; idim < i_max; idim++) + for (ie = 0; ie < X->Def.k_exct; ie++) + L_vec[idim][ie] = wxp[1][idim + 1][ie]; free_cd_3d_allocate(wxp); - v0 = cd_1d_allocate(X->Check.idim_max + 1); - v1 = cd_1d_allocate(X->Check.idim_max + 1); - vg = cd_1d_allocate(X->Check.idim_max + 1); + v0 = cd_1d_allocate(X->Check.idim_max + 1, 1); + v1 = cd_1d_allocate(X->Check.idim_max + 1, 1); + vg = cd_1d_allocate(X->Check.idim_max + 1, 1); if (iconv != 0) { sprintf(sdt, "%s", cLogLanczos_EigenValueNotConverged); return -1; @@ -693,7 +680,7 @@ int CalcByLOBPCG( and read from files. 
*/ fprintf(stdoutMPI, "An Eigenvector is inputted.\n"); - L_vec = cd_2d_allocate(X->Bind.Def.k_exct, X->Bind.Check.idim_max + 1); + L_vec = cd_2d_allocate(X->Bind.Check.idim_max + 1, X->Bind.Def.k_exct); for (ie = 0; ie < X->Bind.Def.k_exct; ie++) { TimeKeeper(&(X->Bind), cFileNameTimeKeep, cReadEigenVecStart, "a"); sprintf(sdt, cFileNameInputEigen, X->Bind.Def.CDataFileHead, ie, myrank); @@ -760,13 +747,13 @@ int CalcByLOBPCG( #pragma omp parallel for default(none) shared(X,v1,L_vec,ie) private(idim) for (idim = 0; idim < X->Bind.Check.idim_max; idim++) - v1[idim + 1] = L_vec[ie][idim]; + v1[idim + 1][0] = L_vec[idim][ie]; sprintf(sdt, cFileNameOutputEigen, X->Bind.Def.CDataFileHead, ie, myrank); if (childfopenALL(sdt, "wb", &fp) != 0) exitMPI(-1); byte_size = fwrite(&X->Bind.Large.itr, sizeof(X->Bind.Large.itr), 1, fp); byte_size = fwrite(&X->Bind.Check.idim_max, sizeof(X->Bind.Check.idim_max), 1, fp); - byte_size = fwrite(v1, sizeof(complex double), X->Bind.Check.idim_max + 1, fp); + byte_size = fwrite(&v1[0][0], sizeof(complex double), X->Bind.Check.idim_max + 1, fp); fclose(fp); }/*for (ie = 0; ie < X->Bind.Def.k_exct; ie++)*/ diff --git a/src/CalcSpectrumByBiCG.c b/src/CalcSpectrumByBiCG.c index 06226a480..ca9616dc7 100644 --- a/src/CalcSpectrumByBiCG.c +++ b/src/CalcSpectrumByBiCG.c @@ -301,8 +301,8 @@ int CalcSpectrumByBiCG( v12[idim] = 0.0; v14[idim] = 0.0; } - iret = mltply(&X->Bind, v12, v2); - iret = mltply(&X->Bind, v14, v4); + iret = mltply(&X->Bind, 1, v12, v2); + iret = mltply(&X->Bind, 1, v14, v4); res_proj = VecProdMPI(X->Bind.Check.idim_max, vrhs, v2); /** diff --git a/src/Lanczos_EigenValue.c b/src/Lanczos_EigenValue.c index 74e83f751..68323629e 100644 --- a/src/Lanczos_EigenValue.c +++ b/src/Lanczos_EigenValue.c @@ -104,7 +104,7 @@ int Lanczos_EigenValue(struct BindStruct *X) { //Eigenvalues by Lanczos method TimeKeeper(X, cFileNameTimeKeep, cLanczos_EigenValueStart, "a"); StartTimer(4101); - mltply(X, v0, v1); + mltply(X, 1, v0, v1); 
StopTimer(4101); stp = 1; TimeKeeperWithStep(X, cFileNameTimeKeep, cLanczos_EigenValueStep, "a", stp); @@ -160,7 +160,7 @@ int Lanczos_EigenValue(struct BindStruct *X) { } StartTimer(4101); - mltply(X, v0, v1); + mltply(X, 1, v0, v1); StopTimer(4101); TimeKeeperWithStep(X, cFileNameTimeKeep, cLanczos_EigenValueStep, "a", stp); alpha1 = creal(X->Large.prdct); @@ -356,7 +356,7 @@ int Lanczos_GetTridiagonalMatrixComponents( v1[i] = tmp_v1[i]; } stp = 0; - mltply(X, v0, tmp_v1); + mltply(X, 1, v0, tmp_v1); TimeKeeperWithStep(X, cFileNameTimeKeep, c_Lanczos_SpectrumStep, "a", stp); alpha1 = creal(X->Large.prdct);// alpha = v^{\dag}*H*v _alpha[1] = alpha1; @@ -392,7 +392,7 @@ int Lanczos_GetTridiagonalMatrixComponents( v1[i] = temp2; } - mltply(X, v0, v1); + mltply(X, 1, v0, v1); TimeKeeperWithStep(X, cFileNameTimeKeep, c_Lanczos_SpectrumStep, "a", stp); alpha1 = creal(X->Large.prdct); _alpha[stp] = alpha1; diff --git a/src/Lanczos_EigenVector.c b/src/Lanczos_EigenVector.c index 532272936..39fffb7b9 100644 --- a/src/Lanczos_EigenVector.c +++ b/src/Lanczos_EigenVector.c @@ -148,7 +148,7 @@ void Lanczos_EigenVector(struct BindStruct *X){ } }/*else if(initial_mode==1)*/ StartTimer(4201); - mltply(X, v0, v1); + mltply(X, 1, v0, v1); StopTimer(4201); alpha1=alpha[1]; @@ -174,7 +174,7 @@ void Lanczos_EigenVector(struct BindStruct *X){ v1[j] = temp2; } StartTimer(4201); - mltply(X, v0, v1); + mltply(X, 1, v0, v1); StopTimer(4201); alpha1 = alpha[i]; beta1 = beta[i]; diff --git a/src/Multiply.c b/src/Multiply.c index 8e564b221..3fc24524a 100644 --- a/src/Multiply.c +++ b/src/Multiply.c @@ -100,7 +100,7 @@ int MultiplyForTEM v1[i]=tmp2; v2[i]= 0.0 + I*0.0; } - mltply(X, v2, v1); + mltply(X, 1, v2, v1); } else { tmp1 *= -I * dt; @@ -114,7 +114,7 @@ int MultiplyForTEM for (coef = 2; coef <= X->Def.Param.ExpandCoef; coef++) { tmp1 *= -I * dt / (double complex) coef; //v2 = H*v1 = H^coef |psi(t)> - mltply(X, v2, v1); + mltply(X, 1, v2, v1); #pragma omp parallel for default(none) 
private(i) shared(v0, v1, v2) firstprivate(i_max, tmp1, myrank) for (i = 1; i <= i_max; i++) { diff --git a/src/PowerLanczos.c b/src/PowerLanczos.c index 75e98e668..e813a7a4c 100644 --- a/src/PowerLanczos.c +++ b/src/PowerLanczos.c @@ -43,7 +43,7 @@ int PowerLanczos(struct BindStruct *X){ for(i = 1; i <= i_max; i++){ v0[i]=0.0+0.0*I; } - mltply(X, v0, v1); // v0+=H*v1 + mltply(X, 1, v0, v1); // v0+=H*v1 dam_pr1=0.0; dam_pr2a=0.0; @@ -63,7 +63,7 @@ int PowerLanczos(struct BindStruct *X){ vg[i]=0.0; } - mltply(X, vg, v0); // vg=H*v0=H*H*v1 + mltply(X, 1, vg, v0); // vg=H*v0=H*H*v1 dam_pr2b = 0.0; dam_pr3 = 0.0; dam_pr4 = 0.0; diff --git a/src/common/setmemory.c b/src/common/setmemory.c index 770efd4f9..a8f642b06 100644 --- a/src/common/setmemory.c +++ b/src/common/setmemory.c @@ -295,3 +295,37 @@ void free_cd_3d_allocate(double complex***A){ free(A[0]); free(A); } + +/// \brief Allocation for A[N][M] +/// \param N [in] The size of the array A +/// \param M [in] The size of the array M +/// \return A Pointer to array A +/// \author Kazuyoshi Yoshimi (University of Tokyo) +double complex****cd_4d_allocate(const long unsigned int N, const long unsigned int M, const long unsigned int L, const long unsigned int K) { + long unsigned int int_i, int_j, int_k; + double complex****A; + A = (double complex****)calloc((N), sizeof(double complex**)); + A[0] = (double complex***)calloc((M*N), sizeof(double complex*)); + A[0][0] = (double complex**)calloc((L*M*N), sizeof(double complex)); + A[0][0][0] = (double complex**)calloc((K*L*M*N), sizeof(double complex)); + for (int_i = 0; int_i < N; int_i++) { + A[int_i] = A[0] + int_i * M; + for (int_j = 0; int_j < M; int_j++) { + A[int_i][int_j] = A[0][0] + int_i * M*L + int_j * L; + for (int_k = 0; int_k < L; int_k++) { + A[int_i][int_j][int_k] = A[0][0][0] + int_i * M*L*K + int_j * L*M + int_k * L; + } + } + } + return A; +} + +/// +/// \brief Function to free 3d array (complex double) +/// \param A A pointer of 3d array A +void 
free_cd_4d_allocate(double complex****A) { + free(A[0][0][0]); + free(A[0][0]); + free(A[0]); + free(A); +} diff --git a/src/common/setmemory.h b/src/common/setmemory.h index c99fb549e..4521156cf 100644 --- a/src/common/setmemory.h +++ b/src/common/setmemory.h @@ -182,4 +182,8 @@ double complex***cd_3d_allocate(const long unsigned int N, const long unsigned i /// \param A A pointer of 3d array A void free_cd_3d_allocate(double complex***A); +double complex****cd_4d_allocate(const long unsigned int N, const long unsigned int M, const long unsigned int L, const long unsigned int K); + +void free_cd_4d_allocate(double complex****A); + #endif //MVMC_SETMEMORY_H diff --git a/src/expec_cisajs.c b/src/expec_cisajs.c index 406e25b7e..b75c50bd1 100644 --- a/src/expec_cisajs.c +++ b/src/expec_cisajs.c @@ -36,21 +36,6 @@ * @author Kazuyoshi Yoshimi (The University of Tokyo) * */ - - -int expec_cisajs_HubbardGC(struct BindStruct *X,double complex *vec, FILE **_fp); -int expec_cisajs_Hubbard(struct BindStruct *X,double complex *vec, FILE **_fp); - -int expec_cisajs_Spin(struct BindStruct *X,double complex *vec, FILE **_fp); -int expec_cisajs_SpinHalf(struct BindStruct *X,double complex *vec, FILE **_fp); -int expec_cisajs_SpinGeneral(struct BindStruct *X,double complex *vec, FILE **_fp); - -int expec_cisajs_SpinGC(struct BindStruct *X,double complex *vec, FILE **_fp); -int expec_cisajs_SpinGCHalf(struct BindStruct *X,double complex *vec, FILE **_fp); -int expec_cisajs_SpinGCGeneral(struct BindStruct *X,double complex *vec, FILE **_fp); - - - /** * @brief function of calculation for one body green's function * @@ -66,8 +51,12 @@ int expec_cisajs_SpinGCGeneral(struct BindStruct *X,double complex *vec, FILE ** * @retval 0 normally finished. * @retval -1 abnormally finished. 
*/ -int expec_cisajs(struct BindStruct *X,double complex *vec){ - +int expec_cisajs( + struct BindStruct *X, + int nstate, + double complex **Xvec, + double complex **vec +){ FILE *fp; char sdt[D_FileNameMax]; @@ -110,10 +99,10 @@ int expec_cisajs(struct BindStruct *X,double complex *vec){ //vec=v0; break; case TimeEvolution: - step=X->Def.istep; - TimeKeeperWithStep(X, cFileNameTimeKeep, cTEExpecOneBodyGStart, "a", step); - sprintf(sdt, cFileName1BGreen_TE, X->Def.CDataFileHead, step); - break; + step = X->Def.istep; + TimeKeeperWithStep(X, cFileNameTimeKeep, cTEExpecOneBodyGStart, "a", step); + sprintf(sdt, cFileName1BGreen_TE, X->Def.CDataFileHead, step); + break; case FullDiag: case CG: @@ -127,7 +116,7 @@ int expec_cisajs(struct BindStruct *X,double complex *vec){ } switch(X->Def.iCalcModel){ case HubbardGC: - if(expec_cisajs_HubbardGC(X, vec, &fp)!=0){ + if(expec_cisajs_HubbardGC(X, nstate, Xvec, vec, &fp)!=0){ return -1; } break; @@ -135,22 +124,22 @@ int expec_cisajs(struct BindStruct *X,double complex *vec){ case KondoGC: case Hubbard: case Kondo: - if(expec_cisajs_Hubbard(X, vec, &fp)!=0){ - return -1; - } + if (expec_cisajs_Hubbard(X, nstate, Xvec, vec, &fp) != 0) { + return -1; + } break; case Spin: // for the Sz-conserved spin system - if(expec_cisajs_Spin(X, vec, &fp)!=0){ - return -1; - } + if (expec_cisajs_Spin(X, nstate, Xvec, vec, &fp) != 0) { + return -1; + } break; case SpinGC: - if(expec_cisajs_SpinGC(X, vec, &fp)!=0){ - return -1; - } - break; + if (expec_cisajs_SpinGC(X, nstate, Xvec, vec, &fp) != 0) { + return -1; + } + break; default: return -1; @@ -175,7 +164,6 @@ int expec_cisajs(struct BindStruct *X,double complex *vec){ } return 0; } - /** * @brief function of calculation for one body green's function for Hubbard GC model. * @@ -185,66 +173,73 @@ int expec_cisajs(struct BindStruct *X,double complex *vec){ * @retval 0 normally finished. * @retval -1 abnormally finished. 
*/ -int expec_cisajs_HubbardGC(struct BindStruct *X, double complex *vec, FILE **_fp){ - long unsigned int i,j; - long unsigned int org_isite1,org_isite2,org_sigma1,org_sigma2; - double complex dam_pr=0; - long int i_max; - long int ibit; - long unsigned int is; - double complex tmp_OneGreen=1.0; - - i_max = X->Check.idim_max; - - for(i=0;iDef.NCisAjt;i++){ - org_isite1 = X->Def.CisAjt[i][0]+1; - org_isite2 = X->Def.CisAjt[i][2]+1; - org_sigma1 = X->Def.CisAjt[i][1]; - org_sigma2 = X->Def.CisAjt[i][3]; - dam_pr=0; - if (org_isite1 > X->Def.Nsite && - org_isite2 > X->Def.Nsite) { - if(org_isite1==org_isite2 && org_sigma1==org_sigma2){ - if(org_sigma1==0){ - is = X->Def.Tpow[2 * org_isite1 - 2]; - } - else{ - is = X->Def.Tpow[2 * org_isite1 - 1]; - } - ibit = (unsigned long int)myrank & is; - if (ibit == is) { -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(vec) \ - firstprivate(i_max) private(j) - for (j = 1; j <= i_max; j++) dam_pr += vec[j]*conj(vec[j]); - } - } - else{ - dam_pr =X_GC_child_general_hopp_MPIdouble(org_isite1-1, org_sigma1, org_isite2-1, org_sigma2, -tmp_OneGreen, X, vec, vec); - } +int expec_cisajs_HubbardGC( + struct BindStruct *X, + int nstate, + double complex **Xvec, + double complex **vec, + FILE **_fp +){ + long unsigned int i, j; + long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; + double complex dam_pr = 0; + long int i_max; + long int ibit; + long unsigned int is; + double complex tmp_OneGreen = 1.0; + + i_max = X->Check.idim_max; + + for (i = 0; i < X->Def.NCisAjt; i++) { + zclear(i_max*nstate, &Xvec[1][0]); + org_isite1 = X->Def.CisAjt[i][0] + 1; + org_isite2 = X->Def.CisAjt[i][2] + 1; + org_sigma1 = X->Def.CisAjt[i][1]; + org_sigma2 = X->Def.CisAjt[i][3]; + dam_pr = 0; + if (org_isite1 > X->Def.Nsite && + org_isite2 > X->Def.Nsite) { + if (org_isite1 == org_isite2 && org_sigma1 == org_sigma2) { + if (org_sigma1 == 0) { + is = X->Def.Tpow[2 * org_isite1 - 2]; } - else if (org_isite2 > X->Def.Nsite || 
org_isite1 > X->Def.Nsite){ - if(org_isite1Def.Tpow[2 * org_isite1 - 1]; } - else{ - if(child_general_hopp_GetInfo( X,org_isite1,org_isite2,org_sigma1,org_sigma2)!=0){ - return -1; - } - dam_pr = GC_child_general_hopp(vec,vec,X,tmp_OneGreen); + ibit = (unsigned long int)myrank & is; + if (ibit == is) { + zaxpy_long(i_max*nstate, tmp_OneGreen, &vec[1][0], &Xvec[1][0]); } - - dam_pr= SumMPI_dc(dam_pr); - fprintf(*_fp," %4ld %4ld %4ld %4ld %.10lf %.10lf\n",org_isite1-1,org_sigma1,org_isite2-1,org_sigma2,creal(dam_pr),cimag(dam_pr)); + } + else { + X_GC_child_general_hopp_MPIdouble(org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, + -tmp_OneGreen, X, nstate, Xvec, vec); + } + } + else if (org_isite2 > X->Def.Nsite || org_isite1 > X->Def.Nsite) { + if (org_isite1 < org_isite2) { + X_GC_child_general_hopp_MPIsingle(org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, + -tmp_OneGreen, X, nstate, Xvec, vec); + } + else { + X_GC_child_general_hopp_MPIsingle(org_isite2 - 1, org_sigma2, org_isite1 - 1, org_sigma1, + -tmp_OneGreen, X, nstate, Xvec, vec); + zswap_long(i_max*nstate, &vec[1][0], &Xvec[1][0]); + } + } + else { + if (child_general_hopp_GetInfo(X, org_isite1, org_isite2, org_sigma1, org_sigma2) != 0) { + return -1; + } + GC_child_general_hopp(nstate, Xvec, vec, X, tmp_OneGreen); } - return 0; + MultiVecProdMPI(i_max, nstate, vec, Xvec, prod); + fprintf(*_fp, " %4ld %4ld %4ld %4ld %.10lf %.10lf\n", + org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, creal(dam_pr), cimag(dam_pr)); + } + return 0; } - /** * @brief function of calculation for one body green's function for Hubbard model. * @@ -254,95 +249,100 @@ int expec_cisajs_HubbardGC(struct BindStruct *X, double complex *vec, FILE **_fp * @retval 0 normally finished. * @retval -1 abnormally finished. 
*/ -int expec_cisajs_Hubbard(struct BindStruct *X, double complex *vec, FILE **_fp) { - long unsigned int i,j; - long unsigned int org_isite1,org_isite2,org_sigma1,org_sigma2; - double complex dam_pr=0; - long int i_max; - int num1; - long int ibit; - long unsigned int is; - double complex tmp_OneGreen=1.0; - - i_max = X->Check.idim_max; - for(i=0;iDef.NCisAjt;i++){ - org_isite1 = X->Def.CisAjt[i][0]+1; - org_isite2 = X->Def.CisAjt[i][2]+1; - org_sigma1 = X->Def.CisAjt[i][1]; - org_sigma2 = X->Def.CisAjt[i][3]; - dam_pr=0.0; - - if(X->Def.iFlgSzConserved ==TRUE){ - if(org_sigma1 != org_sigma2){ - dam_pr =0.0; - fprintf(*_fp," %4ld %4ld %4ld %4ld %.10lf %.10lf\n",org_isite1-1,org_sigma1,org_isite2-1,org_sigma2,creal(dam_pr),cimag(dam_pr)); - continue; - } - } - - if(X->Def.iCalcModel==Kondo || X->Def.iCalcModel==KondoGC) { - if( (X->Def.LocSpn[org_isite1 - 1] == 1 && X->Def.LocSpn[org_isite2 - 1] == 0) || - (X->Def.LocSpn[org_isite1 - 1] == 0 && X->Def.LocSpn[org_isite2 - 1] == 1) - ) - { - dam_pr =0.0; - fprintf(*_fp," %4ld %4ld %4ld %4ld %.10lf %.10lf\n",org_isite1-1,org_sigma1,org_isite2-1,org_sigma2,creal(dam_pr),cimag(dam_pr)); - continue; - } - } +int expec_cisajs_Hubbard( + struct BindStruct *X, + int nstate, + double complex **Xvec, + double complex **vec, + FILE **_fp +) { + long unsigned int i, j; + long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; + double complex dam_pr = 0; + long int i_max; + int num1; + long int ibit; + long unsigned int is; + double complex tmp_OneGreen = 1.0; + + i_max = X->Check.idim_max; + for (i = 0; i < X->Def.NCisAjt; i++) { + zclear(i_max*nstate, &Xvec[1][0]); + org_isite1 = X->Def.CisAjt[i][0] + 1; + org_isite2 = X->Def.CisAjt[i][2] + 1; + org_sigma1 = X->Def.CisAjt[i][1]; + org_sigma2 = X->Def.CisAjt[i][3]; + dam_pr = 0.0; + + if (X->Def.iFlgSzConserved == TRUE) { + if (org_sigma1 != org_sigma2) { + dam_pr = 0.0; + fprintf(*_fp, " %4ld %4ld %4ld %4ld %.10lf %.10lf\n", org_isite1 - 1, org_sigma1, org_isite2 - 
1, org_sigma2, creal(dam_pr), cimag(dam_pr)); + continue; + } + } - if (org_isite1 > X->Def.Nsite && - org_isite2 > X->Def.Nsite) { - if(org_isite1==org_isite2 && org_sigma1==org_sigma2){//diagonal + if (X->Def.iCalcModel == Kondo || X->Def.iCalcModel == KondoGC) { + if ((X->Def.LocSpn[org_isite1 - 1] == 1 && X->Def.LocSpn[org_isite2 - 1] == 0) || + (X->Def.LocSpn[org_isite1 - 1] == 0 && X->Def.LocSpn[org_isite2 - 1] == 1) + ) + { + dam_pr = 0.0; + fprintf(*_fp, " %4ld %4ld %4ld %4ld %.10lf %.10lf\n", org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, creal(dam_pr), cimag(dam_pr)); + continue; + } + } - is = X->Def.Tpow[2 * org_isite1 - 2+org_sigma1]; - ibit = (unsigned long int)myrank & is; - if (ibit == is) { -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(vec) \ - firstprivate(i_max) private(j) - for (j = 1; j <= i_max; j++) dam_pr += vec[j]*conj(vec[j]); - } + if (org_isite1 > X->Def.Nsite && + org_isite2 > X->Def.Nsite) { + if (org_isite1 == org_isite2 && org_sigma1 == org_sigma2) {//diagonal - } - else{ - dam_pr =X_child_general_hopp_MPIdouble(org_isite1-1, org_sigma1, org_isite2-1, org_sigma2, -tmp_OneGreen, X, vec, vec); - } + is = X->Def.Tpow[2 * org_isite1 - 2 + org_sigma1]; + ibit = (unsigned long int)myrank & is; + if (ibit == is) { + zaxpy_long(i_max*nstate, tmp_OneGreen, &vec[1][0], &Xvec[1][0]); } - else if (org_isite2 > X->Def.Nsite || org_isite1 > X->Def.Nsite){ - if(org_isite1 < org_isite2){ - dam_pr =X_child_general_hopp_MPIsingle(org_isite1-1, org_sigma1,org_isite2-1, org_sigma2, -tmp_OneGreen, X, vec, vec); - } - else{ - dam_pr = X_child_general_hopp_MPIsingle(org_isite2-1, org_sigma2, org_isite1-1, org_sigma1, -tmp_OneGreen, X, vec, vec); - dam_pr = conj(dam_pr); - } - } - else{ - if(child_general_hopp_GetInfo( X,org_isite1,org_isite2,org_sigma1,org_sigma2)!=0){ - return -1; - } - if(org_isite1==org_isite2 && org_sigma1==org_sigma2){ - //fprintf(stdoutMPI,"DEBUG1-3-1\n"); - is = X->Def.Tpow[2 * org_isite1 - 2 + 
org_sigma1]; - -#pragma omp parallel for default(none) shared(list_1, vec) reduction(+:dam_pr) firstprivate(i_max, is) private(num1, ibit) - for(j = 1;j <= i_max;j++){ - ibit = list_1[j]&is; - num1 = ibit/is; - dam_pr += num1*conj(vec[j])*vec[j]; - } - } - else{ - dam_pr = child_general_hopp(vec,vec,X,tmp_OneGreen); - } + } + else { + X_child_general_hopp_MPIdouble(org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, + -tmp_OneGreen, X, nstate, Xvec, vec); + } + } + else if (org_isite2 > X->Def.Nsite || org_isite1 > X->Def.Nsite) { + if (org_isite1 < org_isite2) { + X_child_general_hopp_MPIsingle(org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, + -tmp_OneGreen, X, nstate, Xvec, vec); + } + else { + X_child_general_hopp_MPIsingle(org_isite2 - 1, org_sigma2, org_isite1 - 1, org_sigma1, + -tmp_OneGreen, X, nstate, Xvec, vec); + zswap_long(i_max*nstate, &vec[1][0], &Xvec[1][0]); + } + } + else { + if (child_general_hopp_GetInfo(X, org_isite1, org_isite2, org_sigma1, org_sigma2) != 0) { + return -1; + } + if (org_isite1 == org_isite2 && org_sigma1 == org_sigma2) { + is = X->Def.Tpow[2 * org_isite1 - 2 + org_sigma1]; + +#pragma omp parallel for default(none) shared(list_1, vec) reduction(+:dam_pr) \ +firstprivate(i_max, is) private(num1, ibit) + for (j = 1; j <= i_max; j++) { + ibit = list_1[j] & is; + num1 = ibit / is; + zaxpy_(&nstate, &tmp_OneGreen, vec[j], &one, Xvec[j], &one); } - dam_pr= SumMPI_dc(dam_pr); - //fprintf(stdoutMPI, "rank=%d, dam_pr=%lf\n", myrank, creal(dam_pr)); - fprintf(*_fp," %4ld %4ld %4ld %4ld %.10lf %.10lf\n",org_isite1-1,org_sigma1,org_isite2-1,org_sigma2,creal(dam_pr),cimag(dam_pr)); + } + else { + child_general_hopp(nstate, Xvec, vec, X, tmp_OneGreen); + } } - return 0; + MultiVecProdMPI(i_max, nstate, vec, Xvec, prod); + fprintf(*_fp, " %4ld %4ld %4ld %4ld %.10lf %.10lf\n", org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, creal(dam_pr), cimag(dam_pr)); + } + return 0; } - /** * @brief function of calculation for one body 
green's function for Spin model. * @@ -352,16 +352,22 @@ int expec_cisajs_Hubbard(struct BindStruct *X, double complex *vec, FILE **_fp) * @retval 0 normally finished. * @retval -1 abnormally finished. */ -int expec_cisajs_Spin(struct BindStruct *X, double complex *vec, FILE **_fp) { - int info=0; - if (X->Def.iFlgGeneralSpin == FALSE) { - info=expec_cisajs_SpinHalf(X,vec, _fp); - } else { - info=expec_cisajs_SpinGeneral(X,vec, _fp); - } - return info; +int expec_cisajs_Spin( + struct BindStruct *X, + int nstate, + double complex **Xvec, + double complex **vec, + FILE **_fp +) { + int info = 0; + if (X->Def.iFlgGeneralSpin == FALSE) { + info = expec_cisajs_SpinHalf(X, nstate, Xvec, vec, _fp); + } + else { + info = expec_cisajs_SpinGeneral(X, nstate, Xvec, vec, _fp); + } + return info; } - /** * @brief function of calculation for one body green's function for Half-Spin model. * @@ -371,57 +377,63 @@ int expec_cisajs_Spin(struct BindStruct *X, double complex *vec, FILE **_fp) { * @retval 0 normally finished. * @retval -1 abnormally finished. 
*/ -int expec_cisajs_SpinHalf(struct BindStruct *X, double complex *vec, FILE **_fp) { - long unsigned int i,j; - long unsigned int isite1; - long unsigned int org_isite1,org_isite2,org_sigma1,org_sigma2; - double complex dam_pr=0; - long int i_max; - long int ibit1; - long unsigned int is1_up; - - i_max = X->Check.idim_max; - - for(i=0;iDef.NCisAjt;i++){ - org_isite1 = X->Def.CisAjt[i][0]+1; - org_isite2 = X->Def.CisAjt[i][2]+1; - org_sigma1 = X->Def.CisAjt[i][1]; - org_sigma2 = X->Def.CisAjt[i][3]; - - if(org_sigma1 == org_sigma2){ - if(org_isite1==org_isite2){ - if(org_isite1 > X->Def.Nsite){ - is1_up = X->Def.Tpow[org_isite1 - 1]; - ibit1 = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is1_up, org_sigma1); - dam_pr=0; - if(ibit1 !=0){ -#pragma omp parallel for reduction(+:dam_pr)default(none) shared(vec) \ - firstprivate(i_max) private(j) - for (j = 1; j <= i_max; j++) dam_pr += conj(vec[j])*vec[j]; - } - }// org_isite1 > X->Def.Nsite - else{ - isite1 = X->Def.Tpow[org_isite1-1]; - dam_pr=0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j) firstprivate(i_max, isite1, org_sigma1, X) shared(vec) - for(j=1;j<=i_max;j++){ - dam_pr+=X_Spin_CisAis(j,X, isite1,org_sigma1)*conj(vec[j])*vec[j]; - } - } - } - else{ - dam_pr=0.0; - } - }else{ - // for the canonical case - dam_pr =0.0; +int expec_cisajs_SpinHalf( + struct BindStruct *X, + int nstate, + double complex **Xvec, + double complex **vec, + FILE **_fp +) { + long unsigned int i, j; + long unsigned int isite1; + long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; + double complex dam_pr = 0, dmv; + long int i_max; + long int ibit1; + long unsigned int is1_up; + int one = 1; + + i_max = X->Check.idim_max; + + for (i = 0; i < X->Def.NCisAjt; i++) { + zclear(i_max*nstate, &Xvec[1][0]); + org_isite1 = X->Def.CisAjt[i][0] + 1; + org_isite2 = X->Def.CisAjt[i][2] + 1; + org_sigma1 = X->Def.CisAjt[i][1]; + org_sigma2 = X->Def.CisAjt[i][3]; + + if (org_sigma1 == org_sigma2) { + if 
(org_isite1 == org_isite2) { + if (org_isite1 > X->Def.Nsite) { + is1_up = X->Def.Tpow[org_isite1 - 1]; + ibit1 = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is1_up, org_sigma1); + if (ibit1 != 0) { + zaxpy_long(i_max*nstate, 1.0, &vec[1][0], &Xvec[1][0]); + } + }// org_isite1 > X->Def.Nsite + else { + isite1 = X->Def.Tpow[org_isite1 - 1]; +#pragma omp parallel for default(none) reduction(+:dam_pr) private(j) \ +firstprivate(i_max, isite1, org_sigma1, X) shared(vec) + for (j = 1; j <= i_max; j++) { + dmv = X_Spin_CisAis(j, X, isite1, org_sigma1); + zaxpy_(&nstate, &dmv, &vec[j][0], &one, &Xvec[j][0], &one); + } } - dam_pr = SumMPI_dc(dam_pr); - fprintf(*_fp," %4ld %4ld %4ld %4ld %.10lf %.10lf\n",org_isite1-1, org_sigma1, org_isite2-1, org_sigma2, creal(dam_pr), cimag(dam_pr)); + } + else { + dam_pr = 0.0; + } + } + else { + // for the canonical case + dam_pr = 0.0; } - return 0; + MultiVecProdMPI(i_max, nstate, vec, Xvec, prod); + fprintf(*_fp, " %4ld %4ld %4ld %4ld %.10lf %.10lf\n", org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, creal(dam_pr), cimag(dam_pr)); + } + return 0; } - /** * @brief function of calculation for one body green's function for General-Spin model. * @@ -431,62 +443,68 @@ int expec_cisajs_SpinHalf(struct BindStruct *X, double complex *vec, FILE **_fp) * @retval 0 normally finished. * @retval -1 abnormally finished. 
*/ -int expec_cisajs_SpinGeneral(struct BindStruct *X, double complex *vec, FILE **_fp) { - long unsigned int i,j; - long unsigned int org_isite1,org_isite2,org_sigma1,org_sigma2; - double complex dam_pr=0; - long int i_max; - int num1; - i_max = X->Check.idim_max; - - for(i=0;iDef.NCisAjt;i++){ - org_isite1 = X->Def.CisAjt[i][0]+1; - org_isite2 = X->Def.CisAjt[i][2]+1; - org_sigma1 = X->Def.CisAjt[i][1]; - org_sigma2 = X->Def.CisAjt[i][3]; - - if(org_isite1 == org_isite2){ - if(org_isite1 >X->Def.Nsite){ - if(org_sigma1==org_sigma2){ - // longitudinal magnetic field - num1 = BitCheckGeneral((unsigned long int)myrank, +int expec_cisajs_SpinGeneral( + struct BindStruct *X, + int nstate, + double complex **Xvec, + double complex **vec, + FILE **_fp +) { + long unsigned int i, j; + long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; + double complex dam_pr = 0, dmv; + long int i_max; + int num1, one = 1; + i_max = X->Check.idim_max; + + for (i = 0; i < X->Def.NCisAjt; i++) { + zclear(i_max*nstate, &Xvec[1][0]); + org_isite1 = X->Def.CisAjt[i][0] + 1; + org_isite2 = X->Def.CisAjt[i][2] + 1; + org_sigma1 = X->Def.CisAjt[i][1]; + org_sigma2 = X->Def.CisAjt[i][3]; + + if (org_isite1 == org_isite2) { + if (org_isite1 > X->Def.Nsite) { + if (org_sigma1 == org_sigma2) { + // longitudinal magnetic field + num1 = BitCheckGeneral((unsigned long int)myrank, org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); - dam_pr=0.0; - if (num1 != 0) { -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j) firstprivate(i_max, org_isite1, org_sigma1, X) shared(vec) - for(j=1;j<=i_max;j++){ - dam_pr+=conj(vec[j])*vec[j]; - } - } - }else{ - dam_pr=0.0; - } - } - else {//org_isite1 <= X->Def.Nsite - if(org_sigma1==org_sigma2){ - // longitudinal magnetic field - dam_pr=0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, num1) firstprivate(i_max, org_isite1, org_sigma1, X) shared(vec, list_1) - for(j=1;j<=i_max;j++){ - num1 = 
BitCheckGeneral(list_1[j], org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); - dam_pr+=conj(vec[j])*vec[j]*num1; - } - }else{ - dam_pr=0.0; - } - } - }else{ - // hopping is not allowed in localized spin system - dam_pr=0.0; - }//org_isite1 != org_isite2 - - dam_pr = SumMPI_dc(dam_pr); - fprintf(*_fp," %4ld %4ld %4ld %4ld %.10lf %.10lf\n",org_isite1-1, org_sigma1, org_isite2-1, org_sigma2,creal(dam_pr),cimag(dam_pr)); + dam_pr = 0.0; + if (num1 != 0) { + zaxpy_long(i_max*nstate, 1.0, &vec[1][0], &Xvec[1][0]); + } + } + else { + dam_pr = 0.0; + } + } + else {//org_isite1 <= X->Def.Nsite + if (org_sigma1 == org_sigma2) { + // longitudinal magnetic field + dam_pr = 0.0; +#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, num1) \ +firstprivate(i_max, org_isite1, org_sigma1, X) shared(vec, list_1) + for (j = 1; j <= i_max; j++) { + dmv = BitCheckGeneral(list_1[j], org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); + zaxpy_(&nstate, &dmv, &vec[j][0], &one, &Xvec[j][0], &one); + } + } + else { + dam_pr = 0.0; + } + } } + else { + // hopping is not allowed in localized spin system + dam_pr = 0.0; + }//org_isite1 != org_isite2 - return 0; + MultiVecProdMPI(i_max, nstate, vec, Xvec, prod); + fprintf(*_fp, " %4ld %4ld %4ld %4ld %.10lf %.10lf\n", org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, creal(dam_pr), cimag(dam_pr)); + } + return 0; } - /** * @brief function of calculation for one body green's function for SpinGC model. * @@ -496,16 +514,22 @@ int expec_cisajs_SpinGeneral(struct BindStruct *X, double complex *vec, FILE **_ * @retval 0 normally finished. * @retval -1 abnormally finished. 
*/ -int expec_cisajs_SpinGC(struct BindStruct *X, double complex *vec, FILE **_fp) { - int info=0; - if (X->Def.iFlgGeneralSpin == FALSE) { - info=expec_cisajs_SpinGCHalf(X,vec, _fp); - } else { - info=expec_cisajs_SpinGCGeneral(X,vec, _fp); - } - return info; +int expec_cisajs_SpinGC( + struct BindStruct *X, + int nstate, + double complex **Xvec, + double complex **vec, + FILE **_fp +) { + int info = 0; + if (X->Def.iFlgGeneralSpin == FALSE) { + info = expec_cisajs_SpinGCHalf(X, nstate, Xvec, vec, _fp); + } + else { + info = expec_cisajs_SpinGCGeneral(X, nstate, Xvec, vec, _fp); + } + return info; } - /** * @brief function of calculation for one body green's function for Half-SpinGC model. * @@ -515,63 +539,76 @@ int expec_cisajs_SpinGC(struct BindStruct *X, double complex *vec, FILE **_fp) { * @retval 0 normally finished. * @retval -1 abnormally finished. */ -int expec_cisajs_SpinGCHalf(struct BindStruct *X, double complex *vec, FILE **_fp) { - long unsigned int i,j; - long unsigned int isite1; - long unsigned int org_isite1,org_isite2,org_sigma1,org_sigma2; - double complex dam_pr=0; - long int i_max; - int tmp_sgn; - long unsigned int tmp_off=0; - - i_max = X->Check.idim_max; - - for(i=0;iDef.NCisAjt;i++){ - org_isite1 = X->Def.CisAjt[i][0]+1; - org_isite2 = X->Def.CisAjt[i][2]+1; - org_sigma1 = X->Def.CisAjt[i][1]; - org_sigma2 = X->Def.CisAjt[i][3]; - dam_pr=0.0; - - if(org_isite1 == org_isite2){ - if(org_isite1 > X->Def.Nsite){ - if(org_sigma1==org_sigma2){ // longitudinal magnetic field - dam_pr += X_GC_child_CisAis_spin_MPIdouble(org_isite1-1, org_sigma1, 1.0, X, vec, vec); - } - else{ // transverse magnetic field - dam_pr += X_GC_child_CisAit_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, 1.0, X, vec, vec); - } - }else{ - isite1 = X->Def.Tpow[org_isite1-1]; - - if(org_sigma1==org_sigma2){ - // longitudinal magnetic field -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_sgn) firstprivate(i_max, isite1, org_sigma1, X) 
shared(vec) - for(j=1;j<=i_max;j++){ - dam_pr += X_SpinGC_CisAis(j, X, isite1, org_sigma1)*conj(vec[j])*vec[j]; - } - }else{ - // transverse magnetic field -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_sgn, tmp_off) firstprivate(i_max, isite1, org_sigma2, X) shared(vec) - for(j=1;j<=i_max;j++){ - tmp_sgn = X_SpinGC_CisAit(j,X, isite1,org_sigma2,&tmp_off); - if(tmp_sgn !=0){ - dam_pr += tmp_sgn*conj(vec[tmp_off+1])*vec[j]; - } - } - } +int expec_cisajs_SpinGCHalf( + struct BindStruct *X, + int nstate, + double complex **Xvec, + double complex **vec, + FILE **_fp +) { + long unsigned int i, j; + long unsigned int isite1; + long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; + double complex dam_pr = 0, dmv; + long int i_max; + int tmp_sgn, one = 1; + long unsigned int tmp_off = 0; + + i_max = X->Check.idim_max; + + for (i = 0; i < X->Def.NCisAjt; i++) { + zclear(i_max*nstate, &Xvec[1][0]); + org_isite1 = X->Def.CisAjt[i][0] + 1; + org_isite2 = X->Def.CisAjt[i][2] + 1; + org_sigma1 = X->Def.CisAjt[i][1]; + org_sigma2 = X->Def.CisAjt[i][3]; + dam_pr = 0.0; + + if (org_isite1 == org_isite2) { + if (org_isite1 > X->Def.Nsite) { + if (org_sigma1 == org_sigma2) { // longitudinal magnetic field + X_GC_child_CisAis_spin_MPIdouble(org_isite1 - 1, org_sigma1, 1.0, X, nstate, Xvec, vec); + } + else { // transverse magnetic field + X_GC_child_CisAit_spin_MPIdouble(org_isite1 - 1, org_sigma1, org_sigma2, 1.0, X, nstate, Xvec, vec); + } + } + else { + isite1 = X->Def.Tpow[org_isite1 - 1]; + + if (org_sigma1 == org_sigma2) { + // longitudinal magnetic field +#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_sgn) \ +firstprivate(i_max, isite1, org_sigma1, X) shared(vec) + for (j = 1; j <= i_max; j++) { + dmv = X_SpinGC_CisAis(j, X, isite1, org_sigma1); + zaxpy_(&nstate, &dmv, &vec[j][0], &one, &Xvec[j][0], &one); + } + } + else { + // transverse magnetic field +#pragma omp parallel for default(none) 
reduction(+:dam_pr) private(j, tmp_sgn, tmp_off) \ +firstprivate(i_max, isite1, org_sigma2, X) shared(vec) + for (j = 1; j <= i_max; j++) { + tmp_sgn = X_SpinGC_CisAit(j, X, isite1, org_sigma2, &tmp_off); + if (tmp_sgn != 0) { + dmv = (double complex)tmp_sgn; + zaxpy_(&nstate, &dmv, &vec[j][0], &one, &Xvec[tmp_off + 1][0], &one); } - }else{ - // hopping is not allowed in localized spin system - dam_pr=0.0; + } } - - dam_pr = SumMPI_dc(dam_pr); - fprintf(*_fp," %4ld %4ld %4ld %4ld %.10lf %.10lf\n",org_isite1-1, org_sigma1, org_isite2-1, org_sigma2,creal(dam_pr),cimag(dam_pr)); + } + } + else { + // hopping is not allowed in localized spin system + dam_pr = 0.0; } - return 0; + MultiVecProdMPI(i_max, nstate, vec, Xvec, prod); + fprintf(*_fp, " %4ld %4ld %4ld %4ld %.10lf %.10lf\n", + org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, creal(dam_pr), cimag(dam_pr)); + } + return 0; } - /** * @brief function of calculation for one body green's function for General SpinGC model. * @@ -581,60 +618,70 @@ int expec_cisajs_SpinGCHalf(struct BindStruct *X, double complex *vec, FILE **_f * @retval 0 normally finished. * @retval -1 abnormally finished. 
*/ -int expec_cisajs_SpinGCGeneral(struct BindStruct *X, double complex *vec, FILE **_fp) { - long unsigned int i, j; - long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; - double complex dam_pr = 0; - long int i_max; - long unsigned int tmp_off = 0; - int num1; - - i_max = X->Check.idim_max; - - for (i = 0; i < X->Def.NCisAjt; i++) { - org_isite1 = X->Def.CisAjt[i][0] + 1; - org_isite2 = X->Def.CisAjt[i][2] + 1; - org_sigma1 = X->Def.CisAjt[i][1]; - org_sigma2 = X->Def.CisAjt[i][3]; - if (org_isite1 == org_isite2) { - if (org_isite1 > X->Def.Nsite) { - if (org_sigma1 == org_sigma2) { -// longitudinal magnetic field - dam_pr = X_GC_child_CisAis_GeneralSpin_MPIdouble(org_isite1 - 1, org_sigma1, 1.0, X, vec, vec); - } else { -// transverse magnetic field - dam_pr = X_GC_child_CisAit_GeneralSpin_MPIdouble(org_isite1 - 1, org_sigma1, org_sigma2, 1.0, X, - vec, vec); - } - } else {//org_isite1 <= X->Def.Nsite - if (org_sigma1 == org_sigma2) { -// longitudinal magnetic field - dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, num1) firstprivate(i_max, org_isite1, org_sigma1, X) shared(vec) - for (j = 1; j <= i_max; j++) { - num1 = BitCheckGeneral(j - 1, org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); - dam_pr += conj(vec[j]) * vec[j] * num1; - } - } else { -// transverse magnetic field - dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, num1) firstprivate(i_max, org_isite1, org_sigma1, org_sigma2, X,tmp_off) shared(vec) - for (j = 1; j <= i_max; j++) { - num1 = GetOffCompGeneralSpin(j - 1, org_isite1, org_sigma2, org_sigma1, &tmp_off, - X->Def.SiteToBit, X->Def.Tpow); - if (num1 != 0) { - dam_pr += conj(vec[tmp_off + 1]) * vec[j] * num1; - } - } - } +int expec_cisajs_SpinGCGeneral( + struct BindStruct *X, + int nstate, + double complex **Xvec, + double complex **vec, + FILE **_fp +) { + long unsigned int i, j; + long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; + 
double complex dam_pr = 0, dmv; + long int i_max; + long unsigned int tmp_off = 0; + int num1, one = 1; + + i_max = X->Check.idim_max; + + for (i = 0; i < X->Def.NCisAjt; i++) { + zclear(i_max*nstate, &Xvec[1][0]); + org_isite1 = X->Def.CisAjt[i][0] + 1; + org_isite2 = X->Def.CisAjt[i][2] + 1; + org_sigma1 = X->Def.CisAjt[i][1]; + org_sigma2 = X->Def.CisAjt[i][3]; + if (org_isite1 == org_isite2) { + if (org_isite1 > X->Def.Nsite) { + if (org_sigma1 == org_sigma2) { + // longitudinal magnetic field + X_GC_child_CisAis_GeneralSpin_MPIdouble(org_isite1 - 1, org_sigma1, + 1.0, X, nstate, Xvec, vec); + } + else { + // transverse magnetic field + X_GC_child_CisAit_GeneralSpin_MPIdouble( + org_isite1 - 1, org_sigma1, org_sigma2, 1.0, X, nstate, Xvec, vec); + } + } + else {//org_isite1 <= X->Def.Nsite + if (org_sigma1 == org_sigma2) { + // longitudinal magnetic field +#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, num1) \ +firstprivate(i_max, org_isite1, org_sigma1, X) shared(vec) + for (j = 1; j <= i_max; j++) { + num1 = BitCheckGeneral(j - 1, org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); + dmv = (double complex)num1; + zaxpy_(&nstate, &dmv, &vec[j][0], &one, &Xvec[j][0], &one); + } + } + else { + // transverse magnetic field +#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, num1) \ +firstprivate(i_max, org_isite1, org_sigma1, org_sigma2, X,tmp_off) shared(vec) + for (j = 1; j <= i_max; j++) { + num1 = GetOffCompGeneralSpin( + j - 1, org_isite1, org_sigma2, org_sigma1, &tmp_off, X->Def.SiteToBit, X->Def.Tpow); + if (num1 != 0) { + dmv = (double complex)num1; + zaxpy_(&nstate, &dmv, &vec[j][0], &one, &Xvec[tmp_off + 1][0], &one); } - } else { -// hopping is not allowed in localized spin system - dam_pr = 0.0; + } } - dam_pr = SumMPI_dc(dam_pr); - fprintf(*_fp, " %4ld %4ld %4ld %4ld %.10lf %.10lf\n", org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, - creal(dam_pr), cimag(dam_pr)); + } } - return 0; + 
MultiVecProdMPI(i_max, nstate, vec, Xvec, prod); + fprintf(*_fp, " %4ld %4ld %4ld %4ld %.10lf %.10lf\n", + org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, creal(dam_pr), cimag(dam_pr)); + } + return 0; } diff --git a/src/expec_energy_flct.c b/src/expec_energy_flct.c index 4ffb8fcad..24cae5565 100644 --- a/src/expec_energy_flct.c +++ b/src/expec_energy_flct.c @@ -139,7 +139,7 @@ int expec_energy_flct(struct BindStruct *X){ nCalcExpec=5302; } StartTimer(nCalcExpec); - mltply(X, v0, v1); // v0+=H*v1 + mltply(X, 1, v0, v1); // v0+=H*v1 StopTimer(nCalcExpec); /* switch -> SpinGCBoost */ diff --git a/src/include/mltplyCommon.h b/src/include/mltplyCommon.h index 55974e3a2..df08f280c 100644 --- a/src/include/mltplyCommon.h +++ b/src/include/mltplyCommon.h @@ -25,5 +25,7 @@ #define M_CALCSPEC 4 void zaxpy_(int *n, double complex *a, double complex *x, int *incx, double complex *y, int *incy); - +void zaxpy_long(unsigned long int n, double complex a, double complex *x, double complex *y); +void zswap_long(unsigned long int n,double complex *x, double complex *y); +void zclear(unsigned long int n, double complex *x); #endif /* HPHI_MLTPLYCOMMON_H */ diff --git a/src/include/wrapperMPI.h b/src/include/wrapperMPI.h index 591e85c56..371a1f659 100644 --- a/src/include/wrapperMPI.h +++ b/src/include/wrapperMPI.h @@ -32,10 +32,18 @@ unsigned long int MaxMPI_li(unsigned long int idim); double MaxMPI_d(double dvalue); double complex SumMPI_dc(double complex norm); double SumMPI_d(double norm); +double SumMPI_dv(int nnorm, double *norm); +double SumMPI_cv(int nnorm, double complex *norm); unsigned long int SumMPI_li(unsigned long int idim); int SumMPI_i(int idim); unsigned long int BcastMPI_li(int root, unsigned long int idim); double NormMPI_dc(unsigned long int idim, double complex *_v1); +void NormMPI_dv(unsigned long int ndim, int nstate, double complex **_v1, double *dnorm); double complex VecProdMPI(long unsigned int ndim, double complex *v1, double complex *v2); - +void 
MultiVecProdMPI(long unsigned int ndim, int nstate, double complex **v1, double complex **v2, double complex *prod); +void SendRecv_cv(int origin, unsigned long int nMsgS, unsigned long int nMsgR, + double complex *vecs, double complex *vecr); +void SendRecv_iv(int origin, unsigned long int nMsgS, unsigned long int nMsgR, + unsigned long int *vecs, unsigned long int *vecr); +unsigned long int SendRecv_i(int origin, unsigned long int isend); #endif diff --git a/src/mltply.c b/src/mltply.c index ff24b4840..39672c28a 100644 --- a/src/mltply.c +++ b/src/mltply.c @@ -54,6 +54,7 @@ * @author Kazuyoshi Yoshimi (The University of Tokyo) */ int mltply(struct BindStruct *X, int nstate, double complex **tmp_v0,double complex **tmp_v1) { + int one = 1; long unsigned int j=0; long unsigned int irght=0; long unsigned int ilft=0; @@ -94,7 +95,7 @@ int mltply(struct BindStruct *X, int nstate, double complex **tmp_v0,double comp StartTimer(100); #pragma omp parallel for default(none) firstprivate(i_max) shared(tmp_v0, tmp_v1, list_Diagonal) for (j = 1; j <= i_max; j++) { - tmp_v0[j] += (list_Diagonal[j]) * tmp_v1[j]; + zaxpy_(&nstate, &list_Diagonal[j], &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); } StopTimer(100); if (X->Def.iCalcType == TimeEvolution) diagonalcalcForTE(step_i, X, nstate, tmp_v0, tmp_v1); @@ -126,3 +127,49 @@ int mltply(struct BindStruct *X, int nstate, double complex **tmp_v0,double comp StopTimer(1); return 0; } +/** +@brief Wrapper of zaxpy for long vectors: y[i] += a * x[i] for 0 <= i < n. +*/ +void zaxpy_long( + unsigned long int n, + double complex a, + double complex *x, + double complex *y +) { + unsigned long int i; + +#pragma omp parallel for default(none) private(i) shared(n, a, x, y) + for (i = 0; i < n; i++) + y[i] += a * x[i]; +} +/** +@brief Wrapper of zswap. 
+*/ +void zswap_long( + unsigned long int n, + double complex *x, + double complex *y +) { + unsigned long int i; + double complex x0; + +#pragma omp parallel for default(none) private(i,x0) shared(n, x, y) + for (i = 0; i < n; i++) { + x0 = x[i]; + x[i] = y[i]; + y[i] = x0; + } +} +/** +@brief Set every element of a complex vector to zero. +*/ +void zclear( + unsigned long int n, + double complex *x +) { + unsigned long int i; + +#pragma omp parallel for default(none) private(i) shared(n, x) + for (i = 0; i < n; i++) + x[i] = 0.0; +} diff --git a/src/mltplyHubbardCore.c b/src/mltplyHubbardCore.c index 81586fa3a..269d5dc22 100644 --- a/src/mltplyHubbardCore.c +++ b/src/mltplyHubbardCore.c @@ -288,7 +288,6 @@ int X_CisAis( } /** @brief @f$c_{is}^\dagger c_{jt}@f$ term for canonical Hubbard -@return @f$\langle v_1|{\hat H}_{\rm this}|v_1\rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ @@ -328,7 +327,6 @@ void CisAjt( } /** @brief @f$c_{is}^\dagger c_{jt}@f$ term for grandcanonical Hubbard -@return @f$\langle v_1|{\hat H}_{\rm this}|v_1\rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ @@ -443,7 +441,6 @@ int X_GC_CisAjt( /******************************************************************************/ /** @brief Compute exchange term of canonical-Hubbard -@return @return @f$\langle v_1|{\hat H}_{\rm this}|v_1\rangle@f$ @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ diff --git a/src/mltplyMPIBoost.c b/src/mltplyMPIBoost.c index 7cc314d70..783caa7e3 100644 --- a/src/mltplyMPIBoost.c +++ b/src/mltplyMPIBoost.c @@ -43,7 +43,6 @@ void child_general_int_spin_MPIBoost( { #ifdef MPI - // MPI_Status statusMPI; // int ierr; // int INFO; diff --git a/src/mltplyMPIHubbard.c b/src/mltplyMPIHubbard.c index 59cd76173..f1cd0fe5e 100644 --- a/src/mltplyMPIHubbard.c +++ b/src/mltplyMPIHubbard.c @@ -16,9 +16,6 @@ /**@file @brief 
Functions for Hubbard Hamiltonian + MPI */ -#ifdef MPI -#include "mpi.h" -#endif #include "Common.h" #include "bitcalc.h" #include "wrapperMPI.h" @@ -36,12 +33,10 @@ void GC_child_general_hopp_MPIdouble int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 double complex **tmp_v1//!<[in] v0 = H v1 ){ -#ifdef MPI X_GC_child_general_hopp_MPIdouble( X->Def.EDGeneralTransfer[itrans][0], X->Def.EDGeneralTransfer[itrans][1], X->Def.EDGeneralTransfer[itrans][2], X->Def.EDGeneralTransfer[itrans][3], X->Def.EDParaGeneralTransfer[itrans], X, nstate, tmp_v0, tmp_v1); -#endif }/*void GC_child_general_hopp_MPIdouble*/ /** @brief Hopping term in Hubbard + GC @@ -59,12 +54,9 @@ void X_GC_child_general_hopp_MPIdouble( int nstate, double complex **tmp_v0,//!< [out] Result v0 = H v1 double complex **tmp_v1 //!< [in] v0 = H v1 ) { -#ifdef MPI int mask1, mask2, state1, state2, ierr, origin, bitdiff, Fsgn; unsigned long int idim_max_buf, j; - MPI_Status statusMPI; double complex trans; - int one = 1; mask1 = (int)X->Def.Tpow[2 * org_isite1 + org_ispin1]; mask2 = (int)X->Def.Tpow[2 * org_isite2 + org_ispin2]; @@ -86,24 +78,10 @@ void X_GC_child_general_hopp_MPIdouble( }/*if (state1 == mask1 && state2 == 0)*/ else return; - ierr = MPI_Sendrecv(&X->Check.idim_max, 1, MPI_UNSIGNED_LONG, origin, 0, - &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - ierr = MPI_Sendrecv(tmp_v1, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - -#pragma omp parallel default(none) private(j, dmv) \ -firstprivate(idim_max_buf, trans, X) shared(v1buf, tmp_v1, tmp_v0) - { -#pragma omp for - for (j = 1; j <= idim_max_buf; j++) { - zaxpy_(&nstate, &trans, &v1buf[j][0], &one, &tmp_v0[j][0], &one); - }/*for (j = 1; j <= idim_max_buf; j++)*/ - }/*End of parallel region*/ -#endif + idim_max_buf = SendRecv_i(origin, 
X->Check.idim_max); + SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); + + zaxpy_long(X->Check.idim_max*nstate, trans, &v1buf[1][0], &tmp_v0[1][0]); }/*void GC_child_general_hopp_MPIdouble*/ /** @brief Hopping term in Hubbard + MPI @@ -120,17 +98,15 @@ void X_child_CisAjt_MPIdouble( struct BindStruct *X,//!< [inout] int nstate, double complex **tmp_v0,//!< [out] Result v0 = H v1 double complex **tmp_v1,//!< [in] v0 = H v1 - double complex *v1buf,//!<[in] + double complex **v1buf,//!<[in] long unsigned int *list_1_org,//!<[in] long unsigned int *list_1buf_org,//!<[in] long unsigned int *list_2_1_target,//!<[in] long unsigned int *list_2_2_target//!<[in] ) { -#ifdef MPI int mask1, mask2, state1, state2, ierr, origin, bitdiff, Fsgn; unsigned long int idim_max_buf, j, ioff; - MPI_Status statusMPI; - double complex trans, dmv; + double complex trans; int one = 1; mask1 = (int) X->Def.Tpow[2 * org_isite1 + org_ispin1]; @@ -155,32 +131,18 @@ void X_child_CisAjt_MPIdouble( }/*if (state1 == mask1 && state2 == 0)*/ else return; - ierr = MPI_Sendrecv(&X->Check.idim_maxOrg, 1, MPI_UNSIGNED_LONG, origin, 0, - &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - - ierr = MPI_Sendrecv(list_1_org, X->Check.idim_maxOrg + 1, MPI_UNSIGNED_LONG, origin, 0, - list_1buf_org, idim_max_buf + 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - - ierr = MPI_Sendrecv(tmp_v1, X->Check.idim_maxOrg + 1, MPI_DOUBLE_COMPLEX, origin, 0, - v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + idim_max_buf = SendRecv_i(origin, X->Check.idim_maxOrg); + SendRecv_iv(origin, X->Check.idim_maxOrg + 1, idim_max_buf + 1, list_1_org, list_1buf_org); + SendRecv_cv(origin, X->Check.idim_maxOrg*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); - if (X->Large.mode == M_MLTPLY|| X->Large.mode == 
M_CALCSPEC) { -#pragma omp parallel for default(none) private(j, dmv, ioff) \ +#pragma omp parallel for default(none) private(j, ioff) \ firstprivate(idim_max_buf, trans, X, list_2_1_target, list_2_2_target, list_1buf_org) \ shared(v1buf, tmp_v0) - for (j = 1; j <= idim_max_buf; j++){ - GetOffComp(list_2_1_target, list_2_2_target, list_1buf_org[j], - X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); - zaxpy_(&nstate, &trans, &v1buf[j][0], &one, &tmp_v0[ioff][0], &one); - }/*for (j = 1; j <= idim_max_buf; j++)*/ - }/*if (X->Large.mode == M_MLTPLY|| X->Large.mode == M_CALCSPEC)*/ -#endif + for (j = 1; j <= idim_max_buf; j++) { + GetOffComp(list_2_1_target, list_2_2_target, list_1buf_org[j], + X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); + zaxpy_(&nstate, &trans, &v1buf[j][0], &one, &tmp_v0[ioff][0], &one); + }/*for (j = 1; j <= idim_max_buf; j++)*/ }/*void child_CisAjt_MPIdouble*/ /** @brief Hopping term in Hubbard + GC @@ -193,17 +155,14 @@ void GC_child_general_hopp_MPIsingle( int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 double complex **tmp_v1//!<[in] v0 = H v1 ){ -#ifdef MPI X_GC_child_general_hopp_MPIsingle( X->Def.EDGeneralTransfer[itrans][0], X->Def.EDGeneralTransfer[itrans][1], X->Def.EDGeneralTransfer[itrans][2], X->Def.EDGeneralTransfer[itrans][3], X->Def.EDParaGeneralTransfer[itrans], X, nstate, tmp_v0, tmp_v1 ); -#endif }/*void GC_child_general_hopp_MPIsingle*/ /** @brief Hopping term in Hubbard + GC When only site2 is in the inter process region. 
-@return @f$\langle v_1|{\hat H}_{\rm this}|v_1\rangle@f$ @author Mitsuaki Kawamura (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ @@ -217,10 +176,8 @@ void X_GC_child_general_hopp_MPIsingle( int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 double complex **tmp_v1//!<[in] v0 = H v1 ) { -#ifdef MPI int mask2, state1, state2, ierr, origin, bit2diff, Fsgn; unsigned long int idim_max_buf, j, mask1, state1check, bit1diff, ioff; - MPI_Status statusMPI; double complex trans, dmv; int one = 1; /* @@ -233,15 +190,9 @@ void X_GC_child_general_hopp_MPIsingle( SgnBit((unsigned long int) (origin & bit2diff), &Fsgn); // Fermion sign - ierr = MPI_Sendrecv(&X->Check.idim_max, 1, MPI_UNSIGNED_LONG, origin, 0, - &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + idim_max_buf = SendRecv_i(origin, X->Check.idim_max); - ierr = MPI_Sendrecv(tmp_v1, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); /* Index in the intra PE @@ -280,7 +231,6 @@ void X_GC_child_general_hopp_MPIsingle( }/*for (j = 0; j < idim_max_buf; j++)*/ }/*End of parallel region*/ -#endif }/*void GC_child_general_hopp_MPIsingle*/ /** @brief Hopping term in Hubbard (Kondo) + Canonical ensemble @@ -293,17 +243,14 @@ void child_general_hopp_MPIdouble( int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 double complex **tmp_v1//!<[in] v0 = H v1 ){ -#ifdef MPI X_child_general_hopp_MPIdouble( X->Def.EDGeneralTransfer[itrans][0], X->Def.EDGeneralTransfer[itrans][1], X->Def.EDGeneralTransfer[itrans][2], X->Def.EDGeneralTransfer[itrans][3], X->Def.EDParaGeneralTransfer[itrans], X, nstate, tmp_v0, tmp_v1); -#endif }/*void child_general_hopp_MPIdouble*/ /** @brief Hopping term in Hubbard (Kondo) 
+ Canonical ensemble When both site1 and site2 are in the inter process region. -@return @f$\langle v_1|{\hat H}_{\rm this}|v_1\rangle@f$ @author Mitsuaki Kawamura (The University of Tokyo) */ void X_child_general_hopp_MPIdouble( @@ -316,10 +263,8 @@ void X_child_general_hopp_MPIdouble( int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 double complex **tmp_v1//!<[in] v0 = H v1 ) { -#ifdef MPI int mask1, mask2, state1, state2, ierr, origin, bitdiff, Fsgn; unsigned long int idim_max_buf, j, ioff; - MPI_Status statusMPI; double complex trans, dmv; int one = 1; @@ -344,40 +289,20 @@ void X_child_general_hopp_MPIdouble( } else return; - ierr = MPI_Sendrecv(&X->Check.idim_max, 1, MPI_UNSIGNED_LONG, origin, 0, - &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - ierr = MPI_Sendrecv(list_1, X->Check.idim_max + 1, MPI_UNSIGNED_LONG, origin, 0, - list_1buf, idim_max_buf + 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - ierr = MPI_Sendrecv(tmp_v1, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + idim_max_buf = SendRecv_i(origin, X->Check.idim_max); + SendRecv_iv(origin, X->Check.idim_max + 1, idim_max_buf + 1, list_1, list_1buf); + SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); #pragma omp parallel default(none) private(j, dmv, Fsgn, ioff) \ firstprivate(idim_max_buf, trans, X) shared(list_2_1, list_2_2, list_1buf, v1buf, tmp_v1, tmp_v0) { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { #pragma omp for - for (j = 1; j <= idim_max_buf; j++) { - GetOffComp(list_2_1, list_2_2, list_1buf[j], - X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); - zaxpy_(&nstate, &dmv, &v1buf[j][0], &one, &tmp_v0[ioff][0], &one); - }/*for (j = 1; j <= idim_max_buf; j++)*/ - }/*if 
(X->Large.mode == M_MLTPLY|| X->Large.mode == M_CALCSPEC)*/ - else { -#pragma omp for - for (j = 1; j <= idim_max_buf; j++) { - GetOffComp(list_2_1, list_2_2, list_1buf[j], - X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); - zaxpy_(&nstate, &trans, &v1buf[j][0], &one, &tmp_v0[ioff][0], &one); - }/*for (j = 1; j <= idim_max_buf; j++)*/ + for (j = 1; j <= idim_max_buf; j++) { + GetOffComp(list_2_1, list_2_2, list_1buf[j], + X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); + zaxpy_(&nstate, &trans, &v1buf[j][0], &one, &tmp_v0[ioff][0], &one); }/*for (j = 1; j <= idim_max_buf; j++)*/ }/*End of parallel region*/ -#endif }/*void child_general_hopp_MPIdouble*/ /** @brief Hopping term in Hubbard (Kondo) + Canonical ensemble @@ -390,17 +315,14 @@ void child_general_hopp_MPIsingle( int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 double complex **tmp_v1//!<[in] v0 = H v1 ){ -#ifdef MPI X_child_general_hopp_MPIsingle( X->Def.EDGeneralTransfer[itrans][0], X->Def.EDGeneralTransfer[itrans][1], X->Def.EDGeneralTransfer[itrans][2], X->Def.EDGeneralTransfer[itrans][3], X->Def.EDParaGeneralTransfer[itrans], X, nstate, tmp_v0, tmp_v1); -#endif }/*void child_general_hopp_MPIsingle*/ /** @brief Hopping term in Hubbard (Kondo) + Canonical ensemble When only site2 is in the inter process region. 
-@return @f$\langle v_1|{\hat H}_{\rm this}|v_1\rangle@f$ @author Mitsuaki Kawamura (The University of Tokyo) */ void X_child_general_hopp_MPIsingle( @@ -413,10 +335,8 @@ void X_child_general_hopp_MPIsingle( int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 double complex **tmp_v1//!<[in] v0 = H v1 ) { -#ifdef MPI int mask2, state2, ierr, origin, bit2diff, Fsgn; unsigned long int mask1, state1, idim_max_buf, j, state1check, bit1diff, ioff, jreal; - MPI_Status statusMPI; double complex trans, dmv; int one = 1; /* @@ -430,18 +350,9 @@ void X_child_general_hopp_MPIsingle( SgnBit((unsigned long int) (origin & bit2diff), &Fsgn); // Fermion sign - ierr = MPI_Sendrecv(&X->Check.idim_max, 1, MPI_UNSIGNED_LONG, origin, 0, - &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - ierr = MPI_Sendrecv(list_1, X->Check.idim_max + 1, MPI_UNSIGNED_LONG, origin, 0, - list_1buf, idim_max_buf + 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - ierr = MPI_Sendrecv(tmp_v1, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + idim_max_buf = SendRecv_i(origin, X->Check.idim_max); + SendRecv_iv(origin, X->Check.idim_max + 1, idim_max_buf + 1, list_1, list_1buf); + SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); /* Index in the intra PE */ @@ -480,12 +391,10 @@ void X_child_general_hopp_MPIsingle( }/*if (state1 == state1check)*/ }/*for (j = 1; j <= idim_max_buf; j++)*/ }/*End of parallel region*/ -#endif }/*double complex child_general_hopp_MPIsingle*/ /** @brief Hopping term in Hubbard (Kondo) + Canonical ensemble When only site2 is in the inter process region. 
-@return @f$\langle v_1|{\hat H}_{\rm this}|v_1\rangle@f$ @author Mitsuaki Kawamura (The University of Tokyo) */ void X_child_CisAjt_MPIsingle( @@ -497,16 +406,14 @@ void X_child_CisAjt_MPIsingle( struct BindStruct *X,//!<[inout] int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 double complex **tmp_v1,//!<[in] v0 = H v1 - double complex *v1buf,//!<[in] Buffer for sendrecv of wavefunction + double complex **v1buf,//!<[in] Buffer for sendrecv of wavefunction long unsigned int *list_1_org,//!<[in] Similler to ::list_1 long unsigned int *list_1buf_org,//!<[in] Similler to ::list_1buf long unsigned int *list_2_1_target,//!<[in] ??? long unsigned int *list_2_2_target//!<[in] ??? ){ -#ifdef MPI int mask2, state2, ierr, origin, bit2diff, Fsgn; unsigned long int mask1, state1, idim_max_buf, j, state1check, bit1diff, ioff, jreal; - MPI_Status statusMPI; double complex trans, dmv; int one = 1; /* @@ -520,18 +427,9 @@ void X_child_CisAjt_MPIsingle( SgnBit((unsigned long int) (origin & bit2diff), &Fsgn); // Fermion sign - ierr = MPI_Sendrecv(&X->Check.idim_maxOrg, 1, MPI_UNSIGNED_LONG, origin, 0, - &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - ierr = MPI_Sendrecv(list_1_org, X->Check.idim_maxOrg + 1, MPI_UNSIGNED_LONG, origin, 0, - list_1buf_org, idim_max_buf + 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - ierr = MPI_Sendrecv(tmp_v1, X->Check.idim_maxOrg + 1, MPI_DOUBLE_COMPLEX, origin, 0, - v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + idim_max_buf = SendRecv_i(origin, X->Check.idim_maxOrg); + SendRecv_iv(origin, X->Check.idim_maxOrg + 1, idim_max_buf + 1, list_1_org, list_1buf_org); + SendRecv_cv(origin, X->Check.idim_maxOrg*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); /* Index in the intra PE */ @@ -563,5 +461,4 @@ void X_child_CisAjt_MPIsingle( }/*if(ioff 
!=0)*/ }/*if (state1 == state1check)*/ }/*for (j = 1; j <= idim_max_buf; j++)*/ -#endif }/*double complex child_general_hopp_MPIsingle*/ diff --git a/src/mltplyMPIHubbardCore.c b/src/mltplyMPIHubbardCore.c index 6e3d5ec86..d27700f1c 100644 --- a/src/mltplyMPIHubbardCore.c +++ b/src/mltplyMPIHubbardCore.c @@ -16,9 +16,6 @@ /**@file @brief Functions for Hubbar + MPI (Core) */ -#ifdef MPI -#include "mpi.h" -#endif #include "Common.h" #include "mltplyCommon.h" #include "mltplyHubbardCore.h" @@ -269,46 +266,33 @@ void X_GC_child_CisAisCjtAjt_Hubbard_MPI( int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1//!<[inout] Initial wavefunction ) { -#ifdef MPI int iCheck; unsigned long int tmp_ispin1; unsigned long int i_max = X->Check.idim_max; unsigned long int tmp_off, j; - double complex dmv; int one = 1; - // MPI_Status statusMPI; iCheck=CheckBit_PairPE(org_isite1, org_ispin1, org_isite3, org_ispin3, X, (long unsigned int) myrank); if(iCheck != TRUE){ - return 0.0; + return; } -#pragma omp parallel default(none) shared(org_isite1, org_ispin1, org_isite3, org_ispin3, nstate, tmp_v0, tmp_v1) \ - firstprivate(i_max, tmp_V, X) private(dmv, j, tmp_off, tmp_ispin1) - { + if (org_isite1 + 1 > X->Def.Nsite && org_isite3 + 1 > X->Def.Nsite) { + zaxpy_long(i_max*nstate, tmp_V, &tmp_v1[1][0], &tmp_v0[1][0]); + }/*if (org_isite1 + 1 > X->Def.Nsite && org_isite3 + 1 > X->Def.Nsite)*/ + else if (org_isite1 + 1 > X->Def.Nsite || org_isite3 + 1 > X->Def.Nsite) { + if (org_isite1 > org_isite3) tmp_ispin1 = X->Def.Tpow[2 * org_isite3 + org_ispin3]; + else tmp_ispin1 = X->Def.Tpow[2 * org_isite1 + org_ispin1]; - if (org_isite1 + 1 > X->Def.Nsite && org_isite3 + 1 > X->Def.Nsite) { +#pragma omp parallel default(none) shared(org_isite1, org_ispin1, org_isite3, org_ispin3, nstate, tmp_v0, tmp_v1, one) \ + firstprivate(i_max, tmp_V, X, tmp_ispin1) private(j, tmp_off) #pragma omp for - for (j = 1; j <= i_max; j++) { + for (j = 1; j <= i_max; j++) { + if 
(CheckBit_Ajt(tmp_ispin1, j - 1, &tmp_off) == TRUE) { zaxpy_(&nstate, &tmp_V, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); - }/*for (j = 1; j <= i_max; j++)*/ - }/*if (org_isite1 + 1 > X->Def.Nsite && org_isite3 + 1 > X->Def.Nsite)*/ - else if (org_isite1 + 1 > X->Def.Nsite || org_isite3 + 1 > X->Def.Nsite) { - if (org_isite1 > org_isite3) tmp_ispin1 = X->Def.Tpow[2 * org_isite3 + org_ispin3]; - else tmp_ispin1 = X->Def.Tpow[2 * org_isite1 + org_ispin1]; - -#pragma omp for - for (j = 1; j <= i_max; j++) { - if (CheckBit_Ajt(tmp_ispin1, j - 1, &tmp_off) == TRUE) { - zaxpy_(&nstate, &tmp_V, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); - } - }/*for (j = 1; j <= i_max; j++)*/ - } - }/*End of parallel region*/ - return; -#else - return 0.0; -#endif + } + }/*for (j = 1; j <= i_max; j++)*/ + } }/*double complex X_GC_child_CisAisCjtAjt_Hubbard_MPI*/ /** @brief Compute @f$c_{is}^\dagger c_{jt} c_{ku}^\dagger c_{ku}@f$ @@ -326,7 +310,6 @@ void X_GC_child_CisAjtCkuAku_Hubbard_MPI( int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1//!<[inout] Initial wavefunction ) { -#ifdef MPI unsigned long int i_max = X->Check.idim_max; unsigned long int idim_max_buf; int iCheck, ierr, Fsgn; @@ -336,7 +319,6 @@ void X_GC_child_CisAjtCkuAku_Hubbard_MPI( double complex dmv; unsigned long int origin, tmp_off; unsigned long int org_rankbit; - MPI_Status statusMPI; int one = 1; iCheck = CheckBit_InterAllPE(org_isite1, org_ispin1, org_isite2, org_ispin2, org_isite3, org_ispin3, org_isite3, org_ispin3, X, (long unsigned int) myrank, &origin); @@ -369,13 +351,13 @@ void X_GC_child_CisAjtCkuAku_Hubbard_MPI( } } else { - return 0.0; + return; } } if (myrank == origin) {// only k is in PE - if (CheckBit_Ajt(isite3, myrank, &tmp_off) == FALSE) return 0.0; + if (CheckBit_Ajt(isite3, myrank, &tmp_off) == FALSE) return; #pragma omp parallel default(none) \ firstprivate(i_max,X,Asum,Adiff,isite1,isite2, tmp_V) private(j,tmp_off) shared(tmp_v0, tmp_v1) @@ -393,14 
+375,8 @@ firstprivate(i_max,X,Asum,Adiff,isite1,isite2, tmp_V) private(j,tmp_off) shared( return; }//myrank =origin else { - ierr = MPI_Sendrecv(&X->Check.idim_max, 1, MPI_UNSIGNED_LONG, origin, 0, - &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - ierr = MPI_Sendrecv(tmp_v1, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + idim_max_buf = SendRecv_i(origin, X->Check.idim_max); + SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); #pragma omp parallel default(none) private(j, dmv, tmp_off, Fsgn, org_rankbit, Adiff) \ shared(v1buf, tmp_v1, nstate, tmp_v0, myrank, origin, isite3, org_isite3, isite1, isite2, org_isite2, org_isite1) \ @@ -439,10 +415,6 @@ firstprivate(idim_max_buf, tmp_V, X, tmp_isite1, tmp_isite2, tmp_isite3, tmp_isi } }/*End of parallel region*/ }/*myrank != origin*/ - return; -#else - return 0.0; -#endif }/*double complex X_GC_child_CisAjtCkuAku_Hubbard_MPI*/ /** @brief Compute @f$c_{is}^\dagger c_{is} c_{jt}^\dagger c_{ku}@f$ @@ -460,11 +432,9 @@ void X_GC_child_CisAisCjtAku_Hubbard_MPI( int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1//!<[inout] Initial wavefunction ) { -#ifdef MPI X_GC_child_CisAjtCkuAku_Hubbard_MPI( org_isite4, org_ispin4, org_isite3, org_ispin3, org_isite1, org_ispin1, conj(tmp_V), X, nstate, tmp_v0, tmp_v1); -#endif }/*double complex X_GC_child_CisAisCjtAku_Hubbard_MPI*/ /** @brief Compute @f$c_{is}^\dagger c_{jt} c_{ku}^\dagger c_{lv}@f$ @@ -484,7 +454,6 @@ void X_GC_child_CisAjtCkuAlv_Hubbard_MPI( int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1//!<[inout] Initial wavefunction ) { -#ifdef MPI unsigned long int i_max = X->Check.idim_max; unsigned long int idim_max_buf; int iCheck, ierr, Fsgn; @@ -495,7 
+464,6 @@ void X_GC_child_CisAjtCkuAlv_Hubbard_MPI( unsigned long int origin, tmp_off, tmp_off2; unsigned long int org_rankbit; int iFlgHermite = FALSE; - MPI_Status statusMPI; int one = 1; iCheck = CheckBit_InterAllPE(org_isite1, org_ispin1, org_isite2, org_ispin2, @@ -528,7 +496,7 @@ void X_GC_child_CisAjtCkuAlv_Hubbard_MPI( } } else { - return 0.0; + return; } } @@ -574,14 +542,8 @@ firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0) return; }//myrank =origin else { - ierr = MPI_Sendrecv(&X->Check.idim_max, 1, MPI_UNSIGNED_LONG, origin, 0, - &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - ierr = MPI_Sendrecv(tmp_v1, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + idim_max_buf = SendRecv_i(origin, X->Check.idim_max); + SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); if (org_isite1 + 1 > X->Def.Nsite && org_isite2 + 1 > X->Def.Nsite && org_isite3 + 1 > X->Def.Nsite && org_isite4 + 1 > X->Def.Nsite) { @@ -601,10 +563,8 @@ firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0) Fsgn *= X_GC_CisAjt(tmp_off2, X, isite1, isite2, (isite1 + isite2), Adiff, &tmp_off); tmp_V *= Fsgn; }/*if (iFlgHermite == TRUE)*/ -#pragma omp parallel for default(none) private(j, dmv) firstprivate(idim_max_buf, tmp_V, X) shared(v1buf, tmp_v1, tmp_v0) - for (j = 1; j <= idim_max_buf; j++) { - zaxpy_(&nstate, &tmp_V, &v1buf[j][0], &one, &tmp_v0[j][0], &one); - }/*for (j = 1; j <= idim_max_buf; j++)*/ + + zaxpy_long(i_max*nstate, tmp_V, &v1buf[1][0], &tmp_v0[1][0]); } else { org_rankbit = X->Def.OrgTpow[2 * X->Def.Nsite] * origin; @@ -617,10 +577,6 @@ firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0) }/*for (j = 1; j <= idim_max_buf; j++)*/ } }/*myrank != origin*/ - return; -#else - return 
0.0; -#endif }/*double complex X_GC_child_CisAjtCkuAlv_Hubbard_MPI*/ /** @brief Compute @f$c_{is}^\dagger c_{is}@f$ @@ -634,29 +590,19 @@ void X_GC_child_CisAis_Hubbard_MPI( int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1//!<[inout] Initial wavefunction ) { -#ifdef MPI unsigned long int i_max = X->Check.idim_max; unsigned long int j, isite1, tmp_off; - double complex dmv; int one = 1; - // MPI_Status statusMPI; isite1 = X->Def.Tpow[2 * org_isite1 + org_ispin1]; if (org_isite1 + 1 > X->Def.Nsite) { - if (CheckBit_Ajt(isite1, (unsigned long int) myrank, &tmp_off) == FALSE) return 0.0; + if (CheckBit_Ajt(isite1, (unsigned long int) myrank, &tmp_off) == FALSE) return; -#pragma omp parallel default(none) shared(tmp_v0, tmp_v1) \ - firstprivate(i_max, tmp_V, X) private(dmv, j, tmp_off) - { -#pragma omp for - for (j = 1; j <= i_max; j++) { - zaxpy_(&nstate, &tmp_V, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); - }/*for (j = 1; j <= i_max; j++)*/ - }/*End of parallel region*/ + zaxpy_long(i_max*nstate, tmp_V, &tmp_v1[1][0], &tmp_v0[1][0]); }/*if (org_isite1 + 1 > X->Def.Nsite)*/ else { #pragma omp parallel default(none) shared(tmp_v0, tmp_v1) \ - firstprivate(i_max, tmp_V, X, isite1) private(dmv, j, tmp_off) + firstprivate(i_max, tmp_V, X, isite1) private(j, tmp_off) { #pragma omp for for (j = 1; j <= i_max; j++) { @@ -666,10 +612,6 @@ void X_GC_child_CisAis_Hubbard_MPI( }/*for (j = 1; j <= i_max; j++)*/ }/*End of parallel region*/ }/*if (org_isite1 + 1 <= X->Def.Nsite)*/ - return; -#else - return 0.0; -#endif }/*double complex X_GC_child_CisAis_Hubbard_MPI*/ /** @brief Compute @f$c_{is}^\dagger c_{jt}@f$ @@ -685,8 +627,6 @@ void X_GC_child_CisAjt_Hubbard_MPI( int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1//!<[inout] Initial wavefunction ) { -#ifdef MPI -// MPI_Status statusMPI; if (org_isite1 + 1 > X->Def.Nsite && org_isite2 + 1 > X->Def.Nsite) { 
X_GC_child_general_hopp_MPIdouble(org_isite1, org_ispin1, org_isite2, org_ispin2, tmp_trans, X, nstate, tmp_v0, tmp_v1); @@ -698,10 +638,6 @@ void X_GC_child_CisAjt_Hubbard_MPI( //error message will be added. exitMPI(-1); } - return; -#else - return 0.0; -#endif }/*double complex X_GC_child_CisAjt_Hubbard_MPI*/ /** @brief Compute @f$c_{is}^\dagger c_{is} c_{jt}^\dagger c_{jt}@f$ @@ -717,41 +653,32 @@ void X_child_CisAisCjtAjt_Hubbard_MPI( int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1//!<[inout] Initial wavefunction ) { -#ifdef MPI int iCheck; unsigned long int tmp_ispin1; unsigned long int i_max = X->Check.idim_max; unsigned long int tmp_off, j; - double complex dmv; int one = 1; - // MPI_Status statusMPI; iCheck = CheckBit_PairPE(org_isite1, org_ispin1, org_isite3, org_ispin3, X, (long unsigned int) myrank); - if (iCheck != TRUE) return 0.0; + if (iCheck != TRUE) return; -#pragma omp parallel default(none) \ -shared(tmp_v0, tmp_v1, list_1, org_isite1, org_ispin1, org_isite3, org_ispin3) \ - firstprivate(i_max, tmp_V, X, tmp_ispin1) private(dmv, j, tmp_off) - { - if (org_isite1 + 1 > X->Def.Nsite && org_isite3 + 1 > X->Def.Nsite) { + if (org_isite1 + 1 > X->Def.Nsite && org_isite3 + 1 > X->Def.Nsite) { #pragma omp for - for (j = 1; j <= i_max; j++) { - zaxpy_(&nstate, &tmp_V, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); - }/*for (j = 1; j <= i_max; j++)*/ - }/*if (org_isite1 + 1 > X->Def.Nsite && org_isite3 + 1 > X->Def.Nsite)*/ - else if (org_isite1 + 1 > X->Def.Nsite || org_isite3 + 1 > X->Def.Nsite) { - if (org_isite1 > org_isite3) tmp_ispin1 = X->Def.Tpow[2 * org_isite3 + org_ispin3]; - else tmp_ispin1 = X->Def.Tpow[2 * org_isite1 + org_ispin1]; + zaxpy_long(i_max*nstate, tmp_V, &tmp_v1[1][0], &tmp_v0[1][0]); + }/*if (org_isite1 + 1 > X->Def.Nsite && org_isite3 + 1 > X->Def.Nsite)*/ + else if (org_isite1 + 1 > X->Def.Nsite || org_isite3 + 1 > X->Def.Nsite) { + if (org_isite1 > org_isite3) tmp_ispin1 = X->Def.Tpow[2 
* org_isite3 + org_ispin3]; + else tmp_ispin1 = X->Def.Tpow[2 * org_isite1 + org_ispin1]; -#pragma omp for - for (j = 1; j <= i_max; j++) { - if (CheckBit_Ajt(tmp_ispin1, list_1[j], &tmp_off) == TRUE) { - zaxpy_(&nstate, &tmp_V, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); - } - }/*for (j = 1; j <= i_max; j++)*/ - }/*if (org_isite1 + 1 > X->Def.Nsite || org_isite3 + 1 > X->Def.Nsite)*/ - }/*End of parallel region*/ -#endif +#pragma omp parallel for default(none) \ +shared(tmp_v0, tmp_v1, list_1, org_isite1, org_ispin1, org_isite3, org_ispin3) \ + firstprivate(i_max, tmp_V, X, tmp_ispin1) private(j, tmp_off) + for (j = 1; j <= i_max; j++) { + if (CheckBit_Ajt(tmp_ispin1, list_1[j], &tmp_off) == TRUE) { + zaxpy_(&nstate, &tmp_V, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); + } + }/*for (j = 1; j <= i_max; j++)*/ + }/*if (org_isite1 + 1 > X->Def.Nsite || org_isite3 + 1 > X->Def.Nsite)*/ }/*double complex X_child_CisAisCjtAjt_Hubbard_MPI*/ /** @brief Compute @f$c_{is}^\dagger c_{jt} c_{ku}^\dagger c_{lv}@f$ @@ -771,7 +698,6 @@ void X_child_CisAjtCkuAlv_Hubbard_MPI( int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1//!<[inout] Initial wavefunction ) { -#ifdef MPI unsigned long int i_max = X->Check.idim_max; unsigned long int idim_max_buf; int iCheck, ierr, Fsgn; @@ -782,7 +708,6 @@ void X_child_CisAjtCkuAlv_Hubbard_MPI( unsigned long int origin, tmp_off, tmp_off2; unsigned long int org_rankbit, ioff; int iFlgHermite = FALSE; - MPI_Status statusMPI; int one = 1; iCheck = CheckBit_InterAllPE(org_isite1, org_ispin1, org_isite2, org_ispin2, @@ -813,7 +738,7 @@ void X_child_CisAjtCkuAlv_Hubbard_MPI( iFlgHermite = TRUE; if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) tmp_V = 0; }/*if (iCheck == TRUE)*/ - else return 0.0; + else return; }/*if (iCheck == FALSE)*/ if (myrank == origin) { @@ -859,20 +784,10 @@ firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0) return; }//myrank =origin else { - ierr = 
MPI_Sendrecv(&X->Check.idim_max, 1, MPI_UNSIGNED_LONG, origin, 0, - &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - - ierr = MPI_Sendrecv(list_1, X->Check.idim_max + 1, MPI_UNSIGNED_LONG, origin, 0, - list_1buf, idim_max_buf + 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - - ierr = MPI_Sendrecv(tmp_v1, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + idim_max_buf = SendRecv_i(origin, X->Check.idim_max); + SendRecv_iv(origin, X->Check.idim_max + 1, idim_max_buf + 1, list_1, list_1buf); + + SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); if (org_isite1 + 1 > X->Def.Nsite && org_isite2 + 1 > X->Def.Nsite && org_isite3 + 1 > X->Def.Nsite && org_isite4 + 1 > X->Def.Nsite) { @@ -891,7 +806,7 @@ firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0) Fsgn *= X_GC_CisAjt(tmp_off2, X, isite1, isite2, (isite1 + isite2), Adiff, &tmp_off); tmp_V *= Fsgn; }/*if (iFlgHermite == TRUE)*/ -#pragma omp parallel default(none) private(j, dmv, ioff) \ +#pragma omp parallel default(none) private(j, ioff) \ firstprivate(idim_max_buf, tmp_V, X) shared(v1buf, tmp_v1, nstate, tmp_v0, list_2_1, list_2_2, list_1buf) { #pragma omp for @@ -928,10 +843,6 @@ shared(v1buf, tmp_v1, nstate, tmp_v0, list_1buf, list_2_1, list_2_2) }/*End of parallel region*/ } }/*if (myrank != origin)*/ - return; -#else - return 0.0; -#endif }/*double complex X_child_CisAjtCkuAlv_Hubbard_MPI*/ /** @brief Compute @f$c_{is}^\dagger c_{jt} c_{ku}^\dagger c_{ku}@f$ @@ -949,7 +860,6 @@ void X_child_CisAjtCkuAku_Hubbard_MPI( int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1//!<[inout] Initial wavefunction ) { -#ifdef MPI unsigned long int i_max = X->Check.idim_max; unsigned long 
int idim_max_buf, ioff; int iCheck, ierr, Fsgn; @@ -959,7 +869,6 @@ void X_child_CisAjtCkuAku_Hubbard_MPI( double complex dmv; unsigned long int origin, tmp_off; unsigned long int org_rankbit; - MPI_Status statusMPI; int one = 1; //printf("Deubg0-0: org_isite1=%d, org_ispin1=%d, org_isite2=%d, org_ispin2=%d, org_isite3=%d, org_ispin3=%d\n", org_isite1, org_ispin1,org_isite2, org_ispin2,org_isite3, org_ispin3); iCheck = CheckBit_InterAllPE(org_isite1, org_ispin1, org_isite2, org_ispin2, org_isite3, org_ispin3, org_isite3, org_ispin3, X, (long unsigned int) myrank, &origin); @@ -992,7 +901,7 @@ void X_child_CisAjtCkuAku_Hubbard_MPI( if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) tmp_V = 0; //printf("tmp_isite1=%ld, tmp_isite2=%ld, Adiff=%ld\n", tmp_isite1, tmp_isite2, Adiff); }/*if (iCheck == TRUE)*/ - else return 0.0; + else return; }/*if (iCheck == FALSE)*/ if (myrank == origin) {// only k is in PE @@ -1013,19 +922,9 @@ firstprivate(i_max, Asum, Adiff, isite1, isite2, tmp_V, X) private(j) shared(tmp return; }//myrank =origin else { - ierr = MPI_Sendrecv(&X->Check.idim_max, 1, MPI_UNSIGNED_LONG, origin, 0, - &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - ierr = MPI_Sendrecv(list_1, X->Check.idim_max + 1, MPI_UNSIGNED_LONG, origin, 0, - list_1buf, idim_max_buf + 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - - ierr = MPI_Sendrecv(tmp_v1, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + idim_max_buf = SendRecv_i(origin, X->Check.idim_max); + SendRecv_iv(origin, X->Check.idim_max + 1, idim_max_buf + 1, list_1, list_1buf); + SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); #pragma omp parallel default(none) private(j, dmv, ioff, tmp_off, Fsgn, Adiff) \ 
firstprivate(idim_max_buf, tmp_V, X, tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4, org_rankbit, isite3) \ @@ -1072,7 +971,6 @@ shared(v1buf, tmp_v1, nstate, tmp_v0, list_1buf, list_2_1, list_2_2, origin, org } }/*End of parallel region*/ }/*if (myrank != origin)*/ -#endif }/*double complex X_child_CisAjtCkuAku_Hubbard_MPI*/ /** @brief Compute @f$c_{is}^\dagger c_{is} c_{jt}^\dagger c_{ku}@f$ @@ -1090,11 +988,9 @@ void X_child_CisAisCjtAku_Hubbard_MPI( int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1//!<[inout] Initial wavefunction ) { -#ifdef MPI X_child_CisAjtCkuAku_Hubbard_MPI( org_isite4, org_ispin4, org_isite3, org_ispin3, org_isite1, org_ispin1, conj(tmp_V), X, nstate, tmp_v0, tmp_v1); -#endif }/*double complex X_child_CisAisCjtAku_Hubbard_MPI*/ void X_child_CisAis_Hubbard_MPI( @@ -1105,30 +1001,20 @@ void X_child_CisAis_Hubbard_MPI( int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1//!<[inout] Initial wavefunction ) { -#ifdef MPI unsigned long int i_max = X->Check.idim_max; unsigned long int j, isite1, tmp_off; - double complex dmv; int one = 1; -// MPI_Status statusMPI; isite1 = X->Def.Tpow[2 * org_isite1 + org_ispin1]; if (org_isite1 + 1 > X->Def.Nsite) { if (CheckBit_Ajt(isite1, (unsigned long int) myrank, &tmp_off) == FALSE) - return 0.0; + return; -#pragma omp parallel default(none) shared(tmp_v0, tmp_v1) \ - firstprivate(i_max, tmp_V, X) private(dmv, j, tmp_off) - { -#pragma omp for - for (j = 1; j <= i_max; j++) { - zaxpy_(&nstate, &tmp_V, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); - }/*for (j = 1; j <= i_max; j++)*/ - }/*End of parallel*/ + zaxpy_long(i_max*nstate, tmp_V, &tmp_v1[1][0], &tmp_v0[1][0]); }/*if (org_isite1 + 1 > X->Def.Nsite)*/ else { #pragma omp parallel default(none) shared(tmp_v0, tmp_v1, list_1) \ - firstprivate(i_max, tmp_V, X, isite1) private(dmv, j, tmp_off) + firstprivate(i_max, tmp_V, X, isite1) private(j, tmp_off) { #pragma omp for for 
(j = 1; j <= i_max; j++) { @@ -1138,7 +1024,6 @@ void X_child_CisAis_Hubbard_MPI( }/*for (j = 1; j <= i_max; j++)*/ }/*End of parallel region*/ }/*if (org_isite1 + 1 <= X->Def.Nsite)*/ -#endif }/*double complex X_child_CisAis_Hubbard_MPI*/ /** @brief Single creation/annihilation operator @@ -1157,12 +1042,9 @@ void X_GC_Cis_MPI( double complex **tmp_v1buf,//!<[in] buffer for wavefunction unsigned long int *Tpow//!<[in] Similar to DefineList::Tpow ) { -#ifdef MPI int mask2, state2, ierr, origin, bit2diff, Fsgn; unsigned long int idim_max_buf, j; - MPI_Status statusMPI; - double complex trans, dmv; - int one = 1; + double complex trans; // org_isite >= Nsite mask2 = (int)Tpow[2 * org_isite + org_ispin]; @@ -1178,15 +1060,8 @@ void X_GC_Cis_MPI( //SgnBit((unsigned long int) (origin & bit2diff), &Fsgn); // Fermion sign SgnBit((unsigned long int) (bit2diff), &Fsgn); // Fermion sign - ierr = MPI_Sendrecv(&idim_max, 1, MPI_UNSIGNED_LONG, origin, 0, - &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - - ierr = MPI_Sendrecv(tmp_v1, idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - tmp_v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + idim_max_buf = SendRecv_i(origin, idim_max); + SendRecv_cv(origin, idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &tmp_v1buf[1][0]); if (state2 == mask2) { trans = 0; @@ -1194,14 +1069,9 @@ void X_GC_Cis_MPI( else if (state2 == 0) { trans = (double)Fsgn * tmp_trans; } - else return 0; + else return; -#pragma omp parallel for default(none) private(j, dmv) \ - firstprivate(idim_max_buf, trans) shared(tmp_v1buf, tmp_v1, tmp_v0) - for (j = 0; j < idim_max_buf; j++) { - zaxpy_(&nstate, &trans, &tmp_v1buf[j + 1][0], &one, &tmp_v0[j + 1][0], &one); - } -#endif + zaxpy_long(idim_max_buf*nstate, trans, &tmp_v1buf[1][0], &tmp_v0[1][0]); }/*double complex X_GC_Cis_MPI*/ /** @brief Single creation/annihilation operator @@ 
-1220,12 +1090,9 @@ void X_GC_Ajt_MPI( double complex **tmp_v1buf,//!<[in] buffer for wavefunction unsigned long int *Tpow//!<[in] Similar to DefineList::Tpow ) { -#ifdef MPI int mask2, state2, ierr, origin, bit2diff, Fsgn; unsigned long int idim_max_buf, j; - MPI_Status statusMPI; - double complex trans, dmv; - int one = 1; + double complex trans; // org_isite >= Nsite mask2 = (int)Tpow[2 * org_isite + org_ispin]; @@ -1241,29 +1108,15 @@ void X_GC_Ajt_MPI( //SgnBit((unsigned long int) (origin & bit2diff), &Fsgn); // Fermion sign SgnBit((unsigned long int) (bit2diff), &Fsgn); // Fermion sign - ierr = MPI_Sendrecv(&idim_max, 1, MPI_UNSIGNED_LONG, origin, 0, - &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + idim_max_buf = SendRecv_i(origin, idim_max); - ierr = MPI_Sendrecv(tmp_v1, idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - tmp_v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + SendRecv_cv(origin, idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &tmp_v1buf[1][0]); if ( state2 == 0 ) trans = 0; else if (state2 == mask2) trans = (double)Fsgn * tmp_trans; - else return 0; + else return; -#pragma omp parallel for default(none) private(j, dmv) \ -firstprivate(idim_max_buf, trans) shared(tmp_v1buf, tmp_v1, tmp_v0) - for (j = 0; j < idim_max_buf; j++) { - zaxpy_(&nstate, &trans, &tmp_v1buf[j + 1][0], &one, &tmp_v0[j + 1][0], &one); - } - return; -#else - return 0.0; -#endif + zaxpy_long(idim_max_buf*nstate, trans, &tmp_v1buf[1][0], &tmp_v0[1][0]); }/*double complex X_GC_Ajt_MPI*/ /** @brief Compute @f$c_{is}^\dagger@f$ @@ -1286,11 +1139,9 @@ void X_Cis_MPI( long unsigned int _ilft,//!<[in] Similer to LargeList::ilft long unsigned int _ihfbit//!<[in] Similer to LargeList::ihfbit ) { -#ifdef MPI int mask2, state2, ierr, origin, bit2diff, Fsgn; unsigned long int idim_max_buf, j, ioff; - MPI_Status statusMPI; - double complex trans, dmv; + 
double complex trans; int one = 1; // org_isite >= Nsite @@ -1306,20 +1157,11 @@ void X_Cis_MPI( SgnBit((unsigned long int) (bit2diff), &Fsgn); // Fermion sign - ierr = MPI_Sendrecv(&idim_max, 1, MPI_UNSIGNED_LONG, origin, 0, - &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + idim_max_buf = SendRecv_i(origin, idim_max); - ierr = MPI_Sendrecv(list_1_org, idim_max + 1, MPI_UNSIGNED_LONG, origin, 0, - list_1buf_org, idim_max_buf + 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + SendRecv_iv(origin, idim_max + 1, idim_max_buf + 1, list_1_org, list_1buf_org); - ierr = MPI_Sendrecv(tmp_v1, idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - tmp_v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + SendRecv_cv(origin, idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &tmp_v1buf[1][0]); if (state2 == mask2) { trans = 0; @@ -1327,9 +1169,9 @@ void X_Cis_MPI( else if (state2 == 0) { trans = (double)Fsgn * tmp_trans; } - else return 0; + else retur; -#pragma omp parallel for default(none) private(j, dmv) \ +#pragma omp parallel for default(none) private(j) \ firstprivate(idim_max_buf, trans, ioff, _irght, _ilft, _ihfbit, list_2_1_target, list_2_2_target) \ shared(tmp_v1buf, tmp_v1, nstate, tmp_v0, list_1buf_org) for (j = 1; j <= idim_max_buf; j++) {//idim_max_buf -> original @@ -1337,10 +1179,6 @@ shared(tmp_v1buf, tmp_v1, nstate, tmp_v0, list_1buf_org) _irght, _ilft, _ihfbit, &ioff); zaxpy_(&nstate, &trans, &tmp_v1buf[j][0], &one, &tmp_v0[ioff][0], &one); }/*for (j = 1; j <= idim_max_buf; j++)*/ - return; -#else - return 0.0; -#endif }/*double complex X_GC_Cis_MPI*/ /** @brief Compute @f$c_{jt}@f$ @@ -1363,11 +1201,9 @@ void X_Ajt_MPI( long unsigned int _ilft,//!<[in] Similer to LargeList::ilft long unsigned int _ihfbit//!<[in] Similer to LargeList::ihfbit ){ -#ifdef MPI int mask2, state2, ierr, origin, 
bit2diff, Fsgn; unsigned long int idim_max_buf, j, ioff; - MPI_Status statusMPI; - double complex trans, dmv; + double complex trans; int one = 1; // org_isite >= Nsite @@ -1382,20 +1218,9 @@ void X_Ajt_MPI( bit2diff = myrank - ((2 * mask2 - 1) & myrank); SgnBit((unsigned long int) (bit2diff), &Fsgn); // Fermion sign - ierr = MPI_Sendrecv(&idim_max, 1, MPI_UNSIGNED_LONG, origin, 0, - &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - - ierr = MPI_Sendrecv(list_1_org, idim_max + 1, MPI_UNSIGNED_LONG, origin, 0, - list_1buf_org, idim_max_buf + 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - - ierr = MPI_Sendrecv(tmp_v1, idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - tmp_v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + idim_max_buf = SendRecv_i(origin, idim_max); + SendRecv_iv(origin, idim_max + 1, idim_max_buf + 1, list_1_org, list_1buf_org); + SendRecv_cv(origin, idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &tmp_v1buf[1][0]); if (state2 == 0) { trans = 0; @@ -1403,9 +1228,9 @@ void X_Ajt_MPI( else if (state2 == mask2) { trans = (double)Fsgn * tmp_trans; } - else return 0; + else return; -#pragma omp parallel for default(none) private(j, dmv) \ +#pragma omp parallel for default(none) private(j) \ firstprivate(idim_max_buf, trans, ioff, _irght, _ilft, _ihfbit, list_2_1_target, list_2_2_target) \ shared(tmp_v1buf, tmp_v1, nstate, tmp_v0, list_1buf_org) for (j = 1; j <= idim_max_buf; j++) { @@ -1413,5 +1238,4 @@ shared(tmp_v1buf, tmp_v1, nstate, tmp_v0, list_1buf_org) _irght, _ilft, _ihfbit, &ioff); zaxpy_(&nstate, &trans, &tmp_v1buf[j][0], &one, &tmp_v0[ioff][0], &one); } -#endif }/*double complex X_Ajt_MPI*/ diff --git a/src/mltplyMPISpin.c b/src/mltplyMPISpin.c index 53cd71b75..971b4d45f 100644 --- a/src/mltplyMPISpin.c +++ b/src/mltplyMPISpin.c @@ -17,9 +17,6 @@ @brief Functions for 
spin Hamiltonian + MPI */ -#ifdef MPI -#include "mpi.h" -#endif #include "Common.h" #include "mltplyCommon.h" #include "bitcalc.h" @@ -38,7 +35,6 @@ void child_general_int_spin_MPIdouble( int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 double complex **tmp_v1//!<[in] v0 = H v1 ){ -#ifdef MPI X_child_general_int_spin_MPIdouble( (int)X->Def.InterAll_OffDiagonal[i_int][0], (int)X->Def.InterAll_OffDiagonal[i_int][1], (int)X->Def.InterAll_OffDiagonal[i_int][3], (int)X->Def.InterAll_OffDiagonal[i_int][4], @@ -48,12 +44,10 @@ void child_general_int_spin_MPIdouble( Add @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ to LargeList::prdct */ -#endif }/*void child_general_int_spin_MPIdouble*/ /** @brief Exchange term in Spin model When both site1 and site2 are in the inter process region. -@return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ @author Mitsuaki Kawamura (The University of Tokyo) */ void X_child_general_int_spin_MPIdouble( @@ -68,11 +62,9 @@ void X_child_general_int_spin_MPIdouble( int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ double complex **tmp_v1//!<[in] Vector to be producted ) { -#ifdef MPI int mask1, mask2, state1, state2, ierr, origin; unsigned long int idim_max_buf, j, ioff; - MPI_Status statusMPI; - double complex Jint, dmv; + double complex Jint; int one = 1; mask1 = (int)X->Def.Tpow[org_isite1]; @@ -93,29 +85,21 @@ void X_child_general_int_spin_MPIdouble( } else return 0; - ierr = MPI_Sendrecv(&X->Check.idim_max, 1, MPI_UNSIGNED_LONG, origin, 0, - &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0, MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - ierr = MPI_Sendrecv(list_1, X->Check.idim_max + 1, MPI_UNSIGNED_LONG, origin, 0, - list_1buf, idim_max_buf + 1, MPI_UNSIGNED_LONG, origin, 0, MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - ierr = MPI_Sendrecv(tmp_v1, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, MPI_COMM_WORLD, 
&statusMPI); - if (ierr != 0) exitMPI(-1); + idim_max_buf = SendRecv_i(origin, X->Check.idim_max); + SendRecv_iv(origin, X->Check.idim_max + 1, idim_max_buf + 1, list_1, list_1buf); + SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); -#pragma omp parallel for default(none) private(j, dmv, ioff) \ +#pragma omp parallel for default(none) private(j, ioff) \ firstprivate(idim_max_buf, Jint, X) shared(list_2_1, list_2_2, list_1buf, v1buf, tmp_v1, tmp_v0) for (j = 1; j <= idim_max_buf; j++) { GetOffComp(list_2_1, list_2_2, list_1buf[j], X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); zaxpy_(&nstate, &Jint, &v1buf[j][0], &one, &tmp_v0[ioff][0], &one); }/*for (j = 1; j <= idim_max_buf; j++)*/ -#endif }/*double complex X_child_general_int_spin_MPIdouble*/ /** @brief Exchange term in Spin model When both site1 and site2 are in the inter process region. -@return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ @author Mitsuaki Kawamura (The University of Tokyo) */ void X_child_general_int_spin_TotalS_MPIdouble( @@ -125,10 +109,8 @@ void X_child_general_int_spin_TotalS_MPIdouble( int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ double complex **tmp_v1//!<[in] Vector to be producted ){ -#ifdef MPI int mask1, mask2, num1_up, num2_up, ierr, origin; unsigned long int idim_max_buf, j, ioff, ibit_tmp; - MPI_Status statusMPI; double complex dmv; mask1 = (int)X->Def.Tpow[org_isite1]; @@ -141,18 +123,9 @@ void X_child_general_int_spin_TotalS_MPIdouble( ibit_tmp = (num1_up) ^ (num2_up); if (ibit_tmp == 0) return 0; - ierr = MPI_Sendrecv(&X->Check.idim_max, 1, MPI_UNSIGNED_LONG, origin, 0, - &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - ierr = MPI_Sendrecv(list_1, X->Check.idim_max + 1, MPI_UNSIGNED_LONG, origin, 0, - list_1buf, idim_max_buf + 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - ierr = 
MPI_Sendrecv(tmp_v1, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + idim_max_buf = SendRecv_i(origin, X->Check.idim_max); + SendRecv_iv(origin, X->Check.idim_max + 1, idim_max_buf + 1, list_1, list_1buf); + SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); #pragma omp parallel for default(none) private(j, dmv, ioff) \ firstprivate(idim_max_buf, X) shared(list_2_1, list_2_2, list_1buf, v1buf, tmp_v1, tmp_v0) @@ -162,14 +135,10 @@ void X_child_general_int_spin_TotalS_MPIdouble( dmv = 0.5 * v1buf[j]; }/*for (j = 1; j <= idim_max_buf; j++)*/ return; -#else - return 0.0; -#endif }/*double complex X_child_general_int_spin_MPIdouble*/ /** @brief Exchange term in Spin model When only site2 is in the inter process region. -@return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ @author Mitsuaki Kawamura (The University of Tokyo) */ void child_general_int_spin_MPIsingle( @@ -178,7 +147,6 @@ void child_general_int_spin_MPIsingle( int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 double complex **tmp_v1//!<[in] v0 = H v1 ){ -#ifdef MPI X_child_general_int_spin_MPIsingle( (int)X->Def.InterAll_OffDiagonal[i_int][0], (int)X->Def.InterAll_OffDiagonal[i_int][1], @@ -189,12 +157,10 @@ void child_general_int_spin_MPIsingle( Add @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ to LargeList::prdct */ -#endif }/*void child_general_int_spin_MPIsingle*/ /* @brief General interaction term of canonical spin system. 
site 3 is in the inter process region -@return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ void X_child_general_int_spin_MPIsingle( int org_isite1,//!<[in] Site 1 @@ -208,11 +174,9 @@ void X_child_general_int_spin_MPIsingle( int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ double complex **tmp_v1//!<[in] Vector to be producted ) { -#ifdef MPI int mask2, state2, ierr, origin; unsigned long int mask1, idim_max_buf, j, ioff, state1, jreal, state1check; - MPI_Status statusMPI; - double complex Jint, dmv; + double complex Jint; int one = 1; /* Prepare index in the inter PE @@ -234,24 +198,15 @@ void X_child_general_int_spin_MPIsingle( } else return 0; - ierr = MPI_Sendrecv(&X->Check.idim_max, 1, MPI_UNSIGNED_LONG, origin, 0, - &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - ierr = MPI_Sendrecv(list_1, X->Check.idim_max + 1, MPI_UNSIGNED_LONG, origin, 0, - list_1buf, idim_max_buf + 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - ierr = MPI_Sendrecv(tmp_v1, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + idim_max_buf = SendRecv_i(origin, X->Check.idim_max); + SendRecv_iv(origin, X->Check.idim_max + 1, idim_max_buf + 1, list_1, list_1buf); + SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); /* Index in the intra PE */ mask1 = X->Def.Tpow[org_isite1]; -#pragma omp parallel for default(none) private(j, dmv, ioff, jreal, state1) \ +#pragma omp parallel for default(none) private(j, ioff, jreal, state1) \ firstprivate(idim_max_buf, Jint, X, mask1, state1check, org_isite1) \ shared(list_2_1, list_2_2, list_1buf, v1buf, tmp_v1, tmp_v0) for (j = 1; j <= idim_max_buf; j++) { @@ -264,7 +219,6 @@ shared(list_2_1, list_2_2, list_1buf, v1buf, tmp_v1, tmp_v0) zaxpy_(&nstate, 
&Jint, &v1buf[j][0], &one, &tmp_v0[ioff][0], &one); } } -#endif }/*double complex X_child_general_int_spin_MPIsingle*/ /** @brief General interaction term in the Spin model + GC @@ -323,8 +277,6 @@ void GC_child_general_int_GeneralSpin_MPIdouble( int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 double complex **tmp_v1//!<[in] v0 = H v1 ){ -#ifdef MPI - // MPI_Status statusMPI; if (X->Def.InterAll_OffDiagonal[i_int][1] == X->Def.InterAll_OffDiagonal[i_int][3] && X->Def.InterAll_OffDiagonal[i_int][5] != X->Def.InterAll_OffDiagonal[i_int][7]) { @@ -347,7 +299,6 @@ void GC_child_general_int_GeneralSpin_MPIdouble( X->Def.InterAll_OffDiagonal[i_int][5], X->Def.InterAll_OffDiagonal[i_int][7], X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); } -#endif }/*void GC_child_general_int_spin_MPIdouble*/ /** @brief General interaction term in the Spin model + GC @@ -360,7 +311,6 @@ void GC_child_general_int_GeneralSpin_MPIsingle( int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 double complex **tmp_v1//!<[in] v0 = H v1 ){ -#ifdef MPI if (X->Def.InterAll_OffDiagonal[i_int][1] == X->Def.InterAll_OffDiagonal[i_int][3] && X->Def.InterAll_OffDiagonal[i_int][5] != X->Def.InterAll_OffDiagonal[i_int][7]) { @@ -384,7 +334,6 @@ void GC_child_general_int_GeneralSpin_MPIsingle( X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); } -#endif }/*void GC_child_general_int_spin_MPIsingle*/ /** @brief General interaction term in the Spin model + GC diff --git a/src/mltplyMPISpinCore.c b/src/mltplyMPISpinCore.c index 7889e685b..140c4e372 100644 --- a/src/mltplyMPISpinCore.c +++ b/src/mltplyMPISpinCore.c @@ -62,9 +62,6 @@ General two body term: */ -#ifdef MPI -#include "mpi.h" -#endif #include "Common.h" #include "mltplyCommon.h" #include "mltplySpinCore.h" @@ -82,19 +79,16 @@ void GC_child_CisAitCiuAiv_spin_MPIdouble( int nstate, double complex **tmp_v0 /**< [out] Result v0 = H v1*/, double complex **tmp_v1 /**< [in] v0 = H v1*/) { 
-#ifdef MPI X_GC_child_CisAitCiuAiv_spin_MPIdouble( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][3], X->Def.InterAll_OffDiagonal[i_int][4], X->Def.InterAll_OffDiagonal[i_int][5], X->Def.InterAll_OffDiagonal[i_int][7], X->Def.ParaInterAll_OffDiagonal[i_int],X, nstate, tmp_v0, tmp_v1); -#endif }/*void GC_child_CisAitCiuAiv_spin_MPIdouble*/ /** @brief @f$c_{is}^\dagger c_{it} c_{iu}^\dagger c_{iv}@f$ term in Spin model + GC. When both site1 and site2 are in the inter process region. -@return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ @author Kazuyoshi Yoshimi (The University of Tokyo) @author Mitsuaki Kawamura (The University of Tokyo) */ @@ -110,11 +104,10 @@ void X_GC_child_CisAitCiuAiv_spin_MPIdouble( int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ double complex **tmp_v1//!<[in] Vector to be producted ) { -#ifdef MPI int mask1, mask2, state1, state2, ierr, origin; - unsigned long int idim_max_buf, j; - MPI_Status statusMPI; - double complex Jint, dmv; + unsigned long int idim_max_buf, j, nstatedim; + double complex Jint; + int one = 1; mask1 = (int)X->Def.Tpow[org_isite1]; mask2 = (int)X->Def.Tpow[org_isite3]; @@ -127,7 +120,7 @@ void X_GC_child_CisAitCiuAiv_spin_MPIdouble( return; } else { //CisAitCisAit=0 - return 0.0; + return; } } @@ -144,28 +137,14 @@ void X_GC_child_CisAitCiuAiv_spin_MPIdouble( } } else { - return 0; + return; } - ierr = MPI_Sendrecv(&X->Check.idim_max, 1, MPI_UNSIGNED_LONG, origin, 0, - &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - ierr = MPI_Sendrecv(tmp_v1, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + idim_max_buf = SendRecv_i(origin, X->Check.idim_max); + SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], 
&v1buf[1][0]); -#pragma omp parallel default(none) private(j, dmv) \ - firstprivate(idim_max_buf, Jint, X) shared(v1buf, tmp_v1, tmp_v0) - { -#pragma omp for - for (j = 1; j <= idim_max_buf; j++) { - dmv = Jint * v1buf[j]; - tmp_v0[j] += dmv; - }/*for (j = 1; j <= idim_max_buf; j++)*/ - }/*End of parallel region*/ -#endif + nstatedim = nstate * idim_max_buf; + zaxpy_(&nstatedim, &Jint, &v1buf[1][0], &one, &tmp_v0[1][0], &one); }/*void GC_child_CisAitCiuAiv_spin_MPIdouble*/ /** @brief Wrapper for calculating CisAisCjuAjv term in Spin model + GC @@ -178,17 +157,14 @@ void GC_child_CisAisCjuAjv_spin_MPIdouble( int nstate, double complex **tmp_v0 /**< [out] Result v0 = H v1*/, double complex **tmp_v1 /**< [in] v0 = H v1*/ ){ -#ifdef MPI X_GC_child_CisAisCjuAjv_spin_MPIdouble( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][4], X->Def.InterAll_OffDiagonal[i_int][5], X->Def.InterAll_OffDiagonal[i_int][7], X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); -#endif }/*void GC_child_CisAitCiuAiv_spin_MPIdouble*/ /** @brief CisAisCjuAjv term in Spin model + GC When both site1 and site2 are in the inter process region. 
-@return @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ @author Kazuyoshi Yoshimi (The University of Tokyo) */ void X_GC_child_CisAisCjuAjv_spin_MPIdouble( @@ -202,15 +178,13 @@ void X_GC_child_CisAisCjuAjv_spin_MPIdouble( int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ double complex **tmp_v1//!<[in] Vector to be producted ) { -#ifdef MPI int mask1, mask2, state2, ierr; long int origin, num1; unsigned long int idim_max_buf, j; - MPI_Status statusMPI; - double complex Jint, dmv; + double complex Jint; if (org_isite1 == org_isite3 && org_ispin1 == org_ispin4) {//CisAisCitAis - return 0.0; + return; } mask1 = (int)X->Def.Tpow[org_isite1]; @@ -226,25 +200,13 @@ void X_GC_child_CisAisCjuAjv_spin_MPIdouble( if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) Jint = 0; } else { - return 0.0; + return; } - ierr = MPI_Sendrecv(&X->Check.idim_max, 1, MPI_UNSIGNED_LONG, origin, 0, - &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - ierr = MPI_Sendrecv(tmp_v1, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - -#pragma omp parallel for default(none) private(j, dmv) \ - firstprivate(idim_max_buf, Jint, X) shared(v1buf, tmp_v1, tmp_v0) - for (j = 1; j <= idim_max_buf; j++) { - dmv = Jint * v1buf[j]; - tmp_v0[j] += dmv; - } -#endif + idim_max_buf = SendRecv_i(origin, X->Check.idim_max); + SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); + + zaxpy_long(X->Check.idim_max*nstate, Jint, &v1buf[1][0], &tmp_v0[1][0]); }/*double complex X_GC_child_CisAisCjuAjv_spin_MPIdouble*/ /** @brief Wrapper for calculating CisAitCjuAju term in Spin model + GC @@ -258,17 +220,14 @@ void GC_child_CisAitCjuAju_spin_MPIdouble( double complex **tmp_v1//!<[in] v0 = H v1 ) { -#ifdef MPI X_GC_child_CisAitCjuAju_spin_MPIdouble( 
X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][3], X->Def.InterAll_OffDiagonal[i_int][4], X->Def.InterAll_OffDiagonal[i_int][5], X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); -#endif }/*void GC_child_CisAitCiuAiv_spin_MPIdouble*/ /** @brief CisAisCjuAjv term in Spin model + GC When both site1 and site2 are in the inter process region. -@return @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ @author Kazuyoshi Yoshimi (The University of Tokyo) */ void X_GC_child_CisAitCjuAju_spin_MPIdouble( @@ -282,15 +241,13 @@ void X_GC_child_CisAitCjuAju_spin_MPIdouble( int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ double complex **tmp_v1//!<[in] Vector to be producted ) { -#ifdef MPI int mask1, mask2, state1, ierr, num1; long int origin; unsigned long int idim_max_buf, j; - MPI_Status statusMPI; - double complex Jint, dmv; + double complex Jint; if (org_isite1 == org_isite3 && org_ispin1 == org_ispin3) {//cisaitcisais - return 0.0; + return; } mask1 = (int)X->Def.Tpow[org_isite1]; @@ -303,7 +260,7 @@ void X_GC_child_CisAitCjuAju_spin_MPIdouble( Jint = tmp_J; } else { - return 0.0; + return; } }/*if (state1 == org_ispin2)*/ else {//state1 = org_ispin1 @@ -315,34 +272,18 @@ void X_GC_child_CisAitCjuAju_spin_MPIdouble( } } else { - return 0.0; + return; } } - ierr = MPI_Sendrecv(&X->Check.idim_max, 1, MPI_UNSIGNED_LONG, origin, 0, - &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - ierr = MPI_Sendrecv(tmp_v1, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + idim_max_buf = SendRecv_i(origin, X->Check.idim_max); + SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); -#pragma omp parallel default(none) private(j, dmv) \ - 
firstprivate(idim_max_buf, Jint, X) shared(v1buf, tmp_v1, tmp_v0) - { -#pragma omp for - for (j = 1; j <= idim_max_buf; j++) { - dmv = Jint * v1buf[j]; - tmp_v0[j] += dmv; - }/*for (j = 1; j <= idim_max_buf; j++)*/ - }/*End of parallel region*/ -#endif + zaxpy_long(X->Check.idim_max*nstate, Jint, &v1buf[1][0], &tmp_v0[1][0]); }/*double complex X_GC_child_CisAisCjuAjv_spin_MPIdouble*/ /** @brief CisAisCjuAjv term in Spin model + GC When both site1 and site2 are in the inter process region. -@return @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ @author Kazuyoshi Yoshimi (The University of Tokyo) */ void X_GC_child_CisAisCjuAju_spin_MPIdouble( @@ -355,11 +296,10 @@ void X_GC_child_CisAisCjuAju_spin_MPIdouble( int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ double complex **tmp_v1//!<[in] Vector to be producted ){ -#ifdef MPI long unsigned int mask1, mask2, num1,num2; unsigned long int j; -// MPI_Status statusMPI; double complex dmv; + int one = 1; mask1 = (int)X->Def.Tpow[org_isite1]; mask2 = (int)X->Def.Tpow[org_isite3]; num1 = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, mask1, org_ispin1); @@ -370,16 +310,14 @@ void X_GC_child_CisAisCjuAju_spin_MPIdouble( { #pragma omp for for (j = 1; j <= X->Check.idim_max; j++) { - dmv = num1 * num2*tmp_v1[j] * tmp_J; - tmp_v0[j] += dmv; + dmv = num1 * num2 * tmp_J; + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); }/*for (j = 1; j <= X->Check.idim_max; j++) */ }/*End of parallel region*/ -#endif }/*double complex X_GC_child_CisAisCjuAju_spin_MPIdouble*/ /** @brief CisAisCjuAjv term in Spin model + GC When both site1 and site2 are in the inter process region. 
-@return @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ @author Kazuyoshi Yoshimi (The University of Tokyo) */ void X_GC_child_CisAisCjuAju_spin_MPIsingle( @@ -392,11 +330,10 @@ void X_GC_child_CisAisCjuAju_spin_MPIsingle( int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ double complex **tmp_v1//!<[in] Vector to be producted ) { -#ifdef MPI long unsigned int mask1, mask2, num1, num2; unsigned long int j; -// MPI_Status statusMPI; double complex Jint, dmv; + int one = 1; Jint = tmp_J; mask1 = (int)X->Def.Tpow[org_isite1]; mask2 = (int)X->Def.Tpow[org_isite3]; @@ -408,11 +345,10 @@ void X_GC_child_CisAisCjuAju_spin_MPIsingle( #pragma omp for for (j = 1; j <= X->Check.idim_max; j++) { num1 = X_SpinGC_CisAis(j, X, mask1, org_ispin1); - dmv = Jint * num1 * num2 * tmp_v1[j]; - tmp_v0[j] += dmv; + dmv = Jint * num1 * num2; + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); }/*for (j = 1; j <= X->Check.idim_max; j++)*/ }/*End of parallel region*/ -#endif }/*double complex X_GC_child_CisAisCjuAju_spin_MPIdouble*/ /** @brief Exchange and Pairlifting term in Spin model + GC @@ -425,18 +361,15 @@ void GC_child_CisAitCiuAiv_spin_MPIsingle( int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 double complex **tmp_v1//!<[in] v0 = H v1 ){ -#ifdef MPI X_GC_child_CisAitCiuAiv_spin_MPIsingle( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][3], X->Def.InterAll_OffDiagonal[i_int][4], X->Def.InterAll_OffDiagonal[i_int][5], X->Def.InterAll_OffDiagonal[i_int][7], X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); -#endif }/*void GC_child_CisAitCiuAiv_spin_MPIsingle*/ /** @brief Exchange and Pairlifting term in Spin model + GC When only site2 is in the inter process region. 
-@return @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ @author Mitsuaki Kawamura (The University of Tokyo) */ void X_GC_child_CisAitCiuAiv_spin_MPIsingle( @@ -451,11 +384,10 @@ void X_GC_child_CisAitCiuAiv_spin_MPIsingle( int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ double complex **tmp_v1//!<[in] Vector to be producted ) { -#ifdef MPI int mask2, state2, ierr, origin; unsigned long int mask1, idim_max_buf, j, ioff, state1, state1check; - MPI_Status statusMPI; - double complex Jint, dmv; + double complex Jint; + int one = 1; /* Prepare index in the inter PE */ @@ -474,34 +406,26 @@ void X_GC_child_CisAitCiuAiv_spin_MPIsingle( Jint = 0; } } - else return 0.0; - - ierr = MPI_Sendrecv(&X->Check.idim_max, 1, MPI_UNSIGNED_LONG, origin, 0, - &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - ierr = MPI_Sendrecv(tmp_v1, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + else return; + + idim_max_buf = SendRecv_i(origin, X->Check.idim_max); + SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); /* Index in the intra PE */ mask1 = X->Def.Tpow[org_isite1]; -#pragma omp parallel default(none) private(j, dmv, state1, ioff) \ +#pragma omp parallel default(none) private(j, state1, ioff) \ firstprivate(idim_max_buf, Jint, X, state1check, mask1) shared(v1buf, tmp_v1, tmp_v0) { #pragma omp for for (j = 0; j < idim_max_buf; j++) { state1 = X_SpinGC_CisAit(j + 1, X, mask1, state1check, &ioff); if (state1 != 0) { - dmv = Jint * v1buf[j + 1]; - tmp_v0[ioff + 1] += dmv; + zaxpy_(&nstate, &Jint, &v1buf[j + 1][0], &one, &tmp_v0[ioff + 1][0], &one); }/*if (state1 != 0)*/ }/*for (j = 0; j < idim_max_buf; j++)*/ }/*End of parallel region*/ -#endif }/*void GC_child_CisAitCiuAiv_spin_MPIsingle*/ /** @brief Wrapper for CisAisCjuAjv term in 
Spin model + GC @@ -514,17 +438,14 @@ void GC_child_CisAisCjuAjv_spin_MPIsingle( int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 double complex **tmp_v1//!<[in] v0 = H v1 ){ -#ifdef MPI X_GC_child_CisAisCjuAjv_spin_MPIsingle( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][4], X->Def.InterAll_OffDiagonal[i_int][5], X->Def.InterAll_OffDiagonal[i_int][7], X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); -#endif }/*void GC_child_CisAisCjuAjv_spin_MPIsingle*/ /** @brief CisAisCjuAjv term in Spin model + GC When only site2 is in the inter process region. -@return @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ @author Kazuyoshi Yoshimi (The University of Tokyo) */ void X_GC_child_CisAisCjuAjv_spin_MPIsingle( @@ -538,11 +459,10 @@ void X_GC_child_CisAisCjuAjv_spin_MPIsingle( int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ double complex **tmp_v1//!<[in] Vector to be producted ) { -#ifdef MPI int mask2, state2, ierr, origin; unsigned long int mask1, idim_max_buf, j, state1, state1check; - MPI_Status statusMPI; - double complex Jint, dmv; + double complex Jint; + int one = 1; /* Prepare index in the inter PE */ @@ -560,34 +480,26 @@ void X_GC_child_CisAisCjuAjv_spin_MPIsingle( Jint = 0; } } - else return 0.0; - - ierr = MPI_Sendrecv(&X->Check.idim_max, 1, MPI_UNSIGNED_LONG, origin, 0, - &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - ierr = MPI_Sendrecv(tmp_v1, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + else return; + + idim_max_buf = SendRecv_i(origin, X->Check.idim_max); + SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); /* Index in the intra PE */ mask1 = X->Def.Tpow[org_isite1]; -#pragma omp parallel 
default(none) private(j, dmv, state1) \ +#pragma omp parallel default(none) private(j, state1) \ firstprivate(idim_max_buf, Jint, X, state1check, mask1) shared(v1buf, tmp_v1, tmp_v0) { #pragma omp for for (j = 0; j < idim_max_buf; j++) { state1 = (j & mask1) / mask1; if (state1 == state1check) { - dmv = Jint * v1buf[j + 1]; - tmp_v0[j + 1] += dmv; + zaxpy_(&nstate, &Jint, &v1buf[j + 1][0], &one, &tmp_v0[j + 1][0], &one); }/*if (state1 == state1check)*/ }/*for (j = 0; j < idim_max_buf; j++)*/ }/*End of parallel region*/ -#endif }/*void GC_child_CisAitCiuAiv_spin_MPIsingle*/ /** @brief Wrapper for CisAisCjuAjv term in Spin model + GC @@ -600,17 +512,14 @@ void GC_child_CisAitCjuAju_spin_MPIsingle( int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 double complex **tmp_v1//!<[in] v0 = H v1 ){ -#ifdef MPI X_GC_child_CisAitCjuAju_spin_MPIsingle( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][3], X->Def.InterAll_OffDiagonal[i_int][4], X->Def.InterAll_OffDiagonal[i_int][5], X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); -#endif }/*void GC_child_CisAisCjuAjv_spin_MPIsingle*/ /** @brief CisAisCjuAjv term in Spin model + GC When only site2 is in the inter process region. 
-@return @f$\langle v_1 | H_{\rm this} | v_1 \rangle@f$ @author Kazuyoshi Yoshimi (The University of Tokyo) */ void X_GC_child_CisAitCjuAju_spin_MPIsingle( @@ -624,11 +533,10 @@ void X_GC_child_CisAitCjuAju_spin_MPIsingle( int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ double complex **tmp_v1//!<[in] Vector to be producted ) { -#ifdef MPI int mask2, state2; unsigned long int mask1, j, ioff, state1, state1check; - //MPI_Status statusMPI; double complex Jint, dmv; + int one = 1; /* Prepare index in the inter PE */ @@ -640,7 +548,7 @@ void X_GC_child_CisAitCjuAju_spin_MPIsingle( Jint = tmp_J; } else { - return 0.0; + return; } mask1 = (int)X->Def.Tpow[org_isite1]; @@ -654,20 +562,18 @@ void X_GC_child_CisAitCjuAju_spin_MPIsingle( state1 = (j & mask1) / mask1; ioff = j ^ mask1; if (state1 == state1check) { - dmv = Jint * tmp_v1[j + 1]; + dmv = Jint; } else { - dmv = conj(Jint) * tmp_v1[j + 1]; + dmv = conj(Jint); } - tmp_v0[ioff + 1] += dmv; + zaxpy_(&nstate, &dmv, &tmp_v1[j + 1][0], &one, &tmp_v0[ioff + 1][0], &one); }/*for (j = 0; j < X->Check.idim_max; j++)*/ }/*End of parallel region*/ -#endif }/*void GC_child_CisAitCiuAiv_spin_MPIsingle*/ /** @brief @f$c_{is}^\dagger c_{is} c_{ju}^\dagger c_{jv}@f$ term in Spin model. When both site1 and site3 are in the inter process region. 
-@return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ void X_GC_child_CisAisCjuAjv_GeneralSpin_MPIdouble( int org_isite1,//!<[in] Site 1 @@ -680,14 +586,12 @@ void X_GC_child_CisAisCjuAjv_GeneralSpin_MPIdouble( int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ double complex **tmp_v1//!<[in] Vector to be producted ) { -#ifdef MPI unsigned long int off, j; int origin, ierr; - double complex tmp_V, dmv; - MPI_Status statusMPI; - int ihermite = TRUE; + double complex tmp_V; + int ihermite = TRUE, one = 1; if (org_isite1 == org_isite3 && org_ispin1 == org_ispin4) {//cisaisciuais=0 && cisaiucisais=0 - return 0.0; + return; } if (BitCheckGeneral(myrank, org_isite1 + 1, org_ispin1, X->Def.SiteToBit, X->Def.Tpow) == TRUE @@ -704,42 +608,18 @@ void X_GC_child_CisAisCjuAjv_GeneralSpin_MPIdouble( tmp_V = conj(tmp_J); if(X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) tmp_V = 0.0; }/*BitCheckGeneral(off, org_isite1 + 1, org_ispin1)*/ - else return 0.0; + else return; }/*GetOffCompGeneralSpin(myrank, org_isite3 + 1, org_ispin4, org_ispin3, &off)*/ - else return 0.0; + else return; } origin = (int)off; - ierr = MPI_Sendrecv(tmp_v1, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - v1buf, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + SendRecv_cv(origin, X->Check.idim_max*nstate, X->Check.idim_max*nstate, &tmp_v1[1][0], &v1buf[1][0]); -#pragma omp parallel default(none) firstprivate(X, tmp_V) \ -private(j, dmv) shared (tmp_v0, tmp_v1, v1buf) - { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { -#pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - dmv = v1buf[j] * tmp_V; - tmp_v0[j] += dmv; - } - } - else { -#pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - dmv = v1buf[j] * tmp_V; - } - } - }/*End of parallel region*/ - return; -#else - return 0.0; -#endif + zaxpy_long(X->Check.idim_max*nstate, tmp_V, &v1buf[1][0], 
&tmp_v0[1][0]); }/*double complex X_GC_child_CisAisCjuAjv_GeneralSpin_MPIdouble*/ /** @brief @f$c_{is}^\dagger c_{it} c_{ju}^\dagger c_{ju}@f$ term in Spin model. When both site1 and site3 are in the inter process region. -@return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ void X_GC_child_CisAitCjuAju_GeneralSpin_MPIdouble( int org_isite1,//!<[in] Site 1 @@ -752,14 +632,12 @@ void X_GC_child_CisAitCjuAju_GeneralSpin_MPIdouble( int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ double complex **tmp_v1//!<[in] Vector to be producted ) { -#ifdef MPI unsigned long int j, off; - int origin, ierr; - double complex tmp_V, dmv; - MPI_Status statusMPI; + int origin, ierr, one = 1; + double complex tmp_V; if (org_isite1 == org_isite3 && org_ispin1 == org_ispin3) {//cisaitcisais=0 && cisaiscitais=0 - return 0.0; + return; } if (BitCheckGeneral(myrank, org_isite3 + 1, org_ispin3, X->Def.SiteToBit, X->Def.Tpow) == TRUE @@ -776,43 +654,19 @@ void X_GC_child_CisAitCjuAju_GeneralSpin_MPIdouble( X->Def.SiteToBit, X->Def.Tpow) == TRUE) { tmp_V = tmp_J; } - else return 0.0; + else return; } - else return 0.0; + else return; origin = (int)off; - ierr = MPI_Sendrecv(tmp_v1, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - v1buf, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + SendRecv_cv(origin, X->Check.idim_max*nstate, X->Check.idim_max*nstate, &tmp_v1[1][0], &v1buf[1][0]); -#pragma omp parallel default(none) firstprivate(X, tmp_V) private(j, dmv) \ -shared (tmp_v0, tmp_v1, v1buf) - { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { -#pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - dmv = v1buf[j] * tmp_V; - tmp_v0[j] += dmv; - } - } - else { -#pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - dmv = v1buf[j] * tmp_V; - } - } - }/*End of parallel region*/ - return; -#else - return 0.0; -#endif + zaxpy_long(X->Check.idim_max*nstate, tmp_V, 
&v1buf[1][0], &tmp_v0[1][0]); }/*double complex X_GC_child_CisAitCjuAju_GeneralSpin_MPIdouble*/ /** @brief Compute @f$c_{is}^\dagger c_{it} c_{ju}^\dagger c_{jv}@f$ term in the grandcanonical general spin system when both site is in the inter process region -@return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ void X_GC_child_CisAitCjuAjv_GeneralSpin_MPIdouble( int org_isite1,//!<[in] Site 1 @@ -826,11 +680,9 @@ void X_GC_child_CisAitCjuAjv_GeneralSpin_MPIdouble( int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1//!<[in] Input wavefunction ) { -#ifdef MPI unsigned long int tmp_off, off, j; - int origin, ierr, ihermite; - double complex tmp_V, dmv; - MPI_Status statusMPI; + int origin, ierr, ihermite, one = 1; + double complex tmp_V; ihermite = TRUE; @@ -863,44 +715,20 @@ void X_GC_child_CisAitCjuAjv_GeneralSpin_MPIdouble( tmp_V = conj(tmp_J); if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) tmp_V = 0.0; } - else return 0.0; + else return; } - else return 0.0; + else return; } origin = (int)off; - ierr = MPI_Sendrecv(tmp_v1, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - v1buf, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + SendRecv_cv(origin, X->Check.idim_max*nstate, X->Check.idim_max*nstate, &tmp_v1[1][0], &v1buf[1][0]); -#pragma omp parallel default(none) firstprivate(X, tmp_V) private(j, dmv) \ - shared (tmp_v0, tmp_v1, v1buf) - { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { -#pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - dmv = v1buf[j] * tmp_V; - tmp_v0[j] += dmv; - } - } - else { -#pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - dmv = v1buf[j] * tmp_V; - } - } - }/*End of parallel region*/ - return; -#else - return 0.0; -#endif + zaxpy_long(X->Check.idim_max*nstate, tmp_V, &v1buf[1][0], &tmp_v0[1][0]); }/*double complex X_GC_child_CisAitCjuAjv_GeneralSpin_MPIdouble*/ /** 
@brief Compute @f$c_{is}^\dagger c_{is} c_{ju}^\dagger c_{ju}@f$ term in the grandcanonical general spin system when both site is in the inter process region - @return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ void X_GC_child_CisAisCjuAju_GeneralSpin_MPIdouble( int org_isite1,//!<[in] Site 1 @@ -912,10 +740,9 @@ void X_GC_child_CisAisCjuAju_GeneralSpin_MPIdouble( int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1//!<[in] Input wavefunction ) { -#ifdef MPI unsigned long int j, num1; - double complex tmp_V, dmv; - //MPI_Status statusMPI; + double complex tmp_V; + int one = 1; num1 = BitCheckGeneral((unsigned long int) myrank, org_isite1 + 1, org_ispin1, X->Def.SiteToBit, X->Def.Tpow); @@ -924,36 +751,15 @@ void X_GC_child_CisAisCjuAju_GeneralSpin_MPIdouble( if (num1 == TRUE) { tmp_V = tmp_J; } - else return 0.0; + else return; } - else return 0.0; + else return; -#pragma omp parallel default(none) firstprivate(X, tmp_V) private(j, dmv) \ -shared (tmp_v0, tmp_v1) - { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { -#pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - dmv = tmp_v1[j] * tmp_V; - tmp_v0[j] += dmv; - }/*for (j = 1; j <= X->Check.idim_max; j++)*/ - } - else { -#pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - dmv = tmp_v1[j] * tmp_V; - }/*for (j = 1; j <= X->Check.idim_max; j++)*/ - } - }/*End of parallel region*/ - return; -#else - return 0.0; -#endif + zaxpy_long(X->Check.idim_max*nstate, tmp_V, &tmp_v1[1][0], &tmp_v0[1][0]); }/*double complex X_GC_child_CisAisCjuAju_GeneralSpin_MPIdouble*/ /** @brief Compute @f$c_{is}^\dagger c_{it}@f$ term in the grandcanonical general spin system when both site is in the inter process region - @return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ void X_GC_child_CisAit_GeneralSpin_MPIdouble( int org_isite1,//!<[in] Site 1 @@ -964,11 +770,9 @@ void X_GC_child_CisAit_GeneralSpin_MPIdouble( int nstate, double complex 
**tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1//!<[in] Input wavefunction ) { -#ifdef MPI unsigned long int off, j; - int origin, ierr; - double complex tmp_V, dmv; - MPI_Status statusMPI; + int origin, ierr, one = 1; + double complex tmp_V; if (GetOffCompGeneralSpin((unsigned long int) myrank, org_isite1 + 1, org_ispin1, org_ispin2, &off, X->Def.SiteToBit, X->Def.Tpow) == TRUE) { @@ -980,41 +784,17 @@ void X_GC_child_CisAit_GeneralSpin_MPIdouble( tmp_V = conj(tmp_trans); if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) tmp_V = 0.0; } - else return 0.0; + else return; origin = (int)off; - ierr = MPI_Sendrecv(tmp_v1, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - v1buf, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + SendRecv_cv(origin, X->Check.idim_max*nstate, X->Check.idim_max*nstate, &tmp_v1[1][0], &v1buf[1][0]); -#pragma omp parallel default(none) firstprivate(X, tmp_V) private(j, dmv) \ -shared (tmp_v0, tmp_v1, v1buf) - { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { -#pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - dmv = v1buf[j] * tmp_V; - tmp_v0[j] += dmv; - }/*for (j = 1; j <= X->Check.idim_max; j++)*/ - } - else { -#pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - dmv = v1buf[j] * tmp_V; - }/*for (j = 1; j <= X->Check.idim_max; j++)*/ - } - }/*End of parallel region*/ - return; -#else - return 0.0; -#endif + zaxpy_long(X->Check.idim_max*nstate, tmp_V, &v1buf[1][0], &tmp_v0[1][0]); }/*double complex X_GC_child_CisAit_GeneralSpin_MPIdouble*/ /** @brief Compute @f$c_{is}^\dagger c_{is}@f$ term in the grandcanonical general spin system when both site is in the inter process region - @return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ void X_GC_child_CisAis_GeneralSpin_MPIdouble( int org_isite1,//!<[in] Site 1 @@ -1024,44 +804,21 @@ void X_GC_child_CisAis_GeneralSpin_MPIdouble( int nstate, double 
complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1//!<[in] Input wavefunction ) { -#ifdef MPI unsigned long int j, num1; - double complex tmp_V, dmv; - //MPI_Status statusMPI; + double complex tmp_V; num1 = BitCheckGeneral((unsigned long int) myrank, org_isite1 + 1, org_ispin1, X->Def.SiteToBit, X->Def.Tpow); if (num1 != 0) { tmp_V = tmp_trans; } - else return 0.0; + else return; -#pragma omp parallel default(none) firstprivate(X, tmp_V) private(j, dmv) \ -shared (tmp_v0, tmp_v1) - { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { -#pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - dmv = tmp_v1[j] * tmp_V; - tmp_v0[j] += dmv; - }/*for (j = 1; j <= X->Check.idim_max; j++)*/ - } - else { -#pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - dmv = tmp_v1[j] * tmp_V; - }/*for (j = 1; j <= X->Check.idim_max; j++)*/ - } - }/*End of parallel region*/ - return; -#else - return 0.0; -#endif + zaxpy_long(X->Check.idim_max*nstate, tmp_V, &tmp_v1[1][0], &tmp_v0[1][0]); }/*double complex X_GC_child_CisAis_GeneralSpin_MPIdouble*/ /** @brief Compute @f$c_{is} c_{is}^\dagger@f$ term in the grandcanonical general spin system when both site is in the inter process region - @return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ void X_GC_child_AisCis_GeneralSpin_MPIdouble( int org_isite1,//!<[in] Site 1 @@ -1071,44 +828,22 @@ void X_GC_child_AisCis_GeneralSpin_MPIdouble( int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1//!<[in] Input wavefunction ) { -#ifdef MPI unsigned long int j, num1; - double complex tmp_V, dmv; - //MPI_Status statusMPI; + double complex tmp_V; + int one = 1; num1 = BitCheckGeneral((unsigned long int) myrank, org_isite1 + 1, org_ispin1, X->Def.SiteToBit, X->Def.Tpow); if (num1 == 0) { tmp_V = tmp_trans; } - else return 0.0; + else return; -#pragma omp parallel default(none) firstprivate(X, tmp_V) private(j, dmv) \ -shared (tmp_v0, tmp_v1) - { - if 
(X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { -#pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - dmv = tmp_v1[j] * tmp_V; - tmp_v0[j] += dmv; - }/*for (j = 1; j <= X->Check.idim_max; j++)*/ - } - else { -#pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - dmv = tmp_v1[j] * tmp_V; - }/*for (j = 1; j <= X->Check.idim_max; j++)*/ - } - }/*End of Parallel region*/ - return; -#else - return 0.0; -#endif + zaxpy_long(X->Check.idim_max*nstate, tmp_V, &tmp_v1[1][0], &tmp_v0[1][0]); }/*double complex X_GC_child_AisCis_GeneralSpin_MPIdouble*/ /** @brief Compute @f$c_{is}^\dagger c_{it}@f$ term in the canonical general spin system when both site is in the inter process region -@return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ void X_child_CisAit_GeneralSpin_MPIdouble( int org_isite1,//!<[in] Site 1 @@ -1125,11 +860,9 @@ void X_child_CisAit_GeneralSpin_MPIdouble( long unsigned int _ihfbit//!<[in] Similer to LargeList::ihfbit ) { -#ifdef MPI unsigned long int off, j, tmp_off,idim_max_buf; - int origin, ierr; - double complex tmp_V, dmv; - MPI_Status statusMPI; + int origin, ierr, one = 1; + double complex tmp_V; if (GetOffCompGeneralSpin((unsigned long int) myrank, org_isite1 + 1, org_ispin1, org_ispin2, &off, X->Def.SiteToBit, X->Def.Tpow) == TRUE) { @@ -1141,48 +874,28 @@ void X_child_CisAit_GeneralSpin_MPIdouble( tmp_V = conj(tmp_trans); if (X->Large.mode == M_CORR || X->Large.mode ==M_CALCSPEC) tmp_V = 0.0; } - else return 0.0; + else return; origin = (int) off; - ierr = MPI_Sendrecv(&idim_max, 1, MPI_UNSIGNED_LONG, origin, 0, - &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); + idim_max_buf = SendRecv_i(origin, idim_max); if(ierr != 0) exitMPI(-1); + + SendRecv_iv(origin, idim_max + 1, idim_max_buf + 1, list_1_org, list_1buf_org); - ierr = MPI_Sendrecv(list_1_org, idim_max + 1, MPI_UNSIGNED_LONG, origin, 0, - list_1buf_org, idim_max_buf + 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, 
&statusMPI); - if (ierr != 0) exitMPI(-1); - - ierr = MPI_Sendrecv(tmp_v1, idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + SendRecv_cv(origin, idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); - if (X->Large.mode == M_MLTPLY || X->Large.mode ==M_CALCSPEC) { #pragma omp parallel for default(none)\ -firstprivate(X, tmp_V, idim_max_buf, list_1buf_org) private(j, dmv, tmp_off) \ +firstprivate(X, tmp_V, idim_max_buf, list_1buf_org) private(j, tmp_off) \ shared (tmp_v0, tmp_v1, v1buf) - for (j = 1; j <= idim_max_buf; j++) { - ConvertToList1GeneralSpin(list_1buf_org[j], X->Large.ihfbit, &tmp_off); - dmv = v1buf[j] * tmp_V; - tmp_v0[tmp_off] += dmv; - }/*for (j = 1; j <= idim_max_buf; j++)*/ - } - else { - tmp_off = 0; - return 0; - } - return 1; -#else - return 0.0; -#endif + for (j = 1; j <= idim_max_buf; j++) { + ConvertToList1GeneralSpin(list_1buf_org[j], X->Large.ihfbit, &tmp_off); + zaxpy_(&nstate, &tmp_V, &v1buf[j][0], &one, &tmp_v0[tmp_off][0], &one); + }/*for (j = 1; j <= idim_max_buf; j++)*/ }/*double complex X_child_CisAit_GeneralSpin_MPIdouble*/ /** @brief Compute @f$c_{is}^\dagger c_{is}c_{ju}^\dagger c_{jv}@f$ term in the grandcanonical general spin system when one of these site is in the inter process region -@return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ void X_GC_child_CisAisCjuAjv_GeneralSpin_MPIsingle( int org_isite1,//!<[in] Site 1 @@ -1195,11 +908,9 @@ void X_GC_child_CisAisCjuAjv_GeneralSpin_MPIsingle( int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1//!<[in] Input wavefunction ){ -#ifdef MPI unsigned long int off, j, num1; - int origin, ierr, isite, IniSpin; - double complex tmp_V, dmv; - MPI_Status statusMPI; + int origin, ierr, isite, IniSpin, one = 1; + double complex tmp_V; if (GetOffCompGeneralSpin((unsigned long int)myrank, org_isite3 + 1, org_ispin3, 
org_ispin4, &off, @@ -1218,47 +929,25 @@ void X_GC_child_CisAisCjuAjv_GeneralSpin_MPIsingle( isite = org_isite1 + 1; IniSpin = org_ispin1; } - else return 0.0; + else return; origin = (int)off; - ierr = MPI_Sendrecv(tmp_v1, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - v1buf, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + SendRecv_cv(origin, X->Check.idim_max*nstate, X->Check.idim_max*nstate, &tmp_v1[1][0], &v1buf[1][0]); #pragma omp parallel default(none) firstprivate(X, tmp_V, isite, IniSpin) \ -private(j, dmv, num1) shared (tmp_v0, tmp_v1, v1buf) +private(j, num1) shared (tmp_v0, tmp_v1, v1buf) { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { #pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - num1 = BitCheckGeneral(j - 1, isite, IniSpin, X->Def.SiteToBit, X->Def.Tpow); - if (num1 != 0) { - dmv = v1buf[j] * tmp_V; - tmp_v0[j] += dmv; - }/*if (num1 != 0)*/ - }/*for (j = 1; j <= X->Check.idim_max; j++)*/ - } - else { -#pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - num1 = BitCheckGeneral(j - 1, isite, IniSpin, X->Def.SiteToBit, X->Def.Tpow); - if (num1 != 0) { - dmv = v1buf[j] * tmp_V; - }/*if (num1 != 0)*/ - }/*for (j = 1; j <= X->Check.idim_max; j++)*/ - } + for (j = 1; j <= X->Check.idim_max; j++) { + num1 = BitCheckGeneral(j - 1, isite, IniSpin, X->Def.SiteToBit, X->Def.Tpow); + if (num1 != 0) zaxpy_(&nstate, &tmp_V, &v1buf[j][0], &one, &tmp_v0[j][0], &one); + }/*for (j = 1; j <= X->Check.idim_max; j++)*/ }/*End of parallel region*/ - return; -#else - return 0.0; -#endif }/*double complex X_GC_child_CisAisCjuAjv_GeneralSpin_MPIsingle*/ /** @brief Compute @f$c_{is}^\dagger c_{it}c_{ju}^\dagger c_{ju}@f$ term in the grandcanonical general spin system when one of these site is in the inter process region -@return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ void X_GC_child_CisAitCjuAju_GeneralSpin_MPIsingle( int org_isite1,//!<[in] Site 
1 @@ -1271,11 +960,9 @@ void X_GC_child_CisAitCjuAju_GeneralSpin_MPIsingle( int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1//!<[in] Input wavefunction ){ -#ifdef MPI unsigned long int num1, j, off; - int isite, IniSpin, FinSpin; + int isite, IniSpin, FinSpin, one = 1; double complex tmp_V, dmv; - //MPI_Status statusMPI; num1 = BitCheckGeneral((unsigned long int)myrank, org_isite3+1, org_ispin3, X->Def.SiteToBit, X->Def.Tpow); @@ -1285,49 +972,32 @@ void X_GC_child_CisAitCjuAju_GeneralSpin_MPIsingle( IniSpin = org_ispin2; FinSpin = org_ispin1; } - else return 0.0; + else return; #pragma omp parallel default(none) \ firstprivate(X, tmp_V, isite, IniSpin, FinSpin) private(j, dmv, num1, off) \ shared (tmp_v0, tmp_v1, v1buf) { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { -#pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - if (GetOffCompGeneralSpin(j - 1, isite, IniSpin, FinSpin, &off, - X->Def.SiteToBit, X->Def.Tpow) == TRUE) - { - dmv = tmp_v1[j] * tmp_V; - tmp_v0[off + 1] += dmv; - } - else if (GetOffCompGeneralSpin(j - 1, isite, FinSpin, IniSpin, &off, - X->Def.SiteToBit, X->Def.Tpow) == TRUE) - { - dmv = tmp_v1[j] * conj(tmp_V); - tmp_v0[off + 1] += dmv; - } - }/*for (j = 1; j <= X->Check.idim_max; j++)*/ - } - else { #pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - if (GetOffCompGeneralSpin(j - 1, isite, IniSpin, FinSpin, &off, - X->Def.SiteToBit, X->Def.Tpow) == TRUE) - { - dmv = tmp_v1[j] * tmp_V; - } - }/*for (j = 1; j <= X->Check.idim_max; j++)*/ - } + for (j = 1; j <= X->Check.idim_max; j++) { + if (GetOffCompGeneralSpin(j - 1, isite, IniSpin, FinSpin, &off, + X->Def.SiteToBit, X->Def.Tpow) == TRUE) + { + dmv = tmp_V; + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[off + 1][0], &one); + } + else if (GetOffCompGeneralSpin(j - 1, isite, FinSpin, IniSpin, &off, + X->Def.SiteToBit, X->Def.Tpow) == TRUE) + { + dmv = conj(tmp_V); + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], 
&one, &tmp_v0[off + 1][0], &one); + } + }/*for (j = 1; j <= X->Check.idim_max; j++)*/ }/*End of parallel region*/ - return; -#else - return 0.0; -#endif }/*double complex X_GC_child_CisAitCjuAju_GeneralSpin_MPIsingle*/ /** @brief Compute @f$c_{is}^\dagger c_{is}c_{ju}^\dagger c_{jv}@f$ term in the grandcanonical general spin system when one of these site is in the inter process region -@return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ void X_GC_child_CisAitCjuAjv_GeneralSpin_MPIsingle( int org_isite1,//!<[in] Site 1 @@ -1341,11 +1011,9 @@ void X_GC_child_CisAitCjuAjv_GeneralSpin_MPIsingle( int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1//!<[in] Input wavefunction ){ -#ifdef MPI unsigned long int off, j; - int origin, ierr, isite, IniSpin, FinSpin; - double complex tmp_V, dmv; - MPI_Status statusMPI; + int origin, ierr, isite, IniSpin, FinSpin, one = 1; + double complex tmp_V; if (GetOffCompGeneralSpin((unsigned long int)myrank, org_isite3 + 1, org_ispin3, org_ispin4, &off, @@ -1366,49 +1034,28 @@ void X_GC_child_CisAitCjuAjv_GeneralSpin_MPIsingle( IniSpin = org_ispin1; FinSpin = org_ispin2; } - else return 0.0; + else return; origin = (int)off; - ierr = MPI_Sendrecv(tmp_v1, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - v1buf, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + SendRecv_cv(origin, X->Check.idim_max*nstate, X->Check.idim_max*nstate, &tmp_v1[1][0], &v1buf[1][0]); #pragma omp parallel default(none) \ -firstprivate(X, tmp_V, isite, IniSpin, FinSpin) private(j, dmv, off) shared (tmp_v0, tmp_v1, v1buf) +firstprivate(X, tmp_V, isite, IniSpin, FinSpin) private(j, off) shared (tmp_v0, tmp_v1, v1buf) { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { -#pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - if (GetOffCompGeneralSpin(j - 1, isite, IniSpin, FinSpin, &off, - X->Def.SiteToBit, X->Def.Tpow) == 
TRUE) - { - dmv = v1buf[j] * tmp_V; - tmp_v0[off + 1] += dmv; - } - }/*for (j = 1; j <= X->Check.idim_max; j++)*/ - } - else { #pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - if (GetOffCompGeneralSpin(j - 1, isite, IniSpin, FinSpin, &off, - X->Def.SiteToBit, X->Def.Tpow) == TRUE) - { - dmv = v1buf[j] * tmp_V; - } - }/*for (j = 1; j <= X->Check.idim_max; j++)*/ - } + for (j = 1; j <= X->Check.idim_max; j++) { + if (GetOffCompGeneralSpin(j - 1, isite, IniSpin, FinSpin, &off, + X->Def.SiteToBit, X->Def.Tpow) == TRUE) + { + zaxpy_(&nstate, &tmp_V, &v1buf[j][0], &one, &tmp_v0[off + 1][0], &one); + } + }/*for (j = 1; j <= X->Check.idim_max; j++)*/ }/*End of parallel region*/ - return; -#else - return 0.0; -#endif }/*double complex X_GC_child_CisAitCjuAjv_GeneralSpin_MPIsingle*/ /** @brief Compute @f$c_{is}^\dagger c_{is}c_{ju}^\dagger c_{ju}@f$ term in the grandcanonical general spin system when one of these site is in the inter process region -@return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ void X_GC_child_CisAisCjuAju_GeneralSpin_MPIsingle( int org_isite1,//!<[in] Site 1 @@ -1420,46 +1067,31 @@ void X_GC_child_CisAisCjuAju_GeneralSpin_MPIsingle( int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1//!<[in] Input wavefunction ){ -#ifdef MPI unsigned long int j, num1; double complex tmp_V, dmv; - //MPI_Status statusMPI; + int one = 1; num1 = BitCheckGeneral((unsigned long int)myrank, org_isite3+1, org_ispin3, X->Def.SiteToBit, X->Def.Tpow); if (num1 != FALSE) { tmp_V = tmp_J; } - else return 0.0; + else return; #pragma omp parallel default(none) \ firstprivate(X, tmp_V, org_isite1, org_ispin1) private(j, dmv, num1) shared (tmp_v0, tmp_v1) { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { #pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - num1 = BitCheckGeneral(j - 1, org_isite1 + 1, org_ispin1, X->Def.SiteToBit, X->Def.Tpow); + for (j = 1; j <= X->Check.idim_max; j++) { + num1 
= BitCheckGeneral(j - 1, org_isite1 + 1, org_ispin1, X->Def.SiteToBit, X->Def.Tpow); - dmv = tmp_v1[j] * tmp_V * num1; - tmp_v0[j] += dmv; - }/*for (j = 1; j <= X->Check.idim_max; j++)*/ - } - else { -#pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - num1 = BitCheckGeneral(j - 1, org_isite1 + 1, org_ispin1, X->Def.SiteToBit, X->Def.Tpow); - dmv = tmp_v1[j] * tmp_V * num1; - }/*for (j = 1; j <= X->Check.idim_max; j++)*/ - } + dmv = tmp_V * num1; + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); + }/*for (j = 1; j <= X->Check.idim_max; j++)*/ }/*End of parallel region*/ - return; -#else - return 0.0; -#endif }/*double complex X_GC_child_CisAisCjuAju_GeneralSpin_MPIsingle*/ /** @brief Compute @f$c_{is}^\dagger c_{it}c_{ju}^\dagger c_{jv}@f$ term in the canonical general spin system when both sites are in the inter process region -@return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ void X_child_CisAitCjuAjv_GeneralSpin_MPIdouble( int org_isite1,//!<[in] Site 1 @@ -1473,11 +1105,9 @@ void X_child_CisAitCjuAjv_GeneralSpin_MPIdouble( int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1//!<[in] Input wavefunction ){ -#ifdef MPI unsigned long int tmp_off, off, j, idim_max_buf; - int origin, ierr; - double complex tmp_V, dmv; - MPI_Status statusMPI; + int origin, ierr, one = 1; + double complex tmp_V; int ihermite=TRUE; if (GetOffCompGeneralSpin((unsigned long int)myrank, org_isite1 + 1, org_ispin1, org_ispin2, &tmp_off, X->Def.SiteToBit, X->Def.Tpow) == TRUE) @@ -1494,65 +1124,40 @@ void X_child_CisAitCjuAjv_GeneralSpin_MPIdouble( ihermite=FALSE; } - if(ihermite==FALSE){ - if(GetOffCompGeneralSpin((unsigned long int)myrank, org_isite3 + 1, org_ispin4, org_ispin3, &tmp_off, X->Def.SiteToBit, X->Def.Tpow) == TRUE) + if (ihermite == FALSE) { + if (GetOffCompGeneralSpin((unsigned long int)myrank, org_isite3 + 1, org_ispin4, org_ispin3, &tmp_off, X->Def.SiteToBit, X->Def.Tpow) == TRUE) + { + if 
(GetOffCompGeneralSpin(tmp_off, org_isite1 + 1, org_ispin2, org_ispin1, &off, X->Def.SiteToBit, X->Def.Tpow) == TRUE) { - if (GetOffCompGeneralSpin(tmp_off, org_isite1 + 1, org_ispin2, org_ispin1, &off, X->Def.SiteToBit, X->Def.Tpow) == TRUE) - { - tmp_V = conj(tmp_J); - if(X->Large.mode == M_CORR|| X->Large.mode == M_CALCSPEC){ - tmp_V=0.0; - } - } - else return 0.0; + tmp_V = conj(tmp_J); + if (X->Large.mode == M_CORR || X->Large.mode == M_CALCSPEC) { + tmp_V = 0.0; + } } - else return 0.0; + else return; + } + else return; } - origin = (int)off; - ierr = MPI_Sendrecv(&X->Check.idim_max, 1, MPI_UNSIGNED_LONG, origin, 0, - &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - ierr = MPI_Sendrecv(list_1, X->Check.idim_max + 1, MPI_UNSIGNED_LONG, origin, 0, - list_1buf, idim_max_buf + 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - ierr = MPI_Sendrecv(tmp_v1, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + idim_max_buf = SendRecv_i(origin, X->Check.idim_max); + SendRecv_iv(origin, X->Check.idim_max + 1, idim_max_buf + 1, list_1, list_1buf); + SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); #pragma omp parallel default(none) firstprivate(X, tmp_V, idim_max_buf) \ -private(j, dmv, off) shared (tmp_v0, tmp_v1, list_1buf, v1buf) +private(j, off) shared (tmp_v0, tmp_v1, list_1buf, v1buf) { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { #pragma omp for - for (j = 1; j <= idim_max_buf; j++) { - ConvertToList1GeneralSpin(list_1buf[j], X->Check.sdim, &off); - dmv = v1buf[j] * tmp_V; - tmp_v0[off] += dmv; - }/*for (j = 1; j <= idim_max_buf; j++)*/ - } - else { -#pragma omp for - for (j = 1; j <= idim_max_buf; j++) { - ConvertToList1GeneralSpin(list_1buf[j], X->Check.sdim, &off); - 
dmv = v1buf[j] * tmp_V; - }/*for (j = 1; j <= idim_max_buf; j++)*/ - } + for (j = 1; j <= idim_max_buf; j++) { + ConvertToList1GeneralSpin(list_1buf[j], X->Check.sdim, &off); + zaxpy_(&nstate, &tmp_V, &v1buf[j][0], &one, &tmp_v0[off][0], &one); + }/*for (j = 1; j <= idim_max_buf; j++)*/ }/*End of parallel region*/ - return; -#else - return 0.0; -#endif }/*double complex X_child_CisAitCjuAjv_GeneralSpin_MPIdouble*/ /** @brief Compute @f$c_{is}^\dagger c_{is}c_{ju}^\dagger c_{ju}@f$ term in the canonical general spin system when both sites are in the inter process region -@return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ void X_child_CisAisCjuAju_GeneralSpin_MPIdouble( int org_isite1,//!<[in] Site 1 @@ -1564,9 +1169,9 @@ void X_child_CisAisCjuAju_GeneralSpin_MPIdouble( int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1//!<[in] Input wavefunction ) { -#ifdef MPI unsigned long int j, num1; - double complex tmp_V, dmv; + double complex tmp_V; + int one = 1; if (org_isite1 == org_isite3 && org_ispin1 == org_ispin3) { num1 = BitCheckGeneral((unsigned long int) myrank, org_isite1 + 1, org_ispin1, X->Def.SiteToBit, X->Def.Tpow); @@ -1574,7 +1179,7 @@ void X_child_CisAisCjuAju_GeneralSpin_MPIdouble( tmp_V = tmp_J; } else { - return 0.0; + return; } } else { @@ -1586,39 +1191,19 @@ void X_child_CisAisCjuAju_GeneralSpin_MPIdouble( tmp_V = tmp_J; } else { - return 0.0; + return; } } else { - return 0.0; + return; } } -#pragma omp parallel default(none) firstprivate(X, tmp_V) private(j, dmv) \ -shared (tmp_v0, tmp_v1) - { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { -#pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - dmv = tmp_v1[j] * tmp_V; - tmp_v0[j] += dmv; - }/*for (j = 1; j <= X->Check.idim_max; j++)*/ - } - else { -#pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - dmv = tmp_v1[j] * tmp_V; - }/*for (j = 1; j <= X->Check.idim_max; j++)*/ - } - }/*End of parallel region*/ - 
return; -#else - return 0.0; -#endif + + zaxpy_long(X->Check.idim_max*nstate, tmp_V, &tmp_v1[1][0], &tmp_v0[1][0]); }/*double complex X_child_CisAisCjuAju_GeneralSpin_MPIdouble*/ /** @brief Compute @f$c_{is}^\dagger c_{is}c_{ju}^\dagger c_{ju}@f$ term in the canonical general spin system when one of these sites is in the inter process region -@return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ void X_child_CisAisCjuAju_GeneralSpin_MPIsingle( int org_isite1,//!<[in] Site 1 @@ -1631,47 +1216,31 @@ void X_child_CisAisCjuAju_GeneralSpin_MPIsingle( double complex **tmp_v1//!<[in] Input wavefunction ) { -#ifdef MPI unsigned long int j, num1; double complex tmp_V, dmv; - //MPI_Status statusMPI; + int one = 1; num1 = BitCheckGeneral((unsigned long int) myrank, org_isite3 + 1, org_ispin3, X->Def.SiteToBit, X->Def.Tpow); if (num1 != FALSE) { tmp_V = tmp_J; } - else return 0.0; + else return; #pragma omp parallel default(none) \ firstprivate(X, tmp_V, org_isite1, org_ispin1) private(j, dmv, num1) shared (tmp_v0, tmp_v1, list_1) { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { -#pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - num1 = BitCheckGeneral(list_1[j], org_isite1 + 1, org_ispin1, X->Def.SiteToBit, X->Def.Tpow); - - dmv = tmp_v1[j] * tmp_V * num1; - tmp_v0[j] += dmv; - }/*for (j = 1; j <= X->Check.idim_max; j++)*/ - } - else { #pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - num1 = BitCheckGeneral(list_1[j], org_isite1 + 1, org_ispin1, X->Def.SiteToBit, X->Def.Tpow); + for (j = 1; j <= X->Check.idim_max; j++) { + num1 = BitCheckGeneral(list_1[j], org_isite1 + 1, org_ispin1, X->Def.SiteToBit, X->Def.Tpow); - dmv = tmp_v1[j] * tmp_V * num1; - }/*for (j = 1; j <= X->Check.idim_max; j++)*/ - } + dmv = tmp_V * num1; + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); + }/*for (j = 1; j <= X->Check.idim_max; j++)*/ }/*End of parallel region*/ - return; -#else - return 0.0; -#endif }/*double complex 
X_child_CisAisCjuAju_GeneralSpin_MPIsingle*/ /** @brief Compute @f$c_{is}^\dagger c_{it}c_{ju}^\dagger c_{jv}@f$ term in the canonical general spin system when one of these sites is in the inter process region - @return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ */ void X_child_CisAitCjuAjv_GeneralSpin_MPIsingle( int org_isite1,//!<[in] Site 1 @@ -1685,11 +1254,9 @@ void X_child_CisAitCjuAjv_GeneralSpin_MPIsingle( int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1//!<[in] Input wavefunction ){ -#ifdef MPI unsigned long int tmp_off, off, j, idim_max_buf; - int origin, ierr, isite, IniSpin, FinSpin; - double complex tmp_V, dmv; - MPI_Status statusMPI; + int origin, ierr, isite, IniSpin, FinSpin, one = 1; + double complex tmp_V; if (GetOffCompGeneralSpin((unsigned long int)myrank, org_isite3 + 1, org_ispin3, org_ispin4, &off, @@ -1709,62 +1276,33 @@ void X_child_CisAitCjuAjv_GeneralSpin_MPIsingle( IniSpin = org_ispin1; FinSpin = org_ispin2; } - else return 0.0; + else return; origin = (int)off; - ierr = MPI_Sendrecv(&X->Check.idim_max, 1, MPI_UNSIGNED_LONG, origin, 0, - &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - ierr = MPI_Sendrecv(list_1, X->Check.idim_max + 1, MPI_UNSIGNED_LONG, origin, 0, - list_1buf, idim_max_buf + 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - ierr = MPI_Sendrecv(tmp_v1, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + idim_max_buf = SendRecv_i(origin, X->Check.idim_max); + SendRecv_iv(origin, X->Check.idim_max + 1, idim_max_buf + 1, list_1, list_1buf); + SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); #pragma omp parallel default(none) \ firstprivate(X, tmp_V, idim_max_buf, IniSpin, FinSpin, isite) \ -private(j, 
dmv, off, tmp_off) shared (tmp_v0, tmp_v1, list_1buf, v1buf) +private(j, off, tmp_off) shared (tmp_v0, tmp_v1, list_1buf, v1buf) { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { #pragma omp for - for (j = 1; j <= idim_max_buf; j++) { - - if (GetOffCompGeneralSpin(list_1buf[j], isite, IniSpin, FinSpin, &tmp_off, - X->Def.SiteToBit, X->Def.Tpow) == TRUE) - { - ConvertToList1GeneralSpin(tmp_off, X->Check.sdim, &off); - dmv = v1buf[j] * tmp_V; - tmp_v0[off] += dmv; - } - }/*for (j = 1; j <= idim_max_buf; j++)*/ - } - else { -#pragma omp for - for (j = 1; j <= idim_max_buf; j++) { + for (j = 1; j <= idim_max_buf; j++) { - if (GetOffCompGeneralSpin(list_1buf[j], isite, IniSpin, FinSpin, &tmp_off, - X->Def.SiteToBit, X->Def.Tpow) == TRUE) - { - ConvertToList1GeneralSpin(tmp_off, X->Check.sdim, &off); - dmv = v1buf[j] * tmp_V; - } - }/*for (j = 1; j <= idim_max_buf; j++)*/ - } + if (GetOffCompGeneralSpin(list_1buf[j], isite, IniSpin, FinSpin, &tmp_off, + X->Def.SiteToBit, X->Def.Tpow) == TRUE) + { + ConvertToList1GeneralSpin(tmp_off, X->Check.sdim, &off); + zaxpy_(&nstate, &tmp_V, &v1buf[j][0], &one, &tmp_v0[off][0], &one); + } + }/*for (j = 1; j <= idim_max_buf; j++)*/ }/*End of parallel region*/ - return; -#else - return 0.0; -#endif }/*double complex X_child_CisAitCjuAjv_GeneralSpin_MPIsingle*/ /** @brief Hopping term in Spin + GC When both site1 and site2 are in the inter process region. 
-@return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ @author Kazuyoshi Yoshimi (The University of Tokyo) */ void X_GC_child_CisAit_spin_MPIdouble( @@ -1776,18 +1314,14 @@ void X_GC_child_CisAit_spin_MPIdouble( int nstate, double complex **tmp_v0 /**< [out] Result v0 = H v1*/, double complex **tmp_v1 /**< [in] v0 = H v1*/) { -#ifdef MPI int mask1, state1, ierr, origin; unsigned long int idim_max_buf, j; - MPI_Status statusMPI; - double complex trans, dmv; + double complex trans; mask1 = (int)X->Def.Tpow[org_isite1]; origin = myrank ^ mask1; state1 = (origin & mask1)/mask1; - //fprintf(stdout, "Debug: myrank=%d, origin=%d, state1=%d\n", myrank, origin, state1); - if(state1 == org_ispin2){ trans = tmp_trans; } @@ -1798,44 +1332,17 @@ void X_GC_child_CisAit_spin_MPIdouble( } } else{ - return 0.0; + return; } - ierr = MPI_Sendrecv(&X->Check.idim_max, 1, MPI_UNSIGNED_LONG, origin, 0, - &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - ierr = MPI_Sendrecv(tmp_v1, X->Check.idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + idim_max_buf = SendRecv_i(origin, X->Check.idim_max); + SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); -#pragma omp parallel default(none) private(j, dmv) \ -firstprivate(idim_max_buf, trans, X) shared(v1buf, tmp_v1, tmp_v0) - { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { -#pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - dmv = trans * v1buf[j]; - tmp_v0[j] += dmv; - }/*for (j = 1; j <= X->Check.idim_max; j++)*/ - } - else { -#pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - dmv = trans * v1buf[j]; - }/*for (j = 1; j <= X->Check.idim_max; j++)*/ - } - }/*End of parallel region*/ - return; -#else - return 0.0; -#endif + zaxpy_long(X->Check.idim_max*nstate, trans, &v1buf[1][0], 
&tmp_v0[1][0]); }/*double complex X_GC_child_CisAit_spin_MPIdouble*/ /** @brief Hopping term in Spin + Canonical for CalcSpectrum When both site1 and site2 are in the inter process region. -@return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ @author Kazuyoshi Yoshimi (The University of Tokyo) */ void X_child_CisAit_spin_MPIdouble( @@ -1856,12 +1363,10 @@ void X_child_CisAit_spin_MPIdouble( long unsigned int _ilft,//!<[in] Similer to LargeList::ilft long unsigned int _ihfbit//!<[in] Similer to LargeList::ihfbit ){ -#ifdef MPI - int mask1, state1, ierr, origin; + int mask1, state1, ierr, origin, one = 1; unsigned long int idim_max_buf, j; unsigned long int tmp_off; - MPI_Status statusMPI; - double complex trans, dmv; + double complex trans; mask1 = (int)X->Def.Tpow[org_isite1]; origin = myrank ^ mask1; @@ -1875,45 +1380,22 @@ void X_child_CisAit_spin_MPIdouble( } // fprintf(stdout, "Debug: myrank=%d, origin=%d, trans=%lf\n", myrank, origin, trans); - - ierr = MPI_Sendrecv(&idim_max, 1, MPI_UNSIGNED_LONG, origin, 0, - &idim_max_buf, 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - - ierr = MPI_Sendrecv(list_1_org, idim_max + 1, MPI_UNSIGNED_LONG, origin, 0, - list_1buf_org, idim_max_buf + 1, MPI_UNSIGNED_LONG, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); - - ierr = MPI_Sendrecv(tmp_v1, idim_max + 1, MPI_DOUBLE_COMPLEX, origin, 0, - v1buf, idim_max_buf + 1, MPI_DOUBLE_COMPLEX, origin, 0, - MPI_COMM_WORLD, &statusMPI); - if (ierr != 0) exitMPI(-1); + + idim_max_buf = SendRecv_i(origin, idim_max); + SendRecv_iv(origin, idim_max + 1, idim_max_buf + 1, list_1_org, list_1buf_org); + SendRecv_cv(origin, idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { -#pragma omp parallel for default(none) private(j, dmv, tmp_off) \ +#pragma omp parallel for default(none) private(j, tmp_off) \ firstprivate(idim_max_buf, trans, X, 
list_1buf_org, list_2_1_target, list_2_2_target) \ shared(v1buf, tmp_v0) - for (j = 1; j <= idim_max_buf; j++) { - GetOffComp(list_2_1_target, list_2_2_target, list_1buf_org[j], X->Large.irght, X->Large.ilft, X->Large.ihfbit, &tmp_off); - dmv = trans * v1buf[j]; - tmp_v0[tmp_off] += dmv; - } - } - else { - tmp_off = 0; - return 0; + for (j = 1; j <= idim_max_buf; j++) { + GetOffComp(list_2_1_target, list_2_2_target, list_1buf_org[j], X->Large.irght, X->Large.ilft, X->Large.ihfbit, &tmp_off); + zaxpy_(&nstate, &trans, &v1buf[j][0], &one, &tmp_v0[tmp_off][0], &one); } - return 1; -#else - return 0.0; -#endif }/*double complex X_child_CisAit_spin_MPIdouble*/ /** @brief Hopping term in Spin + GC When both site1 and site2 are in the inter process region. -@return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ @author Kazuyoshi Yoshimi (The University of Tokyo) */ void X_GC_child_CisAis_spin_MPIdouble( @@ -1924,39 +1406,16 @@ void X_GC_child_CisAis_spin_MPIdouble( int nstate, double complex **tmp_v0 /**< [out] Result v0 = H v1*/, double complex **tmp_v1 /**< [in] v0 = H v1*/ ){ -#ifdef MPI long unsigned int j; - int mask1; - int ibit1; + int mask1, ibit1; mask1 = (int)X->Def.Tpow[org_isite1]; ibit1 = (((unsigned long int)myrank& mask1)/mask1)^(1-org_ispin1); -#pragma omp parallel default(none) shared(tmp_v1, nstate, tmp_v0, ibit1) \ - firstprivate(X, tmp_trans) private(j) - { - if (ibit1 != 0) { - if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { // for multply -#pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - tmp_v0[j] += tmp_v1[j] * tmp_trans; - }/*for (j = 1; j <= X->Check.idim_max; j++)*/ - } - else { -#pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - }/*for (j = 1; j <= X->Check.idim_max; j++)*/ - } - }/*if (ibit1 != 0)*/ - }/*End of parallel region*/ - return; -#else - return 0.0; -#endif + zaxpy_long(X->Check.idim_max*nstate, tmp_trans, &tmp_v1[1][0], &tmp_v0[1][0]); }/*double complex X_GC_child_CisAis_spin_MPIdouble*/ /** 
@brief Hopping term in Spin + GC When both site1 and site2 are in the inter process region. -@return @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ @author Kazuyoshi Yoshimi (The University of Tokyo) */ void X_GC_child_AisCis_spin_MPIdouble( @@ -1967,25 +1426,13 @@ void X_GC_child_AisCis_spin_MPIdouble( int nstate, double complex **tmp_v0 /**< [out] Result v0 = H v1*/, double complex **tmp_v1 /**< [in] v0 = H v1*/ ){ -#ifdef MPI long unsigned int j; int mask1; int ibit1; mask1 = (int)X->Def.Tpow[org_isite1]; ibit1 = (((unsigned long int)myrank& mask1) / mask1) ^ (1 - org_ispin1); -#pragma omp parallel default(none) shared(tmp_v1, nstate, tmp_v0, ibit1) \ - firstprivate(X, tmp_trans) private(j) - { - if (ibit1 == 0) { -#pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - tmp_v0[j] += tmp_v1[j] * tmp_trans; - }/*for (j = 1; j <= X->Check.idim_max; j++)*/ - }/*if (ibit1 == 0)*/ - }/*End of parallel region*/ - return; -#else - return 0.0; -#endif + if (ibit1 == 0) { + zaxpy_long(X->Check.idim_max*nstate, tmp_trans, &tmp_v1[1][0], &tmp_v0[1][0]); + }/*if (ibit1 == 0)*/ }/*double complex X_GC_child_AisCis_spin_MPIdouble*/ diff --git a/src/wrapperMPI.c b/src/wrapperMPI.c index 9fa509042..b0163245f 100644 --- a/src/wrapperMPI.c +++ b/src/wrapperMPI.c @@ -231,6 +231,38 @@ double SumMPI_d( return(norm); }/*double SumMPI_d*/ /** +@brief MPI wrapper function to obtain sum of Double array +across processes. +@author Mitsuaki Kawamura (The University of Tokyo) +*/ +void SumMPI_dv( + int nnorm, + double *norm//!<[in] Value to be summed +) { +#ifdef MPI + int ierr; + ierr = MPI_Allreduce(MPI_IN_PLACE, &norm, &nnorm, + MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD); + if (ierr != 0) exitMPI(-1); +#endif +}/*void SumMPI_dv*/ +/** +@brief MPI wrapper function to obtain sum of Double array +across processes. 
+@author Mitsuaki Kawamura (The University of Tokyo) +*/ +void SumMPI_cv( + int nnorm, + double complex *norm//!<[in] Value to be summed +) { +#ifdef MPI + int ierr; + ierr = MPI_Allreduce(MPI_IN_PLACE, &norm, &nnorm, + MPI_DOUBLE_COMPLEX, MPI_SUM, MPI_COMM_WORLD); + if (ierr != 0) exitMPI(-1); +#endif +}/*void SumMPI_cv*/ +/** @brief MPI wrapper function to obtain sum of unsigned long integer across processes. @return Sumed value across processes. @@ -307,6 +339,31 @@ double NormMPI_dc( return dnorm; }/*double NormMPI_dc*/ /** +@brief Compute norm of process-distributed vector +@f$|{\bf v}_1|^2@f$ +@return Norm @f$|{\bf v}_1|^2@f$ +*/ +void NormMPI_dv( + unsigned long int ndim,//!<[in] Local dimension of vector + int nstate, + double complex **_v1,//!<[in] [idim] vector to be producted + double *dnorm +) { + double complex cdnorm = 0; + unsigned long int idim; + int istate; + + for (istate = 0; istate < nstate; istate++) dnorm[istate] = 0.0; +#pragma omp parallel for default(none) private(i) firstprivate(myrank) shared(_v1, idim) reduction(+: dnorm) + for (idim = 1; idim <= ndim; idim++) { + for (istate = 0; istate < nstate; istate++) { + dnorm[istate] += conj(_v1[idim][istate])*_v1[idim][istate]; + } + } + SumMPI_dv(nstate, dnorm); + for (istate = 0; istate < nstate; istate++) dnorm[istate] = sqrt(dnorm[istate]); +}/*double NormMPI_cv*/ +/** @brief Compute conjugate scaler product of process-distributed vector @f${\bf v}_1^* \cdot {\bf v}_2@f$ @return Conjugate scaler product @f${\bf v}_1^* \cdot {\bf v}_2@f$ @@ -326,3 +383,127 @@ double complex VecProdMPI( return(prod); }/*double complex VecProdMPI*/ +/** +@brief Compute conjugate scaler product of process-distributed vector +@f${\bf v}_1^* \cdot {\bf v}_2@f$ +*/ +void MultiVecProdMPI( + long unsigned int ndim,//!<[in] Local dimension of vector + int nstate, + double complex **v1,//!<[in] [ndim] vector to be producted + double complex **v2,//!<[in] [ndim] vector to be producted + double complex *prod +) { + long 
unsigned int idim; + int istate; + + for (istate = 0; istate < nstate; istate++) prod[istate] = 0.0; +#pragma omp parallel for default(none) shared(v1,v2,ndim) private(idim) reduction(+: prod) + for (idim = 1; idim <= ndim; idim++) { + for (istate = 0; istate < nstate; istate++) { + prod[istate] += conj(v1[idim][istate])*v2[idim][istate]; + } + } + SumMPI_cv(nstate, prod); +}/*void MultiVecProdMPI*/ +/** +@brief Wrapper of MPI_Sendrecv for double complex number. +When we pass a message longer than 2^31-1 +(max of int: 2147483647), we need to divide it. +*/ +void SendRecv_cv( + int origin, + unsigned long int nMsgS, + unsigned long int nMsgR, + double complex *vecs, + double complex *vecr +) { +#ifdef MPI + int ierr, two31m1 = 2147483647, modMsg, nMsgS2, nMsgR2; + unsigned long int nMsg, nnMsg, iMsg, sMsgR, sMsgS; + MPI_Status statusMPI; + + if (nMsgS > nMsgR) nMsg = nMsgS; + else nMsg = nMsgR; + nnMsg = nMsg / two31m1; + modMsg = nMsg % two31m1; + if (modMsg != 0) nnMsg += 1; + + sMsgS = 0; + sMsgR = 0; + for (iMsg = 0; iMsg < nnMsg; iMsg++) { + nMsgS2 = nMsgS / nnMsg; + nMsgR2 = nMsgR / nnMsg; + if (iMsg < nMsgS % nnMsg) nMsgS2 += 1; + if (iMsg < nMsgR % nnMsg) nMsgR2 += 1; + + ierr = MPI_Sendrecv(&vecs[sMsgS], nMsgS2, MPI_DOUBLE_COMPLEX, origin, 0, + &vecr[sMsgR], nMsgR2, MPI_DOUBLE_COMPLEX, origin, 0, + MPI_COMM_WORLD, &statusMPI); + if (ierr != 0) exitMPI(-1); + + sMsgS += nMsgS2; + sMsgR += nMsgR2; + } +#endif +}/*void SendRecv_cv*/ +/** +@brief Wrapper of MPI_Sendrecv for long unsigned integer number. +When we pass a message longer than 2^31-1 +(max of int: 2147483647), we need to divide it. 
+*/ +void SendRecv_iv( + int origin, + unsigned long int nMsgS, + unsigned long int nMsgR, + unsigned long int *vecs, + unsigned long int *vecr +) { +#ifdef MPI + int ierr, two31m1 = 2147483647, modMsg, nMsgS2, nMsgR2; + unsigned long int nMsg, nnMsg, iMsg, sMsgR, sMsgS; + MPI_Status statusMPI; + + if (nMsgS > nMsgR) nMsg = nMsgS; + else nMsg = nMsgR; + nnMsg = nMsg / two31m1; + modMsg = nMsg % two31m1; + if (modMsg != 0) nnMsg += 1; + + sMsgS = 0; + sMsgR = 0; + for (iMsg = 0; iMsg < nnMsg; iMsg++) { + nMsgS2 = nMsgS / nnMsg; + nMsgR2 = nMsgR / nnMsg; + if (iMsg < nMsgS % nnMsg) nMsgS2 += 1; + if (iMsg < nMsgR % nnMsg) nMsgR2 += 1; + + ierr = MPI_Sendrecv(&vecs[sMsgS], nMsgS2, MPI_UNSIGNED_LONG, origin, 0, + &vecr[sMsgR], nMsgR2, MPI_UNSIGNED_LONG, origin, 0, + MPI_COMM_WORLD, &statusMPI); + if (ierr != 0) exitMPI(-1); + + sMsgS += nMsgS2; + sMsgR += nMsgR2; + } +#endif +}/*void SendRecv_iv*/ +/** +@brief Wrapper of MPI_Sendrecv for long unsigned integer number. +*/ +unsigned long int SendRecv_i( + int origin, + unsigned long int isend +) { +#ifdef MPI + int ierr; + MPI_Status statusMPI; + unsigned long int ircv; + ierr = MPI_Sendrecv(&isend, 1, MPI_UNSIGNED_LONG, origin, 0, + &ircv, 1, MPI_UNSIGNED_LONG, origin, 0, + MPI_COMM_WORLD, &statusMPI); + return ircv; +#else + return isend; +#endif +}/*void SendRecv_i*/ From b84abfddc820e1094d6c5b02c48c17a4094b04cd Mon Sep 17 00:00:00 2001 From: mitsuaki1987 Date: Mon, 4 Mar 2019 00:06:02 +0900 Subject: [PATCH 04/50] Backup --- src/CG_EigenVector.c | 255 ----- src/CalcByLanczos.c | 234 ----- src/CalcSpectrum.c | 11 - src/CalcSpectrumByLanczos.c | 219 ----- src/CalcSpectrumByTPQ.c | 274 ------ src/HPhiMain.c | 50 +- src/Lanczos_EigenValue.c | 720 -------------- src/Lanczos_EigenVector.c | 208 ---- src/PowerLanczos.c | 179 ---- src/expec_cisajs.c | 48 +- src/expec_cisajscktaltdc.c | 1409 ++++++++++++++------------- src/expec_energy_flct.c | 270 +++-- src/expec_totalspin.c | 3 - src/include/CG_EigenVector.h | 24 - 
src/include/CalcByLanczos.h | 21 - src/include/CalcSpectrumByLanczos.h | 35 - src/include/CalcSpectrumByTPQ.h | 33 - src/include/Lanczos_EigenValue.h | 40 - src/include/Lanczos_EigenVector.h | 17 - src/include/PowerLanczos.h | 4 - src/include/mltplyMPIBoost.h | 31 - src/include/struct.h | 16 +- src/include/vec12.h | 21 - src/mltplyMPIBoost.c | 416 -------- src/mltplySpin.c | 39 - src/vec12.c | 86 -- 26 files changed, 899 insertions(+), 3764 deletions(-) delete mode 100644 src/CG_EigenVector.c delete mode 100644 src/CalcByLanczos.c delete mode 100644 src/CalcSpectrumByLanczos.c delete mode 100644 src/CalcSpectrumByTPQ.c delete mode 100644 src/Lanczos_EigenValue.c delete mode 100644 src/Lanczos_EigenVector.c delete mode 100644 src/PowerLanczos.c delete mode 100644 src/include/CG_EigenVector.h delete mode 100644 src/include/CalcByLanczos.h delete mode 100644 src/include/CalcSpectrumByLanczos.h delete mode 100644 src/include/CalcSpectrumByTPQ.h delete mode 100644 src/include/Lanczos_EigenValue.h delete mode 100644 src/include/Lanczos_EigenVector.h delete mode 100644 src/include/PowerLanczos.h delete mode 100644 src/include/mltplyMPIBoost.h delete mode 100644 src/include/vec12.h delete mode 100644 src/mltplyMPIBoost.c delete mode 100644 src/vec12.c diff --git a/src/CG_EigenVector.c b/src/CG_EigenVector.c deleted file mode 100644 index 6cfa44cb0..000000000 --- a/src/CG_EigenVector.c +++ /dev/null @@ -1,255 +0,0 @@ -/* HPhi - Quantum Lattice Model Simulator */ -/* Copyright (C) 2015 The University of Tokyo */ - -/* This program is free software: you can redistribute it and/or modify */ -/* it under the terms of the GNU General Public License as published by */ -/* the Free Software Foundation, either version 3 of the License, or */ -/* (at your option) any later version. */ - -/* This program is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the */ -/* GNU General Public License for more details. */ - -/* You should have received a copy of the GNU General Public License */ -/* along with this program. If not, see . */ -/**@file -@brief Inversed power method with CG -*/ -#include "CG_EigenVector.h" -#include "FileIO.h" -#include "mltply.h" -#include "wrapperMPI.h" -#include "CalcTime.h" -/** -@brief inversed power method with CG -@author Takahiro Misawa (The University of Tokyo) -@author Kazuyoshi Yoshimi (The University of Tokyo) -@return -1 if file can not be opened, 0 for other. -*/ -int CG_EigenVector(struct BindStruct *X/**<[inout]*/){ - - fprintf(stdoutMPI, "%s", cLogCG_EigenVecStart); - TimeKeeper(X, cFileNameTimeKeep, cCG_EigenVecStart, "a"); - - time_t start,mid; - FILE *fp_0; - char sdt_1[D_FileNameMax]; - dsfmt_t dsfmt; - long unsigned int u_long_i; - int mythread; - - long int i,j; - double Eig; - int i_itr,itr,iv,itr_max; - int t_itr; - double bnorm,xnorm,rnorm,rnorm2; - double complex alpha,beta,xb,rp,yp,gosa1,tmp_r,gosa2; - double complex *y,*b; - long int L_size; - long int i_max; - - i_max=X->Check.idim_max; - Eig=X->Phys.Target_CG_energy; - - strcpy(sdt_1, cFileNameTimeEV_CG); - if(childfopenMPI(sdt_1, "w", &fp_0) !=0){ - return -1; - } - - L_size=sizeof(double complex)*(i_max+1); - - b=(double complex *)malloc(L_size); - y=(double complex *)malloc(L_size); - - if(y==NULL){ - fprintf(fp_0,"BAD in CG_EigenVector \n"); - }else{ - fprintf(fp_0,"allocate succeed !!! 
\n"); - } - fclose(fp_0); - - start=time(NULL); - /* - add random components - */ - iv = X->Def.initial_iv; - bnorm = 0.0; -#pragma omp parallel default(none) private(i, u_long_i, mythread, dsfmt) \ - shared(v0, v1, iv, X, nthreads, myrank, b, bnorm) firstprivate(i_max) - { - /* - Initialise MT - */ -#ifdef _OPENMP - mythread = omp_get_thread_num(); -#else - mythread = 0; -#endif - u_long_i = 123432 + abs(iv) + mythread + nthreads * myrank; - dsfmt_init_gen_rand(&dsfmt, u_long_i); - -#pragma omp for - for (i = 1; i <= i_max; i++) { - v0[i] = v1[i]; - b[i] = v0[i]; - } - -#pragma omp for reduction(+:bnorm) - for (i = 1; i <= i_max; i++) { - b[i] += 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5)*0.001; - bnorm += conj(b[i])*b[i]; - } - }/*#pragma omp*/ - /* - Normalize b - */ - bnorm = SumMPI_d(bnorm); - bnorm=sqrt(bnorm); - -#pragma omp parallel for default(none) private(i) shared(b) firstprivate(i_max,bnorm) - for(i=1;i<=i_max;i++){ - b[i]=b[i]/bnorm; - } - - t_itr=0; - for(i_itr=0;i_itr<=50;i_itr++){ - //CG start!! - bnorm=0.0; - //initialization -#pragma omp parallel for reduction(+:bnorm) default(none) private(i) shared(b, v1, vg, v0) firstprivate(i_max) - for(i=1;i<=i_max;i++){ - bnorm+=conj(b[i])*b[i]; - v1[i]=b[i]; - vg[i]=b[i]; - v0[i]=0.0; - } - bnorm = SumMPI_d(bnorm); - if(iv >= 0){ - childfopenMPI(sdt_1,"a",&fp_0); - fprintf(fp_0,"b[%d]=%lf bnorm== %lf \n ",iv,creal(b[iv]),bnorm); - fclose(fp_0); - } - //iteration - if(i_itr==0){ - itr_max=500; - }else{ - itr_max=500; - } - - for(itr=1;itr<=itr_max;itr++){ -#pragma omp parallel for default(none) private(j) shared(y, vg) firstprivate(i_max, Eig,eps_CG) - for(j=1;j<=i_max;j++){ - y[j]=(-Eig+eps_CG)*vg[j]; //y = -E*p - } - StartTimer(4401); - mltply(X,1,y,vg); // y += H*p - StopTimer(4401); - // (H-E)p=y finish! 
- rp=0.0; - yp=0.0; -#pragma omp parallel for reduction(+:rp, yp) default(none) private(i) shared(v1, vg, y) firstprivate(i_max) - for(i=1;i<=i_max;i++){ - rp+=v1[i]*conj(v1[i]); - yp+=y[i]*conj(vg[i]); - } - rp = SumMPI_dc(rp); - yp = SumMPI_dc(yp); - alpha=rp/yp; - rnorm=0.0; -#pragma omp parallel for reduction(+:rnorm) default(none) private(i) shared(v0, v1, vg)firstprivate(i_max, alpha) - for(i=1;i<=i_max;i++){ - v0[i]+=alpha*vg[i]; - rnorm+=conj(v1[i])*v1[i]; - } - rnorm = SumMPI_d(rnorm); - rnorm2=0.0; - gosa1=0.0; -#pragma omp parallel for reduction(+:rnorm2, gosa1) default(none) private(i) shared(v1 , y) firstprivate(i_max, alpha) private(tmp_r) - for(i=1;i<=i_max;i++){ - tmp_r=v1[i]-alpha*y[i]; - gosa1+=conj(tmp_r)*v1[i];// old r and new r should be orthogonal -> gosa1=0 - v1[i]=tmp_r; - rnorm2+=conj(v1[i])*v1[i]; - } - gosa1 = SumMPI_dc(gosa1); - rnorm2 = SumMPI_d(rnorm2); - - gosa2=0.0; -#pragma omp parallel for reduction(+:gosa2) default(none) private(i) shared(v1, vg) firstprivate(i_max) - for(i=1;i<=i_max;i++){ - gosa2+=v1[i]*conj(vg[i]); // new r and old p should be orthogonal - } - gosa2 = SumMPI_dc(gosa2); - - beta=rnorm2/rnorm; -#pragma omp parallel for default(none) shared(v1, vg) firstprivate(i_max, beta) - for(i=1;i<=i_max;i++){ - vg[i]=v1[i]+beta*vg[i]; - } - // if(itr%5==0){ - childfopenMPI(sdt_1,"a", &fp_0); - fprintf(fp_0,"i_itr=%d itr=%d %.10lf %.10lf \n ", - i_itr,itr,sqrt(rnorm2),pow(10,-5)*sqrt(bnorm)); - fclose(fp_0); - if(sqrt(rnorm2). 
*/ -#include "expec_cisajs.h" -#include "expec_cisajscktaltdc.h" -#include "expec_totalspin.h" -#include "CG_EigenVector.h" -#include "expec_energy_flct.h" -#include "Lanczos_EigenValue.h" -#include "Lanczos_EigenVector.h" -#include "CalcByLanczos.h" -#include "FileIO.h" -#include "wrapperMPI.h" -#include "CalcTime.h" - -/** - * @file CalcByLanczos.c - * @version 0.1, 0.2 - * @author Takahiro Misawa (The University of Tokyo) - * @author Kazuyoshi Yoshimi (The University of Tokyo) - * - * @brief File for givinvg functions of calculating eigenvalues and eigenvectors by Lanczos method - * - * - */ - - -/** - * @brief A main function to calculate eigenvalues and eigenvectors by Lanczos method - * - * @param[in,out] X CalcStruct list for getting and pushing calculation information - * @retval 0 normally finished - * @retval -1 unnormally finished - * - * @version 0.2 - * @date 2015/10/20 add function of using a flag of iCalcEigenVec - * @version 0.1 - * @author Takahiro Misawa (The University of Tokyo) - * @author Kazuyoshi Yoshimi (The University of Tokyo) - * - */ -int CalcByLanczos( - struct EDMainCalStruct *X - ) -{ - char sdt[D_FileNameMax]; - double diff_ene,var; - long int i_max=0; - FILE *fp; - size_t byte_size; - - if(X->Bind.Def.iInputEigenVec==FALSE){ - // this part will be modified - switch(X->Bind.Def.iCalcModel){ - case HubbardGC: - case SpinGC: - case KondoGC: - case SpinlessFermionGC: - initial_mode = 1; // 1 -> random initial vector - break; - case Hubbard: - case Kondo: - case Spin: - case SpinlessFermion: - - if(X->Bind.Def.iFlgGeneralSpin ==TRUE){ - initial_mode=1; - } - else{ - if(X->Bind.Def.initial_iv>0){ - initial_mode = 0; // 0 -> only v[iv] = 1 - }else{ - initial_mode = 1; // 1 -> random initial vector - } - } - break; - default: - exitMPI(-1); - } - - StartTimer(4100); - int iret=0; - iret=Lanczos_EigenValue(&(X->Bind)); - StopTimer(4100); - if(iret == 1) return(TRUE);//restart mode. 
- else if(iret != 0) return(FALSE); - - if(X->Bind.Def.iCalcEigenVec==CALCVEC_NOT){ - fprintf(stdoutMPI, " Lanczos EigenValue = %.10lf \n ",X->Bind.Phys.Target_energy); - return(TRUE); - } - - fprintf(stdoutMPI, "%s", cLogLanczos_EigenVecStart); - - if(X->Bind.Check.idim_maxMPI != 1){ - StartTimer(4200); - Lanczos_EigenVector(&(X->Bind)); - StopTimer(4200); - - StartTimer(4300); - iret=expec_energy_flct(&(X->Bind)); - StopTimer(4300); - if(iret != 0) return(FALSE); - - //check for the accuracy of the eigenvector - var = fabs(X->Bind.Phys.var-X->Bind.Phys.energy*X->Bind.Phys.energy)/fabs(X->Bind.Phys.var); - diff_ene = fabs(X->Bind.Phys.Target_CG_energy-X->Bind.Phys.energy)/fabs(X->Bind.Phys.Target_CG_energy); - - fprintf(stdoutMPI, "\n"); - fprintf(stdoutMPI, " Accuracy check !!!\n"); - fprintf(stdoutMPI, " LanczosEnergy = %.14e \n EnergyByVec = %.14e \n diff_ene = %.14e \n var = %.14e \n",X->Bind.Phys.Target_CG_energy,X->Bind.Phys.energy,diff_ene,var); - if(diff_ene < eps_Energy && var< eps_Energy){ - fprintf(stdoutMPI, " Accuracy of Lanczos vectors is enough.\n"); - fprintf(stdoutMPI, "\n"); - } - else if(X->Bind.Def.iCalcEigenVec==CALCVEC_LANCZOSCG){ - fprintf(stdoutMPI, " Accuracy of Lanczos vectors is NOT enough\n\n"); - X->Bind.Def.St=1; - StartTimer(4400); - iret=CG_EigenVector(&(X->Bind)); - StopTimer(4400); - if(iret != 0) return(FALSE); - - StartTimer(4300); - iret=expec_energy_flct(&(X->Bind)); - StopTimer(4300); - if(iret != 0) return(FALSE); - - var = fabs(X->Bind.Phys.var-X->Bind.Phys.energy*X->Bind.Phys.energy)/fabs(X->Bind.Phys.var); - diff_ene = fabs(X->Bind.Phys.Target_CG_energy-X->Bind.Phys.energy)/fabs(X->Bind.Phys.Target_CG_energy); - fprintf(stdoutMPI, "\n"); - fprintf(stdoutMPI, " CG Accuracy check !!!\n"); - fprintf(stdoutMPI, " LanczosEnergy = %.14e\n EnergyByVec = %.14e\n diff_ene = %.14e\n var = %.14e \n ",X->Bind.Phys.Target_CG_energy,X->Bind.Phys.energy,diff_ene,var); - fprintf(stdoutMPI, "\n"); - //} - } - } - else{//idim_max=1 - 
v0[1]=1; - StartTimer(4300); - iret=expec_energy_flct(&(X->Bind)); - StopTimer(4300); - if(iret != 0) return(FALSE); - } - } - else{// X->Bind.Def.iInputEigenVec=true :input v1: - fprintf(stdoutMPI, "An Eigenvector is inputted.\n"); - StartTimer(4800); - TimeKeeper(&(X->Bind), cFileNameTimeKeep, cReadEigenVecStart, "a"); - StartTimer(4801); - sprintf(sdt, cFileNameInputEigen, X->Bind.Def.CDataFileHead, X->Bind.Def.k_exct-1, myrank); - childfopenALL(sdt, "rb", &fp); - if(fp==NULL){ - fprintf(stderr, "Error: A file of Inputvector does not exist.\n"); - exitMPI(-1); - } - byte_size = fread(&step_i, sizeof(int), 1, fp); - byte_size = fread(&i_max, sizeof(long int), 1, fp); - if(i_max != X->Bind.Check.idim_max){ - fprintf(stderr, "Error: A file of Inputvector is incorrect.\n"); - exitMPI(-1); - } - byte_size = fread(v1, sizeof(complex double),X->Bind.Check.idim_max+1, fp); - fclose(fp); - StopTimer(4801); - StopTimer(4800); - TimeKeeper(&(X->Bind), cFileNameTimeKeep, cReadEigenVecFinish, "a"); - if (byte_size == 0) printf("byte_size: %d \n", (int)byte_size); - } - - fprintf(stdoutMPI, "%s", cLogLanczos_EigenVecEnd); - // v1 is eigen vector - - StartTimer(4500); - if(expec_cisajs(&(X->Bind), v1)!=0){ - fprintf(stderr, "Error: calc OneBodyG.\n"); - exitMPI(-1); - } - StopTimer(4500); - StartTimer(4600); - if(expec_cisajscktaltdc(&(X->Bind), v1)!=0){ - fprintf(stderr, "Error: calc TwoBodyG.\n"); - exitMPI(-1); - } - StopTimer(4600); - - if(expec_totalSz(&(X->Bind), v1)!=0){ - fprintf(stderr, "Error: calc TotalSz.\n"); - exitMPI(-1); - } - - if(X->Bind.Def.St==0){ - sprintf(sdt, cFileNameEnergy_Lanczos, X->Bind.Def.CDataFileHead); - }else if(X->Bind.Def.St==1){ - sprintf(sdt, cFileNameEnergy_CG, X->Bind.Def.CDataFileHead); - } - - if(childfopenMPI(sdt, "w", &fp)!=0){ - exitMPI(-1); - } - - fprintf(fp,"Energy %.16lf \n",X->Bind.Phys.energy); - fprintf(fp,"Doublon %.16lf \n",X->Bind.Phys.doublon); - fprintf(fp,"Sz %.16lf \n",X->Bind.Phys.Sz); - // fprintf(fp,"total S^2 %.10lf 
\n",X->Bind.Phys.s2); - fclose(fp); - - if(X->Bind.Def.iOutputEigenVec==TRUE){ - TimeKeeper(&(X->Bind), cFileNameTimeKeep, cOutputEigenVecStart, "a"); - sprintf(sdt, cFileNameOutputEigen, X->Bind.Def.CDataFileHead, X->Bind.Def.k_exct-1, myrank); - if(childfopenALL(sdt, "wb", &fp)!=0){ - exitMPI(-1); - } - fwrite(&X->Bind.Large.itr, sizeof(X->Bind.Large.itr),1,fp); - fwrite(&X->Bind.Check.idim_max, sizeof(X->Bind.Check.idim_max),1,fp); - fwrite(v1, sizeof(complex double),X->Bind.Check.idim_max+1, fp); - fclose(fp); - TimeKeeper(&(X->Bind), cFileNameTimeKeep, cOutputEigenVecFinish, "a"); - } - - return TRUE; -} diff --git a/src/CalcSpectrum.c b/src/CalcSpectrum.c index 4fec982e7..a37fc1155 100644 --- a/src/CalcSpectrum.c +++ b/src/CalcSpectrum.c @@ -247,17 +247,6 @@ int CalcSpectrum( TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_CalcSpectrumStart, "a"); StartTimer(6200); switch (X->Bind.Def.iCalcType) { - case Lanczos: - - iret = CalcSpectrumByLanczos(X, v1, dnorm, Nomega, dcSpectrum, dcomega); - - if (iret != TRUE) { - //Error Message will be added. - return FALSE; - } - - break;//Lanczos Spectrum - case CG: iret = CalcSpectrumByBiCG(X, v0, v1, vg, Nomega, dcSpectrum, dcomega); diff --git a/src/CalcSpectrumByLanczos.c b/src/CalcSpectrumByLanczos.c deleted file mode 100644 index f4f1e1cac..000000000 --- a/src/CalcSpectrumByLanczos.c +++ /dev/null @@ -1,219 +0,0 @@ -/* HPhi - Quantum Lattice Model Simulator */ -/* Copyright (C) 2015 Takahiro Misawa, Kazuyoshi Yoshimi, Mitsuaki Kawamura, Youhei Yamaji, Synge Todo, Naoki Kawashima */ - -/* This program is free software: you can redistribute it and/or modify */ -/* it under the terms of the GNU General Public License as published by */ -/* the Free Software Foundation, either version 3 of the License, or */ -/* (at your option) any later version. 
*/ - -/* This program is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ -/* GNU General Public License for more details. */ - -/* You should have received a copy of the GNU General Public License */ -/* along with this program. If not, see . */ -#include "CalcSpectrumByLanczos.h" -#include "Lanczos_EigenValue.h" -#include "FileIO.h" -#include "wrapperMPI.h" -#include "common/setmemory.h" -#include "CalcTime.h" -/** - * @file CalcSpectrumByLanczos.c - * @brief Get the spectrum function by continued fraction expansions.\n - * Ref. E.Dagotto, Rev. Mod. Phys. 66 (1994), 763. - * @version 1.1 - * @author Kazuyoshi Yoshimi (The University of Tokyo) - * - */ - -/// -/// \brief A main function to calculate spectrum by continued fraction expansions. -/// \param X [in,out] Struct for getting and giving calculation information -/// \param tmp_v1 [in] Normalized excited state. -/// \param dnorm [in] Norm of the excited state before normalization. -/// \param Nomega [in] Total number of frequencies. -/// \param dcSpectrum [out] Calculated spectrum. -/// \param dcomega [in] Target frequencies. 
-/// \retval 0 normally finished -/// \retval -1 unnormally finished -/// \version 1.1 -/// \author Kazuyoshi Yoshimi (The University of Tokyo) -int CalcSpectrumByLanczos( - struct EDMainCalStruct *X, - double complex **tmp_v1, - double dnorm, - int Nomega, - double complex *dcSpectrum, - double complex *dcomega -) -{ - unsigned long int i; - int iret; - unsigned long int liLanczosStp = X->Bind.Def.Lanczos_max; - unsigned long int liLanczosStp_vec=0; - - if(X->Bind.Def.iFlgCalcSpec == RECALC_FROM_TMComponents_VEC || - X->Bind.Def.iFlgCalcSpec == RECALC_INOUT_TMComponents_VEC) { - - StartTimer(6201); - if(ReadInitialVector( &(X->Bind), v0, v1, &liLanczosStp_vec)!=0){ - StopTimer(6201); - exitMPI(-1); - } - StopTimer(6201); - } - - //Read diagonal components - if(X->Bind.Def.iFlgCalcSpec == RECALC_FROM_TMComponents || - X->Bind.Def.iFlgCalcSpec ==RECALC_FROM_TMComponents_VEC|| - X->Bind.Def.iFlgCalcSpec == RECALC_INOUT_TMComponents_VEC) - { - StartTimer(6202); - int iFlgTMComp=0; - if(X->Bind.Def.iFlgCalcSpec == RECALC_INOUT_TMComponents_VEC || - X->Bind.Def.iFlgCalcSpec == RECALC_FROM_TMComponents_VEC) - { - iFlgTMComp=0; - } - else{ - iFlgTMComp=1; - } - iret=ReadTMComponents(&(X->Bind), &dnorm, &liLanczosStp, iFlgTMComp); - if(iret !=TRUE){ - fprintf(stdoutMPI, " Error: Fail to read TMcomponents\n"); - return FALSE; - } - - if(X->Bind.Def.iFlgCalcSpec == RECALC_FROM_TMComponents){ - X->Bind.Def.Lanczos_restart=0; - } - else if(X->Bind.Def.iFlgCalcSpec == RECALC_INOUT_TMComponents_VEC|| - X->Bind.Def.iFlgCalcSpec == RECALC_FROM_TMComponents_VEC){ - if(liLanczosStp_vec !=liLanczosStp){ - fprintf(stdoutMPI, " Error: Input files for vector and TMcomponents are incoorect.\n"); - fprintf(stdoutMPI, " Error: Input vector %ld th stps, TMcomponents %ld th stps.\n", liLanczosStp_vec, liLanczosStp); - return FALSE; - } - X->Bind.Def.Lanczos_restart=liLanczosStp; - liLanczosStp = liLanczosStp+X->Bind.Def.Lanczos_max; - } - StopTimer(6202); - } - - // calculate ai, bi - if 
(X->Bind.Def.iFlgCalcSpec == RECALC_NOT || - X->Bind.Def.iFlgCalcSpec == RECALC_OUTPUT_TMComponents_VEC || - X->Bind.Def.iFlgCalcSpec == RECALC_FROM_TMComponents_VEC || - X->Bind.Def.iFlgCalcSpec == RECALC_INOUT_TMComponents_VEC - ) - { - fprintf(stdoutMPI, " Start: Calculate tridiagonal matrix components.\n"); - TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_GetTridiagonalStart, "a"); - // Functions in Lanczos_EigenValue - StartTimer(6203); - iret = Lanczos_GetTridiagonalMatrixComponents(&(X->Bind), alpha, beta, tmp_v1, &(liLanczosStp)); - StopTimer(6203); - if (iret != TRUE) { - //Error Message will be added. - return FALSE; - } - fprintf(stdoutMPI, " End: Calculate tridiagonal matrix components.\n\n"); - TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_GetTridiagonalEnd, "a"); - StartTimer(6204); - OutputTMComponents(&(X->Bind), alpha,beta, dnorm, liLanczosStp); - StopTimer(6204); - }//X->Bind.Def.iFlgCalcSpec == RECALC_NOT || RECALC_FROM_TMComponents_VEC - - fprintf(stdoutMPI, " Start: Caclulate spectrum from tridiagonal matrix components.\n"); - TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_CalcSpectrumFromTridiagonalStart, "a"); - StartTimer(6205); - for( i = 0 ; i < Nomega; i++) { - iret = GetSpectrumByTridiagonalMatrixComponents(alpha, beta, dnorm, dcomega[i], &dcSpectrum[i], liLanczosStp); - if (iret != TRUE) { - //ToDo: Error Message will be added. 
- //ReAlloc alpha, beta and Set alpha_start and beta_start in Lanczos_EigenValue - return FALSE; - } - dcSpectrum[i] = dnorm * dcSpectrum[i]; - } - StopTimer(6205); - fprintf(stdoutMPI, " End: Caclulate spectrum from tridiagonal matrix components.\n\n"); - TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_CalcSpectrumFromTridiagonalEnd, "a"); - - //output vectors for recalculation - if(X->Bind.Def.iFlgCalcSpec==RECALC_OUTPUT_TMComponents_VEC || - X->Bind.Def.iFlgCalcSpec==RECALC_INOUT_TMComponents_VEC){ - StartTimer(6206); - if(OutputLanczosVector(&(X->Bind), v0, v1, liLanczosStp)!=0){ - StopTimer(6206); - exitMPI(-1); - } - StopTimer(6206); - } - - return TRUE; -} - -/// -/// \brief Calculate the spectrum by using tridiagonal matrix components obtained by the Lanczos_GetTridiagonalMatrixComponents function. -/// \param tmp_alpha [in] Tridiagonal matrix components. -/// \param tmp_beta [in] Tridiagonal matrix components. -/// \param dnorm [in] Norm for the excited state. -/// \param dcomega [in] Target frequency. -/// \param dcSpectrum [out] Spectrum at dcomega. -/// \param ilLanczosStp [in] Lanczos step required to get tridiagonal matrix components. -/// \retval FALSE Fail to get the spectrum -/// \retval TRUE Success to get the spectrum -int GetSpectrumByTridiagonalMatrixComponents( - double *tmp_alpha, - double *tmp_beta, - double dnorm, - double complex dcomega, - double complex *dcSpectrum, - unsigned long int ilLanczosStp - ) -{ - unsigned long int istp=2; - double complex dcDn; - double complex dcb0; - double complex dcbn, dcan; - double complex dcDeltahn; - double complex dch; - - if(ilLanczosStp < 1){ - fprintf(stdoutMPI, "Error: LanczosStep must be greater than 1.\n"); - return FALSE; - } - - dcb0 = dcomega-tmp_alpha[1]; - if(ilLanczosStp ==1) { - if(cabs(dcb0). 
*/ -#include "CalcSpectrumByTPQ.h" -#include "Lanczos_EigenValue.h" -#include "FileIO.h" -#include "wrapperMPI.h" -#include "vec12.h" -#include "common/setmemory.h" -/** - * @file CalcSpectrumByTPQ.c - * @version 1.2 - * @author Kazuyoshi Yoshimi (The University of Tokyo) - * - * @brief Calculate spectrum function for the TPQ state. \n - * Note: This method is trial and cannot be used in the release mode. - * - * - */ - -/// \brief Read TPQ data at "X->Bind.Large.itr" step in SS_rand file. -/// \param [in] X CalcStruct list for getting and pushing calculation information -/// \param [out] ene energy -/// \param [out] temp temperature -/// \param [out] specificHeat specific heat -/// \retval TRUE succeed to read data -/// \retval FALSE fail to read data -int ReadTPQData( - struct EDMainCalStruct *X, - double* ene, - double* temp, - double* specificHeat -){ - FILE *fp; - char sdt[D_FileNameMax]; - char ctmp2[256]; - double dinv_temp; - double dene, dHvar, dn, ddoublon; - int istp; - sprintf(sdt, cFileNameSSRand, X->Bind.Def.irand); - childfopenMPI(sdt, "r", &fp); - if(fp==NULL){ - fprintf(stderr, " Error: SS_rand%d.dat does not exist.\n", X->Bind.Def.irand); - fclose(fp); - return FALSE; - } - fgetsMPI(ctmp2, 256, fp); - while(fgetsMPI(ctmp2, 256, fp) != NULL) { - sscanf(ctmp2, "%lf %lf %lf %lf %lf %d\n", - &dinv_temp, - &dene, - &dHvar, - &dn, - &ddoublon, - &istp - ); - if(istp==X->Bind.Large.itr) break; - } - fclose(fp); - - *ene = dene; - *temp = 1.0/dinv_temp; - *specificHeat = (dHvar-dene*dene)*(dinv_temp*dinv_temp); - - return TRUE; -} - -/// -/// \brief Calculate spectrum function from the TPQ state. -/// \param dcomega [in] Target frequencies. -/// \param dtemp [in] Temperature corresponding to the target TPQ state. -/// \param dspecificheat [in] Specific heat. -/// \param ene [in] Energy for the target TPQ state. -/// \param tmp_E [in] Energies included in the excited TPQ state obtained by the continued fraction expansions. 
-/// \param Nsite [in] Total number of sites. -/// \param idim_max [in] Dimension of the Hilbert space. -/// \param dc_tmpSpec [out] Calculated spectrum. -/// \retval FALSE fail to calculate spectrum. -/// \retval TRUE sucsceed to calculate spectrum. -int GetCalcSpectrumTPQ(double complex dcomega, double dtemp, double dspecificheat, - double ene, double *tmp_E, int Nsite, int idim_max, double complex * dc_tmpSpec) -{ - int l; - double tmp_dcSpec; - double factor, pre_factor; - pre_factor=2.0*dtemp*dtemp*dspecificheat; - factor=M_PI*pre_factor; - factor=1.0/sqrt(factor); - tmp_dcSpec=0; - - if(cimag(dcomega)>0) { - for (l = 1; l <= idim_max; l++) { - //TODO: Check omega is real ? - //fprintf(stdoutMPI, "Debug: %lf, %lf\n", creal(dcomega) - tmp_E[l] + ene, pre_factor); - tmp_dcSpec += (double)(vec[l][1] * conj(vec[l][1])) * exp(-pow((creal(dcomega) - tmp_E[l] + ene),2)/(pre_factor)); - } - } - else{ - fprintf(stderr, " an imarginary part of omega must be positive.\n"); - return FALSE; - } - tmp_dcSpec *=factor; - *dc_tmpSpec = tmp_dcSpec; - return TRUE; -} - -/// \brief A main function to calculate spectrum by TPQ (Note: This method is trial) -/// \param X [in,out] CalcStruct list for getting and pushing calculation information -/// \param tmp_v1 [in] Normalized excited state. -/// \param dnorm [in] Norm of the excited state before normalization. -/// \param Nomega [in] Total number of frequencies. -/// \param dcSpectrum [out] Calculated spectrum. -/// \param dcomega [in] Target frequencies. 
-/// \retval 0 normally finished -/// \retval -1 unnormally finished -/// \version 1.2 -/// \author Kazuyoshi Yoshimi (The University of Tokyo) -int CalcSpectrumByTPQ( - struct EDMainCalStruct *X, - double complex **tmp_v1, - double dnorm, - int Nomega, - double complex *dcSpectrum, - double complex *dcomega -) -{ - char sdt[D_FileNameMax]; - unsigned long int i, i_max; - FILE *fp; - int iret; - unsigned long int liLanczosStp = X->Bind.Def.Lanczos_max; - unsigned long int liLanczosStp_vec=0; - double dene, dtemp, dspecificHeat; - double *tmp_E; - double complex dctmp_Spectrum; - int stp; - size_t byte_size; - - //Read Ene, temp, C - if(ReadTPQData(X, &dene, &dtemp, &dspecificHeat)!=TRUE){ - return FALSE; - } - - if(X->Bind.Def.iFlgCalcSpec == RECALC_FROM_TMComponents_VEC || - X->Bind.Def.iFlgCalcSpec == RECALC_INOUT_TMComponents_VEC) { - fprintf(stdoutMPI, " Start: Input vectors for recalculation.\n"); - TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_InputSpectrumRecalcvecStart, "a"); - - sprintf(sdt, cFileNameOutputRestartVec, X->Bind.Def.CDataFileHead, myrank); - if (childfopenALL(sdt, "rb", &fp) != 0) { - exitMPI(-1); - } - byte_size = fread(&liLanczosStp_vec, sizeof(liLanczosStp_vec),1,fp); - byte_size = fread(&i_max, sizeof(long int), 1, fp); - if(i_max != X->Bind.Check.idim_max){ - fprintf(stderr, "Error: A size of Inputvector is incorrect.\n"); - exitMPI(-1); - } - byte_size = fread(v0, sizeof(complex double), X->Bind.Check.idim_max + 1, fp); - byte_size = fread(v1, sizeof(complex double), X->Bind.Check.idim_max + 1, fp); - fclose(fp); - fprintf(stdoutMPI, " End: Input vectors for recalculation.\n"); - TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_InputSpectrumRecalcvecEnd, "a"); - if (byte_size == 0) printf("byte_size: %d \n", (int)byte_size); - } - - //Read diagonal components - if(X->Bind.Def.iFlgCalcSpec == RECALC_FROM_TMComponents || - X->Bind.Def.iFlgCalcSpec ==RECALC_FROM_TMComponents_VEC|| - X->Bind.Def.iFlgCalcSpec == RECALC_INOUT_TMComponents_VEC) - { - 
int iFlgTMComp=0; - if(X->Bind.Def.iFlgCalcSpec == RECALC_INOUT_TMComponents_VEC || - X->Bind.Def.iFlgCalcSpec == RECALC_FROM_TMComponents_VEC) - { - iFlgTMComp=0; - } - else{ - iFlgTMComp=1; - } - iret=ReadTMComponents(&(X->Bind), &dnorm, &liLanczosStp, iFlgTMComp); - if(iret !=TRUE){ - fprintf(stdoutMPI, " Error: Fail to read TMcomponents\n"); - return FALSE; - } - - if(X->Bind.Def.iFlgCalcSpec == RECALC_FROM_TMComponents){ - X->Bind.Def.Lanczos_restart=0; - } - else if(X->Bind.Def.iFlgCalcSpec == RECALC_INOUT_TMComponents_VEC|| - X->Bind.Def.iFlgCalcSpec == RECALC_FROM_TMComponents_VEC){ - if(liLanczosStp_vec !=liLanczosStp){ - fprintf(stdoutMPI, " Error: Input files for vector and TMcomponents are incoorect.\n"); - fprintf(stdoutMPI, " Error: Input vector %ld th stps, TMcomponents %ld th stps.\n", liLanczosStp_vec, liLanczosStp); - return FALSE; - } - X->Bind.Def.Lanczos_restart=liLanczosStp; - liLanczosStp = liLanczosStp+X->Bind.Def.Lanczos_max; - } - } - - // calculate ai, bi - if (X->Bind.Def.iFlgCalcSpec == RECALC_NOT || - X->Bind.Def.iFlgCalcSpec == RECALC_OUTPUT_TMComponents_VEC || - X->Bind.Def.iFlgCalcSpec == RECALC_FROM_TMComponents_VEC || - X->Bind.Def.iFlgCalcSpec == RECALC_INOUT_TMComponents_VEC - ) - { - fprintf(stdoutMPI, " Start: Calculate tridiagonal matrix components.\n"); - TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_GetTridiagonalStart, "a"); - // Functions in Lanczos_EigenValue - iret = Lanczos_GetTridiagonalMatrixComponents(&(X->Bind), alpha, beta, tmp_v1, &(liLanczosStp)); - if (iret != TRUE) { - //Error Message will be added. 
- return FALSE; - } - fprintf(stdoutMPI, " End: Calculate tridiagonal matrix components.\n\n"); - TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_GetTridiagonalEnd, "a"); - OutputTMComponents(&(X->Bind), alpha,beta, dnorm, liLanczosStp); - }//X->Bind.Def.iFlgCalcSpec == RECALC_NOT || RECALC_FROM_TMComponents_VEC - - stp=liLanczosStp; - tmp_E = d_1d_allocate(stp + 1); - X->Bind.Def.nvec= stp; - vec12(alpha,beta,stp,tmp_E, &(X->Bind)); - fprintf(stdoutMPI, " Start: Caclulate spectrum from tridiagonal matrix components.\n"); - TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_CalcSpectrumFromTridiagonalStart, "a"); - for( i = 0 ; i < Nomega; i++) { - dctmp_Spectrum=0; - iret = GetCalcSpectrumTPQ(dcomega[i], dtemp, dspecificHeat, dene, tmp_E, X->Bind.Def.Nsite, stp, &dctmp_Spectrum); - if (iret != TRUE) { - //ReAlloc alpha, beta and Set alpha_start and beta_start in Lanczos_EigenValue - return FALSE; - } - dcSpectrum[i] = dnorm * dctmp_Spectrum; - } - fprintf(stdoutMPI, " End: Caclulate spectrum from tridiagonal matrix components.\n\n"); - TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_CalcSpectrumFromTridiagonalEnd, "a"); - - free_d_1d_allocate(tmp_E); - //output vectors for recalculation - if(X->Bind.Def.iFlgCalcSpec==RECALC_OUTPUT_TMComponents_VEC || - X->Bind.Def.iFlgCalcSpec==RECALC_INOUT_TMComponents_VEC){ - fprintf(stdoutMPI, " Start: Output vectors for recalculation.\n"); - TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_OutputSpectrumRecalcvecStart, "a"); - - sprintf(sdt, cFileNameOutputRestartVec, X->Bind.Def.CDataFileHead, myrank); - if(childfopenALL(sdt, "wb", &fp)!=0){ - exitMPI(-1); - } - fwrite(&liLanczosStp, sizeof(liLanczosStp),1,fp); - fwrite(&X->Bind.Check.idim_max, sizeof(X->Bind.Check.idim_max),1,fp); - fwrite(v0, sizeof(complex double),X->Bind.Check.idim_max+1, fp); - fwrite(v1, sizeof(complex double),X->Bind.Check.idim_max+1, fp); - fclose(fp); - - fprintf(stdoutMPI, " End: Output vectors for recalculation.\n"); - TimeKeeper(&(X->Bind), cFileNameTimeKeep, 
c_OutputSpectrumRecalcvecEnd, "a"); - } - - return TRUE; -} diff --git a/src/HPhiMain.c b/src/HPhiMain.c index a62e31e77..0659b4cd7 100644 --- a/src/HPhiMain.c +++ b/src/HPhiMain.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -280,46 +279,37 @@ int main(int argc, char* argv[]){ StopTimer(2000); switch (X.Bind.Def.iCalcType) { - case Lanczos: - StartTimer(4000); - if (CalcByLanczos(&X) != TRUE) { - StopTimer(4000); - exitMPI(-3); - } - StopTimer(4000); - break; - case CG: if (CalcByLOBPCG(&X) != TRUE) { exitMPI(-3); } break; - case FullDiag: - StartTimer(5000); - if (X.Bind.Def.iFlgScaLAPACK ==0 && nproc != 1) { - fprintf(stdoutMPI, "Error: Full Diagonalization by LAPACK is only allowed for one process.\n"); - FinalizeMPI(); - } - if (CalcByFullDiag(&X) != TRUE) { - FinalizeMPI(); - } - StopTimer(5000); + case FullDiag: + StartTimer(5000); + if (X.Bind.Def.iFlgScaLAPACK == 0 && nproc != 1) { + fprintf(stdoutMPI, "Error: Full Diagonalization by LAPACK is only allowed for one process.\n"); + FinalizeMPI(); + } + if (CalcByFullDiag(&X) != TRUE) { + FinalizeMPI(); + } + StopTimer(5000); break; - case TPQCalc: - StartTimer(3000); - if (CalcByTPQ(NumAve, X.Bind.Def.Param.ExpecInterval, &X) != TRUE) { - StopTimer(3000); - exitMPI(-3); - } + case TPQCalc: + StartTimer(3000); + if (CalcByTPQ(NumAve, X.Bind.Def.Param.ExpecInterval, &X) != TRUE) { StopTimer(3000); + exitMPI(-3); + } + StopTimer(3000); break; - case TimeEvolution: - if(CalcByTEM(X.Bind.Def.Param.ExpecInterval, &X)!=0){ - exitMPI(-3); - } + case TimeEvolution: + if (CalcByTEM(X.Bind.Def.Param.ExpecInterval, &X) != 0) { + exitMPI(-3); + } break; default: diff --git a/src/Lanczos_EigenValue.c b/src/Lanczos_EigenValue.c deleted file mode 100644 index 68323629e..000000000 --- a/src/Lanczos_EigenValue.c +++ /dev/null @@ -1,720 +0,0 @@ -/* HPhi - Quantum Lattice Model Simulator */ -/* Copyright (C) 2015 The University of Tokyo */ - -/* This program is free software: you can 
redistribute it and/or modify */ -/* it under the terms of the GNU General Public License as published by */ -/* the Free Software Foundation, either version 3 of the License, or */ -/* (at your option) any later version. */ - -/* This program is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ -/* GNU General Public License for more details. */ - -/* You should have received a copy of the GNU General Public License */ -/* along with this program. If not, see . */ - -#include "Common.h" -#include "common/setmemory.h" -#include "mltply.h" -#include "vec12.h" -#include "bisec.h" -#include "FileIO.h" -#include "matrixlapack.h" -#include "Lanczos_EigenValue.h" -#include "wrapperMPI.h" -#include "CalcTime.h" - -/** - * @file Lanczos_EigenValue.c - * - * @brief Calculate eigenvalues by the Lanczos method. - * @version 0.1 - * @author Takahiro Misawa (The University of Tokyo) - * @author Kazuyoshi Yoshimi (The University of Tokyo) - */ - -/** - * @brief Main function for calculating eigen values by Lanczos method.\n - * The energy convergence is judged by the level of target energy determined by @f$ \verb|k_exct| @f$.\n - * - * @param X [in] Struct to give the information for calculating the eigen values. - * @retval -2 Fail to read the initial vectors or triangular matrix components. - * @retval -1 Fail to obtain the eigen values with in the @f$ \verb| Lanczos_max |@f$ step - * @retval 0 Succeed to calculate the eigen values. 
- * @version 0.1 - * - * @author Takahiro Misawa (The University of Tokyo) - * @author Kazuyoshi Yoshimi (The University of Tokyo) - */ -int Lanczos_EigenValue(struct BindStruct *X) { - - fprintf(stdoutMPI, "%s", cLogLanczos_EigenValueStart); - FILE *fp; - char sdt[D_FileNameMax], sdt_2[D_FileNameMax]; - int stp; - long int i, i_max; - unsigned long int i_max_tmp; - int k_exct, Target; - int iconv = -1; - double beta1, alpha1; //beta,alpha1 should be real - double complex temp1, temp2; - double complex cbeta1; - double E[5], ebefor, E_target; - -// for GC - double dnorm=0.0; - - double **tmp_mat; - double *tmp_E; - int int_i, int_j, mfint[7]; - int iret=0; - sprintf(sdt_2, cFileNameLanczosStep, X->Def.CDataFileHead); - - i_max = X->Check.idim_max; - k_exct = X->Def.k_exct; - unsigned long int liLanczosStp; - liLanczosStp = X->Def.Lanczos_max; - unsigned long int liLanczosStp_vec=0; - - if (X->Def.iReStart == RESTART_INOUT || X->Def.iReStart == RESTART_IN){ - if(ReadInitialVector( X, v0, v1, &liLanczosStp_vec)!=0){ - fprintf(stdoutMPI, " Error: Fail to read initial vectors\n"); - return -2; - } - iret=ReadTMComponents(X, &dnorm, &liLanczosStp, 0); - if(iret !=TRUE){ - fprintf(stdoutMPI, " Error: Fail to read TMcomponents\n"); - return -2; - } - if(liLanczosStp_vec !=liLanczosStp){ - fprintf(stdoutMPI, " Error: Input files for vector and TMcomponents are incoorect.\n"); - fprintf(stdoutMPI, " Error: Input vector %ld th stps, TMcomponents %ld th stps.\n", liLanczosStp_vec, liLanczosStp); - return -2; - } - X->Def.Lanczos_restart=liLanczosStp; - //Calculate EigenValue - - liLanczosStp = liLanczosStp+X->Def.Lanczos_max; - alpha1=alpha[X->Def.Lanczos_restart]; - beta1=beta[X->Def.Lanczos_restart]; - }/*X->Def.iReStart == RESTART_INOUT || X->Def.iReStart == RESTART_IN*/ - else { - SetInitialVector(X, v0, v1); - //Eigenvalues by Lanczos method - TimeKeeper(X, cFileNameTimeKeep, cLanczos_EigenValueStart, "a"); - StartTimer(4101); - mltply(X, 1, v0, v1); - StopTimer(4101); - 
stp = 1; - TimeKeeperWithStep(X, cFileNameTimeKeep, cLanczos_EigenValueStep, "a", stp); - - alpha1 = creal(X->Large.prdct);// alpha = v^{\dag}*H*v - - alpha[1] = alpha1; - cbeta1 = 0.0; - -#pragma omp parallel for reduction(+:cbeta1) default(none) private(i) shared(v0, v1) firstprivate(i_max, alpha1) - for (i = 1; i <= i_max; i++) { - cbeta1 += conj(v0[i] - alpha1 * v1[i]) * (v0[i] - alpha1 * v1[i]); - } - cbeta1 = SumMPI_dc(cbeta1); - beta1 = creal(cbeta1); - beta1 = sqrt(beta1); - beta[1] = beta1; - ebefor = 0; - liLanczosStp = X->Def.Lanczos_max; - X->Def.Lanczos_restart =1; - }/*else restart*/ - - /* - * Set Maximum number of loop to the dimention of the Wavefunction - */ - i_max_tmp = SumMPI_li(i_max); - if (i_max_tmp < liLanczosStp) { - liLanczosStp = i_max_tmp; - } - if (i_max_tmp < X->Def.LanczosTarget) { - liLanczosStp = i_max_tmp; - } - if (i_max_tmp == 1) { - E[1] = alpha[1]; - StartTimer(4102); - vec12(alpha, beta, stp, E, X); - StopTimer(4102); - X->Large.itr = stp; - X->Phys.Target_energy = E[k_exct]; - iconv = 0; - fprintf(stdoutMPI, " LanczosStep E[1] \n"); - fprintf(stdoutMPI, " stp=%d %.10lf \n", stp, E[1]); - } - - fprintf(stdoutMPI, " LanczosStep E[1] E[2] E[3] E[4] Target:E[%d] E_Max/Nsite\n", X->Def.LanczosTarget + 1); - for (stp = X->Def.Lanczos_restart+1; stp <= liLanczosStp; stp++) { -#pragma omp parallel for default(none) private(i,temp1, temp2) shared(v0, v1) firstprivate(i_max, alpha1, beta1) - for (i = 1; i <= i_max; i++) { - temp1 = v1[i]; - temp2 = (v0[i] - alpha1 * v1[i]) / beta1; - v0[i] = -beta1 * temp1; - v1[i] = temp2; - } - - StartTimer(4101); - mltply(X, 1, v0, v1); - StopTimer(4101); - TimeKeeperWithStep(X, cFileNameTimeKeep, cLanczos_EigenValueStep, "a", stp); - alpha1 = creal(X->Large.prdct); - alpha[stp] = alpha1; - cbeta1 = 0.0; - -#pragma omp parallel for reduction(+:cbeta1) default(none) private(i) shared(v0, v1) firstprivate(i_max, alpha1) - for (i = 1; i <= i_max; i++) { - cbeta1 += conj(v0[i] - alpha1 * v1[i]) * 
(v0[i] - alpha1 * v1[i]); - } - cbeta1 = SumMPI_dc(cbeta1); - beta1 = creal(cbeta1); - beta1 = sqrt(beta1); - beta[stp] = beta1; - - Target = X->Def.LanczosTarget; - - if (stp == 2) { - tmp_mat = d_2d_allocate(stp,stp); - tmp_E = d_1d_allocate(stp+1); - - for (int_i = 0; int_i < stp; int_i++) { - for (int_j = 0; int_j < stp; int_j++) { - tmp_mat[int_i][int_j] = 0.0; - } - } - tmp_mat[0][0] = alpha[1]; - tmp_mat[0][1] = beta[1]; - tmp_mat[1][0] = beta[1]; - tmp_mat[1][1] = alpha[2]; - DSEVvalue(stp, tmp_mat, tmp_E); - E[1] = tmp_E[0]; - E[2] = tmp_E[1]; - if (Target < 2) { - E_target = tmp_E[Target]; - ebefor = E_target; - } - free_d_1d_allocate(tmp_E); - free_d_2d_allocate(tmp_mat); - - childfopenMPI(sdt_2, "w", &fp); - - fprintf(fp, "LanczosStep E[1] E[2] E[3] E[4] Target:E[%d] E_Max/Nsite\n", Target + 1); - if (Target < 2) { - fprintf(stdoutMPI, " stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx %.10lf xxxxxxxxx \n", stp, E[1], E[2], - E_target); - fprintf(fp, "stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx %.10lf xxxxxxxxx \n", stp, E[1], E[2], E_target); - } else { - fprintf(stdoutMPI, " stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx xxxxxxxxx xxxxxxxxx \n", stp, E[1], E[2]); - fprintf(fp, "stp = %d %.10lf %.10lf xxxxxxxxxx xxxxxxxxx xxxxxxxxx xxxxxxxxx \n", stp, E[1], E[2]); - } - - fclose(fp); - } - - //if (stp > 2 && stp % 2 == 0) { - if (stp > 2) { - childfopenMPI(sdt_2, "a", &fp); - tmp_mat = d_2d_allocate(stp,stp); - tmp_E = d_1d_allocate(stp+1); - - for (int_i = 0; int_i < stp; int_i++) { - for (int_j = 0; int_j < stp; int_j++) { - tmp_mat[int_i][int_j] = 0.0; - } - } - tmp_mat[0][0] = alpha[1]; - tmp_mat[0][1] = beta[1]; - for (int_i = 1; int_i < stp - 1; int_i++) { - tmp_mat[int_i][int_i] = alpha[int_i + 1]; - tmp_mat[int_i][int_i + 1] = beta[int_i + 1]; - tmp_mat[int_i][int_i - 1] = beta[int_i]; - } - tmp_mat[int_i][int_i] = alpha[int_i + 1]; - tmp_mat[int_i][int_i - 1] = beta[int_i]; - StartTimer(4103); - DSEVvalue(stp, tmp_mat, tmp_E); - StopTimer(4103); - 
E[1] = tmp_E[0]; - E[2] = tmp_E[1]; - E[3] = tmp_E[2]; - E[4] = tmp_E[3]; - E[0] = tmp_E[stp - 1]; - if (stp > Target) { - E_target = tmp_E[Target]; - } - free_d_1d_allocate(tmp_E); - free_d_2d_allocate(tmp_mat); - if (stp > Target) { - fprintf(stdoutMPI, " stp = %d %.10lf %.10lf %.10lf %.10lf %.10lf %.10lf\n", stp, E[1], E[2], E[3], E[4], - E_target, E[0] / (double) X->Def.NsiteMPI); - fprintf(fp, "stp=%d %.10lf %.10lf %.10lf %.10lf %.10lf %.10lf\n", stp, E[1], E[2], E[3], E[4], E_target, - E[0] / (double) X->Def.NsiteMPI); - } else { - fprintf(stdoutMPI, " stp = %d %.10lf %.10lf %.10lf %.10lf xxxxxxxxx %.10lf\n", stp, E[1], E[2], E[3], E[4], - E[0] / (double) X->Def.NsiteMPI); - fprintf(fp, "stp=%d %.10lf %.10lf %.10lf %.10lf xxxxxxxxx %.10lf\n", stp, E[1], E[2], E[3], E[4], - E[0] / (double) X->Def.NsiteMPI); - } - fclose(fp); - if (stp > Target) { - if (fabs((E_target - ebefor) / E_target) < eps_Lanczos || fabs(beta[stp]) < pow(10.0, -14)) { - /* - if(X->Def.iReStart == RESTART_INOUT ||X->Def.iReStart == RESTART_OUT){ - break; - } - */ - tmp_E = d_1d_allocate(stp+1); - StartTimer(4102); - vec12(alpha, beta, stp, tmp_E, X); - StopTimer(4102); - X->Large.itr = stp; - X->Phys.Target_energy = E_target; - X->Phys.Target_CG_energy = tmp_E[k_exct]; //for CG - iconv = 0; - free_d_1d_allocate(tmp_E); - break; - } - ebefor = E_target; - } - - } - } - if (X->Def.iReStart == RESTART_INOUT ||X->Def.iReStart == RESTART_OUT ){ - if(stp != X->Def.Lanczos_restart+2) { // 2 steps are needed to get the value: E[stp+2]-E[stp+1] - OutputTMComponents(X, alpha, beta, dnorm, stp - 1); - OutputLanczosVector(X, v0, v1, stp - 1); - } - if (iconv !=0){ - sprintf(sdt, "%s", cLogLanczos_EigenValueNotConverged); - fprintf(stdoutMPI, "Lanczos Eigenvalue is not converged in this process (restart mode).\n"); - return 1; - } - } - - sprintf(sdt, cFileNameTimeKeep, X->Def.CDataFileHead); - if (iconv != 0) { - sprintf(sdt, "%s", cLogLanczos_EigenValueNotConverged); - fprintf(stdoutMPI, "Lanczos 
Eigenvalue is not converged in this process.\n"); - return -1; - } - - TimeKeeper(X, cFileNameTimeKeep, cLanczos_EigenValueFinish, "a"); - fprintf(stdoutMPI, "%s", cLogLanczos_EigenValueEnd); - - return 0; -} - -/** - * @brief Calculate tridiagonal matrix components by Lanczos method - * - * @param X [in] Struct to give the information to calculate triangular matrix components. - * @param _alpha [in,out] Triangular matrix components. - * @param _beta [in,out] Triangular matrix components. - * @param tmp_v1 [in, out] A temporary vector to calculate triangular matrix components. - * @param liLanczos_step [in] The max iteration step. - * @version 1.2 - * @return TRUE - * @author Kazuyoshi Yoshimi (The University of Tokyo) - */ -int Lanczos_GetTridiagonalMatrixComponents( - struct BindStruct *X, - double *_alpha, - double *_beta, - double complex **tmp_v1, - unsigned long int *liLanczos_step - ) { - - char sdt[D_FileNameMax]; - int stp; - long int i, i_max; - i_max = X->Check.idim_max; - - unsigned long int i_max_tmp; - double beta1, alpha1; //beta,alpha1 should be real - double complex temp1, temp2; - double complex cbeta1; - - sprintf(sdt, cFileNameLanczosStep, X->Def.CDataFileHead); - - /* - Set Maximum number of loop to the dimension of the Wavefunction - */ - i_max_tmp = SumMPI_li(i_max); - if (i_max_tmp < *liLanczos_step || i_max_tmp < X->Def.LanczosTarget) { - *liLanczos_step = i_max_tmp; - } - - if (X->Def.Lanczos_restart == 0) { // initial procedure (not restart) -#pragma omp parallel for default(none) private(i) shared(v0, v1, tmp_v1) firstprivate(i_max) - for (i = 1; i <= i_max; i++) { - v0[i] = 0.0; - v1[i] = tmp_v1[i]; - } - stp = 0; - mltply(X, 1, v0, tmp_v1); - TimeKeeperWithStep(X, cFileNameTimeKeep, c_Lanczos_SpectrumStep, "a", stp); - alpha1 = creal(X->Large.prdct);// alpha = v^{\dag}*H*v - _alpha[1] = alpha1; - cbeta1 = 0.0; - fprintf(stdoutMPI, " Step / Step_max alpha beta \n"); - -#pragma omp parallel for reduction(+:cbeta1) default(none) 
private(i) shared(v0, v1) firstprivate(i_max, alpha1) - for (i = 1; i <= i_max; i++) { - cbeta1 += conj(v0[i] - alpha1 * v1[i]) * (v0[i] - alpha1 * v1[i]); - } - cbeta1 = SumMPI_dc(cbeta1); - beta1 = creal(cbeta1); - beta1 = sqrt(beta1); - _beta[1] = beta1; - X->Def.Lanczos_restart = 1; - } else { // restart case - alpha1 = alpha[X->Def.Lanczos_restart]; - beta1 = beta[X->Def.Lanczos_restart]; - } - - for (stp = X->Def.Lanczos_restart + 1; stp <= *liLanczos_step; stp++) { - - if (fabs(_beta[stp - 1]) < pow(10.0, -14)) { - *liLanczos_step = stp - 1; - break; - } - -#pragma omp parallel for default(none) private(i, temp1, temp2) shared(v0, v1) firstprivate(i_max, alpha1, beta1) - for (i = 1; i <= i_max; i++) { - temp1 = v1[i]; - temp2 = (v0[i] - alpha1 * v1[i]) / beta1; - v0[i] = -beta1 * temp1; - v1[i] = temp2; - } - - mltply(X, 1, v0, v1); - TimeKeeperWithStep(X, cFileNameTimeKeep, c_Lanczos_SpectrumStep, "a", stp); - alpha1 = creal(X->Large.prdct); - _alpha[stp] = alpha1; - cbeta1 = 0.0; - -#pragma omp parallel for reduction(+:cbeta1) default(none) private(i) shared(v0, v1) firstprivate(i_max, alpha1) - for (i = 1; i <= i_max; i++) { - cbeta1 += conj(v0[i] - alpha1 * v1[i]) * (v0[i] - alpha1 * v1[i]); - } - cbeta1 = SumMPI_dc(cbeta1); - beta1 = creal(cbeta1); - beta1 = sqrt(beta1); - _beta[stp] = beta1; - if(stp %10 == 0) { - fprintf(stdoutMPI, " stp = %d / %lu %.10lf %.10lf \n", stp, *liLanczos_step, alpha1, beta1); - } - } - - return TRUE; -} - - -/// -/// \brief Read initial vectors for the restart calculation. -/// \param X [in] Give the dimension for the vector _v0 and _v1. -/// \param _v0 [out] The inputted vector for recalculation @f$ v_0 @f$. -/// \param _v1 [out] The inputted vector for recalculation @f$ v_1 @f$. -/// \param liLanczosStp_vec [in] The max iteration step. -/// \retval -1 Fail to read the initial vector. -/// \retval 0 Succeed to read the initial vector. 
-/// \version 1.2 -/// \author Kazuyoshi Yoshimi (The University of Tokyo) -int ReadInitialVector(struct BindStruct *X, double complex* _v0, double complex *_v1, unsigned long int *liLanczosStp_vec) -{ - size_t byte_size; - char sdt[D_FileNameMax]; - FILE *fp; - unsigned long int i_max; - - fprintf(stdoutMPI, " Start: Input vectors for recalculation.\n"); - TimeKeeper(X, cFileNameTimeKeep, c_InputSpectrumRecalcvecStart, "a"); - sprintf(sdt, cFileNameOutputRestartVec, X->Def.CDataFileHead, myrank); - if (childfopenALL(sdt, "rb", &fp) != 0) { - return -1; - } - byte_size = fread(liLanczosStp_vec, sizeof(*liLanczosStp_vec),1,fp); - byte_size = fread(&i_max, sizeof(long int), 1, fp); - if(i_max != X->Check.idim_max){ - fprintf(stderr, "Error: A size of Inputvector is incorrect.\n"); - return -1; - } - byte_size = fread(_v0, sizeof(complex double), X->Check.idim_max + 1, fp); - byte_size = fread(_v1, sizeof(complex double), X->Check.idim_max + 1, fp); - fclose(fp); - fprintf(stdoutMPI, " End: Input vectors for recalculation.\n"); - TimeKeeper(X, cFileNameTimeKeep, c_InputSpectrumRecalcvecEnd, "a"); - if (byte_size == 0) printf("byte_size: %d \n", (int)byte_size); - return 0; -} - -/// -/// \brief Output initial vectors for the restart calculation. -/// \param X [in] Give the dimension for the vector _v0 and _v1. -/// \param tmp_v0 [in] The outputted vector for recalculation @f$ v_0 @f$. -/// \param tmp_v1 [in] The outputted vector for recalculation @f$ v_1 @f$. -/// \param liLanczosStp_vec [in] The step for finishing the iteration. -/// \retval -1 Fail to output the vector. -/// \retval 0 Succeed to output the vector. 
-/// \version 2.0 -/// \author Kazuyoshi Yoshimi (The University of Tokyo) -int OutputLanczosVector(struct BindStruct *X, - double complex* tmp_v0, - double complex **tmp_v1, - unsigned long int liLanczosStp_vec){ - char sdt[D_FileNameMax]; - FILE *fp; - - fprintf(stdoutMPI, " Start: Output vectors for recalculation.\n"); - TimeKeeper(X, cFileNameTimeKeep, c_OutputSpectrumRecalcvecStart, "a"); - - sprintf(sdt, cFileNameOutputRestartVec, X->Def.CDataFileHead, myrank); - if(childfopenALL(sdt, "wb", &fp)!=0){ - return -1; - } - fwrite(&liLanczosStp_vec, sizeof(liLanczosStp_vec),1,fp); - fwrite(&X->Check.idim_max, sizeof(X->Check.idim_max),1,fp); - fwrite(tmp_v0, sizeof(complex double),X->Check.idim_max+1, fp); - fwrite(tmp_v1, sizeof(complex double),X->Check.idim_max+1, fp); - fclose(fp); - - fprintf(stdoutMPI, " End: Output vectors for recalculation.\n"); - TimeKeeper(X, cFileNameTimeKeep, c_OutputSpectrumRecalcvecEnd, "a"); - return 0; -} - - -/// -/// \brief Set initial vector to start the calculation for Lanczos method.\n -/// \param X [in, out] Get the information of reading initisl vectors.\n -/// Input: idim_max, iFlgMPI, k_exct, iInitialVecType. \n -/// Output: Large.iv. -/// \param tmp_v0 [out] The initial vector whose components are zero. -/// \param tmp_v1 [out] The initial vector whose components are randomly given when initial_mode=1, otherwise, iv-th component is only given. 
-void SetInitialVector(struct BindStruct *X, double complex* tmp_v0, double complex **tmp_v1) { - int iproc; - long int i, iv, i_max; - unsigned long int i_max_tmp, sum_i_max; - int mythread; - -// for GC - double dnorm; - double complex cdnorm; - long unsigned int u_long_i; - dsfmt_t dsfmt; - - i_max = X->Check.idim_max; - if (initial_mode == 0) { - if(X->Def.iFlgMPI==0) { - sum_i_max = SumMPI_li(X->Check.idim_max); - } - else{ - sum_i_max =X->Check.idim_max; - } - X->Large.iv = (sum_i_max / 2 + X->Def.initial_iv) % sum_i_max + 1; - iv = X->Large.iv; - fprintf(stdoutMPI, " initial_mode=%d normal: iv = %ld i_max=%ld k_exct =%d \n\n", initial_mode, iv, i_max, - X->Def.k_exct); -#pragma omp parallel for default(none) private(i) shared(tmp_v0, tmp_v1) firstprivate(i_max) - for (i = 1; i <= i_max; i++) { - tmp_v0[i] = 0.0; - tmp_v1[i] = 0.0; - } - - sum_i_max = 0; - if(X->Def.iFlgMPI==0) { - for (iproc = 0; iproc < nproc; iproc++) { - i_max_tmp = BcastMPI_li(iproc, i_max); - if (sum_i_max <= iv && iv < sum_i_max + i_max_tmp) { - if (myrank == iproc) { - tmp_v1[iv - sum_i_max + 1] = 1.0; - if (X->Def.iInitialVecType == 0) { - tmp_v1[iv - sum_i_max + 1] += 1.0 * I; - tmp_v1[iv - sum_i_max + 1] /= sqrt(2.0); - } - }/*if (myrank == iproc)*/ - }/*if (sum_i_max <= iv && iv < sum_i_max + i_max_tmp)*/ - - sum_i_max += i_max_tmp; - - }/*for (iproc = 0; iproc < nproc; iproc++)*/ - } - else { - tmp_v1[iv + 1] = 1.0; - if (X->Def.iInitialVecType == 0) { - tmp_v1[iv + 1] += 1.0 * I; - tmp_v1[iv + 1] /= sqrt(2.0); - } - } - }/*if(initial_mode == 0)*/ - else if (initial_mode == 1) { - iv = X->Def.initial_iv; - fprintf(stdoutMPI, " initial_mode=%d (random): iv = %ld i_max=%ld k_exct =%d \n\n", initial_mode, iv, i_max, - X->Def.k_exct); -#pragma omp parallel default(none) private(i, u_long_i, mythread, dsfmt) \ - shared(tmp_v0, tmp_v1, iv, X, nthreads, myrank) firstprivate(i_max) - { - -#pragma omp for - for (i = 1; i <= i_max; i++) { - tmp_v0[i] = 0.0; - } - /* - Initialise MT - */ 
-#ifdef _OPENMP - mythread = omp_get_thread_num(); -#else - mythread = 0; -#endif - if(X->Def.iFlgMPI==0) { - u_long_i = 123432 + labs(iv) + mythread + nthreads * myrank; - } - else{ - u_long_i = 123432 + labs(iv)+ mythread ; - } - dsfmt_init_gen_rand(&dsfmt, u_long_i); - - if (X->Def.iInitialVecType == 0) { -#pragma omp for - for (i = 1; i <= i_max; i++) - tmp_v1[i] = 2.0 * (dsfmt_genrand_close_open(&dsfmt) - 0.5) + - 2.0 * (dsfmt_genrand_close_open(&dsfmt) - 0.5) * I; - } else { -#pragma omp for - for (i = 1; i <= i_max; i++) - tmp_v1[i] = 2.0 * (dsfmt_genrand_close_open(&dsfmt) - 0.5); - } - - }/*#pragma omp parallel*/ - - cdnorm = 0.0; -#pragma omp parallel for default(none) private(i) shared(tmp_v1, i_max) reduction(+: cdnorm) - for (i = 1; i <= i_max; i++) { - cdnorm += conj(tmp_v1[i]) * tmp_v1[i]; - } - if(X->Def.iFlgMPI==0) { - cdnorm = SumMPI_dc(cdnorm); - } - dnorm = creal(cdnorm); - dnorm = sqrt(dnorm); -#pragma omp parallel for default(none) private(i) shared(tmp_v1) firstprivate(i_max, dnorm) - for (i = 1; i <= i_max; i++) { - tmp_v1[i] = tmp_v1[i] / dnorm; - } - }/*else if(initial_mode==1)*/ -} - -/// -/// \brief Read tridiagonal matrix components obtained by the Lanczos method.\n -/// \note The arrays of tridiaonal components alpha and beta are global arrays. -/// \param X [in] Give the iteration number for the recalculation and the input file name. -/// \param _dnorm [out] Get the norm. -/// \param _i_max [in] The iteration step for the input data. -/// \param iFlg [in] Flag for the recalculation. -/// \retval FALSE Fail to read the file. -/// \retval TRUE Succeed to read the file. 
-/// \version 1.2 -/// \author Kazuyoshi Yoshimi (The University of Tokyo) -int ReadTMComponents( - struct BindStruct *X, - double *_dnorm, - unsigned long int *_i_max, - int iFlg -){ - char sdt[D_FileNameMax]; - char ctmp[256]; - - unsigned long int idx, i, ivec; - unsigned long int i_max; - double dnorm; - FILE *fp; - idx=1; - sprintf(sdt, cFileNameTridiagonalMatrixComponents, X->Def.CDataFileHead); - if(childfopenMPI(sdt,"r", &fp)!=0){ - return FALSE; - } - - fgetsMPI(ctmp, sizeof(ctmp)/sizeof(char), fp); - sscanf(ctmp,"%ld \n", &i_max); - if (X->Def.LanczosTarget > X->Def.nvec) { - ivec = X->Def.LanczosTarget + 1; - } - else { - ivec =X->Def.nvec + 1; - } - - if(iFlg==0) { - alpha = (double *) realloc(alpha, sizeof(double) * (i_max + X->Def.Lanczos_max + 1)); - beta = (double *) realloc(beta, sizeof(double) * (i_max + X->Def.Lanczos_max + 1)); - vec[0] = (complex double *) realloc(vec[0], ivec*(i_max + X->Def.Lanczos_max + 1) * sizeof(complex double)); - for (i = 0; i < ivec; i++) { - vec[i] = vec[0] + i*(i_max + X->Def.Lanczos_max + 1); - } - } - else if(iFlg==1){ - alpha=(double*)realloc(alpha, sizeof(double)*(i_max + 1)); - beta=(double*)realloc(beta, sizeof(double)*(i_max + 1)); - vec[0] = (complex double *) realloc(vec[0], ivec*(i_max + 1) * sizeof(complex double)); - for (i = 0; i < ivec; i++) { - vec[i] = vec[0] + i*(i_max +1); - //vec[i] = (complex double *) realloc(vec[i], (i_max + X->Def.Lanczos_max + 1) * sizeof(complex double)); - } - } - else{ - fclose(fp); - return FALSE; - } - fgetsMPI(ctmp, sizeof(ctmp)/sizeof(char), fp); - sscanf(ctmp,"%lf \n", &dnorm); - while(fgetsMPI(ctmp, sizeof(ctmp)/sizeof(char), fp) != NULL){ - sscanf(ctmp,"%lf %lf \n", &alpha[idx], &beta[idx]); - idx++; - } - fclose(fp); - *_dnorm=dnorm; - *_i_max=i_max; - return TRUE; -} - - -/// -/// \brief Output tridiagonal matrix components obtained by the Lanczos method. -/// \param X [in] Give the input file name. 
-/// \param _alpha [in] The array of tridiagonal matrix components. -/// \param _beta [in] The array of tridiagonal matrix components. -/// \param _dnorm [in] The norm. -/// \param liLanczosStp [in] The iteration step. -/// \retval FALSE Fail to open the file for the output. -/// \retval TRUE Succeed to open the file for the output. -/// \version 1.2 -/// \author Kazuyoshi Yoshimi (The University of Tokyo) -int OutputTMComponents( - struct BindStruct *X, - double *_alpha, - double *_beta, - double _dnorm, - int liLanczosStp -) -{ - char sdt[D_FileNameMax]; - unsigned long int i; - FILE *fp; - - sprintf(sdt, cFileNameTridiagonalMatrixComponents, X->Def.CDataFileHead); - if(childfopenMPI(sdt,"w", &fp)!=0){ - return FALSE; - } - fprintf(fp, "%d \n",liLanczosStp); - fprintf(fp, "%.10lf \n",_dnorm); - for( i = 1 ; i <= liLanczosStp; i++){ - fprintf(fp,"%.10lf %.10lf \n", _alpha[i], _beta[i]); - } - fclose(fp); - return TRUE; -} diff --git a/src/Lanczos_EigenVector.c b/src/Lanczos_EigenVector.c deleted file mode 100644 index 39fffb7b9..000000000 --- a/src/Lanczos_EigenVector.c +++ /dev/null @@ -1,208 +0,0 @@ -/* HPhi - Quantum Lattice Model Simulator */ -/* Copyright (C) 2015 The University of Tokyo */ - -/* This program is free software: you can redistribute it and/or modify */ -/* it under the terms of the GNU General Public License as published by */ -/* the Free Software Foundation, either version 3 of the License, or */ -/* (at your option) any later version. */ - -/* This program is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ -/* GNU General Public License for more details. */ - -/* You should have received a copy of the GNU General Public License */ -/* along with this program. If not, see . 
*/ - -#include "Common.h" -#include "mltply.h" -#include "CalcTime.h" -#include "Lanczos_EigenVector.h" -#include "wrapperMPI.h" - -/** - * - * @file Lanczos_EigenVector.c - * @version 0.1, 0.2 - * @author Takahiro Misawa (The University of Tokyo) - * @author Kazuyoshi Yoshimi (The University of Tokyo) - * - * @brief Calculate eigenvectors by the Lanczos method. - * - */ - -/** - * @brief Calculate eigenvectors by the Lanczos method. \n - * The calculated tridiagonal matrix components @f$ \alpha_i, \beta_i@f$ are stored in each array @f$ \verb|alpha| @f$ and @f$\verb|beta|@f$\n - * (@f$ i = 0\cdots N_c@f$, where @f$ N_c@f$ is the step where the calculated energy satisfies the convergence condition). - * - * @param X [in,out] Struct for getting information to calculate eigenvectors. - * @version 0.2 - * @details add an option to choose a type of initial vectors from complex or real types. - * @version 0.1 - * @author Takahiro Misawa (The University of Tokyo) - * @author Kazuyoshi Yoshimi (The University of Tokyo) - */ -void Lanczos_EigenVector(struct BindStruct *X){ - - fprintf(stdoutMPI, "%s", cLogLanczos_EigenVectorStart); - TimeKeeper(X, cFileNameTimeKeep, cLanczos_EigenVectorStart, "a"); - - long int i,j,i_max,iv; - int k_exct, iproc; - double beta1,alpha1,dnorm, dnorm_inv; - double complex temp1,temp2,cdnorm; - int mythread; - -// for GC - long unsigned int u_long_i, sum_i_max, i_max_tmp; - dsfmt_t dsfmt; - - k_exct = X->Def.k_exct; - - iv=X->Large.iv; - i_max=X->Check.idim_max; - - if(initial_mode == 0){ - - sum_i_max = SumMPI_li(X->Check.idim_max); - X->Large.iv = (sum_i_max / 2 + X->Def.initial_iv) % sum_i_max + 1; - iv=X->Large.iv; -#pragma omp parallel for default(none) private(i) shared(v0, v1,vg) firstprivate(i_max) - for(i = 1; i <= i_max; i++){ - v0[i]=0.0; - v1[i]=0.0; - vg[i]=0.0; - } - - sum_i_max = 0; - for (iproc = 0; iproc < nproc; iproc++) { - - i_max_tmp = BcastMPI_li(iproc, i_max); - if (sum_i_max <= iv && iv < sum_i_max + i_max_tmp) { - - if 
(myrank == iproc) { - v1[iv - sum_i_max+1] = 1.0; - if (X->Def.iInitialVecType == 0) { - v1[iv - sum_i_max+1] += 1.0*I; - v1[iv - sum_i_max+1] /= sqrt(2.0); - } - vg[iv - sum_i_max+1]=conj(vec[k_exct][1])*v1[iv - sum_i_max+1]; - - }/*if (myrank == iproc)*/ - }/*if (sum_i_max <= iv && iv < sum_i_max + i_max_tmp)*/ - - sum_i_max += i_max_tmp; - - }/*for (iproc = 0; iproc < nproc; iproc++)*/ - - }/*if(initial_mode == 0)*/ - else if(initial_mode==1){ - iv = X->Def.initial_iv; - //fprintf(stdoutMPI, " initial_mode=%d (random): iv = %ld i_max=%ld k_exct =%d \n",initial_mode,iv,i_max,k_exct); - #pragma omp parallel default(none) private(i, u_long_i, mythread, dsfmt) \ - shared(v0, v1, iv, X, nthreads, myrank) firstprivate(i_max) - { - -#pragma omp for - for (i = 1; i <= i_max; i++) { - v0[i] = 0.0; - } - /* - Initialize MT - */ -#ifdef _OPENMP - mythread = omp_get_thread_num(); -#else - mythread = 0; -#endif - u_long_i = 123432 + labs(iv) + mythread + nthreads * myrank; - dsfmt_init_gen_rand(&dsfmt, u_long_i); - - if (X->Def.iInitialVecType == 0) { -#pragma omp for - for (i = 1; i <= i_max; i++) - v1[i] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5) + 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5)*I; - } - else { -#pragma omp for - for (i = 1; i <= i_max; i++) - v1[i] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5); - } - }/*#pragma omp parallel*/ - /* - Normalize - */ - cdnorm=0.0; -#pragma omp parallel for default(none) private(i) shared(v1, i_max) reduction(+: cdnorm) - for(i=1;i<=i_max;i++){ - cdnorm += conj(v1[i])*v1[i]; - } - cdnorm = SumMPI_dc(cdnorm); - dnorm=creal(cdnorm); - dnorm=sqrt(dnorm); -#pragma omp parallel for default(none) private(i) shared(v1, vec, vg) firstprivate(i_max, dnorm, k_exct) - for(i=1;i<=i_max;i++){ - v1[i] = v1[i]/dnorm; - vg[i] = v1[i]*conj(vec[k_exct][1]); - } - }/*else if(initial_mode==1)*/ - StartTimer(4201); - mltply(X, 1, v0, v1); - StopTimer(4201); - - alpha1=alpha[1]; - beta1=beta[1]; - -#pragma omp parallel for default(none) 
private(j) shared(vec, v0, v1, vg) firstprivate(alpha1, beta1, i_max, k_exct) - for(j=1;j<=i_max;j++){ - vg[j]+=conj(vec[k_exct][2])*(v0[j]-alpha1*v1[j])/beta1; - } - - //iteration - for(i=2;i<=X->Large.itr-1;i++) { - /* - if (abs(beta[i]) < pow(10.0, -15)) { - break; - } -*/ -#pragma omp parallel for default(none) private(j, temp1, temp2) shared(v0, v1) firstprivate(i_max, alpha1, beta1) - for (j = 1; j <= i_max; j++) { - temp1 = v1[j]; - temp2 = (v0[j] - alpha1 * v1[j]) / beta1; - v0[j] = -beta1 * temp1; - v1[j] = temp2; - } - StartTimer(4201); - mltply(X, 1, v0, v1); - StopTimer(4201); - alpha1 = alpha[i]; - beta1 = beta[i]; -#pragma omp parallel for default(none) private(j) shared(vec, v0, v1, vg) firstprivate(alpha1, beta1, i_max, k_exct, i) - for (j = 1; j <= i_max; j++) { - vg[j] += conj(vec[k_exct][i + 1]) * (v0[j] - alpha1 * v1[j]) / beta1; - } - } - -#pragma omp parallel for default(none) private(j) shared(v0, vg) firstprivate(i_max) - for(j=1;j<=i_max;j++){ - v0[j] = vg[j]; - } - - //normalization - dnorm=0.0; -#pragma omp parallel for default(none) reduction(+:dnorm) private(j) shared(v0) firstprivate(i_max) - for(j=1;j<=i_max;j++){ - dnorm += conj(v0[j])*v0[j]; - } - dnorm = SumMPI_d(dnorm); - dnorm=sqrt(dnorm); - dnorm_inv=1.0/dnorm; -#pragma omp parallel for default(none) private(j) shared(v0) firstprivate(i_max, dnorm_inv) - for(j=1;j<=i_max;j++){ - v0[j] = v0[j]*dnorm_inv; - } - - TimeKeeper(X, cFileNameTimeKeep, cLanczos_EigenVectorFinish, "a"); - fprintf(stdoutMPI, "%s", cLogLanczos_EigenVectorEnd); -} diff --git a/src/PowerLanczos.c b/src/PowerLanczos.c deleted file mode 100644 index e813a7a4c..000000000 --- a/src/PowerLanczos.c +++ /dev/null @@ -1,179 +0,0 @@ -/* HPhi - Quantum Lattice Model Simulator */ -/* Copyright (C) 2015 The University of Tokyo */ - -/* This program is free software: you can redistribute it and/or modify */ -/* it under the terms of the GNU General Public License as published by */ -/* the Free Software Foundation, either 
version 3 of the License, or */ -/* (at your option) any later version. */ - -/* This program is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ -/* GNU General Public License for more details. */ - -/* You should have received a copy of the GNU General Public License */ -/* along with this program. If not, see . */ -#include "PowerLanczos.h" -#include "mltply.h" -#include "wrapperMPI.h" - -int PowerLanczos(struct BindStruct *X){ - - long int i,j; - - double dnorm, dnorm_inv; - double complex dam_pr1,dam_pr2a,dam_pr2b,dam_pr3,dam_pr4; - double E1,E2a,E2b,E3,E4; - double alpha_p,alpha_m; - double Lz_Var; - double Lz_Ene_p,Lz_Ene_m; - double Lz_Var_p,Lz_Var_m; - long int i_max,i_Lz; - - i_max=X->Check.idim_max; - -// v1 is eigenvector -// v0 = H*v1 -// this subroutine - for(i_Lz=0;i_Lz<50;i_Lz++){ - //v1 -> eigen_vec - //v0 -> v0=H*v1 -// if(i_Lz>1){ - #pragma omp parallel for default(none) private(i) shared(v0) firstprivate(i_max) - for(i = 1; i <= i_max; i++){ - v0[i]=0.0+0.0*I; - } - mltply(X, 1, v0, v1); // v0+=H*v1 - - dam_pr1=0.0; - dam_pr2a=0.0; - #pragma omp parallel for default(none) reduction(+:dam_pr1,dam_pr2a) private(j) shared(v0, v1)firstprivate(i_max) - for(j=1;j<=i_max;j++){ - dam_pr1 += conj(v1[j])*v0[j]; // E = = - dam_pr2a += conj(v0[j])*v0[j]; // E^2 = = - //v0[j]=v1[j]; v1-> orginal v0=H*v1 - } - dam_pr1 = SumMPI_dc(dam_pr1); - dam_pr2a = SumMPI_dc(dam_pr2a); - E1 = creal(dam_pr1); // E - E2a = creal(dam_pr2a);// E^2 - - #pragma omp parallel for default(none) private(i) shared(vg) firstprivate(i_max) - for(i = 1; i <= i_max; i++){ - vg[i]=0.0; - } - - mltply(X, 1, vg, v0); // vg=H*v0=H*H*v1 - dam_pr2b = 0.0; - dam_pr3 = 0.0; - dam_pr4 = 0.0; - #pragma omp parallel for default(none) reduction(+:dam_pr2b,dam_pr3, dam_pr4) private(j) shared(v0,v1, vg)firstprivate(i_max) - for(j=1;j<=i_max;j++){ - dam_pr2b += 
conj(vg[j])*v1[j]; // E^2 = = - dam_pr3 += conj(vg[j])*v0[j]; // E^3 = = - dam_pr4 += conj(vg[j])*vg[j]; // E^4 = = - } - dam_pr2b = SumMPI_dc(dam_pr2b); - dam_pr3 = SumMPI_dc(dam_pr3); - dam_pr4 = SumMPI_dc(dam_pr4); - //E1 = X->Phys.energy;// E^1 - //E2a = X->Phys.var ;// E^2 = - E2b = creal(dam_pr2b) ;// E^2 = ( - E3 = creal(dam_pr3) ;// E^3 - E4 = creal(dam_pr4) ;// E^4 - - if(solve_2ndPolinomial(X,&alpha_p,&alpha_m,E1,E2a,E2b,E3,E4)!=TRUE){ - fprintf(stdoutMPI,"Power Lanczos break \n"); - return 0; - } - //printf("E1=%.16lf E2a=%.16lf E2b=%.16lf E3=%.16lf E4=%.16lf \n",E1,E2a,E2b,E3,E4); - - Lz(X,alpha_p,&Lz_Ene_p,&Lz_Var_p,E1,E2a,E3,E4); - Lz(X,alpha_m,&Lz_Ene_m,&Lz_Var_m,E1,E2a,E3,E4); - - if(Lz_Ene_p < Lz_Ene_m){ - Lz_Var=Lz_Var_p; - fprintf(stdoutMPI,"Power Lanczos (P): %.16lf %.16lf \n",Lz_Ene_p,Lz_Var_p); - #pragma omp parallel for default(none) private(j) shared(v0, v1) firstprivate(i_max,alpha_p) - for(j=1;j<=i_max;j++){ - v1[j] = v1[j]+alpha_p*v0[j]; // (1+alpha*H)v1=v1+alpha*v0 - } - }else{ - Lz_Var=Lz_Var_m; - fprintf(stdoutMPI,"Power Lanczos (M): %.16lf %.16lf \n",Lz_Ene_m,Lz_Var_m); - #pragma omp parallel for default(none) private(j) shared(v0, v1)firstprivate(i_max,alpha_m) - for(j=1;j<=i_max;j++){ - v1[j] = v1[j]+alpha_m*v0[j]; // (1+alpha*H)v1=v1+alpha*v0 - } - } - //normalization - dnorm=0.0; - #pragma omp parallel for default(none) reduction(+:dnorm) private(j) shared(v1) firstprivate(i_max) - for(j=1;j<=i_max;j++){ - dnorm += conj(v1[j])*v1[j]; - } - dnorm = SumMPI_d(dnorm); - dnorm=sqrt(dnorm); - dnorm_inv=1.0/dnorm; -#pragma omp parallel for default(none) private(j) shared(v1) firstprivate(i_max, dnorm_inv) - for(j=1;j<=i_max;j++){ - v1[j] = v1[j]*dnorm_inv; - } - if(Lz_Var < eps_Energy){ - fprintf(stdoutMPI,"Power Lanczos break \n"); - return 1; - //break; - } - } - return 0; -} - -int solve_2ndPolinomial(struct BindStruct *X,double *alpha_p,double *alpha_m,double E1,double E2a,double E2b,double E3,double E4){ - double a,b,c,d; - double 
tmp_AA,tmp_BB,tmp_CC; - - //not solving 2nd Polinomial - //approximate linear equation is solved - - - a = E1; - b = E2a; - c = E2a; - d = E3; - - tmp_AA = b*(b+c)-2*a*d; - tmp_BB = -a*b+d; - tmp_CC = b*((b+c)*(b+c))-(a*a)*b*(b+2*c)+4*(a*a*a)*d-2*a*(2*b+c)*d+d*d; - if(tmp_AA< pow(b, 2)*pow(10.0, -15)){ - return FALSE; - } - //printf("XXX: %.16lf %.16lf %.16lf %.16lf \n",a, b, c, d); - //printf("XXX: %.16lf %.16lf %.16lf \n",tmp_AA,tmp_BB,tmp_CC); - if(tmp_CC>=0){ - *alpha_p = (tmp_BB+sqrt((tmp_CC)))/tmp_AA; - *alpha_m = (tmp_BB-sqrt((tmp_CC)))/tmp_AA; - //printf("YYY: %.16lf %.16lf \n",*alpha_p,*alpha_m); - } - else{ - //*alpha_m = -E2a/E3*(1-E1*E1/E2a)/(1-E1*E2a/E3); - //*alpha_p = 0.0; - //*alpha_m = -E3/E4*(1+2*E1*E1*E1/E3-3*E1*E2a/E3)/(1-5*E2a*E2a/E4+4*E1*E1*E2a/E4); - *alpha_p = cabs((tmp_BB+csqrt((tmp_CC))))/tmp_AA; - *alpha_m = cabs((tmp_BB-csqrt((tmp_CC))))/tmp_AA; - } - return TRUE; -} - -void Lz(struct BindStruct *X,double alpha,double *Lz_Ene,double *Lz_Var,double E1,double E2,double E3,double E4){ - - double tmp_ene,tmp_var; - - tmp_ene = (E1+2*alpha*E2+alpha*alpha*E3)/(1+2*alpha*E1+alpha*alpha*E2); - *Lz_Ene = tmp_ene; - X->Phys.energy = tmp_ene; - tmp_var = (E2+2*alpha*E3+alpha*alpha*E4)/(1+2*alpha*E1+alpha*alpha*E2); - X->Phys.var = tmp_var; - *Lz_Var = fabs(tmp_var-tmp_ene*tmp_ene)/tmp_var; - -} - diff --git a/src/expec_cisajs.c b/src/expec_cisajs.c index b75c50bd1..00a43fe35 100644 --- a/src/expec_cisajs.c +++ b/src/expec_cisajs.c @@ -79,18 +79,6 @@ int expec_cisajs( X->Large.mode = M_CORR; switch(X->Def.iCalcType){ - case Lanczos: - if(X->Def.St==0){ - sprintf(sdt, cFileName1BGreen_Lanczos, X->Def.CDataFileHead); - fprintf(stdoutMPI, "%s", cLogLanczosExpecOneBodyGStart); - TimeKeeper(X, cFileNameTimeKeep, cLanczosExpecOneBodyGStart, "a"); - }else if(X->Def.St==1){ - sprintf(sdt, cFileName1BGreen_CG, X->Def.CDataFileHead); - TimeKeeper(X, cFileNameTimeKeep, cCGExpecOneBodyGStart, "a"); - fprintf(stdoutMPI, "%s", cLogCGExpecOneBodyGStart); - } - 
//vec=v0; - break; case TPQCalc: step=X->Def.istep; rand_i=X->Def.irand; @@ -178,7 +166,7 @@ int expec_cisajs_HubbardGC( int nstate, double complex **Xvec, double complex **vec, - FILE **_fp + double complex **prod ){ long unsigned int i, j; long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; @@ -234,7 +222,7 @@ int expec_cisajs_HubbardGC( GC_child_general_hopp(nstate, Xvec, vec, X, tmp_OneGreen); } - MultiVecProdMPI(i_max, nstate, vec, Xvec, prod); + MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); fprintf(*_fp, " %4ld %4ld %4ld %4ld %.10lf %.10lf\n", org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, creal(dam_pr), cimag(dam_pr)); } @@ -254,7 +242,7 @@ int expec_cisajs_Hubbard( int nstate, double complex **Xvec, double complex **vec, - FILE **_fp + double complex **prod ) { long unsigned int i, j; long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; @@ -276,19 +264,17 @@ int expec_cisajs_Hubbard( if (X->Def.iFlgSzConserved == TRUE) { if (org_sigma1 != org_sigma2) { - dam_pr = 0.0; - fprintf(*_fp, " %4ld %4ld %4ld %4ld %.10lf %.10lf\n", org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, creal(dam_pr), cimag(dam_pr)); + zclear(nstate, prod[i]); continue; } } if (X->Def.iCalcModel == Kondo || X->Def.iCalcModel == KondoGC) { if ((X->Def.LocSpn[org_isite1 - 1] == 1 && X->Def.LocSpn[org_isite2 - 1] == 0) || - (X->Def.LocSpn[org_isite1 - 1] == 0 && X->Def.LocSpn[org_isite2 - 1] == 1) + (X->Def.LocSpn[org_isite1 - 1] == 0 && X->Def.LocSpn[org_isite2 - 1] == 1) ) { - dam_pr = 0.0; - fprintf(*_fp, " %4ld %4ld %4ld %4ld %.10lf %.10lf\n", org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, creal(dam_pr), cimag(dam_pr)); + zclear(nstate, prod[i]); continue; } } @@ -338,7 +324,7 @@ firstprivate(i_max, is) private(num1, ibit) child_general_hopp(nstate, Xvec, vec, X, tmp_OneGreen); } } - MultiVecProdMPI(i_max, nstate, vec, Xvec, prod); + MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); fprintf(*_fp, " %4ld %4ld %4ld %4ld %.10lf 
%.10lf\n", org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, creal(dam_pr), cimag(dam_pr)); } return 0; @@ -357,7 +343,7 @@ int expec_cisajs_Spin( int nstate, double complex **Xvec, double complex **vec, - FILE **_fp + double complex **prod ) { int info = 0; if (X->Def.iFlgGeneralSpin == FALSE) { @@ -382,7 +368,7 @@ int expec_cisajs_SpinHalf( int nstate, double complex **Xvec, double complex **vec, - FILE **_fp + double complex **prod ) { long unsigned int i, j; long unsigned int isite1; @@ -429,7 +415,7 @@ firstprivate(i_max, isite1, org_sigma1, X) shared(vec) // for the canonical case dam_pr = 0.0; } - MultiVecProdMPI(i_max, nstate, vec, Xvec, prod); + MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); fprintf(*_fp, " %4ld %4ld %4ld %4ld %.10lf %.10lf\n", org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, creal(dam_pr), cimag(dam_pr)); } return 0; @@ -448,7 +434,7 @@ int expec_cisajs_SpinGeneral( int nstate, double complex **Xvec, double complex **vec, - FILE **_fp + double complex **prod ) { long unsigned int i, j; long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; @@ -500,7 +486,7 @@ firstprivate(i_max, org_isite1, org_sigma1, X) shared(vec, list_1) dam_pr = 0.0; }//org_isite1 != org_isite2 - MultiVecProdMPI(i_max, nstate, vec, Xvec, prod); + MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); fprintf(*_fp, " %4ld %4ld %4ld %4ld %.10lf %.10lf\n", org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, creal(dam_pr), cimag(dam_pr)); } return 0; @@ -519,7 +505,7 @@ int expec_cisajs_SpinGC( int nstate, double complex **Xvec, double complex **vec, - FILE **_fp + double complex **prod ) { int info = 0; if (X->Def.iFlgGeneralSpin == FALSE) { @@ -544,7 +530,7 @@ int expec_cisajs_SpinGCHalf( int nstate, double complex **Xvec, double complex **vec, - FILE **_fp + double complex **prod ) { long unsigned int i, j; long unsigned int isite1; @@ -603,7 +589,7 @@ firstprivate(i_max, isite1, org_sigma2, X) shared(vec) // hopping is not allowed in 
localized spin system dam_pr = 0.0; } - MultiVecProdMPI(i_max, nstate, vec, Xvec, prod); + MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); fprintf(*_fp, " %4ld %4ld %4ld %4ld %.10lf %.10lf\n", org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, creal(dam_pr), cimag(dam_pr)); } @@ -623,7 +609,7 @@ int expec_cisajs_SpinGCGeneral( int nstate, double complex **Xvec, double complex **vec, - FILE **_fp + double complex **prod ) { long unsigned int i, j; long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; @@ -679,7 +665,7 @@ firstprivate(i_max, org_isite1, org_sigma1, org_sigma2, X,tmp_off) shared(vec) } } } - MultiVecProdMPI(i_max, nstate, vec, Xvec, prod); + MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); fprintf(*_fp, " %4ld %4ld %4ld %4ld %.10lf %.10lf\n", org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, creal(dam_pr), cimag(dam_pr)); } diff --git a/src/expec_cisajscktaltdc.c b/src/expec_cisajscktaltdc.c index d3a4f0fe9..88c61d840 100644 --- a/src/expec_cisajscktaltdc.c +++ b/src/expec_cisajscktaltdc.c @@ -25,7 +25,6 @@ #include "mltplyMPISpin.h" #include "mltplyMPISpinCore.h" #include "mltplyMPIHubbardCore.h" - /** * @file expec_cisajscktaltdc.c * @@ -39,31 +38,6 @@ * @author Kazuyoshi Yoshimi (The University of Tokyo) * */ - -int expec_cisajscktalt_HubbardGC(struct BindStruct *X,double complex *vec, FILE **_fp); -int expec_cisajscktalt_Hubbard(struct BindStruct *X,double complex *vec, FILE **_fp); - -int expec_cisajscktalt_Spin(struct BindStruct *X,double complex *vec, FILE **_fp); -int expec_cisajscktalt_SpinHalf(struct BindStruct *X,double complex *vec, FILE **_fp); -int expec_cisajscktalt_SpinGeneral(struct BindStruct *X,double complex *vec, FILE **_fp); - -int expec_cisajscktalt_SpinGC(struct BindStruct *X,double complex *vec, FILE **_fp); -int expec_cisajscktalt_SpinGCHalf(struct BindStruct *X,double complex *vec, FILE **_fp); -int expec_cisajscktalt_SpinGCGeneral(struct BindStruct *X,double complex *vec, FILE **_fp); - -int 
Rearray_Interactions( - int i, - long unsigned int *org_isite1, - long unsigned int *org_isite2, - long unsigned int *org_isite3, - long unsigned int *org_isite4, - long unsigned int *org_sigma1, - long unsigned int *org_sigma2, - long unsigned int *org_sigma3, - long unsigned int *org_sigma4, - double complex *tmp_V, - struct BindStruct *X -); /** * @brief Parent function to calculate two-body green's functions * @@ -84,10 +58,10 @@ int Rearray_Interactions( int expec_cisajscktaltdc ( struct BindStruct *X, - double complex *vec - ) -{ - + int nstate, + double complex **Xvec, + double complex **vec + ){ FILE *fp; char sdt[D_FileNameMax]; long unsigned int irght,ilft,ihfbit; @@ -105,18 +79,6 @@ int expec_cisajscktaltdc //Make File Name for output switch (X->Def.iCalcType){ - case Lanczos: - if(X->Def.St==0){ - sprintf(sdt, cFileName2BGreen_Lanczos, X->Def.CDataFileHead); - TimeKeeper(X, cFileNameTimeKeep, cLanczosExpecTwoBodyGStart,"a"); - fprintf(stdoutMPI, "%s", cLogLanczosExpecTwoBodyGStart); - }else if(X->Def.St==1){ - sprintf(sdt, cFileName2BGreen_CG, X->Def.CDataFileHead); - TimeKeeper(X, cFileNameTimeKeep, cCGExpecTwoBodyGStart,"a"); - fprintf(stdoutMPI, "%s", cLogLanczosExpecTwoBodyGStart); - } - break; - case TPQCalc: step=X->Def.istep; rand_i=X->Def.irand; @@ -142,29 +104,29 @@ int expec_cisajscktaltdc switch(X->Def.iCalcModel){ case HubbardGC: - if(expec_cisajscktalt_HubbardGC(X, vec, &fp)!=0){ - return -1; - } + if (expec_cisajscktalt_HubbardGC(X, nstate, Xvec, vec, &fp) != 0) { + return -1; + } break; case KondoGC: case Hubbard: case Kondo: - if(expec_cisajscktalt_Hubbard(X, vec, &fp)!=0){ - return -1; - } + if (expec_cisajscktalt_Hubbard(X, nstate, Xvec, vec, &fp) != 0) { + return -1; + } break; case Spin: - if(expec_cisajscktalt_Spin(X, vec, &fp)!=0){ - return -1; - } + if (expec_cisajscktalt_Spin(X, nstate, Xvec, vec, &fp) != 0) { + return -1; + } break; case SpinGC: - if(expec_cisajscktalt_SpinGC(X, vec, &fp)!=0){ - return -1; - } + if 
(expec_cisajscktalt_SpinGC(X, nstate, Xvec, vec, &fp) != 0) { + return -1; + } break; default: @@ -204,7 +166,6 @@ int expec_cisajscktaltdc //[e] return 0; } - /// /// \brief Rearray interactions /// \param i @@ -298,7 +259,6 @@ int Rearray_Interactions( } return 0; } - /** * @brief Child function to calculate two-body green's functions for Hubbard GC model * @@ -309,114 +269,116 @@ int Rearray_Interactions( * @retval -1 abnormally finished * */ -int expec_cisajscktalt_HubbardGC(struct BindStruct *X,double complex *vec, FILE **_fp){ - long unsigned int i,j; - long unsigned int isite1,isite2,isite3,isite4; - long unsigned int org_isite1,org_isite2,org_isite3,org_isite4; - long unsigned int org_sigma1,org_sigma2,org_sigma3,org_sigma4; - long unsigned int Asum,Bsum,Adiff,Bdiff; - long unsigned int tmp_off=0; - long unsigned int tmp_off_2=0; - double complex tmp_V= 1.0+0.0*I; - ; - double complex dam_pr; - long int i_max; - - for(i=0;iDef.NCisAjtCkuAlvDC;i++){ - org_isite1 = X->Def.CisAjtCkuAlvDC[i][0]+1; - org_sigma1 = X->Def.CisAjtCkuAlvDC[i][1]; - org_isite2 = X->Def.CisAjtCkuAlvDC[i][2]+1; - org_sigma2 = X->Def.CisAjtCkuAlvDC[i][3]; - org_isite3 = X->Def.CisAjtCkuAlvDC[i][4]+1; - org_sigma3 = X->Def.CisAjtCkuAlvDC[i][5]; - org_isite4 = X->Def.CisAjtCkuAlvDC[i][6]+1; - org_sigma4 = X->Def.CisAjtCkuAlvDC[i][7]; - dam_pr=0.0; - - if(CheckPE(org_isite1-1, X)==TRUE || CheckPE(org_isite2-1, X)==TRUE || - CheckPE(org_isite3-1, X)==TRUE || CheckPE(org_isite4-1, X)==TRUE){ - isite1 = X->Def.OrgTpow[2*org_isite1-2+org_sigma1] ; - isite2 = X->Def.OrgTpow[2*org_isite2-2+org_sigma2] ; - isite3 = X->Def.OrgTpow[2*org_isite3-2+org_sigma3] ; - isite4 = X->Def.OrgTpow[2*org_isite4-2+org_sigma4] ; - if(isite1 == isite2 && isite3 == isite4){ - - dam_pr = X_GC_child_CisAisCjtAjt_Hubbard_MPI(org_isite1-1, org_sigma1, - org_isite3-1, org_sigma3, - 1.0, X, vec, vec); - } - else if(isite1 == isite2 && isite3 != isite4){ - - dam_pr = X_GC_child_CisAisCjtAku_Hubbard_MPI(org_isite1-1, 
org_sigma1, - org_isite3-1, org_sigma3, org_isite4-1, org_sigma4, - 1.0, X, vec, vec); - - } - else if(isite1 != isite2 && isite3 == isite4){ - - dam_pr = X_GC_child_CisAjtCkuAku_Hubbard_MPI(org_isite1-1, org_sigma1, org_isite2-1, org_sigma2, - org_isite3-1, org_sigma3, - 1.0, X, vec, vec); - - } - else if(isite1 != isite2 && isite3 != isite4){ - dam_pr = X_GC_child_CisAjtCkuAlv_Hubbard_MPI(org_isite1-1, org_sigma1, org_isite2-1, org_sigma2, - org_isite3-1, org_sigma3, org_isite4-1, org_sigma4, - 1.0, X, vec, vec); - } +int expec_cisajscktalt_HubbardGC( + struct BindStruct *X, + int nstate, + double complex **Xvec, + double complex **vec, + double complex **prod +) { + long unsigned int i, j; + long unsigned int isite1, isite2, isite3, isite4; + long unsigned int org_isite1, org_isite2, org_isite3, org_isite4; + long unsigned int org_sigma1, org_sigma2, org_sigma3, org_sigma4; + long unsigned int Asum, Bsum, Adiff, Bdiff; + long unsigned int tmp_off = 0; + long unsigned int tmp_off_2 = 0; + double complex tmp_V = 1.0 + 0.0*I; + long int i_max; + + for (i = 0; i < X->Def.NCisAjtCkuAlvDC; i++) { + zclear(i_max*nstate, &Xvec[1][0]); + org_isite1 = X->Def.CisAjtCkuAlvDC[i][0] + 1; + org_sigma1 = X->Def.CisAjtCkuAlvDC[i][1]; + org_isite2 = X->Def.CisAjtCkuAlvDC[i][2] + 1; + org_sigma2 = X->Def.CisAjtCkuAlvDC[i][3]; + org_isite3 = X->Def.CisAjtCkuAlvDC[i][4] + 1; + org_sigma3 = X->Def.CisAjtCkuAlvDC[i][5]; + org_isite4 = X->Def.CisAjtCkuAlvDC[i][6] + 1; + org_sigma4 = X->Def.CisAjtCkuAlvDC[i][7]; + + if (CheckPE(org_isite1 - 1, X) == TRUE || CheckPE(org_isite2 - 1, X) == TRUE || + CheckPE(org_isite3 - 1, X) == TRUE || CheckPE(org_isite4 - 1, X) == TRUE) { + isite1 = X->Def.OrgTpow[2 * org_isite1 - 2 + org_sigma1]; + isite2 = X->Def.OrgTpow[2 * org_isite2 - 2 + org_sigma2]; + isite3 = X->Def.OrgTpow[2 * org_isite3 - 2 + org_sigma3]; + isite4 = X->Def.OrgTpow[2 * org_isite4 - 2 + org_sigma4]; + if (isite1 == isite2 && isite3 == isite4) { + + 
X_GC_child_CisAisCjtAjt_Hubbard_MPI(org_isite1 - 1, org_sigma1, org_isite3 - 1, org_sigma3, + 1.0, X, nstate, Xvec, vec); + } + else if (isite1 == isite2 && isite3 != isite4) { - }//InterPE - else{ - child_general_int_GetInfo - (i, X, org_isite1, org_isite2, org_isite3, org_isite4, - org_sigma1, org_sigma2, org_sigma3, org_sigma4, tmp_V - ); + X_GC_child_CisAisCjtAku_Hubbard_MPI( + org_isite1 - 1, org_sigma1, org_isite3 - 1, org_sigma3, org_isite4 - 1, org_sigma4, + 1.0, X, nstate, Xvec, vec); - i_max = X->Large.i_max; - isite1 = X->Large.is1_spin; - isite2 = X->Large.is2_spin; - Asum = X->Large.isA_spin; - Adiff = X->Large.A_spin; + } + else if (isite1 != isite2 && isite3 == isite4) { - isite3 = X->Large.is3_spin; - isite4 = X->Large.is4_spin; - Bsum = X->Large.isB_spin; - Bdiff = X->Large.B_spin; + X_GC_child_CisAjtCkuAku_Hubbard_MPI(org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, + org_isite3 - 1, org_sigma3, 1.0, X, nstate, Xvec, vec); - if(isite1 == isite2 && isite3 == isite4){ - dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j) firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2,tmp_V) shared(vec) - for(j=1;j<=i_max;j++){ - dam_pr += GC_child_CisAisCisAis_element(j, isite1, isite3, tmp_V, vec, vec, X, &tmp_off); - } - }else if(isite1 == isite2 && isite3 != isite4){ - dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j) firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2,tmp_V) shared(vec) - for(j=1;j<=i_max;j++){ - dam_pr += GC_child_CisAisCjtAku_element(j, isite1, isite3, isite4, Bsum, Bdiff, tmp_V, vec, vec, X, &tmp_off); - } - }else if(isite1 != isite2 && isite3 == isite4){ - dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j) firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2,tmp_V) shared(vec) - for(j=1;j<=i_max;j++){ - dam_pr 
+=GC_child_CisAjtCkuAku_element(j, isite1, isite2, isite3, Asum, Adiff, tmp_V, vec, vec, X, &tmp_off); - } + } + else if (isite1 != isite2 && isite3 != isite4) { + X_GC_child_CisAjtCkuAlv_Hubbard_MPI(org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, + org_isite3 - 1, org_sigma3, org_isite4 - 1, org_sigma4, 1.0, X, nstate, Xvec, vec); + } - }else if(isite1 != isite2 && isite3 != isite4){ - dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j) firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2,tmp_V) shared(vec) - for(j=1;j<=i_max;j++){ - dam_pr +=GC_child_CisAjtCkuAlv_element(j, isite1, isite2, isite3, isite4, Asum, Adiff, Bsum, Bdiff, tmp_V, vec, vec, X, &tmp_off_2); - } - } + }//InterPE + else { + child_general_int_GetInfo(i, X, org_isite1, org_isite2, org_isite3, org_isite4, + org_sigma1, org_sigma2, org_sigma3, org_sigma4, tmp_V); + + i_max = X->Large.i_max; + isite1 = X->Large.is1_spin; + isite2 = X->Large.is2_spin; + Asum = X->Large.isA_spin; + Adiff = X->Large.A_spin; + + isite3 = X->Large.is3_spin; + isite4 = X->Large.is4_spin; + Bsum = X->Large.isB_spin; + Bdiff = X->Large.B_spin; + + if (isite1 == isite2 && isite3 == isite4) { +#pragma omp parallel for default(none) private(j) shared(vec) \ +firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2,tmp_V) + for (j = 1; j <= i_max; j++) { + GC_child_CisAisCisAis_element(j, isite1, isite3, tmp_V, nstate, Xvec, vec, X, &tmp_off); } - dam_pr = SumMPI_dc(dam_pr); - fprintf(*_fp," %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf\n",org_isite1-1,org_sigma1, org_isite2-1,org_sigma2, org_isite3-1, org_sigma3, org_isite4-1,org_sigma4, creal(dam_pr), cimag(dam_pr)); - - }//Intra PE - return 0; + } + else if (isite1 == isite2 && isite3 != isite4) { +#pragma omp parallel for default(none) private(j) shared(vec) \ +firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2,tmp_V) + 
for (j = 1; j <= i_max; j++) { + GC_child_CisAisCjtAku_element(j, isite1, isite3, isite4, Bsum, Bdiff, + tmp_V, nstate, Xvec, vec, X, &tmp_off); + } + } + else if (isite1 != isite2 && isite3 == isite4) { +#pragma omp parallel for default(none) private(j) shared(vec) \ +firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2,tmp_V) + for (j = 1; j <= i_max; j++) { + GC_child_CisAjtCkuAku_element(j, isite1, isite2, isite3, Asum, Adiff, + tmp_V, nstate, Xvec, vec, X, &tmp_off); + } + } + else if (isite1 != isite2 && isite3 != isite4) { +#pragma omp parallel for default(none) private(j) shared(vec) \ +firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2,tmp_V) + for (j = 1; j <= i_max; j++) { + GC_child_CisAjtCkuAlv_element(j, isite1, isite2, isite3, isite4, Asum, Adiff, Bsum, Bdiff, + tmp_V, nstate, Xvec, vec, X, &tmp_off_2); + } + } + } + MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); + fprintf(*_fp, " %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf\n", org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, org_isite3 - 1, org_sigma3, org_isite4 - 1, org_sigma4, creal(dam_pr), cimag(dam_pr)); + }//Intra PE + return 0; } - /** * @brief Child function to calculate two-body green's functions for Hubbard model * @@ -427,117 +389,123 @@ int expec_cisajscktalt_HubbardGC(struct BindStruct *X,double complex *vec, FILE * @retval -1 abnormally finished * */ -int expec_cisajscktalt_Hubbard(struct BindStruct *X,double complex *vec, FILE **_fp){ - long unsigned int i,j; - long unsigned int isite1,isite2,isite3,isite4; - long unsigned int org_isite1,org_isite2,org_isite3,org_isite4; - long unsigned int org_sigma1,org_sigma2,org_sigma3,org_sigma4; - long unsigned int Asum,Bsum,Adiff,Bdiff; - long unsigned int tmp_off=0; - long unsigned int tmp_off_2=0; - double complex tmp_V; - double complex dam_pr; - long int i_max; +int expec_cisajscktalt_Hubbard( + struct BindStruct *X, + int nstate, + double 
complex **Xvec, + double complex **vec, + double complex **prod +){ + long unsigned int i, j; + long unsigned int isite1, isite2, isite3, isite4; + long unsigned int org_isite1, org_isite2, org_isite3, org_isite4; + long unsigned int org_sigma1, org_sigma2, org_sigma3, org_sigma4; + long unsigned int Asum, Bsum, Adiff, Bdiff; + long unsigned int tmp_off = 0; + long unsigned int tmp_off_2 = 0; + double complex tmp_V; + long int i_max; + + for (i = 0; i < X->Def.NCisAjtCkuAlvDC; i++) { + zclear(i_max*nstate, &Xvec[1][0]); + org_isite1 = X->Def.CisAjtCkuAlvDC[i][0] + 1; + org_sigma1 = X->Def.CisAjtCkuAlvDC[i][1]; + org_isite2 = X->Def.CisAjtCkuAlvDC[i][2] + 1; + org_sigma2 = X->Def.CisAjtCkuAlvDC[i][3]; + org_isite3 = X->Def.CisAjtCkuAlvDC[i][4] + 1; + org_sigma3 = X->Def.CisAjtCkuAlvDC[i][5]; + org_isite4 = X->Def.CisAjtCkuAlvDC[i][6] + 1; + org_sigma4 = X->Def.CisAjtCkuAlvDC[i][7]; + tmp_V = 1.0; + + if (X->Def.iFlgSzConserved == TRUE) { + if (org_sigma1 + org_sigma3 != org_sigma2 + org_sigma4) { + MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); + fprintf(*_fp, " %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n", org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, org_isite3 - 1, org_sigma3, org_isite4 - 1, org_sigma4, creal(dam_pr), cimag(dam_pr)); + continue; + } + } - for(i=0;iDef.NCisAjtCkuAlvDC;i++){ - org_isite1 = X->Def.CisAjtCkuAlvDC[i][0]+1; - org_sigma1 = X->Def.CisAjtCkuAlvDC[i][1]; - org_isite2 = X->Def.CisAjtCkuAlvDC[i][2]+1; - org_sigma2 = X->Def.CisAjtCkuAlvDC[i][3]; - org_isite3 = X->Def.CisAjtCkuAlvDC[i][4]+1; - org_sigma3 = X->Def.CisAjtCkuAlvDC[i][5]; - org_isite4 = X->Def.CisAjtCkuAlvDC[i][6]+1; - org_sigma4 = X->Def.CisAjtCkuAlvDC[i][7]; - tmp_V = 1.0; + if (CheckPE(org_isite1 - 1, X) == TRUE || CheckPE(org_isite2 - 1, X) == TRUE || + CheckPE(org_isite3 - 1, X) == TRUE || CheckPE(org_isite4 - 1, X) == TRUE) { + isite1 = X->Def.OrgTpow[2 * org_isite1 - 2 + org_sigma1]; + isite2 = X->Def.OrgTpow[2 * org_isite2 - 2 + org_sigma2]; + 
isite3 = X->Def.OrgTpow[2 * org_isite3 - 2 + org_sigma3]; + isite4 = X->Def.OrgTpow[2 * org_isite4 - 2 + org_sigma4]; + if (isite1 == isite2 && isite3 == isite4) { + X_child_CisAisCjtAjt_Hubbard_MPI(org_isite1 - 1, org_sigma1, + org_isite3 - 1, org_sigma3, 1.0, X, nstate, Xvec, vec); + } + else if (isite1 == isite2 && isite3 != isite4) { + //printf("org_isite1=%d, org_isite2=%d, org_isite3=%d, org_isite4=%d\n", org_isite1, org_isite2, org_isite3, org_isite4); + X_child_CisAisCjtAku_Hubbard_MPI(org_isite1 - 1, org_sigma1, + org_isite3 - 1, org_sigma3, org_isite4 - 1, org_sigma4, 1.0, X, nstate, Xvec, vec); + } + else if (isite1 != isite2 && isite3 == isite4) { + X_child_CisAjtCkuAku_Hubbard_MPI(org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, + org_isite3 - 1, org_sigma3, 1.0, X, nstate, Xvec, vec); - dam_pr=0.0; - if(X->Def.iFlgSzConserved ==TRUE){ - if(org_sigma1+org_sigma3 != org_sigma2+org_sigma4){ - dam_pr=SumMPI_dc(dam_pr); - fprintf(*_fp," %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n",org_isite1-1, org_sigma1, org_isite2-1, org_sigma2, org_isite3-1, org_sigma3, org_isite4-1, org_sigma4, creal(dam_pr), cimag(dam_pr)); - continue; - } + } + else if (isite1 != isite2 && isite3 != isite4) { + X_child_CisAjtCkuAlv_Hubbard_MPI(org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, + org_isite3 - 1, org_sigma3, org_isite4 - 1, org_sigma4, 1.0, X, nstate, Xvec, vec); + } + }//InterPE + else { + child_general_int_GetInfo( + i, X, org_isite1, org_isite2, org_isite3, org_isite4, + org_sigma1, org_sigma2, org_sigma3, org_sigma4, tmp_V + ); + + i_max = X->Large.i_max; + isite1 = X->Large.is1_spin; + isite2 = X->Large.is2_spin; + Asum = X->Large.isA_spin; + Adiff = X->Large.A_spin; + + isite3 = X->Large.is3_spin; + isite4 = X->Large.is4_spin; + Bsum = X->Large.isB_spin; + Bdiff = X->Large.B_spin; + + tmp_V = 1.0; + if (isite1 == isite2 && isite3 == isite4) { +#pragma omp parallel for default(none) private(j) shared(vec,tmp_V) \ 
+firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2) + for (j = 1; j <= i_max; j++) { + child_CisAisCisAis_element(j, isite1, isite3, tmp_V, nstate, Xvec, vec, X, &tmp_off); } - - if(CheckPE(org_isite1-1, X)==TRUE || CheckPE(org_isite2-1, X)==TRUE || - CheckPE(org_isite3-1, X)==TRUE || CheckPE(org_isite4-1, X)==TRUE){ - isite1 = X->Def.OrgTpow[2*org_isite1-2+org_sigma1] ; - isite2 = X->Def.OrgTpow[2*org_isite2-2+org_sigma2] ; - isite3 = X->Def.OrgTpow[2*org_isite3-2+org_sigma3] ; - isite4 = X->Def.OrgTpow[2*org_isite4-2+org_sigma4] ; - if(isite1 == isite2 && isite3 == isite4){ - dam_pr = X_child_CisAisCjtAjt_Hubbard_MPI(org_isite1-1, org_sigma1, - org_isite3-1, org_sigma3, - 1.0, X, vec, vec); - } - else if(isite1 == isite2 && isite3 != isite4){ - //printf("org_isite1=%d, org_isite2=%d, org_isite3=%d, org_isite4=%d\n", org_isite1, org_isite2, org_isite3, org_isite4); - dam_pr = X_child_CisAisCjtAku_Hubbard_MPI(org_isite1-1, org_sigma1, - org_isite3-1, org_sigma3, org_isite4-1, org_sigma4, - 1.0, X, vec, vec); - } - else if(isite1 != isite2 && isite3 == isite4){ - dam_pr = X_child_CisAjtCkuAku_Hubbard_MPI(org_isite1-1, org_sigma1, org_isite2-1, org_sigma2, - org_isite3-1, org_sigma3, - 1.0, X, vec, vec); - - } - else if(isite1 != isite2 && isite3 != isite4){ - dam_pr = X_child_CisAjtCkuAlv_Hubbard_MPI(org_isite1-1, org_sigma1, org_isite2-1, org_sigma2, - org_isite3-1, org_sigma3, org_isite4-1, org_sigma4, - 1.0, X, vec, vec); - } - - }//InterPE - else{ - child_general_int_GetInfo( - i, X, org_isite1, org_isite2, org_isite3, org_isite4, - org_sigma1, org_sigma2, org_sigma3, org_sigma4, tmp_V - ); - - i_max = X->Large.i_max; - isite1 = X->Large.is1_spin; - isite2 = X->Large.is2_spin; - Asum = X->Large.isA_spin; - Adiff = X->Large.A_spin; - - isite3 = X->Large.is3_spin; - isite4 = X->Large.is4_spin; - Bsum = X->Large.isB_spin; - Bdiff = X->Large.B_spin; - - tmp_V = 1.0; - dam_pr = 0.0; - if(isite1 == isite2 && isite3 == isite4){ 
-#pragma omp parallel for default(none) reduction(+:dam_pr) private(j) firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2) shared(vec,tmp_V) - for(j=1;j<=i_max;j++){ - dam_pr += child_CisAisCisAis_element(j, isite1, isite3, tmp_V, vec, vec, X, &tmp_off); - } - }else if(isite1 == isite2 && isite3 != isite4){ -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j) firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2) shared(vec,tmp_V) - for(j=1;j<=i_max;j++){ - dam_pr += child_CisAisCjtAku_element(j, isite1, isite3, isite4, Bsum, Bdiff, tmp_V, vec, vec, X, &tmp_off); - } - }else if(isite1 != isite2 && isite3 == isite4){ -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j) firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2) shared(vec,tmp_V) - for(j=1;j<=i_max;j++){ - dam_pr +=child_CisAjtCkuAku_element(j, isite1, isite2, isite3, Asum, Adiff, tmp_V, vec, vec, X, &tmp_off); - } - }else if(isite1 != isite2 && isite3 != isite4){ -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j) firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2) shared(vec,tmp_V) - for(j=1;j<=i_max;j++){ - dam_pr +=child_CisAjtCkuAlv_element(j, isite1, isite2, isite3, isite4, Asum, Adiff, Bsum, Bdiff, tmp_V, vec, vec, X, &tmp_off_2); - - } - } + } + else if (isite1 == isite2 && isite3 != isite4) { +#pragma omp parallel for default(none) private(j) shared(vec,tmp_V) \ +firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2) + for (j = 1; j <= i_max; j++) { + child_CisAisCjtAku_element(j, isite1, isite3, isite4, Bsum, Bdiff, + tmp_V, nstate, Xvec, vec, X, &tmp_off); } - dam_pr = SumMPI_dc(dam_pr); - fprintf(*_fp," %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf\n",org_isite1-1,org_sigma1, org_isite2-1,org_sigma2, org_isite3-1, org_sigma3, org_isite4-1,org_sigma4, 
creal(dam_pr), cimag(dam_pr)); + } + else if (isite1 != isite2 && isite3 == isite4) { +#pragma omp parallel for default(none) private(j) shared(vec,tmp_V) \ +firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2) + for (j = 1; j <= i_max; j++) { + child_CisAjtCkuAku_element(j, isite1, isite2, isite3, Asum, Adiff, + tmp_V, nstate, Xvec, vec, X, &tmp_off); + } + } + else if (isite1 != isite2 && isite3 != isite4) { +#pragma omp parallel for default(none) private(j) shared(vec,tmp_V) \ +firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2) + for (j = 1; j <= i_max; j++) { + child_CisAjtCkuAlv_element(j, isite1, isite2, isite3, isite4, Asum, Adiff, Bsum, Bdiff, + tmp_V, nstate, Xvec, vec, X, &tmp_off_2); + } + } } - - return 0; + MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); + fprintf(*_fp, " %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf\n", org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, org_isite3 - 1, org_sigma3, org_isite4 - 1, org_sigma4, creal(dam_pr), cimag(dam_pr)); + } + return 0; } - /** * @brief Parent function to calculate two-body green's functions for Spin model * @@ -548,16 +516,22 @@ int expec_cisajscktalt_Hubbard(struct BindStruct *X,double complex *vec, FILE ** * @retval -1 abnormally finished * */ -int expec_cisajscktalt_Spin(struct BindStruct *X,double complex *vec, FILE **_fp) { - int info=0; - if (X->Def.iFlgGeneralSpin == FALSE) { - info=expec_cisajscktalt_SpinHalf(X,vec, _fp); - } else { - info=expec_cisajscktalt_SpinGeneral(X,vec, _fp); - } - return info; +int expec_cisajscktalt_Spin( + struct BindStruct *X, + int nstate, + double complex **Xvec, + double complex **vec, + double complex **prod +) { + int info = 0; + if (X->Def.iFlgGeneralSpin == FALSE) { + info = expec_cisajscktalt_SpinHalf(X, nstate, Xvec, vec, _fp); + } + else { + info = expec_cisajscktalt_SpinGeneral(X, nstate, Xvec, vec, _fp); + } + return info; } - /** * @brief Child function to 
calculate two-body green's functions for 1/2 Spin model * @@ -568,128 +542,126 @@ int expec_cisajscktalt_Spin(struct BindStruct *X,double complex *vec, FILE **_fp * @retval -1 abnormally finished * */ -int expec_cisajscktalt_SpinHalf(struct BindStruct *X,double complex *vec, FILE **_fp){ - long unsigned int i,j; - long unsigned int org_isite1,org_isite2,org_isite3,org_isite4; - long unsigned int org_sigma1,org_sigma2,org_sigma3,org_sigma4; - long unsigned int tmp_org_isite1,tmp_org_isite2,tmp_org_isite3,tmp_org_isite4; - long unsigned int tmp_org_sigma1,tmp_org_sigma2,tmp_org_sigma3,tmp_org_sigma4; - long unsigned int isA_up, isB_up; - long unsigned int is1_up, is2_up; - long unsigned int tmp_off=0; - int tmp_sgn, num1, num2; - double complex tmp_V; - double complex dam_pr; - long int i_max; - double complex dmv; - - i_max=X->Check.idim_max; - X->Large.mode=M_CORR; - - - for(i=0;iDef.NCisAjtCkuAlvDC;i++){ - tmp_org_isite1 = X->Def.CisAjtCkuAlvDC[i][0]+1; - tmp_org_sigma1 = X->Def.CisAjtCkuAlvDC[i][1]; - tmp_org_isite2 = X->Def.CisAjtCkuAlvDC[i][2]+1; - tmp_org_sigma2 = X->Def.CisAjtCkuAlvDC[i][3]; - tmp_org_isite3 = X->Def.CisAjtCkuAlvDC[i][4]+1; - tmp_org_sigma3 = X->Def.CisAjtCkuAlvDC[i][5]; - tmp_org_isite4 = X->Def.CisAjtCkuAlvDC[i][6]+1; - tmp_org_sigma4 = X->Def.CisAjtCkuAlvDC[i][7]; - if(Rearray_Interactions(i, &org_isite1, &org_isite2, &org_isite3, &org_isite4, &org_sigma1, &org_sigma2, &org_sigma3, &org_sigma4, &tmp_V, X)!=0){ - //error message will be added - fprintf(*_fp," %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n",tmp_org_isite1-1, tmp_org_sigma1, tmp_org_isite2-1, tmp_org_sigma2, tmp_org_isite3-1,tmp_org_sigma3, tmp_org_isite4-1, tmp_org_sigma4,0.0,0.0); - continue; - } +int expec_cisajscktalt_SpinHalf( + struct BindStruct *X, + int nstate, + double complex **Xvec, + double complex **vec, + double complex **prod +){ + long unsigned int i, j; + long unsigned int org_isite1, org_isite2, org_isite3, org_isite4; + long unsigned int 
org_sigma1, org_sigma2, org_sigma3, org_sigma4; + long unsigned int tmp_org_isite1, tmp_org_isite2, tmp_org_isite3, tmp_org_isite4; + long unsigned int tmp_org_sigma1, tmp_org_sigma2, tmp_org_sigma3, tmp_org_sigma4; + long unsigned int isA_up, isB_up; + long unsigned int is1_up, is2_up; + long unsigned int tmp_off = 0; + int tmp_sgn, num1, num2, one = 1; + double complex tmp_V; + long int i_max; + double complex dmv; + + i_max = X->Check.idim_max; + X->Large.mode = M_CORR; + + for (i = 0; i < X->Def.NCisAjtCkuAlvDC; i++) { + zclear(i_max*nstate, &Xvec[1][0]); + tmp_org_isite1 = X->Def.CisAjtCkuAlvDC[i][0] + 1; + tmp_org_sigma1 = X->Def.CisAjtCkuAlvDC[i][1]; + tmp_org_isite2 = X->Def.CisAjtCkuAlvDC[i][2] + 1; + tmp_org_sigma2 = X->Def.CisAjtCkuAlvDC[i][3]; + tmp_org_isite3 = X->Def.CisAjtCkuAlvDC[i][4] + 1; + tmp_org_sigma3 = X->Def.CisAjtCkuAlvDC[i][5]; + tmp_org_isite4 = X->Def.CisAjtCkuAlvDC[i][6] + 1; + tmp_org_sigma4 = X->Def.CisAjtCkuAlvDC[i][7]; + if (Rearray_Interactions(i, &org_isite1, &org_isite2, &org_isite3, &org_isite4, &org_sigma1, &org_sigma2, &org_sigma3, &org_sigma4, &tmp_V, X) != 0) { + //error message will be added + fprintf(*_fp, " %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n", tmp_org_isite1 - 1, tmp_org_sigma1, tmp_org_isite2 - 1, tmp_org_sigma2, tmp_org_isite3 - 1, tmp_org_sigma3, tmp_org_isite4 - 1, tmp_org_sigma4, 0.0, 0.0); + continue; + } - dam_pr = 0.0; - if(org_isite1 >X->Def.Nsite && org_isite3>X->Def.Nsite){ - if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ){ //diagonal - is1_up = X->Def.Tpow[org_isite1 - 1]; - is2_up = X->Def.Tpow[org_isite3 - 1]; - num1 = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is1_up, org_sigma1); - num2 = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, org_sigma3); -#pragma omp parallel for default(none) reduction (+:dam_pr) shared(vec) \ - firstprivate(i_max, num1, num2, tmp_V) private(j) - for (j = 1; j <= i_max; j++) { - dam_pr += tmp_V*num1*num2*vec[j]*conj(vec[j]); - } - } 
- else if(org_isite1==org_isite3 && org_sigma1==org_sigma4 && org_sigma2==org_sigma3){ - is1_up = X->Def.Tpow[org_isite1 - 1]; - num1 = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is1_up, org_sigma1); -#pragma omp parallel for default(none) reduction (+:dam_pr) shared(vec) \ - firstprivate(i_max, num1, num2, tmp_V) private(j) - for (j = 1; j <= i_max; j++) { - dam_pr += tmp_V*num1*vec[j]*conj(vec[j]); - } - } - else if(org_sigma1==org_sigma4 && org_sigma2==org_sigma3){//exchange - dam_pr += X_child_general_int_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, vec, vec); - } - else{ // other process is not allowed + if (org_isite1 > X->Def.Nsite && org_isite3 > X->Def.Nsite) { + if (org_sigma1 == org_sigma2 && org_sigma3 == org_sigma4) { //diagonal + is1_up = X->Def.Tpow[org_isite1 - 1]; + is2_up = X->Def.Tpow[org_isite3 - 1]; + num1 = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is1_up, org_sigma1); + num2 = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, org_sigma3); + zaxpy_long(i_max*nstate, tmp_V * num1*num2, &vec[1][0], &Xvec[1][0]); + } + else if (org_isite1 == org_isite3 && org_sigma1 == org_sigma4 && org_sigma2 == org_sigma3) { + is1_up = X->Def.Tpow[org_isite1 - 1]; + num1 = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is1_up, org_sigma1); + zaxpy_long(i_max*nstate, tmp_V * num1, &vec[1][0], &Xvec[1][0]); + } + else if (org_sigma1 == org_sigma4 && org_sigma2 == org_sigma3) {//exchange + X_child_general_int_spin_MPIdouble( + org_isite1 - 1, org_sigma1, org_sigma2, org_isite3 - 1, org_sigma3, org_sigma4, + tmp_V, X, nstate, Xvec, vec); + } + else { // other process is not allowed // error message will be added - } - } - else if(org_isite1 > X->Def.Nsite || org_isite3>X->Def.Nsite){ - if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ){ //diagonal - is1_up = X->Def.Tpow[org_isite1 - 1]; - is2_up = X->Def.Tpow[org_isite3 - 1]; - num2 = X_SpinGC_CisAis((unsigned long int)myrank + 1, 
X, is2_up, org_sigma3); - dam_pr=0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr)shared(vec) \ + } + } + else if (org_isite1 > X->Def.Nsite || org_isite3 > X->Def.Nsite) { + if (org_sigma1 == org_sigma2 && org_sigma3 == org_sigma4) { //diagonal + is1_up = X->Def.Tpow[org_isite1 - 1]; + is2_up = X->Def.Tpow[org_isite3 - 1]; + num2 = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, org_sigma3); +#pragma omp parallel for default(none)shared(vec) \ firstprivate(i_max, tmp_V, is1_up, org_sigma1, X, num2) private(j, num1) - for (j = 1; j <= i_max; j++) { - num1 = X_Spin_CisAis(j, X, is1_up, org_sigma1); - dam_pr += tmp_V*num1*num2*conj(vec[j])*vec[j]; - } - } - else if(org_sigma1==org_sigma4 && org_sigma2==org_sigma3){//exchange - dam_pr += X_child_general_int_spin_MPIsingle(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, vec, vec); - } - else{ // other process is not allowed - // error message will be added - dam_pr=0.0; - } + for (j = 1; j <= i_max; j++) { + num1 = X_Spin_CisAis(j, X, is1_up, org_sigma1); + dmv = tmp_V * num1*num2; + zaxpy_(&nstate, &dmv, &vec[j][0], &one, &Xvec[j][0], &one); } - else{ - isA_up = X->Def.Tpow[org_isite1-1]; - isB_up = X->Def.Tpow[org_isite3-1]; - if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ){ //diagonal - dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j) firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off, tmp_V) shared(vec) - for(j=1;j<=i_max;j++){ - dam_pr +=child_CisAisCisAis_spin_element(j, isA_up, isB_up, org_sigma2, org_sigma4, tmp_V, vec, vec, X); - } - }else if(org_isite1==org_isite3 && org_sigma1==org_sigma4 && org_sigma3==org_sigma2){ - dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, dmv) firstprivate(i_max,X,isA_up,org_sigma1, tmp_V) shared(vec, list_1) - for(j=1;j<=i_max;j++){ - dmv = X_Spin_CisAis(j, X, isA_up, org_sigma1); - dam_pr += vec[j]*tmp_V*dmv*conj(vec[j]); - } 
- } - else if(org_sigma1==org_sigma4 && org_sigma2==org_sigma3){ // exchange - dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_sgn, dmv) firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V) shared(vec) - for(j=1;j<=i_max;j++){ - tmp_sgn = X_child_exchange_spin_element(j,X,isA_up,isB_up,org_sigma2,org_sigma4,&tmp_off); - dmv = vec[j]*tmp_sgn; - dam_pr += conj(vec[tmp_off])*dmv; - } - }else{ // other process is not allowed + } + else if (org_sigma1 == org_sigma4 && org_sigma2 == org_sigma3) {//exchange + X_child_general_int_spin_MPIsingle( + org_isite1 - 1, org_sigma1, org_sigma2, org_isite3 - 1, org_sigma3, org_sigma4, + tmp_V, X, nstate, Xvec, vec); + } + else { // other process is not allowed // error message will be added - dam_pr=0.0; - } + } + } + else { + isA_up = X->Def.Tpow[org_isite1 - 1]; + isB_up = X->Def.Tpow[org_isite3 - 1]; + if (org_sigma1 == org_sigma2 && org_sigma3 == org_sigma4) { //diagonal +#pragma omp parallel for default(none) private(j) shared(vec) \ +firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off, tmp_V) + for (j = 1; j <= i_max; j++) { + child_CisAisCisAis_spin_element(j, isA_up, isB_up, org_sigma2, org_sigma4, + tmp_V, nstate, Xvec, vec, X); } - dam_pr = SumMPI_dc(dam_pr); - fprintf(*_fp," %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n",tmp_org_isite1-1, tmp_org_sigma1, tmp_org_isite2-1, tmp_org_sigma2, tmp_org_isite3-1, tmp_org_sigma3, tmp_org_isite4-1, tmp_org_sigma4,creal(dam_pr),cimag(dam_pr)); - + } + else if (org_isite1 == org_isite3 && org_sigma1 == org_sigma4 && org_sigma3 == org_sigma2) { +#pragma omp parallel for default(none) private(j, dmv) \ +firstprivate(i_max,X,isA_up,org_sigma1, tmp_V) shared(vec, list_1) + for (j = 1; j <= i_max; j++) { + dmv = tmp_V * X_Spin_CisAis(j, X, isA_up, org_sigma1); + zaxpy_(&nstate, &dmv, &vec[j][0], &one, &Xvec[j][0], &one); + } + } + else if (org_sigma1 == org_sigma4 && org_sigma2 == org_sigma3) { // 
exchange +#pragma omp parallel for default(none) private(j, tmp_sgn, dmv) shared(vec) \ +firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V) + for (j = 1; j <= i_max; j++) { + tmp_sgn = X_child_exchange_spin_element(j, X, isA_up, isB_up, org_sigma2, org_sigma4, &tmp_off); + dmv = tmp_sgn; + zaxpy_(&nstate, &dmv, &vec[j][0], &one, &Xvec[tmp_off][0], &one); + } + } + else { // other process is not allowed + // error message will be added + } } - - return 0; + MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); + fprintf(*_fp, " %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n", tmp_org_isite1 - 1, tmp_org_sigma1, tmp_org_isite2 - 1, tmp_org_sigma2, tmp_org_isite3 - 1, tmp_org_sigma3, tmp_org_isite4 - 1, tmp_org_sigma4, creal(dam_pr), cimag(dam_pr)); + } + return 0; } - /** * @brief Child function to calculate two-body green's functions for General Spin model * @@ -700,113 +672,122 @@ int expec_cisajscktalt_SpinHalf(struct BindStruct *X,double complex *vec, FILE * * @retval -1 abnormally finished * */ -int expec_cisajscktalt_SpinGeneral(struct BindStruct *X,double complex *vec, FILE **_fp){ - long unsigned int i,j; - long unsigned int org_isite1,org_isite2,org_isite3,org_isite4; - long unsigned int org_sigma1,org_sigma2,org_sigma3,org_sigma4; - long unsigned int tmp_org_isite1,tmp_org_isite2,tmp_org_isite3,tmp_org_isite4; - long unsigned int tmp_org_sigma1,tmp_org_sigma2,tmp_org_sigma3,tmp_org_sigma4; - long unsigned int tmp_off=0; - long unsigned int tmp_off_2=0; - long unsigned int list1_off=0; - int num1; - double complex tmp_V; - double complex dam_pr; - long int i_max; - int tmp_Sz; - long unsigned int tmp_org=0; - vec[0]=0; - i_max=X->Check.idim_max; - X->Large.mode=M_CORR; - - for(i=0;iDef.NCisAjtCkuAlvDC;i++){ - tmp_org_isite1 = X->Def.CisAjtCkuAlvDC[i][0]+1; - tmp_org_sigma1 = X->Def.CisAjtCkuAlvDC[i][1]; - tmp_org_isite2 = X->Def.CisAjtCkuAlvDC[i][2]+1; - tmp_org_sigma2 = X->Def.CisAjtCkuAlvDC[i][3]; - tmp_org_isite3 = 
X->Def.CisAjtCkuAlvDC[i][4]+1; - tmp_org_sigma3 = X->Def.CisAjtCkuAlvDC[i][5]; - tmp_org_isite4 = X->Def.CisAjtCkuAlvDC[i][6]+1; - tmp_org_sigma4 = X->Def.CisAjtCkuAlvDC[i][7]; +int expec_cisajscktalt_SpinGeneral( + struct BindStruct *X, + int nstate, + double complex **Xvec, + double complex **vec, + double complex **prod +){ + long unsigned int i, j; + long unsigned int org_isite1, org_isite2, org_isite3, org_isite4; + long unsigned int org_sigma1, org_sigma2, org_sigma3, org_sigma4; + long unsigned int tmp_org_isite1, tmp_org_isite2, tmp_org_isite3, tmp_org_isite4; + long unsigned int tmp_org_sigma1, tmp_org_sigma2, tmp_org_sigma3, tmp_org_sigma4; + long unsigned int tmp_off = 0; + long unsigned int tmp_off_2 = 0; + long unsigned int list1_off = 0; + int num1, one = 1; + double complex tmp_V; + long int i_max; + int tmp_Sz; + long unsigned int tmp_org = 0; + i_max = X->Check.idim_max; + X->Large.mode = M_CORR; + + for (i = 0; i < X->Def.NCisAjtCkuAlvDC; i++) { + zclear(i_max*nstate, &Xvec[1][0]); + tmp_org_isite1 = X->Def.CisAjtCkuAlvDC[i][0] + 1; + tmp_org_sigma1 = X->Def.CisAjtCkuAlvDC[i][1]; + tmp_org_isite2 = X->Def.CisAjtCkuAlvDC[i][2] + 1; + tmp_org_sigma2 = X->Def.CisAjtCkuAlvDC[i][3]; + tmp_org_isite3 = X->Def.CisAjtCkuAlvDC[i][4] + 1; + tmp_org_sigma3 = X->Def.CisAjtCkuAlvDC[i][5]; + tmp_org_isite4 = X->Def.CisAjtCkuAlvDC[i][6] + 1; + tmp_org_sigma4 = X->Def.CisAjtCkuAlvDC[i][7]; + + if (Rearray_Interactions(i, &org_isite1, &org_isite2, &org_isite3, &org_isite4, &org_sigma1, &org_sigma2, &org_sigma3, &org_sigma4, &tmp_V, X) != 0) { + fprintf(*_fp, " %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n", tmp_org_isite1 - 1, tmp_org_sigma1, tmp_org_isite2 - 1, tmp_org_sigma2, tmp_org_isite3 - 1, tmp_org_sigma3, tmp_org_isite4 - 1, tmp_org_sigma4, 0.0, 0.0); + continue; + } + tmp_Sz = 0; - if(Rearray_Interactions(i, &org_isite1, &org_isite2, &org_isite3, &org_isite4, &org_sigma1, &org_sigma2, &org_sigma3, &org_sigma4, &tmp_V, X)!=0){ - fprintf(*_fp," 
%4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n",tmp_org_isite1-1, tmp_org_sigma1, tmp_org_isite2-1, tmp_org_sigma2, tmp_org_isite3-1,tmp_org_sigma3, tmp_org_isite4-1, tmp_org_sigma4,0.0,0.0); - continue; - } - tmp_Sz=0; + for (j = 0; j < 2; j++) { + tmp_org = X->Def.CisAjtCkuAlvDC[i][4 * j + 1] * X->Def.Tpow[X->Def.CisAjtCkuAlvDC[i][4 * j]]; + tmp_Sz += GetLocal2Sz(X->Def.CisAjtCkuAlvDC[i][4 * j] + 1, tmp_org, X->Def.SiteToBit, X->Def.Tpow); + tmp_org = X->Def.CisAjtCkuAlvDC[i][4 * j + 3] * X->Def.Tpow[X->Def.CisAjtCkuAlvDC[i][4 * j + 2]]; + tmp_Sz -= GetLocal2Sz(X->Def.CisAjtCkuAlvDC[i][4 * j + 2] + 1, tmp_org, X->Def.SiteToBit, X->Def.Tpow); + } + if (tmp_Sz != 0) { // not Sz conserved + fprintf(*_fp, " %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n", tmp_org_isite1 - 1, tmp_org_sigma1, tmp_org_isite2 - 1, tmp_org_sigma2, tmp_org_isite3 - 1, tmp_org_sigma3, tmp_org_isite4 - 1, tmp_org_sigma4, 0.0, 0.0); + continue; + } - for(j=0;j<2; j++) { - tmp_org = X->Def.CisAjtCkuAlvDC[i][4*j+1]*X->Def.Tpow[X->Def.CisAjtCkuAlvDC[i][4 * j]]; - tmp_Sz += GetLocal2Sz(X->Def.CisAjtCkuAlvDC[i][4 * j] + 1, tmp_org, X->Def.SiteToBit, X->Def.Tpow); - tmp_org = X->Def.CisAjtCkuAlvDC[i][4*j+3]*X->Def.Tpow[X->Def.CisAjtCkuAlvDC[i][4 * j+2]]; - tmp_Sz -= GetLocal2Sz(X->Def.CisAjtCkuAlvDC[i][4 * j+2] + 1, tmp_org, X->Def.SiteToBit, X->Def.Tpow); + if (org_isite1 > X->Def.Nsite && org_isite3 > X->Def.Nsite) { + if (org_sigma1 == org_sigma2 && org_sigma3 == org_sigma4) { //diagonal + X_child_CisAisCjuAju_GeneralSpin_MPIdouble( + org_isite1 - 1, org_sigma1, org_isite3 - 1, org_sigma3, + tmp_V, X, nstate, Xvec, vec); } - if(tmp_Sz !=0){ // not Sz conserved - fprintf(*_fp," %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n",tmp_org_isite1-1, tmp_org_sigma1, tmp_org_isite2-1, tmp_org_sigma2, tmp_org_isite3-1,tmp_org_sigma3, tmp_org_isite4-1, tmp_org_sigma4,0.0,0.0); - continue; + else if (org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4) { + 
X_child_CisAitCjuAjv_GeneralSpin_MPIdouble( + org_isite1 - 1, org_sigma1, org_sigma2, org_isite3 - 1, org_sigma3, org_sigma4, + tmp_V, X, nstate, Xvec, vec); } - - dam_pr = 0.0; - if(org_isite1 >X->Def.Nsite && org_isite3>X->Def.Nsite){ - if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ){ //diagonal - dam_pr=X_child_CisAisCjuAju_GeneralSpin_MPIdouble(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, tmp_V, X, vec, vec); - } - else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr=X_child_CisAitCjuAjv_GeneralSpin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4,tmp_V, X, vec, vec); - } - else{ - dam_pr=0.0; - } - } - else if(org_isite3 > X->Def.Nsite || org_isite1 > X->Def.Nsite){ - if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ){ //diagonal - dam_pr=X_child_CisAisCjuAju_GeneralSpin_MPIsingle(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, tmp_V, X, vec, vec); - } - else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr=X_child_CisAitCjuAjv_GeneralSpin_MPIsingle(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4,tmp_V, X, vec, vec); - } - else{ - dam_pr=0.0; + else { + } + } + else if (org_isite3 > X->Def.Nsite || org_isite1 > X->Def.Nsite) { + if (org_sigma1 == org_sigma2 && org_sigma3 == org_sigma4) { //diagonal + dam_pr=X_child_CisAisCjuAju_GeneralSpin_MPIsingle( + org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, tmp_V, X, nstate, Xvec, vec); + } + else if (org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4) { + X_child_CisAitCjuAjv_GeneralSpin_MPIsingle( + org_isite1 - 1, org_sigma1, org_sigma2, org_isite3 - 1, org_sigma3, org_sigma4, + tmp_V, X, nstate, Xvec, vec); + } + else { + } + } + else { + if (org_sigma1 == org_sigma2 && org_sigma3 == org_sigma4) { //diagonal +#pragma omp parallel for default(none) private(j, num1) shared(vec,list_1) \ +firstprivate(i_max,X,org_isite1, org_sigma1,org_isite3, org_sigma3, tmp_V) + for (j = 1; j <= i_max; j++) 
{ + num1 = BitCheckGeneral(list_1[j], org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); + if (num1 != FALSE) { + num1 = BitCheckGeneral(list_1[j], org_isite3, org_sigma3, X->Def.SiteToBit, X->Def.Tpow); + if (num1 != FALSE) { + zaxpy_(&nstate, &tmp_V, &vec[j][0], &one, &Xvec[j][0], &one); } + } } - else{ - if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ){ //diagonal -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, num1) firstprivate(i_max,X,org_isite1, org_sigma1,org_isite3, org_sigma3, tmp_V) shared(vec,list_1) - for(j=1;j<=i_max;j++){ - num1=BitCheckGeneral(list_1[j], org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); - if(num1 != FALSE){ - num1=BitCheckGeneral(list_1[j], org_isite3, org_sigma3, X->Def.SiteToBit, X->Def.Tpow); - if(num1 != FALSE){ - dam_pr += tmp_V*conj(vec[j])*vec[j]; - } - } - } - } - else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4){ -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, num1) firstprivate(i_max,X, org_isite1, org_isite3, org_sigma1, org_sigma2, org_sigma3, org_sigma4, tmp_off, tmp_off_2, list1_off, myrank, tmp_V) shared(vec, list_1) - for(j=1;j<=i_max;j++){ - num1 = GetOffCompGeneralSpin(list_1[j], org_isite3, org_sigma4, org_sigma3, &tmp_off, X->Def.SiteToBit, X->Def.Tpow); - if(num1 != FALSE) { - num1 = GetOffCompGeneralSpin(tmp_off, org_isite1, org_sigma2, org_sigma1, &tmp_off_2, - X->Def.SiteToBit, X->Def.Tpow); - if (num1 != FALSE) { - ConvertToList1GeneralSpin(tmp_off_2, X->Check.sdim, &list1_off); - dam_pr += tmp_V * conj(vec[list1_off]) * vec[j]; - } - } - } - //printf("DEBUG: rank=%d, dam_pr=%lf\n", myrank, creal(dam_pr)); - } - else{ - dam_pr=0.0; + } + else if (org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4) { +#pragma omp parallel for default(none) private(j, num1) \ +firstprivate(i_max,X, org_isite1, org_isite3, org_sigma1, org_sigma2, org_sigma3, org_sigma4, tmp_off, tmp_off_2, list1_off, myrank, tmp_V) shared(vec, list_1) + for (j = 1; j <= 
i_max; j++) { + num1 = GetOffCompGeneralSpin(list_1[j], org_isite3, org_sigma4, org_sigma3, &tmp_off, X->Def.SiteToBit, X->Def.Tpow); + if (num1 != FALSE) { + num1 = GetOffCompGeneralSpin(tmp_off, org_isite1, org_sigma2, org_sigma1, &tmp_off_2, + X->Def.SiteToBit, X->Def.Tpow); + if (num1 != FALSE) { + ConvertToList1GeneralSpin(tmp_off_2, X->Check.sdim, &list1_off); + zaxpy_(&nstate, &tmp_V, &vec[j][0], &one, &Xvec[list1_off][0], &one); } + } } - dam_pr = SumMPI_dc(dam_pr); - fprintf(*_fp," %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n",tmp_org_isite1-1, tmp_org_sigma1, tmp_org_isite2-1, tmp_org_sigma2, tmp_org_isite3-1, tmp_org_sigma3, tmp_org_isite4-1, tmp_org_sigma4, creal(dam_pr),cimag(dam_pr)); + //printf("DEBUG: rank=%d, dam_pr=%lf\n", myrank, creal(dam_pr)); + } + else { + } } - return 0; + MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); + fprintf(*_fp, " %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n", tmp_org_isite1 - 1, tmp_org_sigma1, tmp_org_isite2 - 1, tmp_org_sigma2, tmp_org_isite3 - 1, tmp_org_sigma3, tmp_org_isite4 - 1, tmp_org_sigma4, creal(dam_pr), cimag(dam_pr)); + } + return 0; } - /** * @brief Parent function to calculate two-body green's functions for Spin GC model * @@ -817,16 +798,22 @@ int expec_cisajscktalt_SpinGeneral(struct BindStruct *X,double complex *vec, FIL * @retval -1 abnormally finished * */ -int expec_cisajscktalt_SpinGC(struct BindStruct *X,double complex *vec, FILE **_fp){ - int info=0; - if (X->Def.iFlgGeneralSpin == FALSE) { - info=expec_cisajscktalt_SpinGCHalf(X,vec, _fp); - } else { - info=expec_cisajscktalt_SpinGCGeneral(X,vec, _fp); - } - return info; +int expec_cisajscktalt_SpinGC( + struct BindStruct *X, + int nstate, + double complex **Xvec, + double complex **vec, + double complex **prod +){ + int info = 0; + if (X->Def.iFlgGeneralSpin == FALSE) { + info = expec_cisajscktalt_SpinGCHalf(X, nstate, Xvec, vec, _fp); + } + else { + info = expec_cisajscktalt_SpinGCGeneral(X, nstate, Xvec, vec, 
_fp); + } + return info; } - /** * @brief Child function to calculate two-body green's functions for 1/2 Spin GC model * @@ -837,107 +824,133 @@ int expec_cisajscktalt_SpinGC(struct BindStruct *X,double complex *vec, FILE **_ * @retval -1 abnormally finished * */ -int expec_cisajscktalt_SpinGCHalf(struct BindStruct *X,double complex *vec, FILE **_fp){ - long unsigned int i,j; - long unsigned int org_isite1,org_isite2,org_isite3,org_isite4; - long unsigned int org_sigma1,org_sigma2,org_sigma3,org_sigma4; - long unsigned int tmp_org_isite1,tmp_org_isite2,tmp_org_isite3,tmp_org_isite4; - long unsigned int tmp_org_sigma1,tmp_org_sigma2,tmp_org_sigma3,tmp_org_sigma4; - long unsigned int isA_up, isB_up; - long unsigned int tmp_off=0; - double complex tmp_V; - double complex dam_pr; - long int i_max; - i_max=X->Check.idim_max; - - for(i=0;iDef.NCisAjtCkuAlvDC;i++){ - tmp_org_isite1 = X->Def.CisAjtCkuAlvDC[i][0]+1; - tmp_org_sigma1 = X->Def.CisAjtCkuAlvDC[i][1]; - tmp_org_isite2 = X->Def.CisAjtCkuAlvDC[i][2]+1; - tmp_org_sigma2 = X->Def.CisAjtCkuAlvDC[i][3]; - tmp_org_isite3 = X->Def.CisAjtCkuAlvDC[i][4]+1; - tmp_org_sigma3 = X->Def.CisAjtCkuAlvDC[i][5]; - tmp_org_isite4 = X->Def.CisAjtCkuAlvDC[i][6]+1; - tmp_org_sigma4 = X->Def.CisAjtCkuAlvDC[i][7]; +int expec_cisajscktalt_SpinGCHalf( + struct BindStruct *X, + int nstate, + double complex **Xvec, + double complex **vec, + double complex **prod +){ + long unsigned int i, j; + long unsigned int org_isite1, org_isite2, org_isite3, org_isite4; + long unsigned int org_sigma1, org_sigma2, org_sigma3, org_sigma4; + long unsigned int tmp_org_isite1, tmp_org_isite2, tmp_org_isite3, tmp_org_isite4; + long unsigned int tmp_org_sigma1, tmp_org_sigma2, tmp_org_sigma3, tmp_org_sigma4; + long unsigned int isA_up, isB_up; + long unsigned int tmp_off = 0; + double complex tmp_V; + long int i_max; + i_max = X->Check.idim_max; + + for (i = 0; i < X->Def.NCisAjtCkuAlvDC; i++) { + zclear(i_max*nstate, &Xvec[1][0]); + tmp_org_isite1 = 
X->Def.CisAjtCkuAlvDC[i][0] + 1; + tmp_org_sigma1 = X->Def.CisAjtCkuAlvDC[i][1]; + tmp_org_isite2 = X->Def.CisAjtCkuAlvDC[i][2] + 1; + tmp_org_sigma2 = X->Def.CisAjtCkuAlvDC[i][3]; + tmp_org_isite3 = X->Def.CisAjtCkuAlvDC[i][4] + 1; + tmp_org_sigma3 = X->Def.CisAjtCkuAlvDC[i][5]; + tmp_org_isite4 = X->Def.CisAjtCkuAlvDC[i][6] + 1; + tmp_org_sigma4 = X->Def.CisAjtCkuAlvDC[i][7]; + + if (Rearray_Interactions(i, &org_isite1, &org_isite2, &org_isite3, &org_isite4, &org_sigma1, &org_sigma2, &org_sigma3, &org_sigma4, &tmp_V, X) != 0) { + //error message will be added + fprintf(*_fp, " %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n", tmp_org_isite1 - 1, tmp_org_sigma1, tmp_org_isite2 - 1, tmp_org_sigma2, tmp_org_isite3 - 1, tmp_org_sigma3, tmp_org_isite4 - 1, tmp_org_sigma4, 0.0, 0.0); + continue; + } - if(Rearray_Interactions(i, &org_isite1, &org_isite2, &org_isite3, &org_isite4, &org_sigma1, &org_sigma2, &org_sigma3, &org_sigma4, &tmp_V, X)!=0){ - //error message will be added - fprintf(*_fp," %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n",tmp_org_isite1-1, tmp_org_sigma1, tmp_org_isite2-1, tmp_org_sigma2, tmp_org_isite3-1,tmp_org_sigma3, tmp_org_isite4-1, tmp_org_sigma4,0.0,0.0); - continue; - } + if (org_isite1 > X->Def.Nsite && org_isite3 > X->Def.Nsite) { //org_isite3 >= org_isite1 > Nsite - dam_pr=0.0; - if(org_isite1>X->Def.Nsite && org_isite3>X->Def.Nsite){ //org_isite3 >= org_isite1 > Nsite + if (org_sigma1 == org_sigma2 && org_sigma3 == org_sigma4) { //diagonal + X_GC_child_CisAisCjuAju_spin_MPIdouble( + org_isite1 - 1, org_sigma1, (org_isite3 - 1), org_sigma3, tmp_V, X, nstate, Xvec, vec); - if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ){ //diagonal - dam_pr += X_GC_child_CisAisCjuAju_spin_MPIdouble( (org_isite1-1), org_sigma1, (org_isite3-1), org_sigma3, tmp_V, X, vec, vec); + } + else if (org_isite1 == org_isite3 && org_sigma1 == org_sigma4 && org_sigma2 == org_sigma3) { //diagonal (for spin: cuadcdau=cuau) + 
X_GC_child_CisAis_spin_MPIdouble( + org_isite1 - 1, org_sigma1, tmp_V, X, nstate, Xvec, vec); + } + else if (org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4) { + X_GC_child_CisAisCjuAjv_spin_MPIdouble( + org_isite1 - 1, org_sigma1, org_isite3 - 1, org_sigma3, org_sigma4, + tmp_V, X, nstate, Xvec, vec); + } + else if (org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4) { + X_GC_child_CisAitCjuAju_spin_MPIdouble( + org_isite1 - 1, org_sigma1, org_sigma2, org_isite3 - 1, org_sigma3, + tmp_V, X, nstate, Xvec, vec); + } + else if (org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4) { + X_GC_child_CisAitCiuAiv_spin_MPIdouble( + org_isite1 - 1, org_sigma1, org_sigma2, org_isite3 - 1, org_sigma3, org_sigma4, + tmp_V, X, nstate, Xvec, vec); + } + } + else if (org_isite3 > X->Def.Nsite || org_isite1 > X->Def.Nsite) { //org_isite3 > Nsite >= org_isite1 + if (org_sigma1 == org_sigma2 && org_sigma3 == org_sigma4) { //diagonal + X_GC_child_CisAisCjuAju_spin_MPIsingle( + org_isite1 - 1, org_sigma1, (org_isite3 - 1), org_sigma3, tmp_V, X, nstate, Xvec, vec); - } - else if(org_isite1 ==org_isite3 && org_sigma1 ==org_sigma4 && org_sigma2 ==org_sigma3){ //diagonal (for spin: cuadcdau=cuau) - dam_pr += X_GC_child_CisAis_spin_MPIdouble((org_isite1-1), org_sigma1, tmp_V, X, vec, vec); - } - else if(org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr += X_GC_child_CisAisCjuAjv_spin_MPIdouble(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, vec, vec); - } - else if(org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4){ - dam_pr += X_GC_child_CisAitCjuAju_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, tmp_V, X, vec, vec); - } - else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr += X_GC_child_CisAitCiuAiv_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, vec, vec); - } + } + else if (org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4) { + 
X_GC_child_CisAisCjuAjv_spin_MPIsingle( + org_isite1 - 1, org_sigma1, org_isite3 - 1, org_sigma3, org_sigma4, + tmp_V, X, nstate, Xvec, vec); + } + else if (org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4) { + X_GC_child_CisAitCjuAju_spin_MPIsingle( + org_isite1 - 1, org_sigma1, org_sigma2, org_isite3 - 1, org_sigma3, + tmp_V, X, nstate, Xvec, vec); + } + else if (org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4) { + X_GC_child_CisAitCiuAiv_spin_MPIsingle( + org_isite1 - 1, org_sigma1, org_sigma2, org_isite3 - 1, org_sigma3, org_sigma4, + tmp_V, X, nstate, Xvec, vec); + } + } + else { + if (org_isite1 == org_isite2 && org_isite3 == org_isite4) { + isA_up = X->Def.Tpow[org_isite2 - 1]; + isB_up = X->Def.Tpow[org_isite4 - 1]; + if (org_sigma1 == org_sigma2 && org_sigma3 == org_sigma4) { //diagonal +#pragma omp parallel for default(none) private(j) shared(vec) \ +firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V) + for (j = 1; j <= i_max; j++) { + GC_child_CisAisCisAis_spin_element(j, isA_up, isB_up, org_sigma2, org_sigma4, + tmp_V, nstate, Xvec, vec, X); + } } - else if(org_isite3>X->Def.Nsite || org_isite1>X->Def.Nsite){ //org_isite3 > Nsite >= org_isite1 - if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ){ //diagonal - dam_pr += X_GC_child_CisAisCjuAju_spin_MPIsingle( (org_isite1-1), org_sigma1, (org_isite3-1), org_sigma3, tmp_V, X, vec, vec); - - } - else if(org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr += X_GC_child_CisAisCjuAjv_spin_MPIsingle(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, vec, vec); - } - else if(org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4){ - dam_pr += X_GC_child_CisAitCjuAju_spin_MPIsingle(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, tmp_V, X, vec, vec); - } - else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr += X_GC_child_CisAitCiuAiv_spin_MPIsingle(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, 
org_sigma4, tmp_V, X, vec, vec); - } + else if (org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4) { +#pragma omp parallel for default(none) private(j) shared(vec) \ +firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V) + for (j = 1; j <= i_max; j++) { + GC_child_CisAisCitAiu_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, + tmp_V, nstate, Xvec, vec, X, &tmp_off); + } } - else{ - if(org_isite1==org_isite2 && org_isite3==org_isite4){ - isA_up = X->Def.Tpow[org_isite2-1]; - isB_up = X->Def.Tpow[org_isite4-1]; - if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ){ //diagonal - dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j) firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V) shared(vec) - for(j=1;j<=i_max;j++){ - dam_pr +=GC_child_CisAisCisAis_spin_element(j, isA_up, isB_up, org_sigma2, org_sigma4, tmp_V, vec, vec, X); - } - }else if(org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j) firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V) shared(vec) - for(j=1;j<=i_max;j++){ - dam_pr += GC_child_CisAisCitAiu_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, vec, vec, X, &tmp_off); - } - }else if(org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4){ - dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j) firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V) shared(vec) - for(j=1;j<=i_max;j++){ - dam_pr += GC_child_CisAitCiuAiu_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, vec, vec, X, &tmp_off); - } - }else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j) firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V) shared(vec) - for(j=1;j<=i_max;j++){ - dam_pr += 
GC_child_CisAitCiuAiv_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, vec, vec, X, &tmp_off); - } - } - } + else if (org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4) { +#pragma omp parallel for default(none) private(j) shared(vec) \ +firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V) + for (j = 1; j <= i_max; j++) { + GC_child_CisAitCiuAiu_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, + tmp_V, nstate, Xvec, vec, X, &tmp_off); + } + } + else if (org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4) { +#pragma omp parallel for default(none) private(j) shared(vec) \ +firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V) + for (j = 1; j <= i_max; j++) { + GC_child_CisAitCiuAiv_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, + tmp_V, nstate, Xvec, vec, X, &tmp_off); + } } - dam_pr = SumMPI_dc(dam_pr); - fprintf(*_fp," %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n",tmp_org_isite1-1, tmp_org_sigma1, tmp_org_isite2-1, tmp_org_sigma2, tmp_org_isite3-1, tmp_org_sigma3, tmp_org_isite4-1, tmp_org_sigma4,creal(dam_pr),cimag(dam_pr)); + } } - return 0; + MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); + fprintf(*_fp, " %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n", tmp_org_isite1 - 1, tmp_org_sigma1, tmp_org_isite2 - 1, tmp_org_sigma2, tmp_org_isite3 - 1, tmp_org_sigma3, tmp_org_isite4 - 1, tmp_org_sigma4, creal(dam_pr), cimag(dam_pr)); + } + return 0; } - /** * @brief Child function to calculate two-body green's functions for General Spin GC model * @@ -948,117 +961,145 @@ int expec_cisajscktalt_SpinGCHalf(struct BindStruct *X,double complex *vec, FILE * @retval -1 abnormally finished * */ -int expec_cisajscktalt_SpinGCGeneral(struct BindStruct *X,double complex *vec, FILE **_fp){ - long unsigned int i,j; - long unsigned int org_isite1,org_isite2,org_isite3,org_isite4; - long unsigned int org_sigma1,org_sigma2,org_sigma3,org_sigma4; - long unsigned int 
tmp_org_isite1,tmp_org_isite2,tmp_org_isite3,tmp_org_isite4; - long unsigned int tmp_org_sigma1,tmp_org_sigma2,tmp_org_sigma3,tmp_org_sigma4; - long unsigned int tmp_off=0; - long unsigned int tmp_off_2=0; - int num1; - double complex tmp_V; - double complex dam_pr; - long int i_max; - i_max=X->Check.idim_max; - X->Large.mode=M_CORR; - +int expec_cisajscktalt_SpinGCGeneral( + struct BindStruct *X, + int nstate, + double complex **Xvec, + double complex **vec, + double complex **prod +){ + long unsigned int i, j; + long unsigned int org_isite1, org_isite2, org_isite3, org_isite4; + long unsigned int org_sigma1, org_sigma2, org_sigma3, org_sigma4; + long unsigned int tmp_org_isite1, tmp_org_isite2, tmp_org_isite3, tmp_org_isite4; + long unsigned int tmp_org_sigma1, tmp_org_sigma2, tmp_org_sigma3, tmp_org_sigma4; + long unsigned int tmp_off = 0; + long unsigned int tmp_off_2 = 0; + int num1, one = 1; + double complex tmp_V; + long int i_max; + i_max = X->Check.idim_max; + X->Large.mode = M_CORR; for(i=0;iDef.NCisAjtCkuAlvDC;i++){ - tmp_org_isite1 = X->Def.CisAjtCkuAlvDC[i][0]+1; - tmp_org_sigma1 = X->Def.CisAjtCkuAlvDC[i][1]; - tmp_org_isite2 = X->Def.CisAjtCkuAlvDC[i][2]+1; - tmp_org_sigma2 = X->Def.CisAjtCkuAlvDC[i][3]; - tmp_org_isite3 = X->Def.CisAjtCkuAlvDC[i][4]+1; - tmp_org_sigma3 = X->Def.CisAjtCkuAlvDC[i][5]; - tmp_org_isite4 = X->Def.CisAjtCkuAlvDC[i][6]+1; - tmp_org_sigma4 = X->Def.CisAjtCkuAlvDC[i][7]; - dam_pr = 0.0; + zclear(i_max*nstate, &Xvec[1][0]); + tmp_org_isite1 = X->Def.CisAjtCkuAlvDC[i][0] + 1; + tmp_org_sigma1 = X->Def.CisAjtCkuAlvDC[i][1]; + tmp_org_isite2 = X->Def.CisAjtCkuAlvDC[i][2] + 1; + tmp_org_sigma2 = X->Def.CisAjtCkuAlvDC[i][3]; + tmp_org_isite3 = X->Def.CisAjtCkuAlvDC[i][4] + 1; + tmp_org_sigma3 = X->Def.CisAjtCkuAlvDC[i][5]; + tmp_org_isite4 = X->Def.CisAjtCkuAlvDC[i][6] + 1; + tmp_org_sigma4 = X->Def.CisAjtCkuAlvDC[i][7]; + + if (Rearray_Interactions(i, &org_isite1, &org_isite2, &org_isite3, &org_isite4, &org_sigma1, &org_sigma2, 
&org_sigma3, &org_sigma4, &tmp_V, X) != 0) { + //error message will be added + fprintf(*_fp, " %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n", tmp_org_isite1 - 1, tmp_org_sigma1, tmp_org_isite2 - 1, tmp_org_sigma2, tmp_org_isite3 - 1, tmp_org_sigma3, tmp_org_isite4 - 1, tmp_org_sigma4, 0.0, 0.0); + continue; + } - if(Rearray_Interactions(i, &org_isite1, &org_isite2, &org_isite3, &org_isite4, &org_sigma1, &org_sigma2, &org_sigma3, &org_sigma4, &tmp_V, X)!=0){ - //error message will be added - fprintf(*_fp," %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n",tmp_org_isite1-1, tmp_org_sigma1, tmp_org_isite2-1, tmp_org_sigma2, tmp_org_isite3-1,tmp_org_sigma3, tmp_org_isite4-1, tmp_org_sigma4,0.0,0.0); - continue; + if (org_isite1 > X->Def.Nsite && org_isite3 > X->Def.Nsite) { + if (org_sigma1 == org_sigma2 && org_sigma3 == org_sigma4) { //diagonal + X_GC_child_CisAisCjuAju_GeneralSpin_MPIdouble( + org_isite1 - 1, org_sigma1, org_isite3 - 1, org_sigma3, tmp_V, X, nstate, Xvec, vec); } - - if(org_isite1 > X->Def.Nsite && org_isite3 > X->Def.Nsite){ - if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ){ //diagonal - dam_pr=X_GC_child_CisAisCjuAju_GeneralSpin_MPIdouble(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, tmp_V, X, vec, vec); - } - else if(org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr=X_GC_child_CisAisCjuAjv_GeneralSpin_MPIdouble(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, vec, vec); - } - else if(org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4){ - dam_pr=X_GC_child_CisAitCjuAju_GeneralSpin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, tmp_V, X, vec, vec); - } - else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr=X_GC_child_CisAitCjuAjv_GeneralSpin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4,tmp_V, X, vec, vec); + else if (org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4) { + 
X_GC_child_CisAisCjuAjv_GeneralSpin_MPIdouble( + org_isite1 - 1, org_sigma1, org_isite3 - 1, org_sigma3, org_sigma4, + tmp_V, X, nstate, Xvec, vec); + } + else if (org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4) { + X_GC_child_CisAitCjuAju_GeneralSpin_MPIdouble( + org_isite1 - 1, org_sigma1, org_sigma2, org_isite3 - 1, org_sigma3, + tmp_V, X, nstate, Xvec, vec); + } + else if (org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4) { + X_GC_child_CisAitCjuAjv_GeneralSpin_MPIdouble( + org_isite1 - 1, org_sigma1, org_sigma2, org_isite3 - 1, org_sigma3, org_sigma4, + tmp_V, X, nstate, Xvec, vec); + } + } + else if (org_isite3 > X->Def.Nsite || org_isite1 > X->Def.Nsite) { + if (org_sigma1 == org_sigma2 && org_sigma3 == org_sigma4) { //diagonal + X_GC_child_CisAisCjuAju_GeneralSpin_MPIsingle( + org_isite1 - 1, org_sigma1, org_isite3 - 1, org_sigma3, tmp_V, X, nstate, Xvec, vec); + } + else if (org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4) { + X_GC_child_CisAisCjuAjv_GeneralSpin_MPIsingle( + org_isite1 - 1, org_sigma1, org_isite3 - 1, org_sigma3, org_sigma4, + tmp_V, X, nstate, Xvec, vec); + } + else if (org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4) { + X_GC_child_CisAitCjuAju_GeneralSpin_MPIsingle( + org_isite1 - 1, org_sigma1, org_sigma2, org_isite3 - 1, org_sigma3, + tmp_V, X, nstate, Xvec, vec); + } + else if (org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4) { + X_GC_child_CisAitCjuAjv_GeneralSpin_MPIsingle( + org_isite1 - 1, org_sigma1, org_sigma2, org_isite3 - 1, org_sigma3, org_sigma4, + tmp_V, X, nstate, Xvec, vec); + } + } + else { + if (org_sigma1 == org_sigma2 && org_sigma3 == org_sigma4) { //diagonal +#pragma omp parallel for default(none) private(j, num1) shared(vec) \ +firstprivate(i_max,X,org_isite1, org_sigma1,org_isite3, org_sigma3, tmp_V) + for (j = 1; j <= i_max; j++) { + num1 = BitCheckGeneral(j - 1, org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); + if (num1 != FALSE) { + num1 = BitCheckGeneral(j - 1, org_isite3, 
org_sigma3, X->Def.SiteToBit, X->Def.Tpow); + if (num1 != FALSE) { + zaxpy_(&nstate, &tmp_V, &vec[j][0], &one, &Xvec[j][0], &one); } + } } - else if(org_isite3 > X->Def.Nsite || org_isite1 > X->Def.Nsite){ - if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ){ //diagonal - dam_pr=X_GC_child_CisAisCjuAju_GeneralSpin_MPIsingle(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, tmp_V, X, vec, vec); - } - else if(org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr=X_GC_child_CisAisCjuAjv_GeneralSpin_MPIsingle(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, vec, vec); - } - else if(org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4){ - dam_pr=X_GC_child_CisAitCjuAju_GeneralSpin_MPIsingle(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, tmp_V, X, vec, vec); + } + else if (org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4) { +#pragma omp parallel for default(none) private(j, num1) shared(vec) \ +firstprivate(i_max,X, org_isite1, org_isite3, org_sigma1,org_sigma3,org_sigma4, tmp_off, tmp_V) + for (j = 1; j <= i_max; j++) { + num1 = GetOffCompGeneralSpin(j - 1, org_isite3, org_sigma4, org_sigma3, + &tmp_off, X->Def.SiteToBit, X->Def.Tpow); + if (num1 != FALSE) { + num1 = BitCheckGeneral(tmp_off, org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); + if (num1 != FALSE) { + zaxpy_(&nstate, &tmp_V, &vec[j][0], &one, &Xvec[tmp_off + 1][0], &one); } - else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr=X_GC_child_CisAitCjuAjv_GeneralSpin_MPIsingle(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4,tmp_V, X, vec, vec); + } + } + } + else if (org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4) { +#pragma omp parallel for default(none) private(j, num1) shared(vec) \ +firstprivate(i_max,X, org_isite1, org_isite3, org_sigma1,org_sigma2, org_sigma3, tmp_off, tmp_V) + for (j = 1; j <= i_max; j++) { + num1 = BitCheckGeneral(j - 1, org_isite3, org_sigma3, X->Def.SiteToBit, 
X->Def.Tpow); + if (num1 != FALSE) { + num1 = GetOffCompGeneralSpin(j - 1, org_isite1, org_sigma2, org_sigma1, + &tmp_off, X->Def.SiteToBit, X->Def.Tpow); + if (num1 != FALSE) { + zaxpy_(&nstate, &tmp_V, &vec[j][0], &one, &Xvec[tmp_off + 1][0], &one); } + } } - else{ - if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ){ //diagonal -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, num1) firstprivate(i_max,X,org_isite1, org_sigma1,org_isite3, org_sigma3, tmp_V) shared(vec) - for(j=1;j<=i_max;j++){ - num1=BitCheckGeneral(j-1, org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); - if(num1 != FALSE){ - num1=BitCheckGeneral(j-1, org_isite3, org_sigma3, X->Def.SiteToBit, X->Def.Tpow); - if(num1 != FALSE){ - dam_pr += tmp_V*conj(vec[j])*vec[j]; - } - } - } - }else if(org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4){ -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, num1) firstprivate(i_max,X, org_isite1, org_isite3, org_sigma1,org_sigma3,org_sigma4, tmp_off, tmp_V) shared(vec) - for(j=1;j<=i_max;j++){ - num1 = GetOffCompGeneralSpin(j-1, org_isite3, org_sigma4, org_sigma3, &tmp_off, X->Def.SiteToBit, X->Def.Tpow); - if(num1 != FALSE){ - num1=BitCheckGeneral(tmp_off, org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); - if(num1 != FALSE){ - dam_pr += tmp_V*conj(vec[tmp_off+1])*vec[j]; - } - } - } - }else if(org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4){ -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, num1) firstprivate(i_max,X, org_isite1, org_isite3, org_sigma1,org_sigma2, org_sigma3, tmp_off, tmp_V) shared(vec) - for(j=1;j<=i_max;j++){ - num1 = BitCheckGeneral(j-1, org_isite3, org_sigma3, X->Def.SiteToBit, X->Def.Tpow); - if(num1 != FALSE){ - num1 = GetOffCompGeneralSpin(j-1, org_isite1, org_sigma2, org_sigma1, &tmp_off, X->Def.SiteToBit, X->Def.Tpow); - if(num1 != FALSE){ - dam_pr += tmp_V*conj(vec[tmp_off+1])*vec[j]; - } - } - } - }else if(org_sigma1 != org_sigma2 && org_sigma3 != 
org_sigma4){ -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, num1) firstprivate(i_max,X, org_isite1, org_isite3, org_sigma1, org_sigma2, org_sigma3, org_sigma4, tmp_off, tmp_off_2, tmp_V) shared(vec) - for(j=1;j<=i_max;j++){ - num1 = GetOffCompGeneralSpin(j-1, org_isite3, org_sigma4, org_sigma3, &tmp_off, X->Def.SiteToBit, X->Def.Tpow); - if(num1 != FALSE){ - num1 = GetOffCompGeneralSpin(tmp_off, org_isite1, org_sigma2, org_sigma1, &tmp_off_2, X->Def.SiteToBit, X->Def.Tpow); - if(num1 != FALSE){ - dam_pr += tmp_V*conj(vec[tmp_off_2+1])*vec[j]; - } - } - - } + } + else if (org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4) { +#pragma omp parallel for default(none) private(j, num1) \ +firstprivate(i_max,X, org_isite1, org_isite3, org_sigma1, org_sigma2, org_sigma3, org_sigma4, tmp_off, tmp_off_2, tmp_V) shared(vec) + for (j = 1; j <= i_max; j++) { + num1 = GetOffCompGeneralSpin(j - 1, org_isite3, org_sigma4, org_sigma3, + &tmp_off, X->Def.SiteToBit, X->Def.Tpow); + if (num1 != FALSE) { + num1 = GetOffCompGeneralSpin(tmp_off, org_isite1, org_sigma2, org_sigma1, + &tmp_off_2, X->Def.SiteToBit, X->Def.Tpow); + if (num1 != FALSE) { + zaxpy_(&nstate, &tmp_V, &vec[j][0], &one, &Xvec[tmp_off_2 + 1][0], &one); } + } } - dam_pr = SumMPI_dc(dam_pr); - fprintf(*_fp," %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n",tmp_org_isite1-1, tmp_org_sigma1, tmp_org_isite2-1, tmp_org_sigma2, tmp_org_isite3-1, tmp_org_sigma3, tmp_org_isite4-1, tmp_org_sigma4, creal(dam_pr),cimag(dam_pr)); + } } - return 0; + MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); + fprintf(*_fp, " %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n", tmp_org_isite1 - 1, tmp_org_sigma1, tmp_org_isite2 - 1, tmp_org_sigma2, tmp_org_isite3 - 1, tmp_org_sigma3, tmp_org_isite4 - 1, tmp_org_sigma4, creal(dam_pr), cimag(dam_pr)); + } + return 0; } diff --git a/src/expec_energy_flct.c b/src/expec_energy_flct.c index 24cae5565..d8f719ac9 100644 --- a/src/expec_energy_flct.c +++ 
b/src/expec_energy_flct.c @@ -37,12 +37,9 @@ int expec_energy_flct(struct BindStruct *X){ long unsigned int irght,ilft,ihfbit; double complex dam_pr,dam_pr1; long unsigned int i_max; + int istate; switch(X->Def.iCalcType){ - case Lanczos: - fprintf(stdoutMPI, "%s", cLogExpecEnergyStart); - TimeKeeper(X, cFileNameTimeKeep, cExpecStart, "a"); - break; case TPQCalc: case TimeEvolution: #ifdef _DEBUG @@ -67,13 +64,10 @@ int expec_energy_flct(struct BindStruct *X){ X->Large.ilft = ilft; X->Large.ihfbit = ihfbit; X->Large.mode = M_ENERGY; - X->Phys.energy=0.0; + for (istate = 0; istate < X->Def.k_exct; istate++) X->Phys.energy[istate]=0.0; int nCalcFlct; - if(X->Def.iCalcType == Lanczos){ - nCalcFlct=4301; - } - else if (X->Def.iCalcType == TPQCalc){ + if (X->Def.iCalcType == TPQCalc) { nCalcFlct=3201; } else{//For FullDiag @@ -109,12 +103,14 @@ int expec_energy_flct(struct BindStruct *X){ expec_energy_flct_GeneralSpin(X); } */ - X->Phys.doublon = 0.0; - X->Phys.doublon2 = 0.0; - X->Phys.num = X->Def.NsiteMPI; - X->Phys.num2 = X->Def.NsiteMPI*X->Def.NsiteMPI; - X->Phys.Sz = 0.5 * (double)X->Def.Total2SzMPI; - X->Phys.Sz2 = X->Phys.Sz * X->Phys.Sz; + for (istate = 0; istate < X->Def.k_exct; istate++) { + X->Phys.doublon[istate] = 0.0; + X->Phys.doublon2[istate] = 0.0; + X->Phys.num[istate] = X->Def.NsiteMPI; + X->Phys.num2[istate] = X->Def.NsiteMPI*X->Def.NsiteMPI; + X->Phys.Sz[istate] = 0.5 * (double)X->Def.Total2SzMPI; + X->Phys.Sz2[istate] = X->Phys.Sz * X->Phys.Sz; + } break; default: return -1; @@ -123,46 +119,40 @@ int expec_energy_flct(struct BindStruct *X){ StopTimer(nCalcFlct); #pragma omp parallel for default(none) private(i) shared(v1,v0) firstprivate(i_max) - for(i = 1; i <= i_max; i++){ - v1[i]=v0[i]; - v0[i]=0.0; + for (i = 1; i <= i_max; i++) { + for (istate = 0; istate < X->Def.k_exct; istate++){ + v1[i][istate] = v0[i][istate]; + v0[i][istate] = 0.0; + } } int nCalcExpec; - if(X->Def.iCalcType == Lanczos){ - nCalcExpec=4302; - } - else if 
(X->Def.iCalcType == TPQCalc){ + if (X->Def.iCalcType == TPQCalc){ nCalcExpec=3202; } else{//For FullDiag nCalcExpec=5302; } StartTimer(nCalcExpec); - mltply(X, 1, v0, v1); // v0+=H*v1 + mltply(X, 1, X->Def.k_exct, v0, v1); // v0+=H*v1 StopTimer(nCalcExpec); /* switch -> SpinGCBoost */ - dam_pr=0.0; - dam_pr1=0.0; + for (istate = 0; istate < X->Def.k_exct; istate++) { + X->Phys.energy[istate] = 0.0; + X->Phys.var[istate] = 0.0; + } #pragma omp parallel for default(none) reduction(+:dam_pr, dam_pr1) private(j) shared(v0, v1)firstprivate(i_max) - for(j=1;j<=i_max;j++){ - dam_pr += conj(v1[j])*v0[j]; // E = = - dam_pr1 += conj(v0[j])*v0[j]; // E^2 = = - //v0[j]=v1[j]; v1-> orginal v0=H*v1 + for (j = 1; j <= i_max; j++) { + for (istate = 0; istate < X->Def.k_exct; istate++) { + X->Phys.energy += conj(v1[j][istate])*v0[j][istate]; // E = = + X->Phys.var += conj(v0[j][istate])*v0[j][istate]; // E^2 = = + } } - dam_pr = SumMPI_dc(dam_pr); - dam_pr1 = SumMPI_dc(dam_pr1); - //fprintf(stdoutMPI, "Debug: ene=%lf, var=%lf\n", creal(dam_pr), creal(dam_pr1)); - - X->Phys.energy = dam_pr; - X->Phys.var = dam_pr1; + SumMPI_cv(X->Def.k_exct, X->Phys.energy); + SumMPI_cv(X->Def.k_exct, X->Phys.var); switch(X->Def.iCalcType) { - case Lanczos: - fprintf(stdoutMPI, "%s", cLogExpecEnergyEnd); - TimeKeeper(X, cFileNameTimeKeep, cExpecEnd, "a"); - break; case TPQCalc: case TimeEvolution: #ifdef _DEBUG @@ -173,130 +163,128 @@ int expec_energy_flct(struct BindStruct *X){ default: break; } - return 0; } - /// /// \brief Calculate expected values of energies and physical quantities for Hubbard GC model /// \param X [in, out] X Struct to get information about file header names, dimension of hirbert space, calc type and output physical quantities. /// \retval 0 normally finished. /// \retval -1 abnormally finished. 
int expec_energy_flct_HubbardGC(struct BindStruct *X) { - long unsigned int j; - long unsigned int isite1; - long unsigned int is1_up_a, is1_up_b; - long unsigned int is1_down_a, is1_down_b; - int bit_up, bit_down, bit_D; - long unsigned int ibit_up, ibit_down, ibit_D; - double D, tmp_D, tmp_D2; - double N, tmp_N, tmp_N2; - double Sz, tmp_Sz, tmp_Sz2; - double tmp_v02; - long unsigned int i_max; - unsigned int l_ibit1, u_ibit1, i_32; - i_max=X->Check.idim_max; + long unsigned int j; + long unsigned int isite1; + long unsigned int is1_up_a, is1_up_b; + long unsigned int is1_down_a, is1_down_b; + int bit_up, bit_down, bit_D; + long unsigned int ibit_up, ibit_down, ibit_D; + double D, tmp_D, tmp_D2; + double N, tmp_N, tmp_N2; + double Sz, tmp_Sz, tmp_Sz2; + double tmp_v02; + long unsigned int i_max; + unsigned int l_ibit1, u_ibit1, i_32; + i_max = X->Check.idim_max; - i_32 = 0xFFFFFFFF; //2^32 - 1 + i_32 = 0xFFFFFFFF; //2^32 - 1 // tentative doublon - tmp_D = 0.0; - tmp_D2 = 0.0; - tmp_N = 0.0; - tmp_N2 = 0.0; - tmp_Sz = 0.0; - tmp_Sz2 = 0.0; + tmp_D = 0.0; + tmp_D2 = 0.0; + tmp_N = 0.0; + tmp_N2 = 0.0; + tmp_Sz = 0.0; + tmp_Sz2 = 0.0; //[s] for bit count - is1_up_a = 0; - is1_up_b = 0; - is1_down_a = 0; - is1_down_b = 0; - for (isite1 = 1; isite1 <= X->Def.NsiteMPI; isite1++) { - if (isite1 > X->Def.Nsite) { - is1_up_a += X->Def.Tpow[2 * isite1 - 2]; - is1_down_a += X->Def.Tpow[2 * isite1 - 1]; - } else { - is1_up_b += X->Def.Tpow[2 * isite1 - 2]; - is1_down_b += X->Def.Tpow[2 * isite1 - 1]; - } + is1_up_a = 0; + is1_up_b = 0; + is1_down_a = 0; + is1_down_b = 0; + for (isite1 = 1; isite1 <= X->Def.NsiteMPI; isite1++) { + if (isite1 > X->Def.Nsite) { + is1_up_a += X->Def.Tpow[2 * isite1 - 2]; + is1_down_a += X->Def.Tpow[2 * isite1 - 1]; } + else { + is1_up_b += X->Def.Tpow[2 * isite1 - 2]; + is1_down_b += X->Def.Tpow[2 * isite1 - 1]; + } + } //[e] #pragma omp parallel for reduction(+:tmp_D,tmp_D2,tmp_N,tmp_N2,tmp_Sz,tmp_Sz2) default(none) shared(v0,list_1) \ 
firstprivate(i_max, X,myrank,is1_up_a,is1_down_a,is1_up_b,is1_down_b,i_32) \ private(j, tmp_v02,D,N,Sz,isite1,bit_up,bit_down,bit_D,u_ibit1,l_ibit1,ibit_up,ibit_down,ibit_D) - for (j = 1; j <= i_max; j++) { - tmp_v02 = conj(v0[j]) * v0[j]; - bit_up = 0; - bit_down = 0; - bit_D = 0; + for (j = 1; j <= i_max; j++) { + tmp_v02 = conj(v0[j]) * v0[j]; + bit_up = 0; + bit_down = 0; + bit_D = 0; // isite1 > X->Def.Nsite - ibit_up = (unsigned long int) myrank & is1_up_a; - u_ibit1 = ibit_up >> 32; - l_ibit1 = ibit_up & i_32; - bit_up += pop(u_ibit1); - bit_up += pop(l_ibit1); - - ibit_down = (unsigned long int) myrank & is1_down_a; - u_ibit1 = ibit_down >> 32; - l_ibit1 = ibit_down & i_32; - bit_down += pop(u_ibit1); - bit_down += pop(l_ibit1); - - ibit_D = (ibit_up) & (ibit_down >> 1); - u_ibit1 = ibit_D >> 32; - l_ibit1 = ibit_D & i_32; - bit_D += pop(u_ibit1); - bit_D += pop(l_ibit1); + ibit_up = (unsigned long int) myrank & is1_up_a; + u_ibit1 = ibit_up >> 32; + l_ibit1 = ibit_up & i_32; + bit_up += pop(u_ibit1); + bit_up += pop(l_ibit1); + + ibit_down = (unsigned long int) myrank & is1_down_a; + u_ibit1 = ibit_down >> 32; + l_ibit1 = ibit_down & i_32; + bit_down += pop(u_ibit1); + bit_down += pop(l_ibit1); + + ibit_D = (ibit_up) & (ibit_down >> 1); + u_ibit1 = ibit_D >> 32; + l_ibit1 = ibit_D & i_32; + bit_D += pop(u_ibit1); + bit_D += pop(l_ibit1); // isite1 <= X->Def.Nsite - ibit_up = (unsigned long int) (j - 1) & is1_up_b; - u_ibit1 = ibit_up >> 32; - l_ibit1 = ibit_up & i_32; - bit_up += pop(u_ibit1); - bit_up += pop(l_ibit1); - - ibit_down = (unsigned long int) (j - 1) & is1_down_b; - u_ibit1 = ibit_down >> 32; - l_ibit1 = ibit_down & i_32; - bit_down += pop(u_ibit1); - bit_down += pop(l_ibit1); - - ibit_D = (ibit_up) & (ibit_down >> 1); - u_ibit1 = ibit_D >> 32; - l_ibit1 = ibit_D & i_32; - bit_D += pop(u_ibit1); - bit_D += pop(l_ibit1); - - D = bit_D; - N = bit_up + bit_down; - Sz = bit_up - bit_down; - - tmp_D += tmp_v02 * D; - tmp_D2 += tmp_v02 * D * D; - 
tmp_N += tmp_v02 * N; - tmp_N2 += tmp_v02 * N * N; - tmp_Sz += tmp_v02 * Sz; - tmp_Sz2 += tmp_v02 * Sz * Sz; - } - tmp_D = SumMPI_d(tmp_D); - tmp_D2 = SumMPI_d(tmp_D2); - tmp_N = SumMPI_d(tmp_N); - tmp_N2 = SumMPI_d(tmp_N2); - tmp_Sz = SumMPI_d(tmp_Sz); - tmp_Sz2 = SumMPI_d(tmp_Sz2); - - X->Phys.doublon = tmp_D; - X->Phys.doublon2 = tmp_D2; - X->Phys.num = tmp_N; - X->Phys.num2 = tmp_N2; - X->Phys.Sz = tmp_Sz*0.5; - X->Phys.Sz2 = tmp_Sz2*0.25; - X->Phys.num_up = 0.5*(tmp_N+tmp_Sz); - X->Phys.num_down = 0.5*(tmp_N-tmp_Sz); + ibit_up = (unsigned long int) (j - 1) & is1_up_b; + u_ibit1 = ibit_up >> 32; + l_ibit1 = ibit_up & i_32; + bit_up += pop(u_ibit1); + bit_up += pop(l_ibit1); + + ibit_down = (unsigned long int) (j - 1) & is1_down_b; + u_ibit1 = ibit_down >> 32; + l_ibit1 = ibit_down & i_32; + bit_down += pop(u_ibit1); + bit_down += pop(l_ibit1); + + ibit_D = (ibit_up) & (ibit_down >> 1); + u_ibit1 = ibit_D >> 32; + l_ibit1 = ibit_D & i_32; + bit_D += pop(u_ibit1); + bit_D += pop(l_ibit1); + + D = bit_D; + N = bit_up + bit_down; + Sz = bit_up - bit_down; + + tmp_D += tmp_v02 * D; + tmp_D2 += tmp_v02 * D * D; + tmp_N += tmp_v02 * N; + tmp_N2 += tmp_v02 * N * N; + tmp_Sz += tmp_v02 * Sz; + tmp_Sz2 += tmp_v02 * Sz * Sz; + } + tmp_D = SumMPI_d(tmp_D); + tmp_D2 = SumMPI_d(tmp_D2); + tmp_N = SumMPI_d(tmp_N); + tmp_N2 = SumMPI_d(tmp_N2); + tmp_Sz = SumMPI_d(tmp_Sz); + tmp_Sz2 = SumMPI_d(tmp_Sz2); + + X->Phys.doublon = tmp_D; + X->Phys.doublon2 = tmp_D2; + X->Phys.num = tmp_N; + X->Phys.num2 = tmp_N2; + X->Phys.Sz = tmp_Sz * 0.5; + X->Phys.Sz2 = tmp_Sz2 * 0.25; + X->Phys.num_up = 0.5*(tmp_N + tmp_Sz); + X->Phys.num_down = 0.5*(tmp_N - tmp_Sz); - return 0; + return 0; } - /// /// \brief Calculate expected values of energies and physical quantities for Hubbard model /// \param X [in, out] X Struct to get information about file header names, dimension of hirbert space, calc type and output physical quantities. 
diff --git a/src/expec_totalspin.c b/src/expec_totalspin.c index 1538178a9..3accd298f 100644 --- a/src/expec_totalspin.c +++ b/src/expec_totalspin.c @@ -147,8 +147,6 @@ void totalspin_Hubbard(struct BindStruct *X,double complex *vec) { X->Phys.s2 = creal(spn); X->Phys.Sz = creal(spn_z); } - - /** * @brief function of calculating totalspin for Hubbard model in grand canonical ensemble * @@ -218,7 +216,6 @@ void totalspin_HubbardGC(struct BindStruct *X,double complex *vec) { X->Phys.s2 = creal(spn); X->Phys.Sz = creal(spn_z); } - /** * @brief function of calculating totalspin for spin model * diff --git a/src/include/CG_EigenVector.h b/src/include/CG_EigenVector.h deleted file mode 100644 index 0c4d40452..000000000 --- a/src/include/CG_EigenVector.h +++ /dev/null @@ -1,24 +0,0 @@ -/* HPhi - Quantum Lattice Model Simulator */ -/* Copyright (C) 2015 The University of Tokyo */ - -/* This program is free software: you can redistribute it and/or modify */ -/* it under the terms of the GNU General Public License as published by */ -/* the Free Software Foundation, either version 3 of the License, or */ -/* (at your option) any later version. */ - -/* This program is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ -/* GNU General Public License for more details. */ - -/* You should have received a copy of the GNU General Public License */ -/* along with this program. If not, see . 
*/ - -#ifndef HPHI_CG_EIGENVECTOR_H -#define HPHI_CG_EIGENVECTOR_H - -#include "Common.h" - -int CG_EigenVector(struct BindStruct *X); - -#endif /* HPHI_CG_EIGENVECTOR_H */ diff --git a/src/include/CalcByLanczos.h b/src/include/CalcByLanczos.h deleted file mode 100644 index b88e11cb3..000000000 --- a/src/include/CalcByLanczos.h +++ /dev/null @@ -1,21 +0,0 @@ -/* HPhi - Quantum Lattice Model Simulator */ -/* Copyright (C) 2015 The University of Tokyo */ - -/* This program is free software: you can redistribute it and/or modify */ -/* it under the terms of the GNU General Public License as published by */ -/* the Free Software Foundation, either version 3 of the License, or */ -/* (at your option) any later version. */ - -/* This program is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ -/* GNU General Public License for more details. */ - -/* You should have received a copy of the GNU General Public License */ -/* along with this program. If not, see . */ -#pragma once -#include "Common.h" - -int CalcByLanczos( - struct EDMainCalStruct *X -); diff --git a/src/include/CalcSpectrumByLanczos.h b/src/include/CalcSpectrumByLanczos.h deleted file mode 100644 index 2a7724e2b..000000000 --- a/src/include/CalcSpectrumByLanczos.h +++ /dev/null @@ -1,35 +0,0 @@ -/* HPhi - Quantum Lattice Model Simulator */ -/* Copyright (C) 2015 Takahiro Misawa, Kazuyoshi Yoshimi, Mitsuaki Kawamura, Youhei Yamaji, Synge Todo, Naoki Kawashima */ - -/* This program is free software: you can redistribute it and/or modify */ -/* it under the terms of the GNU General Public License as published by */ -/* the Free Software Foundation, either version 3 of the License, or */ -/* (at your option) any later version. 
*/ - -/* This program is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ -/* GNU General Public License for more details. */ - -/* You should have received a copy of the GNU General Public License */ -/* along with this program. If not, see . */ -#pragma once -#include "Common.h" - -int CalcSpectrumByLanczos( - struct EDMainCalStruct *X, - double complex **tmp_v1, - double norm, - int Nomega, - double complex *dcSpectrum, - double complex *dcomega -); - -int GetSpectrumByTridiagonalMatrixComponents( - double *tmp_alpha, - double *tmp_beta, - double dnorm, - double complex _zomega, - double complex *_zSpetcrum, - unsigned long int liLanczosStp -); diff --git a/src/include/CalcSpectrumByTPQ.h b/src/include/CalcSpectrumByTPQ.h deleted file mode 100644 index 3b9e4ab79..000000000 --- a/src/include/CalcSpectrumByTPQ.h +++ /dev/null @@ -1,33 +0,0 @@ -/* HPhi - Quantum Lattice Model Simulator */ -/* Copyright (C) 2015 Takahiro Misawa, Kazuyoshi Yoshimi, Mitsuaki Kawamura, Youhei Yamaji, Synge Todo, Naoki Kawashima */ - -/* This program is free software: you can redistribute it and/or modify */ -/* it under the terms of the GNU General Public License as published by */ -/* the Free Software Foundation, either version 3 of the License, or */ -/* (at your option) any later version. */ - -/* This program is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ -/* GNU General Public License for more details. */ - -/* You should have received a copy of the GNU General Public License */ -/* along with this program. If not, see . 
*/ -#pragma once -#include "Common.h" - -int CalcSpectrumByTPQ( - struct EDMainCalStruct *X, - double complex **tmp_v1, - double norm, - int Nomega, - double complex *dcSpectrum, - double complex *dcomega -); - -int ReadTPQData( - struct EDMainCalStruct *X, - double* ene, - double* temp, - double* specificHeat -); diff --git a/src/include/Lanczos_EigenValue.h b/src/include/Lanczos_EigenValue.h deleted file mode 100644 index 0bd0605ad..000000000 --- a/src/include/Lanczos_EigenValue.h +++ /dev/null @@ -1,40 +0,0 @@ -/* HPhi - Quantum Lattice Model Simulator */ -/* Copyright (C) 2015 The University of Tokyo */ - -/* This program is free software: you can redistribute it and/or modify */ -/* it under the terms of the GNU General Public License as published by */ -/* the Free Software Foundation, either version 3 of the License, or */ -/* (at your option) any later version. */ - -/* This program is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ -/* GNU General Public License for more details. */ - -/* You should have received a copy of the GNU General Public License */ -/* along with this program. If not, see . 
*/ -#pragma once -int Lanczos_EigenValue(struct BindStruct *X); -int Lanczos_GetTridiagonalMatrixComponents(struct BindStruct *X, double *alpha, double *beta, double complex *_v1, unsigned long int *Lanczos_step); - -int ReadInitialVector(struct BindStruct *X, double complex* tmp_v0, double complex **tmp_v1, unsigned long int *liLanczosStp_vec); - -int OutputLanczosVector(struct BindStruct *X, double complex* tmp_v0, double complex **tmp_v1, unsigned long int liLanczosStp_vec); - -void SetInitialVector(struct BindStruct *X, double complex* tmp_v0, double complex **tmp_v1); - -int ReadTMComponents( - struct BindStruct *X, - double *_dnorm, - unsigned long int *i_max, - const int iFlg -); - -int OutputTMComponents( - struct BindStruct *X, - double *_alpha, - double *_beta, - double _dnorm, - int liLanczosStp -); - diff --git a/src/include/Lanczos_EigenVector.h b/src/include/Lanczos_EigenVector.h deleted file mode 100644 index 922ff5355..000000000 --- a/src/include/Lanczos_EigenVector.h +++ /dev/null @@ -1,17 +0,0 @@ -/* HPhi - Quantum Lattice Model Simulator */ -/* Copyright (C) 2015 The University of Tokyo */ - -/* This program is free software: you can redistribute it and/or modify */ -/* it under the terms of the GNU General Public License as published by */ -/* the Free Software Foundation, either version 3 of the License, or */ -/* (at your option) any later version. */ - -/* This program is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ -/* GNU General Public License for more details. */ - -/* You should have received a copy of the GNU General Public License */ -/* along with this program. If not, see . 
*/ -#pragma once -void Lanczos_EigenVector(struct BindStruct *X); diff --git a/src/include/PowerLanczos.h b/src/include/PowerLanczos.h deleted file mode 100644 index dab04490e..000000000 --- a/src/include/PowerLanczos.h +++ /dev/null @@ -1,4 +0,0 @@ -#include "Common.h" -int PowerLanczos(struct BindStruct *X); -int solve_2ndPolinomial(struct BindStruct *X,double *alpha_p,double *alpha_m,double E1,double E2a,double E2b,double E3,double E4); -void Lz(struct BindStruct *X,double alpha,double *Lz_Ene,double *Lz_Var,double E1,double E2,double E3,double E4); diff --git a/src/include/mltplyMPIBoost.h b/src/include/mltplyMPIBoost.h deleted file mode 100644 index 5857fd4c3..000000000 --- a/src/include/mltplyMPIBoost.h +++ /dev/null @@ -1,31 +0,0 @@ -/* HPhi - Quantum Lattice Model Simulator */ -/* Copyright (C) 2015 The University of Tokyo */ - -/* This program is free software: you can redistribute it and/or modify */ -/* it under the terms of the GNU General Public License as published by */ -/* the Free Software Foundation, either version 3 of the License, or */ -/* (at your option) any later version. */ - -/* This program is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ -/* GNU General Public License for more details. */ - -/* You should have received a copy of the GNU General Public License */ -/* along with this program. If not, see . 
*/ - -//Define Mode for mltply -// complex version - -#pragma once -#include -#include "struct.h" - -void child_general_int_spin_MPIBoost -( - struct BindStruct *X, - int nstate, double complex **tmp_v0, - double complex **tmp_v1, - double complex *tmp_v2, - double complex *tmp_v3 -); diff --git a/src/include/struct.h b/src/include/struct.h index d8852b426..52f41737f 100644 --- a/src/include/struct.h +++ b/src/include/struct.h @@ -352,15 +352,15 @@ struct LargeList { */ struct PhysList { //double energy,doublon; - double energy;/**<@brief Expectation value of the total energy.*/ - double doublon;/**<@brief Expectation value of the Doublon*/ - double doublon2;/**<@brief Expectation value of the Square of doublon*/ - double num;/**<@brief Expectation value of the Number of electrons*/ - double num2;/**<@brief Expectation value of the quare of the number of electrons*/ - double Sz;/**<@brief Expectation value of the Total Sz*/ - double Sz2;/**<@brief Expectation value of the Square of total Sz*/ + double *energy;/**<@brief Expectation value of the total energy.*/ + double *doublon;/**<@brief Expectation value of the Doublon*/ + double *doublon2;/**<@brief Expectation value of the Square of doublon*/ + double *num;/**<@brief Expectation value of the Number of electrons*/ + double *num2;/**<@brief Expectation value of the quare of the number of electrons*/ + double *Sz;/**<@brief Expectation value of the Total Sz*/ + double *Sz2;/**<@brief Expectation value of the Square of total Sz*/ /*[s] For TPQ*/ - double var;/**<@brief Expectation value of the Energy variance.*/ + double *var;/**<@brief Expectation value of the Energy variance.*/ /*[e] For TPQ*/ /*[s] For Full Diagonalization*/ diff --git a/src/include/vec12.h b/src/include/vec12.h deleted file mode 100644 index 21075ca13..000000000 --- a/src/include/vec12.h +++ /dev/null @@ -1,21 +0,0 @@ -/* HPhi - Quantum Lattice Model Simulator */ -/* Copyright (C) 2015 The University of Tokyo */ - -/* This program is free 
software: you can redistribute it and/or modify */ -/* it under the terms of the GNU General Public License as published by */ -/* the Free Software Foundation, either version 3 of the License, or */ -/* (at your option) any later version. */ - -/* This program is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ -/* GNU General Public License for more details. */ - -/* You should have received a copy of the GNU General Public License */ -/* along with this program. If not, see . */ -#pragma once -#include "Common.h" - -void vec12(double alpha[],double beta[],unsigned int ndim, - double E[],struct BindStruct *X); - diff --git a/src/mltplyMPIBoost.c b/src/mltplyMPIBoost.c deleted file mode 100644 index 783caa7e3..000000000 --- a/src/mltplyMPIBoost.c +++ /dev/null @@ -1,416 +0,0 @@ -/* HPhi - Quantum Lattice Model Simulator */ -/* Copyright (C) 2015 The University of Tokyo */ - -/* This program is free software: you can redistribute it and/or modify */ -/* it under the terms of the GNU General Public License as published by */ -/* the Free Software Foundation, either version 3 of the License, or */ -/* (at your option) any later version. */ - -/* This program is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ -/* GNU General Public License for more details. */ - -/* You should have received a copy of the GNU General Public License */ -/* along with this program. If not, see . 
*/ - -//Define Mode for mltply -// complex version - -#ifdef MPI -#include "mpi.h" -#endif -#include "Common.h" -#include "common/setmemory.h" -#include "wrapperMPI.h" - -void zgemm_(char *TRANSA, char *TRANSB, int *M, int *N, int *K, double complex *ALPHA, double complex *matJL, int *LDA, double complex *arrayz, int *LDB, double complex *BETA, double complex *arrayx, int *LDC); - -/** - * - * Exchange term in Spin model - * - * @author Mitsuaki Kawamura (The University of Tokyo) - * @author Youhei Yamaji (The University of Tokyo) - */ -void child_general_int_spin_MPIBoost( - struct BindStruct *X /**< [inout]*/, - int nstate, double complex **tmp_v0 /**< [out] Result v0 = H v1*/, - double complex **tmp_v1 /**< [in] v0 = H v1*/, - double complex *tmp_v2 /**< [inout] bufffer*/, - double complex *tmp_v3 /**< [inout] bufffer*/ - ) -{ -#ifdef MPI - - - // int ierr; - // int INFO; - char TRANSA, TRANSB; - int M, N, K, LDA, LDB, LDC; - double complex ALPHA, BETA; - long unsigned int i_max; - long unsigned int j, k, ell, iloop; - long unsigned int i1, i2; - long unsigned int iomp; - long unsigned int ell4, ell5, ell6, m0, Ipart1; - long unsigned int mi, mj, mri, mrj, mrk, mrl; - int indj; - long unsigned int ellrl, ellrk, ellrj, ellri, elli1, elli2, ellj1, ellj2; - long unsigned int iSS1, iSS2, iSSL1, iSSL2; - double complex **vecJ; - double complex **matJ, **matJ2; - double complex *matJL; - double complex *matI; - double complex **matB; - double complex *arrayz; - double complex *arrayx; - double complex *arrayw; - long unsigned int ishift1, ishift2, ishift3, ishift4, ishift5, pivot_flag, num_J_star; - long unsigned int pow4, pow5, pow41, pow51; - //long unsigned int pow1, pow2, pow3, pow4, pow5, pow11, pow21, pow31, pow41, pow51; - - i_max = X->Check.idim_max; - -/* -//zero clear - #pragma omp parallel for default(none) private(j) \ - shared(i_max,tmp_v0) - for(j=0;jBoost.W0, X->Boost.R0, X->Boost.num_pivot, X->Boost.ishift_nspin, X->Boost.list_6spin_star, 
X->Boost.list_6spin_pair, 1, X->Boost.arrayJ, X->Boost.vecB); - - for(iloop=0; iloop < X->Boost.R0; iloop++){ - - - for(j=iloop*X->Boost.num_pivot; j < (iloop+1)*X->Boost.num_pivot; j++){ - - num_J_star = (long unsigned int)X->Boost.list_6spin_star[j][0]; //(0,j) - ishift1 = (long unsigned int)X->Boost.list_6spin_star[j][1]; //(1,j) - ishift2 = (long unsigned int)X->Boost.list_6spin_star[j][2]; //(2,j) - ishift3 = (long unsigned int)X->Boost.list_6spin_star[j][3]; //(3,j) - ishift4 = (long unsigned int)X->Boost.list_6spin_star[j][4]; //(4,j) - ishift5 = (long unsigned int)X->Boost.list_6spin_star[j][5]; //(5,j) - pivot_flag = (long unsigned int)X->Boost.list_6spin_star[j][6]; //(6,j) - //pow1 = (int)pow(2.0,ishift1); - //pow2 = (int)pow(2.0,ishift1+ishift2); - //pow3 = (int)pow(2.0,ishift1+ishift2+ishift3); - pow4 = (int)pow(2.0,ishift1+ishift2+ishift3+ishift4); - pow5 = (int)pow(2.0,ishift1+ishift2+ishift3+ishift4+ishift5); - //pow11= (int)pow(2.0,ishift1+1); - //pow21= (int)pow(2.0,ishift1+ishift2+1); - //pow31= (int)pow(2.0,ishift1+ishift2+ishift3+1); - pow41= (int)pow(2.0,ishift1+ishift2+ishift3+ishift4+1); - pow51= (int)pow(2.0,ishift1+ishift2+ishift3+ishift4+ishift5+1); - - for(k=0; k < (64*64); k++){ - matJL[k] = 0.0 + 0.0*I; - matI[k] = 0.0 + 0.0*I; - } - for(k=0; k < 64; k++){ - matI[k+64*k] = 1.0; - } - - for(ell=0; ell < num_J_star; ell++){ - mi = (long unsigned int)X->Boost.list_6spin_pair[j][0][ell]; //(1,ell,j) - mj = (long unsigned int)X->Boost.list_6spin_pair[j][1][ell]; //(2,ell,j) - mri = (long unsigned int)X->Boost.list_6spin_pair[j][2][ell]; //(3,ell,j) - mrj = (long unsigned int)X->Boost.list_6spin_pair[j][3][ell]; //(4,ell,j) - mrk = (long unsigned int)X->Boost.list_6spin_pair[j][4][ell]; //(5,ell,j) - mrl = (long unsigned int)X->Boost.list_6spin_pair[j][5][ell]; //(6,ell,j) - indj = X->Boost.list_6spin_pair[j][6][ell]; //(7,ell,j) - for(i1 = 0; i1 < 3; i1++){ - for(i2 = 0; i2 < 3; i2++){ - vecJ[i1][i2] = X->Boost.arrayJ[(indj-1)][i1][i2]; - } 
- } - //matJSS(1,1) = vecJ(3,3) - matJ[0][0] = vecJ[2][2]; - //matJSS(1,2)= vecJ(1,1)-vecJ(2,2)-dcmplx(0.0d0,1.0d0)*vecJ(1,2)-dcmplx(0.0d0,1.0d0)*vecJ(2,1) - matJ[0][1] = vecJ[0][0]-vecJ[1][1]-I*vecJ[0][1]-I*vecJ[1][0]; - //matJSS(1,3)= vecJ(3,1)-dcmplx(0.0d0,1.0d0)*vecJ(3,2) - matJ[0][2] = vecJ[2][0]-I*vecJ[2][1]; - //matJSS(1,4)= vecJ(1,3)-dcmplx(0.0d0,1.0d0)*vecJ(2,3) - matJ[0][3] = vecJ[0][2]-I*vecJ[1][2]; - //matJSS(2,1)= vecJ(1,1)-vecJ(2,2)+dcmplx(0.0d0,1.0d0)*vecJ(1,2)+dcmplx(0.0d0,1.0d0)*vecJ(2,1) - matJ[1][0] = vecJ[0][0]-vecJ[1][1]+I*vecJ[0][1]+I*vecJ[1][0]; - //matJSS(2,2)= vecJ(3,3) - matJ[1][1] = vecJ[2][2]; - //matJSS(2,3)=dcmplx(-1.0d0,0.0d0)*vecJ(1,3)-dcmplx(0.0d0,1.0d0)*vecJ(2,3) - matJ[1][2] =(-1.0)*vecJ[0][2]-I*vecJ[1][2]; - //matJSS(2,4)=dcmplx(-1.0d0,0.0d0)*vecJ(3,1)-dcmplx(0.0d0,1.0d0)*vecJ(3,2) - matJ[1][3] =(-1.0)*vecJ[2][0]-I*vecJ[2][1]; - //matJSS(3,1)= vecJ(3,1)+dcmplx(0.0d0,1.0d0)*vecJ(3,2) - matJ[2][0] = vecJ[2][0]+I*vecJ[2][1]; - //matJSS(3,2)=dcmplx(-1.0d0,0.0d0)*vecJ(1,3)+dcmplx(0.0d0,1.0d0)*vecJ(2,3) - matJ[2][1] =(-1.0)*vecJ[0][2]+I*vecJ[1][2]; - //matJSS(3,3)=dcmplx(-1.0d0,0.0d0)*vecJ(3,3) - matJ[2][2] =(-1.0)*vecJ[2][2]; - //matJSS(3,4)= vecJ(1,1)+vecJ(2,2)+dcmplx(0.0d0,1.0d0)*vecJ(1,2)-dcmplx(0.0d0,1.0d0)*vecJ(2,1) - matJ[2][3] = vecJ[0][0]+vecJ[1][1]+I*vecJ[0][1]-I*vecJ[1][0]; - //matJSS(4,1)= vecJ(1,3)+dcmplx(0.0d0,1.0d0)*vecJ(2,3) - matJ[3][0] = vecJ[0][2]+I*vecJ[1][2]; - //matJSS(4,2)=dcmplx(-1.0d0,0.0d0)*vecJ(3,1)+dcmplx(0.0d0,1.0d0)*vecJ(3,2) - matJ[3][1] =(-1.0)*vecJ[2][0]+I*vecJ[2][1]; - //matJSS(4,3)= vecJ(1,1)+vecJ(2,2)-dcmplx(0.0d0,1.0d0)*vecJ(1,2)+dcmplx(0.0d0,1.0d0)*vecJ(2,1) - matJ[3][2] = vecJ[0][0]+vecJ[1][1]-I*vecJ[0][1]+I*vecJ[1][0]; - //matJSS(4,4)=dcmplx(-1.0d0,0.0d0)*vecJ(3,3) - matJ[3][3] =(-1.0)*vecJ[2][2]; - - matJ2[3][3] = matJ[0][0]; - matJ2[3][0] = matJ[0][1]; - matJ2[3][1] = matJ[0][2]; - matJ2[3][2] = matJ[0][3]; - matJ2[0][3] = matJ[1][0]; - matJ2[0][0] = matJ[1][1]; - matJ2[0][1] = matJ[1][2]; - 
matJ2[0][2] = matJ[1][3]; - matJ2[1][3] = matJ[2][0]; - matJ2[1][0] = matJ[2][1]; - matJ2[1][1] = matJ[2][2]; - matJ2[1][2] = matJ[2][3]; - matJ2[2][3] = matJ[3][0]; - matJ2[2][0] = matJ[3][1]; - matJ2[2][1] = matJ[3][2]; - matJ2[2][2] = matJ[3][3]; - - for(ellri=0; ellri<2; ellri++){ - for(ellrj=0; ellrj<2; ellrj++){ - for(ellrk=0; ellrk<2; ellrk++){ - for(ellrl=0; ellrl<2; ellrl++){ - for(elli1=0; elli1<2; elli1++){ - for(ellj1=0; ellj1<2; ellj1++){ - for(elli2=0; elli2<2; elli2++){ - for(ellj2=0; ellj2<2; ellj2++){ - - iSSL1 = elli1*(int)pow(2,mi) + ellj1*(int)pow(2,mj) + ellri*(int)pow(2,mri) + ellrj*(int)pow(2,mrj) + ellrk*(int)pow(2,mrk) + ellrl*(int)pow(2,mrl); - iSSL2 = elli2*(int)pow(2,mi) + ellj2*(int)pow(2,mj) + ellri*(int)pow(2,mri) + ellrj*(int)pow(2,mrj) + ellrk*(int)pow(2,mrk) + ellrl*(int)pow(2,mrl); - iSS1 = elli1 + 2*ellj1; - iSS2 = elli2 + 2*ellj2; - matJL[iSSL1+64*iSSL2] += matJ2[iSS1][iSS2]; - } - } - } - } - } - } - } - } - - - }/* loop for ell */ - - /* external magnetic field B */ - if(pivot_flag==1){ - matB[0][0] = + X->Boost.vecB[2]; // -BM - matB[1][1] = - X->Boost.vecB[2]; // -BM - //matB[0][1] = - X->Boost.vecB[0] + I*X->Boost.vecB[1]; // -BM - //matB[1][0] = - X->Boost.vecB[0] - I*X->Boost.vecB[1]; // -BM - matB[0][1] = - X->Boost.vecB[0] - I*X->Boost.vecB[1]; // -BM - matB[1][0] = - X->Boost.vecB[0] + I*X->Boost.vecB[1]; // -BM - for(ellri=0; ellri<2; ellri++){ - for(ellrj=0; ellrj<2; ellrj++){ - for(ellrk=0; ellrk<2; ellrk++){ - for(ellrl=0; ellrl<2; ellrl++){ - for(ellj1=0; ellj1<2; ellj1++){ - for(elli1=0; elli1<2; elli1++){ - for(elli2=0; elli2<2; elli2++){ - for(ellj2=0; ellj2Boost.ishift_nspin; ellj2++){ - iSSL1 = elli1*(int)pow(2,ellj2) + ellj1*(int)pow(2,((ellj2+1)%6)) + ellri*(int)pow(2,((ellj2+2)%6)) + ellrj*(int)pow(2,((ellj2+3)%6)) + ellrk*(int)pow(2,((ellj2+4)%6)) + ellrl*(int)pow(2,((ellj2+5)%6)); - iSSL2 = elli2*(int)pow(2,ellj2) + ellj1*(int)pow(2,((ellj2+1)%6)) + ellri*(int)pow(2,((ellj2+2)%6)) + 
ellrj*(int)pow(2,((ellj2+3)%6)) + ellrk*(int)pow(2,((ellj2+4)%6)) + ellrl*(int)pow(2,((ellj2+5)%6)); - matJL[iSSL1+64*iSSL2] += matB[elli1][elli2]; - } - } - } - } - } - } - } - } - } - /* external magnetic field B */ - - iomp=i_max/(int)pow(2.0,ishift1+ishift2+ishift3+ishift4+ishift5+2); - - #pragma omp parallel default(none) private(arrayx,arrayz,arrayw,ell4,ell5,ell6,m0,Ipart1,TRANSA,TRANSB,M,N,K,LDA,LDB,LDC,ALPHA,BETA) \ - shared(matJL,matI,iomp,i_max,myrank,ishift1,ishift2,ishift3,ishift4,ishift5,pow4,pow5,pow41,pow51,nstate,tmp_v0,tmp_v1,tmp_v3) - { - - arrayx = cd_1d_allocate(64*((int)pow(2.0,ishift4+ishift5-1))); - arrayz = cd_1d_allocate(64*((int)pow(2.0,ishift4+ishift5-1))); - arrayw = cd_1d_allocate(64*((int)pow(2.0,ishift4+ishift5-1))); - -#pragma omp for - for(ell6 = 0; ell6 < iomp; ell6++){ - Ipart1=pow51*2*ell6; - for(ell5 = 0; ell5 < (int)pow(2.0, ishift5-1); ell5++){ - for(ell4 = 0; ell4 < (int)pow(2.0, ishift4-1); ell4++){ - for(m0 = 0; m0 < 16; m0++){ - arrayz[(0 + m0 +64*(ell4+ell5*(int)pow(2.0,ishift4-1)))] = tmp_v1[(1 + m0+16*ell4 +pow41*ell5+Ipart1)]; - arrayz[(16+ m0 +64*(ell4+ell5*(int)pow(2.0,ishift4-1)))] = tmp_v1[(1 + m0+16*ell4+pow4 +pow41*ell5+Ipart1)]; - arrayz[(32+ m0 +64*(ell4+ell5*(int)pow(2.0,ishift4-1)))] = tmp_v1[(1 + m0+16*ell4+pow5 +pow41*ell5+Ipart1)]; - arrayz[(48+ m0 +64*(ell4+ell5*(int)pow(2.0,ishift4-1)))] = tmp_v1[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+Ipart1)]; - tmp_v3[(1 + m0+16*ell4 +pow41*ell5+Ipart1)]=tmp_v1[(1 + m0+16*ell4 +pow41*ell5+Ipart1)]; - tmp_v3[(1 + m0+16*ell4+pow4 +pow41*ell5+Ipart1)]=tmp_v1[(1 + m0+16*ell4+pow4 +pow41*ell5+Ipart1)]; - tmp_v3[(1 + m0+16*ell4+pow5 +pow41*ell5+Ipart1)]=tmp_v1[(1 + m0+16*ell4+pow5 +pow41*ell5+Ipart1)]; - tmp_v3[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+Ipart1)]=tmp_v1[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+Ipart1)]; - arrayx[(0 + m0 +64*(ell4+ell5*(int)pow(2.0,ishift4-1)))] = tmp_v0[(1 + m0+16*ell4 +pow41*ell5+Ipart1)]; - arrayx[(16+ m0 +64*(ell4+ell5*(int)pow(2.0,ishift4-1)))] = 
tmp_v0[(1 + m0+16*ell4+pow4 +pow41*ell5+Ipart1)]; - arrayx[(32+ m0 +64*(ell4+ell5*(int)pow(2.0,ishift4-1)))] = tmp_v0[(1 + m0+16*ell4+pow5 +pow41*ell5+Ipart1)]; - arrayx[(48+ m0 +64*(ell4+ell5*(int)pow(2.0,ishift4-1)))] = tmp_v0[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+Ipart1)]; - } - } - } - - - for(ell5 = 0; ell5 < (int)pow(2.0, ishift5-1); ell5++){ - for(ell4 = 0; ell4 < (int)pow(2.0, ishift4-1); ell4++){ - for(m0 = 0; m0 < 16; m0++){ - arrayz[(0 + m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))] = tmp_v1[(1 + m0+16*ell4 +pow41*ell5+pow51+Ipart1)]; - arrayz[(16+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))] = tmp_v1[(1 + m0+16*ell4+pow4 +pow41*ell5+pow51+Ipart1)]; - arrayz[(32+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))] = tmp_v1[(1 + m0+16*ell4+pow5 +pow41*ell5+pow51+Ipart1)]; - arrayz[(48+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))] = tmp_v1[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+pow51+Ipart1)]; - tmp_v3[(1 + m0+16*ell4 +pow41*ell5+pow51+Ipart1)] = tmp_v1[(1 + m0+16*ell4 +pow41*ell5+pow51+Ipart1)]; - tmp_v3[(1 + m0+16*ell4+pow4 +pow41*ell5+pow51+Ipart1)] = tmp_v1[(1 + m0+16*ell4+pow4 +pow41*ell5+pow51+Ipart1)]; - tmp_v3[(1 + m0+16*ell4+pow5 +pow41*ell5+pow51+Ipart1)] = tmp_v1[(1 + m0+16*ell4+pow5 +pow41*ell5+pow51+Ipart1)]; - tmp_v3[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+pow51+Ipart1)] = tmp_v1[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+pow51+Ipart1)]; - arrayx[(0 + m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))] = tmp_v0[(1 + m0+16*ell4 +pow41*ell5+pow51+Ipart1)]; - arrayx[(16+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))] = tmp_v0[(1 + m0+16*ell4+pow4 +pow41*ell5+pow51+Ipart1)]; - arrayx[(32+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))] = tmp_v0[(1 + m0+16*ell4+pow5 +pow41*ell5+pow51+Ipart1)]; - arrayx[(48+ 
m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))] = tmp_v0[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+pow51+Ipart1)]; - } - - } - } - - TRANSA = 'N'; - TRANSB = 'N'; - M = 64; - N = (int)pow(2.0, ishift4+ishift5-1); - K = 64; - ALPHA = 1.0; - LDA = 64; - LDB = 64; - BETA = 1.0; - LDC = 64; - - zgemm_(&TRANSA,&TRANSB,&M,&N,&K,&ALPHA,matJL,&LDA,arrayz,&LDB,&BETA,arrayx,&LDC); - //zgemm_(&TRANSA,&TRANSB,&M,&N,&K,&ALPHA,matI,&LDA,arrayz,&LDB,&BETA,arrayx,&LDC); -/* - for(ell5=0;ell5<(64*N);ell5++){ - arrayw[ell5]=0.0; - } - for(ell5=0;ell5<64;ell5++){ - for(ell4=0;ell4<64;ell4++){ - for(m0=0;m0Boost.ishift_nspin); - #pragma omp parallel for default(none) private(ell4,ell5,ell6,m0,Ipart1,TRANSA,TRANSB,M,N,K,LDA,LDB,LDC,ALPHA,BETA) \ - firstprivate(iomp) shared(i_max,ishift1,ishift2,ishift3,ishift4,ishift5,pow4,pow5,pow41,pow51,X,nstate,tmp_v0,tmp_v1) - for(ell5 = 0; ell5 < iomp; ell5++ ){ - for(ell4 = 0; ell4 < (int)pow(2.0,X->Boost.ishift_nspin); ell4++){ - tmp_v0[(1 + ell5+(i_max/(int)pow(2.0,X->Boost.ishift_nspin))*ell4)] = tmp_v1[(1 + ell4+((int)pow(2.0,X->Boost.ishift_nspin))*ell5)]; - } - } - iomp=i_max/(int)pow(2.0,X->Boost.ishift_nspin); - #pragma omp parallel for default(none) private(ell4,ell5) \ - firstprivate(iomp) shared(i_max,X,tmp_v1,tmp_v3) - for(ell5 = 0; ell5 < iomp; ell5++ ){ - for(ell4 = 0; ell4 < (int)pow(2.0,X->Boost.ishift_nspin); ell4++){ - tmp_v1[(1 + ell5+(i_max/(int)pow(2.0,X->Boost.ishift_nspin))*ell4)] = tmp_v3[(1 + ell4+((int)pow(2.0,X->Boost.ishift_nspin))*ell5)]; - } - } - } - else{ - #pragma omp parallel for default(none) private(ell4) \ - shared(i_max,nstate,tmp_v0,tmp_v1,tmp_v3) - for(ell4 = 0; ell4 < i_max; ell4++ ){ - tmp_v0[1 + ell4] = tmp_v1[1 + ell4]; - tmp_v1[1 + ell4] = tmp_v3[1 + ell4]; - } - }/* if pivot_flag */ - - }/* loop for j */ - - /* - ierr = MPI_Alltoall(&tmp_v1[1],(int)(i_max/nproc),MPI_DOUBLE_COMPLEX,&tmp_v3[1],(int)(i_max/nproc),MPI_DOUBLE_COMPLEX,MPI_COMM_WORLD); - ierr = 
MPI_Alltoall(&tmp_v0[1],(int)(i_max/nproc),MPI_DOUBLE_COMPLEX,&tmp_v2[1],(int)(i_max/nproc),MPI_DOUBLE_COMPLEX,MPI_COMM_WORLD); - */ - MPI_Alltoall(&tmp_v1[1],(int)(i_max/nproc),MPI_DOUBLE_COMPLEX,&tmp_v3[1],(int)(i_max/nproc),MPI_DOUBLE_COMPLEX,MPI_COMM_WORLD); - MPI_Alltoall(&tmp_v0[1],(int)(i_max/nproc),MPI_DOUBLE_COMPLEX,&tmp_v2[1],(int)(i_max/nproc),MPI_DOUBLE_COMPLEX,MPI_COMM_WORLD); - - - iomp=(int)pow(2.0,X->Boost.W0)/nproc; - #pragma omp parallel for default(none) private(ell4,ell5,ell6) \ - firstprivate(iomp) shared(i_max,X,nproc,nstate,tmp_v0,tmp_v1,tmp_v2,tmp_v3) - //for(ell4 = 0; ell4 < (int)pow(2.0,X->Boost.W0)/nproc; ell4++ ){ - for(ell4 = 0; ell4 < iomp; ell4++ ){ - for(ell5 = 0; ell5 < nproc; ell5++ ){ - for(ell6 = 0; ell6 < (int)(i_max/(int)pow(2.0,X->Boost.W0)); ell6++ ){ - tmp_v1[(1 + ell6+ell5*i_max/(int)pow(2.0,X->Boost.W0)+ell4*i_max/((int)pow(2.0,X->Boost.W0)/nproc))] = tmp_v3[(1 + ell6+ell4*i_max/(int)pow(2.0,X->Boost.W0)+ell5*i_max/nproc)]; - tmp_v0[(1 + ell6+ell5*i_max/(int)pow(2.0,X->Boost.W0)+ell4*i_max/((int)pow(2.0,X->Boost.W0)/nproc))] = tmp_v2[(1 + ell6+ell4*i_max/(int)pow(2.0,X->Boost.W0)+ell5*i_max/nproc)]; - } - } - } - - - }/* loop for iloop */ - - free_cd_2d_allocate(vecJ); - free_cd_2d_allocate(matJ); - free_cd_2d_allocate(matJ2); - free_cd_2d_allocate(matB); - free_cd_1d_allocate(matJL); - free_cd_1d_allocate(matI); -#endif - -}/*void child_general_int_spin_MPIBoost*/ - diff --git a/src/mltplySpin.c b/src/mltplySpin.c index e9da2fb21..3fd61530f 100644 --- a/src/mltplySpin.c +++ b/src/mltplySpin.c @@ -21,7 +21,6 @@ - mltplySpinGC() : Main routine of spin Hamiltonian (grandcanonical) - mltplyHalfSpinGC() : 1/2 spin - mltplyGeneralSpinGC() : general spin - - mltplySpinGCBoost() : Hub routines @@ -166,7 +165,6 @@ General on-site term #include "mltplyHubbardCore.h" #include "mltplyMPISpin.h" #include "mltplyMPISpinCore.h" -#include "mltplyMPIBoost.h" /** @brief Driver function for Spin hamiltonian @return error code @@ -393,9 
+391,6 @@ int mltplySpinGC( if(iret != 0) return iret; - if(X->Boost.flgBoost == 1) - iret = mltplySpinGCBoost(X, nstate, tmp_v0, tmp_v1); - return iret; }/*int mltplySpinGC*/ /** @@ -777,40 +772,6 @@ shared(tmp_v0, tmp_v1) StopTimer(500); return 0; }/*int mltplyGeneralSpinGC*/ -/** -@brief Driver function for Spin hamiltonian (Boost) -@return error code -@author Kazuyoshi Yoshimi (The University of Tokyo) -*/ -int mltplySpinGCBoost( - struct BindStruct *X,//!<[inout] - int nstate, double complex **tmp_v0,//!<[inout] Result vector - double complex **tmp_v1//!<[in] Input producted vector -) -{ - long unsigned int j; - - /* SpinGCBoost */ - double complex* tmp_v2; - double complex* tmp_v3; - /* SpinGCBoost */ - - long unsigned int i_max; - i_max = X->Check.idim_max; - - StartTimer(500); - tmp_v2 = cd_1d_allocate(i_max+1); - tmp_v3 = cd_1d_allocate(i_max+1); - - child_general_int_spin_MPIBoost(X, nstate, tmp_v0, tmp_v1, tmp_v2, tmp_v3); - - /* SpinGCBoost */ - free_cd_1d_allocate(tmp_v2); - free_cd_1d_allocate(tmp_v3); - /* SpinGCBoost */ - StopTimer(500); - return 0; -}/*int mltplySpinGCBoost*/ /******************************************************************************/ //[s] child functions diff --git a/src/vec12.c b/src/vec12.c deleted file mode 100644 index 46316e749..000000000 --- a/src/vec12.c +++ /dev/null @@ -1,86 +0,0 @@ -/* HPhi - Quantum Lattice Model Simulator */ -/* Copyright (C) 2015 The University of Tokyo */ - -/* This program is free software: you can redistribute it and/or modify */ -/* it under the terms of the GNU General Public License as published by */ -/* the Free Software Foundation, either version 3 of the License, or */ -/* (at your option) any later version. */ - -/* This program is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ -/* GNU General Public License for more details. 
*/ - -/* You should have received a copy of the GNU General Public License */ -/* along with this program. If not, see . */ -/**@file -@brief Functions to Diagonalize a tri-diagonal matrix and store eigenvectors -into ::vec -*/ -#include "matrixlapack.h" -#include "Common.h" -#include "wrapperMPI.h" -#include "common/setmemory.h" -#include "xsetmem.h" -/** -@brief Diagonalize a tri-diagonal matrix and store eigenvectors -into ::vec -@author Takahiro Misawa (The University of Tokyo) -@author Youhei Yamaji (The University of Tokyo) -*/ -void vec12( - double alpha[], - double beta[], - unsigned int ndim, - double tmp_E[], - struct BindStruct *X -) { - unsigned int j,k,nvec; - - double **tmpA, **tmpvec; - double *tmpr; - - nvec = X->Def.nvec; - tmpA = d_2d_allocate(ndim, ndim); - tmpvec = d_2d_allocate(ndim, ndim); - tmpr = d_1d_allocate(ndim); - -#pragma omp parallel for default(none) firstprivate(ndim) private(j,k) shared(tmpA) - for(k=0;k<=ndim-1;k++) - for(j=0;j<=ndim-1;j++) tmpA[k][j]=0.0; -#pragma omp parallel for default(none) firstprivate(ndim, nvec) private(j,k) shared(vec) - for(k=1;k<=nvec;k++) - for(j=1;j<=ndim;j++) vec[k][j]=0.0; - -#pragma omp parallel for default(none) firstprivate(ndim, alpha, beta) private(j) shared(tmpA) - for(j=0;j<=ndim-2;j++){ - tmpA[j][j]=alpha[j+1]; - tmpA[j][j+1]=beta[j+1]; - tmpA[j+1][j]=beta[j+1]; - }/*for(j=0;j<=ndim-2;j++)*/ - tmpA[ndim-1][ndim-1]=alpha[ndim]; - - DSEVvector( ndim, tmpA, tmpr, tmpvec ); - if(X->Def.iCalcType==Lanczos && X->Def.iFlgCalcSpec == 0) - fprintf(stdoutMPI, " Lanczos EigenValue in vec12 = %.10lf \n ",tmpr[0]); - - if (nvec <= ndim) { - if (nvec < X->Def.LanczosTarget) nvec = X->Def.LanczosTarget; - -#pragma omp parallel for default(none) firstprivate(ndim, nvec) private(j,k) shared(tmpvec, vec, tmp_E, tmpr) - for(k=1;k<=nvec;k++){ - tmp_E[k]=tmpr[k-1]; - for (j = 1; j <= ndim; j++) vec[k][j] = tmpvec[k - 1][j - 1]; - }/*for(k=1;k<=nvec;k++)*/ - }/*if(nvec<=ndim)*/ - else{ -#pragma omp parallel for 
default(none) firstprivate(ndim, nvec) private(j,k) shared(tmpvec, vec, tmp_E, tmpr) - for(k=1;k<=ndim;k++){ - tmp_E[k]=tmpr[k-1]; - for (j = 1; j <= ndim; j++) vec[k][j] = tmpvec[k - 1][j - 1]; - }/*for(k=1;k<=ndim;k++)*/ - }/*if(nvec>ndim)*/ - free_d_2d_allocate(tmpA); - free_d_1d_allocate(tmpr); - free_d_2d_allocate(tmpvec); -}/*void vec12*/ From 2ae1014ea965eddfeac0048f1872172ca7fa34eb Mon Sep 17 00:00:00 2001 From: mitsuaki1987 Date: Tue, 5 Mar 2019 17:16:41 +0900 Subject: [PATCH 05/50] Backup --- src/expec_totalspin.c | 442 ++++++++++++++++++++---------------------- src/phys.c | 22 +-- 2 files changed, 226 insertions(+), 238 deletions(-) diff --git a/src/expec_totalspin.c b/src/expec_totalspin.c index 3accd298f..51440f1c2 100644 --- a/src/expec_totalspin.c +++ b/src/expec_totalspin.c @@ -48,8 +48,9 @@ */ int expec_totalspin ( - struct BindStruct *X, - double complex *vec + struct BindStruct *X, + int nstate, + double complex **vec ) { X->Large.mode = M_TOTALS; @@ -75,7 +76,6 @@ int expec_totalspin } return 0; } - /** * @brief function of calculating totalspin for Hubbard model * @@ -85,23 +85,28 @@ int expec_totalspin * @author Takahiro Misawa (The University of Tokyo) * @author Kazuyoshi Yoshimi (The University of Tokyo) */ -void totalspin_Hubbard(struct BindStruct *X,double complex *vec) { +void totalspin_Hubbard( + struct BindStruct *X, + int nstate, + double complex **vec +) { long unsigned int j; long unsigned int irght, ilft, ihfbit; long unsigned int isite1, isite2; long unsigned int is1_up, is2_up, is1_down, is2_down; long unsigned int iexchg, off; - int num1_up, num2_up; + int num1_up, num2_up, istate; int num1_down, num2_down; long unsigned int ibit1_up, ibit2_up, ibit1_down, ibit2_down; - double complex spn_z, tmp_spn_z; - double complex spn; + double complex tmp_spn_z; long unsigned i_max; i_max = X->Check.idim_max; GetSplitBitByModel(X->Def.Nsite, X->Def.iCalcModel, &irght, &ilft, &ihfbit); - spn = 0.0; - spn_z = 0.0; + for (istate = 0; istate < 
nstate; istate++) { + X->Phys.s2[istate] = 0.0; + X->Phys.Sz[istate] = 0.0; + } for (isite1 = 1; isite1 <= X->Def.NsiteMPI; isite1++) { is1_up = X->Def.Tpow[2 * isite1 - 2]; is1_down = X->Def.Tpow[2 * isite1 - 1]; @@ -124,28 +129,31 @@ void totalspin_Hubbard(struct BindStruct *X,double complex *vec) { num1_down = ibit1_down / is1_down; tmp_spn_z = (num1_up - num1_down) * (num2_up - num2_down); - spn += conj(vec[j]) * vec[j] * tmp_spn_z / 4.0; + for (istate = 0; istate < nstate; istate++) + X->Phys.s2[istate] += conj(vec[j][istate]) * vec[j][istate] * tmp_spn_z / 4.0; if (isite1 == isite2) { - spn += conj(vec[j]) * vec[j] * (num1_up + num1_down - 2 * num1_up * num1_down) / 2.0; - spn_z += conj(vec[j]) * vec[j] * (num1_up - num1_down) / 2.0; + for (istate = 0; istate < nstate; istate++) { + X->Phys.s2[istate] += conj(vec[j][istate]) * vec[j][istate] * (num1_up + num1_down - 2 * num1_up * num1_down) / 2.0; + X->Phys.Sz[istate] += conj(vec[j][istate]) * vec[j][istate] * (num1_up - num1_down) / 2.0; + } } else { if (ibit1_up != 0 && ibit1_down == 0 && ibit2_up == 0 && ibit2_down != 0) { iexchg = list_1[j] - (is1_up + is2_down); iexchg += (is2_up + is1_down); GetOffComp(list_2_1, list_2_2, iexchg, irght, ilft, ihfbit, &off); - spn += conj(vec[j]) * vec[off] / 2.0; + for (istate = 0; istate < nstate; istate++) + X->Phys.s2[istate] += conj(vec[j][istate]) * vec[off][istate] / 2.0; } else if (ibit1_up == 0 && ibit1_down != 0 && ibit2_up != 0 && ibit2_down == 0) { iexchg = list_1[j] - (is1_down + is2_up); iexchg += (is2_down + is1_up); GetOffComp(list_2_1, list_2_2, iexchg, irght, ilft, ihfbit, &off); - spn += conj(vec[j]) * vec[off] / 2.0; + for (istate = 0; istate < nstate; istate++) + X->Phys.s2[istate] += conj(vec[j][istate]) * vec[off][istate] / 2.0; } } } } } - X->Phys.s2 = creal(spn); - X->Phys.Sz = creal(spn_z); } /** * @brief function of calculating totalspin for Hubbard model in grand canonical ensemble @@ -156,22 +164,27 @@ void totalspin_Hubbard(struct BindStruct 
*X,double complex *vec) { * @author Takahiro Misawa (The University of Tokyo) * @author Kazuyoshi Yoshimi (The University of Tokyo) */ -void totalspin_HubbardGC(struct BindStruct *X,double complex *vec) { +void totalspin_HubbardGC( + struct BindStruct *X, + int nstate, + double complex **vec +) { long unsigned int j; long unsigned int isite1, isite2; long unsigned int is1_up, is2_up, is1_down, is2_down; long unsigned int iexchg, off; - int num1_up, num2_up; + int num1_up, num2_up, istate; int num1_down, num2_down; long unsigned int ibit1_up, ibit2_up, ibit1_down, ibit2_down, list_1_j; - double complex spn_z, tmp_spn_z; - double complex spn; + double complex tmp_spn_z; long unsigned int i_max; i_max = X->Check.idim_max; - spn = 0.0; - spn_z = 0.0; + for (istate = 0; istate < nstate; istate++) { + X->Phys.s2[istate] = 0.0; + X->Phys.Sz[istate] = 0.0; + } for (isite1 = 1; isite1 <= X->Def.NsiteMPI; isite1++) { for (isite2 = 1; isite2 <= X->Def.NsiteMPI; isite2++) { is1_up = X->Def.Tpow[2 * isite1 - 2]; @@ -193,28 +206,29 @@ void totalspin_HubbardGC(struct BindStruct *X,double complex *vec) { num2_down = ibit2_down / is2_down; tmp_spn_z = (num1_up - num1_down) * (num2_up - num2_down); - spn += conj(vec[j]) * vec[j] * tmp_spn_z / 4.0; + for (istate = 0; istate < nstate; istate++) + X->Phys.s2[istate] += conj(vec[j][istate]) * vec[j][istate] * tmp_spn_z / 4.0; if (isite1 == isite2) { - spn += conj(vec[j]) * vec[j] * (num1_up + num1_down - 2 * num1_up * num1_down) / 2.0; - spn_z += conj(vec[j]) * vec[j] * (num1_up - num1_down) / 2.0; + spn += conj(vec[j][istate]) * vec[j][istate] * (num1_up + num1_down - 2 * num1_up * num1_down) / 2.0; + spn_z += conj(vec[j][istate]) * vec[j][istate] * (num1_up - num1_down) / 2.0; } else { if (ibit1_up != 0 && ibit1_down == 0 && ibit2_up == 0 && ibit2_down != 0) { iexchg = list_1_j - (is1_up + is2_down); iexchg += (is2_up + is1_down); off = iexchg + 1; - spn += conj(vec[j]) * vec[off] / 2.0; + for (istate = 0; istate < nstate; istate++) + 
X->Phys.s2[istate] += conj(vec[j][istate]) * vec[off][istate] / 2.0; } else if (ibit1_up == 0 && ibit1_down != 0 && ibit2_up != 0 && ibit2_down == 0) { iexchg = list_1_j - (is1_down + is2_up); iexchg += (is2_down + is1_up); off = iexchg + 1; - spn += conj(vec[j]) * vec[off] / 2.0; + for (istate = 0; istate < nstate; istate++) + X->Phys.s2[istate] += conj(vec[j][istate]) * vec[off][istate] / 2.0; } } } } } - X->Phys.s2 = creal(spn); - X->Phys.Sz = creal(spn_z); } /** * @brief function of calculating totalspin for spin model @@ -227,7 +241,7 @@ void totalspin_HubbardGC(struct BindStruct *X,double complex *vec) { * @author Takahiro Misawa (The University of Tokyo) * @author Kazuyoshi Yoshimi (The University of Tokyo) */ -void totalspin_Spin(struct BindStruct *X,double complex *vec) { +void totalspin_Spin(struct BindStruct *X,int nstate, double complex **vec) { long unsigned int j; long unsigned int irght, ilft, ihfbit; @@ -269,13 +283,13 @@ void totalspin_Spin(struct BindStruct *X,double complex *vec) { #pragma omp parallel for default(none) reduction (+:spn_zd) shared(vec) \ firstprivate(i_max, spn_z) private(j) for (j = 1; j <= i_max; j++) { - spn_zd += conj(vec[j]) * vec[j] * spn_z / 4.0; + spn_zd += conj(vec[j][istate]) * vec[j][istate] * spn_z / 4.0; } if (isite1 == isite2) { #pragma omp parallel for default(none) reduction (+:spn_zd) shared(vec) \ firstprivate(i_max) private(j) for (j = 1; j <= i_max; j++) { - spn_zd += conj(vec[j]) * vec[j] / 2.0; + spn_zd += conj(vec[j][istate]) * vec[j][istate] / 2.0; } } else {//off diagonal spn += X_child_general_int_spin_TotalS_MPIdouble(isite1 - 1, isite2 - 1, X, vec, vec); @@ -303,7 +317,7 @@ void totalspin_Spin(struct BindStruct *X,double complex *vec) { num1_up = ibit1_up / is1_up; num1_down = 1 - num1_up; spn_z = (num1_up - num1_down) * (num2_up - num2_down); - spn_zd += conj(vec[j]) * vec[j] * spn_z / 4.0; + spn_zd += conj(vec[j][istate]) * vec[j][istate] * spn_z / 4.0; } if (isite1 < isite2) { spn += 
X_child_general_int_spin_MPIsingle(isite1 - 1, 0, 1, isite2 - 1, 1, 0, 1.0, X, vec, vec); @@ -327,16 +341,17 @@ void totalspin_Spin(struct BindStruct *X,double complex *vec) { num2_down = 1 - num2_up; spn_z = (num1_up - num1_down) * (num2_up - num2_down); - spn_zd += conj(vec[j]) * vec[j] * spn_z / 4.0; + spn_zd += conj(vec[j][istate]) * vec[j][istate] * spn_z / 4.0; if (isite1 == isite2) { - spn_zd += conj(vec[j]) * vec[j] / 2.0; + spn_zd += conj(vec[j][istate]) * vec[j][istate] / 2.0; } else { ibit_tmp = (num1_up) ^ (num2_up); if (ibit_tmp != 0) { iexchg = list_1[j] ^ (is_up); GetOffComp(list_2_1, list_2_2, iexchg, irght, ilft, ihfbit, &off); - spn += conj(vec[j]) * vec[off] / 2.0; + for (istate = 0; istate < nstate; istate++) + X->Phys.s2[istate] += conj(vec[j][istate]) * vec[off][istate] / 2.0; } } }// j @@ -355,15 +370,15 @@ void totalspin_Spin(struct BindStruct *X,double complex *vec) { #pragma omp parallel for reduction(+: spn, spn_z) default(none) firstprivate(i_max, isite1, X, S1) private (spn_z1)shared(vec, list_1) for (j = 1; j <= i_max; j++) { spn_z1 = 0.5 * GetLocal2Sz(isite1, list_1[j], X->Def.SiteToBit, X->Def.Tpow); - spn += conj(vec[j]) * vec[j] * S1 * (S1 + 1.0); - spn_z += conj(vec[j]) * vec[j] * spn_z1; + spn += conj(vec[j][istate]) * vec[j][istate] * S1 * (S1 + 1.0); + spn_z += conj(vec[j][istate]) * vec[j][istate] * spn_z1; } } else { #pragma omp parallel for reduction(+: spn) default(none) firstprivate(i_max, isite1, isite2, X, S1, S2) private(spn_z1, spn_z2, off, off_2, ibit_tmp, sigma_1, sigma_2) shared(vec, list_1) for (j = 1; j <= i_max; j++) { spn_z1 = 0.5 * GetLocal2Sz(isite1, list_1[j], X->Def.SiteToBit, X->Def.Tpow); spn_z2 = 0.5 * GetLocal2Sz(isite2, list_1[j], X->Def.SiteToBit, X->Def.Tpow); - spn += conj(vec[j]) * vec[j] * spn_z1 * spn_z2; + spn += conj(vec[j][istate]) * vec[j][istate] * spn_z1 * spn_z2; sigma_1 = GetBitGeneral(isite1, list_1[j], X->Def.SiteToBit, X->Def.Tpow); sigma_2 = GetBitGeneral(isite2, list_1[j], 
X->Def.SiteToBit, X->Def.Tpow); @@ -375,7 +390,9 @@ void totalspin_Spin(struct BindStruct *X,double complex *vec) { X->Def.Tpow); if (ibit_tmp == TRUE) { ConvertToList1GeneralSpin(off_2, X->Check.sdim, &off); - spn += conj(vec[j]) * vec[off] * sqrt(S2 * (S2 + 1) - spn_z2 * (spn_z2 + 1)) * + for (istate = 0; istate < nstate; istate++) + X->Phys.s2[istate] += conj(vec[j][istate]) * vec[off][istate] + * sqrt(S2 * (S2 + 1) - spn_z2 * (spn_z2 + 1)) * sqrt(S1 * (S1 + 1) - spn_z1 * (spn_z1 - 1)) / 2.0; } } @@ -387,7 +404,9 @@ void totalspin_Spin(struct BindStruct *X,double complex *vec) { X->Def.Tpow); if (ibit_tmp == TRUE) { ConvertToList1GeneralSpin(off_2, X->Check.sdim, &off); - spn += conj(vec[j]) * vec[off] * sqrt(S2 * (S2 + 1) - spn_z2 * (spn_z2 - 1.0)) * + for (istate = 0; istate < nstate; istate++) + X->Phys.s2[istate] += conj(vec[j][istate]) * vec[off][istate] + * sqrt(S2 * (S2 + 1) - spn_z2 * (spn_z2 - 1.0)) * sqrt(S1 * (S1 + 1) - spn_z1 * (spn_z1 + 1)) / 2.0; } } @@ -404,8 +423,6 @@ void totalspin_Spin(struct BindStruct *X,double complex *vec) { X->Phys.s2 = creal(spn); X->Phys.Sz = creal(spn_z); } - - /** * @brief function of calculating totalspin for spin model in grand canonical ensemble * @@ -418,7 +435,7 @@ void totalspin_Spin(struct BindStruct *X,double complex *vec) { * @author Takahiro Misawa (The University of Tokyo) * @author Kazuyoshi Yoshimi (The University of Tokyo) */ -void totalspin_SpinGC(struct BindStruct *X,double complex *vec){ +void totalspin_SpinGC(struct BindStruct *X,int nstate, double complex **vec){ long unsigned int j; long unsigned int isite1,isite2, tmp_isite1, tmp_isite2; @@ -439,203 +456,175 @@ void totalspin_SpinGC(struct BindStruct *X,double complex *vec){ spn_d=0.0; spn_z=0.0; if(X->Def.iFlgGeneralSpin==FALSE){ - for(isite1=1;isite1<=X->Def.NsiteMPI;isite1++){ - if(isite1 > X->Def.Nsite){ - is1_up = X->Def.Tpow[isite1-1]; - ibit1_up = myrank&is1_up; - num1_up = ibit1_up/is1_up; - num1_down =1-num1_up; + for (isite1 = 1; isite1 
<= X->Def.NsiteMPI; isite1++) { + if (isite1 > X->Def.Nsite) { + is1_up = X->Def.Tpow[isite1 - 1]; + ibit1_up = myrank & is1_up; + num1_up = ibit1_up / is1_up; + num1_down = 1 - num1_up; #pragma omp parallel for reduction(+: spn_z) default(none) firstprivate(i_max, is1_up, num1_up, num1_down) shared(vec) - for(j=1;j<=i_max;j++){ - spn_z += conj(vec[j])*vec[j]*(num1_up-num1_down)/2.0; - } + for (j = 1; j <= i_max; j++) { + spn_z += conj(vec[j][istate])*vec[j][istate] * (num1_up - num1_down) / 2.0; + } } else{ - is1_up = X->Def.Tpow[isite1-1]; + is1_up = X->Def.Tpow[isite1 - 1]; #pragma omp parallel for reduction(+: spn_z) default(none) firstprivate(i_max, is1_up) private(list_1_j, ibit1_up, num1_up, num1_down) shared(vec) - for(j=1;j<=i_max;j++){ - list_1_j=j-1; - ibit1_up = list_1_j&is1_up; - num1_up = ibit1_up/is1_up; - num1_down = 1-num1_up; - spn_z += conj(vec[j])*vec[j]*(num1_up-num1_down)/2.0; - } - + for (j = 1; j <= i_max; j++) { + list_1_j = j - 1; + ibit1_up = list_1_j & is1_up; + num1_up = ibit1_up / is1_up; + num1_down = 1 - num1_up; + spn_z += conj(vec[j][istate])*vec[j][istate] * (num1_up - num1_down) / 2.0; + } } - for(isite2=1;isite2<=X->Def.NsiteMPI;isite2++){ + for (isite2 = 1; isite2 <= X->Def.NsiteMPI; isite2++) { - if(isite1 > X->Def.Nsite && isite2 > X->Def.Nsite){ - is1_up = X->Def.Tpow[isite1-1]; - is2_up = X->Def.Tpow[isite2-1]; - num1_up = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is1_up, 1); - num1_down = 1-num1_up; - num2_up = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, 1); - num2_down = 1-num2_up; - spn_z2 = (num1_up-num1_down)*(num2_up-num2_down)/4.0; + if (isite1 > X->Def.Nsite && isite2 > X->Def.Nsite) { + is1_up = X->Def.Tpow[isite1 - 1]; + is2_up = X->Def.Tpow[isite2 - 1]; + num1_up = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is1_up, 1); + num1_down = 1 - num1_up; + num2_up = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, 1); + num2_down = 1 - num2_up; + spn_z2 = (num1_up - 
num1_down)*(num2_up - num2_down) / 4.0; #pragma omp parallel for default(none) reduction (+:spn_d) shared(vec) \ firstprivate(i_max, spn_z2) private(j) - for (j = 1; j <= i_max; j++) { - spn_d += conj(vec[j])*vec[j]*spn_z2; - } - if(isite1 == isite2){ + for (j = 1; j <= i_max; j++) { + spn_d += conj(vec[j][istate])*vec[j][istate] * spn_z2; + } + if (isite1 == isite2) { #pragma omp parallel for default(none) reduction (+:spn_d) shared(vec) \ firstprivate(i_max) private(j) - for (j = 1; j <= i_max; j++) { - spn_d += conj(vec[j])*vec[j]/2.0; - } - }//isite1 = isite2 - else{//off diagonal - spn += X_GC_child_CisAitCiuAiv_spin_MPIdouble(isite1-1, 0, 1, isite2-1, 1, 0, 1.0, X, vec, vec)/2.0; - } - } - else if(isite1 > X->Def.Nsite || isite2 > X->Def.Nsite){ - if(isite1 < isite2){ - tmp_isite1=isite1; - tmp_isite2=isite2; - } - else{ - tmp_isite1 = isite2; - tmp_isite2 = isite1; - } - is1_up = X->Def.Tpow[tmp_isite1 - 1]; - is2_up = X->Def.Tpow[tmp_isite2 - 1]; - num2_up = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, 1); - num2_down =1-num2_up; - //diagonal + for (j = 1; j <= i_max; j++) { + spn_d += conj(vec[j][istate])*vec[j][istate] / 2.0; + } + }//isite1 = isite2 + else {//off diagonal + spn += X_GC_child_CisAitCiuAiv_spin_MPIdouble(isite1 - 1, 0, 1, isite2 - 1, 1, 0, 1.0, X, vec, vec) / 2.0; + } + } + else if (isite1 > X->Def.Nsite || isite2 > X->Def.Nsite) { + if (isite1 < isite2) { + tmp_isite1 = isite1; + tmp_isite2 = isite2; + } + else { + tmp_isite1 = isite2; + tmp_isite2 = isite1; + } + is1_up = X->Def.Tpow[tmp_isite1 - 1]; + is2_up = X->Def.Tpow[tmp_isite2 - 1]; + num2_up = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, 1); + num2_down = 1 - num2_up; + //diagonal #pragma omp parallel for reduction(+: spn_d) default(none) firstprivate(i_max, is1_up, num2_up, num2_down) private(ibit1_up, num1_up, num1_down, spn_z2, list_1_j) shared(vec) - for(j=1;j<=i_max;j++){ - list_1_j=j-1; - ibit1_up = list_1_j&is1_up; - num1_up = ibit1_up/is1_up; - 
num1_down = 1-num1_up; - spn_z2 = (num1_up-num1_down)*(num2_up-num2_down); - spn_d += conj(vec[j])*vec[j]*spn_z2/4.0; - } - if(isite1 < isite2){ - spn += X_GC_child_CisAitCiuAiv_spin_MPIsingle(isite1-1, 0, 1, isite2-1, 1, 0, 1.0, X, vec, vec)/2.0; - } - else{ - spn += conj(X_GC_child_CisAitCiuAiv_spin_MPIsingle(isite2-1, 1, 0, isite1-1, 0, 1, 1.0, X, vec, vec))/2.0; - } - } - else{ - is2_up = X->Def.Tpow[isite2-1]; - is_up = is1_up+is2_up; -#pragma omp parallel for reduction(+: spn, spn_d) default(none) firstprivate(i_max, is_up, is1_up, is2_up, isite1, isite2) private(list_1_j, ibit1_up, num1_up, ibit2_up, num2_up, num1_down, num2_down, spn_z2, iexchg, off, ibit_tmp) shared(vec) - for(j=1;j<=i_max;j++){ - list_1_j=j-1; - ibit1_up = list_1_j&is1_up; - num1_up = ibit1_up/is1_up; - num1_down = 1-num1_up; - ibit2_up = list_1_j&is2_up; - num2_up = ibit2_up/is2_up; - num2_down = 1-num2_up; - - spn_z2 = (num1_up-num1_down)*(num2_up-num2_down); - spn_d += conj(vec[j])*vec[j]*spn_z2/4.0; - - if(isite1==isite2){ - spn_d += conj(vec[j])*vec[j]/2.0; - }else{ - ibit_tmp = (num1_up) ^ (num2_up); - if(ibit_tmp!=0){ - iexchg = list_1_j ^ (is_up); - off = iexchg+1; - spn += conj(vec[j])*vec[off]/2.0; + for (j = 1; j <= i_max; j++) { + list_1_j = j - 1; + ibit1_up = list_1_j & is1_up; + num1_up = ibit1_up / is1_up; + num1_down = 1 - num1_up; + spn_z2 = (num1_up - num1_down)*(num2_up - num2_down); + spn_d += conj(vec[j][istate])*vec[j][istate] * spn_z2 / 4.0; + } + if (isite1 < isite2) { + spn += X_GC_child_CisAitCiuAiv_spin_MPIsingle(isite1 - 1, 0, 1, isite2 - 1, 1, 0, 1.0, X, vec, vec) / 2.0; + } + else { + spn += conj(X_GC_child_CisAitCiuAiv_spin_MPIsingle(isite2 - 1, 1, 0, isite1 - 1, 0, 1, 1.0, X, vec, vec)) / 2.0; + } } + else { + is2_up = X->Def.Tpow[isite2 - 1]; + is_up = is1_up + is2_up; +#pragma omp parallel for reduction(+: spn, spn_d) default(none) firstprivate(i_max, is_up, is1_up, is2_up, isite1, isite2) private(list_1_j, ibit1_up, num1_up, ibit2_up, num2_up, 
num1_down, num2_down, spn_z2, iexchg, off, ibit_tmp) shared(vec) + for (j = 1; j <= i_max; j++) { + list_1_j = j - 1; + ibit1_up = list_1_j & is1_up; + num1_up = ibit1_up / is1_up; + num1_down = 1 - num1_up; + ibit2_up = list_1_j & is2_up; + num2_up = ibit2_up / is2_up; + num2_down = 1 - num2_up; - } - }//j - }//else + spn_z2 = (num1_up - num1_down)*(num2_up - num2_down); + spn_d += conj(vec[j][istate])*vec[j][istate] * spn_z2 / 4.0; + + if (isite1 == isite2) { + spn_d += conj(vec[j][istate])*vec[j][istate] / 2.0; + } + else { + ibit_tmp = (num1_up) ^ (num2_up); + if (ibit_tmp != 0) { + iexchg = list_1_j ^ (is_up); + off = iexchg + 1; + spn += conj(vec[j][istate])*vec[off][istate] / 2.0; + } + } + }//j + }//else } } } - else{//general spin - double S1=0; - double S2=0; - spn =0.0; - spn_z=0.0; - for(isite1=1;isite1<=X->Def.NsiteMPI;isite1++){ + else {//general spin + double S1 = 0; + double S2 = 0; + spn = 0.0; + spn_z = 0.0; + for (isite1 = 1; isite1 <= X->Def.NsiteMPI; isite1++) { S1=0.5*(X->Def.SiteToBit[isite1-1]-1); if(isite1 > X->Def.Nsite){ - spn_z1 = 0.5*GetLocal2Sz(isite1, (unsigned long int) myrank, X->Def.SiteToBit, X->Def.Tpow); + spn_z1 = 0.5*GetLocal2Sz(isite1, (unsigned long int) myrank, X->Def.SiteToBit, X->Def.Tpow); #pragma omp parallel for reduction(+: spn, spn_z) default(none) firstprivate(S1, spn_z1,i_max) shared(vec) - for(j=1;j<=i_max;j++){ - spn += conj(vec[j])*vec[j]*S1*(S1+1.0); - spn_z += conj(vec[j])*vec[j]*spn_z1; - } + for (j = 1; j <= i_max; j++) { + spn += conj(vec[j][istate])*vec[j][istate] * S1*(S1 + 1.0); + spn_z += conj(vec[j][istate])*vec[j][istate] * spn_z1; + } } else{ #pragma omp parallel for reduction(+: spn, spn_z) default(none) firstprivate(i_max, isite1, X, S1) private(spn_z1) shared(vec) - for(j=1;j<=i_max;j++){ - spn_z1 = 0.5*GetLocal2Sz(isite1, j-1, X->Def.SiteToBit, X->Def.Tpow); - spn += conj(vec[j])*vec[j]*S1*(S1+1.0); - spn_z += conj(vec[j])*vec[j]*spn_z1; - } + for (j = 1; j <= i_max; j++) { + spn_z1 = 
0.5*GetLocal2Sz(isite1, j - 1, X->Def.SiteToBit, X->Def.Tpow); + spn += conj(vec[j][istate])*vec[j][istate] * S1*(S1 + 1.0); + spn_z += conj(vec[j][istate])*vec[j][istate] * spn_z1; + } } for(isite2=1;isite2<=X->Def.NsiteMPI;isite2++){ - if(isite1==isite2) continue; - S2=0.5*(X->Def.SiteToBit[isite2-1]-1); - - if(isite1 > X->Def.Nsite && isite2 > X->Def.Nsite){ - /* - spn_z1 = 0.5*GetLocal2Sz(isite1, (unsigned long int) myrank, X->Def.SiteToBit, X->Def.Tpow); - spn_z2 = 0.5*GetLocal2Sz(isite2, (unsigned long int) myrank, X->Def.SiteToBit, X->Def.Tpow); -#pragma omp parallel for reduction(+: spn, spn_z) default(none) firstprivate(spn_z1, spn_z2, i_max) shared(vec) - for(j=1;j<=i_max;j++){ - spn += conj(vec[j])*vec[j]*spn_z1*spn_z2; - } - tmp_V= sqrt(S2*(S2+1) - spn_z2*(spn_z2+1))*sqrt(S1*(S1+1) - spn_z1*(spn_z1-1))/2.0; - spn += X_GC_child_CisAitCjuAjv_GeneralSpin_MPIdouble(isite1-1, sigma_1-1, sigma_1, isite2-1, sigma_2+1, sigma_2, tmp_V, X,vec, vec); - tmp_V= sqrt(S2*(S2+1) - spn_z2*(spn_z2-1))*sqrt(S1*(S1+1) - spn_z1*(spn_z1+1))/2.0; - spn += X_GC_child_CisAitCjuAjv_GeneralSpin_MPIdouble(isite1-1, sigma_1+1, sigma_1, isite2-1, sigma_2-1, sigma_2, tmp_V, X,vec, vec); - */ - } - else if(isite1 > X->Def.Nsite || isite2 > X->Def.Nsite){ - /* - if(isite1 < isite2){ - tmp_isite1=isite1; - tmp_isite2=isite2; - } - else{ - tmp_isite1 = isite2; - tmp_isite2 = isite1; - } - spn_z2 = 0.5*GetLocal2Sz(tmp_isite2, (unsigned long int) myrank, X->Def.SiteToBit, X->Def.Tpow); - sigma_2 = GetBitGeneral(tmp_isite2, (unsigned long int) myrank, X->Def.SiteToBit, X->Def.Tpow); - */ - } - else{ //inner-process -#pragma omp parallel for reduction(+: spn) default(none) firstprivate(i_max, isite1, isite2, X, S1, S2) private(spn_z1, spn_z2, off, off_2, ibit_tmp, sigma_1, sigma_2) shared(vec) - for(j=1;j<=i_max;j++){ - spn_z1 = 0.5*GetLocal2Sz(isite1, j-1, X->Def.SiteToBit, X->Def.Tpow); - spn_z2 = 0.5*GetLocal2Sz(isite2, j-1, X->Def.SiteToBit, X->Def.Tpow); - spn += 
conj(vec[j])*vec[j]*spn_z1*spn_z2; - - sigma_1=GetBitGeneral(isite1, j-1, X->Def.SiteToBit, X->Def.Tpow); - sigma_2=GetBitGeneral(isite2, j-1, X->Def.SiteToBit, X->Def.Tpow); - - ibit_tmp = GetOffCompGeneralSpin(j-1, isite2, sigma_2, sigma_2+1, &off, X->Def.SiteToBit, X->Def.Tpow); - if(ibit_tmp!=0){ - ibit_tmp = GetOffCompGeneralSpin(off, isite1, sigma_1, sigma_1-1,&off_2, X->Def.SiteToBit, X->Def.Tpow); - if(ibit_tmp!=0){ - spn += conj(vec[j])*vec[off_2+1]*sqrt(S2*(S2+1) - spn_z2*(spn_z2+1))*sqrt(S1*(S1+1) - spn_z1*(spn_z1-1))/2.0; + if (isite1 == isite2) continue; + S2 = 0.5*(X->Def.SiteToBit[isite2 - 1] - 1); + if (isite1 > X->Def.Nsite && isite2 > X->Def.Nsite) { } - } - - ibit_tmp = GetOffCompGeneralSpin(j-1, isite2, sigma_2, sigma_2-1, &off, X->Def.SiteToBit, X->Def.Tpow); - if(ibit_tmp !=0){ - ibit_tmp = GetOffCompGeneralSpin(off, isite1, sigma_1, sigma_1+1, &off_2, X->Def.SiteToBit, X->Def.Tpow); - if(ibit_tmp!=0){ - spn += conj(vec[j])*vec[off_2+1]*sqrt(S2*(S2+1) - spn_z2*(spn_z2-1.0))*sqrt(S1*(S1+1)- spn_z1*(spn_z1+1))/2.0; + else if (isite1 > X->Def.Nsite || isite2 > X->Def.Nsite) { } - } - }//j - }//inner-process + else { //inner-process +#pragma omp parallel for reduction(+: spn) default(none) firstprivate(i_max, isite1, isite2, X, S1, S2) private(spn_z1, spn_z2, off, off_2, ibit_tmp, sigma_1, sigma_2) shared(vec) + for (j = 1; j <= i_max; j++) { + spn_z1 = 0.5*GetLocal2Sz(isite1, j - 1, X->Def.SiteToBit, X->Def.Tpow); + spn_z2 = 0.5*GetLocal2Sz(isite2, j - 1, X->Def.SiteToBit, X->Def.Tpow); + spn += conj(vec[j][istate])*vec[j][istate] * spn_z1*spn_z2; + + sigma_1 = GetBitGeneral(isite1, j - 1, X->Def.SiteToBit, X->Def.Tpow); + sigma_2 = GetBitGeneral(isite2, j - 1, X->Def.SiteToBit, X->Def.Tpow); + + ibit_tmp = GetOffCompGeneralSpin(j - 1, isite2, sigma_2, sigma_2 + 1, &off, X->Def.SiteToBit, X->Def.Tpow); + if (ibit_tmp != 0) { + ibit_tmp = GetOffCompGeneralSpin(off, isite1, sigma_1, sigma_1 - 1, &off_2, X->Def.SiteToBit, X->Def.Tpow); + if 
(ibit_tmp != 0) { + spn += conj(vec[j][istate])*vec[off_2 + 1] * sqrt(S2*(S2 + 1) - spn_z2 * (spn_z2 + 1))*sqrt(S1*(S1 + 1) - spn_z1 * (spn_z1 - 1)) / 2.0; + } + } + ibit_tmp = GetOffCompGeneralSpin(j - 1, isite2, sigma_2, sigma_2 - 1, &off, X->Def.SiteToBit, X->Def.Tpow); + if (ibit_tmp != 0) { + ibit_tmp = GetOffCompGeneralSpin(off, isite1, sigma_1, sigma_1 + 1, &off_2, X->Def.SiteToBit, X->Def.Tpow); + if (ibit_tmp != 0) { + spn += conj(vec[j][istate])*vec[off_2 + 1] * sqrt(S2*(S2 + 1) - spn_z2 * (spn_z2 - 1.0))*sqrt(S1*(S1 + 1) - spn_z1 * (spn_z1 + 1)) / 2.0; + } + } + }//j + }//inner-process }//isite2 }//isite1 } - spn = SumMPI_dc(spn); spn_d = SumMPI_dc(spn_d); spn_z = SumMPI_dc(spn_z); @@ -644,8 +633,9 @@ void totalspin_SpinGC(struct BindStruct *X,double complex *vec){ } int expec_totalSz( - struct BindStruct *X, - double complex *vec + struct BindStruct *X, + int nstate, + double complex **vec ) { X->Large.mode = M_TOTALS; switch (X->Def.iCalcModel) { @@ -667,11 +657,8 @@ int expec_totalSz( default: X->Phys.Sz = 0.0; } - return 0; } - - /** * @brief function of calculating totalSz for Hubbard model in grand canonical ensemble * @@ -684,7 +671,7 @@ int expec_totalSz( void totalSz_HubbardGC ( struct BindStruct *X, - double complex *vec + int nstate, double complex **vec ) { long unsigned int j; @@ -709,7 +696,7 @@ void totalSz_HubbardGC num1_sz = num1_up - num1_down; #pragma omp parallel for reduction(+:spn_z) default(none) firstprivate(i_max) private(j) shared(num1_sz,vec) for (j = 1; j <= i_max; j++) { - spn_z += (num1_sz) / 2.0 * conj(vec[j]) * vec[j]; + spn_z += (num1_sz) / 2.0 * conj(vec[j][istate]) * vec[j][istate]; } #endif } else {//isite1 > X->Def.Nsite @@ -723,14 +710,13 @@ void totalSz_HubbardGC num1_up = ibit1_up / is1_up; ibit1_down = list_1_j & is1_down; num1_down = ibit1_down / is1_down; - spn_z += conj(vec[j]) * vec[j] * (num1_up - num1_down) / 2.0; + spn_z += conj(vec[j][istate]) * vec[j][istate] * (num1_up - num1_down) / 2.0; } } } spn_z = 
SumMPI_dc(spn_z); X->Phys.Sz = creal(spn_z); } - /** * @brief function of calculating totalSz for Spin model in grand canonical ensemble * @@ -743,7 +729,8 @@ void totalSz_HubbardGC void totalSz_SpinGC ( struct BindStruct *X, - double complex *vec + int nstate, + double complex **vec ) { long unsigned int j, list_1_j; long unsigned int isite1; @@ -766,10 +753,11 @@ void totalSz_SpinGC num1_down = 1 - num1_up; #pragma omp parallel for reduction(+: spn_z) default(none) firstprivate(i_max, is1_up, num1_up, num1_down) shared(vec) for (j = 1; j <= i_max; j++) { - spn_z += conj(vec[j]) * vec[j] * (num1_up - num1_down) / 2.0; + spn_z += conj(vec[j][istate]) * vec[j][istate] * (num1_up - num1_down) / 2.0; } #endif - } else { + } + else { is1_up = X->Def.Tpow[isite1 - 1]; #pragma omp parallel for reduction(+: spn_z) default(none) firstprivate(i_max, is1_up) private(list_1_j, ibit1_up, num1_up, num1_down) shared(vec) for (j = 1; j <= i_max; j++) { @@ -777,7 +765,7 @@ void totalSz_SpinGC ibit1_up = list_1_j & is1_up; num1_up = ibit1_up / is1_up; num1_down = 1 - num1_up; - spn_z += conj(vec[j]) * vec[j] * (num1_up - num1_down) / 2.0; + spn_z += conj(vec[j][istate]) * vec[j][istate] * (num1_up - num1_down) / 2.0; } }//else } @@ -788,13 +776,13 @@ void totalSz_SpinGC spn_z1 = 0.5 * GetLocal2Sz(isite1, (unsigned long int) myrank, X->Def.SiteToBit, X->Def.Tpow); #pragma omp parallel for reduction(+: spn_z) default(none) firstprivate(spn_z1, i_max) shared(vec) for (j = 1; j <= i_max; j++) { - spn_z += conj(vec[j]) * vec[j] * spn_z1; + spn_z += conj(vec[j][istate]) * vec[j][istate] * spn_z1; } } else { #pragma omp parallel for reduction(+: spn_z) default(none) firstprivate(i_max, isite1, X) private(spn_z1) shared(vec) for (j = 1; j <= i_max; j++) { spn_z1 = 0.5 * GetLocal2Sz(isite1, j - 1, X->Def.SiteToBit, X->Def.Tpow); - spn_z += conj(vec[j]) * vec[j] * spn_z1; + spn_z += conj(vec[j][istate]) * vec[j][istate] * spn_z1; } } }//isite1 diff --git a/src/phys.c b/src/phys.c index 
ba5ff943f..2fb2e6432 100644 --- a/src/phys.c +++ b/src/phys.c @@ -78,18 +78,18 @@ void phys(struct BindStruct *X, //!<[inout] } } } else { - if (X->Def.iCalcType == FullDiag) { - if (myrank == 0) { - for (j = 0; j < i_max; j++) { - v0[j + 1] = L_vec[i][j]; - } - } - } else { - for (j = 0; j < i_max; j++) { - v0[j + 1] = L_vec[i][j]; - } - + if (X->Def.iCalcType == FullDiag) { + if (myrank == 0) { + for (j = 0; j < i_max; j++) { + v0[j + 1] = L_vec[i][j]; + } + } + } + else { + for (j = 0; j < i_max; j++) { + v0[j + 1] = L_vec[i][j]; } + } } #else for (j = 0; j < i_max; j++) { From 576e78fe811b2351cc602c8ddc7fe496ce6ba125 Mon Sep 17 00:00:00 2001 From: mitsuaki1987 Date: Thu, 7 Mar 2019 00:31:24 +0900 Subject: [PATCH 06/50] Backup --- src/CalcByFullDiag.c | 4 +- src/CalcByLOBPCG.c | 64 ++- src/CalcByTPQ.c | 439 ++++++++-------- src/FirstMultiply.c | 30 +- src/Multiply.c | 101 ++-- src/PairEx.c | 66 +-- src/PairExHubbard.c | 389 +++++++------- src/PairExSpin.c | 620 +++++++++++------------ src/SingleEx.c | 3 +- src/SingleExHubbard.c | 49 +- src/expec_cisajs.c | 79 +-- src/expec_cisajscktaltdc.c | 99 ++-- src/expec_energy_flct.c | 785 ++++++++++++++++------------- src/expec_totalspin.c | 481 +++++++----------- src/include/expec_cisajs.h | 4 +- src/include/expec_cisajscktaltdc.h | 5 +- src/include/expec_energy_flct.h | 14 +- src/include/global.h | 4 +- src/include/struct.h | 22 +- src/lapack_diag.c | 2 +- src/matrixlapack.c | 2 +- src/output.c | 4 +- src/phys.c | 102 ++-- src/xsetmem.c | 195 +++---- 24 files changed, 1779 insertions(+), 1784 deletions(-) diff --git a/src/CalcByFullDiag.c b/src/CalcByFullDiag.c index d2110d8e1..668990c52 100644 --- a/src/CalcByFullDiag.c +++ b/src/CalcByFullDiag.c @@ -24,8 +24,8 @@ /// \retval FALSE(=0) abnormally finished. 
int CalcByFullDiag( - struct EDMainCalStruct *X - ) + struct EDMainCalStruct *X +) { int iret=0; fprintf(stdoutMPI, "%s", cLogFullDiag_SetHam_Start); diff --git a/src/CalcByLOBPCG.c b/src/CalcByLOBPCG.c index 0d656a55e..d4da65b86 100644 --- a/src/CalcByLOBPCG.c +++ b/src/CalcByLOBPCG.c @@ -28,6 +28,7 @@ localy optimal block (preconditioned) conjugate gradient method. #include "expec_energy_flct.h" #include "phys.h" #include +#include "mltplyCommon.h" #include "./common/setmemory.h" void zheevd_(char *jobz, char *uplo, int *n, double complex *a, int *lda, double *w, double complex *work, int *lwork, double *rwork, int * lrwork, int *iwork, int *liwork, int *info); @@ -160,7 +161,7 @@ static void Initialize_wave( FILE *fp; char sdt[D_FileNameMax]; size_t byte_size; - + double complex *vin; int iproc, ie, ierr; long int idim, iv, i_max; unsigned long int i_max_tmp, sum_i_max; @@ -180,6 +181,7 @@ static void Initialize_wave( fprintf(stdoutMPI, "%s", cLogInputVecStart); ierr = 0; + vin = cd_1d_allocate(X->Check.idim_max + 1); for (ie = 0; ie < X->Def.k_exct; ie++) { sprintf(sdt, cFileNameInputVector, ie, myrank); @@ -198,11 +200,12 @@ static void Initialize_wave( fprintf(stderr, "Error: Invalid restart file.\n"); exitMPI(-1); } - byte_size = fread(&v0[0][0], sizeof(complex double), X->Check.idim_max + 1, fp); - for (idim = 1; idim <= i_max; idim++) wave[idim][ie] = v0[idim][0]; + byte_size = fread(vin, sizeof(complex double), X->Check.idim_max + 1, fp); + for (idim = 1; idim <= i_max; idim++) wave[idim][ie] = vin[idim]; fclose(fp); } }/*for (ie = 0; ie < X->Def.k_exct; ie++)*/ + free_cd_1d_allocate(vin); if (ierr == 0) { //TimeKeeperWithRandAndStep(X, cFileNameTPQStep, cOutputVecFinish, "a", rand_i, step_i); @@ -305,18 +308,24 @@ static void Output_restart( size_t byte_size; char sdt[D_FileNameMax]; int ie; + long unsigned int idim; + double complex *vout; //TimeKeeperWithRandAndStep(&(X->Bind), cFileNameTPQStep, cOutputVecStart, "a", rand_i, step_i); fprintf(stdoutMPI, 
"%s", cLogOutputVecStart); + vout = cd_1d_allocate(X->Check.idim_max); for (ie = 0; ie < X->Def.k_exct; ie++) { sprintf(sdt, cFileNameOutputVector, ie, myrank); if (childfopenALL(sdt, "wb", &fp) != 0) exitMPI(-1); byte_size = fwrite(&X->Large.itr, sizeof(X->Large.itr), 1, fp); byte_size = fwrite(&X->Check.idim_max, sizeof(X->Check.idim_max), 1, fp); - byte_size = fwrite(wave[ie], sizeof(complex double), X->Check.idim_max + 1, fp); + for (idim = 1; idim <= X->Check.idim_max; idim++) vout[idim] = wave[idim][ie]; + byte_size = fwrite(vout, sizeof(complex double), X->Check.idim_max + 1, fp); fclose(fp); }/*for (ie = 0; ie < X->Def.k_exct; ie++)*/ + free_cd_1d_allocate(vout); + //TimeKeeperWithRandAndStep(&(X->Bind), cFileNameTPQStep, cOutputVecFinish, "a", rand_i, step_i); fprintf(stdoutMPI, "%s", cLogOutputVecFinish); if(byte_size == 0) printf("byte_size : %d\n", (int)byte_size); @@ -356,6 +365,7 @@ int LOBPCG_Main( i_max = X->Check.idim_max; + free(v0); free(v1); free(vg); wxp = cd_3d_allocate(3, X->Check.idim_max + 1, X->Def.k_exct); @@ -366,24 +376,23 @@ int LOBPCG_Main( @f${\bf x}=@f$initial guess */ Initialize_wave(X, &wxp[1]); - free(v0); TimeKeeper(X, cFileNameTimeKeep, cLanczos_EigenValueStart, "a"); - for (idim = 1; idim <= i_max; idim++) - for (ie = 0; ie < X->Def.k_exct; ie++) - hwxp[1][idim][ie] = 0.0; + zclear(i_max*X->Def.k_exct, &hwxp[1][1][0]); mltply(X, X->Def.k_exct, hwxp[1], wxp[1]); stp = 1; TimeKeeperWithStep(X, cFileNameTimeKeep, cLanczos_EigenValueStep, "a", 0); + zclear(i_max*X->Def.k_exct, &wxp[2][1][0]); + zclear(i_max*X->Def.k_exct, &hwxp[2][1][0]); for (ie = 0; ie < X->Def.k_exct; ie++) eig[ie] = 0.0; for (idim = 1; idim <= i_max; idim++) { for (ie = 0; ie < X->Def.k_exct; ie++) { wxp[2][idim][ie] = 0.0; hwxp[2][idim][ie] = 0.0; + eig[ie] += conj(wxp[1][idim][ie]) * hwxp[1][idim][ie]; } - eig[ie] += conj(wxp[1][idim][ie]) * hwxp[1][idim][ie]; } SumMPI_dv(X->Def.k_exct, eig); @@ -474,9 +483,7 @@ int LOBPCG_Main( /**@brief
  • @f${\bf W}={\hat H}{\bf w}@f$
  • */ -#pragma omp parallel for default(none) shared(hwxp,i_max,X) private(idim,ie) - for (idim = 1; idim <= i_max; idim++) - for (ie = 0; ie < X->Def.k_exct; ie++) hwxp[0][ie][idim] = 0.0; + zclear(i_max*X->Def.k_exct, &hwxp[0][1][0]); mltply(X, X->Def.k_exct, hwxp[0], wxp[0]); TimeKeeperWithStep(X, cFileNameTimeKeep, cLanczos_EigenValueStep, "a", stp); @@ -514,7 +521,7 @@ int LOBPCG_Main(
  • @f${\bf x}=\alpha {\bf w}+\beta {\bf x}+\gamma {\bf p}@f$, Normalize @f${\bf x}@f$
  • */ - for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++)v1buf[idim][ie] = 0.0; + zclear(i_max*X->Def.k_exct, &v1buf[1][0]); for (ii = 0; ii < 3; ii++) { zgemm_(&tC, &tN, &X->Def.k_exct, &i_max, &X->Def.k_exct, &one, &hsub[0][0][ii][0], &nsub, &wxp[ii][1][0], &X->Def.k_exct, &one, &v1buf[1][0], &X->Def.k_exct); @@ -525,7 +532,7 @@ int LOBPCG_Main(
  • @f${\bf X}=\alpha {\bf W}+\beta {\bf X}+\gamma {\bf P}@f$, Normalize @f${\bf X}@f$
  • */ - for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++)v1buf[idim][ie] = 0.0; + zclear(i_max*X->Def.k_exct, &v1buf[1][0]); for (ii = 0; ii < 3; ii++) { zgemm_(&tC, &tN, &X->Def.k_exct, &i_max, &X->Def.k_exct, &one, &hsub[0][0][ii][0], &nsub, &hwxp[ii][1][0], &X->Def.k_exct, &one, &v1buf[1][0], &X->Def.k_exct); @@ -536,7 +543,7 @@ int LOBPCG_Main(
  • @f${\bf p}=\alpha {\bf w}+\gamma {\bf p}@f$, Normalize @f${\bf p}@f$
  • */ - for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++)v1buf[idim][ie] = 0.0; + zclear(i_max*X->Def.k_exct, &v1buf[1][0]); for (ii = 0; ii < 3; ii += 2) { zgemm_(&tC, &tN, &X->Def.k_exct, &i_max, &X->Def.k_exct, &one, &hsub[0][0][ii][0], &nsub, &wxp[ii][1][0], &X->Def.k_exct, &one, &v1buf[1][0], &X->Def.k_exct); @@ -547,7 +554,7 @@ int LOBPCG_Main(
  • @f${\bf P}=\alpha {\bf W}+\gamma {\bf P}@f$, Normalize @f${\bf P}@f$
  • */ - for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++)v1buf[idim][ie] = 0.0; + zclear(i_max*X->Def.k_exct, &v1buf[1][0]); for (ii = 0; ii < 3; ii += 2) { zgemm_(&tC, &tN, &X->Def.k_exct, &i_max, &X->Def.k_exct, &one, &hsub[0][0][ii][0], &nsub, &hwxp[ii][1][0], &X->Def.k_exct, &one, &v1buf[1][0], &X->Def.k_exct); @@ -630,6 +637,7 @@ int CalcByLOBPCG( size_t byte_size; long int i_max = 0, ie, idim; FILE *fp; + double complex *vin; fprintf(stdoutMPI, "###### Eigenvalue with LOBPCG #######\n\n"); @@ -681,6 +689,7 @@ int CalcByLOBPCG( */ fprintf(stdoutMPI, "An Eigenvector is inputted.\n"); L_vec = cd_2d_allocate(X->Bind.Check.idim_max + 1, X->Bind.Def.k_exct); + vin = cd_1d_allocate(X->Bind.Check.idim_max + 1); for (ie = 0; ie < X->Bind.Def.k_exct; ie++) { TimeKeeper(&(X->Bind), cFileNameTimeKeep, cReadEigenVecStart, "a"); sprintf(sdt, cFileNameInputEigen, X->Bind.Def.CDataFileHead, ie, myrank); @@ -695,13 +704,14 @@ int CalcByLOBPCG( fprintf(stderr, "Error: Invalid Inputvector file.\n"); exitMPI(-1); } - byte_size = fread(v1, sizeof(complex double), X->Bind.Check.idim_max + 1, fp); + byte_size = fread(vin, sizeof(complex double), X->Bind.Check.idim_max + 1, fp); #pragma omp parallel for default(none) shared(L_vec, v1) firstprivate(i_max, ie), private(idim) for (idim = 0; idim < i_max; idim++) { - L_vec[ie][idim] = v1[idim + 1]; + L_vec[ie][idim] = vin[idim + 1]; } }/*for (ie = 0; ie < X->Def.k_exct; ie++)*/ fclose(fp); + free_cd_1d_allocate(vin); TimeKeeper(&(X->Bind), cFileNameTimeKeep, cReadEigenVecFinish, "a"); if(byte_size == 0) printf("byte_size : %d\n", (int)byte_size); @@ -728,12 +738,12 @@ int CalcByLOBPCG( for (ie = 0; ie < X->Bind.Def.k_exct; ie++) { //phys(&(X->Bind), ie); fprintf(fp, "State %ld\n", ie); - fprintf(fp, " Energy %.16lf \n", X->Bind.Phys.all_energy[ie]); - fprintf(fp, " Doublon %.16lf \n", X->Bind.Phys.all_doublon[ie]); - fprintf(fp, " Sz %.16lf \n", X->Bind.Phys.all_sz[ie]); - //fprintf(fp, " S^2 %.16lf \n", 
X->Bind.Phys.all_s2[ie]); - //fprintf(fp, " N_up %.16lf \n", X->Bind.Phys.all_num_up[ie]); - //fprintf(fp, " N_down %.16lf \n", X->Bind.Phys.all_num_down[ie]); + fprintf(fp, " Energy %.16lf \n", X->Bind.Phys.energy[ie]); + fprintf(fp, " Doublon %.16lf \n", X->Bind.Phys.doublon[ie]); + fprintf(fp, " Sz %.16lf \n", X->Bind.Phys.Sz[ie]); + //fprintf(fp, " S^2 %.16lf \n", X->Bind.Phys.s2[ie]); + //fprintf(fp, " N_up %.16lf \n", X->Bind.Phys.num_up[ie]); + //fprintf(fp, " N_down %.16lf \n", X->Bind.Phys.num_down[ie]); fprintf(fp, "\n"); } fclose(fp); @@ -743,19 +753,21 @@ int CalcByLOBPCG( if (X->Bind.Def.iOutputEigenVec == TRUE) { TimeKeeper(&(X->Bind), cFileNameTimeKeep, cOutputEigenVecStart, "a"); + vin = cd_1d_allocate(X->Bind.Check.idim_max); for (ie = 0; ie < X->Bind.Def.k_exct; ie++) { #pragma omp parallel for default(none) shared(X,v1,L_vec,ie) private(idim) for (idim = 0; idim < X->Bind.Check.idim_max; idim++) - v1[idim + 1][0] = L_vec[idim][ie]; + vin[idim + 1] = L_vec[idim][ie]; sprintf(sdt, cFileNameOutputEigen, X->Bind.Def.CDataFileHead, ie, myrank); if (childfopenALL(sdt, "wb", &fp) != 0) exitMPI(-1); byte_size = fwrite(&X->Bind.Large.itr, sizeof(X->Bind.Large.itr), 1, fp); byte_size = fwrite(&X->Bind.Check.idim_max, sizeof(X->Bind.Check.idim_max), 1, fp); - byte_size = fwrite(&v1[0][0], sizeof(complex double), X->Bind.Check.idim_max + 1, fp); + byte_size = fwrite(vin, sizeof(complex double), X->Bind.Check.idim_max + 1, fp); fclose(fp); }/*for (ie = 0; ie < X->Bind.Def.k_exct; ie++)*/ + free_cd_1d_allocate(vin); TimeKeeper(&(X->Bind), cFileNameTimeKeep, cOutputEigenVecStart, "a"); }/*if (X->Bind.Def.iOutputEigenVec == TRUE)*/ diff --git a/src/CalcByTPQ.c b/src/CalcByTPQ.c index 52b4bf101..2e48f1c62 100644 --- a/src/CalcByTPQ.c +++ b/src/CalcByTPQ.c @@ -22,8 +22,7 @@ #include "FileIO.h" #include "wrapperMPI.h" #include "CalcTime.h" - - +#include "common/setmemory.h" /** * @file CalcByTPQ.c * @version 0.1, 0.2 @@ -31,12 +30,8 @@ * @author Kazuyoshi Yoshimi 
(The University of Tokyo) * * @brief File for givinvg functions of TPQ method - * - * */ - /** - * * @brief A main function to calculate physical quqntities by TPQ method * * @param [in] NumAve Number of samples @@ -50,245 +45,279 @@ * @retval -1 unnormally finished */ int CalcByTPQ( - const int NumAve, - const int ExpecInterval, - struct EDMainCalStruct *X -) -{ + const int NumAve, + const int ExpecInterval, + struct EDMainCalStruct *X +) { char sdt[D_FileNameMax]; - char sdt_phys[D_FileNameMax]; - char sdt_norm[D_FileNameMax]; - char sdt_flct[D_FileNameMax]; - int rand_i, rand_max, iret; + char **sdt_phys, **sdt_norm, **sdt_flct; + int rand_i, iret; unsigned long int i_max; - int step_iO=0; + int step_iO = 0; FILE *fp; - double inv_temp, Ns; + double *inv_temp, Ns; struct TimeKeepStruct tstruct; size_t byte_size; - tstruct.tstart=time(NULL); - - rand_max = NumAve; + tstruct.tstart = time(NULL); + inv_temp = d_1d_allocate(NumAve); + step_spin = ExpecInterval; - X->Bind.Def.St=0; + X->Bind.Def.St = 0; fprintf(stdoutMPI, "%s", cLogTPQ_Start); - for (rand_i = 0; rand_iBind.Def.NsiteMPI; - fprintf(stdoutMPI, cLogTPQRand, rand_i+1, rand_max); - iret=0; - X->Bind.Def.irand=rand_i; - //Make or Read initial vector - if(X->Bind.Def.iReStart==RESTART_INOUT || X->Bind.Def.iReStart==RESTART_IN) { - StartTimer(3600); - TimeKeeperWithRandAndStep(&(X->Bind), cFileNameTPQStep, cOutputVecStart, "a", rand_i, step_i); - fprintf(stdoutMPI, "%s", cLogInputVecStart); - sprintf(sdt, cFileNameInputVector, rand_i, myrank); - childfopenALL(sdt, "rb", &fp); - if(fp==NULL){ - fprintf(stdout, "A file of Inputvector does not exist.\n"); - fprintf(stdout, "Start to calculate in normal procedure.\n"); - iret=1; - } - byte_size = fread(&step_i, sizeof(step_i), 1, fp); - byte_size = fread(&i_max, sizeof(long int), 1, fp); - if(i_max != X->Bind.Check.idim_max){ - fprintf(stderr, "Error: A file of Inputvector is incorrect.\n"); - exitMPI(-1); - } - byte_size = fread(v0, sizeof(complex double), 
X->Bind.Check.idim_max+1, fp); - TimeKeeperWithRandAndStep(&(X->Bind), cFileNameTPQStep, cOutputVecFinish, "a", rand_i, step_i); - fprintf(stdoutMPI, "%s", cLogInputVecFinish); - fclose(fp); - StopTimer(3600); - X->Bind.Def.istep=step_i; - StartTimer(3200); - iret=expec_energy_flct(&(X->Bind)); - StopTimer(3200); - if(iret != 0) return -1; + //for rand_i =0, rand_iBind.Def.NsiteMPI; + fprintf(stdoutMPI, cLogTPQRand, 1, NumAve); + iret = 0; - step_iO=step_i-1; - if (byte_size == 0) printf("byte_size: %d \n", (int)byte_size); + //Make or Read initial vector + if (X->Bind.Def.iReStart == RESTART_INOUT || X->Bind.Def.iReStart == RESTART_IN) { + StartTimer(3600); + TimeKeeperWithRandAndStep(&(X->Bind), cFileNameTPQStep, cOutputVecStart, "a", 0, step_i); + fprintf(stdoutMPI, "%s", cLogInputVecStart); + sprintf(sdt, cFileNameInputVector, rand_i, myrank); + childfopenALL(sdt, "rb", &fp); + if (fp == NULL) { + fprintf(stdout, "A file of Inputvector does not exist.\n"); + fprintf(stdout, "Start to calculate in normal procedure.\n"); + iret = 1; } - - if(X->Bind.Def.iReStart==RESTART_NOT || X->Bind.Def.iReStart==RESTART_OUT || iret ==1) { - StartTimer(3600); - if (childfopenMPI(sdt_phys, "w", &fp) != 0) { - return -1; + byte_size = fread(&step_i, sizeof(step_i), 1, fp); + byte_size = fread(&i_max, sizeof(long int), 1, fp); + if (i_max != X->Bind.Check.idim_max) { + fprintf(stderr, "Error: A file of Inputvector is incorrect.\n"); + exitMPI(-1); + } + byte_size = fread(v0, sizeof(complex double), (X->Bind.Check.idim_max + 1)*NumAve, fp); + TimeKeeperWithRandAndStep(&(X->Bind), cFileNameTPQStep, cOutputVecFinish, "a", 0, step_i); + fprintf(stdoutMPI, "%s", cLogInputVecFinish); + fclose(fp); + StopTimer(3600); + X->Bind.Def.istep = step_i; + StartTimer(3200); + iret = expec_energy_flct(&(X->Bind), NumAve, v0, v1); + StopTimer(3200); + if (iret != 0) return -1; + + step_iO = step_i - 1; + if (byte_size == 0) printf("byte_size: %d \n", (int)byte_size); + }/*if (X->Bind.Def.iReStart 
== RESTART_INOUT || X->Bind.Def.iReStart == RESTART_IN)*/ + + if (X->Bind.Def.iReStart == RESTART_NOT || X->Bind.Def.iReStart == RESTART_OUT || iret == 1) { + StartTimer(3600); + for (rand_i = 0; rand_i < NumAve; rand_i++) { + if (childfopenMPI(sdt_phys[rand_i], "w", &fp) == 0) { + fprintf(fp, "%s", cLogSSRand); + fclose(fp); } - fprintf(fp, "%s", cLogSSRand); - fclose(fp); -// for norm - if (childfopenMPI(sdt_norm, "w", &fp) != 0) { - return -1; + else return -1; + // for norm + if (childfopenMPI(sdt_norm[rand_i], "w", &fp) == 0) { + fprintf(fp, "%s", cLogNormRand); + fclose(fp); } - fprintf(fp, "%s", cLogNormRand); - fclose(fp); -// for fluctuations - if (childfopenMPI(sdt_flct, "w", &fp) != 0) { - return -1; + else return -1; + // for fluctuations + if (childfopenMPI(sdt_flct[rand_i], "w", &fp) == 0) { + fprintf(fp, "%s", cLogFlctRand); + fclose(fp); } - fprintf(fp, "%s", cLogFlctRand); - fclose(fp); - - StopTimer(3600); + else return -1; + } + StopTimer(3600); - step_i = 0; + step_i = 0; - StartTimer(3100); - if(rand_i==0){ - TimeKeeperWithRandAndStep(&(X->Bind), cFileNameTPQStep, cTPQStep, "w", rand_i, step_i); - } - else{ - TimeKeeperWithRandAndStep(&(X->Bind), cFileNameTPQStep, cTPQStep, "a", rand_i, step_i); - } - /**@brief - Initialize v1 and compute v0 = H*v1 - */ + StartTimer(3100); + if (rand_i == 0) { + TimeKeeperWithRandAndStep(&(X->Bind), cFileNameTPQStep, cTPQStep, "w", 0, step_i); + } + else { + TimeKeeperWithRandAndStep(&(X->Bind), cFileNameTPQStep, cTPQStep, "a", 0, step_i); + } + /**@brief + Initialize v1 and compute v0 = H*v1 + */ + for (rand_i = 0; rand_i < NumAve; rand_i++) { FirstMultiply(rand_i, &(X->Bind)); - inv_temp = 0.0; - StopTimer(3100); - if (childfopenMPI(sdt_phys, "a", &fp) != 0) { - return -1; + inv_temp[rand_i] = 0.0; + } + StopTimer(3100); + for (rand_i = 0; rand_i < NumAve; rand_i++) { + if (childfopenMPI(sdt_phys[rand_i], "a", &fp) == 0) { + fprintf(fp, "%.16lf %.16lf %.16lf %.16lf %.16lf %d\n", + inv_temp[rand_i], 
X->Bind.Phys.energy[rand_i], X->Bind.Phys.var[rand_i], + X->Bind.Phys.doublon[rand_i], X->Bind.Phys.num[rand_i], step_i); + fclose(fp); } - fprintf(fp, "%.16lf %.16lf %.16lf %.16lf %.16lf %d\n", inv_temp, X->Bind.Phys.energy, X->Bind.Phys.var, - X->Bind.Phys.doublon, X->Bind.Phys.num, step_i); - fclose(fp); + else return -1; // for norm - if (childfopenMPI(sdt_norm, "a", &fp) != 0) { - return -1; + if (childfopenMPI(sdt_norm[rand_i], "a", &fp) == 0) { + fprintf(fp, "%.16lf %.16lf %.16lf %d\n", + inv_temp[rand_i], global_1st_norm[rand_i], global_1st_norm[rand_i], step_i); + fclose(fp); } - fprintf(fp, "%.16lf %.16lf %.16lf %d\n", inv_temp, global_1st_norm, global_1st_norm, step_i); - fclose(fp); - /**@brief - Compute expectation value at infinite temperature - */ - X->Bind.Def.istep = 0; - StartTimer(3300); - iret=expec_cisajs(&(X->Bind), v1); - StopTimer(3300); - if(iret !=0) return -1; + else return -1; + } + + /**@brief + Compute expectation value at infinite temperature + */ + X->Bind.Def.istep = 0; + StartTimer(3300); + iret = expec_cisajs(&(X->Bind), NumAve, v0, v1); + StopTimer(3300); + if (iret != 0) return -1; - StartTimer(3400); - iret=expec_cisajscktaltdc(&(X->Bind), v1); - StopTimer(3400); - if(iret !=0) return -1; + StartTimer(3400); + iret = expec_cisajscktaltdc(&(X->Bind), NumAve, v0, v1); + StopTimer(3400); + if (iret != 0) return -1; - /**@brief - Compute v1=0, and compute v0 = H*v1 - */ - StartTimer(3200); - iret=expec_energy_flct(&(X->Bind)); //v0 = H*v1 - StopTimer(3200); - if(iret !=0) return -1; - step_i += 1; - inv_temp = (2.0 / Ns) / (LargeValue - X->Bind.Phys.energy / Ns); - StartTimer(3600); - if (childfopenMPI(sdt_phys, "a", &fp) != 0) { - return -1; - } - fprintf(fp, "%.16lf %.16lf %.16lf %.16lf %.16lf %d\n", inv_temp, X->Bind.Phys.energy, X->Bind.Phys.var, - X->Bind.Phys.doublon, X->Bind.Phys.num, step_i); - fclose(fp); -// for norm - if (childfopenMPI(sdt_norm, "a", &fp) != 0) { - return -1; + /**@brief + Compute v1=0, and compute v0 = 
H*v1 + */ + StartTimer(3200); + iret = expec_energy_flct(&(X->Bind), NumAve, v0, v1); //v0 = H*v1 + StopTimer(3200); + if (iret != 0) return -1; + step_i += 1; + StartTimer(3600); + for (rand_i = 0; rand_i < NumAve; rand_i++) { + inv_temp[rand_i] = (2.0 / Ns) / (LargeValue - X->Bind.Phys.energy[rand_i] / Ns); + if (childfopenMPI(sdt_phys, "a", &fp) == 0) { + fprintf(fp, "%.16lf %.16lf %.16lf %.16lf %.16lf %d\n", + inv_temp[rand_i], X->Bind.Phys.energy[rand_i], X->Bind.Phys.var[rand_i], + X->Bind.Phys.doublon[rand_i], X->Bind.Phys.num[rand_i], step_i); + fclose(fp); } - fprintf(fp, "%.16lf %.16lf %.16lf %d\n", inv_temp, global_norm, global_1st_norm, step_i); - fclose(fp); -// for fluctuations - if (childfopenMPI(sdt_flct, "a", &fp) != 0) { - return -1; + else return -1; + // for norm + if (childfopenMPI(sdt_norm, "a", &fp) == 0) { + fprintf(fp, "%.16lf %.16lf %.16lf %d\n", + inv_temp[rand_i], global_norm[rand_i], global_1st_norm[rand_i], step_i); + fclose(fp); } - fprintf(fp, "%.16lf %.16lf %.16lf %.16lf %.16lf %.16lf %.16lf %d\n", inv_temp,X->Bind.Phys.num,X->Bind.Phys.num2, X->Bind.Phys.doublon,X->Bind.Phys.doublon2, X->Bind.Phys.Sz,X->Bind.Phys.Sz2,step_i); - fclose(fp); -// - StopTimer(3600); - step_i +=1; - X->Bind.Def.istep = step_i; - step_iO=0; - } - - for (step_i = X->Bind.Def.istep; step_iBind.Def.Lanczos_max; step_i++){ - X->Bind.Def.istep=step_i; - if(step_i %((X->Bind.Def.Lanczos_max-step_iO)/10)==0){ - fprintf(stdoutMPI, cLogTPQStep, step_i, X->Bind.Def.Lanczos_max); + else return -1; + // for fluctuations + if (childfopenMPI(sdt_flct, "a", &fp) == 0) { + fprintf(fp, "%.16lf %.16lf %.16lf %.16lf %.16lf %.16lf %.16lf %d\n", + inv_temp[rand_i], X->Bind.Phys.num[rand_i], X->Bind.Phys.num2[rand_i], + X->Bind.Phys.doublon[rand_i], X->Bind.Phys.doublon2[rand_i], + X->Bind.Phys.Sz[rand_i], X->Bind.Phys.Sz2[rand_i], step_i); + fclose(fp); } - X->Bind.Def.istep=step_i; - StartTimer(3600); - TimeKeeperWithRandAndStep(&(X->Bind), cFileNameTPQStep, cTPQStep, "a", 
rand_i, step_i); - StopTimer(3600); - StartTimer(3500); - Multiply(&(X->Bind)); - StopTimer(3500); + else return -1; + }/*for (rand_i = 0; rand_i < NumAve; rand_i++)*/ + StopTimer(3600); + step_i += 1; + X->Bind.Def.istep = step_i; + step_iO = 0; + }/*if (X->Bind.Def.iReStart == RESTART_NOT || X->Bind.Def.iReStart == RESTART_OUT || iret == 1)*/ - StartTimer(3200); - iret=expec_energy_flct(&(X->Bind)); - StopTimer(3200); - if(iret !=0) return -1; + for (step_i = X->Bind.Def.istep; step_i < X->Bind.Def.Lanczos_max; step_i++) { + X->Bind.Def.istep = step_i; + if (step_i % ((X->Bind.Def.Lanczos_max - step_iO) / 10) == 0) { + fprintf(stdoutMPI, cLogTPQStep, step_i, X->Bind.Def.Lanczos_max); + } + X->Bind.Def.istep = step_i; + StartTimer(3600); + TimeKeeperWithRandAndStep(&(X->Bind), cFileNameTPQStep, cTPQStep, "a", 0, step_i); + StopTimer(3600); + StartTimer(3500); + Multiply(&(X->Bind)); + StopTimer(3500); -// - inv_temp = (2.0*step_i / Ns) / (LargeValue - X->Bind.Phys.energy / Ns); + StartTimer(3200); + iret = expec_energy_flct(&(X->Bind), NumAve, v0, v1); + StopTimer(3200); + if (iret != 0) return -1; - StartTimer(3600); - if(childfopenMPI(sdt_phys, "a", &fp)!=0){ - return FALSE; + StartTimer(3600); + for (rand_i = 0; rand_i < NumAve; rand_i++) { + inv_temp[rand_i] = (2.0*step_i / Ns) / (LargeValue - X->Bind.Phys.energy[rand_i] / Ns); + if (childfopenMPI(sdt_phys, "a", &fp) == 0) { + fprintf(fp, "%.16lf %.16lf %.16lf %.16lf %.16lf %d\n", + inv_temp[rand_i], X->Bind.Phys.energy[rand_i], X->Bind.Phys.var[rand_i], + X->Bind.Phys.doublon[rand_i], X->Bind.Phys.num[rand_i], step_i); + // for + fclose(fp); } - fprintf(fp, "%.16lf %.16lf %.16lf %.16lf %.16lf %d\n", inv_temp, X->Bind.Phys.energy, X->Bind.Phys.var, X->Bind.Phys.doublon, X->Bind.Phys.num ,step_i); -// for - fclose(fp); + else return FALSE; - if(childfopenMPI(sdt_norm, "a", &fp)!=0){ - return FALSE; + if (childfopenMPI(sdt_norm, "a", &fp) == 0) { + fprintf(fp, "%.16lf %.16lf %.16lf %d\n", + inv_temp[rand_i], 
global_norm[rand_i], global_1st_norm[rand_i], step_i); + fclose(fp); } - fprintf(fp, "%.16lf %.16lf %.16lf %d\n", inv_temp, global_norm, global_1st_norm, step_i); - fclose(fp); + else return FALSE; -// for fluctuations - if (childfopenMPI(sdt_flct, "a", &fp) != 0) { - return -1; + // for fluctuations + if (childfopenMPI(sdt_flct, "a", &fp) == 0) { + fprintf(fp, "%.16lf %.16lf %.16lf %.16lf %.16lf %.16lf %.16lf %d\n", + inv_temp, X->Bind.Phys.num[rand_i], X->Bind.Phys.num2[rand_i], + X->Bind.Phys.doublon[rand_i], X->Bind.Phys.doublon2[rand_i], + X->Bind.Phys.Sz[rand_i], X->Bind.Phys.Sz2[rand_i], step_i); + fclose(fp); } - fprintf(fp, "%.16lf %.16lf %.16lf %.16lf %.16lf %.16lf %.16lf %d\n", inv_temp,X->Bind.Phys.num,X->Bind.Phys.num2, X->Bind.Phys.doublon,X->Bind.Phys.doublon2, X->Bind.Phys.Sz,X->Bind.Phys.Sz2,step_i); - fclose(fp); -// - StopTimer(3600); + else return -1; + }/*for (rand_i = 0; rand_i < NumAve; rand_i++)*/ + StopTimer(3600); + if (step_i%step_spin == 0) { + StartTimer(3300); + iret = expec_cisajs(&(X->Bind), NumAve, v0, v1); + StopTimer(3300); + if (iret != 0) return -1; - if (step_i%step_spin == 0){ - StartTimer(3300); - iret=expec_cisajs(&(X->Bind),v1); - StopTimer(3300); - if(iret !=0) return -1; - - StartTimer(3400); - iret=expec_cisajscktaltdc(&(X->Bind), v1); - StopTimer(3400); - if(iret !=0) return -1; - } + StartTimer(3400); + iret = expec_cisajscktaltdc(&(X->Bind), NumAve, v0, v1); + StopTimer(3400); + if (iret != 0) return -1; } + }/*for (step_i = X->Bind.Def.istep; step_i < X->Bind.Def.Lanczos_max; step_i++)*/ - if(X->Bind.Def.iReStart== RESTART_OUT || X->Bind.Def.iReStart==RESTART_INOUT){ - TimeKeeperWithRandAndStep(&(X->Bind), cFileNameTPQStep, cOutputVecStart, "a", rand_i, step_i); - fprintf(stdoutMPI, "%s", cLogOutputVecStart); - sprintf(sdt, cFileNameOutputVector, rand_i, myrank); - if(childfopenALL(sdt, "wb", &fp)!=0){ - exitMPI(-1); - } - fwrite(&step_i, sizeof(step_i), 1, fp); - fwrite(&X->Bind.Check.idim_max, 
sizeof(X->Bind.Check.idim_max),1,fp); - fwrite(v1, sizeof(complex double),X->Bind.Check.idim_max+1, fp); - fclose(fp); - TimeKeeperWithRandAndStep(&(X->Bind), cFileNameTPQStep, cOutputVecFinish, "a", rand_i, step_i); - fprintf(stdoutMPI, "%s", cLogOutputVecFinish); + if (X->Bind.Def.iReStart == RESTART_OUT || X->Bind.Def.iReStart == RESTART_INOUT) { + TimeKeeperWithRandAndStep(&(X->Bind), cFileNameTPQStep, cOutputVecStart, "a", 0, step_i); + fprintf(stdoutMPI, "%s", cLogOutputVecStart); + sprintf(sdt, cFileNameOutputVector, 0, myrank); + if (childfopenALL(sdt, "wb", &fp) != 0) { + exitMPI(-1); } + fwrite(&step_i, sizeof(step_i), 1, fp); + fwrite(&X->Bind.Check.idim_max, sizeof(X->Bind.Check.idim_max), 1, fp); + fwrite(v1, sizeof(complex double), (X->Bind.Check.idim_max + 1)*NumAve, fp); + fclose(fp); + TimeKeeperWithRandAndStep(&(X->Bind), cFileNameTPQStep, cOutputVecFinish, "a", 0, step_i); + fprintf(stdoutMPI, "%s", cLogOutputVecFinish); } + fprintf(stdoutMPI, "%s", cLogTPQ_End); - tstruct.tend=time(NULL); - fprintf(stdoutMPI, cLogTPQEnd, (int)(tstruct.tend-tstruct.tstart)); + tstruct.tend = time(NULL); + fprintf(stdoutMPI, cLogTPQEnd, (int)(tstruct.tend - tstruct.tstart)); + free_d_1d_allocate(inv_temp); + + for (rand_i = 0; rand_i < NumAve; rand_i++) { + free(sdt_phys[rand_i]); + free(sdt_norm[rand_i]); + free(sdt_flct[rand_i]); + } + free(sdt_phys); + free(sdt_norm); + free(sdt_flct); + return TRUE; } diff --git a/src/FirstMultiply.c b/src/FirstMultiply.c index a1039b271..b2ee8f898 100644 --- a/src/FirstMultiply.c +++ b/src/FirstMultiply.c @@ -55,8 +55,8 @@ int FirstMultiply(int rand_i, struct BindStruct *X) { { #pragma omp for for (i = 1; i <= i_max; i++) { - v0[i] = 0.0; - v1[i] = 0.0; + v0[i][rand_i] = 0.0; + v1[i][rand_i] = 0.0; } /* @@ -75,12 +75,12 @@ int FirstMultiply(int rand_i, struct BindStruct *X) { StartTimer(3101); #pragma omp for for (i = 1; i <= i_max; i++) - v1[i] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5) + 
2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5)*I; + v1[i][rand_i] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5) + 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5)*I; }/*if (X->Def.iInitialVecType == 0)*/ else { #pragma omp for for (i = 1; i <= i_max; i++) - v1[i] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5); + v1[i][rand_i] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5); } StopTimer(3101); @@ -91,15 +91,15 @@ int FirstMultiply(int rand_i, struct BindStruct *X) { dnorm=0.0; #pragma omp parallel for default(none) private(i) shared(v1, i_max) reduction(+: dnorm) for(i=1;i<=i_max;i++){ - dnorm += conj(v1[i])*v1[i]; + dnorm += conj(v1[i][rand_i])*v1[i][rand_i]; } dnorm = SumMPI_dc(dnorm); dnorm=sqrt(dnorm); - global_1st_norm = dnorm; + global_1st_norm[rand_i] = dnorm; #pragma omp parallel for default(none) private(i) shared(v0,v1) firstprivate(i_max, dnorm) for(i=1;i<=i_max;i++){ - v1[i] = v1[i]/dnorm; - v0[i] = v1[i]; + v1[i][rand_i] = v1[i][rand_i] / dnorm; + v0[i][rand_i] = v1[i][rand_i]; } TimeKeeperWithRandAndStep(X, cFileNameTimeKeep, cTPQStep, "a", rand_i, step_i); @@ -112,20 +112,20 @@ int FirstMultiply(int rand_i, struct BindStruct *X) { StopTimer(3102); #pragma omp parallel for default(none) private(i) shared(v0, v1, list_1) firstprivate(i_max, Ns, LargeValue, myrank) for(i = 1; i <= i_max; i++){ - v0[i]=LargeValue*v1[i]-v0[i]/Ns; + v0[i][rand_i] = LargeValue * v1[i][rand_i] - v0[i][rand_i] / Ns; } dnorm=0.0; #pragma omp parallel for default(none) private(i) shared(v0) firstprivate(i_max) reduction(+: dnorm) - for(i=1;i<=i_max;i++){ - dnorm += conj(v0[i])*v0[i]; + for (i = 1; i <= i_max; i++) { + dnorm += conj(v0[i][rand_i])*v0[i][rand_i]; } dnorm = SumMPI_dc(dnorm); - dnorm=sqrt(dnorm); - global_norm = dnorm; + dnorm = sqrt(dnorm); + global_norm[rand_i] = dnorm; #pragma omp parallel for default(none) private(i) shared(v0) firstprivate(i_max, dnorm) - for(i=1;i<=i_max;i++){ - v0[i] = v0[i]/dnorm; + for (i = 1; i <= i_max; i++) { + v0[i][rand_i] = v0[i][rand_i] / 
dnorm; } TimeKeeperWithRandAndStep(X, cFileNameTimeKeep, cTPQStepEnd, "a", rand_i, step_i); return 0; diff --git a/src/Multiply.c b/src/Multiply.c index 3fc24524a..38a14ec82 100644 --- a/src/Multiply.c +++ b/src/Multiply.c @@ -41,32 +41,32 @@ */ int Multiply ( - struct BindStruct *X - ) + struct BindStruct *X +) { - long int i,i_max; + long int i, i_max; double complex dnorm; double Ns; + int rand_i; - i_max=X->Check.idim_max; + i_max = X->Check.idim_max; Ns = 1.0*X->Def.NsiteMPI; - // mltply is in expec_energy.c v0=H*v1 - dnorm=0.0; -#pragma omp parallel for default(none) reduction(+: dnorm) private(i) shared(v0, v1) firstprivate(i_max, Ns, LargeValue) - for(i = 1; i <= i_max; i++){ - v0[i]=LargeValue*v1[i]-v0[i]/Ns; //v0=(l-H/Ns)*v1 - dnorm += conj(v0[i])*v0[i]; + // mltply is in expec_energy.c v0=H*v1 + for (rand_i = 0; rand_i < NumAve; rand_i++)dnorm = 0.0; +#pragma omp parallel for default(none) reduction(+: dnorm) private(i) \ +shared(v0, v1) firstprivate(i_max, Ns, LargeValue) + for (i = 1; i <= i_max; i++) { + for (rand_i = 0; rand_i < NumAve; rand_i++) { + v0[i][rand_i] = LargeValue * v1[i][rand_i] - v0[i][rand_i] / Ns; //v0=(l-H/Ns)*v1 + } } - dnorm=SumMPI_dc(dnorm); - dnorm=sqrt(dnorm); - global_norm = dnorm; + NormMPI_dv(i_max, NumAve, v0, global_norm); #pragma omp parallel for default(none) private(i) shared(v0) firstprivate(i_max, dnorm) - for(i=1;i<=i_max;i++){ - v0[i] = v0[i]/dnorm; - } + for (i = 1; i <= i_max; i++) + for (rand_i = 0; rand_i < NumAve; rand_i++) + v0[i][rand_i] = v0[i][rand_i] / global_norm[rand_i]; return 0; } - /** * @brief Function of multiplying Hamiltonian for Time Evolution. 
* @@ -78,63 +78,66 @@ int Multiply */ int MultiplyForTEM ( - struct BindStruct *X - ) + struct BindStruct *X +) { - long int i,i_max; + long int i, i_max; int coef; - double complex dnorm=0.0; + double complex dnorm = 0.0; double complex tmp1 = 1.0; - double complex tmp2=0.0; - double dt=X->Def.Param.TimeSlice; + double complex tmp2 = 0.0; + double dt = X->Def.Param.TimeSlice; //Make |v0> = |psi(t+dt)> from |v1> = |psi(t)> and |v0> = H |psi(t)> - i_max=X->Check.idim_max; + i_max = X->Check.idim_max; // mltply is in expec_energy.c v0=H*v1 - if(dt Def.Param.ExpandCoef; coef++) { - tmp1 *= -I * dt / (double complex) coef; + tmp1 *= -I * dt / (double complex)coef; //v2 = H*v1 = H^coef |psi(t)> mltply(X, 1, v2, v1); -#pragma omp parallel for default(none) private(i) shared(v0, v1, v2) firstprivate(i_max, tmp1, myrank) +#pragma omp parallel for default(none) private(i) shared(v0, v1, v2) \ +firstprivate(i_max, tmp1, myrank) for (i = 1; i <= i_max; i++) { - v0[i] += tmp1 * v2[i]; - v1[i] = v2[i]; - v2[i] = 0.0 + I * 0.0; + v0[i][0] += tmp1 * v2[i][0]; + v1[i][0] = v2[i][0]; + v2[i][0] = 0.0 + I * 0.0; } } } - dnorm=0.0; -#pragma omp parallel for default(none) reduction(+: dnorm) private(i) shared(v0) firstprivate(i_max, dt) - for(i = 1; i <= i_max; i++){ - dnorm += conj(v0[i])*v0[i]; + dnorm = 0.0; +#pragma omp parallel for default(none) reduction(+: dnorm) private(i) shared(v0) \ +firstprivate(i_max, dt) + for (i = 1; i <= i_max; i++) { + dnorm += conj(v0[i][0])*v0[i][0]; } - dnorm=SumMPI_dc(dnorm); - dnorm=sqrt(dnorm); - global_norm = dnorm; + dnorm = SumMPI_dc(dnorm); + dnorm = sqrt(dnorm); + global_norm[0] = dnorm; #pragma omp parallel for default(none) private(i) shared(v0) firstprivate(i_max, dnorm) - for(i=1;i<=i_max;i++){ - v0[i] = v0[i]/dnorm; + for (i = 1; i <= i_max; i++) { + v0[i][0] = v0[i][0] / dnorm; } return 0; } diff --git a/src/PairEx.c b/src/PairEx.c index be68604c9..fcfc4b763 100644 --- a/src/PairEx.c +++ b/src/PairEx.c @@ -45,55 +45,55 @@ /// \version 
1.2 int GetPairExcitedState ( - struct BindStruct *X, - int nstate, double complex **tmp_v0, - double complex **tmp_v1 - ) + struct BindStruct *X, + int nstate, double complex **tmp_v0, + double complex **tmp_v1 +) { - int iret; - long unsigned int irght,ilft,ihfbit; + int iret; + long unsigned int irght, ilft, ihfbit; // i_max = X->Check.idim_max; - if(X->Def.iFlgGeneralSpin == FALSE) { - if (GetSplitBitByModel(X->Def.Nsite, X->Def.iCalcModel, &irght, &ilft, &ihfbit) != 0) { - return -1; - } + if (X->Def.iFlgGeneralSpin == FALSE) { + if (GetSplitBitByModel(X->Def.Nsite, X->Def.iCalcModel, &irght, &ilft, &ihfbit) != 0) { + return -1; } - else { - if (GetSplitBitForGeneralSpin(X->Def.Nsite, &ihfbit, X->Def.SiteToBit) != 0) { - return -1; - } + } + else { + if (GetSplitBitForGeneralSpin(X->Def.Nsite, &ihfbit, X->Def.SiteToBit) != 0) { + return -1; } + } - X->Large.i_max = X->Check.idim_maxOrg; - X->Large.irght = irght; - X->Large.ilft = ilft; - X->Large.ihfbit = ihfbit; - X->Large.mode=M_CALCSPEC; + X->Large.i_max = X->Check.idim_maxOrg; + X->Large.irght = irght; + X->Large.ilft = ilft; + X->Large.ihfbit = ihfbit; + X->Large.mode = M_CALCSPEC; - switch(X->Def.iCalcModel){ + switch (X->Def.iCalcModel) { case HubbardGC: - iret=GetPairExcitedStateHubbardGC(X, nstate, tmp_v0, tmp_v1); + iret = GetPairExcitedStateHubbardGC(X, nstate, tmp_v0, tmp_v1); break; case KondoGC: case Hubbard: case Kondo: - iret=GetPairExcitedStateHubbard(X, nstate, tmp_v0, tmp_v1); + iret = GetPairExcitedStateHubbard(X, nstate, tmp_v0, tmp_v1); break; - case Spin: // for the Sz-conserved spin system - iret =GetPairExcitedStateSpin(X, nstate, tmp_v0, tmp_v1); - break; + case Spin: // for the Sz-conserved spin system + iret = GetPairExcitedStateSpin(X, nstate, tmp_v0, tmp_v1); + break; - case SpinGC: - iret=GetPairExcitedStateSpinGC(X,nstate,tmp_v0, tmp_v1); - break; + case SpinGC: + iret = GetPairExcitedStateSpinGC(X, nstate, tmp_v0, tmp_v1); + break; - default: - iret =FALSE; - break; - } + 
default: + iret = FALSE; + break; + } - return iret; + return iret; } diff --git a/src/PairExHubbard.c b/src/PairExHubbard.c index e57703079..5314082b9 100644 --- a/src/PairExHubbard.c +++ b/src/PairExHubbard.c @@ -35,96 +35,93 @@ /// \author Kazuyoshi Yoshimi /// \version 1.2 int GetPairExcitedStateHubbardGC( - struct BindStruct *X,/**< [inout] define list to get and put information of calculation*/ - int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ - double complex **tmp_v1 /**< [in] v0 = H v1*/ + struct BindStruct *X,/**< [inout] define list to get and put information of calculation*/ + int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + double complex **tmp_v1 /**< [in] v0 = H v1*/ +) { + long unsigned int i, j; + long unsigned int isite1; + long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; -){ + double complex tmp_trans = 0; + long int i_max; + long int ibit; + long unsigned int is; + i_max = X->Check.idim_maxOrg; + for (i = 0; i < X->Def.NPairExcitationOperator; i++) { + org_isite1 = X->Def.PairExcitationOperator[i][0] + 1; + org_isite2 = X->Def.PairExcitationOperator[i][2] + 1; + org_sigma1 = X->Def.PairExcitationOperator[i][1]; + org_sigma2 = X->Def.PairExcitationOperator[i][3]; + tmp_trans = X->Def.ParaPairExcitationOperator[i]; - long unsigned int i,j; - long unsigned int isite1; - long unsigned int org_isite1,org_isite2,org_sigma1,org_sigma2; - - double complex tmp_trans=0; - long int i_max; - long int ibit; - long unsigned int is; - i_max = X->Check.idim_maxOrg; - for(i=0;i<X->Def.NPairExcitationOperator;i++) { - org_isite1 = X->Def.PairExcitationOperator[i][0] + 1; - org_isite2 = X->Def.PairExcitationOperator[i][2] + 1; - org_sigma1 = X->Def.PairExcitationOperator[i][1]; - org_sigma2 = X->Def.PairExcitationOperator[i][3]; - tmp_trans = X->Def.ParaPairExcitationOperator[i]; - - if (org_isite1 > X->Def.Nsite && - org_isite2 > X->Def.Nsite) { - if (org_isite1 == org_isite2 && org_sigma1 == org_sigma2) { - if 
(X->Def.PairExcitationOperator[i][4] == 0) { - if (org_sigma1 == 0) { - is = X->Def.Tpow[2 * org_isite1 - 2]; - } - else { - is = X->Def.Tpow[2 * org_isite1 - 1]; - } - ibit = (unsigned long int) myrank & is; - if (ibit != is) { - //minus sign comes from negative tmp_trans due to readdef + if (org_isite1 > X->Def.Nsite && + org_isite2 > X->Def.Nsite) { + if (org_isite1 == org_isite2 && org_sigma1 == org_sigma2) { + if (X->Def.PairExcitationOperator[i][4] == 0) { + if (org_sigma1 == 0) { + is = X->Def.Tpow[2 * org_isite1 - 2]; + } + else { + is = X->Def.Tpow[2 * org_isite1 - 1]; + } + ibit = (unsigned long int) myrank & is; + if (ibit != is) { + //minus sign comes from negative tmp_trans due to readdef #pragma omp parallel for default(none) shared(tmp_v0, tmp_v1) \ firstprivate(i_max, tmp_trans) private(j) - for (j = 1; j <= i_max; j++) tmp_v0[j] += -tmp_trans * tmp_v1[j]; - } - } - else {//X->Def.PairExcitationOperator[i][4]==1 - if (org_sigma1 == 0) { - is = X->Def.Tpow[2 * org_isite1 - 2]; - } - else { - is = X->Def.Tpow[2 * org_isite1 - 1]; - } - ibit = (unsigned long int) myrank & is; - if (ibit == is) { + for (j = 1; j <= i_max; j++) tmp_v0[j] += -tmp_trans * tmp_v1[j]; + } + } + else {//X->Def.PairExcitationOperator[i][4]==1 + if (org_sigma1 == 0) { + is = X->Def.Tpow[2 * org_isite1 - 2]; + } + else { + is = X->Def.Tpow[2 * org_isite1 - 1]; + } + ibit = (unsigned long int) myrank & is; + if (ibit == is) { #pragma omp parallel for default(none) shared(tmp_v0, tmp_v1) \ firstprivate(i_max, tmp_trans) private(j) - for (j = 1; j <= i_max; j++) tmp_v0[j] += tmp_trans * tmp_v1[j]; - } - } - } - else { - X_GC_child_general_hopp_MPIdouble(org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, -tmp_trans, X, - tmp_v0, tmp_v1); - } + for (j = 1; j <= i_max; j++) tmp_v0[j] += tmp_trans * tmp_v1[j]; + } } - else if (org_isite2 > X->Def.Nsite || org_isite1 > X->Def.Nsite) { - if (org_isite1 < org_isite2) { - X_GC_child_general_hopp_MPIsingle(org_isite1 - 1, org_sigma1, 
org_isite2 - 1, org_sigma2, -tmp_trans, X, - tmp_v0, tmp_v1); - } - else { - X_GC_child_general_hopp_MPIsingle(org_isite2 - 1, org_sigma2, org_isite1 - 1, org_sigma1, - -conj(tmp_trans), X, nstate, tmp_v0, tmp_v1); - } - } - else { + } + else { + X_GC_child_general_hopp_MPIdouble(org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, -tmp_trans, X, + tmp_v0, tmp_v1); + } + } + else if (org_isite2 > X->Def.Nsite || org_isite1 > X->Def.Nsite) { + if (org_isite1 < org_isite2) { + X_GC_child_general_hopp_MPIsingle(org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, -tmp_trans, X, + tmp_v0, tmp_v1); + } + else { + X_GC_child_general_hopp_MPIsingle(org_isite2 - 1, org_sigma2, org_isite1 - 1, org_sigma1, + -conj(tmp_trans), X, nstate, tmp_v0, tmp_v1); + } + } + else { - if (org_isite1 == org_isite2 && org_sigma1 == org_sigma2 && X->Def.PairExcitationOperator[i][4] == 0) { - isite1=X->Def.Tpow[2 * org_isite1 - 2 + org_sigma1]; + if (org_isite1 == org_isite2 && org_sigma1 == org_sigma2 && X->Def.PairExcitationOperator[i][4] == 0) { + isite1 = X->Def.Tpow[2 * org_isite1 - 2 + org_sigma1]; #pragma omp parallel for default(none) private(j) firstprivate(i_max,X,isite1, tmp_trans) shared(tmp_v0, tmp_v1) - for(j=1;j<=i_max;j++){ - GC_AisCis(j, nstate, tmp_v0, tmp_v1, X, isite1, -tmp_trans); - } - } - else { - if (child_general_hopp_GetInfo(X, org_isite1, org_isite2, org_sigma1, org_sigma2) != 0) { - return -1; - } - GC_child_general_hopp(tmp_v0, tmp_v1, X, tmp_trans); - } + for (j = 1; j <= i_max; j++) { + GC_AisCis(j, nstate, tmp_v0, tmp_v1, X, isite1, -tmp_trans); + } + } + else { + if (child_general_hopp_GetInfo(X, org_isite1, org_isite2, org_sigma1, org_sigma2) != 0) { + return -1; } + GC_child_general_hopp(tmp_v0, tmp_v1, X, tmp_trans); + } } - return TRUE; + } + return TRUE; } - /// /// Calculation of pair excited state for Hubbard canonical system /// \param X [in,out] define list to get and put information of calculation @@ -135,146 +132,148 @@ int 
GetPairExcitedStateHubbardGC( /// \author Kazuyoshi Yoshimi /// \version 1.2 int GetPairExcitedStateHubbard( - struct BindStruct *X,/**< [inout] define list to get and put information of calculation*/ - int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ - double complex **tmp_v1 /**< [in] v0 = H v1*/ -){ - long unsigned int i,j, idim_maxMPI; - long unsigned int irght,ilft,ihfbit; - long unsigned int org_isite1,org_isite2,org_sigma1,org_sigma2; - long unsigned int tmp_off=0; + struct BindStruct *X,/**< [inout] define list to get and put information of calculation*/ + int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + double complex **tmp_v1 /**< [in] v0 = H v1*/ +) { + long unsigned int i, j, idim_maxMPI; + long unsigned int irght, ilft, ihfbit; + long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; + long unsigned int tmp_off = 0; - double complex tmp_trans=0; - long int i_max; - int tmp_sgn, num1; - long int ibit; - long unsigned int is, Asum, Adiff; - long unsigned int ibitsite1, ibitsite2; + double complex tmp_trans = 0; + long int i_max; + int tmp_sgn, num1; + long int ibit; + long unsigned int is, Asum, Adiff; + long unsigned int ibitsite1, ibitsite2; - // i_max = X->Check.idim_max; - i_max = X->Check.idim_maxOrg; - if(GetSplitBitByModel(X->Def.Nsite, X->Def.iCalcModel, &irght, &ilft, &ihfbit)!=0){ - return -1; - } - X->Large.i_max = i_max; - X->Large.irght = irght; - X->Large.ilft = ilft; - X->Large.ihfbit = ihfbit; - X->Large.mode=M_CALCSPEC; -// X->Large.mode = M_MLTPLY; + // i_max = X->Check.idim_max; + i_max = X->Check.idim_maxOrg; + if (GetSplitBitByModel(X->Def.Nsite, X->Def.iCalcModel, &irght, &ilft, &ihfbit) != 0) { + return -1; + } + X->Large.i_max = i_max; + X->Large.irght = irght; + X->Large.ilft = ilft; + X->Large.ihfbit = ihfbit; + X->Large.mode = M_CALCSPEC; + // X->Large.mode = M_MLTPLY; - double complex **tmp_v1bufOrg; - //set size + double complex **tmp_v1bufOrg; + //set size #ifdef MPI - 
idim_maxMPI = MaxMPI_li(X->Check.idim_maxOrg); - tmp_v1bufOrg= cd_1d_allocate(idim_maxMPI + 1); + idim_maxMPI = MaxMPI_li(X->Check.idim_maxOrg); + tmp_v1bufOrg = cd_1d_allocate(idim_maxMPI + 1); #endif // MPI - for(i=0;i<X->Def.NPairExcitationOperator;i++){ - org_isite1 = X->Def.PairExcitationOperator[i][0]+1; - org_isite2 = X->Def.PairExcitationOperator[i][2]+1; - org_sigma1 = X->Def.PairExcitationOperator[i][1]; - org_sigma2 = X->Def.PairExcitationOperator[i][3]; - tmp_trans = X->Def.ParaPairExcitationOperator[i]; - ibitsite1 = X->Def.OrgTpow[2*org_isite1-2+org_sigma1] ; - ibitsite2 = X->Def.OrgTpow[2*org_isite2-2+org_sigma2] ; - child_general_hopp_GetInfo(X, org_isite1, org_isite2, org_sigma1, org_sigma2); - Asum = X->Large.isA_spin; - Adiff = X->Large.A_spin; + for (i = 0; i < X->Def.NPairExcitationOperator; i++) { + org_isite1 = X->Def.PairExcitationOperator[i][0] + 1; + org_isite2 = X->Def.PairExcitationOperator[i][2] + 1; + org_sigma1 = X->Def.PairExcitationOperator[i][1]; + org_sigma2 = X->Def.PairExcitationOperator[i][3]; + tmp_trans = X->Def.ParaPairExcitationOperator[i]; + ibitsite1 = X->Def.OrgTpow[2 * org_isite1 - 2 + org_sigma1]; + ibitsite2 = X->Def.OrgTpow[2 * org_isite2 - 2 + org_sigma2]; + child_general_hopp_GetInfo(X, org_isite1, org_isite2, org_sigma1, org_sigma2); + Asum = X->Large.isA_spin; + Adiff = X->Large.A_spin; - if(X->Def.iFlagListModified == TRUE // Not to adopt HubbrdNConserved - && org_sigma1 != org_sigma2){ - if (org_isite1 > X->Def.Nsite && - org_isite2 > X->Def.Nsite) - { - X_child_CisAjt_MPIdouble(org_isite1-1, org_sigma1, org_isite2-1, org_sigma2, -tmp_trans, X, nstate, tmp_v0, tmp_v1, tmp_v1bufOrg, list_1_org, list_1buf_org, list_2_1, list_2_2); - } - else if (org_isite2 > X->Def.Nsite - || org_isite1 > X->Def.Nsite) - { - if(org_isite1 < org_isite2) { - X_child_CisAjt_MPIsingle(org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, -tmp_trans, X, nstate, tmp_v0, - tmp_v1, tmp_v1bufOrg, list_1_org, list_1buf_org, list_2_1, 
list_2_2); - } else{ - X_child_CisAjt_MPIsingle(org_isite2 - 1, org_sigma2, org_isite1 - 1, org_sigma1, -conj(tmp_trans), X, nstate, tmp_v0, - tmp_v1, tmp_v1bufOrg, list_1_org, list_1buf_org, list_2_1, list_2_2); } - } - else{ + if (X->Def.iFlagListModified == TRUE // Not to adopt HubbrdNConserved + && org_sigma1 != org_sigma2) { + if (org_isite1 > X->Def.Nsite && + org_isite2 > X->Def.Nsite) + { + X_child_CisAjt_MPIdouble(org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, -tmp_trans, X, nstate, tmp_v0, tmp_v1, tmp_v1bufOrg, list_1_org, list_1buf_org, list_2_1, list_2_2); + } + else if (org_isite2 > X->Def.Nsite + || org_isite1 > X->Def.Nsite) + { + if (org_isite1 < org_isite2) { + X_child_CisAjt_MPIsingle(org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, -tmp_trans, X, nstate, tmp_v0, + tmp_v1, tmp_v1bufOrg, list_1_org, list_1buf_org, list_2_1, list_2_2); + } + else { + X_child_CisAjt_MPIsingle(org_isite2 - 1, org_sigma2, org_isite1 - 1, org_sigma1, -conj(tmp_trans), X, nstate, tmp_v0, + tmp_v1, tmp_v1bufOrg, list_1_org, list_1buf_org, list_2_1, list_2_2); + } + } + else { #pragma omp parallel for default(none) shared(tmp_v0, tmp_v1,stdoutMPI) \ firstprivate(i_max, tmp_trans, Asum, Adiff, ibitsite1, ibitsite2, X, list_1_org, list_1, myrank) \ private(j, tmp_sgn, tmp_off) - for (j = 1; j <= i_max; j++){ - tmp_sgn=X_CisAjt(list_1_org[j], X, ibitsite1, ibitsite2, Asum, Adiff, &tmp_off); - tmp_v0[tmp_off] += tmp_trans * tmp_sgn*tmp_v1[j]; - } - } + for (j = 1; j <= i_max; j++) { + tmp_sgn = X_CisAjt(list_1_org[j], X, ibitsite1, ibitsite2, Asum, Adiff, &tmp_off); + tmp_v0[tmp_off] += tmp_trans * tmp_sgn*tmp_v1[j]; } - else{ - if (org_isite1 > X->Def.Nsite && - org_isite2 > X->Def.Nsite) { - if(org_isite1==org_isite2 && org_sigma1==org_sigma2){//diagonal - is = X->Def.Tpow[2 * org_isite1 - 2 + org_sigma1]; - ibit = (unsigned long int) myrank & is; - if( X->Def.PairExcitationOperator[i][4]==0) { - if (ibit != is) { + } + } + else { + if (org_isite1 > 
X->Def.Nsite && + org_isite2 > X->Def.Nsite) { + if (org_isite1 == org_isite2 && org_sigma1 == org_sigma2) {//diagonal + is = X->Def.Tpow[2 * org_isite1 - 2 + org_sigma1]; + ibit = (unsigned long int) myrank & is; + if (X->Def.PairExcitationOperator[i][4] == 0) { + if (ibit != is) { #pragma omp parallel for default(none) shared(tmp_v0, tmp_v1) \ firstprivate(i_max, tmp_trans) private(j) - for (j = 1; j <= i_max; j++) tmp_v0[j] += -tmp_trans * tmp_v1[j]; - } - } - else{ - if (ibit == is) { + for (j = 1; j <= i_max; j++) tmp_v0[j] += -tmp_trans * tmp_v1[j]; + } + } + else { + if (ibit == is) { #pragma omp parallel for default(none) shared(tmp_v0, tmp_v1) \ firstprivate(i_max, tmp_trans) private(j) - for (j = 1; j <= i_max; j++) tmp_v0[j] += tmp_trans * tmp_v1[j]; - } - } - } - else{ - X_child_general_hopp_MPIdouble(org_isite1-1, org_sigma1, org_isite2-1, org_sigma2, -tmp_trans, X, nstate, tmp_v0, tmp_v1); - } - } - else if (org_isite2 > X->Def.Nsite || org_isite1 > X->Def.Nsite){ - if(org_isite1 < org_isite2){ - X_child_general_hopp_MPIsingle(org_isite1-1, org_sigma1,org_isite2-1, org_sigma2, -tmp_trans, X, nstate, tmp_v0, tmp_v1); - } - else{ - X_child_general_hopp_MPIsingle(org_isite2-1, org_sigma2, org_isite1-1, org_sigma1, -conj(tmp_trans), X, nstate, tmp_v0, tmp_v1); - } + for (j = 1; j <= i_max; j++) tmp_v0[j] += tmp_trans * tmp_v1[j]; } - else{ - if(child_general_hopp_GetInfo( X,org_isite1,org_isite2,org_sigma1,org_sigma2)!=0){ - return -1; - } - if(org_isite1==org_isite2 && org_sigma1==org_sigma2){ - is = X->Def.Tpow[2 * org_isite1 - 2 + org_sigma1]; - if( X->Def.PairExcitationOperator[i][4]==0) { + } + } + else { + X_child_general_hopp_MPIdouble(org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, -tmp_trans, X, nstate, tmp_v0, tmp_v1); + } + } + else if (org_isite2 > X->Def.Nsite || org_isite1 > X->Def.Nsite) { + if (org_isite1 < org_isite2) { + X_child_general_hopp_MPIsingle(org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, -tmp_trans, X, nstate, 
tmp_v0, tmp_v1); + } + else { + X_child_general_hopp_MPIsingle(org_isite2 - 1, org_sigma2, org_isite1 - 1, org_sigma1, -conj(tmp_trans), X, nstate, tmp_v0, tmp_v1); + } + } + else { + if (child_general_hopp_GetInfo(X, org_isite1, org_isite2, org_sigma1, org_sigma2) != 0) { + return -1; + } + if (org_isite1 == org_isite2 && org_sigma1 == org_sigma2) { + is = X->Def.Tpow[2 * org_isite1 - 2 + org_sigma1]; + if (X->Def.PairExcitationOperator[i][4] == 0) { #pragma omp parallel for default(none) shared(list_1, nstate, tmp_v0, tmp_v1) firstprivate(i_max, is, tmp_trans) private(num1, ibit) - for (j = 1; j <= i_max; j++) { - ibit = list_1[j] & is; - num1 = (1-ibit / is); - tmp_v0[j] += -tmp_trans * num1 * tmp_v1[j]; - } - } - else{ + for (j = 1; j <= i_max; j++) { + ibit = list_1[j] & is; + num1 = (1 - ibit / is); + tmp_v0[j] += -tmp_trans * num1 * tmp_v1[j]; + } + } + else { #pragma omp parallel for default(none) shared(list_1, nstate, tmp_v0, tmp_v1) firstprivate(i_max, is, tmp_trans) private(num1, ibit) - for (j = 1; j <= i_max; j++) { - ibit = list_1[j] & is; - num1 = ibit / is; - tmp_v0[j] += tmp_trans * num1 * tmp_v1[j]; - } - } - } - else{ - child_general_hopp(tmp_v0, tmp_v1,X,tmp_trans); - } + for (j = 1; j <= i_max; j++) { + ibit = list_1[j] & is; + num1 = ibit / is; + tmp_v0[j] += tmp_trans * num1 * tmp_v1[j]; } + } + } + else { + child_general_hopp(tmp_v0, tmp_v1, X, tmp_trans); } + } } + } #ifdef MPI - free_cd_1d_allocate(tmp_v1bufOrg); + free_cd_1d_allocate(tmp_v1bufOrg); #endif // MPI - return TRUE; + return TRUE; } diff --git a/src/PairExSpin.c b/src/PairExSpin.c index 3e68e4493..82ef572fe 100644 --- a/src/PairExSpin.c +++ b/src/PairExSpin.c @@ -33,24 +33,21 @@ /// \author Kazuyoshi Yoshimi /// \version 1.2 int GetPairExcitedStateSpinGC( - struct BindStruct *X,/**< [in,out] define list to get and put information of calculation*/ - int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ - double complex **tmp_v1 /**< [in] v0 = H v1*/ + struct 
BindStruct *X,/**< [in,out] define list to get and put information of calculation*/ + int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + double complex **tmp_v1 /**< [in] v0 = H v1*/ -){ +) { - int iret=0; - if (X->Def.iFlgGeneralSpin == FALSE) { - iret=GetPairExcitedStateHalfSpinGC(X, nstate, tmp_v0, tmp_v1); - } - else{ - iret=GetPairExcitedStateGeneralSpinGC(X, nstate, tmp_v0, tmp_v1); - } - return iret; + int iret = 0; + if (X->Def.iFlgGeneralSpin == FALSE) { + iret = GetPairExcitedStateHalfSpinGC(X, nstate, tmp_v0, tmp_v1); + } + else { + iret = GetPairExcitedStateGeneralSpinGC(X, nstate, tmp_v0, tmp_v1); + } + return iret; } - - -// /// Calculation of pair excited state for Half Spin Grand canonical system /// \param X [in,out] define list to get and put information of calculation /// \param tmp_v0 [out] Result v0 = H v1 @@ -60,79 +57,79 @@ int GetPairExcitedStateSpinGC( /// \author Kazuyoshi Yoshimi /// \version 1.2 int GetPairExcitedStateHalfSpinGC( - struct BindStruct *X,/**< [in,out] define list to get and put information of calculation*/ - int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ - double complex **tmp_v1 /**< [in] v0 = H v1*/ - -){ - long unsigned int i,j; - long unsigned int isite1; - long unsigned int org_isite1,org_isite2,org_sigma1,org_sigma2; - long unsigned int tmp_off=0; - - double complex tmp_trans=0; - long int i_max; - int tmp_sgn; - i_max = X->Check.idim_maxOrg; - - for(i=0;i<X->Def.NPairExcitationOperator;i++){ - org_isite1 = X->Def.PairExcitationOperator[i][0]+1; - org_isite2 = X->Def.PairExcitationOperator[i][2]+1; - org_sigma1 = X->Def.PairExcitationOperator[i][1]; - org_sigma2 = X->Def.PairExcitationOperator[i][3]; - tmp_trans = X->Def.ParaPairExcitationOperator[i]; - if(org_isite1 == org_isite2){ - if(org_isite1 > X->Def.Nsite){ - if(org_sigma1==org_sigma2){ // longitudinal magnetic field - if(X->Def.PairExcitationOperator[i][4]==0) { - X_GC_child_AisCis_spin_MPIdouble(org_isite1 - 1, org_sigma1, 
-tmp_trans, X, nstate, tmp_v0, tmp_v1); - } - else{ - X_GC_child_CisAis_spin_MPIdouble(org_isite1 - 1, org_sigma1, tmp_trans, X, nstate, tmp_v0, tmp_v1); - } - } - else{ // transverse magnetic field - //fprintf(stdoutMPI, "Debug: test, org_isite1=%d, org_sigma1=%d, orgsima_2=%d\n", org_isite1, org_sigma1, org_sigma2); - X_GC_child_CisAit_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, tmp_trans, X, nstate, tmp_v0, tmp_v1); - } - }else{ - isite1 = X->Def.Tpow[org_isite1-1]; - if(org_sigma1==org_sigma2) { - if (X->Def.PairExcitationOperator[i][4] == 0) { - // longitudinal magnetic field + struct BindStruct *X,/**< [in,out] define list to get and put information of calculation*/ + int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + double complex **tmp_v1 /**< [in] v0 = H v1*/ +) { + long unsigned int i, j; + long unsigned int isite1; + long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; + long unsigned int tmp_off = 0; + + double complex tmp_trans = 0; + long int i_max; + int tmp_sgn; + i_max = X->Check.idim_maxOrg; + + for (i = 0; i < X->Def.NPairExcitationOperator; i++) { + org_isite1 = X->Def.PairExcitationOperator[i][0] + 1; + org_isite2 = X->Def.PairExcitationOperator[i][2] + 1; + org_sigma1 = X->Def.PairExcitationOperator[i][1]; + org_sigma2 = X->Def.PairExcitationOperator[i][3]; + tmp_trans = X->Def.ParaPairExcitationOperator[i]; + if (org_isite1 == org_isite2) { + if (org_isite1 > X->Def.Nsite) { + if (org_sigma1 == org_sigma2) { // longitudinal magnetic field + if (X->Def.PairExcitationOperator[i][4] == 0) { + X_GC_child_AisCis_spin_MPIdouble(org_isite1 - 1, org_sigma1, -tmp_trans, X, nstate, tmp_v0, tmp_v1); + } + else { + X_GC_child_CisAis_spin_MPIdouble(org_isite1 - 1, org_sigma1, tmp_trans, X, nstate, tmp_v0, tmp_v1); + } + } + else { // transverse magnetic field + //fprintf(stdoutMPI, "Debug: test, org_isite1=%d, org_sigma1=%d, orgsima_2=%d\n", org_isite1, org_sigma1, org_sigma2); + 
X_GC_child_CisAit_spin_MPIdouble(org_isite1 - 1, org_sigma1, org_sigma2, tmp_trans, X, nstate, tmp_v0, tmp_v1); + } + } + else { + isite1 = X->Def.Tpow[org_isite1 - 1]; + if (org_sigma1 == org_sigma2) { + if (X->Def.PairExcitationOperator[i][4] == 0) { + // longitudinal magnetic field #pragma omp parallel for default(none) private(j, tmp_sgn) firstprivate(i_max, isite1, org_sigma1, X,tmp_trans) shared(tmp_v0, tmp_v1) - for (j = 1; j <= i_max; j++) { - tmp_v0[j] += (1.0-X_SpinGC_CisAis(j, X, isite1, org_sigma1)) * tmp_v1[j] * (-tmp_trans); - } - } - else { - // longitudinal magnetic field + for (j = 1; j <= i_max; j++) { + tmp_v0[j] += (1.0 - X_SpinGC_CisAis(j, X, isite1, org_sigma1)) * tmp_v1[j] * (-tmp_trans); + } + } + else { + // longitudinal magnetic field #pragma omp parallel for default(none) private(j, tmp_sgn) firstprivate(i_max, isite1, org_sigma1, X,tmp_trans) shared(tmp_v0, tmp_v1) - for (j = 1; j <= i_max; j++) { - tmp_v0[j] += X_SpinGC_CisAis(j, X, isite1, org_sigma1) * tmp_v1[j] * tmp_trans; - } - } - }else{ - // transverse magnetic field - // fprintf(stdoutMPI, "Debug: isite1=%d, org_sigma2=%d\n", isite1, org_sigma2); + for (j = 1; j <= i_max; j++) { + tmp_v0[j] += X_SpinGC_CisAis(j, X, isite1, org_sigma1) * tmp_v1[j] * tmp_trans; + } + } + } + else { + // transverse magnetic field + // fprintf(stdoutMPI, "Debug: isite1=%d, org_sigma2=%d\n", isite1, org_sigma2); #pragma omp parallel for default(none) private(j, tmp_sgn, tmp_off) firstprivate(i_max, isite1, org_sigma2, X, tmp_trans) shared(tmp_v0, tmp_v1) - for(j=1;j<=i_max;j++){ - tmp_sgn = X_SpinGC_CisAit(j,X, isite1,org_sigma2,&tmp_off); - if(tmp_sgn !=0){ - tmp_v0[tmp_off+1]+= tmp_sgn*tmp_v1[j]*tmp_trans; - } - } - } + for (j = 1; j <= i_max; j++) { + tmp_sgn = X_SpinGC_CisAit(j, X, isite1, org_sigma2, &tmp_off); + if (tmp_sgn != 0) { + tmp_v0[tmp_off + 1] += tmp_sgn * tmp_v1[j] * tmp_trans; } - }else{ - fprintf(stdoutMPI, "ERROR: hopping is not allowed in localized spin system\n"); - return 
FALSE; + } } + } + } + else { + fprintf(stdoutMPI, "ERROR: hopping is not allowed in localized spin system\n"); + return FALSE; } - return TRUE; + } + return TRUE; } - -// /// Calculation of pair excited state for general Spin Grand canonical system /// \param X [in,out] define list to get and put information of calculation /// \param tmp_v0 [out] Result v0 = H v1 @@ -142,79 +139,80 @@ int GetPairExcitedStateHalfSpinGC( /// \author Kazuyoshi Yoshimi /// \version 1.2 int GetPairExcitedStateGeneralSpinGC( - struct BindStruct *X,/**< [in,out] define list to get and put information of calculation*/ - int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ - double complex **tmp_v1 /**< [in] v0 = H v1*/ + struct BindStruct *X,/**< [in,out] define list to get and put information of calculation*/ + int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + double complex **tmp_v1 /**< [in] v0 = H v1*/ ) { - long unsigned int i, j; - int num1; - long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; - long unsigned int tmp_off = 0; - - double complex tmp_trans = 0; - long int i_max; - i_max = X->Check.idim_maxOrg; - - for(i=0;iDef.NPairExcitationOperator;i++){ - org_isite1 = X->Def.PairExcitationOperator[i][0]+1; - org_isite2 = X->Def.PairExcitationOperator[i][2]+1; - org_sigma1 = X->Def.PairExcitationOperator[i][1]; - org_sigma2 = X->Def.PairExcitationOperator[i][3]; - tmp_trans = X->Def.ParaPairExcitationOperator[i]; - if(org_isite1 == org_isite2){ - if(org_isite1 > X->Def.Nsite){ - if(org_sigma1==org_sigma2){ - if(X->Def.PairExcitationOperator[i][4]==0) { - // longitudinal magnetic field - X_GC_child_AisCis_GeneralSpin_MPIdouble(org_isite1 - 1, org_sigma1, -tmp_trans, X, nstate, tmp_v0, tmp_v1); - } - else{ - X_GC_child_CisAis_GeneralSpin_MPIdouble(org_isite1 - 1, org_sigma1, tmp_trans, X, nstate, tmp_v0, tmp_v1); - } - }else{ - // transverse magnetic field - X_GC_child_CisAit_GeneralSpin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, 
tmp_trans, X, nstate, tmp_v0, tmp_v1); - } - } - else{//org_isite1 <= X->Def.Nsite - if(org_sigma1==org_sigma2){ - if(X->Def.PairExcitationOperator[i][4]==0) { - // longitudinal magnetic field + long unsigned int i, j; + int num1; + long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; + long unsigned int tmp_off = 0; + + double complex tmp_trans = 0; + long int i_max; + i_max = X->Check.idim_maxOrg; + + for (i = 0; i < X->Def.NPairExcitationOperator; i++) { + org_isite1 = X->Def.PairExcitationOperator[i][0] + 1; + org_isite2 = X->Def.PairExcitationOperator[i][2] + 1; + org_sigma1 = X->Def.PairExcitationOperator[i][1]; + org_sigma2 = X->Def.PairExcitationOperator[i][3]; + tmp_trans = X->Def.ParaPairExcitationOperator[i]; + if (org_isite1 == org_isite2) { + if (org_isite1 > X->Def.Nsite) { + if (org_sigma1 == org_sigma2) { + if (X->Def.PairExcitationOperator[i][4] == 0) { + // longitudinal magnetic field + X_GC_child_AisCis_GeneralSpin_MPIdouble(org_isite1 - 1, org_sigma1, -tmp_trans, X, nstate, tmp_v0, tmp_v1); + } + else { + X_GC_child_CisAis_GeneralSpin_MPIdouble(org_isite1 - 1, org_sigma1, tmp_trans, X, nstate, tmp_v0, tmp_v1); + } + } + else { + // transverse magnetic field + X_GC_child_CisAit_GeneralSpin_MPIdouble(org_isite1 - 1, org_sigma1, org_sigma2, tmp_trans, X, nstate, tmp_v0, tmp_v1); + } + } + else {//org_isite1 <= X->Def.Nsite + if (org_sigma1 == org_sigma2) { + if (X->Def.PairExcitationOperator[i][4] == 0) { + // longitudinal magnetic field #pragma omp parallel for default(none) private(j, num1) firstprivate(i_max, org_isite1, org_sigma1, X, tmp_trans) shared(tmp_v0, tmp_v1) - for (j = 1; j <= i_max; j++) { - num1 = BitCheckGeneral(j - 1, org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); - tmp_v0[j] += -tmp_trans * tmp_v1[j] * (1.0-num1); - } - } - else{ - // longitudinal magnetic field + for (j = 1; j <= i_max; j++) { + num1 = BitCheckGeneral(j - 1, org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); + tmp_v0[j] += -tmp_trans * 
tmp_v1[j] * (1.0 - num1); + } + } + else { + // longitudinal magnetic field #pragma omp parallel for default(none) private(j, num1) firstprivate(i_max, org_isite1, org_sigma1, X, tmp_trans) shared(tmp_v0, tmp_v1) - for (j = 1; j <= i_max; j++) { - num1 = BitCheckGeneral(j - 1, org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); - tmp_v0[j] += tmp_trans * tmp_v1[j] * num1; - } - } - }else{ - // transverse magnetic field + for (j = 1; j <= i_max; j++) { + num1 = BitCheckGeneral(j - 1, org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); + tmp_v0[j] += tmp_trans * tmp_v1[j] * num1; + } + } + } + else { + // transverse magnetic field #pragma omp parallel for default(none) private(j, num1) firstprivate(i_max, org_isite1, org_sigma1, org_sigma2, X,tmp_off, tmp_trans) shared(tmp_v0, tmp_v1) - for(j=1;j<=i_max;j++){ - num1 = GetOffCompGeneralSpin(j-1, org_isite1, org_sigma2, org_sigma1, &tmp_off, X->Def.SiteToBit, X->Def.Tpow); - if(num1 !=0){ - tmp_v0[tmp_off+1] += tmp_trans*tmp_v1[j]*num1; - } - } - } + for (j = 1; j <= i_max; j++) { + num1 = GetOffCompGeneralSpin(j - 1, org_isite1, org_sigma2, org_sigma1, &tmp_off, X->Def.SiteToBit, X->Def.Tpow); + if (num1 != 0) { + tmp_v0[tmp_off + 1] += tmp_trans * tmp_v1[j] * num1; } - }else{ - fprintf(stdoutMPI, "ERROR: hopping is not allowed in localized spin system\n"); - return FALSE; + } } + } + } + else { + fprintf(stdoutMPI, "ERROR: hopping is not allowed in localized spin system\n"); + return FALSE; } - return TRUE; + } + return TRUE; } - -// /// Calculation of pair excited state for Spin canonical system /// \param X [in,out] define list to get and put information of calculation /// \param tmp_v0 [out] Result v0 = H v1 @@ -224,22 +222,20 @@ int GetPairExcitedStateGeneralSpinGC( /// \author Kazuyoshi Yoshimi /// \version 1.2 int GetPairExcitedStateSpin( - struct BindStruct *X,/**< [in,out] define list to get and put information of calculation*/ - int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ - 
double complex **tmp_v1 /**< [in] v0 = H v1*/ + struct BindStruct *X,/**< [in,out] define list to get and put information of calculation*/ + int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + double complex **tmp_v1 /**< [in] v0 = H v1*/ -){ - int iret=0; - if (X->Def.iFlgGeneralSpin == FALSE) { - iret=GetPairExcitedStateHalfSpin(X, nstate, tmp_v0, tmp_v1); - } - else{ - iret=GetPairExcitedStateGeneralSpin(X, nstate, tmp_v0, tmp_v1); - } - return iret; +) { + int iret = 0; + if (X->Def.iFlgGeneralSpin == FALSE) { + iret = GetPairExcitedStateHalfSpin(X, nstate, tmp_v0, tmp_v1); + } + else { + iret = GetPairExcitedStateGeneralSpin(X, nstate, tmp_v0, tmp_v1); + } + return iret; } - -// /// Calculation of pair excited state for Half Spin canonical system /// \param X [in,out] define list to get and put information of calculation /// \param tmp_v0 [out] Result v0 = H v1 @@ -249,99 +245,101 @@ int GetPairExcitedStateSpin( /// \author Kazuyoshi Yoshimi /// \version 1.2 int GetPairExcitedStateHalfSpin( - struct BindStruct *X,/**< [in,out] define list to get and put information of calculation*/ - int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ - double complex **tmp_v1 /**< [in] v0 = H v1*/ + struct BindStruct *X,/**< [in,out] define list to get and put information of calculation*/ + int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + double complex **tmp_v1 /**< [in] v0 = H v1*/ ) { - long unsigned int i,j, idim_maxMPI; - long unsigned int isite1; - long unsigned int org_isite1,org_isite2,org_sigma1,org_sigma2; - long unsigned int tmp_off=0; + long unsigned int i, j, idim_maxMPI; + long unsigned int isite1; + long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; + long unsigned int tmp_off = 0; - double complex tmp_trans=0; - long int i_max; - int num1; - long int ibit1; - long unsigned int is1_up; + double complex tmp_trans = 0; + long int i_max; + int num1; + long int ibit1; + long unsigned int is1_up; - 
i_max = X->Check.idim_maxOrg; + i_max = X->Check.idim_maxOrg; - double complex **tmp_v1bufOrg; - //set size + double complex **tmp_v1bufOrg; + //set size #ifdef MPI - idim_maxMPI = MaxMPI_li(X->Check.idim_maxOrg); - tmp_v1bufOrg=cd_1d_allocate(idim_maxMPI + 1); + idim_maxMPI = MaxMPI_li(X->Check.idim_maxOrg); + tmp_v1bufOrg = cd_1d_allocate(idim_maxMPI + 1); #endif // MPI - for (i = 0; i < X->Def.NPairExcitationOperator; i++) { - org_isite1 = X->Def.PairExcitationOperator[i][0] + 1; - org_isite2 = X->Def.PairExcitationOperator[i][2] + 1; - org_sigma1 = X->Def.PairExcitationOperator[i][1]; - org_sigma2 = X->Def.PairExcitationOperator[i][3]; - tmp_trans = X->Def.ParaPairExcitationOperator[i]; - if (org_sigma1 == org_sigma2) { - if (org_isite1 == org_isite2) { - if (org_isite1 > X->Def.Nsite) { - is1_up = X->Def.Tpow[org_isite1 - 1]; - ibit1 = X_SpinGC_CisAis((unsigned long int) myrank + 1, X, is1_up, org_sigma1); - if (X->Def.PairExcitationOperator[i][4] == 0) { - if (ibit1 == 0) { + for (i = 0; i < X->Def.NPairExcitationOperator; i++) { + org_isite1 = X->Def.PairExcitationOperator[i][0] + 1; + org_isite2 = X->Def.PairExcitationOperator[i][2] + 1; + org_sigma1 = X->Def.PairExcitationOperator[i][1]; + org_sigma2 = X->Def.PairExcitationOperator[i][3]; + tmp_trans = X->Def.ParaPairExcitationOperator[i]; + if (org_sigma1 == org_sigma2) { + if (org_isite1 == org_isite2) { + if (org_isite1 > X->Def.Nsite) { + is1_up = X->Def.Tpow[org_isite1 - 1]; + ibit1 = X_SpinGC_CisAis((unsigned long int) myrank + 1, X, is1_up, org_sigma1); + if (X->Def.PairExcitationOperator[i][4] == 0) { + if (ibit1 == 0) { #pragma omp parallel for default(none) shared(tmp_v0, tmp_v1) \ firstprivate(i_max, tmp_trans) private(j) - for (j = 1; j <= i_max; j++) tmp_v0[j] += -tmp_trans * tmp_v1[j]; - } - } else { - if (ibit1 != 0) { + for (j = 1; j <= i_max; j++) tmp_v0[j] += -tmp_trans * tmp_v1[j]; + } + } + else { + if (ibit1 != 0) { #pragma omp parallel for default(none) shared(tmp_v0, tmp_v1) \ 
firstprivate(i_max, tmp_trans) private(j) - for (j = 1; j <= i_max; j++) tmp_v0[j] += tmp_trans * tmp_v1[j]; - } - } - }// org_isite1 > X->Def.Nsite - else { - isite1 = X->Def.Tpow[org_isite1 - 1]; - if (org_isite1 == org_isite2 && org_sigma1 == org_sigma2 && - X->Def.PairExcitationOperator[i][4] == 0) { + for (j = 1; j <= i_max; j++) tmp_v0[j] += tmp_trans * tmp_v1[j]; + } + } + }// org_isite1 > X->Def.Nsite + else { + isite1 = X->Def.Tpow[org_isite1 - 1]; + if (org_isite1 == org_isite2 && org_sigma1 == org_sigma2 && + X->Def.PairExcitationOperator[i][4] == 0) { #pragma omp parallel for default(none) private(j) firstprivate(i_max, isite1, org_sigma1, X, tmp_trans) shared(tmp_v0, tmp_v1) - for (j = 1; j <= i_max; j++) { - tmp_v0[j] += (1.0 - X_Spin_CisAis(j, X, isite1, org_sigma1)) * tmp_v1[j] * (-tmp_trans); - } - } else { + for (j = 1; j <= i_max; j++) { + tmp_v0[j] += (1.0 - X_Spin_CisAis(j, X, isite1, org_sigma1)) * tmp_v1[j] * (-tmp_trans); + } + } + else { #pragma omp parallel for default(none) private(j) firstprivate(i_max, isite1, org_sigma1, X, tmp_trans) shared(tmp_v0, tmp_v1) - for (j = 1; j <= i_max; j++) { - tmp_v0[j] += X_Spin_CisAis(j, X, isite1, org_sigma1) * tmp_v1[j] * tmp_trans; - } - } - } - } else { - fprintf(stdoutMPI, "Error: isite1 must be equal to isite2 for Spin system. \n"); - return FALSE; + for (j = 1; j <= i_max; j++) { + tmp_v0[j] += X_Spin_CisAis(j, X, isite1, org_sigma1) * tmp_v1[j] * tmp_trans; } - } else { //org_sigma1 != org_sigma2 // for the canonical case - if (org_isite1 > X->Def.Nsite) {//For MPI - X_child_CisAit_spin_MPIdouble(org_isite1-1, org_sigma2, tmp_trans, X, nstate, tmp_v0, tmp_v1, tmp_v1bufOrg, i_max, X->Def.Tpow,list_1_org, list_1buf_org, list_2_1, list_2_2, X->Large.irght, X->Large.ilft,X->Large.ihfbit); + } + } + } + else { + fprintf(stdoutMPI, "Error: isite1 must be equal to isite2 for Spin system. 
\n"); + return FALSE; + } + } + else { //org_sigma1 != org_sigma2 // for the canonical case + if (org_isite1 > X->Def.Nsite) {//For MPI + X_child_CisAit_spin_MPIdouble(org_isite1 - 1, org_sigma2, tmp_trans, X, nstate, tmp_v0, tmp_v1, tmp_v1bufOrg, i_max, X->Def.Tpow, list_1_org, list_1buf_org, list_2_1, list_2_2, X->Large.irght, X->Large.ilft, X->Large.ihfbit); - } else { - isite1 = X->Def.Tpow[org_isite1 - 1]; + } + else { + isite1 = X->Def.Tpow[org_isite1 - 1]; #pragma omp parallel for default(none) private(j, tmp_off, num1) \ firstprivate(i_max, isite1, org_sigma2, X, tmp_trans, list_1_org, list_1, list_2_1, list_2_2) shared(tmp_v0, tmp_v1) - for (j = 1; j <= i_max; j++) { - num1=X_Spin_CisAit(j, X, isite1, org_sigma2, list_1_org, list_2_1, list_2_2, &tmp_off); - if (num1 != 0) tmp_v0[tmp_off] += tmp_v1[j] * tmp_trans*(double)num1; - } - } + for (j = 1; j <= i_max; j++) { + num1 = X_Spin_CisAit(j, X, isite1, org_sigma2, list_1_org, list_2_1, list_2_2, &tmp_off); + if (num1 != 0) tmp_v0[tmp_off] += tmp_v1[j] * tmp_trans*(double)num1; } + } } + } #ifdef MPI - free_cd_1d_allocate(tmp_v1bufOrg); + free_cd_1d_allocate(tmp_v1bufOrg); #endif - return TRUE; + return TRUE; } - - -// /// Calculation of pair excited state for general Spin canonical system /// \param X [in,out] define list to get and put information of calculation /// \param tmp_v0 [out] Result v0 = H v1 @@ -351,105 +349,109 @@ int GetPairExcitedStateHalfSpin( /// \author Kazuyoshi Yoshimi /// \version 1.2 int GetPairExcitedStateGeneralSpin( - struct BindStruct *X,/**< [in,out] define list to get and put information of calculation*/ - int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ - double complex **tmp_v1 /**< [in] v0 = H v1*/ + struct BindStruct *X,/**< [in,out] define list to get and put information of calculation*/ + int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + double complex **tmp_v1 /**< [in] v0 = H v1*/ ) { - long unsigned int i,j, idim_maxMPI; - long 
unsigned int org_isite1,org_isite2,org_sigma1,org_sigma2; - long unsigned int tmp_off=0; - long unsigned int off=0; - - double complex tmp_trans=0; - long int i_max; - int tmp_sgn, num1; - i_max = X->Check.idim_maxOrg; - - double complex **tmp_v1bufOrg; - //set size + long unsigned int i, j, idim_maxMPI; + long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; + long unsigned int tmp_off = 0; + long unsigned int off = 0; + + double complex tmp_trans = 0; + long int i_max; + int tmp_sgn, num1; + i_max = X->Check.idim_maxOrg; + + double complex **tmp_v1bufOrg; + //set size #ifdef MPI - idim_maxMPI = MaxMPI_li(X->Check.idim_maxOrg); - tmp_v1bufOrg = cd_1d_allocate(idim_maxMPI + 1); + idim_maxMPI = MaxMPI_li(X->Check.idim_maxOrg); + tmp_v1bufOrg = cd_1d_allocate(idim_maxMPI + 1); #endif // MPI - for(i=0;iDef.NPairExcitationOperator;i++) { - org_isite1 = X->Def.PairExcitationOperator[i][0] + 1; - org_isite2 = X->Def.PairExcitationOperator[i][2] + 1; - org_sigma1 = X->Def.PairExcitationOperator[i][1]; - org_sigma2 = X->Def.PairExcitationOperator[i][3]; - tmp_trans = X->Def.ParaPairExcitationOperator[i]; - if (org_isite1 == org_isite2) { - if (org_isite1 > X->Def.Nsite) { - if (org_sigma1 == org_sigma2) { - // longitudinal magnetic field - num1 = BitCheckGeneral((unsigned long int) myrank, - org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); - if (X->Def.PairExcitationOperator[i][4] == 0) { - if (num1 == 0) { + for (i = 0; i < X->Def.NPairExcitationOperator; i++) { + org_isite1 = X->Def.PairExcitationOperator[i][0] + 1; + org_isite2 = X->Def.PairExcitationOperator[i][2] + 1; + org_sigma1 = X->Def.PairExcitationOperator[i][1]; + org_sigma2 = X->Def.PairExcitationOperator[i][3]; + tmp_trans = X->Def.ParaPairExcitationOperator[i]; + if (org_isite1 == org_isite2) { + if (org_isite1 > X->Def.Nsite) { + if (org_sigma1 == org_sigma2) { + // longitudinal magnetic field + num1 = BitCheckGeneral((unsigned long int) myrank, + org_isite1, org_sigma1, X->Def.SiteToBit, 
X->Def.Tpow); + if (X->Def.PairExcitationOperator[i][4] == 0) { + if (num1 == 0) { #pragma omp parallel for default(none) private(j) firstprivate(i_max, tmp_trans) shared(tmp_v0, tmp_v1) - for (j = 1; j <= i_max; j++) { - tmp_v0[j] += -tmp_trans * tmp_v1[j]; - } - } - } else { - if (num1 != 0) { + for (j = 1; j <= i_max; j++) { + tmp_v0[j] += -tmp_trans * tmp_v1[j]; + } + } + } + else { + if (num1 != 0) { #pragma omp parallel for default(none) private(j) firstprivate(i_max, tmp_trans) shared(tmp_v0, tmp_v1) - for (j = 1; j <= i_max; j++) { - tmp_v0[j] += tmp_trans * tmp_v1[j]; - } - } - } - }//org_sigma1=org_sigma2 - else {//org_sigma1 != org_sigma2 - X_child_CisAit_GeneralSpin_MPIdouble(org_isite1 - 1, org_sigma1, org_sigma2, tmp_trans, X, nstate, tmp_v0, - tmp_v1, tmp_v1bufOrg, i_max, list_1_org, list_1buf_org, - X->Large.ihfbit); - } - } else {//org_isite1 <= X->Def.Nsite - if (org_sigma1 == org_sigma2) { - // longitudinal magnetic field - if (X->Def.PairExcitationOperator[i][4] == 0) { + for (j = 1; j <= i_max; j++) { + tmp_v0[j] += tmp_trans * tmp_v1[j]; + } + } + } + }//org_sigma1=org_sigma2 + else {//org_sigma1 != org_sigma2 + X_child_CisAit_GeneralSpin_MPIdouble(org_isite1 - 1, org_sigma1, org_sigma2, tmp_trans, X, nstate, tmp_v0, + tmp_v1, tmp_v1bufOrg, i_max, list_1_org, list_1buf_org, + X->Large.ihfbit); + } + } + else {//org_isite1 <= X->Def.Nsite + if (org_sigma1 == org_sigma2) { + // longitudinal magnetic field + if (X->Def.PairExcitationOperator[i][4] == 0) { #pragma omp parallel for default(none) private(j, num1) firstprivate(i_max, org_isite1, org_sigma1, X, tmp_trans) shared(tmp_v0, tmp_v1, list_1) - for (j = 1; j <= i_max; j++) { - num1 = BitCheckGeneral(list_1[j], org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); - tmp_v0[j] += -tmp_trans * tmp_v1[j] * (1.0 - num1); - } - } else { + for (j = 1; j <= i_max; j++) { + num1 = BitCheckGeneral(list_1[j], org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); + tmp_v0[j] += -tmp_trans * 
tmp_v1[j] * (1.0 - num1); + } + } + else { #pragma omp parallel for default(none) private(j, num1) firstprivate(i_max, org_isite1, org_sigma1, X, tmp_trans) shared(tmp_v0, tmp_v1, list_1) - for (j = 1; j <= i_max; j++) { - num1 = BitCheckGeneral(list_1[j], org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); - tmp_v0[j] += tmp_trans * tmp_v1[j] * num1; - } - } - }//org_sigma1=org_sigma2 - else {//org_sigma1 != org_sigma2 + for (j = 1; j <= i_max; j++) { + num1 = BitCheckGeneral(list_1[j], org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); + tmp_v0[j] += tmp_trans * tmp_v1[j] * num1; + } + } + }//org_sigma1=org_sigma2 + else {//org_sigma1 != org_sigma2 #pragma omp parallel for default(none) private(j, tmp_sgn, tmp_off) \ firstprivate(i_max, org_isite1, org_sigma1, org_sigma2, X, off, tmp_trans, myrank) \ shared(tmp_v0, tmp_v1, list_1_org, list_1) - for (j = 1; j <= i_max; j++) { - tmp_sgn = GetOffCompGeneralSpin(list_1_org[j], org_isite1, org_sigma2, org_sigma1, &off, - X->Def.SiteToBit, X->Def.Tpow); - if (tmp_sgn != FALSE) { - ConvertToList1GeneralSpin(off, X->Large.ihfbit, &tmp_off); + for (j = 1; j <= i_max; j++) { + tmp_sgn = GetOffCompGeneralSpin(list_1_org[j], org_isite1, org_sigma2, org_sigma1, &off, + X->Def.SiteToBit, X->Def.Tpow); + if (tmp_sgn != FALSE) { + ConvertToList1GeneralSpin(off, X->Large.ihfbit, &tmp_off); #ifdef _DEBUG - printf("rank=%d, org=%ld, tmp_off=%ld, list_1=%ld, ihfbit=%ld\n",myrank, list_1_org[j], off, list_1[tmp_off], X->Large.ihfbit); + printf("rank=%d, org=%ld, tmp_off=%ld, list_1=%ld, ihfbit=%ld\n", myrank, list_1_org[j], off, list_1[tmp_off], X->Large.ihfbit); #endif - tmp_v0[tmp_off] += tmp_v1[j] * tmp_trans; - } - } - - } + tmp_v0[tmp_off] += tmp_v1[j] * tmp_trans; } - } else { - fprintf(stdoutMPI, "ERROR: hopping is not allowed in localized spin system\n"); - return FALSE; - }//org_isite1 != org_isite2 + } + + } + } } + else { + fprintf(stdoutMPI, "ERROR: hopping is not allowed in localized spin system\n"); + return 
FALSE; + }//org_isite1 != org_isite2 + } #ifdef MPI - free_cd_1d_allocate(tmp_v1bufOrg); + free_cd_1d_allocate(tmp_v1bufOrg); #endif // MPI - return TRUE; + return TRUE; } diff --git a/src/SingleEx.c b/src/SingleEx.c index 7f1999c0a..fd9a6de2c 100644 --- a/src/SingleEx.c +++ b/src/SingleEx.c @@ -29,7 +29,8 @@ Target System: Hubbard, Kondo */ int GetSingleExcitedState( struct BindStruct *X,//!Def.NSingleExcitationOperator == 0) { @@ -51,7 +53,7 @@ int GetSingleExcitedStateHubbard( //set size #ifdef MPI idim_maxMPI = MaxMPI_li(X->Check.idim_maxOrg); - tmp_v1bufOrg=cd_1d_allocate(idim_maxMPI + 1); + tmp_v1bufOrg = cd_1d_allocate(idim_maxMPI + 1); #endif // MPI idim_max = X->Check.idim_maxOrg; @@ -63,31 +65,37 @@ int GetSingleExcitedStateHubbard( is1_spin = X->Def.Tpow[2 * org_isite + ispin]; if (itype == 1) { if (org_isite >= X->Def.Nsite) { - X_Cis_MPI(org_isite, ispin, tmpphi, nstate, tmp_v0, tmp_v1, tmp_v1bufOrg, idim_max, \ - X->Def.Tpow, list_1_org, list_1buf_org, list_2_1, list_2_2, \ + X_Cis_MPI(org_isite, ispin, tmpphi, nstate, tmp_v0, tmp_v1, tmp_v1bufOrg, idim_max, + X->Def.Tpow, list_1_org, list_1buf_org, list_2_1, list_2_2, X->Large.irght, X->Large.ilft, X->Large.ihfbit); } else { #pragma omp parallel for default(none) shared(tmp_v0, tmp_v1, X, list_1_org) \ - firstprivate(idim_max, tmpphi, org_isite, ispin, list_2_1, list_2_2, is1_spin) private(j, isgn,tmp_off) + firstprivate(idim_max, tmpphi, org_isite, ispin, list_2_1, list_2_2, is1_spin) \ +private(j, isgn,tmp_off) for (j = 1; j <= idim_max; j++) {//idim_max -> original dimension - isgn = X_Cis(j, is1_spin, &tmp_off, list_1_org, list_2_1, list_2_2, X->Large.irght, X->Large.ilft, X->Large.ihfbit); - tmp_v0[tmp_off] += tmp_v1[j] * isgn*tmpphi; + isgn = X_Cis(j, is1_spin, &tmp_off, list_1_org, list_2_1, list_2_2, + X->Large.irght, X->Large.ilft, X->Large.ihfbit); + dmv = isgn * tmpphi; + zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[tmp_off], &one); } } } else if (itype == 0) { if (org_isite >= 
X->Def.Nsite) { - X_Ajt_MPI(org_isite, ispin, tmpphi, nstate, tmp_v0, tmp_v1, tmp_v1bufOrg, \ - idim_max, X->Def.Tpow, list_1_org, list_1buf_org, \ + X_Ajt_MPI(org_isite, ispin, tmpphi, nstate, tmp_v0, tmp_v1, tmp_v1bufOrg, + idim_max, X->Def.Tpow, list_1_org, list_1buf_org, list_2_1, list_2_2, X->Large.irght, X->Large.ilft, X->Large.ihfbit); } else { #pragma omp parallel for default(none) shared(tmp_v0, tmp_v1, X, list_1_org, list_1) \ - firstprivate(idim_max, tmpphi, org_isite, ispin, list_2_1, list_2_2, is1_spin, myrank) private(j, isgn, tmp_off) + firstprivate(idim_max, tmpphi, org_isite, ispin, list_2_1, list_2_2, is1_spin, myrank) \ +private(j, isgn, tmp_off) for (j = 1; j <= idim_max; j++) {//idim_max -> original dimension - isgn = X_Ajt(j, is1_spin, &tmp_off, list_1_org, list_2_1, list_2_2, X->Large.irght, X->Large.ilft, X->Large.ihfbit); - tmp_v0[tmp_off] += tmp_v1[j] * isgn*tmpphi; + isgn = X_Ajt(j, is1_spin, &tmp_off, list_1_org, list_2_1, list_2_2, + X->Large.irght, X->Large.ilft, X->Large.ihfbit); + dmv = isgn * tmpphi; + zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[tmp_off], &one); } } } @@ -106,9 +114,10 @@ int GetSingleExcitedStateHubbard( */ int GetSingleExcitedStateHubbardGC( struct BindStruct *X,//!Check.idim_maxOrg); - tmp_v1bufOrg=cd_1d_allocate(idim_maxMPI + 1); + tmp_v1bufOrg = cd_1d_allocate(idim_maxMPI + 1); #endif // MPI // SingleEx @@ -136,7 +145,8 @@ int GetSingleExcitedStateHubbardGC( tmpphi = X->Def.ParaSingleExcitationOperator[i]; if (itype == 1) { if (org_isite >= X->Def.Nsite) { - X_GC_Cis_MPI(org_isite, ispin, tmpphi, nstate, tmp_v0, tmp_v1, idim_max, tmp_v1bufOrg, X->Def.Tpow); + X_GC_Cis_MPI(org_isite, ispin, tmpphi, nstate, tmp_v0, tmp_v1, + idim_max, tmp_v1bufOrg, X->Def.Tpow); } else { #pragma omp parallel for default(none) shared(tmp_v0, tmp_v1, X) \ @@ -149,7 +159,8 @@ int GetSingleExcitedStateHubbardGC( } else if (itype == 0) { if (org_isite >= X->Def.Nsite) { - X_GC_Ajt_MPI(org_isite, ispin, tmpphi, nstate, tmp_v0, tmp_v1, 
idim_max, tmp_v1bufOrg, X->Def.Tpow); + X_GC_Ajt_MPI(org_isite, ispin, tmpphi, nstate, tmp_v0, tmp_v1, + idim_max, tmp_v1bufOrg, X->Def.Tpow); } else { #pragma omp parallel for default(none) shared(tmp_v0, tmp_v1, X) \ diff --git a/src/expec_cisajs.c b/src/expec_cisajs.c index 00a43fe35..d40fd402c 100644 --- a/src/expec_cisajs.c +++ b/src/expec_cisajs.c @@ -24,6 +24,7 @@ #include "mltplySpinCore.h" #include "mltplyMPIHubbard.h" #include "mltplyMPISpinCore.h" +#include "common/setmemory.h" /** * @file expec_cisajs.c @@ -59,12 +60,11 @@ int expec_cisajs( ){ FILE *fp; char sdt[D_FileNameMax]; - - long unsigned int irght,ilft,ihfbit; + double complex **prod; + long unsigned int irght, ilft, ihfbit, ica; long int i_max; //For TPQ - int step=0; - int rand_i=0; + int step = 0, rand_i = 0, istate; if(X->Def.NCisAjt <1) return 0; @@ -81,50 +81,41 @@ int expec_cisajs( switch(X->Def.iCalcType){ case TPQCalc: step=X->Def.istep; - rand_i=X->Def.irand; - TimeKeeperWithRandAndStep(X, cFileNameTimeKeep, cTPQExpecOneBodyGStart, "a", rand_i, step); - sprintf(sdt, cFileName1BGreen_TPQ, X->Def.CDataFileHead, rand_i, step); - //vec=v0; + TimeKeeperWithRandAndStep(X, cFileNameTimeKeep, cTPQExpecOneBodyGStart, "a", 0, step); break; case TimeEvolution: step = X->Def.istep; TimeKeeperWithStep(X, cFileNameTimeKeep, cTEExpecOneBodyGStart, "a", step); - sprintf(sdt, cFileName1BGreen_TE, X->Def.CDataFileHead, step); break; - case FullDiag: case CG: - sprintf(sdt, cFileName1BGreen_FullDiag, X->Def.CDataFileHead, X->Phys.eigen_num); - //vec=v0; break; } - if(childfopenMPI(sdt, "w", &fp)!=0){ - return -1; - } + prod = cd_2d_allocate(X->Def.NCisAjt, nstate); switch(X->Def.iCalcModel){ case HubbardGC: - if(expec_cisajs_HubbardGC(X, nstate, Xvec, vec, &fp)!=0){ - return -1; + if(expec_cisajs_HubbardGC(X, nstate, Xvec, vec, prod)!=0){ + return -1; } break; case KondoGC: case Hubbard: case Kondo: - if (expec_cisajs_Hubbard(X, nstate, Xvec, vec, &fp) != 0) { + if (expec_cisajs_Hubbard(X, nstate, Xvec, 
vec, prod) != 0) { return -1; } break; case Spin: // for the Sz-conserved spin system - if (expec_cisajs_Spin(X, nstate, Xvec, vec, &fp) != 0) { + if (expec_cisajs_Spin(X, nstate, Xvec, vec, prod) != 0) { return -1; } break; case SpinGC: - if (expec_cisajs_SpinGC(X, nstate, Xvec, vec, &fp) != 0) { + if (expec_cisajs_SpinGC(X, nstate, Xvec, vec, prod) != 0) { return -1; } break; @@ -133,14 +124,34 @@ int expec_cisajs( return -1; } - fclose(fp); - if(X->Def.St==0){ - if(X->Def.iCalcType==Lanczos){ - TimeKeeper(X, cFileNameTimeKeep, cLanczosExpecOneBodyGFinish, "a"); - fprintf(stdoutMPI, "%s", cLogLanczosExpecOneBodyGEnd); - TimeKeeper(X, cFileNameTimeKeep, cLanczosExpecOneBodyGFinish, "a"); + for (istate = 0; istate < nstate; istate++) { + switch (X->Def.iCalcModel) { + case TPQCalc: + step = X->Def.istep; + sprintf(sdt, cFileName1BGreen_TPQ, X->Def.CDataFileHead, istate, step); + break; + case TimeEvolution: + step = X->Def.istep; + sprintf(sdt, cFileName1BGreen_TE, X->Def.CDataFileHead, step); + break; + case FullDiag: + case CG: + sprintf(sdt, cFileName1BGreen_FullDiag, X->Def.CDataFileHead, istate); + break; + } + if (childfopenMPI(sdt, "w", &fp) == 0) { + for (ica = 0; ica < X->Def.NCisAjt; ica++) { + fprintf(fp, " %4ld %4ld %4ld %4ld %.10lf %.10lf\n", + X->Def.CisAjt[ica][0], X->Def.CisAjt[ica][1], X->Def.CisAjt[ica][2], X->Def.CisAjt[ica][3], + creal(prod[ica][istate]), cimag(prod[ica][istate])); + } + fclose(fp); } - else if(X->Def.iCalcType==TPQCalc){ + else return -1; + }/*for (istate = 0; istate < nstate; istate++)*/ + + if(X->Def.St==0){ + if(X->Def.iCalcType==TPQCalc){ TimeKeeperWithRandAndStep(X, cFileNameTimeKeep, cTPQExpecOneBodyGFinish, "a", rand_i, step); } else if(X->Def.iCalcType==TimeEvolution){ @@ -150,6 +161,7 @@ int expec_cisajs( TimeKeeper(X, cFileNameTimeKeep, cCGExpecOneBodyGFinish, "a"); fprintf(stdoutMPI, "%s", cLogCGExpecOneBodyGEnd); } + free_cd_2d_allocate(prod); return 0; } /** @@ -223,8 +235,6 @@ int expec_cisajs_HubbardGC( } 
MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); - fprintf(*_fp, " %4ld %4ld %4ld %4ld %.10lf %.10lf\n", - org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, creal(dam_pr), cimag(dam_pr)); } return 0; } @@ -325,7 +335,6 @@ firstprivate(i_max, is) private(num1, ibit) } } MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); - fprintf(*_fp, " %4ld %4ld %4ld %4ld %.10lf %.10lf\n", org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, creal(dam_pr), cimag(dam_pr)); } return 0; } @@ -347,10 +356,10 @@ int expec_cisajs_Spin( ) { int info = 0; if (X->Def.iFlgGeneralSpin == FALSE) { - info = expec_cisajs_SpinHalf(X, nstate, Xvec, vec, _fp); + info = expec_cisajs_SpinHalf(X, nstate, Xvec, vec, prod); } else { - info = expec_cisajs_SpinGeneral(X, nstate, Xvec, vec, _fp); + info = expec_cisajs_SpinGeneral(X, nstate, Xvec, vec, prod); } return info; } @@ -416,7 +425,6 @@ firstprivate(i_max, isite1, org_sigma1, X) shared(vec) dam_pr = 0.0; } MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); - fprintf(*_fp, " %4ld %4ld %4ld %4ld %.10lf %.10lf\n", org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, creal(dam_pr), cimag(dam_pr)); } return 0; } @@ -487,7 +495,6 @@ firstprivate(i_max, org_isite1, org_sigma1, X) shared(vec, list_1) }//org_isite1 != org_isite2 MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); - fprintf(*_fp, " %4ld %4ld %4ld %4ld %.10lf %.10lf\n", org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, creal(dam_pr), cimag(dam_pr)); } return 0; } @@ -590,8 +597,6 @@ firstprivate(i_max, isite1, org_sigma2, X) shared(vec) dam_pr = 0.0; } MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); - fprintf(*_fp, " %4ld %4ld %4ld %4ld %.10lf %.10lf\n", - org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, creal(dam_pr), cimag(dam_pr)); } return 0; } @@ -666,8 +671,6 @@ firstprivate(i_max, org_isite1, org_sigma1, org_sigma2, X,tmp_off) shared(vec) } } MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); - fprintf(*_fp, " %4ld %4ld %4ld %4ld %.10lf %.10lf\n", - 
org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, creal(dam_pr), cimag(dam_pr)); } return 0; } diff --git a/src/expec_cisajscktaltdc.c b/src/expec_cisajscktaltdc.c index 88c61d840..e34fd701c 100644 --- a/src/expec_cisajscktaltdc.c +++ b/src/expec_cisajscktaltdc.c @@ -25,6 +25,7 @@ #include "mltplyMPISpin.h" #include "mltplyMPISpinCore.h" #include "mltplyMPIHubbardCore.h" +#include "common/setmemory.h" /** * @file expec_cisajscktaltdc.c * @@ -57,18 +58,17 @@ */ int expec_cisajscktaltdc ( - struct BindStruct *X, + struct BindStruct *X, int nstate, double complex **Xvec, double complex **vec ){ FILE *fp; char sdt[D_FileNameMax]; - long unsigned int irght,ilft,ihfbit; - + long unsigned int irght, ilft, ihfbit, icaca; + double complex **prod; //For TPQ - int step=0; - int rand_i=0; + int step = 0, rand_i = 0, istate; if(X->Def.NCisAjtCkuAlvDC <1) return 0; X->Large.mode=M_CORR; @@ -78,33 +78,24 @@ int expec_cisajscktaltdc } //Make File Name for output + prod = cd_2d_allocate(X->Def.NCisAjt, nstate); switch (X->Def.iCalcType){ case TPQCalc: step=X->Def.istep; - rand_i=X->Def.irand; - TimeKeeperWithRandAndStep(X, cFileNameTimeKeep, cTPQExpecTwoBodyGStart, "a", rand_i, step); - sprintf(sdt, cFileName2BGreen_TPQ, X->Def.CDataFileHead, rand_i, step); + TimeKeeperWithRandAndStep(X, cFileNameTimeKeep, cTPQExpecTwoBodyGStart, "a", 0, step); break; - case TimeEvolution: step=X->Def.istep; TimeKeeperWithStep(X, cFileNameTimeKeep, cTEExpecTwoBodyGStart, "a", step); - sprintf(sdt, cFileName2BGreen_TE, X->Def.CDataFileHead, step); break; - case FullDiag: case CG: - sprintf(sdt, cFileName2BGreen_FullDiag, X->Def.CDataFileHead, X->Phys.eigen_num); break; } - if(childfopenMPI(sdt, "w", &fp)!=0){ - return -1; - } - switch(X->Def.iCalcModel){ case HubbardGC: - if (expec_cisajscktalt_HubbardGC(X, nstate, Xvec, vec, &fp) != 0) { + if (expec_cisajscktalt_HubbardGC(X, nstate, Xvec, vec, prod) != 0) { return -1; } break; @@ -112,19 +103,19 @@ int expec_cisajscktaltdc case KondoGC: case 
Hubbard: case Kondo: - if (expec_cisajscktalt_Hubbard(X, nstate, Xvec, vec, &fp) != 0) { + if (expec_cisajscktalt_Hubbard(X, nstate, Xvec, vec, prod) != 0) { return -1; } break; case Spin: - if (expec_cisajscktalt_Spin(X, nstate, Xvec, vec, &fp) != 0) { + if (expec_cisajscktalt_Spin(X, nstate, Xvec, vec, prod) != 0) { return -1; } break; case SpinGC: - if (expec_cisajscktalt_SpinGC(X, nstate, Xvec, vec, &fp) != 0) { + if (expec_cisajscktalt_SpinGC(X, nstate, Xvec, vec, prod) != 0) { return -1; } break; @@ -133,18 +124,36 @@ int expec_cisajscktaltdc return -1; } - fclose(fp); - - if(X->Def.iCalcType==Lanczos){ - if(X->Def.St==0){ - TimeKeeper(X, cFileNameTimeKeep, cLanczosExpecTwoBodyGFinish,"a"); - fprintf(stdoutMPI, "%s", cLogLanczosExpecTwoBodyGFinish); - }else if(X->Def.St==1){ - TimeKeeper(X, cFileNameTimeKeep, cCGExpecTwoBodyGFinish,"a"); - fprintf(stdoutMPI, "%s", cLogCGExpecTwoBodyGFinish); + for (istate = 0; istate < nstate; istate++) { + switch (X->Def.iCalcModel) { + case TPQCalc: + step = X->Def.istep; + sprintf(sdt, cFileName2BGreen_TPQ, X->Def.CDataFileHead, istate, step); + break; + case TimeEvolution: + step = X->Def.istep; + sprintf(sdt, cFileName2BGreen_TE, X->Def.CDataFileHead, step); + break; + case FullDiag: + case CG: + sprintf(sdt, cFileName2BGreen_FullDiag, X->Def.CDataFileHead, istate); + break; } - } - else if(X->Def.iCalcType==TPQCalc){ + if (childfopenMPI(sdt, "w", &fp) == 0) { + for (icaca = 0; icaca < X->Def.NCisAjt; icaca++) { + fprintf(fp, " %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf\n", + X->Def.CisAjtCkuAlvDC[icaca][0], X->Def.CisAjtCkuAlvDC[icaca][1], + X->Def.CisAjtCkuAlvDC[icaca][2], X->Def.CisAjtCkuAlvDC[icaca][3], + X->Def.CisAjtCkuAlvDC[icaca][4], X->Def.CisAjtCkuAlvDC[icaca][5], + X->Def.CisAjtCkuAlvDC[icaca][6], X->Def.CisAjtCkuAlvDC[icaca][7], + creal(prod[icaca][istate]), cimag(prod[icaca][istate])); + } + fclose(fp); + } + else return -1; + }/*for (istate = 0; istate < nstate; istate++)*/ + + 
if(X->Def.iCalcType==TPQCalc){ TimeKeeperWithRandAndStep(X, cFileNameTimeKeep, cTPQExpecTwoBodyGFinish, "a", rand_i, step); } else if(X->Def.iCalcType==TimeEvolution){ @@ -164,6 +173,7 @@ int expec_cisajscktaltdc } */ //[e] + free_cd_2d_allocate(prod); return 0; } /// @@ -375,7 +385,6 @@ firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,t } } MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); - fprintf(*_fp, " %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf\n", org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, org_isite3 - 1, org_sigma3, org_isite4 - 1, org_sigma4, creal(dam_pr), cimag(dam_pr)); }//Intra PE return 0; } @@ -420,8 +429,7 @@ int expec_cisajscktalt_Hubbard( if (X->Def.iFlgSzConserved == TRUE) { if (org_sigma1 + org_sigma3 != org_sigma2 + org_sigma4) { - MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); - fprintf(*_fp, " %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n", org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, org_isite3 - 1, org_sigma3, org_isite4 - 1, org_sigma4, creal(dam_pr), cimag(dam_pr)); + zclear(nstate, prod[i]); continue; } } @@ -502,7 +510,6 @@ firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,t } } MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); - fprintf(*_fp, " %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf\n", org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, org_isite3 - 1, org_sigma3, org_isite4 - 1, org_sigma4, creal(dam_pr), cimag(dam_pr)); } return 0; } @@ -525,10 +532,10 @@ int expec_cisajscktalt_Spin( ) { int info = 0; if (X->Def.iFlgGeneralSpin == FALSE) { - info = expec_cisajscktalt_SpinHalf(X, nstate, Xvec, vec, _fp); + info = expec_cisajscktalt_SpinHalf(X, nstate, Xvec, vec, prod); } else { - info = expec_cisajscktalt_SpinGeneral(X, nstate, Xvec, vec, _fp); + info = expec_cisajscktalt_SpinGeneral(X, nstate, Xvec, vec, prod); } return info; } @@ -577,7 +584,7 @@ int expec_cisajscktalt_SpinHalf( tmp_org_sigma4 = 
X->Def.CisAjtCkuAlvDC[i][7]; if (Rearray_Interactions(i, &org_isite1, &org_isite2, &org_isite3, &org_isite4, &org_sigma1, &org_sigma2, &org_sigma3, &org_sigma4, &tmp_V, X) != 0) { //error message will be added - fprintf(*_fp, " %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n", tmp_org_isite1 - 1, tmp_org_sigma1, tmp_org_isite2 - 1, tmp_org_sigma2, tmp_org_isite3 - 1, tmp_org_sigma3, tmp_org_isite4 - 1, tmp_org_sigma4, 0.0, 0.0); + zclear(nstate, prod[i]); continue; } @@ -658,7 +665,6 @@ firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V) } } MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); - fprintf(*_fp, " %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n", tmp_org_isite1 - 1, tmp_org_sigma1, tmp_org_isite2 - 1, tmp_org_sigma2, tmp_org_isite3 - 1, tmp_org_sigma3, tmp_org_isite4 - 1, tmp_org_sigma4, creal(dam_pr), cimag(dam_pr)); } return 0; } @@ -707,7 +713,7 @@ int expec_cisajscktalt_SpinGeneral( tmp_org_sigma4 = X->Def.CisAjtCkuAlvDC[i][7]; if (Rearray_Interactions(i, &org_isite1, &org_isite2, &org_isite3, &org_isite4, &org_sigma1, &org_sigma2, &org_sigma3, &org_sigma4, &tmp_V, X) != 0) { - fprintf(*_fp, " %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n", tmp_org_isite1 - 1, tmp_org_sigma1, tmp_org_isite2 - 1, tmp_org_sigma2, tmp_org_isite3 - 1, tmp_org_sigma3, tmp_org_isite4 - 1, tmp_org_sigma4, 0.0, 0.0); + zclear(nstate, prod[i]); continue; } tmp_Sz = 0; @@ -719,7 +725,7 @@ int expec_cisajscktalt_SpinGeneral( tmp_Sz -= GetLocal2Sz(X->Def.CisAjtCkuAlvDC[i][4 * j + 2] + 1, tmp_org, X->Def.SiteToBit, X->Def.Tpow); } if (tmp_Sz != 0) { // not Sz conserved - fprintf(*_fp, " %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n", tmp_org_isite1 - 1, tmp_org_sigma1, tmp_org_isite2 - 1, tmp_org_sigma2, tmp_org_isite3 - 1, tmp_org_sigma3, tmp_org_isite4 - 1, tmp_org_sigma4, 0.0, 0.0); + zclear(nstate, prod[i]); continue; } @@ -784,7 +790,6 @@ firstprivate(i_max,X, org_isite1, org_isite3, org_sigma1, org_sigma2, 
org_sigma3 } } MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); - fprintf(*_fp, " %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n", tmp_org_isite1 - 1, tmp_org_sigma1, tmp_org_isite2 - 1, tmp_org_sigma2, tmp_org_isite3 - 1, tmp_org_sigma3, tmp_org_isite4 - 1, tmp_org_sigma4, creal(dam_pr), cimag(dam_pr)); } return 0; } @@ -807,10 +812,10 @@ int expec_cisajscktalt_SpinGC( ){ int info = 0; if (X->Def.iFlgGeneralSpin == FALSE) { - info = expec_cisajscktalt_SpinGCHalf(X, nstate, Xvec, vec, _fp); + info = expec_cisajscktalt_SpinGCHalf(X, nstate, Xvec, vec, prod); } else { - info = expec_cisajscktalt_SpinGCGeneral(X, nstate, Xvec, vec, _fp); + info = expec_cisajscktalt_SpinGCGeneral(X, nstate, Xvec, vec, prod); } return info; } @@ -855,7 +860,7 @@ int expec_cisajscktalt_SpinGCHalf( if (Rearray_Interactions(i, &org_isite1, &org_isite2, &org_isite3, &org_isite4, &org_sigma1, &org_sigma2, &org_sigma3, &org_sigma4, &tmp_V, X) != 0) { //error message will be added - fprintf(*_fp, " %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n", tmp_org_isite1 - 1, tmp_org_sigma1, tmp_org_isite2 - 1, tmp_org_sigma2, tmp_org_isite3 - 1, tmp_org_sigma3, tmp_org_isite4 - 1, tmp_org_sigma4, 0.0, 0.0); + zclear(nstate, prod[i]); continue; } @@ -947,7 +952,6 @@ firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V) } } MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); - fprintf(*_fp, " %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n", tmp_org_isite1 - 1, tmp_org_sigma1, tmp_org_isite2 - 1, tmp_org_sigma2, tmp_org_isite3 - 1, tmp_org_sigma3, tmp_org_isite4 - 1, tmp_org_sigma4, creal(dam_pr), cimag(dam_pr)); } return 0; } @@ -994,7 +998,7 @@ int expec_cisajscktalt_SpinGCGeneral( if (Rearray_Interactions(i, &org_isite1, &org_isite2, &org_isite3, &org_isite4, &org_sigma1, &org_sigma2, &org_sigma3, &org_sigma4, &tmp_V, X) != 0) { //error message will be added - fprintf(*_fp, " %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n", tmp_org_isite1 - 
1, tmp_org_sigma1, tmp_org_isite2 - 1, tmp_org_sigma2, tmp_org_isite3 - 1, tmp_org_sigma3, tmp_org_isite4 - 1, tmp_org_sigma4, 0.0, 0.0); + zclear(nstate, prod[i]); continue; } @@ -1099,7 +1103,6 @@ firstprivate(i_max,X, org_isite1, org_isite3, org_sigma1, org_sigma2, org_sigma3 } } MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); - fprintf(*_fp, " %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n", tmp_org_isite1 - 1, tmp_org_sigma1, tmp_org_isite2 - 1, tmp_org_sigma2, tmp_org_isite3 - 1, tmp_org_sigma3, tmp_org_isite4 - 1, tmp_org_sigma4, creal(dam_pr), cimag(dam_pr)); } return 0; } diff --git a/src/expec_energy_flct.c b/src/expec_energy_flct.c index d8f719ac9..76d907913 100644 --- a/src/expec_energy_flct.c +++ b/src/expec_energy_flct.c @@ -20,6 +20,7 @@ #include "expec_energy_flct.h" #include "wrapperMPI.h" #include "CalcTime.h" +#include "common/setmemory.h" /** * @brief Parent function to calculate expected values of energy and physical quantities. @@ -31,15 +32,20 @@ * \retval 0 normally finished. * \retval -1 abnormally finished. 
*/ -int expec_energy_flct(struct BindStruct *X){ - - long unsigned int i,j; - long unsigned int irght,ilft,ihfbit; - double complex dam_pr,dam_pr1; +int expec_energy_flct( + struct BindStruct *X, + int nstate, + double complex **tmp_v0, + double complex **tmp_v1 +) { + + long unsigned int i, j; + long unsigned int irght, ilft, ihfbit; + double complex dam_pr, dam_pr1; long unsigned int i_max; int istate; - switch(X->Def.iCalcType){ + switch (X->Def.iCalcType) { case TPQCalc: case TimeEvolution: #ifdef _DEBUG @@ -54,46 +60,46 @@ int expec_energy_flct(struct BindStruct *X){ return -1; } - i_max=X->Check.idim_max; - if(GetSplitBitByModel(X->Def.Nsite, X->Def.iCalcModel, &irght, &ilft, &ihfbit)!=0){ + i_max = X->Check.idim_max; + if (GetSplitBitByModel(X->Def.Nsite, X->Def.iCalcModel, &irght, &ilft, &ihfbit) != 0) { return -1; } - X->Large.i_max = i_max; - X->Large.irght = irght; - X->Large.ilft = ilft; - X->Large.ihfbit = ihfbit; - X->Large.mode = M_ENERGY; - for (istate = 0; istate < X->Def.k_exct; istate++) X->Phys.energy[istate]=0.0; + X->Large.i_max = i_max; + X->Large.irght = irght; + X->Large.ilft = ilft; + X->Large.ihfbit = ihfbit; + X->Large.mode = M_ENERGY; + for (istate = 0; istate < nstate; istate++) X->Phys.energy[istate] = 0.0; int nCalcFlct; if (X->Def.iCalcType == TPQCalc) { - nCalcFlct=3201; + nCalcFlct = 3201; } - else{//For FullDiag - nCalcFlct=5301; + else {//For FullDiag + nCalcFlct = 5301; } StartTimer(nCalcFlct); - - switch(X->Def.iCalcModel){ + + switch (X->Def.iCalcModel) { case HubbardGC: - expec_energy_flct_HubbardGC(X); - break; + expec_energy_flct_HubbardGC(X, nstate, tmp_v0); + break; case KondoGC: case Hubbard: case Kondo: - expec_energy_flct_Hubbard(X); - break; - + expec_energy_flct_Hubbard(X, nstate, tmp_v0); + break; + case SpinGC: - if(X->Def.iFlgGeneralSpin == FALSE) { - expec_energy_flct_HalfSpinGC(X); - } - else{//for generalspin - expec_energy_flct_GeneralSpinGC(X); - } - break;/*case SpinGC*/ - /* SpinGCBoost */ + if 
(X->Def.iFlgGeneralSpin == FALSE) { + expec_energy_flct_HalfSpinGC(X, nstate, tmp_v0); + } + else {//for generalspin + expec_energy_flct_GeneralSpinGC(X, nstate, tmp_v0); + } + break;/*case SpinGC*/ + /* SpinGCBoost */ case Spin: /* if(X->Def.iFlgGeneralSpin == FALSE){ @@ -103,7 +109,7 @@ int expec_energy_flct(struct BindStruct *X){ expec_energy_flct_GeneralSpin(X); } */ - for (istate = 0; istate < X->Def.k_exct; istate++) { + for (istate = 0; istate < nstate; istate++) { X->Phys.doublon[istate] = 0.0; X->Phys.doublon2[istate] = 0.0; X->Phys.num[istate] = X->Def.NsiteMPI; @@ -116,52 +122,52 @@ int expec_energy_flct(struct BindStruct *X){ return -1; } - StopTimer(nCalcFlct); + StopTimer(nCalcFlct); #pragma omp parallel for default(none) private(i) shared(v1,v0) firstprivate(i_max) for (i = 1; i <= i_max; i++) { - for (istate = 0; istate < X->Def.k_exct; istate++){ - v1[i][istate] = v0[i][istate]; - v0[i][istate] = 0.0; + for (istate = 0; istate < nstate; istate++) { + tmp_v1[i][istate] = tmp_v0[i][istate]; + tmp_v0[i][istate] = 0.0; } } int nCalcExpec; - if (X->Def.iCalcType == TPQCalc){ - nCalcExpec=3202; + if (X->Def.iCalcType == TPQCalc) { + nCalcExpec = 3202; } - else{//For FullDiag - nCalcExpec=5302; + else {//For FullDiag + nCalcExpec = 5302; } StartTimer(nCalcExpec); - mltply(X, 1, X->Def.k_exct, v0, v1); // v0+=H*v1 + mltply(X, 1, nstate, tmp_v0, tmp_v1); // v0+=H*v1 StopTimer(nCalcExpec); -/* switch -> SpinGCBoost */ + /* switch -> SpinGCBoost */ - for (istate = 0; istate < X->Def.k_exct; istate++) { + for (istate = 0; istate < nstate; istate++) { X->Phys.energy[istate] = 0.0; X->Phys.var[istate] = 0.0; } - #pragma omp parallel for default(none) reduction(+:dam_pr, dam_pr1) private(j) shared(v0, v1)firstprivate(i_max) +#pragma omp parallel for default(none) reduction(+:dam_pr, dam_pr1) private(j) shared(v0, v1)firstprivate(i_max) for (j = 1; j <= i_max; j++) { - for (istate = 0; istate < X->Def.k_exct; istate++) { - X->Phys.energy += 
conj(v1[j][istate])*v0[j][istate]; // E = = - X->Phys.var += conj(v0[j][istate])*v0[j][istate]; // E^2 = = + for (istate = 0; istate < nstate; istate++) { + X->Phys.energy[istate] += conj(tmp_v1[j][istate])*tmp_v0[j][istate]; // E = = + X->Phys.var[istate] += conj(tmp_v0[j][istate])*tmp_v0[j][istate]; // E^2 = = } - } - SumMPI_cv(X->Def.k_exct, X->Phys.energy); - SumMPI_cv(X->Def.k_exct, X->Phys.var); + } + SumMPI_cv(nstate, X->Phys.energy); + SumMPI_cv(nstate, X->Phys.var); - switch(X->Def.iCalcType) { - case TPQCalc: - case TimeEvolution: + switch (X->Def.iCalcType) { + case TPQCalc: + case TimeEvolution: #ifdef _DEBUG - fprintf(stdoutMPI, "%s", cLogExpecEnergyEnd); - TimeKeeperWithStep(X, cFileNameTimeKeep, cTPQExpecEnd, "a", step_i); + fprintf(stdoutMPI, "%s", cLogExpecEnergyEnd); + TimeKeeperWithStep(X, cFileNameTimeKeep, cTPQExpecEnd, "a", step_i); #endif - break; - default: - break; + break; + default: + break; } return 0; } @@ -170,21 +176,26 @@ int expec_energy_flct(struct BindStruct *X){ /// \param X [in, out] X Struct to get information about file header names, dimension of hirbert space, calc type and output physical quantities. /// \retval 0 normally finished. /// \retval -1 abnormally finished. 
-int expec_energy_flct_HubbardGC(struct BindStruct *X) { +int expec_energy_flct_HubbardGC( + struct BindStruct *X, + int nstate, + double complex **tmp_v0 +) { long unsigned int j; long unsigned int isite1; long unsigned int is1_up_a, is1_up_b; long unsigned int is1_down_a, is1_down_b; - int bit_up, bit_down, bit_D; + int bit_up, bit_down, bit_D, istate; long unsigned int ibit_up, ibit_down, ibit_D; double D, tmp_D, tmp_D2; double N, tmp_N, tmp_N2; double Sz, tmp_Sz, tmp_Sz2; - double tmp_v02; + double *tmp_v02; long unsigned int i_max; unsigned int l_ibit1, u_ibit1, i_32; i_max = X->Check.idim_max; + tmp_v02 = d_1d_allocate(nstate); i_32 = 0xFFFFFFFF; //2^32 - 1 // tentative doublon tmp_D = 0.0; @@ -194,7 +205,7 @@ int expec_energy_flct_HubbardGC(struct BindStruct *X) { tmp_Sz = 0.0; tmp_Sz2 = 0.0; -//[s] for bit count + //[s] for bit count is1_up_a = 0; is1_up_b = 0; is1_down_a = 0; @@ -209,16 +220,16 @@ int expec_energy_flct_HubbardGC(struct BindStruct *X) { is1_down_b += X->Def.Tpow[2 * isite1 - 1]; } } -//[e] + //[e] #pragma omp parallel for reduction(+:tmp_D,tmp_D2,tmp_N,tmp_N2,tmp_Sz,tmp_Sz2) default(none) shared(v0,list_1) \ firstprivate(i_max, X,myrank,is1_up_a,is1_down_a,is1_up_b,is1_down_b,i_32) \ private(j, tmp_v02,D,N,Sz,isite1,bit_up,bit_down,bit_D,u_ibit1,l_ibit1,ibit_up,ibit_down,ibit_D) for (j = 1; j <= i_max; j++) { - tmp_v02 = conj(v0[j]) * v0[j]; + for (istate = 0; istate < nstate; istate++) tmp_v02[istate] = conj(tmp_v0[j][istate]) * tmp_v0[j][istate]; bit_up = 0; bit_down = 0; bit_D = 0; -// isite1 > X->Def.Nsite + // isite1 > X->Def.Nsite ibit_up = (unsigned long int) myrank & is1_up_a; u_ibit1 = ibit_up >> 32; l_ibit1 = ibit_up & i_32; @@ -237,7 +248,7 @@ int expec_energy_flct_HubbardGC(struct BindStruct *X) { bit_D += pop(u_ibit1); bit_D += pop(l_ibit1); -// isite1 <= X->Def.Nsite + // isite1 <= X->Def.Nsite ibit_up = (unsigned long int) (j - 1) & is1_up_b; u_ibit1 = ibit_up >> 32; l_ibit1 = ibit_up & i_32; @@ -260,29 +271,30 @@ int 
expec_energy_flct_HubbardGC(struct BindStruct *X) { N = bit_up + bit_down; Sz = bit_up - bit_down; - tmp_D += tmp_v02 * D; - tmp_D2 += tmp_v02 * D * D; - tmp_N += tmp_v02 * N; - tmp_N2 += tmp_v02 * N * N; - tmp_Sz += tmp_v02 * Sz; - tmp_Sz2 += tmp_v02 * Sz * Sz; + for (istate = 0; istate < nstate; istate++) { + X->Phys.doublon[istate] += tmp_v02[istate] * D; + X->Phys.doublon2[istate] += tmp_v02[istate] * D * D; + X->Phys.num[istate] += tmp_v02[istate] * N; + X->Phys.num2[istate] += tmp_v02[istate] * N * N; + X->Phys.Sz[istate] += tmp_v02[istate] * Sz; + X->Phys.Sz2[istate] += tmp_v02[istate] * Sz * Sz; + } + } + SumMPI_dv(nstate, X->Phys.doublon); + SumMPI_dv(nstate, X->Phys.doublon2); + SumMPI_dv(nstate, X->Phys.num); + SumMPI_dv(nstate, X->Phys.num2); + SumMPI_dv(nstate, X->Phys.Sz); + SumMPI_dv(nstate, X->Phys.Sz2); + + for (istate = 0; istate < nstate; istate++) { + X->Phys.Sz[istate] *= 0.5; + X->Phys.Sz2[istate] *= 0.25; + X->Phys.num_up[istate] = 0.5*(X->Phys.num[istate] + X->Phys.Sz[istate]); + X->Phys.num_down[istate] = 0.5*(X->Phys.num[istate] - X->Phys.Sz[istate]); } - tmp_D = SumMPI_d(tmp_D); - tmp_D2 = SumMPI_d(tmp_D2); - tmp_N = SumMPI_d(tmp_N); - tmp_N2 = SumMPI_d(tmp_N2); - tmp_Sz = SumMPI_d(tmp_Sz); - tmp_Sz2 = SumMPI_d(tmp_Sz2); - - X->Phys.doublon = tmp_D; - X->Phys.doublon2 = tmp_D2; - X->Phys.num = tmp_N; - X->Phys.num2 = tmp_N2; - X->Phys.Sz = tmp_Sz * 0.5; - X->Phys.Sz2 = tmp_Sz2 * 0.25; - X->Phys.num_up = 0.5*(tmp_N + tmp_Sz); - X->Phys.num_down = 0.5*(tmp_N - tmp_Sz); + free_d_1d_allocate(tmp_v02); return 0; } /// @@ -290,364 +302,413 @@ int expec_energy_flct_HubbardGC(struct BindStruct *X) { /// \param X [in, out] X Struct to get information about file header names, dimension of hirbert space, calc type and output physical quantities. /// \retval 0 normally finished. /// \retval -1 abnormally finished. 
-int expec_energy_flct_Hubbard(struct BindStruct *X){ - long unsigned int j; - long unsigned int isite1; - long unsigned int is1_up_a,is1_up_b; - long unsigned int is1_down_a,is1_down_b; - int bit_up,bit_down,bit_D; - - long unsigned int ibit_up,ibit_down,ibit_D; - double D,tmp_D,tmp_D2; - double N,tmp_N,tmp_N2; - double Sz,tmp_Sz, tmp_Sz2; - double tmp_v02; - long unsigned int i_max,tmp_list_1; - unsigned int l_ibit1,u_ibit1,i_32; - i_max=X->Check.idim_max; - - i_32 = (unsigned int)(pow(2,32)-1); - - tmp_D = 0.0; - tmp_D2 = 0.0; - tmp_N = 0.0; - tmp_N2 = 0.0; - tmp_Sz = 0.0; - tmp_Sz2 = 0.0; - - //[s] for bit count - is1_up_a = 0; - is1_up_b = 0; - is1_down_a = 0; - is1_down_b = 0; - for(isite1=1;isite1<=X->Def.NsiteMPI;isite1++){ - if(isite1 > X->Def.Nsite){ - is1_up_a += X->Def.Tpow[2*isite1 - 2]; - is1_down_a += X->Def.Tpow[2*isite1 - 1]; - }else{ - is1_up_b += X->Def.Tpow[2*isite1 - 2]; - is1_down_b += X->Def.Tpow[2*isite1 - 1]; - } +int expec_energy_flct_Hubbard( + struct BindStruct *X, + int nstate, + double complex **tmp_v0 +) { + long unsigned int j; + long unsigned int isite1; + long unsigned int is1_up_a, is1_up_b; + long unsigned int is1_down_a, is1_down_b; + int bit_up, bit_down, bit_D, istate; + + long unsigned int ibit_up, ibit_down, ibit_D; + double D, tmp_D, tmp_D2; + double N, tmp_N, tmp_N2; + double Sz, tmp_Sz, tmp_Sz2; + double *tmp_v02; + long unsigned int i_max, tmp_list_1; + unsigned int l_ibit1, u_ibit1, i_32; + i_max = X->Check.idim_max; + + tmp_v02 = d_1d_allocate(nstate); + i_32 = (unsigned int)(pow(2, 32) - 1); + + tmp_D = 0.0; + tmp_D2 = 0.0; + tmp_N = 0.0; + tmp_N2 = 0.0; + tmp_Sz = 0.0; + tmp_Sz2 = 0.0; + + //[s] for bit count + is1_up_a = 0; + is1_up_b = 0; + is1_down_a = 0; + is1_down_b = 0; + for (isite1 = 1; isite1 <= X->Def.NsiteMPI; isite1++) { + if (isite1 > X->Def.Nsite) { + is1_up_a += X->Def.Tpow[2 * isite1 - 2]; + is1_down_a += X->Def.Tpow[2 * isite1 - 1]; } -//[e] + else { + is1_up_b += X->Def.Tpow[2 * isite1 - 2]; + 
is1_down_b += X->Def.Tpow[2 * isite1 - 1]; + } + } + //[e] #pragma omp parallel for reduction(+:tmp_D,tmp_D2,tmp_N,tmp_N2,tmp_Sz,tmp_Sz2) default(none) shared(v0,list_1) \ firstprivate(i_max, X,myrank,is1_up_a,is1_down_a,is1_up_b,is1_down_b,i_32) \ private(j, tmp_v02,D,N,Sz,isite1,tmp_list_1,bit_up,bit_down,bit_D,u_ibit1,l_ibit1,ibit_up,ibit_down,ibit_D) - for(j = 1; j <= i_max; j++) { - tmp_v02 = conj(v0[j]) * v0[j]; - bit_up = 0; - bit_down = 0; - bit_D = 0; - tmp_list_1 = list_1[j]; -// isite1 > X->Def.Nsite - ibit_up = (unsigned long int) myrank & is1_up_a; - u_ibit1 = ibit_up >> 32; - l_ibit1 = ibit_up & i_32; - bit_up += pop(u_ibit1); - bit_up += pop(l_ibit1); - - ibit_down = (unsigned long int) myrank & is1_down_a; - u_ibit1 = ibit_down >> 32; - l_ibit1 = ibit_down & i_32; - bit_down += pop(u_ibit1); - bit_down += pop(l_ibit1); - - ibit_D = (ibit_up) & (ibit_down >> 1); - u_ibit1 = ibit_D >> 32; - l_ibit1 = ibit_D & i_32; - bit_D += pop(u_ibit1); - bit_D += pop(l_ibit1); - -// isite1 <= X->Def.Nsite - ibit_up = (unsigned long int) tmp_list_1 & is1_up_b; - u_ibit1 = ibit_up >> 32; - l_ibit1 = ibit_up & i_32; - bit_up += pop(u_ibit1); - bit_up += pop(l_ibit1); - - ibit_down = (unsigned long int) tmp_list_1 & is1_down_b; - u_ibit1 = ibit_down >> 32; - l_ibit1 = ibit_down & i_32; - bit_down += pop(u_ibit1); - bit_down += pop(l_ibit1); - - ibit_D = (ibit_up) & (ibit_down >> 1); - u_ibit1 = ibit_D >> 32; - l_ibit1 = ibit_D & i_32; - bit_D += pop(u_ibit1); - bit_D += pop(l_ibit1); - - D = bit_D; - N = bit_up + bit_down; - Sz = bit_up - bit_down; - - tmp_D += tmp_v02 * D; - tmp_D2 += tmp_v02 * D * D; - tmp_N += tmp_v02 * N; - tmp_N2 += tmp_v02 * N * N; - tmp_Sz += tmp_v02 * Sz; - tmp_Sz2 += tmp_v02 * Sz * Sz; - } + for (j = 1; j <= i_max; j++) { + for (istate = 0; istate < nstate; istate++) tmp_v02[istate] = conj(tmp_v0[j][istate]) * tmp_v0[j][istate]; + bit_up = 0; + bit_down = 0; + bit_D = 0; + tmp_list_1 = list_1[j]; + // isite1 > X->Def.Nsite + ibit_up = 
(unsigned long int) myrank & is1_up_a; + u_ibit1 = ibit_up >> 32; + l_ibit1 = ibit_up & i_32; + bit_up += pop(u_ibit1); + bit_up += pop(l_ibit1); + ibit_down = (unsigned long int) myrank & is1_down_a; + u_ibit1 = ibit_down >> 32; + l_ibit1 = ibit_down & i_32; + bit_down += pop(u_ibit1); + bit_down += pop(l_ibit1); - tmp_D = SumMPI_d(tmp_D); - tmp_D2 = SumMPI_d(tmp_D2); - tmp_N = SumMPI_d(tmp_N); - tmp_N2 = SumMPI_d(tmp_N2); - tmp_Sz = SumMPI_d(tmp_Sz); - tmp_Sz2 = SumMPI_d(tmp_Sz2); - - X->Phys.doublon = tmp_D; - X->Phys.doublon2 = tmp_D2; - X->Phys.num = tmp_N; - X->Phys.num2 = tmp_N2; - X->Phys.Sz = tmp_Sz*0.5; - X->Phys.Sz2 = tmp_Sz2*0.25; - X->Phys.num_up = 0.5*(tmp_N+tmp_Sz); - X->Phys.num_down = 0.5*(tmp_N-tmp_Sz); - return 0; -} + ibit_D = (ibit_up) & (ibit_down >> 1); + u_ibit1 = ibit_D >> 32; + l_ibit1 = ibit_D & i_32; + bit_D += pop(u_ibit1); + bit_D += pop(l_ibit1); + + // isite1 <= X->Def.Nsite + ibit_up = (unsigned long int) tmp_list_1 & is1_up_b; + u_ibit1 = ibit_up >> 32; + l_ibit1 = ibit_up & i_32; + bit_up += pop(u_ibit1); + bit_up += pop(l_ibit1); + + ibit_down = (unsigned long int) tmp_list_1 & is1_down_b; + u_ibit1 = ibit_down >> 32; + l_ibit1 = ibit_down & i_32; + bit_down += pop(u_ibit1); + bit_down += pop(l_ibit1); + + ibit_D = (ibit_up) & (ibit_down >> 1); + u_ibit1 = ibit_D >> 32; + l_ibit1 = ibit_D & i_32; + bit_D += pop(u_ibit1); + bit_D += pop(l_ibit1); + + D = bit_D; + N = bit_up + bit_down; + Sz = bit_up - bit_down; + for (istate = 0; istate < nstate; istate++) { + X->Phys.doublon[istate] += tmp_v02[istate] * D; + X->Phys.doublon2[istate] += tmp_v02[istate] * D * D; + X->Phys.num[istate] += tmp_v02[istate] * N; + X->Phys.num2[istate] += tmp_v02[istate] * N * N; + X->Phys.Sz[istate] += tmp_v02[istate] * Sz; + X->Phys.Sz2[istate] += tmp_v02[istate] * Sz * Sz; + } + } + SumMPI_dv(nstate, X->Phys.doublon); + SumMPI_dv(nstate, X->Phys.doublon2); + SumMPI_dv(nstate, X->Phys.num); + SumMPI_dv(nstate, X->Phys.num2); + SumMPI_dv(nstate, 
X->Phys.Sz); + SumMPI_dv(nstate, X->Phys.Sz2); + + for (istate = 0; istate < nstate; istate++) { + X->Phys.Sz[istate] *= 0.5; + X->Phys.Sz2[istate] *= 0.25; + X->Phys.num_up[istate] = 0.5*(X->Phys.num[istate] + X->Phys.Sz[istate]); + X->Phys.num_down[istate] = 0.5*(X->Phys.num[istate] - X->Phys.Sz[istate]); + } + + free_d_1d_allocate(tmp_v02); + return 0; +} /// /// \brief Calculate expected values of energies and physical quantities for Half-SpinGC model /// \param X [in, out] X Struct to get information about file header names, dimension of hirbert space, calc type and output physical quantities. /// \retval 0 normally finished. /// \retval -1 abnormally finished. -int expec_energy_flct_HalfSpinGC(struct BindStruct *X){ - long unsigned int j; - long unsigned int isite1; - long unsigned int is1_up_a,is1_up_b; +int expec_energy_flct_HalfSpinGC( + struct BindStruct *X, + int nstate, + double complex **tmp_v0 +) { + long unsigned int j; + long unsigned int isite1; + long unsigned int is1_up_a, is1_up_b; + + long unsigned int ibit1; + double Sz, tmp_Sz, tmp_Sz2; + double *tmp_v02; + long unsigned int i_max; + unsigned int l_ibit1, u_ibit1, i_32; + int istate; - long unsigned int ibit1; - double Sz,tmp_Sz, tmp_Sz2; - double tmp_v02; - long unsigned int i_max; - unsigned int l_ibit1,u_ibit1,i_32; - i_max=X->Check.idim_max; + i_max = X->Check.idim_max; - i_32 = 0xFFFFFFFF; //2^32 - 1 + tmp_v02 = d_1d_allocate(nstate); + i_32 = 0xFFFFFFFF; //2^32 - 1 - // tentative doublon - tmp_Sz = 0.0; - tmp_Sz2 = 0.0; - -//[s] for bit count - is1_up_a = 0; - is1_up_b = 0; - for(isite1=1;isite1<=X->Def.NsiteMPI;isite1++){ - if(isite1 > X->Def.Nsite){ - is1_up_a += X->Def.Tpow[isite1 - 1]; - }else{ - is1_up_b += X->Def.Tpow[isite1 - 1]; - } + // tentative doublon + tmp_Sz = 0.0; + tmp_Sz2 = 0.0; + + //[s] for bit count + is1_up_a = 0; + is1_up_b = 0; + for (isite1 = 1; isite1 <= X->Def.NsiteMPI; isite1++) { + if (isite1 > X->Def.Nsite) { + is1_up_a += X->Def.Tpow[isite1 - 1]; + } + else 
{ + is1_up_b += X->Def.Tpow[isite1 - 1]; } -//[e] + } + //[e] #pragma omp parallel for reduction(+:tmp_Sz,tmp_Sz2)default(none) shared(v0) \ firstprivate(i_max,X,myrank,i_32,is1_up_a,is1_up_b) private(j,Sz,ibit1,isite1,tmp_v02,u_ibit1,l_ibit1) - for(j = 1; j <= i_max; j++){ - tmp_v02 = conj(v0[j])*v0[j]; - Sz = 0.0; - -// isite1 > X->Def.Nsite - ibit1 = (unsigned long int) myrank & is1_up_a; - u_ibit1 = ibit1 >> 32; - l_ibit1 = ibit1 & i_32; - Sz += pop(u_ibit1); - Sz += pop(l_ibit1); -// isite1 <= X->Def.Nsite - ibit1 = (unsigned long int) (j-1)&is1_up_b; - u_ibit1 = ibit1 >> 32; - l_ibit1 = ibit1 & i_32; - Sz += pop(u_ibit1); - Sz += pop(l_ibit1); - Sz = 2*Sz-X->Def.NsiteMPI; - - tmp_Sz += Sz*tmp_v02; - tmp_Sz2 += Sz*Sz*tmp_v02; + for (j = 1; j <= i_max; j++) { + for (istate = 0; istate < nstate; istate++) tmp_v02[istate] = conj(tmp_v0[j][istate]) * tmp_v0[j][istate]; + Sz = 0.0; + + // isite1 > X->Def.Nsite + ibit1 = (unsigned long int) myrank & is1_up_a; + u_ibit1 = ibit1 >> 32; + l_ibit1 = ibit1 & i_32; + Sz += pop(u_ibit1); + Sz += pop(l_ibit1); + // isite1 <= X->Def.Nsite + ibit1 = (unsigned long int) (j - 1)&is1_up_b; + u_ibit1 = ibit1 >> 32; + l_ibit1 = ibit1 & i_32; + Sz += pop(u_ibit1); + Sz += pop(l_ibit1); + Sz = 2 * Sz - X->Def.NsiteMPI; + + for (istate = 0; istate < nstate; istate++) { + X->Phys.Sz[istate] += tmp_v02[istate] * Sz; + X->Phys.Sz2[istate] += tmp_v02[istate] * Sz * Sz; } - tmp_Sz = SumMPI_d(tmp_Sz); - tmp_Sz2 = SumMPI_d(tmp_Sz2); - - X->Phys.doublon = 0.0; - X->Phys.doublon2 = 0.0; - X->Phys.num = X->Def.NsiteMPI; - X->Phys.num2 = X->Def.NsiteMPI*X->Def.NsiteMPI; - X->Phys.Sz = tmp_Sz*0.5; - X->Phys.Sz2 = tmp_Sz2*0.25; - X->Phys.num_up = 0.5*(X->Def.NsiteMPI+tmp_Sz); - X->Phys.num_down = 0.5*(X->Def.NsiteMPI-tmp_Sz); - - return 0; -} + } + SumMPI_dv(nstate, X->Phys.Sz); + SumMPI_dv(nstate, X->Phys.Sz2); + + for (istate = 0; istate < nstate; istate++) { + X->Phys.doublon[istate] = 0.0; + X->Phys.doublon2[istate] = 0.0; + 
X->Phys.num[istate] = X->Def.NsiteMPI; + X->Phys.num2[istate] = X->Def.NsiteMPI*X->Def.NsiteMPI; + X->Phys.Sz[istate] *= 0.5; + X->Phys.Sz2[istate] *= 0.25; + X->Phys.num_up[istate] = 0.5*(X->Phys.num[istate] + X->Phys.Sz[istate]); + X->Phys.num_down[istate] = 0.5*(X->Phys.num[istate] - X->Phys.Sz[istate]); + } + free_d_1d_allocate(tmp_v02); + return 0; +} /// /// \brief Calculate expected values of energies and physical quantities for General-SpinGC model /// \param X [in, out] X Struct to get information about file header names, dimension of hirbert space, calc type and output physical quantities. /// \retval 0 normally finished. /// \retval -1 abnormally finished. -int expec_energy_flct_GeneralSpinGC(struct BindStruct *X){ - long unsigned int j; - long unsigned int isite1; - - double Sz,tmp_Sz, tmp_Sz2; - double tmp_v02; - long unsigned int i_max; - i_max=X->Check.idim_max; +int expec_energy_flct_GeneralSpinGC( + struct BindStruct *X, + int nstate, + double complex **tmp_v0 +) { + long unsigned int j; + long unsigned int isite1; + int istate; + double Sz, tmp_Sz, tmp_Sz2; + double *tmp_v02; + long unsigned int i_max; - // tentative doublon - tmp_Sz = 0.0; - tmp_Sz2 = 0.0; - - for(j = 1; j <= i_max; j++){ - tmp_v02 = conj(v0[j])*v0[j]; - Sz = 0.0; - for(isite1=1;isite1<=X->Def.NsiteMPI;isite1++){ - //prefactor 0.5 is added later. 
- if(isite1 > X->Def.Nsite){ - Sz += GetLocal2Sz(isite1, myrank, X->Def.SiteToBit, X->Def.Tpow); - }else{ - Sz += GetLocal2Sz(isite1, j-1, X->Def.SiteToBit, X->Def.Tpow); - } - } - tmp_Sz += Sz*tmp_v02; - tmp_Sz2 += Sz*Sz*tmp_v02; - } + tmp_v02 = d_1d_allocate(nstate); + i_max = X->Check.idim_max; - tmp_Sz = SumMPI_d(tmp_Sz); - tmp_Sz2 = SumMPI_d(tmp_Sz2); + // tentative doublon + tmp_Sz = 0.0; + tmp_Sz2 = 0.0; - X->Phys.doublon = 0.0; - X->Phys.doublon2 = 0.0; - X->Phys.num = X->Def.NsiteMPI; - X->Phys.num2 = X->Def.NsiteMPI*X->Def.NsiteMPI; - X->Phys.Sz = tmp_Sz*0.5; - X->Phys.Sz2 = tmp_Sz2*0.25; - X->Phys.num_up = 0.5*(X->Def.NsiteMPI+tmp_Sz); - X->Phys.num_down = 0.5*(X->Def.NsiteMPI-tmp_Sz); + for (j = 1; j <= i_max; j++) { + for (istate = 0; istate < nstate; istate++) tmp_v02[istate] = conj(tmp_v0[j][istate]) * tmp_v0[j][istate]; + Sz = 0.0; + for (isite1 = 1; isite1 <= X->Def.NsiteMPI; isite1++) { + //prefactor 0.5 is added later. + if (isite1 > X->Def.Nsite) { + Sz += GetLocal2Sz(isite1, myrank, X->Def.SiteToBit, X->Def.Tpow); + } + else { + Sz += GetLocal2Sz(isite1, j - 1, X->Def.SiteToBit, X->Def.Tpow); + } + } + for (istate = 0; istate < nstate; istate++) { + X->Phys.Sz[istate] += tmp_v02[istate] * Sz; + X->Phys.Sz2[istate] += tmp_v02[istate] * Sz * Sz; + } + } + SumMPI_dv(nstate, X->Phys.Sz); + SumMPI_dv(nstate, X->Phys.Sz2); + + for (istate = 0; istate < nstate; istate++) { + X->Phys.doublon[istate] = 0.0; + X->Phys.doublon2[istate] = 0.0; + X->Phys.num[istate] = X->Def.NsiteMPI; + X->Phys.num2[istate] = X->Def.NsiteMPI*X->Def.NsiteMPI; + X->Phys.Sz[istate] *= 0.5; + X->Phys.Sz2[istate] *= 0.25; + X->Phys.num_up[istate] = 0.5*(X->Phys.num[istate] + X->Phys.Sz[istate]); + X->Phys.num_down[istate] = 0.5*(X->Phys.num[istate] - X->Phys.Sz[istate]); + } - return 0; + free_d_1d_allocate(tmp_v02); + return 0; } - /// /// \brief Calculate expected values of energies and physical quantities for Half-Spin model /// \param X [in, out] X Struct to get information 
about file header names, dimension of hirbert space, calc type and output physical quantities. /// \retval 0 normally finished. /// \retval -1 abnormally finished. -int expec_energy_flct_HalfSpin(struct BindStruct *X){ +int expec_energy_flct_HalfSpin( + struct BindStruct *X, + int nstate, + double complex **tmp_v0 +) { long unsigned int j; long unsigned int isite1; - long unsigned int is1_up_a,is1_up_b; + long unsigned int is1_up_a, is1_up_b; long unsigned int ibit1; - double Sz,tmp_Sz, tmp_Sz2; - double tmp_v02; + double Sz, tmp_Sz, tmp_Sz2; + double *tmp_v02; long unsigned int i_max, tmp_list_1; - unsigned int l_ibit1,u_ibit1,i_32; - i_max=X->Check.idim_max; + unsigned int l_ibit1, u_ibit1, i_32; + int istate; + i_max = X->Check.idim_max; + tmp_v02 = d_1d_allocate(nstate); i_32 = 0xFFFFFFFF; //2^32 - 1 // tentative doublon - tmp_Sz = 0.0; - tmp_Sz2 = 0.0; + tmp_Sz = 0.0; + tmp_Sz2 = 0.0; -//[s] for bit count + //[s] for bit count is1_up_a = 0; is1_up_b = 0; - for(isite1=1;isite1<=X->Def.NsiteMPI;isite1++){ - if(isite1 > X->Def.Nsite){ + for (isite1 = 1; isite1 <= X->Def.NsiteMPI; isite1++) { + if (isite1 > X->Def.Nsite) { is1_up_a += X->Def.Tpow[isite1 - 1]; - }else{ + } + else { is1_up_b += X->Def.Tpow[isite1 - 1]; } } -//[e] + //[e] #pragma omp parallel for reduction(+:tmp_Sz,tmp_Sz2)default(none) shared(v0, list_1) \ firstprivate(i_max,X,myrank,i_32,is1_up_a,is1_up_b) private(j,Sz,ibit1,isite1,tmp_v02,u_ibit1,l_ibit1, tmp_list_1) - for(j = 1; j <= i_max; j++){ - tmp_v02 = conj(v0[j])*v0[j]; - Sz = 0.0; + for (j = 1; j <= i_max; j++) { + for (istate = 0; istate < nstate; istate++) tmp_v02[istate] = conj(tmp_v0[j][istate]) * tmp_v0[j][istate]; + Sz = 0.0; tmp_list_1 = list_1[j]; -// isite1 > X->Def.Nsite - ibit1 = (unsigned long int) myrank & is1_up_a; + // isite1 > X->Def.Nsite + ibit1 = (unsigned long int) myrank & is1_up_a; u_ibit1 = ibit1 >> 32; l_ibit1 = ibit1 & i_32; - Sz += pop(u_ibit1); - Sz += pop(l_ibit1); -// isite1 <= X->Def.Nsite - ibit1 = (unsigned 
long int) tmp_list_1 &is1_up_b; + Sz += pop(u_ibit1); + Sz += pop(l_ibit1); + // isite1 <= X->Def.Nsite + ibit1 = (unsigned long int) tmp_list_1 &is1_up_b; u_ibit1 = ibit1 >> 32; l_ibit1 = ibit1 & i_32; - Sz += pop(u_ibit1); - Sz += pop(l_ibit1); - Sz = 2*Sz-X->Def.NsiteMPI; + Sz += pop(u_ibit1); + Sz += pop(l_ibit1); + Sz = 2 * Sz - X->Def.NsiteMPI; - tmp_Sz += Sz*tmp_v02; - tmp_Sz2 += Sz*Sz*tmp_v02; + for (istate = 0; istate < nstate; istate++) { + X->Phys.Sz[istate] += tmp_v02[istate] * Sz; + X->Phys.Sz2[istate] += tmp_v02[istate] * Sz * Sz; + } + } + SumMPI_dv(nstate, X->Phys.Sz); + SumMPI_dv(nstate, X->Phys.Sz2); + + for (istate = 0; istate < nstate; istate++) { + X->Phys.doublon[istate] = 0.0; + X->Phys.doublon2[istate] = 0.0; + X->Phys.num[istate] = X->Def.NsiteMPI; + X->Phys.num2[istate] = X->Def.NsiteMPI*X->Def.NsiteMPI; + X->Phys.Sz[istate] *= 0.5; + X->Phys.Sz2[istate] *= 0.25; + X->Phys.num_up[istate] = 0.5*(X->Phys.num[istate] + X->Phys.Sz[istate]); + X->Phys.num_down[istate] = 0.5*(X->Phys.num[istate] - X->Phys.Sz[istate]); } - tmp_Sz = SumMPI_d(tmp_Sz); - tmp_Sz2 = SumMPI_d(tmp_Sz2); - - X->Phys.doublon = 0.0; - X->Phys.doublon2 = 0.0; - X->Phys.num = X->Def.NsiteMPI; - X->Phys.num2 = X->Def.NsiteMPI*X->Def.NsiteMPI; - X->Phys.Sz = tmp_Sz*0.5; - X->Phys.Sz2 = tmp_Sz2*0.25; - X->Phys.num_up = 0.5*(X->Def.NsiteMPI+tmp_Sz); - X->Phys.num_down = 0.5*(X->Def.NsiteMPI-tmp_Sz); + free_d_1d_allocate(tmp_v02); return 0; } - /// /// \brief Calculate expected values of energies and physical quantities for General-Spin model /// \param X [in, out] X Struct to get information about file header names, dimension of hirbert space, calc type and output physical quantities. /// \retval 0 normally finished. /// \retval -1 abnormally finished. 
-int expec_energy_flct_GeneralSpin(struct BindStruct *X){ +int expec_energy_flct_GeneralSpin( + struct BindStruct *X, + int nstate, + double complex **tmp_v0 +) { long unsigned int j; long unsigned int isite1; - - double Sz,tmp_Sz, tmp_Sz2; - double tmp_v02; + int istate; + double Sz, tmp_Sz, tmp_Sz2; + double *tmp_v02; long unsigned int i_max, tmp_list1; - i_max=X->Check.idim_max; + + tmp_v02 = d_1d_allocate(nstate); + i_max = X->Check.idim_max; // tentative doublon - tmp_Sz = 0.0; - tmp_Sz2 = 0.0; + tmp_Sz = 0.0; + tmp_Sz2 = 0.0; #pragma omp parallel for reduction(+:tmp_Sz,tmp_Sz2)default(none) shared(v0, list_1) \ firstprivate(i_max,X,myrank) private(j,Sz,isite1,tmp_v02, tmp_list1) - for(j = 1; j <= i_max; j++){ - tmp_v02 = conj(v0[j])*v0[j]; - Sz = 0.0; + for (j = 1; j <= i_max; j++) { + for (istate = 0; istate < nstate; istate++) tmp_v02[istate] = conj(tmp_v0[j][istate]) * tmp_v0[j][istate]; + Sz = 0.0; tmp_list1 = list_1[j]; - for(isite1=1;isite1<=X->Def.NsiteMPI;isite1++){ + for (isite1 = 1; isite1 <= X->Def.NsiteMPI; isite1++) { //prefactor 0.5 is added later. 
- if(isite1 > X->Def.Nsite){ + if (isite1 > X->Def.Nsite) { Sz += GetLocal2Sz(isite1, myrank, X->Def.SiteToBit, X->Def.Tpow); - }else{ + } + else { Sz += GetLocal2Sz(isite1, tmp_list1, X->Def.SiteToBit, X->Def.Tpow); } } - tmp_Sz += Sz*tmp_v02; - tmp_Sz2 += Sz*Sz*tmp_v02; + for (istate = 0; istate < nstate; istate++) { + X->Phys.Sz[istate] += tmp_v02[istate] * Sz; + X->Phys.Sz2[istate] += tmp_v02[istate] * Sz * Sz; + } + } + SumMPI_dv(nstate, X->Phys.Sz); + SumMPI_dv(nstate, X->Phys.Sz2); + + for (istate = 0; istate < nstate; istate++) { + X->Phys.doublon[istate] = 0.0; + X->Phys.doublon2[istate] = 0.0; + X->Phys.num[istate] = X->Def.NsiteMPI; + X->Phys.num2[istate] = X->Def.NsiteMPI*X->Def.NsiteMPI; + X->Phys.Sz[istate] *= 0.5; + X->Phys.Sz2[istate] *= 0.25; + X->Phys.num_up[istate] = 0.5*(X->Phys.num[istate] + X->Phys.Sz[istate]); + X->Phys.num_down[istate] = 0.5*(X->Phys.num[istate] - X->Phys.Sz[istate]); } - tmp_Sz = SumMPI_d(tmp_Sz); - tmp_Sz2 = SumMPI_d(tmp_Sz2); - - X->Phys.doublon = 0.0; - X->Phys.doublon2 = 0.0; - X->Phys.num = X->Def.NsiteMPI; - X->Phys.num2 = X->Def.NsiteMPI*X->Def.NsiteMPI; - X->Phys.Sz = tmp_Sz*0.5; - X->Phys.Sz2 = tmp_Sz2*0.25; - X->Phys.num_up = 0.5*(X->Def.NsiteMPI+tmp_Sz); - X->Phys.num_down = 0.5*(X->Def.NsiteMPI-tmp_Sz); - + free_d_1d_allocate(tmp_v02); return 0; } diff --git a/src/expec_totalspin.c b/src/expec_totalspin.c index 51440f1c2..bcc565280 100644 --- a/src/expec_totalspin.c +++ b/src/expec_totalspin.c @@ -20,7 +20,6 @@ #include "mltplyMPISpin.h" #include "mltplyMPISpinCore.h" #include "expec_totalspin.h" - /** * @file expec_totalspin.c * @@ -34,8 +33,6 @@ * @author Kazuyoshi Yoshimi (The University of Tokyo) * */ - - /** * @brief Parent function of calculation of total spin * @@ -49,30 +46,34 @@ int expec_totalspin ( struct BindStruct *X, - int nstate, + int nstate, double complex **vec - ) +) { + int istate; + X->Large.mode = M_TOTALS; - switch(X->Def.iCalcModel){ + switch (X->Def.iCalcModel) { case Spin: - 
totalspin_Spin(X,vec); - X->Phys.Sz=X->Def.Total2SzMPI/2.; - break; + totalspin_Spin(X, nstate, vec); + X->Phys.Sz = X->Def.Total2SzMPI / 2.; + break; case SpinGC: - totalspin_SpinGC(X,vec); - break; - case Hubbard: - case Kondo: - totalspin_Hubbard(X,vec); - break; + totalspin_SpinGC(X, nstate, vec); + break; + case Hubbard: + case Kondo: + totalspin_Hubbard(X, nstate, vec); + break; case HubbardGC: case KondoGC: - totalspin_HubbardGC(X,vec); - break; + totalspin_HubbardGC(X, nstate, vec); + break; default: - X->Phys.s2=0.0; - X->Phys.Sz=0.0; + for (istate = 0; istate < nstate; istate++) { + X->Phys.s2[istate] = 0.0; + X->Phys.Sz[istate] = 0.0; + } } return 0; } @@ -87,7 +88,7 @@ int expec_totalspin */ void totalspin_Hubbard( struct BindStruct *X, - int nstate, + int nstate, double complex **vec ) { long unsigned int j; @@ -115,7 +116,10 @@ void totalspin_Hubbard( is2_up = X->Def.Tpow[2 * isite2 - 2]; is2_down = X->Def.Tpow[2 * isite2 - 1]; -#pragma omp parallel for reduction(+:spn, spn_z) default(none) firstprivate(i_max, is1_up, is1_down, is2_up, is2_down, irght, ilft, ihfbit, isite1, isite2) private(ibit1_up, num1_up, ibit2_up, num2_up, ibit1_down, num1_down, ibit2_down, num2_down, tmp_spn_z, iexchg, off) shared(vec, list_1, list_2_1, list_2_2) +#pragma omp parallel for reduction(+:spn_z) default(none) \ +firstprivate(i_max, is1_up, is1_down, is2_up, is2_down, irght, ilft, ihfbit, isite1, isite2) \ +private(ibit1_up, num1_up, ibit2_up, num2_up, ibit1_down, num1_down, ibit2_down, num2_down, tmp_spn_z, iexchg, off) \ +shared(vec, list_1, list_2_1, list_2_2) for (j = 1; j <= i_max; j++) { ibit1_up = list_1[j] & is1_up; @@ -136,14 +140,16 @@ void totalspin_Hubbard( X->Phys.s2[istate] += conj(vec[j][istate]) * vec[j][istate] * (num1_up + num1_down - 2 * num1_up * num1_down) / 2.0; X->Phys.Sz[istate] += conj(vec[j][istate]) * vec[j][istate] * (num1_up - num1_down) / 2.0; } - } else { + } + else { if (ibit1_up != 0 && ibit1_down == 0 && ibit2_up == 0 && ibit2_down != 
0) { iexchg = list_1[j] - (is1_up + is2_down); iexchg += (is2_up + is1_down); GetOffComp(list_2_1, list_2_2, iexchg, irght, ilft, ihfbit, &off); - for (istate = 0; istate < nstate; istate++) + for (istate = 0; istate < nstate; istate++) X->Phys.s2[istate] += conj(vec[j][istate]) * vec[off][istate] / 2.0; - } else if (ibit1_up == 0 && ibit1_down != 0 && ibit2_up != 0 && ibit2_down == 0) { + } + else if (ibit1_up == 0 && ibit1_down != 0 && ibit2_up != 0 && ibit2_down == 0) { iexchg = list_1[j] - (is1_down + is2_up); iexchg += (is2_down + is1_up); GetOffComp(list_2_1, list_2_2, iexchg, irght, ilft, ihfbit, &off); @@ -154,6 +160,8 @@ void totalspin_Hubbard( } } } + SumMPI_dv(nstate, X->Phys.s2); + SumMPI_dv(nstate, X->Phys.Sz); } /** * @brief function of calculating totalspin for Hubbard model in grand canonical ensemble @@ -166,7 +174,7 @@ void totalspin_Hubbard( */ void totalspin_HubbardGC( struct BindStruct *X, - int nstate, + int nstate, double complex **vec ) { long unsigned int j; @@ -192,7 +200,10 @@ void totalspin_HubbardGC( is2_up = X->Def.Tpow[2 * isite2 - 2]; is2_down = X->Def.Tpow[2 * isite2 - 1]; -#pragma omp parallel for reduction(+:spn, spn_z) default(none) firstprivate(i_max, is1_up, is1_down, is2_up, is2_down, isite1, isite2) private(list_1_j, ibit1_up, num1_up, ibit2_up, num2_up, ibit1_down, num1_down, ibit2_down, num2_down, tmp_spn_z, iexchg, off) shared(vec) +#pragma omp parallel for reduction(+:spn_z) default(none) \ +firstprivate(i_max, is1_up, is1_down, is2_up, is2_down, isite1, isite2) \ +private(list_1_j, ibit1_up, num1_up, ibit2_up, num2_up, ibit1_down, num1_down, ibit2_down, num2_down, tmp_spn_z, iexchg, off) \ +shared(vec) for (j = 1; j <= i_max; j++) { list_1_j = j - 1; ibit1_up = list_1_j & is1_up; @@ -209,16 +220,18 @@ void totalspin_HubbardGC( for (istate = 0; istate < nstate; istate++) X->Phys.s2[istate] += conj(vec[j][istate]) * vec[j][istate] * tmp_spn_z / 4.0; if (isite1 == isite2) { - spn += conj(vec[j][istate]) * vec[j][istate] * 
(num1_up + num1_down - 2 * num1_up * num1_down) / 2.0; - spn_z += conj(vec[j][istate]) * vec[j][istate] * (num1_up - num1_down) / 2.0; - } else { + X->Phys.s2[istate] += conj(vec[j][istate]) * vec[j][istate] * (num1_up + num1_down - 2 * num1_up * num1_down) / 2.0; + X->Phys.Sz[istate] += conj(vec[j][istate]) * vec[j][istate] * (num1_up - num1_down) / 2.0; + } + else { if (ibit1_up != 0 && ibit1_down == 0 && ibit2_up == 0 && ibit2_down != 0) { iexchg = list_1_j - (is1_up + is2_down); iexchg += (is2_up + is1_down); off = iexchg + 1; for (istate = 0; istate < nstate; istate++) X->Phys.s2[istate] += conj(vec[j][istate]) * vec[off][istate] / 2.0; - } else if (ibit1_up == 0 && ibit1_down != 0 && ibit2_up != 0 && ibit2_down == 0) { + } + else if (ibit1_up == 0 && ibit1_down != 0 && ibit2_up != 0 && ibit2_down == 0) { iexchg = list_1_j - (is1_down + is2_up); iexchg += (is2_down + is1_up); off = iexchg + 1; @@ -229,6 +242,8 @@ void totalspin_HubbardGC( } } } + SumMPI_dv(nstate, X->Phys.s2); + SumMPI_dv(nstate, X->Phys.Sz); } /** * @brief function of calculating totalspin for spin model @@ -241,8 +256,11 @@ void totalspin_HubbardGC( * @author Takahiro Misawa (The University of Tokyo) * @author Kazuyoshi Yoshimi (The University of Tokyo) */ -void totalspin_Spin(struct BindStruct *X,int nstate, double complex **vec) { - +void totalspin_Spin( + struct BindStruct *X, + int nstate, + double complex **vec +) { long unsigned int j; long unsigned int irght, ilft, ihfbit; long unsigned int isite1, isite2; @@ -253,19 +271,19 @@ void totalspin_Spin(struct BindStruct *X,int nstate, double complex **vec) { int num1_up, num2_up; int num1_down, num2_down; - int sigma_1, sigma_2; + int sigma_1, sigma_2, istate; long unsigned int ibit1_up, ibit2_up, ibit_tmp, is_up; - double complex spn_z = 0.0; - double complex spn_z1, spn_z2, spn_zd; - double complex spn = 0.0; + double complex spn_z1, spn_z2; long unsigned int i_max; + double spn_z; i_max = X->Check.idim_max; + for (istate = 0; istate < 
nstate; istate++) { + X->Phys.s2[istate] = 0.0; + X->Phys.Sz[istate] = 0.0; + } if (X->Def.iFlgGeneralSpin == FALSE) { GetSplitBitByModel(X->Def.Nsite, X->Def.iCalcModel, &irght, &ilft, &ihfbit); - spn = 0.0; - spn_z = 0.0; - spn_zd = 0.0; for (isite1 = 1; isite1 <= X->Def.NsiteMPI; isite1++) { for (isite2 = 1; isite2 <= X->Def.NsiteMPI; isite2++) { @@ -283,24 +301,29 @@ void totalspin_Spin(struct BindStruct *X,int nstate, double complex **vec) { #pragma omp parallel for default(none) reduction (+:spn_zd) shared(vec) \ firstprivate(i_max, spn_z) private(j) for (j = 1; j <= i_max; j++) { - spn_zd += conj(vec[j][istate]) * vec[j][istate] * spn_z / 4.0; + for (istate = 0; istate < nstate; istate++) + X->Phys.s2[istate] += conj(vec[j][istate]) * vec[j][istate] * spn_z / 4.0; } if (isite1 == isite2) { #pragma omp parallel for default(none) reduction (+:spn_zd) shared(vec) \ firstprivate(i_max) private(j) for (j = 1; j <= i_max; j++) { - spn_zd += conj(vec[j][istate]) * vec[j][istate] / 2.0; + for (istate = 0; istate < nstate; istate++) + X->Phys.s2[istate] += conj(vec[j][istate]) * vec[j][istate] / 2.0; } - } else {//off diagonal - spn += X_child_general_int_spin_TotalS_MPIdouble(isite1 - 1, isite2 - 1, X, vec, vec); + } + else {//off diagonal + spn += X_child_general_int_spin_TotalS_MPIdouble(isite1 - 1, isite2 - 1, X, nstate, vec, vec); } #endif - } else if (isite1 > X->Def.Nsite || isite2 > X->Def.Nsite) { + } + else if (isite1 > X->Def.Nsite || isite2 > X->Def.Nsite) { #ifdef MPI if (isite1 < isite2) { tmp_isite1 = isite1; tmp_isite2 = isite2; - } else { + } + else { tmp_isite1 = isite2; tmp_isite2 = isite1; } @@ -311,18 +334,23 @@ void totalspin_Spin(struct BindStruct *X,int nstate, double complex **vec) { num2_down = 1 - num2_up; //diagonal -#pragma omp parallel for reduction(+: spn_zd) default(none) firstprivate(i_max, is1_up, num2_up, num2_down) private(ibit1_up, num1_up, num1_down, spn_z) shared(list_1, vec) +#pragma omp parallel for reduction(+: spn_zd) 
default(none) \ +firstprivate(i_max, is1_up, num2_up, num2_down) \ +private(ibit1_up, num1_up, num1_down, spn_z) \ +shared(list_1, vec) for (j = 1; j <= i_max; j++) { ibit1_up = list_1[j] & is1_up; num1_up = ibit1_up / is1_up; num1_down = 1 - num1_up; spn_z = (num1_up - num1_down) * (num2_up - num2_down); - spn_zd += conj(vec[j][istate]) * vec[j][istate] * spn_z / 4.0; + for (istate = 0; istate < nstate; istate++) + X->Phys.s2[istate] += conj(vec[j][istate]) * vec[j][istate] * spn_z / 4.0; } if (isite1 < isite2) { - spn += X_child_general_int_spin_MPIsingle(isite1 - 1, 0, 1, isite2 - 1, 1, 0, 1.0, X, vec, vec); - } else { - spn += conj(X_child_general_int_spin_MPIsingle(isite2 - 1, 1, 0, isite1 - 1, 0, 1, 1.0, X, vec, vec)); + spn += X_child_general_int_spin_MPIsingle(isite1 - 1, 0, 1, isite2 - 1, 1, 0, 1.0, X, nstate, vec, vec); + } + else { + spn += conj(X_child_general_int_spin_MPIsingle(isite2 - 1, 1, 0, isite1 - 1, 0, 1, 1.0, X, nstate, vec, vec)); } #endif }//isite1 > Nsite || isite2 > Nsite @@ -331,7 +359,10 @@ void totalspin_Spin(struct BindStruct *X,int nstate, double complex **vec) { is2_up = X->Def.Tpow[isite2 - 1]; is_up = is1_up + is2_up; -#pragma omp parallel for reduction(+: spn, spn_zd) default(none) firstprivate(i_max, is_up, is1_up, is2_up, irght, ilft, ihfbit, isite1, isite2) private(ibit1_up, num1_up, ibit2_up, num2_up, num1_down, num2_down, spn_z, iexchg, off, ibit_tmp) shared(list_1, list_2_1, list_2_2, vec) +#pragma omp parallel for reduction(+: spn_zd) default(none) \ +firstprivate(i_max, is_up, is1_up, is2_up, irght, ilft, ihfbit, isite1, isite2) \ +private(ibit1_up, num1_up, ibit2_up, num2_up, num1_down, num2_down, spn_z, iexchg, off, ibit_tmp) \ +shared(list_1, list_2_1, list_2_2, vec) for (j = 1; j <= i_max; j++) { ibit1_up = list_1[j] & is1_up; num1_up = ibit1_up / is1_up; @@ -341,11 +372,14 @@ void totalspin_Spin(struct BindStruct *X,int nstate, double complex **vec) { num2_down = 1 - num2_up; spn_z = (num1_up - num1_down) * (num2_up - 
num2_down); - spn_zd += conj(vec[j][istate]) * vec[j][istate] * spn_z / 4.0; + for (istate = 0; istate < nstate; istate++) + X->Phys.s2[istate] += conj(vec[j][istate]) * vec[j][istate] * spn_z / 4.0; if (isite1 == isite2) { - spn_zd += conj(vec[j][istate]) * vec[j][istate] / 2.0; - } else { + for (istate = 0; istate < nstate; istate++) + X->Phys.s2[istate] += conj(vec[j][istate]) * vec[j][istate] / 2.0; + } + else { ibit_tmp = (num1_up) ^ (num2_up); if (ibit_tmp != 0) { iexchg = list_1[j] ^ (is_up); @@ -367,47 +401,53 @@ void totalspin_Spin(struct BindStruct *X,int nstate, double complex **vec) { S1 = 0.5 * (X->Def.SiteToBit[isite1 - 1] - 1); S2 = 0.5 * (X->Def.SiteToBit[isite2 - 1] - 1); if (isite1 == isite2) { -#pragma omp parallel for reduction(+: spn, spn_z) default(none) firstprivate(i_max, isite1, X, S1) private (spn_z1)shared(vec, list_1) +#pragma omp parallel for reduction(+: spn_z) default(none) firstprivate(i_max, isite1, X, S1) private (spn_z1)shared(vec, list_1) for (j = 1; j <= i_max; j++) { spn_z1 = 0.5 * GetLocal2Sz(isite1, list_1[j], X->Def.SiteToBit, X->Def.Tpow); - spn += conj(vec[j][istate]) * vec[j][istate] * S1 * (S1 + 1.0); - spn_z += conj(vec[j][istate]) * vec[j][istate] * spn_z1; + for (istate = 0; istate < nstate; istate++) { + X->Phys.s2[istate] += conj(vec[j][istate]) * vec[j][istate] * S1 * (S1 + 1.0); + X->Phys.Sz[istate] += conj(vec[j][istate]) * vec[j][istate] * spn_z1; + } } - } else { -#pragma omp parallel for reduction(+: spn) default(none) firstprivate(i_max, isite1, isite2, X, S1, S2) private(spn_z1, spn_z2, off, off_2, ibit_tmp, sigma_1, sigma_2) shared(vec, list_1) + } + else { +#pragma omp parallel for reduction(+: ) default(none) \ +firstprivate(i_max, isite1, isite2, X, S1, S2) \ +private(spn_z1, spn_z2, off, off_2, ibit_tmp, sigma_1, sigma_2) shared(vec, list_1) for (j = 1; j <= i_max; j++) { spn_z1 = 0.5 * GetLocal2Sz(isite1, list_1[j], X->Def.SiteToBit, X->Def.Tpow); spn_z2 = 0.5 * GetLocal2Sz(isite2, list_1[j], 
X->Def.SiteToBit, X->Def.Tpow); - spn += conj(vec[j][istate]) * vec[j][istate] * spn_z1 * spn_z2; + for (istate = 0; istate < nstate; istate++) + X->Phys.s2[istate] += conj(vec[j][istate]) * vec[j][istate] * spn_z1 * spn_z2; sigma_1 = GetBitGeneral(isite1, list_1[j], X->Def.SiteToBit, X->Def.Tpow); sigma_2 = GetBitGeneral(isite2, list_1[j], X->Def.SiteToBit, X->Def.Tpow); ibit_tmp = GetOffCompGeneralSpin(list_1[j], isite2, sigma_2, sigma_2 + 1, &off, X->Def.SiteToBit, - X->Def.Tpow); + X->Def.Tpow); if (ibit_tmp == TRUE) { ibit_tmp = GetOffCompGeneralSpin(off, isite1, sigma_1, sigma_1 - 1, &off_2, X->Def.SiteToBit, - X->Def.Tpow); + X->Def.Tpow); if (ibit_tmp == TRUE) { ConvertToList1GeneralSpin(off_2, X->Check.sdim, &off); for (istate = 0; istate < nstate; istate++) - X->Phys.s2[istate] += conj(vec[j][istate]) * vec[off][istate] - * sqrt(S2 * (S2 + 1) - spn_z2 * (spn_z2 + 1)) * - sqrt(S1 * (S1 + 1) - spn_z1 * (spn_z1 - 1)) / 2.0; + X->Phys.s2[istate] += conj(vec[j][istate]) * vec[off][istate] + * sqrt(S2 * (S2 + 1) - spn_z2 * (spn_z2 + 1)) + * sqrt(S1 * (S1 + 1) - spn_z1 * (spn_z1 - 1)) / 2.0; } } ibit_tmp = GetOffCompGeneralSpin(list_1[j], isite2, sigma_2, sigma_2 - 1, &off, X->Def.SiteToBit, - X->Def.Tpow); + X->Def.Tpow); if (ibit_tmp == TRUE) { ibit_tmp = GetOffCompGeneralSpin(off, isite1, sigma_1, sigma_1 + 1, &off_2, X->Def.SiteToBit, - X->Def.Tpow); + X->Def.Tpow); if (ibit_tmp == TRUE) { ConvertToList1GeneralSpin(off_2, X->Check.sdim, &off); for (istate = 0; istate < nstate; istate++) - X->Phys.s2[istate] += conj(vec[j][istate]) * vec[off][istate] - * sqrt(S2 * (S2 + 1) - spn_z2 * (spn_z2 - 1.0)) * - sqrt(S1 * (S1 + 1) - spn_z1 * (spn_z1 + 1)) / 2.0; + X->Phys.s2[istate] += conj(vec[j][istate]) * vec[off][istate] + * sqrt(S2 * (S2 + 1) - spn_z2 * (spn_z2 - 1.0)) + * sqrt(S1 * (S1 + 1) - spn_z1 * (spn_z1 + 1)) / 2.0; } } } @@ -415,13 +455,8 @@ void totalspin_Spin(struct BindStruct *X,int nstate, double complex **vec) { } } } - - spn = SumMPI_dc(spn); - 
spn_zd = SumMPI_dc(spn_zd); - spn_z = SumMPI_dc(spn_z); - spn += spn_zd; - X->Phys.s2 = creal(spn); - X->Phys.Sz = creal(spn_z); + SumMPI_dv(nstate, X->Phys.s2); + SumMPI_dv(nstate, X->Phys.Sz); } /** * @brief function of calculating totalspin for spin model in grand canonical ensemble @@ -435,27 +470,31 @@ void totalspin_Spin(struct BindStruct *X,int nstate, double complex **vec) { * @author Takahiro Misawa (The University of Tokyo) * @author Kazuyoshi Yoshimi (The University of Tokyo) */ -void totalspin_SpinGC(struct BindStruct *X,int nstate, double complex **vec){ - +void totalspin_SpinGC( + struct BindStruct *X, + int nstate, + double complex **vec +) { long unsigned int j; - long unsigned int isite1,isite2, tmp_isite1, tmp_isite2; - long unsigned int is1_up,is2_up; + long unsigned int isite1, isite2, tmp_isite1, tmp_isite2; + long unsigned int is1_up, is2_up; long unsigned int iexchg, off, off_2; - int num1_up,num2_up; - int num1_down,num2_down; + int num1_up, num2_up, istate; + int num1_down, num2_down; int sigma_1, sigma_2; - long unsigned int ibit1_up,ibit2_up,ibit_tmp,is_up; + long unsigned int ibit1_up, ibit2_up, ibit_tmp, is_up; double complex spn_z; double complex spn_z1, spn_z2; - double complex spn, spn_d; long unsigned int list_1_j; long unsigned int i_max; - i_max=X->Check.idim_max; + + i_max = X->Check.idim_max; X->Large.mode = M_TOTALS; - spn=0.0; - spn_d=0.0; - spn_z=0.0; - if(X->Def.iFlgGeneralSpin==FALSE){ + for (istate = 0; istate < nstate; istate++) { + X->Phys.s2[istate] = 0.0; + X->Phys.Sz[istate] = 0.0; + } + if (X->Def.iFlgGeneralSpin == FALSE) { for (isite1 = 1; isite1 <= X->Def.NsiteMPI; isite1++) { if (isite1 > X->Def.Nsite) { is1_up = X->Def.Tpow[isite1 - 1]; @@ -464,10 +503,11 @@ void totalspin_SpinGC(struct BindStruct *X,int nstate, double complex **vec){ num1_down = 1 - num1_up; #pragma omp parallel for reduction(+: spn_z) default(none) firstprivate(i_max, is1_up, num1_up, num1_down) shared(vec) for (j = 1; j <= i_max; j++) { - 
spn_z += conj(vec[j][istate])*vec[j][istate] * (num1_up - num1_down) / 2.0; + for (istate = 0; istate < nstate; istate++) + X->Phys.Sz[istate] += conj(vec[j][istate])*vec[j][istate] * (num1_up - num1_down) / 2.0; } } - else{ + else { is1_up = X->Def.Tpow[isite1 - 1]; #pragma omp parallel for reduction(+: spn_z) default(none) firstprivate(i_max, is1_up) private(list_1_j, ibit1_up, num1_up, num1_down) shared(vec) for (j = 1; j <= i_max; j++) { @@ -475,7 +515,8 @@ void totalspin_SpinGC(struct BindStruct *X,int nstate, double complex **vec){ ibit1_up = list_1_j & is1_up; num1_up = ibit1_up / is1_up; num1_down = 1 - num1_up; - spn_z += conj(vec[j][istate])*vec[j][istate] * (num1_up - num1_down) / 2.0; + for (istate = 0; istate < nstate; istate++) + X->Phys.Sz[istate] += conj(vec[j][istate])*vec[j][istate] * (num1_up - num1_down) / 2.0; } } for (isite2 = 1; isite2 <= X->Def.NsiteMPI; isite2++) { @@ -488,20 +529,23 @@ void totalspin_SpinGC(struct BindStruct *X,int nstate, double complex **vec){ num2_up = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, 1); num2_down = 1 - num2_up; spn_z2 = (num1_up - num1_down)*(num2_up - num2_down) / 4.0; -#pragma omp parallel for default(none) reduction (+:spn_d) shared(vec) \ +#pragma omp parallel for default(none) reduction (+:) shared(vec) \ firstprivate(i_max, spn_z2) private(j) for (j = 1; j <= i_max; j++) { - spn_d += conj(vec[j][istate])*vec[j][istate] * spn_z2; + for (istate = 0; istate < nstate; istate++) + X->Phys.s2[istate] += conj(vec[j][istate])*vec[j][istate] * spn_z2; } if (isite1 == isite2) { -#pragma omp parallel for default(none) reduction (+:spn_d) shared(vec) \ +#pragma omp parallel for default(none) reduction (+:) shared(vec) \ firstprivate(i_max) private(j) for (j = 1; j <= i_max; j++) { - spn_d += conj(vec[j][istate])*vec[j][istate] / 2.0; + for (istate = 0; istate < nstate; istate++) + X->Phys.s2[istate] += conj(vec[j][istate])*vec[j][istate] / 2.0; } }//isite1 = isite2 else {//off diagonal - spn += 
X_GC_child_CisAitCiuAiv_spin_MPIdouble(isite1 - 1, 0, 1, isite2 - 1, 1, 0, 1.0, X, vec, vec) / 2.0; + spn += X_GC_child_CisAitCiuAiv_spin_MPIdouble( + isite1 - 1, 0, 1, isite2 - 1, 1, 0, 1.0, X, nstate, vec, vec) / 2.0; } } else if (isite1 > X->Def.Nsite || isite2 > X->Def.Nsite) { @@ -518,26 +562,32 @@ void totalspin_SpinGC(struct BindStruct *X,int nstate, double complex **vec){ num2_up = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, 1); num2_down = 1 - num2_up; //diagonal -#pragma omp parallel for reduction(+: spn_d) default(none) firstprivate(i_max, is1_up, num2_up, num2_down) private(ibit1_up, num1_up, num1_down, spn_z2, list_1_j) shared(vec) +#pragma omp parallel for reduction(+: ) default(none) \ +firstprivate(i_max, is1_up, num2_up, num2_down) \ +private(ibit1_up, num1_up, num1_down, spn_z2, list_1_j) shared(vec) for (j = 1; j <= i_max; j++) { list_1_j = j - 1; ibit1_up = list_1_j & is1_up; num1_up = ibit1_up / is1_up; num1_down = 1 - num1_up; spn_z2 = (num1_up - num1_down)*(num2_up - num2_down); - spn_d += conj(vec[j][istate])*vec[j][istate] * spn_z2 / 4.0; + for (istate = 0; istate < nstate; istate++) + X->Phys.s2[istate] += conj(vec[j][istate])*vec[j][istate] * spn_z2 / 4.0; } if (isite1 < isite2) { - spn += X_GC_child_CisAitCiuAiv_spin_MPIsingle(isite1 - 1, 0, 1, isite2 - 1, 1, 0, 1.0, X, vec, vec) / 2.0; + spn += X_GC_child_CisAitCiuAiv_spin_MPIsingle(isite1 - 1, 0, 1, isite2 - 1, 1, 0, 1.0, X, nstate, vec, vec) / 2.0; } else { - spn += conj(X_GC_child_CisAitCiuAiv_spin_MPIsingle(isite2 - 1, 1, 0, isite1 - 1, 0, 1, 1.0, X, vec, vec)) / 2.0; + spn += conj(X_GC_child_CisAitCiuAiv_spin_MPIsingle(isite2 - 1, 1, 0, isite1 - 1, 0, 1, 1.0, X, nstate, vec, vec)) / 2.0; } } else { is2_up = X->Def.Tpow[isite2 - 1]; is_up = is1_up + is2_up; -#pragma omp parallel for reduction(+: spn, spn_d) default(none) firstprivate(i_max, is_up, is1_up, is2_up, isite1, isite2) private(list_1_j, ibit1_up, num1_up, ibit2_up, num2_up, num1_down, num2_down, spn_z2, 
iexchg, off, ibit_tmp) shared(vec) +#pragma omp parallel for reduction(+: ) default(none) \ +firstprivate(i_max, is_up, is1_up, is2_up, isite1, isite2) \ +private(list_1_j, ibit1_up, num1_up, ibit2_up, num2_up, num1_down, num2_down, spn_z2, iexchg, off, ibit_tmp) \ +shared(vec) for (j = 1; j <= i_max; j++) { list_1_j = j - 1; ibit1_up = list_1_j & is1_up; @@ -548,17 +598,20 @@ void totalspin_SpinGC(struct BindStruct *X,int nstate, double complex **vec){ num2_down = 1 - num2_up; spn_z2 = (num1_up - num1_down)*(num2_up - num2_down); - spn_d += conj(vec[j][istate])*vec[j][istate] * spn_z2 / 4.0; + for (istate = 0; istate < nstate; istate++) + X->Phys.s2[istate] += conj(vec[j][istate])*vec[j][istate] * spn_z2 / 4.0; if (isite1 == isite2) { - spn_d += conj(vec[j][istate])*vec[j][istate] / 2.0; + for (istate = 0; istate < nstate; istate++) + X->Phys.s2[istate] += conj(vec[j][istate])*vec[j][istate] / 2.0; } else { ibit_tmp = (num1_up) ^ (num2_up); if (ibit_tmp != 0) { iexchg = list_1_j ^ (is_up); off = iexchg + 1; - spn += conj(vec[j][istate])*vec[off][istate] / 2.0; + for (istate = 0; istate < nstate; istate++) + X->Phys.s2[istate] += conj(vec[j][istate])*vec[off][istate] / 2.0; } } }//j @@ -567,29 +620,31 @@ void totalspin_SpinGC(struct BindStruct *X,int nstate, double complex **vec){ } } else {//general spin - double S1 = 0; - double S2 = 0; - spn = 0.0; - spn_z = 0.0; - for (isite1 = 1; isite1 <= X->Def.NsiteMPI; isite1++) { - S1=0.5*(X->Def.SiteToBit[isite1-1]-1); - if(isite1 > X->Def.Nsite){ + double S1 = 0; + double S2 = 0; + for (isite1 = 1; isite1 <= X->Def.NsiteMPI; isite1++) { + S1 = 0.5*(X->Def.SiteToBit[isite1 - 1] - 1); + if (isite1 > X->Def.Nsite) { spn_z1 = 0.5*GetLocal2Sz(isite1, (unsigned long int) myrank, X->Def.SiteToBit, X->Def.Tpow); -#pragma omp parallel for reduction(+: spn, spn_z) default(none) firstprivate(S1, spn_z1,i_max) shared(vec) +#pragma omp parallel for reduction(+: ) default(none) firstprivate(S1, spn_z1,i_max) shared(vec) for (j = 1; j 
<= i_max; j++) { - spn += conj(vec[j][istate])*vec[j][istate] * S1*(S1 + 1.0); - spn_z += conj(vec[j][istate])*vec[j][istate] * spn_z1; + for (istate = 0; istate < nstate; istate++) { + X->Phys.s2[istate] += conj(vec[j][istate])*vec[j][istate] * S1*(S1 + 1.0); + X->Phys.Sz[istate] += conj(vec[j][istate])*vec[j][istate] * spn_z1; + } } } - else{ -#pragma omp parallel for reduction(+: spn, spn_z) default(none) firstprivate(i_max, isite1, X, S1) private(spn_z1) shared(vec) + else { +#pragma omp parallel for reduction(+: ) default(none) firstprivate(i_max, isite1, X, S1) private(spn_z1) shared(vec) for (j = 1; j <= i_max; j++) { spn_z1 = 0.5*GetLocal2Sz(isite1, j - 1, X->Def.SiteToBit, X->Def.Tpow); - spn += conj(vec[j][istate])*vec[j][istate] * S1*(S1 + 1.0); - spn_z += conj(vec[j][istate])*vec[j][istate] * spn_z1; + for (istate = 0; istate < nstate; istate++) { + X->Phys.s2[istate] += conj(vec[j][istate])*vec[j][istate] * S1*(S1 + 1.0); + X->Phys.Sz[istate] += conj(vec[j][istate])*vec[j][istate] * spn_z1; + } } } - for(isite2=1;isite2<=X->Def.NsiteMPI;isite2++){ + for (isite2 = 1; isite2 <= X->Def.NsiteMPI; isite2++) { if (isite1 == isite2) continue; S2 = 0.5*(X->Def.SiteToBit[isite2 - 1] - 1); if (isite1 > X->Def.Nsite && isite2 > X->Def.Nsite) { @@ -597,11 +652,15 @@ void totalspin_SpinGC(struct BindStruct *X,int nstate, double complex **vec){ else if (isite1 > X->Def.Nsite || isite2 > X->Def.Nsite) { } else { //inner-process -#pragma omp parallel for reduction(+: spn) default(none) firstprivate(i_max, isite1, isite2, X, S1, S2) private(spn_z1, spn_z2, off, off_2, ibit_tmp, sigma_1, sigma_2) shared(vec) +#pragma omp parallel for reduction(+: ) default(none) \ +firstprivate(i_max, isite1, isite2, X, S1, S2) \ +private(spn_z1, spn_z2, off, off_2, ibit_tmp, sigma_1, sigma_2) \ +shared(vec) for (j = 1; j <= i_max; j++) { spn_z1 = 0.5*GetLocal2Sz(isite1, j - 1, X->Def.SiteToBit, X->Def.Tpow); spn_z2 = 0.5*GetLocal2Sz(isite2, j - 1, X->Def.SiteToBit, X->Def.Tpow); - spn 
+= conj(vec[j][istate])*vec[j][istate] * spn_z1*spn_z2; + for (istate = 0; istate < nstate; istate++) + X->Phys.s2[istate] += conj(vec[j][istate])*vec[j][istate] * spn_z1*spn_z2; sigma_1 = GetBitGeneral(isite1, j - 1, X->Def.SiteToBit, X->Def.Tpow); sigma_2 = GetBitGeneral(isite2, j - 1, X->Def.SiteToBit, X->Def.Tpow); @@ -610,14 +669,20 @@ void totalspin_SpinGC(struct BindStruct *X,int nstate, double complex **vec){ if (ibit_tmp != 0) { ibit_tmp = GetOffCompGeneralSpin(off, isite1, sigma_1, sigma_1 - 1, &off_2, X->Def.SiteToBit, X->Def.Tpow); if (ibit_tmp != 0) { - spn += conj(vec[j][istate])*vec[off_2 + 1] * sqrt(S2*(S2 + 1) - spn_z2 * (spn_z2 + 1))*sqrt(S1*(S1 + 1) - spn_z1 * (spn_z1 - 1)) / 2.0; + for (istate = 0; istate < nstate; istate++) + X->Phys.s2[istate] += conj(vec[j][istate])*vec[off_2 + 1] + * sqrt(S2*(S2 + 1) - spn_z2 * (spn_z2 + 1)) + * sqrt(S1*(S1 + 1) - spn_z1 * (spn_z1 - 1)) / 2.0; } } ibit_tmp = GetOffCompGeneralSpin(j - 1, isite2, sigma_2, sigma_2 - 1, &off, X->Def.SiteToBit, X->Def.Tpow); if (ibit_tmp != 0) { ibit_tmp = GetOffCompGeneralSpin(off, isite1, sigma_1, sigma_1 + 1, &off_2, X->Def.SiteToBit, X->Def.Tpow); if (ibit_tmp != 0) { - spn += conj(vec[j][istate])*vec[off_2 + 1] * sqrt(S2*(S2 + 1) - spn_z2 * (spn_z2 - 1.0))*sqrt(S1*(S1 + 1) - spn_z1 * (spn_z1 + 1)) / 2.0; + for (istate = 0; istate < nstate; istate++) + X->Phys.s2[istate] += conj(vec[j][istate])*vec[off_2 + 1] + * sqrt(S2*(S2 + 1) - spn_z2 * (spn_z2 - 1.0)) + * sqrt(S1*(S1 + 1) - spn_z1 * (spn_z1 + 1)) / 2.0; } } }//j @@ -625,168 +690,6 @@ void totalspin_SpinGC(struct BindStruct *X,int nstate, double complex **vec){ }//isite2 }//isite1 } - spn = SumMPI_dc(spn); - spn_d = SumMPI_dc(spn_d); - spn_z = SumMPI_dc(spn_z); - X->Phys.s2=creal(spn+spn_d); - X->Phys.Sz=creal(spn_z); -} - -int expec_totalSz( - struct BindStruct *X, - int nstate, - double complex **vec -) { - X->Large.mode = M_TOTALS; - switch (X->Def.iCalcModel) { - case Spin: - X->Phys.Sz = X->Def.Total2SzMPI / 2.; - 
break; - case SpinGC: - totalSz_SpinGC(X, vec); - break; - case Hubbard: - case Kondo: - X->Phys.Sz = X->Def.Total2SzMPI / 2.; - - break; - case HubbardGC: - case KondoGC: - totalSz_HubbardGC(X, vec); - break; - default: - X->Phys.Sz = 0.0; - } - return 0; -} -/** - * @brief function of calculating totalSz for Hubbard model in grand canonical ensemble - * - * @param[in,out] X data list of calculation parameters - * @param vec eigenvector - * @version 0.1 - * @author Takahiro Misawa (The University of Tokyo) - * @author Kazuyoshi Yoshimi (The University of Tokyo) - */ -void totalSz_HubbardGC -( - struct BindStruct *X, - int nstate, double complex **vec - ) { - - long unsigned int j; - long unsigned int isite1; - long unsigned int is1_up, is1_down; - int num1_up, num1_down, num1_sz; - long unsigned int ibit1_up, ibit1_down, list_1_j; - double complex spn_z; - long unsigned int i_max; - - i_max = X->Check.idim_max; - spn_z = 0.0; - for (isite1 = 1; isite1 <= X->Def.NsiteMPI; isite1++) { - if (isite1 > X->Def.Nsite) { -#ifdef MPI - is1_up = X->Def.Tpow[2 * isite1 - 2]; - is1_down = X->Def.Tpow[2 * isite1 - 1]; - ibit1_up = (unsigned long int) myrank & is1_up; - num1_up = ibit1_up / is1_up; - ibit1_down = (unsigned long int) myrank & is1_down; - num1_down = ibit1_down / is1_down; - num1_sz = num1_up - num1_down; -#pragma omp parallel for reduction(+:spn_z) default(none) firstprivate(i_max) private(j) shared(num1_sz,vec) - for (j = 1; j <= i_max; j++) { - spn_z += (num1_sz) / 2.0 * conj(vec[j][istate]) * vec[j][istate]; - } -#endif - } else {//isite1 > X->Def.Nsite - is1_up = X->Def.Tpow[2 * isite1 - 2]; - is1_down = X->Def.Tpow[2 * isite1 - 1]; -#pragma omp parallel for reduction(+:spn_z) default(none) firstprivate(i_max, is1_up, is1_down, isite1) \ - private(list_1_j, ibit1_up, num1_up, ibit1_down, num1_down) shared(vec) - for (j = 1; j <= i_max; j++) { - list_1_j = j - 1; - ibit1_up = list_1_j & is1_up; - num1_up = ibit1_up / is1_up; - ibit1_down = list_1_j & 
is1_down; - num1_down = ibit1_down / is1_down; - spn_z += conj(vec[j][istate]) * vec[j][istate] * (num1_up - num1_down) / 2.0; - } - } - } - spn_z = SumMPI_dc(spn_z); - X->Phys.Sz = creal(spn_z); -} -/** - * @brief function of calculating totalSz for Spin model in grand canonical ensemble - * - * @param[in,out] X data list of calculation parameters - * @param vec eigenvector - * @version 0.1 - * @author Takahiro Misawa (The University of Tokyo) - * @author Kazuyoshi Yoshimi (The University of Tokyo) - */ -void totalSz_SpinGC -( - struct BindStruct *X, - int nstate, - double complex **vec - ) { - long unsigned int j, list_1_j; - long unsigned int isite1; - long unsigned int is1_up; - int num1_up; - int num1_down; - long unsigned int ibit1_up; - double complex spn_z, spn_z1; - long unsigned int i_max; - i_max = X->Check.idim_max; - X->Large.mode = M_TOTALS; - spn_z = 0.0; - if (X->Def.iFlgGeneralSpin == FALSE) { - for (isite1 = 1; isite1 <= X->Def.NsiteMPI; isite1++) { - if (isite1 > X->Def.Nsite) { -#ifdef MPI - is1_up = X->Def.Tpow[isite1 - 1]; - ibit1_up = myrank & is1_up; - num1_up = ibit1_up / is1_up; - num1_down = 1 - num1_up; -#pragma omp parallel for reduction(+: spn_z) default(none) firstprivate(i_max, is1_up, num1_up, num1_down) shared(vec) - for (j = 1; j <= i_max; j++) { - spn_z += conj(vec[j][istate]) * vec[j][istate] * (num1_up - num1_down) / 2.0; - } -#endif - } - else { - is1_up = X->Def.Tpow[isite1 - 1]; -#pragma omp parallel for reduction(+: spn_z) default(none) firstprivate(i_max, is1_up) private(list_1_j, ibit1_up, num1_up, num1_down) shared(vec) - for (j = 1; j <= i_max; j++) { - list_1_j = j - 1; - ibit1_up = list_1_j & is1_up; - num1_up = ibit1_up / is1_up; - num1_down = 1 - num1_up; - spn_z += conj(vec[j][istate]) * vec[j][istate] * (num1_up - num1_down) / 2.0; - } - }//else - } - } else {//general spin - spn_z = 0.0; - for (isite1 = 1; isite1 <= X->Def.NsiteMPI; isite1++) { - if (isite1 > X->Def.Nsite) { - spn_z1 = 0.5 * GetLocal2Sz(isite1, 
(unsigned long int) myrank, X->Def.SiteToBit, X->Def.Tpow); -#pragma omp parallel for reduction(+: spn_z) default(none) firstprivate(spn_z1, i_max) shared(vec) - for (j = 1; j <= i_max; j++) { - spn_z += conj(vec[j][istate]) * vec[j][istate] * spn_z1; - } - } else { -#pragma omp parallel for reduction(+: spn_z) default(none) firstprivate(i_max, isite1, X) private(spn_z1) shared(vec) - for (j = 1; j <= i_max; j++) { - spn_z1 = 0.5 * GetLocal2Sz(isite1, j - 1, X->Def.SiteToBit, X->Def.Tpow); - spn_z += conj(vec[j][istate]) * vec[j][istate] * spn_z1; - } - } - }//isite1 - } - spn_z = SumMPI_dc(spn_z); - X->Phys.Sz = creal(spn_z); + SumMPI_dv(nstate, X->Phys.s2); + SumMPI_dv(nstate, X->Phys.Sz); } diff --git a/src/include/expec_cisajs.h b/src/include/expec_cisajs.h index e152f2e7a..290f5bdff 100644 --- a/src/include/expec_cisajs.h +++ b/src/include/expec_cisajs.h @@ -18,5 +18,7 @@ int expec_cisajs( struct BindStruct *X, - double complex *vec + int nstate, + double complex **Xvec, + double complex **vec ); diff --git a/src/include/expec_cisajscktaltdc.h b/src/include/expec_cisajscktaltdc.h index 51b07e8cb..644cde401 100644 --- a/src/include/expec_cisajscktaltdc.h +++ b/src/include/expec_cisajscktaltdc.h @@ -59,10 +59,7 @@ double complex child_Cor_3(long unsigned int j, ); -int expec_cisajscktaltdc( - struct BindStruct *X, - double complex *vec - ); +int expec_cisajscktaltdc(struct BindStruct *X, int nstate, double complex **Xvec, double complex **vec); void expec_cisajscktaltdc_alldiag_spin( struct BindStruct *X, diff --git a/src/include/expec_energy_flct.h b/src/include/expec_energy_flct.h index fa0ed1174..d6418f9e4 100644 --- a/src/include/expec_energy_flct.h +++ b/src/include/expec_energy_flct.h @@ -16,16 +16,4 @@ #pragma once #include "Common.h" -int expec_energy_flct(struct BindStruct *X); - -int expec_energy_flct_Hubbard(struct BindStruct *X); - -int expec_energy_flct_HubbardGC(struct BindStruct *X); - -int expec_energy_flct_HalfSpinGC(struct BindStruct *X); - 
-int expec_energy_flct_GeneralSpinGC(struct BindStruct *X); - -int expec_energy_flct_HalfSpin(struct BindStruct *X); - -int expec_energy_flct_GeneralSpin(struct BindStruct *X); +int expec_energy_flct(struct BindStruct *X, int nstate, double complex **tmp_v0, double complex **tmp_v1); diff --git a/src/include/global.h b/src/include/global.h index ce038002a..8ce1fed8b 100644 --- a/src/include/global.h +++ b/src/include/global.h @@ -64,8 +64,8 @@ int initial_mode;/**< mode to get initial state (0: use same random generato double LargeValue;/**< constant value l for TPQ calculation.*/ int NumAve;/**< Average number for TPQ calculation*/ int step_i;/**< step for TPQ calculation*/ -double global_norm;/**< norm before normalization for TPQ calculation*/ -double global_1st_norm;/**< 1-st norm for TPQ calculation*/ +double *global_norm;/**< norm before normalization for TPQ calculation*/ +double *global_1st_norm;/**< 1-st norm for TPQ calculation*/ int step_spin;/**< output step for TE calculation.*/ /*[e] For TPQ*/ diff --git a/src/include/struct.h b/src/include/struct.h index 52f41737f..1c8b01a5b 100644 --- a/src/include/struct.h +++ b/src/include/struct.h @@ -359,29 +359,13 @@ struct PhysList { double *num2;/**<@brief Expectation value of the quare of the number of electrons*/ double *Sz;/**<@brief Expectation value of the Total Sz*/ double *Sz2;/**<@brief Expectation value of the Square of total Sz*/ + double *num_up;/**<@brief Expectation value of the number of up-spin electtrons*/ + double *num_down;/**<@brief Expectation value of the number of down-spin electtrons*/ + double *s2;/**<@brief Expectation value of the square of the total S.*/ /*[s] For TPQ*/ double *var;/**<@brief Expectation value of the Energy variance.*/ /*[e] For TPQ*/ - /*[s] For Full Diagonalization*/ - int eigen_num;/**<@brief Index of eigenstate used for the file name of the correlation function.*/ - double num_up;/**<@brief Expectation value of the number of up-spin electtrons*/ - double 
num_down;/**<@brief Expectation value of the number of down-spin electtrons*/ - double s2;/**<@brief Expectation value of the square of the total S.*/ - double *all_energy;/**<@brief [CheckList::idim_max+1] Energy for FullDiag and LOBPCG. - malloc in setmem_large().*/ - double *all_doublon;/**<@brief [CheckList::idim_max+1] Doublon for FullDiag and LOBPCG. - malloc in setmem_large().*/ - double *all_sz;/**<@brief [CheckList::idim_max+1] @f$S_z@f$ for FullDiag and LOBPCG. - malloc in setmem_large().*/ - double *all_s2;/**<@brief [CheckList::idim_max+1] @f$S_z^2@f$ for FullDiag and LOBPCG. - malloc in setmem_large().*/ - double *all_num_up;/**<@brief [CheckList::idim_max+1] Number of spin-up electrons - for FullDiag and LOBPCG. malloc in setmem_large().*/ - double *all_num_down;/**<@brief[CheckList::idim_max+1] Number of spin-down electrons - for FullDiag and LOBPCG. malloc in setmem_large().*/ - /*[e] For Full Diagonalization*/ - double *spin_real_cor;/**<@brief Malloc, but Not used ???*/ double *charge_real_cor;/**<@brief Malloc, but Not used ???*/ double *loc_spin_z;/**<@brief Malloc, but Not used ???*/ diff --git a/src/lapack_diag.c b/src/lapack_diag.c index 20ba030b2..614a53386 100644 --- a/src/lapack_diag.c +++ b/src/lapack_diag.c @@ -119,7 +119,7 @@ struct BindStruct *X//!<[inout] return -1; } for (i = 0; i < i_max; i++) { - fprintf(fp, " %ld %.10lf \n", i, creal(v0[i])); + fprintf(fp, " %ld %.10lf \n", i, creal(v0[i][0])); } fclose(fp); diff --git a/src/matrixlapack.c b/src/matrixlapack.c index 2d5ff5ca0..c83068d97 100644 --- a/src/matrixlapack.c +++ b/src/matrixlapack.c @@ -428,7 +428,7 @@ int ZHEEVall(int xNsize, double complex **A, double complex *r,double complex ** k=0; for(i=0;i \n"); for (i = 0; i < i_max; i++) { - fprintf(fp, " %10lf %10lf %10lf %10lf %10lf\n", X->Phys.all_energy[i], X->Phys.all_num_up[i]+X->Phys.all_num_down[i], X->Phys.all_sz[i], - X->Phys.all_s2[i], X->Phys.all_doublon[i]); + fprintf(fp, " %10lf %10lf %10lf %10lf %10lf\n", 
X->Phys.energy[i], X->Phys.num_up[i]+X->Phys.num_down[i], X->Phys.Sz[i], + X->Phys.s2[i], X->Phys.doublon[i]); } fclose(fp); } diff --git a/src/phys.c b/src/phys.c index 2fb2e6432..a68df3179 100644 --- a/src/phys.c +++ b/src/phys.c @@ -64,20 +64,21 @@ void phys(struct BindStruct *X, //!<[inout] for (j = 0; j < i_max; j++) { v0[j + 1] = 0.0; } - if(use_scalapack){ + if (use_scalapack) { MPI_Comm_rank(MPI_COMM_WORLD, &rank); GetEigenVector(i, i_max, Z_vec, descZ_vec, vec_tmp); - if(rank == 0) { + if (rank == 0) { for (j = 0; j < i_max; j++) { v0[j + 1] = vec_tmp[j]; } } - else{ + else { for (j = 0; j < i_max; j++) { v0[j + 1] = 0.0; } } - } else { + } + else { if (X->Def.iCalcType == FullDiag) { if (myrank == 0) { for (j = 0; j < i_max; j++) { @@ -93,78 +94,73 @@ void phys(struct BindStruct *X, //!<[inout] } #else for (j = 0; j < i_max; j++) { - v0[j + 1] = L_vec[i][j]; - } + v0[j + 1][i] = L_vec[j][i]; + } #endif + }/*for (i = 0; i < neig; i++)*/ - X->Phys.eigen_num = i; - if (expec_energy_flct(X) != 0) { - fprintf(stderr, "Error: calc expec_energy.\n"); - exitMPI(-1); - } - if (expec_cisajs(X, v1) != 0) { - fprintf(stderr, "Error: calc OneBodyG.\n"); - exitMPI(-1); - } - if (expec_cisajscktaltdc(X, v1) != 0) { - fprintf(stderr, "Error: calc TwoBodyG.\n"); - exitMPI(-1); - } + if (expec_energy_flct(X, neig, v0, v1) != 0) { + fprintf(stderr, "Error: calc expec_energy.\n"); + exitMPI(-1); + } + if (expec_cisajs(X, neig, v0, v1) != 0) { + fprintf(stderr, "Error: calc OneBodyG.\n"); + exitMPI(-1); + } + if (expec_cisajscktaltdc(X, neig, v0, v1) != 0) { + fprintf(stderr, "Error: calc TwoBodyG.\n"); + exitMPI(-1); + } #ifdef _SCALAPACK - if(use_scalapack){ - if (X->Def.iCalcType == FullDiag) { - X->Phys.s2=0.0; - X->Phys.Sz=0.0; - } - }else{ - if (X->Def.iCalcType == FullDiag) { - if (expec_totalspin(X, v1) != 0) { - fprintf(stderr, "Error: calc TotalSpin.\n"); - exitMPI(-1); - } - } - } -#else + if (use_scalapack) { + if (X->Def.iCalcType == FullDiag) { + X->Phys.s2 = 
0.0; + X->Phys.Sz = 0.0; + } + } + else { if (X->Def.iCalcType == FullDiag) { if (expec_totalspin(X, v1) != 0) { fprintf(stderr, "Error: calc TotalSpin.\n"); exitMPI(-1); } } + } +#else + if (X->Def.iCalcType == FullDiag) { + if (expec_totalspin(X, v1) != 0) { + fprintf(stderr, "Error: calc TotalSpin.\n"); + exitMPI(-1); + } + } #endif - + + for (i = 0; i < neig; i++) { if (X->Def.iCalcModel == Spin || X->Def.iCalcModel == SpinGC) { tmp_N = X->Def.NsiteMPI; - } else { - tmp_N = X->Phys.num_up + X->Phys.num_down; } - - if (X->Def.iCalcType == FullDiag){ + else { + tmp_N = X->Phys.num_up[i] + X->Phys.num_down[i]; + } + if (X->Def.iCalcType == FullDiag) { #ifdef _SCALAPACK - if (use_scalapack){ + if (use_scalapack) { fprintf(stdoutMPI, "i=%5ld Energy=%10lf N=%10lf Sz=%10lf Doublon=%10lf \n", i, X->Phys.energy, tmp_N, - X->Phys.Sz, X->Phys.doublon); + X->Phys.Sz, X->Phys.doublon); } - else{ + else { fprintf(stdoutMPI, "i=%5ld Energy=%10lf N=%10lf Sz=%10lf S2=%10lf Doublon=%10lf \n", i, X->Phys.energy, tmp_N, - X->Phys.Sz, X->Phys.s2, X->Phys.doublon); + X->Phys.Sz, X->Phys.s2, X->Phys.doublon); } #else - fprintf(stdoutMPI, "i=%5ld Energy=%10lf N=%10lf Sz=%10lf S2=%10lf Doublon=%10lf \n", i, X->Phys.energy, tmp_N, - X->Phys.Sz, X->Phys.s2, X->Phys.doublon); - + fprintf(stdoutMPI, "i=%5ld Energy=%10lf N=%10lf Sz=%10lf S2=%10lf Doublon=%10lf \n", + i, X->Phys.energy[i], tmp_N, X->Phys.Sz[i], X->Phys.s2[i], X->Phys.doublon[i]); #endif } else if (X->Def.iCalcType == CG) - fprintf(stdoutMPI, "i=%5ld Energy=%10lf N=%10lf Sz=%10lf Doublon=%10lf \n", i, X->Phys.energy, tmp_N, - X->Phys.Sz, X->Phys.doublon); - X->Phys.all_energy[i] = X->Phys.energy; - X->Phys.all_doublon[i] = X->Phys.doublon; - X->Phys.all_sz[i] = X->Phys.Sz; - X->Phys.all_s2[i] = X->Phys.s2; - X->Phys.all_num_up[i] = X->Phys.num_up; - X->Phys.all_num_down[i] = X->Phys.num_down; + fprintf(stdoutMPI, "i=%5ld Energy=%10lf N=%10lf Sz=%10lf Doublon=%10lf \n", + i, X->Phys.energy[i], tmp_N, X->Phys.Sz[i], 
X->Phys.doublon[i]); } #ifdef _SCALAPACK if(use_scalapack) free(vec_tmp); diff --git a/src/xsetmem.c b/src/xsetmem.c index 22e75762e..367e57789 100644 --- a/src/xsetmem.c +++ b/src/xsetmem.c @@ -164,74 +164,77 @@ int setmem_large idim_maxMPI = MaxMPI_li(X->Check.idim_max); if (GetlistSize(X) == TRUE) { - list_1 = lui_1d_allocate(X->Check.idim_max + 1); + list_1 = lui_1d_allocate(X->Check.idim_max + 1); #ifdef MPI - list_1buf = lui_1d_allocate(idim_maxMPI + 1); + list_1buf = lui_1d_allocate(idim_maxMPI + 1); #endif // MPI - list_2_1 = lui_1d_allocate(X->Large.SizeOflist_2_1); - list_2_2 = lui_1d_allocate(X->Large.SizeOflist_2_2); - if (list_1 == NULL - || list_2_1 == NULL - || list_2_2 == NULL - ) { - return -1; - } + list_2_1 = lui_1d_allocate(X->Large.SizeOflist_2_1); + list_2_2 = lui_1d_allocate(X->Large.SizeOflist_2_2); + if (list_1 == NULL + || list_2_1 == NULL + || list_2_2 == NULL + ) { + return -1; + } } - list_Diagonal = d_1d_allocate(X->Check.idim_max + 1); - v0 = cd_1d_allocate(X->Check.idim_max + 1); - v1 = cd_1d_allocate(X->Check.idim_max + 1); + list_Diagonal = d_1d_allocate(X->Check.idim_max + 1); + v0 = cd_1d_allocate(X->Check.idim_max + 1); + v1 = cd_1d_allocate(X->Check.idim_max + 1); if (X->Def.iCalcType == TimeEvolution) { - v2 = cd_1d_allocate(X->Check.idim_max + 1); + v2 = cd_1d_allocate(X->Check.idim_max + 1); } else { - v2 = cd_1d_allocate(1); + v2 = cd_1d_allocate(1); } #ifdef MPI - v1buf = cd_1d_allocate(idim_maxMPI + 1); + v1buf = cd_1d_allocate(idim_maxMPI + 1); #endif // MPI if (X->Def.iCalcType == TPQCalc) { - vg = cd_1d_allocate(1); - } else { - vg = cd_1d_allocate(X->Check.idim_max + 1); + vg = cd_1d_allocate(1); + } + else { + vg = cd_1d_allocate(X->Check.idim_max + 1); } - alpha = d_1d_allocate(X->Def.Lanczos_max + 1); - beta = d_1d_allocate(X->Def.Lanczos_max + 1); + alpha = d_1d_allocate(X->Def.Lanczos_max + 1); + beta = d_1d_allocate(X->Def.Lanczos_max + 1); if ( - list_Diagonal == NULL - || v0 == NULL - || v1 == NULL - || vg == 
NULL - ) { + list_Diagonal == NULL + || v0 == NULL + || v1 == NULL + || vg == NULL + ) { return -1; } if (X->Def.iCalcType == TPQCalc || X->Def.iFlgCalcSpec != CALCSPEC_NOT) { - vec = cd_2d_allocate(X->Def.Lanczos_max + 1, X->Def.Lanczos_max + 1); - } else if (X->Def.iCalcType == Lanczos || X->Def.iCalcType == CG) { + vec = cd_2d_allocate(X->Def.Lanczos_max + 1, X->Def.Lanczos_max + 1); + } + else if (X->Def.iCalcType == Lanczos || X->Def.iCalcType == CG) { if (X->Def.LanczosTarget > X->Def.nvec) { - vec = cd_2d_allocate(X->Def.LanczosTarget + 1, X->Def.Lanczos_max + 1); - } else { - vec = cd_2d_allocate(X->Def.nvec + 1, X->Def.Lanczos_max + 1); + vec = cd_2d_allocate(X->Def.LanczosTarget + 1, X->Def.Lanczos_max + 1); + } + else { + vec = cd_2d_allocate(X->Def.nvec + 1, X->Def.Lanczos_max + 1); } } if (X->Def.iCalcType == FullDiag) { - X->Phys.all_num_down = d_1d_allocate(X->Check.idim_max + 1); - X->Phys.all_num_up = d_1d_allocate( X->Check.idim_max + 1); - X->Phys.all_energy = d_1d_allocate(X->Check.idim_max + 1); - X->Phys.all_doublon = d_1d_allocate(X->Check.idim_max + 1); - X->Phys.all_sz = d_1d_allocate(X->Check.idim_max + 1); - X->Phys.all_s2 = d_1d_allocate(X->Check.idim_max + 1); - Ham = cd_2d_allocate(X->Check.idim_max + 1, X->Check.idim_max + 1); - L_vec = cd_2d_allocate(X->Check.idim_max + 1, X->Check.idim_max + 1); + X->Phys.all_num_down = d_1d_allocate(X->Check.idim_max + 1); + X->Phys.all_num_up = d_1d_allocate(X->Check.idim_max + 1); + X->Phys.all_energy = d_1d_allocate(X->Check.idim_max + 1); + X->Phys.all_doublon = d_1d_allocate(X->Check.idim_max + 1); + X->Phys.all_sz = d_1d_allocate(X->Check.idim_max + 1); + X->Phys.all_s2 = d_1d_allocate(X->Check.idim_max + 1); + Ham = cd_2d_allocate(X->Check.idim_max + 1, X->Check.idim_max + 1); + L_vec = cd_2d_allocate(X->Check.idim_max + 1, X->Check.idim_max + 1); if (X->Phys.all_num_down == NULL - || X->Phys.all_num_up == NULL - || X->Phys.all_energy == NULL - || X->Phys.all_doublon == NULL - || 
X->Phys.all_s2 == NULL - ) { + || X->Phys.all_num_up == NULL + || X->Phys.all_energy == NULL + || X->Phys.all_doublon == NULL + || X->Phys.all_s2 == NULL + ) { return -1; } for (j = 0; j < X->Check.idim_max + 1; j++) { @@ -239,13 +242,14 @@ int setmem_large return -1; } } - } else if (X->Def.iCalcType == CG) { - X->Phys.all_num_down = d_1d_allocate(X->Def.k_exct); - X->Phys.all_num_up = d_1d_allocate(X->Def.k_exct); - X->Phys.all_energy = d_1d_allocate(X->Def.k_exct); - X->Phys.all_doublon = d_1d_allocate(X->Def.k_exct); - X->Phys.all_sz = d_1d_allocate(X->Def.k_exct); - X->Phys.all_s2 = d_1d_allocate( X->Def.k_exct); + } + else if (X->Def.iCalcType == CG) { + X->Phys.all_num_down = d_1d_allocate(X->Def.k_exct); + X->Phys.all_num_up = d_1d_allocate(X->Def.k_exct); + X->Phys.all_energy = d_1d_allocate(X->Def.k_exct); + X->Phys.all_doublon = d_1d_allocate(X->Def.k_exct); + X->Phys.all_sz = d_1d_allocate(X->Def.k_exct); + X->Phys.all_s2 = d_1d_allocate(X->Def.k_exct); } fprintf(stdoutMPI, "%s", cProFinishAlloc); return 0; @@ -260,21 +264,18 @@ int setmem_large /// \param NInterAll [in] Total number of InterAll interactions. 
/// \author Kazuyoshi Yoshimi /// \version 1.2 - void setmem_IntAll_Diagonal - ( - int **InterAllOffDiagonal, - double complex *ParaInterAllOffDiagonal, - int **InterAllDiagonal, - double *ParaInterAllDiagonal, - const int NInterAll - ) { - InterAllOffDiagonal = i_2d_allocate(NInterAll, 8); - ParaInterAllOffDiagonal = cd_1d_allocate(NInterAll); - InterAllDiagonal = i_2d_allocate(NInterAll, 4); - ParaInterAllDiagonal = d_1d_allocate(NInterAll); - } - - +void setmem_IntAll_Diagonal( + int **InterAllOffDiagonal, + double complex *ParaInterAllOffDiagonal, + int **InterAllDiagonal, + double *ParaInterAllDiagonal, + const int NInterAll +) { + InterAllOffDiagonal = i_2d_allocate(NInterAll, 8); + ParaInterAllOffDiagonal = cd_1d_allocate(NInterAll); + InterAllDiagonal = i_2d_allocate(NInterAll, 4); + ParaInterAllDiagonal = d_1d_allocate(NInterAll); +} /// /// \brief Set size of lists for the canonical ensemble. /// \param X [in,out] Give the information for getting the list size and get the lists.\n @@ -284,40 +285,40 @@ int setmem_large /// \retval FALSE: Unnormally finished /// \author Kazuyoshi Yoshimi /// \version 1.2 - int GetlistSize - ( - struct BindStruct *X - ) { - // unsigned int idim_maxMPI; +int GetlistSize( + struct BindStruct *X +) { + // unsigned int idim_maxMPI; // idim_maxMPI = MaxMPI_li(X->Check.idim_max); - - switch (X->Def.iCalcModel) { - case Spin: - case Hubbard: - case HubbardNConserved: - case Kondo: - case KondoGC: - if (X->Def.iFlgGeneralSpin == FALSE) { - if (X->Def.iCalcModel == Spin && X->Def.Nsite % 2 == 1) { - X->Large.SizeOflist_2_1 = X->Check.sdim * 2 + 2; - } else { - X->Large.SizeOflist_2_1 = X->Check.sdim + 2; - } - X->Large.SizeOflist_2_2 = X->Check.sdim + 2; - X->Large.SizeOflistjb = X->Check.sdim + 2; - } else {//for spin-canonical general spin - X->Large.SizeOflist_2_1 = X->Check.sdim + 2; - X->Large.SizeOflist_2_2 = - X->Def.Tpow[X->Def.Nsite - 1] * X->Def.SiteToBit[X->Def.Nsite - 1] / X->Check.sdim + 2; - X->Large.SizeOflistjb = - 
X->Def.Tpow[X->Def.Nsite - 1] * X->Def.SiteToBit[X->Def.Nsite - 1] / X->Check.sdim + 2; - } - break; - default: - return FALSE; + switch (X->Def.iCalcModel) { + case Spin: + case Hubbard: + case HubbardNConserved: + case Kondo: + case KondoGC: + if (X->Def.iFlgGeneralSpin == FALSE) { + if (X->Def.iCalcModel == Spin && X->Def.Nsite % 2 == 1) { + X->Large.SizeOflist_2_1 = X->Check.sdim * 2 + 2; + } + else { + X->Large.SizeOflist_2_1 = X->Check.sdim + 2; + } + X->Large.SizeOflist_2_2 = X->Check.sdim + 2; + X->Large.SizeOflistjb = X->Check.sdim + 2; } - return TRUE; + else {//for spin-canonical general spin + X->Large.SizeOflist_2_1 = X->Check.sdim + 2; + X->Large.SizeOflist_2_2 = + X->Def.Tpow[X->Def.Nsite - 1] * X->Def.SiteToBit[X->Def.Nsite - 1] / X->Check.sdim + 2; + X->Large.SizeOflistjb = + X->Def.Tpow[X->Def.Nsite - 1] * X->Def.SiteToBit[X->Def.Nsite - 1] / X->Check.sdim + 2; + } + break; + default: + return FALSE; + } + return TRUE; } /** @page page_setmem Malloc vectors From 7a47d8a35c92982952e8d27787013962d544f45c Mon Sep 17 00:00:00 2001 From: mitsuaki1987 Date: Thu, 7 Mar 2019 15:58:30 +0900 Subject: [PATCH 07/50] Backup --- src/CalcByFullDiag.c | 9 +- src/CalcByLOBPCG.c | 28 +- src/CalcByTEM.c | 79 +++-- src/CalcSpectrum.c | 566 +++++++++++++++---------------- src/CalcSpectrumByBiCG.c | 7 +- src/CalcSpectrumByFullDiag.c | 27 +- src/Multiply.c | 4 +- src/PairExHubbard.c | 55 +-- src/PairExSpin.c | 107 +++--- src/include/CalcSpectrumByBiCG.h | 1 - src/include/Multiply.h | 10 +- src/include/SingleExHubbard.h | 17 +- src/include/global.h | 10 - src/include/makeHam.h | 19 -- src/include/matrixlapack.h | 5 +- src/include/mltplyHubbard.h | 11 +- src/lapack_diag.c | 27 +- src/makeHam.c | 552 ------------------------------ src/matrixlapack.c | 365 ++------------------ src/mltplyHubbard.c | 3 +- src/phys.c | 12 +- src/xsetmem.c | 101 ++---- 22 files changed, 539 insertions(+), 1476 deletions(-) delete mode 100644 src/include/makeHam.h delete mode 100644 
src/makeHam.c diff --git a/src/CalcByFullDiag.c b/src/CalcByFullDiag.c index 668990c52..37867fd45 100644 --- a/src/CalcByFullDiag.c +++ b/src/CalcByFullDiag.c @@ -17,6 +17,8 @@ #include "input.h" #include "wrapperMPI.h" #include "CalcTime.h" +#include "mltplyCommon.h" +#include "mltply.h" /// \brief Parent function for FullDiag mode /// \param X [in,out] Struct to get information about file header names, dimension of hirbert space, calc type, physical quantities. @@ -28,10 +30,15 @@ int CalcByFullDiag( ) { int iret=0; + unsigned long int idim; + fprintf(stdoutMPI, "%s", cLogFullDiag_SetHam_Start); StartTimer(5100); if(X->Bind.Def.iInputHam==FALSE){ - makeHam(&(X->Bind)); + zclear((X->Bind.Check.idim_max + 1)*(X->Bind.Check.idim_max + 1), &v0[0][0]); + zclear((X->Bind.Check.idim_max + 1)*(X->Bind.Check.idim_max + 1), &v1[0][0]); + for (idim = 1; idim <= X->Bind.Check.idim_max; idim++) v1[idim][idim] = 1.0; + mltply(&(X->Bind), X->Bind.Check.idim_max, v0, v1); } else if(X->Bind.Def.iInputHam==TRUE){ fprintf(stdoutMPI, "%s", cLogFullDiag_InputHam_Start); diff --git a/src/CalcByLOBPCG.c b/src/CalcByLOBPCG.c index d4da65b86..147e559f4 100644 --- a/src/CalcByLOBPCG.c +++ b/src/CalcByLOBPCG.c @@ -367,7 +367,6 @@ int LOBPCG_Main( free(v0); free(v1); - free(vg); wxp = cd_3d_allocate(3, X->Check.idim_max + 1, X->Def.k_exct); hwxp = cd_3d_allocate(3, X->Check.idim_max + 1, X->Def.k_exct); /**@brief @@ -605,19 +604,17 @@ int LOBPCG_Main( } } /**@brief -
  • Just Move wxp[1] into ::L_vec. The latter must be start from 0-index (the same as FullDiag)
  • +
  • Just Move wxp[1] into ::v1. The latter must start from 0-index (the same as FullDiag)
  • */ - L_vec = cd_2d_allocate(X->Check.idim_max + 1, X->Def.k_exct); -#pragma omp parallel for default(none) shared(i_max,wxp,L_vec,X) private(idim,ie) - for (idim = 0; idim < i_max; idim++) + v1 = cd_2d_allocate(X->Check.idim_max + 1, X->Def.k_exct); +#pragma omp parallel for default(none) shared(i_max,wxp,v1,X) private(idim,ie) + for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++) - L_vec[idim][ie] = wxp[1][idim + 1][ie]; + v1[idim][ie] = wxp[1][idim][ie]; free_cd_3d_allocate(wxp); + v0 = cd_2d_allocate(X->Check.idim_max + 1, X->Def.k_exct); - v0 = cd_1d_allocate(X->Check.idim_max + 1, 1); - v1 = cd_1d_allocate(X->Check.idim_max + 1, 1); - vg = cd_1d_allocate(X->Check.idim_max + 1, 1); if (iconv != 0) { sprintf(sdt, "%s", cLogLanczos_EigenValueNotConverged); return -1; @@ -688,7 +685,6 @@ int CalcByLOBPCG( and read from files. */ fprintf(stdoutMPI, "An Eigenvector is inputted.\n"); - L_vec = cd_2d_allocate(X->Bind.Check.idim_max + 1, X->Bind.Def.k_exct); vin = cd_1d_allocate(X->Bind.Check.idim_max + 1); for (ie = 0; ie < X->Bind.Def.k_exct; ie++) { TimeKeeper(&(X->Bind), cFileNameTimeKeep, cReadEigenVecStart, "a"); @@ -705,9 +701,9 @@ int CalcByLOBPCG( exitMPI(-1); } byte_size = fread(vin, sizeof(complex double), X->Bind.Check.idim_max + 1, fp); -#pragma omp parallel for default(none) shared(L_vec, v1) firstprivate(i_max, ie), private(idim) - for (idim = 0; idim < i_max; idim++) { - L_vec[ie][idim] = vin[idim + 1]; +#pragma omp parallel for default(none) shared(v1) firstprivate(i_max, ie), private(idim) + for (idim = 1; idim <= i_max; idim++) { + v1[ie][idim] = vin[idim]; } }/*for (ie = 0; ie < X->Def.k_exct; ie++)*/ fclose(fp); @@ -756,9 +752,9 @@ int CalcByLOBPCG( vin = cd_1d_allocate(X->Bind.Check.idim_max); for (ie = 0; ie < X->Bind.Def.k_exct; ie++) { -#pragma omp parallel for default(none) shared(X,v1,L_vec,ie) private(idim) - for (idim = 0; idim < X->Bind.Check.idim_max; idim++) - vin[idim + 1] = L_vec[idim][ie]; +#pragma omp 
parallel for default(none) shared(X,v1,ie) private(idim) + for (idim = 1; idim <= X->Bind.Check.idim_max; idim++) + vin[idim] = v1[idim][ie]; sprintf(sdt, cFileNameOutputEigen, X->Bind.Def.CDataFileHead, ie, myrank); if (childfopenALL(sdt, "wb", &fp) != 0) exitMPI(-1); diff --git a/src/CalcByTEM.c b/src/CalcByTEM.c index bef775b31..3f92e382d 100644 --- a/src/CalcByTEM.c +++ b/src/CalcByTEM.c @@ -25,6 +25,7 @@ #include "FileIO.h" #include "wrapperMPI.h" #include "HPhiTrans.h" +#include "common/setmemory.h" void MakeTEDTransfer(struct BindStruct *X, const int timeidx); void MakeTEDInterAll(struct BindStruct *X, const int timeidx); @@ -51,8 +52,8 @@ void MakeTEDInterAll(struct BindStruct *X, const int timeidx); * @author Kazuyoshi Yoshimi (The University of Tokyo) */ int CalcByTEM( - const int ExpecInterval, - struct EDMainCalStruct *X + const int ExpecInterval, + struct EDMainCalStruct *X ) { size_t byte_size; char *defname; @@ -60,14 +61,15 @@ int CalcByTEM( char sdt_phys[D_FileNameMax]; char sdt_norm[D_FileNameMax]; char sdt_flct[D_FileNameMax]; - int rand_i=0; + int rand_i = 0; int step_initial = 0; long int i_max = 0; FILE *fp; double Time = X->Bind.Def.Param.Tinit; - double dt = ((X->Bind.Def.NLaser==0)? 0.0: X->Bind.Def.Param.TimeSlice); + double dt = ((X->Bind.Def.NLaser == 0) ? 
0.0 : X->Bind.Def.Param.TimeSlice); + double complex **v2; /**< Ttemporary vector for time evolution calculation, @f$ v2 = H*v1 = H^coef |psi(t)>@f$.*/ - if(X->Bind.Def.NTETimeSteps < X->Bind.Def.Lanczos_max){ + if (X->Bind.Def.NTETimeSteps < X->Bind.Def.Lanczos_max) { fprintf(stdoutMPI, "Error: NTETimeSteps must be larger than Lanczos_max.\n"); return -1; } @@ -77,9 +79,10 @@ int CalcByTEM( if (X->Bind.Def.iInputEigenVec == FALSE) { fprintf(stderr, "Error: A file of Inputvector is not inputted.\n"); return -1; - } else { + } + else { //input v1 - fprintf(stdoutMPI, "%s","An Initial Vector is inputted.\n"); + fprintf(stdoutMPI, "%s", "An Initial Vector is inputted.\n"); TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_InputEigenVectorStart, "a"); GetFileNameByKW(KWSpectrumVec, &defname); strcat(defname, "_rank_%d.dat"); @@ -97,7 +100,7 @@ int CalcByTEM( fclose(fp); exitMPI(-1); } - fread(v1, sizeof(complex double), X->Bind.Check.idim_max + 1, fp); + fread(&v1[0][0], sizeof(complex double), X->Bind.Check.idim_max + 1, fp); fclose(fp); if (X->Bind.Def.iReStart == RESTART_NOT || X->Bind.Def.iReStart == RESTART_OUT) { step_initial = 0; @@ -108,26 +111,27 @@ int CalcByTEM( if (childfopenMPI(sdt_phys, "w", &fp) != 0) { return -1; } - fprintf(fp, "%s",cLogSS); + fprintf(fp, "%s", cLogSS); fclose(fp); sprintf(sdt_norm, "%s", cFileNameNorm); if (childfopenMPI(sdt_norm, "w", &fp) != 0) { return -1; } - fprintf(fp, "%s",cLogNorm); + fprintf(fp, "%s", cLogNorm); fclose(fp); sprintf(sdt_flct, "%s", cFileNameFlct); if (childfopenMPI(sdt_flct, "w", &fp) != 0) { return -1; } - fprintf(fp, "%s",cLogFlct); + fprintf(fp, "%s", cLogFlct); fclose(fp); int iInterAllOffDiagonal_org = X->Bind.Def.NInterAll_OffDiagonal; int iTransfer_org = X->Bind.Def.EDNTransfer; + v2 = cd_2d_allocate(X->Bind.Check.idim_max + 1, 1); for (step_i = step_initial; step_i < X->Bind.Def.Lanczos_max; step_i++) { X->Bind.Def.istep = step_i; @@ -139,7 +143,7 @@ int CalcByTEM( fprintf(stdoutMPI, cLogTEStep, step_i, 
X->Bind.Def.Lanczos_max); } - if(X->Bind.Def.NLaser !=0) { + if (X->Bind.Def.NLaser != 0) { TransferWithPeierls(&(X->Bind), Time); } else { @@ -152,36 +156,37 @@ int CalcByTEM( X->Bind.Def.Param.TimeSlice = dt; // Set interactions - if(X->Bind.Def.NTETransferMax != 0 && X->Bind.Def.NTEInterAllMax!=0){ + if (X->Bind.Def.NTETransferMax != 0 && X->Bind.Def.NTEInterAllMax != 0) { fprintf(stdoutMPI, - "Error: Time Evoluation mode does not support TEOneBody and TETwoBody interactions at the same time. \n"); + "Error: Time Evoluation mode does not support TEOneBody and TETwoBody interactions at the same time. \n"); return -1; } else if (X->Bind.Def.NTETransferMax > 0) { //One-Body type MakeTEDTransfer(&(X->Bind), step_i); - }else if (X->Bind.Def.NTEInterAllMax > 0) { //Two-Body type + } + else if (X->Bind.Def.NTEInterAllMax > 0) { //Two-Body type MakeTEDInterAll(&(X->Bind), step_i); } //[e] Yoshimi } - if(step_i == step_initial){ + if (step_i == step_initial) { TimeKeeperWithStep(&(X->Bind), cFileNameTEStep, cTEStep, "w", step_i); } else { TimeKeeperWithStep(&(X->Bind), cFileNameTEStep, cTEStep, "a", step_i); } - MultiplyForTEM(&(X->Bind)); + MultiplyForTEM(&(X->Bind), v2); //Add Diagonal Parts //Multiply Diagonal - expec_energy_flct(&(X->Bind)); + expec_energy_flct(&(X->Bind), 1, v0, v1); - if(X->Bind.Def.NLaser >0 ) Time+=dt; + if (X->Bind.Def.NLaser > 0) Time += dt; if (childfopenMPI(sdt_phys, "a", &fp) != 0) { return -1; } fprintf(fp, "%.16lf %.16lf %.16lf %.16lf %.16lf %d\n", Time, X->Bind.Phys.energy, X->Bind.Phys.var, - X->Bind.Phys.doublon, X->Bind.Phys.num, step_i); + X->Bind.Phys.doublon, X->Bind.Phys.num, step_i); fclose(fp); if (childfopenMPI(sdt_norm, "a", &fp) != 0) { @@ -193,14 +198,15 @@ int CalcByTEM( if (childfopenMPI(sdt_flct, "a", &fp) != 0) { return -1; } - fprintf(fp, "%.16lf %.16lf %.16lf %.16lf %.16lf %.16lf %.16lf %d\n", Time,X->Bind.Phys.num,X->Bind.Phys.num2, X->Bind.Phys.doublon,X->Bind.Phys.doublon2, X->Bind.Phys.Sz,X->Bind.Phys.Sz2,step_i); + 
fprintf(fp, "%.16lf %.16lf %.16lf %.16lf %.16lf %.16lf %.16lf %d\n", + Time, X->Bind.Phys.num, X->Bind.Phys.num2, X->Bind.Phys.doublon, X->Bind.Phys.doublon2, X->Bind.Phys.Sz, X->Bind.Phys.Sz2, step_i); fclose(fp); if (step_i % step_spin == 0) { - expec_cisajs(&(X->Bind), v1); - expec_cisajscktaltdc(&(X->Bind), v1); + expec_cisajs(&(X->Bind), 1, v0, v1); + expec_cisajscktaltdc(&(X->Bind), 1, v0, v1); } if (X->Bind.Def.iOutputEigenVec == TRUE) { if (step_i % X->Bind.Def.Param.OutputInterval == 0) { @@ -211,11 +217,13 @@ int CalcByTEM( } fwrite(&step_i, sizeof(step_i), 1, fp); fwrite(&X->Bind.Check.idim_max, sizeof(long int), 1, fp); - fwrite(v1, sizeof(complex double), X->Bind.Check.idim_max + 1, fp); + fwrite(&v1[0][0], sizeof(complex double), X->Bind.Check.idim_max + 1, fp); fclose(fp); } } - } + }/*for (step_i = step_initial; step_i < X->Bind.Def.Lanczos_max; step_i++)*/ + free_cd_2d_allocate(v2); + if (X->Bind.Def.iOutputEigenVec == TRUE) { sprintf(sdt, cFileNameOutputEigen, X->Bind.Def.CDataFileHead, rand_i, myrank); if (childfopenALL(sdt, "wb", &fp) != 0) { @@ -224,37 +232,35 @@ int CalcByTEM( } fwrite(&step_i, sizeof(step_i), 1, fp); fwrite(&X->Bind.Check.idim_max, sizeof(long int), 1, fp); - fwrite(v1, sizeof(complex double), X->Bind.Check.idim_max + 1, fp); + fwrite(&v1[0][0], sizeof(complex double), X->Bind.Check.idim_max + 1, fp); fclose(fp); } - fprintf(stdoutMPI, "%s",cLogTEM_End); + fprintf(stdoutMPI, "%s", cLogTEM_End); return 0; } - /// \brief Set transfer integrals at timeidx-th time /// \param X struct for getting information of transfer integrals /// \param timeidx index of time void MakeTEDTransfer(struct BindStruct *X, const int timeidx) { - int i,j; + int i, j; //Clear values - for(i=0; iDef.NTETransferMax ;i++) { - for(j =0; j<4; j++) { + for (i = 0; i < X->Def.NTETransferMax; i++) { + for (j = 0; j < 4; j++) { X->Def.EDGeneralTransfer[i + X->Def.EDNTransfer][j] = 0; } - X->Def.EDParaGeneralTransfer[i+X->Def.EDNTransfer]=0.0; + 
X->Def.EDParaGeneralTransfer[i + X->Def.EDNTransfer] = 0.0; } //Input values - for(i=0; iDef.NTETransfer[timeidx] ;i++){ - for(j =0; j<4; j++) { + for (i = 0; i < X->Def.NTETransfer[timeidx]; i++) { + for (j = 0; j < 4; j++) { X->Def.EDGeneralTransfer[i + X->Def.EDNTransfer][j] = X->Def.TETransfer[timeidx][i][j]; } - X->Def.EDParaGeneralTransfer[i+X->Def.EDNTransfer]=X->Def.ParaTETransfer[timeidx][i]; + X->Def.EDParaGeneralTransfer[i + X->Def.EDNTransfer] = X->Def.ParaTETransfer[timeidx][i]; } X->Def.EDNTransfer += X->Def.NTETransfer[timeidx]; } - /// \brief Set interall interactions at timeidx-th time /// \param X struct for getting information of interall interactions /// \param timeidx index of time @@ -277,4 +283,3 @@ void MakeTEDInterAll(struct BindStruct *X, const int timeidx) { } X->Def.NInterAll_OffDiagonal += X->Def.NTEInterAllOffDiagonal[timeidx]; } - diff --git a/src/CalcSpectrum.c b/src/CalcSpectrum.c index a37fc1155..68e97d336 100644 --- a/src/CalcSpectrum.c +++ b/src/CalcSpectrum.c @@ -15,9 +15,7 @@ /* along with this program. If not, see . 
*/ #include "mltply.h" #include "CalcSpectrum.h" -#include "CalcSpectrumByLanczos.h" #include "CalcSpectrumByBiCG.h" -#include "CalcSpectrumByTPQ.h" #include "CalcSpectrumByFullDiag.h" #include "CalcTime.h" #include "SingleEx.h" @@ -88,44 +86,46 @@ int OutputSpectrum( * */ int CalcSpectrum( - struct EDMainCalStruct *X - ) { - char sdt[D_FileNameMax]; - char *defname; - unsigned long int i; - unsigned long int i_max = 0; - int i_stp; - int iFlagListModified = FALSE; - FILE *fp; - double dnorm; - - //ToDo: Nomega should be given as a parameter - int Nomega; - double complex OmegaMax, OmegaMin; - double complex *dcSpectrum; - double complex *dcomega; - size_t byte_size; - - //set omega - if (SetOmega(&(X->Bind.Def)) != TRUE) { - fprintf(stderr, "Error: Fail to set Omega.\n"); - exitMPI(-1); - } else { - if (X->Bind.Def.iFlgSpecOmegaOrg == FALSE) { - X->Bind.Def.dcOmegaOrg = I*(X->Bind.Def.dcOmegaMax - X->Bind.Def.dcOmegaMin) / (double) X->Bind.Def.iNOmega; - } - } - /* - Set & malloc omega grid - */ - Nomega = X->Bind.Def.iNOmega; - dcSpectrum = cd_1d_allocate(Nomega); - dcomega = cd_1d_allocate(Nomega); - OmegaMax = X->Bind.Def.dcOmegaMax + X->Bind.Def.dcOmegaOrg; - OmegaMin = X->Bind.Def.dcOmegaMin + X->Bind.Def.dcOmegaOrg; - for (i = 0; i < Nomega; i++) { - dcomega[i] = (OmegaMax - OmegaMin) / Nomega * i + OmegaMin; + struct EDMainCalStruct *X +) { + char sdt[D_FileNameMax]; + char *defname; + unsigned long int i; + unsigned long int i_max = 0; + int i_stp; + int iFlagListModified = FALSE; + FILE *fp; + double dnorm; + double complex *v1Org; /**< Input vector to calculate spectrum function.*/ + + //ToDo: Nomega should be given as a parameter + int Nomega; + double complex OmegaMax, OmegaMin; + double complex *dcSpectrum; + double complex *dcomega; + size_t byte_size; + + //set omega + if (SetOmega(&(X->Bind.Def)) != TRUE) { + fprintf(stderr, "Error: Fail to set Omega.\n"); + exitMPI(-1); + } + else { + if (X->Bind.Def.iFlgSpecOmegaOrg == FALSE) { + 
X->Bind.Def.dcOmegaOrg = I * (X->Bind.Def.dcOmegaMax - X->Bind.Def.dcOmegaMin) / (double)X->Bind.Def.iNOmega; } + } + /* + Set & malloc omega grid + */ + Nomega = X->Bind.Def.iNOmega; + dcSpectrum = cd_1d_allocate(Nomega); + dcomega = cd_1d_allocate(Nomega); + OmegaMax = X->Bind.Def.dcOmegaMax + X->Bind.Def.dcOmegaOrg; + OmegaMin = X->Bind.Def.dcOmegaMin + X->Bind.Def.dcOmegaOrg; + for (i = 0; i < Nomega; i++) { + dcomega[i] = (OmegaMax - OmegaMin) / Nomega * i + OmegaMin; + } fprintf(stdoutMPI, "\nFrequency range:\n"); fprintf(stdoutMPI, " Omega Max. : %15.5e %15.5e\n", creal(OmegaMax), cimag(OmegaMax)); @@ -142,24 +142,24 @@ int CalcSpectrum( } X->Bind.Def.iFlagListModified = iFlagListModified; - //Set Memory - v1Org = cd_1d_allocate(X->Bind.Check.idim_maxOrg+1); - for(i=0; iBind.Check.idim_maxOrg+1; i++){ - v1Org[i]=0; - } - - //Make excited state - StartTimer(6100); - if (X->Bind.Def.iFlgCalcSpec == RECALC_NOT || - X->Bind.Def.iFlgCalcSpec == RECALC_OUTPUT_TMComponents_VEC || - (X->Bind.Def.iFlgCalcSpec == RECALC_INOUT_TMComponents_VEC && X->Bind.Def.iCalcType == CG)) { - //input eigen vector - StartTimer(6101); - fprintf(stdoutMPI, " Start: An Eigenvector is inputted in CalcSpectrum.\n"); - TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_InputEigenVectorStart, "a"); - GetFileNameByKW(KWSpectrumVec, &defname); - strcat(defname, "_rank_%d.dat"); -// sprintf(sdt, cFileNameInputEigen, X->Bind.Def.CDataFileHead, X->Bind.Def.k_exct - 1, myrank); + //Set Memory + v1Org = cd_1d_allocate(X->Bind.Check.idim_maxOrg + 1); + for (i = 0; i < X->Bind.Check.idim_maxOrg + 1; i++) { + v1Org[i] = 0; + } + + //Make excited state + StartTimer(6100); + if (X->Bind.Def.iFlgCalcSpec == RECALC_NOT || + X->Bind.Def.iFlgCalcSpec == RECALC_OUTPUT_TMComponents_VEC || + (X->Bind.Def.iFlgCalcSpec == RECALC_INOUT_TMComponents_VEC && X->Bind.Def.iCalcType == CG)) { + //input eigen vector + StartTimer(6101); + fprintf(stdoutMPI, " Start: An Eigenvector is inputted in CalcSpectrum.\n"); + 
TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_InputEigenVectorStart, "a"); + GetFileNameByKW(KWSpectrumVec, &defname); + strcat(defname, "_rank_%d.dat"); + // sprintf(sdt, cFileNameInputEigen, X->Bind.Def.CDataFileHead, X->Bind.Def.k_exct - 1, myrank); sprintf(sdt, defname, myrank); childfopenALL(sdt, "rb", &fp); @@ -179,7 +179,7 @@ int CalcSpectrum( byte_size = fread(v1Org, sizeof(complex double), i_max + 1, fp); fclose(fp); StopTimer(6101); - if (byte_size == 0) printf("byte_size: %d \n", (int) byte_size); + if (byte_size == 0) printf("byte_size: %d \n", (int)byte_size); for (i = 0; i <= X->Bind.Check.idim_max; i++) { v0[i] = 0; @@ -191,11 +191,11 @@ int CalcSpectrum( //Multiply Operator StartTimer(6102); - GetExcitedState(&(X->Bind), v0, v1Org); + GetExcitedState(&(X->Bind), 1, v0, v1Org); StopTimer(6102); //calculate norm - dnorm = NormMPI_dc(X->Bind.Check.idim_max, v0); + dnorm = NormMPI_dc(X->Bind.Check.idim_max, &v0[0][0]); if (fabs(dnorm) < pow(10.0, -15)) { fprintf(stderr, "Warning: Norm of an excitation vector becomes 0.\n"); fprintf(stdoutMPI, " End: Calculating an excited Eigenvector.\n\n"); @@ -211,18 +211,18 @@ int CalcSpectrum( //normalize vector #pragma omp parallel for default(none) private(i) shared(v1, v0) firstprivate(i_max, dnorm, X) for (i = 1; i <= X->Bind.Check.idim_max; i++) { - v1[i] = v0[i] / dnorm; + v1[i][0] = v0[i][0] / dnorm; } //Output excited vector if (X->Bind.Def.iOutputExVec == 1) { sprintf(sdt, cFileNameOutputExcitedVec, X->Bind.Def.CDataFileHead, myrank); - if(childfopenALL(sdt, "w", &fp)!=0){ + if (childfopenALL(sdt, "w", &fp) != 0) { return -1; } fprintf(fp, "%ld\n", X->Bind.Check.idim_max); for (i = 1; i <= X->Bind.Check.idim_max; i++) { - fprintf(fp, "%.10lf, %.10lf\n", creal(v1[i]), cimag(v1[i])); + fprintf(fp, "%.10lf, %.10lf\n", creal(v1[i][0]), cimag(v1[i][0])); } fclose(fp); } @@ -241,40 +241,22 @@ int CalcSpectrum( //calculate Diagonal term diagonalcalc(&(X->Bind)); - int iret = TRUE; fprintf(stdoutMPI, " Start: 
Calculating a spectrum.\n\n"); TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_CalcSpectrumStart, "a"); StartTimer(6200); switch (X->Bind.Def.iCalcType) { - case CG: - - iret = CalcSpectrumByBiCG(X, v0, v1, vg, Nomega, dcSpectrum, dcomega); - - if (iret != TRUE) { - //Error Message will be added. - return FALSE; - } - - break;//Lanczos Spectrum - - case TPQCalc: - fprintf(stderr, " Error: TPQ is not supported for calculating spectrum mode.\n"); - return FALSE;//TPQ is not supprted. -#ifdef _CALCSPEC_TPQ - iret = CalcSpectrumByTPQ(X, v1, dnorm, Nomega, dcSpectrum, dcomega); - if (iret != TRUE) { - //Error Message will be added. - return FALSE; - } -#endif - - case FullDiag: - iret = CalcSpectrumByFullDiag(X, Nomega, dcSpectrum, dcomega); - break; - - default: - break; + case CG: + iret = CalcSpectrumByBiCG(X, &v0[0][0], &v1[0][0], Nomega, dcSpectrum, dcomega); + if (iret != TRUE) { + return FALSE; + } + break; + case FullDiag: + iret = CalcSpectrumByFullDiag(X, Nomega, dcSpectrum, dcomega); + break; + default: + break; } StopTimer(6200); @@ -291,7 +273,6 @@ int CalcSpectrum( return TRUE; }/*int CalcSpectrum*/ - /// /// \brief Parent function to calculate the excited state. /// \param X [in] Struct to get number of excitation operators. @@ -301,31 +282,30 @@ int CalcSpectrum( /// \retval TRUE Success to calculate the excited state. 
int GetExcitedState ( - struct BindStruct *X, - int nstate, double complex **tmp_v0, - double complex **tmp_v1 + struct BindStruct *X, + int nstate, + double complex **tmp_v0, + double complex **tmp_v1 ) { - if(X->Def.NSingleExcitationOperator > 0 && X->Def.NPairExcitationOperator > 0){ + if (X->Def.NSingleExcitationOperator > 0 && X->Def.NPairExcitationOperator > 0) { fprintf(stderr, "Error: Both single and pair excitation operators exist.\n"); return FALSE; - } + } - if(X->Def.NSingleExcitationOperator > 0){ - if(GetSingleExcitedState(X,nstate,tmp_v0, tmp_v1)!=TRUE){ - return FALSE; - } + if (X->Def.NSingleExcitationOperator > 0) { + if (GetSingleExcitedState(X, nstate, tmp_v0, tmp_v1) != TRUE) { + return FALSE; } - else if(X->Def.NPairExcitationOperator >0){ - if(GetPairExcitedState(X,nstate,tmp_v0, tmp_v1)!=TRUE){ - return FALSE; - } + } + else if (X->Def.NPairExcitationOperator > 0) { + if (GetPairExcitedState(X, nstate, tmp_v0, tmp_v1) != TRUE) { + return FALSE; } - + } return TRUE; } - /// /// \brief Set target frequencies /// \param X [in, out] Struct to give and get the information of target frequencies.\n @@ -335,20 +315,20 @@ int GetExcitedState /// \retval TRUE Success to set frequencies. 
int SetOmega ( - struct DefineList *X -){ + struct DefineList *X +) { FILE *fp; - char sdt[D_FileNameMax],ctmp[256]; - int istp=4; + char sdt[D_FileNameMax], ctmp[256]; + int istp = 4; double E1, E2, E3, E4, Emax; - long unsigned int iline_countMax=2; - long unsigned int iline_count=2; + long unsigned int iline_countMax = 2; + long unsigned int iline_count = 2; - if(X->iFlgSpecOmegaMax == TRUE && X->iFlgSpecOmegaMin == TRUE){ + if (X->iFlgSpecOmegaMax == TRUE && X->iFlgSpecOmegaMin == TRUE) { return TRUE; } - else{ + else { if (X->iCalcType == Lanczos || X->iCalcType == FullDiag) { sprintf(sdt, cFileNameLanczosStep, X->CDataFileHead); childfopenMPI(sdt, "r", &fp); @@ -398,17 +378,16 @@ int SetOmega Emax = LargeValue; }/**/ //Read Lanczos_Step - if(X->iFlgSpecOmegaMax == FALSE){ - X->dcOmegaMax= Emax*(double)X->Nsite; + if (X->iFlgSpecOmegaMax == FALSE) { + X->dcOmegaMax = Emax * (double)X->Nsite; } - if(X->iFlgSpecOmegaMin == FALSE){ - X->dcOmegaMin= E1; + if (X->iFlgSpecOmegaMin == FALSE) { + X->dcOmegaMin = E1; } }/*Omegamax and omegamin is not specified in modpara*/ return TRUE; } - /// /// \brief Make the lists for the excited state; list_1, list_2_1 and list_2_2 (for canonical ensemble). /// The original lists before the excitation are given by list_xxx_org @@ -419,199 +398,208 @@ int SetOmega /// \retval -1 fail to make lists. /// \retval 0 sucsess to make lists. 
int MakeExcitedList( - struct BindStruct *X, - int *iFlgListModifed + struct BindStruct *X, + int *iFlgListModifed ) { - long int j; - *iFlgListModifed = FALSE; - //To Get Original space - if (check(X) == MPIFALSE) { - FinalizeMPI(); + long int j; + *iFlgListModifed = FALSE; + //To Get Original space + if (check(X) == MPIFALSE) { + FinalizeMPI(); + return -1; + } + + X->Check.idim_maxOrg = X->Check.idim_max; + X->Check.idim_maxMPIOrg = X->Check.idim_maxMPI; + + if (X->Def.NSingleExcitationOperator > 0) { + switch (X->Def.iCalcModel) { + case HubbardGC: + break; + case HubbardNConserved: + case KondoGC: + case Hubbard: + case Kondo: + *iFlgListModifed = TRUE; + break; + case Spin: + case SpinGC: + return FALSE; + } + } + else if (X->Def.NPairExcitationOperator > 0) { + switch (X->Def.iCalcModel) { + case HubbardGC: + case SpinGC: + case HubbardNConserved: + break; + case KondoGC: + case Hubbard: + case Kondo: + case Spin: + if (X->Def.PairExcitationOperator[0][1] != X->Def.PairExcitationOperator[0][3]) { + *iFlgListModifed = TRUE; + } + break; + } + } + else { + return FALSE; + } + + if (*iFlgListModifed == TRUE) { + if (GetlistSize(X) == TRUE) { + list_1_org = lui_1d_allocate(X->Check.idim_max + 1); +#ifdef MPI + list_1buf_org = lui_1d_allocate(X->Check.idim_maxMPI + 1); + //lui_malloc1(list_1buf_org, X->Check.idim_maxMPI + 1); +#endif // MPI + list_2_1_org = lui_1d_allocate(X->Large.SizeOflist_2_1); + list_2_2_org = lui_1d_allocate(X->Large.SizeOflist_2_2); + //lui_malloc1(list_2_1_org, X->Large.SizeOflist_2_1); + //lui_malloc1(list_2_2_org, X->Large.SizeOflist_2_2); + if (list_1_org == NULL + || list_2_1_org == NULL + || list_2_2_org == NULL + ) + { return -1; + } + for (j = 0; j < X->Large.SizeOflist_2_1; j++) { + list_2_1_org[j] = 0; + } + for (j = 0; j < X->Large.SizeOflist_2_2; j++) { + list_2_2_org[j] = 0; + } + } - X->Check.idim_maxOrg = X->Check.idim_max; - X->Check.idim_maxMPIOrg = X->Check.idim_maxMPI; + if (sz(X, list_1_org, list_2_1_org, list_2_2_org) 
!= 0) { + return FALSE; + } if (X->Def.NSingleExcitationOperator > 0) { - switch (X->Def.iCalcModel) { - case HubbardGC: - break; - case HubbardNConserved: - case KondoGC: - case Hubbard: - case Kondo: - *iFlgListModifed = TRUE; - break; - case Spin: - case SpinGC: - return FALSE; + switch (X->Def.iCalcModel) { + case HubbardGC: + break; + case HubbardNConserved: + if (X->Def.SingleExcitationOperator[0][2] == 1) { //cis + X->Def.Ne = X->Def.NeMPI + 1; } - } else if (X->Def.NPairExcitationOperator > 0) { - switch (X->Def.iCalcModel) { - case HubbardGC: - case SpinGC: - case HubbardNConserved: - break; - case KondoGC: - case Hubbard: - case Kondo: - case Spin: - if (X->Def.PairExcitationOperator[0][1] != X->Def.PairExcitationOperator[0][3]) { - *iFlgListModifed = TRUE; - } - break; + else { + X->Def.Ne = X->Def.NeMPI - 1; } - } else { + break; + case KondoGC: + case Hubbard: + case Kondo: + if (X->Def.SingleExcitationOperator[0][2] == 1) { //cis + X->Def.Ne = X->Def.NeMPI + 1; + if (X->Def.SingleExcitationOperator[0][1] == 0) {//up + X->Def.Nup = X->Def.NupOrg + 1; + X->Def.Ndown = X->Def.NdownOrg; + } + else {//down + X->Def.Nup = X->Def.NupOrg; + X->Def.Ndown = X->Def.NdownOrg + 1; + } + } + else {//ajt + X->Def.Ne = X->Def.NeMPI - 1; + if (X->Def.SingleExcitationOperator[0][1] == 0) {//up + X->Def.Nup = X->Def.NupOrg - 1; + X->Def.Ndown = X->Def.NdownOrg; + + } + else {//down + X->Def.Nup = X->Def.NupOrg; + X->Def.Ndown = X->Def.NdownOrg - 1; + } + } + break; + case Spin: + case SpinGC: return FALSE; + } } - - if (*iFlgListModifed == TRUE) { - if(GetlistSize(X)==TRUE) { - list_1_org = lui_1d_allocate(X->Check.idim_max + 1); -#ifdef MPI - list_1buf_org = lui_1d_allocate(X->Check.idim_maxMPI + 1); - //lui_malloc1(list_1buf_org, X->Check.idim_maxMPI + 1); -#endif // MPI - list_2_1_org = lui_1d_allocate(X->Large.SizeOflist_2_1); - list_2_2_org = lui_1d_allocate(X->Large.SizeOflist_2_2); - //lui_malloc1(list_2_1_org, X->Large.SizeOflist_2_1); - 
//lui_malloc1(list_2_2_org, X->Large.SizeOflist_2_2); - if(list_1_org==NULL - || list_2_1_org==NULL - || list_2_2_org==NULL - ) - { - return -1; - } - for(j =0; jLarge.SizeOflist_2_1; j++){ - list_2_1_org[j]=0; - } - for(j =0; jLarge.SizeOflist_2_2; j++){ - list_2_2_org[j]=0; - } - - } - - if (sz(X, list_1_org, list_2_1_org, list_2_2_org) != 0) { - return FALSE; + else if (X->Def.NPairExcitationOperator > 0) { + X->Def.Ne = X->Def.NeMPI; + switch (X->Def.iCalcModel) { + case HubbardGC: + case SpinGC: + case HubbardNConserved: + break; + case KondoGC: + case Hubbard: + case Kondo: + if (X->Def.PairExcitationOperator[0][1] != X->Def.PairExcitationOperator[0][3]) { + if (X->Def.PairExcitationOperator[0][1] == 0) {//up + X->Def.Nup = X->Def.NupOrg + 1; + X->Def.Ndown = X->Def.NdownOrg - 1; + } + else {//down + X->Def.Nup = X->Def.NupOrg - 1; + X->Def.Ndown = X->Def.NdownOrg + 1; + } } - - if (X->Def.NSingleExcitationOperator > 0) { - switch (X->Def.iCalcModel) { - case HubbardGC: - break; - case HubbardNConserved: - if (X->Def.SingleExcitationOperator[0][2] == 1) { //cis - X->Def.Ne = X->Def.NeMPI + 1; - } - else{ - X->Def.Ne = X->Def.NeMPI - 1; - } - break; - case KondoGC: - case Hubbard: - case Kondo: - if (X->Def.SingleExcitationOperator[0][2] == 1) { //cis - X->Def.Ne = X->Def.NeMPI + 1; - if (X->Def.SingleExcitationOperator[0][1] == 0) {//up - X->Def.Nup = X->Def.NupOrg + 1; - X->Def.Ndown=X->Def.NdownOrg; - } else {//down - X->Def.Nup=X->Def.NupOrg; - X->Def.Ndown = X->Def.NdownOrg + 1; - } - } else {//ajt - X->Def.Ne = X->Def.NeMPI - 1; - if (X->Def.SingleExcitationOperator[0][1] == 0) {//up - X->Def.Nup = X->Def.NupOrg - 1; - X->Def.Ndown=X->Def.NdownOrg; - - } else {//down - X->Def.Nup=X->Def.NupOrg; - X->Def.Ndown = X->Def.NdownOrg - 1; - } - } - break; - case Spin: - case SpinGC: - return FALSE; + break; + case Spin: + if (X->Def.PairExcitationOperator[0][1] != X->Def.PairExcitationOperator[0][3]) { + if (X->Def.iFlgGeneralSpin == FALSE) { + if 
(X->Def.PairExcitationOperator[0][1] == 0) {//down + X->Def.Nup = X->Def.NupOrg - 1; + X->Def.Ndown = X->Def.NdownOrg + 1; } - } else if (X->Def.NPairExcitationOperator > 0) { - X->Def.Ne=X->Def.NeMPI; - switch (X->Def.iCalcModel) { - case HubbardGC: - case SpinGC: - case HubbardNConserved: - break; - case KondoGC: - case Hubbard: - case Kondo: - if (X->Def.PairExcitationOperator[0][1] != X->Def.PairExcitationOperator[0][3]) { - if (X->Def.PairExcitationOperator[0][1] == 0) {//up - X->Def.Nup = X->Def.NupOrg + 1; - X->Def.Ndown = X->Def.NdownOrg - 1; - } else {//down - X->Def.Nup = X->Def.NupOrg - 1; - X->Def.Ndown = X->Def.NdownOrg + 1; - } - } - break; - case Spin: - if (X->Def.PairExcitationOperator[0][1] != X->Def.PairExcitationOperator[0][3]) { - if (X->Def.iFlgGeneralSpin == FALSE) { - if (X->Def.PairExcitationOperator[0][1] == 0) {//down - X->Def.Nup = X->Def.NupOrg - 1; - X->Def.Ndown = X->Def.NdownOrg + 1; - } else {//up - X->Def.Nup = X->Def.NupOrg + 1; - X->Def.Ndown = X->Def.NdownOrg - 1; - } - } - else{//for general spin - X->Def.Total2Sz = X->Def.Total2SzMPI+2*(X->Def.PairExcitationOperator[0][1]-X->Def.PairExcitationOperator[0][3]); - } - } - break; + else {//up + X->Def.Nup = X->Def.NupOrg + 1; + X->Def.Ndown = X->Def.NdownOrg - 1; } - } else { - return FALSE; - } - //Update Infomation - X->Def.Nsite=X->Def.NsiteMPI; - - if (check(X) == MPIFALSE) { - FinalizeMPI(); - return FALSE; + } + else {//for general spin + X->Def.Total2Sz = X->Def.Total2SzMPI + 2 * (X->Def.PairExcitationOperator[0][1] - X->Def.PairExcitationOperator[0][3]); + } } + break; + } } - - //set memory - if (setmem_large(X) != 0) { - fprintf(stdoutMPI, cErrLargeMem, iErrCodeMem); - exitMPI(-1); + else { + return FALSE; } + //Update Infomation + X->Def.Nsite = X->Def.NsiteMPI; - if (sz(X, list_1, list_2_1, list_2_2) != 0) { - return FALSE; + if (check(X) == MPIFALSE) { + FinalizeMPI(); + return FALSE; } + } - if(X->Def.iCalcModel==HubbardNConserved){ - X->Def.iCalcModel=Hubbard; - } + 
//set memory + if (setmem_large(X) != 0) { + fprintf(stdoutMPI, cErrLargeMem, iErrCodeMem); + exitMPI(-1); + } + + if (sz(X, list_1, list_2_1, list_2_2) != 0) { + return FALSE; + } + + if (X->Def.iCalcModel == HubbardNConserved) { + X->Def.iCalcModel = Hubbard; + } #ifdef _DEBUG if (*iFlgListModifed == TRUE) { - for(j=1; j<=X->Check.idim_maxOrg; j++){ - fprintf(stdout, "Debug1: myrank=%d, list_1_org[ %ld] = %ld\n", myrank, j, list_1_org[j]+myrank*X->Def.OrgTpow[2*X->Def.NsiteMPI-1]); + for (j = 1; j <= X->Check.idim_maxOrg; j++) { + fprintf(stdout, "Debug1: myrank=%d, list_1_org[ %ld] = %ld\n", + myrank, j, list_1_org[j] + myrank * X->Def.OrgTpow[2 * X->Def.NsiteMPI - 1]); } - for(j=1; j<=X->Check.idim_max; j++){ - fprintf(stdout, "Debug2: myrank=%d, list_1[ %ld] = %ld\n", myrank, j, list_1[j]+myrank* 64); - } + for (j = 1; j <= X->Check.idim_max; j++) { + fprintf(stdout, "Debug2: myrank=%d, list_1[ %ld] = %ld\n", myrank, j, list_1[j] + myrank * 64); } + } #endif - - return TRUE; + return TRUE; } diff --git a/src/CalcSpectrumByBiCG.c b/src/CalcSpectrumByBiCG.c index ca9616dc7..1ca230922 100644 --- a/src/CalcSpectrumByBiCG.c +++ b/src/CalcSpectrumByBiCG.c @@ -18,8 +18,6 @@ @brief File for givinvg functions of calculating spectrum by Lanczos */ #include "Common.h" -#include "CalcSpectrumByLanczos.h" -#include "Lanczos_EigenValue.h" #include "FileIO.h" #include "wrapperMPI.h" #include "common/setmemory.h" @@ -207,7 +205,6 @@ int CalcSpectrumByBiCG( struct EDMainCalStruct *X,//!<[inout] double complex *vrhs,//!<[in] [CheckList::idim_max] Right hand side vector, excited state. 
double complex *v2,//!<[inout] [CheckList::idim_max] Work space for residual vector @f${\bf r}@f$ - double complex *v4,//!<[inout] [CheckList::idim_max] Work space for shadow residual vector @f${\bf {\tilde r}}@f$ int Nomega,//!<[in] Number of Frequencies double complex *dcSpectrum,//!<[out] [Nomega] Spectrum double complex *dcomega//!<[in] [Nomega] Frequency @@ -219,7 +216,7 @@ int CalcSpectrumByBiCG( size_t byte_size; int iret, max_step; unsigned long int liLanczosStp_vec = 0; - double complex *v12, *v14, res_proj; + double complex *v4, *v12, *v14, res_proj; int stp, one = 1, status[3], iomega; double *resz; @@ -231,6 +228,7 @@ int CalcSpectrumByBiCG( */ v12 = (double complex*)malloc((X->Bind.Check.idim_max + 1) * sizeof(double complex)); v14 = (double complex*)malloc((X->Bind.Check.idim_max + 1) * sizeof(double complex)); + v4 = (double complex*)malloc((X->Bind.Check.idim_max + 1) * sizeof(double complex)); resz = (double*)malloc(Nomega * sizeof(double)); /**
  • Set initial result vector(+shadow result vector) @@ -373,5 +371,6 @@ int CalcSpectrumByBiCG( free(resz); free(v12); free(v14); + free(v4); return TRUE; }/*int CalcSpectrumByBiCG*/ diff --git a/src/CalcSpectrumByFullDiag.c b/src/CalcSpectrumByFullDiag.c index 121edb30d..fe6501118 100644 --- a/src/CalcSpectrumByFullDiag.c +++ b/src/CalcSpectrumByFullDiag.c @@ -22,8 +22,10 @@ full-diagonalization method. #include #include "struct.h" #include "lapack_diag.h" -#include "makeHam.h" +#include "mltply.h" +#include "mltplyCommon.h" #include "CalcTime.h" +#include "common/setmemory.h" void zcopy_(int *n, double complex *x, int *incx, double complex *y, int *incy); void zdotc_(double complex *xy, int *n, double complex *x, int *incx, double complex *y, int *incy); @@ -44,20 +46,27 @@ int CalcSpectrumByFullDiag( int idim, jdim, iomega; int idim_max_int; int incr=1; + double *vAv2; + double complex *vg, vAv; /**
    • Generate fully stored Hamiltonian. Because ::v0 & ::v1 are overwritten, copy ::v0 into ::vg.
    • */ idim_max_int = (int)X->Bind.Check.idim_max; - zcopy_(&idim_max_int, &v0[1], &incr, &vg[0], &incr); + vg = cd_1d_allocate(idim_max_int); + vAv2 = d_1d_allocate(idim_max_int); + zcopy_(&idim_max_int, &v0[1][0], &incr, &vg[0], &incr); StartTimer(6301); - makeHam(&(X->Bind)); + zclear((X->Bind.Check.idim_max + 1)*(X->Bind.Check.idim_max + 1), &v0[0][0]); + zclear((X->Bind.Check.idim_max + 1)*(X->Bind.Check.idim_max + 1), &v1[0][0]); + for (idim = 1; idim <= X->Bind.Check.idim_max; idim++) v1[idim][idim] = 1.0; + mltply(&(X->Bind), X->Bind.Check.idim_max, v0, v1); StopTimer(6301); /**
    • ::v0 becomes eigenvalues in lapack_diag(), and - ::L_vec becomes eigenvectors
    • + ::v1 becomes eigenvectors */ StartTimer(6302); lapack_diag(&(X->Bind)); @@ -69,10 +78,10 @@ int CalcSpectrumByFullDiag( StartTimer(6303); for (idim = 0; idim < idim_max_int; idim++) { - v1[idim] = 0.0; - for (jdim = 0; jdim < idim_max_int; jdim++) v1[idim] += conj(vg[jdim]) * L_vec[idim][jdim]; + vAv = 0.0; + for (jdim = 0; jdim < idim_max_int; jdim++) vAv += conj(vg[jdim]) * v1[jdim][idim]; //zdotc_(&v1[idim], &idim_max_int, &vg[0], &incr, &L_vec[idim][0], &incr); - v1[idim] = conj(v1[idim]) * v1[idim]; + vAv2[idim] = conj(vAv) * vAv; }/*for (idim = 0; idim < idim_max_int; idim++)*/ StopTimer(6303); /** @@ -87,10 +96,12 @@ int CalcSpectrumByFullDiag( for (iomega = 0; iomega < Nomega; iomega++) { dcSpectrum[iomega] = 0.0; for (idim = 0; idim < idim_max_int; idim++) { - dcSpectrum[iomega] += v1[idim] / (dcomega[iomega] - v0[idim]); + dcSpectrum[iomega] += v1[idim] / (dcomega[iomega] - X->Bind.Phys.energy[idim]); }/*for (idim = 0; idim < idim_max_int; idim++)*/ }/*for (iomega = 0; iomega < Nomega; iomega++)*/ StopTimer(6304); + free_cd_1d_allocate(vg); + free_d_1d_allocate(vAv2); return TRUE; }/*CalcSpectrumByFullDiag*/ diff --git a/src/Multiply.c b/src/Multiply.c index 38a14ec82..49858465a 100644 --- a/src/Multiply.c +++ b/src/Multiply.c @@ -78,10 +78,10 @@ shared(v0, v1) firstprivate(i_max, Ns, LargeValue) */ int MultiplyForTEM ( - struct BindStruct *X + struct BindStruct *X, + double complex **v2 ) { - long int i, i_max; int coef; double complex dnorm = 0.0; diff --git a/src/PairExHubbard.c b/src/PairExHubbard.c index 5314082b9..d7f816418 100644 --- a/src/PairExHubbard.c +++ b/src/PairExHubbard.c @@ -141,9 +141,9 @@ int GetPairExcitedStateHubbard( long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; long unsigned int tmp_off = 0; - double complex tmp_trans = 0; + double complex tmp_trans = 0, dmv; long int i_max; - int tmp_sgn, num1; + int tmp_sgn, num1, one = 1; long int ibit; long unsigned int is, Asum, Adiff; long unsigned int ibitsite1, 
ibitsite2; @@ -184,27 +184,32 @@ int GetPairExcitedStateHubbard( if (org_isite1 > X->Def.Nsite && org_isite2 > X->Def.Nsite) { - X_child_CisAjt_MPIdouble(org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, -tmp_trans, X, nstate, tmp_v0, tmp_v1, tmp_v1bufOrg, list_1_org, list_1buf_org, list_2_1, list_2_2); + X_child_CisAjt_MPIdouble(org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, + -tmp_trans, X, nstate, tmp_v0, tmp_v1, tmp_v1bufOrg, + list_1_org, list_1buf_org, list_2_1, list_2_2); } else if (org_isite2 > X->Def.Nsite || org_isite1 > X->Def.Nsite) { if (org_isite1 < org_isite2) { - X_child_CisAjt_MPIsingle(org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, -tmp_trans, X, nstate, tmp_v0, - tmp_v1, tmp_v1bufOrg, list_1_org, list_1buf_org, list_2_1, list_2_2); + X_child_CisAjt_MPIsingle(org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, + -tmp_trans, X, nstate, tmp_v0, tmp_v1, tmp_v1bufOrg, + list_1_org, list_1buf_org, list_2_1, list_2_2); } else { - X_child_CisAjt_MPIsingle(org_isite2 - 1, org_sigma2, org_isite1 - 1, org_sigma1, -conj(tmp_trans), X, nstate, tmp_v0, - tmp_v1, tmp_v1bufOrg, list_1_org, list_1buf_org, list_2_1, list_2_2); + X_child_CisAjt_MPIsingle(org_isite2 - 1, org_sigma2, org_isite1 - 1, org_sigma1, + -conj(tmp_trans), X, nstate, tmp_v0, tmp_v1, tmp_v1bufOrg, + list_1_org, list_1buf_org, list_2_1, list_2_2); } } else { #pragma omp parallel for default(none) shared(tmp_v0, tmp_v1,stdoutMPI) \ - firstprivate(i_max, tmp_trans, Asum, Adiff, ibitsite1, ibitsite2, X, list_1_org, list_1, myrank) \ - private(j, tmp_sgn, tmp_off) +firstprivate(i_max, tmp_trans, Asum, Adiff, ibitsite1, ibitsite2, X, list_1_org, list_1, myrank) \ +private(j, tmp_sgn, tmp_off) for (j = 1; j <= i_max; j++) { tmp_sgn = X_CisAjt(list_1_org[j], X, ibitsite1, ibitsite2, Asum, Adiff, &tmp_off); - tmp_v0[tmp_off] += tmp_trans * tmp_sgn*tmp_v1[j]; + dmv = tmp_trans * tmp_sgn; + zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[tmp_off], &one); } } } @@ -216,9 +221,12 
@@ int GetPairExcitedStateHubbard( ibit = (unsigned long int) myrank & is; if (X->Def.PairExcitationOperator[i][4] == 0) { if (ibit != is) { + dmv = -tmp_trans; #pragma omp parallel for default(none) shared(tmp_v0, tmp_v1) \ firstprivate(i_max, tmp_trans) private(j) - for (j = 1; j <= i_max; j++) tmp_v0[j] += -tmp_trans * tmp_v1[j]; + for (j = 1; j <= i_max; j++) { + zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); + } } } else { @@ -230,15 +238,18 @@ int GetPairExcitedStateHubbard( } } else { - X_child_general_hopp_MPIdouble(org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, -tmp_trans, X, nstate, tmp_v0, tmp_v1); + X_child_general_hopp_MPIdouble(org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, + -tmp_trans, X, nstate, tmp_v0, tmp_v1); } } else if (org_isite2 > X->Def.Nsite || org_isite1 > X->Def.Nsite) { if (org_isite1 < org_isite2) { - X_child_general_hopp_MPIsingle(org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, -tmp_trans, X, nstate, tmp_v0, tmp_v1); + X_child_general_hopp_MPIsingle(org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, + -tmp_trans, X, nstate, tmp_v0, tmp_v1); } else { - X_child_general_hopp_MPIsingle(org_isite2 - 1, org_sigma2, org_isite1 - 1, org_sigma1, -conj(tmp_trans), X, nstate, tmp_v0, tmp_v1); + X_child_general_hopp_MPIsingle(org_isite2 - 1, org_sigma2, org_isite1 - 1, org_sigma1, + -conj(tmp_trans), X, nstate, tmp_v0, tmp_v1); } } else { @@ -248,32 +259,34 @@ int GetPairExcitedStateHubbard( if (org_isite1 == org_isite2 && org_sigma1 == org_sigma2) { is = X->Def.Tpow[2 * org_isite1 - 2 + org_sigma1]; if (X->Def.PairExcitationOperator[i][4] == 0) { -#pragma omp parallel for default(none) shared(list_1, nstate, tmp_v0, tmp_v1) firstprivate(i_max, is, tmp_trans) private(num1, ibit) +#pragma omp parallel for default(none) shared(list_1, nstate, tmp_v0, tmp_v1) \ +firstprivate(i_max, is, tmp_trans) private(num1, ibit) for (j = 1; j <= i_max; j++) { ibit = list_1[j] & is; num1 = (1 - ibit / is); - tmp_v0[j] += 
-tmp_trans * num1 * tmp_v1[j]; + dmv = -tmp_trans * num1; + zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); } } else { -#pragma omp parallel for default(none) shared(list_1, nstate, tmp_v0, tmp_v1) firstprivate(i_max, is, tmp_trans) private(num1, ibit) +#pragma omp parallel for default(none) shared(list_1, nstate, tmp_v0, tmp_v1) \ +firstprivate(i_max, is, tmp_trans) private(num1, ibit) for (j = 1; j <= i_max; j++) { ibit = list_1[j] & is; num1 = ibit / is; - tmp_v0[j] += tmp_trans * num1 * tmp_v1[j]; + dmv = tmp_trans * num1; + zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); } } } else { - child_general_hopp(tmp_v0, tmp_v1, X, tmp_trans); + child_general_hopp(nstate, tmp_v0, tmp_v1, X, tmp_trans); } } } } - #ifdef MPI free_cd_1d_allocate(tmp_v1bufOrg); #endif // MPI - return TRUE; } diff --git a/src/PairExSpin.c b/src/PairExSpin.c index 82ef572fe..fc084f63e 100644 --- a/src/PairExSpin.c +++ b/src/PairExSpin.c @@ -19,6 +19,7 @@ #include "wrapperMPI.h" #include "mltplyMPISpinCore.h" #include "mltplySpinCore.h" +#include "mltplyCommon.h" #ifdef MPI #include "common/setmemory.h" #endif @@ -36,7 +37,6 @@ int GetPairExcitedStateSpinGC( struct BindStruct *X,/**< [in,out] define list to get and put information of calculation*/ int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ double complex **tmp_v1 /**< [in] v0 = H v1*/ - ) { int iret = 0; @@ -58,7 +58,8 @@ int GetPairExcitedStateSpinGC( /// \version 1.2 int GetPairExcitedStateHalfSpinGC( struct BindStruct *X,/**< [in,out] define list to get and put information of calculation*/ - int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + int nstate, + double complex **tmp_v0, /**< [out] Result v0 = H v1*/ double complex **tmp_v1 /**< [in] v0 = H v1*/ ) { long unsigned int i, j; @@ -66,9 +67,10 @@ int GetPairExcitedStateHalfSpinGC( long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; long unsigned int tmp_off = 0; - double complex tmp_trans = 0; + double complex 
tmp_trans = 0, dmv; long int i_max; - int tmp_sgn; + int tmp_sgn, one = 1; + i_max = X->Check.idim_maxOrg; for (i = 0; i < X->Def.NPairExcitationOperator; i++) { @@ -99,14 +101,16 @@ int GetPairExcitedStateHalfSpinGC( // longitudinal magnetic field #pragma omp parallel for default(none) private(j, tmp_sgn) firstprivate(i_max, isite1, org_sigma1, X,tmp_trans) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) { - tmp_v0[j] += (1.0 - X_SpinGC_CisAis(j, X, isite1, org_sigma1)) * tmp_v1[j] * (-tmp_trans); + dmv = (1.0 - X_SpinGC_CisAis(j, X, isite1, org_sigma1))* (-tmp_trans); + zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); } } else { // longitudinal magnetic field #pragma omp parallel for default(none) private(j, tmp_sgn) firstprivate(i_max, isite1, org_sigma1, X,tmp_trans) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) { - tmp_v0[j] += X_SpinGC_CisAis(j, X, isite1, org_sigma1) * tmp_v1[j] * tmp_trans; + dmv = X_SpinGC_CisAis(j, X, isite1, org_sigma1)* tmp_trans; + zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); } } } @@ -117,7 +121,8 @@ int GetPairExcitedStateHalfSpinGC( for (j = 1; j <= i_max; j++) { tmp_sgn = X_SpinGC_CisAit(j, X, isite1, org_sigma2, &tmp_off); if (tmp_sgn != 0) { - tmp_v0[tmp_off + 1] += tmp_sgn * tmp_v1[j] * tmp_trans; + dmv = (double complex)tmp_sgn * tmp_trans; + zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[tmp_off + 1], &one); } } } @@ -140,7 +145,8 @@ int GetPairExcitedStateHalfSpinGC( /// \version 1.2 int GetPairExcitedStateGeneralSpinGC( struct BindStruct *X,/**< [in,out] define list to get and put information of calculation*/ - int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + int nstate, + double complex **tmp_v0, /**< [out] Result v0 = H v1*/ double complex **tmp_v1 /**< [in] v0 = H v1*/ ) { @@ -148,8 +154,8 @@ int GetPairExcitedStateGeneralSpinGC( int num1; long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; long unsigned int tmp_off = 0; - - double complex tmp_trans = 0; + int 
one = 1; + double complex tmp_trans = 0, dmv; long int i_max; i_max = X->Check.idim_maxOrg; @@ -182,7 +188,8 @@ int GetPairExcitedStateGeneralSpinGC( #pragma omp parallel for default(none) private(j, num1) firstprivate(i_max, org_isite1, org_sigma1, X, tmp_trans) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) { num1 = BitCheckGeneral(j - 1, org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); - tmp_v0[j] += -tmp_trans * tmp_v1[j] * (1.0 - num1); + dmv = -tmp_trans * (1.0 - num1); + zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); } } else { @@ -190,7 +197,8 @@ int GetPairExcitedStateGeneralSpinGC( #pragma omp parallel for default(none) private(j, num1) firstprivate(i_max, org_isite1, org_sigma1, X, tmp_trans) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) { num1 = BitCheckGeneral(j - 1, org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); - tmp_v0[j] += tmp_trans * tmp_v1[j] * num1; + dmv = tmp_trans * num1; + zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); } } } @@ -200,7 +208,8 @@ int GetPairExcitedStateGeneralSpinGC( for (j = 1; j <= i_max; j++) { num1 = GetOffCompGeneralSpin(j - 1, org_isite1, org_sigma2, org_sigma1, &tmp_off, X->Def.SiteToBit, X->Def.Tpow); if (num1 != 0) { - tmp_v0[tmp_off + 1] += tmp_trans * tmp_v1[j] * num1; + dmv = tmp_trans * num1; + zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[tmp_off + 1], &one); } } } @@ -223,7 +232,8 @@ int GetPairExcitedStateGeneralSpinGC( /// \version 1.2 int GetPairExcitedStateSpin( struct BindStruct *X,/**< [in,out] define list to get and put information of calculation*/ - int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + int nstate, + double complex **tmp_v0, /**< [out] Result v0 = H v1*/ double complex **tmp_v1 /**< [in] v0 = H v1*/ ) { @@ -246,19 +256,18 @@ int GetPairExcitedStateSpin( /// \version 1.2 int GetPairExcitedStateHalfSpin( struct BindStruct *X,/**< [in,out] define list to get and put information of calculation*/ - int nstate, double complex **tmp_v0, 
/**< [out] Result v0 = H v1*/ + int nstate, + double complex **tmp_v0, /**< [out] Result v0 = H v1*/ double complex **tmp_v1 /**< [in] v0 = H v1*/ - ) { long unsigned int i, j, idim_maxMPI; long unsigned int isite1; long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; long unsigned int tmp_off = 0; - - double complex tmp_trans = 0; + double complex tmp_trans = 0, dmv; long int i_max; - int num1; + int num1, one = 1; long int ibit1; long unsigned int is1_up; @@ -284,16 +293,20 @@ int GetPairExcitedStateHalfSpin( ibit1 = X_SpinGC_CisAis((unsigned long int) myrank + 1, X, is1_up, org_sigma1); if (X->Def.PairExcitationOperator[i][4] == 0) { if (ibit1 == 0) { + dmv = -tmp_trans; #pragma omp parallel for default(none) shared(tmp_v0, tmp_v1) \ firstprivate(i_max, tmp_trans) private(j) - for (j = 1; j <= i_max; j++) tmp_v0[j] += -tmp_trans * tmp_v1[j]; + for (j = 1; j <= i_max; j++) { + zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); + } } } else { if (ibit1 != 0) { #pragma omp parallel for default(none) shared(tmp_v0, tmp_v1) \ firstprivate(i_max, tmp_trans) private(j) - for (j = 1; j <= i_max; j++) tmp_v0[j] += tmp_trans * tmp_v1[j]; + for (j = 1; j <= i_max; j++) + zaxpy_(nstate, &tmp_trans, tmp_v1[j], &one, tmp_v0[j], &one); } } }// org_isite1 > X->Def.Nsite @@ -303,13 +316,15 @@ int GetPairExcitedStateHalfSpin( X->Def.PairExcitationOperator[i][4] == 0) { #pragma omp parallel for default(none) private(j) firstprivate(i_max, isite1, org_sigma1, X, tmp_trans) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) { - tmp_v0[j] += (1.0 - X_Spin_CisAis(j, X, isite1, org_sigma1)) * tmp_v1[j] * (-tmp_trans); + dmv = (1.0 - X_Spin_CisAis(j, X, isite1, org_sigma1)) * (-tmp_trans); + zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); } } else { #pragma omp parallel for default(none) private(j) firstprivate(i_max, isite1, org_sigma1, X, tmp_trans) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) { - tmp_v0[j] += X_Spin_CisAis(j, X, isite1, org_sigma1) * 
tmp_v1[j] * tmp_trans; + dmv = X_Spin_CisAis(j, X, isite1, org_sigma1) * tmp_trans; + zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); } } } @@ -321,16 +336,22 @@ int GetPairExcitedStateHalfSpin( } else { //org_sigma1 != org_sigma2 // for the canonical case if (org_isite1 > X->Def.Nsite) {//For MPI - X_child_CisAit_spin_MPIdouble(org_isite1 - 1, org_sigma2, tmp_trans, X, nstate, tmp_v0, tmp_v1, tmp_v1bufOrg, i_max, X->Def.Tpow, list_1_org, list_1buf_org, list_2_1, list_2_2, X->Large.irght, X->Large.ilft, X->Large.ihfbit); - + X_child_CisAit_spin_MPIdouble(org_isite1 - 1, org_sigma2, tmp_trans, + X, nstate, tmp_v0, tmp_v1, tmp_v1bufOrg, i_max, X->Def.Tpow, + list_1_org, list_1buf_org, list_2_1, list_2_2, + X->Large.irght, X->Large.ilft, X->Large.ihfbit); } else { isite1 = X->Def.Tpow[org_isite1 - 1]; #pragma omp parallel for default(none) private(j, tmp_off, num1) \ - firstprivate(i_max, isite1, org_sigma2, X, tmp_trans, list_1_org, list_1, list_2_1, list_2_2) shared(tmp_v0, tmp_v1) +firstprivate(i_max, isite1, org_sigma2, X, tmp_trans, list_1_org, list_1, list_2_1, list_2_2) \ +shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) { num1 = X_Spin_CisAit(j, X, isite1, org_sigma2, list_1_org, list_2_1, list_2_2, &tmp_off); - if (num1 != 0) tmp_v0[tmp_off] += tmp_v1[j] * tmp_trans*(double)num1; + if (num1 != 0) { + dmv = tmp_trans*(double)num1; + zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[tmp_off], &one); + } } } } @@ -350,19 +371,18 @@ int GetPairExcitedStateHalfSpin( /// \version 1.2 int GetPairExcitedStateGeneralSpin( struct BindStruct *X,/**< [in,out] define list to get and put information of calculation*/ - int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + int nstate, + double complex **tmp_v0, /**< [out] Result v0 = H v1*/ double complex **tmp_v1 /**< [in] v0 = H v1*/ - ) { long unsigned int i, j, idim_maxMPI; long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; long unsigned int tmp_off = 0; long unsigned int off = 0; - - 
double complex tmp_trans = 0; + double complex tmp_trans = 0, dmv; long int i_max; - int tmp_sgn, num1; + int tmp_sgn, num1, one = 1; i_max = X->Check.idim_maxOrg; double complex **tmp_v1bufOrg; @@ -388,7 +408,8 @@ int GetPairExcitedStateGeneralSpin( if (num1 == 0) { #pragma omp parallel for default(none) private(j) firstprivate(i_max, tmp_trans) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) { - tmp_v0[j] += -tmp_trans * tmp_v1[j]; + dmv = -tmp_trans; + zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); } } } @@ -396,15 +417,15 @@ int GetPairExcitedStateGeneralSpin( if (num1 != 0) { #pragma omp parallel for default(none) private(j) firstprivate(i_max, tmp_trans) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) { - tmp_v0[j] += tmp_trans * tmp_v1[j]; + zaxpy_(nstate, &tmp_trans, tmp_v1[j], &one, tmp_v0[j], &one); } } } }//org_sigma1=org_sigma2 else {//org_sigma1 != org_sigma2 - X_child_CisAit_GeneralSpin_MPIdouble(org_isite1 - 1, org_sigma1, org_sigma2, tmp_trans, X, nstate, tmp_v0, - tmp_v1, tmp_v1bufOrg, i_max, list_1_org, list_1buf_org, - X->Large.ihfbit); + X_child_CisAit_GeneralSpin_MPIdouble(org_isite1 - 1, org_sigma1, org_sigma2, + tmp_trans, X, nstate, tmp_v0, tmp_v1, + tmp_v1bufOrg, i_max, list_1_org, list_1buf_org, X->Large.ihfbit); } } else {//org_isite1 <= X->Def.Nsite @@ -414,14 +435,16 @@ int GetPairExcitedStateGeneralSpin( #pragma omp parallel for default(none) private(j, num1) firstprivate(i_max, org_isite1, org_sigma1, X, tmp_trans) shared(tmp_v0, tmp_v1, list_1) for (j = 1; j <= i_max; j++) { num1 = BitCheckGeneral(list_1[j], org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); - tmp_v0[j] += -tmp_trans * tmp_v1[j] * (1.0 - num1); + dmv = -tmp_trans * (1.0 - num1); + zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); } } else { #pragma omp parallel for default(none) private(j, num1) firstprivate(i_max, org_isite1, org_sigma1, X, tmp_trans) shared(tmp_v0, tmp_v1, list_1) for (j = 1; j <= i_max; j++) { num1 = 
BitCheckGeneral(list_1[j], org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); - tmp_v0[j] += tmp_trans * tmp_v1[j] * num1; + dmv = tmp_trans * num1; + zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); } } }//org_sigma1=org_sigma2 @@ -434,13 +457,9 @@ int GetPairExcitedStateGeneralSpin( X->Def.SiteToBit, X->Def.Tpow); if (tmp_sgn != FALSE) { ConvertToList1GeneralSpin(off, X->Large.ihfbit, &tmp_off); -#ifdef _DEBUG - printf("rank=%d, org=%ld, tmp_off=%ld, list_1=%ld, ihfbit=%ld\n", myrank, list_1_org[j], off, list_1[tmp_off], X->Large.ihfbit); -#endif - tmp_v0[tmp_off] += tmp_v1[j] * tmp_trans; + zaxpy_(nstate, &tmp_trans, tmp_v1[j], &one, tmp_v0[tmp_off], &one); } } - } } } diff --git a/src/include/CalcSpectrumByBiCG.h b/src/include/CalcSpectrumByBiCG.h index d01ece1ad..d3de4ab14 100644 --- a/src/include/CalcSpectrumByBiCG.h +++ b/src/include/CalcSpectrumByBiCG.h @@ -20,7 +20,6 @@ int CalcSpectrumByBiCG( struct EDMainCalStruct *X, double complex *vrhs, double complex *v2, - double complex *v4, int Nomega, double complex *dcSpectrum, double complex *dcomega diff --git a/src/include/Multiply.h b/src/include/Multiply.h index e93eb1dd9..6f6578a20 100644 --- a/src/include/Multiply.h +++ b/src/include/Multiply.h @@ -15,11 +15,5 @@ /* along with this program. If not, see . 
*/ #pragma once -int Multiply -( - struct BindStruct *X - ); - -int MultiplyForTEM( - struct BindStruct *X -); \ No newline at end of file +int Multiply(struct BindStruct *X); +int MultiplyForTEM(struct BindStruct *X, double complex **v2); diff --git a/src/include/SingleExHubbard.h b/src/include/SingleExHubbard.h index ef7b84c37..1551bb885 100644 --- a/src/include/SingleExHubbard.h +++ b/src/include/SingleExHubbard.h @@ -16,16 +16,7 @@ #pragma once #include "Common.h" -int GetSingleExcitedStateHubbard -( - struct BindStruct *X, - int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ - double complex **tmp_v1 /**< [in] v0 = H v1*/ -); - -int GetSingleExcitedStateHubbardGC - ( - struct BindStruct *X, - int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ - double complex **tmp_v1 /**< [in] v0 = H v1*/ - ); +int GetSingleExcitedStateHubbard(struct BindStruct *X, int nstate, + double complex **tmp_v0, double complex **tmp_v1); +int GetSingleExcitedStateHubbardGC(struct BindStruct *X, int nstate, + double complex **tmp_v0, double complex **tmp_v1); diff --git a/src/include/global.h b/src/include/global.h index 8ce1fed8b..4b1323a80 100644 --- a/src/include/global.h +++ b/src/include/global.h @@ -33,16 +33,8 @@ double complex **v0; /**< A vector after multiplying Hamiltonian, @f$ v_0 = H v_1@f$.*/ double complex **v1; /**< A vector before multiplying Hamiltonian, @f$ v_0 = H v_1@f$.*/ -double complex *v2; /**< A temporary vector for time evolution calculation, @f$ v2 = H*v1 = H^coef |psi(t)>@f$.*/ double complex **v1buf; /**< A temporary vector for MPI. 
*/ -//[s] For calcSpectrum -double complex *v1Org; /**< An input vector to calculate spectrum function.*/ -double complex *vg; /**< A vector used in the CG mode.*/ -//[e] For calcSpectrum - -double *alpha,*beta; /**< Tridiagonal components used in Lanczos mode.*/ -double complex **vec; /**< Eigen vectors.*/ double *list_Diagonal; /**< list for diagonal components.*/ long unsigned int *list_1; /**< list of getting real-space configuration for canonical state*/ long unsigned int *list_1buf;/**< list of getting real-space configuration for canonical state across processes*/ @@ -70,8 +62,6 @@ int step_spin;/**< output step for TE calculation.*/ /*[e] For TPQ*/ /*[s] For All Diagonalization*/ -double complex**Ham; /**> Hamiltonian for full diagonalization. */ -double complex **L_vec;/**> eigen vectors*/ #ifdef _SCALAPACK double complex *Z_vec; /**> distributed matrix of eigen vector*/ int descZ_vec[9]; /*descriptor for Z_vec*/ diff --git a/src/include/makeHam.h b/src/include/makeHam.h deleted file mode 100644 index 054c3b0a2..000000000 --- a/src/include/makeHam.h +++ /dev/null @@ -1,19 +0,0 @@ -/* HPhi - Quantum Lattice Model Simulator */ -/* Copyright (C) 2015 The University of Tokyo */ - -/* This program is free software: you can redistribute it and/or modify */ -/* it under the terms of the GNU General Public License as published by */ -/* the Free Software Foundation, either version 3 of the License, or */ -/* (at your option) any later version. */ - -/* This program is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ -/* GNU General Public License for more details. */ - -/* You should have received a copy of the GNU General Public License */ -/* along with this program. If not, see . 
*/ -#pragma once -#include "Common.h" - -int makeHam(struct BindStruct *X); diff --git a/src/include/matrixlapack.h b/src/include/matrixlapack.h index b40023b4b..0508eacb0 100644 --- a/src/include/matrixlapack.h +++ b/src/include/matrixlapack.h @@ -32,9 +32,6 @@ #endif #include -int DSEVvalue(int xNsize, double **A, double *r); -int DSEVvector(int xNsize, double **A, double *r, double **vec); - -int ZHEEVall(int xNsize, double complex **A, double complex *r,double complex **vec); +int ZHEEVall(int xNsize, double complex **A, double *r,double complex **vec); #endif diff --git a/src/include/mltplyHubbard.h b/src/include/mltplyHubbard.h index 48a12dc23..4f8ce8255 100644 --- a/src/include/mltplyHubbard.h +++ b/src/include/mltplyHubbard.h @@ -46,18 +46,13 @@ void child_general_int struct BindStruct *X ); - void child_general_hopp -( - double complex *tmp_v0, - double complex *tmp_v1, - struct BindStruct *X, - double complex trans - ); +(int nstate, double complex **tmp_v0, double complex **tmp_v1, struct BindStruct *X, double complex trans); void child_exchange ( - int nstate, double complex **tmp_v0, + int nstate, + double complex **tmp_v0, double complex **tmp_v1, struct BindStruct *X ); diff --git a/src/lapack_diag.c b/src/lapack_diag.c index 614a53386..3c82dd5b4 100644 --- a/src/lapack_diag.c +++ b/src/lapack_diag.c @@ -50,16 +50,10 @@ struct BindStruct *X//!<[inout] for (i = 0; i < i_max; i++) { for (j = 0; j < i_max; j++) { - //printf("Ham %f %f ", creal(Ham[i+1][j+1]), cimag(Ham[i+1][j+1])); - Ham[i][j] = Ham[i + 1][j + 1]; + v0[i][j] = v0[i + 1][j]; } } xMsize = i_max; - /*for(i=0; iDef.iNGPU == 0) { #ifdef _SCALAPACK if(nproc >1) { @@ -90,39 +84,36 @@ struct BindStruct *X//!<[inout] diag_scalapack_cmp(xMsize, Ham, v0, Z_vec, descZ_vec); //printf("Z %f %f\n", creal(Z_vec[0]), cimag(Z_vec[1])); } else { - ZHEEVall(xMsize, Ham, v0, L_vec); + ZHEEVall(xMsize, Ham, v0, v1); } #else - ZHEEVall(xMsize, Ham, v0, L_vec); + ZHEEVall(xMsize, v0, X->Phys.energy, v1); 
#endif } else { #ifdef _MAGMA if(myrank==0){ - if(diag_magma_cmp(xMsize, Ham, v0, L_vec, X->Def.iNGPU) != 0) { + if(diag_magma_cmp(xMsize, Ham, v0, v1, X->Def.iNGPU) != 0) { return -1; } } #else fprintf(stdoutMPI, "Warning: MAGMA is not used in this calculation."); - ZHEEVall(xMsize, Ham, v0, L_vec); + ZHEEVall(xMsize, v0, X->Phys.energy, v1); #endif } - - /*for (i = 0; i < i_max; i++) { + for (i = i_max; i > 0; i--) { for (j = 0; j < i_max; j++) { - fprintf(stdoutMPI, "%f %f \n", creal(L_vec[i][j]), cimag(L_vec[i][j])); + v1[i][j] = v1[i - 1][j]; } - }*/ + } strcpy(sdt, cFileNameEigenvalue_Lanczos); if (childfopenMPI(sdt, "w", &fp) != 0) { return -1; } for (i = 0; i < i_max; i++) { - fprintf(fp, " %ld %.10lf \n", i, creal(v0[i][0])); + fprintf(fp, " %ld %.10lf \n", i, X->Phys.energy[i]); } fclose(fp); - - return 0; } diff --git a/src/makeHam.c b/src/makeHam.c deleted file mode 100644 index 8014dd933..000000000 --- a/src/makeHam.c +++ /dev/null @@ -1,552 +0,0 @@ -/* HPhi - Quantum Lattice Model Simulator */ -/* Copyright (C) 2015 The University of Tokyo */ - -/* This program is free software: you can redistribute it and/or modify */ -/* it under the terms of the GNU General Public License as published by */ -/* the Free Software Foundation, either version 3 of the License, or */ -/* (at your option) any later version. */ - -/* This program is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ -/* GNU General Public License for more details. */ - -/* You should have received a copy of the GNU General Public License */ -/* along with this program. If not, see . */ - -#include -#include "mltplyCommon.h" -#include "mltplyHubbardCore.h" -#include "mltplySpinCore.h" -#include "makeHam.h" -#include "wrapperMPI.h" - -/** - * @file makeHam.c - * - * @brief Making Hamiltonian for the full diagonalization method. 
- * - * @version 0.2 - * @details add function to treat the case of generalspin - * - * @version 0.1 - * @author Takahiro Misawa (The University of Tokyo) - * @author Kazuyoshi Yoshimi (The University of Tokyo) - - */ - - -/** - * @brief Making Hamiltonian for the full diagonalization method.\n - * The Hamiltonian is stored in the two dimensional array @f$ \verb|Ham| @f$. - * - * @param X [in] Struct for getting the information of the operators - * - * @retval 0 normally finished - * @retval -1 unnormally finished - * - * @version 0.2 - * @details add function to treat the case of generalspin - * - * @version 0.1 - * @author Takahiro Misawa (The University of Tokyo) - * @author Kazuyoshi Yoshimi (The University of Tokyo) - */ -int makeHam(struct BindStruct *X) { - - long unsigned int i, j; - long unsigned int is1_spin; - long unsigned int irght, ilft, ihfbit; - double complex dmv; - double num1; - long unsigned int off; - long unsigned int isite1, isite2, isite3, isite4; - int sigma1, sigma2, sigma3, sigma4; - long unsigned int isA_up, isB_up; - double complex tmp_trans, tmp_V; - long unsigned int Asum, Bsum, Adiff, Bdiff; - long unsigned int tmp_off, tmp_off_2; - int tmp_sgn; - off = 0; - tmp_off = 0; - tmp_off_2 = 0; - long unsigned int i_max; - i_max = X->Check.idim_max; - int ihermite = 0; - int idx = 0; - - if (GetSplitBitByModel(X->Def.Nsite, X->Def.iCalcModel, &irght, &ilft, &ihfbit) != 0) { - return -1; - } - X->Large.i_max = i_max; - X->Large.irght = irght; - X->Large.ilft = ilft; - X->Large.ihfbit = ihfbit; - X->Large.prdct = 0.0; - X->Large.mode = M_Ham; - - for (i = 0; i <= i_max; i++) { - for (j = 0; j <= i_max; j++) { - Ham[i][j] = 0; - } - } -#pragma omp parallel for default(none) firstprivate(i_max) private(j) shared(Ham, list_Diagonal, v0, v1) - for (j = 1; j <= i_max; j++) { - Ham[j][j] += list_Diagonal[j]; - v0[j] = 1.0; - v1[j] = 1.0; - //printf("%ld, %f\n", j, list_Diagonal[j]); - } - switch (X->Def.iCalcModel) { - case HubbardGC: - //Transfer 
- for (i = 0; i < X->Def.EDNTransfer / 2; i++) { - for (ihermite = 0; ihermite < 2; ihermite++) { - idx = 2 * i + ihermite; - isite1 = X->Def.EDGeneralTransfer[idx][0] + 1; - isite2 = X->Def.EDGeneralTransfer[idx][2] + 1; - sigma1 = X->Def.EDGeneralTransfer[idx][1]; - sigma2 = X->Def.EDGeneralTransfer[idx][3]; - - if (child_general_hopp_GetInfo(X, isite1, isite2, sigma1, sigma2) != 0) { - return -1; - } - tmp_trans = -X->Def.EDParaGeneralTransfer[idx]; - - for (j = 1; j <= X->Large.i_max; j++) { - dmv = tmp_trans * - GC_CisAjt(j, v0, v1, X, X->Large.is1_spin, X->Large.is2_spin, X->Large.isA_spin, X->Large.A_spin, - tmp_trans, &tmp_off); - Ham[tmp_off + 1][j] += dmv; - } - } - } - - - for (i = 0; i < X->Def.NInterAll_OffDiagonal / 2; i++) { - for (ihermite = 0; ihermite < 2; ihermite++) { - idx = 2 * i + ihermite; - isite1 = X->Def.InterAll_OffDiagonal[idx][0] + 1; - isite2 = X->Def.InterAll_OffDiagonal[idx][2] + 1; - isite3 = X->Def.InterAll_OffDiagonal[idx][4] + 1; - isite4 = X->Def.InterAll_OffDiagonal[idx][6] + 1; - sigma1 = X->Def.InterAll_OffDiagonal[idx][1]; - sigma2 = X->Def.InterAll_OffDiagonal[idx][3]; - sigma3 = X->Def.InterAll_OffDiagonal[idx][5]; - sigma4 = X->Def.InterAll_OffDiagonal[idx][7]; - tmp_V = X->Def.ParaInterAll_OffDiagonal[idx]; - child_general_int_GetInfo( - i, - X, - isite1, - isite2, - isite3, - isite4, - sigma1, - sigma2, - sigma3, - sigma4, - tmp_V - ); - - i_max = X->Large.i_max; - isite1 = X->Large.is1_spin; - isite2 = X->Large.is2_spin; - Asum = X->Large.isA_spin; - Adiff = X->Large.A_spin; - - isite3 = X->Large.is3_spin; - isite4 = X->Large.is4_spin; - Bsum = X->Large.isB_spin; - Bdiff = X->Large.B_spin; - - tmp_V = X->Large.tmp_V; - - if (isite1 == isite2 && isite3 == isite4) { - - for (j = 1; j <= i_max; j++) { - dmv = GC_child_CisAisCisAis_element(j, isite1, isite3, tmp_V, v0, v1, X, &tmp_off); - Ham[j][j] += dmv; - } - } else if (isite1 == isite2 && isite3 != isite4) { - - for (j = 1; j <= i_max; j++) { - dmv = 
GC_child_CisAisCjtAku_element(j, isite1, isite3, isite4, Bsum, Bdiff, tmp_V, v0, v1, X, &tmp_off); - Ham[tmp_off + 1][j] += dmv; - } - } else if (isite1 != isite2 && isite3 == isite4) { - - for (j = 1; j <= i_max; j++) { - dmv = GC_child_CisAjtCkuAku_element(j, isite1, isite2, isite3, Asum, Adiff, tmp_V, v0, v1, X, &tmp_off); - Ham[tmp_off + 1][j] += dmv; - } - } else if (isite1 != isite2 && isite3 != isite4) { - - for (j = 1; j <= i_max; j++) { - dmv = GC_child_CisAjtCkuAlv_element(j, isite1, isite2, isite3, isite4, Asum, Adiff, Bsum, Bdiff, tmp_V, - v0, v1, X, &tmp_off_2); - Ham[tmp_off_2 + 1][j] += dmv; - } - } - } - } - //Pair hopping - for (i = 0; i < X->Def.NPairHopping / 2; i++) { - for (ihermite = 0; ihermite < 2; ihermite++) { - idx = 2 * i + ihermite; - child_pairhopp_GetInfo(idx, X); - for (j = 1; j <= X->Large.i_max; j++) { - dmv = GC_child_pairhopp_element(j, v0, v1, X, &tmp_off); - Ham[tmp_off + 1][j] += dmv; - } - } - } - //Exchange - for (i = 0; i < X->Def.NExchangeCoupling; i++) { - child_exchange_GetInfo(i, X); - for (j = 1; j <= X->Large.i_max; j++) { - dmv = GC_child_exchange_element(j, v0, v1, X, &tmp_off); - Ham[tmp_off + 1][j] += dmv; - } - } - break; - case KondoGC: - case Hubbard: - case Kondo: - //Transfer - for (i = 0; i < X->Def.EDNTransfer / 2; i++) { - for (ihermite = 0; ihermite < 2; ihermite++) { - idx = 2 * i + ihermite; - - isite1 = X->Def.EDGeneralTransfer[idx][0] + 1; - isite2 = X->Def.EDGeneralTransfer[idx][2] + 1; - sigma1 = X->Def.EDGeneralTransfer[idx][1]; - sigma2 = X->Def.EDGeneralTransfer[idx][3]; - - if (child_general_hopp_GetInfo(X, isite1, isite2, sigma1, sigma2) != 0) { - return -1; - } - tmp_trans = -X->Def.EDParaGeneralTransfer[idx]; - - for (j = 1; j <= X->Large.i_max; j++) { - dmv = tmp_trans * - X_CisAjt(list_1[j], X, X->Large.is1_spin, X->Large.is2_spin, X->Large.isA_spin, X->Large.A_spin, - &tmp_off); - Ham[tmp_off][j] += dmv; - } - } - } - - //InterAll - for (i = 0; i < X->Def.NInterAll_OffDiagonal / 2; i++) { 
- for (ihermite = 0; ihermite < 2; ihermite++) { - idx = 2 * i + ihermite; - isite1 = X->Def.InterAll_OffDiagonal[idx][0] + 1; - isite2 = X->Def.InterAll_OffDiagonal[idx][2] + 1; - isite3 = X->Def.InterAll_OffDiagonal[idx][4] + 1; - isite4 = X->Def.InterAll_OffDiagonal[idx][6] + 1; - sigma1 = X->Def.InterAll_OffDiagonal[idx][1]; - sigma2 = X->Def.InterAll_OffDiagonal[idx][3]; - sigma3 = X->Def.InterAll_OffDiagonal[idx][5]; - sigma4 = X->Def.InterAll_OffDiagonal[idx][7]; - tmp_V = X->Def.ParaInterAll_OffDiagonal[idx]; - if (isite1 == 1 && sigma1 == 0 && isite2 == 4 && sigma2 == 0 && isite3 == 17 && sigma3 == 0 && isite4 == 19 && - sigma4 == 0) { - tmp_V = tmp_V * 1.0; - } -// fprintf(stdoutMPI, "Debug: %d, %d, %d, %d, %d, %d, %d, %d\n ", isite1, sigma1,isite2, sigma2,isite3, sigma3,isite4, sigma4); - child_general_int_GetInfo( - i, - X, - isite1, - isite2, - isite3, - isite4, - sigma1, - sigma2, - sigma3, - sigma4, - tmp_V - ); - - i_max = X->Large.i_max; - isite1 = X->Large.is1_spin; - isite2 = X->Large.is2_spin; - Asum = X->Large.isA_spin; - Adiff = X->Large.A_spin; - - isite3 = X->Large.is3_spin; - isite4 = X->Large.is4_spin; - Bsum = X->Large.isB_spin; - Bdiff = X->Large.B_spin; - - tmp_V = X->Large.tmp_V; - - if (isite1 == isite2 && isite3 == isite4) { - - for (j = 1; j <= i_max; j++) { - dmv = child_CisAisCisAis_element(j, isite1, isite3, tmp_V, v0, v1, X, &tmp_off); - Ham[j][j] += dmv; - } - } else if (isite1 == isite2 && isite3 != isite4) { - - for (j = 1; j <= i_max; j++) { - dmv = child_CisAisCjtAku_element(j, isite1, isite3, isite4, Bsum, Bdiff, tmp_V, v0, v1, X, &tmp_off); - Ham[tmp_off][j] += dmv; - } - } else if (isite1 != isite2 && isite3 == isite4) { - - for (j = 1; j <= i_max; j++) { - dmv = child_CisAjtCkuAku_element(j, isite1, isite2, isite3, Asum, Adiff, tmp_V, v0, v1, X, &tmp_off); - Ham[tmp_off][j] += dmv; - } - } else if (isite1 != isite2 && isite3 != isite4) { - - for (j = 1; j <= i_max; j++) { - dmv = child_CisAjtCkuAlv_element(j, isite1, 
isite2, isite3, isite4, Asum, Adiff, Bsum, Bdiff, tmp_V, v0, - v1, X, &tmp_off_2); - Ham[tmp_off_2][j] += dmv; - } - } - } - } - - //Pair hopping - for (i = 0; i < X->Def.NPairHopping / 2; i++) { - for (ihermite = 0; ihermite < 2; ihermite++) { - idx = 2 * i + ihermite; - child_pairhopp_GetInfo(idx, X); - for (j = 1; j <= X->Large.i_max; j++) { - dmv = child_pairhopp_element(j, v0, v1, X, &tmp_off); - Ham[tmp_off][j] += dmv; - } - } - } - //Exchange - for (i = 0; i < X->Def.NExchangeCoupling; i++) { - child_exchange_GetInfo(i, X); - for (j = 1; j <= X->Large.i_max; j++) { - dmv = child_exchange_element(j, v0, v1, X, &tmp_off); - Ham[tmp_off][j] += dmv; - } - } - break; - - case SpinGC: - if (X->Def.iFlgGeneralSpin == FALSE) { - //Transfer - for (i = 0; i < X->Def.EDNTransfer / 2; i++) { - for (ihermite = 0; ihermite < 2; ihermite++) { - idx = 2 * i + ihermite; - isite1 = X->Def.EDGeneralTransfer[idx][0] + 1; - isite2 = X->Def.EDGeneralTransfer[idx][2] + 1; - sigma1 = X->Def.EDGeneralTransfer[idx][1]; - sigma2 = X->Def.EDGeneralTransfer[idx][3]; - tmp_trans = -X->Def.EDParaGeneralTransfer[idx]; - - if (child_general_hopp_GetInfo(X, isite1, isite2, sigma1, sigma2) != 0) { - return -1; - } - - if (isite1 == isite2) { - is1_spin = X->Def.Tpow[isite1 - 1]; - if (sigma1 == sigma2) { - // longitudinal magnetic field - for (j = 1; j <= i_max; j++) { - Ham[j][j] += tmp_trans * X_Spin_CisAis(j, X, is1_spin, sigma1); - } - } else { - // transverse magnetic field - is1_spin = X->Def.Tpow[isite1 - 1]; - - for (j = 1; j <= i_max; j++) { - Ham[off + 1][j] += tmp_trans * X_SpinGC_CisAit(j, X, is1_spin, sigma2, &off); - } - } - } else { - // hopping is not allowed in localized spin system - return -1; - } - } - } - - //InterAll - for (i = 0; i < X->Def.NInterAll_OffDiagonal / 2; i++) { - for (ihermite = 0; ihermite < 2; ihermite++) { - idx = 2 * i + ihermite; - isite1 = X->Def.InterAll_OffDiagonal[idx][0] + 1; - isite2 = X->Def.InterAll_OffDiagonal[idx][4] + 1; - sigma1 = 
X->Def.InterAll_OffDiagonal[idx][1]; - sigma2 = X->Def.InterAll_OffDiagonal[idx][3]; - sigma3 = X->Def.InterAll_OffDiagonal[idx][5]; - sigma4 = X->Def.InterAll_OffDiagonal[idx][7]; - tmp_V = X->Def.ParaInterAll_OffDiagonal[idx]; - - child_general_int_spin_GetInfo(X, isite1, isite2, sigma1, sigma2, sigma3, sigma4, tmp_V); - isA_up = X->Def.Tpow[isite1 - 1]; - isB_up = X->Def.Tpow[isite2 - 1]; - - if (sigma1 == sigma2 && sigma3 == sigma4) { //diagonal - for (j = 1; j <= i_max; j++) { - dmv = GC_child_CisAisCisAis_spin_element(j, isA_up, isB_up, sigma2, sigma4, tmp_V, v0, v1, X); - Ham[j][j] += dmv; - } - } else if (sigma1 == sigma2 && sigma3 != sigma4) { - for (j = 1; j <= i_max; j++) { - dmv = GC_child_CisAisCitAiu_spin_element(j, sigma2, sigma4, isA_up, isB_up, tmp_V, v0, v1, X, &tmp_off); - Ham[tmp_off + 1][j] += dmv; - } - } else if (sigma1 != sigma2 && sigma3 == sigma4) { - for (j = 1; j <= i_max; j++) { - dmv = GC_child_CisAitCiuAiu_spin_element(j, sigma2, sigma4, isA_up, isB_up, tmp_V, v0, v1, X, &tmp_off); - Ham[tmp_off + 1][j] += dmv; - } - } else if (sigma1 != sigma2 && sigma3 != sigma4) { - for (j = 1; j <= i_max; j++) { - dmv = GC_child_CisAitCiuAiv_spin_element(j, sigma2, sigma4, isA_up, isB_up, tmp_V, v0, v1, X, - &tmp_off_2); - Ham[tmp_off_2 + 1][j] += dmv; - } - } - } - } - //Exchange - for (i = 0; i < X->Def.NExchangeCoupling; i++) { - child_exchange_spin_GetInfo(i, X); - for (j = 1; j <= X->Large.i_max; j++) { - dmv = GC_child_exchange_spin_element(j, v0, v1, X, &tmp_off); - Ham[tmp_off + 1][j] += dmv; - } - } - - //PairLift - for (i = 0; i < X->Def.NPairLiftCoupling / 2; i++) { - for (ihermite = 0; ihermite < 2; ihermite++) { - idx = 2 * i + ihermite; - child_pairlift_spin_GetInfo(idx, X); - - for (j = 1; j <= X->Large.i_max; j++) { - dmv = GC_child_pairlift_spin_element(j, v0, v1, X, &tmp_off); - Ham[tmp_off + 1][j] += dmv; - } - } - } - } else { //For General spin - for (i = 0; i < X->Def.EDNTransfer / 2; i++) { - for (ihermite = 0; ihermite < 2; 
ihermite++) { - idx = 2 * i + ihermite; - isite1 = X->Def.EDGeneralTransfer[idx][0] + 1; - isite2 = X->Def.EDGeneralTransfer[idx][2] + 1; - sigma1 = X->Def.EDGeneralTransfer[idx][1]; - sigma2 = X->Def.EDGeneralTransfer[idx][3]; - tmp_trans = -X->Def.EDParaGeneralTransfer[idx]; - - if (isite1 == isite2) { - // longitudinal magnetic field is absorbed in diagonal calculation. - // transverse magnetic field - for (j = 1; j <= i_max; j++) { - num1 = GetOffCompGeneralSpin(j - 1, isite1, sigma2, sigma1, &off, X->Def.SiteToBit, X->Def.Tpow); - Ham[off + 1][j] += tmp_trans * num1; - } - } else { - // hopping is not allowed in localized spin system - return -1; - } - } - } - - //InterAll - for (i = 0; i < X->Def.NInterAll_OffDiagonal / 2; i++) { - for (ihermite = 0; ihermite < 2; ihermite++) { - idx = 2 * i + ihermite; - isite1 = X->Def.InterAll_OffDiagonal[idx][0] + 1; - isite2 = X->Def.InterAll_OffDiagonal[idx][4] + 1; - sigma1 = X->Def.InterAll_OffDiagonal[idx][1]; - sigma2 = X->Def.InterAll_OffDiagonal[idx][3]; - sigma3 = X->Def.InterAll_OffDiagonal[idx][5]; - sigma4 = X->Def.InterAll_OffDiagonal[idx][7]; - tmp_V = X->Def.ParaInterAll_OffDiagonal[idx]; - for (j = 1; j <= i_max; j++) { - num1 = GetOffCompGeneralSpin(j - 1, isite1, sigma2, sigma1, &tmp_off, X->Def.SiteToBit, X->Def.Tpow); - if (num1 != 0) { - num1 = GetOffCompGeneralSpin(tmp_off, isite2, sigma4, sigma3, &off, X->Def.SiteToBit, X->Def.Tpow); - if (num1 != 0) { - Ham[off + 1][j] += tmp_V * num1; - } - } - } - } - } - } - break; - - case Spin: - if (X->Def.iFlgGeneralSpin == FALSE) { - //Transfer is abosrbed in diagonal term. 
- //InterAll - for (i = 0; i < X->Def.NInterAll_OffDiagonal / 2; i++) { - for (ihermite = 0; ihermite < 2; ihermite++) { - idx = 2 * i + ihermite; - - isite1 = X->Def.InterAll_OffDiagonal[idx][0] + 1; - isite2 = X->Def.InterAll_OffDiagonal[idx][4] + 1; - sigma1 = X->Def.InterAll_OffDiagonal[idx][1]; - sigma2 = X->Def.InterAll_OffDiagonal[idx][3]; - sigma3 = X->Def.InterAll_OffDiagonal[idx][5]; - sigma4 = X->Def.InterAll_OffDiagonal[idx][7]; - tmp_V = X->Def.ParaInterAll_OffDiagonal[idx]; - - child_general_int_spin_GetInfo(X, isite1, isite2, sigma1, sigma2, sigma3, sigma4, tmp_V); - isA_up = X->Large.is1_up; - isB_up = X->Large.is2_up; - - for (j = 1; j <= i_max; j++) { - tmp_sgn = X_child_exchange_spin_element(j, X, isA_up, isB_up, sigma2, sigma4, &tmp_off); - dmv = tmp_sgn * tmp_V; - Ham[tmp_off][j] += dmv; - } - } - } - - //Exchange - for (i = 0; i < X->Def.NExchangeCoupling; i++) { - child_exchange_spin_GetInfo(i, X); - for (j = 1; j <= X->Large.i_max; j++) { - dmv = child_exchange_spin_element(j, v0, v1, X, &tmp_off); - Ham[tmp_off][j] += dmv; - } - } - - } else { //For General spin - //Transfer absorbed in Diagonal term. 
- - //InterAll - for (i = 0; i < X->Def.NInterAll_OffDiagonal / 2; i++) { - for (ihermite = 0; ihermite < 2; ihermite++) { - idx = 2 * i + ihermite; - isite1 = X->Def.InterAll_OffDiagonal[idx][0] + 1; - isite2 = X->Def.InterAll_OffDiagonal[idx][4] + 1; - sigma1 = X->Def.InterAll_OffDiagonal[idx][1]; - sigma2 = X->Def.InterAll_OffDiagonal[idx][3]; - sigma3 = X->Def.InterAll_OffDiagonal[idx][5]; - sigma4 = X->Def.InterAll_OffDiagonal[idx][7]; - tmp_V = X->Def.ParaInterAll_OffDiagonal[idx]; - - for (j = 1; j <= i_max; j++) { - num1 = GetOffCompGeneralSpin(list_1[j], isite1, sigma2, sigma1, &tmp_off, X->Def.SiteToBit, X->Def.Tpow); - if (num1 != 0) { - num1 = GetOffCompGeneralSpin(tmp_off, isite2, sigma4, sigma3, &off, X->Def.SiteToBit, X->Def.Tpow); - if (num1 != 0) { - ConvertToList1GeneralSpin(off, X->Check.sdim, &tmp_off); - Ham[tmp_off][j] += tmp_V; - } - } - } - } - } - } - - break; - } - return 0; -} diff --git a/src/matrixlapack.c b/src/matrixlapack.c index c83068d97..bba85e803 100644 --- a/src/matrixlapack.c +++ b/src/matrixlapack.c @@ -34,329 +34,16 @@ * */ - -int dgetrf_(int *m, int *n, double *a, int *lda, int *ipiv, int *info); -int dgetri_(int *n, double *a, int *lda, int *ipiv, double *work, int *lwork, int *info); - #ifdef SR int dsyevd_(char *jobz, char *uplo, int *n, double *a, int *lda, double *w, double *work, int *lwork, int *iwork, int *liwork, int *info); -int M_DSYEV(char *jobz, char *uplo, int *n, double *a, int *lda, double *w, double *work, int *lwork, int *iwork, int *liwork, int *info); int zheevd_(char *jobz, char *uplo, int *n, double complex *a, int *lda, double *w, double complex *work, int *lwork, double *rwork, int *iwork, int *liwork, int *info); #else -int dsyev_(char *jobz, char *uplo, int *n, double *a, int *lda, double *w, double *work, int *lwork, int *info); -int M_DSYEV(char *jobz, char *uplo, int *n, double *a, int *lda, double *w, double *work, int *lwork, int *info); -double dlamch_(char *cmach); int zheev_(char *jobz, char 
*uplo, int *n, double complex *a, int *lda, double *w, double complex *work, int *lwork, double *rwork, int *info); #endif int dsyevx_(char *jobz, char *range, char *uplo, int *n, double *a, int *lda, double *vl, double *vu, int *il, int *iu, double *abstol, int *m, double *w, double *z__, int *ldz, double *work, int *lwork, int *iwork, int *ifail, int *info); - - -/** - * - * @brief function for transforming Row-major matrix (C) to Column-major matrix (Fortran) - * @param[in] N - * @param[in] M - * @param[in] A Row-major matrix - * @param[out] a Column-major matrix - * @author Takahiro Misawa (The University of Tokyo) - */ -void to_f( -int N, //!<[in] -int M, //!<[in] -double **A, //!<[in] -double *a //!<[out] -){ - int i,j,k; - k=0; - for(j=0;jDef.iCalcType == FullDiag) { if (myrank == 0) { for (j = 0; j < i_max; j++) { - v0[j + 1] = L_vec[i][j]; + v0[j + 1] = v1[i][j]; } } } else { for (j = 0; j < i_max; j++) { - v0[j + 1] = L_vec[i][j]; + v0[j + 1] = v1[i][j]; } } } -#else - for (j = 0; j < i_max; j++) { - v0[j + 1][i] = L_vec[j][i]; - } -#endif }/*for (i = 0; i < neig; i++)*/ +#endif if (expec_energy_flct(X, neig, v0, v1) != 0) { fprintf(stderr, "Error: calc expec_energy.\n"); diff --git a/src/xsetmem.c b/src/xsetmem.c index 367e57789..58b5895ff 100644 --- a/src/xsetmem.c +++ b/src/xsetmem.c @@ -148,17 +148,16 @@ void setmem_def /// -/// \brief Set size of memories for Hamiltonian (Ham, L_vec), vectors(vg, v0, v1, v2, vec, alpha, beta), lists (list_1, list_2_1, list_2_2, list_Diagonal) and Phys(BindStruct.PhysList) struct in the case of Full Diag mode. +/// \brief Set size of memories for vectors(vg, v0, v1, v2, vec, alpha, beta), lists (list_1, list_2_1, list_2_2, list_Diagonal) and Phys(BindStruct.PhysList) struct in the case of Full Diag mode. /// \param X [in,out] BindStruct to give information and give size of memories for Hamiltonian, vectors, lists and Phys struct in the case of Full Diag mode. /// \retval -1 Fail to set memories. 
/// \retval 0 Normal to set memories. /// \version 0.1 int setmem_large ( - struct BindStruct *X - ) { - - unsigned long int j = 0; + struct BindStruct *X +) { + int nstate; unsigned long int idim_maxMPI; idim_maxMPI = MaxMPI_li(X->Check.idim_max); @@ -179,82 +178,40 @@ int setmem_large } list_Diagonal = d_1d_allocate(X->Check.idim_max + 1); - v0 = cd_1d_allocate(X->Check.idim_max + 1); - v1 = cd_1d_allocate(X->Check.idim_max + 1); - if (X->Def.iCalcType == TimeEvolution) { - v2 = cd_1d_allocate(X->Check.idim_max + 1); - } else { - v2 = cd_1d_allocate(1); + + if (X->Def.iCalcType == FullDiag) { + nstate = X->Check.idim_max + 1; } -#ifdef MPI - v1buf = cd_1d_allocate(idim_maxMPI + 1); -#endif // MPI - if (X->Def.iCalcType == TPQCalc) { - vg = cd_1d_allocate(1); + else if (X->Def.iCalcType == CG) { + nstate = X->Def.k_exct; } - else { - vg = cd_1d_allocate(X->Check.idim_max + 1); + else if (X->Def.iCalcType == TPQCalc) { + nstate = NumAve; } - alpha = d_1d_allocate(X->Def.Lanczos_max + 1); - beta = d_1d_allocate(X->Def.Lanczos_max + 1); - - if ( - list_Diagonal == NULL - || v0 == NULL - || v1 == NULL - || vg == NULL - ) { - return -1; + else { + nstate = 1; } + v0 = cd_2d_allocate(X->Check.idim_max + 1, nstate); + v1 = cd_2d_allocate(X->Check.idim_max + 1, nstate); +#ifdef MPI + v1buf = cd_2d_allocate(idim_maxMPI + 1, nstate); +#endif // MPI - if (X->Def.iCalcType == TPQCalc || X->Def.iFlgCalcSpec != CALCSPEC_NOT) { - vec = cd_2d_allocate(X->Def.Lanczos_max + 1, X->Def.Lanczos_max + 1); - } - else if (X->Def.iCalcType == Lanczos || X->Def.iCalcType == CG) { - if (X->Def.LanczosTarget > X->Def.nvec) { - vec = cd_2d_allocate(X->Def.LanczosTarget + 1, X->Def.Lanczos_max + 1); - } - else { - vec = cd_2d_allocate(X->Def.nvec + 1, X->Def.Lanczos_max + 1); - } - } + X->Phys.num_down = d_1d_allocate(nstate); + X->Phys.num_up = d_1d_allocate(nstate); + X->Phys.num = d_1d_allocate(nstate); + X->Phys.num2 = d_1d_allocate(nstate); + X->Phys.energy = d_1d_allocate(nstate); + 
X->Phys.var = d_1d_allocate(nstate); + X->Phys.doublon = d_1d_allocate(nstate); + X->Phys.doublon2 = d_1d_allocate(nstate); + X->Phys.Sz = d_1d_allocate(nstate); + X->Phys.Sz2 = d_1d_allocate(nstate); + X->Phys.s2 = d_1d_allocate(nstate); - if (X->Def.iCalcType == FullDiag) { - X->Phys.all_num_down = d_1d_allocate(X->Check.idim_max + 1); - X->Phys.all_num_up = d_1d_allocate(X->Check.idim_max + 1); - X->Phys.all_energy = d_1d_allocate(X->Check.idim_max + 1); - X->Phys.all_doublon = d_1d_allocate(X->Check.idim_max + 1); - X->Phys.all_sz = d_1d_allocate(X->Check.idim_max + 1); - X->Phys.all_s2 = d_1d_allocate(X->Check.idim_max + 1); - Ham = cd_2d_allocate(X->Check.idim_max + 1, X->Check.idim_max + 1); - L_vec = cd_2d_allocate(X->Check.idim_max + 1, X->Check.idim_max + 1); - - if (X->Phys.all_num_down == NULL - || X->Phys.all_num_up == NULL - || X->Phys.all_energy == NULL - || X->Phys.all_doublon == NULL - || X->Phys.all_s2 == NULL - ) { - return -1; - } - for (j = 0; j < X->Check.idim_max + 1; j++) { - if (Ham[j] == NULL || L_vec[j] == NULL) { - return -1; - } - } - } - else if (X->Def.iCalcType == CG) { - X->Phys.all_num_down = d_1d_allocate(X->Def.k_exct); - X->Phys.all_num_up = d_1d_allocate(X->Def.k_exct); - X->Phys.all_energy = d_1d_allocate(X->Def.k_exct); - X->Phys.all_doublon = d_1d_allocate(X->Def.k_exct); - X->Phys.all_sz = d_1d_allocate(X->Def.k_exct); - X->Phys.all_s2 = d_1d_allocate(X->Def.k_exct); - } fprintf(stdoutMPI, "%s", cProFinishAlloc); return 0; } - /// /// \brief Set the size of memories for InterAllDiagonal and InterAllOffDiagonal arrays. /// \param InterAllOffDiagonal [in,out] Arrays of cites and spin indexes of off-diagonal parts of InterAll interactions. 
From 8913dd95a2ec4fb69a7927cb5e164f450a904507 Mon Sep 17 00:00:00 2001 From: mitsuaki1987 Date: Thu, 7 Mar 2019 18:29:03 +0900 Subject: [PATCH 08/50] Backup --- src/CMakeLists.txt | 11 +- src/CalcByLOBPCG.c | 23 +- src/CalcByTPQ.c | 6 +- src/CalcSpectrumByFullDiag.c | 2 +- src/FirstMultiply.c | 53 +- src/PairExHubbard.c | 25 +- src/SingleExHubbard.c | 4 +- src/StdFace/makefile_StdFace | 30 - src/diagonalcalc.c | 3103 +++++++++++++++---------------- src/expec_cisajs.c | 8 +- src/expec_cisajscktaltdc.c | 4 +- src/expec_energy_flct.c | 4 +- src/expec_totalspin.c | 21 +- src/include/CalcByFullDiag.h | 1 - src/include/FirstMultiply.h | 1 - src/include/diagonalcalc.h | 5 +- src/include/expec_totalspin.h | 21 +- src/include/mltplyHubbardCore.h | 11 +- src/include/mltplySpinCore.h | 19 +- src/include/wrapperMPI.h | 4 +- src/input.c | 4 +- src/makefile_src | 222 --- src/mltply.c | 4 +- src/mltplyHubbard.c | 6 +- src/mltplyHubbardCore.c | 17 +- src/mltplyMPIHubbard.c | 6 +- src/mltplyMPIHubbardCore.c | 5 +- src/mltplyMPISpin.c | 4 +- src/output.c | 8 +- src/phys.c | 2 +- src/wrapperMPI.c | 6 +- tool/makefile_tool | 22 - 32 files changed, 1662 insertions(+), 2000 deletions(-) delete mode 100644 src/StdFace/makefile_StdFace delete mode 100644 src/makefile_src delete mode 100644 tool/makefile_tool diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 9a7509c4b..c74b7ac6c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -10,14 +10,13 @@ include_directories(include) add_definitions(-D_HPhi) add_definitions(-DDSFMT_MEXP=19937) -set(SOURCES FileIO.c HPhiMain.c HPhiTrans.c bitcalc.c check.c CheckMPI.c dSFMT.c diagonalcalc.c global.c log.c input.c output.c output_list.c readdef.c sz.c vec12.c xsetmem.c ErrorMessage.c LogMessage.c ProgressMessage.c wrapperMPI.c splash.c time.c eigenIO.c) +set(SOURCES FileIO.c HPhiMain.c HPhiTrans.c bitcalc.c check.c CheckMPI.c dSFMT.c diagonalcalc.c global.c log.c input.c output.c output_list.c readdef.c sz.c xsetmem.c ErrorMessage.c 
LogMessage.c ProgressMessage.c wrapperMPI.c splash.c time.c eigenIO.c) -set(SOURCES_MLTPLY Multiply.c mltply.c mltplySpin.c mltplyHubbard.c mltplyMPIHubbard.c mltplyMPISpin.c mltplyMPIBoost.c mltplyHubbardCore.c mltplySpinCore.c mltplyMPIHubbardCore.c mltplyMPISpinCore.c) -set(SOURCES_LANCZOS CalcByLanczos.c CG_EigenVector.c Lanczos_EigenValue.c Lanczos_EigenVector.c) +set(SOURCES_MLTPLY Multiply.c mltply.c mltplySpin.c mltplyHubbard.c mltplyMPIHubbard.c mltplyMPISpin.c mltplyHubbardCore.c mltplySpinCore.c mltplyMPIHubbardCore.c mltplyMPISpinCore.c) set(SOURCES_CG CalcByLOBPCG.c ) set(SOURCES_TPQ CalcByTPQ.c FirstMultiply.c) -set(SOURCES_DIAG CalcByFullDiag.c lapack_diag.c makeHam.c matrixlapack.c matrixlapack_magma.c matrixscalapack.c ) -set(SOURCES_SPECTRUM CalcSpectrum.c CalcSpectrumByBiCG.c CalcSpectrumByLanczos.c CalcSpectrumByTPQ.c CalcSpectrumByFullDiag.c SingleEx.c SingleExHubbard.c PairEx.c PairExHubbard.c PairExSpin.c ) +set(SOURCES_DIAG CalcByFullDiag.c lapack_diag.c matrixlapack.c matrixlapack_magma.c matrixscalapack.c ) +set(SOURCES_SPECTRUM CalcSpectrum.c CalcSpectrumByBiCG.c CalcSpectrumByFullDiag.c SingleEx.c SingleExHubbard.c PairEx.c PairExHubbard.c PairExSpin.c ) set(SOURCES_TEM CalcByTEM.c) set(SOURCES_PHYS expec_cisajs.c expec_cisajscktaltdc.c expec_totalspin.c phys.c expec_energy_flct.c) @@ -25,7 +24,7 @@ set(SOURCES_COMMON common/setmemory.c) set(SOURCES_STDFACE StdFace/ChainLattice.c StdFace/FCOrtho.c StdFace/HoneycombLattice.c StdFace/Kagome.c StdFace/Ladder.c StdFace/Orthorhombic.c StdFace/Pyrochlore.c StdFace/SquareLattice.c StdFace/StdFace_main.c StdFace/StdFace_ModelUtil.c StdFace/TriangularLattice.c StdFace/Wannier90.c) -add_executable(HPhi ${SOURCES} ${SOURCES_LANCZOS} ${SOURCES_CG} ${SOURCES_TPQ} ${SOURCES_SPECTRUM} ${SOURCES_TEM} ${SOURCES_MLTPLY} ${SOURCES_DIAG} ${SOURCES_PHYS} ${SOURCES_STDFACE} ${SOURCES_COMMON}) +add_executable(HPhi ${SOURCES} ${SOURCES_CG} ${SOURCES_TPQ} ${SOURCES_SPECTRUM} ${SOURCES_TEM} ${SOURCES_MLTPLY} 
${SOURCES_DIAG} ${SOURCES_PHYS} ${SOURCES_STDFACE} ${SOURCES_COMMON}) target_link_libraries(HPhi komega ${LAPACK_LIBRARIES} m) if(MPI_FOUND) target_link_libraries(HPhi ${MPI_C_LIBRARIES}) diff --git a/src/CalcByLOBPCG.c b/src/CalcByLOBPCG.c index 147e559f4..5236fa34f 100644 --- a/src/CalcByLOBPCG.c +++ b/src/CalcByLOBPCG.c @@ -290,7 +290,7 @@ static void Initialize_wave( dnorm = d_1d_allocate(X->Def.k_exct); NormMPI_dv(i_max, X->Def.k_exct, wave, dnorm); -#pragma omp parallel for default(none) shared(i_max,wave,dnorm,ie) private(idim) +#pragma omp parallel for default(none) shared(i_max,wave,dnorm,ie,X) private(idim) for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++) wave[idim][ie] /= dnorm[ie]; free_d_1d_allocate(dnorm); @@ -360,7 +360,7 @@ int LOBPCG_Main( eig = d_1d_allocate(X->Def.k_exct); dnorm = d_1d_allocate(X->Def.k_exct); eigsub = d_1d_allocate(nsub); - hsub = cd_2d_allocate(3, X->Def.k_exct, 3, X->Def.k_exct); + hsub = cd_4d_allocate(3, X->Def.k_exct, 3, X->Def.k_exct); ovlp = cd_4d_allocate(3, X->Def.k_exct, 3, X->Def.k_exct); i_max = X->Check.idim_max; @@ -374,7 +374,7 @@ int LOBPCG_Main(
    • Set initial guess of wavefunction: @f${\bf x}=@f$initial guess
    • */ - Initialize_wave(X, &wxp[1]); + Initialize_wave(X, wxp[1]); TimeKeeper(X, cFileNameTimeKeep, cLanczos_EigenValueStart, "a"); @@ -423,7 +423,7 @@ int LOBPCG_Main( /**@brief
    • Compute residual vectors: @f${\bf w}={\bf X}-\mu {\bf x}@f$
    • */ -#pragma omp parallel for default(none) shared(i_max,wxp,hwxp,eig) private(idim,ie) reduction(+:dnorm) +#pragma omp parallel for default(none) shared(i_max,wxp,hwxp,eig,X) private(idim,ie) for (idim = 1; idim <= i_max; idim++) { for (ie = 0; ie < X->Def.k_exct; ie++) { wxp[0][ie][idim] = hwxp[1][idim][ie] - eig[ie] * wxp[1][idim][ie]; @@ -441,7 +441,8 @@ int LOBPCG_Main( if (do_precon == 1) { for (ie = 0; ie < X->Def.k_exct; ie++) preshift[ie] = calc_preshift(eig[ie], dnorm[ie], eps_LOBPCG); -#pragma omp parallel for default(none) shared(wxp,ie,list_Diagonal,preshift,i_max,eps_LOBPCG) private(idim,precon) +#pragma omp parallel for default(none) shared(wxp,list_Diagonal,preshift,i_max,eps_LOBPCG,X) \ +private(idim,precon,ie) for (idim = 1; idim <= i_max; idim++) { for (ie = 0; ie < X->Def.k_exct; ie++){ precon = list_Diagonal[idim] - preshift[ie]; @@ -453,7 +454,7 @@ int LOBPCG_Main(
    • Normalize residual vector: @f${\bf w}={\bf w}/|w|@f$ */ NormMPI_dv(i_max, X->Def.k_exct, wxp[0], dnorm); -#pragma omp parallel for default(none) shared(i_max,wxp,dnorm,ie) private(idim) +#pragma omp parallel for default(none) shared(i_max,wxp,dnorm,ie,X) private(idim) for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++) wxp[0][idim][ie] /= dnorm[ie]; @@ -500,8 +501,8 @@ int LOBPCG_Main( &wxp[ii][1][0], &X->Def.k_exct, &hwxp[jj][1][0], &X->Def.k_exct, &zero, &hsub[jj][0][ii][0], &nsub); } } - SumMPI_cv(nsub*nsub, ovlp); - SumMPI_cv(nsub*nsub, hsub); + SumMPI_cv(nsub*nsub, &ovlp[0][0][0][0]); + SumMPI_cv(nsub*nsub, &hsub[0][0][0][0]); for (ie = 0; ie < X->Def.k_exct; ie++) eig[ie] = creal(hsub[1][ie][1][ie]); @@ -565,7 +566,7 @@ int LOBPCG_Main( */ for (ii = 1; ii < 3; ii++) { NormMPI_dv(i_max, X->Def.k_exct, wxp[ii], dnorm); -#pragma omp parallel for default(none) shared(i_max,wxp,hwxp,dnorm,ie,ii) private(idim) +#pragma omp parallel for default(none) shared(i_max,wxp,hwxp,dnorm,ii,X) private(idim,ie) for (idim = 1; idim <= i_max; idim++) { for (ie = 0; ie < X->Def.k_exct; ie++) { wxp[ii][idim][ie] /= dnorm[ie]; @@ -701,7 +702,7 @@ int CalcByLOBPCG( exitMPI(-1); } byte_size = fread(vin, sizeof(complex double), X->Bind.Check.idim_max + 1, fp); -#pragma omp parallel for default(none) shared(v1) firstprivate(i_max, ie), private(idim) +#pragma omp parallel for default(none) shared(v1,vin, i_max, ie), private(idim) for (idim = 1; idim <= i_max; idim++) { v1[ie][idim] = vin[idim]; } @@ -752,7 +753,7 @@ int CalcByLOBPCG( vin = cd_1d_allocate(X->Bind.Check.idim_max); for (ie = 0; ie < X->Bind.Def.k_exct; ie++) { -#pragma omp parallel for default(none) shared(X,v1,ie) private(idim) +#pragma omp parallel for default(none) shared(X,v1,ie,vin) private(idim) for (idim = 1; idim <= X->Bind.Check.idim_max; idim++) vin[idim] = v1[idim][ie]; diff --git a/src/CalcByTPQ.c b/src/CalcByTPQ.c index 2e48f1c62..3f34dd616 100644 --- a/src/CalcByTPQ.c +++ 
b/src/CalcByTPQ.c @@ -150,12 +150,10 @@ int CalcByTPQ( /**@brief Initialize v1 and compute v0 = H*v1 */ - for (rand_i = 0; rand_i < NumAve; rand_i++) { - FirstMultiply(rand_i, &(X->Bind)); - inv_temp[rand_i] = 0.0; - } + FirstMultiply(&(X->Bind)); StopTimer(3100); for (rand_i = 0; rand_i < NumAve; rand_i++) { + inv_temp[rand_i] = 0.0; if (childfopenMPI(sdt_phys[rand_i], "a", &fp) == 0) { fprintf(fp, "%.16lf %.16lf %.16lf %.16lf %.16lf %d\n", inv_temp[rand_i], X->Bind.Phys.energy[rand_i], X->Bind.Phys.var[rand_i], diff --git a/src/CalcSpectrumByFullDiag.c b/src/CalcSpectrumByFullDiag.c index fe6501118..20768d44c 100644 --- a/src/CalcSpectrumByFullDiag.c +++ b/src/CalcSpectrumByFullDiag.c @@ -96,7 +96,7 @@ int CalcSpectrumByFullDiag( for (iomega = 0; iomega < Nomega; iomega++) { dcSpectrum[iomega] = 0.0; for (idim = 0; idim < idim_max_int; idim++) { - dcSpectrum[iomega] += v1[idim] / (dcomega[iomega] - X->Bind.Phys.energy[idim]); + dcSpectrum[iomega] += vAv2[idim] / (dcomega[iomega] - X->Bind.Phys.energy[idim]); }/*for (idim = 0; idim < idim_max_int; idim++)*/ }/*for (iomega = 0; iomega < Nomega; iomega++)*/ StopTimer(6304); diff --git a/src/FirstMultiply.c b/src/FirstMultiply.c index b2ee8f898..ff7f69a46 100644 --- a/src/FirstMultiply.c +++ b/src/FirstMultiply.c @@ -37,18 +37,19 @@ /// \version 0.1 /// \author Takahiro Misawa (The University of Tokyo) /// \author Kazuyoshi Yoshimi (The University of Tokyo) -int FirstMultiply(int rand_i, struct BindStruct *X) { +int FirstMultiply(struct BindStruct *X) { long int i, i_max; double complex dnorm; double Ns; long unsigned int u_long_i; dsfmt_t dsfmt; - int mythread; + int mythread, rand_i; Ns = 1.0*X->Def.NsiteMPI; i_max = X->Check.idim_max; + for (rand_i = 0; rand_i < NumAve; rand_i++) { #pragma omp parallel default(none) private(i, mythread, u_long_i, dsfmt) \ shared(v0, v1, nthreads, myrank, rand_i, X, stdoutMPI, cLogCheckInitComplex, cLogCheckInitReal) \ firstprivate(i_max) @@ -71,8 +72,8 @@ int FirstMultiply(int 
rand_i, struct BindStruct *X) { dsfmt_init_gen_rand(&dsfmt, u_long_i); if (X->Def.iInitialVecType == 0) { - - StartTimer(3101); + + StartTimer(3101); #pragma omp for for (i = 1; i <= i_max; i++) v1[i][rand_i] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5) + 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5)*I; @@ -80,7 +81,7 @@ int FirstMultiply(int rand_i, struct BindStruct *X) { else { #pragma omp for for (i = 1; i <= i_max; i++) - v1[i][rand_i] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5); + v1[i][rand_i] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5); } StopTimer(3101); @@ -89,43 +90,47 @@ int FirstMultiply(int rand_i, struct BindStruct *X) { Normalize v */ dnorm=0.0; -#pragma omp parallel for default(none) private(i) shared(v1, i_max) reduction(+: dnorm) +#pragma omp parallel for default(none) private(i) shared(v1, i_max, rand_i) reduction(+: dnorm) for(i=1;i<=i_max;i++){ dnorm += conj(v1[i][rand_i])*v1[i][rand_i]; } dnorm = SumMPI_dc(dnorm); dnorm=sqrt(dnorm); global_1st_norm[rand_i] = dnorm; -#pragma omp parallel for default(none) private(i) shared(v0,v1) firstprivate(i_max, dnorm) +#pragma omp parallel for default(none) private(i) shared(v0,v1,rand_i) firstprivate(i_max, dnorm) for(i=1;i<=i_max;i++){ v1[i][rand_i] = v1[i][rand_i] / dnorm; v0[i][rand_i] = v1[i][rand_i]; } - + }/*for (rand_i = 0; rand_i < NumAve; rand_i++)*/ + TimeKeeperWithRandAndStep(X, cFileNameTimeKeep, cTPQStep, "a", rand_i, step_i); StartTimer(3102); - if(expec_energy_flct(X) !=0){ + if(expec_energy_flct(X, NumAve, v0, v1) !=0){ StopTimer(3102); return -1; } StopTimer(3102); -#pragma omp parallel for default(none) private(i) shared(v0, v1, list_1) firstprivate(i_max, Ns, LargeValue, myrank) - for(i = 1; i <= i_max; i++){ - v0[i][rand_i] = LargeValue * v1[i][rand_i] - v0[i][rand_i] / Ns; - } - dnorm=0.0; -#pragma omp parallel for default(none) private(i) shared(v0) firstprivate(i_max) reduction(+: dnorm) - for (i = 1; i <= i_max; i++) { - dnorm += conj(v0[i][rand_i])*v0[i][rand_i]; - } - 
dnorm = SumMPI_dc(dnorm); - dnorm = sqrt(dnorm); - global_norm[rand_i] = dnorm; -#pragma omp parallel for default(none) private(i) shared(v0) firstprivate(i_max, dnorm) - for (i = 1; i <= i_max; i++) { - v0[i][rand_i] = v0[i][rand_i] / dnorm; + for (rand_i = 0; rand_i < NumAve; rand_i++) { +#pragma omp parallel for default(none) private(i) shared(v0, v1, list_1,rand_i) firstprivate(i_max, Ns, LargeValue, myrank) + for (i = 1; i <= i_max; i++) { + v0[i][rand_i] = LargeValue * v1[i][rand_i] - v0[i][rand_i] / Ns; + } + + dnorm = 0.0; +#pragma omp parallel for default(none) private(i) shared(v0,rand_i) firstprivate(i_max) reduction(+: dnorm) + for (i = 1; i <= i_max; i++) { + dnorm += conj(v0[i][rand_i])*v0[i][rand_i]; + } + dnorm = SumMPI_dc(dnorm); + dnorm = sqrt(dnorm); + global_norm[rand_i] = dnorm; +#pragma omp parallel for default(none) private(i) shared(v0,rand_i) firstprivate(i_max, dnorm) + for (i = 1; i <= i_max; i++) { + v0[i][rand_i] = v0[i][rand_i] / dnorm; + } } TimeKeeperWithRandAndStep(X, cFileNameTimeKeep, cTPQStepEnd, "a", rand_i, step_i); return 0; diff --git a/src/PairExHubbard.c b/src/PairExHubbard.c index d7f816418..96327986d 100644 --- a/src/PairExHubbard.c +++ b/src/PairExHubbard.c @@ -36,7 +36,8 @@ /// \version 1.2 int GetPairExcitedStateHubbardGC( struct BindStruct *X,/**< [inout] define list to get and put information of calculation*/ - int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + int nstate, + double complex **tmp_v0, /**< [out] Result v0 = H v1*/ double complex **tmp_v1 /**< [in] v0 = H v1*/ ) { long unsigned int i, j; @@ -68,9 +69,7 @@ int GetPairExcitedStateHubbardGC( ibit = (unsigned long int) myrank & is; if (ibit != is) { //minus sign comes from negative tmp_trans due to readdef -#pragma omp parallel for default(none) shared(tmp_v0, tmp_v1) \ - firstprivate(i_max, tmp_trans) private(j) - for (j = 1; j <= i_max; j++) tmp_v0[j] += -tmp_trans * tmp_v1[j]; + zaxpy_long(i_max*nstate, -tmp_trans, &tmp_v1[1][0], 
&tmp_v0[1][0]); } } else {//X->Def.PairExcitationOperator[i][4]==1 @@ -82,21 +81,19 @@ int GetPairExcitedStateHubbardGC( } ibit = (unsigned long int) myrank & is; if (ibit == is) { -#pragma omp parallel for default(none) shared(tmp_v0, tmp_v1) \ - firstprivate(i_max, tmp_trans) private(j) - for (j = 1; j <= i_max; j++) tmp_v0[j] += tmp_trans * tmp_v1[j]; + zaxpy_long(i_max*nstate, tmp_trans, &tmp_v1[1][0], &tmp_v0[1][0]); } } } else { - X_GC_child_general_hopp_MPIdouble(org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, -tmp_trans, X, - tmp_v0, tmp_v1); + X_GC_child_general_hopp_MPIdouble(org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, + -tmp_trans, X, nstate, tmp_v0, tmp_v1); } } else if (org_isite2 > X->Def.Nsite || org_isite1 > X->Def.Nsite) { if (org_isite1 < org_isite2) { - X_GC_child_general_hopp_MPIsingle(org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, -tmp_trans, X, - tmp_v0, tmp_v1); + X_GC_child_general_hopp_MPIsingle(org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, + -tmp_trans, X, nstate, tmp_v0, tmp_v1); } else { X_GC_child_general_hopp_MPIsingle(org_isite2 - 1, org_sigma2, org_isite1 - 1, org_sigma1, @@ -116,7 +113,7 @@ int GetPairExcitedStateHubbardGC( if (child_general_hopp_GetInfo(X, org_isite1, org_isite2, org_sigma1, org_sigma2) != 0) { return -1; } - GC_child_general_hopp(tmp_v0, tmp_v1, X, tmp_trans); + GC_child_general_hopp(nstate, tmp_v0, tmp_v1, X, tmp_trans); } } } @@ -231,9 +228,7 @@ private(j, tmp_sgn, tmp_off) } else { if (ibit == is) { -#pragma omp parallel for default(none) shared(tmp_v0, tmp_v1) \ - firstprivate(i_max, tmp_trans) private(j) - for (j = 1; j <= i_max; j++) tmp_v0[j] += tmp_trans * tmp_v1[j]; + zaxpy_long(i_max*nstate, tmp_trans, &tmp_v1[1][0], &tmp_v0[1][0]); } } } diff --git a/src/SingleExHubbard.c b/src/SingleExHubbard.c index 96ddc4a55..17f832e44 100644 --- a/src/SingleExHubbard.c +++ b/src/SingleExHubbard.c @@ -70,9 +70,9 @@ int GetSingleExcitedStateHubbard( X->Large.irght, X->Large.ilft, 
X->Large.ihfbit); } else { -#pragma omp parallel for default(none) shared(tmp_v0, tmp_v1, X, list_1_org) \ +#pragma omp parallel for default(none) shared(tmp_v0, tmp_v1, X, list_1_org,one) \ firstprivate(idim_max, tmpphi, org_isite, ispin, list_2_1, list_2_2, is1_spin) \ -private(j, isgn,tmp_off) +private(j, isgn,tmp_off,dmv) for (j = 1; j <= idim_max; j++) {//idim_max -> original dimension isgn = X_Cis(j, is1_spin, &tmp_off, list_1_org, list_2_1, list_2_2, X->Large.irght, X->Large.ilft, X->Large.ihfbit); diff --git a/src/StdFace/makefile_StdFace b/src/StdFace/makefile_StdFace deleted file mode 100644 index e801ffdc2..000000000 --- a/src/StdFace/makefile_StdFace +++ /dev/null @@ -1,30 +0,0 @@ -include ../make.sys - -StdFace_OBJS = StdFace_main.o StdFace_ModelUtil.o -Lattice_OBJS = SquareLattice.o ChainLattice.o TriangularLattice.o \ - HoneycombLattice.o Ladder.o Kagome.o Orthorhombic.o FCOrtho.o \ - Pyrochlore.o Wannier90.o - -libStdFace.a:$(StdFace_OBJS) $(Lattice_OBJS) - ar -r -v $(AROPT) $@ $(StdFace_OBJS) $(Lattice_OBJS) - -SUFFIXES: .o .c - -.c.o: - $(CC) $(CFLAGS) -D _HPhi -c $< - -clean: - rm -f *.o *.a - -StdFace_main.o:StdFace_main.c StdFace_vals.h StdFace_ModelUtil.h -StdFace_ModelUtil.o:StdFace_ModelUtil.c StdFace_vals.h -SquareLattice.o:SquareLattice.c StdFace_vals.h StdFace_ModelUtil.h -ChainLattice.o:ChainLattice.c StdFace_vals.h StdFace_ModelUtil.h -TriangularLattice.o:TriangularLattice.c StdFace_vals.h StdFace_ModelUtil.h -HoneycombLattice.o:HoneycombLattice.c StdFace_vals.h StdFace_ModelUtil.h -Ladder.o:Ladder.c StdFace_vals.h StdFace_ModelUtil.h -Kagome.o:Kagome.c StdFace_vals.h StdFace_ModelUtil.h -Orthorhombic.o:Orthorhombic.c StdFace_vals.h StdFace_ModelUtil.h -FCOrtho.o:FCOrtho.c StdFace_vals.h StdFace_ModelUtil.h -Pyrochlore.o:Pyrochlore.c StdFace_vals.h StdFace_ModelUtil.h -Wannier90.o:Wannier90.c StdFace_vals.h StdFace_ModelUtil.h diff --git a/src/diagonalcalc.c b/src/diagonalcalc.c index 3a6ee7157..a2d0d99a6 100644 --- a/src/diagonalcalc.c 
+++ b/src/diagonalcalc.c @@ -32,46 +32,12 @@ * * */ - #include #include "FileIO.h" #include "diagonalcalc.h" #include "mltplySpinCore.h" #include "wrapperMPI.h" - - -int SetDiagonalTETransfer( - long unsigned int isite1, - double dtmp_V, - long unsigned int spin, - struct BindStruct *X, - int nstate, double complex **tmp_v0, - double complex **tmp_v1 -); - -int SetDiagonalTEInterAll( - long unsigned int isite1, - long unsigned int isite2, - long unsigned int isigma1, - long unsigned int isigma2, - double dtmp_V, - struct BindStruct *X, - int nstate, double complex **tmp_v0, - double complex **tmp_v1 -); - -int SetDiagonalTEChemi( - long unsigned int isite1, - long unsigned int spin, - double dtmp_V, - struct BindStruct *X, - int nstate, double complex **tmp_v0, - double complex **tmp_v1 -); - /** - * - * * @brief Calculate diagonal components and obtain the list, list_diagonal. * * @param X [in] Struct to get the information of the diagonal operators. @@ -83,494 +49,475 @@ int SetDiagonalTEChemi( */ int diagonalcalc ( - struct BindStruct *X - ){ - + struct BindStruct *X +) { + FILE *fp; - long unsigned int i,j; - long unsigned int isite1,isite2; + long unsigned int i, j; + long unsigned int isite1, isite2; long unsigned int spin; double tmp_V; /*[s] For InterAll*/ - long unsigned int A_spin,B_spin; + long unsigned int A_spin, B_spin; /*[e] For InterAll*/ - long unsigned int i_max=X->Check.idim_max; + long unsigned int i_max = X->Check.idim_max; fprintf(stdoutMPI, "%s", cProStartCalcDiag); TimeKeeper(X, cFileNameTimeKeep, cDiagonalCalcStart, "a"); #pragma omp parallel for default(none) private(j) shared(list_Diagonal) firstprivate(i_max) - for(j = 1;j <= i_max; j++){ - list_Diagonal[j]=0.0; + for (j = 1; j <= i_max; j++) { + list_Diagonal[j] = 0.0; } - - if(X->Def.NCoulombIntra>0){ - if(childfopenMPI(cFileNameCheckCoulombIntra, "w", &fp)!=0){ + + if (X->Def.NCoulombIntra > 0) { + if (childfopenMPI(cFileNameCheckCoulombIntra, "w", &fp) != 0) { return -1; } - for(i = 
0; i < X->Def.NCoulombIntra; i++){ - isite1 = X->Def.CoulombIntra[i][0]+1; - tmp_V = X->Def.ParaCoulombIntra[i]; - fprintf(fp,"i=%ld isite1=%ld tmp_V=%lf \n",i,isite1,tmp_V); + for (i = 0; i < X->Def.NCoulombIntra; i++) { + isite1 = X->Def.CoulombIntra[i][0] + 1; + tmp_V = X->Def.ParaCoulombIntra[i]; + fprintf(fp, "i=%ld isite1=%ld tmp_V=%lf \n", i, isite1, tmp_V); SetDiagonalCoulombIntra(isite1, tmp_V, X); } fclose(fp); } - if(X->Def.EDNChemi>0){ - if(childfopenMPI(cFileNameCheckChemi,"w", &fp)!=0){ + if (X->Def.EDNChemi > 0) { + if (childfopenMPI(cFileNameCheckChemi, "w", &fp) != 0) { return -1; } - for(i = 0; i < X->Def.EDNChemi; i++){ - isite1 = X->Def.EDChemi[i]+1; - spin = X->Def.EDSpinChemi[i]; - tmp_V = -X->Def.EDParaChemi[i]; - fprintf(fp,"i=%ld spin=%ld isite1=%ld tmp_V=%lf \n",i,spin,isite1,tmp_V); - if(SetDiagonalChemi(isite1, tmp_V,spin, X) !=0){ + for (i = 0; i < X->Def.EDNChemi; i++) { + isite1 = X->Def.EDChemi[i] + 1; + spin = X->Def.EDSpinChemi[i]; + tmp_V = -X->Def.EDParaChemi[i]; + fprintf(fp, "i=%ld spin=%ld isite1=%ld tmp_V=%lf \n", i, spin, isite1, tmp_V); + if (SetDiagonalChemi(isite1, tmp_V, spin, X) != 0) { return -1; } } fclose(fp); } - - if(X->Def.NCoulombInter>0){ - if(childfopenMPI(cFileNameCheckInterU,"w", &fp)!=0){ + + if (X->Def.NCoulombInter > 0) { + if (childfopenMPI(cFileNameCheckInterU, "w", &fp) != 0) { return -1; } - for(i = 0; i < X->Def.NCoulombInter; i++){ - isite1 = X->Def.CoulombInter[i][0]+1; - isite2 = X->Def.CoulombInter[i][1]+1; - tmp_V = X->Def.ParaCoulombInter[i]; - fprintf(fp,"i=%ld isite1=%ld isite2=%ld tmp_V=%lf \n",i,isite1,isite2,tmp_V); - if(SetDiagonalCoulombInter(isite1, isite2, tmp_V, X) !=0){ + for (i = 0; i < X->Def.NCoulombInter; i++) { + isite1 = X->Def.CoulombInter[i][0] + 1; + isite2 = X->Def.CoulombInter[i][1] + 1; + tmp_V = X->Def.ParaCoulombInter[i]; + fprintf(fp, "i=%ld isite1=%ld isite2=%ld tmp_V=%lf \n", i, isite1, isite2, tmp_V); + if (SetDiagonalCoulombInter(isite1, isite2, tmp_V, X) != 0) { 
return -1; } } - fclose(fp); + fclose(fp); } - if(X->Def.NHundCoupling>0){ - if(childfopenMPI(cFileNameCheckHund,"w", &fp) !=0){ + if (X->Def.NHundCoupling > 0) { + if (childfopenMPI(cFileNameCheckHund, "w", &fp) != 0) { return -1; } - for(i = 0; i < X->Def.NHundCoupling; i++){ - isite1 = X->Def.HundCoupling[i][0]+1; - isite2 = X->Def.HundCoupling[i][1]+1; - tmp_V = -X->Def.ParaHundCoupling[i]; - if(SetDiagonalHund(isite1, isite2, tmp_V, X) !=0){ + for (i = 0; i < X->Def.NHundCoupling; i++) { + isite1 = X->Def.HundCoupling[i][0] + 1; + isite2 = X->Def.HundCoupling[i][1] + 1; + tmp_V = -X->Def.ParaHundCoupling[i]; + if (SetDiagonalHund(isite1, isite2, tmp_V, X) != 0) { return -1; } - fprintf(fp,"i=%ld isite1=%ld isite2=%ld tmp_V=%lf \n",i,isite1,isite2,tmp_V); + fprintf(fp, "i=%ld isite1=%ld isite2=%ld tmp_V=%lf \n", i, isite1, isite2, tmp_V); } - fclose(fp); + fclose(fp); } - if(X->Def.NInterAll_Diagonal>0){ - if(childfopenMPI(cFileNameCheckInterAll,"w", &fp) !=0){ + if (X->Def.NInterAll_Diagonal > 0) { + if (childfopenMPI(cFileNameCheckInterAll, "w", &fp) != 0) { return -1; } - for(i = 0; i < X->Def.NInterAll_Diagonal; i++){ - isite1=X->Def.InterAll_Diagonal[i][0]+1; - A_spin=X->Def.InterAll_Diagonal[i][1]; - isite2=X->Def.InterAll_Diagonal[i][2]+1; - B_spin=X->Def.InterAll_Diagonal[i][3]; - tmp_V = X->Def.ParaInterAll_Diagonal[i]; - fprintf(fp,"i=%ld isite1=%ld A_spin=%ld isite2=%ld B_spin=%ld tmp_V=%lf \n", i, isite1, A_spin, isite2, B_spin, tmp_V); + for (i = 0; i < X->Def.NInterAll_Diagonal; i++) { + isite1 = X->Def.InterAll_Diagonal[i][0] + 1; + A_spin = X->Def.InterAll_Diagonal[i][1]; + isite2 = X->Def.InterAll_Diagonal[i][2] + 1; + B_spin = X->Def.InterAll_Diagonal[i][3]; + tmp_V = X->Def.ParaInterAll_Diagonal[i]; + fprintf(fp, "i=%ld isite1=%ld A_spin=%ld isite2=%ld B_spin=%ld tmp_V=%lf \n", i, isite1, A_spin, isite2, B_spin, tmp_V); SetDiagonalInterAll(isite1, isite2, A_spin, B_spin, tmp_V, X); - } - fclose(fp); - } - - TimeKeeper(X, cFileNameTimeKeep, 
cDiagonalCalcFinish, "a"); - fprintf(stdoutMPI, "%s", cProEndCalcDiag); - return 0; -} - -/// @fn diagonalcalcForTE() Update the vector for diagonal operators ( using in Time Evolution mode). -/// \param X [in] Struct to get the information of the diagonal operators. -/// \param tmp_v0 [in,out] Result vector -/// \param tmp_v1 [in] Input produced vector -/// \retval -1 fail to update the vector. -/// \retval 0 succeed to update the vector. -/// \version 2.1 -int diagonalcalcForTE - ( - const int _istep, - struct BindStruct *X, - int nstate, double complex **tmp_v0, - double complex **tmp_v1 - ) { - - long unsigned int i; - long unsigned int isite1, isite2; - long unsigned int A_spin, B_spin; - double tmp_V; - - if (X->Def.NTETransferDiagonal[_istep] > 0) { - for (i = 0; i < X->Def.NTETransferDiagonal[_istep]; i++) { - isite1 = X->Def.TETransferDiagonal[_istep][i][0] + 1; - A_spin = X->Def.TETransferDiagonal[_istep][i][1]; - tmp_V = X->Def.ParaTETransferDiagonal[_istep][i]; - SetDiagonalTETransfer(isite1, tmp_V, A_spin, X, nstate, tmp_v0, tmp_v1); } + fclose(fp); } - else if (X->Def.NTEInterAllDiagonal[_istep] >0) { - for (i = 0; i < X->Def.NTEInterAllDiagonal[_istep]; i++) { - //Assume n_{1\sigma_1} n_{2\sigma_2} - isite1 = X->Def.TEInterAllDiagonal[_istep][i][0] + 1; - A_spin = X->Def.TEInterAllDiagonal[_istep][i][1]; - isite2 = X->Def.TEInterAllDiagonal[_istep][i][2] + 1; - B_spin = X->Def.TEInterAllDiagonal[_istep][i][3]; - tmp_V = X->Def.ParaTEInterAllDiagonal[_istep][i]; - - if (SetDiagonalTEInterAll(isite1, isite2, A_spin, B_spin, tmp_V, X, nstate, tmp_v0, tmp_v1) != 0) { - return -1; - } - } - if (X->Def.NTEChemi[_istep] > 0) { - for(i=0; i< X->Def.NTEChemi[_istep]; i++) { - isite1 = X->Def.TEChemi[_istep][i] + 1; - A_spin = X->Def.SpinTEChemi[_istep][i]; - tmp_V = -X->Def.ParaTEChemi[_istep][i]; - if (SetDiagonalTEChemi(isite1, A_spin, tmp_V, X, nstate, tmp_v0, tmp_v1) != 0) { - return -1; - } - } - } - } + TimeKeeper(X, cFileNameTimeKeep, 
cDiagonalCalcFinish, "a"); + fprintf(stdoutMPI, "%s", cProEndCalcDiag); return 0; } - - /** - * - * - * @brief Calculate the components for Coulombintra interaction, \f$ U_i n_ {i \uparrow}n_{i \downarrow} \f$ - * @param isite1 [in] a site number - * @param dtmp_V [in] A value of coulombintra interaction \f$ U_i \f$ - * @param X [in] Define list to get dimension number + * @brief Update the vector by the general two-body diagonal interaction, \f$ H_{i\sigma_1 j\sigma_2} n_ {i\sigma_1}n_{j\sigma_2}\f$.\n + * (Using in Time Evolution mode). + * @param isite1 [in] a site number \f$i \f$ + * @param isite2 [in] a site number \f$j \f$ + * @param isigma1 [in] a spin index at \f$i \f$ site. + * @param isigma2 [in] a spin index at \f$j \f$ site. + * @param dtmp_V [in] A value of general two-body diagonal interaction \f$ H_{i\sigma_1 j\sigma_2} \f$ + * @param X [in] Define list to get the operator information. + * @param tmp_v0 [in,out] Result vector + * @param tmp_v1 [in] Input produced vector * @retval -1 fail to calculate the diagonal component. * @retval 0 succeed to calculate the diagonal component. 
* - * @version 0.1 - * @author Takahiro Misawa (The University of Tokyo) + * @version 2.1 * @author Kazuyoshi Yoshimi (The University of Tokyo) */ -int SetDiagonalCoulombIntra -( - long unsigned int isite1, - double dtmp_V, - struct BindStruct *X - ){ - long unsigned int is; - long unsigned int ibit; - long unsigned int is1_up, is1_down; +int SetDiagonalTEInterAll( + long unsigned int isite1, + long unsigned int isite2, + long unsigned int isigma1, + long unsigned int isigma2, + double dtmp_V, + struct BindStruct *X, + int nstate, + double complex *tmp_v0, + double complex *tmp_v1 +) { + long unsigned int is1_spin; + long unsigned int is2_spin; + long unsigned int is1_up; + long unsigned int is2_up; + + long unsigned int ibit1_spin; + long unsigned int ibit2_spin; + + long unsigned int num1; + long unsigned int num2; long unsigned int j; - long unsigned int i_max=X->Check.idim_max; + long unsigned int i_max = X->Check.idim_max; + double complex dam_pr = 0.0; + /* - When isite1 is in the inter process region + Forse isite1 <= isite2 + */ + if (isite2 < isite1) { + j = isite2; + isite2 = isite1; + isite1 = j; + j = isigma2; + isigma2 = isigma1; + isigma1 = j; + } + /* + When isite1 & site2 are in the inter process regino */ - if (isite1 > X->Def.Nsite){ - + if (isite1 > X->Def.Nsite) { + switch (X->Def.iCalcModel) { case HubbardGC: case KondoGC: case Hubbard: case Kondo: + is1_spin = X->Def.Tpow[2 * isite1 - 2 + isigma1]; + is2_spin = X->Def.Tpow[2 * isite2 - 2 + isigma2]; + num1 = 0; + ibit1_spin = (unsigned long int)myrank&is1_spin; + num1 += ibit1_spin / is1_spin; + num2 = 0; + ibit2_spin = (unsigned long int)myrank&is2_spin; + num2 += ibit2_spin / is2_spin; + break;/*case HubbardGC, KondoGC, Hubbard, Kondo:*/ - is1_up = X->Def.Tpow[2 * isite1 - 2]; - is1_down = X->Def.Tpow[2 * isite1 - 1]; - is = is1_up + is1_down; - ibit = (unsigned long int)myrank & is; - if (ibit == is) { -#pragma omp parallel for default(none) shared(list_Diagonal) \ - firstprivate(i_max, 
dtmp_V) private(j) - for (j = 1; j <= i_max; j++) list_Diagonal[j] += dtmp_V; - } - - break; /*case HubbardGC, KondoGC, Hubbard, Kondo:*/ - - case Spin: case SpinGC: - /* - They do not have the Coulomb term - */ - break; + case Spin: + if (X->Def.iFlgGeneralSpin == FALSE) { + is1_up = X->Def.Tpow[isite1 - 1]; + is2_up = X->Def.Tpow[isite2 - 1]; + num1 = X_SpinGC_CisAis((unsigned long int) myrank + 1, X, is1_up, isigma1); + num2 = X_SpinGC_CisAis((unsigned long int) myrank + 1, X, is2_up, isigma2); + }/*if (X->Def.iFlgGeneralSpin == FALSE)*/ + else {//start:generalspin + num1 = BitCheckGeneral((unsigned long int) myrank, isite1, isigma1, + X->Def.SiteToBit, X->Def.Tpow); + num2 = BitCheckGeneral((unsigned long int) myrank, isite2, isigma2, + X->Def.SiteToBit, X->Def.Tpow); + } + break;/*case SpinGC, Spin:*/ default: fprintf(stdoutMPI, cErrNoModel, X->Def.iCalcModel); return -1; - //break; - - }/*switch (X->Def.iCalcModel)*/ - - return 0; + }/*if (isite1 > X->Def.Nsite)*/ - }/*if (isite1 >= X->Def.Nsite*/ - else{ - switch (X->Def.iCalcModel){ - case HubbardGC: - is1_up = X->Def.Tpow[2*isite1-2]; - is1_down = X->Def.Tpow[2*isite1-1]; - is=is1_up+is1_down; -#pragma omp parallel for default(none) shared(list_Diagonal, list_1) firstprivate(i_max, is, dtmp_V) private(ibit) - for(j = 1;j <= i_max;j++){ - ibit=(j-1)&is; - if(ibit==is){ - list_Diagonal[j]+=dtmp_V; - } - } - - break; - case KondoGC: - case Hubbard: - case Kondo: - is1_up = X->Def.Tpow[2*isite1-2]; - is1_down = X->Def.Tpow[2*isite1-1]; - is=is1_up+is1_down; -#pragma omp parallel for default(none) shared(list_Diagonal, list_1) firstprivate(i_max, is, dtmp_V) private(ibit) - for(j = 1;j <= i_max;j++){ - ibit=list_1[j]&is; - if(ibit==is){ - list_Diagonal[j]+=dtmp_V; - } + if (num1 * num2 != 0) { +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1) \ +firstprivate(i_max, dtmp_V) private(j) + for (j = 1; j <= i_max; j++) { + tmp_v0[j] += dtmp_V * tmp_v1[j]; + dam_pr += dtmp_V * 
conj(tmp_v1[j]) * tmp_v1[j]; } - break; - - case Spin: - case SpinGC: - break; - - default: - fprintf(stdoutMPI, cErrNoModel, X->Def.iCalcModel); - return -1; - //break; } - } - return 0; -} + dam_pr = SumMPI_dc(dam_pr); + X->Large.prdct += dam_pr; + return 0; + }/*if (isite1 > X->Def.Nsite)*/ + else if (isite2 > X->Def.Nsite) { -/** - * - * - * @brief Calculate the components for the chemical potential \f$ \mu_{i \sigma_1} n_ {i \sigma_1} \f$ - * @param isite1 [in] a site number - * @param dtmp_V [in] A value of coulombintra interaction \f$ \mu_{i \sigma_1} \f$ - * @param spin [in] Spin index for the chemical potential - * @param X [in] Define list to get dimension number - * @retval -1 fail to calculate the diagonal component. - * @retval 0 succeed to calculate the diagonal component. - * - * @version 0.1 - * @author Takahiro Misawa (The University of Tokyo) - * @author Kazuyoshi Yoshimi (The University of Tokyo) - */ -int SetDiagonalChemi -( - long unsigned int isite1, - double dtmp_V, - long unsigned int spin, - struct BindStruct *X - ){ - long unsigned int is1_up; - long unsigned int ibit1_up; - long unsigned int num1; - long unsigned int isigma1 =spin; - long unsigned int is1,ibit1; + switch (X->Def.iCalcModel) { - long unsigned int j; - long unsigned int i_max=X->Check.idim_max; + case HubbardGC: - /* - When isite1 is in the inter process region - */ - if (isite1 > X->Def.Nsite){ + is1_spin = X->Def.Tpow[2 * isite1 - 2 + isigma1]; + is2_spin = X->Def.Tpow[2 * isite2 - 2 + isigma2]; - switch (X->Def.iCalcModel) { + num2 = 0; + ibit2_spin = (unsigned long int)myrank&is2_spin; + num2 += ibit2_spin / is2_spin; + if (num2 != 0) { +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1)\ + firstprivate(i_max, dtmp_V, is1_spin) private(num1, ibit1_spin, j) + for (j = 1; j <= i_max; j++) { + num1 = 0; + ibit1_spin = (j - 1) & is1_spin; + num1 += ibit1_spin / is1_spin; + tmp_v0[j] += dtmp_V * num1 * tmp_v1[j]; + dam_pr += dtmp_V * num1 * 
conj(tmp_v1[j]) * tmp_v1[j]; + } + } + break;/*case HubbardGC:*/ - case HubbardGC: case KondoGC: case Hubbard: case Kondo: - if (spin == 0) { - is1 = X->Def.Tpow[2 * isite1 - 2]; - } - else { - is1 = X->Def.Tpow[2 * isite1 - 1]; - } - ibit1 = (unsigned long int)myrank & is1; - num1 = ibit1 / is1; -#pragma omp parallel for default(none) shared(list_Diagonal) \ - firstprivate(i_max, dtmp_V, num1) private(j) - for (j = 1; j <= i_max; j++) list_Diagonal[j] += num1*dtmp_V; - - break;/*case HubbardGC, case KondoGC, Hubbard, Kondo:*/ - - case SpinGC: - case Spin: + is1_spin = X->Def.Tpow[2 * isite1 - 2 + isigma1]; + is2_spin = X->Def.Tpow[2 * isite2 - 2 + isigma2]; - if (X->Def.iFlgGeneralSpin == FALSE) { - is1_up = X->Def.Tpow[isite1 - 1]; - ibit1_up = (((unsigned long int)myrank& is1_up) / is1_up) ^ (1 - spin); -#pragma omp parallel for default(none) shared(list_Diagonal) \ -firstprivate(i_max, dtmp_V, ibit1_up) private(j) - for (j = 1; j <= i_max; j++) list_Diagonal[j] += dtmp_V * ibit1_up; - } /*if (X->Def.iFlgGeneralSpin == FALSE)*/ - else /*if (X->Def.iFlgGeneralSpin == TRUE)*/ { - num1 = BitCheckGeneral((unsigned long int)myrank, - isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); - if (num1 != 0) { -#pragma omp parallel for default(none) shared(list_Diagonal) \ -firstprivate(i_max, dtmp_V) private(j) - for (j = 1; j <= i_max; j++) list_Diagonal[j] += dtmp_V; - }/*if (num1 != 0)*/ - }/*if (X->Def.iFlgGeneralSpin == TRUE)*/ - break;/*case SpinGC, Spin:*/ + num2 = 0; + ibit2_spin = (unsigned long int)myrank&is2_spin; + num2 += ibit2_spin / is2_spin; + if (num2 != 0) { +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1, list_1)\ + firstprivate(i_max, dtmp_V, is1_spin) private(num1, ibit1_spin, j) + for (j = 1; j <= i_max; j++) { + num1 = 0; + ibit1_spin = list_1[j] & is1_spin; + num1 += ibit1_spin / is1_spin; + tmp_v0[j] += dtmp_V * num1*tmp_v1[j]; + dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; + } + } + break;/*case KondoGC, 
Hubbard, Kondo:*/ + + case SpinGC: + + if (X->Def.iFlgGeneralSpin == FALSE) { + is1_up = X->Def.Tpow[isite1 - 1]; + is2_up = X->Def.Tpow[isite2 - 1]; + num2 = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, isigma2); + + if (num2 != 0) { +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1)\ + firstprivate(i_max, dtmp_V, is1_up, isigma1, X) private(num1, j) + for (j = 1; j <= i_max; j++) { + num1 = X_SpinGC_CisAis(j, X, is1_up, isigma1); + tmp_v0[j] += dtmp_V * num1 * tmp_v1[j]; + dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; + } + } + }/* if (X->Def.iFlgGeneralSpin == FALSE)*/ + else {//start:generalspin + num2 = BitCheckGeneral((unsigned long int)myrank, isite2, isigma2, + X->Def.SiteToBit, X->Def.Tpow); + if (num2 != 0) { +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1) \ +firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) + for (j = 1; j <= i_max; j++) { + num1 = BitCheckGeneral(j - 1, isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); + tmp_v0[j] += dtmp_V * num1 * tmp_v1[j]; + dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; + } + } + }/* if (X->Def.iFlgGeneralSpin == TRUE)*/ + + break;/*case SpinGC:*/ + + case Spin: + + if (X->Def.iFlgGeneralSpin == FALSE) { + is1_up = X->Def.Tpow[isite1 - 1]; + is2_up = X->Def.Tpow[isite2 - 1]; + num2 = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, isigma2); + + if (num2 != 0) { +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1) \ +firstprivate(i_max, dtmp_V, is1_up, isigma1, X, num2) private(j, num1) + for (j = 1; j <= i_max; j++) { + num1 = X_Spin_CisAis(j, X, is1_up, isigma1); + tmp_v0[j] += dtmp_V * num1 * tmp_v1[j]; + dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; + } + } + }/* if (X->Def.iFlgGeneralSpin == FALSE)*/ + else /* if (X->Def.iFlgGeneralSpin == TRUE)*/ { + num2 = BitCheckGeneral((unsigned long int)myrank, isite2, isigma2, \ + X->Def.SiteToBit, X->Def.Tpow); + if 
(num2 != 0) { +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1, list_1)\ +firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) + for (j = 1; j <= i_max; j++) { + num1 = BitCheckGeneral(list_1[j], isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); + tmp_v0[j] += dtmp_V * num1 * tmp_v1[j]; + dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; + } + } + } /* if (X->Def.iFlgGeneralSpin == TRUE)*/ + + break;/*case Spin:*/ default: fprintf(stdoutMPI, cErrNoModel, X->Def.iCalcModel); return -1; - } /*switch (X->Def.iCalcModel)*/ - + }/*switch (X->Def.iCalcModel)*/ + dam_pr = SumMPI_dc(dam_pr); + X->Large.prdct += dam_pr; return 0; + }/*else if (isite2 > X->Def.Nsite)*/ - }/*if (isite1 >= X->Def.Nsite*/ - - switch (X->Def.iCalcModel){ - case HubbardGC: - if(spin==0){ - is1 = X->Def.Tpow[2*isite1-2]; - }else{ - is1 = X->Def.Tpow[2*isite1-1]; - } -#pragma omp parallel for default(none) shared(list_1, list_Diagonal) firstprivate(i_max, dtmp_V, is1) private(num1, ibit1) - for(j = 1;j <= i_max;j++){ - - ibit1 = (j-1)&is1; - num1 = ibit1/is1; - //fprintf(stdoutMPI, "DEBUG: spin=%ld is1=%ld: isite1=%ld j=%ld num1=%ld \n",spin,is1,isite1,j,num1); - - list_Diagonal[j]+=num1*dtmp_V; + switch (X->Def.iCalcModel) { + case HubbardGC: //list_1[j] -> j-1 + is1_spin = X->Def.Tpow[2 * isite1 - 2 + isigma1]; + is2_spin = X->Def.Tpow[2 * isite2 - 2 + isigma2]; +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1_spin, is2_spin) private(num1, ibit1_spin, num2, ibit2_spin) + for (j = 1; j <= i_max; j++) { + num1 = 0; + num2 = 0; + ibit1_spin = (j - 1)&is1_spin; + num1 += ibit1_spin / is1_spin; + ibit2_spin = (j - 1)&is2_spin; + num2 += ibit2_spin / is2_spin; + tmp_v0[j] += dtmp_V * num1*num2*tmp_v1[j]; + dam_pr += dtmp_V * num1*num2*conj(tmp_v1[j]) * tmp_v1[j]; } break; case KondoGC: case Hubbard: case Kondo: - if(spin==0){ - is1 = X->Def.Tpow[2*isite1-2]; - }else{ - is1 = 
X->Def.Tpow[2*isite1-1]; - } + is1_spin = X->Def.Tpow[2 * isite1 - 2 + isigma1]; + is2_spin = X->Def.Tpow[2 * isite2 - 2 + isigma2]; -#pragma omp parallel for default(none) shared(list_1, list_Diagonal) firstprivate(i_max, dtmp_V, is1) private(num1, ibit1) - for(j = 1;j <= i_max;j++){ +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1, list_1) firstprivate(i_max, dtmp_V, is1_spin, is2_spin) private(num1, ibit1_spin, num2, ibit2_spin) + for (j = 1; j <= i_max; j++) { + num1 = 0; + num2 = 0; + ibit1_spin = list_1[j] & is1_spin; + num1 += ibit1_spin / is1_spin; - ibit1 = list_1[j]&is1; - num1 = ibit1/is1; - list_Diagonal[j]+=num1*dtmp_V; + ibit2_spin = list_1[j] & is2_spin; + num2 += ibit2_spin / is2_spin; + tmp_v0[j] += dtmp_V * num1*num2*tmp_v1[j]; + dam_pr += dtmp_V * num1*num2*conj(tmp_v1[j]) * tmp_v1[j]; } break; - - case SpinGC: - if(X->Def.iFlgGeneralSpin==FALSE){ - is1_up = X->Def.Tpow[isite1-1]; -#pragma omp parallel for default(none) shared(list_1, list_Diagonal) firstprivate(i_max, dtmp_V, is1_up, spin) private(num1, ibit1_up) - for(j = 1;j <= i_max;j++){ - ibit1_up=(((j-1)& is1_up)/is1_up)^(1-spin); - list_Diagonal[j] += dtmp_V * ibit1_up; + + case Spin: + if (X->Def.iFlgGeneralSpin == FALSE) { + is1_up = X->Def.Tpow[isite1 - 1]; + is2_up = X->Def.Tpow[isite2 - 1]; +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1_up, is2_up, isigma1, isigma2, X) private(j, num1, num2) + for (j = 1; j <= i_max; j++) { + num1 = X_Spin_CisAis(j, X, is1_up, isigma1); + num2 = X_Spin_CisAis(j, X, is2_up, isigma2); + tmp_v0[j] += dtmp_V * num1*num2*tmp_v1[j]; + dam_pr += dtmp_V * num1*num2*conj(tmp_v1[j]) * tmp_v1[j]; } } - else{ -#pragma omp parallel for default(none) shared(list_Diagonal) firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) - for(j = 1;j <= i_max; j++){ - num1=BitCheckGeneral (j-1, isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); - if(num1 != 0){ - 
list_Diagonal[j] += dtmp_V; - } - } + else { +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1, list_1) firstprivate(i_max, dtmp_V, isite1, isite2, isigma1, isigma2, X) private(j, num1) + for (j = 1; j <= i_max; j++) { + num1 = BitCheckGeneral(list_1[j], isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); + if (num1 != 0) { + num1 = BitCheckGeneral(list_1[j], isite2, isigma2, X->Def.SiteToBit, X->Def.Tpow); + tmp_v0[j] += dtmp_V * num1*tmp_v1[j]; + dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; + } + } + } break; - case Spin: - if(X->Def.iFlgGeneralSpin==FALSE){ - is1_up = X->Def.Tpow[isite1-1]; -#pragma omp parallel for default(none) shared(list_1, list_Diagonal) firstprivate(i_max, dtmp_V, is1_up, spin) private(num1, ibit1_up) - for(j = 1;j <= i_max;j++){ - ibit1_up=((list_1[j]& is1_up)/is1_up)^(1-spin); - list_Diagonal[j] += dtmp_V * ibit1_up; + case SpinGC: + if (X->Def.iFlgGeneralSpin == FALSE) { + is1_up = X->Def.Tpow[isite1 - 1]; + is2_up = X->Def.Tpow[isite2 - 1]; +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1_up, is2_up, isigma1, isigma2, X) private(j, num1, num2) + for (j = 1; j <= i_max; j++) { + num1 = X_SpinGC_CisAis(j, X, is1_up, isigma1); + num2 = X_SpinGC_CisAis(j, X, is2_up, isigma2); + tmp_v0[j] += dtmp_V * num1*num2*tmp_v1[j]; + dam_pr += dtmp_V * num1*num2*conj(tmp_v1[j]) * tmp_v1[j]; } } - else{ -#pragma omp parallel for default(none) shared(list_Diagonal, list_1) firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) - for(j = 1;j <= i_max; j++){ - num1=BitCheckGeneral (list_1[j], isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); - if(num1 != 0){ - list_Diagonal[j] += dtmp_V; - } - } + else {//start:generalspin +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, isite1, isite2, isigma1, isigma2, X) private(j, num1) + for (j = 1; j <= i_max; j++) { + num1 = BitCheckGeneral(j - 
1, isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); + if (num1 != 0) { + num1 = BitCheckGeneral(j - 1, isite2, isigma2, X->Def.SiteToBit, X->Def.Tpow); + tmp_v0[j] += dtmp_V * num1*tmp_v1[j]; + dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; + } + } } - break; + default: fprintf(stdoutMPI, cErrNoModel, X->Def.iCalcModel); return -1; } - + dam_pr = SumMPI_dc(dam_pr); + X->Large.prdct += dam_pr; return 0; } - /** - * - * @brief Calculate the components for Coulombinter interaction, \f$ V_{ij} n_ {i}n_{j} \f$ - * @param isite1 [in] a site number \f$i \f$ - * @param isite2 [in] a site number \f$j \f$ - * @param dtmp_V [in] A value of coulombinter interaction \f$ V_{ij} \f$ - * @param X [in] Define list to get the operator information. + * @brief Update the vector by the chemical potential \f$ \mu_{i \sigma_1} n_ {i \sigma_1} \f$ \n + * generated by the commutation relation in terms of the general two-body interaction, \n + * \f$ c_ {i \sigma_1} a_{j\sigma_2}c_ {j \sigma_2}a_ {i \sigma_1} = c_ {i \sigma_1}a_ {i \sigma_1}-c_ {i \sigma_1} a_ {i \sigma_1} c_ {j \sigma_2}a_{j\sigma_2}\f$ . + * (Using in Time Evolution mode). + * @param isite1 [in] a site number + * @param spin [in] a spin number + * @param dtmp_V [in] A value of coulombintra interaction \f$ \mu_{i \sigma_1} \f$ + * @param X [in] Define list to get dimension number + * @param tmp_v0 [in,out] Result vector + * @param tmp_v1 [in] Input produced vector * @retval -1 fail to calculate the diagonal component. * @retval 0 succeed to calculate the diagonal component. 
* - * @version 0.1 - * @author Takahiro Misawa (The University of Tokyo) + * @version 2.1 * @author Kazuyoshi Yoshimi (The University of Tokyo) */ -int SetDiagonalCoulombInter -( +int SetDiagonalTEChemi( long unsigned int isite1, - long unsigned int isite2, - double dtmp_V, - struct BindStruct *X - ) -{ - - long unsigned int is1_up, is1_down; - long unsigned int ibit1_up, ibit1_down; + long unsigned int spin, + double dtmp_V, + struct BindStruct *X, + int nstate, double complex *tmp_v0, + double complex *tmp_v1 +) { + long unsigned int is1_up; long unsigned int num1; - long unsigned int is2_up, is2_down; - long unsigned int ibit2_up, ibit2_down; - long unsigned int num2; + long unsigned int isigma1 = spin; + long unsigned int is1, ibit1; long unsigned int j; - long unsigned int i_max=X->Check.idim_max; + long unsigned int i_max = X->Check.idim_max; + double complex dam_pr = 0; /* - Force isite1 <= isite2 - */ - if (isite2 < isite1) { - j = isite2; - isite2 = isite1; - isite1 = j; - }/*if (isite2 < isite1)*/ - /* - When isite1 & site2 are in the inter process region + When isite1 is in the inter process region */ - if (/*isite2 => */ isite1 > X->Def.Nsite) { + if (isite1 > X->Def.Nsite) { switch (X->Def.iCalcModel) { @@ -579,239 +526,178 @@ int SetDiagonalCoulombInter case Hubbard: case Kondo: - is1_up = X->Def.Tpow[2 * isite1 - 2]; - is1_down = X->Def.Tpow[2 * isite1 - 1]; - is2_up = X->Def.Tpow[2 * isite2 - 2]; - is2_down = X->Def.Tpow[2 * isite2 - 1]; - - num1 = 0; - num2 = 0; - - ibit1_up = (unsigned long int)myrank&is1_up; - num1 += ibit1_up / is1_up; - ibit1_down = (unsigned long int)myrank&is1_down; - num1 += ibit1_down / is1_down; - - ibit2_up = (unsigned long int)myrank&is2_up; - num2 += ibit2_up / is2_up; - ibit2_down = (unsigned long int)myrank&is2_down; - num2 += ibit2_down / is2_down; - -#pragma omp parallel for default(none) shared(list_Diagonal) \ - firstprivate(i_max, dtmp_V, num1, num2) private(j) - for (j = 1; j <= i_max; j++) list_Diagonal[j] += 
num1*num2*dtmp_V; - - break;/*case HubbardGC, KondoGC, Hubbard, Kondo:*/ + if (spin == 0) { + is1 = X->Def.Tpow[2 * isite1 - 2]; + } + else { + is1 = X->Def.Tpow[2 * isite1 - 1]; + } + ibit1 = (unsigned long int)myrank & is1; + num1 = ibit1 / is1; + break;/*case HubbardGC, case KondoGC, Hubbard, Kondo:*/ - case Spin: case SpinGC: -#pragma omp parallel for default(none) shared(list_Diagonal) firstprivate(i_max, dtmp_V) - for (j = 1; j <= i_max; j++) { - list_Diagonal[j] += dtmp_V; - } - break;/*case Spin, SpinGC*/ + case Spin: + + if (X->Def.iFlgGeneralSpin == FALSE) { + is1_up = X->Def.Tpow[isite1 - 1]; + num1 = (((unsigned long int)myrank& is1_up) / is1_up) ^ (1 - spin); + } /*if (X->Def.iFlgGeneralSpin == FALSE)*/ + else /*if (X->Def.iFlgGeneralSpin == TRUE)*/ { + num1 = BitCheckGeneral((unsigned long int)myrank, + isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); + }/*if (X->Def.iFlgGeneralSpin == TRUE)*/ + break;/*case SpinGC, Spin:*/ default: fprintf(stdoutMPI, cErrNoModel, X->Def.iCalcModel); return -1; - }/*switch (X->Def.iCalcModel)*/ - + } /*switch (X->Def.iCalcModel)*/ + if (num1 != 0) { +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1) \ +firstprivate(i_max, dtmp_V) private(j) + for (j = 1; j <= i_max; j++) { + tmp_v0[j] += dtmp_V * tmp_v1[j]; + dam_pr += dtmp_V * conj(tmp_v1[j]) * tmp_v1[j]; + } + }/*if (num1 != 0)*/ + dam_pr = SumMPI_dc(dam_pr); + X->Large.prdct += dam_pr; return 0; - }/*if (isite1 > X->Def.Nsite)*/ - else if (isite2 > X->Def.Nsite /* => isite1 */) { - - switch(X->Def.iCalcModel){ - case HubbardGC: - case KondoGC: - case Hubbard: - case Kondo: - is1_up = X->Def.Tpow[2 * isite1 - 2]; - is1_down = X->Def.Tpow[2 * isite1 - 1]; - is2_up = X->Def.Tpow[2 * isite2 - 2]; - is2_down = X->Def.Tpow[2 * isite2 - 1]; - num2 = 0; - ibit2_up = (unsigned long int)myrank&is2_up; - num2 += ibit2_up / is2_up; - ibit2_down = (unsigned long int)myrank&is2_down; - num2 += ibit2_down / is2_down; - break; - - case Spin: - case 
SpinGC: - break; + }/*if (isite1 >= X->Def.Nsite*/ - default: - fprintf(stdoutMPI, cErrNoModel, X->Def.iCalcModel); - return -1; + switch (X->Def.iCalcModel) { + case HubbardGC: + if (spin == 0) { + is1 = X->Def.Tpow[2 * isite1 - 2]; + } + else { + is1 = X->Def.Tpow[2 * isite1 - 1]; } - - switch (X->Def.iCalcModel) { - case HubbardGC: - -#pragma omp parallel for default(none) shared(list_Diagonal) \ -firstprivate(i_max, dtmp_V, num2, is1_up, is1_down) \ -private(num1, ibit1_up, ibit1_down, j) - for (j = 1; j <= i_max; j++) { - num1 = 0; - ibit1_up = (j - 1)&is1_up; - num1 += ibit1_up / is1_up; - ibit1_down = (j - 1)&is1_down; - num1 += ibit1_down / is1_down; +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1) private(num1, ibit1) + for (j = 1; j <= i_max; j++) { + ibit1 = (j - 1)&is1; + num1 = ibit1 / is1; + tmp_v0[j] += dtmp_V * num1*tmp_v1[j]; + dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; + } + break; + case KondoGC: + case Hubbard: + case Kondo: + if (spin == 0) { + is1 = X->Def.Tpow[2 * isite1 - 2]; + } + else { + is1 = X->Def.Tpow[2 * isite1 - 1]; + } - list_Diagonal[j] += num1*num2*dtmp_V; - } +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, nstate, tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1) private(num1, ibit1) + for (j = 1; j <= i_max; j++) { - break;/*case HubbardGC*/ + ibit1 = list_1[j] & is1; + num1 = ibit1 / is1; + tmp_v0[j] += dtmp_V * num1*tmp_v1[j]; + dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; + } + break; - case KondoGC: - case Hubbard: - case Kondo: - -#pragma omp parallel for default(none) shared(list_1, list_Diagonal) \ -firstprivate(i_max, dtmp_V, is1_up, is1_down, num2) \ -private(num1, ibit1_up, ibit1_down, j) + case SpinGC: + if (X->Def.iFlgGeneralSpin == FALSE) { + is1_up = X->Def.Tpow[isite1 - 1]; +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, nstate, tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, 
is1_up, spin) private(num1) for (j = 1; j <= i_max; j++) { - num1 = 0; - ibit1_up = list_1[j] & is1_up; - num1 += ibit1_up / is1_up; - ibit1_down = list_1[j] & is1_down; - num1 += ibit1_down / is1_down; - - list_Diagonal[j] += num1*num2*dtmp_V; + num1 = (((j - 1)& is1_up) / is1_up) ^ (1 - spin); + tmp_v0[j] += dtmp_V * num1*tmp_v1[j]; + dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; } - break;/*case KondoGC, Hubbard, Kondo:*/ - - case Spin: - case SpinGC: -#pragma omp parallel for default(none) shared(list_Diagonal) firstprivate(i_max, dtmp_V) + } + else { +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) for (j = 1; j <= i_max; j++) { - list_Diagonal[j] += dtmp_V; + num1 = BitCheckGeneral(j - 1, isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); + if (num1 != 0) { + tmp_v0[j] += dtmp_V * tmp_v1[j]; + dam_pr += dtmp_V * conj(tmp_v1[j]) * tmp_v1[j]; + } } - break;/* case Spin, SpinGC:*/ - - default: - fprintf(stdoutMPI, cErrNoModel, X->Def.iCalcModel); - return -1; - - }/*switch (X->Def.iCalcModel)*/ + } + break; - return 0; + case Spin: + if (X->Def.iFlgGeneralSpin == FALSE) { + is1_up = X->Def.Tpow[isite1 - 1]; +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, nstate, tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1_up, spin) private(num1) + for (j = 1; j <= i_max; j++) { + num1 = ((list_1[j] & is1_up) / is1_up) ^ (1 - spin); + tmp_v0[j] += dtmp_V * num1*tmp_v1[j]; + dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; + } + } + else { +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1, list_1) firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) + for (j = 1; j <= i_max; j++) { + num1 = BitCheckGeneral(list_1[j], isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); + if (num1 != 0) { + tmp_v0[j] += dtmp_V * tmp_v1[j]; + dam_pr += dtmp_V * conj(tmp_v1[j]) * tmp_v1[j]; - }/*else if (isite2 > 
X->Def.Nsite)*/ - else{ - switch (X->Def.iCalcModel){ - case HubbardGC: //list_1[j] -> j-1 - is1_up = X->Def.Tpow[2*isite1-2]; - is1_down = X->Def.Tpow[2*isite1-1]; - is2_up = X->Def.Tpow[2*isite2-2]; - is2_down = X->Def.Tpow[2*isite2-1]; -#pragma omp parallel for default(none) shared( list_Diagonal) firstprivate(i_max, dtmp_V, is1_up, is1_down, is2_up, is2_down) private(num1, ibit1_up, ibit1_down, num2, ibit2_up, ibit2_down) - for(j = 1;j <= i_max;j++){ - num1=0; - num2=0; - ibit1_up=(j-1)&is1_up; - num1+=ibit1_up/is1_up; - ibit1_down=(j-1)&is1_down; - num1+=ibit1_down/is1_down; - - ibit2_up=(j-1)&is2_up; - num2+=ibit2_up/is2_up; - ibit2_down=(j-1)&is2_down; - num2+=ibit2_down/is2_down; - - list_Diagonal[j]+=num1*num2*dtmp_V; - } - break; - case KondoGC: - case Hubbard: - case Kondo: - is1_up = X->Def.Tpow[2*isite1-2]; - is1_down = X->Def.Tpow[2*isite1-1]; - is2_up = X->Def.Tpow[2*isite2-2]; - is2_down = X->Def.Tpow[2*isite2-1]; - -#pragma omp parallel for default(none) shared(list_1, list_Diagonal) firstprivate(i_max, dtmp_V, is1_up, is1_down, is2_up, is2_down) private(num1, ibit1_up, ibit1_down, num2, ibit2_up, ibit2_down) - for(j = 1;j <= i_max;j++){ - num1=0; - num2=0; - ibit1_up=list_1[j]&is1_up; - num1+=ibit1_up/is1_up; - ibit1_down=list_1[j]&is1_down; - num1+=ibit1_down/is1_down; - - ibit2_up=list_1[j]&is2_up; - num2+=ibit2_up/is2_up; - ibit2_down=list_1[j]&is2_down; - num2+=ibit2_down/is2_down; - - list_Diagonal[j]+=num1*num2*dtmp_V; - } - break; - - case Spin: - case SpinGC: -#pragma omp parallel for default(none) shared(list_Diagonal) firstprivate(i_max, dtmp_V) - for(j = 1;j <= i_max; j++){ - list_Diagonal[j] += dtmp_V; - } - break; - default: - fprintf(stdoutMPI, cErrNoModel, X->Def.iCalcModel); - return -1; + } + } } + + break; + default: + fprintf(stdoutMPI, cErrNoModel, X->Def.iCalcModel); + return -1; } - + dam_pr = SumMPI_dc(dam_pr); + X->Large.prdct += dam_pr; return 0; } - /** - * - * @brief Calculate the components for Hund interaction, \f$ 
H_{ij}(n_ {i\uparrow}n_{j\uparrow}+ n_ {i\downarrow}n_{j\downarrow})\f$ + * @brief Update the vector by the general one-body diagonal interaction, \f$ \mu_{i\sigma_1} n_ {i\sigma_1}\f$.\n + * (Using in Time Evolution mode). * @param isite1 [in] a site number \f$i \f$ - * @param isite2 [in] a site number \f$j \f$ - * @param dtmp_V [in] A value of Hund interaction \f$ H_{ij} \f$ + * @param dtmp_V [in] A value of general one-body diagonal interaction \f$ \mu_{i\sigma_1} \f$ + * @param spin [in] a spin index at \f$i \f$ site. * @param X [in] Define list to get the operator information. + * @param tmp_v0 [in,out] Result vector + * @param tmp_v1 [in] Input produced vector * @retval -1 fail to calculate the diagonal component. * @retval 0 succeed to calculate the diagonal component. * - * @version 0.1 - * @author Takahiro Misawa (The University of Tokyo) + * @version 2.1 * @author Kazuyoshi Yoshimi (The University of Tokyo) */ -int SetDiagonalHund -( - long unsigned int isite1, - long unsigned int isite2, - double dtmp_V, - struct BindStruct *X - ){ - long unsigned int is1_up, is1_down; - long unsigned int ibit1_up, ibit1_down; - long unsigned int num1_up, num1_down; - long unsigned int is2_up, is2_down; - long unsigned int ibit2_up, ibit2_down; - long unsigned int num2_up, num2_down; +int SetDiagonalTETransfer +( + long unsigned int isite1, + double dtmp_V, + long unsigned int spin, + struct BindStruct *X, + int nstate, + double complex *tmp_v0, + double complex *tmp_v1 +) { + long unsigned int is1_up; + long unsigned int ibit1_up; + long unsigned int num1; + long unsigned int isigma1 = spin; + long unsigned int is1, ibit1; + double dam_pr = 0.0; - long unsigned int is_up; - long unsigned int ibit; long unsigned int j; - long unsigned int i_max=X->Check.idim_max; - /* - Force isite1 <= isite2 - */ - if (isite2 < isite1) { - j = isite2; - isite2 = isite1; - isite1 = j; - } + long unsigned int i_max = X->Check.idim_max; + /* - When isite1 & site2 are in the inter process 
region + When isite1 is in the inter process region */ - if (/*isite2 >= */ isite1 > X->Def.Nsite){ + if (isite1 > X->Def.Nsite) { switch (X->Def.iCalcModel) { @@ -819,286 +705,197 @@ int SetDiagonalHund case KondoGC: case Hubbard: case Kondo: - - is1_up = X->Def.Tpow[2 * isite1 - 2]; - is1_down = X->Def.Tpow[2 * isite1 - 1]; - is2_up = X->Def.Tpow[2 * isite2 - 2]; - is2_down = X->Def.Tpow[2 * isite2 - 1]; - - num1_up = 0; - num1_down = 0; - num2_up = 0; - num2_down = 0; - - ibit1_up = (unsigned long int)myrank &is1_up; - num1_up = ibit1_up / is1_up; - ibit1_down = (unsigned long int)myrank &is1_down; - num1_down = ibit1_down / is1_down; - - ibit2_up = (unsigned long int)myrank &is2_up; - num2_up = ibit2_up / is2_up; - ibit2_down = (unsigned long int)myrank &is2_down; - num2_down = ibit2_down / is2_down; - -#pragma omp parallel for default(none) shared(list_Diagonal) \ - firstprivate(i_max, dtmp_V, num1_up, num1_down, num2_up, num2_down) private(j) - for (j = 1; j <= i_max; j++) - list_Diagonal[j] += dtmp_V*(num1_up*num2_up + num1_down*num2_down); - - break;/*case HubbardGC, KondoGC, Hubbard, Kondo:*/ + if (spin == 0) { + is1 = X->Def.Tpow[2 * isite1 - 2]; + } + else { + is1 = X->Def.Tpow[2 * isite1 - 1]; + } + ibit1 = (unsigned long int)myrank & is1; + num1 = ibit1 / is1; + break;/*case HubbardGC, case KondoGC, Hubbard, Kondo:*/ case SpinGC: case Spin: - - is1_up = X->Def.Tpow[isite1 - 1]; - is2_up = X->Def.Tpow[isite2 - 1]; - is_up = is1_up + is2_up; - ibit = (unsigned long int)myrank & is_up; - if (ibit == 0 || ibit == is_up) { -#pragma omp parallel for default(none) shared(list_Diagonal) \ -firstprivate(i_max, dtmp_V) private(j) - for (j = 1; j <= i_max; j++) list_Diagonal[j] += dtmp_V; - } + if (X->Def.iFlgGeneralSpin == FALSE) { + is1_up = X->Def.Tpow[isite1 - 1]; + num1 = (((unsigned long int)myrank& is1_up) / is1_up) ^ (1 - spin); + } /*if (X->Def.iFlgGeneralSpin == FALSE)*/ + else /*if (X->Def.iFlgGeneralSpin == TRUE)*/ { + num1 = BitCheckGeneral((unsigned 
long int)myrank, + isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); + }/*if (X->Def.iFlgGeneralSpin == TRUE)*/ break;/*case SpinGC, Spin:*/ default: fprintf(stdoutMPI, cErrNoModel, X->Def.iCalcModel); return -1; - } - - return 0; - }/*if (isite1 > X->Def.Nsite)*/ - else if (isite2 > X->Def.Nsite /* >= isite1 */) { + } /*switch (X->Def.iCalcModel)*/ + if (num1 != 0) { +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1)\ + firstprivate(i_max, dtmp_V) private(j) + for (j = 1; j <= i_max; j++) { + tmp_v0[j] += dtmp_V * tmp_v1[j]; + dam_pr += dtmp_V * conj(tmp_v1[j]) * tmp_v1[j]; + } + } + }/*if (isite1 >= X->Def.Nsite*/ + else {//(isite1 < X->Def.Nsite) switch (X->Def.iCalcModel) { - case HubbardGC: - - is1_up = X->Def.Tpow[2 * isite1 - 2]; - is1_down = X->Def.Tpow[2 * isite1 - 1]; - is2_up = X->Def.Tpow[2 * isite2 - 2]; - is2_down = X->Def.Tpow[2 * isite2 - 1]; - - num2_up = 0; - num2_down = 0; - - ibit2_up = (unsigned long int)myrank &is2_up; - num2_up = ibit2_up / is2_up; - ibit2_down = (unsigned long int)myrank &is2_down; - num2_down = ibit2_down / is2_down; - -#pragma omp parallel for default(none) shared( list_Diagonal) \ -firstprivate(i_max, dtmp_V, num2_up, num2_down, is1_up, is1_down) \ -private(num1_up, num1_down, ibit1_up, ibit1_down, j) + if (spin == 0) { + is1 = X->Def.Tpow[2 * isite1 - 2]; + } + else { + is1 = X->Def.Tpow[2 * isite1 - 1]; + } +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, nstate, tmp_v0, tmp_v1) \ + firstprivate(i_max, dtmp_V, is1) private(num1, ibit1) for (j = 1; j <= i_max; j++) { - num1_up = 0; - num1_down = 0; - - ibit1_up = (j - 1)&is1_up; - num1_up = ibit1_up / is1_up; - ibit1_down = (j - 1)&is1_down; - num1_down = ibit1_down / is1_down; - - list_Diagonal[j] += dtmp_V*(num1_up*num2_up + num1_down*num2_down); + ibit1 = (j - 1) & is1; + num1 = ibit1 / is1; + tmp_v0[j] += dtmp_V * num1*tmp_v1[j]; + dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; } - break;/*case 
HubbardGC:*/ + break; case KondoGC: case Hubbard: case Kondo: - - is1_up = X->Def.Tpow[2 * isite1 - 2]; - is1_down = X->Def.Tpow[2 * isite1 - 1]; - is2_up = X->Def.Tpow[2 * isite2 - 2]; - is2_down = X->Def.Tpow[2 * isite2 - 1]; - - num2_up = 0; - num2_down = 0; - - ibit2_up = (unsigned long int)myrank&is2_up; - num2_up = ibit2_up / is2_up; - ibit2_down = (unsigned long int)myrank&is2_down; - num2_down = ibit2_down / is2_down; - -#pragma omp parallel for default(none) shared(list_1, list_Diagonal) \ -firstprivate(i_max, dtmp_V, num2_up, num2_down, is1_up, is1_down) \ -private(num1_up, num1_down, ibit1_up, ibit1_down, j) + if (spin == 0) { + is1 = X->Def.Tpow[2 * isite1 - 2]; + } + else { + is1 = X->Def.Tpow[2 * isite1 - 1]; + } +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, nstate, tmp_v0, tmp_v1) \ + firstprivate(i_max, dtmp_V, is1) private(num1, ibit1) for (j = 1; j <= i_max; j++) { - num1_up = 0; - num1_down = 0; - - ibit1_up = list_1[j] & is1_up; - num1_up = ibit1_up / is1_up; - ibit1_down = list_1[j] & is1_down; - num1_down = ibit1_down / is1_down; - - list_Diagonal[j] += dtmp_V*(num1_up*num2_up + num1_down*num2_down); + ibit1 = list_1[j] & is1; + num1 = ibit1 / is1; + tmp_v0[j] += dtmp_V * num1*tmp_v1[j]; + dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; } - break;/*case KondoGC, Hubbard, Kondo:*/ + break; case SpinGC: - is1_up = X->Def.Tpow[isite1 - 1]; - is2_up = X->Def.Tpow[isite2 - 1]; - ibit2_up = (unsigned long int)myrank & is2_up; - - if (ibit2_up == is2_up) { -#pragma omp parallel for default(none) shared(list_Diagonal) \ -firstprivate(i_max, dtmp_V, is1_up) private(j, ibit1_up) + if (X->Def.iFlgGeneralSpin == FALSE) { + is1_up = X->Def.Tpow[isite1 - 1]; +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, nstate, tmp_v0, tmp_v1) \ + firstprivate(i_max, dtmp_V, is1_up, spin) private(num1, ibit1_up) for (j = 1; j <= i_max; j++) { - ibit1_up = (j - 1) & is1_up; - if (ibit1_up == is1_up) { - 
list_Diagonal[j] += dtmp_V; - } + ibit1_up = (((j - 1) & is1_up) / is1_up) ^ (1 - spin); + tmp_v0[j] += dtmp_V * ibit1_up*tmp_v1[j]; + dam_pr += dtmp_V * ibit1_up*conj(tmp_v1[j]) * tmp_v1[j]; } } - else if(ibit2_up == 0){ -#pragma omp parallel for default(none) shared(list_Diagonal) \ -firstprivate(i_max, dtmp_V, is1_up) private(j, ibit1_up) + else { +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1) \ + firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) for (j = 1; j <= i_max; j++) { - ibit1_up = (j - 1) & is1_up; - if (ibit1_up == 0) { - list_Diagonal[j] += dtmp_V; + num1 = BitCheckGeneral(j - 1, isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); + if (num1 != 0) { + tmp_v0[j] += dtmp_V * tmp_v1[j]; + dam_pr += dtmp_V * conj(tmp_v1[j]) * tmp_v1[j]; } } } - break;/*case SpinGC:*/ + break; case Spin: - is1_up = X->Def.Tpow[isite1 - 1]; - is2_up = X->Def.Tpow[isite2 - 1]; - ibit2_up = (unsigned long int)myrank & is2_up; - - if (ibit2_up == is2_up) { -#pragma omp parallel for default(none) shared(list_1, list_Diagonal) \ -firstprivate(i_max, dtmp_V, is1_up) private(j, ibit1_up) + if (X->Def.iFlgGeneralSpin == FALSE) { + is1_up = X->Def.Tpow[isite1 - 1]; +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, nstate, tmp_v0, tmp_v1)\ + firstprivate(i_max, dtmp_V, is1_up, spin) private(num1, ibit1_up) for (j = 1; j <= i_max; j++) { - ibit1_up = list_1[j] & is1_up; - if (ibit1_up == is1_up) { - list_Diagonal[j] += dtmp_V; - } + ibit1_up = ((list_1[j] & is1_up) / is1_up) ^ (1 - spin); + tmp_v0[j] += dtmp_V * ibit1_up * tmp_v1[j]; + dam_pr += dtmp_V * ibit1_up * conj(tmp_v1[j]) * tmp_v1[j]; } } - else if (ibit2_up == 0) { -#pragma omp parallel for default(none) shared(list_1, list_Diagonal) \ -firstprivate(i_max, dtmp_V, is1_up) private(j, ibit1_up) + else { +#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, nstate, tmp_v0, tmp_v1)\ + firstprivate(i_max, dtmp_V, isite1, isigma1, X) 
private(j, num1) for (j = 1; j <= i_max; j++) { - ibit1_up = list_1[j] & is1_up; - if (ibit1_up == 0) { - list_Diagonal[j] += dtmp_V; - } + num1 = BitCheckGeneral(list_1[j], isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); + tmp_v0[j] += dtmp_V * num1 * tmp_v1[j]; + dam_pr += dtmp_V * num1 * conj(tmp_v1[j]) * tmp_v1[j]; } } - break;/*case Spin:*/ + break; default: fprintf(stdoutMPI, cErrNoModel, X->Def.iCalcModel); return -1; + } + } + dam_pr = SumMPI_dc(dam_pr); + X->Large.prdct += dam_pr; + return 0; +} +/// @fn diagonalcalcForTE() Update the vector for diagonal operators ( using in Time Evolution mode). +/// \param X [in] Struct to get the information of the diagonal operators. +/// \param tmp_v0 [in,out] Result vector +/// \param tmp_v1 [in] Input produced vector +/// \retval -1 fail to update the vector. +/// \retval 0 succeed to update the vector. +/// \version 2.1 +int diagonalcalcForTE +( + const int _istep, + struct BindStruct *X, + int nstate, + double complex *tmp_v0, + double complex *tmp_v1 +) { - }/*switch (X->Def.iCalcModel)*/ + long unsigned int i; + long unsigned int isite1, isite2; + long unsigned int A_spin, B_spin; + double tmp_V; - return 0; + if (X->Def.NTETransferDiagonal[_istep] > 0) { + for (i = 0; i < X->Def.NTETransferDiagonal[_istep]; i++) { + isite1 = X->Def.TETransferDiagonal[_istep][i][0] + 1; + A_spin = X->Def.TETransferDiagonal[_istep][i][1]; + tmp_V = X->Def.ParaTETransferDiagonal[_istep][i]; + SetDiagonalTETransfer(isite1, tmp_V, A_spin, X, nstate, tmp_v0, tmp_v1); + } + } + else if (X->Def.NTEInterAllDiagonal[_istep] > 0) { + for (i = 0; i < X->Def.NTEInterAllDiagonal[_istep]; i++) { + //Assume n_{1\sigma_1} n_{2\sigma_2} + isite1 = X->Def.TEInterAllDiagonal[_istep][i][0] + 1; + A_spin = X->Def.TEInterAllDiagonal[_istep][i][1]; + isite2 = X->Def.TEInterAllDiagonal[_istep][i][2] + 1; + B_spin = X->Def.TEInterAllDiagonal[_istep][i][3]; + tmp_V = X->Def.ParaTEInterAllDiagonal[_istep][i]; - }/*else if (isite2 > X->Def.Nsite)*/ - 
else{ - switch (X->Def.iCalcModel){ - case HubbardGC: // list_1[j] -> j-1 - is1_up = X->Def.Tpow[2*isite1-2]; - is1_down = X->Def.Tpow[2*isite1-1]; - is2_up = X->Def.Tpow[2*isite2-2]; - is2_down = X->Def.Tpow[2*isite2-1]; - -#pragma omp parallel for default(none) shared( list_Diagonal) firstprivate(i_max, dtmp_V, is1_up, is1_down, is2_up, is2_down) private(num1_up, num1_down, num2_up, num2_down, ibit1_up, ibit1_down, ibit2_up, ibit2_down) - for(j = 1; j <= i_max;j++){ - num1_up=0; - num1_down=0; - num2_up=0; - num2_down=0; - - ibit1_up=(j-1)&is1_up; - num1_up=ibit1_up/is1_up; - ibit1_down=(j-1)&is1_down; - num1_down=ibit1_down/is1_down; - - ibit2_up=(j-1)&is2_up; - num2_up=ibit2_up/is2_up; - ibit2_down=(j-1)&is2_down; - num2_down=ibit2_down/is2_down; - - list_Diagonal[j]+=dtmp_V*(num1_up*num2_up+num1_down*num2_down); - } - break; - case KondoGC: - case Hubbard: - case Kondo: - is1_up = X->Def.Tpow[2*isite1-2]; - is1_down = X->Def.Tpow[2*isite1-1]; - is2_up = X->Def.Tpow[2*isite2-2]; - is2_down = X->Def.Tpow[2*isite2-1]; - -#pragma omp parallel for default(none) shared(list_1, list_Diagonal) firstprivate(i_max, dtmp_V, is1_up, is1_down, is2_up, is2_down) private(num1_up, num1_down, num2_up, num2_down, ibit1_up, ibit1_down, ibit2_up, ibit2_down) - for(j = 1; j <= i_max;j++){ - num1_up=0; - num1_down=0; - num2_up=0; - num2_down=0; - - ibit1_up=list_1[j]&is1_up; - num1_up=ibit1_up/is1_up; - ibit1_down=list_1[j]&is1_down; - num1_down=ibit1_down/is1_down; - - ibit2_up=list_1[j]&is2_up; - num2_up=ibit2_up/is2_up; - ibit2_down=list_1[j]&is2_down; - num2_down=ibit2_down/is2_down; - - list_Diagonal[j]+=dtmp_V*(num1_up*num2_up+num1_down*num2_down); - } - break; - - case SpinGC: - is1_up = X->Def.Tpow[isite1-1]; - is2_up = X->Def.Tpow[isite2-1]; - is_up = is1_up+is2_up; -#pragma omp parallel for default(none) shared(list_1, list_Diagonal) firstprivate(i_max, dtmp_V, is1_up, is2_up, is_up) private(j, ibit) - for(j = 1;j <= i_max;j++){ - ibit = (j-1) & is_up; - if(ibit == 0 || 
ibit == is_up){ - list_Diagonal[j]+= dtmp_V; + if (SetDiagonalTEInterAll(isite1, isite2, A_spin, B_spin, tmp_V, X, nstate, tmp_v0, tmp_v1) != 0) { + return -1; } } - break; - - case Spin: - is1_up = X->Def.Tpow[isite1-1]; - is2_up = X->Def.Tpow[isite2-1]; - is_up = is1_up+is2_up; -#pragma omp parallel for default(none) shared(list_1, list_Diagonal) firstprivate(i_max, dtmp_V, is1_up, is2_up, is_up) private(j, ibit) - for(j = 1;j <= i_max;j++){ - ibit = list_1[j] & is_up; - if(ibit == 0 || ibit == is_up){ - list_Diagonal[j]+= dtmp_V; - } + + if (X->Def.NTEChemi[_istep] > 0) { + for (i = 0; i < X->Def.NTEChemi[_istep]; i++) { + isite1 = X->Def.TEChemi[_istep][i] + 1; + A_spin = X->Def.SpinTEChemi[_istep][i]; + tmp_V = -X->Def.ParaTEChemi[_istep][i]; + if (SetDiagonalTEChemi(isite1, A_spin, tmp_V, X, nstate, tmp_v0, tmp_v1) != 0) { + return -1; + } } - break; - default: - fprintf(stdoutMPI, cErrNoModel, X->Def.iCalcModel); - return -1; } } return 0; } - /** - * - * @brief Calculate the components for general two-body diagonal interaction, \f$ H_{i\sigma_1 j\sigma_2} n_ {i\sigma_1}n_{j\sigma_2}\f$ - * @param isite1 [in] a site number \f$i \f$ - * @param isite2 [in] a site number \f$j \f$ - * @param isigma1 [in] a spin index at \f$i \f$ site. - * @param isigma2 [in] a spin index at \f$j \f$ site. - * @param dtmp_V [in] A value of general two-body diagonal interaction \f$ H_{i\sigma_1 j\sigma_2} \f$ - * @param X [in] Define list to get the operator information. + * @brief Calculate the components for Coulombintra interaction, \f$ U_i n_ {i \uparrow}n_{i \downarrow} \f$ + * @param isite1 [in] a site number + * @param dtmp_V [in] A value of coulombintra interaction \f$ U_i \f$ + * @param X [in] Define list to get dimension number * @retval -1 fail to calculate the diagonal component. * @retval 0 succeed to calculate the diagonal component. 
* @@ -1106,43 +903,21 @@ firstprivate(i_max, dtmp_V, is1_up) private(j, ibit1_up) * @author Takahiro Misawa (The University of Tokyo) * @author Kazuyoshi Yoshimi (The University of Tokyo) */ -int SetDiagonalInterAll +int SetDiagonalCoulombIntra ( - long unsigned int isite1, - long unsigned int isite2, - long unsigned int isigma1, - long unsigned int isigma2, - double dtmp_V, - struct BindStruct *X - ) -{ - long unsigned int is1_spin; - long unsigned int is2_spin; - long unsigned int is1_up; - long unsigned int is2_up; - - long unsigned int ibit1_spin; - long unsigned int ibit2_spin; - - long unsigned int num1; - long unsigned int num2; + long unsigned int isite1, + double dtmp_V, + struct BindStruct *X +) { + long unsigned int is; + long unsigned int ibit; + long unsigned int is1_up, is1_down; long unsigned int j; - long unsigned int i_max=X->Check.idim_max; - - /* - Forse isite1 <= isite2 - */ - if (isite2 < isite1) { - j = isite2; - isite2 = isite1; - isite1 = j; - j = isigma2; - isigma2 = isigma1; - isigma1 = j; - } + long unsigned int i_max = X->Check.idim_max; + /* - When isite1 & site2 are in the inter process regino + When isite1 is in the inter process region */ if (isite1 > X->Def.Nsite) { @@ -1153,942 +928,1118 @@ int SetDiagonalInterAll case Hubbard: case Kondo: - is1_spin = X->Def.Tpow[2 * isite1 - 2 + isigma1]; - is2_spin = X->Def.Tpow[2 * isite2 - 2 + isigma2]; - - num1 = 0; - ibit1_spin = (unsigned long int)myrank&is1_spin; - num1 += ibit1_spin / is1_spin; - - num2 = 0; - ibit2_spin = (unsigned long int)myrank&is2_spin; - num2 += ibit2_spin / is2_spin; - + is1_up = X->Def.Tpow[2 * isite1 - 2]; + is1_down = X->Def.Tpow[2 * isite1 - 1]; + is = is1_up + is1_down; + ibit = (unsigned long int)myrank & is; + if (ibit == is) { #pragma omp parallel for default(none) shared(list_Diagonal) \ -firstprivate(i_max, dtmp_V, num2, num1) private(ibit1_spin, j) - for (j = 1; j <= i_max; j++) list_Diagonal[j] += num1*num2*dtmp_V; + firstprivate(i_max, dtmp_V) 
private(j) + for (j = 1; j <= i_max; j++) list_Diagonal[j] += dtmp_V; + } - break;/*case HubbardGC, KondoGC, Hubbard, Kondo:*/ + break; /*case HubbardGC, KondoGC, Hubbard, Kondo:*/ - case SpinGC: case Spin: - - if (X->Def.iFlgGeneralSpin == FALSE) { - is1_up = X->Def.Tpow[isite1 - 1]; - is2_up = X->Def.Tpow[isite2 - 1]; - num1 = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is1_up, isigma1); - num2 = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, isigma2); - -#pragma omp parallel for default(none) shared(list_Diagonal) \ -firstprivate(i_max, dtmp_V, is1_up, isigma1, X, num1, num2) private(j) - for (j = 1; j <= i_max; j++) { - list_Diagonal[j] += num1*num2*dtmp_V; - } - }/*if (X->Def.iFlgGeneralSpin == FALSE)*/ - else {//start:generalspin - num1 = BitCheckGeneral((unsigned long int)myrank, isite1, isigma1, - X->Def.SiteToBit, X->Def.Tpow); - num2 = BitCheckGeneral((unsigned long int)myrank, isite2, isigma2, - X->Def.SiteToBit, X->Def.Tpow); - if (num1 !=0 && num2 != 0) { -#pragma omp parallel for default(none) shared(list_Diagonal) \ -firstprivate(i_max, dtmp_V, num1, X) private(j) - for (j = 1; j <= i_max; j++) list_Diagonal[j] += dtmp_V*num1; - } - }/*if (X->Def.iFlgGeneralSpin == TRUE)*/ - - break;/*case SpinGC, Spin:*/ + case SpinGC: + /* + They do not have the Coulomb term + */ + break; default: fprintf(stdoutMPI, cErrNoModel, X->Def.iCalcModel); return -1; + //break; - }/*if (isite1 > X->Def.Nsite)*/ + }/*switch (X->Def.iCalcModel)*/ return 0; - }/*if (isite1 > X->Def.Nsite)*/ - else if (isite2 > X->Def.Nsite) { - + }/*if (isite1 >= X->Def.Nsite*/ + else { switch (X->Def.iCalcModel) { - case HubbardGC: - - is1_spin = X->Def.Tpow[2 * isite1 - 2 + isigma1]; - is2_spin = X->Def.Tpow[2 * isite2 - 2 + isigma2]; - - num2 = 0; - ibit2_spin = (unsigned long int)myrank&is2_spin; - num2 += ibit2_spin / is2_spin; - -#pragma omp parallel for default(none) shared(list_Diagonal) \ -firstprivate(i_max, dtmp_V, is1_spin, num2) private(num1, ibit1_spin, j) + 
is1_up = X->Def.Tpow[2 * isite1 - 2]; + is1_down = X->Def.Tpow[2 * isite1 - 1]; + is = is1_up + is1_down; +#pragma omp parallel for default(none) shared(list_Diagonal, list_1) firstprivate(i_max, is, dtmp_V) private(ibit) for (j = 1; j <= i_max; j++) { - num1 = 0; - ibit1_spin = (j - 1)&is1_spin; - num1 += ibit1_spin / is1_spin; - list_Diagonal[j] += num1*num2*dtmp_V; + ibit = (j - 1)&is; + if (ibit == is) { + list_Diagonal[j] += dtmp_V; + } } - break;/*case HubbardGC:*/ + break; case KondoGC: case Hubbard: case Kondo: + is1_up = X->Def.Tpow[2 * isite1 - 2]; + is1_down = X->Def.Tpow[2 * isite1 - 1]; + is = is1_up + is1_down; +#pragma omp parallel for default(none) shared(list_Diagonal, list_1) firstprivate(i_max, is, dtmp_V) private(ibit) + for (j = 1; j <= i_max; j++) { + ibit = list_1[j] & is; + if (ibit == is) { + list_Diagonal[j] += dtmp_V; + } + } + break; - is1_spin = X->Def.Tpow[2 * isite1 - 2 + isigma1]; - is2_spin = X->Def.Tpow[2 * isite2 - 2 + isigma2]; + case Spin: + case SpinGC: + break; - num2 = 0; - ibit2_spin = (unsigned long int)myrank&is2_spin; - num2 += ibit2_spin / is2_spin; + default: + fprintf(stdoutMPI, cErrNoModel, X->Def.iCalcModel); + return -1; + //break; + } + } + return 0; +} +/** + * @brief Calculate the components for the chemical potential \f$ \mu_{i \sigma_1} n_ {i \sigma_1} \f$ + * @param isite1 [in] a site number + * @param dtmp_V [in] A value of coulombintra interaction \f$ \mu_{i \sigma_1} \f$ + * @param spin [in] Spin index for the chemical potential + * @param X [in] Define list to get dimension number + * @retval -1 fail to calculate the diagonal component. + * @retval 0 succeed to calculate the diagonal component. 
+ * + * @version 0.1 + * @author Takahiro Misawa (The University of Tokyo) + * @author Kazuyoshi Yoshimi (The University of Tokyo) + */ +int SetDiagonalChemi +( + long unsigned int isite1, + double dtmp_V, + long unsigned int spin, + struct BindStruct *X +) { + long unsigned int is1_up; + long unsigned int ibit1_up; + long unsigned int num1; + long unsigned int isigma1 = spin; + long unsigned int is1, ibit1; -#pragma omp parallel for default(none) shared(list_Diagonal, list_1) \ -firstprivate(i_max, dtmp_V, is1_spin, num2) private(num1, ibit1_spin, j) - for (j = 1; j <= i_max; j++) { - num1 = 0; - ibit1_spin = list_1[j] & is1_spin; - num1 += ibit1_spin / is1_spin; - list_Diagonal[j] += num1*num2*dtmp_V; - } - break;/*case KondoGC, Hubbard, Kondo:*/ + long unsigned int j; + long unsigned int i_max = X->Check.idim_max; - case SpinGC: - - if (X->Def.iFlgGeneralSpin == FALSE) { - is1_up = X->Def.Tpow[isite1 - 1]; - is2_up = X->Def.Tpow[isite2 - 1]; - num2 = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, isigma2); + /* + When isite1 is in the inter process region + */ + if (isite1 > X->Def.Nsite) { + switch (X->Def.iCalcModel) { + + case HubbardGC: + case KondoGC: + case Hubbard: + case Kondo: + + if (spin == 0) { + is1 = X->Def.Tpow[2 * isite1 - 2]; + } + else { + is1 = X->Def.Tpow[2 * isite1 - 1]; + } + ibit1 = (unsigned long int)myrank & is1; + num1 = ibit1 / is1; #pragma omp parallel for default(none) shared(list_Diagonal) \ -firstprivate(i_max, dtmp_V, is1_up, isigma1, X, num2) private(j, num1) - for (j = 1; j <= i_max; j++) { - num1 = X_SpinGC_CisAis(j, X, is1_up, isigma1); - list_Diagonal[j] += num1*num2*dtmp_V; - } - }/* if (X->Def.iFlgGeneralSpin == FALSE)*/ - else {//start:generalspin - num2 = BitCheckGeneral((unsigned long int)myrank, isite2, isigma2, - X->Def.SiteToBit, X->Def.Tpow); - if (num2 != 0) { -#pragma omp parallel for default(none) shared(list_Diagonal) \ -firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) - for (j = 1; j 
<= i_max; j++) { - num1 = BitCheckGeneral(j - 1, isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); - list_Diagonal[j] += dtmp_V*num1; - } - } - }/* if (X->Def.iFlgGeneralSpin == TRUE)*/ + firstprivate(i_max, dtmp_V, num1) private(j) + for (j = 1; j <= i_max; j++) list_Diagonal[j] += num1 * dtmp_V; - break;/*case SpinGC:*/ + break;/*case HubbardGC, case KondoGC, Hubbard, Kondo:*/ + case SpinGC: case Spin: - + if (X->Def.iFlgGeneralSpin == FALSE) { is1_up = X->Def.Tpow[isite1 - 1]; - is2_up = X->Def.Tpow[isite2 - 1]; - num2 = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, isigma2); - + ibit1_up = (((unsigned long int)myrank& is1_up) / is1_up) ^ (1 - spin); #pragma omp parallel for default(none) shared(list_Diagonal) \ -firstprivate(i_max, dtmp_V, is1_up, isigma1, X, num2) private(j, num1) - for (j = 1; j <= i_max; j++) { - num1 = X_Spin_CisAis(j, X, is1_up, isigma1); - list_Diagonal[j] += num1*num2*dtmp_V; - } - }/* if (X->Def.iFlgGeneralSpin == FALSE)*/ - else /* if (X->Def.iFlgGeneralSpin == TRUE)*/{ - num2 = BitCheckGeneral((unsigned long int)myrank, isite2, isigma2, \ - X->Def.SiteToBit, X->Def.Tpow); - if (num2 != 0) { -#pragma omp parallel for default(none) shared(list_Diagonal, list_1) \ -firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) - for (j = 1; j <= i_max; j++) { - num1 = BitCheckGeneral(list_1[j], isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); - list_Diagonal[j] += dtmp_V*num1; - } - } - } /* if (X->Def.iFlgGeneralSpin == TRUE)*/ - - break;/*case Spin:*/ +firstprivate(i_max, dtmp_V, ibit1_up) private(j) + for (j = 1; j <= i_max; j++) list_Diagonal[j] += dtmp_V * ibit1_up; + } /*if (X->Def.iFlgGeneralSpin == FALSE)*/ + else /*if (X->Def.iFlgGeneralSpin == TRUE)*/ { + num1 = BitCheckGeneral((unsigned long int)myrank, + isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); + if (num1 != 0) { +#pragma omp parallel for default(none) shared(list_Diagonal) \ +firstprivate(i_max, dtmp_V) private(j) + for (j = 1; j <= i_max; j++) 
list_Diagonal[j] += dtmp_V; + }/*if (num1 != 0)*/ + }/*if (X->Def.iFlgGeneralSpin == TRUE)*/ + break;/*case SpinGC, Spin:*/ default: fprintf(stdoutMPI, cErrNoModel, X->Def.iCalcModel); return -1; - }/*switch (X->Def.iCalcModel)*/ + } /*switch (X->Def.iCalcModel)*/ return 0; - }/*else if (isite2 > X->Def.Nsite)*/ + }/*if (isite1 >= X->Def.Nsite*/ - switch (X->Def.iCalcModel){ - case HubbardGC: //list_1[j] -> j-1 - is1_spin = X->Def.Tpow[2*isite1-2+isigma1]; - is2_spin = X->Def.Tpow[2*isite2-2+isigma2]; -#pragma omp parallel for default(none) shared(list_Diagonal) firstprivate(i_max, dtmp_V, is1_spin, is2_spin) private(num1, ibit1_spin, num2, ibit2_spin) - for(j = 1;j <= i_max;j++){ - num1=0; - num2=0; - ibit1_spin=(j-1)&is1_spin; - num1+=ibit1_spin/is1_spin; - ibit2_spin=(j-1)&is2_spin; - num2+=ibit2_spin/is2_spin; - list_Diagonal[j]+=num1*num2*dtmp_V; - } + switch (X->Def.iCalcModel) { + case HubbardGC: + if (spin == 0) { + is1 = X->Def.Tpow[2 * isite1 - 2]; + } + else { + is1 = X->Def.Tpow[2 * isite1 - 1]; + } +#pragma omp parallel for default(none) shared(list_1, list_Diagonal) firstprivate(i_max, dtmp_V, is1) private(num1, ibit1) + for (j = 1; j <= i_max; j++) { + + ibit1 = (j - 1)&is1; + num1 = ibit1 / is1; + //fprintf(stdoutMPI, "DEBUG: spin=%ld is1=%ld: isite1=%ld j=%ld num1=%ld \n",spin,is1,isite1,j,num1); + + list_Diagonal[j] += num1 * dtmp_V; + } break; case KondoGC: case Hubbard: case Kondo: - is1_spin = X->Def.Tpow[2*isite1-2+isigma1]; - is2_spin = X->Def.Tpow[2*isite2-2+isigma2]; + if (spin == 0) { + is1 = X->Def.Tpow[2 * isite1 - 2]; + } + else { + is1 = X->Def.Tpow[2 * isite1 - 1]; + } -#pragma omp parallel for default(none) shared(list_Diagonal, list_1) firstprivate(i_max, dtmp_V, is1_spin, is2_spin) private(num1, ibit1_spin, num2, ibit2_spin) - for(j = 1;j <= i_max;j++){ - num1=0; - num2=0; - ibit1_spin=list_1[j]&is1_spin; - num1+=ibit1_spin/is1_spin; - - ibit2_spin=list_1[j]&is2_spin; - num2+=ibit2_spin/is2_spin; - 
list_Diagonal[j]+=num1*num2*dtmp_V; - } - break; - - case Spin: - if(X->Def.iFlgGeneralSpin==FALSE){ - is1_up = X->Def.Tpow[isite1-1]; - is2_up = X->Def.Tpow[isite2-1]; -#pragma omp parallel for default(none) shared(list_Diagonal) firstprivate(i_max, dtmp_V, is1_up, is2_up, isigma1, isigma2, X) private(j, num1, num2) - for(j = 1;j <= i_max; j++){ - num1=X_Spin_CisAis(j, X, is1_up, isigma1); - num2=X_Spin_CisAis(j, X, is2_up, isigma2); - list_Diagonal[j] += num1*num2*dtmp_V; - } - } - else{ -#pragma omp parallel for default(none) shared(list_Diagonal, list_1) firstprivate(i_max, dtmp_V, isite1, isite2, isigma1, isigma2, X) private(j, num1) - for(j = 1;j <= i_max; j++){ - num1=BitCheckGeneral (list_1[j], isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); - if(num1 != 0){ - num1=BitCheckGeneral (list_1[j], isite2, isigma2, X->Def.SiteToBit, X->Def.Tpow); - list_Diagonal[j] += dtmp_V*num1; - } - } - - } +#pragma omp parallel for default(none) shared(list_1, list_Diagonal) firstprivate(i_max, dtmp_V, is1) private(num1, ibit1) + for (j = 1; j <= i_max; j++) { + + ibit1 = list_1[j] & is1; + num1 = ibit1 / is1; + list_Diagonal[j] += num1 * dtmp_V; + } break; - case SpinGC: - if(X->Def.iFlgGeneralSpin==FALSE){ - is1_up = X->Def.Tpow[isite1-1]; - is2_up = X->Def.Tpow[isite2-1]; -#pragma omp parallel for default(none) shared(list_Diagonal) firstprivate(i_max, dtmp_V, is1_up, is2_up, isigma1, isigma2, X) private(j, num1, num2) - for(j = 1;j <= i_max; j++){ - num1=X_SpinGC_CisAis(j, X, is1_up, isigma1); - num2=X_SpinGC_CisAis(j, X, is2_up, isigma2); - list_Diagonal[j] += num1*num2*dtmp_V; - } - } - else{//start:generalspin -#pragma omp parallel for default(none) shared(list_Diagonal) firstprivate(i_max, dtmp_V, isite1, isite2, isigma1, isigma2, X) private(j, num1) - for(j = 1;j <= i_max; j++){ - num1=BitCheckGeneral (j-1, isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); - if(num1 != 0){ - num1=BitCheckGeneral (j-1, isite2, isigma2, X->Def.SiteToBit, X->Def.Tpow); - 
list_Diagonal[j] += dtmp_V*num1; - } - } - } - break; - + case SpinGC: + if (X->Def.iFlgGeneralSpin == FALSE) { + is1_up = X->Def.Tpow[isite1 - 1]; +#pragma omp parallel for default(none) shared(list_1, list_Diagonal) firstprivate(i_max, dtmp_V, is1_up, spin) private(num1, ibit1_up) + for (j = 1; j <= i_max; j++) { + ibit1_up = (((j - 1)& is1_up) / is1_up) ^ (1 - spin); + list_Diagonal[j] += dtmp_V * ibit1_up; + } + } + else { +#pragma omp parallel for default(none) shared(list_Diagonal) firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) + for (j = 1; j <= i_max; j++) { + num1 = BitCheckGeneral(j - 1, isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); + if (num1 != 0) { + list_Diagonal[j] += dtmp_V; + } + } + } + break; + + case Spin: + if (X->Def.iFlgGeneralSpin == FALSE) { + is1_up = X->Def.Tpow[isite1 - 1]; +#pragma omp parallel for default(none) shared(list_1, list_Diagonal) firstprivate(i_max, dtmp_V, is1_up, spin) private(num1, ibit1_up) + for (j = 1; j <= i_max; j++) { + ibit1_up = ((list_1[j] & is1_up) / is1_up) ^ (1 - spin); + list_Diagonal[j] += dtmp_V * ibit1_up; + } + } + else { +#pragma omp parallel for default(none) shared(list_Diagonal, list_1) firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) + for (j = 1; j <= i_max; j++) { + num1 = BitCheckGeneral(list_1[j], isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); + if (num1 != 0) { + list_Diagonal[j] += dtmp_V; + } + } + } + + break; default: fprintf(stdoutMPI, cErrNoModel, X->Def.iCalcModel); return -1; } - - return 0; + return 0; } - /** - * - * @brief Update the vector by the general two-body diagonal interaction, \f$ H_{i\sigma_1 j\sigma_2} n_ {i\sigma_1}n_{j\sigma_2}\f$.\n - * (Using in Time Evolution mode). + * @brief Calculate the components for Coulombinter interaction, \f$ V_{ij} n_ {i}n_{j} \f$ * @param isite1 [in] a site number \f$i \f$ * @param isite2 [in] a site number \f$j \f$ - * @param isigma1 [in] a spin index at \f$i \f$ site. 
- * @param isigma2 [in] a spin index at \f$j \f$ site. - * @param dtmp_V [in] A value of general two-body diagonal interaction \f$ H_{i\sigma_1 j\sigma_2} \f$ + * @param dtmp_V [in] A value of coulombinter interaction \f$ V_{ij} \f$ * @param X [in] Define list to get the operator information. - * @param tmp_v0 [in,out] Result vector - * @param tmp_v1 [in] Input produced vector * @retval -1 fail to calculate the diagonal component. * @retval 0 succeed to calculate the diagonal component. * - * @version 2.1 + * @version 0.1 + * @author Takahiro Misawa (The University of Tokyo) * @author Kazuyoshi Yoshimi (The University of Tokyo) */ -int SetDiagonalTEInterAll( - long unsigned int isite1, - long unsigned int isite2, - long unsigned int isigma1, - long unsigned int isigma2, - double dtmp_V, - struct BindStruct *X, - int nstate, double complex **tmp_v0, - double complex **tmp_v1 -){ - long unsigned int is1_spin; - long unsigned int is2_spin; - long unsigned int is1_up; - long unsigned int is2_up; - - long unsigned int ibit1_spin; - long unsigned int ibit2_spin; - +int SetDiagonalCoulombInter +( + long unsigned int isite1, + long unsigned int isite2, + double dtmp_V, + struct BindStruct *X +) { + long unsigned int is1_up, is1_down; + long unsigned int ibit1_up, ibit1_down; long unsigned int num1; + long unsigned int is2_up, is2_down; + long unsigned int ibit2_up, ibit2_down; long unsigned int num2; long unsigned int j; - long unsigned int i_max=X->Check.idim_max; - double complex dam_pr=0.0; - + long unsigned int i_max = X->Check.idim_max; /* - Forse isite1 <= isite2 + Force isite1 <= isite2 */ if (isite2 < isite1) { j = isite2; isite2 = isite1; isite1 = j; - j = isigma2; - isigma2 = isigma1; - isigma1 = j; - } + }/*if (isite2 < isite1)*/ /* - When isite1 & site2 are in the inter process regino + When isite1 & site2 are in the inter process region */ - if (isite1 > X->Def.Nsite) { + if (/*isite2 => */ isite1 > X->Def.Nsite) { switch (X->Def.iCalcModel) { - case 
HubbardGC: - case KondoGC: - case Hubbard: - case Kondo: - is1_spin = X->Def.Tpow[2 * isite1 - 2 + isigma1]; - is2_spin = X->Def.Tpow[2 * isite2 - 2 + isigma2]; - num1 = 0; - ibit1_spin = (unsigned long int)myrank&is1_spin; - num1 += ibit1_spin / is1_spin; - num2 = 0; - ibit2_spin = (unsigned long int)myrank&is2_spin; - num2 += ibit2_spin / is2_spin; - break;/*case HubbardGC, KondoGC, Hubbard, Kondo:*/ - - case SpinGC: - case Spin: - if (X->Def.iFlgGeneralSpin == FALSE) { - is1_up = X->Def.Tpow[isite1 - 1]; - is2_up = X->Def.Tpow[isite2 - 1]; - num1 = X_SpinGC_CisAis((unsigned long int) myrank + 1, X, is1_up, isigma1); - num2 = X_SpinGC_CisAis((unsigned long int) myrank + 1, X, is2_up, isigma2); - }/*if (X->Def.iFlgGeneralSpin == FALSE)*/ - else {//start:generalspin - num1 = BitCheckGeneral((unsigned long int) myrank, isite1, isigma1, - X->Def.SiteToBit, X->Def.Tpow); - num2 = BitCheckGeneral((unsigned long int) myrank, isite2, isigma2, - X->Def.SiteToBit, X->Def.Tpow); - } - break;/*case SpinGC, Spin:*/ + case HubbardGC: + case KondoGC: + case Hubbard: + case Kondo: - default: - fprintf(stdoutMPI, cErrNoModel, X->Def.iCalcModel); - return -1; - }/*if (isite1 > X->Def.Nsite)*/ + is1_up = X->Def.Tpow[2 * isite1 - 2]; + is1_down = X->Def.Tpow[2 * isite1 - 1]; + is2_up = X->Def.Tpow[2 * isite2 - 2]; + is2_down = X->Def.Tpow[2 * isite2 - 1]; - if (num1 * num2 != 0) { -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1) \ -firstprivate(i_max, dtmp_V) private(j) + num1 = 0; + num2 = 0; + + ibit1_up = (unsigned long int)myrank&is1_up; + num1 += ibit1_up / is1_up; + ibit1_down = (unsigned long int)myrank&is1_down; + num1 += ibit1_down / is1_down; + + ibit2_up = (unsigned long int)myrank&is2_up; + num2 += ibit2_up / is2_up; + ibit2_down = (unsigned long int)myrank&is2_down; + num2 += ibit2_down / is2_down; + +#pragma omp parallel for default(none) shared(list_Diagonal) \ + firstprivate(i_max, dtmp_V, num1, num2) private(j) + for (j = 1; j <= 
i_max; j++) list_Diagonal[j] += num1 * num2*dtmp_V; + + break;/*case HubbardGC, KondoGC, Hubbard, Kondo:*/ + + case Spin: + case SpinGC: +#pragma omp parallel for default(none) shared(list_Diagonal) firstprivate(i_max, dtmp_V) for (j = 1; j <= i_max; j++) { - tmp_v0[j] += dtmp_V * tmp_v1[j]; - dam_pr += dtmp_V * conj(tmp_v1[j]) * tmp_v1[j]; + list_Diagonal[j] += dtmp_V; } - } - dam_pr=SumMPI_dc(dam_pr); - X->Large.prdct += dam_pr; - return 0; + break;/*case Spin, SpinGC*/ - }/*if (isite1 > X->Def.Nsite)*/ - else if (isite2 > X->Def.Nsite) { + default: + fprintf(stdoutMPI, cErrNoModel, X->Def.iCalcModel); + return -1; - switch (X->Def.iCalcModel) { + }/*switch (X->Def.iCalcModel)*/ - case HubbardGC: + return 0; - is1_spin = X->Def.Tpow[2 * isite1 - 2 + isigma1]; - is2_spin = X->Def.Tpow[2 * isite2 - 2 + isigma2]; + }/*if (isite1 > X->Def.Nsite)*/ + else if (isite2 > X->Def.Nsite /* => isite1 */) { - num2 = 0; - ibit2_spin = (unsigned long int)myrank&is2_spin; - num2 += ibit2_spin / is2_spin; - if(num2 !=0) { -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1)\ - firstprivate(i_max, dtmp_V, is1_spin) private(num1, ibit1_spin, j) - for (j = 1; j <= i_max; j++) { - num1 = 0; - ibit1_spin = (j - 1) & is1_spin; - num1 += ibit1_spin / is1_spin; - tmp_v0[j] += dtmp_V * num1 * tmp_v1[j]; - dam_pr += dtmp_V * num1 * conj(tmp_v1[j]) * tmp_v1[j]; - } - } - break;/*case HubbardGC:*/ + switch (X->Def.iCalcModel) { + case HubbardGC: + case KondoGC: + case Hubbard: + case Kondo: + is1_up = X->Def.Tpow[2 * isite1 - 2]; + is1_down = X->Def.Tpow[2 * isite1 - 1]; + is2_up = X->Def.Tpow[2 * isite2 - 2]; + is2_down = X->Def.Tpow[2 * isite2 - 1]; + num2 = 0; + ibit2_up = (unsigned long int)myrank&is2_up; + num2 += ibit2_up / is2_up; + ibit2_down = (unsigned long int)myrank&is2_down; + num2 += ibit2_down / is2_down; + break; - case KondoGC: - case Hubbard: - case Kondo: + case Spin: + case SpinGC: + break; - is1_spin = X->Def.Tpow[2 * isite1 - 2 + isigma1]; 
- is2_spin = X->Def.Tpow[2 * isite2 - 2 + isigma2]; + default: + fprintf(stdoutMPI, cErrNoModel, X->Def.iCalcModel); + return -1; + } - num2 = 0; - ibit2_spin = (unsigned long int)myrank&is2_spin; - num2 += ibit2_spin / is2_spin; - if(num2 !=0) { -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1, list_1)\ - firstprivate(i_max, dtmp_V, is1_spin) private(num1, ibit1_spin, j) - for (j = 1; j <= i_max; j++) { - num1 = 0; - ibit1_spin = list_1[j] & is1_spin; - num1 += ibit1_spin / is1_spin; - tmp_v0[j] += dtmp_V *num1*tmp_v1[j]; - dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; - } - } - break;/*case KondoGC, Hubbard, Kondo:*/ + switch (X->Def.iCalcModel) { - case SpinGC: + case HubbardGC: - if (X->Def.iFlgGeneralSpin == FALSE) { - is1_up = X->Def.Tpow[isite1 - 1]; - is2_up = X->Def.Tpow[isite2 - 1]; - num2 = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, isigma2); +#pragma omp parallel for default(none) shared(list_Diagonal) \ +firstprivate(i_max, dtmp_V, num2, is1_up, is1_down) \ +private(num1, ibit1_up, ibit1_down, j) + for (j = 1; j <= i_max; j++) { + num1 = 0; + ibit1_up = (j - 1)&is1_up; + num1 += ibit1_up / is1_up; + ibit1_down = (j - 1)&is1_down; + num1 += ibit1_down / is1_down; - if(num2 !=0) { -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1)\ - firstprivate(i_max, dtmp_V, is1_up, isigma1, X) private(num1, j) - for (j = 1; j <= i_max; j++) { - num1 = X_SpinGC_CisAis(j, X, is1_up, isigma1); - tmp_v0[j] += dtmp_V * num1 * tmp_v1[j]; - dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; - } - } - }/* if (X->Def.iFlgGeneralSpin == FALSE)*/ - else {//start:generalspin - num2 = BitCheckGeneral((unsigned long int)myrank, isite2, isigma2, - X->Def.SiteToBit, X->Def.Tpow); - if (num2 != 0) { -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1) \ -firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) - for (j = 1; j <= i_max; j++) { - num1 = 
BitCheckGeneral(j - 1, isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); - tmp_v0[j] += dtmp_V * num1 * tmp_v1[j]; - dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; - } - } - }/* if (X->Def.iFlgGeneralSpin == TRUE)*/ + list_Diagonal[j] += num1 * num2*dtmp_V; + } - break;/*case SpinGC:*/ + break;/*case HubbardGC*/ - case Spin: + case KondoGC: + case Hubbard: + case Kondo: - if (X->Def.iFlgGeneralSpin == FALSE) { - is1_up = X->Def.Tpow[isite1 - 1]; - is2_up = X->Def.Tpow[isite2 - 1]; - num2 = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, isigma2); +#pragma omp parallel for default(none) shared(list_1, list_Diagonal) \ +firstprivate(i_max, dtmp_V, is1_up, is1_down, num2) \ +private(num1, ibit1_up, ibit1_down, j) + for (j = 1; j <= i_max; j++) { + num1 = 0; + ibit1_up = list_1[j] & is1_up; + num1 += ibit1_up / is1_up; + ibit1_down = list_1[j] & is1_down; + num1 += ibit1_down / is1_down; - if(num2 !=0) { -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1) \ -firstprivate(i_max, dtmp_V, is1_up, isigma1, X, num2) private(j, num1) - for (j = 1; j <= i_max; j++) { - num1 = X_Spin_CisAis(j, X, is1_up, isigma1); - tmp_v0[j] += dtmp_V * num1 * tmp_v1[j]; - dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; - } - } - }/* if (X->Def.iFlgGeneralSpin == FALSE)*/ - else /* if (X->Def.iFlgGeneralSpin == TRUE)*/{ - num2 = BitCheckGeneral((unsigned long int)myrank, isite2, isigma2, \ - X->Def.SiteToBit, X->Def.Tpow); - if (num2 != 0) { -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1, list_1)\ -firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) - for (j = 1; j <= i_max; j++) { - num1 = BitCheckGeneral(list_1[j], isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); - tmp_v0[j] += dtmp_V * num1 * tmp_v1[j]; - dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; - } - } - } /* if (X->Def.iFlgGeneralSpin == TRUE)*/ + list_Diagonal[j] += num1 * num2*dtmp_V; + } + break;/*case KondoGC, Hubbard, 
Kondo:*/ - break;/*case Spin:*/ + case Spin: + case SpinGC: +#pragma omp parallel for default(none) shared(list_Diagonal) firstprivate(i_max, dtmp_V) + for (j = 1; j <= i_max; j++) { + list_Diagonal[j] += dtmp_V; + } + break;/* case Spin, SpinGC:*/ - default: - fprintf(stdoutMPI, cErrNoModel, X->Def.iCalcModel); - return -1; + default: + fprintf(stdoutMPI, cErrNoModel, X->Def.iCalcModel); + return -1; }/*switch (X->Def.iCalcModel)*/ - dam_pr=SumMPI_dc(dam_pr); - X->Large.prdct += dam_pr; + return 0; - }/*else if (isite2 > X->Def.Nsite)*/ - switch (X->Def.iCalcModel){ + }/*else if (isite2 > X->Def.Nsite)*/ + else { + switch (X->Def.iCalcModel) { case HubbardGC: //list_1[j] -> j-1 - is1_spin = X->Def.Tpow[2*isite1-2+isigma1]; - is2_spin = X->Def.Tpow[2*isite2-2+isigma2]; -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1_spin, is2_spin) private(num1, ibit1_spin, num2, ibit2_spin) - for(j = 1;j <= i_max;j++){ - num1=0; - num2=0; - ibit1_spin=(j-1)&is1_spin; - num1+=ibit1_spin/is1_spin; - ibit2_spin=(j-1)&is2_spin; - num2+=ibit2_spin/is2_spin; - tmp_v0[j] += dtmp_V * num1*num2*tmp_v1[j]; - dam_pr += dtmp_V * num1*num2*conj(tmp_v1[j]) * tmp_v1[j]; + is1_up = X->Def.Tpow[2 * isite1 - 2]; + is1_down = X->Def.Tpow[2 * isite1 - 1]; + is2_up = X->Def.Tpow[2 * isite2 - 2]; + is2_down = X->Def.Tpow[2 * isite2 - 1]; +#pragma omp parallel for default(none) shared( list_Diagonal) firstprivate(i_max, dtmp_V, is1_up, is1_down, is2_up, is2_down) private(num1, ibit1_up, ibit1_down, num2, ibit2_up, ibit2_down) + for (j = 1; j <= i_max; j++) { + num1 = 0; + num2 = 0; + ibit1_up = (j - 1)&is1_up; + num1 += ibit1_up / is1_up; + ibit1_down = (j - 1)&is1_down; + num1 += ibit1_down / is1_down; + + ibit2_up = (j - 1)&is2_up; + num2 += ibit2_up / is2_up; + ibit2_down = (j - 1)&is2_down; + num2 += ibit2_down / is2_down; + + list_Diagonal[j] += num1 * num2*dtmp_V; } break; case KondoGC: case Hubbard: case Kondo: - is1_spin = 
X->Def.Tpow[2*isite1-2+isigma1]; - is2_spin = X->Def.Tpow[2*isite2-2+isigma2]; + is1_up = X->Def.Tpow[2 * isite1 - 2]; + is1_down = X->Def.Tpow[2 * isite1 - 1]; + is2_up = X->Def.Tpow[2 * isite2 - 2]; + is2_down = X->Def.Tpow[2 * isite2 - 1]; -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1, list_1) firstprivate(i_max, dtmp_V, is1_spin, is2_spin) private(num1, ibit1_spin, num2, ibit2_spin) - for(j = 1;j <= i_max;j++){ - num1=0; - num2=0; - ibit1_spin=list_1[j]&is1_spin; - num1+=ibit1_spin/is1_spin; - - ibit2_spin=list_1[j]&is2_spin; - num2+=ibit2_spin/is2_spin; - tmp_v0[j] += dtmp_V * num1*num2*tmp_v1[j]; - dam_pr += dtmp_V * num1*num2*conj(tmp_v1[j]) * tmp_v1[j]; - } - break; +#pragma omp parallel for default(none) shared(list_1, list_Diagonal) firstprivate(i_max, dtmp_V, is1_up, is1_down, is2_up, is2_down) private(num1, ibit1_up, ibit1_down, num2, ibit2_up, ibit2_down) + for (j = 1; j <= i_max; j++) { + num1 = 0; + num2 = 0; + ibit1_up = list_1[j] & is1_up; + num1 += ibit1_up / is1_up; + ibit1_down = list_1[j] & is1_down; + num1 += ibit1_down / is1_down; - case Spin: - if(X->Def.iFlgGeneralSpin==FALSE){ - is1_up = X->Def.Tpow[isite1-1]; - is2_up = X->Def.Tpow[isite2-1]; -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1_up, is2_up, isigma1, isigma2, X) private(j, num1, num2) - for(j = 1;j <= i_max; j++){ - num1=X_Spin_CisAis(j, X, is1_up, isigma1); - num2=X_Spin_CisAis(j, X, is2_up, isigma2); - tmp_v0[j] += dtmp_V * num1*num2*tmp_v1[j]; - dam_pr += dtmp_V * num1*num2*conj(tmp_v1[j]) * tmp_v1[j]; - } - } - else{ -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1, list_1) firstprivate(i_max, dtmp_V, isite1, isite2, isigma1, isigma2, X) private(j, num1) - for(j = 1;j <= i_max; j++){ - num1=BitCheckGeneral (list_1[j], isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); - if(num1 != 0){ - num1=BitCheckGeneral (list_1[j], isite2, isigma2, 
X->Def.SiteToBit, X->Def.Tpow); - tmp_v0[j] += dtmp_V *num1*tmp_v1[j]; - dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; - } - } + ibit2_up = list_1[j] & is2_up; + num2 += ibit2_up / is2_up; + ibit2_down = list_1[j] & is2_down; + num2 += ibit2_down / is2_down; + list_Diagonal[j] += num1 * num2*dtmp_V; } break; + case Spin: case SpinGC: - if(X->Def.iFlgGeneralSpin==FALSE){ - is1_up = X->Def.Tpow[isite1-1]; - is2_up = X->Def.Tpow[isite2-1]; -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1_up, is2_up, isigma1, isigma2, X) private(j, num1, num2) - for(j = 1;j <= i_max; j++){ - num1=X_SpinGC_CisAis(j, X, is1_up, isigma1); - num2=X_SpinGC_CisAis(j, X, is2_up, isigma2); - tmp_v0[j] += dtmp_V * num1*num2*tmp_v1[j]; - dam_pr += dtmp_V * num1*num2*conj(tmp_v1[j]) * tmp_v1[j]; - } - } - else{//start:generalspin -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, isite1, isite2, isigma1, isigma2, X) private(j, num1) - for(j = 1;j <= i_max; j++){ - num1=BitCheckGeneral (j-1, isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); - if(num1 != 0){ - num1=BitCheckGeneral (j-1, isite2, isigma2, X->Def.SiteToBit, X->Def.Tpow); - tmp_v0[j] += dtmp_V *num1*tmp_v1[j]; - dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; - } - } +#pragma omp parallel for default(none) shared(list_Diagonal) firstprivate(i_max, dtmp_V) + for (j = 1; j <= i_max; j++) { + list_Diagonal[j] += dtmp_V; } break; - default: fprintf(stdoutMPI, cErrNoModel, X->Def.iCalcModel); return -1; + } } - dam_pr=SumMPI_dc(dam_pr); - X->Large.prdct += dam_pr; + return 0; } - /** - * - * - * @brief Update the vector by the chemical potential \f$ \mu_{i \sigma_1} n_ {i \sigma_1} \f$ \n - * generated by the commutation relation in terms of the general two-body interaction, \n - * \f$ c_ {i \sigma_1} a_{j\sigma_2}c_ {j \sigma_2}a_ {i \sigma_1} = c_ {i \sigma_1}a_ {i \sigma_1}-c_ {i \sigma_1} a_ {i 
\sigma_1} c_ {j \sigma_2}a_{j\sigma_2}\f$ . - * (Using in Time Evolution mode). - * @param isite1 [in] a site number - * @param spin [in] a spin number - * @param dtmp_V [in] A value of coulombintra interaction \f$ \mu_{i \sigma_1} \f$ - * @param X [in] Define list to get dimension number - * @param tmp_v0 [in,out] Result vector - * @param tmp_v1 [in] Input produced vector + * @brief Calculate the components for Hund interaction, \f$ H_{ij}(n_ {i\uparrow}n_{j\uparrow}+ n_ {i\downarrow}n_{j\downarrow})\f$ + * @param isite1 [in] a site number \f$i \f$ + * @param isite2 [in] a site number \f$j \f$ + * @param dtmp_V [in] A value of Hund interaction \f$ H_{ij} \f$ + * @param X [in] Define list to get the operator information. * @retval -1 fail to calculate the diagonal component. * @retval 0 succeed to calculate the diagonal component. * - * @version 2.1 + * @version 0.1 + * @author Takahiro Misawa (The University of Tokyo) * @author Kazuyoshi Yoshimi (The University of Tokyo) */ -int SetDiagonalTEChemi( - long unsigned int isite1, - long unsigned int spin, - double dtmp_V, - struct BindStruct *X, - int nstate, double complex **tmp_v0, - double complex **tmp_v1 -){ - long unsigned int is1_up; - long unsigned int num1; - long unsigned int isigma1 =spin; - long unsigned int is1,ibit1; +int SetDiagonalHund +( + long unsigned int isite1, + long unsigned int isite2, + double dtmp_V, + struct BindStruct *X +) { - long unsigned int j; - long unsigned int i_max=X->Check.idim_max; - double complex dam_pr=0; + long unsigned int is1_up, is1_down; + long unsigned int ibit1_up, ibit1_down; + long unsigned int num1_up, num1_down; + long unsigned int is2_up, is2_down; + long unsigned int ibit2_up, ibit2_down; + long unsigned int num2_up, num2_down; + long unsigned int is_up; + long unsigned int ibit; + long unsigned int j; + long unsigned int i_max = X->Check.idim_max; /* - When isite1 is in the inter process region + Force isite1 <= isite2 + */ + if (isite2 < isite1) { + j = isite2; 
+ isite2 = isite1; + isite1 = j; + } + /* + When isite1 & site2 are in the inter process region */ - if (isite1 > X->Def.Nsite){ + if (/*isite2 >= */ isite1 > X->Def.Nsite) { switch (X->Def.iCalcModel) { - case HubbardGC: - case KondoGC: - case Hubbard: - case Kondo: + case HubbardGC: + case KondoGC: + case Hubbard: + case Kondo: - if (spin == 0) { - is1 = X->Def.Tpow[2 * isite1 - 2]; - } - else { - is1 = X->Def.Tpow[2 * isite1 - 1]; - } - ibit1 = (unsigned long int)myrank & is1; - num1 = ibit1 / is1; - break;/*case HubbardGC, case KondoGC, Hubbard, Kondo:*/ - - case SpinGC: - case Spin: - - if (X->Def.iFlgGeneralSpin == FALSE) { - is1_up = X->Def.Tpow[isite1 - 1]; - num1 = (((unsigned long int)myrank& is1_up) / is1_up) ^ (1 - spin); - } /*if (X->Def.iFlgGeneralSpin == FALSE)*/ - else /*if (X->Def.iFlgGeneralSpin == TRUE)*/ { - num1 = BitCheckGeneral((unsigned long int)myrank, - isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); - }/*if (X->Def.iFlgGeneralSpin == TRUE)*/ - break;/*case SpinGC, Spin:*/ - - default: - fprintf(stdoutMPI, cErrNoModel, X->Def.iCalcModel); - return -1; + is1_up = X->Def.Tpow[2 * isite1 - 2]; + is1_down = X->Def.Tpow[2 * isite1 - 1]; + is2_up = X->Def.Tpow[2 * isite2 - 2]; + is2_down = X->Def.Tpow[2 * isite2 - 1]; - } /*switch (X->Def.iCalcModel)*/ - if (num1 != 0) { -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1) \ -firstprivate(i_max, dtmp_V) private(j) - for (j = 1; j <= i_max; j++){ - tmp_v0[j] += dtmp_V * tmp_v1[j]; - dam_pr += dtmp_V * conj(tmp_v1[j]) * tmp_v1[j]; - } - }/*if (num1 != 0)*/ - dam_pr=SumMPI_dc(dam_pr); - X->Large.prdct += dam_pr; - return 0; + num1_up = 0; + num1_down = 0; + num2_up = 0; + num2_down = 0; - }/*if (isite1 >= X->Def.Nsite*/ + ibit1_up = (unsigned long int)myrank &is1_up; + num1_up = ibit1_up / is1_up; + ibit1_down = (unsigned long int)myrank &is1_down; + num1_down = ibit1_down / is1_down; - switch (X->Def.iCalcModel){ - case HubbardGC: - if(spin==0){ - is1 = 
X->Def.Tpow[2*isite1-2]; - }else{ - is1 = X->Def.Tpow[2*isite1-1]; + ibit2_up = (unsigned long int)myrank &is2_up; + num2_up = ibit2_up / is2_up; + ibit2_down = (unsigned long int)myrank &is2_down; + num2_down = ibit2_down / is2_down; + +#pragma omp parallel for default(none) shared(list_Diagonal) \ + firstprivate(i_max, dtmp_V, num1_up, num1_down, num2_up, num2_down) private(j) + for (j = 1; j <= i_max; j++) + list_Diagonal[j] += dtmp_V * (num1_up*num2_up + num1_down * num2_down); + + break;/*case HubbardGC, KondoGC, Hubbard, Kondo:*/ + + case SpinGC: + case Spin: + + is1_up = X->Def.Tpow[isite1 - 1]; + is2_up = X->Def.Tpow[isite2 - 1]; + is_up = is1_up + is2_up; + ibit = (unsigned long int)myrank & is_up; + if (ibit == 0 || ibit == is_up) { +#pragma omp parallel for default(none) shared(list_Diagonal) \ +firstprivate(i_max, dtmp_V) private(j) + for (j = 1; j <= i_max; j++) list_Diagonal[j] += dtmp_V; } + break;/*case SpinGC, Spin:*/ - #pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1) private(num1, ibit1) - for(j = 1;j <= i_max;j++){ - ibit1 = (j-1)&is1; - num1 = ibit1/is1; - tmp_v0[j] += dtmp_V * num1*tmp_v1[j]; - dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; + default: + fprintf(stdoutMPI, cErrNoModel, X->Def.iCalcModel); + return -1; + } + + return 0; + + }/*if (isite1 > X->Def.Nsite)*/ + else if (isite2 > X->Def.Nsite /* >= isite1 */) { + + switch (X->Def.iCalcModel) { + + case HubbardGC: + + is1_up = X->Def.Tpow[2 * isite1 - 2]; + is1_down = X->Def.Tpow[2 * isite1 - 1]; + is2_up = X->Def.Tpow[2 * isite2 - 2]; + is2_down = X->Def.Tpow[2 * isite2 - 1]; + + num2_up = 0; + num2_down = 0; + + ibit2_up = (unsigned long int)myrank &is2_up; + num2_up = ibit2_up / is2_up; + ibit2_down = (unsigned long int)myrank &is2_down; + num2_down = ibit2_down / is2_down; + +#pragma omp parallel for default(none) shared( list_Diagonal) \ +firstprivate(i_max, dtmp_V, num2_up, num2_down, is1_up, is1_down) \ 
+private(num1_up, num1_down, ibit1_up, ibit1_down, j) + for (j = 1; j <= i_max; j++) { + num1_up = 0; + num1_down = 0; + + ibit1_up = (j - 1)&is1_up; + num1_up = ibit1_up / is1_up; + ibit1_down = (j - 1)&is1_down; + num1_down = ibit1_down / is1_down; + + list_Diagonal[j] += dtmp_V * (num1_up*num2_up + num1_down * num2_down); } - break; + break;/*case HubbardGC:*/ + case KondoGC: case Hubbard: case Kondo: - if(spin==0){ - is1 = X->Def.Tpow[2*isite1-2]; - }else{ - is1 = X->Def.Tpow[2*isite1-1]; - } -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, nstate, tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1) private(num1, ibit1) - for(j = 1;j <= i_max;j++){ + is1_up = X->Def.Tpow[2 * isite1 - 2]; + is1_down = X->Def.Tpow[2 * isite1 - 1]; + is2_up = X->Def.Tpow[2 * isite2 - 2]; + is2_down = X->Def.Tpow[2 * isite2 - 1]; - ibit1 = list_1[j]&is1; - num1 = ibit1/is1; - tmp_v0[j] += dtmp_V * num1*tmp_v1[j]; - dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; + num2_up = 0; + num2_down = 0; + + ibit2_up = (unsigned long int)myrank&is2_up; + num2_up = ibit2_up / is2_up; + ibit2_down = (unsigned long int)myrank&is2_down; + num2_down = ibit2_down / is2_down; + +#pragma omp parallel for default(none) shared(list_1, list_Diagonal) \ +firstprivate(i_max, dtmp_V, num2_up, num2_down, is1_up, is1_down) \ +private(num1_up, num1_down, ibit1_up, ibit1_down, j) + for (j = 1; j <= i_max; j++) { + num1_up = 0; + num1_down = 0; + + ibit1_up = list_1[j] & is1_up; + num1_up = ibit1_up / is1_up; + ibit1_down = list_1[j] & is1_down; + num1_down = ibit1_down / is1_down; + + list_Diagonal[j] += dtmp_V * (num1_up*num2_up + num1_down * num2_down); } - break; + break;/*case KondoGC, Hubbard, Kondo:*/ case SpinGC: - if(X->Def.iFlgGeneralSpin==FALSE){ - is1_up = X->Def.Tpow[isite1-1]; -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, nstate, tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1_up, spin) private(num1) - for(j = 1;j <= i_max;j++){ - 
num1=(((j-1)& is1_up)/is1_up)^(1-spin); - tmp_v0[j] += dtmp_V * num1*tmp_v1[j]; - dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; + is1_up = X->Def.Tpow[isite1 - 1]; + is2_up = X->Def.Tpow[isite2 - 1]; + ibit2_up = (unsigned long int)myrank & is2_up; + + if (ibit2_up == is2_up) { +#pragma omp parallel for default(none) shared(list_Diagonal) \ +firstprivate(i_max, dtmp_V, is1_up) private(j, ibit1_up) + for (j = 1; j <= i_max; j++) { + ibit1_up = (j - 1) & is1_up; + if (ibit1_up == is1_up) { + list_Diagonal[j] += dtmp_V; + } } } - else{ -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) - for(j = 1;j <= i_max; j++){ - num1=BitCheckGeneral (j-1, isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); - if(num1 != 0){ - tmp_v0[j] += dtmp_V * tmp_v1[j]; - dam_pr += dtmp_V * conj(tmp_v1[j]) * tmp_v1[j]; + else if (ibit2_up == 0) { +#pragma omp parallel for default(none) shared(list_Diagonal) \ +firstprivate(i_max, dtmp_V, is1_up) private(j, ibit1_up) + for (j = 1; j <= i_max; j++) { + ibit1_up = (j - 1) & is1_up; + if (ibit1_up == 0) { + list_Diagonal[j] += dtmp_V; } } } - break; + break;/*case SpinGC:*/ case Spin: - if(X->Def.iFlgGeneralSpin==FALSE){ - is1_up = X->Def.Tpow[isite1-1]; -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, nstate, tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1_up, spin) private(num1) - for(j = 1;j <= i_max;j++){ - num1=((list_1[j]& is1_up)/is1_up)^(1-spin); - tmp_v0[j] += dtmp_V * num1*tmp_v1[j]; - dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; + is1_up = X->Def.Tpow[isite1 - 1]; + is2_up = X->Def.Tpow[isite2 - 1]; + ibit2_up = (unsigned long int)myrank & is2_up; + + if (ibit2_up == is2_up) { +#pragma omp parallel for default(none) shared(list_1, list_Diagonal) \ +firstprivate(i_max, dtmp_V, is1_up) private(j, ibit1_up) + for (j = 1; j <= i_max; j++) { + ibit1_up = list_1[j] & is1_up; + if (ibit1_up == is1_up) { + 
list_Diagonal[j] += dtmp_V; + } } } - else{ -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1, list_1) firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) - for(j = 1;j <= i_max; j++){ - num1=BitCheckGeneral (list_1[j], isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); - if(num1 != 0){ - tmp_v0[j] += dtmp_V * tmp_v1[j]; - dam_pr += dtmp_V * conj(tmp_v1[j]) * tmp_v1[j]; - + else if (ibit2_up == 0) { +#pragma omp parallel for default(none) shared(list_1, list_Diagonal) \ +firstprivate(i_max, dtmp_V, is1_up) private(j, ibit1_up) + for (j = 1; j <= i_max; j++) { + ibit1_up = list_1[j] & is1_up; + if (ibit1_up == 0) { + list_Diagonal[j] += dtmp_V; } } } + break;/*case Spin:*/ + + default: + fprintf(stdoutMPI, cErrNoModel, X->Def.iCalcModel); + return -1; + + }/*switch (X->Def.iCalcModel)*/ + + return 0; + + }/*else if (isite2 > X->Def.Nsite)*/ + else { + switch (X->Def.iCalcModel) { + case HubbardGC: // list_1[j] -> j-1 + is1_up = X->Def.Tpow[2 * isite1 - 2]; + is1_down = X->Def.Tpow[2 * isite1 - 1]; + is2_up = X->Def.Tpow[2 * isite2 - 2]; + is2_down = X->Def.Tpow[2 * isite2 - 1]; + +#pragma omp parallel for default(none) shared( list_Diagonal) firstprivate(i_max, dtmp_V, is1_up, is1_down, is2_up, is2_down) private(num1_up, num1_down, num2_up, num2_down, ibit1_up, ibit1_down, ibit2_up, ibit2_down) + for (j = 1; j <= i_max; j++) { + num1_up = 0; + num1_down = 0; + num2_up = 0; + num2_down = 0; + + ibit1_up = (j - 1)&is1_up; + num1_up = ibit1_up / is1_up; + ibit1_down = (j - 1)&is1_down; + num1_down = ibit1_down / is1_down; + + ibit2_up = (j - 1)&is2_up; + num2_up = ibit2_up / is2_up; + ibit2_down = (j - 1)&is2_down; + num2_down = ibit2_down / is2_down; + + list_Diagonal[j] += dtmp_V * (num1_up*num2_up + num1_down * num2_down); + } + break; + case KondoGC: + case Hubbard: + case Kondo: + is1_up = X->Def.Tpow[2 * isite1 - 2]; + is1_down = X->Def.Tpow[2 * isite1 - 1]; + is2_up = X->Def.Tpow[2 * isite2 - 2]; + is2_down = 
X->Def.Tpow[2 * isite2 - 1]; + +#pragma omp parallel for default(none) shared(list_1, list_Diagonal) firstprivate(i_max, dtmp_V, is1_up, is1_down, is2_up, is2_down) private(num1_up, num1_down, num2_up, num2_down, ibit1_up, ibit1_down, ibit2_up, ibit2_down) + for (j = 1; j <= i_max; j++) { + num1_up = 0; + num1_down = 0; + num2_up = 0; + num2_down = 0; + + ibit1_up = list_1[j] & is1_up; + num1_up = ibit1_up / is1_up; + ibit1_down = list_1[j] & is1_down; + num1_down = ibit1_down / is1_down; + + ibit2_up = list_1[j] & is2_up; + num2_up = ibit2_up / is2_up; + ibit2_down = list_1[j] & is2_down; + num2_down = ibit2_down / is2_down; + + list_Diagonal[j] += dtmp_V * (num1_up*num2_up + num1_down * num2_down); + } + break; + + case SpinGC: + is1_up = X->Def.Tpow[isite1 - 1]; + is2_up = X->Def.Tpow[isite2 - 1]; + is_up = is1_up + is2_up; +#pragma omp parallel for default(none) shared(list_1, list_Diagonal) firstprivate(i_max, dtmp_V, is1_up, is2_up, is_up) private(j, ibit) + for (j = 1; j <= i_max; j++) { + ibit = (j - 1) & is_up; + if (ibit == 0 || ibit == is_up) { + list_Diagonal[j] += dtmp_V; + } + } + break; + case Spin: + is1_up = X->Def.Tpow[isite1 - 1]; + is2_up = X->Def.Tpow[isite2 - 1]; + is_up = is1_up + is2_up; +#pragma omp parallel for default(none) shared(list_1, list_Diagonal) firstprivate(i_max, dtmp_V, is1_up, is2_up, is_up) private(j, ibit) + for (j = 1; j <= i_max; j++) { + ibit = list_1[j] & is_up; + if (ibit == 0 || ibit == is_up) { + list_Diagonal[j] += dtmp_V; + } + } break; default: fprintf(stdoutMPI, cErrNoModel, X->Def.iCalcModel); return -1; + } } - dam_pr=SumMPI_dc(dam_pr); - X->Large.prdct += dam_pr; return 0; } - /** - * - * @brief Update the vector by the general one-body diagonal interaction, \f$ \mu_{i\sigma_1} n_ {i\sigma_1}\f$.\n - * (Using in Time Evolution mode). 
+ * @brief Calculate the components for general two-body diagonal interaction, \f$ H_{i\sigma_1 j\sigma_2} n_ {i\sigma_1}n_{j\sigma_2}\f$ * @param isite1 [in] a site number \f$i \f$ - * @param dtmp_V [in] A value of general one-body diagonal interaction \f$ \mu_{i\sigma_1} \f$ - * @param spin [in] a spin index at \f$i \f$ site. + * @param isite2 [in] a site number \f$j \f$ + * @param isigma1 [in] a spin index at \f$i \f$ site. + * @param isigma2 [in] a spin index at \f$j \f$ site. + * @param dtmp_V [in] A value of general two-body diagonal interaction \f$ H_{i\sigma_1 j\sigma_2} \f$ * @param X [in] Define list to get the operator information. - * @param tmp_v0 [in,out] Result vector - * @param tmp_v1 [in] Input produced vector * @retval -1 fail to calculate the diagonal component. * @retval 0 succeed to calculate the diagonal component. * - * @version 2.1 + * @version 0.1 + * @author Takahiro Misawa (The University of Tokyo) * @author Kazuyoshi Yoshimi (The University of Tokyo) */ - -int SetDiagonalTETransfer - ( - long unsigned int isite1, - double dtmp_V, - long unsigned int spin, - struct BindStruct *X, - int nstate, double complex **tmp_v0, - double complex **tmp_v1 - ){ +int SetDiagonalInterAll +( + long unsigned int isite1, + long unsigned int isite2, + long unsigned int isigma1, + long unsigned int isigma2, + double dtmp_V, + struct BindStruct *X +) +{ + long unsigned int is1_spin; + long unsigned int is2_spin; long unsigned int is1_up; - long unsigned int ibit1_up; + long unsigned int is2_up; + + long unsigned int ibit1_spin; + long unsigned int ibit2_spin; + long unsigned int num1; - long unsigned int isigma1 =spin; - long unsigned int is1,ibit1; - double dam_pr=0.0; + long unsigned int num2; long unsigned int j; - long unsigned int i_max=X->Check.idim_max; + long unsigned int i_max = X->Check.idim_max; /* - When isite1 is in the inter process region + Forse isite1 <= isite2 + */ + if (isite2 < isite1) { + j = isite2; + isite2 = isite1; + isite1 = j; + j = 
isigma2; + isigma2 = isigma1; + isigma1 = j; + } + /* + When isite1 & site2 are in the inter process regino */ - if (isite1 > X->Def.Nsite){ + if (isite1 > X->Def.Nsite) { switch (X->Def.iCalcModel) { - case HubbardGC: - case KondoGC: - case Hubbard: - case Kondo: - if (spin == 0) { - is1 = X->Def.Tpow[2 * isite1 - 2]; + case HubbardGC: + case KondoGC: + case Hubbard: + case Kondo: + + is1_spin = X->Def.Tpow[2 * isite1 - 2 + isigma1]; + is2_spin = X->Def.Tpow[2 * isite2 - 2 + isigma2]; + + num1 = 0; + ibit1_spin = (unsigned long int)myrank&is1_spin; + num1 += ibit1_spin / is1_spin; + + num2 = 0; + ibit2_spin = (unsigned long int)myrank&is2_spin; + num2 += ibit2_spin / is2_spin; + +#pragma omp parallel for default(none) shared(list_Diagonal) \ +firstprivate(i_max, dtmp_V, num2, num1) private(ibit1_spin, j) + for (j = 1; j <= i_max; j++) list_Diagonal[j] += num1 * num2*dtmp_V; + + break;/*case HubbardGC, KondoGC, Hubbard, Kondo:*/ + + case SpinGC: + case Spin: + + if (X->Def.iFlgGeneralSpin == FALSE) { + is1_up = X->Def.Tpow[isite1 - 1]; + is2_up = X->Def.Tpow[isite2 - 1]; + num1 = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is1_up, isigma1); + num2 = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, isigma2); + +#pragma omp parallel for default(none) shared(list_Diagonal) \ +firstprivate(i_max, dtmp_V, is1_up, isigma1, X, num1, num2) private(j) + for (j = 1; j <= i_max; j++) { + list_Diagonal[j] += num1 * num2*dtmp_V; } - else { - is1 = X->Def.Tpow[2 * isite1 - 1]; + }/*if (X->Def.iFlgGeneralSpin == FALSE)*/ + else {//start:generalspin + num1 = BitCheckGeneral((unsigned long int)myrank, isite1, isigma1, + X->Def.SiteToBit, X->Def.Tpow); + num2 = BitCheckGeneral((unsigned long int)myrank, isite2, isigma2, + X->Def.SiteToBit, X->Def.Tpow); + if (num1 != 0 && num2 != 0) { +#pragma omp parallel for default(none) shared(list_Diagonal) \ +firstprivate(i_max, dtmp_V, num1, X) private(j) + for (j = 1; j <= i_max; j++) list_Diagonal[j] += dtmp_V * num1; } - 
ibit1 = (unsigned long int)myrank & is1; - num1 = ibit1 / is1; - break;/*case HubbardGC, case KondoGC, Hubbard, Kondo:*/ - - case SpinGC: - case Spin: - if (X->Def.iFlgGeneralSpin == FALSE) { - is1_up = X->Def.Tpow[isite1 - 1]; - num1 = (((unsigned long int)myrank& is1_up) / is1_up) ^ (1 - spin); - } /*if (X->Def.iFlgGeneralSpin == FALSE)*/ - else /*if (X->Def.iFlgGeneralSpin == TRUE)*/ { - num1 = BitCheckGeneral((unsigned long int)myrank, - isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); - }/*if (X->Def.iFlgGeneralSpin == TRUE)*/ - break;/*case SpinGC, Spin:*/ - - default: - fprintf(stdoutMPI, cErrNoModel, X->Def.iCalcModel); - return -1; + }/*if (X->Def.iFlgGeneralSpin == TRUE)*/ - } /*switch (X->Def.iCalcModel)*/ + break;/*case SpinGC, Spin:*/ - if(num1 !=0) { -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1)\ - firstprivate(i_max, dtmp_V) private(j) + default: + fprintf(stdoutMPI, cErrNoModel, X->Def.iCalcModel); + return -1; + + }/*if (isite1 > X->Def.Nsite)*/ + + return 0; + + }/*if (isite1 > X->Def.Nsite)*/ + else if (isite2 > X->Def.Nsite) { + + switch (X->Def.iCalcModel) { + + case HubbardGC: + + is1_spin = X->Def.Tpow[2 * isite1 - 2 + isigma1]; + is2_spin = X->Def.Tpow[2 * isite2 - 2 + isigma2]; + + num2 = 0; + ibit2_spin = (unsigned long int)myrank&is2_spin; + num2 += ibit2_spin / is2_spin; + +#pragma omp parallel for default(none) shared(list_Diagonal) \ +firstprivate(i_max, dtmp_V, is1_spin, num2) private(num1, ibit1_spin, j) for (j = 1; j <= i_max; j++) { - tmp_v0[j] += dtmp_V * tmp_v1[j]; - dam_pr += dtmp_V * conj(tmp_v1[j]) * tmp_v1[j]; + num1 = 0; + ibit1_spin = (j - 1)&is1_spin; + num1 += ibit1_spin / is1_spin; + list_Diagonal[j] += num1 * num2*dtmp_V; } - } - }/*if (isite1 >= X->Def.Nsite*/ - else {//(isite1 < X->Def.Nsite) - switch (X->Def.iCalcModel) { - case HubbardGC: - if (spin == 0) { - is1 = X->Def.Tpow[2 * isite1 - 2]; - } else { - is1 = X->Def.Tpow[2 * isite1 - 1]; - } -#pragma omp parallel for 
default(none) reduction(+:dam_pr) shared(list_1, nstate, tmp_v0, tmp_v1) \ - firstprivate(i_max, dtmp_V, is1) private(num1, ibit1) - for (j = 1; j <= i_max; j++) { - ibit1 = (j - 1) & is1; - num1 = ibit1 / is1; - tmp_v0[j] += dtmp_V * num1*tmp_v1[j]; - dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; - } - break; - - case KondoGC: - case Hubbard: - case Kondo: - if (spin == 0) { - is1 = X->Def.Tpow[2 * isite1 - 2]; - } else { - is1 = X->Def.Tpow[2 * isite1 - 1]; - } -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, nstate, tmp_v0, tmp_v1) \ - firstprivate(i_max, dtmp_V, is1) private(num1, ibit1) + break;/*case HubbardGC:*/ + + case KondoGC: + case Hubbard: + case Kondo: + + is1_spin = X->Def.Tpow[2 * isite1 - 2 + isigma1]; + is2_spin = X->Def.Tpow[2 * isite2 - 2 + isigma2]; + + num2 = 0; + ibit2_spin = (unsigned long int)myrank&is2_spin; + num2 += ibit2_spin / is2_spin; + +#pragma omp parallel for default(none) shared(list_Diagonal, list_1) \ +firstprivate(i_max, dtmp_V, is1_spin, num2) private(num1, ibit1_spin, j) + for (j = 1; j <= i_max; j++) { + num1 = 0; + ibit1_spin = list_1[j] & is1_spin; + num1 += ibit1_spin / is1_spin; + list_Diagonal[j] += num1 * num2*dtmp_V; + } + break;/*case KondoGC, Hubbard, Kondo:*/ + + case SpinGC: + + if (X->Def.iFlgGeneralSpin == FALSE) { + is1_up = X->Def.Tpow[isite1 - 1]; + is2_up = X->Def.Tpow[isite2 - 1]; + num2 = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, isigma2); + +#pragma omp parallel for default(none) shared(list_Diagonal) \ +firstprivate(i_max, dtmp_V, is1_up, isigma1, X, num2) private(j, num1) for (j = 1; j <= i_max; j++) { - ibit1 = list_1[j] & is1; - num1 = ibit1 / is1; - tmp_v0[j] += dtmp_V * num1*tmp_v1[j]; - dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; + num1 = X_SpinGC_CisAis(j, X, is1_up, isigma1); + list_Diagonal[j] += num1 * num2*dtmp_V; } - break; - - case SpinGC: - if (X->Def.iFlgGeneralSpin == FALSE) { - is1_up = X->Def.Tpow[isite1 - 1]; -#pragma omp 
parallel for default(none) reduction(+:dam_pr) shared(list_1, nstate, tmp_v0, tmp_v1) \ - firstprivate(i_max, dtmp_V, is1_up, spin) private(num1, ibit1_up) - for (j = 1; j <= i_max; j++) { - ibit1_up = (((j - 1) & is1_up) / is1_up) ^ (1 - spin); - tmp_v0[j] += dtmp_V * ibit1_up*tmp_v1[j]; - dam_pr += dtmp_V * ibit1_up*conj(tmp_v1[j]) * tmp_v1[j]; - } - } else { -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1) \ - firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) + }/* if (X->Def.iFlgGeneralSpin == FALSE)*/ + else {//start:generalspin + num2 = BitCheckGeneral((unsigned long int)myrank, isite2, isigma2, + X->Def.SiteToBit, X->Def.Tpow); + if (num2 != 0) { +#pragma omp parallel for default(none) shared(list_Diagonal) \ +firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) for (j = 1; j <= i_max; j++) { num1 = BitCheckGeneral(j - 1, isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); - if (num1 != 0) { - tmp_v0[j] += dtmp_V *tmp_v1[j]; - dam_pr += dtmp_V *conj(tmp_v1[j]) * tmp_v1[j]; - } + list_Diagonal[j] += dtmp_V * num1; } } - break; + }/* if (X->Def.iFlgGeneralSpin == TRUE)*/ - case Spin: - if (X->Def.iFlgGeneralSpin == FALSE) { - is1_up = X->Def.Tpow[isite1 - 1]; -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, nstate, tmp_v0, tmp_v1)\ - firstprivate(i_max, dtmp_V, is1_up, spin) private(num1, ibit1_up) - for (j = 1; j <= i_max; j++) { - ibit1_up = ((list_1[j] & is1_up) / is1_up) ^ (1 - spin); - tmp_v0[j] += dtmp_V * ibit1_up * tmp_v1[j]; - dam_pr += dtmp_V * ibit1_up * conj(tmp_v1[j]) * tmp_v1[j]; - } - } else { -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, nstate, tmp_v0, tmp_v1)\ - firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) + break;/*case SpinGC:*/ + + case Spin: + + if (X->Def.iFlgGeneralSpin == FALSE) { + is1_up = X->Def.Tpow[isite1 - 1]; + is2_up = X->Def.Tpow[isite2 - 1]; + num2 = X_SpinGC_CisAis((unsigned long int)myrank + 
1, X, is2_up, isigma2); + +#pragma omp parallel for default(none) shared(list_Diagonal) \ +firstprivate(i_max, dtmp_V, is1_up, isigma1, X, num2) private(j, num1) + for (j = 1; j <= i_max; j++) { + num1 = X_Spin_CisAis(j, X, is1_up, isigma1); + list_Diagonal[j] += num1 * num2*dtmp_V; + } + }/* if (X->Def.iFlgGeneralSpin == FALSE)*/ + else /* if (X->Def.iFlgGeneralSpin == TRUE)*/ { + num2 = BitCheckGeneral((unsigned long int)myrank, isite2, isigma2, \ + X->Def.SiteToBit, X->Def.Tpow); + if (num2 != 0) { +#pragma omp parallel for default(none) shared(list_Diagonal, list_1) \ +firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) for (j = 1; j <= i_max; j++) { num1 = BitCheckGeneral(list_1[j], isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); - tmp_v0[j] += dtmp_V * num1 * tmp_v1[j]; - dam_pr += dtmp_V * num1 * conj(tmp_v1[j]) * tmp_v1[j]; + list_Diagonal[j] += dtmp_V * num1; } } - break; + } /* if (X->Def.iFlgGeneralSpin == TRUE)*/ - default: - fprintf(stdoutMPI, cErrNoModel, X->Def.iCalcModel); - return -1; + break;/*case Spin:*/ + + default: + fprintf(stdoutMPI, cErrNoModel, X->Def.iCalcModel); + return -1; + + }/*switch (X->Def.iCalcModel)*/ + + return 0; + + }/*else if (isite2 > X->Def.Nsite)*/ + + switch (X->Def.iCalcModel) { + case HubbardGC: //list_1[j] -> j-1 + is1_spin = X->Def.Tpow[2 * isite1 - 2 + isigma1]; + is2_spin = X->Def.Tpow[2 * isite2 - 2 + isigma2]; +#pragma omp parallel for default(none) shared(list_Diagonal) firstprivate(i_max, dtmp_V, is1_spin, is2_spin) private(num1, ibit1_spin, num2, ibit2_spin) + for (j = 1; j <= i_max; j++) { + num1 = 0; + num2 = 0; + ibit1_spin = (j - 1)&is1_spin; + num1 += ibit1_spin / is1_spin; + ibit2_spin = (j - 1)&is2_spin; + num2 += ibit2_spin / is2_spin; + list_Diagonal[j] += num1 * num2*dtmp_V; + } + break; + case KondoGC: + case Hubbard: + case Kondo: + is1_spin = X->Def.Tpow[2 * isite1 - 2 + isigma1]; + is2_spin = X->Def.Tpow[2 * isite2 - 2 + isigma2]; + +#pragma omp parallel for default(none) 
shared(list_Diagonal, list_1) firstprivate(i_max, dtmp_V, is1_spin, is2_spin) private(num1, ibit1_spin, num2, ibit2_spin) + for (j = 1; j <= i_max; j++) { + num1 = 0; + num2 = 0; + ibit1_spin = list_1[j] & is1_spin; + num1 += ibit1_spin / is1_spin; + + ibit2_spin = list_1[j] & is2_spin; + num2 += ibit2_spin / is2_spin; + list_Diagonal[j] += num1 * num2*dtmp_V; } + break; + + case Spin: + if (X->Def.iFlgGeneralSpin == FALSE) { + is1_up = X->Def.Tpow[isite1 - 1]; + is2_up = X->Def.Tpow[isite2 - 1]; +#pragma omp parallel for default(none) shared(list_Diagonal) firstprivate(i_max, dtmp_V, is1_up, is2_up, isigma1, isigma2, X) private(j, num1, num2) + for (j = 1; j <= i_max; j++) { + num1 = X_Spin_CisAis(j, X, is1_up, isigma1); + num2 = X_Spin_CisAis(j, X, is2_up, isigma2); + list_Diagonal[j] += num1 * num2*dtmp_V; + } + } + else { +#pragma omp parallel for default(none) shared(list_Diagonal, list_1) firstprivate(i_max, dtmp_V, isite1, isite2, isigma1, isigma2, X) private(j, num1) + for (j = 1; j <= i_max; j++) { + num1 = BitCheckGeneral(list_1[j], isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); + if (num1 != 0) { + num1 = BitCheckGeneral(list_1[j], isite2, isigma2, X->Def.SiteToBit, X->Def.Tpow); + list_Diagonal[j] += dtmp_V * num1; + } + } + + } + break; + + case SpinGC: + if (X->Def.iFlgGeneralSpin == FALSE) { + is1_up = X->Def.Tpow[isite1 - 1]; + is2_up = X->Def.Tpow[isite2 - 1]; +#pragma omp parallel for default(none) shared(list_Diagonal) firstprivate(i_max, dtmp_V, is1_up, is2_up, isigma1, isigma2, X) private(j, num1, num2) + for (j = 1; j <= i_max; j++) { + num1 = X_SpinGC_CisAis(j, X, is1_up, isigma1); + num2 = X_SpinGC_CisAis(j, X, is2_up, isigma2); + list_Diagonal[j] += num1 * num2*dtmp_V; + } + } + else {//start:generalspin +#pragma omp parallel for default(none) shared(list_Diagonal) firstprivate(i_max, dtmp_V, isite1, isite2, isigma1, isigma2, X) private(j, num1) + for (j = 1; j <= i_max; j++) { + num1 = BitCheckGeneral(j - 1, isite1, isigma1, 
X->Def.SiteToBit, X->Def.Tpow); + if (num1 != 0) { + num1 = BitCheckGeneral(j - 1, isite2, isigma2, X->Def.SiteToBit, X->Def.Tpow); + list_Diagonal[j] += dtmp_V * num1; + } + } + } + break; + + default: + fprintf(stdoutMPI, cErrNoModel, X->Def.iCalcModel); + return -1; } - dam_pr=SumMPI_dc(dam_pr); - X->Large.prdct += dam_pr; return 0; } diff --git a/src/expec_cisajs.c b/src/expec_cisajs.c index d40fd402c..2ea58db7d 100644 --- a/src/expec_cisajs.c +++ b/src/expec_cisajs.c @@ -14,7 +14,6 @@ /* You should have received a copy of the GNU General Public License */ /* along with this program. If not, see . */ -#include "mltplyCommon.h" #include "mltply.h" #include "FileIO.h" #include "bitcalc.h" @@ -25,6 +24,7 @@ #include "mltplyMPIHubbard.h" #include "mltplyMPISpinCore.h" #include "common/setmemory.h" +#include "mltplyCommon.h" /** * @file expec_cisajs.c @@ -258,7 +258,7 @@ int expec_cisajs_Hubbard( long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; double complex dam_pr = 0; long int i_max; - int num1; + int num1, one = 1; long int ibit; long unsigned int is; double complex tmp_OneGreen = 1.0; @@ -516,10 +516,10 @@ int expec_cisajs_SpinGC( ) { int info = 0; if (X->Def.iFlgGeneralSpin == FALSE) { - info = expec_cisajs_SpinGCHalf(X, nstate, Xvec, vec, _fp); + info = expec_cisajs_SpinGCHalf(X, nstate, Xvec, vec, prod); } else { - info = expec_cisajs_SpinGCGeneral(X, nstate, Xvec, vec, _fp); + info = expec_cisajs_SpinGCGeneral(X, nstate, Xvec, vec, prod); } return info; } diff --git a/src/expec_cisajscktaltdc.c b/src/expec_cisajscktaltdc.c index e34fd701c..54ad77301 100644 --- a/src/expec_cisajscktaltdc.c +++ b/src/expec_cisajscktaltdc.c @@ -745,8 +745,8 @@ int expec_cisajscktalt_SpinGeneral( } else if (org_isite3 > X->Def.Nsite || org_isite1 > X->Def.Nsite) { if (org_sigma1 == org_sigma2 && org_sigma3 == org_sigma4) { //diagonal - dam_pr=X_child_CisAisCjuAju_GeneralSpin_MPIsingle( - org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, tmp_V, X, nstate, 
Xvec, vec); + X_child_CisAisCjuAju_GeneralSpin_MPIsingle( + org_isite1 - 1, org_sigma1, org_isite3 - 1, org_sigma3, tmp_V, X, nstate, Xvec, vec); } else if (org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4) { X_child_CisAitCjuAjv_GeneralSpin_MPIsingle( diff --git a/src/expec_energy_flct.c b/src/expec_energy_flct.c index 76d907913..dd5eecab5 100644 --- a/src/expec_energy_flct.c +++ b/src/expec_energy_flct.c @@ -115,7 +115,7 @@ int expec_energy_flct( X->Phys.num[istate] = X->Def.NsiteMPI; X->Phys.num2[istate] = X->Def.NsiteMPI*X->Def.NsiteMPI; X->Phys.Sz[istate] = 0.5 * (double)X->Def.Total2SzMPI; - X->Phys.Sz2[istate] = X->Phys.Sz * X->Phys.Sz; + X->Phys.Sz2[istate] = X->Phys.Sz[istate] * X->Phys.Sz[istate]; } break; default: @@ -140,7 +140,7 @@ int expec_energy_flct( nCalcExpec = 5302; } StartTimer(nCalcExpec); - mltply(X, 1, nstate, tmp_v0, tmp_v1); // v0+=H*v1 + mltply(X, nstate, tmp_v0, tmp_v1); // v0+=H*v1 StopTimer(nCalcExpec); /* switch -> SpinGCBoost */ diff --git a/src/expec_totalspin.c b/src/expec_totalspin.c index bcc565280..94199adef 100644 --- a/src/expec_totalspin.c +++ b/src/expec_totalspin.c @@ -56,7 +56,8 @@ int expec_totalspin switch (X->Def.iCalcModel) { case Spin: totalspin_Spin(X, nstate, vec); - X->Phys.Sz = X->Def.Total2SzMPI / 2.; + for (istate = 0; istate < nstate; istate++) + X->Phys.Sz[istate] = X->Def.Total2SzMPI / 2.; break; case SpinGC: totalspin_SpinGC(X, nstate, vec); @@ -313,7 +314,7 @@ void totalspin_Spin( } } else {//off diagonal - spn += X_child_general_int_spin_TotalS_MPIdouble(isite1 - 1, isite2 - 1, X, nstate, vec, vec); + //debug spn += X_child_general_int_spin_TotalS_MPIdouble(isite1 - 1, isite2 - 1, X, nstate, vec, vec); } #endif } @@ -347,10 +348,10 @@ shared(list_1, vec) X->Phys.s2[istate] += conj(vec[j][istate]) * vec[j][istate] * spn_z / 4.0; } if (isite1 < isite2) { - spn += X_child_general_int_spin_MPIsingle(isite1 - 1, 0, 1, isite2 - 1, 1, 0, 1.0, X, nstate, vec, vec); + //debug spn += 
X_child_general_int_spin_MPIsingle(isite1 - 1, 0, 1, isite2 - 1, 1, 0, 1.0, X, nstate, vec, vec); } else { - spn += conj(X_child_general_int_spin_MPIsingle(isite2 - 1, 1, 0, isite1 - 1, 0, 1, 1.0, X, nstate, vec, vec)); + //debug spn += conj(X_child_general_int_spin_MPIsingle(isite2 - 1, 1, 0, isite1 - 1, 0, 1, 1.0, X, nstate, vec, vec)); } #endif }//isite1 > Nsite || isite2 > Nsite @@ -544,8 +545,8 @@ void totalspin_SpinGC( } }//isite1 = isite2 else {//off diagonal - spn += X_GC_child_CisAitCiuAiv_spin_MPIdouble( - isite1 - 1, 0, 1, isite2 - 1, 1, 0, 1.0, X, nstate, vec, vec) / 2.0; + //debug spn += X_GC_child_CisAitCiuAiv_spin_MPIdouble( + //debug isite1 - 1, 0, 1, isite2 - 1, 1, 0, 1.0, X, nstate, vec, vec) / 2.0; } } else if (isite1 > X->Def.Nsite || isite2 > X->Def.Nsite) { @@ -575,10 +576,10 @@ private(ibit1_up, num1_up, num1_down, spn_z2, list_1_j) shared(vec) X->Phys.s2[istate] += conj(vec[j][istate])*vec[j][istate] * spn_z2 / 4.0; } if (isite1 < isite2) { - spn += X_GC_child_CisAitCiuAiv_spin_MPIsingle(isite1 - 1, 0, 1, isite2 - 1, 1, 0, 1.0, X, nstate, vec, vec) / 2.0; + //debug spn += X_GC_child_CisAitCiuAiv_spin_MPIsingle(isite1 - 1, 0, 1, isite2 - 1, 1, 0, 1.0, X, nstate, vec, vec) / 2.0; } else { - spn += conj(X_GC_child_CisAitCiuAiv_spin_MPIsingle(isite2 - 1, 1, 0, isite1 - 1, 0, 1, 1.0, X, nstate, vec, vec)) / 2.0; + //debug spn += conj(X_GC_child_CisAitCiuAiv_spin_MPIsingle(isite2 - 1, 1, 0, isite1 - 1, 0, 1, 1.0, X, nstate, vec, vec)) / 2.0; } } else { @@ -670,7 +671,7 @@ shared(vec) ibit_tmp = GetOffCompGeneralSpin(off, isite1, sigma_1, sigma_1 - 1, &off_2, X->Def.SiteToBit, X->Def.Tpow); if (ibit_tmp != 0) { for (istate = 0; istate < nstate; istate++) - X->Phys.s2[istate] += conj(vec[j][istate])*vec[off_2 + 1] + X->Phys.s2[istate] += conj(vec[j][istate])*vec[off_2 + 1][istate] * sqrt(S2*(S2 + 1) - spn_z2 * (spn_z2 + 1)) * sqrt(S1*(S1 + 1) - spn_z1 * (spn_z1 - 1)) / 2.0; } @@ -680,7 +681,7 @@ shared(vec) ibit_tmp = GetOffCompGeneralSpin(off, 
isite1, sigma_1, sigma_1 + 1, &off_2, X->Def.SiteToBit, X->Def.Tpow); if (ibit_tmp != 0) { for (istate = 0; istate < nstate; istate++) - X->Phys.s2[istate] += conj(vec[j][istate])*vec[off_2 + 1] + X->Phys.s2[istate] += conj(vec[j][istate])*vec[off_2 + 1][istate] * sqrt(S2*(S2 + 1) - spn_z2 * (spn_z2 - 1.0)) * sqrt(S1*(S1 + 1) - spn_z1 * (spn_z1 + 1)) / 2.0; } diff --git a/src/include/CalcByFullDiag.h b/src/include/CalcByFullDiag.h index 738dbdcd8..378de0bf4 100644 --- a/src/include/CalcByFullDiag.h +++ b/src/include/CalcByFullDiag.h @@ -15,7 +15,6 @@ /* along with this program. If not, see . */ #pragma once #include "Common.h" -#include "makeHam.h" #include "lapack_diag.h" #include "phys.h" #include "output.h" diff --git a/src/include/FirstMultiply.h b/src/include/FirstMultiply.h index 72e474108..61432bb4a 100644 --- a/src/include/FirstMultiply.h +++ b/src/include/FirstMultiply.h @@ -16,6 +16,5 @@ #pragma once #include "Common.h" int FirstMultiply( - int rand_i, struct BindStruct *X ); diff --git a/src/include/diagonalcalc.h b/src/include/diagonalcalc.h index d4244f415..a585f5b4b 100644 --- a/src/include/diagonalcalc.h +++ b/src/include/diagonalcalc.h @@ -65,7 +65,8 @@ int SetDiagonalInterAll int diagonalcalcForTE( const int _istep, struct BindStruct *X, - int nstate, double complex **tmp_v0, - double complex **tmp_v1 + int nstate, + double complex *tmp_v0, + double complex *tmp_v1 ); diff --git a/src/include/expec_totalspin.h b/src/include/expec_totalspin.h index eb7c47711..d7ae2d79a 100644 --- a/src/include/expec_totalspin.h +++ b/src/include/expec_totalspin.h @@ -16,23 +16,4 @@ #pragma once #include "Common.h" -int expec_totalspin -( - struct BindStruct *X, - double complex *vec - ); - -int expec_totalSz -( - struct BindStruct *X, - double complex *vec - ); - -void totalspin_Hubbard(struct BindStruct *X,double complex *vec); -void totalspin_HubbardGC(struct BindStruct *X,double complex *vec); -void totalspin_Spin(struct BindStruct *X,double complex *vec); -void 
totalspin_SpinGC(struct BindStruct *X,double complex *vec); - - -void totalSz_HubbardGC(struct BindStruct *X,double complex *vec); -void totalSz_SpinGC(struct BindStruct *X,double complex *vec); +int expec_totalspin(struct BindStruct *X, int nstate, double complex **vec); diff --git a/src/include/mltplyHubbardCore.h b/src/include/mltplyHubbardCore.h index 752231bed..d5608e1b2 100644 --- a/src/include/mltplyHubbardCore.h +++ b/src/include/mltplyHubbardCore.h @@ -179,7 +179,8 @@ void GC_child_CisAjtCkuAlv_element void GC_CisAis ( long unsigned int j, - double complex *tmp_v0, + int nstate, + double complex **tmp_v0, double complex **tmp_v1, struct BindStruct *X, long unsigned int is1_spin, @@ -229,7 +230,8 @@ int X_GC_CisAjt void CisAjt ( long unsigned int j, - int nstate, double complex *tmp_v0, + int nstate, + double complex **tmp_v0, double complex **tmp_v1, struct BindStruct *X, long unsigned int is1_spin, @@ -243,7 +245,8 @@ void CisAjt void GC_CisAjt ( long unsigned int j, - int nstate, double complex *tmp_v0, + int nstate, + double complex **tmp_v0, double complex **tmp_v1, struct BindStruct *X, long unsigned int is1_spin, @@ -340,7 +343,7 @@ int X_Cis -void X_Ajt +int X_Ajt ( long unsigned int j, long unsigned int is1_spin, diff --git a/src/include/mltplySpinCore.h b/src/include/mltplySpinCore.h index 0e621f4a4..e17350266 100644 --- a/src/include/mltplySpinCore.h +++ b/src/include/mltplySpinCore.h @@ -22,8 +22,9 @@ void child_exchange_spin_element ( long unsigned int j, - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, + double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X, long unsigned int *tmp_off ); @@ -31,18 +32,20 @@ void child_exchange_spin_element void GC_child_pairlift_spin_element ( long unsigned int j, - double complex *tmp_v0, - double complex *tmp_v1, + int nstate, + double complex **tmp_v0, + double complex **tmp_v1, struct BindStruct *X, long unsigned int *tmp_off ); void GC_child_exchange_spin_element ( - 
long unsigned int j, - double complex *tmp_v0, - double complex *tmp_v1, - struct BindStruct *X, + long unsigned int j, + int nstate, + double complex **tmp_v0, + double complex **tmp_v1, + struct BindStruct *X, long unsigned int *tmp_off ); diff --git a/src/include/wrapperMPI.h b/src/include/wrapperMPI.h index 371a1f659..9bea50e78 100644 --- a/src/include/wrapperMPI.h +++ b/src/include/wrapperMPI.h @@ -32,8 +32,8 @@ unsigned long int MaxMPI_li(unsigned long int idim); double MaxMPI_d(double dvalue); double complex SumMPI_dc(double complex norm); double SumMPI_d(double norm); -double SumMPI_dv(int nnorm, double *norm); -double SumMPI_cv(int nnorm, double complex *norm); +void SumMPI_dv(int nnorm, double *norm); +void SumMPI_cv(int nnorm, double complex *norm); unsigned long int SumMPI_li(unsigned long int idim); int SumMPI_i(int idim); unsigned long int BcastMPI_li(int root, unsigned long int idim); diff --git a/src/input.c b/src/input.c index b4c1eded9..a0d551ad9 100644 --- a/src/input.c +++ b/src/input.c @@ -49,8 +49,8 @@ int inputHam(struct BindStruct *X){ fgetsMPI(ctmp2, sizeof(ctmp2) / sizeof(char), fp); sscanf(ctmp2, "%ld %ld %lf %lf\n", &ham_i, &ham_j, &dHam_re, &dHam_im); - Ham[ham_i][ham_j]=dHam_re+I*dHam_im; - Ham[ham_j][ham_i]=conj(Ham[ham_i][ham_j]); + v0[ham_i][ham_j]=dHam_re+I*dHam_im; + v0[ham_j][ham_i]=conj(v0[ham_i][ham_j]); } fclose(fp); return 0; diff --git a/src/makefile_src b/src/makefile_src deleted file mode 100644 index 851f77e9e..000000000 --- a/src/makefile_src +++ /dev/null @@ -1,222 +0,0 @@ -include make.sys - -MTFLAGS = -DDSFMT_MEXP=19937 - -OBJS = \ -CG_EigenVector.o \ -CalcByFullDiag.o \ -CalcByLOBPCG.o \ -CalcByLanczos.o \ -CalcByTPQ.o \ -CalcSpectrum.o \ -CalcSpectrumByBiCG.o \ -CalcSpectrumByFullDiag.o \ -CalcSpectrumByLanczos.o \ -CalcSpectrumByTPQ.o \ -CheckMPI.o \ -ErrorMessage.o \ -FileIO.o \ -FirstMultiply.o \ -HPhiMain.o \ -HPhiTrans.o \ -input.o \ -Lanczos_EigenValue.o \ -Lanczos_EigenVector.o \ -LogMessage.o \ -Multiply.o \ 
-PairEx.o \ -PairExHubbard.o \ -PairExSpin.o \ -PowerLanczos.o \ -ProgressMessage.o \ -SingleEx.o \ -SingleExHubbard.o \ -bisec.o \ -bitcalc.o \ -check.o \ -dSFMT.o \ -diagonalcalc.o \ -expec_cisajs.o \ -expec_cisajscktaltdc.o \ -expec_energy_flct.o \ -expec_totalspin.o \ -global.o \ -lapack_diag.o \ -log.o \ -makeHam.o \ -matrixlapack.o \ -mltply.o \ -mltplyHubbard.o\ -mltplySpinCore.o\ -mltplyHubbardCore.o\ -mltplyMPIHubbard.o \ -mltplyMPIHubbardCore.o \ -mltplyMPISpin.o \ -mltplyMPISpinCore.o \ -mltplyMPIBoost.o \ -mltplySpin.o \ -output.o \ -output_list.o \ -phys.o \ -readdef.o \ -splash.o \ -sz.o \ -time.o \ -vec12.o \ -wrapperMPI.o \ -xsetmem.o \ -StdFace/libStdFace.a \ -komega/libkomega.a - -HEADERS= \ -include/CG_EigenVector.h \ -include/CalcByFullDiag.h \ -include/CalcByLanczos.h \ -include/CalcByTPQ.h \ -include/CalcSpectrum.h \ -include/CalcSpectrumByFullDiag.h \ -include/CalcSpectrumByLanczos.h \ -include/CalcSpectrumByTPQ.h \ -include/CalcTime.h\ -include/CheckMPI.h \ -include/Common.h \ -include/DefCommon.h \ -include/ErrorMessage.h \ -include/FileIO.h \ -include/FirstMultiply.h \ -include/HPhiMain.h \ -include/HPhiTrans.h \ -include/Lanczos_EigenValue.h \ -include/Lanczos_EigenVector.h \ -include/LogMessage.h \ -include/Multiply.h \ -include/PairEx.h \ -include/PairExSpin.h \ -include/PairExHubbard.h \ -include/PowerLanczos.h \ -include/ProgressMessage.h \ -include/SingleEx.h \ -include/SingleExHubbard.h \ -include/StdFace_main.h \ -include/bisec.h \ -include/bitcalc.h \ -include/check.h \ -include/dSFMT-params.h \ -include/dSFMT-params19937.h \ -include/dSFMT.h \ -include/defmodelBoost.h \ -include/diagonalcalc.h \ -include/expec_cisajs.h \ -include/expec_cisajscktaltdc.h \ -include/expec_totalspin.h \ -include/expec_energy_flct.h \ -include/global.h \ -include/input.h \ -include/lapack_diag.h \ -include/log.h \ -include/makeHam.h \ -include/matrixlapack.h \ -include/mfmemory.h \ -include/mltply.h \ -include/mltplyHubbard.h \ -include/mltplySpin.h \ 
-include/mltplyHubbardCore.h \ -include/mltplySpinCore.h \ -include/mltplyMPIBoost.h \ -include/mltplyMPIHubbard.h \ -include/mltplyMPIHubbardCore.h \ -include/mltplyMPISpin.h \ -include/mltplyMPISpinCore.h \ -include/output.h \ -include/output_list.h \ -include/phys.h \ -include/readdef.h \ -include/splash.h \ -include/struct.h \ -include/sz.h \ -include/vec12.h \ -include/version_major.h \ -include/version_miner.h \ -include/version_patch.h \ -include/wrapperMPI.h \ -include/xsetmem.h \ -include/xsetmem_def.h \ -include/xsetmem_large.h \ -komega/komega.h - -all: - cd StdFace; make -f makefile_StdFace - cd komega; make -f makefile_komega - make -f makefile_src HPhi - -SUFFIXES: .o .c - -.c.o: - $(CC) $(CFLAGS) $(MTFLAGS) -I ./include -c $< - -HPhi: $(OBJS) - $(CC) -o $@ -g $(OBJS) StdFace/libStdFace.a komega/libkomega.a $(LIBS) - -clean: - cd StdFace; make -f makefile_StdFace clean - cd komega; make -f makefile_komega clean - rm -f HPhi *.o *.a - -CG_EigenVector.o:$(HEADERS) -CalcByFullDiag.o:$(HEADERS) -CalcByLOBPCG.o:$(HEADERS) -CalcByLanczos.o:$(HEADERS) -CalcByTPQ.o:$(HEADERS) -CalcSpectrum.o:$(HEADERS) -CalcSpectrumByBiCG.o:$(HEADERS) -CalcSpectrumByFullDiag.o:$(HEADERS) -CalcSpectrumByLanczos.o:$(HEADERS) -CalcSpectrumByTPQ.o:$(HEADERS) -CheckMPI.o:$(HEADERS) -ErrorMessage.o:$(HEADERS) -FileIO.o:$(HEADERS) -FirstMultiply.o:$(HEADERS) -HPhiMain.o:$(HEADERS) -HPhiTrans.o:$(HEADERS) -Lanczos_EigenValue.o:$(HEADERS) -Lanczos_EigenVector.o:$(HEADERS) -LogMessage.o:$(HEADERS) -Multiply.o:$(HEADERS) -PairEx.o:$(HEADERS) -PairExHubbard.o:$(HEADERS) -PairExSpin.o:$(HEADERS) -PowerLanczos.o:$(HEADERS) -ProgressMessage.o:$(HEADERS) -SingleEx.o:$(HEADERS) -SingleExHubbard.o:$(HEADERS) -bisec.o:$(HEADERS) -bitcalc.o:$(HEADERS) -check.o:$(HEADERS) -dSFMT.o:$(HEADERS) -diagonalcalc.o:$(HEADERS) -expec_cisajs.o:$(HEADERS) -expec_cisajscktaltdc.o:$(HEADERS) -expec_energy_flct.o:$(HEADERS) -expec_totalspin.o:$(HEADERS) -global.o:$(HEADERS) -lapack_diag.o:$(HEADERS) 
-log.o:$(HEADERS) -makeHam.o:$(HEADERS) -matrixlapack.o:$(HEADERS) -mltply.o:$(HEADERS) -mltplyHubbard.o:$(HEADERS) -mltplyHubbardCore.o:$(HEADERS) -mltplySpinCore.o:$(HEADERS) -mltplyMPI.o:$(HEADERS) -mltplyMPIBoost.o:$(HEADERS) -mltplySpin.o:$(HEADERS) -output.o:$(HEADERS) -output_list.o:$(HEADERS) -phys.o:$(HEADERS) -readdef.o:$(HEADERS) -splash.o:$(HEADERS) -sz.o:$(HEADERS) -time.o:$(HEADERS) -vec12.o:$(HEADERS) -wrapperMPI.o:$(HEADERS) -xsetmem.o:$(HEADERS) diff --git a/src/mltply.c b/src/mltply.c index 39672c28a..0bd602886 100644 --- a/src/mltply.c +++ b/src/mltply.c @@ -95,10 +95,10 @@ int mltply(struct BindStruct *X, int nstate, double complex **tmp_v0,double comp StartTimer(100); #pragma omp parallel for default(none) firstprivate(i_max) shared(tmp_v0, tmp_v1, list_Diagonal) for (j = 1; j <= i_max; j++) { - zaxpy_(&nstate, &list_Diagonal[j], &tmp_v1[j][0], &one, &tmp_v0[tmp_off][0], &one); + zaxpy_(&nstate, &list_Diagonal[j], &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); } StopTimer(100); - if (X->Def.iCalcType == TimeEvolution) diagonalcalcForTE(step_i, X, nstate, tmp_v0, tmp_v1); + if (X->Def.iCalcType == TimeEvolution) diagonalcalcForTE(step_i, X, nstate, &tmp_v0[0][0], &tmp_v1[0][0]); switch (X->Def.iCalcModel) { case HubbardGC: diff --git a/src/mltplyHubbard.c b/src/mltplyHubbard.c index 6d39eb146..26612511c 100644 --- a/src/mltplyHubbard.c +++ b/src/mltplyHubbard.c @@ -576,7 +576,7 @@ void child_general_hopp( #pragma omp parallel for default(none) \ firstprivate(i_max,X,Asum,Adiff,isite1,isite2,trans) private(j) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) - CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite1, isite2, Asum, Adiff, trans) * trans; + CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite1, isite2, Asum, Adiff, trans); return; }/*double complex child_general_hopp*/ /** @@ -603,13 +603,13 @@ void GC_child_general_hopp( #pragma omp parallel for default(none) \ private(j) firstprivate(i_max,X,isite1, trans) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; 
j++) - GC_CisAis(j, nstate, tmp_v0, tmp_v1, X, isite1, trans) * trans; + GC_CisAis(j, nstate, tmp_v0, tmp_v1, X, isite1, trans); }/*if (isite1 == isite2)*/ else { #pragma omp parallel for default(none) \ firstprivate(i_max,X,Asum,Adiff,isite1,isite2,trans) private(j,tmp_off) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) - GC_CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite1, isite2, Asum, Adiff, trans, &tmp_off) * trans; + GC_CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite1, isite2, Asum, Adiff, trans, &tmp_off); } return; }/*double complex GC_child_general_hopp*/ diff --git a/src/mltplyHubbardCore.c b/src/mltplyHubbardCore.c index 269d5dc22..9429780e3 100644 --- a/src/mltplyHubbardCore.c +++ b/src/mltplyHubbardCore.c @@ -231,7 +231,8 @@ int child_exchange_GetInfo( */ void GC_CisAis( long unsigned int j,//!<[in] Index of element of wavefunction - int nstate, double complex **tmp_v0,//!<[inout] Result vector + int nstate, + double complex **tmp_v0,//!<[inout] Result vector double complex **tmp_v1,//!<[in] Input producted vector struct BindStruct *X,//!<[inout] long unsigned int is1_spin,//!<[in] Mask for occupation of @f$(i \sigma)@f$ @@ -363,7 +364,7 @@ void GC_CisAjt( zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[list_1_off + 1][0], &one); } else { - return 0; + return; } }/*double complex GC_CisAjt*/ /** @@ -475,7 +476,7 @@ void child_exchange_element( iexchg = list_1[j] - (is1_down + is2_up); iexchg += (is1_up + is2_down); if(GetOffComp(list_2_1, list_2_2, iexchg, irght, ilft, ihfbit, &off)!=TRUE){ - return 0; + return; } *tmp_off = off; dmv = tmp_J; @@ -485,7 +486,7 @@ void child_exchange_element( iexchg = list_1[j] - (is1_up + is2_down); iexchg += (is1_down + is2_up); if(GetOffComp(list_2_1, list_2_2, iexchg, irght, ilft, ihfbit, &off)!=TRUE){ - return 0; + return; } *tmp_off = off; dmv = tmp_J; @@ -529,7 +530,7 @@ void child_pairhopp_element( iexchg += (is1_up + is1_down); if(GetOffComp(list_2_1, list_2_2, iexchg, irght, ilft, ihfbit, &off)!=TRUE){ - return 
0; + return; } *tmp_off = off; dmv = tmp_J; @@ -922,7 +923,7 @@ void GC_Cis( zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[list_1_off + 1][0], &one); } else { - return 0; + return; } }/*double complex GC_Cis*/ /** @@ -968,7 +969,7 @@ void GC_Ajt( zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[list_1_off + 1][0], &one); } else { - return 0; + return; } }/*double complex GC_Ajt*/ /** @@ -1033,7 +1034,7 @@ term of canonical Hubbard system @author Kazuyoshi Yoshimi (The University of Tokyo) @author Youhei Yamaji (The University of Tokyo) */ -void X_Ajt( +int X_Ajt( long unsigned int j,//!<[in] Index of initial wavefunction long unsigned int is1_spin,//!<[in] Bit mask long unsigned int *tmp_off,//!<[out] Index of final wavefunction diff --git a/src/mltplyMPIHubbard.c b/src/mltplyMPIHubbard.c index f1cd0fe5e..fa6826a84 100644 --- a/src/mltplyMPIHubbard.c +++ b/src/mltplyMPIHubbard.c @@ -79,7 +79,7 @@ void X_GC_child_general_hopp_MPIdouble( else return; idim_max_buf = SendRecv_i(origin, X->Check.idim_max); - SendRecv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); + SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); zaxpy_long(X->Check.idim_max*nstate, trans, &v1buf[1][0], &tmp_v0[1][0]); }/*void GC_child_general_hopp_MPIdouble*/ @@ -291,7 +291,7 @@ void X_child_general_hopp_MPIdouble( idim_max_buf = SendRecv_i(origin, X->Check.idim_max); SendRecv_iv(origin, X->Check.idim_max + 1, idim_max_buf + 1, list_1, list_1buf); - SendRecv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); + SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); #pragma omp parallel default(none) private(j, dmv, Fsgn, ioff) \ firstprivate(idim_max_buf, trans, X) shared(list_2_1, list_2_2, list_1buf, v1buf, tmp_v1, tmp_v0) @@ -352,7 +352,7 @@ void X_child_general_hopp_MPIsingle( idim_max_buf = SendRecv_i(origin, X->Check.idim_max); 
SendRecv_iv(origin, X->Check.idim_max + 1, idim_max_buf + 1, list_1, list_1buf); - SendRecv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); + SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); /* Index in the intra PE */ diff --git a/src/mltplyMPIHubbardCore.c b/src/mltplyMPIHubbardCore.c index d27700f1c..32e221011 100644 --- a/src/mltplyMPIHubbardCore.c +++ b/src/mltplyMPIHubbardCore.c @@ -1126,7 +1126,8 @@ void X_Cis_MPI( int org_isite,//!<[in] Site i unsigned int org_ispin,//!<[in] Spin s double complex tmp_trans,//!<[in] Coupling constant - int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + int nstate, + double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1,//!<[inout] Initial wavefunction double complex **tmp_v1buf,//!<[in] buffer for wavefunction unsigned long int idim_max,//!<[in] Similar to CheckList::idim_max @@ -1169,7 +1170,7 @@ void X_Cis_MPI( else if (state2 == 0) { trans = (double)Fsgn * tmp_trans; } - else retur; + else return; #pragma omp parallel for default(none) private(j) \ firstprivate(idim_max_buf, trans, ioff, _irght, _ilft, _ihfbit, list_2_1_target, list_2_2_target) \ diff --git a/src/mltplyMPISpin.c b/src/mltplyMPISpin.c index 971b4d45f..57d3c2b1f 100644 --- a/src/mltplyMPISpin.c +++ b/src/mltplyMPISpin.c @@ -109,7 +109,7 @@ void X_child_general_int_spin_TotalS_MPIdouble( int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ double complex **tmp_v1//!<[in] Vector to be producted ){ - int mask1, mask2, num1_up, num2_up, ierr, origin; + int mask1, mask2, num1_up, num2_up, ierr, origin, one = 1; unsigned long int idim_max_buf, j, ioff, ibit_tmp; double complex dmv; @@ -132,7 +132,7 @@ void X_child_general_int_spin_TotalS_MPIdouble( for (j = 1; j <= idim_max_buf; j++) { GetOffComp(list_2_1, list_2_2, list_1buf[j], X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); - dmv = 0.5 * 
v1buf[j]; + zaxpy_(&nstate, &dmv, &v1buf[j][0], &one, &tmp_v0[ioff][0], &one); }/*for (j = 1; j <= idim_max_buf; j++)*/ return; }/*double complex X_child_general_int_spin_MPIdouble*/ diff --git a/src/output.c b/src/output.c index c80e65109..0baaf9f7f 100644 --- a/src/output.c +++ b/src/output.c @@ -79,10 +79,10 @@ int outputHam(struct BindStruct *X){ FILE *fp; char sdt[D_FileNameMax]; -#pragma omp parallel for default(none) reduction(+:ihermite) firstprivate(imax) private(i, j) shared(Ham) +#pragma omp parallel for default(none) reduction(+:ihermite) firstprivate(imax) private(i, j) shared(v0) for (i=1; i<=imax; i++){ for (j=1; j<=i; j++){ - if(cabs(Ham[i][j])>1.0e-13){ + if(cabs(v0[i][j])>1.0e-13){ ihermite += 1; } } @@ -97,8 +97,8 @@ int outputHam(struct BindStruct *X){ fprintf(fp, "%ld %ld %ld \n", imax, imax, ihermite); for (i=1; i<=imax; i++){ for (j=1; j<=i; j++){ - if(cabs(Ham[i][j])>1.0e-13){ - fprintf(fp, "%ld %ld %lf %lf\n",i,j,creal(Ham[i][j]),cimag(Ham[i][j])); + if(cabs(v0[i][j])>1.0e-13){ + fprintf(fp, "%ld %ld %lf %lf\n",i,j,creal(v0[i][j]),cimag(v0[i][j])); } } } diff --git a/src/phys.c b/src/phys.c index 89c0cf049..26545db5e 100644 --- a/src/phys.c +++ b/src/phys.c @@ -123,7 +123,7 @@ void phys(struct BindStruct *X, //!<[inout] } #else if (X->Def.iCalcType == FullDiag) { - if (expec_totalspin(X, v1) != 0) { + if (expec_totalspin(X, neig, v1) != 0) { fprintf(stderr, "Error: calc TotalSpin.\n"); exitMPI(-1); } diff --git a/src/wrapperMPI.c b/src/wrapperMPI.c index b0163245f..814d1810e 100644 --- a/src/wrapperMPI.c +++ b/src/wrapperMPI.c @@ -241,7 +241,7 @@ void SumMPI_dv( ) { #ifdef MPI int ierr; - ierr = MPI_Allreduce(MPI_IN_PLACE, &norm, &nnorm, + ierr = MPI_Allreduce(MPI_IN_PLACE, norm, nnorm, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD); if (ierr != 0) exitMPI(-1); #endif @@ -257,7 +257,7 @@ void SumMPI_cv( ) { #ifdef MPI int ierr; - ierr = MPI_Allreduce(MPI_IN_PLACE, &norm, &nnorm, + ierr = MPI_Allreduce(MPI_IN_PLACE, norm, nnorm, 
MPI_DOUBLE_COMPLEX, MPI_SUM, MPI_COMM_WORLD); if (ierr != 0) exitMPI(-1); #endif @@ -354,7 +354,6 @@ void NormMPI_dv( int istate; for (istate = 0; istate < nstate; istate++) dnorm[istate] = 0.0; -#pragma omp parallel for default(none) private(i) firstprivate(myrank) shared(_v1, idim) reduction(+: dnorm) for (idim = 1; idim <= ndim; idim++) { for (istate = 0; istate < nstate; istate++) { dnorm[istate] += conj(_v1[idim][istate])*_v1[idim][istate]; @@ -398,7 +397,6 @@ void MultiVecProdMPI( int istate; for (istate = 0; istate < nstate; istate++) prod[istate] = 0.0; -#pragma omp parallel for default(none) shared(v1,v2,ndim) private(idim) reduction(+: prod) for (idim = 1; idim <= ndim; idim++) { for (istate = 0; istate < nstate; istate++) { prod[istate] += conj(v1[idim][istate])*v2[idim][istate]; diff --git a/tool/makefile_tool b/tool/makefile_tool deleted file mode 100644 index 5e2da2d45..000000000 --- a/tool/makefile_tool +++ /dev/null @@ -1,22 +0,0 @@ -include ../src/make.sys - -.SUFFIXES : -.SUFFIXES : .o .F90 -.SUFFIXES : .o .c - -all:greenr2k cTPQ - -greenr2k:greenr2k.o key2lower.o - $(F90) greenr2k.o key2lower.o $(LIBS) -o $@ - -cTPQ:cTPQ.o - $(F90) cTPQ.o $(LIBS) -o $@ - -.F90.o: - $(F90) -c $< $(FFLAGS) - -.c.o: - $(CC) $(CFLAGS) -c $< - -clean: - rm -f *.o *.mod greenr2k cTPQ From d84960de89268344163f0db303a7b1641ac43a37 Mon Sep 17 00:00:00 2001 From: mitsuaki1987 Date: Fri, 8 Mar 2019 00:55:07 +0900 Subject: [PATCH 09/50] Backup --- src/CalcByTEM.c | 12 +- src/CalcByTPQ.c | 14 +- src/CalcSpectrum.c | 74 +++--- src/CalcSpectrumByBiCG.c | 60 ++--- src/HPhiTrans.c | 17 +- src/PairExHubbard.c | 12 +- src/PairExSpin.c | 40 +-- src/SingleExHubbard.c | 12 +- src/bitcalc.c | 1 - src/check.c | 1 - src/common/setmemory.c | 2 +- src/expec_cisajs.c | 356 +++++++++++++-------------- src/expec_cisajscktaltdc.c | 378 ++++++++++++++--------------- src/expec_energy_flct.c | 299 ++++++++++++----------- src/expec_totalspin.c | 90 +++---- src/include/CalcSpectrum.h | 7 - 
src/include/mltplyMPIHubbardCore.h | 4 +- src/mltplyMPIHubbard.c | 8 +- src/mltplyMPISpin.c | 6 +- src/mltplyMPISpinCore.c | 6 +- 20 files changed, 686 insertions(+), 713 deletions(-) diff --git a/src/CalcByTEM.c b/src/CalcByTEM.c index 3f92e382d..ee33ad015 100644 --- a/src/CalcByTEM.c +++ b/src/CalcByTEM.c @@ -185,25 +185,25 @@ int CalcByTEM( if (childfopenMPI(sdt_phys, "a", &fp) != 0) { return -1; } - fprintf(fp, "%.16lf %.16lf %.16lf %.16lf %.16lf %d\n", Time, X->Bind.Phys.energy, X->Bind.Phys.var, - X->Bind.Phys.doublon, X->Bind.Phys.num, step_i); + fprintf(fp, "%.16lf %.16lf %.16lf %.16lf %.16lf %d\n", + Time, X->Bind.Phys.energy[0], X->Bind.Phys.var[0], + X->Bind.Phys.doublon[0], X->Bind.Phys.num[0], step_i); fclose(fp); if (childfopenMPI(sdt_norm, "a", &fp) != 0) { return -1; } - fprintf(fp, "%.16lf %.16lf %d\n", Time, global_norm, step_i); + fprintf(fp, "%.16lf %.16lf %d\n", Time, global_norm[0], step_i); fclose(fp); if (childfopenMPI(sdt_flct, "a", &fp) != 0) { return -1; } fprintf(fp, "%.16lf %.16lf %.16lf %.16lf %.16lf %.16lf %.16lf %d\n", - Time, X->Bind.Phys.num, X->Bind.Phys.num2, X->Bind.Phys.doublon, X->Bind.Phys.doublon2, X->Bind.Phys.Sz, X->Bind.Phys.Sz2, step_i); + Time, X->Bind.Phys.num[0], X->Bind.Phys.num2[0], X->Bind.Phys.doublon[0], + X->Bind.Phys.doublon2[0], X->Bind.Phys.Sz[0], X->Bind.Phys.Sz2[0], step_i); fclose(fp); - - if (step_i % step_spin == 0) { expec_cisajs(&(X->Bind), 1, v0, v1); expec_cisajscktaltdc(&(X->Bind), 1, v0, v1); diff --git a/src/CalcByTPQ.c b/src/CalcByTPQ.c index 3f34dd616..ac38a1f48 100644 --- a/src/CalcByTPQ.c +++ b/src/CalcByTPQ.c @@ -195,7 +195,7 @@ int CalcByTPQ( StartTimer(3600); for (rand_i = 0; rand_i < NumAve; rand_i++) { inv_temp[rand_i] = (2.0 / Ns) / (LargeValue - X->Bind.Phys.energy[rand_i] / Ns); - if (childfopenMPI(sdt_phys, "a", &fp) == 0) { + if (childfopenMPI(sdt_phys[rand_i], "a", &fp) == 0) { fprintf(fp, "%.16lf %.16lf %.16lf %.16lf %.16lf %d\n", inv_temp[rand_i], X->Bind.Phys.energy[rand_i], 
X->Bind.Phys.var[rand_i], X->Bind.Phys.doublon[rand_i], X->Bind.Phys.num[rand_i], step_i); @@ -203,14 +203,14 @@ int CalcByTPQ( } else return -1; // for norm - if (childfopenMPI(sdt_norm, "a", &fp) == 0) { + if (childfopenMPI(sdt_norm[rand_i], "a", &fp) == 0) { fprintf(fp, "%.16lf %.16lf %.16lf %d\n", inv_temp[rand_i], global_norm[rand_i], global_1st_norm[rand_i], step_i); fclose(fp); } else return -1; // for fluctuations - if (childfopenMPI(sdt_flct, "a", &fp) == 0) { + if (childfopenMPI(sdt_flct[rand_i], "a", &fp) == 0) { fprintf(fp, "%.16lf %.16lf %.16lf %.16lf %.16lf %.16lf %.16lf %d\n", inv_temp[rand_i], X->Bind.Phys.num[rand_i], X->Bind.Phys.num2[rand_i], X->Bind.Phys.doublon[rand_i], X->Bind.Phys.doublon2[rand_i], @@ -246,7 +246,7 @@ int CalcByTPQ( StartTimer(3600); for (rand_i = 0; rand_i < NumAve; rand_i++) { inv_temp[rand_i] = (2.0*step_i / Ns) / (LargeValue - X->Bind.Phys.energy[rand_i] / Ns); - if (childfopenMPI(sdt_phys, "a", &fp) == 0) { + if (childfopenMPI(sdt_phys[rand_i], "a", &fp) == 0) { fprintf(fp, "%.16lf %.16lf %.16lf %.16lf %.16lf %d\n", inv_temp[rand_i], X->Bind.Phys.energy[rand_i], X->Bind.Phys.var[rand_i], X->Bind.Phys.doublon[rand_i], X->Bind.Phys.num[rand_i], step_i); @@ -255,7 +255,7 @@ int CalcByTPQ( } else return FALSE; - if (childfopenMPI(sdt_norm, "a", &fp) == 0) { + if (childfopenMPI(sdt_norm[rand_i], "a", &fp) == 0) { fprintf(fp, "%.16lf %.16lf %.16lf %d\n", inv_temp[rand_i], global_norm[rand_i], global_1st_norm[rand_i], step_i); fclose(fp); @@ -263,9 +263,9 @@ int CalcByTPQ( else return FALSE; // for fluctuations - if (childfopenMPI(sdt_flct, "a", &fp) == 0) { + if (childfopenMPI(sdt_flct[rand_i], "a", &fp) == 0) { fprintf(fp, "%.16lf %.16lf %.16lf %.16lf %.16lf %.16lf %.16lf %d\n", - inv_temp, X->Bind.Phys.num[rand_i], X->Bind.Phys.num2[rand_i], + inv_temp[rand_i], X->Bind.Phys.num[rand_i], X->Bind.Phys.num2[rand_i], X->Bind.Phys.doublon[rand_i], X->Bind.Phys.doublon2[rand_i], X->Bind.Phys.Sz[rand_i], X->Bind.Phys.Sz2[rand_i], 
step_i); fclose(fp); diff --git a/src/CalcSpectrum.c b/src/CalcSpectrum.c index 68e97d336..fa0b867eb 100644 --- a/src/CalcSpectrum.c +++ b/src/CalcSpectrum.c @@ -69,7 +69,38 @@ int OutputSpectrum( fclose(fp); return TRUE; }/*int OutputSpectrum*/ +/// \brief Parent function to calculate the excited state. +/// \param X [in] Struct to get number of excitation operators. +/// \param tmp_v0 [out] Result @f$ v_0 = H_{ex} v_1 @f$. +/// \param tmp_v1 [in] The original state before excitation @f$ v_1 @f$. +/// \retval FALSE Fail to calculate the excited state. +/// \retval TRUE Success to calculate the excited state. +int GetExcitedState +( + struct BindStruct *X, + int nstate, + double complex **tmp_v0, + double complex **tmp_v1 +) +{ + if (X->Def.NSingleExcitationOperator > 0 && X->Def.NPairExcitationOperator > 0) { + fprintf(stderr, "Error: Both single and pair excitation operators exist.\n"); + return FALSE; + } + + if (X->Def.NSingleExcitationOperator > 0) { + if (GetSingleExcitedState(X, nstate, tmp_v0, tmp_v1) != TRUE) { + return FALSE; + } + } + else if (X->Def.NPairExcitationOperator > 0) { + if (GetPairExcitedState(X, nstate, tmp_v0, tmp_v1) != TRUE) { + return FALSE; + } + } + return TRUE; +} /** * @brief A main function to calculate spectrum. 
* @@ -96,7 +127,7 @@ int CalcSpectrum( int iFlagListModified = FALSE; FILE *fp; double dnorm; - double complex *v1Org; /**< Input vector to calculate spectrum function.*/ + double complex **v1Org; /**< Input vector to calculate spectrum function.*/ //ToDo: Nomega should be given as a parameter int Nomega; @@ -143,9 +174,9 @@ int CalcSpectrum( X->Bind.Def.iFlagListModified = iFlagListModified; //Set Memory - v1Org = cd_1d_allocate(X->Bind.Check.idim_maxOrg + 1); + v1Org = cd_2d_allocate(X->Bind.Check.idim_maxOrg + 1,1); for (i = 0; i < X->Bind.Check.idim_maxOrg + 1; i++) { - v1Org[i] = 0; + v1Org[i][0] = 0; } //Make excited state @@ -176,7 +207,7 @@ int CalcSpectrum( fprintf(stderr, "Error: A file of Input vector is incorrect.\n"); return -1; } - byte_size = fread(v1Org, sizeof(complex double), i_max + 1, fp); + byte_size = fread(&v1Org[0][0], sizeof(complex double), i_max + 1, fp); fclose(fp); StopTimer(6101); if (byte_size == 0) printf("byte_size: %d \n", (int)byte_size); @@ -233,7 +264,7 @@ int CalcSpectrum( StopTimer(6100); //Reset list_1, list_2_1, list_2_2 if (iFlagListModified == TRUE) { - free(v1Org); + free_cd_2d_allocate(v1Org); free(list_1_org); free(list_2_1_org); free(list_2_2_org); @@ -274,39 +305,6 @@ int CalcSpectrum( }/*int CalcSpectrum*/ /// -/// \brief Parent function to calculate the excited state. -/// \param X [in] Struct to get number of excitation operators. -/// \param tmp_v0 [out] Result @f$ v_0 = H_{ex} v_1 @f$. -/// \param tmp_v1 [in] The original state before excitation @f$ v_1 @f$. -/// \retval FALSE Fail to calculate the excited state. -/// \retval TRUE Success to calculate the excited state. 
-int GetExcitedState -( - struct BindStruct *X, - int nstate, - double complex **tmp_v0, - double complex **tmp_v1 -) -{ - if (X->Def.NSingleExcitationOperator > 0 && X->Def.NPairExcitationOperator > 0) { - fprintf(stderr, "Error: Both single and pair excitation operators exist.\n"); - return FALSE; - } - - - if (X->Def.NSingleExcitationOperator > 0) { - if (GetSingleExcitedState(X, nstate, tmp_v0, tmp_v1) != TRUE) { - return FALSE; - } - } - else if (X->Def.NPairExcitationOperator > 0) { - if (GetPairExcitedState(X, nstate, tmp_v0, tmp_v1) != TRUE) { - return FALSE; - } - } - return TRUE; -} -/// /// \brief Set target frequencies /// \param X [in, out] Struct to give and get the information of target frequencies.\n /// Output: dcOmegaMax, dcOmegaMin diff --git a/src/CalcSpectrumByBiCG.c b/src/CalcSpectrumByBiCG.c index 1ca230922..d23b4fe48 100644 --- a/src/CalcSpectrumByBiCG.c +++ b/src/CalcSpectrumByBiCG.c @@ -203,8 +203,8 @@ void InitShadowRes( */ int CalcSpectrumByBiCG( struct EDMainCalStruct *X,//!<[inout] - double complex *vrhs,//!<[in] [CheckList::idim_max] Right hand side vector, excited state. - double complex *v2,//!<[inout] [CheckList::idim_max] Work space for residual vector @f${\bf r}@f$ + double complex **vrhs,//!<[in] [CheckList::idim_max] Right hand side vector, excited state. + double complex **v2,//!<[inout] [CheckList::idim_max] Work space for residual vector @f${\bf r}@f$ int Nomega,//!<[in] Number of Frequencies double complex *dcSpectrum,//!<[out] [Nomega] Spectrum double complex *dcomega//!<[in] [Nomega] Frequency @@ -216,7 +216,7 @@ int CalcSpectrumByBiCG( size_t byte_size; int iret, max_step; unsigned long int liLanczosStp_vec = 0; - double complex *v4, *v12, *v14, res_proj; + double complex **v4, **v12, **v14, res_proj; int stp, one = 1, status[3], iomega; double *resz; @@ -226,10 +226,10 @@ int CalcSpectrumByBiCG(
    • Malloc vector for old residual vector (@f${\bf r}_{\rm old}@f$) and old shadow residual vector (@f${\bf {\tilde r}}_{\rm old}@f$).
    • */ - v12 = (double complex*)malloc((X->Bind.Check.idim_max + 1) * sizeof(double complex)); - v14 = (double complex*)malloc((X->Bind.Check.idim_max + 1) * sizeof(double complex)); - v4 = (double complex*)malloc((X->Bind.Check.idim_max + 1) * sizeof(double complex)); - resz = (double*)malloc(Nomega * sizeof(double)); + v12 = cd_2d_allocate(X->Bind.Check.idim_max + 1, 1); + v14 = cd_2d_allocate(X->Bind.Check.idim_max + 1, 1); + v4 = cd_2d_allocate(X->Bind.Check.idim_max + 1, 1); + resz = d_1d_allocate(Nomega); /**
    • Set initial result vector(+shadow result vector) Read residual vectors if restart
    • @@ -245,8 +245,8 @@ int CalcSpectrumByBiCG( fprintf(stdoutMPI, " Start from SCRATCH.\n"); #pragma omp parallel for default(none) shared(v2,v4,vrhs,X) private(idim) for (idim = 1; idim <= X->Bind.Check.idim_max; idim++) { - v2[idim] = vrhs[idim]; - v4[idim] = vrhs[idim]; + v2[idim][0] = vrhs[idim][0]; + v4[idim][0] = vrhs[idim][0]; } //InitShadowRes(&(X->Bind), v4); } @@ -258,10 +258,10 @@ int CalcSpectrumByBiCG( printf("%s %ld %ld %ld\n", sdt, i_max, X->Bind.Check.idim_max, liLanczosStp_vec); exitMPI(-1); } - byte_size = fread(v2, sizeof(complex double), X->Bind.Check.idim_max + 1, fp); - byte_size = fread(v12, sizeof(complex double), X->Bind.Check.idim_max + 1, fp); - byte_size = fread(v4, sizeof(complex double), X->Bind.Check.idim_max + 1, fp); - byte_size = fread(v14, sizeof(complex double), X->Bind.Check.idim_max + 1, fp); + byte_size = fread(&v2[0][0], sizeof(complex double), X->Bind.Check.idim_max + 1, fp); + byte_size = fread(&v12[0][0], sizeof(complex double), X->Bind.Check.idim_max + 1, fp); + byte_size = fread(&v4[0][0], sizeof(complex double), X->Bind.Check.idim_max + 1, fp); + byte_size = fread(&v14[0][0], sizeof(complex double), X->Bind.Check.idim_max + 1, fp); fclose(fp); fprintf(stdoutMPI, " End: Input vectors for recalculation.\n"); TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_InputSpectrumRecalcvecEnd, "a"); @@ -271,15 +271,15 @@ int CalcSpectrumByBiCG( else { #pragma omp parallel for default(none) shared(v2,v4,vrhs,X) private(idim) for (idim = 1; idim <= X->Bind.Check.idim_max; idim++) { - v2[idim] = vrhs[idim]; - v4[idim] = vrhs[idim]; + v2[idim][0] = vrhs[idim][0]; + v4[idim][0] = vrhs[idim][0]; } //InitShadowRes(&(X->Bind), v4); } /**
    • Input @f$\alpha, \beta@f$, projected residual, or start from scratch
    • */ - ReadTMComponents_BiCG(X, v2, v4, v12, v14, Nomega, dcSpectrum, dcomega); + ReadTMComponents_BiCG(X, &v2[0][0], &v4[0][0], &v12[0][0], &v14[0][0], Nomega, dcSpectrum, dcomega); /**
    • @b DO BiCG loop
      • @@ -296,18 +296,18 @@ int CalcSpectrumByBiCG( */ #pragma omp parallel for default(none) shared(X,v12,v14) private(idim) for (idim = 1; idim <= X->Bind.Check.idim_max; idim++) { - v12[idim] = 0.0; - v14[idim] = 0.0; + v12[idim][0] = 0.0; + v14[idim][0] = 0.0; } iret = mltply(&X->Bind, 1, v12, v2); iret = mltply(&X->Bind, 1, v14, v4); - res_proj = VecProdMPI(X->Bind.Check.idim_max, vrhs, v2); + res_proj = VecProdMPI(X->Bind.Check.idim_max, &vrhs[0][0], &v2[0][0]); /**
      • Update projected result vector dcSpectrum.
      • */ - komega_bicg_update(&v12[1], &v2[1], &v14[1], &v4[1], dcSpectrum, &res_proj, status); + komega_bicg_update(&v12[1][0], &v2[1][0], &v14[1][0], &v4[1][0], dcSpectrum, &res_proj, status); /**
      • Output residuals at each frequency for some analysis
      • @@ -324,7 +324,7 @@ int CalcSpectrumByBiCG( fprintf(fp, "\n"); } - fprintf(stdoutMPI, " %9d %9d %8d %25.15e\n", abs(status[0]), status[1], status[2], creal(v12[1])); + fprintf(stdoutMPI, " %9d %9d %8d %25.15e\n", abs(status[0]), status[1], status[2], creal(v12[1][0])); if (status[0] < 0) break; }/*for (stp = 0; stp <= X->Bind.Def.Lanczos_max; stp++)*/ fclose(fp); @@ -348,7 +348,7 @@ int CalcSpectrumByBiCG( fprintf(stdoutMPI, " Start: Output vectors for recalculation.\n"); TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_OutputSpectrumRecalcvecStart, "a"); - komega_bicg_getvec(&v12[1], &v14[1]); + komega_bicg_getvec(&v12[1][0], &v14[1][0]); sprintf(sdt, cFileNameOutputRestartVec, X->Bind.Def.CDataFileHead, myrank); if (childfopenALL(sdt, "wb", &fp) != 0) { @@ -356,10 +356,10 @@ int CalcSpectrumByBiCG( } byte_size = fwrite(&status[0], sizeof(status[0]), 1, fp); byte_size = fwrite(&X->Bind.Check.idim_max, sizeof(X->Bind.Check.idim_max), 1, fp); - byte_size = fwrite(v2, sizeof(complex double), X->Bind.Check.idim_max + 1, fp); - byte_size = fwrite(v12, sizeof(complex double), X->Bind.Check.idim_max + 1, fp); - byte_size = fwrite(v4, sizeof(complex double), X->Bind.Check.idim_max + 1, fp); - byte_size = fwrite(v14, sizeof(complex double), X->Bind.Check.idim_max + 1, fp); + byte_size = fwrite(&v2[0], sizeof(complex double), X->Bind.Check.idim_max + 1, fp); + byte_size = fwrite(&v12[0], sizeof(complex double), X->Bind.Check.idim_max + 1, fp); + byte_size = fwrite(&v4[0], sizeof(complex double), X->Bind.Check.idim_max + 1, fp); + byte_size = fwrite(&v14[0], sizeof(complex double), X->Bind.Check.idim_max + 1, fp); fclose(fp); fprintf(stdoutMPI, " End: Output vectors for recalculation.\n"); @@ -368,9 +368,9 @@ int CalcSpectrumByBiCG( komega_bicg_finalize(); - free(resz); - free(v12); - free(v14); - free(v4); + free_d_1d_allocate(resz); + free_cd_2d_allocate(v12); + free_cd_2d_allocate(v14); + free_cd_2d_allocate(v4); return TRUE; }/*int CalcSpectrumByBiCG*/ diff --git 
a/src/HPhiTrans.c b/src/HPhiTrans.c index d24f3f506..76c54121a 100644 --- a/src/HPhiTrans.c +++ b/src/HPhiTrans.c @@ -90,16 +90,9 @@ int HPhiTrans(struct BindStruct *X) { * @author Kota Ido (The University of Tokyo) */ int TransferWithPeierls(struct BindStruct *X, const double time) { - FILE *fp_err; - char sdt_err[D_FileNameMax]; - - int i, k; - int cnt_trans; - + int i; int ri_x, rj_x; int ri_y, rj_y; - int isite1, isite2; - int isigma1, isigma2; double complex dir; const int Mode = (int) (X->Def.ParaLaser[0]); const double Avp = X->Def.ParaLaser[1]; @@ -179,20 +172,12 @@ int TransferWithPeierls(struct BindStruct *X, const double time) { * @author Kota Ido (The University of Tokyo) */ int TransferForQuench(struct BindStruct *X, const double time) { - FILE *fp_err; - char sdt_err[D_FileNameMax]; - int i, k; - int cnt_trans; - int ri_x, rj_x; int ri_y, rj_y; - int isite1, isite2; - int isigma1, isigma2; double complex dir; const int Mode = (int) (X->Def.ParaLaser[0]); const double Avp = X->Def.ParaLaser[1]; - const double omega = X->Def.ParaLaser[2]; const double time_d = X->Def.ParaLaser[3]; const double time_c = X->Def.ParaLaser[4]; const int Lx = (int) (X->Def.ParaLaser[5]); diff --git a/src/PairExHubbard.c b/src/PairExHubbard.c index 96327986d..61b024048 100644 --- a/src/PairExHubbard.c +++ b/src/PairExHubbard.c @@ -161,7 +161,7 @@ int GetPairExcitedStateHubbard( //set size #ifdef MPI idim_maxMPI = MaxMPI_li(X->Check.idim_maxOrg); - tmp_v1bufOrg = cd_1d_allocate(idim_maxMPI + 1); + tmp_v1bufOrg = cd_2d_allocate(idim_maxMPI + 1, nstate); #endif // MPI for (i = 0; i < X->Def.NPairExcitationOperator; i++) { @@ -206,7 +206,7 @@ private(j, tmp_sgn, tmp_off) for (j = 1; j <= i_max; j++) { tmp_sgn = X_CisAjt(list_1_org[j], X, ibitsite1, ibitsite2, Asum, Adiff, &tmp_off); dmv = tmp_trans * tmp_sgn; - zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[tmp_off], &one); + zaxpy_(&nstate, &dmv, tmp_v1[j], &one, tmp_v0[tmp_off], &one); } } } @@ -222,7 +222,7 @@ private(j, 
tmp_sgn, tmp_off) #pragma omp parallel for default(none) shared(tmp_v0, tmp_v1) \ firstprivate(i_max, tmp_trans) private(j) for (j = 1; j <= i_max; j++) { - zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); + zaxpy_(&nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); } } } @@ -260,7 +260,7 @@ firstprivate(i_max, is, tmp_trans) private(num1, ibit) ibit = list_1[j] & is; num1 = (1 - ibit / is); dmv = -tmp_trans * num1; - zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); + zaxpy_(&nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); } } else { @@ -270,7 +270,7 @@ firstprivate(i_max, is, tmp_trans) private(num1, ibit) ibit = list_1[j] & is; num1 = ibit / is; dmv = tmp_trans * num1; - zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); + zaxpy_(&nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); } } } @@ -281,7 +281,7 @@ firstprivate(i_max, is, tmp_trans) private(num1, ibit) } } #ifdef MPI - free_cd_1d_allocate(tmp_v1bufOrg); + free_cd_2d_allocate(tmp_v1bufOrg); #endif // MPI return TRUE; } diff --git a/src/PairExSpin.c b/src/PairExSpin.c index fc084f63e..6caa634d2 100644 --- a/src/PairExSpin.c +++ b/src/PairExSpin.c @@ -102,7 +102,7 @@ int GetPairExcitedStateHalfSpinGC( #pragma omp parallel for default(none) private(j, tmp_sgn) firstprivate(i_max, isite1, org_sigma1, X,tmp_trans) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) { dmv = (1.0 - X_SpinGC_CisAis(j, X, isite1, org_sigma1))* (-tmp_trans); - zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); + zaxpy_(&nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); } } else { @@ -110,7 +110,7 @@ int GetPairExcitedStateHalfSpinGC( #pragma omp parallel for default(none) private(j, tmp_sgn) firstprivate(i_max, isite1, org_sigma1, X,tmp_trans) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) { dmv = X_SpinGC_CisAis(j, X, isite1, org_sigma1)* tmp_trans; - zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); + zaxpy_(&nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); } } } @@ -122,7 +122,7 @@ int 
GetPairExcitedStateHalfSpinGC( tmp_sgn = X_SpinGC_CisAit(j, X, isite1, org_sigma2, &tmp_off); if (tmp_sgn != 0) { dmv = (double complex)tmp_sgn * tmp_trans; - zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[tmp_off + 1], &one); + zaxpy_(&nstate, &dmv, tmp_v1[j], &one, tmp_v0[tmp_off + 1], &one); } } } @@ -189,7 +189,7 @@ int GetPairExcitedStateGeneralSpinGC( for (j = 1; j <= i_max; j++) { num1 = BitCheckGeneral(j - 1, org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); dmv = -tmp_trans * (1.0 - num1); - zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); + zaxpy_(&nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); } } else { @@ -198,7 +198,7 @@ int GetPairExcitedStateGeneralSpinGC( for (j = 1; j <= i_max; j++) { num1 = BitCheckGeneral(j - 1, org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); dmv = tmp_trans * num1; - zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); + zaxpy_(&nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); } } } @@ -209,7 +209,7 @@ int GetPairExcitedStateGeneralSpinGC( num1 = GetOffCompGeneralSpin(j - 1, org_isite1, org_sigma2, org_sigma1, &tmp_off, X->Def.SiteToBit, X->Def.Tpow); if (num1 != 0) { dmv = tmp_trans * num1; - zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[tmp_off + 1], &one); + zaxpy_(&nstate, &dmv, tmp_v1[j], &one, tmp_v0[tmp_off + 1], &one); } } } @@ -277,7 +277,7 @@ int GetPairExcitedStateHalfSpin( //set size #ifdef MPI idim_maxMPI = MaxMPI_li(X->Check.idim_maxOrg); - tmp_v1bufOrg = cd_1d_allocate(idim_maxMPI + 1); + tmp_v1bufOrg = cd_2d_allocate(idim_maxMPI + 1, nstate); #endif // MPI for (i = 0; i < X->Def.NPairExcitationOperator; i++) { @@ -297,7 +297,7 @@ int GetPairExcitedStateHalfSpin( #pragma omp parallel for default(none) shared(tmp_v0, tmp_v1) \ firstprivate(i_max, tmp_trans) private(j) for (j = 1; j <= i_max; j++) { - zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); + zaxpy_(&nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); } } } @@ -306,7 +306,7 @@ int GetPairExcitedStateHalfSpin( #pragma omp parallel 
for default(none) shared(tmp_v0, tmp_v1) \ firstprivate(i_max, tmp_trans) private(j) for (j = 1; j <= i_max; j++) - zaxpy_(nstate, &tmp_trans, tmp_v1[j], &one, tmp_v0[j], &one); + zaxpy_(&nstate, &tmp_trans, tmp_v1[j], &one, tmp_v0[j], &one); } } }// org_isite1 > X->Def.Nsite @@ -317,14 +317,14 @@ int GetPairExcitedStateHalfSpin( #pragma omp parallel for default(none) private(j) firstprivate(i_max, isite1, org_sigma1, X, tmp_trans) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) { dmv = (1.0 - X_Spin_CisAis(j, X, isite1, org_sigma1)) * (-tmp_trans); - zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); + zaxpy_(&nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); } } else { #pragma omp parallel for default(none) private(j) firstprivate(i_max, isite1, org_sigma1, X, tmp_trans) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) { dmv = X_Spin_CisAis(j, X, isite1, org_sigma1) * tmp_trans; - zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); + zaxpy_(&nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); } } } @@ -350,14 +350,14 @@ shared(tmp_v0, tmp_v1) num1 = X_Spin_CisAit(j, X, isite1, org_sigma2, list_1_org, list_2_1, list_2_2, &tmp_off); if (num1 != 0) { dmv = tmp_trans*(double)num1; - zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[tmp_off], &one); + zaxpy_(&nstate, &dmv, tmp_v1[j], &one, tmp_v0[tmp_off], &one); } } } } } #ifdef MPI - free_cd_1d_allocate(tmp_v1bufOrg); + free_cd_2d_allocate(tmp_v1bufOrg); #endif return TRUE; } @@ -389,7 +389,7 @@ int GetPairExcitedStateGeneralSpin( //set size #ifdef MPI idim_maxMPI = MaxMPI_li(X->Check.idim_maxOrg); - tmp_v1bufOrg = cd_1d_allocate(idim_maxMPI + 1); + tmp_v1bufOrg = cd_2d_allocate(idim_maxMPI + 1, nstate); #endif // MPI for (i = 0; i < X->Def.NPairExcitationOperator; i++) { @@ -409,7 +409,7 @@ int GetPairExcitedStateGeneralSpin( #pragma omp parallel for default(none) private(j) firstprivate(i_max, tmp_trans) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) { dmv = -tmp_trans; - zaxpy_(nstate, &dmv, 
tmp_v1[j], &one, tmp_v0[j], &one); + zaxpy_(&nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); } } } @@ -417,7 +417,7 @@ int GetPairExcitedStateGeneralSpin( if (num1 != 0) { #pragma omp parallel for default(none) private(j) firstprivate(i_max, tmp_trans) shared(tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) { - zaxpy_(nstate, &tmp_trans, tmp_v1[j], &one, tmp_v0[j], &one); + zaxpy_(&nstate, &tmp_trans, tmp_v1[j], &one, tmp_v0[j], &one); } } } @@ -436,7 +436,7 @@ int GetPairExcitedStateGeneralSpin( for (j = 1; j <= i_max; j++) { num1 = BitCheckGeneral(list_1[j], org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); dmv = -tmp_trans * (1.0 - num1); - zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); + zaxpy_(&nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); } } else { @@ -444,7 +444,7 @@ int GetPairExcitedStateGeneralSpin( for (j = 1; j <= i_max; j++) { num1 = BitCheckGeneral(list_1[j], org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); dmv = tmp_trans * num1; - zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); + zaxpy_(&nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); } } }//org_sigma1=org_sigma2 @@ -457,7 +457,7 @@ int GetPairExcitedStateGeneralSpin( X->Def.SiteToBit, X->Def.Tpow); if (tmp_sgn != FALSE) { ConvertToList1GeneralSpin(off, X->Large.ihfbit, &tmp_off); - zaxpy_(nstate, &tmp_trans, tmp_v1[j], &one, tmp_v0[tmp_off], &one); + zaxpy_(&nstate, &tmp_trans, tmp_v1[j], &one, tmp_v0[tmp_off], &one); } } } @@ -469,7 +469,7 @@ int GetPairExcitedStateGeneralSpin( }//org_isite1 != org_isite2 } #ifdef MPI - free_cd_1d_allocate(tmp_v1bufOrg); + free_cd_2d_allocate(tmp_v1bufOrg); #endif // MPI return TRUE; diff --git a/src/SingleExHubbard.c b/src/SingleExHubbard.c index 17f832e44..3aac0fa73 100644 --- a/src/SingleExHubbard.c +++ b/src/SingleExHubbard.c @@ -53,7 +53,7 @@ int GetSingleExcitedStateHubbard( //set size #ifdef MPI idim_maxMPI = MaxMPI_li(X->Check.idim_maxOrg); - tmp_v1bufOrg = cd_1d_allocate(idim_maxMPI + 1); + tmp_v1bufOrg = 
cd_2d_allocate(idim_maxMPI + 1,nstate); #endif // MPI idim_max = X->Check.idim_maxOrg; @@ -77,7 +77,7 @@ private(j, isgn,tmp_off,dmv) isgn = X_Cis(j, is1_spin, &tmp_off, list_1_org, list_2_1, list_2_2, X->Large.irght, X->Large.ilft, X->Large.ihfbit); dmv = isgn * tmpphi; - zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[tmp_off], &one); + zaxpy_(&nstate, &dmv, tmp_v1[j], &one, tmp_v0[tmp_off], &one); } } } @@ -95,13 +95,13 @@ private(j, isgn, tmp_off) isgn = X_Ajt(j, is1_spin, &tmp_off, list_1_org, list_2_1, list_2_2, X->Large.irght, X->Large.ilft, X->Large.ihfbit); dmv = isgn * tmpphi; - zaxpy_(nstate, &dmv, tmp_v1[j], &one, tmp_v0[tmp_off], &one); + zaxpy_(&nstate, &dmv, tmp_v1[j], &one, tmp_v0[tmp_off], &one); } } } } #ifdef MPI - free_cd_1d_allocate(tmp_v1bufOrg); + free_cd_2d_allocate(tmp_v1bufOrg); #endif return TRUE; }/*int GetSingleExcitedStateHubbard*/ @@ -134,7 +134,7 @@ int GetSingleExcitedStateHubbardGC( //set size #ifdef MPI idim_maxMPI = MaxMPI_li(X->Check.idim_maxOrg); - tmp_v1bufOrg = cd_1d_allocate(idim_maxMPI + 1); + tmp_v1bufOrg = cd_2d_allocate(idim_maxMPI + 1, nstate); #endif // MPI // SingleEx @@ -173,7 +173,7 @@ int GetSingleExcitedStateHubbardGC( } } #ifdef MPI - free_cd_1d_allocate(tmp_v1bufOrg); + free_cd_2d_allocate(tmp_v1bufOrg); #endif return TRUE; }/*int GetSingleExcitedStateHubbardGC*/ diff --git a/src/bitcalc.c b/src/bitcalc.c index 82ab6d050..3183aea05 100644 --- a/src/bitcalc.c +++ b/src/bitcalc.c @@ -289,7 +289,6 @@ int ConvertToList1GeneralSpin( ) { long unsigned int ia, ib; - long unsigned int tmp_list; ia=org_ibit%ihlfbit; ib=org_ibit/ihlfbit; if(list_2_1[ia]*list_2_2[ib]==0){ diff --git a/src/check.c b/src/check.c index 39a572d56..f4330e0ed 100644 --- a/src/check.c +++ b/src/check.c @@ -57,7 +57,6 @@ int check(struct BindStruct *X){ long unsigned int tmp; long unsigned int Ns,comb_1,comb_2,comb_3,comb_sum, comb_up, comb_down; int u_loc; - int mfint[7]; long int **comb; long unsigned int idimmax=0; long unsigned int idim=0; diff 
--git a/src/common/setmemory.c b/src/common/setmemory.c index a8f642b06..b0367038e 100644 --- a/src/common/setmemory.c +++ b/src/common/setmemory.c @@ -307,7 +307,7 @@ double complex****cd_4d_allocate(const long unsigned int N, const long unsigned A = (double complex****)calloc((N), sizeof(double complex**)); A[0] = (double complex***)calloc((M*N), sizeof(double complex*)); A[0][0] = (double complex**)calloc((L*M*N), sizeof(double complex)); - A[0][0][0] = (double complex**)calloc((K*L*M*N), sizeof(double complex)); + A[0][0][0] = (double complex*)calloc((K*L*M*N), sizeof(double complex)); for (int_i = 0; int_i < N; int_i++) { A[int_i] = A[0] + int_i * M; for (int_j = 0; int_j < M; int_j++) { diff --git a/src/expec_cisajs.c b/src/expec_cisajs.c index 2ea58db7d..2ff5f7877 100644 --- a/src/expec_cisajs.c +++ b/src/expec_cisajs.c @@ -37,133 +37,6 @@ * @author Kazuyoshi Yoshimi (The University of Tokyo) * */ -/** - * @brief function of calculation for one body green's function - * - * @param X [in] list for getting information to calculate one body green's function. - * @param vec [in] eigenvectors. - * - * @version 0.2 - * @details add calculation one body green's functions for general spin - * - * @version 0.1 - * @author Takahiro Misawa (The University of Tokyo) - * @author Kazuyoshi Yoshimi (The University of Tokyo) - * @retval 0 normally finished. - * @retval -1 abnormally finished. 
- */ -int expec_cisajs( - struct BindStruct *X, - int nstate, - double complex **Xvec, - double complex **vec -){ - FILE *fp; - char sdt[D_FileNameMax]; - double complex **prod; - long unsigned int irght, ilft, ihfbit, ica; - long int i_max; - //For TPQ - int step = 0, rand_i = 0, istate; - - if(X->Def.NCisAjt <1) return 0; - - i_max = X->Check.idim_max; - if(GetSplitBitByModel(X->Def.Nsite, X->Def.iCalcModel, &irght, &ilft, &ihfbit)!=0){ - return -1; - } - X->Large.i_max = i_max; - X->Large.irght = irght; - X->Large.ilft = ilft; - X->Large.ihfbit = ihfbit; - X->Large.mode = M_CORR; - - switch(X->Def.iCalcType){ - case TPQCalc: - step=X->Def.istep; - TimeKeeperWithRandAndStep(X, cFileNameTimeKeep, cTPQExpecOneBodyGStart, "a", 0, step); - break; - case TimeEvolution: - step = X->Def.istep; - TimeKeeperWithStep(X, cFileNameTimeKeep, cTEExpecOneBodyGStart, "a", step); - break; - case FullDiag: - case CG: - break; - } - - prod = cd_2d_allocate(X->Def.NCisAjt, nstate); - switch(X->Def.iCalcModel){ - case HubbardGC: - if(expec_cisajs_HubbardGC(X, nstate, Xvec, vec, prod)!=0){ - return -1; - } - break; - - case KondoGC: - case Hubbard: - case Kondo: - if (expec_cisajs_Hubbard(X, nstate, Xvec, vec, prod) != 0) { - return -1; - } - break; - - case Spin: // for the Sz-conserved spin system - if (expec_cisajs_Spin(X, nstate, Xvec, vec, prod) != 0) { - return -1; - } - break; - - case SpinGC: - if (expec_cisajs_SpinGC(X, nstate, Xvec, vec, prod) != 0) { - return -1; - } - break; - - default: - return -1; - } - - for (istate = 0; istate < nstate; istate++) { - switch (X->Def.iCalcModel) { - case TPQCalc: - step = X->Def.istep; - sprintf(sdt, cFileName1BGreen_TPQ, X->Def.CDataFileHead, istate, step); - break; - case TimeEvolution: - step = X->Def.istep; - sprintf(sdt, cFileName1BGreen_TE, X->Def.CDataFileHead, step); - break; - case FullDiag: - case CG: - sprintf(sdt, cFileName1BGreen_FullDiag, X->Def.CDataFileHead, istate); - break; - } - if (childfopenMPI(sdt, "w", &fp) == 0) 
{ - for (ica = 0; ica < X->Def.NCisAjt; ica++) { - fprintf(fp, " %4ld %4ld %4ld %4ld %.10lf %.10lf\n", - X->Def.CisAjt[ica][0], X->Def.CisAjt[ica][1], X->Def.CisAjt[ica][2], X->Def.CisAjt[ica][3], - creal(prod[ica][istate]), cimag(prod[ica][istate])); - } - fclose(fp); - } - else return -1; - }/*for (istate = 0; istate < nstate; istate++)*/ - - if(X->Def.St==0){ - if(X->Def.iCalcType==TPQCalc){ - TimeKeeperWithRandAndStep(X, cFileNameTimeKeep, cTPQExpecOneBodyGFinish, "a", rand_i, step); - } - else if(X->Def.iCalcType==TimeEvolution){ - TimeKeeperWithStep(X, cFileNameTimeKeep, cTEExpecOneBodyGFinish, "a", step); - } - }else if(X->Def.St==1){ - TimeKeeper(X, cFileNameTimeKeep, cCGExpecOneBodyGFinish, "a"); - fprintf(stdoutMPI, "%s", cLogCGExpecOneBodyGEnd); - } - free_cd_2d_allocate(prod); - return 0; -} /** * @brief function of calculation for one body green's function for Hubbard GC model. * @@ -338,31 +211,6 @@ firstprivate(i_max, is) private(num1, ibit) } return 0; } -/** - * @brief function of calculation for one body green's function for Spin model. - * - * @param X [in] list for getting information to calculate one body green's function. - * @param vec [in] eigenvector - * @param _fp [in] pointer to output file - * @retval 0 normally finished. - * @retval -1 abnormally finished. - */ -int expec_cisajs_Spin( - struct BindStruct *X, - int nstate, - double complex **Xvec, - double complex **vec, - double complex **prod -) { - int info = 0; - if (X->Def.iFlgGeneralSpin == FALSE) { - info = expec_cisajs_SpinHalf(X, nstate, Xvec, vec, prod); - } - else { - info = expec_cisajs_SpinGeneral(X, nstate, Xvec, vec, prod); - } - return info; -} /** * @brief function of calculation for one body green's function for Half-Spin model. * @@ -498,31 +346,6 @@ firstprivate(i_max, org_isite1, org_sigma1, X) shared(vec, list_1) } return 0; } -/** - * @brief function of calculation for one body green's function for SpinGC model. 
- * - * @param X [in] list for getting information to calculate one body green's function. - * @param vec [in] eigenvector - * @param _fp [in] pointer to output file - * @retval 0 normally finished. - * @retval -1 abnormally finished. - */ -int expec_cisajs_SpinGC( - struct BindStruct *X, - int nstate, - double complex **Xvec, - double complex **vec, - double complex **prod -) { - int info = 0; - if (X->Def.iFlgGeneralSpin == FALSE) { - info = expec_cisajs_SpinGCHalf(X, nstate, Xvec, vec, prod); - } - else { - info = expec_cisajs_SpinGCGeneral(X, nstate, Xvec, vec, prod); - } - return info; -} /** * @brief function of calculation for one body green's function for Half-SpinGC model. * @@ -674,3 +497,182 @@ firstprivate(i_max, org_isite1, org_sigma1, org_sigma2, X,tmp_off) shared(vec) } return 0; } +/** + * @brief function of calculation for one body green's function for Spin model. + * + * @param X [in] list for getting information to calculate one body green's function. + * @param vec [in] eigenvector + * @param _fp [in] pointer to output file + * @retval 0 normally finished. + * @retval -1 abnormally finished. + */ +int expec_cisajs_Spin( + struct BindStruct *X, + int nstate, + double complex **Xvec, + double complex **vec, + double complex **prod +) { + int info = 0; + if (X->Def.iFlgGeneralSpin == FALSE) { + info = expec_cisajs_SpinHalf(X, nstate, Xvec, vec, prod); + } + else { + info = expec_cisajs_SpinGeneral(X, nstate, Xvec, vec, prod); + } + return info; +} + +/** + * @brief function of calculation for one body green's function for SpinGC model. + * + * @param X [in] list for getting information to calculate one body green's function. + * @param vec [in] eigenvector + * @param _fp [in] pointer to output file + * @retval 0 normally finished. + * @retval -1 abnormally finished. 
+ */ +int expec_cisajs_SpinGC( + struct BindStruct *X, + int nstate, + double complex **Xvec, + double complex **vec, + double complex **prod +) { + int info = 0; + if (X->Def.iFlgGeneralSpin == FALSE) { + info = expec_cisajs_SpinGCHalf(X, nstate, Xvec, vec, prod); + } + else { + info = expec_cisajs_SpinGCGeneral(X, nstate, Xvec, vec, prod); + } + return info; +} +/** + * @brief function of calculation for one body green's function + * + * @param X [in] list for getting information to calculate one body green's function. + * @param vec [in] eigenvectors. + * + * @version 0.2 + * @details add calculation one body green's functions for general spin + * + * @version 0.1 + * @author Takahiro Misawa (The University of Tokyo) + * @author Kazuyoshi Yoshimi (The University of Tokyo) + * @retval 0 normally finished. + * @retval -1 abnormally finished. + */ +int expec_cisajs( + struct BindStruct *X, + int nstate, + double complex **Xvec, + double complex **vec +) { + FILE *fp; + char sdt[D_FileNameMax]; + double complex **prod; + long unsigned int irght, ilft, ihfbit, ica; + long int i_max; + //For TPQ + int step = 0, rand_i = 0, istate; + + if (X->Def.NCisAjt < 1) return 0; + + i_max = X->Check.idim_max; + if (GetSplitBitByModel(X->Def.Nsite, X->Def.iCalcModel, &irght, &ilft, &ihfbit) != 0) { + return -1; + } + X->Large.i_max = i_max; + X->Large.irght = irght; + X->Large.ilft = ilft; + X->Large.ihfbit = ihfbit; + X->Large.mode = M_CORR; + + switch (X->Def.iCalcType) { + case TPQCalc: + step = X->Def.istep; + TimeKeeperWithRandAndStep(X, cFileNameTimeKeep, cTPQExpecOneBodyGStart, "a", 0, step); + break; + case TimeEvolution: + step = X->Def.istep; + TimeKeeperWithStep(X, cFileNameTimeKeep, cTEExpecOneBodyGStart, "a", step); + break; + case FullDiag: + case CG: + break; + } + + prod = cd_2d_allocate(X->Def.NCisAjt, nstate); + switch (X->Def.iCalcModel) { + case HubbardGC: + if (expec_cisajs_HubbardGC(X, nstate, Xvec, vec, prod) != 0) { + return -1; + } + break; + + case 
KondoGC: + case Hubbard: + case Kondo: + if (expec_cisajs_Hubbard(X, nstate, Xvec, vec, prod) != 0) { + return -1; + } + break; + + case Spin: // for the Sz-conserved spin system + if (expec_cisajs_Spin(X, nstate, Xvec, vec, prod) != 0) { + return -1; + } + break; + + case SpinGC: + if (expec_cisajs_SpinGC(X, nstate, Xvec, vec, prod) != 0) { + return -1; + } + break; + + default: + return -1; + } + + for (istate = 0; istate < nstate; istate++) { + switch (X->Def.iCalcModel) { + case TPQCalc: + step = X->Def.istep; + sprintf(sdt, cFileName1BGreen_TPQ, X->Def.CDataFileHead, istate, step); + break; + case TimeEvolution: + step = X->Def.istep; + sprintf(sdt, cFileName1BGreen_TE, X->Def.CDataFileHead, step); + break; + case FullDiag: + case CG: + sprintf(sdt, cFileName1BGreen_FullDiag, X->Def.CDataFileHead, istate); + break; + } + if (childfopenMPI(sdt, "w", &fp) == 0) { + for (ica = 0; ica < X->Def.NCisAjt; ica++) { + fprintf(fp, " %4d %4d %4d %4d %.10lf %.10lf\n", + X->Def.CisAjt[ica][0], X->Def.CisAjt[ica][1], X->Def.CisAjt[ica][2], X->Def.CisAjt[ica][3], + creal(prod[ica][istate]), cimag(prod[ica][istate])); + } + fclose(fp); + } + else return -1; + }/*for (istate = 0; istate < nstate; istate++)*/ + + if (X->Def.St == 0) { + if (X->Def.iCalcType == TPQCalc) { + TimeKeeperWithRandAndStep(X, cFileNameTimeKeep, cTPQExpecOneBodyGFinish, "a", rand_i, step); + } + else if (X->Def.iCalcType == TimeEvolution) { + TimeKeeperWithStep(X, cFileNameTimeKeep, cTEExpecOneBodyGFinish, "a", step); + } + } + else if (X->Def.St == 1) { + TimeKeeper(X, cFileNameTimeKeep, cCGExpecOneBodyGFinish, "a"); + fprintf(stdoutMPI, "%s", cLogCGExpecOneBodyGEnd); + } + free_cd_2d_allocate(prod); + return 0; +} diff --git a/src/expec_cisajscktaltdc.c b/src/expec_cisajscktaltdc.c index 54ad77301..8b41edefc 100644 --- a/src/expec_cisajscktaltdc.c +++ b/src/expec_cisajscktaltdc.c @@ -39,143 +39,6 @@ * @author Kazuyoshi Yoshimi (The University of Tokyo) * */ -/** - * @brief Parent function to calculate 
two-body green's functions - * - * @param X [in] data list for calculation - * @param vec [in] eigenvectors - * - * @retval 0 normally finished - * @retval -1 abnormally finished - * @note The origin of function's name cisajscktalt comes from c=creation, i=ith site, s=spin, a=annihiration, j=jth site and so on. - * - * @version 0.2 - * @details add function to treat the case of general spin - * - * @version 0.1 - * @author Takahiro Misawa (The University of Tokyo) - * @author Kazuyoshi Yoshimi (The University of Tokyo) - */ -int expec_cisajscktaltdc -( - struct BindStruct *X, - int nstate, - double complex **Xvec, - double complex **vec - ){ - FILE *fp; - char sdt[D_FileNameMax]; - long unsigned int irght, ilft, ihfbit, icaca; - double complex **prod; - //For TPQ - int step = 0, rand_i = 0, istate; - - if(X->Def.NCisAjtCkuAlvDC <1) return 0; - X->Large.mode=M_CORR; - - if(GetSplitBitByModel(X->Def.Nsite, X->Def.iCalcModel, &irght, &ilft, &ihfbit)!=0){ - return -1; - } - - //Make File Name for output - prod = cd_2d_allocate(X->Def.NCisAjt, nstate); - switch (X->Def.iCalcType){ - case TPQCalc: - step=X->Def.istep; - TimeKeeperWithRandAndStep(X, cFileNameTimeKeep, cTPQExpecTwoBodyGStart, "a", 0, step); - break; - case TimeEvolution: - step=X->Def.istep; - TimeKeeperWithStep(X, cFileNameTimeKeep, cTEExpecTwoBodyGStart, "a", step); - break; - case FullDiag: - case CG: - break; - } - - switch(X->Def.iCalcModel){ - case HubbardGC: - if (expec_cisajscktalt_HubbardGC(X, nstate, Xvec, vec, prod) != 0) { - return -1; - } - break; - - case KondoGC: - case Hubbard: - case Kondo: - if (expec_cisajscktalt_Hubbard(X, nstate, Xvec, vec, prod) != 0) { - return -1; - } - break; - - case Spin: - if (expec_cisajscktalt_Spin(X, nstate, Xvec, vec, prod) != 0) { - return -1; - } - break; - - case SpinGC: - if (expec_cisajscktalt_SpinGC(X, nstate, Xvec, vec, prod) != 0) { - return -1; - } - break; - - default: - return -1; - } - - for (istate = 0; istate < nstate; istate++) { - switch 
(X->Def.iCalcModel) { - case TPQCalc: - step = X->Def.istep; - sprintf(sdt, cFileName2BGreen_TPQ, X->Def.CDataFileHead, istate, step); - break; - case TimeEvolution: - step = X->Def.istep; - sprintf(sdt, cFileName2BGreen_TE, X->Def.CDataFileHead, step); - break; - case FullDiag: - case CG: - sprintf(sdt, cFileName2BGreen_FullDiag, X->Def.CDataFileHead, istate); - break; - } - if (childfopenMPI(sdt, "w", &fp) == 0) { - for (icaca = 0; icaca < X->Def.NCisAjt; icaca++) { - fprintf(fp, " %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf\n", - X->Def.CisAjtCkuAlvDC[icaca][0], X->Def.CisAjtCkuAlvDC[icaca][1], - X->Def.CisAjtCkuAlvDC[icaca][2], X->Def.CisAjtCkuAlvDC[icaca][3], - X->Def.CisAjtCkuAlvDC[icaca][4], X->Def.CisAjtCkuAlvDC[icaca][5], - X->Def.CisAjtCkuAlvDC[icaca][6], X->Def.CisAjtCkuAlvDC[icaca][7], - creal(prod[icaca][istate]), cimag(prod[icaca][istate])); - } - fclose(fp); - } - else return -1; - }/*for (istate = 0; istate < nstate; istate++)*/ - - if(X->Def.iCalcType==TPQCalc){ - TimeKeeperWithRandAndStep(X, cFileNameTimeKeep, cTPQExpecTwoBodyGFinish, "a", rand_i, step); - } - else if(X->Def.iCalcType==TimeEvolution){ - TimeKeeperWithStep(X, cFileNameTimeKeep, cTEExpecTwoBodyGFinish, "a", step); - } - //[s] this part will be added - /* For FullDiag, it is convinient to calculate the total spin for each vector. 
- Such functions will be added - if(X->Def.iCalcType==FullDiag){ - if(X->Def.iCalcModel==Spin){ - expec_cisajscktaltdc_alldiag_spin(X,vec); - }else if(X->Def.iCalcModel==Hubbard || X->Def.iCalcModel==Kondo){ - expec_cisajscktaltdc_alldiag(X,vec); - }else{// - X->Phys.s2=0.0; - } - } - */ - //[e] - free_cd_2d_allocate(prod); - return 0; -} /// /// \brief Rearray interactions /// \param i @@ -513,32 +376,6 @@ firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,t } return 0; } -/** - * @brief Parent function to calculate two-body green's functions for Spin model - * - * @param X [in] data list for calculation - * @param vec [in] eigenvectors - * @param _fp [in] output file name - * @retval 0 normally finished - * @retval -1 abnormally finished - * - */ -int expec_cisajscktalt_Spin( - struct BindStruct *X, - int nstate, - double complex **Xvec, - double complex **vec, - double complex **prod -) { - int info = 0; - if (X->Def.iFlgGeneralSpin == FALSE) { - info = expec_cisajscktalt_SpinHalf(X, nstate, Xvec, vec, prod); - } - else { - info = expec_cisajscktalt_SpinGeneral(X, nstate, Xvec, vec, prod); - } - return info; -} /** * @brief Child function to calculate two-body green's functions for 1/2 Spin model * @@ -793,32 +630,6 @@ firstprivate(i_max,X, org_isite1, org_isite3, org_sigma1, org_sigma2, org_sigma3 } return 0; } -/** - * @brief Parent function to calculate two-body green's functions for Spin GC model - * - * @param X [in] data list for calculation - * @param vec [in] eigenvectors - * @param _fp [in] output file name - * @retval 0 normally finished - * @retval -1 abnormally finished - * - */ -int expec_cisajscktalt_SpinGC( - struct BindStruct *X, - int nstate, - double complex **Xvec, - double complex **vec, - double complex **prod -){ - int info = 0; - if (X->Def.iFlgGeneralSpin == FALSE) { - info = expec_cisajscktalt_SpinGCHalf(X, nstate, Xvec, vec, prod); - } - else { - info = expec_cisajscktalt_SpinGCGeneral(X, nstate, Xvec, vec, 
prod); - } - return info; -} /** * @brief Child function to calculate two-body green's functions for 1/2 Spin GC model * @@ -1106,3 +917,192 @@ firstprivate(i_max,X, org_isite1, org_isite3, org_sigma1, org_sigma2, org_sigma3 } return 0; } +/** + * @brief Parent function to calculate two-body green's functions for Spin model + * + * @param X [in] data list for calculation + * @param vec [in] eigenvectors + * @param _fp [in] output file name + * @retval 0 normally finished + * @retval -1 abnormally finished + * + */ +int expec_cisajscktalt_Spin( + struct BindStruct *X, + int nstate, + double complex **Xvec, + double complex **vec, + double complex **prod +) { + int info = 0; + if (X->Def.iFlgGeneralSpin == FALSE) { + info = expec_cisajscktalt_SpinHalf(X, nstate, Xvec, vec, prod); + } + else { + info = expec_cisajscktalt_SpinGeneral(X, nstate, Xvec, vec, prod); + } + return info; +} +/** + * @brief Parent function to calculate two-body green's functions for Spin GC model + * + * @param X [in] data list for calculation + * @param vec [in] eigenvectors + * @param _fp [in] output file name + * @retval 0 normally finished + * @retval -1 abnormally finished + * + */ +int expec_cisajscktalt_SpinGC( + struct BindStruct *X, + int nstate, + double complex **Xvec, + double complex **vec, + double complex **prod +) { + int info = 0; + if (X->Def.iFlgGeneralSpin == FALSE) { + info = expec_cisajscktalt_SpinGCHalf(X, nstate, Xvec, vec, prod); + } + else { + info = expec_cisajscktalt_SpinGCGeneral(X, nstate, Xvec, vec, prod); + } + return info; +} +/** + * @brief Parent function to calculate two-body green's functions + * + * @param X [in] data list for calculation + * @param vec [in] eigenvectors + * + * @retval 0 normally finished + * @retval -1 abnormally finished + * @note The origin of function's name cisajscktalt comes from c=creation, i=ith site, s=spin, a=annihiration, j=jth site and so on. 
+ * + * @version 0.2 + * @details add function to treat the case of general spin + * + * @version 0.1 + * @author Takahiro Misawa (The University of Tokyo) + * @author Kazuyoshi Yoshimi (The University of Tokyo) + */ +int expec_cisajscktaltdc +( + struct BindStruct *X, + int nstate, + double complex **Xvec, + double complex **vec +) { + FILE *fp; + char sdt[D_FileNameMax]; + long unsigned int irght, ilft, ihfbit, icaca; + double complex **prod; + //For TPQ + int step = 0, rand_i = 0, istate; + + if (X->Def.NCisAjtCkuAlvDC < 1) return 0; + X->Large.mode = M_CORR; + + if (GetSplitBitByModel(X->Def.Nsite, X->Def.iCalcModel, &irght, &ilft, &ihfbit) != 0) { + return -1; + } + + //Make File Name for output + prod = cd_2d_allocate(X->Def.NCisAjt, nstate); + switch (X->Def.iCalcType) { + case TPQCalc: + step = X->Def.istep; + TimeKeeperWithRandAndStep(X, cFileNameTimeKeep, cTPQExpecTwoBodyGStart, "a", 0, step); + break; + case TimeEvolution: + step = X->Def.istep; + TimeKeeperWithStep(X, cFileNameTimeKeep, cTEExpecTwoBodyGStart, "a", step); + break; + case FullDiag: + case CG: + break; + } + + switch (X->Def.iCalcModel) { + case HubbardGC: + if (expec_cisajscktalt_HubbardGC(X, nstate, Xvec, vec, prod) != 0) { + return -1; + } + break; + + case KondoGC: + case Hubbard: + case Kondo: + if (expec_cisajscktalt_Hubbard(X, nstate, Xvec, vec, prod) != 0) { + return -1; + } + break; + + case Spin: + if (expec_cisajscktalt_Spin(X, nstate, Xvec, vec, prod) != 0) { + return -1; + } + break; + + case SpinGC: + if (expec_cisajscktalt_SpinGC(X, nstate, Xvec, vec, prod) != 0) { + return -1; + } + break; + + default: + return -1; + } + + for (istate = 0; istate < nstate; istate++) { + switch (X->Def.iCalcModel) { + case TPQCalc: + step = X->Def.istep; + sprintf(sdt, cFileName2BGreen_TPQ, X->Def.CDataFileHead, istate, step); + break; + case TimeEvolution: + step = X->Def.istep; + sprintf(sdt, cFileName2BGreen_TE, X->Def.CDataFileHead, step); + break; + case FullDiag: + case CG: + 
sprintf(sdt, cFileName2BGreen_FullDiag, X->Def.CDataFileHead, istate); + break; + } + if (childfopenMPI(sdt, "w", &fp) == 0) { + for (icaca = 0; icaca < X->Def.NCisAjt; icaca++) { + fprintf(fp, " %4d %4d %4d %4d %4d %4d %4d %4d %.10lf %.10lf\n", + X->Def.CisAjtCkuAlvDC[icaca][0], X->Def.CisAjtCkuAlvDC[icaca][1], + X->Def.CisAjtCkuAlvDC[icaca][2], X->Def.CisAjtCkuAlvDC[icaca][3], + X->Def.CisAjtCkuAlvDC[icaca][4], X->Def.CisAjtCkuAlvDC[icaca][5], + X->Def.CisAjtCkuAlvDC[icaca][6], X->Def.CisAjtCkuAlvDC[icaca][7], + creal(prod[icaca][istate]), cimag(prod[icaca][istate])); + } + fclose(fp); + } + else return -1; + }/*for (istate = 0; istate < nstate; istate++)*/ + + if (X->Def.iCalcType == TPQCalc) { + TimeKeeperWithRandAndStep(X, cFileNameTimeKeep, cTPQExpecTwoBodyGFinish, "a", rand_i, step); + } + else if (X->Def.iCalcType == TimeEvolution) { + TimeKeeperWithStep(X, cFileNameTimeKeep, cTEExpecTwoBodyGFinish, "a", step); + } + //[s] this part will be added + /* For FullDiag, it is convinient to calculate the total spin for each vector. + Such functions will be added + if(X->Def.iCalcType==FullDiag){ + if(X->Def.iCalcModel==Spin){ + expec_cisajscktaltdc_alldiag_spin(X,vec); + }else if(X->Def.iCalcModel==Hubbard || X->Def.iCalcModel==Kondo){ + expec_cisajscktaltdc_alldiag(X,vec); + }else{// + X->Phys.s2=0.0; + } + } + */ + //[e] + free_cd_2d_allocate(prod); + return 0; +} diff --git a/src/expec_energy_flct.c b/src/expec_energy_flct.c index dd5eecab5..53b4af551 100644 --- a/src/expec_energy_flct.c +++ b/src/expec_energy_flct.c @@ -21,156 +21,6 @@ #include "wrapperMPI.h" #include "CalcTime.h" #include "common/setmemory.h" - -/** - * @brief Parent function to calculate expected values of energy and physical quantities. - * - * @param X [in,out] X Struct to get information about file header names, dimension of hirbert space, calc type, physical quantities. 
- * - * @author Takahiro Misawa (The University of Tokyo) - * @author Kazuyoshi Yoshimi (The University of Tokyo) - * \retval 0 normally finished. - * \retval -1 abnormally finished. - */ -int expec_energy_flct( - struct BindStruct *X, - int nstate, - double complex **tmp_v0, - double complex **tmp_v1 -) { - - long unsigned int i, j; - long unsigned int irght, ilft, ihfbit; - double complex dam_pr, dam_pr1; - long unsigned int i_max; - int istate; - - switch (X->Def.iCalcType) { - case TPQCalc: - case TimeEvolution: -#ifdef _DEBUG - fprintf(stdoutMPI, "%s", cLogExpecEnergyStart); - TimeKeeperWithStep(X, cFileNameTimeKeep, cTPQExpecStart, "a", step_i); -#endif - break; - case FullDiag: - case CG: - break; - default: - return -1; - } - - i_max = X->Check.idim_max; - if (GetSplitBitByModel(X->Def.Nsite, X->Def.iCalcModel, &irght, &ilft, &ihfbit) != 0) { - return -1; - } - - X->Large.i_max = i_max; - X->Large.irght = irght; - X->Large.ilft = ilft; - X->Large.ihfbit = ihfbit; - X->Large.mode = M_ENERGY; - for (istate = 0; istate < nstate; istate++) X->Phys.energy[istate] = 0.0; - - int nCalcFlct; - if (X->Def.iCalcType == TPQCalc) { - nCalcFlct = 3201; - } - else {//For FullDiag - nCalcFlct = 5301; - } - StartTimer(nCalcFlct); - - switch (X->Def.iCalcModel) { - case HubbardGC: - expec_energy_flct_HubbardGC(X, nstate, tmp_v0); - break; - case KondoGC: - case Hubbard: - case Kondo: - expec_energy_flct_Hubbard(X, nstate, tmp_v0); - break; - - case SpinGC: - if (X->Def.iFlgGeneralSpin == FALSE) { - expec_energy_flct_HalfSpinGC(X, nstate, tmp_v0); - } - else {//for generalspin - expec_energy_flct_GeneralSpinGC(X, nstate, tmp_v0); - } - break;/*case SpinGC*/ - /* SpinGCBoost */ - case Spin: - /* - if(X->Def.iFlgGeneralSpin == FALSE){ - expec_energy_flct_HalfSpin(X); - } - else{ - expec_energy_flct_GeneralSpin(X); - } - */ - for (istate = 0; istate < nstate; istate++) { - X->Phys.doublon[istate] = 0.0; - X->Phys.doublon2[istate] = 0.0; - X->Phys.num[istate] = X->Def.NsiteMPI; 
- X->Phys.num2[istate] = X->Def.NsiteMPI*X->Def.NsiteMPI; - X->Phys.Sz[istate] = 0.5 * (double)X->Def.Total2SzMPI; - X->Phys.Sz2[istate] = X->Phys.Sz[istate] * X->Phys.Sz[istate]; - } - break; - default: - return -1; - } - - StopTimer(nCalcFlct); - -#pragma omp parallel for default(none) private(i) shared(v1,v0) firstprivate(i_max) - for (i = 1; i <= i_max; i++) { - for (istate = 0; istate < nstate; istate++) { - tmp_v1[i][istate] = tmp_v0[i][istate]; - tmp_v0[i][istate] = 0.0; - } - } - - int nCalcExpec; - if (X->Def.iCalcType == TPQCalc) { - nCalcExpec = 3202; - } - else {//For FullDiag - nCalcExpec = 5302; - } - StartTimer(nCalcExpec); - mltply(X, nstate, tmp_v0, tmp_v1); // v0+=H*v1 - StopTimer(nCalcExpec); - /* switch -> SpinGCBoost */ - - for (istate = 0; istate < nstate; istate++) { - X->Phys.energy[istate] = 0.0; - X->Phys.var[istate] = 0.0; - } -#pragma omp parallel for default(none) reduction(+:dam_pr, dam_pr1) private(j) shared(v0, v1)firstprivate(i_max) - for (j = 1; j <= i_max; j++) { - for (istate = 0; istate < nstate; istate++) { - X->Phys.energy[istate] += conj(tmp_v1[j][istate])*tmp_v0[j][istate]; // E = = - X->Phys.var[istate] += conj(tmp_v0[j][istate])*tmp_v0[j][istate]; // E^2 = = - } - } - SumMPI_cv(nstate, X->Phys.energy); - SumMPI_cv(nstate, X->Phys.var); - - switch (X->Def.iCalcType) { - case TPQCalc: - case TimeEvolution: -#ifdef _DEBUG - fprintf(stdoutMPI, "%s", cLogExpecEnergyEnd); - TimeKeeperWithStep(X, cFileNameTimeKeep, cTPQExpecEnd, "a", step_i); -#endif - break; - default: - break; - } - return 0; -} /// /// \brief Calculate expected values of energies and physical quantities for Hubbard GC model /// \param X [in, out] X Struct to get information about file header names, dimension of hirbert space, calc type and output physical quantities. @@ -712,3 +562,152 @@ int expec_energy_flct_GeneralSpin( free_d_1d_allocate(tmp_v02); return 0; } +/** + * @brief Parent function to calculate expected values of energy and physical quantities. 
+ * + * @param X [in,out] X Struct to get information about file header names, dimension of hirbert space, calc type, physical quantities. + * + * @author Takahiro Misawa (The University of Tokyo) + * @author Kazuyoshi Yoshimi (The University of Tokyo) + * \retval 0 normally finished. + * \retval -1 abnormally finished. + */ +int expec_energy_flct( + struct BindStruct *X, + int nstate, + double complex **tmp_v0, + double complex **tmp_v1 +) { + + long unsigned int i, j; + long unsigned int irght, ilft, ihfbit; + double complex dam_pr, dam_pr1; + long unsigned int i_max; + int istate; + + switch (X->Def.iCalcType) { + case TPQCalc: + case TimeEvolution: +#ifdef _DEBUG + fprintf(stdoutMPI, "%s", cLogExpecEnergyStart); + TimeKeeperWithStep(X, cFileNameTimeKeep, cTPQExpecStart, "a", step_i); +#endif + break; + case FullDiag: + case CG: + break; + default: + return -1; + } + + i_max = X->Check.idim_max; + if (GetSplitBitByModel(X->Def.Nsite, X->Def.iCalcModel, &irght, &ilft, &ihfbit) != 0) { + return -1; + } + + X->Large.i_max = i_max; + X->Large.irght = irght; + X->Large.ilft = ilft; + X->Large.ihfbit = ihfbit; + X->Large.mode = M_ENERGY; + for (istate = 0; istate < nstate; istate++) X->Phys.energy[istate] = 0.0; + + int nCalcFlct; + if (X->Def.iCalcType == TPQCalc) { + nCalcFlct = 3201; + } + else {//For FullDiag + nCalcFlct = 5301; + } + StartTimer(nCalcFlct); + + switch (X->Def.iCalcModel) { + case HubbardGC: + expec_energy_flct_HubbardGC(X, nstate, tmp_v0); + break; + case KondoGC: + case Hubbard: + case Kondo: + expec_energy_flct_Hubbard(X, nstate, tmp_v0); + break; + + case SpinGC: + if (X->Def.iFlgGeneralSpin == FALSE) { + expec_energy_flct_HalfSpinGC(X, nstate, tmp_v0); + } + else {//for generalspin + expec_energy_flct_GeneralSpinGC(X, nstate, tmp_v0); + } + break;/*case SpinGC*/ + /* SpinGCBoost */ + case Spin: + /* + if(X->Def.iFlgGeneralSpin == FALSE){ + expec_energy_flct_HalfSpin(X); + } + else{ + expec_energy_flct_GeneralSpin(X); + } + */ + for (istate = 
0; istate < nstate; istate++) { + X->Phys.doublon[istate] = 0.0; + X->Phys.doublon2[istate] = 0.0; + X->Phys.num[istate] = X->Def.NsiteMPI; + X->Phys.num2[istate] = X->Def.NsiteMPI*X->Def.NsiteMPI; + X->Phys.Sz[istate] = 0.5 * (double)X->Def.Total2SzMPI; + X->Phys.Sz2[istate] = X->Phys.Sz[istate] * X->Phys.Sz[istate]; + } + break; + default: + return -1; + } + + StopTimer(nCalcFlct); + +#pragma omp parallel for default(none) private(i) shared(v1,v0) firstprivate(i_max) + for (i = 1; i <= i_max; i++) { + for (istate = 0; istate < nstate; istate++) { + tmp_v1[i][istate] = tmp_v0[i][istate]; + tmp_v0[i][istate] = 0.0; + } + } + + int nCalcExpec; + if (X->Def.iCalcType == TPQCalc) { + nCalcExpec = 3202; + } + else {//For FullDiag + nCalcExpec = 5302; + } + StartTimer(nCalcExpec); + mltply(X, nstate, tmp_v0, tmp_v1); // v0+=H*v1 + StopTimer(nCalcExpec); + /* switch -> SpinGCBoost */ + + for (istate = 0; istate < nstate; istate++) { + X->Phys.energy[istate] = 0.0; + X->Phys.var[istate] = 0.0; + } +#pragma omp parallel for default(none) reduction(+:dam_pr, dam_pr1) private(j) shared(v0, v1)firstprivate(i_max) + for (j = 1; j <= i_max; j++) { + for (istate = 0; istate < nstate; istate++) { + X->Phys.energy[istate] += conj(tmp_v1[j][istate])*tmp_v0[j][istate]; // E = = + X->Phys.var[istate] += conj(tmp_v0[j][istate])*tmp_v0[j][istate]; // E^2 = = + } + } + SumMPI_dv(nstate, X->Phys.energy); + SumMPI_dv(nstate, X->Phys.var); + + switch (X->Def.iCalcType) { + case TPQCalc: + case TimeEvolution: +#ifdef _DEBUG + fprintf(stdoutMPI, "%s", cLogExpecEnergyEnd); + TimeKeeperWithStep(X, cFileNameTimeKeep, cTPQExpecEnd, "a", step_i); +#endif + break; + default: + break; + } + return 0; +} diff --git a/src/expec_totalspin.c b/src/expec_totalspin.c index 94199adef..bbc46099d 100644 --- a/src/expec_totalspin.c +++ b/src/expec_totalspin.c @@ -33,51 +33,6 @@ * @author Kazuyoshi Yoshimi (The University of Tokyo) * */ -/** - * @brief Parent function of calculation of total spin - * - * 
@param[in,out] X data list of calculation parameters - * @param[in] vec eigenvector - * - * @author Takahiro Misawa (The University of Tokyo) - * @author Kazuyoshi Yoshimi (The University of Tokyo) - * @retval 0 calculation is normally finished - */ -int expec_totalspin -( - struct BindStruct *X, - int nstate, - double complex **vec -) -{ - int istate; - - X->Large.mode = M_TOTALS; - switch (X->Def.iCalcModel) { - case Spin: - totalspin_Spin(X, nstate, vec); - for (istate = 0; istate < nstate; istate++) - X->Phys.Sz[istate] = X->Def.Total2SzMPI / 2.; - break; - case SpinGC: - totalspin_SpinGC(X, nstate, vec); - break; - case Hubbard: - case Kondo: - totalspin_Hubbard(X, nstate, vec); - break; - case HubbardGC: - case KondoGC: - totalspin_HubbardGC(X, nstate, vec); - break; - default: - for (istate = 0; istate < nstate; istate++) { - X->Phys.s2[istate] = 0.0; - X->Phys.Sz[istate] = 0.0; - } - } - return 0; -} /** * @brief function of calculating totalspin for Hubbard model * @@ -694,3 +649,48 @@ shared(vec) SumMPI_dv(nstate, X->Phys.s2); SumMPI_dv(nstate, X->Phys.Sz); } +/** + * @brief Parent function of calculation of total spin + * + * @param[in,out] X data list of calculation parameters + * @param[in] vec eigenvector + * + * @author Takahiro Misawa (The University of Tokyo) + * @author Kazuyoshi Yoshimi (The University of Tokyo) + * @retval 0 calculation is normally finished + */ +int expec_totalspin +( + struct BindStruct *X, + int nstate, + double complex **vec +) +{ + int istate; + + X->Large.mode = M_TOTALS; + switch (X->Def.iCalcModel) { + case Spin: + totalspin_Spin(X, nstate, vec); + for (istate = 0; istate < nstate; istate++) + X->Phys.Sz[istate] = X->Def.Total2SzMPI / 2.; + break; + case SpinGC: + totalspin_SpinGC(X, nstate, vec); + break; + case Hubbard: + case Kondo: + totalspin_Hubbard(X, nstate, vec); + break; + case HubbardGC: + case KondoGC: + totalspin_HubbardGC(X, nstate, vec); + break; + default: + for (istate = 0; istate < nstate; istate++) { + 
X->Phys.s2[istate] = 0.0; + X->Phys.Sz[istate] = 0.0; + } + } + return 0; +} diff --git a/src/include/CalcSpectrum.h b/src/include/CalcSpectrum.h index 166bd4191..e63785ab4 100644 --- a/src/include/CalcSpectrum.h +++ b/src/include/CalcSpectrum.h @@ -20,13 +20,6 @@ int CalcSpectrum( struct EDMainCalStruct *X ); -int GetExcitedState( - struct BindStruct *X, - int nstate, double complex **tmp_v0, - double complex **tmp_v1 -); - - int MakeExcitedList( struct BindStruct *X, int *iFlgListModifed diff --git a/src/include/mltplyMPIHubbardCore.h b/src/include/mltplyMPIHubbardCore.h index 249caf6a5..210e772d8 100644 --- a/src/include/mltplyMPIHubbardCore.h +++ b/src/include/mltplyMPIHubbardCore.h @@ -229,7 +229,7 @@ void X_child_CisAjt_MPIdouble struct BindStruct *X, int nstate, double complex **tmp_v0, double complex **tmp_v1, - double complex *v1buf, + double complex **v1buf, long unsigned int *list_1_org, long unsigned int *list_1buf_org, long unsigned int *list_2_1_target, @@ -246,7 +246,7 @@ void X_child_CisAjt_MPIsingle struct BindStruct *X, int nstate, double complex **tmp_v0, double complex **tmp_v1, - double complex *v1buf, + double complex **v1buf, long unsigned int *list_1_org, long unsigned int *list_1buf_org, long unsigned int *list_2_1_target, diff --git a/src/mltplyMPIHubbard.c b/src/mltplyMPIHubbard.c index fa6826a84..8bcd5bb9f 100644 --- a/src/mltplyMPIHubbard.c +++ b/src/mltplyMPIHubbard.c @@ -208,7 +208,7 @@ void X_GC_child_general_hopp_MPIsingle( trans = -(double) Fsgn * conj(tmp_trans); if (X->Large.mode == M_CORR|| X->Large.mode == M_CALCSPEC) trans = 0; }/*if (state2 != mask2)*/ - else return 0; + else return; bit1diff = X->Def.Tpow[2 * X->Def.Nsite - 1] * 2 - mask1 * 2; @@ -368,7 +368,7 @@ void X_child_general_hopp_MPIsingle( trans = 0; } } - else return 0; + else return; bit1diff = X->Def.Tpow[2 * X->Def.Nsite - 1] * 2 - mask1 * 2; @@ -428,7 +428,7 @@ void X_child_CisAjt_MPIsingle( SgnBit((unsigned long int) (origin & bit2diff), &Fsgn); // Fermion 
sign idim_max_buf = SendRecv_i(origin, X->Check.idim_maxOrg); - SendRecv_iv(origin, X->Check.idim_maxOrg + 1, list_1buf_org + 1, list_1_org, list_1buf_org); + SendRecv_iv(origin, X->Check.idim_maxOrg + 1, idim_max_buf + 1, list_1_org, list_1buf_org); SendRecv_cv(origin, X->Check.idim_maxOrg*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); /* Index in the intra PE @@ -442,7 +442,7 @@ void X_child_CisAjt_MPIsingle( state1check = mask1; trans = -(double) Fsgn * conj(tmp_trans); } - else return 0; + else return; bit1diff = X->Def.Tpow[2 * X->Def.Nsite - 1] * 2 - mask1 * 2; diff --git a/src/mltplyMPISpin.c b/src/mltplyMPISpin.c index 57d3c2b1f..fffe7d40c 100644 --- a/src/mltplyMPISpin.c +++ b/src/mltplyMPISpin.c @@ -83,7 +83,7 @@ void X_child_general_int_spin_MPIdouble( Jint = 0; } } - else return 0; + else return; idim_max_buf = SendRecv_i(origin, X->Check.idim_max); SendRecv_iv(origin, X->Check.idim_max + 1, idim_max_buf + 1, list_1, list_1buf); @@ -121,7 +121,7 @@ void X_child_general_int_spin_TotalS_MPIdouble( num2_up = (origin & mask2) / mask2; ibit_tmp = (num1_up) ^ (num2_up); - if (ibit_tmp == 0) return 0; + if (ibit_tmp == 0) return; idim_max_buf = SendRecv_i(origin, X->Check.idim_max); SendRecv_iv(origin, X->Check.idim_max + 1, idim_max_buf + 1, list_1, list_1buf); @@ -196,7 +196,7 @@ void X_child_general_int_spin_MPIsingle( Jint = 0; } } - else return 0; + else return; idim_max_buf = SendRecv_i(origin, X->Check.idim_max); SendRecv_iv(origin, X->Check.idim_max + 1, idim_max_buf + 1, list_1, list_1buf); diff --git a/src/mltplyMPISpinCore.c b/src/mltplyMPISpinCore.c index 140c4e372..3a6da09a0 100644 --- a/src/mltplyMPISpinCore.c +++ b/src/mltplyMPISpinCore.c @@ -105,9 +105,8 @@ void X_GC_child_CisAitCiuAiv_spin_MPIdouble( double complex **tmp_v1//!<[in] Vector to be producted ) { int mask1, mask2, state1, state2, ierr, origin; - unsigned long int idim_max_buf, j, nstatedim; + unsigned long int idim_max_buf, j; double complex Jint; - int one = 1; mask1 = 
(int)X->Def.Tpow[org_isite1]; mask2 = (int)X->Def.Tpow[org_isite3]; @@ -143,8 +142,7 @@ void X_GC_child_CisAitCiuAiv_spin_MPIdouble( idim_max_buf = SendRecv_i(origin, X->Check.idim_max); SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); - nstatedim = nstate * idim_max_buf; - zaxpy_(&nstatedim, &Jint, &v1buf[1][0], &one, &tmp_v0[1][0], &one); + zaxpy_long(nstate * idim_max_buf, Jint, &v1buf[1][0], &tmp_v0[1][0]); }/*void GC_child_CisAitCiuAiv_spin_MPIdouble*/ /** @brief Wrapper for calculating CisAisCjuAjv term in Spin model + GC From 0b9cfb170fdfbe922e4d194cbcdcbcc44f56ea78 Mon Sep 17 00:00:00 2001 From: mitsuaki1987 Date: Sat, 9 Mar 2019 01:08:42 +0900 Subject: [PATCH 10/50] Backup --- src/CalcByLOBPCG.c | 22 +++++++++++++++------- src/common/setmemory.c | 8 ++++---- src/expec_cisajscktaltdc.c | 2 +- src/mltply.c | 4 +++- 4 files changed, 23 insertions(+), 13 deletions(-) diff --git a/src/CalcByLOBPCG.c b/src/CalcByLOBPCG.c index 5236fa34f..9e4e02bd3 100644 --- a/src/CalcByLOBPCG.c +++ b/src/CalcByLOBPCG.c @@ -31,6 +31,13 @@ localy optimal block (preconditioned) conjugate gradient method. 
#include "mltplyCommon.h" #include "./common/setmemory.h" +void debug_print(int num, double complex *var){ + int i; + for (i=0;iDef.k_exct, 3, X->Def.k_exct); i_max = X->Check.idim_max; + i4_max = (int)i_max; free(v0); free(v1); @@ -495,9 +503,9 @@ private(idim,precon,ie) */ for (ii = 0; ii < 3; ii++) { for (jj = 0; jj < 3; jj++) { - zgemm_(&tN, &tC, &X->Def.k_exct, &X->Def.k_exct, &i_max, &one, + zgemm_(&tN, &tC, &X->Def.k_exct, &X->Def.k_exct, &i4_max, &one, &wxp[ii][1][0], &X->Def.k_exct, &wxp[jj][1][0], &X->Def.k_exct, &zero, &ovlp[jj][0][ii][0], &nsub); - zgemm_(&tN, &tC, &X->Def.k_exct, &X->Def.k_exct, &i_max, &one, + zgemm_(&tN, &tC, &X->Def.k_exct, &X->Def.k_exct, &i4_max, &one, &wxp[ii][1][0], &X->Def.k_exct, &hwxp[jj][1][0], &X->Def.k_exct, &zero, &hsub[jj][0][ii][0], &nsub); } } @@ -523,7 +531,7 @@ private(idim,precon,ie) */ zclear(i_max*X->Def.k_exct, &v1buf[1][0]); for (ii = 0; ii < 3; ii++) { - zgemm_(&tC, &tN, &X->Def.k_exct, &i_max, &X->Def.k_exct, &one, + zgemm_(&tC, &tN, &X->Def.k_exct, &i4_max, &X->Def.k_exct, &one, &hsub[0][0][ii][0], &nsub, &wxp[ii][1][0], &X->Def.k_exct, &one, &v1buf[1][0], &X->Def.k_exct); } for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++) @@ -534,7 +542,7 @@ private(idim,precon,ie) */ zclear(i_max*X->Def.k_exct, &v1buf[1][0]); for (ii = 0; ii < 3; ii++) { - zgemm_(&tC, &tN, &X->Def.k_exct, &i_max, &X->Def.k_exct, &one, + zgemm_(&tC, &tN, &X->Def.k_exct, &i4_max, &X->Def.k_exct, &one, &hsub[0][0][ii][0], &nsub, &hwxp[ii][1][0], &X->Def.k_exct, &one, &v1buf[1][0], &X->Def.k_exct); } for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++) @@ -545,7 +553,7 @@ private(idim,precon,ie) */ zclear(i_max*X->Def.k_exct, &v1buf[1][0]); for (ii = 0; ii < 3; ii += 2) { - zgemm_(&tC, &tN, &X->Def.k_exct, &i_max, &X->Def.k_exct, &one, + zgemm_(&tC, &tN, &X->Def.k_exct, &i4_max, &X->Def.k_exct, &one, &hsub[0][0][ii][0], &nsub, &wxp[ii][1][0], &X->Def.k_exct, &one, &v1buf[1][0], 
&X->Def.k_exct); } for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++) @@ -556,7 +564,7 @@ private(idim,precon,ie) */ zclear(i_max*X->Def.k_exct, &v1buf[1][0]); for (ii = 0; ii < 3; ii += 2) { - zgemm_(&tC, &tN, &X->Def.k_exct, &i_max, &X->Def.k_exct, &one, + zgemm_(&tC, &tN, &X->Def.k_exct, &i4_max, &X->Def.k_exct, &one, &hsub[0][0][ii][0], &nsub, &hwxp[ii][1][0], &X->Def.k_exct, &one, &v1buf[1][0], &X->Def.k_exct); } for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++) diff --git a/src/common/setmemory.c b/src/common/setmemory.c index b0367038e..d2dfc87f2 100644 --- a/src/common/setmemory.c +++ b/src/common/setmemory.c @@ -304,16 +304,16 @@ void free_cd_3d_allocate(double complex***A){ double complex****cd_4d_allocate(const long unsigned int N, const long unsigned int M, const long unsigned int L, const long unsigned int K) { long unsigned int int_i, int_j, int_k; double complex****A; - A = (double complex****)calloc((N), sizeof(double complex**)); - A[0] = (double complex***)calloc((M*N), sizeof(double complex*)); - A[0][0] = (double complex**)calloc((L*M*N), sizeof(double complex)); + A = (double complex****)calloc((N), sizeof(double complex***)); + A[0] = (double complex***)calloc((M*N), sizeof(double complex**)); + A[0][0] = (double complex**)calloc((L*M*N), sizeof(double complex*)); A[0][0][0] = (double complex*)calloc((K*L*M*N), sizeof(double complex)); for (int_i = 0; int_i < N; int_i++) { A[int_i] = A[0] + int_i * M; for (int_j = 0; int_j < M; int_j++) { A[int_i][int_j] = A[0][0] + int_i * M*L + int_j * L; for (int_k = 0; int_k < L; int_k++) { - A[int_i][int_j][int_k] = A[0][0][0] + int_i * M*L*K + int_j * L*M + int_k * L; + A[int_i][int_j][int_k] = A[0][0][0] + int_i * M*L*K + int_j * L*K + int_k * K; } } } diff --git a/src/expec_cisajscktaltdc.c b/src/expec_cisajscktaltdc.c index 8b41edefc..69e2071bc 100644 --- a/src/expec_cisajscktaltdc.c +++ b/src/expec_cisajscktaltdc.c @@ -1008,7 +1008,7 @@ int 
expec_cisajscktaltdc } //Make File Name for output - prod = cd_2d_allocate(X->Def.NCisAjt, nstate); + prod = cd_2d_allocate(X->Def.NCisAjtCkuAlvDC, nstate); switch (X->Def.iCalcType) { case TPQCalc: step = X->Def.istep; diff --git a/src/mltply.c b/src/mltply.c index 0bd602886..f4ce00873 100644 --- a/src/mltply.c +++ b/src/mltply.c @@ -59,6 +59,7 @@ int mltply(struct BindStruct *X, int nstate, double complex **tmp_v0,double comp long unsigned int irght=0; long unsigned int ilft=0; long unsigned int ihfbit=0; + double complex dmv; long unsigned int i_max; @@ -95,7 +96,8 @@ int mltply(struct BindStruct *X, int nstate, double complex **tmp_v0,double comp StartTimer(100); #pragma omp parallel for default(none) firstprivate(i_max) shared(tmp_v0, tmp_v1, list_Diagonal) for (j = 1; j <= i_max; j++) { - zaxpy_(&nstate, &list_Diagonal[j], &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); + dmv = list_Diagonal[j]; + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); } StopTimer(100); if (X->Def.iCalcType == TimeEvolution) diagonalcalcForTE(step_i, X, nstate, &tmp_v0[0][0], &tmp_v1[0][0]); From 381d8a05dbbfa77ca19facd8db46dc143be824c7 Mon Sep 17 00:00:00 2001 From: mitsuaki1987 Date: Mon, 11 Mar 2019 00:08:03 +0900 Subject: [PATCH 11/50] Backup --- src/CalcByLOBPCG.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/CalcByLOBPCG.c b/src/CalcByLOBPCG.c index 9e4e02bd3..40ca6a50e 100644 --- a/src/CalcByLOBPCG.c +++ b/src/CalcByLOBPCG.c @@ -434,7 +434,7 @@ int LOBPCG_Main( #pragma omp parallel for default(none) shared(i_max,wxp,hwxp,eig,X) private(idim,ie) for (idim = 1; idim <= i_max; idim++) { for (ie = 0; ie < X->Def.k_exct; ie++) { - wxp[0][ie][idim] = hwxp[1][idim][ie] - eig[ie] * wxp[1][idim][ie]; + wxp[0][idim][ie] = hwxp[1][idim][ie] - eig[ie] * wxp[1][idim][ie]; } } NormMPI_dv(i_max, X->Def.k_exct, wxp[0], dnorm); From c02853436eaec194ca5077c25b15fdb1ed496fab Mon Sep 17 00:00:00 2001 From: mitsuaki1987 Date: Mon, 11 Mar 2019 14:45:10 +0900 
Subject: [PATCH 12/50] Backup --- src/CalcByFullDiag.c | 6 +++--- src/CalcByLOBPCG.c | 8 ++++---- src/expec_cisajs.c | 2 +- src/expec_cisajscktaltdc.c | 4 ++-- src/lapack_diag.c | 4 ++-- src/mltply.c | 2 +- src/xsetmem.c | 2 +- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/CalcByFullDiag.c b/src/CalcByFullDiag.c index 37867fd45..6b9587150 100644 --- a/src/CalcByFullDiag.c +++ b/src/CalcByFullDiag.c @@ -35,9 +35,9 @@ int CalcByFullDiag( fprintf(stdoutMPI, "%s", cLogFullDiag_SetHam_Start); StartTimer(5100); if(X->Bind.Def.iInputHam==FALSE){ - zclear((X->Bind.Check.idim_max + 1)*(X->Bind.Check.idim_max + 1), &v0[0][0]); - zclear((X->Bind.Check.idim_max + 1)*(X->Bind.Check.idim_max + 1), &v1[0][0]); - for (idim = 1; idim <= X->Bind.Check.idim_max; idim++) v1[idim][idim] = 1.0; + zclear((X->Bind.Check.idim_max + 1)*X->Bind.Check.idim_max, &v0[0][0]); + zclear((X->Bind.Check.idim_max + 1)*X->Bind.Check.idim_max, &v1[0][0]); + for (idim = 1; idim <= X->Bind.Check.idim_max; idim++) v1[idim][idim-1] = 1.0; mltply(&(X->Bind), X->Bind.Check.idim_max, v0, v1); } else if(X->Bind.Def.iInputHam==TRUE){ diff --git a/src/CalcByLOBPCG.c b/src/CalcByLOBPCG.c index 40ca6a50e..f86d90255 100644 --- a/src/CalcByLOBPCG.c +++ b/src/CalcByLOBPCG.c @@ -321,7 +321,7 @@ static void Output_restart( //TimeKeeperWithRandAndStep(&(X->Bind), cFileNameTPQStep, cOutputVecStart, "a", rand_i, step_i); fprintf(stdoutMPI, "%s", cLogOutputVecStart); - vout = cd_1d_allocate(X->Check.idim_max); + vout = cd_1d_allocate(X->Check.idim_max + 1); for (ie = 0; ie < X->Def.k_exct; ie++) { sprintf(sdt, cFileNameOutputVector, ie, myrank); if (childfopenALL(sdt, "wb", &fp) != 0) exitMPI(-1); @@ -616,13 +616,13 @@ private(idim,precon,ie)
      • Just move wxp[1] into ::v1. The latter must start from index 0 (the same as FullDiag)
      */ - v1 = cd_2d_allocate(X->Check.idim_max + 1, X->Def.k_exct); + v0 = cd_2d_allocate(X->Check.idim_max + 1, X->Def.k_exct); #pragma omp parallel for default(none) shared(i_max,wxp,v1,X) private(idim,ie) for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++) - v1[idim][ie] = wxp[1][idim][ie]; + v0[idim][ie] = wxp[1][idim][ie]; free_cd_3d_allocate(wxp); - v0 = cd_2d_allocate(X->Check.idim_max + 1, X->Def.k_exct); + v1 = cd_2d_allocate(X->Check.idim_max + 1, X->Def.k_exct); if (iconv != 0) { sprintf(sdt, "%s", cLogLanczos_EigenValueNotConverged); diff --git a/src/expec_cisajs.c b/src/expec_cisajs.c index 2ff5f7877..5f4e2cdd9 100644 --- a/src/expec_cisajs.c +++ b/src/expec_cisajs.c @@ -636,7 +636,7 @@ int expec_cisajs( } for (istate = 0; istate < nstate; istate++) { - switch (X->Def.iCalcModel) { + switch (X->Def.iCalcType) { case TPQCalc: step = X->Def.istep; sprintf(sdt, cFileName1BGreen_TPQ, X->Def.CDataFileHead, istate, step); diff --git a/src/expec_cisajscktaltdc.c b/src/expec_cisajscktaltdc.c index 69e2071bc..26d8880af 100644 --- a/src/expec_cisajscktaltdc.c +++ b/src/expec_cisajscktaltdc.c @@ -1055,7 +1055,7 @@ int expec_cisajscktaltdc } for (istate = 0; istate < nstate; istate++) { - switch (X->Def.iCalcModel) { + switch (X->Def.iCalcType) { case TPQCalc: step = X->Def.istep; sprintf(sdt, cFileName2BGreen_TPQ, X->Def.CDataFileHead, istate, step); @@ -1070,7 +1070,7 @@ int expec_cisajscktaltdc break; } if (childfopenMPI(sdt, "w", &fp) == 0) { - for (icaca = 0; icaca < X->Def.NCisAjt; icaca++) { + for (icaca = 0; icaca < X->Def.NCisAjtCkuAlvDC; icaca++) { fprintf(fp, " %4d %4d %4d %4d %4d %4d %4d %4d %.10lf %.10lf\n", X->Def.CisAjtCkuAlvDC[icaca][0], X->Def.CisAjtCkuAlvDC[icaca][1], X->Def.CisAjtCkuAlvDC[icaca][2], X->Def.CisAjtCkuAlvDC[icaca][3], diff --git a/src/lapack_diag.c b/src/lapack_diag.c index 3c82dd5b4..7b6fc782a 100644 --- a/src/lapack_diag.c +++ b/src/lapack_diag.c @@ -101,9 +101,9 @@ struct BindStruct *X//!<[inout] 
ZHEEVall(xMsize, v0, X->Phys.energy, v1); #endif } - for (i = i_max; i > 0; i--) { + for (i = 0; i < i_max; i++) { for (j = 0; j < i_max; j++) { - v1[i][j] = v1[i - 1][j]; + v0[i + 1][j] = v1[i][j]; } } diff --git a/src/mltply.c b/src/mltply.c index f4ce00873..00309c159 100644 --- a/src/mltply.c +++ b/src/mltply.c @@ -142,7 +142,7 @@ void zaxpy_long( #pragma omp parallel for default(none) private(i) shared(n, a, x, y) for (i = 0; i < n; i++) - y[i] += a * x[i] + y[i]; + y[i] += a * x[i]; } /** @brief Wrapper of zswap. diff --git a/src/xsetmem.c b/src/xsetmem.c index 58b5895ff..b6b0dae22 100644 --- a/src/xsetmem.c +++ b/src/xsetmem.c @@ -180,7 +180,7 @@ int setmem_large list_Diagonal = d_1d_allocate(X->Check.idim_max + 1); if (X->Def.iCalcType == FullDiag) { - nstate = X->Check.idim_max + 1; + nstate = X->Check.idim_max; } else if (X->Def.iCalcType == CG) { nstate = X->Def.k_exct; From 04bd7e14d0d8d219214b42973ccc9ecd17c60a15 Mon Sep 17 00:00:00 2001 From: mitsuaki1987 Date: Mon, 11 Mar 2019 18:30:46 +0900 Subject: [PATCH 13/50] Backup --- src/CalcByLOBPCG.c | 2 +- src/CalcSpectrum.c | 6 +++--- src/CalcSpectrumByBiCG.c | 3 +-- src/check.c | 23 ++++------------------- src/include/CalcSpectrumByBiCG.h | 4 ++-- src/mltplyMPIHubbard.c | 6 +++--- src/mltplyMPISpinCore.c | 7 ++----- 7 files changed, 16 insertions(+), 35 deletions(-) diff --git a/src/CalcByLOBPCG.c b/src/CalcByLOBPCG.c index f86d90255..4bb4d1f0a 100644 --- a/src/CalcByLOBPCG.c +++ b/src/CalcByLOBPCG.c @@ -758,7 +758,7 @@ int CalcByLOBPCG( if (X->Bind.Def.iOutputEigenVec == TRUE) { TimeKeeper(&(X->Bind), cFileNameTimeKeep, cOutputEigenVecStart, "a"); - vin = cd_1d_allocate(X->Bind.Check.idim_max); + vin = cd_1d_allocate(X->Bind.Check.idim_max + 1); for (ie = 0; ie < X->Bind.Def.k_exct; ie++) { #pragma omp parallel for default(none) shared(X,v1,ie,vin) private(idim) diff --git a/src/CalcSpectrum.c b/src/CalcSpectrum.c index fa0b867eb..d1376095a 100644 --- a/src/CalcSpectrum.c +++ b/src/CalcSpectrum.c @@ 
-212,8 +212,8 @@ int CalcSpectrum( StopTimer(6101); if (byte_size == 0) printf("byte_size: %d \n", (int)byte_size); - for (i = 0; i <= X->Bind.Check.idim_max; i++) { - v0[i] = 0; + for (i = 1; i <= X->Bind.Check.idim_max; i++) { + v0[i][0] = 0; } fprintf(stdoutMPI, " End: An Input vector is inputted in CalcSpectrum.\n\n"); TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_InputEigenVectorEnd, "a"); @@ -278,7 +278,7 @@ int CalcSpectrum( StartTimer(6200); switch (X->Bind.Def.iCalcType) { case CG: - iret = CalcSpectrumByBiCG(X, &v0[0][0], &v1[0][0], Nomega, dcSpectrum, dcomega); + iret = CalcSpectrumByBiCG(X, v0, v1, Nomega, dcSpectrum, dcomega); if (iret != TRUE) { return FALSE; } diff --git a/src/CalcSpectrumByBiCG.c b/src/CalcSpectrumByBiCG.c index d23b4fe48..5ae7dc490 100644 --- a/src/CalcSpectrumByBiCG.c +++ b/src/CalcSpectrumByBiCG.c @@ -226,7 +226,7 @@ int CalcSpectrumByBiCG(
    • Allocate vectors for the old residual vector (@f${\bf r}_{\rm old}@f$) and the old shadow residual vector (@f${\bf {\tilde r}}_{\rm old}@f$).
    • */ - v12 = cd_2d_allocate(X->Bind.Check.idim_max + 1, 1); + v12 = cd_2d_allocate(X->Bind.Check.idim_max + 2, 1); v14 = cd_2d_allocate(X->Bind.Check.idim_max + 1, 1); v4 = cd_2d_allocate(X->Bind.Check.idim_max + 1, 1); resz = d_1d_allocate(Nomega); @@ -323,7 +323,6 @@ int CalcSpectrumByBiCG( } fprintf(fp, "\n"); } - fprintf(stdoutMPI, " %9d %9d %8d %25.15e\n", abs(status[0]), status[1], status[2], creal(v12[1][0])); if (status[0] < 0) break; }/*for (stp = 0; stp <= X->Bind.Def.Lanczos_max; stp++)*/ diff --git a/src/check.c b/src/check.c index f4330e0ed..56363efa2 100644 --- a/src/check.c +++ b/src/check.c @@ -208,21 +208,6 @@ int check(struct BindStruct *X){ X->Check.idim_max = comb_sum; switch(X->Def.iCalcType) { - case Lanczos: - switch (X->Def.iCalcModel) { - case Hubbard: - case HubbardNConserved: - case Kondo: - case KondoGC: - case Spin: - X->Check.max_mem = 5.5 * X->Check.idim_max * 8.0 / (pow(10, 9)); - break; - case HubbardGC: - case SpinGC: - X->Check.max_mem = 4.5 * X->Check.idim_max * 8.0 / (pow(10, 9)); - break; - } - break; case CG: switch (X->Def.iCalcModel) { case Hubbard: @@ -230,11 +215,11 @@ int check(struct BindStruct *X){ case Kondo: case KondoGC: case Spin: - X->Check.max_mem = (6 * X->Def.k_exct + 2) * X->Check.idim_max * 16.0 / (pow(10, 9)); + X->Check.max_mem = (7 * X->Def.k_exct + 1.5) * X->Check.idim_max * 16.0 / (pow(10, 9)); break; case HubbardGC: case SpinGC: - X->Check.max_mem = (6 * X->Def.k_exct + 1.5) * X->Check.idim_max * 16.0 / (pow(10, 9)); + X->Check.max_mem = (7 * X->Def.k_exct + 1.0) * X->Check.idim_max * 16.0 / (pow(10, 9)); break; } break; @@ -246,7 +231,7 @@ int check(struct BindStruct *X){ case KondoGC: case Spin: if (X->Def.iFlgCalcSpec != CALCSPEC_NOT) { - X->Check.max_mem = (2) * X->Check.idim_max * 16.0 / (pow(10, 9)); + X->Check.max_mem = NumAve * 3 * X->Check.idim_max * 16.0 / (pow(10, 9)); } else { X->Check.max_mem = 4.5 * X->Check.idim_max * 16.0 / (pow(10, 9)); } @@ -254,7 +239,7 @@ int check(struct 
BindStruct *X){ case HubbardGC: case SpinGC: if (X->Def.iFlgCalcSpec != CALCSPEC_NOT) { - X->Check.max_mem = (2) * X->Check.idim_max * 16.0 / (pow(10, 9)); + X->Check.max_mem = NumAve * 3 * X->Check.idim_max * 16.0 / (pow(10, 9)); } else { X->Check.max_mem = 3.5 * X->Check.idim_max * 16.0 / (pow(10, 9)); } diff --git a/src/include/CalcSpectrumByBiCG.h b/src/include/CalcSpectrumByBiCG.h index d3de4ab14..2a0fe6ae5 100644 --- a/src/include/CalcSpectrumByBiCG.h +++ b/src/include/CalcSpectrumByBiCG.h @@ -18,8 +18,8 @@ int CalcSpectrumByBiCG( struct EDMainCalStruct *X, - double complex *vrhs, - double complex *v2, + double complex **vrhs, + double complex **v2, int Nomega, double complex *dcSpectrum, double complex *dcomega diff --git a/src/mltplyMPIHubbard.c b/src/mltplyMPIHubbard.c index 8bcd5bb9f..eae8bf898 100644 --- a/src/mltplyMPIHubbard.c +++ b/src/mltplyMPIHubbard.c @@ -265,7 +265,7 @@ void X_child_general_hopp_MPIdouble( ) { int mask1, mask2, state1, state2, ierr, origin, bitdiff, Fsgn; unsigned long int idim_max_buf, j, ioff; - double complex trans, dmv; + double complex trans; int one = 1; mask1 = (int) X->Def.Tpow[2 * org_isite1 + org_ispin1]; @@ -293,14 +293,14 @@ void X_child_general_hopp_MPIdouble( SendRecv_iv(origin, X->Check.idim_max + 1, idim_max_buf + 1, list_1, list_1buf); SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); -#pragma omp parallel default(none) private(j, dmv, Fsgn, ioff) \ +#pragma omp parallel default(none) private(j, Fsgn, ioff) \ firstprivate(idim_max_buf, trans, X) shared(list_2_1, list_2_2, list_1buf, v1buf, tmp_v1, tmp_v0) { #pragma omp for for (j = 1; j <= idim_max_buf; j++) { GetOffComp(list_2_1, list_2_2, list_1buf[j], X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); - zaxpy_(&nstate, &dmv, &v1buf[j][0], &one, &tmp_v0[ioff][0], &one); + zaxpy_(&nstate, &trans, &v1buf[j][0], &one, &tmp_v0[ioff][0], &one); }/*for (j = 1; j <= idim_max_buf; j++)*/ }/*End of parallel region*/ 
}/*void child_general_hopp_MPIdouble*/ diff --git a/src/mltplyMPISpinCore.c b/src/mltplyMPISpinCore.c index 3a6da09a0..56e1f5a56 100644 --- a/src/mltplyMPISpinCore.c +++ b/src/mltplyMPISpinCore.c @@ -877,10 +877,7 @@ void X_child_CisAit_GeneralSpin_MPIdouble( origin = (int) off; idim_max_buf = SendRecv_i(origin, idim_max); - if(ierr != 0) exitMPI(-1); - SendRecv_iv(origin, idim_max + 1, idim_max_buf + 1, list_1_org, list_1buf_org); - SendRecv_cv(origin, idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); #pragma omp parallel for default(none)\ @@ -1408,8 +1405,8 @@ void X_GC_child_CisAis_spin_MPIdouble( int mask1, ibit1; mask1 = (int)X->Def.Tpow[org_isite1]; ibit1 = (((unsigned long int)myrank& mask1)/mask1)^(1-org_ispin1); - - zaxpy_long(X->Check.idim_max*nstate, tmp_trans, &tmp_v1[1][0], &tmp_v0[1][0]); + if (ibit1 != 0) + zaxpy_long(X->Check.idim_max*nstate, tmp_trans, &tmp_v1[1][0], &tmp_v0[1][0]); }/*double complex X_GC_child_CisAis_spin_MPIdouble*/ /** @brief Hopping term in Spin + GC From 55dd4fddf529a4386eebd679eb801bb22c16ac9c Mon Sep 17 00:00:00 2001 From: mitsuaki1987 Date: Fri, 15 Mar 2019 01:11:06 +0900 Subject: [PATCH 14/50] backup --- src/CalcByLOBPCG.c | 6 +- src/Multiply.c | 9 +- src/PairExHubbard.c | 21 +- src/PairExSpin.c | 52 +-- src/SingleExHubbard.c | 18 +- src/expec_cisajs.c | 63 +--- src/expec_cisajscktaltdc.c | 57 ++-- src/expec_energy_flct.c | 645 ++++++++++++++++++++++--------------- src/expec_totalspin.c | 43 --- src/mltply.c | 3 +- src/mltplyHubbard.c | 26 +- src/mltplyMPIHubbard.c | 24 +- src/mltplyMPIHubbardCore.c | 53 +-- src/mltplyMPISpin.c | 7 +- src/mltplyMPISpinCore.c | 33 +- src/mltplySpin.c | 29 +- test/CMakeLists.txt | 56 ---- 17 files changed, 596 insertions(+), 549 deletions(-) diff --git a/src/CalcByLOBPCG.c b/src/CalcByLOBPCG.c index 4bb4d1f0a..f6b7d6170 100644 --- a/src/CalcByLOBPCG.c +++ b/src/CalcByLOBPCG.c @@ -297,7 +297,7 @@ static void Initialize_wave( dnorm = d_1d_allocate(X->Def.k_exct); 
NormMPI_dv(i_max, X->Def.k_exct, wave, dnorm); -#pragma omp parallel for default(none) shared(i_max,wave,dnorm,ie,X) private(idim) +#pragma omp parallel for default(none) shared(i_max,wave,dnorm,X) private(idim,ie) for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++) wave[idim][ie] /= dnorm[ie]; free_d_1d_allocate(dnorm); @@ -462,7 +462,7 @@ private(idim,precon,ie)
    • Normalize residual vector: @f${\bf w}={\bf w}/|w|@f$ */ NormMPI_dv(i_max, X->Def.k_exct, wxp[0], dnorm); -#pragma omp parallel for default(none) shared(i_max,wxp,dnorm,ie,X) private(idim) +#pragma omp parallel for default(none) shared(i_max,wxp,dnorm,X) private(idim,ie) for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++) wxp[0][idim][ie] /= dnorm[ie]; @@ -617,7 +617,7 @@ private(idim,precon,ie)
    */ v0 = cd_2d_allocate(X->Check.idim_max + 1, X->Def.k_exct); -#pragma omp parallel for default(none) shared(i_max,wxp,v1,X) private(idim,ie) +#pragma omp parallel for default(none) shared(i_max,wxp,v0,X) private(idim,ie) for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++) v0[idim][ie] = wxp[1][idim][ie]; diff --git a/src/Multiply.c b/src/Multiply.c index 49858465a..96fedbc6d 100644 --- a/src/Multiply.c +++ b/src/Multiply.c @@ -45,23 +45,22 @@ int Multiply ) { long int i, i_max; - double complex dnorm; double Ns; int rand_i; i_max = X->Check.idim_max; Ns = 1.0*X->Def.NsiteMPI; // mltply is in expec_energy.c v0=H*v1 - for (rand_i = 0; rand_i < NumAve; rand_i++)dnorm = 0.0; -#pragma omp parallel for default(none) reduction(+: dnorm) private(i) \ -shared(v0, v1) firstprivate(i_max, Ns, LargeValue) +#pragma omp parallel for default(none) private(i,rand_i) \ + shared(v0, v1,NumAve) firstprivate(i_max, Ns, LargeValue) for (i = 1; i <= i_max; i++) { for (rand_i = 0; rand_i < NumAve; rand_i++) { v0[i][rand_i] = LargeValue * v1[i][rand_i] - v0[i][rand_i] / Ns; //v0=(l-H/Ns)*v1 } } NormMPI_dv(i_max, NumAve, v0, global_norm); -#pragma omp parallel for default(none) private(i) shared(v0) firstprivate(i_max, dnorm) +#pragma omp parallel for default(none) private(i,rand_i) \ +shared(v0,NumAve,global_norm) firstprivate(i_max) for (i = 1; i <= i_max; i++) for (rand_i = 0; rand_i < NumAve; rand_i++) v0[i][rand_i] = v0[i][rand_i] / global_norm[rand_i]; diff --git a/src/PairExHubbard.c b/src/PairExHubbard.c index 61b024048..0897e8ac5 100644 --- a/src/PairExHubbard.c +++ b/src/PairExHubbard.c @@ -104,7 +104,8 @@ int GetPairExcitedStateHubbardGC( if (org_isite1 == org_isite2 && org_sigma1 == org_sigma2 && X->Def.PairExcitationOperator[i][4] == 0) { isite1 = X->Def.Tpow[2 * org_isite1 - 2 + org_sigma1]; -#pragma omp parallel for default(none) private(j) firstprivate(i_max,X,isite1, tmp_trans) shared(tmp_v0, tmp_v1) +#pragma omp parallel for default(none) 
private(j) \ +firstprivate(i_max,X,isite1, tmp_trans) shared(tmp_v0,tmp_v1,nstate) for (j = 1; j <= i_max; j++) { GC_AisCis(j, nstate, tmp_v0, tmp_v1, X, isite1, -tmp_trans); } @@ -200,9 +201,9 @@ int GetPairExcitedStateHubbard( } } else { -#pragma omp parallel for default(none) shared(tmp_v0, tmp_v1,stdoutMPI) \ -firstprivate(i_max, tmp_trans, Asum, Adiff, ibitsite1, ibitsite2, X, list_1_org, list_1, myrank) \ -private(j, tmp_sgn, tmp_off) +#pragma omp parallel for default(none) shared(tmp_v0,tmp_v1,one,nstate) \ +firstprivate(i_max,tmp_trans,Asum,Adiff,ibitsite1,ibitsite2,X,list_1_org,list_1,myrank) \ + private(j,tmp_sgn,tmp_off,dmv) for (j = 1; j <= i_max; j++) { tmp_sgn = X_CisAjt(list_1_org[j], X, ibitsite1, ibitsite2, Asum, Adiff, &tmp_off); dmv = tmp_trans * tmp_sgn; @@ -219,8 +220,8 @@ private(j, tmp_sgn, tmp_off) if (X->Def.PairExcitationOperator[i][4] == 0) { if (ibit != is) { dmv = -tmp_trans; -#pragma omp parallel for default(none) shared(tmp_v0, tmp_v1) \ - firstprivate(i_max, tmp_trans) private(j) +#pragma omp parallel for default(none) shared(tmp_v0, tmp_v1,one,dmv,nstate) \ +firstprivate(i_max, tmp_trans) private(j) for (j = 1; j <= i_max; j++) { zaxpy_(&nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); } @@ -254,8 +255,8 @@ private(j, tmp_sgn, tmp_off) if (org_isite1 == org_isite2 && org_sigma1 == org_sigma2) { is = X->Def.Tpow[2 * org_isite1 - 2 + org_sigma1]; if (X->Def.PairExcitationOperator[i][4] == 0) { -#pragma omp parallel for default(none) shared(list_1, nstate, tmp_v0, tmp_v1) \ -firstprivate(i_max, is, tmp_trans) private(num1, ibit) +#pragma omp parallel for default(none) shared(list_1,nstate,tmp_v0,tmp_v1,one) \ +firstprivate(i_max,is,tmp_trans) private(num1,ibit,dmv) for (j = 1; j <= i_max; j++) { ibit = list_1[j] & is; num1 = (1 - ibit / is); @@ -264,8 +265,8 @@ firstprivate(i_max, is, tmp_trans) private(num1, ibit) } } else { -#pragma omp parallel for default(none) shared(list_1, nstate, tmp_v0, tmp_v1) \ -firstprivate(i_max, is, 
tmp_trans) private(num1, ibit) +#pragma omp parallel for default(none) shared(list_1,nstate,tmp_v0,tmp_v1,one) \ + firstprivate(i_max,is,tmp_trans) private(num1,ibit,dmv) for (j = 1; j <= i_max; j++) { ibit = list_1[j] & is; num1 = ibit / is; diff --git a/src/PairExSpin.c b/src/PairExSpin.c index 6caa634d2..1e569a0c7 100644 --- a/src/PairExSpin.c +++ b/src/PairExSpin.c @@ -99,7 +99,8 @@ int GetPairExcitedStateHalfSpinGC( if (org_sigma1 == org_sigma2) { if (X->Def.PairExcitationOperator[i][4] == 0) { // longitudinal magnetic field -#pragma omp parallel for default(none) private(j, tmp_sgn) firstprivate(i_max, isite1, org_sigma1, X,tmp_trans) shared(tmp_v0, tmp_v1) +#pragma omp parallel for default(none) private(j, tmp_sgn,dmv) \ + firstprivate(i_max, isite1, org_sigma1, X,tmp_trans) shared(one,nstate,tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) { dmv = (1.0 - X_SpinGC_CisAis(j, X, isite1, org_sigma1))* (-tmp_trans); zaxpy_(&nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); @@ -107,7 +108,8 @@ int GetPairExcitedStateHalfSpinGC( } else { // longitudinal magnetic field -#pragma omp parallel for default(none) private(j, tmp_sgn) firstprivate(i_max, isite1, org_sigma1, X,tmp_trans) shared(tmp_v0, tmp_v1) +#pragma omp parallel for default(none) private(j, tmp_sgn,dmv) \ + firstprivate(i_max, isite1, org_sigma1, X,tmp_trans) shared(tmp_v0, tmp_v1,one,nstate) for (j = 1; j <= i_max; j++) { dmv = X_SpinGC_CisAis(j, X, isite1, org_sigma1)* tmp_trans; zaxpy_(&nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); @@ -117,7 +119,8 @@ int GetPairExcitedStateHalfSpinGC( else { // transverse magnetic field // fprintf(stdoutMPI, "Debug: isite1=%d, org_sigma2=%d\n", isite1, org_sigma2); -#pragma omp parallel for default(none) private(j, tmp_sgn, tmp_off) firstprivate(i_max, isite1, org_sigma2, X, tmp_trans) shared(tmp_v0, tmp_v1) +#pragma omp parallel for default(none) private(j, tmp_sgn, tmp_off,dmv) \ + firstprivate(i_max, isite1, org_sigma2, X, tmp_trans) shared(tmp_v0, 
tmp_v1,one,nstate) for (j = 1; j <= i_max; j++) { tmp_sgn = X_SpinGC_CisAit(j, X, isite1, org_sigma2, &tmp_off); if (tmp_sgn != 0) { @@ -185,7 +188,8 @@ int GetPairExcitedStateGeneralSpinGC( if (org_sigma1 == org_sigma2) { if (X->Def.PairExcitationOperator[i][4] == 0) { // longitudinal magnetic field -#pragma omp parallel for default(none) private(j, num1) firstprivate(i_max, org_isite1, org_sigma1, X, tmp_trans) shared(tmp_v0, tmp_v1) +#pragma omp parallel for default(none) private(j,num1,dmv) \ +firstprivate(i_max,org_isite1,org_sigma1,X,tmp_trans) shared(tmp_v0,tmp_v1,one,nstate) for (j = 1; j <= i_max; j++) { num1 = BitCheckGeneral(j - 1, org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); dmv = -tmp_trans * (1.0 - num1); @@ -194,7 +198,8 @@ int GetPairExcitedStateGeneralSpinGC( } else { // longitudinal magnetic field -#pragma omp parallel for default(none) private(j, num1) firstprivate(i_max, org_isite1, org_sigma1, X, tmp_trans) shared(tmp_v0, tmp_v1) +#pragma omp parallel for default(none) private(j,num1,dmv) \ + firstprivate(i_max,org_isite1,org_sigma1,X,tmp_trans) shared(tmp_v0,tmp_v1,one,nstate) for (j = 1; j <= i_max; j++) { num1 = BitCheckGeneral(j - 1, org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); dmv = tmp_trans * num1; @@ -204,7 +209,9 @@ int GetPairExcitedStateGeneralSpinGC( } else { // transverse magnetic field -#pragma omp parallel for default(none) private(j, num1) firstprivate(i_max, org_isite1, org_sigma1, org_sigma2, X,tmp_off, tmp_trans) shared(tmp_v0, tmp_v1) +#pragma omp parallel for default(none) private(j,num1,dmv) \ +firstprivate(i_max,org_isite1,org_sigma1,org_sigma2,X,tmp_off,tmp_trans) \ +shared(tmp_v0,tmp_v1,one,nstate) for (j = 1; j <= i_max; j++) { num1 = GetOffCompGeneralSpin(j - 1, org_isite1, org_sigma2, org_sigma1, &tmp_off, X->Def.SiteToBit, X->Def.Tpow); if (num1 != 0) { @@ -294,7 +301,7 @@ int GetPairExcitedStateHalfSpin( if (X->Def.PairExcitationOperator[i][4] == 0) { if (ibit1 == 0) { dmv = -tmp_trans; 
-#pragma omp parallel for default(none) shared(tmp_v0, tmp_v1) \ +#pragma omp parallel for default(none) shared(tmp_v0, tmp_v1,one,nstate,dmv) \ firstprivate(i_max, tmp_trans) private(j) for (j = 1; j <= i_max; j++) { zaxpy_(&nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); @@ -303,7 +310,7 @@ int GetPairExcitedStateHalfSpin( } else { if (ibit1 != 0) { -#pragma omp parallel for default(none) shared(tmp_v0, tmp_v1) \ +#pragma omp parallel for default(none) shared(tmp_v0, tmp_v1,one,nstate) \ firstprivate(i_max, tmp_trans) private(j) for (j = 1; j <= i_max; j++) zaxpy_(&nstate, &tmp_trans, tmp_v1[j], &one, tmp_v0[j], &one); @@ -314,14 +321,16 @@ int GetPairExcitedStateHalfSpin( isite1 = X->Def.Tpow[org_isite1 - 1]; if (org_isite1 == org_isite2 && org_sigma1 == org_sigma2 && X->Def.PairExcitationOperator[i][4] == 0) { -#pragma omp parallel for default(none) private(j) firstprivate(i_max, isite1, org_sigma1, X, tmp_trans) shared(tmp_v0, tmp_v1) +#pragma omp parallel for default(none) private(j,dmv) \ +firstprivate(i_max,isite1,org_sigma1,X,tmp_trans) shared(tmp_v0,tmp_v1,one,nstate) for (j = 1; j <= i_max; j++) { dmv = (1.0 - X_Spin_CisAis(j, X, isite1, org_sigma1)) * (-tmp_trans); zaxpy_(&nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); } } else { -#pragma omp parallel for default(none) private(j) firstprivate(i_max, isite1, org_sigma1, X, tmp_trans) shared(tmp_v0, tmp_v1) +#pragma omp parallel for default(none) private(j,dmv) \ +firstprivate(i_max,isite1,org_sigma1,X,tmp_trans) shared(tmp_v0,tmp_v1,one,nstate) for (j = 1; j <= i_max; j++) { dmv = X_Spin_CisAis(j, X, isite1, org_sigma1) * tmp_trans; zaxpy_(&nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); @@ -343,9 +352,9 @@ int GetPairExcitedStateHalfSpin( } else { isite1 = X->Def.Tpow[org_isite1 - 1]; -#pragma omp parallel for default(none) private(j, tmp_off, num1) \ -firstprivate(i_max, isite1, org_sigma2, X, tmp_trans, list_1_org, list_1, list_2_1, list_2_2) \ -shared(tmp_v0, tmp_v1) +#pragma omp parallel for 
default(none) private(j,tmp_off,num1,dmv) \ +firstprivate(i_max,isite1,org_sigma2,X,tmp_trans,list_1_org,list_1,list_2_1,list_2_2) \ +shared(tmp_v0,tmp_v1,one,nstate) for (j = 1; j <= i_max; j++) { num1 = X_Spin_CisAit(j, X, isite1, org_sigma2, list_1_org, list_2_1, list_2_2, &tmp_off); if (num1 != 0) { @@ -406,7 +415,9 @@ int GetPairExcitedStateGeneralSpin( org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); if (X->Def.PairExcitationOperator[i][4] == 0) { if (num1 == 0) { -#pragma omp parallel for default(none) private(j) firstprivate(i_max, tmp_trans) shared(tmp_v0, tmp_v1) +#pragma omp parallel for default(none) private(j,dmv) \ + firstprivate(i_max, tmp_trans) \ + shared(tmp_v0, tmp_v1,one,nstate) for (j = 1; j <= i_max; j++) { dmv = -tmp_trans; zaxpy_(&nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); @@ -415,7 +426,8 @@ int GetPairExcitedStateGeneralSpin( } else { if (num1 != 0) { -#pragma omp parallel for default(none) private(j) firstprivate(i_max, tmp_trans) shared(tmp_v0, tmp_v1) +#pragma omp parallel for default(none) private(j) \ + firstprivate(i_max, tmp_trans) shared(tmp_v0, tmp_v1,one,nstate) for (j = 1; j <= i_max; j++) { zaxpy_(&nstate, &tmp_trans, tmp_v1[j], &one, tmp_v0[j], &one); } @@ -432,7 +444,9 @@ int GetPairExcitedStateGeneralSpin( if (org_sigma1 == org_sigma2) { // longitudinal magnetic field if (X->Def.PairExcitationOperator[i][4] == 0) { -#pragma omp parallel for default(none) private(j, num1) firstprivate(i_max, org_isite1, org_sigma1, X, tmp_trans) shared(tmp_v0, tmp_v1, list_1) +#pragma omp parallel for default(none) private(j, num1,dmv) \ + firstprivate(i_max, org_isite1, org_sigma1, X, tmp_trans) \ + shared(tmp_v0, tmp_v1, list_1,one,nstate) for (j = 1; j <= i_max; j++) { num1 = BitCheckGeneral(list_1[j], org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); dmv = -tmp_trans * (1.0 - num1); @@ -440,7 +454,9 @@ int GetPairExcitedStateGeneralSpin( } } else { -#pragma omp parallel for default(none) private(j, num1) 
firstprivate(i_max, org_isite1, org_sigma1, X, tmp_trans) shared(tmp_v0, tmp_v1, list_1) +#pragma omp parallel for default(none) private(j, num1,dmv) \ + firstprivate(i_max, org_isite1, org_sigma1, X, tmp_trans) \ + shared(tmp_v0, tmp_v1, list_1,one,nstate) for (j = 1; j <= i_max; j++) { num1 = BitCheckGeneral(list_1[j], org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); dmv = tmp_trans * num1; @@ -451,7 +467,7 @@ int GetPairExcitedStateGeneralSpin( else {//org_sigma1 != org_sigma2 #pragma omp parallel for default(none) private(j, tmp_sgn, tmp_off) \ firstprivate(i_max, org_isite1, org_sigma1, org_sigma2, X, off, tmp_trans, myrank) \ - shared(tmp_v0, tmp_v1, list_1_org, list_1) + shared(tmp_v0, tmp_v1, list_1_org, list_1,one,nstate) for (j = 1; j <= i_max; j++) { tmp_sgn = GetOffCompGeneralSpin(list_1_org[j], org_isite1, org_sigma2, org_sigma1, &off, X->Def.SiteToBit, X->Def.Tpow); diff --git a/src/SingleExHubbard.c b/src/SingleExHubbard.c index 3aac0fa73..59d25b072 100644 --- a/src/SingleExHubbard.c +++ b/src/SingleExHubbard.c @@ -70,8 +70,8 @@ int GetSingleExcitedStateHubbard( X->Large.irght, X->Large.ilft, X->Large.ihfbit); } else { -#pragma omp parallel for default(none) shared(tmp_v0, tmp_v1, X, list_1_org,one) \ - firstprivate(idim_max, tmpphi, org_isite, ispin, list_2_1, list_2_2, is1_spin) \ +#pragma omp parallel for default(none) shared(nstate,tmp_v0, tmp_v1, X, list_1_org,one) \ +firstprivate(idim_max, tmpphi, org_isite, ispin, list_2_1, list_2_2, is1_spin) \ private(j, isgn,tmp_off,dmv) for (j = 1; j <= idim_max; j++) {//idim_max -> original dimension isgn = X_Cis(j, is1_spin, &tmp_off, list_1_org, list_2_1, list_2_2, @@ -88,9 +88,9 @@ private(j, isgn,tmp_off,dmv) list_2_1, list_2_2, X->Large.irght, X->Large.ilft, X->Large.ihfbit); } else { -#pragma omp parallel for default(none) shared(tmp_v0, tmp_v1, X, list_1_org, list_1) \ - firstprivate(idim_max, tmpphi, org_isite, ispin, list_2_1, list_2_2, is1_spin, myrank) \ -private(j, isgn, tmp_off) 
+#pragma omp parallel for default(none) shared(tmp_v0,tmp_v1,X,list_1_org,list_1,one,nstate) \ +firstprivate(idim_max,tmpphi,org_isite,ispin,list_2_1,list_2_2,is1_spin,myrank) \ +private(j, isgn, tmp_off,dmv) for (j = 1; j <= idim_max; j++) {//idim_max -> original dimension isgn = X_Ajt(j, is1_spin, &tmp_off, list_1_org, list_2_1, list_2_2, X->Large.irght, X->Large.ilft, X->Large.ihfbit); @@ -149,8 +149,8 @@ int GetSingleExcitedStateHubbardGC( idim_max, tmp_v1bufOrg, X->Def.Tpow); } else { -#pragma omp parallel for default(none) shared(tmp_v0, tmp_v1, X) \ - firstprivate(idim_max, tmpphi, org_isite, ispin) private(j, is1_spin, tmp_off) +#pragma omp parallel for default(none) shared(tmp_v0,tmp_v1,X,nstate) \ +firstprivate(idim_max, tmpphi, org_isite, ispin) private(j, is1_spin, tmp_off) for (j = 1; j <= idim_max; j++) { is1_spin = X->Def.Tpow[2 * org_isite + ispin]; GC_Cis(j, nstate, tmp_v0, tmp_v1, is1_spin, tmpphi, &tmp_off); @@ -163,8 +163,8 @@ int GetSingleExcitedStateHubbardGC( idim_max, tmp_v1bufOrg, X->Def.Tpow); } else { -#pragma omp parallel for default(none) shared(tmp_v0, tmp_v1, X) \ - firstprivate(idim_max, tmpphi, org_isite, ispin) private(j, is1_spin, tmp_off) +#pragma omp parallel for default(none) shared(tmp_v0,tmp_v1,X,nstate) \ +firstprivate(idim_max, tmpphi, org_isite, ispin) private(j, is1_spin, tmp_off) for (j = 1; j <= idim_max; j++) { is1_spin = X->Def.Tpow[2 * org_isite + ispin]; GC_Ajt(j, nstate, tmp_v0, tmp_v1, is1_spin, tmpphi, &tmp_off); diff --git a/src/expec_cisajs.c b/src/expec_cisajs.c index 5f4e2cdd9..1ce822f81 100644 --- a/src/expec_cisajs.c +++ b/src/expec_cisajs.c @@ -55,7 +55,6 @@ int expec_cisajs_HubbardGC( ){ long unsigned int i, j; long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; - double complex dam_pr = 0; long int i_max; long int ibit; long unsigned int is; @@ -69,7 +68,6 @@ int expec_cisajs_HubbardGC( org_isite2 = X->Def.CisAjt[i][2] + 1; org_sigma1 = X->Def.CisAjt[i][1]; org_sigma2 = X->Def.CisAjt[i][3]; 
- dam_pr = 0; if (org_isite1 > X->Def.Nsite && org_isite2 > X->Def.Nsite) { if (org_isite1 == org_isite2 && org_sigma1 == org_sigma2) { @@ -129,7 +127,6 @@ int expec_cisajs_Hubbard( ) { long unsigned int i, j; long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; - double complex dam_pr = 0; long int i_max; int num1, one = 1; long int ibit; @@ -143,7 +140,6 @@ int expec_cisajs_Hubbard( org_isite2 = X->Def.CisAjt[i][2] + 1; org_sigma1 = X->Def.CisAjt[i][1]; org_sigma2 = X->Def.CisAjt[i][3]; - dam_pr = 0.0; if (X->Def.iFlgSzConserved == TRUE) { if (org_sigma1 != org_sigma2) { @@ -195,7 +191,7 @@ int expec_cisajs_Hubbard( if (org_isite1 == org_isite2 && org_sigma1 == org_sigma2) { is = X->Def.Tpow[2 * org_isite1 - 2 + org_sigma1]; -#pragma omp parallel for default(none) shared(list_1, vec) reduction(+:dam_pr) \ +#pragma omp parallel for default(none) shared(list_1, vec,Xvec,nstate,one,tmp_OneGreen) \ firstprivate(i_max, is) private(num1, ibit) for (j = 1; j <= i_max; j++) { ibit = list_1[j] & is; @@ -230,7 +226,7 @@ int expec_cisajs_SpinHalf( long unsigned int i, j; long unsigned int isite1; long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; - double complex dam_pr = 0, dmv; + double complex dmv; long int i_max; long int ibit1; long unsigned int is1_up; @@ -256,21 +252,14 @@ int expec_cisajs_SpinHalf( }// org_isite1 > X->Def.Nsite else { isite1 = X->Def.Tpow[org_isite1 - 1]; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j) \ -firstprivate(i_max, isite1, org_sigma1, X) shared(vec) +#pragma omp parallel for default(none) private(j,dmv) \ + firstprivate(i_max, isite1, org_sigma1, X) shared(vec,Xvec,nstate,one) for (j = 1; j <= i_max; j++) { dmv = X_Spin_CisAis(j, X, isite1, org_sigma1); zaxpy_(&nstate, &dmv, &vec[j][0], &one, &Xvec[j][0], &one); } } } - else { - dam_pr = 0.0; - } - } - else { - // for the canonical case - dam_pr = 0.0; } MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); } @@ -294,7 +283,7 @@ int 
expec_cisajs_SpinGeneral( ) { long unsigned int i, j; long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; - double complex dam_pr = 0, dmv; + double complex dmv; long int i_max; int num1, one = 1; i_max = X->Check.idim_max; @@ -312,36 +301,23 @@ int expec_cisajs_SpinGeneral( // longitudinal magnetic field num1 = BitCheckGeneral((unsigned long int)myrank, org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); - dam_pr = 0.0; if (num1 != 0) { zaxpy_long(i_max*nstate, 1.0, &vec[1][0], &Xvec[1][0]); } } - else { - dam_pr = 0.0; - } } else {//org_isite1 <= X->Def.Nsite if (org_sigma1 == org_sigma2) { // longitudinal magnetic field - dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, num1) \ -firstprivate(i_max, org_isite1, org_sigma1, X) shared(vec, list_1) +#pragma omp parallel for default(none) private(j, num1,dmv) \ + firstprivate(i_max, org_isite1, org_sigma1, X) shared(vec,Xvec, list_1,nstate,one) for (j = 1; j <= i_max; j++) { dmv = BitCheckGeneral(list_1[j], org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); zaxpy_(&nstate, &dmv, &vec[j][0], &one, &Xvec[j][0], &one); } } - else { - dam_pr = 0.0; - } } } - else { - // hopping is not allowed in localized spin system - dam_pr = 0.0; - }//org_isite1 != org_isite2 - MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); } return 0; @@ -365,7 +341,7 @@ int expec_cisajs_SpinGCHalf( long unsigned int i, j; long unsigned int isite1; long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; - double complex dam_pr = 0, dmv; + double complex dmv; long int i_max; int tmp_sgn, one = 1; long unsigned int tmp_off = 0; @@ -378,7 +354,6 @@ int expec_cisajs_SpinGCHalf( org_isite2 = X->Def.CisAjt[i][2] + 1; org_sigma1 = X->Def.CisAjt[i][1]; org_sigma2 = X->Def.CisAjt[i][3]; - dam_pr = 0.0; if (org_isite1 == org_isite2) { if (org_isite1 > X->Def.Nsite) { @@ -394,8 +369,8 @@ int expec_cisajs_SpinGCHalf( if (org_sigma1 == org_sigma2) { // longitudinal magnetic field -#pragma 
omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_sgn) \ -firstprivate(i_max, isite1, org_sigma1, X) shared(vec) +#pragma omp parallel for default(none) private(j, tmp_sgn,dmv) \ + firstprivate(i_max, isite1, org_sigma1, X) shared(vec,Xvec,nstate,one) for (j = 1; j <= i_max; j++) { dmv = X_SpinGC_CisAis(j, X, isite1, org_sigma1); zaxpy_(&nstate, &dmv, &vec[j][0], &one, &Xvec[j][0], &one); @@ -403,8 +378,8 @@ firstprivate(i_max, isite1, org_sigma1, X) shared(vec) } else { // transverse magnetic field -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_sgn, tmp_off) \ -firstprivate(i_max, isite1, org_sigma2, X) shared(vec) +#pragma omp parallel for default(none) private(j, tmp_sgn, tmp_off,dmv) \ + firstprivate(i_max, isite1, org_sigma2, X) shared(vec,Xvec,nstate,one) for (j = 1; j <= i_max; j++) { tmp_sgn = X_SpinGC_CisAit(j, X, isite1, org_sigma2, &tmp_off); if (tmp_sgn != 0) { @@ -415,10 +390,6 @@ firstprivate(i_max, isite1, org_sigma2, X) shared(vec) } } } - else { - // hopping is not allowed in localized spin system - dam_pr = 0.0; - } MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); } return 0; @@ -441,7 +412,7 @@ int expec_cisajs_SpinGCGeneral( ) { long unsigned int i, j; long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; - double complex dam_pr = 0, dmv; + double complex dmv; long int i_max; long unsigned int tmp_off = 0; int num1, one = 1; @@ -470,8 +441,8 @@ int expec_cisajs_SpinGCGeneral( else {//org_isite1 <= X->Def.Nsite if (org_sigma1 == org_sigma2) { // longitudinal magnetic field -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, num1) \ -firstprivate(i_max, org_isite1, org_sigma1, X) shared(vec) +#pragma omp parallel for default(none) private(j, num1,dmv) \ + firstprivate(i_max, org_isite1, org_sigma1, X) shared(vec,Xvec,nstate,one) for (j = 1; j <= i_max; j++) { num1 = BitCheckGeneral(j - 1, org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); dmv = (double complex)num1; 
@@ -480,8 +451,8 @@ firstprivate(i_max, org_isite1, org_sigma1, X) shared(vec) } else { // transverse magnetic field -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, num1) \ -firstprivate(i_max, org_isite1, org_sigma1, org_sigma2, X,tmp_off) shared(vec) +#pragma omp parallel for default(none) private(j, num1,dmv) \ + firstprivate(i_max, org_isite1, org_sigma1, org_sigma2, X,tmp_off) shared(vec,Xvec,nstate,one) for (j = 1; j <= i_max; j++) { num1 = GetOffCompGeneralSpin( j - 1, org_isite1, org_sigma2, org_sigma1, &tmp_off, X->Def.SiteToBit, X->Def.Tpow); diff --git a/src/expec_cisajscktaltdc.c b/src/expec_cisajscktaltdc.c index 26d8880af..f588eb49b 100644 --- a/src/expec_cisajscktaltdc.c +++ b/src/expec_cisajscktaltdc.c @@ -160,7 +160,7 @@ int expec_cisajscktalt_HubbardGC( long int i_max; for (i = 0; i < X->Def.NCisAjtCkuAlvDC; i++) { - zclear(i_max*nstate, &Xvec[1][0]); + zclear(X->Large.i_max*nstate, &Xvec[1][0]); org_isite1 = X->Def.CisAjtCkuAlvDC[i][0] + 1; org_sigma1 = X->Def.CisAjtCkuAlvDC[i][1]; org_isite2 = X->Def.CisAjtCkuAlvDC[i][2] + 1; @@ -216,14 +216,14 @@ int expec_cisajscktalt_HubbardGC( Bdiff = X->Large.B_spin; if (isite1 == isite2 && isite3 == isite4) { -#pragma omp parallel for default(none) private(j) shared(vec) \ +#pragma omp parallel for default(none) private(j) shared(vec,Xvec,nstate) \ firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2,tmp_V) for (j = 1; j <= i_max; j++) { GC_child_CisAisCisAis_element(j, isite1, isite3, tmp_V, nstate, Xvec, vec, X, &tmp_off); } } else if (isite1 == isite2 && isite3 != isite4) { -#pragma omp parallel for default(none) private(j) shared(vec) \ +#pragma omp parallel for default(none) private(j) shared(vec,Xvec,nstate) \ firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2,tmp_V) for (j = 1; j <= i_max; j++) { GC_child_CisAisCjtAku_element(j, isite1, isite3, isite4, Bsum, Bdiff, @@ -231,7 +231,7 @@ 
firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,t } } else if (isite1 != isite2 && isite3 == isite4) { -#pragma omp parallel for default(none) private(j) shared(vec) \ +#pragma omp parallel for default(none) private(j) shared(vec,Xvec,nstate) \ firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2,tmp_V) for (j = 1; j <= i_max; j++) { GC_child_CisAjtCkuAku_element(j, isite1, isite2, isite3, Asum, Adiff, @@ -239,7 +239,7 @@ firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,t } } else if (isite1 != isite2 && isite3 != isite4) { -#pragma omp parallel for default(none) private(j) shared(vec) \ +#pragma omp parallel for default(none) private(j) shared(vec,Xvec,nstate) \ firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2,tmp_V) for (j = 1; j <= i_max; j++) { GC_child_CisAjtCkuAlv_element(j, isite1, isite2, isite3, isite4, Asum, Adiff, Bsum, Bdiff, @@ -279,7 +279,7 @@ int expec_cisajscktalt_Hubbard( long int i_max; for (i = 0; i < X->Def.NCisAjtCkuAlvDC; i++) { - zclear(i_max*nstate, &Xvec[1][0]); + zclear(X->Large.i_max*nstate, &Xvec[1][0]); org_isite1 = X->Def.CisAjtCkuAlvDC[i][0] + 1; org_sigma1 = X->Def.CisAjtCkuAlvDC[i][1]; org_isite2 = X->Def.CisAjtCkuAlvDC[i][2] + 1; @@ -341,14 +341,14 @@ int expec_cisajscktalt_Hubbard( tmp_V = 1.0; if (isite1 == isite2 && isite3 == isite4) { -#pragma omp parallel for default(none) private(j) shared(vec,tmp_V) \ +#pragma omp parallel for default(none) private(j) shared(vec,tmp_V,Xvec,nstate) \ firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2) for (j = 1; j <= i_max; j++) { child_CisAisCisAis_element(j, isite1, isite3, tmp_V, nstate, Xvec, vec, X, &tmp_off); } } else if (isite1 == isite2 && isite3 != isite4) { -#pragma omp parallel for default(none) private(j) shared(vec,tmp_V) \ +#pragma omp parallel for default(none) private(j) shared(vec,tmp_V,Xvec,nstate) \ 
firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2) for (j = 1; j <= i_max; j++) { child_CisAisCjtAku_element(j, isite1, isite3, isite4, Bsum, Bdiff, @@ -356,7 +356,7 @@ firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,t } } else if (isite1 != isite2 && isite3 == isite4) { -#pragma omp parallel for default(none) private(j) shared(vec,tmp_V) \ +#pragma omp parallel for default(none) private(j) shared(vec,tmp_V,Xvec,nstate) \ firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2) for (j = 1; j <= i_max; j++) { child_CisAjtCkuAku_element(j, isite1, isite2, isite3, Asum, Adiff, @@ -364,7 +364,7 @@ firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,t } } else if (isite1 != isite2 && isite3 != isite4) { -#pragma omp parallel for default(none) private(j) shared(vec,tmp_V) \ +#pragma omp parallel for default(none) private(j) shared(vec,tmp_V,Xvec,nstate) \ firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2) for (j = 1; j <= i_max; j++) { child_CisAjtCkuAlv_element(j, isite1, isite2, isite3, isite4, Asum, Adiff, Bsum, Bdiff, @@ -452,8 +452,8 @@ int expec_cisajscktalt_SpinHalf( is1_up = X->Def.Tpow[org_isite1 - 1]; is2_up = X->Def.Tpow[org_isite3 - 1]; num2 = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, org_sigma3); -#pragma omp parallel for default(none)shared(vec) \ - firstprivate(i_max, tmp_V, is1_up, org_sigma1, X, num2) private(j, num1) +#pragma omp parallel for default(none)shared(vec,Xvec,nstate,one) \ + firstprivate(i_max, tmp_V, is1_up, org_sigma1, X, num2) private(j, num1,dmv) for (j = 1; j <= i_max; j++) { num1 = X_Spin_CisAis(j, X, is1_up, org_sigma1); dmv = tmp_V * num1*num2; @@ -473,7 +473,7 @@ int expec_cisajscktalt_SpinHalf( isA_up = X->Def.Tpow[org_isite1 - 1]; isB_up = X->Def.Tpow[org_isite3 - 1]; if (org_sigma1 == org_sigma2 && org_sigma3 == org_sigma4) { //diagonal -#pragma 
omp parallel for default(none) private(j) shared(vec) \ +#pragma omp parallel for default(none) private(j) shared(vec,Xvec,nstate) \ firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off, tmp_V) for (j = 1; j <= i_max; j++) { child_CisAisCisAis_spin_element(j, isA_up, isB_up, org_sigma2, org_sigma4, @@ -482,14 +482,14 @@ firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off, tmp_V) } else if (org_isite1 == org_isite3 && org_sigma1 == org_sigma4 && org_sigma3 == org_sigma2) { #pragma omp parallel for default(none) private(j, dmv) \ -firstprivate(i_max,X,isA_up,org_sigma1, tmp_V) shared(vec, list_1) + firstprivate(i_max,X,isA_up,org_sigma1, tmp_V) shared(vec, list_1,Xvec,nstate,one) for (j = 1; j <= i_max; j++) { dmv = tmp_V * X_Spin_CisAis(j, X, isA_up, org_sigma1); zaxpy_(&nstate, &dmv, &vec[j][0], &one, &Xvec[j][0], &one); } } else if (org_sigma1 == org_sigma4 && org_sigma2 == org_sigma3) { // exchange -#pragma omp parallel for default(none) private(j, tmp_sgn, dmv) shared(vec) \ +#pragma omp parallel for default(none) private(j, tmp_sgn, dmv) shared(vec,Xvec,nstate,one) \ firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V) for (j = 1; j <= i_max; j++) { tmp_sgn = X_child_exchange_spin_element(j, X, isA_up, isB_up, org_sigma2, org_sigma4, &tmp_off); @@ -595,7 +595,7 @@ int expec_cisajscktalt_SpinGeneral( } else { if (org_sigma1 == org_sigma2 && org_sigma3 == org_sigma4) { //diagonal -#pragma omp parallel for default(none) private(j, num1) shared(vec,list_1) \ +#pragma omp parallel for default(none) private(j, num1) shared(vec,list_1,Xvec,nstate,one) \ firstprivate(i_max,X,org_isite1, org_sigma1,org_isite3, org_sigma3, tmp_V) for (j = 1; j <= i_max; j++) { num1 = BitCheckGeneral(list_1[j], org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); @@ -608,8 +608,9 @@ firstprivate(i_max,X,org_isite1, org_sigma1,org_isite3, org_sigma3, tmp_V) } } else if (org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4) { -#pragma omp 
parallel for default(none) private(j, num1) \ -firstprivate(i_max,X, org_isite1, org_isite3, org_sigma1, org_sigma2, org_sigma3, org_sigma4, tmp_off, tmp_off_2, list1_off, myrank, tmp_V) shared(vec, list_1) +#pragma omp parallel for default(none) private(j,num1) \ +firstprivate(i_max,X,org_isite1,org_isite3,org_sigma1,org_sigma2,org_sigma3,org_sigma4,tmp_off,tmp_off_2,list1_off,myrank,tmp_V) \ + shared(vec,list_1,Xvec,nstate,one) for (j = 1; j <= i_max; j++) { num1 = GetOffCompGeneralSpin(list_1[j], org_isite3, org_sigma4, org_sigma3, &tmp_off, X->Def.SiteToBit, X->Def.Tpow); if (num1 != FALSE) { @@ -621,9 +622,6 @@ firstprivate(i_max,X, org_isite1, org_isite3, org_sigma1, org_sigma2, org_sigma3 } } } - //printf("DEBUG: rank=%d, dam_pr=%lf\n", myrank, creal(dam_pr)); - } - else { } } MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); @@ -729,7 +727,7 @@ int expec_cisajscktalt_SpinGCHalf( isA_up = X->Def.Tpow[org_isite2 - 1]; isB_up = X->Def.Tpow[org_isite4 - 1]; if (org_sigma1 == org_sigma2 && org_sigma3 == org_sigma4) { //diagonal -#pragma omp parallel for default(none) private(j) shared(vec) \ +#pragma omp parallel for default(none) private(j) shared(vec,Xvec,nstate) \ firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V) for (j = 1; j <= i_max; j++) { GC_child_CisAisCisAis_spin_element(j, isA_up, isB_up, org_sigma2, org_sigma4, @@ -737,7 +735,7 @@ firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V) } } else if (org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4) { -#pragma omp parallel for default(none) private(j) shared(vec) \ +#pragma omp parallel for default(none) private(j) shared(vec,Xvec,nstate) \ firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V) for (j = 1; j <= i_max; j++) { GC_child_CisAisCitAiu_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, @@ -745,7 +743,7 @@ firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V) } } else if (org_sigma1 != org_sigma2 && org_sigma3 
== org_sigma4) { -#pragma omp parallel for default(none) private(j) shared(vec) \ +#pragma omp parallel for default(none) private(j) shared(vec,Xvec,nstate) \ firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V) for (j = 1; j <= i_max; j++) { GC_child_CisAitCiuAiu_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, @@ -753,7 +751,7 @@ firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V) } } else if (org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4) { -#pragma omp parallel for default(none) private(j) shared(vec) \ +#pragma omp parallel for default(none) private(j) shared(vec,Xvec,nstate) \ firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V) for (j = 1; j <= i_max; j++) { GC_child_CisAitCiuAiv_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, @@ -857,7 +855,7 @@ int expec_cisajscktalt_SpinGCGeneral( } else { if (org_sigma1 == org_sigma2 && org_sigma3 == org_sigma4) { //diagonal -#pragma omp parallel for default(none) private(j, num1) shared(vec) \ +#pragma omp parallel for default(none) private(j, num1) shared(vec,Xvec,nstate,one) \ firstprivate(i_max,X,org_isite1, org_sigma1,org_isite3, org_sigma3, tmp_V) for (j = 1; j <= i_max; j++) { num1 = BitCheckGeneral(j - 1, org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); @@ -870,7 +868,7 @@ firstprivate(i_max,X,org_isite1, org_sigma1,org_isite3, org_sigma3, tmp_V) } } else if (org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4) { -#pragma omp parallel for default(none) private(j, num1) shared(vec) \ +#pragma omp parallel for default(none) private(j, num1) shared(vec,Xvec,nstate,one) \ firstprivate(i_max,X, org_isite1, org_isite3, org_sigma1,org_sigma3,org_sigma4, tmp_off, tmp_V) for (j = 1; j <= i_max; j++) { num1 = GetOffCompGeneralSpin(j - 1, org_isite3, org_sigma4, org_sigma3, @@ -884,7 +882,7 @@ firstprivate(i_max,X, org_isite1, org_isite3, org_sigma1,org_sigma3,org_sigma4, } } else if (org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4) 
{ -#pragma omp parallel for default(none) private(j, num1) shared(vec) \ +#pragma omp parallel for default(none) private(j, num1) shared(vec,Xvec,nstate,one) \ firstprivate(i_max,X, org_isite1, org_isite3, org_sigma1,org_sigma2, org_sigma3, tmp_off, tmp_V) for (j = 1; j <= i_max; j++) { num1 = BitCheckGeneral(j - 1, org_isite3, org_sigma3, X->Def.SiteToBit, X->Def.Tpow); @@ -899,7 +897,8 @@ firstprivate(i_max,X, org_isite1, org_isite3, org_sigma1,org_sigma2, org_sigma3, } else if (org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4) { #pragma omp parallel for default(none) private(j, num1) \ -firstprivate(i_max,X, org_isite1, org_isite3, org_sigma1, org_sigma2, org_sigma3, org_sigma4, tmp_off, tmp_off_2, tmp_V) shared(vec) +firstprivate(i_max,X,org_isite1,org_isite3,org_sigma1,org_sigma2,org_sigma3,org_sigma4,tmp_off,tmp_off_2,tmp_V) \ + shared(vec,Xvec,nstate,one) for (j = 1; j <= i_max; j++) { num1 = GetOffCompGeneralSpin(j - 1, org_isite3, org_sigma4, org_sigma3, &tmp_off, X->Def.SiteToBit, X->Def.Tpow); diff --git a/src/expec_energy_flct.c b/src/expec_energy_flct.c index 53b4af551..3252a9331 100644 --- a/src/expec_energy_flct.c +++ b/src/expec_energy_flct.c @@ -35,25 +35,22 @@ int expec_energy_flct_HubbardGC( long unsigned int isite1; long unsigned int is1_up_a, is1_up_b; long unsigned int is1_down_a, is1_down_b; - int bit_up, bit_down, bit_D, istate; + int bit_up, bit_down, bit_D, istate, mythread; long unsigned int ibit_up, ibit_down, ibit_D; - double D, tmp_D, tmp_D2; - double N, tmp_N, tmp_N2; - double Sz, tmp_Sz, tmp_Sz2; + double D, N, Sz; double *tmp_v02; long unsigned int i_max; unsigned int l_ibit1, u_ibit1, i_32; - i_max = X->Check.idim_max; + double **doublon_t, **doublon2_t, **num_t, **num2_t, **Sz_t, **Sz2_t; - tmp_v02 = d_1d_allocate(nstate); + i_max = X->Check.idim_max; + doublon_t = d_2d_allocate(nthreads, nstate); + doublon2_t = d_2d_allocate(nthreads, nstate); + num_t = d_2d_allocate(nthreads, nstate); + num2_t = d_2d_allocate(nthreads, 
nstate); + Sz_t = d_2d_allocate(nthreads, nstate); + Sz2_t = d_2d_allocate(nthreads, nstate); i_32 = 0xFFFFFFFF; //2^32 - 1 - // tentative doublon - tmp_D = 0.0; - tmp_D2 = 0.0; - tmp_N = 0.0; - tmp_N2 = 0.0; - tmp_Sz = 0.0; - tmp_Sz2 = 0.0; //[s] for bit count is1_up_a = 0; @@ -71,63 +68,93 @@ int expec_energy_flct_HubbardGC( } } //[e] -#pragma omp parallel for reduction(+:tmp_D,tmp_D2,tmp_N,tmp_N2,tmp_Sz,tmp_Sz2) default(none) shared(v0,list_1) \ - firstprivate(i_max, X,myrank,is1_up_a,is1_down_a,is1_up_b,is1_down_b,i_32) \ - private(j, tmp_v02,D,N,Sz,isite1,bit_up,bit_down,bit_D,u_ibit1,l_ibit1,ibit_up,ibit_down,ibit_D) - for (j = 1; j <= i_max; j++) { - for (istate = 0; istate < nstate; istate++) tmp_v02[istate] = conj(tmp_v0[j][istate]) * tmp_v0[j][istate]; - bit_up = 0; - bit_down = 0; - bit_D = 0; - // isite1 > X->Def.Nsite - ibit_up = (unsigned long int) myrank & is1_up_a; - u_ibit1 = ibit_up >> 32; - l_ibit1 = ibit_up & i_32; - bit_up += pop(u_ibit1); - bit_up += pop(l_ibit1); - - ibit_down = (unsigned long int) myrank & is1_down_a; - u_ibit1 = ibit_down >> 32; - l_ibit1 = ibit_down & i_32; - bit_down += pop(u_ibit1); - bit_down += pop(l_ibit1); - - ibit_D = (ibit_up) & (ibit_down >> 1); - u_ibit1 = ibit_D >> 32; - l_ibit1 = ibit_D & i_32; - bit_D += pop(u_ibit1); - bit_D += pop(l_ibit1); - - // isite1 <= X->Def.Nsite - ibit_up = (unsigned long int) (j - 1) & is1_up_b; - u_ibit1 = ibit_up >> 32; - l_ibit1 = ibit_up & i_32; - bit_up += pop(u_ibit1); - bit_up += pop(l_ibit1); - - ibit_down = (unsigned long int) (j - 1) & is1_down_b; - u_ibit1 = ibit_down >> 32; - l_ibit1 = ibit_down & i_32; - bit_down += pop(u_ibit1); - bit_down += pop(l_ibit1); - - ibit_D = (ibit_up) & (ibit_down >> 1); - u_ibit1 = ibit_D >> 32; - l_ibit1 = ibit_D & i_32; - bit_D += pop(u_ibit1); - bit_D += pop(l_ibit1); - - D = bit_D; - N = bit_up + bit_down; - Sz = bit_up - bit_down; +#pragma omp parallel default(none) \ 
+shared(tmp_v0,list_1,doublon_t,doublon2_t,num_t,num2_t,Sz_t,Sz2_t,nstate) \ +firstprivate(i_max,X,myrank,is1_up_a,is1_down_a,is1_up_b,is1_down_b,i_32) \ +private(j,tmp_v02,D,N,Sz,isite1,bit_up,bit_down,bit_D,u_ibit1,l_ibit1, \ + ibit_up,ibit_down,ibit_D,mythread,istate) + { + tmp_v02 = d_1d_allocate(nstate); +#ifdef _OPENMP + mythread = omp_get_thread_num(); +#else + mythread = 0; +#endif +#pragma omp for + for (j = 1; j <= i_max; j++) { + for (istate = 0; istate < nstate; istate++) + tmp_v02[istate] = conj(tmp_v0[j][istate]) * tmp_v0[j][istate]; + bit_up = 0; + bit_down = 0; + bit_D = 0; + // isite1 > X->Def.Nsite + ibit_up = (unsigned long int) myrank & is1_up_a; + u_ibit1 = ibit_up >> 32; + l_ibit1 = ibit_up & i_32; + bit_up += pop(u_ibit1); + bit_up += pop(l_ibit1); + + ibit_down = (unsigned long int) myrank & is1_down_a; + u_ibit1 = ibit_down >> 32; + l_ibit1 = ibit_down & i_32; + bit_down += pop(u_ibit1); + bit_down += pop(l_ibit1); + + ibit_D = (ibit_up) & (ibit_down >> 1); + u_ibit1 = ibit_D >> 32; + l_ibit1 = ibit_D & i_32; + bit_D += pop(u_ibit1); + bit_D += pop(l_ibit1); + + // isite1 <= X->Def.Nsite + ibit_up = (unsigned long int) (j - 1) & is1_up_b; + u_ibit1 = ibit_up >> 32; + l_ibit1 = ibit_up & i_32; + bit_up += pop(u_ibit1); + bit_up += pop(l_ibit1); + + ibit_down = (unsigned long int) (j - 1) & is1_down_b; + u_ibit1 = ibit_down >> 32; + l_ibit1 = ibit_down & i_32; + bit_down += pop(u_ibit1); + bit_down += pop(l_ibit1); + + ibit_D = (ibit_up) & (ibit_down >> 1); + u_ibit1 = ibit_D >> 32; + l_ibit1 = ibit_D & i_32; + bit_D += pop(u_ibit1); + bit_D += pop(l_ibit1); + + D = bit_D; + N = bit_up + bit_down; + Sz = bit_up - bit_down; + + for (istate = 0; istate < nstate; istate++) { + doublon_t[mythread][istate] += tmp_v02[istate] * D; + doublon2_t[mythread][istate] += tmp_v02[istate] * D * D; + num_t[mythread][istate] += tmp_v02[istate] * N; + num2_t[mythread][istate] += tmp_v02[istate] * N * N; + Sz_t[mythread][istate] += tmp_v02[istate] * Sz; + 
Sz2_t[mythread][istate] += tmp_v02[istate] * Sz * Sz; + } + }/*for (j = 1; j <= i_max; j++)*/ + free_d_1d_allocate(tmp_v02); + }/*end of parallel region*/ - for (istate = 0; istate < nstate; istate++) { - X->Phys.doublon[istate] += tmp_v02[istate] * D; - X->Phys.doublon2[istate] += tmp_v02[istate] * D * D; - X->Phys.num[istate] += tmp_v02[istate] * N; - X->Phys.num2[istate] += tmp_v02[istate] * N * N; - X->Phys.Sz[istate] += tmp_v02[istate] * Sz; - X->Phys.Sz2[istate] += tmp_v02[istate] * Sz * Sz; + for (istate = 0; istate < nstate; istate++) { + X->Phys.doublon[istate] = 0.0; + X->Phys.doublon2[istate] = 0.0; + X->Phys.num[istate] = 0.0; + X->Phys.num2[istate] = 0.0; + X->Phys.Sz[istate] = 0.0; + X->Phys.Sz2[istate] = 0.0; + for (mythread = 0; mythread < nthreads; mythread++) { + X->Phys.doublon[istate] += doublon_t[mythread][istate]; + X->Phys.doublon2[istate] += doublon2_t[mythread][istate]; + X->Phys.num[istate] += num_t[mythread][istate]; + X->Phys.num2[istate] += num2_t[mythread][istate]; + X->Phys.Sz[istate] += Sz_t[mythread][istate]; + X->Phys.Sz2[istate] += Sz2_t[mythread][istate]; } } SumMPI_dv(nstate, X->Phys.doublon); @@ -144,7 +171,12 @@ int expec_energy_flct_HubbardGC( X->Phys.num_down[istate] = 0.5*(X->Phys.num[istate] - X->Phys.Sz[istate]); } - free_d_1d_allocate(tmp_v02); + free_d_2d_allocate(doublon_t); + free_d_2d_allocate(doublon2_t); + free_d_2d_allocate(num_t); + free_d_2d_allocate(num2_t); + free_d_2d_allocate(Sz_t); + free_d_2d_allocate(Sz2_t); return 0; } /// @@ -161,27 +193,24 @@ int expec_energy_flct_Hubbard( long unsigned int isite1; long unsigned int is1_up_a, is1_up_b; long unsigned int is1_down_a, is1_down_b; - int bit_up, bit_down, bit_D, istate; - + int bit_up, bit_down, bit_D, istate, mythread; long unsigned int ibit_up, ibit_down, ibit_D; - double D, tmp_D, tmp_D2; - double N, tmp_N, tmp_N2; - double Sz, tmp_Sz, tmp_Sz2; + double **doublon_t, **doublon2_t, **num_t, **num2_t, **Sz_t, **Sz2_t; + double D, N, Sz; double *tmp_v02; 
long unsigned int i_max, tmp_list_1; unsigned int l_ibit1, u_ibit1, i_32; + i_max = X->Check.idim_max; - tmp_v02 = d_1d_allocate(nstate); + doublon_t = d_2d_allocate(nthreads, nstate); + doublon2_t = d_2d_allocate(nthreads, nstate); + num_t = d_2d_allocate(nthreads, nstate); + num2_t = d_2d_allocate(nthreads, nstate); + Sz_t = d_2d_allocate(nthreads, nstate); + Sz2_t = d_2d_allocate(nthreads, nstate); i_32 = (unsigned int)(pow(2, 32) - 1); - tmp_D = 0.0; - tmp_D2 = 0.0; - tmp_N = 0.0; - tmp_N2 = 0.0; - tmp_Sz = 0.0; - tmp_Sz2 = 0.0; - //[s] for bit count is1_up_a = 0; is1_up_b = 0; @@ -198,64 +227,94 @@ int expec_energy_flct_Hubbard( } } //[e] -#pragma omp parallel for reduction(+:tmp_D,tmp_D2,tmp_N,tmp_N2,tmp_Sz,tmp_Sz2) default(none) shared(v0,list_1) \ - firstprivate(i_max, X,myrank,is1_up_a,is1_down_a,is1_up_b,is1_down_b,i_32) \ - private(j, tmp_v02,D,N,Sz,isite1,tmp_list_1,bit_up,bit_down,bit_D,u_ibit1,l_ibit1,ibit_up,ibit_down,ibit_D) - for (j = 1; j <= i_max; j++) { - for (istate = 0; istate < nstate; istate++) tmp_v02[istate] = conj(tmp_v0[j][istate]) * tmp_v0[j][istate]; - bit_up = 0; - bit_down = 0; - bit_D = 0; - tmp_list_1 = list_1[j]; - // isite1 > X->Def.Nsite - ibit_up = (unsigned long int) myrank & is1_up_a; - u_ibit1 = ibit_up >> 32; - l_ibit1 = ibit_up & i_32; - bit_up += pop(u_ibit1); - bit_up += pop(l_ibit1); - - ibit_down = (unsigned long int) myrank & is1_down_a; - u_ibit1 = ibit_down >> 32; - l_ibit1 = ibit_down & i_32; - bit_down += pop(u_ibit1); - bit_down += pop(l_ibit1); - - ibit_D = (ibit_up) & (ibit_down >> 1); - u_ibit1 = ibit_D >> 32; - l_ibit1 = ibit_D & i_32; - bit_D += pop(u_ibit1); - bit_D += pop(l_ibit1); - - // isite1 <= X->Def.Nsite - ibit_up = (unsigned long int) tmp_list_1 & is1_up_b; - u_ibit1 = ibit_up >> 32; - l_ibit1 = ibit_up & i_32; - bit_up += pop(u_ibit1); - bit_up += pop(l_ibit1); - - ibit_down = (unsigned long int) tmp_list_1 & is1_down_b; - u_ibit1 = ibit_down >> 32; - l_ibit1 = ibit_down & i_32; - bit_down += 
pop(u_ibit1); - bit_down += pop(l_ibit1); - - ibit_D = (ibit_up) & (ibit_down >> 1); - u_ibit1 = ibit_D >> 32; - l_ibit1 = ibit_D & i_32; - bit_D += pop(u_ibit1); - bit_D += pop(l_ibit1); - - D = bit_D; - N = bit_up + bit_down; - Sz = bit_up - bit_down; +#pragma omp parallel default(none) \ +shared(tmp_v0,list_1,doublon_t,doublon2_t,num_t,num2_t,Sz_t,Sz2_t,nstate) \ +firstprivate(i_max, X,myrank,is1_up_a,is1_down_a,is1_up_b,is1_down_b,i_32) \ +private(j,tmp_v02,D,N,Sz,isite1,tmp_list_1,bit_up,bit_down,bit_D,u_ibit1, \ + l_ibit1,ibit_up,ibit_down,ibit_D,mythread,istate) + { + tmp_v02 = d_1d_allocate(nstate); +#ifdef _OPENMP + mythread = omp_get_thread_num(); +#else + mythread = 0; +#endif +#pragma omp for + for (j = 1; j <= i_max; j++) { + for (istate = 0; istate < nstate; istate++) + tmp_v02[istate] = conj(tmp_v0[j][istate]) * tmp_v0[j][istate]; + bit_up = 0; + bit_down = 0; + bit_D = 0; + tmp_list_1 = list_1[j]; + // isite1 > X->Def.Nsite + ibit_up = (unsigned long int) myrank & is1_up_a; + u_ibit1 = ibit_up >> 32; + l_ibit1 = ibit_up & i_32; + bit_up += pop(u_ibit1); + bit_up += pop(l_ibit1); + + ibit_down = (unsigned long int) myrank & is1_down_a; + u_ibit1 = ibit_down >> 32; + l_ibit1 = ibit_down & i_32; + bit_down += pop(u_ibit1); + bit_down += pop(l_ibit1); + + ibit_D = (ibit_up) & (ibit_down >> 1); + u_ibit1 = ibit_D >> 32; + l_ibit1 = ibit_D & i_32; + bit_D += pop(u_ibit1); + bit_D += pop(l_ibit1); + + // isite1 <= X->Def.Nsite + ibit_up = (unsigned long int) tmp_list_1 & is1_up_b; + u_ibit1 = ibit_up >> 32; + l_ibit1 = ibit_up & i_32; + bit_up += pop(u_ibit1); + bit_up += pop(l_ibit1); + + ibit_down = (unsigned long int) tmp_list_1 & is1_down_b; + u_ibit1 = ibit_down >> 32; + l_ibit1 = ibit_down & i_32; + bit_down += pop(u_ibit1); + bit_down += pop(l_ibit1); + + ibit_D = (ibit_up) & (ibit_down >> 1); + u_ibit1 = ibit_D >> 32; + l_ibit1 = ibit_D & i_32; + bit_D += pop(u_ibit1); + bit_D += pop(l_ibit1); + + D = bit_D; + N = bit_up + bit_down; + Sz = bit_up - 
bit_down; + + for (istate = 0; istate < nstate; istate++) { + doublon_t[mythread][istate] += tmp_v02[istate] * D; + doublon2_t[mythread][istate] += tmp_v02[istate] * D * D; + num_t[mythread][istate] += tmp_v02[istate] * N; + num2_t[mythread][istate] += tmp_v02[istate] * N * N; + Sz_t[mythread][istate] += tmp_v02[istate] * Sz; + Sz2_t[mythread][istate] += tmp_v02[istate] * Sz * Sz; + } + }/*for (j = 1; j <= i_max; j++)*/ + free_d_1d_allocate(tmp_v02); + }/*end of parallel region*/ - for (istate = 0; istate < nstate; istate++) { - X->Phys.doublon[istate] += tmp_v02[istate] * D; - X->Phys.doublon2[istate] += tmp_v02[istate] * D * D; - X->Phys.num[istate] += tmp_v02[istate] * N; - X->Phys.num2[istate] += tmp_v02[istate] * N * N; - X->Phys.Sz[istate] += tmp_v02[istate] * Sz; - X->Phys.Sz2[istate] += tmp_v02[istate] * Sz * Sz; + for (istate = 0; istate < nstate; istate++) { + X->Phys.doublon[istate] = 0.0; + X->Phys.doublon2[istate] = 0.0; + X->Phys.num[istate] = 0.0; + X->Phys.num2[istate] = 0.0; + X->Phys.Sz[istate] = 0.0; + X->Phys.Sz2[istate] = 0.0; + for (mythread = 0; mythread < nthreads; mythread++) { + X->Phys.doublon[istate] += doublon_t[mythread][istate]; + X->Phys.doublon2[istate] += doublon2_t[mythread][istate]; + X->Phys.num[istate] += num_t[mythread][istate]; + X->Phys.num2[istate] += num2_t[mythread][istate]; + X->Phys.Sz[istate] += Sz_t[mythread][istate]; + X->Phys.Sz2[istate] += Sz2_t[mythread][istate]; } } SumMPI_dv(nstate, X->Phys.doublon); @@ -272,7 +331,12 @@ int expec_energy_flct_Hubbard( X->Phys.num_down[istate] = 0.5*(X->Phys.num[istate] - X->Phys.Sz[istate]); } - free_d_1d_allocate(tmp_v02); + free_d_2d_allocate(doublon_t); + free_d_2d_allocate(doublon2_t); + free_d_2d_allocate(num_t); + free_d_2d_allocate(num2_t); + free_d_2d_allocate(Sz_t); + free_d_2d_allocate(Sz2_t); return 0; } /// @@ -290,21 +354,19 @@ int expec_energy_flct_HalfSpinGC( long unsigned int is1_up_a, is1_up_b; long unsigned int ibit1; - double Sz, tmp_Sz, tmp_Sz2; + double Sz; 
double *tmp_v02; long unsigned int i_max; unsigned int l_ibit1, u_ibit1, i_32; - int istate; + int istate, mythread; + double **Sz_t, **Sz2_t; i_max = X->Check.idim_max; - tmp_v02 = d_1d_allocate(nstate); + Sz_t = d_2d_allocate(nthreads, nstate); + Sz2_t = d_2d_allocate(nthreads, nstate); i_32 = 0xFFFFFFFF; //2^32 - 1 - // tentative doublon - tmp_Sz = 0.0; - tmp_Sz2 = 0.0; - //[s] for bit count is1_up_a = 0; is1_up_b = 0; @@ -317,29 +379,49 @@ int expec_energy_flct_HalfSpinGC( } } //[e] -#pragma omp parallel for reduction(+:tmp_Sz,tmp_Sz2)default(none) shared(v0) \ - firstprivate(i_max,X,myrank,i_32,is1_up_a,is1_up_b) private(j,Sz,ibit1,isite1,tmp_v02,u_ibit1,l_ibit1) - for (j = 1; j <= i_max; j++) { - for (istate = 0; istate < nstate; istate++) tmp_v02[istate] = conj(tmp_v0[j][istate]) * tmp_v0[j][istate]; - Sz = 0.0; - - // isite1 > X->Def.Nsite - ibit1 = (unsigned long int) myrank & is1_up_a; - u_ibit1 = ibit1 >> 32; - l_ibit1 = ibit1 & i_32; - Sz += pop(u_ibit1); - Sz += pop(l_ibit1); - // isite1 <= X->Def.Nsite - ibit1 = (unsigned long int) (j - 1)&is1_up_b; - u_ibit1 = ibit1 >> 32; - l_ibit1 = ibit1 & i_32; - Sz += pop(u_ibit1); - Sz += pop(l_ibit1); - Sz = 2 * Sz - X->Def.NsiteMPI; - - for (istate = 0; istate < nstate; istate++) { - X->Phys.Sz[istate] += tmp_v02[istate] * Sz; - X->Phys.Sz2[istate] += tmp_v02[istate] * Sz * Sz; +#pragma omp parallel default(none) shared(tmp_v0,Sz_t,Sz2_t,nstate) \ +firstprivate(i_max,X,myrank,i_32,is1_up_a,is1_up_b) \ +private(j,Sz,ibit1,isite1,tmp_v02,u_ibit1,l_ibit1,mythread,istate) + { + tmp_v02 = d_1d_allocate(nstate); +#ifdef _OPENMP + mythread = omp_get_thread_num(); +#else + mythread = 0; +#endif +#pragma omp for + for (j = 1; j <= i_max; j++) { + for (istate = 0; istate < nstate; istate++) + tmp_v02[istate] = conj(tmp_v0[j][istate]) * tmp_v0[j][istate]; + Sz = 0.0; + + // isite1 > X->Def.Nsite + ibit1 = (unsigned long int) myrank & is1_up_a; + u_ibit1 = ibit1 >> 32; + l_ibit1 = ibit1 & i_32; + Sz += pop(u_ibit1); + Sz 
+= pop(l_ibit1); + // isite1 <= X->Def.Nsite + ibit1 = (unsigned long int) (j - 1)&is1_up_b; + u_ibit1 = ibit1 >> 32; + l_ibit1 = ibit1 & i_32; + Sz += pop(u_ibit1); + Sz += pop(l_ibit1); + Sz = 2 * Sz - X->Def.NsiteMPI; + + for (istate = 0; istate < nstate; istate++) { + Sz_t[mythread][istate] += tmp_v02[istate] * Sz; + Sz2_t[mythread][istate] += tmp_v02[istate] * Sz * Sz; + } + }/*for (j = 1; j <= i_max; j++)*/ + free_d_1d_allocate(tmp_v02); + }/*End of parallel region*/ + for (istate = 0; istate < nstate; istate++) { + X->Phys.Sz[istate] = 0.0; + X->Phys.Sz2[istate] = 0.0; + for (mythread = 0; mythread < nthreads; mythread++) { + X->Phys.Sz[istate] += Sz_t[mythread][istate]; + X->Phys.Sz2[istate] += Sz2_t[mythread][istate]; } } SumMPI_dv(nstate, X->Phys.Sz); @@ -356,7 +438,8 @@ int expec_energy_flct_HalfSpinGC( X->Phys.num_down[istate] = 0.5*(X->Phys.num[istate] - X->Phys.Sz[istate]); } - free_d_1d_allocate(tmp_v02); + free_d_2d_allocate(Sz_t); + free_d_2d_allocate(Sz2_t); return 0; } /// @@ -371,33 +454,53 @@ int expec_energy_flct_GeneralSpinGC( ) { long unsigned int j; long unsigned int isite1; - int istate; - double Sz, tmp_Sz, tmp_Sz2; + int istate, mythread; + double Sz; double *tmp_v02; long unsigned int i_max; + double **Sz_t, **Sz2_t; - tmp_v02 = d_1d_allocate(nstate); + Sz_t = d_2d_allocate(nthreads, nstate); + Sz2_t = d_2d_allocate(nthreads, nstate); i_max = X->Check.idim_max; - // tentative doublon - tmp_Sz = 0.0; - tmp_Sz2 = 0.0; - - for (j = 1; j <= i_max; j++) { - for (istate = 0; istate < nstate; istate++) tmp_v02[istate] = conj(tmp_v0[j][istate]) * tmp_v0[j][istate]; - Sz = 0.0; - for (isite1 = 1; isite1 <= X->Def.NsiteMPI; isite1++) { - //prefactor 0.5 is added later. 
- if (isite1 > X->Def.Nsite) { - Sz += GetLocal2Sz(isite1, myrank, X->Def.SiteToBit, X->Def.Tpow); +#pragma omp parallel default(none) \ +shared(i_max,nstate,X,myrank,Sz_t,Sz2_t,tmp_v0) \ +private(j,istate,tmp_v02,Sz,isite1,mythread) + { + tmp_v02 = d_1d_allocate(nstate); +#ifdef _OPENMP + mythread = omp_get_thread_num(); +#else + mythread = 0; +#endif +#pragma omp for + for (j = 1; j <= i_max; j++) { + for (istate = 0; istate < nstate; istate++) \ + tmp_v02[istate] = conj(tmp_v0[j][istate]) * tmp_v0[j][istate]; + Sz = 0.0; + for (isite1 = 1; isite1 <= X->Def.NsiteMPI; isite1++) { + //prefactor 0.5 is added later. + if (isite1 > X->Def.Nsite) { + Sz += GetLocal2Sz(isite1, myrank, X->Def.SiteToBit, X->Def.Tpow); + } + else { + Sz += GetLocal2Sz(isite1, j - 1, X->Def.SiteToBit, X->Def.Tpow); + } } - else { - Sz += GetLocal2Sz(isite1, j - 1, X->Def.SiteToBit, X->Def.Tpow); + for (istate = 0; istate < nstate; istate++) { + Sz_t[mythread][istate] += tmp_v02[istate] * Sz; + Sz2_t[mythread][istate] += tmp_v02[istate] * Sz * Sz; } - } - for (istate = 0; istate < nstate; istate++) { - X->Phys.Sz[istate] += tmp_v02[istate] * Sz; - X->Phys.Sz2[istate] += tmp_v02[istate] * Sz * Sz; + }/*for (j = 1; j <= i_max; j++)*/ + free_d_1d_allocate(tmp_v02); + }/*End of parallel region*/ + for (istate = 0; istate < nstate; istate++) { + X->Phys.Sz[istate] = 0.0; + X->Phys.Sz2[istate] = 0.0; + for (mythread = 0; mythread < nthreads; mythread++) { + X->Phys.Sz[istate] += Sz_t[mythread][istate]; + X->Phys.Sz2[istate] += Sz2_t[mythread][istate]; } } SumMPI_dv(nstate, X->Phys.Sz); @@ -414,7 +517,8 @@ int expec_energy_flct_GeneralSpinGC( X->Phys.num_down[istate] = 0.5*(X->Phys.num[istate] - X->Phys.Sz[istate]); } - free_d_1d_allocate(tmp_v02); + free_d_2d_allocate(Sz_t); + free_d_2d_allocate(Sz2_t); return 0; } /// @@ -432,20 +536,18 @@ int expec_energy_flct_HalfSpin( long unsigned int is1_up_a, is1_up_b; long unsigned int ibit1; - double Sz, tmp_Sz, tmp_Sz2; + double Sz; double *tmp_v02; long 
unsigned int i_max, tmp_list_1; unsigned int l_ibit1, u_ibit1, i_32; - int istate; + int istate, mythread; + double **Sz_t, **Sz2_t; i_max = X->Check.idim_max; - tmp_v02 = d_1d_allocate(nstate); + Sz_t = d_2d_allocate(nthreads, nstate); + Sz2_t = d_2d_allocate(nthreads, nstate); i_32 = 0xFFFFFFFF; //2^32 - 1 - // tentative doublon - tmp_Sz = 0.0; - tmp_Sz2 = 0.0; - //[s] for bit count is1_up_a = 0; is1_up_b = 0; @@ -458,30 +560,50 @@ int expec_energy_flct_HalfSpin( } } //[e] -#pragma omp parallel for reduction(+:tmp_Sz,tmp_Sz2)default(none) shared(v0, list_1) \ - firstprivate(i_max,X,myrank,i_32,is1_up_a,is1_up_b) private(j,Sz,ibit1,isite1,tmp_v02,u_ibit1,l_ibit1, tmp_list_1) - for (j = 1; j <= i_max; j++) { - for (istate = 0; istate < nstate; istate++) tmp_v02[istate] = conj(tmp_v0[j][istate]) * tmp_v0[j][istate]; - Sz = 0.0; - tmp_list_1 = list_1[j]; - - // isite1 > X->Def.Nsite - ibit1 = (unsigned long int) myrank & is1_up_a; - u_ibit1 = ibit1 >> 32; - l_ibit1 = ibit1 & i_32; - Sz += pop(u_ibit1); - Sz += pop(l_ibit1); - // isite1 <= X->Def.Nsite - ibit1 = (unsigned long int) tmp_list_1 &is1_up_b; - u_ibit1 = ibit1 >> 32; - l_ibit1 = ibit1 & i_32; - Sz += pop(u_ibit1); - Sz += pop(l_ibit1); - Sz = 2 * Sz - X->Def.NsiteMPI; - - for (istate = 0; istate < nstate; istate++) { - X->Phys.Sz[istate] += tmp_v02[istate] * Sz; - X->Phys.Sz2[istate] += tmp_v02[istate] * Sz * Sz; +#pragma omp parallel default(none) shared(tmp_v0, list_1,Sz_t,Sz2_t,nstate) \ +firstprivate(i_max,X,myrank,i_32,is1_up_a,is1_up_b) \ +private(j,Sz,ibit1,isite1,tmp_v02,u_ibit1,l_ibit1, tmp_list_1,mythread,istate) + { + tmp_v02 = d_1d_allocate(nstate); +#ifdef _OPENMP + mythread = omp_get_thread_num(); +#else + mythread = 0; +#endif +#pragma omp for + for (j = 1; j <= i_max; j++) { + for (istate = 0; istate < nstate; istate++) \ + tmp_v02[istate] = conj(tmp_v0[j][istate]) * tmp_v0[j][istate]; + Sz = 0.0; + tmp_list_1 = list_1[j]; + + // isite1 > X->Def.Nsite + ibit1 = (unsigned long int) myrank & 
is1_up_a; + u_ibit1 = ibit1 >> 32; + l_ibit1 = ibit1 & i_32; + Sz += pop(u_ibit1); + Sz += pop(l_ibit1); + // isite1 <= X->Def.Nsite + ibit1 = (unsigned long int) tmp_list_1 &is1_up_b; + u_ibit1 = ibit1 >> 32; + l_ibit1 = ibit1 & i_32; + Sz += pop(u_ibit1); + Sz += pop(l_ibit1); + Sz = 2 * Sz - X->Def.NsiteMPI; + + for (istate = 0; istate < nstate; istate++) { + Sz_t[mythread][istate] += tmp_v02[istate] * Sz; + Sz2_t[mythread][istate] += tmp_v02[istate] * Sz * Sz; + } + }/*for (j = 1; j <= i_max; j++)*/ + free_d_1d_allocate(tmp_v02); + }/*End of parallel region*/ + for (istate = 0; istate < nstate; istate++) { + X->Phys.Sz[istate] = 0.0; + X->Phys.Sz2[istate] = 0.0; + for (mythread = 0; mythread < nthreads; mythread++) { + X->Phys.Sz[istate] += Sz_t[mythread][istate]; + X->Phys.Sz2[istate] += Sz2_t[mythread][istate]; } } SumMPI_dv(nstate, X->Phys.Sz); @@ -498,7 +620,8 @@ int expec_energy_flct_HalfSpin( X->Phys.num_down[istate] = 0.5*(X->Phys.num[istate] - X->Phys.Sz[istate]); } - free_d_1d_allocate(tmp_v02); + free_d_2d_allocate(Sz_t); + free_d_2d_allocate(Sz2_t); return 0; } /// @@ -513,36 +636,53 @@ int expec_energy_flct_GeneralSpin( ) { long unsigned int j; long unsigned int isite1; - int istate; - double Sz, tmp_Sz, tmp_Sz2; + int istate, mythread; + double Sz; double *tmp_v02; long unsigned int i_max, tmp_list1; + double **Sz_t, **Sz2_t; - tmp_v02 = d_1d_allocate(nstate); + Sz_t = d_2d_allocate(nthreads, nstate); + Sz2_t = d_2d_allocate(nthreads, nstate); i_max = X->Check.idim_max; - // tentative doublon - tmp_Sz = 0.0; - tmp_Sz2 = 0.0; - -#pragma omp parallel for reduction(+:tmp_Sz,tmp_Sz2)default(none) shared(v0, list_1) \ - firstprivate(i_max,X,myrank) private(j,Sz,isite1,tmp_v02, tmp_list1) - for (j = 1; j <= i_max; j++) { - for (istate = 0; istate < nstate; istate++) tmp_v02[istate] = conj(tmp_v0[j][istate]) * tmp_v0[j][istate]; - Sz = 0.0; - tmp_list1 = list_1[j]; - for (isite1 = 1; isite1 <= X->Def.NsiteMPI; isite1++) { - //prefactor 0.5 is added later. 
- if (isite1 > X->Def.Nsite) { - Sz += GetLocal2Sz(isite1, myrank, X->Def.SiteToBit, X->Def.Tpow); +#pragma omp parallel default(none) shared(tmp_v0, list_1,Sz_t,Sz2_t,nstate) \ +firstprivate(i_max,X,myrank,mythread) private(j,Sz,isite1,tmp_v02, tmp_list1,istate) + { + tmp_v02 = d_1d_allocate(nstate); +#ifdef _OPENMP + mythread = omp_get_thread_num(); +#else + mythread = 0; +#endif +#pragma omp for + for (j = 1; j <= i_max; j++) { + for (istate = 0; istate < nstate; istate++) + tmp_v02[istate] = conj(tmp_v0[j][istate]) * tmp_v0[j][istate]; + Sz = 0.0; + tmp_list1 = list_1[j]; + for (isite1 = 1; isite1 <= X->Def.NsiteMPI; isite1++) { + //prefactor 0.5 is added later. + if (isite1 > X->Def.Nsite) { + Sz += GetLocal2Sz(isite1, myrank, X->Def.SiteToBit, X->Def.Tpow); + } + else { + Sz += GetLocal2Sz(isite1, tmp_list1, X->Def.SiteToBit, X->Def.Tpow); + } } - else { - Sz += GetLocal2Sz(isite1, tmp_list1, X->Def.SiteToBit, X->Def.Tpow); + for (istate = 0; istate < nstate; istate++) { + Sz_t[mythread][istate] += tmp_v02[istate] * Sz; + Sz2_t[mythread][istate] += tmp_v02[istate] * Sz * Sz; } - } - for (istate = 0; istate < nstate; istate++) { - X->Phys.Sz[istate] += tmp_v02[istate] * Sz; - X->Phys.Sz2[istate] += tmp_v02[istate] * Sz * Sz; + }/*for (j = 1; j <= i_max; j++)*/ + free_d_1d_allocate(tmp_v02); + }/*End of parallel region*/ + for (istate = 0; istate < nstate; istate++) { + X->Phys.Sz[istate] = 0.0; + X->Phys.Sz2[istate] = 0.0; + for (mythread = 0; mythread < nthreads; mythread++) { + X->Phys.Sz[istate] += Sz_t[mythread][istate]; + X->Phys.Sz2[istate] += Sz2_t[mythread][istate]; } } SumMPI_dv(nstate, X->Phys.Sz); @@ -559,7 +699,8 @@ int expec_energy_flct_GeneralSpin( X->Phys.num_down[istate] = 0.5*(X->Phys.num[istate] - X->Phys.Sz[istate]); } - free_d_1d_allocate(tmp_v02); + free_d_2d_allocate(Sz_t); + free_d_2d_allocate(Sz2_t); return 0; } /** @@ -581,9 +722,9 @@ int expec_energy_flct( long unsigned int i, j; long unsigned int irght, ilft, ihfbit; - double complex 
dam_pr, dam_pr1; long unsigned int i_max; int istate; + double *energy_t, *var_t; switch (X->Def.iCalcType) { case TPQCalc: @@ -664,7 +805,8 @@ int expec_energy_flct( StopTimer(nCalcFlct); -#pragma omp parallel for default(none) private(i) shared(v1,v0) firstprivate(i_max) +#pragma omp parallel for default(none) private(i,istate) \ +shared(tmp_v1,tmp_v0,nstate) firstprivate(i_max) for (i = 1; i <= i_max; i++) { for (istate = 0; istate < nstate; istate++) { tmp_v1[i][istate] = tmp_v0[i][istate]; @@ -688,7 +830,6 @@ int expec_energy_flct( X->Phys.energy[istate] = 0.0; X->Phys.var[istate] = 0.0; } -#pragma omp parallel for default(none) reduction(+:dam_pr, dam_pr1) private(j) shared(v0, v1)firstprivate(i_max) for (j = 1; j <= i_max; j++) { for (istate = 0; istate < nstate; istate++) { X->Phys.energy[istate] += conj(tmp_v1[j][istate])*tmp_v0[j][istate]; // E = = diff --git a/src/expec_totalspin.c b/src/expec_totalspin.c index bbc46099d..893552363 100644 --- a/src/expec_totalspin.c +++ b/src/expec_totalspin.c @@ -72,10 +72,6 @@ void totalspin_Hubbard( is2_up = X->Def.Tpow[2 * isite2 - 2]; is2_down = X->Def.Tpow[2 * isite2 - 1]; -#pragma omp parallel for reduction(+:spn_z) default(none) \ -firstprivate(i_max, is1_up, is1_down, is2_up, is2_down, irght, ilft, ihfbit, isite1, isite2) \ -private(ibit1_up, num1_up, ibit2_up, num2_up, ibit1_down, num1_down, ibit2_down, num2_down, tmp_spn_z, iexchg, off) \ -shared(vec, list_1, list_2_1, list_2_2) for (j = 1; j <= i_max; j++) { ibit1_up = list_1[j] & is1_up; @@ -156,10 +152,6 @@ void totalspin_HubbardGC( is2_up = X->Def.Tpow[2 * isite2 - 2]; is2_down = X->Def.Tpow[2 * isite2 - 1]; -#pragma omp parallel for reduction(+:spn_z) default(none) \ -firstprivate(i_max, is1_up, is1_down, is2_up, is2_down, isite1, isite2) \ -private(list_1_j, ibit1_up, num1_up, ibit2_up, num2_up, ibit1_down, num1_down, ibit2_down, num2_down, tmp_spn_z, iexchg, off) \ -shared(vec) for (j = 1; j <= i_max; j++) { list_1_j = j - 1; ibit1_up = list_1_j & 
is1_up; @@ -254,15 +246,11 @@ void totalspin_Spin( num2_down = 1 - num2_up; spn_z = (num1_up - num1_down) * (num2_up - num2_down); -#pragma omp parallel for default(none) reduction (+:spn_zd) shared(vec) \ - firstprivate(i_max, spn_z) private(j) for (j = 1; j <= i_max; j++) { for (istate = 0; istate < nstate; istate++) X->Phys.s2[istate] += conj(vec[j][istate]) * vec[j][istate] * spn_z / 4.0; } if (isite1 == isite2) { -#pragma omp parallel for default(none) reduction (+:spn_zd) shared(vec) \ - firstprivate(i_max) private(j) for (j = 1; j <= i_max; j++) { for (istate = 0; istate < nstate; istate++) X->Phys.s2[istate] += conj(vec[j][istate]) * vec[j][istate] / 2.0; @@ -290,10 +278,6 @@ void totalspin_Spin( num2_down = 1 - num2_up; //diagonal -#pragma omp parallel for reduction(+: spn_zd) default(none) \ -firstprivate(i_max, is1_up, num2_up, num2_down) \ -private(ibit1_up, num1_up, num1_down, spn_z) \ -shared(list_1, vec) for (j = 1; j <= i_max; j++) { ibit1_up = list_1[j] & is1_up; num1_up = ibit1_up / is1_up; @@ -315,10 +299,6 @@ shared(list_1, vec) is2_up = X->Def.Tpow[isite2 - 1]; is_up = is1_up + is2_up; -#pragma omp parallel for reduction(+: spn_zd) default(none) \ -firstprivate(i_max, is_up, is1_up, is2_up, irght, ilft, ihfbit, isite1, isite2) \ -private(ibit1_up, num1_up, ibit2_up, num2_up, num1_down, num2_down, spn_z, iexchg, off, ibit_tmp) \ -shared(list_1, list_2_1, list_2_2, vec) for (j = 1; j <= i_max; j++) { ibit1_up = list_1[j] & is1_up; num1_up = ibit1_up / is1_up; @@ -357,7 +337,6 @@ shared(list_1, list_2_1, list_2_2, vec) S1 = 0.5 * (X->Def.SiteToBit[isite1 - 1] - 1); S2 = 0.5 * (X->Def.SiteToBit[isite2 - 1] - 1); if (isite1 == isite2) { -#pragma omp parallel for reduction(+: spn_z) default(none) firstprivate(i_max, isite1, X, S1) private (spn_z1)shared(vec, list_1) for (j = 1; j <= i_max; j++) { spn_z1 = 0.5 * GetLocal2Sz(isite1, list_1[j], X->Def.SiteToBit, X->Def.Tpow); for (istate = 0; istate < nstate; istate++) { @@ -367,9 +346,6 @@ 
shared(list_1, list_2_1, list_2_2, vec) } } else { -#pragma omp parallel for reduction(+: ) default(none) \ -firstprivate(i_max, isite1, isite2, X, S1, S2) \ -private(spn_z1, spn_z2, off, off_2, ibit_tmp, sigma_1, sigma_2) shared(vec, list_1) for (j = 1; j <= i_max; j++) { spn_z1 = 0.5 * GetLocal2Sz(isite1, list_1[j], X->Def.SiteToBit, X->Def.Tpow); spn_z2 = 0.5 * GetLocal2Sz(isite2, list_1[j], X->Def.SiteToBit, X->Def.Tpow); @@ -457,7 +433,6 @@ void totalspin_SpinGC( ibit1_up = myrank & is1_up; num1_up = ibit1_up / is1_up; num1_down = 1 - num1_up; -#pragma omp parallel for reduction(+: spn_z) default(none) firstprivate(i_max, is1_up, num1_up, num1_down) shared(vec) for (j = 1; j <= i_max; j++) { for (istate = 0; istate < nstate; istate++) X->Phys.Sz[istate] += conj(vec[j][istate])*vec[j][istate] * (num1_up - num1_down) / 2.0; @@ -465,7 +440,6 @@ void totalspin_SpinGC( } else { is1_up = X->Def.Tpow[isite1 - 1]; -#pragma omp parallel for reduction(+: spn_z) default(none) firstprivate(i_max, is1_up) private(list_1_j, ibit1_up, num1_up, num1_down) shared(vec) for (j = 1; j <= i_max; j++) { list_1_j = j - 1; ibit1_up = list_1_j & is1_up; @@ -485,15 +459,11 @@ void totalspin_SpinGC( num2_up = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, 1); num2_down = 1 - num2_up; spn_z2 = (num1_up - num1_down)*(num2_up - num2_down) / 4.0; -#pragma omp parallel for default(none) reduction (+:) shared(vec) \ - firstprivate(i_max, spn_z2) private(j) for (j = 1; j <= i_max; j++) { for (istate = 0; istate < nstate; istate++) X->Phys.s2[istate] += conj(vec[j][istate])*vec[j][istate] * spn_z2; } if (isite1 == isite2) { -#pragma omp parallel for default(none) reduction (+:) shared(vec) \ - firstprivate(i_max) private(j) for (j = 1; j <= i_max; j++) { for (istate = 0; istate < nstate; istate++) X->Phys.s2[istate] += conj(vec[j][istate])*vec[j][istate] / 2.0; @@ -518,9 +488,6 @@ void totalspin_SpinGC( num2_up = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, 1); num2_down 
= 1 - num2_up; //diagonal -#pragma omp parallel for reduction(+: ) default(none) \ -firstprivate(i_max, is1_up, num2_up, num2_down) \ -private(ibit1_up, num1_up, num1_down, spn_z2, list_1_j) shared(vec) for (j = 1; j <= i_max; j++) { list_1_j = j - 1; ibit1_up = list_1_j & is1_up; @@ -540,10 +507,6 @@ private(ibit1_up, num1_up, num1_down, spn_z2, list_1_j) shared(vec) else { is2_up = X->Def.Tpow[isite2 - 1]; is_up = is1_up + is2_up; -#pragma omp parallel for reduction(+: ) default(none) \ -firstprivate(i_max, is_up, is1_up, is2_up, isite1, isite2) \ -private(list_1_j, ibit1_up, num1_up, ibit2_up, num2_up, num1_down, num2_down, spn_z2, iexchg, off, ibit_tmp) \ -shared(vec) for (j = 1; j <= i_max; j++) { list_1_j = j - 1; ibit1_up = list_1_j & is1_up; @@ -582,7 +545,6 @@ shared(vec) S1 = 0.5*(X->Def.SiteToBit[isite1 - 1] - 1); if (isite1 > X->Def.Nsite) { spn_z1 = 0.5*GetLocal2Sz(isite1, (unsigned long int) myrank, X->Def.SiteToBit, X->Def.Tpow); -#pragma omp parallel for reduction(+: ) default(none) firstprivate(S1, spn_z1,i_max) shared(vec) for (j = 1; j <= i_max; j++) { for (istate = 0; istate < nstate; istate++) { X->Phys.s2[istate] += conj(vec[j][istate])*vec[j][istate] * S1*(S1 + 1.0); @@ -591,7 +553,6 @@ shared(vec) } } else { -#pragma omp parallel for reduction(+: ) default(none) firstprivate(i_max, isite1, X, S1) private(spn_z1) shared(vec) for (j = 1; j <= i_max; j++) { spn_z1 = 0.5*GetLocal2Sz(isite1, j - 1, X->Def.SiteToBit, X->Def.Tpow); for (istate = 0; istate < nstate; istate++) { @@ -608,10 +569,6 @@ shared(vec) else if (isite1 > X->Def.Nsite || isite2 > X->Def.Nsite) { } else { //inner-process -#pragma omp parallel for reduction(+: ) default(none) \ -firstprivate(i_max, isite1, isite2, X, S1, S2) \ -private(spn_z1, spn_z2, off, off_2, ibit_tmp, sigma_1, sigma_2) \ -shared(vec) for (j = 1; j <= i_max; j++) { spn_z1 = 0.5*GetLocal2Sz(isite1, j - 1, X->Def.SiteToBit, X->Def.Tpow); spn_z2 = 0.5*GetLocal2Sz(isite2, j - 1, X->Def.SiteToBit, X->Def.Tpow); 
diff --git a/src/mltply.c b/src/mltply.c index 00309c159..425af7b14 100644 --- a/src/mltply.c +++ b/src/mltply.c @@ -94,7 +94,8 @@ int mltply(struct BindStruct *X, int nstate, double complex **tmp_v0,double comp X->Large.mode = M_MLTPLY; StartTimer(100); -#pragma omp parallel for default(none) firstprivate(i_max) shared(tmp_v0, tmp_v1, list_Diagonal) +#pragma omp parallel for default(none) private(dmv) \ + firstprivate(i_max) shared(tmp_v0, tmp_v1, list_Diagonal,one,nstate) for (j = 1; j <= i_max; j++) { dmv = list_Diagonal[j]; zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); diff --git a/src/mltplyHubbard.c b/src/mltplyHubbard.c index 26612511c..df8e665fa 100644 --- a/src/mltplyHubbard.c +++ b/src/mltplyHubbard.c @@ -529,7 +529,8 @@ void child_pairhopp( long unsigned int i_max = X->Large.i_max; long unsigned int off = 0; -#pragma omp parallel for default(none) firstprivate(i_max, X,off) private(j) shared(tmp_v0, tmp_v1) +#pragma omp parallel for default(none) \ + firstprivate(i_max, X,off) private(j) shared(tmp_v0, tmp_v1,nstate) for (j = 1; j <= i_max; j++) child_pairhopp_element(j, nstate, tmp_v0, tmp_v1, X, &off); return; @@ -548,7 +549,8 @@ void child_exchange( long unsigned int i_max = X->Large.i_max; long unsigned int off = 0; -#pragma omp parallel for default(none) firstprivate(i_max, X,off) private(j) shared(tmp_v0, tmp_v1) +#pragma omp parallel for default(none) \ + firstprivate(i_max, X,off) private(j) shared(tmp_v0, tmp_v1,nstate) for (j = 1; j <= i_max; j++) child_exchange_element(j, nstate, tmp_v0, tmp_v1, X, &off); return; @@ -572,9 +574,8 @@ void child_general_hopp( isite2 = X->Large.is2_spin; Asum = X->Large.isA_spin; Adiff = X->Large.A_spin; - //fprintf(stdout, "DEBUG, isite1=%ld, isite2=%ld, Asum=%ld, Adiff=%ld \n", isite1, isite2, Asum, Adiff); -#pragma omp parallel for default(none) \ -firstprivate(i_max,X,Asum,Adiff,isite1,isite2,trans) private(j) shared(tmp_v0, tmp_v1) +#pragma omp parallel for default(none) private(j) 
shared(tmp_v0, tmp_v1,nstate) \ +firstprivate(i_max,X,Asum,Adiff,isite1,isite2,trans) for (j = 1; j <= i_max; j++) CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite1, isite2, Asum, Adiff, trans); return; @@ -601,13 +602,13 @@ void GC_child_general_hopp( if (isite1 == isite2) { #pragma omp parallel for default(none) \ -private(j) firstprivate(i_max,X,isite1, trans) shared(tmp_v0, tmp_v1) + private(j) firstprivate(i_max,X,isite1, trans) shared(tmp_v0, tmp_v1,nstate) for (j = 1; j <= i_max; j++) GC_CisAis(j, nstate, tmp_v0, tmp_v1, X, isite1, trans); }/*if (isite1 == isite2)*/ else { -#pragma omp parallel for default(none) \ -firstprivate(i_max,X,Asum,Adiff,isite1,isite2,trans) private(j,tmp_off) shared(tmp_v0, tmp_v1) +#pragma omp parallel for default(none) private(j,tmp_off) shared(tmp_v0,tmp_v1,nstate) \ +firstprivate(i_max,X,Asum,Adiff,isite1,isite2,trans) for (j = 1; j <= i_max; j++) GC_CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite1, isite2, Asum, Adiff, trans, &tmp_off); } @@ -647,7 +648,7 @@ void child_general_int( #pragma omp parallel default(none) \ private(j, tmp_off, tmp_off_2) \ firstprivate(i_max, X, isite1, isite2, isite3, isite4, Asum, Bsum, Adiff, Bdiff, tmp_V) \ -shared(tmp_v0, tmp_v1) + shared(tmp_v0, tmp_v1,nstate) { if (isite1 == isite2 && isite3 == isite4) { #pragma omp for @@ -706,7 +707,7 @@ void GC_child_general_int( #pragma omp parallel default(none) private(j) \ firstprivate(i_max, X, isite1, isite2, isite4, isite3, Asum, Bsum, Adiff, Bdiff, tmp_off, tmp_off_2, tmp_V) \ -shared(tmp_v0, tmp_v1) +shared(tmp_v0, tmp_v1,nstate) { if (isite1 == isite2 && isite3 == isite4) { #pragma omp for @@ -747,7 +748,8 @@ void GC_child_pairhopp( long unsigned int i_max = X->Large.i_max; long unsigned int off = 0; -#pragma omp parallel for default(none) firstprivate(i_max,X,off) private(j) shared(tmp_v0, tmp_v1) +#pragma omp parallel for default(none) \ +firstprivate(i_max,X,off) private(j) shared(tmp_v0, tmp_v1,nstate) for (j = 1; j <= i_max; j++) 
GC_child_pairhopp_element(j, nstate, tmp_v0, tmp_v1, X, &off); @@ -768,7 +770,7 @@ void GC_child_exchange( long unsigned int off = 0; #pragma omp parallel for default(none) \ - firstprivate(i_max, X,off) private(j) shared(tmp_v0, tmp_v1) + firstprivate(i_max, X,off) private(j) shared(tmp_v0, tmp_v1,nstate) for (j = 1; j <= i_max; j++) GC_child_exchange_element(j, nstate, tmp_v0, tmp_v1, X, &off); return; diff --git a/src/mltplyMPIHubbard.c b/src/mltplyMPIHubbard.c index eae8bf898..25d19835b 100644 --- a/src/mltplyMPIHubbard.c +++ b/src/mltplyMPIHubbard.c @@ -137,7 +137,7 @@ void X_child_CisAjt_MPIdouble( #pragma omp parallel for default(none) private(j, ioff) \ firstprivate(idim_max_buf, trans, X, list_2_1_target, list_2_2_target, list_1buf_org) \ - shared(v1buf, tmp_v0) + shared(v1buf, tmp_v0,nstate,one) for (j = 1; j <= idim_max_buf; j++) { GetOffComp(list_2_1_target, list_2_2_target, list_1buf_org[j], X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); @@ -212,8 +212,9 @@ void X_GC_child_general_hopp_MPIsingle( bit1diff = X->Def.Tpow[2 * X->Def.Nsite - 1] * 2 - mask1 * 2; -#pragma omp parallel default(none) private(j, dmv, state1, Fsgn, ioff) \ - firstprivate(idim_max_buf, trans, X, mask1, state1check, bit1diff) shared(v1buf, tmp_v1, tmp_v0) +#pragma omp parallel default(none) private(j,dmv,state1,Fsgn,ioff) \ + firstprivate(idim_max_buf,trans,X,mask1,state1check,bit1diff) \ + shared(v1buf,tmp_v1,tmp_v0,nstate,one) { #pragma omp for for (j = 0; j < idim_max_buf; j++) { @@ -293,8 +294,9 @@ void X_child_general_hopp_MPIdouble( SendRecv_iv(origin, X->Check.idim_max + 1, idim_max_buf + 1, list_1, list_1buf); SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); -#pragma omp parallel default(none) private(j, Fsgn, ioff) \ - firstprivate(idim_max_buf, trans, X) shared(list_2_1, list_2_2, list_1buf, v1buf, tmp_v1, tmp_v0) +#pragma omp parallel default(none) private(j,Fsgn,ioff) \ + firstprivate(idim_max_buf,trans,X) \ + 
shared(list_2_1,list_2_2,list_1buf,v1buf,tmp_v1,tmp_v0,nstate,one) { #pragma omp for for (j = 1; j <= idim_max_buf; j++) { @@ -372,8 +374,9 @@ void X_child_general_hopp_MPIsingle( bit1diff = X->Def.Tpow[2 * X->Def.Nsite - 1] * 2 - mask1 * 2; -#pragma omp parallel default(none) private(j, dmv, Fsgn, ioff, jreal, state1) \ - firstprivate(idim_max_buf, trans, X, mask1, state1check, bit1diff, myrank) shared(list_1, list_2_1, list_2_2, list_1buf, v1buf, tmp_v1, tmp_v0) +#pragma omp parallel default(none) private(j,dmv,Fsgn,ioff,jreal,state1) \ + firstprivate(idim_max_buf,trans,X,mask1,state1check,bit1diff,myrank) \ + shared(list_1,list_2_1,list_2_2,list_1buf,v1buf,tmp_v1,tmp_v0,nstate,one) { #pragma omp for for (j = 1; j <= idim_max_buf; j++) { @@ -382,7 +385,7 @@ void X_child_general_hopp_MPIsingle( state1 = jreal & mask1; if (state1 == state1check) { - SgnBit(jreal & bit1diff, &Fsgn); + SgnBit(jreal & bit1diff,&Fsgn); GetOffComp(list_2_1, list_2_2, jreal ^ mask1, X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); @@ -446,8 +449,9 @@ void X_child_CisAjt_MPIsingle( bit1diff = X->Def.Tpow[2 * X->Def.Nsite - 1] * 2 - mask1 * 2; -#pragma omp parallel for default(none) private(j, dmv, Fsgn, ioff, jreal, state1) \ - firstprivate(idim_max_buf, trans, X, mask1, state1check, bit1diff,list_2_1_target, list_2_2_target, list_1buf_org, list_1) shared(v1buf, tmp_v0) +#pragma omp parallel for default(none) private(j,dmv,Fsgn,ioff,jreal,state1) \ + firstprivate(idim_max_buf,trans,X,mask1,state1check,bit1diff,list_2_1_target,list_2_2_target,list_1buf_org,list_1) \ + shared(v1buf, tmp_v0,nstate,one) for (j = 1; j <= idim_max_buf; j++) { jreal = list_1buf_org[j]; state1 = jreal & mask1; diff --git a/src/mltplyMPIHubbardCore.c b/src/mltplyMPIHubbardCore.c index 32e221011..17224f9df 100644 --- a/src/mltplyMPIHubbardCore.c +++ b/src/mltplyMPIHubbardCore.c @@ -284,8 +284,9 @@ void X_GC_child_CisAisCjtAjt_Hubbard_MPI( if (org_isite1 > org_isite3) tmp_ispin1 = X->Def.Tpow[2 * org_isite3 + 
org_ispin3]; else tmp_ispin1 = X->Def.Tpow[2 * org_isite1 + org_ispin1]; -#pragma omp parallel default(none) shared(org_isite1, org_ispin1, org_isite3, org_ispin3, nstate, tmp_v0, tmp_v1) \ - firstprivate(i_max, tmp_V, X) private(j, tmp_off, tmp_ispin1) +#pragma omp parallel default(none) \ + shared(org_isite1,org_ispin1,org_isite3,org_ispin3,nstate,one,tmp_v0,tmp_v1) \ + firstprivate(i_max,tmp_V,X) private(j,tmp_off,tmp_ispin1) #pragma omp for for (j = 1; j <= i_max; j++) { if (CheckBit_Ajt(tmp_ispin1, j - 1, &tmp_off) == TRUE) { @@ -360,7 +361,8 @@ void X_GC_child_CisAjtCkuAku_Hubbard_MPI( if (CheckBit_Ajt(isite3, myrank, &tmp_off) == FALSE) return; #pragma omp parallel default(none) \ -firstprivate(i_max,X,Asum,Adiff,isite1,isite2, tmp_V) private(j,tmp_off) shared(tmp_v0, tmp_v1) +firstprivate(i_max,X,Asum,Adiff,isite1,isite2, tmp_V) \ + private(j,tmp_off) shared(tmp_v0, tmp_v1,nstate) { #pragma omp for for (j = 1; j <= i_max; j++) @@ -378,9 +380,9 @@ firstprivate(i_max,X,Asum,Adiff,isite1,isite2, tmp_V) private(j,tmp_off) shared( idim_max_buf = SendRecv_i(origin, X->Check.idim_max); SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); -#pragma omp parallel default(none) private(j, dmv, tmp_off, Fsgn, org_rankbit, Adiff) \ -shared(v1buf, tmp_v1, nstate, tmp_v0, myrank, origin, isite3, org_isite3, isite1, isite2, org_isite2, org_isite1) \ -firstprivate(idim_max_buf, tmp_V, X, tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4) +#pragma omp parallel default(none) private(j,dmv,tmp_off,Fsgn,org_rankbit,Adiff) \ + shared(v1buf,tmp_v1,nstate,one,tmp_v0,myrank,origin,isite3,org_isite3,isite1,isite2,org_isite2,org_isite1) \ +firstprivate(idim_max_buf,tmp_V,X,tmp_isite1,tmp_isite2,tmp_isite3,tmp_isite4) { if (org_isite1 + 1 > X->Def.Nsite && org_isite2 + 1 > X->Def.Nsite) { if (isite2 > isite1) Adiff = isite2 - isite1 * 2; @@ -513,7 +515,7 @@ void X_GC_child_CisAjtCkuAlv_Hubbard_MPI( else Adiff = isite1 - isite4 * 2; #pragma omp 
parallel for default(none) private(j, tmp_off) \ -firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0) + firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0,nstate) for (j = 1; j <= i_max; j++) GC_CisAjt(j - 1, nstate, tmp_v0, tmp_v1, X, isite1, isite4, (isite1 + isite4), Adiff, tmp_V, &tmp_off); @@ -522,7 +524,7 @@ firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0) org_isite2, org_ispin2, -tmp_V, X, nstate, tmp_v0, tmp_v1); if (X->Large.mode != M_CORR) { //for hermite #pragma omp parallel for default(none) private(j, tmp_off) \ -firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0) + firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0,nstate) for (j = 1; j <= i_max; j++) GC_CisAjt(j - 1, nstate, tmp_v0, tmp_v1, X, isite4, isite1, (isite1 + isite4), Adiff, tmp_V, &tmp_off); @@ -568,7 +570,9 @@ firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0) } else { org_rankbit = X->Def.OrgTpow[2 * X->Def.Nsite] * origin; -#pragma omp parallel for default(none) private(j, dmv, tmp_off, Fsgn) firstprivate(idim_max_buf, tmp_V, X, tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4, org_rankbit) shared(v1buf, tmp_v1, tmp_v0) +#pragma omp parallel for default(none) private(j,dmv,tmp_off,Fsgn) \ + firstprivate(idim_max_buf,tmp_V,X,tmp_isite1,tmp_isite2,tmp_isite3,tmp_isite4,org_rankbit) \ + shared(v1buf,tmp_v1,tmp_v0,nstate,one) for (j = 1; j <= idim_max_buf; j++) { if (GetSgnInterAll(tmp_isite4, tmp_isite3, tmp_isite2, tmp_isite1, &Fsgn, X, (j - 1) + org_rankbit, &tmp_off) == TRUE) { dmv = tmp_V * Fsgn; @@ -601,7 +605,7 @@ void X_GC_child_CisAis_Hubbard_MPI( zaxpy_long(i_max*nstate, tmp_V, &tmp_v1[1][0], &tmp_v0[1][0]); }/*if (org_isite1 + 1 > X->Def.Nsite)*/ else { -#pragma omp parallel default(none) shared(tmp_v0, tmp_v1) \ +#pragma omp parallel default(none) shared(tmp_v0, tmp_v1,nstate,one) \ firstprivate(i_max, tmp_V, X, isite1) private(j, 
tmp_off) { #pragma omp for @@ -663,7 +667,6 @@ void X_child_CisAisCjtAjt_Hubbard_MPI( if (iCheck != TRUE) return; if (org_isite1 + 1 > X->Def.Nsite && org_isite3 + 1 > X->Def.Nsite) { -#pragma omp for zaxpy_long(i_max*nstate, tmp_V, &tmp_v1[1][0], &tmp_v0[1][0]); }/*if (org_isite1 + 1 > X->Def.Nsite && org_isite3 + 1 > X->Def.Nsite)*/ else if (org_isite1 + 1 > X->Def.Nsite || org_isite3 + 1 > X->Def.Nsite) { @@ -671,8 +674,8 @@ void X_child_CisAisCjtAjt_Hubbard_MPI( else tmp_ispin1 = X->Def.Tpow[2 * org_isite1 + org_ispin1]; #pragma omp parallel for default(none) \ -shared(tmp_v0, tmp_v1, list_1, org_isite1, org_ispin1, org_isite3, org_ispin3) \ - firstprivate(i_max, tmp_V, X, tmp_ispin1) private(j, tmp_off) +shared(tmp_v0,tmp_v1,list_1,org_isite1,org_ispin1,org_isite3,org_ispin3,nstate,one) \ + firstprivate(i_max,tmp_V,X,tmp_ispin1) private(j,tmp_off) for (j = 1; j <= i_max; j++) { if (CheckBit_Ajt(tmp_ispin1, list_1[j], &tmp_off) == TRUE) { zaxpy_(&nstate, &tmp_V, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); @@ -764,7 +767,7 @@ firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, nstate, tmp_ if (X->Large.mode != M_CORR) { //for hermite #pragma omp parallel for default(none) private(j, tmp_off) \ -firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0) + firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0,nstate) for (j = 1; j <= i_max; j++) CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite4, isite1, (isite1 + isite4), Adiff, tmp_V); @@ -806,8 +809,9 @@ firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0) Fsgn *= X_GC_CisAjt(tmp_off2, X, isite1, isite2, (isite1 + isite2), Adiff, &tmp_off); tmp_V *= Fsgn; }/*if (iFlgHermite == TRUE)*/ -#pragma omp parallel default(none) private(j, ioff) \ -firstprivate(idim_max_buf, tmp_V, X) shared(v1buf, tmp_v1, nstate, tmp_v0, list_2_1, list_2_2, list_1buf) +#pragma omp parallel default(none) private(j,ioff) \ +firstprivate(idim_max_buf,tmp_V,X) \ + 
shared(v1buf,tmp_v1,nstate,tmp_v0,list_2_1,list_2_2,list_1buf,one) { #pragma omp for for (j = 1; j <= idim_max_buf; j++) { @@ -826,7 +830,7 @@ firstprivate(idim_max_buf, tmp_V, X) shared(v1buf, tmp_v1, nstate, tmp_v0, list_ #pragma omp parallel default(none) private(j, dmv, tmp_off, Fsgn, ioff) \ firstprivate(myrank, idim_max_buf, tmp_V, X, tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4, org_rankbit, \ org_isite1, org_ispin1, org_isite2, org_ispin2, org_isite3, org_ispin3, org_isite4, org_ispin4) \ -shared(v1buf, tmp_v1, nstate, tmp_v0, list_1buf, list_2_1, list_2_2) + shared(v1buf, tmp_v1, nstate,one, tmp_v0, list_1buf, list_2_1, list_2_2) { #pragma omp for for (j = 1; j <= idim_max_buf; j++) { @@ -907,7 +911,8 @@ void X_child_CisAjtCkuAku_Hubbard_MPI( if (myrank == origin) {// only k is in PE //for hermite #pragma omp parallel default(none) \ -firstprivate(i_max, Asum, Adiff, isite1, isite2, tmp_V, X) private(j) shared(tmp_v0, tmp_v1) +firstprivate(i_max, Asum, Adiff, isite1, isite2, tmp_V, X) \ + private(j) shared(tmp_v0, tmp_v1,nstate) { #pragma omp for for (j = 1; j <= i_max; j++) @@ -926,9 +931,9 @@ firstprivate(i_max, Asum, Adiff, isite1, isite2, tmp_V, X) private(j) shared(tmp SendRecv_iv(origin, X->Check.idim_max + 1, idim_max_buf + 1, list_1, list_1buf); SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); -#pragma omp parallel default(none) private(j, dmv, ioff, tmp_off, Fsgn, Adiff) \ -firstprivate(idim_max_buf, tmp_V, X, tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4, org_rankbit, isite3) \ -shared(v1buf, tmp_v1, nstate, tmp_v0, list_1buf, list_2_1, list_2_2, origin, org_isite3, myrank, isite1, isite2, org_isite1, org_isite2) +#pragma omp parallel default(none) private(j,dmv,ioff,tmp_off,Fsgn,Adiff) \ +firstprivate(idim_max_buf,tmp_V,X,tmp_isite1,tmp_isite2,tmp_isite3,tmp_isite4,org_rankbit,isite3) \ + 
shared(v1buf,tmp_v1,nstate,one,tmp_v0,list_1buf,list_2_1,list_2_2,origin,org_isite3,myrank,isite1,isite2,org_isite1,org_isite2) { if (org_isite1 + 1 > X->Def.Nsite && org_isite2 + 1 > X->Def.Nsite) { @@ -1013,7 +1018,7 @@ void X_child_CisAis_Hubbard_MPI( zaxpy_long(i_max*nstate, tmp_V, &tmp_v1[1][0], &tmp_v0[1][0]); }/*if (org_isite1 + 1 > X->Def.Nsite)*/ else { -#pragma omp parallel default(none) shared(tmp_v0, tmp_v1, list_1) \ +#pragma omp parallel default(none) shared(tmp_v0, tmp_v1, list_1,nstate,one) \ firstprivate(i_max, tmp_V, X, isite1) private(j, tmp_off) { #pragma omp for @@ -1174,7 +1179,7 @@ void X_Cis_MPI( #pragma omp parallel for default(none) private(j) \ firstprivate(idim_max_buf, trans, ioff, _irght, _ilft, _ihfbit, list_2_1_target, list_2_2_target) \ -shared(tmp_v1buf, tmp_v1, nstate, tmp_v0, list_1buf_org) + shared(tmp_v1buf, tmp_v1, nstate,one, tmp_v0, list_1buf_org) for (j = 1; j <= idim_max_buf; j++) {//idim_max_buf -> original GetOffComp(list_2_1_target, list_2_2_target, list_1buf_org[j], _irght, _ilft, _ihfbit, &ioff); @@ -1233,7 +1238,7 @@ void X_Ajt_MPI( #pragma omp parallel for default(none) private(j) \ firstprivate(idim_max_buf, trans, ioff, _irght, _ilft, _ihfbit, list_2_1_target, list_2_2_target) \ -shared(tmp_v1buf, tmp_v1, nstate, tmp_v0, list_1buf_org) + shared(tmp_v1buf, tmp_v1, nstate,one, tmp_v0, list_1buf_org) for (j = 1; j <= idim_max_buf; j++) { GetOffComp(list_2_1_target, list_2_2_target, list_1buf_org[j], _irght, _ilft, _ihfbit, &ioff); diff --git a/src/mltplyMPISpin.c b/src/mltplyMPISpin.c index fffe7d40c..693a3ba28 100644 --- a/src/mltplyMPISpin.c +++ b/src/mltplyMPISpin.c @@ -90,7 +90,7 @@ void X_child_general_int_spin_MPIdouble( SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); #pragma omp parallel for default(none) private(j, ioff) \ - firstprivate(idim_max_buf, Jint, X) shared(list_2_1, list_2_2, list_1buf, v1buf, tmp_v1, tmp_v0) + firstprivate(idim_max_buf,Jint,X) 
shared(list_2_1,list_2_2,list_1buf,v1buf,tmp_v1,tmp_v0,nstate,one) for (j = 1; j <= idim_max_buf; j++) { GetOffComp(list_2_1, list_2_2, list_1buf[j], X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); @@ -128,7 +128,8 @@ void X_child_general_int_spin_TotalS_MPIdouble( SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); #pragma omp parallel for default(none) private(j, dmv, ioff) \ - firstprivate(idim_max_buf, X) shared(list_2_1, list_2_2, list_1buf, v1buf, tmp_v1, tmp_v0) + firstprivate(idim_max_buf, X) \ + shared(list_2_1, list_2_2, list_1buf, v1buf, tmp_v1, tmp_v0,nstate,one) for (j = 1; j <= idim_max_buf; j++) { GetOffComp(list_2_1, list_2_2, list_1buf[j], X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); @@ -208,7 +209,7 @@ void X_child_general_int_spin_MPIsingle( #pragma omp parallel for default(none) private(j, ioff, jreal, state1) \ firstprivate(idim_max_buf, Jint, X, mask1, state1check, org_isite1) \ -shared(list_2_1, list_2_2, list_1buf, v1buf, tmp_v1, tmp_v0) +shared(list_2_1, list_2_2, list_1buf, v1buf, tmp_v1, tmp_v0,nstate,one) for (j = 1; j <= idim_max_buf; j++) { jreal = list_1buf[j]; diff --git a/src/mltplyMPISpinCore.c b/src/mltplyMPISpinCore.c index 56e1f5a56..ce250636b 100644 --- a/src/mltplyMPISpinCore.c +++ b/src/mltplyMPISpinCore.c @@ -304,7 +304,7 @@ void X_GC_child_CisAisCjuAju_spin_MPIdouble( num2 = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, mask2, org_ispin3); #pragma omp parallel default(none) private(j, dmv) \ - firstprivate(tmp_J, X, num1, num2) shared(tmp_v1, tmp_v0) + firstprivate(tmp_J, X, num1, num2) shared(tmp_v1, tmp_v0,nstate,one) { #pragma omp for for (j = 1; j <= X->Check.idim_max; j++) { @@ -338,7 +338,7 @@ void X_GC_child_CisAisCjuAju_spin_MPIsingle( num2 = X_SpinGC_CisAis((unsigned long int) myrank + 1, X, mask2, org_ispin3); #pragma omp parallel default(none) private(j, dmv, num1) \ - firstprivate(Jint, X, num2, mask1, org_ispin1) shared(tmp_v1, tmp_v0) + 
firstprivate(Jint, X, num2, mask1, org_ispin1) shared(tmp_v1, tmp_v0,nstate,one) { #pragma omp for for (j = 1; j <= X->Check.idim_max; j++) { @@ -414,7 +414,8 @@ void X_GC_child_CisAitCiuAiv_spin_MPIsingle( mask1 = X->Def.Tpow[org_isite1]; #pragma omp parallel default(none) private(j, state1, ioff) \ - firstprivate(idim_max_buf, Jint, X, state1check, mask1) shared(v1buf, tmp_v1, tmp_v0) + firstprivate(idim_max_buf, Jint, X, state1check, mask1) \ + shared(v1buf, tmp_v1, tmp_v0,nstate,one) { #pragma omp for for (j = 0; j < idim_max_buf; j++) { @@ -488,7 +489,8 @@ void X_GC_child_CisAisCjuAjv_spin_MPIsingle( mask1 = X->Def.Tpow[org_isite1]; #pragma omp parallel default(none) private(j, state1) \ - firstprivate(idim_max_buf, Jint, X, state1check, mask1) shared(v1buf, tmp_v1, tmp_v0) + firstprivate(idim_max_buf, Jint, X, state1check, mask1) \ + shared(v1buf, tmp_v1, tmp_v0,nstate,one) { #pragma omp for for (j = 0; j < idim_max_buf; j++) { @@ -552,7 +554,7 @@ void X_GC_child_CisAitCjuAju_spin_MPIsingle( mask1 = (int)X->Def.Tpow[org_isite1]; #pragma omp parallel default(none) private(j, dmv, state1, ioff) \ - firstprivate(Jint, X, state1check, mask1) shared(tmp_v1, tmp_v0) + firstprivate(Jint, X, state1check, mask1) shared(tmp_v1, tmp_v0,nstate,one) { #pragma omp for for (j = 0; j < X->Check.idim_max; j++) { @@ -882,7 +884,7 @@ void X_child_CisAit_GeneralSpin_MPIdouble( #pragma omp parallel for default(none)\ firstprivate(X, tmp_V, idim_max_buf, list_1buf_org) private(j, tmp_off) \ -shared (tmp_v0, tmp_v1, v1buf) +shared (tmp_v0, tmp_v1, v1buf,nstate,one) for (j = 1; j <= idim_max_buf; j++) { ConvertToList1GeneralSpin(list_1buf_org[j], X->Large.ihfbit, &tmp_off); zaxpy_(&nstate, &tmp_V, &v1buf[j][0], &one, &tmp_v0[tmp_off][0], &one); @@ -931,7 +933,7 @@ void X_GC_child_CisAisCjuAjv_GeneralSpin_MPIsingle( SendRecv_cv(origin, X->Check.idim_max*nstate, X->Check.idim_max*nstate, &tmp_v1[1][0], &v1buf[1][0]); #pragma omp parallel default(none) firstprivate(X, tmp_V, isite, 
IniSpin) \ -private(j, num1) shared (tmp_v0, tmp_v1, v1buf) +private(j, num1) shared (tmp_v0, tmp_v1, v1buf,nstate,one) { #pragma omp for for (j = 1; j <= X->Check.idim_max; j++) { @@ -971,7 +973,7 @@ void X_GC_child_CisAitCjuAju_GeneralSpin_MPIsingle( #pragma omp parallel default(none) \ firstprivate(X, tmp_V, isite, IniSpin, FinSpin) private(j, dmv, num1, off) \ -shared (tmp_v0, tmp_v1, v1buf) +shared (tmp_v0, tmp_v1, v1buf,nstate,one) { #pragma omp for for (j = 1; j <= X->Check.idim_max; j++) { @@ -1036,7 +1038,8 @@ void X_GC_child_CisAitCjuAjv_GeneralSpin_MPIsingle( SendRecv_cv(origin, X->Check.idim_max*nstate, X->Check.idim_max*nstate, &tmp_v1[1][0], &v1buf[1][0]); #pragma omp parallel default(none) \ -firstprivate(X, tmp_V, isite, IniSpin, FinSpin) private(j, off) shared (tmp_v0, tmp_v1, v1buf) +firstprivate(X, tmp_V, isite, IniSpin, FinSpin) private(j, off) \ + shared (tmp_v0, tmp_v1, v1buf,nstate,one) { #pragma omp for for (j = 1; j <= X->Check.idim_max; j++) { @@ -1073,7 +1076,8 @@ void X_GC_child_CisAisCjuAju_GeneralSpin_MPIsingle( else return; #pragma omp parallel default(none) \ -firstprivate(X, tmp_V, org_isite1, org_ispin1) private(j, dmv, num1) shared (tmp_v0, tmp_v1) +firstprivate(X, tmp_V, org_isite1, org_ispin1) private(j, dmv, num1) \ + shared (tmp_v0, tmp_v1,nstate,one) { #pragma omp for for (j = 1; j <= X->Check.idim_max; j++) { @@ -1141,7 +1145,7 @@ void X_child_CisAitCjuAjv_GeneralSpin_MPIdouble( SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); #pragma omp parallel default(none) firstprivate(X, tmp_V, idim_max_buf) \ -private(j, off) shared (tmp_v0, tmp_v1, list_1buf, v1buf) +private(j, off) shared (tmp_v0, tmp_v1, list_1buf, v1buf,nstate,one) { #pragma omp for for (j = 1; j <= idim_max_buf; j++) { @@ -1222,7 +1226,8 @@ void X_child_CisAisCjuAju_GeneralSpin_MPIsingle( else return; #pragma omp parallel default(none) \ -firstprivate(X, tmp_V, org_isite1, org_ispin1) private(j, dmv, num1) shared 
(tmp_v0, tmp_v1, list_1) +firstprivate(X, tmp_V, org_isite1, org_ispin1) private(j, dmv, num1) \ + shared (tmp_v0, tmp_v1, list_1,nstate,one) { #pragma omp for for (j = 1; j <= X->Check.idim_max; j++) { @@ -1281,7 +1286,7 @@ void X_child_CisAitCjuAjv_GeneralSpin_MPIsingle( #pragma omp parallel default(none) \ firstprivate(X, tmp_V, idim_max_buf, IniSpin, FinSpin, isite) \ -private(j, off, tmp_off) shared (tmp_v0, tmp_v1, list_1buf, v1buf) +private(j, off, tmp_off) shared (tmp_v0, tmp_v1, list_1buf, v1buf,nstate,one) { #pragma omp for for (j = 1; j <= idim_max_buf; j++) { @@ -1382,7 +1387,7 @@ void X_child_CisAit_spin_MPIdouble( #pragma omp parallel for default(none) private(j, tmp_off) \ firstprivate(idim_max_buf, trans, X, list_1buf_org, list_2_1_target, list_2_2_target) \ -shared(v1buf, tmp_v0) +shared(v1buf, tmp_v0,nstate,one) for (j = 1; j <= idim_max_buf; j++) { GetOffComp(list_2_1_target, list_2_2_target, list_1buf_org[j], X->Large.irght, X->Large.ilft, X->Large.ihfbit, &tmp_off); zaxpy_(&nstate, &trans, &v1buf[j][0], &one, &tmp_v0[tmp_off][0], &one); diff --git a/src/mltplySpin.c b/src/mltplySpin.c index 3fd61530f..1a28b84ba 100644 --- a/src/mltplySpin.c +++ b/src/mltplySpin.c @@ -352,9 +352,9 @@ int mltplyGeneralSpin( sigma4 = X->Def.InterAll_OffDiagonal[idx][7]; tmp_V = X->Def.ParaInterAll_OffDiagonal[idx]; #pragma omp parallel for default(none) \ - private(j, tmp_sgn, off, tmp_off, tmp_off2) \ - firstprivate(i_max, isite1, isite2, sigma1, sigma2, sigma3, sigma4, X, tmp_V, ihfbit) \ - shared(tmp_v0, tmp_v1, list_1, list_2_1, list_2_2) +private(j,tmp_sgn,off,tmp_off,tmp_off2) \ +firstprivate(i_max,isite1,isite2,sigma1,sigma2,sigma3,sigma4,X,tmp_V,ihfbit) \ +shared(tmp_v0,tmp_v1,list_1,list_2_1,list_2_2,one,nstate) for (j = 1; j <= i_max; j++) { tmp_sgn = GetOffCompGeneralSpin(list_1[j], isite2, sigma4, sigma3, &tmp_off, X->Def.SiteToBit, X->Def.Tpow); if (tmp_sgn == TRUE) { @@ -461,7 +461,7 @@ int mltplyHalfSpinGC( is1_spin = X->Def.Tpow[isite1 - 1]; 
#pragma omp parallel for default(none) \ private(j, tmp_sgn) firstprivate(i_max, is1_spin, sigma2, X,off, tmp_trans) \ -shared(tmp_v0, tmp_v1) +shared(tmp_v0, tmp_v1,one,nstate) for (j = 1; j <= i_max; j++) { tmp_sgn = X_SpinGC_CisAit(j, X, is1_spin, sigma2, &off); if(tmp_sgn !=0){ @@ -652,7 +652,7 @@ int mltplyGeneralSpinGC( // transverse magnetic field #pragma omp parallel for default(none) \ private(j, tmp_sgn, num1) firstprivate(i_max, isite1, sigma1, sigma2, X, off, tmp_trans) \ -shared(tmp_v0, tmp_v1) +shared(tmp_v0, tmp_v1,one,nstate) for (j = 1; j <= i_max; j++) { num1 = GetOffCompGeneralSpin( j - 1, isite1, sigma2, sigma1, &off, X->Def.SiteToBit, X->Def.Tpow); @@ -716,7 +716,7 @@ shared(tmp_v0, tmp_v1) #pragma omp parallel for default(none) \ private(j, tmp_sgn, off) \ firstprivate(i_max, isite1, isite2, sigma1, sigma3, sigma4, X, tmp_V) \ - shared(tmp_v0, tmp_v1) + shared(tmp_v0, tmp_v1,one,nstate) for (j = 1; j <= i_max; j++) { tmp_sgn = GetOffCompGeneralSpin( j - 1, isite2, sigma4, sigma3, &off, X->Def.SiteToBit, X->Def.Tpow); @@ -734,7 +734,7 @@ shared(tmp_v0, tmp_v1) #pragma omp parallel for default(none) \ private(j, tmp_sgn, off, tmp_off) \ firstprivate(i_max, isite1, isite2, sigma1, sigma2, sigma3, sigma4, X, tmp_V) \ - shared(tmp_v0, tmp_v1) + shared(tmp_v0, tmp_v1,one,nstate) for (j = 1; j <= i_max; j++) { tmp_sgn = BitCheckGeneral(j - 1, isite2, sigma3, X->Def.SiteToBit, X->Def.Tpow); if (tmp_sgn == TRUE) { @@ -750,7 +750,7 @@ shared(tmp_v0, tmp_v1) #pragma omp parallel for default(none) \ private(j, tmp_sgn, off, tmp_off) \ firstprivate(i_max, isite1, isite2, sigma1, sigma2, sigma3, sigma4, X, tmp_V) \ - shared(tmp_v0, tmp_v1) + shared(tmp_v0, tmp_v1,one,nstate) for (j = 1; j <= i_max; j++) { tmp_sgn = GetOffCompGeneralSpin( j - 1, isite2, sigma4, sigma3, &tmp_off, X->Def.SiteToBit, X->Def.Tpow); @@ -792,7 +792,7 @@ void child_exchange_spin( long unsigned int off = 0; #pragma omp parallel for default(none) \ -firstprivate(i_max, X,off) 
private(j) shared(tmp_v0, tmp_v1) + firstprivate(i_max, X,off) private(j) shared(tmp_v0, tmp_v1,nstate) for (j = 1; j <= i_max; j++) child_exchange_spin_element(j, nstate, tmp_v0, tmp_v1, X, &off); }/*double complex child_exchange_spin*/ @@ -811,7 +811,7 @@ void GC_child_exchange_spin( long unsigned int off = 0; #pragma omp parallel for default(none) \ -firstprivate(i_max, X,off) private(j) shared(tmp_v0, tmp_v1) + firstprivate(i_max, X,off) private(j) shared(tmp_v0, tmp_v1,nstate) for (j = 1; j <= i_max; j++) GC_child_exchange_spin_element(j, nstate, tmp_v0, tmp_v1, X, &off); }/*double complex GC_child_exchange_spin*/ @@ -830,7 +830,7 @@ void GC_child_pairlift_spin( long unsigned int off = 0; #pragma omp parallel for default(none) \ -firstprivate(i_max, X,off) private(j) shared(tmp_v0, tmp_v1) + firstprivate(i_max, X,off) private(j) shared(tmp_v0, tmp_v1,nstate) for (j = 1; j <= i_max; j++) GC_child_pairlift_spin_element(j, nstate, tmp_v0, tmp_v1, X, &off); }/*double complex GC_child_pairlift_spin*/ @@ -860,7 +860,8 @@ void child_general_int_spin( isB_up = X->Large.is2_up; #pragma omp parallel for default(none) private(j, tmp_sgn, dmv) \ -firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V) shared(tmp_v1, tmp_v0) +firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V) \ +shared(tmp_v1, tmp_v0,one,nstate) for (j = 1; j <= i_max; j++) { tmp_sgn = X_child_exchange_spin_element(j, X, isA_up, isB_up, org_sigma2, org_sigma4, &tmp_off); if (tmp_sgn != 0) { @@ -898,8 +899,8 @@ void GC_child_general_int_spin( isB_up = X->Def.Tpow[org_isite2 - 1]; #pragma omp parallel default(none) \ -private(j) shared(tmp_v0, tmp_v1) \ -firstprivate(i_max,X,isA_up,isB_up,org_sigma1,org_sigma2,org_sigma3,org_sigma4,tmp_off, tmp_V) +private(j) shared(tmp_v0,tmp_v1,nstate) \ +firstprivate(i_max,X,isA_up,isB_up,org_sigma1,org_sigma2,org_sigma3,org_sigma4,tmp_off,tmp_V) { if (org_sigma1 == org_sigma2 && org_sigma3 == org_sigma4) { //diagonal #pragma omp for 
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 6ec9bed90..a2b3147a3 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,62 +1,6 @@ # include guard cmake_minimum_required(VERSION 2.8.0 ) -# -# Lanczos -# -add_test( - NAME lanczos_hubbard_square - COMMAND ${CMAKE_SOURCE_DIR}/test/lanczos_hubbard_square.sh -) -add_test( - NAME lanczos_kondo_chain - COMMAND ${CMAKE_SOURCE_DIR}/test/lanczos_kondo_chain.sh -) -add_test( - NAME lanczos_spin_kagome - COMMAND ${CMAKE_SOURCE_DIR}/test/lanczos_spin_kagome.sh -) -add_test( - NAME lanczos_spin_ladder_DM -COMMAND ${CMAKE_SOURCE_DIR}/test/lanczos_spin_ladder_DM.sh -) - -add_test( - NAME lanczos_genspin_ladder - COMMAND ${CMAKE_SOURCE_DIR}/test/lanczos_genspin_ladder.sh -) -add_test( - NAME lanczos_hubbardgc_tri - COMMAND ${CMAKE_SOURCE_DIR}/test/lanczos_hubbardgc_tri.sh -) -add_test( - NAME lanczos_kondogc_chain - COMMAND ${CMAKE_SOURCE_DIR}/test/lanczos_kondogc_chain.sh -) -add_test( - NAME lanczos_spingc_honey - COMMAND ${CMAKE_SOURCE_DIR}/test/lanczos_spingc_honey.sh -) -add_test( - NAME lanczos_genspingc_ladder - COMMAND ${CMAKE_SOURCE_DIR}/test/lanczos_genspingc_ladder.sh -) -add_test( - NAME lanczos_hubbard_square_restart - COMMAND ${CMAKE_SOURCE_DIR}/test/lanczos_hubbard_square_restart.sh -) -add_test( - NAME lanczos_kondo_chain_restart - COMMAND ${CMAKE_SOURCE_DIR}/test/lanczos_kondo_chain_restart.sh -) -add_test( - NAME lanczos_spin_kagome_restart - COMMAND ${CMAKE_SOURCE_DIR}/test/lanczos_spin_kagome_restart.sh -) -add_test( - NAME lanczos_genspin_ladder_restart - COMMAND ${CMAKE_SOURCE_DIR}/test/lanczos_genspin_ladder_restart.sh -) # # LOBCG # From bd22ba16976cc591b2efe9fbfcc4e9db996473c7 Mon Sep 17 00:00:00 2001 From: mitsuaki1987 Date: Sun, 17 Mar 2019 19:23:20 +0900 Subject: [PATCH 15/50] Backup --- src/CalcSpectrum.c | 489 ++++++++++++--------------- src/CalcSpectrumByBiCG.c | 94 ++--- src/CalcSpectrumByFullDiag.c | 33 +- src/include/CalcSpectrum.h | 16 +- src/include/CalcSpectrumByBiCG.h 
| 5 +- src/include/CalcSpectrumByFullDiag.h | 6 +- 6 files changed, 265 insertions(+), 378 deletions(-) diff --git a/src/CalcSpectrum.c b/src/CalcSpectrum.c index d1376095a..26c5e1b72 100644 --- a/src/CalcSpectrum.c +++ b/src/CalcSpectrum.c @@ -37,280 +37,13 @@ * */ -/// \brief Output spectrum. -/// \param X [in] Read information of the frequency origin. -/// \param Nomega [in] A total number of discrete frequencies. -/// \param dcSpectrum [in] Array of spectrum. -/// \param dcomega [in] Array of discrete frequencies. -/// \retval FALSE Fail to open the output file. -/// \retval TRUE Success to output the spectrum. -int OutputSpectrum( - struct EDMainCalStruct *X, - int Nomega, - double complex *dcSpectrum, - double complex *dcomega) -{ - FILE *fp; - char sdt[D_FileNameMax]; - int i; - - //output spectrum - sprintf(sdt, cFileNameCalcDynamicalGreen, X->Bind.Def.CDataFileHead); - if(childfopenMPI(sdt, "w", &fp)!=0){ - return FALSE; - } - - for (i = 0; i < Nomega; i++) { - fprintf(fp, "%.10lf %.10lf %.10lf %.10lf \n", - creal(dcomega[i]-X->Bind.Def.dcOmegaOrg), cimag(dcomega[i]-X->Bind.Def.dcOmegaOrg), - creal(dcSpectrum[i]), cimag(dcSpectrum[i])); - }/*for (i = 0; i < Nomega; i++)*/ - - fclose(fp); - return TRUE; -}/*int OutputSpectrum*/ -/// \brief Parent function to calculate the excited state. -/// \param X [in] Struct to get number of excitation operators. -/// \param tmp_v0 [out] Result @f$ v_0 = H_{ex} v_1 @f$. -/// \param tmp_v1 [in] The original state before excitation @f$ v_1 @f$. -/// \retval FALSE Fail to calculate the excited state. -/// \retval TRUE Success to calculate the excited state. 
-int GetExcitedState -( - struct BindStruct *X, - int nstate, - double complex **tmp_v0, - double complex **tmp_v1 -) -{ - if (X->Def.NSingleExcitationOperator > 0 && X->Def.NPairExcitationOperator > 0) { - fprintf(stderr, "Error: Both single and pair excitation operators exist.\n"); - return FALSE; - } - - - if (X->Def.NSingleExcitationOperator > 0) { - if (GetSingleExcitedState(X, nstate, tmp_v0, tmp_v1) != TRUE) { - return FALSE; - } - } - else if (X->Def.NPairExcitationOperator > 0) { - if (GetPairExcitedState(X, nstate, tmp_v0, tmp_v1) != TRUE) { - return FALSE; - } - } - return TRUE; -} -/** - * @brief A main function to calculate spectrum. - * - * @param X [in,out] CalcStruct list for getting and pushing calculation information \n - * input: iFlgSpecOmegaOrg, dcOmegaMax, dcOmegaMin, iNOmega etc.\n - * output: dcOmegaOrg, iFlagListModified. - * - * @retval 0 normally finished - * @retval -1 unnormally finished - * - * @version 1.1 - * @author Kazuyoshi Yoshimi (The University of Tokyo) - * @author Youhei Yamaji (The University of Tokyo) - * - */ -int CalcSpectrum( - struct EDMainCalStruct *X -) { - char sdt[D_FileNameMax]; - char *defname; - unsigned long int i; - unsigned long int i_max = 0; - int i_stp; - int iFlagListModified = FALSE; - FILE *fp; - double dnorm; - double complex **v1Org; /**< Input vector to calculate spectrum function.*/ - - //ToDo: Nomega should be given as a parameter - int Nomega; - double complex OmegaMax, OmegaMin; - double complex *dcSpectrum; - double complex *dcomega; - size_t byte_size; - - //set omega - if (SetOmega(&(X->Bind.Def)) != TRUE) { - fprintf(stderr, "Error: Fail to set Omega.\n"); - exitMPI(-1); - } - else { - if (X->Bind.Def.iFlgSpecOmegaOrg == FALSE) { - X->Bind.Def.dcOmegaOrg = I * (X->Bind.Def.dcOmegaMax - X->Bind.Def.dcOmegaMin) / (double)X->Bind.Def.iNOmega; - } - } - /* - Set & malloc omega grid - */ - Nomega = X->Bind.Def.iNOmega; - dcSpectrum = cd_1d_allocate(Nomega); - dcomega = cd_1d_allocate(Nomega); - 
OmegaMax = X->Bind.Def.dcOmegaMax + X->Bind.Def.dcOmegaOrg; - OmegaMin = X->Bind.Def.dcOmegaMin + X->Bind.Def.dcOmegaOrg; - for (i = 0; i < Nomega; i++) { - dcomega[i] = (OmegaMax - OmegaMin) / Nomega * i + OmegaMin; - } - - fprintf(stdoutMPI, "\nFrequency range:\n"); - fprintf(stdoutMPI, " Omega Max. : %15.5e %15.5e\n", creal(OmegaMax), cimag(OmegaMax)); - fprintf(stdoutMPI, " Omega Min. : %15.5e %15.5e\n", creal(OmegaMin), cimag(OmegaMin)); - fprintf(stdoutMPI, " Num. of Omega : %d\n", Nomega); - - if (X->Bind.Def.NSingleExcitationOperator == 0 && X->Bind.Def.NPairExcitationOperator == 0) { - fprintf(stderr, "Error: Any excitation operators are not defined.\n"); - exitMPI(-1); - } - //Make New Lists - if (MakeExcitedList(&(X->Bind), &iFlagListModified) == FALSE) { - return FALSE; - } - X->Bind.Def.iFlagListModified = iFlagListModified; - - //Set Memory - v1Org = cd_2d_allocate(X->Bind.Check.idim_maxOrg + 1,1); - for (i = 0; i < X->Bind.Check.idim_maxOrg + 1; i++) { - v1Org[i][0] = 0; - } - - //Make excited state - StartTimer(6100); - if (X->Bind.Def.iFlgCalcSpec == RECALC_NOT || - X->Bind.Def.iFlgCalcSpec == RECALC_OUTPUT_TMComponents_VEC || - (X->Bind.Def.iFlgCalcSpec == RECALC_INOUT_TMComponents_VEC && X->Bind.Def.iCalcType == CG)) { - //input eigen vector - StartTimer(6101); - fprintf(stdoutMPI, " Start: An Eigenvector is inputted in CalcSpectrum.\n"); - TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_InputEigenVectorStart, "a"); - GetFileNameByKW(KWSpectrumVec, &defname); - strcat(defname, "_rank_%d.dat"); - // sprintf(sdt, cFileNameInputEigen, X->Bind.Def.CDataFileHead, X->Bind.Def.k_exct - 1, myrank); - sprintf(sdt, defname, myrank); - childfopenALL(sdt, "rb", &fp); - - if (fp == NULL) { - fprintf(stderr, "Error: A file of Inputvector does not exist.\n"); - return -1; - } - - byte_size = fread(&i_stp, sizeof(i_stp), 1, fp); - X->Bind.Large.itr = i_stp; //For TPQ - byte_size = fread(&i_max, sizeof(i_max), 1, fp); - if (i_max != X->Bind.Check.idim_maxOrg) { - 
fprintf(stderr, "Error: myrank=%d, i_max=%ld\n", myrank, i_max); - fprintf(stderr, "Error: A file of Input vector is incorrect.\n"); - return -1; - } - byte_size = fread(&v1Org[0][0], sizeof(complex double), i_max + 1, fp); - fclose(fp); - StopTimer(6101); - if (byte_size == 0) printf("byte_size: %d \n", (int)byte_size); - - for (i = 1; i <= X->Bind.Check.idim_max; i++) { - v0[i][0] = 0; - } - fprintf(stdoutMPI, " End: An Input vector is inputted in CalcSpectrum.\n\n"); - TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_InputEigenVectorEnd, "a"); - TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_CalcExcitedStateStart, "a"); - fprintf(stdoutMPI, " Start: Calculating an excited Eigen vector.\n"); - - //Multiply Operator - StartTimer(6102); - GetExcitedState(&(X->Bind), 1, v0, v1Org); - StopTimer(6102); - - //calculate norm - dnorm = NormMPI_dc(X->Bind.Check.idim_max, &v0[0][0]); - if (fabs(dnorm) < pow(10.0, -15)) { - fprintf(stderr, "Warning: Norm of an excitation vector becomes 0.\n"); - fprintf(stdoutMPI, " End: Calculating an excited Eigenvector.\n\n"); - TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_CalcExcitedStateEnd, "a"); - fprintf(stdoutMPI, " End: Calculating a spectrum.\n\n"); - TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_CalcSpectrumEnd, "a"); - for (i = 0; i < Nomega; i++) { - dcSpectrum[i] = 0; - } - OutputSpectrum(X, Nomega, dcSpectrum, dcomega); - return TRUE; - } - //normalize vector -#pragma omp parallel for default(none) private(i) shared(v1, v0) firstprivate(i_max, dnorm, X) - for (i = 1; i <= X->Bind.Check.idim_max; i++) { - v1[i][0] = v0[i][0] / dnorm; - } - - //Output excited vector - if (X->Bind.Def.iOutputExVec == 1) { - sprintf(sdt, cFileNameOutputExcitedVec, X->Bind.Def.CDataFileHead, myrank); - if (childfopenALL(sdt, "w", &fp) != 0) { - return -1; - } - fprintf(fp, "%ld\n", X->Bind.Check.idim_max); - for (i = 1; i <= X->Bind.Check.idim_max; i++) { - fprintf(fp, "%.10lf, %.10lf\n", creal(v1[i][0]), cimag(v1[i][0])); - } - fclose(fp); - } - - 
fprintf(stdoutMPI, " End: Calculating an excited Eigenvector.\n\n"); - TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_CalcExcitedStateEnd, "a"); - } - StopTimer(6100); - //Reset list_1, list_2_1, list_2_2 - if (iFlagListModified == TRUE) { - free_cd_2d_allocate(v1Org); - free(list_1_org); - free(list_2_1_org); - free(list_2_2_org); - } - //calculate Diagonal term - diagonalcalc(&(X->Bind)); - - int iret = TRUE; - fprintf(stdoutMPI, " Start: Calculating a spectrum.\n\n"); - TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_CalcSpectrumStart, "a"); - StartTimer(6200); - switch (X->Bind.Def.iCalcType) { - case CG: - iret = CalcSpectrumByBiCG(X, v0, v1, Nomega, dcSpectrum, dcomega); - if (iret != TRUE) { - return FALSE; - } - break; - case FullDiag: - iret = CalcSpectrumByFullDiag(X, Nomega, dcSpectrum, dcomega); - break; - default: - break; - } - StopTimer(6200); - - if (iret != TRUE) { - fprintf(stderr, " Error: The selected calculation type is not supported for calculating spectrum mode.\n"); - return FALSE; - } - - fprintf(stdoutMPI, " End: Calculating a spectrum.\n\n"); - TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_CalcSpectrumEnd, "a"); - iret = OutputSpectrum(X, Nomega, dcSpectrum, dcomega); - free_cd_1d_allocate(dcSpectrum); - free_cd_1d_allocate(dcomega); - return TRUE; - -}/*int CalcSpectrum*/ -/// -/// \brief Set target frequencies -/// \param X [in, out] Struct to give and get the information of target frequencies.\n -/// Output: dcOmegaMax, dcOmegaMin -/// -/// \retval FALSE Fail to set frequencies. -/// \retval TRUE Success to set frequencies. + /// + /// \brief Set target frequencies + /// \param X [in, out] Struct to give and get the information of target frequencies.\n + /// Output: dcOmegaMax, dcOmegaMin + /// + /// \retval FALSE Fail to set frequencies. + /// \retval TRUE Success to set frequencies. 
int SetOmega ( struct DefineList *X @@ -589,7 +322,7 @@ int MakeExcitedList( #ifdef _DEBUG if (*iFlgListModifed == TRUE) { for (j = 1; j <= X->Check.idim_maxOrg; j++) { - fprintf(stdout, "Debug1: myrank=%d, list_1_org[ %ld] = %ld\n", + fprintf(stdout, "Debug1: myrank=%d, list_1_org[ %ld] = %ld\n", myrank, j, list_1_org[j] + myrank * X->Def.OrgTpow[2 * X->Def.NsiteMPI - 1]); } @@ -600,4 +333,210 @@ int MakeExcitedList( #endif return TRUE; } +/// \brief Output spectrum. +/// \param X [in] Read information of the frequency origin. +/// \param Nomega [in] A total number of discrete frequencies. +/// \param dcSpectrum [in] Array of spectrum. +/// \param dcomega [in] Array of discrete frequencies. +/// \retval FALSE Fail to open the output file. +/// \retval TRUE Success to output the spectrum. +int OutputSpectrum( + struct EDMainCalStruct *X, + int Nomega, + double complex *dcSpectrum, + double complex *dcomega) +{ + FILE *fp; + char sdt[D_FileNameMax]; + int i; + + //output spectrum + sprintf(sdt, cFileNameCalcDynamicalGreen, X->Bind.Def.CDataFileHead); + if(childfopenMPI(sdt, "w", &fp)!=0){ + return FALSE; + } + + for (i = 0; i < Nomega; i++) { + fprintf(fp, "%.10lf %.10lf %.10lf %.10lf \n", + creal(dcomega[i]-X->Bind.Def.dcOmegaOrg), cimag(dcomega[i]-X->Bind.Def.dcOmegaOrg), + creal(dcSpectrum[i]), cimag(dcSpectrum[i])); + }/*for (i = 0; i < Nomega; i++)*/ + + fclose(fp); + return TRUE; +}/*int OutputSpectrum*/ +/// \brief Parent function to calculate the excited state. +/// \param X [in] Struct to get number of excitation operators. +/// \param tmp_v0 [out] Result @f$ v_0 = H_{ex} v_1 @f$. +/// \param tmp_v1 [in] The original state before excitation @f$ v_1 @f$. +/// \retval FALSE Fail to calculate the excited state. +/// \retval TRUE Success to calculate the excited state. 
+int GetExcitedState +( + struct BindStruct *X, + int nstate, + double complex **tmp_v0, + double complex **tmp_v1 +) +{ + if (X->Def.NSingleExcitationOperator > 0 && X->Def.NPairExcitationOperator > 0) { + fprintf(stderr, "Error: Both single and pair excitation operators exist.\n"); + return FALSE; + } + + + if (X->Def.NSingleExcitationOperator > 0) { + if (GetSingleExcitedState(X, nstate, tmp_v0, tmp_v1) != TRUE) { + return FALSE; + } + } + else if (X->Def.NPairExcitationOperator > 0) { + if (GetPairExcitedState(X, nstate, tmp_v0, tmp_v1) != TRUE) { + return FALSE; + } + } + return TRUE; +} +/** + * @brief A main function to calculate spectrum. + * + * @param X [in,out] CalcStruct list for getting and pushing calculation information \n + * input: iFlgSpecOmegaOrg, dcOmegaMax, dcOmegaMin, iNOmega etc.\n + * output: dcOmegaOrg, iFlagListModified. + * + * @retval 0 normally finished + * @retval -1 unnormally finished + * + * @version 1.1 + * @author Kazuyoshi Yoshimi (The University of Tokyo) + * @author Youhei Yamaji (The University of Tokyo) + * + */ +int CalcSpectrum( + struct EDMainCalStruct *X +) { + char sdt[D_FileNameMax]; + char *defname; + unsigned long int i; + unsigned long int i_max = 0; + int i_stp; + int iFlagListModified = FALSE; + FILE *fp; + double dnorm; + double complex **v1Org; /**< Input vector to calculate spectrum function.*/ + + //ToDo: Nomega should be given as a parameter + int Nomega; + double complex OmegaMax, OmegaMin; + double complex *dcSpectrum; + double complex *dcomega; + size_t byte_size; + //set omega + if (SetOmega(&(X->Bind.Def)) != TRUE) { + fprintf(stderr, "Error: Fail to set Omega.\n"); + exitMPI(-1); + } + else { + if (X->Bind.Def.iFlgSpecOmegaOrg == FALSE) { + X->Bind.Def.dcOmegaOrg = I * (X->Bind.Def.dcOmegaMax - X->Bind.Def.dcOmegaMin) / (double)X->Bind.Def.iNOmega; + } + } + /* + Set & malloc omega grid + */ + Nomega = X->Bind.Def.iNOmega; + dcSpectrum = cd_1d_allocate(Nomega); + dcomega = cd_1d_allocate(Nomega); + 
OmegaMax = X->Bind.Def.dcOmegaMax + X->Bind.Def.dcOmegaOrg; + OmegaMin = X->Bind.Def.dcOmegaMin + X->Bind.Def.dcOmegaOrg; + for (i = 0; i < Nomega; i++) { + dcomega[i] = (OmegaMax - OmegaMin) / Nomega * i + OmegaMin; + } + + fprintf(stdoutMPI, "\nFrequency range:\n"); + fprintf(stdoutMPI, " Omega Max. : %15.5e %15.5e\n", creal(OmegaMax), cimag(OmegaMax)); + fprintf(stdoutMPI, " Omega Min. : %15.5e %15.5e\n", creal(OmegaMin), cimag(OmegaMin)); + fprintf(stdoutMPI, " Num. of Omega : %d\n", Nomega); + + if (X->Bind.Def.NSingleExcitationOperator == 0 && X->Bind.Def.NPairExcitationOperator == 0) { + fprintf(stderr, "Error: Any excitation operators are not defined.\n"); + exitMPI(-1); + } + //Make New Lists + if (MakeExcitedList(&(X->Bind), &iFlagListModified) == FALSE) { + return FALSE; + } + X->Bind.Def.iFlagListModified = iFlagListModified; + + v1Org = cd_2d_allocate(X->Bind.Check.idim_maxOrg + 1,1); + + //Make excited state + StartTimer(6100); + if (X->Bind.Def.iFlgCalcSpec == RECALC_NOT || + X->Bind.Def.iFlgCalcSpec == RECALC_OUTPUT_TMComponents_VEC || + (X->Bind.Def.iFlgCalcSpec == RECALC_INOUT_TMComponents_VEC && X->Bind.Def.iCalcType == CG)) { + //input eigen vector + StartTimer(6101); + fprintf(stdoutMPI, " Start: An Eigenvector is inputted in CalcSpectrum.\n"); + TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_InputEigenVectorStart, "a"); + GetFileNameByKW(KWSpectrumVec, &defname); + strcat(defname, "_rank_%d.dat"); + // sprintf(sdt, cFileNameInputEigen, X->Bind.Def.CDataFileHead, X->Bind.Def.k_exct - 1, myrank); + sprintf(sdt, defname, myrank); + childfopenALL(sdt, "rb", &fp); + + if (fp == NULL) { + fprintf(stderr, "Error: A file of Inputvector does not exist.\n"); + return -1; + } + + byte_size = fread(&i_stp, sizeof(i_stp), 1, fp); + X->Bind.Large.itr = i_stp; //For TPQ + byte_size = fread(&i_max, sizeof(i_max), 1, fp); + if (i_max != X->Bind.Check.idim_maxOrg) { + fprintf(stderr, "Error: myrank=%d, i_max=%ld\n", myrank, i_max); + fprintf(stderr, "Error: A file 
of Input vector is incorrect.\n"); + return -1; + } + byte_size = fread(&v1Org[0][0], sizeof(complex double), i_max + 1, fp); + fclose(fp); + StopTimer(6101); + if (byte_size == 0) printf("byte_size: %d \n", (int)byte_size); + } + StopTimer(6100); + + diagonalcalc(&(X->Bind)); + + int iret = TRUE; + fprintf(stdoutMPI, " Start: Calculating a spectrum.\n\n"); + TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_CalcSpectrumStart, "a"); + StartTimer(6200); + switch (X->Bind.Def.iCalcType) { + case CG: + iret = CalcSpectrumByBiCG(X, v0, v1, Nomega, dcSpectrum, dcomega, v1Org); + if (iret != TRUE) { + return FALSE; + } + break; + case FullDiag: + iret = CalcSpectrumByFullDiag(X, Nomega, dcSpectrum, dcomega, v1Org); + break; + default: + break; + } + StopTimer(6200); + + if (iret != TRUE) { + fprintf(stderr, " Error: The selected calculation type is not supported for calculating spectrum mode.\n"); + return FALSE; + } + + fprintf(stdoutMPI, " End: Calculating a spectrum.\n\n"); + TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_CalcSpectrumEnd, "a"); + iret = OutputSpectrum(X, Nomega, dcSpectrum, dcomega); + free_cd_1d_allocate(dcSpectrum); + free_cd_1d_allocate(dcomega); + return TRUE; + +}/*int CalcSpectrum*/ diff --git a/src/CalcSpectrumByBiCG.c b/src/CalcSpectrumByBiCG.c index 5ae7dc490..8e79dafab 100644 --- a/src/CalcSpectrumByBiCG.c +++ b/src/CalcSpectrumByBiCG.c @@ -23,6 +23,8 @@ #include "common/setmemory.h" #include "komega/komega.h" #include "mltply.h" +#include "CalcSpectrum.h" +#include "mltplyCommon.h" #ifdef MPI #include #endif @@ -146,49 +148,6 @@ int OutputTMComponents_BiCG( return TRUE; }/*int OutputTMComponents_BiCG*/ -/**@brief -Initialize Shadow Residual as a random vector (Experimental) -*/ -void InitShadowRes( - struct BindStruct *X,//!<[inout] - double complex *v4//!<[out] [CheckList::idim_max] shadow residual vector -) -{ - long int idim, iv; - int mythread; - double dnorm; - /* - For DSFMT - */ - long unsigned int u_long_i; - dsfmt_t dsfmt; - - iv = 
X->Def.initial_iv; -#pragma omp parallel default(none) private(idim, u_long_i, mythread, dsfmt) \ - shared(v4, iv, X, nthreads, myrank) - { - /* - Initialise MT - */ -#ifdef _OPENMP - mythread = omp_get_thread_num(); -#else - mythread = 0; -#endif - u_long_i = 123432 + labs(iv) + mythread + nthreads * myrank; - dsfmt_init_gen_rand(&dsfmt, u_long_i); - -#pragma omp for - for (idim = 1; idim <= X->Check.idim_max; idim++) - v4[idim] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5) - + 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5)*I; - }/*#pragma omp parallel*/ - - dnorm = sqrt(creal(VecProdMPI(X->Check.idim_max, v4, v4))); -#pragma omp parallel for default(none) shared(X,v4,dnorm) private(idim) - for (idim = 1; idim <= X->Check.idim_max; idim++) v4[idim] /= dnorm; - -}/*void InitShadowRes*/ /** * @brief A main function to calculate spectrum by BiCG method * In this function, the @f$K\omega@f$ library is used. @@ -203,11 +162,12 @@ void InitShadowRes( */ int CalcSpectrumByBiCG( struct EDMainCalStruct *X,//!<[inout] - double complex **vrhs,//!<[in] [CheckList::idim_max] Right hand side vector, excited state. - double complex **v2,//!<[inout] [CheckList::idim_max] Work space for residual vector @f${\bf r}@f$ + double complex **v2,//!<[in] [CheckList::idim_max] Right hand side vector, excited state. 
+ double complex **v4,//!<[inout] [CheckList::idim_max] Work space for residual vector @f${\bf r}@f$ int Nomega,//!<[in] Number of Frequencies double complex *dcSpectrum,//!<[out] [Nomega] Spectrum - double complex *dcomega//!<[in] [Nomega] Frequency + double complex *dcomega,//!<[in] [Nomega] Frequency + double complex **v1Org ) { char sdt[D_FileNameMax]; @@ -216,7 +176,7 @@ int CalcSpectrumByBiCG( size_t byte_size; int iret, max_step; unsigned long int liLanczosStp_vec = 0; - double complex **v4, **v12, **v14, res_proj; + double complex **vL, **v12, **v14, *res_proj; int stp, one = 1, status[3], iomega; double *resz; @@ -228,8 +188,9 @@ int CalcSpectrumByBiCG( */ v12 = cd_2d_allocate(X->Bind.Check.idim_max + 2, 1); v14 = cd_2d_allocate(X->Bind.Check.idim_max + 1, 1); - v4 = cd_2d_allocate(X->Bind.Check.idim_max + 1, 1); + vL = cd_2d_allocate(X->Bind.Check.idim_max + 1, 1); resz = d_1d_allocate(Nomega); + res_proj = cd_1d_allocate(1); /**
  • Set initial result vector(+shadow result vector) Read residual vectors if restart
  • @@ -243,12 +204,10 @@ int CalcSpectrumByBiCG( if (childfopenALL(sdt, "rb", &fp) != 0) { fprintf(stdoutMPI, "INFO: File for the restart is not found.\n"); fprintf(stdoutMPI, " Start from SCRATCH.\n"); -#pragma omp parallel for default(none) shared(v2,v4,vrhs,X) private(idim) - for (idim = 1; idim <= X->Bind.Check.idim_max; idim++) { - v2[idim][0] = vrhs[idim][0]; - v4[idim][0] = vrhs[idim][0]; - } - //InitShadowRes(&(X->Bind), v4); + GetExcitedState(&(X->Bind), 1, v2, v1Org); +#pragma omp parallel for default(none) shared(v2,v4,v1Org,X) private(idim) + for (idim = 1; idim <= X->Bind.Check.idim_max; idim++) + v4[idim][0] = v2[idim][0]; } else { byte_size = fread(&liLanczosStp_vec, sizeof(int), 1, fp); @@ -269,12 +228,10 @@ int CalcSpectrumByBiCG( }/*if (childfopenALL(sdt, "rb", &fp) == 0)*/ }/*if (X->Bind.Def.iFlgCalcSpec > RECALC_FROM_TMComponents)*/ else { -#pragma omp parallel for default(none) shared(v2,v4,vrhs,X) private(idim) - for (idim = 1; idim <= X->Bind.Check.idim_max; idim++) { - v2[idim][0] = vrhs[idim][0]; - v4[idim][0] = vrhs[idim][0]; - } - //InitShadowRes(&(X->Bind), v4); + GetExcitedState(&(X->Bind), 1, v2, v1Org); +#pragma omp parallel for default(none) shared(v2,v4,v1Org,X) private(idim) + for (idim = 1; idim <= X->Bind.Check.idim_max; idim++) + v4[idim][0] = v2[idim][0]; } /**
  • Input @f$\alpha, \beta@f$, projected residual, or start from scratch
  • @@ -294,21 +251,17 @@ int CalcSpectrumByBiCG(
  • @f${\bf v}_{2}={\hat H}{\bf v}_{12}, {\bf v}_{4}={\hat H}{\bf v}_{14}@f$, where @f${\bf v}_{12}, {\bf v}_{14}@f$ are old (shadow) residual vector.
  • */ -#pragma omp parallel for default(none) shared(X,v12,v14) private(idim) - for (idim = 1; idim <= X->Bind.Check.idim_max; idim++) { - v12[idim][0] = 0.0; - v14[idim][0] = 0.0; - } + zclear(X->Bind.Check.idim_max, &v12[1][0]); + zclear(X->Bind.Check.idim_max, &v14[1][0]); iret = mltply(&X->Bind, 1, v12, v2); iret = mltply(&X->Bind, 1, v14, v4); - res_proj = VecProdMPI(X->Bind.Check.idim_max, &vrhs[0][0], &v2[0][0]); + GetExcitedState(&(X->Bind), 1, vL, v1Org); + res_proj[0] = VecProdMPI(X->Bind.Check.idim_max, &vL[0][0], &v2[0][0]); /**
  • Update projected result vector dcSpectrum.
  • */ - - komega_bicg_update(&v12[1][0], &v2[1][0], &v14[1][0], &v4[1][0], dcSpectrum, &res_proj, status); - + komega_bicg_update(&v12[1][0], &v2[1][0], &v14[1][0], &v4[1][0], dcSpectrum, res_proj, status); /**
  • Output residuals at each frequency for some analysis
  • */ @@ -370,6 +323,7 @@ int CalcSpectrumByBiCG( free_d_1d_allocate(resz); free_cd_2d_allocate(v12); free_cd_2d_allocate(v14); - free_cd_2d_allocate(v4); + free_cd_2d_allocate(vL); + free_cd_1d_allocate(res_proj); return TRUE; }/*int CalcSpectrumByBiCG*/ diff --git a/src/CalcSpectrumByFullDiag.c b/src/CalcSpectrumByFullDiag.c index 20768d44c..b0c75ab45 100644 --- a/src/CalcSpectrumByFullDiag.c +++ b/src/CalcSpectrumByFullDiag.c @@ -26,6 +26,7 @@ full-diagonalization method. #include "mltplyCommon.h" #include "CalcTime.h" #include "common/setmemory.h" +#include "CalcSpectrum.h" void zcopy_(int *n, double complex *x, int *incx, double complex *y, int *incy); void zdotc_(double complex *xy, int *n, double complex *x, int *incx, double complex *y, int *incy); @@ -40,23 +41,23 @@ int CalcSpectrumByFullDiag( struct EDMainCalStruct *X,//!<[inout] int Nomega,//!<[in] Number of frequencies double complex *dcSpectrum,//!<[out] [Nomega] Spectrum - double complex *dcomega//!<[in] [Nomega] Frequency + double complex *dcomega,//!<[in] [Nomega] Frequency + double complex **v1Org ) { int idim, jdim, iomega; int idim_max_int; int incr=1; - double *vAv2; - double complex *vg, vAv; + double complex **vR, **vL, vRv, vLv, *vLvvRv; /**
    • Generate fully stored Hamiltonian. Because ::v0 & ::v1 are overwritten, copy ::v0 into ::vg.
    • */ idim_max_int = (int)X->Bind.Check.idim_max; - vg = cd_1d_allocate(idim_max_int); - vAv2 = d_1d_allocate(idim_max_int); - zcopy_(&idim_max_int, &v0[1][0], &incr, &vg[0], &incr); + vR = cd_2d_allocate(idim_max_int, 1); + vL = cd_2d_allocate(idim_max_int, 1); + vLvvRv = cd_1d_allocate(idim_max_int); StartTimer(6301); zclear((X->Bind.Check.idim_max + 1)*(X->Bind.Check.idim_max + 1), &v0[0][0]); @@ -77,11 +78,16 @@ int CalcSpectrumByFullDiag( */ StartTimer(6303); + GetExcitedState(&(X->Bind), 1, vR, v1Org); + GetExcitedState(&(X->Bind), 1, vL, v1Org); for (idim = 0; idim < idim_max_int; idim++) { - vAv = 0.0; - for (jdim = 0; jdim < idim_max_int; jdim++) vAv += conj(vg[jdim]) * v1[jdim][idim]; - //zdotc_(&v1[idim], &idim_max_int, &vg[0], &incr, &L_vec[idim][0], &incr); - vAv2[idim] = conj(vAv) * vAv; + vRv = 0.0; + vLv = 0.0; + for (jdim = 0; jdim < idim_max_int; jdim++) { + vRv += conj(v1[jdim][idim]) * vR[jdim][1]; + vLv += conj(v1[jdim][idim]) * vL[jdim][1]; + } + vLvvRv[idim] = conj(vLv) * vRv; }/*for (idim = 0; idim < idim_max_int; idim++)*/ StopTimer(6303); /** @@ -96,12 +102,13 @@ int CalcSpectrumByFullDiag( for (iomega = 0; iomega < Nomega; iomega++) { dcSpectrum[iomega] = 0.0; for (idim = 0; idim < idim_max_int; idim++) { - dcSpectrum[iomega] += vAv2[idim] / (dcomega[iomega] - X->Bind.Phys.energy[idim]); + dcSpectrum[iomega] += vLvvRv[idim] / (dcomega[iomega] - X->Bind.Phys.energy[idim]); }/*for (idim = 0; idim < idim_max_int; idim++)*/ }/*for (iomega = 0; iomega < Nomega; iomega++)*/ StopTimer(6304); - free_cd_1d_allocate(vg); - free_d_1d_allocate(vAv2); + free_cd_2d_allocate(vR); + free_cd_2d_allocate(vR); + free_cd_1d_allocate(vLvvRv); return TRUE; }/*CalcSpectrumByFullDiag*/ diff --git a/src/include/CalcSpectrum.h b/src/include/CalcSpectrum.h index e63785ab4..102c44692 100644 --- a/src/include/CalcSpectrum.h +++ b/src/include/CalcSpectrum.h @@ -16,18 +16,6 @@ #pragma once #include "Common.h" -int CalcSpectrum( - struct EDMainCalStruct *X -); +int 
CalcSpectrum(struct EDMainCalStruct *X); +int GetExcitedState(struct BindStruct *X, int nstate, double complex **tmp_v0, double complex **tmp_v1); -int MakeExcitedList( - struct BindStruct *X, - int *iFlgListModifed - ); - -int ReSetList(struct BindStruct *X); - -int SetOmega -( - struct DefineList *X - ); diff --git a/src/include/CalcSpectrumByBiCG.h b/src/include/CalcSpectrumByBiCG.h index 2a0fe6ae5..becb23ea3 100644 --- a/src/include/CalcSpectrumByBiCG.h +++ b/src/include/CalcSpectrumByBiCG.h @@ -18,9 +18,10 @@ int CalcSpectrumByBiCG( struct EDMainCalStruct *X, - double complex **vrhs, double complex **v2, + double complex **v4, int Nomega, double complex *dcSpectrum, - double complex *dcomega + double complex *dcomega, + double complex **v1Org ); diff --git a/src/include/CalcSpectrumByFullDiag.h b/src/include/CalcSpectrumByFullDiag.h index 637a18b6d..83379013f 100644 --- a/src/include/CalcSpectrumByFullDiag.h +++ b/src/include/CalcSpectrumByFullDiag.h @@ -17,7 +17,5 @@ #include "struct.h" -int CalcSpectrumByFullDiag(struct EDMainCalStruct *X, - int Nomega, - double complex *dcSpectrum, - double complex *dcomega); +int CalcSpectrumByFullDiag( + struct EDMainCalStruct *X, int Nomega, double complex *dcSpectrum, double complex *dcomega, double complex **v1org); From f47ad160d88ee51839385a92e2374788906864a5 Mon Sep 17 00:00:00 2001 From: mitsuaki1987 Date: Tue, 19 Mar 2019 22:12:28 +0900 Subject: [PATCH 16/50] Backup --- src/CalcSpectrum.c | 94 ++++++++++-------- src/CalcSpectrumByBiCG.c | 35 ++++--- src/CalcSpectrumByFullDiag.c | 66 +++++++------ src/PairEx.c | 11 ++- src/PairExHubbard.c | 40 ++++---- src/PairExSpin.c | 142 ++++++++++++++------------- src/SingleEx.c | 7 +- src/SingleExHubbard.c | 30 +++--- src/StdFace/StdFace_main.c | 48 ++++----- src/include/CalcSpectrum.h | 2 +- src/include/CalcSpectrumByBiCG.h | 3 +- src/include/CalcSpectrumByFullDiag.h | 3 +- src/include/PairEx.h | 3 +- src/include/PairExHubbard.h | 7 +- src/include/PairExSpin.h | 44 +-------- 
src/include/SingleEx.h | 3 +- src/include/SingleExHubbard.h | 4 +- src/include/struct.h | 14 +-- src/readdef.c | 123 +++++++++++++---------- src/xsetmem.c | 12 ++- 20 files changed, 356 insertions(+), 335 deletions(-) diff --git a/src/CalcSpectrum.c b/src/CalcSpectrum.c index 26c5e1b72..13baf41ec 100644 --- a/src/CalcSpectrum.c +++ b/src/CalcSpectrum.c @@ -143,7 +143,7 @@ int MakeExcitedList( X->Check.idim_maxOrg = X->Check.idim_max; X->Check.idim_maxMPIOrg = X->Check.idim_maxMPI; - if (X->Def.NSingleExcitationOperator > 0) { + if (X->Def.NNSingleExcitationOperator > 0) { switch (X->Def.iCalcModel) { case HubbardGC: break; @@ -158,7 +158,7 @@ int MakeExcitedList( return FALSE; } } - else if (X->Def.NPairExcitationOperator > 0) { + else if (X->Def.NNPairExcitationOperator > 0) { switch (X->Def.iCalcModel) { case HubbardGC: case SpinGC: @@ -168,7 +168,7 @@ int MakeExcitedList( case Hubbard: case Kondo: case Spin: - if (X->Def.PairExcitationOperator[0][1] != X->Def.PairExcitationOperator[0][3]) { + if (X->Def.PairExcitationOperator[0][0][1] != X->Def.PairExcitationOperator[0][0][3]) { *iFlgListModifed = TRUE; } break; @@ -209,12 +209,12 @@ int MakeExcitedList( return FALSE; } - if (X->Def.NSingleExcitationOperator > 0) { + if (X->Def.NNSingleExcitationOperator > 0) { switch (X->Def.iCalcModel) { case HubbardGC: break; case HubbardNConserved: - if (X->Def.SingleExcitationOperator[0][2] == 1) { //cis + if (X->Def.SingleExcitationOperator[0][0][2] == 1) { //cis X->Def.Ne = X->Def.NeMPI + 1; } else { @@ -224,9 +224,9 @@ int MakeExcitedList( case KondoGC: case Hubbard: case Kondo: - if (X->Def.SingleExcitationOperator[0][2] == 1) { //cis + if (X->Def.SingleExcitationOperator[0][0][2] == 1) { //cis X->Def.Ne = X->Def.NeMPI + 1; - if (X->Def.SingleExcitationOperator[0][1] == 0) {//up + if (X->Def.SingleExcitationOperator[0][0][1] == 0) {//up X->Def.Nup = X->Def.NupOrg + 1; X->Def.Ndown = X->Def.NdownOrg; } @@ -237,7 +237,7 @@ int MakeExcitedList( } else {//ajt X->Def.Ne = 
X->Def.NeMPI - 1; - if (X->Def.SingleExcitationOperator[0][1] == 0) {//up + if (X->Def.SingleExcitationOperator[0][0][1] == 0) {//up X->Def.Nup = X->Def.NupOrg - 1; X->Def.Ndown = X->Def.NdownOrg; @@ -253,7 +253,7 @@ int MakeExcitedList( return FALSE; } } - else if (X->Def.NPairExcitationOperator > 0) { + else if (X->Def.NNPairExcitationOperator > 0) { X->Def.Ne = X->Def.NeMPI; switch (X->Def.iCalcModel) { case HubbardGC: @@ -263,8 +263,8 @@ int MakeExcitedList( case KondoGC: case Hubbard: case Kondo: - if (X->Def.PairExcitationOperator[0][1] != X->Def.PairExcitationOperator[0][3]) { - if (X->Def.PairExcitationOperator[0][1] == 0) {//up + if (X->Def.PairExcitationOperator[0][0][1] != X->Def.PairExcitationOperator[0][0][3]) { + if (X->Def.PairExcitationOperator[0][0][1] == 0) {//up X->Def.Nup = X->Def.NupOrg + 1; X->Def.Ndown = X->Def.NdownOrg - 1; } @@ -275,9 +275,9 @@ int MakeExcitedList( } break; case Spin: - if (X->Def.PairExcitationOperator[0][1] != X->Def.PairExcitationOperator[0][3]) { + if (X->Def.PairExcitationOperator[0][0][1] != X->Def.PairExcitationOperator[0][0][3]) { if (X->Def.iFlgGeneralSpin == FALSE) { - if (X->Def.PairExcitationOperator[0][1] == 0) {//down + if (X->Def.PairExcitationOperator[0][0][1] == 0) {//down X->Def.Nup = X->Def.NupOrg - 1; X->Def.Ndown = X->Def.NdownOrg + 1; } @@ -287,7 +287,7 @@ int MakeExcitedList( } } else {//for general spin - X->Def.Total2Sz = X->Def.Total2SzMPI + 2 * (X->Def.PairExcitationOperator[0][1] - X->Def.PairExcitationOperator[0][3]); + X->Def.Total2Sz = X->Def.Total2SzMPI + 2 * (X->Def.PairExcitationOperator[0][0][1] - X->Def.PairExcitationOperator[0][0][3]); } } break; @@ -343,12 +343,13 @@ int MakeExcitedList( int OutputSpectrum( struct EDMainCalStruct *X, int Nomega, - double complex *dcSpectrum, + int NdcSpectrum, + double complex **dcSpectrum, double complex *dcomega) { FILE *fp; char sdt[D_FileNameMax]; - int i; + int iomega, idcSpectrum; //output spectrum sprintf(sdt, cFileNameCalcDynamicalGreen, 
X->Bind.Def.CDataFileHead); @@ -356,11 +357,14 @@ int OutputSpectrum( return FALSE; } - for (i = 0; i < Nomega; i++) { - fprintf(fp, "%.10lf %.10lf %.10lf %.10lf \n", - creal(dcomega[i]-X->Bind.Def.dcOmegaOrg), cimag(dcomega[i]-X->Bind.Def.dcOmegaOrg), - creal(dcSpectrum[i]), cimag(dcSpectrum[i])); - }/*for (i = 0; i < Nomega; i++)*/ + for (idcSpectrum = 0; idcSpectrum < NdcSpectrum; idcSpectrum++) { + for (iomega = 0; iomega < Nomega; iomega++) { + fprintf(fp, "%.10lf %.10lf %.10lf %.10lf \n", + creal(dcomega[iomega] - X->Bind.Def.dcOmegaOrg), cimag(dcomega[iomega] - X->Bind.Def.dcOmegaOrg), + creal(dcSpectrum[iomega][idcSpectrum]), cimag(dcSpectrum[iomega][idcSpectrum])); + }/*for (i = 0; i < Nomega; i++)*/ + fprintf(fp, "\n"); + } fclose(fp); return TRUE; @@ -376,22 +380,17 @@ int GetExcitedState struct BindStruct *X, int nstate, double complex **tmp_v0, - double complex **tmp_v1 + double complex **tmp_v1, + int iEx ) { - if (X->Def.NSingleExcitationOperator > 0 && X->Def.NPairExcitationOperator > 0) { - fprintf(stderr, "Error: Both single and pair excitation operators exist.\n"); - return FALSE; - } - - - if (X->Def.NSingleExcitationOperator > 0) { - if (GetSingleExcitedState(X, nstate, tmp_v0, tmp_v1) != TRUE) { + if (X->Def.NNSingleExcitationOperator > 0) { + if (GetSingleExcitedState(X, nstate, tmp_v0, tmp_v1, iEx) != TRUE) { return FALSE; } } - else if (X->Def.NPairExcitationOperator > 0) { - if (GetPairExcitedState(X, nstate, tmp_v0, tmp_v1) != TRUE) { + else if (X->Def.NNPairExcitationOperator > 0) { + if (GetPairExcitedState(X, nstate, tmp_v0, tmp_v1, iEx) != TRUE) { return FALSE; } } @@ -419,7 +418,7 @@ int CalcSpectrum( char *defname; unsigned long int i; unsigned long int i_max = 0; - int i_stp; + int i_stp, NdcSpectrum; int iFlagListModified = FALSE; FILE *fp; double dnorm; @@ -428,7 +427,7 @@ int CalcSpectrum( //ToDo: Nomega should be given as a parameter int Nomega; double complex OmegaMax, OmegaMin; - double complex *dcSpectrum; + double complex 
**dcSpectrum; double complex *dcomega; size_t byte_size; @@ -446,7 +445,6 @@ int CalcSpectrum( Set & malloc omega grid */ Nomega = X->Bind.Def.iNOmega; - dcSpectrum = cd_1d_allocate(Nomega); dcomega = cd_1d_allocate(Nomega); OmegaMax = X->Bind.Def.dcOmegaMax + X->Bind.Def.dcOmegaOrg; OmegaMin = X->Bind.Def.dcOmegaMin + X->Bind.Def.dcOmegaOrg; @@ -459,10 +457,24 @@ int CalcSpectrum( fprintf(stdoutMPI, " Omega Min. : %15.5e %15.5e\n", creal(OmegaMin), cimag(OmegaMin)); fprintf(stdoutMPI, " Num. of Omega : %d\n", Nomega); - if (X->Bind.Def.NSingleExcitationOperator == 0 && X->Bind.Def.NPairExcitationOperator == 0) { - fprintf(stderr, "Error: Any excitation operators are not defined.\n"); + if (X->Bind.Def.NNSingleExcitationOperator == 0){ + if (X->Bind.Def.NNPairExcitationOperator == 0) { + fprintf(stderr, "Error: Any excitation operators are not defined.\n"); + exitMPI(-1); + } + else { + NdcSpectrum = X->Bind.Def.NNPairExcitationOperator - 1; + } + } + else if (X->Bind.Def.NNPairExcitationOperator == 0) { + NdcSpectrum = X->Bind.Def.NNSingleExcitationOperator - 1; + } + else { + fprintf(stderr, "Error: Both single and pair excitation operators exist.\n"); exitMPI(-1); } + dcSpectrum = cd_2d_allocate(Nomega, NdcSpectrum); + //Make New Lists if (MakeExcitedList(&(X->Bind), &iFlagListModified) == FALSE) { return FALSE; @@ -514,13 +526,13 @@ int CalcSpectrum( StartTimer(6200); switch (X->Bind.Def.iCalcType) { case CG: - iret = CalcSpectrumByBiCG(X, v0, v1, Nomega, dcSpectrum, dcomega, v1Org); + iret = CalcSpectrumByBiCG(X, v0, v1, Nomega, NdcSpectrum, dcSpectrum, dcomega, v1Org); if (iret != TRUE) { return FALSE; } break; case FullDiag: - iret = CalcSpectrumByFullDiag(X, Nomega, dcSpectrum, dcomega, v1Org); + iret = CalcSpectrumByFullDiag(X, Nomega, NdcSpectrum, dcSpectrum, dcomega, v1Org); break; default: break; @@ -534,8 +546,8 @@ int CalcSpectrum( fprintf(stdoutMPI, " End: Calculating a spectrum.\n\n"); TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_CalcSpectrumEnd, 
"a"); - iret = OutputSpectrum(X, Nomega, dcSpectrum, dcomega); - free_cd_1d_allocate(dcSpectrum); + iret = OutputSpectrum(X, Nomega, NdcSpectrum, dcSpectrum, dcomega); + free_cd_2d_allocate(dcSpectrum); free_cd_1d_allocate(dcomega); return TRUE; diff --git a/src/CalcSpectrumByBiCG.c b/src/CalcSpectrumByBiCG.c index 8e79dafab..c03e66838 100644 --- a/src/CalcSpectrumByBiCG.c +++ b/src/CalcSpectrumByBiCG.c @@ -38,7 +38,8 @@ void ReadTMComponents_BiCG( double complex *v12,//!<[inout] [CheckList::idim_max] Old residual vector double complex *v14,//!<[inout] [CheckList::idim_max] Old shadow residual vector int Nomega,//!<[in] Number of frequencies - double complex *dcSpectrum,//!<[inout] [Nomega] Projected result vector, spectrum + int NdcSpectrum, + double complex **dcSpectrum,//!<[inout] [Nomega] Projected result vector, spectrum double complex *dcomega//!<[in] [Nomega] Frequency ) { char sdt[D_FileNameMax]; @@ -66,7 +67,7 @@ void ReadTMComponents_BiCG( fprintf(stdoutMPI, "INFO: File for the restart is not found.\n"); fprintf(stdoutMPI, " Start from SCRATCH.\n"); max_step = (int)X->Bind.Def.Lanczos_max; - komega_bicg_init(&idim_max2int, &one, &Nomega, dcSpectrum, dcomega, &max_step, &eps_Lanczos, &comm); + komega_bicg_init(&idim_max2int, &NdcSpectrum, &Nomega, &dcSpectrum[0][0], dcomega, &max_step, &eps_Lanczos, &comm); } else { fgetsMPI(ctmp, sizeof(ctmp) / sizeof(char), fp); @@ -99,7 +100,7 @@ void ReadTMComponents_BiCG( if (X->Bind.Def.iFlgCalcSpec == RECALC_FROM_TMComponents) X->Bind.Def.Lanczos_max = 0; max_step = (int)(iter_old + X->Bind.Def.Lanczos_max); - komega_bicg_restart(&idim_max2int, &one, &Nomega, dcSpectrum, dcomega, &max_step, &eps_Lanczos, status, + komega_bicg_restart(&idim_max2int, &NdcSpectrum, &Nomega, &dcSpectrum[0][0], dcomega, &max_step, &eps_Lanczos, status, &iter_old, &v2[1], &v12[1], &v4[1], &v14[1], alphaCG, betaCG, &z_seed, res_save, &comm); free(alphaCG); free(betaCG); @@ -108,7 +109,7 @@ void ReadTMComponents_BiCG( }/*if 
(X->Bind.Def.iFlgCalcSpec > RECALC_NOT)*/ else { max_step = (int)X->Bind.Def.Lanczos_max; - komega_bicg_init(&idim_max2int, &one, &Nomega, dcSpectrum, dcomega, &max_step, &eps_Lanczos, &comm); + komega_bicg_init(&idim_max2int, &NdcSpectrum, &Nomega, &dcSpectrum[0][0], dcomega, &max_step, &eps_Lanczos, &comm); } }/*int ReadTMComponents_BiCG*/ @@ -165,7 +166,8 @@ int CalcSpectrumByBiCG( double complex **v2,//!<[in] [CheckList::idim_max] Right hand side vector, excited state. double complex **v4,//!<[inout] [CheckList::idim_max] Work space for residual vector @f${\bf r}@f$ int Nomega,//!<[in] Number of Frequencies - double complex *dcSpectrum,//!<[out] [Nomega] Spectrum + int NdcSpectrum, + double complex **dcSpectrum,//!<[out] [Nomega] Spectrum double complex *dcomega,//!<[in] [Nomega] Frequency double complex **v1Org ) @@ -174,7 +176,7 @@ int CalcSpectrumByBiCG( unsigned long int idim, i_max; FILE *fp; size_t byte_size; - int iret, max_step; + int iret, max_step, idcSpectrum; unsigned long int liLanczosStp_vec = 0; double complex **vL, **v12, **v14, *res_proj; int stp, one = 1, status[3], iomega; @@ -190,7 +192,7 @@ int CalcSpectrumByBiCG( v14 = cd_2d_allocate(X->Bind.Check.idim_max + 1, 1); vL = cd_2d_allocate(X->Bind.Check.idim_max + 1, 1); resz = d_1d_allocate(Nomega); - res_proj = cd_1d_allocate(1); + res_proj = cd_1d_allocate(NdcSpectrum); /**
    • Set initial result vector(+shadow result vector) Read residual vectors if restart
    • @@ -204,7 +206,8 @@ int CalcSpectrumByBiCG( if (childfopenALL(sdt, "rb", &fp) != 0) { fprintf(stdoutMPI, "INFO: File for the restart is not found.\n"); fprintf(stdoutMPI, " Start from SCRATCH.\n"); - GetExcitedState(&(X->Bind), 1, v2, v1Org); + zclear(X->Bind.Check.idim_max, &v2[1][0]); + GetExcitedState(&(X->Bind), 1, v2, v1Org, 0); #pragma omp parallel for default(none) shared(v2,v4,v1Org,X) private(idim) for (idim = 1; idim <= X->Bind.Check.idim_max; idim++) v4[idim][0] = v2[idim][0]; @@ -228,7 +231,8 @@ int CalcSpectrumByBiCG( }/*if (childfopenALL(sdt, "rb", &fp) == 0)*/ }/*if (X->Bind.Def.iFlgCalcSpec > RECALC_FROM_TMComponents)*/ else { - GetExcitedState(&(X->Bind), 1, v2, v1Org); + zclear(X->Bind.Check.idim_max, &v2[1][0]); + GetExcitedState(&(X->Bind), 1, v2, v1Org, 0); #pragma omp parallel for default(none) shared(v2,v4,v1Org,X) private(idim) for (idim = 1; idim <= X->Bind.Check.idim_max; idim++) v4[idim][0] = v2[idim][0]; @@ -236,7 +240,7 @@ int CalcSpectrumByBiCG( /**
    • Input @f$\alpha, \beta@f$, projected residual, or start from scratch
    • */ - ReadTMComponents_BiCG(X, &v2[0][0], &v4[0][0], &v12[0][0], &v14[0][0], Nomega, dcSpectrum, dcomega); + ReadTMComponents_BiCG(X, &v2[0][0], &v4[0][0], &v12[0][0], &v14[0][0], Nomega, NdcSpectrum, dcSpectrum, dcomega); /**
    • @b DO BiCG loop
      • @@ -256,12 +260,15 @@ int CalcSpectrumByBiCG( iret = mltply(&X->Bind, 1, v12, v2); iret = mltply(&X->Bind, 1, v14, v4); - GetExcitedState(&(X->Bind), 1, vL, v1Org); - res_proj[0] = VecProdMPI(X->Bind.Check.idim_max, &vL[0][0], &v2[0][0]); + for (idcSpectrum = 0; idcSpectrum < NdcSpectrum; idcSpectrum++) { + zclear(X->Bind.Check.idim_max, &vL[1][0]); + GetExcitedState(&(X->Bind), 1, vL, v1Org, idcSpectrum + 1); + res_proj[idcSpectrum] = VecProdMPI(X->Bind.Check.idim_max, &vL[0][0], &v2[0][0]); + } /**
      • Update projected result vector dcSpectrum.
      • */ - komega_bicg_update(&v12[1][0], &v2[1][0], &v14[1][0], &v4[1][0], dcSpectrum, res_proj, status); + komega_bicg_update(&v12[1][0], &v2[1][0], &v14[1][0], &v4[1][0], &dcSpectrum[0][0], res_proj, status); /**
      • Output residuals at each frequency for some analysis
      • */ @@ -271,7 +278,7 @@ int CalcSpectrumByBiCG( for (iomega = 0; iomega < Nomega; iomega++) { fprintf(fp, "%7i %20.10e %20.10e %20.10e %20.10e\n", stp, creal(dcomega[iomega]), - creal(dcSpectrum[iomega]), cimag(dcSpectrum[iomega]), + creal(dcSpectrum[iomega][0]), cimag(dcSpectrum[iomega][0]), resz[iomega]); } fprintf(fp, "\n"); diff --git a/src/CalcSpectrumByFullDiag.c b/src/CalcSpectrumByFullDiag.c index b0c75ab45..15be6f16d 100644 --- a/src/CalcSpectrumByFullDiag.c +++ b/src/CalcSpectrumByFullDiag.c @@ -40,14 +40,15 @@ G(z) = \sum_n \frac{|\langle n|c|0\rangle|^2}{z - E_n} int CalcSpectrumByFullDiag( struct EDMainCalStruct *X,//!<[inout] int Nomega,//!<[in] Number of frequencies - double complex *dcSpectrum,//!<[out] [Nomega] Spectrum + int NdcSpectrum, + double complex **dcSpectrum,//!<[out] [Nomega] Spectrum double complex *dcomega,//!<[in] [Nomega] Frequency double complex **v1Org ) { int idim, jdim, iomega; int idim_max_int; - int incr=1; + int incr = 1, idcSpectrum; double complex **vR, **vL, vRv, vLv, *vLvvRv; /**
          @@ -76,37 +77,40 @@ int CalcSpectrumByFullDiag(
        • Compute @f$|\langle n|c|0\rangle|^2@f$ for all @f$n@f$ and store them into ::v1, where @f$c|0\rangle@f$ is ::vg.
        • */ - StartTimer(6303); - - GetExcitedState(&(X->Bind), 1, vR, v1Org); - GetExcitedState(&(X->Bind), 1, vL, v1Org); - for (idim = 0; idim < idim_max_int; idim++) { - vRv = 0.0; - vLv = 0.0; - for (jdim = 0; jdim < idim_max_int; jdim++) { - vRv += conj(v1[jdim][idim]) * vR[jdim][1]; - vLv += conj(v1[jdim][idim]) * vL[jdim][1]; - } - vLvvRv[idim] = conj(vLv) * vRv; - }/*for (idim = 0; idim < idim_max_int; idim++)*/ - StopTimer(6303); - /** -
        • Compute spectrum - @f[ - \sum_n \frac{|\langle n|c|0\rangle|^2}{z - E_n} - @f] -
        • -
        - */ - StartTimer(6304); - for (iomega = 0; iomega < Nomega; iomega++) { - dcSpectrum[iomega] = 0.0; + zclear(X->Bind.Check.idim_max, &vR[1][0]); + GetExcitedState(&(X->Bind), 1, vR, v1Org, 0); + for (idcSpectrum = 0; idcSpectrum < NdcSpectrum; idcSpectrum++) { + StartTimer(6303); + zclear(X->Bind.Check.idim_max, &vL[1][0]); + GetExcitedState(&(X->Bind), 1, vL, v1Org, idcSpectrum + 1); for (idim = 0; idim < idim_max_int; idim++) { - dcSpectrum[iomega] += vLvvRv[idim] / (dcomega[iomega] - X->Bind.Phys.energy[idim]); + vRv = 0.0; + vLv = 0.0; + for (jdim = 0; jdim < idim_max_int; jdim++) { + vRv += conj(v1[jdim][idim]) * vR[jdim][1]; + vLv += conj(v1[jdim][idim]) * vL[jdim][1]; + } + vLvvRv[idim] = conj(vLv) * vRv; }/*for (idim = 0; idim < idim_max_int; idim++)*/ - }/*for (iomega = 0; iomega < Nomega; iomega++)*/ - StopTimer(6304); - free_cd_2d_allocate(vR); + StopTimer(6303); + /** +
      • Compute spectrum + @f[ + \sum_n \frac{|\langle n|c|0\rangle|^2}{z - E_n} + @f] +
      • +
      + */ + StartTimer(6304); + for (iomega = 0; iomega < Nomega; iomega++) { + dcSpectrum[iomega][idcSpectrum] = 0.0; + for (idim = 0; idim < idim_max_int; idim++) { + dcSpectrum[iomega][idcSpectrum] += vLvvRv[idim] / (dcomega[iomega] - X->Bind.Phys.energy[idim]); + }/*for (idim = 0; idim < idim_max_int; idim++)*/ + }/*for (iomega = 0; iomega < Nomega; iomega++)*/ + StopTimer(6304); + }/*for (idcSpectrum = 1; idcSpectrum < NdcSpectrum; idcSpectrum++)*/ + free_cd_2d_allocate(vL); free_cd_2d_allocate(vR); free_cd_1d_allocate(vLvvRv); return TRUE; diff --git a/src/PairEx.c b/src/PairEx.c index fcfc4b763..8cfdb92be 100644 --- a/src/PairEx.c +++ b/src/PairEx.c @@ -47,7 +47,8 @@ int GetPairExcitedState ( struct BindStruct *X, int nstate, double complex **tmp_v0, - double complex **tmp_v1 + double complex **tmp_v1, + int iEx ) { int iret; @@ -73,21 +74,21 @@ int GetPairExcitedState switch (X->Def.iCalcModel) { case HubbardGC: - iret = GetPairExcitedStateHubbardGC(X, nstate, tmp_v0, tmp_v1); + iret = GetPairExcitedStateHubbardGC(X, nstate, tmp_v0, tmp_v1, iEx); break; case KondoGC: case Hubbard: case Kondo: - iret = GetPairExcitedStateHubbard(X, nstate, tmp_v0, tmp_v1); + iret = GetPairExcitedStateHubbard(X, nstate, tmp_v0, tmp_v1, iEx); break; case Spin: // for the Sz-conserved spin system - iret = GetPairExcitedStateSpin(X, nstate, tmp_v0, tmp_v1); + iret = GetPairExcitedStateSpin(X, nstate, tmp_v0, tmp_v1, iEx); break; case SpinGC: - iret = GetPairExcitedStateSpinGC(X, nstate, tmp_v0, tmp_v1); + iret = GetPairExcitedStateSpinGC(X, nstate, tmp_v0, tmp_v1, iEx); break; default: diff --git a/src/PairExHubbard.c b/src/PairExHubbard.c index 0897e8ac5..63df9d709 100644 --- a/src/PairExHubbard.c +++ b/src/PairExHubbard.c @@ -38,7 +38,8 @@ int GetPairExcitedStateHubbardGC( struct BindStruct *X,/**< [inout] define list to get and put information of calculation*/ int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ - double complex **tmp_v1 /**< [in] v0 = H v1*/ + 
double complex **tmp_v1, /**< [in] v0 = H v1*/ + int iEx ) { long unsigned int i, j; long unsigned int isite1; @@ -49,17 +50,17 @@ int GetPairExcitedStateHubbardGC( long int ibit; long unsigned int is; i_max = X->Check.idim_maxOrg; - for (i = 0; i < X->Def.NPairExcitationOperator; i++) { - org_isite1 = X->Def.PairExcitationOperator[i][0] + 1; - org_isite2 = X->Def.PairExcitationOperator[i][2] + 1; - org_sigma1 = X->Def.PairExcitationOperator[i][1]; - org_sigma2 = X->Def.PairExcitationOperator[i][3]; - tmp_trans = X->Def.ParaPairExcitationOperator[i]; + for (i = 0; i < X->Def.NPairExcitationOperator[iEx]; i++) { + org_isite1 = X->Def.PairExcitationOperator[iEx][i][0] + 1; + org_isite2 = X->Def.PairExcitationOperator[iEx][i][2] + 1; + org_sigma1 = X->Def.PairExcitationOperator[iEx][i][1]; + org_sigma2 = X->Def.PairExcitationOperator[iEx][i][3]; + tmp_trans = X->Def.ParaPairExcitationOperator[iEx][i]; if (org_isite1 > X->Def.Nsite && org_isite2 > X->Def.Nsite) { if (org_isite1 == org_isite2 && org_sigma1 == org_sigma2) { - if (X->Def.PairExcitationOperator[i][4] == 0) { + if (X->Def.PairExcitationOperator[iEx][i][4] == 0) { if (org_sigma1 == 0) { is = X->Def.Tpow[2 * org_isite1 - 2]; } @@ -72,7 +73,7 @@ int GetPairExcitedStateHubbardGC( zaxpy_long(i_max*nstate, -tmp_trans, &tmp_v1[1][0], &tmp_v0[1][0]); } } - else {//X->Def.PairExcitationOperator[i][4]==1 + else {//X->Def.PairExcitationOperator[iEx][i][4]==1 if (org_sigma1 == 0) { is = X->Def.Tpow[2 * org_isite1 - 2]; } @@ -102,7 +103,7 @@ int GetPairExcitedStateHubbardGC( } else { - if (org_isite1 == org_isite2 && org_sigma1 == org_sigma2 && X->Def.PairExcitationOperator[i][4] == 0) { + if (org_isite1 == org_isite2 && org_sigma1 == org_sigma2 && X->Def.PairExcitationOperator[iEx][i][4] == 0) { isite1 = X->Def.Tpow[2 * org_isite1 - 2 + org_sigma1]; #pragma omp parallel for default(none) private(j) \ firstprivate(i_max,X,isite1, tmp_trans) shared(tmp_v0,tmp_v1,nstate) @@ -132,7 +133,8 @@ firstprivate(i_max,X,isite1, 
tmp_trans) shared(tmp_v0,tmp_v1,nstate) int GetPairExcitedStateHubbard( struct BindStruct *X,/**< [inout] define list to get and put information of calculation*/ int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ - double complex **tmp_v1 /**< [in] v0 = H v1*/ + double complex **tmp_v1, /**< [in] v0 = H v1*/ + int iEx ) { long unsigned int i, j, idim_maxMPI; long unsigned int irght, ilft, ihfbit; @@ -165,12 +167,12 @@ int GetPairExcitedStateHubbard( tmp_v1bufOrg = cd_2d_allocate(idim_maxMPI + 1, nstate); #endif // MPI - for (i = 0; i < X->Def.NPairExcitationOperator; i++) { - org_isite1 = X->Def.PairExcitationOperator[i][0] + 1; - org_isite2 = X->Def.PairExcitationOperator[i][2] + 1; - org_sigma1 = X->Def.PairExcitationOperator[i][1]; - org_sigma2 = X->Def.PairExcitationOperator[i][3]; - tmp_trans = X->Def.ParaPairExcitationOperator[i]; + for (i = 0; i < X->Def.NPairExcitationOperator[iEx]; i++) { + org_isite1 = X->Def.PairExcitationOperator[iEx][i][0] + 1; + org_isite2 = X->Def.PairExcitationOperator[iEx][i][2] + 1; + org_sigma1 = X->Def.PairExcitationOperator[iEx][i][1]; + org_sigma2 = X->Def.PairExcitationOperator[iEx][i][3]; + tmp_trans = X->Def.ParaPairExcitationOperator[iEx][i]; ibitsite1 = X->Def.OrgTpow[2 * org_isite1 - 2 + org_sigma1]; ibitsite2 = X->Def.OrgTpow[2 * org_isite2 - 2 + org_sigma2]; child_general_hopp_GetInfo(X, org_isite1, org_isite2, org_sigma1, org_sigma2); @@ -217,7 +219,7 @@ firstprivate(i_max,tmp_trans,Asum,Adiff,ibitsite1,ibitsite2,X,list_1_org,list_1, if (org_isite1 == org_isite2 && org_sigma1 == org_sigma2) {//diagonal is = X->Def.Tpow[2 * org_isite1 - 2 + org_sigma1]; ibit = (unsigned long int) myrank & is; - if (X->Def.PairExcitationOperator[i][4] == 0) { + if (X->Def.PairExcitationOperator[iEx][i][4] == 0) { if (ibit != is) { dmv = -tmp_trans; #pragma omp parallel for default(none) shared(tmp_v0, tmp_v1,one,dmv,nstate) \ @@ -254,7 +256,7 @@ firstprivate(i_max, tmp_trans) private(j) } if (org_isite1 == org_isite2 && 
org_sigma1 == org_sigma2) { is = X->Def.Tpow[2 * org_isite1 - 2 + org_sigma1]; - if (X->Def.PairExcitationOperator[i][4] == 0) { + if (X->Def.PairExcitationOperator[iEx][i][4] == 0) { #pragma omp parallel for default(none) shared(list_1,nstate,tmp_v0,tmp_v1,one) \ firstprivate(i_max,is,tmp_trans) private(num1,ibit,dmv) for (j = 1; j <= i_max; j++) { diff --git a/src/PairExSpin.c b/src/PairExSpin.c index 1e569a0c7..7aee48eb9 100644 --- a/src/PairExSpin.c +++ b/src/PairExSpin.c @@ -23,31 +23,6 @@ #ifdef MPI #include "common/setmemory.h" #endif - -// -/// \brief Calculation of pair excited state for Spin Grand canonical system -/// \param X [in,out] define list to get and put information of calculation -/// \param tmp_v0 [out] Result v0 = H v1 -/// \param tmp_v1 [in] v0 = H v1 -/// \returns TRUE: Normally finished -/// \returns FALSE: Abnormally finished -/// \author Kazuyoshi Yoshimi -/// \version 1.2 -int GetPairExcitedStateSpinGC( - struct BindStruct *X,/**< [in,out] define list to get and put information of calculation*/ - int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ - double complex **tmp_v1 /**< [in] v0 = H v1*/ -) { - - int iret = 0; - if (X->Def.iFlgGeneralSpin == FALSE) { - iret = GetPairExcitedStateHalfSpinGC(X, nstate, tmp_v0, tmp_v1); - } - else { - iret = GetPairExcitedStateGeneralSpinGC(X, nstate, tmp_v0, tmp_v1); - } - return iret; -} /// Calculation of pair excited state for Half Spin Grand canonical system /// \param X [in,out] define list to get and put information of calculation /// \param tmp_v0 [out] Result v0 = H v1 @@ -60,7 +35,8 @@ int GetPairExcitedStateHalfSpinGC( struct BindStruct *X,/**< [in,out] define list to get and put information of calculation*/ int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ - double complex **tmp_v1 /**< [in] v0 = H v1*/ + double complex **tmp_v1, /**< [in] v0 = H v1*/ + int iEx ) { long unsigned int i, j; long unsigned int isite1; @@ -73,16 +49,16 @@ int 
GetPairExcitedStateHalfSpinGC( i_max = X->Check.idim_maxOrg; - for (i = 0; i < X->Def.NPairExcitationOperator; i++) { - org_isite1 = X->Def.PairExcitationOperator[i][0] + 1; - org_isite2 = X->Def.PairExcitationOperator[i][2] + 1; - org_sigma1 = X->Def.PairExcitationOperator[i][1]; - org_sigma2 = X->Def.PairExcitationOperator[i][3]; - tmp_trans = X->Def.ParaPairExcitationOperator[i]; + for (i = 0; i < X->Def.NPairExcitationOperator[iEx]; i++) { + org_isite1 = X->Def.PairExcitationOperator[iEx][i][0] + 1; + org_isite2 = X->Def.PairExcitationOperator[iEx][i][2] + 1; + org_sigma1 = X->Def.PairExcitationOperator[iEx][i][1]; + org_sigma2 = X->Def.PairExcitationOperator[iEx][i][3]; + tmp_trans = X->Def.ParaPairExcitationOperator[iEx][i]; if (org_isite1 == org_isite2) { if (org_isite1 > X->Def.Nsite) { if (org_sigma1 == org_sigma2) { // longitudinal magnetic field - if (X->Def.PairExcitationOperator[i][4] == 0) { + if (X->Def.PairExcitationOperator[iEx][i][4] == 0) { X_GC_child_AisCis_spin_MPIdouble(org_isite1 - 1, org_sigma1, -tmp_trans, X, nstate, tmp_v0, tmp_v1); } else { @@ -97,7 +73,7 @@ int GetPairExcitedStateHalfSpinGC( else { isite1 = X->Def.Tpow[org_isite1 - 1]; if (org_sigma1 == org_sigma2) { - if (X->Def.PairExcitationOperator[i][4] == 0) { + if (X->Def.PairExcitationOperator[iEx][i][4] == 0) { // longitudinal magnetic field #pragma omp parallel for default(none) private(j, tmp_sgn,dmv) \ firstprivate(i_max, isite1, org_sigma1, X,tmp_trans) shared(one,nstate,tmp_v0, tmp_v1) @@ -150,8 +126,8 @@ int GetPairExcitedStateGeneralSpinGC( struct BindStruct *X,/**< [in,out] define list to get and put information of calculation*/ int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ - double complex **tmp_v1 /**< [in] v0 = H v1*/ - + double complex **tmp_v1, /**< [in] v0 = H v1*/ + int iEx ) { long unsigned int i, j; int num1; @@ -162,16 +138,16 @@ int GetPairExcitedStateGeneralSpinGC( long int i_max; i_max = X->Check.idim_maxOrg; - for (i = 0; i < 
X->Def.NPairExcitationOperator; i++) { - org_isite1 = X->Def.PairExcitationOperator[i][0] + 1; - org_isite2 = X->Def.PairExcitationOperator[i][2] + 1; - org_sigma1 = X->Def.PairExcitationOperator[i][1]; - org_sigma2 = X->Def.PairExcitationOperator[i][3]; - tmp_trans = X->Def.ParaPairExcitationOperator[i]; + for (i = 0; i < X->Def.NPairExcitationOperator[iEx]; i++) { + org_isite1 = X->Def.PairExcitationOperator[iEx][i][0] + 1; + org_isite2 = X->Def.PairExcitationOperator[iEx][i][2] + 1; + org_sigma1 = X->Def.PairExcitationOperator[iEx][i][1]; + org_sigma2 = X->Def.PairExcitationOperator[iEx][i][3]; + tmp_trans = X->Def.ParaPairExcitationOperator[iEx][i]; if (org_isite1 == org_isite2) { if (org_isite1 > X->Def.Nsite) { if (org_sigma1 == org_sigma2) { - if (X->Def.PairExcitationOperator[i][4] == 0) { + if (X->Def.PairExcitationOperator[iEx][i][4] == 0) { // longitudinal magnetic field X_GC_child_AisCis_GeneralSpin_MPIdouble(org_isite1 - 1, org_sigma1, -tmp_trans, X, nstate, tmp_v0, tmp_v1); } @@ -186,7 +162,7 @@ int GetPairExcitedStateGeneralSpinGC( } else {//org_isite1 <= X->Def.Nsite if (org_sigma1 == org_sigma2) { - if (X->Def.PairExcitationOperator[i][4] == 0) { + if (X->Def.PairExcitationOperator[iEx][i][4] == 0) { // longitudinal magnetic field #pragma omp parallel for default(none) private(j,num1,dmv) \ firstprivate(i_max,org_isite1,org_sigma1,X,tmp_trans) shared(tmp_v0,tmp_v1,one,nstate) @@ -229,7 +205,7 @@ shared(tmp_v0,tmp_v1,one,nstate) } return TRUE; } -/// Calculation of pair excited state for Spin canonical system +/// \brief Calculation of pair excited state for Spin Grand canonical system /// \param X [in,out] define list to get and put information of calculation /// \param tmp_v0 [out] Result v0 = H v1 /// \param tmp_v1 [in] v0 = H v1 @@ -237,19 +213,19 @@ shared(tmp_v0,tmp_v1,one,nstate) /// \returns FALSE: Abnormally finished /// \author Kazuyoshi Yoshimi /// \version 1.2 -int GetPairExcitedStateSpin( +int GetPairExcitedStateSpinGC( struct 
BindStruct *X,/**< [in,out] define list to get and put information of calculation*/ - int nstate, - double complex **tmp_v0, /**< [out] Result v0 = H v1*/ - double complex **tmp_v1 /**< [in] v0 = H v1*/ - + int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + double complex **tmp_v1, /**< [in] v0 = H v1*/ + int iEx ) { + int iret = 0; if (X->Def.iFlgGeneralSpin == FALSE) { - iret = GetPairExcitedStateHalfSpin(X, nstate, tmp_v0, tmp_v1); + iret = GetPairExcitedStateHalfSpinGC(X, nstate, tmp_v0, tmp_v1, iEx); } else { - iret = GetPairExcitedStateGeneralSpin(X, nstate, tmp_v0, tmp_v1); + iret = GetPairExcitedStateGeneralSpinGC(X, nstate, tmp_v0, tmp_v1, iEx); } return iret; } @@ -265,7 +241,8 @@ int GetPairExcitedStateHalfSpin( struct BindStruct *X,/**< [in,out] define list to get and put information of calculation*/ int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ - double complex **tmp_v1 /**< [in] v0 = H v1*/ + double complex **tmp_v1, /**< [in] v0 = H v1*/ + int iEx ) { long unsigned int i, j, idim_maxMPI; @@ -287,18 +264,18 @@ int GetPairExcitedStateHalfSpin( tmp_v1bufOrg = cd_2d_allocate(idim_maxMPI + 1, nstate); #endif // MPI - for (i = 0; i < X->Def.NPairExcitationOperator; i++) { - org_isite1 = X->Def.PairExcitationOperator[i][0] + 1; - org_isite2 = X->Def.PairExcitationOperator[i][2] + 1; - org_sigma1 = X->Def.PairExcitationOperator[i][1]; - org_sigma2 = X->Def.PairExcitationOperator[i][3]; - tmp_trans = X->Def.ParaPairExcitationOperator[i]; + for (i = 0; i < X->Def.NPairExcitationOperator[iEx]; i++) { + org_isite1 = X->Def.PairExcitationOperator[iEx][i][0] + 1; + org_isite2 = X->Def.PairExcitationOperator[iEx][i][2] + 1; + org_sigma1 = X->Def.PairExcitationOperator[iEx][i][1]; + org_sigma2 = X->Def.PairExcitationOperator[iEx][i][3]; + tmp_trans = X->Def.ParaPairExcitationOperator[iEx][i]; if (org_sigma1 == org_sigma2) { if (org_isite1 == org_isite2) { if (org_isite1 > X->Def.Nsite) { is1_up = X->Def.Tpow[org_isite1 - 1]; 
ibit1 = X_SpinGC_CisAis((unsigned long int) myrank + 1, X, is1_up, org_sigma1); - if (X->Def.PairExcitationOperator[i][4] == 0) { + if (X->Def.PairExcitationOperator[iEx][i][4] == 0) { if (ibit1 == 0) { dmv = -tmp_trans; #pragma omp parallel for default(none) shared(tmp_v0, tmp_v1,one,nstate,dmv) \ @@ -320,7 +297,7 @@ int GetPairExcitedStateHalfSpin( else { isite1 = X->Def.Tpow[org_isite1 - 1]; if (org_isite1 == org_isite2 && org_sigma1 == org_sigma2 && - X->Def.PairExcitationOperator[i][4] == 0) { + X->Def.PairExcitationOperator[iEx][i][4] == 0) { #pragma omp parallel for default(none) private(j,dmv) \ firstprivate(i_max,isite1,org_sigma1,X,tmp_trans) shared(tmp_v0,tmp_v1,one,nstate) for (j = 1; j <= i_max; j++) { @@ -382,7 +359,8 @@ int GetPairExcitedStateGeneralSpin( struct BindStruct *X,/**< [in,out] define list to get and put information of calculation*/ int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ - double complex **tmp_v1 /**< [in] v0 = H v1*/ + double complex **tmp_v1, /**< [in] v0 = H v1*/ + int iEx ) { long unsigned int i, j, idim_maxMPI; @@ -401,19 +379,19 @@ int GetPairExcitedStateGeneralSpin( tmp_v1bufOrg = cd_2d_allocate(idim_maxMPI + 1, nstate); #endif // MPI - for (i = 0; i < X->Def.NPairExcitationOperator; i++) { - org_isite1 = X->Def.PairExcitationOperator[i][0] + 1; - org_isite2 = X->Def.PairExcitationOperator[i][2] + 1; - org_sigma1 = X->Def.PairExcitationOperator[i][1]; - org_sigma2 = X->Def.PairExcitationOperator[i][3]; - tmp_trans = X->Def.ParaPairExcitationOperator[i]; + for (i = 0; i < X->Def.NPairExcitationOperator[iEx]; i++) { + org_isite1 = X->Def.PairExcitationOperator[iEx][i][0] + 1; + org_isite2 = X->Def.PairExcitationOperator[iEx][i][2] + 1; + org_sigma1 = X->Def.PairExcitationOperator[iEx][i][1]; + org_sigma2 = X->Def.PairExcitationOperator[iEx][i][3]; + tmp_trans = X->Def.ParaPairExcitationOperator[iEx][i]; if (org_isite1 == org_isite2) { if (org_isite1 > X->Def.Nsite) { if (org_sigma1 == org_sigma2) { // 
longitudinal magnetic field num1 = BitCheckGeneral((unsigned long int) myrank, org_isite1, org_sigma1, X->Def.SiteToBit, X->Def.Tpow); - if (X->Def.PairExcitationOperator[i][4] == 0) { + if (X->Def.PairExcitationOperator[iEx][i][4] == 0) { if (num1 == 0) { #pragma omp parallel for default(none) private(j,dmv) \ firstprivate(i_max, tmp_trans) \ @@ -443,7 +421,7 @@ int GetPairExcitedStateGeneralSpin( else {//org_isite1 <= X->Def.Nsite if (org_sigma1 == org_sigma2) { // longitudinal magnetic field - if (X->Def.PairExcitationOperator[i][4] == 0) { + if (X->Def.PairExcitationOperator[iEx][i][4] == 0) { #pragma omp parallel for default(none) private(j, num1,dmv) \ firstprivate(i_max, org_isite1, org_sigma1, X, tmp_trans) \ shared(tmp_v0, tmp_v1, list_1,one,nstate) @@ -490,3 +468,27 @@ int GetPairExcitedStateGeneralSpin( return TRUE; } +/// Calculation of pair excited state for Spin canonical system +/// \param X [in,out] define list to get and put information of calculation +/// \param tmp_v0 [out] Result v0 = H v1 +/// \param tmp_v1 [in] v0 = H v1 +/// \returns TRUE: Normally finished +/// \returns FALSE: Abnormally finished +/// \author Kazuyoshi Yoshimi +/// \version 1.2 +int GetPairExcitedStateSpin( + struct BindStruct *X,/**< [in,out] define list to get and put information of calculation*/ + int nstate, + double complex **tmp_v0, /**< [out] Result v0 = H v1*/ + double complex **tmp_v1, /**< [in] v0 = H v1*/ + int iEx +) { + int iret = 0; + if (X->Def.iFlgGeneralSpin == FALSE) { + iret = GetPairExcitedStateHalfSpin(X, nstate, tmp_v0, tmp_v1, iEx); + } + else { + iret = GetPairExcitedStateGeneralSpin(X, nstate, tmp_v0, tmp_v1, iEx); + } + return iret; +} diff --git a/src/SingleEx.c b/src/SingleEx.c index fd9a6de2c..654724f0c 100644 --- a/src/SingleEx.c +++ b/src/SingleEx.c @@ -31,7 +31,8 @@ int GetSingleExcitedState( struct BindStruct *X,//!Def.iCalcModel) { case HubbardGC: - iret = GetSingleExcitedStateHubbardGC(X, nstate, tmp_v0, tmp_v1); + iret = 
GetSingleExcitedStateHubbardGC(X, nstate, tmp_v0, tmp_v1, iEx); break; case KondoGC: case Hubbard: case Kondo: - iret = GetSingleExcitedStateHubbard(X, nstate, tmp_v0, tmp_v1); + iret = GetSingleExcitedStateHubbard(X, nstate, tmp_v0, tmp_v1, iEx); break; case Spin: diff --git a/src/SingleExHubbard.c b/src/SingleExHubbard.c index 59d25b072..f27d3ec80 100644 --- a/src/SingleExHubbard.c +++ b/src/SingleExHubbard.c @@ -36,7 +36,8 @@ int GetSingleExcitedStateHubbard( struct BindStruct *X,//!Def.NSingleExcitationOperator == 0) { + if (X->Def.NSingleExcitationOperator[iEx] == 0) { return TRUE; } double complex **tmp_v1bufOrg; @@ -57,11 +58,11 @@ int GetSingleExcitedStateHubbard( #endif // MPI idim_max = X->Check.idim_maxOrg; - for (i = 0; i < X->Def.NSingleExcitationOperator; i++) { - org_isite = X->Def.SingleExcitationOperator[i][0]; - ispin = X->Def.SingleExcitationOperator[i][1]; - itype = X->Def.SingleExcitationOperator[i][2]; - tmpphi = X->Def.ParaSingleExcitationOperator[i]; + for (i = 0; i < X->Def.NSingleExcitationOperator[iEx]; i++) { + org_isite = X->Def.SingleExcitationOperator[iEx][i][0]; + ispin = X->Def.SingleExcitationOperator[iEx][i][1]; + itype = X->Def.SingleExcitationOperator[iEx][i][2]; + tmpphi = X->Def.ParaSingleExcitationOperator[iEx][i]; is1_spin = X->Def.Tpow[2 * org_isite + ispin]; if (itype == 1) { if (org_isite >= X->Def.Nsite) { @@ -116,7 +117,8 @@ int GetSingleExcitedStateHubbardGC( struct BindStruct *X,//!Check.idim_max; idim_max = X->Check.idim_maxOrg; //tmp_v0 - if (X->Def.NSingleExcitationOperator == 0) { + if (X->Def.NSingleExcitationOperator[iEx] == 0) { return TRUE; } double complex **tmp_v1bufOrg; @@ -138,11 +140,11 @@ int GetSingleExcitedStateHubbardGC( #endif // MPI // SingleEx - for (i = 0; i < X->Def.NSingleExcitationOperator; i++) { - org_isite = X->Def.SingleExcitationOperator[i][0]; - ispin = X->Def.SingleExcitationOperator[i][1]; - itype = X->Def.SingleExcitationOperator[i][2]; - tmpphi = 
X->Def.ParaSingleExcitationOperator[i]; + for (i = 0; i < X->Def.NSingleExcitationOperator[iEx]; i++) { + org_isite = X->Def.SingleExcitationOperator[iEx][i][0]; + ispin = X->Def.SingleExcitationOperator[iEx][i][1]; + itype = X->Def.SingleExcitationOperator[iEx][i][2]; + tmpphi = X->Def.ParaSingleExcitationOperator[iEx][i]; if (itype == 1) { if (org_isite >= X->Def.Nsite) { X_GC_Cis_MPI(org_isite, ispin, tmpphi, nstate, tmp_v0, tmp_v1, diff --git a/src/StdFace/StdFace_main.c b/src/StdFace/StdFace_main.c index 4b982f74f..9141927d8 100644 --- a/src/StdFace/StdFace_main.c +++ b/src/StdFace/StdFace_main.c @@ -233,7 +233,7 @@ static void PrintCalcMod(struct StdIntList *StdI) */ static void PrintExcitation(struct StdIntList *StdI) { FILE *fp; - int NumOp, **spin, isite, ispin, icell, itau; + int NumOp, **spin, isite, ispin, icell, itau, iEx; double *coef, pi, Cphase, S, Sz; double *fourier_r, *fourier_i; @@ -371,43 +371,47 @@ static void PrintExcitation(struct StdIntList *StdI) { if (StdI->SpectrumBody == 1) { fp = fopen("single.def", "w"); fprintf(fp, "=============================================\n"); - if (strcmp(StdI->model, "kondo") == 0) { - fprintf(fp, "NSingle %d\n", StdI->nsite / 2 * NumOp); - } - else { - fprintf(fp, "NSingle %d\n", StdI->nsite * NumOp); - } + fprintf(fp, "NSingle %d\n", 2); fprintf(fp, "=============================================\n"); fprintf(fp, "============== Single Excitation ============\n"); fprintf(fp, "=============================================\n"); if (strcmp(StdI->model, "kondo") == 0) { - for (isite = StdI->nsite / 2; isite < StdI->nsite; isite++) { - fprintf(fp, "%d %d 0 %25.15f %25.15f\n", isite, spin[0][0], - fourier_r[isite] * coef[0], fourier_i[isite] * coef[0]); - }/*for (isite = 0; isite < StdI->nsite; isite++)*/ + for (iEx = 0; iEx < 2; iEx++) { + fprintf(fp, "%d\n", StdI->nsite / 2 * NumOp); + for (isite = StdI->nsite / 2; isite < StdI->nsite; isite++) { + fprintf(fp, "%d %d 0 %25.15f %25.15f\n", isite, spin[0][0], + 
fourier_r[isite] * coef[0], fourier_i[isite] * coef[0]); + }/*for (isite = 0; isite < StdI->nsite; isite++)*/ + }/*for (iEx = 0; iEx < 2; iEx++)*/ }/*if (strcmp(StdI->model, "kondo") == 0)*/ else { - for (isite = 0; isite < StdI->nsite; isite++) { - fprintf(fp, "%d %d 0 %25.15f %25.15f\n", isite, spin[0][0], - fourier_r[isite] * coef[0], fourier_i[isite] * coef[0]); - }/*for (isite = 0; isite < StdI->nsite; isite++)*/ + for (iEx = 0; iEx < 2; iEx++) { + fprintf(fp, "%d\n", StdI->nsite * NumOp); + for (isite = 0; isite < StdI->nsite; isite++) { + fprintf(fp, "%d %d 0 %25.15f %25.15f\n", isite, spin[0][0], + fourier_r[isite] * coef[0], fourier_i[isite] * coef[0]); + }/*for (isite = 0; isite < StdI->nsite; isite++)*/ + }/*for (iEx = 0; iEx < 2; iEx++)*/ } fprintf(stdout, " single.def is written.\n\n"); } else { fp = fopen("pair.def", "w"); fprintf(fp, "=============================================\n"); - fprintf(fp, "NPair %d\n", StdI->nsite * NumOp); + fprintf(fp, "NPair %d\n", 2); fprintf(fp, "=============================================\n"); fprintf(fp, "=============== Pair Excitation =============\n"); fprintf(fp, "=============================================\n"); - for (isite = 0; isite < StdI->nsite; isite++) { - for (ispin = 0; ispin < NumOp; ispin++) { - fprintf(fp, "%d %d %d %d 1 %25.15f %25.15f\n", - isite, spin[ispin][0], isite, spin[ispin][1], - fourier_r[isite] * coef[ispin], fourier_i[isite] * coef[ispin]); + for (iEx = 0; iEx < 2; iEx++) { + fprintf(fp, "%d\n", StdI->nsite * NumOp); + for (isite = 0; isite < StdI->nsite; isite++) { + for (ispin = 0; ispin < NumOp; ispin++) { + fprintf(fp, "%d %d %d %d 1 %25.15f %25.15f\n", + isite, spin[ispin][0], isite, spin[ispin][1], + fourier_r[isite] * coef[ispin], fourier_i[isite] * coef[ispin]); + } } - } + }/*for (iEx = 0; iEx < 2; iEx++)*/ fprintf(stdout, " pair.def is written.\n\n"); } fflush(fp); diff --git a/src/include/CalcSpectrum.h b/src/include/CalcSpectrum.h index 102c44692..7704e6b3b 100644 --- 
a/src/include/CalcSpectrum.h +++ b/src/include/CalcSpectrum.h @@ -17,5 +17,5 @@ #include "Common.h" int CalcSpectrum(struct EDMainCalStruct *X); -int GetExcitedState(struct BindStruct *X, int nstate, double complex **tmp_v0, double complex **tmp_v1); +int GetExcitedState(struct BindStruct *X, int nstate, double complex **tmp_v0, double complex **tmp_v1, int iEx); diff --git a/src/include/CalcSpectrumByBiCG.h b/src/include/CalcSpectrumByBiCG.h index becb23ea3..8946a4e28 100644 --- a/src/include/CalcSpectrumByBiCG.h +++ b/src/include/CalcSpectrumByBiCG.h @@ -21,7 +21,8 @@ int CalcSpectrumByBiCG( double complex **v2, double complex **v4, int Nomega, - double complex *dcSpectrum, + int NdcSpectrum, + double complex **dcSpectrum, double complex *dcomega, double complex **v1Org ); diff --git a/src/include/CalcSpectrumByFullDiag.h b/src/include/CalcSpectrumByFullDiag.h index 83379013f..c87cb100a 100644 --- a/src/include/CalcSpectrumByFullDiag.h +++ b/src/include/CalcSpectrumByFullDiag.h @@ -18,4 +18,5 @@ #include "struct.h" int CalcSpectrumByFullDiag( - struct EDMainCalStruct *X, int Nomega, double complex *dcSpectrum, double complex *dcomega, double complex **v1org); + struct EDMainCalStruct *X, int Nomega, int NdcSpectrum, + double complex **dcSpectrum, double complex *dcomega, double complex **v1org); diff --git a/src/include/PairEx.h b/src/include/PairEx.h index a8cccccba..09e37066f 100644 --- a/src/include/PairEx.h +++ b/src/include/PairEx.h @@ -20,5 +20,6 @@ int GetPairExcitedState ( struct BindStruct *X, int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ - double complex **tmp_v1 /**< [in] v0 = H v1*/ + double complex **tmp_v1, /**< [in] v0 = H v1*/ + int iEx ); diff --git a/src/include/PairExHubbard.h b/src/include/PairExHubbard.h index 50c9c5c9e..021e854fa 100644 --- a/src/include/PairExHubbard.h +++ b/src/include/PairExHubbard.h @@ -19,12 +19,13 @@ int GetPairExcitedStateHubbardGC( struct BindStruct *X, int nstate, double complex **tmp_v0, /**< 
[out] Result v0 = H v1*/ - double complex **tmp_v1 /**< [in] v0 = H v1*/ - + double complex **tmp_v1, /**< [in] v0 = H v1*/ + int iEx ); int GetPairExcitedStateHubbard( struct BindStruct *X, int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ - double complex **tmp_v1 /**< [in] v0 = H v1*/ + double complex **tmp_v1, /**< [in] v0 = H v1*/ + int iEx ); diff --git a/src/include/PairExSpin.h b/src/include/PairExSpin.h index 1339f2d45..996925d22 100644 --- a/src/include/PairExSpin.h +++ b/src/include/PairExSpin.h @@ -16,45 +16,5 @@ #pragma once #include "Common.h" -int GetPairExcitedStateSpinGC( - struct BindStruct *X, - int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ - double complex **tmp_v1 /**< [in] v0 = H v1*/ - -); - -int GetPairExcitedStateHalfSpinGC( - struct BindStruct *X, - int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ - double complex **tmp_v1 /**< [in] v0 = H v1*/ - -); - - -int GetPairExcitedStateGeneralSpinGC( - struct BindStruct *X, - int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ - double complex **tmp_v1 /**< [in] v0 = H v1*/ - -); - -int GetPairExcitedStateSpin( - struct BindStruct *X, - int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ - double complex **tmp_v1 /**< [in] v0 = H v1*/ -); - -int GetPairExcitedStateHalfSpin( - struct BindStruct *X, - int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ - double complex **tmp_v1 /**< [in] v0 = H v1*/ - -); - - -int GetPairExcitedStateGeneralSpin( - struct BindStruct *X, - int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ - double complex **tmp_v1 /**< [in] v0 = H v1*/ - -); +int GetPairExcitedStateSpinGC(struct BindStruct *X, int nstate, double complex **tmp_v0, double complex **tmp_v1, int iEx); +int GetPairExcitedStateSpin(struct BindStruct *X, int nstate, double complex **tmp_v0, double complex **tmp_v1, int iEx); diff --git a/src/include/SingleEx.h b/src/include/SingleEx.h index 
73c514521..18a26d3f2 100644 --- a/src/include/SingleEx.h +++ b/src/include/SingleEx.h @@ -21,5 +21,6 @@ int GetSingleExcitedState ( struct BindStruct *X, int nstate, double complex **tmp_v0, /**< [out] Result v0 = H v1*/ - double complex **tmp_v1 /**< [in] v0 = H v1*/ + double complex **tmp_v1, /**< [in] v0 = H v1*/ + int iEx ); diff --git a/src/include/SingleExHubbard.h b/src/include/SingleExHubbard.h index 1551bb885..0e376f6ad 100644 --- a/src/include/SingleExHubbard.h +++ b/src/include/SingleExHubbard.h @@ -17,6 +17,6 @@ #include "Common.h" int GetSingleExcitedStateHubbard(struct BindStruct *X, int nstate, - double complex **tmp_v0, double complex **tmp_v1); + double complex **tmp_v0, double complex **tmp_v1, int iEx); int GetSingleExcitedStateHubbardGC(struct BindStruct *X, int nstate, - double complex **tmp_v0, double complex **tmp_v1); + double complex **tmp_v0, double complex **tmp_v1, int iEx); diff --git a/src/include/struct.h b/src/include/struct.h index 1c8b01a5b..5d5ce3ba5 100644 --- a/src/include/struct.h +++ b/src/include/struct.h @@ -177,16 +177,18 @@ struct DefineList { int **CisAjtCkuAlvDC;/**<@brief [DefineList::NCisAjtCkuAlvDC][4] Indices of two-body correlation function. malloc in setmem_def().*/ unsigned int NCisAjtCkuAlvDC;/**<@brief Number of indices of two-body correlation function.*/ - int **SingleExcitationOperator;/**<@brief [DefineList::NSingleExcitationOperator][3] + int ***SingleExcitationOperator;/**<@brief [DefineList::NSingleExcitationOperator][3] Indices of single excitaion operator for spectrum. 
malloc in setmem_def().*/ - unsigned int NSingleExcitationOperator;/**<@brief Number of single excitaion operator for spectrum.*/ - double complex *ParaSingleExcitationOperator;/**<@brief [DefineList::NSingleExcitationOperator] + unsigned int NNSingleExcitationOperator;/**<@brief Number of single excitaion operator for spectrum.*/ + unsigned int *NSingleExcitationOperator;/**<@brief Number of single excitaion operator for spectrum.*/ + double complex **ParaSingleExcitationOperator;/**<@brief [DefineList::NSingleExcitationOperator] Coefficient of single excitaion operator for spectrum. malloc in setmem_def().*/ - int **PairExcitationOperator;/**<@brief [DefineList::NPairExcitationOperator][5] + int ***PairExcitationOperator;/**<@brief [DefineList::NPairExcitationOperator][5] Indices of pair excitaion operator for spectrum. malloc in setmem_def().*/ - unsigned int NPairExcitationOperator;/**<@brief Number of pair excitaion operator for spectrum.*/ - double complex *ParaPairExcitationOperator;/**<@brief [DefineList::NPairExcitationOperator] + unsigned int NNPairExcitationOperator;/**<@brief Number of pair excitaion operator for spectrum.*/ + unsigned int *NPairExcitationOperator;/**<@brief Number of pair excitaion operator for spectrum.*/ + double complex **ParaPairExcitationOperator;/**<@brief [DefineList::NPairExcitationOperator] Coefficient of pair excitaion operator for spectrum. malloc in setmem_def().*/ int iCalcType;/**<@brief Switch for calculation type. 
0:Lanczos, 1:TPQCalc, 2:FullDiag.*/ diff --git a/src/readdef.c b/src/readdef.c index 02c43d676..4490ac8cc 100644 --- a/src/readdef.c +++ b/src/readdef.c @@ -799,14 +799,14 @@ int ReadDefFileNInt( /* Read singleexcitation.def----------------------------------------*/ fgetsMPI(ctmp, sizeof(ctmp)/sizeof(char), fp); fgetsMPI(ctmp2, 256, fp); - sscanf(ctmp2,"%s %d\n", ctmp, &(X->NSingleExcitationOperator)); + sscanf(ctmp2,"%s %d\n", ctmp, &(X->NNSingleExcitationOperator)); break; case KWPairExcitation: /* Read pairexcitation.def----------------------------------------*/ fgetsMPI(ctmp, sizeof(ctmp)/sizeof(char), fp); fgetsMPI(ctmp2, 256, fp); - sscanf(ctmp2,"%s %d\n", ctmp, &(X->NPairExcitationOperator)); + sscanf(ctmp2,"%s %d\n", ctmp, &(X->NNPairExcitationOperator)); break; default: @@ -1707,33 +1707,41 @@ int ReadDefFileIdxPara( case KWSingleExcitation: /*singleexcitation.def----------------------------------------*/ - if(X->NSingleExcitationOperator>0) { + if(X->NNSingleExcitationOperator>0) { if(X->iCalcModel == Spin || X->iCalcModel == SpinGC) { fprintf(stderr, "SingleExcitation is not allowed for spin system.\n"); fclose(fp); return ReadDefFileError(defname); } while (fgetsMPI(ctmp2, 256, fp) != NULL) { - sscanf(ctmp2, "%d %d %d %lf %lf\n", - &isite1, - &isigma1, - &itype, - &dvalue_re, - &dvalue_im - ); - - if (CheckSite(isite1, X->Nsite) != 0) { - fclose(fp); - return ReadDefFileError(defname); - } + sscanf(ctmp2, "%d\n", &X->NSingleExcitationOperator[idx]); + X->SingleExcitationOperator[idx] = (int**)malloc(sizeof(int*)*X->NSingleExcitationOperator[idx]); + X->ParaSingleExcitationOperator[idx] = (double complex*)malloc( + sizeof(double complex)*X->NSingleExcitationOperator[idx]); + for (i = 0; i < X->NSingleExcitationOperator[idx]; ++i) { + fgetsMPI(ctmp2, 256, fp); + sscanf(ctmp2, "%d %d %d %lf %lf\n", + &isite1, + &isigma1, + &itype, + &dvalue_re, + &dvalue_im + ); + + if (CheckSite(isite1, X->Nsite) != 0) { + fclose(fp); + return ReadDefFileError(defname); + 
} - X->SingleExcitationOperator[idx][0] = isite1; - X->SingleExcitationOperator[idx][1] = isigma1; - X->SingleExcitationOperator[idx][2] = itype; - X->ParaSingleExcitationOperator[idx] = dvalue_re + I * dvalue_im; + X->SingleExcitationOperator[idx][i] = (int*)malloc(sizeof(int) * 3); + X->SingleExcitationOperator[idx][i][0] = isite1; + X->SingleExcitationOperator[idx][i][1] = isigma1; + X->SingleExcitationOperator[idx][i][2] = itype; + X->ParaSingleExcitationOperator[idx][i] = dvalue_re + I * dvalue_im; + }/*for (i = 0; i < X->NSingleExcitationOperator[idx]; ++i)*/ idx++; } - if (idx != X->NSingleExcitationOperator) { + if (idx != X->NNSingleExcitationOperator) { fclose(fp); return ReadDefFileError(defname); } @@ -1742,42 +1750,49 @@ int ReadDefFileIdxPara( case KWPairExcitation: /*pairexcitation.def----------------------------------------*/ - if(X->NPairExcitationOperator>0) { + if(X->NNPairExcitationOperator>0) { while (fgetsMPI(ctmp2, 256, fp) != NULL) { - sscanf(ctmp2, "%d %d %d %d %d %lf %lf\n", - &isite1, - &isigma1, - &isite2, - &isigma2, - &itype, - &dvalue_re, - &dvalue_im - ); - if (CheckPairSite(isite1, isite2, X->Nsite) != 0) { - fclose(fp); - return ReadDefFileError(defname); - } - - if(itype==1){ - X->PairExcitationOperator[idx][0] = isite1; - X->PairExcitationOperator[idx][1] = isigma1; - X->PairExcitationOperator[idx][2] = isite2; - X->PairExcitationOperator[idx][3] = isigma2; - X->PairExcitationOperator[idx][4] = itype; - X->ParaPairExcitationOperator[idx] = dvalue_re + I * dvalue_im; - } - else{ - X->PairExcitationOperator[idx][0] = isite2; - X->PairExcitationOperator[idx][1] = isigma2; - X->PairExcitationOperator[idx][2] = isite1; - X->PairExcitationOperator[idx][3] = isigma1; - X->PairExcitationOperator[idx][4] = itype; - X->ParaPairExcitationOperator[idx] = -(dvalue_re + I * dvalue_im); - } + sscanf(ctmp2, "%d\n", &X->NPairExcitationOperator[idx]); + X->PairExcitationOperator[idx] = (int**)malloc(sizeof(int*)*X->NPairExcitationOperator[idx]); + 
X->ParaPairExcitationOperator[idx] = (double complex*)malloc( + sizeof(double complex)*X->NPairExcitationOperator[idx]); + for (i = 0; i < X->NPairExcitationOperator[idx]; ++i) { + fgetsMPI(ctmp2, 256, fp); + sscanf(ctmp2, "%d %d %d %d %d %lf %lf\n", + &isite1, + &isigma1, + &isite2, + &isigma2, + &itype, + &dvalue_re, + &dvalue_im + ); + if (CheckPairSite(isite1, isite2, X->Nsite) != 0) { + fclose(fp); + return ReadDefFileError(defname); + } + X->PairExcitationOperator[idx][i] = (int*)malloc(sizeof(int) * 5); + if (itype == 1) { + X->PairExcitationOperator[idx][i][0] = isite1; + X->PairExcitationOperator[idx][i][1] = isigma1; + X->PairExcitationOperator[idx][i][2] = isite2; + X->PairExcitationOperator[idx][i][3] = isigma2; + X->PairExcitationOperator[idx][i][4] = itype; + X->ParaPairExcitationOperator[idx][i] = dvalue_re + I * dvalue_im; + } + else { + X->PairExcitationOperator[idx][i][0] = isite2; + X->PairExcitationOperator[idx][i][1] = isigma2; + X->PairExcitationOperator[idx][i][2] = isite1; + X->PairExcitationOperator[idx][i][3] = isigma1; + X->PairExcitationOperator[idx][i][4] = itype; + X->ParaPairExcitationOperator[idx][i] = -(dvalue_re + I * dvalue_im); + } + }/*for (i = 0; i < X->NPairExcitationOperator[idx]; ++i)*/ idx++; } - if (idx != X->NPairExcitationOperator) { + if (idx != X->NNPairExcitationOperator) { fclose(fp); return ReadDefFileError(defname); } @@ -2565,8 +2580,8 @@ void InitializeInteractionNum X->NInterAll=0; X->NCisAjt=0; X->NCisAjtCkuAlvDC=0; - X->NSingleExcitationOperator=0; - X->NPairExcitationOperator=0; + X->NNSingleExcitationOperator=0; + X->NNPairExcitationOperator=0; //[s] Time Evolution X->NTETimeSteps=0; X->NLaser=0; diff --git a/src/xsetmem.c b/src/xsetmem.c index b6b0dae22..2dd40f336 100644 --- a/src/xsetmem.c +++ b/src/xsetmem.c @@ -100,10 +100,14 @@ void setmem_def X->Def.CisAjt = i_2d_allocate(X->Def.NCisAjt, 4); X->Def.CisAjtCkuAlvDC = i_2d_allocate(X->Def.NCisAjtCkuAlvDC, 8); - X->Def.SingleExcitationOperator = 
i_2d_allocate(X->Def.NSingleExcitationOperator, 3); - X->Def.ParaSingleExcitationOperator = cd_1d_allocate(X->Def.NSingleExcitationOperator); - X->Def.PairExcitationOperator = i_2d_allocate(X->Def.NPairExcitationOperator, 5); - X->Def.ParaPairExcitationOperator = cd_1d_allocate(X->Def.NPairExcitationOperator); + X->Def.NSingleExcitationOperator = ui_1d_allocate(X->Def.NNSingleExcitationOperator); + X->Def.SingleExcitationOperator = (int***)malloc(sizeof(int**)*X->Def.NNSingleExcitationOperator); + X->Def.ParaSingleExcitationOperator = (double complex**)malloc( + sizeof(double complex*)*X->Def.NNSingleExcitationOperator); + X->Def.NPairExcitationOperator = ui_1d_allocate(X->Def.NNPairExcitationOperator); + X->Def.PairExcitationOperator = (int***)malloc(sizeof(int**)*X->Def.NNPairExcitationOperator); + X->Def.ParaPairExcitationOperator = (double complex**)malloc( + sizeof(double complex*)*X->Def.NNPairExcitationOperator); X->Def.ParaLaser = d_1d_allocate(X->Def.NLaser); From f76a0b7822138b4928b42091f932e60f55d5d0af Mon Sep 17 00:00:00 2001 From: mitsuaki1987 Date: Fri, 22 Mar 2019 18:35:30 +0900 Subject: [PATCH 17/50] Backup --- src/CalcByTEM.c | 1 + src/CalcSpectrum.c | 1 - src/CalcSpectrumByBiCG.c | 10 +- src/CalcSpectrumByFullDiag.c | 2 +- src/HPhiTrans.c | 12 +- src/PairExHubbard.c | 8 +- src/PairExSpin.c | 15 +- src/SingleExHubbard.c | 4 +- src/StdFace/StdFace_main.c | 11 +- src/StdFace/Wannier90.c | 4 +- src/diagonalcalc.c | 363 +++++++++++------------ src/eigenIO.c | 1 - src/expec_cisajs.c | 27 +- src/expec_cisajscktaltdc.c | 54 +--- src/expec_energy_flct.c | 3 +- src/expec_totalspin.c | 1 - src/include/diagonalcalc.h | 44 +-- src/include/mltplyMPISpinCore.h | 12 +- src/include/struct.h | 9 +- src/mltply.c | 4 +- src/mltplyHubbardCore.c | 4 - src/mltplyMPIHubbard.c | 14 +- src/mltplyMPIHubbardCore.c | 20 +- src/mltplyMPISpin.c | 14 +- src/mltplyMPISpinCore.c | 79 +++-- src/mltplySpinCore.c | 3 - src/phys.c | 7 +- src/readdef.c | 8 +- src/wrapperMPI.c | 2 +- 
src/xsetmem.c | 7 - tool/CMakeLists.txt | 4 +- tool/dynamicalr2k.F90 | 511 ++++++++++++++++++++++++++++++++ tool/key2lower.c | 2 +- 33 files changed, 803 insertions(+), 458 deletions(-) create mode 100644 tool/dynamicalr2k.F90 diff --git a/src/CalcByTEM.c b/src/CalcByTEM.c index ee33ad015..e59e74f6a 100644 --- a/src/CalcByTEM.c +++ b/src/CalcByTEM.c @@ -98,6 +98,7 @@ int CalcByTEM( if (i_max != X->Bind.Check.idim_max) { fprintf(stderr, "Error: A file of Inputvector is incorrect.\n"); fclose(fp); + printf("byte_size : %d\n", (int)byte_size); exitMPI(-1); } fread(&v1[0][0], sizeof(complex double), X->Bind.Check.idim_max + 1, fp); diff --git a/src/CalcSpectrum.c b/src/CalcSpectrum.c index 13baf41ec..1eafb32c6 100644 --- a/src/CalcSpectrum.c +++ b/src/CalcSpectrum.c @@ -421,7 +421,6 @@ int CalcSpectrum( int i_stp, NdcSpectrum; int iFlagListModified = FALSE; FILE *fp; - double dnorm; double complex **v1Org; /**< Input vector to calculate spectrum function.*/ //ToDo: Nomega should be given as a parameter diff --git a/src/CalcSpectrumByBiCG.c b/src/CalcSpectrumByBiCG.c index c03e66838..ff8feba2f 100644 --- a/src/CalcSpectrumByBiCG.c +++ b/src/CalcSpectrumByBiCG.c @@ -45,7 +45,7 @@ void ReadTMComponents_BiCG( char sdt[D_FileNameMax]; char ctmp[256]; - int one = 1, status[3], idim_max2int, max_step, iter_old; + int status[3], idim_max2int, max_step, iter_old; unsigned long int idx; double complex *alphaCG, *betaCG, *res_save, z_seed; double z_seed_r, z_seed_i, alpha_r, alpha_i, beta_r, beta_i, res_r, res_i; @@ -176,10 +176,10 @@ int CalcSpectrumByBiCG( unsigned long int idim, i_max; FILE *fp; size_t byte_size; - int iret, max_step, idcSpectrum; + int idcSpectrum; unsigned long int liLanczosStp_vec = 0; double complex **vL, **v12, **v14, *res_proj; - int stp, one = 1, status[3], iomega; + int stp, status[3], iomega; double *resz; fprintf(stdoutMPI, "##### Spectrum calculation with BiCG #####\n\n"); @@ -257,8 +257,8 @@ int CalcSpectrumByBiCG( */ zclear(X->Bind.Check.idim_max, 
&v12[1][0]); zclear(X->Bind.Check.idim_max, &v14[1][0]); - iret = mltply(&X->Bind, 1, v12, v2); - iret = mltply(&X->Bind, 1, v14, v4); + mltply(&X->Bind, 1, v12, v2); + mltply(&X->Bind, 1, v14, v4); for (idcSpectrum = 0; idcSpectrum < NdcSpectrum; idcSpectrum++) { zclear(X->Bind.Check.idim_max, &vL[1][0]); diff --git a/src/CalcSpectrumByFullDiag.c b/src/CalcSpectrumByFullDiag.c index 15be6f16d..7fdd4136b 100644 --- a/src/CalcSpectrumByFullDiag.c +++ b/src/CalcSpectrumByFullDiag.c @@ -48,7 +48,7 @@ int CalcSpectrumByFullDiag( { int idim, jdim, iomega; int idim_max_int; - int incr = 1, idcSpectrum; + int idcSpectrum; double complex **vR, **vL, vRv, vLv, *vLvvRv; /**
        diff --git a/src/HPhiTrans.c b/src/HPhiTrans.c index 76c54121a..6673405f8 100644 --- a/src/HPhiTrans.c +++ b/src/HPhiTrans.c @@ -46,8 +46,8 @@ int HPhiTrans(struct BindStruct *X) { FILE *fp_err; char sdt_err[D_FileNameMax]; - int i, k; - int cnt_trans; + unsigned int i, k; + unsigned int cnt_trans; strcpy(sdt_err, cFileNameWarningOnTransfer); if (childfopenMPI(sdt_err, "w", &fp_err) != 0) { @@ -90,7 +90,7 @@ int HPhiTrans(struct BindStruct *X) { * @author Kota Ido (The University of Tokyo) */ int TransferWithPeierls(struct BindStruct *X, const double time) { - int i; + unsigned int i; int ri_x, rj_x; int ri_y, rj_y; double complex dir; @@ -172,18 +172,15 @@ int TransferWithPeierls(struct BindStruct *X, const double time) { * @author Kota Ido (The University of Tokyo) */ int TransferForQuench(struct BindStruct *X, const double time) { - int i, k; + unsigned int i; int ri_x, rj_x; int ri_y, rj_y; - double complex dir; const int Mode = (int) (X->Def.ParaLaser[0]); const double Avp = X->Def.ParaLaser[1]; const double time_d = X->Def.ParaLaser[3]; const double time_c = X->Def.ParaLaser[4]; const int Lx = (int) (X->Def.ParaLaser[5]); const int Ly = (int) (X->Def.ParaLaser[6]); - const double dirX = X->Def.ParaLaser[7]; - const double dirY = X->Def.ParaLaser[8]; const double dt = time - time_c; double Bessel; @@ -214,7 +211,6 @@ int TransferForQuench(struct BindStruct *X, const double time) { } else if (ri_y - rj_y < -1) { rj_y -= Ly; } - dir = dirX * (ri_x - rj_x) + dirY * (ri_y - rj_y); X->Def.EDParaGeneralTransfer[i] = X->Def.ParaGeneralTransfer[i] * Bessel; } diff --git a/src/PairExHubbard.c b/src/PairExHubbard.c index 63df9d709..521017fdb 100644 --- a/src/PairExHubbard.c +++ b/src/PairExHubbard.c @@ -46,8 +46,8 @@ int GetPairExcitedStateHubbardGC( long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; double complex tmp_trans = 0; - long int i_max; - long int ibit; + long unsigned int i_max; + long unsigned int ibit; long unsigned int is; i_max = 
X->Check.idim_maxOrg; for (i = 0; i < X->Def.NPairExcitationOperator[iEx]; i++) { @@ -142,9 +142,9 @@ int GetPairExcitedStateHubbard( long unsigned int tmp_off = 0; double complex tmp_trans = 0, dmv; - long int i_max; + long unsigned int i_max; int tmp_sgn, num1, one = 1; - long int ibit; + long unsigned int ibit; long unsigned int is, Asum, Adiff; long unsigned int ibitsite1, ibitsite2; diff --git a/src/PairExSpin.c b/src/PairExSpin.c index 7aee48eb9..8620808b5 100644 --- a/src/PairExSpin.c +++ b/src/PairExSpin.c @@ -44,7 +44,7 @@ int GetPairExcitedStateHalfSpinGC( long unsigned int tmp_off = 0; double complex tmp_trans = 0, dmv; - long int i_max; + long unsigned int i_max; int tmp_sgn, one = 1; i_max = X->Check.idim_maxOrg; @@ -135,7 +135,7 @@ int GetPairExcitedStateGeneralSpinGC( long unsigned int tmp_off = 0; int one = 1; double complex tmp_trans = 0, dmv; - long int i_max; + long unsigned int i_max; i_max = X->Check.idim_maxOrg; for (i = 0; i < X->Def.NPairExcitationOperator[iEx]; i++) { @@ -250,7 +250,7 @@ int GetPairExcitedStateHalfSpin( long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; long unsigned int tmp_off = 0; double complex tmp_trans = 0, dmv; - long int i_max; + long unsigned int i_max; int num1, one = 1; long int ibit1; long unsigned int is1_up; @@ -323,9 +323,8 @@ firstprivate(i_max,isite1,org_sigma1,X,tmp_trans) shared(tmp_v0,tmp_v1,one,nstat else { //org_sigma1 != org_sigma2 // for the canonical case if (org_isite1 > X->Def.Nsite) {//For MPI X_child_CisAit_spin_MPIdouble(org_isite1 - 1, org_sigma2, tmp_trans, - X, nstate, tmp_v0, tmp_v1, tmp_v1bufOrg, i_max, X->Def.Tpow, - list_1_org, list_1buf_org, list_2_1, list_2_2, - X->Large.irght, X->Large.ilft, X->Large.ihfbit); + X, nstate, tmp_v0, tmp_v1, i_max, + list_1_org, list_1buf_org, list_2_1, list_2_2); } else { isite1 = X->Def.Tpow[org_isite1 - 1]; @@ -368,7 +367,7 @@ int GetPairExcitedStateGeneralSpin( long unsigned int tmp_off = 0; long unsigned int off = 0; double complex 
tmp_trans = 0, dmv; - long int i_max; + long unsigned int i_max; int tmp_sgn, num1, one = 1; i_max = X->Check.idim_maxOrg; @@ -415,7 +414,7 @@ int GetPairExcitedStateGeneralSpin( else {//org_sigma1 != org_sigma2 X_child_CisAit_GeneralSpin_MPIdouble(org_isite1 - 1, org_sigma1, org_sigma2, tmp_trans, X, nstate, tmp_v0, tmp_v1, - tmp_v1bufOrg, i_max, list_1_org, list_1buf_org, X->Large.ihfbit); + i_max, list_1_org, list_1buf_org); } } else {//org_isite1 <= X->Def.Nsite diff --git a/src/SingleExHubbard.c b/src/SingleExHubbard.c index f27d3ec80..fce95fc60 100644 --- a/src/SingleExHubbard.c +++ b/src/SingleExHubbard.c @@ -39,7 +39,7 @@ int GetSingleExcitedStateHubbard( double complex **tmp_v1,//!<[in] v0 = H v1 int iEx ) { - long int idim_max, idim_maxMPI; + long unsigned int idim_max, idim_maxMPI; long unsigned int i, j; long unsigned int org_isite, ispin, itype; long unsigned int is1_spin; @@ -120,7 +120,7 @@ int GetSingleExcitedStateHubbardGC( double complex **tmp_v1,//!<[in] v0 = H v1 int iEx ) { - long int idim_max, idim_maxMPI; + long unsigned int idim_max, idim_maxMPI; long unsigned int i, j; long unsigned int org_isite, ispin, itype; long unsigned int is1_spin; diff --git a/src/StdFace/StdFace_main.c b/src/StdFace/StdFace_main.c index 9141927d8..83ecdcfc1 100644 --- a/src/StdFace/StdFace_main.c +++ b/src/StdFace/StdFace_main.c @@ -234,7 +234,7 @@ static void PrintCalcMod(struct StdIntList *StdI) static void PrintExcitation(struct StdIntList *StdI) { FILE *fp; int NumOp, **spin, isite, ispin, icell, itau, iEx; - double *coef, pi, Cphase, S, Sz; + double *coef, Cphase, S, Sz; double *fourier_r, *fourier_i; if (strcmp(StdI->model, "spin") == 0 && StdI->S2 > 1) { @@ -324,7 +324,7 @@ static void PrintExcitation(struct StdIntList *StdI) { } else if (strcmp(StdI->SpectrumType, "density") == 0) { NumOp = 2; - coef[0] = 1,0; + coef[0] = 1.0; coef[1] = 1.0; spin[0][0] = 0; spin[0][1] = 0; @@ -432,10 +432,9 @@ static void PrintExcitation(struct StdIntList *StdI) { */ static 
void VectorPotential(struct StdIntList *StdI) { FILE *fp; - int it, ii, isite, icell, itau, itrans, jsite, jcell, jtau, ntrans0; - double Cphase, time, dR[3]; + int it, ii; + double time; double **Et; - double complex coef; fprintf(stdout, "\n @ Time-evolution\n\n"); @@ -529,7 +528,7 @@ static void VectorPotential(struct StdIntList *StdI) { */ static void PrintPump(struct StdIntList *StdI) { FILE *fp; - int it, ii, isite, ipump, jpump, npump0; + int it, isite, ipump, jpump, npump0; if (StdI->PumpBody == 1) { diff --git a/src/StdFace/Wannier90.c b/src/StdFace/Wannier90.c index 92a963aa4..b570fc4f1 100644 --- a/src/StdFace/Wannier90.c +++ b/src/StdFace/Wannier90.c @@ -34,7 +34,7 @@ static void geometry_W90( ) { int isite, ii, ierr; - char filename[256]; + char filename[265]; FILE *fp; sprintf(filename, "%s_geom.dat", StdI->CDataFileHead); @@ -352,7 +352,7 @@ void StdFace_Wannier90( int n_t, n_u, n_j; double complex *W90_t, *W90_j, *W90_u; int **t_indx, **u_indx, **j_indx; - char filename[256]; + char filename[263]; /**@brief (1) Compute the shape of the super-cell and sites in the super-cell */ diff --git a/src/diagonalcalc.c b/src/diagonalcalc.c index a2d0d99a6..85183acd0 100644 --- a/src/diagonalcalc.c +++ b/src/diagonalcalc.c @@ -37,120 +37,6 @@ #include "diagonalcalc.h" #include "mltplySpinCore.h" #include "wrapperMPI.h" -/** - * @brief Calculate diagonal components and obtain the list, list_diagonal. - * - * @param X [in] Struct to get the information of the diagonal operators. - * - * @author Takahiro Misawa (The University of Tokyo) - * @author Kazuyoshi Yoshimi (The University of Tokyo) - * @retval -1 fail to calculate diagonal components. - * @retval 0 succeed to calculate diagonal components. 
- */ -int diagonalcalc -( - struct BindStruct *X -) { - - FILE *fp; - long unsigned int i, j; - long unsigned int isite1, isite2; - long unsigned int spin; - double tmp_V; - - /*[s] For InterAll*/ - long unsigned int A_spin, B_spin; - /*[e] For InterAll*/ - long unsigned int i_max = X->Check.idim_max; - - fprintf(stdoutMPI, "%s", cProStartCalcDiag); - TimeKeeper(X, cFileNameTimeKeep, cDiagonalCalcStart, "a"); - -#pragma omp parallel for default(none) private(j) shared(list_Diagonal) firstprivate(i_max) - for (j = 1; j <= i_max; j++) { - list_Diagonal[j] = 0.0; - } - - if (X->Def.NCoulombIntra > 0) { - if (childfopenMPI(cFileNameCheckCoulombIntra, "w", &fp) != 0) { - return -1; - } - for (i = 0; i < X->Def.NCoulombIntra; i++) { - isite1 = X->Def.CoulombIntra[i][0] + 1; - tmp_V = X->Def.ParaCoulombIntra[i]; - fprintf(fp, "i=%ld isite1=%ld tmp_V=%lf \n", i, isite1, tmp_V); - SetDiagonalCoulombIntra(isite1, tmp_V, X); - } - fclose(fp); - } - - if (X->Def.EDNChemi > 0) { - if (childfopenMPI(cFileNameCheckChemi, "w", &fp) != 0) { - return -1; - } - for (i = 0; i < X->Def.EDNChemi; i++) { - isite1 = X->Def.EDChemi[i] + 1; - spin = X->Def.EDSpinChemi[i]; - tmp_V = -X->Def.EDParaChemi[i]; - fprintf(fp, "i=%ld spin=%ld isite1=%ld tmp_V=%lf \n", i, spin, isite1, tmp_V); - if (SetDiagonalChemi(isite1, tmp_V, spin, X) != 0) { - return -1; - } - } - fclose(fp); - } - - if (X->Def.NCoulombInter > 0) { - if (childfopenMPI(cFileNameCheckInterU, "w", &fp) != 0) { - return -1; - } - for (i = 0; i < X->Def.NCoulombInter; i++) { - isite1 = X->Def.CoulombInter[i][0] + 1; - isite2 = X->Def.CoulombInter[i][1] + 1; - tmp_V = X->Def.ParaCoulombInter[i]; - fprintf(fp, "i=%ld isite1=%ld isite2=%ld tmp_V=%lf \n", i, isite1, isite2, tmp_V); - if (SetDiagonalCoulombInter(isite1, isite2, tmp_V, X) != 0) { - return -1; - } - } - fclose(fp); - } - if (X->Def.NHundCoupling > 0) { - if (childfopenMPI(cFileNameCheckHund, "w", &fp) != 0) { - return -1; - } - for (i = 0; i < X->Def.NHundCoupling; i++) { 
- isite1 = X->Def.HundCoupling[i][0] + 1; - isite2 = X->Def.HundCoupling[i][1] + 1; - tmp_V = -X->Def.ParaHundCoupling[i]; - if (SetDiagonalHund(isite1, isite2, tmp_V, X) != 0) { - return -1; - } - fprintf(fp, "i=%ld isite1=%ld isite2=%ld tmp_V=%lf \n", i, isite1, isite2, tmp_V); - } - fclose(fp); - } - - if (X->Def.NInterAll_Diagonal > 0) { - if (childfopenMPI(cFileNameCheckInterAll, "w", &fp) != 0) { - return -1; - } - for (i = 0; i < X->Def.NInterAll_Diagonal; i++) { - isite1 = X->Def.InterAll_Diagonal[i][0] + 1; - A_spin = X->Def.InterAll_Diagonal[i][1]; - isite2 = X->Def.InterAll_Diagonal[i][2] + 1; - B_spin = X->Def.InterAll_Diagonal[i][3]; - tmp_V = X->Def.ParaInterAll_Diagonal[i]; - fprintf(fp, "i=%ld isite1=%ld A_spin=%ld isite2=%ld B_spin=%ld tmp_V=%lf \n", i, isite1, A_spin, isite2, B_spin, tmp_V); - SetDiagonalInterAll(isite1, isite2, A_spin, B_spin, tmp_V, X); - } - fclose(fp); - } - - TimeKeeper(X, cFileNameTimeKeep, cDiagonalCalcFinish, "a"); - fprintf(stdoutMPI, "%s", cProEndCalcDiag); - return 0; -} /** * @brief Update the vector by the general two-body diagonal interaction, \f$ H_{i\sigma_1 j\sigma_2} n_ {i\sigma_1}n_{j\sigma_2}\f$.\n * (Using in Time Evolution mode). 
@@ -175,7 +61,6 @@ int SetDiagonalTEInterAll( long unsigned int isigma2, double dtmp_V, struct BindStruct *X, - int nstate, double complex *tmp_v0, double complex *tmp_v1 ) { @@ -192,9 +77,6 @@ int SetDiagonalTEInterAll( long unsigned int j; long unsigned int i_max = X->Check.idim_max; - double complex dam_pr = 0.0; - - /* Forse isite1 <= isite2 */ @@ -249,17 +131,13 @@ int SetDiagonalTEInterAll( }/*if (isite1 > X->Def.Nsite)*/ if (num1 * num2 != 0) { -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1) \ +#pragma omp parallel for default(none) shared(tmp_v0, tmp_v1) \ firstprivate(i_max, dtmp_V) private(j) for (j = 1; j <= i_max; j++) { tmp_v0[j] += dtmp_V * tmp_v1[j]; - dam_pr += dtmp_V * conj(tmp_v1[j]) * tmp_v1[j]; } } - dam_pr = SumMPI_dc(dam_pr); - X->Large.prdct += dam_pr; return 0; - }/*if (isite1 > X->Def.Nsite)*/ else if (isite2 > X->Def.Nsite) { @@ -274,14 +152,13 @@ firstprivate(i_max, dtmp_V) private(j) ibit2_spin = (unsigned long int)myrank&is2_spin; num2 += ibit2_spin / is2_spin; if (num2 != 0) { -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1)\ +#pragma omp parallel for default(none) shared(tmp_v0, tmp_v1)\ firstprivate(i_max, dtmp_V, is1_spin) private(num1, ibit1_spin, j) for (j = 1; j <= i_max; j++) { num1 = 0; ibit1_spin = (j - 1) & is1_spin; num1 += ibit1_spin / is1_spin; tmp_v0[j] += dtmp_V * num1 * tmp_v1[j]; - dam_pr += dtmp_V * num1 * conj(tmp_v1[j]) * tmp_v1[j]; } } break;/*case HubbardGC:*/ @@ -297,14 +174,13 @@ firstprivate(i_max, dtmp_V) private(j) ibit2_spin = (unsigned long int)myrank&is2_spin; num2 += ibit2_spin / is2_spin; if (num2 != 0) { -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1, list_1)\ +#pragma omp parallel for default(none) shared(tmp_v0, tmp_v1, list_1)\ firstprivate(i_max, dtmp_V, is1_spin) private(num1, ibit1_spin, j) for (j = 1; j <= i_max; j++) { num1 = 0; ibit1_spin = list_1[j] & is1_spin; num1 += ibit1_spin / is1_spin; 
tmp_v0[j] += dtmp_V * num1*tmp_v1[j]; - dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; } } break;/*case KondoGC, Hubbard, Kondo:*/ @@ -317,12 +193,11 @@ firstprivate(i_max, dtmp_V) private(j) num2 = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, isigma2); if (num2 != 0) { -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1)\ +#pragma omp parallel for default(none) shared(tmp_v0, tmp_v1)\ firstprivate(i_max, dtmp_V, is1_up, isigma1, X) private(num1, j) for (j = 1; j <= i_max; j++) { num1 = X_SpinGC_CisAis(j, X, is1_up, isigma1); tmp_v0[j] += dtmp_V * num1 * tmp_v1[j]; - dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; } } }/* if (X->Def.iFlgGeneralSpin == FALSE)*/ @@ -330,12 +205,11 @@ firstprivate(i_max, dtmp_V) private(j) num2 = BitCheckGeneral((unsigned long int)myrank, isite2, isigma2, X->Def.SiteToBit, X->Def.Tpow); if (num2 != 0) { -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1) \ +#pragma omp parallel for default(none) shared(tmp_v0, tmp_v1) \ firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) for (j = 1; j <= i_max; j++) { num1 = BitCheckGeneral(j - 1, isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); tmp_v0[j] += dtmp_V * num1 * tmp_v1[j]; - dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; } } }/* if (X->Def.iFlgGeneralSpin == TRUE)*/ @@ -350,12 +224,11 @@ firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) num2 = X_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, isigma2); if (num2 != 0) { -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1) \ +#pragma omp parallel for default(none) shared(tmp_v0, tmp_v1) \ firstprivate(i_max, dtmp_V, is1_up, isigma1, X, num2) private(j, num1) for (j = 1; j <= i_max; j++) { num1 = X_Spin_CisAis(j, X, is1_up, isigma1); tmp_v0[j] += dtmp_V * num1 * tmp_v1[j]; - dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; } } }/* if (X->Def.iFlgGeneralSpin == FALSE)*/ @@ -363,12 
+236,11 @@ firstprivate(i_max, dtmp_V, is1_up, isigma1, X, num2) private(j, num1) num2 = BitCheckGeneral((unsigned long int)myrank, isite2, isigma2, \ X->Def.SiteToBit, X->Def.Tpow); if (num2 != 0) { -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1, list_1)\ +#pragma omp parallel for default(none) shared(tmp_v0, tmp_v1, list_1)\ firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) for (j = 1; j <= i_max; j++) { num1 = BitCheckGeneral(list_1[j], isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); tmp_v0[j] += dtmp_V * num1 * tmp_v1[j]; - dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; } } } /* if (X->Def.iFlgGeneralSpin == TRUE)*/ @@ -380,8 +252,6 @@ firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) return -1; }/*switch (X->Def.iCalcModel)*/ - dam_pr = SumMPI_dc(dam_pr); - X->Large.prdct += dam_pr; return 0; }/*else if (isite2 > X->Def.Nsite)*/ @@ -389,7 +259,9 @@ firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) case HubbardGC: //list_1[j] -> j-1 is1_spin = X->Def.Tpow[2 * isite1 - 2 + isigma1]; is2_spin = X->Def.Tpow[2 * isite2 - 2 + isigma2]; -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1_spin, is2_spin) private(num1, ibit1_spin, num2, ibit2_spin) +#pragma omp parallel for default(none) \ +shared(tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1_spin, is2_spin) \ +private(num1, ibit1_spin, num2, ibit2_spin) for (j = 1; j <= i_max; j++) { num1 = 0; num2 = 0; @@ -398,7 +270,6 @@ firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) ibit2_spin = (j - 1)&is2_spin; num2 += ibit2_spin / is2_spin; tmp_v0[j] += dtmp_V * num1*num2*tmp_v1[j]; - dam_pr += dtmp_V * num1*num2*conj(tmp_v1[j]) * tmp_v1[j]; } break; case KondoGC: @@ -407,7 +278,9 @@ firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) is1_spin = X->Def.Tpow[2 * isite1 - 2 + isigma1]; is2_spin = X->Def.Tpow[2 * isite2 - 2 + isigma2]; -#pragma omp 
parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1, list_1) firstprivate(i_max, dtmp_V, is1_spin, is2_spin) private(num1, ibit1_spin, num2, ibit2_spin) +#pragma omp parallel for default(none) \ +shared(tmp_v0, tmp_v1, list_1) firstprivate(i_max, dtmp_V, is1_spin, is2_spin) \ +private(num1, ibit1_spin, num2, ibit2_spin) for (j = 1; j <= i_max; j++) { num1 = 0; num2 = 0; @@ -417,7 +290,6 @@ firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) ibit2_spin = list_1[j] & is2_spin; num2 += ibit2_spin / is2_spin; tmp_v0[j] += dtmp_V * num1*num2*tmp_v1[j]; - dam_pr += dtmp_V * num1*num2*conj(tmp_v1[j]) * tmp_v1[j]; } break; @@ -425,22 +297,24 @@ firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) if (X->Def.iFlgGeneralSpin == FALSE) { is1_up = X->Def.Tpow[isite1 - 1]; is2_up = X->Def.Tpow[isite2 - 1]; -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1_up, is2_up, isigma1, isigma2, X) private(j, num1, num2) +#pragma omp parallel for default(none) \ +shared(tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1_up, is2_up, isigma1, isigma2, X) \ +private(j, num1, num2) for (j = 1; j <= i_max; j++) { num1 = X_Spin_CisAis(j, X, is1_up, isigma1); num2 = X_Spin_CisAis(j, X, is2_up, isigma2); tmp_v0[j] += dtmp_V * num1*num2*tmp_v1[j]; - dam_pr += dtmp_V * num1*num2*conj(tmp_v1[j]) * tmp_v1[j]; } } else { -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1, list_1) firstprivate(i_max, dtmp_V, isite1, isite2, isigma1, isigma2, X) private(j, num1) +#pragma omp parallel for default(none) \ +shared(tmp_v0, tmp_v1, list_1) firstprivate(i_max, dtmp_V, isite1, isite2, isigma1, isigma2, X) \ +private(j, num1) for (j = 1; j <= i_max; j++) { num1 = BitCheckGeneral(list_1[j], isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); if (num1 != 0) { num1 = BitCheckGeneral(list_1[j], isite2, isigma2, X->Def.SiteToBit, X->Def.Tpow); tmp_v0[j] += dtmp_V * num1*tmp_v1[j]; - 
dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; } } @@ -451,22 +325,24 @@ firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) if (X->Def.iFlgGeneralSpin == FALSE) { is1_up = X->Def.Tpow[isite1 - 1]; is2_up = X->Def.Tpow[isite2 - 1]; -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1_up, is2_up, isigma1, isigma2, X) private(j, num1, num2) +#pragma omp parallel for default(none) \ +shared(tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1_up, is2_up, isigma1, isigma2, X) \ +private(j, num1, num2) for (j = 1; j <= i_max; j++) { num1 = X_SpinGC_CisAis(j, X, is1_up, isigma1); num2 = X_SpinGC_CisAis(j, X, is2_up, isigma2); tmp_v0[j] += dtmp_V * num1*num2*tmp_v1[j]; - dam_pr += dtmp_V * num1*num2*conj(tmp_v1[j]) * tmp_v1[j]; } } else {//start:generalspin -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, isite1, isite2, isigma1, isigma2, X) private(j, num1) +#pragma omp parallel for default(none) \ +shared(tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, isite1, isite2, isigma1, isigma2, X) \ +private(j, num1) for (j = 1; j <= i_max; j++) { num1 = BitCheckGeneral(j - 1, isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); if (num1 != 0) { num1 = BitCheckGeneral(j - 1, isite2, isigma2, X->Def.SiteToBit, X->Def.Tpow); tmp_v0[j] += dtmp_V * num1*tmp_v1[j]; - dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; } } } @@ -476,8 +352,6 @@ firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) fprintf(stdoutMPI, cErrNoModel, X->Def.iCalcModel); return -1; } - dam_pr = SumMPI_dc(dam_pr); - X->Large.prdct += dam_pr; return 0; } /** @@ -502,7 +376,7 @@ int SetDiagonalTEChemi( long unsigned int spin, double dtmp_V, struct BindStruct *X, - int nstate, double complex *tmp_v0, + double complex *tmp_v0, double complex *tmp_v1 ) { long unsigned int is1_up; @@ -512,7 +386,6 @@ int SetDiagonalTEChemi( long unsigned int j; long unsigned int i_max = 
X->Check.idim_max; - double complex dam_pr = 0; /* When isite1 is in the inter process region @@ -555,15 +428,12 @@ int SetDiagonalTEChemi( } /*switch (X->Def.iCalcModel)*/ if (num1 != 0) { -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1) \ +#pragma omp parallel for default(none) shared(tmp_v0, tmp_v1) \ firstprivate(i_max, dtmp_V) private(j) for (j = 1; j <= i_max; j++) { tmp_v0[j] += dtmp_V * tmp_v1[j]; - dam_pr += dtmp_V * conj(tmp_v1[j]) * tmp_v1[j]; } }/*if (num1 != 0)*/ - dam_pr = SumMPI_dc(dam_pr); - X->Large.prdct += dam_pr; return 0; }/*if (isite1 >= X->Def.Nsite*/ @@ -577,12 +447,12 @@ firstprivate(i_max, dtmp_V) private(j) is1 = X->Def.Tpow[2 * isite1 - 1]; } -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1) private(num1, ibit1) +#pragma omp parallel for default(none) \ +shared(tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1) private(num1, ibit1) for (j = 1; j <= i_max; j++) { ibit1 = (j - 1)&is1; num1 = ibit1 / is1; tmp_v0[j] += dtmp_V * num1*tmp_v1[j]; - dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; } break; case KondoGC: @@ -595,33 +465,32 @@ firstprivate(i_max, dtmp_V) private(j) is1 = X->Def.Tpow[2 * isite1 - 1]; } -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, nstate, tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1) private(num1, ibit1) +#pragma omp parallel for default(none) \ +shared(list_1, tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1) private(num1, ibit1) for (j = 1; j <= i_max; j++) { - ibit1 = list_1[j] & is1; num1 = ibit1 / is1; tmp_v0[j] += dtmp_V * num1*tmp_v1[j]; - dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; } break; case SpinGC: if (X->Def.iFlgGeneralSpin == FALSE) { is1_up = X->Def.Tpow[isite1 - 1]; -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, nstate, tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1_up, spin) private(num1) +#pragma omp parallel for default(none) \ 
+shared(list_1, tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1_up, spin) private(num1) for (j = 1; j <= i_max; j++) { num1 = (((j - 1)& is1_up) / is1_up) ^ (1 - spin); tmp_v0[j] += dtmp_V * num1*tmp_v1[j]; - dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; } } else { -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) +#pragma omp parallel for default(none) \ +shared(tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) for (j = 1; j <= i_max; j++) { num1 = BitCheckGeneral(j - 1, isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); if (num1 != 0) { tmp_v0[j] += dtmp_V * tmp_v1[j]; - dam_pr += dtmp_V * conj(tmp_v1[j]) * tmp_v1[j]; } } } @@ -630,20 +499,20 @@ firstprivate(i_max, dtmp_V) private(j) case Spin: if (X->Def.iFlgGeneralSpin == FALSE) { is1_up = X->Def.Tpow[isite1 - 1]; -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, nstate, tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1_up, spin) private(num1) +#pragma omp parallel for default(none) \ +shared(list_1, tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1_up, spin) private(num1) for (j = 1; j <= i_max; j++) { num1 = ((list_1[j] & is1_up) / is1_up) ^ (1 - spin); tmp_v0[j] += dtmp_V * num1*tmp_v1[j]; - dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; } } else { -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1, list_1) firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) +#pragma omp parallel for default(none) \ +shared(tmp_v0, tmp_v1, list_1) firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) for (j = 1; j <= i_max; j++) { num1 = BitCheckGeneral(list_1[j], isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); if (num1 != 0) { tmp_v0[j] += dtmp_V * tmp_v1[j]; - dam_pr += dtmp_V * conj(tmp_v1[j]) * tmp_v1[j]; } } @@ -654,8 +523,6 @@ firstprivate(i_max, dtmp_V) private(j) fprintf(stdoutMPI, cErrNoModel, 
X->Def.iCalcModel); return -1; } - dam_pr = SumMPI_dc(dam_pr); - X->Large.prdct += dam_pr; return 0; } /** @@ -673,14 +540,12 @@ firstprivate(i_max, dtmp_V) private(j) * @version 2.1 * @author Kazuyoshi Yoshimi (The University of Tokyo) */ - int SetDiagonalTETransfer ( long unsigned int isite1, double dtmp_V, long unsigned int spin, struct BindStruct *X, - int nstate, double complex *tmp_v0, double complex *tmp_v1 ) { @@ -689,7 +554,6 @@ int SetDiagonalTETransfer long unsigned int num1; long unsigned int isigma1 = spin; long unsigned int is1, ibit1; - double dam_pr = 0.0; long unsigned int j; long unsigned int i_max = X->Check.idim_max; @@ -734,11 +598,10 @@ int SetDiagonalTETransfer } /*switch (X->Def.iCalcModel)*/ if (num1 != 0) { -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1)\ +#pragma omp parallel for default(none) shared(tmp_v0, tmp_v1)\ firstprivate(i_max, dtmp_V) private(j) for (j = 1; j <= i_max; j++) { tmp_v0[j] += dtmp_V * tmp_v1[j]; - dam_pr += dtmp_V * conj(tmp_v1[j]) * tmp_v1[j]; } } }/*if (isite1 >= X->Def.Nsite*/ @@ -751,13 +614,12 @@ int SetDiagonalTETransfer else { is1 = X->Def.Tpow[2 * isite1 - 1]; } -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, nstate, tmp_v0, tmp_v1) \ +#pragma omp parallel for default(none) shared(list_1, tmp_v0, tmp_v1) \ firstprivate(i_max, dtmp_V, is1) private(num1, ibit1) for (j = 1; j <= i_max; j++) { ibit1 = (j - 1) & is1; num1 = ibit1 / is1; tmp_v0[j] += dtmp_V * num1*tmp_v1[j]; - dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; } break; @@ -770,35 +632,33 @@ int SetDiagonalTETransfer else { is1 = X->Def.Tpow[2 * isite1 - 1]; } -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, nstate, tmp_v0, tmp_v1) \ +#pragma omp parallel for default(none) shared(list_1, tmp_v0, tmp_v1) \ firstprivate(i_max, dtmp_V, is1) private(num1, ibit1) for (j = 1; j <= i_max; j++) { ibit1 = list_1[j] & is1; num1 = ibit1 / is1; tmp_v0[j] += dtmp_V * 
num1*tmp_v1[j]; - dam_pr += dtmp_V * num1*conj(tmp_v1[j]) * tmp_v1[j]; } break; case SpinGC: if (X->Def.iFlgGeneralSpin == FALSE) { is1_up = X->Def.Tpow[isite1 - 1]; -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, nstate, tmp_v0, tmp_v1) \ - firstprivate(i_max, dtmp_V, is1_up, spin) private(num1, ibit1_up) +#pragma omp parallel for default(none) \ +shared(list_1, tmp_v0, tmp_v1) \ +firstprivate(i_max, dtmp_V, is1_up, spin) private(num1, ibit1_up) for (j = 1; j <= i_max; j++) { ibit1_up = (((j - 1) & is1_up) / is1_up) ^ (1 - spin); tmp_v0[j] += dtmp_V * ibit1_up*tmp_v1[j]; - dam_pr += dtmp_V * ibit1_up*conj(tmp_v1[j]) * tmp_v1[j]; } } else { -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(tmp_v0, tmp_v1) \ - firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) +#pragma omp parallel for default(none) shared(tmp_v0, tmp_v1) \ +firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) for (j = 1; j <= i_max; j++) { num1 = BitCheckGeneral(j - 1, isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); if (num1 != 0) { tmp_v0[j] += dtmp_V * tmp_v1[j]; - dam_pr += dtmp_V * conj(tmp_v1[j]) * tmp_v1[j]; } } } @@ -807,21 +667,19 @@ int SetDiagonalTETransfer case Spin: if (X->Def.iFlgGeneralSpin == FALSE) { is1_up = X->Def.Tpow[isite1 - 1]; -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, nstate, tmp_v0, tmp_v1)\ +#pragma omp parallel for default(none) shared(list_1, tmp_v0, tmp_v1)\ firstprivate(i_max, dtmp_V, is1_up, spin) private(num1, ibit1_up) for (j = 1; j <= i_max; j++) { ibit1_up = ((list_1[j] & is1_up) / is1_up) ^ (1 - spin); tmp_v0[j] += dtmp_V * ibit1_up * tmp_v1[j]; - dam_pr += dtmp_V * ibit1_up * conj(tmp_v1[j]) * tmp_v1[j]; } } else { -#pragma omp parallel for default(none) reduction(+:dam_pr) shared(list_1, nstate, tmp_v0, tmp_v1)\ +#pragma omp parallel for default(none) shared(list_1, tmp_v0, tmp_v1)\ firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) for (j = 1; j 
<= i_max; j++) { num1 = BitCheckGeneral(list_1[j], isite1, isigma1, X->Def.SiteToBit, X->Def.Tpow); tmp_v0[j] += dtmp_V * num1 * tmp_v1[j]; - dam_pr += dtmp_V * num1 * conj(tmp_v1[j]) * tmp_v1[j]; } } break; @@ -831,8 +689,6 @@ int SetDiagonalTETransfer return -1; } } - dam_pr = SumMPI_dc(dam_pr); - X->Large.prdct += dam_pr; return 0; } /// @fn diagonalcalcForTE() Update the vector for diagonal operators ( using in Time Evolution mode). @@ -846,7 +702,6 @@ int diagonalcalcForTE ( const int _istep, struct BindStruct *X, - int nstate, double complex *tmp_v0, double complex *tmp_v1 ) { @@ -861,7 +716,7 @@ int diagonalcalcForTE isite1 = X->Def.TETransferDiagonal[_istep][i][0] + 1; A_spin = X->Def.TETransferDiagonal[_istep][i][1]; tmp_V = X->Def.ParaTETransferDiagonal[_istep][i]; - SetDiagonalTETransfer(isite1, tmp_V, A_spin, X, nstate, tmp_v0, tmp_v1); + SetDiagonalTETransfer(isite1, tmp_V, A_spin, X, tmp_v0, tmp_v1); } } else if (X->Def.NTEInterAllDiagonal[_istep] > 0) { @@ -873,7 +728,7 @@ int diagonalcalcForTE B_spin = X->Def.TEInterAllDiagonal[_istep][i][3]; tmp_V = X->Def.ParaTEInterAllDiagonal[_istep][i]; - if (SetDiagonalTEInterAll(isite1, isite2, A_spin, B_spin, tmp_V, X, nstate, tmp_v0, tmp_v1) != 0) { + if (SetDiagonalTEInterAll(isite1, isite2, A_spin, B_spin, tmp_V, X, tmp_v0, tmp_v1) != 0) { return -1; } } @@ -883,7 +738,7 @@ int diagonalcalcForTE isite1 = X->Def.TEChemi[_istep][i] + 1; A_spin = X->Def.SpinTEChemi[_istep][i]; tmp_V = -X->Def.ParaTEChemi[_istep][i]; - if (SetDiagonalTEChemi(isite1, A_spin, tmp_V, X, nstate, tmp_v0, tmp_v1) != 0) { + if (SetDiagonalTEChemi(isite1, A_spin, tmp_V, X, tmp_v0, tmp_v1) != 0) { return -1; } } @@ -2043,3 +1898,117 @@ firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) } return 0; } +/** + * @brief Calculate diagonal components and obtain the list, list_diagonal. + * + * @param X [in] Struct to get the information of the diagonal operators. 
+ * + * @author Takahiro Misawa (The University of Tokyo) + * @author Kazuyoshi Yoshimi (The University of Tokyo) + * @retval -1 fail to calculate diagonal components. + * @retval 0 succeed to calculate diagonal components. + */ +int diagonalcalc +( + struct BindStruct *X +) { + + FILE *fp; + long unsigned int i, j; + long unsigned int isite1, isite2; + long unsigned int spin; + double tmp_V; + + /*[s] For InterAll*/ + long unsigned int A_spin, B_spin; + /*[e] For InterAll*/ + long unsigned int i_max = X->Check.idim_max; + + fprintf(stdoutMPI, "%s", cProStartCalcDiag); + TimeKeeper(X, cFileNameTimeKeep, cDiagonalCalcStart, "a"); + +#pragma omp parallel for default(none) private(j) shared(list_Diagonal) firstprivate(i_max) + for (j = 1; j <= i_max; j++) { + list_Diagonal[j] = 0.0; + } + + if (X->Def.NCoulombIntra > 0) { + if (childfopenMPI(cFileNameCheckCoulombIntra, "w", &fp) != 0) { + return -1; + } + for (i = 0; i < X->Def.NCoulombIntra; i++) { + isite1 = X->Def.CoulombIntra[i][0] + 1; + tmp_V = X->Def.ParaCoulombIntra[i]; + fprintf(fp, "i=%ld isite1=%ld tmp_V=%lf \n", i, isite1, tmp_V); + SetDiagonalCoulombIntra(isite1, tmp_V, X); + } + fclose(fp); + } + + if (X->Def.EDNChemi > 0) { + if (childfopenMPI(cFileNameCheckChemi, "w", &fp) != 0) { + return -1; + } + for (i = 0; i < X->Def.EDNChemi; i++) { + isite1 = X->Def.EDChemi[i] + 1; + spin = X->Def.EDSpinChemi[i]; + tmp_V = -X->Def.EDParaChemi[i]; + fprintf(fp, "i=%ld spin=%ld isite1=%ld tmp_V=%lf \n", i, spin, isite1, tmp_V); + if (SetDiagonalChemi(isite1, tmp_V, spin, X) != 0) { + return -1; + } + } + fclose(fp); + } + + if (X->Def.NCoulombInter > 0) { + if (childfopenMPI(cFileNameCheckInterU, "w", &fp) != 0) { + return -1; + } + for (i = 0; i < X->Def.NCoulombInter; i++) { + isite1 = X->Def.CoulombInter[i][0] + 1; + isite2 = X->Def.CoulombInter[i][1] + 1; + tmp_V = X->Def.ParaCoulombInter[i]; + fprintf(fp, "i=%ld isite1=%ld isite2=%ld tmp_V=%lf \n", i, isite1, isite2, tmp_V); + if 
(SetDiagonalCoulombInter(isite1, isite2, tmp_V, X) != 0) { + return -1; + } + } + fclose(fp); + } + if (X->Def.NHundCoupling > 0) { + if (childfopenMPI(cFileNameCheckHund, "w", &fp) != 0) { + return -1; + } + for (i = 0; i < X->Def.NHundCoupling; i++) { + isite1 = X->Def.HundCoupling[i][0] + 1; + isite2 = X->Def.HundCoupling[i][1] + 1; + tmp_V = -X->Def.ParaHundCoupling[i]; + if (SetDiagonalHund(isite1, isite2, tmp_V, X) != 0) { + return -1; + } + fprintf(fp, "i=%ld isite1=%ld isite2=%ld tmp_V=%lf \n", i, isite1, isite2, tmp_V); + } + fclose(fp); + } + + if (X->Def.NInterAll_Diagonal > 0) { + if (childfopenMPI(cFileNameCheckInterAll, "w", &fp) != 0) { + return -1; + } + for (i = 0; i < X->Def.NInterAll_Diagonal; i++) { + isite1 = X->Def.InterAll_Diagonal[i][0] + 1; + A_spin = X->Def.InterAll_Diagonal[i][1]; + isite2 = X->Def.InterAll_Diagonal[i][2] + 1; + B_spin = X->Def.InterAll_Diagonal[i][3]; + tmp_V = X->Def.ParaInterAll_Diagonal[i]; + fprintf(fp, "i=%ld isite1=%ld A_spin=%ld isite2=%ld B_spin=%ld tmp_V=%lf \n", i, isite1, A_spin, isite2, B_spin, tmp_V); + SetDiagonalInterAll(isite1, isite2, A_spin, B_spin, tmp_V, X); + } + fclose(fp); + } + + TimeKeeper(X, cFileNameTimeKeep, cDiagonalCalcFinish, "a"); + fprintf(stdoutMPI, "%s", cProEndCalcDiag); + return 0; +} \ No newline at end of file diff --git a/src/eigenIO.c b/src/eigenIO.c index 2b14b4199..fa476a7fe 100644 --- a/src/eigenIO.c +++ b/src/eigenIO.c @@ -117,7 +117,6 @@ int InputRealEigenValue(int xNsize, double *ene, char *filename) { int InputCmpEigenValue(int xNsize, complex double *ene, char *filename) { FILE *fp = NULL; - int i = 0; fp = fopen(filename, "rb+"); if(fp == NULL) { diff --git a/src/expec_cisajs.c b/src/expec_cisajs.c index 1ce822f81..1e53a3cec 100644 --- a/src/expec_cisajs.c +++ b/src/expec_cisajs.c @@ -53,10 +53,10 @@ int expec_cisajs_HubbardGC( double complex **vec, double complex **prod ){ - long unsigned int i, j; + long unsigned int i; long unsigned int org_isite1, org_isite2, 
org_sigma1, org_sigma2; - long int i_max; - long int ibit; + long unsigned int i_max; + long unsigned int ibit; long unsigned int is; double complex tmp_OneGreen = 1.0; @@ -127,11 +127,11 @@ int expec_cisajs_Hubbard( ) { long unsigned int i, j; long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; - long int i_max; + long unsigned int i_max; int num1, one = 1; - long int ibit; + long unsigned int ibit; long unsigned int is; - double complex tmp_OneGreen = 1.0; + double complex tmp_OneGreen = 1.0, dmv; i_max = X->Check.idim_max; for (i = 0; i < X->Def.NCisAjt; i++) { @@ -192,11 +192,12 @@ int expec_cisajs_Hubbard( is = X->Def.Tpow[2 * org_isite1 - 2 + org_sigma1]; #pragma omp parallel for default(none) shared(list_1, vec,Xvec,nstate,one,tmp_OneGreen) \ -firstprivate(i_max, is) private(num1, ibit) +firstprivate(i_max, is) private(num1, ibit, dmv) for (j = 1; j <= i_max; j++) { ibit = list_1[j] & is; num1 = ibit / is; - zaxpy_(&nstate, &tmp_OneGreen, vec[j], &one, Xvec[j], &one); + dmv = (double complex)num1; + zaxpy_(&nstate, &dmv, vec[j], &one, Xvec[j], &one); } } else { @@ -227,7 +228,7 @@ int expec_cisajs_SpinHalf( long unsigned int isite1; long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; double complex dmv; - long int i_max; + long unsigned int i_max; long int ibit1; long unsigned int is1_up; int one = 1; @@ -284,7 +285,7 @@ int expec_cisajs_SpinGeneral( long unsigned int i, j; long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; double complex dmv; - long int i_max; + long unsigned int i_max; int num1, one = 1; i_max = X->Check.idim_max; @@ -342,7 +343,7 @@ int expec_cisajs_SpinGCHalf( long unsigned int isite1; long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; double complex dmv; - long int i_max; + long unsigned int i_max; int tmp_sgn, one = 1; long unsigned int tmp_off = 0; @@ -413,7 +414,7 @@ int expec_cisajs_SpinGCGeneral( long unsigned int i, j; long unsigned int org_isite1, org_isite2, org_sigma1, 
org_sigma2; double complex dmv; - long int i_max; + long unsigned int i_max; long unsigned int tmp_off = 0; int num1, one = 1; @@ -544,7 +545,7 @@ int expec_cisajs( char sdt[D_FileNameMax]; double complex **prod; long unsigned int irght, ilft, ihfbit, ica; - long int i_max; + long unsigned int i_max; //For TPQ int step = 0, rand_i = 0, istate; diff --git a/src/expec_cisajscktaltdc.c b/src/expec_cisajscktaltdc.c index f588eb49b..17a698743 100644 --- a/src/expec_cisajscktaltdc.c +++ b/src/expec_cisajscktaltdc.c @@ -157,7 +157,7 @@ int expec_cisajscktalt_HubbardGC( long unsigned int tmp_off = 0; long unsigned int tmp_off_2 = 0; double complex tmp_V = 1.0 + 0.0*I; - long int i_max; + long unsigned int i_max; for (i = 0; i < X->Def.NCisAjtCkuAlvDC; i++) { zclear(X->Large.i_max*nstate, &Xvec[1][0]); @@ -276,7 +276,7 @@ int expec_cisajscktalt_Hubbard( long unsigned int tmp_off = 0; long unsigned int tmp_off_2 = 0; double complex tmp_V; - long int i_max; + long unsigned int i_max; for (i = 0; i < X->Def.NCisAjtCkuAlvDC; i++) { zclear(X->Large.i_max*nstate, &Xvec[1][0]); @@ -396,14 +396,12 @@ int expec_cisajscktalt_SpinHalf( long unsigned int i, j; long unsigned int org_isite1, org_isite2, org_isite3, org_isite4; long unsigned int org_sigma1, org_sigma2, org_sigma3, org_sigma4; - long unsigned int tmp_org_isite1, tmp_org_isite2, tmp_org_isite3, tmp_org_isite4; - long unsigned int tmp_org_sigma1, tmp_org_sigma2, tmp_org_sigma3, tmp_org_sigma4; long unsigned int isA_up, isB_up; long unsigned int is1_up, is2_up; long unsigned int tmp_off = 0; int tmp_sgn, num1, num2, one = 1; double complex tmp_V; - long int i_max; + long unsigned int i_max; double complex dmv; i_max = X->Check.idim_max; @@ -411,14 +409,6 @@ int expec_cisajscktalt_SpinHalf( for (i = 0; i < X->Def.NCisAjtCkuAlvDC; i++) { zclear(i_max*nstate, &Xvec[1][0]); - tmp_org_isite1 = X->Def.CisAjtCkuAlvDC[i][0] + 1; - tmp_org_sigma1 = X->Def.CisAjtCkuAlvDC[i][1]; - tmp_org_isite2 = X->Def.CisAjtCkuAlvDC[i][2] + 1; - 
tmp_org_sigma2 = X->Def.CisAjtCkuAlvDC[i][3]; - tmp_org_isite3 = X->Def.CisAjtCkuAlvDC[i][4] + 1; - tmp_org_sigma3 = X->Def.CisAjtCkuAlvDC[i][5]; - tmp_org_isite4 = X->Def.CisAjtCkuAlvDC[i][6] + 1; - tmp_org_sigma4 = X->Def.CisAjtCkuAlvDC[i][7]; if (Rearray_Interactions(i, &org_isite1, &org_isite2, &org_isite3, &org_isite4, &org_sigma1, &org_sigma2, &org_sigma3, &org_sigma4, &tmp_V, X) != 0) { //error message will be added zclear(nstate, prod[i]); @@ -525,14 +515,12 @@ int expec_cisajscktalt_SpinGeneral( long unsigned int i, j; long unsigned int org_isite1, org_isite2, org_isite3, org_isite4; long unsigned int org_sigma1, org_sigma2, org_sigma3, org_sigma4; - long unsigned int tmp_org_isite1, tmp_org_isite2, tmp_org_isite3, tmp_org_isite4; - long unsigned int tmp_org_sigma1, tmp_org_sigma2, tmp_org_sigma3, tmp_org_sigma4; long unsigned int tmp_off = 0; long unsigned int tmp_off_2 = 0; long unsigned int list1_off = 0; int num1, one = 1; double complex tmp_V; - long int i_max; + long unsigned int i_max; int tmp_Sz; long unsigned int tmp_org = 0; i_max = X->Check.idim_max; @@ -540,14 +528,6 @@ int expec_cisajscktalt_SpinGeneral( for (i = 0; i < X->Def.NCisAjtCkuAlvDC; i++) { zclear(i_max*nstate, &Xvec[1][0]); - tmp_org_isite1 = X->Def.CisAjtCkuAlvDC[i][0] + 1; - tmp_org_sigma1 = X->Def.CisAjtCkuAlvDC[i][1]; - tmp_org_isite2 = X->Def.CisAjtCkuAlvDC[i][2] + 1; - tmp_org_sigma2 = X->Def.CisAjtCkuAlvDC[i][3]; - tmp_org_isite3 = X->Def.CisAjtCkuAlvDC[i][4] + 1; - tmp_org_sigma3 = X->Def.CisAjtCkuAlvDC[i][5]; - tmp_org_isite4 = X->Def.CisAjtCkuAlvDC[i][6] + 1; - tmp_org_sigma4 = X->Def.CisAjtCkuAlvDC[i][7]; if (Rearray_Interactions(i, &org_isite1, &org_isite2, &org_isite3, &org_isite4, &org_sigma1, &org_sigma2, &org_sigma3, &org_sigma4, &tmp_V, X) != 0) { zclear(nstate, prod[i]); @@ -648,24 +628,14 @@ int expec_cisajscktalt_SpinGCHalf( long unsigned int i, j; long unsigned int org_isite1, org_isite2, org_isite3, org_isite4; long unsigned int org_sigma1, org_sigma2, 
org_sigma3, org_sigma4; - long unsigned int tmp_org_isite1, tmp_org_isite2, tmp_org_isite3, tmp_org_isite4; - long unsigned int tmp_org_sigma1, tmp_org_sigma2, tmp_org_sigma3, tmp_org_sigma4; long unsigned int isA_up, isB_up; long unsigned int tmp_off = 0; double complex tmp_V; - long int i_max; + long unsigned int i_max; i_max = X->Check.idim_max; for (i = 0; i < X->Def.NCisAjtCkuAlvDC; i++) { zclear(i_max*nstate, &Xvec[1][0]); - tmp_org_isite1 = X->Def.CisAjtCkuAlvDC[i][0] + 1; - tmp_org_sigma1 = X->Def.CisAjtCkuAlvDC[i][1]; - tmp_org_isite2 = X->Def.CisAjtCkuAlvDC[i][2] + 1; - tmp_org_sigma2 = X->Def.CisAjtCkuAlvDC[i][3]; - tmp_org_isite3 = X->Def.CisAjtCkuAlvDC[i][4] + 1; - tmp_org_sigma3 = X->Def.CisAjtCkuAlvDC[i][5]; - tmp_org_isite4 = X->Def.CisAjtCkuAlvDC[i][6] + 1; - tmp_org_sigma4 = X->Def.CisAjtCkuAlvDC[i][7]; if (Rearray_Interactions(i, &org_isite1, &org_isite2, &org_isite3, &org_isite4, &org_sigma1, &org_sigma2, &org_sigma3, &org_sigma4, &tmp_V, X) != 0) { //error message will be added @@ -713,7 +683,7 @@ int expec_cisajscktalt_SpinGCHalf( } else if (org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4) { X_GC_child_CisAitCjuAju_spin_MPIsingle( - org_isite1 - 1, org_sigma1, org_sigma2, org_isite3 - 1, org_sigma3, + org_isite1 - 1, org_sigma2, org_isite3 - 1, org_sigma3, tmp_V, X, nstate, Xvec, vec); } else if (org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4) { @@ -784,26 +754,16 @@ int expec_cisajscktalt_SpinGCGeneral( long unsigned int i, j; long unsigned int org_isite1, org_isite2, org_isite3, org_isite4; long unsigned int org_sigma1, org_sigma2, org_sigma3, org_sigma4; - long unsigned int tmp_org_isite1, tmp_org_isite2, tmp_org_isite3, tmp_org_isite4; - long unsigned int tmp_org_sigma1, tmp_org_sigma2, tmp_org_sigma3, tmp_org_sigma4; long unsigned int tmp_off = 0; long unsigned int tmp_off_2 = 0; int num1, one = 1; double complex tmp_V; - long int i_max; + long unsigned int i_max; i_max = X->Check.idim_max; X->Large.mode = M_CORR; 
for(i=0;i<X->Def.NCisAjtCkuAlvDC;i++){ zclear(i_max*nstate, &Xvec[1][0]); - tmp_org_isite1 = X->Def.CisAjtCkuAlvDC[i][0] + 1; - tmp_org_sigma1 = X->Def.CisAjtCkuAlvDC[i][1]; - tmp_org_isite2 = X->Def.CisAjtCkuAlvDC[i][2] + 1; - tmp_org_sigma2 = X->Def.CisAjtCkuAlvDC[i][3]; - tmp_org_isite3 = X->Def.CisAjtCkuAlvDC[i][4] + 1; - tmp_org_sigma3 = X->Def.CisAjtCkuAlvDC[i][5]; - tmp_org_isite4 = X->Def.CisAjtCkuAlvDC[i][6] + 1; - tmp_org_sigma4 = X->Def.CisAjtCkuAlvDC[i][7]; if (Rearray_Interactions(i, &org_isite1, &org_isite2, &org_isite3, &org_isite4, &org_sigma1, &org_sigma2, &org_sigma3, &org_sigma4, &tmp_V, X) != 0) { //error message will be added diff --git a/src/expec_energy_flct.c b/src/expec_energy_flct.c index 3252a9331..bdd29e0fe 100644 --- a/src/expec_energy_flct.c +++ b/src/expec_energy_flct.c @@ -647,7 +647,7 @@ int expec_energy_flct_GeneralSpin( i_max = X->Check.idim_max; #pragma omp parallel default(none) shared(tmp_v0, list_1,Sz_t,Sz2_t,nstate) \ -firstprivate(i_max,X,myrank,mythread) private(j,Sz,isite1,tmp_v02, tmp_list1,istate) +firstprivate(i_max,X,myrank) private(j,Sz,isite1,tmp_v02, tmp_list1,istate,mythread) { tmp_v02 = d_1d_allocate(nstate); #ifdef _OPENMP @@ -724,7 +724,6 @@ int expec_energy_flct( long unsigned int irght, ilft, ihfbit; long unsigned int i_max; int istate; - double *energy_t, *var_t; switch (X->Def.iCalcType) { case TPQCalc: diff --git a/src/expec_totalspin.c b/src/expec_totalspin.c index 893552363..b6fb3b9fb 100644 --- a/src/expec_totalspin.c +++ b/src/expec_totalspin.c @@ -415,7 +415,6 @@ void totalspin_SpinGC( int num1_down, num2_down; int sigma_1, sigma_2; long unsigned int ibit1_up, ibit2_up, ibit_tmp, is_up; - double complex spn_z; double complex spn_z1, spn_z2; long unsigned int list_1_j; long unsigned int i_max; diff --git a/src/include/diagonalcalc.h b/src/include/diagonalcalc.h index a585f5b4b..1877e8094 100644 --- a/src/include/diagonalcalc.h +++ b/src/include/diagonalcalc.h @@ -21,52 +21,10 @@ int diagonalcalc struct
BindStruct *X ); -int SetDiagonalCoulombIntra -( - long unsigned int isite1, - double dtmp_V, - struct BindStruct *X - ); - -int SetDiagonalChemi -( - long unsigned int isite1, - double dtmp_V, - long unsigned int spin, - struct BindStruct *X - ); - -int SetDiagonalCoulombInter -( - long unsigned int isite1, - long unsigned int isite2, - double dtmp_V, - struct BindStruct *X - ); - -int SetDiagonalHund -( - long unsigned int isite1, - long unsigned int isite2, - double dtmp_V, - struct BindStruct *X - ); - -int SetDiagonalInterAll -( - long unsigned int isite1, - long unsigned int isite2, - long unsigned int isigma1, - long unsigned int isigma2, - double dtmp_V, - struct BindStruct *X - ); - int diagonalcalcForTE( const int _istep, struct BindStruct *X, - int nstate, - double complex *tmp_v0, + double complex *tmp_v0, double complex *tmp_v1 ); diff --git a/src/include/mltplyMPISpinCore.h b/src/include/mltplyMPISpinCore.h index 734d2545e..ba27ca8b6 100644 --- a/src/include/mltplyMPISpinCore.h +++ b/src/include/mltplyMPISpinCore.h @@ -166,11 +166,9 @@ void X_child_CisAit_GeneralSpin_MPIdouble struct BindStruct *X, int nstate, double complex **tmp_v0, double complex **tmp_v1, - double complex **tmp_v1buf, unsigned long int idim_max, long unsigned int *list_1_org, - long unsigned int *list_1buf_org, - long unsigned int _ihfbit + long unsigned int *list_1buf_org ); @@ -256,7 +254,6 @@ void X_GC_child_CisAisCjuAjv_spin_MPIsingle void X_GC_child_CisAitCjuAju_spin_MPIsingle ( int org_isite1, - int org_ispin1, int org_ispin2, int org_isite3, int org_ispin3, @@ -329,16 +326,11 @@ void X_child_CisAit_spin_MPIdouble struct BindStruct *X /**< [inout]*/, int nstate, double complex **tmp_v0 /**< [out] Result v0 = H v1*/, double complex **tmp_v1, /**< [in] v0 = H v1*/ - double complex **tmp_v1buf, unsigned long int idim_max, - long unsigned int *Tpow, long unsigned int *list_1_org, long unsigned int *list_1buf_org, long unsigned int *list_2_1_target, - long unsigned int 
*list_2_2_target, - long unsigned int _irght, - long unsigned int _ilft, - long unsigned int _ihfbit + long unsigned int *list_2_2_target ); void X_child_CisAisCjuAju_GeneralSpin_MPIdouble diff --git a/src/include/struct.h b/src/include/struct.h index 5d5ce3ba5..d365d18a1 100644 --- a/src/include/struct.h +++ b/src/include/struct.h @@ -107,7 +107,7 @@ struct DefineList { malloc in setmem_def().\n Data Format [DefineList::NTransfer][4]: 0->site number i, 1-> spin index on i, 2-> site number j, 3-> spin index on j. */ - int **EDGeneralTransfer;/**<@brief Index of transfer integrals for calculation. + unsigned int **EDGeneralTransfer;/**<@brief Index of transfer integrals for calculation. malloc in setmem_def().\n Data Format [DefineList::NTransfer][4]: 0->site number i, 1-> spin index on i, 2-> site number j, 3-> spin index on j. */ double complex *ParaGeneralTransfer;/**<@brief Value of general transfer integrals by a def file. @@ -137,13 +137,13 @@ struct DefineList { malloc in setmem_def().*/ unsigned int NPairHopping;/**<@brief Number of pair-hopping term*/ - int **PairHopping;/**<@brief [DefineList::NPairHopping][2] Index of pair-hopping. + unsigned int **PairHopping;/**<@brief [DefineList::NPairHopping][2] Index of pair-hopping. malloc in setmem_def().*/ double *ParaPairHopping;/**<@brief [DefineList::NPairHopping] Coupling constant of pair-hopping term. malloc in setmem_def().*/ unsigned int NExchangeCoupling;/**<@brief Number of exchange term*/ - int **ExchangeCoupling;/**<@brief [DefineList::NExchangeCoupling][2] Index of exchange term. + unsigned int **ExchangeCoupling;/**<@brief [DefineList::NExchangeCoupling][2] Index of exchange term. malloc in setmem_def().*/ double *ParaExchangeCoupling;/**<@brief [DefineList::NExchangeCoupling] Coupling constant of exchange term. 
malloc in setmem_def().*/ @@ -158,7 +158,7 @@ struct DefineList { //[s] For InterAll int **InterAll;/**<@brief [DefineList::NinterAll][8] Interacted quartet*/ - int **InterAll_OffDiagonal;/**<@brief [DefineList::NinterAll_OffDiagonal][8] Interacted quartet*/ + unsigned int **InterAll_OffDiagonal;/**<@brief [DefineList::NinterAll_OffDiagonal][8] Interacted quartet*/ int **InterAll_Diagonal;/**<@brief [DefineList::NinterAll_Diagonal][4] Interacted quartet*/ unsigned int NInterAll;/**<@brief Total Number of Interacted quartet*/ unsigned int NInterAll_Diagonal;/**<@brief Number of interall term (diagonal)*/ @@ -313,7 +313,6 @@ struct CheckList { @brief For Matrix-Vector product */ struct LargeList { - double complex prdct;/**<@brief The expectation value of the energy.*/ int itr;/**<@brief Iteration number.*/ long int iv;/**<@brief Used for initializing vector.*/ long int i_max;/**<@brief Length of eigenvector*/ diff --git a/src/mltply.c b/src/mltply.c index 425af7b14..7ed03ea9c 100644 --- a/src/mltply.c +++ b/src/mltply.c @@ -66,7 +66,6 @@ int mltply(struct BindStruct *X, int nstate, double complex **tmp_v0,double comp StartTimer(1); i_max = X->Check.idim_max; - X->Large.prdct = 0.0; if(i_max!=0){ if (X->Def.iFlgGeneralSpin == FALSE) { @@ -101,7 +100,7 @@ int mltply(struct BindStruct *X, int nstate, double complex **tmp_v0,double comp zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); } StopTimer(100); - if (X->Def.iCalcType == TimeEvolution) diagonalcalcForTE(step_i, X, nstate, &tmp_v0[0][0], &tmp_v1[0][0]); + if (X->Def.iCalcType == TimeEvolution) diagonalcalcForTE(step_i, X, &tmp_v0[0][0], &tmp_v1[0][0]); switch (X->Def.iCalcModel) { case HubbardGC: @@ -126,7 +125,6 @@ int mltply(struct BindStruct *X, int nstate, double complex **tmp_v0,double comp return -1; } - X->Large.prdct = SumMPI_dc(X->Large.prdct); StopTimer(1); return 0; } diff --git a/src/mltplyHubbardCore.c b/src/mltplyHubbardCore.c index 9429780e3..576c5127c 100644 --- 
a/src/mltplyHubbardCore.c +++ b/src/mltplyHubbardCore.c @@ -464,7 +464,6 @@ void child_exchange_element( long unsigned int ilft = X->Large.ilft; long unsigned int ihfbit = X->Large.ihfbit; double complex tmp_J = X->Large.tmp_J; - int mode = X->Large.mode; int one = 1; ibit1_up = list_1[j] & is1_up; @@ -517,7 +516,6 @@ void child_pairhopp_element( long unsigned int ilft = X->Large.ilft; long unsigned int ihfbit = X->Large.ihfbit; double complex tmp_J = X->Large.tmp_J; - int mode = X->Large.mode; int one = 1; ibit1_up = list_1[j] & is1_up; @@ -558,7 +556,6 @@ void GC_child_exchange_element( long unsigned int is2_down = X->Large.is2_down; long unsigned int list_1_j, list_1_off; double complex tmp_J = X->Large.tmp_J; - int mode = X->Large.mode; int one = 1; list_1_j = j - 1; @@ -608,7 +605,6 @@ void GC_child_pairhopp_element( long unsigned int is2_down = X->Large.is2_down; long unsigned int list_1_j, list_1_off; double complex tmp_J = X->Large.tmp_J; - int mode = X->Large.mode; int one = 1; list_1_j = j - 1; diff --git a/src/mltplyMPIHubbard.c b/src/mltplyMPIHubbard.c index 25d19835b..08d359296 100644 --- a/src/mltplyMPIHubbard.c +++ b/src/mltplyMPIHubbard.c @@ -54,8 +54,8 @@ void X_GC_child_general_hopp_MPIdouble( int nstate, double complex **tmp_v0,//!< [out] Result v0 = H v1 double complex **tmp_v1 //!< [in] v0 = H v1 ) { - int mask1, mask2, state1, state2, ierr, origin, bitdiff, Fsgn; - unsigned long int idim_max_buf, j; + int mask1, mask2, state1, state2, origin, bitdiff, Fsgn; + unsigned long int idim_max_buf; double complex trans; mask1 = (int)X->Def.Tpow[2 * org_isite1 + org_ispin1]; @@ -104,7 +104,7 @@ void X_child_CisAjt_MPIdouble( long unsigned int *list_2_1_target,//!<[in] long unsigned int *list_2_2_target//!<[in] ) { - int mask1, mask2, state1, state2, ierr, origin, bitdiff, Fsgn; + int mask1, mask2, state1, state2, origin, bitdiff, Fsgn; unsigned long int idim_max_buf, j, ioff; double complex trans; int one = 1; @@ -176,7 +176,7 @@ void 
X_GC_child_general_hopp_MPIsingle( int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 double complex **tmp_v1//!<[in] v0 = H v1 ) { - int mask2, state1, state2, ierr, origin, bit2diff, Fsgn; + int mask2, state1, state2, origin, bit2diff, Fsgn; unsigned long int idim_max_buf, j, mask1, state1check, bit1diff, ioff; double complex trans, dmv; int one = 1; @@ -264,7 +264,7 @@ void X_child_general_hopp_MPIdouble( int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 double complex **tmp_v1//!<[in] v0 = H v1 ) { - int mask1, mask2, state1, state2, ierr, origin, bitdiff, Fsgn; + int mask1, mask2, state1, state2, origin, bitdiff, Fsgn; unsigned long int idim_max_buf, j, ioff; double complex trans; int one = 1; @@ -337,7 +337,7 @@ void X_child_general_hopp_MPIsingle( int nstate, double complex **tmp_v0,//!<[out] Result v0 = H v1 double complex **tmp_v1//!<[in] v0 = H v1 ) { - int mask2, state2, ierr, origin, bit2diff, Fsgn; + int mask2, state2, origin, bit2diff, Fsgn; unsigned long int mask1, state1, idim_max_buf, j, state1check, bit1diff, ioff, jreal; double complex trans, dmv; int one = 1; @@ -415,7 +415,7 @@ void X_child_CisAjt_MPIsingle( long unsigned int *list_2_1_target,//!<[in] ??? long unsigned int *list_2_2_target//!<[in] ??? 
){ - int mask2, state2, ierr, origin, bit2diff, Fsgn; + int mask2, state2, origin, bit2diff, Fsgn; unsigned long int mask1, state1, idim_max_buf, j, state1check, bit1diff, ioff, jreal; double complex trans, dmv; int one = 1; diff --git a/src/mltplyMPIHubbardCore.c b/src/mltplyMPIHubbardCore.c index 17224f9df..f9980e4a6 100644 --- a/src/mltplyMPIHubbardCore.c +++ b/src/mltplyMPIHubbardCore.c @@ -313,7 +313,7 @@ void X_GC_child_CisAjtCkuAku_Hubbard_MPI( ) { unsigned long int i_max = X->Check.idim_max; unsigned long int idim_max_buf; - int iCheck, ierr, Fsgn; + int iCheck, Fsgn; unsigned long int isite1, isite2, isite3; unsigned long int tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4; unsigned long int j, Asum, Adiff; @@ -458,7 +458,7 @@ void X_GC_child_CisAjtCkuAlv_Hubbard_MPI( ) { unsigned long int i_max = X->Check.idim_max; unsigned long int idim_max_buf; - int iCheck, ierr, Fsgn; + int iCheck, Fsgn; unsigned long int isite1, isite2, isite3, isite4; unsigned long int tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4; unsigned long int j, Adiff, Bdiff; @@ -703,7 +703,7 @@ void X_child_CisAjtCkuAlv_Hubbard_MPI( ) { unsigned long int i_max = X->Check.idim_max; unsigned long int idim_max_buf; - int iCheck, ierr, Fsgn; + int iCheck, Fsgn; unsigned long int isite1, isite2, isite3, isite4; unsigned long int tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4; unsigned long int j, Adiff, Bdiff; @@ -866,7 +866,7 @@ void X_child_CisAjtCkuAku_Hubbard_MPI( ) { unsigned long int i_max = X->Check.idim_max; unsigned long int idim_max_buf, ioff; - int iCheck, ierr, Fsgn; + int iCheck, Fsgn; unsigned long int isite1, isite2, isite3; unsigned long int tmp_isite1, tmp_isite2, tmp_isite3, tmp_isite4; unsigned long int j, Asum, Adiff; @@ -1047,8 +1047,8 @@ void X_GC_Cis_MPI( double complex **tmp_v1buf,//!<[in] buffer for wavefunction unsigned long int *Tpow//!<[in] Similar to DefineList::Tpow ) { - int mask2, state2, ierr, origin, bit2diff, Fsgn; - unsigned long int idim_max_buf, j; + int 
mask2, state2, origin, bit2diff, Fsgn; + unsigned long int idim_max_buf; double complex trans; // org_isite >= Nsite @@ -1095,8 +1095,8 @@ void X_GC_Ajt_MPI( double complex **tmp_v1buf,//!<[in] buffer for wavefunction unsigned long int *Tpow//!<[in] Similar to DefineList::Tpow ) { - int mask2, state2, ierr, origin, bit2diff, Fsgn; - unsigned long int idim_max_buf, j; + int mask2, state2, origin, bit2diff, Fsgn; + unsigned long int idim_max_buf; double complex trans; // org_isite >= Nsite @@ -1145,7 +1145,7 @@ void X_Cis_MPI( long unsigned int _ilft,//!<[in] Similer to LargeList::ilft long unsigned int _ihfbit//!<[in] Similer to LargeList::ihfbit ) { - int mask2, state2, ierr, origin, bit2diff, Fsgn; + int mask2, state2, origin, bit2diff, Fsgn; unsigned long int idim_max_buf, j, ioff; double complex trans; int one = 1; @@ -1207,7 +1207,7 @@ void X_Ajt_MPI( long unsigned int _ilft,//!<[in] Similer to LargeList::ilft long unsigned int _ihfbit//!<[in] Similer to LargeList::ihfbit ){ - int mask2, state2, ierr, origin, bit2diff, Fsgn; + int mask2, state2, origin, bit2diff, Fsgn; unsigned long int idim_max_buf, j, ioff; double complex trans; int one = 1; diff --git a/src/mltplyMPISpin.c b/src/mltplyMPISpin.c index 693a3ba28..c571909cf 100644 --- a/src/mltplyMPISpin.c +++ b/src/mltplyMPISpin.c @@ -40,10 +40,6 @@ void child_general_int_spin_MPIdouble( (int)X->Def.InterAll_OffDiagonal[i_int][3], (int)X->Def.InterAll_OffDiagonal[i_int][4], (int)X->Def.InterAll_OffDiagonal[i_int][5], (int)X->Def.InterAll_OffDiagonal[i_int][7], X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); - /** - Add @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ - to LargeList::prdct - */ }/*void child_general_int_spin_MPIdouble*/ /** @brief Exchange term in Spin model @@ -62,7 +58,7 @@ void X_child_general_int_spin_MPIdouble( int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ double complex **tmp_v1//!<[in] Vector to be producted ) { - int mask1, mask2, state1, 
state2, ierr, origin; + int mask1, mask2, state1, state2, origin; unsigned long int idim_max_buf, j, ioff; double complex Jint; int one = 1; @@ -109,7 +105,7 @@ void X_child_general_int_spin_TotalS_MPIdouble( int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ double complex **tmp_v1//!<[in] Vector to be producted ){ - int mask1, mask2, num1_up, num2_up, ierr, origin, one = 1; + int mask1, mask2, num1_up, num2_up, origin, one = 1; unsigned long int idim_max_buf, j, ioff, ibit_tmp; double complex dmv; @@ -154,10 +150,6 @@ void child_general_int_spin_MPIsingle( (int)X->Def.InterAll_OffDiagonal[i_int][3], (int)X->Def.InterAll_OffDiagonal[i_int][4], (int)X->Def.InterAll_OffDiagonal[i_int][5], (int)X->Def.InterAll_OffDiagonal[i_int][7], X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); - /** - Add @f$\langle v_1| H_{\rm this} | v_1 \rangle@f$ - to LargeList::prdct - */ }/*void child_general_int_spin_MPIsingle*/ /* @brief General interaction term of canonical spin system. 
@@ -175,7 +167,7 @@ void X_child_general_int_spin_MPIsingle( int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ double complex **tmp_v1//!<[in] Vector to be producted ) { - int mask2, state2, ierr, origin; + int mask2, state2, origin; unsigned long int mask1, idim_max_buf, j, ioff, state1, jreal, state1check; double complex Jint; int one = 1; diff --git a/src/mltplyMPISpinCore.c b/src/mltplyMPISpinCore.c index ce250636b..394c06e0b 100644 --- a/src/mltplyMPISpinCore.c +++ b/src/mltplyMPISpinCore.c @@ -104,8 +104,8 @@ void X_GC_child_CisAitCiuAiv_spin_MPIdouble( int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ double complex **tmp_v1//!<[in] Vector to be producted ) { - int mask1, mask2, state1, state2, ierr, origin; - unsigned long int idim_max_buf, j; + int mask1, mask2, state1, state2, origin; + unsigned long int idim_max_buf; double complex Jint; mask1 = (int)X->Def.Tpow[org_isite1]; @@ -176,9 +176,9 @@ void X_GC_child_CisAisCjuAjv_spin_MPIdouble( int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ double complex **tmp_v1//!<[in] Vector to be producted ) { - int mask1, mask2, state2, ierr; + int mask1, mask2, state2; long int origin, num1; - unsigned long int idim_max_buf, j; + unsigned long int idim_max_buf; double complex Jint; if (org_isite1 == org_isite3 && org_ispin1 == org_ispin4) {//CisAisCitAis @@ -239,9 +239,9 @@ void X_GC_child_CisAitCjuAju_spin_MPIdouble( int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ double complex **tmp_v1//!<[in] Vector to be producted ) { - int mask1, mask2, state1, ierr, num1; + int mask1, mask2, state1, num1; long int origin; - unsigned long int idim_max_buf, j; + unsigned long int idim_max_buf; double complex Jint; if (org_isite1 == org_isite3 && org_ispin1 == org_ispin3) {//cisaitcisais @@ -382,7 +382,7 @@ void X_GC_child_CisAitCiuAiv_spin_MPIsingle( int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ 
double complex **tmp_v1//!<[in] Vector to be producted ) { - int mask2, state2, ierr, origin; + int mask2, state2, origin; unsigned long int mask1, idim_max_buf, j, ioff, state1, state1check; double complex Jint; int one = 1; @@ -458,7 +458,7 @@ void X_GC_child_CisAisCjuAjv_spin_MPIsingle( int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ double complex **tmp_v1//!<[in] Vector to be producted ) { - int mask2, state2, ierr, origin; + int mask2, state2, origin; unsigned long int mask1, idim_max_buf, j, state1, state1check; double complex Jint; int one = 1; @@ -513,7 +513,7 @@ void GC_child_CisAitCjuAju_spin_MPIsingle( double complex **tmp_v1//!<[in] v0 = H v1 ){ X_GC_child_CisAitCjuAju_spin_MPIsingle( - X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], + X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][3], X->Def.InterAll_OffDiagonal[i_int][4], X->Def.InterAll_OffDiagonal[i_int][5], X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); }/*void GC_child_CisAisCjuAjv_spin_MPIsingle*/ @@ -524,7 +524,6 @@ void GC_child_CisAitCjuAju_spin_MPIsingle( */ void X_GC_child_CisAitCjuAju_spin_MPIsingle( int org_isite1,//!<[in] Site 1 - int org_ispin1,//!<[in] Spin 1 int org_ispin2,//!<[in] Spin 2 int org_isite3,//!<[in] Site 3 int org_ispin3,//!<[in] Spin 3 @@ -586,10 +585,9 @@ void X_GC_child_CisAisCjuAjv_GeneralSpin_MPIdouble( int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ double complex **tmp_v1//!<[in] Vector to be producted ) { - unsigned long int off, j; - int origin, ierr; + unsigned long int off; + int origin; double complex tmp_V; - int ihermite = TRUE, one = 1; if (org_isite1 == org_isite3 && org_ispin1 == org_ispin4) {//cisaisciuais=0 && cisaiucisais=0 return; } @@ -632,8 +630,8 @@ void X_GC_child_CisAitCjuAju_GeneralSpin_MPIdouble( int nstate, double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ double complex **tmp_v1//!<[in] Vector to 
be producted ) { - unsigned long int j, off; - int origin, ierr, one = 1; + unsigned long int off; + int origin; double complex tmp_V; if (org_isite1 == org_isite3 && org_ispin1 == org_ispin3) {//cisaitcisais=0 && cisaiscitais=0 @@ -680,8 +678,8 @@ void X_GC_child_CisAitCjuAjv_GeneralSpin_MPIdouble( int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1//!<[in] Input wavefunction ) { - unsigned long int tmp_off, off, j; - int origin, ierr, ihermite, one = 1; + unsigned long int tmp_off, off; + int origin, ihermite; double complex tmp_V; ihermite = TRUE; @@ -740,9 +738,8 @@ void X_GC_child_CisAisCjuAju_GeneralSpin_MPIdouble( int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1//!<[in] Input wavefunction ) { - unsigned long int j, num1; + unsigned long int num1; double complex tmp_V; - int one = 1; num1 = BitCheckGeneral((unsigned long int) myrank, org_isite1 + 1, org_ispin1, X->Def.SiteToBit, X->Def.Tpow); @@ -770,8 +767,8 @@ void X_GC_child_CisAit_GeneralSpin_MPIdouble( int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1//!<[in] Input wavefunction ) { - unsigned long int off, j; - int origin, ierr, one = 1; + unsigned long int off; + int origin; double complex tmp_V; if (GetOffCompGeneralSpin((unsigned long int) myrank, org_isite1 + 1, org_ispin1, org_ispin2, @@ -804,7 +801,7 @@ void X_GC_child_CisAis_GeneralSpin_MPIdouble( int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1//!<[in] Input wavefunction ) { - unsigned long int j, num1; + unsigned long int num1; double complex tmp_V; num1 = BitCheckGeneral((unsigned long int) myrank, @@ -828,9 +825,8 @@ void X_GC_child_AisCis_GeneralSpin_MPIdouble( int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1//!<[in] Input wavefunction ) { - unsigned long int j, num1; + unsigned long int num1; double complex tmp_V; - int 
one = 1; num1 = BitCheckGeneral((unsigned long int) myrank, org_isite1 + 1, org_ispin1, X->Def.SiteToBit, X->Def.Tpow); @@ -851,17 +847,16 @@ void X_child_CisAit_GeneralSpin_MPIdouble( int org_ispin2,//!<[in] Spin 2 double complex tmp_trans,//!<[in] Coupling constant struct BindStruct *X,//!<[inout] - int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + int nstate, + double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1,//!<[in] Input wavefunction - double complex **tmp_v1buf,//!<[inout] buffer for wavefunction unsigned long int idim_max,//!<[in] Similar to CheckList::idim_max long unsigned int *list_1_org,//!<[in] Similar to ::list_1 - long unsigned int *list_1buf_org,//!<[in] Similar to ::list_1buf - long unsigned int _ihfbit//!<[in] Similer to LargeList::ihfbit + long unsigned int *list_1buf_org//!<[in] Similar to ::list_1buf ) { unsigned long int off, j, tmp_off,idim_max_buf; - int origin, ierr, one = 1; + int origin, one = 1; double complex tmp_V; if (GetOffCompGeneralSpin((unsigned long int) myrank, org_isite1 + 1, org_ispin1, org_ispin2, @@ -906,7 +901,7 @@ void X_GC_child_CisAisCjuAjv_GeneralSpin_MPIsingle( double complex **tmp_v1//!<[in] Input wavefunction ){ unsigned long int off, j, num1; - int origin, ierr, isite, IniSpin, one = 1; + int origin, isite, IniSpin, one = 1; double complex tmp_V; if (GetOffCompGeneralSpin((unsigned long int)myrank, @@ -1009,7 +1004,7 @@ void X_GC_child_CisAitCjuAjv_GeneralSpin_MPIsingle( double complex **tmp_v1//!<[in] Input wavefunction ){ unsigned long int off, j; - int origin, ierr, isite, IniSpin, FinSpin, one = 1; + int origin, isite, IniSpin, FinSpin, one = 1; double complex tmp_V; if (GetOffCompGeneralSpin((unsigned long int)myrank, @@ -1105,7 +1100,7 @@ void X_child_CisAitCjuAjv_GeneralSpin_MPIdouble( double complex **tmp_v1//!<[in] Input wavefunction ){ unsigned long int tmp_off, off, j, idim_max_buf; - int origin, ierr, one = 1; + int origin, one = 1; double complex 
tmp_V; int ihermite=TRUE; @@ -1168,9 +1163,8 @@ void X_child_CisAisCjuAju_GeneralSpin_MPIdouble( int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1//!<[in] Input wavefunction ) { - unsigned long int j, num1; + unsigned long int num1; double complex tmp_V; - int one = 1; if (org_isite1 == org_isite3 && org_ispin1 == org_ispin3) { num1 = BitCheckGeneral((unsigned long int) myrank, org_isite1 + 1, org_ispin1, X->Def.SiteToBit, X->Def.Tpow); @@ -1255,7 +1249,7 @@ void X_child_CisAitCjuAjv_GeneralSpin_MPIsingle( double complex **tmp_v1//!<[in] Input wavefunction ){ unsigned long int tmp_off, off, j, idim_max_buf; - int origin, ierr, isite, IniSpin, FinSpin, one = 1; + int origin, isite, IniSpin, FinSpin, one = 1; double complex tmp_V; if (GetOffCompGeneralSpin((unsigned long int)myrank, @@ -1314,8 +1308,8 @@ void X_GC_child_CisAit_spin_MPIdouble( int nstate, double complex **tmp_v0 /**< [out] Result v0 = H v1*/, double complex **tmp_v1 /**< [in] v0 = H v1*/) { - int mask1, state1, ierr, origin; - unsigned long int idim_max_buf, j; + int mask1, state1, origin; + unsigned long int idim_max_buf; double complex trans; mask1 = (int)X->Def.Tpow[org_isite1]; @@ -1352,18 +1346,13 @@ void X_child_CisAit_spin_MPIdouble( struct BindStruct *X /**< [inout]*/, int nstate, double complex **tmp_v0 /**< [out] Result v0 = H v1*/, double complex **tmp_v1, /**< [in] v0 = H v1*/ - double complex **tmp_v1buf,//!<[in] buffer for wavefunction unsigned long int idim_max,//!<[in] Similar to CheckList::idim_max - long unsigned int *Tpow,//!<[in] Similar to DefineList::Tpow long unsigned int *list_1_org,//!<[in] Similar to ::list_1 long unsigned int *list_1buf_org,//!<[in] Similar to ::list_1buf long unsigned int *list_2_1_target,//!<[in] Similar to ::list_2_1 - long unsigned int *list_2_2_target,//!<[in] Similar to ::list_2_2 - long unsigned int _irght,//!<[in] Similer to LargeList::irght - long unsigned int _ilft,//!<[in] Similer to LargeList::ilft - 
long unsigned int _ihfbit//!<[in] Similer to LargeList::ihfbit + long unsigned int *list_2_2_target//!<[in] Similar to ::list_2_2 ){ - int mask1, state1, ierr, origin, one = 1; + int mask1, state1, origin, one = 1; unsigned long int idim_max_buf, j; unsigned long int tmp_off; double complex trans; @@ -1406,7 +1395,6 @@ void X_GC_child_CisAis_spin_MPIdouble( int nstate, double complex **tmp_v0 /**< [out] Result v0 = H v1*/, double complex **tmp_v1 /**< [in] v0 = H v1*/ ){ - long unsigned int j; int mask1, ibit1; mask1 = (int)X->Def.Tpow[org_isite1]; ibit1 = (((unsigned long int)myrank& mask1)/mask1)^(1-org_ispin1); @@ -1426,7 +1414,6 @@ void X_GC_child_AisCis_spin_MPIdouble( int nstate, double complex **tmp_v0 /**< [out] Result v0 = H v1*/, double complex **tmp_v1 /**< [in] v0 = H v1*/ ){ - long unsigned int j; int mask1; int ibit1; mask1 = (int)X->Def.Tpow[org_isite1]; diff --git a/src/mltplySpinCore.c b/src/mltplySpinCore.c index d46f774f5..a54a8a8d8 100644 --- a/src/mltplySpinCore.c +++ b/src/mltplySpinCore.c @@ -283,7 +283,6 @@ void child_exchange_spin_element( long unsigned int ilft = X->Large.ilft; long unsigned int ihfbit = X->Large.ihfbit; double complex tmp_J = X->Large.tmp_J; - int mode = X->Large.mode; long unsigned int ibit_tmp; int one = 1; @@ -312,7 +311,6 @@ void GC_child_exchange_spin_element( ) { long unsigned int is_up = X->Large.isA_spin; double complex tmp_J = X->Large.tmp_J; - int mode = X->Large.mode; long unsigned int list_1_j, list_1_off; int one = 1; @@ -343,7 +341,6 @@ void GC_child_pairlift_spin_element( ) { long unsigned int is_up = X->Large.isA_spin; double complex tmp_J = X->Large.tmp_J; - int mode = X->Large.mode; int one = 1; long unsigned int list_1_off; long unsigned int list_1_j = j - 1; diff --git a/src/phys.c b/src/phys.c index 26545db5e..023c3ee80 100644 --- a/src/phys.c +++ b/src/phys.c @@ -48,12 +48,15 @@ void phys(struct BindStruct *X, //!<[inout] unsigned long int neig //!<[in] ) { - long unsigned int i, j, i_max; + long 
unsigned int i; double tmp_N; - i_max = X->Check.idim_max; #ifdef _SCALAPACK double complex *vec_tmp; int ictxt, ierr, rank; + long unsigned int j, i_max; + + i_max = X->Check.idim_max; + if(use_scalapack){ fprintf(stdoutMPI, "In scalapack fulldiag, total spin is not calculated !\n"); vec_tmp = malloc(i_max*sizeof(double complex)); diff --git a/src/readdef.c b/src/readdef.c index 4490ac8cc..161c0c67f 100644 --- a/src/readdef.c +++ b/src/readdef.c @@ -986,7 +986,7 @@ int ReadDefFileIdxPara( char defname[D_FileNameMaxReadDef]; char ctmp[D_CharTmpReadDef], ctmp2[256]; - unsigned int i,j, idx, itype; + unsigned int i, idx, itype; int xitmp[8]; int iKWidx=0; int iboolLoc=0; @@ -1004,7 +1004,6 @@ int ReadDefFileIdxPara( int icnt_trans=0; int iflg_trans=0; int icnt_interall=0; - int iflg_interall=0; unsigned int iloop=0; @@ -1503,7 +1502,7 @@ int ReadDefFileIdxPara( if(X->NLaser>0){ //printf("Read Start\n"); while(fgetsMPI(ctmp2, 256, fp) != NULL){ - sscanf(ctmp2, "%s %lf\n", &(ctmp), &(X->ParaLaser[idx])); + sscanf(ctmp2, "%s %lf\n", ctmp, &(X->ParaLaser[idx])); //printf("[%d]:%f\n",idx,X->ParaLaser[idx]); idx++; } @@ -2465,10 +2464,7 @@ void SetConvergenceFactor //In future, convergence facator can be set by a def file. 
int neps = -8; int nepsCG =-8; - int nbisec =-14; int nEnergy = -12; - int nShiftBeta=8; - int nepsvec12=-14; eps=pow(10.0, neps); eps_CG=pow(10.0, nepsCG); eps_Lanczos = pow(10,-X->LanczosEps); diff --git a/src/wrapperMPI.c b/src/wrapperMPI.c index 814d1810e..0ac592b5e 100644 --- a/src/wrapperMPI.c +++ b/src/wrapperMPI.c @@ -349,7 +349,6 @@ void NormMPI_dv( double complex **_v1,//!<[in] [idim] vector to be producted double *dnorm ) { - double complex cdnorm = 0; unsigned long int idim; int istate; @@ -500,6 +499,7 @@ unsigned long int SendRecv_i( ierr = MPI_Sendrecv(&isend, 1, MPI_UNSIGNED_LONG, origin, 0, &ircv, 1, MPI_UNSIGNED_LONG, origin, 0, MPI_COMM_WORLD, &statusMPI); + if (ierr != 0) exitMPI(ierr); return ircv; #else return isend; diff --git a/src/xsetmem.c b/src/xsetmem.c index 2dd40f336..f7204dc9c 100644 --- a/src/xsetmem.c +++ b/src/xsetmem.c @@ -29,9 +29,6 @@ * @author Kazuyoshi Yoshimi (The University of Tokyo) */ - -static unsigned long int mfint[7];/*for malloc*/ - /// /// \brief Set size of memories headers of output files. 
/// \param X [out] BindStruct to get headers of files.\n @@ -56,9 +53,6 @@ void setmem_def struct BindStruct *X, struct BoostList *xBoost ) { - unsigned long int i = 0; - unsigned long int j = 0; - unsigned long int k = 0; X->Def.Tpow = lui_1d_allocate(2 * X->Def.Nsite + 2); X->Def.OrgTpow = lui_1d_allocate(2 * X->Def.Nsite + 2); X->Def.SiteToBit = li_1d_allocate(X->Def.Nsite + 1); @@ -111,7 +105,6 @@ void setmem_def X->Def.ParaLaser = d_1d_allocate(X->Def.NLaser); - unsigned int ipivot, iarrayJ, ispin; xBoost->list_6spin_star = i_2d_allocate(xBoost->R0 * xBoost->num_pivot, 7); xBoost->list_6spin_pair = i_3d_allocate(xBoost->R0 * xBoost->num_pivot, 7, 15); xBoost->arrayJ = cd_3d_allocate(xBoost->NumarrayJ, 3, 3); diff --git a/tool/CMakeLists.txt b/tool/CMakeLists.txt index 21c0939d2..fd2ad6b49 100644 --- a/tool/CMakeLists.txt +++ b/tool/CMakeLists.txt @@ -9,9 +9,11 @@ endif(${CMAKE_PROJECT_NAME} STREQUAL "Project") add_library(key2lower STATIC key2lower.c) add_executable(greenr2k greenr2k.F90) add_executable(cTPQ cTPQ.F90) +add_executable(dynamicalr2k dynamicalr2k.F90) target_link_libraries(greenr2k key2lower ${LAPACK_LIBRARIES}) +target_link_libraries(dynamicalr2k key2lower ${LAPACK_LIBRARIES}) -install(TARGETS greenr2k RUNTIME DESTINATION bin) +install(TARGETS greenr2k dynamicalr2k RUNTIME DESTINATION bin) # # Scripts # diff --git a/tool/dynamicalr2k.F90 b/tool/dynamicalr2k.F90 new file mode 100644 index 000000000..9975670f8 --- /dev/null +++ b/tool/dynamicalr2k.F90 @@ -0,0 +1,511 @@ +MODULE fourier_val + ! + IMPLICIT NONE + ! + INTEGER,SAVE :: & + & nomega, & + & nk_line, & ! Numberof along each k line + & nnode, & ! Number of node of k-path + & nr, & ! Number of R-vector + & norb, & ! Number of orbitals per unit cell + & box(3,3), & ! Supercell index + & nsite, & ! Number of sites + & ncor, & ! Nomber of Correlation function + & nk ! Number of k to be computed + ! + REAL(8),SAVE :: & + & omegamin, & + & omegamax, & + & recipr(3,3) ! 
Reciprocal lattice vector + ! + CHARACTER(256),SAVE :: & + & filehead, & ! Filename header for correlation functions + & file_gindx ! Filename for index of Correlation + ! + INTEGER,ALLOCATABLE,SAVE :: & + & nreq(:), & ! (nr) Number of equivalent R-vector for each R + & irv(:,:,:), & ! (3,125,nr) R-vector + & rindx(:), & ! (nsite) Index of R + & orb(:), & ! (nsite) Index of orbital + & indx(:,:) ! (nr,norb) Mapping index for each Correlation function + ! + REAL(8),ALLOCATABLE,SAVE :: & + & knode(:,:), & ! (3,nnode) Nodes of k path + & phase(:,:), & ! (125,nr) Boundary phase + & kvec(:,:) ! (3,nk) k-vector in the 1st BZ + ! + COMPLEX(8),ALLOCATABLE,SAVE :: & + & cor(:,:,:), & ! (nr,norb,nomega) Correlation function in real space + & cor_k(:,:,:) ! (nk,norb,nomega) Correlation function in the k-space + ! + CHARACTER(256),ALLOCATABLE :: & + & kname(:) ! (nnode) Label of k-point node + ! +END MODULE fourier_val +! +! +! +MODULE fourier_routine + ! + IMPLICIT NONE + ! + INTERFACE + SUBROUTINE key2lower(key) BIND(c) + USE,INTRINSIC :: iso_c_binding + CHARACTER(KIND=C_CHAR) :: key(*) + END SUBROUTINE key2lower + END INTERFACE + ! +CONTAINS +! +! Read from HPhi/mVMC input files +! +SUBROUTINE read_filename() + ! + USE fourier_val, ONLY : file_gindx, filehead, nsite, omegamin, omegamax, nomega + IMPLICIT NONE + ! + INTEGER :: fi = 10 + CHARACTER(256) :: modpara, keyname, namelist + ! + WRITE(*,*) + WRITE(*,*) "##### Read HPhi Input Files #####" + WRITE(*,*) + ! + namelist = "" + CALL GETARG(1, namelist) + ! + ! Read from NameList file + ! + OPEN(fi,file = TRIM(namelist)) + ! + DO + READ(fi,*,END=10) keyname + BACKSPACE(fi) + CALL key2lower(keyname) + ! + IF(TRIM(ADJUSTL(keyname)) == "singleexcitation") THEN + READ(fi,*) keyname, file_gindx + ELSE IF(TRIM(ADJUSTL(keyname)) == "pairexcitation") THEN + READ(fi,*) keyname, file_gindx + ELSE IF(TRIM(ADJUSTL(keyname)) == "modpara") THEN + READ(fi,*) keyname, modpara + ELSE + READ(fi,*) keyname + END IF + END DO + ! 
+10 CONTINUE + WRITE(*,*) " Read from ", TRIM(namelist) + CLOSE(FI) + ! + WRITE(*,*) " Excitation Index file : ", TRIM(ADJUSTL(file_gindx)) + WRITE(*,*) " ModPara file : ", TRIM(ADJUSTL(modpara)) + ! + ! Read from Modpara file + ! + OPEN(fi,file = TRIM(modpara)) + ! + DO + READ(fi,*,END=20) keyname + BACKSPACE(fi) + CALL key2lower(keyname) + ! + IF(TRIM(ADJUSTL(keyname)) == "nsite") THEN + READ(fi,*) keyname, nsite + ELSE IF(TRIM(ADJUSTL(keyname)) == "cdatafilehead") THEN + READ(fi,*) keyname, filehead + ELSE IF(TRIM(ADJUSTL(keyname)) == "nomega") THEN + READ(fi,*) keyname, nomega + ELSE IF(TRIM(ADJUSTL(keyname)) == "omegamax") THEN + READ(fi,*) keyname, omegamax + ELSE IF(TRIM(ADJUSTL(keyname)) == "omegamin") THEN + READ(fi,*) keyname, omegamin + ELSE + READ(fi,*) keyname + END IF + END DO + ! +20 CONTINUE + WRITE(*,*) " Read from ", TRIM(modpara) + WRITE(*,*) " FileHead : ", TRIM(ADJUSTL(filehead)) + WRITE(*,*) " Number of site : ", nsite + WRITE(*,*) " Number of omega : ", nomega + WRITE(*,*) " Minimum Omega : ", omegamin + WRITE(*,*) " Maximum Omega : ", omegamax + CLOSE(FI) + ! + filehead = "output/" // TRIM(ADJUSTL(filehead)) + ! +END SUBROUTINE read_filename +! +! Read geometry from file +! +SUBROUTINE read_geometry() + ! + USE fourier_val, ONLY : recipr, box, nsite, phase, irv, rindx, orb, & + & nr, nreq, norb, nnode, knode, nk_line, kname + IMPLICIT NONE + ! + INTEGER :: fi = 10, isite, ii, ir, ipiv(3), irv0(3), i1, i2, i3, inode + REAL(8) :: phase0(3), work(10), direct(3,3), rrv(3), lenrv, lenrv0 + CHARACTER(256) :: filename + INTEGER,ALLOCATABLE :: irv1(:,:) + ! + WRITE(*,*) + WRITE(*,*) "##### Read Geometry Input File #####" + WRITE(*,*) + ! + ! 2nd argument should be the geometry file name + ! + CALL GETARG(2, filename) + ! + WRITE(*,*) " Read from ", TRIM(filename) + OPEN(fi, file = TRIM(filename)) + ! + ! Direct lattice vector in arbitrary units + !
+ DO ii = 1, 3 + READ(fi,*) direct(1:3,ii) + END DO + WRITE(*,*) " Direct LATTICE VECTOR :" + WRITE(*,'(4x3f15.10)') direct(1:3, 1:3) + ! + ! Boundary phase + ! + READ(fi,*) phase0(1:3) + WRITE(*,*) " Boundary phase[degree] : " + WRITE(*,'(4x3f15.10)') phase0(1:3) + phase0(1:3) = phase0(1:3) * ACOS(-1.0d0) / 180.0d0 + ! + ! Supercell index (a0w, a0l, a1w, a1l) + ! + DO ii = 1, 3 + READ(fi,*) box(1:3,ii) + END DO + WRITE(*,*) " Supercell Index :" + WRITE(*,'(3i8)') box(1:3, 1:3) + ! + ! R-vector and orbital index + ! + ALLOCATE(irv1(3,nsite), orb(nsite), rindx(nsite)) + ! + nr = 0 + DO isite = 1, nsite + READ(fi,*) irv0(1:3), orb(isite) + DO ir = 1, nr + IF(ALL(irv1(1:3,ir) == irv0(1:3))) THEN + rindx(isite) = ir + GOTO 10 + END IF + END DO + nr = nr + 1 + irv1(1:3, nr) = irv0(1:3) + rindx(isite) = nr + ! +10 CONTINUE + ! + END DO + orb(1:nsite) = orb(1:nsite) + 1 + norb = MAXVAL(orb) + WRITE(*,*) " Number of orbitals :", norb + ! + ! k-point + ! + READ(fi,*) nnode, nk_line + ALLOCATE(knode(3,nnode), kname(nnode)) + WRITE(*,*) " Number of k-node, and k-points along lines :", nnode, nk_line + WRITE(*,*) " k-node :" + DO inode = 1, nnode + READ(fi,*) kname(inode), knode(1:3,inode) + WRITE(*,'(a,a,3f10.5)') " ", TRIM(kname(inode)), knode(1:3,inode) + END DO + ! + CLOSE(fi) + ! + ! Compute Reciprocal Lattice Vector + ! + recipr(1:3,1:3) = direct(1:3,1:3) + CALL dgetrf(3, 3, recipr, 3, Ipiv, ii) + CALL dgetri(3, recipr, 3, ipiv, work, 10, ii) + WRITE(*,*) " Reciplocal lattice vector :" + WRITE(*,'(4x3f15.10)') recipr(1:3, 1:3) + ! + ! Move original R-vector to the nearest one with periodic boundary cond. + ! + ALLOCATE(nreq(nsite), irv(3,125,nsite), phase(125,nsite)) + ! + WRITE(*,*) " Number of R-vector :", nr + DO ir = 1, nr + lenrv0 = 1.0d10 + DO i1 = -2, 2 + DO i2 = -2, 2 + DO i3 = -2, 2 + !
+ irv0(1:3) = irv1(1:3,ir) + MATMUL(box(1:3,1:3), (/i1,i2,i3/)) + rrv(1:3) = MATMUL(direct(1:3,1:3), DBLE(irv0(1:3))) + lenrv = SQRT(DOT_PRODUCT(rrv, rrv)) + IF(lenrv < lenrv0 - 1.0d-6) THEN + lenrv0 = lenrv + nreq(ir) = 1 + ELSE IF(ABS(lenrv - lenrv0) < 1.0d-6) THEN + nreq(ir) = nreq(ir) + 1 + ELSE + CYCLE + END IF + ! + irv(1:3, nreq(ir), ir) = irv0(1:3) + phase(nreq(ir), ir) = DOT_PRODUCT(DBLE((/i1,i2,i3/)), phase0(1:3)) + ! + END DO ! i3 = -2, 2 + END DO ! i2 = -2, 2 + END DO ! i1 = -2, 2 + ! + DO i1 = 1, nreq(ir) + WRITE(*,'(3i5,f7.2,a)',advance="no") irv(1:3, i1, ir), phase(i1, ir), ", " + END DO + WRITE(*,*) + ! + END DO ! ir = 1, nr + ! + DEALLOCATE(irv1) + ! +END SUBROUTINE read_geometry +! +! Set k points +! +SUBROUTINE set_kpoints() + ! + USE fourier_val, ONLY : nk, kvec, nnode, nk_line, knode + ! + IMPLICIT NONE + ! + INTEGER :: inode, ik + REAL(8) :: xx + ! + nk = nk_line * (nnode - 1) + 1 + WRITE(*,*) " Number of k : ", nk + ALLOCATE(kvec(3,nk)) + ! + kvec(1:3,1) = knode(1:3,1) + nk = 1 + DO inode = 1, nnode - 1 + DO ik = 1, nk_line + xx = DBLE(ik) / DBLE(nk_line) + nk = nk + 1 + kvec(1:3,nk) = (1d0 - xx)*knode(1:3,inode) + xx*knode(1:3,inode+1) + END DO + END DO + ! +END SUBROUTINE set_kpoints +! +! Read Correlation Function +! +SUBROUTINE read_corrindx() + ! + USE fourier_val, ONLY : file_gindx, ncor, indx, nr, rindx, orb, norb + IMPLICIT NONE + ! + INTEGER :: fi = 10, itmp, icor, nops, iops + CHARACTER(100) :: ctmp + ! + WRITE(*,*) + WRITE(*,*) "##### Read Correlation Index File #####" + WRITE(*,*) + ! + ALLOCATE(indx(nr,norb)) + indx(1:nr,1:norb) = 0 + ! + ! Read index for the One-Body Correlation + ! + OPEN(fi, file = TRIM(file_gindx)) + WRITE(*,*) " Read from ", TRIM(file_gindx) + READ(fi,*) ctmp + READ(fi,*) ctmp, ncor + ncor = ncor - 1 + IF(ncor /= nr*norb) STOP "Number of correlation and NR*Norb is different." + READ(fi,*) ctmp + READ(fi,*) ctmp + READ(fi,*) ctmp + WRITE(*,*) " Number of Correlation Function : ", ncor + ! + ! 
Search mapping index for up-up and down-down correlation + ! + READ(fi,*) nops + DO iops = 1, nops + READ(fi,*) itmp + END DO + ! + DO icor = 1, ncor + READ(fi,*) nops + DO iops = 1, nops + READ(fi,*) itmp + END DO + indx(rindx(itmp + 1), orb(itmp + 1)) = icor + END DO + ! + WRITE(*,*) " Number of Index : ", COUNT(indx(1:nr, 1:norb) /= 0) + ! + CLOSE(fi) + ! +END SUBROUTINE read_corrindx +! +! Read Correlation Function +! +SUBROUTINE read_corrfile() + ! + USE fourier_val, ONLY : filehead, ncor, indx, cor, norb, nr, nomega + IMPLICIT NONE + ! + INTEGER :: fi = 10, icor, iorb, ir, iomega + COMPLEX(8),ALLOCATABLE :: cor0(:,:) + REAL(8) :: dtmp(4) + ! + ALLOCATE(cor(nr,norb,nomega)) + ALLOCATE(cor0(nomega,ncor)) + cor(1:nr,1:norb,1:nomega) = CMPLX(0d0, 0d0, KIND(1d0)) + ! + OPEN(fi, file = TRIM(filehead) // "_DynamicalGreen.dat") + ! + DO icor = 1, ncor + DO iomega = 1, nomega + READ(fi,*) dtmp(1:4) + cor0(iomega,icor) = CMPLX(dtmp(3), dtmp(4), KIND(1d0)) + END DO + END DO + ! + CLOSE(fi) + ! + ! Map it into Up-Up(1) and Down-Down(2) Correlation + ! + DO iorb = 1, norb + DO ir = 1, nr + cor(ir, iorb, 1:nomega) = cor0(1:nomega, indx(ir, iorb)) + END DO + END DO + ! + DEALLOCATE(cor0, indx) + ! +END SUBROUTINE read_corrfile +! +! Fourier transformation +! +SUBROUTINE fourier_cor() + ! + USE fourier_val, ONLY : cor, cor_k, kvec, nk, nr, nreq, norb, irv, phase, nomega + IMPLICIT NONE + ! + INTEGER :: ik, ir, ireq + REAL(8) :: tpi = 2.0 * ACOS(-1d0), theta + COMPLEX(8),ALLOCATABLE :: fmat(:,:) + ! + ALLOCATE(fmat(nk,nr), cor_k(nk,norb,nomega)) + ! + ! Matrix for Fourier trans. exp(-i k R) + ! + DO ik = 1, nk + DO ir = 1, nr + fmat(ik,ir) = CMPLX(0d0, 0d0, KIND(1d0)) + DO ireq = 1, nreq(ir) + theta = - tpi * DOT_PRODUCT(kvec(1:3,ik), DBLE(irv(1:3,ireq,ir))) & + & + tpi * phase(ireq,ir) + fmat(ik,ir) = fmat(ik,ir) + CMPLX(COS(theta), SIN(theta), KIND(1d0)) + END DO + fmat(ik,ir) = fmat(ik,ir) / DBLE(nreq(ir)) + END DO ! ir = 1, nr + END DO ! ik = 1, nk + !
+ CALL zgemm('N', 'N', nk, norb*nomega, nr, CMPLX(1d0, 0d0, KIND(1d0)), fmat, nk, & + & cor, nr, CMPLX(0d0,0d0,KIND(1d0)), cor_k, nk) + ! + DEALLOCATE(fmat, cor) + ! +END SUBROUTINE fourier_cor +! +! Output Fourier component of Correlation function +! +SUBROUTINE output_cor() + ! + USE fourier_val, ONLY : cor_k, nk, nnode, knode, nk_line, kname, norb, & + & recipr, filehead, nomega, omegamin, omegamax + IMPLICIT NONE + ! + INTEGER :: fo = 20, ik, inode, ikk, iomega + REAL(8) :: dk(3), dk_cart(3), xk(nk), & + & xk_label(nnode), klength, omega + ! + ! Compute x-position for plotting band + ! + xk(1) = 0.0 + ikk = 1 + DO inode = 1, nnode - 1 + dk(1:3) = knode(1:3, inode+1) - knode(1:3, inode) + dk_cart(1:3) = MATMUL(recipr(1:3,1:3), dk(1:3)) + klength = SQRT(DOT_PRODUCT(dk_cart, dk_cart)) / DBLE(nk_line) + xk_label(inode) = xk(ikk) + DO ik = 1, nk_line + xk(ikk+1) = xk(ikk) + klength + ikk = ikk + 1 + END DO + END DO + xk_label(nnode) = xk(ikk) + ! + ! Output Correlation function in the 1st BZ + ! + WRITE(*,*) + WRITE(*,*) "##### Output Files #####" + WRITE(*,*) + ! + WRITE(*,*) " Correlation in k-space : ", TRIM(filehead) // "_dyn.dat" + ! + OPEN(fo, file = TRIM(filehead) // "_dyn.dat") + ! + DO iomega = 1, nomega + omega = (omegamax - omegamin) * DBLE(iomega - 1) / DBLE(nomega) + omegamin + DO ik = 1, ikk + WRITE(fo,'(1000e15.5)') xk(ik), omega, cor_k(ik, 1:norb, iomega) + END DO + WRITE(fo,*) + END DO + ! + CLOSE(fo) + ! + OPEN(fo, file = "kpath.gp") + ! + WRITE(fo,'(a)',advance="no") "set xtics (" + DO inode = 1, nnode - 1 + WRITE(fo,'(a,a,a,f10.5,a)',advance="no") "'", TRIM(kname(inode)), "' ", xk_label(inode), ", " + END DO + WRITE(fo,'(a,a,a,f10.5,a)') "'", TRIM(kname(nnode)), "' ", xk_label(nnode), ")" + WRITE(fo,'(a)') "set ylabel 'Frequency'" + WRITE(fo,'(a)') "set grid xtics lt 1 lc 0" + ! + CLOSE(fo) + ! + DEALLOCATE(cor_k) + ! +END SUBROUTINE output_cor +! +END MODULE fourier_routine +! +! Main routine +! +PROGRAM dynamicalr2k + ! 
+ USE fourier_routine, ONLY : read_filename, read_geometry, set_kpoints, & + & read_corrindx, read_corrfile, fourier_cor, output_cor + IMPLICIT NONE + ! + CALL read_filename() + CALL read_geometry() + CALL set_kpoints() + CALL read_corrindx() + CALL read_corrfile() + CALL fourier_cor() + CALL output_cor() + ! + WRITE(*,*) + WRITE(*,*) "##### Done #####" + WRITE(*,*) + ! +END PROGRAM dynamicalr2k diff --git a/tool/key2lower.c b/tool/key2lower.c index 9258b70a4..f416eade1 100644 --- a/tool/key2lower.c +++ b/tool/key2lower.c @@ -2,6 +2,6 @@ #include void key2lower(char *key){ - int ii; + unsigned int ii; for (ii = 0; ii < strlen(key); ii++) key[ii] = tolower(key[ii]); } From 7082cf6111dd11016c45204bbdf11743e10a88e0 Mon Sep 17 00:00:00 2001 From: mitsuaki1987 Date: Sat, 23 Mar 2019 01:28:15 +0900 Subject: [PATCH 18/50] Backup --- src/CalcSpectrum.c | 31 ++++++++- src/CalcSpectrumByBiCG.c | 7 +- src/CalcSpectrumByFullDiag.c | 3 - src/HPhiMain.c | 7 +- src/StdFace/StdFace_main.c | 121 ++++++++++++++++++++++++---------- src/include/DefCommon.h | 1 + test/spectrum_spin_kagome.sh | 57 +++++++++++----- test/spectrum_spingc_honey.sh | 65 +++++++++++++----- 8 files changed, 212 insertions(+), 80 deletions(-) diff --git a/src/CalcSpectrum.c b/src/CalcSpectrum.c index 1eafb32c6..774ac324f 100644 --- a/src/CalcSpectrum.c +++ b/src/CalcSpectrum.c @@ -22,7 +22,7 @@ #include "PairEx.h" #include "wrapperMPI.h" #include "FileIO.h" -#include "./common/setmemory.h" +#include "common/setmemory.h" #include "readdef.h" #include "sz.h" #include "check.h" @@ -430,6 +430,32 @@ int CalcSpectrum( double complex *dcomega; size_t byte_size; + if (X->Bind.Def.iFlgCalcSpec == CALCSPEC_SCRATCH) { + free_lui_1d_allocate(list_1); + free_lui_1d_allocate(list_2_1); + free_lui_1d_allocate(list_2_2); + free_d_1d_allocate(list_Diagonal); + free_cd_2d_allocate(v0); + v1Org = cd_2d_allocate(X->Bind.Check.idim_max + 1, 1); + for (i = 1; i <= X->Bind.Check.idim_max; i++) v1Org[i][0] = v1[i][0]; + 
free_cd_2d_allocate(v1); +#ifdef MPI + free_lui_1d_allocate(list_1buf); + free_cd_2d_allocate(v1buf); +#endif // MPI + free_d_1d_allocate(X->Bind.Phys.num_down); + free_d_1d_allocate(X->Bind.Phys.num_up); + free_d_1d_allocate(X->Bind.Phys.num); + free_d_1d_allocate(X->Bind.Phys.num2); + free_d_1d_allocate(X->Bind.Phys.energy); + free_d_1d_allocate(X->Bind.Phys.var); + free_d_1d_allocate(X->Bind.Phys.doublon); + free_d_1d_allocate(X->Bind.Phys.doublon2); + free_d_1d_allocate(X->Bind.Phys.Sz); + free_d_1d_allocate(X->Bind.Phys.Sz2); + free_d_1d_allocate(X->Bind.Phys.s2); + }/*if (X->Bind.Def.iFlgCalcSpec == CALCSPEC_SCRATCH)*/ + //set omega if (SetOmega(&(X->Bind.Def)) != TRUE) { fprintf(stderr, "Error: Fail to set Omega.\n"); @@ -479,14 +505,13 @@ int CalcSpectrum( return FALSE; } X->Bind.Def.iFlagListModified = iFlagListModified; - - v1Org = cd_2d_allocate(X->Bind.Check.idim_maxOrg + 1,1); //Make excited state StartTimer(6100); if (X->Bind.Def.iFlgCalcSpec == RECALC_NOT || X->Bind.Def.iFlgCalcSpec == RECALC_OUTPUT_TMComponents_VEC || (X->Bind.Def.iFlgCalcSpec == RECALC_INOUT_TMComponents_VEC && X->Bind.Def.iCalcType == CG)) { + v1Org = cd_2d_allocate(X->Bind.Check.idim_maxOrg + 1, 1); //input eigen vector StartTimer(6101); fprintf(stdoutMPI, " Start: An Eigenvector is inputted in CalcSpectrum.\n"); diff --git a/src/CalcSpectrumByBiCG.c b/src/CalcSpectrumByBiCG.c index ff8feba2f..a5cc7702a 100644 --- a/src/CalcSpectrumByBiCG.c +++ b/src/CalcSpectrumByBiCG.c @@ -118,7 +118,8 @@ write @f$\alpha, \beta@f$, projected residual for restart */ int OutputTMComponents_BiCG( struct EDMainCalStruct *X,//!<[inout] - int liLanczosStp//!<[in] the BiCG step + int liLanczosStp,//!<[in] the BiCG step + int nL ) { char sdt[D_FileNameMax]; @@ -128,7 +129,7 @@ int OutputTMComponents_BiCG( alphaCG = (double complex*)malloc(liLanczosStp * sizeof(double complex)); betaCG = (double complex*)malloc(liLanczosStp * sizeof(double complex)); - res_save = (double complex*)malloc(liLanczosStp * 
sizeof(double complex)); + res_save = (double complex*)malloc(liLanczosStp*nL * sizeof(double complex)); komega_bicg_getcoef(alphaCG, betaCG, &z_seed, res_save); @@ -297,7 +298,7 @@ int CalcSpectrumByBiCG(
      • Save @f$\alpha, \beta@f$, projected residual
      • */ if (X->Bind.Def.iFlgCalcSpec != RECALC_FROM_TMComponents) - OutputTMComponents_BiCG(X, abs(status[0])); + OutputTMComponents_BiCG(X, abs(status[0]), NdcSpectrum); /**
      • output vectors for recalculation
      diff --git a/src/CalcSpectrumByFullDiag.c b/src/CalcSpectrumByFullDiag.c index 7fdd4136b..85b9210ae 100644 --- a/src/CalcSpectrumByFullDiag.c +++ b/src/CalcSpectrumByFullDiag.c @@ -27,9 +27,6 @@ full-diagonalization method. #include "CalcTime.h" #include "common/setmemory.h" #include "CalcSpectrum.h" - -void zcopy_(int *n, double complex *x, int *incx, double complex *y, int *incy); -void zdotc_(double complex *xy, int *n, double complex *x, int *incx, double complex *y, int *incy); /** @brief Compute the Green function with the Lehmann representation and FD @f[ diff --git a/src/HPhiMain.c b/src/HPhiMain.c index 0659b4cd7..84faa2153 100644 --- a/src/HPhiMain.c +++ b/src/HPhiMain.c @@ -30,6 +30,7 @@ #include "wrapperMPI.h" #include "splash.h" #include "CalcTime.h" +#include "common/setmemory.h" /*! @mainpage @@ -252,7 +253,8 @@ int main(int argc, char* argv[]){ } //Start Calculation - if(X.Bind.Def.iFlgCalcSpec == CALCSPEC_NOT) { + if(X.Bind.Def.iFlgCalcSpec == CALCSPEC_NOT || + X.Bind.Def.iFlgCalcSpec == CALCSPEC_SCRATCH) { if(check(&(X.Bind))==MPIFALSE){ exitMPI(-1); @@ -317,7 +319,8 @@ int main(int argc, char* argv[]){ exitMPI(-3); } } - else{ + + if(X.Bind.Def.iFlgCalcSpec != CALCSPEC_NOT){ StartTimer(6000); if (CalcSpectrum(&X) != TRUE) { StopTimer(6000); diff --git a/src/StdFace/StdFace_main.c b/src/StdFace/StdFace_main.c index 83ecdcfc1..28a8a3b1e 100644 --- a/src/StdFace/StdFace_main.c +++ b/src/StdFace/StdFace_main.c @@ -204,6 +204,7 @@ static void PrintCalcMod(struct StdIntList *StdI) else if (strcmp(StdI->CalcSpec, "restart_in") == 0) iCalcSpec = 4; else if (strcmp(StdI->CalcSpec, "restartsave") == 0 || strcmp(StdI->CalcSpec, "restart") == 0) iCalcSpec = 5; + else if (strcmp(StdI->CalcSpec, "scratch") == 0) iCalcSpec = 6; else { fprintf(stdout, "\n ERROR ! 
CalcSpec : %s\n", StdI->CalcSpec); StdFace_exit(-1); @@ -214,7 +215,7 @@ static void PrintCalcMod(struct StdIntList *StdI) fprintf(fp, "#CalcType = 0:Lanczos, 1:TPQCalc, 2:FullDiag, 3:CG, 4:Time-evolution\n"); fprintf(fp, "#CalcModel = 0:Hubbard, 1:Spin, 2:Kondo, 3:HubbardGC, 4:SpinGC, 5:KondoGC\n"); fprintf(fp, "#Restart = 0:None, 1:Save, 2:Restart&Save, 3:Restart\n"); - fprintf(fp, "#CalcSpec = 0:None, 1:Normal, 2:No H*Phi, 3:Save, 4:Restart, 5:Restart&Save\n"); + fprintf(fp, "#CalcSpec = 0:None, 1:Normal, 2:No H*Phi, 3:Save, 4:Restart, 5:Restart&Save, 6:Scratch\n"); fprintf(fp, "CalcType %3d\n", iCalcType); fprintf(fp, "CalcModel %3d\n", iCalcModel); fprintf(fp, "ReStart %3d\n", iRestart); @@ -233,7 +234,7 @@ static void PrintCalcMod(struct StdIntList *StdI) */ static void PrintExcitation(struct StdIntList *StdI) { FILE *fp; - int NumOp, **spin, isite, ispin, icell, itau, iEx; + int NumOp, **spin, isite, ispin, icell, itau, iEx, lR; double *coef, Cphase, S, Sz; double *fourier_r, *fourier_i; @@ -279,10 +280,12 @@ static void PrintExcitation(struct StdIntList *StdI) { spin[1][1] = 1; } StdI->SpectrumBody = 2; + lR = 0; } else { fprintf(stdout, " SpectrumType = %s\n", StdI->SpectrumType); - if (strcmp(StdI->SpectrumType, "szsz") == 0) { + if (strcmp(StdI->SpectrumType, "szsz") == 0 || + strcmp(StdI->SpectrumType, "szsz_r") == 0) { if (strcmp(StdI->model, "spin") == 0) { NumOp = StdI->S2 + 1; for (ispin = 0; ispin <= StdI->S2; ispin++) { @@ -301,9 +304,12 @@ static void PrintExcitation(struct StdIntList *StdI) { spin[1][0] = 1; spin[1][1] = 1; } + if (strcmp(StdI->SpectrumType, "szsz") == 0) lR = 0; + else lR = 1; StdI->SpectrumBody = 2; } - else if (strcmp(StdI->SpectrumType, "s+s-") == 0) { + else if (strcmp(StdI->SpectrumType, "s+s-") == 0 || + strcmp(StdI->SpectrumType, "s+s-_r") == 0) { if (strcmp(StdI->model, "spin") == 0 && StdI->S2 > 1) { NumOp = StdI->S2; S = (double)StdI->S2 * 0.5; @@ -320,9 +326,12 @@ static void PrintExcitation(struct StdIntList *StdI) 
{ spin[0][0] = 0; spin[0][1] = 1; } + if (strcmp(StdI->SpectrumType, "s+s-") == 0) lR = 0; + else lR = 1; StdI->SpectrumBody = 2; } - else if (strcmp(StdI->SpectrumType, "density") == 0) { + else if (strcmp(StdI->SpectrumType, "density") == 0 || + strcmp(StdI->SpectrumType, "density_r") == 0) { NumOp = 2; coef[0] = 1.0; coef[1] = 1.0; @@ -330,18 +339,26 @@ static void PrintExcitation(struct StdIntList *StdI) { spin[0][1] = 0; spin[1][0] = 1; spin[1][1] = 1; + if (strcmp(StdI->SpectrumType, "density") == 0) lR = 0; + else lR = 1; StdI->SpectrumBody = 2; } - else if (strcmp(StdI->SpectrumType, "up") == 0) { + else if (strcmp(StdI->SpectrumType, "up") == 0 || + strcmp(StdI->SpectrumType, "up_r") == 0) { NumOp = 1; coef[0] = 1.0; spin[0][0] = 0; + if (strcmp(StdI->SpectrumType, "up") == 0) lR = 0; + else lR = 1; StdI->SpectrumBody = 1; } - else if (strcmp(StdI->SpectrumType, "down") == 0) { + else if (strcmp(StdI->SpectrumType, "down") == 0 || + strcmp(StdI->SpectrumType, "down_r") == 0) { NumOp = 1; coef[0] = 1.0; spin[0][0] = 1; + if (strcmp(StdI->SpectrumType, "down") == 0) lR = 0; + else lR = 1; StdI->SpectrumBody = 1; } else { @@ -371,49 +388,87 @@ static void PrintExcitation(struct StdIntList *StdI) { if (StdI->SpectrumBody == 1) { fp = fopen("single.def", "w"); fprintf(fp, "=============================================\n"); - fprintf(fp, "NSingle %d\n", 2); + if (lR == 0) fprintf(fp, "NSingle %d\n", 2); + else fprintf(fp, "NSingle %d\n", 1+ StdI->nsite); fprintf(fp, "=============================================\n"); fprintf(fp, "============== Single Excitation ============\n"); fprintf(fp, "=============================================\n"); - if (strcmp(StdI->model, "kondo") == 0) { - for (iEx = 0; iEx < 2; iEx++) { - fprintf(fp, "%d\n", StdI->nsite / 2 * NumOp); - for (isite = StdI->nsite / 2; isite < StdI->nsite; isite++) { - fprintf(fp, "%d %d 0 %25.15f %25.15f\n", isite, spin[0][0], - fourier_r[isite] * coef[0], fourier_i[isite] * coef[0]); - }/*for (isite 
= 0; isite < StdI->nsite; isite++)*/ - }/*for (iEx = 0; iEx < 2; iEx++)*/ - }/*if (strcmp(StdI->model, "kondo") == 0)*/ + if (lR == 0) { + if (strcmp(StdI->model, "kondo") == 0) { + for (iEx = 0; iEx < 2; iEx++) { + fprintf(fp, "%d\n", StdI->nsite / 2 * NumOp); + for (isite = StdI->nsite / 2; isite < StdI->nsite; isite++) { + fprintf(fp, "%d %d 0 %25.15f %25.15f\n", isite, spin[0][0], + fourier_r[isite] * coef[0], fourier_i[isite] * coef[0]); + }/*for (isite = 0; isite < StdI->nsite; isite++)*/ + }/*for (iEx = 0; iEx < 2; iEx++)*/ + }/*if (strcmp(StdI->model, "kondo") == 0)*/ + else { + for (iEx = 0; iEx < 2; iEx++) { + fprintf(fp, "%d\n", StdI->nsite * NumOp); + for (isite = 0; isite < StdI->nsite; isite++) { + fprintf(fp, "%d %d 0 %25.15f %25.15f\n", isite, spin[0][0], + fourier_r[isite] * coef[0], fourier_i[isite] * coef[0]); + }/*for (isite = 0; isite < StdI->nsite; isite++)*/ + }/*for (iEx = 0; iEx < 2; iEx++)*/ + } + }/*if (lR == 0)*/ else { - for (iEx = 0; iEx < 2; iEx++) { - fprintf(fp, "%d\n", StdI->nsite * NumOp); + if (strcmp(StdI->model, "kondo") == 0) { + fprintf(fp, "%d\n", NumOp); + fprintf(fp, "%d %d 0 %25.15f 0.0\n", StdI->nsite / 2, spin[0][0], coef[0]); + for (isite = StdI->nsite / 2; isite < StdI->nsite; isite++) { + fprintf(fp, "%d\n", NumOp); + fprintf(fp, "%d %d 0 %25.15f 0.0\n", isite, spin[0][0], coef[0]); + } + } + else { + fprintf(fp, "%d\n", NumOp); + fprintf(fp, "%d %d 0 %25.15f 0.0\n", 0, spin[0][0], coef[0]); for (isite = 0; isite < StdI->nsite; isite++) { - fprintf(fp, "%d %d 0 %25.15f %25.15f\n", isite, spin[0][0], - fourier_r[isite] * coef[0], fourier_i[isite] * coef[0]); - }/*for (isite = 0; isite < StdI->nsite; isite++)*/ - }/*for (iEx = 0; iEx < 2; iEx++)*/ - } + fprintf(fp, "%d\n", NumOp); + fprintf(fp, "%d %d 0 %25.15f 0.0\n", isite, spin[0][0], coef[0]); + } + } + }/*if (lR != 0)*/ fprintf(stdout, " single.def is written.\n\n"); - } + }/*if (StdI->SpectrumBody == 1)*/ else { fp = fopen("pair.def", "w"); fprintf(fp, 
"=============================================\n"); - fprintf(fp, "NPair %d\n", 2); + if (lR == 0) fprintf(fp, "NPair %d\n", 2); + else fprintf(fp, "NSingle %d\n", 1 + StdI->nsite); fprintf(fp, "=============================================\n"); fprintf(fp, "=============== Pair Excitation =============\n"); fprintf(fp, "=============================================\n"); - for (iEx = 0; iEx < 2; iEx++) { - fprintf(fp, "%d\n", StdI->nsite * NumOp); + if (lR == 0) { + for (iEx = 0; iEx < 2; iEx++) { + fprintf(fp, "%d\n", StdI->nsite * NumOp); + for (isite = 0; isite < StdI->nsite; isite++) { + for (ispin = 0; ispin < NumOp; ispin++) { + fprintf(fp, "%d %d %d %d 1 %25.15f %25.15f\n", + isite, spin[ispin][0], isite, spin[ispin][1], + fourier_r[isite] * coef[ispin], fourier_i[isite] * coef[ispin]); + } + } + }/*for (iEx = 0; iEx < 2; iEx++)*/ + }/*if (lR == 0)*/ + else { + fprintf(fp, "%d\n", NumOp); + for (ispin = 0; ispin < NumOp; ispin++) { + fprintf(fp, "%d %d %d %d 1 %25.15f 0.0\n", + 0, spin[ispin][0], 0, spin[ispin][1], coef[ispin]); + } for (isite = 0; isite < StdI->nsite; isite++) { + fprintf(fp, "%d\n", NumOp); for (ispin = 0; ispin < NumOp; ispin++) { - fprintf(fp, "%d %d %d %d 1 %25.15f %25.15f\n", - isite, spin[ispin][0], isite, spin[ispin][1], - fourier_r[isite] * coef[ispin], fourier_i[isite] * coef[ispin]); + fprintf(fp, "%d %d %d %d 1 %25.15f 0.0\n", + isite, spin[ispin][0], isite, spin[ispin][1], coef[ispin]); } } - }/*for (iEx = 0; iEx < 2; iEx++)*/ + }/*if (lR != 0)*/ fprintf(stdout, " pair.def is written.\n\n"); - } + }/*if (StdI->SpectrumBody == 2)*/ fflush(fp); fclose(fp); diff --git a/src/include/DefCommon.h b/src/include/DefCommon.h index 2514f58cc..a58b7a2f0 100644 --- a/src/include/DefCommon.h +++ b/src/include/DefCommon.h @@ -60,6 +60,7 @@ #define RECALC_OUTPUT_TMComponents_VEC 3 #define RECALC_FROM_TMComponents_VEC 4 #define RECALC_INOUT_TMComponents_VEC 5 +#define CALCSPEC_SCRATCH 6 /*!< ReStartVector */ #define NUM_RESTART 4 diff --git 
a/test/spectrum_spin_kagome.sh b/test/spectrum_spin_kagome.sh index 06e0ddc4f..6ce503b4c 100755 --- a/test/spectrum_spin_kagome.sh +++ b/test/spectrum_spin_kagome.sh @@ -3,9 +3,9 @@ mkdir -p spectrum_spin_kagome/ cd spectrum_spin_kagome # -# Ground state +# Sz-Sz spectrum # -cat > stan1.in < stan2.in <> stan2.in <> stan2.in < stan2.in <> stan2.in < stan2.in < stan1.in < stan2.in <> stan2.in <> stan2.in < stan2.in <> stan2.in < stan2.in < Date: Sat, 23 Mar 2019 22:48:22 +0900 Subject: [PATCH 19/50] Backup --- test/spectrum_genspin_ladder.sh | 33 ++++++----- test/spectrum_genspingc_ladder.sh | 32 ++++++----- test/spectrum_hubbard_square.sh | 87 +++++++++++++++++++++-------- test/spectrum_hubbardgc_tri.sh | 91 +++++++++++++++++++++++-------- test/spectrum_kondo_chain.sh | 83 ++++++++++++++++++++-------- test/spectrum_kondogc_chain.sh | 83 ++++++++++++++++++++-------- 6 files changed, 283 insertions(+), 126 deletions(-) diff --git a/test/spectrum_genspin_ladder.sh b/test/spectrum_genspin_ladder.sh index dd7e303b8..17d6bdec7 100755 --- a/test/spectrum_genspin_ladder.sh +++ b/test/spectrum_genspin_ladder.sh @@ -3,9 +3,9 @@ mkdir -p spectrum_genspin_ladder/ cd spectrum_genspin_ladder # -# Ground state +# Sz-Sz spectrum # -cat > stan1.in < stan2.in <> stan2.in <> stan2.in < stan2.in < stan1.in < stan2.in <> stan2.in <> stan2.in < stan2.in < stan1.in < stan2.in <> stan2.in <> stan2.in < stan2.in <> stan2.in < stan2.in <> stan2.in < stan2.in <> stan2.in < stan2.in < stan1.in < stan2.in <> stan2.in <> stan2.in < stan2.in <> stan2.in < stan2.in <> stan2.in < stan2.in <> stan2.in < stan2.in < stan1.in < stan2.in <> stan2.in <> stan2.in < stan2.in <> stan2.in < stan2.in <> stan2.in < stan2.in <> stan2.in < stan2.in < stan1.in < stan2.in <> stan2.in <> stan2.in < stan2.in <> stan2.in < stan2.in <> stan2.in < stan2.in <> stan2.in < stan2.in < Date: Mon, 25 Mar 2019 01:15:59 +0900 Subject: [PATCH 20/50] BugFix in TPQ --- src/CalcByTPQ.c | 44 ++++++++++-------------- 
src/FirstMultiply.c | 83 ++++++++++++++++++++++++++------------------- 2 files changed, 66 insertions(+), 61 deletions(-) diff --git a/src/CalcByTPQ.c b/src/CalcByTPQ.c index ac38a1f48..982788692 100644 --- a/src/CalcByTPQ.c +++ b/src/CalcByTPQ.c @@ -23,6 +23,7 @@ #include "wrapperMPI.h" #include "CalcTime.h" #include "common/setmemory.h" +#include "mltplyCommon.h" /** * @file CalcByTPQ.c * @version 0.1, 0.2 @@ -65,6 +66,8 @@ int CalcByTPQ( step_spin = ExpecInterval; X->Bind.Def.St = 0; fprintf(stdoutMPI, "%s", cLogTPQ_Start); + global_norm = d_1d_allocate(NumAve); + global_1st_norm = d_1d_allocate(NumAve); //for rand_i =0, rand_iBind.Def.istep = 0; - StartTimer(3300); - iret = expec_cisajs(&(X->Bind), NumAve, v0, v1); - StopTimer(3300); - if (iret != 0) return -1; - - StartTimer(3400); - iret = expec_cisajscktaltdc(&(X->Bind), NumAve, v0, v1); - StopTimer(3400); - if (iret != 0) return -1; - /**@brief Compute v1=0, and compute v0 = H*v1 */ @@ -238,6 +226,20 @@ int CalcByTPQ( Multiply(&(X->Bind)); StopTimer(3500); + if (step_i%step_spin == 0) { + StartTimer(3300); + zclear(NumAve*X->Bind.Check.idim_max, &v1[1][0]); + iret = expec_cisajs(&(X->Bind), NumAve, v1, v0); + StopTimer(3300); + if (iret != 0) return -1; + + StartTimer(3400); + zclear(NumAve*X->Bind.Check.idim_max, &v1[1][0]); + iret = expec_cisajscktaltdc(&(X->Bind), NumAve, v1, v0); + StopTimer(3400); + if (iret != 0) return -1; + } + StartTimer(3200); iret = expec_energy_flct(&(X->Bind), NumAve, v0, v1); StopTimer(3200); @@ -273,18 +275,6 @@ int CalcByTPQ( else return -1; }/*for (rand_i = 0; rand_i < NumAve; rand_i++)*/ StopTimer(3600); - - if (step_i%step_spin == 0) { - StartTimer(3300); - iret = expec_cisajs(&(X->Bind), NumAve, v0, v1); - StopTimer(3300); - if (iret != 0) return -1; - - StartTimer(3400); - iret = expec_cisajscktaltdc(&(X->Bind), NumAve, v0, v1); - StopTimer(3400); - if (iret != 0) return -1; - } }/*for (step_i = X->Bind.Def.istep; step_i < X->Bind.Def.Lanczos_max; step_i++)*/ if 
(X->Bind.Def.iReStart == RESTART_OUT || X->Bind.Def.iReStart == RESTART_INOUT) { diff --git a/src/FirstMultiply.c b/src/FirstMultiply.c index ff7f69a46..0df7065ac 100644 --- a/src/FirstMultiply.c +++ b/src/FirstMultiply.c @@ -18,7 +18,9 @@ #include "common/setmemory.h" #include "wrapperMPI.h" #include "CalcTime.h" - +#include "mltplyCommon.h" +#include "expec_cisajs.h" +#include "expec_cisajscktaltdc.h" /** * @file FirstMultiply.c * @author Takahiro Misawa (The University of Tokyo) @@ -44,22 +46,21 @@ int FirstMultiply(struct BindStruct *X) { double Ns; long unsigned int u_long_i; dsfmt_t dsfmt; - int mythread, rand_i; + int mythread, rand_i, iret; Ns = 1.0*X->Def.NsiteMPI; i_max = X->Check.idim_max; for (rand_i = 0; rand_i < NumAve; rand_i++) { #pragma omp parallel default(none) private(i, mythread, u_long_i, dsfmt) \ - shared(v0, v1, nthreads, myrank, rand_i, X, stdoutMPI, cLogCheckInitComplex, cLogCheckInitReal) \ - firstprivate(i_max) +shared(v0, v1, nthreads, myrank, rand_i, X, stdoutMPI, cLogCheckInitComplex, cLogCheckInitReal) \ +firstprivate(i_max) { #pragma omp for for (i = 1; i <= i_max; i++) { v0[i][rand_i] = 0.0; v1[i][rand_i] = 0.0; } - /* Initialise MT */ @@ -77,35 +78,49 @@ int FirstMultiply(struct BindStruct *X) { #pragma omp for for (i = 1; i <= i_max; i++) v1[i][rand_i] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5) + 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5)*I; - }/*if (X->Def.iInitialVecType == 0)*/ - else { + }/*if (X->Def.iInitialVecType == 0)*/ + else { #pragma omp for - for (i = 1; i <= i_max; i++) - v1[i][rand_i] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5); - } - StopTimer(3101); - - }/*#pragma omp parallel*/ - /* + for (i = 1; i <= i_max; i++) + v1[i][rand_i] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5); + } + StopTimer(3101); + }/*#pragma omp parallel*/ + /* Normalize v - */ - dnorm=0.0; + */ + dnorm = 0.0; #pragma omp parallel for default(none) private(i) shared(v1, i_max, rand_i) reduction(+: dnorm) - for(i=1;i<=i_max;i++){ - 
dnorm += conj(v1[i][rand_i])*v1[i][rand_i]; - } - dnorm = SumMPI_dc(dnorm); - dnorm=sqrt(dnorm); - global_1st_norm[rand_i] = dnorm; -#pragma omp parallel for default(none) private(i) shared(v0,v1,rand_i) firstprivate(i_max, dnorm) - for(i=1;i<=i_max;i++){ - v1[i][rand_i] = v1[i][rand_i] / dnorm; - v0[i][rand_i] = v1[i][rand_i]; - } + for (i = 1; i <= i_max; i++) { + dnorm += conj(v1[i][rand_i])*v1[i][rand_i]; + } + dnorm = SumMPI_dc(dnorm); + dnorm = sqrt(dnorm); + global_1st_norm[rand_i] = dnorm; +#pragma omp parallel for default(none) private(i) shared(v1,rand_i) firstprivate(i_max, dnorm) + for (i = 1; i <= i_max; i++) v1[i][rand_i] = v1[i][rand_i] / dnorm; }/*for (rand_i = 0; rand_i < NumAve; rand_i++)*/ TimeKeeperWithRandAndStep(X, cFileNameTimeKeep, cTPQStep, "a", rand_i, step_i); - + /**@brief +Compute expectation value at infinite temperature +*/ + X->Def.istep = 0; + StartTimer(3300); + zclear(NumAve*i_max, &v0[1][0]); + iret = expec_cisajs(X, NumAve, v0, v1); + StopTimer(3300); + if (iret != 0) return -1; + + StartTimer(3400); + zclear(NumAve*i_max, &v0[1][0]); + iret = expec_cisajscktaltdc(X, NumAve, v0, v1); + StopTimer(3400); + if (iret != 0) return -1; + +#pragma omp parallel for default(none) private(i,rand_i) shared(v0,v1,i_max,NumAve) + for (i = 1; i <= i_max; i++) + for (rand_i = 0; rand_i < NumAve; rand_i++) v0[i][rand_i] = v1[i][rand_i]; StartTimer(3102); if(expec_energy_flct(X, NumAve, v0, v1) !=0){ StopTimer(3102); @@ -114,13 +129,15 @@ int FirstMultiply(struct BindStruct *X) { StopTimer(3102); for (rand_i = 0; rand_i < NumAve; rand_i++) { -#pragma omp parallel for default(none) private(i) shared(v0, v1, list_1,rand_i) firstprivate(i_max, Ns, LargeValue, myrank) +#pragma omp parallel for default(none) private(i) shared(v0, v1, list_1,rand_i) \ +firstprivate(i_max, Ns, LargeValue, myrank) for (i = 1; i <= i_max; i++) { v0[i][rand_i] = LargeValue * v1[i][rand_i] - v0[i][rand_i] / Ns; } dnorm = 0.0; -#pragma omp parallel for default(none) 
private(i) shared(v0,rand_i) firstprivate(i_max) reduction(+: dnorm) +#pragma omp parallel for default(none) private(i) shared(v0,rand_i) \ +firstprivate(i_max) reduction(+: dnorm) for (i = 1; i <= i_max; i++) { dnorm += conj(v0[i][rand_i])*v0[i][rand_i]; } @@ -128,10 +145,8 @@ int FirstMultiply(struct BindStruct *X) { dnorm = sqrt(dnorm); global_norm[rand_i] = dnorm; #pragma omp parallel for default(none) private(i) shared(v0,rand_i) firstprivate(i_max, dnorm) - for (i = 1; i <= i_max; i++) { - v0[i][rand_i] = v0[i][rand_i] / dnorm; - } - } + for (i = 1; i <= i_max; i++) v0[i][rand_i] = v0[i][rand_i] / dnorm; + }/*for (rand_i = 0; rand_i < NumAve; rand_i++)*/ TimeKeeperWithRandAndStep(X, cFileNameTimeKeep, cTPQStepEnd, "a", rand_i, step_i); return 0; } From c5351d73327632e4c17519f47c284e8cf411040d Mon Sep 17 00:00:00 2001 From: mitsuaki1987 Date: Tue, 26 Mar 2019 01:35:30 +0900 Subject: [PATCH 21/50] Backup --- src/CalcByTPQ.c | 2 - src/CalcSpectrum.c | 8 +- src/CalcSpectrumByBiCG.c | 2 +- src/FirstMultiply.c | 2 - src/SingleExHubbard.c | 2 +- src/expec_cisajs.c | 14 +- src/expec_cisajscktaltdc.c | 6 - src/mltply.c | 46 +- src/mltplyMPIHubbardCore.c | 4 +- src/mltplyMPISpin.c | 1 + test/lobcg_genspin_ladder.sh | 1129 +++++++++++++++++++++++++++- test/lobcg_genspingc_ladder.sh | 1159 ++++++++++++++++++++++++++++- test/lobcg_hubbard_square.sh | 273 ++++++- test/lobcg_hubbardgc_tri.sh | 153 +++- test/lobcg_kondo_chain.sh | 301 +++++++- test/lobcg_kondogc_chain.sh | 445 ++++++++++- test/lobcg_spin_kagome.sh | 393 +++++++++- test/lobcg_spingc_honey.sh | 523 ++++++++++++- test/spectrum_genspin_ladder.sh | 16 +- test/spectrum_genspingc_ladder.sh | 16 +- test/spectrum_hubbard_square.sh | 40 +- test/spectrum_hubbardgc_tri.sh | 40 +- test/spectrum_kondo_chain.sh | 40 +- test/spectrum_kondogc_chain.sh | 40 +- test/spectrum_spin_kagome.sh | 24 +- test/spectrum_spingc_honey.sh | 24 +- 26 files changed, 4593 insertions(+), 110 deletions(-) diff --git a/src/CalcByTPQ.c 
b/src/CalcByTPQ.c index 982788692..b94f7b114 100644 --- a/src/CalcByTPQ.c +++ b/src/CalcByTPQ.c @@ -228,13 +228,11 @@ int CalcByTPQ( if (step_i%step_spin == 0) { StartTimer(3300); - zclear(NumAve*X->Bind.Check.idim_max, &v1[1][0]); iret = expec_cisajs(&(X->Bind), NumAve, v1, v0); StopTimer(3300); if (iret != 0) return -1; StartTimer(3400); - zclear(NumAve*X->Bind.Check.idim_max, &v1[1][0]); iret = expec_cisajscktaltdc(&(X->Bind), NumAve, v1, v0); StopTimer(3400); if (iret != 0) return -1; diff --git a/src/CalcSpectrum.c b/src/CalcSpectrum.c index 774ac324f..2659d93c9 100644 --- a/src/CalcSpectrum.c +++ b/src/CalcSpectrum.c @@ -310,11 +310,10 @@ int MakeExcitedList( fprintf(stdoutMPI, cErrLargeMem, iErrCodeMem); exitMPI(-1); } - if (sz(X, list_1, list_2_1, list_2_2) != 0) { return FALSE; } - + if (X->Def.iCalcModel == HubbardNConserved) { X->Def.iCalcModel = Hubbard; } @@ -431,6 +430,11 @@ int CalcSpectrum( size_t byte_size; if (X->Bind.Def.iFlgCalcSpec == CALCSPEC_SCRATCH) { + X->Bind.Def.Nsite = X->Bind.Def.NsiteMPI; + X->Bind.Def.Total2Sz = X->Bind.Def.Total2SzMPI; + X->Bind.Def.Ne = X->Bind.Def.NeMPI; + X->Bind.Def.Nup = X->Bind.Def.NupMPI; + X->Bind.Def.Ndown = X->Bind.Def.NdownMPI; free_lui_1d_allocate(list_1); free_lui_1d_allocate(list_2_1); free_lui_1d_allocate(list_2_2); diff --git a/src/CalcSpectrumByBiCG.c b/src/CalcSpectrumByBiCG.c index a5cc7702a..b0834a69d 100644 --- a/src/CalcSpectrumByBiCG.c +++ b/src/CalcSpectrumByBiCG.c @@ -260,7 +260,7 @@ int CalcSpectrumByBiCG( zclear(X->Bind.Check.idim_max, &v14[1][0]); mltply(&X->Bind, 1, v12, v2); mltply(&X->Bind, 1, v14, v4); - + for (idcSpectrum = 0; idcSpectrum < NdcSpectrum; idcSpectrum++) { zclear(X->Bind.Check.idim_max, &vL[1][0]); GetExcitedState(&(X->Bind), 1, vL, v1Org, idcSpectrum + 1); diff --git a/src/FirstMultiply.c b/src/FirstMultiply.c index 0df7065ac..5e58032e2 100644 --- a/src/FirstMultiply.c +++ b/src/FirstMultiply.c @@ -107,13 +107,11 @@ Compute expectation value at infinite temperature */ 
X->Def.istep = 0; StartTimer(3300); - zclear(NumAve*i_max, &v0[1][0]); iret = expec_cisajs(X, NumAve, v0, v1); StopTimer(3300); if (iret != 0) return -1; StartTimer(3400); - zclear(NumAve*i_max, &v0[1][0]); iret = expec_cisajscktaltdc(X, NumAve, v0, v1); StopTimer(3400); if (iret != 0) return -1; diff --git a/src/SingleExHubbard.c b/src/SingleExHubbard.c index fce95fc60..37701a74f 100644 --- a/src/SingleExHubbard.c +++ b/src/SingleExHubbard.c @@ -56,7 +56,7 @@ int GetSingleExcitedStateHubbard( idim_maxMPI = MaxMPI_li(X->Check.idim_maxOrg); tmp_v1bufOrg = cd_2d_allocate(idim_maxMPI + 1,nstate); #endif // MPI - + idim_max = X->Check.idim_maxOrg; for (i = 0; i < X->Def.NSingleExcitationOperator[iEx]; i++) { org_isite = X->Def.SingleExcitationOperator[iEx][i][0]; diff --git a/src/expec_cisajs.c b/src/expec_cisajs.c index 1e53a3cec..ea0fb7eeb 100644 --- a/src/expec_cisajs.c +++ b/src/expec_cisajs.c @@ -59,11 +59,13 @@ int expec_cisajs_HubbardGC( long unsigned int ibit; long unsigned int is; double complex tmp_OneGreen = 1.0; + int complex_conj, istate; i_max = X->Check.idim_max; for (i = 0; i < X->Def.NCisAjt; i++) { zclear(i_max*nstate, &Xvec[1][0]); + complex_conj = 0; org_isite1 = X->Def.CisAjt[i][0] + 1; org_isite2 = X->Def.CisAjt[i][2] + 1; org_sigma1 = X->Def.CisAjt[i][1]; @@ -95,7 +97,7 @@ int expec_cisajs_HubbardGC( else { X_GC_child_general_hopp_MPIsingle(org_isite2 - 1, org_sigma2, org_isite1 - 1, org_sigma1, -tmp_OneGreen, X, nstate, Xvec, vec); - zswap_long(i_max*nstate, &vec[1][0], &Xvec[1][0]); + complex_conj = 1; } } else { @@ -106,6 +108,8 @@ int expec_cisajs_HubbardGC( } MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); + if (complex_conj == 1) + for (istate = 0; istate < nstate; istate++) prod[i][istate] = conj(prod[i][istate]); } return 0; } @@ -128,7 +132,7 @@ int expec_cisajs_Hubbard( long unsigned int i, j; long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; long unsigned int i_max; - int num1, one = 1; + int num1, one = 1, 
complex_conj, istate; long unsigned int ibit; long unsigned int is; double complex tmp_OneGreen = 1.0, dmv; @@ -136,6 +140,7 @@ int expec_cisajs_Hubbard( i_max = X->Check.idim_max; for (i = 0; i < X->Def.NCisAjt; i++) { zclear(i_max*nstate, &Xvec[1][0]); + complex_conj = 0; org_isite1 = X->Def.CisAjt[i][0] + 1; org_isite2 = X->Def.CisAjt[i][2] + 1; org_sigma1 = X->Def.CisAjt[i][1]; @@ -161,7 +166,6 @@ int expec_cisajs_Hubbard( if (org_isite1 > X->Def.Nsite && org_isite2 > X->Def.Nsite) { if (org_isite1 == org_isite2 && org_sigma1 == org_sigma2) {//diagonal - is = X->Def.Tpow[2 * org_isite1 - 2 + org_sigma1]; ibit = (unsigned long int)myrank & is; if (ibit == is) { @@ -181,7 +185,7 @@ int expec_cisajs_Hubbard( else { X_child_general_hopp_MPIsingle(org_isite2 - 1, org_sigma2, org_isite1 - 1, org_sigma1, -tmp_OneGreen, X, nstate, Xvec, vec); - zswap_long(i_max*nstate, &vec[1][0], &Xvec[1][0]); + complex_conj = 1; } } else { @@ -205,6 +209,8 @@ firstprivate(i_max, is) private(num1, ibit, dmv) } } MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); + if (complex_conj == 1) + for (istate = 0; istate < nstate; istate++) prod[i][istate] = conj(prod[i][istate]); } return 0; } diff --git a/src/expec_cisajscktaltdc.c b/src/expec_cisajscktaltdc.c index 17a698743..c6fd717e7 100644 --- a/src/expec_cisajscktaltdc.c +++ b/src/expec_cisajscktaltdc.c @@ -177,28 +177,22 @@ int expec_cisajscktalt_HubbardGC( isite3 = X->Def.OrgTpow[2 * org_isite3 - 2 + org_sigma3]; isite4 = X->Def.OrgTpow[2 * org_isite4 - 2 + org_sigma4]; if (isite1 == isite2 && isite3 == isite4) { - X_GC_child_CisAisCjtAjt_Hubbard_MPI(org_isite1 - 1, org_sigma1, org_isite3 - 1, org_sigma3, 1.0, X, nstate, Xvec, vec); } else if (isite1 == isite2 && isite3 != isite4) { - X_GC_child_CisAisCjtAku_Hubbard_MPI( org_isite1 - 1, org_sigma1, org_isite3 - 1, org_sigma3, org_isite4 - 1, org_sigma4, 1.0, X, nstate, Xvec, vec); - } else if (isite1 != isite2 && isite3 == isite4) { - X_GC_child_CisAjtCkuAku_Hubbard_MPI(org_isite1 - 
1, org_sigma1, org_isite2 - 1, org_sigma2, org_isite3 - 1, org_sigma3, 1.0, X, nstate, Xvec, vec); - } else if (isite1 != isite2 && isite3 != isite4) { X_GC_child_CisAjtCkuAlv_Hubbard_MPI(org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, org_isite3 - 1, org_sigma3, org_isite4 - 1, org_sigma4, 1.0, X, nstate, Xvec, vec); } - }//InterPE else { child_general_int_GetInfo(i, X, org_isite1, org_isite2, org_isite3, org_isite4, diff --git a/src/mltply.c b/src/mltply.c index 7ed03ea9c..bda2e3e8d 100644 --- a/src/mltply.c +++ b/src/mltply.c @@ -67,25 +67,19 @@ int mltply(struct BindStruct *X, int nstate, double complex **tmp_v0,double comp StartTimer(1); i_max = X->Check.idim_max; - if(i_max!=0){ - if (X->Def.iFlgGeneralSpin == FALSE) { - if (GetSplitBitByModel(X->Def.Nsite, X->Def.iCalcModel, &irght, &ilft, &ihfbit) != 0) { - return -1; - } + if (X->Def.iFlgGeneralSpin == FALSE) { + if (GetSplitBitByModel(X->Def.Nsite, X->Def.iCalcModel, &irght, &ilft, &ihfbit) != 0) { + return -1; } - else{ - if(X->Def.iCalcModel==Spin){ - if (GetSplitBitForGeneralSpin(X->Def.Nsite, &ihfbit, X->Def.SiteToBit) != 0) { - return -1; - } + } + else { + if (X->Def.iCalcModel == Spin) { + if (GetSplitBitForGeneralSpin(X->Def.Nsite, &ihfbit, X->Def.SiteToBit) != 0) { + return -1; } } - } - else{ - irght=0; - ilft=0; - ihfbit=0; - } + } + X->Large.i_max = i_max; X->Large.irght = irght; X->Large.ilft = ilft; @@ -144,25 +138,7 @@ void zaxpy_long( y[i] += a * x[i]; } /** -@brief Wrapper of zswap. -*/ -void zswap_long( - unsigned long int n, - double complex *x, - double complex *y -) { - unsigned long int i; - double complex x0; - -#pragma omp parallel for default(none) private(i,x0) shared(n, x, y) - for (i = 0; i < n; i++) { - x0 = x[i]; - x[i] = y[i]; - y[i] = x0; - } -} -/** -@brief Wrapper of zswap. +@brief clear double complex array. 
*/ void zclear( unsigned long int n, diff --git a/src/mltplyMPIHubbardCore.c b/src/mltplyMPIHubbardCore.c index f9980e4a6..6ab0cad34 100644 --- a/src/mltplyMPIHubbardCore.c +++ b/src/mltplyMPIHubbardCore.c @@ -285,8 +285,8 @@ void X_GC_child_CisAisCjtAjt_Hubbard_MPI( else tmp_ispin1 = X->Def.Tpow[2 * org_isite1 + org_ispin1]; #pragma omp parallel default(none) \ - shared(org_isite1,org_ispin1,org_isite3,org_ispin3,nstate,one,tmp_v0,tmp_v1) \ - firstprivate(i_max,tmp_V,X) private(j,tmp_off,tmp_ispin1) + shared(org_isite1,org_ispin1,org_isite3,org_ispin3,nstate,one,tmp_v0,tmp_v1,tmp_ispin1) \ + firstprivate(i_max,tmp_V,X) private(j,tmp_off) #pragma omp for for (j = 1; j <= i_max; j++) { if (CheckBit_Ajt(tmp_ispin1, j - 1, &tmp_off) == TRUE) { diff --git a/src/mltplyMPISpin.c b/src/mltplyMPISpin.c index c571909cf..d18225609 100644 --- a/src/mltplyMPISpin.c +++ b/src/mltplyMPISpin.c @@ -198,6 +198,7 @@ void X_child_general_int_spin_MPIsingle( Index in the intra PE */ mask1 = X->Def.Tpow[org_isite1]; + //printf("debug1 %ld\n", idim_max_buf); #pragma omp parallel for default(none) private(j, ioff, jreal, state1) \ firstprivate(idim_max_buf, Jint, X, mask1, state1check, org_isite1) \ diff --git a/test/lobcg_genspin_ladder.sh b/test/lobcg_genspin_ladder.sh index 9b5b36c30..9dd6b1264 100755 --- a/test/lobcg_genspin_ladder.sh +++ b/test/lobcg_genspin_ladder.sh @@ -32,8 +32,1133 @@ cat > reference.dat < paste.dat -diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($2-$3)*($2-$3))} END{printf "%8.6f", diff}' paste.dat` +paste output/zvo_energy.dat reference.dat > paste1.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($2-$3)*($2-$3))} +END{printf "%8.6f", diff} +' paste1.dat` +echo "Diff output/zvo_energy.dat : " ${diff} +test "${diff}" = "0.000000" + +# Check one-body G + +cat > reference.dat < paste2.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($5-$7)*($5-$7)+($6-$8)*($6-$8))} +END{printf "%8.6f", diff/NR} +' paste2.dat` +echo "Diff output/zvo_cisajs_eigen0.dat : " ${diff} +test 
"${diff}" = "0.000000" + +cat > reference.dat < paste3.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($5-$7)*($5-$7)+($6-$8)*($6-$8))} +END{printf "%8.6f", diff/NR} +' paste3.dat` +echo "Diff output/zvo_cisajs_eigen1.dat : " ${diff} +test "${diff}" = "0.000000" + +# Check two-body G + +cat > reference.dat < paste5.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($9-$11)*($9-$11)+($10-$12)*($10-$12))} +END{printf "%8.6f", diff/NR} +' paste5.dat` +echo "Diff output/zvo_cisajscktalt_eigen0.dat : " ${diff} +test "${diff}" = "0.000000" + +cat > reference.dat < paste6.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($9-$11)*($9-$11)+($10-$12)*($10-$12))} +END{printf "%8.6f", diff/NR} +' paste6.dat` +echo "Diff output/zvo_cisajscktalt_eigen1.dat : " ${diff} test "${diff}" = "0.000000" exit $? diff --git a/test/lobcg_genspingc_ladder.sh b/test/lobcg_genspingc_ladder.sh index 82dafa71d..6addf163d 100755 --- a/test/lobcg_genspingc_ladder.sh +++ b/test/lobcg_genspingc_ladder.sh @@ -36,8 +36,1163 @@ cat > reference.dat < paste.dat -diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($2-$3)*($2-$3))} END{printf "%8.6f", diff}' paste.dat` +paste output/zvo_energy.dat reference.dat > paste1.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($2-$3)*($2-$3))} +END{printf "%8.6f", diff} +' paste1.dat` +echo "Diff output/zvo_energy.dat : " ${diff} +test "${diff}" = "0.000000" + +# Check one-body G + +cat > reference.dat < paste2.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($5-$7)*($5-$7)+($6-$8)*($6-$8))} +END{printf "%8.6f", diff/NR} +' paste2.dat` +echo "Diff output/zvo_cisajs_eigen0.dat : " ${diff} +test "${diff}" = "0.000000" + +cat > reference.dat < paste3.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($5-$7)*($5-$7)+($6-$8)*($6-$8))} +END{printf "%8.6f", diff/NR} +' paste3.dat` +echo "Diff output/zvo_cisajs_eigen1.dat : " ${diff} +test "${diff}" = "0.000000" + +cat > reference.dat < paste4.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($5-$7)*($5-$7)+($6-$8)*($6-$8))} +END{printf "%8.6f", 
diff/NR} +' paste4.dat` +echo "Diff output/zvo_cisajs_eigen2.dat : " ${diff} +test "${diff}" = "0.000000" + +# Check two-body G + +cat > reference.dat < paste5.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($9-$11)*($9-$11)+($10-$12)*($10-$12))} +END{printf "%8.6f", diff/NR} +' paste5.dat` +echo "Diff output/zvo_cisajscktalt_eigen0.dat : " ${diff} +test "${diff}" = "0.000000" + +cat > reference.dat < paste6.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($9-$11)*($9-$11)+($10-$12)*($10-$12))} +END{printf "%8.6f", diff/NR} +' paste6.dat` +echo "Diff output/zvo_cisajscktalt_eigen1.dat : " ${diff} +test "${diff}" = "0.000000" + +cat > reference.dat < paste7.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($9-$11)*($9-$11)+($10-$12)*($10-$12))} +END{printf "%8.6f", diff/NR} +' paste7.dat` +echo "Diff output/zvo_cisajscktalt_eigen2.dat : " ${diff} test "${diff}" = "0.000000" exit $? diff --git a/test/lobcg_hubbard_square.sh b/test/lobcg_hubbard_square.sh index fbeae33bd..0e4491bd3 100755 --- a/test/lobcg_hubbard_square.sh +++ b/test/lobcg_hubbard_square.sh @@ -18,7 +18,7 @@ EOF ${MPIRUN} ../../src/HPhi -s stan.in -# Check value +# Check Energy cat > reference.dat < reference.dat < paste.dat -diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($2-$3)*($2-$3))} END{printf "%8.6f", diff}' paste.dat` +paste output/zvo_energy.dat reference.dat > paste1.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($2-$3)*($2-$3))} +END{printf "%8.6f", diff/NR} +' paste1.dat` +echo "Diff output/zvo_energy.dat : " ${diff} +test "${diff}" = "0.000000" + +# Check one-body G + +cat > reference.dat < paste2.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($5-$7)*($5-$7)+($6-$8)*($6-$8))} +END{printf "%8.6f", diff/NR} +' paste2.dat` +echo "Diff output/zvo_cisajs_eigen0.dat : " ${diff} +test "${diff}" = "0.000000" + +cat > reference.dat < paste3.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($5-$7)*($5-$7)+($6-$8)*($6-$8))} +END{printf "%8.6f", diff/NR} +' paste3.dat` +echo "Diff output/zvo_cisajs_eigen1.dat 
: " ${diff} +test "${diff}" = "0.000000" + +cat > reference.dat < paste4.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($5-$7)*($5-$7)+($6-$8)*($6-$8))} +END{printf "%8.6f", diff/NR} +' paste4.dat` +echo "Diff output/zvo_cisajs_eigen2.dat : " ${diff} +test "${diff}" = "0.000000" + +# Check two-body G + +cat > reference.dat < paste5.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($9-$11)*($9-$11)+($10-$12)*($10-$12))} +END{printf "%8.6f", diff/NR} +' paste5.dat` +echo "Diff output/zvo_cisajscktalt_eigen0.dat : " ${diff} +test "${diff}" = "0.000000" + +cat > reference.dat < paste6.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($9-$11)*($9-$11)+($10-$12)*($10-$12))} +END{printf "%8.6f", diff/NR} +' paste6.dat` +echo "Diff output/zvo_cisajscktalt_eigen1.dat : " ${diff} +test "${diff}" = "0.000000" + +cat > reference.dat < paste7.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($9-$11)*($9-$11)+($10-$12)*($10-$12))} +END{printf "%8.6f", diff/NR} +' paste7.dat` +echo "Diff output/zvo_cisajscktalt_eigen2.dat : " ${diff} test "${diff}" = "0.000000" exit $? 
diff --git a/test/lobcg_hubbardgc_tri.sh b/test/lobcg_hubbardgc_tri.sh index e25df4426..5b8314a60 100755 --- a/test/lobcg_hubbardgc_tri.sh +++ b/test/lobcg_hubbardgc_tri.sh @@ -32,8 +32,157 @@ cat > reference.dat < paste.dat -diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($2-$3)*($2-$3))} END{printf "%8.6f", diff}' paste.dat` +paste output/zvo_energy.dat reference.dat > paste1.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($2-$3)*($2-$3))} +END{printf "%8.6f", diff} +' paste1.dat` +echo "Diff output/zvo_energy.dat : " ${diff} +test "${diff}" = "0.000000" + +# Check one-body G + +cat > reference.dat < paste2.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($5-$7)*($5-$7)+($6-$8)*($6-$8))} +END{printf "%8.6f", diff/NR} +' paste2.dat` +echo "Diff output/zvo_cisajs_eigen0.dat : " ${diff} +test "${diff}" = "0.000000" + +cat > reference.dat < paste3.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($5-$7)*($5-$7)+($6-$8)*($6-$8))} +END{printf "%8.6f", diff/NR} +' paste3.dat` +echo "Diff output/zvo_cisajs_eigen1.dat : " ${diff} +test "${diff}" = "0.000000" + +# Check two-body G + +cat > reference.dat < paste5.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($9-$11)*($9-$11)+($10-$12)*($10-$12))} +END{printf "%8.6f", diff/NR} +' paste5.dat` +echo "Diff output/zvo_cisajscktalt_eigen0.dat : " ${diff} +test "${diff}" = "0.000000" + +cat > reference.dat < paste6.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($9-$11)*($9-$11)+($10-$12)*($10-$12))} +END{printf "%8.6f", diff/NR} +' paste6.dat` +echo "Diff output/zvo_cisajscktalt_eigen1.dat : " ${diff} test "${diff}" = "0.000000" exit $? 
diff --git a/test/lobcg_kondo_chain.sh b/test/lobcg_kondo_chain.sh index ed17496bb..0aed0fbe7 100755 --- a/test/lobcg_kondo_chain.sh +++ b/test/lobcg_kondo_chain.sh @@ -30,8 +30,305 @@ cat > reference.dat < paste.dat -diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($2-$3)*($2-$3))} END{printf "%8.6f", diff}' paste.dat` +paste output/zvo_energy.dat reference.dat > paste1.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($2-$3)*($2-$3))} +END{printf "%8.6f", diff} +' paste1.dat` +echo "Diff output/zvo_energy.dat : " ${diff} +test "${diff}" = "0.000000" + +# Check one-body G + +cat > reference.dat < paste2.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($5-$7)*($5-$7)+($6-$8)*($6-$8))} +END{printf "%8.6f", diff/NR} +' paste2.dat` +echo "Diff output/zvo_cisajs_eigen0.dat : " ${diff} +test "${diff}" = "0.000000" + +cat > reference.dat < paste3.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($5-$7)*($5-$7)+($6-$8)*($6-$8))} +END{printf "%8.6f", diff/NR} +' paste3.dat` +echo "Diff output/zvo_cisajs_eigen1.dat : " ${diff} +test "${diff}" = "0.000000" + +# Check two-body G + +cat > reference.dat < paste5.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($9-$11)*($9-$11)+($10-$12)*($10-$12))} +END{printf "%8.6f", diff/NR} +' paste5.dat` +echo "Diff output/zvo_cisajscktalt_eigen0.dat : " ${diff} +test "${diff}" = "0.000000" + +cat > reference.dat < paste6.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($9-$11)*($9-$11)+($10-$12)*($10-$12))} +END{printf "%8.6f", diff/NR} +' paste6.dat` +echo "Diff output/zvo_cisajscktalt_eigen1.dat : " ${diff} test "${diff}" = "0.000000" exit $? 
diff --git a/test/lobcg_kondogc_chain.sh b/test/lobcg_kondogc_chain.sh index 77e5d7085..731678295 100755 --- a/test/lobcg_kondogc_chain.sh +++ b/test/lobcg_kondogc_chain.sh @@ -35,8 +35,449 @@ cat > reference.dat < paste.dat -diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($2-$3)*($2-$3))} END{printf "%8.6f", diff}' paste.dat` +paste output/zvo_energy.dat reference.dat > paste1.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($2-$3)*($2-$3))} +END{printf "%8.6f", diff} +' paste1.dat` +echo "Diff output/zvo_energy.dat : " ${diff} +test "${diff}" = "0.000000" + +# Check one-body G + +cat > reference.dat < paste2.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($5-$7)*($5-$7)+($6-$8)*($6-$8))} +END{printf "%8.6f", diff/NR} +' paste2.dat` +echo "Diff output/zvo_cisajs_eigen0.dat : " ${diff} +test "${diff}" = "0.000000" + +cat > reference.dat < paste3.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($5-$7)*($5-$7)+($6-$8)*($6-$8))} +END{printf "%8.6f", diff/NR} +' paste3.dat` +echo "Diff output/zvo_cisajs_eigen1.dat : " ${diff} +test "${diff}" = "0.000000" + +cat > reference.dat < paste4.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($5-$7)*($5-$7)+($6-$8)*($6-$8))} +END{printf "%8.6f", diff/NR} +' paste4.dat` +echo "Diff output/zvo_cisajs_eigen2.dat : " ${diff} +test "${diff}" = "0.000000" + +# Check two-body G + +cat > reference.dat < paste5.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($9-$11)*($9-$11)+($10-$12)*($10-$12))} +END{printf "%8.6f", diff/NR} +' paste5.dat` +echo "Diff output/zvo_cisajscktalt_eigen0.dat : " ${diff} +test "${diff}" = "0.000000" + +cat > reference.dat < paste6.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($9-$11)*($9-$11)+($10-$12)*($10-$12))} +END{printf "%8.6f", diff/NR} +' paste6.dat` +echo "Diff output/zvo_cisajscktalt_eigen1.dat : " ${diff} +test "${diff}" = "0.000000" + +cat > reference.dat < paste7.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($9-$11)*($9-$11)+($10-$12)*($10-$12))} +END{printf "%8.6f", diff/NR} +' paste7.dat` +echo "Diff 
output/zvo_cisajscktalt_eigen2.dat : " ${diff} test "${diff}" = "0.000000" exit $? diff --git a/test/lobcg_spin_kagome.sh b/test/lobcg_spin_kagome.sh index 83cd31fdd..a8078d324 100755 --- a/test/lobcg_spin_kagome.sh +++ b/test/lobcg_spin_kagome.sh @@ -34,8 +34,397 @@ cat > reference.dat < paste.dat -diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($2-$3)*($2-$3))} END{printf "%8.6f", diff}' paste.dat` +paste output/zvo_energy.dat reference.dat > paste1.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($2-$3)*($2-$3))} +END{printf "%8.6f", diff} +' paste1.dat` +echo "Diff output/zvo_energy.dat : " ${diff} +test "${diff}" = "0.000000" + +# Check one-body G + +cat > reference.dat < paste2.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($5-$7)*($5-$7)+($6-$8)*($6-$8))} +END{printf "%8.6f", diff/NR} +' paste2.dat` +echo "Diff output/zvo_cisajs_eigen0.dat : " ${diff} +test "${diff}" = "0.000000" + +cat > reference.dat < paste3.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($5-$7)*($5-$7)+($6-$8)*($6-$8))} +END{printf "%8.6f", diff/NR} +' paste3.dat` +echo "Diff output/zvo_cisajs_eigen1.dat : " ${diff} +test "${diff}" = "0.000000" + +# Check two-body G + +cat > reference.dat < paste5.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($9-$11)*($9-$11)+($10-$12)*($10-$12))} +END{printf "%8.6f", diff/NR} +' paste5.dat` +echo "Diff output/zvo_cisajscktalt_eigen0.dat : " ${diff} +test "${diff}" = "0.000000" + +cat > reference.dat < paste6.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($9-$11)*($9-$11)+($10-$12)*($10-$12))} +END{printf "%8.6f", diff/NR} +' paste6.dat` +echo "Diff output/zvo_cisajscktalt_eigen1.dat : " ${diff} test "${diff}" = "0.000000" exit $? 
diff --git a/test/lobcg_spingc_honey.sh b/test/lobcg_spingc_honey.sh index feb63f40b..8644b8333 100755 --- a/test/lobcg_spingc_honey.sh +++ b/test/lobcg_spingc_honey.sh @@ -43,8 +43,527 @@ cat > reference.dat < paste.dat -diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($2-$3)*($2-$3))} END{printf "%8.6f", diff}' paste.dat` +paste output/zvo_energy.dat reference.dat > paste1.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($2-$3)*($2-$3))} +END{printf "%8.6f", diff} +' paste1.dat` +echo "Diff output/zvo_energy.dat : " ${diff} +test "${diff}" = "0.000000" + +# Check one-body G + +cat > reference.dat < paste2.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($5-$7)*($5-$7)+($6-$8)*($6-$8))} +END{printf "%8.6f", diff/NR} +' paste2.dat` +echo "Diff output/zvo_cisajs_eigen0.dat : " ${diff} +test "${diff}" = "0.000000" + +cat > reference.dat < paste3.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($5-$7)*($5-$7)+($6-$8)*($6-$8))} +END{printf "%8.6f", diff/NR} +' paste3.dat` +echo "Diff output/zvo_cisajs_eigen1.dat : " ${diff} +test "${diff}" = "0.000000" + +cat > reference.dat < paste4.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($5-$7)*($5-$7)+($6-$8)*($6-$8))} +END{printf "%8.6f", diff/NR} +' paste4.dat` +echo "Diff output/zvo_cisajs_eigen2.dat : " ${diff} +test "${diff}" = "0.000000" + +# Check two-body G + +cat > reference.dat < paste5.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($9-$11)*($9-$11)+($10-$12)*($10-$12))} +END{printf "%8.6f", diff/NR} +' paste5.dat` +echo "Diff output/zvo_cisajscktalt_eigen0.dat : " ${diff} +test "${diff}" = "0.000000" + +cat > reference.dat < paste6.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($9-$11)*($9-$11)+($10-$12)*($10-$12))} +END{printf "%8.6f", diff/NR} +' paste6.dat` +echo "Diff output/zvo_cisajscktalt_eigen1.dat : " ${diff} +test "${diff}" = "0.000000" + +cat > reference.dat < paste7.dat +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($9-$11)*($9-$11)+($10-$12)*($10-$12))} +END{printf "%8.6f", diff/NR} +' paste7.dat` +echo "Diff 
output/zvo_cisajscktalt_eigen2.dat : " ${diff} test "${diff}" = "0.000000" exit $? diff --git a/test/spectrum_genspin_ladder.sh b/test/spectrum_genspin_ladder.sh index 17d6bdec7..2397140d5 100755 --- a/test/spectrum_genspin_ladder.sh +++ b/test/spectrum_genspin_ladder.sh @@ -33,7 +33,13 @@ cat > reference.dat < paste1.dat -diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff}' paste1.dat` +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} +END{printf "%8.6f", diff} +' paste1.dat` +echo "Diff output/vo_DynamicalGreen.dat (SzSz) : " ${diff} +test "${diff}" = "0.000000" # # S+S- spectrum # @@ -65,8 +71,12 @@ cat > reference.dat < paste2.dat -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%7.5f", diff}' paste2.dat` - +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} +END{printf "%7.5f", diff} +' paste2.dat` +echo "Diff output/vo_DynamicalGreen.dat (S+S-) : " ${diff} test "${diff}" = "0.00000" exit $? 
diff --git a/test/spectrum_genspingc_ladder.sh b/test/spectrum_genspingc_ladder.sh index f1e2c3807..194a20957 100755 --- a/test/spectrum_genspingc_ladder.sh +++ b/test/spectrum_genspingc_ladder.sh @@ -32,7 +32,13 @@ cat > reference.dat < paste1.dat -diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff}' paste1.dat` +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} +END{printf "%8.6f", diff} +' paste1.dat` +echo "Diff output/vo_DynamicalGreen.dat (SzSz) : " ${diff} +test "${diff}" = "0.000000" # # S+S- spectrum # @@ -63,8 +69,12 @@ cat > reference.dat < paste2.dat -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff}' paste2.dat` - +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} +END{printf "%8.6f", diff} +' paste2.dat` +echo "Diff output/vo_DynamicalGreen.dat (S+S-) : " ${diff} test "${diff}" = "0.000000" exit $? 
diff --git a/test/spectrum_hubbard_square.sh b/test/spectrum_hubbard_square.sh index 8c96b72cc..97ce00466 100755 --- a/test/spectrum_hubbard_square.sh +++ b/test/spectrum_hubbard_square.sh @@ -33,7 +33,13 @@ cat > reference.dat < paste1.dat -diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff}' paste1.dat` +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} +END{printf "%8.6f", diff} +' paste1.dat` +echo "Diff output/vo_DynamicalGreen.dat (SzSz) : " ${diff} +test "${diff}" = "0.000000" # # S+S- spectrum # @@ -65,7 +71,13 @@ cat > reference.dat < paste2.dat -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff}' paste2.dat` +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} +END{printf "%8.6f", diff} +' paste2.dat` +echo "Diff output/vo_DynamicalGreen.dat (S+S-) : " ${diff} +test "${diff}" = "0.000000" # # Density-Density spectrum # @@ -97,7 +109,13 @@ cat > reference.dat < paste3.dat -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff}' paste3.dat` +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} +END{printf "%8.6f", diff} +' paste3.dat` +echo "Diff output/vo_DynamicalGreen.dat (Density) : " ${diff} +test "${diff}" = "0.000000" # # Up-Up spectrum # @@ -129,7 +147,13 @@ cat > reference.dat < paste4.dat -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff}' paste4.dat` +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} +END{printf "%8.6f", diff} +' paste4.dat` +echo "Diff output/vo_DynamicalGreen.dat (Up) : " ${diff} +test "${diff}" = "0.000000" # # Down-Down spectrum # @@ -161,8 +185,12 @@ cat > reference.dat < paste5.dat -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf 
"%8.6f", diff}' paste5.dat` - +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} +END{printf "%8.6f", diff} +' paste5.dat` +echo "Diff output/vo_DynamicalGreen.dat (Down) : " ${diff} test "${diff}" = "0.000000" exit $? diff --git a/test/spectrum_hubbardgc_tri.sh b/test/spectrum_hubbardgc_tri.sh index be59c75a2..514523750 100755 --- a/test/spectrum_hubbardgc_tri.sh +++ b/test/spectrum_hubbardgc_tri.sh @@ -34,7 +34,13 @@ cat > reference.dat < paste1.dat -diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff}' paste1.dat` +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} +END{printf "%8.6f", diff} +' paste1.dat` +echo "Diff output/vo_DynamicalGreen.dat (SzSz) : " ${diff} +test "${diff}" = "0.000000" # # S+S- spectrum # @@ -67,7 +73,13 @@ cat > reference.dat < paste2.dat -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff}' paste2.dat` +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} +END{printf "%8.6f", diff} +' paste2.dat` +echo "Diff output/vo_DynamicalGreen.dat (S+S-) : " ${diff} +test "${diff}" = "0.000000" # # Density-Density spectrum # @@ -100,7 +112,13 @@ cat > reference.dat < paste3.dat -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff}' paste3.dat` +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} +END{printf "%8.6f", diff} +' paste3.dat` +echo "Diff output/vo_DynamicalGreen.dat (Density) : " ${diff} +test "${diff}" = "0.000000" # # Up-Up spectrum # @@ -133,7 +151,13 @@ cat > reference.dat < paste4.dat -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff}' paste4.dat` +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} +END{printf "%8.6f", diff} +' paste4.dat` +echo "Diff 
output/vo_DynamicalGreen.dat (Up) : " ${diff} +test "${diff}" = "0.000000" # # Down-Down spectrum # @@ -166,8 +190,12 @@ cat > reference.dat < paste5.dat -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff}' paste5.dat` - +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} +END{printf "%8.6f", diff} +' paste5.dat` +echo "Diff output/vo_DynamicalGreen.dat (Down) : " ${diff} test "${diff}" = "0.000000" exit $? diff --git a/test/spectrum_kondo_chain.sh b/test/spectrum_kondo_chain.sh index df47124d7..57bcb79f3 100755 --- a/test/spectrum_kondo_chain.sh +++ b/test/spectrum_kondo_chain.sh @@ -32,7 +32,13 @@ cat > reference.dat < paste1.dat -diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff}' paste1.dat` +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} +END{printf "%8.6f", diff} +' paste1.dat` +echo "Diff output/vo_DynamicalGreen.dat (SzSz) : " ${diff} +test "${diff}" = "0.000000" # # S+S- spectrum # @@ -63,7 +69,13 @@ cat > reference.dat < paste2.dat -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff}' paste2.dat` +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} +END{printf "%8.6f", diff} +' paste2.dat` +echo "Diff output/vo_DynamicalGreen.dat (S+S-) : " ${diff} +test "${diff}" = "0.000000" # # Density-Density spectrum # @@ -94,7 +106,13 @@ cat > reference.dat < paste3.dat -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff}' paste3.dat` +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} +END{printf "%8.6f", diff} +' paste3.dat` +echo "Diff output/vo_DynamicalGreen.dat (Density) : " ${diff} +test "${diff}" = "0.000000" # # Up-Up spectrum # @@ -125,7 +143,13 @@ cat > reference.dat < paste4.dat -diff=`awk 
'BEGIN{diff='${diff}'} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff}' paste4.dat` +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} +END{printf "%8.6f", diff} +' paste4.dat` +echo "Diff output/vo_DynamicalGreen.dat (Up) : " ${diff} +test "${diff}" = "0.000000" # # Down-Down spectrum # @@ -156,8 +180,12 @@ cat > reference.dat < paste5.dat -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff}' paste5.dat` - +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} +END{printf "%8.6f", diff} +' paste5.dat` +echo "Diff output/vo_DynamicalGreen.dat (Down) : " ${diff} test "${diff}" = "0.000000" exit $? diff --git a/test/spectrum_kondogc_chain.sh b/test/spectrum_kondogc_chain.sh index 156af1b58..8c4ffc399 100755 --- a/test/spectrum_kondogc_chain.sh +++ b/test/spectrum_kondogc_chain.sh @@ -32,7 +32,13 @@ cat > reference.dat < paste1.dat -diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff}' paste1.dat` +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} +END{printf "%8.6f", diff} +' paste1.dat` +echo "Diff output/vo_DynamicalGreen.dat (SzSz) : " ${diff} +test "${diff}" = "0.000000" # # S+S- spectrum # @@ -63,7 +69,13 @@ cat > reference.dat < paste2.dat -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff}' paste2.dat` +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} +END{printf "%8.6f", diff} +' paste2.dat` +echo "Diff output/vo_DynamicalGreen.dat (S+S-) : " ${diff} +test "${diff}" = "0.000000" # # Density-Density spectrum # @@ -94,7 +106,13 @@ cat > reference.dat < paste3.dat -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff}' paste3.dat` +diff=`awk ' +BEGIN{diff=0.0} 
+{diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} +END{printf "%8.6f", diff} +' paste3.dat` +echo "Diff output/vo_DynamicalGreen.dat (Density) : " ${diff} +test "${diff}" = "0.000000" # # Up-Up spectrum # @@ -125,7 +143,13 @@ cat > reference.dat < paste4.dat -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff}' paste4.dat` +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} +END{printf "%8.6f", diff} +' paste4.dat` +echo "Diff output/vo_DynamicalGreen.dat (Up) : " ${diff} +test "${diff}" = "0.000000" # # Down-Down spectrum # @@ -156,8 +180,12 @@ cat > reference.dat < paste5.dat -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff}' paste5.dat` - +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} +END{printf "%8.6f", diff} +' paste5.dat` +echo "Diff output/vo_DynamicalGreen.dat (Down) : " ${diff} test "${diff}" = "0.000000" exit $? 
diff --git a/test/spectrum_spin_kagome.sh b/test/spectrum_spin_kagome.sh index 6ce503b4c..baa2d995f 100755 --- a/test/spectrum_spin_kagome.sh +++ b/test/spectrum_spin_kagome.sh @@ -36,7 +36,13 @@ cat > reference.dat < paste1.dat -diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff}' paste1.dat` +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} +END{printf "%8.6f", diff} +' paste1.dat` +echo "Diff output/vo_DynamicalGreen.dat (SzSz) : " ${diff} +test "${diff}" = "0.000000" # # S+S- spectrum # @@ -71,7 +77,13 @@ cat > reference.dat < paste2.dat -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff}' paste2.dat` +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} +END{printf "%8.6f", diff} +' paste2.dat` +echo "Diff output/vo_DynamicalGreen.dat (S+S-) : " ${diff} +test "${diff}" = "0.000000" # # Density-Density spectrum # @@ -106,8 +118,12 @@ cat > reference.dat < paste3.dat -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff}' paste3.dat` - +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} +END{printf "%8.6f", diff} +' paste3.dat` +echo "Diff output/vo_DynamicalGreen.dat (Density) : " ${diff} test "${diff}" = "0.000000" exit $? 
diff --git a/test/spectrum_spingc_honey.sh b/test/spectrum_spingc_honey.sh index ae27d7c4f..34e141fd5 100755 --- a/test/spectrum_spingc_honey.sh +++ b/test/spectrum_spingc_honey.sh @@ -40,7 +40,13 @@ cat > reference.dat < paste1.dat -diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff}' paste1.dat` +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} +END{printf "%8.6f", diff} +' paste1.dat` +echo "Diff output/vo_DynamicalGreen.dat (SzSz) : " ${diff} +test "${diff}" = "0.000000" # # S+S- spectrum # @@ -79,7 +85,13 @@ cat > reference.dat < paste2.dat -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff}' paste2.dat` +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} +END{printf "%8.6f", diff} +' paste2.dat` +echo "Diff output/vo_DynamicalGreen.dat (S+S-) : " ${diff} +test "${diff}" = "0.000000" # # Density-Density spectrum # @@ -118,8 +130,12 @@ cat > reference.dat < paste3.dat -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff}' paste3.dat` - +diff=`awk ' +BEGIN{diff=0.0} +{diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} +END{printf "%8.6f", diff} +' paste3.dat` +echo "Diff output/vo_DynamicalGreen.dat (Density) : " ${diff} test "${diff}" = "0.000000" exit $? From f2089fa5b61013be8f793b06eea31c54ed668771 Mon Sep 17 00:00:00 2001 From: mitsuaki1987 Date: Tue, 26 Mar 2019 15:20:03 +0900 Subject: [PATCH 22/50] greenr2k output TPQ correlation function and its eerror like in the mVMC case --- tool/greenr2k.F90 | 129 ++++++++++++++++++++++++++-------------------- 1 file changed, 72 insertions(+), 57 deletions(-) diff --git a/tool/greenr2k.F90 b/tool/greenr2k.F90 index cc0a34387..4ca705c0f 100644 --- a/tool/greenr2k.F90 +++ b/tool/greenr2k.F90 @@ -3,6 +3,8 @@ MODULE fourier_val IMPLICIT NONE ! 
INTEGER,SAVE :: & + & interval, & + & numave, & & nkg(3), & ! k-grid for momentum ditribution & nk_line, & ! Numberof along each k line & nnode, & ! Number of node of k-path @@ -86,10 +88,11 @@ END SUBROUTINE key2lower ! SUBROUTINE read_filename() ! - USE fourier_val, ONLY : file_one, file_two, filehead, nsite, nwfc, filetail, calctype + USE fourier_val, ONLY : file_one, file_two, filehead, nsite, nwfc, & + & filetail, calctype, numave, interval IMPLICIT NONE ! - INTEGER :: fi = 10, lanczos_max, numave, interval, irun, istep, iwfc, idx_start + INTEGER :: fi = 10, lanczos_max, irun, istep, iwfc, idx_start CHARACTER(256) :: modpara, calcmod, keyname, namelist ! WRITE(*,*) @@ -220,8 +223,8 @@ SUBROUTINE read_filename() ALLOCATE(filetail(nwfc)) ! iwfc = 0 - DO irun = 0, numave - 1 - DO istep = 0, lanczos_max - 1 + DO istep = 0, lanczos_max - 1 + DO irun = 0, numave - 1 IF(MOD(istep, interval) == 0) THEN iwfc = iwfc + 1 WRITE(filetail(iwfc),'(a,i0,a,i0,a)') & @@ -756,11 +759,11 @@ END SUBROUTINE fourier_cor ! SUBROUTINE output_cor() ! - USE fourier_val, ONLY : cor_k, nk, nnode, knode, nk_line, kname, norb, & - & nwfc, recipr, filehead, filetail, calctype, nkg + USE fourier_val, ONLY : cor_k, nk, nnode, knode, nk_line, kname, norb, interval, & + & nwfc, recipr, filehead, filetail, calctype, nkg, numave IMPLICIT NONE ! - INTEGER :: fo = 20, ik, iwfc, inode, iorb, jorb, ii, ikk + INTEGER :: fo = 20, ik, iwfc, inode, iorb, jorb, ii, ikk, iwfc1, iwfc2, istep REAL(8) :: dk(3), dk_cart(3), xk(nk), & & xk_label(nnode), klength CHARACTER(256) :: filename @@ -790,61 +793,73 @@ SUBROUTINE output_cor() ! WRITE(*,*) " Correlation in k-space : ", TRIM(filehead) // "_corr", "*.dat" ! - IF(calctype == 4) THEN + IF(calctype == 1 .OR. calctype == 4) THEN ! - ! mVMC + ! TPQ/mVMC ! ALLOCATE(cor_ave(ikk,6,norb,norb), cor_err(ikk,6,norb,norb)) ! - ! Average - ! - cor_ave(1:ikk,1:6,1:norb,1:norb) = SUM(cor_k(1:ikk,1:6,1:norb,1:norb,1:nwfc), 5) / DBLE(nwfc) - ! - ! Variance - ! 
- cor_err(1:ikk,1:6,1:norb,1:norb) = 0d0 - DO iwfc = 1, nwfc - cor_err(1:ikk,1:6,1:norb,1:norb) = cor_err(1:ikk,1:6,1:norb,1:norb) & - & + CMPLX( DBLE(cor_k(1:ikk,1:6,1:norb,1:norb,iwfc) - cor_ave(1:ikk,1:6,1:norb,1:norb))**2, & - & AIMAG(cor_k(1:ikk,1:6,1:norb,1:norb,iwfc) - cor_ave(1:ikk,1:6,1:norb,1:norb))**2, & - & KIND(0d0)) - END DO - ! - ! Standard Error - ! - IF(nwfc == 1) THEN - cor_err(1:ikk,1:6,1:norb,1:norb) = CMPLX(0d0, 0d0, KIND(0d0)) - ELSE - cor_err(1:ikk,1:6,1:norb,1:norb) = CMPLX(SQRT( DBLE(cor_err(1:ikk,1:6,1:norb,1:norb))), & - & SQRT(AIMAG(cor_err(1:ikk,1:6,1:norb,1:norb))), KIND(0d0)) & - & / SQRT(DBLE(nwfc * (nwfc - 1))) - END IF - ! - filename = TRIM(filehead) // "_corr.dat" - OPEN(fo, file = TRIM(filename)) - ! - WRITE(fo,*) "# k-length[1]" - ii = 1 - DO iorb = 1, norb - DO jorb = 1, norb - WRITE(fo,'(a,i3,a,i3)') "# Orbital", iorb, " to Orbital", jorb - WRITE(fo,'(a,i4,a,i4,a,i4,a,i4,a,i4,a,i4,a,i4,a,i4,a)') & - & "# UpUp[", ii+1, ",", ii+2, ",", ii+13, ",", ii+14, & - & "] (Re. Im. Err.) DownDown[", ii+3, ",", ii+4, ",", ii+15, ",", ii+16, "]" - WRITE(fo,'(a,i4,a,i4,a,i4,a,i4,a,i4,a,i4,a,i4,a,i4,a,i4,a,i4,a,i4,a,i4,a,i4,a,i4,a,i4,a,i4,a)') & - & "# Density[", ii+5, ",", ii+6, ",", ii+17, ",", ii+18, & - & "] SzSz[", ii+7, ",", ii+8, ",", ii+19, ",", ii+20, & - & "] S+S-[", ii+9, ",", ii+10, ",", ii+21, ",", ii+22, & - & "] S.S[", ii+11, ",", ii+12, ",", ii+23, ",", ii+24, "]" - ii = ii+24 + DO istep = 1, nwfc / numave + ! + iwfc1 = numave*(istep-1) + 1 + iwfc2 = numave*istep + ! + ! Average + ! + cor_ave(1:ikk,1:6,1:norb,1:norb) = SUM(cor_k(1:ikk,1:6,1:norb,1:norb,& + & iwfc1:iwfc2), 5) / DBLE(numave) + ! + ! Variance + ! 
+ cor_err(1:ikk,1:6,1:norb,1:norb) = 0d0 + DO iwfc = iwfc1, iwfc2 + cor_err(1:ikk,1:6,1:norb,1:norb) = cor_err(1:ikk,1:6,1:norb,1:norb) & + & + CMPLX( DBLE(cor_k(1:ikk,1:6,1:norb,1:norb,iwfc) - cor_ave(1:ikk,1:6,1:norb,1:norb))**2, & + & AIMAG(cor_k(1:ikk,1:6,1:norb,1:norb,iwfc) - cor_ave(1:ikk,1:6,1:norb,1:norb))**2, & + & KIND(0d0)) END DO - END DO - ! - DO ik = 1, ikk - WRITE(fo,'(1000e15.5)') cor_ave(ik,1:6, 1:norb, 1:norb), cor_err(ik,1:6, 1:norb, 1:norb) - END DO - ! - CLOSE(fo) + ! + ! Standard Error + ! + IF(numave == 1) THEN + cor_err(1:ikk,1:6,1:norb,1:norb) = CMPLX(0d0, 0d0, KIND(0d0)) + ELSE + cor_err(1:ikk,1:6,1:norb,1:norb) = CMPLX(SQRT( DBLE(cor_err(1:ikk,1:6,1:norb,1:norb))), & + & SQRT(AIMAG(cor_err(1:ikk,1:6,1:norb,1:norb))), KIND(0d0)) & + & / SQRT(DBLE(numave * (numave - 1))) + END IF + ! + IF(calctype == 1)THEN + WRITE(filename,'(a,a,i0,a)') TRIM(filehead), "_corr_step", interval*(istep-1), ".dat" + ELSE + filename = TRIM(filehead) // "_corr.dat" + END IF + OPEN(fo, file = TRIM(filename)) + ! + WRITE(fo,*) "# k-length[1]" + ii = 1 + DO iorb = 1, norb + DO jorb = 1, norb + WRITE(fo,'(a,i3,a,i3)') "# Orbital", iorb, " to Orbital", jorb + WRITE(fo,'(a,i4,a,i4,a,i4,a,i4,a,i4,a,i4,a,i4,a,i4,a)') & + & "# UpUp[", ii+1, ",", ii+2, ",", ii+13, ",", ii+14, & + & "] (Re. Im. Err.) DownDown[", ii+3, ",", ii+4, ",", ii+15, ",", ii+16, "]" + WRITE(fo,'(a,i4,a,i4,a,i4,a,i4,a,i4,a,i4,a,i4,a,i4,a,i4,a,i4,a,i4,a,i4,a,i4,a,i4,a,i4,a,i4,a)') & + & "# Density[", ii+5, ",", ii+6, ",", ii+17, ",", ii+18, & + & "] SzSz[", ii+7, ",", ii+8, ",", ii+19, ",", ii+20, & + & "] S+S-[", ii+9, ",", ii+10, ",", ii+21, ",", ii+22, & + & "] S.S[", ii+11, ",", ii+12, ",", ii+23, ",", ii+24, "]" + ii = ii+24 + END DO + END DO + ! + DO ik = 1, ikk + WRITE(fo,'(1000e15.5)') xk(ik), cor_ave(ik,1:6, 1:norb, 1:norb), cor_err(ik,1:6, 1:norb, 1:norb) + END DO + ! + CLOSE(fo) + ! + END DO ! istep = 1, nwfc / numave ! DEALLOCATE(cor_ave, cor_err) ! 
From 05760267c8c8ec3efc7988349f8a97d6e3822c23 Mon Sep 17 00:00:00 2001 From: mitsuaki1987 Date: Tue, 26 Mar 2019 15:40:39 +0900 Subject: [PATCH 23/50] The first line has infinite temperature. --- tool/AveSSrand.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tool/AveSSrand.py b/tool/AveSSrand.py index b9e1b7602..225f76665 100644 --- a/tool/AveSSrand.py +++ b/tool/AveSSrand.py @@ -32,7 +32,7 @@ f = open(str_, 'r') count=0 for line in f: - if count ==0: + if count ==0 or count ==1: count +=1 continue data = line.split() @@ -42,9 +42,9 @@ DataEne.append(np.zeros(Setnum)) DataC.append(np.zeros(Setnum)) - DataTmp[count-1][i]=1.0/float(data[0]) - DataEne[count-1][i]=float(data[1]) - DataC[count-1][i]=pow(float(data[0]),2)*( float(data[2])-pow(float(data[1]),2)) + DataTmp[count-2][i]=1.0/float(data[0]) + DataEne[count-2][i]=float(data[1]) + DataC[count-2][i]=pow(float(data[0]),2)*( float(data[2])-pow(float(data[1]),2)) count+=1 f.close() From 54e1af6639469cf2de8d349eae35d38d7f5e15fb Mon Sep 17 00:00:00 2001 From: mitsuaki1987 Date: Wed, 27 Mar 2019 11:15:37 +0900 Subject: [PATCH 24/50] Backup --- src/CalcByLOBPCG.c | 33 +++++++++---------- src/CalcByTEM.c | 6 ++-- src/CheckMPI.c | 3 +- src/bitcalc.c | 4 +-- src/check.c | 19 ++--------- src/common/setmemory.c | 37 ++++++++++++++++++++++ src/common/setmemory.h | 2 ++ src/include/bitcalc.h | 2 +- src/include/readdef.h | 4 +-- src/include/struct.h | 2 +- src/mltplyMPIHubbardCore.c | 4 +-- src/readdef.c | 5 +-- src/xsetmem.c | 12 +++---- test/te_ac_hubbard_square.sh | 25 +++++++++++---- test/te_dc_hubbard_square.sh | 25 +++++++++++---- test/te_hubbard_chain_interall.sh | 25 +++++++++++---- test/te_hubbard_chain_interall_diagonal.sh | 25 +++++++++++---- test/te_kondo_chain_interall.sh | 25 +++++++++++---- test/te_pulse_hubbard_square.sh | 25 +++++++++++---- test/te_quench_hubbard_square.sh | 25 +++++++++++---- test/te_spin_chain_interall.sh | 17 +++++++--- 21 files changed, 220 insertions(+), 105 
deletions(-) diff --git a/src/CalcByLOBPCG.c b/src/CalcByLOBPCG.c index f6b7d6170..c4a139c0d 100644 --- a/src/CalcByLOBPCG.c +++ b/src/CalcByLOBPCG.c @@ -353,16 +353,17 @@ int LOBPCG_Main( FILE *fp; int iconv = -1, i4_max; long int idim, i_max; - int ii, jj, ie, nsub, stp, nsub_cut; + int ii, jj, ie, nsub, stp, nsub_cut, nstate; double complex ***wxp/*[0] w, [1] x, [2] p of Ref.1*/, ***hwxp/*[0] h*w, [1] h*x, [2] h*p of Ref.1*/, ****hsub, ****ovlp; /*Subspace Hamiltonian and Overlap*/ - double *eig, *dnorm, eps_LOBPCG, eigabs_max, *preshift, precon, dnormmax, *eigsub; + double *eig, *dnorm, eps_LOBPCG, eigabs_max, preshift, precon, dnormmax, *eigsub; int do_precon = 0;//If = 1, use preconditioning (experimental) char tN = 'N', tC = 'C'; double complex one = 1.0, zero = 0.0; nsub = 3 * X->Def.k_exct; + nstate = X->Def.k_exct; eig = d_1d_allocate(X->Def.k_exct); dnorm = d_1d_allocate(X->Def.k_exct); @@ -448,12 +449,12 @@ int LOBPCG_Main( if (stp /= 1) { if (do_precon == 1) { for (ie = 0; ie < X->Def.k_exct; ie++) - preshift[ie] = calc_preshift(eig[ie], dnorm[ie], eps_LOBPCG); + preshift = calc_preshift(eig[ie], dnorm[ie], eps_LOBPCG); #pragma omp parallel for default(none) shared(wxp,list_Diagonal,preshift,i_max,eps_LOBPCG,X) \ private(idim,precon,ie) for (idim = 1; idim <= i_max; idim++) { for (ie = 0; ie < X->Def.k_exct; ie++){ - precon = list_Diagonal[idim] - preshift[ie]; + precon = list_Diagonal[idim] - preshift; if (fabs(precon) > eps_LOBPCG) wxp[0][idim][ie] /= precon; } } @@ -503,10 +504,10 @@ private(idim,precon,ie) */ for (ii = 0; ii < 3; ii++) { for (jj = 0; jj < 3; jj++) { - zgemm_(&tN, &tC, &X->Def.k_exct, &X->Def.k_exct, &i4_max, &one, - &wxp[ii][1][0], &X->Def.k_exct, &wxp[jj][1][0], &X->Def.k_exct, &zero, &ovlp[jj][0][ii][0], &nsub); - zgemm_(&tN, &tC, &X->Def.k_exct, &X->Def.k_exct, &i4_max, &one, - &wxp[ii][1][0], &X->Def.k_exct, &hwxp[jj][1][0], &X->Def.k_exct, &zero, &hsub[jj][0][ii][0], &nsub); + zgemm_(&tN, &tC, &nstate, &nstate, &i4_max, &one, 
+ &wxp[ii][1][0], &nstate, &wxp[jj][1][0], &nstate, &zero, &ovlp[jj][0][ii][0], &nsub); + zgemm_(&tN, &tC, &nstate, &nstate, &i4_max, &one, + &wxp[ii][1][0], &nstate, &hwxp[jj][1][0], &nstate, &zero, &hsub[jj][0][ii][0], &nsub); } } SumMPI_cv(nsub*nsub, &ovlp[0][0][0][0]); @@ -531,8 +532,8 @@ private(idim,precon,ie) */ zclear(i_max*X->Def.k_exct, &v1buf[1][0]); for (ii = 0; ii < 3; ii++) { - zgemm_(&tC, &tN, &X->Def.k_exct, &i4_max, &X->Def.k_exct, &one, - &hsub[0][0][ii][0], &nsub, &wxp[ii][1][0], &X->Def.k_exct, &one, &v1buf[1][0], &X->Def.k_exct); + zgemm_(&tC, &tN, &nstate, &i4_max, &nstate, &one, + &hsub[0][0][ii][0], &nsub, &wxp[ii][1][0], &nstate, &one, &v1buf[1][0], &nstate); } for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++) wxp[1][idim][ie] = v1buf[idim][ie]; @@ -542,8 +543,8 @@ private(idim,precon,ie) */ zclear(i_max*X->Def.k_exct, &v1buf[1][0]); for (ii = 0; ii < 3; ii++) { - zgemm_(&tC, &tN, &X->Def.k_exct, &i4_max, &X->Def.k_exct, &one, - &hsub[0][0][ii][0], &nsub, &hwxp[ii][1][0], &X->Def.k_exct, &one, &v1buf[1][0], &X->Def.k_exct); + zgemm_(&tC, &tN, &nstate, &i4_max, &nstate, &one, + &hsub[0][0][ii][0], &nsub, &hwxp[ii][1][0], &nstate, &one, &v1buf[1][0], &nstate); } for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++) hwxp[1][idim][ie] = v1buf[idim][ie]; @@ -553,8 +554,8 @@ private(idim,precon,ie) */ zclear(i_max*X->Def.k_exct, &v1buf[1][0]); for (ii = 0; ii < 3; ii += 2) { - zgemm_(&tC, &tN, &X->Def.k_exct, &i4_max, &X->Def.k_exct, &one, - &hsub[0][0][ii][0], &nsub, &wxp[ii][1][0], &X->Def.k_exct, &one, &v1buf[1][0], &X->Def.k_exct); + zgemm_(&tC, &tN, &nstate, &i4_max, &nstate, &one, + &hsub[0][0][ii][0], &nsub, &wxp[ii][1][0], &nstate, &one, &v1buf[1][0], &nstate); } for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++) wxp[2][idim][ie] = v1buf[idim][ie]; @@ -564,8 +565,8 @@ private(idim,precon,ie) */ zclear(i_max*X->Def.k_exct, &v1buf[1][0]); for (ii = 0; ii < 3; ii += 
2) { - zgemm_(&tC, &tN, &X->Def.k_exct, &i4_max, &X->Def.k_exct, &one, - &hsub[0][0][ii][0], &nsub, &hwxp[ii][1][0], &X->Def.k_exct, &one, &v1buf[1][0], &X->Def.k_exct); + zgemm_(&tC, &tN, &nstate, &i4_max, &nstate, &one, + &hsub[0][0][ii][0], &nsub, &hwxp[ii][1][0], &nstate, &one, &v1buf[1][0], &nstate); } for (idim = 1; idim <= i_max; idim++) for (ie = 0; ie < X->Def.k_exct; ie++) hwxp[2][idim][ie] = v1buf[idim][ie]; diff --git a/src/CalcByTEM.c b/src/CalcByTEM.c index e59e74f6a..5bdb6f897 100644 --- a/src/CalcByTEM.c +++ b/src/CalcByTEM.c @@ -69,6 +69,8 @@ int CalcByTEM( double dt = ((X->Bind.Def.NLaser == 0) ? 0.0 : X->Bind.Def.Param.TimeSlice); double complex **v2; /**< Ttemporary vector for time evolution calculation, @f$ v2 = H*v1 = H^coef |psi(t)>@f$.*/ + global_norm = d_1d_allocate(1); + if (X->Bind.Def.NTETimeSteps < X->Bind.Def.Lanczos_max) { fprintf(stdoutMPI, "Error: NTETimeSteps must be larger than Lanczos_max.\n"); return -1; @@ -206,8 +208,8 @@ int CalcByTEM( fclose(fp); if (step_i % step_spin == 0) { - expec_cisajs(&(X->Bind), 1, v0, v1); - expec_cisajscktaltdc(&(X->Bind), 1, v0, v1); + expec_cisajs(&(X->Bind), 1, v2, v1); + expec_cisajscktaltdc(&(X->Bind), 1, v2, v1); } if (X->Bind.Def.iOutputEigenVec == TRUE) { if (step_i % X->Bind.Def.Param.OutputInterval == 0) { diff --git a/src/CheckMPI.c b/src/CheckMPI.c index e95bb9f8e..c5474b0c5 100644 --- a/src/CheckMPI.c +++ b/src/CheckMPI.c @@ -26,7 +26,8 @@ */ int CheckMPI(struct BindStruct *X/**< [inout] */) { - int isite, NDimInterPE, SmallDim, SpinNum, ipivot, ishift, isiteMax, isiteMax0; + unsigned int isite; + int NDimInterPE, SmallDim, SpinNum, ipivot, ishift, isiteMax, isiteMax0; /**@brief Branch for each model diff --git a/src/bitcalc.c b/src/bitcalc.c index 3183aea05..54159813f 100644 --- a/src/bitcalc.c +++ b/src/bitcalc.c @@ -418,7 +418,7 @@ int BitCheckGeneral( * @version 0.2 * @author Kazuyoshi Yoshimi (The University of Tokyo) */ -int GetBitGeneral( +unsigned int GetBitGeneral( const 
unsigned int isite, //!<[in] const long unsigned int org_bit, //!<[in] const long int *SiteToBit, //!<[in] @@ -453,7 +453,7 @@ int GetLocal2Sz ) { int TwiceSz=0; - int bitAtSite=0; + unsigned int bitAtSite=0; //get bit bitAtSite=GetBitGeneral(isite, org_bit, SiteToBit, Tpow); TwiceSz=-(SiteToBit[isite-1]-1)+2*bitAtSite; //-2S^{total}_i+2Sz_i diff --git a/src/check.c b/src/check.c index 56363efa2..a1a8bcf20 100644 --- a/src/check.c +++ b/src/check.c @@ -56,7 +56,7 @@ int check(struct BindStruct *X){ long unsigned int u_tmp; long unsigned int tmp; long unsigned int Ns,comb_1,comb_2,comb_3,comb_sum, comb_up, comb_down; - int u_loc; + unsigned int u_loc; long int **comb; long unsigned int idimmax=0; long unsigned int idim=0; @@ -68,7 +68,7 @@ int check(struct BindStruct *X){ X->Def.Ne=X->Def.Nup; } - int iAllup=X->Def.Ne; + unsigned int iAllup=X->Def.Ne; if(X->Def.iFlgScaLAPACK == 0) { /* @@ -132,19 +132,6 @@ int check(struct BindStruct *X){ break; case Kondo: - //idim_max - // calculation of dimension - // Nup = u_loc+u_cond - // Ndown = d_loc+d_cond - // NLocSpn = u_loc+d_loc - // Ncond = Nsite-NLocSpn - // idim_max = \sum_{u_loc=0}^{u_loc=Nup} - // Binomial(NLocSpn,u_loc) - // *Binomial(NCond,Nup-u_loc) - // *Binomial(NCond,Ndown+u_loc-NLocSpn) - //comb_1 = Binomial(NLocSpn,u_loc) - //comb_2 = Binomial(NCond,Nup-u_loc) - //comb_3 = Binomial(NCond,Ndown+u_loc-NLocSpn) Nup = X->Def.Nup; Ndown = X->Def.Ndown; NCond = X->Def.Nsite-X->Def.NLocSpn; @@ -162,7 +149,7 @@ int check(struct BindStruct *X){ NCond = X->Def.Nsite-X->Def.NLocSpn; NLocSpn = X->Def.NLocSpn; //4^Nc*2^Ns - for(i=0;i<(2*NCond+NLocSpn);i++){ + for(u_loc=0;u_loc <(2*NCond+NLocSpn); u_loc++){ comb_sum= 2*comb_sum; } break; diff --git a/src/common/setmemory.c b/src/common/setmemory.c index d2dfc87f2..a24ace36b 100644 --- a/src/common/setmemory.c +++ b/src/common/setmemory.c @@ -143,6 +143,23 @@ int **i_2d_allocate(const long unsigned int N, const long unsigned int M) { return A; } /// +/// \brief Allocation 
for A[N][M] +/// \param N [in] The size of the array A +/// \param M [in] The size of the array M +/// \return A Pointer to array A +/// \author Kazuyoshi Yoshimi (University of Tokyo) +unsigned int **ui_2d_allocate(const long unsigned int N, const long unsigned int M) { + unsigned int **A; + long unsigned int int_i; + A = (unsigned int **)calloc((N), sizeof(unsigned int *)); + A[0] = (unsigned int *)calloc((M * N), sizeof(unsigned int)); + for (int_i = 0; int_i < N; int_i++) { + A[int_i] = A[0] + int_i * M; + } + //memset(A[0], 0, sizeof(int)*M*N); + return A; +} +/// /// \brief Function to free 2d array (int) /// \param A Pointer of 2d array A void free_i_2d_allocate(int **A){ @@ -170,6 +187,26 @@ int***i_3d_allocate(const long unsigned int N, const long unsigned int M, const return A; } +/// \brief Allocation for A[N][M] +/// \param N [in] The size of the array A +/// \param M [in] The size of the array M +/// \return A Pointer to array A +/// \author Kazuyoshi Yoshimi (University of Tokyo) +unsigned int***ui_3d_allocate(const long unsigned int N, const long unsigned int M, const long unsigned int L) { + long unsigned int int_i, int_j; + unsigned int*** A; + A = (unsigned int***)calloc((N), sizeof(unsigned int**)); + A[0] = (unsigned int**)calloc((M*N), sizeof(unsigned int*)); + A[0][0] = (unsigned int*)calloc((L*M*N), sizeof(unsigned int)); + for (int_i = 0; int_i < N; int_i++) { + A[int_i] = A[0] + int_i * M; + for (int_j = 0; int_j < M; int_j++) { + A[int_i][int_j] = A[0][0] + int_i * M*L + int_j * L; + } + } + return A; +} + /// /// \brief Function to free 3d array (int) /// \param A A pointer of 3d array A diff --git a/src/common/setmemory.h b/src/common/setmemory.h index 4521156cf..f3cbdc723 100644 --- a/src/common/setmemory.h +++ b/src/common/setmemory.h @@ -99,6 +99,7 @@ void free_i_1d_allocate(int *A); /// \return A Pointer to array A /// \author Kazuyoshi Yoshimi (University of Tokyo) int **i_2d_allocate(const long unsigned int N, const long unsigned 
int M); +unsigned int **ui_2d_allocate(const long unsigned int N, const long unsigned int M); /// /// \brief Function to free 2d array (int) /// \param A Pointer of 2d array A @@ -113,6 +114,7 @@ void free_i_2d_allocate(int **A); /// \return A Pointer to array A /// \author Kazuyoshi Yoshimi (University of Tokyo) int ***i_3d_allocate(const long unsigned int N, const long unsigned int M, const long unsigned int L); +unsigned int ***ui_3d_allocate(const long unsigned int N, const long unsigned int M, const long unsigned int L); /// /// \brief Function to free 3d array (int) /// \param A Pointer of 3d array A diff --git a/src/include/bitcalc.h b/src/include/bitcalc.h index ec5ce67b2..3881bfecc 100644 --- a/src/include/bitcalc.h +++ b/src/include/bitcalc.h @@ -84,7 +84,7 @@ int BitCheckGeneral( ); -int GetBitGeneral( +unsigned int GetBitGeneral( const unsigned int isite, const long unsigned int org_bit, const long int *SiteToBit, diff --git a/src/include/readdef.h b/src/include/readdef.h index 5a0e19572..d60c06aae 100644 --- a/src/include/readdef.h +++ b/src/include/readdef.h @@ -82,7 +82,7 @@ int CheckInterAllHermite ( int **InterAll, double complex* ParaInterAll, - int **InterAllOffDiagonal, + unsigned int **InterAllOffDiagonal, double complex*ParaInterAllOffDiagonal, const int NInterAllOffDiagonal, const int iCalcModel @@ -102,7 +102,7 @@ int GetDiagonalInterAll const int NInterAll, int **InterAllDiagonal, double *ParaInterAllDiagonal, - int **InterAllOffDiagonal, + unsigned int **InterAllOffDiagonal, complex double *ParaInterAllOffDiagonal, int *Chemi, int *SpinChemi, diff --git a/src/include/struct.h b/src/include/struct.h index d365d18a1..87d017aaa 100644 --- a/src/include/struct.h +++ b/src/include/struct.h @@ -280,7 +280,7 @@ struct DefineList { int ***TEInterAll; /**< Index of time-dependent InterAll for Time Evolution. \n Data Format [NTE][NTEInterAll][8]: 0->site number i, 1-> spin index on i, 2-> site number j, 3-> spin index on j. 
4->site number k, 5-> spin index on k, 6-> site number l, 7-> spin index on l.*/ - int ***TEInterAllOffDiagonal; /**< Index of off-diagonal part of time-dependent InterAll for Time Evolution. \n + unsigned int ***TEInterAllOffDiagonal; /**< Index of off-diagonal part of time-dependent InterAll for Time Evolution. \n Data Format [NTE][NTEInterAll][8]: 0->site number i, 1-> spin index on i, 2-> site number j, 3-> spin index on j. 4->site number k, 5-> spin index on k, 6-> site number l, 7-> spin index on l.*/ int ***TEInterAllDiagonal; /**< Index of diagonal part of time-dependent InterAll for Time Evolution. \n diff --git a/src/mltplyMPIHubbardCore.c b/src/mltplyMPIHubbardCore.c index 6ab0cad34..1f50fe46b 100644 --- a/src/mltplyMPIHubbardCore.c +++ b/src/mltplyMPIHubbardCore.c @@ -931,8 +931,8 @@ firstprivate(i_max, Asum, Adiff, isite1, isite2, tmp_V, X) \ SendRecv_iv(origin, X->Check.idim_max + 1, idim_max_buf + 1, list_1, list_1buf); SendRecv_cv(origin, X->Check.idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); -#pragma omp parallel default(none) private(j,dmv,ioff,tmp_off,Fsgn,Adiff) \ -firstprivate(idim_max_buf,tmp_V,X,tmp_isite1,tmp_isite2,tmp_isite3,tmp_isite4,org_rankbit,isite3) \ +#pragma omp parallel default(none) private(j,dmv,ioff,tmp_off,Fsgn,Adiff,org_rankbit) \ +firstprivate(idim_max_buf,tmp_V,X,tmp_isite1,tmp_isite2,tmp_isite3,tmp_isite4,isite3) \ shared(v1buf,tmp_v1,nstate,one,tmp_v0,list_1buf,list_2_1,list_2_2,origin,org_isite3,myrank,isite1,isite2,org_isite1,org_isite2) { diff --git a/src/readdef.c b/src/readdef.c index 161c0c67f..4293d5fff 100644 --- a/src/readdef.c +++ b/src/readdef.c @@ -267,6 +267,7 @@ int ReadcalcmodFile( } if(CheckWords(ctmp, "CalcType")==0){ X->iCalcType=itmp; + if (X->iCalcType == Lanczos)X->iCalcType = CG; } else if(CheckWords(ctmp, "FlgFiniteTemperature")==0){ X->iFlgFiniteTemperature = itmp; @@ -2037,7 +2038,7 @@ int CheckInterAllHermite ( int **InterAll, double complex* ParaInterAll, - int 
**InterAllOffDiagonal, + unsigned int **InterAllOffDiagonal, double complex*ParaInterAllOffDiagonal, const int NInterAllOffDiagonal, const int iCalcModel @@ -2183,7 +2184,7 @@ int GetDiagonalInterAll const int NInterAll, int **InterAllDiagonal, double *ParaInterAllDiagonal, - int **InterAllOffDiagonal, + unsigned int **InterAllOffDiagonal, complex double *ParaInterAllOffDiagonal, int *Chemi, int *SpinChemi, diff --git a/src/xsetmem.c b/src/xsetmem.c index f7204dc9c..86271fddd 100644 --- a/src/xsetmem.c +++ b/src/xsetmem.c @@ -68,10 +68,10 @@ void setmem_def X->Def.ParaGeneralTransfer = cd_1d_allocate(X->Def.NTransfer); if (X->Def.iCalcType == TimeEvolution) { - X->Def.EDGeneralTransfer = i_2d_allocate(X->Def.NTransfer + X->Def.NTETransferMax, 4); + X->Def.EDGeneralTransfer = ui_2d_allocate(X->Def.NTransfer + X->Def.NTETransferMax, 4); X->Def.EDParaGeneralTransfer = cd_1d_allocate(X->Def.NTransfer + X->Def.NTETransferMax); } else { - X->Def.EDGeneralTransfer = i_2d_allocate(X->Def.NTransfer, 4); + X->Def.EDGeneralTransfer = ui_2d_allocate(X->Def.NTransfer, 4); X->Def.EDParaGeneralTransfer = cd_1d_allocate(X->Def.NTransfer); } @@ -81,9 +81,9 @@ void setmem_def X->Def.ParaCoulombInter = d_1d_allocate(X->Def.NCoulombInter + X->Def.NIsingCoupling); X->Def.HundCoupling = i_2d_allocate(X->Def.NHundCoupling + X->Def.NIsingCoupling, 2); X->Def.ParaHundCoupling = d_1d_allocate(X->Def.NHundCoupling + X->Def.NIsingCoupling); - X->Def.PairHopping = i_2d_allocate(X->Def.NPairHopping, 2); + X->Def.PairHopping = ui_2d_allocate(X->Def.NPairHopping, 2); X->Def.ParaPairHopping = d_1d_allocate(X->Def.NPairHopping); - X->Def.ExchangeCoupling = i_2d_allocate(X->Def.NExchangeCoupling, 2); + X->Def.ExchangeCoupling = ui_2d_allocate(X->Def.NExchangeCoupling, 2); X->Def.ParaExchangeCoupling = d_1d_allocate(X->Def.NExchangeCoupling); X->Def.PairLiftCoupling = i_2d_allocate(X->Def.NPairLiftCoupling, 2); X->Def.ParaPairLiftCoupling = d_1d_allocate(X->Def.NPairLiftCoupling); @@ -111,7 +111,7 @@ 
void setmem_def int NInterAllSet; NInterAllSet = (X->Def.iCalcType == TimeEvolution) ? X->Def.NInterAll + X->Def.NTEInterAllMax : X->Def.NInterAll; - X->Def.InterAll_OffDiagonal = i_2d_allocate(NInterAllSet, 8); + X->Def.InterAll_OffDiagonal = ui_2d_allocate(NInterAllSet, 8); X->Def.ParaInterAll_OffDiagonal = cd_1d_allocate(NInterAllSet); X->Def.InterAll_Diagonal = i_2d_allocate(NInterAllSet, 4); X->Def.ParaInterAll_Diagonal = d_1d_allocate(NInterAllSet); @@ -133,7 +133,7 @@ void setmem_def X->Def.ParaTEInterAll = cd_2d_allocate(X->Def.NTETimeSteps, X->Def.NTEInterAllMax); X->Def.ParaTEInterAllDiagonal = d_2d_allocate(X->Def.NTETimeSteps, X->Def.NTEInterAllMax); X->Def.NTEInterAllOffDiagonal = ui_1d_allocate(X->Def.NTETimeSteps); - X->Def.TEInterAllOffDiagonal = i_3d_allocate(X->Def.NTETimeSteps, X->Def.NTEInterAllMax, 8); + X->Def.TEInterAllOffDiagonal = ui_3d_allocate(X->Def.NTETimeSteps, X->Def.NTEInterAllMax, 8); X->Def.ParaTEInterAllOffDiagonal = cd_2d_allocate(X->Def.NTETimeSteps, X->Def.NTEInterAllMax); //Time-dependent Chemi generated by InterAll diagonal components X->Def.NTEChemi = ui_1d_allocate(X->Def.NTETimeSteps); diff --git a/test/te_ac_hubbard_square.sh b/test/te_ac_hubbard_square.sh index 32767c755..8adb3c186 100755 --- a/test/te_ac_hubbard_square.sh +++ b/test/te_ac_hubbard_square.sh @@ -83,12 +83,23 @@ cat > reference.dat < flct.dat paste flct.dat reference.dat > paste1.dat -diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($2-$10)*($2-$10))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($3-$11)*($3-$11))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($4-$12)*($4-$12))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($5-$13)*($5-$13))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($6-$14)*($6-$14))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} 
{diff+=sqrt(($7-$15)*($7-$15))} END{printf "%8.6f", diff}' paste1.dat` - +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($2-$10)*($2-$10))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff N : " ${diff} +test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($3-$11)*($3-$11))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff N^2 : " ${diff} +test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($4-$12)*($4-$12))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff D : " ${diff} +test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($5-$13)*($5-$13))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff D^2 : " ${diff} test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($6-$14)*($6-$14))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff Sz : " ${diff} +test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($7-$15)*($7-$15))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff Sz^2 : " ${diff} +test "${diff}" = "0.000000" + exit $? 
diff --git a/test/te_dc_hubbard_square.sh b/test/te_dc_hubbard_square.sh index 4625f734e..bf2ff4169 100755 --- a/test/te_dc_hubbard_square.sh +++ b/test/te_dc_hubbard_square.sh @@ -75,12 +75,23 @@ EOF sed -e "1d" output/Flct.dat > flct.dat paste flct.dat reference.dat > paste1.dat -diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($2-$10)*($2-$10))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($3-$11)*($3-$11))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($4-$12)*($4-$12))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($5-$13)*($5-$13))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($6-$14)*($6-$14))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($7-$15)*($7-$15))} END{printf "%8.6f", diff}' paste1.dat` - +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($2-$10)*($2-$10))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff N : " ${diff} +test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($3-$11)*($3-$11))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff N^2 : " ${diff} +test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($4-$12)*($4-$12))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff D : " ${diff} +test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($5-$13)*($5-$13))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff D^2 : " ${diff} test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($6-$14)*($6-$14))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff Sz : " ${diff} +test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($7-$15)*($7-$15))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff Sz^2 : " ${diff} +test "${diff}" = "0.000000" + exit $? 
diff --git a/test/te_hubbard_chain_interall.sh b/test/te_hubbard_chain_interall.sh index 41086348a..d78f5b5e6 100755 --- a/test/te_hubbard_chain_interall.sh +++ b/test/te_hubbard_chain_interall.sh @@ -109,12 +109,23 @@ cat > reference.dat < flct.dat paste flct.dat reference.dat > paste1.dat -diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($2-$10)*($2-$10))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($3-$11)*($3-$11))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($4-$12)*($4-$12))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($5-$13)*($5-$13))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($6-$14)*($6-$14))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($7-$15)*($7-$15))} END{printf "%8.6f", diff}' paste1.dat` - +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($2-$10)*($2-$10))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff N : " ${diff} +test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($3-$11)*($3-$11))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff N^2 : " ${diff} +test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($4-$12)*($4-$12))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff D : " ${diff} +test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($5-$13)*($5-$13))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff D^2 : " ${diff} test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($6-$14)*($6-$14))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff Sz : " ${diff} +test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($7-$15)*($7-$15))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff Sz^2 : " ${diff} +test "${diff}" = "0.000000" + exit $? 
diff --git a/test/te_hubbard_chain_interall_diagonal.sh b/test/te_hubbard_chain_interall_diagonal.sh index 85cdc2fe7..6db9612f2 100755 --- a/test/te_hubbard_chain_interall_diagonal.sh +++ b/test/te_hubbard_chain_interall_diagonal.sh @@ -109,12 +109,23 @@ cat > reference.dat < flct.dat paste flct.dat reference.dat > paste1.dat -diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($2-$10)*($2-$10))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($3-$11)*($3-$11))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($4-$12)*($4-$12))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($5-$13)*($5-$13))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($6-$14)*($6-$14))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($7-$15)*($7-$15))} END{printf "%8.6f", diff}' paste1.dat` - +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($2-$10)*($2-$10))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff N : " ${diff} +test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($3-$11)*($3-$11))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff N^2 : " ${diff} +test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($4-$12)*($4-$12))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff D : " ${diff} +test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($5-$13)*($5-$13))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff D^2 : " ${diff} test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($6-$14)*($6-$14))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff Sz : " ${diff} +test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($7-$15)*($7-$15))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff Sz^2 : " ${diff} +test "${diff}" = "0.000000" + exit $? 
diff --git a/test/te_kondo_chain_interall.sh b/test/te_kondo_chain_interall.sh index 5d03bd8c4..d2623a3e1 100755 --- a/test/te_kondo_chain_interall.sh +++ b/test/te_kondo_chain_interall.sh @@ -109,12 +109,23 @@ cat > reference.dat < flct.dat paste flct.dat reference.dat > paste1.dat -diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($2-$10)*($2-$10))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($3-$11)*($3-$11))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($4-$12)*($4-$12))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($5-$13)*($5-$13))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($6-$14)*($6-$14))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($7-$15)*($7-$15))} END{printf "%8.6f", diff}' paste1.dat` - +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($2-$10)*($2-$10))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff N : " ${diff} +test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($3-$11)*($3-$11))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff N^2 : " ${diff} +test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($4-$12)*($4-$12))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff D : " ${diff} +test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($5-$13)*($5-$13))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff D^2 : " ${diff} test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($6-$14)*($6-$14))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff Sz : " ${diff} +test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($7-$15)*($7-$15))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff Sz^2 : " ${diff} +test "${diff}" = "0.000000" + exit $? 
diff --git a/test/te_pulse_hubbard_square.sh b/test/te_pulse_hubbard_square.sh index f53dafba0..9dd736e10 100755 --- a/test/te_pulse_hubbard_square.sh +++ b/test/te_pulse_hubbard_square.sh @@ -74,12 +74,23 @@ cat > reference.dat < flct.dat paste flct.dat reference.dat > paste1.dat -diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($2-$10)*($2-$10))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($3-$11)*($3-$11))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($4-$12)*($4-$12))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($5-$13)*($5-$13))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($6-$14)*($6-$14))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($7-$15)*($7-$15))} END{printf "%8.6f", diff}' paste1.dat` - +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($2-$10)*($2-$10))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff N : " ${diff} +test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($3-$11)*($3-$11))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff N^2 : " ${diff} +test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($4-$12)*($4-$12))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff D : " ${diff} +test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($5-$13)*($5-$13))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff D^2 : " ${diff} test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($6-$14)*($6-$14))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff Sz : " ${diff} +test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($7-$15)*($7-$15))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff Sz^2 : " ${diff} +test "${diff}" = "0.000000" + exit $? 
diff --git a/test/te_quench_hubbard_square.sh b/test/te_quench_hubbard_square.sh index ec651ff39..0a850f448 100755 --- a/test/te_quench_hubbard_square.sh +++ b/test/te_quench_hubbard_square.sh @@ -75,12 +75,23 @@ cat > reference.dat < flct.dat paste flct.dat reference.dat > paste1.dat -diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($2-$10)*($2-$10))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($3-$11)*($3-$11))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($4-$12)*($4-$12))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($5-$13)*($5-$13))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($6-$14)*($6-$14))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($7-$15)*($7-$15))} END{printf "%8.6f", diff}' paste1.dat` - +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($2-$10)*($2-$10))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff N : " ${diff} +test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($3-$11)*($3-$11))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff N^2 : " ${diff} +test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($4-$12)*($4-$12))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff D : " ${diff} +test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($5-$13)*($5-$13))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff D^2 : " ${diff} test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($6-$14)*($6-$14))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff Sz : " ${diff} +test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($7-$15)*($7-$15))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff Sz^2 : " ${diff} +test "${diff}" = "0.000000" + exit $? 
diff --git a/test/te_spin_chain_interall.sh b/test/te_spin_chain_interall.sh index ef18870ae..811fd6f63 100755 --- a/test/te_spin_chain_interall.sh +++ b/test/te_spin_chain_interall.sh @@ -109,10 +109,17 @@ cat > reference.dat < ss.dat paste ss.dat reference.dat > paste1.dat -diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($2-$8)*($2-$8))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($3-$9)*($3-$9))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($4-$10)*($4-$10))} END{printf "%8.6f", diff}' paste1.dat` -diff=`awk 'BEGIN{diff='${diff}'} {diff+=sqrt(($5-$11)*($5-$11))} END{printf "%8.6f", diff}' paste1.dat` - +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($2-$8)*($2-$8))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff Energy : " ${diff} +test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($3-$9)*($3-$9))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff E^2 : " ${diff} +test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($4-$10)*($4-$10))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff D : " ${diff} test "${diff}" = "0.000000" +diff=`awk 'BEGIN{diff=0.0} {diff+=sqrt(($5-$11)*($5-$11))} END{printf "%8.6f", diff/NR}' paste1.dat` +echo "Diff N : " ${diff} +test "${diff}" = "0.000000" + exit $? 
From f10f5ff51275539622d4399e5582df4a25e04427 Mon Sep 17 00:00:00 2001 From: mitsuaki1987 Date: Wed, 27 Mar 2019 16:04:48 +0900 Subject: [PATCH 25/50] Bagfix --- src/CalcByLOBPCG.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/CalcByLOBPCG.c b/src/CalcByLOBPCG.c index c4a139c0d..63d41f504 100644 --- a/src/CalcByLOBPCG.c +++ b/src/CalcByLOBPCG.c @@ -374,8 +374,8 @@ int LOBPCG_Main( i_max = X->Check.idim_max; i4_max = (int)i_max; - free(v0); - free(v1); + free_cd_2d_allocate(v0); + free_cd_2d_allocate(v1); wxp = cd_3d_allocate(3, X->Check.idim_max + 1, X->Def.k_exct); hwxp = cd_3d_allocate(3, X->Check.idim_max + 1, X->Def.k_exct); /**@brief From 5b104c3c9c58d22f20e159629eadf7f36a7851a2 Mon Sep 17 00:00:00 2001 From: mitsuaki1987 Date: Fri, 29 Mar 2019 00:13:56 +0900 Subject: [PATCH 26/50] Backup --- tool/dynamicalr2k.F90 | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tool/dynamicalr2k.F90 b/tool/dynamicalr2k.F90 index 9975670f8..372e8134b 100644 --- a/tool/dynamicalr2k.F90 +++ b/tool/dynamicalr2k.F90 @@ -67,6 +67,7 @@ SUBROUTINE read_filename() ! INTEGER :: fi = 10 CHARACTER(256) :: modpara, keyname, namelist + REAL(8) :: eig0 ! WRITE(*,*) WRITE(*,*) "##### Read HPhi Input Files #####" @@ -137,6 +138,14 @@ SUBROUTINE read_filename() ! filehead = "output/" // TRIM(ADJUSTL(filehead)) ! + OPEN(fi,file = TRIM(filehead)//"_energy.dat") + READ(fi,*) keyname + READ(fi,*) keyname, eig0 + CLOSE(fi) + WRITE(*,*) " Minimum energy : ", eig0 + omegamin = omegamin - eig0 + omegamax = omegamax - eig0 + ! END SUBROUTINE read_filename ! ! Read geometry from file @@ -460,9 +469,9 @@ SUBROUTINE output_cor() ! OPEN(fo, file = TRIM(filehead) // "_dyn.dat") ! 
- DO iomega = 1, nomega - omega = (omegamax - omegamin) * DBLE(iomega - 1) / DBLE(nomega) + omegamin - DO ik = 1, ikk + DO ik = 1, ikk + DO iomega = 1, nomega + omega = (omegamax - omegamin) * DBLE(iomega - 1) / DBLE(nomega) + omegamin WRITE(fo,'(1000e15.5)') xk(ik), omega, cor_k(ik, 1:norb, iomega) END DO WRITE(fo,*) From 3a11864a841e83ee2004be8c5e0a0847d3ddfd4b Mon Sep 17 00:00:00 2001 From: mitsuaki1987 Date: Fri, 29 Mar 2019 23:56:57 +0900 Subject: [PATCH 27/50] BagFix : When building without MPI, the v1buf was not allocated while it is used in LOBPCG. --- src/CalcSpectrum.c | 24 ++++++++++---- src/PairExHubbard.c | 22 +++---------- src/PairExSpin.c | 32 +++--------------- src/SingleExHubbard.c | 37 ++++++--------------- src/include/mltplyMPIHubbardCore.h | 32 ++++-------------- src/include/mltplyMPISpinCore.h | 13 +++----- src/include/mltplySpinCore.h | 3 -- src/mltplyMPIHubbard.c | 11 ++----- src/mltplyMPIHubbardCore.c | 52 ++++++++++++------------------ src/mltplyMPISpinCore.c | 17 ++++------ src/mltplySpinCore.c | 7 ++-- src/wrapperMPI.c | 7 ++-- src/xsetmem.c | 16 ++++----- 13 files changed, 85 insertions(+), 188 deletions(-) diff --git a/src/CalcSpectrum.c b/src/CalcSpectrum.c index 2659d93c9..ab7cc545d 100644 --- a/src/CalcSpectrum.c +++ b/src/CalcSpectrum.c @@ -182,13 +182,12 @@ int MakeExcitedList( if (GetlistSize(X) == TRUE) { list_1_org = lui_1d_allocate(X->Check.idim_max + 1); #ifdef MPI - list_1buf_org = lui_1d_allocate(X->Check.idim_maxMPI + 1); - //lui_malloc1(list_1buf_org, X->Check.idim_maxMPI + 1); + unsigned long int MAXidim_max; + MAXidim_max = MaxMPI_li(X->Check.idim_max); + list_1buf_org = lui_1d_allocate(MAXidim_max + 1); #endif // MPI list_2_1_org = lui_1d_allocate(X->Large.SizeOflist_2_1); list_2_2_org = lui_1d_allocate(X->Large.SizeOflist_2_2); - //lui_malloc1(list_2_1_org, X->Large.SizeOflist_2_1); - //lui_malloc1(list_2_2_org, X->Large.SizeOflist_2_2); if (list_1_org == NULL || list_2_1_org == NULL || list_2_2_org == NULL @@ -313,6 
+312,15 @@ int MakeExcitedList( if (sz(X, list_1, list_2_1, list_2_2) != 0) { return FALSE; } +#ifdef MPI + unsigned long int MAXidim_max, MAXidim_maxOrg; + MAXidim_max = MaxMPI_li(X->Check.idim_max); + MAXidim_maxOrg = MaxMPI_li(X->Check.idim_maxOrg); + if (MAXidim_max < MAXidim_maxOrg) { + free_cd_2d_allocate(v1buf); + v1buf = cd_2d_allocate(MAXidim_maxOrg + 1, 1); + } +#endif // MPI if (X->Def.iCalcModel == HubbardNConserved) { X->Def.iCalcModel = Hubbard; @@ -435,9 +443,11 @@ int CalcSpectrum( X->Bind.Def.Ne = X->Bind.Def.NeMPI; X->Bind.Def.Nup = X->Bind.Def.NupMPI; X->Bind.Def.Ndown = X->Bind.Def.NdownMPI; - free_lui_1d_allocate(list_1); - free_lui_1d_allocate(list_2_1); - free_lui_1d_allocate(list_2_2); + if (GetlistSize(&(X->Bind)) == TRUE) { + free_lui_1d_allocate(list_1); + free_lui_1d_allocate(list_2_1); + free_lui_1d_allocate(list_2_2); + } free_d_1d_allocate(list_Diagonal); free_cd_2d_allocate(v0); v1Org = cd_2d_allocate(X->Bind.Check.idim_max + 1, 1); diff --git a/src/PairExHubbard.c b/src/PairExHubbard.c index 521017fdb..1ed20d3e4 100644 --- a/src/PairExHubbard.c +++ b/src/PairExHubbard.c @@ -136,7 +136,7 @@ int GetPairExcitedStateHubbard( double complex **tmp_v1, /**< [in] v0 = H v1*/ int iEx ) { - long unsigned int i, j, idim_maxMPI; + long unsigned int i, j; long unsigned int irght, ilft, ihfbit; long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; long unsigned int tmp_off = 0; @@ -158,14 +158,6 @@ int GetPairExcitedStateHubbard( X->Large.ilft = ilft; X->Large.ihfbit = ihfbit; X->Large.mode = M_CALCSPEC; - // X->Large.mode = M_MLTPLY; - - double complex **tmp_v1bufOrg; - //set size -#ifdef MPI - idim_maxMPI = MaxMPI_li(X->Check.idim_maxOrg); - tmp_v1bufOrg = cd_2d_allocate(idim_maxMPI + 1, nstate); -#endif // MPI for (i = 0; i < X->Def.NPairExcitationOperator[iEx]; i++) { org_isite1 = X->Def.PairExcitationOperator[iEx][i][0] + 1; @@ -185,21 +177,18 @@ int GetPairExcitedStateHubbard( org_isite2 > X->Def.Nsite) { 
X_child_CisAjt_MPIdouble(org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, - -tmp_trans, X, nstate, tmp_v0, tmp_v1, tmp_v1bufOrg, - list_1_org, list_1buf_org, list_2_1, list_2_2); + -tmp_trans, X, nstate, tmp_v0, tmp_v1); } else if (org_isite2 > X->Def.Nsite || org_isite1 > X->Def.Nsite) { if (org_isite1 < org_isite2) { X_child_CisAjt_MPIsingle(org_isite1 - 1, org_sigma1, org_isite2 - 1, org_sigma2, - -tmp_trans, X, nstate, tmp_v0, tmp_v1, tmp_v1bufOrg, - list_1_org, list_1buf_org, list_2_1, list_2_2); + -tmp_trans, X, nstate, tmp_v0, tmp_v1); } else { X_child_CisAjt_MPIsingle(org_isite2 - 1, org_sigma2, org_isite1 - 1, org_sigma1, - -conj(tmp_trans), X, nstate, tmp_v0, tmp_v1, tmp_v1bufOrg, - list_1_org, list_1buf_org, list_2_1, list_2_2); + -conj(tmp_trans), X, nstate, tmp_v0, tmp_v1); } } else { @@ -283,8 +272,5 @@ firstprivate(i_max,is,tmp_trans) private(num1,ibit,dmv) } } } -#ifdef MPI - free_cd_2d_allocate(tmp_v1bufOrg); -#endif // MPI return TRUE; } diff --git a/src/PairExSpin.c b/src/PairExSpin.c index 8620808b5..072a2bda0 100644 --- a/src/PairExSpin.c +++ b/src/PairExSpin.c @@ -245,7 +245,7 @@ int GetPairExcitedStateHalfSpin( int iEx ) { - long unsigned int i, j, idim_maxMPI; + long unsigned int i, j; long unsigned int isite1; long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; long unsigned int tmp_off = 0; @@ -257,13 +257,6 @@ int GetPairExcitedStateHalfSpin( i_max = X->Check.idim_maxOrg; - double complex **tmp_v1bufOrg; - //set size -#ifdef MPI - idim_maxMPI = MaxMPI_li(X->Check.idim_maxOrg); - tmp_v1bufOrg = cd_2d_allocate(idim_maxMPI + 1, nstate); -#endif // MPI - for (i = 0; i < X->Def.NPairExcitationOperator[iEx]; i++) { org_isite1 = X->Def.PairExcitationOperator[iEx][i][0] + 1; org_isite2 = X->Def.PairExcitationOperator[iEx][i][2] + 1; @@ -323,8 +316,7 @@ firstprivate(i_max,isite1,org_sigma1,X,tmp_trans) shared(tmp_v0,tmp_v1,one,nstat else { //org_sigma1 != org_sigma2 // for the canonical case if (org_isite1 > X->Def.Nsite) 
{//For MPI X_child_CisAit_spin_MPIdouble(org_isite1 - 1, org_sigma2, tmp_trans, - X, nstate, tmp_v0, tmp_v1, i_max, - list_1_org, list_1buf_org, list_2_1, list_2_2); + X, nstate, tmp_v0, tmp_v1, i_max); } else { isite1 = X->Def.Tpow[org_isite1 - 1]; @@ -332,7 +324,7 @@ firstprivate(i_max,isite1,org_sigma1,X,tmp_trans) shared(tmp_v0,tmp_v1,one,nstat firstprivate(i_max,isite1,org_sigma2,X,tmp_trans,list_1_org,list_1,list_2_1,list_2_2) \ shared(tmp_v0,tmp_v1,one,nstate) for (j = 1; j <= i_max; j++) { - num1 = X_Spin_CisAit(j, X, isite1, org_sigma2, list_1_org, list_2_1, list_2_2, &tmp_off); + num1 = X_Spin_CisAit(j, X, isite1, org_sigma2, &tmp_off); if (num1 != 0) { dmv = tmp_trans*(double)num1; zaxpy_(&nstate, &dmv, tmp_v1[j], &one, tmp_v0[tmp_off], &one); @@ -341,9 +333,6 @@ shared(tmp_v0,tmp_v1,one,nstate) } } } -#ifdef MPI - free_cd_2d_allocate(tmp_v1bufOrg); -#endif return TRUE; } /// Calculation of pair excited state for general Spin canonical system @@ -362,7 +351,7 @@ int GetPairExcitedStateGeneralSpin( int iEx ) { - long unsigned int i, j, idim_maxMPI; + long unsigned int i, j; long unsigned int org_isite1, org_isite2, org_sigma1, org_sigma2; long unsigned int tmp_off = 0; long unsigned int off = 0; @@ -371,13 +360,6 @@ int GetPairExcitedStateGeneralSpin( int tmp_sgn, num1, one = 1; i_max = X->Check.idim_maxOrg; - double complex **tmp_v1bufOrg; - //set size -#ifdef MPI - idim_maxMPI = MaxMPI_li(X->Check.idim_maxOrg); - tmp_v1bufOrg = cd_2d_allocate(idim_maxMPI + 1, nstate); -#endif // MPI - for (i = 0; i < X->Def.NPairExcitationOperator[iEx]; i++) { org_isite1 = X->Def.PairExcitationOperator[iEx][i][0] + 1; org_isite2 = X->Def.PairExcitationOperator[iEx][i][2] + 1; @@ -413,8 +395,7 @@ int GetPairExcitedStateGeneralSpin( }//org_sigma1=org_sigma2 else {//org_sigma1 != org_sigma2 X_child_CisAit_GeneralSpin_MPIdouble(org_isite1 - 1, org_sigma1, org_sigma2, - tmp_trans, X, nstate, tmp_v0, tmp_v1, - i_max, list_1_org, list_1buf_org); + tmp_trans, X, nstate, tmp_v0, 
tmp_v1, i_max); } } else {//org_isite1 <= X->Def.Nsite @@ -461,9 +442,6 @@ int GetPairExcitedStateGeneralSpin( return FALSE; }//org_isite1 != org_isite2 } -#ifdef MPI - free_cd_2d_allocate(tmp_v1bufOrg); -#endif // MPI return TRUE; } diff --git a/src/SingleExHubbard.c b/src/SingleExHubbard.c index 37701a74f..6334cb735 100644 --- a/src/SingleExHubbard.c +++ b/src/SingleExHubbard.c @@ -39,7 +39,7 @@ int GetSingleExcitedStateHubbard( double complex **tmp_v1,//!<[in] v0 = H v1 int iEx ) { - long unsigned int idim_max, idim_maxMPI; + long unsigned int idim_max; long unsigned int i, j; long unsigned int org_isite, ispin, itype; long unsigned int is1_spin; @@ -50,13 +50,7 @@ int GetSingleExcitedStateHubbard( if (X->Def.NSingleExcitationOperator[iEx] == 0) { return TRUE; } - double complex **tmp_v1bufOrg; - //set size -#ifdef MPI - idim_maxMPI = MaxMPI_li(X->Check.idim_maxOrg); - tmp_v1bufOrg = cd_2d_allocate(idim_maxMPI + 1,nstate); -#endif // MPI - + idim_max = X->Check.idim_maxOrg; for (i = 0; i < X->Def.NSingleExcitationOperator[iEx]; i++) { org_isite = X->Def.SingleExcitationOperator[iEx][i][0]; @@ -66,8 +60,8 @@ int GetSingleExcitedStateHubbard( is1_spin = X->Def.Tpow[2 * org_isite + ispin]; if (itype == 1) { if (org_isite >= X->Def.Nsite) { - X_Cis_MPI(org_isite, ispin, tmpphi, nstate, tmp_v0, tmp_v1, tmp_v1bufOrg, idim_max, - X->Def.Tpow, list_1_org, list_1buf_org, list_2_1, list_2_2, + X_Cis_MPI(org_isite, ispin, tmpphi, nstate, tmp_v0, tmp_v1, idim_max, + X->Def.Tpow, X->Large.irght, X->Large.ilft, X->Large.ihfbit); } else { @@ -84,9 +78,8 @@ private(j, isgn,tmp_off,dmv) } else if (itype == 0) { if (org_isite >= X->Def.Nsite) { - X_Ajt_MPI(org_isite, ispin, tmpphi, nstate, tmp_v0, tmp_v1, tmp_v1bufOrg, - idim_max, X->Def.Tpow, list_1_org, list_1buf_org, - list_2_1, list_2_2, X->Large.irght, X->Large.ilft, X->Large.ihfbit); + X_Ajt_MPI(org_isite, ispin, tmpphi, nstate, tmp_v0, tmp_v1, + idim_max, X->Def.Tpow, X->Large.irght, X->Large.ilft, X->Large.ihfbit); } else 
{ #pragma omp parallel for default(none) shared(tmp_v0,tmp_v1,X,list_1_org,list_1,one,nstate) \ @@ -101,9 +94,6 @@ private(j, isgn, tmp_off,dmv) } } } -#ifdef MPI - free_cd_2d_allocate(tmp_v1bufOrg); -#endif return TRUE; }/*int GetSingleExcitedStateHubbard*/ /** @@ -120,7 +110,7 @@ int GetSingleExcitedStateHubbardGC( double complex **tmp_v1,//!<[in] v0 = H v1 int iEx ) { - long unsigned int idim_max, idim_maxMPI; + long unsigned int idim_max; long unsigned int i, j; long unsigned int org_isite, ispin, itype; long unsigned int is1_spin; @@ -132,12 +122,6 @@ int GetSingleExcitedStateHubbardGC( if (X->Def.NSingleExcitationOperator[iEx] == 0) { return TRUE; } - double complex **tmp_v1bufOrg; - //set size -#ifdef MPI - idim_maxMPI = MaxMPI_li(X->Check.idim_maxOrg); - tmp_v1bufOrg = cd_2d_allocate(idim_maxMPI + 1, nstate); -#endif // MPI // SingleEx for (i = 0; i < X->Def.NSingleExcitationOperator[iEx]; i++) { @@ -148,7 +132,7 @@ int GetSingleExcitedStateHubbardGC( if (itype == 1) { if (org_isite >= X->Def.Nsite) { X_GC_Cis_MPI(org_isite, ispin, tmpphi, nstate, tmp_v0, tmp_v1, - idim_max, tmp_v1bufOrg, X->Def.Tpow); + idim_max, X->Def.Tpow); } else { #pragma omp parallel for default(none) shared(tmp_v0,tmp_v1,X,nstate) \ @@ -162,7 +146,7 @@ firstprivate(idim_max, tmpphi, org_isite, ispin) private(j, is1_spin, tmp_off) else if (itype == 0) { if (org_isite >= X->Def.Nsite) { X_GC_Ajt_MPI(org_isite, ispin, tmpphi, nstate, tmp_v0, tmp_v1, - idim_max, tmp_v1bufOrg, X->Def.Tpow); + idim_max, X->Def.Tpow); } else { #pragma omp parallel for default(none) shared(tmp_v0,tmp_v1,X,nstate) \ @@ -174,8 +158,5 @@ firstprivate(idim_max, tmpphi, org_isite, ispin) private(j, is1_spin, tmp_off) } } } -#ifdef MPI - free_cd_2d_allocate(tmp_v1bufOrg); -#endif return TRUE; }/*int GetSingleExcitedStateHubbardGC*/ diff --git a/src/include/mltplyMPIHubbardCore.h b/src/include/mltplyMPIHubbardCore.h index 210e772d8..6868cf29e 100644 --- a/src/include/mltplyMPIHubbardCore.h +++ 
b/src/include/mltplyMPIHubbardCore.h @@ -227,13 +227,9 @@ void X_child_CisAjt_MPIdouble int org_ispin2, double complex tmp_trans, struct BindStruct *X, - int nstate, double complex **tmp_v0, - double complex **tmp_v1, - double complex **v1buf, - long unsigned int *list_1_org, - long unsigned int *list_1buf_org, - long unsigned int *list_2_1_target, - long unsigned int *list_2_2_target + int nstate, + double complex **tmp_v0, + double complex **tmp_v1 ); void X_child_CisAjt_MPIsingle @@ -245,12 +241,7 @@ void X_child_CisAjt_MPIsingle double complex tmp_trans, struct BindStruct *X, int nstate, double complex **tmp_v0, - double complex **tmp_v1, - double complex **v1buf, - long unsigned int *list_1_org, - long unsigned int *list_1buf_org, - long unsigned int *list_2_1_target, - long unsigned int *list_2_2_target + double complex **tmp_v1 ); @@ -262,7 +253,6 @@ void X_GC_Cis_MPI int nstate, double complex **tmp_v0, double complex **tmp_v1, unsigned long int idim_max, - double complex **tmp_v1buf, unsigned long int *Tpow ); @@ -271,10 +261,10 @@ void X_GC_Ajt_MPI int org_isite, int org_ispin, double complex tmp_trans, - int nstate, double complex **tmp_v0, + int nstate, + double complex **tmp_v0, double complex **tmp_v1, unsigned long int idim_max, - double complex **tmp_v1buf, long unsigned int *Tpow ); @@ -285,13 +275,8 @@ void X_Cis_MPI double complex tmp_trans, int nstate, double complex **tmp_v0, double complex **tmp_v1, - double complex **tmp_v1buf, unsigned long int idim_max, long unsigned int *Tpow, - long unsigned int *list_1_org, - long unsigned int *list_1buf_org, - long unsigned int *list_2_1_target, - long unsigned int *list_2_2_target, long unsigned int _irght, long unsigned int _ilft, long unsigned int _ihfbit @@ -304,13 +289,8 @@ void X_Ajt_MPI double complex tmp_trans, int nstate, double complex **tmp_v0, double complex **tmp_v1, - double complex **tmp_v1buf, unsigned long int idim_max, long unsigned int *Tpow, - long unsigned int *list_1_org, - long 
unsigned int *list_1buf_org, - long unsigned int *list_2_1_target, - long unsigned int *list_2_2_target, long unsigned int _irght, long unsigned int _ilft, long unsigned int _ihfbit diff --git a/src/include/mltplyMPISpinCore.h b/src/include/mltplyMPISpinCore.h index ba27ca8b6..ec4bd5014 100644 --- a/src/include/mltplyMPISpinCore.h +++ b/src/include/mltplyMPISpinCore.h @@ -166,9 +166,7 @@ void X_child_CisAit_GeneralSpin_MPIdouble struct BindStruct *X, int nstate, double complex **tmp_v0, double complex **tmp_v1, - unsigned long int idim_max, - long unsigned int *list_1_org, - long unsigned int *list_1buf_org + unsigned long int idim_max ); @@ -324,13 +322,10 @@ void X_child_CisAit_spin_MPIdouble int org_ispin2, double complex tmp_trans, struct BindStruct *X /**< [inout]*/, - int nstate, double complex **tmp_v0 /**< [out] Result v0 = H v1*/, + int nstate, + double complex **tmp_v0 /**< [out] Result v0 = H v1*/, double complex **tmp_v1, /**< [in] v0 = H v1*/ - unsigned long int idim_max, - long unsigned int *list_1_org, - long unsigned int *list_1buf_org, - long unsigned int *list_2_1_target, - long unsigned int *list_2_2_target + unsigned long int idim_max ); void X_child_CisAisCjuAju_GeneralSpin_MPIdouble diff --git a/src/include/mltplySpinCore.h b/src/include/mltplySpinCore.h index e17350266..28fab9ddd 100644 --- a/src/include/mltplySpinCore.h +++ b/src/include/mltplySpinCore.h @@ -212,9 +212,6 @@ int X_Spin_CisAit( struct BindStruct *X, long unsigned int is1_spin, long unsigned int sigma2, - long unsigned int *list_1_Org_, - long unsigned int *list_2_1_, - long unsigned int *list_2_2_, long unsigned int *tmp_off ); diff --git a/src/mltplyMPIHubbard.c b/src/mltplyMPIHubbard.c index 08d359296..f390132f5 100644 --- a/src/mltplyMPIHubbard.c +++ b/src/mltplyMPIHubbard.c @@ -97,12 +97,7 @@ void X_child_CisAjt_MPIdouble( double complex tmp_trans,//!<[in] Transfer @f$t@f$ struct BindStruct *X,//!< [inout] int nstate, double complex **tmp_v0,//!< [out] Result v0 = H v1 - 
double complex **tmp_v1,//!< [in] v0 = H v1 - double complex **v1buf,//!<[in] - long unsigned int *list_1_org,//!<[in] - long unsigned int *list_1buf_org,//!<[in] - long unsigned int *list_2_1_target,//!<[in] - long unsigned int *list_2_2_target//!<[in] + double complex **tmp_v1//!< [in] v0 = H v1 ) { int mask1, mask2, state1, state2, origin, bitdiff, Fsgn; unsigned long int idim_max_buf, j, ioff; @@ -136,10 +131,10 @@ void X_child_CisAjt_MPIdouble( SendRecv_cv(origin, X->Check.idim_maxOrg*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); #pragma omp parallel for default(none) private(j, ioff) \ - firstprivate(idim_max_buf, trans, X, list_2_1_target, list_2_2_target, list_1buf_org) \ + firstprivate(idim_max_buf, trans, X, list_2_1, list_2_2, list_1buf_org) \ shared(v1buf, tmp_v0,nstate,one) for (j = 1; j <= idim_max_buf; j++) { - GetOffComp(list_2_1_target, list_2_2_target, list_1buf_org[j], + GetOffComp(list_2_1, list_2_2, list_1buf_org[j], X->Large.irght, X->Large.ilft, X->Large.ihfbit, &ioff); zaxpy_(&nstate, &trans, &v1buf[j][0], &one, &tmp_v0[ioff][0], &one); }/*for (j = 1; j <= idim_max_buf; j++)*/ diff --git a/src/mltplyMPIHubbardCore.c b/src/mltplyMPIHubbardCore.c index 1f50fe46b..ba41a0949 100644 --- a/src/mltplyMPIHubbardCore.c +++ b/src/mltplyMPIHubbardCore.c @@ -1003,7 +1003,8 @@ void X_child_CisAis_Hubbard_MPI( int org_ispin1,//!<[in] Spin 1 double complex tmp_V,//!<[in] Coupling constant struct BindStruct *X,//!<[inout] - int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction + int nstate, + double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1//!<[inout] Initial wavefunction ) { unsigned long int i_max = X->Check.idim_max; @@ -1041,10 +1042,10 @@ void X_GC_Cis_MPI( int org_isite,//!<[in] Site i int org_ispin,//!<[in] Spin s double complex tmp_trans,//!<[in] Coupling constant//!<[in] - int nstate, double complex **tmp_v0,//!<[out] Result v0 += H v1*/, + int nstate, + double complex 
**tmp_v0,//!<[out] Result v0 += H v1*/, double complex **tmp_v1,//!<[in] v0 += H v1*/, unsigned long int idim_max,//!<[in] Similar to CheckList::idim_max - double complex **tmp_v1buf,//!<[in] buffer for wavefunction unsigned long int *Tpow//!<[in] Similar to DefineList::Tpow ) { int mask2, state2, origin, bit2diff, Fsgn; @@ -1066,7 +1067,7 @@ void X_GC_Cis_MPI( SgnBit((unsigned long int) (bit2diff), &Fsgn); // Fermion sign idim_max_buf = SendRecv_i(origin, idim_max); - SendRecv_cv(origin, idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &tmp_v1buf[1][0]); + SendRecv_cv(origin, idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); if (state2 == mask2) { trans = 0; @@ -1076,7 +1077,7 @@ void X_GC_Cis_MPI( } else return; - zaxpy_long(idim_max_buf*nstate, trans, &tmp_v1buf[1][0], &tmp_v0[1][0]); + zaxpy_long(idim_max_buf*nstate, trans, &v1buf[1][0], &tmp_v0[1][0]); }/*double complex X_GC_Cis_MPI*/ /** @brief Single creation/annihilation operator @@ -1089,10 +1090,10 @@ void X_GC_Ajt_MPI( int org_isite,//!<[in] Site j int org_ispin,//!<[in] Spin t double complex tmp_trans,//!<[in] Coupling constant//!<[in] - int nstate, double complex **tmp_v0,//!<[out] Result v0 += H v1*/, + int nstate, + double complex **tmp_v0,//!<[out] Result v0 += H v1*/, double complex **tmp_v1,//!<[in] v0 += H v1*/, unsigned long int idim_max,//!<[in] Similar to CheckList::idim_max - double complex **tmp_v1buf,//!<[in] buffer for wavefunction unsigned long int *Tpow//!<[in] Similar to DefineList::Tpow ) { int mask2, state2, origin, bit2diff, Fsgn; @@ -1114,14 +1115,13 @@ void X_GC_Ajt_MPI( SgnBit((unsigned long int) (bit2diff), &Fsgn); // Fermion sign idim_max_buf = SendRecv_i(origin, idim_max); - - SendRecv_cv(origin, idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &tmp_v1buf[1][0]); + SendRecv_cv(origin, idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); if ( state2 == 0 ) trans = 0; else if (state2 == mask2) trans = (double)Fsgn * tmp_trans; else return; 
- zaxpy_long(idim_max_buf*nstate, trans, &tmp_v1buf[1][0], &tmp_v0[1][0]); + zaxpy_long(idim_max_buf*nstate, trans, &v1buf[1][0], &tmp_v0[1][0]); }/*double complex X_GC_Ajt_MPI*/ /** @brief Compute @f$c_{is}^\dagger@f$ @@ -1134,13 +1134,8 @@ void X_Cis_MPI( int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1,//!<[inout] Initial wavefunction - double complex **tmp_v1buf,//!<[in] buffer for wavefunction unsigned long int idim_max,//!<[in] Similar to CheckList::idim_max long unsigned int *Tpow,//!<[in] Similar to DefineList::Tpow - long unsigned int *list_1_org,//!<[in] Similar to ::list_1 - long unsigned int *list_1buf_org,//!<[in] Similar to ::list_1buf - long unsigned int *list_2_1_target,//!<[in] Similar to ::list_2_1 - long unsigned int *list_2_2_target,//!<[in] Similar to ::list_2_2 long unsigned int _irght,//!<[in] Similer to LargeList::irght long unsigned int _ilft,//!<[in] Similer to LargeList::ilft long unsigned int _ihfbit//!<[in] Similer to LargeList::ihfbit @@ -1164,10 +1159,8 @@ void X_Cis_MPI( SgnBit((unsigned long int) (bit2diff), &Fsgn); // Fermion sign idim_max_buf = SendRecv_i(origin, idim_max); - SendRecv_iv(origin, idim_max + 1, idim_max_buf + 1, list_1_org, list_1buf_org); - - SendRecv_cv(origin, idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &tmp_v1buf[1][0]); + SendRecv_cv(origin, idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); if (state2 == mask2) { trans = 0; @@ -1178,12 +1171,12 @@ void X_Cis_MPI( else return; #pragma omp parallel for default(none) private(j) \ -firstprivate(idim_max_buf, trans, ioff, _irght, _ilft, _ihfbit, list_2_1_target, list_2_2_target) \ - shared(tmp_v1buf, tmp_v1, nstate,one, tmp_v0, list_1buf_org) +firstprivate(idim_max_buf, trans, ioff, _irght, _ilft, _ihfbit, list_2_1, list_2_2) \ + shared(v1buf, tmp_v1, nstate,one, tmp_v0, list_1buf_org) for (j = 1; j <= idim_max_buf; j++) {//idim_max_buf -> original - GetOffComp(list_2_1_target, list_2_2_target, 
list_1buf_org[j], + GetOffComp(list_2_1, list_2_2, list_1buf_org[j], _irght, _ilft, _ihfbit, &ioff); - zaxpy_(&nstate, &trans, &tmp_v1buf[j][0], &one, &tmp_v0[ioff][0], &one); + zaxpy_(&nstate, &trans, &v1buf[j][0], &one, &tmp_v0[ioff][0], &one); }/*for (j = 1; j <= idim_max_buf; j++)*/ }/*double complex X_GC_Cis_MPI*/ /** @@ -1196,13 +1189,8 @@ void X_Ajt_MPI( double complex tmp_trans,//!<[in] Coupling constant int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1,//!<[inout] Initial wavefunction - double complex **tmp_v1buf,//!<[in] buffer for wavefunction unsigned long int idim_max,//!<[in] Similar to CheckList::idim_max long unsigned int *Tpow,//!<[in] Similar to DefineList::Tpow - long unsigned int *list_1_org,//!<[in] Similar to ::list_1 - long unsigned int *list_1buf_org,//!<[in] Similar to ::list_1buf - long unsigned int *list_2_1_target,//!<[in] Similar to ::list_2_1 - long unsigned int *list_2_2_target,//!<[in] Similar to ::list_2_2 long unsigned int _irght,//!<[in] Similer to LargeList::irght long unsigned int _ilft,//!<[in] Similer to LargeList::ilft long unsigned int _ihfbit//!<[in] Similer to LargeList::ihfbit @@ -1226,7 +1214,7 @@ void X_Ajt_MPI( SgnBit((unsigned long int) (bit2diff), &Fsgn); // Fermion sign idim_max_buf = SendRecv_i(origin, idim_max); SendRecv_iv(origin, idim_max + 1, idim_max_buf + 1, list_1_org, list_1buf_org); - SendRecv_cv(origin, idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &tmp_v1buf[1][0]); + SendRecv_cv(origin, idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); if (state2 == 0) { trans = 0; @@ -1237,11 +1225,11 @@ void X_Ajt_MPI( else return; #pragma omp parallel for default(none) private(j) \ -firstprivate(idim_max_buf, trans, ioff, _irght, _ilft, _ihfbit, list_2_1_target, list_2_2_target) \ - shared(tmp_v1buf, tmp_v1, nstate,one, tmp_v0, list_1buf_org) +firstprivate(idim_max_buf, trans, ioff, _irght, _ilft, _ihfbit, list_2_1, list_2_2) \ + shared(v1buf, 
tmp_v1, nstate,one, tmp_v0, list_1buf_org) for (j = 1; j <= idim_max_buf; j++) { - GetOffComp(list_2_1_target, list_2_2_target, list_1buf_org[j], + GetOffComp(list_2_1, list_2_2, list_1buf_org[j], _irght, _ilft, _ihfbit, &ioff); - zaxpy_(&nstate, &trans, &tmp_v1buf[j][0], &one, &tmp_v0[ioff][0], &one); + zaxpy_(&nstate, &trans, &v1buf[j][0], &one, &tmp_v0[ioff][0], &one); } }/*double complex X_Ajt_MPI*/ diff --git a/src/mltplyMPISpinCore.c b/src/mltplyMPISpinCore.c index 394c06e0b..4f8659cc3 100644 --- a/src/mltplyMPISpinCore.c +++ b/src/mltplyMPISpinCore.c @@ -850,9 +850,7 @@ void X_child_CisAit_GeneralSpin_MPIdouble( int nstate, double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1,//!<[in] Input wavefunction - unsigned long int idim_max,//!<[in] Similar to CheckList::idim_max - long unsigned int *list_1_org,//!<[in] Similar to ::list_1 - long unsigned int *list_1buf_org//!<[in] Similar to ::list_1buf + unsigned long int idim_max//!<[in] Similar to CheckList::idim_max ) { unsigned long int off, j, tmp_off,idim_max_buf; @@ -1344,13 +1342,10 @@ void X_child_CisAit_spin_MPIdouble( int org_ispin2,//!<[in] Spin 2 double complex tmp_trans,//!<[in] Coupling constant struct BindStruct *X /**< [inout]*/, - int nstate, double complex **tmp_v0 /**< [out] Result v0 = H v1*/, + int nstate, + double complex **tmp_v0 /**< [out] Result v0 = H v1*/, double complex **tmp_v1, /**< [in] v0 = H v1*/ - unsigned long int idim_max,//!<[in] Similar to CheckList::idim_max - long unsigned int *list_1_org,//!<[in] Similar to ::list_1 - long unsigned int *list_1buf_org,//!<[in] Similar to ::list_1buf - long unsigned int *list_2_1_target,//!<[in] Similar to ::list_2_1 - long unsigned int *list_2_2_target//!<[in] Similar to ::list_2_2 + unsigned long int idim_max//!<[in] Similar to CheckList::idim_max ){ int mask1, state1, origin, one = 1; unsigned long int idim_max_buf, j; @@ -1375,10 +1370,10 @@ void X_child_CisAit_spin_MPIdouble( SendRecv_cv(origin, 
idim_max*nstate, idim_max_buf*nstate, &tmp_v1[1][0], &v1buf[1][0]); #pragma omp parallel for default(none) private(j, tmp_off) \ -firstprivate(idim_max_buf, trans, X, list_1buf_org, list_2_1_target, list_2_2_target) \ +firstprivate(idim_max_buf, trans, X, list_1buf_org, list_2_1, list_2_2) \ shared(v1buf, tmp_v0,nstate,one) for (j = 1; j <= idim_max_buf; j++) { - GetOffComp(list_2_1_target, list_2_2_target, list_1buf_org[j], X->Large.irght, X->Large.ilft, X->Large.ihfbit, &tmp_off); + GetOffComp(list_2_1, list_2_2, list_1buf_org[j], X->Large.irght, X->Large.ilft, X->Large.ihfbit, &tmp_off); zaxpy_(&nstate, &trans, &v1buf[j][0], &one, &tmp_v0[tmp_off][0], &one); } }/*double complex X_child_CisAit_spin_MPIdouble*/ diff --git a/src/mltplySpinCore.c b/src/mltplySpinCore.c index a54a8a8d8..a16e05986 100644 --- a/src/mltplySpinCore.c +++ b/src/mltplySpinCore.c @@ -140,16 +140,13 @@ int X_Spin_CisAit( struct BindStruct *X,//!<[inout] long unsigned int is1_spin,//!<[in] Bit mask for computing spin state long unsigned int sigma2,//!<[in] Spin state at site 2 - long unsigned int *list_1_Org_,//!<[in] Similar to ::list_1 - long unsigned int *list_2_1_,//!<[in] Similar to ::list_2_1 - long unsigned int *list_2_2_,//!<[in] Similar to ::list_2_2 long unsigned int *tmp_off//!<[out] Index of final wavefunction ) { long unsigned int list_1_j; long unsigned int off; - list_1_j = list_1_Org_[j]; + list_1_j = list_1_org[j]; if (X_SpinGC_CisAit(list_1_j + 1, X, is1_spin, sigma2, &off) != 0) { - GetOffComp(list_2_1_, list_2_2_, off, X->Large.irght, X->Large.ilft, X->Large.ihfbit, tmp_off); + GetOffComp(list_2_1, list_2_2, off, X->Large.irght, X->Large.ilft, X->Large.ihfbit, tmp_off); return 1; } else { diff --git a/src/wrapperMPI.c b/src/wrapperMPI.c index 0ac592b5e..2c54dfb3e 100644 --- a/src/wrapperMPI.c +++ b/src/wrapperMPI.c @@ -41,9 +41,8 @@ Number of threads (::nthreads), and pointer to the standard output @author Mitsuaki Kawamura (The University of Tokyo) */ void 
InitializeMPI(int argc, char *argv[]){ - int ierr; - #ifdef MPI + int ierr; ierr = MPI_Init(&argc, &argv); ierr = MPI_Comm_size(MPI_COMM_WORLD, &nproc); ierr = MPI_Comm_rank(MPI_COMM_WORLD, &myrank); @@ -72,8 +71,8 @@ void InitializeMPI(int argc, char *argv[]){ @author Mitsuaki Kawamura (The University of Tokyo) */ void FinalizeMPI(){ - int ierr; #ifdef MPI + int ierr; ierr = MPI_Finalize(); if (ierr != 0) fprintf(stderr, "\n MPI_Finalize() = %d\n\n", ierr); #endif @@ -87,10 +86,10 @@ void exitMPI( int errorcode//!<[in] Error-code to be returned as that of this program ) { - int ierr; fflush(stdout); #ifdef MPI fprintf(stdout,"\n\n ####### [HPhi] You DO NOT have to WORRY about the following MPI-ERROR MESSAGE. #######\n\n"); + int ierr; ierr = MPI_Abort(MPI_COMM_WORLD, errorcode); ierr = MPI_Finalize(); if (ierr != 0) fprintf(stderr, "\n MPI_Finalize() = %d\n\n", ierr); diff --git a/src/xsetmem.c b/src/xsetmem.c index 86271fddd..129ca9d6b 100644 --- a/src/xsetmem.c +++ b/src/xsetmem.c @@ -155,15 +155,9 @@ int setmem_large struct BindStruct *X ) { int nstate; - unsigned long int idim_maxMPI; - - idim_maxMPI = MaxMPI_li(X->Check.idim_max); if (GetlistSize(X) == TRUE) { list_1 = lui_1d_allocate(X->Check.idim_max + 1); -#ifdef MPI - list_1buf = lui_1d_allocate(idim_maxMPI + 1); -#endif // MPI list_2_1 = lui_1d_allocate(X->Large.SizeOflist_2_1); list_2_2 = lui_1d_allocate(X->Large.SizeOflist_2_2); if (list_1 == NULL @@ -191,7 +185,12 @@ int setmem_large v0 = cd_2d_allocate(X->Check.idim_max + 1, nstate); v1 = cd_2d_allocate(X->Check.idim_max + 1, nstate); #ifdef MPI - v1buf = cd_2d_allocate(idim_maxMPI + 1, nstate); + unsigned long int MAXidim_max; + MAXidim_max = MaxMPI_li(X->Check.idim_max); + if (GetlistSize(X) == TRUE) list_1buf = lui_1d_allocate(MAXidim_max + 1); + v1buf = cd_2d_allocate(MAXidim_max + 1, nstate); +#else + if (X->Def.iCalcType == CG) v1buf = cd_2d_allocate(X->Check.idim_max + 1, nstate); #endif // MPI X->Phys.num_down = d_1d_allocate(nstate); @@ 
-242,9 +241,6 @@ void setmem_IntAll_Diagonal( int GetlistSize( struct BindStruct *X ) { - // unsigned int idim_maxMPI; - -// idim_maxMPI = MaxMPI_li(X->Check.idim_max); switch (X->Def.iCalcModel) { case Spin: case Hubbard: From 26daa11d734f3387d01d0b748a0cb27897707d65 Mon Sep 17 00:00:00 2001 From: mitsuaki1987 Date: Sat, 30 Mar 2019 23:28:12 +0900 Subject: [PATCH 28/50] Backup --- tool/wout2geom.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tool/wout2geom.sh b/tool/wout2geom.sh index 0ebbe0533..18e51c1ba 100644 --- a/tool/wout2geom.sh +++ b/tool/wout2geom.sh @@ -1,6 +1,6 @@ #!/bin/bash -grep -A 3 " Lattice Vectors " boron.wout | awk 'NR>1{print $2, $3, $4}' +grep -A 3 " Lattice Vectors " $1 | awk 'NR>1{print $2, $3, $4}' declare -a b0=`grep " b_1 " ${1} | awk '{print "("$2, $3, $4")"}'` declare -a b1=`grep " b_2 " ${1} | awk '{print "("$2, $3, $4")"}'` declare -a b2=`grep " b_3 " ${1} | awk '{print "("$2, $3, $4")"}'` From e001f0a3d75e65afb816441021fa7a4591873753 Mon Sep 17 00:00:00 2001 From: mitsuaki1987 Date: Sun, 31 Mar 2019 01:07:36 +0900 Subject: [PATCH 29/50] Bug fix in the third nearest of square lattice --- src/StdFace/SquareLattice.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/StdFace/SquareLattice.c b/src/StdFace/SquareLattice.c index 8d15c102f..f9b90963d 100644 --- a/src/StdFace/SquareLattice.c +++ b/src/StdFace/SquareLattice.c @@ -249,7 +249,7 @@ void StdFace_Tetragonal(struct StdIntList *StdI) StdFace_Coulomb(StdI, StdI->V1p, isite, jsite); }/*if (model != "spin")*/ /* - Nearest neighbor along 2W + Third Nearest neighbor along 2W */ StdFace_SetLabel(StdI, fp, iW, iL, 2, 0, 0, 0, &isite, &jsite, 3, &Cphase, dR); /**/ @@ -261,9 +261,9 @@ void StdFace_Tetragonal(struct StdIntList *StdI) StdFace_Coulomb(StdI, StdI->V0pp, isite, jsite); } /* - Nearest neighbor along L + Third Nearest neighbor along L */ - StdFace_SetLabel(StdI, fp, iW, iL, 0, 1, 0, 0, &isite, &jsite, 3, &Cphase, dR); + 
StdFace_SetLabel(StdI, fp, iW, iL, 0, 2, 0, 0, &isite, &jsite, 3, &Cphase, dR); /**/ if (strcmp(StdI->model, "spin") == 0) { StdFace_GeneralJ(StdI, StdI->J1pp, StdI->S2, StdI->S2, isite, jsite); From b0b95691e7cbfd605d05ff3d2c9f97758e8e4e3c Mon Sep 17 00:00:00 2001 From: mitsuaki1987 Date: Sun, 31 Mar 2019 02:19:47 +0900 Subject: [PATCH 30/50] Add sample for the dynamical Green's function with Fourier trans. --- samples/Spectrum/Scratch/README.md | 19 +++++++++++++++++++ samples/Spectrum/Scratch/stan.in | 11 +++++++++++ test/fulldiag_genspin_ladder.sh | 6 +++++- test/fulldiag_genspingc_ladder.sh | 6 +++++- test/fulldiag_hubbard_chain.sh | 6 +++++- test/fulldiag_hubbardgc_tri.sh | 6 +++++- test/fulldiag_kondo_chain.sh | 7 ++++++- test/fulldiag_kondogc_chain.sh | 6 +++++- test/fulldiag_spin_tri.sh | 6 +++++- test/fulldiag_spingc_tri.sh | 6 +++++- tool/dynamicalr2k.F90 | 5 ++++- 11 files changed, 75 insertions(+), 9 deletions(-) create mode 100644 samples/Spectrum/Scratch/README.md create mode 100644 samples/Spectrum/Scratch/stan.in diff --git a/samples/Spectrum/Scratch/README.md b/samples/Spectrum/Scratch/README.md new file mode 100644 index 000000000..b2535b249 --- /dev/null +++ b/samples/Spectrum/Scratch/README.md @@ -0,0 +1,19 @@ +# 12-site Heisenberg model on Chain lattice + +Compute the Sz-Sz dynamical correlation function +and plot the imaginary part. 
+ +``` bash +$ HPhi -s stan.in +$ echo "2 6 +G 0.0 0.0 0.0 +X 0.0 0.5 0.0 +" >> geometry.dat +$ dynamicalr2k namelist.def geometry.dat +$ gnuplot +``` + +``` gnuplot +gnuplot> load "kpath.gp" +gnuplot> splot [][][0:] "output/zvo_dyn.dat" u 1:2:(-$4) w l +``` diff --git a/samples/Spectrum/Scratch/stan.in b/samples/Spectrum/Scratch/stan.in new file mode 100644 index 000000000..b345ab376 --- /dev/null +++ b/samples/Spectrum/Scratch/stan.in @@ -0,0 +1,11 @@ +model = Spin +lattice = Chain +method = CG +L = 12 +2Sz = 0 +J = 1.0 +CalcSpec = Scratch +SpectrumType = SzSz_r +OmegaIm = 0.1 +OmegaMin = -6.0 +OmegaMax = -2.0 diff --git a/test/fulldiag_genspin_ladder.sh b/test/fulldiag_genspin_ladder.sh index 49124c138..63c9cd18d 100755 --- a/test/fulldiag_genspin_ladder.sh +++ b/test/fulldiag_genspin_ladder.sh @@ -67,7 +67,11 @@ cat > reference.dat < paste.dat -diff=`awk 'BEGIN{diff=0.0} NR>1{diff+=sqrt(($1-$6)*($1-$6))} END{printf "%8.6f", diff}' paste.dat` +diff=`awk ' +BEGIN{diff=0.0} +NR>1{diff+=sqrt(($1-$6)*($1-$6))} +END{printf "%8.6f", diff/NR} +' paste.dat` test "${diff}" = "0.000000" exit $? diff --git a/test/fulldiag_genspingc_ladder.sh b/test/fulldiag_genspingc_ladder.sh index 330541c9c..c2a858628 100755 --- a/test/fulldiag_genspingc_ladder.sh +++ b/test/fulldiag_genspingc_ladder.sh @@ -38,7 +38,11 @@ cat > reference.dat < paste.dat -diff=`awk 'BEGIN{diff=0.0} NR>1{diff+=sqrt(($1-$6)*($1-$6))} END{printf "%8.6f", diff}' paste.dat` +diff=`awk ' +BEGIN{diff=0.0} +NR>1{diff+=sqrt(($1-$6)*($1-$6))} +END{printf "%8.6f", diff/NR} +' paste.dat` test "${diff}" = "0.000000" exit $? 
diff --git a/test/fulldiag_hubbard_chain.sh b/test/fulldiag_hubbard_chain.sh index 91e5784f9..970eaedea 100755 --- a/test/fulldiag_hubbard_chain.sh +++ b/test/fulldiag_hubbard_chain.sh @@ -58,7 +58,11 @@ cat > reference.dat < paste.dat -diff=`awk 'BEGIN{diff=0.0} NR>1{diff+=sqrt(($1-$6)*($1-$6))} END{printf "%8.6f", diff}' paste.dat` +diff=`awk ' +BEGIN{diff=0.0} +NR>1{diff+=sqrt(($1-$6)*($1-$6))} +END{printf "%8.6f", diff/NR} +' paste.dat` test "${diff}" = "0.000000" exit $? diff --git a/test/fulldiag_hubbardgc_tri.sh b/test/fulldiag_hubbardgc_tri.sh index 3da198ba3..c1462d204 100755 --- a/test/fulldiag_hubbardgc_tri.sh +++ b/test/fulldiag_hubbardgc_tri.sh @@ -87,7 +87,11 @@ cat > reference.dat < paste.dat -diff=`awk 'BEGIN{diff=0.0} NR>1{diff+=sqrt(($1-$6)*($1-$6))} END{printf "%8.6f", diff}' paste.dat` +diff=`awk ' +BEGIN{diff=0.0} +NR>1{diff+=sqrt(($1-$6)*($1-$6))} +END{printf "%8.6f", diff/NR} +' paste.dat` test "${diff}" = "0.000000" exit $? diff --git a/test/fulldiag_kondo_chain.sh b/test/fulldiag_kondo_chain.sh index 7c23e29b9..a34985e1e 100755 --- a/test/fulldiag_kondo_chain.sh +++ b/test/fulldiag_kondo_chain.sh @@ -78,7 +78,12 @@ cat > reference.dat < paste.dat -diff=`awk 'BEGIN{diff=0.0} NR>1{diff+=sqrt(($1-$6)*($1-$6))} END{printf "%8.6f", diff}' paste.dat` +diff=`awk ' +BEGIN{diff=0.0} +NR>1{diff+=sqrt(($1-$6)*($1-$6))} +END{printf "%8.6f", diff/NR} +' paste.dat` +echo "Diff : " $diff test "${diff}" = "0.000000" exit $? diff --git a/test/fulldiag_kondogc_chain.sh b/test/fulldiag_kondogc_chain.sh index 0d67285a0..3b19050b4 100755 --- a/test/fulldiag_kondogc_chain.sh +++ b/test/fulldiag_kondogc_chain.sh @@ -84,7 +84,11 @@ cat > reference.dat < paste.dat -diff=`awk 'BEGIN{diff=0.0} NR>1{diff+=sqrt(($1-$6)*($1-$6))} END{printf "%8.6f", diff}' paste.dat` +diff=`awk ' +BEGIN{diff=0.0} +NR>1{diff+=sqrt(($1-$6)*($1-$6))} +END{printf "%8.6f", diff/NR} +' paste.dat` test "${diff}" = "0.000000" exit $? 
diff --git a/test/fulldiag_spin_tri.sh b/test/fulldiag_spin_tri.sh index 7f292f130..76b912b5a 100755 --- a/test/fulldiag_spin_tri.sh +++ b/test/fulldiag_spin_tri.sh @@ -43,7 +43,11 @@ cat > reference.dat < paste.dat -diff=`awk 'BEGIN{diff=0.0} NR>1{diff+=sqrt(($1-$6)*($1-$6))} END{printf "%8.6f", diff}' paste.dat` +diff=`awk ' +BEGIN{diff=0.0} +NR>1{diff+=sqrt(($1-$6)*($1-$6))} +END{printf "%8.6f", diff/NR} +' paste.dat` test "${diff}" = "0.000000" exit $? diff --git a/test/fulldiag_spingc_tri.sh b/test/fulldiag_spingc_tri.sh index b5d55b225..83da4dde1 100755 --- a/test/fulldiag_spingc_tri.sh +++ b/test/fulldiag_spingc_tri.sh @@ -86,7 +86,11 @@ cat > reference.dat < paste.dat -diff=`awk 'BEGIN{diff=0.0} NR>1{diff+=sqrt(($1-$6)*($1-$6))} END{printf "%8.6f", diff}' paste.dat` +diff=`awk ' +BEGIN{diff=0.0} +NR>1{diff+=sqrt(($1-$6)*($1-$6))} +END{printf "%8.6f", diff/NR} +' paste.dat` test "${diff}" = "0.000000" exit $? diff --git a/tool/dynamicalr2k.F90 b/tool/dynamicalr2k.F90 index 372e8134b..cc386dda7 100644 --- a/tool/dynamicalr2k.F90 +++ b/tool/dynamicalr2k.F90 @@ -486,7 +486,10 @@ SUBROUTINE output_cor() WRITE(fo,'(a,a,a,f10.5,a)',advance="no") "'", TRIM(kname(inode)), "' ", xk_label(inode), ", " END DO WRITE(fo,'(a,a,a,f10.5,a)') "'", TRIM(kname(nnode)), "' ", xk_label(nnode), ")" - WRITE(fo,'(a)') "set ylabel 'Frequency'" + WRITE(fo,'(a)') "set ylabel 'Energy from E_0'" + WRITE(fo,'(a)') "set zlabel 'Spectrum' rotate" + WRITE(fo,'(a)') "set ticslevel 0" + WRITE(fo,'(a)') "set xzeroaxis" WRITE(fo,'(a)') "set grid xtics lt 1 lc 0" ! CLOSE(fo) From b533d19b02c2e8faf0b68bafe12ab9df090987b2 Mon Sep 17 00:00:00 2001 From: mitsuaki1987 Date: Sun, 31 Mar 2019 23:34:42 +0900 Subject: [PATCH 31/50] [Bugfix] The reciprocal lattice vector was incorrect. 
--- tool/dynamicalr2k.F90 | 2 +- tool/greenr2k.F90 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tool/dynamicalr2k.F90 b/tool/dynamicalr2k.F90 index cc386dda7..1d1aa47a4 100644 --- a/tool/dynamicalr2k.F90 +++ b/tool/dynamicalr2k.F90 @@ -234,7 +234,7 @@ SUBROUTINE read_geometry() ! ! Compute Reciprocal Lattice Vector ! - recipr(1:3,1:3) = direct(1:3,1:3) + recipr(1:3,1:3) = transpose(direct(1:3,1:3)) CALL dgetrf(3, 3, recipr, 3, Ipiv, ii) CALL dgetri(3, recipr, 3, ipiv, work, 10, ii) WRITE(*,*) " Reciplocal lattice vector :" diff --git a/tool/greenr2k.F90 b/tool/greenr2k.F90 index 4ca705c0f..466cd9367 100644 --- a/tool/greenr2k.F90 +++ b/tool/greenr2k.F90 @@ -371,7 +371,7 @@ SUBROUTINE read_geometry() ! ! Compute Reciprocal Lattice Vector ! - recipr(1:3,1:3) = direct(1:3,1:3) + recipr(1:3,1:3) = transpose(direct(1:3,1:3)) CALL dgetrf(3, 3, recipr, 3, Ipiv, ii) CALL dgetri(3, recipr, 3, ipiv, work, 10, ii) WRITE(*,*) " Reciplocal lattice vector :" From cfe2273730deabef20aa326a1b5d59203ca94858 Mon Sep 17 00:00:00 2001 From: mitsuaki1987 Date: Thu, 11 Apr 2019 17:26:56 +0900 Subject: [PATCH 32/50] Update manual (JP): * Usage of dynamicalr2k * New keyword for CalcSpec and SpectrumType --- doc/figs/dynamicalr2g.png | Bin 0 -> 131001 bytes .../expertmode_ja/index_expertmode_ja.rst | 5 +- .../outputfiles_ja/excitedvec_ja.rst | 2 +- ...ters_for_the_dynamical_Greens_function.rst | 31 +++++-- doc/ja/source/fourier/overview.rst | 18 +++- doc/ja/source/fourier/tutorial.rst | 87 +++++++++++++++++- doc/ja/source/fourier/util.rst | 27 +++++- src/readdef.c | 5 +- 8 files changed, 156 insertions(+), 19 deletions(-) create mode 100644 doc/figs/dynamicalr2g.png diff --git a/doc/figs/dynamicalr2g.png b/doc/figs/dynamicalr2g.png new file mode 100644 index 0000000000000000000000000000000000000000..4a8065e9c5637e9c5863baeffa45fb51be9231a3 GIT binary patch literal 131001 zcmdSBB8!XmrR2v4yr~?$uGtfefJ(RPl(XOP~U*T2$~M%>49g6b#2Huh~QcD4J0FY 
zLV-f*0#AbIf)MY(vl7Jr{V#qg3!;per0-@cYHfXJUA3~byc-aBK5Nfjzj=TXx|Kq> zs(k}yar>shb~!md{%kkhems+hW6A6;ghV3dabLkdqAQr2n>$ImU}7(F80MMu=4GYX zd4lg|B?t~JLw?U4zt;6&4hrF;shQayzlWo%tE;C^&X2ce-)JONU5*)qu$l7iX4#6m zx^3XwY3RG$GoM$zPoBm5+kJ8=;R8?6<{CP0-Z4B63p_t}o)6Om2M6zsXO@+f zJ-D$-^FQBisg3FDIw8`T2teHP}x;AoOaU8gq5O_Iy^1eA9o0uSAHXS61 z)SrCC4^$SSYyV<#`bV?(pgh=QTsomH1_K2@7G_X z@HzbjgM@^nq5H*80U7#hbaZr{L~O@q?304qna1$c~9#kG4JeCWAfb@e%A8W8#tWnAnAIlanSQ1It z7D47qN`8G@pY|Xpl2RQR)LBe|TarKj?c2A;RWF)ALi@HeaV4cmT>G~8^mH9Py=cP0 z;cqmNk&*sqkp`Ny$@|kq^K*00Kc>kH{O)$TgAoemC1O8~wGY-#@Oz)M-#lMrzZ~*- z3?-0xUmxC88sC)F*IyN;+tn=EaB*>AU|>v*vsuqQcD~%a+{ZNU7p4yn59iyf>g)UM zWx5v@7LI2Nbgo@5`w5^3#esnuJZ@qR4Gn2&X)QO}zg%lPUq5wfc5)njkS?fMJGtGA zWVpsR08wEvNxaV{H?60q2PvfMH2U(i_ClxM*0dHI5guMrSy@O4FND&p>$ernaov8k zXr2uQ(*Afpf~nE5He6c@nJ!N}`h%E~mEYrZ_Vf15gJ%||7!wJJ$-+;gf`S6tkq$5{@x1Ex_NQ}& z6k3Chw$@f1uQ5hh-`B@ehwsA&G_;$;#fv5Ds-5j^$rE*T^|Z0i$!48p3u_CHB62BHM##tvG&JTz3H&&17Je#Hl16NTj~knqP(>tkOvVw3u-mU5I&j*xU(Gc; z@9FQtMC3LZ_eJbwdB^PCou50k>z`cQbsW&#l&z)DF;E?WtvuhjH^#B{8m0_fndBBjT}G%@lW7>vbPo?e6WR%NEvK&$F|!T^-q5SV&4ra=QGDql`enq<;jl z^RSx6Xe4ZDx!<*R{BQb2fd~Sk36DY{Yo){2TS5XRW(zj3?QS<+Jn!uGYzrG3`{`4a zdf7MW0_}=qE7gRT`>GdeN=iYL?MVrOC-8L~wo5KAPd5h7`=4SZ%4Yj}dnMDgE8Kd+ zP;(S5W=qwO`7VFgSx)!I;6H39Do7XHuYW~8ZogUW@9!UBxSq_DK*r-#R8HxG&=7wLX4)Uka@gx$j(NLyQ^;yde(rp0e4&~D{oAm1@xGgZoqf)#er|r=UjyRC z8bp_G6$>*nxQ91ihfl%E{c&5iO*!EV)ZURFNF(~EA-p%D;igaCnjDlrxnUdOYh~enT&%jh0A84-v<1* z9ANaFuUmwQit1xzu+jIEf20_1a5?P2G@m30d|7?(Fa3~FAcb-i31M@m_wPGS`i_?G z{(FdtiGeuD;PbwIUA^n;YemXB$1QP4p|te$k>TOMU+bCftKo!U!a3`n9cu_;H>?9;6{;}ieVN_q!SHiMY?@q{P-hECHL<3`B5S1~RYfzZsAvU3N!PlakOOg+kurd!-UA z?48>$b-mt64SmmomKJU%rnA-^>1gq< zgoFLjxS8@rGqL;2?uV^D^Kz|t>Fl;%$IW~B#^u`e|EdQ*Ik*0CZO`XC;ei&48yy|( z2}QnnP*UJM6BQEr1;E9>fB*a*me=3|YxeeTZZ8Ng(#opy$=4(3T4)DNZ#WWdGiUO;YHTwKHo!T9*mZndrX#}C0J18r?9 z=G3X_Y2;75lo7__;{7TLeBV-2EgP3ZkqMq>H4H%L5O}NygoK1dj?~)yX-SHt)oW2* zdKsd~HZ?W%v9ErZ{rwvZ%9;^NSy}n@o^SMoKKStOSK#xwE!eP^#3tnb{OJIY#ni!9 
z7~~jy9(6B1r|m%`Tz16A00n`kx*=0(dHLrmkmUp(H;_--F2)->&mlSv3Vm_Klt8%9 z(b4&OXZf5gl4j%bN5@c|PCmVbW>$ z)|9K>wffMGRBBcmgAWQ^=K9i8HK?L?f;a-Siu^2*h zDG2_Hy)b$FeJO^rJyW96xZ*St+x(ZT&+n`clYz?+KtO=BLND(Ve!6hty{6#jr-wC< zt!SFo`^IU^J1)^Gu+=*O00cqO3wG!7$(ybI=+=zOipt8*TIycM4iy17j1Yur`kp)A zJTG=Ztrcq}6wo9@0jaU)d}qX#U8ip31Au01i4+RYOxX_S+fwOch4TmgrNmEB|BKO2 zFDwoKR)?r${{J0z|G$}OILJXdFktrBbNmEgj+Z*DIEbn7S zPQ0qlXKpbiP!QdkbpdP8twVqUZ%HF>{a5xCR8JzJocek;CMHy&xQo4sSZtR5BR4ci z)T3O$!%Q3+M68~lV6W0@)tL50V9<_$`dU;_peLWEe-(WJ0weeD zKYwUiOBvAbuocG2bAAKyHK?&`A_;X^{yp#M!K7icf^@{(}ir;{R`Iek4U6r1e21@rp6gJu) z;@2zm>!Dq>sT9dq=r%L+^Ya6c*cI>&P^LjSjz6rYR8MZn9UqS|z=97p-R}j8qmZJ* z_x1LEmnCo4U;Fdt$bnN->2dW;5~cBVwiB}k?~eEP0siO(xTVoyLl{zsn}>%&F70x` zB=!%;IPWr`CjQ^Gb z7yxYG`eaSO>+RD1=(Vg;|Fj9_*c|2utvSZo{ryz<+d3b-&MO(UZXLxYsu z;+7!?8eoEt-qNGmTIhip{Q3&Ruxe7f=ytrqwJ`5{ zy=V@Y@5snVfn3bijMY_Fksul?n23<8T(7LW{CKHa*(>(~SYmtIG?1|42K|2mhB|lR zOWqDBf?%krsi|e^6*xdtFq0o3dPx37Pw^Uy5@14WUI!HhfKv2%+py~ZG6e>@)g+hB zuBNWu-`D5Ko0XA~QC?nd@O+-~E*px?5RCLIMeZ9PK$YBGTqLW~)6)&S_6q=?7F};) zPd@_qAvQ8n1cj7{$ms5Tr?0PX4Yvz?sB1mk0>WVG3@qtEs(F@Hiwn3E2M^Ek!PT{G z0%^(o0n9`0>-!*b7iYuv?iLm_6G&kmgdeVtblN=5fLO!tdA8!IA1DJAB+v?2-QVJ0~V|u7vnttCj$~| zPw3QHD9Oo1UjjI1Zfe?gy<{D)`y<6{vBR<$1{%!t*`ysH0XR4~8ujupHz9~yrONQ{ zT>j6G0J=J!WdR1uLQekqiwelOw-b&4yJ#5r*5~GSZ#YQGq*0?s00aQU1o2i2fUm>Y zg={*8xS}jB+;(8-edt(vEJ}kAfPmA zYHDWY<`QM=5RX_O8Y?O)d;y9xF)=|vKqy%JPEw22YqK_H3IQbc&yEw|izTY1Aff?*+~olK48%OdyVFFT=CYE< zKTBIM<6(PYy@I9hnW#wI`=_nXepOT(FWbfCy`CeLBYEhO~1Yy_!fj^#59Duh9K#v3Qrcx-2y`JP} z?)P5-5g;Qz9Jx5~VuCcKNd~NU)SYfbdXS0V$C+^z_g~u~t}) zvMm(*fwsFgR!9zhru=kbCm|{; zE3;TF6NGc(*uVZ-2egA^Ep)SC*$yPN1)!%vb3%ivtQ){?F0q68@Vp1bPv)l-**@og^J4ARI(%1}98sT#A#|(5Dnl-Bw_;fk zryH^1gY8<68enxneu(7o9MQx&o(BKQe7+Ed$_8T8i7_il6Si1;sFF51y>LY5E z|Ah9nAK4%PS0y}tQAC`Um3ry&3|qTJ&X5Oc%fnU%HI+4#5Z@;ZEqXquVke&snIhej ziL^hz6n`h*ms`96QbcHy^Svv&()C-PgQ(iffwxZ|O#og30zd|j`!P_Vh<#unnv79P z_iPty3aYEmfQZ=yDqg{{25g{@rF`d$SF1_J&EWZN7{AXQ|q0tFTjZopXo 
zLKF!iVSPE7a=3lmn^13cn|53!;{Jsdr$jBH0K9D3DDuuI*KGEHj<9V|ZQNXuxN z(@_vaY9s9$mHTAT#SCS=JkcAaID~#aZW53$oa-4XK(B|p*_S0gt}e(K@f52)#2fr*Zecs&d9?tHnHin=;b>ki!>XsX(nL1qe22!MbV zBo0B*sP!HpDCH$-knYhlTt=S zUTT;*M6>z1Vx-&i9&ny` zS)?qBNcaOyNx?z2N!^LFIGJr*7F}tfJT#Jj6zj>wH%ds5mCQ`k?vh8bhzRe;t3`GX z#_dVAre<>qNzyr0-v2AjA5Ep4{xxm)D=n78cZWlv_pLtv`py_vabAXWM%3gn;T|$J zb6q805|s+>U)bs+pKkQJg70aaWL%E$bM_km2H3ak7Xtmx@OY&ewZk6bkr03cL&hc` zDOn;JDxnkjKc0aNq^wbWMxBInH8L2uIvn6!I(^nOwdHpX|A55=Tk2WdicB7FGhz zY*^S5S)tYAfwrw6j8#tX9sb*`9l2C(4Gi1yUly1I7JFz##&mu8WXLycKYawF2=ZLj zl;?cf9uR3xC4E}@mE^UZ9fz^EDVhAD`jy1TpI8_5zuY{yT3K6TGU&G%^@QNsH4$~Y zfA}(jIVrFCs`P`ZD?iN|Nch^?Z~&}ZtOf#f6KX$?QPb1MY>R=ir`q0pe78tYg>${4 z{FFAYCh@)sm9;NV!c}4Z{nn+_5^C@+(KHfO>19+p8dlNZw6IO-l9squ(N^YwmTP&& z&yuvrlI>>EJ*kgMgQuq7%F^m~x9|rvMMHX{ez%lbJlKFt`U_31No=+yT|TSGy3CAK z9Lh@7?bF_eqV+6a*7$PO{hh3;cXK`~T0;GZ8Z zxcFSV2mlI_ERHJeh8pKP$YL-_iJ89)79V*UCs@XQ`V8J6miJrFvDUuHP zCUZ-e*H}`8hnjNPrL8p!&9j$Nev2~iuoljqF0a>AG-9|mMJi3i3n6gY^^SJL&CTt36SMOR1oV8MU8DbAe;Uind?n|%9~+Krxu~d2Kh9|` ziFBjn^o(s5rLe`zf*lC9R>u`p!)!KWQ@^)HFq`MqNx*t|9>})s%XBiSjJSq=XXiH} z{zq9gGEL@T~Ms2=5p@St>R4NlDLO<+>ZD;x$H{X9zT&a^7PPL z)vK9aYQNIx{KX3YdE}Yp79cC<-O)Rs)<1N;Ll(W=eRtYUyadVTe!K!Sud>!wTp^US z!V@40fnqY4e`22Ps}8zgKoN!c`U_U~{g=P6LHgRT~C-G8SBCCQhmoh27 zkPQ8jvMjm0c*WtdVqp-3{{?qNpFTk1$lZh6qnku>%Ptu<8zOH%q!c=8Ov5EA`8zYBqepK|roCKe_%xQS z>9Cp(#?{o+aIAW40Xh_Z+W8*hU8Jzc0bm6xC{zX=%BJ*0C??3bV<5MwQ~knP>J#KMLatbAoa!@vhNj3m zoS4mUH`|2`>&fX9Z5_G3nJHF0z)+5?_9n;+k4KY|c1KF&4Rts!bc_^6E4%z;Y{TL( zuP>fbu9|xuLlUsv%q;;`AfsFY>$z9`WyYPGUXKPow<@4^f>=A^F@#Aw0K?Z@YlL;B zHYEnX05#B9;T^|@>^2h+)<=%Xs?NX`JcnrEccYjX& zC~WN(eiHHU@Gvl(jPsn}t!##7 zX-rSS!)xaWqvf4`%1uOY9oJ+1tY)#5v|5FIfVkq#_=SS{+xPM(hc5Pa?(>Wrso4i_ zhb~-V&Jn1rX}WZ1iWlP3Q2dpKW9EW{!xYjWtSKz9@=!S^*YN6gHt5LOSzPPRl1qy7 z0@)RQGvmKK+O5(t!Mgcby|%nCH>a+t$q6zXroIPJ_e&teCCwDm#cNv%FlEqD@_JS9 zK<@zXB&&5QBLBA6)&huHduLEMOG-+Beq#VyaS-N05ML=bIgc-p%qUj(f28+nGHX%v zGr@*aihh&M!KD};@6$g2c?>PTjat&0NJr55Q%gwO`7G-~H%tO7Va869@B7JySoPxV 
z5JwzyY%_)~o){=}9ImwD%TI>1p5IRpX;gv_Y8u|==EW#|aPo~9o=vYQma3ehT0Z!m z&n0S27OjJx##wMfXp$M@ea#f4SJJ(66#5QuFA1)S+Oip@^O&ADGW5HZM#d_$L_(Ld z&dS3dsH`xEPIz(Rl-WV7g@_vPnGf}7(@^$c$x((~vN|^KwiR&VwfLeLraJ!?Y(+Ue z51;g?2pN5ILs~0tH8*fx|Z?VNpooeDkQBNbQHJ4$_(9h{j3}ZOyMNGA)NR9S+ z;czx9hZxL=mJ^#YiOgae&z7rNnSoW9N^4!ZJw5ktSJP=GSKsFoT{hp>^QP>sR5fa+ z(h)jHNV3Yz-IDCm|H9)w%|xPV40{IFC(`lIf{r+%!;78?^F~#r+7c%KDR#$IT`^=5 zhbZWhH3*qKY7!Ptn45EC#$H~S3(0y_N;+qm8ipsS zU0uJLnAW+20UwRS;P1W`stJlq?=oNhwLmBY(B63!OH(B3&=4U~!YoFkaM-LSviLw} z^R*8H)(a@@pt}gH3u}kJQ5`^30>J74nvL%Wfb|7By^h;=WDY_;?M$&8FFumF*Mv7d z3^6h~vR?MLjBPhxbcQb3fGwcue>eoy1$2*=L@36Hc-f+)G4)$l`4MhDwf#(LZ~t;S z$3&f}tL@Cu9Ot0=J4h{uiLyP^!aHERHv1Qybg(uce{rm#4)cs2fy&_qGsZR=rB|RJ zzwv%E*%BEcr4||Zm#1gZ+@Hj(K#FpUxi06&Uu)CE&!}`_`9vaUo2dRQ=HtI~za?=Y zn6>d9q0)Lby|QZC}2(@K>+#C@9UmYM>uJ0JFhY z$>#}}FEXN+FoY8qzO@d7u-%S$J&dT&sP`ZV`qV?i3O5)l%2t z&5L|wB$~{YC(e{s8BD-tu_~d$7#jMXl8Zk3=SsKR^tDtLSboh%0gR*Y2f(hps7T7W zFFuTp@Jq_VX%t2Q@&R@TX|+mw^wdVg!*?P9ZmSv5`hQJUP?=-q*)ij<-)30tXc-K$ zzOkl&g0}-_z{}MJ%r|p7_Q8(fgb~CzexOz1dssb?-|3NT2ADgG=eel`)LRXj5YvXo zi^zKC?3Zs>{^}OlD+x;nNvSh4d8=kd(FR@;&eVU_(Y2 zunHAyghDII8Q06_f+<*rY!G3LnQlmm+YQH>rvLrvS4KD&jifcdd_BCbf+P;1b!t?+ ze20BoIhPFXr!toJu1RqXZ&&=2F}^E~O4iqA)X5KU$JWBDeoQR2qaw0p`3Y#0EYWnV zBF`}8h-%$|hhSxrFNxWeQ+K7!A32J#{^i)8(y}pWY9IvDLcOn9v=hM+1IRsv*H)1X z#IFjaNyiM9J;Ta&*7#iI4^9(8>fBh-8WvFz*F83Cq)AR>o7USlET?nQ)SsKQb5MMX z7K2KP%OYYXeh+RN&s)wVL~;#NZh0h8i20*sGh$n>3X#i-bNPo^?nQ6a#Ta*zVQxJd z>(HqrJiAfZAE`CMhRL%8eDlq|5L+5Zu{ik_{W=+|*e zaKzS*KhbecVKvW7DsvwSGM-8bhhFkrBGGhmlqgwC3TD<1u-rm(Qk3BO#)xXNX)0MZ`0 zc>_O*Y2mMy=lcUOrN_0GCxZgzyN9)xj@H)J@^YJXAf*2?TWYMWJS-r|lvvO_K@nz* z2qPR873!nqbeP8Vb*OM#TYZw>w#?iqBYL_&OJ#v1hKx0m~T&W z8L?YQJeAy{ISXZf+h#hv@W@Z)BhFuxx3+CmxS;Gc>}yUm8~z|^l8>$%Ouc{1HAzWXqGmZqs2*%qbZ`|nnO91k z=6?@eT&+IUeLz;&UE#>rOvz6m&v7|1Q=AU|WOR%JEu&Zr7kqOX{CgHhb+sqTmwaPI zZa<;>&rg-z1H6KKzoxXKk6t)jE2Za|JEB5$vcFQ2kmAek_pfPTyI(t%K%rvjd{i!) 
z7JEK{(H$YiX90B-IAgfok2A(T>$JF7Iys#i8YrRnz4q5Y5uIXK+1=euN=nMg$_jWf z`3eF3)Z*dl)k3pwq{AGfUaZT|i#+Wp7qRWhZCM_J{vny_d*g9;m?jmevA9(? ze%j|?>$0~D*Q^f7yeoQQeK9jZkkv&2wd1`#)zpN1NEtJ#*5jgDo$L1RwJ#h(S=A}R z;^;EWs8<0EHOtXW5o`?hMy1&P5yk3aPCch9N-o>`_K0_$4y(2<<^wemOaUM79wwrR z;{2y!Zu|q@UvR3FM)3bA;fiP{i24!btjWwWKOH;SNNVs5_s~!dV$>|eTbVsqgDuszG&Hlj7Y=ioDi*M z0uXU#M@yVwarT#5)8K{|IJY?<&}wKj`aZa|cf4>Fyum^Qq9JIu-d*f1+cZQ)MFB^X zNa%apr8>*iHcwOS0 zTwrw@4M$&*oC^DJ144!ZBhXuhDKA8m953*&0UOd&OJeQ&)cNLs8nvfgcK0?kB;kGQ3;J!df-{R3U<7WeIB|1BbyQ9ZeWkl_isdxl#MMjt2C1FDrTJUN(PvKSvw0vb8DwXv7 zr+$6Gx_Ld#XZt~7y#qVx%7nhyY3`BY{KcR;;dk-n=QF>3ziEt?ULKky?4cf~AsnDD znxkgd)oF$o}wyc`qDr;K%L+r}A?SUK*_k*5%Zukx+v$Og2UbgF2J=F9O-F5kP-bxMn<%{Ugf=rxynI za*GySB#cLG6<;it=1hjW1O1F9Q>du*UhoB8yCVKTdjbs-@R;Og$a#s#~4sA5Q^R!4V>5?WF*Dw(u^Tqtd z*SnJTg;_t#X6FcM&4O4yBZ_N;N`)N90`o8XD*HS8rCpAlA5o#^Ji)y~7BWb9o@LAs6_y$EfWsw99*JAvmaG!4Qk_+sAxl4v zq|{P|{eai)y)HZ~{Y?WDY5LNN(kmxn?PvQpliEVoo7+r1-DvJj)>y7l89%A`9=!H? zYN*+~f^QfTS#{>MrNHK~jv3dILUk+9F0v5#_;aEcNC)3l>vgG4ct6JZrC?35@rK@x z|D-~28RMbn9g-gu`ordO%jN#g9BhAgC2HMqHw0k_p=kIlRAUJ%RP1h7gsC1&XL%3& z%_hW(vIOS6A9b&D4H~kzke^BmeZXlpJP8a>)?HZ}rCVhZwf@_AGlam;3&bs7-I)f5kaflNZgP ztMvD8db;f7-cl&o`sEQF24!0J!-xsnh)(bcW~YUyR7GxJ7T?jNveCO#)0uqO<+M)c zr>qEmfvQ0{xv5EvlXR&!R9l-X^lDvuneFQ>`YdLtFJ;-X8`;9MXugFn@s_!O)-AuVJ}`m(ftyv! 
zd^0oj9oW6Tk%z|W1w|ou{lu*>*g9{HOFzSwDZdIlpF`;9IF1OF@`O{$T~3fX%W7%V zxIG^>5Nr>p+7cSHL=Whx-SWB7(S4CN>=rimR<1Gt!l5ic+_wsN>F@5biT&*ygIgo$ z{*2HGSS391A0%2M71u073~KCU63J<6#mXO!fyu~(x;*y7bKJw02QDib6X&!avU42O zu4kV`LR_NrGXABP^l<7QP?Dh)8FhH6Z=0QM)sHhXGhtq2Bj|TJhs`bcLstQ52bdp$ z#qlej3_S!WUAeir;P?t3;FUntJvcu01Kt#1zf8`|1Sd4YP)UG^2ztK-Sn%=qyj<1_ z7j2$_W$pap0)`Xn?d`wmqw5sAC6()we?h}kSeWjr4c7Uz6kps1thTBZSAH%!X8A^` z-uz1+r(&0L2DZrpWP(=G#rAZzJ;S<4a~=rs*B_b-6 zeo=Eu@=NCqYg#eLVg1$Z(_B(SO!v4ZuCiP8fSvN=0@NrdO6pfWrzJ1d9`^95Ui-ZI zSfyMSMX1#}WBfaycY3z=ysILMhp zPV?KpNSdnlr}X?!MjkcV(V-8KY)7XDp~Y;_Vh*w^E9WAJYYpTS+>N{71n1tm_~i?Q|BUVKmb8uc8U17o;@sVZvl0Nj2YdsHd>& zWc#tIQ2wqLwAkGGU0=Z_hOh+3EhypaRj!J=79cj#@UcaaVV=CI0A@f1sQ6Y_0bwtI zu4wG^^m~>3hGaA$A*ZSS#quYx51j4^yfo+Mn*!r3>N5@0d=e0(z-b!bO? zAoH4>vziwc@2R38?0Ojl=6XYirr+fnohnJ-b+1;C zYEe1Ayj_A(;JBcL2eDa4hbPbC{2wYuVjz~MGuOYdw?{j3^MCY^^ZnDTS-?caFs>!9 zb;lJudb`=nHk)OuHtBE#T0al z)070&d&lrtoyRI&(YB>|Tis|>r@bDZB~I_jKY3ZuX`h+fLDW~bCk(^n%s%V}CySfS zi0Yc86jH5S$~M^KiIGEypP@^qVr2>&_}%$8V7Z&qE)nuWt{$v%0_mPfp;3@vlv0`j?D#BhB|113t=(}vuQKDXt^4X!R)Ds15e zwWsH^HeiYKzwIH2h=>4+8S#TTLM73>=%}i?dZk|LHE^9Wr_z#;Y<<(vPvv&~_v)E> z!3*|q^mo)-Tzk)!>7Y4ojsw|>C`1{I{B5fgeH_bvyDit2+|e%`5K8Da&`qZE^tgEM z-$Z|aW~|J1oVLnT)$Y0&1X=?_FjA9@+FQkhM=asT({GxUtR0XL*jl&v7Wcro_YUvuVR5WGD!tnKJj*w+|H`ZvN`JN*C=U<`J!T7?|o1_5A3vY z8RiR$SJ*bpT&f{0S>KZs&}J2@eyF)_`NBqU^9j)q4!&N0gKscsUh*$~OCoQqgZghE zPda^T14b6W-PH8|hT!W%5$Tca6tm{v3UeIl=HAixKf0zVqU>`GESaOp86-HGurgx{ zXMu(~!n|3ke3lzr!~SjG0eY&FqB)mYYLrqX=>5B+Oc#~zt7jdpix81Y2G==Cr^3e% zp^mTusf$9s$LC6yk}?E$`vOGDdjN*Oe6fR0WFeG-4r+Wa^bQgj5OMlP;lIhFc8IYd zW?^Mb-gp1l@F-}cH%tv_w#Ld-%ZX9(LS~-wnFns9UPO(XhrP>K|3?#GyaK54c9lOJ zLO1nqZxkUcfA-0yXAa4iJx@p>dgo(T*!64})RHP&D*7eQ-D$gvaM}7V)+n};202-?9?{MLX!RRfG z`zl$=2TA%5SYw!}9jWJ>V-knVTF^m5O^O;(VMz3H=WkA=I$9cjr44F(z1jr7I6j&_ zI*}#&#_E9K=Vs|58-bPdn$N-7Ri~efyJuC>r3+H(=b;W+PSq`|z5d%bLuLG0C8X$? 
z67L$Yn`#s0$bwU3CDVd=Mi`EcNh@yl1PE9aMD;-vJ0A1N;tC(A>gr80<5{IehmyV&z{0V z&xHDkHRIK{)p^Py){>1W&M`I_Oaaxi4J30Qjp>a6SC16OFMb5~!`z;&UontCux(8P z{u3+VRZm(&hOW%k;)BT0{=)0*U*Ke9Ct!OQpdGrqy)AmHfUAo@%!KSrh@uH>&)~db z!5BD=MNUT6H!u*3&vQMR%7RMyMYKehM1gMalj64zDej-lLEq@uikn<5 zuh(o<vcELN9#jluuC>dP!>liWV_ScWgg-X=qH z{QTiMmoj6F1b#ujFEkhr(!EGOoW;SXC4PL8^g!6a-k2Fy|DDE(Uf<|nC=|ffrKeVB z#w^Va1I3P>UJ5kahE&mcDSFbN%aVV|AE+%WD}gg1cqANMO@}V?d!+j(D@%Z%t)?K_6&&K1#YuFnaj9r<@J* zW^O7-uV89~uRe+k*P5#EZDwF6m&f&*UjPPAOm$0+Rd;0sK^b<-ca-mOR4Qe^hrPy~ zfS#j4W;^&BUa0zk)qHF~+dY0a{5uLdk3u!T;>pCorU9a`K?h7*N4ncR-R+|AtT@lT z!@5w9QNPHR>U`OJ-PBnTkq8I@^L9X+znFE-rTnKd3y3y1#*w8dlzbF^f!(r#&bh;j zmi)Fb;O!s{jSV!8^Ugcgz3l%0c=oqhZ})7GouR`3l9Y_iAa>hIy7T_QBKVF=D zM}Ent^qf%ickzv_@Di7%)XN^*5_cWXs3svNCBqIyq+tcCxS(X!jN^D>X2TNR@FY9x z5Uopp%?dTP_`NoeS-%Jas``M`dZ=$jEy)p&`dsp+L=ZTo06!A=gp@!${fQMw(CA>v zDCg0P+uFErUDcEeZIX`Qptzx&KZ54KQlW{TL|l=uBMBSW_;dsmI=69OdZXu>u7A(* zM%loH1m+UyZP8K`gnxpny!qo3W%F+=YBdSNlZl+IZBB7Qi@rC+SQ!8wmrd643lG7( z#DkC$ZRtmmkElS}Z2mx$pq8b*lo0BgxorwhO0ITPnv^VSINPKvAPWpcvT1o2!1)pM zmZhX625NNid)rfrlxY8dfRE9ZO_wn`LNe$F`4^n zL_gc?TjTMk1J*oJ9Bp7XxbH?_*xuP8f;|TZgux+KZy4lHNkgWHJ-vN>O6Y70EW*I= zHefwp=?jjCDXXXeH*jYsI7tpZisMnv$0{Td-xdDgHgn|n)O6vZ6WW+)^tO3WE-+A~ zGzq6-iJfe&Hids!HHJlDwr?Hmf%#A5pp#;4`c3nMan2L;b1TBX4IpZ?mRgj^-FDw! 
zTy(RrS{h>j1Obo%Fxuw5l~eRW4mQVqUnStrYM3oocii#wxy_b*cd6%ZXjwjXnoX^v z1S)n)4w9pCYh`7@pgmSzGvmA38MN;bUMYM(?#a7qb~h?8l8mCr(M7$|IDxZ3OB}nu z80sSteg+$+{e1+hd9Dz-C}?k@uPBM9)hPCpFxmpzI9K=74XFuW#j#b3eNeR-=y@#} zpgGDgN=lU>DJSoLKVij$C*F8(-2A;N>p+Kuh}QxuWk0l7Z?=vM^MH38J|IrI=r8}+ z@U~=_NKzAGBQ;fuVS2cGrlA@LQ{A1?+ye2d4)UAr?#SJ5ydQpIu2)irNGek+c@D%2 zPT}c!=t~JtdeRbB;092Xu>IHAUItQG2FNR~F3tE_%yqgxOq|&Qp@k3^SIL%kPtl8* zRjTrKALCDRZC?-5Czp~6T&2H;@IY2lpmS(%Es3PoFg1ML^AXl7`4@{tOwf%ae7M!FHB#Z(EcHpPgZ+0RJBt-1_d*uZV z^|hLL05`_#VP|lNy4;jSm7%{9$39*jkG=U29ctYwv%9=WtBZawL~iVQIZ3-){`R{0x4$myU(* zIhY_YP58W3@lAFVR*jOQ_`{FC(=dv3yQDQz!y*5>{91)Tq?ME%i#=30A#TVrRj`idySIkPH z6*5ACP3q+g+uF>icPUBa7O}QA7Oz*~e%2^9Ty$d@Tk{_aOh9paF3aXxrW>R+kw}KW zmVi&g8DF8=c!9gLJF~85!mNiRuVb-uh(GZ!H+~$=c6HuNjPkI)pa-%Pi#`<~b-wJY z+EM_*z=bE#gbktuOBc@3xb)2MU6lhB46xW}05T{2I3F`~&}by&q)&v%J!8hJ)BmMI z&;ZRw^8N0-+w?i^#>#s9>(FLL(lYrQaII zdhy8s{=)f-o)7Qtf#O!wb@_b$bK4m7d4Cf~w^@iEL~>V}n99Tzi2zlM&{|6v5?q1 z&N-5L+kX&~5zfNV?6j$+-!PDMMq0haBdseQvnA=ME2@;c0P&U(Lj7l$;hQkGdoE7f z8^4zY7ziq+81oO-HsL4!=`gnVSx@Z`53753eDB_Y(@d{Tzguv6yt9>KAkS-Ju66SB z^WWI#zu#sJvo=hQl`YQn(E{=wpufJdMdc-VC!2cQZ2?hefX4!3BJ%N-gaR}4E z=M31e?ywDBir>8C(xi;NoYGLJ2&fB%z|qJkeJP5o4r`yemWkRxsx+Fdv9w^{|61m* z`eLQMCs%LRlCtisfng70B&lwuj5j@AmE-MW8$`6MJO^7&amCQ>OG%yVnh>mpUjlGnfWa$CS#Vi%}Jy^*XD&pc^^Q<6y&O zv42>IytTH+wZs0wp40F3eN@Xa-gb)R! 
z23Tv}IcLWcdmH&LPh%grw&F5tKJU?s0i?hr`y-}yX=`gMCeVS`vE1F>zMeST6(T%W z8=5NlUzN~T`?X@6)kgLi<5h{Hgqw0Z&pOb7U8UR0W=ex=L1!}eHtUTzS?$F>sk`#a z>J{Mu=o*A-P*8N8iiIM2RY|DMdXvMXE0q{k1aAU(0))Gz8;(Dr&^w+C2Bip4&qz*o zQQiy6O7cQF2$$t2b8-6gtOcBY(^9wP;R9{W9$*byeiRc zRo|u!(f4yRg*1zq|F8^c#Hjf67wyeLYYzvt|IA|kCrw03DgyW()4Hx{4++jyMal!@ zI(;2v7Cq2>d#-ZjAs?d#+OJmM!2r1C^U^QTTfng8cT-_RD|cxX@Ru6zy1T=Emh8EA zKIq&4JA$E@tb7@s7=bc=PA7=xdAPG`1igPgSSP$P8SRd<&ROTXyK06%M?)0bri$>W z3!~Fff%NC;l~9=njy_babK6%8iL&&OY^af>=8!rkjGBJ~f4Ni>cz%_ruSb!9RX@2< z=xC+I66$G8nj%UK#}^OY0I6@;(a&^Be4|%ut;k3uUHT-5#q6)i=doo+UH4+@%|(lg zR_XYtrB=4l%$1IZJa#Ho?o~S~*grS$uF2alSaqq}aYNM&AiuvBYhbE%)~D0$?UDT8 zKCu;dlXP`QH0bO^>aj&L)mfSk7MLh){ev>p>A*BQ z|JSn;urXVDl11vC;2^@RGt=JKwc@zX9Daj;rLLzdRc*1b0kGjyV>4eh!;L+}sGW-j=j? z)E?qrn&{B+J2VlecQHDt}F?Y>o|yf&`K=wzj{N=Mh#D za<*1Z%6e0f6=5#1ZG%i`uzscXS$4;gMUu9o$`>+Aw?p&TkTr#ilPy0v1k_GvsUd@y z%D+K4aEOa1;ZRT}c>?|n6bd><3sfjX!Ic0mc^uPlzkRVX)wh~<+(`_cIb}sSX<6&D zww-6H8B|y}CZjYtF*-}M)dm+#QwGbHq;jR>E?$YW(-l&lujyGL6WxASD_^t`oa?ek z#?yqY%BB0ebH~tmxQ$$=k@(ZD3jchf&oEpXy-e-Sk-b5gK1CCIF-9mznwaC~u&ijt zT}61s;9O^B6OE*)1ws%U-Fs|)n4b|vTq4bflL4`8yXH@rnc$XYGzow?=R9xSn(rTrxhGL1*a#Tl`w2jBStwM1?SX&*MsJtZ zcv&3@TMZ!=Rxnoc7dKm!4jxu9kCD*748ssSBMXftS^uKtWQQ@^?W}|0P!y`uKVyH# zNyWz^?FD*{h5bwK76Nt`GRT{8Sodt)diWr)CLLuXS)unjZ|?ai`WDm|Yr@b}9XECz zUAvAJpzNf78&zwdc$(`7ZK@9Wot?7NzC4m{32o{Q`F$-ned@m76#gT=Up)2}l(I?` z%gvVb)Y?kS6bs|UoGU^<=m}~>Chet&IL%(_Pp+x}z1Oew-cBUhP}CUWP7YdvOC?Ut z#B_68)PM=Bj{BXBi`~97;{i{B6&rXGkA|i~yf_yv=dD`lIONG$Y}=k@nu}|G(}cGw ze#rj&Op|0Ne?|lH;KIhoz@dsZls}SAv@Ub_*<1@1qtMwxQAu>j@=@wseoe)m(FuW1 z<>A9{BU%Iujb-c>0+Sfa!(w8aag+{MN7M*f2JmE@ z;qJC)tcZ}pY}pQSSq(0IDR#B{`vN?l{VXPlPgQurdk1<#RRQYEF(@h@b^m z__$5TA*!W<#WX}q=CRJbgR0B<$}B$dw{^?!qM~?<<|Zl>(FP4-4Pevu?TSffty+*Iumg&^{2Ponfz! 
zx+v6FG|%sS;PsPtXLl-%ByMeGc9Cqd^HQhhd}n0>L*cL4%t{QCg;v0TT)|VgAJ5D+ z6;Hx9r|efy9a6*6qii$nplF7b#j%&TZBD~-l$G77V zh=o==vF}*y^9vdl6LF06u3nDC>r3>hsk|0WY)C5&aQg8YW)(YXzm8|8q!R?UPtRzy z+K6RA)0P;3Zwjaz$WqHxU<2n}2Y7tttzoLpKRXp%MOM^K~m{~ zvVPDRe_Xi?^bXrGJ#XQ?UFnR*VE1>HO_jCLzmFnuC{R<`p`lPDCQA>vuq7JJ)=JlL{$TMG4xYNWsx@%7b?AuzV@r_wNUA>f`#-;*BRgtd zhrDW`Tz3zKo?IFAS7b#l<0gc4$iOLbTTHokq?=`LO*2V-SKd6gN9lMQ+S6#P;rsPsMlRdyo`@o~ur^|2)K_w6#O=^)fp3L}p>A}OReg+kEdN@-9Db$^pPmbPTkL}Y@4`)O_y%z-`0`+87P4+`~+M4AVU zvD23m6sv*4VFdc?X=(vMc}`?amN6CL_M^n86EbboQI!LtA0 z6JbW8mnlf#7Jl;|@|=Pix|r~juJ(Q*-}S3NHkN$;NwhvRMuJZwovzOfG^TTmlnr5c zYHgf+a9|2hb6O~0{r=zrwj(SILa0r`+07&V38eLv`RQ}s>#Jj!Rzk_;H~hamj? zXnn5bc;KXA5Ei;={T}9HJW3c?Dn^|7ofgTjby*j6M}eJ80Y+yf0vSn&U39&LOLb3` zQP=j2v;np%Q((sXAv2BYJy0ncD5D zF{zA8;v1$Op_)B5+=3c(w$Cpb1jOv<^ZdiAa#U*!D|e*`tmGgCt>mv3Jh3y8_2lE| z>?oN;`4LeF4%AIC!)UTqmS{5Kr&V~XgGFE1MH~5R!#6xSvB+K5}%ec`0zKCf2}v9Pc+13ickXY$aa3Fyx41_n`R%q(LTIcrHsqe zo!)?S7H!(~+Vp$LsLBbga(i1hf&bAsxJ(_G_|O6%$LzomLrh#J(elqX%JWZc<}Bvww3S)Wl4Hf7I(Vt@RvcDDMGS?sv=;Z*+Vmml9-TlUiA1j(Uf)=?iV zvKbZue7GrbLD&`+;U|aL#F$h!`_akcpvEG7kbCcSQ_|olXW9TVCb*66bKB8KdbFVK z&XsFX#KE~uDnbWoC~TH`bTOwD9A6?-lyOaZR6sJ9tKd7W4K-vzvDFxkH@?*kTv}gS z_xW(vVSa^*)RdCq=uqb_B#Wr4%$bkpDje&$D7lWY7@aLDrD_-pg*X_wFunpF+fBxp z+9EoYO>YBN>NXo^b8u{3U1=D@kw(obbc#;CLOat~U|H#}h@yug4?32%5}S2X5PXX! 
z3m;7_iGJN+i2t+^ z<=ct0J1E+cFF5j(>(5OIEn9sK#XBpme<^4VX;8|_TR3Q(jkC%eAoIKp-7-+p&D@8R z9_#3h5M(sSoQ&h|^<9xOUVd)CJ2k1Lqhh{40<#*B zViIrOBcOE?f7|?gvr&4dkd=|0{oIS`e*su@$whp)$^Lt*H+su;>KPQeQDZkGAnZ75 zZwR051AVeL&M38n^G?IjeJm0aVsMeefh1gd(AEO@TMFJ<9sotm()$P@jlwj#G}@30 zy|mR#IrXhypCD6qMO4Mw^uV2^csr^@pPC<3F}9!<_gin5Ckpp`=^lz`W8i@FBPsrx z-oX{*tnd_^7A?*I_B=7JubJ ziO6I=_ugRD$csJ#)C!ByI(y#lHaw&8if@ADu4>{8OO0^shRK~3NHCu~B;xELsQ5T^ z8^0%`>`N-lnB@78RDcBHAW?twbke$;30VQKn4wNXe5JqvD&d-SkkTUQ_FHWfB{%b?lIU+p46RXCFRfD2>hX-g1=j!8-}r5PLf4 z`v#H-RE>*eC#0J2O)M!3g0R(?2=e31funv>v{z7u7uO@hxxUDsq#`e4kI(fY_{N)Q z={&n*#o{54WKYp}Y$~W`*3Pb-;4n8vd=C2W)N;g(2^5snjk6(-T2BNgY41ACgq{!F zq|qEV7DKAo-VKfW6PpAD<9}en>A;tRGHli@lXiV^EP%r(^A>*1IsbNuKfd6nexBj; zUo&Az*KK39-^(APO#mbW&}e=Ef#sQ*%Sr!H-|BbF!I!gdKAtBj1)S*cp|qh&qga^6 ztHtPAp;R==vxao*^qd^AmxY5BuCruxrx)*e+Ix*ovb+Mm{7E=N=<5q&8t~+r{dKQB z`JNyM=zhn66H*iF23n2X52rzVKgMH*gm9eXTCL)i;dd(^$i1QEBTuf9sp~|%JOZ2~5=K6n zeO4+POdo75a))SHFTnt9?TAAh2DLg?PJ1!jf~xI&a*R(+(nwB{2(!y-Lq&7r5*uBB zp=BY`fs8_7-R^C#b)-Y@rKGE=m|n|P=zaopR-!1YO34=Vo`d7F=YXaYbLupIYaEbr z8OJ#T19%cdso6lOIUWJEJajeBH8|RzQ-7&T)&*KHYoJ=01b!lb_0{^Y-<<4>Z_o*$ zLL1}BZ*{Aq*6TL_HN%af%SOEb=!as2N_yx)71MBteX$XhUu1n_+zkj~hq4_!45Leu zg8kHD-z|HOrNw-3y|oqz{B~@JLZR!dKQ9*ptBNpj(jvbMf8j!>;!47Q0xaLj=mNf6 z|D<3kfu31_>ESkQ`vl;=ob2NLPLNoUJ+UD9QUc~K3Wk!$_w`ZHx;Nbi$C9V^YXUa4b zoe(ofvHG(Z{WXo2_N2Y0_K8vCd-YLMh3NUjCLmwS6f9o$x%?hyAt{c&$km1z6Iie~j# zJGccl5XYLrU&U@5Xz>FhZl5vT_x%Vwt~~=*?Sr4c8@HYtESXZ-V3A;$g3}X~!c^&^ zQ=Q8H8UV*dU!#z1OV-4HzwJq+1-u=}oX2TZ*1~W;?+=L52~qf_?pny&1CzV| zZ%06}t;V_aaLCU><35q!YZ|m`O1NxH_U{LE%+K0BZvHCDX*!mPE=1R_XO0pFm9Bxa z$aXocq$ge1Y$}u@mNK69%;Hr42*>pIdOiv|tHDahyTK{NjoVE}o){D5BYl<3$fE!3 zISu}z#jNQpk^#jp6v{Xzo#zc26+jM95$ZI4_DH0}^XsvTy0)oxkunMF5v20abEjH=I6n$R>kh?!0N~7U~s%e$YV=%e2pfLDgvEfsU}xa z#L`Lp*nrAR1P=S?-*rV#*@KS^Sp&CklW?)3=F~sSk0V5{DOa0-IS{zP6dN~Luc|N6 z^VQ$79v=R5ZU#DBEMz75ZFXmiIo1rqSteiwx@#w8T#`Z);Rz8}#gLAr>=APWY5AxB z{44y6TqVJ!N!j@2Q)Ozw%{dJYKv6+5Jta9H1mBNEY9w#w7mBrI+gk@)N8LlY2Evty 
z(b`L;*PpHa+r_(97kCA0rHqo`fF=g%V^I7R-)BMlH88_p$y%@nkA3)TzeShh!~1X! zS!~6bh^24`hMw-sY*9_4Cuq;$o4)mv!{)V}2ip>EB7~Da77ka6VpR={|{5Fq&{;l`M=(Q!@;@(cUj)zkp|!b%RK$S zai`|9R#H`j{Q@oSmDA5h6cS;?sYN@uVNpMj_L1C|$8@4DD9p) zOsmG`O)Jd@t0KWfMw#i(1GlU3m}+aKIb3BlRg>$*Zg@>#xH%j|qrjHFp<(_v`b0;1 zsn{UP+PVp-1y-9Xyol3Y=Rm8EMMI;*7dxcY_s_9-e@u!uv4D3oXyU@JcA?au4{9ifx_MTt+L*{_ zG0S)}cg6=BGk6-Kx5{F11qHBXh_ViN&Hxl)toRYXCc)Qs&$elP!gRZ#x)7kUR_iD* zRt(Vr`px7jwl1P&nZ@%mq<37k4=g|HW43Nd%gxIhzlZ6ss^2GKj_KlrVSs zMVPA5GM89sq5vdIF!TukYp+Zk572Y%29f_u3L7+kl)7`xzRpK{<$Y&{3GK}+ep#u0 z$t+;_H}3dNXJ@0(CvTCuv!~iKrQ{x%AjcMSrOsU1x$2rsg?q%pB6hB{&316%YOY+-H~l8Ed|COX;Qa6o|~7$Wb;W zrA8NPHV6MH%`p2mif#W@aZo26!z_$h-X1ZD6bm*dS(k4%%zg z%x4xY>C?#UrcAuC{jSs3O&`>%=U2tH+b^ub#sfZP`ZS)|QMX$D6s+PA8G;56Hf}K> zFZ0$Y8bh1%wQc9$V}+{JBy*?`(MkSZL(jONWC%&FDm}9Z9a?qO#B}%(jgPCv{2faL z8cnn;pHj`tkI1De%tteLH}#xsNz_wcmjGq9bZ<08-~$8Qi81N$64mWp*oWj$<%T zYtclL5ckl(s-`}yDhtIICrr=N3?UxA^niy@@J1g-49fBubd^!i4A5Qx<-m#*?q^)V z&_sV4E(#t}V&Bu^UprEd^h@_G7Lg55Q5&U?lDnMwEDF#`h)_6tPjT zr`FPmHb)N-XnCXZxxsBe0f4!TE7(`y`)< zCx9e>5*RuST8%zHAaYh(Lw8e(p<-u?qJk6llC=4{#^Wa8Ov{lRKFXJTZ^-NCIgQt0 zOyntQ7H1v_7u}vP!!w9%=b9L>ky(PI%p zKUR_yaq@2dlk@G9j+JBdOpML(KU662F%)mLf1;z8okSO*c?xJ-?CH6@40`oJxSwLu z_7~_4)PCJN;}0s^fDmRW0SeLPYjDmzW%ngtr&pJj%~QJ9iC)Tl_n-FHxXU3iv{?Vf zx!ctZ`|i+M=TYpn2Yi$)Bc#no#|)Hw26%k-Q6zZcU|M`M<5i@%9wc*Us)M*XL9a*+ z2x7(-s`Mj;BL$@p$Tmh=ou{E?+TfMQlOOdVfTV&!4BSyLaK{N5vsw`SSm6Y6LGPS_RyeQ6qtHzqu`=#93(37>7@^{smYEG)+ z;|IY|JfPY&M3h7qS{vD;lH35OTxN>D%5Xv+Mix|tGCugGzx;Q0o)Is!WO&iosk1~08PMeTYClXaYgM^r+6dHIt{aj;6MbQsn?q-5I|>KsF{Q{ zHbhgY(f&~rdm_IrnQKmfx+O|6C*kZWak`#9)R?|Z^Jh4WsS287!FA@%{j}xUO+1TQYG3p77J9~gZEI7__(px)};&o`qpmK$M@xVAlfU%uqt$Hee$wl#ip<+EsY|M3Y+ z|90bQ+60rteg*1NQqA2IQ?>?XoxiQ71t2(#G_GmgpbsgoTNla_K%W_gwwTnEbf1U#PESD6!LnSRJu>jEoUM;lK+-53We zuoYe<-4`mK(wL8y?h1&#Uu!8^o13u20`1h#8HgFj7(d9nKk8f5NO8cjbs zE`6x`x)KS{pLRpXYD8c9ZP)9Rh=*Y0dyz1p!Ag%G#^PjkLN0pLvC7tF^c7rI>%s^{ z8luWKR4L(HS$kvB%n>bU53hSH+dx$9x*X|&c3yL`m^tJL%M9TzD$w%8rAf;tTDtHZ 
z$0B!)FDDi#_&96mN&iFwHe;FwaYw=bP-I<2Y~71@Zk5`ep8$!(l_cj`@G?Nt+1{r) zV_2U5sdFcgX8axGWc~>JqeR07Z;kJcg`_3|XVO6E*fY~FSKiwpI0%+6l)t1%6Rl5o zZTWpXLToxD%MbbupBwIWT$9<*@X!NME3m??T4eQ?|12P@NSF%mjQ5^*PbUbKQKTay zQ6W5;Yu9%fTIgYfXrhNDRuL`;b+2xL>CyEz)`$*gTS8r^19U;9=x^YL2s}&oIKCIV z(m6uPww#1Ibj!vocL1waCUc_n!>lD+ZhCpXngk?lz}hchO#IfmksR4P`&SX$9?WX5 z)^}D`S8w-W8+{eL4Ab%9kEbn2TF#Mm{RMfbACR`~=r^Iv=ztFA$@~4Vq`r-M6DyX! zGUN;LG+CRcRv0K<$+*?+svHl)lgDhC63TtUS7rNcF_Au}sjY@lT*uqd(7EBa_R{FV z;wkS19mf&#r_!Qw;!Cpr`YHGlGffLZ6YcZ#i#wY$)TTf41;}4ly zBBB4UCW8y?jIEJ3LxJQh*Ws?=DA@CbbU)4Sx5!`2MgY_Yh?2h|vB@0Cb<2<3zR(y* zZuKCLGE8M}%Qr94-b$Wl4)wj>x~epkR^P)cHKF%{0y$u|v3e;(F{h;!Dl+Upenu`8 z&Ht*o9d3_W?_&(BlQ&0!WSKPuh;A+@0rnbqw&@+5{aP57p%i1*G+D8?v?qHnYd_v> zn-{KXM*+9k#CXRSu;S#M6fJRoTx#j1`F_#H3N-1&m!YZL6j>K zC4i+`z8{#SWo|6byBMs~#)|LZBt%>6Zov6cQPygQLMrncMwdvVPY-%*fcrikpsBoQ z)(RkdZ*5&C);G)R6car7>t-VZU}kEy|+ zGuxw1u9q0mEd)dA@1h{u%`Tu|BFgRNl&<$s|uK6mfsay`9UdG~e33-2{{j+kn4O02rRcg6F%K zn*8~OS@paSfkh7N>-*Hx(=&-J@f_pW)7ap>6Jb>yQ28FrV$)(mJVQx}6+VhGQ-Oww zJ@p4YQPmVvuN4A=>bmgWO4F;G)@(bHUu^E8-xzI7!0!wzON>3qpFUGb{$%%(nv+z* z8K2YL@t*4MakbhTdz$muk2?o>G?i8e9j1#-$>?#D+jSZ=ZEY05ME~sS&Wd|Jrf(T3 zm39Dc`v}AG09|xONhrbX^(ol(1CA@}Am_`B#0Jy0c(m3&z+(Y8wHHrudj1#N1Gw%z z*s9^Y%P2p^?E;Ai<4~!hpLp7z_a{daPTiq(Cwh{Xvx3mi9Tky()9kXu@W~$g*D{qK zSO&Wu_)>~PPAbne%~ow1EyH=$lEn5ukbp;v%a&3t`WZrJKtrEWF<{`F`Rr|)z`SKJ z1@o*14aPVN(x@`;wx!*BT^qNW9EhWF0j&!H%^MLITM1(RDERyNE8~nb09v-72C2x( zLVhD!ds91U1EXrmgH=30{;C&MoVG2ts2)Syk*IlvUc zKq%qD6iJOqq{2XqPCX2JQnx#7={A}F>8ry40^jE8uu+t4@pJ}V03hT|l2`!&Cay1g zpNjwrr~+s^(3~Jv%1B*2p^Q2bE0O$rWuASUL_Kwi-3`WqC?2YV3i7a~9tLt`dtfU# z-k|PrL^Ku^5AEfbL|?*%8hEF|tNs}w%y?E*BsCvv>=Y=zpF=2mkHkp+q$+}F|glCTTS=)471*!wfY{B(+%?}|B@pPhElMdfDSH93pR zEDv4TePb8mV=CkxXxv(U#|S8S;+P0fxvnx#XEgSqNtKEx!1-qPgDU*n*#=Cae{iJx zAy#hm>O;=6^#*y=a_T9z{IA3RhFYeISV0M>%b?;|v#)zW@DJ3i_M!%JW+{4P;s z#n6gv$Z^isGe;pun;-s8qEUWi4aBD=b$$hs0tZ%PQZN&Q+m-4-6x$qEcPvBAg0pf6*Ec4S9-}G5n92&{61FhU&F$A`NvSi`D zagts1VPIJe?>o46qwlfIiBUkfs4$-puz;Co@6ze!uvkW6wprD;>-q*%vH@&g9-ti7 
zGVn#vGeF*f1uY(NDl5VdLPmlyX5gLZlV6TKEq3y&Xxk~w-LY7X zKS_C8aC?FRVG zrLKDDKTGz4^REDegXKHZKSuqZ=J5qV?w4hB;?LOUFT>uQ!*GfZdVlZ zdHvG= zP&(EEK2jLGSRRDcE2%$Udu}Ls`B>D%)13#eMJ( z&?Hv-_r6Cc7b(@>_WuBG3t)gpDvQo#bxREZJV1TSB_H7zj*gD^JHagoG*Dk;A zEb-+&F#c^OUYrz>YlbG1Z(grO4`gl~^N@bE{I+u=@TU7Q_;lnS;4+b$v_wae?&%VK47L}UL8=NyY zXZH;dU8nt{_4&ZShr6bvu>m$FD4;r8)8OffP|CwASS7TWz^V5Eu=`W&-S%bQH*KTK zpB3EY&a9#8AWEEItl#SPYV|uT^Q;j)@W)&Iswz9I{VsRn+!TFE8_I8%L33HjY_5EgJu!~ET9_h z(FLbZqfcK7`i}4vvFY^1lC8<^hyO!8*fQGe-73A0lL0KL=&9&8Z<}EL^alWt?$;}w z;i0XKXuZifJ`Om~YcGZ&Zg>+qpRY~0IztKu)w_8;y1hTwPOeHqV;lqd+y37Z&U z4M6j47EHXK0+abz=8NA0QFWQu*i_p>(IN|k6n31XYs{)cyl9N(+GyHpGTfXx2d_6m z)QfJuT>yY34x{DD*4u35-l~>64D!_T$UIF+jE0G2py;QBwRnKVxjeKG3{@2`a@Rp| zEC4>87F5e!@5iF_rO8yMQY^T&(Z~}44j|GRsHq|p_iMjqThE{*H~ z1{mk>#)@!CcXHzqt$nl^7u4o3WwAC1j>XQZLj>s z52Ha%n2yMbq`-gg!M)rTLM=hrB2bd3EFvd_1=V``Fco^nG(`a?ghjQoBmffHQwyg{ z4{B6Dh$YyNc-#Fxyb^jlHNVA`_b^8Ox=ntz-hKysogZ^vm-P<{c z0F(X=aR!a1oQ5syH}8k|a)G@=5QVfvnpR-(uh`p_n3l#(v;X0y=xxq|Uq0sP{YAJG z64|8R3=1CF)9p95i!y8}$F{PDOZqJMH+*<6jHp)5ei?NDr~eDT*H7R$dg=d0mC2iIBaU_Lmoz*K1YVbN|^g_BVGS9xnqI z_FolVJWa^V`ia<28MRIt3$I;oYXM_lQ?Z?1JS?Eye?yy8b_0x7FCU!W&aJ$@u?k)< z>o4Yo&vsG~O1-y}3k`KZ($1`8{=`O*>Nx8%pl(kI;rfv5+`r$fZbWR70mM9Xhk|l1 z9wdZZTq#HGSkGD0g{k9WFWLlBtipA54@HOs!*vD52Re+>Q*stC|PTuI+r{rH?VGj=uusGXtU2Rlln> zHlwAxK?9KI9HlO+DN~{ui{rLrnD_uubwbm-lPun4YO#^t<9DqD=ZQNmq zQ~%W&6DAGO$fwawn9_GwMGQfA*NR^$&Jq?QO+l zX$oO9(Ax3UhS<;3-N^FoGsN!;s4)`ee_?XdL~|(S-T+4fy1bEV!H}Nf8;7H%iJ`LMFf1oNDlcHhZDM=CvtHLR;>Y{ zE6bLFT#+*B-<$@Joi^P?(+8m9Yf8cc#9`V&=zumu{-hv`aGlp-kyF@DCi1i>puw*0 zo0{LzmC=M$#~f&YHsN(%sMxPe8kk55nM>+K~rM88~seHFl}F<_m=*UMr0^Z#UquVb8q z`BuM$j$@4QBete)E%(a&B!(g`yM~vAl5U*${or0v!vCBS2D^74sejxsrP9cc%;g7E z(+4CJ-RLt}fFiDo(RliARw5O2b79Y@^{hOtPNn!2Hg;5=zW^{#-$g_ zV;bJr4}dbhX|g3WrcCeTt?ES@;O2YId7vcBy7Ouh-p=$>bN)E}2U~ZC!}GwScTxN( z5u(XrB$PnvEGtJeAx;qS&%B}RI{W!y%2JmNsQibqfNnTdo@w8goKg>?$oX?!fW$Ucn6?}a(m%G_tqEB z!eJ5GcJwTrp-=!q({L=S&2fM2s4M-x@e5DJJQfg7r;}vllSR)u9YBG~U 
zae_S90J5C!5ZODn_f;Ye-fdd3r{>4lM6nX?aN}{Z0N%6R0Y~j@3=6nIrF_#{|I`8e zID8Z#=@M^jF9U1~U598TJ-Hci_QXGTsQigV{UO;8@MJcY{y4$5wESYq02$pZbKN-q zP|yCKyVg*`Qg@}ObIZ&AI$uId8_)gksY`Fhg0s6-%SjR;(LTUyCUvlhc8r#KKRiFj zoSSj^6##8ry4$%K*a@X;zNl;^Mzi#*xiF1abg{ zsqf|oez#3|4mXLdOd_)tKpQ1Ar6R^^{kgNkdDIpBE5p5dYoHBz*5{x4TgUu%$g^g) z`U(g(y1bC!4;xa&J`e;Vn;$ksPM^Pw~9o-K2CwTW{8*E8yAn6R8c{ zPpjHaE2iB7;#VKiri0#u7GJ_HjsjRK?<1|w!cArIuz$%HM*A|d9mI~H56&Oer9xJV z?O|~WLXftlk7osgVHP>F^A;Si;e;4Jw+=~7J;O7u^aH9w2rN*93os9P&dj@@%TQLq zkD|*1>Ne=&y)`CaI$egdWWo!CiG6PjYgHip>gw=f6|D8D;4;ir<&4jBPwrabKT5pW zkLFMJIb!;ZJzT9Mbh>(i7PGce*`>KlZ#*w;neL~z4gxO>7DXqLgk(>kV!x#+1Kc}G z3qS)x5Mk@Z4fuK32;`U1obsd7aQk36kUN|WF+381vGi((()_gtsn~N#CUK3opgWyChy+Bxc|6Ab^j@dW;KK)z1Wy180Bz2Uhw zBMvxV&x|=c`y{-bzhNf1e3Zgz7oE1Q09dNBmHE{>C<=NzZhg$c3(54+WPM7pq7dh-*bh1$quiH2i-N+Sy8h0$4K6b0`^Bcu4;F1NOR#Qb`?&otx}VFEiQ z|23YkGqKg)vUN@lZIu&fiO><%tmXN@U~8*-{a)Dh#(Zo+mZCJHo8`%ivo8C@i;7kqt?>KjyS!w68BOGfA9P;@;4T*LAYwyAOjE@CG@)9Evkjdx- zGRNCPT03f&?NuEE-Yaa^r?gJUSI7(V#;lS2$vxL**=C%VKQriJaNr<91fP5|tW6tY zOu!AMKY1*K%g{00x5EoF~avRd`ibku3}0RBq^W}Ve;Ws1#2c< zwpmf<9O3^&549nH2rPHUe9ysIa{vO_K*O1Be(Hbf$BqpHPY*qgbYheO3s@5B7Lwi; zI&O^#T5n}7+jnc!0hzY1r3NEMg#o!|FD^yi8fS0cOwX}<+p<{LNwW{lR&uO}exeWj zeKNDh61%V;X#HL!k`Bp*p^JBI!#S|8Ygef&TPp|TP>tl+KNBgbqYwIUWm550xmV z(x?ExQ^R0!vy3m#5JbS#Lr`?bj=Yg)q>_v#{Qbb3(IhOrj-XR9O}YgXPBi z+8g5IG~e(2P$^#%GajeF0B=7&Ff;AC2a}c+|A9oC`0pNwe69b0jWHB=yHDFxrPMze ziP;nx`>&crQH1Z|D)EtVq9oPnpfz76(FaKbdt0d@eS+QI8LI(V%q&rA=MN;!R8S7! 
zAp6}MgbmniFAfD|IkrLn^^6$gqQW511v#b^`PNIsUBG)oef>}@QDAN)P*(y&#oPW= zwS&;~3=LnsoMNS@aHPeN{%y)zH1v%tCRwLaFX(S{``Xd0;G@rOe1B#;!9N1zYVJ9xQ^mJ-WS2L$(ci z{zX-J9jymk$)ZpAS)B~CqOqQ4%N(@_f}?$Q7H^J%LTuFhIJ1vq$_lQ%42r|(&DzdH z%h&Le1Dl5{qKAS{H`n12YgS}R4lmJnbCR_azux=tHfa;T6L(Da2nNA}B0U?*iZeuqP1Fx1GglZy=QD>_+M)WE~h0Id<1 z4gA=$u>#B^z@{eG-QC^Y-QC^Y-AYP{bc2*MNPh_7ef<8``{!A( z);;&!GtbQIy=Qiu13{c2s!*X-ruFJiFOj+KJmfAy@%=d-RFF&Xo^bmt9_5@!A?)+joFdR8c- zffWg%gOT)^EcC%eP3;5=??j(dki>-Duos*kNbb$YEu>n6D*ub;-@&q)ESz6T!$zGz}&FW z+xW87P{7@_6Hed{I>HV2bvucKPk*=LNQi#PnyCrOcn|5IE}nbAAS5l`J5A>os!VEhY-YD%Gb*H{bPu-*0kuFVGKR4i-8A zV{XS$zMIIoiA!a<_E3@NiH*SCy2=n@sFefVSsy2*+lHT}l!L~?d%bESQ?yHQE~qSg z$``Hnh0 z9v6Ec4uxG^UH7?D-03PQEU9=Bb9)4PN zgPfQAmAzSV#XAqWGSygV*@sJ0bm(9|jv{p=lCtNuV^5+>>oWE&RP^qKJb9G&_3F}C zvDd{Xn^UTrtP9bAw-3EbSqo^}6-f)|V=?kc591 z;H_SjoTLzmJl?9iy>5!OKx8=NC^PKT{_a`uYg;PDN{;6ELY>x0+tsdgA(#>e=>e8? zmI>CK^?*XHF0)?U`K+n7C$5DTf0cPgMutd{;vFBbJl^;62Oc%s{a3)E%^xhK_-EV>n$ZCqM7kL|rrRkv-j%TB ziF}6EhQ}ZEbr#TVzOB3KBdO+C zD$wz5qPN@ua(Dx@jJQXJi1X;e#Od&#E)9%$(bYqdjmAluc%j-!o!o?~d%kshmIlL? zHstp`n4~wH^&9D&kq6ZM{kS#8NLUrklL@rio`OBM;v-G*tjI2AmJArZUJgXAyLpCy|>MvD;50e#+pcLv_xLB&}QDXL5 zrhJZ^zTYURiN+Xg@k6Gl?`&#Dn&E+ui9bu{P3v$$<+1HywW5Hg{;|{KL?WzxhG*ul znE9!OUr0dak@siEmzJw|hv(-2!KTN_{1yX+XLn|4{mFPbYDjnj=@rGYN+~?(4$}BV zQU7jQXW;`Xau|{7e7JhfIYl8SaTNFFCXC*iG-W#8f}i|3AanG;n#=SmG_s>>kNl)a ziTl+Rlk7p4hl@%I*ZCjKR!3Ztb9e6Yvj2qtlgzlBfh$2Y&wZZl@`Eruto#O1P1jcQ zDbq`gLn>T<;vYK`_qVdQBdT;2i{UK`QpAR|uu%1Uwf7pBXSVqk8SGL!-?>X3V7}EwhnwPrt7q2&{}? 
zmy_#3*JIOejluD)zL>2p1h&xb%#C9oVv4-0$7c1;{ou+vVufd%STm9Wz571nAwyWz z)?Ztg*|Kf~qhjPFFtZBskeAe58s5#N%SmZDu*c2JxskaJ7vHh1mtTXs4uX(-32k!h zzmv=&D+mobEdJ35pOxFl(7B&Y;jy3g$oh1hHHR7KqzTn#2?o98VnSpJu$(UJ9#6MA z4jP6uofXfX^c#254IIXqSy`Ru<+%8P7hlJg_o)w>g}_$_)~!P-Lop=~Q3COMz-n%J zX$clWQSI&@2>bTb@Ydbc^|;;t0M-`{c;B52eGu#g_)x$@#v2Clx#eGmrCC9lub%~$54PoL!S&e4&-koMV?Lbo()uYb;Vo&}XjZhU z3GJ%JU%=NM!BfkauBJ3FFX!HDE4`?<K7DbF!uCrEsQb(hNK*?n+6Bz z&0U)@yXLZ8tr*`(s1{k&_)F}q^J2fJlR5|H{F3PCb7;3n`lKLjtM8K6@%Tn|0#Chw zM(w~N=Yw$7eWKg%$K5prUdF$6_2(`ZNrG~hPlyI@MC9#nS(j%870Ur@c!J|41Am4X-=vAwS@N&AJK*WWxh<Sv&o3`ww^^TcgJ;aHrp2( zL(vs8Wc65s-V;Mrqmxp>pd`H7j9g8z$S^BwQMLo$^mko1hNz|N4@_wZq}vIBuiwu&6a3jX&6$;EPwI!^p#vSJqo3bVHA zXy-eIx+ywmeGiAi-M_+yR6`PKZN{D2n{eCS~5{3H0;T+*Vr~%FzD?JNVowmAmlcU1Gct(hrnMYmqg^MCAcw1kXq-sUsbZ*@BIp*O9&F8qbrWDA&6*sxQ*g(2EdsY8d9K}OR-Gt?k9 zUOO2dPDDL31z#NItG2}Oa~OaE0{21bo;!MDVn>K4}{JC?)BpG_2PGqgSpoIP6Uy@w)PWXKknp%f4;p3A#6jM zwOx8yvi2cmJfl7 zU2gg&Pq{_4k|Sl9y-d5aB#tg*=p}X2Ha|H=x5W^fyIh)kVPCRa5&8vACtC&iouiQN zqh;2w#*I!@eMKd7+IK@8A?oi+?ujcV%XQg92&Z1?4%R|+-F3l(tow#BWL%n&<5TtV z#9XGOx(dpN4et^M057_)N=APza#a)#H2hXLAD9j#nQ-{k5nMqR;cNM!=Z|*RE9L3F zN*Hp1OWs?}RQI;CzF+;(`bZ^K2{S_%mtwi~1nApvQ!$1ZBDELxYmIc|EPIu-z&k2Q zQg73{v-YJdTvUS12-i?dyG!AZZSg^Fy*kpEHtQDCg8mG{fL7HB&Kj78D@o!MWh5v? 
z#!Bh-f#!I=nJZ`VgcT@Q`SWZq!ZP-0^8S@Lw}(98bN1G2Wyh=eWYr8svWW3eUHfCk zM7AYpdP{SUj+WV;UrAhdUp?1F>IcoVGLS@|!_Dz&1Anq3D=sdxs}Ua{uf;(T!aKzw z7lmq5YWB8&D0 zP)tG@Y7>0-BqBO7ZvosuxNfEQoEaMqR;+lvn|1RZtF({}VG=KydYr5zpLACvogkyK z{6gQ=@{|}HR1h%a+3A12aOu(|;Ij8!-kXB^FK*=dGo(bxELFbeE?ARFlO|mN_F}&6 zE4-T8+R~%KfBt(rzqz@I60x`KH&`SR0qp(+uagphH`Za_Pp1){(qoiU&=mRcW&S7i zt2fFtbR-CR2vQt<>9<}8 zB}HZFk_nODP$+$rD9_;!>M8XYd8nBMVjD}2m8#Z;+ZK1=VVyJfK@aX7+`(?FVDKno z1Zlh7L9;}~t)5bDpN_L_|HG~p|HEm}rmgW8$6AWTX2w$28FAVzrqG_QT`V=4Q(M%8 zPkSVRVOC<1Hl>6r6W*K1*fbT7?X0O73{12^d1u#3puS75v9Ikp8d;`ogX_Aj6kS2( z46KkC2Ixsx#zaw{t(7{9o27x+FvyNM^cexFnl23{ClO)bqG?aKF+{Ea<9u4wNxY3C zsS9DXCoBHpW~Wd0tn}CBsOoR5NRcsZ+NX3w9kF8a1y2>}(^BX%{!C@g(8>!<3F}^^ ze|e&6qPhGg+y)x!5(glz$}FLo`ei<9h?(%PT@dPRaoV0`8?F&{J8jO6OgY>P>NW)5 z7~OXkud{s!OAKT#|5Mps@o2V1b?9Grzr~mxUAF^B)b`5}>%pM+D+?yy$U^|h>f`yB zztia$4`V$A@C>iT^k%nDId$_hn0a-WMq;3wtuD8M5P>U4fnX+qRl;9lk21r8a#06EDOa8PLwrjkcTK1E5p9nYW0teJi$gI+NU;SOgD8g{o1L8KMX_xS z71A_4Q}q^u>qW8Xno_RFwMuhC-VXzHYB*`0*@Z^jtub>(N!&QiYMmi_PPTeOBw?SX zg~X%naZ@VH+;H7YN#vNFKHK|ZSzg77Fp#x_z*MuZ8BrEq+TfxOO(^2t`2U~=xe02N z8rDjVWv%AD=KAalPF8ni`12hAOoo=%Ma6!A=ipC2>x?a=yRHY2gNxEAwPh6QqPMfV ztgz&;wgNfq2xx!%h?VSPT?>!VXy%5>e!!;^l;~PM)#7eVg3k>lq#(5PR(*eNrZR+n zE`q?v``m&q{bif;x4{@>!kEj^&msliT?`l@AV56VOJF1V#f5Qs^T|=5z@ulgVc^x< z!J)w_yP00&u%N?k-Tiuh92g3|{c_RwI!IISJ5d3&$c)GN+MRx_3gZ!zp5P4ZAI|~Q zt(o!}sexAY%^3jhOvE>m(6f-gMk^?#dE350gi{@)+{5jW%RT(&PYT9dUj#ml;-RJs=ixFq9fC24NN($CUe9KvzwG= zP}zGMKjIp=^ilYU593 zbKu4CbA+*1`QETvMy>!{Nnz#QvVLh`fkm^$!yE#hR_Em7p8RU z4_YZhPzS@3a8r88Cn#DK1q}6FGe9gm(oMhsvUp!oJw0tIBAY6PA!Rg4xHT!cC~75L zIU`;vhFn}^k>AFYix_9@-bd1M4w@P^|5)eZw9N8!SlUaTGYp3hG&QZS&l7txB|R zc>e#r0MiE7`Ok04r-G+bx{1T3?Wd=&dmn~DP~Qt)e|$U7WX$@?E`_6=uW^ZMd-evV zU9Eiz;9uv{v;5vryZ`h2p&T+Zpj-ag(qDK<6gm@C*btX8S|l}?GVlvr>jw@Fm5E9S zF%pPuO9%|ya0JLNQkM8Ye{D9lUuEl!@%Z=%yiehX zeE$?heDY@61u0K%==lXkcK#N4w?u@83p~vx2*a}=x$x%6SKNR_L!*8Vn}>JaZy_^Q z_V)G;4g)to__*xWZuKpzOfctME6(Rqyg*mG#RS9W@6{@Y1Rta)>#r@OXjzqA|KOZ; 
zgMKhf4VV21YiQHt#~jgMAY3CGls^CiWZOAE|OFrC_!IbAvU)G3!{b|AEL2YeiU`Z zC|Bmh6`+4)C}qs?mAz@7j~S>8?e3hbtxng#BrbB>iBD`;OD@@RF{+~!Pwc9E_iire!dKTM0nibV%-cN%6_zB;UXdOZz<}nl>bY-{r6#~>CzN3 zuzcN6iR| z@Ze8S5J-eypz&dUib6v`C|OpQE6+K>261EVW>W}%_mabq(}4yk<4=1e3PUgbHZGMi zys*Y%?ug$Nrl2Hp6eFPr6l}|3nc|0xkLat!D3FQG*)9xOgeeRhomSZgS|eTQw9_@z zD0qVDJ>)z~iO%{e(cyq#Z zYD50*!$7n2blVuX36H(|W>|g=;`Ju81 z)ZP$0UG`@(QSYvWL`ff~xv1Zx`-V6G4nC`)fj9J5MmgWx^$Tvx4~4*HoCGbG@0Zqv zU3gfTmS_tM0H-O!Dn&oNOdf1>E_g_xzlC2RK@w3;-ApaU-o@t8Z(wF;ecbpH9{RS=x8V zdgi*5;$z#gfAqUU`-sAoZs=EB&EDQ!Bb*CRwKy-fGi_0^Z`tShcdz(%_+5-NI}oU4 zt5{LJS0W~NU8g1W13#S^&UWPjVe`~W?SgNI$@O#B-2-36YZK~`p)4V0Io4csGo4*;rA&j(3Z)`)1G}5}XlWnb|VRmTp?yC2vwe3O)^t?)S8a)h>i-gehk=QgWP= zF3|w@9JlYg#9a7CzFV!h|sJVdyn`wPxY_OTctjH~%VhCsEyU@X9`IWzg-{g5&$h8RIBL~m4XyztIpT#C(lpI{%{zcyy z0zq;gb=ZT}zkd$@%0uoU_1?|t3UXFiq7XMZ;BbKi7E#5yFhnbhm!1HuA?bX!b5Ai$ z#Zf!Nr{$rBfCvjsC(J;DA8xfr*y((SrQ&U-6?6N7zxgR=`o0J(js1tl=<1#wAA9{@ zhdB5w2+5cC1E8dTM}V8gpKowPez&uiz^Qk7dKx%&)Q;^vZu_YMZ&wYCWj&A0#f61d z^XY64zbb0ToQvk^ms(MuKMReh)jz~}lt5KJwqMhg3$+&AnqtEYjFc8=k^;MyJR;va2 zb7xbFcQhr$ws&pn_pVjmGY6S)bh(z!ImU3f2X`!>CW8KwrZWQY?z%pSlX}TYt`u;~ zMD4O~oj*N$7~FFKnl~{?D@wdKa*g3^$;9=hTm>Ml{fC>n2fnfXl-2)yE^Y#{^Z@ZO zScA0`v6>Tk7K-Ew@L2r!mGso<+E5mT_PXxviskm$jC9}hTN7Bv$sj&}?VX+3|x;W-<%*m3YQg%q(d2-CB|JM^R$t_pnT_9Y~+ zi^=gVl|`B5u&(bau|#N2QG>q1P)9*vOo-)O?N%E~dJV08QO8!z6{E&U?;?J5|9S9k z`lqZ8SrR_>-=3qnUUTYl7vwBnI`0nF)fQ5mK@!pb%oh8@tLZCIgKSahbk%F!LW`jN zm0W5mz*e$qcb(t4xGU4CI2>tQx^=3~l8*HyrLF3O5%T%i{5h5^HRE_gJ!$cIyWpbC zQqpR(B`o^WA-@hTMFRrObVpeH-PNKl^|0F|Ht*J0j}EmOGp*C+KpNl|sGV}NE@922=zs3;`G>(7xl+Gbl78*1p-xBr!IK#E& zx#!V;lgD?_xu>VK((pxNic6rE{NN6&{yeAGg};cdu(=k-y-`gru)0Xt?W95)Kivi3 zWPPrwAKdFR6W2A+04d)X!=RtUG2S~GLjQvrHAog!2Ic*ZpRT`hFmCH2Mw1F1y-}`6 zC-bMvLqeOXGqLfX08l4}s*4NFUEDiM72Fl7=RLJ=W%ye>r8e^(r|dhV{ncHw3E?~0 z7WNA7jyXDk^JcAVr9u9)IRdO1Y;D<7W$hdAfVOv`32n`kW?|-6rNy^*hxm29Re>=r zg3^I*6s(>MTf z=w`4qHqCUrQy%Qg!aA+31CjT#ZjApLo!+vA-4(dS@u#m|OKc?xO4&UoAHH<3x<$~h 
zI%zSUT*RrqU^<+DYBarB+e+T8o=gs7zXG7X*$Gx))9hLKo*xv>w;JPRy5BbprS+M< zs&{r~>07l6+tzxXWq>g?9m5(7|3zQC?@Ae*QBND+W@@HIaZYU?G-73T#~%->P9=2V zkDMRdVS6%Pw-r7L=m)%X6oW=#KEf6dzMC)Jn;S9*ph`HD*eS3t*nIo@>yO4f$Fj>_ z!eQ;Ssh7NxOn7}ZytOctSfNyx;$XOXI)?)v$YFMIon^2Ee>0prBU&0LO~&ZTyxQ(z z<6}@?nt9)pr6!KMUnb!L4N*NY2(AWaeXKB?zZ z?Y=cxS~JOL>lZ2l0@O}ac}=6=!I&%4ri|Z9k&E~>pK>+RLWj96tG;1C?{bVX;Ebj? z+&DFKN%xjWB~ZZ*e}PlUd@Yy?iP69kVbQBw05rS@UCF*{nKAcr(&QNJH)OGK!bxDo4 z=2u-B;xYL@^_JlGll|!`t>}vDrnFw3wbw|Xj1NbD#k&ur?skcT}2SWwTg6M$h2On z;(KQ_QWolk&F=t6)`3PqQq)%#(K{QoIVfMRyJ1qdu5Z~NC7%n+?zTEXr3hrqp$~x(4ZvLAjtZZRx8(;%!P#PXhiQOj^y5?=3z7#oqCAL z7z>S|o{r_irlzk539-cc*+;3pffNZD(*pAih1W-j^c?qGxW$AnJ`u2y`ky(-VICU6 z#Jy+DnJR!A4ll`$rp&;TD9wPxE%bK3qGJ}2CSg- zQCJ3RMSY*##ngy0MWW-Q_nzKMm z!^gI(k%ahSB#h$9a-|Xs1SQekiF5oql}9XF11VU>)r_EG(pu!Llvi{?`|O_Ez;8L8 zn*z@__|)WCpy<k{h@bc$l7gn&Abpczte~8mynKaf+gatzQL%ZmAg43KP^( zD;h~4Y{f-n_>(@Noy);U557=zWrrXT488-9v7$?!*(1YNOOW#W?o5{6oXRq;$59(qol&Z(BTNg zqKXhom6Mb|NJ|N)mOq@YXdUvb)q{bG~XjS8a;NtPl)WBRx&727X8_~ z8H*ebJl&r@U{jXu#(JMqup_Hz$m+4v##9$UMMCO90Hwg^1}F?|>#MWQeO;MJo=rtL zAGRz3IH4?LnGwn5~24gX|xfG8PZl5RzM7rXY`N~x3Y{wtsBsL{gpi<;rcG34)rexJ*9;R zutf9H@6yMi_iA)!DB_K5?j6yuH+tX1z_%2~p3Q&S8L$P*>VL4y*4?2945&}cN6AU5 z^yY`0JCvlAG7!T}knv9U`gCx4I*L8e_-H_EO%%72?%trxHTq%U36201a~Df&;wTk5 zCU2i(mY#_k-ys=Al>zrIfS{$IPm*_Kq>i~ zX`JKv z!Kj3~bG9~X9)syiDe4NK9I*`RCE>%sXS@TvzHOo;5cyw*$l$sXuEK4WM8)yldrrj8=Uor;@$wQE&|M1_JNi57t}J!InoT9p45`Zq+4)IlsUhLbX{ zuwROL(6K}}w&kU<*aC(-CivoHYO@KnEqG;x6v1QiS4sMeE<(bJfbB{qGxlW4Axu$( zZ4!6n%awchYNMtI?Ecq}}{0Z|hH*|J8;H1A1T5 z!R=%5j}N$15M3S)J+Dl8Nzt4@eDxeC#6;>Uw28K=BU4kd=^mmaclBYD>TbNP>}0Li z&7;Ln3*H1u^x_S8BMFruV>zZATug>Gm{4Xx8aQ`Whu*swDw6=uo9gHHDb~|>x@X)2 z#pPaNSC{6+V_rb2h2z#`*trh$MnD`ZR-=yU19XA!D1b5oNC`n_ z=&f4o&JNgy?wr~vf&8nY&Ucggs5T9U>06aR?4>N8;nMBF*LQ19(V8%+m+FMl4Efn# z+mhQh?>R%cGG^L9NfF=3&(m1ZvUij7QPux{QWq?I6rF!^VG*VG=1dcS(`I^WRP=zR zqHF_@?W!=1Sc#(;^0`s8hFToJOIP=a9jeHfvCU_sfsn^p(a@LMNP_KfcpJfXr~6Iu$``+b8IU;|L5u 
zBM7`^9mNYi{20*WL2lLr^k&C&1a-`K5?=T&-JH)A|Yfv7t0DqdS!O15-5z>4n-%zlo&yO4lrkz6xKj^ zoUa%a?>2(>ndnA9gO=s1+rs&)8!GoyN`sDpvdxf6CtpLO`PqwRVZ?mQEHxCUR6k+; zkw9@1arj}X)=Q_%$8f4jiqNZ$#TxUy?&aI9@Z zObW=lkyX$BWsY=n)krAD*fDwdBKvkvwRe<_9OzLbKz28wpSWLZUdhZuoyn^x&1)o9 z*A`lzE$6X18AE&it@fV;56R7CTqp>6ClpyKE*jJ_Q74_vgp=_xMU0#r^x8J?v0hq2jmh& z=8`z;6ZXK9{qyklL5`qxK|t5px-=q!-ULC==Go%mdz%ZlsXop2I!YRF2$S(HCE~`E zO2BNpi!&a*0MWgE+$+o|$*~fUL;#dvEh34NU64e=d*v~M4wSXOYh_o^^jPOhf=6>! zomPRO+Qoa)fzPDkKQ)il-v+c156(^hpvsuM-4MNv7n!oQ0@v6VDeH$0CYVUYPPZ?22;rsal0QuU8D;ShpL{gs)JFL z>c&#iqg@5+jjn&f6`)MxZ11fkp>AA29XFcoh{N8kPGtjPf(5+UrNGl_E&VxPo#TZ> zF!ai?NH0sb=#8KeQ}SQ5yT>&k`Y2F8VlvVJ#gesT;m1s?>|-Oyk)**?U43OI*rc28 znmBgkUv?FEf{deWfBYF; zidRU8QU)fuv-`;-UGAYnt2JY!6J!MYtf;!-!GMvE#hJXa_&`)_Ky#u{^3ygO`CdPV zm9N`TKZ1A^oBhWfgyHD_H=E$^`h4E8$NhQ{k^SM%cF+g(KLJ?F+|Y=Kh?i&%OdHeH zy-B#K)yB8~lSxm;O>)Dn66b$J)wMHy>K$gPv7Y^|0B|HVT3C#)d8d+bTm=94Wz2fm zlH8TInJu3Pjja5`{>$a(6Fr-%Ac&y%Vo;$l@U!Ny9Wusa@>$$h?R}e*!xZkj ziltPixC4&`GM4o1$cb0>J z5h|m*&ZDRYg057w&4cIyw2kwX=|vmOA6OS=$rkS%sH)PDz(%R=N~+@tMI`8JpP zb*w!DWfEjmQ7QB+twlHf#GbBg;}8(-1F?@DOGr=FK2MNiQ9ZIKxRn+8tZ=pNa`Ah% zETn8bS<)Df*Yb!sp~A>1^-yKjkS; zveoBoGc^Rvb=^vH{FFN|H*?_2%1MXTTR5R1aVPRQzl`P+xFCA!+b;@;%=ZzPy@zxF zOI!=ox&^VgALIY~+Hv|@pT3hTNfov4Z;SP$%8xV*D6B6Vb*5Lrk8z8I(x^nA(4T@uF|xTGd>l%Z$;J`O>*FB0GIYD!#A(= zN6sPzLu-UoLS(!-5`z^QdD7nk=EY z%qp8oatm4nvtqBZ&5A4hh-Rs)9*XPiDq?d9S_oRFs|oTG+7Jm0?Y3%2Pvu#vZO0-o zR;@4i5i4Vh8OBoSEQ5SroFrVyEWnc3KGzCwTK#qu6;Br^G~ z?yDGT_AT_Pjb=2pTOYi0I{oCom-iksxnwzeqG-1*A0&qUUf3ji*S&a zhkg{L>2WpNikpvS4288yDanSI8TQA__$;)pucBicDn#I}Syz35R6YhFL#LUhur3HR zI;#Kv=$AJaLSd{hPIW_4dI*u^G7}1`kSPzbT-?vpyw-&(GrMhF+fm^7>st9`JpX=} zOcKzRJ=PvUx+r#dl`*^V`P|WOaNmlK8&WzpG79%Gj4oQ6r5O5bLTYO@)JUG3jb@S1 z5YYwwSiBM<8y6c|PxZRCFcry2t%1HW-dvX^yYA9a&7r2>uC+|tc=V5(a)&{^y=Ttp zlVBR>=hN=T5a}UkU29F?tPZ>xeaMvh%Xo6G=6dm0gc$zzb{}wk-YbwK`p1|D0h?-QjD}2ZpPXFg2PaI;^CYK z#Ye+ZG_EzYE<_$d4&PoHtKUhggHK-Sww6%L9_l3d*QmX>Fx&@MrDeDk?Wo*Y1)X4s 
z7`L9$vR|H-pQOw7^F}W^SS2)OCV>Gd0)|7q*1z9&?ypI%xbwZa^YSzDF72TPR=X3m z$qi9f(w}~_r{y8#%|`eR)o%4)Jl`s|JNXZUeiUUI0_9QoGoTd@_CVx$K74db;8S|i zpwM7O5`6hHq@tn{a7`89+yM9XR&v4dt49S>j32a^U_e-dii&ERbmmKEMmVwKU!ut} z0$PZ#>6MwyYRfZ;`l7O@av%7bQ_v4|_Vmsyl*y7y&?J}@J8RcwBo4jN#S7=s@Ofs^ z=`?T_92=%@0rXT$Z_Pmo>^M@9SnOatl%raTJMvVe%PslXVMi1{sC3m?1$Wh#^t1W- z?geSODa}`x=P}|Oo6yE&n?XH@Z{)ZAW}Ki#ZgSJKmW^E-C?&4{q>vER6e8=bS{j_0 zhlIvARTeI0fdLDl+FLF9oVA~5xD7$u(I!cRAxP%AE~&>^_U|#PE4*(KvAxQ+&idLH z6b(ivwp*d>Z-Ub z174CJ(y6`=>I^=jci(?ImCN!@38rw1I7ifs(R+Ye1UAIdoS57Fnny;av<(ZKc& z$&arUN532>+yhv+@4`0P1#dV1J7gss&g9)-CkB;c@&QOEvx#7{Bv;<2j9wkd`cFBp z$`?O9N{lDCZ*;CkO*Xa!KTLYpJ+aG`&h4j&}5zztJ-dw>_0?bW`QxM#%b7 z-Yo2QuQO?+B^$j_XD0*GOJSjOl+JE+v%4CD3z<0U?kqeo)8)7(2be#m> za78s2eb>hqGF3Ygsy3DkF%$Ju4Hk_si$y9?mzB^dKRdb!k$r~W5x;hEcEn*IccU=@ zPiHFiOfy0Rf>u|V^f_}QR<=`dB4OXcY#RkEL}N3>@c4F&Hr@;|Pz$`cLJ5*?$}QNe z)&Wved5z4mG5K9fq;7$lXsa$&doC?`$Bv9OrMU!;O;u(BFX+5|HgKW3Jd-t8Pt{x2 zwIj)TB=JVvkT2_#-aNZ0a!;N2x@4QRB;F8k*+7YEYtQBs_Bm=LR2w7!?(1%-RcOb^ zLKYSyKN+PbwxTf7g88bl1jp-qHiB3N)RKeZtTidcvWSOlKd%o=y{iSAin&o14HFN9S99e zrf4RF{4iIw(^j>N#nZeket=eOnErdwQZVl$dLBb(mCg(P{MbNeaCa4Db&croy2s}2W{AK)*c!XJKw!Sn=^P`Jdl3h!GSm&Z^$!M|;bmtT z7qPNWigRJ66{X#foPuO2pF$l;vToWWx=PSG_A*bNz#RRrHA#q}>a8W=zT1eWxHiAJ=@Di+!?VDo`#wq%fbI@}N>}&Fm z`>fIEF^6@2I<~O?D`U2~_hTV3vI(9*OhW;IL7~h3ZzcMQR2IHu96Z5W%<27%Cu6t^ z^2!`tuLWY(0Cie@4ZefAgN-a9EIsOqyh~hWF2~8G35JE310PUbr?U9wEnL&RbdM3z zy^TKeBDC372Orpxf$(Qg;AseGSh#L=xuRM;5~6g`H!jIkW{0OTu1+t?rha!?Y-Sx8 zXBg$OoBq%(A&`sz-+H&oW&Gven?H8}^ek?;1Vi@z1HCsb-&_V#R&Ex5KF(NVe{!dO zSK_GUzF;B?KTiuwwCsD{s?40)yrUZ-^G;9pL}%;J>n^W`VTN}-VBO~)I%*+u;SS*~ z{lOdDk;p*7u;xlzQtng9)VG0zd@lpt3H)_|{SdUHvr9@Mf!R)HWySZfje%y6D(|R~ z$+&9~D48J-MAC|@WWejL)Loy*2;)M0Kw#~NlgGoRvLjA#6Z}A}l0RnIuan>jaTb^mnT&&9_T|l$c+to96mxsPr*%QJrn}I4?faD1 z_yU7S^L0zJHs?XEQ6=`VIPyrf;M8}D7@Mzza@+wr$Y-U%GZ{Cilkzh^N%+H0kRXtt zB`}h*FhziytP&gMD-!X-95Xa)Iigqt6f;Lq>Xmf2y2?$+Hm$HvB}1ui0n-}`47rgivDu2t%5 
zuUq%HH#iXa1Czt<=QI26%Q1R%aQ$g-EZH?St7N(er*xu0hUHNrjCHw24fj(DKuvKVzm-IQ7>LWX)nAg!$Rv@2YGnztN z)b#)EvHs?0v1l}hBc-os#=*#=`bi5V6BGfTjxw;0o1oJ{e3Xq^s;#>psOA8=d1b?oB8H-b-(1qcCUe_*ptkpRsxHoNKKoy@zy9DUoJ5ss@ z(@)Tc8il&CxA=Xw`#5PjM41%WMLZXiI<~l0Fg)3fi|G2+Iel$54!dD@HZ+_6;t5$5n%_r>?Bq6BV^-pfX;%9`q% zt5nX3GNT&XrY#|W&$VM!1U3nG+RhWd=g0$6)s>z>roD~IJK5}ZQ4 zVZP?Xll@KKd7F$#-V<1qwQV=(+z@F@TrKm7_D~fHhGk2GgDOx9N)?Y&g5Ay{pt}mz zZ3Y09q*}p*QVNnOh7nR#J}M#zl1ynX2x5rg5e7M2%43l4@>ozL1FrUjnw{Si1#pb> zE<2AmgmG)YmQnskNjLGxYu4uXizPm=?$gDYbkYAcLyyY?_C?iIjh$n>Awm*W=4+R% zYyLuhVt-SS;p$`72YT_x^sxevRB4U=1+!MCl1VX+R^Up z>S_xNSfZHw?4L$Z0i|yhmg<&UIABAT-u(FGA?wGrdW|8`y156zhd^jj$EO5epN!-9 zhaA^*1B>#>LpW_HOI!(l9JSA43uY@0PTE@lI}qm+?C(`H2Y zRd=Ex=Y|#Zz%HrN)&%7FoSWRgeGBR7!i>wsB#d)RM`n{2j`jM;!z7N*vP_$PO{xkd zhZ+TX#rwQ1ivdFvIz2U`u%+T$wf?TgB+d#MEfZ$nlQ6X(Uy|&~manh^h2|+IR)z9m z2SooUGV690iT?Y3kQm07qf3W_M?lUjWy1|bTDVQKygC;wRXph?WdUPRFvVVIgp^2R zjn}BctkRO^6V`(hQ(XZOj7F2-{(1`Ey=A^{_bYLb%73N_AQgEdi$JTR+}Gve^cm&m z=YuAAb20xLlb4bkZc7)OxO-LGn|B{X_o3%~{Hl6zwGr}OqcCjgzOwI;I$PtrT7}?P z1}}|{`XR}VRj+M(q7DlSE>@z~mGNXPhuUbqfx<93>Y0HO z?qxwLYK-N>ND6&0omdwBQ^_mc?}9NQH=6&Bmy-<6ZKkgx6a(SyCz-{!`tYMQ7Sf{w zJfHCxS@ZKbDlWwLn~^l*(^!#GP32gU3oOfU&p)9oBi&7F4T2}u3d3dEBhKol)=z{W z{b`>N?1kVY@Uk#e<4@@)wRx>^Fi_gJYDWOy)5>83B07DIAbuA+@%$i~ZE+`OWeaZI z&H@ak;Ol}1O`e}($@UcpZ|YE+-rw_mj@t`|oSQ~2s>0ShJD06a;Z%Q*ZJr*-sh+DY zDmP?p4P(+%!~D;QV+-)Jc~K0TGZ*U5z(zEp;QkA`&lcD&dhEuw^{aV7{|4}nbG*ocIR4uuVELQq|5)np zby_#Pfix9hjtn-=bY4yZE8)1C00n()DX=y?;;Acn=*CiEPgo9gZZ=*m@uQ@ywTZac z8Cse!?x)80l)9DGsJ*vrTwi*@!I6bc_IGK=6%J~ASx1IpYH|buSI7F1ooR&{?mRf1 zCFLDnA&iZePSINU_(8MC$bp2qu3yTZJT6{3ep((vmh0H+<7fbwkU~i>CXM(}vC^y@ z$%2fS+RWJ8a=FaG;Q!59fa(-S`#IUQb#97rh{xAQmh(KuuDS@q5nR?*`O%IL``$z^VH^z1ri(?tIPk{ z=67)wYjBCOrob|PCzrXb@t|31fEf8c}a$k5dn=BAXqN+A4+ z25}&I+hzz`MEELbt?Iqn0{^ei_fni*X|#H3j?~e04o;$Lm)beg=%u&*{7=UnyD*!P zInV1RQ}4ru1ap=j;Wzg6|6sfk2IxXyLV7#~5zrccVUlLdkVP4fmRS`z<$KP#pHzTP zgWMi>s4w!LFFmAh^~KR8=1x`TU^RCxI^Vx9e2R1fC3kFVdr%4^3(I$LJ0COEMkDBT 
zF?G3~i?sH_6IU0n&M7y+#(^Bjz?TBq1tN8n(SrUorcA447mjUUgVcb}?HZ+U_}x}@ zr%};8KrSu{qjs+oEE!n@7Du^7lChY`U^9-v5YpAkjyHrfa*QtvdvIX`=)gs(-G(4x zkQ2N3q~4jox8|4zc^j>$zO0>VOL{tDAqFh5y+*CI3D<8)P{JGL_9Bbb-SQkMl>v^i|9-Rwwgq6?T<#uOW!5b=@mh`8%YzUbR7Pbea|Q90-&?Y_MwK&J zr@(!z@^C6DiSg>nE_cy|u2qv+UL6NS35yotSMZr1WRSpZA^wqv48ZSg_~|a>t;nOg z@dfZ{r*O+=rh811)BQGHvIYH+b5Se=9~fo6av?B59zepX`g*6Y({!4_vvbTVO1%uY#zifoIYj>(_rrvFWWx=pO^t>S&I{CEvg3$NWv=LItv1WrVPv;C$Ln#Hhbh8f z@2F;Gz8xpvo|~#GT;}VwbJXaDzTNDwDvEkcg1+f+8ZZb56@1|N?iW`3s@+) z5OQ2snQW^$Xcr_-yEkdS3i?jSoXoE+Nf%K4lwe+1mNthvxN{87{DTZI_js(1WD1s@ zK8xSa*$;pdY}MXV{yRCEao%t1?J?W4h3=%mqUw~GUlQ4?f;&@s@d{nk=QgD60^5Ag z8%5sO+Z&=fufxi_j`EX7(xZ(o$Z0(s0omiQq!_|I(xbH~pS+Y`d8%(P-GwC9ni^(fLwbSGm<96V-=}3eHjdpYH zV`jumr*xPS!k4DVQSrJ<)WNnK#i&p%j%5fFg$u$P7hs~w{^VCRkke!%6AoR@>UJGb2IZ=VCc-`%e-uXu>| zcgKP)pebyrFo5_S=7)ubg`KSxwe+!X>BIpjjom+^<>GU@MVt2MyhaFUTI%!4JC8P{ zSDJHYI+4gR2nUr5jU2dj+w&Jn^sIGp>!{5NjZR4tyUUgoR)4fY_N8zmp zI+YS}3aaQW`48OG61S{Jh~>B~4r1kkeN#Oo5 zLlg?|mK7;yKf`mkpI2#{Od_Sof65I4ts8Np*3bP;{KEn;h;MgUC;s7bx(UW0{%qZz z89tDQ0sb;GEz5tWn)TwWEjQfp|}T21=foM1JoC=DidOn%(k*-M^%15BFL^J?YWll^*ny{DC)@?g7u;JES&+5Y@W0wxhjA9WR zn2e9q)!HY$w|07xhs~d9{iPz_8316S{(*ZP@Y|b@VK3!)Y#jo4X^%$DSDu^2-Av!S z=f3hV*SV&F?B_v~g3qo(AY>DeA81$45S?H*40O0pS z!%5;h9^v!nmAP*){@U&@SLpz_Pt_|IxF0VWf^P+ZOizhN9<5F!!=B4-2r(J*uIpv%=G67_pZ+(c~H%D-0z|Oog>H7e%rE3(%i0%F4emdr3g) zjb}D32jPNasy3y(!%Xf*h}ZS>1&38I-cw10w*rjBZ0^G zbT~OgwX=HR%zOqrL2isePnisU=|}5f*{~rBPEDojvyGQ(TtQV~G`@~bZSSO*5JSMjJFYfXwWjPgz#D=@E5jgq$~)}Ic0dAN3(8^82ysg9fo zt$UaF_XnSQ2NsAE74p^p*8+5&9Q^3=Iyu&Z@k1Uz?`>#kVD9|Wr@e;{X8!i)A9Hy1 z+Z&5c!9Y*;jL9Te3I&xl;o&4%t8W7mZ^%5{r0dO%nwa=VSL`+aL2w*=mLj;6GhtYE z6#hyPr^YrWg)#Sa2Vw4Lv7nXXBwOJb_@75#HUX9{GT*cAb zoAM>0qI}R&(u9VYQp+O9j&fCyUOO<_!QQdVuX6IK_$Y>-nvNk8)CP3CRX~XZ_=LOC z7o{zWJ{98xU%^kb!PY$uiOTCxKw4x>6s@RunI*|Pefo{z+c5%kx#{ex{BfjOOZTsp zNaAfQbFnaA!Ozn{09nbxI4npCBrU%FX-CIO^TvJlDZy+cr84<5KP?CUfGN`m4V?2$ zYt@N-Or#C}ml?&E1fnx@4S9qY+oYt~{etZi3Jv|Y$7n2V$Vle_M;ICSsRXf(GRzt$ 
zkHGd2PEUqS?=Ec@5NmSiP7y*NY(knxq%vdD>?0=?C-Fh`vv2v;oiBL1&+gOwPj}OE zn8U;c6OJE@EUpWWa+rflx)ZFuYYtZ2Wf&FsQvLLF?}B%a-6ON7{+hjhEJehf>`Q?& zfMBtE)rR5j@}3Kav^JkR4j)R)Na#iCzS_}pk`cY=+3c|6H<>Vg*P+!@l0=_*Drv z+w!ox2D%)+#5jb#F4I9PBT4{9H};C0g<6A;O4X`@f-foIhDbvl_fhZJ z+1a$k^|E>H+3(*qm6aX;tD>lXRb)MWi}GV?%KzLs*E`CKMI}rOKJALx#9EWyTJip| zA8c`;VE@V1OU_P^%8c?Bs;&ZC2sXNciRnq!Td}mXZoxqApu-=k=&)T+bewO!%2rmQ z3p5!zgboH@#tLv(g=(>`6NUp1 zw2gjA{4=yfwi$qTSTXBr|IqnfteLXEAxbrg1Rk4%m>5R5bx%US5g=dfw1$XT2zzIY zl}+9!c98T!P>dCwM1!G#sdH0gL|yz?CMtR@h`v29Vttds=4F8g*#E2(1cUj{Vs1Z@ z_-)b73U=-^nhtA zHlQ=@?~c*or{v(@YrUN4YkvLJ@n^V=_;p?I;^@b}vJuES{%du(b~Wm|8J_jrW%4^0 zSZJiJ+~IBK3re2(k$!l1khKW_`4KaVBy19kTrA+xVgsDUNFEw{JOH;D)ja91K<}wm?FZV@yvLgQMI!{kEeM zmtH~n?GOTQ-H9lR{2!D!DQuVnwQ#G!IBbcXBkG4Cl6h00A}~N1n;5Apv~d$ATSFig z#||=@6^R9W7fc6Do52V^|C-E!xk*}Zc8teUkGjoB2EJ4(D=UC84yCh!{PHhX5 z5}5X7|6zd)v!u}uWNn(uXv~4E;SomZh_q|Nf5zLHAUPAFUVZX(9Q)Cjhh*3y1vNpc zo+`2g|Ktere_Ru+2J6+3M@f5E<*hntsiJ$(I^vY2%f(kEbFiQJXo?MJfMr~=ENV9{ zgy4`0TV#hg9IO(2CBlN?v#2sca3r4-(Z5$-Xi0YA>K>K09#3HejtF2o+eTkspM#+5 zfAvHEuYSgn1M>Y3$h{o~|#U@Rv!w(#3jKjd^GC3--47e^TyGn_l zW5kZP8&It(3H!iW4(Fyw1D9-XaU-#GzeUiuk`o*kxxttCCtLL?dTUd>oKl1qcs-Hv z&;Xgoer879(0KHzcT6fisHOL=z~_5Kax0LC7&z(|Sd%M7c^`w$5^=kWp}PVa+X2<% zi9_E%X+l=g1q0Zg0a*;Ok=<)xjo=vA#)iPK8-n#aE`c7Vr?M4LP9K5oIf6=4uqk70 zqSgy#_qOKZnm%r`!0+sbS)JkEe@k9JbUgRJlr!)13La*@IJq4h1n#~cck%7xepo)e zjeBH%5%UAY+P+4G)nEXtq-0qm`hKeGuosc3p`n<2_3(Zt>A+3w=Ee8N>JN+IQMCN^ zaX^)V>!;dwp=`MwmV8_v8Xs@YezYpiYWO1-pT$*1mQcEvg`%N8@3Ny{JiidmXWD{h2$kl@sS2w904QoP34OlJnaAZGx9Sa;1RfK#QYnkieiqv0_9dxm$aB5&%4DM%9%}5pT zK75Ln^~9VBGNCFmK-c}ApZ>;yYx~HN_NN>oqAoUrJ)xz&^FXWm6?B}WyBW`Sc{WZm zppbHMoDliQ@YwQIT1bPw#2guHk&Ad+_-W1-bHj$Nxc|H^+uM{L)trcDyfb6tCkyq) z_tQ*n5U&l(c#z&``s!fslbE~^ib~jQvwS(HqTmXKCQi=v8|Pno>o)*}@Ryt4+?=|4 zf)~8+1wQKo+YW0tQ>9N9yR3GX≷C6SI@bNW@#?PaDD81co>K9!YfNRvHO35MI!` zEDHsdd@$c55BS|SEd+%B=HWOy!{lSa!I~#q)lO3e?Z_l4Zi3et)22`bWl-E=k#aMu z1}**<;u>!O^)q(KYunNZj^19oJ6t2rTFAL^Web=*ZE(PHlp7?-6j}m1hX}U-erYW$ 
zFb`uffKp;W3yTH5!H|KUDa4I^nRXOIPPZ<>IDr)3X`}B$8lo}F8}4@pV<3(WK0+Dv zxmo~YLPM@b-OKLvKsD6m+UvwzlLrIZW;3^JUr@)h7*s&~E-%f94j6moI{I0=!yzc{YNFv1bfu$Icn-}(nz&OF)@cgg#GGE zCbOSbkqBg8VvKu!$oTZK`|3ubZO2q|ERE((Rn`_~uj)9}ws0r0$!t(;^X8?ES8C-p z&B2B!1`{NZRHcq%g9B%rjX|Z!0q;NAyo(Yo4wvQsc%d)?7@%AXG*e84S)f?nY%_`C zF;d$9B*h`!p5Yv9`T`Z7oU3;HyT3m_k^@DjA^RU1)^4oE8dTQILZ@*XzVl@fDyo2ORB9k zKS0a^E21pz%(2~WfaEwaE1qI+m161`mLM@6?1@qktCS3ZHhIP@<0Vs(v}6kDE2o+HnH_1{s<0 zYTb?;&tK^veA4w~;s45J{a@Mm&Mz*O(v1Dxe0^ydVgK~_(>0EV$%mD2dCG*!N|QdM zQu^#LZeFi_`dQYg^;gqdAV!Lkj$1bVoTvcl{dY~4O(beGZpl9i{=)d{{?n?CD!D#n z;i=xN!zIBy za&G>aAsG|p=0SHZx`dEJfD2?NbFCByu2n{6<|bL8`vsH1N=%_5P*xu&lYhNcpocC5 zGL9SjQa0=aXBf+Lbx%DYi^M+IV1aZOR)RF{aX+`~?~YjCbITblFlvcVL5x8(v6+)I zopfL9n~(fd;_wi@PH1-!#!`ONYJ8K_!?~KoN?D)VOx$DD8y{6tnjPLv2QZotdZbAq z7X+EO$e3Z`2=>lG#1`4I`n-_apY+{(#uCd<(SeQ_#G(y}PNys#e|Dv39ho?(99fBa z)E`pR{nNT``o~J#TbcYDmLBEr0P1~wJKd%^I4_LgNL);c0HxAI^FAXt@dT}w+h38( zWH!EP!QIpDxpqfK;<80;7D^7Cx7w$&^||oNa?L)F0ayE$j!-wc#_?)x?rz+>nLD7z zY+-a4P`?Rz_?>Ltoucp4rUkf3Virt>)=aI}qK3cNU#b=07WmI?yhgTH#rI1nV_MEB z;I*OBcv*0eg-K-KS&+8TKBrpqLyEU%I(KYo*TILt`4@zWvM!dp^zY#rYHi!F4}8X}!XA{X*?Z0!ISb^oCgw}7ha{}Bvm-7_4B(pZhW^i z1U0cHgI80@f|AwD=)tVrS^ddjViIp-9H*w72CnYL!%Fm{I9Cj~A+%-olE)aJhj zm52k`gG%eS0u+;1<8r|t`^>bVRzMs$?yr&;REGNQA=%2tX06NnZdO6C%dkIGE|cs3 zif8a&@l-XLPrZvQ@Vnynzgns@)~mUz_7~U|&Uoi2`>eFEG2Z^K>q-XbMU;G8R8!~1 zs?~(`st_IMdFJ=z!5po{l~e%0vV;TCS2qxlW+dLC-`)gWZ{w*oI#&icHU)A$b9_%by&u(=&*%>aBD-hbz!^RNc`_|-`n}7F98_;K$Xo;T(>|+ z@P$~_N%Y#b*oO6rSXa%KMUOoGCzusg8K=zHJmtgR8H{st>b9}lnt`Vu90&k6uf`Oj zkPf98Tak)oAgQz%Fu>VkqC>rNzL@nvRqiT+cUP_~y;o!=r{aid`KUcIpD27ylGsse z1|d#7j&lO&oHvZ2mAb1K@{OWQ_m*+k@(<5+hdvj`xPFo`{Ia%SHOISynO!WEuwI0u z4jOL$5g@{=CqDT3#F9#s08Th9F4>797z;bC>h4_aC#m9n+iFlKK4&IJ8A!eH%52BC z?DJwxPTFT48{Bs{K6lhG#_wgq?q*Y8Dgfim=RbV10TMUp0Bzg(SXH3KMLgkgRT!I{ zi{|3bTf5(tBl%uVLb2GwWT+Tm;j(@Le1FX!?k)43WPmLDffar=+_cmL0SckI$q*`_ zToVIDlPZ7};lPSN4atxOG4qWFJMK+3r!?MfHEjrM_$a&^QG6jF<~!~5jS1VlXH 
zke_{&gUit(EUKD6|I;<`QwK^|*>1sIVeJp+T_$6jT}p6@}|R7LZ;~oC4ranut-0g6@d(^~!HW!ocyDLIw$Lc}=@#CbI-*f()3dtya$8bTvaqo5KP^#*+8x248W4up?j!IofWv)n z$YRjW3?JP_HCMezgU|x>7r8k z5EUA-)T_mdy)6u8IG~tkrT{*mget**18cJYn4Zn`Pg zmM^8Qa>1NT)1)}=BoCTx=Oi6o(tny0LrtXd!C1-sO1ODVlKL(cky`j+x-UzI8%#OY z#TOq^)00j`_kI-3-RJEN$?T_?XGH;U>bjx$gf1A0De}O4KNP;zdXvfSInVtVx$W%z zjqXck0USv%?6e3&v~aoMDBHKS#_n*E0bN(p& zb2%CSXeY0V1TXz_;6|fErW@*YeU=5y_H*JSbB22EPuSN}K(k?!+I%NU7JIfGoOWX=G zasl*XPApM>LBvtkAh z`qYSAtmC-aa!jzOMF+q?K_nr7h?gK@V1*jWOyVQJebsC9(jJ7QT^9~oM}iw2MX{|I zu6?vm4X&3I$b$5M;+%%HYFv|jNv?QE%$yv$(BY_r;JmOQ(fVBxy)=M~(vyxzj7s4a zZyNsTSo&HCN99Mq3h;6>DgjcpU>Uh)eA}%j9l43fjw%>So`aus2(97YdNe8EFy*B3 z`CYtp0-=6V(DO4{2Ik|SjKkW%PRUC_;NjgAUoVKlMah8ajnC@p1}*|DFCX9Ue|+n? zG|=9?BF;}wV@8PPc%QccvyKqq{En9g2y%^o9}hU0deAOo`y?45Ic|M#u&pR{Y14At z*!&(jp>Q?gD8qs!yBR~hY)jSjexekEPK`7PfV`{>oLYn=t0j5Y4^8d?gob^bsq$vZ z6zesVDF^1Jbm1>fH{P4s_YM5j#%-Evl7}ovGWh(hQ8N|dMR9|=;mRxp&$3pk_5A-} zZe2m+@nqEHA-}H?&IL8us$(T-EPY*^r>R-^RP0*%F_U6sU^KHlfPjZCT}a|5-utmjv&9iIp0z8X@nm7NIAx*;(=G z91MUw7kkdz9bX9LztAMs@#o>v=qM5VHr9~;vmviMiSML$=bAwOItea};$hLg7y}|@ zAZ0&$=Mz0HD;a*`22KS5wm|oT6RLwH#_vbWyr5Hl9O9@fcR-`o^8As z`kM+(=qEsb)_wzHKma97Y5;me45T6ar8!_B%&tTvF7yz3Y)9#`4E#u^MP#i$n;1i+ z``awrw*BlLHa%X|l{;pGhD~R}*ho3J>HoSb0-P+jKxW7W?T0J z2aCfiHd!e{tIAq{ab`T!6bb<=C39)_sPsV^eehdv_9@6O*CEZ^MTZBIT)ZW{tik zrTN0ae@HV`8EY1OZ73sH1BD&zwK1TFi4+q1Uht*by=(l)#SS=iSLd?d9s73!y7V>6 z%G!Fh*0BFyuLcsm0bv@z>M+he*@lM# z0Z{{}Ckip)(|+1tbb^*wjL23Evj}hto^5OcP8;DzzVUxD6Z!0=Q=bc$gx9++X(wGF z_zM`D(}bl}Bxm9>65}w_E43zn8pZhho^NEtlYC~F1eSv8=1_=9fab#@CI)PUOnXcMdC&O#O_By=i)w;I+*^{?$IP3__5xc;q4l-xVl6*33@#<192tU$i><4Ch_ zz8ZxvvUcj_Vob4;bt1B09cx8W7#BRwuf9LVfG1;if^(==lMnIU7~=OMA#*R;q9>Xm zvcVJS#$3&Nsp1PrYIcevyfFONz3|2h%|)PS1)^FztrB6Hp`LQqx_!$7K!G-QZF(OO z!bxD8A=es*weH1B{#Eo7gTdI+XuMJA7`qyKEY+n!)qhs6J9Gco%j9**o_-zN_^^K7 zrH@Wn;d;c3T=>eb+g|O}jFFk+Z10h2y&C3I*H0Ai$w{*F&;D8nvnh7af87PpP}4#< z;-oR$Q+_1Ve@nNM{dXC6o8Ai}{0H@4iY0ym+%Cqpklb>@%p2{vei|BmmGw*n-Fj*6 
zIq&5fNcXj+YoRxnQ(5zWChMfx7IL0dZjIa4;E?0sx7`Dx+ZO>QR*E;xD#f13 z)&oV+LJt^i2KAUp`qLk6i)t%&QKDyRl{qkpK0!f^u$hd{F|0*=f}-KcjxkI@yBM!1 zjloE4AIby=EgxeYD-PxMjv8lUCeykiDlyQ<&6Vxh0{gXU98_&I;w5=YH5EyGuwTSa z)voO@4x79RxXsDv?rDyXWKJ<3VDn~NDH}7TQ6j!qB~fJnnSwX_*LS#06||-?GpNak zhSI%%T;p93LZE7Wp}#O`H5C3pV>xn+h{Sh6IO|Q; zi~p0EzMB2JJ1mAfahU2fuL6aE>eX3N2iv1g5;YBDW2a8F#m4P|Vs?hE3dfynxtw!S1EH5l(oa72w&9|e4+oC_ zUJKYE7g-<40-0>m^nNGidA^3im9-h&__QcR*g)vtwzVYDR?I#CZSFsk>+IN6)u^Vp z{KXFJ0I_cAbPo1pK>2oa=Uh(u6-3tuCH+bs@iV)iP7XEJu@fKS2}aBZ0-NW=*pT;X z{k|@)QX&{5^TG@-MJq3Uzl*yWzIpexT{l#ieoBD@=C)d#^fDPWL5fJ?rvgmej3HhY z2>*?D?{x?gN}@_#y%x^hjDG3|Uipic&iYe-TU7qOkkxN@x6ak`T+OdNFq>4iaPkI& z&A%SBiU%bLAVR%S;B|!0%2i={Rx>=^J(^E^+ED;#)%xG2KWC|xp*Ba#wr_3ld(@3G zZ}}iW2TSuY()j5{v062OCpYx9#4W+VkrvrlVbBuBS&6Il^H*C@iu_6u~fv z-+F>!cn~#tFvkACN&ZI~|DT~eLO;tc-Dm>CveQ>eyk)D7yUW{4AOBA6W_&Z>lW)#u z0tSqm{@%8p%i2>nAMbqiyDLNbBo(^~{w~g#^;hf78vFYEYCN?nWg(!?(pWjiKJaIP zTVPEw;x>!6c%MqBzQoddAO57tSszoSGtY|BmO4`|)V6Er_)GAF7(EK@fx=bUj&ES9 zo61$-SH{cY@l2^sIkuAnf90Mm%75X;DPL@`SKjnFeDZdzGCEPCXil_RR+eeDr>vb1 z%=`aZ0F~V7eAJIJM>}v6^FqtGWfPF;GaXCeeyPJ_iYsGj3vz14wC32(%Jfr9p7tQ? zA4gWBX!eG)K~efEV%EQHXaBt6;wSOcU@0+5E#;v4WTG%`)yqbqJ(`wdnxxgN{Tq`6 z`s)`g`1{jvtV1)D(G+EM59~|<$`I~)uX#HL$o7Pe*C#i|d{WpDXD#DN3m%fEW1g+M zzwiB@F5-Ol$V)8$-Ene2C%*71xBhlZ-yB$O+w2a-D15vSRZvhcH$TXHdH4{-=)K~* z|Gv^FP!@fxMUuFhi;nwSa7%6Rczcl7B+B>&zt8r!V-&utLaT9TQg1D{jrs?6=@TJV z!4k!HS*TkfS6biVrqJlmbk5CPxE#l zOg9<(`r{OFuQR4({H-487Axs{_Zz>?z94e+>J5VF!Km7e&RffJ*&qJkr1SWct7C;G zFzmsR^Siy&him8ZG68NZSKtYEyr7<0QE%6L{`gxqGaeiSpMYDd2vq9utt8qWS?J}ItQ{_MhlsSF2)>j#G=vM8d3y!$mxH>iqH3mwqYl@tg?hW(W(dW(g}*h6EcF;g--$?Jv1;iV=@C;0H8u;*U?*DTCs zYfA)$zs#=f+ovRTQZ~lR`!#fDTKa!FeXOlH z=@-Zk>AauE>NTts+ADDb;i2Kd4Wq*l;U@-2_@aMOyU9knkN( z&epG0@+TjlGx&aZ-ygfK$J=faP_dJQaXUM$upEzcr%Yz4chXzZTF#Yl1_bT2mLxNp zK+81w>V6|#9?x;(TQ!`{);TUB+W0}sBQm<8M6FgFHa%97z}>I?!Ms#RkEOBT?EZ~! 
zC4QBaTJ_;grq_5c_fLbk;i~~`1)uA}_l!75;L`J)ol5Vx@z-&OyI<4+q6SbeV`JMuU;u&sUB}O#D0Tk3n>JRQ09ye>E-9^*+`N5wFIHc|Iq|MD*Q`uy%1CABQP-y{WG9jHd06T#M!M~#u(~hQ zs_yBElq!tY2~cM~-#DNgumL+z`+aef(4+wY0l^a9l>Dc(j3w&5VVsqyX@QU26&pJ~ zLPRM{){OAMb5chOojfbUd?kjf7H{a7iL7B48@!h1=$h?O6VKh*FftuuMt7CJ#no)E z#K-vPx|r3rwH@!R?oHC1p|`WaBrJ(<6RCHWH4|YR_-`QCn>7i~j&Z&s(|#XDbg}Zv z<9p{+D$g;1i&bcNlL0=LxxbeCMsIvTj_LK`lO*hC!d^ZN;%zlQUw@j!&(i30VB1rpjH(M=D7 z{IKwo5kBO|zvkvC$NxO}O+7Re)H0M21~rMeQFDka;x>o)Ulth=>6JZg+GwaI>(H7g zuH)t;jzYfs>i|$cByXbdnifR*p6C<&iH$AG+-AeicGzI*w|q#a$|HP*{ZtKuKI~IP zrZ(L(Z4dd+5#5}<=)Q*3;4BLK?zp~DUWE5ud~HfuqE4dboMLMiC~Y8}u~|w%jE9Sd zbHMsUG;573Z)^jxXGr^i8iHHzBj&tNBCS&4Ph4rjpwjGHdDO=ff{M|IPOo5@FQ??4Y1&X%E?z%I+i{(asiB&)j6)?MqgR{9gwA`_$j~px zLw%ob405Q^<3>RQm+KKUZF`jc4??f`OLz7o6(<*S_KqJdp;E-DM&1uA(S~!3{&M*UrD}o^$7QpzgP7K#ehQRG0FQEwX1T^rSVucp?HWHRutIhUXe*+Dz6g|;3x4`XTj*1 zDN0#lrcjWqmM8e{b9-KUs$v^^MFM+rpk^-T15JK1IBsKcSP>M*NlCd&VA~;GjvZf! z4h^p>s!TE}mEBj;>r3}_?`*Aqe4daTFjn?q*3aa`#l|XLI$tdcbm=DAnB*zPnk;B? zPS5-J;jACr=RVB2?>l_cTSJ~fCzA1V$2(-L_J=s`UBxZZD0YH5J}PvX?jku~tE|8C zhE6VLA7R@yE$z~HMe#2?=i^|a<)KA06d954OuYOg+#M5TZS%=Xj5thIkT(OjgG|qw z$*9;#$Wt*iRq!Euj?3%vLMZArN7K4Gmz=ci5@nk|Z`$7;T2{1TAMRP@CzKW~cn@CF zO4$^%AT5-{{G@U_H;);B&iBz2c~jtf|3iTg0v(CRxhfh>Sn9#>U4bT+9)d_3U=ts& zTtd4H71Vp9b5Sq`8?lC8d=m6>rO#CD(NfA*oXl*^_d@5iZ66tObY!EN%l>5kGC z`}BT%WX#>0O=Ro>y;vGM!=95^XR3Lf`qt;iZ$0UWuC)&jKD-YZuuu-}%Bio_r&i<| z->3L4)7LW0eRE7*Uf-L(<5^(6h*7@!8wM4OffaASVnSAED1zdI*{6!j34x#gS3lEq z>g1LKe_LI;Pc@~55M;2U2-bXMf*yVZFf=Q{qDSG)gCmPSuN7=P2V}7Z5iYgma;6Au z=RC=Ko`v}&ZZa|Zm);Gg-v{qCc`?j zGiHH{RFxcI^A`dsmD4DWxva13I=v{W~d*sgXR2n3X!g4mi>TTXFLu1H%CYBr^cpS!q?>gAe)x?$rozkiwL3~4o$jlx&vQxs zV%2Hw%uC*h2Ipst;L;qgV_>z^>UDF>f42tXf1|Kg^}2ad4fQIXw2SZY595mvZSUyV zyq+3%Xx*!|zBPBv3JpQ7z%JX0=N(hYNSq~W{)!QlOv~7wD04Udoikdg1$nmugS_jr(hDrIW5qm{4us;%S zzS&!MbT`2pKZap0ja`Irxh;2b^&Fc{164)nrTW)bt@gaHOtRr$(}mh@eipn*tmUnW zu$_CNM>Pl^?w6YR zrFTrT%y%!2TQl5al^X|AcjK^eO@b6?ejjIVvZh)WAO z*{uw7pJeOD(Sw=t#DvO3ENxnM>iLJX%D!+ 
z@Po^_3zpnN!x2wuNKpSF2%{Ll!a_(qQIQ6EHL?L%}qiwA(Wl?#a>`^K5>) zVQ zT}jMkr|CoWdHIgnEiNYR>7+#*iG zg_{_fD#zIwUzw{h3GZ;w=+A-1a;J6d-eE;zZ)k5J$jDEX^uvfmL|-ex>O?9vXSi`S z8r{hrhW-pIk@bcUI>O*MfjEaQ;x?m>r*sQv;~%TST_wB$J?1)wae<2gZ2kK_yrH3? zbFd82LGWZ<)A8R^rZP~Teu01I0rrnht{%L+yf&_J zD#I^hp^36BDqbp%A`tIeEBmI#Xadt^2wF_7dAkmcT3Z8an&!l*EH{S16s`LH%%wR* z+WN%QU#RdJnY>J4T#hlG{WE@TUrucha+9(wIeJCvLVlo(%G`w@Ef%9Ws@2Y({g6Y( z;~W-Q^Et{{%sMjmlZ`psb2i7}lI1p&=&I5<_ii3YE%BM!@*I zd=vL}7z#627_TmNF;h%nPswPQ>!fY|sx_$hA|IE$C|$0bCASZDafDnVUex*jTQHeM{c|T zC67V+r77q4(l%wi%xPL^aCpd6jlm{MYz7ngeclPsN7`6R%f7bY} z?b?($Z@I2&qat2nLrx4um`)&yFs(LKLo{gdOkhMO>u}=eqw$ObMg3GG ze!smmW;-Kaf&z00uaNl%82hKP<~>Jl@lHJ>{7}!wqte$~``6p1MLlm2(9DA3JsX$N zanr32CLHdvz}_tw^pAh|y07?f#B4V@ALL$r!h0trbV9Bo)tjx@#a-Z21t_l&l_%2% zWh#JHyeL!_${`$^q&ZUqS$*}nK(AgFU(DSJ3<)O4m3wkw>uK-eeRUi4*lx6D4`v7E z7tRdO69p&--I%B-nw5{BdFrz(b!M|C%75g|)hp2G{j_*L>2dLtHgmV-=J{XYS}TUV z)BCH!Nat^R6823K5vvjVuP39rO0uv3Xsz*~wE1ZFr3?3S|n=7a{a z)rTK>r@9jmSD8u8Xv!_` zU892_PMnDH@40o^F$E@)>18OAX?uo+Tb`708H+Ywv?PX(H`heTf2V3%r{0^g`~V#c zo6bvnp+x2{Up0}DVny#c+nq8`uWrK}r-EVb0Pj6>5uV0N*}+RIBj&V%VsAQIYN8^N z5_%6Yr1mob_Vjw!FIL6G-qR zYBqk*_w{V8^0!{CYU9CRT6ZS?qw4ukoK-KX%?YXDnZ%P$_=hXu^BoNtia0RKU<4h! zRA6jQD0F^4_Yg(-X3QSsi;y%%q?vcv=imgEh|8&cR0h>mYype)n~ z$$H)(%KSf`zA+%rH|)C>muuNA+cuVKnRm5p8_Qa@ZQBdWwr$(B^j!PD@AG`BPj%xw zuJfnkI7A&*KnPIN0UZ?y%yW4J8$$&z#y(hv=_M?se~} zW=}cq`vknV*bk?gM&qC-kX0qjYX2r5ZnwYqqq1r53eSw`nji<>UA@ZP}W{)I|%h`)|mK3PbK-YZJA_e*cexg zcpmC+P4 zrA@hDGPUMd;vRMb0pMVC1oI|ND{?|$z;h2(@fXQIEpfCZ0Bmlrpy77k1U>iwdmJBE9bU`! 
zo5OIx<+`46Q5y+PenZcC;YWK43CT#%;$egakRev6bhCBUs88s+6E`F1mk$Y#LbkUMYD(<_$^WJ0E#~ac6RYAWz;gx?2Y21?rX{j{-k-H97Zw*E4?e(P{{yu~GNN7?5mv^Qv*NG5#Kp3rbsgmd zFi?8gtiJ0;yp#_&6IRqU6XwLXjph(wTNcH2q7H8G>X{r&_;8OK`d8f<6uJi)Gn91hO)fs^o_S#=ofk7^lImvfQ7h*Y^16uK$Oong#CgaObR zQY~u^&GUKhx`=sEnBP=>)Xy29R(SAgZnHaW4*dXUvi{~ z!|u@g&!u1DT*3H#fasiU?ak^I?iv=3Dxfs)4u)zWRPkh_xS`A>MQ;V7&9HJQ?V+efh`N7F71 z1^5G?6iHXP2ItV12ZnJRw@VsMer}A#R2)=ZsbNy;@24Sw{b7P-hD8y={6iJWiajAz z`GW-h^XD0km;^Q*>jLY5zZs|+gKP(K`T8%OOR`;OmnO1^sznV?gaYoeH+03mh!(RR zR?#thD_7OV^o{PVsh+LVE?rEI^&T&81}>7HADE(`Yz_+Btn&b*@lq>v(9nL}1;zNY zqhTjA>)5Bfk)O;&0>j@QGJF|R1m!D1D$Cbz5iYyZ-3}L=787hntG^S2kp#fm^D<&k-6Q<a@`34q_Lpg*+!)ec3{>bW;C zYfJF@*Y~mU@ecAVaA4tkuVJZ+jhHiwgz_L}?=-QUv%<3eGKtm?E_AJ7r>->zyV#I< zd1L7PKEm8oAV9D`m~ezNu^hQ$sNy0ikH2+mioO!SEYJhKeITB|yAhsut6RV?cAd9# z#Q+HKzC!NOt4*!}&fSNA2x`S+&k9jziW6j|(3-btK9=aHHl=TEv(->nx;Hj->JOz3 z$co(ULFJQgN>_I7(jh3VJjZ7>_!}ehCN`Ij1U-FE+Wq}9J=$Gc!sdGR% z_giILAL7_>cUk2r2V@#1=Fv<_qZPG^m z92^14Eb#oS%vIbZNJSAXW564vUn5}1HLQ{wnbDA=O>@JL4_vbC|IDln47M8K`!vkc zF9+VbwjJt{gz$=w*lYqnq7>S(kmBIB>CCjn&pqrG2gMiL(J?eY! zIiCkcxbltD;Ygasbn(kU2ugv3u*-e*_SxGNPfRmL`>SlU!q4Y@A@L#^pvRZ@ecRi$ zS1IeOXqEzBC?LHotoyVYw4%qp$_DDo`&q`2}*B32ccShe7){-YvU{}9rb{-Jp3Hk8O)E`q=$(8GZ${v*jG9qlV* z#KVroMuXFyuJow+EJv&5`}YIe+yMAkuu=(+NhweN+EbK{=~I!$A<*BynT=|g1UZIb9PYx<_lc@b>qh=fJ8T6s@)DhV73&QfU(?X z60&D&B*?&`+lk%E&NqwKlu_5S+A;@pN{t?o&z5j6Xmi)9m#nizr2VATsNt~|Lg1zj zsm&a(DWX8O{Mb3q%^Znj?@h}Pc(zswAu4`zGZulj)(1+LL43xB&sEKQnX%w6hWft} zV7B}xE+{MIgv`?<#ns#Vs$w-Z{VC=lsdk&BRy)>4-=ORwN#7&*4)e%WZ z+whfZzZ(!5UIMD-n1?s}%_b1@lK~~{*PF<5Sd^kdL_FX%7%*n@FhDKVOHV7oo0vhQ z^U)l$!xL|5%5!2OAb@d5(r}Zuh3mOBPJ2Yx*ona1F+DRokJop~0LScAe+#&5EVHiV zK3WJhMng02`Wdkvy;!~XiD>%)ZF=)N=;iRFk5gJ(7na_=hxLwfIh}ESl@KMF&_0>r zwDikNVRd3-m}=_W#Yc2gK9MBF!}WG0=siP=uNo+S5qw$i-|Y|XKG*FlDf6VO-1)&Y z>TJGXA}Bjx#9$~as*ub=o_5lI=JJe7LqCiWZN^glY>GU3Yg9*@)1;6`{A>nFR>AIU z{%=P>W4auo3u8VLoLY5R?YhWKqj*k;D%|*4%(FWAC9cPTpoMs^%^1%0n|}hEdgr3! 
z^Civb#zE)b4vRy>i`3*3dYJy8d?kZyrk8q;L5QjuW5m=@G$vT^KHoypdT>~97H`mS z>JTK-!57Ux^TMhW>=Z&w0u^q;h?S;ub%mIK^`hO9)3!n#;ol!_;l-IPxY{s)F;NBt z5&%dOWM`U4(iS<2F!J75q1e3NfU(hcAdbF>2A7({QYq0~*MCd1+lwQp$Wv~#Q{!fC_=9?6JyPNfSKlJgwSZf8kg;3VJx7jj& zYJN2v%Qfbrm$Ws0C^3haF7( z*%~}xyT)}p?)pF?g_8u-ZNA*6UrQo@shOojs@?LMvU8FH76?E@z-4j?N9ha9Q_y085M#a(f(+CKyVFg}oQKd@wz4^q{O?~g#^jw6DKG9F`@RYt zCSdSju=^QM_Bo04k7!!dOWnG-LOIFSlQbR|hlrDCZ5;WY0P65IpMPlw+>vUxWYdG< z`zdHOdd);ytw|!N?*iGnkIaJS-QRuq>ThKfbAw*{c`^GqEG{_>l}6m|32t}wXaF-8 z1grN%rVZMxXRC?dX%}8|&S%R;o9c$7 z*%J(&EAfKm#PIsqzSxi`La_80%M@STBd)ilyIG9M$Rt@0_&VqvN`=vQ%?|q zd!jmg9ATte7kCWbbCWiOf6tE7AwU;5m@fyudObH)G_9e(5xQ*!C#u@xY4T0NqX0`* zexxgg+fDPN28x}J_GkoS9w?CNqpW?Ei;T3T@yX6Lv)|uKK+|+ksGp#T|JX?cr2B|+ zTYtrC717}5kj*0G5P+j^Ol*Dyt5WMIy&0t z{We<=xQ9kNg_~y9J;MJXIdQDfx_>k&x$~|tHZ~TJv$razwx%eOc53kit7g3Y5w}`d zrFX!x`RN_?{T+5&>%&AFwn(}5U5r$LT#r>s4xgJjp#N?I@q;D#Phfx$b7jK8U&6J8 zbZCCc76T3zxvIRXec2DxPg0z7MvH?JuUS|Z86k)RAPbB1_ zw)yWa1Cs`xtaBx9QUTabt~045+}jnjFPT#5Cw$A)jPkq%QEy%s5hKC&E8)q&nD%ii zZx#E-X}O+WU1oO?2j?m5z(^=S9veoW2EbOGqO(@?`Cj}Wd2OPSgZOh9l2r^aJq6YZ z0tKHCG>Gl|`#vT|)k^x$8iC3P&ZhhkBuo${4J&c;2JK#OdUWniAnv7QTTe{T39C4u z81T{byWBS7K?e)hCXOHs0d~8Mm_1{zFVtzhxDy`deufP&n^K;~1Ks=~+-K$Lx>7mYHqXr#)bv*LJx%P5&x?Yct zNg(fV*H3*j{*Er+_0{+eU_qQdO1ISpm{C%}flQyc!mVO@!Vc$?0z^(bc7RC3NyEmw zucY15au-#FcUN*a3TgIm<%ezeiE{@_;WwA7NX>i!%O&VKdN5|8DQsa;Ur`zGq%b?^ z839nWMsQ%@lQm)!`@d@8mpu>mL)e2Pf%L@guvk0<{3!uvrLP*Y^s9gkPOTm39uFTs zBv$ztX0Z&s^2mZ=NPF{%yNaCAWYyZligl(X6p}-_kx0BaYn;&gFZ1T~Q==5o($^g# zhTTjSp$>*92G~$?{WcagM9In&LPPVyfSwZP2TGv(ZehUtD_-Nc4s)Y#?of7B?7|oj z*N`fV5r(VePh+jETr}*9Xc{&1Q(5YgrfHINYu_x7g8+j)TcNXp+dS*pQQD55wwPX~ zmcRT6c!26kiz28LHx8LJxd|6SK<@U2?^-xf9{w@z$E4xR3*@NXr{$SoW9{7eDPj(+ zVAwk;s3!%^rr<-6*I_=xNKn@WiZ9BB=Vb>lK|lLt?GTAbV9ee&rqZc-BkZ3Hcf9}f z(D{B}{cqH;ZLBS0i{E};U*n~EM&j;T%8Y};5E6A25*+1^<2u4k+5#9OR~RVb3VHKu zevr!$xa*JkZ+0QK4T*7TX8F5Y1?g0Sa#kk^bFyPP87sjYH6F`znb0@pg~v8!8qIDR z^X3pZL-qdhnF&77x^#PC_9nk?J{j)0&_x*q&_50XiwtU5&-?jEve3{&+ryMtl^w-7?ldh78r6mq5VSs>zy`PSX9> 
z$>8y>TKeWq!djs8HA)eMl%(4r$%4`6_u)X-;tC;3yaf3`bo^H&VwD>T`5#{JB#bGb zh^aCTCO=z5$6blW`)pVwFTUT`@wq$X_Xp9ZFCQe#fRMJW`cF$!-mH@SS$%R<*fEMz z_%Ac^wiz|}^5hp?sNOM|zQlpa!zdHs6V$NQ& zr?#v%`%hfI7 zsa%EL`6_nM5D}{&p9%mUNTG=|E68H8v3X%n^=ave2%E!MAYLKb1LXVweslL3pr?Rw z4mgfL3{q!&PXc2Tm&Z{Hsmhc#89-sgUrL}XV;x3@ag?i1oofVsRpmQ;Pr^*K}Az9yOZBq^h|C%q*heowJJAp=8W zu;2wiC{+UG5MY3jzRuW4h~{95P9Zc!*zP3-5B%3zXrZB5DXan2O^X3DVrs)+4!dXO zZ)POSXqb!^vDO)mqXm3!$9cFw;*P<*0m~GhZ%rWAO}HO>jHD@;qduQ;kCzbB{1VUX zDpxc_sdVn2Z~zSH2BG(JzGbv(cWD9EPje1c7>~o=(7&bgAAhSqJOBaC{`J0#@8b~& zW5Uw!?QN>}xxi7@+!)i>PHJ+S#6*^5GSPru-P$;Rh z4ms(nz-~29k+xg&bOCA@PmM~W6ZX~3eiLY0&X}K;M|ac!hpJv@8{@?tDwIN`%w3XR zD6af`@?=pzso7wI_Cn!|(2H+qW00)?LT>^rq4%5=JT+{_&X%e%-qR5j%c6#Zi$C7| zF7~Oy%N+^9V3NZJGR@|znd~BXEkXHc-NM}Fn3QbVwRrFpMCA5pb;5j|-+{yqAh6E( zf3^qY!I=Pu)eaGTP(B~_E+w3yg~8)Fs;&2#OEV7eJ_{Bc6eKg-`L7Xe_mu_uW$MP>e&Gv_A5YkM>F<6_Y1P!C+u00ZCDJan`4>lsVR- zjYd0Ud%Fp0O?w4Wt#*{9D7}8VwY2eRy7l`NrHNK6;pODOHyZ%?BS6fJRlC)K?BRM} zDl!@k3|zVCnM$#>>}8QTu{>=_JT(oGg!tQ9iFO1LCpqw1Pf17O+iVdtiIiV;WJ%4x z_PRJvrIL^X7~Qf-tHBO!3xN!KY$3))Cg2cLe>$G$eDE1ov#rEKoi!;K$U6eJ17GPj z{>0$WTIiQ7vlD0DDLGT<#4QhT`SfwR>Ubq9O1-uMX5Ea3omR!x`18_+2v`u=l??au5Y;6!P`=K^JH=L5%y>udUo9F!Zbw!F^j6-_|+7;UiV(C;Il3X!F znG~#wKb@wmG;DZOYh*LaEq5Y5v=yvdFj79;E|Uboe@TFu%v3etZC%6ZUzgVx`|=9- zpPMHpoYVLn5xO=90H%-vG$!}D=a`W!H7z9AWANpU2xtxqIv^_lx;BXm$OZPG{0_ID zYA_sRIxWQhL1|nNJY_ABbF$uL>dVv5uI2#|KF(lefh9)<~$n%2( zJ#!~Q7rl2A^IKEV-s}nIk1Rpng1N&;`fT{Fn}_F~l;GJEEv9uDoQ%K(_^zw{xKd*O z;MRI+1LQ6sb1+3&ZxyO>Ek8LXbN{!Z^UOMHM3tYPxZ1mm6Zk8p5zbwQg6~!igUNZo z-v;8>Ne4iT2u#SopQ5uI3*jHr#%iHQfcxDc%ok5t3a z?K#BQOb{{#0C4OsV)zO2d;myz+0q+IyZju0wKV{O83WO*-h8g&_MHz&!CvLS;aXcm z37ndjE`-?!#C;z%d*?V5gGB1}&p1HT!ee}aw#b!SCv9qpN%%(N+8=XeXu(u=74jVr z*kl2L4d_KU7@wj5=!07lGnnt+2EvW8gF*De8O_d${>RU$;HmIm8n77U%3{aYQYZJd zkUv`;Ax6b!^17Cl{Z}5_Aw&-z9kxJUYV$<}AP{IPb43vWx;3TX^RR2z%X)7Jay;2{ z(FZJ$dD!d+JxNb*j$FW5y!(CV|YBXjpkbk6nd_YV`f}vG8)ee=tYM_z?ir z95Hp95+ZOmMku1$Z7CP>R2!t=ZnqkusdQ8T54UtzsU~?|8j$MV*6(_lOYa4kS&1#l 
z;j!#wgaOcEqCmlAW;xWHG)=4K4sa7=c&xc5&;Im}a%kcT(6n0TsON2=)?j|su}^8G zSF`KM<>-3XCDBikuKoIL89m0JW;@v!?B{s8zFcb3z^Sb2{y0jqK%q;CH_i`rf>c-% zUpl4_2g7|&J|HfwzS&rFC*^u*Se4%uFN=?ogzu8+#xU<7Wagv5_xF8oE<%z(>hJj)#Uq>>HaB*9?nP?@U%K3uLN2REvRVZ-4l1nJGCUn8FQa4fwD=WH|Vj zQu1(grjYCC^b98&(xVa(*w5GL!vwQ2lN9YB1^RXfeJmpLtdj9KBa4x@qyRud0#(iC}tp0gluJ<#X z58|Rw#fa7w=q%~h6{k(N*!WKjBQxb3DOrAJpjc-N;cx`>m}Bf>n8c%|A~!DC5e*`} z4jUl^U@iR}(7_ZBJjK!x_hubN1#-=Cqm+Q4!3A5)Pp2hC^tbGJ-MR54W6{W5MCRvU4I<|kWwvj4g7*{(ZGykYrgs=357%s6}#E%?f z;l>}+HW@eDfJeQ>EFhgkp;DXp)xE+_{&SUSVnnIx*$>8eB2|D)?9>3a`sE(nE?l>v z0c|2q0w4zixZ>9)gM117%hMh+b$enWrmXU}EDRu;2&AZGlzI9^DTERj%d_?yO8o~i z2seh37)l%PfU~q+fxFQoO+9PI70Ssy)q7;DOs|=%%K@D7|KB|~Q5^tr5u*e!YJ!)` z=OqSR$@XqlZ=gHQpSb;y|B~D3NP!C7<6klD&jF1J^t6MlOiWBy`;0!%L$Rc`LD1pW zDn@w!I!=1DTL%2yd1>vC^*Z!5+P?TKc&!*t5bT8mVJ zkDsdi+o+P2(Wgh25wj(N=$5;-@u$y^Xgs`M2VHIw%X#EQHa=Ufj3;Ld<;&A}ic6hs z-x|_S9mk^P=nDzmf z9bWgr;3#&00=Qiy81}F;(N~q4TARUvo0P#0!7qDLg1$KpI5J#_&s3=w$>xkUl0zN- z3!|7qu$n^UOV%Mz>4K$DoVAhe%2vfzUrN8hFXspt0=)(DA0=%k9a*J5RMp-Cp4XOz zS%O!3uhRD8Fzv~mnl9L;u(@*Ca7EBYzTGoWan@DB^cX;t6Eb}MP_EJnFrJq6 zje(rpqK`)9BsM=0;#j*%^(4-z_om?UBSxqnJJDUHmq5o}P5O`R9!n*It{luUps{Tk zRugOqRs;+T5>}I@22t!P=6+XtqrphqiuB*bE_>-PGNbr&;;!1bAijK(T$) zTt=c%leI2U=8<<%fPj*jK}@CEvKtX}l7PQC&MP_hS1#a3tq`N~M8^f&4L${dXf`-JkHuCtm_DOJA zq0jrDu{GNYK39^->+e$of!6pqpr<1%(Fwq2bj83Ta!U^{G<4qdzYHg4ChpK|XE|pN5 z!@wlYsjd6d#lhWc*N#&i7^Hs#N&drne}F==ri0xUMdo+f?g82w+_vd&5JD@B42b^e zc>q-Vtq(S3MNH`eFAuQ{WDq10hSZw4a=8)rUQ^I6cj1%SW^jU#M*d?B&X}?{rt?Qg zRc^Y*qLBMN@TQ4R-XxrKK>KIJr$Q;yX^&bzzCr*drjqop+ruo^>LFDa-lyrLFGQ*y z6LmZU(5E-^^qPlwFuKH*n2#A_MoGWV%l=%T(wmM!gu_zRZnTE~0g3RyBEEj%)>pVk z7uWlG5hu*r#4>b-04Y-&E%*wT*>F;_E8>}N$kzXE8 zp=Uu3UvOIvJ~i9MN8*{;h|VIi+2KJD40X5c=g_HACpaWYm)V`{wL5B@dM`<^97kRY zEbT@%@pqYGh|2omYjZxSO+Rq|F)5|}rJg60e#8n1iFH1$OAxT+F3&9mtR zJNo=w>GLv3dAT8#6b#IkRVc;ONKG~x4r%&76BHfsgA_g)k%gh;B!d`2TTs?p3~q#_ zC6(^$P3#z8U`zFC(8_}jRMT6?*pe9u=6m7YWxCHT)eI^X_za#rHNwCf;nfy7xSQ1x 
z8TTrS$`jX@SoAHDS3H}mxhle?!Y~zf%Lw#HlA_GJH8U(@4^(NJ64WX1B`CYOLC=8< zuG8qEjkX%K`(C$YQ0C}IhPNT!5`UNb(UHsvr`Ii7rwhHdV=%KO)7b~DT15f{o)c+E zbNkW+?IY+wEp`hPn*SE^4MgJAFQ1KiPqgb+b8daa?e9NOP*4aw4}ahA`FM*X>o~I| zDNL=Zys7=3T$jFo!u>^YYO#M0*?fIyTSS{I@uz|tbSP_iYF*`p}oe)TXstS zc5hl)JwA+Vq6R&lR*sdihpX?=H4$XiVYJAo6%~w^Ovm9HDjEnIzd>b2-s~Cjo$)st zOo#LS1wrSCImGmz&Cz{i=QIYQq9<5YZgZpHf{~K$K{F1a#Nfs%g1Y3$_cuzN4P|)O zqVakSkw2BuxY{Hs^XZv-)$^Yg7^9 zz+87a`rK1NOHs&Qq9**f6rz66|k}1T<_dO_cye&*2-u;FJ2pFGki;ia|mt)?pDimqq(&kDF_2! z9RNPU62q~eiLp7+?@*-}#H1LEr5Vos;`F!CY3_yRmgpWS_(HIizGxK&y|E!=k-yYt zV;CQ3Vm}*lpOtwtcU#tMv2|(M7)L5b&3BZ!v1QLRPKcUU`> zTD@o@h+C4$4s>WM_C|!BaZqr$SZ`6Kz$>6btj2R-iB%9S(==(PV4~6sOA2@F!PJA6zeyMUJXDR4%m`km3$$7I}f9=`kwOV+(d_4ziEk#VYnJn42Yj z(SNB(Gb|@-3(fY@J@yF@?Ap}+4dF2zMYj`9>S2XrtER&qE$Y~EkRnw(wArgBpXRq* zVTnqA8F+Xb34c&otGx0eLPv}|lSL4Bf+btIR|l@bQeE!f$Oy5*9sEXy`F1-)bj$NE!;SI6rG*5UC+h1mmHhC2|i>z#fy3w+u#;%~Ypd z_zKQHHTVfKJ$dX&I@d|8i(N?G784$(Vd(3|LQR}w9O3fy0T_2@#w5|y*KoJ;eOzaJ zoiTSlIMcXQ+@Dm9UzzUPX~d2eT6DOLe}o@Fg+w!>Lln@(YtT1UudX-0q#0rc5>HKH zi8u7Y<+_g-xltLXym~i!hJXZY?3&Xqa#RTCgUH7JmfVzN;ly=N>WKB;NbN4f+fa5j z46mx~<3lyuEM=PCg4wd`l>3oFfVO?Ja1zqht*v6OdNwE3?pdIsdC$8m{Z)+ThOA=4 zC7kpaAxNwumz!Pc&zmgg)~-I;oVc^IQmIa+u^|^g>fo@00)D`th!oJ}N34Q;%k-F* zz>F90^TYNTR+J=-z&TsX`A=iF3NJZf!K*$irS}bBq(@Jopw$Ph?z-GZ{4yIFVZ+K< z*=^FbFC;?*R^rk{zBSO39rfWVGIAS42)x~w!fw`#UGkvv&GUVXVZR?QAO z{dPEmjC-0T8}-&+D9=5b9ZK*dB+#fRTY)YiposrEIMxkJ@#aSrd|!6B%w)0m({yBp zju8s;(?vFcghTnk&TQbDab4wS|AQi&ehRl9D4ySuiBgocS|xf+=AYMBva9uKs0gK{ ziX3{5HjeQ=_sQ#vy8ePH5eIc7$Gzd9^s7fAD?e=l;g zK~oSTYjSY{&(C|oqAe--7d$NV(m+IXo>`9EZ>yM{PYH%7zx}9D9aui0l*~q<6t{rQ zw-{VAr$s9?){DE59O4%IT$iuFEGtIT5_iB-<|!If4_j$4Y_vB*Rp-eI!MbYlzmH2_ zA>P+Ln>@D5`O)eQ7o@yqY)JqQ2+ebJw7Qnp`hG!d39U5h_d=_x3SRjkw_iuvdXYPL ziX>cgE&80X?%#=*I`yHTq2uL@NZg;flyX(Pv}qpY?k$G)qmFM*f`0=s zNP&%sL-NQ_$`dVh1-A-WtCb$KJAcQ`9StIv2%9@{Ol_LeHmK$qv@&x$4!MC!6qXL1-p>~Y(2oaS#aaz9R6nFLSBZW&*e5f>ropqU)%_lQIa#LJ->TZ(u)+(` zXB;l{{@6<$KXYGJ&T$5CdViH%o&lPYmP;~6)bo3nsV43 
zmmtA{VgC?9fkc-;1;asA;`4_+Qbl08c6N-4N~ES_3V=dkt6qu?%Bb9WXd%$1U}H!F zg`({^*=tPN#FiJ!3dk$sTj|!$;1&KdU9BgC(|xgboXGcs-!!N5Qs0}~R8{xlbGaFv z@BIC0GEa}cu;|6({qzeM;u3{qm;7E>`vft)W;4Ioo5qe+3%i=KG}Q=BOY+HA*Uk*p z-g~6>mtt>dFvvGJlu9`&rJxmTzRg0)JncGvg`2fAfm6GRMSpNHw2_qptLTYIm5^Nt z&t_or@HQww%d0SFpP+ntyQ-jF!!Bm0N9DL=!!y}-r}_NOq||=dK&(%qWaKkxhvM0i zm`BRMxA17ihzMNuiXj8tOoxi1(Z7o473Om~^#>db+Fmg%r&Vj;UfQ^X{zPy-r>oPE zh&;cv+}QKCN|$7^K2!BSm&$z1pzJz1F@Fpm3fbCTp3IjMc|UFY`T60nnzWxCj#)P8 z+Wz_2x9kl@gw1UR_A$F|{(Pj3#n}>3`RI7x&8GE)h4(=|wSIQ7O?LED{Xjqx-s;8; z@73BXQXr|lA)j-+O-s~bl;0F_Ojut+6YvoVuSSXZhtc{h^gXhZG)OJDS8aBOz_K^X zJ#q}ogPxgGxhf3t*s7OL8hQb&a^hqiPe3q*b1IQY$vXZGzUgs}Q3Dp-M^Kxmh*Ayq zK74_mFUb^buZImQ7nid%J}a}-t7pjSdLP-$PXJSiKxyzYIJo1~lyPDjx1RTN9(Z@j zZxaGaHMrTAxj7!G)P_n5Z}XKiD1OhS*Kr|Myfb2^-%V15q`@gU8iJ~PWzN>g{DWU1 zbJ(}tpbM5*y32ky`S*rD_2|<#>RRa5QXxIHE%ks`9x}ArDfBWhSytumUV%CtdWbn5 z27UowO>)H<$M8#lx;(Z>21{Age9zk>bQtMMdvjpbyQfTq@-qZ(0SOow^As~IEvbM= z(BW0t+CM?;A~GGM1) z#!}|GNr8oQ(!Gq|^h?(zFFa^iM|yMpeN@nj;mJ0)ezSz$L+|NsQx&U|m#;%FpM2>N z`r~R)2JeF*bG1@U9@|nN8UJS;z8-bxXRXwRezHipAC}nKeT`t_q>{qr+cTf`0?>*7 zlo*PE3y6?HH&I$K3$qH$wOdKy1YaxeE4NXGFiG1O`BrCf1d{c_l-_*i?@+ALVM6`n z=iTI1@I~;}N(U$+4+Wa&lTKYs6a`*OlVF~y%Ncvo5vEY^@liF4ObvcxGz2rkc?QiY z;WNytOy&yACha9>-;^Bdez_{tAgV~~xDB#}3C-?hI+5nU-IOS+=|l#v>%~7h#Bq8L{c2I7TCN6cihTfzW`g%J zLhLK!%c-NV#Q}uhG-FbHdH<{MxL|k?B=A%(9pAazkWX7EY5QgD<=uq|kPG(3C;mCs zg(6<$Q07U40^cdViPcG~?@KPLkqigLkEO%x2DfdM$;_vstsP~v4)<|}ctX-jYNO{J ztA}QNi~Ub8%Qq#{9ilc_WUk{rbgS88M00H}lJoJkgg?OG@1qKRV{7naLMnM_R0@ zx#~i*A4=v7Ihv#ZeAS_MxXlJsZwx}o0$s8 z2RUCI6dbSw4%Ra#ZoaeZ@_62P{8pyhlgnM9=y+FGF1M2qoL|w*=eL>kw0xRVr(Nw#Zhau47ZGyEbbE#w zw-y|`@LY6q=t$9^C;Tg^Kf76M`&v|akWonVtMlrxb^m*x#o^dP!Fr==+nrPXp-Xn< zR!@yM=o8o;(DppWg5x3!KN{#`cJb8*fdlN`EwhI*ta^ zf9)%-FZQyq&}Fl+h&o(+V{eLB3}bK!?TW6!BXT9 z%&Ekvd~-DIbzp9V!wwWY*D&J|c17=RixP~@az*EXd%u2rAnDh>VOw>`H@1ol=CnOa*k7UEM(gqWodLYaT7Q+)1X~*$hHGK;BV!tGQ+OSgfit2 z7b)E$ffQMaV35Otjq!=YM*sG%43S2Y#Sa6Q&p%RxxB!qMRi)8n#^`>7e~XnL&R4(u 
z>KlFk8-YCDB4~FbGyilGWk0C~yeR$)8zW|0c2IdxfMd(?TNkU4?hqEzX{~X)rej1} za_=-+dEr9~6xda$dTFG=u79EIZxsI>)Nva_3H#5!emE8RyPDB~2~yh9tev_|PGXOL z2y~?f)29=oa6u@a?f(j9d3xMWkSC(XM=ODz3acOdbTMoHz$0$n(3WKBZP#FGj;+!F z)b~p{Pb^8j-+kSSl9^lC?OTqW!llMWnvD@^_G)DzOyNR`9MkneOIy?MJS&AkPS;}* z?{-Re2sMMO>UW$}l2)#YXAey<-5dN6llmyBcGv8c%7TSoCn_PzT=#it8)N|AX@KS5 zvIB@8zwy%xcA&yJFjGm#0Gvb2-n-1b|5Kn4>@%@a(fZZbtb<%#hp*v5rHAMop&1uN{^l1maBiM%o?uv!Pbi4s#7keJgsBL_qy`j0 z?3Ygfi4}?rFYhe6V`(bO;cJFUt?XhZ zAb9V;;uyl%tSwcUoInj=h*V@KaRJ?xyZO0&B943i`is)J_s7HD+g9#qCKS5?%VB+Kb96tg=F%@2d=_xchd~vbp^pIt0Yh1cz zE70bt2bI>23_6{}kSBIf{QsfocoiJ>C)oa=;}dGq4$ZPm<|SfsvC|I95bH{9 zlo`*)D>X*2;2Y+3lm9BG&4B+4cj%87ia>U@WYUzqhY}aOV$Qc;^AjpG&VvN5?fdk$ zzr?=EB+{>z>`!W2#~~z0g&(Tm<72t-REzT9v^P z1UU{>^7q>$ae2t0{WxAyKAH$@|DP$ZD);%gI43u7J$_3CgdU1t^q6;XGFClX@M(hS?F zXk&fRTz?Gkb5wu*|^+il=Slg&G`SfaVI20g;&&Gjg@? zO3Nh6xGDT9rWs9cD^Md#fYNr%#ic$f%*kQpKUyw^m51J+J7S-Srnp^W!wy;`jtIr{Jlr0%RtwOkhv6WCP&F+p9G9uT{MR-|aYn;~E`nC`DNLi(IkWXOp(Wqz&@ zTKp-rax!vLZEMc9!0e6cB2LM1OOAf0ACb%SO5?wi&-dYARQ0fCTMXE%)70QO2do6d z4Fe!eSaP{DbnRa@wk&lTpJ2%gmv%z4Hi*^ak3?GPBWF5#)$yla`RT}eMx^+Ee`6q= ze*1}#r|{Ok?D!kRW3*QPo~yfgw!%&O``gI*ynj6xtZ&@^&~#Nnb#=}1;O_1g+}$O3 zaEIU$++BjZLvVL@cM0zH5!~G&xWnD~tL_WM1E<)v&zdzo-90nTUD>sxAai=6g8M1Y z2J2=A3!zluJj$hr($&<-&XrB zlJdcxq+6vLUje|mBdD}Cg>Hsf%JW1V1g7$`p4EJJrif|-D8+girBb5GkV4t#b>f9v z?)3uw|5XDgA41Tg%f#;FQBQAVQwuC0$hNvGq>l(a_}}sO^C}5e`pvj_(`~{m*HsC4 zNme+Mhn%vP#*q{q8~w{vn_Co)U`3=yrQY3$B!ysPfHZ@F0S*ft9uN-p;X6I2CnFvm zypL#G{-s^}u7O)OT&4yG0j3$x9$rI{02FxEYjO2t^tcm;4G_f~R+sxst{bXjdiFWm zk3ZNR3yjq^R_u}gp}Aw7^mo5>gAeh$H(~C>UQX{_+2k4`HVV~2%Gj^|vZPTK$YjX% zzT)+xWkyoePNDf8f4YE@T{3(gKulStxFi)>9J}-qg}W9|+|BxPcBIX`|Z`f*Zqlo&l4GCEZo9hUW8#1ie^~qyX}${8q@wz zAQJ5un~++sA*7iTYTqyxeY>`WE-M)nz)J%(kzPMPyav_9AHI4AEsVYB%EWOv2$ejy z=|8eeBK`$mJ;?0OimvPo_8KTlEFwd!rmq|N(-~FX3qnYL{ahZ01EeYle-0*{Ypg)N z3m!h?nLU}p(B51C7v`{H(1$#C&b%EfazN9Fll!10814vqg;ab zTCvpSN_OnDe$74Z17>Lf)Z1R?*1KKwHNe&Vi>PMRe-WiKmCkP77YGH|khaH@>0e)7 
z^dFW=Jvq*HeiwhLrgkpxqA_+WTg9%#ekDhEKIyhPrK^g!EVT~P6*h;5%eZ|STPxl*>Sh-aGB1HS*hQ0GI zolF1f=47fov1)A^IU8DZU1ODY@y``1E9P?Zcn`qS{qSGy-Z&CGaPEth0HlX|{l2n} zxe(p^=x?m?yJ*28Q!7objFc(2CdN*QPtBy%s2&mmBCWX zllt%FPYu+@5LYpXUEB@lh4rQ+F;l(_qc60k<6Lx1E&1P_^`Z>_?H@Lg}TVAX2^XkC_VgIpm8G0E!JOC4d6?zaX9y)t-(J4BTei%L zZv4$XOR{QcR{nQkJ>Gn3jR>!mxf?t0VwkUcQytcSfYUGhoW*wA_Zz5peqP24rhT)X zyfHmFOiSMUV(_eN@=U!K_EAxH*f_QuuFq43;!EN4cK|9w$~sr(+|PiRHT-rULhtmY zG+`leIQkXbYs}F#kVw=9oYaNLA1T^nG|WPC#0<;%DNwJ zz1OM;LjoJX1>z(|vzcnnq{oJ?aob3PiGxL!7EE|lM$8ic>g70I=&s>?)2~(1e_0c# z5samV0>0~h@PIG>H?I6P2)>*PW_n)IoUjJa7^jFc*&#l~@d#`8ftIci{!MoXi`zLJIS~Pd>iI9YyuONf-oK{B> zWuC=4l8R6fD$QMl#9K=VK_GgHC91In(ThO5@4rpLIBK=zCL*S zgJ1=~-`*eSFyg6yRhenVns?1}-X~C#pC*0^rTBn z=mw_wGTW(d9(%0JTjpx@aN@76CfJz9uzMvTC< zHLLf%?7?E3ZD)xa*>s8kUE#^;D}ATgN<=MoiT<&-+7kHO&|5VM1m1o-Nr!zZLYOP7 zq^JE-^G51g=}U*jqN2JY!A9%EUa#edCam@x=eUAXHOq5X242GH5mI+_bAmj}apwTH zk91>(9q<&Vyg`S`w~-5coTrs57w0Lo!P8%AJbD7HbR;h=NQML<-3|%E>xJQF)?57zo?OW#nN|j*A+< zaoCtKz56H?Cak>vF;KJBW8{6L*b(o@=F!{DN={@VIP zjD(iqCwa$b_jAof!-~V&UQ3~|&d4%22qZ0*5#*g@>?6(GDLHWe$XXQ#R3iJtp(v`! ziIRYgfL2M^-i%9jp%MBwOILjMYW2hUow~vahvJwZaQJhtsXmKK2^vY@^j|Hl7lS_P z4Vr#l-N_5pPW}{dxsgq7R-}saZs2)jBm?ywloG7vPR9zV&qYm_)Ry|Wz9Q}VUwme) z+|T}*ZDsqp$(p@M!&R$Yeb!QmvD6n1ju}OR2G5#-9bavz)7PU94f?OcEbPd+B{oyD zde%lEyXrhOkuR{w-dVm%VQp?m)JSW>E)`I0 zVsXyWHk;+?h}Z`)k!Y8@Y}~GrMsk${8}R~-_6qQHn3|{=q$mJO!*w`QF)mZE%*frO zWVz!)!zxd`%;=S7(5;7v^lc>I8FYH(G)Q?EhC7j00shw9LQ*n$9)Lf(gp2K@kxV;) z5CQR-6yBvoW3$!skGp&8$LaYilf_>9;?)h-YjyTJ0OE1Oed`|^4(v!eO7`$sBJJ(= zD-g8ftkpq`i{>kl0Js;~zMMRc9=TZ=AiVB2cRH^tH2_6TyGYQelv`RAf8j#$|Gfar zWjoIdZiYYqldDdp%27|Kl&Idu#iH((ApVr6?-At~k6ju5^}MN(S|+!gB14YQR=y!) 
z*7&E*1`Q?{$4Rv_c`|Rk%LW7=dgPQiQyJmv{a(>m=VF1Jx>jx0I#FE6wO8Iw3n(b`MvjgtwOMY zdH3T<$=dYt-DQI*a9%@m@OExd93OK0Ju0)ZvDNW^y{{U-#Y|uqG|GYkLAm^Ycf^z&xiG;^HESpG|2^5SolL3RMxLaRwgk^~ zm)&h1bUra7!Giky5mYrMBoGmqck4qY3Zcd04Mpcbj+msu=!TETxoIUKxKo}{qI-)& zrI|XPykfWU2+Ejo7GO?a;os&cVpkk`Q+tAK12%Dq)y=R>gozyiVe z>EV4h=H!)V!L?4%Ye7M59Mr}YfJ%z3!Z6778+nH!Db4J>rBaT!hk>Hx_@<1>91pnt zP^|mf8H^1ts?zZitjv+5^JfLPT6Hb}zLj{ch)4ca`96BiJ?`_gm(yGryX|J+FiRLk z@8he|CAal#`1np+74$69|qseGL9w2%+0o=gCQ%lu~^)aCV+S|HZUqt&Oia>NCexamLO1Qsa^xB5uI1NHoTWfe1D+yYIaKxe8`q4JF2L3~@%H~Ww z8ycpZpOdx;QR2x9#c`;C$}lmr3QF3lgg)9VQb+kV|Jw8$5&FM6uhrwtoy^tBmxiYG zA)8lU>^Z-?T1bDD@+P0x2cRVcXlhTa9pt?|tyz+qtzA96yd=iQ_qxs8THo3dw%_bZ zeZNOTG8aJQxoyTjlB$7vxw^LWq~=J)NVvO#ee!@c1YUKse1_2MclG;*7UFw2(6Wc6 z^u%{~J6$e8lzMKFMS=OhLQ{>4KZ$owy0xvU%j8xxI)g^W5yz$~H<_P-5>21FwRR21 zsT9KbYe@m% z2M@1|$Pg+@Cu_2M`oR{KCdgpdma0enw>`r^WHGWDW=i818ioIi7#T=}MBDH%_W+J} zEo8TyzED>sC|>1FM9*VhBQr~lvl30)CEwiEql>W+O)&B<^U|0>mPwqUzdk7V?ks>e zFz8p&5(x$^ib;l@?wb0`@T|i2kFGx%W1k4g87?VLraFSc67ot_dx+R-j6rinsk~{w zMoIP!n(bkbFYmyU*Y`yrj@}&~p+M_T>T6az?S9xr9)mA5wyR*MhKXrN(TwNXBJ`_s zLtSi_fC&hC5Xw8;Sp2vwIBs@fUJ^K>pBFC~Gn8(Kpy4 z|*tJl!;=xYKm9a&O&IajSFlYP?6N zk2VApY#Lw}1P4d5^M>VgWq9SH+$P1L1!SI{T;p9D+h}$#Exotz#hov@KW$0W~f1~(Aw#DC)n$?J3_IVVx7=nf%Lzp2qy(?|Iw@{zu)b54+;B}x^D5i z(7((18)MKiRf0eU;t0fTgPtqV+eIA|nV~Mzbo{}(h2Ll_8$51F!4ozQkKE$l8;9pt zBKTRoW0Ff>{gbW!lleCDL#X?z`*|T(KGwHq-MyAX1OU0Feef;Q-31@R!RGbAd6fMG!+irlXSqZe{?TpR9oB?@7R%_(6~naACjy7W;9kUAu+e#^#CUp@Zz!SuczFvmqluX`S4*FJ84Ufp{^_$F)8CEu_? 
zSX}R<8*#Kv<`76%>p5CKC4_Y^$*p4_;ce95 zDS8d3ZY`d?5S;7S$hE}Su$gFv!wWO>n53 zE=L!nxuuq@{V^_XceK+I^46XHf^cF8$j#RdX5p{*rcK9`H8zf@YNz^D>5s~qM-!Au zvnje*hJ70>cJ%*lnEXk366-T~1ad2Lxb(6oJsEgjr@$DmvR*pJ%Rlw9Jrqv-3Q714 zZ?b9f9#h6df zYg4)yHARoTSJ)+h!YC4+$7W>SI&S`%pF)%{#9_j%dk&2IItO1O)xf|A`)f45G(>|D zyWkMy>cs@py0~ zfX()g(=^>k96KV;#D8&fd)gZcc$8v#bM8LlTCp;eqDB<~cnh&evG72Ri6~f#K{C^I zY-0|BqEnp7Z8>CqpxhWqL71wwlNkyS7npKg*rgAp;P_$>k)8NlcvLtrf6-7+lqXLw| z_1{Eq+g2^b!sm74nTXHcP@FgjGe)pNn}K6YtHtca_#I)=7!La1Nej~ADe{a8j)@{_ ztI9HE@@{3l{QwQU8$}&>egK`mAH&=|_0~%@++90Ju=I5p6RJwlkJMaiaFzJYO+)wlzXD2=+qqmO;BR1T zh#%m(E{lJLu==qhJ9+x1e_woet4-G0ndtkVQ63d`cOJjqW!1Uan?i=93e1eU^ z6=5jA(SsFnAU0pnGhK(D-a|vGD2;xS>Cbzz%ulQTlutxy&a9)RnP`1PChrjDql@1OHtVTVf+>(wTCSn1kSud5rP zPaD%S_eSO}SF3yT-Na5zogsD-M8c1&5XP;sbDyKTmr@n;UVVh=y*5(YR=w?SShV4| z(zmmACN>yM;`0K)Veg)^d(Xnl8t9->u2}G!~IfLDSg#E7w8#zw!H6zc8W<|I64 zl>xh}`l%8dr+^mO5{kdUpFZZa-c3)qx;-O zoHN5qr#yUdjNZd(W&{&_=x~4mxH9s2)cEbq!irWUqIgJw*h91);|$Dju4*C3_*9Wy zEZKN+BJ)!M^hKg~%C;&%Nd&-mG$0TYPd1!qZr+t#`95~e0A0V`kDIc}eq!K(aT#d^|B4n1e) z2*o1DBMlWjXGH$$s>mOfG{E-~1$wlYLfcHKCWqC}-CtLzq7iI=hZ#U_yWvcOv`pel zOt^r?>38`))mr*-^e(%lEol2AY=2#!lP3xWv`(%9WmfO+zu1LJ^Yr$Sd2&P*%XCnRvMwh)uXJjB^V)nzRqi`?%j_6}#>h{ju8{5>Y#gNa zoCX-3n4*sXLpddTyZ+?W5_qFl_wM3!Mz{p<9t(|ZATp%-z7sl11BDqt)MgE|^8s`^ zA(j~IGwekf4@;t@elwtOty;48XxnVmnaDmpv{fstO$cM#>o!oiH{3#A8iwCf5#HTWKngTNUDvq{C$U8*nf)uiM2X2&cr5|ejJ{M3*2n=lbFI>hYi#N) zrSR9WKTEspNjpHBR{tY;#vEOC&x|Y0V}L%IxAc}Ik zXTKWzCz+e)5vR;gGUr`;`Uos=#zG*SGCwQrFPo1eY@*Q-F4_*=yRyehf}lYnk)fC{ zw8Y;T&#koNSi(at2wKv9ER$9kuni#a0R1AtL^@=P3IyIwL47jS7{DVkMYpl&^%|Be zMyPfsWUe^z^1F(%@$%9cg|87BBAqFbPs}eu1v_l7ue0r3bh2SJ!0eg>rytqG}o>zljlQu zUjxMog@x&eO-Dp>s}^gGvXeFBw!{9`5~|f}xDJyrX9fqwGQw5m3~s9j^hp`e^NEIJbyQVD)N>+8hf{LSRg=x8oA_rslLG-RAQ~~z`b*)Z zS?K-)(%RMtDr|EXQN~DW?8yolub&bGEV>+FHZW#`hoUsS-zWyV>^DP=>(HcKGS!D_ z`n`kUsRS0N?yj)@+Mt!$yVa<2zr}AVB^LkZ7N>1%_}O;$=Ug{nS-q1~X68TY{Vx1g zZ~UCLb2_**rNprQj?>;}Z&nqWxmA0=ei_BEtKd_Ne|!Ya{+)IT(XXrRZlJzB4r=CI 
z6$v%2`2Ig~9D+PtJoQ)tcH+OepBg^XCVkezwm`eYlx#{;0UVVE28>z#GsH_Y3ZGY7Op@S8NS!B3h2oM&&Y ziMSSzv5RdoeyW^J;IfME4HPH;7XUqLj{F+(wrgE7i?!V&^43%B>IVP8O&OUujgXwYKqbSww9D4dlGb-wx2@L_oKMptmMRMP7cW4r zrI?*z5vp+N-w#YW6Y&o?1BrtTiZF?MmtV3n`RACea9oH_U^tTktV$mF=1?t#5Tc%c z{yx@W!r%kKk7T)n*5iF0DtO_pB=#-99=$)ljQDL_ZqnkQSpNzM)OR9<)Qzt&;4+bg zsy)=)10c3=w@4lg zBMbg*WHNZ$#o7A&c)aTU^bIxA|0q0ER|AyeB_$~EfBTuXBdBxI($YLaZu-*6#xxv_ zv|#)pkDQ);w7pcR!zkR@L&UU!#CBf=6abTk2ozpsjsJoO{LVDJete4UeTCr=#qtA>!66{Q zxq&`t$Y@F!L%yk|z<3)QWx}RS140VN`5(lLFkB!ibh|}&ue((uPV2)8<)1kQfl&OD zHyD+I=QpO8I(|)v(s`Hh&+b7CMxqcir@iu>*BvHa6VLNSe5qDNA*e9G0 zqak)w=4+Q!D>={M_nxBXq99|EYpz+YH}!VZQmnz&HsPWmXOhE;J8lt0m9U5i44xGj z*Cc@sI`Dgzf>mCU@pJYAq&fKog_$;BO>ld?B^+QgEu(Y5j&Py7J z7yG1jgU#Gqh8n*d&b9lh-gaB?!r=Y#;UQ(~l#lP{-A< zu5HeKF*3fl7k#}9XYdo4eYR+L(AGK4I=3O)8!4`kx9QLUqhsNiXJZfp0|r(SlIxZY z6UeNRZot}!-9o^t#*#y3ov8}4OVj38^3HR|MdqT~-a{hh^_QJyDvt1rKgy}d;H7}s zaC&r!RV@FG?bfO4ofEootADX#^3`=}LDlX{nh)*9KN`Rf6e&VMBM&tN*Fnm;P)hZJk61SGm=l0GpGM0REw$*jAT^q++dBLAEjX=&1fJ;IWroJkiTwm8jYzvRn(y# z?Ym{_$(z|(CK7*OMOGj+bW<$nC=T&^m+so46IW|2>m}^RMKjm>!<>pUE%en!=VSc{ z@mhl^!o&w2WAy~hj#K5EJ%VqqA0TT3&iS`P?q1J`l(bhy`!hWSqyOR)L(Q>y>!Ti- z^Pk|R{xHnQS$RvbHf7MpRyYm+og}43`P-N5VkFJ!vYjOv{CLx+?(z@_EDoSEf;&_)?UUW_Z$ zbWr3rDR7j=ZlfvyQkTG5UHVjeK3?p0**cV14Ez zV_wKNF-F{+^dBB520h`26dl?uUC!lyeTCWr52-oCxCEZX5-z8 zsyjf%XDU=h4`uQ z&)F6Hds8$>2+H!|7DWi@k9t?XY9+ux%}Xmv_{3g}C?MeLnJM>^27%1h_bxr@ZtH_M z`;z4%_>n+$a`C50M(E_-qsROqzv1~jH5Mu7xpOaH)Xg5YI{AsiXFe)QkH+MlRkX)& zpT9PnAuQc{bUb(wob(O1;5Z_~RsFD4*YcH?&q-{z>aroGlx((X-A$J-;2`8@dO1u$ z1(#SGigdN9F=#~Gwl$I5q)lNik)NVNwC+nje^S_F4`Yb6PK1%*G=J!gFBB+CV9&5` zHX%H>AZNh%&om0b2$+G<8bi1?rl~jfAedR%JExIA{4f5ss@H#Kzxf)U%)I@y%hr0& z+`fjvX#Ct{f0F4rU^<#z6n$0+triEPq5Y9=wW;J3N|tVe8su2UQq_mMZS_|!F3_0; zLu9Y?%Z$zVY}H%*sevdEj8x&|kVzNnGN(+NyhWJvi~|J|^@DJH0|qHKB%wK2fpRN<26L)B-Y zAteSWh+jOkD`yV45aqX4hLnk&WguLK=4D{-XR>!^JcITsyQ_bEEadwCeMbl}4goY)P-;nLToK3M2=QxB@!TH)DV4O`k*~>%x z84j@EA!ya!czOwkG!YPLRi@N#iYG=9aNRb8^sWYi@b`6YLCnA>)g1!2HB 
zp<1ie;`=+9@!tSlbVUjB`h3KwZ@xEPHi?oUVLpU~-|O-n(!$t0+qg&zzQ8JzMwPh< z5&A`tBSFS_mKE8&P_RS6n6run62GTX9ryNPugVP_Vp)K?oJZ}pO&=o>rWEH92k^OF zE^!6C4r66vaSS;r3}iXOjFiHha3ZWqF6UqG;3tx}!WrjD5ECb(U*`l9l^RO2=6r0` zJZhEd17ez^bUDqeQgGS3Y=KxNExk|pfF>F&YCFu9EuH?Bf_yIDRu~|in;hIWt-cdX zRI~j1ULZOB*}dkrS5AzjX*c5c^T>br_6)BQ?fF_Gy%>sFDVjp+n=g2&s0*(-6K;YQ z3Hk~D?*+)&si;dga9q;2#xQxAaE31R>q{1(u=i{oXRFd&4L$T((sU29HAe9}mYq|) z`yfUoVG>&cbgl}EP>uV>gOF(Q zbvDgB!@rH?+~uhY!iF-e+w0w<`W9?TG>s;d#ajPVomeof0IP2R&dYCl;Wf-=&o=fK zH$rk|+iqC}_GH8J{;YM#GfFbillPA)`=Fn~7~lW;KTa0X)<8?q@~qKYCzzIF*O9`) zB*Bmg!jdc^jWsRprC4HxaxA&1Oc98xG3$sFcVb2dwZRMc>M|H89KB!$`^(A=RLJX| z6j!hmk@ASb%=X9crb`UR%2M(-Gz_`?$lcCX#0phYZB7HxiE}2mW+Am{vm8Y4SW`51 zM_P*1Xb3FOCK4xI7hWe8RE9u;_QS9~$@zQ~LX}1Zvth4ryls;b*bjm)4TggsF|+5t ziE$xNSPg008Eek1MC*XYA1gmhg`$l{=BC$%rMi@(ed;1 zGdDLE6B{UAHveedbAo%ZNBi<^(_=3t)x~axJFXws<9Imzh#SaZ$18>Q=g+s$x*Ic29?iJMV$^ z#P5BWN=Ad3IHBLNotx1CD69-9(xhRJ?^jdW1M@PR3HRZ@=Cp*H(b^Ma6)olrnB}wga zrlBpjmY83WlUxhgwxaUXcH_dpx37>$bRmBWR`5d8gY4n`B3upudmJdyhMFa2L6UL# zixC(DuiO6X?A_LFnp^P5?^g>V%d5Wqh);cnwM|Sb53;v!15ZJ<4;9B>W2Q~Ad!fYU z1;c9hb~Epl7-#86jC2eXN^+Q|Z7a9yxc4L9oc@dOpHwF~3dB#|MfbN%<`}FuW_@e{ zwK1Z!ZAVM#C2NE%(0XrTcAM{;K%mz?`#5YDcNIezg3W_c?2H%!*&JFlP2Yh2Uxo9- zN^a*dcD7*AkDpm7Wic5($3sT4FlT&o*ynca~d(8M=GLfirLB8^RP=U zY>v8YoT%C(MKiQkn`Nt$5)W`~M2;NjJ3ayg{kH?!B|38+?7~%s48~{5j z5-ue=^-H@ln=Dw{HvNSc##8Do^+ZFzov1b_8^5kMFtAp1YDZXfs&$84yqfr~W>0T! 
znU(qsuY8W^9$?wjD2A|xBN^>m*R&w}X$q~oKK^IZ%%@F#TV@esg*0=#=qGi49MIApoA=fkx+)q1jM5iL<6Z5G7u z(zHCG2v(Srn2q4Y;LU;zA^-D?x+yT0Yg|ADsw%-ux!h!;7)X*iG{k-$amKU^!vv=Qc1aAo z?+BKxH50TOy}>!l13wSrt)|tSLMxM_iLt$JX<2LN@d|Cwur?_mQ`p7;l36+1}`~826L68AOGoP++oo# zI@Rx??@1}+;|p+JXHOo!`phU+T2m5ApLXdg-zZrP%1@1>l#wWhFc?UNJ>K+qVv@`a zg-gE7>h=N-L1=%6QT?!6j0`_CN^#eDP%NK-n_SgqzdSLzGIqn#&YT--Q)mn^Ry%5B zJaAs&YK*DeZa1Hk64*HU>XtC-O^g>Lxx7X8=~#hf3d#%YQ8@x%)C`6s9uQ;}X(9}l ztsKY-Sh2Jm#RqIq>+%j2`Cko2X}?4C?cfxab&N3%`VI zG3>)EV8x}zU19T)$cw@Ph%(n5*>g9aNACCS<)end5E!W7VmH%T(yF%E)}x)=Q)AQZ z;CSljjF$OjQy!y6!}vq2XYiZli0fYK1NU#5yOEa%u+$=TM#VYm)>rf@W@>j@-#79Mu-> zuE*Au8_K3?Yy#iPa5e`++QbXh2At#yq$<<})uV%1{1%>yA6}+Z;W(k{s`aFp4Iol2 zElq|RbSx8Sy-@s)VuX@v*_Bg`271>ew3w{YK2-^c7-&?S3~2dh7vB>y7EMer4z68`eS2sWU+Zi9WS%~ zLdpv!`sOBsb+&SBG^Zb@lcG*I7g%{bcj5g$a#3iHGl8wr4z{2Xf;GTLAf6l@8(+ty zW3`#*ZE;#r&Wj&$E)nAJyNP7VWz2>$w%*~T>pWee0!mfk zG>41MSCo7MRx@BIHo`ROcsC6=<@Y_1HjZ+M>|PGQw!rS=bg_!>sxqfJyg$a)MZ-`2 zD*~2wMB5&<+Em_5mCB&>SZ2%!f4%|RvzAs?(#L)v4*X`3Q~#8!f3YV_{rTIk(-8OY zqIFB5biQ@dtFpv0)i`2Uhj7AIpIDelSwH%w;My(UNf1MrtRWfKENHLvpuDCUf)*%9 z>5sL-Ml}p+AWEm{`Z&+`jr#P%fF(_YbK|Kja})(X`HW69S`^daBN1qtE#S~LTI2CO zJGENE6Xruv7rGkD$HW57sc0#Ysx7?CR4{%%HF*AT)MZCfVrRdL|9Q5H$bFcN*2Hf3d%%dRiA_BZM<=}4~F8sMd)Rlo_VVqZl@a8+?@>PXk zt#V4_Mk{BqvaL^bhqi^1tvC~@^D@Kx6ruJ^dgQW4VH~i~HcaanM{22sF5cDVY7r;HQJ%&evq(deCtRw~j+^XzhY6u!r%`E|` zPAyiGYSodha-X-)p_xQtwb)5j$~{T3#Aw*>g`fKaaN|+a&n4zZ@)$mTqtKdc)0X=y zQi(E)Z6_}8U9a6EB;ma+E<9bSB%Y14Z{MU&t2mylHu=MBx)$a>?r}keCyMV!B#Jmv z>RaC9Ym)Z@*&(bJhggf9ras+If_A*f2+u=IfW@nl1YdSFE; zVN-d0!JB)ohKQB0%NH#A08JrbJEdQyJvQF?!y0Y z{N=o`%;a;&(WEeynfKhryGzR@-_I&B7mi(->E9N(qW;eKShhIU!e)&ez;nsMz#GlTM|c6+hxu-EGc!ddV$7KmsU9s)c%;FX_?KH;H<@=;7jX5^ipwi3S>ii zXJzCX;{qx17W4t!FK%iKf)#BL`2rR#n)g$svcqd_1QCRo5EIAdOIdC16LHv#+Tsac z0!ee<_Nh^b_y!%<8C%!?+t`@8eJfrz`Ug^>-&&E?)$Mzp&D7Of?KWf*_KUTQU345j z%_$D(kG*>{z|@Dzx$3~Cfyo2Wdq@h#PAQ8~PXc z>9=Lk3muMA{EolPl~yfcLZmXpXLnQg8(L{S^vle*g-qMMPR|%D|- 
zuvSER;XLRimP8mxDR!e#4PPxdCUdXHXw4bKH0*6D&QBGpuGV9VFHdmtGN-7}^ms%j zhS32a|1Mj#w_se|p~IK`ghSIRG5D$?c0IOXavRtv*YJE-o84gXSLLK&6+Nf>08i=q8sq10gHy-k%~6ZdY3)j?SDtd*M$KZ&kHX(!fr77 zFr&5S2Dm?y7}D5(;)xH19TvazAt?<@2=V;lU_d+>4qF0zYo&)?rbqv(7Wr0we!rBg zTO@lUEuuAj4%es$6RWm^Dq{~XM2= z94s_>n8$d`_Yz7;*ZBq(bS~E@VL|^ZM%| zRl>D7{Gt4zND&Pg0u4nlCRH5JsgQmW+2Lsz>C!i&r;$dgIMgVXuaCxFYzI}QO+tm# z4FnU7A=4zEt@Krgq#L;9{$$|10=pUuOO*B9y=oI~*;n=(Y28EcQ*yh3iQwJ(o8`l? z;FMZvyQfLO>vqQQv4gAc368a0s7jV1+4GhYZKdv7gGworRA zbtiEV2cCHfO*`X}mRr-}F_J!)X4GE;qVpHAQ8W9e_AMyA`W77vbC2FNca1w{uKZcjm|n*~&z<+OTDBW(W(6>j z)QYmYSfOj%e|UN80>5P^vm%?(*DWivwuQ=PV;Mm{Og_gO=0~I`z-@FNjpWrh9_;e8 z#Y3@D?;uV6)zH)g=I(3&4w-Ju%&||CHbQ@9h~o}lz#3DeQody1mt9aMTR-@u;QM%i zL1|5D2wIXOZ%{1L8lh77MuSwZu2rQ+XCQMF1{A?3ARwQFEV2O=$?)$M?Q@6<%snYI zc$wT(`o%_|j;HcZpse_#&2>{-1tCmB`g*8(jXcL1!%|cZi?d1Xag^Ys!YqE!o2%ta z@Vr51=&qOh>Vqd-y2@{lv;mbzNP4S;TW{KjqkFo2LU#;-e z#4>+$ZFTMn8#Y=MF{PR-=-Z1w_l2*O$QQ}YgQS)fg62=ct|i2mC6+a_Fo95 zl=_kjo%7cg^*tHwFo$LRsE|jvO)hxJyeGK1xnO#{pxjx1D7*^G@2H46EYKzl(VeK- z-0~Yv@)VIKbpE721#^U`J4+HXen>k~)CCf6jcMBbNHjNHjVs{EB2f!GTh7IZIFBh?%>j6D>6g~Oz4GKJ~QB! z)E@Z3Oa}|?yr5MjX45w2d%dE2Tbk`NV`}cb;`)J;zR29>l{4AoJ?*)}z4DTIE>^pL zfz(E&v(>EJMr4vE+CF@Gl#ueb2RuJHq{QE%DD0HR>6iO zQlW$Bh09VH3(%=e`TXONIXpGs8A|u4e{9TBSATpQhrm^uqh>{XAF%slfLE2??C_GJ zSk-%aTKQ20UR*%)ulsvCn;#5VhgZ{e)$>Y-JR%i~M?};O{Qhy5Seom*YS(HI*?n9K zp9=BU6a?}&!p#L%!AeR>w%D!>JFWxS@Fwr?<_I<2Vk4kK42$Yw5eu>%jLI$>Q3Vh4 z7yV~IdzJLn-hz?fACL`Z5;GYY1XvF@H75?kPsE{DUHkmG#ibeg2m|p5O?eP9Gv+xq8%=~7XCOwO; zA>KD_;(RjLP;H^JbAXmS(zkz1wEQp1tiv=~OyiSp_Vz3qcYf)O>hEd;T6BQM8y&9J zS9fn}p(m>x7jkZwKel=tU3a-}3!2Wg%p{f_3qpGnm+$TpsOb9FGY4Wi2-H0|pC0Dj zsEhjP+%6L#(69kH_NTqi-eSHYynjQ+6kRw4G?!XWm-6+vfmJA9<}1NrNg;#TVS}|` zWAZQ|zlu!bmKe`{MGOtdnqvg%PN^A*&q++IMBWx=m}=F0S9>e7s2l=A42GK$jFQVr zdW$+4#9QG`mB~&8uS~1A9RhBXu^@2GkY{YA_n9L-#^7$SIAUzqIhPjH6@&(s^0Njg z1`w%#Jc}sEP!y2bx}}Z83n6GO{Tp#Gw{*3`EmO?eO91lD8$k5SM!zbDjSw+I|7>f! 
zSy=lH<7@!B zLaH{C+M?C4J(PiGZzG!4SE}H4hNwNvv~$$;=HkAkuc| zU#(FGvd8;)W3vqZMWj*9-jw90>tZ1@$fnI+#I#vznUwBMpR^&l-z0X_#!Nm`;6lv% za(GGta%BqG4Y2PdT{=hAiPY+YA+ zO2&zQdt<^2jv-J4;wKJ(aY2v2Oj_)>KK?bM_rZ|7Z6{jSc0ZtF?SE7_@PZ76F&QR0 zj&gqT@$C?_7pm2ZB1uGI`({tBd zC!!yGl`&}9h3D!D1D>HcxCR?gJczAvDrlu=ku_`tq^g61!hrHNYYvB+}^w%)B+6JwZgA zxOX0dyxuw;{|k|OjXv9D|Nc`@==G|bzlH~mDJkOi6XIpPxls1v@2aV=3QqRi12 zMKDCG8S@edq9B>CEXZF$!?>`fWWoLmf#rwtRN)02gJG_aP!E#-N7Oe)*WCwOpV+n< z+eVYLv27cT*)+DZtSG78r!z5`_FUNecyb_hpu(Ha(*+jXYV~TWM7KGr=1$& z((Ld|j65OGKGS*ZD|%@LvxHscF6uYutEjtEa(Coux>BZ`DH&p_mYaBHjVCgj4W}py zbdKe6=z4Mvgx2TG!=K?%Yzk4Kgh{J6%7VZ^q#!r4Afg6_-@9_25IJy#i8C*esisA; zJgAlU(e*M(1dBgHP0M;^iD1zxj2S;SaejHVbGuJCJiKpW=Fdz6z&p^uj%7O`x22Y~ zess4rUQh{#>WD)Q4az3V@QEmWL}BVIkJZp7aqVE-rgc2IHEOtaB2)XOmDqVJmRo-? zHO2R#KiWFy!9l?smw{m^fOGt`tW@4U$K@{hXV$0?`fss%M-<%oW?5AOlIbm3?p&3K*(|Z6L$gR4TKOZDm2eMbyazft%S>dx$$sb837HJA8`W?`$b+yP1-i z*Y~t7`NJhBsIR7`MuRDMmHa-VqEO%1_~m}Z*>~u4&liFTW@F?5HQCJjf6u260=GE# zzqc~?p$|<@H87eaX;g0pZmDVtF#9Tg?bqa)ri7WVF0h%Jc{9Cf0Fz+hS!IOLVE>S~ znP~f6eUz(o2V%}YzTZ+qc%#{Q@!p6dj=#m`_FaI&9~6|B0hi}@lcPO;F`l;* zWpvcFDjamLpcbeE-O~9S!u4Z1Q>P>KESdGLH{a{3dN8DrZ50UP9ivJ0cOwa2AaceF z@)MZ`klvL&yl@PxF4O58>QOf#yk@BYD+SP`YJ`i$b|{2a{x!thftmRLG%yAoiQ_0G zEYOpJ9FE$hOQSL8m>*1V;XrI;7dR?1DGQ6LV)M`F?06Ck8uvgK(h%d#?~tUVj>_yv ze#!txlJUw6v>Sg=I<+`lM`mQtZRn~ac;82Ewj_o*&a{GsXeZ-@;ht?W57{*erSD z4M%GF?beNdY<3w8sEyzAqQSpEry_XLEt38dv#q$l1 z2Y}WdFnX;(x?VpsDcZaBC9Xvy?TSeMeK~(j_!MLda*^p#cNNzIJ@G|`r{TJ zfSxGMm}+v4%{UDX`U*>uQEe6wuUCmXO@dp?je2OesP4as`~gqe0wM`9Go~F>pnCGL zwMP(b8UJ|oyKko1j9BOeGVhi)H~_?6RrIjFstspTI=9QvR%y*9H$3pn4nGB>ST|S^ z7Hn>%mSV{PQZTtO0tu$3Gb}YHJK* z_4BKS#NLeunKI#*Q{KVU6N779Y;#QNy4_;S!L?8>ce%n%h9Y1gHp8?c+j2(AA`XMK zerB4aDV-A+)et?k%tgDSlIV7+0+Nt^uqkufefI`@25+< zw;s%Wk22ScNxv2b^#RWjIZwt}G6a37IB~)^Im79_9 zOX`t<^46DWVNJjY;ehn{&Mrsw9Lt37O!Ued-p{`PP|6{Km9>eMV>7E}v=4H)~WluR1-wR|}|%u1l=a@bt;D)CPtRt>aHy|7#34Ez^S 
zh|3Mssmsj1S}0XV-D$HC_Dn?Lk4|5K?Y2!eL@;#KB@1ke5_iF1@L&QrLVw;X}+iRrlMi@q~f=;w_y>bf9^3d(V6w_y<8Hx7d@`5BYOVcw)0vfMFj~F6(BR$ zjEX2kLvjMUD>f4VFv`QBai9LMlpAx5s&6jp17%Bx^Wba?@C}lHq(p^OlljS8wWajf z@)HjMvxG$75&dF#yRan^!Dp&u#@|%2VPn}YE??5Vy=K($6au_E!lyd`s)er8jJ~ye zuFg2~q5E9(%l<8~&V!x%$?kHCxVAxiMLiOwJ(=4S;p0OvSX7`f>|}B0Ex$;6{5*l` zTMH6FO(a$pE?fK{y}1hZkVp#w$+ou&C;i+3xg)i#xu473QkoG#80=8g`BmBwO5jF2 zXVY9_qKbX3nWCR;vKQPgFb!T-oiLS@cJ&Vn@_oZ+Z!s1_EnMiha|h;Da`4Q^fdPDNm@Peo(BX4OB9LV!QLu%Wvrg zG+d~(NNL)qr#+=40xcb+LesIWVk>+SN9l+Lgk1p$F%!cK%{Md<5))?G4fAWt;dNfR_uvaZeJ|Cd~bbJ34!N+zQQssPh+Ygh78Z z*&o>(*5rMuq(v@3(RdM9n-{0$1!NyHgyIXI92?TiL$$;|@;pC#ilM>$rhr!qKo&P8 zq{>2>4CZ?VW8+3t?cTz1sUU=It&bG9G%EKA+wt>g$iD~25@RA9UXI!6=q$z=8wYWe z_KD0h4Uyy2aJ@E9=n`NXhIt}$0rmIm0uC|S^fPs`wrrG2V<+zjGcXBqGyw*6I|j~b zr4?xE3F!iwGplYn49tO<_)}$Co0P%~KqS6R`Vfgrx-!^+2GKv|I?k+{#}prXR~!rk z3x_i>+yCp7{2X(U#JpI2QP}EY7aT*F=$0o)m<9TB+p599wK1Ls%6+*Q2=BRxR{1aS zx)+LzU6IWzepXh=-!zkrCe2b)XSmxGe;%qmWsT^$6`M~`_ulb&ZK90m>#@vy&u%=D zJ}X!ZJkCboEka`+!*8>)sIS2H>~P;GJymPqdwJ~lKSrJhf%FXoot`DPht4TMkS{U^ z&R=9a_v5AK2-npxxA{OIVX%O1SY!|)m?0P}6;BvlFJd1$8UCpVEQ&U+x)OAQhRKk8 z+Ff2Ej$C$%_jR$crU+b^19^#jvw5BE!>MpD^$7&?UeW4i(*tw!+(a`O3?y!#_L)=G z50*{Kzx!Uj)3;@^^re{>vI`=Z_pFaj(n`Iq%emoP^47Fy*{ zP*6s>#6~S|G(3CDH7E92^l-UNPEOuuj5cs-$iibiR}NUfe0J##nnnM_ zqQp(?8+cvz`TP5egd%@{xz^oe9YLqAKor5jS{o_i#;Pj&$y~AJMtg&{AD5KvrJc~a z$XzLy`|1qPHwD#$G1|!-TKwCPutuJ=R8xSq-`j4w$}X|Ck|K|HPe1w zGU!9v1S~DFqOuqVVPBQ<+yV(j;mGi{aj68o}MLG>D~G1PhaH z5o?cs;-_YF53__s?+^eBLDns2tS{Q_ zUizdp$_%WL?YadycXk=Edzr%-&LM%qj2@uLzsEKM(UkS1#O5ia_3mW}=;+&P{`{TVz6Bb2O(BMEPkxG1t95e7Vxw}vqAMCjQJT^!Cb(4cV*J- zljP6^Bx!!~#Zt$p;OSK-OHK>{s>L!^k^21n%}j~DrQl(tmA`f!PUI>41QZ=S7j>_(cLm6*tQ z>aNUTPQ@b-%X|R$`z&WakAXKK3r@8$=+of^R}rb+B2ze`NaeRD!70L}SCNl8>C$~Q zN@_wxrV>({w6J4TV+HYxhaL>WX@ZRW`C&twYnj}HQ26;Lkdn{%mVrO~(yt zOCJDw#T~7&Xr7nlh~7Bx|Cg!6wV25Ae!ZPX0nXN5ZuN{<>B z->vBHnSn8!1Coed3tmzEx_^PV&;+m%&Rct320zQ^hDC9m%w`wg$o;3_AZyQ{h!%T^bGV9(D+3y=8j3nHK7{1Q?=X=J`yHYERmv)=VA1g)JU$-?Nn|K|MjDa 
zr!VtU>aa@<^rLZ8*`&5+M--m#x%XjZ)Mln!2#Oa^x9$>I(M%K12T@IDmzE z`R9#0Z&L`;Ia7;8oYf@iwzvZ&dqWy9^bzH#PNtYSBLsfV#SFU3fJkO7L)ZDT3+W@Vv%zjLZk{4i}!Ym-MgUV1uPXb!YrFAd%O_jZI;9mDFl>PWK@Cpi3dLqj6QMw&GYAX3!+9vZl$kWxm_ zOaK@;i$*lq2wjv_K8zWJWrG5x9?PznN@i_Y65y34Ru7!F0f*!``{Y``utvv3ynmW~bcohJZCFAmyd0+=f)(!Rc)# zHa-!C7S2Ed8UFb!Dokm0h7kmU1Ot&Ypcf1)l9rOQRCA)fq{15kJ`shiygU@}#a=J%RqK}jD;@@6x8xT4t#@hx~Z$%65w3((EBjNJAc-b#MDe$0qGA19z z%PqPz#7U2DP`jM#Dv#rC<)7(DQp!sQ5p7yk4~YohQZX&k3bCBnbbn~M9$b#f#$Ggg ze$%Vcx8`;SvtDDID;LL5K>>ZKd+Fj#KKX=JLmk&=%bs^9ynqMm;i2hUx_(t-bF=&7 z)h^)huy%I7_I==gv)BuMBRDuc&E&FOg6Vwn>Zs6ZtkkU8QixSj!y5Ran$U4nT9(Y9 zb#-d*`|R8CWWf6$*KmJvZ1Dc{42%+;t+u|`>PDd`63zos=$3(~+zIw@3hN^&V9IJo zjwQrw0u3u1r5g&ffB-=jw!(6YYfwFMAV&K9nZ8RFHIfL6p1Zcj1l{i0vheflU%fAC z`u)5VtCXGVe<@u{D7Yu&PqqNBrwZpI9AkA9-ttfiV}3ysQ<3WgKZD0t+F;phZ@AHW z>Qti!s|Z#dV+j+&3_)gKkOadMQiI(NOp$OFfHIeQZN6xX{T$KUt98YLf*k-8Xp8fW zIKUaOc@b*%iQd7TrHO`r0_-;OCrraq5MNP;g+T!>lU-V(DYAg^cDd*%eh6yPSaeP2)-h(%CKL|3gP=R-wh|EqzIWsrMZ?+TZ zQxpA`Z5{{&7@&-aJa2y0l)0&&ws5YvwDt-Fu6SDPgv2aipisFC=|0OI%tDFSuHQYS z{HtQZZRu!Nmp(XO<#&{^7Xf%KjMj&-bJh0x@A;1^@_c@u9<^<*b&_;&kQ!IdGx;fk z?k_{^(sH3lQKP_mLO__pw6NkNu%R;!de_o=YwBLcjl3)*yR|JN9GUW~<(#}-^%MLX| zZGKCO;xW*3soCx+7g6E{%7`xk2Eak1p9r~y-^@HQ6GI-W&3}bpYCp#e*%O}%;)aW= zs_pVr78VxH47cy4Tfdc|Z2Ow)A9O}^baXsk14rl|wt=9b2W@1(KW58}8HV;c0=p#b zR+?(^=Ye(2=>Uime7Wew6l;IE8gW~5w@^@+pe${FRoip|bws^`nA8V~#( zj<$IT5_Lz-@H7Bf<4iy+^@LMQc%>dD7ta*@)&a%=Vypn5_JjG0nyT!P|Lo|(U2bD^ zJOK5$+JNlYD{KgLJ<#6CYry}qZT(JJYw+NwVOG}j0pdQVGf7BNr}Hza*=$guz{gOn zC|2~uq3iaA++S4Uh54n@n1i7JF)@g~$`o*Q{L?ny zsQ}*^s%~L%=U14mac1mp7LlM;m-E4=sk$0J`P!@LG09eSa?g@~ff4=n_? 
zy;WFNRdvyw1M~VLNFCa#K8?pK%v0CIH@B~>5-BUgvp(ZjcVCR zZ7B+!6}6kR5Iph|+Bf11VvX&Ke?j}BVtIKfUy}CgAcYdOo9Ff1RsmA{eqQHIy>RAW(XGddJgjRq~+v_VzYFUQcv*Kg&)d1`B`O zpD2|3^%i>kR@Jo&^1^#()py@b0@goTEH~I*?@x4M?&EAf5fePoGSgAIzN3r7mIXnO zwoA5&(BXl$8))6jhviOoNPoip4nm@JhZ0{FYXGEHifPIe_(5GH`Gt31pdm>uptAeH z40J<7cprYzU{%v>#g2^e`;t<_$Gtepim0B88GYi#foFB2<@>h!+AaJB=xCy}VHcoM zEXM(v>I$C8OVN;?B_o)9)yV;gU`^$wLCFairSljXYXnBBs9g0pPR~8j#ACw}aY!>H zYgLdd%%wk}C{HQMkZHjL56jt{q^1(9l7b8|aryj|QsKy~*1C9s8bfOa=(Q_;M``k~ zjw14Sg?8@d2|Ne{_+J&{_?QW5!)qOO#kt(2fkCv%@7)}JdH%rEqE2||J*F+^kCRl( zBkV(=lrMYdFW*}Bti~0~!v$Vyvs2C=79Nu9)d9v0Deqq!j2F!ri8_b1Za(EDpw?gdZZUyQs)LH7vK z>M88dkRmL|AU{J)aL{jxphdJ{sN;AwD+qQWhUK!~949vY37Kp-qDs(GZc_fcHZ3yZ zTB%66;XwXy1#FVnfKc-xWgdTW+c!B}G;lU}u;pq>r_8w6pnv4-L_qFPPF)zQdKLM{ z*4@o4yL5aBaDcEdPIpBml>^g^*{)dPYC4=94?kBpV}D`WQOEzrNr&i7NNb#^oamnT(}51QevDBRDQU`A?VuWvQwum|3Zbml+1% z{t{Vo;C~8bj$)yPnH^U;41h}W|7!U5@(ApEYkl1NrN=a1@Z(c=rvUTKdPwCl$j|p3 ztGkv#Oz92#hFkD|UpTGJa>Hq5>zT)PG&kS^bfF!uGL3fQimt_4NaS)F2!-0&pre{2 ztr5aB*_?52_K39g!m%&=tGK5S5+YNw{~KR>MD_z3!GZ)G@b}tn6BAhy6VK9Q#E~P+ z5Wex|l(ikEiwB`xP}*8dgVq*Re=r+#L=B+~qQW~S{rKZ8U53!uD+?<{m?C5PO@|WR z#G%rl1ej_)-^gTaGV|*x#@8H=IcIpnneJnqRrI&O##!g!!Gn8#SE6xmAJ@r@ZmXj8)uW*N%mEI`P_}BHR^i-S{tA zrnO{CpQwBSX72@$_<+-?fgEU>JrL95oCFiPE$JheCew zdsnhWd(iA}sBEe_&TVAt@McdvVA#)l-1T38Mt}H>$hFD&cv?mh&KqL($N?C=z>p0_ z&M6&NnGFVSC#b(U!F^VB0#0hvWx%xMdRlak1R}_9(6OX=e4&}oh(H`1DF_+<6+2y_ z6OA2fyPvH5ScS`tutG}Pp~^5@lyWI4h+d0rdZC^QEGLc>1CBUelxH(uzWxv<*1tmU zi4{wk&2~Pi|g&CO^hk4!i8SbVCnWL~#%O_Ooi;GVyjmZ+d zC&VGeG={HHJs?~lSLB5iCtnTid!hYfE%1wXFwT@yu-F`l^#=j(07dC@wdtTR8le++ zH3<4EL=cvclqM_<3$&^fH=iSEzNtFC@UxgT6e%R8;BVaWVk&D0wAIUsH_UTZbiOz#Kl`)9rs znvWy0ZV;jPhR={s|5P@=0rDw-g~?T9%(z;(sD@Jy+w*6&3U+nu@0Ie9_i)qe&K;b^ z%}?Jv<7e*#2s2u%&X<$Mwo?|DzeMC=mP_*qe=RbI-J56mnP;3U^V0(pHUxyog5NEN z8U)gV8VV}3_8XFLM8N;dND4iuEPj&_X9)OTDTJS)R165kf1l+UE42^R`;X!J(U&Jj zS*In58a5uJN)~z1dCE(vBB%4F^1KNv+)m#?_`?3=G9BG+{(KH6`fGP-_mKvP`)!6^ zEb&rdnfq;l{b)0z!@1L8YdM`)uY$NA3=tI~3LVu>92S90Y=)vr3ueQ(kuFG0f|H1k 
z<^aB0Ay?O0)7c=Py2pQ2c$1^QQ{gYoAXku$4jOaw6s{n}6WO8c4q+0`E1k^wD!OHU=MP0#<1iGb%RJv4R}^Ni zMMlUf0s102&00~|s`NO`RQu17O0vFcI?6xnxxb{l3;SgYM?c*~;zF`&aiQAF40dnemz8=tx}oGdMq2os|8tkIkEXa(a`c^LDEH&=&0 z7qNHPdRalDgFxIpY(+*Kxejzeeo<_-`>6*R!!8vjnXcmZA&_<#`tI4Yp2LEk)|h=i8$A*WwD5D^&(1M@w~FIoxjrNre((MCK^vEK@|?2o$zCj# z-hUyua09=u;-vakpTle~hpeO}$y$!t4^T1Gb$)5EoqNiSEpkI_A`QH!C(5Q!#ruk? zC`LpI67E4o%;AW_CxL}_(oNXL&kSXgXK>Pm2NE(c%S3r| zw?9TLd_~DLmpvTMC^4kKZdNnM@l)`qI2yWWjC4|?#aSCQ%hl$|59`{;v4a_I1Wg5} zkqK|CFZD4Gz6}^v;*D#2BjqHOYp+=KueQX0=SE$RK_omu?5J7(xfB}^bdV4-X+p3R z1*z%W$;d>g98f&{DtgCSRK)DqMr(|1-u!*fKrY}Z!dup$I^f4V`2AcQMu3)n zKjlI&=!Gi6vUkHm7udi!vX@(fKt$q(H4>aD8v@i6)=+3G%sbHqZ8}Xa z;V#d^qU+7(6I6D80~^{=g2mscEY5qol~Bxx->lsZd!o9SK6R6Na(X`;o*kZl){y;{ z{g!n-SZl|IaN0;?ayvS19h+R)Ilh}=_a_-c$LFkd&oG-Cnq$8GeC74naNWIdoB8g> zz-H0gmn1JrvX<^#Ek8Aa751Nt+@vc=5hBWhBp*xa3V&b4fjna}R?=lyN`8V+_OFha z+P}Jxgq(ISalD_yY?!iUeCky2R)-g0lPfqW#&p7DEx8Hm6r1wc`gZ(xHl`~ES9koV zg9I`-vL^VxM->kx^(PX^A;hg}O2l3|dkqF2RV$h{wC_i{d1MFl%;y(eP9SL9KwLI7 zkRPTJD?ZnKH%4VXnU%Xq@joyn4VUL1!*yvA-%=e7yS@31#8lD3jKt=_Ea1QU^;R5o zMENez6n4y&Afgz$$-RfP-(Z>7YA4 z#%cO%$G91P)jy?Yksc85BwUwF;I+)FgGCF8Nt?T;E8a5iY~c|UzcKpqJ6!hU4{VNN z>32SRLLvdi9v&NpuJJvWiLf)$O&fWff}ev*tiJHu+HU<=Op|m)6xSz2ekNqz4r0gC zmpW%e7yNmqzSx@Ci)ZDXDu%IN&AXfMkT~JQ*rPJ(G3MZHbA|rT3*aAA7$91d;fc4q z_;-<-MPQ(>u0X$Jn1K!^7;dorr<~S3(0$_u0eb{!hRTxJL?#^JdPwBv{Gdq?rxIF_ z!Q!5d5xl>Df3m1jNx&wBCHXq<66D9JB_SoL0u}`Z8LX;B&4yYd(MO{xPt$uoCV162 zSnXMx{t2Z>_MYk4Ayr6;8a(?A*$B!daM~HFIfU@0tHoSCuykXWkaj2WH;XT~+W#6`a)nZVT zsx#4d$A0zM2dC71cG7-x{vSVSejBf+!4+!W6{w#S_G?QeIizSRT8+dkr{6Cl+T2n) zKff4M7U{2=CI66#S8!ImialGY*TBGNMv{}HclCW=Zw(eT-zO!A1>Rq-{lEThYkL8f zR6X3!oKxPPKgavzMC!ATX~bi1aNzWI2f`d59eo0rhU3cCO76LNwf4<{tSptQMVPj` zOP7gnQW}2s84mWwr`v~`Bm2`VSEVIcD(F$dg=8nP@@@pNct(5WBb&UutGONt*5uX< zUnlG8r;mAMcynkya@86Xj8_QI8;tUC)gOPy>zQ5H>`knJnX2RJKRztdQ;qw-?p-Sn zHo=+0E56Hj3uYmoppDO%6JsG9N1xJfHP9(X6g#QbCjt+>`DMt(vNKG%x&-(7bSLsC z`Yf98SjH+I*$_k?S9wGOb% zEh$|&$_+f2-Bs3yQ3ck3uXlMy&8z#q;Ukpbsy4GwNns~` 
z;DyOehMu-C*J6G=bQ>B&$lplscC|AEO!&NRbO$nfRRO z=z0Sbd>+aHtYNfW;H^Pzj=&RSrEKZ24XBHOJHd3A-sjbOm-inRQ-X%)4!@IfF-&pj zN8bF`8FrSoba;EBH96KA|RQ}xDVjE;&76EY%RS}-V07>q1vCLkm7(i!(( z%O#92Fy?;!GoXEOW)w-elA=J zq6Sa5gG-nyPnGyZq z=Ra~=Y&}Pb@uag~Ch7>ESZaJ9{_^CuD3N_iTWq$|gfGCTv6j{B7`|UhM0u`WKjW*~ zbnYfJU@Cs3#H%o+Z3KI=1xIgOO(u~n7E#G^*^!T_wp(0C>k&Usf*U~U$I4n^beNS$5Y&IB0yWc;8N%Ik1=`LzK<0>%WZ>fprt~;-D4=8 zlGy7)NIF?mNw@X;DR9hP1tNL-l1B)Yl9F;2>c7&WE}WbE5-=kyo9!YrR%6gcI+D_7d^QU6(W zjX9WOS+nKO3`I5PNSeO2*&9z(&Q88A`j;gxn1?x^I3ezAvtuKIRKixNdU}&4nKH}% z8C-1+smN6F4DpOWt-GcLv>An*f^Hi0zm zWGDNr+E9WAKe~|dRH=mlm6~bvsNO^(gp(Z^^It%NOY<4u4qZ(u6isrz+rq~k6%tH@ z%9vOo<#o`QND?=)+IfO1?VtMCQTao7{dpU+*G2Kg)7v^sEJNg=k{S#s033 zi`+;KN=iP^<+nAc=c_qX&aj6%<>6}2ZKgHfqqfT+b1m{~y(fUcygL!1A4SgY3?8ZZ z6XqY-*Z(vk%rQGDc&v3Czjgi2ws>m1Zh~u2$CYoJVL#-3j#!E*yT5U_upG&-r6g8u zl=Cbz+(wVBOBVmu<5n#H%!a@c}p4y*?9x#q4EyzU92KjGN&Poj| zh`q>G>~qpw={Ey=sy%Ix=3B3=qf@FJeBPSgj;}}ACm8fv{uCDvo_^1^{-&s+QfE1t zD|3)ram)t^;Et^QO!<8T<~9~Up><|{$i6*)W+rFs`qqJ#nVHGS$=SH#G{WrrIHJE~ zd=Jq*=VlDNjJ6~TLK^d+iI4`DPw^gYUJY^ZCL0sZEm+WMQzo|YdFgjHd&(g)<)72I z_XvH1Kb>AHO)%D021F_IQyz7DgAaH8fdIbPOxGyiqOXgn{1EOI)AWI5bWXZ~qXiaw zw!B5`nPuoD!i=QF3n6&?(QV$~Sk}p$c&134O>=d@C0I_1K|wvCq9$ngD~yF5w2)x1 zftCcu1V#s)|04dOLPC({N00`il(re52}hu+Dl;2GDnr_K*GxsRZP8CuKgTsaV#oRx zfeychf*?u$g3z!v(ZJG>2-pAlu)_07&J*fLRY+y?C zsN=SKng%3+kahdzcS*~Y>?^MIt+b~Q=dqRXG;4T`iSM%=`qkSS9YT%fEpxSauMRa5 z!G@ApoFY_9&3sB8w*58xWVlFZ>Z&R(w58x67p&hD<&>^+lYgthyB);8=xPYv{1IJb zRYAjEqE>XrwUzMx{Ez|E?UL{q(>KhedVg2he3OdWN^ob|v-RrxoBrk3l_TU%TAC$gDgd-leDO(ZP5YrS{8vwftAUt3Pig|cb2z{_!FS58TW z@I@r@WtBGTZK5rxjj~|(wX;07+nK;woq#VnNwBtJED#w>NWDv=kOM+pakE7NYGuqRU ziP2AI^F1*PoC+>)j%#u{hnKE@ZTz)3wXSj<)7H3|+={GQn zeiOi1R>vF)lJ%ui(iRr<@Z*MrA>x`D24elhu*wxNzj9Q8`@~Ck*pj6sbd!;2NH|{3 z(!^cUV3e4lh;A#j?^5XO+QXU_PKaeh$adc?@(lU>qtIF}_hkSeGHw&sO?hv%XY}RV zDy52Z1#ia!0!l`M36)4tf4slmt0Y5Ps5I~H?gC*wuj?!)Cf3*YhW55(vrrA-20Ne= z-%C&sd^{go{r}%QIw~xrm9_Qp@$vgb=$D%*Y`NIbSbhxN=~Pcqe)dbN-$F7+HR?lx 
zF#rQfJNe3A^f?ieoQ_!Ws$UuS;#Vdy_vNGofF^nvAUmV;Oc&?;f&>B?A_h3Zf_^*T+e(a4{@BSkB@m|4l8Y(R5|Nts;4%|B z&*%?Ai7Z6d{eW#_-j`V_%5$i-&()~{0VjfF=E!V+mrbdN6LJAL*0|A%;jh>7!<6;t za>^6Ts<9ccd@vS6oLb(rr?j}2Mv>yTH2%2?=wGxL&o`00A_}z`_E6lvzlU3PPJUt@ z#{)pk6Yo%aVkrby6%XJjGhAf{N=1bjr@b|MCpwL zke-_BkJfCaLcAuwW&9epwX@rE&&n;HnwkPGNq3@VdEYG#0DZ`Aia|%05aIjhH*a9I zFn|{eNHO7O=RSS}0XISWpT0>8a+ps@W;f)53R`xDw!K3$7)x!eiLIN?lR&QdtU+B{ zM#Gvv=s3&wZ)0_Mk@_FNTPZUd_m5TAfNsyxuZmbYY};x! zMrVd5D6#ff@0banX0(pFM& zybHgjv>(D!c?bOr6^o@t@x-X01lH*G6>?Vz7NXvGAVpRO3bxYv?_#$e9I($g2-sLx z5S=S2%tHb_Awhn&JX-ga(-+zKjU^@D0Lv5-64HM2^!&^WgMM485fhEN;D|;8qPO11 z>9hiw6uY%HUT*ID$jZn6SIC+0CM{i_ZVmw_uU>WDd;Ok6qfaM#5idK{c%J zb3zalE4JaAp4WPrTqP2riXR@^&kNF;bh_?Mp6G;(Lf5zq?2mJ^zOmEvhKaF93g(%~tDm0+Rq(xk~4I`OR$IXY%W zg$4j1taTIGCnpQ_NA!{Asb}KV!;#_Pqhnx!z1Ef7${};U*}?HEu~OpJ))ox0S2f@l zJ^7_xHhbt&`1<_zEQ#QdE(14G%Wg4#GF>$Bu}Mh=f8|Bp=J>ROU20_^?cbd&y2hgo zHXt=vC(Q1ofvIJu!H~-e9xeM6#=*1q6y~zmPro^j24(`I2GDc55!pl{r*<1Qy@%wKl&@F&u)9qqnnQd6z0~-wa-8mmAx*-g{52d~G_cB^BRK41f%{!O>!= zZuHq-f{{7HY2Dfs6BJ#V0sX4rT96$j%j(8l$KhWLg$8Qi~EY?vM(MkxNq!a{pw%EvQLN>`;n`{ zbx&zL=44*b-55PWAR(b31&drOq4Gu)Pi&b~{)H}@tPP8X`qldUfyi9aX2nl3>VOVV* zsvl3MF;ll9ehgv474_t#I&a!P_~X<)ZWHgl^w-Zn@-PaVSluAFAHE;2t%r!eG8G-x zD0T)XYt~UfCdDuIuXZAFf={nh)J~Iz*@Jmf##|(={2jfcjyy*p09>u^VE>wi1 z{~eVMB*nPpsOHV1QJYfn!R;2{hlJ3+^(j@gas=5RfVBD^YSp0pXUG%Im`pGf|zqZ7N zJtlmchaTsPQ=}AFW7<_P;?t;GB$osVAUJ$xYzRsiER@tLU64>BQqs279Sa*kE#O;) z)eq+*&6?^(SQZLuPDl-D+euQ)!PP5+ zCd=%|Gn#jftg(aOn3ojzs0edhnY~mN{1`47e*s~KSk#4Hi5TfUWqu2a0@!rI*`qJt z@A|vXn&|)Ch{7I##7Pr_SjdGpO%mDhq;HRJIp27M@R|?0L`!Ggp@8P(~Fj zF2qS>1ER|;@xwxaFrvUQ7d6ufC8d)|09U-}@e2#PARpQ80HSITmKt~iDYOpOVa9mV zsGNPlEF;}RW&s|UAlXMD@sO@Z&O}qfRsN?KUmA}`{t#Uybg>d>$iy9 z+id~`WOtq==M}s^BTaHQWB5nIMHG=-J>sr{Ie04+9RVqWY)jRNQFaFgUcHqr{kfSb_ zoPguCpg3IaULH{CsPH;7Q!SR_7vh3`&1p`+2~PjJGD0(9KF1nxo=z+IMlR;7FH5*5Hi?&~n$!;dV*Xqr`3UHG@lF_)yK_Im`fQ#L* z^SQ4IB==1ac)&?(cPs^gYINMK6%qDTNFWU{0fC1GirNIICV+4dug2RLul7yyO1n*r 
zscq$8o$)v1U=?eYS3%J^zw=FE;?5rcloV3;ir7tDop1J4YxAvpZN!p3L-wnNWj$Fi zl*O}PVQi8c=jx3U$>c#7o0yT6O&Jo2a&+S55#fsWcDI8I3k{`4;BK5KnRc5NE-y69 zb~+v3`AY2~6L5VDpSRi#R27psJ`<&7vAeM;Xg8>_>NIZq7N#usU z(d_6ryLSsOtevQ^9Kc&%+U%qX6XtCUDowJcUD|9a+qMcwh&@v*wP)2MvlYiDA4v1{ z0}KMjoPQA6(WQD?HQ6PX)`t?JNXT{9y@*WPnS5!7!6qGUs+)vf>gq>Y9XV1~2+hXEbzkSrqJOEgW7y;w7z6dscVcP|bo>Ilih7z_KkBY|DgUCyjh_2LT;@2`F)9QQ7Uod4V8O8T?QZP0woaX- ztJ`&WM!p8lRP--V!UiCw!PpTY05Wuf{l3Np5O1e-6`vu^V2>vF4j8Z}k>PdbB z@>cVY!pZud?0l?52LT#mgjN@Jdso(}T})ZSVx)+$K7G2vLFte{?`zFyo&7cR4z3Vg z7Ph1bZ7%-nJXn-)OJ!MY)?^Co*(M-!0Ft}({;vQ}C^;xVI+AZ&F`?@jv9JAJ9@RQm z2k}0k%{MXgCbc!VPG*X%?XF{fSZ^-&4`_0L#@?;M#I_eP9NB$_ocFczm5bjBQ~&v9 zm}y=d`70pjLI#2m7150eQ#1Yr*-_>tex)tmZ!)DNXTcy`A>Yv#Kg7Q{--xszAZOIw zN)+6Hj%@siK`sqQy7PR=D0EsPh?m&S`+tIdq0#1A(HRmJ*ROot`@OjR6Wik+CcCZC zWFW?w5lOjZMXS!-ds4#I-ro1PqHzsa!H`6+!ABV5+qn_H3{%h)mz=}#tt0FYsOV77 zMX&R;Vfj07S;2e^zif7iKu0u2CR_H{A;T6g{`N;?hJi@yTunm`K)+moaoQ8^E_Ja% zr4ml^_$tr)(^&$g1MDl@AXmoC`POy+zZ@!3wVwpGKMA+-fCktN6VEyc4W4RLawqj5 zRkW4cDMm`xq3Y`{2`xA22uQ@C(~wxA<$s#H?sux=w||t32nm^?WJXqGOA)g7j8Ik~ zd!3LK*?X_7g9zC(%HBd*C)t~9$9Qhv=ee%udj5mQ4}Neid_M1SkJsyU-?!j)pKx16 z7-tb5=yk|OX%4(RvpQ?b1oorFwUfuSnINF|WMy+IguMt8zuB4I^DBTlDUEQVeUQ9L zXF@y#*r0LSM{N-z<(KM?X8R8u>2GvX=(E2w;h~r*$lr8hbv>!+h?3OpGMJh0Ibb0^ zLt(yi0N&q>=dFVG$=LVW2$Q0cMVk~M-|cNJD#34Yyjht1Y-{%g244?lWtWV+e0F`u zZ&&k7nRW2kld@-DCG@nZT(ws)v6qRUdi;kjk6O#?_ zcusu*%b{#2mCS5tXvoiBLYFLG!(`mIq&jBq&OJgiLI~$hXE?T5JsZZUUs+^WR~b+0 zG^joVK(I!3vRDe4)Kz+w^;r~Ld|~><205WzE9lQ9IdL7&2kX*roa@tXJiqa`RfXp5 z@WgnVXnxSHs& zpX%1KU{}Ce(gva z|8)pEVMANF4eCOEyAfLC6~2USyZS;}n^AMranGpO2Zoq%4>03F6-6ctb5(mp>6j`|4e@&K8}jZd{+G| z>D=T3Y}Ee??5eEd@80e@{;9&n$R&m1e9QhKrut;p+XUp7YtOmC6Iv-@lsIF&k5${x8S8|Kc+Qf>6`EL`>|bks*sU#XEo>{#Ku zP%5ex-9~1dhYneHWB3p?kQMi?;jOz74IKh6zpU7f+D{S>%P&J-=Q?)a9}P#M%QAWE z2*j=0i@pF@h-Q7;1}6|6HbOB6CnqPkO{hyR8C$)ANKu^UdR~8+93CdWJ%1^sl30F6 zY)*vIA!)%YI`Ede1?U)~S(Nu(MM3LON>W#rS?xfMJ4>=Dr^kW!Eb*Jp2l8HHzljWPMmKS8a^l-1IT}wgI_SQtRVnqtv 
zbJy#lQ*;-wuQAPEV(X0XZ)W||ATPuA-&&Sak?OU`Y!@2alH8W3PgW(NrJnij9gsqP z?)ro7HCMv!Z#+>*cgr2NeX?UsiHDWeJ2{Y}3y_>K8MU*^Xjt88r0L&z`kQiMD;U#p zJc6=6W+*3y?MF@+5D2l2WpIpA?6nOV5|Tis`0$Ai$t3n2>>e^>%4@BZ*bL1CClcpJ z{cw{fz0OZFRV>E8T0#MUEpTB-B=YfN-+}@dL<4~bvBb$%jvBfoyRgt2v%mgHCNxRH z>m3N`E{0OsnxnzO`hkT#+OLM*;kMN?0V^vj`}_OTI_Yf96fV1;b_U+-2~Xc%4B_p- z7aK&G5wkStAJ+4j)f=ujBwj3PVNwn|P~mQZ ztw^%A;+6fYac8c)#d7$)lQ)UiNbP0R8`s~zc}TUXpeDo`A|ULPBLzuoI~pb zeSV6LV0_&3icfJ^5JnPYJ`TKy)i&xj?P+g*{6Ta}^biUrklHydKYsjB&r_NLZA~#4 zKU?%WXkEEJS+gT@#AG>?a`zNZxVNhJVm$$de0_SnhlGU=-J4IST?yuj324Rje^1>u z)vVj~*6>OYmyk90GP)BG1X9B?=F1LVqF;RP7nzGDyN&yR_*9H@Av8xU`C&T~v!|@L z3aDgp54QXCazP;GK}9ac`GQ}{tT|FW3Q4=J*DkyovJndF)z;)I7;kJ6el?RssZ~wk z4lhHgzUfu&zIY}+RmH*i{3N~rE2N-+anRl&LFAA%UsTb8jd9^dIh|K2u2geJpuJM# zZkTaJ0R*qi0W=6<@atPx;Pj z!V@R@IBsfiwqJGo8bYqFEqtE$%Uw$0u%uCPe%G}SmihM$Z^$KP`c3hBxyLbZR647f z+cm6(X3*aNk!@V}bSCd~i$-8+P*4jo!(!DZ>$X3swN`B5@wrM*Q650~P@B}0L3#j> zBMXeqCC#7LenW@iO69C%^oKY79_zolXRy;p|BOn!H(>~|iR6}h_qaErDip|s9avK}P1qE+DiYO>30H2A4uPv$^@~CD1DTIK*r>MYBVQg((wmm*+~3R1lXIB4z^7fB z6$Phu4`;)bQ-$}oW?CCc#ib;N(VX1mvyQiJ{krq8ZP;h(Jt|>3QtdvE^DTCVJHkk`1YJg+>3X8f-51I8zEVw zS*AU_altu9?J#TNo9(tE^^aPpWHy<(KGCPm@iXR+2hdD{YW#_CwK6xwi`q-sw?=rq z!F#6cwe=~J0#>!;-7#lsuQ8%2xd46IJN1-s4)EP(S^JqQUE@$l>H3V(CCj;#iV#~C zTehcrOy6r`0KRGU3e}j79Qmgd(j#@u+4alGDC0{XM4X$|-O4(~(%9+dG0Kaz!|Cf?oNR2U;^N|xk|ywyO>Wy;{$B|VX+`_va%>V=UAXy|S*rk*Es;ZFA5+dEGwR*IyCSG;$ z6~5p%*3c)(ciUZStvU5Yq23_CX_QLrT2V#HpA+^MJ{8%;UBWK{gwn0p+l0au%ou~;81DJlf~fe=uXE@7hEGgBM*Ez$QO>DN4~>OU8pky zD-I2Ex_T1%XsD^(EkC|+nFu&z=a`QBX%_m{~4St zCo1`w-6qAfIKY;FKVMdB71ZcPK)_X{oN-!rILX7msm^otcKN`@9g}Iwu8Ob<Qz-iQ256cu^`S8@;5tDkhrpmM^pDW{V zShdsV-d3JyPD?2zSvsut{K5QmM^5n|x$#CgrgA%sEyRT{o-bIzv4i9iY_OfgW{TCC zUbNjP-*j(%DdjLZLp#3%KE))$ZSfGii5Ypl@O<4w}N|4`^m*bz_ z^QETkeTjVLT`_F&R-XtK>w>+9vtb)SUx+3EKPwJO4KT4Y&u})p9)lK_I?k2$25O^~ z^p#)dYyO{1WTRAd;)*)BR8jbfc!j#Y*QPlQzVS0xYcrIq#&~Dt<)n18Dqmhq={v04 zkzv7M^j?>TqLdAsVzWxPqoOW^Jyml 
zp`(R`Did;v(iGRbD=7dwFWSqfnP|+5cB*tZqTQt*7drOsd9p@QSVdE#3W?yEnPo?TH->>CM6 zQPs#&4Vjzs5yR1^V}V1KJ4!empud2bsvAJ|-C{Zmd$9 z9|9qsD>+Y3GuBZ?jSu&?KN5N-Cnd$*H<}(8h_KI-5RAo0Z|oLom%M|g_Wkzfd(aJf z_a(-p-2S?Zcv?S6D80zWLR>hrgJ54=;?a?j)9r9CgL>Du_upI>W++M-)YUOx(`(h^ z_+Vymqs7XZeEIix;Z>@0XP>UeFUW$^qG)gA7w;KNQ?gGpeqsvYQQ6I2E8?p}Qn~=C zB;u-mJ@>8;rD{3*wZ=CFz!IOm%`yu^IcVN?xkoO<{4(Zm5r>=(iQ&lgOKQ?|%nX{N z*JOhM3#1e?#6EMz!%RXcxgvoxyBJ-7+&d zcuU)8iL~)ZZAGWga{lFI#mjNLEp@T?^E0Qec`g`Bg`uQmyNh`tyY+Vp$&0Ltxo)p8 z^H>jRq8cJOlU}SCq;^m#?$gHG-3ZYq=}Y|@LshR|6UEUB63pNdekWuSYvP;N7*fIjO;toA5Myoy~h&uz`^!hSlF}{>%FI(EO<)We5 zM>$ROcZpFG*u~HKRe;t3adEDwRs-2%PEO7uPKJ|{6LjyqNPlYLbLjkrvX?b4a&C+) z3?u^`pt9HTu*gRBr}#>eJ1bq?B2>656MZ7g!)NPKpiNT8T8t`Zms7r@?dU&P9`uQh zRla#}fiFkr2VZD0PT$gZ6579~a@`1DTN-WA;h+02CUyIb^vrpbhg?wq(L=dM;%YiN z)!+x9)Ej!iZC+010+x^08abHfQ2V-7_FPN6D(RoiLfO-ZGW@WLZns%fpYn}9;ZN4r zl64;qT+!+0VJk@j`S0MOl{%(9DCIg1WuQ)clvv8zVGL(Nn`5!rM=tZ)Q3vUGJ`hev z=1`Rl3}8Kv-TTykft=7rFBKR~;PKgSv>&>-xF`2&sj8M5e|t}Vy7bg2v8&-B1D+HK z_9O9gCbkBN%`)J5ANk@;!7CxWA*BkvCFb4{!Pp?kdsi{({K3$5yQIzgi$t$iayT{Q zjG6|1U^K<^v)v=sZlF3;gT69S`aTK!(C2Q733lDE0E+?g_VIzON2NmCQkYL2ZtSDsQTI|l~=j;}y6=U(54wvnNM!3l)xUY)l8IZ!oU zvsqr`9Gyi)M-Tt{^=n|@i{tXp^IG*McoyRmSK@eS1*Nu$gk0_|PxY7+v;5|g9&>C9 zaufIx78K%j)0W1HA-+KNX8DebHY4dV8*ymMV~m+2@s@P;Sr{gI{rVV?#-o+EZkO0I@VtRWfB` zRApnh(r(6Exa>V- zrOb5bLB4gicYbVM(Xf>Fe=v0JQ)$ck{Y>bbV+1L)-l#CIGC;O@z8aB9Y z|BXp7B<0F}aga6WWEl7IJwxM3(pO)VB>sj+;Ac;aBbY3w>)mRLn*gaXDyBmFL1dzD zdoZt@`4}UuILb)NG#-t>k#UPE|% z!=X2_mhZSpu(tv2cn*?#0EFE@H|N2|%#mu_*1NUBQRgTdYF1fMfC(3>Oj!{pJ7}|` zWtp7|q2gQUL;j7b9`?EmSqceust8v>Mn?0x^TB>&!}mMkqV|w{IiQcGHs6gw7xK-y zR&3vI5aaFd?s5~BLjy5~Khjh2q^A#baYgIoj~5b`Rfbj@r&VHMWVo2gW*owbJ_lX@l7W= z+1YcD@sLQD3jVz5+23fJw8;pR&d%k_k0RoY-W}y8+Z2?P7*$xs4qBWr7uFaQUnGn3 zU9q@Cv=7II{^U;BRG%HMVs@7ug2eZvO{ylyrSSt>&1sxZ-<=cJ61jcdnx`k>%4 zaNQfxzKnopfLj?|;7udqyaxBt9T*pJDDp8cNf&XkfdQR0BGd3Q8bKS+jhaPBud6%` zHg+h?boXyI9ZKt7eEkM-G>l{J-@r_JY$_)5<`fly?RW|Qy`rLmoSa-)bo+@dy=v0A 
z{E)Pc<-7c)T5;#KUm(VURCaH5Z1W?R$XM#xYi%1jIXQT0B=Xch&%TKRDm<)yoiba$ z(+2OC!K$aJ8GBrsm1P0|I_f5EJD^Ek)I#=iQ!Z#aqCjMb~YL@8_)r$Cru0k#sqplxvqL`gfYm|3JPF^4KWq(+;U#J03{)y6#5PBd$8MFTbNwA=-U^dATc@KdsqS)Zkv^(sIzS4W$JC9(vB2hsebAw|$JUDwHsteCdD2j?n`WZgKsEN#=gejBmSqC-8 z3l8?%KN3Z@G40!80k3*frV18kOX^lPkR>;^e`7X~2R1L6Q3`T$C=`m(PlarL+qJf~ z7LW1Dl`EE(mOwZOrxsUMjxIY=kdwnX0ujo6Q6xSioDl=A`2Iz{R=flp3*E!E5qF-3fvDGq`S}2WYJMkX1u!6(*Dm&!tH`F^pv-5?YlQXYMQAHXAuU3jgO2xxZ{m(@Tjt% zmk-X$dgw(T-~HzGYyLz93FM1|@C-E+Ax`Vsde5&v-@ktk3c{D_SRE^I6Mc?Eij^xX zDG`+yX%#N}A0E2K%D)sy4wW%gRt`sRYO|7w-s(5z*0~4W6)PPb9Yx*#&E@&?Tq7bT zewH&76GP?>&FSTtqO?n%4`S7O9@)ElNJ(AV;Yf;zkQ}4!+*49iw5%9!Ym*6;Q8zS1 zLh%%|+e(ka^}37A{XOpFLlo}S4H z>oK3pxK{}XKIv=vm0WH>Vn#O)78e(vBF`GDtE-zBkc%%NiW(jsrlX@nH`s}%_upG} ztW6fQh0s=(iB!ye_ru0pElo|R=BaIH5EB#>6s=io;b)ERyyPRvO_?p>Nk@othjxTK zvpt+XFFV_!LQhH-iEJ?75=`#>cz_iA@Dk7X11HR=c~$<|sA>9Dd2D>VgzMIy(>tV+(P@6FH6SyYUJjb-$vT3%ijNbY@A&Z7@wdGdyrv(?yk zLIfBYA2CN?m#Q`>p0W|7Buq?9ggXI&lL?MM;HEO(1&6yloHu22u(Y()9e=;~m4lm` z5Mwrcm6w-;Bdq)J;fDz`rj=&S?jamfVb?7NLz(B#r{RVz-?BQip`xPd*_qwSedpdl zOT=>W8#}!}kCnZJp5f$8E|!Vf%a=Kl3>i&gM+6Xa3x>hmffYSJPMvyhlJlfPeZaz2JifljVGF2Xmiz}6YA7drE>h1;< zK8teN$^4EQ*@a{14($IiV@F0#KG^-uScR2LNl8iO^xoFfW?$S7swSnbo}QlIWSZ>O z{0VOUw5k|eMeeWZvXbe!@)&T98FQxyJ9RDY_)BGukLzWs$P!;IM4{dar|#OoeU|wY zN#qM2?P01?y>pozUu_5Je4baZ$9 zTpOnIW1gtS3|f%kVS9=szSbUgHOHD=2cagjQUfhX|iNV8u53RY(%*|a~T%OMV z+%knbW5&C@cMQO$2yGY~24iklaDMrdCpv^&@z1#8Ibyq7TBMrL{mw$o)Se$MoAd$p zO@8$Upl7@8NHBX4h}qhfXt1Z+ion$S&d$ymb$rfA{)J6n)=btF!Jb8|QbH_39`l2Z zDWgVDSf00U-};e+%JA{<%s2`!dPvyuT%trU=xq^FnskF-IzB!w(&9*M^os!Pyd=ZG z=%~OLBV4eia|~R&%>pun@guv-*e2a@UYGmDc$rA$yu8kdi4wQF25wW}B1pszt*?01 zr=+By&$GA@&6j2<_z?&M0ZH@L*k8McaJ{P8t|3bCKV@Jc5IWwspb5^!hw%NLB;qmx m@dbfJh=5` を参照。 + +- ``SpectrumQW``, ``SpectrumQL``, ``SpectrumQH`` **形式 :** 実数(デフォルトはともに\ ``0.0``) - **説明 :** 計算する動的グリーン関数の波数を Fractional + **説明 :** ``SpectrumType`` が ``"SzSz"``, ``"S+S-"``, ``"Density"``, ``"up"``, + ``"down"`` のときのみ使用。 + 計算する動的グリーン関数の波数を Fractional 
coordinateで指定する。 逆格子ベクトルは :numref:`fig_chap04_1_lattice`, :numref:`fig_chap04_1_honeycomb`, :numref:`fig_ladder`, :numref:`fig_kagome` diff --git a/doc/ja/source/fourier/overview.rst b/doc/ja/source/fourier/overview.rst index 365369a23..bbf6b501e 100644 --- a/doc/ja/source/fourier/overview.rst +++ b/doc/ja/source/fourier/overview.rst @@ -2,7 +2,10 @@ ==== 本資料は, mVMC および :math:`{\mathcal H}\Phi` で計算された -サイト表示の相関関数をFourier変換し, 出力するユーティリティに関するマニュアルである. +サイト表示の静的相関関数をFourier変換し, 出力するユーティリティと +:math:`{\mathcal H}\Phi` で計算された +サイト表示の動的相関関数をFourier変換し, 出力するユーティリティ +に関するマニュアルである. 要件 ---- @@ -60,3 +63,16 @@ &\equiv \frac{1}{N_{\bf R}} \sum_{\bf R}^{N_{\bf R}} e^{-i {\bf k}\cdot{\bf R}} \langle {\hat {\bf S}}_{{\bf 0}\alpha} \cdot {\hat {\bf S}}_{{\bf R}\beta} \rangle \end{align} + +動的相関 + + \begin{align} + \langle {\hat X}_{{\bf k} \alpha \uparrow}^{\dagger} {\hat X}_{{\bf k} \beta \uparrow}\rangle (\omega) + &\equiv \sum_{\bf R}^{N_{\bf R}} e^{-i {\bf k}\cdot{\bf R}} + \langle {\hat X}_{{\bf R} \alpha \uparrow}^{\dagger} + (\omega - {\hat H})^{-1} + {\hat X}_{{\bf 0} \beta \uparrow}\rangle + \end{align} + +励起演算子 :math:`{\hat X}` は任意のものを指定できる。 +スタンダードモードでは上記の1体相関、2体相関の励起演算子を自動的に生成できる。 diff --git a/doc/ja/source/fourier/tutorial.rst b/doc/ja/source/fourier/tutorial.rst index fc62154b2..21e83c16d 100644 --- a/doc/ja/source/fourier/tutorial.rst +++ b/doc/ja/source/fourier/tutorial.rst @@ -1,7 +1,7 @@ .. _tutorial: -チュートリアル -============== +静的相関関数についてのチュートリアル +==================================== このチュートリアルでは, 正方格子ハバードモデル(8サイト)を例にとり説明する. @@ -125,3 +125,86 @@ gnuplotを使って, - kpath.gp (:ref:`gnuplot`) - output/zvo_corr*.dat (:ref:`zvocorr`) + +動的相関関数についてのチュートリアル +==================================== + +このチュートリアルでは, 1次元ハイゼンベルグ模型モデル(12サイト)を例にとり説明する. + +HPhi の実行 +----------- + +基底状態および相関関数の計算を行う. +入力ファイルは次の通り. 
+ +:: + + model = Spin + lattice = Chain + method = CG + L = 12 + 2Sz = 0 + J = 1.0 + CalcSpec = Scratch + SpectrumType = SzSz_r + OmegaIm = 0.1 + OmegaMin = -6.0 + OmegaMax = -2.0 + +.. code-block:: bash + + $ HPhi -s input + +これにより, カレントディレクトリの ``output/`` 以下に +動的相関関数が出力される. + +関連するファイル + +- stan.in (mVMC/:math:`{\mathcal H}\Phi` のマニュアル参照) + +相関関数のフーリエ変換 +---------------------- + +ユーティリティプログラム ``dynamicalr2k`` を使って, +相関関数をフーリエ変換する. + +.. code-block:: bash + + $ echo "4 20 + G 0 0 0 + X 0.5 0 0 + M 0.5 0.5 0 + G 0 0 0" >> geometry.dat + $ dynamicalr2k namelist.def geometry.dat + +これにより, カレントディレクトリの ``output/`` 以下に +フーリエ変換された相関関数が出力される. + +関連するファイル + +- output/zvo_DynamicalGreen.dat +- geometry.dat (:ref:`geometry`) +- output/zvo_dyn.dat + +相関関数のプロット +------------------ + +gnuplotを使って, +相関関数を :math:`k` 空間でプロットする. + +:: + + load "kpath.gp" + splot "output/zvo_dyn.dat" u 1:2:(-$4) w l + +.. _dynamicalr2gpng: + +.. figure:: ../../../figs/dynamicalr2g.png + + 相関関数 :math:`\langle{\bf S}_{\bf k}\cdot{\bf S}_{\bf k}\rangle(\omega)` の虚部(4列目)を + プロットした図. + +関連するファイル + +- kpath.gp (:ref:`gnuplot`) +- output/zvo_dyn.dat diff --git a/doc/ja/source/fourier/util.rst b/doc/ja/source/fourier/util.rst index c57afb476..c23815bf7 100644 --- a/doc/ja/source/fourier/util.rst +++ b/doc/ja/source/fourier/util.rst @@ -7,7 +7,7 @@ $ ${PATH}/greenr2k ${NAMELIST} ${GEOMETRY} -ここで, ``${PATH}`` は ``fourier`` ユーティリティのバイナリのあるディレクトリのパス, +ここで, ``${PATH}`` は ``greenr2k`` ユーティリティのバイナリのあるディレクトリのパス, ${NAMELIST}は :math:`{\mathcal H}\Phi`/mVMC の NameList インプットファイル名, ${GEOMETRY}は :ref:`geometry` ファイルへのパスである. @@ -22,7 +22,7 @@ HPhi-Lanczos この場合に ``HPhi`` が ``output/`` ディレクトリに出力するサイト表示の相関関数は, ``zvo_cisajs.dat`` (1体), ``zvo_cisajscktalt.dat`` (2体)である. -``fourier`` ユーティリティーは, これらを読み込みFourier変換を行った後, +``greenr2k`` ユーティリティーは, これらを読み込みFourier変換を行った後, 単一のファイル ``zvo_corr.dat`` を ``output/`` ディレクトリに出力する.
HPhi-TPQ @@ -31,7 +31,7 @@ HPhi-TPQ この場合に ``HPhi`` は, 各試行/TPQステップ毎に ``zvo_cisajs_run*step*.dat`` (1体), ``zvo_cisajscktalt_run*step*.dat`` (2体)というファイルを ``output/`` ディレクトリに出力する. -``fourier`` ユーティリティーは, 各試行/TPQステップ毎に +``greenr2k`` ユーティリティーは, 各試行/TPQステップ毎に 1体および2体の相関関数を読み込みFourier変換を行った後, ``zvo_corr_run*step*.dat`` という名前のファイルとして ``output/`` ディレクトリに出力する. @@ -41,7 +41,7 @@ HPhi-全対角化およびLOBCG この場合に ``HPhi`` は, 各波動関数ごとに ``zvo_cisajs_eigen*.dat`` (1体), ``zvo_cisajscktalt_eigen*.dat`` (2体)というファイルを ``output/`` ディレクトリに出力する. -``fourier`` ユーティリティーは, 各波動関数ごとに +``greenr2k`` ユーティリティーは, 各波動関数ごとに 1体および2体の相関関数を読み込みFourier変換を行った後, ``zvo_corr_eigen*.dat`` という名前のファイルとして ``output/`` ディレクトリに出力する. @@ -53,7 +53,7 @@ mVMC 試行を行いインデックスをつけられた ``zvo_cisajs_???.dat`` (1体), ``zvo_cisajscktalt_???.dat`` (2体)というファイルを ``output/`` ディレクトリに出力する. -``fourier`` ユーティリティーはそれらのファイルを読み込み, +``greenr2k`` ユーティリティーはそれらのファイルを読み込み, 各試行に対してFourier変換を行った後, それらの実部, 虚部ごとに平均値 @@ -75,3 +75,20 @@ mVMC を計算し, 平均値と誤差を含んだ単一のファイル ``zvo_corr_eigen*.dat`` を ``output/`` ディレクトリに出力する. +``dynamicalr2k`` ユーティリティの動作について +============================================= + +このユーティリティーは, 次のようにして使う. + +.. code-block:: bash + + $ ${PATH}/dynamicalr2k ${NAMELIST} ${GEOMETRY} + +ここで, ``${PATH}`` は ``dynamicalr2k`` ユーティリティのバイナリのあるディレクトリのパス, +${NAMELIST}は :math:`{\mathcal H}\Phi`/mVMC の NameList インプットファイル名, +${GEOMETRY}は :ref:`geometry` ファイルへのパスである. +この場合に ``HPhi`` が ``output/`` ディレクトリに出力するサイト表示の動的相関関数は, +``zvo_DynamicalGreen.dat`` である. +``dynamicalr2k`` ユーティリティーは, これを読み込みFourier変換を行った後, +単一のファイル ``zvo_dyn.dat`` を ``output/`` ディレクトリに出力する.
+ diff --git a/src/readdef.c b/src/readdef.c index 4293d5fff..76a2db2ec 100644 --- a/src/readdef.c +++ b/src/readdef.c @@ -267,7 +267,10 @@ int ReadcalcmodFile( } if(CheckWords(ctmp, "CalcType")==0){ X->iCalcType=itmp; - if (X->iCalcType == Lanczos)X->iCalcType = CG; + if (X->iCalcType == Lanczos) { + fprintf(stdoutMPI, " LOBPCG is used alternative to Lanczos.\n"); + X->iCalcType = CG; + } } else if(CheckWords(ctmp, "FlgFiniteTemperature")==0){ X->iFlgFiniteTemperature = itmp; From 4407225c60ed89a546a8bf15d1c597092b75e546 Mon Sep 17 00:00:00 2001 From: mitsuaki1987 Date: Fri, 12 Apr 2019 10:53:32 +0900 Subject: [PATCH 33/50] Implement CMA --- src/CMakeLists.txt | 2 +- src/StdFace/ChainLattice.c | 8 +- src/mltplyMPIBoost.c | 429 +++++++++++++++++++++++++++++++++++++ src/mltplyMPIBoost.h | 31 +++ src/mltplySpin.c | 52 +++++ 5 files changed, 517 insertions(+), 5 deletions(-) create mode 100644 src/mltplyMPIBoost.c create mode 100644 src/mltplyMPIBoost.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c74b7ac6c..61745c769 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -12,7 +12,7 @@ add_definitions(-DDSFMT_MEXP=19937) set(SOURCES FileIO.c HPhiMain.c HPhiTrans.c bitcalc.c check.c CheckMPI.c dSFMT.c diagonalcalc.c global.c log.c input.c output.c output_list.c readdef.c sz.c xsetmem.c ErrorMessage.c LogMessage.c ProgressMessage.c wrapperMPI.c splash.c time.c eigenIO.c) -set(SOURCES_MLTPLY Multiply.c mltply.c mltplySpin.c mltplyHubbard.c mltplyMPIHubbard.c mltplyMPISpin.c mltplyHubbardCore.c mltplySpinCore.c mltplyMPIHubbardCore.c mltplyMPISpinCore.c) +set(SOURCES_MLTPLY Multiply.c mltply.c mltplySpin.c mltplyHubbard.c mltplyMPIHubbard.c mltplyMPISpin.c mltplyHubbardCore.c mltplySpinCore.c mltplyMPIHubbardCore.c mltplyMPISpinCore.c mltplyMPIBoost.c) set(SOURCES_CG CalcByLOBPCG.c ) set(SOURCES_TPQ CalcByTPQ.c FirstMultiply.c) set(SOURCES_DIAG CalcByFullDiag.c lapack_diag.c matrixlapack.c matrixlapack_magma.c matrixscalapack.c ) diff --git 
a/src/StdFace/ChainLattice.c b/src/StdFace/ChainLattice.c index 0d2730258..63a3e13e8 100644 --- a/src/StdFace/ChainLattice.c +++ b/src/StdFace/ChainLattice.c @@ -92,7 +92,7 @@ void StdFace_Chain( StdFace_PrintVal_d("D", &StdI->D[2][2], 0.0); StdFace_InputSpinNN(StdI->J, StdI->JAll, StdI->J0, StdI->J0All, "J0"); StdFace_InputSpinNN(StdI->Jp, StdI->JpAll, StdI->J0p, StdI->J0pAll, "J0'"); - StdFace_InputSpinNN(StdI->Jpp, StdI->JppAll, StdI->J0pp, StdI->J0ppAll, "J0'"); + StdFace_InputSpinNN(StdI->Jpp, StdI->JppAll, StdI->J0pp, StdI->J0ppAll, "J0''"); /**/ StdFace_NotUsed_d("mu", StdI->mu); StdFace_NotUsed_d("U", StdI->U); @@ -261,11 +261,11 @@ void StdFace_Chain_Boost(struct StdIntList *StdI) 0.25 * StdI->J0[2][0], 0.25 * StdI->J0[2][1], 0.25 * StdI->J0[2][2]); fprintf(fp, "# J 2\n"); fprintf(fp, "%25.15e %25.15e %25.15e\n", - 0.25 * StdI->Jp[0][0], 0.25 * StdI->Jp[0][1], 0.25 * StdI->Jp[0][2]); + 0.25 * StdI->J0p[0][0], 0.25 * StdI->J0p[0][1], 0.25 * StdI->J0p[0][2]); fprintf(fp, "%25.15e %25.15e %25.15e\n", - 0.25 * StdI->Jp[1][0], 0.25 * StdI->Jp[1][1], 0.25 * StdI->Jp[1][2]); + 0.25 * StdI->J0p[1][0], 0.25 * StdI->J0p[1][1], 0.25 * StdI->J0p[1][2]); fprintf(fp, "%25.15e %25.15e %25.15e\n", - 0.25 * StdI->Jp[2][0], 0.25 * StdI->Jp[2][1], 0.25 * StdI->Jp[2][2]); + 0.25 * StdI->J0p[2][0], 0.25 * StdI->J0p[2][1], 0.25 * StdI->J0p[2][2]); /* Topology */ diff --git a/src/mltplyMPIBoost.c b/src/mltplyMPIBoost.c new file mode 100644 index 000000000..51c63ce67 --- /dev/null +++ b/src/mltplyMPIBoost.c @@ -0,0 +1,429 @@ +/* HPhi - Quantum Lattice Model Simulator */ +/* Copyright (C) 2015 The University of Tokyo */ + +/* This program is free software: you can redistribute it and/or modify */ +/* it under the terms of the GNU General Public License as published by */ +/* the Free Software Foundation, either version 3 of the License, or */ +/* (at your option) any later version. 
*/ + +/* This program is distributed in the hope that it will be useful, */ +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ +/* GNU General Public License for more details. */ + +/* You should have received a copy of the GNU General Public License */ +/* along with this program. If not, see . */ + +//Define Mode for mltply +// complex version + +#ifdef MPI +#include "mpi.h" +#endif +#include "Common.h" +#include "common/setmemory.h" +#include "wrapperMPI.h" + +void zgemm_(char *TRANSA, char *TRANSB, int *M, int *N, int *K, double complex *ALPHA, double complex *matJL, int *LDA, double complex *arrayz, int *LDB, double complex *BETA, double complex *arrayx, int *LDC); + +/** + * + * Exchange term in Spin model + * + * @author Mitsuaki Kawamura (The University of Tokyo) + * @author Youhei Yamaji (The University of Tokyo) + */ +void child_general_int_spin_MPIBoost( + struct BindStruct *X /**< [inout]*/, + double complex *tmp_v0 /**< [out] Result v0 = H v1*/, + double complex *tmp_v1 /**< [in] v0 = H v1*/, + double complex *tmp_v2 /**< [inout] bufffer*/, + double complex *tmp_v3 /**< [inout] bufffer*/ + ) +{ +#ifdef MPI + + //double complex dam_pr = 0; + // MPI_Status statusMPI; + + // int ierr; + // int INFO; + char TRANSA, TRANSB; + int M, N, K, LDA, LDB, LDC; + double complex ALPHA, BETA; + long unsigned int i_max; + long unsigned int j, k, ell, iloop; + long unsigned int i1, i2; + long unsigned int iomp; + long unsigned int ell4, ell5, ell6, m0, Ipart1; + long unsigned int mi, mj, mri, mrj, mrk, mrl; + int indj; + long unsigned int ellrl, ellrk, ellrj, ellri, elli1, elli2, ellj1, ellj2; + long unsigned int iSS1, iSS2, iSSL1, iSSL2; + double complex **vecJ; + double complex **matJ, **matJ2; + double complex *matJL; + double complex *matI; + double complex **matB; + double complex *arrayz; + double complex *arrayx; + double complex *arrayw; + long unsigned int ishift1, ishift2, 
ishift3, ishift4, ishift5, pivot_flag, num_J_star; + long unsigned int pow4, pow5, pow41, pow51; + //long unsigned int pow1, pow2, pow3, pow4, pow5, pow11, pow21, pow31, pow41, pow51; + + i_max = X->Check.idim_max; + +/* +//zero clear + #pragma omp parallel for default(none) private(j) \ + shared(i_max,tmp_v0) + for(j=0;jBoost.W0, X->Boost.R0, X->Boost.num_pivot, X->Boost.ishift_nspin, X->Boost.list_6spin_star, X->Boost.list_6spin_pair, 1, X->Boost.arrayJ, X->Boost.vecB); + + for(iloop=0; iloop < X->Boost.R0; iloop++){ + + + for(j=iloop*X->Boost.num_pivot; j < (iloop+1)*X->Boost.num_pivot; j++){ + + num_J_star = (long unsigned int)X->Boost.list_6spin_star[j][0]; //(0,j) + ishift1 = (long unsigned int)X->Boost.list_6spin_star[j][1]; //(1,j) + ishift2 = (long unsigned int)X->Boost.list_6spin_star[j][2]; //(2,j) + ishift3 = (long unsigned int)X->Boost.list_6spin_star[j][3]; //(3,j) + ishift4 = (long unsigned int)X->Boost.list_6spin_star[j][4]; //(4,j) + ishift5 = (long unsigned int)X->Boost.list_6spin_star[j][5]; //(5,j) + pivot_flag = (long unsigned int)X->Boost.list_6spin_star[j][6]; //(6,j) + //pow1 = (int)pow(2.0,ishift1); + //pow2 = (int)pow(2.0,ishift1+ishift2); + //pow3 = (int)pow(2.0,ishift1+ishift2+ishift3); + pow4 = (int)pow(2.0,ishift1+ishift2+ishift3+ishift4); + pow5 = (int)pow(2.0,ishift1+ishift2+ishift3+ishift4+ishift5); + //pow11= (int)pow(2.0,ishift1+1); + //pow21= (int)pow(2.0,ishift1+ishift2+1); + //pow31= (int)pow(2.0,ishift1+ishift2+ishift3+1); + pow41= (int)pow(2.0,ishift1+ishift2+ishift3+ishift4+1); + pow51= (int)pow(2.0,ishift1+ishift2+ishift3+ishift4+ishift5+1); + + for(k=0; k < (64*64); k++){ + matJL[k] = 0.0 + 0.0*I; + matI[k] = 0.0 + 0.0*I; + } + for(k=0; k < 64; k++){ + matI[k+64*k] = 1.0; + } + + for(ell=0; ell < num_J_star; ell++){ + mi = (long unsigned int)X->Boost.list_6spin_pair[j][0][ell]; //(1,ell,j) + mj = (long unsigned int)X->Boost.list_6spin_pair[j][1][ell]; //(2,ell,j) + mri = (long unsigned 
int)X->Boost.list_6spin_pair[j][2][ell]; //(3,ell,j) + mrj = (long unsigned int)X->Boost.list_6spin_pair[j][3][ell]; //(4,ell,j) + mrk = (long unsigned int)X->Boost.list_6spin_pair[j][4][ell]; //(5,ell,j) + mrl = (long unsigned int)X->Boost.list_6spin_pair[j][5][ell]; //(6,ell,j) + indj = X->Boost.list_6spin_pair[j][6][ell]; //(7,ell,j) + for(i1 = 0; i1 < 3; i1++){ + for(i2 = 0; i2 < 3; i2++){ + vecJ[i1][i2] = X->Boost.arrayJ[(indj-1)][i1][i2]; + } + } + //matJSS(1,1) = vecJ(3,3) + matJ[0][0] = vecJ[2][2]; + //matJSS(1,2)= vecJ(1,1)-vecJ(2,2)-dcmplx(0.0d0,1.0d0)*vecJ(1,2)-dcmplx(0.0d0,1.0d0)*vecJ(2,1) + matJ[0][1] = vecJ[0][0]-vecJ[1][1]-I*vecJ[0][1]-I*vecJ[1][0]; + //matJSS(1,3)= vecJ(3,1)-dcmplx(0.0d0,1.0d0)*vecJ(3,2) + matJ[0][2] = vecJ[2][0]-I*vecJ[2][1]; + //matJSS(1,4)= vecJ(1,3)-dcmplx(0.0d0,1.0d0)*vecJ(2,3) + matJ[0][3] = vecJ[0][2]-I*vecJ[1][2]; + //matJSS(2,1)= vecJ(1,1)-vecJ(2,2)+dcmplx(0.0d0,1.0d0)*vecJ(1,2)+dcmplx(0.0d0,1.0d0)*vecJ(2,1) + matJ[1][0] = vecJ[0][0]-vecJ[1][1]+I*vecJ[0][1]+I*vecJ[1][0]; + //matJSS(2,2)= vecJ(3,3) + matJ[1][1] = vecJ[2][2]; + //matJSS(2,3)=dcmplx(-1.0d0,0.0d0)*vecJ(1,3)-dcmplx(0.0d0,1.0d0)*vecJ(2,3) + matJ[1][2] =(-1.0)*vecJ[0][2]-I*vecJ[1][2]; + //matJSS(2,4)=dcmplx(-1.0d0,0.0d0)*vecJ(3,1)-dcmplx(0.0d0,1.0d0)*vecJ(3,2) + matJ[1][3] =(-1.0)*vecJ[2][0]-I*vecJ[2][1]; + //matJSS(3,1)= vecJ(3,1)+dcmplx(0.0d0,1.0d0)*vecJ(3,2) + matJ[2][0] = vecJ[2][0]+I*vecJ[2][1]; + //matJSS(3,2)=dcmplx(-1.0d0,0.0d0)*vecJ(1,3)+dcmplx(0.0d0,1.0d0)*vecJ(2,3) + matJ[2][1] =(-1.0)*vecJ[0][2]+I*vecJ[1][2]; + //matJSS(3,3)=dcmplx(-1.0d0,0.0d0)*vecJ(3,3) + matJ[2][2] =(-1.0)*vecJ[2][2]; + //matJSS(3,4)= vecJ(1,1)+vecJ(2,2)+dcmplx(0.0d0,1.0d0)*vecJ(1,2)-dcmplx(0.0d0,1.0d0)*vecJ(2,1) + matJ[2][3] = vecJ[0][0]+vecJ[1][1]+I*vecJ[0][1]-I*vecJ[1][0]; + //matJSS(4,1)= vecJ(1,3)+dcmplx(0.0d0,1.0d0)*vecJ(2,3) + matJ[3][0] = vecJ[0][2]+I*vecJ[1][2]; + //matJSS(4,2)=dcmplx(-1.0d0,0.0d0)*vecJ(3,1)+dcmplx(0.0d0,1.0d0)*vecJ(3,2) + matJ[3][1] 
=(-1.0)*vecJ[2][0]+I*vecJ[2][1]; + //matJSS(4,3)= vecJ(1,1)+vecJ(2,2)-dcmplx(0.0d0,1.0d0)*vecJ(1,2)+dcmplx(0.0d0,1.0d0)*vecJ(2,1) + matJ[3][2] = vecJ[0][0]+vecJ[1][1]-I*vecJ[0][1]+I*vecJ[1][0]; + //matJSS(4,4)=dcmplx(-1.0d0,0.0d0)*vecJ(3,3) + matJ[3][3] =(-1.0)*vecJ[2][2]; + + matJ2[3][3] = matJ[0][0]; + matJ2[3][0] = matJ[0][1]; + matJ2[3][1] = matJ[0][2]; + matJ2[3][2] = matJ[0][3]; + matJ2[0][3] = matJ[1][0]; + matJ2[0][0] = matJ[1][1]; + matJ2[0][1] = matJ[1][2]; + matJ2[0][2] = matJ[1][3]; + matJ2[1][3] = matJ[2][0]; + matJ2[1][0] = matJ[2][1]; + matJ2[1][1] = matJ[2][2]; + matJ2[1][2] = matJ[2][3]; + matJ2[2][3] = matJ[3][0]; + matJ2[2][0] = matJ[3][1]; + matJ2[2][1] = matJ[3][2]; + matJ2[2][2] = matJ[3][3]; + + for(ellri=0; ellri<2; ellri++){ + for(ellrj=0; ellrj<2; ellrj++){ + for(ellrk=0; ellrk<2; ellrk++){ + for(ellrl=0; ellrl<2; ellrl++){ + for(elli1=0; elli1<2; elli1++){ + for(ellj1=0; ellj1<2; ellj1++){ + for(elli2=0; elli2<2; elli2++){ + for(ellj2=0; ellj2<2; ellj2++){ + + iSSL1 = elli1*(int)pow(2,mi) + ellj1*(int)pow(2,mj) + ellri*(int)pow(2,mri) + ellrj*(int)pow(2,mrj) + ellrk*(int)pow(2,mrk) + ellrl*(int)pow(2,mrl); + iSSL2 = elli2*(int)pow(2,mi) + ellj2*(int)pow(2,mj) + ellri*(int)pow(2,mri) + ellrj*(int)pow(2,mrj) + ellrk*(int)pow(2,mrk) + ellrl*(int)pow(2,mrl); + iSS1 = elli1 + 2*ellj1; + iSS2 = elli2 + 2*ellj2; + matJL[iSSL1+64*iSSL2] += matJ2[iSS1][iSS2]; + } + } + } + } + } + } + } + } + + + }/* loop for ell */ + + /* external magnetic field B */ + if(pivot_flag==1){ + matB[0][0] = + X->Boost.vecB[2]; // -BM + matB[1][1] = - X->Boost.vecB[2]; // -BM + //matB[0][1] = - X->Boost.vecB[0] + I*X->Boost.vecB[1]; // -BM + //matB[1][0] = - X->Boost.vecB[0] - I*X->Boost.vecB[1]; // -BM + matB[0][1] = - X->Boost.vecB[0] - I*X->Boost.vecB[1]; // -BM + matB[1][0] = - X->Boost.vecB[0] + I*X->Boost.vecB[1]; // -BM + for(ellri=0; ellri<2; ellri++){ + for(ellrj=0; ellrj<2; ellrj++){ + for(ellrk=0; ellrk<2; ellrk++){ + for(ellrl=0; ellrl<2; ellrl++){ + 
for(ellj1=0; ellj1<2; ellj1++){ + for(elli1=0; elli1<2; elli1++){ + for(elli2=0; elli2<2; elli2++){ + for(ellj2=0; ellj2Boost.ishift_nspin; ellj2++){ + iSSL1 = elli1*(int)pow(2,ellj2) + ellj1*(int)pow(2,((ellj2+1)%6)) + ellri*(int)pow(2,((ellj2+2)%6)) + ellrj*(int)pow(2,((ellj2+3)%6)) + ellrk*(int)pow(2,((ellj2+4)%6)) + ellrl*(int)pow(2,((ellj2+5)%6)); + iSSL2 = elli2*(int)pow(2,ellj2) + ellj1*(int)pow(2,((ellj2+1)%6)) + ellri*(int)pow(2,((ellj2+2)%6)) + ellrj*(int)pow(2,((ellj2+3)%6)) + ellrk*(int)pow(2,((ellj2+4)%6)) + ellrl*(int)pow(2,((ellj2+5)%6)); + matJL[iSSL1+64*iSSL2] += matB[elli1][elli2]; + } + } + } + } + } + } + } + } + } + /* external magnetic field B */ + + iomp=i_max/(int)pow(2.0,ishift1+ishift2+ishift3+ishift4+ishift5+2); + + #pragma omp parallel default(none) private(arrayx,arrayz,arrayw,ell4,ell5,ell6,m0,Ipart1,TRANSA,TRANSB,M,N,K,LDA,LDB,LDC,ALPHA,BETA) \ + shared(matJL,matI,iomp,i_max,myrank,ishift1,ishift2,ishift3,ishift4,ishift5,pow4,pow5,pow41,pow51,tmp_v0,tmp_v1,tmp_v3) + { + + arrayx = cd_1d_allocate(64*((int)pow(2.0,ishift4+ishift5-1))); + arrayz = cd_1d_allocate(64*((int)pow(2.0,ishift4+ishift5-1))); + arrayw = cd_1d_allocate(64*((int)pow(2.0,ishift4+ishift5-1))); + +#pragma omp for + for(ell6 = 0; ell6 < iomp; ell6++){ + Ipart1=pow51*2*ell6; + for(ell5 = 0; ell5 < (int)pow(2.0, ishift5-1); ell5++){ + for(ell4 = 0; ell4 < (int)pow(2.0, ishift4-1); ell4++){ + for(m0 = 0; m0 < 16; m0++){ + arrayz[(0 + m0 +64*(ell4+ell5*(int)pow(2.0,ishift4-1)))] = tmp_v1[(1 + m0+16*ell4 +pow41*ell5+Ipart1)]; + arrayz[(16+ m0 +64*(ell4+ell5*(int)pow(2.0,ishift4-1)))] = tmp_v1[(1 + m0+16*ell4+pow4 +pow41*ell5+Ipart1)]; + arrayz[(32+ m0 +64*(ell4+ell5*(int)pow(2.0,ishift4-1)))] = tmp_v1[(1 + m0+16*ell4+pow5 +pow41*ell5+Ipart1)]; + arrayz[(48+ m0 +64*(ell4+ell5*(int)pow(2.0,ishift4-1)))] = tmp_v1[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+Ipart1)]; + tmp_v3[(1 + m0+16*ell4 +pow41*ell5+Ipart1)]=tmp_v1[(1 + m0+16*ell4 +pow41*ell5+Ipart1)]; + tmp_v3[(1 + 
m0+16*ell4+pow4 +pow41*ell5+Ipart1)]=tmp_v1[(1 + m0+16*ell4+pow4 +pow41*ell5+Ipart1)]; + tmp_v3[(1 + m0+16*ell4+pow5 +pow41*ell5+Ipart1)]=tmp_v1[(1 + m0+16*ell4+pow5 +pow41*ell5+Ipart1)]; + tmp_v3[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+Ipart1)]=tmp_v1[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+Ipart1)]; + arrayx[(0 + m0 +64*(ell4+ell5*(int)pow(2.0,ishift4-1)))] = tmp_v0[(1 + m0+16*ell4 +pow41*ell5+Ipart1)]; + arrayx[(16+ m0 +64*(ell4+ell5*(int)pow(2.0,ishift4-1)))] = tmp_v0[(1 + m0+16*ell4+pow4 +pow41*ell5+Ipart1)]; + arrayx[(32+ m0 +64*(ell4+ell5*(int)pow(2.0,ishift4-1)))] = tmp_v0[(1 + m0+16*ell4+pow5 +pow41*ell5+Ipart1)]; + arrayx[(48+ m0 +64*(ell4+ell5*(int)pow(2.0,ishift4-1)))] = tmp_v0[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+Ipart1)]; + } + } + } + + + for(ell5 = 0; ell5 < (int)pow(2.0, ishift5-1); ell5++){ + for(ell4 = 0; ell4 < (int)pow(2.0, ishift4-1); ell4++){ + for(m0 = 0; m0 < 16; m0++){ + arrayz[(0 + m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))] = tmp_v1[(1 + m0+16*ell4 +pow41*ell5+pow51+Ipart1)]; + arrayz[(16+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))] = tmp_v1[(1 + m0+16*ell4+pow4 +pow41*ell5+pow51+Ipart1)]; + arrayz[(32+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))] = tmp_v1[(1 + m0+16*ell4+pow5 +pow41*ell5+pow51+Ipart1)]; + arrayz[(48+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))] = tmp_v1[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+pow51+Ipart1)]; + tmp_v3[(1 + m0+16*ell4 +pow41*ell5+pow51+Ipart1)] = tmp_v1[(1 + m0+16*ell4 +pow41*ell5+pow51+Ipart1)]; + tmp_v3[(1 + m0+16*ell4+pow4 +pow41*ell5+pow51+Ipart1)] = tmp_v1[(1 + m0+16*ell4+pow4 +pow41*ell5+pow51+Ipart1)]; + tmp_v3[(1 + m0+16*ell4+pow5 +pow41*ell5+pow51+Ipart1)] = tmp_v1[(1 + m0+16*ell4+pow5 +pow41*ell5+pow51+Ipart1)]; + tmp_v3[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+pow51+Ipart1)] = tmp_v1[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+pow51+Ipart1)]; + arrayx[(0 + 
m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))] = tmp_v0[(1 + m0+16*ell4 +pow41*ell5+pow51+Ipart1)]; + arrayx[(16+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))] = tmp_v0[(1 + m0+16*ell4+pow4 +pow41*ell5+pow51+Ipart1)]; + arrayx[(32+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))] = tmp_v0[(1 + m0+16*ell4+pow5 +pow41*ell5+pow51+Ipart1)]; + arrayx[(48+ m0+64*(ell4+ell5*(int)pow(2.0,ishift4-1)+(int)pow(2.0,ishift4+ishift5-2)))] = tmp_v0[(1 + m0+16*ell4+pow4+pow5+pow41*ell5+pow51+Ipart1)]; + } + + } + } + + TRANSA = 'N'; + TRANSB = 'N'; + M = 64; + N = (int)pow(2.0, ishift4+ishift5-1); + K = 64; + ALPHA = 1.0; + LDA = 64; + LDB = 64; + BETA = 1.0; + LDC = 64; + + zgemm_(&TRANSA,&TRANSB,&M,&N,&K,&ALPHA,matJL,&LDA,arrayz,&LDB,&BETA,arrayx,&LDC); + //zgemm_(&TRANSA,&TRANSB,&M,&N,&K,&ALPHA,matI,&LDA,arrayz,&LDB,&BETA,arrayx,&LDC); +/* + for(ell5=0;ell5<(64*N);ell5++){ + arrayw[ell5]=0.0; + } + for(ell5=0;ell5<64;ell5++){ + for(ell4=0;ell4<64;ell4++){ + for(m0=0;m0Boost.ishift_nspin); + #pragma omp parallel for default(none) private(ell4,ell5,ell6,m0,Ipart1,TRANSA,TRANSB,M,N,K,LDA,LDB,LDC,ALPHA,BETA) \ + firstprivate(iomp) shared(i_max,ishift1,ishift2,ishift3,ishift4,ishift5,pow4,pow5,pow41,pow51,X,tmp_v0,tmp_v1) + for(ell5 = 0; ell5 < iomp; ell5++ ){ + for(ell4 = 0; ell4 < (int)pow(2.0,X->Boost.ishift_nspin); ell4++){ + tmp_v0[(1 + ell5+(i_max/(int)pow(2.0,X->Boost.ishift_nspin))*ell4)] = tmp_v1[(1 + ell4+((int)pow(2.0,X->Boost.ishift_nspin))*ell5)]; + } + } + iomp=i_max/(int)pow(2.0,X->Boost.ishift_nspin); + #pragma omp parallel for default(none) private(ell4,ell5) \ + firstprivate(iomp) shared(i_max,X,tmp_v1,tmp_v3) + for(ell5 = 0; ell5 < iomp; ell5++ ){ + for(ell4 = 0; ell4 < (int)pow(2.0,X->Boost.ishift_nspin); ell4++){ + tmp_v1[(1 + ell5+(i_max/(int)pow(2.0,X->Boost.ishift_nspin))*ell4)] = tmp_v3[(1 + ell4+((int)pow(2.0,X->Boost.ishift_nspin))*ell5)]; + } + } + } + else{ + #pragma 
omp parallel for default(none) private(ell4) \ + shared(i_max,tmp_v0,tmp_v1,tmp_v3) + for(ell4 = 0; ell4 < i_max; ell4++ ){ + tmp_v0[1 + ell4] = tmp_v1[1 + ell4]; + tmp_v1[1 + ell4] = tmp_v3[1 + ell4]; + } + }/* if pivot_flag */ + + }/* loop for j */ + + /* + ierr = MPI_Alltoall(&tmp_v1[1],(int)(i_max/nproc),MPI_DOUBLE_COMPLEX,&tmp_v3[1],(int)(i_max/nproc),MPI_DOUBLE_COMPLEX,MPI_COMM_WORLD); + ierr = MPI_Alltoall(&tmp_v0[1],(int)(i_max/nproc),MPI_DOUBLE_COMPLEX,&tmp_v2[1],(int)(i_max/nproc),MPI_DOUBLE_COMPLEX,MPI_COMM_WORLD); + */ + MPI_Alltoall(&tmp_v1[1],(int)(i_max/nproc),MPI_DOUBLE_COMPLEX,&tmp_v3[1],(int)(i_max/nproc),MPI_DOUBLE_COMPLEX,MPI_COMM_WORLD); + MPI_Alltoall(&tmp_v0[1],(int)(i_max/nproc),MPI_DOUBLE_COMPLEX,&tmp_v2[1],(int)(i_max/nproc),MPI_DOUBLE_COMPLEX,MPI_COMM_WORLD); + + + iomp=(int)pow(2.0,X->Boost.W0)/nproc; + #pragma omp parallel for default(none) private(ell4,ell5,ell6) \ + firstprivate(iomp) shared(i_max,X,nproc,tmp_v0,tmp_v1,tmp_v2,tmp_v3) + //for(ell4 = 0; ell4 < (int)pow(2.0,X->Boost.W0)/nproc; ell4++ ){ + for(ell4 = 0; ell4 < iomp; ell4++ ){ + for(ell5 = 0; ell5 < nproc; ell5++ ){ + for(ell6 = 0; ell6 < (int)(i_max/(int)pow(2.0,X->Boost.W0)); ell6++ ){ + tmp_v1[(1 + ell6+ell5*i_max/(int)pow(2.0,X->Boost.W0)+ell4*i_max/((int)pow(2.0,X->Boost.W0)/nproc))] = tmp_v3[(1 + ell6+ell4*i_max/(int)pow(2.0,X->Boost.W0)+ell5*i_max/nproc)]; + tmp_v0[(1 + ell6+ell5*i_max/(int)pow(2.0,X->Boost.W0)+ell4*i_max/((int)pow(2.0,X->Boost.W0)/nproc))] = tmp_v2[(1 + ell6+ell4*i_max/(int)pow(2.0,X->Boost.W0)+ell5*i_max/nproc)]; + } + } + } + + + }/* loop for iloop */ + +/* + dam_pr= X_child_general_int_spin_MPIBoost + ( + matJ, X, tmp_v0, tmp_v1); + + X->Large.prdct += dam_pr; +*/ +// c_free1(arrayz, (int)pow(2.0, 16)); +// c_free1(arrayx, (int)pow(2.0, 16)); +// c_free1(arrayw, (int)pow(2.0, 16)); + + free_cd_2d_allocate(vecJ); + free_cd_2d_allocate(matJ); + free_cd_2d_allocate(matJ2); + free_cd_2d_allocate(matB); + free_cd_1d_allocate(matJL); + 
free_cd_1d_allocate(matI); +#endif + +}/*void child_general_int_spin_MPIBoost*/ + diff --git a/src/mltplyMPIBoost.h b/src/mltplyMPIBoost.h new file mode 100644 index 000000000..82403308c --- /dev/null +++ b/src/mltplyMPIBoost.h @@ -0,0 +1,31 @@ +/* HPhi - Quantum Lattice Model Simulator */ +/* Copyright (C) 2015 The University of Tokyo */ + +/* This program is free software: you can redistribute it and/or modify */ +/* it under the terms of the GNU General Public License as published by */ +/* the Free Software Foundation, either version 3 of the License, or */ +/* (at your option) any later version. */ + +/* This program is distributed in the hope that it will be useful, */ +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ +/* GNU General Public License for more details. */ + +/* You should have received a copy of the GNU General Public License */ +/* along with this program. If not, see . */ + +//Define Mode for mltply +// complex version + +#pragma once +#include +#include "struct.h" + +void child_general_int_spin_MPIBoost +( + struct BindStruct *X, + double complex *tmp_v0, + double complex *tmp_v1, + double complex *tmp_v2, + double complex *tmp_v3 +); diff --git a/src/mltplySpin.c b/src/mltplySpin.c index 1a28b84ba..10d520111 100644 --- a/src/mltplySpin.c +++ b/src/mltplySpin.c @@ -165,6 +165,7 @@ General on-site term #include "mltplyHubbardCore.h" #include "mltplyMPISpin.h" #include "mltplyMPISpinCore.h" +#include "mltplyMPIBoost.h" /** @brief Driver function for Spin hamiltonian @return error code @@ -374,6 +375,54 @@ shared(tmp_v0,tmp_v1,list_1,list_2_1,list_2_2,one,nstate) return 0; }/*int mltplyGeneralSpin*/ /** +@brief Driver function for Spin hamiltonian (Boost) +@return error code +@author Kazuyoshi Yoshimi (The University of Tokyo) +*/ +int mltplySpinGCBoost( + struct BindStruct *X,//!<[inout] + int nstate, + double complex **tmp_v0,//!<[inout] Result vector + 
double complex **tmp_v1//!<[in] Input producted vector +) +{ + int istate; + /* SpinGCBoost */ + double complex* tmp_v2, *tmp_tmp_v0, *tmp_tmp_v1; + double complex* tmp_v3; + /* SpinGCBoost */ + + long unsigned int i_max, idim; + i_max = X->Check.idim_max; + + StartTimer(500); + tmp_tmp_v0 = cd_1d_allocate(i_max + 1); + tmp_tmp_v1 = cd_1d_allocate(i_max + 1); + tmp_v2 = cd_1d_allocate(i_max + 1); + tmp_v3 = cd_1d_allocate(i_max + 1); + + for (istate = 0; istate < nstate; istate++) { + for (idim = 1; idim <= i_max; idim++) { + tmp_tmp_v0[idim] = tmp_v0[idim][istate]; + tmp_tmp_v1[idim] = tmp_v1[idim][istate]; + } + child_general_int_spin_MPIBoost(X, tmp_tmp_v0, tmp_tmp_v1, tmp_v2, tmp_v3); + for (idim = 1; idim <= i_max; idim++) { + tmp_v0[idim][istate] = tmp_tmp_v0[idim]; + tmp_v1[idim][istate] = tmp_tmp_v1[idim]; + } + } + + /* SpinGCBoost */ + free_cd_1d_allocate(tmp_tmp_v0); + free_cd_1d_allocate(tmp_tmp_v1); + free_cd_1d_allocate(tmp_v2); + free_cd_1d_allocate(tmp_v3); + /* SpinGCBoost */ + StopTimer(500); + return 0; +}/*int mltplySpinGCBoost*/ +/** @brief Driver function for Spin hamiltonian @return error code @author Kazuyoshi Yoshimi (The University of Tokyo) @@ -391,6 +440,9 @@ int mltplySpinGC( if(iret != 0) return iret; + if (X->Boost.flgBoost == 1) + iret = mltplySpinGCBoost(X, nstate, tmp_v0, tmp_v1); + return iret; }/*int mltplySpinGC*/ /** From c203e2b978a27345ba2166f55325db6cca61dfe1 Mon Sep 17 00:00:00 2001 From: Mitsuaki Kawamura Date: Wed, 5 Oct 2022 15:15:46 +0900 Subject: [PATCH 34/50] Restore the deleted submodule --- .gitmodules | 3 +++ src/StdFace | 1 + 2 files changed, 4 insertions(+) create mode 160000 src/StdFace diff --git a/.gitmodules b/.gitmodules index e69de29bb..7f31e9e5d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "src/StdFace"] + path = src/StdFace + url = https://github.com/issp-center-dev/StdFace diff --git a/src/StdFace b/src/StdFace new file mode 160000 index 000000000..92967761b --- /dev/null +++ 
b/src/StdFace @@ -0,0 +1 @@ +Subproject commit 92967761b56e6ddf69b1039102dbf0d09256242c From ebf21cdfc02d18be175a4741233e8c52686d3c52 Mon Sep 17 00:00:00 2001 From: Mitsuaki Kawamura Date: Tue, 18 Oct 2022 16:26:17 +0900 Subject: [PATCH 35/50] To pass the tests. lobcg_kondo*.sh: Indices for itenerant was incorrect. this was fixed before, but not applied in these tests. spectrum_spin_kagome.sh: S+S- excitation lobcg_genspin_ladder.sh: D-term with general spin spectrum_spin_kagome.sh --- src/CalcByCanonicalTPQ.c | 332 +-- src/CalcByLanczos.c | 16 +- src/CalcByTEM.c | 3 +- src/FirstMultiply.c | 6 +- src/HPhiMain.c | 9 + src/Lanczos_EigenValue.c | 44 +- src/MakeIniVec.c | 51 +- src/StdFace | 2 +- src/check.c | 15 + src/expec_cisajs.c | 23 +- src/expec_cisajscktaltdc.c | 309 ++- src/expec_energy_flct.c | 2 + src/expec_totalspin.c | 18 +- src/include/Lanczos_EigenValue.h | 6 +- src/include/MakeIniVec.h | 1 - src/include/expec_cisajscktaltdc.h | 4 +- src/include/expec_totalspin.h | 2 +- src/input.c | 4 +- src/mltplyMPISpinCore.c | 41 +- src/mltplySpin.c | 2 +- src/output.c | 4 +- src/readdef.c | 5 - src/xsetmem.c | 8 +- test/lobcg_genspin_ladder.sh | 2144 ++++++++++---------- test/lobcg_kondo_chain.sh | 488 ++--- test/lobcg_kondogc_chain.sh | 732 +++---- test/spectrum_spin_kagome.sh | 4 +- test/te_hubbard_chain_interall.sh | 2 +- test/te_hubbard_chain_interall_diagonal.sh | 2 +- test/te_kondo_chain_interall.sh | 2 +- test/te_spin_chain_interall.sh | 2 +- 31 files changed, 2220 insertions(+), 2063 deletions(-) diff --git a/src/CalcByCanonicalTPQ.c b/src/CalcByCanonicalTPQ.c index 78a3b081f..a63c0ef7c 100644 --- a/src/CalcByCanonicalTPQ.c +++ b/src/CalcByCanonicalTPQ.c @@ -23,6 +23,7 @@ #include "FileIO.h" #include "wrapperMPI.h" #include "CalcTime.h" +#include "common/setmemory.h" /** @@ -47,9 +48,9 @@ * @retval -1 unnormally finished */ int CalcByCanonicalTPQ( - const int NumAve, - const int ExpecInterval, - struct EDMainCalStruct *X + const int NumAve, + const int 
ExpecInterval, + struct EDMainCalStruct* X ) { char sdt[D_FileNameMax]; @@ -58,230 +59,243 @@ int CalcByCanonicalTPQ( char sdt_flct[D_FileNameMax]; int rand_i, rand_max, iret; unsigned long int i_max; - int step_iO=0; - FILE *fp; - double inv_temp, Ns,delta_tau; + int step_iO = 0; + FILE* fp; + double inv_temp, Ns, delta_tau; struct TimeKeepStruct tstruct; size_t byte_size; - tstruct.tstart=time(NULL); - + tstruct.tstart = time(NULL); + rand_max = NumAve; step_spin = ExpecInterval; - if (X->Bind.Def.Param.ExpandCoef==0){ - X->Bind.Def.Param.ExpandCoef=10; - fprintf(stdout, "In cTPQ calc., the default value of ExpandCoef (=10) is used. \n"); - }else{ - fprintf(stdout, "In cTPQ calc., ExpandCoef is specified as %d. \n",X->Bind.Def.Param.ExpandCoef); + global_norm = d_1d_allocate(NumAve); + global_1st_norm = d_1d_allocate(NumAve); + if (X->Bind.Def.Param.ExpandCoef == 0) { + X->Bind.Def.Param.ExpandCoef = 10; + fprintf(stdout, "In cTPQ calc., the default value of ExpandCoef (=10) is used. \n"); + } + else { + fprintf(stdout, "In cTPQ calc., ExpandCoef is specified as %d. 
\n", X->Bind.Def.Param.ExpandCoef); } - X->Bind.Def.St=0; + X->Bind.Def.St = 0; fprintf(stdoutMPI, "%s", cLogTPQ_Start); - for (rand_i = 0; rand_iBind.Def.NsiteMPI; - fprintf(stdoutMPI, cLogTPQRand, rand_i+1, rand_max); - iret=0; - X->Bind.Def.irand=rand_i; - //Make or Read initial vector - if(X->Bind.Def.iReStart==RESTART_INOUT || X->Bind.Def.iReStart==RESTART_IN) { - StartTimer(3600); - TimeKeeperWithRandAndStep(&(X->Bind), cFileNameTPQStep, cOutputVecStart, "a", rand_i, step_i); - fprintf(stdoutMPI, "%s", cLogInputVecStart); - sprintf(sdt, cFileNameInputVector, rand_i, myrank); - childfopenALL(sdt, "rb", &fp); - if(fp==NULL){ - fprintf(stdout, "A file of Inputvector does not exist.\n"); - fprintf(stdout, "Start to calculate in normal procedure.\n"); - iret=1; - } - byte_size = fread(&step_i, sizeof(step_i), 1, fp); - byte_size = fread(&i_max, sizeof(long int), 1, fp); - if(i_max != X->Bind.Check.idim_max){ - fprintf(stderr, "Error: A file of Inputvector is incorrect.\n"); - exitMPI(-1); - } - byte_size = fread(v0, sizeof(complex double), X->Bind.Check.idim_max+1, fp); - TimeKeeperWithRandAndStep(&(X->Bind), cFileNameTPQStep, cOutputVecFinish, "a", rand_i, step_i); - fprintf(stdoutMPI, "%s", cLogInputVecFinish); - fclose(fp); - StopTimer(3600); - X->Bind.Def.istep=step_i; - StartTimer(3200); - iret=expec_energy_flct(&(X->Bind), NumAve, v0, v1); //v1 <- v0 and v0 = H*v1 - StopTimer(3200); - if(iret != 0) return -1; + Ns = 1.0 * X->Bind.Def.NsiteMPI; + //fprintf(stdoutMPI, cLogTPQRand, rand_i+1, rand_max); + iret = 0; + //X->Bind.Def.irand = rand_i; - step_iO=step_i-1; - if (byte_size == 0) printf("byte_size: %d \n", (int)byte_size); + //Make or Read initial vector + if (X->Bind.Def.iReStart == RESTART_INOUT || X->Bind.Def.iReStart == RESTART_IN) { + StartTimer(3600); + TimeKeeperWithRandAndStep(&(X->Bind), cFileNameTPQStep, cOutputVecStart, "a", 0, step_i); + fprintf(stdoutMPI, "%s", cLogInputVecStart); + sprintf(sdt, cFileNameInputVector, 0, myrank); + 
childfopenALL(sdt, "rb", &fp); + if (fp == NULL) { + fprintf(stdout, "A file of Inputvector does not exist.\n"); + fprintf(stdout, "Start to calculate in normal procedure.\n"); + iret = 1; } - - if(X->Bind.Def.iReStart==RESTART_NOT || X->Bind.Def.iReStart==RESTART_OUT || iret ==1) { - StartTimer(3600); + byte_size = fread(&step_i, sizeof(step_i), 1, fp); + byte_size = fread(&i_max, sizeof(long int), 1, fp); + if (i_max != X->Bind.Check.idim_max) { + fprintf(stderr, "Error: A file of Inputvector is incorrect.\n"); + exitMPI(-1); + } + byte_size = fread(v0, sizeof(complex double), NumAve*(X->Bind.Check.idim_max + 1), fp); + TimeKeeperWithRandAndStep(&(X->Bind), cFileNameTPQStep, cOutputVecFinish, "a", 0, step_i); + fprintf(stdoutMPI, "%s", cLogInputVecFinish); + fclose(fp); + StopTimer(3600); + X->Bind.Def.istep = step_i; + StartTimer(3200); + iret = expec_energy_flct(&(X->Bind), NumAve, v0, v1); //v1 <- v0 and v0 = H*v1 + StopTimer(3200); + if (iret != 0) return -1; + + step_iO = step_i - 1; + if (byte_size == 0) printf("byte_size: %d \n", (int)byte_size); + } + + if (X->Bind.Def.iReStart == RESTART_NOT || X->Bind.Def.iReStart == RESTART_OUT || iret == 1) { + StartTimer(3600); + for (rand_i = 0; rand_i < rand_max; rand_i++) { + sprintf(sdt_phys, cFileNameSSRand, rand_i); if (childfopenMPI(sdt_phys, "w", &fp) != 0) { return -1; } fprintf(fp, "%s", cLogSSRand); fclose(fp); -// for norm + // for norm + sprintf(sdt_norm, cFileNameNormRand, rand_i); if (childfopenMPI(sdt_norm, "w", &fp) != 0) { return -1; } fprintf(fp, "%s", cLogNormRand); fclose(fp); -// for fluctuations + // for fluctuations + sprintf(sdt_flct, cFileNameFlctRand, rand_i); if (childfopenMPI(sdt_flct, "w", &fp) != 0) { return -1; } fprintf(fp, "%s", cLogFlctRand); fclose(fp); + } + StopTimer(3600); - StopTimer(3600); - - step_i = 0; + step_i = 0; - StartTimer(3100); - if(rand_i==0){ - TimeKeeperWithRandAndStep(&(X->Bind), cFileNameTPQStep, cTPQStep, "w", rand_i, step_i); - } - else{ - 
TimeKeeperWithRandAndStep(&(X->Bind), cFileNameTPQStep, cTPQStep, "a", rand_i, step_i); - } - /**@brief - Initialize v1 and v0 = v1 - */ - MakeIniVec(rand_i, &(X->Bind)); - /*[s] tau*/ - inv_temp = 0.0; - delta_tau = 1.0/LargeValue; - /*[e] tau*/ - StopTimer(3100); - // for norm + StartTimer(3100); + TimeKeeperWithRandAndStep(&(X->Bind), cFileNameTPQStep, cTPQStep, "w", 0, step_i); + /**@brief + Initialize v1 and v0 = v1 + */ + MakeIniVec(&(X->Bind)); + /*[s] tau*/ + inv_temp = 0.0; + delta_tau = 1.0 / LargeValue; + /*[e] tau*/ + StopTimer(3100); + // for norm + for (rand_i = 0; rand_i < rand_max; rand_i++) { + sprintf(sdt_norm, cFileNameNormRand, rand_i); if (childfopenMPI(sdt_norm, "a", &fp) != 0) { return -1; } - fprintf(fp, "%.16lf %.16lf %.16lf %d\n", inv_temp, global_1st_norm, global_1st_norm, step_i); + fprintf(fp, "%.16lf %.16lf %.16lf %d\n", inv_temp, global_1st_norm[rand_i], global_1st_norm[rand_i], step_i); fclose(fp); - /**@brief - Compute expectation value at infinite temperature - */ - X->Bind.Def.istep = 0; - StartTimer(3300); - iret=expec_cisajs(&(X->Bind), NumAve, v2, v1); - StopTimer(3300); - if(iret !=0) return -1; + } + /**@brief + Compute expectation value at infinite temperature + */ + X->Bind.Def.istep = 0; + StartTimer(3300); + iret = expec_cisajs(&(X->Bind), NumAve, v2, v1); + StopTimer(3300); + if (iret != 0) return -1; - StartTimer(3400); - iret=expec_cisajscktaltdc(&(X->Bind), NumAve, v2, v1); - StopTimer(3400); - if(iret !=0) return -1; + StartTimer(3400); + iret = expec_cisajscktaltdc(&(X->Bind), NumAve, v2, v1); + StopTimer(3400); + if (iret != 0) return -1; - - StartTimer(3200); - iret=expec_energy_flct(&(X->Bind), NumAve, v0, v1); //v1 <- v0 and v0 = H*v1 - StopTimer(3200); - if(iret !=0) return -1; - //inv_temp = 0; /* (2.0 / Ns) / (LargeValue - X->Bind.Phys.energy / Ns);*/ + + StartTimer(3200); + iret = expec_energy_flct(&(X->Bind), NumAve, v0, v1); //v1 <- v0 and v0 = H*v1 + StopTimer(3200); + if (iret != 0) return -1; + 
//inv_temp = 0; /* (2.0 / Ns) / (LargeValue - X->Bind.Phys.energy / Ns);*/ + for (rand_i = 0; rand_i < rand_max; rand_i++) { + sprintf(sdt_phys, cFileNameSSRand, rand_i); if (childfopenMPI(sdt_phys, "a", &fp) != 0) { return -1; } - fprintf(fp, "%.16lf %.16lf %.16lf %.16lf %.16lf %d\n", inv_temp, X->Bind.Phys.energy, X->Bind.Phys.var, - X->Bind.Phys.doublon, X->Bind.Phys.num, step_i); + fprintf(fp, "%.16lf %.16lf %.16lf %.16lf %.16lf %d\n", inv_temp, X->Bind.Phys.energy[rand_i], X->Bind.Phys.var[rand_i], + X->Bind.Phys.doublon[rand_i], X->Bind.Phys.num[rand_i], step_i); fclose(fp); StartTimer(3600); -// for fluctuations + // for fluctuations + sprintf(sdt_flct, cFileNameFlctRand, rand_i); if (childfopenMPI(sdt_flct, "a", &fp) != 0) { return -1; } - fprintf(fp, "%.16lf %.16lf %.16lf %.16lf %.16lf %.16lf %.16lf %d\n", inv_temp,X->Bind.Phys.num,X->Bind.Phys.num2, X->Bind.Phys.doublon,X->Bind.Phys.doublon2, X->Bind.Phys.Sz,X->Bind.Phys.Sz2,step_i); + fprintf(fp, "%.16lf %.16lf %.16lf %.16lf %.16lf %.16lf %.16lf %d\n", inv_temp, X->Bind.Phys.num[rand_i], X->Bind.Phys.num2[rand_i], + X->Bind.Phys.doublon[rand_i], X->Bind.Phys.doublon2[rand_i], X->Bind.Phys.Sz[rand_i], X->Bind.Phys.Sz2[rand_i], step_i); fclose(fp); -// + // StopTimer(3600); - step_i += 1; - X->Bind.Def.istep = step_i; - step_iO=0; } + step_i += 1; + X->Bind.Def.istep = step_i; + step_iO = 0; + } - for (step_i = X->Bind.Def.istep; step_iBind.Def.Lanczos_max; step_i++){ - X->Bind.Def.istep=step_i; - if(step_i %((X->Bind.Def.Lanczos_max-step_iO)/10)==0){ - fprintf(stdoutMPI, cLogTPQStep, step_i, X->Bind.Def.Lanczos_max); - } - X->Bind.Def.istep=step_i; - StartTimer(3600); - TimeKeeperWithRandAndStep(&(X->Bind), cFileNameTPQStep, cTPQStep, "a", rand_i, step_i); - StopTimer(3600); - StartTimer(3500); - MultiplyForCanonicalTPQ(&(X->Bind),delta_tau); // v0=exp[-delta_tau*H/2]*v1 in 4th order - StopTimer(3500); + for (step_i = X->Bind.Def.istep; step_i < X->Bind.Def.Lanczos_max; step_i++) { + X->Bind.Def.istep = 
step_i; + if (step_i % ((X->Bind.Def.Lanczos_max - step_iO) / 10) == 0) { + fprintf(stdoutMPI, cLogTPQStep, step_i, X->Bind.Def.Lanczos_max); + } + X->Bind.Def.istep = step_i; + StartTimer(3600); + TimeKeeperWithRandAndStep(&(X->Bind), cFileNameTPQStep, cTPQStep, "a", 0, step_i); + StopTimer(3600); + StartTimer(3500); + MultiplyForCanonicalTPQ(&(X->Bind), delta_tau); // v0=exp[-delta_tau*H/2]*v1 in 4th order + StopTimer(3500); - StartTimer(3200); - iret=expec_energy_flct(&(X->Bind), NumAve, v0, v1); //v1 <- v0 and v0 = H*v1 - StopTimer(3200); - if(iret !=0) return -1; -// - //inv_temp = (2.0*step_i / Ns) / (LargeValue - X->Bind.Phys.energy / Ns); - inv_temp += delta_tau; - //temp = 1.0/inv_temp; + StartTimer(3200); + iret = expec_energy_flct(&(X->Bind), NumAve, v0, v1); //v1 <- v0 and v0 = H*v1 + StopTimer(3200); + if (iret != 0) return -1; + // + //inv_temp = (2.0*step_i / Ns) / (LargeValue - X->Bind.Phys.energy / Ns); + inv_temp += delta_tau; + //temp = 1.0/inv_temp; - StartTimer(3600); - if(childfopenMPI(sdt_phys, "a", &fp)!=0){ + StartTimer(3600); + for (rand_i = 0; rand_i < rand_max; rand_i++) { + sprintf(sdt_phys, cFileNameSSRand, rand_i); + if (childfopenMPI(sdt_phys, "a", &fp) != 0) { return FALSE; } - fprintf(fp, "%.16lf %.16lf %.16lf %.16lf %.16lf %d\n", inv_temp, X->Bind.Phys.energy, X->Bind.Phys.var, X->Bind.Phys.doublon, X->Bind.Phys.num ,step_i); -// for + fprintf(fp, "%.16lf %.16lf %.16lf %.16lf %.16lf %d\n", inv_temp, X->Bind.Phys.energy[rand_i], X->Bind.Phys.var[rand_i], + X->Bind.Phys.doublon[rand_i], X->Bind.Phys.num[rand_i], step_i); + // for fclose(fp); - if(childfopenMPI(sdt_norm, "a", &fp)!=0){ + sprintf(sdt_norm, cFileNameNormRand, rand_i); + if (childfopenMPI(sdt_norm, "a", &fp) != 0) { return FALSE; } - fprintf(fp, "%.16lf %.16lf %.16lf %d\n", inv_temp, global_norm, global_1st_norm, step_i); + fprintf(fp, "%.16lf %.16lf %.16lf %d\n", inv_temp, global_norm[rand_i], global_1st_norm[rand_i], step_i); fclose(fp); -// for fluctuations + // for 
fluctuations + sprintf(sdt_flct, cFileNameFlctRand, rand_i); if (childfopenMPI(sdt_flct, "a", &fp) != 0) { return -1; } - fprintf(fp, "%.16lf %.16lf %.16lf %.16lf %.16lf %.16lf %.16lf %d\n", inv_temp,X->Bind.Phys.num,X->Bind.Phys.num2, X->Bind.Phys.doublon,X->Bind.Phys.doublon2, X->Bind.Phys.Sz,X->Bind.Phys.Sz2,step_i); + fprintf(fp, "%.16lf %.16lf %.16lf %.16lf %.16lf %.16lf %.16lf %d\n", inv_temp, X->Bind.Phys.num[rand_i], X->Bind.Phys.num2[rand_i], + X->Bind.Phys.doublon[rand_i], X->Bind.Phys.doublon2[rand_i], X->Bind.Phys.Sz[rand_i], X->Bind.Phys.Sz2[rand_i], step_i); fclose(fp); -// - StopTimer(3600); + } + StopTimer(3600); - if (step_i%step_spin == 0){ - StartTimer(3300); - iret=expec_cisajs(&(X->Bind), NumAve, v2, v1); - StopTimer(3300); - if(iret !=0) return -1; + if (step_i % step_spin == 0) { + StartTimer(3300); + iret = expec_cisajs(&(X->Bind), NumAve, v2, v1); + StopTimer(3300); + if (iret != 0) return -1; - StartTimer(3400); - iret=expec_cisajscktaltdc(&(X->Bind), NumAve, v2, v1); - StopTimer(3400); - if(iret !=0) return -1; - } + StartTimer(3400); + iret = expec_cisajscktaltdc(&(X->Bind), NumAve, v2, v1); + StopTimer(3400); + if (iret != 0) return -1; } + } - if(X->Bind.Def.iReStart== RESTART_OUT || X->Bind.Def.iReStart==RESTART_INOUT){ - TimeKeeperWithRandAndStep(&(X->Bind), cFileNameTPQStep, cOutputVecStart, "a", rand_i, step_i); - fprintf(stdoutMPI, "%s", cLogOutputVecStart); - sprintf(sdt, cFileNameOutputVector, rand_i, myrank); - if(childfopenALL(sdt, "wb", &fp)!=0){ - exitMPI(-1); - } - fwrite(&step_i, sizeof(step_i), 1, fp); - fwrite(&X->Bind.Check.idim_max, sizeof(X->Bind.Check.idim_max),1,fp); - fwrite(v1, sizeof(complex double),X->Bind.Check.idim_max+1, fp); - fclose(fp); - TimeKeeperWithRandAndStep(&(X->Bind), cFileNameTPQStep, cOutputVecFinish, "a", rand_i, step_i); - fprintf(stdoutMPI, "%s", cLogOutputVecFinish); + if (X->Bind.Def.iReStart == RESTART_OUT || X->Bind.Def.iReStart == RESTART_INOUT) { + TimeKeeperWithRandAndStep(&(X->Bind), 
cFileNameTPQStep, cOutputVecStart, "a", 0, step_i); + fprintf(stdoutMPI, "%s", cLogOutputVecStart); + sprintf(sdt, cFileNameOutputVector, 0, myrank); + if (childfopenALL(sdt, "wb", &fp) != 0) { + exitMPI(-1); } + fwrite(&step_i, sizeof(step_i), 1, fp); + fwrite(&X->Bind.Check.idim_max, sizeof(X->Bind.Check.idim_max), 1, fp); + fwrite(&v1[0][0], sizeof(complex double), NumAve*(X->Bind.Check.idim_max + 1), fp); + fclose(fp); + TimeKeeperWithRandAndStep(&(X->Bind), cFileNameTPQStep, cOutputVecFinish, "a", 0, step_i); + fprintf(stdoutMPI, "%s", cLogOutputVecFinish); } + fprintf(stdoutMPI, "%s", cLogTPQ_End); - tstruct.tend=time(NULL); - fprintf(stdoutMPI, cLogTPQEnd, (int)(tstruct.tend-tstruct.tstart)); + tstruct.tend = time(NULL); + fprintf(stdoutMPI, cLogTPQEnd, (int)(tstruct.tend - tstruct.tstart)); return TRUE; } diff --git a/src/CalcByLanczos.c b/src/CalcByLanczos.c index 9a26a4c07..5125a7b91 100644 --- a/src/CalcByLanczos.c +++ b/src/CalcByLanczos.c @@ -111,7 +111,7 @@ int CalcByLanczos( StartTimer(4200); Lanczos_EigenVector(&(X->Bind)); StopTimer(4200); - + StartTimer(4300); iret=expec_energy_flct(&(X->Bind), 1, v0, v1); StopTimer(4300); @@ -123,7 +123,7 @@ int CalcByLanczos( fprintf(stdoutMPI, "\n"); fprintf(stdoutMPI, " Accuracy check !!!\n"); - fprintf(stdoutMPI, " LanczosEnergy = %.14e \n EnergyByVec = %.14e \n diff_ene = %.14e \n var = %.14e \n",X->Bind.Phys.Target_CG_energy,X->Bind.Phys.energy,diff_ene,var); + fprintf(stdoutMPI, " LanczosEnergy = %.14e \n EnergyByVec = %.14e \n diff_ene = %.14e \n var = %.14e \n",X->Bind.Phys.Target_CG_energy,X->Bind.Phys.energy[0], diff_ene, var); if(diff_ene < eps_Energy && var< eps_Energy){ fprintf(stdoutMPI, " Accuracy of Lanczos vectors is enough.\n"); fprintf(stdoutMPI, "\n"); @@ -145,13 +145,13 @@ int CalcByLanczos( diff_ene = fabs(X->Bind.Phys.Target_CG_energy-X->Bind.Phys.energy[0]) / fabs(X->Bind.Phys.Target_CG_energy); fprintf(stdoutMPI, "\n"); fprintf(stdoutMPI, " CG Accuracy check !!!\n"); - fprintf(stdoutMPI, 
" LanczosEnergy = %.14e\n EnergyByVec = %.14e\n diff_ene = %.14e\n var = %.14e \n ",X->Bind.Phys.Target_CG_energy,X->Bind.Phys.energy,diff_ene,var); + fprintf(stdoutMPI, " LanczosEnergy = %.14e\n EnergyByVec = %.14e\n diff_ene = %.14e\n var = %.14e \n ",X->Bind.Phys.Target_CG_energy,X->Bind.Phys.energy[0], diff_ene, var); fprintf(stdoutMPI, "\n"); //} } } else{//idim_max=1 - v0[1]=1; + v0[1][0] = 1; StartTimer(4300); iret=expec_energy_flct(&(X->Bind), 1, v0, v1); StopTimer(4300); @@ -253,9 +253,9 @@ int CalcByLanczos( exitMPI(-1); } - fprintf(fp,"Energy %.16lf \n",X->Bind.Phys.energy); - fprintf(fp,"Doublon %.16lf \n",X->Bind.Phys.doublon); - fprintf(fp,"Sz %.16lf \n",X->Bind.Phys.Sz); + fprintf(fp,"Energy %.16lf \n",X->Bind.Phys.energy[0]); + fprintf(fp,"Doublon %.16lf \n",X->Bind.Phys.doublon[0]); + fprintf(fp,"Sz %.16lf \n",X->Bind.Phys.Sz[0]); // fprintf(fp,"total S^2 %.10lf \n",X->Bind.Phys.s2); fclose(fp); @@ -267,7 +267,7 @@ int CalcByLanczos( } fwrite(&X->Bind.Large.itr, sizeof(X->Bind.Large.itr),1,fp); fwrite(&X->Bind.Check.idim_max, sizeof(X->Bind.Check.idim_max),1,fp); - fwrite(v1, sizeof(complex double),X->Bind.Check.idim_max+1, fp); + fwrite(&v1[0][0], sizeof(complex double), X->Bind.Check.idim_max + 1, fp); fclose(fp); TimeKeeper(&(X->Bind), cFileNameTimeKeep, cOutputEigenVecFinish, "a"); } diff --git a/src/CalcByTEM.c b/src/CalcByTEM.c index 343fa01be..3c01fdf4f 100644 --- a/src/CalcByTEM.c +++ b/src/CalcByTEM.c @@ -66,8 +66,7 @@ int CalcByTEM( FILE *fp; double Time = X->Bind.Def.Param.Tinit; double dt = ((X->Bind.Def.NLaser == 0) ? 
0.0 : X->Bind.Def.Param.TimeSlice); - double complex **v2; /**< Ttemporary vector for time evolution calculation, @f$ v2 = H*v1 = H^coef |psi(t)>@f$.*/ - + global_norm = d_1d_allocate(1); if (X->Bind.Def.NTETimeSteps < X->Bind.Def.Lanczos_max) { diff --git a/src/FirstMultiply.c b/src/FirstMultiply.c index 66b71fcac..a60bc7a33 100644 --- a/src/FirstMultiply.c +++ b/src/FirstMultiply.c @@ -55,9 +55,9 @@ int FirstMultiply(struct BindStruct *X) { /**@brief Initialize v1 and v0 = v1 */ - MakeIniVec(rand_i,X); + MakeIniVec(X); - TimeKeeperWithRandAndStep(X, cFileNameTimeKeep, cTPQStep, "a", rand_i, step_i); + TimeKeeperWithRandAndStep(X, cFileNameTimeKeep, cTPQStep, "a", 0, step_i); /**@brief Compute expectation value at infinite temperature */ @@ -101,6 +101,6 @@ firstprivate(i_max) reduction(+: dnorm) #pragma omp parallel for default(none) private(i) shared(v0,rand_i) firstprivate(i_max, dnorm) for (i = 1; i <= i_max; i++) v0[i][rand_i] = v0[i][rand_i] / dnorm; }/*for (rand_i = 0; rand_i < NumAve; rand_i++)*/ - TimeKeeperWithRandAndStep(X, cFileNameTimeKeep, cTPQStepEnd, "a", rand_i, step_i); + TimeKeeperWithRandAndStep(X, cFileNameTimeKeep, cTPQStepEnd, "a", 0, step_i); return 0; } diff --git a/src/HPhiMain.c b/src/HPhiMain.c index c0f94df3e..67d9fba0a 100644 --- a/src/HPhiMain.c +++ b/src/HPhiMain.c @@ -290,6 +290,15 @@ int main(int argc, char* argv[]){ StopTimer(2000); switch (X.Bind.Def.iCalcType) { + case Lanczos: + StartTimer(4000); + if (CalcByLanczos(&X) != TRUE) { + StopTimer(4000); + exitMPI(-3); + } + StopTimer(4000); + break; + case CG: if (CalcByLOBPCG(&X) != TRUE) { exitMPI(-3); diff --git a/src/Lanczos_EigenValue.c b/src/Lanczos_EigenValue.c index 6c7a952f9..a06e3e124 100644 --- a/src/Lanczos_EigenValue.c +++ b/src/Lanczos_EigenValue.c @@ -425,7 +425,7 @@ int Lanczos_GetTridiagonalMatrixComponents( /// \retval 0 Succeed to read the initial vector. 
/// \version 1.2 /// \author Kazuyoshi Yoshimi (The University of Tokyo) -int ReadInitialVector(struct BindStruct *X, double complex* _v0, double complex *_v1, unsigned long int *liLanczosStp_vec) +int ReadInitialVector(struct BindStruct *X, double complex** _v0, double complex **_v1, unsigned long int *liLanczosStp_vec) { size_t byte_size; char sdt[D_FileNameMax]; @@ -444,8 +444,8 @@ int ReadInitialVector(struct BindStruct *X, double complex* _v0, double complex fprintf(stderr, "Error: A size of Inputvector is incorrect.\n"); return -1; } - byte_size = fread(_v0, sizeof(complex double), X->Check.idim_max + 1, fp); - byte_size = fread(_v1, sizeof(complex double), X->Check.idim_max + 1, fp); + byte_size = fread(&_v0[0][0], sizeof(complex double), X->Check.idim_max + 1, fp); + byte_size = fread(&_v1[0][0], sizeof(complex double), X->Check.idim_max + 1, fp); fclose(fp); fprintf(stdoutMPI, " End: Input vectors for recalculation.\n"); TimeKeeper(X, cFileNameTimeKeep, c_InputSpectrumRecalcvecEnd, "a"); @@ -464,8 +464,8 @@ int ReadInitialVector(struct BindStruct *X, double complex* _v0, double complex /// \version 2.0 /// \author Kazuyoshi Yoshimi (The University of Tokyo) int OutputLanczosVector(struct BindStruct *X, - double complex* tmp_v0, - double complex *tmp_v1, + double complex** tmp_v0, + double complex **tmp_v1, unsigned long int liLanczosStp_vec){ char sdt[D_FileNameMax]; FILE *fp; @@ -479,8 +479,8 @@ int OutputLanczosVector(struct BindStruct *X, } fwrite(&liLanczosStp_vec, sizeof(liLanczosStp_vec),1,fp); fwrite(&X->Check.idim_max, sizeof(X->Check.idim_max),1,fp); - fwrite(tmp_v0, sizeof(complex double),X->Check.idim_max+1, fp); - fwrite(tmp_v1, sizeof(complex double),X->Check.idim_max+1, fp); + fwrite(&tmp_v0[0][0], sizeof(complex double), X->Check.idim_max + 1, fp); + fwrite(&tmp_v1[0][0], sizeof(complex double), X->Check.idim_max + 1, fp); fclose(fp); fprintf(stdoutMPI, " End: Output vectors for recalculation.\n"); @@ -496,7 +496,7 @@ int 
OutputLanczosVector(struct BindStruct *X, /// Output: Large.iv. /// \param tmp_v0 [out] The initial vector whose components are zero. /// \param tmp_v1 [out] The initial vector whose components are randomly given when initial_mode=1, otherwise, iv-th component is only given. -void SetInitialVector(struct BindStruct *X, double complex* tmp_v0, double complex *tmp_v1) { +void SetInitialVector(struct BindStruct *X, double complex** tmp_v0, double complex **tmp_v1) { int iproc; long int i, iv, i_max; unsigned long int i_max_tmp, sum_i_max; @@ -522,8 +522,8 @@ void SetInitialVector(struct BindStruct *X, double complex* tmp_v0, double compl X->Def.k_exct); #pragma omp parallel for default(none) private(i) shared(tmp_v0, tmp_v1) firstprivate(i_max) for (i = 1; i <= i_max; i++) { - tmp_v0[i] = 0.0; - tmp_v1[i] = 0.0; + tmp_v0[i][0] = 0.0; + tmp_v1[i][0] = 0.0; } sum_i_max = 0; @@ -532,10 +532,10 @@ void SetInitialVector(struct BindStruct *X, double complex* tmp_v0, double compl i_max_tmp = BcastMPI_li(iproc, i_max); if (sum_i_max <= iv && iv < sum_i_max + i_max_tmp) { if (myrank == iproc) { - tmp_v1[iv - sum_i_max + 1] = 1.0; + tmp_v1[iv - sum_i_max + 1][0] = 1.0; if (X->Def.iInitialVecType == 0) { - tmp_v1[iv - sum_i_max + 1] += 1.0 * I; - tmp_v1[iv - sum_i_max + 1] /= sqrt(2.0); + tmp_v1[iv - sum_i_max + 1][0] += 1.0 * I; + tmp_v1[iv - sum_i_max + 1][0] /= sqrt(2.0); } }/*if (myrank == iproc)*/ }/*if (sum_i_max <= iv && iv < sum_i_max + i_max_tmp)*/ @@ -545,10 +545,10 @@ void SetInitialVector(struct BindStruct *X, double complex* tmp_v0, double compl }/*for (iproc = 0; iproc < nproc; iproc++)*/ } else { - tmp_v1[iv + 1] = 1.0; + tmp_v1[iv + 1][0] = 1.0; if (X->Def.iInitialVecType == 0) { - tmp_v1[iv + 1] += 1.0 * I; - tmp_v1[iv + 1] /= sqrt(2.0); + tmp_v1[iv + 1][0] += 1.0 * I; + tmp_v1[iv + 1][0] /= sqrt(2.0); } } }/*if(initial_mode == 0)*/ @@ -562,7 +562,7 @@ void SetInitialVector(struct BindStruct *X, double complex* tmp_v0, double compl #pragma omp for for (i = 1; i 
<= i_max; i++) { - tmp_v0[i] = 0.0; + tmp_v0[i][0] = 0.0; } /* Initialise MT @@ -583,12 +583,12 @@ void SetInitialVector(struct BindStruct *X, double complex* tmp_v0, double compl if (X->Def.iInitialVecType == 0) { #pragma omp for for (i = 1; i <= i_max; i++) - tmp_v1[i] = 2.0 * (dsfmt_genrand_close_open(&dsfmt) - 0.5) + - 2.0 * (dsfmt_genrand_close_open(&dsfmt) - 0.5) * I; + tmp_v1[i][0] = 2.0 * (dsfmt_genrand_close_open(&dsfmt) - 0.5) + + 2.0 * (dsfmt_genrand_close_open(&dsfmt) - 0.5) * I; } else { #pragma omp for for (i = 1; i <= i_max; i++) - tmp_v1[i] = 2.0 * (dsfmt_genrand_close_open(&dsfmt) - 0.5); + tmp_v1[i][0] = 2.0 * (dsfmt_genrand_close_open(&dsfmt) - 0.5); } }/*#pragma omp parallel*/ @@ -596,7 +596,7 @@ void SetInitialVector(struct BindStruct *X, double complex* tmp_v0, double compl cdnorm = 0.0; #pragma omp parallel for default(none) private(i) shared(tmp_v1, i_max) reduction(+: cdnorm) for (i = 1; i <= i_max; i++) { - cdnorm += conj(tmp_v1[i]) * tmp_v1[i]; + cdnorm += conj(tmp_v1[i][0]) * tmp_v1[i][0]; } if(X->Def.iFlgMPI==0) { cdnorm = SumMPI_dc(cdnorm); @@ -605,7 +605,7 @@ void SetInitialVector(struct BindStruct *X, double complex* tmp_v0, double compl dnorm = sqrt(dnorm); #pragma omp parallel for default(none) private(i) shared(tmp_v1) firstprivate(i_max, dnorm) for (i = 1; i <= i_max; i++) { - tmp_v1[i] = tmp_v1[i] / dnorm; + tmp_v1[i][0] = tmp_v1[i][0] / dnorm; } }/*else if(initial_mode==1)*/ } diff --git a/src/MakeIniVec.c b/src/MakeIniVec.c index f510ea861..ed1c594db 100644 --- a/src/MakeIniVec.c +++ b/src/MakeIniVec.c @@ -38,20 +38,21 @@ /*Note: X->Def.iInitialVecType == 0: All components are given by random complex numbers x+i*y, x = [-1,1),y=[-1,1]*/ /*Note: X->Def.iInitialVecType ==-1: random vectors on the 2*i_max complex sphere*/ /*Note: X->Def.iInitialVecType == others: All components are given by random real numbers x, x=[-1,1)*/ -int MakeIniVec(int rand_i, struct BindStruct *X) { +int MakeIniVec(struct BindStruct *X) { long int i, 
i_max; double complex dnorm; double Ns; long unsigned int u_long_i; dsfmt_t dsfmt; - int mythread; + int mythread, rand_i; double rand_X,rand_Y; double complex rand_Z1,rand_Z2; Ns = 1.0*X->Def.NsiteMPI; i_max = X->Check.idim_max; + for (rand_i = 0; rand_i < NumAve; rand_i++) { #pragma omp parallel default(none) private(i, mythread, u_long_i, dsfmt,rand_X,rand_Y,rand_Z1,rand_Z2) \ shared(v0, v1, nthreads, myrank, rand_i, X, stdoutMPI, cLogCheckInitComplex, cLogCheckInitReal) \ firstprivate(i_max) @@ -69,48 +70,50 @@ int MakeIniVec(int rand_i, struct BindStruct *X) { #else mythread = 0; #endif - u_long_i = 123432 + (rand_i+1)*labs(X->Def.initial_iv) + mythread + nthreads * myrank; + u_long_i = 123432 + (rand_i + 1) * labs(X->Def.initial_iv) + mythread + nthreads * myrank; dsfmt_init_gen_rand(&dsfmt, u_long_i); if (X->Def.iInitialVecType == 0) { StartTimer(3101); - #pragma omp for +#pragma omp for for (i = 1; i <= i_max; i++) - v1[i][rand_i] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5) + 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5)*I; + v1[i][rand_i] = 2.0 * (dsfmt_genrand_close_open(&dsfmt) - 0.5) + 2.0 * (dsfmt_genrand_close_open(&dsfmt) - 0.5) * I; /*if (X->Def.iInitialVecType == 0)*/ - }else if (X->Def.iInitialVecType == -1) { + } + else if (X->Def.iInitialVecType == -1) { StartTimer(3101); - #pragma omp for - for (i = 1; i <= i_max; i++){ - rand_X = dsfmt_genrand_close_open(&dsfmt); - rand_Y = dsfmt_genrand_close_open(&dsfmt); - rand_Z1 = sqrt(-2.0*log(rand_X))*cos(2.0*M_PI*rand_Y); - rand_Z2 = sqrt(-2.0*log(rand_X))*sin(2.0*M_PI*rand_Y); - v1[i][rand_i] = rand_Z1+I*rand_Z2; - } - /*if (X->Def.iInitialVecType == -1)*/ - }else { - #pragma omp for +#pragma omp for + for (i = 1; i <= i_max; i++) { + rand_X = dsfmt_genrand_close_open(&dsfmt); + rand_Y = dsfmt_genrand_close_open(&dsfmt); + rand_Z1 = sqrt(-2.0 * log(rand_X)) * cos(2.0 * M_PI * rand_Y); + rand_Z2 = sqrt(-2.0 * log(rand_X)) * sin(2.0 * M_PI * rand_Y); + v1[i][rand_i] = rand_Z1 + I * rand_Z2; + } + /*if 
(X->Def.iInitialVecType == -1)*/ + } + else { +#pragma omp for for (i = 1; i <= i_max; i++) - v1[i][rand_i] = 2.0*(dsfmt_genrand_close_open(&dsfmt) - 0.5); + v1[i][rand_i] = 2.0 * (dsfmt_genrand_close_open(&dsfmt) - 0.5); } StopTimer(3101); }/*#pragma omp parallel*/ /*Normalize v*/ - dnorm=0.0; + dnorm = 0.0; #pragma omp parallel for default(none) private(i) shared(v1, i_max, rand_i) reduction(+: dnorm) - for(i=1;i<=i_max;i++){ + for (i = 1; i <= i_max; i++) { dnorm += conj(v1[i][rand_i]) * v1[i][rand_i]; } dnorm = SumMPI_dc(dnorm); - dnorm=sqrt(dnorm); + dnorm = sqrt(dnorm); global_1st_norm[rand_i] = dnorm; #pragma omp parallel for default(none) private(i) shared(v0,v1) firstprivate(i_max, dnorm, rand_i) - for(i=1;i<=i_max;i++){ - v1[i][rand_i] = v1[i][rand_i] /dnorm; + for (i = 1; i <= i_max; i++) { + v1[i][rand_i] = v1[i][rand_i] / dnorm; v0[i][rand_i] = v1[i][rand_i]; } - + } TimeKeeperWithRandAndStep(X, cFileNameTimeKeep, cTPQStep, "a", rand_i, step_i); return 0; diff --git a/src/StdFace b/src/StdFace index 92967761b..78c128e51 160000 --- a/src/StdFace +++ b/src/StdFace @@ -1 +1 @@ -Subproject commit 92967761b56e6ddf69b1039102dbf0d09256242c +Subproject commit 78c128e515331d183b3d76e3eca6481bdf0f2121 diff --git a/src/check.c b/src/check.c index 7a8d0fc6e..e40fb8452 100644 --- a/src/check.c +++ b/src/check.c @@ -195,6 +195,21 @@ int check(struct BindStruct *X){ X->Check.idim_max = comb_sum; switch(X->Def.iCalcType) { + case Lanczos: + switch (X->Def.iCalcModel) { + case Hubbard: + case HubbardNConserved: + case Kondo: + case KondoGC: + case Spin: + X->Check.max_mem = 5.5 * X->Check.idim_max * 8.0 / (pow(10, 9)); + break; + case HubbardGC: + case SpinGC: + X->Check.max_mem = 4.5 * X->Check.idim_max * 8.0 / (pow(10, 9)); + break; + } + break; case CG: switch (X->Def.iCalcModel) { case Hubbard: diff --git a/src/expec_cisajs.c b/src/expec_cisajs.c index b34db0356..c5ab683fa 100644 --- a/src/expec_cisajs.c +++ b/src/expec_cisajs.c @@ -569,6 +569,7 @@ int 
expec_cisajs( switch (X->Def.iCalcType) { case TPQCalc: + case cTPQ: step = X->Def.istep; TimeKeeperWithRandAndStep(X, cFileNameTimeKeep, cTPQExpecOneBodyGStart, "a", 0, step); break; @@ -578,6 +579,7 @@ int expec_cisajs( break; case FullDiag: case CG: + case Lanczos: break; } @@ -615,7 +617,21 @@ int expec_cisajs( for (istate = 0; istate < nstate; istate++) { switch (X->Def.iCalcType) { + case Lanczos: + if (X->Def.St == 0) { + sprintf(sdt, cFileName1BGreen_Lanczos, X->Def.CDataFileHead); + fprintf(stdoutMPI, "%s", cLogLanczosExpecOneBodyGStart); + TimeKeeper(X, cFileNameTimeKeep, cLanczosExpecOneBodyGStart, "a"); + } + else if (X->Def.St == 1) { + sprintf(sdt, cFileName1BGreen_CG, X->Def.CDataFileHead); + TimeKeeper(X, cFileNameTimeKeep, cCGExpecOneBodyGStart, "a"); + fprintf(stdoutMPI, "%s", cLogCGExpecOneBodyGStart); + } + //vec=v0; + break; case TPQCalc: + case cTPQ: step = X->Def.istep; sprintf(sdt, cFileName1BGreen_TPQ, X->Def.CDataFileHead, istate, step); break; @@ -640,7 +656,12 @@ int expec_cisajs( }/*for (istate = 0; istate < nstate; istate++)*/ if (X->Def.St == 0) { - if (X->Def.iCalcType == TPQCalc) { + if (X->Def.iCalcType == Lanczos) { + TimeKeeper(X, cFileNameTimeKeep, cLanczosExpecOneBodyGFinish, "a"); + fprintf(stdoutMPI, "%s", cLogLanczosExpecOneBodyGEnd); + TimeKeeper(X, cFileNameTimeKeep, cLanczosExpecOneBodyGFinish, "a"); + } + else if (X->Def.iCalcType == TPQCalc || X->Def.iCalcType == cTPQ) { TimeKeeperWithRandAndStep(X, cFileNameTimeKeep, cTPQExpecOneBodyGFinish, "a", rand_i, step); } else if (X->Def.iCalcType == TimeEvolution) { diff --git a/src/expec_cisajscktaltdc.c b/src/expec_cisajscktaltdc.c index abf7a28ce..477afb039 100644 --- a/src/expec_cisajscktaltdc.c +++ b/src/expec_cisajscktaltdc.c @@ -911,7 +911,13 @@ firstprivate(i_max,X,org_isite1,org_isite3,org_sigma1,org_sigma2,org_sigma3,org_ * @retval -1 abnormally finished * */ -int expec_Sixbody_SpinGCHalf(struct BindStruct *X, int nstate, double complex *vec, FILE **_fp){ +int 
expec_Sixbody_SpinGCHalf( + struct BindStruct *X, + int nstate, + double complex** Xvec, + double complex**vec, + double complex**prod +){ long unsigned int i,j; long unsigned int tmp_org_isite1,tmp_org_isite2,tmp_org_isite3,tmp_org_isite4,tmp_org_isite5,tmp_org_isite6,tmp_org_isite7,tmp_org_isite8,tmp_org_isite9,tmp_org_isite10,tmp_org_isite11,tmp_org_isite12; long unsigned int tmp_org_sigma1,tmp_org_sigma2,tmp_org_sigma3,tmp_org_sigma4,tmp_org_sigma5,tmp_org_sigma6,tmp_org_sigma7,tmp_org_sigma8,tmp_org_sigma9,tmp_org_sigma10,tmp_org_sigma11,tmp_org_sigma12; @@ -921,17 +927,14 @@ int expec_Sixbody_SpinGCHalf(struct BindStruct *X, int nstate, double complex *v long unsigned int tmp_off=0; double complex tmp_V; double complex dam_pr; - double complex *vec_pr,*vec_pr_0,*vec_pr_1,*vec_pr_2; + double complex **vec_pr; long int i_max; i_max=X->Check.idim_max; + vec_pr = cd_2d_allocate(i_max + 1, nstate); for(i=0;iDef.NSBody;i++){ //printf("%d %d \n",i,X->Def.NSBody); - vec_pr_0 = cd_1d_allocate(i_max + 1); - vec_pr_1 = cd_1d_allocate(i_max + 1); - vec_pr_2 = cd_1d_allocate(i_max + 1); - vec_pr = cd_1d_allocate(i_max + 1); tmp_org_isite1 = X->Def.SBody[i][0]+1; tmp_org_sigma1 = X->Def.SBody[i][1]; @@ -986,18 +989,23 @@ int expec_Sixbody_SpinGCHalf(struct BindStruct *X, int nstate, double complex *v X->Large.mode = M_MLTPLY; /* |vec_pr_0>= c11a12|vec>*/ - mltplyHalfSpinGC_mini(X,tmp_org_isite11-1,tmp_org_sigma11,tmp_org_isite12-1,tmp_org_sigma12,nstate,vec_pr_0,vec); + zclear((i_max + 1) * nstate, &Xvec[0][0]); + zclear((i_max + 1) * nstate, &vec_pr[0][0]); + mltplyHalfSpinGC_mini(X,tmp_org_isite11-1,tmp_org_sigma11,tmp_org_isite12-1,tmp_org_sigma12,nstate, Xvec,vec); /* |vec_pr_1>= c9a10|vec_pr_0>*/ - mltplyHalfSpinGC_mini(X,tmp_org_isite9-1,tmp_org_sigma9,tmp_org_isite10-1,tmp_org_sigma10,nstate,vec_pr_1,vec_pr_0); + mltplyHalfSpinGC_mini(X,tmp_org_isite9-1,tmp_org_sigma9,tmp_org_isite10-1,tmp_org_sigma10,nstate, vec_pr, Xvec); + zclear((i_max + 1) * nstate, 
&Xvec[0][0]); /* |vec_pr_2>= c7a8|vec_pr_1>*/ - mltplyHalfSpinGC_mini(X,tmp_org_isite7-1,tmp_org_sigma7,tmp_org_isite8-1,tmp_org_sigma8, nstate,vec_pr_2,vec_pr_1); + mltplyHalfSpinGC_mini(X,tmp_org_isite7-1,tmp_org_sigma7,tmp_org_isite8-1,tmp_org_sigma8, nstate, Xvec, vec_pr); + zclear((i_max + 1) * nstate, &vec_pr[0][0]); /* |vec_pr>= c5a6|vec_pr_2>*/ - mltplyHalfSpinGC_mini(X,tmp_org_isite5-1,tmp_org_sigma5,tmp_org_isite6-1,tmp_org_sigma6, nstate,vec_pr,vec_pr_2); + mltplyHalfSpinGC_mini(X,tmp_org_isite5-1,tmp_org_sigma5,tmp_org_isite6-1,tmp_org_sigma6, nstate,vec_pr,Xvec); + zclear((i_max + 1) * nstate, &Xvec[0][0]); X->Large.mode = H_CORR; if(Rearray_Interactions(i, &org_isite1, &org_isite2, &org_isite3, &org_isite4, &org_sigma1, &org_sigma2, &org_sigma3, &org_sigma4, &tmp_V, X,6)!=0){ //error message will be added - fprintf(*_fp," %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n",tmp_org_isite1-1, tmp_org_sigma1, tmp_org_isite2-1, tmp_org_sigma2, tmp_org_isite3-1,tmp_org_sigma3, tmp_org_isite4-1, tmp_org_sigma4,0.0,0.0); + zclear(nstate, prod[i]); continue; } /* @@ -1012,34 +1020,34 @@ int expec_Sixbody_SpinGCHalf(struct BindStruct *X, int nstate, double complex *v if(org_isite1>X->Def.Nsite && org_isite3>X->Def.Nsite){ //org_isite3 >= org_isite1 > Nsite //printf("D-MPI \n"); if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ){ //diagonal - dam_pr += child_GC_CisAisCjuAju_spin_MPIdouble( (org_isite1-1), org_sigma1, (org_isite3-1), org_sigma3, tmp_V, X, nstate, vec, vec_pr); + dam_pr += child_GC_CisAisCjuAju_spin_MPIdouble( (org_isite1-1), org_sigma1, (org_isite3-1), org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_isite1 ==org_isite3 && org_sigma1 ==org_sigma4 && org_sigma2 ==org_sigma3){ //diagonal (for spin: cuadcdau=cuau) - dam_pr += child_GC_CisAis_spin_MPIdouble((org_isite1-1), org_sigma1, tmp_V, X, nstate, vec, vec_pr); + dam_pr += child_GC_CisAis_spin_MPIdouble((org_isite1-1), org_sigma1, tmp_V, X, nstate, Xvec, vec_pr); } else 
if(org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr += child_GC_CisAisCjuAjv_spin_MPIdouble(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, vec, vec_pr); + dam_pr += child_GC_CisAisCjuAjv_spin_MPIdouble(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4){ - dam_pr += child_GC_CisAitCjuAju_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, tmp_V, X, nstate, vec, vec_pr); + dam_pr += child_GC_CisAitCjuAju_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr += child_GC_CisAitCiuAiv_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, vec, vec_pr); + dam_pr += child_GC_CisAitCiuAiv_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); } } else if(org_isite3>X->Def.Nsite || org_isite1>X->Def.Nsite){ //org_isite3 > Nsite >= org_isite1 //printf("S-MPI \n"); if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ){ //diagonal - dam_pr += child_GC_CisAisCjuAju_spin_MPIsingle( (org_isite1-1), org_sigma1, (org_isite3-1), org_sigma3, tmp_V, X, nstate, vec, vec_pr); + dam_pr += child_GC_CisAisCjuAju_spin_MPIsingle( (org_isite1-1), org_sigma1, (org_isite3-1), org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr += child_GC_CisAisCjuAjv_spin_MPIsingle(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, vec, vec_pr); + dam_pr += child_GC_CisAisCjuAjv_spin_MPIsingle(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4){ - dam_pr += 
child_GC_CisAitCjuAju_spin_MPIsingle(org_isite1-1, org_sigma2, org_isite3-1, org_sigma3, tmp_V, X, nstate, vec, vec_pr); + dam_pr += child_GC_CisAitCjuAju_spin_MPIsingle(org_isite1-1, org_sigma2, org_isite3-1, org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr += child_GC_CisAitCiuAiv_spin_MPIsingle(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, vec, vec_pr); + dam_pr += child_GC_CisAitCiuAiv_spin_MPIsingle(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); } } else{ @@ -1049,47 +1057,35 @@ int expec_Sixbody_SpinGCHalf(struct BindStruct *X, int nstate, double complex *v if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ){ //diagonal dam_pr = 0.0; #pragma omp parallel for default(none) reduction(+:dam_pr) private(j,i) \ -firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(vec,vec_pr) +firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(Xvec,vec_pr) for(j=1;j<=i_max;j++){ - dam_pr +=GC_CisAisCisAis_spin_element(j, isA_up, isB_up, org_sigma2, org_sigma4, tmp_V, nstate, vec, vec_pr, X); + dam_pr +=GC_CisAisCisAis_spin_element(j, isA_up, isB_up, org_sigma2, org_sigma4, tmp_V, nstate, Xvec, vec_pr, X); } }else if(org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4){ dam_pr = 0.0; #pragma omp parallel for default(none) reduction(+:dam_pr) private(j) \ -firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(vec,vec_pr) +firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(Xvec,vec_pr) for(j=1;j<=i_max;j++){ - dam_pr += GC_CisAisCitAiu_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, vec, vec_pr, X, &tmp_off); + dam_pr += GC_CisAisCitAiu_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, Xvec, vec_pr, X, &tmp_off); } }else 
if(org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4){ dam_pr = 0.0; #pragma omp parallel for default(none) reduction(+:dam_pr) private(j) \ -firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(vec,vec_pr) +firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(Xvec,vec_pr) for(j=1;j<=i_max;j++){ - dam_pr += GC_CisAitCiuAiu_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, vec, vec_pr, X, &tmp_off); + dam_pr += GC_CisAitCiuAiu_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, Xvec, vec_pr, X, &tmp_off); } }else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4){ dam_pr = 0.0; #pragma omp parallel for default(none) reduction(+:dam_pr) private(j) \ -firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(vec,vec_pr) +firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(Xvec,vec_pr) for(j=1;j<=i_max;j++){ - dam_pr += GC_CisAitCiuAiv_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, vec, vec_pr, X, &tmp_off); + dam_pr += GC_CisAitCiuAiv_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, Xvec, vec_pr, X, &tmp_off); } } } } - dam_pr = SumMPI_dc(dam_pr); - fprintf(*_fp," %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n", - tmp_org_isite1-1, tmp_org_sigma1, tmp_org_isite2-1, tmp_org_sigma2, - tmp_org_isite3-1, tmp_org_sigma3, tmp_org_isite4-1, tmp_org_sigma4, - tmp_org_isite5-1, tmp_org_sigma5, tmp_org_isite6-1, tmp_org_sigma6, - tmp_org_isite7-1, tmp_org_sigma7, tmp_org_isite8-1, tmp_org_sigma8, - tmp_org_isite9-1, tmp_org_sigma9, tmp_org_isite10-1, tmp_org_sigma10, - tmp_org_isite11-1, tmp_org_sigma11, tmp_org_isite12-1, tmp_org_sigma12, - creal(dam_pr),cimag(dam_pr)); - free_cd_1d_allocate(vec_pr); - free_cd_1d_allocate(vec_pr_0); - free_cd_1d_allocate(vec_pr_1); - 
free_cd_1d_allocate(vec_pr_2); + MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); } return 0; } @@ -1129,7 +1125,13 @@ int expec_cisajscktalt_Spin( * @retval -1 abnormally finished * */ -int expec_Fourbody_SpinGCHalf(struct BindStruct *X, int nstate, double complex *vec, FILE **_fp){ +int expec_Fourbody_SpinGCHalf( + struct BindStruct *X, + int nstate, + double complex** Xvec, + double complex**vec, + double complex**prod +){ long unsigned int i,j; long unsigned int tmp_org_isite1,tmp_org_isite2,tmp_org_isite3,tmp_org_isite4,tmp_org_isite5,tmp_org_isite6,tmp_org_isite7,tmp_org_isite8; long unsigned int tmp_org_sigma1,tmp_org_sigma2,tmp_org_sigma3,tmp_org_sigma4,tmp_org_sigma5,tmp_org_sigma6,tmp_org_sigma7,tmp_org_sigma8; @@ -1139,14 +1141,13 @@ int expec_Fourbody_SpinGCHalf(struct BindStruct *X, int nstate, double complex * long unsigned int tmp_off=0; double complex tmp_V; double complex dam_pr; - double complex *vec_pr,*vec_pr_tmp; + double complex **vec_pr; long int i_max; i_max=X->Check.idim_max; + vec_pr = cd_2d_allocate(i_max + 1, nstate); for(i=0;iDef.NFBody;i++){ - vec_pr = cd_1d_allocate(i_max + 1); - vec_pr_tmp = cd_1d_allocate(i_max + 1); tmp_org_isite1 = X->Def.FBody[i][0]+1; tmp_org_sigma1 = X->Def.FBody[i][1]; @@ -1180,15 +1181,17 @@ int expec_Fourbody_SpinGCHalf(struct BindStruct *X, int nstate, double complex * X->Large.mode = M_MLTPLY; /* |vec_pr_tmp>= c7a8|vec>*/ - mltplyHalfSpinGC_mini(X,tmp_org_isite7-1,tmp_org_sigma7,tmp_org_isite8-1,tmp_org_sigma8,nstate,vec_pr_tmp,vec); + zclear((i_max + 1) * nstate, &Xvec[0][0]); + zclear((i_max + 1) * nstate, &vec_pr[0][0]); + mltplyHalfSpinGC_mini(X,tmp_org_isite7-1,tmp_org_sigma7,tmp_org_isite8-1,tmp_org_sigma8,nstate,Xvec,vec); /* |vec_pr>= c5a6|vec_pr_tmp>*/ - mltplyHalfSpinGC_mini(X,tmp_org_isite5-1,tmp_org_sigma5,tmp_org_isite6-1,tmp_org_sigma6,nstate,vec_pr,vec_pr_tmp); + mltplyHalfSpinGC_mini(X,tmp_org_isite5-1,tmp_org_sigma5,tmp_org_isite6-1,tmp_org_sigma6,nstate,vec_pr, Xvec); + zclear((i_max + 
1) * nstate, &Xvec[0][0]); X->Large.mode = H_CORR; if(Rearray_Interactions(i, &org_isite1, &org_isite2, &org_isite3, &org_isite4, &org_sigma1, &org_sigma2, &org_sigma3, &org_sigma4, &tmp_V, X,4)!=0){ - //error message will be added - fprintf(*_fp," %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n",tmp_org_isite1-1, tmp_org_sigma1, tmp_org_isite2-1, tmp_org_sigma2, tmp_org_isite3-1,tmp_org_sigma3, tmp_org_isite4-1, tmp_org_sigma4,0.0,0.0); - continue; + zclear(nstate, prod[i]); + continue; } /* printf("check: %d %d %d %d %d %d %d %d %d %d %d %d \n", @@ -1202,34 +1205,34 @@ int expec_Fourbody_SpinGCHalf(struct BindStruct *X, int nstate, double complex * if(org_isite1>X->Def.Nsite && org_isite3>X->Def.Nsite){ //org_isite3 >= org_isite1 > Nsite //printf("D-MPI \n"); if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ){ //diagonal - dam_pr += child_GC_CisAisCjuAju_spin_MPIdouble( (org_isite1-1), org_sigma1, (org_isite3-1), org_sigma3, tmp_V, X, nstate, vec, vec_pr); + dam_pr += child_GC_CisAisCjuAju_spin_MPIdouble( (org_isite1-1), org_sigma1, (org_isite3-1), org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_isite1 ==org_isite3 && org_sigma1 ==org_sigma4 && org_sigma2 ==org_sigma3){ //diagonal (for spin: cuadcdau=cuau) - dam_pr += child_GC_CisAis_spin_MPIdouble((org_isite1-1), org_sigma1, tmp_V, X, nstate, vec, vec_pr); + dam_pr += child_GC_CisAis_spin_MPIdouble((org_isite1-1), org_sigma1, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr += child_GC_CisAisCjuAjv_spin_MPIdouble(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, vec, vec_pr); + dam_pr += child_GC_CisAisCjuAjv_spin_MPIdouble(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4){ - dam_pr += child_GC_CisAitCjuAju_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, tmp_V, 
X, nstate, vec, vec_pr); + dam_pr += child_GC_CisAitCjuAju_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr += child_GC_CisAitCiuAiv_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, vec, vec_pr); + dam_pr += child_GC_CisAitCiuAiv_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); } } else if(org_isite3>X->Def.Nsite || org_isite1>X->Def.Nsite){ //org_isite3 > Nsite >= org_isite1 //printf("S-MPI \n"); if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ){ //diagonal - dam_pr += child_GC_CisAisCjuAju_spin_MPIsingle( (org_isite1-1), org_sigma1, (org_isite3-1), org_sigma3, tmp_V, X, nstate, vec, vec_pr); + dam_pr += child_GC_CisAisCjuAju_spin_MPIsingle( (org_isite1-1), org_sigma1, (org_isite3-1), org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr += child_GC_CisAisCjuAjv_spin_MPIsingle(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, vec, vec_pr); + dam_pr += child_GC_CisAisCjuAjv_spin_MPIsingle(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4){ - dam_pr += child_GC_CisAitCjuAju_spin_MPIsingle(org_isite1-1, org_sigma2, org_isite3-1, org_sigma3, tmp_V, X, nstate, vec, vec_pr); + dam_pr += child_GC_CisAitCjuAju_spin_MPIsingle(org_isite1-1, org_sigma2, org_isite3-1, org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr += child_GC_CisAitCiuAiv_spin_MPIsingle(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, vec, vec_pr); + dam_pr += child_GC_CisAitCiuAiv_spin_MPIsingle(org_isite1-1, 
org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); } } else{ @@ -1239,44 +1242,37 @@ int expec_Fourbody_SpinGCHalf(struct BindStruct *X, int nstate, double complex * if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ){ //diagonal dam_pr = 0.0; #pragma omp parallel for default(none) reduction(+:dam_pr) private(j,i) \ -firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(vec,vec_pr) +firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(Xvec,vec_pr) for(j=1;j<=i_max;j++){ - dam_pr +=GC_CisAisCisAis_spin_element(j, isA_up, isB_up, org_sigma2, org_sigma4, tmp_V, nstate, vec, vec_pr, X); + dam_pr +=GC_CisAisCisAis_spin_element(j, isA_up, isB_up, org_sigma2, org_sigma4, tmp_V, nstate, Xvec, vec_pr, X); } }else if(org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4){ dam_pr = 0.0; #pragma omp parallel for default(none) reduction(+:dam_pr) private(j) \ -firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(vec,vec_pr) +firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(Xvec,vec_pr) for(j=1;j<=i_max;j++){ - dam_pr += GC_CisAisCitAiu_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, vec, vec_pr, X, &tmp_off); + dam_pr += GC_CisAisCitAiu_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, Xvec, vec_pr, X, &tmp_off); } }else if(org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4){ dam_pr = 0.0; #pragma omp parallel for default(none) reduction(+:dam_pr) private(j) \ -firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(vec,vec_pr) +firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(Xvec,vec_pr) for(j=1;j<=i_max;j++){ - dam_pr += GC_CisAitCiuAiu_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, vec, vec_pr, X, &tmp_off); + dam_pr += GC_CisAitCiuAiu_spin_element(j, org_sigma2, 
org_sigma4, isA_up, isB_up, tmp_V, nstate, Xvec, vec_pr, X, &tmp_off); } }else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4){ dam_pr = 0.0; #pragma omp parallel for default(none) reduction(+:dam_pr) private(j) \ -firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(vec,vec_pr) +firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(Xvec,vec_pr) for(j=1;j<=i_max;j++){ - dam_pr += GC_CisAitCiuAiv_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, vec, vec_pr, X, &tmp_off); + dam_pr += GC_CisAitCiuAiv_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, Xvec, vec_pr, X, &tmp_off); } } } } - dam_pr = SumMPI_dc(dam_pr); - fprintf(*_fp," %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n", - tmp_org_isite1-1, tmp_org_sigma1, tmp_org_isite2-1, tmp_org_sigma2, - tmp_org_isite3-1, tmp_org_sigma3, tmp_org_isite4-1, tmp_org_sigma4, - tmp_org_isite5-1, tmp_org_sigma5, tmp_org_isite6-1, tmp_org_sigma6, - tmp_org_isite7-1, tmp_org_sigma7, tmp_org_isite8-1, tmp_org_sigma8, - creal(dam_pr),cimag(dam_pr)); - free_cd_1d_allocate(vec_pr); - free_cd_1d_allocate(vec_pr_tmp); + MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); } + free_cd_2d_allocate(vec_pr); return 0; } @@ -1293,7 +1289,13 @@ firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) s * @retval -1 abnormally finished * */ -int expec_Threebody_SpinGCHalf(struct BindStruct *X, int nstate, double complex *vec, FILE **_fp){ +int expec_Threebody_SpinGCHalf( + struct BindStruct *X, + int nstate, + double complex **Xvec, + double complex **vec, + double complex **prod +){ long unsigned int i,j; long unsigned int tmp_org_isite1,tmp_org_isite2,tmp_org_isite3,tmp_org_isite4,tmp_org_isite5,tmp_org_isite6; long unsigned int tmp_org_sigma1,tmp_org_sigma2,tmp_org_sigma3,tmp_org_sigma4,tmp_org_sigma5,tmp_org_sigma6; @@ -1303,13 +1305,13 @@ int 
expec_Threebody_SpinGCHalf(struct BindStruct *X, int nstate, double complex long unsigned int tmp_off=0; double complex tmp_V; double complex dam_pr; - double complex *vec_pr; + double complex **vec_pr; long int i_max; i_max=X->Check.idim_max; + vec_pr = cd_2d_allocate(i_max + 1, nstate); for(i=0;iDef.NTBody;i++){ - vec_pr = cd_1d_allocate(i_max + 1); tmp_org_isite1 = X->Def.TBody[i][0]+1; tmp_org_sigma1 = X->Def.TBody[i][1]; tmp_org_isite2 = X->Def.TBody[i][2]+1; @@ -1332,13 +1334,15 @@ int expec_Threebody_SpinGCHalf(struct BindStruct *X, int nstate, double complex X->Large.mode = M_MLTPLY; /* |vec_pr>= c5a6|phi>*/ + zclear((i_max + 1) * nstate, &Xvec[0][0]); + zclear((i_max + 1) * nstate, &vec_pr[0][0]); mltplyHalfSpinGC_mini(X,tmp_org_isite5-1,tmp_org_sigma5,tmp_org_isite6-1,tmp_org_sigma6,nstate,vec_pr,vec); X->Large.mode = H_CORR; if(Rearray_Interactions(i, &org_isite1, &org_isite2, &org_isite3, &org_isite4, &org_sigma1, &org_sigma2, &org_sigma3, &org_sigma4, &tmp_V, X,3)!=0){ //error message will be added - fprintf(*_fp," %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n",tmp_org_isite1-1, tmp_org_sigma1, tmp_org_isite2-1, tmp_org_sigma2, tmp_org_isite3-1,tmp_org_sigma3, tmp_org_isite4-1, tmp_org_sigma4,0.0,0.0); - continue; + zclear(nstate, prod[i]); + continue; } /* printf("check: %d %d %d %d %d %d %d %d %d %d %d %d \n", @@ -1352,34 +1356,34 @@ int expec_Threebody_SpinGCHalf(struct BindStruct *X, int nstate, double complex if(org_isite1>X->Def.Nsite && org_isite3>X->Def.Nsite){ //org_isite3 >= org_isite1 > Nsite //printf("D-MPI \n"); if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ){ //diagonal - dam_pr += child_GC_CisAisCjuAju_spin_MPIdouble( (org_isite1-1), org_sigma1, (org_isite3-1), org_sigma3, tmp_V, X, nstate, vec, vec_pr); + dam_pr += child_GC_CisAisCjuAju_spin_MPIdouble( (org_isite1-1), org_sigma1, (org_isite3-1), org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_isite1 ==org_isite3 && org_sigma1 ==org_sigma4 && org_sigma2 
==org_sigma3){ //diagonal (for spin: cuadcdau=cuau) - dam_pr += child_GC_CisAis_spin_MPIdouble((org_isite1-1), org_sigma1, tmp_V, X, nstate, vec, vec_pr); + dam_pr += child_GC_CisAis_spin_MPIdouble((org_isite1-1), org_sigma1, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr += child_GC_CisAisCjuAjv_spin_MPIdouble(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, vec, vec_pr); + dam_pr += child_GC_CisAisCjuAjv_spin_MPIdouble(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4){ - dam_pr += child_GC_CisAitCjuAju_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, tmp_V, X, nstate, vec, vec_pr); + dam_pr += child_GC_CisAitCjuAju_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr += child_GC_CisAitCiuAiv_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, vec, vec_pr); + dam_pr += child_GC_CisAitCiuAiv_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); } } else if(org_isite3>X->Def.Nsite || org_isite1>X->Def.Nsite){ //org_isite3 > Nsite >= org_isite1 //printf("S-MPI \n"); if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ){ //diagonal - dam_pr += child_GC_CisAisCjuAju_spin_MPIsingle( (org_isite1-1), org_sigma1, (org_isite3-1), org_sigma3, tmp_V, X, nstate, vec, vec_pr); + dam_pr += child_GC_CisAisCjuAju_spin_MPIsingle( (org_isite1-1), org_sigma1, (org_isite3-1), org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr += child_GC_CisAisCjuAjv_spin_MPIsingle(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_sigma4, 
tmp_V, X, nstate, vec, vec_pr); + dam_pr += child_GC_CisAisCjuAjv_spin_MPIsingle(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4){ - dam_pr += child_GC_CisAitCjuAju_spin_MPIsingle(org_isite1-1, org_sigma2, org_isite3-1, org_sigma3, tmp_V, X, nstate, vec, vec_pr); + dam_pr += child_GC_CisAitCjuAju_spin_MPIsingle(org_isite1-1, org_sigma2, org_isite3-1, org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr += child_GC_CisAitCiuAiv_spin_MPIsingle(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, vec, vec_pr); + dam_pr += child_GC_CisAitCiuAiv_spin_MPIsingle(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); } } else{ @@ -1389,42 +1393,37 @@ int expec_Threebody_SpinGCHalf(struct BindStruct *X, int nstate, double complex if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ){ //diagonal dam_pr = 0.0; #pragma omp parallel for default(none) reduction(+:dam_pr) private(j,i) \ -firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(vec,vec_pr) +firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(Xvec,vec_pr) for(j=1;j<=i_max;j++){ - dam_pr +=GC_CisAisCisAis_spin_element(j, isA_up, isB_up, org_sigma2, org_sigma4, tmp_V, nstate, vec, vec_pr, X); + dam_pr +=GC_CisAisCisAis_spin_element(j, isA_up, isB_up, org_sigma2, org_sigma4, tmp_V, nstate, Xvec, vec_pr, X); } }else if(org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4){ dam_pr = 0.0; #pragma omp parallel for default(none) reduction(+:dam_pr) private(j) \ -firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(vec,vec_pr) +firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(Xvec,vec_pr) for(j=1;j<=i_max;j++){ - dam_pr += 
GC_CisAisCitAiu_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, vec, vec_pr, X, &tmp_off); + dam_pr += GC_CisAisCitAiu_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, Xvec, vec_pr, X, &tmp_off); } }else if(org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4){ dam_pr = 0.0; #pragma omp parallel for default(none) reduction(+:dam_pr) private(j) \ -firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(vec,vec_pr) +firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(Xvec,vec_pr) for(j=1;j<=i_max;j++){ - dam_pr += GC_CisAitCiuAiu_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, vec, vec_pr, X, &tmp_off); + dam_pr += GC_CisAitCiuAiu_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, Xvec, vec_pr, X, &tmp_off); } }else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4){ dam_pr = 0.0; #pragma omp parallel for default(none) reduction(+:dam_pr) private(j) \ -firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(vec,vec_pr) +firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(Xvec,vec_pr) for(j=1;j<=i_max;j++){ - dam_pr += GC_CisAitCiuAiv_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, vec, nstate, vec_pr, X, &tmp_off); + dam_pr += GC_CisAitCiuAiv_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, Xvec, vec_pr, X, &tmp_off); } } } } - dam_pr = SumMPI_dc(dam_pr); - fprintf(*_fp," %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n", - tmp_org_isite1-1, tmp_org_sigma1, tmp_org_isite2-1, tmp_org_sigma2, - tmp_org_isite3-1, tmp_org_sigma3, tmp_org_isite4-1, tmp_org_sigma4, - tmp_org_isite5-1, tmp_org_sigma5, tmp_org_isite6-1, tmp_org_sigma6, - creal(dam_pr),cimag(dam_pr)); - free_cd_1d_allocate(vec_pr); + MultiVecProdMPI(i_max, nstate, vec, Xvec, prod[i]); } + free_cd_2d_allocate(vec_pr); return 0; 
} @@ -1444,11 +1443,23 @@ int expec_cisajscktalt_SpinGC( int nstate, double complex **Xvec, double complex **vec, - double complex **prod + double complex **prod, + double complex** prod_2, + double complex** prod_3, + double complex** prod_4 ) { int info = 0; if (X->Def.iFlgGeneralSpin == FALSE) { info = expec_cisajscktalt_SpinGCHalf(X, nstate, Xvec, vec, prod); + if (X->Def.NTBody > 0) { + info = expec_Threebody_SpinGCHalf(X, nstate,Xvec, vec, prod_2); + } + if (X->Def.NFBody > 0) { + info = expec_Fourbody_SpinGCHalf(X, nstate, Xvec, vec, prod_3); + } + if (X->Def.NSBody > 0) { + info = expec_Sixbody_SpinGCHalf(X, nstate, Xvec, vec, prod_4); + } } else { info = expec_cisajscktalt_SpinGCGeneral(X, nstate, Xvec, vec, prod); @@ -1480,9 +1491,9 @@ int expec_cisajscktaltdc double complex **vec ) { FILE *fp; - char sdt[D_FileNameMax]; + char sdt[D_FileNameMax], sdt_2[D_FileNameMax], sdt_3[D_FileNameMax], sdt_4[D_FileNameMax], * tmp_char; long unsigned int irght, ilft, ihfbit, icaca; - double complex **prod; + double complex **prod, ** prod_2, ** prod_3, ** prod_4; //For TPQ int step = 0, rand_i = 0, istate; @@ -1495,6 +1506,9 @@ int expec_cisajscktaltdc //Make File Name for output prod = cd_2d_allocate(X->Def.NCisAjtCkuAlvDC, nstate); + prod_2 = cd_2d_allocate(X->Def.NTBody, nstate); + prod_3 = cd_2d_allocate(X->Def.NFBody, nstate); + prod_4 = cd_2d_allocate(X->Def.NSBody, nstate); switch (X->Def.iCalcType) { case TPQCalc: case cTPQ: @@ -1532,7 +1546,7 @@ int expec_cisajscktaltdc break; case SpinGC: - if (expec_cisajscktalt_SpinGC(X, nstate, Xvec, vec, prod) != 0) { + if (expec_cisajscktalt_SpinGC(X, nstate, Xvec, vec, prod, prod_2, prod_3, prod_4) != 0) { return -1; } break; @@ -1543,17 +1557,46 @@ int expec_cisajscktaltdc for (istate = 0; istate < nstate; istate++) { switch (X->Def.iCalcType) { + case Lanczos: + if (X->Def.St == 0) { + sprintf(sdt, cFileName2BGreen_Lanczos, X->Def.CDataFileHead); + sprintf(sdt_2, cFileName3BGreen_Lanczos, X->Def.CDataFileHead); + 
sprintf(sdt_3, cFileName4BGreen_Lanczos, X->Def.CDataFileHead); + sprintf(sdt_4, cFileName6BGreen_Lanczos, X->Def.CDataFileHead); + TimeKeeper(X, cFileNameTimeKeep, cLanczosExpecTwoBodyGStart, "a"); + fprintf(stdoutMPI, "%s", cLogLanczosExpecTwoBodyGStart); + } + else if (X->Def.St == 1) { + sprintf(sdt, cFileName2BGreen_CG, X->Def.CDataFileHead); + sprintf(sdt_2, cFileName3BGreen_Lanczos, X->Def.CDataFileHead); + sprintf(sdt_3, cFileName4BGreen_Lanczos, X->Def.CDataFileHead); + sprintf(sdt_4, cFileName6BGreen_Lanczos, X->Def.CDataFileHead); + TimeKeeper(X, cFileNameTimeKeep, cCGExpecTwoBodyGStart, "a"); + fprintf(stdoutMPI, "%s", cLogLanczosExpecTwoBodyGStart); + } + break; case TPQCalc: + case cTPQ: step = X->Def.istep; sprintf(sdt, cFileName2BGreen_TPQ, X->Def.CDataFileHead, istate, step); - break; + sprintf(sdt_2, cFileName3BGreen_TPQ, X->Def.CDataFileHead, istate, step); + sprintf(sdt_3, cFileName4BGreen_TPQ, X->Def.CDataFileHead, istate, step); + sprintf(sdt_4, cFileName6BGreen_TPQ, X->Def.CDataFileHead, istate, step); + break; case TimeEvolution: step = X->Def.istep; sprintf(sdt, cFileName2BGreen_TE, X->Def.CDataFileHead, step); + sprintf(sdt_2, cFileName3BGreen_TE, X->Def.CDataFileHead, step); + sprintf(sdt_3, cFileName4BGreen_TE, X->Def.CDataFileHead, step); + sprintf(sdt_4, cFileName6BGreen_TE, X->Def.CDataFileHead, step); break; case FullDiag: case CG: sprintf(sdt, cFileName2BGreen_FullDiag, X->Def.CDataFileHead, istate); + sprintf(sdt, cFileName2BGreen_FullDiag, X->Def.CDataFileHead, istate); + sprintf(sdt_2, cFileName3BGreen_FullDiag, X->Def.CDataFileHead, istate); + sprintf(sdt_3, cFileName4BGreen_FullDiag, X->Def.CDataFileHead, istate); + sprintf(sdt_4, cFileName6BGreen_FullDiag, X->Def.CDataFileHead, istate); break; } if (childfopenMPI(sdt, "w", &fp) == 0) { @@ -1567,10 +1610,49 @@ int expec_cisajscktaltdc } fclose(fp); } - else return -1; + if (X->Def.NTBody > 0) { + if (childfopenMPI(sdt_2, "w", &fp) == 0) { + for (icaca = 0; icaca < 
X->Def.NTBody; icaca++) { + fprintf(fp, " %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n", + X->Def.TBody[icaca][0], X->Def.TBody[icaca][1], X->Def.TBody[icaca][2], X->Def.TBody[icaca][3], + X->Def.TBody[icaca][4], X->Def.TBody[icaca][5], X->Def.TBody[icaca][6], X->Def.TBody[icaca][7], + X->Def.TBody[icaca][8], X->Def.TBody[icaca][9], X->Def.TBody[icaca][10], X->Def.TBody[icaca][11], + creal(prod_2[icaca][istate]), cimag(prod_2[icaca][istate])); + } + fclose(fp); + } + } + if (X->Def.NFBody > 0) { + if (childfopenMPI(sdt_3, "w", &fp) == 0) { + for (icaca = 0; icaca < X->Def.NFBody; icaca++) { + fprintf(fp, " %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n", + X->Def.FBody[icaca][0], X->Def.FBody[icaca][1], X->Def.FBody[icaca][2], X->Def.FBody[icaca][3], + X->Def.FBody[icaca][4], X->Def.FBody[icaca][5], X->Def.FBody[icaca][6], X->Def.FBody[icaca][7], + X->Def.FBody[icaca][8], X->Def.FBody[icaca][9], X->Def.FBody[icaca][10], X->Def.FBody[icaca][11], + X->Def.FBody[icaca][12], X->Def.FBody[icaca][13], X->Def.FBody[icaca][14], X->Def.FBody[icaca][15], + creal(prod_3[icaca][istate]), cimag(prod_3[icaca][istate])); + } + fclose(fp); + } + } + if (X->Def.NSBody > 0) { + if (childfopenMPI(sdt_4, "w", &fp) == 0) { + for (icaca = 0; icaca < X->Def.NSBody; icaca++) { + fprintf(fp, " %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n", + X->Def.SBody[icaca][0], X->Def.SBody[icaca][1], X->Def.SBody[icaca][2], X->Def.SBody[icaca][3], + X->Def.SBody[icaca][4], X->Def.SBody[icaca][5], X->Def.SBody[icaca][6], X->Def.SBody[icaca][7], + X->Def.SBody[icaca][8], X->Def.SBody[icaca][9], X->Def.SBody[icaca][10], X->Def.SBody[icaca][11], + X->Def.SBody[icaca][12], X->Def.SBody[icaca][13], X->Def.SBody[icaca][14], X->Def.SBody[icaca][15], + X->Def.SBody[icaca][16], X->Def.SBody[icaca][17], X->Def.SBody[icaca][18], 
X->Def.SBody[icaca][19], + X->Def.SBody[icaca][20], X->Def.SBody[icaca][21], X->Def.SBody[icaca][22], X->Def.SBody[icaca][23], + creal(prod_4[icaca][istate]), cimag(prod_4[icaca][istate])); + } + fclose(fp); + } + } }/*for (istate = 0; istate < nstate; istate++)*/ - if (X->Def.iCalcType == TPQCalc) { + if (X->Def.iCalcType == TPQCalc || X->Def.iCalcType == cTPQ) { TimeKeeperWithRandAndStep(X, cFileNameTimeKeep, cTPQExpecTwoBodyGFinish, "a", rand_i, step); } else if (X->Def.iCalcType == TimeEvolution) { @@ -1591,5 +1673,8 @@ int expec_cisajscktaltdc */ //[e] free_cd_2d_allocate(prod); + free_cd_2d_allocate(prod_2); + free_cd_2d_allocate(prod_3); + free_cd_2d_allocate(prod_4); return 0; } diff --git a/src/expec_energy_flct.c b/src/expec_energy_flct.c index bdd29e0fe..9645fab11 100644 --- a/src/expec_energy_flct.c +++ b/src/expec_energy_flct.c @@ -727,6 +727,7 @@ int expec_energy_flct( switch (X->Def.iCalcType) { case TPQCalc: + case cTPQ: case TimeEvolution: #ifdef _DEBUG fprintf(stdoutMPI, "%s", cLogExpecEnergyStart); @@ -735,6 +736,7 @@ int expec_energy_flct( break; case FullDiag: case CG: + case Lanczos: break; default: return -1; diff --git a/src/expec_totalspin.c b/src/expec_totalspin.c index 343c34072..7556c1621 100644 --- a/src/expec_totalspin.c +++ b/src/expec_totalspin.c @@ -654,7 +654,7 @@ int expec_totalspin int expec_totalSz( struct BindStruct* X, - double complex* vec + double complex** vec ) { X->Large.mode = M_TOTALS; switch (X->Def.iCalcModel) { @@ -693,7 +693,7 @@ int expec_totalSz( void totalSz_HubbardGC ( struct BindStruct* X, - double complex* vec + double complex** vec ) { long unsigned int j; @@ -718,7 +718,7 @@ void totalSz_HubbardGC num1_sz = num1_up - num1_down; #pragma omp parallel for reduction(+:spn_z) default(none) firstprivate(i_max) private(j) shared(num1_sz,vec) for (j = 1; j <= i_max; j++) { - spn_z += (num1_sz) / 2.0 * conj(vec[j]) * vec[j]; + spn_z += (num1_sz) / 2.0 * conj(vec[j][0]) * vec[j][0]; } #endif } @@ -733,7 +733,7 @@ void 
totalSz_HubbardGC num1_up = ibit1_up / is1_up; ibit1_down = list_1_j & is1_down; num1_down = ibit1_down / is1_down; - spn_z += conj(vec[j]) * vec[j] * (num1_up - num1_down) / 2.0; + spn_z += conj(vec[j][0]) * vec[j][0] * (num1_up - num1_down) / 2.0; } } } @@ -753,7 +753,7 @@ void totalSz_HubbardGC void totalSz_SpinGC ( struct BindStruct* X, - double complex* vec + double complex** vec ) { long unsigned int j, list_1_j; long unsigned int isite1; @@ -776,7 +776,7 @@ void totalSz_SpinGC num1_down = 1 - num1_up; #pragma omp parallel for reduction(+: spn_z) default(none) firstprivate(i_max, is1_up, num1_up, num1_down) shared(vec) for (j = 1; j <= i_max; j++) { - spn_z += conj(vec[j]) * vec[j] * (num1_up - num1_down) / 2.0; + spn_z += conj(vec[j][0]) * vec[j][0] * (num1_up - num1_down) / 2.0; } #endif } @@ -788,7 +788,7 @@ void totalSz_SpinGC ibit1_up = list_1_j & is1_up; num1_up = ibit1_up / is1_up; num1_down = 1 - num1_up; - spn_z += conj(vec[j]) * vec[j] * (num1_up - num1_down) / 2.0; + spn_z += conj(vec[j][0]) * vec[j][0] * (num1_up - num1_down) / 2.0; } }//else } @@ -800,14 +800,14 @@ void totalSz_SpinGC spn_z1 = 0.5 * GetLocal2Sz(isite1, (unsigned long int) myrank, X->Def.SiteToBit, X->Def.Tpow); #pragma omp parallel for reduction(+: spn_z) default(none) firstprivate(spn_z1, i_max) shared(vec) for (j = 1; j <= i_max; j++) { - spn_z += conj(vec[j]) * vec[j] * spn_z1; + spn_z += conj(vec[j][0]) * vec[j][0] * spn_z1; } } else { #pragma omp parallel for reduction(+: spn_z) default(none) firstprivate(i_max, isite1, X) private(spn_z1) shared(vec) for (j = 1; j <= i_max; j++) { spn_z1 = 0.5 * GetLocal2Sz(isite1, j - 1, X->Def.SiteToBit, X->Def.Tpow); - spn_z += conj(vec[j]) * vec[j] * spn_z1; + spn_z += conj(vec[j][0]) * vec[j][0] * spn_z1; } } }//isite1 diff --git a/src/include/Lanczos_EigenValue.h b/src/include/Lanczos_EigenValue.h index b94075295..ff71e3e81 100644 --- a/src/include/Lanczos_EigenValue.h +++ b/src/include/Lanczos_EigenValue.h @@ -17,11 +17,11 @@ int 
Lanczos_EigenValue(struct BindStruct *X); int Lanczos_GetTridiagonalMatrixComponents(struct BindStruct *X, double *alpha, double *beta, double complex *_v1, unsigned long int *Lanczos_step); -int ReadInitialVector(struct BindStruct *X, double complex* tmp_v0, double complex *tmp_v1, unsigned long int *liLanczosStp_vec); +int ReadInitialVector(struct BindStruct *X, double complex** tmp_v0, double complex **tmp_v1, unsigned long int *liLanczosStp_vec); -int OutputLanczosVector(struct BindStruct *X, double complex* tmp_v0, double complex *tmp_v1, unsigned long int liLanczosStp_vec); +int OutputLanczosVector(struct BindStruct *X, double complex** tmp_v0, double complex **tmp_v1, unsigned long int liLanczosStp_vec); -void SetInitialVector(struct BindStruct *X, double complex* tmp_v0, double complex *tmp_v1); +void SetInitialVector(struct BindStruct *X, double complex** tmp_v0, double complex **tmp_v1); int ReadTMComponents( struct BindStruct *X, diff --git a/src/include/MakeIniVec.h b/src/include/MakeIniVec.h index d3d8cfd62..2325a773b 100644 --- a/src/include/MakeIniVec.h +++ b/src/include/MakeIniVec.h @@ -16,6 +16,5 @@ #pragma once #include "Common.h" int MakeIniVec( - int rand_i, struct BindStruct *X ); diff --git a/src/include/expec_cisajscktaltdc.h b/src/include/expec_cisajscktaltdc.h index 476c4bed1..0042ee0a2 100644 --- a/src/include/expec_cisajscktaltdc.h +++ b/src/include/expec_cisajscktaltdc.h @@ -66,5 +66,5 @@ void expec_cisajscktaltdc_alldiag_spin( double complex *vec ); -int expec_Threebody_SpinGCHalf(struct BindStruct *X,int nstate, double complex *vec, FILE **_fp); -int expec_Fourbody_SpinGCHalf(struct BindStruct *X, int nstate, double complex *vec, FILE **_fp); +int expec_Threebody_SpinGCHalf(struct BindStruct *X,int nstate, double complex** Xvec, double complex **vec, double complex** prod); +int expec_Fourbody_SpinGCHalf(struct BindStruct *X, int nstate, double complex** Xvec, double complex** vec, double complex** prod); diff --git 
a/src/include/expec_totalspin.h b/src/include/expec_totalspin.h index 108cd8fc5..1633224c8 100644 --- a/src/include/expec_totalspin.h +++ b/src/include/expec_totalspin.h @@ -20,5 +20,5 @@ int expec_totalspin(struct BindStruct *X, int nstate, double complex **vec); int expec_totalSz ( struct BindStruct* X, - double complex* vec + double complex**vec ); diff --git a/src/input.c b/src/input.c index a0d551ad9..0a4069eab 100644 --- a/src/input.c +++ b/src/input.c @@ -49,8 +49,8 @@ int inputHam(struct BindStruct *X){ fgetsMPI(ctmp2, sizeof(ctmp2) / sizeof(char), fp); sscanf(ctmp2, "%ld %ld %lf %lf\n", &ham_i, &ham_j, &dHam_re, &dHam_im); - v0[ham_i][ham_j]=dHam_re+I*dHam_im; - v0[ham_j][ham_i]=conj(v0[ham_i][ham_j]); + v0[ham_i][ham_j-1]=dHam_re+I*dHam_im; + v0[ham_j][ham_i-1]=conj(v0[ham_i][ham_j-1]); } fclose(fp); return 0; diff --git a/src/mltplyMPISpinCore.c b/src/mltplyMPISpinCore.c index 685cdf297..96bda9638 100644 --- a/src/mltplyMPISpinCore.c +++ b/src/mltplyMPISpinCore.c @@ -997,21 +997,34 @@ double complex child_GC_CisAitCjuAju_GeneralSpin_MPIsingle( firstprivate(X, tmp_V, isite, IniSpin, FinSpin) private(j, dmv, num1, off) \ shared (tmp_v0, tmp_v1, v1buf,nstate,one) { + if (X->Large.mode == M_MLTPLY || X->Large.mode == M_CALCSPEC) { #pragma omp for - for (j = 1; j <= X->Check.idim_max; j++) { - if (GetOffCompGeneralSpin(j - 1, isite, IniSpin, FinSpin, &off, - X->Def.SiteToBit, X->Def.Tpow) == TRUE) - { - dmv = tmp_V; - zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[off + 1][0], &one); - } - else if (GetOffCompGeneralSpin(j - 1, isite, FinSpin, IniSpin, &off, - X->Def.SiteToBit, X->Def.Tpow) == TRUE) - { - dmv = conj(tmp_V); - zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[off + 1][0], &one); - } - }/*for (j = 1; j <= X->Check.idim_max; j++)*/ + for (j = 1; j <= X->Check.idim_max; j++) { + if (GetOffCompGeneralSpin(j - 1, isite, IniSpin, FinSpin, &off, + X->Def.SiteToBit, X->Def.Tpow) == TRUE) + { + dmv = tmp_V; + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], 
&one, &tmp_v0[off + 1][0], &one); + } + else if (GetOffCompGeneralSpin(j - 1, isite, FinSpin, IniSpin, &off, + X->Def.SiteToBit, X->Def.Tpow) == TRUE) + { + dmv = conj(tmp_V); + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[off + 1][0], &one); + } + }/*for (j = 1; j <= X->Check.idim_max; j++)*/ + } + else { +#pragma omp for + for (j = 1; j <= X->Check.idim_max; j++) { + if (GetOffCompGeneralSpin(j - 1, isite, IniSpin, FinSpin, &off, + X->Def.SiteToBit, X->Def.Tpow) == TRUE) + { + dmv = tmp_V; + zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[off + 1][0], &one); + } + }/*for (j = 1; j <= X->Check.idim_max; j++)*/ + } }/*End of parallel region*/ }/*double complex child_GC_CisAitCjuAju_GeneralSpin_MPIsingle*/ /** diff --git a/src/mltplySpin.c b/src/mltplySpin.c index 9fdcdb4a6..66ddf1c26 100644 --- a/src/mltplySpin.c +++ b/src/mltplySpin.c @@ -626,7 +626,7 @@ shared(tmp_v0, tmp_v1,one,nstate) if (X->Def.InterAll_OffDiagonal[i][0] + 1 > X->Def.Nsite && X->Def.InterAll_OffDiagonal[i][4] + 1 > X->Def.Nsite) { StartTimer(521); - GC_general_int_spin_MPIdouble(i, X, tmp_v0, nstate, tmp_v1); + GC_general_int_spin_MPIdouble(i, X, nstate, tmp_v0, tmp_v1); StopTimer(521); } else if (X->Def.InterAll_OffDiagonal[i][4] + 1 > X->Def.Nsite) { diff --git a/src/output.c b/src/output.c index e3b171dcd..527012db4 100644 --- a/src/output.c +++ b/src/output.c @@ -97,8 +97,8 @@ int outputHam(struct BindStruct *X){ fprintf(fp, "%ld %ld %ld \n", imax, imax, ihermite); for (i=1; i<=imax; i++){ for (j=1; j<=i; j++){ - if(cabs(v0[i][j])>1.0e-13){ - fprintf(fp, "%ld %ld %lf %lf\n",i,j,creal(v0[i][j]),cimag(v0[i][j])); + if(cabs(v0[i][j-1])>1.0e-13){ + fprintf(fp, "%ld %ld %lf %lf\n",i,j,creal(v0[i][j-1]),cimag(v0[i][j-1])); } } } diff --git a/src/readdef.c b/src/readdef.c index a6db77613..ef33190d4 100644 --- a/src/readdef.c +++ b/src/readdef.c @@ -270,10 +270,6 @@ int ReadcalcmodFile( } if(CheckWords(ctmp, "CalcType")==0){ X->iCalcType=itmp; - if (X->iCalcType == Lanczos) { - 
fprintf(stdoutMPI, " LOBPCG is used alternative to Lanczos.\n"); - X->iCalcType = CG; - } } else if(CheckWords(ctmp, "FlgFiniteTemperature")==0){ X->iFlgFiniteTemperature = itmp; @@ -2030,7 +2026,6 @@ int ReadDefFileIdxPara( } }/*for (i = 0; i < X->NPairExcitationOperator[idx]; ++i)*/ idx++; - printf("debug %d %d %d\n", idx, X->NNPairExcitationOperator, X->NPairExcitationOperator[0]); } if (idx != X->NNPairExcitationOperator) { fclose(fp); diff --git a/src/xsetmem.c b/src/xsetmem.c index 897368239..708ab3798 100644 --- a/src/xsetmem.c +++ b/src/xsetmem.c @@ -179,7 +179,7 @@ int setmem_large else if (X->Def.iCalcType == CG) { nstate = X->Def.k_exct; } - else if (X->Def.iCalcType == TPQCalc) { + else if (X->Def.iCalcType == TPQCalc || X->Def.iCalcType == cTPQ) { nstate = NumAve; } else { @@ -189,9 +189,9 @@ int setmem_large v1 = cd_2d_allocate(X->Check.idim_max + 1, nstate); if (X->Def.iCalcType == TimeEvolution || X->Def.iCalcType == cTPQ) { - v2 = cd_1d_allocate(X->Check.idim_max + 1); + v2 = cd_2d_allocate(X->Check.idim_max + 1, nstate); } else { - v2 = cd_1d_allocate(1); + v2 = cd_2d_allocate(1, 1); } if (X->Def.iCalcType == TPQCalc || X->Def.iCalcType == cTPQ) { @@ -199,6 +199,8 @@ int setmem_large } else { vg = cd_2d_allocate(X->Check.idim_max + 1, nstate); } + alpha = d_1d_allocate(X->Def.Lanczos_max + 1); + beta = d_1d_allocate(X->Def.Lanczos_max + 1); if (X->Def.iCalcType == TPQCalc || X->Def.iCalcType == cTPQ || X->Def.iFlgCalcSpec != CALCSPEC_NOT) { vec = cd_2d_allocate(X->Def.Lanczos_max + 1, X->Def.Lanczos_max + 1); diff --git a/test/lobcg_genspin_ladder.sh b/test/lobcg_genspin_ladder.sh index f589ed637..9364eaff4 100755 --- a/test/lobcg_genspin_ladder.sh +++ b/test/lobcg_genspin_ladder.sh @@ -44,14 +44,14 @@ test "${diff}" = "0.000000" # Check one-body G cat > reference.dat < paste2.dat diff=`awk ' @@ -63,14 +63,14 @@ echo "Diff output/zvo_cisajs_eigen0.dat : " ${diff} test "${diff}" = "0.000000" cat > reference.dat < paste3.dat diff=`awk ' @@ -84,534 
+84,534 @@ test "${diff}" = "0.000000" # Check two-body G cat > reference.dat < paste5.dat diff=`awk ' @@ -623,534 +623,534 @@ echo "Diff output/zvo_cisajscktalt_eigen0.dat : " ${diff} test "${diff}" = "0.000000" cat > reference.dat < paste6.dat diff=`awk ' diff --git a/test/lobcg_kondo_chain.sh b/test/lobcg_kondo_chain.sh index 0aed0fbe7..dd233ab1d 100755 --- a/test/lobcg_kondo_chain.sh +++ b/test/lobcg_kondo_chain.sh @@ -42,32 +42,32 @@ test "${diff}" = "0.000000" # Check one-body G cat > reference.dat < paste2.dat diff=`awk ' @@ -79,32 +79,32 @@ echo "Diff output/zvo_cisajs_eigen0.dat : " ${diff} test "${diff}" = "0.000000" cat > reference.dat < paste3.dat diff=`awk ' @@ -118,102 +118,102 @@ test "${diff}" = "0.000000" # Check two-body G cat > reference.dat < paste5.dat diff=`awk ' @@ -225,102 +225,102 @@ echo "Diff output/zvo_cisajscktalt_eigen0.dat : " ${diff} test "${diff}" = "0.000000" cat > reference.dat < paste6.dat diff=`awk ' diff --git a/test/lobcg_kondogc_chain.sh b/test/lobcg_kondogc_chain.sh index ae0347588..11a2474ed 100755 --- a/test/lobcg_kondogc_chain.sh +++ b/test/lobcg_kondogc_chain.sh @@ -47,32 +47,32 @@ test "${diff}" = "0.000000" # Check one-body G cat > reference.dat < paste2.dat diff=`awk ' @@ -84,32 +84,32 @@ echo "Diff output/zvo_cisajs_eigen0.dat : " ${diff} test "${diff}" = "0.000000" cat > reference.dat < paste3.dat diff=`awk ' @@ -121,32 +121,32 @@ echo "Diff output/zvo_cisajs_eigen1.dat : " ${diff} test "${diff}" = "0.000000" cat > reference.dat < paste4.dat diff=`awk ' @@ -160,102 +160,102 @@ test "${diff}" = "0.000000" # Check two-body G cat > reference.dat < paste5.dat diff=`awk ' @@ -267,102 +267,102 @@ echo "Diff output/zvo_cisajscktalt_eigen0.dat : " ${diff} test "${diff}" = "0.000000" cat > reference.dat < paste6.dat diff=`awk ' @@ -374,102 +374,102 @@ echo "Diff output/zvo_cisajscktalt_eigen1.dat : " ${diff} test "${diff}" = "0.000000" cat > reference.dat < paste7.dat diff=`awk ' diff --git a/test/spectrum_spin_kagome.sh 
b/test/spectrum_spin_kagome.sh index ad55f842a..b1768dde0 100755 --- a/test/spectrum_spin_kagome.sh +++ b/test/spectrum_spin_kagome.sh @@ -58,7 +58,7 @@ J0 = 1.0 J1 = 0.5 J2 = 0.7 J'=0.2 -2Sz = 1 +2Sz = -1 SpectrumQW = 0.5 SpectrumQL = 0.5 NOmega = 5 @@ -99,7 +99,7 @@ J0 = 1.0 J1 = 0.5 J2 = 0.7 J'=0.2 -2Sz = 1 +2Sz = -1 SpectrumQW = 0.5 SpectrumQL = 0.5 NOmega = 5 diff --git a/test/te_hubbard_chain_interall.sh b/test/te_hubbard_chain_interall.sh index d78f5b5e6..4566d04ef 100755 --- a/test/te_hubbard_chain_interall.sh +++ b/test/te_hubbard_chain_interall.sh @@ -2,7 +2,7 @@ mkdir -p te_hubbard_chain_interall/ cd te_hubbard_chain_interall -python "$1/test/testTECalc.py" -p "../../src/HPhi" -mpi "${MPIRUN}" +python3 "$1/test/testTECalc.py" -p "../../src/HPhi" -mpi "${MPIRUN}" # Check value: flct cat > reference.dat < reference.dat < reference.dat < reference.dat < Date: Tue, 18 Oct 2022 16:56:55 +0900 Subject: [PATCH 36/50] Previous commit failed to build on macOS because of a missing include. --- src/HPhiMain.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/HPhiMain.c b/src/HPhiMain.c index 67d9fba0a..eec26380f 100644 --- a/src/HPhiMain.c +++ b/src/HPhiMain.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include From 69febe0e754b36f85aa538f6c2ba6335d791236d Mon Sep 17 00:00:00 2001 From: Mitsuaki Kawamura Date: Wed, 19 Oct 2022 00:44:46 +0900 Subject: [PATCH 37/50] FullDiag by ScaLAPACK did not work.
--- src/include/matrixlapack.h | 2 +- src/include/matrixscalapack.h | 2 +- src/lapack_diag.c | 4 +-- src/matrixscalapack.c | 2 +- src/phys.c | 44 +++---------------------------- src/vec12.c | 1 + test/fulldiag_genspin_ladder.sh | 1 + test/fulldiag_genspingc_ladder.sh | 1 + test/fulldiag_ham_io.sh | 2 ++ test/fulldiag_hubbard_chain.sh | 1 + test/fulldiag_hubbardgc_tri.sh | 1 + test/fulldiag_kondo_chain.sh | 1 + test/fulldiag_kondogc_chain.sh | 1 + test/fulldiag_spin_tri.sh | 1 + test/fulldiag_spingc_tri.sh | 1 + 15 files changed, 19 insertions(+), 46 deletions(-) diff --git a/src/include/matrixlapack.h b/src/include/matrixlapack.h index e26ad0b91..64475c48f 100644 --- a/src/include/matrixlapack.h +++ b/src/include/matrixlapack.h @@ -33,5 +33,5 @@ #include int ZHEEVall(int xNsize, double complex **A, double *r,double complex **vec); - +int DSEVvector(int xNsize, double** A, double* r, double** vec); #endif diff --git a/src/include/matrixscalapack.h b/src/include/matrixscalapack.h index f9e3f1240..05c8a6389 100644 --- a/src/include/matrixscalapack.h +++ b/src/include/matrixscalapack.h @@ -54,7 +54,7 @@ long int *GetMatElementInRank(long int i, long int j, long int nprow, long int n void DivMat(long int m, long int n, double complex Aorgmn, double complex *A, int *desca); void GetEigenVector(long int i, long int m, double complex *Z, int *descZ, double complex *vec); int diag_scalapack_cmp(long int xNsize, double complex **A, - double complex *r, double complex *Z, int *descZ); + double *r, double complex *Z, int *descZ); extern int use_scalapack; #endif diff --git a/src/lapack_diag.c b/src/lapack_diag.c index fc7424dcc..781d48eb9 100644 --- a/src/lapack_diag.c +++ b/src/lapack_diag.c @@ -71,9 +71,9 @@ struct BindStruct *X//!<[inout] mp = numroc_(&xMsize, &mb, &myrow, &i_zero, &nprow); nq = numroc_(&xMsize, &mb, &mycol, &i_zero, &npcol); Z_vec = malloc(mp*nq*sizeof(complex double)); - diag_scalapack_cmp(xMsize, Ham, v0, Z_vec, descZ_vec); + diag_scalapack_cmp(xMsize, 
v0, X->Phys.energy, Z_vec, descZ_vec); } else { - ZHEEVall(xMsize, Ham, v0, v1); + ZHEEVall(xMsize, v0, X->Phys.energy, v1); } #else ZHEEVall(xMsize, v0, X->Phys.energy, v1); diff --git a/src/matrixscalapack.c b/src/matrixscalapack.c index cbc526e41..16bf75f4c 100644 --- a/src/matrixscalapack.c +++ b/src/matrixscalapack.c @@ -198,7 +198,7 @@ void GetEigenVector(long int i, long int m, double complex *Z, int *descZ, doubl * @author Yusuke Konishi (Academeia Co., Ltd.) */ int diag_scalapack_cmp(long int xNsize, double complex **A, - double complex *r, double complex *Z, int *descZ) { + double *r, double complex *Z, int *descZ) { const int i_one=1, i_zero=0; const long int i_negone=-1; const double zero=0.0, one=1.0; diff --git a/src/phys.c b/src/phys.c index 023c3ee80..9f781abac 100644 --- a/src/phys.c +++ b/src/phys.c @@ -62,34 +62,24 @@ void phys(struct BindStruct *X, //!<[inout] vec_tmp = malloc(i_max*sizeof(double complex)); } for (i = 0; i < neig; i++) { - for (j = 0; j < i_max; j++) { - v0[j + 1] = 0.0; - } if (use_scalapack) { MPI_Comm_rank(MPI_COMM_WORLD, &rank); GetEigenVector(i, i_max, Z_vec, descZ_vec, vec_tmp); if (rank == 0) { for (j = 0; j < i_max; j++) { - v0[j + 1] = vec_tmp[j]; + v0[j + 1][i] = vec_tmp[j]; } } else { for (j = 0; j < i_max; j++) { - v0[j + 1] = 0.0; + v0[j + 1][i] = 0.0; } } } else { if (X->Def.iCalcType == FullDiag) { - if (myrank == 0) { - for (j = 0; j < i_max; j++) { - v0[j + 1] = v1[i][j]; - } - } - } - else { for (j = 0; j < i_max; j++) { - v0[j + 1] = v1[i][j]; + v0[j + 1][i] = v1[j][i]; } } } @@ -109,29 +99,12 @@ void phys(struct BindStruct *X, //!<[inout] exitMPI(-1); } -#ifdef _SCALAPACK - if (use_scalapack) { - if (X->Def.iCalcType == FullDiag) { - X->Phys.s2 = 0.0; - X->Phys.Sz = 0.0; - } - } - else { - if (X->Def.iCalcType == FullDiag) { - if (expec_totalspin(X, v1) != 0) { - fprintf(stderr, "Error: calc TotalSpin.\n"); - exitMPI(-1); - } - } - } -#else if (X->Def.iCalcType == FullDiag) { if (expec_totalspin(X, neig, v1) 
!= 0) { fprintf(stderr, "Error: calc TotalSpin.\n"); exitMPI(-1); } } -#endif for (i = 0; i < neig; i++) { if (X->Def.iCalcModel == Spin || X->Def.iCalcModel == SpinGC) { @@ -141,19 +114,8 @@ void phys(struct BindStruct *X, //!<[inout] tmp_N = X->Phys.num_up[i] + X->Phys.num_down[i]; } if (X->Def.iCalcType == FullDiag) { -#ifdef _SCALAPACK - if (use_scalapack) { - fprintf(stdoutMPI, "i=%5ld Energy=%10lf N=%10lf Sz=%10lf Doublon=%10lf \n", i, X->Phys.energy, tmp_N, - X->Phys.Sz, X->Phys.doublon); - } - else { - fprintf(stdoutMPI, "i=%5ld Energy=%10lf N=%10lf Sz=%10lf S2=%10lf Doublon=%10lf \n", i, X->Phys.energy, tmp_N, - X->Phys.Sz, X->Phys.s2, X->Phys.doublon); - } -#else fprintf(stdoutMPI, "i=%5ld Energy=%10lf N=%10lf Sz=%10lf S2=%10lf Doublon=%10lf \n", i, X->Phys.energy[i], tmp_N, X->Phys.Sz[i], X->Phys.s2[i], X->Phys.doublon[i]); -#endif } else if (X->Def.iCalcType == CG) fprintf(stdoutMPI, "i=%5ld Energy=%10lf N=%10lf Sz=%10lf Doublon=%10lf \n", diff --git a/src/vec12.c b/src/vec12.c index 46316e749..7e33c1592 100644 --- a/src/vec12.c +++ b/src/vec12.c @@ -22,6 +22,7 @@ into ::vec #include "wrapperMPI.h" #include "common/setmemory.h" #include "xsetmem.h" +#include "matrixlapack.h" /** @brief Diagonalize a tri-diagonal matrix and store eigenvectors into ::vec diff --git a/test/fulldiag_genspin_ladder.sh b/test/fulldiag_genspin_ladder.sh index 63c9cd18d..690d82759 100755 --- a/test/fulldiag_genspin_ladder.sh +++ b/test/fulldiag_genspin_ladder.sh @@ -13,6 +13,7 @@ J0 = 1.0 J1 = 1.0 2S = 3 2Sz = 0 +ScaLAPACK = 1 EOF ${MPIRUNFC} ../../src/HPhi -s stan.in diff --git a/test/fulldiag_genspingc_ladder.sh b/test/fulldiag_genspingc_ladder.sh index 33c7f54ee..f3c40b65b 100755 --- a/test/fulldiag_genspingc_ladder.sh +++ b/test/fulldiag_genspingc_ladder.sh @@ -12,6 +12,7 @@ lattice = "ladder" J0 = 1.0 J1 = 1.0 2S = 3 +ScaLAPACK = 1 EOF ${MPIRUNFC} ../../src/HPhi -s stan.in diff --git a/test/fulldiag_ham_io.sh b/test/fulldiag_ham_io.sh index b3c776116..d44a73d82 100755 --- 
a/test/fulldiag_ham_io.sh +++ b/test/fulldiag_ham_io.sh @@ -13,6 +13,7 @@ U = 4.0 nelec = 4 2Sz = 0 HamIO = "out" +ScaLAPACK = 1 EOF ${MPIRUNFC} ../../src/HPhi -s stan1.in @@ -27,6 +28,7 @@ U = 4.0 nelec = 4 2Sz = 0 HamIO = "in" +ScaLAPACK = 1 EOF ${MPIRUNFC} ../../src/HPhi -s stan2.in diff --git a/test/fulldiag_hubbard_chain.sh b/test/fulldiag_hubbard_chain.sh index 970eaedea..97e1b9401 100755 --- a/test/fulldiag_hubbard_chain.sh +++ b/test/fulldiag_hubbard_chain.sh @@ -12,6 +12,7 @@ t = 1.0 U = 4.0 nelec = 4 2Sz = 0 +ScaLAPACK = 1 EOF ${MPIRUNFC} ../../src/HPhi -s stan.in diff --git a/test/fulldiag_hubbardgc_tri.sh b/test/fulldiag_hubbardgc_tri.sh index c1462d204..817b792cd 100755 --- a/test/fulldiag_hubbardgc_tri.sh +++ b/test/fulldiag_hubbardgc_tri.sh @@ -13,6 +13,7 @@ method = "FullDiag" lattice = "Triangular" t = 1.0 U = 4.0 +ScaLAPACK = 1 EOF ${MPIRUNFC} ../../src/HPhi -s stan.in diff --git a/test/fulldiag_kondo_chain.sh b/test/fulldiag_kondo_chain.sh index a34985e1e..006303132 100755 --- a/test/fulldiag_kondo_chain.sh +++ b/test/fulldiag_kondo_chain.sh @@ -12,6 +12,7 @@ t = 1.0 J = 4.0 nelec = 3 2Sz = 0 +ScaLAPACK = 1 EOF ${MPIRUNFC} ../../src/HPhi -s stan.in diff --git a/test/fulldiag_kondogc_chain.sh b/test/fulldiag_kondogc_chain.sh index 3b19050b4..20aa41c28 100755 --- a/test/fulldiag_kondogc_chain.sh +++ b/test/fulldiag_kondogc_chain.sh @@ -10,6 +10,7 @@ method = "FullDiag" lattice = "chain" t = 1.0 J = 4.0 +ScaLAPACK = 1 EOF ${MPIRUNFC} ../../src/HPhi -s stan.in diff --git a/test/fulldiag_spin_tri.sh b/test/fulldiag_spin_tri.sh index 76b912b5a..fed4c9a7c 100755 --- a/test/fulldiag_spin_tri.sh +++ b/test/fulldiag_spin_tri.sh @@ -13,6 +13,7 @@ method = "FullDiag" lattice = "triangular" J = 1.0 2Sz = 0 +ScaLAPACK = 1 EOF ${MPIRUNFC} ../../src/HPhi -s stan.in diff --git a/test/fulldiag_spingc_tri.sh b/test/fulldiag_spingc_tri.sh index 83da4dde1..e9a044236 100755 --- a/test/fulldiag_spingc_tri.sh +++ b/test/fulldiag_spingc_tri.sh @@ -12,6 +12,7 @@ model = 
"SpinGC" method = "FullDiag" lattice = "triangular" J = 1.0 +ScaLAPACK = 1 EOF ${MPIRUNFC} ../../src/HPhi -s stan.in From 8c598eeb7a4fd885b478a576861b702226396f27 Mon Sep 17 00:00:00 2001 From: Mitsuaki Kawamura Date: Thu, 20 Oct 2022 21:27:58 +0900 Subject: [PATCH 38/50] Previous version did not build on macOS because of an incomplete declaration. Address the warnings reported by gcc -Wall (delete unused variables and arguments). --- src/CalcByCanonicalTPQ.c | 3 +- src/CalcSpectrumByBiCG.c | 2 +- src/CalcSpectrumByLanczos.c | 2 +- src/CalcSpectrumByTPQ.c | 6 +- src/FirstMultiply.c | 4 +- src/Lanczos_EigenValue.c | 7 +- src/MakeIniVec.c | 2 - src/PairExHubbard.c | 2 +- src/PairExSpin.c | 12 +- src/StdFace | 2 +- src/diagonalcalc.c | 40 ++--- src/eigenIO.c | 19 ++- src/expec_cisajs.c | 8 +- src/expec_cisajscktaltdc.c | 256 ++++++++++------------------ src/expec_totalspin.c | 77 ++++----- src/include/CalcSpectrumByLanczos.h | 2 +- src/include/CalcSpectrumByTPQ.h | 2 +- src/include/Common.h | 2 +- src/include/Lanczos_EigenValue.h | 2 +- src/include/eigenIO.h | 12 +- src/include/log.h | 1 + src/include/matrixlapack.h | 1 + src/include/mltplyHubbard.h | 17 +- src/include/mltplyHubbardCore.h | 41 ++--- src/include/mltplyMPIHubbard.h | 8 +- src/include/mltplyMPIHubbardCore.h | 34 ++-- src/include/mltplyMPISpin.h | 6 +- src/include/mltplyMPISpinCore.h | 58 +++---- src/include/mltplySpin.h | 10 +- src/include/mltplySpinCore.h | 28 ++- src/include/struct.h | 4 +- src/include/xsetmem.h | 3 +- src/lapack_diag.c | 2 +- src/matrixlapack.c | 1 + src/matrixscalapack.c | 3 +- src/mltplyHubbard.c | 58 +++---- src/mltplyHubbardCore.c | 72 ++++---- src/mltplyMPIHubbard.c | 12 +- src/mltplyMPIHubbardCore.c | 70 ++++---- src/mltplyMPISpin.c | 8 +- src/mltplyMPISpinCore.c | 90 +++++----- src/mltplySpin.c | 44 ++--- src/mltplySpinCore.c | 52 +++--- src/phys.c | 2 +- src/xsetmem.c | 2 + 45 files changed, 475 insertions(+), 614 deletions(-) diff --git a/src/CalcByCanonicalTPQ.c
b/src/CalcByCanonicalTPQ.c index a63c0ef7c..0971f51d8 100644 --- a/src/CalcByCanonicalTPQ.c +++ b/src/CalcByCanonicalTPQ.c @@ -61,7 +61,7 @@ int CalcByCanonicalTPQ( unsigned long int i_max; int step_iO = 0; FILE* fp; - double inv_temp, Ns, delta_tau; + double inv_temp, delta_tau; struct TimeKeepStruct tstruct; size_t byte_size; @@ -81,7 +81,6 @@ int CalcByCanonicalTPQ( X->Bind.Def.St = 0; fprintf(stdoutMPI, "%s", cLogTPQ_Start); - Ns = 1.0 * X->Bind.Def.NsiteMPI; //fprintf(stdoutMPI, cLogTPQRand, rand_i+1, rand_max); iret = 0; //X->Bind.Def.irand = rand_i; diff --git a/src/CalcSpectrumByBiCG.c b/src/CalcSpectrumByBiCG.c index d413861d5..b0834a69d 100644 --- a/src/CalcSpectrumByBiCG.c +++ b/src/CalcSpectrumByBiCG.c @@ -177,7 +177,7 @@ int CalcSpectrumByBiCG( unsigned long int idim, i_max; FILE *fp; size_t byte_size; - int idcSpectrum, iret; + int idcSpectrum; unsigned long int liLanczosStp_vec = 0; double complex **vL, **v12, **v14, *res_proj; int stp, status[3], iomega; diff --git a/src/CalcSpectrumByLanczos.c b/src/CalcSpectrumByLanczos.c index 24fad869d..f4f1e1cac 100644 --- a/src/CalcSpectrumByLanczos.c +++ b/src/CalcSpectrumByLanczos.c @@ -42,7 +42,7 @@ /// \author Kazuyoshi Yoshimi (The University of Tokyo) int CalcSpectrumByLanczos( struct EDMainCalStruct *X, - double complex *tmp_v1, + double complex **tmp_v1, double dnorm, int Nomega, double complex *dcSpectrum, diff --git a/src/CalcSpectrumByTPQ.c b/src/CalcSpectrumByTPQ.c index 5afbc993a..f291a84a4 100644 --- a/src/CalcSpectrumByTPQ.c +++ b/src/CalcSpectrumByTPQ.c @@ -90,7 +90,7 @@ int ReadTPQData( /// \retval FALSE fail to calculate spectrum. /// \retval TRUE sucsceed to calculate spectrum. 
int GetCalcSpectrumTPQ(double complex dcomega, double dtemp, double dspecificheat, - double ene, double *tmp_E, int Nsite, int idim_max, double complex * dc_tmpSpec) + double ene, double *tmp_E, int idim_max, double complex * dc_tmpSpec) { int l; double tmp_dcSpec; @@ -129,7 +129,7 @@ int GetCalcSpectrumTPQ(double complex dcomega, double dtemp, double dspecifichea /// \author Kazuyoshi Yoshimi (The University of Tokyo) int CalcSpectrumByTPQ( struct EDMainCalStruct *X, - double complex *tmp_v1, + double complex **tmp_v1, double dnorm, int Nomega, double complex *dcSpectrum, @@ -239,7 +239,7 @@ int CalcSpectrumByTPQ( TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_CalcSpectrumFromTridiagonalStart, "a"); for( i = 0 ; i < Nomega; i++) { dctmp_Spectrum=0; - iret = GetCalcSpectrumTPQ(dcomega[i], dtemp, dspecificHeat, dene, tmp_E, X->Bind.Def.Nsite, stp, &dctmp_Spectrum); + iret = GetCalcSpectrumTPQ(dcomega[i], dtemp, dspecificHeat, dene, tmp_E, stp, &dctmp_Spectrum); if (iret != TRUE) { //ReAlloc alpha, beta and Set alpha_start and beta_start in Lanczos_EigenValue return FALSE; diff --git a/src/FirstMultiply.c b/src/FirstMultiply.c index a60bc7a33..c7951bc4e 100644 --- a/src/FirstMultiply.c +++ b/src/FirstMultiply.c @@ -45,9 +45,7 @@ int FirstMultiply(struct BindStruct *X) { long int i, i_max; double complex dnorm; double Ns; - long unsigned int u_long_i; - dsfmt_t dsfmt; - int mythread, rand_i, iret; + int rand_i, iret; Ns = 1.0*X->Def.NsiteMPI; i_max = X->Check.idim_max; diff --git a/src/Lanczos_EigenValue.c b/src/Lanczos_EigenValue.c index a06e3e124..3b4d155c6 100644 --- a/src/Lanczos_EigenValue.c +++ b/src/Lanczos_EigenValue.c @@ -24,6 +24,7 @@ #include "Lanczos_EigenValue.h" #include "wrapperMPI.h" #include "CalcTime.h" +#include "matrixlapack.h" /** * @file Lanczos_EigenValue.c @@ -325,7 +326,7 @@ int Lanczos_GetTridiagonalMatrixComponents( struct BindStruct *X, double *_alpha, double *_beta, - double complex *tmp_v1, + double complex **tmp_v1, unsigned long int 
*liLanczos_step ) { @@ -353,12 +354,12 @@ int Lanczos_GetTridiagonalMatrixComponents( #pragma omp parallel for default(none) private(i) shared(v0, v1, tmp_v1) firstprivate(i_max) for (i = 1; i <= i_max; i++) { v0[i][0] = 0.0; - v1[i][0] = tmp_v1[i]; + v1[i][0] = tmp_v1[i][0]; } stp = 0; mltply(X, 1, v0, tmp_v1); TimeKeeperWithStep(X, cFileNameTimeKeep, c_Lanczos_SpectrumStep, "a", stp); - alpha1 = creal(VecProdMPI(i_max, tmp_v1, &v0[0][0]));// alpha = v^{\dag}*H*v + alpha1 = creal(VecProdMPI(i_max, &tmp_v1[0][0], &v0[0][0]));// alpha = v^{\dag}*H*v _alpha[1] = alpha1; cbeta1 = 0.0; fprintf(stdoutMPI, " Step / Step_max alpha beta \n"); diff --git a/src/MakeIniVec.c b/src/MakeIniVec.c index ed1c594db..1649d267a 100644 --- a/src/MakeIniVec.c +++ b/src/MakeIniVec.c @@ -42,14 +42,12 @@ int MakeIniVec(struct BindStruct *X) { long int i, i_max; double complex dnorm; - double Ns; long unsigned int u_long_i; dsfmt_t dsfmt; int mythread, rand_i; double rand_X,rand_Y; double complex rand_Z1,rand_Z2; - Ns = 1.0*X->Def.NsiteMPI; i_max = X->Check.idim_max; for (rand_i = 0; rand_i < NumAve; rand_i++) { diff --git a/src/PairExHubbard.c b/src/PairExHubbard.c index 15ab1e937..bb3d1beb2 100644 --- a/src/PairExHubbard.c +++ b/src/PairExHubbard.c @@ -108,7 +108,7 @@ int GetPairExcitedStateHubbardGC( #pragma omp parallel for default(none) private(j) \ firstprivate(i_max,X,isite1, tmp_trans) shared(tmp_v0,tmp_v1,nstate) for (j = 1; j <= i_max; j++) { - GC_AisCis(j, nstate, tmp_v0, tmp_v1, X, isite1, -tmp_trans); + GC_AisCis(j, nstate, tmp_v0, tmp_v1, isite1, -tmp_trans); } } else { diff --git a/src/PairExSpin.c b/src/PairExSpin.c index f4b60abae..ab7922bd8 100644 --- a/src/PairExSpin.c +++ b/src/PairExSpin.c @@ -78,7 +78,7 @@ int GetPairExcitedStateHalfSpinGC( #pragma omp parallel for default(none) private(j, tmp_sgn,dmv) \ firstprivate(i_max, isite1, org_sigma1, X,tmp_trans) shared(one,nstate,tmp_v0, tmp_v1) for (j = 1; j <= i_max; j++) { - dmv = (1.0 - child_SpinGC_CisAis(j, X, isite1, 
org_sigma1))* (-tmp_trans); + dmv = (1.0 - child_SpinGC_CisAis(j, isite1, org_sigma1))* (-tmp_trans); zaxpy_(&nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); } } @@ -87,7 +87,7 @@ int GetPairExcitedStateHalfSpinGC( #pragma omp parallel for default(none) private(j, tmp_sgn,dmv) \ firstprivate(i_max, isite1, org_sigma1, X,tmp_trans) shared(tmp_v0, tmp_v1,one,nstate) for (j = 1; j <= i_max; j++) { - dmv = child_SpinGC_CisAis(j, X, isite1, org_sigma1)* tmp_trans; + dmv = child_SpinGC_CisAis(j, isite1, org_sigma1)* tmp_trans; zaxpy_(&nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); } } @@ -98,7 +98,7 @@ int GetPairExcitedStateHalfSpinGC( #pragma omp parallel for default(none) private(j, tmp_sgn, tmp_off,dmv) \ firstprivate(i_max, isite1, org_sigma2, X, tmp_trans) shared(tmp_v0, tmp_v1,one,nstate) for (j = 1; j <= i_max; j++) { - tmp_sgn = child_SpinGC_CisAit(j, X, isite1, org_sigma2, &tmp_off); + tmp_sgn = child_SpinGC_CisAit(j, isite1, org_sigma2, &tmp_off); if (tmp_sgn != 0) { dmv = (double complex)tmp_sgn * tmp_trans; zaxpy_(&nstate, &dmv, tmp_v1[j], &one, tmp_v0[tmp_off + 1], &one); @@ -267,7 +267,7 @@ int GetPairExcitedStateHalfSpin( if (org_isite1 == org_isite2) { if (org_isite1 > X->Def.Nsite) { is1_up = X->Def.Tpow[org_isite1 - 1]; - ibit1 = child_SpinGC_CisAis((unsigned long int) myrank + 1, X, is1_up, org_sigma1); + ibit1 = child_SpinGC_CisAis((unsigned long int) myrank + 1, is1_up, org_sigma1); if (X->Def.PairExcitationOperator[iEx][i][4] == 0) { if (ibit1 == 0) { dmv = -tmp_trans; @@ -294,7 +294,7 @@ int GetPairExcitedStateHalfSpin( #pragma omp parallel for default(none) private(j,dmv) \ firstprivate(i_max,isite1,org_sigma1,X,tmp_trans) shared(tmp_v0,tmp_v1,one,nstate) for (j = 1; j <= i_max; j++) { - dmv = (1.0 - child_Spin_CisAis(j, X, isite1, org_sigma1)) * (-tmp_trans); + dmv = (1.0 - child_Spin_CisAis(j, isite1, org_sigma1)) * (-tmp_trans); zaxpy_(&nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); } } @@ -302,7 +302,7 @@ 
firstprivate(i_max,isite1,org_sigma1,X,tmp_trans) shared(tmp_v0,tmp_v1,one,nstat #pragma omp parallel for default(none) private(j,dmv) \ firstprivate(i_max,isite1,org_sigma1,X,tmp_trans) shared(tmp_v0,tmp_v1,one,nstate) for (j = 1; j <= i_max; j++) { - dmv = child_Spin_CisAis(j, X, isite1, org_sigma1) * tmp_trans; + dmv = child_Spin_CisAis(j, isite1, org_sigma1) * tmp_trans; zaxpy_(&nstate, &dmv, tmp_v1[j], &one, tmp_v0[j], &one); } } diff --git a/src/StdFace b/src/StdFace index 78c128e51..73721cd3e 160000 --- a/src/StdFace +++ b/src/StdFace @@ -1 +1 @@ -Subproject commit 78c128e515331d183b3d76e3eca6481bdf0f2121 +Subproject commit 73721cd3e0dfb9e48995a15e5616849ffcd8900c diff --git a/src/diagonalcalc.c b/src/diagonalcalc.c index e5fb6df28..a95e29057 100644 --- a/src/diagonalcalc.c +++ b/src/diagonalcalc.c @@ -114,8 +114,8 @@ int SetDiagonalTEInterAll( if (X->Def.iFlgGeneralSpin == FALSE) { is1_up = X->Def.Tpow[isite1 - 1]; is2_up = X->Def.Tpow[isite2 - 1]; - num1 = child_SpinGC_CisAis((unsigned long int) myrank + 1, X, is1_up, isigma1); - num2 = child_SpinGC_CisAis((unsigned long int) myrank + 1, X, is2_up, isigma2); + num1 = child_SpinGC_CisAis((unsigned long int) myrank + 1, is1_up, isigma1); + num2 = child_SpinGC_CisAis((unsigned long int) myrank + 1, is2_up, isigma2); }/*if (X->Def.iFlgGeneralSpin == FALSE)*/ else {//start:generalspin num1 = BitCheckGeneral((unsigned long int) myrank, isite1, isigma1, @@ -190,13 +190,13 @@ firstprivate(i_max, dtmp_V) private(j) if (X->Def.iFlgGeneralSpin == FALSE) { is1_up = X->Def.Tpow[isite1 - 1]; is2_up = X->Def.Tpow[isite2 - 1]; - num2 = child_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, isigma2); + num2 = child_SpinGC_CisAis((unsigned long int)myrank + 1, is2_up, isigma2); if (num2 != 0) { #pragma omp parallel for default(none) shared(tmp_v0, tmp_v1)\ firstprivate(i_max, dtmp_V, is1_up, isigma1, X) private(num1, j) for (j = 1; j <= i_max; j++) { - num1 = child_SpinGC_CisAis(j, X, is1_up, isigma1); + num1 = 
child_SpinGC_CisAis(j, is1_up, isigma1); tmp_v0[j] += dtmp_V * num1 * tmp_v1[j]; } } @@ -221,13 +221,13 @@ firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) if (X->Def.iFlgGeneralSpin == FALSE) { is1_up = X->Def.Tpow[isite1 - 1]; is2_up = X->Def.Tpow[isite2 - 1]; - num2 = child_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, isigma2); + num2 = child_SpinGC_CisAis((unsigned long int)myrank + 1, is2_up, isigma2); if (num2 != 0) { #pragma omp parallel for default(none) shared(tmp_v0, tmp_v1) \ firstprivate(i_max, dtmp_V, is1_up, isigma1, X, num2) private(j, num1) for (j = 1; j <= i_max; j++) { - num1 = child_Spin_CisAis(j, X, is1_up, isigma1); + num1 = child_Spin_CisAis(j, is1_up, isigma1); tmp_v0[j] += dtmp_V * num1 * tmp_v1[j]; } } @@ -301,8 +301,8 @@ private(num1, ibit1_spin, num2, ibit2_spin) shared(tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1_up, is2_up, isigma1, isigma2, X) \ private(j, num1, num2) for (j = 1; j <= i_max; j++) { - num1 = child_Spin_CisAis(j, X, is1_up, isigma1); - num2 = child_Spin_CisAis(j, X, is2_up, isigma2); + num1 = child_Spin_CisAis(j, is1_up, isigma1); + num2 = child_Spin_CisAis(j, is2_up, isigma2); tmp_v0[j] += dtmp_V * num1*num2*tmp_v1[j]; } } @@ -329,8 +329,8 @@ private(j, num1) shared(tmp_v0, tmp_v1) firstprivate(i_max, dtmp_V, is1_up, is2_up, isigma1, isigma2, X) \ private(j, num1, num2) for (j = 1; j <= i_max; j++) { - num1 = child_SpinGC_CisAis(j, X, is1_up, isigma1); - num2 = child_SpinGC_CisAis(j, X, is2_up, isigma2); + num1 = child_SpinGC_CisAis(j, is1_up, isigma1); + num2 = child_SpinGC_CisAis(j, is2_up, isigma2); tmp_v0[j] += dtmp_V * num1*num2*tmp_v1[j]; } } @@ -1673,8 +1673,8 @@ firstprivate(i_max, dtmp_V, num2, num1) private(ibit1_spin, j) if (X->Def.iFlgGeneralSpin == FALSE) { is1_up = X->Def.Tpow[isite1 - 1]; is2_up = X->Def.Tpow[isite2 - 1]; - num1 = child_SpinGC_CisAis((unsigned long int)myrank + 1, X, is1_up, isigma1); - num2 = child_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, 
isigma2); + num1 = child_SpinGC_CisAis((unsigned long int)myrank + 1, is1_up, isigma1); + num2 = child_SpinGC_CisAis((unsigned long int)myrank + 1, is2_up, isigma2); #pragma omp parallel for default(none) shared(list_Diagonal) \ firstprivate(i_max, dtmp_V, is1_up, isigma1, X, num1, num2) private(j) @@ -1754,12 +1754,12 @@ firstprivate(i_max, dtmp_V, is1_spin, num2) private(num1, ibit1_spin, j) if (X->Def.iFlgGeneralSpin == FALSE) { is1_up = X->Def.Tpow[isite1 - 1]; is2_up = X->Def.Tpow[isite2 - 1]; - num2 = child_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, isigma2); + num2 = child_SpinGC_CisAis((unsigned long int)myrank + 1, is2_up, isigma2); #pragma omp parallel for default(none) shared(list_Diagonal) \ firstprivate(i_max, dtmp_V, is1_up, isigma1, X, num2) private(j, num1) for (j = 1; j <= i_max; j++) { - num1 = child_SpinGC_CisAis(j, X, is1_up, isigma1); + num1 = child_SpinGC_CisAis(j, is1_up, isigma1); list_Diagonal[j] += num1*num2*dtmp_V; } }/* if (X->Def.iFlgGeneralSpin == FALSE)*/ @@ -1783,12 +1783,12 @@ firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) if (X->Def.iFlgGeneralSpin == FALSE) { is1_up = X->Def.Tpow[isite1 - 1]; is2_up = X->Def.Tpow[isite2 - 1]; - num2 = child_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, isigma2); + num2 = child_SpinGC_CisAis((unsigned long int)myrank + 1, is2_up, isigma2); #pragma omp parallel for default(none) shared(list_Diagonal) \ firstprivate(i_max, dtmp_V, is1_up, isigma1, X, num2) private(j, num1) for (j = 1; j <= i_max; j++) { - num1 = child_Spin_CisAis(j, X, is1_up, isigma1); + num1 = child_Spin_CisAis(j, is1_up, isigma1); list_Diagonal[j] += num1 * num2*dtmp_V; } }/* if (X->Def.iFlgGeneralSpin == FALSE)*/ @@ -1857,8 +1857,8 @@ firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) is2_up = X->Def.Tpow[isite2 - 1]; #pragma omp parallel for default(none) shared(list_Diagonal) firstprivate(i_max, dtmp_V, is1_up, is2_up, isigma1, isigma2, X) private(j, num1, num2) for (j = 1; 
j <= i_max; j++) { - num1 = child_Spin_CisAis(j, X, is1_up, isigma1); - num2 = child_Spin_CisAis(j, X, is2_up, isigma2); + num1 = child_Spin_CisAis(j, is1_up, isigma1); + num2 = child_Spin_CisAis(j, is2_up, isigma2); list_Diagonal[j] += num1 * num2*dtmp_V; } } @@ -1881,8 +1881,8 @@ firstprivate(i_max, dtmp_V, isite1, isigma1, X) private(j, num1) is2_up = X->Def.Tpow[isite2 - 1]; #pragma omp parallel for default(none) shared(list_Diagonal) firstprivate(i_max, dtmp_V, is1_up, is2_up, isigma1, isigma2, X) private(j, num1, num2) for (j = 1; j <= i_max; j++) { - num1 = child_SpinGC_CisAis(j, X, is1_up, isigma1); - num2 = child_SpinGC_CisAis(j, X, is2_up, isigma2); + num1 = child_SpinGC_CisAis(j, is1_up, isigma1); + num2 = child_SpinGC_CisAis(j, is2_up, isigma2); list_Diagonal[j] += num1 * num2*dtmp_V; } } diff --git a/src/eigenIO.c b/src/eigenIO.c index 68510b785..af687ffff 100644 --- a/src/eigenIO.c +++ b/src/eigenIO.c @@ -69,7 +69,7 @@ int OutputCmpEigenValue(int xNsize, complex double *ene, char *filename) { return 0; } - +/* int OutputRealEigenVec(int xNsize, const int nene, double **vec, const int nproc, char *filename) { FILE *fp = NULL; @@ -84,7 +84,8 @@ int OutputRealEigenVec(int xNsize, const int nene, double **vec, const int nproc return 0; } - +*/ +/* int OutputCmpEigenVec(int xNsize, const int nene, complex double **vec, const int nproc, char *filename) { FILE *fp = NULL; @@ -99,7 +100,8 @@ int OutputCmpEigenVec(int xNsize, const int nene, complex double **vec, const in return 0; } - +*/ +/* int InputRealEigenValue(int xNsize, double *ene, char *filename) { FILE *fp = NULL; @@ -114,7 +116,8 @@ int InputRealEigenValue(int xNsize, double *ene, char *filename) { return 0; } - +*/ +/* int InputCmpEigenValue(int xNsize, complex double *ene, char *filename) { FILE *fp = NULL; fp = fopen(filename, "rb+"); @@ -128,7 +131,8 @@ int InputCmpEigenValue(int xNsize, complex double *ene, char *filename) { return 0; } - +*/ +/* int InputRealEigenVec(int xNsize, const int 
nene, double **vec, const int nproc, char *filename) { FILE *fp = NULL; @@ -143,7 +147,8 @@ int InputRealEigenVec(int xNsize, const int nene, double **vec, const int nproc, return 0; } - +*/ +/* int InputCmpEigenVec(int xNsize, const int nene, complex double **vec, const int nproc, char *filename) { FILE *fp = NULL; @@ -158,4 +163,4 @@ int InputCmpEigenVec(int xNsize, const int nene, complex double **vec, const int return 0; } - +*/ diff --git a/src/expec_cisajs.c b/src/expec_cisajs.c index c5ab683fa..accfc2954 100644 --- a/src/expec_cisajs.c +++ b/src/expec_cisajs.c @@ -252,7 +252,7 @@ int expec_cisajs_SpinHalf( if (org_isite1 == org_isite2) { if (org_isite1 > X->Def.Nsite) { is1_up = X->Def.Tpow[org_isite1 - 1]; - ibit1 = child_SpinGC_CisAis((unsigned long int)myrank + 1, X, is1_up, org_sigma1); + ibit1 = child_SpinGC_CisAis((unsigned long int)myrank + 1, is1_up, org_sigma1); if (ibit1 != 0) { zaxpy_long(i_max*nstate, 1.0, &vec[1][0], &Xvec[1][0]); } @@ -262,7 +262,7 @@ int expec_cisajs_SpinHalf( #pragma omp parallel for default(none) private(j,dmv) \ firstprivate(i_max, isite1, org_sigma1, X) shared(vec,Xvec,nstate,one) for (j = 1; j <= i_max; j++) { - dmv = child_Spin_CisAis(j, X, isite1, org_sigma1); + dmv = child_Spin_CisAis(j, isite1, org_sigma1); zaxpy_(&nstate, &dmv, &vec[j][0], &one, &Xvec[j][0], &one); } } @@ -379,7 +379,7 @@ int expec_cisajs_SpinGCHalf( #pragma omp parallel for default(none) private(j, tmp_sgn,dmv) \ firstprivate(i_max, isite1, org_sigma1, X) shared(vec,Xvec,nstate,one) for (j = 1; j <= i_max; j++) { - dmv = child_SpinGC_CisAis(j, X, isite1, org_sigma1); + dmv = child_SpinGC_CisAis(j, isite1, org_sigma1); zaxpy_(&nstate, &dmv, &vec[j][0], &one, &Xvec[j][0], &one); } } @@ -388,7 +388,7 @@ int expec_cisajs_SpinGCHalf( #pragma omp parallel for default(none) private(j, tmp_sgn, tmp_off,dmv) \ firstprivate(i_max, isite1, org_sigma2, X) shared(vec,Xvec,nstate,one) for (j = 1; j <= i_max; j++) { - tmp_sgn = child_SpinGC_CisAit(j, X, isite1, 
org_sigma2, &tmp_off); + tmp_sgn = child_SpinGC_CisAit(j, isite1, org_sigma2, &tmp_off); if (tmp_sgn != 0) { dmv = (double complex)tmp_sgn; zaxpy_(&nstate, &dmv, &vec[j][0], &one, &Xvec[tmp_off + 1][0], &one); diff --git a/src/expec_cisajscktaltdc.c b/src/expec_cisajscktaltdc.c index 477afb039..e88e1f393 100644 --- a/src/expec_cisajscktaltdc.c +++ b/src/expec_cisajscktaltdc.c @@ -226,7 +226,7 @@ int expec_cisajscktalt_HubbardGC( } }//InterPE else { - general_int_GetInfo(i, X, org_isite1, org_isite2, org_isite3, org_isite4, + general_int_GetInfo(X, org_isite1, org_isite2, org_isite3, org_isite4, org_sigma1, org_sigma2, org_sigma3, org_sigma4, tmp_V); i_max = X->Large.i_max; @@ -244,7 +244,7 @@ int expec_cisajscktalt_HubbardGC( #pragma omp parallel for default(none) private(j) shared(vec,Xvec,nstate) \ firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2,tmp_V) for (j = 1; j <= i_max; j++) { - GC_CisAisCisAis_element(j, isite1, isite3, tmp_V, nstate, Xvec, vec, X, &tmp_off); + GC_CisAisCisAis_element(j, isite1, isite3, tmp_V, nstate, Xvec, vec); } } else if (isite1 == isite2 && isite3 != isite4) { @@ -252,7 +252,7 @@ firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,t firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2,tmp_V) for (j = 1; j <= i_max; j++) { GC_CisAisCjtAku_element(j, isite1, isite3, isite4, Bsum, Bdiff, - tmp_V, nstate, Xvec, vec, X, &tmp_off); + tmp_V, nstate, Xvec, vec, &tmp_off); } } else if (isite1 != isite2 && isite3 == isite4) { @@ -260,7 +260,7 @@ firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,t firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2,tmp_V) for (j = 1; j <= i_max; j++) { GC_CisAjtCkuAku_element(j, isite1, isite2, isite3, Asum, Adiff, - tmp_V, nstate, Xvec, vec, X, &tmp_off); + tmp_V, nstate, Xvec, vec, &tmp_off); } } else if (isite1 != isite2 && isite3 != isite4) 
{ @@ -268,7 +268,7 @@ firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,t firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2,tmp_V) for (j = 1; j <= i_max; j++) { GC_CisAjtCkuAlv_element(j, isite1, isite2, isite3, isite4, Asum, Adiff, Bsum, Bdiff, - tmp_V, nstate, Xvec, vec, X, &tmp_off_2); + tmp_V, nstate, Xvec, vec, &tmp_off_2); } } } @@ -349,7 +349,7 @@ int expec_cisajscktalt_Hubbard( }//InterPE else { general_int_GetInfo( - i, X, org_isite1, org_isite2, org_isite3, org_isite4, + X, org_isite1, org_isite2, org_isite3, org_isite4, org_sigma1, org_sigma2, org_sigma3, org_sigma4, tmp_V ); @@ -369,7 +369,7 @@ int expec_cisajscktalt_Hubbard( #pragma omp parallel for default(none) private(j) shared(vec,tmp_V,Xvec,nstate) \ firstprivate(i_max,X,isite1,isite2,isite4,isite3,Asum,Bsum,Adiff,Bdiff,tmp_off,tmp_off_2) for (j = 1; j <= i_max; j++) { - CisAisCisAis_element(j, isite1, isite3, tmp_V, nstate, Xvec, vec, X, &tmp_off); + CisAisCisAis_element(j, isite1, isite3, tmp_V, nstate, Xvec, vec); } } else if (isite1 == isite2 && isite3 != isite4) { @@ -444,13 +444,13 @@ int expec_cisajscktalt_SpinHalf( if (org_sigma1 == org_sigma2 && org_sigma3 == org_sigma4) { //diagonal is1_up = X->Def.Tpow[org_isite1 - 1]; is2_up = X->Def.Tpow[org_isite3 - 1]; - num1 = child_SpinGC_CisAis((unsigned long int)myrank + 1, X, is1_up, org_sigma1); - num2 = child_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, org_sigma3); + num1 = child_SpinGC_CisAis((unsigned long int)myrank + 1, is1_up, org_sigma1); + num2 = child_SpinGC_CisAis((unsigned long int)myrank + 1, is2_up, org_sigma3); zaxpy_long(i_max*nstate, tmp_V * num1*num2, &vec[1][0], &Xvec[1][0]); } else if (org_isite1 == org_isite3 && org_sigma1 == org_sigma4 && org_sigma2 == org_sigma3) { is1_up = X->Def.Tpow[org_isite1 - 1]; - num1 = child_SpinGC_CisAis((unsigned long int)myrank + 1, X, is1_up, org_sigma1); + num1 = child_SpinGC_CisAis((unsigned long int)myrank + 
1, is1_up, org_sigma1); zaxpy_long(i_max*nstate, tmp_V * num1, &vec[1][0], &Xvec[1][0]); } else if (org_sigma1 == org_sigma4 && org_sigma2 == org_sigma3) {//exchange @@ -466,11 +466,11 @@ int expec_cisajscktalt_SpinHalf( if (org_sigma1 == org_sigma2 && org_sigma3 == org_sigma4) { //diagonal is1_up = X->Def.Tpow[org_isite1 - 1]; is2_up = X->Def.Tpow[org_isite3 - 1]; - num2 = child_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, org_sigma3); + num2 = child_SpinGC_CisAis((unsigned long int)myrank + 1, is2_up, org_sigma3); #pragma omp parallel for default(none)shared(vec,Xvec,nstate,one) \ - firstprivate(i_max, tmp_V, is1_up, org_sigma1, X, num2) private(j, num1,dmv) + firstprivate(i_max, tmp_V, is1_up, org_sigma1, num2) private(j, num1,dmv) for (j = 1; j <= i_max; j++) { - num1 = child_Spin_CisAis(j, X, is1_up, org_sigma1); + num1 = child_Spin_CisAis(j, is1_up, org_sigma1); dmv = tmp_V * num1*num2; zaxpy_(&nstate, &dmv, &vec[j][0], &one, &Xvec[j][0], &one); } @@ -492,14 +492,14 @@ int expec_cisajscktalt_SpinHalf( firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off, tmp_V) for (j = 1; j <= i_max; j++) { CisAisCisAis_spin_element(j, isA_up, isB_up, org_sigma2, org_sigma4, - tmp_V, nstate, Xvec, vec, X); + tmp_V, nstate, Xvec, vec); } } else if (org_isite1 == org_isite3 && org_sigma1 == org_sigma4 && org_sigma3 == org_sigma2) { #pragma omp parallel for default(none) private(j, dmv) \ firstprivate(i_max,X,isA_up,org_sigma1, tmp_V) shared(vec, list_1,Xvec,nstate,one) for (j = 1; j <= i_max; j++) { - dmv = tmp_V * child_Spin_CisAis(j, X, isA_up, org_sigma1); + dmv = tmp_V * child_Spin_CisAis(j, isA_up, org_sigma1); zaxpy_(&nstate, &dmv, &vec[j][0], &one, &Xvec[j][0], &one); } } @@ -726,7 +726,7 @@ int expec_cisajscktalt_SpinGCHalf( firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V) for (j = 1; j <= i_max; j++) { GC_CisAisCisAis_spin_element(j, isA_up, isB_up, org_sigma2, org_sigma4, - tmp_V, nstate, Xvec, vec, X); + tmp_V, nstate, 
Xvec, vec); } } else if (org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4) { @@ -734,7 +734,7 @@ firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V) firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V) for (j = 1; j <= i_max; j++) { GC_CisAisCitAiu_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, - tmp_V, nstate, Xvec, vec, X, &tmp_off); + tmp_V, nstate, Xvec, vec, &tmp_off); } } else if (org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4) { @@ -742,7 +742,7 @@ firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V) firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V) for (j = 1; j <= i_max; j++) { GC_CisAitCiuAiu_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, - tmp_V, nstate, Xvec, vec, X, &tmp_off); + tmp_V, nstate, Xvec, vec, &tmp_off); } } else if (org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4) { @@ -750,7 +750,7 @@ firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V) firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V) for (j = 1; j <= i_max; j++) { GC_CisAitCiuAiv_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, - tmp_V, nstate, Xvec, vec, X, &tmp_off); + tmp_V, nstate, Xvec, vec, &tmp_off); } } } @@ -919,14 +919,13 @@ int expec_Sixbody_SpinGCHalf( double complex**prod ){ long unsigned int i,j; - long unsigned int tmp_org_isite1,tmp_org_isite2,tmp_org_isite3,tmp_org_isite4,tmp_org_isite5,tmp_org_isite6,tmp_org_isite7,tmp_org_isite8,tmp_org_isite9,tmp_org_isite10,tmp_org_isite11,tmp_org_isite12; - long unsigned int tmp_org_sigma1,tmp_org_sigma2,tmp_org_sigma3,tmp_org_sigma4,tmp_org_sigma5,tmp_org_sigma6,tmp_org_sigma7,tmp_org_sigma8,tmp_org_sigma9,tmp_org_sigma10,tmp_org_sigma11,tmp_org_sigma12; - long unsigned int org_isite1,org_isite2,org_isite3,org_isite4,org_isite5,org_isite6,org_isite7,org_isite8,org_isite9,org_isite10,org_isite11,org_isite12; - long unsigned int 
org_sigma1,org_sigma2,org_sigma3,org_sigma4,org_sigma5,org_sigma6,org_sigma7,org_sigma8,org_sigma9,org_sigma10,org_sigma11,org_sigma12; + long unsigned int tmp_org_isite5,tmp_org_isite6,tmp_org_isite7,tmp_org_isite8,tmp_org_isite9,tmp_org_isite10,tmp_org_isite11,tmp_org_isite12; + long unsigned int tmp_org_sigma5,tmp_org_sigma6,tmp_org_sigma7,tmp_org_sigma8,tmp_org_sigma9,tmp_org_sigma10,tmp_org_sigma11,tmp_org_sigma12; + long unsigned int org_isite1,org_isite2,org_isite3,org_isite4; + long unsigned int org_sigma1,org_sigma2,org_sigma3,org_sigma4; long unsigned int isA_up, isB_up; long unsigned int tmp_off=0; double complex tmp_V; - double complex dam_pr; double complex **vec_pr; long int i_max; @@ -936,16 +935,6 @@ int expec_Sixbody_SpinGCHalf( for(i=0;i<X->Def.NSBody;i++){ //printf("%d %d \n",i,X->Def.NSBody); - tmp_org_isite1 = X->Def.SBody[i][0]+1; - tmp_org_sigma1 = X->Def.SBody[i][1]; - tmp_org_isite2 = X->Def.SBody[i][2]+1; - tmp_org_sigma2 = X->Def.SBody[i][3]; - /**/ - tmp_org_isite3 = X->Def.SBody[i][4]+1; - tmp_org_sigma3 = X->Def.SBody[i][5]; - tmp_org_isite4 = X->Def.SBody[i][6]+1; - tmp_org_sigma4 = X->Def.SBody[i][7]; - /**/ tmp_org_isite5 = X->Def.SBody[i][8]+1; tmp_org_sigma5 = X->Def.SBody[i][9]; tmp_org_isite6 = X->Def.SBody[i][10]+1; @@ -965,27 +954,6 @@ int expec_Sixbody_SpinGCHalf( tmp_org_sigma11 = X->Def.SBody[i][21]; tmp_org_isite12 = X->Def.SBody[i][22]+1; tmp_org_sigma12 = X->Def.SBody[i][23]; - - /**/ - org_isite5 = X->Def.SBody[i][8]+1; - org_sigma5 = X->Def.SBody[i][9]; - org_isite6 = X->Def.SBody[i][10]+1; - org_sigma6 = X->Def.SBody[i][11]; - /**/ - org_isite7 = X->Def.SBody[i][12]+1; - org_sigma7 = X->Def.SBody[i][13]; - org_isite8 = X->Def.SBody[i][14]+1; - org_sigma8 = X->Def.SBody[i][15]; - /**/ - org_isite9 = X->Def.SBody[i][16]+1; - org_sigma9 = X->Def.SBody[i][17]; - org_isite10 = X->Def.SBody[i][18]+1; - org_sigma10 = X->Def.SBody[i][19]; - /**/ - org_isite11 = X->Def.SBody[i][20]+1; - org_sigma11 = X->Def.SBody[i][21]; -
org_isite12 = X->Def.SBody[i][22]+1; - org_sigma12 = X->Def.SBody[i][23]; X->Large.mode = M_MLTPLY; /* |vec_pr_0>= c11a12|vec>*/ @@ -1016,38 +984,37 @@ int expec_Sixbody_SpinGCHalf( ); */ - dam_pr=0.0; if(org_isite1>X->Def.Nsite && org_isite3>X->Def.Nsite){ //org_isite3 >= org_isite1 > Nsite //printf("D-MPI \n"); if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ){ //diagonal - dam_pr += child_GC_CisAisCjuAju_spin_MPIdouble( (org_isite1-1), org_sigma1, (org_isite3-1), org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); + child_GC_CisAisCjuAju_spin_MPIdouble( (org_isite1-1), org_sigma1, (org_isite3-1), org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_isite1 ==org_isite3 && org_sigma1 ==org_sigma4 && org_sigma2 ==org_sigma3){ //diagonal (for spin: cuadcdau=cuau) - dam_pr += child_GC_CisAis_spin_MPIdouble((org_isite1-1), org_sigma1, tmp_V, X, nstate, Xvec, vec_pr); + child_GC_CisAis_spin_MPIdouble((org_isite1-1), org_sigma1, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr += child_GC_CisAisCjuAjv_spin_MPIdouble(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); + child_GC_CisAisCjuAjv_spin_MPIdouble(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4){ - dam_pr += child_GC_CisAitCjuAju_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); + child_GC_CisAitCjuAju_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr += child_GC_CisAitCiuAiv_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); + child_GC_CisAitCiuAiv_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, 
org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); } } else if(org_isite3>X->Def.Nsite || org_isite1>X->Def.Nsite){ //org_isite3 > Nsite >= org_isite1 //printf("S-MPI \n"); if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ){ //diagonal - dam_pr += child_GC_CisAisCjuAju_spin_MPIsingle( (org_isite1-1), org_sigma1, (org_isite3-1), org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); + child_GC_CisAisCjuAju_spin_MPIsingle( (org_isite1-1), org_sigma1, (org_isite3-1), org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr += child_GC_CisAisCjuAjv_spin_MPIsingle(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); + child_GC_CisAisCjuAjv_spin_MPIsingle(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4){ - dam_pr += child_GC_CisAitCjuAju_spin_MPIsingle(org_isite1-1, org_sigma2, org_isite3-1, org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); + child_GC_CisAitCjuAju_spin_MPIsingle(org_isite1-1, org_sigma2, org_isite3-1, org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr += child_GC_CisAitCiuAiv_spin_MPIsingle(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); + child_GC_CisAitCiuAiv_spin_MPIsingle(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); } } else{ @@ -1055,32 +1022,28 @@ int expec_Sixbody_SpinGCHalf( isA_up = X->Def.Tpow[org_isite2-1]; isB_up = X->Def.Tpow[org_isite4-1]; if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ){ //diagonal - dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j,i) \ +#pragma omp parallel for default(none) private(j,i) \ firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(Xvec,vec_pr) 
for(j=1;j<=i_max;j++){ - dam_pr +=GC_CisAisCisAis_spin_element(j, isA_up, isB_up, org_sigma2, org_sigma4, tmp_V, nstate, Xvec, vec_pr, X); + GC_CisAisCisAis_spin_element(j, isA_up, isB_up, org_sigma2, org_sigma4, tmp_V, nstate, Xvec, vec_pr); } }else if(org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j) \ +#pragma omp parallel for default(none) private(j) \ firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(Xvec,vec_pr) for(j=1;j<=i_max;j++){ - dam_pr += GC_CisAisCitAiu_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, Xvec, vec_pr, X, &tmp_off); + GC_CisAisCitAiu_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, Xvec, vec_pr, &tmp_off); } }else if(org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4){ - dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j) \ +#pragma omp parallel for default(none) private(j) \ firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(Xvec,vec_pr) for(j=1;j<=i_max;j++){ - dam_pr += GC_CisAitCiuAiu_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, Xvec, vec_pr, X, &tmp_off); + GC_CisAitCiuAiu_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, Xvec, vec_pr, &tmp_off); } }else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j) \ +#pragma omp parallel for default(none) private(j) \ firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(Xvec,vec_pr) for(j=1;j<=i_max;j++){ - dam_pr += GC_CisAitCiuAiv_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, Xvec, vec_pr, X, &tmp_off); + GC_CisAitCiuAiv_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, Xvec, vec_pr, &tmp_off); } } } @@ -1133,14 +1096,13 @@ int 
expec_Fourbody_SpinGCHalf( double complex**prod ){ long unsigned int i,j; - long unsigned int tmp_org_isite1,tmp_org_isite2,tmp_org_isite3,tmp_org_isite4,tmp_org_isite5,tmp_org_isite6,tmp_org_isite7,tmp_org_isite8; - long unsigned int tmp_org_sigma1,tmp_org_sigma2,tmp_org_sigma3,tmp_org_sigma4,tmp_org_sigma5,tmp_org_sigma6,tmp_org_sigma7,tmp_org_sigma8; - long unsigned int org_isite1,org_isite2,org_isite3,org_isite4,org_isite5,org_isite6,org_isite7,org_isite8; - long unsigned int org_sigma1,org_sigma2,org_sigma3,org_sigma4,org_sigma5,org_sigma6,org_sigma7,org_sigma8; + long unsigned int tmp_org_isite5,tmp_org_isite6,tmp_org_isite7,tmp_org_isite8; + long unsigned int tmp_org_sigma5,tmp_org_sigma6,tmp_org_sigma7,tmp_org_sigma8; + long unsigned int org_isite1,org_isite2,org_isite3,org_isite4; + long unsigned int org_sigma1,org_sigma2,org_sigma3,org_sigma4; long unsigned int isA_up, isB_up; long unsigned int tmp_off=0; double complex tmp_V; - double complex dam_pr; double complex **vec_pr; long int i_max; @@ -1149,16 +1111,6 @@ int expec_Fourbody_SpinGCHalf( for(i=0;i<X->Def.NFBody;i++){ - tmp_org_isite1 = X->Def.FBody[i][0]+1; - tmp_org_sigma1 = X->Def.FBody[i][1]; - tmp_org_isite2 = X->Def.FBody[i][2]+1; - tmp_org_sigma2 = X->Def.FBody[i][3]; - /**/ - tmp_org_isite3 = X->Def.FBody[i][4]+1; - tmp_org_sigma3 = X->Def.FBody[i][5]; - tmp_org_isite4 = X->Def.FBody[i][6]+1; - tmp_org_sigma4 = X->Def.FBody[i][7]; - /**/ tmp_org_isite5 = X->Def.FBody[i][8]+1; tmp_org_sigma5 = X->Def.FBody[i][9]; tmp_org_isite6 = X->Def.FBody[i][10]+1; @@ -1168,16 +1120,6 @@ int expec_Fourbody_SpinGCHalf( tmp_org_sigma7 = X->Def.FBody[i][13]; tmp_org_isite8 = X->Def.FBody[i][14]+1; tmp_org_sigma8 = X->Def.FBody[i][15]; - /**/ - org_isite5 = X->Def.FBody[i][8]+1; - org_sigma5 = X->Def.FBody[i][9]; - org_isite6 = X->Def.FBody[i][10]+1; - org_sigma6 = X->Def.FBody[i][11]; - /**/ - org_isite7 = X->Def.FBody[i][12]+1; - org_sigma7 = X->Def.FBody[i][13]; - org_isite8 = X->Def.FBody[i][14]+1; -
org_sigma8 = X->Def.FBody[i][15]; X->Large.mode = M_MLTPLY; /* |vec_pr_tmp>= c7a8|vec>*/ @@ -1201,38 +1143,37 @@ int expec_Fourbody_SpinGCHalf( ); */ - dam_pr=0.0; if(org_isite1>X->Def.Nsite && org_isite3>X->Def.Nsite){ //org_isite3 >= org_isite1 > Nsite //printf("D-MPI \n"); if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ){ //diagonal - dam_pr += child_GC_CisAisCjuAju_spin_MPIdouble( (org_isite1-1), org_sigma1, (org_isite3-1), org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); + child_GC_CisAisCjuAju_spin_MPIdouble( (org_isite1-1), org_sigma1, (org_isite3-1), org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_isite1 ==org_isite3 && org_sigma1 ==org_sigma4 && org_sigma2 ==org_sigma3){ //diagonal (for spin: cuadcdau=cuau) - dam_pr += child_GC_CisAis_spin_MPIdouble((org_isite1-1), org_sigma1, tmp_V, X, nstate, Xvec, vec_pr); + child_GC_CisAis_spin_MPIdouble((org_isite1-1), org_sigma1, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr += child_GC_CisAisCjuAjv_spin_MPIdouble(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); + child_GC_CisAisCjuAjv_spin_MPIdouble(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4){ - dam_pr += child_GC_CisAitCjuAju_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); + child_GC_CisAitCjuAju_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr += child_GC_CisAitCiuAiv_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); + child_GC_CisAitCiuAiv_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, 
vec_pr); } } else if(org_isite3>X->Def.Nsite || org_isite1>X->Def.Nsite){ //org_isite3 > Nsite >= org_isite1 //printf("S-MPI \n"); if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ){ //diagonal - dam_pr += child_GC_CisAisCjuAju_spin_MPIsingle( (org_isite1-1), org_sigma1, (org_isite3-1), org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); + child_GC_CisAisCjuAju_spin_MPIsingle( (org_isite1-1), org_sigma1, (org_isite3-1), org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr += child_GC_CisAisCjuAjv_spin_MPIsingle(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); + child_GC_CisAisCjuAjv_spin_MPIsingle(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4){ - dam_pr += child_GC_CisAitCjuAju_spin_MPIsingle(org_isite1-1, org_sigma2, org_isite3-1, org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); + child_GC_CisAitCjuAju_spin_MPIsingle(org_isite1-1, org_sigma2, org_isite3-1, org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr += child_GC_CisAitCiuAiv_spin_MPIsingle(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); + child_GC_CisAitCiuAiv_spin_MPIsingle(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); } } else{ @@ -1240,32 +1181,28 @@ int expec_Fourbody_SpinGCHalf( isA_up = X->Def.Tpow[org_isite2-1]; isB_up = X->Def.Tpow[org_isite4-1]; if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ){ //diagonal - dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j,i) \ +#pragma omp parallel for default(none) private(j,i) \ firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(Xvec,vec_pr) for(j=1;j<=i_max;j++){ - dam_pr 
+=GC_CisAisCisAis_spin_element(j, isA_up, isB_up, org_sigma2, org_sigma4, tmp_V, nstate, Xvec, vec_pr, X); + GC_CisAisCisAis_spin_element(j, isA_up, isB_up, org_sigma2, org_sigma4, tmp_V, nstate, Xvec, vec_pr); } }else if(org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j) \ +#pragma omp parallel for default(none) private(j) \ firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(Xvec,vec_pr) for(j=1;j<=i_max;j++){ - dam_pr += GC_CisAisCitAiu_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, Xvec, vec_pr, X, &tmp_off); + GC_CisAisCitAiu_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, Xvec, vec_pr, &tmp_off); } }else if(org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4){ - dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j) \ +#pragma omp parallel for default(none) private(j) \ firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(Xvec,vec_pr) for(j=1;j<=i_max;j++){ - dam_pr += GC_CisAitCiuAiu_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, Xvec, vec_pr, X, &tmp_off); + GC_CisAitCiuAiu_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, Xvec, vec_pr, &tmp_off); } }else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j) \ +#pragma omp parallel for default(none) private(j) \ firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(Xvec,vec_pr) for(j=1;j<=i_max;j++){ - dam_pr += GC_CisAitCiuAiv_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, Xvec, vec_pr, X, &tmp_off); + GC_CisAitCiuAiv_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, Xvec, vec_pr, &tmp_off); } } } @@ -1297,14 +1234,13 @@ int expec_Threebody_SpinGCHalf( double 
complex **prod ){ long unsigned int i,j; - long unsigned int tmp_org_isite1,tmp_org_isite2,tmp_org_isite3,tmp_org_isite4,tmp_org_isite5,tmp_org_isite6; - long unsigned int tmp_org_sigma1,tmp_org_sigma2,tmp_org_sigma3,tmp_org_sigma4,tmp_org_sigma5,tmp_org_sigma6; - long unsigned int org_isite1,org_isite2,org_isite3,org_isite4,org_isite5,org_isite6; - long unsigned int org_sigma1,org_sigma2,org_sigma3,org_sigma4,org_sigma5,org_sigma6; + long unsigned int tmp_org_isite5,tmp_org_isite6; + long unsigned int tmp_org_sigma5,tmp_org_sigma6; + long unsigned int org_isite1,org_isite2,org_isite3,org_isite4; + long unsigned int org_sigma1,org_sigma2,org_sigma3,org_sigma4; long unsigned int isA_up, isB_up; long unsigned int tmp_off=0; double complex tmp_V; - double complex dam_pr; double complex **vec_pr; long int i_max; @@ -1312,25 +1248,10 @@ int expec_Threebody_SpinGCHalf( vec_pr = cd_2d_allocate(i_max + 1, nstate); for(i=0;iDef.NTBody;i++){ - tmp_org_isite1 = X->Def.TBody[i][0]+1; - tmp_org_sigma1 = X->Def.TBody[i][1]; - tmp_org_isite2 = X->Def.TBody[i][2]+1; - tmp_org_sigma2 = X->Def.TBody[i][3]; - /**/ - tmp_org_isite3 = X->Def.TBody[i][4]+1; - tmp_org_sigma3 = X->Def.TBody[i][5]; - tmp_org_isite4 = X->Def.TBody[i][6]+1; - tmp_org_sigma4 = X->Def.TBody[i][7]; - /**/ tmp_org_isite5 = X->Def.TBody[i][8]+1; tmp_org_sigma5 = X->Def.TBody[i][9]; tmp_org_isite6 = X->Def.TBody[i][10]+1; tmp_org_sigma6 = X->Def.TBody[i][11]; - /**/ - org_isite5 = X->Def.TBody[i][8]+1; - org_sigma5 = X->Def.TBody[i][9]; - org_isite6 = X->Def.TBody[i][10]+1; - org_sigma6 = X->Def.TBody[i][11]; X->Large.mode = M_MLTPLY; /* |vec_pr>= c5a6|phi>*/ @@ -1352,38 +1273,37 @@ int expec_Threebody_SpinGCHalf( ); */ - dam_pr=0.0; if(org_isite1>X->Def.Nsite && org_isite3>X->Def.Nsite){ //org_isite3 >= org_isite1 > Nsite //printf("D-MPI \n"); if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ){ //diagonal - dam_pr += child_GC_CisAisCjuAju_spin_MPIdouble( (org_isite1-1), org_sigma1, (org_isite3-1), org_sigma3, 
tmp_V, X, nstate, Xvec, vec_pr); + child_GC_CisAisCjuAju_spin_MPIdouble( (org_isite1-1), org_sigma1, (org_isite3-1), org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_isite1 ==org_isite3 && org_sigma1 ==org_sigma4 && org_sigma2 ==org_sigma3){ //diagonal (for spin: cuadcdau=cuau) - dam_pr += child_GC_CisAis_spin_MPIdouble((org_isite1-1), org_sigma1, tmp_V, X, nstate, Xvec, vec_pr); + child_GC_CisAis_spin_MPIdouble((org_isite1-1), org_sigma1, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr += child_GC_CisAisCjuAjv_spin_MPIdouble(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); + child_GC_CisAisCjuAjv_spin_MPIdouble(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4){ - dam_pr += child_GC_CisAitCjuAju_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); + child_GC_CisAitCjuAju_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr += child_GC_CisAitCiuAiv_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); + child_GC_CisAitCiuAiv_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); } } else if(org_isite3>X->Def.Nsite || org_isite1>X->Def.Nsite){ //org_isite3 > Nsite >= org_isite1 //printf("S-MPI \n"); if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ){ //diagonal - dam_pr += child_GC_CisAisCjuAju_spin_MPIsingle( (org_isite1-1), org_sigma1, (org_isite3-1), org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); + child_GC_CisAisCjuAju_spin_MPIsingle( (org_isite1-1), org_sigma1, (org_isite3-1), org_sigma3, tmp_V, X, nstate, Xvec, 
vec_pr); } else if(org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr += child_GC_CisAisCjuAjv_spin_MPIsingle(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); + child_GC_CisAisCjuAjv_spin_MPIsingle(org_isite1-1, org_sigma1, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4){ - dam_pr += child_GC_CisAitCjuAju_spin_MPIsingle(org_isite1-1, org_sigma2, org_isite3-1, org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); + child_GC_CisAitCjuAju_spin_MPIsingle(org_isite1-1, org_sigma2, org_isite3-1, org_sigma3, tmp_V, X, nstate, Xvec, vec_pr); } else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr += child_GC_CisAitCiuAiv_spin_MPIsingle(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); + child_GC_CisAitCiuAiv_spin_MPIsingle(org_isite1-1, org_sigma1, org_sigma2, org_isite3-1, org_sigma3, org_sigma4, tmp_V, X, nstate, Xvec, vec_pr); } } else{ @@ -1391,32 +1311,28 @@ int expec_Threebody_SpinGCHalf( isA_up = X->Def.Tpow[org_isite2-1]; isB_up = X->Def.Tpow[org_isite4-1]; if(org_sigma1==org_sigma2 && org_sigma3==org_sigma4 ){ //diagonal - dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j,i) \ +#pragma omp parallel for default(none) private(j,i) \ firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(Xvec,vec_pr) for(j=1;j<=i_max;j++){ - dam_pr +=GC_CisAisCisAis_spin_element(j, isA_up, isB_up, org_sigma2, org_sigma4, tmp_V, nstate, Xvec, vec_pr, X); + GC_CisAisCisAis_spin_element(j, isA_up, isB_up, org_sigma2, org_sigma4, tmp_V, nstate, Xvec, vec_pr); } }else if(org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j) \ +#pragma omp parallel for default(none) private(j) \ 
firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(Xvec,vec_pr) for(j=1;j<=i_max;j++){ - dam_pr += GC_CisAisCitAiu_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, Xvec, vec_pr, X, &tmp_off); + GC_CisAisCitAiu_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, Xvec, vec_pr, &tmp_off); } }else if(org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4){ - dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j) \ +#pragma omp parallel for default(none) private(j) \ firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(Xvec,vec_pr) for(j=1;j<=i_max;j++){ - dam_pr += GC_CisAitCiuAiu_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, Xvec, vec_pr, X, &tmp_off); + GC_CisAitCiuAiu_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, Xvec, vec_pr, &tmp_off); } }else if(org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4){ - dam_pr = 0.0; -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j) \ +#pragma omp parallel for default(none) private(j) \ firstprivate(i_max,X,isA_up,isB_up,org_sigma2,org_sigma4,tmp_off,tmp_V,nstate) shared(Xvec,vec_pr) for(j=1;j<=i_max;j++){ - dam_pr += GC_CisAitCiuAiv_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, Xvec, vec_pr, X, &tmp_off); + GC_CisAitCiuAiv_spin_element(j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, Xvec, vec_pr, &tmp_off); } } } @@ -1491,7 +1407,7 @@ int expec_cisajscktaltdc double complex **vec ) { FILE *fp; - char sdt[D_FileNameMax], sdt_2[D_FileNameMax], sdt_3[D_FileNameMax], sdt_4[D_FileNameMax], * tmp_char; + char sdt[D_FileNameMax], sdt_2[D_FileNameMax], sdt_3[D_FileNameMax], sdt_4[D_FileNameMax]; long unsigned int irght, ilft, ihfbit, icaca; double complex **prod, ** prod_2, ** prod_3, ** prod_4; //For TPQ @@ -1613,7 +1529,7 @@ int expec_cisajscktaltdc if (X->Def.NTBody > 0) { if (childfopenMPI(sdt_2, 
"w", &fp) == 0) { for (icaca = 0; icaca < X->Def.NTBody; icaca++) { - fprintf(fp, " %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n", + fprintf(fp, " %4d %4d %4d %4d %4d %4d %4d %4d %4d %4d %4d %4d %.10lf %.10lf \n", X->Def.TBody[icaca][0], X->Def.TBody[icaca][1], X->Def.TBody[icaca][2], X->Def.TBody[icaca][3], X->Def.TBody[icaca][4], X->Def.TBody[icaca][5], X->Def.TBody[icaca][6], X->Def.TBody[icaca][7], X->Def.TBody[icaca][8], X->Def.TBody[icaca][9], X->Def.TBody[icaca][10], X->Def.TBody[icaca][11], @@ -1625,7 +1541,7 @@ int expec_cisajscktaltdc if (X->Def.NFBody > 0) { if (childfopenMPI(sdt_3, "w", &fp) == 0) { for (icaca = 0; icaca < X->Def.NFBody; icaca++) { - fprintf(fp, " %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n", + fprintf(fp, " %4d %4d %4d %4d %4d %4d %4d %4d %4d %4d %4d %4d %4d %4d %4d %4d %.10lf %.10lf \n", X->Def.FBody[icaca][0], X->Def.FBody[icaca][1], X->Def.FBody[icaca][2], X->Def.FBody[icaca][3], X->Def.FBody[icaca][4], X->Def.FBody[icaca][5], X->Def.FBody[icaca][6], X->Def.FBody[icaca][7], X->Def.FBody[icaca][8], X->Def.FBody[icaca][9], X->Def.FBody[icaca][10], X->Def.FBody[icaca][11], @@ -1638,7 +1554,7 @@ int expec_cisajscktaltdc if (X->Def.NSBody > 0) { if (childfopenMPI(sdt_4, "w", &fp) == 0) { for (icaca = 0; icaca < X->Def.NSBody; icaca++) { - fprintf(fp, " %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %4ld %.10lf %.10lf \n", + fprintf(fp, " %4d %4d %4d %4d %4d %4d %4d %4d %4d %4d %4d %4d %4d %4d %4d %4d %4d %4d %4d %4d %4d %4d %4d %4d %.10lf %.10lf \n", X->Def.SBody[icaca][0], X->Def.SBody[icaca][1], X->Def.SBody[icaca][2], X->Def.SBody[icaca][3], X->Def.SBody[icaca][4], X->Def.SBody[icaca][5], X->Def.SBody[icaca][6], X->Def.SBody[icaca][7], X->Def.SBody[icaca][8], X->Def.SBody[icaca][9], X->Def.SBody[icaca][10], X->Def.SBody[icaca][11], diff --git a/src/expec_totalspin.c 
b/src/expec_totalspin.c index 7556c1621..a8d25b4b7 100644 --- a/src/expec_totalspin.c +++ b/src/expec_totalspin.c @@ -224,8 +224,6 @@ void totalspin_Spin( double complex spn_z = 0.0; double complex spn_z1 = 0.0; double complex spn_z2 = 0.0; - double complex spn_zd = 0.0; - double complex spn = 0.0; long unsigned int i_max; i_max = X->Check.idim_max; @@ -243,9 +241,9 @@ void totalspin_Spin( is1_up = X->Def.Tpow[isite1 - 1]; is2_up = X->Def.Tpow[isite2 - 1]; is_up = is1_up + is2_up; - num1_up = child_SpinGC_CisAis((unsigned long int) myrank + 1, X, is1_up, 1); + num1_up = child_SpinGC_CisAis((unsigned long int) myrank + 1, is1_up, 1); num1_down = 1 - num1_up; - num2_up = child_SpinGC_CisAis((unsigned long int) myrank + 1, X, is2_up, 1); + num2_up = child_SpinGC_CisAis((unsigned long int) myrank + 1, is2_up, 1); num2_down = 1 - num2_up; spn_z = (num1_up - num1_down) * (num2_up - num2_down); @@ -259,7 +257,7 @@ void totalspin_Spin( X->Phys.s2[istate] += conj(vec[j][istate]) * vec[j][istate] / 2.0; } } else {//off diagonal - spn += child_general_int_spin_TotalS_MPIdouble(isite1 - 1, isite2 - 1, X, nstate, vec, vec); + child_general_int_spin_TotalS_MPIdouble(isite1 - 1, isite2 - 1, X, nstate, vec, vec); } #endif } @@ -276,7 +274,7 @@ void totalspin_Spin( is1_up = X->Def.Tpow[tmp_isite1 - 1]; is2_up = X->Def.Tpow[tmp_isite2 - 1]; - num2_up = child_SpinGC_CisAis((unsigned long int) myrank + 1, X, is2_up, 1); + num2_up = child_SpinGC_CisAis((unsigned long int) myrank + 1, is2_up, 1); num2_down = 1 - num2_up; //diagonal @@ -289,9 +287,9 @@ void totalspin_Spin( X->Phys.s2[istate] += conj(vec[j][istate]) * vec[j][istate] * spn_z / 4.0; } if (isite1 < isite2) { - spn += child_general_int_spin_MPIsingle(isite1 - 1, 0, 1, isite2 - 1, 1, 0, 1.0, X, nstate, vec, vec); + child_general_int_spin_MPIsingle(isite1 - 1, 0, 1, isite2 - 1, 1, 0, 1.0, X, nstate, vec, vec); } else { - spn += conj(child_general_int_spin_MPIsingle(isite2 - 1, 1, 0, isite1 - 1, 0, 1, 1.0, X, nstate, vec, vec)); 
+ child_general_int_spin_MPIsingle(isite2 - 1, 1, 0, isite1 - 1, 0, 1, 1.0, X, nstate, vec, vec); } #endif }//isite1 > Nsite || isite2 > Nsite @@ -454,9 +452,9 @@ void totalspin_SpinGC( if (isite1 > X->Def.Nsite && isite2 > X->Def.Nsite) { is1_up = X->Def.Tpow[isite1 - 1]; is2_up = X->Def.Tpow[isite2 - 1]; - num1_up = child_SpinGC_CisAis((unsigned long int)myrank + 1, X, is1_up, 1); + num1_up = child_SpinGC_CisAis((unsigned long int)myrank + 1, is1_up, 1); num1_down = 1 - num1_up; - num2_up = child_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, 1); + num2_up = child_SpinGC_CisAis((unsigned long int)myrank + 1, is2_up, 1); num2_down = 1 - num2_up; spn_z2 = (num1_up - num1_down)*(num2_up - num2_down) / 4.0; for (j = 1; j <= i_max; j++) { @@ -485,7 +483,7 @@ void totalspin_SpinGC( } is1_up = X->Def.Tpow[tmp_isite1 - 1]; is2_up = X->Def.Tpow[tmp_isite2 - 1]; - num2_up = child_SpinGC_CisAis((unsigned long int)myrank + 1, X, is2_up, 1); + num2_up = child_SpinGC_CisAis((unsigned long int)myrank + 1, is2_up, 1); num2_down = 1 - num2_up; //diagonal for (j = 1; j <= i_max; j++) { @@ -652,35 +650,6 @@ int expec_totalspin return 0; } -int expec_totalSz( - struct BindStruct* X, - double complex** vec -) { - X->Large.mode = M_TOTALS; - switch (X->Def.iCalcModel) { - case Spin: - X->Phys.Sz[0] = X->Def.Total2SzMPI / 2.; - break; - case SpinGC: - totalSz_SpinGC(X, vec); - break; - case Hubbard: - case Kondo: - X->Phys.Sz[0] = X->Def.Total2SzMPI / 2.; - - break; - case HubbardGC: - case KondoGC: - totalSz_HubbardGC(X, vec); - break; - default: - X->Phys.Sz[0] = 0.0; - } - - return 0; -} - - /** * @brief function of calculating totalSz for Hubbard model in grand canonical ensemble * @@ -815,3 +784,31 @@ void totalSz_SpinGC spn_z = SumMPI_dc(spn_z); X->Phys.Sz[0] = creal(spn_z); } + +int expec_totalSz( + struct BindStruct* X, + double complex** vec +) { + X->Large.mode = M_TOTALS; + switch (X->Def.iCalcModel) { + case Spin: + X->Phys.Sz[0] = X->Def.Total2SzMPI / 2.; + break; 
+ case SpinGC: + totalSz_SpinGC(X, vec); + break; + case Hubbard: + case Kondo: + X->Phys.Sz[0] = X->Def.Total2SzMPI / 2.; + + break; + case HubbardGC: + case KondoGC: + totalSz_HubbardGC(X, vec); + break; + default: + X->Phys.Sz[0] = 0.0; + } + + return 0; +} diff --git a/src/include/CalcSpectrumByLanczos.h b/src/include/CalcSpectrumByLanczos.h index a483e672a..2a7724e2b 100644 --- a/src/include/CalcSpectrumByLanczos.h +++ b/src/include/CalcSpectrumByLanczos.h @@ -18,7 +18,7 @@ int CalcSpectrumByLanczos( struct EDMainCalStruct *X, - double complex *tmp_v1, + double complex **tmp_v1, double norm, int Nomega, double complex *dcSpectrum, diff --git a/src/include/CalcSpectrumByTPQ.h b/src/include/CalcSpectrumByTPQ.h index b5f71bad5..3b9e4ab79 100644 --- a/src/include/CalcSpectrumByTPQ.h +++ b/src/include/CalcSpectrumByTPQ.h @@ -18,7 +18,7 @@ int CalcSpectrumByTPQ( struct EDMainCalStruct *X, - double complex *tmp_v1, + double complex **tmp_v1, double norm, int Nomega, double complex *dcSpectrum, diff --git a/src/include/Common.h b/src/include/Common.h index 613ae2f16..f0126f302 100644 --- a/src/include/Common.h +++ b/src/include/Common.h @@ -30,8 +30,8 @@ #include #include "ProgressMessage.h" #include "ErrorMessage.h" -#include "struct.h" #include "log.h" +#include "struct.h" #include "dSFMT.h" #endif /* HPHI_COMMON_H */ diff --git a/src/include/Lanczos_EigenValue.h b/src/include/Lanczos_EigenValue.h index ff71e3e81..3973d0dfa 100644 --- a/src/include/Lanczos_EigenValue.h +++ b/src/include/Lanczos_EigenValue.h @@ -15,7 +15,7 @@ /* along with this program. If not, see . 
*/ #pragma once int Lanczos_EigenValue(struct BindStruct *X); -int Lanczos_GetTridiagonalMatrixComponents(struct BindStruct *X, double *alpha, double *beta, double complex *_v1, unsigned long int *Lanczos_step); +int Lanczos_GetTridiagonalMatrixComponents(struct BindStruct *X, double *alpha, double *beta, double complex **_v1, unsigned long int *Lanczos_step); int ReadInitialVector(struct BindStruct *X, double complex** tmp_v0, double complex **tmp_v1, unsigned long int *liLanczosStp_vec); diff --git a/src/include/eigenIO.h b/src/include/eigenIO.h index 5b13ca893..39860ddc6 100644 --- a/src/include/eigenIO.h +++ b/src/include/eigenIO.h @@ -21,9 +21,9 @@ int OutputRealEigenValue(int xNsize, double *ene, char *filename); int OutputCmpEigenValue(int xNsize, complex double *ene, char *filename); -int OutputRealEigenVec(int xNsize, const int nene, double **vec, const int nproc, char *filename); -int OutputCmpEigenVec(int xNsize, const int nene, complex double **vec, const int nproc, char *filename); -int InputRealEigenValue(int xNsize, double *ene, char *filename); -int InputCmpEigenValue(int xNsize, complex double *ene, char *filename); -int InputRealEigenVec(int xNsize, const int nene, double **vec, const int nproc, char *filename); -int InputCmpEigenVec(int xNsize, const int nene, complex double **vec, const int nproc, char *filename); +//int OutputRealEigenVec(int xNsize, const int nene, double **vec, const int nproc, char *filename); +//int OutputCmpEigenVec(int xNsize, const int nene, complex double **vec, const int nproc, char *filename); +//int InputRealEigenValue(int xNsize, double *ene, char *filename); +//int InputCmpEigenValue(int xNsize, complex double *ene, char *filename); +//int InputRealEigenVec(int xNsize, const int nene, double **vec, const int nproc, char *filename); +//int InputCmpEigenVec(int xNsize, const int nene, complex double **vec, const int nproc, char *filename); diff --git a/src/include/log.h b/src/include/log.h index a428fd60e..986aa074a 
100644 --- a/src/include/log.h +++ b/src/include/log.h @@ -16,6 +16,7 @@ #pragma once #include "global.h" #include "LogMessage.h" +#include "struct.h" int TimeKeeper ( diff --git a/src/include/matrixlapack.h b/src/include/matrixlapack.h index 64475c48f..a49c51b5a 100644 --- a/src/include/matrixlapack.h +++ b/src/include/matrixlapack.h @@ -34,4 +34,5 @@ #include int ZHEEVall(int xNsize, double complex **A, double *r,double complex **vec); int DSEVvector(int xNsize, double** A, double* r, double** vec); +int DSEVvalue(int xNsize, double** A, double* r); #endif diff --git a/src/include/mltplyHubbard.h b/src/include/mltplyHubbard.h index 360815837..469fa46c6 100644 --- a/src/include/mltplyHubbard.h +++ b/src/include/mltplyHubbard.h @@ -23,7 +23,7 @@ int mltplyHubbard(struct BindStruct *X, int nstate, double complex **tmp_v0,doub int mltplyHubbardGC(struct BindStruct *X, int nstate, double complex **tmp_v0,double complex **tmp_v1); -double complex GC_general_hopp +void GC_general_hopp ( int nstate, double complex **tmp_v0, double complex **tmp_v1, @@ -31,22 +31,21 @@ double complex GC_general_hopp double complex trans ); - -double complex GC_general_int( +void GC_general_int( int nstate, double complex **tmp_v0, double complex **tmp_v1, struct BindStruct *X ); -double complex general_int +void general_int ( int nstate, double complex **tmp_v0, double complex **tmp_v1, struct BindStruct *X ); -double complex general_hopp +void general_hopp ( int nstate, double complex** tmp_v0, @@ -55,7 +54,7 @@ double complex general_hopp double complex trans ); -double complex exchange +void exchange ( int nstate, double complex **tmp_v0, @@ -63,21 +62,21 @@ double complex exchange struct BindStruct *X ); -double complex pairhopp +void pairhopp ( int nstate, double complex** tmp_v0, double complex** tmp_v1, struct BindStruct* X ); -double complex GC_exchange +void GC_exchange ( int nstate, double complex **tmp_v0, double complex **tmp_v1, struct BindStruct *X ); -double complex 
GC_pairhopp +void GC_pairhopp ( int nstate, double complex **tmp_v0, double complex **tmp_v1, diff --git a/src/include/mltplyHubbardCore.h b/src/include/mltplyHubbardCore.h index 52c244343..fc557cb7e 100644 --- a/src/include/mltplyHubbardCore.h +++ b/src/include/mltplyHubbardCore.h @@ -19,7 +19,7 @@ #include "Common.h" -double complex pairhopp_element +void pairhopp_element ( long unsigned int j, int nstate, double complex **tmp_v0, @@ -28,7 +28,7 @@ double complex pairhopp_element long unsigned int *tmp_off ); -double complex GC_exchange_element +void GC_exchange_element ( long unsigned int j, int nstate, double complex **tmp_v0, @@ -37,7 +37,7 @@ double complex GC_exchange_element long unsigned int *tmp_off ); -double complex GC_pairhopp_element +void GC_pairhopp_element ( long unsigned int j, int nstate, double complex **tmp_v0, @@ -46,7 +46,7 @@ double complex GC_pairhopp_element long unsigned int *tmp_off ); -double complex exchange_element +void exchange_element ( long unsigned int j, int nstate, double complex **tmp_v0, @@ -55,19 +55,17 @@ double complex exchange_element long unsigned int *tmp_off ); -double complex CisAisCisAis_element +void CisAisCisAis_element ( long unsigned int j, long unsigned int isite1, long unsigned int isite3, double complex tmp_V, int nstate, double complex **tmp_v0, - double complex **tmp_v1, - struct BindStruct *X, - long unsigned int *tmp_off + double complex **tmp_v1 ); -double complex CisAisCjtAku_element +void CisAisCjtAku_element ( long unsigned int j, long unsigned int isite1, @@ -82,7 +80,7 @@ double complex CisAisCjtAku_element long unsigned int *tmp_off ); -double complex CisAjtCkuAku_element +void CisAjtCkuAku_element ( long unsigned int j, long unsigned int isite1, @@ -97,7 +95,7 @@ double complex CisAjtCkuAku_element long unsigned int *tmp_off ); -double complex CisAjtCkuAlv_element +void CisAjtCkuAlv_element ( long unsigned int j, long unsigned int isite1, @@ -115,19 +113,17 @@ double complex CisAjtCkuAlv_element 
long unsigned int *tmp_off_2 ); //[s]Grand canonical -double complex GC_CisAisCisAis_element +void GC_CisAisCisAis_element ( long unsigned int j, long unsigned int isite1, long unsigned int isite3, double complex tmp_V, int nstate, double complex **tmp_v0, - double complex **tmp_v1, - struct BindStruct *X, - long unsigned int *tmp_off + double complex **tmp_v1 ); -double complex GC_CisAisCjtAku_element +void GC_CisAisCjtAku_element ( long unsigned int j, long unsigned int isite1, @@ -138,11 +134,10 @@ double complex GC_CisAisCjtAku_element double complex tmp_V, int nstate, double complex **tmp_v0, double complex **tmp_v1, - struct BindStruct *X, long unsigned int *tmp_off ); -double complex GC_CisAjtCkuAku_element +void GC_CisAjtCkuAku_element ( long unsigned int j, long unsigned int isite1, @@ -153,11 +148,10 @@ double complex GC_CisAjtCkuAku_element double complex tmp_V, int nstate, double complex **tmp_v0, double complex **tmp_v1, - struct BindStruct *X, long unsigned int *tmp_off ); -double complex GC_CisAjtCkuAlv_element +void GC_CisAjtCkuAlv_element ( long unsigned int j, long unsigned int isite1, @@ -171,7 +165,6 @@ double complex GC_CisAjtCkuAlv_element double complex tmp_V, int nstate, double complex **tmp_v0, double complex **tmp_v1, - struct BindStruct *X, long unsigned int *tmp_off_2 ); //[e]Grand canonical @@ -182,7 +175,6 @@ void GC_CisAis int nstate, double complex **tmp_v0, double complex **tmp_v1, - struct BindStruct *X, long unsigned int is1_spin, double complex tmp_trans ); @@ -191,7 +183,6 @@ void GC_AisCis( long unsigned int j, int nstate, double complex **tmp_v0, double complex **tmp_v1, - struct BindStruct *X, long unsigned int is1_spin, double complex tmp_trans ); @@ -199,7 +190,6 @@ void GC_AisCis( int child_CisAis ( long unsigned int list_1_j, - struct BindStruct *X, long unsigned int is1_spin ); @@ -218,7 +208,6 @@ int child_CisAjt int child_GC_CisAjt ( long unsigned int list_1_j, - struct BindStruct *X, long unsigned int is1_spin, long 
unsigned int is2_spin, long unsigned int sum_spin, @@ -248,7 +237,6 @@ void GC_CisAjt int nstate, double complex **tmp_v0, double complex **tmp_v1, - struct BindStruct *X, long unsigned int is1_spin, long unsigned int is2_spin, long unsigned int sum_spin, @@ -269,7 +257,6 @@ int general_hopp_GetInfo int general_int_GetInfo ( - int iInterAll, struct BindStruct *X, long unsigned int isite1, long unsigned int isite2, diff --git a/src/include/mltplyMPIHubbard.h b/src/include/mltplyMPIHubbard.h index 7f68de628..1d731ff6e 100644 --- a/src/include/mltplyMPIHubbard.h +++ b/src/include/mltplyMPIHubbard.h @@ -29,7 +29,7 @@ void GC_general_hopp_MPIdouble double complex **tmp_v1 ); -double complex child_GC_general_hopp_MPIdouble +void child_GC_general_hopp_MPIdouble ( int org_isite1, int org_ispin1, @@ -49,7 +49,7 @@ void GC_general_hopp_MPIsingle double complex **tmp_v1 ); -double complex child_GC_general_hopp_MPIsingle +void child_GC_general_hopp_MPIsingle ( int org_isite1, int org_ispin1, @@ -70,7 +70,7 @@ void general_hopp_MPIdouble double complex **tmp_v1 ); -double complex child_general_hopp_MPIdouble +void child_general_hopp_MPIdouble ( int org_isite1, int org_ispin1, @@ -90,7 +90,7 @@ void general_hopp_MPIsingle double complex **tmp_v1 ); -double complex child_general_hopp_MPIsingle +void child_general_hopp_MPIsingle ( int org_isite1, int org_ispin1, diff --git a/src/include/mltplyMPIHubbardCore.h b/src/include/mltplyMPIHubbardCore.h index cdce59b35..aad46402f 100644 --- a/src/include/mltplyMPIHubbardCore.h +++ b/src/include/mltplyMPIHubbardCore.h @@ -75,7 +75,7 @@ int GetSgnInterAll unsigned long int *offbit ); -double complex child_GC_CisAisCjtAjt_Hubbard_MPI +void child_GC_CisAisCjtAjt_Hubbard_MPI ( int org_isite1, int org_ispin1, @@ -87,7 +87,7 @@ double complex child_GC_CisAisCjtAjt_Hubbard_MPI double complex **tmp_v1 ); -double complex child_GC_CisAjtCkuAlv_Hubbard_MPI +void child_GC_CisAjtCkuAlv_Hubbard_MPI ( int isite1, int isigma1, @@ -103,7 +103,7 @@ double 
complex child_GC_CisAjtCkuAlv_Hubbard_MPI double complex **tmp_v1 ); -double complex child_GC_CisAjtCkuAku_Hubbard_MPI +void child_GC_CisAjtCkuAku_Hubbard_MPI ( int isite1, int isigma1, @@ -117,7 +117,7 @@ double complex child_GC_CisAjtCkuAku_Hubbard_MPI double complex **tmp_v1 ); -double complex child_GC_CisAisCjtAku_Hubbard_MPI +void child_GC_CisAisCjtAku_Hubbard_MPI ( int isite1, int isigma1, @@ -131,7 +131,7 @@ double complex child_GC_CisAisCjtAku_Hubbard_MPI double complex **tmp_v1 ); -double complex child_GC_CisAis_Hubbard_MPI +void child_GC_CisAis_Hubbard_MPI ( int org_isite1, int org_ispin1, @@ -141,7 +141,7 @@ double complex child_GC_CisAis_Hubbard_MPI double complex **tmp_v1 ); -double complex child_GC_CisAjt_Hubbard_MPI +void child_GC_CisAjt_Hubbard_MPI ( int org_isite1, int org_ispin1, @@ -153,7 +153,7 @@ double complex child_GC_CisAjt_Hubbard_MPI double complex **tmp_v1 ); -double complex child_CisAisCjtAjt_Hubbard_MPI +void child_CisAisCjtAjt_Hubbard_MPI ( int org_isite1, int org_ispin1, @@ -165,7 +165,7 @@ double complex child_CisAisCjtAjt_Hubbard_MPI double complex **tmp_v1 ); -double complex child_CisAjtCkuAlv_Hubbard_MPI +void child_CisAjtCkuAlv_Hubbard_MPI ( int isite1, int isigma1, @@ -181,7 +181,7 @@ double complex child_CisAjtCkuAlv_Hubbard_MPI double complex **tmp_v1 ); -double complex child_CisAjtCkuAku_Hubbard_MPI +void child_CisAjtCkuAku_Hubbard_MPI ( int isite1, int isigma1, @@ -195,7 +195,7 @@ double complex child_CisAjtCkuAku_Hubbard_MPI double complex **tmp_v1 ); -double complex child_CisAisCjtAku_Hubbard_MPI +void child_CisAisCjtAku_Hubbard_MPI ( int isite1, int isigma1, @@ -209,7 +209,7 @@ double complex child_CisAisCjtAku_Hubbard_MPI double complex **tmp_v1 ); -double complex child_CisAis_Hubbard_MPI +void child_CisAis_Hubbard_MPI ( int org_isite1, int org_ispin1, @@ -219,7 +219,7 @@ double complex child_CisAis_Hubbard_MPI double complex **tmp_v1 ); -double complex child_CisAjt_MPIdouble +void child_CisAjt_MPIdouble ( int 
org_isite1, int org_ispin1, @@ -232,7 +232,7 @@ double complex child_CisAjt_MPIdouble double complex **tmp_v1 ); -double complex child_CisAjt_MPIsingle +void child_CisAjt_MPIsingle ( int org_isite1, int org_ispin1, @@ -245,7 +245,7 @@ double complex child_CisAjt_MPIsingle ); -double complex child_GC_Cis_MPI +void child_GC_Cis_MPI ( int org_isite, int org_ispin, @@ -256,7 +256,7 @@ double complex child_GC_Cis_MPI unsigned long int *Tpow ); -double complex child_GC_Ajt_MPI +void child_GC_Ajt_MPI ( int org_isite, int org_ispin, @@ -268,7 +268,7 @@ double complex child_GC_Ajt_MPI long unsigned int *Tpow ); -double complex child_Cis_MPI +void child_Cis_MPI ( int org_isite, unsigned int org_ispin, @@ -282,7 +282,7 @@ double complex child_Cis_MPI long unsigned int _ihfbit ); -double complex child_Ajt_MPI +void child_Ajt_MPI ( int org_isite, unsigned int org_ispin, diff --git a/src/include/mltplyMPISpin.h b/src/include/mltplyMPISpin.h index c698ee864..f188cf1ab 100644 --- a/src/include/mltplyMPISpin.h +++ b/src/include/mltplyMPISpin.h @@ -29,7 +29,7 @@ void general_int_spin_MPIdouble double complex **tmp_v1 ); -double complex child_general_int_spin_MPIdouble +void child_general_int_spin_MPIdouble ( int org_isite1, int org_ispin1, @@ -45,7 +45,7 @@ double complex child_general_int_spin_MPIdouble ); -double complex child_general_int_spin_TotalS_MPIdouble +void child_general_int_spin_TotalS_MPIdouble ( int org_isite1, int org_isite3, @@ -63,7 +63,7 @@ void general_int_spin_MPIsingle double complex **tmp_v1 ); -double complex child_general_int_spin_MPIsingle +void child_general_int_spin_MPIsingle ( int org_isite1, int org_ispin1, diff --git a/src/include/mltplyMPISpinCore.h b/src/include/mltplyMPISpinCore.h index 6828f1f20..f8bd2b1c9 100644 --- a/src/include/mltplyMPISpinCore.h +++ b/src/include/mltplyMPISpinCore.h @@ -21,7 +21,7 @@ #include #include "struct.h" -double complex child_GC_CisAisCjuAjv_GeneralSpin_MPIdouble +void child_GC_CisAisCjuAjv_GeneralSpin_MPIdouble ( int 
org_isite1, int org_ispin1, @@ -34,7 +34,7 @@ double complex child_GC_CisAisCjuAjv_GeneralSpin_MPIdouble double complex **tmp_v1 ); -double complex child_GC_CisAitCjuAju_GeneralSpin_MPIdouble +void child_GC_CisAitCjuAju_GeneralSpin_MPIdouble ( int org_isite1, int org_ispin1, @@ -47,7 +47,7 @@ double complex child_GC_CisAitCjuAju_GeneralSpin_MPIdouble double complex **tmp_v1 ); -double complex child_GC_CisAitCjuAjv_GeneralSpin_MPIdouble +void child_GC_CisAitCjuAjv_GeneralSpin_MPIdouble ( int org_isite1, int org_ispin1, @@ -62,7 +62,7 @@ double complex child_GC_CisAitCjuAjv_GeneralSpin_MPIdouble ); //general spin - single -double complex child_GC_CisAisCjuAjv_GeneralSpin_MPIsingle +void child_GC_CisAisCjuAjv_GeneralSpin_MPIsingle ( int org_isite1, int org_ispin1, @@ -75,7 +75,7 @@ double complex child_GC_CisAisCjuAjv_GeneralSpin_MPIsingle double complex **tmp_v1 ); -double complex child_GC_CisAitCjuAju_GeneralSpin_MPIsingle +void child_GC_CisAitCjuAju_GeneralSpin_MPIsingle ( int org_isite1, int org_ispin1, @@ -88,7 +88,7 @@ double complex child_GC_CisAitCjuAju_GeneralSpin_MPIsingle double complex **tmp_v1 ); -double complex child_GC_CisAitCjuAjv_GeneralSpin_MPIsingle +void child_GC_CisAitCjuAjv_GeneralSpin_MPIsingle ( int org_isite1, int org_ispin1, @@ -102,7 +102,7 @@ double complex child_GC_CisAitCjuAjv_GeneralSpin_MPIsingle double complex **tmp_v1 ); -double complex child_GC_CisAit_GeneralSpin_MPIdouble +void child_GC_CisAit_GeneralSpin_MPIdouble ( int org_isite1, int org_ispin1, @@ -113,7 +113,7 @@ double complex child_GC_CisAit_GeneralSpin_MPIdouble double complex **tmp_v1 ); -double complex child_GC_CisAis_GeneralSpin_MPIdouble +void child_GC_CisAis_GeneralSpin_MPIdouble ( int org_isite1, int org_ispin1, @@ -123,7 +123,7 @@ double complex child_GC_CisAis_GeneralSpin_MPIdouble double complex **tmp_v1 ); -double complex child_GC_AisCis_GeneralSpin_MPIdouble +void child_GC_AisCis_GeneralSpin_MPIdouble ( int org_isite1, int org_ispin1, @@ -133,7 +133,7 @@ double 
complex child_GC_AisCis_GeneralSpin_MPIdouble double complex **tmp_v1 ); -double complex child_GC_CisAisCjuAju_GeneralSpin_MPIdouble +void child_GC_CisAisCjuAju_GeneralSpin_MPIdouble ( int org_isite1, int org_ispin1, @@ -145,7 +145,7 @@ double complex child_GC_CisAisCjuAju_GeneralSpin_MPIdouble double complex **tmp_v1 ); -double complex child_GC_CisAisCjuAju_GeneralSpin_MPIsingle +void child_GC_CisAisCjuAju_GeneralSpin_MPIsingle ( int org_isite1, int org_ispin1, @@ -157,7 +157,7 @@ double complex child_GC_CisAisCjuAju_GeneralSpin_MPIsingle double complex **tmp_v1 ); -double complex child_CisAit_GeneralSpin_MPIdouble +void child_CisAit_GeneralSpin_MPIdouble ( int org_isite1, int org_ispin1, @@ -170,7 +170,7 @@ double complex child_CisAit_GeneralSpin_MPIdouble ); -double complex child_GC_CisAitCiuAiv_spin_MPIdouble +void child_GC_CisAitCiuAiv_spin_MPIdouble ( int org_isite1, int org_ispin1, @@ -184,7 +184,7 @@ double complex child_GC_CisAitCiuAiv_spin_MPIdouble double complex **tmp_v1 ); -double complex child_GC_CisAisCjuAjv_spin_MPIdouble +void child_GC_CisAisCjuAjv_spin_MPIdouble ( int org_isite1, int org_ispin1, @@ -197,7 +197,7 @@ double complex child_GC_CisAisCjuAjv_spin_MPIdouble double complex **tmp_v1 ); -double complex child_GC_CisAitCjuAju_spin_MPIdouble +void child_GC_CisAitCjuAju_spin_MPIdouble ( int org_isite1, int org_ispin1, @@ -210,7 +210,7 @@ double complex child_GC_CisAitCjuAju_spin_MPIdouble double complex **tmp_v1 ); -double complex child_GC_CisAisCjuAju_spin_MPIdouble +void child_GC_CisAisCjuAju_spin_MPIdouble ( int org_isite1, int org_ispin1, @@ -222,7 +222,7 @@ double complex child_GC_CisAisCjuAju_spin_MPIdouble double complex **tmp_v1 ); -double complex child_GC_CisAitCiuAiv_spin_MPIsingle +void child_GC_CisAitCiuAiv_spin_MPIsingle ( int org_isite1, int org_ispin1, @@ -236,7 +236,7 @@ double complex child_GC_CisAitCiuAiv_spin_MPIsingle double complex **tmp_v1 ); -double complex child_GC_CisAisCjuAjv_spin_MPIsingle +void 
child_GC_CisAisCjuAjv_spin_MPIsingle ( int org_isite1, int org_ispin1, @@ -249,7 +249,7 @@ double complex child_GC_CisAisCjuAjv_spin_MPIsingle double complex **tmp_v1 ); -double complex child_GC_CisAitCjuAju_spin_MPIsingle +void child_GC_CisAitCjuAju_spin_MPIsingle ( int org_isite1, int org_ispin2, @@ -261,7 +261,7 @@ double complex child_GC_CisAitCjuAju_spin_MPIsingle double complex **tmp_v1 ); -double complex child_GC_CisAisCjuAju_spin_MPIsingle +void child_GC_CisAisCjuAju_spin_MPIsingle ( int org_isite1, int org_ispin1, @@ -273,7 +273,7 @@ double complex child_GC_CisAisCjuAju_spin_MPIsingle double complex **tmp_v1 ); -double complex child_GC_CisAisCjuAju_spin_MPIsingle +void child_GC_CisAisCjuAju_spin_MPIsingle ( int org_isite1, int org_ispin1, @@ -285,7 +285,7 @@ double complex child_GC_CisAisCjuAju_spin_MPIsingle double complex **tmp_v1 ); -double complex child_GC_CisAit_spin_MPIdouble +void child_GC_CisAit_spin_MPIdouble ( int org_isite1, int org_ispin1, @@ -296,7 +296,7 @@ double complex child_GC_CisAit_spin_MPIdouble double complex **tmp_v1 ); -double complex child_GC_CisAis_spin_MPIdouble +void child_GC_CisAis_spin_MPIdouble ( int org_isite1, int org_ispin1, @@ -306,7 +306,7 @@ double complex child_GC_CisAis_spin_MPIdouble double complex **tmp_v1 ); -double complex child_GC_AisCis_spin_MPIdouble +void child_GC_AisCis_spin_MPIdouble ( int org_isite1, int org_ispin1, @@ -316,7 +316,7 @@ double complex child_GC_AisCis_spin_MPIdouble double complex **tmp_v1 ); -double complex child_CisAit_spin_MPIdouble +void child_CisAit_spin_MPIdouble ( int org_isite1, int org_ispin2, @@ -328,7 +328,7 @@ double complex child_CisAit_spin_MPIdouble unsigned long int idim_max ); -double complex child_CisAisCjuAju_GeneralSpin_MPIdouble +void child_CisAisCjuAju_GeneralSpin_MPIdouble ( int org_isite1, int org_ispin1, @@ -340,7 +340,7 @@ double complex child_CisAisCjuAju_GeneralSpin_MPIdouble double complex **tmp_v1 ); -double complex child_CisAitCjuAjv_GeneralSpin_MPIdouble +void 
child_CisAitCjuAjv_GeneralSpin_MPIdouble ( int org_isite1, int org_ispin1, @@ -355,7 +355,7 @@ double complex child_CisAitCjuAjv_GeneralSpin_MPIdouble ); //general spin - single -double complex child_CisAisCjuAju_GeneralSpin_MPIsingle +void child_CisAisCjuAju_GeneralSpin_MPIsingle ( int org_isite1, int org_ispin1, @@ -367,7 +367,7 @@ double complex child_CisAisCjuAju_GeneralSpin_MPIsingle double complex **tmp_v1 ); -double complex child_CisAitCjuAjv_GeneralSpin_MPIsingle +void child_CisAitCjuAjv_GeneralSpin_MPIsingle ( int org_isite1, int org_ispin1, diff --git a/src/include/mltplySpin.h b/src/include/mltplySpin.h index 7e6672de3..a6c2b6c79 100644 --- a/src/include/mltplySpin.h +++ b/src/include/mltplySpin.h @@ -44,14 +44,14 @@ int mltplyGeneralSpinGC(struct BindStruct *X, int nstate, double complex **tmp_v int mltplySpinGCBoost(struct BindStruct *X, int nstate, double complex **tmp_v0,double complex **tmp_v1); -double complex GC_general_int_spin +void GC_general_int_spin ( int nstate, double complex **tmp_v0, double complex **tmp_v1, struct BindStruct *X ); -double complex general_int_spin +void general_int_spin ( int nstate, double complex **tmp_v0, double complex **tmp_v1, @@ -59,21 +59,21 @@ double complex general_int_spin ); -double complex GC_exchange_spin +void GC_exchange_spin ( int nstate, double complex **tmp_v0, double complex **tmp_v1, struct BindStruct *X ); -double complex exchange_spin +void exchange_spin ( int nstate, double complex **tmp_v0, double complex **tmp_v1, struct BindStruct *X ); -double complex GC_pairlift_spin +void GC_pairlift_spin ( int nstate, double complex **tmp_v0, double complex **tmp_v1, diff --git a/src/include/mltplySpinCore.h b/src/include/mltplySpinCore.h index cad8eec97..6043e4e17 100644 --- a/src/include/mltplySpinCore.h +++ b/src/include/mltplySpinCore.h @@ -19,7 +19,7 @@ #include "Common.h" -double complex exchange_spin_element +void exchange_spin_element ( long unsigned int j, int nstate, @@ -29,7 +29,7 @@ double 
complex exchange_spin_element long unsigned int *tmp_off ); -double complex GC_pairlift_spin_element +void GC_pairlift_spin_element ( long unsigned int j, int nstate, @@ -39,7 +39,7 @@ double complex GC_pairlift_spin_element long unsigned int *tmp_off ); -double complex GC_exchange_spin_element +void GC_exchange_spin_element ( long unsigned int j, int nstate, @@ -61,7 +61,7 @@ int child_exchange_spin_element ); //[s]Spin -double complex CisAisCisAis_spin_element +void CisAisCisAis_spin_element ( long unsigned int j, long unsigned int isA_up, @@ -70,8 +70,7 @@ double complex CisAisCisAis_spin_element long unsigned int org_sigma4, double complex tmp_V, int nstate, double complex **tmp_v0, - double complex **tmp_v1, - struct BindStruct *X + double complex **tmp_v1 ); double complex CisAisCitAiu_spin_element @@ -118,7 +117,7 @@ double complex CisAitCiuAiv_spin_element //[e]Spin //[s]GC Spin -double complex GC_CisAisCisAis_spin_element +void GC_CisAisCisAis_spin_element ( long unsigned int j, long unsigned int isA_up, @@ -127,11 +126,10 @@ double complex GC_CisAisCisAis_spin_element long unsigned int org_sigma4, double complex tmp_V, int nstate, double complex **tmp_v0, - double complex **tmp_v1, - struct BindStruct *X + double complex **tmp_v1 ); -double complex GC_CisAisCitAiu_spin_element +void GC_CisAisCitAiu_spin_element ( long unsigned int j, long unsigned int org_sigma2, @@ -141,11 +139,10 @@ double complex GC_CisAisCitAiu_spin_element double complex tmp_V, int nstate, double complex **tmp_v0, double complex **tmp_v1, - struct BindStruct *X, long unsigned int *tmp_off ); -double complex GC_CisAitCiuAiu_spin_element +void GC_CisAitCiuAiu_spin_element ( long unsigned int j, long unsigned int org_sigma2, @@ -155,11 +152,10 @@ double complex GC_CisAitCiuAiu_spin_element double complex tmp_V, int nstate, double complex **tmp_v0, double complex **tmp_v1, - struct BindStruct *X, long unsigned int *tmp_off ); -double complex GC_CisAitCiuAiv_spin_element +void 
GC_CisAitCiuAiv_spin_element ( long unsigned int j, long unsigned int org_sigma2, @@ -169,7 +165,6 @@ double complex GC_CisAitCiuAiv_spin_element double complex tmp_V, int nstate, double complex **tmp_v0, double complex **tmp_v1, - struct BindStruct *X, long unsigned int *tmp_off_2 ); //[e]GC Spin @@ -201,7 +196,6 @@ int pairlift_spin_GetInfo int child_SpinGC_CisAit( long unsigned int j, -struct BindStruct *X, long unsigned int is1_spin, long unsigned int sigma2, long unsigned int *tmp_off @@ -217,14 +211,12 @@ int child_Spin_CisAit( int child_Spin_CisAis( long unsigned int j, -struct BindStruct *X, long unsigned int is1_spin, long unsigned int sigma1 ); int child_SpinGC_CisAis( long unsigned int j, -struct BindStruct *X, long unsigned int is1_spin, long unsigned int sigma1 ); diff --git a/src/include/struct.h b/src/include/struct.h index 80fe55e74..b21e3e221 100644 --- a/src/include/struct.h +++ b/src/include/struct.h @@ -24,8 +24,8 @@ */ #ifndef HPHI_STRUCT_H #define HPHI_STRUCT_H - -#include "Common.h" +#include +#include /*=================================================================================================*/ //For TEM diff --git a/src/include/xsetmem.h b/src/include/xsetmem.h index b34addef0..c99f0e2b6 100644 --- a/src/include/xsetmem.h +++ b/src/include/xsetmem.h @@ -39,7 +39,7 @@ int GetlistSize struct BindStruct *X ); - +/* void setmem_IntAll_Diagonal ( int **InterAllOffDiagonal, @@ -48,3 +48,4 @@ void setmem_IntAll_Diagonal double *ParaInterAllDiagonal, const int NInterAll ); +*/ diff --git a/src/lapack_diag.c b/src/lapack_diag.c index 781d48eb9..210db2d01 100644 --- a/src/lapack_diag.c +++ b/src/lapack_diag.c @@ -42,7 +42,7 @@ struct BindStruct *X//!<[inout] #ifdef _SCALAPACK int rank, size, nprocs, nprow, npcol, myrow, mycol, ictxt; int i_negone=-1, i_zero=0, iam; - long int mb, nb, mp, nq; + long int mb, mp, nq; int dims[2]={0,0}; #endif diff --git a/src/matrixlapack.c b/src/matrixlapack.c index e65caffe1..332e2383d 100644 --- 
a/src/matrixlapack.c +++ b/src/matrixlapack.c @@ -38,6 +38,7 @@ int dsyevd_(char *jobz, char *uplo, int *n, double *a, int *lda, double *w, double *work, int *lwork, int *iwork, int *liwork, int *info); int zheevd_(char *jobz, char *uplo, int *n, double complex *a, int *lda, double *w, double complex *work, int *lwork, double *rwork, int *iwork, int *liwork, int *info); #else +int dsyev_(char *jobz, char* uplo, int* n, double* a, int* lda, double* w, double* work, int* lwork, int* info); int zheev_(char *jobz, char *uplo, int *n, double complex *a, int *lda, double *w, double complex *work, int *lwork, double *rwork, int *info); #endif diff --git a/src/matrixscalapack.c b/src/matrixscalapack.c index 16bf75f4c..1fda9edf2 100644 --- a/src/matrixscalapack.c +++ b/src/matrixscalapack.c @@ -201,7 +201,6 @@ int diag_scalapack_cmp(long int xNsize, double complex **A, double *r, double complex *Z, int *descZ) { const int i_one=1, i_zero=0; const long int i_negone=-1; - const double zero=0.0, one=1.0; long int m, n, mb, nb; int nprow, npcol; int myrow, mycol, info, lld; @@ -213,7 +212,7 @@ int diag_scalapack_cmp(long int xNsize, double complex **A, int rank, size, iam, nprocs; long int lwork, lrwork; int dims[2]={0,0}; - long int i, j, ip, jp; + long int i, j; m=n=xNsize; MPI_Comm_rank(MPI_COMM_WORLD, &rank); diff --git a/src/mltplyHubbard.c b/src/mltplyHubbard.c index 0d84cb5cb..87086925b 100644 --- a/src/mltplyHubbard.c +++ b/src/mltplyHubbard.c @@ -263,7 +263,7 @@ int mltplyHubbard( sigma4 = X->Def.InterAll_OffDiagonal[idx][7]; tmp_V = X->Def.ParaInterAll_OffDiagonal[idx]; - general_int_GetInfo(i, X, isite1, isite2, isite3, isite4, + general_int_GetInfo(X, isite1, isite2, isite3, isite4, sigma1, sigma2, sigma3, sigma4, tmp_V); general_int(nstate, tmp_v0, tmp_v1, X); @@ -346,7 +346,6 @@ int mltplyHubbardGC( long unsigned int isite3, isite4, sigma3, sigma4; long unsigned int ibitsite1, ibitsite2, ibitsite3, ibitsite4; - double complex dam_pr = 0.0; double complex 
tmp_trans; /*[s] For InterAll */ double complex tmp_V; @@ -389,7 +388,7 @@ int mltplyHubbardGC( return -1; } tmp_trans = -X->Def.EDParaGeneralTransfer[idx]; - dam_pr = GC_general_hopp(nstate, tmp_v0, tmp_v1, X, tmp_trans); + GC_general_hopp(nstate, tmp_v0, tmp_v1, X, tmp_trans); } StopTimer(213); } @@ -419,22 +418,21 @@ int mltplyHubbardGC( ibitsite3 = X->Def.OrgTpow[2 * isite3 - 2 + sigma3]; ibitsite4 = X->Def.OrgTpow[2 * isite4 - 2 + sigma4]; if (ibitsite1 == ibitsite2 && ibitsite3 == ibitsite4) - dam_pr = child_GC_CisAisCjtAjt_Hubbard_MPI( + child_GC_CisAisCjtAjt_Hubbard_MPI( isite1 - 1, sigma1, isite3 - 1, sigma3, tmp_V, X, nstate, tmp_v0, tmp_v1); else if (ibitsite1 == ibitsite2 && ibitsite3 != ibitsite4) - dam_pr = child_GC_CisAisCjtAku_Hubbard_MPI( + child_GC_CisAisCjtAku_Hubbard_MPI( isite1 - 1, sigma1, isite3 - 1, sigma3, isite4 - 1, sigma4, tmp_V, X, nstate, tmp_v0, tmp_v1); else if (ibitsite1 != ibitsite2 && ibitsite3 == ibitsite4) - dam_pr = child_GC_CisAjtCkuAku_Hubbard_MPI( + child_GC_CisAjtCkuAku_Hubbard_MPI( isite1 - 1, sigma1, isite2 - 1, sigma2, isite3 - 1, sigma3, tmp_V, X, nstate, tmp_v0, tmp_v1); else if (ibitsite1 != ibitsite2 && ibitsite3 != ibitsite4) - dam_pr = child_GC_CisAjtCkuAlv_Hubbard_MPI( + child_GC_CisAjtCkuAlv_Hubbard_MPI( isite1 - 1, sigma1, isite2 - 1, sigma2, isite3 - 1, sigma3, isite4 - 1, sigma4, tmp_V, X, nstate, tmp_v0, tmp_v1); StopTimer(221); }//InterPE else{ StartTimer(222); - dam_pr = 0.0; for(ihermite=0; ihermite<2; ihermite++){ idx=i+ihermite; isite1 = X->Def.InterAll_OffDiagonal[idx][0] + 1; @@ -447,9 +445,9 @@ int mltplyHubbardGC( sigma4 = X->Def.InterAll_OffDiagonal[idx][7]; tmp_V = X->Def.ParaInterAll_OffDiagonal[idx]; - general_int_GetInfo(i, X, isite1, isite2, isite3, isite4, - sigma1, sigma2, sigma3, sigma4, tmp_V); - dam_pr += GC_general_int(nstate, tmp_v0, tmp_v1, X); + general_int_GetInfo(X, isite1, isite2, isite3, isite4, + sigma1, sigma2, sigma3, sigma4, tmp_V); + GC_general_int(nstate, tmp_v0, tmp_v1, X); 
}/*for(ihermite=0; ihermite<2; ihermite++)*/ StopTimer(222); } @@ -466,7 +464,7 @@ int mltplyHubbardGC( || X->Def.PairHopping[i][1] + 1 > X->Def.Nsite) { StartTimer(231); - dam_pr = child_GC_CisAjtCkuAlv_Hubbard_MPI( + child_GC_CisAjtCkuAlv_Hubbard_MPI( X->Def.PairHopping[i][0], sigma1, X->Def.PairHopping[i][1], sigma1, X->Def.PairHopping[i][0], sigma2, X->Def.PairHopping[i][1], sigma2, X->Def.ParaPairHopping[i], X, nstate, tmp_v0, tmp_v1); @@ -477,7 +475,7 @@ int mltplyHubbardGC( for (ihermite = 0; ihermite<2; ihermite++) { idx = i + ihermite; pairhopp_GetInfo(idx, X); - dam_pr += GC_pairhopp(nstate, tmp_v0, tmp_v1, X); + GC_pairhopp(nstate, tmp_v0, tmp_v1, X); }/*for (ihermite = 0; ihermite<2; ihermite++)*/ StopTimer(232); } @@ -494,7 +492,7 @@ int mltplyHubbardGC( || X->Def.ExchangeCoupling[i][1] + 1 > X->Def.Nsite) { StartTimer(241); - dam_pr = child_GC_CisAjtCkuAlv_Hubbard_MPI( + child_GC_CisAjtCkuAlv_Hubbard_MPI( X->Def.ExchangeCoupling[i][0], sigma1, X->Def.ExchangeCoupling[i][1], sigma1, X->Def.ExchangeCoupling[i][1], sigma2, X->Def.ExchangeCoupling[i][0], sigma2, X->Def.ParaExchangeCoupling[i], X, nstate, tmp_v0, tmp_v1); @@ -503,7 +501,7 @@ int mltplyHubbardGC( else { StartTimer(242); exchange_GetInfo(i, X); - dam_pr = GC_exchange(nstate, tmp_v0, tmp_v1, X); + GC_exchange(nstate, tmp_v0, tmp_v1, X); StopTimer(242); } }/*for (i = 0; i < X->Def.NExchangeCoupling; i++)*/ @@ -522,7 +520,7 @@ int mltplyHubbardGC( @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex pairhopp( +void pairhopp( int nstate,//!<[in] Number of vectors double complex **tmp_v0,//!<[inout] Result vector double complex **tmp_v1,//!<[in] Input producted vector @@ -542,7 +540,7 @@ double complex pairhopp( @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex exchange( +void exchange( int nstate,//!<[in] Number of vectors double complex **tmp_v0,//!<[inout] 
Result vector double complex **tmp_v1,//!<[in] Input producted vector @@ -562,7 +560,7 @@ double complex exchange( @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex general_hopp( +void general_hopp( int nstate,//!<[in] Number of vectors double complex **tmp_v0,//!<[inout] Result vector double complex **tmp_v1,//!<[in] Input producted vector @@ -586,7 +584,7 @@ firstprivate(i_max,X,Asum,Adiff,isite1,isite2,trans) @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_general_hopp( +void GC_general_hopp( int nstate,//!<[in] Number of vectors double complex **tmp_v0,//!<[inout] Result vector double complex **tmp_v1,//!<[in] Input producted vector @@ -606,13 +604,13 @@ double complex GC_general_hopp( #pragma omp parallel for default(none) \ private(j) firstprivate(i_max,X,isite1, trans) shared(tmp_v0, tmp_v1,nstate) for (j = 1; j <= i_max; j++) - GC_CisAis(j, nstate, tmp_v0, tmp_v1, X, isite1, trans); + GC_CisAis(j, nstate, tmp_v0, tmp_v1, isite1, trans); }/*if (isite1 == isite2)*/ else { #pragma omp parallel for default(none) private(j,tmp_off) shared(tmp_v0,tmp_v1,nstate) \ firstprivate(i_max,X,Asum,Adiff,isite1,isite2,trans) for (j = 1; j <= i_max; j++) - GC_CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite1, isite2, Asum, Adiff, trans, &tmp_off); + GC_CisAjt(j, nstate, tmp_v0, tmp_v1, isite1, isite2, Asum, Adiff, trans, &tmp_off); } }/*double complex GC_general_hopp*/ /** @@ -620,7 +618,7 @@ firstprivate(i_max,X,Asum,Adiff,isite1,isite2,trans) @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex general_int( +void general_int( int nstate,//!<[in] Number of vectors double complex **tmp_v0,//!<[inout] Result vector double complex **tmp_v1,//!<[in] Input producted vector @@ -655,7 +653,7 @@ firstprivate(i_max, X, isite1, isite2, isite3, isite4, Asum, Bsum, Adiff, Bdiff, 
if (isite1 == isite2 && isite3 == isite4) { #pragma omp for for (j = 1; j <= i_max; j++) - CisAisCisAis_element(j, isite1, isite3, tmp_V, nstate, tmp_v0, tmp_v1, X, &tmp_off); + CisAisCisAis_element(j, isite1, isite3, tmp_V, nstate, tmp_v0, tmp_v1); }/*if (isite1 == isite2 && isite3 == isite4)*/ else if (isite1 == isite2 && isite3 != isite4) { #pragma omp for @@ -681,7 +679,7 @@ firstprivate(i_max, X, isite1, isite2, isite3, isite4, Asum, Bsum, Adiff, Bdiff, @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_general_int( +void GC_general_int( int nstate,//!<[in] Number of vectors double complex **tmp_v0,//!<[inout] Result vector double complex **tmp_v1,//!<[in] Input producted vector @@ -714,24 +712,24 @@ shared(tmp_v0, tmp_v1,nstate) if (isite1 == isite2 && isite3 == isite4) { #pragma omp for for (j = 1; j <= i_max; j++) - GC_CisAisCisAis_element(j, isite1, isite3, tmp_V, nstate, tmp_v0, tmp_v1, X, &tmp_off); + GC_CisAisCisAis_element(j, isite1, isite3, tmp_V, nstate, tmp_v0, tmp_v1); }/*if (isite1 == isite2 && isite3 == isite4)*/ else if (isite1 == isite2 && isite3 != isite4) { #pragma omp for for (j = 1; j <= i_max; j++) - GC_CisAisCjtAku_element(j, isite1, isite3, isite4, Bsum, Bdiff, tmp_V, nstate, tmp_v0, tmp_v1, X, &tmp_off); + GC_CisAisCjtAku_element(j, isite1, isite3, isite4, Bsum, Bdiff, tmp_V, nstate, tmp_v0, tmp_v1, &tmp_off); }/*if (isite1 == isite2 && isite3 != isite4)*/ else if (isite1 != isite2 && isite3 == isite4) { #pragma omp for for (j = 1; j <= i_max; j++) GC_CisAjtCkuAku_element( - j, isite1, isite2, isite3, Asum, Adiff, tmp_V, nstate, tmp_v0, tmp_v1, X, &tmp_off); + j, isite1, isite2, isite3, Asum, Adiff, tmp_V, nstate, tmp_v0, tmp_v1, &tmp_off); }/*if (isite1 != isite2 && isite3 == isite4)*/ else if (isite1 != isite2 && isite3 != isite4) { #pragma omp for for (j = 1; j <= i_max; j++) GC_CisAjtCkuAlv_element( - j, isite1, isite2, isite3, isite4, Asum, Adiff, Bsum, Bdiff, 
tmp_V, nstate, tmp_v0, tmp_v1, X, &tmp_off_2); + j, isite1, isite2, isite3, isite4, Asum, Adiff, Bsum, Bdiff, tmp_V, nstate, tmp_v0, tmp_v1, &tmp_off_2); }/*if (isite1 != isite2 && isite3 != isite4)*/ }/*End of parallel region*/ }/*double complex GC_general_int*/ @@ -740,7 +738,7 @@ shared(tmp_v0, tmp_v1,nstate) @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_pairhopp( +void GC_pairhopp( int nstate,//!<[in] Number of vectors double complex **tmp_v0,//!<[inout] Result vector double complex **tmp_v1,//!<[in] Input producted vector @@ -760,7 +758,7 @@ firstprivate(i_max,X,off) private(j) shared(tmp_v0, tmp_v1,nstate) @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_exchange( +void GC_exchange( int nstate,//!<[in] Number of vectors double complex **tmp_v0, double complex **tmp_v1, diff --git a/src/mltplyHubbardCore.c b/src/mltplyHubbardCore.c index 82eb6d87e..9c1204cc9 100644 --- a/src/mltplyHubbardCore.c +++ b/src/mltplyHubbardCore.c @@ -74,7 +74,6 @@ int general_hopp_GetInfo( @author Kazuyoshi Yoshimi (The University of Tokyo) */ int general_int_GetInfo( - int iInterAll,//!<[in] It is not used struct BindStruct *X,//!<[inout] long unsigned int isite1,//!<[in] Site index long unsigned int isite2,//!<[in] Site index @@ -234,7 +233,6 @@ void GC_CisAis( int nstate, double complex **tmp_v0,//!<[inout] Result vector double complex **tmp_v1,//!<[in] Input producted vector - struct BindStruct *X,//!<[inout] long unsigned int is1_spin,//!<[in] Mask for occupation of @f$(i \sigma)@f$ double complex tmp_trans//!<[in] Transfer integral ) { @@ -254,9 +252,9 @@ void GC_CisAis( */ void GC_AisCis( long unsigned int j,//!<[in] Index of element of wavefunction - int nstate, double complex **tmp_v0,//!<[inout] Result vector + int nstate, + double complex **tmp_v0,//!<[inout] Result vector double complex **tmp_v1,//!<[in] Input producted 
vector - struct BindStruct *X,//!<[inout] long unsigned int is1_spin,//!<[in] Mask for occupation of @f$(i \sigma)@f$ double complex tmp_trans//!<[in] Transfer integral ) { @@ -278,7 +276,6 @@ void GC_AisCis( */ int child_CisAis( long unsigned int list_1_j, - struct BindStruct *X, long unsigned int is1_spin ) { int A_ibit_tmp; @@ -335,7 +332,6 @@ void GC_CisAjt( long unsigned int j,//!<[in] Index of wavefunction int nstate, double complex **tmp_v0,//!<[in] @f$v_0 = H v_1@f$ double complex **tmp_v1,//!<[in]Vector to be producted - struct BindStruct *X,//!<[inout] long unsigned int is1_spin,//!<[in] Mask for occupation of (is) long unsigned int is2_spin,//!<[in] Mask for occupation of (jt) long unsigned int sum_spin,//!<[in] Mask for hopping @@ -385,7 +381,7 @@ int child_CisAjt( long unsigned int off; int sgn = 1; - sgn = child_GC_CisAjt(list_1_j, X, is1_spin, is2_spin, sum_spin, diff_spin, tmp_off); + sgn = child_GC_CisAjt(list_1_j, is1_spin, is2_spin, sum_spin, diff_spin, tmp_off); if (sgn != 0) { if(GetOffComp(list_2_1, list_2_2, *tmp_off, X->Large.irght, X->Large.ilft, X->Large.ihfbit, &off)!=TRUE){ *tmp_off = 0; @@ -407,7 +403,6 @@ int child_CisAjt( */ int child_GC_CisAjt( long unsigned int list_1_j,//!<[in] ::list_1 ? 
- struct BindStruct *X,//!<[in] long unsigned int is1_spin,//!<[in] Mask for occupation of (is) long unsigned int is2_spin,//!<[in] Mask for occupation of (jt) long unsigned int sum_spin,//!<[in] Mask for hopping @@ -445,7 +440,7 @@ int child_GC_CisAjt( @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex exchange_element( +void exchange_element( long unsigned int j,//!<[in] Index of initial wavefunction int nstate, //!<[in] Number of vectors double complex **tmp_v0,//!<[inout] @f$v_0 = H v_1@f$ @@ -498,7 +493,7 @@ double complex exchange_element( @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex pairhopp_element( +void pairhopp_element( long unsigned int j,//!<[in] Index of initial wavefunction int nstate,//!<[in] Number of vectors double complex **tmp_v0,//!<[inout] Resulting wavefunction @@ -542,7 +537,7 @@ double complex pairhopp_element( @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_exchange_element( +void GC_exchange_element( long unsigned int j,//!<[in] Index of initial wavefunction int nstate,//!<[in] Number of vectors double complex **tmp_v0,//!<[inout] Resulting wavefunction @@ -592,7 +587,7 @@ double complex GC_exchange_element( @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_pairhopp_element( +void GC_pairhopp_element( long unsigned int j,//!<[in] Index of initial wavefunction int nstate,//!<[in] Number of vectors double complex **tmp_v0,//!<[inout] Resulting wavefunction @@ -636,22 +631,20 @@ term of canonical Hubbard system @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex CisAisCisAis_element( +void CisAisCisAis_element( long unsigned int j,//!<[in] Index of initial wavefunction long unsigned 
int isite1,//!<[in] Site 1 long unsigned int isite3,//!<[in] Site 3 double complex tmp_V,//!<[in] Coupling constant int nstate,//!<[in] Number of vectors double complex **tmp_v0,//!<[inout] Resulting wavefunction - double complex **tmp_v1,//!<[in] Wavefunction to be multiplied - struct BindStruct *X,//!<[inout] - long unsigned int *tmp_off//!<[out] Index of final wavefunction + double complex **tmp_v1//!<[in] Wavefunction to be multiplied ) { int tmp_sgn; double complex dmv; int one = 1; - tmp_sgn = child_CisAis(list_1[j], X, isite3); - tmp_sgn *= child_CisAis(list_1[j], X, isite1); + tmp_sgn = child_CisAis(list_1[j], isite3); + tmp_sgn *= child_CisAis(list_1[j], isite1); dmv = tmp_V * tmp_sgn; zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); }/*double complex CisAisCisAis_element*/ @@ -661,7 +654,7 @@ term of canonical Hubbard system @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex CisAisCjtAku_element( +void CisAisCjtAku_element( long unsigned int j,//!<[in] Index of initial wavefunction long unsigned int isite1,//!<[in] Site 1 long unsigned int isite3,//!<[in] Site 3 @@ -680,7 +673,7 @@ double complex CisAisCjtAku_element( int one = 1; tmp_sgn = child_CisAjt(list_1[j], X, isite3, isite4, Bsum, Bdiff, tmp_off); if (tmp_sgn != 0) { - tmp_sgn *= child_CisAis(list_1[*tmp_off], X, isite1); + tmp_sgn *= child_CisAis(list_1[*tmp_off], isite1); if (tmp_sgn != 0) { dmv = tmp_V * tmp_sgn; zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[*tmp_off][0], &one); @@ -693,7 +686,7 @@ term of canonical Hubbard system @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex CisAjtCkuAku_element( +void CisAjtCkuAku_element( long unsigned int j,//!<[in] Index of initial wavefunction long unsigned int isite1,//!<[in] Site 1 long unsigned int isite2,//!<[in] Site 2 @@ -710,7 +703,7 @@ double complex CisAjtCkuAku_element( int tmp_sgn; 
double complex dmv; int one = 1; - tmp_sgn = child_CisAis(list_1[j], X, isite3); + tmp_sgn = child_CisAis(list_1[j], isite3); if (tmp_sgn != 0) { tmp_sgn *= child_CisAjt(list_1[j], X, isite1, isite2, Asum, Adiff, tmp_off); if (tmp_sgn != 0) { @@ -725,7 +718,7 @@ term of canonical Hubbard system @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex CisAjtCkuAlv_element( +void CisAjtCkuAlv_element( long unsigned int j,//!<[in] Index of initial wavefunction long unsigned int isite1,//!<[in] Site 1 long unsigned int isite2,//!<[in] Site 2 @@ -747,7 +740,7 @@ double complex CisAjtCkuAlv_element( int one = 1; double complex dmv; - tmp_sgn = child_GC_CisAjt(list_1[j], X, isite3, isite4, Bsum, Bdiff, &tmp_off_1); + tmp_sgn = child_GC_CisAjt(list_1[j], isite3, isite4, Bsum, Bdiff, &tmp_off_1); if (tmp_sgn != 0) { tmp_sgn *= child_CisAjt(tmp_off_1, X, isite1, isite2, Asum, Adiff, tmp_off_2); @@ -764,22 +757,20 @@ term of grandcanonical Hubbard system @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_CisAisCisAis_element( +void GC_CisAisCisAis_element( long unsigned int j,//!<[in] Index of initial wavefunction long unsigned int isite1,//!<[in] Site 1 long unsigned int isite3,//!<[in] Site 3 double complex tmp_V,//!<[in] Coupling constant int nstate,//!<[in] Number of vectors double complex **tmp_v0,//!<[inout] Resulting wavefunction - double complex **tmp_v1,//!<[in] Wavefunction to be multiplied - struct BindStruct *X,//!<[inout] - long unsigned int *tmp_off//!<[out] Index of final wavefunction + double complex **tmp_v1//!<[in] Wavefunction to be multiplied ) { int tmp_sgn; double complex dmv; int one = 1; - tmp_sgn = child_CisAis(j - 1, X, isite3); - tmp_sgn *= child_CisAis(j - 1, X, isite1); + tmp_sgn = child_CisAis(j - 1, isite3); + tmp_sgn *= child_CisAis(j - 1, isite1); if (tmp_sgn != 0) { dmv = tmp_V * tmp_sgn; zaxpy_(&nstate, 
&dmv, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); @@ -791,7 +782,7 @@ term of grandcanonical Hubbard system @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_CisAisCjtAku_element( +void GC_CisAisCjtAku_element( long unsigned int j,//!<[in] Index of initial wavefunction long unsigned int isite1,//!<[in] Site 1 long unsigned int isite3,//!<[in] Site 3 @@ -802,15 +793,14 @@ double complex GC_CisAisCjtAku_element( int nstate,//!<[in] Number of vectors double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1,//!<[in] Wavefunction to be multiplied - struct BindStruct *X,//!<[inout] long unsigned int *tmp_off//!<[out] Index of final wavefunction ) { int tmp_sgn; double complex dmv; int one = 1; - tmp_sgn = child_GC_CisAjt((j - 1), X, isite3, isite4, Bsum, Bdiff, tmp_off); + tmp_sgn = child_GC_CisAjt((j - 1), isite3, isite4, Bsum, Bdiff, tmp_off); if (tmp_sgn != 0) { - tmp_sgn *= child_CisAis(*tmp_off, X, isite1); + tmp_sgn *= child_CisAis(*tmp_off, isite1); if (tmp_sgn != 0) { dmv = tmp_V * tmp_sgn; zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[*tmp_off + 1][0], &one); @@ -823,7 +813,7 @@ term of grandcanonical Hubbard system @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_CisAjtCkuAku_element( +void GC_CisAjtCkuAku_element( long unsigned int j,//!<[in] Index of initial wavefunction long unsigned int isite1,//!<[in] Site 1 long unsigned int isite2,//!<[in] Site 2 @@ -834,15 +824,14 @@ double complex GC_CisAjtCkuAku_element( int nstate,//!<[in] Number of vectors double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1,//!<[in] Wavefunction to be multiplied - struct BindStruct *X,//!<[inout] long unsigned int *tmp_off//!<[out] Index of final wavefunction ) { int tmp_sgn; double complex dmv; int one = 1; - tmp_sgn = child_CisAis((j - 1), X, isite3); + tmp_sgn = 
child_CisAis(j - 1, isite3); if (tmp_sgn != 0) { - tmp_sgn *= child_GC_CisAjt((j - 1), X, isite1, isite2, Asum, Adiff, tmp_off); + tmp_sgn *= child_GC_CisAjt(j - 1, isite1, isite2, Asum, Adiff, tmp_off); if (tmp_sgn != 0) { dmv = tmp_V * tmp_sgn; zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[*tmp_off + 1][0], &one); @@ -855,7 +844,7 @@ term of grandcanonical Hubbard system @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_CisAjtCkuAlv_element( +void GC_CisAjtCkuAlv_element( long unsigned int j,//!<[in] Index of initial wavefunction long unsigned int isite1,//!<[in] Site 1 long unsigned int isite2,//!<[in] Site 2 @@ -869,7 +858,6 @@ double complex GC_CisAjtCkuAlv_element( int nstate,//!<[in] Number of vectors double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1,//!<[in] Wavefunction to be multiplied - struct BindStruct *X,//!<[inout] long unsigned int *tmp_off_2//!<[out] Index of final wavefunction ) { int tmp_sgn; @@ -877,9 +865,9 @@ double complex GC_CisAjtCkuAlv_element( double complex dmv; int one = 1; - tmp_sgn = child_GC_CisAjt((j - 1), X, isite3, isite4, Bsum, Bdiff, &tmp_off_1); + tmp_sgn = child_GC_CisAjt(j - 1, isite3, isite4, Bsum, Bdiff, &tmp_off_1); if (tmp_sgn != 0) { - tmp_sgn *= child_GC_CisAjt(tmp_off_1, X, isite1, isite2, Asum, Adiff, tmp_off_2); + tmp_sgn *= child_GC_CisAjt(tmp_off_1, isite1, isite2, Asum, Adiff, tmp_off_2); if (tmp_sgn != 0) { dmv = tmp_V * tmp_sgn; zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[*tmp_off_2 + 1][0], &one); diff --git a/src/mltplyMPIHubbard.c b/src/mltplyMPIHubbard.c index 28f0ad9c0..75b109248 100644 --- a/src/mltplyMPIHubbard.c +++ b/src/mltplyMPIHubbard.c @@ -45,7 +45,7 @@ When both site1 and site2 are in the inter process region. 
@author Mitsuaki Kawamura (The University of Tokyo) @return fragment of @f$\langle v_1|{\hat H}|v_1\rangle@f$ */ -double complex child_GC_general_hopp_MPIdouble( +void child_GC_general_hopp_MPIdouble( int org_isite1,//!<[in] @f$i_1@f$ of @f$c_{i_1 \sigma_1}^\dagger c_{i_2 \sigma_2}@f$ int org_ispin1,//!<[in] @f$\sigma_1@f$ of @f$c_{i_1 \sigma_1}^\dagger c_{i_2 \sigma_2}@f$ int org_isite2,//!<[in] @f$i_2@f$ of @f$c_{i_1 \sigma_1}^\dagger c_{i_2 \sigma_2}@f$ @@ -91,7 +91,7 @@ When both site1 and site2 are in the inter process region. @author Mitsuaki Kawamura (The University of Tokyo) @return fragment of @f$\langle v_1|{\hat H}|v_1\rangle@f$ */ -double complex child_CisAjt_MPIdouble( +void child_CisAjt_MPIdouble( int org_isite1,//!<[in] @f$i_1@f$ of @f$c_{i_1 \sigma_1}^\dagger c_{i_2 \sigma_2}@f$ int org_ispin1,//!<[in] @f$\sigma_1@f$ of @f$c_{i_1 \sigma_1}^\dagger c_{i_2 \sigma_2}@f$ int org_isite2,//!<[in] @f$i_2@f$ of @f$c_{i_1 \sigma_1}^\dagger c_{i_2 \sigma_2}@f$ @@ -165,7 +165,7 @@ void GC_general_hopp_MPIsingle( @author Mitsuaki Kawamura (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex child_GC_general_hopp_MPIsingle( +void child_GC_general_hopp_MPIsingle( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite2,//!<[in] Site 2 @@ -255,7 +255,7 @@ void general_hopp_MPIdouble( When both site1 and site2 are in the inter process region. @author Mitsuaki Kawamura (The University of Tokyo) */ -double complex child_general_hopp_MPIdouble( +void child_general_hopp_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite2,//!<[in] Site 2 @@ -330,7 +330,7 @@ void general_hopp_MPIsingle( When only site2 is in the inter process region. 
@author Mitsuaki Kawamura (The University of Tokyo) */ -double complex child_general_hopp_MPIsingle( +void child_general_hopp_MPIsingle( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite2,//!<[in] Site 2 @@ -404,7 +404,7 @@ double complex child_general_hopp_MPIsingle( When only site2 is in the inter process region. @author Mitsuaki Kawamura (The University of Tokyo) */ -double complex child_CisAjt_MPIsingle( +void child_CisAjt_MPIsingle( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite2,//!<[in] Site 2 diff --git a/src/mltplyMPIHubbardCore.c b/src/mltplyMPIHubbardCore.c index 101ce22aa..610723938 100644 --- a/src/mltplyMPIHubbardCore.c +++ b/src/mltplyMPIHubbardCore.c @@ -217,7 +217,7 @@ int GetSgnInterAll( if (tmp_ispin2 > tmp_ispin1) diffA = tmp_ispin2 - tmp_ispin1 * 2; else diffA = tmp_ispin1-tmp_ispin2*2; - tmp_sgn=child_GC_CisAjt(orgbit, X, tmp_ispin1, tmp_ispin2, tmp_ispin1+tmp_ispin2, diffA, &tmp_off); + tmp_sgn=child_GC_CisAjt(orgbit, tmp_ispin1, tmp_ispin2, tmp_ispin1+tmp_ispin2, diffA, &tmp_off); if(tmp_sgn ==0){ *offbit =0; *Fsgn = 0; @@ -239,7 +239,7 @@ int GetSgnInterAll( else{ if(tmp_ispin2 > tmp_ispin1) diffA = tmp_ispin2 - tmp_ispin1*2; else diffA = tmp_ispin1-tmp_ispin2*2; - tmp_sgn *=child_GC_CisAjt(tmp_off, X, tmp_ispin1, tmp_ispin2, tmp_ispin1+tmp_ispin2, diffA, offbit); + tmp_sgn *=child_GC_CisAjt(tmp_off, tmp_ispin1, tmp_ispin2, tmp_ispin1+tmp_ispin2, diffA, offbit); if(tmp_sgn ==0){ *offbit =0; @@ -256,7 +256,7 @@ int GetSgnInterAll( @brief Compute @f$c_{is}^\dagger c_{is} c_{jt}^\dagger c_{jt}@f$ term of grandcanonical Hubbard system */ -double complex child_GC_CisAisCjtAjt_Hubbard_MPI( +void child_GC_CisAisCjtAjt_Hubbard_MPI( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite3,//!<[in] Site 3 @@ -300,7 +300,7 @@ double complex child_GC_CisAisCjtAjt_Hubbard_MPI( @brief Compute @f$c_{is}^\dagger c_{jt} c_{ku}^\dagger c_{ku}@f$ term of grandcanonical Hubbard system 
*/ -double complex child_GC_CisAjtCkuAku_Hubbard_MPI( +void child_GC_CisAjtCkuAku_Hubbard_MPI( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite2,//!<[in] Site 2 @@ -368,12 +368,12 @@ firstprivate(i_max,X,Asum,Adiff,isite1,isite2, tmp_V) \ { #pragma omp for for (j = 1; j <= i_max; j++) - GC_CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite2, isite1, Asum, Adiff, tmp_V, &tmp_off); + GC_CisAjt(j, nstate, tmp_v0, tmp_v1, isite2, isite1, Asum, Adiff, tmp_V, &tmp_off); if (X->Large.mode != M_CORR) { #pragma omp for for (j = 1; j <= i_max; j++) - GC_CisAjt(j, nstate, tmp_v0, tmp_v1, X, isite1, isite2, Asum, Adiff, tmp_V, &tmp_off); + GC_CisAjt(j, nstate, tmp_v0, tmp_v1, isite1, isite2, Asum, Adiff, tmp_V, &tmp_off); }/*if (X->Large.mode != M_CORR)*/ }/*End of paralle region*/ return; @@ -424,7 +424,7 @@ firstprivate(idim_max_buf,tmp_V,X,tmp_isite1,tmp_isite2,tmp_isite3,tmp_isite4) @brief Compute @f$c_{is}^\dagger c_{is} c_{jt}^\dagger c_{ku}@f$ term of grandcanonical Hubbard system */ -double complex child_GC_CisAisCjtAku_Hubbard_MPI( +void child_GC_CisAisCjtAku_Hubbard_MPI( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite3,//!<[in] Site 3 @@ -445,7 +445,7 @@ double complex child_GC_CisAisCjtAku_Hubbard_MPI( @brief Compute @f$c_{is}^\dagger c_{jt} c_{ku}^\dagger c_{lv}@f$ term of grandcanonical Hubbard system */ -double complex child_GC_CisAjtCkuAlv_Hubbard_MPI( +void child_GC_CisAjtCkuAlv_Hubbard_MPI( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite2,//!<[in] Site 2 @@ -521,7 +521,7 @@ double complex child_GC_CisAjtCkuAlv_Hubbard_MPI( #pragma omp parallel for default(none) private(j, tmp_off) \ firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0,nstate) for (j = 1; j <= i_max; j++) - GC_CisAjt(j - 1, nstate, tmp_v0, tmp_v1, X, isite1, isite4, (isite1 + isite4), Adiff, tmp_V, &tmp_off); + GC_CisAjt(j - 1, nstate, tmp_v0, tmp_v1, isite1, isite4, (isite1 + isite4), Adiff, tmp_V, 
&tmp_off); //calc -CisAku njv child_GC_CisAjtCkuAku_Hubbard_MPI(org_isite1, org_ispin1, org_isite4, org_ispin4, @@ -530,7 +530,7 @@ double complex child_GC_CisAjtCkuAlv_Hubbard_MPI( #pragma omp parallel for default(none) private(j, tmp_off) \ firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, tmp_v0,nstate) for (j = 1; j <= i_max; j++) - GC_CisAjt(j - 1, nstate, tmp_v0, tmp_v1, X, isite4, isite1, (isite1 + isite4), Adiff, tmp_V, &tmp_off); + GC_CisAjt(j - 1, nstate, tmp_v0, tmp_v1, isite4, isite1, (isite1 + isite4), Adiff, tmp_V, &tmp_off); //calc -njvCkuAis child_GC_CisAisCjtAku_Hubbard_MPI(org_isite2, org_ispin2, org_isite4, org_ispin4, @@ -560,13 +560,13 @@ double complex child_GC_CisAjtCkuAlv_Hubbard_MPI( else Bdiff = isite3 - isite4 * 2; if (iFlgHermite == FALSE) { - Fsgn = child_GC_CisAjt((long unsigned int) myrank, X, isite2, isite1, (isite1 + isite2), Adiff, &tmp_off2); - Fsgn *= child_GC_CisAjt(tmp_off2, X, isite4, isite3, (isite3 + isite4), Bdiff, &tmp_off); + Fsgn = child_GC_CisAjt((long unsigned int) myrank, isite2, isite1, (isite1 + isite2), Adiff, &tmp_off2); + Fsgn *= child_GC_CisAjt(tmp_off2, isite4, isite3, (isite3 + isite4), Bdiff, &tmp_off); tmp_V *= Fsgn; }/*if (iFlgHermite == FALSE)*/ else { - Fsgn = child_GC_CisAjt((long unsigned int) myrank, X, isite3, isite4, (isite3 + isite4), Bdiff, &tmp_off2); - Fsgn *= child_GC_CisAjt(tmp_off2, X, isite1, isite2, (isite1 + isite2), Adiff, &tmp_off); + Fsgn = child_GC_CisAjt((long unsigned int) myrank, isite3, isite4, (isite3 + isite4), Bdiff, &tmp_off2); + Fsgn *= child_GC_CisAjt(tmp_off2, isite1, isite2, (isite1 + isite2), Adiff, &tmp_off); tmp_V *= Fsgn; }/*if (iFlgHermite == TRUE)*/ @@ -590,7 +590,7 @@ double complex child_GC_CisAjtCkuAlv_Hubbard_MPI( @brief Compute @f$c_{is}^\dagger c_{is}@f$ term of grandcanonical Hubbard system */ -double complex child_GC_CisAis_Hubbard_MPI( +void child_GC_CisAis_Hubbard_MPI( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 double 
complex tmp_V,//!<[in] Coupling constant @@ -626,7 +626,7 @@ double complex child_GC_CisAis_Hubbard_MPI( @brief Compute @f$c_{is}^\dagger c_{jt}@f$ term of grandcanonical Hubbard system */ -double complex child_GC_CisAjt_Hubbard_MPI( +void child_GC_CisAjt_Hubbard_MPI( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite2,//!<[in] Site 2 @@ -653,7 +653,7 @@ double complex child_GC_CisAjt_Hubbard_MPI( @brief Compute @f$c_{is}^\dagger c_{is} c_{jt}^\dagger c_{jt}@f$ term of canonical Hubbard system */ -double complex child_CisAisCjtAjt_Hubbard_MPI( +void child_CisAisCjtAjt_Hubbard_MPI( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite3,//!<[in] Site 3 @@ -693,7 +693,7 @@ shared(tmp_v0,tmp_v1,list_1,org_isite1,org_ispin1,org_isite3,org_ispin3,nstate,o @brief Compute @f$c_{is}^\dagger c_{jt} c_{ku}^\dagger c_{lv}@f$ term of canonical Hubbard system */ -double complex child_CisAjtCkuAlv_Hubbard_MPI( +void child_CisAjtCkuAlv_Hubbard_MPI( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite2,//!<[in] Site 2 @@ -806,13 +806,13 @@ firstprivate(i_max, tmp_V, X, isite1, isite4, Adiff) shared(tmp_v1, nstate, tmp_ else Bdiff = isite3 - isite4 * 2; if (iFlgHermite == FALSE) { - Fsgn = child_GC_CisAjt((long unsigned int) myrank, X, isite2, isite1, (isite1 + isite2), Adiff, &tmp_off2); - Fsgn *= child_GC_CisAjt(tmp_off2, X, isite4, isite3, (isite3 + isite4), Bdiff, &tmp_off); + Fsgn = child_GC_CisAjt((long unsigned int) myrank, isite2, isite1, (isite1 + isite2), Adiff, &tmp_off2); + Fsgn *= child_GC_CisAjt(tmp_off2, isite4, isite3, (isite3 + isite4), Bdiff, &tmp_off); tmp_V *= Fsgn; }/*if (iFlgHermite == FALSE)*/ else { - Fsgn = child_GC_CisAjt((long unsigned int) myrank, X, isite3, isite4, (isite3 + isite4), Bdiff, &tmp_off2); - Fsgn *= child_GC_CisAjt(tmp_off2, X, isite1, isite2, (isite1 + isite2), Adiff, &tmp_off); + Fsgn = child_GC_CisAjt((long unsigned int) myrank, isite3, isite4, (isite3 + isite4), Bdiff, 
&tmp_off2); + Fsgn *= child_GC_CisAjt(tmp_off2, isite1, isite2, (isite1 + isite2), Adiff, &tmp_off); tmp_V *= Fsgn; }/*if (iFlgHermite == TRUE)*/ #pragma omp parallel default(none) private(j,ioff) \ @@ -858,7 +858,7 @@ org_isite1, org_ispin1, org_isite2, org_ispin2, org_isite3, org_ispin3, org_isit @brief Compute @f$c_{is}^\dagger c_{jt} c_{ku}^\dagger c_{ku}@f$ term of canonical Hubbard system */ -double complex child_CisAjtCkuAku_Hubbard_MPI( +void child_CisAjtCkuAku_Hubbard_MPI( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite2,//!<[in] Site 2 @@ -986,7 +986,7 @@ firstprivate(idim_max_buf,tmp_V,X,tmp_isite1,tmp_isite2,tmp_isite3,tmp_isite4,is @brief Compute @f$c_{is}^\dagger c_{is} c_{jt}^\dagger c_{ku}@f$ term of canonical Hubbard system */ -double complex child_CisAisCjtAku_Hubbard_MPI( +void child_CisAisCjtAku_Hubbard_MPI( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite3,//!<[in] Site 3 @@ -999,16 +999,12 @@ double complex child_CisAisCjtAku_Hubbard_MPI( double complex **tmp_v0,//!<[inout] Resulting wavefunction double complex **tmp_v1//!<[inout] Initial wavefunction ) { - double complex dam_pr = 0; - - dam_pr = child_CisAjtCkuAku_Hubbard_MPI( + child_CisAjtCkuAku_Hubbard_MPI( org_isite4, org_ispin4, org_isite3, org_ispin3, org_isite1, org_ispin1, conj(tmp_V), X, nstate, tmp_v0, tmp_v1); - - return conj(dam_pr); }/*double complex child_CisAisCjtAku_Hubbard_MPI*/ -double complex child_CisAis_Hubbard_MPI( +void child_CisAis_Hubbard_MPI( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 double complex tmp_V,//!<[in] Coupling constant @@ -1034,7 +1030,7 @@ double complex child_CisAis_Hubbard_MPI( { #pragma omp for for (j = 1; j <= i_max; j++) { - if (child_CisAis(list_1[j], X, isite1) != 0) { + if (child_CisAis(list_1[j], isite1) != 0) { zaxpy_(&nstate, &tmp_V, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); }/*if (X_CisAis(list_1[j], X, isite1) != 0)*/ }/*for (j = 1; j <= i_max; j++)*/ @@ -1048,7 
+1044,7 @@ double complex child_CisAis_Hubbard_MPI( @author Kazuyoshi Yoshimi (The University of Tokyo) @author Youhei Yamaji (The University of Tokyo) */ -double complex child_GC_Cis_MPI( +void child_GC_Cis_MPI( int org_isite,//!<[in] Site i int org_ispin,//!<[in] Spin s double complex tmp_trans,//!<[in] Coupling constant//!<[in] @@ -1096,7 +1092,7 @@ double complex child_GC_Cis_MPI( @author Kazuyoshi Yoshimi (The University of Tokyo) @author Youhei Yamaji (The University of Tokyo) */ -double complex child_GC_Ajt_MPI( +void child_GC_Ajt_MPI( int org_isite,//!<[in] Site j int org_ispin,//!<[in] Spin t double complex tmp_trans,//!<[in] Coupling constant//!<[in] @@ -1137,7 +1133,7 @@ double complex child_GC_Ajt_MPI( @brief Compute @f$c_{is}^\dagger@f$ term of canonical Hubbard system */ -double complex child_Cis_MPI( +void child_Cis_MPI( int org_isite,//!<[in] Site i unsigned int org_ispin,//!<[in] Spin s double complex tmp_trans,//!<[in] Coupling constant @@ -1154,7 +1150,7 @@ double complex child_Cis_MPI( unsigned long int idim_max_buf = 0; unsigned long int j = 0; unsigned long int ioff = 0; - double complex trans, dmv, dam_pr; + double complex trans; int one = 1; // org_isite >= Nsite @@ -1195,7 +1191,7 @@ firstprivate(idim_max_buf, trans, ioff, _irght, _ilft, _ihfbit, list_2_1, list_2 @brief Compute @f$c_{jt}@f$ term of canonical Hubbard system */ -double complex child_Ajt_MPI( +void child_Ajt_MPI( int org_isite,//!<[in] Site j unsigned int org_ispin,//!<[in] Spin t double complex tmp_trans,//!<[in] Coupling constant @@ -1212,7 +1208,7 @@ double complex child_Ajt_MPI( unsigned long int idim_max_buf = 0; unsigned long int j = 0; unsigned long int ioff = 0; - double complex trans, dmv, dam_pr; + double complex trans; int one = 1; // org_isite >= Nsite diff --git a/src/mltplyMPISpin.c b/src/mltplyMPISpin.c index b19463ca7..6f665b0ec 100644 --- a/src/mltplyMPISpin.c +++ b/src/mltplyMPISpin.c @@ -47,7 +47,7 @@ void general_int_spin_MPIdouble( When both site1 and 
site2 are in the inter process region. @author Mitsuaki Kawamura (The University of Tokyo) */ -double complex child_general_int_spin_MPIdouble( +void child_general_int_spin_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_ispin2,//!<[in] Spin 2 @@ -100,7 +100,7 @@ double complex child_general_int_spin_MPIdouble( When both site1 and site2 are in the inter process region. @author Mitsuaki Kawamura (The University of Tokyo) */ -double complex child_general_int_spin_TotalS_MPIdouble( +void child_general_int_spin_TotalS_MPIdouble( int org_isite1,//!<[in] site 1 int org_isite3,//!<[in] site 3 struct BindStruct *X,//!<[inout] @@ -158,7 +158,7 @@ void general_int_spin_MPIsingle( @brief General interaction term of canonical spin system. site 3 is in the inter process region */ -double complex child_general_int_spin_MPIsingle( +void child_general_int_spin_MPIsingle( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_ispin2,//!<[in] Spin 2 @@ -232,7 +232,7 @@ void GC_general_int_spin_MPIdouble( ){ if (X->Def.InterAll_OffDiagonal[i_int][1] == X->Def.InterAll_OffDiagonal[i_int][3] && X->Def.InterAll_OffDiagonal[i_int][5] != X->Def.InterAll_OffDiagonal[i_int][7]) { - GC_CisAisCjuAjv_spin_MPIdouble(i_int, X, tmp_v0, nstate, tmp_v1); + GC_CisAisCjuAjv_spin_MPIdouble(i_int, X, nstate, tmp_v0, tmp_v1); } else if (X->Def.InterAll_OffDiagonal[i_int][1] != X->Def.InterAll_OffDiagonal[i_int][3] && X->Def.InterAll_OffDiagonal[i_int][5] == X->Def.InterAll_OffDiagonal[i_int][7]) { diff --git a/src/mltplyMPISpinCore.c b/src/mltplyMPISpinCore.c index 96bda9638..2cb4870ab 100644 --- a/src/mltplyMPISpinCore.c +++ b/src/mltplyMPISpinCore.c @@ -95,7 +95,7 @@ void GC_CisAitCiuAiv_spin_MPIdouble( @author Kazuyoshi Yoshimi (The University of Tokyo) @author Mitsuaki Kawamura (The University of Tokyo) */ -double complex child_GC_CisAitCiuAiv_spin_MPIdouble( +void child_GC_CisAitCiuAiv_spin_MPIdouble( int org_isite1,//!<[in] site i int 
org_ispin1,//!<[in] spin s int org_ispin2,//!<[in] spin t @@ -161,8 +161,7 @@ void GC_CisAisCjuAjv_spin_MPIdouble( double complex **tmp_v1 /**< [in] v0 = H v1*/ ){ #ifdef MPI - double complex dam_pr; - dam_pr = child_GC_CisAisCjuAjv_spin_MPIdouble( + child_GC_CisAisCjuAjv_spin_MPIdouble( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][4], X->Def.InterAll_OffDiagonal[i_int][5], X->Def.InterAll_OffDiagonal[i_int][7], X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); @@ -173,7 +172,7 @@ void GC_CisAisCjuAjv_spin_MPIdouble( When both site1 and site2 are in the inter process region. @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex child_GC_CisAisCjuAjv_spin_MPIdouble( +void child_GC_CisAisCjuAjv_spin_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite3,//!<[in] Site 3 @@ -198,11 +197,11 @@ double complex child_GC_CisAisCjuAjv_spin_MPIdouble( mask2 = (int)X->Def.Tpow[org_isite3]; origin = myrank ^ mask2; state2 = (origin & mask2) / mask2; - num1 = child_SpinGC_CisAis((unsigned long int) myrank + 1, X, mask1, org_ispin1); + num1 = child_SpinGC_CisAis((unsigned long int) myrank + 1, mask1, org_ispin1); if (num1 != 0 && state2 == org_ispin4) { Jint = tmp_J; } - else if (child_SpinGC_CisAis(origin + 1, X, mask1, org_ispin1) == TRUE && state2 == org_ispin3) { + else if (child_SpinGC_CisAis(origin + 1, mask1, org_ispin1) == TRUE && state2 == org_ispin3) { Jint = conj(tmp_J); if (X->Large.mode == M_CORR ||X->Large.mode == H_CORR || X->Large.mode == M_CALCSPEC) Jint = 0; } @@ -228,8 +227,7 @@ void GC_CisAitCjuAju_spin_MPIdouble( double complex **tmp_v1//!<[in] v0 = H v1 ) { - double complex dam_pr; - dam_pr = child_GC_CisAitCjuAju_spin_MPIdouble( + child_GC_CisAitCjuAju_spin_MPIdouble( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][3], X->Def.InterAll_OffDiagonal[i_int][4], 
X->Def.InterAll_OffDiagonal[i_int][5], X->Def.ParaInterAll_OffDiagonal[i_int], X, nstate, tmp_v0, tmp_v1); @@ -239,7 +237,7 @@ void GC_CisAitCjuAju_spin_MPIdouble( When both site1 and site2 are in the inter process region. @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex child_GC_CisAitCjuAju_spin_MPIdouble( +void child_GC_CisAitCjuAju_spin_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_ispin2,//!<[in] Spin 2 @@ -264,7 +262,7 @@ double complex child_GC_CisAitCjuAju_spin_MPIdouble( origin = myrank ^ mask1; state1 = (origin & mask1) / mask1; mask2 = (int)X->Def.Tpow[org_isite3]; - num1 = child_SpinGC_CisAis(origin + 1, X, mask2, org_ispin3); + num1 = child_SpinGC_CisAis(origin + 1, mask2, org_ispin3); if (state1 == org_ispin2) { if (num1 != 0) { Jint = tmp_J; @@ -274,7 +272,7 @@ double complex child_GC_CisAitCjuAju_spin_MPIdouble( } }/*if (state1 == org_ispin2)*/ else {//state1 = org_ispin1 - num1 = child_SpinGC_CisAis((unsigned long int) myrank + 1, X, mask2, org_ispin3); + num1 = child_SpinGC_CisAis((unsigned long int) myrank + 1, mask2, org_ispin3); if (num1 != 0) { Jint = conj(tmp_J); if (X->Large.mode == M_CORR ||X->Large.mode == H_CORR || X->Large.mode == M_CALCSPEC) { @@ -296,7 +294,7 @@ double complex child_GC_CisAitCjuAju_spin_MPIdouble( When both site1 and site2 are in the inter process region. 
@author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex child_GC_CisAisCjuAju_spin_MPIdouble( +void child_GC_CisAisCjuAju_spin_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite3,//!<[in] Site 3 @@ -313,8 +311,8 @@ double complex child_GC_CisAisCjuAju_spin_MPIdouble( int one = 1; mask1 = (int)X->Def.Tpow[org_isite1]; mask2 = (int)X->Def.Tpow[org_isite3]; - num1 = child_SpinGC_CisAis((unsigned long int)myrank + 1, X, mask1, org_ispin1); - num2 = child_SpinGC_CisAis((unsigned long int)myrank + 1, X, mask2, org_ispin3); + num1 = child_SpinGC_CisAis((unsigned long int)myrank + 1, mask1, org_ispin1); + num2 = child_SpinGC_CisAis((unsigned long int)myrank + 1, mask2, org_ispin3); #pragma omp parallel default(none) private(j, dmv) \ firstprivate(tmp_J, X, num1, num2) shared(tmp_v1, tmp_v0,nstate,one) @@ -331,7 +329,7 @@ double complex child_GC_CisAisCjuAju_spin_MPIdouble( When both site1 and site2 are in the inter process region. @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex child_GC_CisAisCjuAju_spin_MPIsingle( +void child_GC_CisAisCjuAju_spin_MPIsingle( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite3,//!<[in] Site 3 @@ -349,14 +347,14 @@ double complex child_GC_CisAisCjuAju_spin_MPIsingle( Jint = tmp_J; mask1 = (int)X->Def.Tpow[org_isite1]; mask2 = (int)X->Def.Tpow[org_isite3]; - num2 = child_SpinGC_CisAis((unsigned long int) myrank + 1, X, mask2, org_ispin3); + num2 = child_SpinGC_CisAis((unsigned long int) myrank + 1, mask2, org_ispin3); #pragma omp parallel default(none) private(j, dmv, num1) \ firstprivate(Jint, X, num2, mask1, org_ispin1) shared(tmp_v1, tmp_v0,nstate,one) { #pragma omp for for (j = 1; j <= X->Check.idim_max; j++) { - num1 = child_SpinGC_CisAis(j, X, mask1, org_ispin1); + num1 = child_SpinGC_CisAis(j, mask1, org_ispin1); dmv = Jint * num1 * num2; zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); }/*for (j = 1; j <= 
X->Check.idim_max; j++)*/ @@ -374,8 +372,7 @@ void GC_CisAitCiuAiv_spin_MPIsingle( double complex **tmp_v0,//!<[out] Result v0 = H v1 double complex **tmp_v1//!<[in] v0 = H v1 ){ - double complex dam_pr; - dam_pr =child_GC_CisAitCiuAiv_spin_MPIsingle( + child_GC_CisAitCiuAiv_spin_MPIsingle( X->Def.InterAll_OffDiagonal[i_int][0], X->Def.InterAll_OffDiagonal[i_int][1], X->Def.InterAll_OffDiagonal[i_int][3], X->Def.InterAll_OffDiagonal[i_int][4], X->Def.InterAll_OffDiagonal[i_int][5], X->Def.InterAll_OffDiagonal[i_int][7], @@ -385,7 +382,8 @@ void GC_CisAitCiuAiv_spin_MPIsingle( @brief Exchange and Pairlifting term in Spin model + GC When only site2 is in the inter process region. @author Mitsuaki Kawamura (The University of Tokyo) -*/double complex child_GC_CisAitCiuAiv_spin_MPIsingle( +*/ +void child_GC_CisAitCiuAiv_spin_MPIsingle( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_ispin2,//!<[in] Spin 2 @@ -435,7 +433,7 @@ void GC_CisAitCiuAiv_spin_MPIsingle( { #pragma omp for for (j = 0; j < idim_max_buf; j++) { - state1 = child_SpinGC_CisAit(j + 1, X, mask1, state1check, &ioff); + state1 = child_SpinGC_CisAit(j + 1, mask1, state1check, &ioff); if (state1 != 0) { zaxpy_(&nstate, &Jint, &v1buf[j + 1][0], &one, &tmp_v0[ioff + 1][0], &one); }/*if (state1 != 0)*/ @@ -464,7 +462,7 @@ child_GC_CisAisCjuAjv_spin_MPIsingle( When only site2 is in the inter process region. @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex child_GC_CisAisCjuAjv_spin_MPIsingle( +void child_GC_CisAisCjuAjv_spin_MPIsingle( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 2 int org_isite3,//!<[in] Site 1 @@ -541,7 +539,7 @@ void GC_CisAitCjuAju_spin_MPIsingle( When only site2 is in the inter process region. 
@author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex child_GC_CisAitCjuAju_spin_MPIsingle( +void child_GC_CisAitCjuAju_spin_MPIsingle( int org_isite1,//!<[in] Site 1 int org_ispin2,//!<[in] Spin 2 int org_isite3,//!<[in] Site 3 @@ -594,7 +592,7 @@ double complex child_GC_CisAitCjuAju_spin_MPIsingle( @brief @f$c_{is}^\dagger c_{is} c_{ju}^\dagger c_{jv}@f$ term in Spin model. When both site1 and site3 are in the inter process region. */ -double complex child_GC_CisAisCjuAjv_GeneralSpin_MPIdouble( +void child_GC_CisAisCjuAjv_GeneralSpin_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite3,//!<[in] Site 3 @@ -606,9 +604,9 @@ double complex child_GC_CisAisCjuAjv_GeneralSpin_MPIdouble( double complex **tmp_v0,//!<[inout] @f${\bf v}_0=H {\bf v}_1@f$ double complex **tmp_v1//!<[in] Vector to be producted ) { - unsigned long int off, j; - int origin, ierr; - double complex tmp_V, dmv, dam_pr; + unsigned long int off; + int origin; + double complex tmp_V; if (org_isite1 == org_isite3 && org_ispin1 == org_ispin4) {//cisaisciuais=0 && cisaiucisais=0 return; } @@ -640,7 +638,7 @@ double complex child_GC_CisAisCjuAjv_GeneralSpin_MPIdouble( @brief @f$c_{is}^\dagger c_{it} c_{ju}^\dagger c_{ju}@f$ term in Spin model. When both site1 and site3 are in the inter process region. 
*/ -double complex child_GC_CisAitCjuAju_GeneralSpin_MPIdouble( +void child_GC_CisAitCjuAju_GeneralSpin_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_ispin2,//!<[in] Spin 2 @@ -688,7 +686,7 @@ double complex child_GC_CisAitCjuAju_GeneralSpin_MPIdouble( @brief Compute @f$c_{is}^\dagger c_{it} c_{ju}^\dagger c_{jv}@f$ term in the grandcanonical general spin system when both site is in the inter process region */ -double complex child_GC_CisAitCjuAjv_GeneralSpin_MPIdouble( +void child_GC_CisAitCjuAjv_GeneralSpin_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_ispin2,//!<[in] Spin 2 @@ -751,7 +749,7 @@ double complex child_GC_CisAitCjuAjv_GeneralSpin_MPIdouble( @brief Compute @f$c_{is}^\dagger c_{is} c_{ju}^\dagger c_{ju}@f$ term in the grandcanonical general spin system when both site is in the inter process region */ -double complex child_GC_CisAisCjuAju_GeneralSpin_MPIdouble( +void child_GC_CisAisCjuAju_GeneralSpin_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite3,//!<[in] Site 3 @@ -782,7 +780,7 @@ double complex child_GC_CisAisCjuAju_GeneralSpin_MPIdouble( @brief Compute @f$c_{is}^\dagger c_{it}@f$ term in the grandcanonical general spin system when both site is in the inter process region */ -double complex child_GC_CisAit_GeneralSpin_MPIdouble( +void child_GC_CisAit_GeneralSpin_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_ispin2,//!<[in] Spin 2 @@ -818,7 +816,7 @@ double complex child_GC_CisAit_GeneralSpin_MPIdouble( @brief Compute @f$c_{is}^\dagger c_{is}@f$ term in the grandcanonical general spin system when both site is in the inter process region */ -double complex child_GC_CisAis_GeneralSpin_MPIdouble( +void child_GC_CisAis_GeneralSpin_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 double complex tmp_trans,//!<[in] Coupling constant @@ -843,7 +841,7 @@ double complex 
child_GC_CisAis_GeneralSpin_MPIdouble( @brief Compute @f$c_{is} c_{is}^\dagger@f$ term in the grandcanonical general spin system when both site is in the inter process region */ -double complex child_GC_AisCis_GeneralSpin_MPIdouble( +void child_GC_AisCis_GeneralSpin_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 double complex tmp_trans,//!<[in] Coupling constant @@ -868,7 +866,7 @@ double complex child_GC_AisCis_GeneralSpin_MPIdouble( @brief Compute @f$c_{is}^\dagger c_{it}@f$ term in the canonical general spin system when both site is in the inter process region */ -double complex child_CisAit_GeneralSpin_MPIdouble( +void child_CisAit_GeneralSpin_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_ispin2,//!<[in] Spin 2 @@ -914,7 +912,7 @@ shared (tmp_v0, tmp_v1, v1buf,nstate,one) @brief Compute @f$c_{is}^\dagger c_{is}c_{ju}^\dagger c_{jv}@f$ term in the grandcanonical general spin system when one of these site is in the inter process region */ -double complex child_GC_CisAisCjuAjv_GeneralSpin_MPIsingle( +void child_GC_CisAisCjuAjv_GeneralSpin_MPIsingle( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite3,//!<[in] Site 3 @@ -967,7 +965,7 @@ private(j, num1) shared (tmp_v0, tmp_v1, v1buf,nstate,one) @brief Compute @f$c_{is}^\dagger c_{it}c_{ju}^\dagger c_{ju}@f$ term in the grandcanonical general spin system when one of these site is in the inter process region */ -double complex child_GC_CisAitCjuAju_GeneralSpin_MPIsingle( +void child_GC_CisAitCjuAju_GeneralSpin_MPIsingle( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_ispin2,//!<[in] Spin 2 @@ -1031,7 +1029,7 @@ shared (tmp_v0, tmp_v1, v1buf,nstate,one) @brief Compute @f$c_{is}^\dagger c_{is}c_{ju}^\dagger c_{jv}@f$ term in the grandcanonical general spin system when one of these site is in the inter process region */ -double complex child_GC_CisAitCjuAjv_GeneralSpin_MPIsingle( +void 
child_GC_CisAitCjuAjv_GeneralSpin_MPIsingle( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_ispin2,//!<[in] Spin 2 @@ -1091,7 +1089,7 @@ firstprivate(X, tmp_V, isite, IniSpin, FinSpin) private(j, off) \ @brief Compute @f$c_{is}^\dagger c_{is}c_{ju}^\dagger c_{ju}@f$ term in the grandcanonical general spin system when one of these site is in the inter process region */ -double complex child_GC_CisAisCjuAju_GeneralSpin_MPIsingle( +void child_GC_CisAisCjuAju_GeneralSpin_MPIsingle( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite3,//!<[in] Site 3 @@ -1129,7 +1127,7 @@ firstprivate(X, tmp_V, org_isite1, org_ispin1) private(j, dmv, num1) \ @brief Compute @f$c_{is}^\dagger c_{it}c_{ju}^\dagger c_{jv}@f$ term in the canonical general spin system when both sites are in the inter process region */ -double complex child_CisAitCjuAjv_GeneralSpin_MPIdouble( +void child_CisAitCjuAjv_GeneralSpin_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_ispin2,//!<[in] Spin 2 @@ -1196,7 +1194,7 @@ private(j, off) shared (tmp_v0, tmp_v1, list_1buf, v1buf,nstate,one) @brief Compute @f$c_{is}^\dagger c_{is}c_{ju}^\dagger c_{ju}@f$ term in the canonical general spin system when both sites are in the inter process region */ -double complex child_CisAisCjuAju_GeneralSpin_MPIdouble( +void child_CisAisCjuAju_GeneralSpin_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite3,//!<[in] Site 3 @@ -1242,7 +1240,7 @@ double complex child_CisAisCjuAju_GeneralSpin_MPIdouble( @brief Compute @f$c_{is}^\dagger c_{is}c_{ju}^\dagger c_{ju}@f$ term in the canonical general spin system when one of these sites is in the inter process region */ -double complex child_CisAisCjuAju_GeneralSpin_MPIsingle( +void child_CisAisCjuAju_GeneralSpin_MPIsingle( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_isite3,//!<[in] Site 3 @@ -1281,7 +1279,7 @@ firstprivate(X, tmp_V, org_isite1, 
org_ispin1) private(j, dmv, num1) \ @brief Compute @f$c_{is}^\dagger c_{it}c_{ju}^\dagger c_{jv}@f$ term in the canonical general spin system when one of these sites is in the inter process region */ -double complex child_CisAitCjuAjv_GeneralSpin_MPIsingle( +void child_CisAitCjuAjv_GeneralSpin_MPIsingle( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_ispin2,//!<[in] Spin 2 @@ -1345,7 +1343,7 @@ private(j, off, tmp_off) shared (tmp_v0, tmp_v1, list_1buf, v1buf,nstate,one) When both site1 and site2 are in the inter process region. @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex child_GC_CisAit_spin_MPIdouble( +void child_GC_CisAit_spin_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 int org_ispin2,//!<[in] Spin 2 @@ -1386,7 +1384,7 @@ double complex child_GC_CisAit_spin_MPIdouble( When both site1 and site2 are in the inter process region. @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex child_CisAit_spin_MPIdouble( +void child_CisAit_spin_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin2,//!<[in] Spin 2 double complex tmp_trans,//!<[in] Coupling constant @@ -1431,7 +1429,7 @@ shared(v1buf, tmp_v0,nstate,one) When both site1 and site2 are in the inter process region. @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex child_GC_CisAis_spin_MPIdouble( +void child_GC_CisAis_spin_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 double complex tmp_trans,//!<[in] Coupling constant @@ -1451,7 +1449,7 @@ double complex child_GC_CisAis_spin_MPIdouble( When both site1 and site2 are in the inter process region. 
@author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex child_GC_AisCis_spin_MPIdouble( +void child_GC_AisCis_spin_MPIdouble( int org_isite1,//!<[in] Site 1 int org_ispin1,//!<[in] Spin 1 double complex tmp_trans,//!<[in] Coupling constant diff --git a/src/mltplySpin.c b/src/mltplySpin.c index 66ddf1c26..ec76bcb45 100644 --- a/src/mltplySpin.c +++ b/src/mltplySpin.c @@ -463,17 +463,11 @@ void mltplyHalfSpinGC_mini( double complex **tmp_v1//!<[in] Input producted vector ) { long unsigned int j; - long unsigned int i; - long unsigned int off = 0; - long unsigned int is1_spin = 0; /**/ long unsigned int isite1; long unsigned int org_isite1, org_isite2; - long unsigned int org_sigma1, org_sigma2, org_sigma3, org_sigma4; - long unsigned int isA_up, isB_up; + long unsigned int org_sigma1, org_sigma2; long unsigned int tmp_off = 0; - double complex dam_pr; - double complex tmp_trans; long int tmp_sgn; /*[s] For InterAll */ double complex tmp_V; @@ -482,8 +476,7 @@ void mltplyHalfSpinGC_mini( long unsigned int i_max; i_max = X->Check.idim_max; - int ihermite=0; - int idx=0, one = 1; + int one = 1; //EDGeneralTransfer[i][0] -> site_i //EDGeneralTransfer[i][1] -> spin_i @@ -494,32 +487,31 @@ void mltplyHalfSpinGC_mini( org_isite2 = site_j+1; org_sigma1 = spin_i; org_sigma2 = spin_j; - dam_pr=0.0; if(org_isite1 == org_isite2){ if(org_isite1 > X->Def.Nsite){ if(org_sigma1==org_sigma2){ // longitudinal magnetic field - dam_pr += child_GC_CisAis_spin_MPIdouble(org_isite1-1, org_sigma1, 1.0, X, nstate, tmp_v0, tmp_v1); + child_GC_CisAis_spin_MPIdouble(org_isite1-1, org_sigma1, 1.0, X, nstate, tmp_v0, tmp_v1); }else{ // transverse magnetic field X->Large.mode = M_MLTPLY2; - dam_pr += child_GC_CisAit_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, 1.0, X, nstate, tmp_v0, tmp_v1); + child_GC_CisAit_spin_MPIdouble(org_isite1-1, org_sigma1, org_sigma2, 1.0, X, nstate, tmp_v0, tmp_v1); X->Large.mode = M_MLTPLY; } }else{ isite1 = X->Def.Tpow[org_isite1-1]; 
if(org_sigma1==org_sigma2){ // longitudinal magnetic field -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_sgn, tmp_V) \ +#pragma omp parallel for default(none) private(j, tmp_sgn, tmp_V) \ firstprivate(i_max, isite1, org_sigma1, X) shared(tmp_v0,tmp_v1,one,nstate) for(j=1;j<=i_max;j++){ - tmp_V = child_SpinGC_CisAis(j, X, isite1, org_sigma1); + tmp_V = child_SpinGC_CisAis(j, isite1, org_sigma1); zaxpy_(&nstate, &tmp_V, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); } }else{ // transverse magnetic field -#pragma omp parallel for default(none) reduction(+:dam_pr) private(j, tmp_sgn, tmp_off, tmp_V) \ +#pragma omp parallel for default(none) private(j, tmp_sgn, tmp_off, tmp_V) \ firstprivate(i_max, isite1, org_sigma2, X) shared(tmp_v0,tmp_v1,one,nstate) for(j=1;j <= i_max;j++){ - tmp_sgn = child_SpinGC_CisAit(j,X,isite1,org_sigma2,&tmp_off); + tmp_sgn = child_SpinGC_CisAit(j,isite1,org_sigma2,&tmp_off); if(tmp_sgn !=0){ tmp_V = tmp_sgn; zaxpy_(&nstate, &tmp_V, &tmp_v1[j][0], &one, &tmp_v0[tmp_off + 1][0], &one); @@ -607,7 +599,7 @@ int mltplyHalfSpinGC( private(j, tmp_sgn) firstprivate(i_max, is1_spin, sigma2, X,off, tmp_trans) \ shared(tmp_v0, tmp_v1,one,nstate) for (j = 1; j <= i_max; j++) { - tmp_sgn = child_SpinGC_CisAit(j, X, is1_spin, sigma2, &off); + tmp_sgn = child_SpinGC_CisAit(j, is1_spin, sigma2, &off); if(tmp_sgn !=0){ zaxpy_(&nstate, &tmp_trans, &tmp_v1[j][0], &one, &tmp_v0[off + 1][0], &one); }/*if(tmp_sgn !=0)*/ @@ -927,7 +919,7 @@ shared(tmp_v0, tmp_v1,one,nstate) @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex exchange_spin( +void exchange_spin( int nstate, double complex **tmp_v0,//!<[inout] Result vector double complex **tmp_v1,//!<[in] Input producted vector @@ -947,7 +939,7 @@ double complex exchange_spin( @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_exchange_spin( +void 
GC_exchange_spin( int nstate, double complex **tmp_v0,//!<[inout] Result vector double complex **tmp_v1,//!<[in] Input producted vector @@ -967,7 +959,7 @@ double complex GC_exchange_spin( @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_pairlift_spin( +void GC_pairlift_spin( int nstate, double complex **tmp_v0,//!<[inout] Result vector double complex **tmp_v1,//!<[in] Input producted vector @@ -987,7 +979,7 @@ double complex GC_pairlift_spin( @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex general_int_spin( +void general_int_spin( int nstate, double complex **tmp_v0,//!<[inout] Result vector double complex **tmp_v1,//!<[in] Input producted vector @@ -1024,7 +1016,7 @@ shared(tmp_v1, tmp_v0,one,nstate) @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_general_int_spin( +void GC_general_int_spin( int nstate, double complex **tmp_v0,//!<[inout] Result vector double complex **tmp_v1,//!<[in] Input producted vector @@ -1056,25 +1048,25 @@ firstprivate(i_max,X,isA_up,isB_up,org_sigma1,org_sigma2,org_sigma3,org_sigma4,t #pragma omp for for (j = 1; j <= i_max; j++) GC_CisAisCisAis_spin_element( - j, isA_up, isB_up, org_sigma2, org_sigma4, tmp_V, nstate, tmp_v0, tmp_v1, X); + j, isA_up, isB_up, org_sigma2, org_sigma4, tmp_V, nstate, tmp_v0, tmp_v1); } else if (org_sigma1 == org_sigma2 && org_sigma3 != org_sigma4) { #pragma omp for for (j = 1; j <= i_max; j++) GC_CisAisCitAiu_spin_element( - j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, tmp_v0, tmp_v1, X, &tmp_off); + j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, tmp_v0, tmp_v1, &tmp_off); } else if (org_sigma1 != org_sigma2 && org_sigma3 == org_sigma4) { #pragma omp for for (j = 1; j <= i_max; j++) GC_CisAitCiuAiu_spin_element( - j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, 
nstate, tmp_v0, tmp_v1, X, &tmp_off); + j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, tmp_v0, tmp_v1, &tmp_off); } else if (org_sigma1 != org_sigma2 && org_sigma3 != org_sigma4) { #pragma omp for for (j = 1; j <= i_max; j++) GC_CisAitCiuAiv_spin_element( - j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, tmp_v0, tmp_v1, X, &tmp_off); + j, org_sigma2, org_sigma4, isA_up, isB_up, tmp_V, nstate, tmp_v0, tmp_v1, &tmp_off); } }/*End of parallel region*/ }/*double complex GC_general_int_spin*/ diff --git a/src/mltplySpinCore.c b/src/mltplySpinCore.c index d3ac95de7..24c0cf80f 100644 --- a/src/mltplySpinCore.c +++ b/src/mltplySpinCore.c @@ -145,7 +145,7 @@ int child_Spin_CisAit( long unsigned int list_1_j; long unsigned int off; list_1_j = list_1_org[j]; - if (child_SpinGC_CisAit(list_1_j + 1, X, is1_spin, sigma2, &off) != 0) { + if (child_SpinGC_CisAit(list_1_j + 1, is1_spin, sigma2, &off) != 0) { GetOffComp(list_2_1, list_2_2, off, X->Large.irght, X->Large.ilft, X->Large.ihfbit, tmp_off); return 1; } @@ -162,7 +162,6 @@ int child_Spin_CisAit( */ int child_Spin_CisAis( long unsigned int j,//!<[in] Index of wavefunction - struct BindStruct *X,//!<[inout] long unsigned int is1_spin,//!<[in] Bit mask long unsigned int sigma1//!<[in] Target spin state ) { @@ -179,7 +178,6 @@ int child_Spin_CisAis( */ int child_SpinGC_CisAis( long unsigned int j,//!<[in] Index of wavefunction - struct BindStruct *X,//!<[inout] long unsigned int is1_spin,//!<[in] Bit mask long unsigned int sigma1//!<[in] Target spin state ) { @@ -199,7 +197,6 @@ int child_SpinGC_CisAis( */ int child_SpinGC_CisAit( long unsigned int j,//!<[in] Index of wavefunction - struct BindStruct *X,//!<[inout] long unsigned int is1_spin,//!<[in] Bit mask for computing spin state long unsigned int sigma2,//!<[in] Spin state at site 2 long unsigned int *tmp_off//!<[out] Index of final wavefunction @@ -266,7 +263,7 @@ int child_exchange_spin_element( @author Takahiro Misawa (The University of Tokyo) 
@author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex exchange_spin_element( +void exchange_spin_element( long unsigned int j,//!<[in] Index of initial wavefunction int nstate,//!<[in] Number of vectors double complex **tmp_v0,//!<[out] Resulting wavefunction @@ -300,7 +297,7 @@ double complex exchange_spin_element( @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_exchange_spin_element( +void GC_exchange_spin_element( long unsigned int j,//!<[in] Index of initial wavefunction int nstate,//!<[in] Number of vectors double complex **tmp_v0,//!<[out] Resulting wavefunction @@ -331,7 +328,7 @@ double complex GC_exchange_spin_element( @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_pairlift_spin_element( +void GC_pairlift_spin_element( long unsigned int j,//!<[in] Index of initial wavefunction int nstate,//!<[in] Number of vectors double complex **tmp_v0,//!<[out] Resulting wavefunction @@ -361,7 +358,7 @@ canonical spsin system @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex CisAisCisAis_spin_element( +void CisAisCisAis_spin_element( long unsigned int j,//!<[in] Index of initial wavefunction long unsigned int isA_up,//!<[in] Bit mask for spin 1 long unsigned int isB_up,//!<[in] Bit mask for spin 2 @@ -370,15 +367,14 @@ double complex CisAisCisAis_spin_element( double complex tmp_V,//!<[in] Coupling constatnt int nstate,//!<[in] Number of vectors double complex **tmp_v0,//!<[in] Resulting wavefunction - double complex **tmp_v1,//!<[in] Wavefunction to be multiplied - struct BindStruct *X//!<[inout] + double complex **tmp_v1//!<[in] Wavefunction to be multiplied ) { int tmp_sgn; double complex dmv; int one = 1; - tmp_sgn = child_Spin_CisAis(j, X, isB_up, org_sigma4); - tmp_sgn *= child_Spin_CisAis(j, X, isA_up, org_sigma2); + tmp_sgn = 
child_Spin_CisAis(j, isB_up, org_sigma4); + tmp_sgn *= child_Spin_CisAis(j, isA_up, org_sigma2); dmv = tmp_sgn * tmp_V; zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); }/*double complex CisAisCisAis_spin_element*/ @@ -392,7 +388,7 @@ grandcanonical spsin system @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_CisAisCisAis_spin_element( +void GC_CisAisCisAis_spin_element( long unsigned int j,//!<[in] Index of initial wavefunction long unsigned int isA_up,//!<[in] Bit mask for spin 1 long unsigned int isB_up,//!<[in] Bit mask for spin 2 @@ -401,15 +397,14 @@ double complex GC_CisAisCisAis_spin_element( double complex tmp_V,//!<[in] Coupling constatnt int nstate,//!<[in] Number of vectors double complex **tmp_v0,//!<[in] Resulting wavefunction - double complex **tmp_v1,//!<[in] Wavefunction to be multiplied - struct BindStruct *X//!<[inout] + double complex **tmp_v1//!<[in] Wavefunction to be multiplied ) { int tmp_sgn; double complex dmv = 0; int one = 1; - tmp_sgn = child_SpinGC_CisAis(j, X, isB_up, org_sigma4); - tmp_sgn *= child_SpinGC_CisAis(j, X, isA_up, org_sigma2); + tmp_sgn = child_SpinGC_CisAis(j, isB_up, org_sigma4); + tmp_sgn *= child_SpinGC_CisAis(j, isA_up, org_sigma2); if (tmp_sgn != 0) { dmv = tmp_sgn * tmp_V; zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[j][0], &one); @@ -421,7 +416,7 @@ grandcanonical spsin system @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_CisAisCitAiu_spin_element( +void GC_CisAisCitAiu_spin_element( long unsigned int j,//!<[in] Index of initial wavefunction long unsigned int org_sigma2,//!<[in] Target for spin 1 long unsigned int org_sigma4,//!<[in] Target for spin 2 @@ -431,15 +426,14 @@ double complex GC_CisAisCitAiu_spin_element( int nstate,//!<[in] Number of vectors double complex **tmp_v0,//!<[in] Resulting wavefunction double complex **tmp_v1,//!<[in] 
Wavefunction to be multiplied - struct BindStruct *X,//!<[inout] long unsigned int *tmp_off//!<[out] Index of final wavefunction ) { int tmp_sgn; double complex dmv; int one = 1; - tmp_sgn = child_SpinGC_CisAit(j, X, isB_up, org_sigma4, tmp_off); + tmp_sgn = child_SpinGC_CisAit(j, isB_up, org_sigma4, tmp_off); if (tmp_sgn != 0) { - tmp_sgn *= child_SpinGC_CisAis((*tmp_off + 1), X, isA_up, org_sigma2); + tmp_sgn *= child_SpinGC_CisAis(*tmp_off + 1, isA_up, org_sigma2); if (tmp_sgn != 0) { dmv = tmp_sgn * tmp_V; zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[*tmp_off + 1][0], &one); @@ -452,7 +446,7 @@ grandcanonical spsin system @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_CisAitCiuAiu_spin_element( +void GC_CisAitCiuAiu_spin_element( long unsigned int j,//!<[in] Index of initial wavefunction long unsigned int org_sigma2,//!<[in] Target for spin 1 long unsigned int org_sigma4,//!<[in] Target for spin 2 @@ -461,15 +455,14 @@ double complex GC_CisAitCiuAiu_spin_element( double complex tmp_V,//!<[in] Coupling constatnt int nstate, double complex **tmp_v0,//!<[in] Resulting wavefunction double complex **tmp_v1,//!<[in] Wavefunction to be multiplied - struct BindStruct *X,//!<[inout] long unsigned int *tmp_off//!<[out] Index of final wavefunction ) { int tmp_sgn; double complex dmv; int one = 1; - tmp_sgn = child_SpinGC_CisAis(j, X, isB_up, org_sigma4); + tmp_sgn = child_SpinGC_CisAis(j, isB_up, org_sigma4); if (tmp_sgn != 0) { - tmp_sgn *= child_SpinGC_CisAit(j, X, isA_up, org_sigma2, tmp_off); + tmp_sgn *= child_SpinGC_CisAit(j, isA_up, org_sigma2, tmp_off); if (tmp_sgn != 0) { dmv = tmp_sgn * tmp_V; zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[*tmp_off + 1][0], &one); @@ -482,7 +475,7 @@ grandcanonical spsin system @author Takahiro Misawa (The University of Tokyo) @author Kazuyoshi Yoshimi (The University of Tokyo) */ -double complex GC_CisAitCiuAiv_spin_element( +void 
GC_CisAitCiuAiv_spin_element( long unsigned int j,//!<[in] Index of initial wavefunction long unsigned int org_sigma2,//!<[in] Target for spin 1 long unsigned int org_sigma4,//!<[in] Target for spin 2 @@ -491,17 +484,16 @@ double complex GC_CisAitCiuAiv_spin_element( double complex tmp_V,//!<[in] Coupling constatnt int nstate,//!<[in] Number of vectors double complex **tmp_v0,//!<[in] Resulting wavefunction - double complex **tmp_v1,//!<[in] Wavefunction to be multiplied - struct BindStruct *X,//!<[inout] + double complex **tmp_v1,//!<[in] Wavefunction to be multiplieds long unsigned int *tmp_off_2//!<[out] Index of final wavefunction ) { int tmp_sgn; long unsigned int tmp_off_1; double complex dmv; int one = 1; - tmp_sgn = child_SpinGC_CisAit(j, X, isB_up, org_sigma4, &tmp_off_1); + tmp_sgn = child_SpinGC_CisAit(j, isB_up, org_sigma4, &tmp_off_1); if (tmp_sgn != 0) { - tmp_sgn *= child_SpinGC_CisAit((tmp_off_1 + 1), X, isA_up, org_sigma2, tmp_off_2); + tmp_sgn *= child_SpinGC_CisAit(tmp_off_1 + 1, isA_up, org_sigma2, tmp_off_2); if (tmp_sgn != 0) { dmv = tmp_sgn * tmp_V; zaxpy_(&nstate, &dmv, &tmp_v1[j][0], &one, &tmp_v0[*tmp_off_2 + 1][0], &one); diff --git a/src/phys.c b/src/phys.c index 9f781abac..5f8fac14c 100644 --- a/src/phys.c +++ b/src/phys.c @@ -52,7 +52,7 @@ void phys(struct BindStruct *X, //!<[inout] double tmp_N; #ifdef _SCALAPACK double complex *vec_tmp; - int ictxt, ierr, rank; + int rank; long unsigned int j, i_max; i_max = X->Check.idim_max; diff --git a/src/xsetmem.c b/src/xsetmem.c index 708ab3798..103864c4b 100644 --- a/src/xsetmem.c +++ b/src/xsetmem.c @@ -247,6 +247,7 @@ int setmem_large /// \param NInterAll [in] Total number of InterAll interactions. 
/// \author Kazuyoshi Yoshimi /// \version 1.2 +/* void setmem_IntAll_Diagonal( int **InterAllOffDiagonal, double complex *ParaInterAllOffDiagonal, @@ -259,6 +260,7 @@ void setmem_IntAll_Diagonal( InterAllDiagonal = i_2d_allocate(NInterAll, 4); ParaInterAllDiagonal = d_1d_allocate(NInterAll); } +*/ /// /// \brief Set size of lists for the canonical ensemble. /// \param X [in,out] Give the information for getting the list size and get the lists.\n From fa5ce0e1c05716e60c1f758bb89f7c2647ddc1cf Mon Sep 17 00:00:00 2001 From: Mitsuaki Kawamura Date: Mon, 24 Oct 2022 17:32:45 +0900 Subject: [PATCH 39/50] omega in spectrum appears as (w - e_j + e_i), not (w - e_j). --- src/CalcSpectrum.c | 96 +++++++++++++++++++++------- src/CalcSpectrumByBiCG.c | 6 +- src/CalcSpectrumByFullDiag.c | 16 +++-- src/StdFace | 2 +- src/global.c | 2 +- src/include/CalcSpectrumByBiCG.h | 2 +- src/include/CalcSpectrumByFullDiag.h | 2 +- test/spectrum_genspin_ladder.sh | 12 ++-- test/spectrum_genspingc_ladder.sh | 8 +-- test/spectrum_hubbard_square.sh | 30 ++++++--- test/spectrum_hubbardgc_tri.sh | 30 ++++++--- test/spectrum_kondo_chain.sh | 30 ++++++--- test/spectrum_kondogc_chain.sh | 30 ++++++--- test/spectrum_spin_kagome.sh | 18 ++++-- test/spectrum_spingc_honey.sh | 18 ++++-- 15 files changed, 204 insertions(+), 98 deletions(-) diff --git a/src/CalcSpectrum.c b/src/CalcSpectrum.c index ab7cc545d..b42fd4007 100644 --- a/src/CalcSpectrum.c +++ b/src/CalcSpectrum.c @@ -110,10 +110,10 @@ int SetOmega }/**/ //Read Lanczos_Step if (X->iFlgSpecOmegaMax == FALSE) { - X->dcOmegaMax = Emax * (double)X->Nsite; + X->dcOmegaMax = Emax * (double)X->Nsite - E1; } if (X->iFlgSpecOmegaMin == FALSE) { - X->dcOmegaMin = E1; + X->dcOmegaMin = 0.0; } }/*Omegamax and omegamin is not specified in modpara*/ @@ -349,31 +349,36 @@ int MakeExcitedList( /// \retval TRUE Success to output the spectrum. 
int OutputSpectrum( struct EDMainCalStruct *X, + int nstate, int Nomega, int NdcSpectrum, double complex **dcSpectrum, - double complex *dcomega) + double complex **dcomega, + double *energy0) { FILE *fp; char sdt[D_FileNameMax]; - int iomega, idcSpectrum; + int iomega, idcSpectrum, istate; //output spectrum - sprintf(sdt, cFileNameCalcDynamicalGreen, X->Bind.Def.CDataFileHead); - if(childfopenMPI(sdt, "w", &fp)!=0){ - return FALSE; - } + for (istate = 0; istate < nstate; istate++) { + sprintf(sdt, cFileNameCalcDynamicalGreen, X->Bind.Def.CDataFileHead, istate); + if (childfopenMPI(sdt, "w", &fp) != 0) { + return FALSE; + } - for (idcSpectrum = 0; idcSpectrum < NdcSpectrum; idcSpectrum++) { - for (iomega = 0; iomega < Nomega; iomega++) { - fprintf(fp, "%.10lf %.10lf %.10lf %.10lf \n", - creal(dcomega[iomega] - X->Bind.Def.dcOmegaOrg), cimag(dcomega[iomega] - X->Bind.Def.dcOmegaOrg), - creal(dcSpectrum[iomega][idcSpectrum]), cimag(dcSpectrum[iomega][idcSpectrum])); - }/*for (i = 0; i < Nomega; i++)*/ - fprintf(fp, "\n"); - } + for (idcSpectrum = 0; idcSpectrum < NdcSpectrum; idcSpectrum++) { + for (iomega = 0; iomega < Nomega; iomega++) { + fprintf(fp, "%.10lf %.10lf %.10lf %.10lf \n", + creal(dcomega[istate][iomega] - X->Bind.Def.dcOmegaOrg - energy0[istate]), + cimag(dcomega[istate][iomega] - X->Bind.Def.dcOmegaOrg), + creal(dcSpectrum[iomega][idcSpectrum]), cimag(dcSpectrum[iomega][idcSpectrum])); + }/*for (i = 0; i < Nomega; i++)*/ + fprintf(fp, "\n"); + } - fclose(fp); + fclose(fp); + } return TRUE; }/*int OutputSpectrum*/ /// \brief Parent function to calculate the excited state. 
@@ -431,12 +436,27 @@ int CalcSpectrum( double complex **v1Org; /**< Input vector to calculate spectrum function.*/ //ToDo: Nomega should be given as a parameter - int Nomega; + int Nomega, nstate, istate; double complex OmegaMax, OmegaMin; double complex **dcSpectrum; - double complex *dcomega; + double complex **dcomega; + double *energy0; size_t byte_size; + if (X->Bind.Def.iCalcType == FullDiag) { + nstate = X->Bind.Check.idim_max; + } + else if (X->Bind.Def.iCalcType == CG) { + nstate = X->Bind.Def.k_exct; + } + else if (X->Bind.Def.iCalcType == TPQCalc || X->Bind.Def.iCalcType == cTPQ) { + nstate = NumAve; + } + else { + nstate = 1; + } + energy0 = d_1d_allocate(nstate); + if (X->Bind.Def.iFlgCalcSpec == CALCSPEC_SCRATCH) { X->Bind.Def.Nsite = X->Bind.Def.NsiteMPI; X->Bind.Def.Total2Sz = X->Bind.Def.Total2SzMPI; @@ -480,15 +500,42 @@ int CalcSpectrum( X->Bind.Def.dcOmegaOrg = I * (X->Bind.Def.dcOmegaMax - X->Bind.Def.dcOmegaMin) / (double)X->Bind.Def.iNOmega; } } + // + // Read energy origin of each state + // + if (X->Bind.Def.iCalcType == FullDiag) { + strcpy(sdt, cFileNameEigenvalue_Lanczos); + childfopenMPI(sdt, "r", &fp); + for (istate = 0; istate < nstate; istate++) { + fgetsMPI(sdt, D_FileNameMax, fp); + sscanf(sdt, "%ld%lf", &i, &energy0[istate]); + } + fclose(fp); + } + else if (X->Bind.Def.iCalcType == CG) { + sprintf(sdt, cFileNameEnergy_CG, X->Bind.Def.CDataFileHead); + childfopenMPI(sdt, "r", &fp); + for (istate = 0; istate < nstate; istate++) { + fgetsMPI(sdt, D_FileNameMax, fp); + fgetsMPI(sdt, D_FileNameMax, fp); + sscanf(sdt, " Energy %lf", &energy0[istate]); + fgetsMPI(sdt, D_FileNameMax, fp); + fgetsMPI(sdt, D_FileNameMax, fp); + fgetsMPI(sdt, D_FileNameMax, fp); + } + fclose(fp); + } /* Set & malloc omega grid */ Nomega = X->Bind.Def.iNOmega; - dcomega = cd_1d_allocate(Nomega); + dcomega = cd_2d_allocate(nstate, Nomega); OmegaMax = X->Bind.Def.dcOmegaMax + X->Bind.Def.dcOmegaOrg; OmegaMin = X->Bind.Def.dcOmegaMin + X->Bind.Def.dcOmegaOrg; 
- for (i = 0; i < Nomega; i++) { - dcomega[i] = (OmegaMax - OmegaMin) / Nomega * i + OmegaMin; + for (istate = 0; istate < nstate; istate++) { + for (i = 0; i < Nomega; i++) { + dcomega[istate][i] = (OmegaMax - OmegaMin) / Nomega * i + OmegaMin + energy0[istate]; + } } fprintf(stdoutMPI, "\nFrequency range:\n"); @@ -584,9 +631,10 @@ int CalcSpectrum( fprintf(stdoutMPI, " End: Calculating a spectrum.\n\n"); TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_CalcSpectrumEnd, "a"); - iret = OutputSpectrum(X, Nomega, NdcSpectrum, dcSpectrum, dcomega); + iret = OutputSpectrum(X, nstate, Nomega, NdcSpectrum, dcSpectrum, dcomega, energy0); free_cd_2d_allocate(dcSpectrum); - free_cd_1d_allocate(dcomega); + free_cd_2d_allocate(dcomega); + free_d_1d_allocate(energy0); return TRUE; }/*int CalcSpectrum*/ diff --git a/src/CalcSpectrumByBiCG.c b/src/CalcSpectrumByBiCG.c index b0834a69d..be878a40d 100644 --- a/src/CalcSpectrumByBiCG.c +++ b/src/CalcSpectrumByBiCG.c @@ -169,7 +169,7 @@ int CalcSpectrumByBiCG( int Nomega,//!<[in] Number of Frequencies int NdcSpectrum, double complex **dcSpectrum,//!<[out] [Nomega] Spectrum - double complex *dcomega,//!<[in] [Nomega] Frequency + double complex **dcomega,//!<[in] [Nomega] Frequency double complex **v1Org ) { @@ -241,7 +241,7 @@ int CalcSpectrumByBiCG( /**
    • Input @f$\alpha, \beta@f$, projected residual, or start from scratch
    • */ - ReadTMComponents_BiCG(X, &v2[0][0], &v4[0][0], &v12[0][0], &v14[0][0], Nomega, NdcSpectrum, dcSpectrum, dcomega); + ReadTMComponents_BiCG(X, &v2[0][0], &v4[0][0], &v12[0][0], &v14[0][0], Nomega, NdcSpectrum, dcSpectrum, dcomega[0]); /**
    • @b DO BiCG loop
      • @@ -278,7 +278,7 @@ int CalcSpectrumByBiCG( for (iomega = 0; iomega < Nomega; iomega++) { fprintf(fp, "%7i %20.10e %20.10e %20.10e %20.10e\n", - stp, creal(dcomega[iomega]), + stp, creal(dcomega[0][iomega]), creal(dcSpectrum[iomega][0]), cimag(dcSpectrum[iomega][0]), resz[iomega]); } diff --git a/src/CalcSpectrumByFullDiag.c b/src/CalcSpectrumByFullDiag.c index 85b9210ae..29700d05a 100644 --- a/src/CalcSpectrumByFullDiag.c +++ b/src/CalcSpectrumByFullDiag.c @@ -39,7 +39,7 @@ int CalcSpectrumByFullDiag( int Nomega,//!<[in] Number of frequencies int NdcSpectrum, double complex **dcSpectrum,//!<[out] [Nomega] Spectrum - double complex *dcomega,//!<[in] [Nomega] Frequency + double complex **dcomega,//!<[in] [Nomega] Frequency double complex **v1Org ) { @@ -99,12 +99,14 @@ int CalcSpectrumByFullDiag(
      */ StartTimer(6304); - for (iomega = 0; iomega < Nomega; iomega++) { - dcSpectrum[iomega][idcSpectrum] = 0.0; - for (idim = 0; idim < idim_max_int; idim++) { - dcSpectrum[iomega][idcSpectrum] += vLvvRv[idim] / (dcomega[iomega] - X->Bind.Phys.energy[idim]); - }/*for (idim = 0; idim < idim_max_int; idim++)*/ - }/*for (iomega = 0; iomega < Nomega; iomega++)*/ + for (jdim = 0; jdim < idim_max_int; jdim++) { + for (iomega = 0; iomega < Nomega; iomega++) { + dcSpectrum[iomega][idcSpectrum] = 0.0; + for (idim = 0; idim < idim_max_int; idim++) { + dcSpectrum[iomega][idcSpectrum] += vLvvRv[idim] / (dcomega[jdim][iomega] - X->Bind.Phys.energy[idim]); + }/*for (idim = 0; idim < idim_max_int; idim++)*/ + }/*for (iomega = 0; iomega < Nomega; iomega++)*/ + } StopTimer(6304); }/*for (idcSpectrum = 1; idcSpectrum < NdcSpectrum; idcSpectrum++)*/ free_cd_2d_allocate(vL); diff --git a/src/StdFace b/src/StdFace index 73721cd3e..8e2db166f 160000 --- a/src/StdFace +++ b/src/StdFace @@ -1 +1 @@ -Subproject commit 73721cd3e0dfb9e48995a15e5616849ffcd8900c +Subproject commit 8e2db166fd6472a9681745a9c69c252917a6fc7c diff --git a/src/global.c b/src/global.c index 508ba2947..9e192985a 100644 --- a/src/global.c +++ b/src/global.c @@ -51,7 +51,7 @@ const char* cFileNameTimeEV_CG="Time_EigenVector.dat"; const char* cFileNameListModel="ListForModel_Ns%d_Nup%dNdown%d.dat"; const char* cFileNameOutputEigen="%s_eigenvec_%d_rank_%d.dat"; const char* cFileNameInputEigen="%s_eigenvec_%d_rank_%d.dat"; -const char* cFileNameCalcDynamicalGreen="%s_DynamicalGreen.dat"; +const char* cFileNameCalcDynamicalGreen="%s_DynamicalGreen_%d.dat"; const char* cFileNameTridiagonalMatrixComponents="%s_TMComponents.dat"; diff --git a/src/include/CalcSpectrumByBiCG.h b/src/include/CalcSpectrumByBiCG.h index 8946a4e28..822228203 100644 --- a/src/include/CalcSpectrumByBiCG.h +++ b/src/include/CalcSpectrumByBiCG.h @@ -23,6 +23,6 @@ int CalcSpectrumByBiCG( int Nomega, int NdcSpectrum, double complex **dcSpectrum, - 
double complex *dcomega, + double complex **dcomega, double complex **v1Org ); diff --git a/src/include/CalcSpectrumByFullDiag.h b/src/include/CalcSpectrumByFullDiag.h index c87cb100a..a45ca55c1 100644 --- a/src/include/CalcSpectrumByFullDiag.h +++ b/src/include/CalcSpectrumByFullDiag.h @@ -19,4 +19,4 @@ int CalcSpectrumByFullDiag( struct EDMainCalStruct *X, int Nomega, int NdcSpectrum, - double complex **dcSpectrum, double complex *dcomega, double complex **v1org); + double complex **dcSpectrum, double complex **dcomega, double complex **v1org); diff --git a/test/spectrum_genspin_ladder.sh b/test/spectrum_genspin_ladder.sh index f6688cfc3..40c7d1044 100755 --- a/test/spectrum_genspin_ladder.sh +++ b/test/spectrum_genspin_ladder.sh @@ -21,6 +21,8 @@ NOmega = 5 OmegaIm = 1.0 CalcSpec = "Scratch" SpectrumType = "SzSz" +OmegaMax = 430.8119368267894629 +Omegamin = -394.6033794632105014 EOF ${MPIRUN} ../../src/HPhi -s stan2.in @@ -32,13 +34,13 @@ cat > reference.dat < paste1.dat +paste output/zvo_DynamicalGreen_0.dat reference.dat > paste1.dat diff=`awk ' BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff} ' paste1.dat` -echo "Diff output/zvo_DynamicalGreen.dat (SzSz) : " ${diff} +echo "Diff output/zvo_DynamicalGreen_0.dat (SzSz) : " ${diff} test "${diff}" = "0.000000" # # S+S- spectrum @@ -59,6 +61,8 @@ NOmega = 5 OmegaIm = 1.0 CalcSpec = "Scratch" SpectrumType = "S+S-" +OmegaMax = 430.8119368267894629 +Omegamin = -394.6033794632105014 EOF ${MPIRUN} ../../src/HPhi -s stan2.in @@ -70,13 +74,13 @@ cat > reference.dat < paste2.dat +paste output/zvo_DynamicalGreen_0.dat reference.dat > paste2.dat diff=`awk ' BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%7.5f", diff} ' paste2.dat` -echo "Diff output/zvo_DynamicalGreen.dat (S+S-) : " ${diff} +echo "Diff output/zvo_DynamicalGreen_0.dat (S+S-) : " ${diff} test "${diff}" = "0.00000" exit $? 
diff --git a/test/spectrum_genspingc_ladder.sh b/test/spectrum_genspingc_ladder.sh index 194a20957..a31b44ded 100755 --- a/test/spectrum_genspingc_ladder.sh +++ b/test/spectrum_genspingc_ladder.sh @@ -31,13 +31,13 @@ cat > reference.dat < paste1.dat +paste output/zvo_DynamicalGreen_0.dat reference.dat > paste1.dat diff=`awk ' BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff} ' paste1.dat` -echo "Diff output/vo_DynamicalGreen.dat (SzSz) : " ${diff} +echo "Diff output/zvo_DynamicalGreen_0.dat (SzSz) : " ${diff} test "${diff}" = "0.000000" # # S+S- spectrum @@ -68,13 +68,13 @@ cat > reference.dat < paste2.dat +paste output/zvo_DynamicalGreen_0.dat reference.dat > paste2.dat diff=`awk ' BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff} ' paste2.dat` -echo "Diff output/vo_DynamicalGreen.dat (S+S-) : " ${diff} +echo "Diff output/zvo_DynamicalGreen_0.dat (S+S-) : " ${diff} test "${diff}" = "0.000000" exit $? 
diff --git a/test/spectrum_hubbard_square.sh b/test/spectrum_hubbard_square.sh index 97ce00466..632ad798a 100755 --- a/test/spectrum_hubbard_square.sh +++ b/test/spectrum_hubbard_square.sh @@ -21,6 +21,8 @@ NOmega = 5 OmegaIm = 1.0 CalcSpec = "Scratch" SpectrumType = "SzSz" +OmegaMin = -85.7470470447365507 +OmegaMax = 106.2529529552634493 EOF ${MPIRUN} ../../src/HPhi -s stan2.in @@ -32,13 +34,13 @@ cat > reference.dat < paste1.dat +paste output/zvo_DynamicalGreen_0.dat reference.dat > paste1.dat diff=`awk ' BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff} ' paste1.dat` -echo "Diff output/vo_DynamicalGreen.dat (SzSz) : " ${diff} +echo "Diff output/zvo_DynamicalGreen_0.dat (SzSz) : " ${diff} test "${diff}" = "0.000000" # # S+S- spectrum @@ -59,6 +61,8 @@ NOmega = 5 OmegaIm = 1.0 CalcSpec = "Scratch" SpectrumType = "S+S-" +OmegaMin = -85.7470470447365507 +OmegaMax = 106.2529529552634493 EOF ${MPIRUN} ../../src/HPhi -s stan2.in @@ -70,13 +74,13 @@ cat > reference.dat < paste2.dat +paste output/zvo_DynamicalGreen_0.dat reference.dat > paste2.dat diff=`awk ' BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff} ' paste2.dat` -echo "Diff output/vo_DynamicalGreen.dat (S+S-) : " ${diff} +echo "Diff output/zvo_DynamicalGreen_0.dat (S+S-) : " ${diff} test "${diff}" = "0.000000" # # Density-Density spectrum @@ -97,6 +101,8 @@ NOmega = 5 OmegaIm = 1.0 CalcSpec = "Scratch" SpectrumType = "Density" +OmegaMin = -85.7470470447365507 +OmegaMax = 106.2529529552634493 EOF ${MPIRUN} ../../src/HPhi -s stan2.in @@ -108,13 +114,13 @@ cat > reference.dat < paste3.dat +paste output/zvo_DynamicalGreen_0.dat reference.dat > paste3.dat diff=`awk ' BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff} ' paste3.dat` -echo "Diff output/vo_DynamicalGreen.dat (Density) : " ${diff} +echo "Diff output/zvo_DynamicalGreen_0.dat (Density) : " ${diff} test "${diff}" = "0.000000" # 
# Up-Up spectrum @@ -135,6 +141,8 @@ NOmega = 5 OmegaIm = 1.0 CalcSpec = "Scratch" SpectrumType = "Up" +OmegaMin = -85.7470470447365507 +OmegaMax = 106.2529529552634493 EOF ${MPIRUN} ../../src/HPhi -s stan2.in @@ -146,13 +154,13 @@ cat > reference.dat < paste4.dat +paste output/zvo_DynamicalGreen_0.dat reference.dat > paste4.dat diff=`awk ' BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff} ' paste4.dat` -echo "Diff output/vo_DynamicalGreen.dat (Up) : " ${diff} +echo "Diff output/zvo_DynamicalGreen_0.dat (Up) : " ${diff} test "${diff}" = "0.000000" # # Down-Down spectrum @@ -173,6 +181,8 @@ NOmega = 5 OmegaIm = 1.0 CalcSpec = "Scratch" SpectrumType = "Down" +OmegaMin = -85.7470470447365507 +OmegaMax = 106.2529529552634493 EOF ${MPIRUN} ../../src/HPhi -s stan2.in @@ -184,13 +194,13 @@ cat > reference.dat < paste5.dat +paste output/zvo_DynamicalGreen_0.dat reference.dat > paste5.dat diff=`awk ' BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff} ' paste5.dat` -echo "Diff output/vo_DynamicalGreen.dat (Down) : " ${diff} +echo "Diff output/zvo_DynamicalGreen_0.dat (Down) : " ${diff} test "${diff}" = "0.000000" exit $? 
diff --git a/test/spectrum_hubbardgc_tri.sh b/test/spectrum_hubbardgc_tri.sh index 67098c7a8..28cd36bc0 100755 --- a/test/spectrum_hubbardgc_tri.sh +++ b/test/spectrum_hubbardgc_tri.sh @@ -22,6 +22,8 @@ NOmega = 5 OmegaIm = 1.0 CalcSpec = "Scratch" SpectrumType = "SzSz" +OmegaMax = 129.3140744040596246 +Omegamin = -98.6859255959403754 EOF ${MPIRUN} ../../src/HPhi -s stan2.in @@ -33,13 +35,13 @@ cat > reference.dat < paste1.dat +paste output/zvo_DynamicalGreen_0.dat reference.dat > paste1.dat diff=`awk ' BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff} ' paste1.dat` -echo "Diff output/vo_DynamicalGreen.dat (SzSz) : " ${diff} +echo "Diff output/zvo_DynamicalGreen_0.dat (SzSz) : " ${diff} test "${diff}" = "0.000000" # # S+S- spectrum @@ -61,6 +63,8 @@ NOmega = 5 OmegaIm = 1.0 CalcSpec = "Scratch" SpectrumType = "S+S-" +OmegaMax = 129.3140744040596246 +Omegamin = -98.6859255959403754 EOF ${MPIRUN} ../../src/HPhi -s stan2.in @@ -72,13 +76,13 @@ cat > reference.dat < paste2.dat +paste output/zvo_DynamicalGreen_0.dat reference.dat > paste2.dat diff=`awk ' BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff} ' paste2.dat` -echo "Diff output/vo_DynamicalGreen.dat (S+S-) : " ${diff} +echo "Diff output/zvo_DynamicalGreen_0.dat (S+S-) : " ${diff} test "${diff}" = "0.000000" # # Density-Density spectrum @@ -100,6 +104,8 @@ NOmega = 5 OmegaIm = 1.0 CalcSpec = "Scratch" SpectrumType = "Density" +OmegaMax = 129.3140744040596246 +Omegamin = -98.6859255959403754 EOF ${MPIRUN} ../../src/HPhi -s stan2.in @@ -111,13 +117,13 @@ cat > reference.dat < paste3.dat +paste output/zvo_DynamicalGreen_0.dat reference.dat > paste3.dat diff=`awk ' BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff} ' paste3.dat` -echo "Diff output/vo_DynamicalGreen.dat (Density) : " ${diff} +echo "Diff output/zvo_DynamicalGreen_0.dat (Density) : " ${diff} test "${diff}" = "0.000000" # # 
Up-Up spectrum @@ -139,6 +145,8 @@ NOmega = 5 OmegaIm = 1.0 CalcSpec = "Scratch" SpectrumType = "Down" +OmegaMax = 129.3140744040596246 +Omegamin = -98.6859255959403754 EOF ${MPIRUN} ../../src/HPhi -s stan2.in @@ -150,13 +158,13 @@ cat > reference.dat < paste4.dat +paste output/zvo_DynamicalGreen_0.dat reference.dat > paste4.dat diff=`awk ' BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff} ' paste4.dat` -echo "Diff output/vo_DynamicalGreen.dat (Up) : " ${diff} +echo "Diff output/zvo_DynamicalGreen_0.dat (Up) : " ${diff} test "${diff}" = "0.000000" # # Down-Down spectrum @@ -178,6 +186,8 @@ NOmega = 5 OmegaIm = 1.0 CalcSpec = "Scratch" SpectrumType = "Up" +OmegaMax = 129.3140744040596246 +Omegamin = -98.6859255959403754 EOF ${MPIRUN} ../../src/HPhi -s stan2.in @@ -189,13 +199,13 @@ cat > reference.dat < paste5.dat +paste output/zvo_DynamicalGreen_0.dat reference.dat > paste5.dat diff=`awk ' BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff} ' paste5.dat` -echo "Diff output/vo_DynamicalGreen.dat (Down) : " ${diff} +echo "Diff output/zvo_DynamicalGreen_0.dat (Down) : " ${diff} test "${diff}" = "0.000000" exit $? 
diff --git a/test/spectrum_kondo_chain.sh b/test/spectrum_kondo_chain.sh index 57bcb79f3..9c7298f8e 100755 --- a/test/spectrum_kondo_chain.sh +++ b/test/spectrum_kondo_chain.sh @@ -20,6 +20,8 @@ NOmega = 5 OmegaIm = 1.0 CalcSpec = "Scratch" SpectrumType = "SzSz" +OmegaMax = 64.6776213781762834 +Omegamin = -39.3223786218237095 EOF ${MPIRUN} ../../src/HPhi -s stan2.in @@ -31,13 +33,13 @@ cat > reference.dat < paste1.dat +paste output/zvo_DynamicalGreen_0.dat reference.dat > paste1.dat diff=`awk ' BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff} ' paste1.dat` -echo "Diff output/vo_DynamicalGreen.dat (SzSz) : " ${diff} +echo "Diff output/zvo_DynamicalGreen_0.dat (SzSz) : " ${diff} test "${diff}" = "0.000000" # # S+S- spectrum @@ -57,6 +59,8 @@ NOmega = 5 OmegaIm = 1.0 CalcSpec = "Scratch" SpectrumType = "S+S-" +OmegaMax = 64.6776213781762834 +Omegamin = -39.3223786218237095 EOF ${MPIRUN} ../../src/HPhi -s stan2.in @@ -68,13 +72,13 @@ cat > reference.dat < paste2.dat +paste output/zvo_DynamicalGreen_0.dat reference.dat > paste2.dat diff=`awk ' BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff} ' paste2.dat` -echo "Diff output/vo_DynamicalGreen.dat (S+S-) : " ${diff} +echo "Diff output/zvo_DynamicalGreen_0.dat (S+S-) : " ${diff} test "${diff}" = "0.000000" # # Density-Density spectrum @@ -94,6 +98,8 @@ NOmega = 5 OmegaIm = 1.0 CalcSpec = "Scratch" SpectrumType = "Density" +OmegaMax = 64.6776213781762834 +Omegamin = -39.3223786218237095 EOF ${MPIRUN} ../../src/HPhi -s stan2.in @@ -105,13 +111,13 @@ cat > reference.dat < paste3.dat +paste output/zvo_DynamicalGreen_0.dat reference.dat > paste3.dat diff=`awk ' BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff} ' paste3.dat` -echo "Diff output/vo_DynamicalGreen.dat (Density) : " ${diff} +echo "Diff output/zvo_DynamicalGreen_0.dat (Density) : " ${diff} test "${diff}" = "0.000000" # # Up-Up spectrum 
@@ -131,6 +137,8 @@ NOmega = 5 OmegaIm = 1.0 CalcSpec = "Scratch" SpectrumType = "Up" +OmegaMax = 64.6776213781762834 +Omegamin = -39.3223786218237095 EOF ${MPIRUN} ../../src/HPhi -s stan2.in @@ -142,13 +150,13 @@ cat > reference.dat < paste4.dat +paste output/zvo_DynamicalGreen_0.dat reference.dat > paste4.dat diff=`awk ' BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff} ' paste4.dat` -echo "Diff output/vo_DynamicalGreen.dat (Up) : " ${diff} +echo "Diff output/zvo_DynamicalGreen_0.dat (Up) : " ${diff} test "${diff}" = "0.000000" # # Down-Down spectrum @@ -168,6 +176,8 @@ NOmega = 5 OmegaIm = 1.0 CalcSpec = "Scratch" SpectrumType = "Down" +OmegaMax = 64.6776213781762834 +Omegamin = -39.3223786218237095 EOF ${MPIRUN} ../../src/HPhi -s stan2.in @@ -179,13 +189,13 @@ cat > reference.dat < paste5.dat +paste output/zvo_DynamicalGreen_0.dat reference.dat > paste5.dat diff=`awk ' BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff} ' paste5.dat` -echo "Diff output/vo_DynamicalGreen.dat (Down) : " ${diff} +echo "Diff output/zvo_DynamicalGreen_0.dat (Down) : " ${diff} test "${diff}" = "0.000000" exit $? 
diff --git a/test/spectrum_kondogc_chain.sh b/test/spectrum_kondogc_chain.sh index f22527f94..0d19a1326 100755 --- a/test/spectrum_kondogc_chain.sh +++ b/test/spectrum_kondogc_chain.sh @@ -20,6 +20,8 @@ NOmega = 5 OmegaIm = 1.0 CalcSpec = "Scratch" SpectrumType = "SzSz" +OmegaMax = 72.6776213781761555 +Omegamin = -42.5223786218238544 EOF ${MPIRUN} ../../src/HPhi -s stan2.in @@ -31,13 +33,13 @@ cat > reference.dat < paste1.dat +paste output/zvo_DynamicalGreen_0.dat reference.dat > paste1.dat diff=`awk ' BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff} ' paste1.dat` -echo "Diff output/vo_DynamicalGreen.dat (SzSz) : " ${diff} +echo "Diff output/zvo_DynamicalGreen_0.dat (SzSz) : " ${diff} test "${diff}" = "0.000000" # # S+S- spectrum @@ -57,6 +59,8 @@ NOmega = 5 OmegaIm = 1.0 CalcSpec = "Scratch" SpectrumType = "S+S-" +OmegaMax = 72.6776213781761555 +Omegamin = -42.5223786218238544 EOF ${MPIRUN} ../../src/HPhi -s stan2.in @@ -68,13 +72,13 @@ cat > reference.dat < paste2.dat +paste output/zvo_DynamicalGreen_0.dat reference.dat > paste2.dat diff=`awk ' BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff} ' paste2.dat` -echo "Diff output/vo_DynamicalGreen.dat (S+S-) : " ${diff} +echo "Diff output/zvo_DynamicalGreen_0.dat (S+S-) : " ${diff} test "${diff}" = "0.000000" # # Density-Density spectrum @@ -94,6 +98,8 @@ NOmega = 5 OmegaIm = 1.0 CalcSpec = "Scratch" SpectrumType = "Density" +OmegaMax = 72.6776213781761555 +Omegamin = -42.5223786218238544 EOF ${MPIRUN} ../../src/HPhi -s stan2.in @@ -105,13 +111,13 @@ cat > reference.dat < paste3.dat +paste output/zvo_DynamicalGreen_0.dat reference.dat > paste3.dat diff=`awk ' BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff} ' paste3.dat` -echo "Diff output/vo_DynamicalGreen.dat (Density) : " ${diff} +echo "Diff output/zvo_DynamicalGreen_0.dat (Density) : " ${diff} test "${diff}" = "0.000000" # # Up-Up 
spectrum @@ -131,6 +137,8 @@ NOmega = 5 OmegaIm = 1.0 CalcSpec = "Scratch" SpectrumType = "Down" +OmegaMax = 72.6776213781761555 +Omegamin = -42.5223786218238544 EOF ${MPIRUN} ../../src/HPhi -s stan2.in @@ -142,13 +150,13 @@ cat > reference.dat < paste4.dat +paste output/zvo_DynamicalGreen_0.dat reference.dat > paste4.dat diff=`awk ' BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff} ' paste4.dat` -echo "Diff output/vo_DynamicalGreen.dat (Up) : " ${diff} +echo "Diff output/zvo_DynamicalGreen_0.dat (Up) : " ${diff} test "${diff}" = "0.000000" # # Down-Down spectrum @@ -168,6 +176,8 @@ NOmega = 5 OmegaIm = 1.0 CalcSpec = "Scratch" SpectrumType = "Up" +OmegaMax = 72.6776213781761555 +Omegamin = -42.5223786218238544 EOF ${MPIRUN} ../../src/HPhi -s stan2.in @@ -179,13 +189,13 @@ cat > reference.dat < paste5.dat +paste output/zvo_DynamicalGreen_0.dat reference.dat > paste5.dat diff=`awk ' BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff} ' paste5.dat` -echo "Diff output/vo_DynamicalGreen.dat (Down) : " ${diff} +echo "Diff output/zvo_DynamicalGreen_0.dat (Down) : " ${diff} test "${diff}" = "0.000000" exit $? 
diff --git a/test/spectrum_spin_kagome.sh b/test/spectrum_spin_kagome.sh index b1768dde0..c903078a4 100755 --- a/test/spectrum_spin_kagome.sh +++ b/test/spectrum_spin_kagome.sh @@ -24,6 +24,8 @@ NOmega = 5 OmegaIm = 1.0 CalcSpec = "Scratch" SpectrumType = "SzSz" +OmegaMax = 41.4523534273467433 +Omegamin = -34.1476465726532510 EOF ${MPIRUN} ../../src/HPhi -s stan2.in @@ -35,13 +37,13 @@ cat > reference.dat < paste1.dat +paste output/zvo_DynamicalGreen_0.dat reference.dat > paste1.dat diff=`awk ' BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff} ' paste1.dat` -echo "Diff output/vo_DynamicalGreen.dat (SzSz) : " ${diff} +echo "Diff output/zvo_DynamicalGreen_0.dat (SzSz) : " ${diff} test "${diff}" = "0.000000" # # S+S- spectrum @@ -65,6 +67,8 @@ NOmega = 5 OmegaIm = 1.0 CalcSpec = "Scratch" SpectrumType = "S+S-" +OmegaMax = 41.4523534273467433 +Omegamin = -34.1476465726532510 EOF ${MPIRUN} ../../src/HPhi -s stan2.in @@ -76,13 +80,13 @@ cat > reference.dat < paste2.dat +paste output/zvo_DynamicalGreen_0.dat reference.dat > paste2.dat diff=`awk ' BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff} ' paste2.dat` -echo "Diff output/vo_DynamicalGreen.dat (S+S-) : " ${diff} +echo "Diff output/zvo_DynamicalGreen_0.dat (S+S-) : " ${diff} test "${diff}" = "0.000000" # # Density-Density spectrum @@ -106,6 +110,8 @@ NOmega = 5 OmegaIm = 1.0 CalcSpec = "Scratch" SpectrumType = "Density" +OmegaMax = 41.4523534273467433 +Omegamin = -34.1476465726532510 EOF ${MPIRUN} ../../src/HPhi -s stan2.in @@ -117,13 +123,13 @@ cat > reference.dat < paste3.dat +paste output/zvo_DynamicalGreen_0.dat reference.dat > paste3.dat diff=`awk ' BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff} ' paste3.dat` -echo "Diff output/vo_DynamicalGreen.dat (Density) : " ${diff} +echo "Diff output/zvo_DynamicalGreen_0.dat (Density) : " ${diff} test "${diff}" = "0.000000" exit $? 
diff --git a/test/spectrum_spingc_honey.sh b/test/spectrum_spingc_honey.sh index 34e141fd5..5ba456c05 100755 --- a/test/spectrum_spingc_honey.sh +++ b/test/spectrum_spingc_honey.sh @@ -28,6 +28,8 @@ NOmega = 5 OmegaIm = 1.0 CalcSpec = "Scratch" SpectrumType = "SzSz" +OmegaMax = 80.1384732749672111 +Omegamin = -58.7615267250327946 EOF ${MPIRUN} ../../src/HPhi -s stan2.in @@ -39,13 +41,13 @@ cat > reference.dat < paste1.dat +paste output/zvo_DynamicalGreen_0.dat reference.dat > paste1.dat diff=`awk ' BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff} ' paste1.dat` -echo "Diff output/vo_DynamicalGreen.dat (SzSz) : " ${diff} +echo "Diff output/zvo_DynamicalGreen_0.dat (SzSz) : " ${diff} test "${diff}" = "0.000000" # # S+S- spectrum @@ -73,6 +75,8 @@ NOmega = 5 OmegaIm = 1.0 CalcSpec = "Scratch" SpectrumType = "S+S-" +OmegaMax = 80.1384732749672111 +Omegamin = -58.7615267250327946 EOF ${MPIRUN} ../../src/HPhi -s stan2.in @@ -84,13 +88,13 @@ cat > reference.dat < paste2.dat +paste output/zvo_DynamicalGreen_0.dat reference.dat > paste2.dat diff=`awk ' BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff} ' paste2.dat` -echo "Diff output/vo_DynamicalGreen.dat (S+S-) : " ${diff} +echo "Diff output/zvo_DynamicalGreen_0.dat (S+S-) : " ${diff} test "${diff}" = "0.000000" # # Density-Density spectrum @@ -118,6 +122,8 @@ NOmega = 5 OmegaIm = 1.0 CalcSpec = "Scratch" SpectrumType = "Density" +OmegaMax = 80.1384732749672111 +Omegamin = -58.7615267250327946 EOF ${MPIRUN} ../../src/HPhi -s stan2.in @@ -129,13 +135,13 @@ cat > reference.dat < paste3.dat +paste output/zvo_DynamicalGreen_0.dat reference.dat > paste3.dat diff=`awk ' BEGIN{diff=0.0} {diff+=sqrt(($3-$7)*($3-$7))+sqrt(($4-$8)*($4-$8))} END{printf "%8.6f", diff} ' paste3.dat` -echo "Diff output/vo_DynamicalGreen.dat (Density) : " ${diff} +echo "Diff output/zvo_DynamicalGreen_0.dat (Density) : " ${diff} test "${diff}" = "0.000000" exit $? 
From 2e3040cd2d0a6f0048fae1d070664dbd1cf1bd31 Mon Sep 17 00:00:00 2001 From: Mitsuaki Kawamura Date: Tue, 25 Oct 2022 10:18:45 +0900 Subject: [PATCH 40/50] Name of dynamical green's function was changed. Mesh plot -> line segment plot (2 blank lines) --- tool/dynamicalr2k.F90 | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tool/dynamicalr2k.F90 b/tool/dynamicalr2k.F90 index 036dea00e..acd8e9030 100644 --- a/tool/dynamicalr2k.F90 +++ b/tool/dynamicalr2k.F90 @@ -149,8 +149,8 @@ SUBROUTINE read_filename() READ(fi,*) keyname, eig0 CLOSE(fi) WRITE(*,*) " Minimum energy : ", eig0 - omegamin = omegamin - eig0 - omegamax = omegamax - eig0 + !omegamin = omegamin - eig0 + !omegamax = omegamax - eig0 ! END SUBROUTINE read_filename ! @@ -380,7 +380,7 @@ SUBROUTINE read_corrfile() ALLOCATE(cor0(nomega,ncor)) cor(1:nr,1:norb,1:nomega) = CMPLX(0d0, 0d0, KIND(1d0)) ! - OPEN(fi, file = TRIM(filehead) // "_DynamicalGreen.dat") + OPEN(fi, file = TRIM(filehead) // "_DynamicalGreen_0.dat") ! DO icor = 1, ncor DO iomega = 1, nomega @@ -481,6 +481,7 @@ SUBROUTINE output_cor() WRITE(fo,'(1000e15.5)') xk(ik), omega, cor_k(ik, 1:norb, iomega) END DO WRITE(fo,*) + WRITE(fo,*) END DO ! 
CLOSE(fo) @@ -492,7 +493,7 @@ SUBROUTINE output_cor() WRITE(fo,'(a,a,a,f10.5,a)',advance="no") "'", TRIM(kname(inode)), "' ", xk_label(inode), ", " END DO WRITE(fo,'(a,a,a,f10.5,a)') "'", TRIM(kname(nnode)), "' ", xk_label(nnode), ")" - WRITE(fo,'(a)') "set ylabel 'Energy from E_0'" + WRITE(fo,'(a)') "set ylabel 'Energy'" WRITE(fo,'(a)') "set zlabel 'Spectrum' rotate" WRITE(fo,'(a)') "set ticslevel 0" WRITE(fo,'(a)') "set xzeroaxis" From 22bc1de17e067ff8f6f8fe8e1fa2c299e48424f0 Mon Sep 17 00:00:00 2001 From: Mitsuaki Kawamura Date: Tue, 25 Oct 2022 10:46:45 +0900 Subject: [PATCH 41/50] Backup --- src/CalcByLOBPCG.c | 6 +-- src/CalcSpectrum.c | 65 +++++++++++++++------------- src/CalcSpectrumByFullDiag.c | 52 ++++++++++++---------- src/include/CalcSpectrumByFullDiag.h | 2 +- 4 files changed, 67 insertions(+), 58 deletions(-) diff --git a/src/CalcByLOBPCG.c b/src/CalcByLOBPCG.c index b01703e2b..d09a53177 100644 --- a/src/CalcByLOBPCG.c +++ b/src/CalcByLOBPCG.c @@ -693,9 +693,9 @@ int CalcByLOBPCG( and read from files. 
*/ fprintf(stdoutMPI, "An Eigenvector is inputted.\n"); + TimeKeeper(&(X->Bind), cFileNameTimeKeep, cReadEigenVecStart, "a"); vin = cd_1d_allocate(X->Bind.Check.idim_max + 1); for (ie = 0; ie < X->Bind.Def.k_exct; ie++) { - TimeKeeper(&(X->Bind), cFileNameTimeKeep, cReadEigenVecStart, "a"); sprintf(sdt, cFileNameInputEigen, X->Bind.Def.CDataFileHead, ie, myrank); childfopenALL(sdt, "rb", &fp); if (fp == NULL) { @@ -711,10 +711,10 @@ int CalcByLOBPCG( byte_size = fread(vin, sizeof(complex double), X->Bind.Check.idim_max + 1, fp); #pragma omp parallel for default(none) shared(v1,vin, i_max, ie), private(idim) for (idim = 1; idim <= i_max; idim++) { - v1[ie][idim] = vin[idim]; + v1[idim][ie] = vin[idim]; } + fclose(fp); }/*for (ie = 0; ie < X->Def.k_exct; ie++)*/ - fclose(fp); free_cd_1d_allocate(vin); TimeKeeper(&(X->Bind), cFileNameTimeKeep, cReadEigenVecFinish, "a"); diff --git a/src/CalcSpectrum.c b/src/CalcSpectrum.c index b42fd4007..e93d9a26b 100644 --- a/src/CalcSpectrum.c +++ b/src/CalcSpectrum.c @@ -352,7 +352,7 @@ int OutputSpectrum( int nstate, int Nomega, int NdcSpectrum, - double complex **dcSpectrum, + double complex ***dcSpectrum, double complex **dcomega, double *energy0) { @@ -372,7 +372,7 @@ int OutputSpectrum( fprintf(fp, "%.10lf %.10lf %.10lf %.10lf \n", creal(dcomega[istate][iomega] - X->Bind.Def.dcOmegaOrg - energy0[istate]), cimag(dcomega[istate][iomega] - X->Bind.Def.dcOmegaOrg), - creal(dcSpectrum[iomega][idcSpectrum]), cimag(dcSpectrum[iomega][idcSpectrum])); + creal(dcSpectrum[istate][iomega][idcSpectrum]), cimag(dcSpectrum[istate][iomega][idcSpectrum])); }/*for (i = 0; i < Nomega; i++)*/ fprintf(fp, "\n"); } @@ -433,12 +433,12 @@ int CalcSpectrum( int i_stp, NdcSpectrum; int iFlagListModified = FALSE; FILE *fp; - double complex **v1Org; /**< Input vector to calculate spectrum function.*/ + double complex **v1Org, *vin; /**< Input vector to calculate spectrum function.*/ //ToDo: Nomega should be given as a parameter int Nomega, nstate, 
istate; double complex OmegaMax, OmegaMin; - double complex **dcSpectrum; + double complex ***dcSpectrum; double complex **dcomega; double *energy0; size_t byte_size; @@ -470,8 +470,10 @@ int CalcSpectrum( } free_d_1d_allocate(list_Diagonal); free_cd_2d_allocate(v0); - v1Org = cd_2d_allocate(X->Bind.Check.idim_max + 1, 1); - for (i = 1; i <= X->Bind.Check.idim_max; i++) v1Org[i][0] = v1[i][0]; + v1Org = cd_2d_allocate(X->Bind.Check.idim_max + 1, nstate); + for (i = 1; i <= X->Bind.Check.idim_max; i++) + for (istate =0;istate < nstate;istate ++) + v1Org[i][istate] = v1[i][istate]; free_cd_2d_allocate(v1); #ifdef MPI free_lui_1d_allocate(list_1buf); @@ -559,7 +561,7 @@ int CalcSpectrum( fprintf(stderr, "Error: Both single and pair excitation operators exist.\n"); exitMPI(-1); } - dcSpectrum = cd_2d_allocate(Nomega, NdcSpectrum); + dcSpectrum = cd_3d_allocate(nstate, Nomega, NdcSpectrum); //Make New Lists if (MakeExcitedList(&(X->Bind), &iFlagListModified) == FALSE) { @@ -572,34 +574,37 @@ int CalcSpectrum( if (X->Bind.Def.iFlgCalcSpec == RECALC_NOT || X->Bind.Def.iFlgCalcSpec == RECALC_OUTPUT_TMComponents_VEC || (X->Bind.Def.iFlgCalcSpec == RECALC_INOUT_TMComponents_VEC && X->Bind.Def.iCalcType == CG)) { - v1Org = cd_2d_allocate(X->Bind.Check.idim_maxOrg + 1, 1); + v1Org = cd_2d_allocate(X->Bind.Check.idim_maxOrg + 1, nstate); + vin = cd_1d_allocate(X->Bind.Check.idim_maxOrg + 1); //input eigen vector StartTimer(6101); fprintf(stdoutMPI, " Start: An Eigenvector is inputted in CalcSpectrum.\n"); TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_InputEigenVectorStart, "a"); - GetFileNameByKW(KWSpectrumVec, &defname); - strcat(defname, "_rank_%d.dat"); - // sprintf(sdt, cFileNameInputEigen, X->Bind.Def.CDataFileHead, X->Bind.Def.k_exct - 1, myrank); - sprintf(sdt, defname, myrank); - childfopenALL(sdt, "rb", &fp); - - if (fp == NULL) { - fprintf(stderr, "Error: A file of Inputvector does not exist.\n"); - return -1; - } - - byte_size = fread(&i_stp, sizeof(i_stp), 1, fp); - 
X->Bind.Large.itr = i_stp; //For TPQ - byte_size = fread(&i_max, sizeof(i_max), 1, fp); - if (i_max != X->Bind.Check.idim_maxOrg) { - fprintf(stderr, "Error: myrank=%d, i_max=%ld\n", myrank, i_max); - fprintf(stderr, "Error: A file of Input vector is incorrect.\n"); - return -1; - } - byte_size = fread(&v1Org[0][0], sizeof(complex double), i_max + 1, fp); - fclose(fp); + for (istate = 0; istate < nstate; istate++) { + sprintf(sdt, cFileNameInputEigen, nstate, istate, myrank); + childfopenALL(sdt, "rb", &fp); + if (fp == NULL) { + fprintf(stderr, "Error: Inputvector file is not found.\n"); + exitMPI(-1); + } + byte_size = fread(&i_stp, sizeof(int), 1, fp); + X->Bind.Large.itr = i_stp; //For TPQ + byte_size = fread(&i_max, sizeof(long int), 1, fp); + if (i_max != X->Bind.Check.idim_maxOrg) { + fprintf(stderr, "Error: Invalid Inputvector file.\n"); + exitMPI(-1); + } + byte_size = fread(vin, sizeof(complex double), i_max, fp); +#pragma omp parallel for default(none) shared(v1Org,vin, i_max, istate), private(i) + for (i = 1; i <= i_max; i++) { + v1Org[i][istate] = vin[i]; + } + fclose(fp); + }/*for (ie = 0; ie < X->Def.k_exct; ie++)*/ StopTimer(6101); if (byte_size == 0) printf("byte_size: %d \n", (int)byte_size); + + free_cd_1d_allocate(vin); } StopTimer(6100); @@ -632,7 +637,7 @@ int CalcSpectrum( fprintf(stdoutMPI, " End: Calculating a spectrum.\n\n"); TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_CalcSpectrumEnd, "a"); iret = OutputSpectrum(X, nstate, Nomega, NdcSpectrum, dcSpectrum, dcomega, energy0); - free_cd_2d_allocate(dcSpectrum); + free_cd_3d_allocate(dcSpectrum); free_cd_2d_allocate(dcomega); free_d_1d_allocate(energy0); return TRUE; diff --git a/src/CalcSpectrumByFullDiag.c b/src/CalcSpectrumByFullDiag.c index 29700d05a..1d3a4be1b 100644 --- a/src/CalcSpectrumByFullDiag.c +++ b/src/CalcSpectrumByFullDiag.c @@ -38,29 +38,31 @@ int CalcSpectrumByFullDiag( struct EDMainCalStruct *X,//!<[inout] int Nomega,//!<[in] Number of frequencies int NdcSpectrum, - double 
complex **dcSpectrum,//!<[out] [Nomega] Spectrum + double complex ***dcSpectrum,//!<[out] [Nomega] Spectrum double complex **dcomega,//!<[in] [Nomega] Frequency double complex **v1Org ) { - int idim, jdim, iomega; - int idim_max_int; + int idim0, idim1, idim2, iomega; + int idim_max_int, idim_maxorg_int; int idcSpectrum; - double complex **vR, **vL, vRv, vLv, *vLvvRv; + double complex **vR, **vL, **vRv, **vLv; /**
      • Generate fully stored Hamiltonian. Because ::v0 & ::v1 are overwritten, copy ::v0 into ::vg.
      • */ idim_max_int = (int)X->Bind.Check.idim_max; - vR = cd_2d_allocate(idim_max_int, 1); - vL = cd_2d_allocate(idim_max_int, 1); - vLvvRv = cd_1d_allocate(idim_max_int); + idim_maxorg_int = (int)X->Bind.Check.idim_maxOrg; + vR = cd_2d_allocate(idim_max_int+1, idim_maxorg_int); + vL = cd_2d_allocate(idim_max_int+1, idim_maxorg_int); + vLv = cd_2d_allocate(idim_max_int, idim_maxorg_int); + vRv = cd_2d_allocate(idim_max_int, idim_maxorg_int); StartTimer(6301); zclear((X->Bind.Check.idim_max + 1)*(X->Bind.Check.idim_max + 1), &v0[0][0]); zclear((X->Bind.Check.idim_max + 1)*(X->Bind.Check.idim_max + 1), &v1[0][0]); - for (idim = 1; idim <= X->Bind.Check.idim_max; idim++) v1[idim][idim] = 1.0; + for (idim0 = 1; idim0 <= X->Bind.Check.idim_max; idim0++) v1[idim0][idim0] = 1.0; mltply(&(X->Bind), X->Bind.Check.idim_max, v0, v1); StopTimer(6301); /** @@ -75,20 +77,20 @@ int CalcSpectrumByFullDiag( where @f$c|0\rangle@f$ is ::vg. */ zclear(X->Bind.Check.idim_max, &vR[1][0]); - GetExcitedState(&(X->Bind), 1, vR, v1Org, 0); + GetExcitedState(&(X->Bind), X->Bind.Check.idim_maxOrg, vR, v1Org, 0); + for (idim0 = 1; idim0 < idim_max_int+1; idim0++) + for (idim1 = 0; idim1 < idim_max_int; idim1++) + for (idim2 = 0; idim2 < idim_maxorg_int; idim2++) + vRv[idim0][idim2] += conj(v0[idim0][idim1]) * vR[idim0][idim2]; for (idcSpectrum = 0; idcSpectrum < NdcSpectrum; idcSpectrum++) { StartTimer(6303); zclear(X->Bind.Check.idim_max, &vL[1][0]); - GetExcitedState(&(X->Bind), 1, vL, v1Org, idcSpectrum + 1); - for (idim = 0; idim < idim_max_int; idim++) { - vRv = 0.0; - vLv = 0.0; - for (jdim = 0; jdim < idim_max_int; jdim++) { - vRv += conj(v1[jdim][idim]) * vR[jdim][1]; - vLv += conj(v1[jdim][idim]) * vL[jdim][1]; - } - vLvvRv[idim] = conj(vLv) * vRv; - }/*for (idim = 0; idim < idim_max_int; idim++)*/ + GetExcitedState(&(X->Bind), X->Bind.Check.idim_maxOrg, vL, v1Org, idcSpectrum + 1); + zclear(X->Bind.Check.idim_max* X->Bind.Check.idim_max, &vLv[0][0]); + for (idim0 = 1; idim0 < 
idim_max_int + 1; idim0++) + for (idim1 = 0; idim1 < idim_max_int; idim1++) + for (idim2 = 0; idim2 < idim_maxorg_int; idim2++) + vLv[idim0][idim2] += conj(v0[idim0][idim1]) * vL[idim0][idim2]; StopTimer(6303); /**
      • Compute spectrum @@ -99,11 +101,12 @@ int CalcSpectrumByFullDiag(
      */ StartTimer(6304); - for (jdim = 0; jdim < idim_max_int; jdim++) { + for (idim0 = 0; idim0 < idim_maxorg_int; idim0++) { for (iomega = 0; iomega < Nomega; iomega++) { - dcSpectrum[iomega][idcSpectrum] = 0.0; - for (idim = 0; idim < idim_max_int; idim++) { - dcSpectrum[iomega][idcSpectrum] += vLvvRv[idim] / (dcomega[jdim][iomega] - X->Bind.Phys.energy[idim]); + dcSpectrum[idim0][iomega][idcSpectrum] = 0.0; + for (idim1 = 0; idim1 < idim_max_int; idim1++) { + dcSpectrum[idim0][iomega][idcSpectrum] += conj(vLv[idim1][idim0]) * vRv[idim1][idim0] + / (dcomega[idim0][iomega] - X->Bind.Phys.energy[idim1]); }/*for (idim = 0; idim < idim_max_int; idim++)*/ }/*for (iomega = 0; iomega < Nomega; iomega++)*/ } @@ -111,7 +114,8 @@ int CalcSpectrumByFullDiag( }/*for (idcSpectrum = 1; idcSpectrum < NdcSpectrum; idcSpectrum++)*/ free_cd_2d_allocate(vL); free_cd_2d_allocate(vR); - free_cd_1d_allocate(vLvvRv); + free_cd_2d_allocate(vLv); + free_cd_2d_allocate(vRv); return TRUE; }/*CalcSpectrumByFullDiag*/ diff --git a/src/include/CalcSpectrumByFullDiag.h b/src/include/CalcSpectrumByFullDiag.h index a45ca55c1..af48048d1 100644 --- a/src/include/CalcSpectrumByFullDiag.h +++ b/src/include/CalcSpectrumByFullDiag.h @@ -19,4 +19,4 @@ int CalcSpectrumByFullDiag( struct EDMainCalStruct *X, int Nomega, int NdcSpectrum, - double complex **dcSpectrum, double complex **dcomega, double complex **v1org); + double complex ***dcSpectrum, double complex **dcomega, double complex **v1org); From b52fccd228d8abd1b840cd459ce564eff0bdc2fe Mon Sep 17 00:00:00 2001 From: Mitsuaki Kawamura Date: Thu, 20 Apr 2023 10:01:28 +0900 Subject: [PATCH 42/50] BugFix: All test passes --- src/CalcSpectrum.c | 1 - src/CalcSpectrumByBiCG.c | 519 ++++++++++++++++++++++--------- src/CalcSpectrumByFullDiag.c | 9 +- src/include/CalcSpectrumByBiCG.h | 2 +- 4 files changed, 370 insertions(+), 161 deletions(-) diff --git a/src/CalcSpectrum.c b/src/CalcSpectrum.c index e93d9a26b..fea57c586 100644 --- a/src/CalcSpectrum.c 
+++ b/src/CalcSpectrum.c @@ -427,7 +427,6 @@ int CalcSpectrum( struct EDMainCalStruct *X ) { char sdt[D_FileNameMax]; - char *defname; unsigned long int i; unsigned long int i_max = 0; int i_stp, NdcSpectrum; diff --git a/src/CalcSpectrumByBiCG.c b/src/CalcSpectrumByBiCG.c index be878a40d..98b991527 100644 --- a/src/CalcSpectrumByBiCG.c +++ b/src/CalcSpectrumByBiCG.c @@ -21,135 +21,135 @@ #include "FileIO.h" #include "wrapperMPI.h" #include "common/setmemory.h" -#include "komega/komega.h" #include "mltply.h" #include "CalcSpectrum.h" #include "mltplyCommon.h" #ifdef MPI #include #endif -/**@brief -Read @f$\alpha, \beta@f$, projected residual for restart +/* +*@brief Solve Shifted equation */ -void ReadTMComponents_BiCG( - struct EDMainCalStruct *X,//!<[inout] - double complex *v2,//!<[inout] [CheckList::idim_max] Residual vector - double complex *v4,//!<[inout] [CheckList::idim_max] Shadow esidual vector - double complex *v12,//!<[inout] [CheckList::idim_max] Old residual vector - double complex *v14,//!<[inout] [CheckList::idim_max] Old shadow residual vector - int Nomega,//!<[in] Number of frequencies - int NdcSpectrum, - double complex **dcSpectrum,//!<[inout] [Nomega] Projected result vector, spectrum - double complex *dcomega//!<[in] [Nomega] Frequency +void ShiftedEq( + int iter, + int Nomega, + int NdcSpectrum, + int* lz_conv, + double complex* alpha, + double complex* beta, + double complex* dcomega, + double complex z_seed, + double complex** pBiCG, + double complex** res_proj, + double complex** pi, + double complex** dcSpectrum ) { - char sdt[D_FileNameMax]; - char ctmp[256]; + int iomega, idcSpectrum; + double complex pi_2; - int status[3], idim_max2int, max_step, iter_old; - unsigned long int idx; - double complex *alphaCG, *betaCG, *res_save, z_seed; - double z_seed_r, z_seed_i, alpha_r, alpha_i, beta_r, beta_i, res_r, res_i; - FILE *fp; - int comm; + for (iomega = 0; iomega < Nomega; iomega++) { -#if defined(MPI) - comm = 
MPI_Comm_c2f(MPI_COMM_WORLD); -#else - comm = 0; -#endif - idim_max2int = (int)X->Bind.Check.idim_max; + if (lz_conv[iomega] == 1) continue; - if (X->Bind.Def.iFlgCalcSpec == RECALC_FROM_TMComponents || - X->Bind.Def.iFlgCalcSpec == RECALC_FROM_TMComponents_VEC || - X->Bind.Def.iFlgCalcSpec == RECALC_INOUT_TMComponents_VEC) { - sprintf(sdt, cFileNameTridiagonalMatrixComponents, X->Bind.Def.CDataFileHead); - if (childfopenALL(sdt, "rb", &fp) != 0) { - fprintf(stdoutMPI, "INFO: File for the restart is not found.\n"); - fprintf(stdoutMPI, " Start from SCRATCH.\n"); - max_step = (int)X->Bind.Def.Lanczos_max; - komega_bicg_init(&idim_max2int, &NdcSpectrum, &Nomega, &dcSpectrum[0][0], dcomega, &max_step, &eps_Lanczos, &comm); + if (iter == 1) + pi_2 = 1.0; + else + pi_2 = pi[iter - 2][iomega]; + + pi[iter][iomega] = (1.0 + alpha[iter] * (dcomega[iomega] - z_seed)) * pi[iter - 1][iomega] + - alpha[iter] * beta[iter] / alpha[iter - 1] * (pi_2 - pi[iter - 1][iomega]); + for (idcSpectrum = 0; idcSpectrum < NdcSpectrum; idcSpectrum++) { + pBiCG[iomega][idcSpectrum] = res_proj[iter][idcSpectrum] / pi[iter - 1][iomega] + + (pi_2 / pi[iter - 1][iomega]) * (pi_2 / pi[iter - 1][iomega]) * beta[iter] * pBiCG[iomega][idcSpectrum]; + dcSpectrum[iomega][idcSpectrum] += + pi[iter - 1][iomega] / pi[iter][iomega] * alpha[iter] * pBiCG[iomega][idcSpectrum]; } - else { - fgetsMPI(ctmp, sizeof(ctmp) / sizeof(char), fp); - sscanf(ctmp, "%d", &iter_old); - if (X->Bind.Def.iFlgCalcSpec > RECALC_FROM_TMComponents) { - alphaCG = (double complex*)malloc((iter_old + X->Bind.Def.Lanczos_max) * sizeof(double complex)); - betaCG = (double complex*)malloc((iter_old + X->Bind.Def.Lanczos_max) * sizeof(double complex)); - res_save = (double complex*)malloc((iter_old + X->Bind.Def.Lanczos_max) * sizeof(double complex)); - } - else { - alphaCG = (double complex*)malloc(iter_old * sizeof(double complex)); - betaCG = (double complex*)malloc(iter_old * sizeof(double complex)); - res_save = (double 
complex*)malloc(iter_old * sizeof(double complex)); - } - fgetsMPI(ctmp, sizeof(ctmp) / sizeof(char), fp); - sscanf(ctmp, "%lf %lf\n", &z_seed_r, &z_seed_i); - z_seed = z_seed_r + I * z_seed_i; - - idx = 0; - while (fgetsMPI(ctmp, sizeof(ctmp) / sizeof(char), fp) != NULL) { - sscanf(ctmp, "%lf %lf %lf %lf %lf %lf\n", - &alpha_r, &alpha_i, &beta_r, &beta_i, &res_r, &res_i); - alphaCG[idx] = alpha_r + I * alpha_i; - betaCG[idx] = beta_r + I * beta_i; - res_save[idx] = res_r + I * res_i; - idx += 1; + }/*for (iomega = 0; iomega < Nomega; iomega++)*/ +} +/** +@brief Perform Seed Switch +*/ +void SeedSwitch( + int istate, + int iter, + int Nomega, + int NdcSpectrum, + int* lz_conv, + int* iz_seed, + double complex* z_seed, + double complex* rho, + double complex* dcomega, + long int ndim, + double complex** v2, + double complex** v3, + double complex** v4, + double complex** v5, + double complex** pi, + double complex* alpha, + double complex* beta, + double complex** res_proj +) { + double pi_min; + double complex pi_seed; + int iz_seed0, iomega, jter, idcSpectrum; + long int idim; + // + // Initialize for min + // + for (iomega = 0; iomega < Nomega; iomega++) + if (lz_conv[iomega] == 0) { + iz_seed0 = iomega; + pi_min = cabs(pi[iter][iz_seed0]); + } + // + // Search min. 
+ // + for (iomega = 0; iomega < Nomega; iomega++) { + if (lz_conv[iomega] == 0) + if (cabs(pi[iter][iomega]) < pi_min) { + iz_seed0 = iomega; + pi_min = cabs(pi[iter][iomega]); } - fclose(fp); + }/*for (iomega = 0; iomega < Nomega; iomega++)*/ - if (X->Bind.Def.iFlgCalcSpec == RECALC_FROM_TMComponents) X->Bind.Def.Lanczos_max = 0; - max_step = (int)(iter_old + X->Bind.Def.Lanczos_max); - - komega_bicg_restart(&idim_max2int, &NdcSpectrum, &Nomega, &dcSpectrum[0][0], dcomega, &max_step, &eps_Lanczos, status, - &iter_old, &v2[1], &v12[1], &v4[1], &v14[1], alphaCG, betaCG, &z_seed, res_save, &comm); - free(alphaCG); - free(betaCG); - free(res_save); - }/*if (childfopenALL(sdt, "rb", &fp) == 0)*/ - }/*if (X->Bind.Def.iFlgCalcSpec > RECALC_NOT)*/ - else { - max_step = (int)X->Bind.Def.Lanczos_max; - komega_bicg_init(&idim_max2int, &NdcSpectrum, &Nomega, &dcSpectrum[0][0], dcomega, &max_step, &eps_Lanczos, &comm); + if (cabs(pi[iter][iz_seed0]) < 1.0e-50) { + printf("Error : pi at seed (%d) is 0.", iz_seed0); + exitMPI(-1); } -}/*int ReadTMComponents_BiCG*/ -/**@brief -write @f$\alpha, \beta@f$, projected residual for restart -*/ -int OutputTMComponents_BiCG( - struct EDMainCalStruct *X,//!<[inout] - int liLanczosStp,//!<[in] the BiCG step - int nL -) -{ - char sdt[D_FileNameMax]; - unsigned long int stp; - FILE *fp; - double complex *alphaCG, *betaCG, *res_save, z_seed; - - alphaCG = (double complex*)malloc(liLanczosStp * sizeof(double complex)); - betaCG = (double complex*)malloc(liLanczosStp * sizeof(double complex)); - res_save = (double complex*)malloc(liLanczosStp*nL * sizeof(double complex)); - - komega_bicg_getcoef(alphaCG, betaCG, &z_seed, res_save); - - sprintf(sdt, cFileNameTridiagonalMatrixComponents, X->Bind.Def.CDataFileHead); - childfopenMPI(sdt, "w", &fp); - fprintf(fp, "%d \n", liLanczosStp); - fprintf(fp, "%.10lf %.10lf\n", creal(z_seed), cimag(z_seed)); - for (stp = 0; stp < liLanczosStp; stp++) { - fprintf(fp, "%25.16le %25.16le %25.16le %25.16le 
%25.16le %25.16le\n", - creal(alphaCG[stp]), cimag(alphaCG[stp]), - creal(betaCG[stp]), cimag(betaCG[stp]), - creal(res_save[stp]), cimag(res_save[stp])); + if (iz_seed0 != *iz_seed) { + + *iz_seed = iz_seed0; + *z_seed = dcomega[iz_seed0]; + + *rho /= (pi[iter - 1][iz_seed0] * pi[iter - 1][iz_seed0]); + + for (idim = 1; idim <= ndim; idim++) { + v2[idim][istate] /= pi[iter][iz_seed0]; + v4[idim][istate] /= conj(pi[iter][iz_seed0]); + v3[idim][istate] /= pi[iter - 1][iz_seed0]; + v5[idim][istate] /= conj(pi[iter - 1][iz_seed0]); + } + /* + For restarting + */ + for (jter = 1; jter <= iter; jter++) { + alpha[jter] *= pi[jter - 1][iz_seed0] / pi[jter][iz_seed0]; + if (jter != 1) + beta[jter] *= (pi[jter - 2][iz_seed0] / pi[jter - 1][iz_seed0])* (pi[jter - 2][iz_seed0] / pi[jter - 1][iz_seed0]); + for (idcSpectrum = 0; idcSpectrum < NdcSpectrum; idcSpectrum++) { + res_proj[jter][idcSpectrum] /= pi[jter - 1][iz_seed0]; + } + } + + for (jter = 1; jter <= iter; jter++) { + pi_seed = pi[jter][iz_seed0]; + for (iomega = 0; iomega < Nomega; iomega++) + pi[jter][iomega] /= pi_seed; + } } - fclose(fp); - free(alphaCG); - free(betaCG); - free(res_save); - return TRUE; -}/*int OutputTMComponents_BiCG*/ +}/*void SeedSwitch*/ /** * @brief A main function to calculate spectrum by BiCG method * In this function, the @f$K\omega@f$ library is used. 
@@ -168,20 +168,21 @@ int CalcSpectrumByBiCG( double complex **v4,//!<[inout] [CheckList::idim_max] Work space for residual vector @f${\bf r}@f$ int Nomega,//!<[in] Number of Frequencies int NdcSpectrum, - double complex **dcSpectrum,//!<[out] [Nomega] Spectrum + double complex ***dcSpectrum,//!<[out] [Nomega] Spectrum double complex **dcomega,//!<[in] [Nomega] Frequency double complex **v1Org ) { - char sdt[D_FileNameMax]; + char sdt[D_FileNameMax], ctmp[256]; unsigned long int idim, i_max; FILE *fp; size_t byte_size; - int idcSpectrum; + int idcSpectrum, iter_old; unsigned long int liLanczosStp_vec = 0; - double complex **vL, **v12, **v14, *res_proj; - int stp, status[3], iomega; - double *resz; + double complex **vL, **v12, **v14, **v3, **v5, *z_seed, *rho, *rho_old, + ** alpha, ** beta, *** res_proj, *** pi, *** pBiCG, alpha_denom; + int stp, iomega, istate, *iz_seed, ** lz_conv, lz_conv_all; + double resz, *resnorm, dtmp[4]; fprintf(stdoutMPI, "##### Spectrum calculation with BiCG #####\n\n"); /** @@ -189,11 +190,23 @@ int CalcSpectrumByBiCG(
    • Malloc vector for old residual vector (@f${\bf r}_{\rm old}@f$) and old shadow residual vector (@f${\bf {\tilde r}}_{\rm old}@f$).
    • */ - v12 = cd_2d_allocate(X->Bind.Check.idim_max + 2, 1); - v14 = cd_2d_allocate(X->Bind.Check.idim_max + 1, 1); - vL = cd_2d_allocate(X->Bind.Check.idim_max + 1, 1); - resz = d_1d_allocate(Nomega); - res_proj = cd_1d_allocate(NdcSpectrum); + z_seed = cd_1d_allocate(X->Bind.Def.k_exct); + iz_seed = i_1d_allocate(X->Bind.Def.k_exct); + rho = cd_1d_allocate(X->Bind.Def.k_exct); + rho_old = cd_1d_allocate(X->Bind.Def.k_exct); + resnorm = d_1d_allocate(X->Bind.Def.k_exct); + for (istate = 0; istate < X->Bind.Def.k_exct; istate++) { + iz_seed[istate] = 0; + z_seed[istate] = dcomega[istate][iz_seed[istate]]; + rho[istate] = 1.0; + } + pBiCG = cd_3d_allocate(X->Bind.Def.k_exct, Nomega, NdcSpectrum); + v3 = cd_2d_allocate(X->Bind.Check.idim_max + 1, X->Bind.Def.k_exct); + v5 = cd_2d_allocate(X->Bind.Check.idim_max + 1, X->Bind.Def.k_exct); + v12 = cd_2d_allocate(X->Bind.Check.idim_max + 1, X->Bind.Def.k_exct); + v14 = cd_2d_allocate(X->Bind.Check.idim_max + 1, X->Bind.Def.k_exct); + vL = cd_2d_allocate(X->Bind.Check.idim_max + 1, X->Bind.Def.k_exct); + lz_conv = i_2d_allocate(X->Bind.Def.k_exct, Nomega); /**
    • Set initial result vector(+shadow result vector) Read residual vectors if restart
    • @@ -233,15 +246,104 @@ int CalcSpectrumByBiCG( }/*if (X->Bind.Def.iFlgCalcSpec > RECALC_FROM_TMComponents)*/ else { zclear(X->Bind.Check.idim_max, &v2[1][0]); - GetExcitedState(&(X->Bind), 1, v2, v1Org, 0); -#pragma omp parallel for default(none) shared(v2,v4,v1Org,X) private(idim) + GetExcitedState(&(X->Bind), X->Bind.Def.k_exct, v2, v1Org, 0); +#pragma omp parallel for default(none) shared(v2,v4,v1Org,X) private(idim,istate) for (idim = 1; idim <= X->Bind.Check.idim_max; idim++) - v4[idim][0] = v2[idim][0]; + for (istate = 0;istate < X->Bind.Def.k_exct;istate++) + v4[idim][istate] = v2[idim][istate]; } /**
    • Input @f$\alpha, \beta@f$, projected residual, or start from scratch
    • */ - ReadTMComponents_BiCG(X, &v2[0][0], &v4[0][0], &v12[0][0], &v14[0][0], Nomega, NdcSpectrum, dcSpectrum, dcomega[0]); + if (X->Bind.Def.iFlgCalcSpec == RECALC_FROM_TMComponents || + X->Bind.Def.iFlgCalcSpec == RECALC_FROM_TMComponents_VEC || + X->Bind.Def.iFlgCalcSpec == RECALC_INOUT_TMComponents_VEC) { + sprintf(sdt, cFileNameTridiagonalMatrixComponents, X->Bind.Def.CDataFileHead); + if (childfopenALL(sdt, "rb", &fp) != 0) { + fprintf(stdoutMPI, "INFO: File for the restart is not found.\n"); + fprintf(stdoutMPI, " Start from SCRATCH.\n"); + } + else { + fgetsMPI(ctmp, sizeof(ctmp) / sizeof(char), fp); + sscanf(ctmp, "%d", &iter_old); + if (X->Bind.Def.iFlgCalcSpec > RECALC_FROM_TMComponents) { + alpha = cd_2d_allocate(X->Bind.Def.k_exct, iter_old + X->Bind.Def.Lanczos_max); + beta = cd_2d_allocate(X->Bind.Def.k_exct, iter_old + X->Bind.Def.Lanczos_max); + res_proj = cd_3d_allocate(X->Bind.Def.k_exct, iter_old + X->Bind.Def.Lanczos_max, NdcSpectrum); + pi = cd_3d_allocate(X->Bind.Def.k_exct, iter_old + X->Bind.Def.Lanczos_max, Nomega); + } + else { + alpha = cd_2d_allocate(X->Bind.Def.k_exct, iter_old); + beta = cd_2d_allocate(X->Bind.Def.k_exct, iter_old); + res_proj = cd_3d_allocate(X->Bind.Def.k_exct, iter_old, NdcSpectrum); + pi = cd_3d_allocate(X->Bind.Def.k_exct, iter_old, Nomega); + } + fgetsMPI(ctmp, sizeof(ctmp) / sizeof(char), fp); + for (istate = 0; istate < X->Bind.Def.k_exct; istate++) { + sscanf(ctmp, "%lf %lf\n", &dtmp[0], &dtmp[1]); + z_seed[istate] = dtmp[0] + I * dtmp[1]; + } + + for (istate = 0; istate < X->Bind.Def.k_exct; istate++) { + for (stp = 0; stp < iter_old; stp++) { + fgetsMPI(ctmp, sizeof(ctmp) / sizeof(char), fp); + sscanf(ctmp, "%lf %lf %lf %lf\n", + &dtmp[0], &dtmp[1], &dtmp[2], &dtmp[3]); + alpha[istate][stp] = dtmp[0] + I * dtmp[1]; + beta[istate][stp] = dtmp[2] + I * dtmp[3]; + for (idcSpectrum = 0; idcSpectrum < NdcSpectrum; idcSpectrum++) { + fgetsMPI(ctmp, sizeof(ctmp) / sizeof(char), fp); + sscanf(ctmp, "%lf %lf\n", 
&dtmp[0], &dtmp[1]); + res_proj[istate][stp][idcSpectrum] = dtmp[0] + I * dtmp[1]; + } + } + } + fclose(fp); + + for (stp = 1; stp <= iter_old; stp++) + for (istate = 0; istate < X->Bind.Def.k_exct; istate++) + ShiftedEq(stp, Nomega, NdcSpectrum, lz_conv[istate], alpha[istate], beta[istate], dcomega[istate], + z_seed[istate], pBiCG[istate], res_proj[istate], pi[istate], dcSpectrum[istate]); + + MultiVecProdMPI(X->Bind.Check.idim_max, X->Bind.Def.k_exct, v5, v3, rho); + + for (istate = 0; istate < X->Bind.Def.k_exct; istate++) { + SeedSwitch(istate, stp, Nomega, NdcSpectrum, lz_conv[istate], &iz_seed[istate], + &z_seed[istate], &rho[istate], dcomega[istate], X->Bind.Def.k_exct, v2, v3, v4, v5, + pi[istate], alpha[istate], beta[istate], res_proj[istate]); + } + + resnorm = d_1d_allocate(X->Bind.Def.k_exct); + NormMPI_dv(X->Bind.Check.idim_max, X->Bind.Def.k_exct, v2, resnorm); + + for (istate = 0; istate < X->Bind.Def.k_exct; istate++) + for (iomega = 0; iomega < Nomega; iomega++) + if (fabs(resnorm[istate] / pi[istate][stp][iomega]) < eps_Lanczos) + lz_conv[istate][idcSpectrum] = 1; + free_d_1d_allocate(resnorm); + + if (X->Bind.Def.iFlgCalcSpec == RECALC_FROM_TMComponents) X->Bind.Def.Lanczos_max = 0; + + }/*if (childfopenALL(sdt, "rb", &fp) == 0)*/ + }/*if (X->Bind.Def.iFlgCalcSpec > RECALC_NOT)*/ + else { + iter_old = 0; + alpha = cd_2d_allocate(X->Bind.Def.k_exct, X->Bind.Def.Lanczos_max); + beta = cd_2d_allocate(X->Bind.Def.k_exct, X->Bind.Def.Lanczos_max); + res_proj = cd_3d_allocate(X->Bind.Def.k_exct, X->Bind.Def.Lanczos_max, NdcSpectrum); + pi = cd_3d_allocate(X->Bind.Def.k_exct, X->Bind.Def.Lanczos_max, Nomega); + for (istate = 0; istate < X->Bind.Def.k_exct; istate++) { + alpha[istate][0] = 1.0; + beta[istate][0] = 0.0; + for (iomega = 0; iomega < Nomega; iomega++) { + pi[istate][0][iomega] = 1.0; + for (idcSpectrum = 0; idcSpectrum < NdcSpectrum; idcSpectrum++) { + pBiCG[istate][iomega][idcSpectrum] = 0.0; + dcSpectrum[istate][iomega][idcSpectrum] = 0.0; 
+ } + }/*for (iomega = 0; iomega < Nomega; iomega++)*/ + }/*for (istate = 0; istate < nstate; istate++)*/ + } /**
    • @b DO BiCG loop
      • @@ -251,43 +353,126 @@ int CalcSpectrumByBiCG( fprintf(stdoutMPI, "\n Iteration Status Seed Residual-2-Norm\n"); childfopenMPI("residual.dat", "w", &fp); - for (stp = 1; stp <= X->Bind.Def.Lanczos_max; stp++) { + for (stp = iter_old + 1; stp <= iter_old + X->Bind.Def.Lanczos_max; stp++) { /**
      • @f${\bf v}_{2}={\hat H}{\bf v}_{12}, {\bf v}_{4}={\hat H}{\bf v}_{14}@f$, where @f${\bf v}_{12}, {\bf v}_{14}@f$ are old (shadow) residual vector.
      • */ - zclear(X->Bind.Check.idim_max, &v12[1][0]); - zclear(X->Bind.Check.idim_max, &v14[1][0]); - mltply(&X->Bind, 1, v12, v2); - mltply(&X->Bind, 1, v14, v4); + zclear(X->Bind.Check.idim_max * X->Bind.Def.k_exct, &v12[1][0]); + zclear(X->Bind.Check.idim_max * X->Bind.Def.k_exct, &v14[1][0]); + mltply(&X->Bind, X->Bind.Def.k_exct, v12, v2); + mltply(&X->Bind, X->Bind.Def.k_exct, v14, v4); for (idcSpectrum = 0; idcSpectrum < NdcSpectrum; idcSpectrum++) { - zclear(X->Bind.Check.idim_max, &vL[1][0]); - GetExcitedState(&(X->Bind), 1, vL, v1Org, idcSpectrum + 1); - res_proj[idcSpectrum] = VecProdMPI(X->Bind.Check.idim_max, &vL[0][0], &v2[0][0]); + zclear(X->Bind.Check.idim_max * X->Bind.Def.k_exct, &vL[1][0]); + GetExcitedState(&(X->Bind), X->Bind.Def.k_exct, vL, v1Org, idcSpectrum + 1); + MultiVecProdMPI(X->Bind.Check.idim_max, X->Bind.Def.k_exct, vL, v2, rho_old); + for (istate = 0; istate < X->Bind.Def.k_exct; istate++)res_proj[istate][stp][idcSpectrum] = rho_old[istate]; } /**
      • Update projected result vector dcSpectrum.
      • */ - komega_bicg_update(&v12[1][0], &v2[1][0], &v14[1][0], &v4[1][0], &dcSpectrum[0][0], res_proj, status); + for (istate = 0; istate < X->Bind.Def.k_exct; istate++)rho_old[istate] = rho[istate]; + MultiVecProdMPI(X->Bind.Check.idim_max, X->Bind.Def.k_exct, v4, v2, rho); + for (istate = 0; istate < X->Bind.Def.k_exct; istate++) { + if (stp == 1) + beta[istate][stp] = 0.0; + else + beta[istate][stp] = rho[istate] / rho_old[istate]; + + for (idim = 1; idim <= X->Bind.Check.idim_max; idim++) { + v12[idim][istate] = z_seed[istate] * v2[idim][istate] - v12[idim][istate]; + v14[idim][istate] = conj(z_seed[istate]) * v4[idim][istate] - v14[idim][istate]; + } + }/*for (istate = 0; istate < nstate; istate++)*/ + + MultiVecProdMPI(X->Bind.Check.idim_max, X->Bind.Def.k_exct, v4, v12, rho_old); + + for (istate = 0; istate < X->Bind.Def.k_exct; istate++) { + alpha_denom = rho_old[istate] - beta[istate][stp] * rho[istate] / alpha[istate][stp - 1]; + + if (cabs(alpha_denom) < 1.0e-50) { + printf("Error : The denominator of alpha is zero.\n"); + exitMPI(-1); + } + else if (cabs(rho[istate]) < 1.0e-50) { + printf("Error : rho is zero.\n"); + exitMPI(-1); + } + alpha[istate][stp] = rho[istate] / alpha_denom; + /* + Shifted equation + */ + ShiftedEq(stp, Nomega, NdcSpectrum, lz_conv[istate], alpha[istate], beta[istate], dcomega[istate], + z_seed[istate], pBiCG[istate], res_proj[istate], pi[istate], dcSpectrum[istate]); + /* + Update residual + */ + for (idim = 1; idim <= X->Bind.Check.idim_max; idim++) { + v12[idim][istate] = (1.0 + alpha[istate][stp] * beta[istate][stp] / alpha[istate][stp - 1]) * v2[idim][istate] + - alpha[istate][stp] * v12[idim][istate] + - alpha[istate][stp] * beta[istate][stp] / alpha[istate][stp - 1] * v3[idim][istate]; + v3[idim][istate] = v2[idim][istate]; + v2[idim][istate] = v12[idim][istate]; + v14[idim][istate] = (1.0 + conj(alpha[istate][stp] * beta[istate][stp] / alpha[istate][stp - 1])) * v4[idim][istate] + - conj(alpha[istate][stp]) * 
v14[idim][istate] + - conj(alpha[istate][stp] * beta[istate][stp] / alpha[istate][stp - 1]) * v5[idim][istate]; + v5[idim][istate] = v4[idim][istate]; + v4[idim][istate] = v14[idim][istate]; + }/*for (idim = 0; idim < Check::idim_maxs; idim++)*/ + /* + Seed Switching + */ + SeedSwitch(istate, stp, Nomega, NdcSpectrum, lz_conv[istate], &iz_seed[istate], + &z_seed[istate], &rho[istate], dcomega[istate], X->Bind.Check.idim_max, v2, v3, v4, v5, + pi[istate], alpha[istate], beta[istate], res_proj[istate]); + }/*for (istate = 0; istaet < nstate; istate++)*/ + /* + Convergence check + */ + NormMPI_dv(X->Bind.Check.idim_max, X->Bind.Def.k_exct, v2, resnorm); + + lz_conv_all = 1; + fprintf(stdoutMPI, " %9d ", stp); + for (istate = 0; istate < X->Bind.Def.k_exct; istate++) { + for (iomega = 0; iomega < Nomega; iomega++) { + if (lz_conv[istate][iomega] == 0) + if(fabs(resnorm[istate] / pi[istate][stp][iomega]) < eps_Lanczos) + lz_conv[istate][iomega] = 1; + lz_conv_all *= lz_conv[istate][iomega]; + } + + fprintf(stdoutMPI, "%9d %25.15e", iz_seed[istate], resnorm[istate]); + }/*for (istate = 0; istate < nstate; istate++)*/ + fprintf(stdoutMPI, "\n"); + + if (lz_conv_all == 1) break; + /**
      • Output residuals at each frequency for some analysis
      • */ if (stp % 10 == 0) { - komega_bicg_getresidual(resz); for (iomega = 0; iomega < Nomega; iomega++) { - fprintf(fp, "%7i %20.10e %20.10e %20.10e %20.10e\n", - stp, creal(dcomega[0][iomega]), - creal(dcSpectrum[iomega][0]), cimag(dcSpectrum[iomega][0]), - resz[iomega]); + fprintf(fp, "%7i ", stp); + for (istate = 0; istate < X->Bind.Def.k_exct; istate++) { + resz = resnorm[istate] / cabs(pi[istate][stp][iomega]);//FIXME + + fprintf(fp, "%20.10e %20.10e %20.10e %20.10e ", + creal(dcomega[istate][iomega]), + creal(dcSpectrum[istate][iomega][0]), cimag(dcSpectrum[istate][iomega][0]),resz); + } + fprintf(fp, "\n"); } fprintf(fp, "\n"); } - fprintf(stdoutMPI, " %9d %9d %8d %25.15e\n", abs(status[0]), status[1], status[2], creal(v12[1][0])); - if (status[0] < 0) break; + }/*for (stp = 0; stp <= X->Bind.Def.Lanczos_max; stp++)*/ fclose(fp); + + if (stp >= iter_old + X->Bind.Def.Lanczos_max) + fprintf(stdoutMPI, "Remark : Not converged in iteration %d.", stp); + iter_old = stp; /**
    • @b END @b DO BiCG loop
    • @@ -297,8 +482,25 @@ int CalcSpectrumByBiCG( /**
    • Save @f$\alpha, \beta@f$, projected residual
    • */ - if (X->Bind.Def.iFlgCalcSpec != RECALC_FROM_TMComponents) - OutputTMComponents_BiCG(X, abs(status[0]), NdcSpectrum); + if (X->Bind.Def.iFlgCalcSpec != RECALC_FROM_TMComponents) { + sprintf(sdt, cFileNameTridiagonalMatrixComponents, X->Bind.Def.CDataFileHead); + childfopenMPI(sdt, "w", &fp); + fprintf(fp, "%d \n", iter_old); + for (istate = 0; istate < X->Bind.Def.k_exct; istate++) + fprintf(fp, "%.10lf %.10lf\n", creal(z_seed[istate]), cimag(z_seed[istate])); + for (istate = 0; istate < X->Bind.Def.k_exct; istate++) { + for (stp = 0; stp < iter_old; stp++) { + fprintf(fp, "%25.16le %25.16le %25.16le %25.16le\n", + creal(alpha[istate][stp]), cimag(alpha[istate][stp]), + creal(beta[istate][stp]), cimag(beta[istate][stp])); + for (idcSpectrum = 0; idcSpectrum < NdcSpectrum; idcSpectrum++) { + fprintf(fp, "%25.16le %25.16le\n", + creal(res_proj[istate][stp][idcSpectrum]), cimag(res_proj[istate][stp][idcSpectrum])); + } + } + } + fclose(fp); + } /**
    • output vectors for recalculation
    @@ -308,30 +510,37 @@ int CalcSpectrumByBiCG( fprintf(stdoutMPI, " Start: Output vectors for recalculation.\n"); TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_OutputSpectrumRecalcvecStart, "a"); - komega_bicg_getvec(&v12[1][0], &v14[1][0]); - sprintf(sdt, cFileNameOutputRestartVec, X->Bind.Def.CDataFileHead, myrank); if (childfopenALL(sdt, "wb", &fp) != 0) { exitMPI(-1); } - byte_size = fwrite(&status[0], sizeof(status[0]), 1, fp); + byte_size = fwrite(&iter_old, sizeof(iter_old), 1, fp); byte_size = fwrite(&X->Bind.Check.idim_max, sizeof(X->Bind.Check.idim_max), 1, fp); - byte_size = fwrite(&v2[0], sizeof(complex double), X->Bind.Check.idim_max + 1, fp); - byte_size = fwrite(&v12[0], sizeof(complex double), X->Bind.Check.idim_max + 1, fp); - byte_size = fwrite(&v4[0], sizeof(complex double), X->Bind.Check.idim_max + 1, fp); - byte_size = fwrite(&v14[0], sizeof(complex double), X->Bind.Check.idim_max + 1, fp); + byte_size = fwrite(&v2[0][0], sizeof(complex double), (X->Bind.Check.idim_max + 1)* X->Bind.Def.k_exct, fp); + byte_size = fwrite(&v12[0][0], sizeof(complex double), (X->Bind.Check.idim_max + 1) * X->Bind.Def.k_exct, fp); + byte_size = fwrite(&v4[0][0], sizeof(complex double), (X->Bind.Check.idim_max + 1) * X->Bind.Def.k_exct, fp); + byte_size = fwrite(&v14[0][0], sizeof(complex double), (X->Bind.Check.idim_max + 1) * X->Bind.Def.k_exct, fp); fclose(fp); fprintf(stdoutMPI, " End: Output vectors for recalculation.\n"); TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_OutputSpectrumRecalcvecEnd, "a"); }/*if (X->Bind.Def.iFlgCalcSpec > RECALC_FROM_TMComponents)*/ - komega_bicg_finalize(); - - free_d_1d_allocate(resz); + free_cd_1d_allocate(z_seed); + free_i_1d_allocate(iz_seed); + free_cd_1d_allocate(rho); + free_cd_1d_allocate(rho_old); + free_d_1d_allocate(resnorm); + free_cd_3d_allocate(pBiCG); + free_cd_2d_allocate(v3); + free_cd_2d_allocate(v5); free_cd_2d_allocate(v12); free_cd_2d_allocate(v14); free_cd_2d_allocate(vL); - free_cd_1d_allocate(res_proj); + 
free_i_2d_allocate(lz_conv); + free_cd_2d_allocate(alpha); + free_cd_2d_allocate(beta); + free_cd_3d_allocate(pi); + free_cd_3d_allocate(res_proj); return TRUE; }/*int CalcSpectrumByBiCG*/ diff --git a/src/CalcSpectrumByFullDiag.c b/src/CalcSpectrumByFullDiag.c index 1d3a4be1b..bac695f18 100644 --- a/src/CalcSpectrumByFullDiag.c +++ b/src/CalcSpectrumByFullDiag.c @@ -60,8 +60,9 @@ int CalcSpectrumByFullDiag( vRv = cd_2d_allocate(idim_max_int, idim_maxorg_int); StartTimer(6301); - zclear((X->Bind.Check.idim_max + 1)*(X->Bind.Check.idim_max + 1), &v0[0][0]); - zclear((X->Bind.Check.idim_max + 1)*(X->Bind.Check.idim_max + 1), &v1[0][0]); + v0[0][0] = 0.0; + zclear((X->Bind.Check.idim_max + 1)*X->Bind.Check.idim_max, &v0[0][0]); + zclear((X->Bind.Check.idim_max + 1)*X->Bind.Check.idim_max, &v1[0][0]); for (idim0 = 1; idim0 <= X->Bind.Check.idim_max; idim0++) v1[idim0][idim0] = 1.0; mltply(&(X->Bind), X->Bind.Check.idim_max, v0, v1); StopTimer(6301); @@ -81,7 +82,7 @@ int CalcSpectrumByFullDiag( for (idim0 = 1; idim0 < idim_max_int+1; idim0++) for (idim1 = 0; idim1 < idim_max_int; idim1++) for (idim2 = 0; idim2 < idim_maxorg_int; idim2++) - vRv[idim0][idim2] += conj(v0[idim0][idim1]) * vR[idim0][idim2]; + vRv[idim1][idim2] += conj(v0[idim0][idim1]) * vR[idim0][idim2]; for (idcSpectrum = 0; idcSpectrum < NdcSpectrum; idcSpectrum++) { StartTimer(6303); zclear(X->Bind.Check.idim_max, &vL[1][0]); @@ -90,7 +91,7 @@ int CalcSpectrumByFullDiag( for (idim0 = 1; idim0 < idim_max_int + 1; idim0++) for (idim1 = 0; idim1 < idim_max_int; idim1++) for (idim2 = 0; idim2 < idim_maxorg_int; idim2++) - vLv[idim0][idim2] += conj(v0[idim0][idim1]) * vL[idim0][idim2]; + vLv[idim1][idim2] += conj(v0[idim0][idim1]) * vL[idim0][idim2]; StopTimer(6303); /**
  • Compute spectrum diff --git a/src/include/CalcSpectrumByBiCG.h b/src/include/CalcSpectrumByBiCG.h index 822228203..a4767e5d0 100644 --- a/src/include/CalcSpectrumByBiCG.h +++ b/src/include/CalcSpectrumByBiCG.h @@ -22,7 +22,7 @@ int CalcSpectrumByBiCG( double complex **v4, int Nomega, int NdcSpectrum, - double complex **dcSpectrum, + double complex ***dcSpectrum, double complex **dcomega, double complex **v1Org ); From 68d9290137c7b388fac35325705aa9e293df66da Mon Sep 17 00:00:00 2001 From: Mitsuaki Kawamura Date: Fri, 21 Apr 2023 15:25:58 +0900 Subject: [PATCH 43/50] Bugfix in spectrum by BiCG with multiple eigenstates Remove Komega --- src/CMakeLists.txt | 4 +- src/CalcSpectrum.c | 1 - src/CalcSpectrumByBiCG.c | 19 +- src/CalcSpectrumByFullDiag.c | 15 +- src/komega/CMakeLists.txt | 26 -- src/komega/komega.h | 91 ------ src/komega/komega_bicg.F90 | 552 ----------------------------------- src/komega/komega_math.F90 | 200 ------------- src/komega/komega_vals.F90 | 142 --------- src/komega/makefile_komega | 23 -- 10 files changed, 24 insertions(+), 1049 deletions(-) delete mode 100644 src/komega/CMakeLists.txt delete mode 100644 src/komega/komega.h delete mode 100644 src/komega/komega_bicg.F90 delete mode 100644 src/komega/komega_math.F90 delete mode 100644 src/komega/komega_vals.F90 delete mode 100644 src/komega/makefile_komega diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 9530f775f..f6a1bb429 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -4,8 +4,6 @@ if(${CMAKE_PROJECT_NAME} STREQUAL "Project") message(FATAL_ERROR "cmake should be executed not for 'src' subdirectory, but for the top directory of HPhi.") endif(${CMAKE_PROJECT_NAME} STREQUAL "Project") -add_subdirectory(komega) - include_directories(include) include_directories(${STDFACE_DIR}) add_definitions(-D_HPhi) @@ -39,7 +37,7 @@ ${STDFACE_DIR}/src/TriangularLattice.c ${STDFACE_DIR}/src/Wannier90.c) add_executable(HPhi ${SOURCES} ${SOURCES_LANCZOS} ${SOURCES_CG} ${SOURCES_TPQ} 
${SOURCES_SPECTRUM} ${SOURCES_TEM} ${SOURCES_MLTPLY} ${SOURCES_DIAG} ${SOURCES_PHYS} ${SOURCES_STDFACE} ${SOURCES_COMMON}) -target_link_libraries(HPhi komega ${LAPACK_LIBRARIES} m) +target_link_libraries(HPhi ${LAPACK_LIBRARIES} m) if(MPI_FOUND) target_link_libraries(HPhi ${MPI_C_LIBRARIES}) endif(MPI_FOUND) diff --git a/src/CalcSpectrum.c b/src/CalcSpectrum.c index fea57c586..c831fde18 100644 --- a/src/CalcSpectrum.c +++ b/src/CalcSpectrum.c @@ -522,7 +522,6 @@ int CalcSpectrum( sscanf(sdt, " Energy %lf", &energy0[istate]); fgetsMPI(sdt, D_FileNameMax, fp); fgetsMPI(sdt, D_FileNameMax, fp); - fgetsMPI(sdt, D_FileNameMax, fp); } fclose(fp); } diff --git a/src/CalcSpectrumByBiCG.c b/src/CalcSpectrumByBiCG.c index 98b991527..79d994a05 100644 --- a/src/CalcSpectrumByBiCG.c +++ b/src/CalcSpectrumByBiCG.c @@ -49,7 +49,10 @@ void ShiftedEq( for (iomega = 0; iomega < Nomega; iomega++) { - if (lz_conv[iomega] == 1) continue; + if (lz_conv[iomega] == 1) { + pi[iter][iomega] = pi[iter - 1][iomega]; + continue; + } if (iter == 1) pi_2 = 1.0; @@ -96,11 +99,13 @@ void SeedSwitch( // // Initialize for min // + iz_seed0 = -1; for (iomega = 0; iomega < Nomega; iomega++) if (lz_conv[iomega] == 0) { iz_seed0 = iomega; pi_min = cabs(pi[iter][iz_seed0]); } + if (iz_seed0 == -1) return; // // Search min. 
// @@ -181,7 +186,7 @@ int CalcSpectrumByBiCG( unsigned long int liLanczosStp_vec = 0; double complex **vL, **v12, **v14, **v3, **v5, *z_seed, *rho, *rho_old, ** alpha, ** beta, *** res_proj, *** pi, *** pBiCG, alpha_denom; - int stp, iomega, istate, *iz_seed, ** lz_conv, lz_conv_all; + int stp, iomega, istate, *iz_seed, ** lz_conv, *lz_conv_state, lz_conv_all; double resz, *resnorm, dtmp[4]; fprintf(stdoutMPI, "##### Spectrum calculation with BiCG #####\n\n"); @@ -207,6 +212,7 @@ int CalcSpectrumByBiCG( v14 = cd_2d_allocate(X->Bind.Check.idim_max + 1, X->Bind.Def.k_exct); vL = cd_2d_allocate(X->Bind.Check.idim_max + 1, X->Bind.Def.k_exct); lz_conv = i_2d_allocate(X->Bind.Def.k_exct, Nomega); + lz_conv_state = i_1d_allocate(X->Bind.Def.k_exct); /**
  • Set initial result vector(+shadow result vector) Read residual vectors if restart
  • @@ -375,6 +381,8 @@ int CalcSpectrumByBiCG( for (istate = 0; istate < X->Bind.Def.k_exct; istate++)rho_old[istate] = rho[istate]; MultiVecProdMPI(X->Bind.Check.idim_max, X->Bind.Def.k_exct, v4, v2, rho); for (istate = 0; istate < X->Bind.Def.k_exct; istate++) { + lz_conv_all *= lz_conv[istate][iomega]; + if (stp == 1) beta[istate][stp] = 0.0; else @@ -389,6 +397,8 @@ int CalcSpectrumByBiCG( MultiVecProdMPI(X->Bind.Check.idim_max, X->Bind.Def.k_exct, v4, v12, rho_old); for (istate = 0; istate < X->Bind.Def.k_exct; istate++) { + if (lz_conv_state[istate] == 1) continue; + alpha_denom = rho_old[istate] - beta[istate][stp] * rho[istate] / alpha[istate][stp - 1]; if (cabs(alpha_denom) < 1.0e-50) { @@ -435,12 +445,14 @@ int CalcSpectrumByBiCG( lz_conv_all = 1; fprintf(stdoutMPI, " %9d ", stp); for (istate = 0; istate < X->Bind.Def.k_exct; istate++) { + lz_conv_state[istate] = 1; for (iomega = 0; iomega < Nomega; iomega++) { if (lz_conv[istate][iomega] == 0) if(fabs(resnorm[istate] / pi[istate][stp][iomega]) < eps_Lanczos) lz_conv[istate][iomega] = 1; - lz_conv_all *= lz_conv[istate][iomega]; + lz_conv_state[istate] *= lz_conv[istate][iomega]; } + lz_conv_all *= lz_conv_state[istate]; fprintf(stdoutMPI, "%9d %25.15e", iz_seed[istate], resnorm[istate]); }/*for (istate = 0; istate < nstate; istate++)*/ @@ -538,6 +550,7 @@ int CalcSpectrumByBiCG( free_cd_2d_allocate(v14); free_cd_2d_allocate(vL); free_i_2d_allocate(lz_conv); + free_i_1d_allocate(lz_conv_state); free_cd_2d_allocate(alpha); free_cd_2d_allocate(beta); free_cd_3d_allocate(pi); diff --git a/src/CalcSpectrumByFullDiag.c b/src/CalcSpectrumByFullDiag.c index bac695f18..1a966a493 100644 --- a/src/CalcSpectrumByFullDiag.c +++ b/src/CalcSpectrumByFullDiag.c @@ -60,11 +60,10 @@ int CalcSpectrumByFullDiag( vRv = cd_2d_allocate(idim_max_int, idim_maxorg_int); StartTimer(6301); - v0[0][0] = 0.0; zclear((X->Bind.Check.idim_max + 1)*X->Bind.Check.idim_max, &v0[0][0]); zclear((X->Bind.Check.idim_max + 
1)*X->Bind.Check.idim_max, &v1[0][0]); - for (idim0 = 1; idim0 <= X->Bind.Check.idim_max; idim0++) v1[idim0][idim0] = 1.0; - mltply(&(X->Bind), X->Bind.Check.idim_max, v0, v1); + for (idim0 = 1; idim0 <= X->Bind.Check.idim_max; idim0++) v1[idim0][idim0-1] = 1.0; + mltply(&(X->Bind), idim_max_int, v0, v1); StopTimer(6301); /**
  • ::v0 becomes eigenvalues in lapack_diag(), and @@ -77,17 +76,17 @@ int CalcSpectrumByFullDiag(
  • Compute @f$|\langle n|c|0\rangle|^2@f$ for all @f$n@f$ and store them into ::v1, where @f$c|0\rangle@f$ is ::vg.
  • */ - zclear(X->Bind.Check.idim_max, &vR[1][0]); - GetExcitedState(&(X->Bind), X->Bind.Check.idim_maxOrg, vR, v1Org, 0); + zclear(idim_max_int * idim_maxorg_int, &vR[1][0]); + GetExcitedState(&(X->Bind), idim_maxorg_int, vR, v1Org, 0); for (idim0 = 1; idim0 < idim_max_int+1; idim0++) for (idim1 = 0; idim1 < idim_max_int; idim1++) for (idim2 = 0; idim2 < idim_maxorg_int; idim2++) vRv[idim1][idim2] += conj(v0[idim0][idim1]) * vR[idim0][idim2]; for (idcSpectrum = 0; idcSpectrum < NdcSpectrum; idcSpectrum++) { StartTimer(6303); - zclear(X->Bind.Check.idim_max, &vL[1][0]); - GetExcitedState(&(X->Bind), X->Bind.Check.idim_maxOrg, vL, v1Org, idcSpectrum + 1); - zclear(X->Bind.Check.idim_max* X->Bind.Check.idim_max, &vLv[0][0]); + zclear(idim_max_int * idim_maxorg_int, &vL[1][0]); + GetExcitedState(&(X->Bind), idim_maxorg_int, vL, v1Org, idcSpectrum + 1); + zclear(idim_max_int*idim_maxorg_int, &vLv[0][0]); for (idim0 = 1; idim0 < idim_max_int + 1; idim0++) for (idim1 = 0; idim1 < idim_max_int; idim1++) for (idim2 = 0; idim2 < idim_maxorg_int; idim2++) diff --git a/src/komega/CMakeLists.txt b/src/komega/CMakeLists.txt deleted file mode 100644 index 0900defd7..000000000 --- a/src/komega/CMakeLists.txt +++ /dev/null @@ -1,26 +0,0 @@ -# include guard -cmake_minimum_required(VERSION 2.8) -if(${CMAKE_PROJECT_NAME} STREQUAL "Project") - message(FATAL_ERROR "cmake should be executed not for 'src' subdirectory, but for the top directory of HPhi.") -endif(${CMAKE_PROJECT_NAME} STREQUAL "Project") - -add_definitions(-D__NO_ZDOT) -set(SOURCES_KOMEGA komega_bicg.F90 komega_math.F90 komega_vals.F90) -if(MPI_Fortran_FOUND) - add_definitions(${MPI_Fortran_COMPILE_FLAGS}) - link_directories(${MPI_Fortran_LIBRARY_DIRS}) - include_directories(${MPI_Fortran_INCLUDE_PATH}) -endif(MPI_Fortran_FOUND) - -if(CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") - if(CMAKE_Fortran_COMPILER_VERSION VERSION_GREATER_EQUAL 10.0.0) - add_definitions("-fallow-argument-mismatch") - endif() -endif() - 
-add_library(komega ${SOURCES_KOMEGA}) -target_link_libraries(komega ${LAPACK_LIBRARIES} m) -if(MPI_Fortran_FOUND) - target_link_libraries(komega ${MPI_Fortran_LIBRARIES}) -endif(MPI_Fortran_FOUND) -install(TARGETS komega ARCHIVE DESTINATION lib LIBRARY DESTINATION lib RUNTIME DESTINATION bin) diff --git a/src/komega/komega.h b/src/komega/komega.h deleted file mode 100644 index 607858298..000000000 --- a/src/komega/komega.h +++ /dev/null @@ -1,91 +0,0 @@ -/* -ISSP Math Library - A library for solving linear systems in materials science -Copyright (C) 2016 Mitsuaki Kawamura - -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Lesser General Public -License as published by the Free Software Foundation; either -version 2.1 of the License, or (at your option) any later version. - -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. -You should have received a copy of the GNU Lesser General Public -License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -For more details, See ‘COPYING.LESSER’ in the root directory of this library. 
-*/ - -#include -#pragma once - -void komega_bicg_init(int *ndim, int *nl, int *nz, double complex *x, - double complex *z, int *itermax, double *threshold, int *comm); -void komega_cocg_init(int *ndim, int *nl, int *nz, double complex *x, - double complex *z, int *itermax, double *threshold, int *comm); -void komega_cg_c_init(int *ndim, int *nl, int *nz, double complex *x, - double *z, int *itermax, double *threshold, int *comm); -void komega_cg_r_init(int *ndim, int *nl, int *nz, double *x, - double *z, int *itermax, double *threshold, int *comm); - -void komega_bicg_restart(int *ndim, int *nl, int *nz, double complex *x, - double complex *z, int *itermax, double *threshold, - int *status, int *iter_old, double complex *v2, - double complex *v12, double complex *v4, double complex *v14, - double complex *alpha_save, double complex *beta_save, - double complex *z_seed, double complex *r_l_save, int *comm); -void komega_cocg_restart(int *ndim, int *nl, int *nz, double complex *x, - double complex *z, int *itermax, double *threshold, - int *status, int *iter_old, double complex *v2, - double complex *v12, - double complex *alpha_save, double complex *beta_save, - double complex *z_seed, double complex *r_l_save, int *comm); -void komega_cg_c_restart(int *ndim, int *nl, int *nz, double complex *x, - double *z, int *itermax, double *threshold, - int *status, int *iter_old, double complex *v2, - double complex *v12, - double *alpha_save, double *beta_save, - double *z_seed, double complex *r_l_save, int *comm); -void komega_cg_r_restart(int *ndim, int *nl, int *nz, double *x, - double *z, int *itermax, double *threshold, - int *status, int *iter_old, double *v2, - double *v12, - double *alpha_save, double *beta_save, - double *z_seed, double *r_l_save, int *comm); - -void komega_bicg_update(double complex *v12, double complex *v2, - double complex *v14, double complex *v4, - double complex *x, double complex *r_l, int *status); -void komega_cocg_update(double complex 
*v12, double complex *v2, - double complex *x, double complex *r_l, int *status); -void komega_cg_c_update(double complex *v12, double complex *v2, - double complex *x, double complex *r_l, int *status); -void komega_cg_r_update(double *v12, double *v2, - double *x, double *r_l, int *status); - - -void komega_bicg_getcoef(double complex *alpha_save, double complex *beta_save, - double complex *z_seed, double complex *r_l_save); -void komega_cocg_getcoef(double complex *alpha_save, double complex *beta_save, - double complex *z_seed, double complex *r_l_save); -void komega_cg_c_getcoef(double *alpha_save, double *beta_save, - double *z_seed, double complex *r_l_save); -void komega_cg_r_getcoef(double *alpha_save, double *beta_save, - double *z_seed, double *r_l_save); - -void komega_bicg_getvec(double complex *r_old, double complex *r_tilde_old); -void komega_cocg_getvec(double complex *r_old); -void komega_cg_c_getvec(double complex *r_old); -void komega_cg_r_getvec(double *r_old); - -void komega_bicg_getresidual(double *res); -void komega_cocg_getresidual(double *res); -void komega_cg_c_getresidual(double *res); -void komega_cg_r_getresidual(double *res); - -void komega_bicg_finalize(); -void komega_cocg_finalize(); -void komega_cg_r_finalize(); -void komega_cg_c_finalize(); diff --git a/src/komega/komega_bicg.F90 b/src/komega/komega_bicg.F90 deleted file mode 100644 index f78035b5b..000000000 --- a/src/komega/komega_bicg.F90 +++ /dev/null @@ -1,552 +0,0 @@ -! -! ISSP Math Library - A library for solving linear systems in materials science -! Copyright (C) 2016 Mitsuaki Kawamura -! -! This library is free software; you can redistribute it and/or -! modify it under the terms of the GNU Lesser General Public -! License as published by the Free Software Foundation; either -! version 2.1 of the License, or (at your option) any later version. -! -! This library is distributed in the hope that it will be useful, -! 
but WITHOUT ANY WARRANTY; without even the implied warranty of -! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -! Lesser General Public License for more details. -! -! You should have received a copy of the GNU Lesser General Public -! License along with this library; if not, write to the Free Software -! Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -! -! For more details, See `COPYING.LESSER' in the root directory of this library. -! -!> Routines for BiCG -!! -!!- \f$G_{i j}(z_k) = 0 (i=1 \cdots N_L,\; j = 1 \cdots N_R,\; k=1 \cdots N_z)\f$ -!!- do \f$j = 1 \cdots N_R\f$ -!! -!! - \f${\boldsymbol r} = {\boldsymbol \varphi_j}\f$, -!! - \f${\tilde {\boldsymbol r}} =\f$ an arbitrary vector, -!! \f${\boldsymbol r}^{\rm old} = {\tilde {\boldsymbol r}}^{\rm old} = {\bf 0}\f$ -!! - \f$p_{i k} = 0(i=1 \cdots N_L,\; k=1 \cdots N_z),\; \pi_k=\pi_k^{\rm old} = 1(k=1 \cdots N_z)\f$ -!! - \f$\rho = \infty,\; \alpha = 1,\; z_{\rm seed}=0\f$ -!! - do iteration -!! - \f$\circ\f$ Seed equation -!! - \f$\rho^{\rm old} = \rho,\; \rho = {\tilde {\boldsymbol r}}^* \cdot {\boldsymbol r}\f$ -!! - \f$\beta = \rho / \rho^{\rm old}\f$ -!! - \f${\boldsymbol q} = (z_{\rm seed} {\hat I} - {\hat H}){\boldsymbol r}\f$ -!! - \f$\alpha^{\rm old} = \alpha,\; -!! \alpha = \frac{\rho}{{\tilde {\boldsymbol r}}^*\cdot{\boldsymbol q} - \beta \rho / \alpha }\f$ -!! - \f$\circ\f$ Shifted equation -!! - do \f$k = 1 \cdots N_z\f$ -!! - \f$\pi_k^{\rm new} = [1+\alpha(z_k-z_{\rm seed})]\pi_k - -!! \frac{\alpha \beta}{\alpha^{\rm old}}(\pi_k^{\rm old} - \pi_k)\f$ -!! - do \f$i = 1 \cdots N_L\f$ -!! - \f$p_{i k} = \frac{1}{\pi_k} {\boldsymbol \varphi}_i^* \cdot {\boldsymbol r} + -!! \frac{\pi^{\rm old}_k \pi^{\rm old}_k}{\pi_k \pi_k} \beta p_{i k}\f$ -!! - \f$G_{i j}(z_k) = G_{i j}(z_k) + \frac{\pi_k}{\pi_k^{\rm new}} \alpha p_{i k}\f$ -!! - \f$\pi_k^{\rm old} = \pi_k\f$, \f$\pi_k = \pi_k^{\rm new}\f$ -!! - end do \f$i\f$ -!! - end do \f$k\f$ -!! 
- \f${\boldsymbol q} = \left( 1 + \frac{\alpha \beta}{\alpha^{\rm old}} \right) {\boldsymbol r} - -!! \alpha {\boldsymbol q} - \frac{\alpha \beta}{\alpha^{\rm old}} {\boldsymbol r}^{\rm old},\; -!! {\boldsymbol r}^{\rm old} = {\boldsymbol r},\; {\boldsymbol r} = {\boldsymbol q}\f$ -!! - \f${\boldsymbol q} = (z_{\rm seed}^* {\hat I} - {\hat H}) {\tilde {\boldsymbol r}},\; -!! {\boldsymbol q} = \left( 1 + \frac{\alpha^* \beta^*}{\alpha^{{\rm old}*}} \right) -!! {\tilde {\boldsymbol r}} - \alpha^* {\boldsymbol q} - -!! \frac{\alpha^* \beta^*}{\alpha^{{\rm old} *}} {\tilde {\boldsymbol r}}^{\rm old},\; -!! {\tilde {\boldsymbol r}}^{\rm old} = {\tilde {\boldsymbol r}},\; -!! {\tilde {\boldsymbol r}} = {\boldsymbol q}\f$ -!! - \f$\circ\f$ Seed switch -!! - Search \f$k\f$ which gives the smallest \f$|\pi_k|\f$ . -!! \f$\rightarrow z_{\rm seed},\; -!! \pi_{\rm seed},\; \pi_{\rm seed}^{\rm old}\f$ -!! - \f${\boldsymbol r} = {\boldsymbol r} / \pi_{\rm seed},\; -!! {\boldsymbol r}^{\rm old} = {\boldsymbol r}^{\rm old} / \pi_{\rm seed}^{\rm old},\; -!! {\tilde {\boldsymbol r}} = {\tilde {\boldsymbol r}} / \pi_{\rm seed}^*,\; -!! {\tilde {\boldsymbol r}}^{\rm old} = -!! {\tilde {\boldsymbol r}}^{\rm old} / \pi_{\rm seed}^{{\rm old}*}\f$ -!! - \f$\alpha = (\pi_{\rm seed}^{\rm old} / \pi_{\rm seed}) \alpha\f$, -!! \f$\rho = \rho / (\pi_{\rm seed}^{\rm old} \pi_{\rm seed}^{\rm old})\f$ -!! - \f$\{\pi_k = \pi_k / \pi_{\rm seed},\; \pi_k^{\rm old} = -!! \pi_k^{\rm old} / \pi_{\rm seed}^{\rm old}\}\f$ -!! - if( \f$|{\boldsymbol r}| <\f$ Threshold) exit -!! - end do iteration -!!- end do \f$j\f$ -!! -MODULE komega_bicg - ! - PRIVATE - ! - PUBLIC komega_BICG_init, komega_BICG_restart, komega_BICG_update, komega_BICG_getcoef, & - & komega_BICG_getvec, komega_BICG_finalize, komega_BICG_getresidual - ! -CONTAINS -!> -!! Shifted Part -!! -SUBROUTINE komega_BICG_shiftedeqn(r_l, x) - ! 
- USE komega_parameter, ONLY : iter, itermax, nl, nz, lz_conv - USE komega_vals_c, ONLY : alpha, alpha_old, beta, pi, pi_old, pi_save, z, z_seed - USE komega_vecs_c, ONLY : p - USE komega_math, ONLY : zaxpy - ! - IMPLICIT NONE - ! - COMPLEX(8),INTENT(IN) :: r_l(nl) - COMPLEX(8),INTENT(INOUT) :: x(nl,nz) - ! - INTEGER :: iz - COMPLEX(8) :: pi_new - ! - DO iz = 1, nz - ! - IF(lz_conv(iz)) cycle - ! - pi_new = (1d0 + alpha * (z(iz) - z_seed)) * pi(iz) & - & - alpha * beta / alpha_old * (pi_old(iz) - pi(iz)) - p(1:nl,iz) = r_l(1:nl) / pi(iz) & - & + (pi_old(iz) / pi(iz))**2 * beta * p(1:nl,iz) - CALL zaxpy(nl, pi(iz)/ pi_new * alpha, p(1:nl,iz), 1, x(1:nl,iz), 1) - pi_old(iz) = pi(iz) - pi(iz) = pi_new - ! - IF(itermax > 0) pi_save(iz,iter) = pi_new - ! - END DO - ! -END SUBROUTINE komega_BICG_shiftedeqn -!> -!! Seed Switching -!! -SUBROUTINE komega_BICG_seed_switch(v2, v4, status) - ! - USE komega_parameter, ONLY : iter, itermax, ndim, nz, nl, iz_seed, almost0, lz_conv - USE komega_vals_c, ONLY : alpha, alpha_save, beta_save, pi, pi_old, & - & pi_save, rho, z, z_seed - USE komega_vecs_c, ONLY : v3, v5, r_l_save - USE komega_math, ONLY : dscal, zscal - ! - IMPLICIT NONE - ! - COMPLEX(8),INTENT(INOUT) :: v2(ndim), v4(ndim) - INTEGER,INTENT(INOUT) :: status(3) - ! - INTEGER :: jter - COMPLEX(8) :: scale - ! - status(3) = MINLOC(ABS(pi(1:nz)), 1, .NOT. lz_conv(1:nz)) - ! - IF(ABS(pi(status(3))) < almost0) THEN - status(2) = 3 - END IF - ! - IF(status(3) /= iz_seed) THEN - ! - iz_seed = status(3) - z_seed = z(iz_seed) - ! - alpha = alpha * pi_old(iz_seed) / pi(iz_seed) - rho = rho / pi_old(iz_seed)**2 - ! - scale = 1d0 / pi(iz_seed) - CALL zscal(ndim, scale, v2, 1) - scale = 1d0 / CONJG(pi(iz_seed)) - CALL zscal(ndim, scale, v4, 1) - ! - scale = 1d0 / pi(iz_seed) - CALL zscal(nz,scale,pi,1) - ! - scale = 1d0 / pi_old(iz_seed) - CALL zscal(ndim, scale, v3, 1) - scale = 1d0 / CONJG(pi_old(iz_seed)) - CALL zscal(ndim, scale, v5, 1) - ! 
- scale = 1d0 / pi_old(iz_seed) - CALL zscal(nz,scale,pi_old,1) - ! - ! For restarting - ! - IF(itermax > 0) THEN - ! - DO jter = 1, iter - ! - alpha_save(jter) = alpha_save(jter) & - & * pi_save(iz_seed, jter - 1) / pi_save(iz_seed,jter) - beta_save(jter) = beta_save(jter) & - & * (pi_save(iz_seed, jter - 2) / pi_save(iz_seed,jter - 1))**2 - ! - scale = 1d0 / pi_save(iz_seed, jter - 1) - CALL zscal(nl, scale, r_l_save(1:nl,jter), 1) - ! - END DO - ! - DO jter = 1, iter - scale = 1d0 / pi_save(iz_seed, jter) - CALL zscal(nz,scale,pi_save(1:nz,jter),1) - END DO - ! - END IF - ! - END IF - ! -END SUBROUTINE komega_BICG_seed_switch -!> -!! Allocate & initialize variables -!! -SUBROUTINE komega_BICG_init(ndim0, nl0, nz0, x, z0, itermax0, threshold0, comm0) BIND(C) - ! - USE ISO_C_BINDING - USE komega_parameter, ONLY : iter, itermax, ndim, nl, nz, & - & threshold, iz_seed, lz_conv, lmpi, comm - USE komega_vals_c, ONLY : alpha, alpha_save, beta, beta_save, pi, & - & pi_old, pi_save, rho, z, z_seed - USE komega_vecs_c, ONLY : p, r_l_save, v3, v5 - USE komega_math, ONLY : zcopy - ! - IMPLICIT NONE - ! - INTEGER(C_INT),INTENT(IN) :: ndim0, nl0, nz0, itermax0 - REAL(C_DOUBLE),INTENT(IN) :: threshold0 - COMPLEX(C_DOUBLE_COMPLEX),INTENT(IN) :: z0(nz0) - COMPLEX(C_DOUBLE_COMPLEX),INTENT(OUT) :: x(nl0,nz0) - INTEGER(C_INT),INTENT(IN) :: comm0 - ! - ndim = ndim0 - nl = nl0 - nz = nz0 - itermax = itermax0 - threshold = threshold0 - ! - comm = comm0 -#if defined(MPI) - lmpi = .TRUE. -#else - lmpi = .FALSE. -#endif - ! 
- ALLOCATE(z(nz), v3(ndim), v5(ndim), pi(nz), pi_old(nz), p(nl,nz), lz_conv(nz)) - CALL zcopy(nz,z0,1,z,1) - v3(1:ndim) = CMPLX(0d0, 0d0, KIND(0d0)) - v5(1:ndim) = CMPLX(0d0, 0d0, KIND(0d0)) - p(1:nl,1:nz) = CMPLX(0d0, 0d0, KIND(0d0)) - x(1:nl,1:nz) = CMPLX(0d0, 0d0, KIND(0d0)) - pi(1:nz) = CMPLX(1d0, 0d0, KIND(0d0)) - pi_old(1:nz) = CMPLX(1d0, 0d0, KIND(0d0)) - rho = CMPLX(1d0, 0d0, KIND(0d0)) - alpha = CMPLX(1d0, 0d0, KIND(0d0)) - beta = CMPLX(0d0, 0d0, KIND(0d0)) - iz_seed = 1 - z_seed = z(iz_seed) - iter = 0 - lz_conv(1:nz) = .FALSE. - ! - IF(itermax > 0) THEN - ALLOCATE(alpha_save(itermax), beta_save(itermax), & - & r_l_save(nl,itermax), pi_save(nz,-1:itermax)) - pi_save(1:nz,-1:0) = CMPLX(1d0, 0d0, KIND(0d0)) - END IF - ! -END SUBROUTINE komega_BICG_init -! -! Restart by input -! -SUBROUTINE komega_BICG_restart(ndim0, nl0, nz0, x, z0, itermax0, threshold0, status, & -& iter_old, v2, v12, v4, v14, alpha_save0, beta_save0, z_seed0, r_l_save0, comm0) BIND(C) - ! - USE ISO_C_BINDING - USE komega_parameter, ONLY : iter, itermax, ndim, nl, threshold, iz_seed, lz_conv, nz, resnorm - USE komega_vals_c, ONLY : alpha, alpha_old, alpha_save, beta, beta_save, rho, z_seed, pi - USE komega_vecs_c, ONLY : r_l_save, v3, v5 - USE komega_math, ONLY : zcopy, zdotcMPI - ! - IMPLICIT NONE - ! - INTEGER(C_INT),INTENT(IN) :: ndim0, nl0, nz0, itermax0 - REAL(C_DOUBLE),INTENT(IN) :: threshold0 - COMPLEX(C_DOUBLE_COMPLEX),INTENT(IN) :: z0(nz0) - COMPLEX(C_DOUBLE_COMPLEX),INTENT(OUT) :: x(nl0,nz0) - INTEGER(C_INT),INTENT(OUT) :: status(3) - INTEGER(C_INT),INTENT(IN) :: comm0 - ! - ! For Restarting - ! - INTEGER(C_INT),INTENT(IN) :: iter_old - COMPLEX(C_DOUBLE_COMPLEX),INTENT(IN) :: & - & alpha_save0(iter_old), beta_save0(iter_old), z_seed0 - COMPLEX(C_DOUBLE_COMPLEX),INTENT(IN) :: r_l_save0(nl0,iter_old) - COMPLEX(C_DOUBLE_COMPLEX),INTENT(INOUT) :: v2(ndim), v12(ndim) - COMPLEX(C_DOUBLE_COMPLEX),INTENT(INOUT) :: v4(ndim), v14(ndim) - ! - INTEGER :: iz - ! 
- CALL komega_BICG_init(ndim0, nl0, nz0, x, z0, itermax0, threshold0, comm0) - ! - z_seed = z_seed0 - iz_seed = 0 - ! - status(1:3) = 0 - ! - DO iter = 1, iter_old - ! - beta = beta_save0(iter) - alpha_old = alpha - alpha = alpha_save0(iter) - ! - ! For restarting - ! - IF(itermax > 0) THEN - alpha_save(iter) = alpha - beta_save(iter) = beta - CALL zcopy(nl,r_l_save0(1:nl,iter),1,r_l_save(1:nl,iter),1) - END IF - ! - ! Shifted equation - ! - CALL komega_BICG_shiftedeqn(r_l_save0(1:nl,iter), x) - ! - END DO - ! - ! Rewind - ! - iter = iter_old - ! - CALL zcopy(ndim,v12,1,v3,1) - CALL zcopy(ndim,v14,1,v5,1) - rho = zdotcMPI(ndim,v5,v3) - ! - ! Seed Switching - ! - CALL komega_BICG_seed_switch(v2,v4,status) - ! - ! Convergence check - ! - resnorm = SQRT(DBLE(zdotcMPI(ndim,v2,v2))) - ! - DO iz = 1, nz - IF(ABS(resnorm/pi(iz)) < threshold) lz_conv(iz) = .TRUE. - END DO - ! - IF(resnorm < threshold) THEN - ! - ! Converged - ! - status(1) = - iter - status(2) = 0 - ELSE IF(iter == itermax) THEN - ! - ! NOT Converged in itermax - ! - status(1) = - iter - status(2) = 1 - ELSE IF(status(2) == 3) THEN - ! - ! pi_seed becomes zero - ! - status(1) = - iter - ELSE - ! - ! Continue - ! - status(1) = iter - status(2) = 0 - END IF - ! - IF(ndim > 0) v12(1) = resnorm - ! -END SUBROUTINE komega_BICG_restart -!> -!! Update x, p, r -!! -SUBROUTINE komega_BICG_update(v12, v2, v14, v4, x, r_l, status) BIND(C) - ! - USE ISO_C_BINDING - USE komega_parameter, ONLY : iter, itermax, ndim, nl, nz, & - & threshold, almost0, lz_conv, resnorm - USE komega_vals_c, ONLY : alpha, alpha_old, alpha_save, & - & beta, beta_save, rho, z_seed, pi - USE komega_vecs_c, ONLY : r_l_save, v3, v5 - USE komega_math, ONLY : zdotcMPI, zcopy - ! - IMPLICIT NONE - ! - COMPLEX(C_DOUBLE_COMPLEX),INTENT(INOUT) :: v12(ndim), v2(ndim), v14(ndim), v4(ndim), x(nl,nz) - COMPLEX(C_DOUBLE_COMPLEX),INTENT(IN) :: r_l(nl) - INTEGER(C_INT),INTENT(INOUT) :: status(3) - ! - INTEGER :: iz - COMPLEX(8) :: rho_old, alpha_denom - ! 
- iter = iter + 1 - status(1:3) = 0 - ! - rho_old = rho - rho = zdotcMPI(ndim,v4,v2) - IF(iter == 1) THEN - beta = CMPLX(0d0, 0d0, KIND(0d0)) - ELSE - beta = rho / rho_old - END IF - v12(1:ndim) = z_seed * v2(1:ndim) - v12(1:ndim) - v14(1:ndim) = CONJG(z_seed) * v4(1:ndim) - v14(1:ndim) - alpha_old = alpha - alpha_denom = zdotcMPI(ndim,v4,v12) - beta * rho / alpha - ! - IF(ABS(alpha_denom) < almost0) THEN - status(2) = 2 - ELSE IF(ABS(rho) < almost0) THEN - status(2) = 4 - END IF - alpha = rho / alpha_denom - ! - ! For restarting - ! - IF(itermax > 0) THEN - alpha_save(iter) = alpha - beta_save(iter) = beta - CALL zcopy(nl,r_l,1,r_l_save(1:nl,iter),1) - END IF - ! - ! Shifted equation - ! - CALL komega_BICG_shiftedeqn(r_l, x) - ! - ! Update residual - ! - v12(1:ndim) = (1d0 + alpha * beta / alpha_old) * v2(1:ndim) & - & - alpha * v12(1:ndim) & - & - alpha * beta / alpha_old * v3(1:ndim) - CALL zcopy(ndim,v2,1,v3,1) - CALL zcopy(ndim,v12,1,v2,1) - v14(1:ndim) = (1d0 + CONJG(alpha * beta / alpha_old)) * v4(1:ndim) & - & - CONJG(alpha) * v14(1:ndim) & - & - CONJG(alpha * beta / alpha_old) * v5(1:ndim) - CALL zcopy(ndim,v4,1,v5,1) - CALL zcopy(ndim,v14,1,v4,1) - ! - ! Seed Switching - ! - CALL komega_BICG_seed_switch(v2,v4,status) - ! - ! Convergence check - ! - resnorm = SQRT(DBLE(zdotcMPI(ndim,v2,v2))) - ! - DO iz = 1, nz - IF(ABS(resnorm/pi(iz)) < threshold) lz_conv(iz) = .TRUE. - END DO - ! - IF(resnorm < threshold) THEN - ! - ! Converged - ! - status(1) = - iter - status(2) = 0 - ELSE IF(iter == itermax) THEN - ! - ! NOT Converged in itermax - ! - status(1) = - iter - status(2) = 1 - ELSE IF(status(2) == 2) THEN - ! - ! alpha becomes infinite - ! - status(1) = - iter - ELSE IF(status(2) == 3) THEN - ! - ! pi_seed becomes zero - ! - status(1) = - iter - ELSE IF(status(2) == 4) THEN - ! - ! rho becomes zero - ! - status(1) = - iter - ELSE - ! - ! Continue - ! - status(1) = iter - status(2) = 0 - END IF - ! - IF(ndim > 0) v12(1) = resnorm - ! 
-END SUBROUTINE komega_BICG_update -!> -!! Return saved alpha, beta, r_l -!! -SUBROUTINE komega_BICG_getcoef(alpha_save0, beta_save0, z_seed0, r_l_save0) BIND(C) - ! - USE ISO_C_BINDING - USE komega_parameter, ONLY : iter, nl - USE komega_vals_c, ONLY : alpha_save, beta_save, z_seed - USE komega_vecs_c, ONLY : r_l_save - USE komega_math, ONLY : dcopy, zcopy - ! - IMPLICIT NONE - ! - COMPLEX(C_DOUBLE_COMPLEX),INTENT(OUT) :: alpha_save0(iter), beta_save0(iter), z_seed0 - COMPLEX(C_DOUBLE_COMPLEX),INTENT(OUT) :: r_l_save0(nl,iter) - ! - z_seed0 = z_seed - CALL zcopy(iter,alpha_save,1,alpha_save0,1) - CALL zcopy(iter,beta_save,1,beta_save0,1) - CALL zcopy(nl*iter,r_l_save,1,r_l_save0,1) - ! -END SUBROUTINE komega_BICG_getcoef -!> -!! Return r_old -!! -SUBROUTINE komega_BICG_getvec(r_old, r_tilde_old) BIND(C) - ! - USE ISO_C_BINDING - USE komega_parameter, ONLY : ndim - USE komega_vecs_c, ONLY : v3, v5 - USE komega_math, ONLY : zcopy - ! - IMPLICIT NONE - ! - COMPLEX(C_DOUBLE_COMPLEX),INTENT(OUT) :: r_old(ndim), r_tilde_old(ndim) - ! - CALL zcopy(ndim,v3,1,r_old,1) - CALL zcopy(ndim,v5,1,r_tilde_old,1) - ! -END SUBROUTINE komega_BICG_getvec -!> -!! Return Residual Norm -!! -SUBROUTINE komega_BICG_getresidual(res) BIND(C) - ! - USE ISO_C_BINDING - USE komega_parameter, ONLY : nz, resnorm - USE komega_vals_c, ONLY : pi - ! - IMPLICIT NONE - ! - REAL(C_DOUBLE),INTENT(OUT) :: res(nz) - ! - res(1:nz) = resnorm / ABS(pi(1:nz)) - ! -END SUBROUTINE komega_BICG_getresidual -!> -!! Deallocate private arrays -!! -SUBROUTINE komega_BICG_finalize() BIND(C) - ! - USE komega_parameter, ONLY : itermax, lz_conv - USE komega_vals_c, ONLY : alpha_save, beta_save, & - & pi, pi_old, pi_save, z - USE komega_vecs_c, ONLY : p, r_l_save, v3, v5 - ! - IMPLICIT NONE - ! - DEALLOCATE(z, v3, v5, pi, pi_old, p, lz_conv) - ! - IF(itermax > 0) THEN - DEALLOCATE(alpha_save, beta_save, r_l_save, pi_save) - END IF - ! -END SUBROUTINE komega_BICG_finalize -! 
-END MODULE komega_bicg diff --git a/src/komega/komega_math.F90 b/src/komega/komega_math.F90 deleted file mode 100644 index 48f61562b..000000000 --- a/src/komega/komega_math.F90 +++ /dev/null @@ -1,200 +0,0 @@ -! -! ISSP Math Library - A library for solving linear systems in materials science -! Copyright (C) 2016 Mitsuaki Kawamura -! -! This library is free software; you can redistribute it and/or -! modify it under the terms of the GNU Lesser General Public -! License as published by the Free Software Foundation; either -! version 2.1 of the License, or (at your option) any later version. -! -! This library is distributed in the hope that it will be useful, -! but WITHOUT ANY WARRANTY; without even the implied warranty of -! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -! Lesser General Public License for more details. -! -! You should have received a copy of the GNU Lesser General Public -! License along with this library; if not, write to the Free Software -! Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -! -! For more details, See `COPYING.LESSER' in the root directory of this library. -! -MODULE komega_math - ! - IMPLICIT NONE - ! - INTERFACE - ! - REAL(8) FUNCTION ddot(n,dx,incx,dy,incy) - REAL(8) dx(*),dy(*) - INTEGER incx,incy,n - END FUNCTION ddot - ! - COMPLEX(8) FUNCTION zdotc(n,zx,incx,zy,incy) - COMPLEX(8) zx(*),zy(*) - INTEGER incx,incy,n - END FUNCTION zdotc - ! - COMPLEX(8) FUNCTION zdotu(n,zx,incx,zy,incy) - COMPLEX(8) zx(*),zy(*) - INTEGER incx,incy,n - END FUNCTION zdotu - ! - SUBROUTINE dscal(n,da,dx,incx) - REAL(8) da,dx(*) - INTEGER incx,n - END SUBROUTINE dscal - ! - SUBROUTINE zscal(n,za,zx,incx) - COMPLEX(8) za,zx(*) - INTEGER incx,n - END SUBROUTINE zscal - ! - SUBROUTINE dcopy(n,dx,incx,dy,incy) - REAL(8) dx(*),dy(*) - INTEGER incx,incy,n - END SUBROUTINE dcopy - ! - SUBROUTINE zcopy(n,zx,incx,zy,incy) - COMPLEX(8) zx(*),zy(*) - INTEGER incx,incy,n - END SUBROUTINE zcopy - ! 
- SUBROUTINE daxpy(n,da,dx,incx,dy,incy) - REAL(8) dx(*),dy(*),da - INTEGER incx,incy,n - END SUBROUTINE daxpy - ! - SUBROUTINE zaxpy(n,za,zx,incx,zy,incy) - COMPLEX(8) zx(*),zy(*),za - INTEGER incx,incy,n - END SUBROUTINE zaxpy - ! - END INTERFACE - ! -CONTAINS -! -! ddot with MPI allreduce -! -FUNCTION ddotMPI(n,dx,dy) RESULT(prod) - ! -#if defined(MPI) - use mpi, only : MPI_IN_PLACE, MPI_DOUBLE_PRECISION, MPI_SUM - USE komega_parameter, ONLY : comm, lmpi -#endif - ! - IMPLICIT NONE - ! - INTEGER,INTENT(IN) :: n - REAL(8),INTENT(IN) :: dx(n), dy(n) - REAL(8) prod - ! -#if defined(MPI) - INTEGER :: ierr -#endif - ! - prod = ddot(n,dx,1,dy,1) - ! -#if defined(MPI) - IF(lmpi) & - & call MPI_allREDUCE(MPI_IN_PLACE, prod, 1, & - & MPI_DOUBLE_PRECISION, MPI_SUM, comm, ierr) -#endif - ! -END FUNCTION ddotMPI -! -! zdotc with MPI allreduce -! -FUNCTION zdotcMPI(n,zx,zy) RESULT(prod) - ! -#if defined(MPI) - use mpi, only : MPI_IN_PLACE, MPI_DOUBLE_COMPLEX, MPI_SUM - USE komega_parameter, ONLY : comm, lmpi -#endif - ! - IMPLICIT NONE - ! - INTEGER,INTENT(IN) :: n - COMPLEX(8),INTENT(IN) :: zx(n), zy(n) - COMPLEX(8) prod - ! -#if defined(MPI) - INTEGER :: ierr -#endif - ! -#if defined(__NO_ZDOT) - prod = DOT_PRODUCT(zx,zy) -#else - prod = zdotc(n,zx,1,zy,1) -#endif - ! -#if defined(MPI) - IF(lmpi) & - & call MPI_allREDUCE(MPI_IN_PLACE, prod, 1, & - & MPI_DOUBLE_COMPLEX, MPI_SUM, comm, ierr) -#endif - ! -END FUNCTION zdotcMPI -! -! zdotu with MPI allreduce -! -FUNCTION zdotuMPI(n,zx,zy) RESULT(prod) - ! -#if defined(MPI) - use mpi, only : MPI_IN_PLACE, MPI_DOUBLE_COMPLEX, MPI_SUM - USE komega_parameter, ONLY : comm, lmpi -#endif - ! - IMPLICIT NONE - ! - INTEGER,INTENT(IN) :: n - COMPLEX(8),INTENT(IN) :: zx(n), zy(n) - COMPLEX(8) prod - ! -#if defined(MPI) - INTEGER :: ierr -#endif - ! -#if defined(__NO_ZDOT) - prod = SUM(zx(1:n) * zy(1:n)) -#else - prod = zdotu(n,zx,1,zy,1) -#endif - ! 
-#if defined(MPI) - IF(lmpi) & - & call MPI_allREDUCE(MPI_IN_PLACE, prod, 1, & - & MPI_DOUBLE_COMPLEX, MPI_SUM, comm, ierr) -#endif - ! -END FUNCTION zdotuMPI -! -! MAXVAL with MPI allreduce (for real(8)) -! -FUNCTION dabsmax(array, n) RESULT(maxarray) - ! -#if defined(MPI) - use mpi, only : MPI_IN_PLACE, MPI_DOUBLE_PRECISION, MPI_MAX - USE komega_parameter, ONLY : comm, lmpi -#endif - ! - IMPLICIT NONE - ! - INTEGER,INTENT(IN) :: n - REAL(8),INTENT(IN) :: array(n) - REAL(8) maxarray - ! -#if defined(MPI) - INTEGER :: ierr -#endif - ! - maxarray = MAXVAL(ABS(array)) - ! -#if defined(MPI) - IF(lmpi) & - & call MPI_allREDUCE(MPI_IN_PLACE, maxarray, 1, & - & MPI_DOUBLE_PRECISION, MPI_MAX, comm, ierr) -#endif - ! -END FUNCTION dabsmax -! -end MODULE komega_math diff --git a/src/komega/komega_vals.F90 b/src/komega/komega_vals.F90 deleted file mode 100644 index 83ef4e724..000000000 --- a/src/komega/komega_vals.F90 +++ /dev/null @@ -1,142 +0,0 @@ -! -! ISSP Math Library - A library for solving linear systems in materials science -! Copyright (C) 2016 Mitsuaki Kawamura -! -! This library is free software; you can redistribute it and/or -! modify it under the terms of the GNU Lesser General Public -! License as published by the Free Software Foundation; either -! version 2.1 of the License, or (at your option) any later version. -! -! This library is distributed in the hope that it will be useful, -! but WITHOUT ANY WARRANTY; without even the implied warranty of -! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -! Lesser General Public License for more details. -! -! You should have received a copy of the GNU Lesser General Public -! License along with this library; if not, write to the Free Software -! Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -! -! For more details, See `COPYING.LESSER' in the root directory of this library. -! -MODULE komega_parameter - ! - IMPLICIT NONE - ! - REAL(8),PARAMETER :: & - & almost0 = 1d-50 - ! 
- INTEGER,SAVE :: & - & comm, & !< Communicator for MPI - & iz_seed, & !< Index of frequency of seed - & ndim, & !< Dimension of Hamiltonian - & nl, & !< Dimension of projection - & nz, & !< Number of Frequency (Shift) - & itermax, & !< Maximum number of iteration - & iter !< Counter of iteration - ! - LOGICAL,SAVE :: & - & lmpi !< Use MPI or not - ! - REAL(8),SAVE :: & - & threshold, & !< Convergence threshold - & resnorm !< Residual norm - ! - LOGICAL,ALLOCATABLE,SAVE :: & - & lz_conv(:) !< If converged at this frequency -> .TRUE. - ! -#if defined(__KOMEGA_THREAD) - !$OMP THREADPRIVATE(comm, iz_seed, ndim, nl, nz, itermax, & - !$OMP & iter, threshold, resnorm, lz_conv) -#endif - ! -END MODULE komega_parameter -! -!> Variables for CG -!! \f$\alpha\f$ -! -MODULE komega_vals_r - ! - IMPLICIT NONE - ! - REAL(8),SAVE :: z_seed !< Seed frequency - REAL(8),SAVE :: rho !< \f$\rho\f$ of BiCG - REAL(8),SAVE :: alpha !< \f$\alpha\f$ of BiCG - REAL(8),SAVE :: alpha_old !< \f$\alpha\f$ at the previous step - REAL(8),SAVE :: beta !< \f$\beta\f$ of BiCG - ! - REAL(8),ALLOCATABLE,SAVE :: z(:) !< Frequencies - REAL(8),ALLOCATABLE,SAVE :: pi(:) !< \f$\pi\f$ of BiCG - REAL(8),ALLOCATABLE,SAVE :: pi_old(:) !< \f$\pi\f$ at the previous step - REAL(8),ALLOCATABLE,SAVE :: pi_save(:,:) !< \f$\pi\f$ saved for the restart - REAL(8),ALLOCATABLE,SAVE :: alpha_save(:) !< \f$\alpha\f$ saved for restart - REAL(8),ALLOCATABLE,SAVE :: beta_save(:) !< \f$\beta\f$ saved for restart - ! -#if defined(__KOMEGA_THREAD) - !$OMP THREADPRIVATE(z_seed, rho, alpha, alpha_old, beta, z, pi, & - !$OMP & pi_old, pi_save, alpha_save, beta_save) -#endif - ! -END MODULE komega_vals_r -! -! -! -MODULE komega_vecs_r - ! - IMPLICIT NONE - ! - REAL(8),ALLOCATABLE,SAVE :: & - & v3(:), & - & p(:,:), & - & r_l_save(:,:) - ! -#if defined(__KOMEGA_THREAD) - !$OMP THREADPRIVATE(v3, p, r_l_save) -#endif - ! -END MODULE komega_vecs_r -! -! -! -MODULE komega_vals_c - ! - IMPLICIT NONE - ! 
- COMPLEX(8),SAVE :: & - & z_seed, & - & rho, & - & alpha, & - & alpha_old, & - & beta - ! - COMPLEX(8),ALLOCATABLE,SAVE :: & - & z(:), & - & pi(:), & - & pi_old(:), & - & pi_save(:,:), & - & alpha_save(:), & - & beta_save(:) - ! -#if defined(__KOMEGA_THREAD) - !$OMP THREADPRIVATE(z_seed, rho, alpha, alpha_old, beta, z, pi, pi_old, & - !$OMP & pi_save, alpha_save, beta_save) -#endif - ! -END MODULE komega_vals_c -! -! -! -MODULE komega_vecs_c - ! - IMPLICIT NONE - ! - COMPLEX(8),ALLOCATABLE,SAVE :: & - & v3(:), & - & v5(:), & - & p(:,:), & - & r_l_save(:,:) - ! -#if defined(__KOMEGA_THREAD) - !$OMP THREADPRIVATE(v3, v5, p, r_l_save) -#endif - ! -END MODULE komega_vecs_c diff --git a/src/komega/makefile_komega b/src/komega/makefile_komega deleted file mode 100644 index 0994f9f0d..000000000 --- a/src/komega/makefile_komega +++ /dev/null @@ -1,23 +0,0 @@ -include ../make.sys - -.SUFFIXES : -.SUFFIXES : .o .F90 - -OBJS = \ -komega_bicg.o \ -komega_math.o \ -komega_vals.o - -libkomega.a:$(OBJS) - ar -r -v $(AROPT) libkomega.a $(OBJS) - -.F90.o: - $(F90) -c $< $(FFLAGS) - -clean: - rm -f *.o *.a *.mod - -komega_bicg.o:komega_math.o -komega_bicg.o:komega_vals.o -komega_math.o:komega_vals.o - From 6c591ad486094384fc7a473e843363dc94ceb373 Mon Sep 17 00:00:00 2001 From: Mitsuaki Kawamura Date: Wed, 7 Jun 2023 11:31:47 +0900 Subject: [PATCH 44/50] Update manual for dynamical green's function. 
dynamicalr2k, greenr2k: temperature dependent --- doc/ja/source/algorithm/DynamicalGreen_ja.rst | 31 ++- doc/ja/source/algorithm/Partition_ja.rst | 53 ++++ doc/ja/source/algorithm/al-index.rst | 2 +- .../expertmode_ja/PairExcitation_file_ja.rst | 82 +++++-- .../SingleExcitation_file_ja.rst | 54 ++-- src/StdFace | 2 +- tool/dynamicalr2k.F90 | 232 ++++++++++++++---- tool/greenr2k.F90 | 103 +++++++- 8 files changed, 454 insertions(+), 105 deletions(-) create mode 100644 doc/ja/source/algorithm/Partition_ja.rst diff --git a/doc/ja/source/algorithm/DynamicalGreen_ja.rst b/doc/ja/source/algorithm/DynamicalGreen_ja.rst index 72a8ca038..faa211f4e 100644 --- a/doc/ja/source/algorithm/DynamicalGreen_ja.rst +++ b/doc/ja/source/algorithm/DynamicalGreen_ja.rst @@ -1,27 +1,38 @@ .. highlight:: none -動的グリーン関数 ----------------- +動的Green関数 +------------- -:math:`{\cal H}\Phi`\ では励起状態\ :math:`|\Phi ' \rangle = \hat{O} | \Phi _0 \rangle`\ に対する動的関数 +:math:`{\cal H}\Phi`\ では動的関数 -.. math:: I(z) = \langle \Phi ' | \frac{1}{ {\cal H}- z\hat{I} } | \Phi '\rangle +.. math:: G_n^{O_l,O_r}(z) = \langle \Phi_n | \hat{O}_l (z + E_n - \hat{\cal H})^{-1} \hat{O}_r| \Phi_n \rangle -を計算することができます。 演算子\ :math:`\hat{O}`\ はシングル励起状態 +を計算することができます。 演算子\ :math:`\hat{O}_{l,r}`\ はシングル励起状態 -.. math:: \sum_{i, \sigma_1} A_{i \sigma_1} c_{i \sigma_1} (c_{i\sigma_1}^{\dagger}) +.. math:: \sum_{i, \sigma_1} A_{i \sigma_1} c_{i \sigma_1} \quad \textrm{or} \quad \sum_{i, \sigma_1} A_{i \sigma_1} c_{i\sigma_1}^{\dagger} およびペア励起状態 -.. math:: \sum_{i, j, \sigma_1, \sigma_2} A_{i \sigma_1 j \sigma_2} c_{i \sigma_1}c_{j \sigma_2}^{\dagger} (c_{i\sigma_1}^{\dagger}c_{j\sigma_2}) +.. math:: \sum_{i, j, \sigma_1, \sigma_2} A_{i \sigma_1 j \sigma_2} c_{i \sigma_1}c_{j \sigma_2}^{\dagger} \quad \textrm{or} \quad + \sum_{i, j, \sigma_1, \sigma_2} A_{i \sigma_1 j \sigma_2} c_{i\sigma_1}^{\dagger}c_{j\sigma_2} として、それぞれ定義することが出来ます。例えば、動的スピン感受率を計算する場合はペア励起演算子を用い -.. 
math:: \hat{O} = \hat{S}({\bf k}) = \sum_{j}\hat{S}_j^z e^{i {\bf k} \cdot \bf {r}_j} = \sum_{j}\frac{1}{2} (c_{j\uparrow}^{\dagger}c_{j\uparrow}-c_{j\downarrow}^{\dagger}c_{j\downarrow})e^{i {\bf k} \cdot \bf {r}_j} +.. math:: \hat{O}_r = \hat{S}_{\textbf{R}=\textbf{0}}^z = \frac{1}{2} (c_{\textbf{0}\uparrow}^{\dagger}c_{\textbf{0}\uparrow}-c_{\textbf{0}\downarrow}^{\dagger}c_{\textbf{0}\downarrow}) + \\ + \hat{O}_l = \hat{S}_{\textbf{R}}^z = \frac{1}{2} (c_{\textbf{R}\uparrow}^{\dagger}c_{\textbf{R}\uparrow}-c_{\textbf{R}\downarrow}^{\dagger}c_{\textbf{R}\downarrow}) -のように定義することで計算することができます。 -なお、動的グリーン関数の計算には、Lanczos法を用いた連分数展開による解法 [1]_ とシフト型クリロフ理論による解法 [2]_ の2つが実装されています。 +として、:math:`G_n^{O_l,O_r}(z)\equiv G_n^{\textbf{R}}(z)` を計算し、ポストプロセスで + +.. math:: G_n^{\textbf{k}}(z) \equiv \sum_{\textbf{R}} \exp(i\textbf{k}\cdot\textbf{R}) G_n^{\textbf{R}}(z) + +のようにFourier変換を行い計算することができます。 +なお、動的関数の計算には、Lanczos法を用いた連分数展開による解法 [1]_ 、シフト型クリロフ理論による解法 [2]_ 、およびLehmann表示による動的関数 + +.. math:: G_n^{O_l,O_r}(z) = \sum_{m} \frac{\langle \Phi_n | \hat{O}_l | \Phi_m \rangle \langle \Phi_m |\hat{O}_r| \Phi_n \rangle}{z + E_n - E_m} + +を全対角化により直接計算する手法の3つが実装されています。 詳細については各文献を参照してください。 .. [1] \E. Dagotto, Rev. Mod. Phys. **66**, 763-840 (1994). diff --git a/doc/ja/source/algorithm/Partition_ja.rst b/doc/ja/source/algorithm/Partition_ja.rst new file mode 100644 index 000000000..0af02b08d --- /dev/null +++ b/doc/ja/source/algorithm/Partition_ja.rst @@ -0,0 +1,53 @@ +.. highlight:: none + +.. _Sec:sec_partion_function: + +分配関数と有限温度物理量 +------------------------ + +分配関数 + +.. 
math:: + + Z(T) &= \sum_{i=1}^N \exp\left(-\frac{E_i}{T}\right) + \nonumber \\ + &= \exp\left(-\frac{E_1}{T}\right) \left[ + 1 + \exp\left(-\frac{E_2-E_1}{T}\right)+ \exp\left(-\frac{E_3-E_1}{T}\right) + \cdots + + \exp\left(-\frac{E_N-E_1}{T}\right) + \right] + \nonumber \\ + &= \exp\left(-\frac{E_1}{T}\right) \left[ + 1 + \exp\left(-\frac{E_2-E_1}{T}\right)\left[ + 1 + \exp\left(-\frac{E_3-E_2}{T}\right)\left[ + 1 + \dots + \left[ + 1 + \exp\left(-\frac{E_N-E_{N-1}}{T}\right) + \right] + \right] + \right] + \right] + +有限温度物理量 + +.. math:: + + O(T) &= \frac{1}{Z(T)}\sum_i O_i \exp\left(-\frac{E_i}{T}\right) + \nonumber \\ + &= \exp\left(-\frac{E_1}{T}\right) \left[ + O_1 + O_2 \exp\left(-\frac{E_2-E_1}{T}\right) + O_3\exp\left(-\frac{E_3-E_1}{T}\right) + \cdots + + O_N\exp\left(-\frac{E_N-E_1}{T}\right) + \right] + \nonumber \\ + &= \exp\left(-\frac{E_1}{T}\right) \left[ + O_1 + \exp\left(-\frac{E_2-E_1}{T}\right)\left[ + O_2 + \exp\left(-\frac{E_3-E_2}{T}\right)\left[ + O_3 + \dots + \left[ + O_{N-1} + O_N\exp\left(-\frac{E_N-E_{N-1}}{T}\right) + \right] + \right] + \right] + \right] + diff --git a/doc/ja/source/algorithm/al-index.rst b/doc/ja/source/algorithm/al-index.rst index ea65139cf..c1ab06481 100644 --- a/doc/ja/source/algorithm/al-index.rst +++ b/doc/ja/source/algorithm/al-index.rst @@ -11,4 +11,4 @@ DynamicalGreen_ja Realtime_ja Bogoliubov_ja - + Partition_ja diff --git a/doc/ja/source/filespecification/expertmode_ja/PairExcitation_file_ja.rst b/doc/ja/source/filespecification/expertmode_ja/PairExcitation_file_ja.rst index e28ebeb62..8fce494e6 100644 --- a/doc/ja/source/filespecification/expertmode_ja/PairExcitation_file_ja.rst +++ b/doc/ja/source/filespecification/expertmode_ja/PairExcitation_file_ja.rst @@ -5,22 +5,52 @@ PairExcitation指定ファイル ~~~~~~~~~~~~~~~~~~~~~~~~~~ -二体励起状態を作成するための演算子\ :math:`c_{i\sigma_1}c_{j\sigma_2}^{\dagger}(c_{i\sigma_1}^{\dagger}c_{j\sigma_2})`\ を定義します。なお、\ :math:`c_{i\sigma_1}c_{j\sigma_2}^{\dagger}`\ と\ 
:math:`c_{i\sigma_1}^{\dagger}c_{j\sigma_2}`\ を混同することは出来ません。また、\ :math:`S_z`\ 保存の系に対しては\ :math:`\sigma_1=\sigma_2`\ とする必要があります。 +動的関数 + +.. math:: G_n^{O_l,O_r}(z) = \langle \Phi_n | \hat{O}_l (z + E_n - \hat{\cal H})^{-1} \hat{O}_r| \Phi_n \rangle + +において、\ :math:`\hat{O}_{l,r}`\ として二体励起状態を作成するための演算子 + +.. math:: \sum_{i, j, \sigma_1, \sigma_2} A_{i \sigma_1 j \sigma_2} c_{i \sigma_1}c_{j \sigma_2}^{\dagger} \quad \textrm{or} \quad + \sum_{i, j, \sigma_1, \sigma_2} A_{i \sigma_1 j \sigma_2} c_{i\sigma_1}^{\dagger}c_{j\sigma_2} + +を定義します。 +ひとつの\ :math:`\hat{O}_r`\ と複数(1個以上)の\ :math:`\hat{O}_l`\ を指定することにより、効率よく計算を行うことが可能です。 +なお、\ :math:`c_{i\sigma_1}c_{j\sigma_2}^{\dagger}`\ と\ :math:`c_{i\sigma_1}^{\dagger}c_{j\sigma_2}`\ を混同することは出来ません。 以下にファイル例を記載します。 +この例では、 + +.. math:: + + \hat{O}_r = \hat{S}_{\textbf{R}=\textbf{0}}^z = \frac{1}{2} (c_{\textbf{0}\uparrow}^{\dagger}c_{\textbf{0}\uparrow}-c_{\textbf{0}\downarrow}^{\dagger}c_{\textbf{0}\downarrow}) + \\ + \hat{O}_l = \hat{S}_{\textbf{R}}^z = \frac{1}{2} (c_{\textbf{R}\uparrow}^{\dagger}c_{\textbf{R}\uparrow}-c_{\textbf{R}\downarrow}^{\dagger}c_{\textbf{R}\downarrow}) + +としています。 :: - =============================== - NPair 24 - =============================== - ======== Pair Excitation ====== - =============================== - 0 0 0 0 0 1.0 0.0 - 0 1 0 1 0 1.0 0.0 - 1 0 1 0 0 1.0 0.0 - (continue...) 
- 11 0 11 0 0 1.0 0.0 - 11 1 11 1 0 1.0 0.0 + ============================================= + NPair 9 + ============================================= + =============== Pair Excitation ============= + ============================================= + 2 + 0 0 0 0 1 -0.500000000000000 0.0 + 0 1 0 1 1 0.500000000000000 0.0 + 2 + 0 0 0 0 1 -0.500000000000000 0.0 + 0 1 0 1 1 0.500000000000000 0.0 + 2 + 1 0 1 0 1 -0.500000000000000 0.0 + 1 1 1 1 1 0.500000000000000 0.0 + 2 + 2 0 2 0 1 -0.500000000000000 0.0 + 2 1 2 1 1 0.500000000000000 0.0 + 2 + 3 0 3 0 1 -0.500000000000000 0.0 + 3 1 3 1 1 0.500000000000000 0.0 + : ファイル形式 ^^^^^^^^^^^^ @@ -34,7 +64,16 @@ PairExcitation指定ファイル - 3-5行: ヘッダ(何が書かれても問題ありません)。 - 6行以降: - [int02] [int03] [int04] [int05] [int06] [double01] [double02] + + :: + + [int02] + [int03] [int04] [int05] [int06] [int07] [double01] [double02] + : + [int02]個繰り返し + + というブロックを[int01]個繰り返す。 + 1個目のブロックが\ :math:`\hat{O}_{r}`\ 、その後が\ :math:`\hat{O}_{l}`\ を表す。 パラメータ ^^^^^^^^^^ @@ -49,16 +88,23 @@ PairExcitation指定ファイル **形式 :** int型 (空白不可) - **説明 :** 二体励起演算子の総数を指定します。 + **説明 :** 演算子\ :math:`\hat{O}_{r}`\ と\ :math:`\hat{O}_{l}`\ の数を合わせた総数を指定します。 + 上の場合は1個の\ :math:`\hat{O}_{r}`\ と8個の\ :math:`\hat{O}_{l}`\ を合わせた9個となります。 -- :math:`[`\ int02\ :math:`]`, :math:`[`\ int04\ :math:`]` +- :math:`[`\ int02\ :math:`]` + + **形式 :** int型 (空白不可) + + **説明 :** 各演算子\ :math:`\hat{O}_{r,l}`\ に含まれる項数を指定します。 + +- :math:`[`\ int03\ :math:`]`, :math:`[`\ int05\ :math:`]` **形式 :** int型 (空白不可) **説明 :** サイト番号を指定する整数。0以上\ ``Nsite``\ 未満で指定します。 -- :math:`[`\ int03\ :math:`]`, :math:`[`\ int05\ :math:`]` +- :math:`[`\ int04\ :math:`]`, :math:`[`\ int06\ :math:`]` **形式 :** int型 (空白不可) @@ -70,7 +116,7 @@ PairExcitation指定ファイル (:math:`-S-0.5, -S+0.5, \cdots, S+0.5`\ に対応\ :math:`)` | を選択することが出来ます。 -- :math:`[`\ int06\ :math:`]` +- :math:`[`\ int07\ :math:`]` **形式 :** int型 (空白不可) @@ -84,7 +130,7 @@ PairExcitation指定ファイル **形式 :** double型 (空白不可) **説明 :** - :math:`c_{i\sigma_1}c_{j\sigma_2}^{\dagger} ( 
c_{i\sigma_1}^{\dagger}c_{j\sigma_2})`\ の実部を\ :math:`[`\ double01\ :math:`]`\ 、虚部を\ :math:`[`\ double02\ :math:`]`\ でそれぞれ指定します。 + :math:`A_{i \sigma_1 j \sigma_2}`\ の実部を\ :math:`[`\ double01\ :math:`]`\ 、虚部を\ :math:`[`\ double02\ :math:`]`\ でそれぞれ指定します。 使用ルール ^^^^^^^^^^ diff --git a/doc/ja/source/filespecification/expertmode_ja/SingleExcitation_file_ja.rst b/doc/ja/source/filespecification/expertmode_ja/SingleExcitation_file_ja.rst index 1557e92b4..be5214a40 100644 --- a/doc/ja/source/filespecification/expertmode_ja/SingleExcitation_file_ja.rst +++ b/doc/ja/source/filespecification/expertmode_ja/SingleExcitation_file_ja.rst @@ -5,21 +5,34 @@ SingleExcitation指定ファイル ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -一体励起状態を作成するための演算子\ :math:`c_{i\sigma_1}(c_{i\sigma_1}^{\dagger})`\ を定義します。以下にファイル例を記載します。 +動的関数 + +.. math:: G_n^{O_l,O_r}(z) = \langle \Phi_n | \hat{O}_l (z + E_n - \hat{\cal H})^{-1} \hat{O}_r| \Phi_n \rangle + +において、演算子\ :math:`\hat{O}_{l,r}`\ を一体励起演算子 + +.. math:: \sum_{i, \sigma_1} A_{i \sigma_1} c_{i \sigma_1} \quad \textrm{or} \quad \sum_{i, \sigma_1} A_{i \sigma_1} c_{i\sigma_1}^{\dagger} + +として定義します。 +ひとつの\ :math:`\hat{O}_r`\ と複数(1個以上)の\ :math:`\hat{O}_l`\ を指定することにより、効率よく計算を行うことが可能です。 +以下にファイル例を記載します。 :: =============================== - NSingle 24 + NSingle 13 =============================== ======== Single Excitation ====== =============================== - 0 0 0 1.0 0.0 - 0 1 0 1.0 0.0 - 1 0 0 1.0 0.0 - (continue...) - 11 0 0 1.0 0.0 - 11 1 0 1.0 0.0 + 1 + 0 0 0 1.0 0.0 + 1 + 0 0 0 1.0 0.0 + 1 + 1 0 0 1.0 0.0 + (continue...) 
+ 1 + 11 0 0 1.0 0.0 ファイル形式 ^^^^^^^^^^^^ @@ -32,7 +45,11 @@ SingleExcitation指定ファイル - 3-5行: ヘッダ(何が書かれても問題ありません)。 -- 6行以降: [int02] [int03] [int04] [double01] [double02] +- 6行以降: + :: + + [int02] + [int03] [int04] [int05] [double01] [double02] パラメータ ^^^^^^^^^^ @@ -47,16 +64,23 @@ SingleExcitation指定ファイル **形式 :** int型 (空白不可) - **説明 :** 一体励起演算子の総数を指定します。 + **説明 :** 演算子\ :math:`\hat{O}_{r}`\ と\ :math:`\hat{O}_{l}`\ の数を合わせた総数を指定します。 + 上の場合は1個の\ :math:`\hat{O}_{r}`\ と12個の\ :math:`\hat{O}_{l}`\ を合わせた13個となります。 - :math:`[`\ int02\ :math:`]` **形式 :** int型 (空白不可) + **説明 :** 一体励起演算子の総数を指定します。 + +- :math:`[`\ int03\ :math:`]` + + **形式 :** int型 (空白不可) + **説明 :** サイト番号を指定する整数。0以上\ ``Nsite``\ 未満で指定します。 -- :math:`[`\ int03\ :math:`]` +- :math:`[`\ int04\ :math:`]` **形式 :** int型 (空白不可) @@ -68,7 +92,7 @@ SingleExcitation指定ファイル (:math:`-S-0.5, -S+0.5, \cdots, S+0.5`\ に対応\ :math:`)` | を選択することが出来ます。 -- :math:`[`\ int04\ :math:`]` +- :math:`[`\ int05\ :math:`]` **形式 :** int型 (空白不可) @@ -82,7 +106,7 @@ SingleExcitation指定ファイル **形式 :** double型 (空白不可) **説明 - :**\ :math:`c_{i\sigma_1}(c_{i\sigma_1}^{\dagger})`\ の実部を\ :math:`[`\ double01\ :math:`]`\ 、虚部を\ :math:`[`\ double02\ :math:`]`\ でそれぞれ指定します。 + :**\ :math:`A_{i \sigma_1}`\ の実部を\ :math:`[`\ double01\ :math:`]`\ 、虚部を\ :math:`[`\ double02\ :math:`]`\ でそれぞれ指定します。 使用ルール ^^^^^^^^^^ @@ -93,9 +117,9 @@ SingleExcitation指定ファイル - 成分が重複して指定された場合にはエラー終了します。 -- :math:`[`\ int01\ :math:`]`\ と定義されている一体励起演算子の総数が異なる場合はエラー終了します。 +- :math:`[`\ int01\ :math:`]`\ 、:math:`[`\ int02\ :math:`]`\ と定義されている一体励起演算子の総数が異なる場合はエラー終了します。 -- :math:`[`\ int02\ :math:`]`-:math:`[`\ int04\ :math:`]`\ を指定する際、範囲外の整数を指定した場合はエラー終了します。 +- :math:`[`\ int03\ :math:`]`-:math:`[`\ int05\ :math:`]`\ を指定する際、範囲外の整数を指定した場合はエラー終了します。 .. 
raw:: latex diff --git a/src/StdFace b/src/StdFace index 8e2db166f..a66e232f3 160000 --- a/src/StdFace +++ b/src/StdFace @@ -1 +1 @@ -Subproject commit 8e2db166fd6472a9681745a9c69c252917a6fc7c +Subproject commit a66e232f3a40fefdd542f9997d5698a8440f674c diff --git a/tool/dynamicalr2k.F90 b/tool/dynamicalr2k.F90 index acd8e9030..1fa15d084 100644 --- a/tool/dynamicalr2k.F90 +++ b/tool/dynamicalr2k.F90 @@ -3,6 +3,8 @@ MODULE dynamical_val IMPLICIT NONE ! INTEGER,SAVE :: & + & ntemp, & ! Number of temperature + & nwfc, & & nomega, & & nk_line, & ! Numberof along each k line & nnode, & ! Number of node of k-path @@ -30,13 +32,15 @@ MODULE dynamical_val & indx(:,:) ! (nr,norb) Mapping index for each Correlation function ! REAL(8),ALLOCATABLE,SAVE :: & + & temp(:), & ! (ntemp) Temperature + & energy(:), & ! (nwfc) energy eigenvalue & knode(:,:), & ! (3,nnode) Nodes of k path & phase(:,:), & ! (125,nr) Boundary phase & kvec(:,:) ! (3,nk) k-vector in the 1st BZ ! COMPLEX(8),ALLOCATABLE,SAVE :: & - & cor(:,:,:), & ! (nr,norb,nomega) Correlation function in real space - & cor_k(:,:,:) ! (nk,norb,nomega) Correlation function in the k-space + & cor(:,:,:,:), & ! (nr,norb,nomega,nwfc) Correlation function in real space + & cor_k(:,:,:,:) ! (nk,norb,nomega,nwfc) Correlation function in the k-space ! CHARACTER(256),ALLOCATABLE :: & & kname(:) ! (nnode) Label of k-point node @@ -68,12 +72,12 @@ END SUBROUTINE key2lower ! SUBROUTINE read_filename() ! - USE dynamical_val, ONLY : file_gindx, filehead, nsite, omegamin, omegamax, nomega + USE dynamical_val, ONLY : file_gindx, filehead, nsite, omegamin, & + & omegamax, nomega, nwfc, energy IMPLICIT NONE ! - INTEGER :: fi = 10 - CHARACTER(256) :: modpara, keyname, namelist - REAL(8) :: eig0 + INTEGER :: fi = 10, iwfc, calctype + CHARACTER(256) :: modpara, keyname, namelist, calcmod ! 
WRITE(*,*) WRITE(*,*) "##### Read HPhi Input Files #####" @@ -97,6 +101,8 @@ SUBROUTINE read_filename() READ(fi,*) keyname, file_gindx ELSE IF(TRIM(ADJUSTL(keyname)) == "modpara") THEN READ(fi,*) keyname, modpara + ELSE IF(TRIM(ADJUSTL(keyname)) == "calcmod") THEN + READ(fi,*) keyname, calcmod ELSE READ(fi,*) keyname END IF @@ -108,6 +114,28 @@ SUBROUTINE read_filename() ! WRITE(*,*) " Excitation Index file : ", TRIM(ADJUSTL(file_gindx)) WRITE(*,*) " ModPara file : ", TRIM(ADJUSTL(modpara)) + WRITE(*,*) " CalcMod file : ", TRIM(ADJUSTL(calcmod)) + ! + ! Read from CalcMod file + ! + OPEN(fi,file = TRIM(calcmod)) + ! + DO + READ(fi,*,END=30) keyname + BACKSPACE(fi) + CALL key2lower(keyname) + ! + IF(TRIM(ADJUSTL(keyname)) == "calctype") THEN + READ(fi,*) keyname, calctype + ELSE + READ(fi,*) keyname + END IF + END DO + ! +30 CONTINUE + WRITE(*,*) " Read from ", TRIM(calcmod) + WRITE(*,*) " CalcType : ", calctype + CLOSE(FI) ! ! Read from Modpara file ! @@ -128,29 +156,53 @@ SUBROUTINE read_filename() READ(fi,*) keyname, omegamax ELSE IF(TRIM(ADJUSTL(keyname)) == "omegamin") THEN READ(fi,*) keyname, omegamin + ELSE IF(TRIM(ADJUSTL(keyname)) == "exct") THEN + READ(fi,*) keyname, nwfc ELSE READ(fi,*) keyname END IF END DO ! 20 CONTINUE + ! + ! FullDiag + ! + IF(calctype == 2) THEN + OPEN(fi, file = "output/CHECK_Memory.dat") + READ(fi, '(25x,i16)') nwfc + CLOSE(fi) + END IF + ! WRITE(*,*) " Read from ", TRIM(modpara) WRITE(*,*) " FileHead : ", TRIM(ADJUSTL(filehead)) WRITE(*,*) " Number of site : ", nsite WRITE(*,*) " Number of omega : ", nomega WRITE(*,*) " Minimum Omega : ", omegamin WRITE(*,*) " Maximum Omega : ", omegamax + WRITE(*,*) " Number of states : ", nwfc CLOSE(FI) ! filehead = "output/" // TRIM(ADJUSTL(filehead)) + ALLOCATE(energy(nwfc)) ! 
- OPEN(fi,file = TRIM(filehead)//"_energy.dat") - READ(fi,*) keyname - READ(fi,*) keyname, eig0 - CLOSE(fi) - WRITE(*,*) " Minimum energy : ", eig0 - !omegamin = omegamin - eig0 - !omegamax = omegamax - eig0 + IF(calctype == 2) THEN + OPEN(fi,file = "output/Eigenvalue.dat") + DO iwfc = 1, nwfc + READ(fi,*) keyname, energy(iwfc) + WRITE(*,*) " Energy ", iwfc, " : ", energy(iwfc) + END DO + CLOSE(fi) + ELSE + OPEN(fi,file = TRIM(filehead)//"_energy.dat") + DO iwfc = 1, nwfc + READ(fi,*) keyname + READ(fi,*) keyname, energy(iwfc) + READ(fi,*) keyname + READ(fi,*) keyname + WRITE(*,*) " Energy ", iwfc, " : ", energy(iwfc) + END DO + CLOSE(fi) + END IF ! END SUBROUTINE read_filename ! @@ -158,8 +210,8 @@ END SUBROUTINE read_filename ! SUBROUTINE read_geometry() ! - USE dynamical_val, ONLY : recipr, box, nsite, phase, irv, rindx, orb, & - & nr, nreq, norb, nnode, knode, nk_line, kname + USE dynamical_val, ONLY : recipr, box, nsite, phase, irv, rindx, orb, ntemp, & + & nr, nreq, norb, nnode, knode, nk_line, kname, temp IMPLICIT NONE ! INTEGER :: fi = 10, isite, ii, ir, ipiv(3), irv0(3), i1, i2, i3, inode @@ -236,6 +288,16 @@ SUBROUTINE read_geometry() WRITE(*,'(a,a,3f10.5)') " ", TRIM(kname(inode)), knode(1:3,inode) END DO ! + ! (Optional) Temperature + ! + READ(fi,*,iostat=ii) ntemp + IF(ii == 0) THEN + ALLOCATE(temp(ntemp)) + READ(fi,*) temp(1:ntemp) + ELSE + ntemp = 0 + END IF + ! CLOSE(fi) ! ! Compute Reciprocal Lattice Vector @@ -369,35 +431,41 @@ END SUBROUTINE read_corrindx ! SUBROUTINE read_corrfile() ! - USE dynamical_val, ONLY : filehead, ncor, indx, cor, norb, nr, nomega + USE dynamical_val, ONLY : filehead, ncor, indx, cor, norb, nr, nomega, nwfc IMPLICIT NONE ! - INTEGER :: fi = 10, icor, iorb, ir, iomega + INTEGER :: fi = 10, icor, iorb, ir, iomega, iwfc COMPLEX(8),ALLOCATABLE :: cor0(:,:) REAL(8) :: dtmp(4) + CHARACTER(256) :: fname ! 
- ALLOCATE(cor(nr,norb,nomega)) + ALLOCATE(cor(nr,norb,nomega,nwfc)) ALLOCATE(cor0(nomega,ncor)) - cor(1:nr,1:norb,1:nomega) = CMPLX(0d0, 0d0, KIND(1d0)) - ! - OPEN(fi, file = TRIM(filehead) // "_DynamicalGreen_0.dat") - ! - DO icor = 1, ncor - DO iomega = 1, nomega + cor(1:nr,1:norb,1:nomega,1:nwfc) = CMPLX(0d0, 0d0, KIND(1d0)) + ! + DO iwfc = 1, nwfc + ! + WRITE(fname,'(a,a,i0,a)') TRIM(filehead), "_DynamicalGreen_", iwfc-1, ".dat" + OPEN(fi, file = TRIM(fname)) + ! + DO icor = 1, ncor + DO iomega = 1, nomega READ(fi,*) dtmp(1:4) cor0(iomega,icor) = CMPLX(dtmp(3), dtmp(4), KIND(1d0)) - END DO - END DO - ! - CLOSE(fi) - ! - ! Map it into Up-Up(1) and Down-Down(2) Correlation - ! - DO iorb = 1, norb - DO ir = 1, nr - cor(ir, iorb, 1:nomega) = cor0(1:nomega, indx(ir, iorb)) - END DO - END DO + END DO + END DO + ! + CLOSE(fi) + ! + ! Map it into Up-Up(1) and Down-Down(2) Correlation + ! + DO iorb = 1, norb + DO ir = 1, nr + cor(ir, iorb, 1:nomega, iwfc) = cor0(1:nomega, indx(ir, iorb)) + END DO + END DO + ! + END DO ! iwfc ! DEALLOCATE(cor0, indx) ! @@ -407,14 +475,15 @@ END SUBROUTINE read_corrfile ! SUBROUTINE dynamical_cor() ! - USE dynamical_val, ONLY : cor, cor_k, kvec, nk, nr, nreq, norb, irv, phase, nomega + USE dynamical_val, ONLY : cor, cor_k, kvec, nk, nr, nreq, norb, irv, & + & phase, nomega, nwfc IMPLICIT NONE ! INTEGER :: ik, ir, ireq REAL(8) :: tpi = 2.0 * ACOS(-1d0), theta COMPLEX(8),ALLOCATABLE :: fmat(:,:) ! - ALLOCATE(fmat(nk,nr), cor_k(nk,norb,nomega)) + ALLOCATE(fmat(nk,nr), cor_k(nk,norb,nomega,nwfc)) ! ! Matirx for Fourier trans. exp(-i k R) ! @@ -430,7 +499,7 @@ SUBROUTINE dynamical_cor() END DO ! ir = 1, nr END DO ! ik = 1, nk ! - CALL zgemm('N', 'N', nk, norb*nomega, nr, CMPLX(1d0, 0d0, KIND(1d0)), fmat, nk, & + CALL zgemm('N', 'N', nk, norb*nomega*nwfc, nr, CMPLX(1d0, 0d0, KIND(1d0)), fmat, nk, & & cor, nr, CMPLX(0d0,0d0,KIND(1d0)), cor_k, nk) ! DEALLOCATE(fmat, cor) @@ -441,13 +510,15 @@ END SUBROUTINE dynamical_cor ! 
SUBROUTINE output_cor() ! - USE dynamical_val, ONLY : cor_k, nk, nnode, knode, nk_line, kname, norb, & - & recipr, filehead, nomega, omegamin, omegamax + USE dynamical_val, ONLY : cor_k, nk, nnode, knode, nk_line, kname, norb, ntemp, energy, & + & recipr, filehead, nomega, omegamin, omegamax, nwfc, temp IMPLICIT NONE ! - INTEGER :: fo = 20, ik, inode, ikk, iomega - REAL(8) :: dk(3), dk_cart(3), xk(nk), & + INTEGER :: fo = 20, ik, inode, ikk, iomega, iwfc, itemp, ndeg + REAL(8) :: dk(3), dk_cart(3), xk(nk), Zpart, & & xk_label(nnode), klength, omega + COMPLEX(8),ALLOCATABLE :: cor_t(:,:,:) + CHARACTER(256) :: fname ! ! Compute x-position for plotting band ! @@ -471,20 +542,75 @@ SUBROUTINE output_cor() WRITE(*,*) "##### Output Files #####" WRITE(*,*) ! - WRITE(*,*) " Correlation in k-space : ", TRIM(filehead) // "_dyn.dat" - ! - OPEN(fo, file = TRIM(filehead) // "_dyn.dat") + DO iwfc = 1, nwfc + ! + WRITE(fname,'(a,a,i0,a)') TRIM(filehead), "_dyn", iwfc-1, ".dat" + WRITE(*,*) " Correlation in k-space : ", TRIM(fname) + ! + OPEN(fo, file = TRIM(fname)) + ! + DO ik = 1, nk + DO iomega = 1, nomega + omega = (omegamax - omegamin) * DBLE(iomega - 1) / DBLE(nomega) + omegamin + WRITE(fo,'(1000e15.5)') xk(ik), omega, cor_k(ik, 1:norb, iomega, iwfc) + END DO + WRITE(fo,*) + WRITE(fo,*) + END DO + ! + CLOSE(fo) + ! + END DO ! - DO ik = 1, ikk - DO iomega = 1, nomega + ! Output temperature dependent correlation function + ! + ALLOCATE(cor_t(nk,norb,nomega)) + ! + DO itemp = 1, ntemp + ! + IF(temp(itemp) < 1.0d-8) THEN + ! + ! Zero temperature + ! + ndeg = COUNT(energy(1:nwfc)-energy(1) < 1.0d-5) + cor_t(:,:,:) = SUM(cor_k(:,:,:,1:ndeg), 4) / dble(ndeg) + ! + ELSE + ! + Zpart = 1.0d0 + cor_t(:,:,:) = cor_k(:,:,:,nwfc) + DO iwfc = nwfc, 2, -1 + Zpart = 1.0d0 + exp(-(energy(iwfc)-energy(iwfc-1))/temp(itemp))*Zpart + cor_t(:,:,:) = cor_k(:,:,:,iwfc-1) & + & + exp(-(energy(iwfc)-energy(iwfc-1))/temp(itemp))*cor_t(:,:,:) + END DO + cor_t(:,:,:) = cor_t(:,:,:) / Zpart + ! 
+ END IF + ! + WRITE(fname,'(a,a,i0,a)') TRIM(filehead), "_dyn_t", itemp, ".dat" + WRITE(*,*) " Correlation in k-space : ", TRIM(fname) + ! + OPEN(fo, file = TRIM(fname)) + ! + WRITE(fo,'(a,e15.5)') "#Temperature: ", temp(itemp) + ! + DO ik = 1, nk + DO iomega = 1, nomega omega = (omegamax - omegamin) * DBLE(iomega - 1) / DBLE(nomega) + omegamin - WRITE(fo,'(1000e15.5)') xk(ik), omega, cor_k(ik, 1:norb, iomega) - END DO - WRITE(fo,*) - WRITE(fo,*) + WRITE(fo,'(1000e15.5)') xk(ik), omega, cor_t(ik, 1:norb, iomega) + END DO + WRITE(fo,*) + WRITE(fo,*) + END DO + ! + CLOSE(fo) + ! END DO ! - CLOSE(fo) + DEALLOCATE(cor_t) + ! + ! Gnuplot setting file ! OPEN(fo, file = "kpath.gp") ! diff --git a/tool/greenr2k.F90 b/tool/greenr2k.F90 index 1472978f3..8cc3d5809 100644 --- a/tool/greenr2k.F90 +++ b/tool/greenr2k.F90 @@ -3,6 +3,7 @@ MODULE fourier_val IMPLICIT NONE ! INTEGER,SAVE :: & + & ntemp, & ! Number of temperature & interval, & & numave, & & nkg(3), & ! k-grid for momentum ditribution @@ -35,6 +36,8 @@ MODULE fourier_val & indx(:,:,:,:) ! (nr,8,norb,norb) Mapping index for each Correlation function ! REAL(8),ALLOCATABLE,SAVE :: & + & temp(:), & ! (ntemp) Temperature + & energy(:), & ! (nwfc) energy eigenvalue & knode(:,:), & ! (3,nnode) Nodes of k path & phase(:,:), & ! (125,nr) Boundary phase & kvec(:,:) ! (3,nk) k-vector in the 1st BZ @@ -95,7 +98,7 @@ END SUBROUTINE key2lower SUBROUTINE read_filename() ! USE fourier_val, ONLY : file_one, file_two, filehead, nsite, nwfc, & - & filetail, calctype, numave, interval + & filetail, calctype, numave, interval, energy IMPLICIT NONE ! INTEGER :: fi = 10, lanczos_max, irun, istep, iwfc, idx_start @@ -249,7 +252,7 @@ SUBROUTINE read_filename() ! FullDiag ! OPEN(fi, file = "output/CHECK_Memory.dat") - READ(fi, '(" MAX DIMENSION idim_max=1", i16)') nwfc + READ(fi, '(25x, i16)') nwfc CLOSE(fi) ALLOCATE(filetail(nwfc)) ! @@ -259,6 +262,15 @@ SUBROUTINE read_filename() ! WRITE(*,*) " Method : Full Diagonalization" ! 
+ ALLOCATE(energy(nwfc)) + ! + OPEN(fi,file = "output/Eigenvalue.dat") + DO iwfc = 1, nwfc + READ(fi,*) keyname, energy(iwfc) + WRITE(*,*) " Energy ", iwfc, " : ", energy(iwfc) + END DO + CLOSE(fi) + ! ELSE IF (calctype == 3) THEN ! ! LOBCG @@ -270,6 +282,16 @@ SUBROUTINE read_filename() ! WRITE(*,*) " Method : LOBCG" ! + OPEN(fi,file = TRIM(filehead)//"_energy.dat") + DO iwfc = 1, nwfc + READ(fi,*) keyname + READ(fi,*) keyname, energy(iwfc) + READ(fi,*) keyname + READ(fi,*) keyname + WRITE(*,*) " Energy ", iwfc, " : ", energy(iwfc) + END DO + CLOSE(fi) + ! ELSE ! ! mVMC @@ -293,8 +315,8 @@ END SUBROUTINE read_filename ! SUBROUTINE read_geometry() ! - USE fourier_val, ONLY : recipr, box, nsite, phase, irv, rindx, orb, & - & nr, nreq, norb, nnode, knode, nk_line, kname, nkg + USE fourier_val, ONLY : recipr, box, nsite, phase, irv, rindx, orb, ntemp, & + & nr, nreq, norb, nnode, knode, nk_line, kname, temp, nkg IMPLICIT NONE ! INTEGER :: fi = 10, isite, ii, ir, ipiv(3), irv0(3), i1, i2, i3, inode @@ -373,6 +395,16 @@ SUBROUTINE read_geometry() READ(fi,*) nkg(1:3) WRITE(*,'(a,3i3)') "k-grid for momentum distribution :", nkg(1:3) ! + ! (Optional) Temperature + ! + READ(fi,*,iostat=ii) ntemp + IF(ii == 0) THEN + ALLOCATE(temp(ntemp)) + READ(fi,*) temp(1:ntemp) + ELSE + ntemp = 0 + END IF + ! CLOSE(fi) ! ! Compute Reciprocal Lattice Vector @@ -819,12 +851,15 @@ END SUBROUTINE fourier_cor SUBROUTINE output_cor() ! USE fourier_val, ONLY : cor_k, nk, nnode, knode, nk_line, kname, norb, interval, & - & nwfc, recipr, filehead, filetail, calctype, nkg, numave + & nwfc, recipr, filehead, filetail, calctype, nkg, numave, & + & nwfc, temp, ntemp, energy IMPLICIT NONE ! 
- INTEGER :: fo = 20, ik, iwfc, inode, iorb, jorb, ii, ikk, iwfc1, iwfc2, istep + INTEGER :: fo = 20, ik, iwfc, inode, iorb, jorb, ii, ikk, iwfc1, iwfc2, istep, & + & itemp, ndeg REAL(8) :: dk(3), dk_cart(3), xk(nk), & - & xk_label(nnode), klength + & xk_label(nnode), klength, Zpart + COMPLEX(8),ALLOCATABLE :: cor_t(:,:,:,:) CHARACTER(256) :: filename COMPLEX(8),ALLOCATABLE :: cor_ave(:,:,:,:), cor_err(:,:,:,:) ! @@ -959,6 +994,60 @@ SUBROUTINE output_cor() ! END DO ! + ! Output temperature dependent correlation function + ! + ALLOCATE(cor_t(ikk,6,norb,norb)) + ! + DO itemp = 1, ntemp + ! + IF(temp(itemp) < 1.0d-8) THEN + ! + ! Zero temperature: average over the ndeg states within 1.0d-5 of energy(1); the state index is the 5th dimension of cor_k + ! + ndeg = COUNT(energy(1:nwfc)-energy(1) < 1.0d-5) + cor_t(1:ikk,:,:,:) = SUM(cor_k(1:ikk,:,:,:,1:ndeg), 5) / dble(ndeg) + ! + ELSE + ! + Zpart = 1.0d0 + cor_t(1:ikk,:,:,:) = cor_k(1:ikk,:,:,:,nwfc) + DO iwfc = nwfc, 2, -1 + Zpart = 1.0d0 + exp(-(energy(iwfc)-energy(iwfc-1))/temp(itemp))*Zpart + cor_t(1:ikk,:,:,:) = cor_k(1:ikk,:,:,:,iwfc-1) & + & + exp(-(energy(iwfc)-energy(iwfc-1))/temp(itemp))*cor_t(1:ikk,:,:,:) + END DO + cor_t(1:ikk,:,:,:) = cor_t(1:ikk,:,:,:) / Zpart + ! + END IF + ! + WRITE(filename,'(a,a,i0,a)') TRIM(filehead), "_corr_t", itemp, ".dat" + OPEN(fo, file = TRIM(filename)) + ! + WRITE(fo,'(a,e15.5)') "#Temperature: ", temp(itemp) + WRITE(fo,*) "# k-length[1]" + ii = 1 + DO iorb = 1, norb + DO jorb = 1, norb + WRITE(fo,'(a,i3,a,i3)') "# Orbital", iorb, " to Orbital", jorb + WRITE(fo,'(a,i4,a,i4,a,i4,a,i4,a)') & + & "# UpUp[", ii+1, ",", ii+2, "] (Re. Im.) DownDown[", ii+3, ",", ii+4, "]" + WRITE(fo,'(a,i4,a,i4,a,i4,a,i4,a,i4,a,i4,a,i4,a,i4,a)') & + & "# Density[", ii+5, ",", ii+6, "] SzSz[", ii+7, ",", ii+8, & + & "] S+S-[", ii+9, ",", ii+10, "] S.S[", ii+11, ",", ii+12, "]" + ii = ii+12 + END DO + END DO + ! + DO ik = 1, ikk + WRITE(fo,'(1000e15.5)') xk(ik), cor_t(ik, 1:6, 1:norb, 1:norb) + END DO + ! + CLOSE(fo) + ! + END DO !itemp = 1, ntemp + ! + DEALLOCATE(cor_t) + ! END IF ! IF(calctype == 4) ! 
OPEN(fo, file = "kpath.gp") From a43467086b57267fdeb19b87beaa24de2da05ff6 Mon Sep 17 00:00:00 2001 From: Mitsuaki Kawamura Date: Thu, 8 Jun 2023 12:49:38 +0900 Subject: [PATCH 45/50] Manual for dynamical correlation functions are updated --- doc/en/source/algorithm/DynamicalGreen_en.rst | 49 +++++----- doc/en/source/algorithm/Partition_en.rst | 55 ++++++++++++ doc/en/source/algorithm/TPQ_en.rst | 4 +- doc/en/source/algorithm/al-index.rst | 1 + .../expertmode_en/ModPara_file_en.rst | 2 +- .../expertmode_en/PairExcitation_file_en.rst | 89 ++++++++++++++----- .../SingleExcitation_file_en.rst | 59 ++++++++---- .../outputfiles_en/DynamicalGreen_en.rst | 7 +- ...ters_for_the_dynamical_Greens_function.rst | 15 +++- doc/en/source/fourier/format.rst | 7 ++ doc/en/source/fourier/tutorial.rst | 83 ++++++++++++++++- doc/en/source/fourier/util.rst | 32 +++++-- doc/en/source/tutorial/standardmode_en.rst | 2 +- doc/ja/source/algorithm/Partition_ja.rst | 78 ++++++++-------- .../SingleExcitation_file_ja.rst | 4 +- .../outputfiles_ja/DynamicalGreen_ja.rst | 4 +- doc/ja/source/fourier/format.rst | 7 ++ doc/ja/source/fourier/tutorial.rst | 3 +- doc/ja/source/fourier/util.rst | 13 ++- tool/dynamicalr2k.F90 | 5 +- 20 files changed, 396 insertions(+), 123 deletions(-) create mode 100644 doc/en/source/algorithm/Partition_en.rst diff --git a/doc/en/source/algorithm/DynamicalGreen_en.rst b/doc/en/source/algorithm/DynamicalGreen_en.rst index 407fbe402..095671aec 100644 --- a/doc/en/source/algorithm/DynamicalGreen_en.rst +++ b/doc/en/source/algorithm/DynamicalGreen_en.rst @@ -1,33 +1,42 @@ .. highlight:: none -Dynamical Green’s function +Dynamical Green's function -------------------------- -Using :math:`{\mathcal H}\Phi`, we can calculate a dynamical Green’s -function +Using :math:`{\mathcal H}\Phi`, we can calculate a dynamical Green's function -.. math:: I(z) = \langle \Phi ' | \frac{1}{ {\mathcal H}- z\hat{I} } | \Phi '\rangle, +.. 
math:: G_n^{O_l,O_r}(z) = \langle \Phi_n | \hat{O}_l (z + E_n - \hat{\cal H})^{-1} \hat{O}_r| \Phi_n \rangle -where :math:`|\Phi ' \rangle = \hat{O} | \Phi _0 \rangle` is an -excited state and :math:`\hat{O}` is an excitation operator defined as a -single excitation operator +where :math:`\hat{O}_{l,r}` is a single excitation operator -.. math:: \sum_{i, \sigma_1} A_{i \sigma_1} c_{i \sigma_1} (c_{i\sigma_1}^{\dagger}) +.. math:: \sum_{i, \sigma_1} A_{i \sigma_1} c_{i \sigma_1} \quad \textrm{or} \quad \sum_{i, \sigma_1} A_{i \sigma_1} c_{i\sigma_1}^{\dagger} -or a pair excitation operator +or a pair-excitation operator -.. math:: \sum_{i, j, \sigma_1, \sigma_2} A_{i \sigma_1 j \sigma_2} c_{i \sigma_1}c_{j \sigma_2}^{\dagger} (c_{i\sigma_1}^{\dagger}c_{j\sigma_2}). +.. math:: \sum_{i, j, \sigma_1, \sigma_2} A_{i \sigma_1 j \sigma_2} c_{i \sigma_1}c_{j \sigma_2}^{\dagger} \quad \textrm{or} \quad + \sum_{i, j, \sigma_1, \sigma_2} A_{i \sigma_1 j \sigma_2} c_{i\sigma_1}^{\dagger}c_{j\sigma_2}. -For example, the dynamical spin susceptibilities can be calculated by -defining :math:`\hat{O}` as +For example, to compute the dynamical spin susceptibility, we use pair excitation operators -.. math:: \hat{O} = \hat{S}({\bf k}) = \sum_{j}\hat{S}_j^z e^{i {\bf k} \cdot \bf {r}_j} = \sum_{j}\frac{1}{2} (c_{j\uparrow}^{\dagger}c_{j\uparrow}-c_{j\downarrow}^{\dagger}c_{j\downarrow})e^{i {\bf k} \cdot \bf {r}_j}. +.. math:: \hat{O}_r = \hat{S}_{\textbf{R}=\textbf{0}}^z = \frac{1}{2} (c_{\textbf{0}\uparrow}^{\dagger}c_{\textbf{0}\uparrow}-c_{\textbf{0}\downarrow}^{\dagger}c_{\textbf{0}\downarrow}) + \\ + \hat{O}_l = \hat{S}_{\textbf{R}}^z = \frac{1}{2} (c_{\textbf{R}\uparrow}^{\dagger}c_{\textbf{R}\uparrow}-c_{\textbf{R}\downarrow}^{\dagger}c_{\textbf{R}\downarrow}) -There are two modes implemented in :math:`{\cal H}\Phi`. One is the -continued fraction expansion method by using Lanczos method - [#]_ and the other is the shifted Krylov -method [#]_ . 
See the reference -for the details of each algorithm. +to generate :math:`G_n^{O_l,O_r}(z)\equiv G_n^{\textbf{R}}(z)`, +then perform the Fourier transformation -.. [#] \E. Dagotto, Rev. Mod. Phys. **66**, 763-840 (1994). -.. [#] \S.Yamamoto, T. Sogabe, T. Hoshi, S.-L. Zhang, T. Fujiwara, Journal of the Physical Society of Japan **77**, 114713 (2008). \ No newline at end of file +.. math:: G_n^{\textbf{k}}(z) \equiv \sum_{\textbf{R}} \exp(i\textbf{k}\cdot\textbf{R}) G_n^{\textbf{R}}(z) + +as a postprocess. + +Three modes are implemented in :math:`{\cal H}\Phi`: +The continued fraction expansion method by using Lanczos method [1]_, +the shifted Krylov method [2]_, and +the Lehmann representation with the full diagonalization + +.. math:: G_n^{O_l,O_r}(z) = \sum_{m} \frac{\langle \Phi_n | \hat{O}_l | \Phi_m \rangle \langle \Phi_m |\hat{O}_r| \Phi_n \rangle}{z + E_n - E_m}. + +See the reference for the details of each algorithm. + +.. [1] \E. Dagotto, Rev. Mod. Phys. **66**, 763-840 (1994). +.. [2] \S.Yamamoto, T. Sogabe, T. Hoshi, S.-L. Zhang, T. Fujiwara, Journal of the Physical Society of Japan **77**, 114713 (2008). diff --git a/doc/en/source/algorithm/Partition_en.rst b/doc/en/source/algorithm/Partition_en.rst new file mode 100644 index 000000000..9d0128e30 --- /dev/null +++ b/doc/en/source/algorithm/Partition_en.rst @@ -0,0 +1,55 @@ +.. highlight:: none + +.. _Sec:sec_partion_function: + +Partition function and quantities at finite-temperature +------------------------------------------------------- + +To avoid overflow/underflow, we compute as follows: + +Partition function + +.. 
math:: + + Z(T) &= \sum_{i=1}^N \exp\left(-\frac{E_i}{T}\right) + \nonumber \\ + &= \exp\left(-\frac{E_1}{T}\right) \left[ + 1 + \exp\left(-\frac{E_2-E_1}{T}\right)+ \exp\left(-\frac{E_3-E_1}{T}\right) + \cdots + + \exp\left(-\frac{E_N-E_1}{T}\right) + \right] + \nonumber \\ + &= \exp\left(-\frac{E_1}{T}\right) \left[ + 1 + \exp\left(-\frac{E_2-E_1}{T}\right)\left[ + 1 + \exp\left(-\frac{E_3-E_2}{T}\right)\left[ + 1 + \dots + \left[ + 1 + \exp\left(-\frac{E_N-E_{N-1}}{T}\right) + \right] + \right] + \right] + \right] + +Quantity at finite temperature + +.. math:: + + O(T) &= \frac{1}{Z(T)}\sum_i O_i \exp\left(-\frac{E_i}{T}\right) + \nonumber \\ + &= \frac{1}{Z(T)} \exp\left(-\frac{E_1}{T}\right) \left[ + O_1 + O_2 \exp\left(-\frac{E_2-E_1}{T}\right) + O_3\exp\left(-\frac{E_3-E_1}{T}\right) + \cdots + + O_N\exp\left(-\frac{E_N-E_1}{T}\right) + \right] + \nonumber \\ + &= \frac{1}{Z(T)} \exp\left(-\frac{E_1}{T}\right) \left[ + O_1 + \exp\left(-\frac{E_2-E_1}{T}\right)\left[ + O_2 + \exp\left(-\frac{E_3-E_2}{T}\right)\left[ + O_3 + \dots + \left[ + O_{N-1} + O_N\exp\left(-\frac{E_N-E_{N-1}}{T}\right) + \right] + \right] + \right] + \right] + diff --git a/doc/en/source/algorithm/TPQ_en.rst b/doc/en/source/algorithm/TPQ_en.rst index 8b9c62249..cf97d1a39 100644 --- a/doc/en/source/algorithm/TPQ_en.rst +++ b/doc/en/source/algorithm/TPQ_en.rst @@ -26,7 +26,7 @@ Details of implementation ------------------------- **Construction of the micro canonical TPQ (mTPQ) state** -^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Here, we explain how to construct the micro canonical TPQ (mTPQ) state [1]_, which offers the simplest method for calculating finite-temperature properties. 
@@ -59,7 +59,7 @@ we perform some independent calculations by changing :math:`|\Phi_{\rm rand}\ran Since the temperature **Construction of the canonical TPQ (cTPQ) state** -^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Here, we explain how to construct the canonical TPQ (cTPQ) state [2]_, which is another way to construct the TPQ state. In the cTPQ method, :math:`\exp[-\beta\hat{\mathcal H}/2]` is diff --git a/doc/en/source/algorithm/al-index.rst b/doc/en/source/algorithm/al-index.rst index 429cff569..02b3003bd 100644 --- a/doc/en/source/algorithm/al-index.rst +++ b/doc/en/source/algorithm/al-index.rst @@ -11,4 +11,5 @@ Algorithm DynamicalGreen_en Realtime_en Bogoliubov_en + Partition_en diff --git a/doc/en/source/filespecification/expertmode_en/ModPara_file_en.rst b/doc/en/source/filespecification/expertmode_en/ModPara_file_en.rst index 2aab02ce8..0f4485c93 100644 --- a/doc/en/source/filespecification/expertmode_en/ModPara_file_en.rst +++ b/doc/en/source/filespecification/expertmode_en/ModPara_file_en.rst @@ -232,7 +232,7 @@ CG method   TPQ (mTPQ/cTPQ) method -~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~ * ``Lanczos_max`` diff --git a/doc/en/source/filespecification/expertmode_en/PairExcitation_file_en.rst b/doc/en/source/filespecification/expertmode_en/PairExcitation_file_en.rst index 291c2f1bb..9a7bbe5fd 100644 --- a/doc/en/source/filespecification/expertmode_en/PairExcitation_file_en.rst +++ b/doc/en/source/filespecification/expertmode_en/PairExcitation_file_en.rst @@ -5,27 +5,51 @@ PairExcitation file ------------------- -The operators to generate the pair excited state -:math:`c_{i\sigma_1}c_{j\sigma_2}^{\dagger}(c_{i\sigma_1}^{\dagger}c_{j\sigma_2})` -are defined. The type of pair excitation operators -(:math:`c_{i\sigma_1}c_{j\sigma_2}^{\dagger}` or -:math:`c_{i\sigma_1}^{\dagger}c_{j\sigma_2}`) must be same in the input -file. In the :math:`S_z` conserved system, :math:`\sigma_1` must be -equal to :math:`\sigma_2`. 
An example of the file format is as follows. +To compute the dynamical correlation function + +.. math:: G_n^{O_l,O_r}(z) = \langle \Phi_n | \hat{O}_l (z + E_n - \hat{\cal H})^{-1} \hat{O}_r| \Phi_n \rangle, + +we set a pair-excitation operator as + +.. math:: + + \hat{O}_{l,r} = \sum_{i, j, \sigma_1, \sigma_2} A_{i \sigma_1 j \sigma_2} + c_{i \sigma_1}c_{j \sigma_2}^{\dagger} \quad \textrm{or} \quad + \sum_{i, j, \sigma_1, \sigma_2} A_{i \sigma_1 j \sigma_2} + c_{i\sigma_1}^{\dagger}c_{j\sigma_2} + +We can compute efficiently by using single :math:`\hat{O}_r` and multiple :math:`\hat{O}_l`. + +The type of pair excitation operators (:math:`c_{i\sigma_1}c_{j\sigma_2}^{\dagger}` or +:math:`c_{i\sigma_1}^{\dagger}c_{j\sigma_2}`) must be the same in the input file. + +In the :math:`S_z` conserved system, :math:`\sigma_1` must be equal to :math:`\sigma_2`. + +An example of the file format is as follows. :: - =============================== - NPair 24 - =============================== - ======== Pair Excitation ====== - =============================== - 0 0 0 0 0 1.0 0.0 - 0 1 0 1 0 1.0 0.0 - 1 0 1 0 0 1.0 0.0 - (continue...) - 11 0 11 0 0 1.0 0.0 - 11 1 11 1 0 1.0 0.0 + ============================================= + NPair 9 + ============================================= + =============== Pair Excitation ============= + ============================================= + 2 + 0 0 0 0 1 -0.500000000000000 0.0 + 0 1 0 1 1 0.500000000000000 0.0 + 2 + 0 0 0 0 1 -0.500000000000000 0.0 + 0 1 0 1 1 0.500000000000000 0.0 + 2 + 1 0 1 0 1 -0.500000000000000 0.0 + 1 1 1 1 1 0.500000000000000 0.0 + 2 + 2 0 2 0 1 -0.500000000000000 0.0 + 2 1 2 1 1 0.500000000000000 0.0 + 2 + 3 0 3 0 1 -0.500000000000000 0.0 + 3 1 3 1 1 0.500000000000000 0.0 + : .. _file_format_16: @@ -39,7 +63,16 @@ File format * Lines 3-5: Header * Lines 6-: - [int02]  [int03]  [int04]  [int05]  [int06]  [double01]  [double02]. + + Repeat the following block [int01] times. 
+ The first block corresponds to :math:`\hat{O}_{r}` and other blocks correspond to :math:`\hat{O}_{l}`. + + :: + + [int02] + [int03] [int04] [int05] [int06] [int07] [double01] [double02] + : + (Repeat [int02] times) .. _parameters_16: @@ -57,17 +90,25 @@ Parameters **Type :** Int (a blank parameter is not allowed) - **Description :** An integer giving the total number of pair - excitation operators. + **Description :** An integer giving the total number of pair-excitation operators + :math:`\hat{O}_r` and :math:`\hat{O}_l`. + For the above example, we have 9 operators (one :math:`\hat{O}_{r}` and 8 :math:`\hat{O}_{l}`). -* [int02], [int04] +* [int02] + + **Type :** Int (a blank parameter is not allowed) + + **Description :** An integer giving the total number of pair-excitation operators + included in each :math:`\hat{O}_r` and :math:`\hat{O}_l`. + +* [int03], [int05] **Type :** Int (a blank parameter is not allowed) **Description :** An integer giving a site index (:math:`0<=` [int02], [int04] :math:`<` ``Nsite``). -* [int03], [int05] +* [int04], [int06] **Type :** Int (a blank parameter is not allowed) @@ -121,4 +162,4 @@ Use rules .. raw:: latex - \newpage \ No newline at end of file + \newpage diff --git a/doc/en/source/filespecification/expertmode_en/SingleExcitation_file_en.rst b/doc/en/source/filespecification/expertmode_en/SingleExcitation_file_en.rst index b0c31175f..2031c64ce 100644 --- a/doc/en/source/filespecification/expertmode_en/SingleExcitation_file_en.rst +++ b/doc/en/source/filespecification/expertmode_en/SingleExcitation_file_en.rst @@ -5,23 +5,37 @@ SingleExcitation file --------------------- -The operators to generate the single excited state -:math:`c_{i\sigma_1}(c_{i\sigma_1}^{\dagger})` are defined. An example -of the file format is as follows. +To compute the dynamical correlation function + +.. 
math:: G_n^{O_l,O_r}(z) = \langle \Phi_n | \hat{O}_l (z + E_n - \hat{\cal H})^{-1} \hat{O}_r| \Phi_n \rangle, + +we set a single-exciation operator + +.. math:: + + \hat{O}_{l,r} = \sum_{i, \sigma_1} A_{i \sigma_1} c_{i \sigma_1} \quad + \textrm{or} \quad \sum_{i, \sigma_1} A_{i \sigma_1} c_{i\sigma_1}^{\dagger}. + +We can compute efficiently by using single :math:`\hat{O}_r` and multiple :math:`\hat{O}_l`. + +An example of the file format is as follows. :: =============================== - NSingle 24 + NSingle 13 =============================== ======== Single Excitation ====== =============================== - 0 0 0 1.0 0.0 - 0 1 0 1.0 0.0 - 1 0 0 1.0 0.0 - (continue...) - 11 0 0 1.0 0.0 - 11 1 0 1.0 0.0 + 1 + 0 0 0 1.0 0.0 + 1 + 0 0 0 1.0 0.0 + 1 + 1 0 0 1.0 0.0 + (continue...) + 1 + 11 0 0 1.0 0.0 .. _file_format_15: @@ -34,7 +48,12 @@ File format * Lines 3-5: Header -* Lines 6-: [int02]  [int03]  [int04]  [double01]  [double02]. +* Lines 6-: + :: + + [int02] + [int03] [int04] [int05] [double01] [double02] + : .. _parameters_15: @@ -52,17 +71,25 @@ Parameters **Type :** Int (a blank parameter is not allowed) - **Description :** An integer giving the total number of total number - of single excitation operators. + **Description :** An integer giving the total number of single excitation operators + :math:`\hat{O}_r` and :math:`\hat{O}_l`. + For the above example, we have 13 operators (one :math:`\hat{O}_{r}` and 12 :math:`\hat{O}_{l}`), * [int02] **Type :** Int (a blank parameter is not allowed) + **Description :** An integer giving the total number of single excitation operators + included in each :math:`\hat{O}_r` and :math:`\hat{O}_l`. + +* [int03] + + **Type :** Int (a blank parameter is not allowed) + **Description :** An integer giving a site index (:math:`0<=` [int02] :math:`<` ``Nsite``). 
-* [int03] +* [int04] **Type :** Int (a blank parameter is not allowed) @@ -74,7 +101,7 @@ Parameters | :math:`0, 1, \cdots, 2S+1` (corresponding to -:math:`S-0.5, -S+0.5, \cdots S+0.5`). -* [int04] +* [int05] **Type :** Int (a blank parameter is not allowed) @@ -115,4 +142,4 @@ Use rules .. raw:: latex - \newpage \ No newline at end of file + \newpage diff --git a/doc/en/source/filespecification/outputfiles_en/DynamicalGreen_en.rst b/doc/en/source/filespecification/outputfiles_en/DynamicalGreen_en.rst index 9144583b6..b022667c1 100644 --- a/doc/en/source/filespecification/outputfiles_en/DynamicalGreen_en.rst +++ b/doc/en/source/filespecification/outputfiles_en/DynamicalGreen_en.rst @@ -13,7 +13,7 @@ function. An example of the file format is as follows. File name ~~~~~~~~~ -* ##_DynamicalGreen.dat +* ##_DynamicalGreen_*.dat ## indicates [string02] in a ModPara file. @@ -25,6 +25,9 @@ File format * Lines 1-: [double01]  [double02]  [double03]  [double04] +NOmega (Specified in ModPara) lines are printed. +Then, after a blank line, a block corresponds to the next :math:`\hat{O}_l` is printed. + .. _parameters_42: Parameters @@ -49,4 +52,4 @@ Parameters .. raw:: latex - \newpage \ No newline at end of file + \newpage diff --git a/doc/en/source/filespecification/standardmode_en/Parameters_for_the_dynamical_Greens_function.rst b/doc/en/source/filespecification/standardmode_en/Parameters_for_the_dynamical_Greens_function.rst index d0bce1264..17b8b8c53 100644 --- a/doc/en/source/filespecification/standardmode_en/Parameters_for_the_dynamical_Greens_function.rst +++ b/doc/en/source/filespecification/standardmode_en/Parameters_for_the_dynamical_Greens_function.rst @@ -31,10 +31,13 @@ Parameters for the dynamical Green’s function * ``SpectrumType`` **Type :** String (choose from ``"SzSz"``, ``"S+S-"``, ``"Density"``, - ``"up"``, ``"down"``. ``"SzSz"`` as default.) + ``"up"``, ``"down"``, ``"SzSz_R"``, ``"S+S-_R"``, ``"Density_R"``, ``"up_R"``, + ``"down_R"``. 
``"SzSz"`` as default.) **Description :** The type of the dynamical Green’s function to be - computed is specified. ``"SzSz"`` for + computed is specified. + The following values are used for the correlation function in the reciprocal space: + ``"SzSz"`` for :math:`\langle {S}^z_{-\bf q} {S}^z_{\bf q}\rangle`, ``"S+S-"`` for :math:`\langle {S}^{+}_{-\bf q} {S}^{-}_{\bf q}\rangle`, ``"Density"`` for :math:`\langle {n}_{-\bf q} {n}_{\bf q}\rangle`, @@ -42,6 +45,14 @@ Parameters for the dynamical Green’s function :math:`\langle {c}^{\dagger}_{{\bf q} \uparrow} {c}_{{\bf q} \uparrow}\rangle`, ``"down"`` for :math:`\langle {c}^{\dagger}_{{\bf q} \downarrow} {c}_{{\bf q} \downarrow}\rangle`. + For the real space, the following values are used: + ``"SzSz_R"`` for :math:`\langle {\hat S}_{z R} {\hat S}_{z 0}\rangle`, + ``"S+S-_R"`` for :math:`\langle {\hat S}^{+}_{R} {\hat S}^{-}_{0}\rangle`, + ``"Density_R"`` for :math:`\langle {\hat n}_{R} {\hat n}_{0}\rangle`, + ``"up_R"`` for :math:`\langle {\hat c}^{\dagger}_{R \uparrow} {\hat c}_{0 \uparrow}\rangle`, and + ``"down_R"`` for :math:`\langle {\hat c}^{\dagger}_{R \downarrow} {\hat c}_{0 \downarrow}\rangle`. + Here :math:`R` runs over all site indices. + See :ref:`Fourier-Transformation utility ` to compute dynamical correlation function in the reciprocal space with the Fourier transformation. * ``SpectrumQW``, ``SpectrumQL`` diff --git a/doc/en/source/fourier/format.rst b/doc/en/source/fourier/format.rst index 95aedfe3b..73a4066e2 100644 --- a/doc/en/source/fourier/format.rst +++ b/doc/en/source/fourier/format.rst @@ -35,6 +35,11 @@ the information of the cell and geometry is generated automatically. M 0.5 0.5 0 (6) G 0 0 0 (6) 16 16 1 (7) + 4 (8) + 0.0 (9) + 0.1 (9) + 0.3 (9) + 1.0 (9) #. The unit lattice vectors. Arbitrary unit (Generated by Standard mode). #. 
The phase for the one-body term across boundaries of the simulation cell (degree unit, @@ -48,6 +53,8 @@ the information of the cell and geometry is generated automatically. along high symmetry line. #. Fractional coordinate of *k* nodes. #. The *k* grid to plot the isosurface of the momentum distribution function. +#. (Optional) Number of temperature points for full-diagonalization and LOBPCG +#. (Optional) Temperature, the number of which is specified above One- and Two-body correlation function in the site representation ----------------------------------------------------------------- diff --git a/doc/en/source/fourier/tutorial.rst b/doc/en/source/fourier/tutorial.rst index 9269a5804..74608cc80 100644 --- a/doc/en/source/fourier/tutorial.rst +++ b/doc/en/source/fourier/tutorial.rst @@ -1,7 +1,7 @@ .. _tutorial: -Tutorial -======== +Tutorial for static correlation function +======================================== In this tutorial, we explain through a sample calculation of the 8-site Hubbard model on the square lattice. @@ -125,3 +125,82 @@ Related files - kpath.gp (:ref:`gnuplot`) - output/zvo_corr.dat (:ref:`zvocorr`) + +Tutorial for dynamical correlation function +=========================================== + +In this tutorial, we consider the one-dimensional Heisenberg model with 12 sites. + +Run HPhi +-------- + +We compute the ground state and the correlation function. +Input file is as follows: + +:: + + model = Spin + lattice = Chain + method = CG + L = 12 + 2Sz = 0 + J = 1.0 + CalcSpec = Scratch + SpectrumType = SzSz_r + OmegaIm = 0.1 + OmegaMin = -6.0 + OmegaMax = -2.0 + +.. code-block:: bash + + $ HPhi -s input + +Then, we obtain files for dynamical correlation function in ``output/``. + +Related files + +- stan.in (See manual of mVMC/:math:`{\mathcal H}\Phi`) + +Fourier transformation of correlation function +---------------------------------------------- + +Perform Fourier transformation with the utility ``dynamicalr2k``. + +.. 
code-block:: bash + + $ echo "4 20 + G 0 0 0 + X 0.5 0 0 + M 0.5 0.5 0 + G 0 0 0 + 1 1 1" >> geometry.dat + $ dynamicalr2k namelist.def geometry.dat + +Then, we obtain files for Fourier-transformed dynamical correlation function in ``output/``. + +Related files + +- output/zvo_DynamicalGreen.dat +- geometry.dat (:ref:`geometry`) +- output/zvo_dyn.dat + +Display correlation function +---------------------------- + +Plot correlation functions in the :math:`k` space by using gnuplot. + +:: + + load "kpath.gp" + splot "output/zvo_dyn.dat" u 1:2:(-$4) w l + +.. _dynamicalr2gpng: + +.. figure:: ../../../figs/dynamicalr2g.png + + Imaginary part of the correlation function :math:`\langle{\bf S}_{\bf k}\cdot{\bf S}_{\bf k}\rangle(\omega)` (fourth column of an output file) + +Related files + +- kpath.gp (:ref:`gnuplot`) +- output/zvo_dyn*.dat diff --git a/doc/en/source/fourier/util.rst b/doc/en/source/fourier/util.rst index 93fd440e7..313916fbf 100644 --- a/doc/en/source/fourier/util.rst +++ b/doc/en/source/fourier/util.rst @@ -8,7 +8,7 @@ This utility is used as follows: $ ${PATH}/greenr2k ${NAMELIST} ${GEOMETRY} where ``${PATH}`` is the path to the directory where -the executable ``fourier`` exists, +the executable ``greenr2k`` exists, ${NAMELIST} is the NameList input-file name of :math:`{\mathcal H}\Phi`/mVMC, and ${GEOMETRY} is the path to the :ref:`geometry` file. @@ -24,7 +24,7 @@ HPhi-Lanczos In this case, ``HPhi`` writes correlation functions to the files ``zvo_cisajs.dat`` (one body) and ``zvo_cisajscktalt.dat`` (two body) in ``output/`` directory. -``fourier`` utility reads this files, performs the Fourier transformation, and +``greenr2k`` utility reads these files, performs the Fourier transformation, and generate single file ``zvo_corr.dat`` in ``output/`` directory. 
HPhi-TPQ @@ -33,7 +33,7 @@ HPhi-TPQ ``HPhi`` writes correlation functions to files ``zvo_cisajs_run*step*.dat`` (one body), ``zvo_cisajscktalt_run*step*.dat`` (two body) at each trial and TPQ step to the ``output/`` directory. -``fourier`` utility reads the one- and the two-body correlation function at each trial/TPQ-step, +``greenr2k`` utility reads the one- and the two-body correlation function at each trial/TPQ-step, and performs Fourier transformation, and write to a file ``zvo_corr_run*step*.dat`` in ``output/`` directory. @@ -43,10 +43,13 @@ HPhi-Full diagonalization and LOBCG ``HPhi`` writes correlation functions to files ``zvo_cisajs_eigen*.dat`` (one body) and ``zvo_cisajscktalt_eigen*.dat`` (two body) for each wavefunction to the ``output/`` directory. -``fourier`` utility reads the one- and the two-body correlation function at each state +``greenr2k`` utility reads the one- and the two-body correlation function at each state and performs Fourier transformation, and write to a file ``zvo_corr_eigen*.dat`` in ``output/``. +If the optional temperature input is specified, +the temperature-dependent correlation function is written into files ``zvo_corr_t*.dat`` in the ``output/`` directory. + mVMC ~~~~ @@ -54,7 +57,7 @@ mVMC in ``ModPara`` file, and it generates ``zvo_cisajs_???.dat`` (one body) and ``zvo_cisajscktalt_???.dat`` (two body) in ``output/`` directory. -``fourier`` utility reads all of these files, performs Fourier transformation, +``greenr2k`` utility reads all of these files, performs Fourier transformation, computes the average .. math:: @@ -74,3 +77,22 @@ and the standard error of the real- and imaginary-part of each correlation function, and writes them to a file ``zvo_corr_eigen*.dat`` in ``output/`` directory. + +Behavior of ``dynamicalr2k`` utility +==================================== + +This utility is used as follows: + +.. 
code-block:: bash + + $ ${PATH}/dynamicalr2k ${NAMELIST} ${GEOMETRY} + +where ``${PATH}`` is the path to the directory where +the executable ``dynamicalr2k`` exists, +${NAMELIST} is the NameList input-file name of :math:`{\mathcal H}\Phi`, and +${GEOMETRY} is the path to the :ref:`geometry` file. + +In the default setting, ``HPhi`` writes the site-dependent dynamical Green's function to files ``zvo_DynamicalGreen_*.dat`` in ``output/`` directory. +``dynamicalr2k`` reads these files, performs the Fourier transformation, and outputs files ``zvo_dyn*.dat`` in ``output/`` directory. + +If the optional temperature parameter is specified, the temperature-dependent Green's function is written to files ``zvo_dyn_t*.dat`` in ``output/`` directory. diff --git a/doc/en/source/tutorial/standardmode_en.rst b/doc/en/source/tutorial/standardmode_en.rst index 0c0e3b64d..71b97f087 100644 --- a/doc/en/source/tutorial/standardmode_en.rst +++ b/doc/en/source/tutorial/standardmode_en.rst @@ -308,4 +308,4 @@ In Eigenvalue.dat, an eigennumber and an eigenvalue are outputted for each line. Other tutorials --------------- -There are many tutorials in ``samples``. For more details, please see ``README.md`` at each directory or `the manual for HPhi-tutorial`_ . +There are many tutorials in ``samples``. For more details, please see ``README.md`` at each directory or `the manual for HPhi-tutorial `_ . diff --git a/doc/ja/source/algorithm/Partition_ja.rst b/doc/ja/source/algorithm/Partition_ja.rst index 0af02b08d..eaec5d1a0 100644 --- a/doc/ja/source/algorithm/Partition_ja.rst +++ b/doc/ja/source/algorithm/Partition_ja.rst @@ -5,49 +5,47 @@ 分配関数と有限温度物理量 ------------------------ +オーバーフロー/アンダーフローを防ぐために次のようにして計算する。 + 分配関数 -.. 
math:: - - Z(T) &= \sum_{i=1}^N \exp\left(-\frac{E_i}{T}\right) - \nonumber \\ - &= \exp\left(-\frac{E_1}{T}\right) \left[ - 1 + \exp\left(-\frac{E_2-E_1}{T}\right)+ \exp\left(-\frac{E_3-E_1}{T}\right) - \cdots - + \exp\left(-\frac{E_N-E_1}{T}\right) - \right] - \nonumber \\ - &= \exp\left(-\frac{E_1}{T}\right) \left[ - 1 + \exp\left(-\frac{E_2-E_1}{T}\right)\left[ - 1 + \exp\left(-\frac{E_3-E_2}{T}\right)\left[ - 1 + \dots - \left[ - 1 + \exp\left(-\frac{E_N-E_{N-1}}{T}\right) - \right] - \right] - \right] - \right] +.. math:: Z(T) &= \sum_{i=1}^N \exp\left(-\frac{E_i}{T}\right) + \nonumber \\ + &= \exp\left(-\frac{E_1}{T}\right) \left[ + 1 + \exp\left(-\frac{E_2-E_1}{T}\right)+ \exp\left(-\frac{E_3-E_1}{T}\right) + \cdots + + \exp\left(-\frac{E_N-E_1}{T}\right) + \right] + \nonumber \\ + &= \exp\left(-\frac{E_1}{T}\right) \left[ + 1 + \exp\left(-\frac{E_2-E_1}{T}\right)\left[ + 1 + \exp\left(-\frac{E_3-E_2}{T}\right)\left[ + 1 + \dots + \left[ + 1 + \exp\left(-\frac{E_N-E_{N-1}}{T}\right) + \right] + \right] + \right] + \right] 有限温度物理量 -.. math:: - - O(T) &= \frac{1}{Z(T)}\sum_i O_i \exp\left(-\frac{E_i}{T}\right) - \nonumber \\ - &= \exp\left(-\frac{E_1}{T}\right) \left[ - O_1 + O_2 \exp\left(-\frac{E_2-E_1}{T}\right) + O_3\exp\left(-\frac{E_3-E_1}{T}\right) - \cdots - + O_N\exp\left(-\frac{E_N-E_1}{T}\right) - \right] - \nonumber \\ - &= \exp\left(-\frac{E_1}{T}\right) \left[ - O_1 + \exp\left(-\frac{E_2-E_1}{T}\right)\left[ - O_2 + \exp\left(-\frac{E_3-E_2}{T}\right)\left[ - O_3 + \dots - \left[ - O_{N-1} + O_N\exp\left(-\frac{E_N-E_{N-1}}{T}\right) - \right] - \right] - \right] - \right] +.. 
math:: O(T) &= \frac{1}{Z(T)}\sum_i O_i \exp\left(-\frac{E_i}{T}\right) + \nonumber \\ + &= \exp\left(-\frac{E_1}{T}\right) \left[ + O_1 + O_2 \exp\left(-\frac{E_2-E_1}{T}\right) + O_3\exp\left(-\frac{E_3-E_1}{T}\right) + \cdots + + O_N\exp\left(-\frac{E_N-E_1}{T}\right) + \right] + \nonumber \\ + &= \exp\left(-\frac{E_1}{T}\right) \left[ + O_1 + \exp\left(-\frac{E_2-E_1}{T}\right)\left[ + O_2 + \exp\left(-\frac{E_3-E_2}{T}\right)\left[ + O_3 + \dots + \left[ + O_{N-1} + O_N\exp\left(-\frac{E_N-E_{N-1}}{T}\right) + \right] + \right] + \right] + \right] diff --git a/doc/ja/source/filespecification/expertmode_ja/SingleExcitation_file_ja.rst b/doc/ja/source/filespecification/expertmode_ja/SingleExcitation_file_ja.rst index be5214a40..8a83571ab 100644 --- a/doc/ja/source/filespecification/expertmode_ja/SingleExcitation_file_ja.rst +++ b/doc/ja/source/filespecification/expertmode_ja/SingleExcitation_file_ja.rst @@ -50,6 +50,7 @@ SingleExcitation指定ファイル [int02] [int03] [int04] [int05] [double01] [double02] + : パラメータ ^^^^^^^^^^ @@ -71,7 +72,8 @@ SingleExcitation指定ファイル **形式 :** int型 (空白不可) - **説明 :** 一体励起演算子の総数を指定します。 + **説明 :** 各\ :math:`\hat{O}_r`\, \ :math:`\hat{O}_l`\ にふくまれる + 一体励起演算子の総数をそれぞれ指定します。 - :math:`[`\ int03\ :math:`]` diff --git a/doc/ja/source/filespecification/outputfiles_ja/DynamicalGreen_ja.rst b/doc/ja/source/filespecification/outputfiles_ja/DynamicalGreen_ja.rst index 078bf105a..efa680892 100644 --- a/doc/ja/source/filespecification/outputfiles_ja/DynamicalGreen_ja.rst +++ b/doc/ja/source/filespecification/outputfiles_ja/DynamicalGreen_ja.rst @@ -10,7 +10,7 @@ DynamicalGreen.dat ファイル名 ^^^^^^^^^^ -- ##\_DynamicalGreen.dat +- ##\_DynamicalGreen_*.dat ##はModParaファイル内の[string02]で指定されるヘッダを表します。 @@ -20,6 +20,8 @@ DynamicalGreen.dat - 1行目-: :math:`[`\ double01\ :math:`]` :math:`[`\ double02\ :math:`]` :math:`[`\ double03\ :math:`]` :math:`[`\ double04\ :math:`]` +以下ModParaで指定したNOmega個分出力されたのち、空行を挟んで次の\ :math:`\hat{O}_l`\ に対応するブロックが出力される。 + パラメータ ^^^^^^^^^^ diff 
--git a/doc/ja/source/fourier/format.rst b/doc/ja/source/fourier/format.rst index 5791fda20..08f651101 100644 --- a/doc/ja/source/fourier/format.rst +++ b/doc/ja/source/fourier/format.rst @@ -36,6 +36,11 @@ mVMC/:math:`{\mathcal H}\Phi` のスタンンダードモードを用いた場 M 0.5 0.5 0 (6) G 0 0 0 (6) 16 16 1 (7) + 4 (8) + 0.0 (9) + 0.1 (9) + 0.3 (9) + 1.0 (9) #. 単位格子ベクトル. 任意の単位 (スタンダードモードで自動生成). #. 1体項がシミュレーションセルの境界を跨いだときに付く位相(単位degree) @@ -48,6 +53,8 @@ mVMC/:math:`{\mathcal H}\Phi` のスタンンダードモードを用いた場 #. *k* パスのノード(対称性の高い点)の数と, ノード間の *k* 点の分割数. #. *k* ノードのラベルとフラクショナル座標 #. 運動量分布関数のFermiSurferファイルを作成する時の *k* グリッド +#. (オプショナル)全対角化・LOBPCG法による有限温度物理量計算での温度点の数 +#. (オプショナル)上で指定した数だけ温度を指定する サイト表示の1体および2体相関関数 -------------------------------- diff --git a/doc/ja/source/fourier/tutorial.rst b/doc/ja/source/fourier/tutorial.rst index 21e83c16d..c4d74b5e1 100644 --- a/doc/ja/source/fourier/tutorial.rst +++ b/doc/ja/source/fourier/tutorial.rst @@ -174,7 +174,8 @@ HPhi の実行 G 0 0 0 X 0.5 0 0 M 0.5 0.5 0 - G 0 0 0" >> geometry.dat + G 0 0 0 + 1 1 1" >> geometry.dat $ dynamicalr2k namelist.def geometry.dat これにより, カレントディレクトリの ``output/`` 以下に diff --git a/doc/ja/source/fourier/util.rst b/doc/ja/source/fourier/util.rst index c23815bf7..9f02dc8f9 100644 --- a/doc/ja/source/fourier/util.rst +++ b/doc/ja/source/fourier/util.rst @@ -45,6 +45,9 @@ HPhi-全対角化およびLOBCG 1体および2体の相関関数を読み込みFourier変換を行った後, ``zvo_corr_eigen*.dat`` という名前のファイルとして ``output/`` ディレクトリに出力する. +オプションで指定可能な温度を指定した場合には、 +``zvo_corr_t*.dat`` という名前のファイルとして ``output/`` ディレクトリに出力する. + mVMC ~~~~ @@ -85,10 +88,12 @@ mVMC $ ${PATH}/dynamicalr2k ${NAMELIST} ${GEOMETRY} ここで, ``${PATH}`` は ``dynamicalr2k`` ユーティリティのバイナリのあるディレクトリのパス, -${NAMELIST}は :math:`{\mathcal H}\Phi`/mVMC の NameList インプットファイル名, +${NAMELIST}は :math:`{\mathcal H}\Phi` の NameList インプットファイル名, ${GEOMETRY}は :ref:`geometry` ファイルへのパスである. この場合に ``HPhi`` が ``output/`` ディレクトリに出力するサイト表示の動的相関関数は, -``zvo_DynamicalGreen.dat`` である. 
-``greenr2k`` ユーティリティーは, これらを読み込みFourier変換を行った後, -単一のファイル ``zvo_corr.dat`` を ``output/`` ディレクトリに出力する. +``zvo_DynamicalGreen_*.dat`` である. +``dynamicalr2k`` ユーティリティーは, これらを読み込みFourier変換を行った後, +ファイル ``zvo_dyn*.dat`` を ``output/`` ディレクトリに出力する. + +オプションで指定可能な温度を指定した場合には, ファイル ``zvo_dyn_t*.dat`` を ``output/`` ディレクトリに出力する. diff --git a/tool/dynamicalr2k.F90 b/tool/dynamicalr2k.F90 index 1fa15d084..d0776c08a 100644 --- a/tool/dynamicalr2k.F90 +++ b/tool/dynamicalr2k.F90 @@ -6,6 +6,7 @@ MODULE dynamical_val & ntemp, & ! Number of temperature & nwfc, & & nomega, & + & nkg(3), & ! k-grid for momentum ditribution & nk_line, & ! Numberof along each k line & nnode, & ! Number of node of k-path & nr, & ! Number of R-vector @@ -211,7 +212,7 @@ END SUBROUTINE read_filename SUBROUTINE read_geometry() ! USE dynamical_val, ONLY : recipr, box, nsite, phase, irv, rindx, orb, ntemp, & - & nr, nreq, norb, nnode, knode, nk_line, kname, temp + & nr, nreq, norb, nnode, knode, nk_line, kname, temp, nkg IMPLICIT NONE ! INTEGER :: fi = 10, isite, ii, ir, ipiv(3), irv0(3), i1, i2, i3, inode @@ -287,6 +288,8 @@ SUBROUTINE read_geometry() READ(fi,*) kname(inode), knode(1:3,inode) WRITE(*,'(a,a,3f10.5)') " ", TRIM(kname(inode)), knode(1:3,inode) END DO + READ(fi,*) nkg(1:3) + WRITE(*,'(a,3i3)') "k-grid for momentum distribution :", nkg(1:3) ! ! (Optional) Temperature ! From a3610f93c95b5fd16e7edaa92607784be0f21c8d Mon Sep 17 00:00:00 2001 From: Mitsuaki Kawamura Date: Thu, 8 Jun 2023 13:45:32 +0900 Subject: [PATCH 46/50] This preamble must be included to avoid error in make tutorial-en-pdf --- doc/tutorial/en/source/conf.py | 2 +- src/StdFace | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/tutorial/en/source/conf.py b/doc/tutorial/en/source/conf.py index 6105d0b0e..dffcead4b 100644 --- a/doc/tutorial/en/source/conf.py +++ b/doc/tutorial/en/source/conf.py @@ -141,7 +141,7 @@ # Additional stuff for the LaTeX preamble. 
# - # 'preamble': '', + 'preamble': '\\usepackage{braket}', # Latex figure (float) alignment # diff --git a/src/StdFace b/src/StdFace index a66e232f3..6bd71f26e 160000 --- a/src/StdFace +++ b/src/StdFace @@ -1 +1 @@ -Subproject commit a66e232f3a40fefdd542f9997d5698a8440f674c +Subproject commit 6bd71f26eb7b067c5ac8f8a3a6b2fe3a7e08fc4a From 464912d28090a3beef6fdce40c9d8b986bf942e6 Mon Sep 17 00:00:00 2001 From: Mitsuaki Kawamura Date: Thu, 19 Oct 2023 01:52:01 +0900 Subject: [PATCH 47/50] Dry run should be executed in serial --- test/lobcg_spingc_Sy.sh | 2 +- tool/cTPQ.F90 | 14 -------------- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/test/lobcg_spingc_Sy.sh b/test/lobcg_spingc_Sy.sh index f754ecb39..f1d320b8b 100755 --- a/test/lobcg_spingc_Sy.sh +++ b/test/lobcg_spingc_Sy.sh @@ -19,7 +19,7 @@ exct = 1 LanczosEps = 16 EOF -${MPIRUN} ../../src/HPhi -sdry stan.in +../../src/HPhi -sdry stan.in cat > trans.def < __HPhi_temp__" From 8de798492a4d97c36806619ba20c1a7eefc760d6 Mon Sep 17 00:00:00 2001 From: Mitsuaki Kawamura Date: Mon, 30 Oct 2023 23:29:02 +0900 Subject: [PATCH 48/50] In Fugaku and FX1000, "work" variable for ZHEEVD mst be allocated posix_memalign (NOT malloc/calloc). Otherwise HPhi crashes when we use SVE. 
--- CMakeLists.txt | 2 -- src/CalcByLOBPCG.c | 23 ++++++++++++++--------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7fbe722c3..579d48bb6 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,8 +4,6 @@ enable_testing() project(HPhi NONE) option(USE_SCALAPACK "Use Scalapack" OFF) -set(CMAKE_C_FLAGS "-Wall") - if(CONFIG) message(STATUS "Loading configration: " ${PROJECT_SOURCE_DIR}/config/${CONFIG}.cmake) include(${PROJECT_SOURCE_DIR}/config/${CONFIG}.cmake) diff --git a/src/CalcByLOBPCG.c b/src/CalcByLOBPCG.c index d09a53177..d5084da85 100644 --- a/src/CalcByLOBPCG.c +++ b/src/CalcByLOBPCG.c @@ -66,11 +66,16 @@ static int diag_ovrp( lwork = nsub*nsub + 2 * nsub; lrwork = 3 * nsub*nsub + (4 + (int)log2(nsub) + 1) * nsub + 1; - iwork = (int*)malloc(liwork * sizeof(int)); - rwork = (double*)malloc(lrwork * sizeof(double)); - work = (double complex*)malloc(lwork * sizeof(double complex)); - mat = (double complex*)malloc(nsub*nsub * sizeof(double complex)); - for (isub = 0; isub < nsub*nsub; isub++)mat[isub] = 0.0; + iwork = i_1d_allocate(liwork); + rwork = d_1d_allocate(lrwork); + mat = cd_1d_allocate(nsub*nsub); +#ifdef FUJITSU + void *vptr; + posix_memalign(&vptr, 256, lwork * sizeof(double complex)); + work = (double complex*)vptr; +#else + work = cd_1d_allocate(lwork); +#endif /**@brief (1) Compute @f${\hat O}^{-1/2}@f$ with diagonalizing overrap matrix */ @@ -122,10 +127,10 @@ static int diag_ovrp( // printf("%d %d %15.5f %15.5f %15.5f\n", info, nsub2, eig[0], eig[1], eig[2]); for (isub = 0; isub < nsub*nsub; isub++)hsub[isub] = mat[isub]; - free(mat); - free(work); - free(rwork); - free(iwork); + free_cd_1d_allocate(mat); + free_cd_1d_allocate(work); + free_d_1d_allocate(rwork); + free_i_1d_allocate(iwork); return(nsub2); }/*void diag_ovrp*/ From 84a1aea4b5bb839f440b5d9b4e5bd8480d63293f Mon Sep 17 00:00:00 2001 From: Mitsuaki Kawamura Date: Thu, 1 Feb 2024 17:22:21 +0900 Subject: [PATCH 49/50] (Bug Fix) 
Tutorial for the spectrum calculation did not work after the modification of the file-format change in spectrum function. --- samples/tutorial_4.2/All.sh | 4 ++-- samples/tutorial_4.2/OpticalSpectrum.py | 18 ++++++++++-------- src/CalcSpectrum.c | 2 +- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/samples/tutorial_4.2/All.sh b/samples/tutorial_4.2/All.sh index b09b39d48..b022d5b31 100644 --- a/samples/tutorial_4.2/All.sh +++ b/samples/tutorial_4.2/All.sh @@ -17,7 +17,7 @@ cp *.def ./DG_M #[e] for negative omega #[s] sorting -sort -r -n DG_P/zvo_DynamicalGreen.dat > tmp_P -cat DG_M/zvo_DynamicalGreen.dat > tmp_M +sort -r -n DG_P/zvo_DynamicalGreen_0.dat > tmp_P +cat DG_M/zvo_DynamicalGreen_0.dat > tmp_M paste tmp_P tmp_M > optical.dat #[e] sorting diff --git a/samples/tutorial_4.2/OpticalSpectrum.py b/samples/tutorial_4.2/OpticalSpectrum.py index 575983a35..b873e7fec 100755 --- a/samples/tutorial_4.2/OpticalSpectrum.py +++ b/samples/tutorial_4.2/OpticalSpectrum.py @@ -135,14 +135,16 @@ def get_energies(self): list_trans = [0, 0, 0, 0] # x,y,z,orb with open("Current.def", 'w') as f: print("======optical conductivity ", file=f) - print("NCurrent %4d " % (4*All_N), file=f) + print("NCurrent 2 ", file=f) print("======optical conductivity ", file=f) print("======optical conductivity ", file=f) print("======optical conductivity ", file=f) - for spin_i in range(0,2): - for all_i in range(0,All_N): - list_trans[0] = 1 # only +1 for x direction - list_site = lattice.func_site(all_i, list_org) - all_j = lattice.func_strans(list_trans, list_site, list_org) - print("%4d %4d %4d %4d 1 0 1 " % (all_i, spin_i, all_j, spin_i), file=f) - print("%4d %4d %4d %4d 1 0 -1 " % (all_j, spin_i, all_i, spin_i), file=f) + for leftright in range(2): + print("%4d" % (4*All_N), file=f) + for spin_i in range(0,2): + for all_i in range(0,All_N): + list_trans[0] = 1 # only +1 for x direction + list_site = lattice.func_site(all_i, list_org) + all_j = lattice.func_strans(list_trans, 
list_site, list_org) + print("%4d %4d %4d %4d 1 0 1 " % (all_i, spin_i, all_j, spin_i), file=f) + print("%4d %4d %4d %4d 1 0 -1 " % (all_j, spin_i, all_i, spin_i), file=f) diff --git a/src/CalcSpectrum.c b/src/CalcSpectrum.c index c831fde18..c5e1011fb 100644 --- a/src/CalcSpectrum.c +++ b/src/CalcSpectrum.c @@ -579,7 +579,7 @@ int CalcSpectrum( fprintf(stdoutMPI, " Start: An Eigenvector is inputted in CalcSpectrum.\n"); TimeKeeper(&(X->Bind), cFileNameTimeKeep, c_InputEigenVectorStart, "a"); for (istate = 0; istate < nstate; istate++) { - sprintf(sdt, cFileNameInputEigen, nstate, istate, myrank); + sprintf(sdt, cFileNameInputEigen, X->Bind.Def.CDataFileHead, istate, myrank); childfopenALL(sdt, "rb", &fp); if (fp == NULL) { fprintf(stderr, "Error: Inputvector file is not found.\n"); From d9e207254e37c3ba88d42e1425761d3ea3c0aab9 Mon Sep 17 00:00:00 2001 From: Mitsuaki Kawamura Date: Fri, 19 Apr 2024 14:04:18 +0900 Subject: [PATCH 50/50] Fix unstable behaviorin LOBPCG: The resulting vectors of subspace diagonalization should be the same across processes. This caused error in Fugaku with SVE. 
Also fix typo of overlap --- src/CalcByLOBPCG.c | 39 +++++++++++++++++++---------------- src/include/wrapperMPI.h | 3 +++ src/wrapperMPI.c | 44 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+), 18 deletions(-) diff --git a/src/CalcByLOBPCG.c b/src/CalcByLOBPCG.c index d5084da85..ff47d74a4 100644 --- a/src/CalcByLOBPCG.c +++ b/src/CalcByLOBPCG.c @@ -48,10 +48,10 @@ void zgemm_(char *transa, char *transb, int *m, int *n, int *k, double complex * with the Lowdin's orthogonalization @return the truncated dimension, nsub2 */ -static int diag_ovrp( +static int diag_ovrlp( int nsub,//!<[in] Original dimension of subspace double complex *hsub,//!<[inout] (nsub*nsub) subspace hamiltonian -> eigenvector - double complex *ovlp,//!<[inout] (nsub*nsub) Overrap matrix -> @f${\hat O}^{1/2}@f$ + double complex *ovrlp,//!<[inout] (nsub*nsub) overlap matrix -> @f${\hat O}^{1/2}@f$ double *eig//!<[out] (nsub) Eigenvalue ) { @@ -77,9 +77,9 @@ static int diag_ovrp( work = cd_1d_allocate(lwork); #endif /**@brief - (1) Compute @f${\hat O}^{-1/2}@f$ with diagonalizing overrap matrix + (1) Compute @f${\hat O}^{-1/2}@f$ with diagonalizing overlap matrix */ - zheevd_(&jobz, &uplo, &nsub, ovlp, &nsub, eig, work, &lwork, rwork, &lrwork, iwork, &liwork, &info); + zheevd_(&jobz, &uplo, &nsub, ovrlp, &nsub, eig, work, &lwork, rwork, &lrwork, iwork, &liwork, &info); /**@brief @f[ {\hat O}^{-1/2} = \left(\frac{|O_1\rangle}{\sqrt{o_1}}, \frac{|O_2\rangle}{\sqrt{o_2}}, @@ -94,21 +94,21 @@ static int diag_ovrp( for (isub = 0; isub < nsub; isub++) { if (eig[isub] > 1.0e-10) { /*to be changed default 1.0e-14*/ for (jsub = 0; jsub < nsub; jsub++) - ovlp[jsub + nsub*nsub2] = ovlp[jsub + nsub*isub] / sqrt(eig[isub]); + ovrlp[jsub + nsub*nsub2] = ovrlp[jsub + nsub*isub] / sqrt(eig[isub]); nsub2 += 1; } } for (isub = nsub2; isub < nsub; isub++) for (jsub = 0; jsub < nsub; jsub++) - ovlp[jsub + nsub*isub] = 0.0; + ovrlp[jsub + nsub*isub] = 0.0; /** (2) Transform @f${\hat H}'\equiv 
{\hat O}^{-1/2 \dagger}{\hat H}{\hat O}^{-1/2}@f$. @f${\hat H}'@f$ is nsub2*nsub2 matrix. */ transa = 'N'; - zgemm_(&transa, &transb, &nsub, &nsub, &nsub, &one, hsub, &nsub, ovlp, &nsub, &zero, mat, &nsub); + zgemm_(&transa, &transb, &nsub, &nsub, &nsub, &one, hsub, &nsub, ovrlp, &nsub, &zero, mat, &nsub); transa = 'C'; - zgemm_(&transa, &transb, &nsub, &nsub, &nsub, &one, ovlp, &nsub, mat, &nsub, &zero, hsub, &nsub); + zgemm_(&transa, &transb, &nsub, &nsub, &nsub, &one, ovrlp, &nsub, mat, &nsub, &zero, hsub, &nsub); /** (3) Diagonalize @f${\hat H}'@f$. It is the standard eigenvalue problem. @f[ @@ -123,7 +123,7 @@ static int diag_ovrp( @f] */ transa = 'N'; - zgemm_(&transa, &transb, &nsub, &nsub, &nsub, &one, ovlp, &nsub, hsub, &nsub, &zero, mat, &nsub); + zgemm_(&transa, &transb, &nsub, &nsub, &nsub, &one, ovrlp, &nsub, hsub, &nsub, &zero, mat, &nsub); // printf("%d %d %15.5f %15.5f %15.5f\n", info, nsub2, eig[0], eig[1], eig[2]); for (isub = 0; isub < nsub*nsub; isub++)hsub[isub] = mat[isub]; @@ -132,8 +132,11 @@ static int diag_ovrp( free_d_1d_allocate(rwork); free_i_1d_allocate(iwork); - return(nsub2); -}/*void diag_ovrp*/ + BcastMPI_cv(0, nsub * nsub, hsub); + BcastMPI_cv(0, nsub * nsub, ovrlp); + BcastMPI_dv(0, nsub, eig); + return(BcastMPI_i(0, nsub2)); +}/*void diag_ovrlp*/ /**@brief Compute adaptively shifted preconditionar written in S. Yamada, et al., Transactions of JSCES, Paper No. 20060027 (2006). 
@@ -359,7 +362,7 @@ int LOBPCG_Main( int ii, jj, ie, nsub, stp, nsub_cut, nstate; double complex ***wxp/*[0] w, [1] x, [2] p of Ref.1*/, ***hwxp/*[0] h*w, [1] h*x, [2] h*p of Ref.1*/, - ****hsub, ****ovlp; /*Subspace Hamiltonian and Overlap*/ + ****hsub, ****ovrlp; /*Subspace Hamiltonian and Overlap*/ double *eig, *dnorm, eps_LOBPCG, eigabs_max, preshift, precon, dnormmax, *eigsub, eig_pos_shift; char tN = 'N', tC = 'C'; double complex one = 1.0, zero = 0.0; @@ -372,7 +375,7 @@ int LOBPCG_Main( dnorm = d_1d_allocate(X->Def.k_exct); eigsub = d_1d_allocate(nsub); hsub = cd_4d_allocate(3, X->Def.k_exct, 3, X->Def.k_exct); - ovlp = cd_4d_allocate(3, X->Def.k_exct, 3, X->Def.k_exct); + ovrlp = cd_4d_allocate(3, X->Def.k_exct, 3, X->Def.k_exct); i_max = X->Check.idim_max; i4_max = (int)i_max; @@ -500,7 +503,7 @@ private(idim,precon,ie) TimeKeeperWithStep(X, cFileNameTimeKeep, cLanczos_EigenValueStep, "a", stp); /**@brief -
  • Compute subspace Hamiltonian and overrap matrix: +
  • Compute subspace Hamiltonian and overlap matrix: @f${\hat H}_{\rm sub}=\{{\bf w},{\bf x},{\bf p}\}^\dagger \{{\bf W},{\bf X},{\bf P}\}@f$, @f${\hat O}=\{{\bf w},{\bf x},{\bf p}\}^\dagger \{{\bf w},{\bf x},{\bf p}\}@f$,
  • @@ -508,12 +511,12 @@ private(idim,precon,ie) for (ii = 0; ii < 3; ii++) { for (jj = 0; jj < 3; jj++) { zgemm_(&tN, &tC, &nstate, &nstate, &i4_max, &one, - &wxp[ii][1][0], &nstate, &wxp[jj][1][0], &nstate, &zero, &ovlp[jj][0][ii][0], &nsub); + &wxp[ii][1][0], &nstate, &wxp[jj][1][0], &nstate, &zero, &ovrlp[jj][0][ii][0], &nsub); zgemm_(&tN, &tC, &nstate, &nstate, &i4_max, &one, &wxp[ii][1][0], &nstate, &hwxp[jj][1][0], &nstate, &zero, &hsub[jj][0][ii][0], &nsub); } } - SumMPI_cv(nsub*nsub, &ovlp[0][0][0][0]); + SumMPI_cv(nsub*nsub, &ovrlp[0][0][0][0]); SumMPI_cv(nsub*nsub, &hsub[0][0][0][0]); for (ie = 0; ie < X->Def.k_exct; ie++) @@ -523,7 +526,7 @@ private(idim,precon,ie) generalized eigenvalue problem: @f${\hat H}_{\rm sub}{\bf v}={\hat O}\mu_{\rm sub}{\bf v}@f$, @f${\bf v}=(\alpha, \beta, \gamma)@f$ */ - nsub_cut = diag_ovrp(nsub, &hsub[0][0][0][0], &ovlp[0][0][0][0], eigsub); + nsub_cut = diag_ovrlp(nsub, &hsub[0][0][0][0], &ovrlp[0][0][0][0], eigsub); /**@brief
  • Update @f$\mu=(\mu+\mu_{\rm sub})/2@f$
  • */ @@ -604,7 +607,7 @@ private(idim,precon,ie) free_d_1d_allocate(dnorm); free_d_1d_allocate(eigsub); free_cd_4d_allocate(hsub); - free_cd_4d_allocate(ovlp); + free_cd_4d_allocate(ovrlp); free_cd_3d_allocate(hwxp); /**@brief
  • Output resulting vectors for restart
  • diff --git a/src/include/wrapperMPI.h b/src/include/wrapperMPI.h index 9bea50e78..51033958e 100644 --- a/src/include/wrapperMPI.h +++ b/src/include/wrapperMPI.h @@ -37,6 +37,9 @@ void SumMPI_cv(int nnorm, double complex *norm); unsigned long int SumMPI_li(unsigned long int idim); int SumMPI_i(int idim); unsigned long int BcastMPI_li(int root, unsigned long int idim); +int BcastMPI_i(int root, int nsub); +void BcastMPI_dv(int root, int nlen, double* vector); +void BcastMPI_cv(int root, int nlen, double complex* vector); double NormMPI_dc(unsigned long int idim, double complex *_v1); void NormMPI_dv(unsigned long int ndim, int nstate, double complex **_v1, double *dnorm); double complex VecProdMPI(long unsigned int ndim, double complex *v1, double complex *v2); diff --git a/src/wrapperMPI.c b/src/wrapperMPI.c index 2c54dfb3e..e5d68650f 100644 --- a/src/wrapperMPI.c +++ b/src/wrapperMPI.c @@ -313,6 +313,50 @@ unsigned long int BcastMPI_li( return(idim0); }/*unsigned long int BcastMPI_li*/ /** +@brief MPI wrapper function to broadcast integer across processes. +@return Broadcasted value across processes. +@author Mitsuaki Kawamura (The University of Tokyo) +*/ +int BcastMPI_i( + int root,//!<[in] The source process of the broadcast + int nsub//!<[in] Value to be broadcasted +) { + int nsub0; + nsub0 = nsub; +#ifdef MPI + MPI_Bcast(&nsub0, 1, MPI_INT, root, MPI_COMM_WORLD); +#endif + return(nsub0); +}/*int BcastMPI_i*/ +/** +@brief MPI wrapper function to broadcast double precision vector. + And store it in place. +@author Mitsuaki Kawamura (The University of Tokyo) +*/ +void BcastMPI_dv( + int root,//!<[in] The source process of the broadcast + int nlen,//!<[in] Length of broadcasted vector + double *vector//!<[inout] Broadcasted vector +) { +#ifdef MPI + MPI_Bcast(vector, nlen, MPI_DOUBLE_PRECISION, root, MPI_COMM_WORLD); +#endif +}/*void BcastMPI_dv*/ +/** +@brief MPI wrapper function to broadcast double complex vector. + And store it in place. 
+@author Mitsuaki Kawamura (The University of Tokyo) +*/ +void BcastMPI_cv( + int root,//!<[in] The source process of the broadcast + int nlen,//!<[in] Length of broadcasted vector + double complex* vector//!<[inout] Broadcasted vector +) { +#ifdef MPI + MPI_Bcast(vector, nlen, MPI_DOUBLE_COMPLEX, root, MPI_COMM_WORLD); +#endif +}/*void BcastMPI_cv*/ +/** @brief Compute norm of process-distributed vector @f$|{\bf v}_1|^2@f$ @return Norm @f$|{\bf v}_1|^2@f$