From 3f787eb25d13947ef3becc6cab8aaf6b3d1c516f Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Mon, 3 May 2021 17:42:44 -0700 Subject: [PATCH 01/45] declaring ddot in util.fh causes problems with cublas module Signed-off-by: Jeff Hammond --- src/ddscf/fast/potential.F | 2 ++ src/ddscf/riscf_trans.F | 3 +++ src/driver/opt_drv.F | 13 +++++++++++++ src/geom/geom.F | 6 ++++++ src/geom/geom_hnd.F | 6 +++++- src/hessian/analytic/dft/xc_d2expl.F | 2 ++ src/mcscf/detci/detci_dens.F | 8 +++++++- src/mp2_grad/mp2_back_transform.F | 4 ++++ src/mp2_grad/mp2_make_tuhf.F | 2 ++ src/mp2_grad/mp2_read_tiajb.F | 2 ++ src/nwdft/coulomb/dft_fitcd.F | 4 ++++ src/nwdft/coulomb/dft_fitvc.F | 2 ++ src/nwdft/dftgrad/dftg_cdfit.F | 2 ++ src/nwdft/dftgrad/dftg_gridv0b.F | 3 +++ src/nwdft/grid/grid_quadv0b.F | 3 +++ src/nwdft/so_dft/dft_scf_utils_so.F | 2 ++ src/nwdft/so_dft/sym_mo_adapt_so.F | 2 ++ src/nwdft/xc/xc_eval_fnl.F | 2 ++ src/nwdft/xc/xc_tabcd.F | 2 ++ src/nwpw/band/minimizer/c_bybminimize.F | 2 ++ src/nwpw/band/minimizer/c_bybminimize2.F | 2 ++ src/nwpw/nwpwlib/ion/ion.F | 2 ++ src/optim/mepgs/mepgs_drv.F | 14 ++++++++++++++ src/optim/neb/neb_drv.F | 2 ++ src/optim/string/string.F | 3 +++ src/optim/tropt/tropt_drv.F | 8 ++++++++ src/symmetry/sym_bs_ir_id.F | 3 +++ src/symmetry/sym_bs_irrep.F | 3 +++ src/symmetry/sym_mo_adapt.F | 2 ++ src/tce/ddotfile.F | 6 ++++++ src/tce/tce_diagnose_t1.F | 2 ++ src/tce/tce_mbpt2.F | 2 ++ src/tce/tce_residual_t1.F | 4 ++++ src/tce/tce_residual_t2.F | 6 ++++++ src/tce/tce_residual_t3.F | 4 ++++ src/tce/tce_residual_t3a.F | 2 ++ src/tce/tce_residual_t4.F | 4 ++++ src/util/util.fh | 2 -- src/vib/vib_eckart.F | 4 ++++ src/vib/vib_nmass.F | 4 ++++ src/vib/vib_tors.F | 2 ++ 41 files changed, 149 insertions(+), 4 deletions(-) diff --git a/src/ddscf/fast/potential.F b/src/ddscf/fast/potential.F index 62e597a57de..52ef519ac0d 100644 --- a/src/ddscf/fast/potential.F +++ b/src/ddscf/fast/potential.F @@ -20,6 +20,8 @@ double precision function 
potential(basis, g_dens, x, y, z) integer l_buf, l_scr integer k_buf, k_scr double precision pot, r(3) + double precision ddot + external ddot c r(1) = x r(2) = y diff --git a/src/ddscf/riscf_trans.F b/src/ddscf/riscf_trans.F index cc4e6536de0..6f5b03cc10e 100644 --- a/src/ddscf/riscf_trans.F +++ b/src/ddscf/riscf_trans.F @@ -41,6 +41,9 @@ subroutine riscf_trans_int (g_three, ao_basis, nsh, nbf, nsqhalf, integer nxtask external nxtask + double precision ddot + external ddot + nproc = ga_nnodes() if ( odisk ) then diff --git a/src/driver/opt_drv.F b/src/driver/opt_drv.F index c53d09578e7..4449c0b03aa 100644 --- a/src/driver/opt_drv.F +++ b/src/driver/opt_drv.F @@ -1424,6 +1424,8 @@ subroutine driver_hessian_update(geom,rtdb) integer istrss double precision strss1,strss2,dum1,dum2,dum3,dum4 logical redo_hessian + double precision ddot + external ddot ! This is a statement function ind(i,j) = k_hess + i + (j-1)*nvar - 1 @@ -1651,6 +1653,8 @@ subroutine driver_opt_search_dir(geom) logical ophigh logical geom_lattice_get external geom_lattice_get + double precision ddot + external ddot c ophigh = util_print('high', print_high) c @@ -2001,6 +2005,8 @@ subroutine driver_line_search1(rtdb,geom) double precision e0, e1, e2p, dsgrad double precision hess, a0, a1, a2 double precision driver_energy_step + double precision ddot + external ddot c dsgrad = ddot(nvar, ds, 1, g, 1) if (dsgrad*alpha .ge. 0d0) then @@ -2154,6 +2160,8 @@ subroutine driver_line_search2(rtdb,geom) integer i, j double precision walka(5), walke(5),dum logical success + double precision ddot + external ddot c dsgrad = ddot(nvar, ds, 1, g, 1) if (dsgrad*alpha .ge. 0d0) then @@ -2763,6 +2771,8 @@ subroutine driver_cart_pmat(rtdb, geom) logical omm logical task_qmmm logical opt_geom_cart_coords_get + double precision ddot + external ddot ind(i,j) = k_pmat + i-1 + (j-1)*ncart c c FRACTIONAL? 
@@ -3118,6 +3128,9 @@ subroutine driver_sad_search_dir(rtdb,geom,istep) c logical geom_print_zmatrix, omatchneg save saddir, evalp + + double precision ddot + external ddot c omatchneg = .true. ovtol = 0.7d0 diff --git a/src/geom/geom.F b/src/geom/geom.F index bc49298b44b..e0136f3df9a 100644 --- a/src/geom/geom.F +++ b/src/geom/geom.F @@ -555,6 +555,8 @@ logical function geom_rtdb_load(rtdb, geom, name) logical geom_check_handle, geom_rtdb_in, geom_get_user_scale external geom_check_handle, geom_rtdb_in, geom_get_user_scale logical getsym + double precision ddot + external ddot c geom_rtdb_load = geom_check_handle(geom, 'geom_rtdb_load') if (.not. geom_rtdb_load) return @@ -948,6 +950,8 @@ subroutine geom_compute_values(geom) logical geom_tag_to_element external geom_tag_to_element logical is_atom + double precision ddot + external ddot is_atom(i) = (.not. inp_compare(.false., 'bq', tags(i,geom)(1:2))) c e = 0.0d0 @@ -2697,6 +2701,8 @@ logical function geom_print(geom) $ geom_print_zmatrix, geom_any_finuc double precision deter3 external deter3 + double precision ddot + external ddot c if (.not. geom_check_handle(geom, 'geom_print')) then geom_print = .false. diff --git a/src/geom/geom_hnd.F b/src/geom/geom_hnd.F index 5da6d2171f6..89fb1e6b83f 100644 --- a/src/geom/geom_hnd.F +++ b/src/geom/geom_hnd.F @@ -4522,6 +4522,8 @@ SUBROUTINE HND_TFTR(H,F,Q,T,IA,M,N,NDIM) integer IA(1) integer ij,j,ik,max,i,k double precision small,zero,dum,qij,hij + double precision ddot + external ddot DATA SMALL /1.0D-11/ DATA ZERO /0.0D+00/ IJ = 0 @@ -5387,7 +5389,9 @@ double precision function out_of_plane_angle(a,b,c) c containing a and b. The sign is determined in a left-handed c sense ... so that if a=x, b=y then for c=+z angle=+90. 
c - double precision ddot, d(3), abc, theta + double precision d(3), abc, theta + double precision ddot + external ddot c call cross_product(a,b,d) abc = ddot(3,c,1,d,1)/sqrt( diff --git a/src/hessian/analytic/dft/xc_d2expl.F b/src/hessian/analytic/dft/xc_d2expl.F index cfb4f8c4d86..ca881c8e744 100644 --- a/src/hessian/analytic/dft/xc_d2expl.F +++ b/src/hessian/analytic/dft/xc_d2expl.F @@ -186,6 +186,8 @@ Subroutine xc_d2expl(tol_rho, scr, double precision duefac double precision dabsmax external dabsmax + double precision ddot + external ddot c c d2Exc / d2fxc dp(i) dp(j) / dfxc d2p(i) c ----- = sum sum | ----------- ----- ----- + sum | ----- ------ diff --git a/src/mcscf/detci/detci_dens.F b/src/mcscf/detci/detci_dens.F index e01f5e34e49..3a78e019ec9 100644 --- a/src/mcscf/detci/detci_dens.F +++ b/src/mcscf/detci/detci_dens.F @@ -33,13 +33,15 @@ subroutine detci_onepdm( norb, nsym, nela, nelb, nstra, nstrb, integer g_dtmp integer ii, jj, iex, ph double precision xx + double precision ddot + external ddot c c c myid = ga_nodeid() *ga:1:0 if (.not.(ga_create(MT_DBL, norb, norb, 'd', norb, 0, g_dtmp ))) - $ call errquit('detci_twopdm: cannot create global',0, GA_ERR) + $ call errquit('detci_onepdm: cannot create global',0, GA_ERR) call ga_zero(g_dtmp) call ga_distribution( g_civec, myid, rlo, rhi, cilo, cihi ) if (((cilo.ne.0).and.(cihi.ne.-1)).and. 
@@ -162,6 +164,8 @@ subroutine detci_twopdm( norb, nsym, nela, nelb, nstra, nstrb, integer l_t, k_t, l_s, k_s, lds ** integer g_dentmp integer myid, nn, rlo, rhi, cilo, cihi, dilo, dihi + double precision ddot + external ddot c c myid = ga_nodeid() @@ -425,6 +429,8 @@ subroutine detci_twopdm_ab( norb, nsym, nela, nelb, nstra, nstrb, double precision tx integer nxtask external nxtask + double precision ddot + external ddot c c c Initialize parallel stuff diff --git a/src/mp2_grad/mp2_back_transform.F b/src/mp2_grad/mp2_back_transform.F index da6f683f003..77d6cd21574 100644 --- a/src/mp2_grad/mp2_back_transform.F +++ b/src/mp2_grad/mp2_back_transform.F @@ -808,6 +808,8 @@ subroutine mp2_nonsep( $ ylo_cur, yhi_cur, udim_cur, vdim_cur, xdim_cur, ydim_cur logical status, odebug, odoit, sym_shell_quartet, oenergy external sym_shell_quartet + double precision ddot + external ddot c odebug = util_print('mp2_backt', print_debug) oenergy = util_print('backtenergy', print_debug) @@ -1122,6 +1124,8 @@ subroutine mp2_copyback0(dowork,g_a,g_a_trans, integer t_ilo, t_ihi, t_jlo, t_jhi integer nsegs,iseg_in double precision tr,tr_tr + double precision ddot + external ddot c call ga_distribution(g_a_trans, ga_nodeid(), T t_ilo, t_ihi, t_jlo, t_jhi) diff --git a/src/mp2_grad/mp2_make_tuhf.F b/src/mp2_grad/mp2_make_tuhf.F index aa4931d4564..3ed99890a59 100644 --- a/src/mp2_grad/mp2_make_tuhf.F +++ b/src/mp2_grad/mp2_make_tuhf.F @@ -51,6 +51,8 @@ subroutine mp2_make_tuhf(nbf,noa_lo,noa_hi,nva_lo,nva_hi, logical otdebug c integer l_ia_uv, k_ia_uv, l_tmp, k_tmp, l_ia_jb, k_ia_jb + double precision ddot + external ddot c #include "bitops.fh" c diff --git a/src/mp2_grad/mp2_read_tiajb.F b/src/mp2_grad/mp2_read_tiajb.F index bb3ca7246c8..3a96f3486cb 100644 --- a/src/mp2_grad/mp2_read_tiajb.F +++ b/src/mp2_grad/mp2_read_tiajb.F @@ -12,6 +12,8 @@ subroutine mp2_read_tijab(nv_lo, nv_hi, irs, symia, integer tunit double precision tunitptr double precision t(*) + double precision ddot + 
external ddot c c Read t(j,b,i,a) all j, b for given i, a taking into c account symmetry blocking diff --git a/src/nwdft/coulomb/dft_fitcd.F b/src/nwdft/coulomb/dft_fitcd.F index 6ceb7b43c7e..cd6889272f4 100644 --- a/src/nwdft/coulomb/dft_fitcd.F +++ b/src/nwdft/coulomb/dft_fitcd.F @@ -59,6 +59,8 @@ Subroutine dft_fitcd(nfit,CD_coef, i3c_ERI, Ecoul1, integer LU,ierr,ilo,ihi,jlo,jhi,nnii integer adrc,ldc,iptr,intdum character*255 errmsg + double precision ddot + external ddot c c Fit electronic charge density. The fitting coefficients are obtained by @@ -383,6 +385,8 @@ Subroutine mull_pop_fit(basis, natoms, nshells, nbf_cd, double precision cd_coef(nbf_cd), cgtf(nbf_cd) character*1 shell_labels(nshells), ang_mom_label(11) logical oprint_mull_fit + double precision ddot + external ddot c #include "bas.fh" #include "mafdecls.fh" diff --git a/src/nwdft/coulomb/dft_fitvc.F b/src/nwdft/coulomb/dft_fitvc.F index f2d03ca1982..12d52bdbee6 100644 --- a/src/nwdft/coulomb/dft_fitvc.F +++ b/src/nwdft/coulomb/dft_fitvc.F @@ -57,6 +57,8 @@ Subroutine dft_fitvc(CD_coef, i3c_ERI, Ecoul2, g_vc, integer k_at,l_at,atom_c_in,atom_d_in logical v_nonzero external nxtask + double precision ddot + external ddot c if(dermat) call errquit( C 'fitvc: dermat not coded yet',0,0) diff --git a/src/nwdft/dftgrad/dftg_cdfit.F b/src/nwdft/dftgrad/dftg_cdfit.F index ed52e51aa30..93adc88e375 100644 --- a/src/nwdft/dftgrad/dftg_cdfit.F +++ b/src/nwdft/dftgrad/dftg_cdfit.F @@ -120,6 +120,8 @@ Subroutine dftg_cdfit_gen(geom, AO_bas_han, CD_bas_han, c double precision dabsmax external nxtask,schwarz_shell,dabsmax + double precision ddot + external ddot nproc = ga_nnodes() me = ga_nodeid() c diff --git a/src/nwdft/dftgrad/dftg_gridv0b.F b/src/nwdft/dftgrad/dftg_gridv0b.F index 2bc6394e83c..6425a6f70ff 100644 --- a/src/nwdft/dftgrad/dftg_gridv0b.F +++ b/src/nwdft/dftgrad/dftg_gridv0b.F @@ -158,6 +158,9 @@ subroutine dftg_gridv0b(nqpts,rad,ictr_buf,iga_dens, c dVxc*P contribution. 
logical do_tddftvxc logical ldew2 ! Prevent weighting of derivative matrices + + double precision ddot + external ddot c c Evaluate the AO basis set at each of the quad. points. c allocate arrays for exponents and contraction coefficients diff --git a/src/nwdft/grid/grid_quadv0b.F b/src/nwdft/grid/grid_quadv0b.F index 5583a2e961e..a1037354f5c 100644 --- a/src/nwdft/grid/grid_quadv0b.F +++ b/src/nwdft/grid/grid_quadv0b.F @@ -101,6 +101,9 @@ subroutine grid_quadv0b( double precision StericEnergy_qm, StericEnergy_fde double precision StericEnergy_tot + double precision ddot + external ddot + pname = 'grid_quadv0b: ' mbf_fde = 0 ! npol = 0 diff --git a/src/nwdft/so_dft/dft_scf_utils_so.F b/src/nwdft/so_dft/dft_scf_utils_so.F index 4c9ad6ff8c8..0dad21f99f4 100644 --- a/src/nwdft/so_dft/dft_scf_utils_so.F +++ b/src/nwdft/so_dft/dft_scf_utils_so.F @@ -85,6 +85,8 @@ subroutine diag_fock(nbf_mo,ia,g_fockso,ibuff,g_moso,iwork,irwork, integer info c integer i,j,i1 + double precision ddot + external ddot c c Prepare arrays for diagonalization c diff --git a/src/nwdft/so_dft/sym_mo_adapt_so.F b/src/nwdft/so_dft/sym_mo_adapt_so.F index eea60379d26..eec941ac15e 100644 --- a/src/nwdft/so_dft/sym_mo_adapt_so.F +++ b/src/nwdft/so_dft/sym_mo_adapt_so.F @@ -62,6 +62,8 @@ subroutine sym_movecs_adapt_so(basis, thresh, g_vecs, irs, nmixed) integer l_u,k_u logical sym_char_table_so external sym_char_table_so + double precision ddot + external ddot c logical odebug ! True if debugging logical owarn ! True if to print warning messages diff --git a/src/nwdft/xc/xc_eval_fnl.F b/src/nwdft/xc/xc_eval_fnl.F index ba491d83902..b81915a0b13 100644 --- a/src/nwdft/xc/xc_eval_fnl.F +++ b/src/nwdft/xc/xc_eval_fnl.F @@ -98,6 +98,8 @@ Subroutine xc_eval_fnl(rho, delrho, Amat, Amat2, Cmat, Cmat2, double precision eps,dumd integer nx,nc,dumi parameter (eps=1.e-8) + double precision ddot + external ddot c c Initialize the XC potential and energy sampling matrices. 
c diff --git a/src/nwdft/xc/xc_tabcd.F b/src/nwdft/xc/xc_tabcd.F index c457761ec28..aae03ad0688 100644 --- a/src/nwdft/xc/xc_tabcd.F +++ b/src/nwdft/xc/xc_tabcd.F @@ -209,6 +209,8 @@ Subroutine xc_tabcd(what,l3d_dum, data nbhandl2 /0./ save nbhandl1 save nbhandl2 + double precision ddot + external ddot c c 0: l3d=.f. & n3d=1 ccc rhs: l3d=.true. & n3d=3 diff --git a/src/nwpw/band/minimizer/c_bybminimize.F b/src/nwpw/band/minimizer/c_bybminimize.F index 21b60b2c8c0..4f085641d70 100644 --- a/src/nwpw/band/minimizer/c_bybminimize.F +++ b/src/nwpw/band/minimizer/c_bybminimize.F @@ -86,6 +86,8 @@ subroutine c_bybminimize(E,deltae,deltac,current_iteration, external cpsi_1_noupdate_energy,cpsi_eigenvalue,ion_disp_energy logical ion_disp_on external ion_disp_on + double precision ddot + external ddot Ein = E(1) diff --git a/src/nwpw/band/minimizer/c_bybminimize2.F b/src/nwpw/band/minimizer/c_bybminimize2.F index 0b21cf07a97..190d6b6c69a 100644 --- a/src/nwpw/band/minimizer/c_bybminimize2.F +++ b/src/nwpw/band/minimizer/c_bybminimize2.F @@ -106,6 +106,8 @@ subroutine c_bybminimize2(E,deltae,deltac,current_iteration, external control_ks_algorithm integer control_ks_maxit_orb,control_ks_maxit_orbs external control_ks_maxit_orb,control_ks_maxit_orbs + double precision ddot + external ddot Ein = E(1) call Parallel_taskid(taskid) diff --git a/src/nwpw/nwpwlib/ion/ion.F b/src/nwpw/nwpwlib/ion/ion.F index c8d93364850..00ed875e88f 100644 --- a/src/nwpw/nwpwlib/ion/ion.F +++ b/src/nwpw/nwpwlib/ion/ion.F @@ -4797,6 +4797,8 @@ subroutine ion_eckart(oprint,NAT,NAT3,removerotation, double precision test_norm integer i, j, k, l, m, n, mu, nu, indx, iatom, iaxis, itemp integer nhess, nhesst,IMAX + double precision ddot + external ddot C**** nhess = nat3*nat3 nhesst = nat3*(nat3+1)/2 ! 
dimension of lower triangular hessian diff --git a/src/optim/mepgs/mepgs_drv.F b/src/optim/mepgs/mepgs_drv.F index 4e5f71883ac..8cd5e12b5f9 100644 --- a/src/optim/mepgs/mepgs_drv.F +++ b/src/optim/mepgs/mepgs_drv.F @@ -430,6 +430,8 @@ subroutine mepgs_cent(rtdb, geom, geoma, pgref, sfactor, string) logical gsopt_geom_cart_coords_get logical gsopt_geom_cart_coords_set logical ophigh + double precision ddot + external ddot c ophigh = util_print('high', print_high) CCCCCCCCCCCCCCCCCCCCCC @@ -511,6 +513,8 @@ double precision function mepgs_cosang(avec,bvec,angle) double precision avec(nvar), bvec(nvar) c double precision ctheta, factor(2) + double precision ddot + external ddot c mepgs_cosang = 0.0 c @@ -555,6 +559,8 @@ subroutine mepgs_hessian_update() double precision dsds, dshds, dsdg integer l_hess, k_hess, i, j integer ind + double precision ddot + external ddot ind(i,j) = k_hess + i + (j-1)*nvar - 1 c if (.not. ma_push_get(mt_dbl, nvar**2, 'hess', @@ -2188,6 +2194,8 @@ subroutine gsopt_hessian_update() double precision dsds, dshds, dsdg integer l_hess, k_hess, i, j integer ind + double precision ddot + external ddot ind(i,j) = k_hess + i + (j-1)*nvar - 1 c if (alpha .eq. 0d0) call errquit @@ -2462,6 +2470,8 @@ subroutine gsopt_cart_pmat(rtdb, geom) integer ind logical task_qmmm logical gsopt_geom_cart_coords_get + double precision ddot + external ddot ind(i,j) = k_pmat + i-1 + (j-1)*ncart c c FRACTIONAL? @@ -2656,6 +2666,8 @@ subroutine gsopt_compute_info() double precision desphere(max_nvar) double precision zeta(max_nvar) double precision norm + double precision ddot + external ddot c c Compute stuff used for printing and convergence tests c @@ -3058,6 +3070,8 @@ subroutine gsopt_pickstp(rtdb, geom, istep) double precision trustds ! restriction of step in opt. 
variable logical ophigh logical gsopt_geom_cart_coords_get + double precision ddot + external ddot c c get the hessian and gradient with appropriate projectors c applied following peng, ayala, schlegel and frisch so that diff --git a/src/optim/neb/neb_drv.F b/src/optim/neb/neb_drv.F index c6a2f578630..e69f709c609 100644 --- a/src/optim/neb/neb_drv.F +++ b/src/optim/neb/neb_drv.F @@ -52,6 +52,8 @@ logical function neb(rtdb) external energy_bead_list character*7 bead_index_name external bead_index_name + double precision ddot + external ddot oprint = ga_nodeid() .eq. 0 diff --git a/src/optim/string/string.F b/src/optim/string/string.F index e45ff408343..d17f976a0c4 100644 --- a/src/optim/string/string.F +++ b/src/optim/string/string.F @@ -467,6 +467,9 @@ subroutine zts_meps(maxit,nbeads,tol,stepsize,string_algorithm, character*7 bead_index_name external bead_index_name + double precision ddot + external ddot + ! Setup the problem ! "Secret" options with defaults that generally do not need changed if (.not. rtdb_get(rtdb, 'string:linopt', mt_log,1,linopt)) diff --git a/src/optim/tropt/tropt_drv.F b/src/optim/tropt/tropt_drv.F index 332d50dc157..b369854d6d8 100644 --- a/src/optim/tropt/tropt_drv.F +++ b/src/optim/tropt/tropt_drv.F @@ -1392,6 +1392,8 @@ subroutine tropt_hessian_update() double precision dsds, dshds, dsdg integer l_hess, k_hess, i, j integer ind + double precision ddot + external ddot ind(i,j) = k_hess + i + (j-1)*nvar - 1 c if (alpha .eq. 0d0) call errquit @@ -1781,6 +1783,8 @@ subroutine tropt_cart_pmat(rtdb, geom) integer ind logical task_qmmm logical tropt_geom_cart_coords_get + double precision ddot + external ddot ind(i,j) = k_pmat + i-1 + (j-1)*ncart c c FRACTIONAL? @@ -2515,6 +2519,8 @@ subroutine tropt_pickstp(rtdb,geom,istep) double precision trustds ! restriction of step in opt. 
variable logical geom_print_zmatrix logical ophigh + double precision ddot + external ddot c c get the hessian and gradient with appropriate projectors c applied following peng, ayala, schlegel and frisch so that @@ -2929,6 +2935,8 @@ subroutine tropt_etaylor(epredict) integer l_hess, k_hess double precision epredict double precision gamma(nvar) + double precision ddot + external ddot ! ! *** calculate predicted energy change *** ! diff --git a/src/symmetry/sym_bs_ir_id.F b/src/symmetry/sym_bs_ir_id.F index 0c86e6c8add..917ee9b021f 100644 --- a/src/symmetry/sym_bs_ir_id.F +++ b/src/symmetry/sym_bs_ir_id.F @@ -47,6 +47,9 @@ subroutine sym_bas_irrep_id(basis, oprint, mbf, njr, integer ncent_unique integer centlist(100), jc +c + double precision ddot + external ddot c c Get basis and geom info c diff --git a/src/symmetry/sym_bs_irrep.F b/src/symmetry/sym_bs_irrep.F index c89c695fae7..f2ad6f3c8ba 100644 --- a/src/symmetry/sym_bs_irrep.F +++ b/src/symmetry/sym_bs_irrep.F @@ -40,6 +40,9 @@ subroutine sym_bas_irreps(basis, oprint, nbf_per_ir) c double precision sym_trace_bas_op external sym_trace_bas_op +c + double precision ddot + external ddot c c Get basis and geom info c diff --git a/src/symmetry/sym_mo_adapt.F b/src/symmetry/sym_mo_adapt.F index 8c06df96f55..efd6357d31e 100644 --- a/src/symmetry/sym_mo_adapt.F +++ b/src/symmetry/sym_mo_adapt.F @@ -57,6 +57,8 @@ subroutine sym_movecs_adapt(basis, thresh, g_vecs, irs, nmixed) double precision v(maxireps), vnorm integer idamax external idamax + double precision ddot + external ddot c logical odebug ! True if debugging logical owarn ! 
True if to print warning messages diff --git a/src/tce/ddotfile.F b/src/tce/ddotfile.F index 40a0428e9b9..2da5f52a19f 100644 --- a/src/tce/ddotfile.F +++ b/src/tce/ddotfile.F @@ -29,6 +29,8 @@ double precision function ddotfile(d_1,d_2,size) EXTERNAL NXTASK cc external nxtask logical noloadbalance + double precision ddot + external ddot c c new c @@ -143,6 +145,8 @@ double precision function ddotfile_1(d_1,d_2,size) INTEGER NXTASK EXTERNAL NXTASK logical noloadbalance + double precision ddot + external ddot c c new c @@ -267,6 +271,8 @@ double precision function ddotfile_2(d_1,d_2,size) INTEGER NXTASK EXTERNAL NXTASK logical noloadbalance + double precision ddot + external ddot c c new c diff --git a/src/tce/tce_diagnose_t1.F b/src/tce/tce_diagnose_t1.F index 15c4c0c23f0..b4444cdb989 100644 --- a/src/tce/tce_diagnose_t1.F +++ b/src/tce/tce_diagnose_t1.F @@ -21,6 +21,8 @@ subroutine tce_diagnose_t1(d_r1,k_r1_offset,residual) INTEGER NXTASK EXTERNAL NXTASK logical nodezero + double precision ddot + external ddot c c ===================== c Zero scratch residual diff --git a/src/tce/tce_mbpt2.F b/src/tce/tce_mbpt2.F index 9ff749dd271..fbf1e8eceed 100644 --- a/src/tce/tce_mbpt2.F +++ b/src/tce/tce_mbpt2.F @@ -35,6 +35,8 @@ subroutine tce_mbpt2(d_mo2e,k_2e_offset, double precision cpu double precision wall logical nodezero + double precision ddot + external ddot c nodezero=(ga_nodeid().eq.0) cpu=-util_cpusec() diff --git a/src/tce/tce_residual_t1.F b/src/tce/tce_residual_t1.F index a6ccae7ff1c..2b42a9835af 100644 --- a/src/tce/tce_residual_t1.F +++ b/src/tce/tce_residual_t1.F @@ -27,6 +27,8 @@ subroutine tce_residual_t1(d_r1,k_r1_offset,residual) INTEGER NXTASK EXTERNAL NXTASK logical nodezero + double precision ddot + external ddot c c new c @@ -130,6 +132,8 @@ subroutine tce_residual_tr1(d_r1,k_r1_offset,residual) INTEGER NXTASK EXTERNAL NXTASK logical nodezero + double precision ddot + external ddot c c new c diff --git a/src/tce/tce_residual_t2.F 
b/src/tce/tce_residual_t2.F index 938f386dd5b..0c105123c4f 100644 --- a/src/tce/tce_residual_t2.F +++ b/src/tce/tce_residual_t2.F @@ -30,6 +30,8 @@ subroutine tce_residual_t2(d_r2,k_r2_offset,residual) INTEGER NXTASK EXTERNAL NXTASK logical nodezero + double precision ddot + external ddot c c new c @@ -139,6 +141,8 @@ subroutine tce_residual_tr2(d_r2,k_r2_offset,residual) INTEGER NXTASK EXTERNAL NXTASK logical nodezero + double precision ddot + external ddot c c new c @@ -252,6 +256,8 @@ subroutine tce_residual_t2a(d_r2,k_r2_offset,residual) INTEGER NXTASK EXTERNAL NXTASK logical nodezero + double precision ddot + external ddot c c ===================== c Zero scratch residual diff --git a/src/tce/tce_residual_t3.F b/src/tce/tce_residual_t3.F index 34c671092cc..5c9d2389587 100644 --- a/src/tce/tce_residual_t3.F +++ b/src/tce/tce_residual_t3.F @@ -30,6 +30,8 @@ subroutine tce_residual_t3(d_r3,k_r3_offset,residual) integer nxtask external nxtask logical nodezero + double precision ddot + external ddot c c new c @@ -173,6 +175,8 @@ subroutine tce_residual_tr3(d_r3,k_r3_offset,residual) integer nxtask external nxtask logical nodezero + double precision ddot + external ddot c c new c diff --git a/src/tce/tce_residual_t3a.F b/src/tce/tce_residual_t3a.F index 7a32c4728dd..0353ca44a58 100644 --- a/src/tce/tce_residual_t3a.F +++ b/src/tce/tce_residual_t3a.F @@ -31,6 +31,8 @@ subroutine tce_residual_t3a(d_r3,k_r3_offset,residual) external nxtask logical nodezero logical acolo + double precision ddot + external ddot c c new c diff --git a/src/tce/tce_residual_t4.F b/src/tce/tce_residual_t4.F index 090fb7cd38c..86413aea57d 100644 --- a/src/tce/tce_residual_t4.F +++ b/src/tce/tce_residual_t4.F @@ -32,6 +32,8 @@ subroutine tce_residual_t4(d_r4,k_r4_offset,residual) integer nxtask external nxtask logical nodezero + double precision ddot + external ddot c c new c @@ -208,6 +210,8 @@ subroutine tce_residual_tr4(d_r4,k_r4_offset,residual) integer nxtask external nxtask logical 
nodezero + double precision ddot + external ddot c c new c diff --git a/src/util/util.fh b/src/util/util.fh index 6c01182d3ec..95d91b1de95 100644 --- a/src/util/util.fh +++ b/src/util/util.fh @@ -2,7 +2,6 @@ c C$Id$ logical util_print - double precision ddot double precision util_cpusec double precision util_wallsec double precision util_random @@ -12,7 +11,6 @@ C$Id$ logical util_nwchemrc_get logical util_module_avail external util_print - external ddot external util_cpusec external util_wallsec external util_random diff --git a/src/vib/vib_eckart.F b/src/vib/vib_eckart.F index 1d316b3ab36..42b431b632f 100644 --- a/src/vib/vib_eckart.F +++ b/src/vib/vib_eckart.F @@ -22,6 +22,8 @@ SUBROUTINE vib_eckart( HESS, HESSP, HESST, COORD, VC , DOUBLE PRECISION UNIVEC(3), TEST(6,6), VNORM, temp, dotval, rnorm double precision test_norm integer i, j, k, l, m, n, mu, nu, indx, iatom, iaxis, itemp + double precision ddot + external ddot C**** C**** construct translation unit vectors; these are stored in the C**** first three columns of array VC, the rotation vectors will @@ -200,6 +202,8 @@ SUBROUTINE vib_eckart_trans( HESS, HESSP, HESST, COORD, VC , DOUBLE PRECISION UNIVEC(3), TEST(3,3), VNORM, temp, dotval, rnorm double precision test_norm integer i, j, k, l, m, n, mu, nu, indx, iatom, iaxis, itemp + double precision ddot + external ddot C**** C**** construct translation unit vectors; these are stored in the C**** first three columns of array VC, the rotation vectors will diff --git a/src/vib/vib_nmass.F b/src/vib/vib_nmass.F index ea81a2d2b5c..fed5956d4d8 100644 --- a/src/vib/vib_nmass.F +++ b/src/vib/vib_nmass.F @@ -39,6 +39,8 @@ subroutine vib_vecnormal(vectors,nvec) c double precision vnorm integer col + double precision ddot + external ddot do col = 1,nvec vnorm = ddot(nvec,vectors(1,col),1,vectors(1,col),1) vnorm = sqrt(1.0d00/vnorm) @@ -60,6 +62,8 @@ subroutine vib_vecphase(coord,vectors,nvec) c double precision vnorm integer col + double precision ddot + external ddot 
do col = 1,nvec vnorm = ddot(nvec,coord,1,vectors(1,col),1) if (vnorm.lt.0.0d0) then diff --git a/src/vib/vib_tors.F b/src/vib/vib_tors.F index cbacf329fc8..d0e4cb9fb2c 100644 --- a/src/vib/vib_tors.F +++ b/src/vib/vib_tors.F @@ -32,6 +32,8 @@ SUBROUTINE vib_TORS(EQVAL,NOINT,I,J,K,L,C,B,NDIM) double precision dotpj, dotpk, sinpj, sinpk double precision smi, smj, sml, sense, f1, f2, dot integer m, nocol1, nocol2, nocol3, nocol4 + double precision ddot + external ddot C C C From a367b13b5aed4722faaf0d768b60015f90c6640e Mon Sep 17 00:00:00 2001 From: edoapra Date: Tue, 4 May 2021 10:26:37 -0700 Subject: [PATCH 02/45] ddot declaration --- src/util/ga_mix.F | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/util/ga_mix.F b/src/util/ga_mix.F index 58cd548332b..7e29313ff68 100644 --- a/src/util/ga_mix.F +++ b/src/util/ga_mix.F @@ -5,6 +5,8 @@ subroutine ga_mix(g_a, n, nvec, b, ld) #include "global.fh" #include "mafdecls.fh" #include "util.fh" + external ddot + double precision ddot integer g_a integer n, nvec, ld double precision b(ld,nvec) From 90414b378c3697ea97ab21688e14203943ac87dd Mon Sep 17 00:00:00 2001 From: edoapra Date: Wed, 5 May 2021 10:07:16 -0700 Subject: [PATCH 03/45] use curl when available --- src/nwpw/nwpwlib/nwpwxc/build_dftd3a.sh | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/nwpw/nwpwlib/nwpwxc/build_dftd3a.sh b/src/nwpw/nwpwlib/nwpwxc/build_dftd3a.sh index 84e78e67fcd..45cf56a1951 100755 --- a/src/nwpw/nwpwlib/nwpwxc/build_dftd3a.sh +++ b/src/nwpw/nwpwlib/nwpwxc/build_dftd3a.sh @@ -1,10 +1,17 @@ #!/usr/bin/env bash rm -f dftd3.f nwpwxc_vdw3a.F -if [[ -f "dftd3.tgz" ]]; then - echo "using existing" dftd3.tgz +URL="https://www.chemie.uni-bonn.de/pctc/mulliken-center/software/dft-d3/" +TGZ=dftd3.tgz +if [[ -f "$TGZ" ]]; then + echo "using existing" "$TGZ" else - echo "downloading" dftd3.tgz - wget https://www.chemie.uni-bonn.de/pctc/mulliken-center/software/dft-d3/dftd3.tgz + echo "downloading" "$TGZ" + 
CURL_YES=`curl -O 2>&1 | head -1 | awk ' /URL/ {print "Y";exit};{print "N"}'` + if [ $CURL_YES = "Y" ]; then + curl -L "$URL"/"$TGZ" -o "$TGZ" + else + wget "$URL"/"$TGZ" + fi fi tar xzf dftd3.tgz dftd3.f mv dftd3.f nwpwxc_vdw3a.F From ff287e202bca309e49af1a640a3053c81c200d6b Mon Sep 17 00:00:00 2001 From: edoapra Date: Wed, 5 May 2021 12:16:55 -0700 Subject: [PATCH 04/45] silence compiler warnings --- src/nwdft/xc/xc_cr2scan.F | 5 +++++ src/nwdft/xc/xc_cr2scanl.F | 19 +++++++++++-------- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/nwdft/xc/xc_cr2scan.F b/src/nwdft/xc/xc_cr2scan.F index 88f8b06ec90..02112b67052 100644 --- a/src/nwdft/xc/xc_cr2scan.F +++ b/src/nwdft/xc/xc_cr2scan.F @@ -160,6 +160,11 @@ Subroutine xc_cr2scan(tol_rho, cfac, rho, delrho, Amat, ds = 1d0 dx = 1d0 gc = 1d0 + zeta=0d0 + opz=1d0 + omz=1d0 + opz23=1d0 + omz23=1d0 else zeta = (rho(n,2) - rho(n,3))/ntot if (zeta.lt.-1d0) zeta=-1d0 diff --git a/src/nwdft/xc/xc_cr2scanl.F b/src/nwdft/xc/xc_cr2scanl.F index 2e174c9746f..8f8f3892ccb 100644 --- a/src/nwdft/xc/xc_cr2scanl.F +++ b/src/nwdft/xc/xc_cr2scanl.F @@ -162,6 +162,12 @@ Subroutine xc_cr2scanl(tol_rho, cfac, rho, delrho, laprho, Amat, & (delrho(n,3,1)+delrho(n,3,2))**2 end if c + dtdnb = 0d0 + dtdgb = 0d0 + dtdlb = 0d0 + dtdna = 0d0 + dtdga = 0d0 + dtdla = 0d0 if (ipol.eq.1) then pa = dn2/(4d0*ckf2*n83) qa = laprho(n,1)/(4d0*ckf2*n53) @@ -192,10 +198,6 @@ Subroutine xc_cr2scanl(tol_rho, cfac, rho, delrho, laprho, Amat, & F53*dfsdqa*qa/rho(n,2)) dtdga = tuega*dfsdpa/(ckf2*(2d0*rho(n,2))**F83) dtdla = tuega*dfsdqa/(2d0*ckf2*(2d0*rho(n,2))**F53) - else - dtdna = 0d0 - dtdga = 0d0 - dtdla = 0d0 endif if (rho(n,3).gt.tol_rho) then pb = (delrho(n,1,2)**2 + @@ -207,10 +209,6 @@ Subroutine xc_cr2scanl(tol_rho, cfac, rho, delrho, laprho, Amat, & F53*dfsdqb*qb/rho(n,3)) dtdgb = tuegb*dfsdpb/(ckf2*(2d0*rho(n,3))**F83) dtdlb = tuegb*dfsdqb/(2d0*ckf2*(2d0*rho(n,3))**F53) - else - dtdnb = 0d0 - dtdgb = 0d0 - dtdlb = 0d0 endif endif @@ 
-229,6 +227,11 @@ Subroutine xc_cr2scanl(tol_rho, cfac, rho, delrho, laprho, Amat, ds = 1d0 dx = 1d0 gc = 1d0 + zeta=0d0 + opz=1d0 + omz=1d0 + opz23=1d0 + omz23=1d0 else zeta = (rho(n,2) - rho(n,3))/ntot if (zeta.lt.-1d0) zeta=-1d0 From ac743fa11fb9cd5b71d28be51d2746069db98d98 Mon Sep 17 00:00:00 2001 From: edoapra Date: Wed, 5 May 2021 17:34:52 -0700 Subject: [PATCH 05/45] added 5 download attempts --- src/nwpw/nwpwlib/nwpwxc/build_dftd3a.sh | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/nwpw/nwpwlib/nwpwxc/build_dftd3a.sh b/src/nwpw/nwpwlib/nwpwxc/build_dftd3a.sh index 45cf56a1951..e27626d73c4 100755 --- a/src/nwpw/nwpwlib/nwpwxc/build_dftd3a.sh +++ b/src/nwpw/nwpwlib/nwpwxc/build_dftd3a.sh @@ -7,12 +7,18 @@ if [[ -f "$TGZ" ]]; then else echo "downloading" "$TGZ" CURL_YES=`curl -O 2>&1 | head -1 | awk ' /URL/ {print "Y";exit};{print "N"}'` - if [ $CURL_YES = "Y" ]; then - curl -L "$URL"/"$TGZ" -o "$TGZ" + tries=0 ; until [ "$tries" -ge 5 ] ; do + if [ $CURL_YES = "Y" ]; then + curl -L "$URL"/"$TGZ" -o "$TGZ" && break else - wget "$URL"/"$TGZ" + wget "$URL"/"$TGZ" && break fi -fi + tries=$((tries+1)) ; echo attempt no. $tries ; sleep 5 ; done +fi +if [[ ! 
-f "$TGZ" ]]; then + echo "download failed" + exit 1 +fi tar xzf dftd3.tgz dftd3.f mv dftd3.f nwpwxc_vdw3a.F patch -p0 < nwpwxc_vdw3a.patch From a21982ca65871e8bce37d7ec0f0108f9faff3156 Mon Sep 17 00:00:00 2001 From: edoapra Date: Thu, 6 May 2021 10:37:29 -0700 Subject: [PATCH 06/45] increased buffer size to 1024 https://github.com/conda-forge/staged-recipes/pull/14725#commitcomment-50480065 --- src/config/depend.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/config/depend.c b/src/config/depend.c index 38d07115730..32efadcb7b6 100644 --- a/src/config/depend.c +++ b/src/config/depend.c @@ -8,6 +8,7 @@ #include #include #include +#define MAXBUF 1024 extern char *strdup(const char *); char *makefile; @@ -15,7 +16,7 @@ char backup[] = "makefile.bak"; void copy_truncate_makefile(const char *backup) { - char buf[8192]; + char buf[32*MAXBUF]; FILE *in; int i, j, ninbuf; char line[] = @@ -121,7 +122,6 @@ void skip_white_space(FILE *file) char *include_directive(FILE *file) { -#define MAXBUF 256 char tmp[MAXBUF]; int n = 0; int i; @@ -330,7 +330,7 @@ Original code: while (nincfile--) { char *incname = incfiles[nincfile]; - char path[256]; + char path[MAXBUF]; for (i=0; i Date: Thu, 6 May 2021 14:04:51 -0700 Subject: [PATCH 07/45] Updated control_Ep and control_Sp to avoid using rtdb on the fly,,.EJB --- src/nwpw/nwpwlib/control/control.F | 37 +++++++---------------------- src/nwpw/nwpwlib/control/control.fh | 4 ++-- 2 files changed, 10 insertions(+), 31 deletions(-) diff --git a/src/nwpw/nwpwlib/control/control.F b/src/nwpw/nwpwlib/control/control.F index 5785f1840a4..4c392110865 100644 --- a/src/nwpw/nwpwlib/control/control.F +++ b/src/nwpw/nwpwlib/control/control.F @@ -1233,6 +1233,12 @@ logical function control_read(code_in,rtdb) if (.not.btdb_get(rtdb,'nwpw:attenuation',mt_dbl,1,attenuation)) > attenuation = 0.5d0 +* **** set preconditioning parameters Ep,Sp **** + if (.not.btdb_get(rtdb,'nwpw:Eprecondition',mt_dbl,1,Ep)) + > Ep = 20.0d0 + 
if (.not.btdb_get(rtdb,'nwpw:Sprecondition',mt_dbl,1,Sp)) + > Sp = 200.0d0 + * **** set out of time variables **** est_step_time = -1 est_finish_time = -1 @@ -4178,7 +4184,6 @@ subroutine control_mullikenparameters(atom,rcut,lmbda) - * *************************** * * * * * control_Ep * @@ -4187,20 +4192,7 @@ subroutine control_mullikenparameters(atom,rcut,lmbda) real*8 function control_Ep() implicit none -#include "bafdecls.fh" -#include "btdb.fh" - -* **** control_rtdb common block **** - integer rtdb - common / control_rtdb1 / rtdb - - real*8 Ep - - if (.not.btdb_get(rtdb,'nwpw:Eprecondition', - > mt_dbl,1,Ep)) - > then - Ep = 20.0d0 - end if +#include "control.fh" control_Ep = Ep return @@ -4216,20 +4208,7 @@ real*8 function control_Ep() real*8 function control_Sp() implicit none -#include "bafdecls.fh" -#include "btdb.fh" - -* **** control_rtdb common block **** - integer rtdb - common / control_rtdb1 / rtdb - - real*8 Sp - - if (.not.btdb_get(rtdb,'nwpw:Sprecondition', - > mt_dbl,1,Sp)) - > then - Sp = 200.0d0 - end if +#include "control.fh" control_Sp = Sp return diff --git a/src/nwpw/nwpwlib/control/control.fh b/src/nwpw/nwpwlib/control/control.fh index 58e785b2f45..c35b7618eeb 100644 --- a/src/nwpw/nwpwlib/control/control.fh +++ b/src/nwpw/nwpwlib/control/control.fh @@ -15,7 +15,7 @@ real*8 tolerances(3),scaling(2),sa_decay(2) real*8 time_step,fake_mass,ks_alpha,fractional_alpha real*8 ecut,wcut,rcut - real*8 cpu1_time,cpu2_time,attenuation + real*8 cpu1_time,cpu2_time,attenuation,Ep,Sp real*8 bo_time_step,bo_fake_mass,kerker_g0 real*8 smooth_cutoff_values(2) integer bo_steps(2),bo_algorithm @@ -35,7 +35,7 @@ > scaling,sa_decay,smooth_cutoff_values, > time_step,fake_mass,ks_alpha, > fractional_alpha, - > ecut,wcut,rcut,attenuation, + > ecut,wcut,rcut,attenuation,Ep,Sp, > bo_time_step,bo_fake_mass,kerker_g0, > bo_steps,bo_algorithm, > mapping,mapping1d,np_dimensions, From 4f6ec0b7b8eba18b7db8c09fced0eb3fe1fdac29 Mon Sep 17 00:00:00 2001 From: Eric Bylaska 
Date: Thu, 6 May 2021 14:29:02 -0700 Subject: [PATCH 08/45] ...EJB --- src/nwpw/band/lib/ke/cke.F | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/nwpw/band/lib/ke/cke.F b/src/nwpw/band/lib/ke/cke.F index 2f3951ce72b..7a65c11bb4a 100644 --- a/src/nwpw/band/lib/ke/cke.F +++ b/src/nwpw/band/lib/ke/cke.F @@ -102,11 +102,12 @@ subroutine cke_init() > dbl_mb(tg_indx+(nb-1)*npack1)) end do end if - + value = BA_pop_stack(tmp2(2)) value = value.and.BA_pop_stack(tmp1(2)) if (.not. value) > call errquit('cke_init:popping stack memory',0,MA_ERR) + return end From 7cfcb81a605409f87266135fc35070c147c58648 Mon Sep 17 00:00:00 2001 From: Eric Bylaska Date: Thu, 6 May 2021 14:30:53 -0700 Subject: [PATCH 09/45] ...EJB --- src/nwpw/pspw/lib/psi/psi.F | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/nwpw/pspw/lib/psi/psi.F b/src/nwpw/pspw/lib/psi/psi.F index 3d0912e2e2e..8fabb8df945 100644 --- a/src/nwpw/pspw/lib/psi/psi.F +++ b/src/nwpw/pspw/lib/psi/psi.F @@ -304,8 +304,9 @@ subroutine psi_minimize_f_orb() > 0.001d0,ii,error_out,e0) !write(*,*) "e0:",ii,l,e0,error_out l = l+1 - if ((error_out.gt.maxerror).and.(l.le.4)) go to 3 - if ((error_out.gt.maxerror).and.(l2.le.1)) then + if ((error_out.gt.maxerror).and.(l.le.(1+(l2-1)*3))) go to 3 + if (((error_out.gt.maxerror).or.(e0.gt.4.0d0)) + > .and.(l2.le.1)) then call Pack_c_Zero(1,dcpl_mb(psi1(1) +(ii-1)*npack1)) call Pack_c_setzero(1,1.0d0,dcpl_mb(psi1(1) +(ii-1)*npack1)) go to 2 @@ -2785,7 +2786,8 @@ subroutine psi_minimize_virtual() > 0.001d0,ii,error_out,e0) l = l+1 if ((error_out.gt.maxerror).and.(l.le.(1+(l2-1)*3))) go to 3 - if ((error_out.gt.maxerror).and.(l2.le.1)) then + if (((error_out.gt.maxerror).or.(e0.gt.4.0d0)) + > .and.(l2.le.1)) then call Pack_c_Zero(1, > dcpl_mb(psi1_excited(1) +(ii-1)*npack1)) call Pack_c_setzero(1,1.0d0, From f935ff14c65dbc9564ba4c85bea8ddd10b7e15d1 Mon Sep 17 00:00:00 2001 From: Eric Bylaska Date: Thu, 6 May 2021 16:00:20 -0700 Subject: [PATCH 
10/45] ...EJB --- src/nwpw/band/lib/psi/cpsi_KS.F | 120 +++++++++++++++++++++++++++----- 1 file changed, 101 insertions(+), 19 deletions(-) diff --git a/src/nwpw/band/lib/psi/cpsi_KS.F b/src/nwpw/band/lib/psi/cpsi_KS.F index 3e464ec36d3..c918b2157de 100644 --- a/src/nwpw/band/lib/psi/cpsi_KS.F +++ b/src/nwpw/band/lib/psi/cpsi_KS.F @@ -2650,22 +2650,23 @@ subroutine cpsi_minimize_virtual() implicit none #include "bafdecls.fh" +#include "errquit.fh" #include "cpsi_common.fh" !*** local variables *** - integer maxit_orb,taskid_k + integer maxit_orb integer ii,l,l2,nb,epsi_ptr,eig_ptr real*8 sum,maxerror,error_out,e0 !*** external functions *** integer cpsi_data_get_ptr - real*8 control_tole external cpsi_data_get_ptr + real*8 control_tole external control_tole + maxit_orb=120 maxerror = control_tole() - call Parallel3d_taskid_k(taskid_k) do nb=1,nbrillq do ii=1,(ne_excited(1)+ne_excited(2)) @@ -2678,10 +2679,7 @@ subroutine cpsi_minimize_virtual() call cpsi_project_out_virtual1(nb,ii,dbl_mb(epsi_ptr)) !*** normalize **** - call Cram_cc_dot(nb, - > dbl_mb(epsi_ptr), - > dbl_mb(epsi_ptr), - > sum) + call Cram_cc_dot(nb,dbl_mb(epsi_ptr),dbl_mb(epsi_ptr),sum) sum = 1.0d0/dsqrt(sum) call Cram_c_SMul1(nb,sum,dbl_mb(epsi_ptr)) @@ -2689,13 +2687,17 @@ subroutine cpsi_minimize_virtual() !*** minimize orbital **** l = 0 3 call cpsi_KS_update_virtual(maxit_orb, - > maxerror, - > 0.001d0,nb,ii,error_out,e0) + > maxerror, + > 0.001d0,nb,ii,error_out,e0) l = l+1 - if ((error_out.gt.maxerror).and.(l.le.(1+(l2-1)*3))) go to 3 - if ((error_out.gt.maxerror).and.(l2.le.1)) then - call Cram_c_Zero(nb,dbl_mb(epsi_ptr)) - call Cram_c_setzero(1,1.0d0,dbl_mb(epsi_ptr)) + + if ((error_out.gt.maxerror).and.(l.le.(1+(l2-1)*3))) then + go to 3 + end if + + if (((error_out.gt.maxerror).or.(e0.gt.4.0d0)) + > .and.(l2.le.1)) then + call cpsi_corrector_orb(nb,dbl_mb(epsi_ptr)) go to 2 end if @@ -2703,11 +2705,54 @@ subroutine cpsi_minimize_virtual() end do end do + + call cpsi_sort_virtual() + 
return end * + +* *********************************** +* * * +* * cpsi_corrector_orb * +* * * +* *********************************** + subroutine cpsi_corrector_orb(nb,orb) + implicit none + integer nb + complex*16 orb(*) + +#include "bafdecls.fh" +#include "errquit.fh" +#include "cpsi_common.fh" + + !***** local variables **** + integer k,g(2) + real*8 sum + + if (.not. BA_push_get(mt_dcpl,nfft3d,'g',g(2),g(1))) + > call errquit('cpsi_corrector_orb:out stack',0,MA_ERR) + + do k=1,nfft3d + dcpl_mb(g(1)+k-1)=dcmplx(0.1d0,0.002d0*dsin(0.001d0*k)) + end do + call C3dB_rc_fft3f(1,dcpl_mb(g(1))) + call Cram_c_pack(nb,dcpl_mb(g(1))) + call Cram_cc_dot(nb,dcpl_mb(g(1)),dcpl_mb(g(1)),sum) + sum = 1.0d0/dsqrt(sum) + call Cram_c_SMul1(nb,sum,dcpl_mb(g(1))) + + call Parallel_shared_vector_zero(.true.,2*npack1,orb) + call Cram_c_Copy(nb,dcpl_mb(g(1)),orb) + + if (.not.BA_pop_stack(g(2))) + > call errquit('cpsi_corrector_orb:pop stack',1,MA_ERR) + + return + end + c subroutine cpsi_check_orthodebug(nb,i,Horb) c implicit none c integer nb,i @@ -2942,23 +2987,37 @@ subroutine cpsi_KS_update_virtual(maxiteration, #include "bafdecls.fh" #include "errquit.fh" +#include "util.fh" +#include "stdio.fh" #include "cpsi_common.fh" * **** local variables **** - logical value,done,oneloop - integer it +c integer MASTER,taskid +c parameter (MASTER=0) +c logical oprint + + logical value,done,oneloop,precondition + integer it,pit real*8 e0,eold,percent_error,error0,de0,lmbda_r0,lmbda_r1 - real*8 theta + real*8 theta,ep,sp integer r1(2),t0(2),t(2),g(2) integer psi_ptr * **** external functions **** - integer cpsi_data_get_ptr - external cpsi_data_get_ptr + integer cpsi_data_get_ptr,Pneb_convert_nb + external cpsi_data_get_ptr,Pneb_convert_nb + logical control_print + external control_print + real*8 control_Ep,control_Sp + external control_Ep,control_Sp psi_ptr=cpsi_data_get_ptr(psi1_excited_tag,nb,i) +c call Parallel3d_taskid_i(taskid) +c call Parallel3d_taskid_k(taskid_k) +c oprint= 
((taskid.eq.MASTER).and.control_print(print_medium)) + lmbda_r0 = 1.0d0 value = BA_push_get(mt_dcpl,npack1,'t0',t0(2),t0(1)) @@ -2971,11 +3030,15 @@ subroutine cpsi_KS_update_virtual(maxiteration, if (.not. value) call errquit( > 'cpsi_KS_update_virtual: out of stack memory',0, MA_ERR) + ep = control_Ep() + sp = control_Sp() + precondition = .true. done = .false. error0 = 0.0d0 e0 = 0.0d0 theta = -3.14159d0/600.0d0 it = 0 + pit = 0 2 continue it = it + 1 @@ -2990,17 +3053,25 @@ subroutine cpsi_KS_update_virtual(maxiteration, percent_error=0.0d0 if(error0.ne.0.0d0) - A percent_error = dabs(e0-eold)/error0 + > percent_error = dabs(e0-eold)/error0 done = ((it.gt.maxiteration) > .or. > (dabs(e0-eold).lt.maxerror)) + precondition = (dabs(e0-eold).gt.(sp*maxerror)) if (done) go to 4 call Cram_c_Copy(nb,dcpl_mb(g(1)),dcpl_mb(r1(1))) call Cram_cc_daxpy(nb,e0,dbl_mb(psi_ptr),dcpl_mb(r1(1))) +* **** preconditioning **** + if (precondition) then + pit = pit + 1 + call cke_Precondition(nb,npack1,1, + > dbl_mb(psi_ptr), + > dcpl_mb(g(1))) + end if * *** determine conjuagate direction *** call Cram_cc_dot(nb,dcpl_mb(r1(1)), @@ -3051,6 +3122,17 @@ subroutine cpsi_KS_update_virtual(maxiteration, if (.not.value) call errquit( > 'cpsi_KS_update_virtual: popping stack memory',1,MA_ERR) +c if (oprint) then +c write(luout,921) Pneb_convert_nb(nb),nb,i,-e0, +c > dabs(e0-eold),it,pit,ep,sp +c 921 format(5x,"nb",I4,1x,"nbq",I4,1x,"orbital",I4," current e=",E10.3, +c > " (error=",E9.3,")", +c > " iterations",I4,"(",I4, +c > " preconditioned, Ep,Sp=",F5.1,F7.1,")") +c end if + + + error_out = dabs(e0-eold) e0 = -e0 return From cb94bc12a3dc913b042942ef4aae426f0927f0fc Mon Sep 17 00:00:00 2001 From: Eric Bylaska Date: Thu, 6 May 2021 19:55:34 -0700 Subject: [PATCH 11/45] debugging fractional with minimizer 8....EJB --- src/nwpw/band/lib/psi/cpsi.F | 1 + src/nwpw/band/minimizer/c_bybminimize2.F | 12 ++- src/nwpw/band/minimizer/c_cgsd_energy.F | 95 ++++++++++++++++++++---- 3 files changed, 92 
insertions(+), 16 deletions(-) diff --git a/src/nwpw/band/lib/psi/cpsi.F b/src/nwpw/band/lib/psi/cpsi.F index 1856d87d3ae..6a79d74f2d9 100644 --- a/src/nwpw/band/lib/psi/cpsi.F +++ b/src/nwpw/band/lib/psi/cpsi.F @@ -2526,6 +2526,7 @@ subroutine cpsi_1define_occupation(initial_alpha,use_hml) end + c set nwpw:fractional_smeartype 1 #0-none, 1-Fermi-Dirac, 2-Gaussian, 3-Hermite c 4-Marzari-Vanderbilt diff --git a/src/nwpw/band/minimizer/c_bybminimize2.F b/src/nwpw/band/minimizer/c_bybminimize2.F index 190d6b6c69a..f0b4e47378c 100644 --- a/src/nwpw/band/minimizer/c_bybminimize2.F +++ b/src/nwpw/band/minimizer/c_bybminimize2.F @@ -175,7 +175,7 @@ subroutine c_bybminimize2(E,deltae,deltac,current_iteration, * **** set the initial density **** if (current_iteration.eq.1) then Enew = cpsi_1energy() - !write(*,*) "Enew=",Enew+eion,Enew + if (oprint) write(*,*) "Enew=",Enew+eion,Enew alpha = control_ks_alpha() deltae = -9232323299.0d0 ks_deltae = tole @@ -198,9 +198,12 @@ subroutine c_bybminimize2(E,deltae,deltac,current_iteration, call cpsi_2to1() end if if (control_fractional()) then - call cpsi_1define_occupation(0.0d0,.false.) + !call cpsi_1define_occupation(0.0d0,.false.) + call cpsi_1define_occupation(-1.0d0,.false.) 
Enew = Enew + cpsi_smearcorrection() end if + if (oprint) write(*,*) "THIRD EIGPRINT" + call cpsi_printeig_debug() else call cpsi_get_density(1,dbl_mb(rho_in(1))) call cpsi_get_density(1,dbl_mb(rho_out(1))) @@ -270,6 +273,9 @@ subroutine c_bybminimize2(E,deltae,deltac,current_iteration, if (control_fractional()) Enew = Enew + cpsi_smearcorrection() deltae = Enew-Eold + if (oprint) write(*,*) "Fourth EIGPRINT" + call cpsi_printeig_debug() + call cpsi_get_density(1,dbl_mb(rho_in(1))) @@ -466,6 +472,8 @@ subroutine c_bybminimize2(E,deltae,deltac,current_iteration, E(1) = E(1) + E(33) end if + if (oprint) write(*,*) "FINAL c_bybminmize2 EIGPRINT" + call cpsi_printeig_debug() return end diff --git a/src/nwpw/band/minimizer/c_cgsd_energy.F b/src/nwpw/band/minimizer/c_cgsd_energy.F index ed634c90a44..e3d8beac409 100644 --- a/src/nwpw/band/minimizer/c_cgsd_energy.F +++ b/src/nwpw/band/minimizer/c_cgsd_energy.F @@ -128,27 +128,29 @@ real*8 function c_cgsd_energy(newpsi) if ((minimizer.eq.5).or.(minimizer.eq.8)) it_out = 1 if (newpsi) then - if (minimizer.ne.4) call c_sdminimize(10) + if (minimizer.lt.4) call c_sdminimize(10) call c_bybminimize0() if (control_fractional()) then - Enew = cpsi_1energy() + Enew = cpsi_1energy() + ewald_e() + if (mprint) write(*,*) "Enew+ewald=",Enew call cpsi_1gen_hml() - !write(*,*) "Start ENEW = ",enew + ewald_e() - !if (minimizer.ne.4) then - call cpsi_diagonalize_hml() + call cpsi_diagonalize_hml() + + if (mprint) write(*,*) "First EIG OCC:" + call cpsi_printeig_debug() + call cpsi_1rotate2() call cpsi_2to1() call cpsi_1define_occupation(-1.0d0,.false.) - !else - ! call cpsi_1define_occupation(-1.0d0,.true.) 
- !end if + if (mprint) write(*,*) "Second EIG OCC:" + call cpsi_printeig_debug() end if end if 2 continue icount = icount + 1 if (stalled) then - if (minimizer.ne.4) call c_sdminimize(0) + if (minimizer.lt.4) call c_sdminimize(0) bfgscount = 0 end if @@ -247,11 +249,14 @@ real*8 function c_cgsd_energy(newpsi) * **** diagonalize hamiltonian and rotate psi **** - call cpsi_1gen_hml() - call cpsi_diagonalize_hml() - if (.not.control_fractional()) then - call cpsi_1rotate2() - call cpsi_2to1() + !**** NEED TO CHECK THIS LOGIC AGAIN **** + if (minimizer.ne.8) then + call cpsi_1gen_hml() + call cpsi_diagonalize_hml() + if (.not.control_fractional()) then + call cpsi_1rotate2() + call cpsi_2to1() + end if end if @@ -434,3 +439,65 @@ real*8 function c_cgsd_energy(newpsi) end + + subroutine cpsi_printeig_debug() + implicit none + +#include "stdio.fh" +#include "util.fh" + + integer MASTER,taskid + parameter (MASTER=0) + + logical mprint + integer nb,i + real*8 f0,f1,f2,f3,f4,f5,f6 + + logical control_print + external control_print + integer brillioun_nbrillioun,cpsi_ne + external brillioun_nbrillioun,cpsi_ne + real*8 brillioun_weight_brdcst + real*8 brillioun_ks_brdcst + real*8 brillioun_k_brdcst + real*8 cpsi_eigenvalue_brdcst + real*8 cpsi_occupation_brdcst + external brillioun_weight_brdcst + external brillioun_ks_brdcst + external brillioun_k_brdcst + external cpsi_eigenvalue_brdcst + external cpsi_occupation_brdcst + + + call Parallel_taskid(taskid) + + mprint = (taskid.eq.MASTER).and.control_print(print_medium) + + do nb=1,brillioun_nbrillioun() + f0 = brillioun_weight_brdcst(nb) + f1 = brillioun_ks_brdcst(1,nb) + f2 = brillioun_ks_brdcst(2,nb) + f3 = brillioun_ks_brdcst(3,nb) + f4 = brillioun_k_brdcst(1,nb) + f5 = brillioun_k_brdcst(2,nb) + f6 = brillioun_k_brdcst(3,nb) + if (mprint) then + write(luout,1508) nb,f0,f1,f2,f3,f4,f5,f6 + write(luout,1500) + end if + do i=0,cpsi_ne(1)-1 + f1 = cpsi_eigenvalue_brdcst(nb,1,cpsi_ne(1)-i) + f2 = 
cpsi_occupation_brdcst(nb,1,cpsi_ne(1)-i) + if (mprint) write(luout,1510) f1,f1*27.2116d0,f2 + end do + end do + + 1500 FORMAT(/' orbital energies:') + 1508 FORMAT(/' Brillouin zone point: ',i6, + > /' weight=',f10.6, + > /' k =<',3f8.3,'> . ', + > /' =<',3f8.3,'>') + 1510 FORMAT(4(E18.7,' (',F8.3,'eV) occ=',F5.3)) + + return + end From ece96d259a984e48562a539cfa50bb90032159a8 Mon Sep 17 00:00:00 2001 From: Eric Bylaska Date: Fri, 7 May 2021 10:18:28 -0700 Subject: [PATCH 12/45] debuggin fractional occupation...EJB --- src/nwpw/nwpwlib/utilities/nwpw_scf_mixing.F | 29 +++++++++++++++++--- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/src/nwpw/nwpwlib/utilities/nwpw_scf_mixing.F b/src/nwpw/nwpwlib/utilities/nwpw_scf_mixing.F index e37c3541e97..be0c7687604 100644 --- a/src/nwpw/nwpwlib/utilities/nwpw_scf_mixing.F +++ b/src/nwpw/nwpwlib/utilities/nwpw_scf_mixing.F @@ -115,12 +115,14 @@ subroutine nwpw_scf_mixing(vout,vnew,deltae,scf_error) #include "nwpw_scf_mixing.fh" * **** local variables **** - logical value + integer MASTER,taskid + parameter (MASTER=0) + logical value,oprint integer i,j,info,k,ipiv,ms,shift integer rr_ptr,ss_ptr,tt_ptr,ff_ptr integer V0,V1,Vout0,Vout1,F0,F1,Vbar0,Vbar1 integer dV,U,dF,dFi - real*8 sum0,sum1,beta,p00,p01,p11,alpha1,r00 + real*8 sum0,sum1,beta,p00,p01,p11,alpha1,r00,small real*8 BB(40,40),BBB(40,40) * **** external functions **** @@ -133,6 +135,8 @@ subroutine nwpw_scf_mixing(vout,vnew,deltae,scf_error) c alpha1 = 1.0d0 c end if alpha1 = alpha + call Parallel_taskid(taskid) + oprint = (taskid.eq.MASTER) * **** simple mixing **** if (algorithm.eq.0) then @@ -213,6 +217,8 @@ subroutine nwpw_scf_mixing(vout,vnew,deltae,scf_error) !$OMP MASTER scf_error = dsqrt(scf_error) !$OMP END MASTER + if (oprint) + > write(*,*) "Broyden SCF MIXING: scf_error=",scf_error !**** Beta = / **** @@ -224,6 +230,8 @@ subroutine nwpw_scf_mixing(vout,vnew,deltae,scf_error) call D3dB_SumAll(sum0) call D3dB_SumAll(sum1) beta = sum0/sum1 + + 
if (oprint) write(*,*) "Broyden SCF MIXING: betar=",beta @@ -325,6 +333,8 @@ subroutine nwpw_scf_mixing(vout,vnew,deltae,scf_error) !$OMP MASTER scf_error = dsqrt(scf_error) !$OMP END MASTER + if (oprint) + > write(*,*) "Johnson SCF MIXING: m,scf_error=",m,scf_error !*** dF = dF(m-1), U = U(m-1) *** call nwpw_list_ptr(1,(5+m-1),dF) @@ -362,11 +372,20 @@ subroutine nwpw_scf_mixing(vout,vnew,deltae,scf_error) call Parallel_shared_vector_zero(.true.,max_list*max_list,B) !$OMP MASTER + + if (oprint) + > write(*,*) "Johnson SCF MIXING small:",small + small = 0.0d0 do i=1,m-1 do j=1,m-1 B(i,j) = A(i,j) + small = small + dabs(A(i,j)) end do + if (oprint) + > write(*,*) "Johnson SCF MIXING C,B:",C(i),(B(i,j),j=1,m-1) end do + small = small/dble(m-1)**2 + do i=1,m-1 do j=1,m-1 @@ -374,7 +393,7 @@ subroutine nwpw_scf_mixing(vout,vnew,deltae,scf_error) end do end do do i=1,m-1 - Binv(i,i) = 1.0d0 + Binv(i,i) = 1.0d0*small end do call DGESV((m-1),(m-1), @@ -388,8 +407,10 @@ subroutine nwpw_scf_mixing(vout,vnew,deltae,scf_error) do i=1,m-1 d(i) = 0.0d0 do j=1,m-1 - d(i) = d(i) - c(j)*Binv(j,i) + d(i) = d(i) - (c(j)/small)*Binv(j,i) end do + if (oprint) + > write(*,*) "Johnson SCF MIXING d,Binv:",d(i),(Binv(j,i),j=1,m-1) end do !$OMP END MASTER From cae5a62c7987214d813b0d6e56d06e7f49f4c46e Mon Sep 17 00:00:00 2001 From: Eric Bylaska Date: Fri, 7 May 2021 10:48:47 -0700 Subject: [PATCH 13/45] ipiv defined incorrectly...EJB --- src/nwpw/nwpwlib/utilities/nwpw_scf_mixing.F | 7 ++++--- src/nwpw/nwpwlib/utilities/nwpw_scf_mixing.fh | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/nwpw/nwpwlib/utilities/nwpw_scf_mixing.F b/src/nwpw/nwpwlib/utilities/nwpw_scf_mixing.F index be0c7687604..6fdbf0e96f5 100644 --- a/src/nwpw/nwpwlib/utilities/nwpw_scf_mixing.F +++ b/src/nwpw/nwpwlib/utilities/nwpw_scf_mixing.F @@ -118,13 +118,14 @@ subroutine nwpw_scf_mixing(vout,vnew,deltae,scf_error) integer MASTER,taskid parameter (MASTER=0) logical value,oprint - integer 
i,j,info,k,ipiv,ms,shift + integer i,j,info,k,ms,shift integer rr_ptr,ss_ptr,tt_ptr,ff_ptr integer V0,V1,Vout0,Vout1,F0,F1,Vbar0,Vbar1 integer dV,U,dF,dFi real*8 sum0,sum1,beta,p00,p01,p11,alpha1,r00,small real*8 BB(40,40),BBB(40,40) + * **** external functions **** real*8 ddot external ddot @@ -373,8 +374,6 @@ subroutine nwpw_scf_mixing(vout,vnew,deltae,scf_error) call Parallel_shared_vector_zero(.true.,max_list*max_list,B) !$OMP MASTER - if (oprint) - > write(*,*) "Johnson SCF MIXING small:",small small = 0.0d0 do i=1,m-1 do j=1,m-1 @@ -386,6 +385,8 @@ subroutine nwpw_scf_mixing(vout,vnew,deltae,scf_error) end do small = small/dble(m-1)**2 + if (oprint) + > write(*,*) "Johnson SCF MIXING small:",small do i=1,m-1 do j=1,m-1 diff --git a/src/nwpw/nwpwlib/utilities/nwpw_scf_mixing.fh b/src/nwpw/nwpwlib/utilities/nwpw_scf_mixing.fh index 74bdfb7191a..c34e25fe7b1 100644 --- a/src/nwpw/nwpwlib/utilities/nwpw_scf_mixing.fh +++ b/src/nwpw/nwpwlib/utilities/nwpw_scf_mixing.fh @@ -3,11 +3,12 @@ c $Id$ c integer max_list parameter (max_list=40) + integer ipiv(max_list) real*8 c(max_list),d(max_list),B(max_list,max_list),alpha real*8 A(max_list,max_list),w(max_list),w0 real*8 Binv(max_list,max_list) integer n2ft3d,npack1,neall,nsize,max_m,m,algorithm,ispin common / nwpw_scf_mixing_block / A,B,Binv,c,d,w,w0,alpha, - > n2ft3d,npack1,neall, + > ipiv,n2ft3d,npack1,neall, > nsize,max_m,m,algorithm,ispin From 4aaaf394e8c036aaa03d0653e0c7e33eb6a4baef Mon Sep 17 00:00:00 2001 From: Eric Bylaska Date: Fri, 7 May 2021 10:57:45 -0700 Subject: [PATCH 14/45] removed debug print statements --- src/nwpw/nwpwlib/utilities/nwpw_scf_mixing.F | 21 ++------------------ 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/src/nwpw/nwpwlib/utilities/nwpw_scf_mixing.F b/src/nwpw/nwpwlib/utilities/nwpw_scf_mixing.F index 6fdbf0e96f5..e8295348d35 100644 --- a/src/nwpw/nwpwlib/utilities/nwpw_scf_mixing.F +++ b/src/nwpw/nwpwlib/utilities/nwpw_scf_mixing.F @@ -115,9 +115,7 @@ subroutine 
nwpw_scf_mixing(vout,vnew,deltae,scf_error) #include "nwpw_scf_mixing.fh" * **** local variables **** - integer MASTER,taskid - parameter (MASTER=0) - logical value,oprint + logical value integer i,j,info,k,ms,shift integer rr_ptr,ss_ptr,tt_ptr,ff_ptr integer V0,V1,Vout0,Vout1,F0,F1,Vbar0,Vbar1 @@ -136,8 +134,7 @@ subroutine nwpw_scf_mixing(vout,vnew,deltae,scf_error) c alpha1 = 1.0d0 c end if alpha1 = alpha - call Parallel_taskid(taskid) - oprint = (taskid.eq.MASTER) + * **** simple mixing **** if (algorithm.eq.0) then @@ -218,8 +215,6 @@ subroutine nwpw_scf_mixing(vout,vnew,deltae,scf_error) !$OMP MASTER scf_error = dsqrt(scf_error) !$OMP END MASTER - if (oprint) - > write(*,*) "Broyden SCF MIXING: scf_error=",scf_error !**** Beta = / **** @@ -232,9 +227,6 @@ subroutine nwpw_scf_mixing(vout,vnew,deltae,scf_error) call D3dB_SumAll(sum1) beta = sum0/sum1 - if (oprint) write(*,*) "Broyden SCF MIXING: betar=",beta - - !**** Vbar1 = (1-Beta)*Vout1 + Beta*Vout0 **** call Parallel_shared_vector_copy(.true.,nsize, @@ -334,8 +326,6 @@ subroutine nwpw_scf_mixing(vout,vnew,deltae,scf_error) !$OMP MASTER scf_error = dsqrt(scf_error) !$OMP END MASTER - if (oprint) - > write(*,*) "Johnson SCF MIXING: m,scf_error=",m,scf_error !*** dF = dF(m-1), U = U(m-1) *** call nwpw_list_ptr(1,(5+m-1),dF) @@ -380,14 +370,9 @@ subroutine nwpw_scf_mixing(vout,vnew,deltae,scf_error) B(i,j) = A(i,j) small = small + dabs(A(i,j)) end do - if (oprint) - > write(*,*) "Johnson SCF MIXING C,B:",C(i),(B(i,j),j=1,m-1) end do small = small/dble(m-1)**2 - if (oprint) - > write(*,*) "Johnson SCF MIXING small:",small - do i=1,m-1 do j=1,m-1 Binv(i,j) = 0.0d0 @@ -410,8 +395,6 @@ subroutine nwpw_scf_mixing(vout,vnew,deltae,scf_error) do j=1,m-1 d(i) = d(i) - (c(j)/small)*Binv(j,i) end do - if (oprint) - > write(*,*) "Johnson SCF MIXING d,Binv:",d(i),(Binv(j,i),j=1,m-1) end do !$OMP END MASTER From 56e450a790ce454a3d841e11ae1c052df32d2c12 Mon Sep 17 00:00:00 2001 From: Eric Bylaska Date: Fri, 7 May 2021 
11:05:44 -0700 Subject: [PATCH 15/45] removed debug print statements...EJB --- src/nwpw/band/minimizer/c_bybminimize2.F | 10 +--------- src/nwpw/band/minimizer/c_cgsd_energy.F | 7 ------- 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/src/nwpw/band/minimizer/c_bybminimize2.F b/src/nwpw/band/minimizer/c_bybminimize2.F index f0b4e47378c..e887cd24253 100644 --- a/src/nwpw/band/minimizer/c_bybminimize2.F +++ b/src/nwpw/band/minimizer/c_bybminimize2.F @@ -175,7 +175,6 @@ subroutine c_bybminimize2(E,deltae,deltac,current_iteration, * **** set the initial density **** if (current_iteration.eq.1) then Enew = cpsi_1energy() - if (oprint) write(*,*) "Enew=",Enew+eion,Enew alpha = control_ks_alpha() deltae = -9232323299.0d0 ks_deltae = tole @@ -202,8 +201,7 @@ subroutine c_bybminimize2(E,deltae,deltac,current_iteration, call cpsi_1define_occupation(-1.0d0,.false.) Enew = Enew + cpsi_smearcorrection() end if - if (oprint) write(*,*) "THIRD EIGPRINT" - call cpsi_printeig_debug() + else call cpsi_get_density(1,dbl_mb(rho_in(1))) call cpsi_get_density(1,dbl_mb(rho_out(1))) @@ -273,9 +271,6 @@ subroutine c_bybminimize2(E,deltae,deltac,current_iteration, if (control_fractional()) Enew = Enew + cpsi_smearcorrection() deltae = Enew-Eold - if (oprint) write(*,*) "Fourth EIGPRINT" - call cpsi_printeig_debug() - call cpsi_get_density(1,dbl_mb(rho_in(1))) @@ -472,9 +467,6 @@ subroutine c_bybminimize2(E,deltae,deltac,current_iteration, E(1) = E(1) + E(33) end if - if (oprint) write(*,*) "FINAL c_bybminmize2 EIGPRINT" - call cpsi_printeig_debug() - return end diff --git a/src/nwpw/band/minimizer/c_cgsd_energy.F b/src/nwpw/band/minimizer/c_cgsd_energy.F index e3d8beac409..6c59c1084d5 100644 --- a/src/nwpw/band/minimizer/c_cgsd_energy.F +++ b/src/nwpw/band/minimizer/c_cgsd_energy.F @@ -132,18 +132,11 @@ real*8 function c_cgsd_energy(newpsi) call c_bybminimize0() if (control_fractional()) then Enew = cpsi_1energy() + ewald_e() - if (mprint) write(*,*) "Enew+ewald=",Enew call 
cpsi_1gen_hml() call cpsi_diagonalize_hml() - - if (mprint) write(*,*) "First EIG OCC:" - call cpsi_printeig_debug() - call cpsi_1rotate2() call cpsi_2to1() call cpsi_1define_occupation(-1.0d0,.false.) - if (mprint) write(*,*) "Second EIG OCC:" - call cpsi_printeig_debug() end if end if From 4b2b1af52ebc503a26e83e458dcd7d2849bb463b Mon Sep 17 00:00:00 2001 From: edoapra Date: Fri, 7 May 2021 12:55:13 -0700 Subject: [PATCH 16/45] script to download tarballs for sites without network connection [ci skip] --- contrib/getfiles.nwchem | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 contrib/getfiles.nwchem diff --git a/contrib/getfiles.nwchem b/contrib/getfiles.nwchem new file mode 100644 index 00000000000..3476308a26a --- /dev/null +++ b/contrib/getfiles.nwchem @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +# +# $Id$ +cd $NWCHEM_TOP/src/nwpw/nwpwlib/nwpwxc/ +rm -f dftd3.tgz +wget https://www.chemie.uni-bonn.de/pctc/mulliken-center/software/dft-d3/dftd3.tgz +cd $NWCHEM_TOP/src/libext/openblas +VERSION=0.3.13 +rm -rf OpenBLAS*gz +curl -L https://github.com/xianyi/OpenBLAS/archive/v${VERSION}.tar.gz -o OpenBLAS-${VERSION}.tar.gz +cd $NWCHEM_TOP/src/libext/scalapack +COMMIT=bc6cad585362aa58e05186bb85d4b619080c45a9 +rm -f scalapack-$COMMIT.zip +curl -L https://github.com/Reference-ScaLAPACK/scalapack/archive/$COMMIT.zip -o scalapack-$COMMIT.zip + From c8f49127fce5fa9c54047efdca8e3308bb248d3b Mon Sep 17 00:00:00 2001 From: Eric Bylaska Date: Sun, 9 May 2021 14:00:09 -0700 Subject: [PATCH 17/45] ...EJB --- src/nwpw/pspw/cgsd/bybminimize2.F | 11 ++----- src/nwpw/pspw/cgsd/cgsd_energy.F | 46 ++++++++++++++++++++++++++-- src/nwpw/pspw/makepsi/wvfnc_adjust.F | 16 +++++++--- 3 files changed, 59 insertions(+), 14 deletions(-) diff --git a/src/nwpw/pspw/cgsd/bybminimize2.F b/src/nwpw/pspw/cgsd/bybminimize2.F index c79f62610a2..26dc8cf9e88 100644 --- a/src/nwpw/pspw/cgsd/bybminimize2.F +++ b/src/nwpw/pspw/cgsd/bybminimize2.F @@ -220,12 +220,8 @@ subroutine 
bybminimize2(E,deltae,deltac,current_iteration, ks_deltae = tole value = .false. + if (rho_read) call rho_2to1() -c if (control_fractional()) then -c if (control_use_fractional_rho()) then -c value = psi_try_read_density(1) -c end if -c end if call electron_gen_vall() call psi_get_density(1,dbl_mb(rho_in(1))) call psi_get_density(1,dbl_mb(rho_out(1))) @@ -311,6 +307,7 @@ subroutine bybminimize2(E,deltae,deltac,current_iteration, !$OMP BARRIER + e00 = psi_1energy() !$OMP MASTER Eold_shared = Enew_shared @@ -320,13 +317,11 @@ subroutine bybminimize2(E,deltae,deltac,current_iteration, deltae = Enew_shared - Eold_shared !$OMP END MASTER + !call electron_gen_vall() call psi_get_density(1,dbl_mb(rho_in(1))) * **** compute deltaV **** -c call dcopy(ispin*n2ft3d, -c > dbl_mb(rho_in(1)),1, -c > dbl_mb(rho_junk(1)),1) call Parallel_shared_vector_copy(.true.,ispin*n2ft3d, > dbl_mb(rho_in(1)), > dbl_mb(rho_junk(1))) diff --git a/src/nwpw/pspw/cgsd/cgsd_energy.F b/src/nwpw/pspw/cgsd/cgsd_energy.F index 881684fd2dc..0c2aa75424d 100644 --- a/src/nwpw/pspw/cgsd/cgsd_energy.F +++ b/src/nwpw/pspw/cgsd/cgsd_energy.F @@ -154,8 +154,8 @@ real*8 function cgsd_energy(newpsi) call psi_diagonalize_hml_assending() call psi_1rotate2() call psi_2to1() - !call psi_1define_occupation(-1.0d0,.false.) - call psi_1define_occupation(1.0d0,.false.) + call psi_1define_occupation(-1.0d0,.false.) + !call psi_1define_occupation(1.0d0,.false.) 
end if end if @@ -828,6 +828,48 @@ real*8 function cgsd_energy(newpsi) end + + subroutine psi_printeigs_debug() + implicit none + +#include "stdio.fh" + + integer MASTER,taskid + parameter (MASTER=0) + + integer i,NN + real*8 EV + + integer psi_ne + external psi_ne + real*8 psi_eigenvalue,psi_occupation + external psi_eigenvalue,psi_occupation + + NN=psi_ne(1)-psi_ne(2) + EV=27.2116d0 + + call Parallel_taskid(taskid) + if (taskid.eq.MASTER) then + do i=1,NN + write(luout,1511) psi_eigenvalue(1,i), + > psi_eigenvalue(1,i)*EV, + > psi_occupation(1,i) + end do + do i=1,psi_ne(2) + write(luout,1511) psi_eigenvalue(1,i+NN), + > psi_eigenvalue(1,i+NN)*EV, + > psi_occupation(1,i+NN), + > psi_eigenvalue(2,i), + > psi_eigenvalue(2,i)*EV, + > psi_occupation(2,i) + end do + end if + + return + 1511 FORMAT(2(E18.7,' (',F8.3,'eV) occ=',F5.3)) + end + + * ******************************* * * * * * cgsd_energy_gradient * diff --git a/src/nwpw/pspw/makepsi/wvfnc_adjust.F b/src/nwpw/pspw/makepsi/wvfnc_adjust.F index b3db6244d04..d2754abd2a7 100644 --- a/src/nwpw/pspw/makepsi/wvfnc_adjust.F +++ b/src/nwpw/pspw/makepsi/wvfnc_adjust.F @@ -22,7 +22,7 @@ subroutine wvfnc_adjust(wavefunction_filename,ispin,nein) parameter (MASTER=0) integer NMAX - integer filling(2) + integer filling(2),irm_excited integer fractional_orbitals(2),ne(2) character*255 new_filename,old_filename,emo_filename @@ -78,7 +78,7 @@ subroutine wvfnc_adjust(wavefunction_filename,ispin,nein) > ispin, > ne, > fractional, - > fractional_orbitals) + > fractional_orbitals,irm_excited) !*** remove temporary wvfnc_adjust file *** call util_file_unlink(old_filename) @@ -92,6 +92,9 @@ subroutine wvfnc_adjust(wavefunction_filename,ispin,nein) end if call ga_sync() + call Parallel_Brdcst_ivalue(MASTER,irm_excited) + if (irm_excited.eq.1) call control_unset_excited_ne() !*** remove excited_ne from rtdb *** + return end @@ -103,7 +106,7 @@ subroutine sub_wvfnc_adjust(NMAX,filling, > ispin, > ne, > fractional, - > frac_orb) + > 
frac_orb,irm_excited) implicit none integer NMAX integer filling(4,NMAX,2) @@ -113,6 +116,7 @@ subroutine sub_wvfnc_adjust(NMAX,filling, integer ispin,ne(2) logical fractional integer frac_orb(2) + integer irm_excited #include "bafdecls.fh" #include "errquit.fh" @@ -138,6 +142,7 @@ subroutine sub_wvfnc_adjust(NMAX,filling, double precision GCDOTC,util_random external GCDOTC,util_random + irm_excited = 0 p = util_random(5291999) !*** initialize the random sequence **** @@ -362,7 +367,10 @@ subroutine sub_wvfnc_adjust(NMAX,filling, if (emo_found) then call closefile(3) - if (emo_used) call util_file_unlink(emo_filename) !*** remove emo_filename *** + if (emo_used) then + call util_file_unlink(emo_filename) !*** remove emo_filename *** + irm_excited = 1 + end if end if call closefile(5) call closefile(6) From 2ada604988552b16956e8d625fb6082e717cf262 Mon Sep 17 00:00:00 2001 From: edoapra Date: Mon, 10 May 2021 16:15:04 -0700 Subject: [PATCH 18/45] introduce icds keyword for SMD non-aqueous solvents https://github.com/nwchemgit/nwchem/issues/363 --- src/solvation/cosmo_initialize.F | 1 + src/solvation/cosmo_input.F | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/src/solvation/cosmo_initialize.F b/src/solvation/cosmo_initialize.F index 78297283caf..f0211b0cff6 100644 --- a/src/solvation/cosmo_initialize.F +++ b/src/solvation/cosmo_initialize.F @@ -346,6 +346,7 @@ subroutine cosmo_initialize(rtdb,geom,basis,oprint) status = rtdb_get(rtdb,'cosmo:solg',mt_dbl,1,solg) status = rtdb_get(rtdb,'cosmo:solh',mt_dbl,1,solh) status = rtdb_get(rtdb,'cosmo:soln',mt_dbl,1,soln) + status = rtdb_get(rtdb,'cosmo:icds',mt_int,1,icds) c c set sola,solb,solc,solg,solh,soln,icds parameters (either from c solv_data or user-defined) diff --git a/src/solvation/cosmo_input.F b/src/solvation/cosmo_input.F index 0c4e2b73e22..3447e9a80d9 100644 --- a/src/solvation/cosmo_input.F +++ b/src/solvation/cosmo_input.F @@ -106,6 +106,7 @@ subroutine cosmo_input(rtdb) double precision soln double 
precision pol_cosmo_vem(2) double precision polgs_cosmo_vem, poles_cosmo_vem + integer icds c c vem model parameters c @@ -295,6 +296,10 @@ subroutine cosmo_input(rtdb) status = inp_f(soln) if (.not. rtdb_put(rtdb,'cosmo:soln',mt_dbl,1,soln)) $ call errquit('cosmo_input: rtdb put failed',911,RTDB_ERR) + else if(inp_compare(.false.,'icds',field)) then + status = inp_i(icds) + if (.not. rtdb_put(rtdb,'cosmo:icds',mt_int,1,icds)) + $ call errquit('cosmo_input: rtdb put failed',912,RTDB_ERR) c c <-- MN solvation models c From f2f5cc62cbc94b30ba2cc93342373f5115c878f2 Mon Sep 17 00:00:00 2001 From: edoapra Date: Tue, 11 May 2021 10:18:44 -0700 Subject: [PATCH 19/45] fortran flags for nvfortran/pgf90. might fix https://github.com/edoapra/simint-generator/issues/4 --- src/NWints/simint/libsimint_source/build_simint.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/NWints/simint/libsimint_source/build_simint.sh b/src/NWints/simint/libsimint_source/build_simint.sh index fd6a4ce08ae..4119c11fc9a 100755 --- a/src/NWints/simint/libsimint_source/build_simint.sh +++ b/src/NWints/simint/libsimint_source/build_simint.sh @@ -167,6 +167,8 @@ elif [ ${FC} == xlf ] || [ ${FC} == xlf_r ] || [ ${FC} == xlf90 ]|| [ ${FC} == Fortran_FLAGS=" -qintsize=8 -qextname -qpreprocess" elif [ ${FC} == ifort ]; then Fortran_FLAGS="-i8 -fpp" +elif [ ${FC} == nvfortran ] || [ ${FC} == pgf90 ] ; then + Fortran_FLAGS="-i8 -cpp" fi if [[ -z "${SIMINT_BUILD_TYPE}" ]]; then SIMINT_BUILD_TYPE=Release From 5e9dea6a2f227206efe69edcfd9350a8f24455f5 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 11 May 2021 10:58:45 -0700 Subject: [PATCH 20/45] add test for presence of curl and patch Signed-off-by: Jeff Hammond --- src/NWints/simint/libsimint_source/build_simint.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/NWints/simint/libsimint_source/build_simint.sh b/src/NWints/simint/libsimint_source/build_simint.sh index 4119c11fc9a..925447dbc85 100755 --- 
a/src/NWints/simint/libsimint_source/build_simint.sh +++ b/src/NWints/simint/libsimint_source/build_simint.sh @@ -12,6 +12,16 @@ if [ -z "$(command -v python3)" ]; then echo please install python3 exit 1 fi +if [ -z "$(command -v curl)" ]; then + echo curl not installed + echo please install curl + exit 1 +fi +if [ -z "$(command -v patch)" ]; then + echo patch not installed + echo please install patch + exit 1 +fi UNAME_S=$(uname -s) if [[ ${UNAME_S} == Linux ]]; then CPU_FLAGS=$(cat /proc/cpuinfo | grep flags |tail -n 1) From 3b0903232c4f7afeffcaf9ac68a47af95507bffa Mon Sep 17 00:00:00 2001 From: Eric Bylaska Date: Tue, 11 May 2021 12:10:22 -0700 Subject: [PATCH 21/45] adding diis histories count --- src/nwpw/band/minimizer/band_minimizer.F | 8 ++++++-- src/nwpw/band/minimizer/c_bybminimize.F | 5 ++++- src/nwpw/band/minimizer/c_bybminimize2.F | 6 +++++- src/nwpw/nwpwlib/control/control.F | 23 +++++++++++++++++++++++ src/nwpw/nwpwlib/control/control.fh | 2 ++ src/nwpw/pspw/cgsd/bybminimize.F | 5 ++++- src/nwpw/pspw/cgsd/bybminimize2.F | 5 ++++- src/nwpw/pspw/cgsd/cgsdv5.F | 10 ++++++---- 8 files changed, 54 insertions(+), 10 deletions(-) diff --git a/src/nwpw/band/minimizer/band_minimizer.F b/src/nwpw/band/minimizer/band_minimizer.F index 57d24691110..a13c588bf5f 100644 --- a/src/nwpw/band/minimizer/band_minimizer.F +++ b/src/nwpw/band/minimizer/band_minimizer.F @@ -148,6 +148,8 @@ logical function band_minimizer(rtdb,flag) external control_fractional_smeartype real*8 control_fractional_kT,control_fractional_alpha external control_fractional_kT,control_fractional_alpha + integer control_diis_histories + external control_diis_histories c character*255 cpsp_comment,comment c external cpsp_comment @@ -546,9 +548,10 @@ logical function band_minimizer(rtdb,flag) if (control_scf_algorithm().eq.0) > write(luout,1293) "simple mixing" if (control_scf_algorithm().eq.1) - > write(luout,1293) "Anderson potential mixing" + > write(luout,1293) "Broyden mixing" if 
(control_scf_algorithm().eq.2) - > write(luout,1293) "Johnson-Pulay mixing" + > write(luout,1289) "Johnson-Pulay mixing", + > control_diis_histories() if (control_scf_algorithm().eq.3) > write(luout,1293) "Anderson density mixing" if (minimizer.eq.5) write(luout,1296) "potential" @@ -855,6 +858,7 @@ logical function band_minimizer(rtdb,flag) 1280 FORMAT(5X, ' time step=',F10.2,5X,'fictitious mass=',F10.1) 1281 FORMAT(5X, ' maximum iterations =',I10, > ' ( ',I4,' inner ',I6,' outer )') + 1289 FORMAT(5X, ' scf algorithm = ',A,' (',I2,' histories)') 1290 FORMAT(5X, ' tolerance=',E9.3,' (energy)',E12.3, & ' (density)') 1291 FORMAT(/' Kohn-Sham scf parameters:') diff --git a/src/nwpw/band/minimizer/c_bybminimize.F b/src/nwpw/band/minimizer/c_bybminimize.F index 4f085641d70..335a0a116e8 100644 --- a/src/nwpw/band/minimizer/c_bybminimize.F +++ b/src/nwpw/band/minimizer/c_bybminimize.F @@ -89,6 +89,8 @@ subroutine c_bybminimize(E,deltae,deltac,current_iteration, double precision ddot external ddot + integer control_diis_histories + external control_diis_histories Ein = E(1) call Parallel_taskid(taskid) @@ -169,7 +171,8 @@ subroutine c_bybminimize(E,deltae,deltac,current_iteration, * **** iniitialize SCF Mixing **** call nwpw_scf_mixing_init(control_scf_algorithm(),alpha, - > 5,ispin,2*nfft3d,dcpl_mb(vall_out(1))) + > control_diis_histories(), + > ispin,2*nfft3d,dcpl_mb(vall_out(1))) * ***** diis loop **** diff --git a/src/nwpw/band/minimizer/c_bybminimize2.F b/src/nwpw/band/minimizer/c_bybminimize2.F index e887cd24253..669c6d24480 100644 --- a/src/nwpw/band/minimizer/c_bybminimize2.F +++ b/src/nwpw/band/minimizer/c_bybminimize2.F @@ -109,6 +109,9 @@ subroutine c_bybminimize2(E,deltae,deltac,current_iteration, double precision ddot external ddot + integer control_diis_histories + external control_diis_histories + Ein = E(1) call Parallel_taskid(taskid) oprint = (taskid.eq.MASTER).and.control_print(print_medium) @@ -225,7 +228,8 @@ subroutine 
c_bybminimize2(E,deltae,deltac,current_iteration, * **** iniitialize SCF Mixing **** call nwpw_scf_mixing_init(control_scf_algorithm(),alpha, - > 5,ispin,nfft3d,dbl_mb(rho_out(1))) + > control_diis_histories(), + > ispin,nfft3d,dbl_mb(rho_out(1))) * **** iniitialize blocked cg **** diff --git a/src/nwpw/nwpwlib/control/control.F b/src/nwpw/nwpwlib/control/control.F index 4c392110865..6696692e619 100644 --- a/src/nwpw/nwpwlib/control/control.F +++ b/src/nwpw/nwpwlib/control/control.F @@ -512,6 +512,10 @@ logical function control_read(code_in,rtdb) > mt_int,1,scf_algorithm)) > scf_algorithm = 3 + if (.not.btdb_get(rtdb,'nwpw:diis_histories', + > mt_int,1,diis_histories)) + > diis_histories = 15 + if (.not.btdb_get(rtdb,'nwpw:ks_algorithm', > mt_int,1,ks_algorithm)) > ks_algorithm = 0 @@ -767,6 +771,11 @@ logical function control_read(code_in,rtdb) if (.not.btdb_get(rtdb,'nwpw:scf_algorithm', > mt_int,1,scf_algorithm)) > scf_algorithm = 3 + + if (.not.btdb_get(rtdb,'nwpw:diis_histories', + > mt_int,1,diis_histories)) + > diis_histories = 15 + if (.not.btdb_get(rtdb,'nwpw:ks_algorithm', > mt_int,1,ks_algorithm)) > ks_algorithm = 0 @@ -2360,6 +2369,20 @@ integer function control_scf_algorithm() return end +* *********************************** +* * * +* * control_diis_histories * +* * * +* *********************************** + integer function control_diis_histories() + implicit none + +#include "control.fh" + + control_diis_histories = diis_histories + return + end + * *********************************** * * * * * control_ks_algorithm * diff --git a/src/nwpw/nwpwlib/control/control.fh b/src/nwpw/nwpwlib/control/control.fh index c35b7618eeb..e0f09fef919 100644 --- a/src/nwpw/nwpwlib/control/control.fh +++ b/src/nwpw/nwpwlib/control/control.fh @@ -30,6 +30,7 @@ logical two_comp_ppot,frozen,pio,fast_erf,fmm,smooth_cutoff logical hess_model,periodic_dipole,precondition integer maxit_orb,maxit_orbs,scf_algorithm,ks_algorithm + integer diis_histories integer 
symm_number,minimizer common / control_block / unita,unita_frozen,tolerances, > scaling,sa_decay,smooth_cutoff_values, @@ -44,6 +45,7 @@ > code, > ispin,multiplicity, > maxit_orb,maxit_orbs,scf_algorithm, + > diis_histories, > ks_algorithm,minimizer, > symm_number, > move,frac_coord,SA,fei,fei_quench, diff --git a/src/nwpw/pspw/cgsd/bybminimize.F b/src/nwpw/pspw/cgsd/bybminimize.F index 3f448e6922f..d3b6b9a0191 100644 --- a/src/nwpw/pspw/cgsd/bybminimize.F +++ b/src/nwpw/pspw/cgsd/bybminimize.F @@ -122,6 +122,8 @@ subroutine bybminimize(E,deltae,deltac,current_iteration, integer control_ks_maxit_orb,control_ks_maxit_orbs external control_ks_maxit_orb,control_ks_maxit_orbs + integer control_diis_histories + external control_diis_histories Ein = E(1) @@ -226,7 +228,8 @@ subroutine bybminimize(E,deltae,deltac,current_iteration, * **** iniitialize SCF Mixing **** call nwpw_scf_mixing_init(control_scf_algorithm(),alpha, - > 5,ispin,n2ft3d,dbl_mb(vall_out(1))) + > control_diis_histories(), + > ispin,n2ft3d,dbl_mb(vall_out(1))) * **** iniitialize RMM-DIIS **** if (control_ks_algorithm().eq.1) call pspw_rmmdiis_init(5) diff --git a/src/nwpw/pspw/cgsd/bybminimize2.F b/src/nwpw/pspw/cgsd/bybminimize2.F index 26dc8cf9e88..9ca93d52f4b 100644 --- a/src/nwpw/pspw/cgsd/bybminimize2.F +++ b/src/nwpw/pspw/cgsd/bybminimize2.F @@ -147,6 +147,8 @@ subroutine bybminimize2(E,deltae,deltac,current_iteration, integer control_ks_maxit_orb,control_ks_maxit_orbs external control_ks_maxit_orb,control_ks_maxit_orbs + integer control_diis_histories + external control_diis_histories Ein = E(1) call Parallel_taskid(taskid) @@ -262,7 +264,8 @@ subroutine bybminimize2(E,deltae,deltac,current_iteration, * **** iniitialize SCF Mixing **** call nwpw_scf_mixing_init(control_scf_algorithm(),alpha, - > 5,ispin,n2ft3d,dbl_mb(rho_out(1))) + > control_diis_histories(), + > ispin,n2ft3d,dbl_mb(rho_out(1))) * **** iniitialize RMM-DIIS **** if (control_ks_algorithm().eq.1) call pspw_rmmdiis_init(5) diff --git 
a/src/nwpw/pspw/cgsd/cgsdv5.F b/src/nwpw/pspw/cgsd/cgsdv5.F index d2a660a2128..9a6586c804c 100644 --- a/src/nwpw/pspw/cgsd/cgsdv5.F +++ b/src/nwpw/pspw/cgsd/cgsdv5.F @@ -132,8 +132,8 @@ logical function cgsdv5(rtdb,flag) external pspw_charge_found,ion_q_FixIon integer control_minimizer,control_scf_algorithm external control_minimizer,control_scf_algorithm - integer control_ks_algorithm - external control_ks_algorithm + integer control_ks_algorithm,control_diis_histories + external control_ks_algorithm,control_diis_histories real*8 control_ks_alpha,control_kerker_g0 external control_ks_alpha,control_kerker_g0 logical control_print,control_balance @@ -586,9 +586,10 @@ logical function cgsdv5(rtdb,flag) if (control_scf_algorithm().eq.0) > write(luout,1293) "simple mixing" if (control_scf_algorithm().eq.1) - > write(luout,1293) "Anderson potential mixing" + > write(luout,1293) "Broyden mixing" if (control_scf_algorithm().eq.2) - > write(luout,1293) "Johnson-Pulay mixing" + > write(luout,1289) "Johnson-Pulay mixing", + > control_diis_histories() if (control_scf_algorithm().eq.3) > write(luout,1293) "Anderson density mixing" if (minimizer.eq.5) write(luout,1296) "potential" @@ -924,6 +925,7 @@ logical function cgsdv5(rtdb,flag) 1280 FORMAT(5X, ' time step=',F10.2,5X,'fictitious mass=',F10.1) 1281 FORMAT(5X, ' maximum iterations =',I10, > ' ( ',I4,' inner ',I6,' outer )') + 1289 FORMAT(5X, ' SCF algorithm = ',A,' (',I2,' histories)') 1290 FORMAT(5X, ' tolerance=',E9.3,' (energy)',E12.3, & ' (density)') 1291 FORMAT(/' Kohn-Sham scf parameters:') From 16e698507cbef34dab990b3eebd20356f64c6531 Mon Sep 17 00:00:00 2001 From: Eric Bylaska Date: Tue, 11 May 2021 12:20:46 -0700 Subject: [PATCH 22/45] Added diis_histories option to scf keyword...EJB --- src/nwpw/nwpw_input.F | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/nwpw/nwpw_input.F b/src/nwpw/nwpw_input.F index e75e23bc714..59ec09e3b7d 100644 --- a/src/nwpw/nwpw_input.F +++ b/src/nwpw/nwpw_input.F @@ -1155,6 
+1155,14 @@ subroutine nwpw_input(rtdb) > 4800,RTDB_ERR) if(.not.rtdb_put(rtdb,'nwpw:ks_maxit_orbs',mt_int,1,nx)) > call errquit('nwpw_input: error writing to rtdb', + > 4800,RTDB_ERR) + end if + if (inp_compare(.false.,zone_name,'diis_histories')) then + if (.not.inp_i(nx)) + > call errquit('nwpw_input: error reading diis histories', + > 4800,RTDB_ERR) + if(.not.rtdb_put(rtdb,'nwpw:diis_histories',mt_int,1,nx)) + > call errquit('nwpw_input: error writing to rtdb', > 4800,RTDB_ERR) end if From 2d4b7c7b705407fd737e4f904e7cfbe1b30d8773 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 11 May 2021 12:21:47 -0700 Subject: [PATCH 23/45] use curl or wget Signed-off-by: Jeff Hammond --- .../simint/libsimint_source/build_simint.sh | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/NWints/simint/libsimint_source/build_simint.sh b/src/NWints/simint/libsimint_source/build_simint.sh index 925447dbc85..ddff453720a 100755 --- a/src/NWints/simint/libsimint_source/build_simint.sh +++ b/src/NWints/simint/libsimint_source/build_simint.sh @@ -12,9 +12,9 @@ if [ -z "$(command -v python3)" ]; then echo please install python3 exit 1 fi -if [ -z "$(command -v curl)" ]; then - echo curl not installed - echo please install curl +if [ -z "$(command -v curl)" ] && [ -z "$(command -v wget)" ]; then + echo curl and wget not installed + echo please install curl or wget exit 1 fi if [ -z "$(command -v patch)" ]; then @@ -60,8 +60,16 @@ fi PERMUTE_SLOW=${SIMINT_MAXAM} GITHUB_USERID=edoapra rm -rf simint.l${SIMINT_MAXAM}_p${PERMUTE_SLOW}_d${DERIVE}* *-chem-simint-generator-?????? 
simint-chem-simint-generator.tar.gz simint_lib -curl -L https://github.com/${GITHUB_USERID}/simint-generator/tarball/master -o simint-chem-simint-generator.tar.gz -#curl -LJ https://github.com/simint-chem/simint-generator/tarball/master -o simint-chem-simint-generator.tar.gz + +GITHUB_URL=https://github.com/${GITHUB_USERID}/simint-generator/tarball/master +#GITHUB_URL=https://github.com/simint-chem/simint-generator/tarball/master +TAR_NAME=simint-chem-simint-generator.tar.gz +if [ ! -z "$(command -v curl)" ] ; then + curl -L "${GITHUB_URL}" -o "${TAR_NAME}" +else + wget -O "${TAR_NAME}" "${GITHUB_URL}" +fi + tar xzf simint-chem-simint-generator.tar.gz cd *-simint-generator-??????? rm -f generator_types.patch From a1a55750b4fd51144ead3d6a262e345f18287e90 Mon Sep 17 00:00:00 2001 From: edoapra Date: Tue, 11 May 2021 10:44:31 -0700 Subject: [PATCH 24/45] use gcc and g++ with nvfortran/pgf90 --- src/NWints/simint/libsimint_source/build_simint.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/NWints/simint/libsimint_source/build_simint.sh b/src/NWints/simint/libsimint_source/build_simint.sh index 4119c11fc9a..75450607414 100755 --- a/src/NWints/simint/libsimint_source/build_simint.sh +++ b/src/NWints/simint/libsimint_source/build_simint.sh @@ -169,6 +169,8 @@ elif [ ${FC} == ifort ]; then Fortran_FLAGS="-i8 -fpp" elif [ ${FC} == nvfortran ] || [ ${FC} == pgf90 ] ; then Fortran_FLAGS="-i8 -cpp" + CC=gcc + CXX=g++ fi if [[ -z "${SIMINT_BUILD_TYPE}" ]]; then SIMINT_BUILD_TYPE=Release From 08a9df852022fe2124b9a8db27683097c8e682c5 Mon Sep 17 00:00:00 2001 From: edoapra Date: Tue, 11 May 2021 10:43:28 -0700 Subject: [PATCH 25/45] new nvfortran simint step --- .github/workflows/github_actions.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/github_actions.yml b/.github/workflows/github_actions.yml index 5db0bbe4a4b..88eef200c18 100644 --- a/.github/workflows/github_actions.yml +++ b/.github/workflows/github_actions.yml @@ -93,6 +93,14 
@@ jobs: nwchem_modules: "nwdft solvation driver" fc: gfortran-10 use_simint: 1 + - os: ubuntu-20.04 + experimental: true + mpi_impl: mpich + armci_network: mpi-ts + nwchem_modules: "tinyqmpw python" + fc: nvfortran + cc: gcc + use_simint: 1 - os: ubuntu-20.04 experimental: true mpi_impl: mpich @@ -143,6 +151,7 @@ jobs: NWCHEM_MODULES: ${{ matrix.nwchem_modules }} USE_SIMINT: ${{ matrix.use_simint }} FC: ${{ matrix.fc }} + CC: ${{ matrix.cc }} COMEX_MAX_NB_OUTSTANDING: 4 SIMINT_MAXAM: 3 SIMINT_VECTOR: avx2 From 9e6e98aef00ce233fa88af0299f4b91353553153 Mon Sep 17 00:00:00 2001 From: edoapra Date: Tue, 11 May 2021 11:04:40 -0700 Subject: [PATCH 26/45] remove CC definition from matrix --- .github/workflows/github_actions.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/github_actions.yml b/.github/workflows/github_actions.yml index 88eef200c18..7b038f94ea9 100644 --- a/.github/workflows/github_actions.yml +++ b/.github/workflows/github_actions.yml @@ -151,7 +151,6 @@ jobs: NWCHEM_MODULES: ${{ matrix.nwchem_modules }} USE_SIMINT: ${{ matrix.use_simint }} FC: ${{ matrix.fc }} - CC: ${{ matrix.cc }} COMEX_MAX_NB_OUTSTANDING: 4 SIMINT_MAXAM: 3 SIMINT_VECTOR: avx2 From abb3207305d418acf76785dc220301bb345a1ce3 Mon Sep 17 00:00:00 2001 From: edoapra Date: Tue, 11 May 2021 15:40:41 -0700 Subject: [PATCH 27/45] large speedup of simint build by setting build=release for generator --- src/NWints/simint/libsimint_source/build_simint.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/NWints/simint/libsimint_source/build_simint.sh b/src/NWints/simint/libsimint_source/build_simint.sh index 251bf9b5f5e..7de2434a837 100755 --- a/src/NWints/simint/libsimint_source/build_simint.sh +++ b/src/NWints/simint/libsimint_source/build_simint.sh @@ -130,7 +130,10 @@ if [[ ${CMAKE_VER} -lt 3 ]]; then echo define the CMAKE env. 
variable exit 1 fi -$CMAKE ../ +if [[ -z "${SIMINT_BUILD_TYPE}" ]]; then + SIMINT_BUILD_TYPE=Release +fi +$CMAKE -DCMAKE_BUILD_TYPE="${SIMINT_BUILD_TYPE}" ../ make -j2 cd .. #./create.py -g build/generator/ostei -l 6 -p 4 -d 1 simint.l6_p4_d1 @@ -190,9 +193,6 @@ elif [ ${FC} == nvfortran ] || [ ${FC} == pgf90 ] ; then CC=gcc CXX=g++ fi -if [[ -z "${SIMINT_BUILD_TYPE}" ]]; then - SIMINT_BUILD_TYPE=Release -fi echo Fortran_FLAGS equal "$Fortran_FLAGS" FC="${FC}" CXX="${CXX}" $CMAKE \ -DCMAKE_BUILD_TYPE="${SIMINT_BUILD_TYPE}" -DSIMINT_VECTOR=${VEC} \ From 0ba6d634982f8264eed2bf3cadc5e3d18a0a7307 Mon Sep 17 00:00:00 2001 From: edoapra Date: Wed, 12 May 2021 09:57:24 -0700 Subject: [PATCH 28/45] missing arguments spotted by Michael.Klemm@amd.com --- src/optim/neb/neb_utils.F | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/optim/neb/neb_utils.F b/src/optim/neb/neb_utils.F index 9912a32a303..5cb778a10ab 100644 --- a/src/optim/neb/neb_utils.F +++ b/src/optim/neb/neb_utils.F @@ -2201,7 +2201,7 @@ real*8 function neb_line_energy(bead_list,kbeads,alpha,opt) > dbl_mb(c1(1)), > dbl_mb(e1(1)), > dbl_mb(t1(1)), - > dbl_mb(g1(1))) + > dbl_mb(g1(1)),.false.,.false.) 
shift = (m-1)*ng call dcopy(ng,dbl_mb(c1(1)),1,dbl_mb(cs(1)+shift),1) call dcopy(ng,dbl_mb(g1(1)),1,dbl_mb(gs(1)+shift),1) From 07473ed86a778169cffcdbe690ddda7329addfa2 Mon Sep 17 00:00:00 2001 From: edoapra Date: Wed, 12 May 2021 16:50:50 -0700 Subject: [PATCH 29/45] riscv64 options --- src/config/makefile.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/config/makefile.h b/src/config/makefile.h index e8b11758542..73781e20a86 100644 --- a/src/config/makefile.h +++ b/src/config/makefile.h @@ -1807,6 +1807,13 @@ endif FFLAGS_FORGA = -mabi=64 CFLAGS_FORGA = -mabi=64 endif + ifeq ($(_CPU),riscv64) + DONTHAVEM64OPT=Y + COPTIONS = -march=rv64gc -mabi=lp64d + FOPTIONS = -march=rv64gc -mabi=lp64d + FFLAGS_FORGA = -march=rv64gc -mabi=lp64d + CFLAGS_FORGA = -march=rv64gc -mabi=lp64d + endif ifeq ($(_CC),gcc) ifneq ($(DONTHAVEM64OPT),Y) COPTIONS = -m64 From 624ce28d5e07328412cd9cd0aff3263b1683f17e Mon Sep 17 00:00:00 2001 From: edoapra Date: Wed, 12 May 2021 17:29:35 -0700 Subject: [PATCH 30/45] default to peigs_CPU=PENTIUM if peigs_CPU is not defined --- src/peigs/DEFS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/peigs/DEFS b/src/peigs/DEFS index 325ed4e575b..9c0859f46b7 100644 --- a/src/peigs/DEFS +++ b/src/peigs/DEFS @@ -583,6 +583,9 @@ ifeq ($(FC),xlf) endif endif + ifndef peigs_CPU + peigs_CPU = PENTIUM + endif endif#end of LINUX64 ifeq ($(peigs_TARGET),cray-sv2) From 0a3d926025979528107cf669f291218880d7e830 Mon Sep 17 00:00:00 2001 From: Eric Bylaska Date: Thu, 13 May 2021 08:11:43 -0700 Subject: [PATCH 31/45] debugging pspw fractional optimizer...EJB --- src/nwpw/pspw/cgsd/bybminimize2.F | 4 +++- src/nwpw/pspw/cgsd/cgsd_energy.F | 19 +++++++++---------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/nwpw/pspw/cgsd/bybminimize2.F b/src/nwpw/pspw/cgsd/bybminimize2.F index 9ca93d52f4b..026f0643284 100644 --- a/src/nwpw/pspw/cgsd/bybminimize2.F +++ b/src/nwpw/pspw/cgsd/bybminimize2.F @@ -239,7 +239,8 @@ subroutine 
bybminimize2(E,deltae,deltac,current_iteration, if (dohfx) call psi_1genrho() if (control_fractional()) then call psi_1assending_occupation() - call psi_1define_occupation(0.0d0,.false.) + !call psi_1define_occupation(0.0d0,.false.) + call psi_1define_occupation(-1.0d0,.false.) Enew1 = Enew1 + psi_smearcorrection() end if !$OMP MASTER @@ -302,6 +303,7 @@ subroutine bybminimize2(E,deltae,deltac,current_iteration, call psi_1rotate2() call psi_2to1() + !$OMP BARRIER * **** define fractional occupation **** diff --git a/src/nwpw/pspw/cgsd/cgsd_energy.F b/src/nwpw/pspw/cgsd/cgsd_energy.F index 0c2aa75424d..91bc9f1eef8 100644 --- a/src/nwpw/pspw/cgsd/cgsd_energy.F +++ b/src/nwpw/pspw/cgsd/cgsd_energy.F @@ -146,17 +146,16 @@ real*8 function cgsd_energy(newpsi) if (minimizer.eq.8) it_out = 1 if ((newpsi).or.(nwpw_cosmo_firsttime())) then call pspw_Lin_HFX_reset() - call sdminimize(15) + if (minimizer.lt.4) call sdminimize(15) call bybminimize0() - if (control_fractional()) then - call psi_1toelectron() - call psi_1gen_hml() - call psi_diagonalize_hml_assending() - call psi_1rotate2() - call psi_2to1() - call psi_1define_occupation(-1.0d0,.false.) - !call psi_1define_occupation(1.0d0,.false.) - end if +c if (control_fractional()) then +c call psi_1toelectron() +c call psi_1gen_hml() +c call psi_diagonalize_hml_assending() +c call psi_1rotate2() +c call psi_2to1() +c call psi_1define_occupation(-1.0d0,.false.) 
+c end if end if 2 continue From 56875e03d10ae8b48c554a567dbc7cf13d89bcef Mon Sep 17 00:00:00 2001 From: edoapra Date: Thu, 13 May 2021 10:31:36 -0700 Subject: [PATCH 32/45] fix for Simint memory requirements https://github.com/nwchemgit/nwchem/issues/372 --- src/NWints/simint/source/nwcsim_facef90.F | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/NWints/simint/source/nwcsim_facef90.F b/src/NWints/simint/source/nwcsim_facef90.F index 26520868c11..46a2cf3f40a 100644 --- a/src/NWints/simint/source/nwcsim_facef90.F +++ b/src/NWints/simint/source/nwcsim_facef90.F @@ -102,6 +102,7 @@ subroutine nwcsim_init(rtdb,nbas,bases,num_der) & call errquit(pname//'Exiting ',8, BASIS_ERR) if (.not. geom_cent_get(geom, iat, tag, & coord, q))call errquit + & (pname//'Exiting ',9, GEOM_ERR) nwcsim_noshell(bas)=nwcsim_noshell(bas)+1 call simint_initialize_shell( S smnt_sh(nwcsim_noshell(bas),bas)) @@ -124,10 +125,10 @@ subroutine nwcsim_init(rtdb,nbas,bases,num_der) c c memory allocation c - isz_2e4c = max(isz_2e4c, - S simint_eri_worksize(num_der, max_ang)) +C simint_eri_workmem gives the minimum size of the workspace required in bytes mem_2e4c = max(mem_2e4c, - S simint_eri_workmem(num_der, max_ang)) + S simint_eri_workmem(num_der, max_ang))/ + M MA_sizeof(MT_INT,1,MT_BYTE) enddo ! basis loop endif c @@ -136,16 +137,16 @@ subroutine nwcsim_init(rtdb,nbas,bases,num_der) c SIMINT_PRIM_SCREEN_STAT needs 4 more doubles c isz_2e4c = isz_2e4c + 4 c - call util_align(isz_2e4c,SIMINT_SIMD_LEN) - call util_align(mem_2e4c,SIMINT_SIMD_LEN) c - iszb_2e4c=isz_2e4c +c iszb_2e4c=isz_2e4c if(num_der.eq.1) then memb_2e4c = mem_2e4c + mem_2e4c/5 else memb_2e4c = mem_2e4c + mem_2e4c/10 ! 
+10% to be safe endif + call util_align(isz_2e4c,SIMINT_SIMD_LEN) + call util_align(mem_2e4c,SIMINT_SIMD_LEN) call util_align(memb_2e4c,SIMINT_SIMD_LEN) mem_2e3c = mem_2e4c mem_2e2c = mem_2e4c From 2707ea08b695731c903bf76c2d96136feb97e6e7 Mon Sep 17 00:00:00 2001 From: edoapra Date: Thu, 13 May 2021 10:48:48 -0700 Subject: [PATCH 33/45] refresh github action cache to test latest simint commits --- .github/workflows/github_actions.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/github_actions.yml b/.github/workflows/github_actions.yml index 7b038f94ea9..6a72d8628e0 100644 --- a/.github/workflows/github_actions.yml +++ b/.github/workflows/github_actions.yml @@ -174,7 +174,7 @@ jobs: with: path: | ~/cache - key: ${{ matrix.os }}-${{ matrix.mpi_impl}}-${{ matrix.fc}}-xcode${{ matrix.xcode_version}}-simint${{ matrix.use_simint}}-blas${{ matrix.blas}}-simd${{ steps.get-simd.outputs.simd }}-nwchemcache-v004 + key: ${{ matrix.os }}-${{ matrix.mpi_impl}}-${{ matrix.fc}}-xcode${{ matrix.xcode_version}}-simint${{ matrix.use_simint}}-blas${{ matrix.blas}}-simd${{ steps.get-simd.outputs.simd }}-nwchemcache-v005 - name: build environment run: | pwd From 05908c3366b8a314af4c3e06333230593a76010a Mon Sep 17 00:00:00 2001 From: edoapra Date: Thu, 13 May 2021 13:32:28 -0700 Subject: [PATCH 34/45] added ifort simint step --- .github/workflows/github_actions.yml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/github_actions.yml b/.github/workflows/github_actions.yml index 6a72d8628e0..475c3418fd6 100644 --- a/.github/workflows/github_actions.yml +++ b/.github/workflows/github_actions.yml @@ -97,10 +97,18 @@ jobs: experimental: true mpi_impl: mpich armci_network: mpi-ts - nwchem_modules: "tinyqmpw python" + nwchem_modules: "nwdft solvation driver" fc: nvfortran cc: gcc use_simint: 1 + - os: ubuntu-20.04 + experimental: true + mpi_impl: intel + armci_network: mpi-ts + nwchem_modules: "nwdft solvation 
driver" + fc: ifort + cc: icc + use_simint: 1 - os: ubuntu-20.04 experimental: true mpi_impl: mpich From 6ff10eeed4e0d3c14406329ae3701d10f610bd55 Mon Sep 17 00:00:00 2001 From: edoapra Date: Thu, 13 May 2021 13:32:28 -0700 Subject: [PATCH 35/45] require gcc6 for skylake --- src/NWints/simint/libsimint_source/build_simint.sh | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/NWints/simint/libsimint_source/build_simint.sh b/src/NWints/simint/libsimint_source/build_simint.sh index 7de2434a837..079cc076dc2 100755 --- a/src/NWints/simint/libsimint_source/build_simint.sh +++ b/src/NWints/simint/libsimint_source/build_simint.sh @@ -51,6 +51,19 @@ else VEC=scalar fi echo VEC $VEC +if [[ "${VEC}" == "avx512" ]]; then +if [[ -z "${CC}" ]]; then + CC=cc +fi +let GCCVERSIONGT5=$(expr `${CC} -dumpversion | cut -f1 -d.` \> 5) + if [[ ${GCCVERSIONGT5} != 1 ]]; then + echo + echo you have gcc version $(${CC} -dumpversion | cut -f1 -d.) + echo gcc version 6 and later needed for skylake + echo + exit 1 + fi +fi SRC_HOME=`pwd` DERIV=1 if [[ -z "${SIMINT_MAXAM}" ]]; then From 5d09697f4967485a69c990732319635366445347 Mon Sep 17 00:00:00 2001 From: edoapra Date: Thu, 13 May 2021 15:24:05 -0700 Subject: [PATCH 36/45] changes for l=7 aka k functions --- src/basis/bas_input.F | 4 ++-- src/basis/basis.F | 3 ++- src/util/nwc_const.fh | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/basis/bas_input.F b/src/basis/bas_input.F index d521d3995d8..8c9f610ff02 100644 --- a/src/basis/bas_input.F +++ b/src/basis/bas_input.F @@ -358,7 +358,7 @@ subroutine bas_input_body(basis, osegment, oIs_rel, oHas_Star) integer nltypes ! No. of known angular momentum types integer nsptypes ! No. of known sp type shells integer nopts ! No. 
of options - parameter (nltypes = 7, nsptypes = 3, nopts = 6) + parameter (nltypes = 8, nsptypes = 3, nopts = 6) character*1 ltypes(nltypes) character*2 sptypes(nsptypes) character*8 opts(nopts) @@ -374,7 +374,7 @@ subroutine bas_input_body(basis, osegment, oIs_rel, oHas_Star) cc AJL/Begin/SPIN-POLARISED ECPs integer channel ! Both (Default)=0; Alpha=1; Beta=2 cc AJL/End - data ltypes /'s','p','d','f','g','h','i'/ + data ltypes /'s','p','d','f','g','h','i','k'/ data sptypes / 'sp', 'l ', 'ul'/ data spvalues/ -1 , -1 , -1 / data opts / 'except', 'library', 'file', 'rel', 'nelec' , diff --git a/src/basis/basis.F b/src/basis/basis.F index 8a4a6b9670f..a6571e3de55 100644 --- a/src/basis/basis.F +++ b/src/basis/basis.F @@ -658,7 +658,7 @@ logical function bas_print(basisin) integer atn, len_tag, len_ele character*2 symbol character*16 element - character*3 ctype(0:6),cltype(2) + character*3 ctype(0:7),cltype(2) character*3 shell_type *. . . . . . . . . . . ! Room for tag+space+(+element+) = 16+1+1+16+1 character*35 buffer @@ -683,6 +683,7 @@ logical function bas_print(basisin) ctype(4)='G' ctype(5)='H' ctype(6)='I' + ctype(7)='K' cltype(1)='SP' cltype(2)='SPD' bas_print = .true. diff --git a/src/util/nwc_const.fh b/src/util/nwc_const.fh index 3aa4a44b9ca..b7263626150 100644 --- a/src/util/nwc_const.fh +++ b/src/util/nwc_const.fh @@ -49,9 +49,9 @@ parameter (nw_max_coor = 3*nw_max_atom) *------------------------------------------------------------------------------ * Maximum angular momentum (union of all integral functionality) -* 0=S, 1=P, 2=D, 3=F, 4=G, 5=H, 6=I +* 0=S, 1=P, 2=D, 3=F, 4=G, 5=H, 6=I, 7=K integer nw_max_angular - parameter (nw_max_angular = 6) + parameter (nw_max_angular = 7) *------------------------------------------------------------------------------ * Maximum number of primitive gaussians in a segmented shell *....................................................... 
or general contraction From d06274c2b40654e984a21a7b10fc057fa97ad8b9 Mon Sep 17 00:00:00 2001 From: edoapra Date: Thu, 13 May 2021 19:51:11 -0700 Subject: [PATCH 37/45] switch to OpenBLAS 0.3.15. fix avx512 detection with icc --- src/libext/openblas/build_openblas.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/libext/openblas/build_openblas.sh b/src/libext/openblas/build_openblas.sh index a73c9aa7a23..5aa104e9b60 100755 --- a/src/libext/openblas/build_openblas.sh +++ b/src/libext/openblas/build_openblas.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -v arch=`uname -m` -VERSION=0.3.13 +VERSION=0.3.15 #COMMIT=974acb39ff86121a5a94be4853f58bd728b56b81 BRANCH=develop #if [ -f OpenBLAS-${VERSION}.tar.gz ]; then @@ -98,6 +98,11 @@ if [[ "$FORCETARGET" == *"SKYLAKEX"* ]]; then exit 1 fi fi +#this fixes avx512 detection for icc +if [[ "${CC}" == "icc" ]]; then + FORCETARGET+=HOSTCC=\"icc -xhost\" +fi + #disable threading for ppc64le since it uses OPENMP echo arch is "$arch" if [[ "$arch" == "ppc64le" ]]; then From 9f062ecb156365c0d5548343ad21e4585026368b Mon Sep 17 00:00:00 2001 From: edoapra Date: Thu, 13 May 2021 19:57:35 -0700 Subject: [PATCH 38/45] fixed check for existing openblas tarball --- src/libext/openblas/build_openblas.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/libext/openblas/build_openblas.sh b/src/libext/openblas/build_openblas.sh index 5aa104e9b60..b9557867df8 100755 --- a/src/libext/openblas/build_openblas.sh +++ b/src/libext/openblas/build_openblas.sh @@ -4,10 +4,10 @@ arch=`uname -m` VERSION=0.3.15 #COMMIT=974acb39ff86121a5a94be4853f58bd728b56b81 BRANCH=develop -#if [ -f OpenBLAS-${VERSION}.tar.gz ]; then -# echo "using existing" OpenBLAS-${VERSION}.tar.gz -if [ -f OpenBLAS-$COMMIT.zip ]; then - echo "using existing" OpenBLAS-${COMMIT}.zip +if [ -f OpenBLAS-${VERSION}.tar.gz ]; then + echo "using existing" OpenBLAS-${VERSION}.tar.gz +#if [ -f OpenBLAS-$COMMIT.zip ]; then +# echo "using 
existing" OpenBLAS-${COMMIT}.zip else rm -rf OpenBLAS* # curl -L https://github.com/xianyi/OpenBLAS/archive/$COMMIT.zip -o OpenBLAS-$COMMIT.zip From bbb4773f42fb0429b53ad1ba7b0a60954baab093 Mon Sep 17 00:00:00 2001 From: edoapra Date: Fri, 14 May 2021 10:31:00 -0700 Subject: [PATCH 39/45] scalapack size bug fix. should address the mac accelerate failures --- src/libext/scalapack/build_scalapa.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/libext/scalapack/build_scalapa.sh b/src/libext/scalapack/build_scalapa.sh index 3ed6245f353..90016eeda86 100755 --- a/src/libext/scalapack/build_scalapa.sh +++ b/src/libext/scalapack/build_scalapa.sh @@ -83,7 +83,7 @@ if [[ "$BLAS_SIZE" != "$SCALAPACK_SIZE" ]] ; then exit 1 fi -if [[ -z "${SCALAPCK_SIZE}" ]]; then +if [[ -z "${SCALAPACK_SIZE}" ]]; then SCALAPACK_SIZE=8 fi if [[ "$BLAS_SIZE" == 4 ]] && [[ -z "$USE_64TO32" ]] ; then @@ -141,7 +141,7 @@ GOTCLANG=$( mpicc -dM -E - /dev/null |grep __clang__|head -1|cut - if [[ ${GOTCLANG} == "1" ]] ; then C_FLAGS=" -Wno-error=implicit-function-declaration " fi - +echo "SCALAPACK_SIZE" is $SCALAPACK_SIZE if [[ "$SCALAPACK_SIZE" == 8 ]] ; then GFORTRAN_EXTRA=$(echo $FC | cut -c 1-8) if [[ ${FC} == gfortran ]] || [[ ${FC} == f95 ]] || [[ ${GFORTRAN_EXTRA} == gfortran ]] ; then @@ -153,6 +153,7 @@ if [[ "$SCALAPACK_SIZE" == 8 ]] ; then fi C_FLAGS+=" -DInt=long" fi +echo compiling with CC=mpicc FC=$MPIF90 CFLAGS="$C_FLAGS" FFLAGS="$Fortran_FLAGS" $CMAKE -Wno-dev ../ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_C_FLAGS="$C_FLAGS" -DCMAKE_Fortran_FLAGS="$Fortran_FLAGS" -DTEST_SCALAPACK=OFF -DBUILD_TESTING=OFF -DBUILD_SHARED_LIBS=OFF -DBLAS_openblas_LIBRARY="$BLASOPT" -DBLAS_LIBRARIES="$BLASOPT" -DLAPACK_openblas_LIBRARY="$BLASOPT" -DLAPACK_LIBRARIES="$BLASOPT" CC=mpicc FC=$MPIF90 CFLAGS="$C_FLAGS" FFLAGS="$Fortran_FLAGS" $CMAKE -Wno-dev ../ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_C_FLAGS="$C_FLAGS" -DCMAKE_Fortran_FLAGS="$Fortran_FLAGS" -DTEST_SCALAPACK=OFF 
-DBUILD_TESTING=OFF -DBUILD_SHARED_LIBS=OFF -DBLAS_openblas_LIBRARY="$BLASOPT" -DBLAS_LIBRARIES="$BLASOPT" -DLAPACK_openblas_LIBRARY="$BLASOPT" -DLAPACK_LIBRARIES="$BLASOPT" make V=0 -j3 scalapack/fast mkdir -p ../../../lib From 367ce443496367e20cc150b58d5384cbe74407a8 Mon Sep 17 00:00:00 2001 From: edoapra Date: Fri, 14 May 2021 11:45:42 -0700 Subject: [PATCH 40/45] fix wrong position of parenthesis https://github.com/nwchemgit/nwchem/issues/372#issuecomment-841131707 --- src/NWints/simint/source/nwcsim_facef90.F | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/NWints/simint/source/nwcsim_facef90.F b/src/NWints/simint/source/nwcsim_facef90.F index 46a2cf3f40a..2b479ee684a 100644 --- a/src/NWints/simint/source/nwcsim_facef90.F +++ b/src/NWints/simint/source/nwcsim_facef90.F @@ -125,10 +125,8 @@ subroutine nwcsim_init(rtdb,nbas,bases,num_der) c c memory allocation c -C simint_eri_workmem gives the minimum size of the workspace required in bytes mem_2e4c = max(mem_2e4c, - S simint_eri_workmem(num_der, max_ang))/ - M MA_sizeof(MT_INT,1,MT_BYTE) + S simint_eri_worksize(num_der, max_ang)) enddo ! basis loop endif c From 4ccdc364cb069dcef6867d0d3ab0f9b447db6320 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Sun, 2 May 2021 16:46:21 -0700 Subject: [PATCH 41/45] use do concurrent in 64-to-32 copy --- src/ccsd/convert_single_double.F | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/src/ccsd/convert_single_double.F b/src/ccsd/convert_single_double.F index fe598ceb89d..140276e3168 100644 --- a/src/ccsd/convert_single_double.F +++ b/src/ccsd/convert_single_double.F @@ -9,10 +9,9 @@ subroutine copy_32_to_64(n, a32, a64) real(kind=dp), intent(out) :: a64(n) ! integer :: i - !$OMP SIMD - do i=1, n + do concurrent (i=1:n) a64(i) = real(a32(i), kind=dp) - enddo + end do end subroutine subroutine copy_64_to_32(n, a64, a32) @@ -26,10 +25,9 @@ subroutine copy_64_to_32(n, a64, a32) real(kind=sp), intent(out) :: a32(n) ! 
integer :: i - !$OMP SIMD - do i=1, n + do concurrent (i=1:n) a32(i) = real(a64(i), kind=sp) - enddo + end do end subroutine subroutine add_32_to_64(n, a32, a64) @@ -43,10 +41,9 @@ subroutine add_32_to_64(n, a32, a64) real(kind=dp), intent(inout) :: a64(n) ! integer :: i - !$OMP SIMD - do i=1, n + do concurrent (i=1:n) a64(i) = a64(i) + real(a32(i), kind=dp) - enddo + end do end subroutine subroutine add_64_to_32(n, a64, a32) @@ -60,8 +57,7 @@ subroutine add_64_to_32(n, a64, a32) real(kind=sp), intent(inout) :: a32(n) ! integer :: i - !$OMP SIMD - do i=1, n + do concurrent (i=1:n) a32(i) = a32(i) + real(a64(i), kind=sp) - enddo + end do end subroutine From b081e18b64bf7f9e3abeedc870bb8c6eee89fe45 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Sun, 2 May 2021 17:09:25 -0700 Subject: [PATCH 42/45] NVIDIA GPU version of CCSD(T) semidirect (trpdrv) at build time, you must specify USE_OPENACC_TRPDRV=1 and NWCHEM_LINK_CUDA=1. the former turns on the OpenACC+CUBLAS compilation in TRPDRV. the latter turns on the use of CUDA managed memory in GA. for now, you must specify DEV_GA=1 at build time, until https://github.com/GlobalArrays/ga/pull/210 is part of GA release version NWChem uses. at run time, you must specify MA_USE_CUDA_MEM=1 this causes MA to use CUDA managed memory, without which the code will crash, if the OS does not support system unified memory. this code does not yet deal with how MPI and multi-GPU systems interact. 
--- src/ccsd/GNUmakefile | 6 + src/ccsd/aoccsd2.F | 29 ++ src/ccsd/ccsd_trpdrv_openacc.F | 572 +++++++++++++++++++++++++++++++++ src/config/makefile.h | 4 + src/tools/GNUmakefile | 4 + 5 files changed, 615 insertions(+) create mode 100644 src/ccsd/ccsd_trpdrv_openacc.F diff --git a/src/ccsd/GNUmakefile b/src/ccsd/GNUmakefile index cecc25724b1..4516406a42f 100644 --- a/src/ccsd/GNUmakefile +++ b/src/ccsd/GNUmakefile @@ -109,6 +109,12 @@ ifeq ($(HAVE_SET_GA_PROPERTY),Y) LIB_DEFINES += -DHAVE_SET_GA_PROPERTY endif +ifdef USE_OPENACC_TRPDRV + OBJ_OPTIMIZE += ccsd_trpdrv_openacc.o + USES_BLAS += ccsd_trpdrv_openacc.F + FOPTIONS += -acc -gpu=managed -cuda -cudalib=cublas +endif + ifeq ($(ARMCI_NETWORK),MPI-PR) LIB_DEFINES += -DACC_STRIPS diff --git a/src/ccsd/aoccsd2.F b/src/ccsd/aoccsd2.F index 6829c5c51bf..87e258f12cd 100644 --- a/src/ccsd/aoccsd2.F +++ b/src/ccsd/aoccsd2.F @@ -16,6 +16,7 @@ subroutine aoccsd(basis,ncor,nocc,nvir,ndel,nact,nbf,maxit, logical oconverged, occd, use_trpdrv_nb logical use_trpdrv_omp, use_trpdrv_bgp2 logical use_trpdrv_omp_mp + logical use_trpdrv_openacc logical use_trpdrv_offload c #include "ccsd_len.fh" @@ -717,6 +718,9 @@ subroutine ccsd_iterdrv2(rtdb,basis,nsh,ncor,nocc,nvir,nact,nbf, if (.not. rtdb_get(rtdb, 'ccsd:use_trpdrv_omp_mp', mt_log, 1, 1 use_trpdrv_omp_mp)) 2 use_trpdrv_omp_mp=.false. + if (.not. rtdb_get(rtdb, 'ccsd:use_trpdrv_openacc', mt_log, 1, + 1 use_trpdrv_openacc)) + 2 use_trpdrv_offload=.false. if (.not. rtdb_get(rtdb, 'ccsd:use_trpdrv_offload', mt_log, 1, 1 use_trpdrv_offload)) 2 use_trpdrv_offload=.false. @@ -976,6 +980,31 @@ subroutine ccsd_iterdrv2(rtdb,basis,nsh,ncor,nocc,nvir,nact,nbf, $ dbl_mb(k_trp_Kka), dbl_mb(k_trp_Jij), dbl_mb(k_trp_Jkj), $ dbl_mb(k_trp_Kij), dbl_mb(k_trp_Kkj), dbl_mb(k_trp_Dja), $ dbl_mb(k_trp_Djka), dbl_mb(k_trp_Djia)) +c + else if (use_trpdrv_openacc) then +#if defined(USE_OPENACC_TRPDRV) +#ifndef USE_F90_ALLOCATABLE +#error You must set USE_F90_ALLOCATABLE if USE_OPENACC_TRPDRV is set! 
+#endif + if (iam.eq.0.and.oprint) then + write(luout,1808) nvpass,util_wallsec() + call util_flush(luout) + endif + 1808 format(' commencing triples evaluation - openacc version',i8, + I ' at ',f20.2,' secs') + call ccsd_trpdrv_openacc(dbl_mb(k_t1), + $ f1n,f1t,f2n,f2t,f3n,f3t,f4n,f4t, + $ eorb,g_objo,g_objv,g_coul,g_exch,ncor,nocc,nvir,iprt, + $ empt(1),empt(2),oseg_lo,oseg_hi,kchunk, + $ dbl_mb(k_trp_Tij), dbl_mb(k_trp_Tkj), dbl_mb(k_trp_Tia), + $ dbl_mb(k_trp_Tka), dbl_mb(k_trp_Xia), dbl_mb(k_trp_Xka), + $ dbl_mb(k_trp_Jia), dbl_mb(k_trp_Jka), dbl_mb(k_trp_Kia), + $ dbl_mb(k_trp_Kka), dbl_mb(k_trp_Jij), dbl_mb(k_trp_Jkj), + $ dbl_mb(k_trp_Kij), dbl_mb(k_trp_Kkj), dbl_mb(k_trp_Dja), + $ dbl_mb(k_trp_Djka), dbl_mb(k_trp_Djia)) +#else + call errquit('aoccsd: trpdrv_openacc disabled ',0,0) +#endif c elseif (use_trpdrv_omp_mp) then #ifndef TRPMIXP_OFF diff --git a/src/ccsd/ccsd_trpdrv_openacc.F b/src/ccsd/ccsd_trpdrv_openacc.F new file mode 100644 index 00000000000..4a174d5d883 --- /dev/null +++ b/src/ccsd/ccsd_trpdrv_openacc.F @@ -0,0 +1,572 @@ + subroutine ccsd_trpdrv_openacc(t1, + & f1n,f1t,f2n,f2t,f3n,f3t,f4n,f4t,eorb, + & g_objo,g_objv,g_coul,g_exch, + & ncor,nocc,nvir,iprt,emp4,emp5, + & oseg_lo,oseg_hi, kchunk, + & Tij, Tkj, Tia, Tka, Xia, Xka, Jia, Jka, Kia, Kka, + & Jij, Jkj, Kij, Kkj, Dja, Djka, Djia) + use iso_fortran_env + use cudafor + use cublas + implicit none +! +#include "errquit.fh" +#include "global.fh" +#include "ccsd_len.fh" +#include "ccsdps.fh" +#include "util.fh" +#include "msgids.fh" +#include "yflop.fh" +! 
+ double precision, intent(inout) :: emp4,emp5 + double precision, intent(in) :: t1(*) + integer, intent(in) :: ncor,nocc,nvir + integer, intent(in) :: iprt + integer, intent(in) :: g_objo,g_objv,g_coul,g_exch + integer, intent(in) :: oseg_lo,oseg_hi, kchunk + double precision, intent(in), managed :: f1n(nvir,nvir) + double precision, intent(in), managed :: f2n(nvir,nvir) + double precision, intent(in), managed :: f3n(nvir,nvir) + double precision, intent(in), managed :: f4n(nvir,nvir) + double precision, intent(in), managed :: f1t(nvir,nvir) + double precision, intent(in), managed :: f2t(nvir,nvir) + double precision, intent(in), managed :: f3t(nvir,nvir) + double precision, intent(in), managed :: f4t(nvir,nvir) + double precision, intent(in), managed :: eorb(*) + double precision, intent(in), managed :: Tij(*), Tkj(*) + double precision, intent(in), managed :: Tia(*), Tka(*) + double precision, intent(in), managed :: Xia(*), Xka(*) + double precision, intent(in), managed :: Jia(*), Jka(*) + double precision, intent(in), managed :: Jij(*), Jkj(*) + double precision, intent(in), managed :: Kia(*), Kka(*) + double precision, intent(in), managed :: Kij(*), Kkj(*) + double precision, intent(in), managed :: Dja(*), Djka(*), Djia(*) +! used to make inline threaded tengy correct - for now +! it is correct that dint[cx]1 are paired with t1v2 and vice versa +! in the inlined tengy loops. see ccsd_tengy in ccsd_trpdrv.F for +! verification of the i-k and k-i pairing of these. + double precision, allocatable, managed :: dintc1(:),dintc2(:) + double precision, allocatable, managed :: dintx1(:),dintx2(:) + double precision, allocatable, managed :: t1v1(:),t1v2(:) + integer :: alloc_error, err +! + double precision :: emp4i,emp5i,emp4k,emp5k + double precision :: eaijk,denom + integer :: inode,next,nodes,iam + integer :: a,b,c,i,j,k,akold,av + ! chunking is the loop blocking size in the loop nest + ! formerly associated with the tengy routine. + ! 
we have not explored this parameter space but 32 is + ! optimal for TLB blocking in matrix transpose on most + ! architectures (especially x86). + integer, parameter :: chunking = 32 + integer :: bb,cc + integer :: klo, khi + integer nxtask + external nxtask + double precision perfm_flop,tzero,flopzero,t_flops,agg_flops + external perfm_flop +! +! Dependencies (global array, local array, handle): +! +! These are waited on first +! +! g_objv, Dja, nbh_objv1 +! g_objv, Djka(1+(k-klo)*nvir), nbh_objv4(k) +! g_objv, Djia, nbh_objv5 +! +! These are waited on later +! +! g_objv, Tka, nbh_objv2 +! g_objv, Xka, nbh_objv3 +! g_objv, Tia, nbh_objv6 +! g_objv, Xia, nbh_objv7 +! g_objo, Tkj, nbh_objo1 +! g_objo, Jkj, nbh_objo2 +! g_objo, Kkj, nbh_objo3 +! g_objo, Tij, nbh_objo4 +! g_objo, Jij, nbh_objo5 +! g_objo, Kij, nbh_objo6 +! g_exch, Kka, nbh_exch1 +! g_exch, Kia, nbh_exch2 +! g_coul, Jka, nbh_coul1 +! g_coul, Jia, nbh_coul2 +! +! non-blocking handles +! + integer nbh_objv1,nbh_objv2,nbh_objv3 + integer nbh_objv5,nbh_objv6,nbh_objv7 + integer nbh_objv4(nocc) +! + integer nbh_objo1,nbh_objo2,nbh_objo3 + integer nbh_objo4,nbh_objo5,nbh_objo6 +! + integer nbh_exch1,nbh_exch2,nbh_coul1,nbh_coul2 + integer n_progr,pct_progr + parameter(n_progr=20) + logical i_progr(n_progr+1) +! + integer(INT32) :: shi + type(cublasHandle) :: handle(8) + integer(kind=cuda_stream_kind) :: stream(8) + double precision :: tt0, tt1 +! + integer(INT32) :: nv4, no4 ! cublasDgemm requires 32-bit integers + integer(INT32) :: cu_op_n, cu_op_t + cu_op_n = CUBLAS_OP_N ! 0 + cu_op_t = CUBLAS_OP_T ! 1 +! + if (ga_nodeid().eq.0) then + write(6,99) + endif + 99 format(2x,'Using OpenACC and cuBLAS in CCSD(T)') + tzero=util_wallsec() + flopzero=perfm_flop() +! +! CUDA stuff +!
+ tt0 = util_wallsec() + do shi=1,8 + err = cudaStreamCreate(stream(shi)) + if (err.ne.0) call errquit('cudaStreamCreate',shi,UNKNOWN_ERR) + err = cublasCreate(handle(shi)) + if (err.ne.0) call errquit('cublasCreate',shi,UNKNOWN_ERR) + err = cublasSetStream(handle(shi), stream(shi)) + if (err.ne.0) call errquit('cublasSetStream',shi,UNKNOWN_ERR) + end do + tt1 = util_wallsec() + if (ga_nodeid().eq.0) then + write(6,500) tt1-tt0 + 500 format('CU init took ',e15.5,' seconds') + endif +! + allocate( dintc1(1:nvir), stat=alloc_error) + if (alloc_error.ne.0) call errquit('dintc1',1,MA_ERR) + allocate( dintx1(1:nvir), stat=alloc_error) + if (alloc_error.ne.0) call errquit('dintx1',2,MA_ERR) + allocate( t1v1(1:nvir), stat=alloc_error) + if (alloc_error.ne.0) call errquit('t1v1',3,MA_ERR) + allocate( dintc2(1:nvir), stat=alloc_error) + if (alloc_error.ne.0) call errquit('dintc2',4,MA_ERR) + allocate( dintx2(1:nvir), stat=alloc_error) + if (alloc_error.ne.0) call errquit('dintx2',5,MA_ERR) + allocate( t1v2(1:nvir), stat=alloc_error) + if (alloc_error.ne.0) call errquit('t1v2',6,MA_ERR) +! + nodes = ga_nnodes() + iam = ga_nodeid() +! +! call ga_sync() ! ga_sync called just before trpdrv in aoccsd2 +! + if (occsdps) then + call pstat_on(ps_trpdrv) + else + call qenter('trpdrv',0) + endif + do klo=1,n_progr+1 + i_progr(klo)=.true. 
+ enddo + inode=-1 + next=nxtask(nodes, 1) + do klo = 1, nocc, kchunk + akold=0 + khi = min(nocc, klo+kchunk-1) + do a=oseg_lo,oseg_hi + av=a-ncor-nocc + do j=1,nocc + inode=inode+1 + if (inode.eq.next)then + + call ga_nbget(g_objv,1+(j-1)*lnov,j*lnov,av,av,Dja, + & lnov,nbh_objv1) + do k = klo, khi + call ga_nbget(g_objv,1+(j-1)*nvir+(k-1)*lnov, + & j*nvir+(k-1)*lnov,av,av, + & Djka(1+(k-klo)*nvir),nvir,nbh_objv4(k)) + enddo + call ga_nbget(g_objo,(klo-1)*lnvv+1,khi*lnvv,j,j,Tkj, + & (khi-klo+1)*lnvv,nbh_objo1) + call ga_nbget(g_objo,lnovv+(klo-1)*lnov+1, + & lnovv+khi*lnov,j,j,Jkj, + & (khi-klo+1)*lnov,nbh_objo2) + call ga_nbget(g_objo,lnovv+lnoov+(klo-1)*lnov+1, + & lnovv+lnoov+khi*lnov,j,j,Kkj, + & (khi-klo+1)*lnov,nbh_objo3) + if (akold .ne. a) then + akold = a + call ga_nbget(g_coul,1,lnvv,(a-oseg_lo)*nocc+klo, + & (a-oseg_lo)*nocc+khi,Jka,lnvv,nbh_coul1) + call ga_nbget(g_exch,1,lnvv,(a-oseg_lo)*nocc+klo, + & (a-oseg_lo)*nocc+khi,Kka,lnvv,nbh_exch1) + call ga_nbget(g_objv,1+lnoov+(klo-1)*lnov, + & lnoov+khi*lnov,av,av,Tka,(khi-klo+1)*lnov, + & nbh_objv2) + call ga_nbget(g_objv,1+2*lnoov+(klo-1)*lnov, + & 2*lnoov+khi*lnov,av,av,Xka,(khi-klo+1)*lnov, + & nbh_objv3) + endif + + do i=1,nocc + + call ga_nbget(g_objv,1+(j-1)*nvir+(i-1)*lnov, + & j*nvir+(i-1)*lnov,av,av,Djia,nvir,nbh_objv5) + call ga_nbget(g_objo,(i-1)*lnvv+1,i*lnvv,j,j,Tij, + & lnvv,nbh_objo4) + call ga_nbget(g_objo,lnovv+(i-1)*lnov+1, + & lnovv+i*lnov,j,j,Jij,lnov,nbh_objo5) + call ga_nbget(g_objo,lnovv+lnoov+(i-1)*lnov+1, + & lnovv+lnoov+i*lnov,j,j,Kij,lnov,nbh_objo6) + call ga_nbget(g_coul,1,lnvv,(a-oseg_lo)*nocc+i, + & (a-oseg_lo)*nocc+i,Jia,lnvv,nbh_coul2) + call ga_nbget(g_exch,1,lnvv,(a-oseg_lo)*nocc+i, + & (a-oseg_lo)*nocc+i,Kia,lnvv,nbh_exch2) + call ga_nbget(g_objv,1+lnoov+(i-1)*lnov, + & lnoov+i*lnov,av,av,Tia,lnov,nbh_objv6) + call ga_nbget(g_objv,1+2*lnoov+(i-1)*lnov, + & 2*lnoov+i*lnov,av,av,Xia,lnov,nbh_objv7) + +! 
call dcopy(nvir,t1((i-1)*nvir+1),1,t1v2,1) + t1v2(:) = t1((i-1)*nvir+1:i*nvir) + call ga_nbwait(nbh_objv1) ! Dja +! call dcopy(nvir,Dja(1+(i-1)*nvir),1,dintc1,1) + dintc1(:) = Dja(1+(i-1)*nvir:i*nvir) + call ga_nbwait(nbh_objv5) ! Djia +! call dcopy(nvir,Djia,1,dintx1,1) + dintx1(:) = Djia(1:nvir) + + do k=klo,min(khi,i) + + !call dcopy(nvir,t1((k-1)*nvir+1),1,t1v1,1) + t1v1(:) = t1((k-1)*nvir+1:k*nvir) + !call dcopy(nvir,Dja(1+(k-1)*nvir),1,dintc2,1) + dintc2(:) = Dja(1+(k-1)*nvir:k*nvir) + call ga_nbwait(nbh_objv4(k)) ! Djka + !call dcopy(nvir,Djka(1+(k-klo)*nvir),1,dintx2,1) + dintx2(:) = Djka(1+(k-klo)*nvir:(k-klo+1)*nvir) + emp4i = 0.0d0 + emp5i = 0.0d0 + emp4k = 0.0d0 + emp5k = 0.0d0 + if (occsdps) then + call pstat_on(ps_doxxx) + else + call qenter('doxxx',0) + endif +! +! These are the input dependencies for the DGEMM calls below. +! We wait on all of them here because GA is not even remotely thread-safe. +! All of these are independent of k, so we wait on them only +! at the first trip of the loop. +! + if (k.eq.klo) then + call ga_nbwait(nbh_objv2) + call ga_nbwait(nbh_objv3) + call ga_nbwait(nbh_objv6) + call ga_nbwait(nbh_objv7) + call ga_nbwait(nbh_objo1) + call ga_nbwait(nbh_objo2) + call ga_nbwait(nbh_objo3) + call ga_nbwait(nbh_objo4) + call ga_nbwait(nbh_objo5) + call ga_nbwait(nbh_objo6) + call ga_nbwait(nbh_exch1) + call ga_nbwait(nbh_exch2) + call ga_nbwait(nbh_coul1) + call ga_nbwait(nbh_coul2) + endif + + nv4 = nvir ! 
no possibility of overflow + no4 = nocc + + err = cublasDgemm_v2(handle(1), + & cu_op_n,cu_op_t, + & nv4,nv4,nv4,1.0d0, + & Jia,nv4,Tkj(1+(k-klo)*lnvv),nv4,0.0d0, + & f1n,nv4) + if (err.ne.0) then + call errquit('cublasDgemm_v2',err,UNKNOWN_ERR) + endif + err = cublasDgemm_v2(handle(1), + & cu_op_n,cu_op_n, + & nv4,nv4,no4,-1.0d0, + & Tia,nv4,Kkj(1+(k-klo)*lnov),no4,1.0d0, + & f1n,nv4) + if (err.ne.0) then + call errquit('cublasDgemm_v2',err,UNKNOWN_ERR) + endif + + err = cublasDgemm_v2(handle(2), + & cu_op_n,cu_op_t, + & nv4,nv4,nv4,1.0d0, + & Kia,nv4,Tkj(1+(k-klo)*lnvv),nv4,0.0d0, + & f2n,nv4) + if (err.ne.0) then + call errquit('cublasDgemm_v2',err,UNKNOWN_ERR) + endif + err = cublasDgemm_v2(handle(2), + & cu_op_n,cu_op_n, + & nv4,nv4,no4,-1.0d0, + & Xia,nv4,Kkj(1+(k-klo)*lnov),no4,1.0d0, + & f2n,nv4) + if (err.ne.0) then + call errquit('cublasDgemm_v2',err,UNKNOWN_ERR) + endif + + err = cublasDgemm_v2(handle(3), + & cu_op_n,cu_op_n, + & nv4,nv4,nv4,1.0d0, + & Jia,nv4,Tkj(1+(k-klo)*lnvv),nv4,0.0d0, + & f3n,nv4) + if (err.ne.0) then + call errquit('cublasDgemm_v2',err,UNKNOWN_ERR) + endif + err = cublasDgemm_v2(handle(3), + & cu_op_n,cu_op_n, + & nv4,nv4,no4,-1.0d0, + & Tia,nv4,Jkj(1+(k-klo)*lnov),no4,1.0d0, + & f3n,nv4) + if (err.ne.0) then + call errquit('cublasDgemm_v2',err,UNKNOWN_ERR) + endif + + err = cublasDgemm_v2(handle(4), + & cu_op_n,cu_op_n, + & nv4,nv4,nv4,1.0d0, + & Kia,nv4,Tkj(1+(k-klo)*lnvv),nv4,0.0d0, + & f4n,nv4) + if (err.ne.0) then + call errquit('cublasDgemm_v2',err,UNKNOWN_ERR) + endif + err = cublasDgemm_v2(handle(4), + & cu_op_n,cu_op_n, + & nv4,nv4,no4,-1.0d0, + & Xia,nv4,Jkj(1+(k-klo)*lnov),no4,1.0d0, + & f4n,nv4) + if (err.ne.0) then + call errquit('cublasDgemm_v2',err,UNKNOWN_ERR) + endif + + err = cublasDgemm_v2(handle(5), + & cu_op_n,cu_op_t, + & nv4,nv4,nv4,1.0d0, + & Jka(1+(k-klo)*lnvv),nv4,Tij,nv4,0.0d0, + & f1t,nv4) + if (err.ne.0) then + call errquit('cublasDgemm_v2',err,UNKNOWN_ERR) + endif + err = cublasDgemm_v2(handle(5), + & 
cu_op_n,cu_op_n, + & nv4,nv4,no4,-1.0d0, + & Tka(1+(k-klo)*lnov),nv4,Kij,no4,1.0d0, + & f1t,nv4) + if (err.ne.0) then + call errquit('cublasDgemm_v2',err,UNKNOWN_ERR) + endif + + err = cublasDgemm_v2(handle(6), + & cu_op_n,cu_op_t, + & nv4,nv4,nv4,1.0d0, + & Kka(1+(k-klo)*lnvv),nv4,Tij,nv4,0.0d0, + & f2t,nv4) + if (err.ne.0) then + call errquit('cublasDgemm_v2',err,UNKNOWN_ERR) + endif + err = cublasDgemm_v2(handle(6), + & cu_op_n,cu_op_n, + & nv4,nv4,no4,-1.0d0, + & Xka(1+(k-klo)*lnov),nv4,Kij,no4,1.0d0, + & f2t,nv4) + if (err.ne.0) then + call errquit('cublasDgemm_v2',err,UNKNOWN_ERR) + endif + + err = cublasDgemm_v2(handle(7), + & cu_op_n,cu_op_n, + & nv4,nv4,nv4,1.0d0, + & Jka(1+(k-klo)*lnvv),nv4,Tij,nv4,0.0d0, + & f3t,nv4) + if (err.ne.0) then + call errquit('cublasDgemm_v2',err,UNKNOWN_ERR) + endif + err = cublasDgemm_v2(handle(7), + & cu_op_n,cu_op_n, + & nv4,nv4,no4,-1.0d0, + & Tka(1+(k-klo)*lnov),nv4,Jij,no4,1.0d0, + & f3t,nv4) + if (err.ne.0) then + call errquit('cublasDgemm_v2',err,UNKNOWN_ERR) + endif + + err = cublasDgemm_v2(handle(8), + & cu_op_n,cu_op_n, + & nv4,nv4,nv4,1.0d0, + & Kka(1+(k-klo)*lnvv),nv4,Tij,nv4,0.0d0, + & f4t,nv4) + if (err.ne.0) then + call errquit('cublasDgemm_v2',err,UNKNOWN_ERR) + endif + err = cublasDgemm_v2(handle(8), + & cu_op_n,cu_op_n, + & nv4,nv4,no4,-1.0d0, + & Xka(1+(k-klo)*lnov),nv4,Jij,no4,1.0d0, + & f4t,nv4) + if (err.ne.0) then + call errquit('cublasDgemm_v2',err,UNKNOWN_ERR) + endif + +! this is necessary if OpenACC is not used below +! 
err = cudaDeviceSynchronize() + + do shi=1,8 ! wait on each of the 8 cuBLAS streams + err = cudaStreamSynchronize(stream(shi)) + if (err.ne.0) then + call errquit('cudaStreamSynchronize',err, + & UNKNOWN_ERR) + endif + end do + + if (occsdps) then + call pstat_off(ps_doxxx) + call pstat_on(ps_tengy) + else + call qexit('doxxx',0) + call qenter('tengy',0) + endif + + eaijk=eorb(a) - ( eorb(ncor+i) + & +eorb(ncor+j) + & +eorb(ncor+k) ) +#ifdef USE_YFLOP + flops_ycount = flops_ycount + nvir*nvir*( + D 3 + 2*( + E 12 + + E 11 + + E 11 ) + + 5 2*27 ) +#endif + +!$acc parallel loop collapse(2) private(denom) +!$acc& reduction(+:emp4i,emp4k,emp5i,emp5k) + do b=1,nvir + do c=1,nvir + denom=-1.0d0/( eorb(ncor+nocc+b) + & +eorb(ncor+nocc+c)+eaijk ) + emp4i=emp4i+denom* + & (f1t(b,c)+f1n(c,b)+f2t(c,b)+f3n(b,c)+f4n(c,b))* + & (f1t(b,c)-2*f2t(b,c)-2*f3t(b,c)+f4t(b,c)) + & -denom* + & (f1n(b,c)+f1t(c,b)+f2n(c,b)+f3n(c,b))* + & (2*f1t(b,c)-f2t(b,c)-f3t(b,c)+2*f4t(b,c)) + & +3*denom*( + & f1n(b,c)*(f1n(b,c)+f3n(c,b)+2*f4t(c,b))+ + & f2n(b,c)*f2t(c,b)+f3n(b,c)*f4t(b,c)) + emp4k=emp4k+denom* + & (f1n(b,c)+f1t(c,b)+f2n(c,b)+f3t(b,c)+f4t(c,b))* + & (f1n(b,c)-2*f2n(b,c)-2*f3n(b,c)+f4n(b,c)) + & -denom* + & (f1t(b,c)+f1n(c,b)+f2t(c,b)+f3t(c,b))* + & (2*f1n(b,c)-f2n(b,c)-f3n(b,c)+2*f4n(b,c)) + & +3*denom*( + & f1t(b,c)*(f1t(b,c)+f3t(c,b)+2*f4n(c,b))+ + & f2t(b,c)*f2n(c,b)+f3t(b,c)*f4n(b,c)) + emp5i=emp5i+denom*t1v1(b)*dintx1(c)* + & ( f1t(b,c)+f2n(b,c)+f4n(c,b) + & -2*(f3t(b,c)+f4n(b,c)+f2n(c,b)+ + & f1n(b,c)+f2t(b,c)+f3n(c,b)) + & +4*(f3n(b,c)+f4t(b,c)+f1n(c,b))) + & +denom*t1v1(b)*dintc1(c)* + & ( f1n(b,c)+f4n(b,c)+f1t(c,b) + & -2*(f2n(b,c)+f3n(b,c)+f2t(c,b))) + emp5k=emp5k+denom*t1v2(b)*dintx2(c)* + & ( f1n(b,c)+f2t(b,c)+f4t(c,b) + & -2*(f3n(b,c)+f4t(b,c)+f2t(c,b)+ + & f1t(b,c)+f2n(b,c)+f3t(c,b)) + & +4*(f3t(b,c)+f4n(b,c)+f1t(c,b))) + & +denom*t1v2(b)*dintc2(c)* + & ( f1t(b,c)+f4t(b,c)+f1n(c,b) + & -2*(f2t(b,c)+f3t(b,c)+f2n(c,b))) + end do + end do + if (occsdps) then + call pstat_off(ps_tengy) + else + call qexit('tengy',0)
+ endif + + emp4 = emp4 + emp4i + emp5 = emp5 + emp5i + if (i.ne.k) then + emp4 = emp4 + emp4k + emp5 = emp5 + emp5k + end if ! (i.ne.k) + end do ! k + end do ! i + if (iprt.gt.50)then + write(6,1234)iam,a,j,emp4,emp5 + 1234 format(' iam aijk',3i5,2e15.5) + end if + next=nxtask(nodes, 1) + if(ga_nodeid().eq.0) then + pct_progr=(a-(ncor+nocc)+((klo-1)/kchunk)*nvir)*n_progr/ + / ((nocc/kchunk)*nvir)+1 + if(i_progr(pct_progr)) then + i_progr(pct_progr)=.false. + write(6,4321) ' ccsd(t): done ', + A a-(ncor+nocc)+((klo-1)/kchunk)*nvir, + O ' out of ',(nocc/kchunk)*nvir, + O ' progress: ', + O ((a-(ncor+nocc)+((klo-1)/kchunk)*nvir)*100)/ + D ((nocc/kchunk)*nvir), + P '%, Gflops=',(perfm_flop()-flopzero)/ + D (util_wallsec()-tzero), + P ' at ',(util_wallsec()-tzero),' secs' + call util_flush(6) + 4321 format(a,i8,a,i8,a,i3,a,1pg11.4,a,0pf10.1,a) + endif + endif + end if + end do + end do + end do + call ga_sync() + next=nxtask(-nodes, 1) + t_flops=util_wallsec()-tzero + agg_flops=perfm_flop()-flopzero + call ga_dgop(msg_cc_diis1,agg_flops,1, '+') + if(ga_nodeid().eq.0) then + write(6,4322) ' ccsd(t): 100% done, Aggregate Gflops=', + P agg_flops/t_flops, + P ' in ',t_flops,' secs' + 4322 format(a,1pg11.4,a,0pf10.1,a) + call util_flush(6) + endif + call ga_sync() + if (occsdps) then + call pstat_off(ps_trpdrv) + else + call qexit('trpdrv',0) + endif +! + deallocate( dintc1, stat=alloc_error) + if (alloc_error.ne.0) call errquit('dintc1',11,MA_ERR) + deallocate( dintx1, stat=alloc_error) + if (alloc_error.ne.0) call errquit('dintx1',12,MA_ERR) + deallocate( t1v1, stat=alloc_error) + if (alloc_error.ne.0) call errquit('t1v1',13,MA_ERR) + deallocate( dintc2, stat=alloc_error) + if (alloc_error.ne.0) call errquit('dintc2',14,MA_ERR) + deallocate( dintx2, stat=alloc_error) + if (alloc_error.ne.0) call errquit('dintx2',15,MA_ERR) + deallocate( t1v2, stat=alloc_error) + if (alloc_error.ne.0) call errquit('t1v2',16,MA_ERR) +! +! CUDA stuff +! 
+ do shi=1,8 + err = cublasDestroy(handle(shi)) + if (err.ne.0) call errquit('cublasDestroy',shi,UNKNOWN_ERR) + err = cudaStreamDestroy(stream(shi)) + if (err.ne.0) call errquit('cudaStreamDestroy',shi,UNKNOWN_ERR) + end do +! + end diff --git a/src/config/makefile.h b/src/config/makefile.h index 73781e20a86..e1b2d86b255 100644 --- a/src/config/makefile.h +++ b/src/config/makefile.h @@ -2827,6 +2827,10 @@ else CORE_LIBS += $(BLASOPT) endif +ifdef NWCHEM_LINK_CUDA +CORE_LIBS += -stdpar -acc -gpu=managed -cuda -cudalib=cublas +endif + ifdef BLASOPT BLAS_SUPPLIED=Y endif diff --git a/src/tools/GNUmakefile b/src/tools/GNUmakefile index dfa715b99eb..3e495e274f0 100644 --- a/src/tools/GNUmakefile +++ b/src/tools/GNUmakefile @@ -630,6 +630,10 @@ ifndef ARMCI_NETWORK ARMCI_NETWORK=MPI-TS MAYBE_ARMCI = --with-mpi-ts endif +# CUDA UM support +ifdef NWCHEM_LINK_CUDA + MAYBE_ARMCI += --enable-cuda-mem +endif # # Apparently weak bindings do not work with CYGWIN64 at the moment. There seems # to be an issue with the COFF object format that gets in the way (with ELF From 9034ac7a73b43eea51b4986ad0c34a6b7183b446 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 14 May 2021 15:56:18 -0700 Subject: [PATCH 43/45] stdpar not required --- src/config/makefile.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/config/makefile.h b/src/config/makefile.h index e1b2d86b255..f924f75db85 100644 --- a/src/config/makefile.h +++ b/src/config/makefile.h @@ -2828,7 +2828,7 @@ else endif ifdef NWCHEM_LINK_CUDA -CORE_LIBS += -stdpar -acc -gpu=managed -cuda -cudalib=cublas +CORE_LIBS += -acc -gpu=managed -cuda -cudalib=cublas endif ifdef BLASOPT From 8047d5119d98342c9bf5d7745a18488fa4e2ec5a Mon Sep 17 00:00:00 2001 From: edoapra Date: Fri, 14 May 2021 17:27:12 -0700 Subject: [PATCH 44/45] update for building on cray --- src/libext/scalapack/build_scalapa.sh | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git 
a/src/libext/scalapack/build_scalapa.sh b/src/libext/scalapack/build_scalapa.sh index 90016eeda86..6932d298f1a 100755 --- a/src/libext/scalapack/build_scalapa.sh +++ b/src/libext/scalapack/build_scalapa.sh @@ -17,8 +17,19 @@ get_cmake38(){ fi } +MPICC=mpicc if [[ "$FC" = "ftn" ]] ; then MPIF90="ftn" + # ugly hack to get mpicc on cray + if [[ -z "${INTEL_PATH}" ]]; then + echo + echo Intel compilers not loaded + echo please execute "module load intel" for building Scalapack + echo + exit 1 + else + MPICC=$INTEL_PATH/linux/mpi/intel64/bin/mpicc + fi else if ! [ -x "$(command -v mpif90)" ]; then echo @@ -137,7 +148,7 @@ fi # Fortran_FLAGS+=-I"$NWCHEM_TOP"/src/libext/include #fi #fix for clang 12 error in implicit-function-declaration -GOTCLANG=$( mpicc -dM -E - /dev/null |grep __clang__|head -1|cut -c19) +GOTCLANG=$( "$MPICC" -dM -E - /dev/null |grep __clang__|head -1|cut -c19) if [[ ${GOTCLANG} == "1" ]] ; then C_FLAGS=" -Wno-error=implicit-function-declaration " fi @@ -153,8 +164,15 @@ if [[ "$SCALAPACK_SIZE" == 8 ]] ; then fi C_FLAGS+=" -DInt=long" fi -echo compiling with CC=mpicc FC=$MPIF90 CFLAGS="$C_FLAGS" FFLAGS="$Fortran_FLAGS" $CMAKE -Wno-dev ../ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_C_FLAGS="$C_FLAGS" -DCMAKE_Fortran_FLAGS="$Fortran_FLAGS" -DTEST_SCALAPACK=OFF -DBUILD_TESTING=OFF -DBUILD_SHARED_LIBS=OFF -DBLAS_openblas_LIBRARY="$BLASOPT" -DBLAS_LIBRARIES="$BLASOPT" -DLAPACK_openblas_LIBRARY="$BLASOPT" -DLAPACK_LIBRARIES="$BLASOPT" -CC=mpicc FC=$MPIF90 CFLAGS="$C_FLAGS" FFLAGS="$Fortran_FLAGS" $CMAKE -Wno-dev ../ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_C_FLAGS="$C_FLAGS" -DCMAKE_Fortran_FLAGS="$Fortran_FLAGS" -DTEST_SCALAPACK=OFF -DBUILD_TESTING=OFF -DBUILD_SHARED_LIBS=OFF -DBLAS_openblas_LIBRARY="$BLASOPT" -DBLAS_LIBRARIES="$BLASOPT" -DLAPACK_openblas_LIBRARY="$BLASOPT" -DLAPACK_LIBRARIES="$BLASOPT" +if [[ "$CRAY_CPU_TARGET" == "mic-knl" ]]; then + module swap craype-mic-knl craype-haswell + KNL_SWAP=1 +fi +echo compiling with CC="$MPICC" FC=$MPIF90 
CFLAGS="$C_FLAGS" FFLAGS="$Fortran_FLAGS" $CMAKE -Wno-dev ../ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_C_FLAGS="$C_FLAGS" -DCMAKE_Fortran_FLAGS="$Fortran_FLAGS" -DTEST_SCALAPACK=OFF -DBUILD_TESTING=OFF -DBUILD_SHARED_LIBS=OFF -DBLAS_openblas_LIBRARY="$BLASOPT" -DBLAS_LIBRARIES="$BLASOPT" -DLAPACK_openblas_LIBRARY="$BLASOPT" -DLAPACK_LIBRARIES="$BLASOPT" +CC="$MPICC" FC=$MPIF90 CFLAGS="$C_FLAGS" FFLAGS="$Fortran_FLAGS" $CMAKE -Wno-dev ../ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_C_FLAGS="$C_FLAGS" -DCMAKE_Fortran_FLAGS="$Fortran_FLAGS" -DTEST_SCALAPACK=OFF -DBUILD_TESTING=OFF -DBUILD_SHARED_LIBS=OFF -DBLAS_openblas_LIBRARY="$BLASOPT" -DBLAS_LIBRARIES="$BLASOPT" -DLAPACK_openblas_LIBRARY="$BLASOPT" -DLAPACK_LIBRARIES="$BLASOPT" make V=0 -j3 scalapack/fast mkdir -p ../../../lib cp lib/libscalapack.a ../../../lib/libnwc_scalapack.a +if [[ "$KNL_SWAP" == "1" ]]; then + module swap craype-haswell craype-mic-knl +fi From 760e87875134392cf1aba3e2ebcaced1d90fbaa0 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Fri, 14 May 2021 18:39:18 -0700 Subject: [PATCH 45/45] fix bug: openacc was not initialized --- src/ccsd/aoccsd2.F | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ccsd/aoccsd2.F b/src/ccsd/aoccsd2.F index 87e258f12cd..82516c0f0e0 100644 --- a/src/ccsd/aoccsd2.F +++ b/src/ccsd/aoccsd2.F @@ -720,7 +720,7 @@ subroutine ccsd_iterdrv2(rtdb,basis,nsh,ncor,nocc,nvir,nact,nbf, 2 use_trpdrv_omp_mp=.false. if (.not. rtdb_get(rtdb, 'ccsd:use_trpdrv_openacc', mt_log, 1, 1 use_trpdrv_openacc)) - 2 use_trpdrv_offload=.false. + 2 use_trpdrv_openacc=.false. if (.not. rtdb_get(rtdb, 'ccsd:use_trpdrv_offload', mt_log, 1, 1 use_trpdrv_offload)) 2 use_trpdrv_offload=.false.