From e77a35cc3b0419a5d9d21c99d6487a5f40e0cee7 Mon Sep 17 00:00:00 2001 From: Qiming Sun Date: Fri, 9 Mar 2018 11:25:06 -0800 Subject: [PATCH 1/5] Add new integrals --- ChangeLog | 2 + README.md | 4 +- scripts/auto_intor.cl | 2 + src/autocode/grad1.c | 145 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 151 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index e12c944..4703da5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,5 @@ +Version 3.0.9 (2018-03-08): + * New integrals (i |nabla j) and -1/2 *(i | nabla^2 j) Version 3.0.8 (2018-02-14): * Fix Fortran APIs Version 3.0.7 (2018-01-15): diff --git a/README.md b/README.md index 4e22349..c0865fc 100644 --- a/README.md +++ b/README.md @@ -3,9 +3,9 @@ qcint (quick libcint) An optimized libcint branch for X86 platform -version 3.0.8 +version 3.0.9 -2018-02-14 +2018-03-08 What is qcint diff --git a/scripts/auto_intor.cl b/scripts/auto_intor.cl index 57237a0..5f2540e 100644 --- a/scripts/auto_intor.cl +++ b/scripts/auto_intor.cl @@ -109,7 +109,9 @@ (gen-cint "grad1.c" '("int1e_ipovlp" (nabla \|)) + '("int1e_ovlpip" (\| nabla)) '("int1e_ipkin" (.5 nabla \| p dot p)) + '("int1e_kinip" (.5 \| p dot p nabla)) '("int1e_ipnuc" (nabla \| nuc \|)) '("int1e_iprinv" (nabla \| rinv \|)) '("int1e_rinv" (\| rinv \|)) diff --git a/src/autocode/grad1.c b/src/autocode/grad1.c index 47f7210..51c3fa4 100644 --- a/src/autocode/grad1.c +++ b/src/autocode/grad1.c @@ -85,6 +85,59 @@ return CINT1e_spinor_drv(out, dims, &envs, opt, cache, &c2s_sf_1e); } // int1e_ipovlp_spinor ALL_CINT1E(int1e_ipovlp) //ALL_CINT1E_FORTRAN_(cint1e_ipovlp) +/* */ +static void CINTgout1e_int1e_ovlpip(double *gout, double *g, int *idx, CINTEnvVars *envs, int count) { +CINTg1e_ovlp(g, envs, count); +int nf = envs->nf; +int nfc = nf * 3; +int ix, iy, iz, n; +DECLARE_GOUT; +double *RESTRICT g0 = g; +double *RESTRICT g1 = g0 + envs->g_size * 3 * SIMDD; +__MD r1; +__MD rs[3]; +G1E_D_J(g1, g0, envs->i_l+0, envs->j_l+0, 0); +for (n = 0; n < nf; 
n++) { +ix = idx[0+n*3]; +iy = idx[1+n*3]; +iz = idx[2+n*3]; +rs[0] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[1] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[2] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD); +r1 = + rs[0]; GOUT_SCATTER(gout, n*3+0, r1); +r1 = + rs[1]; GOUT_SCATTER(gout, n*3+1, r1); +r1 = + rs[2]; GOUT_SCATTER(gout, n*3+2, r1); +}} +void int1e_ovlpip_optimizer(CINTOpt **opt, int *atm, int natm, int *bas, int nbas, double *env) { +int ng[] = {0, 1, 0, 0, 1, 1, 1, 3}; +CINTall_1e_optimizer(opt, ng, atm, natm, bas, nbas, env); +} +int int1e_ovlpip_cart(double *out, int *dims, int *shls, +int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) { +int ng[] = {0, 1, 0, 0, 1, 1, 1, 3}; +CINTEnvVars envs; +CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env); +envs.f_gout = &CINTgout1e_int1e_ovlpip; +return CINT1e_drv(out, dims, &envs, opt, cache, &c2s_cart_1e); +} // int1e_ovlpip_cart +int int1e_ovlpip_sph(double *out, int *dims, int *shls, +int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) { +int ng[] = {0, 1, 0, 0, 1, 1, 1, 3}; +CINTEnvVars envs; +CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env); +envs.f_gout = &CINTgout1e_int1e_ovlpip; +return CINT1e_drv(out, dims, &envs, opt, cache, &c2s_sph_1e); +} // int1e_ovlpip_sph +int int1e_ovlpip_spinor(double complex *out, int *dims, int *shls, +int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) { +int ng[] = {0, 1, 0, 0, 1, 1, 1, 3}; +CINTEnvVars envs; +CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env); +envs.f_gout = &CINTgout1e_int1e_ovlpip; +return CINT1e_spinor_drv(out, dims, &envs, opt, cache, &c2s_sf_1e); +} // int1e_ovlpip_spinor +ALL_CINT1E(int1e_ovlpip) +//ALL_CINT1E_FORTRAN_(cint1e_ovlpip) /* */ static void CINTgout1e_int1e_ipkin(double *gout, double *g, int *idx, CINTEnvVars *envs, 
int count) { CINTg1e_ovlp(g, envs, count); @@ -177,6 +230,98 @@ return CINT1e_spinor_drv(out, dims, &envs, opt, cache, &c2s_sf_1e); } // int1e_ipkin_spinor ALL_CINT1E(int1e_ipkin) //ALL_CINT1E_FORTRAN_(cint1e_ipkin) +/* */ +static void CINTgout1e_int1e_kinip(double *gout, double *g, int *idx, CINTEnvVars *envs, int count) { +CINTg1e_ovlp(g, envs, count); +int nf = envs->nf; +int nfc = nf * 3; +int ix, iy, iz, n; +DECLARE_GOUT; +double *RESTRICT g0 = g; +double *RESTRICT g1 = g0 + envs->g_size * 3 * SIMDD; +double *RESTRICT g2 = g1 + envs->g_size * 3 * SIMDD; +double *RESTRICT g3 = g2 + envs->g_size * 3 * SIMDD; +double *RESTRICT g4 = g3 + envs->g_size * 3 * SIMDD; +double *RESTRICT g5 = g4 + envs->g_size * 3 * SIMDD; +double *RESTRICT g6 = g5 + envs->g_size * 3 * SIMDD; +double *RESTRICT g7 = g6 + envs->g_size * 3 * SIMDD; +__MD r1; +__MD rs[27]; +G1E_D_J(g1, g0, envs->i_l+0, envs->j_l+0, 0); +G1E_D_J(g2, g0, envs->i_l+0, envs->j_l+1, 0); +G1E_D_J(g3, g2, envs->i_l+0, envs->j_l+0, 0); +G1E_D_J(g4, g0, envs->i_l+0, envs->j_l+2, 0); +G1E_D_J(g5, g4, envs->i_l+0, envs->j_l+0, 0); +G1E_D_J(g6, g4, envs->i_l+0, envs->j_l+1, 0); +G1E_D_J(g7, g6, envs->i_l+0, envs->j_l+0, 0); +for (n = 0; n < nf; n++) { +ix = idx[0+n*3]; +iy = idx[1+n*3]; +iz = idx[2+n*3]; +rs[0] = MM_LOAD(g7+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[1] = MM_LOAD(g6+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[2] = MM_LOAD(g6+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD); +rs[3] = MM_LOAD(g5+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[4] = MM_LOAD(g4+ix*SIMDD) * MM_LOAD(g3+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[5] = MM_LOAD(g4+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD); +rs[6] = MM_LOAD(g5+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD); +rs[7] = MM_LOAD(g4+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD); +rs[8] = MM_LOAD(g4+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g3+iz*SIMDD); +rs[9] = MM_LOAD(g3+ix*SIMDD) * 
MM_LOAD(g4+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[10] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g5+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[11] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g4+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD); +rs[12] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g6+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[13] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g7+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[14] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g6+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD); +rs[15] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g4+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD); +rs[16] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g5+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD); +rs[17] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g4+iy*SIMDD) * MM_LOAD(g3+iz*SIMDD); +rs[18] = MM_LOAD(g3+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g4+iz*SIMDD); +rs[19] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g4+iz*SIMDD); +rs[20] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g5+iz*SIMDD); +rs[21] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g4+iz*SIMDD); +rs[22] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g3+iy*SIMDD) * MM_LOAD(g4+iz*SIMDD); +rs[23] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g5+iz*SIMDD); +rs[24] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g6+iz*SIMDD); +rs[25] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g6+iz*SIMDD); +rs[26] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g7+iz*SIMDD); +r1 = - rs[0] - rs[12] - rs[24]; GOUT_SCATTER(gout, n*3+0, r1); +r1 = - rs[1] - rs[13] - rs[25]; GOUT_SCATTER(gout, n*3+1, r1); +r1 = - rs[2] - rs[14] - rs[26]; GOUT_SCATTER(gout, n*3+2, r1); +}} +void int1e_kinip_optimizer(CINTOpt **opt, int *atm, int natm, int *bas, int nbas, double *env) { +int ng[] = {0, 3, 0, 0, 3, 1, 1, 3}; +CINTall_1e_optimizer(opt, ng, atm, natm, bas, nbas, env); +} +int int1e_kinip_cart(double *out, int *dims, int *shls, +int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) { +int ng[] = {0, 3, 0, 0, 3, 1, 1, 3}; +CINTEnvVars envs; +CINTinit_int1e_EnvVars(&envs, ng, 
shls, atm, natm, bas, nbas, env); +envs.f_gout = &CINTgout1e_int1e_kinip; +envs.common_factor *= 0.5; +return CINT1e_drv(out, dims, &envs, opt, cache, &c2s_cart_1e); +} // int1e_kinip_cart +int int1e_kinip_sph(double *out, int *dims, int *shls, +int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) { +int ng[] = {0, 3, 0, 0, 3, 1, 1, 3}; +CINTEnvVars envs; +CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env); +envs.f_gout = &CINTgout1e_int1e_kinip; +envs.common_factor *= 0.5; +return CINT1e_drv(out, dims, &envs, opt, cache, &c2s_sph_1e); +} // int1e_kinip_sph +int int1e_kinip_spinor(double complex *out, int *dims, int *shls, +int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) { +int ng[] = {0, 3, 0, 0, 3, 1, 1, 3}; +CINTEnvVars envs; +CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env); +envs.f_gout = &CINTgout1e_int1e_kinip; +envs.common_factor *= 0.5; +return CINT1e_spinor_drv(out, dims, &envs, opt, cache, &c2s_sf_1e); +} // int1e_kinip_spinor +ALL_CINT1E(int1e_kinip) +//ALL_CINT1E_FORTRAN_(cint1e_kinip) /* */ static void CINTgout1e_int1e_ipnuc(double *gout, double *g, int *idx, CINTEnvVars *envs, int count) { int nf = envs->nf; From 02f7bab139086003b233301c05dfe51e3e39ff54 Mon Sep 17 00:00:00 2001 From: Qiming Sun Date: Tue, 20 Mar 2018 17:29:06 -0700 Subject: [PATCH 2/5] New integrals --- ChangeLog | 6 + README.md | 4 +- scripts/auto_intor.cl | 4 + src/autocode/intor1.c | 424 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 436 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 4703da5..2cb628f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +Version 3.0.10 (2018-03-20): + * New integrals + (i | rc nabla j), + (i | rc rc nabla j), + (i | rc nabla rc j) and + (i | rc rc rc j) Version 3.0.9 (2018-03-08): * New integrals (i |nabla j) and -1/2 *(i | nabla^2 j) Version 3.0.8 (2018-02-14): diff --git a/README.md b/README.md index c0865fc..8b0d0f4 100644 --- a/README.md +++ b/README.md @@ -3,9 +3,9 @@ qcint (quick 
libcint) An optimized libcint branch for X86 platform -version 3.0.9 +version 3.0.10 -2018-03-08 +2018-03-20 What is qcint diff --git a/scripts/auto_intor.cl b/scripts/auto_intor.cl index 5f2540e..3eba739 100644 --- a/scripts/auto_intor.cl +++ b/scripts/auto_intor.cl @@ -35,6 +35,10 @@ '("int1e_prinvxp" (p* \| rinv cross p \| )) '("int1e_pnucxp" (p* \| nuc cross p \| )) '("int2e_p1vxp1" (p* \, cross p \| \, )) ; SSO + '("int1e_irp" ( \| rc nabla \| )) + '("int1e_irrp" ( \| rc rc nabla \| )) + '("int1e_irpr" ( \| rc nabla rc \| )) + '("int1e_irrr" ( \| rc rc rc \| )) ) (gen-cint "intor2.c" diff --git a/src/autocode/intor1.c b/src/autocode/intor1.c index 9630e14..0304158 100644 --- a/src/autocode/intor1.c +++ b/src/autocode/intor1.c @@ -2350,3 +2350,427 @@ return CINT2e_spinor_drv(out, dims, &envs, opt, cache, &c2s_sf_2e1, &c2s_sf_2e2) } // int2e_p1vxp1_spinor ALL_CINT(int2e_p1vxp1) //ALL_CINT_FORTRAN_(cint2e_p1vxp1) +/* */ +static void CINTgout1e_int1e_irp(double *gout, double *g, int *idx, CINTEnvVars *envs, int count) { +CINTg1e_ovlp(g, envs, count); +int nf = envs->nf; +int nfc = nf * 9; +int ix, iy, iz, n; +DECLARE_GOUT; +double *RESTRICT g0 = g; +double *RESTRICT g1 = g0 + envs->g_size * 3 * SIMDD; +double *RESTRICT g2 = g1 + envs->g_size * 3 * SIMDD; +double *RESTRICT g3 = g2 + envs->g_size * 3 * SIMDD; +double drj[3]; +drj[0] = envs->rj[0] - envs->env[PTR_COMMON_ORIG+0]; +drj[1] = envs->rj[1] - envs->env[PTR_COMMON_ORIG+1]; +drj[2] = envs->rj[2] - envs->env[PTR_COMMON_ORIG+2]; +__MD r1; +__MD rs[9]; +G1E_D_J(g1, g0, envs->i_l+0, envs->j_l+0, 0); +G1E_RCJ(g2, g0, envs->i_l+0, envs->j_l+1, 0); +G1E_D_J(g3, g2, envs->i_l+0, envs->j_l+0, 0); +for (n = 0; n < nf; n++) { +ix = idx[0+n*3]; +iy = idx[1+n*3]; +iz = idx[2+n*3]; +rs[0] = MM_LOAD(g3+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[1] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[2] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD); +rs[3] = 
MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[4] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g3+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[5] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD); +rs[6] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD); +rs[7] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD); +rs[8] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g3+iz*SIMDD); +r1 = + rs[0]; GOUT_SCATTER(gout, n*9+0, r1); +r1 = + rs[1]; GOUT_SCATTER(gout, n*9+1, r1); +r1 = + rs[2]; GOUT_SCATTER(gout, n*9+2, r1); +r1 = + rs[3]; GOUT_SCATTER(gout, n*9+3, r1); +r1 = + rs[4]; GOUT_SCATTER(gout, n*9+4, r1); +r1 = + rs[5]; GOUT_SCATTER(gout, n*9+5, r1); +r1 = + rs[6]; GOUT_SCATTER(gout, n*9+6, r1); +r1 = + rs[7]; GOUT_SCATTER(gout, n*9+7, r1); +r1 = + rs[8]; GOUT_SCATTER(gout, n*9+8, r1); +}} +void int1e_irp_optimizer(CINTOpt **opt, int *atm, int natm, int *bas, int nbas, double *env) { +int ng[] = {0, 2, 0, 0, 2, 1, 1, 9}; +CINTall_1e_optimizer(opt, ng, atm, natm, bas, nbas, env); +} +int int1e_irp_cart(double *out, int *dims, int *shls, +int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) { +int ng[] = {0, 2, 0, 0, 2, 1, 1, 9}; +CINTEnvVars envs; +CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env); +envs.f_gout = &CINTgout1e_int1e_irp; +return CINT1e_drv(out, dims, &envs, opt, cache, &c2s_cart_1e); +} // int1e_irp_cart +int int1e_irp_sph(double *out, int *dims, int *shls, +int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) { +int ng[] = {0, 2, 0, 0, 2, 1, 1, 9}; +CINTEnvVars envs; +CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env); +envs.f_gout = &CINTgout1e_int1e_irp; +return CINT1e_drv(out, dims, &envs, opt, cache, &c2s_sph_1e); +} // int1e_irp_sph +int int1e_irp_spinor(double complex *out, int *dims, int *shls, +int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) { +int 
ng[] = {0, 2, 0, 0, 2, 1, 1, 9}; +CINTEnvVars envs; +CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env); +envs.f_gout = &CINTgout1e_int1e_irp; +return CINT1e_spinor_drv(out, dims, &envs, opt, cache, &c2s_sf_1e); +} // int1e_irp_spinor +ALL_CINT1E(int1e_irp) +//ALL_CINT1E_FORTRAN_(cint1e_irp) +/* */ +static void CINTgout1e_int1e_irrp(double *gout, double *g, int *idx, CINTEnvVars *envs, int count) { +CINTg1e_ovlp(g, envs, count); +int nf = envs->nf; +int nfc = nf * 27; +int ix, iy, iz, n; +DECLARE_GOUT; +double *RESTRICT g0 = g; +double *RESTRICT g1 = g0 + envs->g_size * 3 * SIMDD; +double *RESTRICT g2 = g1 + envs->g_size * 3 * SIMDD; +double *RESTRICT g3 = g2 + envs->g_size * 3 * SIMDD; +double *RESTRICT g4 = g3 + envs->g_size * 3 * SIMDD; +double *RESTRICT g5 = g4 + envs->g_size * 3 * SIMDD; +double *RESTRICT g6 = g5 + envs->g_size * 3 * SIMDD; +double *RESTRICT g7 = g6 + envs->g_size * 3 * SIMDD; +double drj[3]; +drj[0] = envs->rj[0] - envs->env[PTR_COMMON_ORIG+0]; +drj[1] = envs->rj[1] - envs->env[PTR_COMMON_ORIG+1]; +drj[2] = envs->rj[2] - envs->env[PTR_COMMON_ORIG+2]; +__MD r1; +__MD rs[27]; +G1E_D_J(g1, g0, envs->i_l+0, envs->j_l+0, 0); +G1E_RCJ(g2, g0, envs->i_l+0, envs->j_l+1, 0); +G1E_D_J(g3, g2, envs->i_l+0, envs->j_l+0, 0); +G1E_RCJ(g4, g0, envs->i_l+0, envs->j_l+2, 0); +G1E_D_J(g5, g4, envs->i_l+0, envs->j_l+0, 0); +G1E_RCJ(g6, g4, envs->i_l+0, envs->j_l+1, 0); +G1E_D_J(g7, g6, envs->i_l+0, envs->j_l+0, 0); +for (n = 0; n < nf; n++) { +ix = idx[0+n*3]; +iy = idx[1+n*3]; +iz = idx[2+n*3]; +rs[0] = MM_LOAD(g7+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[1] = MM_LOAD(g6+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[2] = MM_LOAD(g6+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD); +rs[3] = MM_LOAD(g5+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[4] = MM_LOAD(g4+ix*SIMDD) * MM_LOAD(g3+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[5] = MM_LOAD(g4+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * 
MM_LOAD(g1+iz*SIMDD); +rs[6] = MM_LOAD(g5+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD); +rs[7] = MM_LOAD(g4+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD); +rs[8] = MM_LOAD(g4+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g3+iz*SIMDD); +rs[9] = MM_LOAD(g3+ix*SIMDD) * MM_LOAD(g4+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[10] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g5+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[11] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g4+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD); +rs[12] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g6+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[13] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g7+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[14] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g6+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD); +rs[15] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g4+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD); +rs[16] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g5+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD); +rs[17] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g4+iy*SIMDD) * MM_LOAD(g3+iz*SIMDD); +rs[18] = MM_LOAD(g3+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g4+iz*SIMDD); +rs[19] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g4+iz*SIMDD); +rs[20] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g5+iz*SIMDD); +rs[21] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g4+iz*SIMDD); +rs[22] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g3+iy*SIMDD) * MM_LOAD(g4+iz*SIMDD); +rs[23] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g5+iz*SIMDD); +rs[24] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g6+iz*SIMDD); +rs[25] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g6+iz*SIMDD); +rs[26] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g7+iz*SIMDD); +r1 = + rs[0]; GOUT_SCATTER(gout, n*27+0, r1); +r1 = + rs[1]; GOUT_SCATTER(gout, n*27+1, r1); +r1 = + rs[2]; GOUT_SCATTER(gout, n*27+2, r1); +r1 = + rs[3]; GOUT_SCATTER(gout, n*27+3, r1); +r1 = + rs[4]; GOUT_SCATTER(gout, n*27+4, r1); +r1 = + rs[5]; GOUT_SCATTER(gout, n*27+5, r1); +r1 = + rs[6]; GOUT_SCATTER(gout, n*27+6, r1); +r1 = + rs[7]; 
GOUT_SCATTER(gout, n*27+7, r1); +r1 = + rs[8]; GOUT_SCATTER(gout, n*27+8, r1); +r1 = + rs[9]; GOUT_SCATTER(gout, n*27+9, r1); +r1 = + rs[10]; GOUT_SCATTER(gout, n*27+10, r1); +r1 = + rs[11]; GOUT_SCATTER(gout, n*27+11, r1); +r1 = + rs[12]; GOUT_SCATTER(gout, n*27+12, r1); +r1 = + rs[13]; GOUT_SCATTER(gout, n*27+13, r1); +r1 = + rs[14]; GOUT_SCATTER(gout, n*27+14, r1); +r1 = + rs[15]; GOUT_SCATTER(gout, n*27+15, r1); +r1 = + rs[16]; GOUT_SCATTER(gout, n*27+16, r1); +r1 = + rs[17]; GOUT_SCATTER(gout, n*27+17, r1); +r1 = + rs[18]; GOUT_SCATTER(gout, n*27+18, r1); +r1 = + rs[19]; GOUT_SCATTER(gout, n*27+19, r1); +r1 = + rs[20]; GOUT_SCATTER(gout, n*27+20, r1); +r1 = + rs[21]; GOUT_SCATTER(gout, n*27+21, r1); +r1 = + rs[22]; GOUT_SCATTER(gout, n*27+22, r1); +r1 = + rs[23]; GOUT_SCATTER(gout, n*27+23, r1); +r1 = + rs[24]; GOUT_SCATTER(gout, n*27+24, r1); +r1 = + rs[25]; GOUT_SCATTER(gout, n*27+25, r1); +r1 = + rs[26]; GOUT_SCATTER(gout, n*27+26, r1); +}} +void int1e_irrp_optimizer(CINTOpt **opt, int *atm, int natm, int *bas, int nbas, double *env) { +int ng[] = {0, 3, 0, 0, 3, 1, 1, 27}; +CINTall_1e_optimizer(opt, ng, atm, natm, bas, nbas, env); +} +int int1e_irrp_cart(double *out, int *dims, int *shls, +int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) { +int ng[] = {0, 3, 0, 0, 3, 1, 1, 27}; +CINTEnvVars envs; +CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env); +envs.f_gout = &CINTgout1e_int1e_irrp; +return CINT1e_drv(out, dims, &envs, opt, cache, &c2s_cart_1e); +} // int1e_irrp_cart +int int1e_irrp_sph(double *out, int *dims, int *shls, +int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) { +int ng[] = {0, 3, 0, 0, 3, 1, 1, 27}; +CINTEnvVars envs; +CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env); +envs.f_gout = &CINTgout1e_int1e_irrp; +return CINT1e_drv(out, dims, &envs, opt, cache, &c2s_sph_1e); +} // int1e_irrp_sph +int int1e_irrp_spinor(double complex *out, int *dims, 
int *shls, +int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) { +int ng[] = {0, 3, 0, 0, 3, 1, 1, 27}; +CINTEnvVars envs; +CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env); +envs.f_gout = &CINTgout1e_int1e_irrp; +return CINT1e_spinor_drv(out, dims, &envs, opt, cache, &c2s_sf_1e); +} // int1e_irrp_spinor +ALL_CINT1E(int1e_irrp) +//ALL_CINT1E_FORTRAN_(cint1e_irrp) +/* */ +static void CINTgout1e_int1e_irpr(double *gout, double *g, int *idx, CINTEnvVars *envs, int count) { +CINTg1e_ovlp(g, envs, count); +int nf = envs->nf; +int nfc = nf * 27; +int ix, iy, iz, n; +DECLARE_GOUT; +double *RESTRICT g0 = g; +double *RESTRICT g1 = g0 + envs->g_size * 3 * SIMDD; +double *RESTRICT g2 = g1 + envs->g_size * 3 * SIMDD; +double *RESTRICT g3 = g2 + envs->g_size * 3 * SIMDD; +double *RESTRICT g4 = g3 + envs->g_size * 3 * SIMDD; +double *RESTRICT g5 = g4 + envs->g_size * 3 * SIMDD; +double *RESTRICT g6 = g5 + envs->g_size * 3 * SIMDD; +double *RESTRICT g7 = g6 + envs->g_size * 3 * SIMDD; +double drj[3]; +drj[0] = envs->rj[0] - envs->env[PTR_COMMON_ORIG+0]; +drj[1] = envs->rj[1] - envs->env[PTR_COMMON_ORIG+1]; +drj[2] = envs->rj[2] - envs->env[PTR_COMMON_ORIG+2]; +__MD r1; +__MD rs[27]; +G1E_RCJ(g1, g0, envs->i_l+0, envs->j_l+0, 0); +G1E_D_J(g2, g0, envs->i_l+0, envs->j_l+1, 0); +G1E_RCJ(g3, g2, envs->i_l+0, envs->j_l+0, 0); +G1E_RCJ(g4, g0, envs->i_l+0, envs->j_l+2, 0); +G1E_RCJ(g5, g4, envs->i_l+0, envs->j_l+0, 0); +G1E_D_J(g6, g4, envs->i_l+0, envs->j_l+1, 0); +G1E_RCJ(g7, g6, envs->i_l+0, envs->j_l+0, 0); +for (n = 0; n < nf; n++) { +ix = idx[0+n*3]; +iy = idx[1+n*3]; +iz = idx[2+n*3]; +rs[0] = MM_LOAD(g7+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[1] = MM_LOAD(g6+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[2] = MM_LOAD(g6+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD); +rs[3] = MM_LOAD(g5+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[4] = MM_LOAD(g4+ix*SIMDD) * 
MM_LOAD(g3+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[5] = MM_LOAD(g4+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD); +rs[6] = MM_LOAD(g5+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD); +rs[7] = MM_LOAD(g4+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD); +rs[8] = MM_LOAD(g4+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g3+iz*SIMDD); +rs[9] = MM_LOAD(g3+ix*SIMDD) * MM_LOAD(g4+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[10] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g5+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[11] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g4+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD); +rs[12] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g6+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[13] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g7+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[14] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g6+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD); +rs[15] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g4+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD); +rs[16] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g5+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD); +rs[17] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g4+iy*SIMDD) * MM_LOAD(g3+iz*SIMDD); +rs[18] = MM_LOAD(g3+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g4+iz*SIMDD); +rs[19] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g4+iz*SIMDD); +rs[20] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g5+iz*SIMDD); +rs[21] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g4+iz*SIMDD); +rs[22] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g3+iy*SIMDD) * MM_LOAD(g4+iz*SIMDD); +rs[23] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g5+iz*SIMDD); +rs[24] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g6+iz*SIMDD); +rs[25] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g6+iz*SIMDD); +rs[26] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g7+iz*SIMDD); +r1 = + rs[0]; GOUT_SCATTER(gout, n*27+0, r1); +r1 = + rs[1]; GOUT_SCATTER(gout, n*27+1, r1); +r1 = + rs[2]; GOUT_SCATTER(gout, n*27+2, r1); +r1 = + rs[3]; GOUT_SCATTER(gout, n*27+3, r1); +r1 = + rs[4]; GOUT_SCATTER(gout, n*27+4, r1); +r1 = + 
rs[5]; GOUT_SCATTER(gout, n*27+5, r1); +r1 = + rs[6]; GOUT_SCATTER(gout, n*27+6, r1); +r1 = + rs[7]; GOUT_SCATTER(gout, n*27+7, r1); +r1 = + rs[8]; GOUT_SCATTER(gout, n*27+8, r1); +r1 = + rs[9]; GOUT_SCATTER(gout, n*27+9, r1); +r1 = + rs[10]; GOUT_SCATTER(gout, n*27+10, r1); +r1 = + rs[11]; GOUT_SCATTER(gout, n*27+11, r1); +r1 = + rs[12]; GOUT_SCATTER(gout, n*27+12, r1); +r1 = + rs[13]; GOUT_SCATTER(gout, n*27+13, r1); +r1 = + rs[14]; GOUT_SCATTER(gout, n*27+14, r1); +r1 = + rs[15]; GOUT_SCATTER(gout, n*27+15, r1); +r1 = + rs[16]; GOUT_SCATTER(gout, n*27+16, r1); +r1 = + rs[17]; GOUT_SCATTER(gout, n*27+17, r1); +r1 = + rs[18]; GOUT_SCATTER(gout, n*27+18, r1); +r1 = + rs[19]; GOUT_SCATTER(gout, n*27+19, r1); +r1 = + rs[20]; GOUT_SCATTER(gout, n*27+20, r1); +r1 = + rs[21]; GOUT_SCATTER(gout, n*27+21, r1); +r1 = + rs[22]; GOUT_SCATTER(gout, n*27+22, r1); +r1 = + rs[23]; GOUT_SCATTER(gout, n*27+23, r1); +r1 = + rs[24]; GOUT_SCATTER(gout, n*27+24, r1); +r1 = + rs[25]; GOUT_SCATTER(gout, n*27+25, r1); +r1 = + rs[26]; GOUT_SCATTER(gout, n*27+26, r1); +}} +void int1e_irpr_optimizer(CINTOpt **opt, int *atm, int natm, int *bas, int nbas, double *env) { +int ng[] = {0, 3, 0, 0, 3, 1, 1, 27}; +CINTall_1e_optimizer(opt, ng, atm, natm, bas, nbas, env); +} +int int1e_irpr_cart(double *out, int *dims, int *shls, +int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) { +int ng[] = {0, 3, 0, 0, 3, 1, 1, 27}; +CINTEnvVars envs; +CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env); +envs.f_gout = &CINTgout1e_int1e_irpr; +return CINT1e_drv(out, dims, &envs, opt, cache, &c2s_cart_1e); +} // int1e_irpr_cart +int int1e_irpr_sph(double *out, int *dims, int *shls, +int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) { +int ng[] = {0, 3, 0, 0, 3, 1, 1, 27}; +CINTEnvVars envs; +CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env); +envs.f_gout = &CINTgout1e_int1e_irpr; +return CINT1e_drv(out, dims, &envs, 
opt, cache, &c2s_sph_1e); +} // int1e_irpr_sph +int int1e_irpr_spinor(double complex *out, int *dims, int *shls, +int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) { +int ng[] = {0, 3, 0, 0, 3, 1, 1, 27}; +CINTEnvVars envs; +CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env); +envs.f_gout = &CINTgout1e_int1e_irpr; +return CINT1e_spinor_drv(out, dims, &envs, opt, cache, &c2s_sf_1e); +} // int1e_irpr_spinor +ALL_CINT1E(int1e_irpr) +//ALL_CINT1E_FORTRAN_(cint1e_irpr) +/* */ +static void CINTgout1e_int1e_irrr(double *gout, double *g, int *idx, CINTEnvVars *envs, int count) { +CINTg1e_ovlp(g, envs, count); +int nf = envs->nf; +int nfc = nf * 27; +int ix, iy, iz, n; +DECLARE_GOUT; +double *RESTRICT g0 = g; +double *RESTRICT g1 = g0 + envs->g_size * 3 * SIMDD; +double *RESTRICT g2 = g1 + envs->g_size * 3 * SIMDD; +double *RESTRICT g3 = g2 + envs->g_size * 3 * SIMDD; +double *RESTRICT g4 = g3 + envs->g_size * 3 * SIMDD; +double *RESTRICT g5 = g4 + envs->g_size * 3 * SIMDD; +double *RESTRICT g6 = g5 + envs->g_size * 3 * SIMDD; +double *RESTRICT g7 = g6 + envs->g_size * 3 * SIMDD; +double drj[3]; +drj[0] = envs->rj[0] - envs->env[PTR_COMMON_ORIG+0]; +drj[1] = envs->rj[1] - envs->env[PTR_COMMON_ORIG+1]; +drj[2] = envs->rj[2] - envs->env[PTR_COMMON_ORIG+2]; +__MD r1; +__MD rs[27]; +G1E_RCJ(g1, g0, envs->i_l+0, envs->j_l+0, 0); +G1E_RCJ(g2, g0, envs->i_l+0, envs->j_l+1, 0); +G1E_RCJ(g3, g2, envs->i_l+0, envs->j_l+0, 0); +G1E_RCJ(g4, g0, envs->i_l+0, envs->j_l+2, 0); +G1E_RCJ(g5, g4, envs->i_l+0, envs->j_l+0, 0); +G1E_RCJ(g6, g4, envs->i_l+0, envs->j_l+1, 0); +G1E_RCJ(g7, g6, envs->i_l+0, envs->j_l+0, 0); +for (n = 0; n < nf; n++) { +ix = idx[0+n*3]; +iy = idx[1+n*3]; +iz = idx[2+n*3]; +rs[0] = MM_LOAD(g7+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[1] = MM_LOAD(g6+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[2] = MM_LOAD(g6+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD); +rs[3] = 
MM_LOAD(g5+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[4] = MM_LOAD(g4+ix*SIMDD) * MM_LOAD(g3+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[5] = MM_LOAD(g4+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD); +rs[6] = MM_LOAD(g5+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD); +rs[7] = MM_LOAD(g4+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD); +rs[8] = MM_LOAD(g4+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g3+iz*SIMDD); +rs[9] = MM_LOAD(g3+ix*SIMDD) * MM_LOAD(g4+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[10] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g5+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[11] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g4+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD); +rs[12] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g6+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[13] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g7+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); +rs[14] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g6+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD); +rs[15] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g4+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD); +rs[16] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g5+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD); +rs[17] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g4+iy*SIMDD) * MM_LOAD(g3+iz*SIMDD); +rs[18] = MM_LOAD(g3+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g4+iz*SIMDD); +rs[19] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g4+iz*SIMDD); +rs[20] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g5+iz*SIMDD); +rs[21] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g4+iz*SIMDD); +rs[22] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g3+iy*SIMDD) * MM_LOAD(g4+iz*SIMDD); +rs[23] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g5+iz*SIMDD); +rs[24] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g6+iz*SIMDD); +rs[25] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g6+iz*SIMDD); +rs[26] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g7+iz*SIMDD); +r1 = + rs[0]; GOUT_SCATTER(gout, n*27+0, r1); +r1 = + rs[1]; GOUT_SCATTER(gout, n*27+1, r1); +r1 = + rs[2]; GOUT_SCATTER(gout, n*27+2, r1); 
+r1 = + rs[3]; GOUT_SCATTER(gout, n*27+3, r1); +r1 = + rs[4]; GOUT_SCATTER(gout, n*27+4, r1); +r1 = + rs[5]; GOUT_SCATTER(gout, n*27+5, r1); +r1 = + rs[6]; GOUT_SCATTER(gout, n*27+6, r1); +r1 = + rs[7]; GOUT_SCATTER(gout, n*27+7, r1); +r1 = + rs[8]; GOUT_SCATTER(gout, n*27+8, r1); +r1 = + rs[9]; GOUT_SCATTER(gout, n*27+9, r1); +r1 = + rs[10]; GOUT_SCATTER(gout, n*27+10, r1); +r1 = + rs[11]; GOUT_SCATTER(gout, n*27+11, r1); +r1 = + rs[12]; GOUT_SCATTER(gout, n*27+12, r1); +r1 = + rs[13]; GOUT_SCATTER(gout, n*27+13, r1); +r1 = + rs[14]; GOUT_SCATTER(gout, n*27+14, r1); +r1 = + rs[15]; GOUT_SCATTER(gout, n*27+15, r1); +r1 = + rs[16]; GOUT_SCATTER(gout, n*27+16, r1); +r1 = + rs[17]; GOUT_SCATTER(gout, n*27+17, r1); +r1 = + rs[18]; GOUT_SCATTER(gout, n*27+18, r1); +r1 = + rs[19]; GOUT_SCATTER(gout, n*27+19, r1); +r1 = + rs[20]; GOUT_SCATTER(gout, n*27+20, r1); +r1 = + rs[21]; GOUT_SCATTER(gout, n*27+21, r1); +r1 = + rs[22]; GOUT_SCATTER(gout, n*27+22, r1); +r1 = + rs[23]; GOUT_SCATTER(gout, n*27+23, r1); +r1 = + rs[24]; GOUT_SCATTER(gout, n*27+24, r1); +r1 = + rs[25]; GOUT_SCATTER(gout, n*27+25, r1); +r1 = + rs[26]; GOUT_SCATTER(gout, n*27+26, r1); +}} +void int1e_irrr_optimizer(CINTOpt **opt, int *atm, int natm, int *bas, int nbas, double *env) { +int ng[] = {0, 3, 0, 0, 3, 1, 1, 27}; +CINTall_1e_optimizer(opt, ng, atm, natm, bas, nbas, env); +} +int int1e_irrr_cart(double *out, int *dims, int *shls, +int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) { +int ng[] = {0, 3, 0, 0, 3, 1, 1, 27}; +CINTEnvVars envs; +CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env); +envs.f_gout = &CINTgout1e_int1e_irrr; +return CINT1e_drv(out, dims, &envs, opt, cache, &c2s_cart_1e); +} // int1e_irrr_cart +int int1e_irrr_sph(double *out, int *dims, int *shls, +int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) { +int ng[] = {0, 3, 0, 0, 3, 1, 1, 27}; +CINTEnvVars envs; +CINTinit_int1e_EnvVars(&envs, ng, shls, 
atm, natm, bas, nbas, env); +envs.f_gout = &CINTgout1e_int1e_irrr; +return CINT1e_drv(out, dims, &envs, opt, cache, &c2s_sph_1e); +} // int1e_irrr_sph +int int1e_irrr_spinor(double complex *out, int *dims, int *shls, +int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) { +int ng[] = {0, 3, 0, 0, 3, 1, 1, 27}; +CINTEnvVars envs; +CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env); +envs.f_gout = &CINTgout1e_int1e_irrr; +return CINT1e_spinor_drv(out, dims, &envs, opt, cache, &c2s_sf_1e); +} // int1e_irrr_spinor +ALL_CINT1E(int1e_irrr) +//ALL_CINT1E_FORTRAN_(cint1e_irrr) From 606f97f1b34d3bb96c2bb51e1f28fff71c1ab30a Mon Sep 17 00:00:00 2001 From: Qiming Sun Date: Fri, 23 Mar 2018 19:17:04 -0700 Subject: [PATCH 3/5] Export breit integrals --- ChangeLog | 2 ++ README.md | 4 +-- src/breit.c | 99 +++++++++++++++++++++++++++++++++++++++++------------ 3 files changed, 81 insertions(+), 24 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2cb628f..bd54609 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,5 @@ +Version 3.0.11 (2018-03-23): + * Export breit integrals Version 3.0.10 (2018-03-20): * New integrals , diff --git a/README.md b/README.md index 8b0d0f4..f78e0dd 100644 --- a/README.md +++ b/README.md @@ -3,9 +3,9 @@ qcint (quick libcint) An optimized libcint branch for X86 platform -version 3.0.10 +version 3.0.11 -2018-03-20 +2018-03-23 What is qcint diff --git a/src/breit.c b/src/breit.c index b4d3418..2fb12f0 100644 --- a/src/breit.c +++ b/src/breit.c @@ -38,52 +38,107 @@ #include "misc.h" #include "c2f.h" +static void _copy(double complex *out, double complex *in, + int *dims, int *counts); + #define DECLARE(X) int X(double complex *opijkl, int *shls, \ int *atm, int natm, \ int *bas, int nbas, double *env, CINTOpt *opt) #define BREIT0(X) \ -DECLARE(cint2e_##X); \ -DECLARE(cint2e_gauge_r1_##X); \ -DECLARE(cint2e_gauge_r2_##X); \ -void cint2e_breit_##X##_optimizer(CINTOpt **opt, int *atm, int natm, \ 
+DECLARE(int2e_##X); \ +DECLARE(int2e_gauge_r1_##X); \ +DECLARE(int2e_gauge_r2_##X); \ +void int2e_breit_##X##_optimizer(CINTOpt **opt, int *atm, int natm, \ int *bas, int nbas, double *env) \ { \ *opt = NULL; \ } \ -int cint2e_breit_##X(double complex *opijkl, int *shls, \ - int *atm, int natm, \ - int *bas, int nbas, double *env, CINTOpt *opt) \ +int int2e_breit_##X##_spinor(double complex *out, int *dims, int *shls, \ +int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) \ { \ - int has_value = cint2e_##X(opijkl, shls, atm, natm, bas, nbas, env, NULL); \ - \ - const int ip = CINTcgto_spinor(shls[0], bas); \ - const int jp = CINTcgto_spinor(shls[1], bas); \ - const int kp = CINTcgto_spinor(shls[2], bas); \ - const int lp = CINTcgto_spinor(shls[3], bas); \ - const int nop = ip * jp * kp * lp; \ - double complex *buf = malloc(sizeof(double complex) * nop); \ + if (out == NULL) { \ + int cache_size1 = int2e_gauge_r1_##X##_spinor(NULL, NULL, shls, \ + atm, natm, bas, nbas, env, NULL, cache); \ + int cache_size2 = int2e_gauge_r2_##X##_spinor(NULL, NULL, shls, \ + atm, natm, bas, nbas, env, NULL, cache); \ + return MAX(cache_size1, cache_size2); \ + } \ +\ + int counts[4]; \ + counts[0] = CINTcgto_spinor(shls[0], bas); \ + counts[1] = CINTcgto_spinor(shls[1], bas); \ + counts[2] = CINTcgto_spinor(shls[2], bas); \ + counts[3] = CINTcgto_spinor(shls[3], bas); \ + const int nop = counts[0] * counts[1] * counts[2] * counts[3]; \ + double complex *buf = malloc(sizeof(double complex) * nop * 2); \ + double complex *buf1 = buf + nop; \ int i; \ - has_value = (cint2e_gauge_r1_##X(buf, shls, atm, natm, bas, nbas, env, NULL) || \ +\ + int has_value = int2e_##X##_spinor(buf, NULL, shls, \ + atm, natm, bas, nbas, env, NULL, cache); \ +\ + has_value = (int2e_gauge_r1_##X##_spinor(buf1, NULL, shls, \ + atm, natm, bas, nbas, env, NULL, cache) || \ has_value); \ /* [1/2 gaunt] - [1/2 xxx*\sigma\dot r1] */ \ if (has_value) { \ for (i = 0; i < nop; i++) { \ 
- opijkl[i] = -opijkl[i] - buf[i]; \ + buf[i] = -buf1[i] - buf[i]; \ } \ } \ /* ... [- 1/2 xxx*\sigma\dot(-r2)] */ \ - has_value = (cint2e_gauge_r2_##X(buf, shls, atm, natm, bas, nbas, env, NULL) || \ + has_value = (int2e_gauge_r2_##X##_spinor(buf1, NULL, shls, \ + atm, natm, bas, nbas, env, NULL, cache) || \ has_value); \ - if (has_value) { \ + if (dims == NULL) { \ + for (i = 0; i < nop; i++) { \ + out[i] = (buf[i] + buf1[i]) * .5; \ + } \ + } else { \ for (i = 0; i < nop; i++) { \ - opijkl[i] = (opijkl[i] + buf[i]) * .5; \ + buf[i] = (buf[i] + buf1[i]) * .5; \ } \ + _copy(out, buf, dims, counts); \ } \ free(buf); \ return has_value; \ +} \ +int cint2e_breit_##X##_spinor(double *out, int *shls, int *atm, int natm, \ + int *bas, int nbas, double *env) { \ + return int2e_breit_##X##_spinor((double complex *)out, NULL, shls, \ + atm, natm, bas, nbas, env, NULL, NULL); \ } +static void _copy(double complex *out, double complex *in, + int *dims, int *counts) +{ + int ni = dims[0]; + int nj = dims[1]; + int nk = dims[2]; + int di = counts[0]; + int dj = counts[1]; + int dk = counts[2]; + int dl = counts[3]; + int nij = ni * nj; + int dij = di * dj; + int nijk = nij * nk; + int dijk = dij * dk; + int i, j, k, l; + double complex *pout, *pin; + for (l = 0; l < dl; l++) { + for (k = 0; k < dk; k++) { + pout = out + k * nij; + pin = in + k * dij; + for (j = 0; j < dj; j++) { + for (i = 0; i < di; i++) { + pout[j*ni+i] = pin[j*di+i]; + } } + } + out += nijk; + in += dijk; + } +} BREIT0(ssp1ssp2); BREIT0(ssp1sps2); @@ -233,7 +288,7 @@ int int2e_breit_r1p2_spinor(double complex *out, int *dims, int *shls, return CINT2e_spinor_drv(out, dims, &envs, opt, cache, &c2s_sf_2e1i, &c2s_sf_2e2i); } // int2e_breit_r1p2_spinor ALL_CINT(int2e_breit_r1p2) -//ALL_CINT_FORTRAN_(cint2e_breit_r1p2) +//ALL_CINT_FORTRAN_(int2e_breit_r1p2) /* based on * '("int2e_breit_r2p2" ( nabla \, r0 \| dot nabla-r12 \| \, nabla )) @@ -376,4 +431,4 @@ int int2e_breit_r2p2_spinor(double complex *out, int 
*dims, int *shls, return CINT2e_spinor_drv(out, dims, &envs, opt, cache, &c2s_sf_2e1i, &c2s_sf_2e2i); } // int2e_breit_r2p2_spinor ALL_CINT(int2e_breit_r2p2) -//ALL_CINT_FORTRAN_(cint2e_breit_r2p2) +//ALL_CINT_FORTRAN_(int2e_breit_r2p2) From 8246c590402fabfee58be96e779843483fe48497 Mon Sep 17 00:00:00 2001 From: Qiming Sun Date: Fri, 23 Mar 2018 19:32:50 -0700 Subject: [PATCH 4/5] Improved breit integral driver --- src/breit.c | 158 ++++++++++++++++++++++++++++------------------------ 1 file changed, 85 insertions(+), 73 deletions(-) diff --git a/src/breit.c b/src/breit.c index 2fb12f0..c906b22 100644 --- a/src/breit.c +++ b/src/breit.c @@ -38,112 +38,124 @@ #include "misc.h" #include "c2f.h" -static void _copy(double complex *out, double complex *in, - int *dims, int *counts); +#define DECLARE(X) int X(double complex *out, int *dims, int *shls, \ + int *atm, int natm, int *bas, int nbas, double *env, \ + CINTOpt *opt, double *cache) -#define DECLARE(X) int X(double complex *opijkl, int *shls, \ - int *atm, int natm, \ - int *bas, int nbas, double *env, CINTOpt *opt) - -#define BREIT0(X) \ -DECLARE(int2e_##X); \ -DECLARE(int2e_gauge_r1_##X); \ -DECLARE(int2e_gauge_r2_##X); \ +#define BREIT0(X, ncomp_tensor) \ +DECLARE(int2e_##X##_spinor); \ +DECLARE(int2e_gauge_r1_##X##_spinor); \ +DECLARE(int2e_gauge_r2_##X##_spinor); \ void int2e_breit_##X##_optimizer(CINTOpt **opt, int *atm, int natm, \ - int *bas, int nbas, double *env) \ + int *bas, int nbas, double *env) \ { \ *opt = NULL; \ } \ int int2e_breit_##X##_spinor(double complex *out, int *dims, int *shls, \ -int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) \ + int *atm, int natm, int *bas, int nbas, double *env, \ + CINTOpt *opt, double *cache) \ { \ - if (out == NULL) { \ - int cache_size1 = int2e_gauge_r1_##X##_spinor(NULL, NULL, shls, \ - atm, natm, bas, nbas, env, NULL, cache); \ - int cache_size2 = int2e_gauge_r2_##X##_spinor(NULL, NULL, shls, \ - atm, natm, bas, nbas, env, 
NULL, cache); \ - return MAX(cache_size1, cache_size2); \ - } \ -\ - int counts[4]; \ - counts[0] = CINTcgto_spinor(shls[0], bas); \ - counts[1] = CINTcgto_spinor(shls[1], bas); \ - counts[2] = CINTcgto_spinor(shls[2], bas); \ - counts[3] = CINTcgto_spinor(shls[3], bas); \ - const int nop = counts[0] * counts[1] * counts[2] * counts[3]; \ - double complex *buf = malloc(sizeof(double complex) * nop * 2); \ - double complex *buf1 = buf + nop; \ - int i; \ -\ - int has_value = int2e_##X##_spinor(buf, NULL, shls, \ - atm, natm, bas, nbas, env, NULL, cache); \ -\ - has_value = (int2e_gauge_r1_##X##_spinor(buf1, NULL, shls, \ - atm, natm, bas, nbas, env, NULL, cache) || \ - has_value); \ - /* [1/2 gaunt] - [1/2 xxx*\sigma\dot r1] */ \ - if (has_value) { \ - for (i = 0; i < nop; i++) { \ - buf[i] = -buf1[i] - buf[i]; \ - } \ - } \ - /* ... [- 1/2 xxx*\sigma\dot(-r2)] */ \ - has_value = (int2e_gauge_r2_##X##_spinor(buf1, NULL, shls, \ - atm, natm, bas, nbas, env, NULL, cache) || \ - has_value); \ - if (dims == NULL) { \ - for (i = 0; i < nop; i++) { \ - out[i] = (buf[i] + buf1[i]) * .5; \ - } \ - } else { \ - for (i = 0; i < nop; i++) { \ - buf[i] = (buf[i] + buf1[i]) * .5; \ - } \ - _copy(out, buf, dims, counts); \ - } \ - free(buf); \ - return has_value; \ + return _int2e_breit_drv(out, dims, shls, atm, natm, bas, nbas, env, opt, cache, \ + ncomp_tensor, &int2e_##X##_spinor, \ + &int2e_gauge_r1_##X##_spinor, &int2e_gauge_r2_##X##_spinor); \ } \ -int cint2e_breit_##X##_spinor(double *out, int *shls, int *atm, int natm, \ - int *bas, int nbas, double *env) { \ - return int2e_breit_##X##_spinor((double complex *)out, NULL, shls, \ - atm, natm, bas, nbas, env, NULL, NULL); \ +int cint2e_breit_##X##_spinor(double complex *out, int *shls, \ + int *atm, int natm, int *bas, int nbas, double *env, \ + CINTOpt *opt) \ +{ \ + return int2e_breit_##X##_spinor(out, NULL, shls, \ + atm, natm, bas, nbas, env, opt, NULL); \ } -static void _copy(double complex *out, double complex *in, - 
int *dims, int *counts) +static void _copy_to_out(double complex *out, double complex *in, int *dims, int *counts) { + if (out == in) { + return; + } int ni = dims[0]; int nj = dims[1]; int nk = dims[2]; + int nij = ni * nj; + int nijk = nij * nk; int di = counts[0]; int dj = counts[1]; int dk = counts[2]; int dl = counts[3]; - int nij = ni * nj; int dij = di * dj; - int nijk = nij * nk; int dijk = dij * dk; int i, j, k, l; - double complex *pout, *pin; + double complex *pin, *pout; for (l = 0; l < dl; l++) { for (k = 0; k < dk; k++) { - pout = out + k * nij; pin = in + k * dij; + pout = out + k * nij; for (j = 0; j < dj; j++) { for (i = 0; i < di; i++) { pout[j*ni+i] = pin[j*di+i]; } } } - out += nijk; in += dijk; + out += nijk; + } +} + +static int _int2e_breit_drv(double complex *out, int *dims, int *shls, + int *atm, int natm, int *bas, int nbas, double *env, + CINTOpt *opt, double *cache, int ncomp_tensor, + int (*f_gaunt)(), int (*f_gauge_r1)(), int (*f_gauge_r2)()) +{ + if (out == NULL) { + int cache_size1 = (*f_gauge_r1)(NULL, NULL, shls, + atm, natm, bas, nbas, env, NULL, cache); + int cache_size2 = (*f_gauge_r2)(NULL, NULL, shls, + atm, natm, bas, nbas, env, NULL, cache); + return MAX(cache_size1, cache_size2); + } + + int counts[4]; + counts[0] = CINTcgto_spinor(shls[0], bas); + counts[1] = CINTcgto_spinor(shls[1], bas); + counts[2] = CINTcgto_spinor(shls[2], bas); + counts[3] = CINTcgto_spinor(shls[3], bas); + int nop = counts[0] * counts[1] * counts[2] * counts[3] * ncomp_tensor; + double complex *buf = malloc(sizeof(double complex) * nop*2); + double complex *buf1; + if (dims == NULL) { + dims = counts; + buf1 = out; + } else { + buf1 = buf + nop; + } + + int has_value = (*f_gaunt)(buf1, NULL, shls, atm, natm, bas, nbas, env, NULL, cache); + + int i; + has_value = ((*f_gauge_r1)(buf, NULL, shls, atm, natm, bas, nbas, env, NULL, cache) || + has_value); + /* [1/2 gaunt] - [1/2 xxx*\sigma1\dot r1] */ + if (has_value) { + for (i = 0; i < nop; i++) { + 
buf1[i] = -buf1[i] - buf[i]; + } } + /* ... [- 1/2 xxx*\sigma1\dot(-r2)] */ + has_value = ((*f_gauge_r2)(buf, NULL, shls, atm, natm, bas, nbas, env, NULL, cache) || + has_value); + if (has_value) { + for (i = 0; i < nop; i++) { + buf1[i] = (buf1[i] + buf[i]) * .5; + } + } + _copy_to_out(out, buf1, dims, counts); + free(buf); + return has_value; } -BREIT0(ssp1ssp2); -BREIT0(ssp1sps2); -BREIT0(sps1ssp2); -BREIT0(sps1sps2); + +BREIT0(ssp1ssp2, 1); +BREIT0(ssp1sps2, 1); +BREIT0(sps1ssp2, 1); +BREIT0(sps1sps2, 1); /* based on * '("int2e_breit_r1p2" ( nabla \, r0 \| dot nabla-r12 \| \, nabla )) From 824ccc7052d8362366439a209ee47d136d99a846 Mon Sep 17 00:00:00 2001 From: Qiming Sun Date: Sun, 15 Apr 2018 17:14:27 -0700 Subject: [PATCH 5/5] Update version tag in cmakefile --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bdd5b33..8e81712 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required (VERSION 2.6) project (qcint C) -set(qcint_VERSION "3.0.6") +set(qcint_VERSION "3.0.11") if ("${CMAKE_BUILD_TYPE}" STREQUAL "") set(CMAKE_BUILD_TYPE RELWITHDEBINFO)