From e77a35cc3b0419a5d9d21c99d6487a5f40e0cee7 Mon Sep 17 00:00:00 2001
From: Qiming Sun <osirpt.sun@gmail.com>
Date: Fri, 9 Mar 2018 11:25:06 -0800
Subject: [PATCH 1/5] Add int1e_ovlpip and int1e_kinip integrals

---
 ChangeLog             |   2 +
 README.md             |   4 +-
 scripts/auto_intor.cl |   2 +
 src/autocode/grad1.c  | 145 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 151 insertions(+), 2 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index e12c944..4703da5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,5 @@
+Version 3.0.9  (2018-03-08):
+	* New integrals (i |nabla j) and  -1/2 *(i | nabla^2 j)
 Version 3.0.8  (2018-02-14):
 	* Fix Fortran APIs
 Version 3.0.7  (2018-01-15):
diff --git a/README.md b/README.md
index 4e22349..c0865fc 100644
--- a/README.md
+++ b/README.md
@@ -3,9 +3,9 @@ qcint (quick libcint)
 
 An optimized libcint branch for X86 platform
 
-version 3.0.8
+version 3.0.9
 
-2018-02-14
+2018-03-08
 
 
 What is qcint
diff --git a/scripts/auto_intor.cl b/scripts/auto_intor.cl
index 57237a0..5f2540e 100644
--- a/scripts/auto_intor.cl
+++ b/scripts/auto_intor.cl
@@ -109,7 +109,9 @@
 
 (gen-cint "grad1.c"
   '("int1e_ipovlp"              (nabla \|))
+  '("int1e_ovlpip"              (\| nabla))
   '("int1e_ipkin"               (.5 nabla \| p dot p))
+  '("int1e_kinip"               (.5 \| p dot p nabla))
   '("int1e_ipnuc"               (nabla \| nuc \|))
   '("int1e_iprinv"              (nabla \| rinv \|))
   '("int1e_rinv"                (\| rinv \|))
diff --git a/src/autocode/grad1.c b/src/autocode/grad1.c
index 47f7210..51c3fa4 100644
--- a/src/autocode/grad1.c
+++ b/src/autocode/grad1.c
@@ -85,6 +85,59 @@ return CINT1e_spinor_drv(out, dims, &envs, opt, cache, &c2s_sf_1e);
 } // int1e_ipovlp_spinor
 ALL_CINT1E(int1e_ipovlp)
 //ALL_CINT1E_FORTRAN_(cint1e_ipovlp)
+/* <i|OVLP |NABLA j>: gout kernel for int1e_ovlpip; for every n < envs->nf it scatters 3 products into gout slots n*3+0..2. */
+static void CINTgout1e_int1e_ovlpip(double *gout, double *g, int *idx, CINTEnvVars *envs, int count) {
+CINTg1e_ovlp(g, envs, count); // called first; g0 below aliases g and is the input of G1E_D_J
+int nf = envs->nf;
+int nfc = nf * 3; // not named again in this body; presumably read by DECLARE_GOUT/GOUT_SCATTER -- TODO confirm
+int ix, iy, iz, n;
+DECLARE_GOUT;
+double *RESTRICT g0 = g;
+double *RESTRICT g1 = g0  + envs->g_size * 3 * SIMDD; // second buffer, g_size*3*SIMDD doubles after g0
+__MD r1;
+__MD rs[3];
+G1E_D_J(g1, g0, envs->i_l+0, envs->j_l+0, 0); // g1 <- G1E_D_J(g0); per the header comment this is the NABLA-on-j factor
+for (n = 0; n < nf; n++) {
+ix = idx[0+n*3]; // idx holds three offsets per n, one for each factor of the product
+iy = idx[1+n*3];
+iz = idx[2+n*3];
+rs[0] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); // rs[c]: g1 on factor c, g0 on the other two
+rs[1] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD);
+rs[2] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD);
+r1 = + rs[0]; GOUT_SCATTER(gout, n*3+0, r1); // rs[c] goes unchanged to slot n*3+c
+r1 = + rs[1]; GOUT_SCATTER(gout, n*3+1, r1);
+r1 = + rs[2]; GOUT_SCATTER(gout, n*3+2, r1);
+}}
+void int1e_ovlpip_optimizer(CINTOpt **opt, int *atm, int natm, int *bas, int nbas, double *env) { // forwards every argument plus ng to CINTall_1e_optimizer
+int ng[] = {0, 1, 0, 0, 1, 1, 1, 3}; // same literal as in int1e_ovlpip_cart/_sph/_spinor
+CINTall_1e_optimizer(opt, ng, atm, natm, bas, nbas, env);
+}
+int int1e_ovlpip_cart(double *out, int *dims, int *shls, // _cart entry point: returns the result of CINT1e_drv run with c2s_cart_1e
+int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) {
+int ng[] = {0, 1, 0, 0, 1, 1, 1, 3}; // final entry 3 equals the 3 slots per n written by the gout kernel; other fields are consumed by CINTinit_int1e_EnvVars (not shown)
+CINTEnvVars envs;
+CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env);
+envs.f_gout = &CINTgout1e_int1e_ovlpip; // install the kernel defined above as the f_gout callback
+return CINT1e_drv(out, dims, &envs, opt, cache, &c2s_cart_1e);
+} // int1e_ovlpip_cart
+int int1e_ovlpip_sph(double *out, int *dims, int *shls, // _sph entry point: same setup as the _cart variant but CINT1e_drv is given c2s_sph_1e
+int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) {
+int ng[] = {0, 1, 0, 0, 1, 1, 1, 3}; // final entry 3 equals the 3 slots per n written by the gout kernel
+CINTEnvVars envs;
+CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env);
+envs.f_gout = &CINTgout1e_int1e_ovlpip; // kernel used as the f_gout callback
+return CINT1e_drv(out, dims, &envs, opt, cache, &c2s_sph_1e);
+} // int1e_ovlpip_sph
+int int1e_ovlpip_spinor(double complex *out, int *dims, int *shls, // _spinor entry point: complex output, returns the result of CINT1e_spinor_drv run with c2s_sf_1e
+int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) {
+int ng[] = {0, 1, 0, 0, 1, 1, 1, 3}; // final entry 3 equals the 3 slots per n written by the gout kernel
+CINTEnvVars envs;
+CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env);
+envs.f_gout = &CINTgout1e_int1e_ovlpip; // kernel used as the f_gout callback
+return CINT1e_spinor_drv(out, dims, &envs, opt, cache, &c2s_sf_1e);
+} // int1e_ovlpip_spinor
+ALL_CINT1E(int1e_ovlpip) // macro defined outside this patch; its expansion is not shown here
+//ALL_CINT1E_FORTRAN_(cint1e_ovlpip)
 /* <NABLA i|OVLP |P DOT P j> */
 static void CINTgout1e_int1e_ipkin(double *gout, double *g, int *idx, CINTEnvVars *envs, int count) {
 CINTg1e_ovlp(g, envs, count);
@@ -177,6 +230,98 @@ return CINT1e_spinor_drv(out, dims, &envs, opt, cache, &c2s_sf_1e);
 } // int1e_ipkin_spinor
 ALL_CINT1E(int1e_ipkin)
 //ALL_CINT1E_FORTRAN_(cint1e_ipkin)
+/* <i|OVLP |P DOT P NABLA j>: gout kernel for int1e_kinip; for every n < envs->nf it scatters 3 negated sums into gout slots n*3+0..2. */
+static void CINTgout1e_int1e_kinip(double *gout, double *g, int *idx, CINTEnvVars *envs, int count) {
+CINTg1e_ovlp(g, envs, count); // called first; g0 below aliases g and is the root of the G1E_D_J chain
+int nf = envs->nf;
+int nfc = nf * 3; // not named again in this body; presumably read by DECLARE_GOUT/GOUT_SCATTER -- TODO confirm
+int ix, iy, iz, n;
+DECLARE_GOUT;
+double *RESTRICT g0 = g;
+double *RESTRICT g1 = g0  + envs->g_size * 3 * SIMDD; // g1..g7: consecutive buffers, each g_size*3*SIMDD doubles after the previous one
+double *RESTRICT g2 = g1  + envs->g_size * 3 * SIMDD;
+double *RESTRICT g3 = g2  + envs->g_size * 3 * SIMDD;
+double *RESTRICT g4 = g3  + envs->g_size * 3 * SIMDD;
+double *RESTRICT g5 = g4  + envs->g_size * 3 * SIMDD;
+double *RESTRICT g6 = g5  + envs->g_size * 3 * SIMDD;
+double *RESTRICT g7 = g6  + envs->g_size * 3 * SIMDD;
+__MD r1;
+__MD rs[27];
+G1E_D_J(g1, g0, envs->i_l+0, envs->j_l+0, 0); // g1 <- D_J(g0)
+G1E_D_J(g2, g0, envs->i_l+0, envs->j_l+1, 0); // g2 <- D_J(g0), with j_l+1
+G1E_D_J(g3, g2, envs->i_l+0, envs->j_l+0, 0); // g3 <- D_J(g2)
+G1E_D_J(g4, g0, envs->i_l+0, envs->j_l+2, 0); // g4 <- D_J(g0), with j_l+2
+G1E_D_J(g5, g4, envs->i_l+0, envs->j_l+0, 0); // g5 <- D_J(g4)
+G1E_D_J(g6, g4, envs->i_l+0, envs->j_l+1, 0); // g6 <- D_J(g4), with j_l+1
+G1E_D_J(g7, g6, envs->i_l+0, envs->j_l+0, 0); // g7 <- D_J(g6)
+for (n = 0; n < nf; n++) {
+ix = idx[0+n*3]; // idx holds three offsets per n, one for each factor of the product
+iy = idx[1+n*3];
+iz = idx[2+n*3];
+rs[0] = MM_LOAD(g7+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); // rs[9*a+3*b+c]: buffer number on each factor = 4 (if it is factor a) + 2 (if b) + 1 (if c)
+rs[1] = MM_LOAD(g6+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD);
+rs[2] = MM_LOAD(g6+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD);
+rs[3] = MM_LOAD(g5+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD);
+rs[4] = MM_LOAD(g4+ix*SIMDD) * MM_LOAD(g3+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD);
+rs[5] = MM_LOAD(g4+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD);
+rs[6] = MM_LOAD(g5+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD);
+rs[7] = MM_LOAD(g4+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD);
+rs[8] = MM_LOAD(g4+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g3+iz*SIMDD);
+rs[9] = MM_LOAD(g3+ix*SIMDD) * MM_LOAD(g4+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD);
+rs[10] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g5+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD);
+rs[11] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g4+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD);
+rs[12] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g6+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD);
+rs[13] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g7+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD);
+rs[14] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g6+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD);
+rs[15] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g4+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD);
+rs[16] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g5+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD);
+rs[17] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g4+iy*SIMDD) * MM_LOAD(g3+iz*SIMDD);
+rs[18] = MM_LOAD(g3+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g4+iz*SIMDD);
+rs[19] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g4+iz*SIMDD);
+rs[20] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g5+iz*SIMDD);
+rs[21] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g4+iz*SIMDD);
+rs[22] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g3+iy*SIMDD) * MM_LOAD(g4+iz*SIMDD);
+rs[23] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g5+iz*SIMDD);
+rs[24] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g6+iz*SIMDD);
+rs[25] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g6+iz*SIMDD);
+rs[26] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g7+iz*SIMDD);
+r1 = - rs[0] - rs[12] - rs[24]; GOUT_SCATTER(gout, n*3+0, r1); // slot c = -(rs[c] + rs[12+c] + rs[24+c]); rs[3..11] and rs[15..23] are computed above but never read
+r1 = - rs[1] - rs[13] - rs[25]; GOUT_SCATTER(gout, n*3+1, r1);
+r1 = - rs[2] - rs[14] - rs[26]; GOUT_SCATTER(gout, n*3+2, r1);
+}}
+void int1e_kinip_optimizer(CINTOpt **opt, int *atm, int natm, int *bas, int nbas, double *env) { // forwards every argument plus ng to CINTall_1e_optimizer
+int ng[] = {0, 3, 0, 0, 3, 1, 1, 3}; // same literal as in int1e_kinip_cart/_sph/_spinor
+CINTall_1e_optimizer(opt, ng, atm, natm, bas, nbas, env);
+}
+int int1e_kinip_cart(double *out, int *dims, int *shls, // _cart entry point: returns the result of CINT1e_drv run with c2s_cart_1e
+int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) {
+int ng[] = {0, 3, 0, 0, 3, 1, 1, 3}; // final entry 3 equals the 3 slots per n written by the gout kernel; other fields are consumed by CINTinit_int1e_EnvVars (not shown)
+CINTEnvVars envs;
+CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env);
+envs.f_gout = &CINTgout1e_int1e_kinip; // install the kernel defined above as the f_gout callback
+envs.common_factor *= 0.5; // the .5 prefactor from the generator entry (.5 \| p dot p nabla)
+return CINT1e_drv(out, dims, &envs, opt, cache, &c2s_cart_1e);
+} // int1e_kinip_cart
+int int1e_kinip_sph(double *out, int *dims, int *shls, // _sph entry point: same setup as the _cart variant but CINT1e_drv is given c2s_sph_1e
+int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) {
+int ng[] = {0, 3, 0, 0, 3, 1, 1, 3}; // final entry 3 equals the 3 slots per n written by the gout kernel
+CINTEnvVars envs;
+CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env);
+envs.f_gout = &CINTgout1e_int1e_kinip; // kernel used as the f_gout callback
+envs.common_factor *= 0.5; // the .5 prefactor from the generator entry (.5 \| p dot p nabla)
+return CINT1e_drv(out, dims, &envs, opt, cache, &c2s_sph_1e);
+} // int1e_kinip_sph
+int int1e_kinip_spinor(double complex *out, int *dims, int *shls, // _spinor entry point: complex output, returns the result of CINT1e_spinor_drv run with c2s_sf_1e
+int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) {
+int ng[] = {0, 3, 0, 0, 3, 1, 1, 3}; // final entry 3 equals the 3 slots per n written by the gout kernel
+CINTEnvVars envs;
+CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env);
+envs.f_gout = &CINTgout1e_int1e_kinip; // kernel used as the f_gout callback
+envs.common_factor *= 0.5; // the .5 prefactor from the generator entry (.5 \| p dot p nabla)
+return CINT1e_spinor_drv(out, dims, &envs, opt, cache, &c2s_sf_1e);
+} // int1e_kinip_spinor
+ALL_CINT1E(int1e_kinip) // macro defined outside this patch; its expansion is not shown here
+//ALL_CINT1E_FORTRAN_(cint1e_kinip)
 /* <NABLA i|NUC |j> */
 static void CINTgout1e_int1e_ipnuc(double *gout, double *g, int *idx, CINTEnvVars *envs, int count) {
 int nf = envs->nf;

From 02f7bab139086003b233301c05dfe51e3e39ff54 Mon Sep 17 00:00:00 2001
From: Qiming Sun <osirpt.sun@gmail.com>
Date: Tue, 20 Mar 2018 17:29:06 -0700
Subject: [PATCH 2/5] Add int1e_irp, int1e_irrp, int1e_irpr and int1e_irrr integrals

---
 ChangeLog             |   6 +
 README.md             |   4 +-
 scripts/auto_intor.cl |   4 +
 src/autocode/intor1.c | 424 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 436 insertions(+), 2 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 4703da5..2cb628f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+Version 3.0.10  (2018-03-20):
+	* New integrals
+	  <i | r p | j>,
+	  <i | r r p | j>,
+	  <i | r p r | j> and
+	  <i | r r r | j>
 Version 3.0.9  (2018-03-08):
 	* New integrals (i |nabla j) and  -1/2 *(i | nabla^2 j)
 Version 3.0.8  (2018-02-14):
diff --git a/README.md b/README.md
index c0865fc..8b0d0f4 100644
--- a/README.md
+++ b/README.md
@@ -3,9 +3,9 @@ qcint (quick libcint)
 
 An optimized libcint branch for X86 platform
 
-version 3.0.9
+version 3.0.10
 
-2018-03-08
+2018-03-20
 
 
 What is qcint
diff --git a/scripts/auto_intor.cl b/scripts/auto_intor.cl
index 5f2540e..3eba739 100644
--- a/scripts/auto_intor.cl
+++ b/scripts/auto_intor.cl
@@ -35,6 +35,10 @@
   '("int1e_prinvxp"             (p* \| rinv cross p \| ))
   '("int1e_pnucxp"              (p* \| nuc cross p \| ))
   '("int2e_p1vxp1"              (p* \, cross p \| \, )) ; SSO
+  '("int1e_irp"                 ( \| rc nabla \| ))
+  '("int1e_irrp"                ( \| rc rc nabla \| ))
+  '("int1e_irpr"                ( \| rc nabla rc \| ))
+  '("int1e_irrr"                ( \| rc rc rc \| ))
 )
 
 (gen-cint "intor2.c"
diff --git a/src/autocode/intor1.c b/src/autocode/intor1.c
index 9630e14..0304158 100644
--- a/src/autocode/intor1.c
+++ b/src/autocode/intor1.c
@@ -2350,3 +2350,427 @@ return CINT2e_spinor_drv(out, dims, &envs, opt, cache, &c2s_sf_2e1, &c2s_sf_2e2)
 } // int2e_p1vxp1_spinor
 ALL_CINT(int2e_p1vxp1)
 //ALL_CINT_FORTRAN_(cint2e_p1vxp1)
+/* <i|RC NABLA |j>: gout kernel for int1e_irp; for every n < envs->nf it scatters 9 products into gout slots n*9+0..8. */
+static void CINTgout1e_int1e_irp(double *gout, double *g, int *idx, CINTEnvVars *envs, int count) {
+CINTg1e_ovlp(g, envs, count); // called first; g0 below aliases g and is the root of the g1..g3 chain
+int nf = envs->nf;
+int nfc = nf * 9; // not named again in this body; presumably read by DECLARE_GOUT/GOUT_SCATTER -- TODO confirm
+int ix, iy, iz, n;
+DECLARE_GOUT;
+double *RESTRICT g0 = g;
+double *RESTRICT g1 = g0  + envs->g_size * 3 * SIMDD; // g1..g3: consecutive buffers, each g_size*3*SIMDD doubles after the previous one
+double *RESTRICT g2 = g1  + envs->g_size * 3 * SIMDD;
+double *RESTRICT g3 = g2  + envs->g_size * 3 * SIMDD;
+double drj[3]; // envs->rj minus env[PTR_COMMON_ORIG..+2]; not named again here, presumably read inside G1E_RCJ -- TODO confirm
+drj[0] = envs->rj[0] - envs->env[PTR_COMMON_ORIG+0];
+drj[1] = envs->rj[1] - envs->env[PTR_COMMON_ORIG+1];
+drj[2] = envs->rj[2] - envs->env[PTR_COMMON_ORIG+2];
+__MD r1;
+__MD rs[9];
+G1E_D_J(g1, g0, envs->i_l+0, envs->j_l+0, 0); // g1 <- D_J(g0)
+G1E_RCJ(g2, g0, envs->i_l+0, envs->j_l+1, 0); // g2 <- RCJ(g0), with j_l+1
+G1E_D_J(g3, g2, envs->i_l+0, envs->j_l+0, 0); // g3 <- D_J(g2)
+for (n = 0; n < nf; n++) {
+ix = idx[0+n*3]; // idx holds three offsets per n, one for each factor of the product
+iy = idx[1+n*3];
+iz = idx[2+n*3];
+rs[0] = MM_LOAD(g3+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); // rs[3*a+b]: buffer number on each factor = 2 (if it is factor a) + 1 (if b)
+rs[1] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD);
+rs[2] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD);
+rs[3] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD);
+rs[4] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g3+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD);
+rs[5] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD);
+rs[6] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD);
+rs[7] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD);
+rs[8] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g3+iz*SIMDD);
+r1 = + rs[0]; GOUT_SCATTER(gout, n*9+0, r1); // rs[k] goes unchanged to slot n*9+k
+r1 = + rs[1]; GOUT_SCATTER(gout, n*9+1, r1);
+r1 = + rs[2]; GOUT_SCATTER(gout, n*9+2, r1);
+r1 = + rs[3]; GOUT_SCATTER(gout, n*9+3, r1);
+r1 = + rs[4]; GOUT_SCATTER(gout, n*9+4, r1);
+r1 = + rs[5]; GOUT_SCATTER(gout, n*9+5, r1);
+r1 = + rs[6]; GOUT_SCATTER(gout, n*9+6, r1);
+r1 = + rs[7]; GOUT_SCATTER(gout, n*9+7, r1);
+r1 = + rs[8]; GOUT_SCATTER(gout, n*9+8, r1);
+}}
+void int1e_irp_optimizer(CINTOpt **opt, int *atm, int natm, int *bas, int nbas, double *env) { // forwards every argument plus ng to CINTall_1e_optimizer
+int ng[] = {0, 2, 0, 0, 2, 1, 1, 9}; // same literal as in int1e_irp_cart/_sph/_spinor
+CINTall_1e_optimizer(opt, ng, atm, natm, bas, nbas, env);
+}
+int int1e_irp_cart(double *out, int *dims, int *shls, // _cart entry point: returns the result of CINT1e_drv run with c2s_cart_1e
+int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) {
+int ng[] = {0, 2, 0, 0, 2, 1, 1, 9}; // final entry 9 equals the 9 slots per n written by the gout kernel; other fields are consumed by CINTinit_int1e_EnvVars (not shown)
+CINTEnvVars envs;
+CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env);
+envs.f_gout = &CINTgout1e_int1e_irp; // install the kernel defined above as the f_gout callback
+return CINT1e_drv(out, dims, &envs, opt, cache, &c2s_cart_1e);
+} // int1e_irp_cart
+int int1e_irp_sph(double *out, int *dims, int *shls, // _sph entry point: same setup as the _cart variant but CINT1e_drv is given c2s_sph_1e
+int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) {
+int ng[] = {0, 2, 0, 0, 2, 1, 1, 9}; // final entry 9 equals the 9 slots per n written by the gout kernel
+CINTEnvVars envs;
+CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env);
+envs.f_gout = &CINTgout1e_int1e_irp; // kernel used as the f_gout callback
+return CINT1e_drv(out, dims, &envs, opt, cache, &c2s_sph_1e);
+} // int1e_irp_sph
+int int1e_irp_spinor(double complex *out, int *dims, int *shls, // _spinor entry point: complex output, returns the result of CINT1e_spinor_drv run with c2s_sf_1e
+int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) {
+int ng[] = {0, 2, 0, 0, 2, 1, 1, 9}; // final entry 9 equals the 9 slots per n written by the gout kernel
+CINTEnvVars envs;
+CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env);
+envs.f_gout = &CINTgout1e_int1e_irp; // kernel used as the f_gout callback
+return CINT1e_spinor_drv(out, dims, &envs, opt, cache, &c2s_sf_1e);
+} // int1e_irp_spinor
+ALL_CINT1E(int1e_irp) // macro defined outside this patch; its expansion is not shown here
+//ALL_CINT1E_FORTRAN_(cint1e_irp)
+/* <i|RC RC NABLA |j>: gout kernel for int1e_irrp; for every n < envs->nf it scatters 27 products into gout slots n*27+0..26. */
+static void CINTgout1e_int1e_irrp(double *gout, double *g, int *idx, CINTEnvVars *envs, int count) {
+CINTg1e_ovlp(g, envs, count); // called first; g0 below aliases g and is the root of the g1..g7 chain
+int nf = envs->nf;
+int nfc = nf * 27; // not named again in this body; presumably read by DECLARE_GOUT/GOUT_SCATTER -- TODO confirm
+int ix, iy, iz, n;
+DECLARE_GOUT;
+double *RESTRICT g0 = g;
+double *RESTRICT g1 = g0  + envs->g_size * 3 * SIMDD; // g1..g7: consecutive buffers, each g_size*3*SIMDD doubles after the previous one
+double *RESTRICT g2 = g1  + envs->g_size * 3 * SIMDD;
+double *RESTRICT g3 = g2  + envs->g_size * 3 * SIMDD;
+double *RESTRICT g4 = g3  + envs->g_size * 3 * SIMDD;
+double *RESTRICT g5 = g4  + envs->g_size * 3 * SIMDD;
+double *RESTRICT g6 = g5  + envs->g_size * 3 * SIMDD;
+double *RESTRICT g7 = g6  + envs->g_size * 3 * SIMDD;
+double drj[3]; // envs->rj minus env[PTR_COMMON_ORIG..+2]; not named again here, presumably read inside G1E_RCJ -- TODO confirm
+drj[0] = envs->rj[0] - envs->env[PTR_COMMON_ORIG+0];
+drj[1] = envs->rj[1] - envs->env[PTR_COMMON_ORIG+1];
+drj[2] = envs->rj[2] - envs->env[PTR_COMMON_ORIG+2];
+__MD r1;
+__MD rs[27];
+G1E_D_J(g1, g0, envs->i_l+0, envs->j_l+0, 0); // g1 <- D_J(g0)
+G1E_RCJ(g2, g0, envs->i_l+0, envs->j_l+1, 0); // g2 <- RCJ(g0), with j_l+1
+G1E_D_J(g3, g2, envs->i_l+0, envs->j_l+0, 0); // g3 <- D_J(g2)
+G1E_RCJ(g4, g0, envs->i_l+0, envs->j_l+2, 0); // g4 <- RCJ(g0), with j_l+2
+G1E_D_J(g5, g4, envs->i_l+0, envs->j_l+0, 0); // g5 <- D_J(g4)
+G1E_RCJ(g6, g4, envs->i_l+0, envs->j_l+1, 0); // g6 <- RCJ(g4), with j_l+1
+G1E_D_J(g7, g6, envs->i_l+0, envs->j_l+0, 0); // g7 <- D_J(g6)
+for (n = 0; n < nf; n++) {
+ix = idx[0+n*3]; // idx holds three offsets per n, one for each factor of the product
+iy = idx[1+n*3];
+iz = idx[2+n*3];
+rs[0] = MM_LOAD(g7+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); // rs[9*a+3*b+c]: buffer number on each factor = 4 (if it is factor a) + 2 (if b) + 1 (if c)
+rs[1] = MM_LOAD(g6+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD);
+rs[2] = MM_LOAD(g6+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD);
+rs[3] = MM_LOAD(g5+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD);
+rs[4] = MM_LOAD(g4+ix*SIMDD) * MM_LOAD(g3+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD);
+rs[5] = MM_LOAD(g4+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD);
+rs[6] = MM_LOAD(g5+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD);
+rs[7] = MM_LOAD(g4+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD);
+rs[8] = MM_LOAD(g4+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g3+iz*SIMDD);
+rs[9] = MM_LOAD(g3+ix*SIMDD) * MM_LOAD(g4+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD);
+rs[10] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g5+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD);
+rs[11] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g4+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD);
+rs[12] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g6+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD);
+rs[13] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g7+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD);
+rs[14] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g6+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD);
+rs[15] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g4+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD);
+rs[16] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g5+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD);
+rs[17] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g4+iy*SIMDD) * MM_LOAD(g3+iz*SIMDD);
+rs[18] = MM_LOAD(g3+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g4+iz*SIMDD);
+rs[19] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g4+iz*SIMDD);
+rs[20] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g5+iz*SIMDD);
+rs[21] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g4+iz*SIMDD);
+rs[22] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g3+iy*SIMDD) * MM_LOAD(g4+iz*SIMDD);
+rs[23] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g5+iz*SIMDD);
+rs[24] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g6+iz*SIMDD);
+rs[25] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g6+iz*SIMDD);
+rs[26] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g7+iz*SIMDD);
+r1 = + rs[0]; GOUT_SCATTER(gout, n*27+0, r1); // rs[k] goes unchanged to slot n*27+k
+r1 = + rs[1]; GOUT_SCATTER(gout, n*27+1, r1);
+r1 = + rs[2]; GOUT_SCATTER(gout, n*27+2, r1);
+r1 = + rs[3]; GOUT_SCATTER(gout, n*27+3, r1);
+r1 = + rs[4]; GOUT_SCATTER(gout, n*27+4, r1);
+r1 = + rs[5]; GOUT_SCATTER(gout, n*27+5, r1);
+r1 = + rs[6]; GOUT_SCATTER(gout, n*27+6, r1);
+r1 = + rs[7]; GOUT_SCATTER(gout, n*27+7, r1);
+r1 = + rs[8]; GOUT_SCATTER(gout, n*27+8, r1);
+r1 = + rs[9]; GOUT_SCATTER(gout, n*27+9, r1);
+r1 = + rs[10]; GOUT_SCATTER(gout, n*27+10, r1);
+r1 = + rs[11]; GOUT_SCATTER(gout, n*27+11, r1);
+r1 = + rs[12]; GOUT_SCATTER(gout, n*27+12, r1);
+r1 = + rs[13]; GOUT_SCATTER(gout, n*27+13, r1);
+r1 = + rs[14]; GOUT_SCATTER(gout, n*27+14, r1);
+r1 = + rs[15]; GOUT_SCATTER(gout, n*27+15, r1);
+r1 = + rs[16]; GOUT_SCATTER(gout, n*27+16, r1);
+r1 = + rs[17]; GOUT_SCATTER(gout, n*27+17, r1);
+r1 = + rs[18]; GOUT_SCATTER(gout, n*27+18, r1);
+r1 = + rs[19]; GOUT_SCATTER(gout, n*27+19, r1);
+r1 = + rs[20]; GOUT_SCATTER(gout, n*27+20, r1);
+r1 = + rs[21]; GOUT_SCATTER(gout, n*27+21, r1);
+r1 = + rs[22]; GOUT_SCATTER(gout, n*27+22, r1);
+r1 = + rs[23]; GOUT_SCATTER(gout, n*27+23, r1);
+r1 = + rs[24]; GOUT_SCATTER(gout, n*27+24, r1);
+r1 = + rs[25]; GOUT_SCATTER(gout, n*27+25, r1);
+r1 = + rs[26]; GOUT_SCATTER(gout, n*27+26, r1);
+}}
+void int1e_irrp_optimizer(CINTOpt **opt, int *atm, int natm, int *bas, int nbas, double *env) { // forwards every argument plus ng to CINTall_1e_optimizer
+int ng[] = {0, 3, 0, 0, 3, 1, 1, 27}; // same literal as in int1e_irrp_cart/_sph/_spinor
+CINTall_1e_optimizer(opt, ng, atm, natm, bas, nbas, env);
+}
+int int1e_irrp_cart(double *out, int *dims, int *shls, // _cart entry point: returns the result of CINT1e_drv run with c2s_cart_1e
+int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) {
+int ng[] = {0, 3, 0, 0, 3, 1, 1, 27}; // final entry 27 equals the 27 slots per n written by the gout kernel; other fields are consumed by CINTinit_int1e_EnvVars (not shown)
+CINTEnvVars envs;
+CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env);
+envs.f_gout = &CINTgout1e_int1e_irrp; // install the kernel defined above as the f_gout callback
+return CINT1e_drv(out, dims, &envs, opt, cache, &c2s_cart_1e);
+} // int1e_irrp_cart
+int int1e_irrp_sph(double *out, int *dims, int *shls, // _sph entry point: same setup as the _cart variant but CINT1e_drv is given c2s_sph_1e
+int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) {
+int ng[] = {0, 3, 0, 0, 3, 1, 1, 27}; // final entry 27 equals the 27 slots per n written by the gout kernel
+CINTEnvVars envs;
+CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env);
+envs.f_gout = &CINTgout1e_int1e_irrp; // kernel used as the f_gout callback
+return CINT1e_drv(out, dims, &envs, opt, cache, &c2s_sph_1e);
+} // int1e_irrp_sph
+int int1e_irrp_spinor(double complex *out, int *dims, int *shls, // _spinor entry point: complex output, returns the result of CINT1e_spinor_drv run with c2s_sf_1e
+int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) {
+int ng[] = {0, 3, 0, 0, 3, 1, 1, 27}; // final entry 27 equals the 27 slots per n written by the gout kernel
+CINTEnvVars envs;
+CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env);
+envs.f_gout = &CINTgout1e_int1e_irrp; // kernel used as the f_gout callback
+return CINT1e_spinor_drv(out, dims, &envs, opt, cache, &c2s_sf_1e);
+} // int1e_irrp_spinor
+ALL_CINT1E(int1e_irrp) // macro defined outside this patch; its expansion is not shown here
+//ALL_CINT1E_FORTRAN_(cint1e_irrp)
+/* <i|RC NABLA RC |j>: gout kernel for int1e_irpr; for every n < envs->nf it scatters 27 products into gout slots n*27+0..26. */
+static void CINTgout1e_int1e_irpr(double *gout, double *g, int *idx, CINTEnvVars *envs, int count) {
+CINTg1e_ovlp(g, envs, count); // called first; g0 below aliases g and is the root of the g1..g7 chain
+int nf = envs->nf;
+int nfc = nf * 27; // not named again in this body; presumably read by DECLARE_GOUT/GOUT_SCATTER -- TODO confirm
+int ix, iy, iz, n;
+DECLARE_GOUT;
+double *RESTRICT g0 = g;
+double *RESTRICT g1 = g0  + envs->g_size * 3 * SIMDD; // g1..g7: consecutive buffers, each g_size*3*SIMDD doubles after the previous one
+double *RESTRICT g2 = g1  + envs->g_size * 3 * SIMDD;
+double *RESTRICT g3 = g2  + envs->g_size * 3 * SIMDD;
+double *RESTRICT g4 = g3  + envs->g_size * 3 * SIMDD;
+double *RESTRICT g5 = g4  + envs->g_size * 3 * SIMDD;
+double *RESTRICT g6 = g5  + envs->g_size * 3 * SIMDD;
+double *RESTRICT g7 = g6  + envs->g_size * 3 * SIMDD;
+double drj[3]; // envs->rj minus env[PTR_COMMON_ORIG..+2]; not named again here, presumably read inside G1E_RCJ -- TODO confirm
+drj[0] = envs->rj[0] - envs->env[PTR_COMMON_ORIG+0];
+drj[1] = envs->rj[1] - envs->env[PTR_COMMON_ORIG+1];
+drj[2] = envs->rj[2] - envs->env[PTR_COMMON_ORIG+2];
+__MD r1;
+__MD rs[27];
+G1E_RCJ(g1, g0, envs->i_l+0, envs->j_l+0, 0); // g1 <- RCJ(g0)
+G1E_D_J(g2, g0, envs->i_l+0, envs->j_l+1, 0); // g2 <- D_J(g0), with j_l+1
+G1E_RCJ(g3, g2, envs->i_l+0, envs->j_l+0, 0); // g3 <- RCJ(g2)
+G1E_RCJ(g4, g0, envs->i_l+0, envs->j_l+2, 0); // g4 <- RCJ(g0), with j_l+2
+G1E_RCJ(g5, g4, envs->i_l+0, envs->j_l+0, 0); // g5 <- RCJ(g4)
+G1E_D_J(g6, g4, envs->i_l+0, envs->j_l+1, 0); // g6 <- D_J(g4), with j_l+1
+G1E_RCJ(g7, g6, envs->i_l+0, envs->j_l+0, 0); // g7 <- RCJ(g6)
+for (n = 0; n < nf; n++) {
+ix = idx[0+n*3]; // idx holds three offsets per n, one for each factor of the product
+iy = idx[1+n*3];
+iz = idx[2+n*3];
+rs[0] = MM_LOAD(g7+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); // rs[9*a+3*b+c]: buffer number on each factor = 4 (if it is factor a) + 2 (if b) + 1 (if c)
+rs[1] = MM_LOAD(g6+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD);
+rs[2] = MM_LOAD(g6+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD);
+rs[3] = MM_LOAD(g5+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD);
+rs[4] = MM_LOAD(g4+ix*SIMDD) * MM_LOAD(g3+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD);
+rs[5] = MM_LOAD(g4+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD);
+rs[6] = MM_LOAD(g5+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD);
+rs[7] = MM_LOAD(g4+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD);
+rs[8] = MM_LOAD(g4+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g3+iz*SIMDD);
+rs[9] = MM_LOAD(g3+ix*SIMDD) * MM_LOAD(g4+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD);
+rs[10] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g5+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD);
+rs[11] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g4+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD);
+rs[12] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g6+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD);
+rs[13] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g7+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD);
+rs[14] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g6+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD);
+rs[15] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g4+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD);
+rs[16] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g5+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD);
+rs[17] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g4+iy*SIMDD) * MM_LOAD(g3+iz*SIMDD);
+rs[18] = MM_LOAD(g3+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g4+iz*SIMDD);
+rs[19] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g4+iz*SIMDD);
+rs[20] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g5+iz*SIMDD);
+rs[21] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g4+iz*SIMDD);
+rs[22] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g3+iy*SIMDD) * MM_LOAD(g4+iz*SIMDD);
+rs[23] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g5+iz*SIMDD);
+rs[24] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g6+iz*SIMDD);
+rs[25] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g6+iz*SIMDD);
+rs[26] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g7+iz*SIMDD);
+r1 = + rs[0]; GOUT_SCATTER(gout, n*27+0, r1); // rs[k] goes unchanged to slot n*27+k
+r1 = + rs[1]; GOUT_SCATTER(gout, n*27+1, r1);
+r1 = + rs[2]; GOUT_SCATTER(gout, n*27+2, r1);
+r1 = + rs[3]; GOUT_SCATTER(gout, n*27+3, r1);
+r1 = + rs[4]; GOUT_SCATTER(gout, n*27+4, r1);
+r1 = + rs[5]; GOUT_SCATTER(gout, n*27+5, r1);
+r1 = + rs[6]; GOUT_SCATTER(gout, n*27+6, r1);
+r1 = + rs[7]; GOUT_SCATTER(gout, n*27+7, r1);
+r1 = + rs[8]; GOUT_SCATTER(gout, n*27+8, r1);
+r1 = + rs[9]; GOUT_SCATTER(gout, n*27+9, r1);
+r1 = + rs[10]; GOUT_SCATTER(gout, n*27+10, r1);
+r1 = + rs[11]; GOUT_SCATTER(gout, n*27+11, r1);
+r1 = + rs[12]; GOUT_SCATTER(gout, n*27+12, r1);
+r1 = + rs[13]; GOUT_SCATTER(gout, n*27+13, r1);
+r1 = + rs[14]; GOUT_SCATTER(gout, n*27+14, r1);
+r1 = + rs[15]; GOUT_SCATTER(gout, n*27+15, r1);
+r1 = + rs[16]; GOUT_SCATTER(gout, n*27+16, r1);
+r1 = + rs[17]; GOUT_SCATTER(gout, n*27+17, r1);
+r1 = + rs[18]; GOUT_SCATTER(gout, n*27+18, r1);
+r1 = + rs[19]; GOUT_SCATTER(gout, n*27+19, r1);
+r1 = + rs[20]; GOUT_SCATTER(gout, n*27+20, r1);
+r1 = + rs[21]; GOUT_SCATTER(gout, n*27+21, r1);
+r1 = + rs[22]; GOUT_SCATTER(gout, n*27+22, r1);
+r1 = + rs[23]; GOUT_SCATTER(gout, n*27+23, r1);
+r1 = + rs[24]; GOUT_SCATTER(gout, n*27+24, r1);
+r1 = + rs[25]; GOUT_SCATTER(gout, n*27+25, r1);
+r1 = + rs[26]; GOUT_SCATTER(gout, n*27+26, r1);
+}}
+void int1e_irpr_optimizer(CINTOpt **opt, int *atm, int natm, int *bas, int nbas, double *env) { // forwards every argument plus ng to CINTall_1e_optimizer
+int ng[] = {0, 3, 0, 0, 3, 1, 1, 27}; // same literal as in int1e_irpr_cart/_sph/_spinor
+CINTall_1e_optimizer(opt, ng, atm, natm, bas, nbas, env);
+}
+int int1e_irpr_cart(double *out, int *dims, int *shls, // _cart entry point: returns the result of CINT1e_drv run with c2s_cart_1e
+int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) {
+int ng[] = {0, 3, 0, 0, 3, 1, 1, 27}; // final entry 27 equals the 27 slots per n written by the gout kernel; other fields are consumed by CINTinit_int1e_EnvVars (not shown)
+CINTEnvVars envs;
+CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env);
+envs.f_gout = &CINTgout1e_int1e_irpr; // install the kernel defined above as the f_gout callback
+return CINT1e_drv(out, dims, &envs, opt, cache, &c2s_cart_1e);
+} // int1e_irpr_cart
+int int1e_irpr_sph(double *out, int *dims, int *shls, // _sph entry point: same setup as the _cart variant but CINT1e_drv is given c2s_sph_1e
+int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) {
+int ng[] = {0, 3, 0, 0, 3, 1, 1, 27}; // final entry 27 equals the 27 slots per n written by the gout kernel
+CINTEnvVars envs;
+CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env);
+envs.f_gout = &CINTgout1e_int1e_irpr; // kernel used as the f_gout callback
+return CINT1e_drv(out, dims, &envs, opt, cache, &c2s_sph_1e);
+} // int1e_irpr_sph
+int int1e_irpr_spinor(double complex *out, int *dims, int *shls, // _spinor entry point: complex output, returns the result of CINT1e_spinor_drv run with c2s_sf_1e
+int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) {
+int ng[] = {0, 3, 0, 0, 3, 1, 1, 27}; // final entry 27 equals the 27 slots per n written by the gout kernel
+CINTEnvVars envs;
+CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env);
+envs.f_gout = &CINTgout1e_int1e_irpr; // kernel used as the f_gout callback
+return CINT1e_spinor_drv(out, dims, &envs, opt, cache, &c2s_sf_1e);
+} // int1e_irpr_spinor
+ALL_CINT1E(int1e_irpr) // macro defined outside this patch; its expansion is not shown here
+//ALL_CINT1E_FORTRAN_(cint1e_irpr)
+/* <i|RC RC RC |j>: gout kernel for int1e_irrr; for every n < envs->nf it scatters 27 products into gout slots n*27+0..26. */
+static void CINTgout1e_int1e_irrr(double *gout, double *g, int *idx, CINTEnvVars *envs, int count) {
+CINTg1e_ovlp(g, envs, count); // called first; g0 below aliases g and is the root of the g1..g7 chain
+int nf = envs->nf;
+int nfc = nf * 27; // not named again in this body; presumably read by DECLARE_GOUT/GOUT_SCATTER -- TODO confirm
+int ix, iy, iz, n;
+DECLARE_GOUT;
+double *RESTRICT g0 = g;
+double *RESTRICT g1 = g0  + envs->g_size * 3 * SIMDD; // g1..g7: consecutive buffers, each g_size*3*SIMDD doubles after the previous one
+double *RESTRICT g2 = g1  + envs->g_size * 3 * SIMDD;
+double *RESTRICT g3 = g2  + envs->g_size * 3 * SIMDD;
+double *RESTRICT g4 = g3  + envs->g_size * 3 * SIMDD;
+double *RESTRICT g5 = g4  + envs->g_size * 3 * SIMDD;
+double *RESTRICT g6 = g5  + envs->g_size * 3 * SIMDD;
+double *RESTRICT g7 = g6  + envs->g_size * 3 * SIMDD;
+double drj[3]; // envs->rj minus env[PTR_COMMON_ORIG..+2]; not named again here, presumably read inside G1E_RCJ -- TODO confirm
+drj[0] = envs->rj[0] - envs->env[PTR_COMMON_ORIG+0];
+drj[1] = envs->rj[1] - envs->env[PTR_COMMON_ORIG+1];
+drj[2] = envs->rj[2] - envs->env[PTR_COMMON_ORIG+2];
+__MD r1;
+__MD rs[27];
+G1E_RCJ(g1, g0, envs->i_l+0, envs->j_l+0, 0); // g1 <- RCJ(g0)
+G1E_RCJ(g2, g0, envs->i_l+0, envs->j_l+1, 0); // g2 <- RCJ(g0), with j_l+1
+G1E_RCJ(g3, g2, envs->i_l+0, envs->j_l+0, 0); // g3 <- RCJ(g2)
+G1E_RCJ(g4, g0, envs->i_l+0, envs->j_l+2, 0); // g4 <- RCJ(g0), with j_l+2
+G1E_RCJ(g5, g4, envs->i_l+0, envs->j_l+0, 0); // g5 <- RCJ(g4)
+G1E_RCJ(g6, g4, envs->i_l+0, envs->j_l+1, 0); // g6 <- RCJ(g4), with j_l+1
+G1E_RCJ(g7, g6, envs->i_l+0, envs->j_l+0, 0); // g7 <- RCJ(g6)
+for (n = 0; n < nf; n++) {
+ix = idx[0+n*3]; // idx holds three offsets per n, one for each factor of the product
+iy = idx[1+n*3];
+iz = idx[2+n*3];
+rs[0] = MM_LOAD(g7+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD); // rs[9*a+3*b+c]: buffer number on each factor = 4 (if it is factor a) + 2 (if b) + 1 (if c)
+rs[1] = MM_LOAD(g6+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD);
+rs[2] = MM_LOAD(g6+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD);
+rs[3] = MM_LOAD(g5+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD);
+rs[4] = MM_LOAD(g4+ix*SIMDD) * MM_LOAD(g3+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD);
+rs[5] = MM_LOAD(g4+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD);
+rs[6] = MM_LOAD(g5+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD);
+rs[7] = MM_LOAD(g4+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD);
+rs[8] = MM_LOAD(g4+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g3+iz*SIMDD);
+rs[9] = MM_LOAD(g3+ix*SIMDD) * MM_LOAD(g4+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD);
+rs[10] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g5+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD);
+rs[11] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g4+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD);
+rs[12] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g6+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD);
+rs[13] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g7+iy*SIMDD) * MM_LOAD(g0+iz*SIMDD);
+rs[14] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g6+iy*SIMDD) * MM_LOAD(g1+iz*SIMDD);
+rs[15] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g4+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD);
+rs[16] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g5+iy*SIMDD) * MM_LOAD(g2+iz*SIMDD);
+rs[17] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g4+iy*SIMDD) * MM_LOAD(g3+iz*SIMDD);
+rs[18] = MM_LOAD(g3+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g4+iz*SIMDD);
+rs[19] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g4+iz*SIMDD);
+rs[20] = MM_LOAD(g2+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g5+iz*SIMDD);
+rs[21] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g4+iz*SIMDD);
+rs[22] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g3+iy*SIMDD) * MM_LOAD(g4+iz*SIMDD);
+rs[23] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g2+iy*SIMDD) * MM_LOAD(g5+iz*SIMDD);
+rs[24] = MM_LOAD(g1+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g6+iz*SIMDD);
+rs[25] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g1+iy*SIMDD) * MM_LOAD(g6+iz*SIMDD);
+rs[26] = MM_LOAD(g0+ix*SIMDD) * MM_LOAD(g0+iy*SIMDD) * MM_LOAD(g7+iz*SIMDD);
+r1 = + rs[0]; GOUT_SCATTER(gout, n*27+0, r1); // rs[k] goes unchanged to slot n*27+k
+r1 = + rs[1]; GOUT_SCATTER(gout, n*27+1, r1);
+r1 = + rs[2]; GOUT_SCATTER(gout, n*27+2, r1);
+r1 = + rs[3]; GOUT_SCATTER(gout, n*27+3, r1);
+r1 = + rs[4]; GOUT_SCATTER(gout, n*27+4, r1);
+r1 = + rs[5]; GOUT_SCATTER(gout, n*27+5, r1);
+r1 = + rs[6]; GOUT_SCATTER(gout, n*27+6, r1);
+r1 = + rs[7]; GOUT_SCATTER(gout, n*27+7, r1);
+r1 = + rs[8]; GOUT_SCATTER(gout, n*27+8, r1);
+r1 = + rs[9]; GOUT_SCATTER(gout, n*27+9, r1);
+r1 = + rs[10]; GOUT_SCATTER(gout, n*27+10, r1);
+r1 = + rs[11]; GOUT_SCATTER(gout, n*27+11, r1);
+r1 = + rs[12]; GOUT_SCATTER(gout, n*27+12, r1);
+r1 = + rs[13]; GOUT_SCATTER(gout, n*27+13, r1);
+r1 = + rs[14]; GOUT_SCATTER(gout, n*27+14, r1);
+r1 = + rs[15]; GOUT_SCATTER(gout, n*27+15, r1);
+r1 = + rs[16]; GOUT_SCATTER(gout, n*27+16, r1);
+r1 = + rs[17]; GOUT_SCATTER(gout, n*27+17, r1);
+r1 = + rs[18]; GOUT_SCATTER(gout, n*27+18, r1);
+r1 = + rs[19]; GOUT_SCATTER(gout, n*27+19, r1);
+r1 = + rs[20]; GOUT_SCATTER(gout, n*27+20, r1);
+r1 = + rs[21]; GOUT_SCATTER(gout, n*27+21, r1);
+r1 = + rs[22]; GOUT_SCATTER(gout, n*27+22, r1);
+r1 = + rs[23]; GOUT_SCATTER(gout, n*27+23, r1);
+r1 = + rs[24]; GOUT_SCATTER(gout, n*27+24, r1);
+r1 = + rs[25]; GOUT_SCATTER(gout, n*27+25, r1);
+r1 = + rs[26]; GOUT_SCATTER(gout, n*27+26, r1);
+}}
+void int1e_irrr_optimizer(CINTOpt **opt, int *atm, int natm, int *bas, int nbas, double *env) { // forwards every argument plus ng to CINTall_1e_optimizer
+int ng[] = {0, 3, 0, 0, 3, 1, 1, 27}; // same literal as in int1e_irrr_cart/_sph/_spinor
+CINTall_1e_optimizer(opt, ng, atm, natm, bas, nbas, env);
+}
+int int1e_irrr_cart(double *out, int *dims, int *shls, // _cart entry point: returns the result of CINT1e_drv run with c2s_cart_1e
+int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) {
+int ng[] = {0, 3, 0, 0, 3, 1, 1, 27}; // final entry 27 equals the 27 slots per n written by the gout kernel; other fields are consumed by CINTinit_int1e_EnvVars (not shown)
+CINTEnvVars envs;
+CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env);
+envs.f_gout = &CINTgout1e_int1e_irrr; // install the kernel defined above as the f_gout callback
+return CINT1e_drv(out, dims, &envs, opt, cache, &c2s_cart_1e);
+} // int1e_irrr_cart
+int int1e_irrr_sph(double *out, int *dims, int *shls, // _sph entry point: same setup as the _cart variant but CINT1e_drv is given c2s_sph_1e
+int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) {
+int ng[] = {0, 3, 0, 0, 3, 1, 1, 27}; // final entry 27 equals the 27 slots per n written by the gout kernel
+CINTEnvVars envs;
+CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env);
+envs.f_gout = &CINTgout1e_int1e_irrr; // kernel used as the f_gout callback
+return CINT1e_drv(out, dims, &envs, opt, cache, &c2s_sph_1e);
+} // int1e_irrr_sph
+int int1e_irrr_spinor(double complex *out, int *dims, int *shls, // _spinor entry point: complex output, returns the result of CINT1e_spinor_drv run with c2s_sf_1e
+int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) {
+int ng[] = {0, 3, 0, 0, 3, 1, 1, 27}; // final entry 27 equals the 27 slots per n written by the gout kernel
+CINTEnvVars envs;
+CINTinit_int1e_EnvVars(&envs, ng, shls, atm, natm, bas, nbas, env);
+envs.f_gout = &CINTgout1e_int1e_irrr; // kernel used as the f_gout callback
+return CINT1e_spinor_drv(out, dims, &envs, opt, cache, &c2s_sf_1e);
+} // int1e_irrr_spinor
+ALL_CINT1E(int1e_irrr) // macro defined outside this patch; its expansion is not shown here
+//ALL_CINT1E_FORTRAN_(cint1e_irrr)

From 606f97f1b34d3bb96c2bb51e1f28fff71c1ab30a Mon Sep 17 00:00:00 2001
From: Qiming Sun <osirpt.sun@gmail.com>
Date: Fri, 23 Mar 2018 19:17:04 -0700
Subject: [PATCH 3/5] Export breit integrals

---
 ChangeLog   |  2 ++
 README.md   |  4 +--
 src/breit.c | 99 +++++++++++++++++++++++++++++++++++++++++------------
 3 files changed, 81 insertions(+), 24 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 2cb628f..bd54609 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,5 @@
+Version 3.0.11  (2018-03-23):
+	* Export breit integrals
 Version 3.0.10  (2018-03-20):
 	* New integrals
 	  <i | r p | j>,
diff --git a/README.md b/README.md
index 8b0d0f4..f78e0dd 100644
--- a/README.md
+++ b/README.md
@@ -3,9 +3,9 @@ qcint (quick libcint)
 
 An optimized libcint branch for X86 platform
 
-version 3.0.10
+version 3.0.11
 
-2018-03-20
+2018-03-23
 
 
 What is qcint
diff --git a/src/breit.c b/src/breit.c
index b4d3418..2fb12f0 100644
--- a/src/breit.c
+++ b/src/breit.c
@@ -38,52 +38,107 @@
 #include "misc.h"
 #include "c2f.h"
 
+static void _copy(double complex *out, double complex *in,
+                  int *dims, int *counts);
+
 #define DECLARE(X)      int X(double complex *opijkl, int *shls, \
                               int *atm, int natm, \
                               int *bas, int nbas, double *env, CINTOpt *opt)
 
 #define BREIT0(X) \
-DECLARE(cint2e_##X); \
-DECLARE(cint2e_gauge_r1_##X); \
-DECLARE(cint2e_gauge_r2_##X); \
-void cint2e_breit_##X##_optimizer(CINTOpt **opt, int *atm, int natm, \
+DECLARE(int2e_##X); \
+DECLARE(int2e_gauge_r1_##X); \
+DECLARE(int2e_gauge_r2_##X); \
+void int2e_breit_##X##_optimizer(CINTOpt **opt, int *atm, int natm, \
                                   int *bas, int nbas, double *env) \
 { \
         *opt = NULL; \
 } \
-int cint2e_breit_##X(double complex *opijkl, int *shls, \
-                          int *atm, int natm, \
-                          int *bas, int nbas, double *env, CINTOpt *opt) \
+int int2e_breit_##X##_spinor(double complex *out, int *dims, int *shls, \
+int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) \
 { \
-        int has_value = cint2e_##X(opijkl, shls, atm, natm, bas, nbas, env, NULL); \
- \
-        const int ip = CINTcgto_spinor(shls[0], bas); \
-        const int jp = CINTcgto_spinor(shls[1], bas); \
-        const int kp = CINTcgto_spinor(shls[2], bas); \
-        const int lp = CINTcgto_spinor(shls[3], bas); \
-        const int nop = ip * jp * kp * lp; \
-        double complex *buf = malloc(sizeof(double complex) * nop); \
+        if (out == NULL) { \
+                int cache_size1 = int2e_gauge_r1_##X##_spinor(NULL, NULL, shls, \
+                                atm, natm, bas, nbas, env, NULL, cache); \
+                int cache_size2 = int2e_gauge_r2_##X##_spinor(NULL, NULL, shls, \
+                                atm, natm, bas, nbas, env, NULL, cache); \
+                return MAX(cache_size1, cache_size2); \
+        } \
+\
+        int counts[4]; \
+        counts[0] = CINTcgto_spinor(shls[0], bas); \
+        counts[1] = CINTcgto_spinor(shls[1], bas); \
+        counts[2] = CINTcgto_spinor(shls[2], bas); \
+        counts[3] = CINTcgto_spinor(shls[3], bas); \
+        const int nop = counts[0] * counts[1] * counts[2] * counts[3]; \
+        double complex *buf = malloc(sizeof(double complex) * nop * 2); \
+        double complex *buf1 = buf + nop; \
         int i; \
-        has_value = (cint2e_gauge_r1_##X(buf, shls, atm, natm, bas, nbas, env, NULL) || \
+\
+        int has_value = int2e_##X##_spinor(buf, NULL, shls, \
+                                atm, natm, bas, nbas, env, NULL, cache); \
+\
+        has_value = (int2e_gauge_r1_##X##_spinor(buf1, NULL, shls, \
+                                atm, natm, bas, nbas, env, NULL, cache) || \
                      has_value); \
         /* [1/2 gaunt] - [1/2 xxx*\sigma\dot r1] */ \
         if (has_value) { \
                 for (i = 0; i < nop; i++) { \
-                        opijkl[i] = -opijkl[i] - buf[i]; \
+                        buf[i] = -buf1[i] - buf[i]; \
                 } \
         } \
         /* ... [- 1/2 xxx*\sigma\dot(-r2)] */ \
-        has_value = (cint2e_gauge_r2_##X(buf, shls, atm, natm, bas, nbas, env, NULL) || \
+        has_value = (int2e_gauge_r2_##X##_spinor(buf1, NULL, shls, \
+                                atm, natm, bas, nbas, env, NULL, cache) || \
                      has_value); \
-        if (has_value) { \
+        if (dims == NULL) { \
+                for (i = 0; i < nop; i++) { \
+                        out[i] = (buf[i] + buf1[i]) * .5; \
+                } \
+        } else { \
                 for (i = 0; i < nop; i++) { \
-                        opijkl[i] = (opijkl[i] + buf[i]) * .5; \
+                        buf[i] = (buf[i] + buf1[i]) * .5; \
                 } \
+                _copy(out, buf, dims, counts); \
         } \
         free(buf); \
         return has_value; \
+} \
+int cint2e_breit_##X##_spinor(double *out, int *shls, int *atm, int natm, \
+                              int *bas, int nbas, double *env) { \
+        return int2e_breit_##X##_spinor((double complex *)out, NULL, shls, \
+                                atm, natm, bas, nbas, env, NULL, NULL); \
 }
 
+static void _copy(double complex *out, double complex *in,
+                  int *dims, int *counts)
+{
+        int ni = dims[0];
+        int nj = dims[1];
+        int nk = dims[2];
+        int di = counts[0];
+        int dj = counts[1];
+        int dk = counts[2];
+        int dl = counts[3];
+        int nij = ni * nj;
+        int dij = di * dj;
+        int nijk = nij * nk;
+        int dijk = dij * dk;
+        int i, j, k, l;
+        double complex *pout, *pin;
+        for (l = 0; l < dl; l++) {
+                for (k = 0; k < dk; k++) {
+                        pout = out + k * nij;
+                        pin  = in  + k * dij;
+                        for (j = 0; j < dj; j++) {
+                        for (i = 0; i < di; i++) {
+                                pout[j*ni+i] = pin[j*di+i];
+                        } }
+                }
+                out += nijk;
+                in  += dijk;
+        }
+}
 
 BREIT0(ssp1ssp2);
 BREIT0(ssp1sps2);
@@ -233,7 +288,7 @@ int int2e_breit_r1p2_spinor(double complex *out, int *dims, int *shls,
         return CINT2e_spinor_drv(out, dims, &envs, opt, cache, &c2s_sf_2e1i, &c2s_sf_2e2i);
 } // int2e_breit_r1p2_spinor
 ALL_CINT(int2e_breit_r1p2)
-//ALL_CINT_FORTRAN_(cint2e_breit_r1p2)
+//ALL_CINT_FORTRAN_(int2e_breit_r1p2)
 
 /* based on
  * '("int2e_breit_r2p2"  ( nabla \, r0 \| dot nabla-r12 \| \, nabla ))
@@ -376,4 +431,4 @@ int int2e_breit_r2p2_spinor(double complex *out, int *dims, int *shls,
         return CINT2e_spinor_drv(out, dims, &envs, opt, cache, &c2s_sf_2e1i, &c2s_sf_2e2i);
 } // int2e_breit_r2p2_spinor
 ALL_CINT(int2e_breit_r2p2)
-//ALL_CINT_FORTRAN_(cint2e_breit_r2p2)
+//ALL_CINT_FORTRAN_(int2e_breit_r2p2)

From 8246c590402fabfee58be96e779843483fe48497 Mon Sep 17 00:00:00 2001
From: Qiming Sun <osirpt.sun@gmail.com>
Date: Fri, 23 Mar 2018 19:32:50 -0700
Subject: [PATCH 4/5] Improved breit integral driver

---
 src/breit.c | 158 ++++++++++++++++++++++++++++------------------------
 1 file changed, 85 insertions(+), 73 deletions(-)

diff --git a/src/breit.c b/src/breit.c
index 2fb12f0..c906b22 100644
--- a/src/breit.c
+++ b/src/breit.c
@@ -38,112 +38,124 @@
 #include "misc.h"
 #include "c2f.h"
 
-static void _copy(double complex *out, double complex *in,
-                  int *dims, int *counts);
+#define DECLARE(X)      int X(double complex *out, int *dims, int *shls, \
+                              int *atm, int natm, int *bas, int nbas, double *env, \
+                              CINTOpt *opt, double *cache)
 
-#define DECLARE(X)      int X(double complex *opijkl, int *shls, \
-                              int *atm, int natm, \
-                              int *bas, int nbas, double *env, CINTOpt *opt)
-
-#define BREIT0(X) \
-DECLARE(int2e_##X); \
-DECLARE(int2e_gauge_r1_##X); \
-DECLARE(int2e_gauge_r2_##X); \
+#define BREIT0(X, ncomp_tensor) \
+DECLARE(int2e_##X##_spinor); \
+DECLARE(int2e_gauge_r1_##X##_spinor); \
+DECLARE(int2e_gauge_r2_##X##_spinor); \
 void int2e_breit_##X##_optimizer(CINTOpt **opt, int *atm, int natm, \
-                                  int *bas, int nbas, double *env) \
+                                 int *bas, int nbas, double *env) \
 { \
         *opt = NULL; \
 } \
 int int2e_breit_##X##_spinor(double complex *out, int *dims, int *shls, \
-int *atm, int natm, int *bas, int nbas, double *env, CINTOpt *opt, double *cache) \
+                             int *atm, int natm, int *bas, int nbas, double *env, \
+                             CINTOpt *opt, double *cache) \
 { \
-        if (out == NULL) { \
-                int cache_size1 = int2e_gauge_r1_##X##_spinor(NULL, NULL, shls, \
-                                atm, natm, bas, nbas, env, NULL, cache); \
-                int cache_size2 = int2e_gauge_r2_##X##_spinor(NULL, NULL, shls, \
-                                atm, natm, bas, nbas, env, NULL, cache); \
-                return MAX(cache_size1, cache_size2); \
-        } \
-\
-        int counts[4]; \
-        counts[0] = CINTcgto_spinor(shls[0], bas); \
-        counts[1] = CINTcgto_spinor(shls[1], bas); \
-        counts[2] = CINTcgto_spinor(shls[2], bas); \
-        counts[3] = CINTcgto_spinor(shls[3], bas); \
-        const int nop = counts[0] * counts[1] * counts[2] * counts[3]; \
-        double complex *buf = malloc(sizeof(double complex) * nop * 2); \
-        double complex *buf1 = buf + nop; \
-        int i; \
-\
-        int has_value = int2e_##X##_spinor(buf, NULL, shls, \
-                                atm, natm, bas, nbas, env, NULL, cache); \
-\
-        has_value = (int2e_gauge_r1_##X##_spinor(buf1, NULL, shls, \
-                                atm, natm, bas, nbas, env, NULL, cache) || \
-                     has_value); \
-        /* [1/2 gaunt] - [1/2 xxx*\sigma\dot r1] */ \
-        if (has_value) { \
-                for (i = 0; i < nop; i++) { \
-                        buf[i] = -buf1[i] - buf[i]; \
-                } \
-        } \
-        /* ... [- 1/2 xxx*\sigma\dot(-r2)] */ \
-        has_value = (int2e_gauge_r2_##X##_spinor(buf1, NULL, shls, \
-                                atm, natm, bas, nbas, env, NULL, cache) || \
-                     has_value); \
-        if (dims == NULL) { \
-                for (i = 0; i < nop; i++) { \
-                        out[i] = (buf[i] + buf1[i]) * .5; \
-                } \
-        } else { \
-                for (i = 0; i < nop; i++) { \
-                        buf[i] = (buf[i] + buf1[i]) * .5; \
-                } \
-                _copy(out, buf, dims, counts); \
-        } \
-        free(buf); \
-        return has_value; \
+        return _int2e_breit_drv(out, dims, shls, atm, natm, bas, nbas, env, opt, cache, \
+                                ncomp_tensor, &int2e_##X##_spinor, \
+                                &int2e_gauge_r1_##X##_spinor, &int2e_gauge_r2_##X##_spinor); \
 } \
-int cint2e_breit_##X##_spinor(double *out, int *shls, int *atm, int natm, \
-                              int *bas, int nbas, double *env) { \
-        return int2e_breit_##X##_spinor((double complex *)out, NULL, shls, \
-                                atm, natm, bas, nbas, env, NULL, NULL); \
+int cint2e_breit_##X##_spinor(double complex *out, int *shls, \
+                      int *atm, int natm, int *bas, int nbas, double *env, \
+                      CINTOpt *opt) \
+{ \
+        return int2e_breit_##X##_spinor(out, NULL, shls, \
+                                        atm, natm, bas, nbas, env, opt, NULL); \
 }
 
-static void _copy(double complex *out, double complex *in,
-                  int *dims, int *counts)
+static void _copy_to_out(double complex *out, double complex *in, int *dims, int *counts)
 {
+        if (out == in) {
+                return;
+        }
         int ni = dims[0];
         int nj = dims[1];
         int nk = dims[2];
+        int nij = ni * nj;
+        int nijk = nij * nk;
         int di = counts[0];
         int dj = counts[1];
         int dk = counts[2];
         int dl = counts[3];
-        int nij = ni * nj;
         int dij = di * dj;
-        int nijk = nij * nk;
         int dijk = dij * dk;
         int i, j, k, l;
-        double complex *pout, *pin;
+        double complex *pin, *pout;
         for (l = 0; l < dl; l++) {
                 for (k = 0; k < dk; k++) {
-                        pout = out + k * nij;
                         pin  = in  + k * dij;
+                        pout = out + k * nij;
                         for (j = 0; j < dj; j++) {
                         for (i = 0; i < di; i++) {
                                 pout[j*ni+i] = pin[j*di+i];
                         } }
                 }
-                out += nijk;
                 in  += dijk;
+                out += nijk;
+        }
+}
+
+static int _int2e_breit_drv(double complex *out, int *dims, int *shls, /* Shared driver behind the BREIT0-generated spinor functions: combines one gaunt kernel and two gauge kernels into out. */
+                            int *atm, int natm, int *bas, int nbas, double *env,
+                            CINTOpt *opt, double *cache, int ncomp_tensor, /* NOTE(review): opt is accepted but never used; every kernel call below passes NULL instead */
+                            int (*f_gaunt)(), int (*f_gauge_r1)(), int (*f_gauge_r2)()) /* kernels are invoked with the DECLARE() argument list (out, dims, shls, atm, natm, bas, nbas, env, opt, cache) */
+{
+        if (out == NULL) { /* NULL out: query mode, no integrals are accumulated */
+                int cache_size1 = (*f_gauge_r1)(NULL, NULL, shls,
+                                atm, natm, bas, nbas, env, NULL, cache);
+                int cache_size2 = (*f_gauge_r2)(NULL, NULL, shls,
+                                atm, natm, bas, nbas, env, NULL, cache);
+                return MAX(cache_size1, cache_size2); /* larger of the two gauge results; NOTE(review): f_gaunt is not queried -- confirm it never needs more */
+        }
+
+        int counts[4]; /* per-shell sizes reported by CINTcgto_spinor for shls[0..3] */
+        counts[0] = CINTcgto_spinor(shls[0], bas);
+        counts[1] = CINTcgto_spinor(shls[1], bas);
+        counts[2] = CINTcgto_spinor(shls[2], bas);
+        counts[3] = CINTcgto_spinor(shls[3], bas);
+        int nop = counts[0] * counts[1] * counts[2] * counts[3] * ncomp_tensor; /* element count used for every loop below */
+        double complex *buf = malloc(sizeof(double complex) * nop*2); /* scratch of 2*nop elements; NOTE(review): the malloc result is not checked before use */
+        double complex *buf1; /* accumulator for the combined result */
+        if (dims == NULL) {
+                dims = counts; /* no dims supplied: use the shell sizes as the output dimensions */
+                buf1 = out; /* and accumulate directly in the caller's buffer */
+        } else {
+                buf1 = buf + nop; /* otherwise accumulate in the second half of the scratch buffer */
+        }
+
+        int has_value = (*f_gaunt)(buf1, NULL, shls, atm, natm, bas, nbas, env, NULL, cache); /* gaunt kernel writes straight into the accumulator */
+
+        int i;
+        has_value = ((*f_gauge_r1)(buf, NULL, shls, atm, natm, bas, nbas, env, NULL, cache) || /* gauge r1 kernel writes into the first half of buf */
+                     has_value);
+        /* [1/2 gaunt] - [1/2 xxx*\sigma1\dot r1] */
+        if (has_value) {
+                for (i = 0; i < nop; i++) {
+                        buf1[i] = -buf1[i] - buf[i];
+                }
         }
+        /* ... [- 1/2 xxx*\sigma1\dot(-r2)] */
+        has_value = ((*f_gauge_r2)(buf, NULL, shls, atm, natm, bas, nbas, env, NULL, cache) || /* gauge r2 kernel reuses the first half of buf */
+                     has_value);
+        if (has_value) {
+                for (i = 0; i < nop; i++) {
+                        buf1[i] = (buf1[i] + buf[i]) * .5;
+                }
+        }
+        _copy_to_out(out, buf1, dims, counts); /* returns immediately when buf1 aliases out; NOTE(review): the copy loops over counts only, not ncomp_tensor -- every BREIT0 user passes 1, confirm before using a larger value */
+        free(buf);
+        return has_value; /* nonzero when any of the three kernels returned nonzero */
 }
 
-BREIT0(ssp1ssp2);
-BREIT0(ssp1sps2);
-BREIT0(sps1ssp2);
-BREIT0(sps1sps2);
+
+BREIT0(ssp1ssp2, 1);
+BREIT0(ssp1sps2, 1);
+BREIT0(sps1ssp2, 1);
+BREIT0(sps1sps2, 1);
 
 /* based on
  * '("int2e_breit_r1p2"  ( nabla \, r0 \| dot nabla-r12 \| \, nabla ))

From 824ccc7052d8362366439a209ee47d136d99a846 Mon Sep 17 00:00:00 2001
From: Qiming Sun <osirpt.sun@gmail.com>
Date: Sun, 15 Apr 2018 17:14:27 -0700
Subject: [PATCH 5/5] Update version tag in cmakefile

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index bdd5b33..8e81712 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,6 @@
 cmake_minimum_required (VERSION 2.6)
 project (qcint C)
-set(qcint_VERSION "3.0.6")
+set(qcint_VERSION "3.0.11")
 
 if ("${CMAKE_BUILD_TYPE}" STREQUAL "")
   set(CMAKE_BUILD_TYPE RELWITHDEBINFO)