diff --git a/CMakeLists.txt b/CMakeLists.txt index 9edc043..db25d7a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,8 +8,14 @@ if ("${CMAKE_BUILD_TYPE}" STREQUAL "") endif() set(CMAKE_VERBOSE_MAKEFILE OFF) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native -O2") -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -funroll-loops -ftree-vectorize -ffast-math") -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-math-errno -fno-strict-overflow -fomit-frame-pointer") +if ("${CMAKE_C_COMPILER_ID}" STREQUAL "Intel") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -unroll-aggressive -ipo ") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-math-errno -fomit-frame-pointer") +else() + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -funroll-loops -ftree-vectorize -ffast-math") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-math-errno -fno-strict-overflow -fomit-frame-pointer") +endif() + if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION} VERSION_GREATER 2.8.3) if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION} VERSION_GREATER 3.0.0) diff --git a/src/g3c1e.c b/src/g3c1e.c index ad0e5c6..584cc86 100644 --- a/src/g3c1e.c +++ b/src/g3c1e.c @@ -219,6 +219,9 @@ void CINTg3c1e_nuc(double *g, CINTEnvVars *envs, int count, int nuc_id) int i, j, k, n, ptr, off; __MD crijk[3]; __MD r0, r1, r2, rt2, fac1, aijk; + __MD scale; + + scale = MM_SET1(2./SQRTPI); aijk = MM_LOAD(envs->ai) + MM_LOAD(envs->aj) + MM_LOAD(envs->ak); MM_STORE(tau, aijk); @@ -227,14 +230,14 @@ void CINTg3c1e_nuc(double *g, CINTEnvVars *envs, int count, int nuc_id) } if (nuc_id < 0) { - fac1 = 2./SQRTPI * MM_LOAD(envs->fac) * MM_LOAD(tau) / aijk; + fac1 = scale * MM_LOAD(envs->fac) * MM_LOAD(tau) / aijk; cr = env + PTR_RINV_ORIG; } else if (atm(NUC_MOD_OF,nuc_id) == FRAC_CHARGE_NUC) { - fac1 = 2./SQRTPI * MM_SET1(-env[atm[PTR_FRAC_CHARGE+nuc_id*ATM_SLOTS]]); + fac1 = scale * MM_SET1(-env[atm[PTR_FRAC_CHARGE+nuc_id*ATM_SLOTS]]); fac1 = fac1 * MM_LOAD(envs->fac) * MM_LOAD(tau) / aijk; cr = env + atm(PTR_COORD, nuc_id); } else { - fac1 = 2./SQRTPI * MM_SET1(-fabs(atm[CHARGE_OF+nuc_id*ATM_SLOTS])); + fac1 = scale * MM_SET1(-fabs(atm[CHARGE_OF+nuc_id*ATM_SLOTS])); fac1 = fac1 * MM_LOAD(envs->fac) * MM_LOAD(tau) / aijk; cr = env + atm(PTR_COORD, nuc_id); }