From 7de9150598b23f7412d27772e39a530ba1832899 Mon Sep 17 00:00:00 2001 From: Qiming Sun Date: Fri, 10 Nov 2023 20:34:23 -0800 Subject: [PATCH] 4th, and 5th order XC derivatives (#1930) * Update conda build * Arbitrary order to transform_xc; patch xcfun up to 5th order * Update libxc itrf * Configure max_deriv_order; arbitrary order for ud2ts and ts2ud * Add 4th order XC deriv in CI job --- .github/workflows/ci_linux/build_pyscf.sh | 2 +- conda/conda_build_config.yaml | 3 +- docker/Dockerfile | 2 +- pyscf/dft/libxc.py | 358 +++++++---- pyscf/dft/numint.py | 50 +- pyscf/dft/numint2c.py | 47 +- pyscf/dft/test/test_xc_deriv.py | 264 ++++++++ pyscf/dft/test/test_xcfun.py | 22 +- pyscf/dft/xc_deriv.py | 268 +++++--- pyscf/dft/xcfun.py | 185 +++--- pyscf/lib/CMakeLists.txt | 17 +- pyscf/lib/config.h.in | 2 +- pyscf/lib/dft/CMakeLists.txt | 2 +- pyscf/lib/dft/libxc_itrf.c | 734 +++++++++++----------- pyscf/lib/dft/xc_deriv.c | 174 +++-- pyscf/lib/dft/xcfun_itrf.c | 41 +- pyscf/lib/libxcfun.patch | 112 +++- pyscf/lib/misc.py | 15 +- 18 files changed, 1440 insertions(+), 858 deletions(-) diff --git a/.github/workflows/ci_linux/build_pyscf.sh b/.github/workflows/ci_linux/build_pyscf.sh index e33ab6f09c..04e2aac97b 100755 --- a/.github/workflows/ci_linux/build_pyscf.sh +++ b/.github/workflows/ci_linux/build_pyscf.sh @@ -5,7 +5,7 @@ set -e cd ./pyscf/lib curl -L "https://github.com/pyscf/pyscf-build-deps/blob/master/pyscf-2.4a-deps.tar.gz?raw=true" | tar xzf - mkdir build; cd build -cmake -DBUILD_LIBXC=OFF -DBUILD_XCFUN=OFF -DBUILD_LIBCINT=OFF .. +cmake -DBUILD_LIBXC=OFF -DBUILD_XCFUN=OFF -DBUILD_LIBCINT=OFF -DXCFUN_MAX_ORDER=4 .. make -j4 cd .. rm -Rf build diff --git a/conda/conda_build_config.yaml b/conda/conda_build_config.yaml index 8d657fe804..d7d85008cc 100644 --- a/conda/conda_build_config.yaml +++ b/conda/conda_build_config.yaml @@ -1,7 +1,6 @@ python: - - 3.6 - - 3.7 - 3.8 - 3.9 - 3.10 - 3.11 + - 3.12 diff --git a/docker/Dockerfile b/docker/Dockerfile index 004e470b98..6246ce9ba0 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.7-slim +FROM python:3.10-slim LABEL maintainer="Qiming Sun " diff --git a/pyscf/dft/libxc.py b/pyscf/dft/libxc.py index b5b4f781e2..d584278a80 100644 --- a/pyscf/dft/libxc.py +++ b/pyscf/dft/libxc.py @@ -30,6 +30,7 @@ from functools import lru_cache from pyscf import lib from pyscf.dft.xc.utils import remove_dup, format_xc_code +from pyscf.dft import xc_deriv from pyscf import __config__ _itrf = lib.load_library('libxc_itrf') @@ -942,6 +943,7 @@ def is_nlc(xc_code): def needs_laplacian(xc_code): return _itrf.LIBXC_needs_laplacian(xc_code) != 0 +@lru_cache(100) def max_deriv_order(xc_code): hyb, fn_facs = parse_xc(xc_code) if fn_facs: @@ -1405,200 +1407,302 @@ def eval_xc(xc_code, rho, spin=0, relativity=0, deriv=1, omega=None, verbose=Non see also libxc_itrf.c ''' # noqa: E501 - hyb, fn_facs = parse_xc(xc_code) - if omega is not None: - hyb = hyb[:2] + (float(omega),) - return _eval_xc(hyb, fn_facs, rho, spin, relativity, deriv, verbose) - - -def _eval_xc(hyb, fn_facs, rho, spin=0, relativity=0, deriv=1, verbose=None): - assert (deriv <= 3) - if spin == 0: - nspin = 1 - rho_u = rho_d = numpy.asarray(rho, order='C') - else: - nspin = 2 - rho_u = numpy.asarray(rho[0], order='C') - rho_d = numpy.asarray(rho[1], order='C') - assert (rho_u.dtype == numpy.double) - assert (rho_d.dtype == numpy.double) - - if rho_u.ndim == 1: - rho_u = rho_u.reshape(1,-1) - rho_d = rho_d.reshape(1,-1) - ngrids = rho_u.shape[1] - - fn_ids = [x[0] for x in fn_facs] - facs = [x[1] for x in fn_facs] - if hyb[2] != 0: - # Current implementation does not support different omegas for - # different RSH functionals if there are multiple RSHs - omega = [hyb[2]] * len(facs) - else: - omega = [0] * len(facs) - fn_ids_set = set(fn_ids) - if fn_ids_set.intersection(PROBLEMATIC_XC): - problem_xc = [PROBLEMATIC_XC[k] - for k in fn_ids_set.intersection(PROBLEMATIC_XC)] - warnings.warn('Libxc functionals %s may have discrepancy to xcfun ' - 'library.\n' % problem_xc) - - if any([needs_laplacian(fid) for fid in fn_ids]): - raise NotImplementedError('laplacian in meta-GGA method') - - n = len(fn_ids) - if (n == 0 or # xc_code = '' or xc_code = 'HF', an empty functional - all((is_lda(x) for x in fn_ids))): - if spin == 0: - nvar = 1 - xctype = 'R-LDA' - else: - nvar = 2 - xctype = 'U-LDA' - elif any((is_meta_gga(x) for x in fn_ids)): - if spin == 0: - nvar = 4 - xctype = 'R-MGGA' - else: - nvar = 9 - xctype = 'U-MGGA' - else: # GGA - if spin == 0: - nvar = 2 - xctype = 'R-GGA' - else: - nvar = 5 - xctype = 'U-GGA' - - # Check that the density rho has the appropriate shape - # should it be >= or ==, in test test_xcfun.py, test_vs_libxc_rks - # the density contain 6 rows independently of the functional - if xctype[2:] == 'LDA': - for rho_ud in [rho_u, rho_d]: - assert rho_ud.shape[0] >= 1 - - elif xctype[2:] == 'GGA': - for rho_ud in [rho_u, rho_d]: - assert rho_ud.shape[0] >= 4 - - elif xctype[2:] == 'MGGA': - for rho_ud in [rho_u, rho_d]: - assert rho_ud.shape[0] >= 6 - else: - raise ValueError("Unknown nvar {}".format(nvar)) - - outlen = (math.factorial(nvar+deriv) // - (math.factorial(nvar) * math.factorial(deriv))) - outbuf = numpy.zeros((outlen,ngrids)) - - density_threshold = 0 - _itrf.LIBXC_eval_xc(ctypes.c_int(n), - (ctypes.c_int*n)(*fn_ids), - (ctypes.c_double*n)(*facs), - (ctypes.c_double*n)(*omega), - ctypes.c_int(nspin), - ctypes.c_int(deriv), ctypes.c_int(ngrids), - rho_u.ctypes.data_as(ctypes.c_void_p), - rho_d.ctypes.data_as(ctypes.c_void_p), - outbuf.ctypes.data_as(ctypes.c_void_p), - ctypes.c_double(density_threshold)) - + outbuf = _eval_xc(xc_code, rho, spin, deriv, omega) exc = outbuf[0] vxc = fxc = kxc = None - if xctype == 'R-LDA': + xctype = xc_type(xc_code) + if xctype == 'LDA' and spin == 0: if deriv > 0: vxc = [outbuf[1]] if deriv > 1: fxc = [outbuf[2]] if deriv > 2: kxc = [outbuf[3]] - elif xctype == 'R-GGA': + elif xctype == 'GGA' and spin == 0: if deriv > 0: vxc = [outbuf[1], outbuf[2]] if deriv > 1: fxc = [outbuf[3], outbuf[4], outbuf[5]] if deriv > 2: kxc = [outbuf[6], outbuf[7], outbuf[8], outbuf[9]] - elif xctype == 'U-LDA': + elif xctype == 'LDA' and spin == 1: if deriv > 0: vxc = [outbuf[1:3].T] if deriv > 1: fxc = [outbuf[3:6].T] if deriv > 2: kxc = [outbuf[6:10].T] - elif xctype == 'U-GGA': + elif xctype == 'GGA' and spin == 1: if deriv > 0: vxc = [outbuf[1:3].T, outbuf[3:6].T] if deriv > 1: fxc = [outbuf[6:9].T, outbuf[9:15].T, outbuf[15:21].T] if deriv > 2: kxc = [outbuf[21:25].T, outbuf[25:34].T, outbuf[34:46].T, outbuf[46:56].T] - elif xctype == 'R-MGGA': + elif xctype == 'MGGA' and spin == 0: if deriv > 0: - vxc = [outbuf[1], outbuf[2], None, outbuf[4]] + vxc = [outbuf[1], outbuf[2], None, outbuf[3]] if deriv > 1: fxc = [ # v2rho2, v2rhosigma, v2sigma2, - outbuf[5], outbuf[6], outbuf[7], + outbuf[4], outbuf[5], outbuf[6], # v2lapl2, v2tau2, None, outbuf[9], # v2rholapl, v2rhotau, - None, outbuf[11], + None, outbuf[7], # v2lapltau, v2sigmalapl, v2sigmatau, - None, None, outbuf[14]] + None, None, outbuf[8]] if deriv > 2: # v3lapltau2 might not be strictly 0 # outbuf[18] = 0 kxc = [ # v3rho3, v3rho2sigma, v3rhosigma2, v3sigma3, - outbuf[15], outbuf[16], outbuf[17], outbuf[18], + outbuf[10], outbuf[11], outbuf[12], outbuf[13], # v3rho2lapl, v3rho2tau, - None, outbuf[20], + None, outbuf[14], # v3rhosigmalapl, v3rhosigmatau, - None, outbuf[22], + None, outbuf[15], # v3rholapl2, v3rholapltau, v3rhotau2, - None, None, outbuf[25], + None, None, outbuf[16], # v3sigma2lapl, v3sigma2tau, - None, outbuf[27], + None, outbuf[17], # v3sigmalapl2, v3sigmalapltau, v3sigmatau2, - None, None, outbuf[30], + None, None, outbuf[18], # v3lapl3, v3lapl2tau, v3lapltau2, v3tau3) - None, None, None, outbuf[34]] - elif xctype == 'U-MGGA': + None, None, None, outbuf[19]] + elif xctype == 'MGGA' and spin == 1: if deriv > 0: - vxc = [outbuf[1:3].T, outbuf[3:6].T, None, outbuf[8:10].T] + vxc = [outbuf[1:3].T, outbuf[3:6].T, None, outbuf[6:8].T] if deriv > 1: # v2lapltau might not be strictly 0 # outbuf[39:43] = 0 fxc = [ # v2rho2, v2rhosigma, v2sigma2, - outbuf[10:13].T, outbuf[13:19].T, outbuf[19:25].T, + outbuf[8:11].T, outbuf[11:17].T, outbuf[17:23].T, # v2lapl2, v2tau2, - None, outbuf[28:31].T, + None, outbuf[33:36].T, # v2rholapl, v2rhotau, - None, outbuf[35:39].T, + None, outbuf[23:27].T, # v2lapltau, v2sigmalapl, v2sigmatau, - None, None, outbuf[49:55].T] + None, None, outbuf[27:33].T] if deriv > 2: # v3lapltau2 might not be strictly 0 # outbuf[204:216] = 0 kxc = [ # v3rho3, v3rho2sigma, v3rhosigma2, v3sigma3, - outbuf[55:59].T, outbuf[59:68].T, outbuf[68:80].T, outbuf[80:90].T, + outbuf[36:40].T, outbuf[40:49].T, outbuf[49:61].T, outbuf[61:71].T, # v3rho2lapl, v3rho2tau, - None, outbuf[96:102].T, + None, outbuf[71:77].T, # v3rhosigmalapl, v3rhosigmatau, - None, outbuf[114:126].T, + None, outbuf[77:89].T, # v3rholapl2, v3rholapltau, v3rhotau2, - None, None, outbuf[140:146].T, + None, None, outbuf[89:95].T, # v3sigma2lapl, v3sigma2tau, - None, outbuf[158:170].T, + None, outbuf[95:107].T, # v3sigmalapl2, v3sigmalapltau, v3sigmatau2, - None, None, outbuf[191:200].T, + None, None, outbuf[107:116].T, # v3lapl3, v3lapl2tau, v3lapltau2, v3tau3) - None, None, None, outbuf[216:220].T] + None, None, None, outbuf[116:120].T] return exc, vxc, fxc, kxc +_GGA_SORT = { + (1, 2): numpy.array([ + 6, 7, 9, 10, 11, 8, 12, 13, 14, 15, 16, 17, 18, 19, 20, + ]), + (1, 3): numpy.array([ + 21, 22, 25, 26, 27, 23, 28, 29, 30, 34, 35, 36, 37, 38, 39, 24, 31, 32, + 33, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, + ]), + (1, 4): numpy.array([ + 56, 57, 61, 62, 63, 58, 64, 65, 66, 73, 74, 75, 76, 77, 78, 59, 67, 68, + 69, 79, 80, 81, 82, 83, 84, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 60, + 70, 71, 72, 85, 86, 87, 88, 89, 90, 101, 102, 103, 104, 105, 106, 107, + 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, + 122, 123, 124, 125, + ]) +} +_MGGA_SORT = { + (0, 2): numpy.array([ + 0, # v2rho2 + 1, # v2rhosigma + 3, # v2rhotau + 2, # v2sigma2 + 4, # v2sigmatau + 5, # v2tau2 + ]) + 4, + (0, 3): numpy.array([ + 0, # v3rho3 + 1, # v3rho2sigma + 4, # v3rho2tau + 2, # v3rhosigma2 + 5, # v3rhosigmatau + 6, # v3rhotau2 + 3, # v3sigma3 + 7, # v3sigma2tau + 8, # v3sigmatau2 + 9, # v3tau3 + ]) + 10, + (0, 4): numpy.array([ + 0, # v4rho4 + 1, # v4rho3sigma + 5, # v4rho3tau + 2, # v4rho2sigma2 + 6, # v4rho2sigmatau + 7, # v4rho2tau2 + 3, # v4rhosigma3 + 8, # v4rhosigma2tau + 9, # v4rhosigmatau2 + 10, # v4rhotau3 + 4, # v4sigma4 + 11, # v4sigma3tau + 12, # v4sigma2tau2 + 13, # v4sigmatau3 + 14, # v4tau4 + ]) + 20, + (1, 2): numpy.array([ + 8, 9, 11, 12, 13, 23, 24, 10, 14, 15, 16, 25, 26, 17, 18, 19, 27, 28, + 20, 21, 29, 30, 22, 31, 32, 33, 34, 35, + ]), + (1, 3): numpy.array([ + 36, 37, 40, 41, 42, 71, 72, 38, 43, 44, 45, 73, 74, 49, 50, 51, 77, 78, + 52, 53, 79, 80, 54, 81, 82, 89, 90, 91, 39, 46, 47, 48, 75, 76, 55, 56, + 57, 83, 84, 58, 59, 85, 86, 60, 87, 88, 92, 93, 94, 61, 62, 63, 95, 96, + 64, 65, 97, 98, 66, 99, 100, 107, 108, 109, 67, 68, 101, 102, 69, 103, + 104, 110, 111, 112, 70, 105, 106, 113, 114, 115, 116, 117, 118, 119, + ]), + (1, 4): numpy.array([ + 120, 121, 125, 126, 127, 190, 191, 122, 128, 129, 130, 192, 193, 137, + 138, 139, 198, 199, 140, 141, 200, 201, 142, 202, 203, 216, 217, 218, + 123, 131, 132, 133, 194, 195, 143, 144, 145, 204, 205, 146, 147, 206, + 207, 148, 208, 209, 219, 220, 221, 155, 156, 157, 225, 226, 158, 159, + 227, 228, 160, 229, 230, 249, 250, 251, 161, 162, 231, 232, 163, 233, + 234, 252, 253, 254, 164, 235, 236, 255, 256, 257, 267, 268, 269, 270, + 124, 134, 135, 136, 196, 197, 149, 150, 151, 210, 211, 152, 153, 212, + 213, 154, 214, 215, 222, 223, 224, 165, 166, 167, 237, 238, 168, 169, + 239, 240, 170, 241, 242, 258, 259, 260, 171, 172, 243, 244, 173, 245, + 246, 261, 262, 263, 174, 247, 248, 264, 265, 266, 271, 272, 273, 274, + 175, 176, 177, 275, 276, 178, 179, 277, 278, 180, 279, 280, 295, 296, + 297, 181, 182, 281, 282, 183, 283, 284, 298, 299, 300, 184, 285, 286, + 301, 302, 303, 313, 314, 315, 316, 185, 186, 287, 288, 187, 289, 290, + 304, 305, 306, 188, 291, 292, 307, 308, 309, 317, 318, 319, 320, 189, + 293, 294, 310, 311, 312, 321, 322, 323, 324, 325, 326, 327, 328, 329, + ]) +} + +def eval_xc1(xc_code, rho, spin=0, deriv=1, omega=None): + '''Similar to eval_xc. + Returns an array with the order of derivatives following xcfun convention. + ''' + out = _eval_xc(xc_code, rho, spin, deriv=deriv, omega=omega) + xctype = xc_type(xc_code) + if deriv <= 1: + return out + elif xctype == 'LDA' or xctype == 'HF': + return out + elif xctype == 'GGA': + if spin == 0: + return out + else: + idx = [numpy.arange(6)] # up to deriv=1 + for i in range(2, deriv+1): + idx.append(_GGA_SORT[(spin, i)]) + else: # MGGA + if spin == 0: + idx = [numpy.arange(4)] # up to deriv=1 + else: + idx = [numpy.arange(8)] # up to deriv=1 + for i in range(2, deriv+1): + idx.append(_MGGA_SORT[(spin, i)]) + return out[numpy.hstack(idx)] + +def _eval_xc(xc_code, rho, spin=0, deriv=1, omega=None): + assert deriv <= max_deriv_order(xc_code) + xctype = xc_type(xc_code) + assert xctype in ('HF', 'LDA', 'GGA', 'MGGA') + + rho = numpy.asarray(rho, order='C', dtype=numpy.double) + if xctype == 'MGGA' and rho.shape[-2] == 6: + rho = numpy.asarray(rho[...,[0,1,2,3,5],:], order='C') + + hyb, fn_facs = parse_xc(xc_code) + if omega is not None: + hyb = hyb[:2] + (float(omega),) + + fn_ids = [x[0] for x in fn_facs] + facs = [x[1] for x in fn_facs] + if hyb[2] != 0: + # Current implementation does not support different omegas for + # different RSH functionals if there are multiple RSHs + omega = [hyb[2]] * len(facs) + else: + omega = [0] * len(facs) + fn_ids_set = set(fn_ids) + if fn_ids_set.intersection(PROBLEMATIC_XC): + problem_xc = [PROBLEMATIC_XC[k] + for k in fn_ids_set.intersection(PROBLEMATIC_XC)] + warnings.warn('Libxc functionals %s may have discrepancy to xcfun ' + 'library.\n' % problem_xc) + + if any([needs_laplacian(fid) for fid in fn_ids]): + raise NotImplementedError('laplacian in meta-GGA method') + + nvar, xlen = xc_deriv._XC_NVAR[xctype, spin] + ngrids = rho.shape[-1] + rho = rho.reshape(spin+1,nvar,ngrids) + outlen = lib.comb(xlen+deriv, deriv) + out = numpy.zeros((outlen,ngrids)) + n = len(fn_ids) + if n > 0: + density_threshold = 0 + _itrf.LIBXC_eval_xc(ctypes.c_int(n), + (ctypes.c_int*n)(*fn_ids), + (ctypes.c_double*n)(*facs), + (ctypes.c_double*n)(*omega), + ctypes.c_int(spin), ctypes.c_int(deriv), + ctypes.c_int(nvar), ctypes.c_int(ngrids), + ctypes.c_int(outlen), + rho.ctypes.data_as(ctypes.c_void_p), + out.ctypes.data_as(ctypes.c_void_p), + ctypes.c_double(density_threshold)) + return out + +def eval_xc_eff(xc_code, rho, deriv=1, omega=None): + r'''Returns the derivative tensor against the density parameters + + [density_a, (nabla_x)_a, (nabla_y)_a, (nabla_z)_a, tau_a] + + or spin-polarized density parameters + + [[density_a, (nabla_x)_a, (nabla_y)_a, (nabla_z)_a, tau_a], + [density_b, (nabla_x)_b, (nabla_y)_b, (nabla_z)_b, tau_b]]. + + It differs from the eval_xc method in the derivatives of non-local part. + The eval_xc method returns the XC functional derivatives to sigma + (|\nabla \rho|^2) + + Args: + rho: 2-dimensional or 3-dimensional array + Total density or (spin-up, spin-down) densities (and their + derivatives if GGA or MGGA functionals) on grids + + Kwargs: + deriv: int + derivative orders + omega: float + define the exponent in the attenuated Coulomb for RSH functional + ''' + xctype = xc_type(xc_code) + rho = numpy.asarray(rho, order='C', dtype=numpy.double) + if xctype == 'MGGA' and rho.shape[-2] == 6: + rho = numpy.asarray(rho[...,[0,1,2,3,5],:], order='C') + + spin_polarized = rho.ndim >= 2 and rho.shape[0] == 2 + if spin_polarized: + spin = 1 + else: + spin = 0 + out = eval_xc1(xc_code, rho, spin, deriv, omega) + return xc_deriv.transform_xc(rho, out, xctype, spin, deriv) def define_xc_(ni, description, xctype='LDA', hyb=0, rsh=(0,0,0)): '''Define XC functional. See also :func:`eval_xc` for the rules of input description. diff --git a/pyscf/dft/numint.py b/pyscf/dft/numint.py index 6e61dddda6..2269f94d71 100644 --- a/pyscf/dft/numint.py +++ b/pyscf/dft/numint.py @@ -2553,13 +2553,14 @@ def cache_xc_kernel(ni, mol, grids, xc_code, mo_coeff, mo_occ, spin=0, ao_deriv = 2 if MGGA_DENSITY_LAPL else 1 else: ao_deriv = 0 + with_lapl = MGGA_DENSITY_LAPL if mo_coeff[0].ndim == 1: # RKS nao = mo_coeff.shape[0] rho = [] for ao, mask, weight, coords \ in ni.block_loop(mol, grids, nao, ao_deriv, max_memory=max_memory): - rho.append(ni.eval_rho2(mol, ao, mo_coeff, mo_occ, mask, xctype)) + rho.append(ni.eval_rho2(mol, ao, mo_coeff, mo_occ, mask, xctype, with_lapl)) rho = numpy.hstack(rho) if spin == 1: # RKS with nr_rks_fxc_st rho *= .5 @@ -2572,8 +2573,8 @@ def cache_xc_kernel(ni, mol, grids, xc_code, mo_coeff, mo_occ, spin=0, rhob = [] for ao, mask, weight, coords \ in ni.block_loop(mol, grids, nao, ao_deriv, max_memory=max_memory): - rhoa.append(ni.eval_rho2(mol, ao, mo_coeff[0], mo_occ[0], mask, xctype)) - rhob.append(ni.eval_rho2(mol, ao, mo_coeff[1], mo_occ[1], mask, xctype)) + rhoa.append(ni.eval_rho2(mol, ao, mo_coeff[0], mo_occ[0], mask, xctype, with_lapl)) + rhob.append(ni.eval_rho2(mol, ao, mo_coeff[1], mo_occ[1], mask, xctype, with_lapl)) rho = (numpy.hstack(rhoa), numpy.hstack(rhob)) vxc, fxc = ni.eval_xc_eff(xc_code, rho, deriv=2, xctype=xctype)[1:3] return rho, vxc, fxc @@ -2681,42 +2682,25 @@ def eval_xc_eff(self, xc_code, rho, deriv=1, omega=None, xctype=None, ''' if omega is None: omega = self.omega if xctype is None: xctype = self._xc_type(xc_code) - rhop = numpy.asarray(rho) - if xctype == 'LDA': - spin_polarized = rhop.ndim >= 2 - else: - spin_polarized = rhop.ndim == 3 + rho = numpy.asarray(rho, order='C', dtype=numpy.double) + if xctype == 'MGGA' and rho.shape[-2] == 6: + rho = numpy.asarray(rho[...,[0,1,2,3,5],:], order='C') + spin_polarized = rho.ndim >= 2 and rho.shape[0] == 2 if spin_polarized: - assert rhop.shape[0] == 2 spin = 1 - if rhop.shape[1] == 5: # MGGA - ngrids = rhop.shape[2] - rhop = numpy.empty((2, 6, ngrids)) - rhop[0,:4] = rho[0][:4] - rhop[1,:4] = rho[1][:4] - rhop[:,4] = 0 - rhop[0,5] = rho[0][4] - rhop[1,5] = rho[1][4] else: spin = 0 - if rhop.shape[0] == 5: # MGGA - ngrids = rho.shape[1] - rhop = numpy.empty((6, ngrids)) - rhop[:4] = rho[:4] - rhop[4] = 0 - rhop[5] = rho[4] - - exc, vxc, fxc, kxc = self.eval_xc(xc_code, rhop, spin, 0, deriv, omega, - verbose) - if deriv > 2: - kxc = xc_deriv.transform_kxc(rhop, fxc, kxc, xctype, spin) - if deriv > 1: - fxc = xc_deriv.transform_fxc(rhop, vxc, fxc, xctype, spin) - if deriv > 0: - vxc = xc_deriv.transform_vxc(rhop, vxc, xctype, spin) - return exc, vxc, fxc, kxc + + out = self.libxc.eval_xc1(xc_code, rho, spin, deriv, omega) + evfk = [out[0]] + for order in range(1, deriv+1): + evfk.append(xc_deriv.transform_xc(rho, out, xctype, spin, order)) + if deriv < 3: + # The return has at least [e, v, f, k] terms + evfk.extend([None] * (3 - deriv)) + return evfk def _xc_type(self, xc_code): return self.libxc.xc_type(xc_code) diff --git a/pyscf/dft/numint2c.py b/pyscf/dft/numint2c.py index 4939542484..f34bc6b892 100644 --- a/pyscf/dft/numint2c.py +++ b/pyscf/dft/numint2c.py @@ -271,41 +271,29 @@ def _eval_xc_eff(ni, xc_code, rho, deriv=1, omega=None, xctype=None, else: raise RuntimeError(f'Unknown collinear scheme {ni.collinear}') - if xctype == 'MGGA': - ngrids = rho.shape[2] - rhop = np.empty((2, 6, ngrids)) - rhop[0,:4] = (t[:4] + s[:4]) * .5 - rhop[1,:4] = (t[:4] - s[:4]) * .5 - # Padding for laplacian - rhop[:,4] = 0 - rhop[0,5] = (t[4] + s[4]) * .5 - rhop[1,5] = (t[4] - s[4]) * .5 - else: - rhop = np.stack([(t + s) * .5, (t - s) * .5]) + rho = np.stack([(t + s) * .5, (t - s) * .5]) + if xctype == 'MGGA' and rho.shape[1] == 6: + rho = np.asarray(rho[:,[0,1,2,3,5],:], order='C') spin = 1 - exc, vxc, fxc, kxc = ni.eval_xc(xc_code, rhop, spin, 0, deriv, omega, - verbose) - if deriv > 2: - kxc = xc_deriv.transform_kxc(rhop, fxc, kxc, xctype, spin) - if deriv > 1: - fxc = xc_deriv.transform_fxc(rhop, vxc, fxc, xctype, spin) - if deriv > 0: - vxc = xc_deriv.transform_vxc(rhop, vxc, xctype, spin) - return exc, vxc, fxc, kxc + out = ni.libxc.eval_xc1(xc_code, rho, spin, deriv, omega) + evfk = [out[0]] + for order in range(1, deriv+1): + evfk.append(xc_deriv.transform_xc(rho, out, xctype, spin, order)) + if deriv < 3: + # The return has at least [e, v, f, k] terms + evfk.extend([None] * (3 - deriv)) + return evfk # * Mcfun requires functional derivatives to total-density and spin-density. # * Make it a global function than a closure so as to be callable by multiprocessing def __mcfun_fn_eval_xc(ni, xc_code, xctype, rho, deriv): - exc, vxc, fxc, kxc = ni.eval_xc_eff(xc_code, rho, deriv=deriv, xctype=xctype) - if deriv > 0: - vxc = xc_deriv.ud2ts(vxc) - if deriv > 1: - fxc = xc_deriv.ud2ts(fxc) - if deriv > 2: - kxc = xc_deriv.ud2ts(kxc) - return exc, vxc, fxc, kxc + evfk = ni.eval_xc_eff(xc_code, rho, deriv=deriv, xctype=xctype) + for order in range(1, deriv+1): + if evfk[order] is not None: + evfk[order] = xc_deriv.ud2ts(evfk[order]) + return evfk def mcfun_eval_xc_adapter(ni, xc_code): '''Wrapper to generate the eval_xc function required by mcfun''' @@ -566,9 +554,9 @@ def cache_xc_kernel(self, mol, grids, xc_code, mo_coeff, mo_occ, spin=0, ao_deriv = 0 n2c = mo_coeff.shape[0] nao = n2c // 2 + with_lapl = numint.MGGA_DENSITY_LAPL if self.collinear[0] in ('m', 'n'): # mcol or ncol - with_lapl = False rho = [] for ao, mask, weight, coords \ in self.block_loop(mol, grids, nao, ao_deriv, max_memory): @@ -587,7 +575,6 @@ def cache_xc_kernel(self, mol, grids, xc_code, mo_coeff, mo_occ, spin=0, dm_a = dm[:nao,:nao].real.copy('C') dm_b = dm[nao:,nao:].real.copy('C') ni = self._to_numint1c() - with_lapl = True hermi = 1 rhoa = [] rhob = [] diff --git a/pyscf/dft/test/test_xc_deriv.py b/pyscf/dft/test/test_xc_deriv.py index b3e293f63c..03655f45e1 100644 --- a/pyscf/dft/test/test_xc_deriv.py +++ b/pyscf/dft/test/test_xc_deriv.py @@ -117,6 +117,59 @@ def v5to6(v5): v6[:,[0,1,2,3,5]] = v5 return v6 +def eval_xc_eff(xc_code, rho, deriv, mod): + xctype = mod.xc_type(xc_code) + rhop = np.asarray(rho) + + if xctype == 'LDA': + spin_polarized = rhop.ndim >= 2 + else: + spin_polarized = rhop.ndim == 3 + + if spin_polarized: + assert rhop.shape[0] == 2 + spin = 1 + if rhop.ndim == 3 and rhop.shape[1] == 5: # MGGA + ngrids = rhop.shape[2] + rhop = np.empty((2, 6, ngrids)) + rhop[0,:4] = rho[0][:4] + rhop[1,:4] = rho[1][:4] + rhop[:,4] = 0 + rhop[0,5] = rho[0][4] + rhop[1,5] = rho[1][4] + else: + spin = 0 + if rhop.ndim == 2 and rhop.shape[0] == 5: # MGGA + ngrids = rho.shape[1] + rhop = np.empty((6, ngrids)) + rhop[:4] = rho[:4] + rhop[4] = 0 + rhop[5] = rho[4] + + exc, vxc, fxc, kxc = mod.eval_xc(xc_code, rhop, spin, 0, deriv) + if deriv > 2: + kxc = xc_deriv.transform_kxc(rhop, fxc, kxc, xctype, spin) + if deriv > 1: + fxc = xc_deriv.transform_fxc(rhop, vxc, fxc, xctype, spin) + if deriv > 0: + vxc = xc_deriv.transform_vxc(rhop, vxc, xctype, spin) + return exc, vxc, fxc, kxc + +def setUpModule(): + global rho + rho = np.array( + [[[ 0.17283732, 0.17272921, 0.17244017, 0.17181541, 0.17062690], + [-0.01025988,-0.02423402,-0.04315779,-0.06753381,-0.09742367], + [ 0.00219947, 0.00222727, 0.00226589, 0.00231774, 0.00238570], + [ 0.00151577, 0.00153381, 0.00155893, 0.00159277, 0.00163734], + [ 0.00323925, 0.00386831, 0.00520072, 0.00774571, 0.01218266]], + [[ 0.17443331, 0.17436845, 0.17413969, 0.17359613, 0.17251427], + [-0.00341093,-0.01727580,-0.03605226,-0.06023877,-0.08989467], + [ 0.00357202, 0.00361952, 0.00368537, 0.00377355, 0.00388873], + [ 0.00233614, 0.00236578, 0.00240683, 0.00246173, 0.00253334], + [ 0.00473343, 0.00533707, 0.00663345, 0.00912920, 0.01350123],]] + ) + class KnownValues(unittest.TestCase): def test_gga_deriv1(self): ng = 7 @@ -267,6 +320,217 @@ def test_ud2ts(self): self.assertAlmostEqual(abs(xc_deriv.ud2ts(f_ud) - f_ts).max(), 0, 12) self.assertAlmostEqual(abs(xc_deriv.ud2ts(k_ud) - k_ts).max(), 0, 12) + def test_libxc_lda_deriv3(self): + rho1 = rho[:,0].copy() + ref = eval_xc_eff('LDA,', rho1, 3, dft.libxc) + + xc1 = dft.libxc.eval_xc_eff('LDA,', rho1, deriv=3) + self.assertAlmostEqual(xc1.sum(), 51.36053114469969, 9) + self.assertAlmostEqual(abs(ref[3] - xc1).max(), 0, 9) + + xc1 = dft.libxc.eval_xc_eff('LDA,', rho1, deriv=2) + self.assertAlmostEqual(xc1.sum(), -13.323225829690143, 9) + self.assertAlmostEqual(abs(ref[2] - xc1).max(), 0, 9) + + xc1 = dft.libxc.eval_xc_eff('LDA,', rho1, deriv=1) + self.assertAlmostEqual(xc1.sum(), -6.912554696220437, 9) + self.assertAlmostEqual(abs(ref[1] - xc1).max(), 0, 9) + + rho1 = rho[1,0].copy() + ref = eval_xc_eff('LDA,', rho1, 3, dft.libxc) + + xc1 = dft.libxc.eval_xc_eff('LDA,', rho1, deriv=3) + self.assertAlmostEqual(xc1.sum(), 20.21333987261437, 9) + self.assertAlmostEqual(abs(ref[3] - xc1).max(), 0, 9) + + xc1 = dft.libxc.eval_xc_eff('LDA,', rho1, deriv=2) + self.assertAlmostEqual(xc1.sum(), -5.269784014086463, 9) + self.assertAlmostEqual(abs(ref[2] - xc1).max(), 0, 9) + + xc1 = dft.libxc.eval_xc_eff('LDA,', rho1, deriv=1) + self.assertAlmostEqual(xc1.sum(), -2.7477984980958627, 9) + self.assertAlmostEqual(abs(ref[1] - xc1).max(), 0, 9) + + def test_libxc_gga_deriv3(self): + rho1 = rho[:,:4].copy() + ref = eval_xc_eff('PBE', rho1, 3, dft.libxc) + + xc1 = dft.libxc.eval_xc_eff('PBE', rho1, deriv=3) + self.assertAlmostEqual(xc1.sum(), 61.29042037001073, 3) + self.assertAlmostEqual(abs(ref[3] - xc1).max(), 0, 9) + + xc1 = dft.libxc.eval_xc_eff('PBE', rho1, deriv=2) + self.assertAlmostEqual(xc1.sum(), -13.896034377219816, 4) + self.assertAlmostEqual(abs(ref[2] - xc1).max(), 0, 9) + + xc1 = dft.libxc.eval_xc_eff('PBE', rho1, deriv=1) + self.assertAlmostEqual(xc1.sum(), -7.616226587554259, 6) + self.assertAlmostEqual(abs(ref[1] - xc1).max(), 0, 9) + + rho1 = rho[1,:4].copy() + ni = numint.NumInt() + ref = eval_xc_eff('PBE', rho1, 3, dft.libxc) + + xc1 = dft.libxc.eval_xc_eff('PBE', rho1, deriv=3) + self.assertAlmostEqual(xc1.sum(), 26.08081046374974, 3) + self.assertAlmostEqual(abs(ref[3] - xc1).max(), 0, 9) + + xc1 = dft.libxc.eval_xc_eff('PBE', rho1, deriv=2) + self.assertAlmostEqual(xc1.sum(), -5.559303849017572, 4) + self.assertAlmostEqual(abs(ref[2] - xc1).max(), 0, 9) + + xc1 = dft.libxc.eval_xc_eff('PBE', rho1, deriv=1) + self.assertAlmostEqual(xc1.sum(), -3.0715856471099032, 6) + self.assertAlmostEqual(abs(ref[1] - xc1).max(), 0, 9) + + def test_libxc_mgga_deriv3(self): + rho1 = rho + ref = eval_xc_eff('M06', rho1, 3, dft.libxc) + + xc1 = dft.libxc.eval_xc_eff('M06', rho1, deriv=3) + self.assertAlmostEqual(xc1.sum(), 3461867.985594323, 1) + self.assertAlmostEqual(abs(ref[3] - xc1).max(), 0, 9) + + xc1 = dft.libxc.eval_xc_eff('M06', rho1, deriv=2) + self.assertAlmostEqual(xc1.sum(), -19196.865088253828, 3) + self.assertAlmostEqual(abs(ref[2] - xc1).max(), 0, 9) + + xc1 = dft.libxc.eval_xc_eff('M06', rho1, deriv=1) + self.assertAlmostEqual(xc1.sum(), 90.99262909378264, 6) + self.assertAlmostEqual(abs(ref[1] - xc1).max(), 0, 9) + + rho1 = rho[1] + ni = numint.NumInt() + ref = eval_xc_eff('M06', rho1, 3, dft.libxc) + + xc1 = dft.libxc.eval_xc_eff('M06', rho1, deriv=3) + self.assertAlmostEqual(xc1.sum(), 2506574.915698602, 1) + self.assertAlmostEqual(abs(ref[3] - xc1).max(), 0, 9) + + xc1 = dft.libxc.eval_xc_eff('M06', rho1, deriv=2) + self.assertAlmostEqual(xc1.sum(), -9308.64852580393, 3) + self.assertAlmostEqual(abs(ref[2] - xc1).max(), 0, 9) + + xc1 = dft.libxc.eval_xc_eff('M06', rho1, deriv=1) + self.assertAlmostEqual(xc1.sum(), 19.977512805950784, 7) + self.assertAlmostEqual(abs(ref[1] - xc1).max(), 0, 9) + + @unittest.skip(dft.libxc.max_deriv_order('pbe,') <= 3) + def test_libxc_gga_deriv4(self): + rho1 = rho[:,:4].copy() + xc1 = dft.libxc.eval_xc_eff('PBE', rho1, deriv=4) + self.assertAlmostEqual(xc1.sum(), -920.135878252819, 4) + + rho1 = rho[1,:4].copy() + xc1 = dft.libxc.eval_xc_eff('PBE', rho1, deriv=4) + self.assertAlmostEqual(xc1.sum(), -869.6617638095072, 4) + + @unittest.skip(not hasattr(dft, 'xcfun')) + def test_xcfun_lda_deriv3(self): + rho1 = rho[:,0].copy() + ref = eval_xc_eff('LDA,', rho1, 3, dft.xcfun) + + xc1 = dft.xcfun.eval_xc_eff('LDA,', rho1, deriv=3) + self.assertAlmostEqual(xc1.sum(), 51.36053114469969, 9) + self.assertAlmostEqual(abs(ref[3] - xc1).max(), 0, 9) + + xc1 = dft.xcfun.eval_xc_eff('LDA,', rho1, deriv=2) + self.assertAlmostEqual(xc1.sum(), -13.323225829690143, 9) + self.assertAlmostEqual(abs(ref[2] - xc1).max(), 0, 9) + + xc1 = dft.xcfun.eval_xc_eff('LDA,', rho1, deriv=1) + self.assertAlmostEqual(xc1.sum(), -6.912554696220437, 9) + self.assertAlmostEqual(abs(ref[1] - xc1).max(), 0, 9) + + rho1 = rho[1,0].copy() + ref = eval_xc_eff('LDA,', rho1, 3, dft.xcfun) + + xc1 = dft.xcfun.eval_xc_eff('LDA,', rho1, deriv=3) + self.assertAlmostEqual(xc1.sum(), 20.21333987261437, 9) + self.assertAlmostEqual(abs(ref[3] - xc1).max(), 0, 9) + + xc1 = dft.xcfun.eval_xc_eff('LDA,', rho1, deriv=2) + self.assertAlmostEqual(xc1.sum(), -5.269784014086463, 9) + self.assertAlmostEqual(abs(ref[2] - xc1).max(), 0, 9) + + xc1 = dft.xcfun.eval_xc_eff('LDA,', rho1, deriv=1) + self.assertAlmostEqual(xc1.sum(), -2.7477984980958627, 9) + self.assertAlmostEqual(abs(ref[1] - xc1).max(), 0, 9) + + @unittest.skip(not hasattr(dft, 'xcfun')) + def test_xcfun_gga_deriv3(self): + rho1 = rho[:,:4].copy() + ref = eval_xc_eff('PBE', rho1, 3, dft.xcfun) + + xc1 = dft.xcfun.eval_xc_eff('PBE', rho1, deriv=3) + self.assertAlmostEqual(xc1.sum(), 61.29042037001073, 9) + self.assertAlmostEqual(abs(ref[3] - xc1).max(), 0, 9) + + xc1 = dft.xcfun.eval_xc_eff('PBE', rho1, deriv=2) + self.assertAlmostEqual(xc1.sum(), -13.896034377219816, 9) + self.assertAlmostEqual(abs(ref[2] - xc1).max(), 0, 9) + + xc1 = dft.xcfun.eval_xc_eff('PBE', rho1, deriv=1) + self.assertAlmostEqual(xc1.sum(), -7.616226587554259, 9) + self.assertAlmostEqual(abs(ref[1] - xc1).max(), 0, 9) + + rho1 = rho[1,:4].copy() + ref = eval_xc_eff('PBE', rho1, 3, dft.xcfun) + + xc1 = dft.xcfun.eval_xc_eff('PBE', rho1, deriv=3) + self.assertAlmostEqual(xc1.sum(), 26.08081046374974, 9) + self.assertAlmostEqual(abs(ref[3] - xc1).max(), 0, 9) + + xc1 = dft.xcfun.eval_xc_eff('PBE', rho1, deriv=2) + self.assertAlmostEqual(xc1.sum(), -5.559303849017572, 9) + self.assertAlmostEqual(abs(ref[2] - xc1).max(), 0, 9) + + xc1 = dft.xcfun.eval_xc_eff('PBE', rho1, deriv=1) + self.assertAlmostEqual(xc1.sum(), -3.0715856471099032, 9) + self.assertAlmostEqual(abs(ref[1] - xc1).max(), 0, 9) + + @unittest.skip(not hasattr(dft, 'xcfun')) + def test_xcfun_mgga_deriv3(self): + rho1 = rho + ref = eval_xc_eff('M06', rho1, 3, dft.xcfun) + + xc1 = dft.xcfun.eval_xc_eff('M06', rho1, deriv=3) + self.assertAlmostEqual(xc1.sum(), 3461867.985594323, 5) + self.assertAlmostEqual(abs(ref[3] - xc1).max(), 0, 9) + + xc1 = dft.xcfun.eval_xc_eff('M06', rho1, deriv=2) + self.assertAlmostEqual(xc1.sum(), -19196.865088253828, 5) + self.assertAlmostEqual(abs(ref[2] - xc1).max(), 0, 9) + + xc1 = dft.xcfun.eval_xc_eff('M06', rho1, deriv=1) + self.assertAlmostEqual(xc1.sum(), 90.99262909378264, 9) + self.assertAlmostEqual(abs(ref[1] - xc1).max(), 0, 9) + + rho1 = rho[1] + ref = eval_xc_eff('M06', rho1, 3, dft.xcfun) + + xc1 = dft.xcfun.eval_xc_eff('M06', rho1, deriv=3) + self.assertAlmostEqual(xc1.sum(), 2506574.915698602, 5) + self.assertAlmostEqual(abs(ref[3] - xc1).max(), 0, 9) + + xc1 = dft.xcfun.eval_xc_eff('M06', rho1, deriv=2) + self.assertAlmostEqual(xc1.sum(), -9308.64852580393, 5) + self.assertAlmostEqual(abs(ref[2] - xc1).max(), 0, 9) + + xc1 = dft.xcfun.eval_xc_eff('M06', rho1, deriv=1) + self.assertAlmostEqual(xc1.sum(), 19.977512805950784, 9) + self.assertAlmostEqual(abs(ref[1] - xc1).max(), 0, 9) + + @unittest.skip(not (hasattr(dft, 'xcfun') and dft.xcfun.MAX_DERIV_ORDER > 3)) + def test_xcfun_gga_deriv4(self): + rho1 = rho[:,:4].copy() + xc1 = dft.xcfun.eval_xc_eff('PBE', rho1, deriv=4) + self.assertAlmostEqual(xc1.sum(), -920.135878252819, 9) + + rho1 = rho[1,:4].copy() + xc1 = dft.xcfun.eval_xc_eff('PBE', rho1, deriv=4) + self.assertAlmostEqual(xc1.sum(), -869.6617638095072, 9) + if __name__ == "__main__": print("Test xc_deriv") unittest.main() diff --git a/pyscf/dft/test/test_xcfun.py b/pyscf/dft/test/test_xcfun.py index 70bf9e7da1..c2c860f221 100644 --- a/pyscf/dft/test/test_xcfun.py +++ b/pyscf/dft/test/test_xcfun.py @@ -244,8 +244,15 @@ def test_vs_libxc_rks(self): rho = dft.numint.eval_rho(mol, ao, dm, xctype='MGGA', with_lapl=True) rhoa = rho[:,:200] def check(xc_code, deriv=3, e_place=9, v_place=8, f_place=6, k_place=4): - exc0, vxc0, fxc0, kxc0 = dft.libxc.eval_xc(xc_code, rhoa, 0, deriv=deriv) - exc1, vxc1, fxc1, kxc1 = dft.xcfun.eval_xc(xc_code, rhoa, 0, deriv=deriv) + xctype = dft.libxc.xc_type(xc_code) + if xctype == 'LDA': + nv = 1 + elif xctype == 'GGA': + nv = 4 + else: + nv = 6 + exc0, vxc0, fxc0, kxc0 = dft.libxc.eval_xc(xc_code, rhoa[:nv], 0, deriv=deriv) + exc1, vxc1, fxc1, kxc1 = dft.xcfun.eval_xc(xc_code, rhoa[:nv], 0, deriv=deriv) self.assertAlmostEqual(abs(exc0-exc1).max(), 0, e_place) if deriv > 0: for v0, v1 in zip(vxc0, vxc1): @@ -338,8 +345,15 @@ def test_vs_libxc_uks(self): rhoa = rho[:,:200] rhob = rhoa + rho[:,200:400] def check(xc_code, deriv=3, e_place=9, v_place=8, f_place=6, k_place=4): - exc0, vxc0, fxc0, kxc0 = dft.libxc.eval_xc(xc_code, (rhoa, rhob), 1, deriv=deriv) - exc1, vxc1, fxc1, kxc1 = dft.xcfun.eval_xc(xc_code, (rhoa, rhob), 1, deriv=deriv) + xctype = dft.libxc.xc_type(xc_code) + if xctype == 'LDA': + nv = 1 + elif xctype == 'GGA': + nv = 4 + else: + nv = 6 + exc0, vxc0, fxc0, kxc0 = dft.libxc.eval_xc(xc_code, (rhoa[:nv], rhob[:nv]), 1, deriv=deriv) + exc1, vxc1, fxc1, kxc1 = dft.xcfun.eval_xc(xc_code, (rhoa[:nv], rhob[:nv]), 1, deriv=deriv) self.assertAlmostEqual(abs(exc0-exc1).max(), 0, e_place) if deriv > 0: for v0, v1 in zip(vxc0, vxc1): diff --git a/pyscf/dft/xc_deriv.py b/pyscf/dft/xc_deriv.py index 408a0c25ca..59d4c4d905 100644 --- a/pyscf/dft/xc_deriv.py +++ b/pyscf/dft/xc_deriv.py @@ -19,6 +19,9 @@ Transform XC functional derivatives between different representations ''' +import math +import itertools +from functools import lru_cache import ctypes import numpy as np from pyscf import lib @@ -449,100 +452,201 @@ def _stack_fggg(fggg, axis=0, rho=None): fggg = _stack_fg(fggg, axis=axis+1, rho=rho) return _stack_fg(fggg, axis=axis, rho=rho) -def compress(vp, spin=0): - if spin != 0: # spin polarized - shape = vp.shape - comp_shape = [shape[i*2]*shape[i*2+1] for i in range(0, vp.ndim-1, 2)] - vp = vp.reshape(comp_shape + [shape[-1]]) - - order = vp.ndim - 1 - if order < 2: - pass - elif order == 2: # 2nd derivatives - vp = vp[np.tril_indices(vp.shape[0])] - elif order == 3: # 2nd derivatives - nd = vp.shape[0] - vp = np.vstack([vp[i][np.tril_indices(i+1)] for i in range(nd)]) - else: - raise NotImplementedError('High order derivatives') - return vp +@lru_cache(100) +def _product_uniq_indices(nvars, order): + ''' + Indexing the symmetry unique elements in cartensian product + ''' + # n = 0 + # for i range(nvars): + # for j in range(i, nvars): + # for k in range(k, nvars): + # ... + # prod[(i,j,k,...)] = n + # n += 1 + prod = np.ones([nvars] * order, dtype=int) + uniq_idx = list(itertools.combinations_with_replacement(range(nvars), order)) + prod[tuple(np.array(uniq_idx).T)] = np.arange(len(uniq_idx)) + + idx = np.where(prod) + sidx = np.sort(idx, axis=0) + prod[idx] = prod[tuple(sidx)] + return prod + +def _pair_combinations(lst): + n = len(lst) + if n <= 2: + return [[tuple(lst)]] + a = lst[0] + results = [] + for i in range(1, n): + pair = (a, lst[i]) + rests = _pair_combinations(lst[1:i]+lst[i+1:]) + for rest in rests: + rest.append(pair) + results.extend(rests) + return results + +def _unfold_gga(rho, xc_val, spin, order, nvar, xlen, reserve=0): + assert nvar >= 4 + ngrids = rho.shape[-1] + n_transform = order - reserve -def decompress(vp, spin=0): - order = vp.ndim - 1 - ngrids = vp.shape[-1] - if order < 2: - out = vp - elif order == 2: # 2nd derivatives - nd = vp.shape[0] - out = np.empty((nd, nd, ngrids)) - idx, idy = np.tril_indices(nd) - out[idx,idy] = vp - out[idy,idx] = vp - elif order == 3: # 2nd derivatives - nd = vp.shape[0] - out = np.empty((nd, nd, nd, ngrids)) - p1 = 0 - for i in range(nd): - idx, idy = np.tril_indices(i+1) - p0, p1 = p1, p1+idx.size - out[i,idx,idy] = vp[p0:p1] - out[i,idy,idx] = vp[p0:p1] - out[idx,i,idy] = vp[p0:p1] - out[idy,i,idx] = vp[p0:p1] - out[idx,idy,i] = vp[p0:p1] - out[idy,idx,i] = vp[p0:p1] + if spin == 0: + nvar2 = nvar + drv = libdft.VXCunfold_sigma_spin0 + else: + nvar2 = nvar * 2 + drv = libdft.VXCunfold_sigma_spin1 + + # xc_val[idx] expands unique xc elements to n-order tensors + idx = _product_uniq_indices(xlen, order) + xc_tensor = np.empty((xlen**reserve * nvar2**n_transform, ngrids)) + xc_tensor[:idx.size] = xc_val[idx.ravel()] + buf = np.empty_like(xc_tensor) + for i in range(n_transform): + xc_tensor, buf = buf, xc_tensor + ncounts = xlen**(order-1-i) * nvar2**i + drv(xc_tensor.ctypes, buf.ctypes, rho.ctypes, + ctypes.c_int(ncounts), ctypes.c_int(nvar), ctypes.c_int(ngrids)) + if spin == 0: + xc_tensor = xc_tensor.reshape([xlen]*reserve + [nvar]*n_transform + [ngrids]) else: - raise NotImplementedError('High order derivatives') + xc_tensor = xc_tensor.reshape([xlen]*reserve + [2,nvar]*n_transform + [ngrids]) + return xc_tensor + +def _diagonal_indices(idx, order): + assert order > 0 + n = len(idx) + diag_idx = [np.asarray(idx)] + for i in range(1, order): + last_dim = diag_idx[-1] + diag_idx = [np.repeat(idx, n) for x in diag_idx] + diag_idx.append(np.tile(last_dim, n)) + # repeat(diag_idx, 2) + return tuple(x for x in diag_idx for i in range(2)) + +_XC_NVAR = { + ('HF', 0): (1, 1), + ('HF', 1): (1, 2), + ('LDA', 0): (1, 1), + ('LDA', 1): (1, 2), + ('GGA', 0): (4, 2), + ('GGA', 1): (4, 5), + ('MGGA', 0): (5, 3), + ('MGGA', 1): (5, 7), +} + +def transform_xc(rho, xc_val, xctype, spin, order): + '''General transformation to construct XC derivative tensor''' + xc_val = np.asarray(xc_val, order='C') + if order == 0: + return xc_val[0] + + nvar, xlen = _XC_NVAR[xctype, spin] + ngrids = xc_val.shape[-1] + offsets = [0] + [count_combinations(xlen, o) for o in range(order+1)] + p0, p1 = offsets[order:order+2] + if xctype == 'LDA' or xctype == 'HF': + xc_out = xc_val[p0:p1] + if spin == 0: + return xc_out.reshape([1]*order + [ngrids]) + else: + idx = _product_uniq_indices(xlen, order) + return xc_out[idx].reshape([2,1]*order + [ngrids]) - if spin != 0: # spin polarized - shape = out.shape - nvar = shape[0] // 2 - decomp_shape = [(2, nvar)] * order + [ngrids] - # reshape to (2,n,2,n,...,ngrids) - out = out.reshape(np.hstack(decomp_shape)) - return out + rho = np.asarray(rho, order='C') + assert rho.size == (spin+1) * nvar * ngrids + xc_tensor = _unfold_gga(rho, xc_val[p0:p1], spin, order, nvar, xlen) + nabla_idx = np.arange(1, 4) + if spin == 0: + for n_pairs in range(1, order//2+1): + p0, p1 = offsets[order-n_pairs:order-n_pairs+2] + xc_sub = _unfold_gga(rho, xc_val[p0:p1], spin, order-n_pairs, + nvar, xlen, n_pairs) + # Just the sigma components + xc_sub = xc_sub[(1,)*n_pairs] * 2**n_pairs + + # low_sigmas indicates the index for the extra sigma terms + low_sigmas = itertools.combinations(range(order), n_pairs*2) + pair_combs = [list(itertools.chain(*p[::-1])) + for p in _pair_combinations(list(range(n_pairs*2)))] + diag_idx = _diagonal_indices(nabla_idx, n_pairs) + + for dim_lst in low_sigmas: + rest_dims = [i for i in range(xc_tensor.ndim) if i not in dim_lst] + for pair_comb in pair_combs: + xc_tensor_1 = xc_tensor.transpose( + [dim_lst[i] for i in pair_comb] + rest_dims) + xc_tensor_1[diag_idx] += xc_sub + else: + for n_pairs in range(1, order//2+1): + p0, p1 = offsets[order-n_pairs:order-n_pairs+2] + xc_sub = _unfold_gga(rho, xc_val[p0:p1], spin, order-n_pairs, + nvar, xlen, n_pairs) + # Just the sigma components + xc_sub = xc_sub[(slice(2,5),) * n_pairs] + for i in range(n_pairs): + xc_sub[(slice(None),)*i+(0,)] *= 2 + xc_sub[(slice(None),)*i+(2,)] *= 2 + sigma_idx = _product_uniq_indices(2, n_pairs*2) + xc_sub = xc_sub.reshape((3**n_pairs,) + xc_sub.shape[n_pairs:]) + xc_sub = xc_sub[sigma_idx] + + low_sigmas = itertools.combinations(range(order), n_pairs*2) + pair_combs = [list(itertools.chain(*p[::-1])) + for p in _pair_combinations(list(range(n_pairs*2)))] + diag_idx = _diagonal_indices(nabla_idx, n_pairs) + + for dim_lst in low_sigmas: + dim_lst2 = np.array(dim_lst)*2 + np.array([1, 0])[:,None] + rest_dims = [i for i in range(xc_tensor.ndim) if i not in dim_lst2] + for pair_comb in pair_combs: + leading_dims = list(dim_lst2[:,pair_comb].ravel()) + xc_tensor_1 = xc_tensor.transpose(leading_dims + rest_dims) + xc_tensor_1[diag_idx] += xc_sub + return xc_tensor + +def count_combinations(nvar, order): + '''sum(len(combinations_with_replacement(range(nvar), o) for o in range(order))''' + return lib.comb(nvar+order, order) def ud2ts(v_ud): '''XC derivatives spin-up/spin-down representations to total-density/spin-density representations''' v_ud = np.asarray(v_ud, order='C') - nvar, ngrids = v_ud.shape[-2:] - #:c = np.array([[.5, .5], # vrho = (va + vb) / 2 - #: [.5, -.5]]) # vs = (va - vb) / 2 - if v_ud.ndim == 3: # vxc - #:v_ts = np.einsum('ra,axg->rxg', c, v_ud) - drv = libdft.VXCud2ts_deriv1 - elif v_ud.ndim == 5: # fxc - #:v_ts = np.einsum('ra,axbyg->rxbyg', c, v_ud) - #:v_ts = np.einsum('sb,rxbyg->rxsyg', c, v_ts) - drv = libdft.VXCud2ts_deriv2 - elif v_ud.ndim == 7: # kxc - #:v_ts = np.einsum('ra,axbyczg->rxbyczg', c, v_ud) - #:v_ts = np.einsum('sb,rxbyczg->rxsyczg', c, v_ts) - #:v_ts = np.einsum('tc,rxsyczg->rxsytzg', c, v_ts) - drv = libdft.VXCud2ts_deriv3 - else: - raise NotImplementedError(f'Shape {v_ud.shape} not supported') v_ts = np.empty_like(v_ud) - drv(v_ts.ctypes.data_as(ctypes.c_void_p), - v_ud.ctypes.data_as(ctypes.c_void_p), - ctypes.c_int(nvar), ctypes.c_int(ngrids)) + nvar, ngrids = v_ts.shape[-2:] + nvar2 = nvar * 2 + order = v_ud.ndim // 2 + drv = libdft.VXCud2ts + v_ts, v_ud = v_ud, v_ts + for i in range(order): + v_ts, v_ud = v_ud, v_ts + n = nvar2**i + nvg = nvar * nvar2**(order-1-i) * ngrids + #:c = np.array([[.5, .5], # vrho = (va + vb) / 2 + #: [.5, -.5]]) # vs = (va - vb) / 2 + #:v_ts = np.einsum('ra,...ax...g->...rx...g', c, v_ud) + drv(v_ts.ctypes, v_ud.ctypes, ctypes.c_int(n), ctypes.c_int(nvg)) return v_ts def ts2ud(v_ts): '''XC derivatives total-density/spin-density representations to spin-up/spin-down representations''' - c = np.array([[1, 1], # va = vrho + vs - [1, -1]]) # vb = vrho - vs - if v_ts.ndim == 3: # vxc - v_ud = np.einsum('ra,axg->rxg', c, v_ts) - elif v_ts.ndim == 5: # fxc - v_ud = np.einsum('ra,axbyg->rxbyg', c, v_ts) - v_ud = np.einsum('sb,rxbyg->rxsyg', c, v_ud) - elif v_ts.ndim == 7: # kxc - v_ud = np.einsum('ra,axbyczg->rxbyczg', c, v_ts) - v_ud = np.einsum('sb,rxbyczg->rxsyczg', c, v_ud) - v_ud = np.einsum('tc,rxsyczg->rxsytzg', c, v_ud) - else: - raise NotImplementedError(f'Shape {v_ts.shape} not supported') + v_ts = np.asarray(v_ts, order='C') + v_ud = np.empty_like(v_ts) + nvar, ngrids = v_ts.shape[-2:] + nvar2 = nvar * 2 + order = v_ud.ndim // 2 + drv = libdft.VXCts2ud + v_ts, v_ud = v_ud, v_ts + for i in range(order): + v_ts, v_ud = v_ud, v_ts + n = nvar2**i + nvg = nvar * nvar2**(order-1-i) * ngrids + #:c = np.array([[1, 1], # va = vrho + vs + #: [1, -1]]) # vb = vrho - vs + #:v_ud = np.einsum('ra,...ax...g->...rx...g', c, v_ts) + drv(v_ud.ctypes, v_ts.ctypes, ctypes.c_int(n), ctypes.c_int(nvg)) return v_ud diff --git a/pyscf/dft/xcfun.py b/pyscf/dft/xcfun.py index 20942094bf..8fd498b338 100644 --- a/pyscf/dft/xcfun.py +++ b/pyscf/dft/xcfun.py @@ -28,12 +28,14 @@ import numpy from pyscf import lib from pyscf.dft.xc.utils import remove_dup, format_xc_code +from pyscf.dft import xc_deriv from pyscf import __config__ _itrf = lib.load_library('libxcfun_itrf') _itrf.xcfun_splash.restype = ctypes.c_char_p _itrf.xcfun_version.restype = ctypes.c_char_p +_itrf.XCFUN_eval_xc.restype = ctypes.c_int __version__ = _itrf.xcfun_version().decode("UTF-8") __reference__ = _itrf.xcfun_splash().decode("UTF-8") @@ -246,11 +248,11 @@ 42, 43, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 66, 70, 75]) ''' HYB_XC = {'PBE0' , 'PBE1PBE' , 'B3PW91' , 'B3P86' , 'B3LYP' , - 'B3PW91G' , 'B3P86G' , 'B3LYPG' , 'O3LYP' , 'CAMB3LYP', - 'B97XC' , 'B97_1XC' , 'B97_2XC' , 'M05XC' , 'TPSSH' , - 'HFLYP'} + 'B3PW91G' , 'B3P86G' , 'B3LYPG' , 'O3LYP' , 'CAMB3LYP', + 'B97XC' , 'B97_1XC' , 'B97_2XC' , 'M05XC' , 'TPSSH' , + 'HFLYP'} RSH_XC = {'CAMB3LYP'} -MAX_DERIV_ORDER = 3 +MAX_DERIV_ORDER = ctypes.c_int.in_dll(_itrf, 'XCFUN_max_deriv_order').value VV10_XC = { 'B97M_V' : (6.0, 0.01), @@ -509,18 +511,6 @@ def parse_token(token, suffix, search_xc_alias=False): 'M06-2X' : 'M062X', 'E-' : 'E_'} # For scientific notation - -def eval_xc(xc_code, rho, spin=0, relativity=0, deriv=1, omega=None, verbose=None): - r'''Interface to call xcfun library to evaluate XC functional, potential - and functional derivatives. - - See also :func:`pyscf.dft.libxc.eval_xc` - ''' - hyb, fn_facs = parse_xc(xc_code) - if omega is not None: - hyb = hyb[:2] + (float(omega),) - return _eval_xc(hyb, fn_facs, rho, spin, relativity, deriv, verbose) - XC_D0 = 0 XC_D1 = 1 XC_D2 = 2 @@ -827,93 +817,38 @@ def eval_xc(xc_code, rho, spin=0, relativity=0, deriv=1, omega=None, verbose=Non XC_D0000012 = 118 XC_D0000003 = 119 -def _eval_xc(hyb, fn_facs, rho, spin=0, relativity=0, deriv=1, verbose=None): - assert (deriv < 4) - if spin == 0: - rho_u = rho_d = numpy.asarray(rho, order='C') - else: - rho_u = numpy.asarray(rho[0], order='C') - rho_d = numpy.asarray(rho[1], order='C') - assert (rho_u.dtype == numpy.double) - assert (rho_d.dtype == numpy.double) - - if rho_u.ndim == 1: - rho_u = rho_u.reshape(1,-1) - rho_d = rho_d.reshape(1,-1) - ngrids = rho_u.shape[1] - - fn_ids = [x[0] for x in fn_facs] - facs = [x[1] for x in fn_facs] - if hyb[2] != 0: - # Current implementation does not support different omegas for - # different functionals - omega = [hyb[2]] * len(facs) - else: - omega = [0] * len(facs) - - n = len(fn_ids) - if (n == 0 or # xc_code = '' or xc_code = 'HF', an empty functional - all((is_lda(x) for x in fn_ids))): # LDA - if spin == 0: - nvar = 1 - xctype = 'R-LDA' - else: - nvar = 2 - xctype = 'U-LDA' - elif any((is_meta_gga(x) for x in fn_ids)): - if spin == 0: - nvar = 3 - xctype = 'R-MGGA' - else: - nvar = 7 - xctype = 'U-MGGA' - else: # GGA - if spin == 0: - nvar = 2 - xctype = 'R-GGA' - else: - nvar = 5 - xctype = 'U-GGA' - outlen = (math.factorial(nvar+deriv) // - (math.factorial(nvar) * math.factorial(deriv))) - outbuf = numpy.zeros((ngrids,outlen)) +def eval_xc(xc_code, rho, spin=0, relativity=0, deriv=1, omega=None, verbose=None): + r'''Interface to call xcfun library to evaluate XC functional, potential + and functional derivatives. Return deriviates following libxc convention. - if n > 0: - _itrf.XCFUN_eval_xc(ctypes.c_int(n), - (ctypes.c_int*n)(*fn_ids), - (ctypes.c_double*n)(*facs), - (ctypes.c_double*n)(*omega), - ctypes.c_int(spin), - ctypes.c_int(deriv), ctypes.c_int(rho_u.shape[1]), - rho_u.ctypes.data_as(ctypes.c_void_p), - rho_d.ctypes.data_as(ctypes.c_void_p), - outbuf.ctypes.data_as(ctypes.c_void_p)) - - outbuf = lib.transpose(outbuf) + See also :func:`pyscf.dft.libxc.eval_xc` + ''' + outbuf = eval_xc1(xc_code, rho, spin, deriv, omega) exc = outbuf[0] vxc = fxc = kxc = None - if xctype == 'R-LDA': + xctype = xc_type(xc_code) + if xctype == 'LDA' and spin == 0: if deriv > 0: vxc = [outbuf[1]] if deriv > 1: fxc = [outbuf[2]] if deriv > 2: kxc = [outbuf[3]] - elif xctype == 'R-GGA': + elif xctype == 'GGA' and spin == 0: if deriv > 0: vxc = [outbuf[1], outbuf[2]] if deriv > 1: fxc = [outbuf[3], outbuf[4], outbuf[5]] if deriv > 2: kxc = [outbuf[6], outbuf[7], outbuf[8], outbuf[9]] - elif xctype == 'U-LDA': + elif xctype == 'LDA' and spin == 1: if deriv > 0: vxc = [outbuf[1:3].T] if deriv > 1: fxc = [outbuf[3:6].T] if deriv > 2: kxc = [outbuf[6:10].T] - elif xctype == 'U-GGA': + elif xctype == 'GGA' and spin == 1: if deriv > 0: vxc = [outbuf[1:3].T, outbuf[3:6].T] if deriv > 1: @@ -933,7 +868,7 @@ def _eval_xc(hyb, fn_facs, rho, spin=0, relativity=0, deriv=1, verbose=None): # MGGA/MLGGA: Note the MLGGA interface are not implemented. MGGA only needs 3 # input arguments. To make the interface compatible with libxc, treat MGGA as # MLGGA - elif xctype == 'R-MGGA': + elif xctype == 'MGGA' and spin == 0: if deriv > 0: vxc = [outbuf[1], outbuf[2], None, outbuf[3]] if deriv > 1: @@ -962,7 +897,7 @@ def _eval_xc(hyb, fn_facs, rho, spin=0, relativity=0, deriv=1, verbose=None): None, None, outbuf[XC_D012], # v3lapl3, v3lapl2tau, v3lapltau2, v3tau3) None, None, None, outbuf[XC_D003]] - elif xctype == 'U-MGGA': + elif xctype == 'MGGA' and spin == 1: if deriv > 0: vxc = (outbuf[1:3].T, outbuf[3:6].T, None, outbuf[6:8].T) if deriv > 1: @@ -1018,6 +953,88 @@ def _eval_xc(hyb, fn_facs, rho, spin=0, relativity=0, deriv=1, verbose=None): outbuf[[XC_D0000030,XC_D0000021,XC_D0000012,XC_D0000003]].T] return exc, vxc, fxc, kxc +def eval_xc1(xc_code, rho, spin=0, deriv=1, omega=None): + '''Similar to eval_xc. + Returns an array with the order of derivatives following xcfun convention. + ''' + assert deriv <= MAX_DERIV_ORDER + xctype = xc_type(xc_code) + assert xctype in ('HF', 'LDA', 'GGA', 'MGGA') + + rho = numpy.asarray(rho, order='C', dtype=numpy.double) + if xctype == 'MGGA' and rho.shape[-2] == 6: + rho = numpy.asarray(rho[...,[0,1,2,3,5],:], order='C') + + hyb, fn_facs = parse_xc(xc_code) + if omega is not None: + hyb = hyb[:2] + (float(omega),) + + fn_ids = [x[0] for x in fn_facs] + facs = [x[1] for x in fn_facs] + if hyb[2] != 0: + # Current implementation does not support different omegas for + # different RSH functionals if there are multiple RSHs + omega = [hyb[2]] * len(facs) + else: + omega = [0] * len(facs) + + nvar, xlen = xc_deriv._XC_NVAR[xctype, spin] + ngrids = rho.shape[-1] + rho = rho.reshape(spin+1,nvar,ngrids) + outlen = lib.comb(xlen+deriv, deriv) + out = numpy.zeros((ngrids,outlen)) + n = len(fn_ids) + if n > 0: + err = _itrf.XCFUN_eval_xc(ctypes.c_int(n), + (ctypes.c_int*n)(*fn_ids), + (ctypes.c_double*n)(*facs), + (ctypes.c_double*n)(*omega), + ctypes.c_int(spin), ctypes.c_int(deriv), + ctypes.c_int(nvar), ctypes.c_int(ngrids), + ctypes.c_int(outlen), + rho.ctypes.data_as(ctypes.c_void_p), + out.ctypes.data_as(ctypes.c_void_p)) + if err != 0: + raise RuntimeError(f'Failed to eval {xc_code} for deriv={deriv}') + return lib.transpose(out) + +def eval_xc_eff(xc_code, rho, deriv=1, omega=None): + r'''Returns the derivative tensor against the density parameters + + [density_a, (nabla_x)_a, (nabla_y)_a, (nabla_z)_a, tau_a] + + or spin-polarized density parameters + + [[density_a, (nabla_x)_a, (nabla_y)_a, (nabla_z)_a, tau_a], + [density_b, (nabla_x)_b, (nabla_y)_b, (nabla_z)_b, tau_b]]. + + It differs from the eval_xc method in the derivatives of non-local part. + The eval_xc method returns the XC functional derivatives to sigma + (|\nabla \rho|^2) + + Args: + rho: 2-dimensional or 3-dimensional array + Total density or (spin-up, spin-down) densities (and their + derivatives if GGA or MGGA functionals) on grids + + Kwargs: + deriv: int + derivative orders + omega: float + define the exponent in the attenuated Coulomb for RSH functional + ''' + xctype = xc_type(xc_code) + rho = numpy.asarray(rho, order='C', dtype=numpy.double) + if xctype == 'MGGA' and rho.shape[-2] == 6: + rho = numpy.asarray(rho[...,[0,1,2,3,5],:], order='C') + + spin_polarized = rho.ndim >= 2 and rho.shape[0] == 2 + if spin_polarized: + spin = 1 + else: + spin = 0 + out = eval_xc1(xc_code, rho, spin, deriv, omega) + return xc_deriv.transform_xc(rho, out, xctype, spin, deriv) def define_xc_(ni, description, xctype='LDA', hyb=0, rsh=(0,0,0)): '''Define XC functional. See also :func:`eval_xc` for the rules of input description. diff --git a/pyscf/lib/CMakeLists.txt b/pyscf/lib/CMakeLists.txt index 91f6aba135..d04e6444dc 100644 --- a/pyscf/lib/CMakeLists.txt +++ b/pyscf/lib/CMakeLists.txt @@ -100,6 +100,10 @@ if(ENABLE_OPENMP) endif() endif() +if(NOT XCFUN_MAX_ORDER) + set(XCFUN_MAX_ORDER 3) +endif() + #find_package(PythonInterp REQUIRED) #find_package(PythonLibs REQUIRED) #execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import numpy; print(numpy.get_include())" @@ -215,17 +219,14 @@ endif() # ENABLE_LIBXC if(ENABLE_XCFUN AND BUILD_XCFUN) ExternalProject_Add(libxcfun - #GIT_REPOSITORY https://github.com/sunqm/xcfun.git - GIT_REPOSITORY https://github.com/fishjojo/xcfun.git - # copy of v2.1.1, downgrade to cmake 3.5 - GIT_TAG cmake-3.5 - # Range seperated parameters can be set in the python code. This patch to - # RSH omega is not needed anymore. - #PATCH_COMMAND patch -p1 < ${PROJECT_SOURCE_DIR}/libxcfun.patch + GIT_REPOSITORY https://github.com/dftlibs/xcfun.git + GIT_TAG a89b783 + # This patch adds the xcfun derivative order up to 5 + PATCH_COMMAND git apply ${PROJECT_SOURCE_DIR}/libxcfun.patch PREFIX ${PROJECT_BINARY_DIR}/deps INSTALL_DIR ${PROJECT_SOURCE_DIR}/deps CMAKE_ARGS -DCMAKE_BUILD_TYPE=RELEASE -DBUILD_SHARED_LIBS=1 - -DXCFUN_MAX_ORDER=3 -DENABLE_TESTALL=0 + -DXCFUN_MAX_ORDER=${XCFUN_MAX_ORDER} -DENABLE_TESTALL=0 -DCMAKE_INSTALL_PREFIX:PATH= -DCMAKE_CXX_CREATE_SHARED_LIBRARY=${CXX_LINK_TEMPLATE} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} diff --git a/pyscf/lib/config.h.in b/pyscf/lib/config.h.in index 596ac76849..fe7d506188 100644 --- a/pyscf/lib/config.h.in +++ b/pyscf/lib/config.h.in @@ -6,4 +6,4 @@ #endif #cmakedefine HAVE_FFS - +#define XCFUN_MAX_DERIV_ORDER @XCFUN_MAX_ORDER@ diff --git a/pyscf/lib/dft/CMakeLists.txt b/pyscf/lib/dft/CMakeLists.txt index c7095a8cd0..6b01b7eca0 100644 --- a/pyscf/lib/dft/CMakeLists.txt +++ b/pyscf/lib/dft/CMakeLists.txt @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -add_library(dft SHARED +add_library(dft SHARED CxLebedevGrid.c grid_basis.c nr_numint.c r_numint.c numint_uniform_grid.c xc_deriv.c nr_numint_sparse.c ) diff --git a/pyscf/lib/dft/libxc_itrf.c b/pyscf/lib/dft/libxc_itrf.c index 60641e5e43..76d7497980 100644 --- a/pyscf/lib/dft/libxc_itrf.c +++ b/pyscf/lib/dft/libxc_itrf.c @@ -73,19 +73,108 @@ v3sigma(10) = (uu_uu_uu, uu_uu_ud, uu_uu_dd, uu_ud_ud, uu_ud_dd, uu_dd_dd, ud_ud_ud, ud_ud_dd, ud_dd_dd, dd_dd_dd) */ + +#define LDA_NVAR 1 +#define GGA_NVAR 4 +#define MGGA_NVAR 5 + /* * rho_u/rho_d = (den,grad_x,grad_y,grad_z,laplacian,tau) * In spin restricted case (spin == 1), rho_u is assumed to be the * spin-free quantities, rho_d is not used. */ -static void _eval_xc(xc_func_type *func_x, int spin, int np, - double *rho_u, double *rho_d, - double *ex, double *vxc, double *fxc, double *kxc) +static void _eval_rho(double *rho, double *rho_u, int spin, int nvar, int np) { int i; - double *rho, *sigma, *lapl, *tau; + double *sigma, *tau; double *gxu, *gyu, *gzu, *gxd, *gyd, *gzd; - double *lapl_u, *lapl_d, *tau_u, *tau_d; + double *tau_u, *tau_d; + double *rho_d = rho_u + np * nvar; + + switch (nvar) { + case LDA_NVAR: + if (spin == 1) { + for (i = 0; i < np; i++) { + rho[i*2+0] = rho_u[i]; + rho[i*2+1] = rho_d[i]; + } + } else { + for (i = 0; i < np; i++) { + rho[i] = rho_u[i]; + } + } + break; + case GGA_NVAR: + if (spin == 1) { + sigma = rho + np * 2; + gxu = rho_u + np; + gyu = rho_u + np * 2; + gzu = rho_u + np * 3; + gxd = rho_d + np; + gyd = rho_d + np * 2; + gzd = rho_d + np * 3; + for (i = 0; i < np; i++) { + rho[i*2+0] = rho_u[i]; + rho[i*2+1] = rho_d[i]; + sigma[i*3+0] = gxu[i]*gxu[i] + gyu[i]*gyu[i] + gzu[i]*gzu[i]; + sigma[i*3+1] = gxu[i]*gxd[i] + gyu[i]*gyd[i] + gzu[i]*gzd[i]; + sigma[i*3+2] = gxd[i]*gxd[i] + gyd[i]*gyd[i] + gzd[i]*gzd[i]; + } + } else { + sigma = rho + np; + gxu = rho_u + np; + gyu = rho_u + np * 2; + gzu = rho_u + np * 3; + for (i = 0; i < np; i++) { + rho[i] = rho_u[i]; + sigma[i] = gxu[i]*gxu[i] + gyu[i]*gyu[i] + gzu[i]*gzu[i]; + } + } + break; + case MGGA_NVAR: + if (spin == 1) { + sigma = rho + np * 2; + tau = sigma + np * 3; + gxu = rho_u + np; + gyu = rho_u + np * 2; + gzu = rho_u + np * 3; + gxd = rho_d + np; + gyd = rho_d + np * 2; + gzd = rho_d + np * 3; + tau_u = rho_u + np * 4; + tau_d = rho_d + np * 4; + for (i = 0; i < np; i++) { + rho[i*2+0] = rho_u[i]; + rho[i*2+1] = rho_d[i]; + tau[i*2+0] = tau_u[i]; + tau[i*2+1] = tau_d[i]; + } + for (i = 0; i < np; i++) { + sigma[i*3+0] = gxu[i]*gxu[i] + gyu[i]*gyu[i] + gzu[i]*gzu[i]; + sigma[i*3+1] = gxu[i]*gxd[i] + gyu[i]*gyd[i] + gzu[i]*gzd[i]; + sigma[i*3+2] = gxd[i]*gxd[i] + gyd[i]*gyd[i] + gzd[i]*gzd[i]; + } + } else { + sigma = rho + np; + tau = sigma + np; + gxu = rho_u + np; + gyu = rho_u + np * 2; + gzu = rho_u + np * 3; + tau_u = rho_u + np * 4; + for (i = 0; i < np; i++) { + rho[i] = rho_u[i]; + sigma[i] = gxu[i]*gxu[i] + gyu[i]*gyu[i] + gzu[i]*gzu[i]; + tau[i] = tau_u[i]; + } + } + break; + } +} +static void _eval_xc(xc_func_type *func_x, int spin, int deriv, int np, + double *rho, double *exc) +{ + double *sigma, *tau; + double *lapl = rho; double *vrho = NULL; double *vsigma = NULL; double *vlapl = NULL; @@ -120,6 +209,41 @@ static void _eval_xc(xc_func_type *func_x, int spin, int np, double *v3lapl2tau = NULL; double *v3lapltau2 = NULL; double *v3tau3 = NULL; + double *v4rho4 = NULL; + double *v4rho3sigma = NULL; + double *v4rho3lapl = NULL; + double *v4rho3tau = NULL; + double *v4rho2sigma2 = NULL; + double *v4rho2sigmalapl = NULL; + double *v4rho2sigmatau = NULL; + double *v4rho2lapl2 = NULL; + double *v4rho2lapltau = NULL; + double *v4rho2tau2 = NULL; + double *v4rhosigma3 = NULL; + double *v4rhosigma2lapl = NULL; + double *v4rhosigma2tau = NULL; + double *v4rhosigmalapl2 = NULL; + double *v4rhosigmalapltau= NULL; + double *v4rhosigmatau2 = NULL; + double *v4rholapl3 = NULL; + double *v4rholapl2tau = NULL; + double *v4rholapltau2 = NULL; + double *v4rhotau3 = NULL; + double *v4sigma4 = NULL; + double *v4sigma3lapl = NULL; + double *v4sigma3tau = NULL; + double *v4sigma2lapl2 = NULL; + double *v4sigma2lapltau = NULL; + double *v4sigma2tau2 = NULL; + double *v4sigmalapl3 = NULL; + double *v4sigmalapl2tau = NULL; + double *v4sigmalapltau2 = NULL; + double *v4sigmatau3 = NULL; + double *v4lapl4 = NULL; + double *v4lapl3tau = NULL; + double *v4lapl2tau2 = NULL; + double *v4lapltau3 = NULL; + double *v4tau4 = NULL; switch (func_x->info->family) { case XC_FAMILY_LDA: @@ -129,231 +253,207 @@ static void _eval_xc(xc_func_type *func_x, int spin, int np, // ex is the energy density // NOTE libxc library added ex/ec into vrho/vcrho // vrho = rho d ex/d rho + ex, see work_lda.c:L73 - if (spin == XC_POLARIZED) { - rho = malloc(sizeof(double) * np*2); - for (i = 0; i < np; i++) { - rho[i*2+0] = rho_u[i]; - rho[i*2+1] = rho_d[i]; + if (spin == 1) { + if (deriv > 0) { + vrho = exc + np; + } + if (deriv > 1) { + v2rho2 = vrho + np * 2; + } + if (deriv > 2) { + v3rho3 = v2rho2 + np * 3; + } + if (deriv > 3) { + v4rho4 = v3rho3 + np * 4; } - xc_lda_exc_vxc_fxc_kxc(func_x, np, rho, ex, vxc, fxc, kxc); - free(rho); } else { - rho = rho_u; - xc_lda_exc_vxc_fxc_kxc(func_x, np, rho, ex, vxc, fxc, kxc); + if (deriv > 0) { + vrho = exc + np; + } + if (deriv > 1) { + v2rho2 = vrho + np; + } + if (deriv > 2) { + v3rho3 = v2rho2 + np; + } + if (deriv > 3) { + v4rho4 = v3rho3 + np; + } } + xc_lda(func_x, np, rho, exc, vrho, v2rho2, v3rho3, v4rho4); break; case XC_FAMILY_GGA: #ifdef XC_FAMILY_HYB_GGA case XC_FAMILY_HYB_GGA: #endif - if (spin == XC_POLARIZED) { - rho = malloc(sizeof(double) * np * 5); + if (spin == 1) { sigma = rho + np * 2; - gxu = rho_u + np; - gyu = rho_u + np * 2; - gzu = rho_u + np * 3; - gxd = rho_d + np; - gyd = rho_d + np * 2; - gzd = rho_d + np * 3; - for (i = 0; i < np; i++) { - rho[i*2+0] = rho_u[i]; - rho[i*2+1] = rho_d[i]; - sigma[i*3+0] = gxu[i]*gxu[i] + gyu[i]*gyu[i] + gzu[i]*gzu[i]; - sigma[i*3+1] = gxu[i]*gxd[i] + gyu[i]*gyd[i] + gzu[i]*gzd[i]; - sigma[i*3+2] = gxd[i]*gxd[i] + gyd[i]*gyd[i] + gzd[i]*gzd[i]; - } - if (vxc != NULL) { - vrho = vxc; - vsigma = vxc + np * 2; + if (deriv > 0) { + vrho = exc + np; + vsigma = vrho + np * 2; } - if (fxc != NULL) { - v2rho2 = fxc; - v2rhosigma = fxc + np * 3; + if (deriv > 1) { + v2rho2 = vsigma + np * 3; + v2rhosigma = v2rho2 + np * 3; v2sigma2 = v2rhosigma + np * 6; // np*6 } - if (kxc != NULL) { - v3rho3 = kxc; - v3rho2sigma = kxc + np * 4; + if (deriv > 2) { + v3rho3 = v2sigma2 + np * 6; + v3rho2sigma = v3rho3 + np * 4; v3rhosigma2 = v3rho2sigma + np * 9; v3sigma3 = v3rhosigma2 + np * 12; // np*10 } - xc_gga_exc_vxc_fxc_kxc(func_x, np, rho, sigma, ex, - vrho, vsigma, v2rho2, v2rhosigma, v2sigma2, - v3rho3, v3rho2sigma, v3rhosigma2, v3sigma3); - free(rho); - } else { - rho = rho_u; - sigma = malloc(sizeof(double) * np); - gxu = rho_u + np; - gyu = rho_u + np * 2; - gzu = rho_u + np * 3; - for (i = 0; i < np; i++) { - sigma[i] = gxu[i]*gxu[i] + gyu[i]*gyu[i] + gzu[i]*gzu[i]; + if (deriv > 3) { + v4rho4 = v3sigma3 + np * 10 ; + v4rho3sigma = v4rho4 + np * 5 ; + v4rho2sigma2 = v4rho3sigma + np * 4*3 ; + v4rhosigma3 = v4rho2sigma2 + np * 3*6 ; + v4sigma4 = v4rhosigma3 + np * 2*10; } - if (vxc != NULL) { - vrho = vxc; - vsigma = vxc + np; + } else { + sigma = rho + np; + if (deriv > 0) { + vrho = exc + np; + vsigma = vrho + np; } - if (fxc != NULL) { - v2rho2 = fxc; - v2rhosigma = fxc + np; + if (deriv > 1) { + v2rho2 = vsigma + np; + v2rhosigma = v2rho2 + np; v2sigma2 = v2rhosigma + np; } - if (kxc != NULL) { - v3rho3 = kxc; + if (deriv > 2) { + v3rho3 = v2sigma2 + np; v3rho2sigma = v3rho3 + np; v3rhosigma2 = v3rho2sigma + np; v3sigma3 = v3rhosigma2 + np; } - xc_gga_exc_vxc_fxc_kxc(func_x, np, rho, sigma, ex, - vrho, vsigma, v2rho2, v2rhosigma, v2sigma2, - v3rho3, v3rho2sigma, v3rhosigma2, v3sigma3); - free(sigma); + if (deriv > 3) { + v4rho4 = v3sigma3 + np; + v4rho3sigma = v4rho4 + np; + v4rho2sigma2 = v4rho3sigma + np; + v4rhosigma3 = v4rho2sigma2 + np; + v4sigma4 = v4rhosigma3 + np; + } } + xc_gga(func_x, np, rho, sigma, + exc, vrho, vsigma, + v2rho2, v2rhosigma, v2sigma2, + v3rho3, v3rho2sigma, v3rhosigma2, v3sigma3, + v4rho4, v4rho3sigma, v4rho2sigma2, v4rhosigma3, v4sigma4); break; case XC_FAMILY_MGGA: #ifdef XC_FAMILY_HYB_MGGA case XC_FAMILY_HYB_MGGA: #endif - if (spin == XC_POLARIZED) { - rho = malloc(sizeof(double) * np * 9); + if (spin == 1) { sigma = rho + np * 2; - lapl = sigma + np * 3; - tau = lapl + np * 2; - gxu = rho_u + np; - gyu = rho_u + np * 2; - gzu = rho_u + np * 3; - gxd = rho_d + np; - gyd = rho_d + np * 2; - gzd = rho_d + np * 3; - lapl_u = rho_u + np * 4; - tau_u = rho_u + np * 5; - lapl_d = rho_d + np * 4; - tau_d = rho_d + np * 5; - for (i = 0; i < np; i++) { - rho[i*2+0] = rho_u[i]; - rho[i*2+1] = rho_d[i]; - lapl[i*2+0] = lapl_u[i]; - lapl[i*2+1] = lapl_d[i]; - tau[i*2+0] = tau_u[i]; - tau[i*2+1] = tau_d[i]; + tau = sigma + np * 3; + if (deriv > 0) { + vrho = exc + np; + vsigma = vrho + np * 2; + vtau = vsigma + np * 3; } - for (i = 0; i < np; i++) { - sigma[i*3+0] = gxu[i]*gxu[i] + gyu[i]*gyu[i] + gzu[i]*gzu[i]; - sigma[i*3+1] = gxu[i]*gxd[i] + gyu[i]*gyd[i] + gzu[i]*gzd[i]; - sigma[i*3+2] = gxd[i]*gxd[i] + gyd[i]*gyd[i] + gzd[i]*gzd[i]; - } - if (vxc != NULL) { - vrho = vxc; - vsigma = vxc + np * 2; - vlapl = vsigma + np * 3; - vtau = vlapl + np * 2; // np*2 - } - if (fxc != NULL) { - v2rho2 = fxc; + if (deriv > 1) { + v2rho2 = vtau + np * 2; v2rhosigma = v2rho2 + np * 3; v2sigma2 = v2rhosigma + np * 6; - v2lapl2 = v2sigma2 + np * 6; - v2tau2 = v2lapl2 + np * 3; - v2rholapl = v2tau2 + np * 3; - v2rhotau = v2rholapl + np * 4; - v2lapltau = v2rhotau + np * 4; - v2sigmalapl = v2lapltau + np * 4; - v2sigmatau = v2sigmalapl + np * 6; - } - if (kxc != NULL) { - v3rho3 = kxc; + v2rhotau = v2sigma2 + np * 6; + v2sigmatau = v2rhotau + np * 4; + v2tau2 = v2sigmatau + np * 6; + } + if (deriv > 2) { + v3rho3 = v2tau2 + np * 3 ; v3rho2sigma = v3rho3 + np * 4 ; v3rhosigma2 = v3rho2sigma + np * 9 ; v3sigma3 = v3rhosigma2 + np * 12; - v3rho2lapl = v3sigma3 + np * 10; - v3rho2tau = v3rho2lapl + np * 6 ; - v3rhosigmalapl = v3rho2tau + np * 6 ; - v3rhosigmatau = v3rhosigmalapl + np * 12; - v3rholapl2 = v3rhosigmatau + np * 12; - v3rholapltau = v3rholapl2 + np * 6 ; - v3rhotau2 = v3rholapltau + np * 8 ; - v3sigma2lapl = v3rhotau2 + np * 6 ; - v3sigma2tau = v3sigma2lapl + np * 12; - v3sigmalapl2 = v3sigma2tau + np * 12; - v3sigmalapltau = v3sigmalapl2 + np * 9 ; - v3sigmatau2 = v3sigmalapltau + np * 12; - v3lapl3 = v3sigmatau2 + np * 9 ; - v3lapl2tau = v3lapl3 + np * 4 ; - v3lapltau2 = v3lapl2tau + np * 6 ; - v3tau3 = v3lapltau2 + np * 6 ; - } - xc_mgga_exc_vxc_fxc_kxc(func_x, np, rho, sigma, lapl, tau, ex, - vrho, vsigma, vlapl, vtau, - v2rho2, v2rhosigma, v2rholapl, v2rhotau, v2sigma2, - v2sigmalapl, v2sigmatau, v2lapl2, v2lapltau, v2tau2, - v3rho3, v3rho2sigma, v3rho2lapl, v3rho2tau, v3rhosigma2, - v3rhosigmalapl, v3rhosigmatau, v3rholapl2, v3rholapltau, - v3rhotau2, v3sigma3, v3sigma2lapl, v3sigma2tau, - v3sigmalapl2, v3sigmalapltau, v3sigmatau2, v3lapl3, - v3lapl2tau, v3lapltau2, v3tau3); - free(rho); - } else { - rho = rho_u; - sigma = malloc(sizeof(double) * np); - lapl = rho_u + np * 4; - tau = rho_u + np * 5; - gxu = rho_u + np; - gyu = rho_u + np * 2; - gzu = rho_u + np * 3; - for (i = 0; i < np; i++) { - sigma[i] = gxu[i]*gxu[i] + gyu[i]*gyu[i] + gzu[i]*gzu[i]; + v3rho2tau = v3sigma3 + np * 10; + v3rhosigmatau = v3rho2tau + np * 6 ; + v3rhotau2 = v3rhosigmatau + np * 12; + v3sigma2tau = v3rhotau2 + np * 6 ; + v3sigmatau2 = v3sigma2tau + np * 12; + v3tau3 = v3sigmatau2 + np * 9 ; } - if (vxc != NULL) { - vsigma = vxc + np; - vlapl = vsigma + np; - vtau = vlapl + np; + if (deriv > 3) { + v4rho4 = v3tau3 + np * 4 ; + v4rho3sigma = v4rho4 + np * 5 ; + v4rho2sigma2 = v4rho3sigma + np * 4*3 ; + v4rhosigma3 = v4rho2sigma2 + np * 3*6 ; + v4sigma4 = v4rhosigma3 + np * 2*10 ; + v4rho3tau = v4sigma4 + np * 15 ; + v4rho2sigmatau = v4rho3tau + np * 4*2 ; + v4rho2tau2 = v4rho2sigmatau + np * 3*3*2; + v4rhosigma2tau = v4rho2tau2 + np * 3*3 ; + v4rhosigmatau2 = v4rhosigma2tau + np * 2*6*2; + v4rhotau3 = v4rhosigmatau2 + np * 2*3*3; + v4sigma3tau = v4rhotau3 + np * 2*4 ; + v4sigma2tau2 = v4sigma3tau + np * 10*2 ; + v4sigmatau3 = v4sigma2tau2 + np * 6*3 ; + v4tau4 = v4sigmatau3 + np * 3*4 ; } - if (fxc != NULL) { - v2rho2 = fxc; + } else { + sigma = rho + np; + tau = sigma + np; + if (deriv > 0) { + vrho = exc + np; + vsigma = vrho + np; + vtau = vsigma + np; + } + if (deriv > 1) { + v2rho2 = vtau + np; v2rhosigma = v2rho2 + np; v2sigma2 = v2rhosigma + np; - v2lapl2 = v2sigma2 + np; - v2tau2 = v2lapl2 + np; - v2rholapl = v2tau2 + np; - v2rhotau = v2rholapl + np; - v2lapltau = v2rhotau + np; - v2sigmalapl = v2lapltau + np; - v2sigmatau = v2sigmalapl + np; - } - if (kxc != NULL) { - v3rho3 = kxc; + v2rhotau = v2sigma2 + np; + v2sigmatau = v2rhotau + np; + v2tau2 = v2sigmatau + np; + } + if (deriv > 2) { + v3rho3 = v2tau2 + np; v3rho2sigma = v3rho3 + np; v3rhosigma2 = v3rho2sigma + np; v3sigma3 = v3rhosigma2 + np; - v3rho2lapl = v3sigma3 + np; - v3rho2tau = v3rho2lapl + np; - v3rhosigmalapl = v3rho2tau + np; - v3rhosigmatau = v3rhosigmalapl + np; - v3rholapl2 = v3rhosigmatau + np; - v3rholapltau = v3rholapl2 + np; - v3rhotau2 = v3rholapltau + np; - v3sigma2lapl = v3rhotau2 + np; - v3sigma2tau = v3sigma2lapl + np; - v3sigmalapl2 = v3sigma2tau + np; - v3sigmalapltau = v3sigmalapl2 + np; - v3sigmatau2 = v3sigmalapltau + np; - v3lapl3 = v3sigmatau2 + np; - v3lapl2tau = v3lapl3 + np; - v3lapltau2 = v3lapl2tau + np; - v3tau3 = v3lapltau2 + np; - } - xc_mgga_exc_vxc_fxc_kxc(func_x, np, rho, sigma, lapl, tau, ex, - vxc, vsigma, vlapl, vtau, - v2rho2, v2rhosigma, v2rholapl, v2rhotau, v2sigma2, - v2sigmalapl, v2sigmatau, v2lapl2, v2lapltau, v2tau2, - v3rho3, v3rho2sigma, v3rho2lapl, v3rho2tau, v3rhosigma2, - v3rhosigmalapl, v3rhosigmatau, v3rholapl2, v3rholapltau, - v3rhotau2, v3sigma3, v3sigma2lapl, v3sigma2tau, - v3sigmalapl2, v3sigmalapltau, v3sigmatau2, v3lapl3, - v3lapl2tau, v3lapltau2, v3tau3); - free(sigma); + v3rho2tau = v3sigma3 + np; + v3rhosigmatau = v3rho2tau + np; + v3rhotau2 = v3rhosigmatau + np; + v3sigma2tau = v3rhotau2 + np; + v3sigmatau2 = v3sigma2tau + np; + v3tau3 = v3sigmatau2 + np; + } + if (deriv > 3) { + v4rho4 = v3tau3 + np; + v4rho3sigma = v4rho4 + np; + v4rho2sigma2 = v4rho3sigma + np; + v4rhosigma3 = v4rho2sigma2 + np; + v4sigma4 = v4rhosigma3 + np; + v4rho3tau = v4sigma4 + np; + v4rho2sigmatau = v4rho3tau + np; + v4rho2tau2 = v4rho2sigmatau + np; + v4rhosigma2tau = v4rho2tau2 + np; + v4rhosigmatau2 = v4rhosigma2tau + np; + v4rhotau3 = v4rhosigmatau2 + np; + v4sigma3tau = v4rhotau3 + np; + v4sigma2tau2 = v4sigma3tau + np; + v4sigmatau3 = v4sigma2tau2 + np; + v4tau4 = v4sigmatau3 + np; + } } + xc_mgga(func_x, np, rho, sigma, lapl, tau, + exc, vrho, vsigma, vlapl, vtau, + v2rho2, v2rhosigma, v2rholapl, v2rhotau, v2sigma2, + v2sigmalapl, v2sigmatau, v2lapl2, v2lapltau, v2tau2, + v3rho3, v3rho2sigma, v3rho2lapl, v3rho2tau, v3rhosigma2, + v3rhosigmalapl, v3rhosigmatau, v3rholapl2, v3rholapltau, + v3rhotau2, v3sigma3, v3sigma2lapl, v3sigma2tau, + v3sigmalapl2, v3sigmalapltau, v3sigmatau2, v3lapl3, + v3lapl2tau, v3lapltau2, v3tau3, + v4rho4, v4rho3sigma, v4rho3lapl, v4rho3tau, v4rho2sigma2, + v4rho2sigmalapl, v4rho2sigmatau, v4rho2lapl2, v4rho2lapltau, + v4rho2tau2, v4rhosigma3, v4rhosigma2lapl, v4rhosigma2tau, + v4rhosigmalapl2, v4rhosigmalapltau, v4rhosigmatau2, + v4rholapl3, v4rholapl2tau, v4rholapltau2, v4rhotau3, + v4sigma4, v4sigma3lapl, v4sigma3tau, v4sigma2lapl2, + v4sigma2lapltau, v4sigma2tau2, v4sigmalapl3, v4sigmalapl2tau, + v4sigmalapltau2, v4sigmatau3, v4lapl4, v4lapl3tau, + v4lapl2tau2, v4lapltau3, v4tau4); break; default: fprintf(stderr, "functional %d '%s' is not implemented\n", @@ -582,77 +682,26 @@ int LIBXC_xc_type(int fn_id) return type; } -static int xc_output_length(int nvar, int deriv) -{ - int i; - int len = 1.; - for (i = 1; i <= nvar; i++) { - len *= deriv + i; - len /= i; - } - return len; -} - -// return value 0 means no functional needs to be evaluated. -int LIBXC_input_length(int nfn, int *fn_id, double *fac, int spin) -{ - int i; - int nvar = 0; - xc_func_type func; - for (i = 0; i < nfn; i++) { - if (xc_func_init(&func, fn_id[i], spin) != 0) { - fprintf(stderr, "XC functional %d not found\n", - fn_id[i]); - raise_error -1; - } - if (spin == XC_POLARIZED) { - switch (func.info->family) { - case XC_FAMILY_LDA: -#ifdef XC_FAMILY_HYB_LDA - case XC_FAMILY_HYB_LDA: -#endif - nvar = MAX(nvar, 2); - break; - case XC_FAMILY_GGA: -#ifdef XC_FAMILY_HYB_GGA - case XC_FAMILY_HYB_GGA: -#endif - nvar = MAX(nvar, 5); - break; - case XC_FAMILY_MGGA: -#ifdef XC_FAMILY_HYB_MGGA - case XC_FAMILY_HYB_MGGA: -#endif - nvar = MAX(nvar, 9); - } - } else { - switch (func.info->family) { - case XC_FAMILY_LDA: -#ifdef XC_FAMILY_HYB_LDA - case XC_FAMILY_HYB_LDA: -#endif - nvar = MAX(nvar, 1); - break; - case XC_FAMILY_GGA: -#ifdef XC_FAMILY_HYB_GGA - case XC_FAMILY_HYB_GGA: -#endif - nvar = MAX(nvar, 2); - break; - case XC_FAMILY_MGGA: -#ifdef XC_FAMILY_HYB_MGGA - case XC_FAMILY_HYB_MGGA: -#endif - nvar = MAX(nvar, 4); - } - } - xc_func_end(&func); - } - return nvar; -} +//static int xc_output_length(int nvar, int deriv) +//{ +// int i; +// int len = 1; +// for (i = 1; i <= nvar; i++) { +// len *= deriv + i; +// len /= i; +// } +// return len; +//} +// offsets = [xc_output_length(nvar, i) for i in range(deriv+1) +// for nvar in [1,2,3,5,7]] +static int xc_nvar1_offsets[] = {0, 1, 2, 3, 4, 5}; +static int xc_nvar2_offsets[] = {0, 1, 3, 6, 10, 15}; +static int xc_nvar3_offsets[] = {0, 1, 4, 10, 20, 35}; +static int xc_nvar5_offsets[] = {0, 1, 6, 21, 56, 126}; +static int xc_nvar7_offsets[] = {0, 1, 8, 36, 120, 330}; static void axpy(double *dst, double *src, double fac, - int np, int ndst, int nsrc) + int np, int nsrc) { int i, j; for (j = 0; j < nsrc; j++) { @@ -661,130 +710,106 @@ static void axpy(double *dst, double *src, double fac, } } } - -static void merge_xc(double *dst, double *ebuf, double *vbuf, - double *fbuf, double *kbuf, double fac, - int np, int ndst, int nvar, int spin, int type) +static int vseg1[] = {2, 3, 2}; +static int fseg1[] = {3, 6, 6, 4, 6, 3}; +static int kseg1[] = {4, 9, 12, 10, 6, 12, 6, 12, 9, 4}; +static int lseg1[] = {5, 12, 18, 20, 15, 8, 18, 9, 24, 18, 8, 20, 18, 12, 5}; +static int *seg1[] = {NULL, vseg1, fseg1, kseg1, lseg1}; + +static void merge_xc(double *dst, double *ebuf, double fac, + int spin, int deriv, int nvar, int np, int outlen, int type) { - int seg0 [] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - // LDA | | - // GGA | | - // MGGA | | - int vseg1[] = {2, 3, 2, 2}; - // LDA | | - // GGA | | - // MGGA | | - int fseg1[] = {3, 6, 6, 3, 3, 4, 4, 4, 6, 6}; - // LDA | | - // GGA | | - // MGGA | | - int kseg1[] = {4, 9,12,10, 6, 6,12,12, 6, 8, 6,12,12, 9,12, 9, 4, 6, 6, 4}; - int vsegtot, fsegtot, ksegtot; - int *vseg, *fseg, *kseg; - if (spin == XC_POLARIZED) { - vseg = vseg1; - fseg = fseg1; - kseg = kseg1; - } else { - vseg = seg0; - fseg = seg0; - kseg = seg0; + int order, nsrc, i; + for (i = 0; i < np; i++) { + dst[i] += fac * ebuf[i]; } + int *offsets0, *offsets1; + double *pout, *pin; switch (type) { case XC_FAMILY_GGA: #ifdef XC_FAMILY_HYB_GGA case XC_FAMILY_HYB_GGA: #endif - vsegtot = 2; - fsegtot = 3; - ksegtot = 4; + offsets0 = xc_nvar2_offsets; break; case XC_FAMILY_MGGA: #ifdef XC_FAMILY_HYB_MGGA case XC_FAMILY_HYB_MGGA: #endif - vsegtot = 4; - fsegtot = 10; - ksegtot = 20; // not supported + offsets0 = xc_nvar3_offsets; break; default: //case XC_FAMILY_LDA: - vsegtot = 1; - fsegtot = 1; - ksegtot = 1; + offsets0 = xc_nvar1_offsets; } - int i; - size_t offset; - axpy(dst, ebuf, fac, np, ndst, 1); - - if (vbuf != NULL) { - offset = np; - for (i = 0; i < vsegtot; i++) { - axpy(dst+offset, vbuf, fac, np, ndst, vseg[i]); - offset += np * vseg[i]; - vbuf += np * vseg[i]; + if (spin == 0) { + switch (nvar) { + case LDA_NVAR: + offsets1 = xc_nvar1_offsets; + break; + case GGA_NVAR: + offsets1 = xc_nvar2_offsets; + break; + default: // MGGA_NVAR + offsets1 = xc_nvar3_offsets; + break; } - } - if (fbuf != NULL) { - offset = np * xc_output_length(nvar, 1); - for (i = 0; i < fsegtot; i++) { - axpy(dst+offset, fbuf, fac, np, ndst, fseg[i]); - offset += np * fseg[i]; - fbuf += np * fseg[i]; + for (order = 1; order <= deriv; order++) { + pout = dst + offsets1[order] * np; + pin = ebuf + offsets0[order] * np; + nsrc = offsets0[order+1] - offsets0[order]; + for (i = 0; i < np * nsrc; i++) { + pout[i] += fac * pin[i]; + } } + return; } - if (kbuf != NULL) { - offset = np * xc_output_length(nvar, 2); - for (i = 0; i < ksegtot; i++) { - axpy(dst+offset, kbuf, fac, np, ndst, kseg[i]); - offset += np * kseg[i]; - kbuf += np * kseg[i]; + switch (nvar) { + case LDA_NVAR: + offsets1 = xc_nvar2_offsets; + break; + case GGA_NVAR: + offsets1 = xc_nvar5_offsets; + break; + default: // MGGA_NVAR + offsets1 = xc_nvar7_offsets; + break; + } + + int terms; + int *pseg1; + pin = ebuf + np; + for (order = 1; order <= deriv; order++) { + pseg1 = seg1[order]; + pout = dst + offsets1[order] * np; + terms = offsets0[order+1] - offsets0[order]; + for (i = 0; i < terms; i++) { + nsrc = pseg1[i]; + axpy(pout, pin, fac, np, nsrc); + pin += np * nsrc; + pout += np * nsrc; } } } // omega is the range separation parameter mu in xcfun void LIBXC_eval_xc(int nfn, int *fn_id, double *fac, double *omega, - int spin, int deriv, int np, - double *rho_u, double *rho_d, double *output, - double dens_threshold) + int spin, int deriv, int nvar, int np, int outlen, + double *rho_u, double *output, double dens_threshold) { - assert(deriv <= 3); - int nvar = LIBXC_input_length(nfn, fn_id, fac, spin); - if (nvar == 0) { // No functional needs to be evaluated. - return; - } - - int outlen = xc_output_length(nvar, deriv); - // output buffer is zeroed in the Python caller - //NPdset0(output, np*outlen); - - double *ebuf = malloc(sizeof(double) * np); - double *vbuf = NULL; - double *fbuf = NULL; - double *kbuf = NULL; - if (deriv > 0) { - vbuf = malloc(sizeof(double) * np*9); - } - if (deriv > 1) { - fbuf = malloc(sizeof(double) * np*45); - } - if (deriv > 2) { - if (spin == XC_POLARIZED) { // spin-unresctricted MGGA - // FIXME *220 in xcfun - kbuf = malloc(sizeof(double) * np*165); - } else { // spin-resctricted MGGA - kbuf = malloc(sizeof(double) * np*20); - } - } + assert(deriv <= 4); + double *ebuf = malloc(sizeof(double) * np * outlen); + double *rho = malloc(sizeof(double) * np * 7); + _eval_rho(rho, rho_u, spin, nvar, np); + int nspin = spin + 1; int i, j; xc_func_type func; for (i = 0; i < nfn; i++) { - if (xc_func_init(&func, fn_id[i], spin) != 0) { + if (xc_func_init(&func, fn_id[i], nspin) != 0) { fprintf(stderr, "XC functional %d not found\n", fn_id[i]); raise_error; @@ -832,22 +857,13 @@ void LIBXC_eval_xc(int nfn, int *fn_id, double *fac, double *omega, #if defined XC_SET_RELATIVITY xc_lda_x_set_params(&func, relativity); #endif - _eval_xc(&func, spin, np, rho_u, rho_d, ebuf, vbuf, fbuf, kbuf); - merge_xc(output, ebuf, vbuf, fbuf, kbuf, fac[i], - np, outlen, nvar, spin, func.info->family); + _eval_xc(&func, spin, deriv, np, rho, ebuf); + merge_xc(output, ebuf, fac[i], + spin, deriv, nvar, np, outlen, func.info->family); xc_func_end(&func); } - free(ebuf); - if (deriv > 0) { - free(vbuf); - } - if (deriv > 1) { - free(fbuf); - } - if (deriv > 2) { - free(kbuf); - } + free(rho); } int LIBXC_max_deriv_order(int xc_id) diff --git a/pyscf/lib/dft/xc_deriv.c b/pyscf/lib/dft/xc_deriv.c index 3292d31eb8..ecab4adb89 100644 --- a/pyscf/lib/dft/xc_deriv.c +++ b/pyscf/lib/dft/xc_deriv.c @@ -17,6 +17,7 @@ */ #include +#include /* * tmp = fg[...,[[0,1],[1,2]],...] @@ -65,109 +66,104 @@ void VXCfg_to_direct_deriv(double *qg, double *fg, double *rho, } } -void VXCud2ts_deriv1(double *v_ts, double *v_ud, int nvar, int ngrids) +void VXCud2ts(double *v_ts, double *v_ud, int ncounts, size_t ngrids) { - size_t Ngrids = ngrids; - size_t vg = nvar * Ngrids; - double *vu = v_ud; - double *vd = v_ud + vg; - double *vt = v_ts; - double *vs = v_ts + vg; - size_t n; + double *vu, *vd, *vt, *vs; + int i, n; + for (n = 0; n < ncounts; n++) { + vu = v_ud + 2*n * ngrids; + vd = v_ud +(2*n+1) * ngrids; + vt = v_ts + 2*n * ngrids; + vs = v_ts +(2*n+1) * ngrids; #pragma GCC ivdep - for (n = 0; n < vg; n++) { - vt[n] = (vu[n] + vd[n]) * .5; - vs[n] = (vu[n] - vd[n]) * .5; + for (i = 0; i < ngrids; i++) { + vt[i] = (vu[i] + vd[i]) * .5; + vs[i] = (vu[i] - vd[i]) * .5; + } } } -void VXCud2ts_deriv2(double *v_ts, double *v_ud, int nvar, int ngrids) +void VXCts2ud(double *v_ud, double *v_ts, int ncounts, size_t ngrids) +{ + double *vu, *vd, *vt, *vs; + int i, n; + for (n = 0; n < ncounts; n++) { + vu = v_ud + 2*n * ngrids; + vd = v_ud +(2*n+1) * ngrids; + vt = v_ts + 2*n * ngrids; + vs = v_ts +(2*n+1) * ngrids; +#pragma GCC ivdep + for (i = 0; i < ngrids; i++) { + vu[i] = vt[i] + vs[i]; + vd[i] = vt[i] - vs[i]; + } + } +} + +#define frho_at(n, a, x, g) frho[((n*2+a)*Nvg + x*Ngrids + g)] +#define fsigma_at(x, n, g) fsigma[(x*Ncg + n*Ngrids + g)] +// spin0 shape: frho[...,nvar,ngrids], fsigma[:,...,ngrids], rho[nvar,ngrids] +void VXCunfold_sigma_spin0(double *frho, double *fsigma, double *rho, + int ncounts, int nvar, int ngrids) { size_t Ngrids = ngrids; - size_t vg = nvar * Ngrids; - size_t vg2 = vg * 2; - size_t vvg = nvar * vg2; - double *vuu = v_ud; - double *vud = v_ud + vg; - double *vdu = vuu + vvg; - double *vdd = vud + vvg; - double *vtt = v_ts; - double *vts = v_ts + vg; - double *vst = vtt + vvg; - double *vss = vts + vvg; - double ut, us, dt, ds; - size_t i, n; - for (i = 0; i < nvar; i++) { + size_t Ncg = ncounts * Ngrids; + size_t Nvg = nvar * Ngrids; + int g, n; + for (n = 0; n < ncounts; n++) { #pragma GCC ivdep - for (n = 0; n < vg; n++) { - ut = vuu[i*vg2+n] + vud[i*vg2+n]; - us = vuu[i*vg2+n] - vud[i*vg2+n]; - dt = vdu[i*vg2+n] + vdd[i*vg2+n]; - ds = vdu[i*vg2+n] - vdd[i*vg2+n]; - vtt[i*vg2+n] = (ut + dt) * .25; - vts[i*vg2+n] = (us + ds) * .25; - vst[i*vg2+n] = (ut - dt) * .25; - vss[i*vg2+n] = (us - ds) * .25; + for (g = 0; g < ngrids; g++) { + frho[n*Nvg+g] = fsigma[n*Ngrids+g]; + frho[n*Nvg+1*Ngrids+g] = fsigma[Ncg+n*Ngrids+g] * rho[1*Ngrids+g] * 2; + frho[n*Nvg+2*Ngrids+g] = fsigma[Ncg+n*Ngrids+g] * rho[2*Ngrids+g] * 2; + frho[n*Nvg+3*Ngrids+g] = fsigma[Ncg+n*Ngrids+g] * rho[3*Ngrids+g] * 2; + } + } + if (nvar > 4) { + // MGGA + assert(nvar == 5); + for (n = 0; n < ncounts; n++) { +#pragma GCC ivdep + for (g = 0; g < ngrids; g++) { + frho[n*Nvg+4*Ngrids+g] = fsigma[2*Ncg+n*Ngrids+g]; + } } } } -void VXCud2ts_deriv3(double *v_ts, double *v_ud, int nvar, int ngrids) +#define frho_at(n, a, x, g) frho[((n*2+a)*Nvg + x*Ngrids + g)] +#define fsigma_at(x, n, g) fsigma[(x*Ncg + n*Ngrids + g)] +#define rho_at(a, x, g) rho[(a*Nvg + x*Ngrids + g)] +// spin1 shape: frho[...,2,nvar,ngrids], fsigma[:,...,ngrids], rho[2,nvar,ngrids] +void VXCunfold_sigma_spin1(double *frho, double *fsigma, double *rho, + int ncounts, int nvar, int ngrids) { size_t Ngrids = ngrids; - size_t vg = nvar * Ngrids; - size_t vg2 = vg * 2; - size_t vvg = nvar * vg2; - size_t vvg2 = vvg * 2; - size_t vvvg = nvar * vvg2; - double *vuuu = v_ud; - double *vuud = v_ud + vg; - double *vudu = vuuu + vvg; - double *vudd = vuud + vvg; - double *vduu = vuuu + vvvg; - double *vdud = vuud + vvvg; - double *vddu = vudu + vvvg; - double *vddd = vudd + vvvg; - double *vttt = v_ts; - double *vtts = v_ts + vg; - double *vtst = vttt + vvg; - double *vtss = vtts + vvg; - double *vstt = vttt + vvvg; - double *vsts = vtts + vvvg; - double *vsst = vtst + vvvg; - double *vsss = vtss + vvvg; - double uut, uus, udt, uds, dut, dus, ddt, dds; - double utt, uts, ust, uss, dtt, dts, dst, dss; - size_t i, j, ij, n; - for (i = 0; i < nvar; i++) { - for (j = 0; j < nvar; j++) { - ij = (i * nvar * 2 + j) * vg2; + size_t Ncg = ncounts * Ngrids; + size_t Nvg = nvar * Ngrids; + int g, n; + for (n = 0; n < ncounts; n++) { #pragma GCC ivdep - for (n = 0; n < vg; n++) { - uut = vuuu[ij+n] + vuud[ij+n]; - uus = vuuu[ij+n] - vuud[ij+n]; - udt = vudu[ij+n] + vudd[ij+n]; - uds = vudu[ij+n] - vudd[ij+n]; - dut = vduu[ij+n] + vdud[ij+n]; - dus = vduu[ij+n] - vdud[ij+n]; - ddt = vddu[ij+n] + vddd[ij+n]; - dds = vddu[ij+n] - vddd[ij+n]; - utt = uut + udt; - uts = uus + uds; - ust = uut - udt; - uss = uus - uds; - dtt = dut + ddt; - dts = dus + dds; - dst = dut - ddt; - dss = dus - dds; - vttt[ij+n] = (utt + dtt) * .125; - vtts[ij+n] = (uts + dts) * .125; - vtst[ij+n] = (ust + dst) * .125; - vtss[ij+n] = (uss + dss) * .125; - vstt[ij+n] = (utt - dtt) * .125; - vsts[ij+n] = (uts - dts) * .125; - vsst[ij+n] = (ust - dst) * .125; - vsss[ij+n] = (uss - dss) * .125; + for (g = 0; g < ngrids; g++) { + frho_at(n,0,0,g) = fsigma_at(0,n,g); + frho_at(n,1,0,g) = fsigma_at(1,n,g); + frho_at(n,0,1,g) = fsigma_at(2,n,g) * rho_at(0,1,g) * 2 + fsigma_at(3,n,g) * rho_at(1,1,g); + frho_at(n,1,1,g) = fsigma_at(3,n,g) * rho_at(0,1,g) + 2 * fsigma_at(4,n,g) * rho_at(1,1,g); + frho_at(n,0,2,g) = fsigma_at(2,n,g) * rho_at(0,2,g) * 2 + fsigma_at(3,n,g) * rho_at(1,2,g); + frho_at(n,1,2,g) = fsigma_at(3,n,g) * rho_at(0,2,g) + 2 * fsigma_at(4,n,g) * rho_at(1,2,g); + frho_at(n,0,3,g) = fsigma_at(2,n,g) * rho_at(0,3,g) * 2 + fsigma_at(3,n,g) * rho_at(1,3,g); + frho_at(n,1,3,g) = fsigma_at(3,n,g) * rho_at(0,3,g) + 2 * fsigma_at(4,n,g) * rho_at(1,3,g); } - } } + } + if (nvar > 4) { + // MGGA + assert(nvar == 5); + for (n = 0; n < ncounts; n++) { +#pragma GCC ivdep + for (g = 0; g < ngrids; g++) { + frho_at(n,0,4,g) = fsigma_at(5,n,g); + frho_at(n,1,4,g) = fsigma_at(6,n,g); + } + } + } } diff --git a/pyscf/lib/dft/xcfun_itrf.c b/pyscf/lib/dft/xcfun_itrf.c index 8bfcecffaf..a7825030ea 100644 --- a/pyscf/lib/dft/xcfun_itrf.c +++ b/pyscf/lib/dft/xcfun_itrf.c @@ -25,12 +25,17 @@ #include #include "config.h" +int XCFUN_max_deriv_order = XCFUN_MAX_DERIV_ORDER; + static int eval_xc(xcfun_t* fun, int deriv, xcfun_vars vars, - int np, int ncol, double *rho, double *output) + int np, int ncol, int outlen, double *rho, double *output) { - xcfun_eval_setup(fun, vars, XC_PARTIAL_DERIVATIVES, deriv); + int err = xcfun_eval_setup(fun, vars, XC_PARTIAL_DERIVATIVES, deriv); + if (err != 0) { + fprintf(stderr, "Failed to initialize xcfun %d\n", err); + return err; + } assert(ncol == xcfun_input_length(fun)); - int outlen = xcfun_output_length(fun); //xcfun_eval_vec(fun, np, rho, ncol, output, outlen); #pragma omp parallel default(none) \ @@ -42,14 +47,15 @@ static int eval_xc(xcfun_t* fun, int deriv, xcfun_vars vars, xcfun_eval(fun, rho+i*ncol, output+i*outlen); } } - return outlen; + return 0; } -void XCFUN_eval_xc(int nfn, int *fn_id, double *fac, double *omega, - int spin, int deriv, int np, - double *rho_u, double *rho_d, double *output) +int XCFUN_eval_xc(int nfn, int *fn_id, double *fac, double *omega, + int spin, int deriv, int nvar, int np, int outlen, + double *rho_u, double *output) { - int i, outlen; + int i, err; + double *rho_d = rho_u + np * nvar; double *rho; double *gxu, *gyu, *gzu, *gxd, *gyd, *gzd, *tau_u, *tau_d; const char *name; @@ -73,13 +79,13 @@ void XCFUN_eval_xc(int nfn, int *fn_id, double *fac, double *omega, gxu = rho_u + np; gyu = rho_u + np * 2; gzu = rho_u + np * 3; - tau_u = rho_u + np * 5; + tau_u = rho_u + np * 4; for (i = 0; i < np; i++) { rho[i*3+0] = rho_u[i]; rho[i*3+1] = gxu[i]*gxu[i] + gyu[i]*gyu[i] + gzu[i]*gzu[i]; rho[i*3+2] = tau_u[i]; } - outlen = eval_xc(fun, deriv, XC_N_GNN_TAUN, np, 3, rho, output); + err = eval_xc(fun, deriv, XC_N_GNN_TAUN, np, 3, outlen, rho, output); free(rho); } else if (xcfun_is_gga(fun)) { rho = malloc(sizeof(double) * np*2); @@ -90,11 +96,11 @@ void XCFUN_eval_xc(int nfn, int *fn_id, double *fac, double *omega, rho[i*2+0] = rho_u[i]; rho[i*2+1] = gxu[i]*gxu[i] + gyu[i]*gyu[i] + gzu[i]*gzu[i]; } - outlen = eval_xc(fun, deriv, XC_N_GNN, np, 2, rho, output); + err = eval_xc(fun, deriv, XC_N_GNN, np, 2, outlen, rho, output); free(rho); } else { // LDA rho = rho_u; - outlen = eval_xc(fun, deriv, XC_N, np, 1, rho, output); + err = eval_xc(fun, deriv, XC_N, np, 1, outlen, rho, output); } // xcfun computed rho*Exc[rho] for zeroth order deriviative instead of Exc[rho] for (i = 0; i < np; i++) { @@ -109,8 +115,8 @@ void XCFUN_eval_xc(int nfn, int *fn_id, double *fac, double *omega, gxd = rho_d + np; gyd = rho_d + np * 2; gzd = rho_d + np * 3; - tau_u = rho_u + np * 5; - tau_d = rho_d + np * 5; + tau_u = rho_u + np * 4; + tau_d = rho_d + np * 4; for (i = 0; i < np; i++) { rho[i*7+0] = rho_u[i]; rho[i*7+1] = rho_d[i]; @@ -120,7 +126,7 @@ void XCFUN_eval_xc(int nfn, int *fn_id, double *fac, double *omega, rho[i*7+5] = tau_u[i]; rho[i*7+6] = tau_d[i]; } - outlen = eval_xc(fun, deriv, XC_A_B_GAA_GAB_GBB_TAUA_TAUB, np, 7, rho, output); + err = eval_xc(fun, deriv, XC_A_B_GAA_GAB_GBB_TAUA_TAUB, np, 7, outlen, rho, output); free(rho); } else if (xcfun_is_gga(fun)) { rho = malloc(sizeof(double) * np*5); @@ -137,7 +143,7 @@ void XCFUN_eval_xc(int nfn, int *fn_id, double *fac, double *omega, rho[i*5+3] = gxu[i]*gxd[i] + gyu[i]*gyd[i] + gzu[i]*gzd[i]; rho[i*5+4] = gxd[i]*gxd[i] + gyd[i]*gyd[i] + gzd[i]*gzd[i]; } - outlen = eval_xc(fun, deriv, XC_A_B_GAA_GAB_GBB, np, 5, rho, output); + err = eval_xc(fun, deriv, XC_A_B_GAA_GAB_GBB, np, 5, outlen, rho, output); free(rho); } else { // LDA rho = malloc(sizeof(double) * np*2); @@ -145,7 +151,7 @@ void XCFUN_eval_xc(int nfn, int *fn_id, double *fac, double *omega, rho[i*2+0] = rho_u[i]; rho[i*2+1] = rho_d[i]; } - outlen = eval_xc(fun, deriv, XC_A_B, np, 2, rho, output); + err = eval_xc(fun, deriv, XC_A_B, np, 2, outlen, rho, output); free(rho); } for (i = 0; i < np; i++) { @@ -153,6 +159,7 @@ void XCFUN_eval_xc(int nfn, int *fn_id, double *fac, double *omega, } } xcfun_delete(fun); + return err; } /* diff --git a/pyscf/lib/libxcfun.patch b/pyscf/lib/libxcfun.patch index 7624c1ddfc..04e2a2a245 100644 --- a/pyscf/lib/libxcfun.patch +++ b/pyscf/lib/libxcfun.patch @@ -1,13 +1,101 @@ -diff --git a/src/functionals/common_parameters.cpp b/src/functionals/common_parameters.cpp -index 4142b0d..210b4ab 100644 ---- a/src/functionals/common_parameters.cpp -+++ b/src/functionals/common_parameters.cpp -@@ -3,7 +3,7 @@ - PARAMETER(XC_RANGESEP_MU) = - { - "Range separation inverse length [1/a0]", -- 0.4 -+ 0.33 - }; +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 4281527..8a2f4bb 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -2,7 +2,7 @@ + # Copyright (c) 2015-2020 by Radovan Bast, Roberto Di Remigio, Jonas Juselius, and contributors. - PARAMETER(XC_EXX) = + # set minimum cmake version +-cmake_minimum_required(VERSION 3.14 FATAL_ERROR) ++cmake_minimum_required(VERSION 3.5 FATAL_ERROR) + + # project name + project(XCFun LANGUAGES CXX) +diff --git a/src/XCFunctional.cpp b/src/XCFunctional.cpp +index 239cef5..6288e40 100644 +--- a/src/XCFunctional.cpp ++++ b/src/XCFunctional.cpp +@@ -432,7 +432,7 @@ int xcfun_eval_setup(XCFunctional * fun, + return xcfun::XC_EVARS; + } + if ((order < 0 || order > XCFUN_MAX_ORDER) || +- (mode == XC_PARTIAL_DERIVATIVES && order > 4)) ++ (mode == XC_PARTIAL_DERIVATIVES && order > 6)) + return xcfun::XC_EORDER; + if (mode == XC_POTENTIAL) { + // GGA potential needs full laplacian +@@ -557,6 +557,74 @@ void xcfun_eval(const XCFunctional * fun, const double input[], double output[]) + #endif + #if XCFUN_MAX_ORDER >= 2 + #if XCFUN_MAX_ORDER >= 3 ++#if XCFUN_MAX_ORDER >= 4 ++#if XCFUN_MAX_ORDER >= 5 ++ case 5: { ++ typedef ctaylor ttype; ++ int inlen = xcint_vars[fun->vars].len; ++ ttype in[XC_MAX_INVARS], out = 0; ++ for (int i = 0; i < inlen; i++) ++ in[i] = input[i]; ++ int k = 1 + inlen + (inlen * (inlen + 1)) / 2; ++ for (int i = 0; i < inlen; i++) { ++ in[i].set(VAR0, 1); ++ for (int j = i; j < inlen; j++) { ++ in[j].set(VAR1, 1); ++ for (int s = j; s < inlen; s++) { ++ in[s].set(VAR2, 1); ++ for (int s1 = s; s1 < inlen; s1++) { ++ in[s1].set(VAR3, 1); ++ for (int s2 = s1; s2 < inlen; s2++) { ++ in[s1].set(VAR4, 1); ++ densvars d(fun, in); ++ out = 0; ++ for (int n = 0; n < fun->nr_active_functionals; n++) ++ out += fun->settings[fun->active_functionals[n]->id] * ++ fun->active_functionals[n]->fp5(d); ++ output[k++] = out.get(VAR0 | VAR1 | VAR2 | VAR3 | VAR4); ++ in[s2].set(VAR4, 0); ++ } ++ in[s1].set(VAR3, 0); ++ } ++ in[s].set(VAR2, 0); ++ } ++ in[j].set(VAR1, 0); ++ } ++ in[i] = input[i]; ++ } ++ } ++#endif ++ case 4: { ++ typedef ctaylor ttype; ++ int inlen = xcint_vars[fun->vars].len; ++ ttype in[XC_MAX_INVARS], out = 0; ++ for (int i = 0; i < inlen; i++) ++ in[i] = input[i]; ++ int k = 1 + inlen + (inlen * (inlen + 1)) / 2; ++ for (int i = 0; i < inlen; i++) { ++ in[i].set(VAR0, 1); ++ for (int j = i; j < inlen; j++) { ++ in[j].set(VAR1, 1); ++ for (int s = j; s < inlen; s++) { ++ in[s].set(VAR2, 1); ++ for (int s1 = s; s1 < inlen; s1++) { ++ in[s1].set(VAR3, 1); ++ densvars d(fun, in); ++ out = 0; ++ for (int n = 0; n < fun->nr_active_functionals; n++) ++ out += fun->settings[fun->active_functionals[n]->id] * ++ fun->active_functionals[n]->fp4(d); ++ output[k++] = out.get(VAR0 | VAR1 | VAR2 | VAR3); ++ in[s1].set(VAR3, 0); ++ } ++ in[s].set(VAR2, 0); ++ } ++ in[j].set(VAR1, 0); ++ } ++ in[i] = input[i]; ++ } ++ } ++#endif + // Do the third order derivatives here, then use the second order code. This is + // getting expensive.. + case 3: { diff --git a/pyscf/lib/misc.py b/pyscf/lib/misc.py index 08199fd6cf..63342131ac 100644 --- a/pyscf/lib/misc.py +++ b/pyscf/lib/misc.py @@ -341,6 +341,14 @@ def prange_split(n_total, n_sections): div_points = numpy.array(section_sizes).cumsum() return zip(div_points[:-1], div_points[1:]) +izip = zip + +if sys.version_info > (3, 8): + from math import comb +else: + import math + def comb(n, k): + return math.factorial(n) // math.factorial(n-k) // math.factorial(k) def map_with_prefetch(func, *iterables): ''' @@ -906,13 +914,6 @@ def overwrite_mro(obj, mro): '''A hacky function to overwrite the __mro__ attribute''' raise DeprecationWarning -def izip(*args): - '''python2 izip == python3 zip''' - if sys.version_info < (3,): - return itertools.izip(*args) - else: - return zip(*args) - class ProcessWithReturnValue(Process): def __init__(self, group=None, target=None, name=None, args=(), kwargs=None):