diff --git a/pyscf/dft/libxc.py b/pyscf/dft/libxc.py index 6256ce8624..a4c882583b 100644 --- a/pyscf/dft/libxc.py +++ b/pyscf/dft/libxc.py @@ -1406,262 +1406,223 @@ def eval_xc(xc_code, rho, spin=0, relativity=0, deriv=1, omega=None, verbose=Non see also libxc_itrf.c ''' # noqa: E501 - hyb, fn_facs = parse_xc(xc_code) - if omega is not None: - hyb = hyb[:2] + (float(omega),) - return _eval_xc(hyb, fn_facs, rho, spin, relativity, deriv, verbose) - - -def _eval_xc(hyb, fn_facs, rho, spin=0, relativity=0, deriv=1, verbose=None): - assert (deriv <= 3) - if spin == 0: - nspin = 1 - rho_u = rho_d = numpy.asarray(rho, order='C') - else: - nspin = 2 - rho_u = numpy.asarray(rho[0], order='C') - rho_d = numpy.asarray(rho[1], order='C') - assert (rho_u.dtype == numpy.double) - assert (rho_d.dtype == numpy.double) - - if rho_u.ndim == 1: - rho_u = rho_u.reshape(1,-1) - rho_d = rho_d.reshape(1,-1) - ngrids = rho_u.shape[1] - - fn_ids = [x[0] for x in fn_facs] - facs = [x[1] for x in fn_facs] - if hyb[2] != 0: - # Current implementation does not support different omegas for - # different RSH functionals if there are multiple RSHs - omega = [hyb[2]] * len(facs) - else: - omega = [0] * len(facs) - fn_ids_set = set(fn_ids) - if fn_ids_set.intersection(PROBLEMATIC_XC): - problem_xc = [PROBLEMATIC_XC[k] - for k in fn_ids_set.intersection(PROBLEMATIC_XC)] - warnings.warn('Libxc functionals %s may have discrepancy to xcfun ' - 'library.\n' % problem_xc) - - if any([needs_laplacian(fid) for fid in fn_ids]): - raise NotImplementedError('laplacian in meta-GGA method') - - n = len(fn_ids) - if (n == 0 or # xc_code = '' or xc_code = 'HF', an empty functional - all((is_lda(x) for x in fn_ids))): - if spin == 0: - nvar = 1 - xctype = 'R-LDA' - else: - nvar = 2 - xctype = 'U-LDA' - elif any((is_meta_gga(x) for x in fn_ids)): - if spin == 0: - nvar = 4 - xctype = 'R-MGGA' - else: - nvar = 9 - xctype = 'U-MGGA' - else: # GGA - if spin == 0: - nvar = 2 - xctype = 'R-GGA' - else: - nvar = 5 - xctype = 'U-GGA' - - # Check that the density rho has the appropriate shape - # should it be >= or ==, in test test_xcfun.py, test_vs_libxc_rks - # the density contain 6 rows independently of the functional - if xctype[2:] == 'LDA': - for rho_ud in [rho_u, rho_d]: - assert rho_ud.shape[0] >= 1 - - elif xctype[2:] == 'GGA': - for rho_ud in [rho_u, rho_d]: - assert rho_ud.shape[0] >= 4 - - elif xctype[2:] == 'MGGA': - for rho_ud in [rho_u, rho_d]: - assert rho_ud.shape[0] >= 6 - else: - raise ValueError("Unknown nvar {}".format(nvar)) - - outlen = lib.comb(nvar+deriv, deriv) - outbuf = numpy.zeros((outlen,ngrids)) - density_threshold = 0 - if n > 0: - _itrf.LIBXC_eval_xc(ctypes.c_int(n), - (ctypes.c_int*n)(*fn_ids), - (ctypes.c_double*n)(*facs), - (ctypes.c_double*n)(*omega), - ctypes.c_int(nspin), - ctypes.c_int(deriv), ctypes.c_int(ngrids), - rho_u.ctypes.data_as(ctypes.c_void_p), - rho_d.ctypes.data_as(ctypes.c_void_p), - outbuf.ctypes.data_as(ctypes.c_void_p), - ctypes.c_double(density_threshold)) - + outbuf = _eval_xc(xc_code, rho, spin, deriv, omega) exc = outbuf[0] vxc = fxc = kxc = None - if xctype == 'R-LDA': + xctype = xc_type(xc_code) + if xctype == 'LDA' and spin == 0: if deriv > 0: vxc = [outbuf[1]] if deriv > 1: fxc = [outbuf[2]] if deriv > 2: kxc = [outbuf[3]] - elif xctype == 'R-GGA': + elif xctype == 'GGA' and spin == 0: if deriv > 0: vxc = [outbuf[1], outbuf[2]] if deriv > 1: fxc = [outbuf[3], outbuf[4], outbuf[5]] if deriv > 2: kxc = [outbuf[6], outbuf[7], outbuf[8], outbuf[9]] - elif xctype == 'U-LDA': + elif xctype == 'LDA' and spin == 1: if deriv > 0: vxc = [outbuf[1:3].T] if deriv > 1: fxc = [outbuf[3:6].T] if deriv > 2: kxc = [outbuf[6:10].T] - elif xctype == 'U-GGA': + elif xctype == 'GGA' and spin == 1: if deriv > 0: vxc = [outbuf[1:3].T, outbuf[3:6].T] if deriv > 1: fxc = [outbuf[6:9].T, outbuf[9:15].T, outbuf[15:21].T] if deriv > 2: kxc = [outbuf[21:25].T, outbuf[25:34].T, outbuf[34:46].T, outbuf[46:56].T] - elif xctype == 'R-MGGA': + elif xctype == 'MGGA' and spin == 0: if deriv > 0: - vxc = [outbuf[1], outbuf[2], None, outbuf[4]] + vxc = [outbuf[1], outbuf[2], None, outbuf[3]] if deriv > 1: fxc = [ # v2rho2, v2rhosigma, v2sigma2, - outbuf[5], outbuf[6], outbuf[7], + outbuf[4], outbuf[5], outbuf[6], # v2lapl2, v2tau2, None, outbuf[9], # v2rholapl, v2rhotau, - None, outbuf[11], + None, outbuf[7], # v2lapltau, v2sigmalapl, v2sigmatau, - None, None, outbuf[14]] + None, None, outbuf[8]] if deriv > 2: # v3lapltau2 might not be strictly 0 # outbuf[18] = 0 kxc = [ # v3rho3, v3rho2sigma, v3rhosigma2, v3sigma3, - outbuf[15], outbuf[16], outbuf[17], outbuf[18], + outbuf[10], outbuf[11], outbuf[12], outbuf[13], # v3rho2lapl, v3rho2tau, - None, outbuf[20], + None, outbuf[14], # v3rhosigmalapl, v3rhosigmatau, - None, outbuf[22], + None, outbuf[15], # v3rholapl2, v3rholapltau, v3rhotau2, - None, None, outbuf[25], + None, None, outbuf[16], # v3sigma2lapl, v3sigma2tau, - None, outbuf[27], + None, outbuf[17], # v3sigmalapl2, v3sigmalapltau, v3sigmatau2, - None, None, outbuf[30], + None, None, outbuf[18], # v3lapl3, v3lapl2tau, v3lapltau2, v3tau3) - None, None, None, outbuf[34]] - elif xctype == 'U-MGGA': + None, None, None, outbuf[19]] + elif xctype == 'MGGA' and spin == 1: if deriv > 0: - vxc = [outbuf[1:3].T, outbuf[3:6].T, None, outbuf[8:10].T] + vxc = [outbuf[1:3].T, outbuf[3:6].T, None, outbuf[6:8].T] if deriv > 1: # v2lapltau might not be strictly 0 # outbuf[39:43] = 0 fxc = [ # v2rho2, v2rhosigma, v2sigma2, - outbuf[10:13].T, outbuf[13:19].T, outbuf[19:25].T, + outbuf[8:11].T, outbuf[11:17].T, outbuf[17:23].T, # v2lapl2, v2tau2, - None, outbuf[28:31].T, + None, outbuf[33:36].T, # v2rholapl, v2rhotau, - None, outbuf[35:39].T, + None, outbuf[23:27].T, # v2lapltau, v2sigmalapl, v2sigmatau, - None, None, outbuf[49:55].T] + None, None, outbuf[27:33].T] if deriv > 2: # v3lapltau2 might not be strictly 0 # outbuf[204:216] = 0 kxc = [ # v3rho3, v3rho2sigma, v3rhosigma2, v3sigma3, - outbuf[55:59].T, outbuf[59:68].T, outbuf[68:80].T, outbuf[80:90].T, + outbuf[36:40].T, outbuf[40:49].T, outbuf[49:61].T, outbuf[61:71].T, # v3rho2lapl, v3rho2tau, - None, outbuf[96:102].T, + None, outbuf[71:77].T, # v3rhosigmalapl, v3rhosigmatau, - None, outbuf[114:126].T, + None, outbuf[77:89].T, # v3rholapl2, v3rholapltau, v3rhotau2, - None, None, outbuf[140:146].T, + None, None, outbuf[89:95].T, # v3sigma2lapl, v3sigma2tau, - None, outbuf[158:170].T, + None, outbuf[95:107].T, # v3sigmalapl2, v3sigmalapltau, v3sigmatau2, - None, None, outbuf[191:200].T, + None, None, outbuf[107:116].T, # v3lapl3, v3lapl2tau, v3lapltau2, v3tau3) - None, None, None, outbuf[216:220].T] + None, None, None, outbuf[116:120].T] return exc, vxc, fxc, kxc -_UFXC_SORT = numpy.array([ - 0, 1, # v2rho2 - 2, 3, 4, # v2rhosigma - 11, 12, 13, 14, 15, 16, # v2rhotau - 5, 6, 7, 8, 9, 10, # v2sigma2 - 17, 18, 19, 20, # v2sigmatau - 21, 22, 23, 24, 25, 26, # v2tau2 -]) -_UKXC_SORT = numpy.array([ - 0, 1, 2, # v3rho3 - 3, 4, 5, 6, # v3rho2sigma - 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, # v3rho2tau - 7, 8, 9, 10, 11, 12, 13, 14, 15, # v3rhosigma2 - 38, 39, 40, 41, 42, 43, # v3rhosigmatau - 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, # v3rhotau2 - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, # v3sigma3 - 56, 57, 58, 59, 60, 61, # v3sigma2tau - 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, # v3sigmatau2 - 74, 75, 76, 77, 78, 79, 80, 81, 82, # v3tau3 -]) -_ULXC_SORT = numpy.array([ - 0, 1, 2, 3, # v4rho4 - 4, 5, 6, 7, 8, # v4rho3sigma - 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, # v4rho3tau - 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, # v4rho2sigma2 - 74, 75, 76, 77, 78, 79, 80, 81, # v4rho2sigmatau - 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, # v4rho2tau2 - 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, # v4rhosigma3 - 100, 101, 102, 103, 104, 105, 106, 107, 108, # v4rhosigma2tau - 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, # v4rhosigmatau2 - 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, # v4rhotau3 - 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, # v4sigma4 - 151, 152, 153, 154, 155, 156, 157, 158, # v4sigma3tau - 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, # v4sigma2tau2 - 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, # v4sigmatau3 - 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, # v4tau4 -]) - -def eval_xc_eff(xc_code, rho, deriv=1, omega=None): - r'''Returns the derivative tensor against the density parameters - - [density_a, (nabla_x)_a, (nabla_y)_a, (nabla_z)_a, tau_a] - - or spin-polarized density parameters - - [[density_a, (nabla_x)_a, (nabla_y)_a, (nabla_z)_a, tau_a], - [density_b, (nabla_x)_b, (nabla_y)_b, (nabla_z)_b, tau_b]]. +_GGA_SORT = { + (1, 2): numpy.array([ + 6, 7, 9, 10, 11, 8, 12, 13, 14, 15, 16, 17, 18, 19, 20, + ]), + (1, 3): numpy.array([ + 21, 22, 25, 26, 27, 23, 28, 29, 30, 34, 35, 36, 37, 38, 39, 24, 31, 32, + 33, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, + ]), + (1, 4): numpy.array([ + 56, 57, 61, 62, 63, 58, 64, 65, 66, 73, 74, 75, 76, 77, 78, 59, 67, 68, + 69, 79, 80, 81, 82, 83, 84, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 60, + 70, 71, 72, 85, 86, 87, 88, 89, 90, 101, 102, 103, 104, 105, 106, 107, + 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, + 122, 123, 124, 125, + ]) +} +_MGGA_SORT = { + (0, 2): numpy.array([ + 0, # v2rho2 + 1, # v2rhosigma + 3, # v2rhotau + 2, # v2sigma2 + 4, # v2sigmatau + 5, # v2tau2 + ]) + 4, + (0, 3): numpy.array([ + 0, # v3rho3 + 1, # v3rho2sigma + 4, # v3rho2tau + 2, # v3rhosigma2 + 5, # v3rhosigmatau + 6, # v3rhotau2 + 3, # v3sigma3 + 7, # v3sigma2tau + 8, # v3sigmatau2 + 9, # v3tau3 + ]) + 10, + (0, 4): numpy.array([ + 0, # v4rho4 + 1, # v4rho3sigma + 5, # v4rho3tau + 2, # v4rho2sigma2 + 6, # v4rho2sigmatau + 7, # v4rho2tau2 + 3, # v4rhosigma3 + 8, # v4rhosigma2tau + 9, # v4rhosigmatau2 + 10, # v4rhotau3 + 4, # v4sigma4 + 11, # v4sigma3tau + 12, # v4sigma2tau2 + 13, # v4sigmatau3 + 14, # v4tau4 + ]) + 20, + (1, 2): numpy.array([ + 8, 9, 11, 12, 13, 23, 24, 10, 14, 15, 16, 25, 26, 17, 18, 19, 27, 28, + 20, 21, 29, 30, 22, 31, 32, 33, 34, 35, + ]), + (1, 3): numpy.array([ + 36, 37, 40, 41, 42, 71, 72, 38, 43, 44, 45, 73, 74, 49, 50, 51, 77, 78, + 52, 53, 79, 80, 54, 81, 82, 89, 90, 91, 39, 46, 47, 48, 75, 76, 55, 56, + 57, 83, 84, 58, 59, 85, 86, 60, 87, 88, 92, 93, 94, 61, 62, 63, 95, 96, + 64, 65, 97, 98, 66, 99, 100, 107, 108, 109, 67, 68, 101, 102, 69, 103, + 104, 110, 111, 112, 70, 105, 106, 113, 114, 115, 116, 117, 118, 119, + ]), + (1, 4): numpy.array([ + 120, 121, 125, 126, 127, 190, 191, 122, 128, 129, 130, 192, 193, 137, + 138, 139, 198, 199, 140, 141, 200, 201, 142, 202, 203, 216, 217, 218, + 123, 131, 132, 133, 194, 195, 143, 144, 145, 204, 205, 146, 147, 206, + 207, 148, 208, 209, 219, 220, 221, 155, 156, 157, 225, 226, 158, 159, + 227, 228, 160, 229, 230, 249, 250, 251, 161, 162, 231, 232, 163, 233, + 234, 252, 253, 254, 164, 235, 236, 255, 256, 257, 267, 268, 269, 270, + 124, 134, 135, 136, 196, 197, 149, 150, 151, 210, 211, 152, 153, 212, + 213, 154, 214, 215, 222, 223, 224, 165, 166, 167, 237, 238, 168, 169, + 239, 240, 170, 241, 242, 258, 259, 260, 171, 172, 243, 244, 173, 245, + 246, 261, 262, 263, 174, 247, 248, 264, 265, 266, 271, 272, 273, 274, + 175, 176, 177, 275, 276, 178, 179, 277, 278, 180, 279, 280, 295, 296, + 297, 181, 182, 281, 282, 183, 283, 284, 298, 299, 300, 184, 285, 286, + 301, 302, 303, 313, 314, 315, 316, 185, 186, 287, 288, 187, 289, 290, + 304, 305, 306, 188, 291, 292, 307, 308, 309, 317, 318, 319, 320, 189, + 293, 294, 310, 311, 312, 321, 322, 323, 324, 325, 326, 327, 328, 329, + ]) +} + +def eval_xc1(xc_code, rho, spin=0, deriv=1, omega=None): + '''Similar to eval_xc. + Returns an array with the order of derivatives following xcfun convention. + ''' + out = _eval_xc(xc_code, rho, spin, deriv=deriv, omega=omega) + xctype = xc_type(xc_code) + if deriv <= 1: + return out + elif xctype == 'LDA': + return out + elif xctype == 'GGA': + if spin == 0: + return out + else: + idx = [numpy.arange(6)] # up to deriv=1 + for i in range(2, deriv+1): + idx.append(_GGA_SORT[(spin, i)]) + else: # MGGA + if spin == 0: + idx = [numpy.arange(4)] # up to deriv=1 + else: + idx = [numpy.arange(8)] # up to deriv=1 + for i in range(2, deriv+1): + idx.append(_MGGA_SORT[(spin, i)]) + return out[numpy.hstack(idx)] - It differs from the eval_xc method in the derivatives of non-local part. - The eval_xc method returns the XC functional derivatives to sigma - (|\nabla \rho|^2) +def _eval_xc(xc_code, rho, spin=0, deriv=1, omega=None): + assert deriv <= 4 + xctype = xc_type(xc_code) + assert xctype in ('LDA', 'GGA', 'MGGA') - Args: - rho: 2-dimensional or 3-dimensional array - Total density or (spin-up, spin-down) densities (and their - derivatives if GGA or MGGA functionals) on grids + rho = numpy.asarray(rho, order='C', dtype=numpy.double) + if xctype == 'MGGA' and rho.shape[-2] == 6: + rho = numpy.asarray(rho[...,[0,1,2,3,5],:], order='C') - Kwargs: - deriv: int - derivative orders - omega: float - define the exponent in the attenuated Coulomb for RSH functional - ''' hyb, fn_facs = parse_xc(xc_code) if omega is not None: hyb = hyb[:2] + (float(omega),) @@ -1684,47 +1645,60 @@ def eval_xc_eff(xc_code, rho, deriv=1, omega=None): if any([needs_laplacian(fid) for fid in fn_ids]): raise NotImplementedError('laplacian in meta-GGA method') - rho = numpy.asarray(rho, order='C', dtype=numpy.double) + nvar, xlen = xc_deriv._XC_NVAR[xctype, spin] ngrids = rho.shape[-1] + rho = rho.reshape(spin+1,nvar,ngrids) + outlen = lib.comb(xlen+deriv, deriv) + out = numpy.zeros((outlen,ngrids)) + n = len(fn_ids) + if n > 0: + density_threshold = 0 + _itrf.LIBXC_eval_xc(ctypes.c_int(n), + (ctypes.c_int*n)(*fn_ids), + (ctypes.c_double*n)(*facs), + (ctypes.c_double*n)(*omega), + ctypes.c_int(spin), ctypes.c_int(deriv), + ctypes.c_int(nvar), ctypes.c_int(ngrids), + ctypes.c_int(outlen), + rho.ctypes.data_as(ctypes.c_void_p), + out.ctypes.data_as(ctypes.c_void_p), + ctypes.c_double(density_threshold)) + return out - xctype = xc_type(xc_code) - assert xctype in ('LDA', 'GGA', 'MGGA') +def eval_xc_eff(xc_code, rho, deriv=1, omega=None): + r'''Returns the derivative tensor against the density parameters - if xctype == 'MGGA' and rho.shape[-2] == 6: - rho = rho[...,[0,1,2,3,5],:] + [density_a, (nabla_x)_a, (nabla_y)_a, (nabla_z)_a, tau_a] - if xctype == 'LDA': - spin_polarized = rho.ndim >= 2 and rho.shape[0] != 1 - else: - spin_polarized = rho.ndim == 3 and rho.shape[0] != 1 + or spin-polarized density parameters + + [[density_a, (nabla_x)_a, (nabla_y)_a, (nabla_z)_a, tau_a], + [density_b, (nabla_x)_b, (nabla_y)_b, (nabla_z)_b, tau_b]]. + + It differs from the eval_xc method in the derivatives of non-local part. + The eval_xc method returns the XC functional derivatives to sigma + (|\nabla \rho|^2) + + Args: + rho: 2-dimensional or 3-dimensional array + Total density or (spin-up, spin-down) densities (and their + derivatives if GGA or MGGA functionals) on grids + + Kwargs: + deriv: int + derivative orders + omega: float + define the exponent in the attenuated Coulomb for RSH functional + ''' + xctype = xc_type(xc_code) + rho = numpy.asarray(rho, order='C', dtype=numpy.double) + spin_polarized = rho.ndim >= 2 and rho.shape[0] == 2 if spin_polarized: spin = 1 else: spin = 0 - nspin = spin + 1 - - nvar, xlen = xc_deriv._XC_NVAR[xctype, spin] - - rho = rho.reshape(spin+1,nvar,ngrids) - outlen = lib.comb(xlen+deriv, deriv) - out = numpy.zeros((ngrids,outlen)) - n = len(fn_ids) - density_threshold = 0 - if n > 0: - _itrf.LIBXC_eval_xc1(ctypes.c_int(n), - (ctypes.c_int*n)(*fn_ids), - (ctypes.c_double*n)(*facs), - (ctypes.c_double*n)(*omega), - ctypes.c_int(spin), ctypes.c_int(deriv), - ctypes.c_int(nvar), ctypes.c_int(ngrids), - rho.ctypes.data_as(ctypes.c_void_p), - out.ctypes.data_as(ctypes.c_void_p), - ctypes.c_double(density_threshold)) - assert False - out = lib.transpose(out) - xc_tensor = xc_deriv.transform_xc(rho, out, xctype, spin, deriv) - exc = out[0] - return exc, xc_tensor + out = eval_xc1(xc_code, rho, spin, deriv, omega) + return xc_deriv.transform_xc(rho, out, xctype, spin, deriv) def define_xc_(ni, description, xctype='LDA', hyb=0, rsh=(0,0,0)): '''Define XC functional. See also :func:`eval_xc` for the rules of input description. diff --git a/pyscf/dft/test/test_xc_deriv.py b/pyscf/dft/test/test_xc_deriv.py index e4b940110e..ac49d874a5 100644 --- a/pyscf/dft/test/test_xc_deriv.py +++ b/pyscf/dft/test/test_xc_deriv.py @@ -282,6 +282,115 @@ def test_ud2ts(self): self.assertAlmostEqual(abs(xc_deriv.ud2ts(f_ud) - f_ts).max(), 0, 12) self.assertAlmostEqual(abs(xc_deriv.ud2ts(k_ud) - k_ts).max(), 0, 12) + def test_libxc_lda_deriv3(self): + rho1 = rho[:,0].copy() + ni = numint.NumInt() + ref = ni.eval_xc_eff('LDA,', rho1, deriv=3) + + xc1 = dft.libxc.eval_xc_eff('LDA,', rho1, deriv=3) + self.assertAlmostEqual(xc1.sum(), 51.36053114469969, 9) + self.assertAlmostEqual(abs(ref[3] - xc1).max(), 0, 9) + + xc1 = dft.libxc.eval_xc_eff('LDA,', rho1, deriv=2) + self.assertAlmostEqual(xc1.sum(), -13.323225829690143, 9) + self.assertAlmostEqual(abs(ref[2] - xc1).max(), 0, 9) + + xc1 = dft.libxc.eval_xc_eff('LDA,', rho1, deriv=1) + self.assertAlmostEqual(xc1.sum(), -6.912554696220437, 9) + self.assertAlmostEqual(abs(ref[1] - xc1).max(), 0, 9) + + rho1 = rho[1,0].copy() + ni = numint.NumInt() + ref = ni.eval_xc_eff('LDA,', rho1, deriv=3) + + xc1 = dft.libxc.eval_xc_eff('LDA,', rho1, deriv=3) + self.assertAlmostEqual(xc1.sum(), 20.21333987261437, 9) + self.assertAlmostEqual(abs(ref[3] - xc1).max(), 0, 9) + + xc1 = dft.libxc.eval_xc_eff('LDA,', rho1, deriv=2) + self.assertAlmostEqual(xc1.sum(), -5.269784014086463, 9) + self.assertAlmostEqual(abs(ref[2] - xc1).max(), 0, 9) + + xc1 = dft.libxc.eval_xc_eff('LDA,', rho1, deriv=1) + self.assertAlmostEqual(xc1.sum(), -2.7477984980958627, 9) + self.assertAlmostEqual(abs(ref[1] - xc1).max(), 0, 9) + + def test_libxc_gga_deriv3(self): + rho1 = rho[:,:4].copy() + ni = numint.NumInt() + ref = ni.eval_xc_eff('PBE', rho1, deriv=3) + + xc1 = dft.libxc.eval_xc_eff('PBE', rho1, deriv=3) + self.assertAlmostEqual(xc1.sum(), 61.29042037001073, 3) + self.assertAlmostEqual(abs(ref[3] - xc1).max(), 0, 9) + + xc1 = dft.libxc.eval_xc_eff('PBE', rho1, deriv=2) + self.assertAlmostEqual(xc1.sum(), -13.896034377219816, 4) + self.assertAlmostEqual(abs(ref[2] - xc1).max(), 0, 9) + + xc1 = dft.libxc.eval_xc_eff('PBE', rho1, deriv=1) + self.assertAlmostEqual(xc1.sum(), -7.616226587554259, 6) + self.assertAlmostEqual(abs(ref[1] - xc1).max(), 0, 9) + + rho1 = rho[1,:4].copy() + ni = numint.NumInt() + ref = ni.eval_xc_eff('PBE', rho1, deriv=3) + + xc1 = dft.libxc.eval_xc_eff('PBE', rho1, deriv=3) + self.assertAlmostEqual(xc1.sum(), 26.08081046374974, 3) + self.assertAlmostEqual(abs(ref[3] - xc1).max(), 0, 9) + + xc1 = dft.libxc.eval_xc_eff('PBE', rho1, deriv=2) + self.assertAlmostEqual(xc1.sum(), -5.559303849017572, 4) + self.assertAlmostEqual(abs(ref[2] - xc1).max(), 0, 9) + + xc1 = dft.libxc.eval_xc_eff('PBE', rho1, deriv=1) + self.assertAlmostEqual(xc1.sum(), -3.0715856471099032, 6) + self.assertAlmostEqual(abs(ref[1] - xc1).max(), 0, 9) + + def test_libxc_mgga_deriv3(self): + rho1 = rho + ni = numint.NumInt() + ref = ni.eval_xc_eff('M06', rho1, deriv=3) + + xc1 = dft.libxc.eval_xc_eff('M06', rho1, deriv=3) + self.assertAlmostEqual(xc1.sum(), 3461867.985594323, 1) + self.assertAlmostEqual(abs(ref[3] - xc1).max(), 0, 9) + + xc1 = dft.libxc.eval_xc_eff('M06', rho1, deriv=2) + self.assertAlmostEqual(xc1.sum(), -19196.865088253828, 3) + self.assertAlmostEqual(abs(ref[2] - xc1).max(), 0, 9) + + xc1 = dft.libxc.eval_xc_eff('M06', rho1, deriv=1) + self.assertAlmostEqual(xc1.sum(), 90.99262909378264, 6) + self.assertAlmostEqual(abs(ref[1] - xc1).max(), 0, 9) + + rho1 = rho[1] + ni = numint.NumInt() + ref = ni.eval_xc_eff('M06', rho1, deriv=3) + + xc1 = dft.libxc.eval_xc_eff('M06', rho1, deriv=3) + self.assertAlmostEqual(xc1.sum(), 2506574.915698602, 1) + self.assertAlmostEqual(abs(ref[3] - xc1).max(), 0, 9) + + xc1 = dft.libxc.eval_xc_eff('M06', rho1, deriv=2) + self.assertAlmostEqual(xc1.sum(), -9308.64852580393, 3) + self.assertAlmostEqual(abs(ref[2] - xc1).max(), 0, 9) + + xc1 = dft.libxc.eval_xc_eff('M06', rho1, deriv=1) + self.assertAlmostEqual(xc1.sum(), 19.977512805950784, 7) + self.assertAlmostEqual(abs(ref[1] - xc1).max(), 0, 9) + + @unittest.skip(dft.libxc.max_deriv_order('pbe,') <= 3) + def test_libxc_gga_deriv4(self): + rho1 = rho[:,:4].copy() + xc1 = dft.libxc.eval_xc_eff('PBE', rho1, deriv=4) + self.assertAlmostEqual(xc1.sum(), -920.135878252819, 4) + + rho1 = rho[1,:4].copy() + xc1 = dft.libxc.eval_xc_eff('PBE', rho1, deriv=4) + self.assertAlmostEqual(xc1.sum(), -869.6617638095072, 4) + @unittest.skip(not hasattr(dft, 'xcfun')) def test_xcfun_lda_deriv3(self): rho1 = rho[:,0].copy() diff --git a/pyscf/dft/xcfun.py b/pyscf/dft/xcfun.py index b94106ff03..69613cbd58 100644 --- a/pyscf/dft/xcfun.py +++ b/pyscf/dft/xcfun.py @@ -823,7 +823,7 @@ def eval_xc(xc_code, rho, spin=0, relativity=0, deriv=1, omega=None, verbose=Non See also :func:`pyscf.dft.libxc.eval_xc` ''' - outbuf = eval_xc1(xc_code, rho, spin, deriv=deriv, omega=omega) + outbuf = eval_xc1(xc_code, rho, spin, deriv, omega) exc = outbuf[0] vxc = fxc = kxc = None xctype = xc_type(xc_code) @@ -953,8 +953,9 @@ def eval_xc(xc_code, rho, spin=0, relativity=0, deriv=1, omega=None, verbose=Non outbuf[[XC_D0000030,XC_D0000021,XC_D0000012,XC_D0000003]].T] return exc, vxc, fxc, kxc -def eval_xc1(xc_code, rho, spin=0, relativity=0, deriv=1, omega=None, verbose=None): - '''Similar to eval_xc. Returns an array of deriviates following xcfun convention. +def eval_xc1(xc_code, rho, spin=0, deriv=1, omega=None): + '''Similar to eval_xc. + Returns an array with the order of derivatives following xcfun convention. ''' assert deriv <= MAX_DERIV_ORDER xctype = xc_type(xc_code) @@ -1029,7 +1030,7 @@ def eval_xc_eff(xc_code, rho, deriv=1, omega=None): spin = 1 else: spin = 0 - out = eval_xc1(xc_code, rho, spin, deriv=deriv, omega=omega) + out = eval_xc1(xc_code, rho, spin, deriv, omega) return xc_deriv.transform_xc(rho, out, xctype, spin, deriv) def define_xc_(ni, description, xctype='LDA', hyb=0, rsh=(0,0,0)): diff --git a/pyscf/lib/CMakeLists.txt b/pyscf/lib/CMakeLists.txt index 212bb873ba..eb213f8143 100644 --- a/pyscf/lib/CMakeLists.txt +++ b/pyscf/lib/CMakeLists.txt @@ -204,7 +204,7 @@ if(ENABLE_LIBXC AND BUILD_LIBXC) CMAKE_ARGS -DCMAKE_BUILD_TYPE=RELEASE -DBUILD_SHARED_LIBS=1 -DCMAKE_INSTALL_PREFIX:PATH= -DCMAKE_INSTALL_LIBDIR:PATH=lib - -DENABLE_FORTRAN=0 -DDISABLE_KXC=0 -DDISABLE_LXC=0 + -DENABLE_FORTRAN=0 -DDISABLE_KXC=0 -DDISABLE_LXC=1 -DCMAKE_C_CREATE_SHARED_LIBRARY=${C_LINK_TEMPLATE} -DENABLE_XHOST:STRING=${BUILD_MARCH_NATIVE} -DCMAKE_C_COMPILER:STRING=${CMAKE_C_COMPILER} @@ -222,7 +222,7 @@ if(ENABLE_XCFUN AND BUILD_XCFUN) PREFIX ${PROJECT_BINARY_DIR}/deps INSTALL_DIR ${PROJECT_SOURCE_DIR}/deps CMAKE_ARGS -DCMAKE_BUILD_TYPE=RELEASE -DBUILD_SHARED_LIBS=1 - -DXCFUN_MAX_ORDER=5 -DENABLE_TESTALL=0 + -DXCFUN_MAX_ORDER=3 -DENABLE_TESTALL=0 -DCMAKE_INSTALL_PREFIX:PATH= -DCMAKE_CXX_CREATE_SHARED_LIBRARY=${CXX_LINK_TEMPLATE} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} diff --git a/pyscf/lib/dft/CMakeLists.txt b/pyscf/lib/dft/CMakeLists.txt index d1c1a201a8..6b01b7eca0 100644 --- a/pyscf/lib/dft/CMakeLists.txt +++ b/pyscf/lib/dft/CMakeLists.txt @@ -25,7 +25,7 @@ target_link_libraries(dft cvhf cgto cint np_helper ${BLAS_LIBRARIES} ${OPENMP_C_ if(ENABLE_LIBXC) -add_library(xc_itrf SHARED libxc_itrf1.c) +add_library(xc_itrf SHARED libxc_itrf.c) set_target_properties(xc_itrf PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}) target_link_libraries(xc_itrf xc ${OPENMP_C_PROPERTIES}) diff --git a/pyscf/lib/dft/libxc_itrf.c b/pyscf/lib/dft/libxc_itrf.c index 60641e5e43..76d7497980 100644 --- a/pyscf/lib/dft/libxc_itrf.c +++ b/pyscf/lib/dft/libxc_itrf.c @@ -73,19 +73,108 @@ v3sigma(10) = (uu_uu_uu, uu_uu_ud, uu_uu_dd, uu_ud_ud, uu_ud_dd, uu_dd_dd, ud_ud_ud, ud_ud_dd, ud_dd_dd, dd_dd_dd) */ + +#define LDA_NVAR 1 +#define GGA_NVAR 4 +#define MGGA_NVAR 5 + /* * rho_u/rho_d = (den,grad_x,grad_y,grad_z,laplacian,tau) * In spin restricted case (spin == 1), rho_u is assumed to be the * spin-free quantities, rho_d is not used. */ -static void _eval_xc(xc_func_type *func_x, int spin, int np, - double *rho_u, double *rho_d, - double *ex, double *vxc, double *fxc, double *kxc) +static void _eval_rho(double *rho, double *rho_u, int spin, int nvar, int np) { int i; - double *rho, *sigma, *lapl, *tau; + double *sigma, *tau; double *gxu, *gyu, *gzu, *gxd, *gyd, *gzd; - double *lapl_u, *lapl_d, *tau_u, *tau_d; + double *tau_u, *tau_d; + double *rho_d = rho_u + np * nvar; + + switch (nvar) { + case LDA_NVAR: + if (spin == 1) { + for (i = 0; i < np; i++) { + rho[i*2+0] = rho_u[i]; + rho[i*2+1] = rho_d[i]; + } + } else { + for (i = 0; i < np; i++) { + rho[i] = rho_u[i]; + } + } + break; + case GGA_NVAR: + if (spin == 1) { + sigma = rho + np * 2; + gxu = rho_u + np; + gyu = rho_u + np * 2; + gzu = rho_u + np * 3; + gxd = rho_d + np; + gyd = rho_d + np * 2; + gzd = rho_d + np * 3; + for (i = 0; i < np; i++) { + rho[i*2+0] = rho_u[i]; + rho[i*2+1] = rho_d[i]; + sigma[i*3+0] = gxu[i]*gxu[i] + gyu[i]*gyu[i] + gzu[i]*gzu[i]; + sigma[i*3+1] = gxu[i]*gxd[i] + gyu[i]*gyd[i] + gzu[i]*gzd[i]; + sigma[i*3+2] = gxd[i]*gxd[i] + gyd[i]*gyd[i] + gzd[i]*gzd[i]; + } + } else { + sigma = rho + np; + gxu = rho_u + np; + gyu = rho_u + np * 2; + gzu = rho_u + np * 3; + for (i = 0; i < np; i++) { + rho[i] = rho_u[i]; + sigma[i] = gxu[i]*gxu[i] + gyu[i]*gyu[i] + gzu[i]*gzu[i]; + } + } + break; + case MGGA_NVAR: + if (spin == 1) { + sigma = rho + np * 2; + tau = sigma + np * 3; + gxu = rho_u + np; + gyu = rho_u + np * 2; + gzu = rho_u + np * 3; + gxd = rho_d + np; + gyd = rho_d + np * 2; + gzd = rho_d + np * 3; + tau_u = rho_u + np * 4; + tau_d = rho_d + np * 4; + for (i = 0; i < np; i++) { + rho[i*2+0] = rho_u[i]; + rho[i*2+1] = rho_d[i]; + tau[i*2+0] = tau_u[i]; + tau[i*2+1] = tau_d[i]; + } + for (i = 0; i < np; i++) { + sigma[i*3+0] = gxu[i]*gxu[i] + gyu[i]*gyu[i] + gzu[i]*gzu[i]; + sigma[i*3+1] = gxu[i]*gxd[i] + gyu[i]*gyd[i] + gzu[i]*gzd[i]; + sigma[i*3+2] = gxd[i]*gxd[i] + gyd[i]*gyd[i] + gzd[i]*gzd[i]; + } + } else { + sigma = rho + np; + tau = sigma + np; + gxu = rho_u + np; + gyu = rho_u + np * 2; + gzu = rho_u + np * 3; + tau_u = rho_u + np * 4; + for (i = 0; i < np; i++) { + rho[i] = rho_u[i]; + sigma[i] = gxu[i]*gxu[i] + gyu[i]*gyu[i] + gzu[i]*gzu[i]; + tau[i] = tau_u[i]; + } + } + break; + } +} +static void _eval_xc(xc_func_type *func_x, int spin, int deriv, int np, + double *rho, double *exc) +{ + double *sigma, *tau; + double *lapl = rho; double *vrho = NULL; double *vsigma = NULL; double *vlapl = NULL; @@ -120,6 +209,41 @@ static void _eval_xc(xc_func_type *func_x, int spin, int np, double *v3lapl2tau = NULL; double *v3lapltau2 = NULL; double *v3tau3 = NULL; + double *v4rho4 = NULL; + double *v4rho3sigma = NULL; + double *v4rho3lapl = NULL; + double *v4rho3tau = NULL; + double *v4rho2sigma2 = NULL; + double *v4rho2sigmalapl = NULL; + double *v4rho2sigmatau = NULL; + double *v4rho2lapl2 = NULL; + double *v4rho2lapltau = NULL; + double *v4rho2tau2 = NULL; + double *v4rhosigma3 = NULL; + double *v4rhosigma2lapl = NULL; + double *v4rhosigma2tau = NULL; + double *v4rhosigmalapl2 = NULL; + double *v4rhosigmalapltau= NULL; + double *v4rhosigmatau2 = NULL; + double *v4rholapl3 = NULL; + double *v4rholapl2tau = NULL; + double *v4rholapltau2 = NULL; + double *v4rhotau3 = NULL; + double *v4sigma4 = NULL; + double *v4sigma3lapl = NULL; + double *v4sigma3tau = NULL; + double *v4sigma2lapl2 = NULL; + double *v4sigma2lapltau = NULL; + double *v4sigma2tau2 = NULL; + double *v4sigmalapl3 = NULL; + double *v4sigmalapl2tau = NULL; + double *v4sigmalapltau2 = NULL; + double *v4sigmatau3 = NULL; + double *v4lapl4 = NULL; + double *v4lapl3tau = NULL; + double *v4lapl2tau2 = NULL; + double *v4lapltau3 = NULL; + double *v4tau4 = NULL; switch (func_x->info->family) { case XC_FAMILY_LDA: @@ -129,231 +253,207 @@ static void _eval_xc(xc_func_type *func_x, int spin, int np, // ex is the energy density // NOTE libxc library added ex/ec into vrho/vcrho // vrho = rho d ex/d rho + ex, see work_lda.c:L73 - if (spin == XC_POLARIZED) { - rho = malloc(sizeof(double) * np*2); - for (i = 0; i < np; i++) { - rho[i*2+0] = rho_u[i]; - rho[i*2+1] = rho_d[i]; + if (spin == 1) { + if (deriv > 0) { + vrho = exc + np; + } + if (deriv > 1) { + v2rho2 = vrho + np * 2; + } + if (deriv > 2) { + v3rho3 = v2rho2 + np * 3; + } + if (deriv > 3) { + v4rho4 = v3rho3 + np * 4; } - xc_lda_exc_vxc_fxc_kxc(func_x, np, rho, ex, vxc, fxc, kxc); - free(rho); } else { - rho = rho_u; - xc_lda_exc_vxc_fxc_kxc(func_x, np, rho, ex, vxc, fxc, kxc); + if (deriv > 0) { + vrho = exc + np; + } + if (deriv > 1) { + v2rho2 = vrho + np; + } + if (deriv > 2) { + v3rho3 = v2rho2 + np; + } + if (deriv > 3) { + v4rho4 = v3rho3 + np; + } } + xc_lda(func_x, np, rho, exc, vrho, v2rho2, v3rho3, v4rho4); break; case XC_FAMILY_GGA: #ifdef XC_FAMILY_HYB_GGA case XC_FAMILY_HYB_GGA: #endif - if (spin == XC_POLARIZED) { - rho = malloc(sizeof(double) * np * 5); + if (spin == 1) { sigma = rho + np * 2; - gxu = rho_u + np; - gyu = rho_u + np * 2; - gzu = rho_u + np * 3; - gxd = rho_d + np; - gyd = rho_d + np * 2; - gzd = rho_d + np * 3; - for (i = 0; i < np; i++) { - rho[i*2+0] = rho_u[i]; - rho[i*2+1] = rho_d[i]; - sigma[i*3+0] = gxu[i]*gxu[i] + gyu[i]*gyu[i] + gzu[i]*gzu[i]; - sigma[i*3+1] = gxu[i]*gxd[i] + gyu[i]*gyd[i] + gzu[i]*gzd[i]; - sigma[i*3+2] = gxd[i]*gxd[i] + gyd[i]*gyd[i] + gzd[i]*gzd[i]; - } - if (vxc != NULL) { - vrho = vxc; - vsigma = vxc + np * 2; + if (deriv > 0) { + vrho = exc + np; + vsigma = vrho + np * 2; } - if (fxc != NULL) { - v2rho2 = fxc; - v2rhosigma = fxc + np * 3; + if (deriv > 1) { + v2rho2 = vsigma + np * 3; + v2rhosigma = v2rho2 + np * 3; v2sigma2 = v2rhosigma + np * 6; // np*6 } - if (kxc != NULL) { - v3rho3 = kxc; - v3rho2sigma = kxc + np * 4; + if (deriv > 2) { + v3rho3 = v2sigma2 + np * 6; + v3rho2sigma = v3rho3 + np * 4; v3rhosigma2 = v3rho2sigma + np * 9; v3sigma3 = v3rhosigma2 + np * 12; // np*10 } - xc_gga_exc_vxc_fxc_kxc(func_x, np, rho, sigma, ex, - vrho, vsigma, v2rho2, v2rhosigma, v2sigma2, - v3rho3, v3rho2sigma, v3rhosigma2, v3sigma3); - free(rho); - } else { - rho = rho_u; - sigma = malloc(sizeof(double) * np); - gxu = rho_u + np; - gyu = rho_u + np * 2; - gzu = rho_u + np * 3; - for (i = 0; i < np; i++) { - sigma[i] = gxu[i]*gxu[i] + gyu[i]*gyu[i] + gzu[i]*gzu[i]; + if (deriv > 3) { + v4rho4 = v3sigma3 + np * 10 ; + v4rho3sigma = v4rho4 + np * 5 ; + v4rho2sigma2 = v4rho3sigma + np * 4*3 ; + v4rhosigma3 = v4rho2sigma2 + np * 3*6 ; + v4sigma4 = v4rhosigma3 + np * 2*10; } - if (vxc != NULL) { - vrho = vxc; - vsigma = vxc + np; + } else { + sigma = rho + np; + if (deriv > 0) { + vrho = exc + np; + vsigma = vrho + np; } - if (fxc != NULL) { - v2rho2 = fxc; - v2rhosigma = fxc + np; + if (deriv > 1) { + v2rho2 = vsigma + np; + v2rhosigma = v2rho2 + np; v2sigma2 = v2rhosigma + np; } - if (kxc != NULL) { - v3rho3 = kxc; + if (deriv > 2) { + v3rho3 = v2sigma2 + np; v3rho2sigma = v3rho3 + np; v3rhosigma2 = v3rho2sigma + np; v3sigma3 = v3rhosigma2 + np; } - xc_gga_exc_vxc_fxc_kxc(func_x, np, rho, sigma, ex, - vrho, vsigma, v2rho2, v2rhosigma, v2sigma2, - v3rho3, v3rho2sigma, v3rhosigma2, v3sigma3); - free(sigma); + if (deriv > 3) { + v4rho4 = v3sigma3 + np; + v4rho3sigma = v4rho4 + np; + v4rho2sigma2 = v4rho3sigma + np; + v4rhosigma3 = v4rho2sigma2 + np; + v4sigma4 = v4rhosigma3 + np; + } } + xc_gga(func_x, np, rho, sigma, + exc, vrho, vsigma, + v2rho2, v2rhosigma, v2sigma2, + v3rho3, v3rho2sigma, v3rhosigma2, v3sigma3, + v4rho4, v4rho3sigma, v4rho2sigma2, v4rhosigma3, v4sigma4); break; case XC_FAMILY_MGGA: #ifdef XC_FAMILY_HYB_MGGA case XC_FAMILY_HYB_MGGA: #endif - if (spin == XC_POLARIZED) { - rho = malloc(sizeof(double) * np * 9); + if (spin == 1) { sigma = rho + np * 2; - lapl = sigma + np * 3; - tau = lapl + np * 2; - gxu = rho_u + np; - gyu = rho_u + np * 2; - gzu = rho_u + np * 3; - gxd = rho_d + np; - gyd = rho_d + np * 2; - gzd = rho_d + np * 3; - lapl_u = rho_u + np * 4; - tau_u = rho_u + np * 5; - lapl_d = rho_d + np * 4; - tau_d = rho_d + np * 5; - for (i = 0; i < np; i++) { - rho[i*2+0] = rho_u[i]; - rho[i*2+1] = rho_d[i]; - lapl[i*2+0] = lapl_u[i]; - lapl[i*2+1] = lapl_d[i]; - tau[i*2+0] = tau_u[i]; - tau[i*2+1] = tau_d[i]; + tau = sigma + np * 3; + if (deriv > 0) { + vrho = exc + np; + vsigma = vrho + np * 2; + vtau = vsigma + np * 3; } - for (i = 0; i < np; i++) { - sigma[i*3+0] = gxu[i]*gxu[i] + gyu[i]*gyu[i] + gzu[i]*gzu[i]; - sigma[i*3+1] = gxu[i]*gxd[i] + gyu[i]*gyd[i] + gzu[i]*gzd[i]; - sigma[i*3+2] = gxd[i]*gxd[i] + gyd[i]*gyd[i] + gzd[i]*gzd[i]; - } - if (vxc != NULL) { - vrho = vxc; - vsigma = vxc + np * 2; - vlapl = vsigma + np * 3; - vtau = vlapl + np * 2; // np*2 - } - if (fxc != NULL) { - v2rho2 = fxc; + if (deriv > 1) { + v2rho2 = vtau + np * 2; v2rhosigma = v2rho2 + np * 3; v2sigma2 = v2rhosigma + np * 6; - v2lapl2 = v2sigma2 + np * 6; - v2tau2 = v2lapl2 + np * 3; - v2rholapl = v2tau2 + np * 3; - v2rhotau = v2rholapl + np * 4; - v2lapltau = v2rhotau + np * 4; - v2sigmalapl = v2lapltau + np * 4; - v2sigmatau = v2sigmalapl + np * 6; - } - if (kxc != NULL) { - v3rho3 = kxc; + v2rhotau = v2sigma2 + np * 6; + v2sigmatau = v2rhotau + np * 4; + v2tau2 = v2sigmatau + np * 6; + } + if (deriv > 2) { + v3rho3 = v2tau2 + np * 3 ; v3rho2sigma = v3rho3 + np * 4 ; v3rhosigma2 = v3rho2sigma + np * 9 ; v3sigma3 = v3rhosigma2 + np * 12; - v3rho2lapl = v3sigma3 + np * 10; - v3rho2tau = v3rho2lapl + np * 6 ; - v3rhosigmalapl = v3rho2tau + np * 6 ; - v3rhosigmatau = v3rhosigmalapl + np * 12; - v3rholapl2 = v3rhosigmatau + np * 12; - v3rholapltau = v3rholapl2 + np * 6 ; - v3rhotau2 = v3rholapltau + np * 8 ; - v3sigma2lapl = v3rhotau2 + np * 6 ; - v3sigma2tau = v3sigma2lapl + np * 12; - v3sigmalapl2 = v3sigma2tau + np * 12; - v3sigmalapltau = v3sigmalapl2 + np * 9 ; - v3sigmatau2 = v3sigmalapltau + np * 12; - v3lapl3 = v3sigmatau2 + np * 9 ; - v3lapl2tau = v3lapl3 + np * 4 ; - v3lapltau2 = v3lapl2tau + np * 6 ; - v3tau3 = v3lapltau2 + np * 6 ; - } - xc_mgga_exc_vxc_fxc_kxc(func_x, np, rho, sigma, lapl, tau, ex, - vrho, vsigma, vlapl, vtau, - v2rho2, v2rhosigma, v2rholapl, v2rhotau, v2sigma2, - v2sigmalapl, v2sigmatau, v2lapl2, v2lapltau, v2tau2, - v3rho3, v3rho2sigma, v3rho2lapl, v3rho2tau, v3rhosigma2, - v3rhosigmalapl, v3rhosigmatau, v3rholapl2, v3rholapltau, - v3rhotau2, v3sigma3, v3sigma2lapl, v3sigma2tau, - v3sigmalapl2, v3sigmalapltau, v3sigmatau2, v3lapl3, - v3lapl2tau, v3lapltau2, v3tau3); - free(rho); - } else { - rho = rho_u; - sigma = malloc(sizeof(double) * np); - lapl = rho_u + np * 4; - tau = rho_u + np * 5; - gxu = rho_u + np; - gyu = rho_u + np * 2; - gzu = rho_u + np * 3; - for (i = 0; i < np; i++) { - sigma[i] = gxu[i]*gxu[i] + gyu[i]*gyu[i] + gzu[i]*gzu[i]; + v3rho2tau = v3sigma3 + np * 10; + v3rhosigmatau = v3rho2tau + np * 6 ; + v3rhotau2 = v3rhosigmatau + np * 12; + v3sigma2tau = v3rhotau2 + np * 6 ; + v3sigmatau2 = v3sigma2tau + np * 12; + v3tau3 = v3sigmatau2 + np * 9 ; } - if (vxc != NULL) { - vsigma = vxc + np; - vlapl = vsigma + np; - vtau = vlapl + np; + if (deriv > 3) { + v4rho4 = v3tau3 + np * 4 ; + v4rho3sigma = v4rho4 + np * 5 ; + v4rho2sigma2 = v4rho3sigma + np * 4*3 ; + v4rhosigma3 = v4rho2sigma2 + np * 3*6 ; + v4sigma4 = v4rhosigma3 + np * 2*10 ; + v4rho3tau = v4sigma4 + np * 15 ; + v4rho2sigmatau = v4rho3tau + np * 4*2 ; + v4rho2tau2 = v4rho2sigmatau + np * 3*3*2; + v4rhosigma2tau = v4rho2tau2 + np * 3*3 ; + v4rhosigmatau2 = v4rhosigma2tau + np * 2*6*2; + v4rhotau3 = v4rhosigmatau2 + np * 2*3*3; + v4sigma3tau = v4rhotau3 + np * 2*4 ; + v4sigma2tau2 = v4sigma3tau + np * 10*2 ; + v4sigmatau3 = v4sigma2tau2 + np * 6*3 ; + v4tau4 = v4sigmatau3 + np * 3*4 ; } - if (fxc != NULL) { - v2rho2 = fxc; + } else { + sigma = rho + np; + tau = sigma + np; + if (deriv > 0) { + vrho = exc + np; + vsigma = vrho + np; + vtau = vsigma + np; + } + if (deriv > 1) { + v2rho2 = vtau + np; v2rhosigma = v2rho2 + np; v2sigma2 = v2rhosigma + np; - v2lapl2 = v2sigma2 + np; - v2tau2 = v2lapl2 + np; - v2rholapl = v2tau2 + np; - v2rhotau = v2rholapl + np; - v2lapltau = v2rhotau + np; - v2sigmalapl = v2lapltau + np; - v2sigmatau = v2sigmalapl + np; - } - if (kxc != NULL) { - v3rho3 = kxc; + v2rhotau = v2sigma2 + np; + v2sigmatau = v2rhotau + np; + v2tau2 = v2sigmatau + np; + } + if (deriv > 2) { + v3rho3 = v2tau2 + np; v3rho2sigma = v3rho3 + np; v3rhosigma2 = v3rho2sigma + np; v3sigma3 = v3rhosigma2 + np; - v3rho2lapl = v3sigma3 + np; - v3rho2tau = v3rho2lapl + np; - v3rhosigmalapl = v3rho2tau + np; - v3rhosigmatau = v3rhosigmalapl + np; - v3rholapl2 = v3rhosigmatau + np; - v3rholapltau = v3rholapl2 + np; - v3rhotau2 = v3rholapltau + np; - v3sigma2lapl = v3rhotau2 + np; - v3sigma2tau = v3sigma2lapl + np; - v3sigmalapl2 = v3sigma2tau + np; - v3sigmalapltau = v3sigmalapl2 + np; - v3sigmatau2 = v3sigmalapltau + np; - v3lapl3 = v3sigmatau2 + np; - v3lapl2tau = v3lapl3 + np; - v3lapltau2 = v3lapl2tau + np; - v3tau3 = v3lapltau2 + np; - } - xc_mgga_exc_vxc_fxc_kxc(func_x, np, rho, sigma, lapl, tau, ex, - vxc, vsigma, vlapl, vtau, - v2rho2, v2rhosigma, v2rholapl, v2rhotau, v2sigma2, - v2sigmalapl, v2sigmatau, v2lapl2, v2lapltau, v2tau2, - v3rho3, v3rho2sigma, v3rho2lapl, v3rho2tau, v3rhosigma2, - v3rhosigmalapl, v3rhosigmatau, v3rholapl2, v3rholapltau, - v3rhotau2, v3sigma3, v3sigma2lapl, v3sigma2tau, - v3sigmalapl2, v3sigmalapltau, v3sigmatau2, v3lapl3, - v3lapl2tau, v3lapltau2, v3tau3); - free(sigma); + v3rho2tau = v3sigma3 + np; + v3rhosigmatau = v3rho2tau + np; + v3rhotau2 = v3rhosigmatau + np; + v3sigma2tau = v3rhotau2 + np; + v3sigmatau2 = v3sigma2tau + np; + v3tau3 = v3sigmatau2 + np; + } + if (deriv > 3) { + v4rho4 = v3tau3 + np; + v4rho3sigma = v4rho4 + np; + v4rho2sigma2 = v4rho3sigma + np; + v4rhosigma3 = v4rho2sigma2 + np; + v4sigma4 = v4rhosigma3 + np; + v4rho3tau = v4sigma4 + np; + v4rho2sigmatau = v4rho3tau + np; + v4rho2tau2 = v4rho2sigmatau + np; + v4rhosigma2tau = v4rho2tau2 + np; + v4rhosigmatau2 = v4rhosigma2tau + np; + v4rhotau3 = v4rhosigmatau2 + np; + v4sigma3tau = v4rhotau3 + np; + v4sigma2tau2 = v4sigma3tau + np; + v4sigmatau3 = v4sigma2tau2 + np; + v4tau4 = v4sigmatau3 + np; + } } + xc_mgga(func_x, np, rho, sigma, lapl, tau, + exc, vrho, vsigma, vlapl, vtau, + v2rho2, v2rhosigma, v2rholapl, v2rhotau, v2sigma2, + v2sigmalapl, v2sigmatau, v2lapl2, v2lapltau, v2tau2, + v3rho3, v3rho2sigma, v3rho2lapl, v3rho2tau, v3rhosigma2, + v3rhosigmalapl, v3rhosigmatau, v3rholapl2, v3rholapltau, + v3rhotau2, v3sigma3, v3sigma2lapl, v3sigma2tau, + v3sigmalapl2, v3sigmalapltau, v3sigmatau2, v3lapl3, + v3lapl2tau, v3lapltau2, v3tau3, + v4rho4, v4rho3sigma, v4rho3lapl, v4rho3tau, v4rho2sigma2, + v4rho2sigmalapl, v4rho2sigmatau, v4rho2lapl2, v4rho2lapltau, + v4rho2tau2, v4rhosigma3, v4rhosigma2lapl, v4rhosigma2tau, + v4rhosigmalapl2, v4rhosigmalapltau, v4rhosigmatau2, + v4rholapl3, v4rholapl2tau, v4rholapltau2, v4rhotau3, + v4sigma4, v4sigma3lapl, v4sigma3tau, v4sigma2lapl2, + v4sigma2lapltau, v4sigma2tau2, v4sigmalapl3, v4sigmalapl2tau, + v4sigmalapltau2, v4sigmatau3, v4lapl4, v4lapl3tau, + v4lapl2tau2, v4lapltau3, v4tau4); break; default: fprintf(stderr, "functional %d '%s' is not implemented\n", @@ -582,77 +682,26 @@ int LIBXC_xc_type(int fn_id) return type; } -static int xc_output_length(int nvar, int deriv) -{ - int i; - int len = 1.; - for (i = 1; i <= nvar; i++) { - len *= deriv + i; - len /= i; - } - return len; -} - -// return value 0 means no functional needs to be evaluated. -int LIBXC_input_length(int nfn, int *fn_id, double *fac, int spin) -{ - int i; - int nvar = 0; - xc_func_type func; - for (i = 0; i < nfn; i++) { - if (xc_func_init(&func, fn_id[i], spin) != 0) { - fprintf(stderr, "XC functional %d not found\n", - fn_id[i]); - raise_error -1; - } - if (spin == XC_POLARIZED) { - switch (func.info->family) { - case XC_FAMILY_LDA: -#ifdef XC_FAMILY_HYB_LDA - case XC_FAMILY_HYB_LDA: -#endif - nvar = MAX(nvar, 2); - break; - case XC_FAMILY_GGA: -#ifdef XC_FAMILY_HYB_GGA - case XC_FAMILY_HYB_GGA: -#endif - nvar = MAX(nvar, 5); - break; - case XC_FAMILY_MGGA: -#ifdef XC_FAMILY_HYB_MGGA - case XC_FAMILY_HYB_MGGA: -#endif - nvar = MAX(nvar, 9); - } - } else { - switch (func.info->family) { - case XC_FAMILY_LDA: -#ifdef XC_FAMILY_HYB_LDA - case XC_FAMILY_HYB_LDA: -#endif - nvar = MAX(nvar, 1); - break; - case XC_FAMILY_GGA: -#ifdef XC_FAMILY_HYB_GGA - case XC_FAMILY_HYB_GGA: -#endif - nvar = MAX(nvar, 2); - break; - case XC_FAMILY_MGGA: -#ifdef XC_FAMILY_HYB_MGGA - case XC_FAMILY_HYB_MGGA: -#endif - nvar = MAX(nvar, 4); - } - } - xc_func_end(&func); - } - return nvar; -} +//static int xc_output_length(int nvar, int deriv) +//{ +// int i; +// int len = 1; +// for (i = 1; i <= nvar; i++) { +// len *= deriv + i; +// len /= i; +// } +// return len; +//} +// offsets = [xc_output_length(nvar, i) for i in range(deriv+1) +// for nvar in [1,2,3,5,7]] +static int xc_nvar1_offsets[] = {0, 1, 2, 3, 4, 5}; +static int xc_nvar2_offsets[] = {0, 1, 3, 6, 10, 15}; +static int xc_nvar3_offsets[] = {0, 1, 4, 10, 20, 35}; +static int xc_nvar5_offsets[] = {0, 1, 6, 21, 56, 126}; +static int xc_nvar7_offsets[] = {0, 1, 8, 36, 120, 330}; static void axpy(double *dst, double *src, double fac, - int np, int ndst, int nsrc) + int np, int nsrc) { int i, j; for (j = 0; j < nsrc; j++) { @@ -661,130 +710,106 @@ static void axpy(double *dst, double *src, double fac, } } } - -static void merge_xc(double *dst, double *ebuf, double *vbuf, - double *fbuf, double *kbuf, double fac, - int np, int ndst, int nvar, int spin, int type) +static int vseg1[] = {2, 3, 2}; +static int fseg1[] = {3, 6, 6, 4, 6, 3}; +static int kseg1[] = {4, 9, 12, 10, 6, 12, 6, 12, 9, 4}; +static int lseg1[] = {5, 12, 18, 20, 15, 8, 18, 9, 24, 18, 8, 20, 18, 12, 5}; +static int *seg1[] = {NULL, vseg1, fseg1, kseg1, lseg1}; + +static void merge_xc(double *dst, double *ebuf, double fac, + int spin, int deriv, int nvar, int np, int outlen, int type) { - int seg0 [] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - // LDA | | - // GGA | | - // MGGA | | - int vseg1[] = {2, 3, 2, 2}; - // LDA | | - // GGA | | - // MGGA | | - int fseg1[] = {3, 6, 6, 3, 3, 4, 4, 4, 6, 6}; - // LDA | | - // GGA | | - // MGGA | | - int kseg1[] = {4, 9,12,10, 6, 6,12,12, 6, 8, 6,12,12, 9,12, 9, 4, 6, 6, 4}; - int vsegtot, fsegtot, ksegtot; - int *vseg, *fseg, *kseg; - if (spin == XC_POLARIZED) { - vseg = vseg1; - fseg = fseg1; - kseg = kseg1; - } else { - vseg = seg0; - fseg = seg0; - kseg = seg0; + int order, nsrc, i; + for (i = 0; i < np; i++) { + dst[i] += fac * ebuf[i]; } + int *offsets0, *offsets1; + double *pout, *pin; switch (type) { case XC_FAMILY_GGA: #ifdef XC_FAMILY_HYB_GGA case XC_FAMILY_HYB_GGA: #endif - vsegtot = 2; - fsegtot = 3; - ksegtot = 4; + offsets0 = xc_nvar2_offsets; break; case XC_FAMILY_MGGA: #ifdef XC_FAMILY_HYB_MGGA case XC_FAMILY_HYB_MGGA: #endif - vsegtot = 4; - fsegtot = 10; - ksegtot = 20; // not supported + offsets0 = xc_nvar3_offsets; break; default: //case XC_FAMILY_LDA: - vsegtot = 1; - fsegtot = 1; - ksegtot = 1; + offsets0 = xc_nvar1_offsets; } - int i; - size_t offset; - axpy(dst, ebuf, fac, np, ndst, 1); - - if (vbuf != NULL) { - offset = np; - for (i = 0; i < vsegtot; i++) { - axpy(dst+offset, vbuf, fac, np, ndst, vseg[i]); - offset += np * vseg[i]; - vbuf += np * vseg[i]; + if (spin == 0) { + switch (nvar) { + case LDA_NVAR: + offsets1 = xc_nvar1_offsets; + break; + case GGA_NVAR: + offsets1 = xc_nvar2_offsets; + break; + default: // MGGA_NVAR + offsets1 = xc_nvar3_offsets; + break; } - } - if (fbuf != NULL) { - offset = np * xc_output_length(nvar, 1); - for (i = 0; i < fsegtot; i++) { - axpy(dst+offset, fbuf, fac, np, ndst, fseg[i]); - offset += np * fseg[i]; - fbuf += np * fseg[i]; + for (order = 1; order <= deriv; order++) { + pout = dst + offsets1[order] * np; + pin = ebuf + offsets0[order] * np; + nsrc = offsets0[order+1] - offsets0[order]; + for (i = 0; i < np * nsrc; i++) { + pout[i] += fac * pin[i]; + } } + return; } - if (kbuf != NULL) { - offset = np * xc_output_length(nvar, 2); - for (i = 0; i < ksegtot; i++) { - axpy(dst+offset, kbuf, fac, np, ndst, kseg[i]); - offset += np * kseg[i]; - kbuf += np * kseg[i]; + switch (nvar) { + case LDA_NVAR: + offsets1 = xc_nvar2_offsets; + break; + case GGA_NVAR: + offsets1 = xc_nvar5_offsets; + break; + default: // MGGA_NVAR + offsets1 = xc_nvar7_offsets; + break; + } + + int terms; + int *pseg1; + pin = ebuf + np; + for (order = 1; order <= deriv; order++) { + pseg1 = seg1[order]; + pout = dst + offsets1[order] * np; + terms = offsets0[order+1] - offsets0[order]; + for (i = 0; i < terms; i++) { + nsrc = pseg1[i]; + axpy(pout, pin, fac, np, nsrc); + pin += np * nsrc; + pout += np * nsrc; } } } // omega is the range separation parameter mu in xcfun void LIBXC_eval_xc(int nfn, int *fn_id, double *fac, double *omega, - int spin, int deriv, int np, - double *rho_u, double *rho_d, double *output, - double dens_threshold) + int spin, int deriv, int nvar, int np, int outlen, + double *rho_u, double *output, double dens_threshold) { - assert(deriv <= 3); - int nvar = LIBXC_input_length(nfn, fn_id, fac, spin); - if (nvar == 0) { // No functional needs to be evaluated. - return; - } - - int outlen = xc_output_length(nvar, deriv); - // output buffer is zeroed in the Python caller - //NPdset0(output, np*outlen); - - double *ebuf = malloc(sizeof(double) * np); - double *vbuf = NULL; - double *fbuf = NULL; - double *kbuf = NULL; - if (deriv > 0) { - vbuf = malloc(sizeof(double) * np*9); - } - if (deriv > 1) { - fbuf = malloc(sizeof(double) * np*45); - } - if (deriv > 2) { - if (spin == XC_POLARIZED) { // spin-unresctricted MGGA - // FIXME *220 in xcfun - kbuf = malloc(sizeof(double) * np*165); - } else { // spin-resctricted MGGA - kbuf = malloc(sizeof(double) * np*20); - } - } + assert(deriv <= 4); + double *ebuf = malloc(sizeof(double) * np * outlen); + double *rho = malloc(sizeof(double) * np * 7); + _eval_rho(rho, rho_u, spin, nvar, np); + int nspin = spin + 1; int i, j; xc_func_type func; for (i = 0; i < nfn; i++) { - if (xc_func_init(&func, fn_id[i], spin) != 0) { + if (xc_func_init(&func, fn_id[i], nspin) != 0) { fprintf(stderr, "XC functional %d not found\n", fn_id[i]); raise_error; @@ -832,22 +857,13 @@ void LIBXC_eval_xc(int nfn, int *fn_id, double *fac, double *omega, #if defined XC_SET_RELATIVITY xc_lda_x_set_params(&func, relativity); #endif - _eval_xc(&func, spin, np, rho_u, rho_d, ebuf, vbuf, fbuf, kbuf); - merge_xc(output, ebuf, vbuf, fbuf, kbuf, fac[i], - np, outlen, nvar, spin, func.info->family); + _eval_xc(&func, spin, deriv, np, rho, ebuf); + merge_xc(output, ebuf, fac[i], + spin, deriv, nvar, np, outlen, func.info->family); xc_func_end(&func); } - free(ebuf); - if (deriv > 0) { - free(vbuf); - } - if (deriv > 1) { - free(fbuf); - } - if (deriv > 2) { - free(kbuf); - } + free(rho); } int LIBXC_max_deriv_order(int xc_id)