diff --git a/examples/pbc/27-multigrid.py b/examples/pbc/27-multigrid.py index f1b1f85a95..6809f33e3d 100644 --- a/examples/pbc/27-multigrid.py +++ b/examples/pbc/27-multigrid.py @@ -31,9 +31,9 @@ # # There are two ways to enable multigrid numerical integration # -# Method 1: use multigrid.multigrid function to update SCF object +# Method 1: use multigrid.multigrid_fftdf function to update SCF object # -mf = multigrid.multigrid(mf) +mf = multigrid.multigrid_fftdf(mf) mf.kernel() # diff --git a/examples/pbc/27-multigrid2.py b/examples/pbc/27-multigrid2.py new file mode 100644 index 0000000000..d73cd8fe50 --- /dev/null +++ b/examples/pbc/27-multigrid2.py @@ -0,0 +1,238 @@ +#from os.path import expanduser +#home_dir = expanduser("~") +#f = open(home_dir+'/.pyscf_conf.py', 'a') +# use FFTW for fft, this requires to compile the FFTW library +# cmake -DENABLE_FFTW=ON -DBUILD_FFTW=ON +#f.write('pbc_tools_pbc_fft_engine=\'FFTW\'') +#f.close() + +import numpy +import pyscf +from pyscf import lib +from pyscf import pbc +from pyscf.pbc import gto as pbcgto +from pyscf.pbc import dft as pbcdft +from pyscf.pbc.dft import multigrid + +cell=pbcgto.Cell() + +#Molecule +boxlen=12.4138 +cell.a=numpy.array([[boxlen,0.0,0.0],[0.0,boxlen,0.0],[0.0,0.0,boxlen]]) +cell.atom=""" +O 12.235322 1.376642 10.869880 +O 6.445390 3.706940 8.650794 +O 0.085977 2.181322 8.276663 +O 12.052554 2.671366 2.147199 +O 12.250036 4.190930 12.092014 +O 7.187422 0.959062 4.733469 +O 8.346457 7.210040 4.667644 +O 12.361546 11.527875 8.106887 +O 3.299984 4.440816 9.193275 +O 2.855829 3.759909 6.552815 +O 1.392494 6.362753 0.586172 +O 1.858645 8.694013 2.068738 +O 3.770231 12.094519 8.652183 +O 6.432508 3.669828 2.772418 +O 1.998724 1.820217 4.876440 +O 8.248581 2.404730 6.931303 +O 5.753814 3.360029 12.461534 +O 11.322212 5.649239 2.236798 +O 4.277318 2.113956 10.590808 +O 5.405015 3.349247 5.484702 +O 6.493278 11.869958 0.684912 +O 3.275250 2.346576 2.425241 +O 7.981003 6.352512 7.507970 +O 5.985990 6.512854 
12.194648 +O 10.636714 11.856872 12.209540 +O 9.312283 3.670384 3.508594 +O 1.106885 5.830301 6.638695 +O 8.008007 3.326363 10.869818 +O 12.403000 9.687405 11.761901 +O 4.219782 7.085315 8.153470 +O 3.781557 8.203821 11.563272 +O 11.088898 4.532081 7.809475 +O 10.387548 8.408890 1.017882 +O 1.979016 6.418091 10.374159 +O 4.660547 0.549666 5.617403 +O 8.745880 12.256257 8.089383 +O 2.662041 10.489890 0.092980 +O 7.241661 10.471815 4.226946 +O 2.276827 0.276647 10.810417 +O 8.887733 0.946877 1.333885 +O 1.943554 8.088552 7.567650 +O 9.667942 8.056759 9.868847 +O 10.905491 8.339638 6.484782 +O 3.507733 4.862402 1.557439 +O 8.010457 8.642846 12.055969 +O 8.374446 10.035932 6.690309 +O 5.635247 6.076875 5.563993 +O 11.728434 1.601906 5.079475 +O 9.771134 9.814114 3.548703 +O 3.944355 10.563450 4.687536 +O 0.890357 6.382287 4.065806 +O 6.862447 6.425182 2.488202 +O 3.813963 6.595122 3.762649 +O 6.562448 8.295463 8.807182 +O 9.809455 0.143325 3.886553 +O 4.117074 11.661225 2.221679 +O 5.295317 8.735561 2.763183 +O 9.971999 5.379339 5.340378 +O 12.254708 8.643874 3.957116 +O 2.344274 10.761274 6.829162 +O 7.013416 0.643488 10.518797 +O 5.152349 10.233624 10.359388 +O 11.184278 5.884064 10.298279 +O 12.252335 8.974142 9.070831 +H 12.415139 2.233125 11.257611 +H 11.922476 1.573799 9.986994 +H 5.608192 3.371543 8.971482 +H 6.731226 3.060851 8.004962 +H -0.169205 1.565594 7.589645 +H -0.455440 2.954771 8.118939 +H 12.125168 2.826463 1.205443 +H 12.888828 2.969761 2.504745 +H 11.553255 4.386613 11.465566 +H 12.818281 4.960808 12.067151 +H 7.049495 1.772344 4.247898 +H 6.353019 0.798145 5.174047 +H 7.781850 7.384852 5.420566 +H 9.103203 6.754017 5.035898 +H 12.771232 11.788645 8.931744 +H 12.018035 10.650652 8.276334 +H 3.557245 3.792529 9.848846 +H 2.543844 4.884102 9.577958 +H 2.320235 4.521250 6.329813 +H 2.872128 3.749963 7.509824 +H 1.209685 7.121391 1.140501 +H 2.238885 6.038801 0.894245 +H 2.763109 8.856353 2.336735 +H 1.329379 9.047369 2.783755 +H 4.315639 11.533388 
9.203449 +H 3.098742 12.433043 9.244412 +H 5.987369 3.448974 3.590530 +H 5.813096 3.419344 2.086985 +H 1.057126 1.675344 4.969379 +H 2.248496 2.292119 5.670892 +H 8.508264 1.653337 7.464411 +H 8.066015 2.034597 6.067646 +H 5.197835 2.915542 11.821572 +H 6.630900 3.329981 12.079371 +H 10.788986 6.436672 2.127933 +H 11.657923 5.463602 1.359832 +H 3.544476 1.634958 10.977765 +H 4.755770 1.455054 10.087655 +H 4.465371 3.375459 5.665294 +H 5.682663 4.264430 5.524498 +H 6.174815 11.778676 1.582954 +H 5.713640 12.089924 0.174999 +H 3.476076 1.498708 2.028983 +H 2.730229 2.134295 3.182949 +H 7.119624 5.936450 7.474030 +H 8.536492 5.799405 6.958665 +H 5.909499 5.717477 11.667621 +H 6.125402 6.196758 13.087330 +H 11.203499 12.513536 11.804844 +H 10.260930 12.300153 12.970145 +H 9.985036 3.927685 2.878172 +H 8.545584 3.468329 2.972331 +H 1.399882 6.620092 7.093246 +H 0.963561 6.112523 5.735345 +H 8.067363 3.674002 9.979955 +H 8.000737 2.375959 10.756190 +H 11.821629 10.402510 12.020482 +H 12.206854 8.983242 12.379892 +H 3.461473 7.606485 7.889688 +H 3.844478 6.304711 8.560946 +H 3.179884 7.585614 11.148494 +H 4.401957 7.652030 12.039573 +H 11.573777 5.053211 7.169515 +H 10.342076 4.186083 7.320831 +H 10.065640 8.919194 1.760981 +H 9.629585 8.322499 0.439729 +H 1.396302 6.546079 9.625630 +H 1.405516 6.479759 11.138049 +H 4.024008 1.232518 5.405828 +H 4.736858 0.579881 6.571077 +H 9.452293 12.313381 8.732772 +H 8.976559 11.502788 7.545965 +H 1.834701 10.012311 0.153462 +H 3.295197 9.836403 -0.204175 +H 7.056724 11.401702 4.095264 +H 6.499038 10.020287 3.825865 +H 1.365541 0.487338 11.013887 +H 2.501591 -0.428131 11.417871 +H 8.644279 1.812362 1.005409 +H 8.142674 0.388030 1.112955 +H 1.272659 8.365063 8.191888 +H 2.142485 8.877768 7.063867 +H 8.961493 7.826192 9.265523 +H 9.227102 8.487654 10.601118 +H 10.150144 7.758934 6.392768 +H 10.596082 9.187988 6.167290 +H 3.463106 4.096188 2.129414 +H 3.919461 4.539801 0.755791 +H 7.418998 9.394959 12.028876 +H 7.430413 7.883095 
12.106546 +H 7.972905 10.220334 5.841196 +H 7.675111 9.631498 7.203725 +H 5.332446 6.381336 6.419473 +H 5.000025 6.434186 4.943466 +H 11.575078 2.271167 4.412540 +H 11.219802 0.847030 4.783357 +H 8.865342 9.721516 3.843998 +H 10.000732 10.719285 3.758898 +H 3.186196 10.476397 5.265333 +H 4.407331 11.335128 5.013723 +H 0.558187 7.255936 3.859331 +H 0.341672 5.789383 3.552346 +H 7.459933 6.526049 3.229193 +H 6.696228 5.483739 2.440372 +H 3.864872 6.313007 2.849385 +H 2.876419 6.621201 3.953862 +H 5.631529 8.079145 8.753997 +H 7.003296 7.568245 8.367822 +H 9.615413 0.527902 3.031755 +H 8.962985 0.109366 4.332162 +H 3.825854 11.139182 1.474087 +H 4.063988 11.063232 2.967211 +H 5.784391 7.914558 2.708486 +H 4.780461 8.655167 3.566110 +H 10.880659 5.444664 5.046607 +H 9.593331 4.687991 4.797350 +H 11.562317 8.960134 3.376765 +H 11.926084 8.816948 4.839320 +H 2.856874 11.297981 7.433660 +H 1.492332 11.195517 6.786033 +H 7.145820 0.090200 9.749009 +H 7.227275 0.077690 11.260665 +H 4.662021 9.538430 10.798155 +H 5.994537 9.833472 10.142985 +H 10.544299 6.595857 10.301445 +H 11.281750 5.653082 9.374494 +H 12.103020 8.841164 10.006916 +H 11.491592 8.576221 8.647557 +""" +cell.basis = 'gth-tzv2p' +cell.ke_cutoff = 200 # kinetic energy cutoff in a.u. 
+cell.max_memory = 8000 # in MB +cell.precision = 1e-6 # integral precision +cell.pseudo = 'gth-pade' +cell.verbose = 4 +cell.use_loose_rcut = True # integral screening based on shell radii +cell.use_particle_mesh_ewald = True # use particle mesh ewald for nuclear repulsion +cell.build() +#cell = pbc.tools.super_cell(cell, [1,2,2]) #build super cell by replicating unit cell + +mf=pbcdft.RKS(cell) +#mf.xc = "LDA, VWN" +mf.xc = "PBE,PBE" +mf.init_guess = 'atom' # atom guess is fast +mf.with_df = multigrid.MultiGridFFTDF2(cell) +mf.with_df.ngrids = 4 # number of sets of grid points +mf.kernel() + +# Nuclear Gradients +from pyscf.pbc.grad import rks as rks_grad +grad = rks_grad.Gradients(mf) +g = grad.kernel() diff --git a/pyscf/gto/mole.py b/pyscf/gto/mole.py index 4e06980ffb..28d8fd444d 100644 --- a/pyscf/gto/mole.py +++ b/pyscf/gto/mole.py @@ -61,6 +61,7 @@ NUC_MOD_OF = 2 PTR_ZETA = 3 PTR_FRAC_CHARGE = 4 +PTR_RADIUS = 5 ATM_SLOTS = 6 ATOM_OF = 0 ANG_OF = 1 @@ -2412,6 +2413,15 @@ def ms(self, x): else: self.spin = int(round(2*x, 4)) + @property + def enuc(self): + '''nuclear repulsion energy''' + if self._enuc is None: + self._enuc = self.energy_nuc() + return self._enuc + @enuc.setter + def enuc(self, enuc): + self._enuc = enuc copy = copy @@ -2576,6 +2586,9 @@ def build(self, dump_input=True, parse_arg=ARGPARSE, # number of electrons are consistent. 
self.nelec + # reset nuclear energy + self.enuc = None + if not self.magmom: self.magmom = [0,] * self.natm elif len(self.magmom) != self.natm: @@ -2784,7 +2797,7 @@ def dump_input(self): if self.verbose >= logger.INFO: self.stdout.write('\n') - logger.info(self, 'nuclear repulsion = %.15g', self.energy_nuc()) + logger.info(self, 'nuclear repulsion = %.15g', self.enuc) if self.symmetry: if self.topgroup == self.groupname: logger.info(self, 'point group symmetry = %s', self.topgroup) @@ -3050,6 +3063,9 @@ def set_geom_(self, atoms_or_coords, unit=None, symmetry=None, mol.symmetry = symmetry mol.build(False, False) + # reset nuclear energy + mol.enuc = None + if mol.verbose >= logger.INFO: logger.info(mol, 'New geometry') for ia, atom in enumerate(mol._atom): @@ -3542,7 +3558,9 @@ def intor_by_shell(self, intor, shells, comp=None, grids=None): eval_ao = eval_gto = eval_gto - energy_nuc = get_enuc = energy_nuc + energy_nuc = energy_nuc + def get_enuc(self): + return self.enuc def get_ao_indices(self, bas_list, ao_loc=None): ''' diff --git a/pyscf/gto/moleintor.py b/pyscf/gto/moleintor.py index e3d661f1e0..4c6a4ce8cf 100644 --- a/pyscf/gto/moleintor.py +++ b/pyscf/gto/moleintor.py @@ -429,6 +429,7 @@ def _get_intor_and_comp(intor_name, comp=None): 'int2c2e_ip1ip2' : (9, 9), 'int2c2e_ipip1' : (9, 9), 'int3c1e' : (1, 1), + 'int3c1e_ip1' : (3, 3), 'int3c1e_p2' : (1, 1), 'int3c1e_iprinv' : (3, 3), 'int2c2e' : (1, 1), diff --git a/pyscf/lib/CMakeLists.txt b/pyscf/lib/CMakeLists.txt index 1dc076da21..4b7236535e 100644 --- a/pyscf/lib/CMakeLists.txt +++ b/pyscf/lib/CMakeLists.txt @@ -136,6 +136,9 @@ else () set(CMAKE_INSTALL_RPATH "\$ORIGIN:\$ORIGIN/deps/lib:\$ORIGIN/deps/lib64") endif () +option(ENABLE_FFTW "Using fftw3" OFF) +option(BUILD_FFTW "Building fftw3" OFF) + add_subdirectory(np_helper) add_subdirectory(gto) add_subdirectory(vhf) @@ -198,6 +201,12 @@ option(ENABLE_XCFUN "Using xcfun for XC functional library" ON) option(BUILD_LIBXC "Download and build libxc library" 
ON) option(BUILD_XCFUN "Download and build xcfun library" ON) +option(ENABLE_LIBXSMM "Using libxsmm" OFF) +option(BUILD_LIBXSMM "Building libxsmm" OFF) +if(APPLE) + set(ENABLE_LIBXSMM OFF) +endif() + if(NOT DISABLE_DFT) add_subdirectory(dft) @@ -237,8 +246,39 @@ if(ENABLE_XCFUN AND BUILD_XCFUN) add_dependencies(xcfun_itrf libxcfun) add_dependencies(dft libxcfun) endif() # ENABLE_XCFUN + +if(ENABLE_LIBXSMM AND BUILD_LIBXSMM) + if(NOT EXISTS "${PROJECT_SOURCE_DIR}/deps/include/libxsmm.h") + ExternalProject_Add(libxsmm + GIT_REPOSITORY https://github.com/hfp/libxsmm.git + GIT_TAG 1.17 + PREFIX ${PROJECT_BINARY_DIR}/deps + INSTALL_DIR ${PROJECT_SOURCE_DIR}/deps + CONFIGURE_COMMAND "" + BUILD_IN_SOURCE True + BUILD_COMMAND make -j4 PREFIX= CXX=${CMAKE_CXX_COMPILER} CC=${CMAKE_C_COMPILER} STATIC=0 MALLOC=0 INTRINSICS=2 install + INSTALL_COMMAND "" + ) + add_dependencies(dft libxsmm) + endif() +endif() endif() # DISABLE_DFT +if(ENABLE_FFTW AND BUILD_FFTW) +# if(NOT EXISTS "${PROJECT_SOURCE_DIR}/deps/include/fftw3.h") + ExternalProject_Add(libfftw3 + URL https://www.fftw.org/fftw-3.3.10.tar.gz + PREFIX ${PROJECT_BINARY_DIR}/deps + INSTALL_DIR ${PROJECT_SOURCE_DIR}/deps + BUILD_IN_SOURCE True + CONFIGURE_COMMAND ./configure --enable-static=no --enable-shared=yes --enable-threads CXX=${CMAKE_CXX_COMPILER} CC=${CMAKE_C_COMPILER} prefix= + BUILD_COMMAND make -j4 install + ) + add_dependencies(fft libfftw3) + add_dependencies(pbc libfftw3) +# endif() +endif() + if(EXISTS "${PROJECT_SOURCE_DIR}/cmake.user.inc") include("${PROJECT_SOURCE_DIR}/cmake.user.inc") endif() diff --git a/pyscf/lib/dft/CMakeLists.txt b/pyscf/lib/dft/CMakeLists.txt index 6b01b7eca0..c7263183c8 100644 --- a/pyscf/lib/dft/CMakeLists.txt +++ b/pyscf/lib/dft/CMakeLists.txt @@ -15,14 +15,19 @@ add_library(dft SHARED CxLebedevGrid.c grid_basis.c nr_numint.c r_numint.c numint_uniform_grid.c xc_deriv.c nr_numint_sparse.c - ) -add_dependencies(dft cgto cvhf np_helper) + multigrid.c grid_common.c grid_collocate.c 
grid_integrate.c utils.c +) +add_dependencies(dft cgto cvhf np_helper pbc) set_target_properties(dft PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}) -target_link_libraries(dft cvhf cgto cint np_helper ${BLAS_LIBRARIES} ${OPENMP_C_PROPERTIES}) - +if(ENABLE_LIBXSMM) + add_definitions(-DHAVE_LIBXSMM) + target_link_libraries(dft cvhf cgto cint np_helper pbc xsmm ${BLAS_LIBRARIES} ${OPENMP_C_PROPERTIES}) +else() + target_link_libraries(dft cvhf cgto cint np_helper pbc ${BLAS_LIBRARIES} ${OPENMP_C_PROPERTIES}) +endif() if(ENABLE_LIBXC) add_library(xc_itrf SHARED libxc_itrf.c) @@ -37,4 +42,3 @@ set_target_properties(xcfun_itrf PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}) target_link_libraries(xcfun_itrf xcfun ${OPENMP_C_PROPERTIES}) endif() - diff --git a/pyscf/lib/dft/grid_collocate.c b/pyscf/lib/dft/grid_collocate.c new file mode 100644 index 0000000000..33842191d3 --- /dev/null +++ b/pyscf/lib/dft/grid_collocate.c @@ -0,0 +1,655 @@ +/* Copyright 2021- The PySCF Developers. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + * + * Author: Xing Zhang + */ + +#include +#include +#include +#include +#include +#include +#include +#include "config.h" +#include "vhf/fblas.h" +#include "np_helper/np_helper.h" +#include "dft/multigrid.h" +#include "dft/grid_common.h" +#include "dft/utils.h" + +#define MAX_THREADS 256 +#define PTR_RADIUS 5 + +static void transform_dm(double* dm_cart, double* dm, + double* ish_contr_coeff, double* jsh_contr_coeff, + int* ish_ao_loc, int* jsh_ao_loc, + int* ish_bas, int* jsh_bas, int ish, int jsh, + int ish0, int jsh0, int naoj, double* cache) +{ + int i0 = ish_ao_loc[ish] - ish_ao_loc[ish0]; + int i1 = ish_ao_loc[ish+1] - ish_ao_loc[ish0]; + int j0 = jsh_ao_loc[jsh] - jsh_ao_loc[jsh0]; + int j1 = jsh_ao_loc[jsh+1] - jsh_ao_loc[jsh0]; + + int nrow = i1 - i0; + int ncol = j1 - j0; + double* pdm = dm + ((size_t)naoj) * i0 + j0; + + int l_i = ish_bas[ANG_OF+ish*BAS_SLOTS]; + int ncart_i = _LEN_CART[l_i]; + int nprim_i = ish_bas[NPRIM_OF+ish*BAS_SLOTS]; + int nao_i = nprim_i*ncart_i; + int l_j = jsh_bas[ANG_OF+jsh*BAS_SLOTS]; + int ncart_j = _LEN_CART[l_j]; + int nprim_j = jsh_bas[NPRIM_OF+jsh*BAS_SLOTS]; + int nao_j = nprim_j*ncart_j; + + const char TRANS_T = 'T'; + const char TRANS_N = 'N'; + const double D1 = 1; + const double D0 = 0; + //einsum("pi,ij,qj->pq", coeff_i, dm, coeff_j) + dgemm_wrapper(TRANS_T, TRANS_N, nao_j, nrow, ncol, + D1, jsh_contr_coeff, ncol, pdm, naoj, D0, cache, nao_j); + dgemm_wrapper(TRANS_N, TRANS_N, nao_j, nao_i, nrow, + D1, cache, nao_j, ish_contr_coeff, nrow, D0, dm_cart, nao_j); +} + + +static void add_rho_submesh(double* rho, double* pqr, + int* mesh_lb, int* mesh_ub, int* submesh_lb, + const int* mesh, const int* submesh) +{ + const int x0 = mesh_lb[0]; + const int y0 = mesh_lb[1]; + const int z0 = mesh_lb[2]; + + const int nx = mesh_ub[0] - x0; + const int ny = mesh_ub[1] - y0; + const int nz = mesh_ub[2] - z0; + + const int x0_sub = submesh_lb[0]; + const int y0_sub = submesh_lb[1]; + const int z0_sub = submesh_lb[2]; + + 
const size_t mesh_yz = ((size_t) mesh[1]) * mesh[2]; + const size_t submesh_yz = ((size_t) submesh[1]) * submesh[2]; + + int ix, iy, iz; + for (ix = 0; ix < nx; ix++) { + double* __restrict ptr_rho = rho + (ix + x0) * mesh_yz + y0 * mesh[2] + z0; + double* __restrict ptr_pqr = pqr + (ix + x0_sub) * submesh_yz + y0_sub * submesh[2] + z0_sub; + for (iy = 0; iy < ny; iy++) { + #pragma omp simd + for (iz = 0; iz < nz; iz++) { + ptr_rho[iz] += ptr_pqr[iz]; + } + ptr_rho += mesh[2]; + ptr_pqr += submesh[2]; + } + } +} + + +static void _orth_rho(double *rho, double *dm_xyz, + double fac, int topl, + int *mesh, int *grid_slice, + double *xs_exp, double *ys_exp, double *zs_exp, + double *cache) +{ + const int l1 = topl + 1; + const int l1l1 = l1 * l1; + const int nx0 = grid_slice[0]; + const int nx1 = grid_slice[1]; + const int ny0 = grid_slice[2]; + const int ny1 = grid_slice[3]; + const int nz0 = grid_slice[4]; + const int nz1 = grid_slice[5]; + const int ngridx = nx1 - nx0; + const int ngridy = ny1 - ny0; + const int ngridz = nz1 - nz0; + if (ngridx == 0 || ngridy == 0 || ngridz == 0) { + return; + } + + const char TRANS_N = 'N'; + const char TRANS_T = 'T'; + const double D0 = 0; + const double D1 = 1; + const int xcols = ngridy * ngridz; + double *xyr = cache; + double *xqr = xyr + l1l1 * ngridz; + double *pqr = xqr + l1 * xcols; + int ix, iy, iz, l; + + dgemm_wrapper(TRANS_N, TRANS_N, ngridz, l1l1, l1, + fac, zs_exp, ngridz, dm_xyz, l1, + D0, xyr, ngridz); + for (l = 0; l <= topl; l++) { + dgemm_wrapper(TRANS_N, TRANS_T, ngridz, ngridy, l1, + D1, xyr+l*l1*ngridz, ngridz, ys_exp, ngridy, + D0, xqr+l*xcols, ngridz); + } + dgemm_wrapper(TRANS_N, TRANS_T, xcols, ngridx, l1, + D1, xqr, xcols, xs_exp, ngridx, + D0, pqr, xcols); + + const int submesh[3] = {ngridx, ngridy, ngridz}; + int lb[3], ub[3]; + for (ix = 0; ix < ngridx;) { + lb[0] = modulo(ix + nx0, mesh[0]); + ub[0] = get_upper_bound(lb[0], mesh[0], ix, ngridx); + for (iy = 0; iy < ngridy;) { + lb[1] = modulo(iy + 
ny0, mesh[1]); + ub[1] = get_upper_bound(lb[1], mesh[1], iy, ngridy); + for (iz = 0; iz < ngridz;) { + lb[2] = modulo(iz + nz0, mesh[2]); + ub[2] = get_upper_bound(lb[2], mesh[2], iz, ngridz); + int lb_sub[3] = {ix, iy, iz}; + add_rho_submesh(rho, pqr, lb, ub, lb_sub, mesh, submesh); + iz += ub[2] - lb[2]; + } + iy += ub[1] - lb[1]; + } + ix += ub[0] - lb[0]; + } +} + + +void make_rho_lda_orth(double *rho, double *dm, int comp, + int li, int lj, double ai, double aj, + double *ri, double *rj, double fac, double cutoff, + int dimension, double* dh, double *a, double *b, + int *mesh, double *cache) +{ + int topl = li + lj; + int l1 = topl + 1; + int l1l1l1 = l1 * l1 * l1; + int grid_slice[6]; + double *xs_exp, *ys_exp, *zs_exp; + int data_size = init_orth_data(&xs_exp, &ys_exp, &zs_exp, + grid_slice, dh, mesh, topl, cutoff, + ai, aj, ri, rj, cache); + + if (data_size == 0) { + return; + } + cache += data_size; + + double *dm_xyz = cache; + cache += l1l1l1; + memset(dm_xyz, 0, l1l1l1*sizeof(double)); + + _dm_to_dm_xyz(dm_xyz, dm, li, lj, ri, rj, cache); + + _orth_rho(rho, dm_xyz, fac, topl, mesh, grid_slice, + xs_exp, ys_exp, zs_exp, cache); +} + + +static void _apply_rho(void (*eval_rho)(), double *rho, double *dm, + PGFPair* pgfpair, int comp, int dimension, + double* dh, double *a, double *b, int *mesh, + double* ish_gto_norm, double* jsh_gto_norm, + int *ish_atm, int *ish_bas, double *ish_env, + int *jsh_atm, int *jsh_bas, double *jsh_env, + double* Ls, double *cache) +{ + int ish = pgfpair->ish; + int jsh = pgfpair->jsh; + int ipgf = pgfpair->ipgf; + int jpgf = pgfpair->jpgf; + int iL = pgfpair->iL; + double cutoff = pgfpair->radius; + + double *ri = ish_env + ish_atm[PTR_COORD+ish_bas[ATOM_OF+ish*BAS_SLOTS]*ATM_SLOTS]; + double *rj = jsh_env + jsh_atm[PTR_COORD+jsh_bas[ATOM_OF+jsh*BAS_SLOTS]*ATM_SLOTS]; + double *rL = Ls + iL*3; + double rjL[3]; + rjL[0] = rj[0] + rL[0]; + rjL[1] = rj[1] + rL[1]; + rjL[2] = rj[2] + rL[2]; + + const int li = 
ish_bas[ANG_OF+ish*BAS_SLOTS]; + const int lj = jsh_bas[ANG_OF+jsh*BAS_SLOTS]; + double ai = ish_env[ish_bas[PTR_EXP+ish*BAS_SLOTS]+ipgf]; + double aj = jsh_env[jsh_bas[PTR_EXP+jsh*BAS_SLOTS]+jpgf]; + double ci = ish_gto_norm[ipgf]; + double cj = jsh_gto_norm[jpgf]; + double aij = ai + aj; + double rrij = CINTsquare_dist(ri, rjL); + double eij = (ai * aj / aij) * rrij; + if (eij > EIJCUTOFF) { + return; + } + double fac = exp(-eij) * ci * cj * CINTcommon_fac_sp(li) * CINTcommon_fac_sp(lj); + if (fac < ish_env[PTR_EXPDROP] && fac < jsh_env[PTR_EXPDROP]) { + return; + } + + (*eval_rho)(rho, dm, comp, li, lj, ai, aj, ri, rjL, + fac, cutoff, dimension, dh, a, b, mesh, cache); +} + + +static size_t _rho_cache_size(int l, int nprim, int nctr, int* mesh, double radius, double* dh) +{ + size_t size = 0; + size_t mesh_size = ((size_t)mesh[0]) * mesh[1] * mesh[2]; + size_t nmx = get_max_num_grid_orth(dh, radius); + int l1 = 2 * l + 1; + int l1l1 = l1 * l1; + int max_mesh = MAX(MAX(mesh[0], mesh[1]), mesh[2]); + size += (nprim * _LEN_CART[l]) * (nprim * _LEN_CART[l]); // dm_cart + size += _LEN_CART[l]*_LEN_CART[l]; // dm_pgf + size += nctr * _LEN_CART[l] * nprim * _LEN_CART[l]; // transform_dm + size += l1 * (mesh[0] + mesh[1] + mesh[2]); // xs_exp, ys_exp, zs_exp + size += l1l1 * l1; // dm_xyz + size += 3 * (_LEN_CART[l] + l1); // _dm_to_dm_xyz + + size_t size_orth_components = l1 * nmx + nmx; // orth_components + size_t size_orth_rho = 0; // _orth_rho + if (nmx < max_mesh) { + size_orth_rho = l1l1*nmx + l1*nmx*nmx + nmx*nmx*nmx; + } else { + size_orth_rho = l1l1*mesh[2] + l1*mesh[1]*mesh[2] + mesh_size; + } + size += MAX(size_orth_rho, size_orth_components); + size += 1000000; + //printf("Memory allocated per thread for make_rho: %ld MB.\n", (size+mesh_size)*sizeof(double) / 1000000); + return size; +} + + +static size_t _rho_core_cache_size(int* mesh, double radius, double* dh) +{ + size_t size = 0; + size_t mesh_size = ((size_t)mesh[0]) * mesh[1] * mesh[2]; + size_t nmx = 
get_max_num_grid_orth(dh, radius); + int l = 0; + int l1 = 1; + int l1l1 = l1 * l1; + int max_mesh = MAX(MAX(mesh[0], mesh[1]), mesh[2]); + size += l1 * (mesh[0] + mesh[1] + mesh[2]); + size += l1l1 * l1; + size += 3 * (_LEN_CART[l] + l1); + + size_t size_orth_components = l1 * nmx + nmx; + size_t size_orth_rho = 0; + if (nmx < max_mesh) { + size_orth_rho = l1l1*nmx + l1*nmx*nmx + nmx*nmx*nmx; + } else { + size_orth_rho = l1l1*mesh[2] + l1*mesh[1]*mesh[2] + mesh_size; + } + size += MAX(size_orth_rho, size_orth_components); + //size += 1000000; + return size; +} + + +void grid_collocate_drv(void (*eval_rho)(), RS_Grid** rs_rho, double* dm, TaskList** task_list, + int comp, int hermi, int *shls_slice, int* ish_ao_loc, int* jsh_ao_loc, + int dimension, double* Ls, double* a, double* b, + int* ish_atm, int* ish_bas, double* ish_env, + int* jsh_atm, int* jsh_bas, double* jsh_env, int cart) +{ + TaskList* tl = *task_list; + GridLevel_Info* gridlevel_info = tl->gridlevel_info; + int nlevels = gridlevel_info->nlevels; + + assert (comp == (*rs_rho)->comp); + + const int ish0 = shls_slice[0]; + const int ish1 = shls_slice[1]; + const int jsh0 = shls_slice[2]; + const int jsh1 = shls_slice[3]; + const int nish = ish1 - ish0; + const int njsh = jsh1 - jsh0; + //const int nijsh = nish * njsh; + //const int naoi = ish_ao_loc[ish1] - ish_ao_loc[ish0]; + const int naoj = jsh_ao_loc[jsh1] - jsh_ao_loc[jsh0]; + + double **gto_norm_i = (double**) malloc(sizeof(double*) * nish); + double **cart2sph_coeff_i = (double**) malloc(sizeof(double*) * nish); + get_cart2sph_coeff(cart2sph_coeff_i, gto_norm_i, ish0, ish1, ish_bas, ish_env, cart); + double **gto_norm_j = gto_norm_i; + double **cart2sph_coeff_j = cart2sph_coeff_i; + if (hermi != 1) { + gto_norm_j = (double**) malloc(sizeof(double*) * njsh); + cart2sph_coeff_j = (double**) malloc(sizeof(double*) * njsh); + get_cart2sph_coeff(cart2sph_coeff_j, gto_norm_j, jsh0, jsh1, jsh_bas, jsh_env, cart); + } + + int ish_lmax = get_lmax(ish0, 
ish1, ish_bas); + int jsh_lmax = ish_lmax; + if (hermi != 1) { + jsh_lmax = get_lmax(jsh0, jsh1, jsh_bas); + } + + int ish_nprim_max = get_nprim_max(ish0, ish1, ish_bas); + int jsh_nprim_max = ish_nprim_max; + if (hermi != 1) { + jsh_nprim_max = get_nprim_max(jsh0, jsh1, jsh_bas); + } + + int ish_nctr_max = get_nctr_max(ish0, ish1, ish_bas); + int jsh_nctr_max = ish_nctr_max; + if (hermi != 1) { + jsh_nctr_max = get_nctr_max(jsh0, jsh1, jsh_bas); + } + + int ilevel; + int *mesh; + double max_radius; + double *rho, *rhobufs[MAX_THREADS]; + Task* task; + size_t ntasks; + PGFPair** pgfpairs; + for (ilevel = 0; ilevel < nlevels; ilevel++) { + task = (tl->tasks)[ilevel]; + ntasks = task->ntasks; + if (ntasks <= 0) { + continue; + } + pgfpairs = task->pgfpairs; + max_radius = task->radius; + + rho = (*rs_rho)->data[ilevel]; + mesh = gridlevel_info->mesh + ilevel*3; + + double dh[9]; + get_grid_spacing(dh, a, mesh); + + int *task_loc; + int nblock = get_task_loc(&task_loc, pgfpairs, ntasks, ish0, ish1, jsh0, jsh1, hermi); + + size_t cache_size = _rho_cache_size(MAX(ish_lmax,jsh_lmax), + MAX(ish_nprim_max, jsh_nprim_max), + MAX(ish_nctr_max, jsh_nctr_max), mesh, max_radius, dh); + size_t ngrids = ((size_t)mesh[0]) * mesh[1] * mesh[2]; + +#pragma omp parallel +{ + PGFPair *pgfpair = NULL; + int iblock, itask, ish, jsh; + double *ptr_gto_norm_i, *ptr_gto_norm_j; + double *cache0 = malloc(sizeof(double) * cache_size); + double *dm_cart = cache0; + double *dm_pgf = cache0 + ish_nprim_max*_LEN_CART[ish_lmax]*jsh_nprim_max*_LEN_CART[jsh_lmax]; + double *cache = dm_pgf + _LEN_CART[ish_lmax]*_LEN_CART[jsh_lmax]; + + int thread_id = omp_get_thread_num(); + double *rho_priv; + if (thread_id == 0) { + rho_priv = rho; + } else { + rho_priv = calloc(comp*ngrids, sizeof(double)); + } + rhobufs[thread_id] = rho_priv; + + #pragma omp for schedule(dynamic) + for (iblock = 0; iblock < nblock; iblock+=2) { + itask = task_loc[iblock]; + pgfpair = pgfpairs[itask]; + ish = pgfpair->ish; + jsh = 
pgfpair->jsh; + ptr_gto_norm_i = gto_norm_i[ish]; + ptr_gto_norm_j = gto_norm_j[jsh]; + transform_dm(dm_cart, dm, cart2sph_coeff_i[ish], + cart2sph_coeff_j[jsh], ish_ao_loc, jsh_ao_loc, + ish_bas, jsh_bas, ish, jsh, ish0, jsh0, naoj, cache); + for (; itask < task_loc[iblock+1]; itask++) { + pgfpair = pgfpairs[itask]; + get_dm_pgfpair(dm_pgf, dm_cart, pgfpair, ish_bas, jsh_bas, hermi); + _apply_rho(eval_rho, rho_priv, dm_pgf, pgfpair, comp, dimension, dh, a, b, mesh, + ptr_gto_norm_i, ptr_gto_norm_j, ish_atm, ish_bas, ish_env, + jsh_atm, jsh_bas, jsh_env, Ls, cache); + } + } + + free(cache0); + NPomp_dsum_reduce_inplace(rhobufs, comp*ngrids); + if (thread_id != 0) { + free(rho_priv); + } +} + if (task_loc) { + free(task_loc); + } + } // loop ilevel + + del_cart2sph_coeff(cart2sph_coeff_i, gto_norm_i, ish0, ish1); + if (hermi != 1) { + del_cart2sph_coeff(cart2sph_coeff_j, gto_norm_j, jsh0, jsh1); + } +} + + +void build_core_density(void (*eval_rho)(), double* rho, + int* atm, int* bas, int nbas, double* env, + int* mesh, int dimension, double* a, double* b, double max_radius) +{ + size_t ngrids; + ngrids = ((size_t) mesh[0]) * mesh[1] * mesh[2]; + + double dh[9]; + get_grid_spacing(dh, a, mesh); + + double *rhobufs[MAX_THREADS]; + size_t cache_size = _rho_core_cache_size(mesh, max_radius, dh); + +#pragma omp parallel +{ + int ia, ib; + double alpha, coeff, charge, rad, fac; + double dm[] = {1.0}; + double *r0; + double *cache = (double*) malloc(sizeof(double) * cache_size); + + int thread_id = omp_get_thread_num(); + double *rho_priv; + if (thread_id == 0) { + rho_priv = rho; + } else { + rho_priv = calloc(ngrids, sizeof(double)); + } + rhobufs[thread_id] = rho_priv; + + #pragma omp for schedule(static) + for (ib = 0; ib < nbas; ib++) { + ia = bas[ib*BAS_SLOTS+ATOM_OF]; + alpha = env[bas[ib*BAS_SLOTS+PTR_EXP]]; + coeff = env[bas[ib*BAS_SLOTS+PTR_COEFF]]; + charge = (double)atm[ia*ATM_SLOTS+CHARGE_OF]; + r0 = env + atm[ia*ATM_SLOTS+PTR_COORD]; + fac = -charge * coeff; 
+        rad = env[atm[ia*ATM_SLOTS+PTR_RADIUS]];
+        eval_rho(rho_priv, dm, 1, 0, 0, alpha, 0., r0, r0,
+                 fac, rad, dimension, dh, a, b, mesh, cache);
+    }
+    free(cache);
+
+    NPomp_dsum_reduce_inplace(rhobufs, ngrids);
+    if (thread_id != 0) {
+        free(rho_priv);
+    }
+}
+}
+
+
+
+
+/*
+ * Screen one primitive-Gaussian pair and build its 1D tables on an
+ * orthogonal mesh.
+ *
+ * The shell, primitive and lattice-image indices and the cutoff radius are
+ * read from `pgfpair`.  The j centre is shifted by the lattice vector
+ * Ls[iL*3 .. iL*3+2] before use.  The pair is dropped (early return) when
+ *   - (ai*aj/(ai+aj)) * |ri - rjL|^2 exceeds EIJCUTOFF, or
+ *   - the prefactor `fac` is below both ish_env[PTR_EXPDROP] and
+ *     jsh_env[PTR_EXPDROP].
+ *
+ * NOTE(review): the tables produced by init_orth_data() are not consumed
+ * here, and `comp`, `dimension`, `a`, `b` are currently unused.
+ */
+static void make_pgfparis_orth(
+    PGFPair* pgfpair, int comp, int dimension,
+    double* dh, double *a, double *b, int *mesh,
+    double* ish_gto_norm, double* jsh_gto_norm,
+    int *ish_atm, int *ish_bas, double *ish_env,
+    int *jsh_atm, int *jsh_bas, double *jsh_env,
+    double* Ls, double *cache)
+{
+    int ish = pgfpair->ish;
+    int jsh = pgfpair->jsh;
+    int ipgf = pgfpair->ipgf;
+    int jpgf = pgfpair->jpgf;
+    int iL = pgfpair->iL;
+    double cutoff = pgfpair->radius;
+
+    double *ri = ish_env + ish_atm[PTR_COORD+ish_bas[ATOM_OF+ish*BAS_SLOTS]*ATM_SLOTS];
+    double *rj = jsh_env + jsh_atm[PTR_COORD+jsh_bas[ATOM_OF+jsh*BAS_SLOTS]*ATM_SLOTS];
+    double *rL = Ls + iL*3;
+    // centre of shell j translated into lattice image iL
+    double rjL[3];
+    rjL[0] = rj[0] + rL[0];
+    rjL[1] = rj[1] + rL[1];
+    rjL[2] = rj[2] + rL[2];
+
+    const int li = ish_bas[ANG_OF+ish*BAS_SLOTS];
+    const int lj = jsh_bas[ANG_OF+jsh*BAS_SLOTS];
+    double ai = ish_env[ish_bas[PTR_EXP+ish*BAS_SLOTS]+ipgf];
+    double aj = jsh_env[jsh_bas[PTR_EXP+jsh*BAS_SLOTS]+jpgf];
+    double ci = ish_gto_norm[ipgf];
+    double cj = jsh_gto_norm[jpgf];
+    double aij = ai + aj;
+    double rrij = CINTsquare_dist(ri, rjL);
+    double eij = (ai * aj / aij) * rrij;
+    if (eij > EIJCUTOFF) {
+        return;
+    }
+    double fac = exp(-eij) * ci * cj * CINTcommon_fac_sp(li) * CINTcommon_fac_sp(lj);
+    if (fac < ish_env[PTR_EXPDROP] && fac < jsh_env[PTR_EXPDROP]) {
+        return;
+    }
+
+    int topl = li + lj;
+    int grid_slice[6];
+    double *xs_exp, *ys_exp, *zs_exp;
+    // Use the image-shifted centre rjL, the same one the screening above
+    // is based on, so the tables are built around the correct pair centre.
+    int data_size = init_orth_data(&xs_exp, &ys_exp, &zs_exp,
+                                   grid_slice, dh, mesh, topl, cutoff,
+                                   ai, aj, ri, rjL, cache);
+}
+
+
+/*
+ * Walk every grid level of *task_list and hand each primitive-pair task to
+ * make_pgfparis_orth().
+ *
+ * shls_slice = {ish0, ish1, jsh0, jsh1} selects the shell ranges.
+ * With hermi == 1 the j-side tables and maxima alias the i-side ones and no
+ * separate j tables are built.  `ish_ao_loc` / `jsh_ao_loc` are not used.
+ *
+ * Per level: the grid spacing `dh` is derived from `a` and the level mesh,
+ * get_task_loc() groups the tasks into blocks, and inside an OpenMP
+ * parallel region every thread allocates a private scratch `cache` of
+ * `cache_size` doubles and processes blocks with dynamic scheduling.
+ */
+void eval_pgfpairs(TaskList** task_list,
+                   int comp, int hermi, int *shls_slice, int* ish_ao_loc, int* jsh_ao_loc,
+                   int dimension, double* Ls, double* a, double* b,
+                   int* ish_atm, int* ish_bas, double* ish_env,
+                   int* jsh_atm, int* jsh_bas, double* jsh_env, int cart)
+{
+    TaskList* tl = *task_list;
+    GridLevel_Info* gridlevel_info = tl->gridlevel_info;
+    int nlevels = gridlevel_info->nlevels;
+
+    const int ish0 = shls_slice[0];
+    const int ish1 = shls_slice[1];
+    const int jsh0 = shls_slice[2];
+    const int jsh1 = shls_slice[3];
+    //const int nijsh = nish * njsh;
+    //const int naoi = ish_ao_loc[ish1] - ish_ao_loc[ish0];
+    //const int naoj = jsh_ao_loc[jsh1] - jsh_ao_loc[jsh0];
+
+    // The per-shell tables are indexed by absolute shell id:
+    // get_cart2sph_coeff() fills entries [ish0, ish1) and the task loop
+    // reads gto_norm_i[pgfpair->ish].  They therefore need ish1 (jsh1)
+    // slots; sizing them ish1 - ish0 overflows whenever ish0 > 0.
+    double **gto_norm_i = (double**) malloc(sizeof(double*) * (size_t)ish1);
+    double **cart2sph_coeff_i = (double**) malloc(sizeof(double*) * (size_t)ish1);
+    get_cart2sph_coeff(cart2sph_coeff_i, gto_norm_i, ish0, ish1, ish_bas, ish_env, cart);
+    double **gto_norm_j = gto_norm_i;
+    double **cart2sph_coeff_j = cart2sph_coeff_i;
+    if (hermi != 1) {
+        gto_norm_j = (double**) malloc(sizeof(double*) * (size_t)jsh1);
+        cart2sph_coeff_j = (double**) malloc(sizeof(double*) * (size_t)jsh1);
+        get_cart2sph_coeff(cart2sph_coeff_j, gto_norm_j, jsh0, jsh1, jsh_bas, jsh_env, cart);
+    }
+
+    int ish_lmax = get_lmax(ish0, ish1, ish_bas);
+    int jsh_lmax = ish_lmax;
+    if (hermi != 1) {
+        jsh_lmax = get_lmax(jsh0, jsh1, jsh_bas);
+    }
+
+    int ish_nprim_max = get_nprim_max(ish0, ish1, ish_bas);
+    int jsh_nprim_max = ish_nprim_max;
+    if (hermi != 1) {
+        jsh_nprim_max = get_nprim_max(jsh0, jsh1, jsh_bas);
+    }
+
+    int ish_nctr_max = get_nctr_max(ish0, ish1, ish_bas);
+    int jsh_nctr_max = ish_nctr_max;
+    if (hermi != 1) {
+        jsh_nctr_max = get_nctr_max(jsh0, jsh1, jsh_bas);
+    }
+
+    int ilevel;
+    int *mesh;
+    double max_radius;
+    Task* task;
+    size_t ntasks;
+    PGFPair** pgfpairs;
+    for (ilevel = 0; ilevel < nlevels; ilevel++) {
+        task = (tl->tasks)[ilevel];
+        ntasks = task->ntasks;
+        if (ntasks <= 0) {
+            continue;
+        }
+        pgfpairs = task->pgfpairs;
+        max_radius = task->radius;
+
+        mesh = gridlevel_info->mesh + ilevel*3;
+
+        double dh[9];
+        get_grid_spacing(dh, a, mesh);
+
+        int *task_loc;
+        int nblock = get_task_loc(&task_loc, pgfpairs, ntasks, ish0, ish1, jsh0, jsh1, hermi);
+
+        // scratch size is chosen from the largest l / nprim / nctr on either side
+        size_t cache_size = _rho_cache_size(MAX(ish_lmax,jsh_lmax),
+                                            MAX(ish_nprim_max, jsh_nprim_max),
+                                            MAX(ish_nctr_max, jsh_nctr_max), mesh, max_radius, dh);
+        //size_t ngrids = ((size_t)mesh[0]) * mesh[1] * mesh[2];
+
+#pragma omp parallel
+{
+    PGFPair *pgfpair = NULL;
+    int iblock, itask, ish, jsh;
+    double *ptr_gto_norm_i, *ptr_gto_norm_j;
+    double *cache = malloc(sizeof(double) * cache_size);
+
+    // task_loc is consumed two entries at a time: [first task, end task)
+    #pragma omp for schedule(dynamic)
+    for (iblock = 0; iblock < nblock; iblock+=2) {
+        itask = task_loc[iblock];
+        pgfpair = pgfpairs[itask];
+        // the shell pair is read once from the block's first task
+        ish = pgfpair->ish;
+        jsh = pgfpair->jsh;
+        ptr_gto_norm_i = gto_norm_i[ish];
+        ptr_gto_norm_j = gto_norm_j[jsh];
+        for (; itask < task_loc[iblock+1]; itask++) {
+            pgfpair = pgfpairs[itask];
+            make_pgfparis_orth(pgfpair, comp, dimension, dh, a, b, mesh,
+                               ptr_gto_norm_i, ptr_gto_norm_j, ish_atm, ish_bas, ish_env,
+                               jsh_atm, jsh_bas, jsh_env, Ls, cache);
+        }
+    }
+
+    free(cache);
+}
+        if (task_loc) {
+            free(task_loc);
+        }
+    } // loop ilevel
+
+    del_cart2sph_coeff(cart2sph_coeff_i, gto_norm_i, ish0, ish1);
+    if (hermi != 1) {
+        del_cart2sph_coeff(cart2sph_coeff_j, gto_norm_j, jsh0, jsh1);
+    }
+}
diff --git a/pyscf/lib/dft/grid_common.c b/pyscf/lib/dft/grid_common.c
new file mode 100644
index 0000000000..f7e198ab17
--- /dev/null
+++ b/pyscf/lib/dft/grid_common.c
@@ -0,0 +1,660 @@
+/* Copyright 2021- The PySCF Developers. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+ *
+ * Author: Xing Zhang
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <assert.h>
+#include "config.h"
+#include "cint.h"
+#include "vhf/fblas.h"
+#include "np_helper/np_helper.h"
+#include "dft/multigrid.h"
+#include "dft/grid_common.h"
+
+// Exponent arguments below this value are treated as negligible.
+// Parenthesised so the negative literal is safe inside any expression.
+#define EXPMIN (-700)
+
+
+/* Largest angular momentum among shells [ish0, ish1); 0 for an empty range. */
+int get_lmax(int ish0, int ish1, int* bas)
+{
+    int lmax = 0;
+    int ish;
+    for (ish = ish0; ish < ish1; ish++) {
+        lmax = MAX(lmax, bas[ANG_OF+ish*BAS_SLOTS]);
+    }
+    return lmax;
+}
+
+
+/* Largest primitive count among shells [ish0, ish1); never less than 1. */
+int get_nprim_max(int ish0, int ish1, int* bas)
+{
+    int nprim_max = 1;
+    int ish;
+    for (ish = ish0; ish < ish1; ish++) {
+        nprim_max = MAX(nprim_max, bas[NPRIM_OF+ish*BAS_SLOTS]);
+    }
+    return nprim_max;
+}
+
+
+/* Largest contraction count among shells [ish0, ish1); never less than 1. */
+int get_nctr_max(int ish0, int ish1, int* bas)
+{
+    int nctr_max = 1;
+    int ish;
+    for (ish = ish0; ish < ish1; ish++) {
+        nctr_max = MAX(nctr_max, bas[NCTR_OF+ish*BAS_SLOTS]);
+    }
+    return nctr_max;
+}
+
+
+/*
+ * Build, for every shell in [ish0, ish1), the primitive normalisation
+ * factors and the combined contraction / cartesian-to-spherical table.
+ *
+ *   gto_norm[ish]    : nprim doubles, CINTgto_norm(l, exponent)
+ *   contr_coeff[ish] : nprim*ncart*nctr*nsph doubles laid out as
+ *                      [ipgf][i (cart)][ic (contraction)][j (sph)], holding
+ *                      (contraction coeff / gto_norm) * c2s[l][j*ncart+i]
+ *
+ * Both pointer tables are indexed by the ABSOLUTE shell id, so the caller
+ * must supply at least ish1 slots.  Every per-shell array is heap allocated
+ * here; release them with del_cart2sph_coeff().
+ *
+ * For l <= 1 or cart == 1 the cart->sph matrix is the identity; otherwise
+ * it is obtained by passing an identity matrix through CINTc2s_ket_sph().
+ */
+void get_cart2sph_coeff(double** contr_coeff, double** gto_norm,
+                        int ish0, int ish1, int* bas, double* env, int cart)
+{
+    int l;
+    int lmax = get_lmax(ish0, ish1, bas);
+    int nprim, ncart, nsph, nctr;
+    int ptr_exp, ptr_coeff;
+    int ish, ipgf, ic, i, j;
+
+    // one cart->sph matrix per angular momentum, shared by all shells
+    double **c2s = (double**) malloc(sizeof(double*) * (lmax+1));
+    for (l = 0; l <= lmax; l++) {
+        ncart = _LEN_CART[l];
+        if (l <= 1 || cart == 1) {
+            c2s[l] = (double*) calloc(ncart*ncart, sizeof(double));
+            for (i = 0; i < ncart; i++) {
+                c2s[l][i*ncart + i] = 1;
+            }
+        }
+        else {
+            nsph = 2*l + 1;
+            c2s[l] = (double*) calloc(nsph*ncart, sizeof(double));
+            double* gcart = (double*) calloc(ncart*ncart, sizeof(double));
+            for (i = 0; i < ncart; i++) {
+                gcart[i*ncart + i] = 1;
+            }
+            CINTc2s_ket_sph(c2s[l], ncart, gcart, l);
+            free(gcart);
+        }
+    }
+
+#pragma omp parallel private (ish, ipgf, ic, i, j, l,\
+                              ncart, nsph, nprim, nctr,\
+                              ptr_exp, ptr_coeff)
+{
+    #pragma omp for schedule(dynamic)
+    for (ish = ish0; ish < ish1; ish++) {
+        l = bas[ANG_OF+ish*BAS_SLOTS];
+        ncart = _LEN_CART[l];
+        nsph = cart == 1 ? ncart : 2*l+1;
+        nprim = bas[NPRIM_OF+ish*BAS_SLOTS];
+        nctr = bas[NCTR_OF+ish*BAS_SLOTS];
+
+        ptr_exp = bas[PTR_EXP+ish*BAS_SLOTS];
+        gto_norm[ish] = (double*) malloc(sizeof(double) * nprim);
+        for (ipgf = 0; ipgf < nprim; ipgf++) {
+            gto_norm[ish][ipgf] = CINTgto_norm(l, env[ptr_exp+ipgf]);
+        }
+
+        // buf[ic*nprim+ipgf] = coeff[ic*nprim+ipgf] / norm[ipgf];
+        // daxpy walks one primitive across all contractions (stride nprim)
+        ptr_coeff = bas[PTR_COEFF+ish*BAS_SLOTS];
+        double *buf = (double*) calloc(nctr*nprim, sizeof(double));
+        for (ipgf = 0; ipgf < nprim; ipgf++) {
+            double inv_norm = 1./gto_norm[ish][ipgf];
+            daxpy_(&nctr, &inv_norm, env+ptr_coeff+ipgf, &nprim, buf+ipgf, &nprim);
+        }
+
+        contr_coeff[ish] = (double*) malloc(sizeof(double) * nprim*ncart*nctr*nsph);
+        double* ptr_contr_coeff = contr_coeff[ish];
+        for (ipgf = 0; ipgf < nprim; ipgf++) {
+            for (i = 0; i < ncart; i++) {
+                for (ic = 0; ic < nctr; ic++) {
+                    for (j = 0; j < nsph; j++) {
+                        *ptr_contr_coeff = buf[ic*nprim+ipgf] * c2s[l][j*ncart+i];
+                        ptr_contr_coeff += 1;
+                    }
+                }
+            }
+        }
+        free(buf);
+    }
+}
+
+    for (l = 0; l <= lmax; l++) {
+        free(c2s[l]);
+    }
+    free(c2s);
+}
+
+
+/*
+ * Free the per-shell arrays of shells [ish0, ish1) and then the two pointer
+ * tables themselves.  Entries are addressed by absolute shell id, matching
+ * get_cart2sph_coeff().
+ */
+void del_cart2sph_coeff(double** contr_coeff, double** gto_norm, int ish0, int ish1)
+{
+    int ish;
+    for (ish = ish0; ish < ish1; ish++) {
+        // free(NULL) is a no-op, so no NULL guard is needed
+        free(contr_coeff[ish]);
+        free(gto_norm[ish]);
+    }
+    free(contr_coeff);
+    free(gto_norm);
+}
+
+
+/*
+ * Number of grid points spanned by [-radius, +radius] along one axis,
+ * using the smallest of the three diagonal spacings dh[0], dh[4], dh[8].
+ */
+int get_max_num_grid_orth(double* dh, double radius)
+{
+    double dx = MIN(MIN(dh[0], dh[4]), dh[8]);
+    int ngrid = 2 * (int) ceil(radius / dx) + 1;
+    return ngrid;
+}
+
+
+/* Grid spacing: row i of the 3x3 matrix `a` divided by mesh[i]. */
+void get_grid_spacing(double* dh, double* a, int* mesh)
+{
+    int i, j;
+    for (i = 0; i < 3; i++) {
+        for (j = 0; j < 3; j++) {
+            dh[i*3+j] = a[i*3+j] / mesh[i];
+        }
+    }
+}
+
+
+int orth_components(double *xs_exp, int* bounds, double dx, double radius,
+                    double xi, double xj, double ai, double aj,
+                    int nx_per_cell, int topl, double *cache)
+{
+    double aij = ai + aj;
+    double xij = (ai * xi + aj * xj) / aij;
+    int x0_latt =
(int) floor((xij - radius) / dx); + int x1_latt = (int) ceil((xij + radius) / dx); + int xij_latt = rint(xij / dx); + xij_latt = MAX(xij_latt, x0_latt); + xij_latt = MIN(xij_latt, x1_latt); + bounds[0] = x0_latt; + bounds[1] = x1_latt; + int ngridx = x1_latt - x0_latt; + + double base_x = dx * xij_latt; + double x0xij = base_x - xij; + double _x0x0 = -aij * x0xij * x0xij; + if (_x0x0 < EXPMIN) { + return 0; + } + + double *gridx = cache; + double *xs_all = xs_exp; + if (ngridx >= nx_per_cell) { + xs_all = gridx + ngridx; + } + + double _dxdx = -aij * dx * dx; + double _x0dx = -2 * aij * x0xij * dx; + double exp_dxdx = exp(_dxdx); + double exp_2dxdx = exp_dxdx * exp_dxdx; + double exp_x0dx = exp(_x0dx + _dxdx); + double exp_x0x0 = exp(_x0x0); + + int i; + int istart = xij_latt - x0_latt; + for (i = istart; i < ngridx; i++) { + xs_all[i] = exp_x0x0; + exp_x0x0 *= exp_x0dx; + exp_x0dx *= exp_2dxdx; + } + + exp_x0dx = exp(_dxdx - _x0dx); + exp_x0x0 = exp(_x0x0); + for (i = istart-1; i >= 0; i--) { + exp_x0x0 *= exp_x0dx; + exp_x0dx *= exp_2dxdx; + xs_all[i] = exp_x0x0; + } + + if (topl > 0) { + double x0xi = x0_latt * dx - xi; + for (i = 0; i < ngridx; i++) { + gridx[i] = x0xi + i * dx; + } + int l; + double *px0; + for (l = 1; l <= topl; l++) { + px0 = xs_all + (l-1) * ngridx; + for (i = 0; i < ngridx; i++) { + px0[ngridx+i] = px0[i] * gridx[i]; + } + } + } + + // add up contributions from all images to the referece image + if (ngridx >= nx_per_cell) { + memset(xs_exp, 0, (topl+1)*nx_per_cell*sizeof(double)); + int ix, l, lb, ub, size_x; + for (ix = 0; ix < ngridx; ix++) { + lb = modulo(ix + x0_latt, nx_per_cell); + ub = get_upper_bound(lb, nx_per_cell, ix, ngridx); + size_x = ub - lb; + double* __restrict ptr_xs_exp = xs_exp + lb; + double* __restrict ptr_xs_all = xs_all + ix; + for (l = 0; l <= topl; l++) { + #pragma omp simd + for (i = 0; i < size_x; i++) { + ptr_xs_exp[i] += ptr_xs_all[i]; + } + ptr_xs_exp += nx_per_cell; + ptr_xs_all += ngridx; + } + ix += size_x 
- 1; + } + + bounds[0] = 0; + bounds[1] = nx_per_cell; + ngridx = nx_per_cell; + } + return ngridx; +} + + +int _orth_components(double *xs_exp, int *img_slice, int *grid_slice, + double a, double b, double cutoff, + double xi, double xj, double ai, double aj, + int periodic, int nx_per_cell, int topl, double *cache) +{ + double aij = ai + aj; + double xij = (ai * xi + aj * xj) / aij; + double heights_inv = b; + double xij_frac = xij * heights_inv; + double edge0 = xij_frac - cutoff * heights_inv; + double edge1 = xij_frac + cutoff * heights_inv; + + if (edge0 == edge1) { + return 0; + } + + int nimg0 = 0; + int nimg1 = 1; + if (periodic) { + nimg0 = (int) floor(edge0); + nimg1 = (int) ceil(edge1); + } + + int nimg = nimg1 - nimg0; + + int nmx0 = nimg0 * nx_per_cell; + int nmx1 = nimg1 * nx_per_cell; + int nmx = nmx1 - nmx0; + + int nx0 = (int) floor(edge0 * nx_per_cell); + int nx1 = (int) ceil(edge1 * nx_per_cell); + + int nx0_edge = nx0 - nmx0; + int nx1_edge = nx1 - nmx0; + + if (periodic) { + nx0 = nx0_edge % nx_per_cell; + nx1 = nx1_edge % nx_per_cell; + if (nx1 == 0) { + nx1 = nx_per_cell; + } + } + assert(nx0 == nx0_edge); + + img_slice[0] = nimg0; + img_slice[1] = nimg1; + grid_slice[0] = nx0; + grid_slice[1] = nx1; + + int ngridx = _num_grids_on_x(nimg, nx0, nx1, nx_per_cell); + if (ngridx == 0) { + return 0; + } + + int i, m, l; + double *px0; + + double *gridx = cache; + double *xs_all = cache + nmx; + if (nimg == 1) { + xs_all = xs_exp; + } + + int grid_close_to_xij = rint(xij_frac * nx_per_cell) - nmx0; + grid_close_to_xij = MIN(grid_close_to_xij, nx1_edge); + grid_close_to_xij = MAX(grid_close_to_xij, nx0_edge); + + double img0_x = a * nimg0; + double dx = a / nx_per_cell; + double base_x = img0_x + dx * grid_close_to_xij; + double x0xij = base_x - xij; + double _x0x0 = -aij * x0xij * x0xij; + if (_x0x0 < EXPMIN) { + return 0; + } + + double _dxdx = -aij * dx * dx; + double _x0dx = -2 * aij * x0xij * dx; + double exp_dxdx = exp(_dxdx); + double 
exp_2dxdx = exp_dxdx * exp_dxdx; + double exp_x0dx = exp(_x0dx + _dxdx); + double exp_x0x0 = exp(_x0x0); + + for (i = grid_close_to_xij; i < nx1_edge; i++) { + xs_all[i] = exp_x0x0; + exp_x0x0 *= exp_x0dx; + exp_x0dx *= exp_2dxdx; + } + + exp_x0dx = exp(_dxdx - _x0dx); + exp_x0x0 = exp(_x0x0); + for (i = grid_close_to_xij-1; i >= nx0_edge; i--) { + exp_x0x0 *= exp_x0dx; + exp_x0dx *= exp_2dxdx; + xs_all[i] = exp_x0x0; + } + + if (topl > 0) { + double x0xi = img0_x - xi; + for (i = nx0_edge; i < nx1_edge; i++) { + gridx[i] = x0xi + i * dx; + } + for (l = 1; l <= topl; l++) { + px0 = xs_all + (l-1) * nmx; + for (i = nx0_edge; i < nx1_edge; i++) { + px0[nmx+i] = px0[i] * gridx[i]; + } + } + } + + int idx1; + if (nimg > 1) { + for (l = 0; l <= topl; l++) { + px0 = xs_all + l * nmx; + for (i = nx0; i < nx_per_cell; i++) { + xs_exp[l*nx_per_cell+i] = px0[i]; + } + memset(xs_exp+l*nx_per_cell, 0, nx0*sizeof(double)); + for (m = 1; m < nimg; m++) { + px0 = xs_all + l * nmx + m*nx_per_cell; + idx1 = (m == nimg - 1) ? 
nx1 : nx_per_cell; + for (i = 0; i < idx1; i++) { + xs_exp[l*nx_per_cell+i] += px0[i]; + } + } + } + } + return ngridx; +} + + +int init_orth_data(double **xs_exp, double **ys_exp, double **zs_exp, + int *grid_slice, double* dh, int* mesh, int topl, double radius, + double ai, double aj, double *ri, double *rj, double *cache) +{ + int l1 = topl + 1; + *xs_exp = cache; + *ys_exp = *xs_exp + l1 * mesh[0]; + *zs_exp = *ys_exp + l1 * mesh[1]; + int data_size = l1 * (mesh[0] + mesh[1] + mesh[2]); + cache += data_size; + + int ngridx = orth_components(*xs_exp, grid_slice, dh[0], radius, + ri[0], rj[0], ai, aj, mesh[0], topl, cache); + if (ngridx == 0) { + return 0; + } + + int ngridy = orth_components(*ys_exp, grid_slice+2, dh[4], radius, + ri[1], rj[1], ai, aj, mesh[1], topl, cache); + if (ngridy == 0) { + return 0; + } + + int ngridz = orth_components(*zs_exp, grid_slice+4, dh[8], radius, + ri[2], rj[2], ai, aj, mesh[2], topl, cache); + if (ngridz == 0) { + return 0; + } + + return data_size; +} + + +int _init_orth_data(double **xs_exp, double **ys_exp, double **zs_exp, + int *img_slice, int *grid_slice, int *mesh, + int topl, int dimension, double cutoff, + double ai, double aj, double *ri, double *rj, + double *a, double *b, double *cache) +{ + int l1 = topl + 1; + *xs_exp = cache; + *ys_exp = *xs_exp + l1 * mesh[0]; + *zs_exp = *ys_exp + l1 * mesh[1]; + int data_size = l1 * (mesh[0] + mesh[1] + mesh[2]); + cache += data_size; + + int ngridx = _orth_components(*xs_exp, img_slice, grid_slice, + a[0], b[0], cutoff, ri[0], rj[0], ai, aj, + (dimension>=1), mesh[0], topl, cache); + if (ngridx == 0) { + return 0; + } + + int ngridy = _orth_components(*ys_exp, img_slice+2, grid_slice+2, + a[4], b[4], cutoff, ri[1], rj[1], ai, aj, + (dimension>=2), mesh[1], topl, cache); + if (ngridy == 0) { + return 0; + } + + int ngridz = _orth_components(*zs_exp, img_slice+4, grid_slice+4, + a[8], b[8], cutoff, ri[2], rj[2], ai, aj, + (dimension>=3), mesh[2], topl, cache); + if (ngridz == 
0) {
+        return 0;
+    }
+
+    return data_size;
+}
+
+
+/*
+ * Tabulate BINOMIAL(l, lx) * r^(l-lx) for l = 0..lmax on each axis.
+ *
+ * `coeff` receives three consecutive sections (x, y, z) of _LEN_CART[lmax]
+ * doubles.  Inside a section the rows l = 0, 1, ... are stored back to back
+ * with lengths 1, 2, ..., lmax+1.
+ * `cache` must provide 3*(lmax+1) doubles for the powers of rij.
+ */
+void _get_dm_to_dm_xyz_coeff(double* coeff, double* rij, int lmax, double* cache)
+{
+    int l1 = lmax + 1;
+    int l, lx;
+
+    double *rx_pow = cache;
+    double *ry_pow = rx_pow + l1;
+    double *rz_pow = ry_pow + l1;
+
+    rx_pow[0] = 1.0;
+    ry_pow[0] = 1.0;
+    rz_pow[0] = 1.0;
+    for (lx = 1; lx <= lmax; lx++) {
+        rx_pow[lx] = rx_pow[lx-1] * rij[0];
+        ry_pow[lx] = ry_pow[lx-1] * rij[1];
+        rz_pow[lx] = rz_pow[lx-1] * rij[2];
+    }
+
+    int dj = _LEN_CART[lmax];
+    double *pcx = coeff;
+    double *pcy = pcx + dj;
+    double *pcz = pcy + dj;
+    for (l = 0; l <= lmax; l++){
+        for (lx = 0; lx <= l; lx++) {
+            pcx[lx] = BINOMIAL(l, lx) * rx_pow[l-lx];
+            pcy[lx] = BINOMIAL(l, lx) * ry_pow[l-lx];
+            pcz[lx] = BINOMIAL(l, lx) * rz_pow[l-lx];
+        }
+        // advance to the start of row l+1
+        pcx += l+1;
+        pcy += l+1;
+        pcz += l+1;
+    }
+}
+
+
+/*
+ * Scatter a primitive-pair block `dm` into the cube `dm_xyz` of side
+ * li+lj+1, indexed [lx][ly][lz].
+ *
+ * `dm` is read sequentially, i component outer and j component inner, each
+ * enumerated with lx descending and then ly descending.  Every element is
+ * ADDED to dm_xyz[lx_i+jx][ly_i+jy][lz_i+jz], weighted by the per-axis
+ * coefficients from _get_dm_to_dm_xyz_coeff() for rij = ri - rj.
+ * dm_xyz is accumulated, not cleared.  `cache` is scratch for the tables.
+ */
+void _dm_to_dm_xyz(double* dm_xyz, double* dm, int li, int lj, double* ri, double* rj, double* cache)
+{
+    int lx, ly, lz;
+    int lx_i, ly_i, lz_i;
+    int lx_j, ly_j, lz_j;
+    int jx, jy, jz;
+    double rij[3];
+
+    rij[0] = ri[0] - rj[0];
+    rij[1] = ri[1] - rj[1];
+    rij[2] = ri[2] - rj[2];
+
+    int l1 = li + lj + 1;
+    int l1l1 = l1 * l1;
+    double *coeff = cache;
+    int dj = _LEN_CART[lj];
+    cache += 3 * dj;
+
+    _get_dm_to_dm_xyz_coeff(coeff, rij, lj, cache);
+
+    double cx, cxy, cxyz;
+    double *pcx = coeff;
+    double *pcy = pcx + dj;
+    double *pcz = pcy + dj;
+    double *pdm = dm;
+    for (lx_i = li; lx_i >= 0; lx_i--) {
+        for (ly_i = li-lx_i; ly_i >= 0; ly_i--) {
+            lz_i = li - lx_i - ly_i;
+            for (lx_j = lj; lx_j >= 0; lx_j--) {
+                for (ly_j = lj-lx_j; ly_j >= 0; ly_j--) {
+                    lz_j = lj - lx_j - ly_j;
+                    for (jx = 0; jx <= lx_j; jx++) {
+                        cx = pcx[jx+_LEN_CART0[lx_j]];
+                        lx = lx_i + jx;
+                        for (jy = 0; jy <= ly_j; jy++) {
+                            cxy = cx * pcy[jy+_LEN_CART0[ly_j]];
+                            ly = ly_i + jy;
+                            for (jz = 0; jz <= lz_j; jz++) {
+                                cxyz = cxy * pcz[jz+_LEN_CART0[lz_j]];
+                                lz = lz_i + jz;
+                                dm_xyz[lx*l1l1+ly*l1+lz] += cxyz * pdm[0];
+                            }
+                        }
+                    }
+                    pdm += 1;
+                }
+            }
+        }
+    }
+}
+
+
+/*
+ * Gather counterpart of _dm_to_dm_xyz(): each element of `dm` is
+ * incremented by the coefficient-weighted sum of the dm_xyz entries that
+ * _dm_to_dm_xyz() would scatter it to.  Same loop order, same layouts, and
+ * `dm` is accumulated, not cleared.
+ */
+void _dm_xyz_to_dm(double* dm_xyz, double* dm, int li, int lj, double* ri, double* rj, double* cache)
+{
+    int lx, ly, lz;
+    int lx_i, ly_i, lz_i;
+    int lx_j, ly_j, lz_j;
+    int jx, jy, jz;
+    double rij[3];
+
+    rij[0] = ri[0] - rj[0];
+    rij[1] = ri[1] - rj[1];
+    rij[2] = ri[2] - rj[2];
+
+    int l1 = li + lj + 1;
+    int l1l1 = l1 * l1;
+    double *coeff = cache;
+    int dj = _LEN_CART[lj];
+    cache += 3 * dj;
+
+    _get_dm_to_dm_xyz_coeff(coeff, rij, lj, cache);
+
+    double cx, cxy, cxyz;
+    double *pcx = coeff;
+    double *pcy = pcx + dj;
+    double *pcz = pcy + dj;
+    double *pdm = dm;
+    for (lx_i = li; lx_i >= 0; lx_i--) {
+        for (ly_i = li-lx_i; ly_i >= 0; ly_i--) {
+            lz_i = li - lx_i - ly_i;
+            for (lx_j = lj; lx_j >= 0; lx_j--) {
+                for (ly_j = lj-lx_j; ly_j >= 0; ly_j--) {
+                    lz_j = lj - lx_j - ly_j;
+                    for (jx = 0; jx <= lx_j; jx++) {
+                        cx = pcx[jx+_LEN_CART0[lx_j]];
+                        lx = lx_i + jx;
+                        for (jy = 0; jy <= ly_j; jy++) {
+                            // hoisted out of the jz loop; same left-to-right
+                            // product order as the former cx*cy*cz
+                            cxy = cx * pcy[jy+_LEN_CART0[ly_j]];
+                            ly = ly_i + jy;
+                            for (jz = 0; jz <= lz_j; jz++) {
+                                cxyz = cxy * pcz[jz+_LEN_CART0[lz_j]];
+                                lz = lz_i + jz;
+                                pdm[0] += cxyz * dm_xyz[lx*l1l1+ly*l1+lz];
+                            }
+                        }
+                    }
+                    pdm += 1;
+                }
+            }
+        }
+    }
+}
+
+
+/*
+ * Copy the di x dj sub-block of primitive pair (ipgf, jpgf) out of the
+ * shell-pair matrix `dm_cart` into the dense buffer `dm_pgf`.
+ *
+ * `dm_cart` has row length nprim_j*dj; the sub-block starts at
+ * ipgf*di*ncol + jpgf*dj.  When hermi == 1 and ish != jsh the copy is
+ * doubled.
+ * NOTE(review): the doubling presumably accounts for the transposed shell
+ * pair being skipped by the caller - confirm.
+ */
+void get_dm_pgfpair(double* dm_pgf, double* dm_cart,
+                    PGFPair* pgfpair, int* ish_bas, int* jsh_bas, int hermi)
+{
+    int ish = pgfpair->ish;
+    int jsh = pgfpair->jsh;
+    int ipgf = pgfpair->ipgf;
+    int jpgf = pgfpair->jpgf;
+
+    int li = ish_bas[ANG_OF+ish*BAS_SLOTS];
+    int lj = jsh_bas[ANG_OF+jsh*BAS_SLOTS];
+    int di = _LEN_CART[li];
+    int dj = _LEN_CART[lj];
+
+    int nprim_j = jsh_bas[NPRIM_OF+jsh*BAS_SLOTS];
+    int ncol = nprim_j * dj;
+    double *pdm = dm_cart + (ipgf*di*ncol + jpgf*dj);
+    double *pdm_pgf = dm_pgf;
+    int i, j;
+    for (i = 0; i < di; i++) {
+        for (j = 0; j < dj; j++) {
+            pdm_pgf[j] = pdm[j];
+        }
+        pdm_pgf += dj;
+        pdm += ncol;
+    }
+
+    /*
+    if (hermi == 1 && ish == jsh) {
+        assert(di == dj);
+        for (i = 0; i < di; i++) {
+            for (j = i+1; j < dj; j++) {
+                dm_pgf[i*dj+j] *= 2;
+                dm_pgf[j*dj+i] = 0;
+            }
+        }
+    }*/
+    if (hermi == 1 && ish != jsh) {
+        pdm_pgf = dm_pgf;
+        for (i = 0; i < di; i++) {
+            for (j = 0; j < dj; j++) {
+                pdm_pgf[j] *= 2;
+            }
+            pdm_pgf += dj;
+        }
+    }
+}
diff --git a/pyscf/lib/dft/grid_common.h b/pyscf/lib/dft/grid_common.h
new file mode 100644
index 0000000000..36dc7e3655
--- /dev/null
+++ b/pyscf/lib/dft/grid_common.h
@@ -0,0 +1,109 @@
+/* Copyright 2021- The PySCF Developers. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+ */
+
+#ifndef HAVE_DEFINED_GRID_COMMON_H
+#define HAVE_DEFINED_GRID_COMMON_H
+
+#include <stdbool.h>   /* `bool` is used by _get_grid_mapping() below */
+#include "cint.h"
+
+#define EIJCUTOFF 60
+#define PTR_EXPDROP 16
+
+extern double CINTsquare_dist(const double *r1, const double *r2);
+extern double CINTcommon_fac_sp(int l);
+
+int get_lmax(int ish0, int ish1, int* bas);
+int get_nprim_max(int ish0, int ish1, int* bas);
+int get_nctr_max(int ish0, int ish1, int* bas);
+void get_cart2sph_coeff(double** contr_coeff, double** gto_norm,
+                        int ish0, int ish1, int* bas, double* env, int cart);
+void del_cart2sph_coeff(double** contr_coeff, double** gto_norm, int ish0, int ish1);
+
+/* Non-zero when nx0 <= nx1.  `nx_per_cell` is accepted but not used. */
+static inline int _has_overlap(int nx0, int nx1, int nx_per_cell)
+{
+    return nx0 <= nx1;
+}
+
+/*
+ * Number of distinct grid points covered along one axis:
+ *   one image                        -> nx1 - nx0
+ *   two images with nx0 > nx1        -> nx1 - nx0 + nx_per_cell
+ *   anything else                    -> nx_per_cell (the whole cell)
+ */
+static inline int _num_grids_on_x(int nimgx, int nx0, int nx1, int nx_per_cell)
+{
+    int ngridx;
+    if (nimgx == 1) {
+        ngridx = nx1 - nx0;
+    } else if (nimgx == 2 && !_has_overlap(nx0, nx1, nx_per_cell)) {
+        ngridx = nx1 - nx0 + nx_per_cell;
+    } else {
+        ngridx = nx_per_cell;
+    }
+    return ngridx;
+}
+
+
+/*
+ * Fill xmap[0..ngridx) with grid indices:
+ *   one image    -> nx0, nx0+1, ...
+ *   is_x_split   -> 0 .. nx1-1 followed by nx0, nx0+1, ...
+ *   otherwise    -> identity 0 .. ngridx-1
+ */
+static inline void _get_grid_mapping(int* xmap, int nx0, int nx1, int ngridx, int nimgx, bool is_x_split)
+{
+    int ix, nx;
+    if (nimgx == 1) {
+        for (ix = 0; ix < ngridx; ix++) {
+            xmap[ix] = ix + nx0;
+        }
+    } else if (is_x_split) {
+        for (ix = 0; ix < nx1; ix++) {
+            xmap[ix] = ix;
+        }
+        // offset that makes entry nx1 map to grid index nx0
+        nx = nx0 - nx1;
+        for (ix = nx1; ix < ngridx; ix++) {
+            xmap[ix] = ix + nx;
+        }
+    } else {
+        for (ix = 0; ix < ngridx; ix++) {
+            xmap[ix] = ix;
+        }
+    }
+}
+
+
+/* Remainder of i / n folded into [0, n) for positive n, also for i < 0. */
+static inline int modulo(int i, int n)
+{
+    return (i % n + n) % n;
+}
+
+
+/*
+ * End (exclusive) of the contiguous run that starts at in-cell index x0:
+ * limited by the cell edge (nx_per_cell) and by the ngridx - ix points
+ * still to be processed.
+ */
+static inline int get_upper_bound(int x0, int nx_per_cell, int ix, int ngridx)
+{
+    return x0 + MIN(nx_per_cell - x0, ngridx - ix);
+}
+
+int _orth_components(double *xs_exp, int *img_slice, int *grid_slice,
+                     double a, double b, double cutoff,
+                     double xi, double xj, double ai, double aj,
+                     int periodic, int nx_per_cell, int topl, double *cache);
+int _init_orth_data(double **xs_exp, double **ys_exp, double **zs_exp,
+                    int *img_slice, int *grid_slice, int *mesh,
+                    int topl, int dimension, double cutoff,
+                    double ai, double aj, double *ri, double *rj,
+                    double *a, double *b, double *cache);
+
+int init_orth_data(double **xs_exp, double **ys_exp, double **zs_exp,
+                   int *grid_slice, double* dh, int* mesh, int topl, double radius,
+                   double ai, double aj, double *ri, double *rj, double *cache);
+void get_grid_spacing(double* dh, double* a, int* mesh);
+
+void _get_dm_to_dm_xyz_coeff(double* coeff, double* rij, int lmax, double* cache);
+void _dm_to_dm_xyz(double* dm_xyz, double* dm, int li, int lj, double* ri, double* rj, double* cache);
+void _dm_xyz_to_dm(double* dm_xyz, double* dm, int li, int lj, double* ri, double* rj, double* cache);
+void get_dm_pgfpair(double* dm_pgf, double* dm_cart,
+                    PGFPair* pgfpair, int* ish_bas, int* jsh_bas, int hermi);
+int get_max_num_grid_orth(double* dh, double radius);
+#endif
diff --git a/pyscf/lib/dft/grid_integrate.c b/pyscf/lib/dft/grid_integrate.c
new file mode 100644
index 0000000000..9cabe864cb
--- /dev/null
+++ b/pyscf/lib/dft/grid_integrate.c
@@ -0,0
+1,1358 @@ +/* Copyright 2021- The PySCF Developers. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + * + * Author: Xing Zhang + */ + +#include +#include +#include +#include "config.h" +#include "vhf/fblas.h" +#include "np_helper/np_helper.h" +#include "dft/multigrid.h" +#include "dft/grid_common.h" +#include "dft/utils.h" + +#define PTR_RADIUS 5 + + +void transform_dm_inverse(double* dm_cart, double* dm, int comp, + double* ish_contr_coeff, double* jsh_contr_coeff, + int* ish_ao_loc, int* jsh_ao_loc, + int* ish_bas, int* jsh_bas, int ish, int jsh, + int ish0, int jsh0, int naoi, int naoj, double* cache) +{ + int i0 = ish_ao_loc[ish] - ish_ao_loc[ish0]; + int i1 = ish_ao_loc[ish+1] - ish_ao_loc[ish0]; + int j0 = jsh_ao_loc[jsh] - jsh_ao_loc[jsh0]; + int j1 = jsh_ao_loc[jsh+1] - jsh_ao_loc[jsh0]; + + int nrow = i1 - i0; + int ncol = j1 - j0; + double* pdm = dm + ((size_t)naoj) * i0 + j0; + + int l_i = ish_bas[ANG_OF+ish*BAS_SLOTS]; + int ncart_i = _LEN_CART[l_i]; + int nprim_i = ish_bas[NPRIM_OF+ish*BAS_SLOTS]; + int nao_i = nprim_i*ncart_i; + int l_j = jsh_bas[ANG_OF+jsh*BAS_SLOTS]; + int ncart_j = _LEN_CART[l_j]; + int nprim_j = jsh_bas[NPRIM_OF+jsh*BAS_SLOTS]; + int nao_j = nprim_j*ncart_j; + + const char TRANS_T = 'T'; + const char TRANS_N = 'N'; + const double D1 = 1; + const double D0 = 0; + double *buf = cache; + + int ic; + for (ic = 0; ic < comp; ic++) { + //einsum("pi,pq,qj->ij", coeff_i, dm_cart, coeff_j) + dgemm_(&TRANS_N, &TRANS_N, &ncol, 
&nao_i, &nao_j, + &D1, jsh_contr_coeff, &ncol, dm_cart, &nao_j, &D0, buf, &ncol); + dgemm_(&TRANS_N, &TRANS_T, &ncol, &nrow, &nao_i, + &D1, buf, &ncol, ish_contr_coeff, &nrow, &D0, pdm, &naoj); + pdm += ((size_t)naoi) * naoj; + dm_cart += nao_i * nao_j; + } +} + + +static void fill_tril(double* mat, int comp, int* ish_ao_loc, int* jsh_ao_loc, + int ish, int jsh, int ish0, int jsh0, int naoi, int naoj) +{ + int i0 = ish_ao_loc[ish] - ish_ao_loc[ish0]; + int i1 = ish_ao_loc[ish+1] - ish_ao_loc[ish0]; + int j0 = jsh_ao_loc[jsh] - jsh_ao_loc[jsh0]; + int j1 = jsh_ao_loc[jsh+1] - jsh_ao_loc[jsh0]; + int ni = i1 - i0; + int nj = j1 - j0; + size_t nao2 = ((size_t)naoi) * naoj; + + double *pmat_up = mat + i0*((size_t)naoj) + j0; + double *pmat_low = mat + j0*((size_t)naoj) + i0; + int ic, i, j; + for (ic = 0; ic < comp; ic++) { + for (i = 0; i < ni; i++) { + for (j = 0; j < nj; j++) { + pmat_low[j*naoj+i] = pmat_up[i*naoj+j]; + } + } + pmat_up += nao2; + pmat_low += nao2; + } +} + + +static void integrate_submesh(double* out, double* weights, + double* xs_exp, double* ys_exp, double* zs_exp, + double fac, int topl, + int* mesh_lb, int* mesh_ub, int* submesh_lb, + const int* mesh, const int* submesh, double* cache) +{ + const int l1 = topl + 1; + const int l1l1 = l1 * l1; + const int x0 = mesh_lb[0]; + const int y0 = mesh_lb[1]; + const int z0 = mesh_lb[2]; + + const int nx = mesh_ub[0] - x0; + const int ny = mesh_ub[1] - y0; + const int nz = mesh_ub[2] - z0; + + const int x0_sub = submesh_lb[0]; + const int y0_sub = submesh_lb[1]; + const int z0_sub = submesh_lb[2]; + + const size_t mesh_yz = ((size_t) mesh[1]) * mesh[2]; + + const char TRANS_N = 'N'; + const char TRANS_T = 'T'; + const double D0 = 0; + const double D1 = 1; + + double *lzlyx = cache; + double *zly = lzlyx + l1l1 * nx; + double *ptr_weights = weights + x0 * mesh_yz + y0 * mesh[2] + z0; + + int ix; + for (ix = 0; ix < nx; ix++) { + dgemm_wrapper(TRANS_N, TRANS_N, nz, l1, ny, + D1, ptr_weights, mesh[2], 
ys_exp+y0_sub, submesh[1], + D0, zly, nz); + dgemm_wrapper(TRANS_T, TRANS_N, l1, l1, nz, + D1, zs_exp+z0_sub, submesh[2], zly, nz, + D0, lzlyx+l1l1*ix, l1); + ptr_weights += mesh_yz; + } + dgemm_wrapper(TRANS_N, TRANS_N, l1l1, l1, nx, + fac, lzlyx, l1l1, xs_exp+x0_sub, submesh[0], + D1, out, l1l1); +} + + +static void _orth_ints(double *out, double *weights, int topl, double fac, + double *xs_exp, double *ys_exp, double *zs_exp, + int *grid_slice, int *mesh, double *cache) +{// NOTE: out is accumulated + const int nx0 = grid_slice[0]; + const int nx1 = grid_slice[1]; + const int ny0 = grid_slice[2]; + const int ny1 = grid_slice[3]; + const int nz0 = grid_slice[4]; + const int nz1 = grid_slice[5]; + const int ngridx = nx1 - nx0; + const int ngridy = ny1 - ny0; + const int ngridz = nz1 - nz0; + if (ngridx == 0 || ngridy == 0 || ngridz == 0) { + return; + } + + const int submesh[3] = {ngridx, ngridy, ngridz}; + int lb[3], ub[3]; + int ix, iy, iz; + for (ix = 0; ix < ngridx;) { + lb[0] = modulo(ix + nx0, mesh[0]); + ub[0] = get_upper_bound(lb[0], mesh[0], ix, ngridx); + for (iy = 0; iy < ngridy;) { + lb[1] = modulo(iy + ny0, mesh[1]); + ub[1] = get_upper_bound(lb[1], mesh[1], iy, ngridy); + for (iz = 0; iz < ngridz;) { + lb[2] = modulo(iz + nz0, mesh[2]); + ub[2] = get_upper_bound(lb[2], mesh[2], iz, ngridz); + int lb_sub[3] = {ix, iy, iz}; + integrate_submesh(out, weights, xs_exp, ys_exp, zs_exp, fac, topl, + lb, ub, lb_sub, mesh, submesh, cache); + iz += ub[2] - lb[2]; + } + iy += ub[1] - lb[1]; + } + ix += ub[0] - lb[0]; + } +} + + +#define VRHO_LOOP_IP1(X, Y, Z) \ + int lx, ly, lz; \ + int jx, jy, jz; \ + int l##X##_i_m1 = l##X##_i - 1; \ + int l##X##_i_p1 = l##X##_i + 1; \ + double cx, cy, cz, cfac; \ + double fac_i = -2.0 * ai; \ + for (j##Y = 0; j##Y <= l##Y##_j; j##Y++) { \ + c##Y = pc##Y[j##Y+_LEN_CART0[l##Y##_j]]; \ + l##Y = l##Y##_i + j##Y; \ + for (j##Z = 0; j##Z <= l##Z##_j; j##Z++) { \ + c##Z = pc##Z[j##Z+_LEN_CART0[l##Z##_j]]; \ + l##Z = l##Z##_i + j##Z; 
\ + cfac = c##Y * c##Z; \ + for (j##X = 0; j##X <= l##X##_j; j##X++) { \ + if (l##X##_i > 0) { \ + c##X = pc##X[j##X+_LEN_CART0[l##X##_j]] * l##X##_i; \ + l##X = l##X##_i_m1 + j##X; \ + pv1[0] += c##X * cfac * v1_xyz[lx*l1l1+ly*l1+lz]; \ + } \ + c##X = pc##X[j##X+_LEN_CART0[l##X##_j]] * fac_i; \ + l##X = l##X##_i_p1 + j##X; \ + pv1[0] += c##X * cfac * v1_xyz[lx*l1l1+ly*l1+lz]; \ + } \ + } \ + } + + +static void _vrho_loop_ip1_x(double* pv1, double* v1_xyz, + double* pcx, double* pcy, double* pcz, + double ai, double aj, + int lx_i, int ly_i, int lz_i, + int lx_j, int ly_j, int lz_j, int l1, int l1l1) +{ + VRHO_LOOP_IP1(x,y,z); +} + + +static void _vrho_loop_ip1_y(double* pv1, double* v1_xyz, + double* pcx, double* pcy, double* pcz, + double ai, double aj, + int lx_i, int ly_i, int lz_i, + int lx_j, int ly_j, int lz_j, int l1, int l1l1) +{ + VRHO_LOOP_IP1(y,x,z); +} + + +static void _vrho_loop_ip1_z(double* pv1, double* v1_xyz, + double* pcx, double* pcy, double* pcz, + double ai, double aj, + int lx_i, int ly_i, int lz_i, + int lx_j, int ly_j, int lz_j, int l1, int l1l1) +{ + VRHO_LOOP_IP1(z,x,y); +} + + +#define VSIGMA_LOOP(X, Y, Z) \ + int lx, ly, lz; \ + int jx, jy, jz; \ + int l##X##_i_m1 = l##X##_i - 1; \ + int l##X##_i_p1 = l##X##_i + 1; \ + int l##X##_j_m1 = l##X##_j - 1; \ + int l##X##_j_p1 = l##X##_j + 1; \ + double cx, cy, cz, cfac; \ + double fac_i = -2.0 * ai; \ + double fac_j = -2.0 * aj; \ + for (j##Y = 0; j##Y <= l##Y##_j; j##Y++) { \ + c##Y = pc##Y[j##Y+_LEN_CART0[l##Y##_j]]; \ + l##Y = l##Y##_i + j##Y; \ + for (j##Z = 0; j##Z <= l##Z##_j; j##Z++) { \ + c##Z = pc##Z[j##Z+_LEN_CART0[l##Z##_j]]; \ + l##Z = l##Z##_i + j##Z; \ + cfac = c##Y * c##Z; \ + for (j##X = 0; j##X <= l##X##_j_m1; j##X++) { \ + c##X = pc##X[j##X+_LEN_CART0[l##X##_j_m1]] * l##X##_j; \ + l##X = l##X##_i + j##X; \ + pv1[0] += c##X * cfac * v1_xyz[lx*l1l1+ly*l1+lz]; \ + } \ + for (j##X = 0; j##X <= l##X##_j_p1; j##X++) { \ + c##X = pc##X[j##X+_LEN_CART0[l##X##_j_p1]] * fac_j; \ + 
l##X = l##X##_i + j##X; \ + pv1[0] += c##X * cfac * v1_xyz[lx*l1l1+ly*l1+lz]; \ + } \ + for (j##X = 0; j##X <= l##X##_j; j##X++) { \ + if (l##X##_i > 0) { \ + c##X = pc##X[j##X+_LEN_CART0[l##X##_j]] * l##X##_i; \ + l##X = l##X##_i_m1 + j##X; \ + pv1[0] += c##X * cfac * v1_xyz[lx*l1l1+ly*l1+lz]; \ + } \ + c##X = pc##X[j##X+_LEN_CART0[l##X##_j]] * fac_i; \ + l##X = l##X##_i_p1 + j##X; \ + pv1[0] += c##X * cfac * v1_xyz[lx*l1l1+ly*l1+lz]; \ + } \ + } \ + } + + +static void _vsigma_loop_x(double* pv1, double* v1_xyz, + double* pcx, double* pcy, double* pcz, + double ai, double aj, + int lx_i, int ly_i, int lz_i, + int lx_j, int ly_j, int lz_j, int l1, int l1l1) +{ + VSIGMA_LOOP(x,y,z); +} + + +static void _vsigma_loop_y(double* pv1, double* v1_xyz, + double* pcx, double* pcy, double* pcz, + double ai, double aj, + int lx_i, int ly_i, int lz_i, + int lx_j, int ly_j, int lz_j, int l1, int l1l1) +{ + VSIGMA_LOOP(y,x,z); +} + + +static void _vsigma_loop_z(double* pv1, double* v1_xyz, + double* pcx, double* pcy, double* pcz, + double ai, double aj, + int lx_i, int ly_i, int lz_i, + int lx_j, int ly_j, int lz_j, int l1, int l1l1) +{ + VSIGMA_LOOP(z,x,y); +} + + +static void _v1_xyz_to_v1(void (*_v1_loop)(), double* v1_xyz, double* v1, + int li, int lj, double ai, double aj, + double* ri, double* rj, double* cache) +{ + int lx_i, ly_i, lz_i; + int lx_j, ly_j, lz_j; + double rij[3]; + + rij[0] = ri[0] - rj[0]; + rij[1] = ri[1] - rj[1]; + rij[2] = ri[2] - rj[2]; + + int l1 = li + lj + 2; + int l1l1 = l1 * l1; + double *coeff = cache; + int dj = _LEN_CART[lj+1]; + cache += 3 * dj; + + _get_dm_to_dm_xyz_coeff(coeff, rij, lj+1, cache); + + double *pcx = coeff; + double *pcy = pcx + dj; + double *pcz = pcy + dj; + double *pv1 = v1; + for (lx_i = li; lx_i >= 0; lx_i--) { + for (ly_i = li-lx_i; ly_i >= 0; ly_i--) { + lz_i = li - lx_i - ly_i; + for (lx_j = lj; lx_j >= 0; lx_j--) { + for (ly_j = lj-lx_j; ly_j >= 0; ly_j--) { + lz_j = lj - lx_j - ly_j; + _v1_loop(pv1, v1_xyz, pcx, pcy, 
pcz, ai, aj, + lx_i, ly_i, lz_i, lx_j, ly_j, lz_j, l1, l1l1); + pv1 += 1; + } + } + } + } +} + +/* +#define SUM_NABLA_I \ + if (lx_i > 0) { \ + pv1[0] += lx_i * cxyzj * v1x[(lx-1)*l1l1+ly*l1+lz]; \ + } \ + pv1[0] += fac_i * cxyzj * v1x[(lx+1)*l1l1+ly*l1+lz]; \ + if (ly_i > 0) { \ + pv1[0] += ly_i * cxyzj * v1y[lx*l1l1+(ly-1)*l1+lz]; \ + } \ + pv1[0] += fac_i * cxyzj * v1y[lx*l1l1+(ly+1)*l1+lz]; \ + if (lz_i > 0) { \ + pv1[0] += lz_i * cxyzj * v1z[lx*l1l1+ly*l1+lz-1]; \ + } \ + pv1[0] += fac_i * cxyzj * v1z[lx*l1l1+ly*l1+lz+1]; +*/ +/* +static void _vsigma_loop_ip1ip2_x(double* pv1, double* v1x, double* v1y, double* v1z, + double* pcx, double* pcy, double* pcz, + double ai, double aj, + int lx_i, int ly_i, int lz_i, + int lx_j, int ly_j, int lz_j, int l1, int l1l1) +{ + int lx, ly, lz; + int jx, jy, jz; + int lx_j_m1 = lx_j - 1; + int lx_j_p1 = lx_j + 1; + double cxj, cyj, czj, cyzj, cxyzj; + double fac_i = -2.0 * ai; + double fac_j = -2.0 * aj; + + for (jy = 0; jy <= ly_j; jy++) { + cyj = pcy[jy+_LEN_CART0[ly_j]]; + ly = ly_i + jy; + for (jz = 0; jz <= lz_j; jz++) { + czj = pcz[jz+_LEN_CART0[lz_j]]; + lz = lz_i + jz; + cyzj = cyj * czj; + for (jx = 0; jx <= lx_j_m1; jx++) { + cxj = pcx[jx+_LEN_CART0[lx_j_m1]] * lx_j; + cxyzj = cxj * cyzj; + lx = lx_i + jx; + SUM_NABLA_I; + } + for (jx = 0; jx <= lx_j_p1; jx++) { + cxj = pcx[jx+_LEN_CART0[lx_j_p1]] * fac_j; + cxyzj = cxj * cyzj; + lx = lx_i + jx; + SUM_NABLA_I; + } + } + } +} +*/ + +#define COMMON_INIT(x) \ + int l##x##_i; \ + int lx, ly, lz; \ + int jx, jy, jz; \ + int lx_j_m1 = lx_j - 1; \ + int lx_j_p1 = lx_j + 1; \ + int ly_j_m1 = ly_j - 1; \ + int ly_j_p1 = ly_j + 1; \ + int lz_j_m1 = lz_j - 1; \ + int lz_j_p1 = lz_j + 1; \ + double ci; \ + double cxj, cyj, czj; \ + double cyzj, cxzj, cxyj, cxyzj; \ + double fac_i = -2.0 * ai; \ + double fac_j = -2.0 * aj; \ + + +#define SUM_NABLA_J(x, y, z) \ + for (j##y = 0; j##y <= l##y##_j; j##y++) { \ + c##y##j = pc##y[j##y+_LEN_CART0[l##y##_j]]; \ + l##y = l##y##_i + 
j##y; \ + for (j##z = 0; j##z <= l##z##_j; j##z++) { \ + c##z##j = pc##z[j##z+_LEN_CART0[l##z##_j]]; \ + l##z = l##z##_i + j##z; \ + c##y##z##j = c##y##j * c##z##j; \ + for (j##x = 0; j##x <= l##x##_j_m1; j##x++) { \ + c##x##j = pc##x[j##x+_LEN_CART0[l##x##_j_m1]] * l##x##_j; \ + cxyzj = c##x##j * c##y##z##j; \ + l##x = l##x##_i + j##x; \ + pv1[0] += ci * cxyzj * v1##x[lx*l1l1+ly*l1+lz]; \ + } \ + for (j##x = 0; j##x <= l##x##_j_p1; j##x++) { \ + c##x##j = pc##x[j##x+_LEN_CART0[l##x##_j_p1]] * fac_j; \ + cxyzj = c##x##j * c##y##z##j; \ + l##x = l##x##_i + j##x; \ + pv1[0] += ci * cxyzj * v1##x[lx*l1l1+ly*l1+lz]; \ + } \ + } \ + } + + +static void _vsigma_loop_ip1ip2_x(double* pv1, double* v1x, double* v1y, double* v1z, + double* pcx, double* pcy, double* pcz, + double ai, double aj, + int lx_i0, int ly_i, int lz_i, + int lx_j, int ly_j, int lz_j, int l1, int l1l1) +{ + COMMON_INIT(x); + + lx_i = lx_i0 + 1; + ci = fac_i; + SUM_NABLA_J(x,y,z); + SUM_NABLA_J(y,x,z); + SUM_NABLA_J(z,x,y); + + if (lx_i0 > 0) { + lx_i = lx_i0 - 1; + ci = lx_i0; + SUM_NABLA_J(x,y,z); + SUM_NABLA_J(y,x,z); + SUM_NABLA_J(z,x,y); + } +} + +/* +static void _vsigma_loop_ip1ip2_y(double* pv1, double* v1x, double* v1y, double* v1z, + double* pcx, double* pcy, double* pcz, + double ai, double aj, + int lx_i, int ly_i, int lz_i, + int lx_j, int ly_j, int lz_j, int l1, int l1l1) +{ + int lx, ly, lz; + int jx, jy, jz; + int ly_j_m1 = ly_j - 1; + int ly_j_p1 = ly_j + 1; + double cxj, cyj, czj, cxzj, cxyzj; + double fac_i = -2.0 * ai; + double fac_j = -2.0 * aj; + + for (jx = 0; jx <= lx_j; jx++) { + cxj = pcx[jx+_LEN_CART0[lx_j]]; + lx = lx_i + jx; + for (jz = 0; jz <= lz_j; jz++) { + czj = pcz[jz+_LEN_CART0[lz_j]]; + lz = lz_i + jz; + cxzj = cxj * czj; + for (jy = 0; jy <= ly_j_m1; jy++) { + cyj = pcy[jy+_LEN_CART0[ly_j_m1]] * ly_j; + cxyzj = cyj * cxzj; + ly = ly_i + jy; + SUM_NABLA_I; + } + for (jy = 0; jy <= ly_j_p1; jy++) { + cyj = pcy[jy+_LEN_CART0[ly_j_p1]] * fac_j; + cxyzj = cyj * cxzj; + ly 
= ly_i + jy; + SUM_NABLA_I; + } + } + } +} +*/ + +static void _vsigma_loop_ip1ip2_y(double* pv1, double* v1x, double* v1y, double* v1z, + double* pcx, double* pcy, double* pcz, + double ai, double aj, + int lx_i, int ly_i0, int lz_i, + int lx_j, int ly_j, int lz_j, int l1, int l1l1) +{ + COMMON_INIT(y); + + ly_i = ly_i0 + 1; + ci = fac_i; + SUM_NABLA_J(x,y,z); + SUM_NABLA_J(y,x,z); + SUM_NABLA_J(z,x,y); + + if (ly_i0 > 0) { + ly_i = ly_i0 - 1; + ci = ly_i0; + SUM_NABLA_J(x,y,z); + SUM_NABLA_J(y,x,z); + SUM_NABLA_J(z,x,y); + } +} + + +/* +static void _vsigma_loop_ip1ip2_z(double* pv1, double* v1x, double* v1y, double* v1z, + double* pcx, double* pcy, double* pcz, + double ai, double aj, + int lx_i, int ly_i, int lz_i, + int lx_j, int ly_j, int lz_j, int l1, int l1l1) +{ + int lx, ly, lz; + int jx, jy, jz; + int lz_j_m1 = lz_j - 1; + int lz_j_p1 = lz_j + 1; + double cxj, cyj, czj, cxyj, cxyzj; + double fac_i = -2.0 * ai; + double fac_j = -2.0 * aj; + + for (jx = 0; jx <= lx_j; jx++) { + cxj = pcx[jx+_LEN_CART0[lx_j]]; + lx = lx_i + jx; + for (jy = 0; jy <= ly_j; jy++) { + cyj = pcy[jy+_LEN_CART0[ly_j]]; + ly = ly_i + jy; + cxyj = cxj * cyj; + for (jz = 0; jz <= lz_j_m1; jz++) { + czj = pcz[jz+_LEN_CART0[lz_j_m1]] * lz_j; + cxyzj = czj * cxyj; + lz = lz_i + jz; + SUM_NABLA_I; + } + for (jz = 0; jz <= lz_j_p1; jz++) { + czj = pcz[jz+_LEN_CART0[lz_j_p1]] * fac_j; + cxyzj = czj * cxyj; + lz = lz_i + jz; + SUM_NABLA_I; + } + } + } +} +*/ + +static void _vsigma_loop_ip1ip2_z(double* pv1, double* v1x, double* v1y, double* v1z, + double* pcx, double* pcy, double* pcz, + double ai, double aj, + int lx_i, int ly_i, int lz_i0, + int lx_j, int ly_j, int lz_j, int l1, int l1l1) +{ + COMMON_INIT(z); + + lz_i = lz_i0 + 1; + ci = fac_i; + SUM_NABLA_J(x,y,z); + SUM_NABLA_J(y,x,z); + SUM_NABLA_J(z,x,y); + + if (lz_i0 > 0) { + lz_i = lz_i0 - 1; + ci = lz_i0; + SUM_NABLA_J(x,y,z); + SUM_NABLA_J(y,x,z); + SUM_NABLA_J(z,x,y); + } +} + + +static void _vsigma_ip1ip2(void (*_v1_loop)(), 
double* v1x, + double* v1y, double* v1z, double* v1, + int li, int lj, double ai, double aj, + double* ri, double* rj, double* cache) +{ + int lx_i, ly_i, lz_i; + int lx_j, ly_j, lz_j; + double rij[3]; + + rij[0] = ri[0] - rj[0]; + rij[1] = ri[1] - rj[1]; + rij[2] = ri[2] - rj[2]; + + int topl = li + lj + 2; + int l1 = topl + 1; + int l1l1 = l1 * l1; + double *coeff = cache; + int dj = _LEN_CART[lj+1]; + cache += 3 * dj; + + _get_dm_to_dm_xyz_coeff(coeff, rij, lj+1, cache); + + double *pcx = coeff; + double *pcy = pcx + dj; + double *pcz = pcy + dj; + double *pv1 = v1; + for (lx_i = li; lx_i >= 0; lx_i--) { + for (ly_i = li-lx_i; ly_i >= 0; ly_i--) { + lz_i = li - lx_i - ly_i; + for (lx_j = lj; lx_j >= 0; lx_j--) { + for (ly_j = lj-lx_j; ly_j >= 0; ly_j--) { + lz_j = lj - lx_j - ly_j; + _v1_loop(pv1, v1x, v1y, v1z, pcx, pcy, pcz, ai, aj, + lx_i, ly_i, lz_i, lx_j, ly_j, lz_j, l1, l1l1); + pv1 += 1; + } + } + } + } +} + + +static void _vsigma_loop_lap1_x(double* pv1, double* v1x, double* v1y, double* v1z, + double* pcx, double* pcy, double* pcz, + double ai, double aj, + int lx_i, int ly_i, int lz_i, + int lx_j, int ly_j, int lz_j, int l1, int l1l1) +{ + int lx, ly, lz; + int jx, jy, jz; + double cxj, cyj, czj, cxyj, cxyzj; + double fac_x; + double fac_i = -2.0 * ai; + + for (jx = 0; jx <= lx_j; jx++) { + cxj = pcx[jx+_LEN_CART0[lx_j]]; + lx = lx_i + jx; + for (jy = 0; jy <= ly_j; jy++) { + cyj = pcy[jy+_LEN_CART0[ly_j]]; + ly = ly_i + jy; + cxyj = cxj * cyj; + for (jz = 0; jz <= lz_j; jz++) { + czj = pcz[jz+_LEN_CART0[lz_j]]; + lz = lz_i + jz; + cxyzj = cxyj * czj; + + fac_x = lx_i + 1; + pv1[0] += fac_x * fac_i * cxyzj * v1x[lx*l1l1+ly*l1+lz]; + if (lx_i - 1 > 0) { + fac_x = lx_i - 1; + pv1[0] += fac_x * lx_i * cxyzj * v1x[(lx-2)*l1l1+ly*l1+lz]; + } + + if (lx_i > 0) { + fac_x = lx_i; + if (ly_i > 0) { + pv1[0] += fac_x * ly_i * cxyzj * v1y[(lx-1)*l1l1+(ly-1)*l1+lz]; + } + pv1[0] += fac_x * fac_i * cxyzj * v1y[(lx-1)*l1l1+(ly+1)*l1+lz]; + + if (lz_i > 0) { + pv1[0] 
+= fac_x * lz_i * cxyzj * v1z[(lx-1)*l1l1+ly*l1+lz-1]; + } + pv1[0] += fac_x * fac_i * cxyzj * v1z[(lx-1)*l1l1+ly*l1+lz+1]; + } + + fac_x = fac_i; + if (lx_i > 0) { + pv1[0] += fac_x * lx_i * cxyzj * v1x[lx*l1l1+ly*l1+lz]; + } + pv1[0] += fac_x * fac_i * cxyzj * v1x[(lx+2)*l1l1+ly*l1+lz]; + + if (ly_i > 0) { + pv1[0] += fac_x * ly_i * cxyzj * v1y[(lx+1)*l1l1+(ly-1)*l1+lz]; + } + pv1[0] += fac_x * fac_i * cxyzj * v1y[(lx+1)*l1l1+(ly+1)*l1+lz]; + + if (lz_i > 0) { + pv1[0] += fac_x * lz_i * cxyzj * v1z[(lx+1)*l1l1+ly*l1+lz-1]; + } + pv1[0] += fac_x * fac_i * cxyzj * v1z[(lx+1)*l1l1+ly*l1+lz+1]; + } + } + } +} + + +static void _vsigma_loop_lap1_y(double* pv1, double* v1x, double* v1y, double* v1z, + double* pcx, double* pcy, double* pcz, + double ai, double aj, + int lx_i, int ly_i, int lz_i, + int lx_j, int ly_j, int lz_j, int l1, int l1l1) +{ + int lx, ly, lz; + int jx, jy, jz; + double cxj, cyj, czj, cxyj, cxyzj; + double fac_y; + double fac_i = -2.0 * ai; + + for (jx = 0; jx <= lx_j; jx++) { + cxj = pcx[jx+_LEN_CART0[lx_j]]; + lx = lx_i + jx; + for (jy = 0; jy <= ly_j; jy++) { + cyj = pcy[jy+_LEN_CART0[ly_j]]; + ly = ly_i + jy; + cxyj = cxj * cyj; + for (jz = 0; jz <= lz_j; jz++) { + czj = pcz[jz+_LEN_CART0[lz_j]]; + lz = lz_i + jz; + cxyzj = cxyj * czj; + + fac_y = ly_i + 1; + pv1[0] += fac_y * fac_i * cxyzj * v1y[lx*l1l1+ly*l1+lz]; + if (ly_i - 1 > 0) { + fac_y = ly_i - 1; + pv1[0] += fac_y * ly_i * cxyzj * v1y[lx*l1l1+(ly-2)*l1+lz]; + } + + if (ly_i > 0) { + fac_y = ly_i; + if (lx_i > 0) { + pv1[0] += fac_y * lx_i * cxyzj * v1x[(lx-1)*l1l1+(ly-1)*l1+lz]; + } + pv1[0] += fac_y * fac_i * cxyzj * v1x[(lx+1)*l1l1+(ly-1)*l1+lz]; + + if (lz_i > 0) { + pv1[0] += fac_y * lz_i * cxyzj * v1z[lx*l1l1+(ly-1)*l1+lz-1]; + } + pv1[0] += fac_y * fac_i * cxyzj * v1z[lx*l1l1+(ly-1)*l1+lz+1]; + } + + fac_y = fac_i; + if (lx_i > 0) { + pv1[0] += fac_y * lx_i * cxyzj * v1x[(lx-1)*l1l1+(ly+1)*l1+lz]; + } + pv1[0] += fac_y * fac_i * cxyzj * v1x[(lx+1)*l1l1+(ly+1)*l1+lz]; + + if (ly_i 
> 0) { + pv1[0] += fac_y * ly_i * cxyzj * v1y[lx*l1l1+ly*l1+lz]; + } + pv1[0] += fac_y * fac_i * cxyzj * v1y[lx*l1l1+(ly+2)*l1+lz]; + + if (lz_i > 0) { + pv1[0] += fac_y * lz_i * cxyzj * v1z[lx*l1l1+(ly+1)*l1+lz-1]; + } + pv1[0] += fac_y * fac_i * cxyzj * v1z[lx*l1l1+(ly+1)*l1+lz+1]; + } + } + } +} + + +static void _vsigma_loop_lap1_z(double* pv1, double* v1x, double* v1y, double* v1z, + double* pcx, double* pcy, double* pcz, + double ai, double aj, + int lx_i, int ly_i, int lz_i, + int lx_j, int ly_j, int lz_j, int l1, int l1l1) +{ + int lx, ly, lz; + int jx, jy, jz; + double cxj, cyj, czj, cxyj, cxyzj; + double fac_z; + double fac_i = -2.0 * ai; + + for (jx = 0; jx <= lx_j; jx++) { + cxj = pcx[jx+_LEN_CART0[lx_j]]; + lx = lx_i + jx; + for (jy = 0; jy <= ly_j; jy++) { + cyj = pcy[jy+_LEN_CART0[ly_j]]; + ly = ly_i + jy; + cxyj = cxj * cyj; + for (jz = 0; jz <= lz_j; jz++) { + czj = pcz[jz+_LEN_CART0[lz_j]]; + lz = lz_i + jz; + cxyzj = cxyj * czj; + + fac_z = lz_i + 1; + pv1[0] += fac_z * fac_i * cxyzj * v1z[lx*l1l1+ly*l1+lz]; + if (lz_i - 1 > 0) { + fac_z = lz_i - 1; + pv1[0] += fac_z * lz_i * cxyzj * v1z[lx*l1l1+ly*l1+lz-2]; + } + + if (lz_i > 0) { + fac_z = lz_i; + if (lx_i > 0) { + pv1[0] += fac_z * lx_i * cxyzj * v1x[(lx-1)*l1l1+ly*l1+lz-1]; + } + pv1[0] += fac_z * fac_i * cxyzj * v1x[(lx+1)*l1l1+ly*l1+lz-1]; + + if (ly_i > 0) { + pv1[0] += fac_z * ly_i * cxyzj * v1y[lx*l1l1+(ly-1)*l1+lz-1]; + } + pv1[0] += fac_z * fac_i * cxyzj * v1y[lx*l1l1+(ly+1)*l1+lz-1]; + } + + fac_z = fac_i; + if (lx_i > 0) { + pv1[0] += fac_z * lx_i * cxyzj * v1x[(lx-1)*l1l1+ly*l1+lz+1]; + } + pv1[0] += fac_z * fac_i * cxyzj * v1x[(lx+1)*l1l1+ly*l1+lz+1]; + + if (ly_i > 0) { + pv1[0] += fac_z * ly_i * cxyzj * v1y[lx*l1l1+(ly-1)*l1+lz+1]; + } + pv1[0] += fac_z * fac_i * cxyzj * v1y[lx*l1l1+(ly+1)*l1+lz+1]; + + if (lz_i > 0) { + pv1[0] += fac_z * lz_i * cxyzj * v1z[lx*l1l1+ly*l1+lz]; + } + pv1[0] += fac_z * fac_i * cxyzj * v1z[lx*l1l1+ly*l1+lz+2]; + } + } + } +} + + +static void 
_vsigma_lap1(void (*_v1_loop)(), double* v1x, + double* v1y, double* v1z, double* v1, + int li, int lj, double ai, double aj, + double* ri, double* rj, double* cache) +{ + int lx_i, ly_i, lz_i; + int lx_j, ly_j, lz_j; + double rij[3]; + + rij[0] = ri[0] - rj[0]; + rij[1] = ri[1] - rj[1]; + rij[2] = ri[2] - rj[2]; + + int topl = li + lj + 2; + int l1 = topl + 1; + int l1l1 = l1 * l1; + double *coeff = cache; + int dj = _LEN_CART[lj]; + cache += 3 * dj; + + _get_dm_to_dm_xyz_coeff(coeff, rij, lj, cache); + + double *pcx = coeff; + double *pcy = pcx + dj; + double *pcz = pcy + dj; + double *pv1 = v1; + for (lx_i = li; lx_i >= 0; lx_i--) { + for (ly_i = li-lx_i; ly_i >= 0; ly_i--) { + lz_i = li - lx_i - ly_i; + for (lx_j = lj; lx_j >= 0; lx_j--) { + for (ly_j = lj-lx_j; ly_j >= 0; ly_j--) { + lz_j = lj - lx_j - ly_j; + _v1_loop(pv1, v1x, v1y, v1z, pcx, pcy, pcz, ai, aj, + lx_i, ly_i, lz_i, lx_j, ly_j, lz_j, l1, l1l1); + pv1 += 1; + } + } + } + } +} + + +int eval_mat_gga_orth(double *weights, double *out, int comp, + int li, int lj, double ai, double aj, + double *ri, double *rj, double fac, double cutoff, + int dimension, double* dh, double *a, double *b, + int *mesh, double *cache) +{ + int topl = li + lj + 1; + int l1 = topl+1; + int l1l1l1 = l1 * l1 * l1; + double *mat_xyz = cache; + cache += l1l1l1; + int grid_slice[6]; + double *xs_exp, *ys_exp, *zs_exp; + + int data_size = init_orth_data(&xs_exp, &ys_exp, &zs_exp, + grid_slice, dh, mesh, topl, cutoff, + ai, aj, ri, rj, cache); + if (data_size == 0) { + return 0; + } + cache += data_size; + + size_t ngrids = ((size_t)mesh[0]) * mesh[1] * mesh[2]; + double *vx = weights + ngrids; + double *vy = vx + ngrids; + double *vz = vy + ngrids; + + memset(mat_xyz, 0, l1l1l1*sizeof(double)); + _orth_ints(mat_xyz, weights, li+lj, fac, xs_exp, ys_exp, zs_exp, + grid_slice, mesh, cache); + _dm_xyz_to_dm(mat_xyz, out, li, lj, ri, rj, cache); + + memset(mat_xyz, 0, l1l1l1*sizeof(double)); + _orth_ints(mat_xyz, vx, topl, fac, 
xs_exp, ys_exp, zs_exp, + grid_slice, mesh, cache); + _v1_xyz_to_v1(_vsigma_loop_x, mat_xyz, out, li, lj, ai, aj, ri, rj, cache); + + memset(mat_xyz, 0, l1l1l1*sizeof(double)); + _orth_ints(mat_xyz, vy, topl, fac, xs_exp, ys_exp, zs_exp, + grid_slice, mesh, cache); + _v1_xyz_to_v1(_vsigma_loop_y, mat_xyz, out, li, lj, ai, aj, ri, rj, cache); + + memset(mat_xyz, 0, l1l1l1*sizeof(double)); + _orth_ints(mat_xyz, vz, topl, fac, xs_exp, ys_exp, zs_exp, + grid_slice, mesh, cache); + _v1_xyz_to_v1(_vsigma_loop_z, mat_xyz, out, li, lj, ai, aj, ri, rj, cache); + + return 1; +} + + +int eval_mat_lda_orth(double *weights, double *out, int comp, + int li, int lj, double ai, double aj, + double *ri, double *rj, double fac, double cutoff, + int dimension, double* dh, double *a, double *b, + int *mesh, double *cache) +{ + int topl = li + lj; + int l1 = topl+1; + int l1l1l1 = l1*l1*l1; + int grid_slice[6]; + double *xs_exp, *ys_exp, *zs_exp; + int data_size = init_orth_data(&xs_exp, &ys_exp, &zs_exp, + grid_slice, dh, mesh, topl, cutoff, + ai, aj, ri, rj, cache); + + if (data_size == 0) { + return 0; + } + cache += data_size; + + double *dm_xyz = cache; + cache += l1l1l1; + + memset(dm_xyz, 0, l1l1l1*sizeof(double)); + _orth_ints(dm_xyz, weights, topl, fac, xs_exp, ys_exp, zs_exp, + grid_slice, mesh, cache); + + _dm_xyz_to_dm(dm_xyz, out, li, lj, ri, rj, cache); + return 1; +} + + +int eval_mat_lda_orth_ip1(double *weights, double *out, int comp, + int li, int lj, double ai, double aj, + double *ri, double *rj, double fac, double cutoff, + int dimension, double* dh, double *a, double *b, + int *mesh, double *cache) +{ + int dij = _LEN_CART[li] * _LEN_CART[lj]; + int topl = li + lj + 1; + int l1 = topl+1; + int l1l1l1 = l1*l1*l1; + int grid_slice[6]; + double *xs_exp, *ys_exp, *zs_exp; + + int data_size = init_orth_data(&xs_exp, &ys_exp, &zs_exp, + grid_slice, dh, mesh, topl, cutoff, + ai, aj, ri, rj, cache); + if (data_size == 0) { + return 0; + } + cache += data_size; + + double 
*mat_xyz = cache; + cache += l1l1l1; + double *pout_x = out; + double *pout_y = pout_x + dij; + double *pout_z = pout_y + dij; + + memset(mat_xyz, 0, l1l1l1*sizeof(double)); + _orth_ints(mat_xyz, weights, topl, fac, xs_exp, ys_exp, zs_exp, + grid_slice, mesh, cache); + _v1_xyz_to_v1(_vrho_loop_ip1_x, mat_xyz, pout_x, li, lj, ai, aj, ri, rj, cache); + _v1_xyz_to_v1(_vrho_loop_ip1_y, mat_xyz, pout_y, li, lj, ai, aj, ri, rj, cache); + _v1_xyz_to_v1(_vrho_loop_ip1_z, mat_xyz, pout_z, li, lj, ai, aj, ri, rj, cache); + return 1; +} + + +int eval_mat_gga_orth_ip1(double *weights, double *out, int comp, + int li, int lj, double ai, double aj, + double *ri, double *rj, double fac, double cutoff, + int dimension, double* dh, double *a, double *b, + int *mesh, double *cache) +{ + int dij = _LEN_CART[li] * _LEN_CART[lj]; + int topl = li + lj + 2; + int l1 = topl+1; + int l1l1l1 = l1*l1*l1; + int grid_slice[6]; + double *xs_exp, *ys_exp, *zs_exp; + + int data_size = init_orth_data(&xs_exp, &ys_exp, &zs_exp, + grid_slice, dh, mesh, topl, cutoff, + ai, aj, ri, rj, cache); + if (data_size == 0) { + return 0; + } + cache += data_size; + + double *mat_xyz = cache; + double *mat_x = mat_xyz; + double *mat_y = mat_x + l1l1l1; + double *mat_z = mat_y + l1l1l1; + cache += l1l1l1*3; + double *pout_x = out; + double *pout_y = pout_x + dij; + double *pout_z = pout_y + dij; + + size_t ngrids = ((size_t)mesh[0]) * mesh[1] * mesh[2]; + double *vx = weights + ngrids; + double *vy = vx + ngrids; + double *vz = vy + ngrids; + + //vrho part + memset(mat_xyz, 0, l1l1l1*sizeof(double)); + _orth_ints(mat_xyz, weights, topl-1, fac, xs_exp, ys_exp, zs_exp, + grid_slice, mesh, cache); + _v1_xyz_to_v1(_vrho_loop_ip1_x, mat_xyz, pout_x, li, lj, ai, aj, ri, rj, cache); + _v1_xyz_to_v1(_vrho_loop_ip1_y, mat_xyz, pout_y, li, lj, ai, aj, ri, rj, cache); + _v1_xyz_to_v1(_vrho_loop_ip1_z, mat_xyz, pout_z, li, lj, ai, aj, ri, rj, cache); + + //vsigma part + memset(mat_x, 0, l1l1l1*sizeof(double)); + 
_orth_ints(mat_x, vx, topl, fac, xs_exp, ys_exp, zs_exp, + grid_slice, mesh, cache); + + memset(mat_y, 0, l1l1l1*sizeof(double)); + _orth_ints(mat_y, vy, topl, fac, xs_exp, ys_exp, zs_exp, + grid_slice, mesh, cache); + + memset(mat_z, 0, l1l1l1*sizeof(double)); + _orth_ints(mat_z, vz, topl, fac, xs_exp, ys_exp, zs_exp, + grid_slice, mesh, cache); + + _vsigma_ip1ip2(_vsigma_loop_ip1ip2_x, mat_x, mat_y, mat_z, + pout_x, li, lj, ai, aj, ri, rj, cache); + _vsigma_ip1ip2(_vsigma_loop_ip1ip2_y, mat_x, mat_y, mat_z, + pout_y, li, lj, ai, aj, ri, rj, cache); + _vsigma_ip1ip2(_vsigma_loop_ip1ip2_z, mat_x, mat_y, mat_z, + pout_z, li, lj, ai, aj, ri, rj, cache); + + _vsigma_lap1(_vsigma_loop_lap1_x, mat_x, mat_y, mat_z, + pout_x, li, lj, ai, aj, ri, rj, cache); + _vsigma_lap1(_vsigma_loop_lap1_y, mat_x, mat_y, mat_z, + pout_y, li, lj, ai, aj, ri, rj, cache); + _vsigma_lap1(_vsigma_loop_lap1_z, mat_x, mat_y, mat_z, + pout_z, li, lj, ai, aj, ri, rj, cache); + return 1; +} + + +void _apply_ints(int (*eval_ints)(), double *weights, double *mat, + PGFPair* pgfpair, int comp, double fac, int dimension, + double* dh, double *a, double *b, int *mesh, + double* ish_gto_norm, double* jsh_gto_norm, + int *ish_atm, int *ish_bas, double *ish_env, + int *jsh_atm, int *jsh_bas, double *jsh_env, + double* Ls, double *cache) +{ + int i_sh = pgfpair->ish; + int j_sh = pgfpair->jsh; + int ipgf = pgfpair->ipgf; + int jpgf = pgfpair->jpgf; + int iL = pgfpair->iL; + double cutoff = pgfpair->radius; + + int li = ish_bas[ANG_OF+i_sh*BAS_SLOTS]; + int lj = jsh_bas[ANG_OF+j_sh*BAS_SLOTS]; + int di = _LEN_CART[li]; + int dj = _LEN_CART[lj]; + + int ish_nprim = ish_bas[NPRIM_OF+i_sh*BAS_SLOTS]; + int jsh_nprim = jsh_bas[NPRIM_OF+j_sh*BAS_SLOTS]; + int naoi = ish_nprim * di; + int naoj = jsh_nprim * dj; + + double *ri = ish_env + ish_atm[PTR_COORD+ish_bas[ATOM_OF+i_sh*BAS_SLOTS]*ATM_SLOTS]; + double *rj = jsh_env + jsh_atm[PTR_COORD+jsh_bas[ATOM_OF+j_sh*BAS_SLOTS]*ATM_SLOTS]; + double *rL = Ls + iL*3; + 
double rjL[3]; + rjL[0] = rj[0] + rL[0]; + rjL[1] = rj[1] + rL[1]; + rjL[2] = rj[2] + rL[2]; + + double ai = ish_env[ish_bas[PTR_EXP+i_sh*BAS_SLOTS]+ipgf]; + double aj = jsh_env[jsh_bas[PTR_EXP+j_sh*BAS_SLOTS]+jpgf]; + double ci = ish_gto_norm[ipgf]; + double cj = jsh_gto_norm[jpgf]; + double aij = ai + aj; + double rrij = CINTsquare_dist(ri, rjL); + double eij = (ai * aj / aij) * rrij; + if (eij > EIJCUTOFF) { + return; + } + fac *= exp(-eij) * ci * cj * CINTcommon_fac_sp(li) * CINTcommon_fac_sp(lj); + if (fac < ish_env[PTR_EXPDROP] && fac < jsh_env[PTR_EXPDROP]) { + return; + } + + double *out = cache; + memset(out, 0, comp*di*dj*sizeof(double)); + cache += comp * di * dj; + + int value = (*eval_ints)(weights, out, comp, li, lj, ai, aj, ri, rjL, + fac, cutoff, dimension, dh, a, b, mesh, cache); + + double *pmat = mat + ipgf*di*naoj + jpgf*dj; + if (value != 0) { + int i, j, ic; + for (ic = 0; ic < comp; ic++) { + for (i = 0; i < di; i++) { + #pragma omp simd + for (j = 0; j < dj; j++) { + pmat[i*naoj+j] += out[i*dj+j]; + } + } + pmat += naoi * naoj; + out += di * dj; + } + } +} + + +static size_t _ints_cache_size(int l, int nprim, int nctr, int* mesh, double radius, double* dh, int comp) +{ + size_t size = 0; + size_t nmx = get_max_num_grid_orth(dh, radius); + int max_mesh = MAX(MAX(mesh[0], mesh[1]), mesh[2]); + int l1 = 2 * l + 1; + if (comp == 3) { + l1 += 1; + } + int l1l1 = l1 * l1; + int ncart = _LEN_CART[l1]; // use l1 to be safe + + size += comp * nprim * nprim * ncart * ncart; // dm_cart + size += comp * ncart * ncart; // out + size += l1 * (mesh[0] + mesh[1] + mesh[2]); // xs_exp, ys_exp, zs_exp + + size_t size_orth_components = l1 * nmx + nmx; // orth_components + size += l1l1 * l1; // dm_xyz + size += 3 * (ncart + l1); // _dm_xyz_to_dm + + size_t size_orth_ints = 0; + if (nmx < max_mesh) { + size_orth_ints = (l1 + l1l1) * nmx; + } else { + size_orth_ints = l1*mesh[2] + l1l1*mesh[0]; + } + size += MAX(size_orth_components, size_orth_ints); + size += 
nctr * ncart * nprim * ncart; + //size += 1000000; + //printf("Memory allocated per thread for make_mat: %ld MB.\n", size*sizeof(double) / 1000000); + return size; +} + + +static size_t _ints_core_cache_size(int* mesh, double radius, double* dh, int comp) +{ + size_t size = 0; + size_t nmx = get_max_num_grid_orth(dh, radius); + int max_mesh = MAX(MAX(mesh[0], mesh[1]), mesh[2]); + const int l = 0; + int l1 = l + 1; + if (comp == 3) { + l1 += 1; + } + int l1l1 = l1 * l1; + int ncart = _LEN_CART[l1]; + + size_t size_orth_components = l1 * nmx + nmx; + size_t size_orth_ints = 0; + if (nmx < max_mesh) { + size_orth_ints = (l1 + l1l1) * nmx; + } else { + size_orth_ints = l1*mesh[2] + l1l1*mesh[0]; + } + size += MAX(size_orth_components, size_orth_ints); + size += l1 * (mesh[0] + mesh[1] + mesh[2]); + size += l1l1 * l1; + size += 3 * (ncart + l1); + //size += 1000000; + return size; +} + + +void grid_integrate_drv(int (*eval_ints)(), double* mat, double* weights, TaskList** task_list, + int comp, int hermi, int grid_level, + int *shls_slice, int* ish_ao_loc, int* jsh_ao_loc, + int dimension, double* Ls, double* a, double* b, + int* ish_atm, int* ish_bas, double* ish_env, + int* jsh_atm, int* jsh_bas, double* jsh_env, int cart) +{ + TaskList* tl = *task_list; + GridLevel_Info* gridlevel_info = tl->gridlevel_info; + Task *task = (tl->tasks)[grid_level]; + int ntasks = task->ntasks; + if (ntasks <= 0) { + return; + } + double max_radius = task->radius; + PGFPair **pgfpairs = task->pgfpairs; + int* mesh = gridlevel_info->mesh + grid_level*3; + + double dh[9]; + get_grid_spacing(dh, a, mesh); + + const int ish0 = shls_slice[0]; + const int ish1 = shls_slice[1]; + const int jsh0 = shls_slice[2]; + const int jsh1 = shls_slice[3]; + const int nish = ish1 - ish0; + const int njsh = jsh1 - jsh0; + //const int nijsh = nish * njsh; + const int naoi = ish_ao_loc[ish1] - ish_ao_loc[ish0]; + const int naoj = jsh_ao_loc[jsh1] - jsh_ao_loc[jsh0]; + + int ish_lmax = get_lmax(ish0, ish1, 
ish_bas); + int jsh_lmax = ish_lmax; + if (hermi != 1) { + jsh_lmax = get_lmax(jsh0, jsh1, jsh_bas); + } + + int ish_nprim_max = get_nprim_max(ish0, ish1, ish_bas); + int jsh_nprim_max = ish_nprim_max; + if (hermi != 1) { + jsh_nprim_max = get_nprim_max(jsh0, jsh1, jsh_bas); + } + + int ish_nctr_max = get_nctr_max(ish0, ish1, ish_bas); + int jsh_nctr_max = ish_nctr_max; + if (hermi != 1) { + jsh_nctr_max = get_nctr_max(jsh0, jsh1, jsh_bas); + } + + double **gto_norm_i = (double**) malloc(sizeof(double*) * nish); + double **cart2sph_coeff_i = (double**) malloc(sizeof(double*) * nish); + get_cart2sph_coeff(cart2sph_coeff_i, gto_norm_i, ish0, ish1, ish_bas, ish_env, cart); + double **gto_norm_j = gto_norm_i; + double **cart2sph_coeff_j = cart2sph_coeff_i; + if (hermi != 1) { + gto_norm_j = (double**) malloc(sizeof(double*) * njsh); + cart2sph_coeff_j = (double**) malloc(sizeof(double*) * njsh); + get_cart2sph_coeff(cart2sph_coeff_j, gto_norm_j, jsh0, jsh1, jsh_bas, jsh_env, cart); + } + + int *task_loc; + int nblock = get_task_loc(&task_loc, pgfpairs, ntasks, ish0, ish1, jsh0, jsh1, hermi); + + size_t cache_size = _ints_cache_size(MAX(ish_lmax,jsh_lmax), + MAX(ish_nprim_max, jsh_nprim_max), + MAX(ish_nctr_max, jsh_nctr_max), + mesh, max_radius, dh, comp); + +#pragma omp parallel +{ + int ish, jsh, itask, iblock; + int li, lj, ish_nprim, jsh_nprim; + PGFPair *pgfpair = NULL; + double *ptr_gto_norm_i, *ptr_gto_norm_j; + double *cache0 = malloc(sizeof(double) * cache_size); + double *dm_cart = cache0; + int len_dm_cart = comp*ish_nprim_max*_LEN_CART[ish_lmax]*jsh_nprim_max*_LEN_CART[jsh_lmax]; + double *cache = dm_cart + len_dm_cart; + + #pragma omp for schedule(dynamic) + for (iblock = 0; iblock < nblock; iblock+=2) { + itask = task_loc[iblock]; + pgfpair = pgfpairs[itask]; + ish = pgfpair->ish; + jsh = pgfpair->jsh; + ptr_gto_norm_i = gto_norm_i[ish]; + ptr_gto_norm_j = gto_norm_j[jsh]; + li = ish_bas[ANG_OF+ish*BAS_SLOTS]; + lj = jsh_bas[ANG_OF+jsh*BAS_SLOTS]; + 
ish_nprim = ish_bas[NPRIM_OF+ish*BAS_SLOTS]; + jsh_nprim = jsh_bas[NPRIM_OF+jsh*BAS_SLOTS]; + len_dm_cart = comp*ish_nprim*_LEN_CART[li]*jsh_nprim*_LEN_CART[lj]; + memset(dm_cart, 0, len_dm_cart * sizeof(double)); + for (; itask < task_loc[iblock+1]; itask++) { + pgfpair = pgfpairs[itask]; + _apply_ints(eval_ints, weights, dm_cart, pgfpair, comp, 1.0, dimension, dh, a, b, mesh, + ptr_gto_norm_i, ptr_gto_norm_j, ish_atm, ish_bas, ish_env, + jsh_atm, jsh_bas, jsh_env, Ls, cache); + } + transform_dm_inverse(dm_cart, mat, comp, + cart2sph_coeff_i[ish], cart2sph_coeff_j[jsh], + ish_ao_loc, jsh_ao_loc, ish_bas, jsh_bas, + ish, jsh, ish0, jsh0, naoi, naoj, cache); + if (hermi == 1 && ish != jsh) { + fill_tril(mat, comp, ish_ao_loc, jsh_ao_loc, + ish, jsh, ish0, jsh0, naoi, naoj); + } + } + free(cache0); +} + + if (task_loc) { + free(task_loc); + } + del_cart2sph_coeff(cart2sph_coeff_i, gto_norm_i, ish0, ish1); + if (hermi != 1) { + del_cart2sph_coeff(cart2sph_coeff_j, gto_norm_j, jsh0, jsh1); + } +} + + +void int_gauss_charge_v_rs(int (*eval_ints)(), double* out, double* v_rs, int comp, + int* atm, int* bas, int nbas, double* env, + int* mesh, int dimension, double* a, double* b, double max_radius) +{ + double dh[9]; + get_grid_spacing(dh, a, mesh); + + size_t cache_size = _ints_core_cache_size(mesh, max_radius, dh, comp); + +#pragma omp parallel +{ + int ia, ib; + double alpha, coeff, charge, rad, fac; + double *r0; + double *cache = (double*) malloc(sizeof(double) * cache_size); + #pragma omp for schedule(static) + for (ib = 0; ib < nbas; ib++) { + ia = bas[ib*BAS_SLOTS+ATOM_OF]; + alpha = env[bas[ib*BAS_SLOTS+PTR_EXP]]; + coeff = env[bas[ib*BAS_SLOTS+PTR_COEFF]]; + charge = (double)atm[ia*ATM_SLOTS+CHARGE_OF]; + r0 = env + atm[ia*ATM_SLOTS+PTR_COORD]; + fac = -charge * coeff; + rad = env[atm[ia*ATM_SLOTS+PTR_RADIUS]]; + (*eval_ints)(v_rs, out+ia*comp, comp, 0, 0, alpha, 0.0, r0, r0, + fac, rad, dimension, dh, a, b, mesh, cache); + } + free(cache); +} +} diff --git 
a/pyscf/lib/dft/libxc_itrf.c b/pyscf/lib/dft/libxc_itrf.c index 76d7497980..badeab597a 100644 --- a/pyscf/lib/dft/libxc_itrf.c +++ b/pyscf/lib/dft/libxc_itrf.c @@ -15,6 +15,7 @@ * * Authors: Qiming Sun * Susi Lehtola + * Xing Zhang * * libxc from * http://www.tddft.org/programs/octopus/wiki/index.php/Libxc:manual @@ -24,7 +25,10 @@ #include #include #include +#include "config.h" #define MAX(X,Y) ((X) > (Y) ? (X) : (Y)) +#define MIN(X,Y) ((X) < (Y) ? (X) : (Y)) +#define MAX_THREADS 256 // TODO: register python signal #define raise_error return @@ -83,13 +87,13 @@ * In spin restricted case (spin == 1), rho_u is assumed to be the * spin-free quantities, rho_d is not used. */ -static void _eval_rho(double *rho, double *rho_u, int spin, int nvar, int np) +static void _eval_rho(double *rho, double *rho_u, int spin, int nvar, int np, int ld_rho_u) { int i; double *sigma, *tau; double *gxu, *gyu, *gzu, *gxd, *gyd, *gzd; double *tau_u, *tau_d; - double *rho_d = rho_u + np * nvar; + double *rho_d = rho_u + ld_rho_u * nvar; switch (nvar) { case LDA_NVAR: @@ -107,12 +111,12 @@ static void _eval_rho(double *rho, double *rho_u, int spin, int nvar, int np) case GGA_NVAR: if (spin == 1) { sigma = rho + np * 2; - gxu = rho_u + np; - gyu = rho_u + np * 2; - gzu = rho_u + np * 3; - gxd = rho_d + np; - gyd = rho_d + np * 2; - gzd = rho_d + np * 3; + gxu = rho_u + ld_rho_u; + gyu = rho_u + ld_rho_u * 2; + gzu = rho_u + ld_rho_u * 3; + gxd = rho_d + ld_rho_u; + gyd = rho_d + ld_rho_u * 2; + gzd = rho_d + ld_rho_u * 3; for (i = 0; i < np; i++) { rho[i*2+0] = rho_u[i]; rho[i*2+1] = rho_d[i]; @@ -122,9 +126,9 @@ static void _eval_rho(double *rho, double *rho_u, int spin, int nvar, int np) } } else { sigma = rho + np; - gxu = rho_u + np; - gyu = rho_u + np * 2; - gzu = rho_u + np * 3; + gxu = rho_u + ld_rho_u; + gyu = rho_u + ld_rho_u * 2; + gzu = rho_u + ld_rho_u * 3; for (i = 0; i < np; i++) { rho[i] = rho_u[i]; sigma[i] = gxu[i]*gxu[i] + gyu[i]*gyu[i] + gzu[i]*gzu[i]; @@ -135,14 +139,14 
@@ static void _eval_rho(double *rho, double *rho_u, int spin, int nvar, int np) if (spin == 1) { sigma = rho + np * 2; tau = sigma + np * 3; - gxu = rho_u + np; - gyu = rho_u + np * 2; - gzu = rho_u + np * 3; - gxd = rho_d + np; - gyd = rho_d + np * 2; - gzd = rho_d + np * 3; - tau_u = rho_u + np * 4; - tau_d = rho_d + np * 4; + gxu = rho_u + ld_rho_u; + gyu = rho_u + ld_rho_u * 2; + gzu = rho_u + ld_rho_u * 3; + gxd = rho_d + ld_rho_u; + gyd = rho_d + ld_rho_u * 2; + gzd = rho_d + ld_rho_u * 3; + tau_u = rho_u + ld_rho_u * 4; + tau_d = rho_d + ld_rho_u * 4; for (i = 0; i < np; i++) { rho[i*2+0] = rho_u[i]; rho[i*2+1] = rho_d[i]; @@ -157,10 +161,10 @@ static void _eval_rho(double *rho, double *rho_u, int spin, int nvar, int np) } else { sigma = rho + np; tau = sigma + np; - gxu = rho_u + np; - gyu = rho_u + np * 2; - gzu = rho_u + np * 3; - tau_u = rho_u + np * 4; + gxu = rho_u + ld_rho_u; + gyu = rho_u + ld_rho_u * 2; + gzu = rho_u + ld_rho_u * 3; + tau_u = rho_u + ld_rho_u * 4; for (i = 0; i < np; i++) { rho[i] = rho_u[i]; sigma[i] = gxu[i]*gxu[i] + gyu[i]*gyu[i] + gzu[i]*gzu[i]; @@ -171,7 +175,7 @@ static void _eval_rho(double *rho, double *rho_u, int spin, int nvar, int np) } } static void _eval_xc(xc_func_type *func_x, int spin, int deriv, int np, - double *rho, double *exc) + double *rho, double *exc, int offset, int blksize) { double *sigma, *tau; double *lapl = rho; @@ -266,6 +270,21 @@ static void _eval_xc(xc_func_type *func_x, int spin, int deriv, int np, if (deriv > 3) { v4rho4 = v3rho3 + np * 4; } + + // set offset + exc += offset; + if (deriv > 0) { + vrho += offset * 2; + } + if (deriv > 1) { + v2rho2 += offset * 3; + } + if (deriv > 2) { + v3rho3 += offset * 4; + } + if (deriv > 3) { + v4rho4 += offset * 5; + } } else { if (deriv > 0) { vrho = exc + np; @@ -279,15 +298,30 @@ static void _eval_xc(xc_func_type *func_x, int spin, int deriv, int np, if (deriv > 3) { v4rho4 = v3rho3 + np; } + + // set offset + exc += offset; + if (deriv > 0) { + vrho += 
offset; + } + if (deriv > 1) { + v2rho2 += offset; + } + if (deriv > 2) { + v3rho3 += offset; + } + if (deriv > 3) { + v4rho4 += offset; + } } - xc_lda(func_x, np, rho, exc, vrho, v2rho2, v3rho3, v4rho4); + xc_lda(func_x, blksize, rho, exc, vrho, v2rho2, v3rho3, v4rho4); break; case XC_FAMILY_GGA: #ifdef XC_FAMILY_HYB_GGA case XC_FAMILY_HYB_GGA: #endif if (spin == 1) { - sigma = rho + np * 2; + sigma = rho + blksize * 2; if (deriv > 0) { vrho = exc + np; vsigma = vrho + np * 2; @@ -310,8 +344,33 @@ static void _eval_xc(xc_func_type *func_x, int spin, int deriv, int np, v4rhosigma3 = v4rho2sigma2 + np * 3*6 ; v4sigma4 = v4rhosigma3 + np * 2*10; } + + // set offset + exc += offset; + if (deriv > 0) { + vrho += offset * 2; + vsigma += offset * 3; + } + if (deriv > 1) { + v2rho2 += offset * 3; + v2rhosigma += offset * 6; + v2sigma2 += offset * 6; + } + if (deriv > 2) { + v3rho3 += offset * 4; + v3rho2sigma += offset * 9; + v3rhosigma2 += offset * 12; + v3sigma3 += offset * 10; + } + if (deriv > 3) { + v4rho4 += offset * 5; + v4rho3sigma += offset * 4*3; + v4rho2sigma2 += offset * 3*6; + v4rhosigma3 += offset * 2*10; + v4sigma4 += offset * 15; + } } else { - sigma = rho + np; + sigma = rho + blksize; if (deriv > 0) { vrho = exc + np; vsigma = vrho + np; @@ -334,8 +393,33 @@ static void _eval_xc(xc_func_type *func_x, int spin, int deriv, int np, v4rhosigma3 = v4rho2sigma2 + np; v4sigma4 = v4rhosigma3 + np; } + + // set offset + exc += offset; + if (deriv > 0) { + vrho += offset; + vsigma += offset; + } + if (deriv > 1) { + v2rho2 += offset; + v2rhosigma += offset; + v2sigma2 += offset; + } + if (deriv > 2) { + v3rho3 += offset; + v3rho2sigma += offset; + v3rhosigma2 += offset; + v3sigma3 += offset; + } + if (deriv > 3) { + v4rho4 += offset; + v4rho3sigma += offset; + v4rho2sigma2 += offset; + v4rhosigma3 += offset; + v4sigma4 += offset; + } } - xc_gga(func_x, np, rho, sigma, + xc_gga(func_x, blksize, rho, sigma, exc, vrho, vsigma, v2rho2, v2rhosigma, v2sigma2, v3rho3, 
v3rho2sigma, v3rhosigma2, v3sigma3, @@ -346,8 +430,8 @@ static void _eval_xc(xc_func_type *func_x, int spin, int deriv, int np, case XC_FAMILY_HYB_MGGA: #endif if (spin == 1) { - sigma = rho + np * 2; - tau = sigma + np * 3; + sigma = rho + blksize * 2; + tau = sigma + blksize * 3; if (deriv > 0) { vrho = exc + np; vsigma = vrho + np * 2; @@ -390,9 +474,54 @@ static void _eval_xc(xc_func_type *func_x, int spin, int deriv, int np, v4sigmatau3 = v4sigma2tau2 + np * 6*3 ; v4tau4 = v4sigmatau3 + np * 3*4 ; } + + // set offset + exc += offset; + if (deriv > 0) { + vrho += offset * 2; + vsigma += offset * 3; + vtau += offset * 2; + } + if (deriv > 1) { + v2rho2 += offset * 3; + v2rhosigma += offset * 6; + v2sigma2 += offset * 6; + v2rhotau += offset * 4; + v2sigmatau += offset * 6; + v2tau2 += offset * 3; + } + if (deriv > 2) { + v3rho3 += offset * 4 ; + v3rho2sigma += offset * 9 ; + v3rhosigma2 += offset * 12; + v3sigma3 += offset * 10; + v3rho2tau += offset * 6 ; + v3rhosigmatau += offset * 12; + v3rhotau2 += offset * 6 ; + v3sigma2tau += offset * 12; + v3sigmatau2 += offset * 9 ; + v3tau3 += offset * 4 ; + } + if (deriv > 3) { + v4rho4 += offset * 5 ; + v4rho3sigma += offset * 4*3 ; + v4rho2sigma2 += offset * 3*6 ; + v4rhosigma3 += offset * 2*10 ; + v4sigma4 += offset * 15 ; + v4rho3tau += offset * 4*2 ; + v4rho2sigmatau += offset * 3*3*2; + v4rho2tau2 += offset * 3*3 ; + v4rhosigma2tau += offset * 2*6*2; + v4rhosigmatau2 += offset * 2*3*3; + v4rhotau3 += offset * 2*4 ; + v4sigma3tau += offset * 10*2 ; + v4sigma2tau2 += offset * 6*3 ; + v4sigmatau3 += offset * 3*4 ; + v4tau4 += offset * 5 ; + } } else { - sigma = rho + np; - tau = sigma + np; + sigma = rho + blksize; + tau = sigma + blksize; if (deriv > 0) { vrho = exc + np; vsigma = vrho + np; @@ -435,8 +564,53 @@ static void _eval_xc(xc_func_type *func_x, int spin, int deriv, int np, v4sigmatau3 = v4sigma2tau2 + np; v4tau4 = v4sigmatau3 + np; } + + // set offset + exc += offset; + if (deriv > 0) { + vrho += offset; 
+ vsigma += offset; + vtau += offset; + } + if (deriv > 1) { + v2rho2 += offset; + v2rhosigma += offset; + v2sigma2 += offset; + v2rhotau += offset; + v2sigmatau += offset; + v2tau2 += offset; + } + if (deriv > 2) { + v3rho3 += offset; + v3rho2sigma += offset; + v3rhosigma2 += offset; + v3sigma3 += offset; + v3rho2tau += offset; + v3rhosigmatau += offset; + v3rhotau2 += offset; + v3sigma2tau += offset; + v3sigmatau2 += offset; + v3tau3 += offset; + } + if (deriv > 3) { + v4rho4 += offset; + v4rho3sigma += offset; + v4rho2sigma2 += offset; + v4rhosigma3 += offset; + v4sigma4 += offset; + v4rho3tau += offset; + v4rho2sigmatau += offset; + v4rho2tau2 += offset; + v4rhosigma2tau += offset; + v4rhosigmatau2 += offset; + v4rhotau3 += offset; + v4sigma3tau += offset; + v4sigma2tau2 += offset; + v4sigmatau3 += offset; + v4tau4 += offset; + } } - xc_mgga(func_x, np, rho, sigma, lapl, tau, + xc_mgga(func_x, blksize, rho, sigma, lapl, tau, exc, vrho, vsigma, vlapl, vtau, v2rho2, v2rhosigma, v2rholapl, v2rhotau, v2sigma2, v2sigmalapl, v2sigmatau, v2lapl2, v2lapltau, v2tau2, @@ -705,6 +879,7 @@ static void axpy(double *dst, double *src, double fac, { int i, j; for (j = 0; j < nsrc; j++) { + #pragma omp parallel for schedule(static) for (i = 0; i < np; i++) { dst[j*np+i] += fac * src[i*nsrc+j]; } @@ -760,6 +935,7 @@ static void merge_xc(double *dst, double *ebuf, double fac, pout = dst + offsets1[order] * np; pin = ebuf + offsets0[order] * np; nsrc = offsets0[order+1] - offsets0[order]; + #pragma omp parallel for schedule(static) for (i = 0; i < np * nsrc; i++) { pout[i] += fac * pin[i]; } @@ -802,10 +978,36 @@ void LIBXC_eval_xc(int nfn, int *fn_id, double *fac, double *omega, { assert(deriv <= 4); double *ebuf = malloc(sizeof(double) * np * outlen); - double *rho = malloc(sizeof(double) * np * 7); - _eval_rho(rho, rho_u, spin, nvar, np); - int nspin = spin + 1; + double *rhobufs[MAX_THREADS]; + int offsets[MAX_THREADS+1]; +#pragma omp parallel +{ + int iblk = 
omp_get_thread_num(); + int nblk = omp_get_num_threads(); + assert(nblk <= MAX_THREADS); + + int blksize = np / nblk; + int ioff = iblk * blksize; + int np_mod = np % nblk; + if (iblk < np_mod) { + blksize += 1; + } + if (np_mod > 0) { + ioff += MIN(iblk, np_mod); + } + offsets[iblk] = ioff; + if (iblk == nblk-1) { + offsets[nblk] = np; + assert(ioff + blksize == np); + } + + double *rho_priv = malloc(sizeof(double) * blksize * 7); + rhobufs[iblk] = rho_priv; + _eval_rho(rho_priv, rho_u+ioff, spin, nvar, blksize, np); +} + + int nspin = spin + 1; int i, j; xc_func_type func; for (i = 0; i < nfn; i++) { @@ -857,13 +1059,25 @@ void LIBXC_eval_xc(int nfn, int *fn_id, double *fac, double *omega, #if defined XC_SET_RELATIVITY xc_lda_x_set_params(&func, relativity); #endif - _eval_xc(&func, spin, deriv, np, rho, ebuf); + +#pragma omp parallel +{ + int iblk = omp_get_thread_num(); + int offset = offsets[iblk]; + int blksize = offsets[iblk+1] - offset; + _eval_xc(&func, spin, deriv, np, rhobufs[iblk], ebuf, offset, blksize); +} + merge_xc(output, ebuf, fac[i], spin, deriv, nvar, np, outlen, func.info->family); xc_func_end(&func); } free(ebuf); - free(rho); +#pragma omp parallel +{ + int iblk = omp_get_thread_num(); + free(rhobufs[iblk]); +} } int LIBXC_max_deriv_order(int xc_id) diff --git a/pyscf/lib/dft/multigrid.c b/pyscf/lib/dft/multigrid.c new file mode 100644 index 0000000000..593aedf1b8 --- /dev/null +++ b/pyscf/lib/dft/multigrid.c @@ -0,0 +1,744 @@ +/* Copyright 2021- The PySCF Developers. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. + + * + * Author: Xing Zhang + */ + +#include +#include +#include +#include +#include "config.h" +#include "cint.h" +#include "pbc/neighbor_list.h" +#include "pbc/cell.h" +#include "dft/multigrid.h" + +#define SQUARE(r) (r[0]*r[0]+r[1]*r[1]+r[2]*r[2]) +#define MAX(x, y) (((x) > (y)) ? (x) : (y)) +#define BUF_SIZE 2000 +#define ADD_SIZE 1000 +#define RZERO 1e-6 + +const int _LEN_CART[] = { + 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66, 78, 91, 105, 120, 136 +}; + +const int _LEN_CART0[] = { + 0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66, 78, 91, 105, 120 +}; + +const int _BINOMIAL_COEF[] = { + 1, + 1, 1, + 1, 2, 1, + 1, 3, 3, 1, + 1, 4, 6, 4, 1, + 1, 5, 10, 10, 5, 1, + 1, 6, 15, 20, 15, 6, 1, + 1, 7, 21, 35, 35, 21, 7, 1, + 1, 8, 28, 56, 70, 56, 28, 8, 1, + 1, 9, 36, 84, 126, 126, 84, 36, 9, 1, + 1, 10, 45, 120, 210, 252, 210, 120, 45, 10, 1, + 1, 11, 55, 165, 330, 462, 462, 330, 165, 55, 11, 1, + 1, 12, 66, 220, 495, 792, 924, 792, 495, 220, 66, 12, 1, + 1, 13, 78, 286, 715,1287,1716,1716,1287, 715, 286, 78, 13, 1, + 1, 14, 91, 364,1001,2002,3003,3432,3003,2002,1001, 364, 91, 14, 1, + 1, 15, 105, 455,1365,3003,5005,6435,6435,5005,3003,1365, 455, 105, 15, 1, +}; + +double CINTsquare_dist(const double *r1, const double *r2); + +void init_gridlevel_info(GridLevel_Info** gridlevel_info, + double* cutoff, int* mesh, int nlevels, double rel_cutoff) +{ + GridLevel_Info* gl_info = (GridLevel_Info*) malloc(sizeof(GridLevel_Info)); + gl_info->nlevels = nlevels; + gl_info->rel_cutoff = rel_cutoff; + gl_info->cutoff = (double*) malloc(sizeof(double) * nlevels); + gl_info->mesh = (int*) malloc(sizeof(int) * nlevels * 3); + int i; + for (i = 0; i < nlevels; i++) { + (gl_info->cutoff)[i] = cutoff[i]; + (gl_info->mesh)[i*3] = mesh[i*3]; + (gl_info->mesh)[i*3+1] = mesh[i*3+1]; + (gl_info->mesh)[i*3+2] = mesh[i*3+2]; + } + *gridlevel_info = gl_info; +} + + +void init_rs_grid(RS_Grid** 
rs_grid, GridLevel_Info** gridlevel_info, int comp) +{ + RS_Grid* rg = (RS_Grid*) malloc(sizeof(RS_Grid)); + GridLevel_Info* gl_info = *gridlevel_info; + int nlevels = gl_info->nlevels; + rg->nlevels = nlevels; + rg->gridlevel_info = gl_info; + rg->comp = comp; + + int i; + size_t ngrid; + int *mesh = gl_info->mesh; + rg->data = (double**)malloc(sizeof(double*) * nlevels); + for (i = 0; i < nlevels; i++) { + ngrid = mesh[i*3] * mesh[i*3+1] * mesh[i*3+2]; + (rg->data)[i] = calloc(comp*ngrid, sizeof(double)); + } + *rs_grid = rg; +} + + +void del_rs_grid(RS_Grid** rs_grid) +{ + RS_Grid* rg = *rs_grid; + if (!rg) { + return; + } + if (rg->data) { + int i; + for (i = 0; i < rg->nlevels; i++) { + if (rg->data[i]) { + free(rg->data[i]); + } + } + free(rg->data); + } + rg->gridlevel_info = NULL; + free(rg); + *rs_grid = NULL; +} + + +void del_gridlevel_info(GridLevel_Info** gridlevel_info) +{ + GridLevel_Info* gl_info = *gridlevel_info; + if (!gl_info) { + return; + } + if (gl_info->cutoff) { + free(gl_info->cutoff); + } + if (gl_info->mesh) { + free(gl_info->mesh); + } + free(gl_info); + *gridlevel_info = NULL; +} + + +void init_pgfpair(PGFPair** pair_info, + int ish, int ipgf, int jsh, int jpgf, int iL, double radius) +{ + PGFPair *pair0 = (PGFPair*) malloc(sizeof(PGFPair)); + pair0->ish = ish; + pair0->ipgf = ipgf; + pair0->jsh = jsh; + pair0->jpgf = jpgf; + pair0->iL = iL; + pair0->radius = radius; + *pair_info = pair0; +} + + +bool pgfpairs_with_same_shells(PGFPair *pair1, PGFPair *pair2) +{ + if (!pair1 || !pair2) { + return false; + } + if (pair1->ish == pair2->ish && pair1->jsh == pair2->jsh) { + return true; + } + return false; +} + + +double pgfpair_radius(int la, int lb, double zeta, double zetb, double* ra, double* rab, double precision) +{ + double radius = 0; + double zetp = zeta + zetb; + double eps = precision * precision; + + if (rab[0] < RZERO && rab[1] < RZERO && rab[2] < RZERO) { + radius = pgf_rcut(la+lb, zetp, 1., eps, radius); + return radius; + } + 
+ double prefactor = exp(-zeta*zetb/zetp*SQUARE(rab)); + double rb[3], rp[3]; + rb[0] = ra[0] + rab[0]; + rb[1] = ra[1] + rab[1]; + rb[2] = ra[2] + rab[2]; + rp[0] = ra[0] + zetb/zetp*rab[0]; + rp[1] = ra[1] + zetb/zetp*rab[1]; + rp[2] = ra[2] + zetb/zetp*rab[2]; + + double rad_a = sqrt(CINTsquare_dist(ra, rp)); + double rad_b = sqrt(CINTsquare_dist(rb, rp)); + + int lmax = la + lb; + double coef[lmax+1]; + double rap[la+1]; + double rbp[lb+1]; + + int lxa, lxb, i; + for (i = 0; i <= lmax; i++) { + coef[i] = 0; + } + rap[0] = 1.; + for (i = 1; i <= la; i++) { + rap[i] = rap[i-1] * rad_a; + } + rbp[0] = 1.; + for (i = 1; i <= lb; i++) { + rbp[i] = rbp[i-1] * rad_b; + } + + for (lxa = 0; lxa <= la; lxa++) { + for (lxb = 0; lxb <= lb; lxb++) { + coef[lxa+lxb] += BINOMIAL(la, lxa) * BINOMIAL(lb, lxb) * rap[la-lxa] * rbp[lb-lxb]; + } + } + + for (i = 0; i <= lmax; i++){ + coef[i] *= prefactor; + radius = MAX(radius, pgf_rcut(i, zetp, coef[i], eps, radius)); + } + return radius; +} + + +void del_pgfpair(PGFPair** pair_info) +{ + PGFPair *pair0 = *pair_info; + if (!pair0) { + return; + } else { + free(pair0); + } + *pair_info = NULL; +} + + +//unlink the pgfpair data instead of deleting +void nullify_pgfpair(PGFPair** pair_info) +{ + *pair_info = NULL; +} + + +void init_task(Task** task) +{ + Task *t0 = *task = (Task*) malloc(sizeof(Task)); + t0->ntasks = 0; + t0->buf_size = BUF_SIZE; + t0->pgfpairs = (PGFPair**) malloc(sizeof(PGFPair*) * t0->buf_size); + int i; + for (i = 0; i < t0->buf_size; i++) { + (t0->pgfpairs)[i] = NULL; + } +} + + +void del_task(Task** task) +{ + Task *t0 = *task; + if (!t0) { + return; + } + if (t0->pgfpairs) { + size_t i, ntasks = t0->ntasks; + for (i = 0; i < ntasks; i++) { + del_pgfpair(t0->pgfpairs + i); + } + free(t0->pgfpairs); + } + free(t0); + *task = NULL; +} + + +void nullify_task(Task** task) +{ + Task *t0 = *task; + if (!t0) { + return; + } + if (t0->pgfpairs) { + size_t i, ntasks = t0->ntasks; + for (i = 0; i < ntasks; i++) { + 
nullify_pgfpair(t0->pgfpairs + i); + } + free(t0->pgfpairs); + } + free(t0); + *task = NULL; +} + + +void init_task_list(TaskList** task_list, GridLevel_Info* gridlevel_info, int nlevels, int hermi) +{ + TaskList* tl = *task_list = (TaskList*) malloc(sizeof(TaskList)); + tl->nlevels = nlevels; + tl->hermi = hermi; + tl->gridlevel_info = gridlevel_info; + tl->tasks = (Task**) malloc(sizeof(Task*)*nlevels); + int i; + for (i = 0; i < nlevels; i++) { + init_task(tl->tasks + i); + } +} + + +void del_task_list(TaskList** task_list) +{ + TaskList *tl = *task_list; + if (!tl) { + return; + } + if (tl->gridlevel_info) { + del_gridlevel_info(&(tl->gridlevel_info)); + tl->gridlevel_info = NULL; + } + if (tl->tasks) { + int i; + for (i = 0; i < tl->nlevels; i++) { + if ((tl->tasks)[i]) { + del_task(tl->tasks + i); + } + } + free(tl->tasks); + } + free(tl); + *task_list = NULL; +} + + +void nullify_task_list(TaskList** task_list) +{ + TaskList *tl = *task_list; + if (!tl) { + return; + } + if (tl->gridlevel_info) { + tl->gridlevel_info = NULL; + } + if (tl->tasks) { + int i; + for (i = 0; i < tl->nlevels; i++) { + if ((tl->tasks)[i]) { + nullify_task(tl->tasks + i); + } + } + free(tl->tasks); + } + free(tl); + *task_list = NULL; +} + + +void update_task_list(TaskList** task_list, int grid_level, + int ish, int ipgf, int jsh, int jpgf, int iL, double radius) +{ + TaskList* tl = *task_list; + Task *t0 = (tl->tasks)[grid_level]; + t0->ntasks += 1; + if (t0->ntasks > t0->buf_size) { + t0->buf_size += ADD_SIZE; + t0->pgfpairs = (PGFPair**) realloc(t0->pgfpairs, sizeof(PGFPair*) * t0->buf_size); + } + init_pgfpair(t0->pgfpairs + t0->ntasks - 1, + ish, ipgf, jsh, jpgf, iL, radius); +} + + +void merge_task_list(TaskList** task_list, TaskList** task_list_loc) +{ + TaskList* tl = *task_list; + TaskList* tl_loc = *task_list_loc; + int ilevel, itask; + for (ilevel = 0; ilevel < tl->nlevels; ilevel++) { + Task *t0 = (tl->tasks)[ilevel]; + Task *t1 = (tl_loc->tasks)[ilevel]; + int itask_off 
= t0->ntasks; + int ntasks_loc = t1->ntasks; + t0->ntasks += ntasks_loc; + t0->buf_size = t0->ntasks; + t0->pgfpairs = (PGFPair**) realloc(t0->pgfpairs, sizeof(PGFPair*) * t0->buf_size); + PGFPair** ptr_pgfpairs = t0->pgfpairs + itask_off; + PGFPair** ptr_pgfpairs_loc = t1->pgfpairs; + for (itask = 0; itask < ntasks_loc; itask++) { + ptr_pgfpairs[itask] = ptr_pgfpairs_loc[itask]; + } + } +} + + +int get_grid_level(GridLevel_Info* gridlevel_info, double alpha) +{ + int i; + int nlevels = gridlevel_info->nlevels; + int grid_level = nlevels - 1; //default use the most dense grid + double needed_cutoff = alpha * gridlevel_info->rel_cutoff; + for (i = 0; i < nlevels; i++) { + if ((gridlevel_info->cutoff)[i] >= needed_cutoff) { + grid_level = i; + break; + } + } + return grid_level; +} + + +void build_task_list(TaskList** task_list, NeighborList** neighbor_list, + GridLevel_Info** gridlevel_info, + int* ish_atm, int* ish_bas, double* ish_env, + double* ish_rcut, double** ipgf_rcut, + int* jsh_atm, int* jsh_bas, double* jsh_env, + double* jsh_rcut, double** jpgf_rcut, + int nish, int njsh, double* Ls, double precision, int hermi) +{ + GridLevel_Info *gl_info = *gridlevel_info; + int ilevel; + int nlevels = gl_info->nlevels; + init_task_list(task_list, gl_info, nlevels, hermi); + double max_radius[nlevels]; + NeighborList *nl0 = *neighbor_list; + +#pragma omp parallel private(ilevel) +{ + double max_radius_loc[nlevels]; + TaskList** task_list_loc = (TaskList**) malloc(sizeof(TaskList*)); + init_task_list(task_list_loc, gl_info, nlevels, hermi); + NeighborPair *np0_ij; + int ish, jsh; + int li, lj; + int ipgf, jpgf; + int nipgf, njpgf; + int iL, iL_idx; + int ish_atm_id, jsh_atm_id; + int ish_alpha_of, jsh_alpha_of; + double ipgf_alpha, jpgf_alpha; + double *ish_ratm, *jsh_ratm, *rL; + double rij[3]; + double dij, radius; + + #pragma omp for schedule(dynamic) + for (ish = 0; ish < nish; ish++) { + li = ish_bas[ANG_OF+ish*BAS_SLOTS]; + nipgf = 
ish_bas[NPRIM_OF+ish*BAS_SLOTS]; + ish_atm_id = ish_bas[ish*BAS_SLOTS+ATOM_OF]; + ish_ratm = ish_env + ish_atm[ish_atm_id*ATM_SLOTS+PTR_COORD]; + ish_alpha_of = ish_bas[PTR_EXP+ish*BAS_SLOTS]; + for (jsh = 0; jsh < njsh; jsh++) { + if (hermi == 1 && jsh < ish) { + continue; + } + np0_ij = (nl0->pairs)[ish*njsh + jsh]; + if (np0_ij->nimgs > 0) { + lj = jsh_bas[ANG_OF+jsh*BAS_SLOTS]; + njpgf = jsh_bas[NPRIM_OF+jsh*BAS_SLOTS]; + jsh_atm_id = jsh_bas[jsh*BAS_SLOTS+ATOM_OF]; + jsh_ratm = jsh_env + jsh_atm[jsh_atm_id*ATM_SLOTS+PTR_COORD]; + jsh_alpha_of = jsh_bas[PTR_EXP+jsh*BAS_SLOTS]; + + for (iL_idx = 0; iL_idx < np0_ij->nimgs; iL_idx++){ + iL = (np0_ij->Ls_list)[iL_idx]; + rL = Ls + iL*3; + rij[0] = jsh_ratm[0] + rL[0] - ish_ratm[0]; + rij[1] = jsh_ratm[1] + rL[1] - ish_ratm[1]; + rij[2] = jsh_ratm[2] + rL[2] - ish_ratm[2]; + dij = sqrt(SQUARE(rij)); + + for (ipgf = 0; ipgf < nipgf; ipgf++) { + if (ipgf_rcut[ish][ipgf] + jsh_rcut[jsh] < dij) { + continue; + } + ipgf_alpha = ish_env[ish_alpha_of+ipgf]; + for (jpgf = 0; jpgf < njpgf; jpgf++) { + //if (hermi == 1 && ish == jsh && jpgf < ipgf) { + // continue; + //} + if (ipgf_rcut[ish][ipgf] + jpgf_rcut[jsh][jpgf] < dij) { + continue; + } + jpgf_alpha = jsh_env[jsh_alpha_of+jpgf]; + ilevel = get_grid_level(gl_info, ipgf_alpha+jpgf_alpha); + radius = pgfpair_radius(li, lj, ipgf_alpha, jpgf_alpha, ish_ratm, rij, precision); + if (radius < RZERO) { + continue; + } + max_radius_loc[ilevel] = MAX(radius, max_radius_loc[ilevel]); + update_task_list(task_list_loc, ilevel, ish, ipgf, jsh, jpgf, iL, radius); + } + } + } + } + } + } + + #pragma omp critical + merge_task_list(task_list, task_list_loc); + + nullify_task_list(task_list_loc); + free(task_list_loc); + + #pragma omp critical + for (ilevel = 0; ilevel < nlevels; ilevel++) { + max_radius[ilevel] = MAX(max_radius[ilevel], max_radius_loc[ilevel]); + } +} + + for (ilevel = 0; ilevel < nlevels; ilevel++) { + Task *t0 = ((*task_list)->tasks)[ilevel]; + t0->radius = 
max_radius[ilevel]; + } +} + + +int get_task_loc(int** task_loc, PGFPair** pgfpairs, int ntasks, + int ish0, int ish1, int jsh0, int jsh1, int hermi) +{ + int n = -2; + int ish_prev = -1; + int jsh_prev = -1; + int itask, ish, jsh; + int *buf = (int*)malloc(sizeof(int) * ntasks*2); + PGFPair *pgfpair; + for(itask = 0; itask < ntasks; itask++){ + pgfpair = pgfpairs[itask]; + ish = pgfpair->ish; + jsh = pgfpair->jsh; + if (ish < ish0 || ish >= ish1) { + continue; + } + if (jsh < jsh0 || jsh >= jsh1) { + continue; + } + if (hermi == 1 && jsh < ish) { + continue; + } + + if (ish != ish_prev || jsh != jsh_prev) { + n += 2; + buf[n] = itask; + buf[n+1] = itask+1; + ish_prev = ish; + jsh_prev = jsh; + } else { + buf[n+1] = itask+1; + } + } + n += 2; + *task_loc = (int*)realloc(buf, sizeof(int) * n); + return n; +} + + +void gradient_gs(double complex* out, double complex* f_gs, double* Gv, + int n, size_t ng) +{ + int i; + double complex *outx, *outy, *outz; + for (i = 0; i < n; i++) { + outx = out; + outy = outx + ng; + outz = outy + ng; + #pragma omp parallel + { + size_t igrid; + double *pGv; + #pragma omp for schedule(static) + for (igrid = 0; igrid < ng; igrid++) { + pGv = Gv + igrid * 3; + outx[igrid] = pGv[0] * creal(f_gs[igrid]) * _Complex_I - pGv[0] * cimag(f_gs[igrid]); + outy[igrid] = pGv[1] * creal(f_gs[igrid]) * _Complex_I - pGv[1] * cimag(f_gs[igrid]); + outz[igrid] = pGv[2] * creal(f_gs[igrid]) * _Complex_I - pGv[2] * cimag(f_gs[igrid]); + } + } + f_gs += ng; + out += 3 * ng; + } +} + +/* +int get_task_loc_diff_ish(int** task_loc, PGFPair** pgfpairs, int ntasks, + int ish0, int ish1) +{ + int n = -2; + int ish_prev = -1; + int itask, ish; + int *buf = (int*)malloc(sizeof(int) * ntasks*2); + PGFPair *pgfpair; + for(itask = 0; itask < ntasks; itask++){ + pgfpair = pgfpairs[itask]; + ish = pgfpair->ish; + if (ish < ish0 || ish >= ish1) { + continue; + } + + if (ish != ish_prev) { + n += 2; + buf[n] = itask; + ish_prev = ish; + } + if (ish == ish_prev) { + 
buf[n+1] = itask+1; + } + } + n += 2; + *task_loc = (int*)realloc(buf, sizeof(int) * n); + return n; +} +*/ + +/* +typedef struct Task_Index_struct { + int ntasks; + int bufsize; + int* task_index; +} Task_Index; + + +void init_task_index(Task_Index* task_idx) +{ + task_idx->ntasks = 0; + task_idx->bufsize = 10; + task_idx->task_index = (int*)malloc(sizeof(int) * task_idx->bufsize); +} + + +void update_task_index(Task_Index* task_idx, int itask) +{ + task_idx->ntasks += 1; + if (task_idx->bufsize < task_idx->ntasks) { + task_idx->bufsize += 10; + task_idx->task_index = (int*)realloc(task_idx->task_index, sizeof(int) * task_idx->bufsize); + } + task_idx->task_index[task_idx->ntasks-1] = itask; +} + + +void del_task_index(Task_Index* task_idx) +{ + if (!task_idx) { + return; + } + if (task_idx->task_index) { + free(task_idx->task_index); + } + task_idx->ntasks = 0; + task_idx->bufsize = 0; +} + + +typedef struct Shlpair_Task_Index_struct { + int nish; + int njsh; + int ish0; + int jsh0; + Task_Index *task_index; +} Shlpair_Task_Index; + + +void init_shlpair_task_index(Shlpair_Task_Index* shlpair_task_idx, + int ish0, int jsh0, int nish, int njsh) +{ + shlpair_task_idx->ish0 = ish0; + shlpair_task_idx->jsh0 = jsh0; + shlpair_task_idx->nish = nish; + shlpair_task_idx->njsh = njsh; + shlpair_task_idx->task_index = (Task_Index*)malloc(sizeof(Task_Index)*nish*njsh); + + int ijsh; + for (ijsh = 0; ijsh < nish*njsh; ijsh++) { + init_task_index(shlpair_task_idx->task_index + ijsh); + } +} + + +void update_shlpair_task_index(Shlpair_Task_Index* shlpair_task_idx, + int ish, int jsh, int itask) +{ + int ish0 = shlpair_task_idx->ish0; + int jsh0 = shlpair_task_idx->jsh0; + int njsh = shlpair_task_idx->njsh; + int ioff = ish - ish0; + int joff = jsh - jsh0; + + update_task_index(shlpair_task_idx->task_index + ioff*njsh+joff, itask); +} + + +int get_task_index(Shlpair_Task_Index* shlpair_task_idx, int** idx, int ish, int jsh) +{ + int ish0 = shlpair_task_idx->ish0; + int jsh0 = 
shlpair_task_idx->jsh0; + int njsh = shlpair_task_idx->njsh; + int ioff = ish - ish0; + int joff = jsh - jsh0; + Task_Index *task_idx = shlpair_task_idx->task_index + ioff*njsh+joff; + int ntasks = task_idx->ntasks; + *idx = task_idx->task_index; + return ntasks; +} + + +void del_shlpair_task_index(Shlpair_Task_Index* shlpair_task_idx) +{ + if (!shlpair_task_idx) { + return; + } + + int nish = shlpair_task_idx->nish; + int njsh = shlpair_task_idx->njsh; + int ijsh; + for (ijsh = 0; ijsh < nish*njsh; ijsh++) { + del_task_index(shlpair_task_idx->task_index + ijsh); + } + free(shlpair_task_idx->task_index); +} + + +Shlpair_Task_Index* get_shlpair_task_index(PGFPair** pgfpairs, int ntasks, + int ish0, int ish1, int jsh0, int jsh1, int hermi) +{ + const int nish = ish1 - ish0; + const int njsh = jsh1 - jsh0; + + Shlpair_Task_Index* shlpair_task_idx = (Shlpair_Task_Index*) malloc(sizeof(Shlpair_Task_Index)); + init_shlpair_task_index(shlpair_task_idx, ish0, jsh0, nish, njsh); + + int itask; + int ish, jsh; + PGFPair *pgfpair = NULL; + for(itask = 0; itask < ntasks; itask++){ + pgfpair = pgfpairs[itask]; + ish = pgfpair->ish; + if (ish < ish0 || ish >= ish1) { + continue; + } + jsh = pgfpair->jsh; + if (jsh < jsh0 || jsh >= jsh1) { + continue; + } + if (hermi == 1 && jsh < ish) { + continue; + } + update_shlpair_task_index(shlpair_task_idx, ish, jsh, itask); + } + return shlpair_task_idx; +} +*/ diff --git a/pyscf/lib/dft/multigrid.h b/pyscf/lib/dft/multigrid.h new file mode 100644 index 0000000000..e691a3ce12 --- /dev/null +++ b/pyscf/lib/dft/multigrid.h @@ -0,0 +1,72 @@ +/* Copyright 2021- The PySCF Developers. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + * + * Author: Xing Zhang + */ + +#ifndef HAVE_DEFINED_MULTIGRID_H +#define HAVE_DEFINED_MULTIGRID_H + +#include + +#define BINOMIAL(n, i) (_BINOMIAL_COEF[_LEN_CART0[n]+i]) + +extern const int _LEN_CART[]; +extern const int _LEN_CART0[]; +extern const int _BINOMIAL_COEF[]; + +typedef struct GridLevel_Info_struct { + int nlevels; + double rel_cutoff; + double *cutoff; + int *mesh; +} GridLevel_Info; + +typedef struct RS_Grid_struct { + int nlevels; + GridLevel_Info* gridlevel_info; + int comp; + double** data; +} RS_Grid; + +typedef struct PGFPair_struct { + int ish; + int ipgf; + int jsh; + int jpgf; + int iL; + double radius; +} PGFPair; + +bool pgfpairs_with_same_shells(PGFPair*, PGFPair*); + +typedef struct Task_struct { + size_t buf_size; + size_t ntasks; + PGFPair** pgfpairs; + double radius; +} Task; + +typedef struct TaskList_struct { + int nlevels; + int hermi; + GridLevel_Info* gridlevel_info; + Task** tasks; +} TaskList; + + +int get_task_loc(int** task_loc, PGFPair** pgfpairs, int ntasks, + int ish0, int ish1, int jsh0, int jsh1, int hermi); +#endif diff --git a/pyscf/lib/dft/utils.c b/pyscf/lib/dft/utils.c new file mode 100644 index 0000000000..04ef8e5b2f --- /dev/null +++ b/pyscf/lib/dft/utils.c @@ -0,0 +1,62 @@ +/* Copyright 2021- The PySCF Developers. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + * + * Author: Xing Zhang + */ + +#include +#include "config.h" +#include "vhf/fblas.h" +#if defined(HAVE_LIBXSMM) +#include "libxsmm.h" +#endif + + +void dgemm_wrapper(const char transa, const char transb, + const int m, const int n, const int k, + const double alpha, const double* a, const int lda, + const double* b, const int ldb, + const double beta, double* c, const int ldc) +{ +#if defined(HAVE_LIBXSMM) + if (transa == 'N') { + //libxsmm_dgemm(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc); + int prefetch = LIBXSMM_PREFETCH_AUTO; + int flags = transb != 'T' ? LIBXSMM_GEMM_FLAG_NONE : LIBXSMM_GEMM_FLAG_TRANS_B; + libxsmm_dmmfunction kernel = libxsmm_dmmdispatch(m, n, k, &lda, &ldb, &ldc, + &alpha, &beta, &flags, &prefetch); + if (kernel) { + kernel(a,b,c,a,b,c); + return; + } + } +#endif + dgemm_(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc); +} + +void get_gga_vrho_gs(double complex *out, double complex *vrho_gs, double complex *vsigma1_gs, + double *Gv, double weight, int ngrid) +{ + int i; + int ngrid2 = 2 * ngrid; + double complex fac = -2. * _Complex_I; + #pragma omp parallel for simd schedule(static) + for (i = 0; i < ngrid; i++) { + out[i] = ( Gv[i*3] * vsigma1_gs[i] + +Gv[i*3+1] * vsigma1_gs[i+ngrid] + +Gv[i*3+2] * vsigma1_gs[i+ngrid2]) * fac + vrho_gs[i]; + out[i] *= weight; + } +} diff --git a/pyscf/lib/dft/utils.h b/pyscf/lib/dft/utils.h new file mode 100644 index 0000000000..1c85ff1fdc --- /dev/null +++ b/pyscf/lib/dft/utils.h @@ -0,0 +1,27 @@ +/* Copyright 2021- The PySCF Developers. 
def ndarray_pointer_2d(array):
    '''Get the C pointer of a 2D array

    A table with the address of the first element of each row of ``array``
    is built (one ``numpy.uintp`` entry per row) and the address of that
    table is returned as a ``ctypes.c_void_p``.

    ``array`` must be 2-dimensional and C-contiguous.
    '''
    assert array.ndim == 2
    assert array.flags.c_contiguous

    nrow = array.shape[0]
    # Byte offset of every row relative to the start of the data buffer.
    row_offsets = numpy.arange(nrow) * array.strides[0]
    row_addrs = (array.ctypes.data + row_offsets).astype(numpy.uintp)
    return row_addrs.ctypes.data_as(ctypes.c_void_p)
+ See the License for the specific language governing permissions and + limitations under the License. + + * + * Author: Xing Zhang + */ + +#include +#include +#include +#include +#include "config.h" +#include "cint.h" +#include "pbc/cell.h" +#include "np_helper/np_helper.h" + +#define SQUARE(r) (r[0]*r[0]+r[1]*r[1]+r[2]*r[2]) + +double pgf_rcut(int l, double alpha, double coeff, double precision, double r0) +{ + l += 2; + + double rcut; + double rmin = sqrt(.5 * l / alpha) * 2.; + double gmax = coeff * pow(rmin, l) * exp(-alpha * rmin * rmin); + if (gmax < precision) { + return rmin; + } + + double eps = MIN(rmin/10, RCUT_EPS); + double c = log(coeff / precision); + double rcut_last; + rcut = MAX(r0, rmin+eps); + + int i; + for (i = 0; i < RCUT_MAX_CYCLE; i++) { + rcut_last = rcut; + rcut = sqrt((l*log(rcut) + c) / alpha); + if (fabs(rcut - rcut_last) < eps) { + break; + } + } + if (i == RCUT_MAX_CYCLE) { + //printf("r0 = %.6e, l = %d, alpha = %.6e, coeff = %.6e, precision=%.6e\n", r0, l, alpha, coeff, precision); + fprintf(stderr, "pgf_rcut did not converge in %d cycles: %.6f > %.6f.\n", + RCUT_MAX_CYCLE, fabs(rcut - rcut_last), eps); + } + return rcut; +} + +void rcut_by_shells(double* shell_radius, double** ptr_pgf_rcut, + int* bas, double* env, int nbas, + double r0, double precision) +{ +#pragma omp parallel +{ + int ib, ic, p; + #pragma omp for schedule(static) + for (ib = 0; ib < nbas; ib ++) { + int l = bas[ANG_OF+ib*BAS_SLOTS]; + int nprim = bas[NPRIM_OF+ib*BAS_SLOTS]; + int ptr_exp = bas[PTR_EXP+ib*BAS_SLOTS]; + int nctr = bas[NCTR_OF+ib*BAS_SLOTS]; + int ptr_c = bas[PTR_COEFF+ib*BAS_SLOTS]; + double rcut_max = 0, rcut; + for (p = 0; p < nprim; p++) { + double alpha = env[ptr_exp+p]; + double cmax = 0; + for (ic = 0; ic < nctr; ic++) { + cmax = MAX(fabs(env[ptr_c+ic*nprim+p]), cmax); + } + rcut = pgf_rcut(l, alpha, cmax, precision, r0); + if (ptr_pgf_rcut) { + ptr_pgf_rcut[ib][p] = rcut; + } + rcut_max = MAX(rcut, rcut_max); + } + shell_radius[ib] = 
rcut_max; + } +} +} + + +static void get_SI_real_imag(double* out_real, double* out_imag, + double* coords, double* Gv, + int natm, size_t ngrid) +{ +#pragma omp parallel +{ + int ia; + size_t i; + double RG; + double *pcoords, *pGv; + double *pout_real, *pout_imag; + #pragma omp for schedule(static) + for (ia = 0; ia < natm; ia++) { + pcoords = coords + ia * 3; + pout_real = out_real + ia * ngrid; + pout_imag = out_imag + ia * ngrid; + for (i = 0; i < ngrid; i++) { + pGv = Gv + i * 3; + RG = pcoords[0] * pGv[0] + pcoords[1] * pGv[1] + pcoords[2] * pGv[2]; + pout_real[i] = cos(RG); + pout_imag[i] = -sin(RG); + } + } +} +} + + +void get_Gv(double* Gv, double* rx, double* ry, double* rz, int* mesh, double* b) +{ +#pragma omp parallel +{ + int x, y, z; + double *pGv; + #pragma omp for schedule(dynamic) + for (x = 0; x < mesh[0]; x++) { + pGv = Gv + x * (size_t)mesh[1] * mesh[2] * 3; + for (y = 0; y < mesh[1]; y++) { + for (z = 0; z < mesh[2]; z++) { + pGv[0] = rx[x] * b[0]; + pGv[0] += ry[y] * b[3]; + pGv[0] += rz[z] * b[6]; + pGv[1] = rx[x] * b[1]; + pGv[1] += ry[y] * b[4]; + pGv[1] += rz[z] * b[7]; + pGv[2] = rx[x] * b[2]; + pGv[2] += ry[y] * b[5]; + pGv[2] += rz[z] * b[8]; + pGv += 3; + }} + } +} +} + + +void ewald_gs_nuc_grad(double* out, double* Gv, double* charges, double* coords, + double ew_eta, double weights, int natm, size_t ngrid) +{ + double *SI_real = (double*) malloc(natm*ngrid*sizeof(double)); + double *SI_imag = (double*) malloc(natm*ngrid*sizeof(double)); + get_SI_real_imag(SI_real, SI_imag, coords, Gv, natm, ngrid); + + double *ZSI_real = calloc(ngrid, sizeof(double)); + double *ZSI_imag = calloc(ngrid, sizeof(double)); + + NPdgemm('N', 'N', ngrid, 1, natm, + ngrid, natm, ngrid, 0, 0, 0, + SI_real, charges, ZSI_real, 1., 0.); + NPdgemm('N', 'N', ngrid, 1, natm, + ngrid, natm, ngrid, 0, 0, 0, + SI_imag, charges, ZSI_imag, 1., 0.); + +#pragma omp parallel +{ + int ia; + size_t i; + double charge_i; + double G2, coulG, tmp; + double *pout, *pGv; + 
double *pSI_real, *pSI_imag; + double fac = 4. * M_PI * weights; + double fac1 = 4. * ew_eta * ew_eta; + + #pragma omp for schedule(static) + for (ia = 0; ia < natm; ia++) { + charge_i = charges[ia]; + pout = out + ia * 3; + pSI_real = SI_real + ia * ngrid; + pSI_imag = SI_imag + ia * ngrid; + #pragma omp simd + for (i = 0; i < ngrid; i++) { + pGv = Gv + i*3; + G2 = SQUARE(pGv); + if (G2 < 1e-12) {continue;} + coulG = fac / G2 * exp(-G2 / fac1); + tmp = coulG * charge_i; + tmp *= (pSI_imag[i] * ZSI_real[i] - pSI_real[i] * ZSI_imag[i]); + pout[0] += tmp * pGv[0]; + pout[1] += tmp * pGv[1]; + pout[2] += tmp * pGv[2]; + } + } +} + free(SI_real); + free(SI_imag); + free(ZSI_real); + free(ZSI_imag); +} + + +void get_ewald_direct(double* ewovrl, double* chargs, double* coords, double* Ls, + double beta, double rcut, int natm, int nL) +{ + *ewovrl = 0.0; + + #pragma omp parallel + { + int i, j, l; + double *ri, *rj, *rL; + double rij[3]; + double r, qi, qj; + double e_loc = 0.0; + #pragma omp for schedule(static) + for (i = 0; i < natm; i++) { + ri = coords + i*3; + qi = chargs[i]; + for (j = 0; j < natm; j++) { + rj = coords + j*3; + qj = chargs[j]; + for (l = 0; l < nL; l++) { + rL = Ls + l*3; + rij[0] = rj[0] + rL[0] - ri[0]; + rij[1] = rj[1] + rL[1] - ri[1]; + rij[2] = rj[2] + rL[2] - ri[2]; + r = sqrt(SQUARE(rij)); + if (r > 1e-10 && r < rcut) { + e_loc += qi * qj * erfc(beta * r) / r; + } + } + } + } + e_loc *= 0.5; + + #pragma omp critical + *ewovrl += e_loc; + } +} + + +void get_ewald_direct_nuc_grad(double* out, double* chargs, double* coords, double* Ls, + double beta, double rcut, int natm, int nL) +{ + double fac = 2. 
* beta / sqrt(M_PI); + double beta2 = beta * beta; + + #pragma omp parallel + { + int i, j, l; + double *ri, *rj, *rL, *pout; + double rij[3]; + double r, r2, qi, qj, tmp; + #pragma omp for schedule(static) + for (i = 0; i < natm; i++) { + pout = out + i*3; + ri = coords + i*3; + qi = chargs[i]; + for (j = 0; j < natm; j++) { + rj = coords + j*3; + qj = chargs[j]; + for (l = 0; l < nL; l++) { + rL = Ls + l*3; + rij[0] = ri[0] - rj[0] + rL[0]; + rij[1] = ri[1] - rj[1] + rL[1]; + rij[2] = ri[2] - rj[2] + rL[2]; + r2 = SQUARE(rij); + r = sqrt(r2); + if (r > 1e-10 && r < rcut) { + tmp = qi * qj * (erfc(beta * r) / (r2 * r) + fac * exp(-beta2 * r2) / r2); + pout[0] -= tmp * rij[0]; + pout[1] -= tmp * rij[1]; + pout[2] -= tmp * rij[2]; + } + } + } + } + } +} diff --git a/pyscf/lib/pbc/cell.h b/pyscf/lib/pbc/cell.h new file mode 100644 index 0000000000..bec26bb2ea --- /dev/null +++ b/pyscf/lib/pbc/cell.h @@ -0,0 +1,29 @@ +/* Copyright 2021- The PySCF Developers. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + * + * Author: Xing Zhang + */ + +#ifndef HAVE_DEFINED_CELL_H +#define HAVE_DEFINED_CELL_H + +#define RCUT_MAX_CYCLE 10 +#define RCUT_EPS 1e-3 + +double pgf_rcut(int l, double alpha, double coeff, double precision, double r0); +void rcut_by_shells(double* shell_radius, double** ptr_pgf_rcut, + int* bas, double* env, int nbas, + double r0, double precision); +#endif diff --git a/pyscf/lib/pbc/fft.c b/pyscf/lib/pbc/fft.c new file mode 100644 index 0000000000..3affbb9a02 --- /dev/null +++ b/pyscf/lib/pbc/fft.c @@ -0,0 +1,147 @@ +/* Copyright 2021- The PySCF Developers. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + * + * Author: Xing Zhang + */ + +#include +#include +#include +#include "config.h" + +#define BLKSIZE 128 +#define MAX(x, y) (((x) > (y)) ? 
(x) : (y)) + +fftw_plan fft_create_r2c_plan(double* in, complex double* out, int rank, int* mesh) +{ + fftw_plan p; + p = fftw_plan_dft_r2c(rank, mesh, in, out, FFTW_ESTIMATE); + return p; +} + +fftw_plan fft_create_c2r_plan(complex double* in, double* out, int rank, int* mesh) +{ + fftw_plan p; + p = fftw_plan_dft_c2r(rank, mesh, in, out, FFTW_ESTIMATE); + return p; +} + +void fft_execute(fftw_plan p) +{ + fftw_execute(p); +} + +void fft_destroy_plan(fftw_plan p) +{ + fftw_destroy_plan(p); +} + +void _complex_fft(complex double* in, complex double* out, int* mesh, int rank, int sign) +{ + int i; + int nx = mesh[0]; + int nyz = 1; + for (i = 1; i < rank; i++) { + nyz *= mesh[i]; + } + int nmax = nyz / BLKSIZE * BLKSIZE; + fftw_plan p_2d = fftw_plan_dft(rank-1, mesh+1, in, out, sign, FFTW_ESTIMATE); + int nn[BLKSIZE] = {nx}; + fftw_plan p_3d_x = fftw_plan_many_dft(1, nn, BLKSIZE, + out, NULL, nyz, 1, + out, NULL, nyz, 1, + sign, FFTW_ESTIMATE); + + #pragma omp parallel private(i) + { + int off; + #pragma omp for schedule(dynamic) + for (i = 0; i < nx; i++) { + off = i * nyz; + fftw_execute_dft(p_2d, in+off, out+off); + } + + #pragma omp for schedule(dynamic) + for (i = 0; i < nmax; i+=BLKSIZE) { + fftw_execute_dft(p_3d_x, out+i, out+i); + } + } + fftw_destroy_plan(p_2d); + fftw_destroy_plan(p_3d_x); + + int nres = nyz - nmax; + if (nres > 0) { + fftw_plan p_3d_x = fftw_plan_many_dft(1, nn, nres, + out+nmax, NULL, nyz, 1, + out+nmax, NULL, nyz, 1, + sign, FFTW_ESTIMATE); + fftw_execute(p_3d_x); + fftw_destroy_plan(p_3d_x); + } +} + +void fft(complex double* in, complex double* out, int* mesh, int rank) +{ + _complex_fft(in, out, mesh, rank, FFTW_FORWARD); +} + +void ifft(complex double* in, complex double* out, int* mesh, int rank) +{ + _complex_fft(in, out, mesh, rank, FFTW_BACKWARD); + size_t i, n = 1; + for (i = 0; i < rank; i++) { + n *= mesh[i]; + } + double fac = 1. 
/*
 * Copy `n` real numbers into a complex array.
 * Each out[k] receives in[k] plus a zero imaginary part.
 */
void _copy_d2z(double complex *out, const double *in, const size_t n)
{
    size_t k;
    #pragma omp parallel for schedule(static)
    for (k = 0; k < n; k++) {
        out[k] = in[k] + 0*_Complex_I;
    }
}
+ + * + * Author: Xing Zhang + */ + +#include + +#define FFT_PLAN fftw_plan + +FFT_PLAN fft_create_r2c_plan(double* in, complex double* out, int rank, int* mesh); +FFT_PLAN fft_create_c2r_plan(complex double* in, double* out, int rank, int* mesh); +void fft_execute(FFT_PLAN p); +void fft_destroy_plan(FFT_PLAN p); diff --git a/pyscf/lib/pbc/fill_ints.c b/pyscf/lib/pbc/fill_ints.c index 36c853724c..95857b19ee 100644 --- a/pyscf/lib/pbc/fill_ints.c +++ b/pyscf/lib/pbc/fill_ints.c @@ -1260,9 +1260,9 @@ static void shift_bas(double *env_loc, double *env, double *Ls, int ptr, int iL) env_loc[ptr+2] = env[ptr+2] + Ls[iL*3+2]; } -static void sort2c_ks1(double complex *out, double *bufr, double *bufi, - int *shls_slice, int *ao_loc, int nkpts, int comp, - int jsh, int msh0, int msh1) +void sort2c_ks1(double complex *out, double *bufr, double *bufi, + int *shls_slice, int *ao_loc, int nkpts, int comp, + int jsh, int msh0, int msh1) { const int ish0 = shls_slice[0]; const int ish1 = shls_slice[1]; diff --git a/pyscf/lib/pbc/fill_ints.h b/pyscf/lib/pbc/fill_ints.h new file mode 100644 index 0000000000..ec2000755e --- /dev/null +++ b/pyscf/lib/pbc/fill_ints.h @@ -0,0 +1,29 @@ +/* Copyright 2014-2024 The PySCF Developers. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + */ + +#ifndef HAVE_DEFINED_PBC_FILL_INTS_H +#define HAVE_DEFINED_PBC_FILL_INTS_H + +void sort2c_gs1(double *out, double *in, int *shls_slice, int *ao_loc, + int comp, int ish, int jsh); +void sort2c_gs2_igtj(double *out, double *in, int *shls_slice, int *ao_loc, + int comp, int ish, int jsh); +void sort2c_gs2_ieqj(double *out, double *in, int *shls_slice, int *ao_loc, + int comp, int ish, int jsh); +void sort2c_ks1(double complex *out, double *bufr, double *bufi, + int *shls_slice, int *ao_loc, int nkpts, int comp, + int jsh, int msh0, int msh1); +#endif diff --git a/pyscf/lib/pbc/fill_ints_screened.c b/pyscf/lib/pbc/fill_ints_screened.c new file mode 100644 index 0000000000..5d100c7ae3 --- /dev/null +++ b/pyscf/lib/pbc/fill_ints_screened.c @@ -0,0 +1,1012 @@ +/* Copyright 2021-2024 The PySCF Developers. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
/*
 * Split the shell range [ksh0, ksh1) into consecutive groups.
 *
 * A new group is opened at shell `ksh` whenever including it would push the
 * AO span measured from the current group's first AO beyond `dkmax`.
 *
 * kshloc : output; kshloc[g] is the first shell of group g and
 *          kshloc[ngroups] == ksh1 (needs room for ksh1-ksh0+1 entries)
 * ao_loc : AO offset of every shell
 *
 * Returns the number of groups.  Every shell after the first is asserted to
 * have fewer than `dkmax` functions.
 */
static int shloc_partition(int *kshloc, int *ao_loc, int ksh0, int ksh1, int dkmax)
{
    int ngroups = 0;
    int group_start = ao_loc[ksh0];
    int ksh = ksh0 + 1;

    kshloc[0] = ksh0;
    while (ksh < ksh1) {
        assert(ao_loc[ksh+1] - ao_loc[ksh] < dkmax);
        if (ao_loc[ksh+1] - group_start > dkmax) {
            ngroups++;
            kshloc[ngroups] = ksh;
            group_start = ao_loc[ksh];
        }
        ksh++;
    }
    ngroups++;
    kshloc[ngroups] = ksh1;
    return ngroups;
}
naoj + jp) * naok; + + int i, j, k, ksh, ic, dk, dijk; + double *pin, *pout; + + for (ksh = msh0; ksh < msh1; ksh++) { + dk = ao_loc[ksh+1] - ao_loc[ksh]; + dijk = dij * dk; + for (ic = 0; ic < comp; ic++) { + pout = out + nijk * ic + ao_loc[ksh]-ao_loc[ksh0]; + pin = in + dijk * ic; + for (j = 0; j < dj; j++) { + for (i = 0; i < di; i++) { + for (k = 0; k < dk; k++) { + pout[i*njk+k] = pin[k*dij+i]; + } } + pout += naok; + pin += di; + } + } + in += dijk * comp; + } +} + +static void _nr3c_screened_fill_g(int (*intor)(), void (*fsort)(), double *out, int nkpts_ij, + int nkpts, int comp, int nimgs, int ish, int jsh, + double *buf, double *env_loc, double *Ls, + double *expkL_r, double *expkL_i, int *kptij_idx, + int *shls_slice, int *ao_loc, + CINTOpt *cintopt, PBCOpt *pbcopt, + int *atm, int natm, int *bas, int nbas, double *env, + NeighborList** neighbor_list) +{ + const int ish0 = shls_slice[0]; + const int ish1 = shls_slice[1]; + const int jsh0 = shls_slice[2]; + const int jsh1 = shls_slice[3]; + const int ksh0 = shls_slice[4]; + const int ksh1 = shls_slice[5]; + + jsh += jsh0; + ish += ish0; + int iptrxyz = atm[PTR_COORD+bas[ATOM_OF+ish*BAS_SLOTS]*ATM_SLOTS]; + int jptrxyz = atm[PTR_COORD+bas[ATOM_OF+jsh*BAS_SLOTS]*ATM_SLOTS]; + const int di = ao_loc[ish+1] - ao_loc[ish]; + const int dj = ao_loc[jsh+1] - ao_loc[jsh]; + const int dij = di * dj; + int dkmax = INTBUFMAX10 / dij / 2 * MIN(IMGBLK,nimgs); + int kshloc[ksh1-ksh0+1]; + int nkshloc = shloc_partition(kshloc, ao_loc, ksh0, ksh1, dkmax); + + int i, m, msh0, msh1, dijm; + int ksh, dk, iL, jL, dijkc, ksh_off, jsh_off; + int shls[3]; + + int nshi = ish1 - ish0; + int nshj = jsh1 - jsh0; + int nshij = nshi + nshj; + int idx_i, idx_j; + + int dijmc = dij * dkmax * comp; + double *bufL = buf + dijmc; + double *cache = bufL + dijmc; + double *pbuf; + int (*fprescreen)(); + if (pbcopt != NULL) { + fprescreen = pbcopt->fprescreen; + } else { + fprescreen = PBCnoscreen; + } + + shls[0] = ish; + shls[1] = jsh; + 
jsh_off = jsh - nshi; + NeighborList *nl0 = *neighbor_list; + NeighborPair *np0_ki, *np0_kj; + for (m = 0; m < nkshloc; m++) { + msh0 = kshloc[m]; + msh1 = kshloc[m+1]; + dkmax = ao_loc[msh1] - ao_loc[msh0]; + dijm = dij * dkmax; + dijmc = dijm * comp; + for (i = 0; i < dijmc; i++) { + bufL[i] = 0; + } + + pbuf = bufL; + for (ksh = msh0; ksh < msh1; ksh++){ + shls[2] = ksh; + ksh_off = ksh - nshij; + dk = ao_loc[ksh+1] - ao_loc[ksh]; + dijkc = dij*dk * comp; + np0_ki = (nl0->pairs)[ksh_off*nshi + ish]; + np0_kj = (nl0->pairs)[ksh_off*nshj + jsh_off]; + if (np0_ki->nimgs > 0 && np0_kj->nimgs > 0) { + for (idx_i = 0; idx_i < np0_ki->nimgs; idx_i++){ + iL = (np0_ki->Ls_list)[idx_i]; + shift_bas(env_loc, env, Ls, iptrxyz, iL); + for (idx_j = 0; idx_j < np0_kj->nimgs; idx_j++){ + jL = (np0_kj->Ls_list)[idx_j]; + shift_bas(env_loc, env, Ls, jptrxyz, jL); + + if ((*fprescreen)(shls, pbcopt, atm, bas, env_loc)) { + if ((*intor)(buf, NULL, shls, atm, natm, bas, nbas, + env_loc, cintopt, cache)) { + for (i = 0; i < dijkc; i++) { + pbuf[i] += buf[i]; + } + } + } + } + + } + } + pbuf += dijkc; + } + + (*fsort)(out, bufL, shls_slice, ao_loc, comp, ish, jsh, msh0, msh1); + } +} + +static void _nr3c_screened_sum_auxbas_fill_g(int (*intor)(), void (*fsort)(), double *out, int nkpts_ij, + int nkpts, int comp, int nimgs, int ish, int jsh, + double *buf, double *env_loc, double *Ls, + double *expkL_r, double *expkL_i, int *kptij_idx, + int *shls_slice, int *ao_loc, + CINTOpt *cintopt, PBCOpt *pbcopt, + int *atm, int natm, int *bas, int nbas, double *env, + NeighborList** neighbor_list) +{ + const int ish0 = shls_slice[0]; + const int ish1 = shls_slice[1]; + const int jsh0 = shls_slice[2]; + const int jsh1 = shls_slice[3]; + const int ksh0 = shls_slice[4]; + const int ksh1 = shls_slice[5]; + + jsh += jsh0; + ish += ish0; + int iptrxyz = atm[PTR_COORD+bas[ATOM_OF+ish*BAS_SLOTS]*ATM_SLOTS]; + int jptrxyz = atm[PTR_COORD+bas[ATOM_OF+jsh*BAS_SLOTS]*ATM_SLOTS]; + const int di = 
ao_loc[ish+1] - ao_loc[ish]; + const int dj = ao_loc[jsh+1] - ao_loc[jsh]; + const int dij = di * dj; + int dkmax = INTBUFMAX10 / dij / 2 * MIN(IMGBLK,nimgs); + //int kshloc[ksh1-ksh0+1]; + //int nkshloc = shloc_partition(kshloc, ao_loc, ksh0, ksh1, dkmax); + + int i, k, ic; + int ksh, dk, dijk, iL, jL, ksh_off, jsh_off; + int shls[3]; + + int nshi = ish1 - ish0; + int nshj = jsh1 - jsh0; + int nshij = nshi + nshj; + int idx_i, idx_j; + + int dijmc = dij * dkmax * comp; + double *bufL = buf + dijmc; + double *cache = bufL + dijmc; + double *pbuf, *pbufL; + int (*fprescreen)(); + if (pbcopt != NULL) { + fprescreen = pbcopt->fprescreen; + } else { + fprescreen = PBCnoscreen; + } + + shls[0] = ish; + shls[1] = jsh; + jsh_off = jsh - nshi; + NeighborList *nl0 = *neighbor_list; + NeighborPair *np0_ki, *np0_kj; + + int dijc = dij * comp; + for (i = 0; i < dijc; i++) { + bufL[i] = 0; + } + + for (ksh = ksh0; ksh < ksh1; ksh++){ + dk = ao_loc[ksh+1] - ao_loc[ksh]; + assert(dk < dkmax); + dijk = dij * dk; + shls[2] = ksh; + ksh_off = ksh - nshij; + np0_ki = (nl0->pairs)[ksh_off*nshi + ish]; + np0_kj = (nl0->pairs)[ksh_off*nshj + jsh_off]; + if (np0_ki->nimgs > 0 && np0_kj->nimgs > 0) { + for (idx_i = 0; idx_i < np0_ki->nimgs; idx_i++){ + iL = (np0_ki->Ls_list)[idx_i]; + shift_bas(env_loc, env, Ls, iptrxyz, iL); + for (idx_j = 0; idx_j < np0_kj->nimgs; idx_j++){ + jL = (np0_kj->Ls_list)[idx_j]; + shift_bas(env_loc, env, Ls, jptrxyz, jL); + + if ((*fprescreen)(shls, pbcopt, atm, bas, env_loc)) { + if ((*intor)(buf, NULL, shls, atm, natm, bas, nbas, + env_loc, cintopt, cache)) { + for (ic = 0; ic < comp; ic++) { + pbufL = bufL + ic * dij; + pbuf = buf + ic * dijk; + for (k = 0; k < dk; k++) { + for (i = 0; i < dij; i++) { + pbufL[i] += pbuf[i]; + } + pbuf += dij; + } + } + } + } + } + } + } + } + (*fsort)(out, bufL, shls_slice, ao_loc, comp, ish, jsh); +} + +void PBCnr3c_screened_fill_gs1(int (*intor)(), double *out, int nkpts_ij, + int nkpts, int comp, int nimgs, int ish, int 
/*
 * Scatter a batch of 3-index integrals for a shell pair with ish > jsh into
 * an output whose (i,j) index is packed as a lower triangle
 * (row i starts at i*(i+1)/2) and whose k index is the fastest one.
 *
 * out        : comp blocks of nij*naok doubles, where nij is the number of
 *              packed pairs and naok the number of k functions in
 *              shls_slice[4:6]
 * in         : for each k shell in [msh0, msh1): comp blocks of
 *              dk*dj*di values, with i fastest and k slowest
 * shls_slice : {ish0, ish1, jsh0, jsh1, ksh0, ksh1}
 * ish, jsh   : absolute shell indices into ao_loc
 */
static void sort3c_gs2_igtj(double *out, double *in, int *shls_slice, int *ao_loc,
                            int comp, int ish, int jsh, int msh0, int msh1)
{
    const int ish0 = shls_slice[0];
    const int ish1 = shls_slice[1];
    const int jsh0 = shls_slice[2];
    const int ksh0 = shls_slice[4];
    const int ksh1 = shls_slice[5];

    const int di = ao_loc[ish+1] - ao_loc[ish];
    const int dj = ao_loc[jsh+1] - ao_loc[jsh];
    const int dij = di * dj;
    const int jp = ao_loc[jsh] - ao_loc[jsh0];

    const size_t naok = ao_loc[ksh1] - ao_loc[ksh0];
    const size_t off0 = ((size_t)ao_loc[ish0]) * (ao_loc[ish0] + 1) / 2;
    const size_t nij = ((size_t)ao_loc[ish1]) * (ao_loc[ish1] + 1) / 2 - off0;
    const size_t nijk = nij * naok;

    /* First output element of this shell pair. */
    double *out_ij = out + (((size_t)ao_loc[ish])*(ao_loc[ish]+1)/2-off0 + jp) * naok;
    double *src = in;
    int ksh, ic, i, j, k;

    for (ksh = msh0; ksh < msh1; ksh++) {
        const int dk = ao_loc[ksh+1] - ao_loc[ksh];
        const int dijk = dij * dk;
        const int kp = ao_loc[ksh] - ao_loc[ksh0];

        for (ic = 0; ic < comp; ic++) {
            double *row = out_ij + nijk * ic + kp;
            const double *blk = src + dijk * ic;

            for (i = 0; i < di; i++) {
                for (j = 0; j < dj; j++) {
                    double *dst = row + j*naok;
                    const double *p = blk + (j * di + i);
                    for (k = 0; k < dk; k++) {
                        dst[k] = p[k*dij];
                    }
                }
                /* Packed row i+1 is (global i)+1 entries further on. */
                row += (i+ao_loc[ish]+1) * naok;
            }
        }
        src += dijk * comp;
    }
}
/*
 * Scatter a batch of 3-index integrals for a diagonal shell pair
 * (ish and jsh of equal size) into an output whose (i,j) index is packed as
 * a lower triangle (row i starts at i*(i+1)/2) and whose k index is the
 * fastest one.  Only elements with j <= i inside the shell block are
 * written.
 *
 * out        : comp blocks of nij*naok doubles
 * in         : for each k shell in [msh0, msh1): comp blocks of
 *              dk*di*di values, with i fastest and k slowest
 * shls_slice : {ish0, ish1, jsh0, jsh1, ksh0, ksh1}
 * ish, jsh   : absolute shell indices into ao_loc
 */
static void sort3c_gs2_ieqj(double *out, double *in, int *shls_slice, int *ao_loc,
                            int comp, int ish, int jsh, int msh0, int msh1)
{
    const int ish0 = shls_slice[0];
    const int ish1 = shls_slice[1];
    const int jsh0 = shls_slice[2];
    const int ksh0 = shls_slice[4];
    const int ksh1 = shls_slice[5];

    const int di = ao_loc[ish+1] - ao_loc[ish];
    const int dij = di * di;
    const int jp = ao_loc[jsh] - ao_loc[jsh0];

    const size_t naok = ao_loc[ksh1] - ao_loc[ksh0];
    const size_t off0 = ((size_t)ao_loc[ish0]) * (ao_loc[ish0] + 1) / 2;
    const size_t nij = ((size_t)ao_loc[ish1]) * (ao_loc[ish1] + 1) / 2 - off0;
    const size_t nijk = nij * naok;

    /* First output element of this shell pair. */
    double *out_ij = out + (((size_t)ao_loc[ish])*(ao_loc[ish]+1)/2-off0 + jp) * naok;
    double *src = in;
    int ksh, ic, i, j, k;

    for (ksh = msh0; ksh < msh1; ksh++) {
        const int dk = ao_loc[ksh+1] - ao_loc[ksh];
        const int dijk = dij * dk;
        const int kp = ao_loc[ksh] - ao_loc[ksh0];

        for (ic = 0; ic < comp; ic++) {
            double *row = out_ij + nijk * ic + kp;
            const double *blk = src + dijk * ic;

            for (i = 0; i < di; i++) {
                /* lower triangle of the diagonal block only */
                for (j = 0; j <= i; j++) {
                    double *dst = row + j*naok;
                    const double *p = blk + (j * di + i);
                    for (k = 0; k < dk; k++) {
                        dst[k] = p[k*dij];
                    }
                }
                /* Packed row i+1 is (global i)+1 entries further on. */
                row += (i+ao_loc[ish]+1) * naok;
            }
        }
        src += dijk * comp;
    }
}
/*
 * Copy one shell-pair block into a full (naoi x naoj) row-major matrix,
 * transposing it on the way.
 *
 * out        : comp matrices of naoi*naoj doubles, j fastest
 * in         : comp blocks of di*dj values with i fastest
 * shls_slice : {ish0, ish1, jsh0, jsh1, ...}; naoi and naoj are the AO
 *              counts of the i and j shell ranges
 * ish, jsh   : absolute shell indices into ao_loc
 */
void sort2c_gs1(double *out, double *in, int *shls_slice, int *ao_loc,
                int comp, int ish, int jsh)
{
    const int ish0 = shls_slice[0];
    const int ish1 = shls_slice[1];
    const int jsh0 = shls_slice[2];
    const int jsh1 = shls_slice[3];

    const size_t naoi = ao_loc[ish1] - ao_loc[ish0];
    const size_t naoj = ao_loc[jsh1] - ao_loc[jsh0];
    const size_t nij = naoi * naoj;

    const int di = ao_loc[ish+1] - ao_loc[ish];
    const int dj = ao_loc[jsh+1] - ao_loc[jsh];
    const int dij = di * dj;
    const int ip = ao_loc[ish] - ao_loc[ish0];
    const int jp = ao_loc[jsh] - ao_loc[jsh0];

    /* Top-left corner of the block inside the first component. */
    double *corner = out + (ip * naoj + jp);
    int ic, i, j;

    for (ic = 0; ic < comp; ic++) {
        double *row = corner + nij * ic;
        const double *blk = in + dij * ic;
        for (i = 0; i < di; i++, row += naoj) {
            for (j = 0; j < dj; j++) {
                row[j] = blk[j*di+i];
            }
        }
    }
}
&sort3c_gs2_igtj, out, + nkpts_ij, nkpts, comp, nimgs, ish, jsh, + buf, env_loc, Ls, expkL_r, expkL_i, kptij_idx, + shls_slice, ao_loc, cintopt, pbcopt, atm, natm, bas, nbas, env, neighbor_list); + } else if (ip == jp) { + _nr3c_screened_fill_g(intor, &sort3c_gs2_ieqj, out, + nkpts_ij, nkpts, comp, nimgs, ish, jsh, + buf, env_loc, Ls, expkL_r, expkL_i, kptij_idx, + shls_slice, ao_loc, cintopt, pbcopt, atm, natm, bas, nbas, env, neighbor_list); + } +} + +void PBCnr3c_screened_sum_auxbas_fill_gs1(int (*intor)(), double *out, int nkpts_ij, + int nkpts, int comp, int nimgs, int ish, int jsh, + double *buf, double *env_loc, double *Ls, + double *expkL_r, double *expkL_i, int *kptij_idx, + int *shls_slice, int *ao_loc, + CINTOpt *cintopt, PBCOpt *pbcopt, + int *atm, int natm, int *bas, int nbas, double *env, + NeighborList** neighbor_list) +{ + _nr3c_screened_sum_auxbas_fill_g(intor, &sort2c_gs1, out, + nkpts_ij, nkpts, comp, nimgs, ish, jsh, + buf, env_loc, Ls, expkL_r, expkL_i, kptij_idx, + shls_slice, ao_loc, cintopt, pbcopt, atm, natm, bas, nbas, env, neighbor_list); +} + +void PBCnr3c_screened_sum_auxbas_fill_gs2(int (*intor)(), double *out, int nkpts_ij, + int nkpts, int comp, int nimgs, int ish, int jsh, + double *buf, double *env_loc, double *Ls, + double *expkL_r, double *expkL_i, int *kptij_idx, + int *shls_slice, int *ao_loc, + CINTOpt *cintopt, PBCOpt *pbcopt, + int *atm, int natm, int *bas, int nbas, double *env, + NeighborList** neighbor_list) +{ + int ip = ish + shls_slice[0]; + int jp = jsh + shls_slice[2] - nbas; + if (ip > jp) { + _nr3c_screened_sum_auxbas_fill_g(intor, &sort2c_gs2_igtj, out, + nkpts_ij, nkpts, comp, nimgs, ish, jsh, + buf, env_loc, Ls, expkL_r, expkL_i, kptij_idx, + shls_slice, ao_loc, cintopt, pbcopt, atm, natm, bas, nbas, env, neighbor_list); + } else if (ip == jp) { + _nr3c_screened_sum_auxbas_fill_g(intor, &sort2c_gs2_ieqj, out, + nkpts_ij, nkpts, comp, nimgs, ish, jsh, + buf, env_loc, Ls, expkL_r, expkL_i, kptij_idx, + shls_slice, 
ao_loc, cintopt, pbcopt, atm, natm, bas, nbas, env, neighbor_list); + } +} + +static void contract_3c1e_ipik_dm_gs1(double *grad, double* dm, double *eri, + int *shls, int *ao_loc, int *atm, int natm, + int *bas, int nbas, int comp, int nao) +{ + const int ish = shls[0]; + const int jsh = shls[1]; + const int ksh = shls[2]; + + const int di = ao_loc[ish+1] - ao_loc[ish]; + const int dj = ao_loc[jsh+1] - ao_loc[jsh]; + const int dij = di * dj; + const size_t i0 = ao_loc[ish]; + const size_t j0 = ao_loc[jsh] - nao; + + const int ia = bas[ATOM_OF+ish*BAS_SLOTS]; + const int ka = bas[ATOM_OF+ksh*BAS_SLOTS] - 2*natm; + + int i, j, ic; + double *ptr_eri, *ptr_dm; + double *dm0 = dm + (i0 * nao + j0); + double ipi_dm[comp]; + for (ic = 0; ic < comp; ic++) { + ipi_dm[ic] = 0; + ptr_dm = dm0; + ptr_eri = eri + dij * ic; + for (i = 0; i < di; i++) { + for (j = 0; j < dj; j++) { + ipi_dm[ic] += ptr_eri[j*di+i] * ptr_dm[j]; + } + ptr_dm += nao; + } + } + + for (ic = 0; ic < comp; ic++) { + grad[ia*comp+ic] += ipi_dm[ic]; + grad[ka*comp+ic] -= ipi_dm[ic]; + } +} + +static void _nr3c1e_screened_nuc_grad_fill_g(int (*intor)(), void (*fcontract)(), + double *grad, double *dm, int nkpts_ij, int nkpts, + int comp, int nimgs, int ish, int jsh, + double *buf, double *env_loc, double *Ls, + double *expkL_r, double *expkL_i, int *kptij_idx, + int *shls_slice, int *ao_loc, + CINTOpt *cintopt, PBCOpt *pbcopt, + int *atm, int natm, int *bas, int nbas, double *env, int nao, + NeighborList** neighbor_list) +{ + const int ish0 = shls_slice[0]; + //const int ish1 = shls_slice[1]; + const int jsh0 = shls_slice[2]; + //const int jsh1 = shls_slice[3]; + const int ksh0 = shls_slice[4]; + const int ksh1 = shls_slice[5]; + + ish += ish0; + jsh += jsh0; + int iptrxyz = atm[PTR_COORD+bas[ATOM_OF+ish*BAS_SLOTS]*ATM_SLOTS]; + int jptrxyz = atm[PTR_COORD+bas[ATOM_OF+jsh*BAS_SLOTS]*ATM_SLOTS]; + const int di = ao_loc[ish+1] - ao_loc[ish]; + const int dj = ao_loc[jsh+1] - ao_loc[jsh]; + const int dij = di 
* dj; + int dkmax = INTBUFMAX10 / dij / 2 * MIN(IMGBLK,nimgs); + //int kshloc[ksh1-ksh0+1]; + //int nkshloc = shloc_partition(kshloc, ao_loc, ksh0, ksh1, dkmax); + + int i, k, ic; + int ksh, dk, dijk, iL, jL, ksh_off, jsh_off; + int shls[3]; + + int idx_i, idx_j; + + int dijc = dij * comp; + int dijmc = dijc * dkmax; + double *bufL = buf + dijmc; + double *cache = bufL + dijc; + double *pbuf, *pbufL; + int (*fprescreen)(); + if (pbcopt != NULL) { + fprescreen = pbcopt->fprescreen; + } else { + fprescreen = PBCnoscreen; + } + + shls[0] = ish; + shls[1] = jsh; + jsh_off = jsh - nbas; + NeighborList *nl0 = *neighbor_list; + NeighborPair *np0_ki, *np0_kj; + + for (ksh = ksh0; ksh < ksh1; ksh++){ + dk = ao_loc[ksh+1] - ao_loc[ksh]; + assert(dk < dkmax); + dijk = dij * dk; + shls[2] = ksh; + ksh_off = ksh - nbas*2; + np0_ki = (nl0->pairs)[ksh_off*nbas + ish]; + np0_kj = (nl0->pairs)[ksh_off*nbas + jsh_off]; + if (np0_ki->nimgs > 0 && np0_kj->nimgs > 0) { + for (i = 0; i < dijc; i++) { + bufL[i] = 0; + } + for (idx_i = 0; idx_i < np0_ki->nimgs; idx_i++){ + iL = (np0_ki->Ls_list)[idx_i]; + shift_bas(env_loc, env, Ls, iptrxyz, iL); + for (idx_j = 0; idx_j < np0_kj->nimgs; idx_j++){ + jL = (np0_kj->Ls_list)[idx_j]; + shift_bas(env_loc, env, Ls, jptrxyz, jL); + + if ((*fprescreen)(shls, pbcopt, atm, bas, env_loc)) { + if ((*intor)(buf, NULL, shls, atm, natm, bas, nbas, + env_loc, cintopt, cache)) + { + for (ic = 0; ic < comp; ic++) { + pbufL = bufL + ic * dij; + pbuf = buf + ic * dijk; + for (k = 0; k < dk; k++) { + for (i = 0; i < dij; i++) { + pbufL[i] += pbuf[i]; + } + pbuf += dij; + } + } + } + } + } + } + (*fcontract)(grad, dm, bufL, shls, ao_loc, atm, natm, bas, nbas, comp, nao); + } + } +} + +void PBCnr3c1e_screened_nuc_grad_fill_gs1(int (*intor)(), double *out, double* dm, + int nkpts_ij, int nkpts, int comp, int nimgs, int ish, int jsh, + double *buf, double *env_loc, double *Ls, + double *expkL_r, double *expkL_i, int *kptij_idx, + int *shls_slice, int *ao_loc, + 
CINTOpt *cintopt, PBCOpt *pbcopt, + int *atm, int natm, int *bas, int nbas, double *env, int nao, + NeighborList** neighbor_list) +{ + _nr3c1e_screened_nuc_grad_fill_g(intor, &contract_3c1e_ipik_dm_gs1, out, dm, + nkpts_ij, nkpts, comp, nimgs, ish, jsh, + buf, env_loc, Ls, expkL_r, expkL_i, kptij_idx, + shls_slice, ao_loc, cintopt, pbcopt, atm, natm, bas, nbas, env, nao, neighbor_list); +} + +void PBCnr3c_screened_drv(int (*intor)(), void (*fill)(), double complex *eri, + int nkpts_ij, int nkpts, int comp, int nimgs, + double *Ls, double complex *expkL, int *kptij_idx, + int *shls_slice, int *ao_loc, + CINTOpt *cintopt, PBCOpt *pbcopt, + int *atm, int natm, int *bas, int nbas, double *env, int nenv, + NeighborList** neighbor_list) +{ + assert(neighbor_list != NULL); + const int ish0 = shls_slice[0]; + const int ish1 = shls_slice[1]; + const int jsh0 = shls_slice[2]; + const int jsh1 = shls_slice[3]; + const int nish = ish1 - ish0; + const int njsh = jsh1 - jsh0; + double *expkL_r = malloc(sizeof(double) * nimgs*nkpts * OF_CMPLX); + double *expkL_i = expkL_r + nimgs*nkpts; + int i; + for (i = 0; i < nimgs*nkpts; i++) { + expkL_r[i] = creal(expkL[i]); + expkL_i[i] = cimag(expkL[i]); + } + + size_t count; + count = (nkpts * OF_CMPLX + nimgs) * INTBUFMAX10 * comp; + count+= nimgs * nkpts * OF_CMPLX; + const int cache_size = GTOmax_cache_size(intor, shls_slice, 3, + atm, natm, bas, nbas, env); + +#pragma omp parallel +{ + int ish, jsh, ij; + double *env_loc = malloc(sizeof(double)*nenv); + NPdcopy(env_loc, env, nenv); + double *buf = malloc(sizeof(double)*(count+cache_size)); +#pragma omp for schedule(dynamic) + for (ij = 0; ij < nish*njsh; ij++) { + ish = ij / njsh; + jsh = ij % njsh; + (*fill)(intor, eri, nkpts_ij, nkpts, comp, nimgs, ish, jsh, + buf, env_loc, Ls, expkL_r, expkL_i, kptij_idx, + shls_slice, ao_loc, cintopt, pbcopt, atm, natm, bas, nbas, env, neighbor_list); + } + free(buf); + free(env_loc); +} + free(expkL_r); +} + +void 
PBCnr3c_screened_sum_auxbas_drv(int (*intor)(), void (*fill)(), double complex *eri, + int nkpts_ij, int nkpts, int comp, int nimgs, + double *Ls, double complex *expkL, int *kptij_idx, + int *shls_slice, int *ao_loc, + CINTOpt *cintopt, PBCOpt *pbcopt, + int *atm, int natm, int *bas, int nbas, double *env, int nenv, + NeighborList** neighbor_list) +{ + assert(neighbor_list != NULL); + const int ish0 = shls_slice[0]; + const int ish1 = shls_slice[1]; + const int jsh0 = shls_slice[2]; + const int jsh1 = shls_slice[3]; + const int nish = ish1 - ish0; + const int njsh = jsh1 - jsh0; + double *expkL_r=NULL, *expkL_i=NULL; + //expkL_r = malloc(sizeof(double) * nimgs*nkpts * OF_CMPLX); + //expkL_i = expkL_r + nimgs*nkpts; + //int i; + //for (i = 0; i < nimgs*nkpts; i++) { + // expkL_r[i] = creal(expkL[i]); + // expkL_i[i] = cimag(expkL[i]); + //} + + size_t count; + count = (nkpts * OF_CMPLX + nimgs) * INTBUFMAX10 * comp; + count+= nimgs * nkpts * OF_CMPLX; + const int cache_size = GTOmax_cache_size(intor, shls_slice, 3, + atm, natm, bas, nbas, env); + +#pragma omp parallel +{ + int ish, jsh, ij; + double *env_loc = malloc(sizeof(double)*nenv); + NPdcopy(env_loc, env, nenv); + double *buf = malloc(sizeof(double)*(count+cache_size)); +#pragma omp for schedule(dynamic) + for (ij = 0; ij < nish*njsh; ij++) { + ish = ij / njsh; + jsh = ij % njsh; + (*fill)(intor, eri, nkpts_ij, nkpts, comp, nimgs, ish, jsh, + buf, env_loc, Ls, expkL_r, expkL_i, kptij_idx, + shls_slice, ao_loc, cintopt, pbcopt, atm, natm, bas, nbas, env, neighbor_list); + } + free(buf); + free(env_loc); +} + //free(expkL_r); +} + +void PBCnr3c1e_screened_nuc_grad_drv(int (*intor)(), void (*fill)(), + double* grad, double* dm, + int nkpts_ij, int nkpts, int comp, int nimgs, + double *Ls, double complex *expkL, int *kptij_idx, + int *shls_slice, int *ao_loc, + CINTOpt *cintopt, PBCOpt *pbcopt, + int *atm, int natm, int *bas, int nbas, double *env, int nenv, int nao, + NeighborList** neighbor_list) +{ + 
assert(neighbor_list != NULL); + const int ish0 = shls_slice[0]; + const int ish1 = shls_slice[1]; + const int jsh0 = shls_slice[2]; + const int jsh1 = shls_slice[3]; + const int nish = ish1 - ish0; + const int njsh = jsh1 - jsh0; + double *expkL_r=NULL, *expkL_i=NULL; + //double *expkL_r = malloc(sizeof(double) * nimgs*nkpts * OF_CMPLX); + //double *expkL_i = expkL_r + nimgs*nkpts; + //int i; + //for (i = 0; i < nimgs*nkpts; i++) { + // expkL_r[i] = creal(expkL[i]); + // expkL_i[i] = cimag(expkL[i]); + //} + + size_t count; + count = (nkpts * OF_CMPLX + nimgs) * INTBUFMAX10 * comp; + count+= nimgs * nkpts * OF_CMPLX; + const int cache_size = GTOmax_cache_size(intor, shls_slice, 3, + atm, natm, bas, nbas, env); + + double *gradbufs[MAX_THREADS]; +#pragma omp parallel +{ + int ish, jsh, ij; + double *env_loc = malloc(sizeof(double)*nenv); + NPdcopy(env_loc, env, nenv); + double *grad_loc; + int thread_id = omp_get_thread_num(); + if (thread_id == 0) { + grad_loc = grad; + } else { + grad_loc = calloc(natm*comp, sizeof(double)); + } + gradbufs[thread_id] = grad_loc; + + double *buf = malloc(sizeof(double)*(count+cache_size)); + #pragma omp for schedule(dynamic) + for (ij = 0; ij < nish*njsh; ij++) { + ish = ij / njsh; + jsh = ij % njsh; + (*fill)(intor, grad_loc, dm, nkpts_ij, nkpts, comp, nimgs, ish, jsh, + buf, env_loc, Ls, expkL_r, expkL_i, kptij_idx, + shls_slice, ao_loc, cintopt, pbcopt, atm, natm, bas, nbas, env, nao, neighbor_list); + } + free(buf); + free(env_loc); + + NPomp_dsum_reduce_inplace(gradbufs, natm*comp); + if (thread_id != 0) { + free(grad_loc); + } +} + //free(expkL_r); +} + + +static int _nr2c_screened_fill( + int (*intor)(), double complex *out, + int nkpts, int comp, int nimgs, int jsh, int ish0, + double *buf, double *env_loc, double *Ls, + double *expkL_r, double *expkL_i, + int *shls_slice, int *ao_loc, CINTOpt *cintopt, + int *atm, int natm, int *bas, int nbas, double *env, + NeighborList** neighbor_list) +{ + const int ish1 = 
shls_slice[1]; + const int jsh0 = shls_slice[2]; + const int jsh1 = shls_slice[3]; + const int nshi = ish1 - shls_slice[0]; + const int nshj = jsh1 - jsh0; + + const double D1 = 1; + const int I1 = 1; + + ish0 += shls_slice[0]; + jsh += jsh0; + int jsh_off = jsh - nshi; + int jptrxyz = atm[PTR_COORD+bas[ATOM_OF+jsh*BAS_SLOTS]*ATM_SLOTS]; + const int dj = ao_loc[jsh+1] - ao_loc[jsh]; + int dimax = INTBUFMAX10 / dj; + int ishloc[ish1-ish0+1]; + int nishloc = shloc_partition(ishloc, ao_loc, ish0, ish1, dimax); + + int m, msh0, msh1, dijc, dmjc, ish, di, empty = 1; /* stays 1 unless intor reports a non-zero block; was read uninitialized by the final return */ + int jL, idx_j; + int shls[2]; + double *bufk_r = buf; + double *bufk_i, *bufL, *pbufk_r, *pbufk_i, *cache; + + NeighborList *nl0 = *neighbor_list; + NeighborPair *np0; + + shls[1] = jsh; + for (m = 0; m < nishloc; m++) { + msh0 = ishloc[m]; + msh1 = ishloc[m+1]; + dimax = ao_loc[msh1] - ao_loc[msh0]; + dmjc = dj * dimax * comp; + bufk_i = bufk_r + dmjc * nkpts; + bufL = bufk_i + dmjc * nkpts; + cache = bufL + dmjc; + + memset(bufk_r, 0, 2*dmjc*nkpts*sizeof(double)); + pbufk_r = bufk_r; + pbufk_i = bufk_i; + for (ish = msh0; ish < msh1; ish++) { + shls[0] = ish; + di = ao_loc[ish+1] - ao_loc[ish]; + dijc = di * dj * comp; + np0 = (nl0->pairs)[ish*nshj + jsh_off]; + if (np0->nimgs > 0) { + for (idx_j = 0; idx_j < np0->nimgs; idx_j++){ + jL = (np0->Ls_list)[idx_j]; + shift_bas(env_loc, env, Ls, jptrxyz, jL); + if ((*intor)(bufL, NULL, shls, atm, natm, bas, nbas, + env_loc, cintopt, cache)) { + empty = 0; + dger_(&dijc, &nkpts, &D1, bufL, &I1, + expkL_r+jL, &nimgs, pbufk_r, &dmjc); + dger_(&dijc, &nkpts, &D1, bufL, &I1, + expkL_i+jL, &nimgs, pbufk_i, &dmjc); + } + } + } + pbufk_r += dijc; + pbufk_i += dijc; + } + sort2c_ks1(out, bufk_r, bufk_i, shls_slice, ao_loc, + nkpts, comp, jsh, msh0, msh1); + } + return !empty; +} + +void PBCnr2c_screened_fill_ks1(int (*intor)(), double complex *out, + int nkpts, int comp, int nimgs, int jsh, + double *buf, double *env_loc, double *Ls, + double *expkL_r, double *expkL_i, +
int *shls_slice, int *ao_loc, CINTOpt *cintopt, + int *atm, int natm, int *bas, int nbas, double *env, + NeighborList** neighbor_list) +{ + _nr2c_screened_fill(intor, out, nkpts, comp, nimgs, jsh, 0, + buf, env_loc, Ls, expkL_r, expkL_i, shls_slice, ao_loc, + cintopt, atm, natm, bas, nbas, env, neighbor_list); +} + +void PBCnr2c_screened_fill_ks2(int (*intor)(), double complex *out, + int nkpts, int comp, int nimgs, int jsh, + double *buf, double *env_loc, double *Ls, + double *expkL_r, double *expkL_i, + int *shls_slice, int *ao_loc, CINTOpt *cintopt, + int *atm, int natm, int *bas, int nbas, double *env, + NeighborList** neighbor_list) +{ + _nr2c_screened_fill(intor, out, nkpts, comp, nimgs, jsh, jsh, + buf, env_loc, Ls, expkL_r, expkL_i, shls_slice, ao_loc, + cintopt, atm, natm, bas, nbas, env, neighbor_list); +} + +void PBCnr2c_screened_drv(int (*intor)(), void (*fill)(), double complex *out, + int nkpts, int comp, int nimgs, + double *Ls, double complex *expkL, + int *shls_slice, int *ao_loc, CINTOpt *cintopt, + int *atm, int natm, int *bas, int nbas, double *env, int nenv, + NeighborList** neighbor_list) +{ + assert(neighbor_list != NULL); + const int jsh0 = shls_slice[2]; + const int jsh1 = shls_slice[3]; + const int njsh = jsh1 - jsh0; + double *expkL_r = malloc(sizeof(double) * nimgs*nkpts * OF_CMPLX); + double *expkL_i = expkL_r + nimgs*nkpts; + int i; + for (i = 0; i < nimgs*nkpts; i++) { + expkL_r[i] = creal(expkL[i]); + expkL_i[i] = cimag(expkL[i]); + } + const int cache_size = GTOmax_cache_size(intor, shls_slice, 2, + atm, natm, bas, nbas, env); + +#pragma omp parallel +{ + int jsh; + double *env_loc = malloc(sizeof(double)*nenv); + NPdcopy(env_loc, env, nenv); + size_t count = (nkpts+1) * OF_CMPLX; + double *buf = malloc(sizeof(double)*(count*INTBUFMAX10*comp+cache_size)); +#pragma omp for schedule(dynamic) + for (jsh = 0; jsh < njsh; jsh++) { + (*fill)(intor, out, nkpts, comp, nimgs, jsh, + buf, env_loc, Ls, expkL_r, expkL_i, + shls_slice, ao_loc, 
cintopt, atm, natm, bas, nbas, env, + neighbor_list); + } + free(buf); + free(env_loc); +} + free(expkL_r); +} diff --git a/pyscf/lib/pbc/hf_grad.c b/pyscf/lib/pbc/hf_grad.c new file mode 100644 index 0000000000..7c781fba19 --- /dev/null +++ b/pyscf/lib/pbc/hf_grad.c @@ -0,0 +1,95 @@ +/* Copyright 2021-2024 The PySCF Developers. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + * + * Author: Xing Zhang + */ + +#include +#include "config.h" +#include "vhf/fblas.h" +#include "np_helper/np_helper.h" +#include "pbc/neighbor_list.h" + +#define MAX_THREADS 256 + +void contract_vhf_dm(double* out, double* vhf, double* dm, + NeighborList** neighbor_list, + int* shls_slice, int* ao_loc, int* shls_atm, + int comp, int natm, int nbas) +{ + const int ish0 = shls_slice[0]; + const int ish1 = shls_slice[1]; + const int jsh0 = shls_slice[2]; + const int jsh1 = shls_slice[3]; + const int nish = ish1 - ish0; + const int njsh = jsh1 - jsh0; + const size_t nijsh = (size_t)nish * njsh; + const size_t naoi = ao_loc[ish1] - ao_loc[ish0]; + const size_t naoj = ao_loc[jsh1] - ao_loc[jsh0]; + + const int I1 = 1; + double *out_bufs[MAX_THREADS]; + +#pragma omp parallel +{ + size_t ij, ish, jsh, p0, q0; + int ni, nj, i, ic, iatm, nimgs=1; + NeighborList *nl0=NULL; + if (neighbor_list != NULL) { + nl0 = *neighbor_list; + } + double *pvhf, *pdm; + + int thread_id = omp_get_thread_num(); + double *buf; + if (thread_id == 0) { + buf = out; + } else { + buf = calloc(comp*natm, 
sizeof(double)); + } + out_bufs[thread_id] = buf; + + #pragma omp for schedule(dynamic) + for (ij = 0; ij < nijsh; ij++) { + ish = ij / njsh + ish0; + jsh = ij % njsh + jsh0; + + if (nl0 != NULL) { + nimgs = ((nl0->pairs)[ish*nbas + jsh])->nimgs; + } + if (nimgs > 0) { // this shell pair has contribution + p0 = ao_loc[ish] - ao_loc[ish0]; + q0 = ao_loc[jsh] - ao_loc[jsh0]; + ni = ao_loc[ish+1] - ao_loc[ish]; + nj = ao_loc[jsh+1] - ao_loc[jsh]; + + iatm = shls_atm[ish]; + pvhf = vhf + (p0 * naoj + q0); + pdm = dm + (p0 * naoj + q0); + for (ic = 0; ic < comp; ic++) { + for (i = 0; i < ni; i++) { + buf[iatm*comp+ic] += ddot_(&nj, pvhf+i*naoj, &I1, pdm+i*naoj, &I1); /* stride is comp (not a literal 3) to match the comp*natm buffer that is allocated and reduced */ + } + pvhf += naoi * naoj; + } + } + } + + NPomp_dsum_reduce_inplace(out_bufs, comp*natm); + if (thread_id != 0) { + free(buf); + } +} +} diff --git a/pyscf/lib/pbc/neighbor_list.c b/pyscf/lib/pbc/neighbor_list.c new file mode 100644 index 0000000000..26fb52fd37 --- /dev/null +++ b/pyscf/lib/pbc/neighbor_list.c @@ -0,0 +1,206 @@ +/* Copyright 2021- The PySCF Developers. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + * + * Author: Xing Zhang + */ + +#include +#include +#include "config.h" +#include "cint.h" +#include "pbc/neighbor_list.h" + +#define SQUARE(r) (r[0]*r[0]+r[1]*r[1]+r[2]*r[2]) + +void init_neighbor_pair(NeighborPair** np, int nimgs, int* Ls_list) +{ + NeighborPair *np0 = (NeighborPair*) malloc(sizeof(NeighborPair)); + np0->nimgs = nimgs; + np0->q_cond = NULL; + np0->center = NULL; + if (nimgs > 0){ + np0->Ls_list = (int*) malloc(sizeof(int)*nimgs); + int i; + for (i=0; iLs_list[i] = Ls_list[i]; + } + } + else { + np0->Ls_list = NULL; + } + *np = np0; +} + +void del_neighbor_pair(NeighborPair** np) +{ + NeighborPair *np0 = *np; + if (!np0) { + return; + } + if (np0->Ls_list) { + free(np0->Ls_list); + } + if (np0->q_cond) { + free(np0->q_cond); + } + if (np0->center) { + free(np0->center); + } + free(np0); + *np = NULL; +} + +void init_neighbor_list(NeighborList** nl, int nish, int njsh, int nimgs) +{ + NeighborList *nl0 = (NeighborList*) malloc(sizeof(NeighborList)); + nl0->nish = nish; + nl0->njsh = njsh; + nl0->nimgs = nimgs; + nl0->pairs = (NeighborPair**) malloc(sizeof(NeighborPair*)*nish*njsh); + int ish, jsh; + for (ish=0; ishpairs)[ish*njsh+jsh] = NULL; + } + *nl = nl0; +} + +void build_neighbor_list(NeighborList** nl, + int* ish_atm, int* ish_bas, double* ish_env, double* ish_rcut, + int* jsh_atm, int* jsh_bas, double* jsh_env, double* jsh_rcut, + int nish, int njsh, double* Ls, int nimgs, int hermi) +{ + init_neighbor_list(nl, nish, njsh, nimgs); + NeighborList* nl0 = *nl; + +#pragma omp parallel +{ + int *buf = (int*) malloc(sizeof(int)*nimgs); + int ish, jsh, iL, nL; + int ish_atm_id, jsh_atm_id; + double ish_radius, jsh_radius, rmax, dij; + double *ish_ratm, *jsh_ratm, *rL; + double rij[3]; + NeighborPair **np = NULL; +#pragma omp for schedule(dynamic) + for (ish=0; ishpairs + ish*njsh+jsh; + init_neighbor_pair(np, nL, buf); + } + } + free(buf); +} +} + +void del_neighbor_list(NeighborList** nl) +{ + NeighborList *nl0 = *nl; + if (!nl0) { + return; 
+ } + int ish, jsh; + int nish = nl0->nish; + int njsh = nl0->njsh; + if (nl0->pairs) { + for (ish=0; ishpairs + ish*njsh+jsh); + } + } + free(nl0->pairs); + } + free(nl0); + *nl = NULL; +} + + +int NLOpt_noscreen(int* shls, NeighborListOpt* opt) +{ + return 1; +} + +int NLOpt_screen(int* shls, NeighborListOpt* opt) +{ + int ish = shls[0]; + int jsh = shls[1]; + NeighborList *nl = opt->nl; + int njsh = nl->njsh; + NeighborPair *np; + np = (nl->pairs)[ish*njsh + jsh]; + return np->nimgs > 0; +} + +void NLOpt_init(NeighborListOpt **opt) +{ + NeighborListOpt *opt0 = malloc(sizeof(NeighborListOpt)); + opt0->nl = NULL; + opt0->fprescreen = &NLOpt_noscreen; + *opt = opt0; +} + +void NLOpt_del(NeighborListOpt **opt) +{ + NeighborListOpt *opt0 = *opt; + if (!opt0) { + return; + } + free(opt0); + *opt = NULL; +} + +void NLOpt_set_nl(NeighborListOpt *opt, NeighborList *nl) +{ + opt->nl = nl; +} + +void NLOpt_reset(NeighborListOpt *opt) +{ + opt->nl = NULL; + opt->fprescreen = &NLOpt_screen; +} + +void NLOpt_set_optimizer(NeighborListOpt *opt) +{ + opt->fprescreen = &NLOpt_screen; +} + +void NLOpt_del_optimizer(NeighborListOpt *opt) +{ + opt->fprescreen = &NLOpt_noscreen; +} + diff --git a/pyscf/lib/pbc/neighbor_list.h b/pyscf/lib/pbc/neighbor_list.h new file mode 100644 index 0000000000..3364be1f3d --- /dev/null +++ b/pyscf/lib/pbc/neighbor_list.h @@ -0,0 +1,41 @@ +/* Copyright 2021- The PySCF Developers. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + * + * Author: Xing Zhang + */ + +#ifndef HAVE_DEFINED_NEIGHBOR_LIST_H +#define HAVE_DEFINED_NEIGHBOR_LIST_H +typedef struct NeighborPair_struct { + int nimgs; + int *Ls_list; + double *q_cond; + double *center; +} NeighborPair; + +typedef struct NeighborList_struct { + int nish; + int njsh; + int nimgs; + NeighborPair **pairs; +} NeighborList; + +typedef struct NeighborListOpt_struct { + NeighborList *nl; + int (*fprescreen)(int *shls, struct NeighborListOpt_struct *opt); +} NeighborListOpt; + +int NLOpt_noscreen(int* shls, NeighborListOpt* opt); +#endif diff --git a/pyscf/lib/pbc/optimizer.c b/pyscf/lib/pbc/optimizer.c index d30c81c3e8..a37494ca0a 100644 --- a/pyscf/lib/pbc/optimizer.c +++ b/pyscf/lib/pbc/optimizer.c @@ -17,6 +17,7 @@ */ #include +#include #include "cint.h" #include "pbc/optimizer.h" @@ -27,6 +28,7 @@ void PBCinit_optimizer(PBCOpt **opt, int *atm, int natm, { PBCOpt *opt0 = malloc(sizeof(PBCOpt)); opt0->rrcut = NULL; + opt0->rcut = NULL; opt0->fprescreen = &PBCnoscreen; *opt = opt0; } @@ -41,11 +43,13 @@ void PBCdel_optimizer(PBCOpt **opt) if (opt0->rrcut != NULL) { free(opt0->rrcut); } + if (opt0->rcut != NULL) { /* release the loose-screening cutoffs when allocated; the test was inverted and leaked them */ + free(opt0->rcut); + } free(opt0); *opt = NULL; } - int PBCnoscreen(int *shls, PBCOpt *opt, int *atm, int *bas, double *env) { return 1; @@ -68,6 +72,23 @@ int PBCrcut_screen(int *shls, PBCOpt *opt, int *atm, int *bas, double *env) return (rr < opt->rrcut[ish] || rr < opt->rrcut[jsh]); } +int PBCrcut_screen_loose(int *shls, PBCOpt *opt, int *atm, int *bas, double *env) +{ + if (opt == NULL) { + return 1; // no screen + } + const int ish = shls[0]; + const int jsh = shls[1]; + const double *ri = env + atm[bas[ATOM_OF+ish*BAS_SLOTS]*ATM_SLOTS+PTR_COORD]; + const double *rj = env + atm[bas[ATOM_OF+jsh*BAS_SLOTS]*ATM_SLOTS+PTR_COORD]; + double rirj[3]; + rirj[0] = ri[0] - rj[0]; + rirj[1] = ri[1] - rj[1]; + rirj[2] = ri[2] - rj[2]; + double r = sqrt(SQUARE(rirj)); + return r < opt->rcut[ish] + opt->rcut[jsh]; +} + void PBCset_rcut_cond(PBCOpt 
*opt, double *rcut, int *atm, int natm, int *bas, int nbas, double *env) { @@ -82,3 +103,18 @@ void PBCset_rcut_cond(PBCOpt *opt, double *rcut, opt->rrcut[i] = rcut[i] * rcut[i]; } } + +void PBCset_rcut_cond_loose(PBCOpt *opt, double *rcut, + int *atm, int natm, int *bas, int nbas, double *env) +{ + if (opt->rcut != NULL) { + free(opt->rcut); + } + opt->rcut = (double *)malloc(sizeof(double) * nbas); + opt->fprescreen = &PBCrcut_screen_loose; + + int i; + for (i = 0; i < nbas; i++) { + opt->rcut[i] = rcut[i]; + } +} diff --git a/pyscf/lib/pbc/optimizer.h b/pyscf/lib/pbc/optimizer.h index ff3299715b..62c8be5d32 100644 --- a/pyscf/lib/pbc/optimizer.h +++ b/pyscf/lib/pbc/optimizer.h @@ -16,10 +16,11 @@ * Author: Qiming Sun */ -#if !defined(HAVE_DEFINED_CVHFOPT_H) -#define HAVE_DEFINED_CVHFOPT_H +#if !defined(HAVE_DEFINED_PBCOPT_H) +#define HAVE_DEFINED_PBCOPT_H typedef struct PBCOpt_struct { double *rrcut; + double *rcut; int (*fprescreen)(int *shls, struct PBCOpt_struct *opt, int *atm, int *bas, double *env); } PBCOpt; @@ -27,4 +28,3 @@ typedef struct PBCOpt_struct { int PBCnoscreen(int *shls, PBCOpt *opt, int *atm, int *bas, double *env); int PBCrcut_screen(int *shls, PBCOpt *opt, int *atm, int *bas, double *env); - diff --git a/pyscf/lib/pbc/pp.c b/pyscf/lib/pbc/pp.c new file mode 100644 index 0000000000..4885080544 --- /dev/null +++ b/pyscf/lib/pbc/pp.c @@ -0,0 +1,448 @@ +/* Copyright 2021- The PySCF Developers. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + * + * Author: Xing Zhang + */ + +#include +#include +#include +#include +#include "config.h" +#include "cint.h" +#include "gto/gto.h" +#include "vhf/fblas.h" +#include "np_helper/np_helper.h" +#include "pbc/fill_ints.h" +#include "pbc/neighbor_list.h" + +#define HL_TABLE_SLOTS 7 +//#define ATOM_OF 0 +//#define ANG_OF 1 +#define HL_DIM_OF 2 +#define HL_DATA_OF 3 +#define HL_OFFSET0 4 +#define HF_OFFSET1 5 +#define HF_OFFSET2 6 +#define MAX_THREADS 256 + + +static void _ppnl_fill_g(void (*fsort)(), double* out, double** ints, + int comp, int ish, int jsh, double* buf, + int *shls_slice, int *ao_loc, + int* hl_table, double* hl_data, int nhl, + NeighborListOpt* nlopt) +{ + const int ish0 = shls_slice[0]; + const int ish1 = shls_slice[1]; + const int jsh0 = shls_slice[2]; + const int jsh1 = shls_slice[3]; + + ish += ish0; + jsh += jsh0; + + const int di = ao_loc[ish+1] - ao_loc[ish]; + const int dj = ao_loc[jsh+1] - ao_loc[jsh]; + const int dij = di *dj; + const int ioff = ao_loc[ish] - ao_loc[ish0]; + const int joff = ao_loc[jsh] - ao_loc[jsh0]; + const int naoi = ao_loc[ish1] - ao_loc[ish0]; + const int naoj = ao_loc[jsh1] - ao_loc[jsh0]; + + int i, j, ij, pi, pj, ksh; + int hl_dim, nd; + int shls_ki[2], shls_kj[2]; + int *table, *offset; + double *hl; + for (ij = 0; ij < dij; ij++) { + buf[ij] = 0; + } + + int (*fprescreen)(); + if (nlopt != NULL) { + fprescreen = nlopt->fprescreen; + } else { + fprescreen = NLOpt_noscreen; + } + + const char TRANS_N = 'N'; + const char TRANS_T = 'T'; + const double D1 = 1.; + for (ksh = 0; ksh < nhl; ksh++) { + shls_ki[0] = ksh; + shls_ki[1] = ish; + shls_kj[0] = ksh; + shls_kj[1] = jsh; + if ((*fprescreen)(shls_ki, nlopt) && (*fprescreen)(shls_kj, nlopt)) { + table = hl_table + ksh * HL_TABLE_SLOTS; + hl_dim = table[HL_DIM_OF]; + nd = table[ANG_OF] * 2 + 1; + offset = table + HL_OFFSET0; + hl = hl_data + table[HL_DATA_OF]; + for (i=0; i jp) { + _ppnl_fill_g(&sort2c_gs2_igtj, out, ints, comp, ish, jsh, buf, + shls_slice, 
ao_loc, hl_table, hl_data, nhl, nlopt); + } else if (ip == jp) { + _ppnl_fill_g(&sort2c_gs2_ieqj, out, ints, comp, ish, jsh, buf, + shls_slice, ao_loc, hl_table, hl_data, nhl, nlopt); + } +} + + +void contract_ppnl(void (*fill)(), double* out, + double* ppnl_half0, double* ppnl_half1, double* ppnl_half2, + int comp, int* shls_slice, int *ao_loc, + int* hl_table, double* hl_data, int nhl, + NeighborListOpt* nlopt) +{ + const int ish0 = shls_slice[0]; + const int ish1 = shls_slice[1]; + const int jsh0 = shls_slice[2]; + const int jsh1 = shls_slice[3]; + const int nish = ish1 - ish0; + const int njsh = jsh1 - jsh0; + const size_t nijsh = (size_t) nish * njsh; + + double *ints[3] = {ppnl_half0, ppnl_half1, ppnl_half2}; + + int di = GTOmax_shell_dim(ao_loc, shls_slice+0, 1); + int dj = GTOmax_shell_dim(ao_loc, shls_slice+2, 1); + size_t buf_size = di*dj*comp; + + #pragma omp parallel + { + int ish, jsh; + size_t ij; + double *buf = (double*) malloc(sizeof(double) * buf_size); + #pragma omp for schedule(dynamic) + for (ij = 0; ij < nijsh; ij++) { + ish = ij / njsh; + jsh = ij % njsh; + (*fill)(out, ints, comp, ish, jsh, buf, + shls_slice, ao_loc, hl_table, hl_data, nhl, nlopt); + } + free(buf); + } +} + + +void contract_ppnl_ip1(double* out, int comp, + double* ppnl_half0, double* ppnl_half1, double* ppnl_half2, + double* ppnl_half_ip2_0, double* ppnl_half_ip2_1, double* ppnl_half_ip2_2, + int* hl_table, double* hl_data, int nhl, int nao, int* naux, + int* aux_id) +{ + const int One = 1; + const char TRANS_N = 'N'; + //const char TRANS_T = 'T'; + const double D1 = 1.; + const double D0 = 0.; + + size_t nao_pair = (size_t) nao * nao; + memset(out, 0, nao_pair*comp*sizeof(double)); + + size_t n2[3]; + n2[0] = (size_t) nao * naux[0]; + n2[1] = (size_t) nao * naux[1]; + n2[2] = (size_t) nao * naux[2]; + size_t buf_size = 54 * (size_t) nao + 27; + +#pragma omp parallel +{ + size_t ib, id, i, p, ic; + double *pout; + double *buf = (double*) malloc(sizeof(double)*buf_size); + + 
#pragma omp for schedule(dynamic) + for (p = 0; p < nao; p++){ + pout = out + (size_t)p*nao; + for (id = 0; id < nhl; id++) { + ib = aux_id[id]; + int *table = hl_table + ib * HL_TABLE_SLOTS; + int hl_dim = table[HL_DIM_OF]; + int ptr = table[HL_DATA_OF]; + int nd = table[ANG_OF] * 2 + 1; + int *offset = table + HL_OFFSET0; + double *hl = hl_data + ptr; + int lp_dim = nd * nao; + int ilp_dim = hl_dim * lp_dim; + int il_dim = hl_dim * nd; + + double *ilp = buf; + double *ilp_ip2 = ilp + ilp_dim; + double *hilp = ilp_ip2 + nd*3; + for (ic = 0; ic < comp; ic++) { + for (i=0; ifprescreen; + } else { + fprescreen = NLOpt_noscreen; + } + + const char TRANS_N = 'N'; + const char TRANS_T = 'T'; + const double D1 = 1.; + + int i, j, pi, pj, ksh, ic; + int katm, l, hl_dim, nd; + int shls_ki[2], shls_kj[2]; + int *table, *offset; + double *hl; + for (ksh = 0; ksh < nhl; ksh++) { + shls_ki[0] = ksh; + shls_ki[1] = ish; + shls_kj[0] = ksh; + shls_kj[1] = jsh; + if ((*fprescreen)(shls_ki, nlopt) && (*fprescreen)(shls_kj, nlopt)) { + table = hl_table + ksh * HL_TABLE_SLOTS; + katm = table[ATOM_OF]; + l = table[ANG_OF]; + hl_dim = table[HL_DIM_OF]; + nd = 2 * l + 1; + offset = table + HL_OFFSET0; + hl = hl_data + table[HL_DATA_OF]; + + memset(buf, 0, dijm*sizeof(double)); + for (ic = 0; ic < comp; ic++) { + for (i=0; i 0) { + if (ig == G0idx) { + vlocG = -2. 
* M_PI * Z[ia] * r0*r0; + } + else { + vlocG = Z[ia] * coulG[ig] * exp(-0.5*r0*r0 * G2[ig]); + } + } + else { // Z/r + vlocG = Z[ia] * coulG[ig]; + } + out[ig] -= (vlocG * cos(RG)) - (vlocG * sin(RG)) * _Complex_I; + } + } +} +} diff --git a/pyscf/lib/test/test_numint_uniform_grid.py b/pyscf/lib/test/test_numint_uniform_grid.py index 296dcbd61a..05e5664ab0 100644 --- a/pyscf/lib/test/test_numint_uniform_grid.py +++ b/pyscf/lib/test/test_numint_uniform_grid.py @@ -7,7 +7,7 @@ from pyscf.pbc.dft import gen_grid from pyscf.pbc.dft import multigrid -from pyscf.pbc.dft.multigrid import eval_mat, eval_rho +from pyscf.pbc.dft.multigrid.multigrid import eval_mat, eval_rho def uncontract(cell): pcell, contr_coeff = cell.to_uncontracted_cartesian_basis() @@ -18,8 +18,8 @@ def setUpModule(): global bak_EXPDROP, bak_EXTRA_PREC global vxc, kpts, nkpts, nao, dm, dm_kpts, grids_orth, grids_north global ao_kpts_orth, ao_kpts_north, ao_orth, ao_north, ao_gamma_orth, ao_gamma_north - multigrid.EXPDROP, bak_EXPDROP = 1e-14, multigrid.EXPDROP - multigrid.EXTRA_PREC, bak_EXTRA_PREC = 1e-3, multigrid.EXTRA_PREC + multigrid.multigrid.EXPDROP, bak_EXPDROP = 1e-14, multigrid.multigrid.EXPDROP + multigrid.multigrid.EXTRA_PREC, bak_EXTRA_PREC = 1e-3, multigrid.multigrid.EXTRA_PREC numpy.random.seed(2) cell_orth = gto.M(atom='H1 1 1 0; H2 0 0 1', diff --git a/pyscf/pbc/df/incore.py b/pyscf/pbc/df/incore.py index 253250a405..76c23f8e3e 100644 --- a/pyscf/pbc/df/incore.py +++ b/pyscf/pbc/df/incore.py @@ -30,6 +30,7 @@ from pyscf.pbc.tools import k2gamma from pyscf.pbc.tools import pbc as pbctools from pyscf import __config__ +from pyscf.pbc.gto import _pbcintor RCUT_THRESHOLD = getattr(__config__, 'pbc_scf_rsjk_rcut_threshold', 2.5) KECUT_THRESHOLD = getattr(__config__, 'pbc_scf_rsjk_kecut_threshold', 10.0) @@ -471,3 +472,246 @@ def _conc_locs(ao_loc1, ao_loc2): basis accordingly.''' comp_loc = np.append(ao_loc1[:-1], ao_loc1[-1] + ao_loc2) return np.asarray(comp_loc, dtype=np.int32) + +# The 
following functions use pre-constructed shell pair list +def aux_e2_sum_auxbas(cell, auxcell_or_auxbasis, intor='int3c2e', aosym='s1', comp=None, + kptij_lst=np.zeros((1,2,3)), shls_slice=None, **kwargs): + r'''Compute :math:`\sum_{L} (ij|L)` on the fly. + + Returns: + out : (nao_pair,) array + ''' + if isinstance(auxcell_or_auxbasis, gto.MoleBase): + auxcell = auxcell_or_auxbasis + else: + assert isinstance(auxcell_or_auxbasis, str) + auxcell = make_auxcell(cell, auxcell_or_auxbasis) + + int3c = wrap_int3c_sum_auxbas(cell, auxcell, intor, aosym, comp, kptij_lst, **kwargs) + out = int3c(shls_slice) + return out + +def wrap_int3c_sum_auxbas(cell, auxcell, intor='int3c2e', aosym='s1', comp=None, + kptij_lst=np.zeros((1,2,3)), cintopt=None, pbcopt=None, + neighbor_list=None): + if neighbor_list is None: + raise KeyError('Neighbor list is not initialized.') + + log = logger.new_logger(cell) + + nkptij = len(kptij_lst) + kpti = kptij_lst[:,0] + kptj = kptij_lst[:,1] + j_only = is_zero(kpti - kptj) + if j_only: + kpts = kpti + nkpts = len(kpts) + kptij_idx = np.arange(nkpts, dtype=np.int32) + else: + raise NotImplementedError + + intor = cell._add_suffix(intor) + intor, comp = gto.moleintor._get_intor_and_comp(intor, comp) + + pcell = cell.copy() + pcell._atm, pcell._bas, pcell._env = \ + atm, bas, env = gto.conc_env(cell._atm, cell._bas, cell._env, + cell._atm, cell._bas, cell._env) + ao_loc = gto.moleintor.make_loc(bas, intor) + aux_loc = auxcell.ao_loc_nr() + ao_loc = np.asarray(np.hstack([ao_loc, ao_loc[-1]+aux_loc[1:]]), + dtype=np.int32) + atm, bas, env = gto.conc_env(atm, bas, env, + auxcell._atm, auxcell._bas, auxcell._env) + + Ls = cell.get_lattice_Ls() + nimgs = len(Ls) + nbas = cell.nbas + + gamma_point_only = is_zero(kpts) + if gamma_point_only: + assert nkpts == 1 + kk_type = 'g' + expkL = np.ones(1, dtype=np.complex128) + out_dtype = np.double + else: + raise NotImplementedError + + fill = 'PBCnr3c_screened_sum_auxbas_fill_%s%s' % (kk_type, aosym[:2]) + drv 
= libpbc.PBCnr3c_screened_sum_auxbas_drv + + if cintopt is None: + if nbas > 0: + env[gto.PTR_EXPCUTOFF] = abs(np.log(cell.precision)) + cintopt = _vhf.make_cintopt(atm, bas, env, intor) + else: + cintopt = lib.c_null_ptr() + if intor[:3] != 'ECP': + libpbc.CINTdel_pairdata_optimizer(cintopt) + if pbcopt is None: + pbcopt = _pbcintor.PBCOpt(pcell).init_rcut_cond(pcell) + if isinstance(pbcopt, _pbcintor.PBCOpt): + cpbcopt = pbcopt._this + else: + cpbcopt = lib.c_null_ptr() + + def int3c(shls_slice=None, out=None): + t0 = (logger.process_clock(), logger.perf_counter()) + if shls_slice is None: + shls_slice = (0, nbas, 0, nbas, 0, auxcell.nbas) + shls_slice = (shls_slice[0], shls_slice[1], + nbas+shls_slice[2], nbas+shls_slice[3], + nbas*2+shls_slice[4], nbas*2+shls_slice[5]) + ni = ao_loc[shls_slice[1]] - ao_loc[shls_slice[0]] + nj = ao_loc[shls_slice[3]] - ao_loc[shls_slice[2]] + + if aosym[:2] == 's2': + assert ni == nj + nao_pair = (ao_loc[shls_slice[1]]*(ao_loc[shls_slice[1]]+1)//2 - + ao_loc[shls_slice[0]]*(ao_loc[shls_slice[0]]+1)//2) + else: + nao_pair = ni * nj + + if out is None: + out = np.empty((nkptij,comp,nao_pair), dtype=out_dtype) + + drv(getattr(libpbc, intor), getattr(libpbc, fill), + out.ctypes.data_as(ctypes.c_void_p), + ctypes.c_int(nkptij), ctypes.c_int(nkpts), + ctypes.c_int(comp), ctypes.c_int(nimgs), + Ls.ctypes.data_as(ctypes.c_void_p), + expkL.ctypes.data_as(ctypes.c_void_p), + kptij_idx.ctypes.data_as(ctypes.c_void_p), + (ctypes.c_int*6)(*shls_slice), + ao_loc.ctypes.data_as(ctypes.c_void_p), cintopt, cpbcopt, + atm.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(cell.natm), + bas.ctypes.data_as(ctypes.c_void_p), + ctypes.c_int(nbas), # need to pass cell.nbas to libpbc.PBCnr3c_drv + env.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(env.size), + ctypes.byref(neighbor_list)) + + log.timer_debug1(f'pbc integral {intor}', *t0) + + if comp == 1: + out = out[:,0] + if nkptij == 1: + out = out[0] + return out + + return int3c + +def 
int3c1e_nuc_grad(cell, auxcell, dm, intor='int3c1e', aosym='s1', comp=3, + kptij_lst=np.zeros((1,2,3)), shls_slice=None, **kwargs): + '''Compute the nuclear gradient contribution + to the 2nd local part of PP on the fly. + See `pbc.gto.pseudo.pp_int.vpploc_part2_nuc_grad`. + + Returns: + out : (natm,comp) array + ''' + if comp != 3: + raise NotImplementedError + if aosym != 's1': + raise NotImplementedError + + int3c = wrap_int3c1e_nuc_grad(cell, auxcell, dm, intor, aosym, comp, kptij_lst, **kwargs) + out = int3c(shls_slice) + return out + +def wrap_int3c1e_nuc_grad(cell, auxcell, dm, intor='int3c1e', aosym='s1', comp=3, + kptij_lst=np.zeros((1,2,3)), cintopt=None, pbcopt=None, + neighbor_list=None): + if neighbor_list is None: + raise KeyError('Neighbor list is not initialized.') + + log = logger.new_logger(cell) + + nkptij = len(kptij_lst) + kpti = kptij_lst[:,0] + kptj = kptij_lst[:,1] + j_only = is_zero(kpti - kptj) + if j_only: + kpts = kpti + nkpts = len(kpts) + kptij_idx = np.arange(nkpts, dtype=np.int32) + else: + raise NotImplementedError + + intor = cell._add_suffix(intor) + intor, comp = gto.moleintor._get_intor_and_comp(intor, comp) + + pcell = cell.copy() + pcell._atm, pcell._bas, pcell._env = \ + atm, bas, env = gto.conc_env(cell._atm, cell._bas, cell._env, + cell._atm, cell._bas, cell._env) + ao_loc = gto.moleintor.make_loc(bas, intor) + aux_loc = auxcell.ao_loc_nr() + ao_loc = np.asarray(np.hstack([ao_loc, ao_loc[-1]+aux_loc[1:]]), + dtype=np.int32) + atm, bas, env = gto.conc_env(atm, bas, env, + auxcell._atm, auxcell._bas, auxcell._env) + + Ls = cell.get_lattice_Ls() + nimgs = len(Ls) + nbas = cell.nbas + + gamma_point_only = is_zero(kpts) + if gamma_point_only: + assert nkpts == 1 + kk_type = 'g' + expkL = np.ones(1, dtype=np.complex128) + dm = np.asarray(dm, order="C", dtype=np.double) + else: + raise NotImplementedError + + fill = 'PBCnr3c1e_screened_nuc_grad_fill_%s%s' % (kk_type, aosym[:2]) + drv = libpbc.PBCnr3c1e_screened_nuc_grad_drv + + if 
cintopt is None: + if nbas > 0: + env[gto.PTR_EXPCUTOFF] = abs(np.log(cell.precision)) + cintopt = _vhf.make_cintopt(atm, bas, env, intor) + else: + cintopt = lib.c_null_ptr() + if intor[:3] != 'ECP': + libpbc.CINTdel_pairdata_optimizer(cintopt) + if pbcopt is None: + pbcopt = _pbcintor.PBCOpt(pcell).init_rcut_cond(pcell) + if isinstance(pbcopt, _pbcintor.PBCOpt): + cpbcopt = pbcopt._this + else: + cpbcopt = lib.c_null_ptr() + + def int3c(shls_slice=None, out=None): + t0 = (logger.process_clock(), logger.perf_counter()) + if shls_slice is None: + shls_slice = (0, nbas, 0, nbas, 0, auxcell.nbas) + shls_slice = (shls_slice[0], shls_slice[1], + nbas+shls_slice[2], nbas+shls_slice[3], + nbas*2+shls_slice[4], nbas*2+shls_slice[5]) + + if out is None: + out = np.zeros((nkptij,cell.natm,comp), dtype=np.double) + + drv(getattr(libpbc, intor), getattr(libpbc, fill), + out.ctypes.data_as(ctypes.c_void_p), + dm.ctypes.data_as(ctypes.c_void_p), + ctypes.c_int(nkptij), ctypes.c_int(nkpts), + ctypes.c_int(comp), ctypes.c_int(nimgs), + Ls.ctypes.data_as(ctypes.c_void_p), + expkL.ctypes.data_as(ctypes.c_void_p), + kptij_idx.ctypes.data_as(ctypes.c_void_p), + (ctypes.c_int*6)(*shls_slice), + ao_loc.ctypes.data_as(ctypes.c_void_p), cintopt, cpbcopt, + atm.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(cell.natm), + bas.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(nbas), + env.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(env.size), + ctypes.c_int(cell.nao), ctypes.byref(neighbor_list)) + + log.timer_debug1(f'pbc integral {intor}', *t0) + + if nkptij == 1: + out = out[0] + return out + + return int3c diff --git a/pyscf/pbc/dft/gks.py b/pyscf/pbc/dft/gks.py index 8d496bbfb1..5536b53daa 100644 --- a/pyscf/pbc/dft/gks.py +++ b/pyscf/pbc/dft/gks.py @@ -77,7 +77,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, ni = ks._numint n, exc, vxc = ni.get_vxc(cell, ks.grids, ks.xc, dm, hermi=hermi, kpt=kpt, kpts_band=kpts_band, max_memory=max_memory) - logger.debug(ks, 
'nelec by numeric integration = %s', n) + logger.info(ks, 'nelec by numeric integration = %s', n) t0 = logger.timer(ks, 'vxc', *t0) if not hybrid: diff --git a/pyscf/pbc/dft/kgks.py b/pyscf/pbc/dft/kgks.py index f43a8ee04c..fd97e43cd1 100644 --- a/pyscf/pbc/dft/kgks.py +++ b/pyscf/pbc/dft/kgks.py @@ -84,7 +84,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, ni = ks._numint n, exc, vxc = ni.get_vxc(cell, ks.grids, ks.xc, dm, hermi=hermi, kpts=kpts, kpts_band=kpts_band, max_memory=max_memory) - logger.debug(ks, 'nelec by numeric integration = %s', n) + logger.info(ks, 'nelec by numeric integration = %s', n) t0 = logger.timer(ks, 'vxc', *t0) nkpts = len(kpts) diff --git a/pyscf/pbc/dft/krks.py b/pyscf/pbc/dft/krks.py index 572a7614af..3cd23636b1 100644 --- a/pyscf/pbc/dft/krks.py +++ b/pyscf/pbc/dft/krks.py @@ -69,7 +69,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, n, exc, vxc = multigrid.nr_rks(ks.with_df, ks.xc, dm, hermi, kpts, kpts_band, with_j=True, return_j=False) - logger.debug(ks, 'nelec by numeric integration = %s', n) + logger.info(ks, 'nelec by numeric integration = %s', n) t0 = logger.timer(ks, 'vxc', *t0) return vxc @@ -84,7 +84,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, max_memory = ks.max_memory - lib.current_memory()[0] n, exc, vxc = ni.nr_rks(cell, ks.grids, ks.xc, dm, 0, hermi, kpts, kpts_band, max_memory=max_memory) - logger.debug(ks, 'nelec by numeric integration = %s', n) + logger.info(ks, 'nelec by numeric integration = %s', n) if ks.nlc or ni.libxc.is_nlc(ks.xc): if ni.libxc.is_nlc(ks.xc): xc = ks.xc @@ -95,7 +95,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, max_memory=max_memory) exc += enlc vxc += vnlc - logger.debug(ks, 'nelec with nlc grids = %s', n) + logger.info(ks, 'nelec with nlc grids = %s', n) t0 = logger.timer(ks, 'vxc', *t0) nkpts = len(kpts) diff --git a/pyscf/pbc/dft/krks_ksymm.py b/pyscf/pbc/dft/krks_ksymm.py index 
fb15bf6f40..0d9e1401e2 100644 --- a/pyscf/pbc/dft/krks_ksymm.py +++ b/pyscf/pbc/dft/krks_ksymm.py @@ -59,7 +59,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, n, exc, vxc = multigrid.nr_rks(ks.with_df, ks.xc, dm_bz, hermi, kpts.kpts, kpts_band, with_j=True, return_j=False) - logger.debug(ks, 'nelec by numeric integration = %s', n) + logger.info(ks, 'nelec by numeric integration = %s', n) t0 = logger.timer(ks, 'vxc', *t0) return vxc @@ -72,7 +72,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, n, exc, vxc = ni.nr_rks(cell, ks.grids, ks.xc, dm_bz, kpts=kpts.kpts, kpts_band=kpts_band, max_memory=max_memory) - logger.debug(ks, 'nelec by numeric integration = %s', n) + logger.info(ks, 'nelec by numeric integration = %s', n) if ks.nlc or ni.libxc.is_nlc(ks.xc): if ni.libxc.is_nlc(ks.xc): xc = ks.xc @@ -83,7 +83,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, 0, hermi, kpts.kpts, max_memory=max_memory) exc += enlc vxc += vnlc - logger.debug(ks, 'nelec with nlc grids = %s', n) + logger.info(ks, 'nelec with nlc grids = %s', n) t0 = logger.timer(ks, 'vxc', *t0) weight = kpts.weights_ibz diff --git a/pyscf/pbc/dft/kuks.py b/pyscf/pbc/dft/kuks.py index a07949ccca..634c99f8ff 100644 --- a/pyscf/pbc/dft/kuks.py +++ b/pyscf/pbc/dft/kuks.py @@ -55,7 +55,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, n, exc, vxc = multigrid.nr_uks(ks.with_df, ks.xc, dm, hermi, kpts, kpts_band, with_j=True, return_j=False) - logger.debug(ks, 'nelec by numeric integration = %s', n) + logger.info(ks, 'nelec by numeric integration = %s', n) t0 = logger.timer(ks, 'vxc', *t0) return vxc @@ -79,7 +79,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, 0, hermi, kpts, max_memory=max_memory) exc += enlc vxc += vnlc - logger.debug(ks, 'nelec by numeric integration = %s', n) + logger.info(ks, 'nelec by numeric integration = %s', n) t0 = logger.timer(ks, 'vxc', *t0) nkpts = len(kpts) 
diff --git a/pyscf/pbc/dft/kuks_ksymm.py b/pyscf/pbc/dft/kuks_ksymm.py index eb02e674e9..15c2a623b5 100644 --- a/pyscf/pbc/dft/kuks_ksymm.py +++ b/pyscf/pbc/dft/kuks_ksymm.py @@ -58,7 +58,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, n, exc, vxc = multigrid.nr_uks(ks.with_df, ks.xc, dm_bz, hermi, kpts.kpts, kpts_band, with_j=True, return_j=False) - logger.debug(ks, 'nelec by numeric integration = %s', n) + logger.info(ks, 'nelec by numeric integration = %s', n) t0 = logger.timer(ks, 'vxc', *t0) return vxc @@ -71,7 +71,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, n, exc, vxc = ni.nr_uks(cell, ks.grids, ks.xc, dm_bz, kpts=kpts.kpts, kpts_band=kpts_band, max_memory=max_memory) - logger.debug(ks, 'nelec by numeric integration = %s', n) + logger.info(ks, 'nelec by numeric integration = %s', n) if ks.nlc or ni.libxc.is_nlc(ks.xc): if ni.libxc.is_nlc(ks.xc): xc = ks.xc @@ -82,7 +82,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, 0, hermi, kpts.kpts, max_memory=max_memory) exc += enlc vxc += vnlc - logger.debug(ks, 'nelec with nlc grids = %s', n) + logger.info(ks, 'nelec with nlc grids = %s', n) t0 = logger.timer(ks, 'vxc', *t0) weight = kpts.weights_ibz diff --git a/pyscf/pbc/dft/multigrid/__init__.py b/pyscf/pbc/dft/multigrid/__init__.py new file mode 100644 index 0000000000..707853bf51 --- /dev/null +++ b/pyscf/pbc/dft/multigrid/__init__.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python +# Copyright 2014-2024 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +from .multigrid import MultiGridFFTDF +from .multigrid import ( + multigrid_fftdf as multigrid_fftdf, + _gen_rhf_response as _gen_rhf_response, + _gen_uhf_response as _gen_uhf_response, + nr_rks as nr_rks_v1, + nr_rks_fxc as nr_rks_fxc, + nr_rks_fxc_st as nr_rks_fxc_st, + nr_uks as nr_uks_v1, + nr_uks_fxc as nr_uks_fxc +) + +from .multigrid_pair import MultiGridFFTDF2 +from .multigrid_pair import nr_rks as nr_rks_v2 +from .multigrid_pair import nr_uks as nr_uks_v2 + +def nr_rks(mydf, xc_code, dm_kpts, hermi=1, kpts=None, + kpts_band=None, with_j=False, return_j=False, verbose=None): + if isinstance(mydf, MultiGridFFTDF2): + return nr_rks_v2(mydf, xc_code, dm_kpts, hermi=hermi, kpts=kpts, + kpts_band=kpts_band, with_j=with_j, + return_j=return_j, verbose=verbose) + elif isinstance(mydf, MultiGridFFTDF): + return nr_rks_v1(mydf, xc_code, dm_kpts, hermi=hermi, kpts=kpts, + kpts_band=kpts_band, with_j=with_j, + return_j=return_j, verbose=verbose) + else: + raise TypeError("Wrong density fitting type for multigrid DFT.") + +def nr_uks(mydf, xc_code, dm_kpts, hermi=1, kpts=None, + kpts_band=None, with_j=False, return_j=False, verbose=None): + if isinstance(mydf, MultiGridFFTDF2): + return nr_uks_v2(mydf, xc_code, dm_kpts, hermi=hermi, kpts=kpts, + kpts_band=kpts_band, with_j=with_j, + return_j=return_j, verbose=verbose) + elif isinstance(mydf, MultiGridFFTDF): + return nr_uks_v1(mydf, xc_code, dm_kpts, hermi=hermi, kpts=kpts, + kpts_band=kpts_band, with_j=with_j, + return_j=return_j, verbose=verbose) + else: + raise TypeError("Wrong density fitting type for multigrid DFT.") diff --git a/pyscf/pbc/dft/multigrid.py b/pyscf/pbc/dft/multigrid/multigrid.py similarity index 95% rename from pyscf/pbc/dft/multigrid.py rename to pyscf/pbc/dft/multigrid/multigrid.py index 80e72e551b..56fb3059cf 100644 --- a/pyscf/pbc/dft/multigrid.py +++ 
b/pyscf/pbc/dft/multigrid/multigrid.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright 2014-2021 The PySCF Developers. All Rights Reserved. +# Copyright 2014-2024 The PySCF Developers. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ import numpy import scipy.linalg +from pyscf import __config__ from pyscf import lib from pyscf.lib import logger from pyscf.gto import ATOM_OF, ANG_OF, NPRIM_OF, PTR_EXP, PTR_COEFF @@ -29,12 +30,21 @@ from pyscf.pbc import tools from pyscf.pbc import gto from pyscf.pbc.gto import pseudo +from pyscf.pbc.gto.pseudo import pp_int from pyscf.pbc.dft import numint, gen_grid -from pyscf.pbc.df.df_jk import _format_dms, _format_kpts_band, _format_jks +from pyscf.pbc.df.df_jk import ( + _format_dms, + _format_kpts_band, + _format_jks, +) from pyscf.pbc.lib.kpts_helper import gamma_point -from pyscf.pbc.df import fft -from pyscf.pbc.df import ft_ao -from pyscf import __config__ +from pyscf.pbc.df import fft, ft_ao +from pyscf.pbc.dft.multigrid.utils import ( + _take_4d, + _take_5d, + _takebak_4d, + _takebak_5d, +) #sys.stderr.write('WARN: multigrid is an experimental feature. It is still in ' # 'testing\nFeatures and APIs may be changed in the future.\n') @@ -367,23 +377,31 @@ def get_nuc(mydf, kpts=None): vne = vne[0] return numpy.asarray(vne) -def get_pp(mydf, kpts=None): +def get_pp(mydf, kpts=None, max_memory=4000): '''Get the periodic pseudotential nuc-el AO matrix, with G=0 removed. 
''' from pyscf import gto kpts, is_single_kpt = fft._check_kpts(mydf, kpts) cell = mydf.cell mesh = mydf.mesh - SI = cell.get_SI() Gv = cell.get_Gv(mesh) - vpplocG = pseudo.get_vlocG(cell, Gv) - vpplocG = -numpy.einsum('ij,ij->j', SI, vpplocG) - # from get_jvloc_G0 function - vpplocG[0] = numpy.sum(pseudo.get_alphas(cell)) - ngrids = len(vpplocG) + + ngrids = len(Gv) + vpplocG = numpy.empty((ngrids,), dtype=numpy.complex128) + + mem_avail = max(max_memory, mydf.max_memory-lib.current_memory()[0]) + blksize = int(mem_avail*1e6/((cell.natm*2)*16)) + blksize = min(ngrids, max(21**3, blksize)) + for ig0, ig1 in lib.prange(0, ngrids, blksize): + vpplocG_batch = pp_int.get_gth_vlocG_part1(cell, Gv[ig0:ig1]) + SI = cell.get_SI(Gv[ig0:ig1]) + vpplocG[ig0:ig1] = -numpy.einsum('ij,ij->j', SI, vpplocG_batch) hermi = 1 vpp = _get_j_pass2(mydf, vpplocG, hermi, kpts)[0] + vpp2 = pp_int.get_pp_loc_part2(cell, kpts) + for k, kpt in enumerate(kpts): + vpp[k] += vpp2[k] # vppnonloc evaluated in reciprocal space fakemol = gto.Mole() @@ -396,51 +414,76 @@ def get_pp(mydf, kpts=None): fakemol._bas[0,gto.PTR_EXP ] = ptr+3 fakemol._bas[0,gto.PTR_COEFF] = ptr+4 - # buf for SPG_lmi upto l=0..3 and nl=3 - buf = numpy.empty((48,ngrids), dtype=numpy.complex128) - def vppnl_by_k(kpt): - Gk = Gv + kpt - G_rad = lib.norm(Gk, axis=1) - aokG = ft_ao.ft_ao(cell, Gv, kpt=kpt) * (ngrids/cell.vol) - vppnl = 0 + SPG_lm_aoGs = [] for ia in range(cell.natm): symb = cell.atom_symbol(ia) if symb not in cell._pseudo: + SPG_lm_aoGs.append(None) continue pp = cell._pseudo[symb] p1 = 0 for l, proj in enumerate(pp[5:]): rl, nl, hl = proj if nl > 0: - fakemol._bas[0,gto.ANG_OF] = l - fakemol._env[ptr+3] = .5*rl**2 - fakemol._env[ptr+4] = rl**(l+1.5)*numpy.pi**1.25 - pYlm_part = fakemol.eval_gto('GTOval', Gk) + p1 = p1+nl*(l*2+1) + SPG_lm_aoGs.append(numpy.zeros((p1, cell.nao), dtype=numpy.complex128)) - p0, p1 = p1, p1+nl*(l*2+1) - # pYlm is real, SI[ia] is complex - pYlm = numpy.ndarray((nl,l*2+1,ngrids), 
dtype=numpy.complex128, buffer=buf[p0:p1]) - for k in range(nl): - qkl = pseudo.pp._qli(G_rad*rl, l, k) - pYlm[k] = pYlm_part.T * qkl - #:SPG_lmi = numpy.einsum('g,nmg->nmg', SI[ia].conj(), pYlm) - #:SPG_lm_aoG = numpy.einsum('nmg,gp->nmp', SPG_lmi, aokG) - #:tmp = numpy.einsum('ij,jmp->imp', hl, SPG_lm_aoG) - #:vppnl += numpy.einsum('imp,imq->pq', SPG_lm_aoG.conj(), tmp) - if p1 > 0: - SPG_lmi = buf[:p1] - SPG_lmi *= SI[ia].conj() - SPG_lm_aoGs = lib.zdot(SPG_lmi, aokG) + mem_avail = max(max_memory, mydf.max_memory-lib.current_memory()[0]) + blksize = int(mem_avail*1e6/((48+cell.nao+13+3)*16)) + blksize = min(ngrids, max(21**3, blksize)) + vppnl = 0 + for ig0, ig1 in lib.prange(0, ngrids, blksize): + ng = ig1 - ig0 + # buf for SPG_lmi upto l=0..3 and nl=3 + buf = numpy.empty((48,ng), dtype=numpy.complex128) + Gk = Gv[ig0:ig1] + kpt + G_rad = numpy.linalg.norm(Gk, axis=1) + aokG = ft_ao.ft_ao(cell, Gv[ig0:ig1], kpt=kpt) * (ngrids/cell.vol) + for ia in range(cell.natm): + symb = cell.atom_symbol(ia) + if symb not in cell._pseudo: + continue + pp = cell._pseudo[symb] p1 = 0 for l, proj in enumerate(pp[5:]): rl, nl, hl = proj if nl > 0: + fakemol._bas[0,gto.ANG_OF] = l + fakemol._env[ptr+3] = .5*rl**2 + fakemol._env[ptr+4] = rl**(l+1.5)*numpy.pi**1.25 + pYlm_part = fakemol.eval_gto('GTOval', Gk) + p0, p1 = p1, p1+nl*(l*2+1) - hl = numpy.asarray(hl) - SPG_lm_aoG = SPG_lm_aoGs[p0:p1].reshape(nl,l*2+1,-1) - tmp = numpy.einsum('ij,jmp->imp', hl, SPG_lm_aoG) - vppnl += numpy.einsum('imp,imq->pq', SPG_lm_aoG.conj(), tmp) + # pYlm is real, SI[ia] is complex + pYlm = numpy.ndarray((nl,l*2+1,ng), dtype=numpy.complex128, buffer=buf[p0:p1]) + for k in range(nl): + qkl = pseudo.pp._qli(G_rad*rl, l, k) + pYlm[k] = pYlm_part.T * qkl + #:SPG_lmi = numpy.einsum('g,nmg->nmg', SI[ia].conj(), pYlm) + #:SPG_lm_aoG = numpy.einsum('nmg,gp->nmp', SPG_lmi, aokG) + #:tmp = numpy.einsum('ij,jmp->imp', hl, SPG_lm_aoG) + #:vppnl += numpy.einsum('imp,imq->pq', SPG_lm_aoG.conj(), tmp) + if p1 > 0: 
+ SPG_lmi = buf[:p1] + SPG_lmi *= cell.get_SI(Gv[ig0:ig1], atmlst=[ia,]).conj() + SPG_lm_aoGs[ia] += lib.zdot(SPG_lmi, aokG) + buf = None + for ia in range(cell.natm): + symb = cell.atom_symbol(ia) + if symb not in cell._pseudo: + continue + pp = cell._pseudo[symb] + p1 = 0 + for l, proj in enumerate(pp[5:]): + rl, nl, hl = proj + if nl > 0: + p0, p1 = p1, p1+nl*(l*2+1) + hl = numpy.asarray(hl) + SPG_lm_aoG = SPG_lm_aoGs[ia][p0:p1].reshape(nl,l*2+1,-1) + tmp = numpy.einsum('ij,jmp->imp', hl, SPG_lm_aoG) + vppnl += numpy.einsum('imp,imq->pq', SPG_lm_aoG.conj(), tmp) + SPG_lm_aoGs=None return vppnl * (1./ngrids**2) for k, kpt in enumerate(kpts): @@ -454,7 +497,6 @@ def vppnl_by_k(kpt): vpp = vpp[0] return numpy.asarray(vpp) - def get_j_kpts(mydf, dm_kpts, hermi=1, kpts=numpy.zeros((1,3)), kpts_band=None): '''Get the Coulomb (J) AO matrix at sampled k-points. @@ -1859,7 +1901,7 @@ def get_jk(self, dm, hermi=1, kpts=None, kpts_band=None, get_rho = get_rho -def multigrid(mf): +def multigrid_fftdf(mf): '''Use MultiGridFFTDF to replace the default FFTDF integration method in the DFT object. 
''' @@ -1867,56 +1909,7 @@ def multigrid(mf): mf.with_df.__dict__.update(old_df.__dict__) return mf +multigrid = multigrid_fftdf # for backward compatibility def _pgto_shells(cell): return cell._bas[:,NPRIM_OF].sum() - -def _take_4d(a, indices): - a_shape = a.shape - ranges = [] - for i, s in enumerate(indices): - if s is None: - idx = numpy.arange(a_shape[i], dtype=numpy.int32) - else: - idx = numpy.asarray(s, dtype=numpy.int32) - idx[idx < 0] += a_shape[i] - ranges.append(idx) - idx = ranges[0][:,None] * a_shape[1] + ranges[1] - idy = ranges[2][:,None] * a_shape[3] + ranges[3] - a = a.reshape(a_shape[0]*a_shape[1], a_shape[2]*a_shape[3]) - out = lib.take_2d(a, idx.ravel(), idy.ravel()) - return out.reshape([len(s) for s in ranges]) - -def _takebak_4d(out, a, indices): - out_shape = out.shape - a_shape = a.shape - ranges = [] - for i, s in enumerate(indices): - if s is None: - idx = numpy.arange(a_shape[i], dtype=numpy.int32) - else: - idx = numpy.asarray(s, dtype=numpy.int32) - idx[idx < 0] += out_shape[i] - assert (len(idx) == a_shape[i]) - ranges.append(idx) - idx = ranges[0][:,None] * out_shape[1] + ranges[1] - idy = ranges[2][:,None] * out_shape[3] + ranges[3] - nx = idx.size - ny = idy.size - out = out.reshape(out_shape[0]*out_shape[1], out_shape[2]*out_shape[3]) - lib.takebak_2d(out, a.reshape(nx,ny), idx.ravel(), idy.ravel()) - return out - -def _take_5d(a, indices): - a_shape = a.shape - a = a.reshape((a_shape[0]*a_shape[1],) + a_shape[2:]) - indices = (None,) + indices[2:] - return _take_4d(a, indices) - -def _takebak_5d(out, a, indices): - a_shape = a.shape - out_shape = out.shape - a = a.reshape((a_shape[0]*a_shape[1],) + a_shape[2:]) - out = out.reshape((out_shape[0]*out_shape[1],) + out_shape[2:]) - indices = (None,) + indices[2:] - return _takebak_4d(out, a, indices) diff --git a/pyscf/pbc/dft/multigrid/multigrid_pair.py b/pyscf/pbc/dft/multigrid/multigrid_pair.py new file mode 100644 index 0000000000..3ef43b688d --- /dev/null +++ 
b/pyscf/pbc/dft/multigrid/multigrid_pair.py @@ -0,0 +1,1405 @@ +#!/usr/bin/env python +# Copyright 2021-2024 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Author: Xing Zhang +# + +import ctypes +import numpy as np +from pyscf import __config__ +from pyscf import lib +from pyscf.lib import logger +from pyscf.gto import moleintor +from pyscf.pbc import tools +from pyscf.pbc.lib.kpts_helper import gamma_point +from pyscf.pbc.df import fft +from pyscf.pbc.df.df_jk import ( + _format_dms, + _format_kpts_band, + _format_jks, +) +from pyscf.pbc.dft.multigrid.pp import ( + _get_vpplocG_part1, + _get_pp_without_erf, + vpploc_part1_nuc_grad, +) +from pyscf.pbc.dft.multigrid.utils import ( + _take_4d, + _take_5d, + _takebak_4d, + _takebak_5d, +) +from pyscf.pbc.dft.multigrid.multigrid import MultiGridFFTDF + +NGRIDS = getattr(__config__, 'pbc_dft_multigrid_ngrids', 4) +KE_RATIO = getattr(__config__, 'pbc_dft_multigrid_ke_ratio', 3.0) +REL_CUTOFF = getattr(__config__, 'pbc_dft_multigrid_rel_cutoff', 20.0) +GGA_METHOD = getattr(__config__, 'pbc_dft_multigrid_gga_method', 'FFT') + +EXTRA_PREC = getattr(__config__, 'pbc_gto_eval_gto_extra_precision', 1e-2) +RHOG_HIGH_ORDER = getattr(__config__, 'pbc_dft_multigrid_rhog_high_order', False) +PTR_EXPDROP = 16 +EXPDROP = getattr(__config__, 'pbc_dft_multigrid_expdrop', 1e-12) +IMAG_TOL = 1e-9 + +libdft = lib.load_library('libdft') + +def gradient_gs(f_gs, Gv): + r'''Compute the G-space 
components of :math:`\nabla f(r)` + given :math:`f(G)` and :math:`G`, + which is equivalent to einsum('np,px->nxp', f_gs, 1j*Gv) + ''' + ng, dim = Gv.shape + assert dim == 3 + Gv = np.asarray(Gv, order='C', dtype=np.double) + f_gs = np.asarray(f_gs.reshape(-1,ng), order='C', dtype=np.complex128) + n = f_gs.shape[0] + out = np.empty((n,dim,ng), dtype=np.complex128) + + fn = getattr(libdft, 'gradient_gs', None) + try: + fn(out.ctypes.data_as(ctypes.c_void_p), + f_gs.ctypes.data_as(ctypes.c_void_p), + Gv.ctypes.data_as(ctypes.c_void_p), + ctypes.c_int(n), ctypes.c_size_t(ng)) + except Exception as e: + raise RuntimeError(f'Error in gradient_gs: {e}') + return out + + +class GridLevel_Info(ctypes.Structure): + ''' + Info about the grid levels. + ''' + _fields_ = [("nlevels", ctypes.c_int), # number of grid levels + ("rel_cutoff", ctypes.c_double), + ("cutoff", ctypes.POINTER(ctypes.c_double)), + ("mesh", ctypes.POINTER(ctypes.c_int))] + +class RS_Grid(ctypes.Structure): + ''' + Values on real space multigrid. + ''' + _fields_ = [("nlevels", ctypes.c_int), + ("gridlevel_info", ctypes.POINTER(GridLevel_Info)), + ("comp", ctypes.c_int), + # data is list of 1d arrays + ("data", ctypes.POINTER(ctypes.POINTER(ctypes.c_double)))] + +class PGFPair(ctypes.Structure): + ''' + A primitive Gaussian function pair. + ''' + _fields_ = [("ish", ctypes.c_int), + ("ipgf", ctypes.c_int), + ("jsh", ctypes.c_int), + ("jpgf", ctypes.c_int), + ("iL", ctypes.c_int), + ("radius", ctypes.c_double)] + + +class Task(ctypes.Structure): + ''' + A single task. + ''' + _fields_ = [("buf_size", ctypes.c_size_t), + ("ntasks", ctypes.c_size_t), + ("pgfpairs", ctypes.POINTER(ctypes.POINTER(PGFPair))), + ("radius", ctypes.c_double)] + + +class TaskList(ctypes.Structure): + ''' + A task list. 
+ ''' + _fields_ = [("nlevels", ctypes.c_int), + ("hermi", ctypes.c_int), + ("gridlevel_info", ctypes.POINTER(GridLevel_Info)), + ("tasks", ctypes.POINTER(ctypes.POINTER(Task)))] + + +def multi_grids_tasks(cell, ke_cutoff=None, hermi=0, + ngrids=NGRIDS, ke_ratio=KE_RATIO, rel_cutoff=REL_CUTOFF): + if ke_cutoff is None: + ke_cutoff = cell.ke_cutoff + if ke_cutoff is None: + raise ValueError("cell.ke_cutoff is not set.") + ke1 = ke_cutoff + cutoff = [ke1,] + for i in range(ngrids-1): + ke1 /= ke_ratio + cutoff.append(ke1) + cutoff.reverse() + a = cell.lattice_vectors() + mesh = [] + for ke in cutoff: + mesh.append(tools.cutoff_to_mesh(a, ke)) + logger.info(cell, 'ke_cutoff for multigrid tasks:\n%s', cutoff) + logger.info(cell, 'meshes for multigrid tasks:\n%s', mesh) + gridlevel_info = init_gridlevel_info(cutoff, rel_cutoff, mesh) + task_list = build_task_list(cell, gridlevel_info, hermi=hermi) + return task_list + + +def _update_task_list(mydf, hermi=0, ngrids=None, ke_ratio=None, rel_cutoff=None): + ''' + Update :attr:`task_list` if necessary. 
def init_gridlevel_info(cutoff, rel_cutoff, mesh):
    '''
    Create the C :class:`GridLevel_Info` structure for a set of grid levels.

    Arguments:
        cutoff : sequence of float
            Cutoff of every grid level. A leading entry below 1e-15 is
            treated as "no level" and removed.
        rel_cutoff : float
            Relative cutoff passed through to the C routine.
        mesh : array-like, reshaped to (-1, 3)
            Mesh of every grid level, in the same order as `cutoff`.

    Returns:
        ctypes pointer to the :class:`GridLevel_Info` filled in by the C
        routine `init_gridlevel_info`.

    Raises:
        RuntimeError : the call into the C library failed.
    '''
    mesh = np.asarray(mesh).reshape(-1,3)
    if cutoff[0] < 1e-15:
        # The C routine receives `nlevels` cutoffs and reads the mesh rows in
        # the same order, so the mesh of the discarded level has to go as
        # well; otherwise every remaining cutoff is paired with the mesh of
        # the previous level. A mesh that the caller already trimmed (one row
        # fewer than `cutoff`) is left untouched.
        if len(mesh) == len(cutoff):
            mesh = mesh[1:]
        cutoff = cutoff[1:]
    cutoff = np.asarray(cutoff, order='C', dtype=np.double)
    mesh = np.asarray(mesh, order='C', dtype=np.int32)
    nlevels = len(cutoff)
    gridlevel_info = ctypes.POINTER(GridLevel_Info)()
    fn = getattr(libdft, "init_gridlevel_info", None)
    try:
        fn(ctypes.byref(gridlevel_info),
           cutoff.ctypes.data_as(ctypes.c_void_p),
           mesh.ctypes.data_as(ctypes.c_void_p),
           ctypes.c_int(nlevels), ctypes.c_double(rel_cutoff))
    except Exception as e:
        raise RuntimeError("Failed to init grid level info. %s" % e)
    return gridlevel_info
def build_task_list(cell, gridlevel_info, cell1=None, Ls=None, hermi=0, precision=None):
    '''
    Build the task list for multigrid DFT calculations.

    Arguments:
        cell : :class:`pbc.gto.cell.Cell`
            The :class:`Cell` instance for the bra basis functions.
        gridlevel_info : :class:`ctypes.POINTER`
            The C pointer of the :class:`GridLevel_Info` structure.
        cell1 : :class:`pbc.gto.cell.Cell`, optional
            The :class:`Cell` instance for the ket basis functions.
            If not given, both bra and ket basis functions come from cell.
        Ls : (*,3) array, optional
            The cartesian coordinates of the periodic images.
            Default is calculated by :func:`cell.get_lattice_Ls`.
        hermi : int, optional
            If :math:`hermi=1`, the task list is built only for
            the upper triangle of the matrix. Default is 0.
            Reset to 0 (with a warning) when cell1 is not cell.
        precision : float, optional
            The integral precision. Default is :attr:`cell.precision`.

    Returns: :class:`ctypes.POINTER`
        The C pointer of the :class:`TaskList` structure.

    Raises:
        RuntimeError : the call into the C library failed. The temporary
            neighbor list is released in that case as well.
    '''
    from pyscf.pbc.gto import build_neighbor_list_for_shlpairs, free_neighbor_list
    if cell1 is None:
        cell1 = cell
    if Ls is None:
        Ls = cell.get_lattice_Ls()
    # The raw buffer of Ls is handed to C below, so it has to be a
    # C-contiguous array of doubles (a no-op when it already is one).
    Ls = np.asarray(Ls, order='C', dtype=np.double)
    if precision is None:
        precision = cell.precision

    if hermi == 1 and cell1 is not cell:
        logger.warn(cell,
                    "Set hermi=0 because cell and cell1 are not the same.")
        hermi = 0

    ish_atm = np.asarray(cell._atm, order='C', dtype=np.int32)
    ish_bas = np.asarray(cell._bas, order='C', dtype=np.int32)
    ish_env = np.asarray(cell._env, order='C', dtype=float)
    nish = len(ish_bas)
    ish_rcut, ipgf_rcut = cell.rcut_by_shells(precision=precision,
                                              return_pgf_radius=True)
    assert nish == len(ish_rcut)
    ptr_ipgf_rcut = lib.ndarray_pointer_2d(ipgf_rcut)

    if cell1 is cell:
        # Same cell on both sides: share the bra arrays with the ket side.
        jsh_atm = ish_atm
        jsh_bas = ish_bas
        jsh_env = ish_env
        jsh_rcut = ish_rcut
        jpgf_rcut = ipgf_rcut
        ptr_jpgf_rcut = ptr_ipgf_rcut
    else:
        jsh_atm = np.asarray(cell1._atm, order='C', dtype=np.int32)
        jsh_bas = np.asarray(cell1._bas, order='C', dtype=np.int32)
        jsh_env = np.asarray(cell1._env, order='C', dtype=float)
        # jpgf_rcut stays referenced until the C call returns because
        # ptr_jpgf_rcut is built from it.
        jsh_rcut, jpgf_rcut = cell1.rcut_by_shells(precision=precision,
                                                   return_pgf_radius=True)
        ptr_jpgf_rcut = lib.ndarray_pointer_2d(jpgf_rcut)
    njsh = len(jsh_bas)
    assert njsh == len(jsh_rcut)

    nl = build_neighbor_list_for_shlpairs(cell, cell1, Ls=Ls,
                                          ish_rcut=ish_rcut, jsh_rcut=jsh_rcut,
                                          hermi=hermi)

    task_list = ctypes.POINTER(TaskList)()
    func = getattr(libdft, "build_task_list", None)
    try:
        func(ctypes.byref(task_list),
             ctypes.byref(nl), ctypes.byref(gridlevel_info),
             ish_atm.ctypes.data_as(ctypes.c_void_p),
             ish_bas.ctypes.data_as(ctypes.c_void_p),
             ish_env.ctypes.data_as(ctypes.c_void_p),
             ish_rcut.ctypes.data_as(ctypes.c_void_p),
             ptr_ipgf_rcut,
             jsh_atm.ctypes.data_as(ctypes.c_void_p),
             jsh_bas.ctypes.data_as(ctypes.c_void_p),
             jsh_env.ctypes.data_as(ctypes.c_void_p),
             jsh_rcut.ctypes.data_as(ctypes.c_void_p),
             ptr_jpgf_rcut,
             ctypes.c_int(nish), ctypes.c_int(njsh),
             Ls.ctypes.data_as(ctypes.c_void_p),
             ctypes.c_double(precision), ctypes.c_int(hermi))
    except Exception as e:
        raise RuntimeError("Failed to build task list. %s" % e)
    finally:
        # The neighbor list is only needed while the task list is built;
        # release it on the error path too so it does not leak.
        free_neighbor_list(nl)
    return task_list
naoi = ao_loc0[i1] - ao_loc0[i0] + if hermi == 1: + ao_loc1 = ao_loc0 + else: + key1 = 'cart' if cell1.cart else 'sph' + ao_loc1 = moleintor.make_loc(jsh_bas, key1) + naoj = ao_loc1[j1] - ao_loc1[j0] + + dm = np.asarray(dm, order='C') + assert dm.shape[-2:] == (naoi, naoj) + + if dimension is None: + dimension = cell0.dimension + assert dimension == getattr(cell1, "dimension", None) + + if Ls is None and dimension > 0: + Ls = np.asarray(cell0.get_lattice_Ls(), order='C') + elif Ls is None and dimension == 0: + Ls = np.zeros((1,3)) + + if dimension == 0 or kpts is None or gamma_point(kpts): + nkpts, nimgs = 1, Ls.shape[0] + dm = dm.reshape(-1,1,naoi,naoj) + else: + expkL = np.exp(1j*kpts.reshape(-1,3).dot(Ls.T)) + nkpts, nimgs = expkL.shape + dm = dm.reshape(-1,nkpts,naoi,naoj) + n_dm = dm.shape[0] + + #TODO check if cell1 has the same lattice vectors + if a is None: + a = cell0.lattice_vectors() + b = np.linalg.inv(a.T) + + if abs(a-np.diag(a.diagonal())).max() < 1e-12: + lattice_type = '_orth' + else: + lattice_type = '_nonorth' + xctype = xctype.upper() + if xctype == 'LDA': + comp = 1 + elif xctype == 'GGA': + if hermi == 1: + raise RuntimeError('hermi=1 is not supported for GGA functional') + comp = 4 + else: + raise NotImplementedError('meta-GGA') + + eval_fn = 'make_rho_' + xctype.lower() + lattice_type + drv = getattr(libdft, "grid_collocate_drv", None) + + def make_rho_(rs_rho, dm): + try: + drv(getattr(libdft, eval_fn, None), + ctypes.byref(rs_rho), + dm.ctypes.data_as(ctypes.c_void_p), + ctypes.byref(task_list), + ctypes.c_int(comp), ctypes.c_int(hermi), + (ctypes.c_int*4)(i0, i1, j0, j1), + ao_loc0.ctypes.data_as(ctypes.c_void_p), + ao_loc1.ctypes.data_as(ctypes.c_void_p), + ctypes.c_int(dimension), + Ls.ctypes.data_as(ctypes.c_void_p), + a.ctypes.data_as(ctypes.c_void_p), + b.ctypes.data_as(ctypes.c_void_p), + ish_atm.ctypes.data_as(ctypes.c_void_p), + ish_bas.ctypes.data_as(ctypes.c_void_p), + ish_env.ctypes.data_as(ctypes.c_void_p), + 
def _eval_rhoG(mydf, dm_kpts, hermi=1, kpts=np.zeros((1,3)), deriv=0,
               rhog_high_order=RHOG_HIGH_ORDER):
    '''
    Collocate the density on every grid level and assemble it in
    reciprocal space on the mesh `mydf.mesh`.

    Arguments:
        mydf : object providing cell, mesh, ngrids, ke_ratio and rel_cutoff.
        dm_kpts : density matrices, normalised with `_format_dms`.
        hermi : forwarded to the task list and to :func:`eval_rho`.
        kpts : k-points of the density matrices.
        deriv : 0 for the density only, 1 to also obtain its gradient.
        rhog_high_order : if true, the gradient is collocated in real space
            (xctype 'GGA'); otherwise only the density is collocated and the
            gradient is generated in reciprocal space with
            :func:`gradient_gs`.

    Returns:
        complex array of shape (nset, ncomp, ngrids) with ncomp = 1 for
        deriv = 0 and ncomp = 4 for deriv = 1. The values carry the factor
        cell.vol / ngrids / nkpts applied on each level.

    Raises:
        NotImplementedError : deriv is neither 0 nor 1 (e.g. meta-GGA).
    '''
    # Reject unsupported orders before any work is done. The original
    # guarded this with an assert, which left the meta-GGA branch
    # unreachable and `xctype` unbound for negative values.
    if deriv not in (0, 1):
        raise NotImplementedError('deriv = %s' % deriv)
    cell = mydf.cell

    dm_kpts = np.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset, nkpts = dms.shape[:2]

    task_list = _update_task_list(mydf, hermi=hermi, ngrids=mydf.ngrids,
                                  ke_ratio=mydf.ke_ratio, rel_cutoff=mydf.rel_cutoff)

    gga_high_order = False
    if deriv == 0:
        xctype = 'LDA'
        rhodim = 1
    else:
        if rhog_high_order:
            xctype = 'GGA'
            rhodim = 4
        else:  # approximate high order derivatives in reciprocal space
            gga_high_order = True
            xctype = 'LDA'
            rhodim = 1
        assert(hermi == 1 or gamma_point(kpts))

    rs_rho = eval_rho(cell, dms, task_list, hermi=hermi, xctype=xctype, kpts=kpts,
                      ignore_imag=(hermi == 1))
    # eval_rho returns a bare grid for a single density matrix and a list
    # otherwise; work with a list in both cases.
    rs_grids = [rs_rho] if nset == 1 else list(rs_rho)

    try:
        nx, ny, nz = mydf.mesh
        rhoG = np.zeros((nset*rhodim,nx,ny,nz), dtype=np.complex128)
        nlevels = task_list.contents.nlevels
        meshes = task_list.contents.gridlevel_info.contents.mesh
        meshes = np.ctypeslib.as_array(meshes, shape=(nlevels,3))
        for ilevel in range(nlevels):
            mesh = meshes[ilevel]
            ngrids = int(np.prod(mesh))
            # The reshape to (nset*rhodim, -1) followed by an FFT on `mesh`
            # needs rhodim*ngrids values per density matrix, so the whole
            # per-level buffer is viewed (identical to before for rhodim=1).
            # Assumes the C side stores the rhodim components of a level
            # back to back -- TODO confirm against init_rs_grid.
            bufs = [np.ctypeslib.as_array(grid.contents.data[ilevel],
                                          shape=(rhodim*ngrids,))
                    for grid in rs_grids]
            rho = bufs[0] if nset == 1 else np.asarray(bufs)
            bufs = None

            weight = 1./nkpts * cell.vol/ngrids
            rho_freq = tools.fft(rho.reshape(nset*rhodim, -1), mesh)
            rho = None
            rho_freq *= weight
            gx = np.fft.fftfreq(mesh[0], 1./mesh[0]).astype(np.int32)
            gy = np.fft.fftfreq(mesh[1], 1./mesh[1]).astype(np.int32)
            gz = np.fft.fftfreq(mesh[2], 1./mesh[2]).astype(np.int32)
            # Scatter the coefficients of this level into the full mesh.
            _takebak_4d(rhoG, rho_freq.reshape((-1,) + tuple(mesh)), (None, gx, gy, gz))
            rho_freq = None
    finally:
        # The grids are allocated on the C side; release them even when
        # the transformation above fails.
        for grid in rs_grids:
            free_rs_grid(grid)
        rs_rho = rs_grids = None

    rhoG = rhoG.reshape(nset,rhodim,-1)
    if gga_high_order:
        Gv = cell.get_Gv(mydf.mesh)
        #:rhoG1 = np.einsum('np,px->nxp', 1j*rhoG[:,0], Gv)
        rhoG1 = gradient_gs(rhoG[:,0], Gv)
        rhoG = np.concatenate([rhoG, rhoG1], axis=1)
        Gv = rhoG1 = None
    return rhoG
shls_slice1 is None: + shls_slice1 = (0, cell1.nbas) + j0, j1 = shls_slice1 + + if hermi == 1: + assert cell1 is cell0 + assert i0 == j0 and i1 == j1 + + key0 = 'cart' if cell0.cart else 'sph' + ao_loc0 = moleintor.make_loc(ish_bas, key0) + naoi = ao_loc0[i1] - ao_loc0[i0] + if hermi == 1: + ao_loc1 = ao_loc0 + else: + key1 = 'cart' if cell1.cart else 'sph' + ao_loc1 = moleintor.make_loc(jsh_bas, key1) + naoj = ao_loc1[j1] - ao_loc1[j0] + + if dimension is None: + dimension = cell0.dimension + assert dimension == getattr(cell1, "dimension", None) + + if Ls is None and dimension > 0: + Ls = np.asarray(cell0.get_lattice_Ls(), order='C') + elif Ls is None and dimension == 0: + Ls = np.zeros((1,3)) + + if dimension == 0 or kpts is None or gamma_point(kpts): + nkpts, nimgs = 1, Ls.shape[0] + else: + expkL = np.exp(1j*kpts.reshape(-1,3).dot(Ls.T)) + nkpts, nimgs = expkL.shape + + #TODO check if cell1 has the same lattice vectors + if a is None: + a = cell0.lattice_vectors() + b = np.linalg.inv(a.T) + + if abs(a-np.diag(a.diagonal())).max() < 1e-12: + lattice_type = '_orth' + else: + lattice_type = '_nonorth' + + weights = np.asarray(weights, order='C') + assert(weights.dtype == np.double) + xctype = xctype.upper() + n_mat = None + if xctype == 'LDA': + if weights.ndim == 1: + weights = weights.reshape(-1, np.prod(mesh)) + else: + n_mat = weights.shape[0] + elif xctype == 'GGA': + if weights.ndim == 2: + weights = weights.reshape(-1, 4, np.prod(mesh)) + else: + n_mat = weights.shape[0] + else: + raise NotImplementedError + + eval_fn = 'eval_mat_' + xctype.lower() + lattice_type + if deriv > 0: + if deriv == 1: + assert comp == 3 + assert hermi == 0 + eval_fn += '_ip1' + else: + raise NotImplementedError + drv = getattr(libdft, "grid_integrate_drv", None) + + def make_mat(wv): + if comp == 1: + mat = np.zeros((naoi, naoj)) + else: + mat = np.zeros((comp, naoi, naoj)) + + try: + drv(getattr(libdft, eval_fn, None), + mat.ctypes.data_as(ctypes.c_void_p), + 
def _get_j_pass2(mydf, vG, kpts=np.zeros((1,3)), hermi=1, verbose=None):
    '''
    Integrate a reciprocal-space potential with the basis function pairs,
    accumulating the contribution of every grid level.

    Arguments:
        mydf : object providing cell, mesh, ngrids, ke_ratio and rel_cutoff.
        vG : potential on `mydf.mesh`; reshaped to (nset, nx, ny, nz).
        kpts : k-points of the result.
        hermi : forwarded to the task list and to :func:`eval_mat`.
        verbose : unused here.

    Returns:
        array of shape (nset, nkpts, nao, nao); the leading axis is dropped
        when nset == 1. Real at the gamma point, complex otherwise.

    Raises:
        NotImplementedError : away from the gamma point when the imaginary
            part of the real-space potential exceeds IMAG_TOL.
    '''
    cell = mydf.cell
    nkpts = len(kpts)
    nao = cell.nao_nr()
    nx, ny, nz = mydf.mesh
    vG = vG.reshape(-1,nx,ny,nz)
    nset = vG.shape[0]

    task_list = _update_task_list(mydf, hermi=hermi, ngrids=mydf.ngrids,
                                  ke_ratio=mydf.ke_ratio, rel_cutoff=mydf.rel_cutoff)

    is_gamma = gamma_point(kpts)
    out_dtype = np.double if is_gamma else np.complex128
    vj_kpts = np.zeros((nset,nkpts,nao,nao), dtype=out_dtype)

    tasks = task_list.contents
    nlevels = tasks.nlevels
    level_meshes = np.ctypeslib.as_array(tasks.gridlevel_info.contents.mesh,
                                         shape=(nlevels,3))
    for ilevel, mesh in enumerate(level_meshes):
        ngrids = np.prod(mesh)

        # Integer frequencies of this level select its plane waves out of
        # the full mesh.
        freqs = tuple(np.fft.fftfreq(n, 1./n).astype(np.int32) for n in mesh)
        sub_vG = _take_4d(vG, (None,) + freqs).reshape(nset,ngrids)

        v_rs = tools.ifft(sub_vG, mesh).reshape(nset,ngrids)
        vR = np.asarray(v_rs.real, order='C')
        vI = np.asarray(v_rs.imag, order='C')

        level_mat = eval_mat(cell, vR, task_list, comp=1, hermi=hermi,
                             xctype='LDA', kpts=kpts, grid_level=ilevel, mesh=mesh)
        vj_kpts += np.asarray(level_mat).reshape(nset,-1,nao,nao)

        if is_gamma:
            continue
        # Only the real part was integrated above.
        if abs(vI).max() > IMAG_TOL:
            raise NotImplementedError

    return vj_kpts[0] if nset == 1 else vj_kpts
def _get_gga_pass2(mydf, vG, kpts=np.zeros((1,3)), hermi=1, verbose=None):
    '''
    Integrate a four-component (GGA) reciprocal-space potential with the
    basis function pairs, accumulating the contribution of every grid level.

    Arguments:
        mydf : object providing cell, mesh, ngrids, ke_ratio and rel_cutoff.
        vG : potential on `mydf.mesh`; reshaped to (nset, 4, nx, ny, nz).
        kpts : k-points of the result.
        hermi : forwarded to the task list and to :func:`eval_mat`.
        verbose : unused here.

    Returns:
        array of shape (nset, nkpts, nao, nao); the leading axis is dropped
        when nset == 1.

    Raises:
        NotImplementedError : `kpts` is not the gamma point.
    '''
    cell = mydf.cell
    nkpts = len(kpts)
    nao = cell.nao_nr()
    nx, ny, nz = mydf.mesh
    vG = vG.reshape(-1,4,nx,ny,nz)
    nset = vG.shape[0]

    task_list = _update_task_list(mydf, hermi=hermi, ngrids=mydf.ngrids,
                                  ke_ratio=mydf.ke_ratio, rel_cutoff=mydf.rel_cutoff)

    is_gamma = gamma_point(kpts)
    out_dtype = np.double if is_gamma else np.complex128
    veff = np.zeros((nset,nkpts,nao,nao), dtype=out_dtype)

    tasks = task_list.contents
    nlevels = tasks.nlevels
    level_meshes = np.ctypeslib.as_array(tasks.gridlevel_info.contents.mesh,
                                         shape=(nlevels,3))
    for ilevel, mesh in enumerate(level_meshes):
        ngrids = np.prod(mesh)

        # Integer frequencies of this level select its plane waves out of
        # the full mesh.
        freqs = tuple(np.fft.fftfreq(n, 1./n).astype(np.int32) for n in mesh)
        sub_vG = _take_5d(vG, (None, None) + freqs).reshape(-1,ngrids)
        wv = np.asarray(tools.ifft(sub_vG, mesh).real.reshape(nset,4,ngrids),
                        order='C')

        level_mat = eval_mat(cell, wv, task_list, comp=1, hermi=hermi,
                             xctype='GGA', kpts=kpts, grid_level=ilevel, mesh=mesh)
        veff += np.asarray(level_mat).reshape(nset,-1,nao,nao)
        if not is_gamma:
            raise NotImplementedError

    return veff[0] if nset == 1 else veff
np.zeros((nset,nkpts,comp,nao,nao)) + else: + vj_kpts = np.zeros((nset,nkpts,comp,nao,nao), dtype=np.complex128) + + nlevels = task_list.contents.nlevels + meshes = task_list.contents.gridlevel_info.contents.mesh + meshes = np.ctypeslib.as_array(meshes, shape=(nlevels,3)) + for ilevel in range(nlevels): + mesh = meshes[ilevel] + ngrids = np.prod(mesh) + + gx = np.fft.fftfreq(mesh[0], 1./mesh[0]).astype(np.int32) + gy = np.fft.fftfreq(mesh[1], 1./mesh[1]).astype(np.int32) + gz = np.fft.fftfreq(mesh[2], 1./mesh[2]).astype(np.int32) + sub_vG = _take_5d(vG, (None, None, gx, gy, gz)).reshape(-1,ngrids) + + v_rs = tools.ifft(sub_vG, mesh).reshape(nset,4,ngrids) + vR = np.asarray(v_rs.real, order='C') + vI = np.asarray(v_rs.imag, order='C') + if at_gamma_point: + v_rs = vR + + mat = eval_mat(cell, vR, task_list, comp=comp, hermi=hermi, deriv=deriv, + xctype='GGA', kpts=kpts, grid_level=ilevel, mesh=mesh) + vj_kpts += np.asarray(mat).reshape(nset,-1,comp,nao,nao) + if not at_gamma_point and abs(vI).max() > IMAG_TOL: + raise NotImplementedError + + if nset == 1: + vj_kpts = vj_kpts[0] + return vj_kpts + + +def _rks_gga_wv0(rho, vxc, weight): + vrho, vgamma = vxc[:2] + ngrid = vrho.size + wv = np.empty((4,ngrid)) + wv[0] = np.multiply(weight, vrho, out=wv[0]) + for i in range(1, 4): + wv[i] = np.multiply(weight * 2, np.multiply(vgamma, rho[i], out=wv[i]), out=wv[i]) + return wv + + +def _uks_gga_wv0(rho, vxc, weight): + rhoa, rhob = rho + vrho, vsigma = vxc[:2] + ngrids = vrho.shape[0] + wv = np.empty((2, 4, ngrids)) + wv[0,0] = np.multiply(weight, vrho[:,0], out=wv[0,0]) + for i in range(1,4): + wv[0,i] = np.multiply(2., np.multiply(rhoa[i], vsigma[:,0], out=wv[0,i]), out=wv[0,i]) + wv[0,i] = np.add(wv[0,i], np.multiply(rhob[i], vsigma[:,1]), out=wv[0,i]) + wv[0,i] = np.multiply(weight, wv[0,i], out=wv[0,i]) + wv[1,0] = np.multiply(weight, vrho[:,1], out=wv[1,0]) + for i in range(1,4): + wv[1,i] = np.multiply(2., np.multiply(rhob[i], vsigma[:,2], out=wv[1,i]), out=wv[1,i]) 
def _rks_gga_wv0_pw(cell, rho, vxc, weight, mesh):
    '''
    Spin-restricted GGA potential assembled in reciprocal space.

    vrho and the three products vgamma*rho[i] are Fourier transformed on
    `mesh` and combined by the C routine `get_gga_vrho_gs`. The source
    gives the NumPy reference form as
        out = vrho_freq - 2j * einsum('px,xp->p', Gv, buf_freq)
        out *= weight

    Arguments:
        cell : provides the reciprocal vectors through `get_Gv(mesh)`.
        rho : indexable with rho[1], rho[2], rho[3] (gradient components).
        vxc : sequence whose first two entries are vrho and vgamma.
        weight : scalar weight passed to the C routine as a double.
        mesh : mesh used for the Fourier transforms.

    Returns:
        complex array of length ngrid.
    '''
    def _ptr(arr):
        # Raw data pointer of a NumPy array for the C call.
        return arr.ctypes.data_as(ctypes.c_void_p)

    v_rho, v_gamma = vxc[:2]
    npts = v_rho.size

    grad_terms = np.empty((3,npts))
    for comp in (1, 2, 3):
        np.multiply(v_gamma, rho[comp], out=grad_terms[comp-1])

    v_rho_freq = tools.fft(v_rho, mesh).reshape((1,npts))
    grad_freq = tools.fft(grad_terms, mesh).reshape((3,npts))
    Gv = cell.get_Gv(mesh)

    result = np.empty((npts,), order="C", dtype=np.complex128)
    kernel = getattr(libdft, 'get_gga_vrho_gs', None)
    kernel(_ptr(result), _ptr(v_rho_freq), _ptr(grad_freq), _ptr(Gv),
           ctypes.c_double(weight), ctypes.c_int(npts))
    return result
def nr_rks(mydf, xc_code, dm_kpts, hermi=1, kpts=None,
           kpts_band=None, with_j=False, return_j=False, verbose=None):
    '''
    Same as multigrid.nr_rks, but considers Hermitian symmetry also for GGA

    Arguments:
        mydf : multigrid object providing cell, mesh, kpts, _numint and
            vpplocG_part1.
        xc_code : exchange-correlation functional; only LDA and GGA types
            are supported.
        dm_kpts : density matrices.
        hermi : Hermitian-symmetry flag of the density matrices.
        kpts : k-points of the density matrices (default: mydf.kpts).
        kpts_band : k-points at which the potential is evaluated
            (default: same as kpts).
        with_j : add the Coulomb potential to the returned matrix.
        return_j : additionally attach the Coulomb matrix as `veff.vj`.
        verbose : verbosity level or logger.

    Returns:
        (nelec, excsum, veff). veff is tagged with the attributes
        ecoul, exc, vj and vk (vk is always None).

    Raises:
        NotImplementedError : the functional is neither LDA nor GGA.
    '''
    if kpts is None:
        kpts = mydf.kpts
    log = logger.new_logger(mydf, verbose)
    cell = mydf.cell
    dm_kpts = np.asarray(dm_kpts, order='C')
    dms = _format_dms(dm_kpts, kpts)
    nset = dms.shape[0]
    # input_band keeps the value passed by the caller for _format_jks. This
    # line used to be repeated before the second pass, which replaced
    # input_band with the already formatted array.
    kpts_band, input_band = _format_kpts_band(kpts_band, kpts), kpts_band

    ni = mydf._numint
    xctype = ni._xc_type(xc_code)
    if xctype == 'LDA':
        deriv = 0
    elif xctype == 'GGA':
        deriv = 1
    else:
        # Previously `deriv` was left unbound here and the call below
        # failed with an UnboundLocalError.
        raise NotImplementedError('xctype %s' % xctype)
    use_fft_gga = xctype == 'GGA' and GGA_METHOD.upper() == 'FFT'
    rhoG = _eval_rhoG(mydf, dm_kpts, hermi, kpts, deriv)

    mesh = mydf.mesh
    ngrids = np.prod(mesh)

    coulG = tools.get_coulG(cell, mesh=mesh)
    #vG = np.einsum('ng,g->ng', rhoG[:,0], coulG)
    vG = np.empty_like(rhoG[:,0], dtype=np.result_type(rhoG[:,0], coulG))
    for i, rhoG_i in enumerate(rhoG[:,0]):
        np.multiply(rhoG_i, coulG, out=vG[i])
    coulG = None

    # For the energy, vpplocG_part1 enters twice; one copy is removed again
    # below so that the potential contains it once.
    if mydf.vpplocG_part1 is not None:
        for i in range(nset):
            #vG[i] += mydf.vpplocG_part1 * 2
            np.add(vG[i], np.multiply(2., mydf.vpplocG_part1), out=vG[i])

    #ecoul = .5 * np.einsum('ng,ng->n', rhoG[:,0].real, vG.real)
    #ecoul+= .5 * np.einsum('ng,ng->n', rhoG[:,0].imag, vG.imag)
    ecoul = np.zeros((rhoG.shape[0],))
    for i in range(rhoG.shape[0]):
        ecoul[i] = .5 * np.vdot(rhoG[i,0], vG[i]).real

    ecoul /= cell.vol
    log.debug('Multigrid Coulomb energy %s', ecoul)

    if mydf.vpplocG_part1 is not None:
        for i in range(nset):
            #vG[i] -= mydf.vpplocG_part1
            np.subtract(vG[i], mydf.vpplocG_part1, out=vG[i])

    weight = cell.vol / ngrids
    # *(1./weight) because rhoR is scaled by weight in _eval_rhoG. When
    # computing rhoR with IFFT, the weight factor is not needed.
    rhoR = tools.ifft(rhoG.reshape(-1,ngrids), mesh).real * (1./weight)
    rhoR = rhoR.reshape(nset,-1,ngrids)
    wv_freq = []
    nelec = np.zeros(nset)
    excsum = np.zeros(nset)
    for i in range(nset):
        exc, vxc = ni.eval_xc(xc_code, rhoR[i], spin=0, deriv=1)[:2]
        if xctype == 'LDA':
            wv = np.multiply(weight, vxc[0])
            wv_freq.append(tools.fft(wv, mesh))
        elif use_fft_gga:
            wv_freq.append(_rks_gga_wv0_pw(cell, rhoR[i], vxc, weight, mesh).reshape(1,ngrids))
        else:
            wv = _rks_gga_wv0(rhoR[i], vxc, weight)
            wv_freq.append(tools.fft(wv, mesh))
        wv = None

        nelec[i] += np.sum(rhoR[i,0]) * weight
        excsum[i] += np.sum(np.multiply(rhoR[i,0], exc)) * weight
        exc = vxc = None

    rhoR = rhoG = None

    if len(wv_freq) == 1:
        wv_freq = wv_freq[0].reshape(nset,-1,*mesh)
    else:
        wv_freq = np.asarray(wv_freq).reshape(nset,-1,*mesh)

    if nset == 1:
        ecoul = ecoul[0]
        nelec = nelec[0]
        excsum = excsum[0]
    log.debug('Multigrid exc %s nelec %s', excsum, nelec)

    if with_j:
        wv_freq[:,0] += vG.reshape(nset,*mesh)
    if xctype == 'LDA' or use_fft_gga:
        veff = _get_j_pass2(mydf, wv_freq, kpts_band, verbose=log)
    else:
        veff = _get_gga_pass2(mydf, wv_freq, kpts_band, hermi=hermi, verbose=log)
    wv_freq = None
    veff = _format_jks(veff, dm_kpts, input_band, kpts)

    if return_j:
        vj = _get_j_pass2(mydf, vG, kpts_band, verbose=log)
        # Format the Coulomb matrix itself; formatting veff here made
        # `veff.vj` a copy of veff.
        vj = _format_jks(vj, dm_kpts, input_band, kpts)
    else:
        vj = None
    vG = None

    veff = lib.tag_array(veff, ecoul=ecoul, exc=excsum, vj=vj, vk=None)
    return nelec, excsum, veff
kpts_band=None, with_j=False, return_j=False, verbose=None): + if kpts is None: kpts = mydf.kpts + log = logger.new_logger(mydf, verbose) + cell = mydf.cell + dm_kpts = np.asarray(dm_kpts, order='C') + dms = _format_dms(dm_kpts, kpts) + nset, nkpts, nao = dms.shape[:3] + nset //= 2 + kpts_band, input_band = _format_kpts_band(kpts_band, kpts), kpts_band + + mesh = mydf.mesh + ngrids = np.prod(mesh) + ni = mydf._numint + xctype = ni._xc_type(xc_code) + if xctype == 'LDA': + deriv = 0 + elif xctype == 'GGA': + deriv = 1 + + rhoG = _eval_rhoG(mydf, dm_kpts, hermi, kpts, deriv) + rhoG = rhoG.reshape(nset,2,-1,ngrids) + + coulG = tools.get_coulG(cell, mesh=mesh) + #vG = np.einsum('nsg,g->ng', rhoG[:,:,0], coulG) + vG = np.empty((nset,ngrids), dtype=np.result_type(rhoG[:,:,0], coulG)) + for i, rhoG_i in enumerate(rhoG[:,:,0]): + vG[i] = np.multiply(np.add(rhoG_i[0], rhoG_i[1]), coulG, out=vG[i]) + coulG = None + + if mydf.vpplocG_part1 is not None: + for i in range(nset): + #vG[i] += mydf.vpplocG_part1 * 2 + vG[i] = np.add(vG[i], np.multiply(2., mydf.vpplocG_part1), out=vG[i]) + + ecoul = np.zeros(nset) + for i in range(nset): + ecoul[i] = .5 * np.vdot(np.add(rhoG[i,0,0], rhoG[i,1,0]), vG[i]).real + + ecoul /= cell.vol + log.debug('Multigrid Coulomb energy %s', ecoul) + + if mydf.vpplocG_part1 is not None: + for i in range(nset): + #vG[i] -= mydf.vpplocG_part1 + vG[i] = np.subtract(vG[i], mydf.vpplocG_part1, out=vG[i]) + + weight = cell.vol / ngrids + # *(1./weight) because rhoR is scaled by weight in _eval_rhoG. When + # computing rhoR with IFFT, the weight factor is not needed. 
+ rhoR = tools.ifft(rhoG.reshape(-1,ngrids), mesh).real * (1./weight) + rhoR = rhoR.reshape(nset,2,-1,ngrids) + wv_freq = [] + nelec = np.zeros(nset) + excsum = np.zeros(nset) + for i in range(nset): + exc, vxc = ni.eval_xc(xc_code, rhoR[i], spin=1, deriv=1)[:2] + if xctype == 'LDA': + wv = np.multiply(weight, vxc[0].T) + wv_freq.append(tools.fft(wv, mesh)) + wv = None + elif xctype == 'GGA': + if GGA_METHOD.upper() == 'FFT': + wv_freq.append(_uks_gga_wv0_pw(cell, rhoR[i], vxc, weight, mesh)) + else: + wv = _uks_gga_wv0(rhoR[i], vxc, weight) + wv_freq.append(tools.fft(wv.reshape(-1,*mesh), mesh)) + wv = None + else: + raise NotImplementedError + + nelec[i] += np.sum(rhoR[i,:,0]).sum() * weight + excsum[i] += np.sum(np.multiply(np.add(rhoR[i,0,0],rhoR[i,1,0]), exc)) * weight + exc = vxc = None + + rhoR = rhoG = None + + if len(wv_freq) == 1: + wv_freq = wv_freq[0].reshape(nset,2,-1,*mesh) + else: + wv_freq = np.asarray(wv_freq).reshape(nset,2,-1,*mesh) + + if nset == 1: + ecoul = ecoul[0] + nelec = nelec[0] + excsum = excsum[0] + log.debug('Multigrid exc %s nelec %s', excsum, nelec) + + kpts_band, input_band = _format_kpts_band(kpts_band, kpts), kpts_band + if xctype == 'LDA': + if with_j: + for s in range(2): + wv_freq[:,s,0] += vG.reshape(nset,*mesh) + veff = _get_j_pass2(mydf, wv_freq, kpts_band, verbose=log) + elif xctype == 'GGA': + if with_j: + #wv_freq[:,:,0] += vG.reshape(nset,*mesh) + for s in range(2): + wv_freq[:,s,0] = np.add(wv_freq[:,s,0], vG.reshape(nset,*mesh), out=wv_freq[:,s,0]) + if GGA_METHOD.upper() == 'FFT': + veff = _get_j_pass2(mydf, wv_freq, kpts_band, verbose=log) + else: + veff = _get_gga_pass2(mydf, wv_freq, kpts_band, hermi=hermi, verbose=log) + wv_freq = None + veff = _format_jks(veff, dm_kpts, input_band, kpts) + + if return_j: + vj = _get_j_pass2(mydf, vG, kpts_band, verbose=log) + vj = _format_jks(veff, dm_kpts, input_band, kpts) + else: + vj = None + vG = None + + veff = lib.tag_array(veff, ecoul=ecoul, exc=excsum, vj=vj, vk=None) + 
return nelec, excsum, veff
+
+def get_veff_ip1(mydf, dm_kpts, xc_code=None, kpts=np.zeros((1,3)), kpts_band=None, spin=0):
+    cell = mydf.cell
+    dm_kpts = np.asarray(dm_kpts, order='C')
+    dms = _format_dms(dm_kpts, kpts)
+    nset, nkpts, nao = dms.shape[:3]
+    kpts_band = _format_kpts_band(kpts_band, kpts)
+    if spin == 1:
+        nset //= 2  # two spin components are paired into one set
+
+    mesh = mydf.mesh
+    ngrids = np.prod(mesh)
+    ni = mydf._numint
+    xctype = ni._xc_type(xc_code)
+    if xctype not in ('LDA', 'GGA'):
+        raise NotImplementedError  # fail early instead of hitting an unbound `deriv` below
+    # density derivative order requested from _eval_rhoG: 0 for LDA, 1 for GGA
+    deriv = 0 if xctype == 'LDA' else 1
+    rhoG = _eval_rhoG(mydf, dm_kpts, hermi=1, kpts=kpts_band, deriv=deriv)
+    if spin == 1:
+        rhoG = rhoG.reshape(nset,2,-1,ngrids)
+    # cache rhoG for core density gradients
+    mydf.rhoG = rhoG
+
+    coulG = tools.get_coulG(cell, mesh=mesh)
+    vG = np.empty((nset,ngrids), dtype=np.result_type(rhoG, coulG))
+    for i in range(nset):
+        if spin == 0:
+            vG[i] = np.multiply(rhoG[i,0], coulG, out=vG[i])
+        elif spin == 1:
+            tmp = np.add(rhoG[i,0,0], rhoG[i,1,0])  # sum of the two spin densities
+            vG[i] = np.multiply(tmp, coulG, out=vG[i])
+
+    if mydf.vpplocG_part1 is not None:
+        for i in range(nset):
+            vG[i] = np.add(vG[i], mydf.vpplocG_part1, out=vG[i])
+
+    weight = cell.vol / ngrids
+
+    # *(1./weight) because rhoR is scaled by weight in _eval_rhoG. When
+    # computing rhoR with IFFT, the weight factor is not needed.
+    rhoR = tools.ifft(rhoG.reshape(-1,ngrids), mesh).real * (1./weight)
+    if spin == 0:
+        rhoR = rhoR.reshape(nset,-1,ngrids)
+    elif spin == 1:
+        rhoR = rhoR.reshape(nset,2,-1,ngrids)
+
+    wv_freq = []  # one reciprocal-space XC potential per set
+    for i in range(nset):
+        exc, vxc = ni.eval_xc(xc_code, rhoR[i], spin=spin, deriv=1)[:2]
+        if spin == 0:
+            if xctype == 'LDA':
+                wv = np.multiply(weight, vxc[0])
+                wv_freq.append(tools.fft(wv, mesh))
+                wv = None
+            elif xctype == 'GGA':
+                if GGA_METHOD.upper() == 'FFT':
+                    wv_freq.append(_rks_gga_wv0_pw(cell, rhoR[i], vxc, weight, mesh).reshape(1,ngrids))
+                else:
+                    wv = _rks_gga_wv0(rhoR[i], vxc, weight)
+                    wv_freq.append(tools.fft(wv, mesh))
+            else:
+                raise NotImplementedError
+        elif spin == 1:
+            if xctype == 'LDA':
+                wv = np.multiply(weight, vxc[0].T)
+                wv_freq.append(tools.fft(wv, mesh))
+                wv = None
+            elif xctype == 'GGA':
+                if GGA_METHOD.upper() == 'FFT':
+                    wv_freq.append(_uks_gga_wv0_pw(cell, rhoR[i], vxc, weight, mesh))
+                else:
+                    wv = _uks_gga_wv0(rhoR[i], vxc, weight)
+                    wv_freq.append(tools.fft(wv.reshape(-1,*mesh), mesh))
+                wv = None
+            else:
+                raise NotImplementedError
+
+    rhoR = rhoG = None
+    if spin == 0:
+        if len(wv_freq) == 1:
+            wv_freq = wv_freq[0].reshape(nset,-1,*mesh)
+        else:
+            wv_freq = np.asarray(wv_freq).reshape(nset,-1,*mesh)
+    elif spin == 1:
+        if len(wv_freq) == 1:
+            wv_freq = wv_freq[0].reshape(nset,2,-1,*mesh)
+        else:
+            wv_freq = np.asarray(wv_freq).reshape(nset,2,-1,*mesh)
+
+    for i in range(nset):  # add vG (Coulomb, plus vpplocG_part1 when cached) to component 0
+        if spin == 0:
+            wv_freq[i,0] = np.add(wv_freq[i,0], vG[i].reshape(*mesh), out=wv_freq[i,0])
+        elif spin == 1:
+            for s in range(2):
+                wv_freq[i,s,0] = np.add(wv_freq[i,s,0], vG[i].reshape(*mesh), out=wv_freq[i,s,0])
+
+    if xctype == 'LDA':
+        vj_kpts = _get_j_pass2_ip1(mydf, wv_freq, kpts_band, hermi=0, deriv=1)
+    elif xctype == 'GGA':
+        if GGA_METHOD.upper() == 'FFT':
+            vj_kpts = _get_j_pass2_ip1(mydf, wv_freq, kpts_band, hermi=0, deriv=1)
+        else:
+            vj_kpts = _get_gga_pass2_ip1(mydf, wv_freq, kpts_band, hermi=0, deriv=1)
+    else:
+        raise NotImplementedError
+
+    comp = 3
+    nao = cell.nao
+    if spin == 0:
+        vj_kpts = vj_kpts.reshape(nset,nkpts,comp,nao,nao)
+    elif spin == 1:
+        vj_kpts = vj_kpts.reshape(nset,2,nkpts,comp,nao,nao)
+    vj_kpts = np.moveaxis(vj_kpts, -3, -4)  # put comp ahead of the k-point axis (both spin cases)
+
+    if nkpts == 1:
+        vj_kpts = vj_kpts[...,0,:,:]  # drop the k-point axis
+    if nset == 1:
+        vj_kpts = vj_kpts[0]
+    return vj_kpts
+
+
+class MultiGridFFTDF2(MultiGridFFTDF):
+    '''
+    Base class for multigrid DFT (version 2).
+
+    Attributes:
+        task_list : TaskList instance
+            Task list recording which primitive basis function pairs
+            need to be considered.
+        vpplocG_part1 : array
+            Short-range part of the local pseudopotential represented
+            in the reciprocal space. It is cached to reduce cost.
+        rhoG : array
+            Electronic density represented in the reciprocal space.
+            It is cached in nuclear gradient calculations to reduce cost.
+    '''
+    ngrids = getattr(__config__, 'pbc_dft_multigrid_ngrids', 4)
+    ke_ratio = getattr(__config__, 'pbc_dft_multigrid_ke_ratio', 3.0)
+    rel_cutoff = getattr(__config__, 'pbc_dft_multigrid_rel_cutoff', 20.0)
+    _keys = {'ngrids', 'ke_ratio', 'rel_cutoff',
+             'task_list', 'vpplocG_part1', 'rhoG'}
+
+    def __init__(self, cell, kpts=np.zeros((1,3))):
+        fft.FFTDF.__init__(self, cell, kpts)
+        self.task_list = None
+        self.vpplocG_part1 = None
+        self.rhoG = None
+        if not gamma_point(kpts):
+            raise NotImplementedError('MultiGridFFTDF2 only supports Gamma-point calculations.')
+        a = cell.lattice_vectors()
+        if abs(a-np.diag(a.diagonal())).max() > 1e-12:  # any off-diagonal lattice component
+            raise NotImplementedError('MultiGridFFTDF2 only supports orthorhombic lattices.')
+
+    def reset(self, cell=None):
+        self.vpplocG_part1 = None
+        self.rhoG = None
+        if self.task_list is not None:
+            free_task_list(self.task_list)
+            self.task_list = None
+        fft.FFTDF.reset(self, cell=cell)
+
+    def __del__(self):
+        self.reset()
+
+    def get_veff_ip1(self, dm, xc_code=None, kpts=None, kpts_band=None, spin=0):
+        if kpts is None:
+            if self.kpts is None:
+                kpts = np.zeros((1,3))  # shape must be a tuple; np.zeros(1,3) raises TypeError
+            else:
+                kpts = self.kpts
+        kpts = kpts.reshape(-1,3)
+        vj = get_veff_ip1(self, dm, xc_code=xc_code,
+                          kpts=kpts, kpts_band=kpts_band, spin=spin)
+        return vj
+
+    def get_pp(self, kpts=None):
+        '''Compute the GTH pseudopotential matrix, which includes
+        the second part of the local potential and the non-local potential.
+        The first part of the local potential is cached as `vpplocG_part1`,
+        which is the reciprocal space representation, to be added to the electron
+        density for computing the Coulomb matrix.
+        In order to get the full PP matrix, the potential due to `vpplocG_part1`
+        needs to be added.
+        '''
+        self.vpplocG_part1 = _get_vpplocG_part1(self, with_rho_core=True)
+        return _get_pp_without_erf(self, kpts)
+
+    vpploc_part1_nuc_grad = vpploc_part1_nuc_grad
diff --git a/pyscf/pbc/dft/multigrid/pp.py b/pyscf/pbc/dft/multigrid/pp.py
new file mode 100644
index 0000000000..13c0813dac
--- /dev/null
+++ b/pyscf/pbc/dft/multigrid/pp.py
@@ -0,0 +1,290 @@
+#!/usr/bin/env python
+# Copyright 2021-2024 The PySCF Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Author: Xing Zhang
+#
+
+import ctypes
+import numpy
+from pyscf import __config__
+from pyscf import lib, gto
+from pyscf.lib import logger
+from pyscf.pbc import tools
+from pyscf.pbc.gto import pseudo
+from pyscf.pbc.gto.pseudo import pp_int
+from pyscf.pbc.lib.kpts_helper import gamma_point
+
+PP_WITH_RHO_CORE = getattr(__config__, 'pbc_dft_multigrid_pp_with_rho_core', True)
+
+libpbc = lib.load_library('libpbc')
+libdft = lib.load_library('libdft')
+
+def make_rho_core(cell, mesh=None, precision=None, atm_id=None):
+    if mesh is None:
+        mesh = cell.mesh
+    fakecell, max_radius = fake_cell_vloc_part1(cell, atm_id=atm_id, precision=precision)
+    atm = fakecell._atm
+    bas = fakecell._bas
+    env = fakecell._env
+
+    a = numpy.asarray(cell.lattice_vectors(), order='C', dtype=float)
+    if abs(a - numpy.diag(a.diagonal())).max() < 1e-12:
+        lattice_type = '_orth'
+    else:
+        lattice_type = '_nonorth'
+        raise NotImplementedError  # lattices with off-diagonal components are rejected
+    eval_fn = 'make_rho_lda' + lattice_type
+
+    b = numpy.asarray(numpy.linalg.inv(a.T), order='C', dtype=float)
+    mesh = numpy.asarray(mesh, order='C', dtype=numpy.int32)
+    rho_core = numpy.zeros((numpy.prod(mesh),), order='C', dtype=float)  # output buffer passed to the C driver
+    drv = getattr(libdft, 'build_core_density', None)
+    try:
+        drv(getattr(libdft, eval_fn),
+            rho_core.ctypes.data_as(ctypes.c_void_p),
+            atm.ctypes.data_as(ctypes.c_void_p),
+            bas.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(len(bas)),
+            env.ctypes.data_as(ctypes.c_void_p),
+            mesh.ctypes.data_as(ctypes.c_void_p),
+            ctypes.c_int(cell.dimension),
+            a.ctypes.data_as(ctypes.c_void_p),
+            b.ctypes.data_as(ctypes.c_void_p), ctypes.c_double(max_radius))
+    except Exception as e:
+        raise RuntimeError("Failed to compute rho_core. %s" % e) from e
+    return rho_core
+
+
+def _get_pp_without_erf(mydf, kpts=None):
+    '''Get the periodic pseudopotential nuc-el AO matrix, with G=0 removed.
+    '''
+    cell = mydf.cell
+    if kpts is None:
+        kpts_lst = numpy.zeros((1,3))
+    else:
+        kpts_lst = numpy.reshape(kpts, (-1,3))
+
+    vpp = pp_int.get_pp_loc_part2(cell, kpts_lst)
+    vppnl = pp_int.get_pp_nl(cell, kpts_lst)
+
+    for k, kpt in enumerate(kpts_lst):
+        if gamma_point(kpt):
+            vpp[k] = vpp[k].real + vppnl[k].real
+        else:
+            vpp[k] += vppnl[k]
+    vppnl = None
+
+    if kpts is None or numpy.shape(kpts) == (3,):  # single k-point: return one matrix, not a list
+        vpp = vpp[0]
+    return numpy.asarray(vpp)
+
+
+def get_pp_loc_part1_gs(cell, Gv):
+    coulG = tools.get_coulG(cell, Gv=Gv)
+    G2 = numpy.einsum('ix,ix->i', Gv, Gv)
+    G0idx = int(numpy.where(G2==0)[0][0])  # plain int: ctypes.c_int() cannot take the index array from numpy.where
+    ngrid = len(G2)
+    Gv = numpy.asarray(Gv, order='C', dtype=numpy.double)
+    coulG = numpy.asarray(coulG, order='C', dtype=numpy.double)
+    G2 = numpy.asarray(G2, order='C', dtype=numpy.double)
+
+    coords = cell.atom_coords()
+    coords = numpy.asarray(coords, order='C', dtype=numpy.double)
+    Z = numpy.empty([cell.natm,], order='C', dtype=numpy.double)
+    rloc = numpy.empty([cell.natm,], order='C', dtype=numpy.double)
+    for ia in range(cell.natm):
+        Z[ia] = cell.atom_charge(ia)
+        symb = cell.atom_symbol(ia)
+        if symb in cell._pseudo:
+            rloc[ia] = cell._pseudo[symb][1]
+        else:
+            rloc[ia] = -999  # sentinel for atoms without a pseudopotential entry
+
+    out = numpy.empty((ngrid,), order='C', dtype=numpy.complex128)
+    fn = getattr(libpbc, "pp_loc_part1_gs", None)
+    try:
+        fn(out.ctypes.data_as(ctypes.c_void_p),
+           coulG.ctypes.data_as(ctypes.c_void_p),
+           Gv.ctypes.data_as(ctypes.c_void_p),
+           G2.ctypes.data_as(ctypes.c_void_p),
+           ctypes.c_int(G0idx), ctypes.c_int(ngrid),
+           Z.ctypes.data_as(ctypes.c_void_p),
+           coords.ctypes.data_as(ctypes.c_void_p),
+           rloc.ctypes.data_as(ctypes.c_void_p),
+           ctypes.c_int(cell.natm))
+    except Exception as e:
+        raise RuntimeError("Failed to get vlocG part1. %s" % e) from e
+    return out
+
+
+def _get_vpplocG_part1(mydf, with_rho_core=PP_WITH_RHO_CORE):
+    cell = mydf.cell
+    mesh = mydf.mesh
+
+    if not with_rho_core:
+        # compute rho_core directly in G-space
+        # this is much slower than the following
+        Gv = cell.get_Gv(mesh)
+        vpplocG_part1 = get_pp_loc_part1_gs(cell, Gv)
+    else:
+        # compute rho_core in real space then transform to G-space
+        weight = cell.vol / numpy.prod(mesh)
+        rho_core = make_rho_core(cell, mesh=mesh)  # same mesh as the FFT below (default would be cell.mesh)
+        rhoG_core = weight * tools.fft(rho_core, mesh)
+        rho_core = None
+        coulG = tools.get_coulG(cell, mesh=mesh)
+        vpplocG_part1 = rhoG_core * coulG
+        rhoG_core = coulG = None
+        # G = 0 contribution
+        chargs = cell.atom_charges()
+        rloc = []
+        for ia in range(cell.natm):
+            symb = cell.atom_symbol(ia)
+            rloc.append(cell._pseudo[symb][1])
+        rloc = numpy.asarray(rloc)
+        vpplocG_part1[0] += 2. * numpy.pi * numpy.sum(rloc * rloc * chargs)
+    return vpplocG_part1
+
+
+def get_vpploc_part1_ip1(mydf, kpts=numpy.zeros((1,3))):
+    from .multigrid_pair import _get_j_pass2_ip1
+    if mydf.pp_with_erf:
+        return 0
+
+    mesh = mydf.mesh
+    vG = mydf.vpplocG_part1
+    vG = vG.reshape(-1,*mesh)  # keep the reshaped view; the original discarded the result
+
+    vpp_kpts = _get_j_pass2_ip1(mydf, vG, kpts, hermi=0, deriv=1)
+    if gamma_point(kpts):
+        vpp_kpts = vpp_kpts.real
+    if len(kpts) == 1:
+        vpp_kpts = vpp_kpts[0]
+    return vpp_kpts
+
+
+def vpploc_part1_nuc_grad(mydf, dm, kpts=numpy.zeros((1,3)), atm_id=None, precision=None):
+    from .multigrid_pair import _eval_rhoG
+    t0 = (logger.process_clock(), logger.perf_counter())
+    cell = mydf.cell
+    fakecell, max_radius = fake_cell_vloc_part1(cell, atm_id=atm_id, precision=precision)
+    atm = fakecell._atm
+    bas = fakecell._bas
+    env = fakecell._env
+
+    a = numpy.asarray(cell.lattice_vectors(), order='C', dtype=float)
+    if abs(a - numpy.diag(a.diagonal())).max() < 1e-12:
+        lattice_type = '_orth'
+    else:
+        lattice_type = '_nonorth'
+        raise NotImplementedError  # lattices with off-diagonal components are rejected
+    eval_fn = 'eval_mat_lda' + lattice_type + '_ip1'
+
+    b = numpy.asarray(numpy.linalg.inv(a.T), order='C', dtype=float)
+    mesh = numpy.asarray(mydf.mesh, order='C', dtype=numpy.int32)
+    ngrids = numpy.prod(mesh)
+    comp = 3
+    grad = numpy.zeros((len(atm),comp), order="C", dtype=float)
+    drv = getattr(libdft, 'int_gauss_charge_v_rs', None)
+
+    if mydf.rhoG is None:
+        rhoG = _eval_rhoG(mydf, dm, hermi=1, kpts=kpts, deriv=0)
+    else:
+        rhoG = mydf.rhoG  # reuse the density cached on mydf
+    rhoG = rhoG[...,0,:]
+    rhoG = rhoG.reshape(-1,ngrids)
+    if rhoG.shape[0] == 2: #unrestricted
+        rhoG = rhoG[0] + rhoG[1]
+    else:
+        assert rhoG.shape[0] == 1
+        rhoG = rhoG[0]
+
+    coulG = tools.get_coulG(cell, mesh=mesh)
+    vG = numpy.multiply(rhoG, coulG)
+
+    v_rs = numpy.asarray(tools.ifft(vG, mesh).real, order="C")  # real-space potential passed to the C driver
+    try:
+        drv(getattr(libdft, eval_fn),
+            grad.ctypes.data_as(ctypes.c_void_p),
+            v_rs.ctypes.data_as(ctypes.c_void_p),
+            ctypes.c_int(comp),
+            atm.ctypes.data_as(ctypes.c_void_p),
+            bas.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(len(bas)),
+            env.ctypes.data_as(ctypes.c_void_p),
+            mesh.ctypes.data_as(ctypes.c_void_p),
+            ctypes.c_int(cell.dimension),
+            a.ctypes.data_as(ctypes.c_void_p),
+            b.ctypes.data_as(ctypes.c_void_p), ctypes.c_double(max_radius))
+    except Exception as e:
+        raise RuntimeError("Failed to compute nuclear gradients of vpploc part1. %s" % e) from e
+    grad *= -1
+    t0 = logger.timer(mydf, 'vpploc_part1_nuc_grad', *t0)
+    return grad
+
+
+def fake_cell_vloc_part1(cell, atm_id=None, precision=None):
+    '''
+    Generate fakecell for the non-local term of the local part of
+    the GTH pseudo-potential. Also stores the atomic radii.
+    Differs from pp_int.fake_cell_vloc(cell, cn=0) in the normalization factors.
+    '''
+    from pyscf.pbc.gto.cell import pgf_rcut
+    if atm_id is None:
+        atm_id = numpy.arange(cell.natm)
+    else:
+        atm_id = numpy.asarray(atm_id)
+    natm = len(atm_id)
+
+    if precision is None:
+        precision = cell.precision
+
+    max_radius = 0
+    kind = {}  # per element symbol: [zeta, norm, radius]
+    # FIXME prec may be too tight
+    prec = precision ** 2
+    for symb in cell._pseudo:
+        charge = numpy.sum(cell._pseudo[symb][0])
+        rloc = cell._pseudo[symb][1]
+        zeta = .5 / rloc**2
+        norm = (zeta / numpy.pi) ** 1.5
+        radius = pgf_rcut(0, zeta, charge*norm, precision=prec)
+        max_radius = max(radius, max_radius)
+        kind[symb] = [zeta, norm, radius]
+
+    fake_env = [cell.atom_coords()[atm_id].ravel()]
+    fake_atm = cell._atm[atm_id].copy().reshape(natm,-1)
+    fake_atm[:,gto.PTR_COORD] = numpy.arange(0, natm*3, 3)
+    ptr = natm * 3  # first free slot in fake_env after the coordinates
+    fake_bas = []
+    for ia, atm in enumerate(atm_id):
+        if cell.atom_charge(atm) == 0:  # pass ghost atoms
+            continue
+
+        symb = cell.atom_symbol(atm)
+        if symb in kind:
+            fake_env.append(kind[symb])
+        else:
+            alpha = 1e16
+            norm = (alpha / numpy.pi) ** 1.5
+            radius = 0.0
+            fake_env.append([alpha, norm, radius])
+        fake_bas.append([ia, 0, 1, 1, 0, ptr, ptr+1, 0])
+        fake_atm[ia,gto.PTR_RADIUS] = ptr+2
+        ptr += 3
+
+    fakecell = cell.copy(deep=False)
+    fakecell._atm = numpy.asarray(fake_atm, order="C", dtype=numpy.int32)
+    fakecell._bas = numpy.asarray(fake_bas, order="C", dtype=numpy.int32).reshape(-1, gto.BAS_SLOTS)
+    fakecell._env = numpy.asarray(numpy.hstack(fake_env), order="C", dtype=float)
+    return fakecell, max_radius
diff --git a/pyscf/pbc/dft/multigrid/utils.py b/pyscf/pbc/dft/multigrid/utils.py
new file mode 100644
index 0000000000..3ca9f0addb
--- /dev/null
+++ b/pyscf/pbc/dft/multigrid/utils.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+# Copyright 2014-2024 The PySCF Developers. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Author: Qiming Sun
+#
+
+import numpy
+from pyscf import lib
+
+def _take_4d(a, indices):  # gather a[i0][:,i1][:,:,i2][:,:,:,i3] through lib.take_2d
+    a_shape = a.shape
+    ranges = []
+    for i, s in enumerate(indices):
+        if s is None:  # None selects the whole axis
+            idx = numpy.arange(a_shape[i], dtype=numpy.int32)
+        else:
+            idx = numpy.array(s, dtype=numpy.int32)  # copy, so the wrap below never edits the caller's array
+            idx[idx < 0] += a_shape[i]  # wrap negative indices
+        ranges.append(idx)
+    idx = ranges[0][:,None] * a_shape[1] + ranges[1]  # flattened (axis0, axis1) row index
+    idy = ranges[2][:,None] * a_shape[3] + ranges[3]  # flattened (axis2, axis3) column index
+    a = a.reshape(a_shape[0]*a_shape[1], a_shape[2]*a_shape[3])
+    out = lib.take_2d(a, idx.ravel(), idy.ravel())
+    return out.reshape([len(s) for s in ranges])
+
+def _takebak_4d(out, a, indices):  # write block a back into out at the given indices via lib.takebak_2d
+    out_shape = out.shape
+    a_shape = a.shape
+    ranges = []
+    for i, s in enumerate(indices):
+        if s is None:
+            idx = numpy.arange(a_shape[i], dtype=numpy.int32)
+        else:
+            idx = numpy.array(s, dtype=numpy.int32)  # copy, so the wrap below never edits the caller's array
+            idx[idx < 0] += out_shape[i]  # wrap negative indices
+        assert (len(idx) == a_shape[i])
+        ranges.append(idx)
+    idx = ranges[0][:,None] * out_shape[1] + ranges[1]
+    idy = ranges[2][:,None] * out_shape[3] + ranges[3]
+    nx = idx.size
+    ny = idy.size
+    out = out.reshape(out_shape[0]*out_shape[1], out_shape[2]*out_shape[3])
+    lib.takebak_2d(out, a.reshape(nx,ny), idx.ravel(), idy.ravel())
+    return out
+
+def _take_5d(a, indices):
+    a_shape = a.shape
+    a = a.reshape((a_shape[0]*a_shape[1],) + a_shape[2:])  # merge the two leading axes
+    indices = (None,) + indices[2:]  # the merged axis is always taken whole
+    return _take_4d(a, indices)
+
+def _takebak_5d(out, a, indices):
+    a_shape = a.shape
+    out_shape = out.shape
+    a = a.reshape((a_shape[0]*a_shape[1],) + a_shape[2:])
+    out = out.reshape((out_shape[0]*out_shape[1],) +
out_shape[2:]) + indices = (None,) + indices[2:] + return _takebak_4d(out, a, indices) diff --git a/pyscf/pbc/dft/rks.py b/pyscf/pbc/dft/rks.py index 228bc6e91a..d3dc8d1047 100644 --- a/pyscf/pbc/dft/rks.py +++ b/pyscf/pbc/dft/rks.py @@ -73,7 +73,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, n, exc, vxc = multigrid.nr_rks(ks.with_df, ks.xc, dm, hermi, kpt.reshape(1,3), kpts_band, with_j=True, return_j=False) - logger.debug(ks, 'nelec by numeric integration = %s', n) + logger.info(ks, 'nelec by numeric integration = %s', n) t0 = logger.timer(ks, 'vxc', *t0) return vxc @@ -87,7 +87,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, max_memory = ks.max_memory - lib.current_memory()[0] n, exc, vxc = ni.nr_rks(cell, ks.grids, ks.xc, dm, 0, hermi, kpt, kpts_band, max_memory=max_memory) - logger.debug(ks, 'nelec by numeric integration = %s', n) + logger.info(ks, 'nelec by numeric integration = %s', n) if ks.nlc or ni.libxc.is_nlc(ks.xc): if ni.libxc.is_nlc(ks.xc): xc = ks.xc @@ -98,7 +98,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, max_memory=max_memory) exc += enlc vxc += vnlc - logger.debug(ks, 'nelec with nlc grids = %s', n) + logger.info(ks, 'nelec with nlc grids = %s', n) t0 = logger.timer(ks, 'vxc', *t0) if not hybrid: diff --git a/pyscf/pbc/dft/test/test_krks_ksym.py b/pyscf/pbc/dft/test/test_krks_ksym.py index 6c7bd46c4a..615f1d456f 100644 --- a/pyscf/pbc/dft/test/test_krks_ksym.py +++ b/pyscf/pbc/dft/test/test_krks_ksym.py @@ -207,14 +207,14 @@ def test_rsh_mdf(self): def test_multigrid(self): kmf0 = krks.KRKS(cell, kpts=cell.make_kpts(nk)) kmf0.xc = 'lda' - kmf0 = multigrid.multigrid(kmf0) + kmf0 = multigrid.multigrid_fftdf(kmf0) kmf0.kernel() rho0 = kmf0.get_rho() kpts = cell.make_kpts(nk,space_group_symmetry=True,time_reversal_symmetry=True) kmf = pscf.KRKS(cell, kpts=kpts) kmf.xc = 'lda' - kmf = multigrid.multigrid(kmf) + kmf = multigrid.multigrid_fftdf(kmf) kmf.kernel() 
self.assertAlmostEqual(kmf.e_tot, kmf0.e_tot, 7) rho = kmf.get_rho() @@ -231,14 +231,14 @@ def test_multigrid(self): def test_multigrid_kuks(self): kmf0 = pscf.KUKS(cell, kpts=cell.make_kpts(nk)) kmf0.xc = 'lda' - kmf0 = multigrid.multigrid(kmf0) + kmf0 = multigrid.multigrid_fftdf(kmf0) kmf0.kernel() rho0 = kmf0.get_rho() kpts = cell.make_kpts(nk,space_group_symmetry=True,time_reversal_symmetry=True) kmf = pscf.KUKS(cell, kpts=kpts) kmf.xc = 'lda' - kmf = multigrid.multigrid(kmf) + kmf = multigrid.multigrid_fftdf(kmf) kmf.kernel() self.assertAlmostEqual(kmf.e_tot, kmf0.e_tot, 7) rho = kmf.get_rho() diff --git a/pyscf/pbc/dft/test/test_multigrid.py b/pyscf/pbc/dft/test/test_multigrid.py index 2cd11e7732..9db362ded3 100644 --- a/pyscf/pbc/dft/test/test_multigrid.py +++ b/pyscf/pbc/dft/test/test_multigrid.py @@ -85,12 +85,24 @@ def test_orth_get_pp(self): self.assertEqual(out.shape, ref.shape) self.assertAlmostEqual(abs(ref-out).max(), 0, 8) + # test small memory + mydf = multigrid.MultiGridFFTDF(cell_orth) + mydf.max_memory = 10 + out = mydf.get_pp(max_memory=2) + self.assertAlmostEqual(abs(ref-out).max(), 0, 8) + def test_nonorth_get_pp(self): ref = df.FFTDF(cell_nonorth).get_pp() out = multigrid.MultiGridFFTDF(cell_nonorth).get_pp() self.assertEqual(out.shape, ref.shape) self.assertAlmostEqual(abs(ref-out).max(), 0, 8) + # test small memory + mydf = multigrid.MultiGridFFTDF(cell_nonorth) + mydf.max_memory = 10 + out = mydf.get_pp(max_memory=2) + self.assertAlmostEqual(abs(ref-out).max(), 0, 8) + def test_orth_get_nuc_kpts(self): ref = df.FFTDF(cell_orth).get_nuc(kpts) out = multigrid.MultiGridFFTDF(cell_orth).get_nuc(kpts) @@ -133,7 +145,7 @@ def test_multigrid_kuks(self): mf = dft.KUKS(cell_he) mf.xc = 'lda,' ref = mf.get_veff(cell_he, numpy.array((dm_he,dm_he)), kpts=kpts) - out = multigrid.multigrid(mf).get_veff(cell_he, (dm_he,dm_he), kpts=kpts) + out = multigrid.multigrid_fftdf(mf).get_veff(cell_he, (dm_he,dm_he), kpts=kpts) self.assertEqual(out.shape, 
ref.shape) self.assertAlmostEqual(abs(ref-out).max(), 0, 8) self.assertAlmostEqual(abs(ref.exc-out.exc).max(), 0, 8) @@ -143,7 +155,7 @@ def test_multigrid_krks(self): mf = dft.KRKS(cell_he) mf.xc = 'lda,' ref = mf.get_veff(cell_he, dm_he, kpts=kpts) - out = multigrid.multigrid(mf).get_veff(cell_he, dm_he, kpts=kpts) + out = multigrid.multigrid_fftdf(mf).get_veff(cell_he, dm_he, kpts=kpts) self.assertEqual(out.shape, ref.shape) self.assertAlmostEqual(abs(ref-out).max(), 0, 8) self.assertAlmostEqual(abs(ref.exc-out.exc).max(), 0, 8) @@ -159,7 +171,7 @@ def test_multigrid_kroks(self): dm1 = lib.tag_array(numpy.array([dm1,dm1]), mo_coeff=mo, mo_occ=mo_occ*2) ref = mf.get_veff(cell_he, dm1, kpts=kpts) - out = multigrid.multigrid(mf).get_veff(cell_he, dm1, kpts=kpts) + out = multigrid.multigrid_fftdf(mf).get_veff(cell_he, dm1, kpts=kpts) self.assertEqual(out.shape, ref.shape) self.assertAlmostEqual(abs(ref-out).max(), 0, 7) self.assertAlmostEqual(abs(ref.exc-out.exc).max(), 0, 7) @@ -169,7 +181,7 @@ def test_multigrid_uks(self): mf = dft.UKS(cell_he) mf.xc = 'lda,' ref = mf.get_veff(cell_he, numpy.array((dm_he[0],dm_he[0]))) - out = multigrid.multigrid(mf).get_veff(cell_he, (dm_he[0], dm_he[0])) + out = multigrid.multigrid_fftdf(mf).get_veff(cell_he, (dm_he[0], dm_he[0])) self.assertEqual(out.shape, ref.shape) self.assertAlmostEqual(abs(ref-out).max(), 0, 7) self.assertAlmostEqual(abs(ref.exc-out.exc).max(), 0, 7) @@ -179,7 +191,7 @@ def test_multigrid_rks(self): mf = dft.RKS(cell_he) mf.xc = 'lda,' ref = mf.get_veff(cell_he, dm_he[0]) - out = multigrid.multigrid(mf).get_veff(cell_he, dm_he[0]) + out = multigrid.multigrid_fftdf(mf).get_veff(cell_he, dm_he[0]) self.assertEqual(out.shape, ref.shape) self.assertAlmostEqual(abs(ref-out).max(), 0, 7) self.assertAlmostEqual(abs(ref.exc-out.exc).max(), 0, 7) @@ -195,7 +207,7 @@ def test_multigrid_roks(self): dm1 = lib.tag_array(numpy.array([dm1,dm1]), mo_coeff=mo, mo_occ=mo_occ*2) ref = mf.get_veff(cell_he, dm1) - out = 
multigrid.multigrid(mf).get_veff(cell_he, dm1) + out = multigrid.multigrid_fftdf(mf).get_veff(cell_he, dm1) self.assertEqual(out.shape, ref.shape) self.assertAlmostEqual(abs(ref-out).max(), 0, 7) self.assertAlmostEqual(abs(ref.exc-out.exc).max(), 0, 7) @@ -218,8 +230,8 @@ def test_eval_rhoG_orth_kpts(self): numpy.random.seed(9) dm = numpy.random.random(dm1.shape) + numpy.random.random(dm1.shape) * 1j mydf = multigrid.MultiGridFFTDF(cell_orth) - rhoG = multigrid._eval_rhoG(mydf, dm, hermi=0, kpts=kpts, deriv=0, - rhog_high_order=True) + rhoG = multigrid.multigrid._eval_rhoG(mydf, dm, hermi=0, kpts=kpts, deriv=0, + rhog_high_order=True) self.assertTrue(rhoG.dtype == numpy.complex128) mydf = df.FFTDF(cell_orth) @@ -232,8 +244,8 @@ def test_eval_rhoG_orth_kpts(self): def test_eval_rhoG_orth_gga(self): mydf = multigrid.MultiGridFFTDF(cell_orth) - rhoG = multigrid._eval_rhoG(mydf, dm, hermi=1, kpts=kpts, deriv=1, - rhog_high_order=True) + rhoG = multigrid.multigrid._eval_rhoG(mydf, dm, hermi=1, kpts=kpts, deriv=1, + rhog_high_order=True) mydf = df.FFTDF(cell_orth) ni = dft.numint.KNumInt() @@ -245,8 +257,8 @@ def test_eval_rhoG_orth_gga(self): def test_eval_rhoG_nonorth_gga(self): mydf = multigrid.MultiGridFFTDF(cell_nonorth) - rhoG = multigrid._eval_rhoG(mydf, dm, hermi=1, kpts=kpts, deriv=1, - rhog_high_order=True) + rhoG = multigrid.multigrid._eval_rhoG(mydf, dm, hermi=1, kpts=kpts, deriv=1, + rhog_high_order=True) mydf = df.FFTDF(cell_nonorth) ni = dft.numint.KNumInt() @@ -273,7 +285,7 @@ def test_gen_rhf_response(self): hermi=1, kpts=kpts) vj = mydf.get_jk(dm1, with_k=False, kpts=kpts)[0] ref += vj - v = multigrid._gen_rhf_response(mf, dm_he, hermi=1)(dm1) + v = multigrid.multigrid._gen_rhf_response(mf, dm_he, hermi=1)(dm1) self.assertEqual(ref.dtype, v.dtype) self.assertEqual(ref.shape, v.shape) self.assertAlmostEqual(abs(v-ref).max(), 0, 8) @@ -282,7 +294,7 @@ def test_gen_rhf_response(self): ref = dft.numint.nr_rks_fxc(ni, cell_he, mydf.grids, mf.xc, dm_he, dm1, 
hermi=1, kpts=kpts) ref += vj - v = multigrid._gen_rhf_response(mf, dm_he, hermi=1)(dm1) + v = multigrid.multigrid._gen_rhf_response(mf, dm_he, hermi=1)(dm1) self.assertEqual(ref.dtype, v.dtype) self.assertEqual(ref.shape, v.shape) self.assertAlmostEqual(abs(v-ref).max(), 0, 6) @@ -356,7 +368,7 @@ def test_nr_rks_fxc_st(self): mf.xc = 'b88,' ref = dft.numint.nr_rks_fxc_st(ni, cell_he, mydf.grids, mf.xc, dm_he, dm1, singlet=True, kpts=kpts) - v = multigrid._gen_rhf_response(mf, dm_he, singlet=True)(dm1) + v = multigrid.multigrid._gen_rhf_response(mf, dm_he, singlet=True)(dm1) self.assertEqual(ref.dtype, v.dtype) self.assertEqual(ref.shape, v.shape) self.assertAlmostEqual(abs(v-ref).max(), 0, 5) @@ -364,7 +376,7 @@ def test_nr_rks_fxc_st(self): mf.xc = 'lda,' ref = dft.numint.nr_rks_fxc_st(ni, cell_he, mydf.grids, mf.xc, dm_he, dm1, singlet=False, kpts=kpts) - v = multigrid._gen_rhf_response(mf, dm_he, singlet=False)(dm1) + v = multigrid.multigrid._gen_rhf_response(mf, dm_he, singlet=False)(dm1) self.assertEqual(ref.dtype, v.dtype) self.assertEqual(ref.shape, v.shape) self.assertAlmostEqual(abs(v-ref).max(), 0, 4) @@ -391,7 +403,7 @@ def test_gen_uhf_response(self): ref = dft.numint.nr_uks_fxc(ni, cell_he, mydf.grids, mf.xc, dm_he, dm1, hermi=1) vj = mydf.get_jk(dm1, with_k=False)[0] ref += vj[0] + vj[1] - v = multigrid._gen_uhf_response(mf, dm_he, with_j=True, hermi=1)(dm1) + v = multigrid.multigrid._gen_uhf_response(mf, dm_he, with_j=True, hermi=1)(dm1) self.assertEqual(ref.dtype, v.dtype) self.assertEqual(ref.shape, v.shape) self.assertAlmostEqual(abs(v-ref).max(), 0, 7) @@ -399,7 +411,7 @@ def test_gen_uhf_response(self): mf.xc = 'b88,' ref = dft.numint.nr_uks_fxc(ni, cell_he, mydf.grids, mf.xc, dm_he, dm1, hermi=1) ref += vj[0] + vj[1] - v = multigrid._gen_uhf_response(mf, dm_he, with_j=True, hermi=1)(dm1) + v = multigrid.multigrid._gen_uhf_response(mf, dm_he, with_j=True, hermi=1)(dm1) self.assertEqual(ref.dtype, v.dtype) self.assertEqual(ref.shape, v.shape) 
self.assertAlmostEqual(abs(v-ref).max(), 0, 7) @@ -454,11 +466,11 @@ def test_orth_uks_fxc_hermi0(self): def test_rcut_vs_ke_cut(self): xc = 'lda,' - with lib.temporary_env(multigrid, TASKS_TYPE='rcut'): + with lib.temporary_env(multigrid.multigrid, TASKS_TYPE='rcut'): mg_df = multigrid.MultiGridFFTDF(cell_orth) n1, exc1, v1 = multigrid.nr_rks(mg_df, xc, dm1, kpts=kpts) self.assertEqual(len(mg_df.tasks), 3) - with lib.temporary_env(multigrid, TASKS_TYPE='ke_cut'): + with lib.temporary_env(multigrid.multigrid, TASKS_TYPE='ke_cut'): mg_df = multigrid.MultiGridFFTDF(cell_orth) n2, exc2, v2 = multigrid.nr_rks(mg_df, xc, dm1, kpts=kpts) self.assertEqual(len(mg_df.tasks), 6) diff --git a/pyscf/pbc/dft/test/test_multigrid2.py b/pyscf/pbc/dft/test/test_multigrid2.py new file mode 100644 index 0000000000..f23c687a48 --- /dev/null +++ b/pyscf/pbc/dft/test/test_multigrid2.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python +# Copyright 2021-2024 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Author: Xing Zhang +# + +import unittest +import numpy +from pyscf.pbc import gto, dft +from pyscf.pbc.dft import multigrid +from pyscf.pbc.grad import rks as rks_grad +from pyscf.pbc.grad import uks as uks_grad +from pyscf.pbc.grad import krks as krks_grad + +def setUpModule(): + global cell + cell = gto.Cell() + boxlen = 5.0 + cell.a = numpy.array([[boxlen,0.0,0.0], + [0.0,boxlen,0.0], + [0.0,0.0,boxlen]]) + cell.atom = """ + O 1.84560 1.21649 1.10372 + H 2.30941 1.30070 1.92953 + H 0.91429 1.26674 1.28886 + """ + cell.basis = 'gth-szv' + cell.ke_cutoff = 200 + cell.pseudo = 'gth-pade' + cell.verbose = 0 + cell.use_loose_rcut = True + cell.build() + +def tearDownModule(): + global cell + del cell + +def _fftdf_energy_grad(cell, xc): + mf = dft.KRKS(cell, kpts=numpy.zeros((1,3))) + mf.xc = xc + e = mf.kernel() + grad = krks_grad.Gradients(mf) + g = grad.kernel() + return e, g + +def _multigrid2_energy_grad(cell, xc, spin=0): + if spin == 0: + mf = dft.RKS(cell) + elif spin == 1: + mf = dft.UKS(cell) + mf.xc = xc + mf.with_df = multigrid.MultiGridFFTDF2(cell) + e = mf.kernel() + if spin == 0: + g = rks_grad.Gradients(mf).kernel() + elif spin == 1: + g = uks_grad.Gradients(mf).kernel() + return e, g + +class KnownValues(unittest.TestCase): + def test_orth_lda(self): + xc = 'lda, vwn' + e0, g0 = _fftdf_energy_grad(cell, xc) + e, g = _multigrid2_energy_grad(cell, xc, 0) + e1, g1 = _multigrid2_energy_grad(cell, xc, 1) + assert abs(e-e0) < 1e-8 + assert abs(e1-e0) < 1e-8 + assert abs(g-g0).max() < 2e-5 + assert abs(g1-g0).max() < 2e-5 + + def test_orth_gga(self): + xc = 'pbe, pbe' + e0, g0 = _fftdf_energy_grad(cell, xc) + e, g = _multigrid2_energy_grad(cell, xc, 0) + e1, g1 = _multigrid2_energy_grad(cell, xc, 1) + assert abs(e-e0) < 1e-6 + assert abs(e1-e0) < 1e-6 + assert abs(g-g0).max() < 1e-4 + assert abs(g1-g0).max() < 1e-4 + +if __name__ == '__main__': + print("Full Tests for multigrid2") + unittest.main() diff --git a/pyscf/pbc/dft/uks.py 
b/pyscf/pbc/dft/uks.py index de72d6452d..20d8d14c71 100644 --- a/pyscf/pbc/dft/uks.py +++ b/pyscf/pbc/dft/uks.py @@ -57,7 +57,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, n, exc, vxc = multigrid.nr_uks(ks.with_df, ks.xc, dm, hermi, kpt.reshape(1,3), kpts_band, with_j=True, return_j=False) - logger.debug(ks, 'nelec by numeric integration = %s', n) + logger.info(ks, 'nelec by numeric integration = %s', n) t0 = logger.timer(ks, 'vxc', *t0) return vxc @@ -86,7 +86,7 @@ def get_veff(ks, cell=None, dm=None, dm_last=0, vhf_last=0, hermi=1, 0, hermi, kpt, max_memory=max_memory) exc += enlc vxc += vnlc - logger.debug(ks, 'nelec by numeric integration = %s', n) + logger.info(ks, 'nelec by numeric integration = %s', n) t0 = logger.timer(ks, 'vxc', *t0) if not hybrid: diff --git a/pyscf/pbc/grad/__init__.py b/pyscf/pbc/grad/__init__.py index 5408a1eb50..e308bca1e1 100644 --- a/pyscf/pbc/grad/__init__.py +++ b/pyscf/pbc/grad/__init__.py @@ -19,7 +19,10 @@ ''' Analytical nuclear gradients for PBC ''' - +from pyscf.pbc.grad import rhf +from pyscf.pbc.grad import rks +from pyscf.pbc.grad import uhf +from pyscf.pbc.grad import uks from pyscf.pbc.grad import krhf from pyscf.pbc.grad import kuhf from pyscf.pbc.grad import krks @@ -30,4 +33,4 @@ from pyscf.pbc.grad.krks import Gradients as KRKS from pyscf.pbc.grad.kuks import Gradients as KUKS -grad_nuc = krhf.grad_nuc +grad_nuc = rhf.grad_nuc diff --git a/pyscf/pbc/grad/krhf.py b/pyscf/pbc/grad/krhf.py index 9fd628882f..0dd6a171e4 100644 --- a/pyscf/pbc/grad/krhf.py +++ b/pyscf/pbc/grad/krhf.py @@ -211,6 +211,10 @@ def hcore_deriv(atm_id): def grad_nuc(cell, atmlst): ''' Derivatives of nuclear repulsion energy wrt nuclear coordinates + + Notes: + An optimized version of this function is available in + `pbc.gto.ewald_methods.ewald_nuc_grad` ''' chargs = cell.atom_charges() ew_eta, ew_cut = cell.get_ewald_params() @@ -244,12 +248,14 @@ def grad_nuc(cell, atmlst): absG2[absG2==0] = 1e200 ewg_grad = 
np.zeros([natom,3]) SI = cell.get_SI(Gv) - if cell.low_dim_ft_type is None or cell.dimension == 3: + if cell.dimension != 2 or cell.low_dim_ft_type == 'inf_vacuum': coulG = 4*np.pi / absG2 coulG *= weights ZSI = np.einsum("i,ij->j", chargs, SI) ZexpG2 = coulG * np.exp(-absG2/(4*ew_eta**2)) ZexpG2_mod = ZexpG2.reshape(len(ZexpG2),1) * Gv + else: + raise NotImplementedError for i, qi in enumerate(chargs): Zfac = np.imag(ZSI * SI[i].conj()) * qi ewg_grad[i] = - np.sum(Zfac.reshape((len(Zfac),1)) * ZexpG2_mod, axis = 0) diff --git a/pyscf/pbc/grad/rhf.py b/pyscf/pbc/grad/rhf.py new file mode 100644 index 0000000000..720451b719 --- /dev/null +++ b/pyscf/pbc/grad/rhf.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python +# Copyright 2021-2024 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
def _contract_vhf_dm(mf_grad, vhf, dm, comp=3, atmlst=None,
                     screen=SCREEN_VHF_DM_CONTRA):
    '''Contract derivative matrices with a density matrix into per-atom terms.

    The contraction itself is done by the C routine ``contract_vhf_dm`` in
    libpbc; this wrapper only prepares the buffers and the optional
    shell-pair neighbor list.

    Args:
        mf_grad : gradient object. Only ``mf_grad.mol`` is read here; the
            object is also handed to ``logger.timer``.
        vhf : array passed to the C routine as a C-contiguous buffer.
            NOTE(review): presumably shaped (comp, nao, nao) -- confirm
            against the C implementation.
        dm : density matrix, passed as a C-contiguous buffer.
        comp : number of components per atom in the result (default 3).
        atmlst : optional atom indices used to select rows of the result.
        screen : if true, a neighbor list for shell pairs is built and passed
            to the C routine; otherwise a null pointer is passed.

    Returns:
        ndarray of shape (natm, comp), or the rows selected by ``atmlst``.
    '''
    from pyscf.gto.mole import ao_loc_nr, ATOM_OF
    from pyscf.pbc.gto import build_neighbor_list_for_shlpairs, free_neighbor_list

    t0 = (logger.process_clock(), logger.perf_counter())

    # Resolve the C entry point before allocating anything, so a missing
    # symbol fails with a clear AttributeError instead of
    # "'NoneType' object is not callable" after the neighbor list was built.
    func = libpbc.contract_vhf_dm

    mol = mf_grad.mol
    natm = mol.natm
    nbas = mol.nbas
    shls_slice = np.asarray([0,nbas,0,nbas], order="C", dtype=np.int32)
    ao_loc = np.asarray(ao_loc_nr(mol), order="C", dtype=np.int32)
    shls_atm = np.asarray(mol._bas[:,ATOM_OF].copy(), order="C", dtype=np.int32)

    de = np.zeros((natm,comp), order="C")
    vhf = np.asarray(vhf, order="C")
    dm = np.asarray(dm, order="C")

    if screen:
        neighbor_list = build_neighbor_list_for_shlpairs(mol)
    else:
        neighbor_list = lib.c_null_ptr()
    try:
        func(de.ctypes.data_as(ctypes.c_void_p),
             vhf.ctypes.data_as(ctypes.c_void_p),
             dm.ctypes.data_as(ctypes.c_void_p),
             ctypes.byref(neighbor_list),
             shls_slice.ctypes.data_as(ctypes.c_void_p),
             ao_loc.ctypes.data_as(ctypes.c_void_p),
             shls_atm.ctypes.data_as(ctypes.c_void_p),
             ctypes.c_int(comp), ctypes.c_int(natm),
             ctypes.c_int(nbas))
    finally:
        # Release the neighbor list even when the C call raises; the previous
        # code skipped this on error and leaked the list.
        free_neighbor_list(neighbor_list)

    if atmlst is not None:
        de = de[atmlst]

    logger.timer(mf_grad, '_contract_vhf_dm', *t0)
    return de
class Gradients(GradientsBase):
    '''Restricted Hartree-Fock nuclear gradients at the Gamma point
    (non-relativistic).'''
    def get_veff(self, mol=None, dm=None, kpt=np.zeros(3)):
        '''Forward to the module-level ``get_veff``.

        ``mol`` defaults to ``self.mol`` and ``dm`` to
        ``self.base.make_rdm1()`` when they are not supplied.
        '''
        cell = self.mol if mol is None else mol
        density = self.base.make_rdm1() if dm is None else dm
        return get_veff(self, cell, density, kpt)

    # Energy-weighted density matrix is taken from the molecular RHF gradient
    # class; the electronic gradient is the module-level grad_elec.
    make_rdm1e = mol_rhf.Gradients.make_rdm1e
    grad_elec = grad_elec
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Author: Xing Zhang +# + +import ctypes +import numpy as np +from pyscf import __config__ +from pyscf.lib import logger +from pyscf.grad import uhf as mol_uhf +from pyscf.grad.rhf import _write +from pyscf.pbc.gto.pseudo import pp_int +from pyscf.pbc.grad import rhf as rhf_grad +from pyscf.pbc.lib.kpts_helper import gamma_point + +def grad_elec(mf_grad, mo_energy=None, mo_coeff=None, mo_occ=None, atmlst=None, kpt=np.zeros(3)): + mf = mf_grad.base + mol = mf_grad.mol + if mo_energy is None: mo_energy = mf.mo_energy + if mo_occ is None: mo_occ = mf.mo_occ + if mo_coeff is None: mo_coeff = mf.mo_coeff + log = logger.Logger(mf_grad.stdout, mf_grad.verbose) + + s1 = mf_grad.get_ovlp(mol, kpt) + dm0 = mf.make_rdm1(mo_coeff, mo_occ) + + t0 = (logger.process_clock(), logger.perf_counter()) + log.debug('Computing Gradients of NR-HF Coulomb repulsion') + vhf = mf_grad.get_veff(mol, dm0, kpt) + log.timer('gradients of 2e part', *t0) + + dme0 = mf_grad.make_rdm1e(mo_energy, mo_coeff, mo_occ) + dm0_sf = dm0[0] + dm0[1] + dme0_sf = dme0[0] + dme0[1] + + if atmlst is None: + atmlst = range(mol.natm) + + de = 0 + if gamma_point(kpt): + de = mf.with_df.vpploc_part1_nuc_grad(dm0_sf, kpts=kpt.reshape(-1,3)) + de += pp_int.vpploc_part2_nuc_grad(mol, dm0_sf) + de += pp_int.vppnl_nuc_grad(mol, dm0_sf) + h1ao = -mol.pbc_intor('int1e_ipkin', kpt=kpt) + if getattr(mf.with_df, 'vpplocG_part1', None) is None: + h1ao += -mf.with_df.get_vpploc_part1_ip1(kpts=kpt.reshape(-1,3)) + de += rhf_grad._contract_vhf_dm(mf_grad, h1ao, dm0_sf) * 2 + for s in range(2): + de += 
class Gradients(rhf_grad.GradientsBase):
    '''Non-relativistic Gamma-point unrestricted Hartree-Fock gradients'''
    def get_veff(self, mol=None, dm=None, kpt=np.zeros(3)):
        '''Forward to the module-level ``get_veff`` (which requests ``spin=1``).

        ``mol`` defaults to ``self.mol`` and ``dm`` to
        ``self.base.make_rdm1()`` when they are not supplied.
        '''
        if mol is None: mol = self.mol
        if dm is None: dm = self.base.make_rdm1()
        return get_veff(self, mol, dm, kpt)

    # Energy-weighted density matrices come from the molecular UHF gradient
    # class; the electronic gradient is the module-level grad_elec.
    make_rdm1e = mol_uhf.Gradients.make_rdm1e
    grad_elec = grad_elec
+# +# Author: Xing Zhang +# + +from pyscf.pbc.grad import uhf + + +class Gradients(uhf.Gradients): + '''Non-relativistic Gamma-point unrestricted Kohn-Sham DFT gradients''' + pass diff --git a/pyscf/pbc/gto/__init__.py b/pyscf/pbc/gto/__init__.py index dcaaddebbc..769b76c616 100644 --- a/pyscf/pbc/gto/__init__.py +++ b/pyscf/pbc/gto/__init__.py @@ -22,6 +22,7 @@ from pyscf.pbc.gto.basis import parse, load, parse_ecp, load_ecp from pyscf.pbc.gto import pseudo from pyscf.pbc.gto.cell import * +from pyscf.pbc.gto.neighborlist import * parse_pp = parsepp = pseudo.parse load_pp = loadpp = pseudo.load diff --git a/pyscf/pbc/gto/_pbcintor.py b/pyscf/pbc/gto/_pbcintor.py index f721eb0304..c5b921b2e0 100644 --- a/pyscf/pbc/gto/_pbcintor.py +++ b/pyscf/pbc/gto/_pbcintor.py @@ -33,15 +33,21 @@ def __init__(self, cell): def init_rcut_cond(self, cell, precision=None): if precision is None: precision = cell.precision - rcut = numpy.array([cell.bas_rcut(ib, precision) - for ib in range(cell.nbas)]) + if cell.use_loose_rcut: + rcut = cell.rcut_by_shells(precision) + fn_set_rcut_cond = getattr(libpbc, 'PBCset_rcut_cond_loose') + else: + rcut = numpy.array([cell.bas_rcut(ib, precision) + for ib in range(cell.nbas)]) + fn_set_rcut_cond = getattr(libpbc, 'PBCset_rcut_cond') + natm = ctypes.c_int(cell._atm.shape[0]) nbas = ctypes.c_int(cell._bas.shape[0]) - libpbc.PBCset_rcut_cond(self._this, - rcut.ctypes.data_as(ctypes.c_void_p), - cell._atm.ctypes.data_as(ctypes.c_void_p), natm, - cell._bas.ctypes.data_as(ctypes.c_void_p), nbas, - cell._env.ctypes.data_as(ctypes.c_void_p)) + fn_set_rcut_cond(self._this, + rcut.ctypes.data_as(ctypes.c_void_p), + cell._atm.ctypes.data_as(ctypes.c_void_p), natm, + cell._bas.ctypes.data_as(ctypes.c_void_p), nbas, + cell._env.ctypes.data_as(ctypes.c_void_p)) return self def del_rcut_cond(self): @@ -56,4 +62,5 @@ def __del__(self): class _CPBCOpt(ctypes.Structure): _fields_ = [('rrcut', ctypes.c_void_p), + ('rcut', ctypes.c_void_p), ('fprescreen', 
def _intor_cross_screened(
        intor, cell1, cell2, comp=None, hermi=0, kpts=None, kpt=None,
        shls_slice=None, **kwargs):
    '''`intor_cross` with prescreening.

    Two-center integrals between the shells of `cell1` and `cell2`, summed
    over lattice images by the libpbc driver `PBCnr2c_screened_drv`, which
    is given a neighbor list.

    Args:
        intor : integral name; the suffix is added with `cell1._add_suffix`.
        cell1, cell2 : the two cells providing bra and ket shells.
        comp : number of integral components (resolved together with `intor`).
        hermi : 0 selects the 's1' fill function, anything else 's2'; for
            non-zero values `lib.hermi_triu` is applied to every component.
        kpts : k-points, reshaped to (-1,3).
        kpt : a single k-point, used only when `kpts` is None.
        shls_slice : (i0, i1, j0, j1) shell ranges; defaults to all shells
            of both cells.

    Kwargs:
        neighbor_list : a pre-built neighbor list. If absent, one is built
            here with `NeighborListOpt`.

    Returns:
        A list with one matrix per k-point, or a single matrix when `kpts`
        is None or has shape (3,).

    Notes:
        This function may be subject to change.
    '''
    from pyscf.pbc.gto.neighborlist import NeighborListOpt
    intor, comp = moleintor._get_intor_and_comp(cell1._add_suffix(intor), comp)

    # Normalize the k-point input to an (nkpts,3) array; Gamma point by default
    if kpts is None:
        if kpt is not None:
            kpts_lst = np.reshape(kpt, (1,3))
        else:
            kpts_lst = np.zeros((1,3))
    else:
        kpts_lst = np.reshape(kpts, (-1,3))
    nkpts = len(kpts_lst)

    # Shallow copy of cell1 carrying the concatenated atm/bas/env of both
    # cells and the tighter of the two precisions
    pcell = cell1.copy(deep=False)
    pcell.precision = min(cell1.precision, cell2.precision)
    pcell._atm, pcell._bas, pcell._env = \
    atm, bas, env = conc_env(cell1._atm, cell1._bas, cell1._env,
                             cell2._atm, cell2._bas, cell2._env)
    if shls_slice is None:
        shls_slice = (0, cell1.nbas, 0, cell2.nbas)
    i0, i1, j0, j1 = shls_slice[:4]
    # Shells of cell2 follow those of cell1 in the concatenated basis
    j0 += cell1.nbas
    j1 += cell1.nbas
    ao_loc = moleintor.make_loc(bas, intor)
    ni = ao_loc[i1] - ao_loc[i0]
    nj = ao_loc[j1] - ao_loc[j0]
    # Always complex here; the real part is extracted below for Gamma point
    out = np.empty((nkpts,comp,ni,nj), dtype=np.complex128)

    if hermi == 0:
        aosym = 's1'
    else:
        aosym = 's2'
    fill = getattr(libpbc, 'PBCnr2c_screened_fill_k'+aosym)
    fintor = getattr(moleintor.libcgto, intor)
    drv = libpbc.PBCnr2c_screened_drv

    # Lattice translations within the larger rcut, and their phase factors
    rcut = max(cell1.rcut, cell2.rcut)
    Ls = cell1.get_lattice_Ls(rcut=rcut)
    expkL = np.asarray(np.exp(1j*np.dot(kpts_lst, Ls.T)), order='C')

    # Use the caller's neighbor list if given, otherwise build one for the
    # (cell1, cell2, Ls) combination
    neighbor_list = kwargs.get('neighbor_list', None)
    if neighbor_list is None:
        nlopt = NeighborListOpt(cell1)
        nlopt.build(cell1, cell2, Ls, set_optimizer=False)
        neighbor_list = nlopt.nl

    # No libcint optimizer is passed to the driver
    cintopt = lib.c_null_ptr()

    drv(fintor, fill, out.ctypes.data_as(ctypes.c_void_p),
        ctypes.c_int(nkpts), ctypes.c_int(comp), ctypes.c_int(len(Ls)),
        Ls.ctypes.data_as(ctypes.c_void_p),
        expkL.ctypes.data_as(ctypes.c_void_p),
        (ctypes.c_int*4)(i0, i1, j0, j1),
        ao_loc.ctypes.data_as(ctypes.c_void_p), cintopt,
        atm.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(pcell.natm),
        bas.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(pcell.nbas),
        env.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(env.size),
        ctypes.byref(neighbor_list))

    # Drop the local reference to the optimizer.
    # NOTE(review): presumably so that the locally built neighbor list can
    # be released -- confirm against NeighborListOpt.
    nlopt = None

    mat = []
    for k, kpt in enumerate(kpts_lst):
        v = out[k]
        if hermi != 0:
            for ic in range(comp):
                lib.hermi_triu(v[ic], hermi=hermi, inplace=True)
        if comp == 1:
            v = v[0]
        if abs(kpt).sum() < 1e-9: # gamma_point
            v = v.real
        mat.append(v)

    if kpts is None or np.shape(kpts) == (3,): # A single k-point
        mat = mat[0]
    return mat
def pgf_rcut(l, alpha, coeff, precision=INTEGRAL_PRECISION,
             rcut=0, max_cycle=RCUT_MAX_CYCLE, eps=RCUT_EPS):
    '''Cutoff radii of primitive Gaussian functions from their real-space
    magnitude, i.e. the radius where
    `c*rcut^(l+2)*exp(-alpha*rcut^2) ~ precision`.

    The radius is found by the fixed-point iteration
    `rcut <- sqrt(((l+2)*log(rcut) + log(coeff/precision)) / alpha)`,
    started from `max(rcut, rmin + eps)`, and stopped once every element
    changes by less than `eps`. If `max_cycle` iterations are not enough,
    a warning is issued and the last iterate is returned.
    '''
    log_ratio = np.log(coeff / precision)
    power = l + 2

    # Starting point: never below rmin plus a small offset
    r_lower = np.sqrt(.5 * power / alpha) * 2
    tol = np.minimum(r_lower/10, eps)
    radius = np.maximum(rcut, r_lower + tol)

    for _ in range(max_cycle):
        previous = radius
        radius = np.sqrt((power * np.log(previous) + log_ratio) / alpha)
        if np.all(abs(radius - previous) < tol):
            return radius
    warnings.warn(f'cell.pgf_rcut failed to converge in {max_cycle} cycles.')
    return radius
+ use_loose_rcut : bool + If set to True, a loose `rcut` determined by shell radius is used, + which is usually accurate enough for pure DFT calculations; + otherwise, a tight `rcut` determined by overlap integral is used. + Default value is False. Has no effect if `rcut` is set manually. + use_particle_mesh_ewald : bool + If set to True, use particle-mesh Ewald to compute the nuclear repulsion. + Default value is False, meaning to use classical Ewald summation. space_group_symmetry : bool Whether to consider space group symmetry. Default is False. symmorphic : bool @@ -892,6 +1077,7 @@ class Cell(mole.MoleBase): 'precision', 'exp_to_discard', 'a', 'ke_cutoff', 'pseudo', 'dimension', 'low_dim_ft_type', 'space_group_symmetry', 'symmorphic', 'lattice_symmetry', 'mesh', 'rcut', + 'use_loose_rcut', 'use_particle_mesh_ewald', } def __init__(self, **kwargs): @@ -906,6 +1092,8 @@ def __init__(self, **kwargs): # density-fitting class. This determines how the ewald produces # its energy. self.low_dim_ft_type = None + self.use_loose_rcut = False + self.use_particle_mesh_ewald = False self.space_group_symmetry = False self.symmorphic = False self.lattice_symmetry = None @@ -1082,7 +1270,9 @@ def build_lattice_symmetry(self, check_mesh_symmetry=True): def build(self, dump_input=True, parse_arg=mole.ARGPARSE, a=None, mesh=None, ke_cutoff=None, precision=None, nimgs=None, h=None, dimension=None, rcut= None, low_dim_ft_type=None, - space_group_symmetry=None, symmorphic=None, *args, **kwargs): + space_group_symmetry=None, symmorphic=None, + use_loose_rcut=None, use_particle_mesh_ewald=None, + *args, **kwargs): '''Setup Mole molecule and Cell and initialize some control parameters. Whenever you change the value of the attributes of :class:`Cell`, you need call this function to refresh the internal data of Cell. 
def get_scaled_atom_coords(self, a=None):
    '''Atomic coordinates expressed in units of the lattice vectors.

    Args:
        a : lattice vectors to scale by. When omitted,
            `self.lattice_vectors()` is used.

    Returns:
        `self.atom_coords()` multiplied by the inverse of `a`.
    '''
    lattice = self.lattice_vectors() if a is None else a
    cart_coords = self.atom_coords()
    return np.dot(cart_coords, np.linalg.inv(lattice))
def _bspline(u, n=4):
    '''Evaluate the truncated-power sum
    `1/(n-1)! * sum_{k=0}^{n} (-1)^k * binom(n,k) * max(u-k, 0)^(n-1)`
    at `u` (scalar or array).
    '''
    degree = n - 1
    scale = 1. / scipy.special.factorial(degree)
    total = sum(
        ((-1)**k) * scipy.special.binom(n, k) * (np.maximum(u - k, 0) ** degree)
        for k in range(n + 1)
    )
    return total * scale
math:: + \frac{dM}{du} = M_{n-1}(u) - M_{n-1}(u-1) + ''' + dMdu = _bspline(u, n-1) - _bspline(u-1, n-1) + return dMdu + +def bspline(u, ng, n=4, deriv=0): + u = np.asarray(u).ravel() + u_floor = np.floor(u) + delta = u - u_floor + idx = [] + val = [] + for i in range(n): + idx.append(np.rint((u_floor - i) % ng).astype(int)) + val.append(delta + i) + + M = np.zeros((u.size, ng)) + for i in range(n): + M[np.arange(u.size),idx[i]] += _bspline(val[i], n) + + if deriv > 0: + if deriv > 1: + raise NotImplementedError + dM = np.zeros((u.size, ng)) + for i in range(n): + dM[np.arange(u.size),idx[i]] += _bspline_grad(val[i], n) + M = [M, dM] + + m = np.arange(ng) + b = np.exp(2*np.pi*1j*(n-1)*m/ng) + tmp = 0 + for k in range(n-1): + tmp += _bspline(k+1, n) * np.exp(2*np.pi*1j*m*k/ng) + b /= tmp + if n % 2 > 0 and ng % 2 == 0 : + b[ng//2] = 0 + return M, b, idx + +def _get_ewald_direct(cell, ew_eta=None, ew_cut=None): + if ew_eta is None or ew_cut is None: + ew_eta, ew_cut = cell.get_ewald_params() + + chargs = np.asarray(cell.atom_charges(), order='C', dtype=float) + coords = np.asarray(cell.atom_coords(), order='C') + Lall = np.asarray(cell.get_lattice_Ls(rcut=ew_cut), order='C') + + natm = len(chargs) + nL = len(Lall) + ewovrl = np.zeros([1]) + fun = getattr(libpbc, "get_ewald_direct") + fun(ewovrl.ctypes.data_as(ctypes.c_void_p), + chargs.ctypes.data_as(ctypes.c_void_p), + coords.ctypes.data_as(ctypes.c_void_p), + Lall.ctypes.data_as(ctypes.c_void_p), + ctypes.c_double(ew_eta), ctypes.c_double(ew_cut), + ctypes.c_int(natm), ctypes.c_int(nL)) + return ewovrl[0] + +def _get_ewald_direct_nuc_grad(cell, ew_eta=None, ew_cut=None): + if ew_eta is None or ew_cut is None: + ew_eta, ew_cut = cell.get_ewald_params() + + chargs = np.asarray(cell.atom_charges(), order='C', dtype=float) + coords = np.asarray(cell.atom_coords(), order='C') + Lall = np.asarray(cell.get_lattice_Ls(rcut=ew_cut), order='C') + + natm = len(chargs) + nL = len(Lall) + grad = np.zeros([natm,3], order='C', 
# FIXME The default interpolation order may be too high
def particle_mesh_ewald(cell, ew_eta=None, ew_cut=None,
                        order=INTERPOLATION_ORDER):
    '''Ewald energy with the reciprocal-space part evaluated on a mesh.

    The result is the sum of three terms: the direct-space sum from
    `_get_ewald_direct`, the self/background term, and a reciprocal-space
    term obtained by spreading the charges onto a mesh with B-splines
    of order `order` and using FFTs.

    Args:
        cell : Cell object; must have `dimension == 3`.
        ew_eta, ew_cut : Ewald parameters. If either is None both are taken
            from `cell.get_ewald_params()`.
        order : B-spline order passed to `bspline`.

    Returns:
        `ewovrl + ewself + ewg`.

    Raises:
        NotImplementedError : if `cell.dimension != 3`.
    '''
    if cell.dimension != 3:
        raise NotImplementedError("Particle mesh ewald only works for 3D.")

    chargs = cell.atom_charges()
    coords = cell.atom_coords()
    natm = len(coords)

    if ew_eta is None or ew_cut is None:
        ew_eta, ew_cut = cell.get_ewald_params()
    # Mesh for the reciprocal-space part, derived from the cell precision
    # and ew_eta
    log_precision = np.log(cell.precision / (chargs.sum()*16*np.pi**2))
    ke_cutoff = -2*ew_eta**2*log_precision
    mesh = cell.cutoff_to_mesh(ke_cutoff)

    ewovrl = _get_ewald_direct(cell, ew_eta, ew_cut)
    ewself = -.5 * np.dot(chargs,chargs) * 2 * ew_eta / np.sqrt(np.pi)
    # NOTE(review): always true here, other dimensions were rejected above
    if cell.dimension == 3:
        ewself += -.5 * np.sum(chargs)**2 * np.pi/(ew_eta**2 * cell.vol)

    # Atomic positions in units of mesh points along each reciprocal vector
    b = cell.reciprocal_vectors(norm_to=1)
    u = np.dot(coords, b.T) * mesh[None,:]

    Mx, bx, idx = bspline(u[:,0], mesh[0], order)
    My, by, idy = bspline(u[:,1], mesh[1], order)
    Mz, bz, idz = bspline(u[:,2], mesh[2], order)

    # (natm, order) tables: the mesh indices each atom touches and the
    # spline weights at those indices
    idx = np.asarray(idx).T
    idy = np.asarray(idy).T
    idz = np.asarray(idz).T
    Mx_s = Mx[np.arange(natm)[:,None], idx]
    My_s = My[np.arange(natm)[:,None], idy]
    Mz_s = Mz[np.arange(natm)[:,None], idz]

    #:Q = np.einsum('i,ix,iy,iz->xyz', chargs, Mx, My, Mz)
    Q = np.zeros([*mesh])
    for ia in range(len(chargs)):
        Q_s = np.einsum('x,y,z->xyz', Mx_s[ia], My_s[ia], Mz_s[ia])
        # NOTE(review): `+=` through fancy indexing adds only once per
        # distinct index; idx[ia] repeats an index if order > mesh size
        # along that axis -- confirm this cannot happen in practice.
        Q[np.ix_(idx[ia], idy[ia], idz[ia])] += chargs[ia] * Q_s

    B = np.einsum('x,y,z->xyz', bx*bx.conj(), by*by.conj(), bz*bz.conj())

    Gv, Gvbase, weights = cell.get_Gv_weights(mesh)
    absG2 = np.einsum('ix,ix->i', Gv, Gv)
    # Make the G=0 contribution vanish instead of dividing by zero
    absG2[absG2==0] = 1e200
    coulG = 4*np.pi / absG2
    C = weights * coulG * np.exp(-absG2/(4*ew_eta**2))
    C = C.reshape(*mesh)

    Q_ifft = tools.ifft(Q, mesh).reshape(*mesh)
    tmp = tools.fft(B * C * Q_ifft, mesh).real.reshape(*mesh)
    ewg = 0.5 * np.prod(mesh) * np.einsum('xyz,xyz->', Q, tmp)

    logger.debug(cell, 'Ewald components = %.15g, %.15g, %.15g', ewovrl, ewself, ewg)
    return ewovrl + ewself + ewg
C = C.reshape(*mesh) + + Q_ifft = tools.ifft(Q, mesh).reshape(*mesh) + tmp = tools.fft(B * C * Q_ifft, mesh).real.reshape(*mesh) + + ng = np.prod(mesh) + bK = b * mesh[:,None] + grad_rec = np.zeros_like(grad_dir) + for ia in range(len(chargs)): + mask = np.ix_(idx[ia], idy[ia], idz[ia]) + dQ_s = np.einsum('x,y,z->xyz', dMx_s[ia], My_s[ia], Mz_s[ia]) + dQdr = np.einsum('x,abc->xabc', bK[0], dQ_s) + grad_rec[ia] += np.einsum('xabc,abc->x', dQdr, tmp[mask]) + + dQ_s = np.einsum('x,y,z->xyz', Mx_s[ia], dMy_s[ia], Mz_s[ia]) + dQdr = np.einsum('x,abc->xabc', bK[1], dQ_s) + grad_rec[ia] += np.einsum('xabc,abc->x', dQdr, tmp[mask]) + + dQ_s = np.einsum('x,y,z->xyz', Mx_s[ia], My_s[ia], dMz_s[ia]) + dQdr = np.einsum('x,abc->xabc', bK[2], dQ_s) + grad_rec[ia] += np.einsum('xabc,abc->x', dQdr, tmp[mask]) + + grad_rec[ia] *= chargs[ia] * ng + + # reciprocal space summation does not conserve momentum + shift = -np.sum(grad_rec, axis=0) / len(grad_rec) + logger.debug(cell, f'Shift ewald nuclear gradient by {shift} to keep momentum conservation.') + grad_rec += shift[None,:] + + grad = grad_dir + grad_rec + return grad + +def ewald_nuc_grad(cell, ew_eta=None, ew_cut=None): + chargs = np.asarray(cell.atom_charges(), order='C', dtype=float) + coords = np.asarray(cell.atom_coords(), order='C') + + if ew_eta is None or ew_cut is None: + ew_eta, ew_cut = cell.get_ewald_params() + log_precision = np.log(cell.precision / (chargs.sum()*16*np.pi**2)) + ke_cutoff = -2*ew_eta**2*log_precision + mesh = cell.cutoff_to_mesh(ke_cutoff) + + if cell.dimension == 3 and cell.use_particle_mesh_ewald: + return particle_mesh_ewald_nuc_grad(cell, ew_eta=ew_eta, ew_cut=ew_cut) + + grad_dir = _get_ewald_direct_nuc_grad(cell, ew_eta, ew_cut) + grad_rec = np.zeros_like(grad_dir, order="C") + + Gv, _, weights = cell.get_Gv_weights(mesh) + fn = getattr(libpbc, "ewald_gs_nuc_grad") + if cell.dimension != 2 or cell.low_dim_ft_type == 'inf_vacuum': + ngrids = len(Gv) + mem_avail = cell.max_memory - 
lib.current_memory()[0] + if mem_avail <= 0: + logger.warn(cell, "Not enough memory for computing ewald force.") + blksize = min(ngrids, max(mesh[2], int(mem_avail*1e6 / ((2+cell.natm*2)*8)))) + for ig0, ig1 in lib.prange(0, ngrids, blksize): + ngrid_sub = ig1 - ig0 + Gv_sub = np.asarray(Gv[ig0:ig1], order="C") + fn(grad_rec.ctypes.data_as(ctypes.c_void_p), + Gv_sub.ctypes.data_as(ctypes.c_void_p), + chargs.ctypes.data_as(ctypes.c_void_p), + coords.ctypes.data_as(ctypes.c_void_p), + ctypes.c_double(ew_eta), ctypes.c_double(weights), + ctypes.c_int(cell.natm), ctypes.c_size_t(ngrid_sub)) + else: + raise NotImplementedError + + grad = grad_dir + grad_rec + return grad diff --git a/pyscf/pbc/gto/neighborlist.py b/pyscf/pbc/gto/neighborlist.py new file mode 100644 index 0000000000..f4a0527ee2 --- /dev/null +++ b/pyscf/pbc/gto/neighborlist.py @@ -0,0 +1,199 @@ +#!/usr/bin/env python +# Copyright 2021-2024 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Author: Xing Zhang +# + +import ctypes +import numpy as np +from pyscf import lib +from pyscf.lib import logger + +libpbc = lib.load_library('libpbc') + +class _CNeighborPair(ctypes.Structure): + _fields_ = [("nimgs", ctypes.c_int), + ("Ls_list", ctypes.POINTER(ctypes.c_int)), + ("q_cond", ctypes.POINTER(ctypes.c_double)), + ("center", ctypes.POINTER(ctypes.c_double))] + + +class _CNeighborList(ctypes.Structure): + _fields_ = [("nish", ctypes.c_int), + ("njsh", ctypes.c_int), + ("nimgs", ctypes.c_int), + ("pairs", ctypes.POINTER(ctypes.POINTER(_CNeighborPair)))] + + +class _CNeighborListOpt(ctypes.Structure): + _fields_ = [("nl", ctypes.POINTER(_CNeighborList)), + ('fprescreen', ctypes.c_void_p)] + + +def build_neighbor_list_for_shlpairs(cell, cell1=None, Ls=None, + ish_rcut=None, jsh_rcut=None, hermi=0, + precision=None): + ''' + Build the neighbor list of shell pairs for periodic calculations. + + Arguments: + cell : :class:`pbc.gto.cell.Cell` + The :class:`Cell` instance for the bra basis functions. + cell1 : :class:`pbc.gto.cell.Cell`, optional + The :class:`Cell` instance for the ket basis functions. + If not given, both bra and ket basis functions come from cell. + Ls : (*,3) array, optional + The cartesian coordinates of the periodic images. + Default is calculated by :func:`cell.get_lattice_Ls`. + ish_rcut : (nish,) array, optional + The cutoff radii of the shells for bra basis functions. + jsh_rcut : (njsh,) array, optional + The cutoff radii of the shells for ket basis functions. + hermi : int, optional + If :math:`hermi=1`, the task list is built only for + the upper triangle of the matrix. Default is 0. + precision : float, optional + The integral precision. Default is :attr:`cell.precision`. + If both ``ish_rcut`` and ``jsh_rcut`` are given, + ``precision`` will be ignored. + + Returns: :class:`ctypes.POINTER` + The C pointer of the :class:`NeighborList` structure. 
def neighbor_list_to_ndarray(cell, cell1, nl):
    '''
    Flatten a C neighbor list into two integer arrays.

    Arguments:
        cell :
            Object whose ``nbas`` gives the number of bra shells.
        cell1 :
            Object whose ``nbas`` gives the number of ket shells.
        nl :
            Pointer-like object; ``nl.contents.pairs[i*njsh+j].contents``
            must expose ``nimgs`` and ``Ls_list`` for shell pair (i, j).

    Returns:
        Ls_list: (nLtot,) integer ndarray
            indices of Ls, concatenated over all shell pairs in
            row-major (i, j) order
        Ls_idx: (2*nish*njsh,) integer ndarray
            for every shell pair, the starting and ending (exclusive)
            positions of its entries in Ls_list, stored as consecutive
            values; ``-1, -1`` marks a pair without any image

    Both arrays always carry an integer dtype, including the case where
    no image is found at all (a plain ``np.asarray([])`` would be float).
    '''
    nish = cell.nbas
    njsh = cell1.nbas
    pairs = nl.contents.pairs
    Ls_list = []
    Ls_idx = []
    nLtot = 0
    for ij in range(nish * njsh):
        pair = pairs[ij].contents
        nL = pair.nimgs
        nLtot += nL
        Ls_list.extend(pair.Ls_list[iL] for iL in range(nL))
        if nL > 0:
            Ls_idx.extend((nLtot - nL, nLtot))
        else:
            Ls_idx.extend((-1, -1))
    # dtype=int keeps the default integer type for non-empty input and
    # prevents the float64 fallback numpy applies to empty lists.
    return np.asarray(Ls_list, dtype=int), np.asarray(Ls_idx, dtype=int)
def get_pp_loc_part2_gamma(cell):
    '''
    Gamma-point version of the C1..C4 terms of the local GTH
    pseudo-potential (see :func:`get_pp_loc_part2`).

    For every term ``cn`` in 1..4 a fake cell is generated with
    :func:`fake_cell_vloc`; terms whose fake cell has no shells are
    skipped. The three-center integrals of the remaining terms are summed
    in lower-triangular storage and unpacked at the end.

    Returns:
        The unpacked matrix produced by ``lib.unpack_tril``, or ``0`` when
        none of the four terms contributes.
    '''
    from pyscf.pbc.df import incore
    from pyscf.pbc.gto import build_neighbor_list_for_shlpairs, free_neighbor_list

    fake_cells = {}
    for cn in range(1, 5):
        fake_cell = fake_cell_vloc(cell, cn)
        fake_cell.precision = EPS_PPL
        if fake_cell.nbas > 0:
            fake_cells[cn] = fake_cell

    if not fake_cells:
        # Only warn when no atom of the system has an entry in cell._pseudo.
        if not any(cell.atom_symbol(ia) in cell._pseudo for ia in range(cell.natm)):
            lib.logger.warn(cell, 'cell.pseudo was specified but its elements %s '
                            'were not found in the system.', cell._pseudo.keys())
        return 0

    intors = ('int3c2e', 'int3c1e', 'int3c1e_r2_origk',
              'int3c1e_r4_origk', 'int3c1e_r6_origk')
    kptij_lst = numpy.zeros((1,2,3))
    Ls = cell.get_lattice_Ls()
    buf = None
    for cn, fake_cell in fake_cells.items():
        neighbor_list = build_neighbor_list_for_shlpairs(fake_cell, cell, Ls)
        try:
            v = incore.aux_e2_sum_auxbas(cell, fake_cell, intors[cn], aosym='s2', comp=1,
                                         kptij_lst=kptij_lst, neighbor_list=neighbor_list)
            if buf is None:
                buf = v
            else:
                # accumulate in place to avoid a second full-size buffer
                buf = numpy.add(buf, v, out=buf)
            v = None
        finally:
            # The neighbor list lives in C memory; release it even when the
            # integral evaluation fails, otherwise it is leaked.
            free_neighbor_list(neighbor_list)

    vpploc = lib.unpack_tril(buf)
    return vpploc
def _prepare_hl_data(fakecell, hl_blocks):
    '''
    Pack the non-local projector coefficient blocks into flat arrays that
    can be handed to the C routines.

    Arguments:
        fakecell :
            Fake cell; ``fakecell._bas[ib,0]`` and
            ``fakecell.bas_angular(ib)`` are read for every block ``ib``.
        hl_blocks : sequence of ndarray
            Coefficient blocks. ``hl.shape[0]`` (at most 3) is used as the
            block dimension.

    Returns:
        hl_table : (len(hl_blocks), HL_TABLE_SLOTS) int32 ndarray
            Per block: atom slot, angular momentum, block dimension,
            position of the block in ``hl_data``, and one running offset
            per projector channel actually used by the block. Slots a
            block does not use are left at zero.
        hl_data : 1D double ndarray
            All blocks flattened and concatenated in order.
    '''
    # Zero-initialised on purpose: with numpy.empty the slots that are never
    # written below would expose arbitrary memory to the C code.
    hl_table = numpy.zeros((len(hl_blocks), HL_TABLE_SLOTS), order='C',
                           dtype=numpy.int32)
    offset = [0] * 3
    hl_data = []
    ptr = 0
    for ib, hl in enumerate(hl_blocks):
        l = fakecell.bas_angular(ib)
        hl_dim = hl.shape[0]
        hl_table[ib,ATOM_OF] = fakecell._bas[ib,0]
        hl_table[ib,ANG_OF] = l
        hl_table[ib,HL_DIM_OF] = hl_dim
        hl_table[ib,HL_DATA_OF] = ptr
        ptr += hl_dim**2
        hl_data.extend(hl.ravel())
        # every channel used by this block advances by 2l+1
        nd = 2 * l + 1
        for i in range(hl_dim):
            hl_table[ib, i+HL_OFFSET0] = offset[i]
            offset[i] += nd
    hl_data = numpy.asarray(hl_data, order='C', dtype=numpy.double)
    return hl_table, hl_data
# TODO add k-point sampling
def _contract_ppnl_nuc_grad(cell, fakecell, dms, hl_blocks, ppnl_half, ppnl_half_ip2,
                            comp=3, kpts=None, hl_table=None, hl_data=None):
    '''
    Contract the half-transformed non-local pseudo-potential integrals and
    their derivatives with the density matrix through the C driver
    ``contract_ppnl_nuc_grad``.

    Arguments:
        cell, fakecell :
            Real cell and the fake cell of projectors.
        dms : ndarray
            Density matrices, reshaped internally to (nkpts, nao, nao).
        hl_blocks :
            Projector coefficient blocks.
        ppnl_half, ppnl_half_ip2 : length-3 sequences
            Integrals (and derivative integrals) per projector channel;
            an empty entry means the channel is absent.
        comp : int
            Number of gradient components per atom.
        kpts :
            k-points; only gamma-point input is implemented.
        hl_table, hl_data :
            Optional pre-packed tables. If ``hl_table`` is None both are
            rebuilt with :func:`_prepare_hl_data`.

    Returns:
        ``grad[0]`` for a single k-point, otherwise the real part of the
        sum over k-points. Each entry has shape (natm, comp).

    Raises:
        NotImplementedError : for a k-point that is not the gamma point.
        RuntimeError : if the call into the C library fails.
    '''
    from pyscf.pbc.gto import NeighborListOpt
    kpts_lst = numpy.zeros((1,3)) if kpts is None else numpy.reshape(kpts, (-1,3))

    if hl_table is None:
        hl_table, hl_data = _prepare_hl_data(fakecell, hl_blocks)

    opt = NeighborListOpt(fakecell)
    opt.build(fakecell, cell)

    nkpts = len(kpts_lst)
    nao = cell.nao
    dms = dms.reshape(nkpts, nao, nao)
    shls_slice = (0, cell.nbas, 0, cell.nbas)
    bas = numpy.asarray(cell._bas, order='C', dtype=numpy.int32)
    ao_loc = gto.moleintor.make_loc(bas, 'cart' if cell.cart else 'sph')

    def as_ptr(arr):
        return arr.ctypes.data_as(ctypes.c_void_p)

    grad = []
    for k, kpt in enumerate(kpts_lst):
        dm = dms[k]

        # Pick the k-th slice of every available channel.
        naux = [0] * 3
        half = [None] * 3
        for ich in range(3):
            if len(ppnl_half[ich]) > 0:
                half[ich] = ppnl_half[ich][k]
                naux[ich] = half[ich].shape[0]

        half_ip2 = [None] * 3
        for ich in range(3):
            if len(ppnl_half_ip2[ich]) > 0:
                half_ip2[ich] = ppnl_half_ip2[ich][k]
                assert naux[ich] == half_ip2[ich].shape[1]

        naux = numpy.asarray(naux, dtype=numpy.int32)

        if not gamma_point(kpt):
            raise NotImplementedError

        # gamma point: everything is real
        dm = dm.real
        for ich in range(3):
            if half[ich] is not None:
                half[ich] = half[ich].real
                half_ip2[ich] = half_ip2[ich].real
        grad_k = numpy.zeros([cell.natm, comp], order='C', dtype=numpy.double)
        fill = getattr(libpbc, 'ppnl_nuc_grad_fill_gs1')

        # The C driver needs contiguous buffers.
        dm = numpy.asarray(dm, order='C')
        half = [numpy.asarray(x, order='C') for x in half]
        half_ip2 = [numpy.asarray(x, order='C') for x in half_ip2]

        drv = getattr(libpbc, "contract_ppnl_nuc_grad", None)
        try:
            drv(fill,
                as_ptr(grad_k),
                as_ptr(dm), ctypes.c_int(comp),
                as_ptr(half[0]), as_ptr(half[1]), as_ptr(half[2]),
                as_ptr(half_ip2[0]), as_ptr(half_ip2[1]), as_ptr(half_ip2[2]),
                as_ptr(hl_table),
                as_ptr(hl_data),
                ctypes.c_int(len(hl_blocks)),
                as_ptr(naux),
                (ctypes.c_int*4)(*shls_slice),
                as_ptr(ao_loc),
                as_ptr(bas),
                ctypes.c_int(cell.natm), opt._this)
        except Exception as e:
            raise RuntimeError(f"Failed to compute non-local pp nuclear gradient.\n{e}")
        grad.append(grad_k)

    if nkpts == 1:
        return grad[0]

    grad_tot = 0
    for grad_k in grad:
        grad_tot += grad_k
    return grad_tot.real
def vppnl_nuc_grad(cell, dm, kpts=None):
    '''
    Nuclear gradients of the non-local part of the GTH pseudo potential,
    contracted with the density matrix.

    Arguments:
        cell :
            The periodic cell.
        dm :
            Density matrix, forwarded to :func:`_contract_ppnl_nuc_grad`.
        kpts :
            k-points; ``None`` selects a single zero k-point.

    Returns:
        The result of :func:`_contract_ppnl_nuc_grad` scaled by -2.
    '''
    kpts_lst = numpy.zeros((1,3)) if kpts is None else numpy.reshape(kpts, (-1,3))

    fakecell, hl_blocks = fake_cell_vnl(cell)
    ppnl_half = _int_vnl(cell, fakecell, hl_blocks, kpts_lst)

    deriv_intors = ('int1e_ipovlp', 'int1e_r2_origi_ip2', 'int1e_r4_origi_ip2')
    ppnl_half_ip2 = _int_vnl(cell, fakecell, hl_blocks, kpts_lst, deriv_intors, comp=3)

    # int1e_ipovlp computes ip1 so multiply -1 to get ip2
    first_channel = ppnl_half_ip2[0]
    if len(first_channel) > 0:
        for k in range(len(kpts_lst)):
            first_channel[k] *= -1

    grad = _contract_ppnl_nuc_grad(cell, fakecell, dm, hl_blocks,
                                   ppnl_half, ppnl_half_ip2, kpts=kpts)
    grad *= -2
    return grad
C_1,..,C_4 are generated with cn = 1..4 ''' - fake_env = [cell.atom_coords().ravel()] - fake_atm = cell._atm.copy() - fake_atm[:,gto.PTR_COORD] = numpy.arange(0, cell.natm*3, 3) - ptr = cell.natm * 3 + if atm_id is None: + atm_id = numpy.arange(cell.natm) + else: + atm_id = numpy.asarray(atm_id) + natm = len(atm_id) + + fake_env = [cell.atom_coords()[atm_id].ravel()] + fake_atm = cell._atm[atm_id].copy().reshape(natm,-1) + fake_atm[:,gto.PTR_COORD] = numpy.arange(0, natm*3, 3) + ptr = natm * 3 fake_bas = [] half_sph_norm = .5/numpy.pi**.5 - for ia in range(cell.natm): - if cell.atom_charge(ia) == 0: # pass ghost atoms + for ia, atm in enumerate(atm_id): + if cell.atom_charge(atm) == 0: # pass ghost atoms continue - symb = cell.atom_symbol(ia) + symb = cell.atom_symbol(atm) if cn == 0: if symb in cell._pseudo: pp = cell._pseudo[symb] @@ -196,6 +523,7 @@ def fake_cell_vloc(cell, cn=0): fakecell._env = numpy.asarray(numpy.hstack(fake_env), dtype=numpy.double) return fakecell + # sqrt(Gamma(l+1.5)/Gamma(l+2i+1.5)) _PLI_FAC = 1/numpy.sqrt(numpy.array(( (1, 3.75 , 59.0625 ), # l = 0, @@ -249,12 +577,14 @@ def fake_cell_vnl(cell): fakecell = cell.copy(deep=False) fakecell._atm = numpy.asarray(fake_atm, dtype=numpy.int32) - fakecell._bas = numpy.asarray(fake_bas, dtype=numpy.int32) + fakecell._bas = numpy.asarray(fake_bas, dtype=numpy.int32).reshape(-1, gto.BAS_SLOTS) fakecell._env = numpy.asarray(numpy.hstack(fake_env), dtype=numpy.double) return fakecell, hl_blocks -def _int_vnl(cell, fakecell, hl_blocks, kpts): +def _int_vnl(cell, fakecell, hl_blocks, kpts, intors=None, comp=1): '''Vnuc - Vloc''' + if intors is None: + intors = ['int1e_ovlp', 'int1e_r2_origi', 'int1e_r4_origi'] rcut = max(cell.rcut, fakecell.rcut) Ls = cell.get_lattice_Ls(rcut=rcut) nimgs = len(Ls) @@ -262,6 +592,7 @@ def _int_vnl(cell, fakecell, hl_blocks, kpts): nkpts = len(kpts) fill = getattr(libpbc, 'PBCnr2c_fill_ks1') + # TODO add screening cintopt = lib.c_null_ptr() def int_ket(_bas, intor): @@ 
-279,8 +610,10 @@ def int_ket(_bas, intor): ao_loc = gto.moleintor.make_loc(bas, intor) ni = ao_loc[shls_slice[1]] - ao_loc[shls_slice[0]] nj = ao_loc[shls_slice[3]] - ao_loc[shls_slice[2]] - out = numpy.empty((nkpts,ni,nj), dtype=numpy.complex128) - comp = 1 + if comp == 1: + out = numpy.empty((nkpts,ni,nj), dtype=numpy.complex128) + else: + out = numpy.empty((nkpts,comp,ni,nj), dtype=numpy.complex128) fintor = getattr(gto.moleintor.libcgto, intor) @@ -297,7 +630,7 @@ def int_ket(_bas, intor): return out hl_dims = numpy.asarray([len(hl) for hl in hl_blocks]) - out = (int_ket(fakecell._bas[hl_dims>0], 'int1e_ovlp'), - int_ket(fakecell._bas[hl_dims>1], 'int1e_r2_origi'), - int_ket(fakecell._bas[hl_dims>2], 'int1e_r4_origi')) + out = (int_ket(fakecell._bas[hl_dims>0], intors[0]), + int_ket(fakecell._bas[hl_dims>1], intors[1]), + int_ket(fakecell._bas[hl_dims>2], intors[2])) return out diff --git a/pyscf/pbc/gto/pseudo/test/test_pp.py b/pyscf/pbc/gto/pseudo/test/test_pp.py index c00057a064..95b343bbf6 100644 --- a/pyscf/pbc/gto/pseudo/test/test_pp.py +++ b/pyscf/pbc/gto/pseudo/test/test_pp.py @@ -22,6 +22,7 @@ from pyscf.pbc.dft import numint from pyscf.pbc.gto import pseudo from pyscf.pbc.gto.pseudo import pp_int +from pyscf.data.nist import BOHR def get_pp_loc_part2(cell, kpt=np.zeros(3)): @@ -244,7 +245,42 @@ def test_pp(self): v1 = pseudo.get_pp(cell, k) self.assertAlmostEqual(abs(v0-v1).max(), 0, 6) + def test_pp_nuc_grad(self): + cell = pbcgto.Cell() + cell.atom = 'H 0 0 0; Na 0 0 0.8' + cell.a = np.diag([6,6,6]) + cell.basis='gth-szv' + cell.pseudo='gth-pade' + cell.ke_cutoff=200 + cell.build() + + cellp = cell.copy() + cellp.atom = 'H 0 0 0; Na 0 0 0.8001' + cellp.build() + cellm = cell.copy() + cellm.atom = 'H 0 0 0; Na 0 0 0.7999' + cellm.build() + + np.random.seed(1) + dm = np.random.rand(cell.nao, cell.nao) + dm = (dm + dm.T) / 2 + + # local_part2 + vp = pp_int.get_pp_loc_part2(cellp) + vm = pp_int.get_pp_loc_part2(cellm) + v_fd = (vp - vm) / (0.0002 / 
BOHR) + grad = pp_int.vpploc_part2_nuc_grad(cell, dm)[1,2] + grad_fd = np.einsum("ij,ij->", v_fd, dm) + self.assertAlmostEqual(abs(grad - grad_fd), 0, 7) + + # non-local + vp = pp_int.get_pp_nl(cellp) + vm = pp_int.get_pp_nl(cellm) + v_fd = (vp - vm) / (0.0002 / BOHR) + grad = pp_int.vppnl_nuc_grad(cell, dm)[1,2] + grad_fd = np.einsum("ij,ij->", v_fd, dm) + self.assertAlmostEqual(abs(grad - grad_fd), 0, 7) if __name__ == '__main__': print("Full Tests for pbc.gto.pseudo") diff --git a/pyscf/pbc/gto/test/test_cell.py b/pyscf/pbc/gto/test/test_cell.py index 5dee058140..bd7a0e067f 100644 --- a/pyscf/pbc/gto/test/test_cell.py +++ b/pyscf/pbc/gto/test/test_cell.py @@ -25,6 +25,7 @@ from pyscf.pbc import gto as pgto from pyscf.pbc.gto import ecp from pyscf.pbc.tools import pbc as pbctools +from pyscf.pbc.gto import ewald_methods def setUpModule(): @@ -252,6 +253,30 @@ def test_ewald_2d(self): # eref = cell.to_mol().energy_nuc() # self.assertAlmostEqual(cell.ewald(), eref, 2) + def test_particle_mesh_ewald(self): + cell = pgto.Cell() + cell.a = np.diag([10.,]*3) + cell.atom = ''' + O 5.84560 5.21649 5.10372 + H 6.30941 5.30070 5.92953 + H 4.91429 5.26674 5.28886 + ''' + cell.pseudo = 'gth-pade' + cell.verbose = 0 + cell.build() + + cell1 = cell.copy() + cell1.use_particle_mesh_ewald = True + cell1.build() + + e0 = cell.ewald() + e1 = cell1.ewald() + self.assertAlmostEqual(e0, e1, 6) + + g0 = ewald_methods.ewald_nuc_grad(cell) + g1 = ewald_methods.ewald_nuc_grad(cell1) + self.assertAlmostEqual(abs(g1-g0).max(), 0, 6) + def test_pbc_intor(self): numpy.random.seed(12) kpts = numpy.random.random((4,3)) diff --git a/pyscf/pbc/scf/hf.py b/pyscf/pbc/scf/hf.py index 8225d778b6..f6c91336ed 100644 --- a/pyscf/pbc/scf/hf.py +++ b/pyscf/pbc/scf/hf.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright 2014-2019 The PySCF Developers. All Rights Reserved. +# Copyright 2014-2024 The PySCF Developers. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -53,23 +53,24 @@ def get_ovlp(cell, kpt=np.zeros(3)): # Avoid pbcopt's prescreening in the lattice sum, for better accuracy s = cell.pbc_intor('int1e_ovlp', hermi=0, kpts=kpt, pbcopt=lib.c_null_ptr()) - s = lib.asarray(s) + s = np.asarray(s) hermi_error = abs(s - np.rollaxis(s.conj(), -1, -2)).max() if hermi_error > cell.precision and hermi_error > 1e-12: logger.warn(cell, '%.4g error found in overlap integrals. ' 'cell.precision or cell.rcut can be adjusted to ' 'improve accuracy.', hermi_error) - cond = np.max(lib.cond(s)) - if cond * precision > 1e2: - prec = 1e7 / cond - rmin = gto.estimate_rcut(cell, prec*1e-5) - logger.warn(cell, 'Singularity detected in overlap matrix. ' - 'Integral accuracy may be not enough.\n ' - 'You can adjust cell.precision or cell.rcut to ' - 'improve accuracy. Recommended settings are\n ' - 'cell.precision < %.2g\n ' - 'cell.rcut > %.4g', prec, rmin) + if cell.verbose >= logger.DEBUG: + cond = np.max(lib.cond(s)) + if cond * precision > 1e2: + prec = 1e7 / cond + rmin = gto.estimate_rcut(cell, prec*1e-5) + logger.warn(cell, 'Singularity detected in overlap matrix. ' + 'Integral accuracy may be not enough.\n ' + 'You can adjust cell.precision or cell.rcut to ' + 'improve accuracy. 
Recommended settings are\n ' + 'cell.precision < %.2g\n ' + 'cell.rcut > %.4g', prec, rmin) return s @@ -615,11 +616,18 @@ def dump_flags(self, verbose=None): return self def check_sanity(self): - mol_hf.SCF.check_sanity(self) + lib.StreamObject.check_sanity(self) if (isinstance(self.exxdiv, str) and self.exxdiv.lower() != 'ewald' and isinstance(self.with_df, df.df.DF)): logger.warn(self, 'exxdiv %s is not supported in DF or MDF', self.exxdiv) + + if self.verbose >= logger.DEBUG: + s = self.get_ovlp() + cond = np.max(lib.cond(s)) + if cond * 1e-17 > self.conv_tol: + logger.warn(self, 'Singularity detected in overlap matrix (condition number = %4.3g). ' + 'SCF may be inaccurate and hard to converge.', cond) return self def get_hcore(self, cell=None, kpt=None): @@ -738,7 +746,7 @@ def get_jk_incore(self, cell=None, dm=None, hermi=1, kpt=None, omega=None, return self.get_jk(cell, dm, hermi, kpt) def energy_nuc(self): - return self.cell.energy_nuc() + return self.cell.enuc @lib.with_doc(dip_moment.__doc__) def dip_moment(self, cell=None, dm=None, unit='Debye', verbose=logger.NOTE, @@ -758,10 +766,10 @@ def _finalize(self): makov_payne_correction(self) return self - def get_init_guess(self, cell=None, key='minao'): + def get_init_guess(self, cell=None, key='minao', s1e=None): if cell is None: cell = self.cell dm = mol_hf.SCF.get_init_guess(self, cell, key) - dm = normalize_dm_(self, dm) + dm = normalize_dm_(self, dm, s1e) return dm def init_guess_by_1e(self, cell=None): @@ -914,12 +922,14 @@ def _format_jks(vj, dm, kpts_band): vj = vj[0] return vj -def normalize_dm_(mf, dm): +def normalize_dm_(mf, dm, s1e=None): ''' Scale density matrix to make it produce the correct number of electrons. 
''' cell = mf.cell - ne = np.einsum('ij,ji->', dm, mf.get_ovlp(cell)).real + if s1e is None: + s1e = mf.get_ovlp(cell) + ne = lib.einsum('ij,ji->', dm, s1e).real if abs(ne - cell.nelectron) > 0.01: logger.debug(mf, 'Big error detected in the electron number ' 'of initial guess density matrix (Ne/cell = %g)!\n' diff --git a/pyscf/pbc/scf/khf.py b/pyscf/pbc/scf/khf.py index 1ef2d88908..89124e8af4 100644 --- a/pyscf/pbc/scf/khf.py +++ b/pyscf/pbc/scf/khf.py @@ -496,7 +496,7 @@ def dump_flags(self, verbose=None): self.with_df.dump_flags(verbose) return self - def get_init_guess(self, cell=None, key='minao'): + def get_init_guess(self, cell=None, key='minao', s1e=None): raise NotImplementedError def init_guess_by_1e(self, cell=None): @@ -524,10 +524,10 @@ def get_jk(self, cell=None, dm_kpts=None, hermi=1, kpts=None, kpts_band=None, cpu0 = (logger.process_clock(), logger.perf_counter()) if self.rsjk: vj, vk = self.rsjk.get_jk(dm_kpts, hermi, kpts, kpts_band, - with_j, with_k, omega, self.exxdiv) + with_j, with_k, omega=omega, exxdiv=self.exxdiv) else: vj, vk = self.with_df.get_jk(dm_kpts, hermi, kpts, kpts_band, - with_j, with_k, omega, self.exxdiv) + with_j, with_k, omega=omega, exxdiv=self.exxdiv) logger.timer(self, 'vj and vk', *cpu0) return vj, vk @@ -700,7 +700,9 @@ def check_sanity(self): 'found in KRHF method.', cell.nelec, nkpts) return KSCF.check_sanity(self) - def get_init_guess(self, cell=None, key='minao'): + def get_init_guess(self, cell=None, key='minao', s1e=None): + if s1e is None: + s1e = self.get_ovlp(cell) dm = mol_hf.SCF.get_init_guess(self, cell, key) nkpts = len(self.kpts) if dm.ndim == 2: @@ -708,7 +710,7 @@ def get_init_guess(self, cell=None, key='minao'): dm = np.repeat(dm[None,:,:], nkpts, axis=0) dm_kpts = dm - ne = np.einsum('kij,kji->', dm_kpts, self.get_ovlp(cell)).real + ne = lib.einsum('kij,kji->', dm_kpts, s1e).real # FIXME: consider the fractional num_electron or not? This maybe # relate to the charged system. 
nelectron = float(self.cell.tot_electrons(nkpts)) diff --git a/pyscf/pbc/scf/khf_ksymm.py b/pyscf/pbc/scf/khf_ksymm.py index baaf5543a6..69e4d5c5d1 100644 --- a/pyscf/pbc/scf/khf_ksymm.py +++ b/pyscf/pbc/scf/khf_ksymm.py @@ -343,14 +343,16 @@ class KsymAdaptedKRHF(KsymAdaptedKSCF, khf.KRHF): to_ks = khf.KRHF.to_ks convert_from_ = khf.KRHF.convert_from_ - def get_init_guess(self, cell=None, key='minao'): + def get_init_guess(self, cell=None, key='minao', s1e=None): + if s1e is None: + s1e = self.get_ovlp(cell) dm_kpts = mol_hf.SCF.get_init_guess(self, cell, key) if dm_kpts.ndim == 2: dm_kpts = np.asarray([dm_kpts]*self.kpts.nkpts_ibz) elif len(dm_kpts) != self.kpts.nkpts_ibz: dm_kpts = dm_kpts[self.kpts.ibz2bz] - ne = np.einsum('k,kij,kji', self.kpts.weights_ibz, dm_kpts, self.get_ovlp(cell)).real + ne = lib.einsum('k,kij,kji', self.kpts.weights_ibz, dm_kpts, s1e).real nkpts = self.kpts.nkpts ne *= nkpts nelectron = float(self.cell.tot_electrons(nkpts)) diff --git a/pyscf/pbc/scf/kuhf.py b/pyscf/pbc/scf/kuhf.py index af56a2ced3..eae04c0713 100644 --- a/pyscf/pbc/scf/kuhf.py +++ b/pyscf/pbc/scf/kuhf.py @@ -416,7 +416,9 @@ def dump_flags(self, verbose=None): 'alpha = %d beta = %d', *self.nelec) return self - def get_init_guess(self, cell=None, key='minao'): + def get_init_guess(self, cell=None, key='minao', s1e=None): + if s1e is None: + s1e = self.get_ovlp(cell) dm_kpts = mol_hf.SCF.get_init_guess(self, cell, key) assert dm_kpts.shape[0] == 2 nkpts = len(self.kpts) @@ -424,7 +426,7 @@ def get_init_guess(self, cell=None, key='minao'): # dm[spin,nao,nao] at gamma point -> dm_kpts[spin,nkpts,nao,nao] dm_kpts = np.repeat(dm_kpts[:,None,:,:], nkpts, axis=1) - ne = np.einsum('xkij,kji->x', dm_kpts, self.get_ovlp(cell)).real + ne = lib.einsum('xkij,kji->x', dm_kpts, s1e).real nelec = np.asarray(self.nelec) if np.any(abs(ne - nelec) > 0.01*nkpts): logger.debug(self, 'Big error detected in the electron number ' diff --git a/pyscf/pbc/scf/kuhf_ksymm.py 
b/pyscf/pbc/scf/kuhf_ksymm.py index 310de63289..4e10ed0fdc 100644 --- a/pyscf/pbc/scf/kuhf_ksymm.py +++ b/pyscf/pbc/scf/kuhf_ksymm.py @@ -155,7 +155,9 @@ def dump_flags(self, verbose=None): 'alpha = %d beta = %d', *self.nelec) return self - def get_init_guess(self, cell=None, key='minao'): + def get_init_guess(self, cell=None, key='minao', s1e=None): + if s1e is None: + s1e = self.get_ovlp(cell) dm_kpts = mol_hf.SCF.get_init_guess(self, cell, key) assert dm_kpts.shape[0]==2 if dm_kpts.ndim != 4: @@ -165,7 +167,7 @@ def get_init_guess(self, cell=None, key='minao'): elif dm_kpts.shape[1] != self.kpts.nkpts_ibz: dm_kpts = dm_kpts[:,self.kpts.ibz2bz] - ne = np.einsum('k,xkij,kji->x', self.kpts.weights_ibz, dm_kpts, self.get_ovlp(cell)).real + ne = lib.einsum('k,xkij,kji->x', self.kpts.weights_ibz, dm_kpts, s1e).real nkpts = self.kpts.nkpts ne *= nkpts nelec = np.asarray(self.nelec) diff --git a/pyscf/pbc/scf/test/test_hf.py b/pyscf/pbc/scf/test/test_hf.py index fe3387468b..3e47561cee 100644 --- a/pyscf/pbc/scf/test/test_hf.py +++ b/pyscf/pbc/scf/test/test_hf.py @@ -20,6 +20,7 @@ import tempfile import numpy from pyscf import lib +from pyscf.scf import atom_hf from pyscf.pbc import gto as pbcgto from pyscf.pbc.scf import hf as pbchf import pyscf.pbc.scf as pscf @@ -511,7 +512,7 @@ def test_init_guess_by_1e(self): self.assertEqual(dm.ndim, 3) self.assertAlmostEqual(lib.fp(dm), 0.025922864381755062, 6) - def test_init_guess_by_atom(self): + def test_init_guess_by_minao(self): with lib.temporary_env(cell, dimension=1): dm = mf.get_init_guess(key='minao') kdm = kmf.get_init_guess(key='minao') @@ -521,6 +522,29 @@ def test_init_guess_by_atom(self): self.assertEqual(kdm.ndim, 3) self.assertAlmostEqual(lib.fp(kdm), -1.714952331211208, 8) + def test_init_guess_by_atom(self): + with lib.temporary_env(cell, dimension=1): + dm = mf.get_init_guess(key='atom') + kdm = kmf.get_init_guess(key='atom') + + self.assertAlmostEqual(lib.fp(dm), 0.18074522075843902, 7) + + 
self.assertEqual(kdm.ndim, 3) + self.assertAlmostEqual(lib.fp(dm), 0.18074522075843902, 7) + + def test_atom_hf_with_pp(self): + mol = pbcgto.Cell() + mol.build( + verbose = 7, + output = '/dev/null', + atom = 'O 0 0 0; H 0 0 -1; H 0 0 1', + a = [[5, 0, 0], [0, 5, 0], [0, 0, 5]], + basis = 'gth-dzvp', + pseudo = 'gth-pade') + scf_result = atom_hf.get_atm_nrhf(mol) + self.assertAlmostEqual(scf_result['O'][0], -15.193243796069835, 9) + self.assertAlmostEqual(scf_result['H'][0], -0.49777509423571864, 9) + def test_jk(self): nao = cell.nao numpy.random.seed(2) diff --git a/pyscf/pbc/scf/uhf.py b/pyscf/pbc/scf/uhf.py index b9d9b1407d..0d247f745e 100644 --- a/pyscf/pbc/scf/uhf.py +++ b/pyscf/pbc/scf/uhf.py @@ -221,10 +221,13 @@ def dip_moment(self, cell=None, dm=None, unit='Debye', verbose=logger.NOTE, rho = self.get_rho(dm) return dip_moment(cell, dm, unit, verbose, rho=rho, kpt=self.kpt, **kwargs) - def get_init_guess(self, cell=None, key='minao'): - if cell is None: cell = self.cell + def get_init_guess(self, cell=None, key='minao', s1e=None): + if cell is None: + cell = self.cell + if s1e is None: + s1e = self.get_ovlp(cell) dm = mol_uhf.UHF.get_init_guess(self, cell, key) - ne = np.einsum('xij,ji->x', dm, self.get_ovlp(cell)).real + ne = np.einsum('xij,ji->x', dm, s1e).real nelec = self.nelec if np.any(abs(ne - nelec) > 0.01): logger.debug(self, 'Big error detected in the electron number ' diff --git a/pyscf/pbc/symm/geom.py b/pyscf/pbc/symm/geom.py index 74119a4483..ae698d2347 100644 --- a/pyscf/pbc/symm/geom.py +++ b/pyscf/pbc/symm/geom.py @@ -77,7 +77,7 @@ def search_space_group_ops(cell, rotations=None, tol=SYMPREC): ''' if rotations is None: rotations = search_point_group_ops(cell, tol=tol) a = cell.lattice_vectors() - coords = cell.get_scaled_positions() + coords = cell.get_scaled_atom_coords() atmgrp = mole.atom_types(cell._atom, magmom=cell.magmom) atmgrp_spin_inv = {} #spin up and down inverted has_spin = False diff --git a/pyscf/pbc/symm/pyscf_spglib.py 
b/pyscf/pbc/symm/pyscf_spglib.py index 3a0d1442cb..f87117a8dd 100644 --- a/pyscf/pbc/symm/pyscf_spglib.py +++ b/pyscf/pbc/symm/pyscf_spglib.py @@ -29,7 +29,7 @@ def cell_to_spgcell(cell): Convert PySCF Cell object to spglib cell object ''' a = cell.lattice_vectors() - atm_pos = cell.get_scaled_positions() + atm_pos = cell.get_scaled_atom_coords() atm_num = [] from pyscf.data import elements for symbol in cell.elements: diff --git a/pyscf/pbc/symm/symmetry.py b/pyscf/pbc/symm/symmetry.py index c79bc81167..ce29e3afac 100644 --- a/pyscf/pbc/symm/symmetry.py +++ b/pyscf/pbc/symm/symmetry.py @@ -219,7 +219,7 @@ def dump_info(self): def _get_phase(cell, op, kpt_scaled, ignore_phase=False, tol=SYMPREC): kpt_scaled = op.a2b(cell).dot_rot(kpt_scaled) - coords_scaled = cell.get_scaled_positions().reshape(-1,3) + coords_scaled = cell.get_scaled_atom_coords().reshape(-1,3) natm = coords_scaled.shape[0] phase = np.ones((natm,), dtype=np.complex128) atm_map = np.arange(natm) diff --git a/pyscf/pbc/tools/pbc.py b/pyscf/pbc/tools/pbc.py index 7ca867fd21..20d45fe692 100644 --- a/pyscf/pbc/tools/pbc.py +++ b/pyscf/pbc/tools/pbc.py @@ -14,6 +14,7 @@ # limitations under the License. 
import warnings +import ctypes import numpy as np import scipy.linalg from pyscf import lib @@ -57,6 +58,44 @@ def _ifftn_blas(g, mesh): return out.reshape(-1, *mesh) if FFT_ENGINE == 'FFTW': + try: + libfft = lib.load_library('libfft') + except OSError: + raise RuntimeError("Failed to load libfft") + + def _copy_d2z(a): + fn = libfft._copy_d2z + out = np.empty(a.shape, dtype=np.complex128) + fn(out.ctypes.data_as(ctypes.c_void_p), + a.ctypes.data_as(ctypes.c_void_p), + ctypes.c_size_t(a.size)) + return out + + def _complex_fftn_fftw(f, mesh, func): + if f.dtype == np.double and f.flags.c_contiguous: + # np.asarray or np.astype is too slow + f = _copy_d2z(f) + else: + f = np.asarray(f, order='C', dtype=np.complex128) + mesh = np.asarray(mesh, order='C', dtype=np.int32) + rank = len(mesh) + out = np.empty_like(f) + fn = getattr(libfft, func) + for i, fi in enumerate(f): + fn(fi.ctypes.data_as(ctypes.c_void_p), + out[i].ctypes.data_as(ctypes.c_void_p), + mesh.ctypes.data_as(ctypes.c_void_p), + ctypes.c_int(rank)) + return out + + def _fftn_wrapper(a): + mesh = a.shape[1:] + return _complex_fftn_fftw(a, mesh, 'fft') + def _ifftn_wrapper(a): + mesh = a.shape[1:] + return _complex_fftn_fftw(a, mesh, 'ifft') + +elif FFT_ENGINE == 'PYFFTW': # pyfftw is slower than np.fft in most cases try: import pyfftw @@ -235,8 +274,9 @@ def get_coulG(cell, k=np.zeros(3), exx=False, mf=None, mesh=None, Gv=None, else: kG = Gv - equal2boundary = np.zeros(Gv.shape[0], dtype=bool) + equal2boundary = None if wrap_around and abs(k).sum() > 1e-9: + equal2boundary = np.zeros(Gv.shape[0], dtype=bool) # Here we 'wrap around' the high frequency k+G vectors into their lower # frequency counterparts. 
Important if you want the gamma point and k-point # answers to agree @@ -357,7 +397,8 @@ def get_coulG(cell, k=np.zeros(3), exx=False, mf=None, mesh=None, Gv=None, if cell.dimension > 0 and exxdiv == 'ewald' and len(G0_idx) > 0: coulG[G0_idx] += Nk*cell.vol*madelung(cell, kpts) - coulG[equal2boundary] = 0 + if equal2boundary is not None: + coulG[equal2boundary] = 0 # Scale the coulG kernel for attenuated Coulomb integrals. # * omega is used by RangeSeparatedJKBuilder which requires ewald probe charge @@ -507,7 +548,7 @@ def get_lattice_Ls(cell, nimgs=None, rcut=None, dimension=None, discard=True): a = cell.lattice_vectors() - scaled_atom_coords = np.linalg.solve(a.T, cell.atom_coords().T).T + scaled_atom_coords = cell.get_scaled_atom_coords() atom_boundary_max = scaled_atom_coords[:,:dimension].max(axis=0) atom_boundary_min = scaled_atom_coords[:,:dimension].min(axis=0) if (np.any(atom_boundary_max > 1) or np.any(atom_boundary_min < -1)): @@ -542,11 +583,12 @@ def find_boundary(a): np.arange(-bounds[2], bounds[2]+1))) Ls = np.dot(Ts[:,:dimension], a[:dimension]) - ovlp_penalty += 1e-200 # avoid /0 - Ts_scaled = (Ts[:,:dimension] + 1e-200) / ovlp_penalty - ovlp_penalty_fac = 1. / abs(Ts_scaled).min(axis=1) - Ls_mask = np.linalg.norm(Ls, axis=1) * (1-ovlp_penalty_fac) < rcut - Ls = Ls[Ls_mask] + if discard: + ovlp_penalty += 1e-200 # avoid /0 + Ts_scaled = (Ts[:,:dimension] + 1e-200) / ovlp_penalty + ovlp_penalty_fac = 1. 
/ abs(Ts_scaled).min(axis=1) + Ls_mask = np.linalg.norm(Ls, axis=1) * (1-ovlp_penalty_fac) < rcut + Ls = Ls[Ls_mask] return np.asarray(Ls, order='C') diff --git a/pyscf/scf/atom_hf.py b/pyscf/scf/atom_hf.py index 58e0a585c3..4430963493 100644 --- a/pyscf/scf/atom_hf.py +++ b/pyscf/scf/atom_hf.py @@ -30,6 +30,7 @@ def get_atm_nrhf(mol, atomic_configuration=elements.NRSRHF_CONFIGURATION): atm_template = mol.copy(deep=False) atm_template.charge = 0 + atm_template.enuc = 0 atm_template.symmetry = False # TODO: enable SO3 symmetry here atm_template.atom = atm_template._atom = [] atm_template.cart = False # AtomSphAverageRHF does not support cartesian basis @@ -50,7 +51,6 @@ def get_atm_nrhf(mol, atomic_configuration=elements.NRSRHF_CONFIGURATION): atm._ecpbas[:,0] = 0 if element in mol._pseudo: atm._pseudo = {element: mol._pseudo.get(element)} - raise NotImplementedError atm.spin = atm.nelectron % 2 nao = atm.nao @@ -59,6 +59,19 @@ def get_atm_nrhf(mol, atomic_configuration=elements.NRSRHF_CONFIGURATION): mo_occ = mo_energy = numpy.zeros(nao) mo_coeff = numpy.zeros((nao,nao)) atm_scf_result[element] = (0, mo_energy, mo_coeff, mo_occ) + elif atm._pseudo: + from pyscf.scf import atom_hf_pp + atm.a = None + if atm.nelectron == 1: + atm_hf = atom_hf_pp.AtomHF1ePP(atm) + else: + atm_hf = atom_hf_pp.AtomSCFPP(atm) + atm_hf.atomic_configuration = atomic_configuration + + atm_hf.verbose = mol.verbose + atm_hf.run() + atm_scf_result[element] = (atm_hf.e_tot, atm_hf.mo_energy, + atm_hf.mo_coeff, atm_hf.mo_occ) else: if atm.nelectron == 1: atm_hf = AtomHF1e(atm) diff --git a/pyscf/scf/atom_hf_pp.py b/pyscf/scf/atom_hf_pp.py new file mode 100644 index 0000000000..19a2f73930 --- /dev/null +++ b/pyscf/scf/atom_hf_pp.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python +# Copyright 2021-2024 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Author: Xing Zhang +# + +import copy +import numpy +from scipy.special import erf + +from pyscf import lib +from pyscf import gto, scf +from pyscf.dft import gen_grid, numint +from pyscf.pbc import gto as pbcgto +from pyscf.scf import atom_hf, rohf + +def get_pp_loc_part1_rs(mol, coords): + atm_coords = mol.atom_coords() + out = 0 + for ia in range(mol.natm): + r0 = atm_coords[ia] + r2 = numpy.sum((coords - r0)**2, axis=1) + r = numpy.sqrt(r2) + Zia = mol.atom_charge(ia) + symb = mol.atom_symbol(ia) + if symb in mol._pseudo: + pp = mol._pseudo[symb] + rloc, nexp, cexp = pp[1:3+1] + else: + rloc = 1e16 + alpha = 1.0 / (numpy.sqrt(2) * rloc) + out += - Zia / r * erf(alpha * r) + return out + +def _aux_e2(cell, auxcell, intor, aosym='s1', comp=1): + intor = cell._add_suffix(intor) + pcell = copy.copy(cell) + pcell._atm, pcell._bas, pcell._env = \ + atm, bas, env = gto.conc_env(cell._atm, cell._bas, cell._env, + cell._atm, cell._bas, cell._env) + ao_loc = gto.moleintor.make_loc(bas, intor) + aux_loc = auxcell.ao_loc_nr(auxcell.cart or 'ssc' in intor) + ao_loc = numpy.asarray(numpy.hstack([ao_loc, ao_loc[-1]+aux_loc[1:]]), + dtype=numpy.int32) + atm, bas, env = gto.conc_env(atm, bas, env, + auxcell._atm, auxcell._bas, auxcell._env) + nbas = cell.nbas + shls_slice = (0, nbas, nbas, nbas*2, nbas*2, nbas*2+auxcell.nbas) + comp = 1 + out = gto.moleintor.getints3c(intor, atm, bas, env, shls_slice=shls_slice, + comp=comp, aosym=aosym, ao_loc=ao_loc) + return out + +def get_pp_loc_part2(mol): + buf = 0 + intors = ('int3c2e', 'int3c1e', 'int3c1e_r2_origk', + 
'int3c1e_r4_origk', 'int3c1e_r6_origk') + for cn in range(1, 5): + fakecell = pbcgto.pseudo.pp_int.fake_cell_vloc(mol, cn) + if fakecell.nbas > 0: + v = _aux_e2(mol, fakecell, intors[cn], aosym='s2', comp=1) + buf += numpy.einsum('...i->...', v) + if numpy.isscalar(buf): + vpp_loc = buf + else: + vpp_loc = lib.unpack_tril(buf) + return vpp_loc + +def get_pp_loc(mol): + # TODO use analytic integral + grids = gen_grid.Grids(mol) + grids.level = 3 + grids.build(with_non0tab=True) + _numint = numint.NumInt() + + vpp = 0 + for ao, mask, weight, coords in _numint.block_loop(mol, grids): + vloc = get_pp_loc_part1_rs(mol, coords) + vpp += numpy.einsum("g,g,gi,gj->ij", weight, vloc, ao, ao) + vpp += get_pp_loc_part2(mol) + return vpp + +def get_pp_nl(mol): + nao = mol.nao + fakecell, hl_blocks = pbcgto.pseudo.pp_int.fake_cell_vnl(mol) + ppnl_half = _int_vnl(mol, fakecell, hl_blocks) + + ppnl = numpy.zeros((nao,nao), dtype=numpy.double) + offset = [0] * 3 + for ib, hl in enumerate(hl_blocks): + l = fakecell.bas_angular(ib) + nd = 2 * l + 1 + hl_dim = hl.shape[0] + ilp = numpy.ndarray((hl_dim,nd,nao), dtype=numpy.double) + for i in range(hl_dim): + p0 = offset[i] + ilp[i] = ppnl_half[i][p0:p0+nd] + offset[i] = p0 + nd + ppnl += numpy.einsum('ilp,ij,jlq->pq', ilp, hl, ilp) + return ppnl + +def _int_vnl(cell, fakecell, hl_blocks): + intopt = lib.c_null_ptr() + + def int_ket(_bas, intor): + if len(_bas) == 0: + return [] + intor = cell._add_suffix(intor) + atm, bas, env = gto.conc_env(cell._atm, cell._bas, cell._env, + fakecell._atm, _bas, fakecell._env) + atm = numpy.asarray(atm, dtype=numpy.int32) + bas = numpy.asarray(bas, dtype=numpy.int32) + env = numpy.asarray(env, dtype=numpy.double) + nbas = len(bas) + shls_slice = (cell.nbas, nbas, 0, cell.nbas) + ao_loc = gto.moleintor.make_loc(bas, intor) + ni = ao_loc[shls_slice[1]] - ao_loc[shls_slice[0]] + nj = ao_loc[shls_slice[3]] - ao_loc[shls_slice[2]] + out = numpy.empty((ni,nj), dtype=numpy.double) + comp = 1 + out = 
gto.moleintor.getints2c(intor, atm, bas, env, shls_slice=shls_slice, comp=comp, hermi=0, + ao_loc=ao_loc, cintopt=intopt, out=out) + return out + + hl_dims = numpy.asarray([len(hl) for hl in hl_blocks]) + out = (int_ket(fakecell._bas[hl_dims>0], 'int1e_ovlp'), + int_ket(fakecell._bas[hl_dims>1], 'int1e_r2_origi'), + int_ket(fakecell._bas[hl_dims>2], 'int1e_r4_origi')) + return out + +class AtomSCFPP(atom_hf.AtomSphAverageRHF): + def get_hcore(self, mol=None): + if mol is None: + mol = self.mol + h = mol.intor('int1e_kin', hermi=1) + h += get_pp_nl(mol) + h += get_pp_loc(mol) + return h + +class AtomHF1ePP(rohf.HF1e, AtomSCFPP): + eig = AtomSCFPP.eig + get_hcore = AtomSCFPP.get_hcore diff --git a/pyscf/scf/dhf.py b/pyscf/scf/dhf.py index 32d2d0f7f2..6e29d5a450 100644 --- a/pyscf/scf/dhf.py +++ b/pyscf/scf/dhf.py @@ -285,14 +285,14 @@ def fproj(mo): return dm -def get_init_guess(mol, key='minao'): +def get_init_guess(mol, key='minao', **kwargs): '''Generate density matrix for initial guess Kwargs: key : str One of 'minao', 'atom', 'huckel', 'mod_huckel', 'hcore', '1e', 'chkfile'. 
''' - return UHF(mol).get_init_guess(mol, key) + return UHF(mol).get_init_guess(mol, key, **kwargs) def time_reversal_matrix(mol, mat): ''' T(A_ij) = A[T(i),T(j)]^* diff --git a/pyscf/scf/diis.py b/pyscf/scf/diis.py index 321f81cdfe..a442f58b9c 100644 --- a/pyscf/scf/diis.py +++ b/pyscf/scf/diis.py @@ -72,13 +72,13 @@ def get_num_vec(self): def get_err_vec_orig(s, d, f): '''error vector = SDF - FDS''' if isinstance(f, numpy.ndarray) and f.ndim == 2: - sdf = reduce(numpy.dot, (s,d,f)) + sdf = reduce(lib.dot, (s,d,f)) errvec = (sdf.conj().T - sdf).ravel() elif isinstance(f, numpy.ndarray) and f.ndim == 3 and s.ndim == 3: errvec = [] for i in range(f.shape[0]): - sdf = reduce(numpy.dot, (s[i], d[i], f[i])) + sdf = reduce(lib.dot, (s[i], d[i], f[i])) errvec.append((sdf.conj().T - sdf).ravel()) errvec = numpy.hstack(errvec) @@ -98,7 +98,7 @@ def get_err_vec_orth(s, d, f, Corth): sym_forbid = orbsym[:,None] != orbsym if isinstance(f, numpy.ndarray) and f.ndim == 2: - sdf = reduce(numpy.dot, (Corth.conj().T, s, d, f, Corth)) + sdf = reduce(lib.dot, (Corth.conj().T, s, d, f, Corth)) if orbsym is not None: sdf[sym_forbid] = 0 errvec = (sdf.conj().T - sdf).ravel() @@ -106,7 +106,7 @@ def get_err_vec_orth(s, d, f, Corth): elif isinstance(f, numpy.ndarray) and f.ndim == 3 and s.ndim == 3: errvec = [] for i in range(f.shape[0]): - sdf = reduce(numpy.dot, (Corth[i].conj().T, s[i], d[i], f[i], Corth[i])) + sdf = reduce(lib.dot, (Corth[i].conj().T, s[i], d[i], f[i], Corth[i])) if orbsym is not None: sdf[sym_forbid] = 0 errvec.append((sdf.conj().T - sdf).ravel()) diff --git a/pyscf/scf/hf.py b/pyscf/scf/hf.py index b6ecb5ace0..7a8c0e8f22 100644 --- a/pyscf/scf/hf.py +++ b/pyscf/scf/hf.py @@ -115,8 +115,10 @@ def kernel(mf, conv_tol=1e-10, conv_tol_grad=None, logger.info(mf, 'Set gradient conv threshold to %g', conv_tol_grad) mol = mf.mol + s1e = mf.get_ovlp(mol) + if dm0 is None: - dm = mf.get_init_guess(mol, mf.init_guess) + dm = mf.get_init_guess(mol, mf.init_guess, s1e=s1e) 
else: dm = dm0 @@ -128,13 +130,6 @@ def kernel(mf, conv_tol=1e-10, conv_tol_grad=None, scf_conv = False mo_energy = mo_coeff = mo_occ = None - s1e = mf.get_ovlp(mol) - cond = lib.cond(s1e) - logger.debug(mf, 'cond(S) = %s', cond) - if numpy.max(cond)*1e-17 > conv_tol: - logger.warn(mf, 'Singularity detected in overlap matrix (condition number = %4.3g). ' - 'SCF may be inaccurate and hard to converge.', numpy.max(cond)) - # Skip SCF iterations. Compute only the total energy of the initial density if mf.max_cycle <= 0: fock = mf.get_fock(h1e, s1e, vhf, dm) # = h1e + vhf, no DIIS @@ -722,14 +717,14 @@ def fproj(mo): return dm -def get_init_guess(mol, key='minao'): +def get_init_guess(mol, key='minao', **kwargs): '''Generate density matrix for initial guess Kwargs: key : str One of 'minao', 'atom', 'huckel', 'hcore', '1e', 'chkfile'. ''' - return RHF(mol).get_init_guess(mol, key) + return RHF(mol).get_init_guess(mol, key, **kwargs) # eigenvalue of d is 1 @@ -752,7 +747,7 @@ def level_shift(s, d, f, factor): Returns: New Fock matrix, 2D ndarray ''' - dm_vir = s - reduce(numpy.dot, (s, d, s)) + dm_vir = s - reduce(lib.dot, (s, d, s)) return f + dm_vir * factor @@ -1570,6 +1565,15 @@ def __init__(self, mol): self._opt = {None: None} self._eri = None # Note: self._eri requires large amount of memory + def check_sanity(self): + s1e = self.get_ovlp() + cond = lib.cond(s1e) + logger.debug(self, 'cond(S) = %s', cond) + if numpy.max(cond)*1e-17 > self.conv_tol: + logger.warn(self, 'Singularity detected in overlap matrix (condition number = %4.3g). 
' + 'SCF may be inaccurate and hard to converge.', numpy.max(cond)) + return super().check_sanity() + def build(self, mol=None): if mol is None: mol = self.mol if self.verbose >= logger.WARN: @@ -1704,7 +1708,7 @@ def from_chk(self, chkfile=None, project=None): return self.init_guess_by_chkfile(chkfile, project) from_chk.__doc__ = init_guess_by_chkfile.__doc__ - def get_init_guess(self, mol=None, key='minao'): + def get_init_guess(self, mol=None, key='minao', **kwargs): if not isinstance(key, str): return key @@ -1742,7 +1746,7 @@ def get_init_guess(self, mol=None, key='minao'): energy_tot = energy_tot def energy_nuc(self): - return self.mol.energy_nuc() + return self.mol.enuc # A hook for overloading convergence criteria in SCF iterations. Assigning # a function @@ -2103,8 +2107,8 @@ def check_sanity(self): mol.nelectron) return SCF.check_sanity(self) - def get_init_guess(self, mol=None, key='minao'): - dm = SCF.get_init_guess(self, mol, key) + def get_init_guess(self, mol=None, key='minao', **kwargs): + dm = SCF.get_init_guess(self, mol, key, **kwargs) if self.verbose >= logger.DEBUG1: s = self.get_ovlp() nelec = numpy.einsum('ij,ji', dm, s).real diff --git a/pyscf/scf/uhf.py b/pyscf/scf/uhf.py index 4f07335bd6..0afc66d0ba 100644 --- a/pyscf/scf/uhf.py +++ b/pyscf/scf/uhf.py @@ -130,8 +130,8 @@ def _break_dm_spin_symm(mol, dm): dmb[...,p0:p1,p0:p1] = dma[...,p0:p1,p0:p1] return dma, dmb -def get_init_guess(mol, key='minao'): - return UHF(mol).get_init_guess(mol, key) +def get_init_guess(mol, key='minao', **kwargs): + return UHF(mol).get_init_guess(mol, key, **kwargs) def make_rdm1(mo_coeff, mo_occ, **kwargs): '''One-particle density matrix in AO representation @@ -830,8 +830,8 @@ def make_rdm2(self, mo_coeff=None, mo_occ=None, **kwargs): energy_elec = energy_elec - def get_init_guess(self, mol=None, key='minao'): - dm = hf.SCF.get_init_guess(self, mol, key) + def get_init_guess(self, mol=None, key='minao', **kwargs): + dm = hf.SCF.get_init_guess(self, mol, key, 
**kwargs) if self.verbose >= logger.DEBUG1: s = self.get_ovlp() nelec =(numpy.einsum('ij,ji', dm[0], s).real,