diff --git a/pyscf/gto/basis/__init__.py b/pyscf/gto/basis/__init__.py index 1601b6e03f..d8e22ec5ee 100644 --- a/pyscf/gto/basis/__init__.py +++ b/pyscf/gto/basis/__init__.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright 2014-2020 The PySCF Developers. All Rights Reserved. +# Copyright 2014-2023 The PySCF Developers. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,14 +16,18 @@ # Author: Qiming Sun # +__all__ = ['ALIAS', 'GTH_ALIAS', 'PP_ALIAS', + 'parse', 'parse_ecp', 'load', 'load_ecp', 'load_pseudo', + 'optimize_contraction', 'to_general_contraction'] + import os import sys +import re from os.path import join -if sys.version_info < (2,7): - import imp -else: - import importlib -from pyscf.gto.basis import parse_nwchem +import importlib +import pyscf +from pyscf.gto.basis import parse_nwchem, parse_nwchem_ecp +from pyscf.gto.basis import parse_cp2k, parse_cp2k_pp from pyscf.lib.exceptions import BasisNotFoundError from pyscf import __config__ @@ -371,12 +375,52 @@ 'dyallv4z' : 'dyall-basis.dyall_v4z', } +GTH_ALIAS = { + 'gthaugdzvp' : 'gth-aug-dzvp.dat', + 'gthaugqzv2p' : 'gth-aug-qzv2p.dat', + 'gthaugqzv3p' : 'gth-aug-qzv3p.dat', + 'gthaugtzv2p' : 'gth-aug-tzv2p.dat', + 'gthaugtzvp' : 'gth-aug-tzvp.dat', + 'gthdzv' : 'gth-dzv.dat', + 'gthdzvp' : 'gth-dzvp.dat', + 'gthqzv2p' : 'gth-qzv2p.dat', + 'gthqzv3p' : 'gth-qzv3p.dat', + 'gthszv' : 'gth-szv.dat', + 'gthtzv2p' : 'gth-tzv2p.dat', + 'gthtzvp' : 'gth-tzvp.dat', + 'gthccdzvp' : 'gth-cc-dzvp.dat', + 'gthcctzvp' : 'gth-cc-tzvp.dat', + 'gthccqzvp' : 'gth-cc-qzvp.dat', + 'gthszvmolopt' : 'gth-szv-molopt.dat', + 'gthdzvpmolopt' : 'gth-dzvp-molopt.dat', + 'gthtzvpmolopt' : 'gth-tzvp-molopt.dat', + 'gthtzv2pmolopt' : 'gth-tzv2p-molopt.dat', + 'gthszvmoloptsr' : 'gth-szv-molopt-sr.dat', + 'gthdzvpmoloptsr' : 'gth-dzvp-molopt-sr.dat', +} + +PP_ALIAS = { + 'gthblyp' : 'gth-blyp.dat' , + 'gthbp' : 'gth-bp.dat' , + 'gthhcth120' : 'gth-hcth120.dat', + 'gthhcth407' : 'gth-hcth407.dat', + 'gtholyp' : 'gth-olyp.dat' , + 'gthlda' : 'gth-pade.dat' , + 'gthpade' : 'gth-pade.dat' , + 'gthpbe' : 'gth-pbe.dat' , + 'gthpbesol' : 'gth-pbesol.dat' , + 'gthhf' : 'gth-hf.dat' , + 'gthhfrev' : 'gth-hf-rev.dat' , +} + def _is_pople_basis(basis): return (basis.startswith('631') or basis.startswith('321') or basis.startswith('431')) _BASIS_DIR = os.path.dirname(__file__) +_GTH_BASIS_DIR = os.path.abspath(f'{pyscf.__file__}/../pbc/gto/basis') +_GTH_PP_DIR = os.path.abspath(f'{_GTH_BASIS_DIR}/../pseudo') def _parse_pople_basis(basis, symb): if '(' in basis: @@ -408,17 +452,56 @@ def convert(s): return tuple([ALIAS[mbas]] + convert(extension.split(',')[0])) OPTIMIZE_CONTRACTION = getattr(__config__, 'gto_basis_parse_optimize', False) + def parse(string, symb=None, optimize=OPTIMIZE_CONTRACTION): + '''Parse the basis (ECP, PP) text in NWChem or CP2K format, returns internal format + + Args: + string : Blank linke and the lines of "BASIS SET" and "END" will be ignored + + Examples: + + >>> mol = gto.Mole() + >>> mol.basis = gto.basis.parse(""" + ... He S + ... 13.6267000 0.1752300 + ... 1.9993500 0.8934830 + ... 0.3829930 0.0000000 + ... He S + ... 13.6267000 0.0000000 + ... 1.9993500 0.0000000 + ... 0.3829930 1.0000000 + ... """, optimize=True) + + >>> cell = pbc.gto.Cell() + >>> cell.basis = {'C': gto.basis.parse(""" + ... C DZVP-GTH + ... 2 + ... 2 0 1 4 2 2 + ... 4.3362376436 0.1490797872 0.0000000000 -0.0878123619 0.0000000000 + ... 1.2881838513 -0.0292640031 0.0000000000 -0.2775560300 0.0000000000 + ... 0.4037767149 -0.6882040510 0.0000000000 -0.4712295093 0.0000000000 + ... 0.1187877657 -0.3964426906 1.0000000000 -0.4058039291 1.0000000000 + ... 3 2 2 1 1 + ... 0.5500000000 1.0000000000 + ... # + ... """)} + ''' if 'ECP' in string: - return parse_nwchem.parse_ecp(string, symb) + return parse_nwchem_ecp.parse(string, symb) + elif 'GTH' in string: + if 'PSEUDOPOTENTIAL' in string: + return parse_cp2k_pp.parse(string, symb) + else: + return parse_cp2k.parse(string, symb, optimize) else: return parse_nwchem.parse(string, symb, optimize) parse.__doc__ = parse_nwchem.parse.__doc__ def parse_ecp(string, symb=None): # TODO: catch KeyError and provide suggestion for the possible keys - return parse_nwchem.parse_ecp(string, symb) -parse_ecp.__doc__ = parse_nwchem.parse_ecp.__doc__ + return parse_nwchem_ecp.parse(string, symb) +parse_ecp.__doc__ = parse_nwchem_ecp.parse.__doc__ def _convert_contraction(contr_string): '''Parse contraction scheme string into a list @@ -497,31 +580,50 @@ def load(filename_or_basisname, symb, optimize=OPTIMIZE_CONTRACTION): contr_scheme = _convert_contraction(split_name[1].lower()) else: contr_scheme = 'Full' + if os.path.isfile(filename_or_basisname): - # read basis from given file try: - b = parse_nwchem.load(filename_or_basisname, symb, optimize) + b = _load_external(parse_nwchem, filename_or_basisname, symb, + optimize=optimize) except BasisNotFoundError: - with open(filename_or_basisname, 'r') as fin: - b = parse_nwchem.parse(fin.read(), symb) + b = _load_external(parse_cp2k, filename_or_basisname, symb, + optimize=optimize) + if contr_scheme != 'Full': b = _truncate(b, contr_scheme, symb, split_name) return b name = _format_basis_name(filename_or_basisname) - - if not (name in ALIAS or _is_pople_basis(name)): + fload = parse_nwchem.load + basis_dir = _BASIS_DIR + if name in ALIAS: + basmod = ALIAS[name] + elif name in GTH_ALIAS: + basmod = GTH_ALIAS[name] + fload = parse_cp2k.load + basis_dir = _GTH_BASIS_DIR + elif _is_pople_basis(name): + basmod = _parse_pople_basis(name, symb) + else: try: - return parse_nwchem.parse(filename_or_basisname, symb) + return parse_nwchem.parse(filename_or_basisname, symb, + optimize=optimize) except IndexError: raise BasisNotFoundError(filename_or_basisname) except BasisNotFoundError as basis_err: pass try: - return parse_nwchem.parse(filename_or_basisname) + return parse_nwchem.parse(filename_or_basisname, optimize=optimize) + except IndexError: + raise BasisNotFoundError(f'Invalid basis {filename_or_basisname}') + except BasisNotFoundError: + pass + + try: + return parse_cp2k.parse(filename_or_basisname, optimize=optimize) except IndexError: - raise BasisNotFoundError('Invalid basis name %s' % filename_or_basisname) + raise BasisNotFoundError(f'Invalid basis {filename_or_basisname}') except BasisNotFoundError: pass @@ -538,62 +640,44 @@ def load(filename_or_basisname, symb, optimize=OPTIMIZE_CONTRACTION): raise basis_err - if name in ALIAS: - basmod = ALIAS[name] - elif _is_pople_basis(name): - basmod = _parse_pople_basis(name, symb) - else: - raise BasisNotFoundError(filename_or_basisname) - if 'dat' in basmod: - b = parse_nwchem.load(join(_BASIS_DIR, basmod), symb, optimize) + b = fload(join(basis_dir, basmod), symb, optimize) elif isinstance(basmod, (tuple, list)) and isinstance(basmod[0], str): b = [] for f in basmod: - b += parse_nwchem.load(join(_BASIS_DIR, f), symb, optimize) + b += fload(join(basis_dir, f), symb, optimize) else: - if sys.version_info < (2,7): - fp, pathname, description = imp.find_module(basmod, __path__) - mod = imp.load_module(name, fp, pathname, description) - b = mod.__getattribute__(symb) - fp.close() - else: - mod = importlib.import_module('.'+basmod, __package__) - b = mod.__getattribute__(symb) + mod = importlib.import_module('.'+basmod, __package__) + b = mod.__getattribute__(symb) if contr_scheme != 'Full': b = _truncate(b, contr_scheme, symb, split_name) return b def load_ecp(filename_or_basisname, symb): - '''Convert the basis of the given symbol to internal format + '''Parses ECP database file ''' symb = ''.join([i for i in symb if i.isalpha()]) if os.path.isfile(filename_or_basisname): - # read basis from given file - try: - return parse_nwchem.load_ecp(filename_or_basisname, symb) - except BasisNotFoundError: - with open(filename_or_basisname, 'r') as fin: - return parse_ecp(fin.read(), symb) + return _load_external(parse_nwchem_ecp, filename_or_basisname, symb) name = _format_basis_name(filename_or_basisname) if name in ALIAS: basmod = ALIAS[name] - return parse_nwchem.load_ecp(join(_BASIS_DIR, basmod), symb) + return parse_nwchem_ecp.load(join(_BASIS_DIR, basmod), symb) try: - return parse_ecp(filename_or_basisname, symb) + return parse_nwchem_ecp.parse(filename_or_basisname, symb) except IndexError: raise BasisNotFoundError(filename_or_basisname) except BasisNotFoundError as basis_err: pass try: - return parse_nwchem.parse_ecp(filename_or_basisname) + return parse_nwchem_ecp.parse(filename_or_basisname) except IndexError: - raise BasisNotFoundError('Invalid basis name %s' % filename_or_basisname) + raise BasisNotFoundError(f'Invalid ECP {filename_or_basisname}') except BasisNotFoundError: pass @@ -610,7 +694,41 @@ def load_ecp(filename_or_basisname, symb): raise basis_err +def load_pseudo(filename_or_basisname, symb): + '''Parses PP database file + ''' + symb = ''.join([i for i in symb if i.isalpha()]) + if os.path.isfile(filename_or_basisname): + return _load_external(parse_cp2k_pp, filename_or_basisname, symb) + + name, suffix = _format_pseudo_name(filename_or_basisname) + if name in PP_ALIAS: + basmod = PP_ALIAS[name] + return parse_cp2k_pp.load(join(_GTH_PP_DIR, basmod), symb, suffix) + + try: + return parse_cp2k_pp.parse(filename_or_basisname) + except IndexError: + raise BasisNotFoundError(f'Invalid PP {filename_or_basisname}') + +def _load_external(module, filename_or_basisname, symb, **kwargs): + '''Try to read basis from given file''' + try: + return module.load(filename_or_basisname, symb, **kwargs) + except BasisNotFoundError: + with open(filename_or_basisname, 'r') as fin: + return module.parse(fin.read(), **kwargs) + def _format_basis_name(basisname): return basisname.lower().replace('-', '').replace('_', '').replace(' ', '') -del(OPTIMIZE_CONTRACTION) +SUFFIX_PATTERN = re.compile(r'q\d+$') +def _format_pseudo_name(pseudo_name): + name_suffix = _format_basis_name(pseudo_name) + match = re.search(SUFFIX_PATTERN, name_suffix) + if match: + name = name_suffix[:match.start()] + suffix = name_suffix[match.start():] + else: + name, suffix = name_suffix, None + return name, suffix diff --git a/pyscf/pbc/gto/basis/parse_cp2k.py b/pyscf/gto/basis/parse_cp2k.py similarity index 79% rename from pyscf/pbc/gto/basis/parse_cp2k.py rename to pyscf/gto/basis/parse_cp2k.py index aa6f5cd76f..4701fb0bad 100644 --- a/pyscf/pbc/gto/basis/parse_cp2k.py +++ b/pyscf/gto/basis/parse_cp2k.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright 2014-2018,2021 The PySCF Developers. All Rights Reserved. +# Copyright 2014-2023 The PySCF Developers. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ ''' import re +from pyscf.lib.exceptions import BasisNotFoundError from pyscf.gto.basis import parse_nwchem from pyscf import __config__ @@ -32,6 +33,22 @@ def parse(string, optimize=False): '''Parse the basis text which is in CP2K format, return an internal basis format which can be assigned to :attr:`Mole.basis` Lines started with # are ignored. + + Examples: + + >>> cell = gto.Cell() + >>> cell.basis = {'C': pyscf.gto.basis.parse_cp2k.parse(""" + ... C DZVP-GTH + ... 2 + ... 2 0 1 4 2 2 + ... 4.3362376436 0.1490797872 0.0000000000 -0.0878123619 0.0000000000 + ... 1.2881838513 -0.0292640031 0.0000000000 -0.2775560300 0.0000000000 + ... 0.4037767149 -0.6882040510 0.0000000000 -0.4712295093 0.0000000000 + ... 0.1187877657 -0.3964426906 1.0000000000 -0.4058039291 1.0000000000 + ... 3 2 2 1 1 + ... 0.5500000000 1.0000000000 + ... # + ... """)} ''' bastxt = [] for dat in string.splitlines(): @@ -88,6 +105,4 @@ def search_seg(basisfile, symb): # remove blank lines return [x.strip() for x in dat.splitlines() if x.strip() and 'END' not in x] - raise RuntimeError('Basis not found for %s in %s' % (symb, basisfile)) - - + raise BasisNotFoundError(f'Basis for {symb} not found in {basisfile}') diff --git a/pyscf/pbc/gto/pseudo/parse_cp2k.py b/pyscf/gto/basis/parse_cp2k_pp.py similarity index 85% rename from pyscf/pbc/gto/pseudo/parse_cp2k.py rename to pyscf/gto/basis/parse_cp2k_pp.py index dc9cee819e..c385ca3b9c 100644 --- a/pyscf/pbc/gto/pseudo/parse_cp2k.py +++ b/pyscf/gto/basis/parse_cp2k_pp.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright 2014-2018,2021 The PySCF Developers. All Rights Reserved. +# Copyright 2014-2023 The PySCF Developers. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -21,12 +21,29 @@ ''' import sys +from pyscf.lib.exceptions import BasisNotFoundError import numpy as np def parse(string): '''Parse the pseudo text *string* which is in CP2K format, return an internal basis format which can be assigned to :attr:`Cell.pseudo` Lines started with # are ignored. + + Args: + string : Blank linke and the lines of "PSEUDOPOTENTIAL" and "END" will be ignored + + Examples: + + >>> cell = gto.Cell() + >>> cell.pseudo = {'C': pyscf.gto.basis.pseudo_cp2k.parse(""" + ... #PSEUDOPOTENTIAL + ... C GTH-BLYP-q4 + ... 2 2 + ... 0.33806609 2 -9.13626871 1.42925956 + ... 2 + ... 0.30232223 1 9.66551228 + ... 0.28637912 0 + ... """)} ''' pseudotxt = [x.strip() for x in string.splitlines() if x.strip() and 'END' not in x and '#PSEUDOPOTENTIAL' not in x] @@ -90,7 +107,7 @@ def search_seg(pseudofile, symb, suffix=None): else: if any(suffix == x.split('-')[-1] for x in dat[0].split()): return dat - raise RuntimeError('Pseudopotential not found for %s in %s' % (symb, pseudofile)) + raise BasisNotFoundError(f'Pseudopotential for {symb} in {pseudofile}') if __name__ == '__main__': args = sys.argv[1:] diff --git a/pyscf/gto/basis/parse_nwchem.py b/pyscf/gto/basis/parse_nwchem.py index da21424b3a..0bf5f59bea 100644 --- a/pyscf/gto/basis/parse_nwchem.py +++ b/pyscf/gto/basis/parse_nwchem.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright 2014-2020 The PySCF Developers. All Rights Reserved. +# Copyright 2014-2023 The PySCF Developers. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -26,20 +26,16 @@ import re import numpy -import numpy as np import scipy.linalg from pyscf.data.elements import _std_symbol from pyscf.lib.exceptions import BasisNotFoundError +from pyscf.gto.basis.parse_nwchem_ecp import DISABLE_EVAL, MAXL, SPDF, MAPSPDF +from pyscf.gto.basis.parse_nwchem_ecp import parse as parse_ecp +from pyscf.gto.basis.parse_nwchem_ecp import load as load_ecp +from pyscf.gto.basis.parse_nwchem_ecp import convert_ecp_to_nwchem from pyscf import __config__ -DISABLE_EVAL = getattr(__config__, 'DISABLE_EVAL', False) - -MAXL = 15 -SPDF = 'SPDFGHIKLMNORTU' -MAPSPDF = {key: l for l, key in enumerate(SPDF)} - BASIS_SET_DELIMITER = re.compile('# *BASIS SET.*\n|END\n') -ECP_DELIMITER = re.compile('\n *ECP *\n') def parse(string, symb=None, optimize=True): '''Parse the basis text which is in NWChem format. Return an internal @@ -97,6 +93,7 @@ def parse(string, symb=None, optimize=True): return _parse(raw_basis, optimize) def load(basisfile, symb, optimize=True): + '''Load basis for atom of symb from file''' raw_basis = search_seg(basisfile, symb) return _parse(raw_basis, optimize) @@ -145,79 +142,6 @@ def _parse(raw_basis, optimize=True): basis_sorted = remove_zero(basis_sorted) return basis_sorted -def parse_ecp(string, symb=None): - if symb is not None: - symb = _std_symbol(symb) - raw_data = string.splitlines() - for i, dat in enumerate(raw_data): - dat0 = dat.split(None, 1) - if dat0 and dat0[0] == symb: - break - if i+1 == len(raw_data): - raise BasisNotFoundError('ECP not found for %s' % symb) - seg = [] - for dat in raw_data[i:]: - dat = dat.strip() - if dat: # remove empty lines - if ((dat[0].isalpha() and dat.split(None, 1)[0].upper() != symb.upper())): - break - else: - seg.append(dat) - else: - seg = string.splitlines() - - ecptxt = [] - for dat in seg: - dat = dat.split('#')[0].strip() - dat_upper = dat.upper() - if (dat and not dat_upper.startswith('END') and not dat_upper.startswith('ECP')): - ecptxt.append(dat) - return _parse_ecp(ecptxt) - -def _parse_ecp(raw_ecp): - ecp_add = [] - nelec = None - for line in raw_ecp: - dat = line.strip() - if not dat or dat.startswith('#'): # comment line - continue - elif dat[0].isalpha(): - key = dat.split()[1].upper() - if key == 'NELEC': - nelec = int(dat.split()[2]) - continue - elif key == 'UL': - ecp_add.append([-1]) - else: - ecp_add.append([MAPSPDF[key]]) - # up to r^6 - by_ang = [[] for i in range(7)] - ecp_add[-1].append(by_ang) - else: - line = dat.replace('D','e').split() - l = int(line[0]) - try: - coef = [float(x) for x in line[1:]] - except ValueError: - if DISABLE_EVAL: - raise ValueError('Failed to parse ecp %s' % line) - else: - coef = list(eval(','.join(line[1:]))) - by_ang[l].append(coef) - - if nelec is None: - return [] - - bsort = [] - for l in range(-1, MAXL): - bsort.extend([b for b in ecp_add if b[0] == l]) - if not bsort: - raise BasisNotFoundError(f'ECP data not found in "{raw_ecp}"') - return [nelec, bsort] - -def load_ecp(basisfile, symb): - return _parse_ecp(search_ecp(basisfile, symb)) - def search_seg(basisfile, symb): symb = _std_symbol(symb) with open(basisfile, 'r') as fin: @@ -234,29 +158,6 @@ def _search_basis_block(raw_data, symb): break return raw_basis -def search_ecp(basisfile, symb): - symb = _std_symbol(symb) - with open(basisfile, 'r') as fin: - fdata = re.split(ECP_DELIMITER, fin.read()) - if len(fdata) <= 1: - return [] - - fdata = fdata[1].splitlines() - for i, dat in enumerate(fdata): - dat0 = dat.split(None, 1) - if dat0 and dat0[0] == symb: - break - seg = [] - for dat in fdata[i:]: - dat = dat.strip() - if dat: # remove empty lines - if ((dat[0].isalpha() and dat.split(None, 1)[0].upper() != symb.upper())): - return seg - else: - seg.append(dat) - return [] - - def convert_basis_to_nwchem(symb, basis): '''Convert the internal basis format to NWChem format string''' res = [] @@ -287,22 +188,6 @@ def convert_basis_to_nwchem(symb, basis): res.append(' '.join('%15.9f'%x for x in dat)) return '\n'.join(res) -def convert_ecp_to_nwchem(symb, ecp): - '''Convert the internal ecp format to NWChem format string''' - symb = _std_symbol(symb) - res = ['%-2s nelec %d' % (symb, ecp[0])] - - for ecp_block in ecp[1]: - l = ecp_block[0] - if l == -1: - res.append('%-2s ul' % symb) - else: - res.append('%-2s %s' % (symb, SPDF[l].lower())) - for r_order, dat in enumerate(ecp_block[1]): - for e,c in dat: - res.append('%d %15.9f %15.9f' % (r_order, e, c)) - return '\n'.join(res) - def optimize_contraction(basis): '''Search the basis segments which have the same exponents then merge them to the general contracted sets. diff --git a/pyscf/gto/basis/parse_nwchem_ecp.py b/pyscf/gto/basis/parse_nwchem_ecp.py new file mode 100644 index 0000000000..f768abb524 --- /dev/null +++ b/pyscf/gto/basis/parse_nwchem_ecp.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python +# Copyright 2023 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Author: Qiming Sun +# + +''' +Parsers for basis set in the NWChem format +''' + +__all__ = ['parse', 'load', 'convert_ecp_to_nwchem'] + +import re +from pyscf.data.elements import _std_symbol +from pyscf.lib.exceptions import BasisNotFoundError +from pyscf import __config__ + +DISABLE_EVAL = getattr(__config__, 'DISABLE_EVAL', False) + +MAXL = 15 +SPDF = 'SPDFGHIKLMNORTU' +MAPSPDF = {key: l for l, key in enumerate(SPDF)} + +ECP_DELIMITER = re.compile('\n *ECP *\n') + +def parse(string, symb=None): + '''Parse ECP text which is in NWChem format. Return an internal + basis format which can be assigned to attribute :attr:`Mole.ecp` + Empty lines, or the lines started with #, or the lines of "BASIS SET" and + "END" will be ignored are ignored. + ''' + + if symb is not None: + symb = _std_symbol(symb) + raw_data = string.splitlines() + for i, dat in enumerate(raw_data): + dat0 = dat.split(None, 1) + if dat0 and dat0[0] == symb: + break + if i+1 == len(raw_data): + raise BasisNotFoundError('ECP not found for %s' % symb) + seg = [] + for dat in raw_data[i:]: + dat = dat.strip() + if dat: # remove empty lines + if ((dat[0].isalpha() and dat.split(None, 1)[0].upper() != symb.upper())): + break + else: + seg.append(dat) + else: + seg = string.splitlines() + + ecptxt = [] + for dat in seg: + dat = dat.split('#')[0].strip() + dat_upper = dat.upper() + if (dat and not dat_upper.startswith('END') and not dat_upper.startswith('ECP')): + ecptxt.append(dat) + return _parse_ecp(ecptxt) + +def _parse_ecp(raw_ecp): + ecp_add = [] + nelec = None + for line in raw_ecp: + dat = line.strip() + if not dat or dat.startswith('#'): # comment line + continue + elif dat[0].isalpha(): + key = dat.split()[1].upper() + if key == 'NELEC': + nelec = int(dat.split()[2]) + continue + elif key == 'UL': + ecp_add.append([-1]) + else: + ecp_add.append([MAPSPDF[key]]) + # up to r^6 + by_ang = [[] for i in range(7)] + ecp_add[-1].append(by_ang) + else: + line = dat.replace('D','e').split() + l = int(line[0]) + try: + coef = [float(x) for x in line[1:]] + except ValueError: + if DISABLE_EVAL: + raise ValueError('Failed to parse ecp %s' % line) + else: + coef = list(eval(','.join(line[1:]))) + by_ang[l].append(coef) + + if nelec is None: + return [] + + bsort = [] + for l in range(-1, MAXL): + bsort.extend([b for b in ecp_add if b[0] == l]) + if not bsort: + raise BasisNotFoundError(f'ECP data not found in "{raw_ecp}"') + return [nelec, bsort] + +def load(basisfile, symb): + '''Load ECP for atom of symb from file''' + return _parse_ecp(_search_ecp(basisfile, symb)) + +def _search_ecp(basisfile, symb): + symb = _std_symbol(symb) + with open(basisfile, 'r') as fin: + fdata = re.split(ECP_DELIMITER, fin.read()) + if len(fdata) <= 1: + return [] + + fdata = fdata[1].splitlines() + for i, dat in enumerate(fdata): + dat0 = dat.split(None, 1) + if dat0 and dat0[0] == symb: + break + seg = [] + for dat in fdata[i:]: + dat = dat.strip() + if dat: # remove empty lines + if ((dat[0].isalpha() and dat.split(None, 1)[0].upper() != symb.upper())): + return seg + else: + seg.append(dat) + return [] + +def convert_ecp_to_nwchem(symb, ecp): + '''Convert the internal ecp format to NWChem format string''' + symb = _std_symbol(symb) + res = ['%-2s nelec %d' % (symb, ecp[0])] + + for ecp_block in ecp[1]: + l = ecp_block[0] + if l == -1: + res.append('%-2s ul' % symb) + else: + res.append('%-2s %s' % (symb, SPDF[l].lower())) + for r_order, dat in enumerate(ecp_block[1]): + for e,c in dat: + res.append('%d %15.9f %15.9f' % (r_order, e, c)) + return '\n'.join(res) diff --git a/pyscf/gto/mole.py b/pyscf/gto/mole.py index 71afac1486..e8b10fd1ab 100644 --- a/pyscf/gto/mole.py +++ b/pyscf/gto/mole.py @@ -452,6 +452,13 @@ def format_basis(basis_tab): [5.4471780000000001, 0.15628500000000001], [0.82454700000000003, 0.90469100000000002]], [0, [0.18319199999999999, 1.0]]]} + + >>> gto.format_basis({'H':'gth-szv'}) + {'H': [[0, + (8.3744350009, -0.0283380461), + (1.8058681460, -0.1333810052), + (0.4852528328, -0.3995676063), + (0.1658236932, -0.5531027541)]]} ''' basis_converter = _generate_basis_converter() fmt_basis = {} @@ -476,27 +483,34 @@ def nparray_to_list(item): return val def load(basis_name, symb): - if basis_name.lower().startswith('unc'): - return uncontract(basis.load(basis_name[3:], symb)) + unc = basis_name.lower().startswith('unc') + if unc: + basis_name = basis_name[3:] + if 'gth' in basis_name: + from pyscf.pbc.gto.basis import load as pbc_basis_load + _basis = pbc_basis_load(basis_name, symb) else: - return basis.load(basis_name, symb) + _basis = basis.load(basis_name, symb) + if unc: + _basis = uncontracted_basis(_basis) + return _basis def converter(symb, raw_basis): - if isinstance(raw_basis, (str, unicode)): - bset = load(str(raw_basis), _std_symbol_without_ghost(symb)) - elif (any(isinstance(x, (str, unicode)) for x in raw_basis) + if isinstance(raw_basis, str): + _basis = load(raw_basis, _std_symbol_without_ghost(symb)) + elif (any(isinstance(x, str) for x in raw_basis) # The first element is the basis of internal format or not isinstance(raw_basis[0][0], int)): stdsymb = _std_symbol_without_ghost(symb) - bset = [] + _basis = [] for rawb in raw_basis: - if isinstance(rawb, (str, unicode)): - bset += load(str(rawb), stdsymb) + if isinstance(rawb, str): + _basis.extend(load(rawb, stdsymb)) else: - bset += nparray_to_list(rawb) + _basis.extend(nparray_to_list(rawb)) else: - bset = nparray_to_list(raw_basis) - return bset + _basis = nparray_to_list(raw_basis) + return _basis return converter def uncontracted_basis(_basis): @@ -699,6 +713,48 @@ def format_ecp(ecp_tab): fmt_ecp[symb] = atom_ecp return fmt_ecp +def format_pseudo(pseudo_tab): + r'''Convert the input :attr:`pseudo` (dict) to the internal data format:: + + { atom: ( (nelec_s, nele_p, nelec_d, ...), + rloc, nexp, (cexp_1, cexp_2, ..., cexp_nexp), + nproj_types, + (r1, nproj1, ( (hproj1[1,1], hproj1[1,2], ..., hproj1[1,nproj1]), + (hproj1[2,1], hproj1[2,2], ..., hproj1[2,nproj1]), + ... + (hproj1[nproj1,1], hproj1[nproj1,2], ... ) )), + (r2, nproj2, ( (hproj2[1,1], hproj2[1,2], ..., hproj2[1,nproj1]), + ... ) ) + ) + ... } + + Args: + pseudo_tab : dict + Similar to :attr:`pseudo` (a dict), it **cannot** be a str + + Returns: + Formatted :attr:`pseudo` + + Examples: + + >>> pbc.format_pseudo({'H':'gth-blyp', 'He': 'gth-pade'}) + {'H': [[1], + 0.2, 2, [-4.19596147, 0.73049821], 0], + 'He': [[2], + 0.2, 2, [-9.1120234, 1.69836797], 0]} + ''' + from pyscf.pbc.gto.pseudo import load + fmt_pseudo = {} + for atom, atom_pp in pseudo_tab.items(): + symb = _symbol(atom) + + if isinstance(atom_pp, str): + stdsymb = _std_symbol_without_ghost(symb) + fmt_pseudo[symb] = load(atom_pp, stdsymb) + else: + fmt_pseudo[symb] = atom_pp + return fmt_pseudo + # transform etb to basis format def expand_etb(l, n, alpha, beta): r'''Generate the exponents of even tempered basis for :attr:`Mole.basis`. @@ -825,8 +881,11 @@ def conc_mol(mol1, mol2): mol3._pseudo.update(mol1._pseudo) mol3._ecp.update(mol2._ecp) mol3._ecp.update(mol1._ecp) + mol3._pseudo.update(mol2._pseudo) + mol3._pseudo.update(mol1._pseudo) mol3.basis = mol3._basis mol3.ecp = mol3._ecp + mol3.pseudo = mol3._pseudo mol3.nucprop.update(mol1.nucprop) mol3.nucprop.update(mol2.nucprop) @@ -1149,6 +1208,8 @@ def copy(mol): newmol._basis = copy.deepcopy(mol._basis) newmol.ecp = copy.deepcopy(mol.ecp) newmol._ecp = copy.deepcopy(mol._ecp) + newmol.pseudo = copy.deepcopy(mol.pseudo) + newmol._pseudo = copy.deepcopy(mol._pseudo) return newmol def pack(mol): @@ -1167,6 +1228,7 @@ def pack(mol): 'nucmod' : mol.nucmod, 'nucprop' : mol.nucprop, 'ecp' : mol.ecp, + 'pseudo' : mol.pseudo, '_nelectron': mol._nelectron, 'verbose' : mol.verbose} return mdic @@ -1185,6 +1247,7 @@ def dumps(mol): exclude_keys = set(('output', 'stdout', '_keys', # Constructing in function loads 'symm_orb', 'irrep_id', 'irrep_name')) + # FIXME: nparray and kpts for cell objects may need to be excluded nparray_keys = set(('_atm', '_bas', '_env', '_ecpbas', '_symm_orig', '_symm_axes')) @@ -1198,6 +1261,7 @@ def dumps(mol): moldic['atom'] = repr(mol.atom) moldic['basis']= repr(mol.basis) moldic['ecp' ] = repr(mol.ecp) + moldic['pseudo'] = repr(mol.pseudo) try: return json.dumps(moldic) @@ -1244,6 +1308,7 @@ def byteify(inp): mol.atom = eval(mol.atom) mol.basis= eval(mol.basis) mol.ecp = eval(mol.ecp) + mol.pseudo = eval(mol.pseudo) mol._atm = numpy.array(mol._atm, dtype=numpy.int32) mol._bas = numpy.array(mol._bas, dtype=numpy.int32) mol._env = numpy.array(mol._env, dtype=numpy.double) @@ -2293,14 +2358,8 @@ def __init__(self, **kwargs): self._atom = [] self._basis = {} self._ecp = {} - self._built = False - - # _pseudo is created to make the mol object consistenet with the mol - # object converted from Cell.to_mol(). It is initialized in the - # Cell.build() method only. Assigning _pseudo to mol object basically - # has no effects. Mole.build() method does not have code to access the - # contents of _pseudo. self._pseudo = {} + self._built = False # Some methods modify ._env. These method are executed in the context # _TemporaryMoleContext which is protected by the _ctx_lock. @@ -2449,6 +2508,7 @@ def build(self, dump_input=True, parse_arg=ARGPARSE, if unit is not None: self.unit = unit if nucmod is not None: self.nucmod = nucmod if ecp is not None: self.ecp = ecp + if pseudo is not None: self.pseudo = pseudo if charge is not None: self.charge = charge if spin != 0: self.spin = spin if symmetry is not None: self.symmetry = symmetry @@ -2481,41 +2541,42 @@ def build(self, dump_input=True, parse_arg=ARGPARSE, self._atom = self.format_atom(self.atom, unit=self.unit) uniq_atoms = set([a[0] for a in self._atom]) - if isinstance(self.basis, (str, unicode, tuple, list)): - # specify global basis for whole molecule - _basis = dict(((a, self.basis) for a in uniq_atoms)) - elif 'default' in self.basis: - default_basis = self.basis['default'] - _basis = dict(((a, default_basis) for a in uniq_atoms)) - _basis.update(self.basis) - del (_basis['default']) - else: - _basis = self.basis + _basis = _parse_default_basis(self.basis, uniq_atoms) self._basis = self.format_basis(_basis) - - if self.ecp: - # Unless explicitly input, ECP should not be assigned to ghost atoms - if isinstance(self.ecp, (str, unicode)): - _ecp = dict([(a, str(self.ecp)) - for a in uniq_atoms if not is_ghost_atom(a)]) - elif 'default' in self.ecp: - default_ecp = self.ecp['default'] - _ecp = dict(((a, default_ecp) - for a in uniq_atoms if not is_ghost_atom(a))) - _ecp.update(self.ecp) - del (_ecp['default']) - else: - _ecp = self.ecp - self._ecp = self.format_ecp(_ecp) - - #TODO: pass PBC basis here - env = self._env[:PTR_ENV_START] self._atm, self._bas, self._env = \ self.make_env(self._atom, self._basis, env, self.nucmod, self.nucprop) - self._atm, self._ecpbas, self._env = \ - self.make_ecp_env(self._atm, self._ecp, self._env) + + if self.pseudo: + self.ecp, self.pseudo = classify_ecp_pseudo(self, self.ecp, self.pseudo) + + if self.ecp: + # Unless explicitly input, ECP should not be assigned to ghost atoms + atoms_wo_ghost = [a for a in uniq_atoms if not is_ghost_atom(a)] + _ecp = _parse_default_basis(self.ecp, atoms_wo_ghost) + self._ecp = self.format_ecp(_ecp) + if self._ecp: + self._atm, self._ecpbas, self._env = \ + self.make_ecp_env(self._atm, self._ecp, self._env) + + if self.pseudo: + # Unless explicitly input, PP should not be assigned to ghost atoms + atoms_wo_ghost = [a for a in uniq_atoms if not is_ghost_atom(a)] + _pseudo = _parse_default_basis(self.pseudo, atoms_wo_ghost) + self._pseudo = self.format_pseudo(_pseudo) + if self._pseudo: + conflicts = set(self._pseudo).intersection(self._ecp) + if conflicts: + raise RuntimeError('Pseudo potential for atoms %s are defined ' + 'in both .ecp and .pseudo.' % list(conflicts)) + + for ia, atom in enumerate(self._atom): + symb = atom[0] + if (symb in self._pseudo and + # skip ghost atoms + self._atm[ia,0] != 0): + self._atm[ia,0] = sum(_pseudo[symb][0]) if self.spin is None: self.spin = self.nelectron % 2 @@ -3762,6 +3823,19 @@ def to_cell(self, a, dimension=3): cell.build(False, False) return cell +def _parse_default_basis(basis, uniq_atoms): + if isinstance(basis, (str, tuple, list)): + # default basis for all atoms + _basis = {a: basis for a in uniq_atoms} + elif 'default' in basis: + default_basis = basis['default'] + _basis = {a: default_basis for a in uniq_atoms} + _basis.update(basis) + del _basis['default'] + else: + _basis = basis + return _basis + def _parse_nuc_mod(str_or_int_or_fn): nucmod = NUC_POINT if callable(str_or_int_or_fn): @@ -4001,4 +4075,52 @@ def fakemol_for_charges(coords, expnt=1e16): fakemol._built = True return fakemol -del (BASE) +def classify_ecp_pseudo(mol, ecp, pp): + ''' + Check whether ecp keywords are presented in pp and whether pp keywords are + presented in ecp. The return (ecp, pp) should have only the ecp keywords and + pp keywords in each dict. + The "misplaced" ecp/pp keywords have the lowest priority. E.g., if an atom + is defined in ecp, the same ecp atom found in pp does NOT replace the + definition in ecp, and vise versa. + ''' + from pyscf.pbc.gto import pseudo + def classify(ecp, pp_alias): + if isinstance(ecp, (str, unicode)): + if pseudo._format_pseudo_name(ecp)[0] in pp_alias: + return {}, {'default': str(ecp)} + elif isinstance(ecp, dict): + ecp_as_pp = {} + for atom in ecp: + key = ecp[atom] + if (isinstance(key, (str, unicode)) and + pseudo._format_pseudo_name(key)[0] in pp_alias): + ecp_as_pp[atom] = str(key) + if ecp_as_pp: + ecp_left = dict(ecp) + for atom in ecp_as_pp: + ecp_left.pop(atom) + return ecp_left, ecp_as_pp + return ecp, {} + ecp_left, ecp_as_pp = classify(ecp, basis.PP_ALIAS) + pp_left , pp_as_ecp = classify(pp, basis.ALIAS) + + # ecp = ecp_left + pp_as_ecp + # pp = pp_left + ecp_as_pp + ecp = ecp_left + if pp_as_ecp and not isinstance(ecp_left, (str, unicode)): + # If ecp is a str, all atoms have ecp definition. + # The misplaced ecp has no effects. + logger.info(mol, 'pseudo-potentials keywords for %s found in .ecp', + pp_as_ecp.keys()) + if ecp_left: + pp_as_ecp.update(ecp_left) + ecp = pp_as_ecp + pp = pp_left + if ecp_as_pp and not isinstance(pp_left, (str, unicode)): + logger.info(mol, 'ECP keywords for %s found in .pseudo', + ecp_as_pp.keys()) + if pp_left: + ecp_as_pp.update(pp_left) + pp = ecp_as_pp + return ecp, pp diff --git a/pyscf/pbc/gto/basis/__init__.py b/pyscf/pbc/gto/basis/__init__.py index 400ed7bf9a..df348dee2b 100644 --- a/pyscf/pbc/gto/basis/__init__.py +++ b/pyscf/pbc/gto/basis/__init__.py @@ -16,103 +16,5 @@ # Author: Qiming Sun # Timothy Berkelbach -import os -from pyscf.gto import basis as _mol_basis -from pyscf.gto.basis import parse_ecp, load_ecp -from pyscf.pbc.gto.basis import parse_cp2k -from pyscf import __config__ - -ALIAS = { - 'gthaugdzvp' : 'gth-aug-dzvp.dat', - 'gthaugqzv2p' : 'gth-aug-qzv2p.dat', - 'gthaugqzv3p' : 'gth-aug-qzv3p.dat', - 'gthaugtzv2p' : 'gth-aug-tzv2p.dat', - 'gthaugtzvp' : 'gth-aug-tzvp.dat', - 'gthdzv' : 'gth-dzv.dat', - 'gthdzvp' : 'gth-dzvp.dat', - 'gthqzv2p' : 'gth-qzv2p.dat', - 'gthqzv3p' : 'gth-qzv3p.dat', - 'gthszv' : 'gth-szv.dat', - 'gthtzv2p' : 'gth-tzv2p.dat', - 'gthtzvp' : 'gth-tzvp.dat', - 'gthccdzvp' : 'gth-cc-dzvp.dat', - 'gthcctzvp' : 'gth-cc-tzvp.dat', - 'gthccqzvp' : 'gth-cc-qzvp.dat', - 'gthszvmolopt' : 'gth-szv-molopt.dat', - 'gthdzvpmolopt' : 'gth-dzvp-molopt.dat', - 'gthtzvpmolopt' : 'gth-tzvp-molopt.dat', - 'gthtzv2pmolopt' : 'gth-tzv2p-molopt.dat', - 'gthszvmoloptsr' : 'gth-szv-molopt-sr.dat', - 'gthdzvpmoloptsr' : 'gth-dzvp-molopt-sr.dat', -} - -OPTIMIZE_CONTRACTION = getattr(__config__, 'pbc_gto_basis_parse_optimize', False) -def parse(string, optimize=OPTIMIZE_CONTRACTION): - '''Parse the basis text in CP2K format, return an internal basis format - which can be assigned to :attr:`Cell.basis` - - Args: - string : Blank linke and the lines of "BASIS SET" and "END" will be ignored - - Examples: - - >>> cell = gto.Cell() - >>> cell.basis = {'C': gto.basis.parse(""" - ... C DZVP-GTH - ... 2 - ... 2 0 1 4 2 2 - ... 4.3362376436 0.1490797872 0.0000000000 -0.0878123619 0.0000000000 - ... 1.2881838513 -0.0292640031 0.0000000000 -0.2775560300 0.0000000000 - ... 0.4037767149 -0.6882040510 0.0000000000 -0.4712295093 0.0000000000 - ... 0.1187877657 -0.3964426906 1.0000000000 -0.4058039291 1.0000000000 - ... 3 2 2 1 1 - ... 0.5500000000 1.0000000000 - ... # - ... """)} - ''' - return parse_cp2k.parse(string) - -def load(file_or_basis_name, symb, optimize=OPTIMIZE_CONTRACTION): - '''Convert the basis of the given symbol to internal format - - Args: - file_or_basis_name : str - Case insensitive basis set name. Special characters will be removed. - symb : str - Atomic symbol, Special characters will be removed. - - Examples: - Load DZVP-GTH of carbon - - >>> cell = gto.Cell() - >>> cell.basis = {'C': load('gth-dzvp', 'C')} - ''' - if os.path.isfile(file_or_basis_name): - try: - return parse_cp2k.load(file_or_basis_name, symb) - except RuntimeError: - with open(file_or_basis_name, 'r') as fin: - return parse_cp2k.parse(fin.read()) - - name = _mol_basis._format_basis_name(file_or_basis_name) - if '@' in name: - split_name = name.split('@') - assert len(split_name) == 2 - name = split_name[0] - contr_scheme = _mol_basis._convert_contraction(split_name[1]) - else: - contr_scheme = 'Full' - - if name not in ALIAS: - return _mol_basis.load(file_or_basis_name, symb) - - basmod = ALIAS[name] - symb = ''.join(i for i in symb if i.isalpha()) - b = parse_cp2k.load(os.path.join(os.path.dirname(__file__), basmod), symb) - - if contr_scheme != 'Full': - b = _mol_basis._truncate(b, contr_scheme, symb, split_name) - return b - -del(OPTIMIZE_CONTRACTION) - +from pyscf.gto.basis import parse, load, parse_ecp, load_ecp +from pyscf.gto.basis import GTH_ALIAS as ALIAS diff --git a/pyscf/pbc/gto/cell.py b/pyscf/pbc/gto/cell.py index 3d417c1c88..b2413258c2 100644 --- a/pyscf/pbc/gto/cell.py +++ b/pyscf/pbc/gto/cell.py @@ -23,28 +23,17 @@ import warnings import numpy as np import scipy.linalg -try: - from scipy.special import factorial2 -except ImportError: - from scipy.misc import factorial2 from scipy.special import erf, erfc -import scipy.optimize import pyscf.lib.parameters as param from pyscf import lib from pyscf.dft import radi from pyscf.lib import logger from pyscf.gto import mole from pyscf.gto import moleintor -from pyscf.gto.mole import (_symbol, _rm_digit, _atom_symbol, _std_symbol, - _std_symbol_without_ghost, charge, is_ghost_atom, is_au) # noqa -from pyscf.gto.mole import conc_env, uncontract -from pyscf.pbc.gto import basis -from pyscf.pbc.gto import pseudo +from pyscf.gto.mole import conc_env, is_au # noqa from pyscf.pbc.gto import _pbcintor from pyscf.pbc.gto.eval_gto import eval_gto as pbc_eval_gto from pyscf.pbc.tools import pbc as pbctools -from pyscf.gto.basis import ALIAS as MOLE_ALIAS -from pyscf.pbc.lib import kpts as libkpts from pyscf import __config__ INTEGRAL_PRECISION = getattr(__config__, 'pbc_gto_cell_Cell_precision', 1e-8) @@ -74,111 +63,6 @@ def M(**kwargs): return cell C = M - -def format_pseudo(pseudo_tab): - r'''Convert the input :attr:`Cell.pseudo` (dict) to the internal data format:: - - { atom: ( (nelec_s, nele_p, nelec_d, ...), - rloc, nexp, (cexp_1, cexp_2, ..., cexp_nexp), - nproj_types, - (r1, nproj1, ( (hproj1[1,1], hproj1[1,2], ..., hproj1[1,nproj1]), - (hproj1[2,1], hproj1[2,2], ..., hproj1[2,nproj1]), - ... - (hproj1[nproj1,1], hproj1[nproj1,2], ... ) )), - (r2, nproj2, ( (hproj2[1,1], hproj2[1,2], ..., hproj2[1,nproj1]), - ... ) ) - ) - ... } - - Args: - pseudo_tab : dict - Similar to :attr:`Cell.pseudo` (a dict), it **cannot** be a str - - Returns: - Formatted :attr:`~Cell.pseudo` - - Examples: - - >>> pbc.format_pseudo({'H':'gth-blyp', 'He': 'gth-pade'}) - {'H': [[1], - 0.2, 2, [-4.19596147, 0.73049821], 0], - 'He': [[2], - 0.2, 2, [-9.1120234, 1.69836797], 0]} - ''' - fmt_pseudo = {} - for atom, atom_pp in pseudo_tab.items(): - symb = _symbol(atom) - - if isinstance(atom_pp, (str, unicode)): - stdsymb = _std_symbol_without_ghost(symb) - fmt_pseudo[symb] = pseudo.load(str(atom_pp), stdsymb) - else: - fmt_pseudo[symb] = atom_pp - return fmt_pseudo - -def make_pseudo_env(cell, _atm, _pseudo, pre_env=[]): - for ia, atom in enumerate(cell._atom): - symb = atom[0] - if symb in _pseudo and _atm[ia,0] != 0: # pass ghost atoms. - _atm[ia,0] = sum(_pseudo[symb][0]) - _pseudobas = None - return _atm, _pseudobas, pre_env - -def format_basis(basis_tab): - '''Convert the input :attr:`Cell.basis` to the internal data format:: - - { atom: (l, kappa, ((-exp, c_1, c_2, ..), nprim, nctr, ptr-exps, ptr-contraction-coeff)), ... } - - Args: - basis_tab : dict - Similar to :attr:`Cell.basis`, it **cannot** be a str - - Returns: - Formated :attr:`~Cell.basis` - - Examples: - - >>> pbc.format_basis({'H':'gth-szv'}) - {'H': [[0, - (8.3744350009, -0.0283380461), - (1.8058681460, -0.1333810052), - (0.4852528328, -0.3995676063), - (0.1658236932, -0.5531027541)]]} - ''' - def convert(basis_name, symb): - if basis_name.lower().startswith('unc'): - return uncontract(basis.load(basis_name[3:], symb)) - else: - return basis.load(basis_name, symb) - - fmt_basis = {} - for atom, atom_basis in basis_tab.items(): - symb = _atom_symbol(atom) - stdsymb = _std_symbol_without_ghost(symb) - - if isinstance(atom_basis, (str, unicode)): - if 'gth' in atom_basis: - bset = convert(str(atom_basis), stdsymb) - else: - bset = atom_basis - else: - bset = [] - for rawb in atom_basis: - if isinstance(rawb, (str, unicode)) and 'gth' in rawb: - bset.append(convert(str(rawb), stdsymb)) - else: - bset.append(rawb) - fmt_basis[symb] = bset - return mole.format_basis(fmt_basis) - -def copy(cell): - '''Deepcopy of the given :class:`Cell` object - ''' - import copy - newcell = mole.copy(cell) - newcell._pseudo = copy.deepcopy(cell._pseudo) - return newcell - def pack(cell): '''Pack the input args of :class:`Cell` to a dict, which can be serialized with :mod:`pickle` @@ -186,7 +70,6 @@ def pack(cell): cldic = mole.pack(cell) cldic['a'] = cell.a cldic['precision'] = cell.precision - cldic['pseudo'] = cell.pseudo cldic['ke_cutoff'] = cell.ke_cutoff cldic['exp_to_discard'] = cell.exp_to_discard cldic['_mesh'] = cell._mesh @@ -283,34 +166,10 @@ def byteify(inp): def conc_cell(cell1, cell2): '''Concatenate two Cell objects. ''' + mol3 = mole.conc_mol(cell1, cell2) cell3 = Cell() - cell3._atm, cell3._bas, cell3._env = \ - conc_env(cell1._atm, cell1._bas, cell1._env, - cell2._atm, cell2._bas, cell2._env) - off = len(cell1._env) - natm_off = len(cell1._atm) - if len(cell2._ecpbas) == 0: - cell3._ecpbas = cell1._ecpbas - else: - ecpbas2 = np.copy(cell2._ecpbas) - ecpbas2[:,mole.ATOM_OF ] += natm_off - ecpbas2[:,mole.PTR_EXP ] += off - ecpbas2[:,mole.PTR_COEFF] += off - if len(cell1._ecpbas) == 0: - cell3._ecpbas = ecpbas2 - else: - cell3._ecpbas = np.vstack((cell1._ecpbas, ecpbas2)) - - cell3.verbose = cell1.verbose - cell3.output = cell1.output - cell3.max_memory = cell1.max_memory - cell3.charge = cell1.charge + cell2.charge - cell3.spin = cell1.spin + cell2.spin - cell3.cart = cell1.cart and cell2.cart - cell3._atom = cell1._atom + cell2._atom - cell3.unit = cell1.unit - cell3._basis = dict(cell2._basis) - cell3._basis.update(cell1._basis) + cell3.__dict__.update(mol3.__dict__) + # Whether to update the lattice_vectors? cell3.a = cell1.a cell3.mesh = np.max((cell1.mesh, cell2.mesh), axis=0) @@ -330,20 +189,6 @@ def conc_cell(cell1, cell2): cell3.dimension = max(cell1.dimension, cell2.dimension) cell3.low_dim_ft_type = cell1.low_dim_ft_type or cell2.low_dim_ft_type cell3.rcut = max(cell1.rcut, cell2.rcut) - - cell3._pseudo.update(cell1._pseudo) - cell3._pseudo.update(cell2._pseudo) - cell3._ecp.update(cell1._ecp) - cell3._ecp.update(cell2._ecp) - - cell3.nucprop.update(cell1.nucprop) - cell3.nucprop.update(cell2.nucprop) - - if not cell1._built: - logger.warn(cell1, 'Warning: intor envs of %s not initialized.', cell1) - if not cell2._built: - logger.warn(cell2, 'Warning: intor envs of %s not initialized.', cell2) - cell3._built = cell1._built or cell2._built return cell3 def intor_cross(intor, cell1, cell2, comp=None, hermi=0, kpts=None, kpt=None, @@ -907,6 +752,7 @@ def make_kpts(cell, nks, wrap_around=WRAP_AROUND, with_gamma_point=WITH_GAMMA, scaled_kpts += np.array(scaled_center) kpts = cell.get_abs_kpts(scaled_kpts) if space_group_symmetry or time_reversal_symmetry: + from pyscf.pbc.lib import kpts as libkpts if space_group_symmetry and not cell.space_group_symmetry: raise RuntimeError('Using k-point symmetry now requires cell ' 'to be built with space group symmetry info:\n' @@ -944,52 +790,6 @@ def get_uniform_grids(cell, mesh=None, wrap_around=True): return coords gen_uniform_grids = get_uniform_grids -# Check whether ecp keywords are presented in pp and whether pp keywords are -# presented in ecp. The return (ecp, pp) should have only the ecp keywords and -# pp keywords in each dict. -# The "misplaced" ecp/pp keywords have lowest priority, ie if the atom is -# defined in ecp, the misplaced ecp atom found in pp does NOT replace the -# definition in ecp, and versa vise. -def classify_ecp_pseudo(cell, ecp, pp): - def classify(ecp, pp_alias): - if isinstance(ecp, (str, unicode)): - if pseudo._format_pseudo_name(ecp)[0] in pp_alias: - return {}, {'default': str(ecp)} - elif isinstance(ecp, dict): - ecp_as_pp = {} - for atom in ecp: - key = ecp[atom] - if (isinstance(key, (str, unicode)) and - pseudo._format_pseudo_name(key)[0] in pp_alias): - ecp_as_pp[atom] = str(key) - if ecp_as_pp: - ecp_left = dict(ecp) - for atom in ecp_as_pp: - ecp_left.pop(atom) - return ecp_left, ecp_as_pp - return ecp, {} - ecp_left, ecp_as_pp = classify(ecp, pseudo.ALIAS) - pp_left , pp_as_ecp = classify(pp, MOLE_ALIAS) - - # ecp = ecp_left + pp_as_ecp - # pp = pp_left + ecp_as_pp - ecp = ecp_left - if pp_as_ecp and not isinstance(ecp_left, (str, unicode)): - # If ecp is a str, all atoms have ecp definition. The misplaced ecp has no effects. - logger.info(cell, 'PBC pseudo-potentials keywords for %s found in .ecp', - pp_as_ecp.keys()) - if ecp_left: - pp_as_ecp.update(ecp_left) - ecp = pp_as_ecp - pp = pp_left - if ecp_as_pp and not isinstance(pp_left, (str, unicode)): - logger.info(cell, 'ECP keywords for %s found in PBC .pseudo', - ecp_as_pp.keys()) - if pp_left: - ecp_as_pp.update(pp_left) - pp = ecp_as_pp - return ecp, pp - def _split_basis(cell, delimiter=EXP_DELIMITER): ''' Split the contracted basis to small segmant. The new basis has more @@ -1119,7 +919,6 @@ def __init__(self, **kwargs): # of fourier components, with .5 * G**2 < ke_cutoff self.ke_cutoff = None - self.pseudo = None self.dimension = 3 # TODO: Simple hack for now; the implementation of ewald depends on the # density-fitting class. This determines how the ewald produces @@ -1246,10 +1045,11 @@ def __getattr__(self, key): def _build_symmetry(self, kpts=None, **kwargs): '''Construct symmetry adapted crystalline atomic orbitals ''' + from pyscf.pbc.lib.kpts import KPoints from pyscf.pbc.symm.basis import symm_adapted_basis if kpts is None: return mole.Mole._build_symmetry(self) - elif isinstance(kpts, libkpts.KPoints): + elif isinstance(kpts, KPoints): self.symm_orb, self.irrep_id = symm_adapted_basis(self, kpts) return self else: @@ -1297,8 +1097,7 @@ def build_lattice_symmetry(self, check_mesh_symmetry=True): #Note: Exculde dump_input, parse_arg, basis from kwargs to avoid parsing twice def build(self, dump_input=True, parse_arg=mole.ARGPARSE, a=None, mesh=None, ke_cutoff=None, precision=None, nimgs=None, - pseudo=None, basis=None, h=None, dimension=None, rcut= None, - ecp=None, low_dim_ft_type=None, + h=None, dimension=None, rcut= None, low_dim_ft_type=None, space_group_symmetry=None, symmorphic=None, *args, **kwargs): '''Setup Mole molecule and Cell and initialize some control parameters. Whenever you change the value of the attributes of :class:`Cell`, @@ -1323,10 +1122,6 @@ def build(self, dump_input=True, parse_arg=mole.ARGPARSE, Cutoff radius (unit Bohr) in lattice summation to produce periodicity. The value can be estimated based on the required precision. It's recommended NOT making changes to this value. - pseudo : dict or str - To define pseudopotential. - ecp : dict or str - To define ECP type pseudopotential. h : (3,3) ndarray a.T. Deprecated dimension : int @@ -1349,12 +1144,9 @@ def build(self, dump_input=True, parse_arg=mole.ARGPARSE, if a is not None: self.a = a if mesh is not None: self.mesh = mesh if nimgs is not None: self.nimgs = nimgs - if pseudo is not None: self.pseudo = pseudo - if basis is not None: self.basis = basis if dimension is not None: self.dimension = dimension if precision is not None: self.precision = precision if rcut is not None: self.rcut = rcut - if ecp is not None: self.ecp = ecp if ke_cutoff is not None: self.ke_cutoff = ke_cutoff if low_dim_ft_type is not None: self.low_dim_ft_type = low_dim_ft_type if space_group_symmetry is not None: @@ -1362,39 +1154,8 @@ def build(self, dump_input=True, parse_arg=mole.ARGPARSE, if symmorphic is not None: self.symmorphic = symmorphic - if 'unit' in kwargs: - self.unit = kwargs['unit'] - - if 'atom' in kwargs: - self.atom = kwargs['atom'] - - if 'gs' in kwargs: - self.gs = kwargs['gs'] - - # Set-up pseudopotential if it exists - # This must be done before build() because it affects - # tot_electrons() via the call to .atom_charge() - - self.ecp, self.pseudo = classify_ecp_pseudo(self, self.ecp, self.pseudo) - if self.pseudo is not None: - _atom = self.format_atom(self.atom) - # Unless explicitly input, ECP should not be assigned to ghost atoms - uniq_atoms = set([a[0] for a in _atom if not is_ghost_atom(a[0])]) - if isinstance(self.pseudo, (str, unicode)): - # specify global pseudo for whole molecule - _pseudo = dict([(a, str(self.pseudo)) for a in uniq_atoms]) - elif 'default' in self.pseudo: - default_pseudo = self.pseudo['default'] - _pseudo = dict(((a, default_pseudo) for a in uniq_atoms)) - _pseudo.update(self.pseudo) - del (_pseudo['default']) - else: - _pseudo = self.pseudo - self._pseudo = self.format_pseudo(_pseudo) - - # Do regular Mole.build - _built = self._built - mole.GTO.build(self, False, parse_arg, *args, **kwargs) + dump_input = dump_input and not self._built and self.verbose > logger.NOTE + mole.GTO.build(self, dump_input, parse_arg, *args, **kwargs) exp_min = np.array([self.bas_exp(ib).min() for ib in range(self.nbas)]) if self.exp_to_discard is None: @@ -1474,8 +1235,6 @@ def build(self, dump_input=True, parse_arg=mole.ARGPARSE, # The rest initialization requires lattice parameters. If .a is not # set, pass the rest initialization. if self.a is None: - if dump_input and not _built and self.verbose > logger.NOTE: - self.dump_input() return self if self.rcut is None or self._rcut_from_build: @@ -1513,8 +1272,7 @@ def build(self, dump_input=True, parse_arg=mole.ARGPARSE, _check_mesh_symm = not self._mesh_from_build self.build_lattice_symmetry(check_mesh_symmetry=_check_mesh_symm) - if dump_input and not _built and self.verbose > logger.NOTE: - self.dump_input() + if dump_input: logger.info(self, 'lattice vectors a1 [%.9f, %.9f, %.9f]', *_a[0]) logger.info(self, ' a2 [%.9f, %.9f, %.9f]', *_a[1]) logger.info(self, ' a3 [%.9f, %.9f, %.9f]', *_a[2]) @@ -1574,14 +1332,6 @@ def vol(self): def Gv(self): return self.get_Gv() - @lib.with_doc(format_pseudo.__doc__) - def format_pseudo(self, pseudo_tab): - return format_pseudo(pseudo_tab) - - @lib.with_doc(format_basis.__doc__) - def format_basis(self, basis_tab): - return format_basis(basis_tab) - @property def gs(self): return [n//2 for n in self.mesh] @@ -1614,20 +1364,6 @@ def nimgs(self, x): (x, rcut_guess)) warnings.warn(msg) - def make_ecp_env(self, _atm, _ecp, pre_env=[]): - if _ecp and self._pseudo: - conflicts = set(self._pseudo.keys()).intersection(set(_ecp.keys())) - if conflicts: - raise RuntimeError('Pseudo potential for atoms %s are defined ' - 'in both .ecp and .pseudo.' % list(conflicts)) - - _ecpbas, _env = np.zeros((0,8)), pre_env - if _ecp: - _atm, _ecpbas, _env = mole.make_ecp_env(self, _atm, _ecp, _env) - if self._pseudo: - _atm, _, _env = make_pseudo_env(self, _atm, self._pseudo, _env) - return _atm, _ecpbas, _env - def lattice_vectors(self): '''Convert the primitive lattice vectors. @@ -1702,7 +1438,8 @@ def get_scaled_kpts(self, abs_kpts, kpts_in_ibz=True): Returns: scaled_kpts : (nkpts, 3) ndarray of floats ''' - if isinstance(abs_kpts, libkpts.KPoints): + from pyscf.pbc.lib.kpts import KPoints + if isinstance(abs_kpts, KPoints): if kpts_in_ibz: return abs_kpts.kpts_scaled_ibz else: @@ -1728,9 +1465,6 @@ def cutoff_to_mesh(self, ke_cutoff): make_kpts = get_kpts = make_kpts - def copy(self): - return copy(self) - pack = pack @classmethod @@ -1836,8 +1570,4 @@ def to_mol(self): mol._build_symmetry() return mol - def has_ecp(self): - '''Whether pseudo potential is used in the system.''' - return self.pseudo or self._pseudo or (len(self._ecpbas) > 0) - del (INTEGRAL_PRECISION, WRAP_AROUND, WITH_GAMMA, EXP_DELIMITER) diff --git a/pyscf/pbc/gto/pseudo/__init__.py b/pyscf/pbc/gto/pseudo/__init__.py index b5d3841865..55b9095095 100644 --- a/pyscf/pbc/gto/pseudo/__init__.py +++ b/pyscf/pbc/gto/pseudo/__init__.py @@ -16,79 +16,8 @@ # Author: Qiming Sun # Timothy Berkelbach -import os -import re -from pyscf.pbc.gto.pseudo import parse_cp2k +from pyscf.gto.basis import parse +from pyscf.gto.basis import load_pseudo as load +from pyscf.gto.basis import PP_ALIAS as ALIAS from pyscf.pbc.gto.pseudo.pp import * from pyscf.pbc.gto.pseudo import pp_int - -ALIAS = { - 'gthblyp' : 'gth-blyp.dat' , - 'gthbp' : 'gth-bp.dat' , - 'gthhcth120' : 'gth-hcth120.dat', - 'gthhcth407' : 'gth-hcth407.dat', - 'gtholyp' : 'gth-olyp.dat' , - 'gthlda' : 'gth-pade.dat' , - 'gthpade' : 'gth-pade.dat' , - 'gthpbe' : 'gth-pbe.dat' , - 'gthpbesol' : 'gth-pbesol.dat' , - 'gthhf' : 'gth-hf.dat' , - 'gthhfrev' : 'gth-hf-rev.dat' , -} - -def parse(string): - '''Parse the pseudo text which is in CP2K format, return an internal - pseudo format which can be assigned to :attr:`Cell.pseudo` - - Args: - string : Blank linke and the lines of "PSEUDOPOTENTIAL" and "END" will be ignored - - Examples: - - >>> cell = gto.Cell() - >>> cell.pseudo = {'C': gto.pseudo.parse(""" - ... #PSEUDOPOTENTIAL - ... C GTH-BLYP-q4 - ... 2 2 - ... 0.33806609 2 -9.13626871 1.42925956 - ... 2 - ... 0.30232223 1 9.66551228 - ... 0.28637912 0 - ... """)} - ''' - return parse_cp2k.parse(string) - -def load(pseudo_name, symb): - '''Convert the pseudopotential of the given symbol to internal format - - Args: - pseudo_name : str - Case insensitive pseudopotential name. Special characters will be removed. - symb : str - Atomic symbol, Special characters will be removed. - - Examples: - Load GTH-BLYP pseudopotential of carbon - - >>> cell = gto.Cell() - >>> cell.pseudo = {'C': load('gth-blyp', 'C')} - ''' - if os.path.isfile(pseudo_name): - return parse_cp2k.load(pseudo_name, symb) - - name, suffix = _format_pseudo_name(pseudo_name) - pseudomod = ALIAS[name] - symb = ''.join(i for i in symb if i.isalpha()) - p = parse_cp2k.load(os.path.join(os.path.dirname(__file__), pseudomod), symb, suffix) - return p - -SUFFIX_PATTERN = re.compile(r'q\d+$') -def _format_pseudo_name(pseudo_name): - name_suffix = pseudo_name.lower().replace('-', '').replace('_', '').replace(' ', '') - match = re.search(SUFFIX_PATTERN, name_suffix) - if match: - name = name_suffix[:match.start()] - suffix = name_suffix[match.start():] - else: - name, suffix = name_suffix, None - return name, suffix