Skip to content

Commit

Permalink
Refactor basis parser; merge gto.mole basis parser and pbcgto.cell ba…
Browse files Browse the repository at this point in the history
…sis parser
  • Loading branch information
sunqm committed Sep 10, 2023
1 parent c126df7 commit d5b5bd2
Show file tree
Hide file tree
Showing 9 changed files with 553 additions and 681 deletions.
212 changes: 165 additions & 47 deletions pyscf/gto/basis/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python
# Copyright 2014-2020 The PySCF Developers. All Rights Reserved.
# Copyright 2014-2023 The PySCF Developers. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -16,14 +16,18 @@
# Author: Qiming Sun <[email protected]>
#

# Names exported by a star-import of this package.
__all__ = ['ALIAS', 'GTH_ALIAS', 'PP_ALIAS',
'parse', 'parse_ecp', 'load', 'load_ecp', 'load_pseudo',
'optimize_contraction', 'to_general_contraction']

import os
import sys
import re
from os.path import join
if sys.version_info < (2,7):
import imp
else:
import importlib
from pyscf.gto.basis import parse_nwchem
import importlib
import pyscf
from pyscf.gto.basis import parse_nwchem, parse_nwchem_ecp
from pyscf.gto.basis import parse_cp2k, parse_cp2k_pp
from pyscf.lib.exceptions import BasisNotFoundError
from pyscf import __config__

Expand Down Expand Up @@ -371,12 +375,52 @@
'dyallv4z' : 'dyall-basis.dyall_v4z',
}

# Maps a normalized GTH basis name to its data file name.  Every key is the
# file stem with the dashes removed, so the table is derived from the stems;
# the insertion order follows the tuple below.
GTH_ALIAS = {
    stem.replace('-', ''): f'{stem}.dat'
    for stem in (
        'gth-aug-dzvp',
        'gth-aug-qzv2p',
        'gth-aug-qzv3p',
        'gth-aug-tzv2p',
        'gth-aug-tzvp',
        'gth-dzv',
        'gth-dzvp',
        'gth-qzv2p',
        'gth-qzv3p',
        'gth-szv',
        'gth-tzv2p',
        'gth-tzvp',
        'gth-cc-dzvp',
        'gth-cc-tzvp',
        'gth-cc-qzvp',
        'gth-szv-molopt',
        'gth-dzvp-molopt',
        'gth-tzvp-molopt',
        'gth-tzv2p-molopt',
        'gth-szv-molopt-sr',
        'gth-dzvp-molopt-sr',
    )
}

# Maps a normalized GTH pseudopotential name to its data file name.
# 'gthlda' and 'gthpade' deliberately point at the same file.
PP_ALIAS = dict(
    gthblyp='gth-blyp.dat',
    gthbp='gth-bp.dat',
    gthhcth120='gth-hcth120.dat',
    gthhcth407='gth-hcth407.dat',
    gtholyp='gth-olyp.dat',
    gthlda='gth-pade.dat',
    gthpade='gth-pade.dat',
    gthpbe='gth-pbe.dat',
    gthpbesol='gth-pbesol.dat',
    gthhf='gth-hf.dat',
    gthhfrev='gth-hf-rev.dat',
)

def _is_pople_basis(basis):
return (basis.startswith('631') or
basis.startswith('321') or
basis.startswith('431'))

# Directory that holds this module.
_BASIS_DIR = os.path.dirname(__file__)
# Directory searched for the GTH_ALIAS data files.  The string starts from
# pyscf.__file__ (presumably the package __init__ file -- TODO confirm); the
# '/..' step cancels that file name when os.path.abspath normalizes the path,
# leaving <pyscf package dir>/pbc/gto/basis.
_GTH_BASIS_DIR = os.path.abspath(f'{pyscf.__file__}/../pbc/gto/basis')
# Directory searched for the PP_ALIAS data files: the 'pseudo' directory next
# to _GTH_BASIS_DIR.
_GTH_PP_DIR = os.path.abspath(f'{_GTH_BASIS_DIR}/../pseudo')

def _parse_pople_basis(basis, symb):
if '(' in basis:
Expand Down Expand Up @@ -408,17 +452,56 @@ def convert(s):
return tuple([ALIAS[mbas]] + convert(extension.split(',')[0]))

# Default value of the ``optimize`` argument of parse() and load().  Read from
# the 'gto_basis_parse_optimize' attribute of __config__; False when that
# attribute is not set.
OPTIMIZE_CONTRACTION = getattr(__config__, 'gto_basis_parse_optimize', False)

def parse(string, symb=None, optimize=OPTIMIZE_CONTRACTION):
'''Parse the basis (ECP, PP) text in NWChem or CP2K format, returns internal format
The parser is selected from the text itself: text containing "ECP" goes to
an ECP parser, text containing "GTH" goes to a CP2K parser (the
pseudopotential one when "PSEUDOPOTENTIAL" is also present), and any other
text goes to the NWChem basis parser.
Args:
string : Blank lines and the lines of "BASIS SET" and "END" will be ignored
symb : Passed through unchanged to the selected parser
optimize : Passed through to the basis parsers only; the ECP and
pseudopotential parsers do not receive it
Examples:
>>> mol = gto.Mole()
>>> mol.basis = gto.basis.parse("""
... He S
... 13.6267000 0.1752300
... 1.9993500 0.8934830
... 0.3829930 0.0000000
... He S
... 13.6267000 0.0000000
... 1.9993500 0.0000000
... 0.3829930 1.0000000
... """, optimize=True)
>>> cell = pbc.gto.Cell()
>>> cell.basis = {'C': gto.basis.parse("""
... C DZVP-GTH
... 2
... 2 0 1 4 2 2
... 4.3362376436 0.1490797872 0.0000000000 -0.0878123619 0.0000000000
... 1.2881838513 -0.0292640031 0.0000000000 -0.2775560300 0.0000000000
... 0.4037767149 -0.6882040510 0.0000000000 -0.4712295093 0.0000000000
... 0.1187877657 -0.3964426906 1.0000000000 -0.4058039291 1.0000000000
... 3 2 2 1 1
... 0.5500000000 1.0000000000
... #
... """)}
'''
if 'ECP' in string:
return parse_nwchem.parse_ecp(string, symb)
return parse_nwchem_ecp.parse(string, symb)
elif 'GTH' in string:
if 'PSEUDOPOTENTIAL' in string:
return parse_cp2k_pp.parse(string, symb)
else:
return parse_cp2k.parse(string, symb, optimize)
else:
return parse_nwchem.parse(string, symb, optimize)
parse.__doc__ = parse_nwchem.parse.__doc__

def parse_ecp(string, symb=None):
# Thin wrapper around the NWChem-format ECP parser: ``string`` and ``symb``
# are forwarded unchanged.  The docstring is copied from that parser by the
# ``__doc__`` assignment that follows the function.
# TODO: catch KeyError and provide suggestion for the possible keys
return parse_nwchem.parse_ecp(string, symb)
parse_ecp.__doc__ = parse_nwchem.parse_ecp.__doc__
return parse_nwchem_ecp.parse(string, symb)
parse_ecp.__doc__ = parse_nwchem_ecp.parse.__doc__

def _convert_contraction(contr_string):
'''Parse contraction scheme string into a list
Expand Down Expand Up @@ -497,31 +580,50 @@ def load(filename_or_basisname, symb, optimize=OPTIMIZE_CONTRACTION):
contr_scheme = _convert_contraction(split_name[1].lower())
else:
contr_scheme = 'Full'

if os.path.isfile(filename_or_basisname):
# read basis from given file
try:
b = parse_nwchem.load(filename_or_basisname, symb, optimize)
b = _load_external(parse_nwchem, filename_or_basisname, symb,
optimize=optimize)
except BasisNotFoundError:
with open(filename_or_basisname, 'r') as fin:
b = parse_nwchem.parse(fin.read(), symb)
b = _load_external(parse_cp2k, filename_or_basisname, symb,
optimize=optimize)

if contr_scheme != 'Full':
b = _truncate(b, contr_scheme, symb, split_name)
return b

name = _format_basis_name(filename_or_basisname)

if not (name in ALIAS or _is_pople_basis(name)):
fload = parse_nwchem.load
basis_dir = _BASIS_DIR
if name in ALIAS:
basmod = ALIAS[name]
elif name in GTH_ALIAS:
basmod = GTH_ALIAS[name]
fload = parse_cp2k.load
basis_dir = _GTH_BASIS_DIR
elif _is_pople_basis(name):
basmod = _parse_pople_basis(name, symb)
else:
try:
return parse_nwchem.parse(filename_or_basisname, symb)
return parse_nwchem.parse(filename_or_basisname, symb,
optimize=optimize)
except IndexError:
raise BasisNotFoundError(filename_or_basisname)
except BasisNotFoundError as basis_err:
pass

try:
return parse_nwchem.parse(filename_or_basisname)
return parse_nwchem.parse(filename_or_basisname, optimize=optimize)
except IndexError:
raise BasisNotFoundError(f'Invalid basis {filename_or_basisname}')
except BasisNotFoundError:
pass

try:
return parse_cp2k.parse(filename_or_basisname, optimize=optimize)
except IndexError:
raise BasisNotFoundError('Invalid basis name %s' % filename_or_basisname)
raise BasisNotFoundError(f'Invalid basis {filename_or_basisname}')
except BasisNotFoundError:
pass

Expand All @@ -538,62 +640,44 @@ def load(filename_or_basisname, symb, optimize=OPTIMIZE_CONTRACTION):

raise basis_err

if name in ALIAS:
basmod = ALIAS[name]
elif _is_pople_basis(name):
basmod = _parse_pople_basis(name, symb)
else:
raise BasisNotFoundError(filename_or_basisname)

if 'dat' in basmod:
b = parse_nwchem.load(join(_BASIS_DIR, basmod), symb, optimize)
b = fload(join(basis_dir, basmod), symb, optimize)
elif isinstance(basmod, (tuple, list)) and isinstance(basmod[0], str):
b = []
for f in basmod:
b += parse_nwchem.load(join(_BASIS_DIR, f), symb, optimize)
b += fload(join(basis_dir, f), symb, optimize)
else:
if sys.version_info < (2,7):
fp, pathname, description = imp.find_module(basmod, __path__)
mod = imp.load_module(name, fp, pathname, description)
b = mod.__getattribute__(symb)
fp.close()
else:
mod = importlib.import_module('.'+basmod, __package__)
b = mod.__getattribute__(symb)
mod = importlib.import_module('.'+basmod, __package__)
b = mod.__getattribute__(symb)

if contr_scheme != 'Full':
b = _truncate(b, contr_scheme, symb, split_name)
return b

def load_ecp(filename_or_basisname, symb):
'''Convert the basis of the given symbol to internal format
'''Parses ECP database file
'''
symb = ''.join([i for i in symb if i.isalpha()])
if os.path.isfile(filename_or_basisname):
# read basis from given file
try:
return parse_nwchem.load_ecp(filename_or_basisname, symb)
except BasisNotFoundError:
with open(filename_or_basisname, 'r') as fin:
return parse_ecp(fin.read(), symb)
return _load_external(parse_nwchem_ecp, filename_or_basisname, symb)

name = _format_basis_name(filename_or_basisname)

if name in ALIAS:
basmod = ALIAS[name]
return parse_nwchem.load_ecp(join(_BASIS_DIR, basmod), symb)
return parse_nwchem_ecp.load(join(_BASIS_DIR, basmod), symb)

try:
return parse_ecp(filename_or_basisname, symb)
return parse_nwchem_ecp.parse(filename_or_basisname, symb)
except IndexError:
raise BasisNotFoundError(filename_or_basisname)
except BasisNotFoundError as basis_err:
pass

try:
return parse_nwchem.parse_ecp(filename_or_basisname)
return parse_nwchem_ecp.parse(filename_or_basisname)
except IndexError:
raise BasisNotFoundError('Invalid basis name %s' % filename_or_basisname)
raise BasisNotFoundError(f'Invalid ECP {filename_or_basisname}')
except BasisNotFoundError:
pass

Expand All @@ -610,7 +694,41 @@ def load_ecp(filename_or_basisname, symb):

raise basis_err

def load_pseudo(filename_or_basisname, symb):
    '''Load the pseudopotential (PP) data for one atom.

    Args:
        filename_or_basisname : Path of an existing PP file, a name listed
            in PP_ALIAS (optionally followed by a ``q<digits>`` suffix), or
            PP text to be parsed directly.
        symb : Atom label; every non-alphabetic character is dropped before
            the lookup.

    Returns:
        The object produced by the parse_cp2k_pp loader or parser.

    Raises:
        BasisNotFoundError : If parsing the argument as PP text fails with
            an IndexError.
    '''
    element = ''.join(ch for ch in symb if ch.isalpha())

    # An existing file always wins over the alias table.
    if os.path.isfile(filename_or_basisname):
        return _load_external(parse_cp2k_pp, filename_or_basisname, element)

    alias, suffix = _format_pseudo_name(filename_or_basisname)
    if alias not in PP_ALIAS:
        # Not a known name: treat the argument itself as PP text.
        try:
            return parse_cp2k_pp.parse(filename_or_basisname)
        except IndexError:
            raise BasisNotFoundError(f'Invalid PP {filename_or_basisname}')

    pp_file = join(_GTH_PP_DIR, PP_ALIAS[alias])
    return parse_cp2k_pp.load(pp_file, element, suffix)

def _load_external(module, filename_or_basisname, symb, **kwargs):
'''Try to read basis from given file'''
try:
return module.load(filename_or_basisname, symb, **kwargs)
except BasisNotFoundError:
with open(filename_or_basisname, 'r') as fin:
return module.parse(fin.read(), **kwargs)

def _format_basis_name(basisname):
return basisname.lower().replace('-', '').replace('_', '').replace(' ', '')

del(OPTIMIZE_CONTRACTION)
# Matches a trailing 'q<digits>' at the end of a normalized PP name.
SUFFIX_PATTERN = re.compile(r'q\d+$')
def _format_pseudo_name(pseudo_name):
    '''Split a PP name into its normalized name and trailing suffix.

    The name is first normalized with _format_basis_name.  If the result
    ends with ``q<digits>``, that tail is returned separately.

    Returns:
        (name, suffix) : ``suffix`` is None when no such tail is present.
    '''
    normalized = _format_basis_name(pseudo_name)
    found = SUFFIX_PATTERN.search(normalized)
    if not found:
        return normalized, None
    cut = found.start()
    return normalized[:cut], normalized[cut:]
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python
# Copyright 2014-2018,2021 The PySCF Developers. All Rights Reserved.
# Copyright 2014-2023 The PySCF Developers. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -21,6 +21,7 @@
'''

import re
from pyscf.lib.exceptions import BasisNotFoundError
from pyscf.gto.basis import parse_nwchem
from pyscf import __config__

Expand All @@ -32,6 +33,22 @@ def parse(string, optimize=False):
'''Parse the basis text which is in CP2K format, return an internal
basis format which can be assigned to :attr:`Mole.basis`
Lines started with # are ignored.
Examples:
>>> cell = gto.Cell()
>>> cell.basis = {'C': pyscf.gto.basis.parse_cp2k.parse("""
... C DZVP-GTH
... 2
... 2 0 1 4 2 2
... 4.3362376436 0.1490797872 0.0000000000 -0.0878123619 0.0000000000
... 1.2881838513 -0.0292640031 0.0000000000 -0.2775560300 0.0000000000
... 0.4037767149 -0.6882040510 0.0000000000 -0.4712295093 0.0000000000
... 0.1187877657 -0.3964426906 1.0000000000 -0.4058039291 1.0000000000
... 3 2 2 1 1
... 0.5500000000 1.0000000000
... #
... """)}
'''
bastxt = []
for dat in string.splitlines():
Expand Down Expand Up @@ -88,6 +105,4 @@ def search_seg(basisfile, symb):
# remove blank lines
return [x.strip() for x in dat.splitlines()
if x.strip() and 'END' not in x]
raise RuntimeError('Basis not found for %s in %s' % (symb, basisfile))


raise BasisNotFoundError(f'Basis for {symb} not found in {basisfile}')
Loading

0 comments on commit d5b5bd2

Please sign in to comment.