forked from pyscf/pyscf
-
Notifications
You must be signed in to change notification settings - Fork 20
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Refactor basis parser; merge gto.mole basis parser and pbcgto.cell basis parser
- Loading branch information
Showing
9 changed files
with
553 additions
and
681 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
#!/usr/bin/env python | ||
# Copyright 2014-2020 The PySCF Developers. All Rights Reserved. | ||
# Copyright 2014-2023 The PySCF Developers. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
|
@@ -16,14 +16,18 @@ | |
# Author: Qiming Sun <[email protected]> | ||
# | ||
|
||
__all__ = ['ALIAS', 'GTH_ALIAS', 'PP_ALIAS', | ||
'parse', 'parse_ecp', 'load', 'load_ecp', 'load_pseudo', | ||
'optimize_contraction', 'to_general_contraction'] | ||
|
||
import os | ||
import sys | ||
import re | ||
from os.path import join | ||
if sys.version_info < (2,7): | ||
import imp | ||
else: | ||
import importlib | ||
from pyscf.gto.basis import parse_nwchem | ||
import importlib | ||
import pyscf | ||
from pyscf.gto.basis import parse_nwchem, parse_nwchem_ecp | ||
from pyscf.gto.basis import parse_cp2k, parse_cp2k_pp | ||
from pyscf.lib.exceptions import BasisNotFoundError | ||
from pyscf import __config__ | ||
|
||
|
@@ -371,12 +375,52 @@ | |
'dyallv4z' : 'dyall-basis.dyall_v4z', | ||
} | ||
|
||
GTH_ALIAS = { | ||
'gthaugdzvp' : 'gth-aug-dzvp.dat', | ||
'gthaugqzv2p' : 'gth-aug-qzv2p.dat', | ||
'gthaugqzv3p' : 'gth-aug-qzv3p.dat', | ||
'gthaugtzv2p' : 'gth-aug-tzv2p.dat', | ||
'gthaugtzvp' : 'gth-aug-tzvp.dat', | ||
'gthdzv' : 'gth-dzv.dat', | ||
'gthdzvp' : 'gth-dzvp.dat', | ||
'gthqzv2p' : 'gth-qzv2p.dat', | ||
'gthqzv3p' : 'gth-qzv3p.dat', | ||
'gthszv' : 'gth-szv.dat', | ||
'gthtzv2p' : 'gth-tzv2p.dat', | ||
'gthtzvp' : 'gth-tzvp.dat', | ||
'gthccdzvp' : 'gth-cc-dzvp.dat', | ||
'gthcctzvp' : 'gth-cc-tzvp.dat', | ||
'gthccqzvp' : 'gth-cc-qzvp.dat', | ||
'gthszvmolopt' : 'gth-szv-molopt.dat', | ||
'gthdzvpmolopt' : 'gth-dzvp-molopt.dat', | ||
'gthtzvpmolopt' : 'gth-tzvp-molopt.dat', | ||
'gthtzv2pmolopt' : 'gth-tzv2p-molopt.dat', | ||
'gthszvmoloptsr' : 'gth-szv-molopt-sr.dat', | ||
'gthdzvpmoloptsr' : 'gth-dzvp-molopt-sr.dat', | ||
} | ||
|
||
PP_ALIAS = { | ||
'gthblyp' : 'gth-blyp.dat' , | ||
'gthbp' : 'gth-bp.dat' , | ||
'gthhcth120' : 'gth-hcth120.dat', | ||
'gthhcth407' : 'gth-hcth407.dat', | ||
'gtholyp' : 'gth-olyp.dat' , | ||
'gthlda' : 'gth-pade.dat' , | ||
'gthpade' : 'gth-pade.dat' , | ||
'gthpbe' : 'gth-pbe.dat' , | ||
'gthpbesol' : 'gth-pbesol.dat' , | ||
'gthhf' : 'gth-hf.dat' , | ||
'gthhfrev' : 'gth-hf-rev.dat' , | ||
} | ||
|
||
def _is_pople_basis(basis): | ||
return (basis.startswith('631') or | ||
basis.startswith('321') or | ||
basis.startswith('431')) | ||
|
||
_BASIS_DIR = os.path.dirname(__file__) | ||
_GTH_BASIS_DIR = os.path.abspath(f'{pyscf.__file__}/../pbc/gto/basis') | ||
_GTH_PP_DIR = os.path.abspath(f'{_GTH_BASIS_DIR}/../pseudo') | ||
|
||
def _parse_pople_basis(basis, symb): | ||
if '(' in basis: | ||
|
@@ -408,17 +452,56 @@ def convert(s): | |
return tuple([ALIAS[mbas]] + convert(extension.split(',')[0])) | ||
|
||
OPTIMIZE_CONTRACTION = getattr(__config__, 'gto_basis_parse_optimize', False) | ||
|
||
def parse(string, symb=None, optimize=OPTIMIZE_CONTRACTION): | ||
'''Parse the basis (ECP, PP) text in NWChem or CP2K format and return the internal format | ||
Args: | ||
string : Blank lines and the lines of "BASIS SET" and "END" will be ignored | ||
Examples: | ||
>>> mol = gto.Mole() | ||
>>> mol.basis = gto.basis.parse(""" | ||
... He S | ||
... 13.6267000 0.1752300 | ||
... 1.9993500 0.8934830 | ||
... 0.3829930 0.0000000 | ||
... He S | ||
... 13.6267000 0.0000000 | ||
... 1.9993500 0.0000000 | ||
... 0.3829930 1.0000000 | ||
... """, optimize=True) | ||
>>> cell = pbc.gto.Cell() | ||
>>> cell.basis = {'C': gto.basis.parse(""" | ||
... C DZVP-GTH | ||
... 2 | ||
... 2 0 1 4 2 2 | ||
... 4.3362376436 0.1490797872 0.0000000000 -0.0878123619 0.0000000000 | ||
... 1.2881838513 -0.0292640031 0.0000000000 -0.2775560300 0.0000000000 | ||
... 0.4037767149 -0.6882040510 0.0000000000 -0.4712295093 0.0000000000 | ||
... 0.1187877657 -0.3964426906 1.0000000000 -0.4058039291 1.0000000000 | ||
... 3 2 2 1 1 | ||
... 0.5500000000 1.0000000000 | ||
... # | ||
... """)} | ||
''' | ||
if 'ECP' in string: | ||
return parse_nwchem.parse_ecp(string, symb) | ||
return parse_nwchem_ecp.parse(string, symb) | ||
elif 'GTH' in string: | ||
if 'PSEUDOPOTENTIAL' in string: | ||
return parse_cp2k_pp.parse(string, symb) | ||
else: | ||
return parse_cp2k.parse(string, symb, optimize) | ||
else: | ||
return parse_nwchem.parse(string, symb, optimize) | ||
parse.__doc__ = parse_nwchem.parse.__doc__ | ||
|
||
def parse_ecp(string, symb=None): | ||
# TODO: catch KeyError and provide suggestion for the possible keys | ||
return parse_nwchem.parse_ecp(string, symb) | ||
parse_ecp.__doc__ = parse_nwchem.parse_ecp.__doc__ | ||
return parse_nwchem_ecp.parse(string, symb) | ||
parse_ecp.__doc__ = parse_nwchem_ecp.parse.__doc__ | ||
|
||
def _convert_contraction(contr_string): | ||
'''Parse contraction scheme string into a list | ||
|
@@ -497,31 +580,50 @@ def load(filename_or_basisname, symb, optimize=OPTIMIZE_CONTRACTION): | |
contr_scheme = _convert_contraction(split_name[1].lower()) | ||
else: | ||
contr_scheme = 'Full' | ||
|
||
if os.path.isfile(filename_or_basisname): | ||
# read basis from given file | ||
try: | ||
b = parse_nwchem.load(filename_or_basisname, symb, optimize) | ||
b = _load_external(parse_nwchem, filename_or_basisname, symb, | ||
optimize=optimize) | ||
except BasisNotFoundError: | ||
with open(filename_or_basisname, 'r') as fin: | ||
b = parse_nwchem.parse(fin.read(), symb) | ||
b = _load_external(parse_cp2k, filename_or_basisname, symb, | ||
optimize=optimize) | ||
|
||
if contr_scheme != 'Full': | ||
b = _truncate(b, contr_scheme, symb, split_name) | ||
return b | ||
|
||
name = _format_basis_name(filename_or_basisname) | ||
|
||
if not (name in ALIAS or _is_pople_basis(name)): | ||
fload = parse_nwchem.load | ||
basis_dir = _BASIS_DIR | ||
if name in ALIAS: | ||
basmod = ALIAS[name] | ||
elif name in GTH_ALIAS: | ||
basmod = GTH_ALIAS[name] | ||
fload = parse_cp2k.load | ||
basis_dir = _GTH_BASIS_DIR | ||
elif _is_pople_basis(name): | ||
basmod = _parse_pople_basis(name, symb) | ||
else: | ||
try: | ||
return parse_nwchem.parse(filename_or_basisname, symb) | ||
return parse_nwchem.parse(filename_or_basisname, symb, | ||
optimize=optimize) | ||
except IndexError: | ||
raise BasisNotFoundError(filename_or_basisname) | ||
except BasisNotFoundError as basis_err: | ||
pass | ||
|
||
try: | ||
return parse_nwchem.parse(filename_or_basisname) | ||
return parse_nwchem.parse(filename_or_basisname, optimize=optimize) | ||
except IndexError: | ||
raise BasisNotFoundError(f'Invalid basis {filename_or_basisname}') | ||
except BasisNotFoundError: | ||
pass | ||
|
||
try: | ||
return parse_cp2k.parse(filename_or_basisname, optimize=optimize) | ||
except IndexError: | ||
raise BasisNotFoundError('Invalid basis name %s' % filename_or_basisname) | ||
raise BasisNotFoundError(f'Invalid basis {filename_or_basisname}') | ||
except BasisNotFoundError: | ||
pass | ||
|
||
|
@@ -538,62 +640,44 @@ def load(filename_or_basisname, symb, optimize=OPTIMIZE_CONTRACTION): | |
|
||
raise basis_err | ||
|
||
if name in ALIAS: | ||
basmod = ALIAS[name] | ||
elif _is_pople_basis(name): | ||
basmod = _parse_pople_basis(name, symb) | ||
else: | ||
raise BasisNotFoundError(filename_or_basisname) | ||
|
||
if 'dat' in basmod: | ||
b = parse_nwchem.load(join(_BASIS_DIR, basmod), symb, optimize) | ||
b = fload(join(basis_dir, basmod), symb, optimize) | ||
elif isinstance(basmod, (tuple, list)) and isinstance(basmod[0], str): | ||
b = [] | ||
for f in basmod: | ||
b += parse_nwchem.load(join(_BASIS_DIR, f), symb, optimize) | ||
b += fload(join(basis_dir, f), symb, optimize) | ||
else: | ||
if sys.version_info < (2,7): | ||
fp, pathname, description = imp.find_module(basmod, __path__) | ||
mod = imp.load_module(name, fp, pathname, description) | ||
b = mod.__getattribute__(symb) | ||
fp.close() | ||
else: | ||
mod = importlib.import_module('.'+basmod, __package__) | ||
b = mod.__getattribute__(symb) | ||
mod = importlib.import_module('.'+basmod, __package__) | ||
b = mod.__getattribute__(symb) | ||
|
||
if contr_scheme != 'Full': | ||
b = _truncate(b, contr_scheme, symb, split_name) | ||
return b | ||
|
||
def load_ecp(filename_or_basisname, symb): | ||
'''Convert the basis of the given symbol to internal format | ||
'''Parses ECP database file | ||
''' | ||
symb = ''.join([i for i in symb if i.isalpha()]) | ||
if os.path.isfile(filename_or_basisname): | ||
# read basis from given file | ||
try: | ||
return parse_nwchem.load_ecp(filename_or_basisname, symb) | ||
except BasisNotFoundError: | ||
with open(filename_or_basisname, 'r') as fin: | ||
return parse_ecp(fin.read(), symb) | ||
return _load_external(parse_nwchem_ecp, filename_or_basisname, symb) | ||
|
||
name = _format_basis_name(filename_or_basisname) | ||
|
||
if name in ALIAS: | ||
basmod = ALIAS[name] | ||
return parse_nwchem.load_ecp(join(_BASIS_DIR, basmod), symb) | ||
return parse_nwchem_ecp.load(join(_BASIS_DIR, basmod), symb) | ||
|
||
try: | ||
return parse_ecp(filename_or_basisname, symb) | ||
return parse_nwchem_ecp.parse(filename_or_basisname, symb) | ||
except IndexError: | ||
raise BasisNotFoundError(filename_or_basisname) | ||
except BasisNotFoundError as basis_err: | ||
pass | ||
|
||
try: | ||
return parse_nwchem.parse_ecp(filename_or_basisname) | ||
return parse_nwchem_ecp.parse(filename_or_basisname) | ||
except IndexError: | ||
raise BasisNotFoundError('Invalid basis name %s' % filename_or_basisname) | ||
raise BasisNotFoundError(f'Invalid ECP {filename_or_basisname}') | ||
except BasisNotFoundError: | ||
pass | ||
|
||
|
@@ -610,7 +694,41 @@ def load_ecp(filename_or_basisname, symb): | |
|
||
raise basis_err | ||
|
||
def load_pseudo(filename_or_basisname, symb): | ||
'''Parses PP database file | ||
''' | ||
symb = ''.join([i for i in symb if i.isalpha()]) | ||
if os.path.isfile(filename_or_basisname): | ||
return _load_external(parse_cp2k_pp, filename_or_basisname, symb) | ||
|
||
name, suffix = _format_pseudo_name(filename_or_basisname) | ||
if name in PP_ALIAS: | ||
basmod = PP_ALIAS[name] | ||
return parse_cp2k_pp.load(join(_GTH_PP_DIR, basmod), symb, suffix) | ||
|
||
try: | ||
return parse_cp2k_pp.parse(filename_or_basisname) | ||
except IndexError: | ||
raise BasisNotFoundError(f'Invalid PP {filename_or_basisname}') | ||
|
||
def _load_external(module, filename_or_basisname, symb, **kwargs): | ||
'''Try to read basis from given file''' | ||
try: | ||
return module.load(filename_or_basisname, symb, **kwargs) | ||
except BasisNotFoundError: | ||
with open(filename_or_basisname, 'r') as fin: | ||
return module.parse(fin.read(), **kwargs) | ||
|
||
def _format_basis_name(basisname): | ||
return basisname.lower().replace('-', '').replace('_', '').replace(' ', '') | ||
|
||
del(OPTIMIZE_CONTRACTION) | ||
SUFFIX_PATTERN = re.compile(r'q\d+$') | ||
def _format_pseudo_name(pseudo_name): | ||
name_suffix = _format_basis_name(pseudo_name) | ||
match = re.search(SUFFIX_PATTERN, name_suffix) | ||
if match: | ||
name = name_suffix[:match.start()] | ||
suffix = name_suffix[match.start():] | ||
else: | ||
name, suffix = name_suffix, None | ||
return name, suffix |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.