Merge pull request #26 from chrisjsewell/mulliken

Various Improvements
aiidaplugins · Oct 18, 2019 · 6ce9f12 · 6ce9f12
2 parents bd56476 + 63e4315
commit 6ce9f12
Show file tree

Hide file tree

Showing 26 changed files with 3,944 additions and 300 deletions.
diff --git a/aiida_crystal17/calcfunctions/band_gap.py b/aiida_crystal17/calcfunctions/band_gap.py
@@ -116,12 +116,13 @@ def calcfunction_band_gap(doss_results, doss_array, dtol=None, try_fshifts=None)
         return ExitCode(101, 'doss_results is not of type `aiida.orm.Dict`: {}'.format(doss_results))
     if 'fermi_energy' not in doss_results.get_dict():
         return ExitCode(102, '`fermi_energy` not in doss_results')
-    if 'energy_units' not in doss_results.get_dict():
-        return ExitCode(102, '`energy_units` not in doss_results')
+    if 'energy' not in doss_results.get_dict().get('units', {}):
+        return ExitCode(102, '`energy units` not in doss_results')
     if not isinstance(doss_array, ArrayData):
         return ExitCode(103, 'doss_array is not of type `aiida.orm.ArrayData`: {}'.format(doss_array))
 
-    kwargs = {'fermi': doss_results.get_dict()['fermi_energy']}
+    kwargs = {}
+    kwargs['fermi'] = doss_results.get_dict()['fermi_energy']
 
     if dtol is not None:
         if not isinstance(dtol, Float):
@@ -156,7 +157,7 @@ def calcfunction_band_gap(doss_results, doss_array, dtol=None, try_fshifts=None)
         total_density = np.abs(alpha_density) + np.abs(beta_density)
         calcs = {'alpha': alpha_density, 'beta': beta_density, 'total': total_density}
 
-    final_dict = {'energy_units': doss_results.get_dict()['energy_units']}
+    final_dict = {'energy_units': doss_results.get_dict()['units']['energy']}
 
     for name, density in calcs.items():
         try:

diff --git a/aiida_crystal17/parsers/raw/crystal_stdout.py b/aiida_crystal17/parsers/raw/crystal_stdout.py
@@ -73,14 +73,14 @@
     ('MPI_Abort', 'ERROR_MPI_ABORT'))
 
 SYSTEM_INFO_REGEXES = (
-    ('n_atoms', re.compile(r'\sN. OF ATOMS PER CELL\s*(\d*)', re.DOTALL)),
-    ('n_shells', re.compile(r'\sNUMBER OF SHELLS\s*(\d*)', re.DOTALL)),
-    ('n_ao', re.compile(r'\sNUMBER OF AO\s*(\d*)', re.DOTALL)),
-    ('n_electrons', re.compile(r'\sN. OF ELECTRONS PER CELL\s*(\d*)', re.DOTALL)),
-    ('n_core_el', re.compile(r'\sCORE ELECTRONS PER CELL\s*(\d*)', re.DOTALL)),
-    ('n_symops', re.compile(r'\sN. OF SYMMETRY OPERATORS\s*(\d*)', re.DOTALL)),
-    ('n_kpoints_ibz', re.compile(r'\sNUMBER OF K POINTS IN THE IBZ\s*(\d*)', re.DOTALL)),
-    ('n_kpoints_gilat', re.compile(r'\s NUMBER OF K POINTS\(GILAT NET\)\s*(\d*)', re.DOTALL)),
+    ('n_atoms', re.compile(r'\sN. OF ATOMS PER CELL\s*(\d+)', re.DOTALL)),
+    ('n_shells', re.compile(r'\sNUMBER OF SHELLS\s*(\d+)', re.DOTALL)),
+    ('n_ao', re.compile(r'\sNUMBER OF AO\s*(\d+)', re.DOTALL)),
+    ('n_electrons', re.compile(r'\sN. OF ELECTRONS PER CELL\s*(\d+)', re.DOTALL)),
+    ('n_core_el', re.compile(r'\sCORE ELECTRONS PER CELL\s*(\d+)', re.DOTALL)),
+    ('n_symops', re.compile(r'\sN. OF SYMMETRY OPERATORS\s*(\d+)', re.DOTALL)),
+    ('n_kpoints_ibz', re.compile(r'\sNUMBER OF K POINTS IN THE IBZ\s*(\d+)', re.DOTALL)),
+    ('n_kpoints_gilat', re.compile(r'\s NUMBER OF K POINTS\(GILAT NET\)\s*(\d+)', re.DOTALL)),
 )
 
 
@@ -849,7 +849,7 @@ def parse_optimisation(lines, initial_lineno):
 
                 if not fnmatch(line, '*E(AU)*'):
                     raise IOError('was expecting units in a.u. on line:' ' {0}, got: {1}'.format(curr_lineno, line))
-                data = [{'energy': {'total_corrected': convert_units(split_numbers(lines[-1])[0], 'hartree', 'eV')}}]
+                data = [{'energy': {'total_corrected': convert_units(split_numbers(line)[0], 'hartree', 'eV')}}]
 
                 return ParsedSection(curr_lineno, data)
 
@@ -1009,41 +1009,65 @@ def parse_mulliken_analysis(lines, mulliken_indices):
 
     for i, indx in enumerate(mulliken_indices):
         name = lines[indx - 1].strip().lower()
+        key_name = name.replace(' ', '_')
         if not (name == 'ALPHA+BETA ELECTRONS'.lower() or name == 'ALPHA-BETA ELECTRONS'.lower()):
             return ParsedSection(
                 mulliken_indices[0], mulliken, 'was expecting mulliken to be alpha+beta or alpha-beta on line:'
                 ' {0}, got: {1}'.format(indx - 1, lines[indx - 1]))
 
-        mulliken[name.replace(' ', '_')] = {'ids': [], 'symbols': [], 'atomic_numbers': [], 'charges': []}
-
         if len(mulliken_indices) > i + 1:
             searchlines = lines[indx + 1:mulliken_indices[i + 1]]
         else:
             searchlines = lines[indx + 1:]
-        charge_line = None
+
+        data_ao = {}
+        data_shell = {}
+
         for j, line in enumerate(searchlines):
-            if fnmatch(line.strip(), '*ATOM*Z*CHARGE*SHELL*POPULATION*'):
+            if fnmatch(line.strip(), '*ATOM*Z*CHARGE*A.O.*POPULATION*'):
                 charge_line = j + 2
-                break
-        if charge_line is None:
-            continue
 
-        while searchlines[charge_line].strip() and not searchlines[charge_line].strip()[0].isalpha():
-            fields = searchlines[charge_line].strip().split()
-            # shell population can wrap multiple lines, the one we want has the label in it
-            if len(fields) != len(split_numbers(searchlines[charge_line])):
-                mulliken[name.replace(' ', '_')]['ids'].append(int(fields[0]))
-                mulliken[name.replace(' ', '_')]['symbols'].append(fields[1].lower().capitalize())
-                mulliken[name.replace(' ', '_')]['atomic_numbers'].append(int(fields[2]))
-                mulliken[name.replace(' ', '_')]['charges'].append(float(fields[3]))
+                while searchlines[charge_line].strip() and not searchlines[charge_line].strip()[0].isalpha():
+                    fields = searchlines[charge_line].strip().split()
+                    # a.o. population can wrap multiple lines
+                    if len(fields) != len(split_numbers(searchlines[charge_line])):
+                        data_ao.setdefault('ids', []).append(int(fields[0]))
+                        data_ao.setdefault('symbols', []).append(fields[1].lower().capitalize())
+                        data_ao.setdefault('atomic_numbers', []).append(int(fields[2]))
+                        data_ao.setdefault('charges', []).append(float(fields[3]))
+                        data_ao.setdefault('aos', []).append([float(f) for f in fields[4:]])
+                    else:
+                        data_ao['aos'][-1].extend(split_numbers(searchlines[charge_line]))
+
+                    charge_line += 1
+
+            elif fnmatch(line.strip(), '*ATOM*Z*CHARGE*SHELL*POPULATION*'):
+                charge_line = j + 2
+
+                while searchlines[charge_line].strip() and not searchlines[charge_line].strip()[0].isalpha():
+                    fields = searchlines[charge_line].strip().split()
+                    # shell population can wrap multiple lines
+                    if len(fields) != len(split_numbers(searchlines[charge_line])):
+                        data_shell.setdefault('ids', []).append(int(fields[0]))
+                        data_shell.setdefault('symbols', []).append(fields[1].lower().capitalize())
+                        data_shell.setdefault('atomic_numbers', []).append(int(fields[2]))
+                        data_shell.setdefault('charges', []).append(float(fields[3]))
+                        data_shell.setdefault('shells', []).append([float(f) for f in fields[4:]])
+                    else:
+                        data_shell['shells'][-1].extend(split_numbers(searchlines[charge_line]))
+
+                    charge_line += 1
+
+        # TODO check consistency of ids, ...
+        data_ao.update(data_shell)
 
-            charge_line += 1
+        mulliken[key_name] = data_ao
 
     return ParsedSection(mulliken_indices[0], mulliken)
 
 
 def extract_final_info(parsed_data):
-    """extract the final energies and primitive geometry/symmetry
+    """Extract the final energies and primitive geometry/symmetry
     from the relevant sections of the parse data
     (depending if it was an optimisation or not)
     """

diff --git a/aiida_crystal17/parsers/raw/main_out.py b/aiida_crystal17/parsers/raw/main_out.py
@@ -13,9 +13,7 @@
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU Lesser General Public License for more details.
-"""
-parse the main output file and create the required output nodes
-"""
+"""Parse the main output file and create the required output nodes."""
 from collections import Mapping
 import traceback
 from aiida_crystal17.symmetry import convert_structure
@@ -82,7 +80,7 @@ def __init__(self):
 
 # pylint: disable=too-many-locals,too-many-statements
 def parse_main_out(fileobj, parser_class, init_struct=None, init_settings=None):
-    """ parse the main output file and create the required output nodes
+    """Parse the main output file and create the required output nodes.
 
     :param fileobj: handle to main output file
     :param parser_class: a string denoting the parser class
@@ -118,7 +116,6 @@ def parse_main_out(fileobj, parser_class, init_struct=None, init_settings=None):
     # TODO could also read .gui file for definitive final (primitive) geometry,
     # with symmetries
     # TODO could also read .SCFLOG, to get scf output for each opt step
-    # TODO could also read files in .optstory folder,
     # to get (primitive) geometries (+ symmetries) for each opt step
     # Note the above files are only available for optimisation runs
 
@@ -157,6 +154,7 @@ def parse_main_out(fileobj, parser_class, init_struct=None, init_settings=None):
     _extract_symmetry(final_info, init_settings, results_data, parser_result, exit_codes)
 
     if mulliken_analysis is not None:
+        # TODO output Mulliken analysis as separate ArrayData node
         _extract_mulliken(mulliken_analysis, results_data)
 
     parser_result.nodes.results = DataFactory('dict')(dict=results_data)
@@ -168,8 +166,7 @@ def parse_main_out(fileobj, parser_class, init_struct=None, init_settings=None):
 
 
 def _extract_symmetry(final_data, init_settings, param_data, parser_result, exit_codes):
-    """extract symmetry operations"""
-
+    """Extract symmetry operations."""
     if 'primitive_symmops' not in final_data:
         param_data['parser_errors'].append('primitive symmops were not found in the output file')
         parser_result.exit_code = exit_codes.ERROR_SYMMETRY_NOT_FOUND
@@ -238,6 +235,10 @@ def _extract_structure(final_data, init_struct, results_data, parser_result, exi
 
 def _extract_mulliken(data, param_data):
     """Extract mulliken electronic charge partition data."""
+    for mtype in ['alpha+beta_electrons', 'alpha-beta_electrons']:
+        data.get(mtype, {}).pop('aos', None)
+        data.get(mtype, {}).pop('shells', None)
+
     if 'alpha+beta_electrons' in data:
         electrons = data['alpha+beta_electrons']['charges']
         anum = data['alpha+beta_electrons']['atomic_numbers']

diff --git a/...a_crystal17/parsers/raw/gcube_to_vesta.py → aiida_crystal17/parsers/raw/vesta.py b/...a_crystal17/parsers/raw/gcube_to_vesta.py → aiida_crystal17/parsers/raw/vesta.py
@@ -32,6 +32,7 @@
 def get_default_settings():
     """Return dict of default settings."""
     return {
+        'sites': {},
         'bounds': {
             'xmin': 0,
             'xmax': 1,
@@ -97,13 +98,13 @@ def get_complete_settings(settings):
     return defaults
 
 
-def create_vesta_input(cube_data, cube_filepath, settings=None):
+def create_vesta_input(atoms, cube_filepath=None, settings=None):
     """Return the file content of a VESTA input file.
 
     Parameters
     ----------
-    cube_data: aiida_crystal17.parsers.raw.gaussian_cube.GcubeResult
-    cube_filepath: str
+    atoms: ase.Atoms
+    cube_filepath: str or None
     settings: dict
         Settings that will be merged with the default settings,
         and validated against 'vesta_input.schema.json'
@@ -121,12 +122,6 @@ def create_vesta_input(cube_data, cube_filepath, settings=None):
         if not settings['2d_display'][dim + 'min'] < settings['2d_display'][dim + 'max']:
             raise ValueError('2d_display: {}min must be less than {}max'.format(dim))
 
-    atoms = ase.Atoms(
-        cell=cube_data.cell,
-        positions=cube_data.atoms_positions,
-        numbers=cube_data.atoms_atomic_number,
-        pbc=True,
-    )
     el_info = {s: SymbolInfo(*VESTA_ELEMENT_INFO[s]) for s in set(atoms.get_chemical_symbols())}
 
     # header
@@ -139,12 +134,13 @@ def create_vesta_input(cube_data, cube_filepath, settings=None):
         ''
         # NB: originally used cube_data.header[0],
         # but the file load can fail if a key word (like CRYSTAL) is in the title
-        'GAUSSIAN_CUBE_DATA',
+        'AIIDA_DATA',
         '',
     ]
 
     # density input
-    lines.extend(['IMPORT_DENSITY 1', '+1.000000 {}'.format(cube_filepath), ''])
+    if cube_filepath is not None:
+        lines.extend(['IMPORT_DENSITY 1', '+1.000000 {}'.format(cube_filepath), ''])
 
     # symmetry
     lines.extend([
@@ -188,15 +184,23 @@ def create_vesta_input(cube_data, cube_filepath, settings=None):
     lines.append('STRUC')
     for i, ((x, y, z), symbol) in enumerate(zip(atoms.get_scaled_positions(),
                                                 atoms.get_chemical_symbols())):  # type: (int, ase.Atom)
-        lines.append('  {idx:<2d} {sym:<2s} {sym:>2s}{idx:<2d} {occ:.6f} {x:.6f} {y:.6f} {z:.6f} {wyck} -'.format(
-            idx=i + 1, sym=symbol, occ=1.0, x=x, y=y, z=z, wyck='1'))
-        lines.append('    0.000000   0.000000   0.000000  0.00')
+        label = settings['sites'].get(str(i + 1), {}).get('label', '{sym:s}{idx:d}'.format(sym=symbol, idx=i + 1))
+        lines.append('  {idx:<2d} {sym:<2s} {label:4s} {occ:.6f} {x:.6f} {y:.6f} {z:.6f} {wyck} -'.format(idx=i + 1,
+                                                                                                          sym=symbol,
+                                                                                                          label=label,
+                                                                                                          occ=1.0,
+                                                                                                          x=x,
+                                                                                                          y=y,
+                                                                                                          z=z,
+                                                                                                          wyck='1'))
+        lines.append('    0.000000   0.000000   0.000000  {charge:.6f}'.format(charge=0))
     lines.append('  0 0 0 0 0 0 0')
 
     # isotropic displacement parameter
     lines.append('THERI 0')
     for i, atom in enumerate(atoms):  # type: (int, ase.Atom)
-        lines.append('  {idx:<2d} {sym:>2s}{idx:<2d}  1.000000'.format(idx=i + 1, sym=atom.symbol))
+        label = settings['sites'].get(str(i + 1), {}).get('label', '{sym:s}{idx:d}'.format(sym=atom.symbol, idx=i + 1))
+        lines.append('  {idx:<2d} {label:4s}  1.000000'.format(idx=i + 1, label=label))
     lines.append('  0 0 0')
 
     lines.extend(['SHAPE', '  0       0       0       0   0.000000  0   192   192   192   192'])
@@ -227,18 +231,27 @@ def create_vesta_input(cube_data, cube_filepath, settings=None):
     lines.append('  0 0 0 0')
 
     # site radii and colors
-    # TODO allow bespoke site colors
     lines.append('SITET')
-    lines.extend([
-        '  {idx:<2d} {sym:>2s}{idx:<2d} {rad:.6f} {r} {g} {b} {r} {g} {b}  100  0'.format(
+    for i, atom in enumerate(atoms):
+        symbol = atom.symbol
+        if str(i + 1) in settings.get('sites', {}):
+            label = settings['sites'][str(i + 1)].get('label', '{sym:s}{idx:d}'.format(sym=symbol, idx=i + 1))
+            radius = settings['sites'][str(i + 1)].get('radius', el_info[symbol].radius)
+            red, green, blue = settings['sites'][str(i + 1)].get(
+                'color', (el_info[symbol].r, el_info[symbol].g, el_info[symbol].b))
+        else:
+            label = '{sym:s}{idx:d}'.format(sym=symbol, idx=i + 1)
+            radius = el_info[symbol].radius
+            red, green, blue = (el_info[symbol].r, el_info[symbol].g, el_info[symbol].b)
+        lines.append('  {idx:<2d} {label:4s} {rad:.6f} {r} {g} {b} {r} {g} {b}  100  {show_label}'.format(
             idx=i + 1,
-            sym=a.symbol,
-            rad=el_info[a.symbol].radius,
-            r=int(el_info[a.symbol].r * 255),
-            g=int(el_info[a.symbol].g * 255),
-            b=int(el_info[a.symbol].b * 255),
-        ) for i, a in enumerate(atoms)
-    ])
+            label=label,
+            rad=radius,
+            r=int(red * 255),
+            g=int(green * 255),
+            b=int(blue * 255),
+            show_label=0  # NB: needs to be used in conjunction with LBLAT
+        ))
     lines.append('  0 0 0 0 0 0')
 
     # additional lines (currently hardcoded)
@@ -442,7 +455,7 @@ def create_vesta_input(cube_data, cube_filepath, settings=None):
     return '\n'.join(lines)
 
 
-def write_vesta_files(
+def write_gcube_to_vesta(
         aiida_gcube,
         folder_path,
         file_name,
@@ -465,7 +478,13 @@ def write_vesta_files(
     vesta_filepath = os.path.join(folder_path, '{}.vesta'.format(file_name))
     with aiida_gcube.open_cube_file() as handle:
         cube_data = read_gaussian_cube(handle, return_density=False, dist_units='angstrom')
-        content = create_vesta_input(cube_data, os.path.basename(cube_filepath), settings=settings)
+        atoms = ase.Atoms(
+            cell=cube_data.cell,
+            positions=cube_data.atoms_positions,
+            numbers=cube_data.atoms_atomic_number,
+            pbc=True,
+        )
+        content = create_vesta_input(atoms, cube_filepath=os.path.basename(cube_filepath), settings=settings)
         with io.open(vesta_filepath, 'w') as out_handle:
             out_handle.write(six.ensure_text(content))
     with aiida_gcube.open_cube_file(binary=True) as handle: