BUG: Fix various input parsing bugs related to the distributed jobs #393

Merged
merged 8 commits on Oct 11, 2023
10 changes: 1 addition & 9 deletions .github/workflows/pypi.yaml
@@ -88,9 +88,6 @@ jobs:
- ["cp310", "3.10"]
- ["cp311", "3.11"]
- ["cp312", "3.12"]
exclude:
- buildplat: [macos-latest, macosx_x86_64, macosx_10_14_x86_64]
version: ["cp312", "3.12"]

steps:
- uses: actions/checkout@v4
@@ -105,15 +102,9 @@ jobs:
name: wheels
path: dist

# TODO: Remove the manual h5py building once h5py 3.10 has been released with cp312 wheels
- name: Install dependencies
run: |
WHL_NAME=$(python scripts/get_whl_name.py dist ${{ matrix.buildplat[2] }})
if [[ "${{ matrix.version[1] }}" == '3.12' ]]; then
sudo apt-get update
sudo apt-get install libhdf5-dev
pip install git+https://github.com/h5py/h5py@89e1e2e78d7fb167d2a67c9a8354ced6491160fe
fi
pip install "$WHL_NAME"[test] --prefer-binary

- name: Python info
@@ -163,6 +154,7 @@ jobs:
- ["cp39", "python3.9"]
- ["cp310", "python3.10"]
- ["cp311", "python3.11"]
- ["cp312", "python3.12"]

steps:
- uses: actions/checkout@v4
10 changes: 2 additions & 8 deletions .github/workflows/pythonapp.yml
@@ -24,7 +24,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest]
version: ["3.8", "3.9", "3.10", "3.11"]
version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
special:
- ["", ""]
include:
@@ -35,7 +35,7 @@
- os: ubuntu-latest
version: "3.11"
special: ["GCC", "12"]
# CP2K 6.1/7.1/8.2/9.1 tests
# CP2K 6.1/7.1/8.2/9.1/2022.1 tests
- os: ubuntu-latest
version: "3.11"
special: ["CP2K", "6.1"]
@@ -153,15 +153,9 @@ jobs:
with:
python-version: ${{ matrix.version }}

# TODO: Remove the manual h5py building once h5py 3.10 has been released with cp312 wheels
- name: Install dependencies
run: |
WHL_NAME=$(python scripts/get_whl_name.py wheelhouse manylinux2014_x86_64)
if [[ "${{ matrix.version }}" == '3.12' ]]; then
sudo apt-get update
sudo apt-get install libhdf5-dev
pip install git+https://github.com/h5py/h5py@89e1e2e78d7fb167d2a67c9a8354ced6491160fe
fi
pip install "$WHL_NAME"[test]

- name: Python info
4 changes: 2 additions & 2 deletions nanoqm/common.py
@@ -59,7 +59,7 @@
_path_valence_electrons = Path(nanoqm_path[0]) / "basis" / "valence_electrons.json"
_path_aux_fit = Path(nanoqm_path[0]) / "basis" / "aux_fit.json"

with open(_path_valence_electrons, 'r') as f1, open(_path_aux_fit, 'r') as f2:
with open(_path_valence_electrons, 'r', encoding="utf8") as f1, open(_path_aux_fit, 'r', encoding="utf8") as f2: # noqa
valence_electrons: "dict[str, int]" = json.load(f1)
aux_fit: "dict[str, list[int]]" = json.load(f2)

@@ -395,7 +395,7 @@ def read_cell_parameters_as_array(
"""Read the cell parameters as a numpy array."""
arr = np.loadtxt(file_cell_parameters, skiprows=1)

with open(file_cell_parameters, 'r') as f:
with open(file_cell_parameters, 'r', encoding="utf8") as f:
header = f.readline()

return header, arr
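The recurring change in this and the following files is passing an explicit `encoding` to `open()`. Without it, Python decodes with the locale's preferred encoding, which can differ between machines (or be plain ASCII on a minimally configured compute node). A minimal sketch of the difference, assuming UTF-8 JSON input; the file name is only illustrative:

```python
import locale

# The implicit default used when open() is called without an encoding;
# usually UTF-8 on Linux, but it follows the locale and can be e.g. ASCII.
print(locale.getpreferredencoding(False))

# With an explicit encoding, decoding no longer depends on the environment.
with open("valence_electrons.json", "r", encoding="utf8") as f:
    text = f.read()
```

On Python 3.10+, running with `python -X warn_default_encoding` emits an `EncodingWarning` for any remaining call sites that still rely on the implicit default.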
2 changes: 1 addition & 1 deletion nanoqm/schedule/components.py
@@ -307,7 +307,7 @@ def create_point_folder(
def split_file_geometries(path_xyz: PathLike) -> list[str]:
"""Read a set of molecular geometries in xyz format."""
# Read Cartesian Coordinates
with open(path_xyz) as f:
with open(path_xyz, "r", encoding="utf8") as f:
xss = iter(f.readlines())

data = []
2 changes: 1 addition & 1 deletion nanoqm/schedule/scheduleCP2K.py
@@ -136,7 +136,7 @@ def print_cp2k_error(path_dir: str | os.PathLike[str], prefix: str) -> str:
err_file = next(Path(path_dir).glob(f"*{prefix}"), None)
msg = ""
if err_file is not None:
with open(err_file, 'r') as handler:
with open(err_file, 'r', encoding="utf8") as handler:
err = handler.read()
msg = f"CP2K {prefix} file:\n{err}\n"
logger.error(msg)
25 changes: 20 additions & 5 deletions nanoqm/workflows/distribute_jobs.py
@@ -27,6 +27,7 @@

from __future__ import annotations

import sys
import copy
import argparse
import os
Expand All @@ -42,6 +43,8 @@
from .input_validation import process_input
from .. import _data

__all__ = ["distribute_jobs"]


def read_cmd_line() -> str:
"""Read the input file and the workflow name from the command line."""
@@ -59,8 +62,12 @@ def main() -> None:
"""Distribute the user specified by the user."""
# command line argument
input_file = read_cmd_line()
distribute_jobs(input_file)


with open(input_file, 'r') as f:
def distribute_jobs(input_file: str) -> None:
"""Distribute the user specified by the user."""
with open(input_file, 'r', encoding="utf8") as f:
args = yaml.load(f, Loader=UniqueSafeLoader)

# Read and process input
@@ -132,6 +139,13 @@ def distribute_computations(config: _data.Distribute, hamiltonians: bool = False
path_ham = f"{config.orbitals_type}_hamiltonians"
dict_input.hamiltonians_dir = join(copy_config.scratch_path, path_ham)

# Disable keys that imply the necessity of pre-processing in the newly chunked jobs
# (as that's already done in this function)
for name in ["stride", "multiplicity"]:
# Attributes set to `NotImplemented` are ignored when writing the input
if hasattr(copy_config, name):
setattr(copy_config, name, NotImplemented)

# Write input file
write_input(folder_path, copy_config)

@@ -170,7 +184,7 @@ def write_input(folder_path: str | os.PathLike[str], original_config: _data.Dist
}
workflow_type = config["workflow"].lower()
config['workflow'] = dict_distribute[workflow_type]
with open(file_path, "w") as f:
with open(file_path, "w", encoding="utf8") as f:
yaml.dump(config, f, default_flow_style=False, allow_unicode=True)


@@ -210,7 +224,7 @@ def write_slurm_script(
content = format_slurm_parameters(slurm_config) + python + mkdir + copy

# Write the script
with open(join(dict_input.folder_path, "launch.sh"), 'w') as f:
with open(join(dict_input.folder_path, "launch.sh"), 'w', encoding="utf8") as f:
f.write(content)


@@ -237,11 +251,12 @@ def format_slurm_parameters(slurm: _data.JobScheduler) -> str:

def compute_number_of_geometries(file_name: str | os.PathLike[str]) -> int:
"""Count the number of geometries in XYZ formant in a given file."""
with open(file_name, 'r') as f:
with open(file_name, 'r', encoding="utf8") as f:
numat = int(f.readline())

cmd = f"wc -l {os.fspath(file_name)}"
wc = subprocess.getoutput(cmd).split()[0]
kwargs = {"encoding": "utf8"} if sys.version_info >= (3, 11) else {}
wc = subprocess.getoutput(cmd, **kwargs).split()[0]

lines_per_geometry = numat + 2

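Splitting `main()` into a thin CLI wrapper plus an importable `distribute_jobs()` (now exported via `__all__`) also makes the distribution step callable from Python. A minimal sketch of the programmatic use; the input file name is a placeholder:

```python
from nanoqm.workflows.distribute_jobs import distribute_jobs

# "distribute_couplings.yml" is hypothetical; any input accepted by the
# distribute-jobs CLI should work, since main() now just forwards to this call.
distribute_jobs("distribute_couplings.yml")
```

The `sys.version_info >= (3, 11)` guard in `compute_number_of_geometries` is there because `subprocess.getoutput` only accepts an `encoding` keyword from Python 3.11 onward; older interpreters fall back to the default text decoding.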
9 changes: 5 additions & 4 deletions nanoqm/workflows/initialization.py
@@ -52,7 +52,8 @@
def initialize(config: _data.GeneralOptions) -> None:
"""Initialize all the data required to schedule the workflows."""
with EnableFileHandler(f'{config.project_name}.log'):
logger.info(f"Using nano-qmflows version: {qmflows.__version__} ")
logger.info(f"Using qmflows version: {qmflows.__version__} ")
logger.info(f"Using nano-qmflows version: {__version__} ")
logger.info(f"nano-qmflows path is: {nanoqm_path[0]}")
logger.info(f"Working directory is: {os.path.abspath('.')}")
logger.info(f"Data will be stored in HDF5 file: {config.path_hdf5}")
@@ -213,7 +214,7 @@ def split_trajectory(path: str | Path, nblocks: int, pathOut: str | os.PathLike[
list of paths to the xyz geometries

"""
with open(path, 'r') as f:
with open(path, 'r', encoding="utf8") as f:
# Read First line
ls = f.readline()
numat = int(ls.split()[0])
@@ -234,11 +235,11 @@
# Path where the split xyz files are written
prefix = join(pathOut, 'chunk_xyz_')
cmd = f'split -a 1 -l {lines_per_block} {path} {prefix}'
output = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE, shell=True)
output = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE, shell=True, encoding="utf8")
rs = output.communicate()
err = rs[1]
if err:
raise RuntimeError(f"Submission Errors: {err.decode()}")
raise RuntimeError(f"Submission Errors: {err}")
else:
return fnmatch.filter(os.listdir(), "chunk_xyz_?")

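Passing `encoding="utf8"` to `Popen` opens the pipes in text mode, so `communicate()` already returns `str` and the old `err.decode()` would now fail on a string. A small standalone sketch of the same pattern; the command is only illustrative:

```python
from subprocess import PIPE, Popen

# Text-mode pipes: stdout and stderr are decoded with the given encoding,
# so no manual .decode() is needed afterwards.
proc = Popen("split --help", stdout=PIPE, stderr=PIPE, shell=True, encoding="utf8")
out, err = proc.communicate()
if err:
    raise RuntimeError(f"Submission Errors: {err}")
```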
6 changes: 3 additions & 3 deletions nanoqm/workflows/input_validation.py
@@ -94,14 +94,14 @@ def process_input(input_file: PathLike, workflow_name: str) -> _data.GeneralOpti
"""
schema = schema_workflows[workflow_name]

with open(input_file, 'r') as f:
with open(input_file, 'r', encoding="utf8") as f:
dict_input = yaml.load(f.read(), Loader=UniqueSafeLoader)

try:
d = schema.validate(dict_input)
return InputSanitizer(d).sanitize()
except SchemaError as e:
msg = f"There was an error in the input yaml provided:\n{e}"
msg = f"There was an error in the provided {workflow_name!r} input yaml:\n{e}"
logger.warning(msg)
raise

@@ -317,5 +317,5 @@ def add_mo_index_range(self) -> None:
def print_final_input(self) -> None:
"""Print the input after post-processing."""
xs = self.user_input.asdict()
with open("input_parameters.yml", "w") as f:
with open("input_parameters.yml", "w", encoding="utf8") as f:
yaml.dump(xs, f, indent=4)
2 changes: 1 addition & 1 deletion nanoqm/workflows/run_workflow.py
@@ -49,7 +49,7 @@ def main() -> None:
"""Parse the command line arguments and run workflow."""
args = parser.parse_args()
input_file: str = args.i
with open(input_file, 'r') as f:
with open(input_file, 'r', encoding="utf8") as f:
dict_input = yaml.load(f, Loader=UniqueSafeLoader)
if 'workflow' not in dict_input:
raise RuntimeError(
4 changes: 2 additions & 2 deletions nanoqm/workflows/schemas.py
@@ -132,10 +132,10 @@ def validate(self, data: Any, **kwargs: Any) -> Any:
Optional("potential_file_name", default=None): Use(_parse_filenames),

# Name(s) of the exchange part of the DFT functional
Optional("functional_x", default=None): str,
Optional("functional_x", default=None): Or(str, None),

# Name(s) of the correlation part of the DFT functional
Optional("functional_c", default=None): str,
Optional("functional_c", default=None): Or(str, None),

# Settings describing the input of the quantum package
"cp2k_settings_main": Use(Settings),
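With the previous `Optional("functional_x", default=None): str`, the default could be `None`, but a user who explicitly wrote `functional_x: null` in the YAML failed validation; `Or(str, None)` accepts both spellings. A reduced sketch with the `schema` package, keeping only the two affected keys (the functional name is illustrative):

```python
from schema import Optional, Or, Schema

functional_schema = Schema({
    Optional("functional_x", default=None): Or(str, None),
    Optional("functional_c", default=None): Or(str, None),
})

functional_schema.validate({})                       # both keys fall back to None
functional_schema.validate({"functional_x": "PBE"})  # a string still validates
functional_schema.validate({"functional_x": None})   # an explicit null now validates too
```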
2 changes: 1 addition & 1 deletion scripts/pyxaid/iconds_excess_energy.py
@@ -57,7 +57,7 @@ def main(path_output: str, nstates: int, iconds: list[int], excess: float, delta
for i in range(len(iconds)):
t += f' {iconds[i]} {indexes[i][0] + 1}\n'

with open('initial_conditions.out', 'w') as f:
with open('initial_conditions.out', 'w', encoding="utf8") as f:
f.write(t)


4 changes: 2 additions & 2 deletions scripts/qmflows/coordination_ldos.py
@@ -78,7 +78,7 @@ def store_optimized_molecule(optimized_geometry: Molecule, name: str, path_resul
path_geometry = f"{path_results}/{name}"
if not os.path.exists(path_geometry):
os.mkdir(path_geometry)
with open(f"{path_geometry}/{name}_OPT.xyz", 'w') as f:
with open(f"{path_geometry}/{name}_OPT.xyz", 'w', encoding="utf8") as f:
optimized_geometry.writexyz(f)


@@ -141,7 +141,7 @@ def store_coordination(coord: NestedDict, name: str, path_results: str) -> None:
t += f'{v[0]} {v[1]} "list{i}" {v[2]}\n'

path_ldos = f"{path_results}/{name}"
with open(f"{path_ldos}/coord_lists.out", 'w') as f:
with open(f"{path_ldos}/coord_lists.out", 'w', encoding="utf8") as f:
f.write(t)


2 changes: 1 addition & 1 deletion scripts/qmflows/mergeHDF5.py
@@ -67,7 +67,7 @@ def main() -> None:

def touch(fname: str, times: tuple[float, float] | None = None) -> None:
"""Equivalent to unix touch command"""
with open(fname, 'a'):
with open(fname, 'a', encoding="utf8"):
os.utime(fname, times)


2 changes: 1 addition & 1 deletion scripts/qmflows/plot_dos.py
@@ -31,7 +31,7 @@

def readatom(filename: str) -> str:
# In the first line in column 6, the atom is defined
with open(filename, 'r') as f:
with open(filename, 'r', encoding="utf8") as f:
atom = f.readline().split()[6]
return atom

6 changes: 4 additions & 2 deletions test/test_cli.py
@@ -32,11 +32,13 @@ def test_run_workflow(mocker: MockFixture, tmp_path: Path):
def test_run_workflow_no_workflow(mocker: MockFixture, tmp_path: Path):
"""Check that an error is raised if not workflow is provided."""
# remove workflow keyword
with open(PATH_TEST / "input_fast_test_derivative_couplings.yml", 'r') as handler:
with open(
PATH_TEST / "input_fast_test_derivative_couplings.yml", 'r', encoding="utf8"
) as handler:
input = yaml.load(handler, UniqueSafeLoader)
input.pop('workflow')
path_input = tmp_path / "wrong_input.yml"
with open(path_input, 'w') as handler:
with open(path_input, 'w', encoding="utf8") as handler:
yaml.dump(input, handler)

with pytest.raises(RuntimeError) as info:
2 changes: 1 addition & 1 deletion test/test_cpk2_error_call.py
@@ -10,7 +10,7 @@
def test_cp2k_call_error(tmp_path: Path):
"""Check cp2k error files."""
path_err = tmp_path / "cp2k.err"
with open(path_err, 'w') as handler:
with open(path_err, 'w', encoding="utf8") as handler:
handler.write("Some CP2K error")

with pytest.raises(RuntimeError) as info: