Skip to content

Commit

Permalink
Add new POTCAR validation to VASP validation plus tests (#892)
Browse files Browse the repository at this point in the history
* Fix issue with using dir_name != ./ in emmet.core.tasks.TaskDoc

* Revert previous change - actual fix is using path relative to dir_name of TaskDoc.from_directory

* Add new POTCAR summary_stats check for validation, two tests for POTCAR checking/IDing

* Adding builder side of commit and test files

* Remove MP_POTCAR_ID.json.gz test file

* Fix handling of missing POTCAR lib in tests

* cleanup tests

* Hopefully fix weird behavior in failing matcalc tests

---------

Co-authored-by: esoteric-ephemera <[email protected]>
  • Loading branch information
esoteric-ephemera and esoteric-ephemera authored Nov 16, 2023
1 parent 39edc68 commit ba01dff
Show file tree
Hide file tree
Showing 7 changed files with 119 additions and 18 deletions.
2 changes: 1 addition & 1 deletion emmet-builders/emmet/builders/vasp/task_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def __init__(
potcar = PotcarSingle.from_symbol_and_functional(
symbol=potcar_symbol, functional=functional
)
hashes[calc_type][potcar_symbol] = potcar.md5_header_hash
hashes[calc_type][potcar_symbol] = potcar._summary_stats

self.potcar_hashes = potcar_hashes
else:
Expand Down
2 changes: 1 addition & 1 deletion emmet-core/emmet/core/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ class CustodianDoc(BaseModel):
)
job: Optional[Any] = Field(
None,
title="Cusotodian Job Data",
title="Custodian Job Data",
description="Job data logged by custodian.",
)

Expand Down
47 changes: 33 additions & 14 deletions emmet-core/emmet/core/vasp/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def from_task_doc(
valid_input_set = None

if valid_input_set:
# Checking POTCAR hashes if a directory is supplied
# Checking POTCAR summary_stats if a directory is supplied
if potcar_hashes:
if _potcar_hash_check(task_doc, potcar_hashes):
if task_type in [
Expand Down Expand Up @@ -313,28 +313,47 @@ def _kspacing_warnings(input_set, inputs, data, warnings, kspacing_tolerance):

def _potcar_hash_check(task_doc, potcar_hashes):
"""
Checks to make sure the POTCAR hash is equal to the correct value from the
pymatgen input set.
Checks that the POTCAR summary stats match the reference
values from the pymatgen input set.
"""
data_tol = 1.0e-6

try:
potcar_details = task_doc.calcs_reversed[0]["input"]["potcar_spec"]

all_match = True
except KeyError:
# Assume it is an old calculation without potcar_spec data and treat it as passing POTCAR hash check
return False

for entry in potcar_details:
symbol = entry["titel"].split(" ")[1]
hash = potcar_hashes[str(task_doc.calc_type)].get(symbol, None)
all_match = True
for entry in potcar_details:
symbol = entry["titel"].split(" ")[1]
ref_summ_stats = potcar_hashes[str(task_doc.calc_type)].get(symbol, None)
if not ref_summ_stats:
all_match = False
break

key_match = all(
set(ref_summ_stats["keywords"][key])
== set(entry["summary_stats"]["keywords"][key])
for key in ["header", "data"]
)

if not hash or hash != entry["hash"]:
all_match = False
break
data_match = all(
abs(
ref_summ_stats["stats"][key][stat]
- entry["summary_stats"]["stats"][key][stat]
)
< data_tol
for stat in ["MEAN", "ABSMEAN", "VAR", "MIN", "MAX"]
for key in ["header", "data"]
)

return not all_match
if (not key_match) or (not data_match):
all_match = False
break

except KeyError:
# Assume it is an old calculation without potcar_spec data and treat it as passing POTCAR hash check
return False
return not all_match


def _magmom_check(task_doc, chemsys):
Expand Down
27 changes: 27 additions & 0 deletions emmet-core/tests/test_calculation.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,3 +161,30 @@ def test_calculation(test_dir, object_name, task_name):

# and decoded
MontyDecoder().process_decoded(d)


def test_PotcarSpec(test_dir):
    """Check that PotcarSpec reproduces the fields of pymatgen POTCAR objects.

    Both constructors are exercised: ``PotcarSpec.from_potcar_single`` with a
    single ``PotcarSingle`` and ``PotcarSpec.from_potcar`` with a ``Potcar``
    holding several entries.  If loading the POTCARs raises ``OSError`` or
    ``ValueError`` the test passes without checking anything.
    """
    from emmet.core.vasp.calculation import PotcarSpec
    from pymatgen.io.vasp import PotcarSingle, Potcar

    def _assert_spec_matches(spec, single):
        # A spec must carry the symbol, header hash and summary stats of the
        # PotcarSingle it was generated from.
        assert spec.titel == single.symbol
        assert spec.hash == single.md5_header_hash
        assert spec.summary_stats == single._summary_stats

    try:
        # First test, PotcarSingle object
        si_potcar = PotcarSingle.from_symbol_and_functional(
            symbol="Si", functional="PBE"
        )
        _assert_spec_matches(
            PotcarSpec.from_potcar_single(potcar_single=si_potcar), si_potcar
        )

        # Second test, Potcar object containing multiple PotcarSingle objects
        gaas_potcar = Potcar(symbols=["Ga_d", "As"], functional="PBE")
        specs = PotcarSpec.from_potcar(potcar=gaas_potcar)
        for index, spec in enumerate(specs):
            _assert_spec_matches(spec, gaas_potcar[index])

    except (OSError, ValueError):
        # missing Pymatgen POTCARs, cannot perform test
        assert True
2 changes: 1 addition & 1 deletion emmet-core/tests/test_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
("calculator", "prop_kwargs"),
[
(get_universal_calculator("chgnet"), None),
("m3gnet", {"ElasticityCalc": {"relax_structure": False}}),
("M3GNet-MP-2021.2.8-PES", {"ElasticityCalc": {"relax_structure": False}}),
],
)
def test_ml_doc(calculator: Union[str, "Calculator"], prop_kwargs: dict) -> None:
Expand Down
56 changes: 55 additions & 1 deletion emmet-core/tests/vasp/test_vasp.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from emmet.core.vasp.calc_types import RunType, TaskType, run_type, task_type
from emmet.core.vasp.task_valid import TaskDocument
from emmet.core.vasp.validation import ValidationDoc
from emmet.core.vasp.validation import ValidationDoc, _potcar_hash_check


def test_task_type():
Expand Down Expand Up @@ -85,3 +85,57 @@ def test_ldau_validation(test_dir):
valid = ValidationDoc.from_task_doc(task)

assert valid.valid


def test_potcar_hash_check(test_dir):
    """Exercise ``_potcar_hash_check`` against reference POTCAR summary stats.

    As used here, ``_potcar_hash_check`` returns a falsy value when the
    POTCAR check passes and a truthy value when it fails.  Three cases are
    covered:

    1. reference stats generated from the same POTCARs as the task -> passes;
    2. one POTCAR symbol missing from the reference -> fails;
    3. the ``data`` statistics of one reference entry perturbed -> fails.

    If the pymatgen POTCAR library is unavailable (``OSError`` or
    ``ValueError`` while loading a POTCAR) the test returns early without
    performing any check.
    """
    import copy

    from pymatgen.io.vasp import PotcarSingle

    with zopen(test_dir / "CoF_TaskDoc.json") as f:
        data = json.load(f)

    # NB: seems like TaskDoc is not fully compatible with TaskDocument;
    # dropping the `last_updated` key (and keeping every other key) ensures
    # TaskDocument can be built.
    # Similarly, after a TaskDoc is dumped to a file, using
    #     json.dump(jsanitize(<Task Doc>.model_dump()), <filename>)
    # the TaskDoc cannot be rebuilt without excluding the `orig_inputs` key.
    task_doc = TaskDocument(**{key: data[key] for key in data if key != "last_updated"})

    # Build the reference summary stats from the POTCARs named in the task.
    # Only this step touches the POTCAR library, so only this step is guarded:
    # a failure inside the checks below must not be mistaken for a missing
    # POTCAR library.
    calc_type = str(task_doc.calc_type)
    expected_hashes = {calc_type: {}}
    try:
        for spec in task_doc.calcs_reversed[0]["input"]["potcar_spec"]:
            symbol = spec["titel"].split(" ")[1]
            expected_hashes[calc_type][symbol] = (
                PotcarSingle.from_symbol_and_functional(
                    symbol=symbol, functional="PBE"
                )._summary_stats
            )
    except (OSError, ValueError):
        # missing Pymatgen POTCARs, cannot perform test
        return

    # First check: reference generated from POTCARs in TaskDoc, check should pass
    assert not _potcar_hash_check(task_doc, expected_hashes)

    # Second check: remove POTCAR from expected_hashes, check should fail
    missing_hashes = copy.deepcopy(expected_hashes)
    first_element = next(iter(missing_hashes[calc_type]))
    missing_hashes[calc_type].pop(first_element)
    assert _potcar_hash_check(task_doc, missing_hashes)

    # Third check: change data in expected hashes, check should fail.
    # A deep copy is required: the values are modified in place, and a shallow
    # copy would share the nested dicts with expected_hashes.
    wrong_hashes = copy.deepcopy(expected_hashes)
    data_stats = wrong_hashes[calc_type][first_element]["stats"]["data"]
    for key in data_stats:
        data_stats[key] *= 1.1
    assert _potcar_hash_check(task_doc, wrong_hashes)

    # The unmodified reference must still pass, i.e. the perturbation above
    # did not leak back into expected_hashes.
    assert not _potcar_hash_check(task_doc, expected_hashes)
1 change: 1 addition & 0 deletions test_files/CoF_TaskDoc.json

Large diffs are not rendered by default.

0 comments on commit ba01dff

Please sign in to comment.