From e8bf0dd5bf3cab290f8027f170ce4caa51fc8dfc Mon Sep 17 00:00:00 2001 From: chrisiacovella Date: Wed, 13 Sep 2023 23:09:56 -0700 Subject: [PATCH] edits. --- modelforge/dataset/dataset.py | 6 +++++- modelforge/tests/test_curation.py | 13 +++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/modelforge/dataset/dataset.py b/modelforge/dataset/dataset.py index 0a5279b0..4dc08bac 100644 --- a/modelforge/dataset/dataset.py +++ b/modelforge/dataset/dataset.py @@ -146,11 +146,15 @@ def _from_hdf5(self) -> None: for mol in tqdm.tqdm(list(hf.keys())): n_configs = hf[mol]["n_configs"][()] for i in range(n_configs): + temp_data = {} + contains_nan = False for value in self.properties_of_interest: # if we have a series, we will index into it if hf[mol][value].attrs["series"]: - data[value].append(hf[mol][value][i]) + temp_data[value] = hf[mol][value][i] + if else: # if we do not have a series, just append the value + temp_data[value] = hf[mol][value][()] data[value].append(hf[mol][value][()]) self.hdf5data = data diff --git a/modelforge/tests/test_curation.py b/modelforge/tests/test_curation.py index d5510fc6..320ba914 100644 --- a/modelforge/tests/test_curation.py +++ b/modelforge/tests/test_curation.py @@ -451,6 +451,19 @@ def test_an1_process_download_short(prep_temp_dir): ani1_data._process_downloaded(str(local_data_path), hdf5_file) # ani1_n5.hdf5 datafile includes entries [843, 861, 872, 930, 932] from the full datafile + # Example code snippet used to generate this file + # + # input_file_name = "ani1x-release.h5" + # + # with h5py.File(input_file_name, "r") as hf_in: + # with h5py.File('ani1_n5.hdf5', "w") as hf_out: + # test_names = list(hf_in.keys()) + # test_names2 = [] + # for i in [843, 861, 872, 930, 932]: + # test_names2.append(test_names[i]) + # for test_name in test_names2: + # hf_in.copy(hf_in[test_name], hf_out) + assert len(ani1_data.data) == 5 assert ani1_data.data[0]["name"] == "C1H4N4O4"