Skip to content

Commit

Permalink
edits.
Browse files Browse the repository at this point in the history
  • Loading branch information
chrisiacovella committed Sep 14, 2023
1 parent 42ec430 commit e8bf0dd
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 1 deletion.
6 changes: 5 additions & 1 deletion modelforge/dataset/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,11 +146,15 @@ def _from_hdf5(self) -> None:
for mol in tqdm.tqdm(list(hf.keys())):
n_configs = hf[mol]["n_configs"][()]
for i in range(n_configs):
temp_data = {}
contains_nan = False
for value in self.properties_of_interest:
# if we have a series, we will index into it
if hf[mol][value].attrs["series"]:
data[value].append(hf[mol][value][i])
temp_data[value] = hf[mol][value][i]
if
else: # if we do not have a series, just append the value
temp_data[value] = hf[mol][value][()]
data[value].append(hf[mol][value][()])

self.hdf5data = data
Expand Down
13 changes: 13 additions & 0 deletions modelforge/tests/test_curation.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,19 @@ def test_an1_process_download_short(prep_temp_dir):
ani1_data._process_downloaded(str(local_data_path), hdf5_file)

# The ani1_n5.hdf5 data file contains five entries copied from the full data file
# (ani1x-release.h5): those at positions [843, 861, 872, 930, 932] in the list of its keys.
# Example code snippet used to generate this file:
#
# input_file_name = "ani1x-release.h5"
#
# with h5py.File(input_file_name, "r") as hf_in:
# with h5py.File('ani1_n5.hdf5', "w") as hf_out:
# test_names = list(hf_in.keys())
# test_names2 = []
# for i in [843, 861, 872, 930, 932]:
# test_names2.append(test_names[i])
# for test_name in test_names2:
# hf_in.copy(hf_in[test_name], hf_out)

assert len(ani1_data.data) == 5

assert ani1_data.data[0]["name"] == "C1H4N4O4"
Expand Down

0 comments on commit e8bf0dd

Please sign in to comment.