Skip to content

Commit

Permalink
Add one-pass check for unconnected molecules
Browse files (browse the repository at this point in the history)
  • Loading branch information
vandrw committed Nov 21, 2023
1 parent 1d22f1d commit 00756c6
Showing 1 changed file with 5 additions and 8 deletions.
13 changes: 5 additions & 8 deletions selfies/utils/encoding_utils.py
Original file line number | Diff line number | Diff line change
@@ -47,21 +47,18 @@ def selfies_to_encoding(
selfies += "[nop]" * (pad_to_len - len_selfies(selfies))

# integer encode
char_list = split_selfies(selfies)

try:
integer_encoded = [vocab_stoi[char] for char in char_list]
except KeyError as e:
if e.args[0] == ".":
integer_encoded = []
for char in split_selfies(selfies):
if (char == ".") and ("." not in vocab_stoi):
raise KeyError(
"The SELFIES string contains two unconnected molecules "
"(given by the '.' character), but vocab_stoi does not "
"contain the '.' key. Please add it to the vocabulary "
"or separate the molecules."
)
raise KeyError(e.args[0])


integer_encoded.append(vocab_stoi[char])

if enc_type == "label":
return integer_encoded

Expand Down

0 comments on commit 00756c6

Please sign in to comment.