diff --git a/selfies/utils/encoding_utils.py b/selfies/utils/encoding_utils.py index 411c353..05d31c1 100644 --- a/selfies/utils/encoding_utils.py +++ b/selfies/utils/encoding_utils.py @@ -47,9 +47,18 @@ def selfies_to_encoding( selfies += "[nop]" * (pad_to_len - len_selfies(selfies)) # integer encode - char_list = split_selfies(selfies) - integer_encoded = [vocab_stoi[char] for char in char_list] - + integer_encoded = [] + for char in split_selfies(selfies): + if (char == ".") and ("." not in vocab_stoi): + raise KeyError( + "The SELFIES string contains two unconnected molecules " + "(given by the '.' character), but vocab_stoi does not " + "contain the '.' key. Please add it to the vocabulary " + "or separate the molecules." + ) + + integer_encoded.append(vocab_stoi[char]) + if enc_type == "label": return integer_encoded