From 1d22f1d517188c1f858609c5416d2ee310a3b847 Mon Sep 17 00:00:00 2001 From: vandrw Date: Sun, 19 Nov 2023 12:54:33 +0100 Subject: [PATCH] Move search for dot symbol in try-except --- selfies/utils/encoding_utils.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/selfies/utils/encoding_utils.py b/selfies/utils/encoding_utils.py index 05347c7..a2aabf3 100644 --- a/selfies/utils/encoding_utils.py +++ b/selfies/utils/encoding_utils.py @@ -47,17 +47,20 @@ def selfies_to_encoding( selfies += "[nop]" * (pad_to_len - len_selfies(selfies)) # integer encode - char_list = list(split_selfies(selfies)) - - # Check if SELFIES string contains unconnected molecules - if "." in list(char_list) and not "." in vocab_stoi: - raise ValueError( - "The SELFIES string contains two unconnected molecules " - "(given by the '.' character), but vocab_stoi does not " - "contain the '.' key. Please add it or separate the molecules." - ) - - integer_encoded = [vocab_stoi[char] for char in char_list] + char_list = split_selfies(selfies) + + try: + integer_encoded = [vocab_stoi[char] for char in char_list] + except KeyError as e: + if e.args[0] == ".": + raise KeyError( + "The SELFIES string contains two unconnected molecules " + "(given by the '.' character), but vocab_stoi does not " + "contain the '.' key. Please add it to the vocabulary " + "or separate the molecules." + ) + raise KeyError(e.args[0]) + if enc_type == "label": return integer_encoded