Skip to content

Commit

Permalink
🖨️ Fix error text in BCO and KTO tokenizing function (#2286)
Browse files Browse the repository at this point in the history
  • Loading branch information
PhilipMay authored Nov 11, 2024
1 parent 015321e commit dde20b2
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 2 deletions.
4 changes: 3 additions & 1 deletion trl/trainer/bco_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,9 @@ def _tokenize(
full_input_ids = [np.array(f) for f in full_input_ids]
for full, concat in zip(full_input_ids, full_concat_input_ids):
if len(full) != len(concat):
- raise ValueError("Prompt input ids and answer input ids should have the same length.")
+ raise ValueError(
+ "The elements in 'full_input_ids' and 'full_concat_input_ids' must have the same pairwise length."
+ )

# On some tokenizers, like Llama-2 tokenizer, there are occasions where tokens
# can be merged together when tokenizing prompt+answer. This could result
Expand Down
4 changes: 3 additions & 1 deletion trl/trainer/kto_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,9 @@ def _tokenize(
full_input_ids = [np.array(f) for f in full_input_ids]
for full, concat in zip(full_input_ids, full_concat_input_ids):
if len(full) != len(concat):
- raise ValueError("Prompt input ids and answer input ids should have the same length.")
+ raise ValueError(
+ "The elements in 'full_input_ids' and 'full_concat_input_ids' must have the same pairwise length."
+ )

# On some tokenizers, like Llama-2 tokenizer, there are occasions where tokens
# can be merged together when tokenizing prompt+answer. This could result
Expand Down

0 comments on commit dde20b2

Please sign in to comment.