Skip to content

Commit

Permalink
Fix PPO/RLOO examples (#2100)
Browse files (browse the repository at this point in the history)
  • Loading branch information
lewtun authored Sep 23, 2024
1 parent 92eea1f commit 6859e04
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 4 deletions.
3 changes: 1 addition & 2 deletions examples/scripts/ppo/ppo.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@
--sft_model_path EleutherAI/pythia-1b-deduped \
--reward_model_path EleutherAI/pythia-1b-deduped \
--local_rollout_forward_batch_size 1 \
-    --deepspeed3 \
--missing_eos_penalty 1.0
"""

Expand Down Expand Up @@ -88,7 +87,7 @@
# Dataset
################
dataset = load_dataset("trl-internal-testing/descriptiveness-sentiment-trl-style", split="descriptiveness")
-eval_samples = 20
+eval_samples = 100
train_dataset = dataset.select(range(len(dataset) - eval_samples))
eval_dataset = dataset.select(range(len(dataset) - eval_samples, len(dataset)))
dataset_text_field = "prompt"
Expand Down
3 changes: 1 addition & 2 deletions examples/scripts/rloo/rloo.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@
--sft_model_path EleutherAI/pythia-1b-deduped \
--reward_model_path EleutherAI/pythia-1b-deduped \
--local_rollout_forward_batch_size 1 \
-    --deepspeed3 \
--missing_eos_penalty 1.0
"""

Expand Down Expand Up @@ -89,7 +88,7 @@
# Dataset
################
dataset = load_dataset("trl-internal-testing/descriptiveness-sentiment-trl-style", split="descriptiveness")
-eval_samples = 20
+eval_samples = 100
train_dataset = dataset.select(range(len(dataset) - eval_samples))
eval_dataset = dataset.select(range(len(dataset) - eval_samples, len(dataset)))
dataset_text_field = "prompt"
Expand Down

0 comments on commit 6859e04

Please sign in to comment.