Update examples and doc
qgallouedec committed Nov 22, 2024
1 parent f2c7794 commit a592521
Showing 3 changed files with 6 additions and 6 deletions.
4 changes: 2 additions & 2 deletions docs/source/detoxifying_a_lm.mdx
@@ -105,8 +105,8 @@ and the optimizer will take care of computing the gradients in `bfloat16` precis
</div>

```python
-ref_policy = create_reference_model(model, num_shared_layers=6)
-trainer = PPOTrainer(..., ref_policy=ref_policy)
+ref_model = create_reference_model(model, num_shared_layers=6)
+trainer = PPOTrainer(..., ref_model=ref_model)
```

In the example above, this means that the model has its first 6 layers frozen (since these layers are shared between the active model and the reference model).
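
For context on the renamed `ref_model` argument, here is a minimal sketch (not part of this commit) of what sharing layers implies; the `"gpt2"` checkpoint and the parameter checks below are illustrative assumptions:

```python
# Illustrative sketch, assuming TRL's `create_reference_model`; not part of
# this commit.
from trl import AutoModelForCausalLMWithValueHead, create_reference_model

model = AutoModelForCausalLMWithValueHead.from_pretrained("gpt2")
ref_model = create_reference_model(model, num_shared_layers=6)

# Shared layers are frozen in the active model, since the reference model
# must keep the starting-point weights fixed.
frozen = [name for name, param in model.named_parameters() if not param.requires_grad]
print(f"{len(frozen)} frozen parameter tensors in the active model")

# The shared parameters reuse the same underlying storage in both models,
# while the remaining layers are independent copies free to diverge.
shared = sum(
    p.data_ptr() == q.data_ptr()
    for p, q in zip(model.parameters(), ref_model.parameters())
)
print(f"{shared} parameter tensors shared with the reference model")
```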
4 changes: 2 additions & 2 deletions examples/scripts/ppo/ppo.py
@@ -154,8 +154,8 @@ def tokenize(element):
trainer = PPOTrainer(
    config=training_args,
    processing_class=tokenizer,
-   policy=policy,
-   ref_policy=ref_policy,
+   model=policy,
+   ref_model=ref_policy,
    reward_model=reward_model,
    value_model=value_model,
    train_dataset=train_dataset,
4 changes: 2 additions & 2 deletions examples/scripts/ppo/ppo_tldr.py
@@ -165,8 +165,8 @@ def tokenize(element):
trainer = PPOTrainer(
    config=training_args,
    processing_class=tokenizer,
-   policy=policy,
-   ref_policy=ref_policy,
+   model=policy,
+   ref_model=ref_policy,
    reward_model=reward_model,
    value_model=value_model,
    train_dataset=train_dataset,
