Rename trainer arg tokenizer to processing_class (#2162)
qgallouedec authored Oct 7, 2024
1 parent 70327c1 commit 47d08a9
Showing 59 changed files with 546 additions and 384 deletions.
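The files shown here all apply the same mechanical rename: the trainer keyword `tokenizer=...` becomes `processing_class=...`, while the object being passed (a tokenizer, or a processor for vision-language models) stays the same. As an at-a-glance summary, here is a minimal before/after sketch mirroring the README example further down; the model and dataset names are taken from that example.

```python
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import DPOConfig, DPOTrainer

model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
dataset = load_dataset("trl-lib/ultrafeedback_binarized", split="train")
training_args = DPOConfig(output_dir="Qwen2.5-0.5B-DPO")

# Before this commit, the tokenizer was passed as `tokenizer=...`:
# trainer = DPOTrainer(model=model, args=training_args, train_dataset=dataset, tokenizer=tokenizer)

# After this commit, the same object is passed as `processing_class=...`:
trainer = DPOTrainer(model=model, args=training_args, train_dataset=dataset, processing_class=tokenizer)
trainer.train()
```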
6 changes: 3 additions & 3 deletions README.md
@@ -133,7 +133,7 @@ training_args = RewardConfig(output_dir="Qwen2.5-0.5B-Reward", per_device_train_
trainer = RewardTrainer(
args=training_args,
model=model,
- tokenizer=tokenizer,
+ processing_class=tokenizer,
train_dataset=dataset,
)
trainer.train()
@@ -166,7 +166,7 @@ dataset = dataset.map(lambda x: tokenizer(x["prompt"]), remove_columns="prompt")
training_args = RLOOConfig(output_dir="Qwen2.5-0.5B-RL")
trainer = RLOOTrainer(
config=training_args,
- tokenizer=tokenizer,
+ processing_class=tokenizer,
policy=policy,
ref_policy=ref_policy,
reward_model=reward_model,
@@ -189,7 +189,7 @@ model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
dataset = load_dataset("trl-lib/ultrafeedback_binarized", split="train")
training_args = DPOConfig(output_dir="Qwen2.5-0.5B-DPO")
- trainer = DPOTrainer(model=model, args=training_args, train_dataset=dataset, tokenizer=tokenizer)
+ trainer = DPOTrainer(model=model, args=training_args, train_dataset=dataset, processing_class=tokenizer)
trainer.train()
```
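Downstream code sometimes has to run against TRL versions on both sides of this rename. One option (a sketch, not part of this commit; the helper name is made up) is to pick the keyword at runtime by inspecting the trainer signature:

```python
import inspect

from trl import DPOTrainer

def processing_kwarg(trainer_cls, tokenizer):
    """Return the tokenizer under whichever keyword this TRL version expects."""
    params = inspect.signature(trainer_cls.__init__).parameters
    name = "processing_class" if "processing_class" in params else "tokenizer"
    return {name: tokenizer}

# Usage:
# trainer = DPOTrainer(model=model, args=training_args, train_dataset=dataset,
#                      **processing_kwarg(DPOTrainer, tokenizer))
```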

4 changes: 2 additions & 2 deletions docs/source/bco_trainer.mdx
@@ -32,7 +32,7 @@ bco_trainer = BCOTrainer(
model_ref,
args=training_args,
train_dataset=train_dataset,
- tokenizer=tokenizer,
+ processing_class=tokenizer,
)
```
After this one can then call:
@@ -75,7 +75,7 @@ bco_trainer = BCOTrainer(
model_ref,
args=training_args,
train_dataset=train_dataset,
- tokenizer=tokenizer,
+ processing_class=tokenizer,
embedding_func=embedding_func,
embedding_tokenizer=self.embedding_tokenizer,
)
2 changes: 1 addition & 1 deletion docs/source/cpo_trainer.mdx
@@ -32,7 +32,7 @@ tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
train_dataset = load_dataset("trl-lib/ultrafeedback_binarized", split="train")

training_args = CPOConfig(output_dir="Qwen2-0.5B-CPO", logging_steps=10)
- trainer = CPOTrainer(model=model, args=training_args, tokenizer=tokenizer, train_dataset=train_dataset)
+ trainer = CPOTrainer(model=model, args=training_args, processing_class=tokenizer, train_dataset=train_dataset)
trainer.train()
```

8 changes: 4 additions & 4 deletions docs/source/dpo_trainer.mdx
@@ -47,7 +47,7 @@ tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
train_dataset = load_dataset("trl-lib/ultrafeedback_binarized", split="train")

training_args = DPOConfig(output_dir="Qwen2-0.5B-DPO", logging_steps=10)
- trainer = DPOTrainer(model=model, args=training_args, tokenizer=tokenizer, train_dataset=train_dataset)
+ trainer = DPOTrainer(model=model, args=training_args, processing_class=tokenizer, train_dataset=train_dataset)
trainer.train()
```

@@ -100,8 +100,8 @@ Additionally, unlike standard text-based models where a `tokenizer` is used, for
model,
args=training_args,
train_dataset=train_dataset,
-  - tokenizer=tokenizer,
-  + tokenizer=processor,
+  - processing_class=tokenizer,
+  + processing_class=processor,
)
```
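For vision-language models, the object handed to `processing_class` is the full processor rather than a bare tokenizer, as the hunk above shows (and as `examples/scripts/dpo_vlm.py` does further down). A minimal sketch of that setup; the checkpoint id and the `train_dataset` are assumptions for illustration, not part of this commit:

```python
from transformers import AutoModelForVision2Seq, AutoProcessor
from trl import DPOConfig, DPOTrainer

# Illustrative checkpoint; any VLM that ships an AutoProcessor follows the same pattern.
model_id = "HuggingFaceM4/idefics2-8b"
model = AutoModelForVision2Seq.from_pretrained(model_id)
processor = AutoProcessor.from_pretrained(model_id)

training_args = DPOConfig(output_dir="idefics2-8b-dpo", logging_steps=10)
trainer = DPOTrainer(
    model,
    args=training_args,
    train_dataset=train_dataset,  # assumed: a preference dataset with images
    processing_class=processor,   # DPO's VLM example passes the processor; the SFT VLM example passes processor.tokenizer
)
trainer.train()
```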

@@ -194,7 +194,7 @@ First install `unsloth` according to the [official documentation](https://github

- training_args = DPOConfig(output_dir="Qwen2-0.5B-DPO", logging_steps=10)
+ training_args = DPOConfig(output_dir="Qwen2-0.5B-DPO", logging_steps=10, bf16=True)
- trainer = DPOTrainer(model=model, args=training_args, tokenizer=tokenizer, train_dataset=train_dataset)
+ trainer = DPOTrainer(model=model, args=training_args, processing_class=tokenizer, train_dataset=train_dataset)
trainer.train()

```
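The unsloth hunk above shows only the two lines that change inside the documentation's own diff block. For context, a fuller sketch of that pattern; it assumes unsloth's `FastLanguageModel` loader (outside this commit), and the checkpoint name and settings are illustrative:

```python
from datasets import load_dataset
from trl import DPOConfig, DPOTrainer
from unsloth import FastLanguageModel  # assumes unsloth is installed per its official documentation

# Illustrative checkpoint and settings; adjust max_seq_length / quantization to your setup.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="Qwen/Qwen2-0.5B-Instruct",
    max_seq_length=2048,
    load_in_4bit=True,
)

train_dataset = load_dataset("trl-lib/ultrafeedback_binarized", split="train")
training_args = DPOConfig(output_dir="Qwen2-0.5B-DPO", logging_steps=10, bf16=True)
trainer = DPOTrainer(model=model, args=training_args, processing_class=tokenizer, train_dataset=train_dataset)
trainer.train()
```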
2 changes: 1 addition & 1 deletion docs/source/gkd_trainer.md
@@ -74,7 +74,7 @@ trainer = GKDTrainer(
model=model,
teacher_model=teacher_model,
args=training_args,
- tokenizer=tokenizer,
+ processing_class=tokenizer,
train_dataset=train_dataset,
eval_dataset=eval_dataset,
)
2 changes: 1 addition & 1 deletion docs/source/kto_trainer.mdx
@@ -89,7 +89,7 @@ kto_trainer = KTOTrainer(
ref_model,
args=training_args,
train_dataset=train_dataset,
- tokenizer=tokenizer,
+ processing_class=tokenizer,
)
```
After this one can then call:
2 changes: 1 addition & 1 deletion docs/source/nash_md_trainer.md
@@ -41,7 +41,7 @@ trainer = NashMDTrainer(
model=model,
reward_model=reward_model,
args=training_args,
- tokenizer=tokenizer,
+ processing_class=tokenizer,
train_dataset=train_dataset,
)
trainer.train()
2 changes: 1 addition & 1 deletion docs/source/online_dpo_trainer.md
@@ -40,7 +40,7 @@ train_dataset = load_dataset("trl-lib/ultrafeedback-prompt", split="train")

training_args = OnlineDPOConfig(output_dir="online-dpo-qwen2", logging_steps=10)
trainer = OnlineDPOTrainer(
- model=model, reward_model=reward_model, args=training_args, tokenizer=tokenizer, train_dataset=train_dataset
+ model=model, reward_model=reward_model, args=training_args, processing_class=tokenizer, train_dataset=train_dataset
)
trainer.train()
```
2 changes: 1 addition & 1 deletion docs/source/orpo_trainer.md
@@ -66,7 +66,7 @@ orpo_trainer = ORPOTrainer(
model,
args=training_args,
train_dataset=train_dataset,
- tokenizer=tokenizer,
+ processing_class=tokenizer,
)
```
After this one can then call:
2 changes: 1 addition & 1 deletion docs/source/reward_trainer.mdx
@@ -41,7 +41,7 @@ peft_config = LoraConfig(
trainer = RewardTrainer(
model=model,
args=training_args,
- tokenizer=tokenizer,
+ processing_class=tokenizer,
train_dataset=dataset,
peft_config=peft_config,
)
2 changes: 1 addition & 1 deletion docs/source/sft_trainer.mdx
@@ -744,7 +744,7 @@ trainer = SFTTrainer(
args=training_args,
data_collator=collate_fn,
train_dataset=train_dataset,
- tokenizer=processor.tokenizer,
+ processing_class=processor.tokenizer,
)
```

2 changes: 1 addition & 1 deletion docs/source/xpo_trainer.mdx
@@ -41,7 +41,7 @@ trainer = XPOTrainer(
model=model,
reward_model=reward_model,
args=training_args,
- tokenizer=tokenizer,
+ processing_class=tokenizer,
train_dataset=train_dataset,
)
trainer.train()
Changes to an additional file (file name not shown in this view)
@@ -237,7 +237,7 @@ def return_prompt_and_responses(samples) -> Dict[str, str]:
beta=script_args.beta,
train_dataset=train_dataset,
eval_dataset=eval_dataset,
- tokenizer=tokenizer,
+ processing_class=tokenizer,
peft_config=peft_config,
max_prompt_length=script_args.max_prompt_length,
max_length=script_args.max_length,
Changes to an additional file (file name not shown in this view)
@@ -187,7 +187,7 @@ def create_datasets(tokenizer, args, seed=None):
peft_config=peft_config,
max_seq_length=None,
formatting_func=prepare_sample_text,
- tokenizer=tokenizer,
+ processing_class=tokenizer,
args=training_args,
)
trainer.train()
2 changes: 1 addition & 1 deletion examples/scripts/bco.py
@@ -152,7 +152,7 @@ def mean_pooling(model_output, attention_mask):
args=training_args,
train_dataset=dataset["train"],
eval_dataset=dataset["test"],
- tokenizer=tokenizer,
+ processing_class=tokenizer,
peft_config=get_peft_config(model_args),
embedding_func=embedding_func,
embedding_tokenizer=embedding_tokenizer,
2 changes: 1 addition & 1 deletion examples/scripts/cpo.py
@@ -100,7 +100,7 @@ class ScriptArguments:
args=training_args,
train_dataset=dataset["train"],
eval_dataset=dataset["test"],
- tokenizer=tokenizer,
+ processing_class=tokenizer,
peft_config=get_peft_config(model_config),
)

2 changes: 1 addition & 1 deletion examples/scripts/dpo.py
@@ -121,7 +121,7 @@
args=training_args,
train_dataset=dataset[script_args.dataset_train_split],
eval_dataset=dataset[script_args.dataset_test_split],
- tokenizer=tokenizer,
+ processing_class=tokenizer,
peft_config=peft_config,
)

2 changes: 1 addition & 1 deletion examples/scripts/dpo_online.py
@@ -107,7 +107,7 @@
args=training_args,
train_dataset=dataset[script_args.dataset_train_split],
eval_dataset=dataset[script_args.dataset_test_split],
- tokenizer=tokenizer,
+ processing_class=tokenizer,
peft_config=get_peft_config(model_config),
)
generation_config = GenerationConfig(
2 changes: 1 addition & 1 deletion examples/scripts/dpo_vlm.py
@@ -126,7 +126,7 @@ def process(row):
args=training_args,
train_dataset=dataset[script_args.dataset_train_split],
eval_dataset=dataset[script_args.dataset_test_split],
- tokenizer=processor,
+ processing_class=processor,
peft_config=peft_config,
)

2 changes: 1 addition & 1 deletion examples/scripts/gkd.py
@@ -122,7 +122,7 @@
args=training_args,
train_dataset=dataset[script_args.dataset_train_split],
eval_dataset=dataset[script_args.dataset_test_split],
- tokenizer=tokenizer,
+ processing_class=tokenizer,
peft_config=get_peft_config(model_config),
)
completions_callback = LogCompletionsCallback(trainer, trainer.generation_config, num_prompts=8)
2 changes: 1 addition & 1 deletion examples/scripts/kto.py
@@ -122,7 +122,7 @@ def format_dataset(example):
args=training_args,
train_dataset=dataset["train"],
eval_dataset=dataset["test"],
- tokenizer=tokenizer,
+ processing_class=tokenizer,
peft_config=get_peft_config(model_args),
)

2 changes: 1 addition & 1 deletion examples/scripts/nash_md.py
@@ -110,7 +110,7 @@
args=training_args,
train_dataset=dataset[script_args.dataset_train_split],
eval_dataset=dataset[script_args.dataset_test_split],
- tokenizer=tokenizer,
+ processing_class=tokenizer,
)
generation_config = GenerationConfig(
max_new_tokens=training_args.max_new_tokens, do_sample=True, temperature=training_args.temperature
2 changes: 1 addition & 1 deletion examples/scripts/orpo.py
@@ -112,7 +112,7 @@ def process(row):
args=training_args,
train_dataset=dataset["train"],
eval_dataset=dataset["test"],
- tokenizer=tokenizer,
+ processing_class=tokenizer,
peft_config=get_peft_config(model_config),
)

2 changes: 1 addition & 1 deletion examples/scripts/ppo/ppo.py
@@ -120,7 +120,7 @@ def tokenize(element):
################
trainer = PPOv2Trainer(
config=training_args,
- tokenizer=tokenizer,
+ processing_class=tokenizer,
policy=policy,
ref_policy=ref_policy,
reward_model=reward_model,
2 changes: 1 addition & 1 deletion examples/scripts/ppo/ppo_tldr.py
@@ -125,7 +125,7 @@ def tokenize(element):
################
trainer = PPOv2Trainer(
config=training_args,
- tokenizer=tokenizer,
+ processing_class=tokenizer,
policy=policy,
ref_policy=ref_policy,
reward_model=reward_model,
2 changes: 1 addition & 1 deletion examples/scripts/reward_modeling.py
@@ -111,7 +111,7 @@
##########
trainer = RewardTrainer(
model=model,
- tokenizer=tokenizer,
+ processing_class=tokenizer,
args=training_args,
train_dataset=dataset[script_args.dataset_train_split],
eval_dataset=dataset[script_args.dataset_test_split],
2 changes: 1 addition & 1 deletion examples/scripts/rloo/rloo.py
@@ -121,7 +121,7 @@ def tokenize(element):
################
trainer = RLOOTrainer(
config=training_args,
- tokenizer=tokenizer,
+ processing_class=tokenizer,
policy=policy,
ref_policy=ref_policy,
reward_model=reward_model,
2 changes: 1 addition & 1 deletion examples/scripts/rloo/rloo_tldr.py
@@ -125,7 +125,7 @@ def tokenize(element):
################
trainer = RLOOTrainer(
config=training_args,
- tokenizer=tokenizer,
+ processing_class=tokenizer,
policy=policy,
ref_policy=ref_policy,
reward_model=reward_model,
2 changes: 1 addition & 1 deletion examples/scripts/sft.py
@@ -95,7 +95,7 @@
args=training_args,
train_dataset=dataset[script_args.dataset_train_split],
eval_dataset=dataset[script_args.dataset_test_split],
- tokenizer=tokenizer,
+ processing_class=tokenizer,
peft_config=get_peft_config(model_config),
)

2 changes: 1 addition & 1 deletion examples/scripts/sft_vlm.py
@@ -119,7 +119,7 @@ def collate_fn(examples):
data_collator=collate_fn,
train_dataset=dataset[script_args.dataset_train_split],
eval_dataset=dataset[script_args.dataset_test_split],
- tokenizer=processor.tokenizer,
+ processing_class=processor.tokenizer,
peft_config=get_peft_config(model_config),
)

2 changes: 1 addition & 1 deletion examples/scripts/xpo.py
@@ -94,7 +94,7 @@
args=training_args,
train_dataset=dataset[script_args.dataset_train_split],
eval_dataset=dataset[script_args.dataset_test_split],
- tokenizer=tokenizer,
+ processing_class=tokenizer,
)
generation_config = GenerationConfig(
max_new_tokens=training_args.max_new_tokens, do_sample=True, temperature=training_args.temperature
6 changes: 3 additions & 3 deletions tests/slow/test_dpo_slow.py
@@ -85,7 +85,7 @@ def test_dpo_bare_model(self, model_id, loss_type, pre_compute_logits):
model=model,
ref_model=None,
args=training_args,
- tokenizer=tokenizer,
+ processing_class=tokenizer,
train_dataset=self.dataset,
eval_dataset=self.dataset,
)
@@ -142,7 +142,7 @@ def test_dpo_peft_model(self, model_id, loss_type, pre_compute_logits, gradient_
model=model,
ref_model=None,
args=training_args,
- tokenizer=tokenizer,
+ processing_class=tokenizer,
train_dataset=self.dataset,
eval_dataset=self.dataset,
peft_config=self.peft_config,
@@ -206,7 +206,7 @@ def test_dpo_peft_model_qlora(self, model_id, loss_type, pre_compute_logits, gra
model=model,
ref_model=None,
args=training_args,
- tokenizer=tokenizer,
+ processing_class=tokenizer,
train_dataset=self.dataset,
eval_dataset=self.dataset,
peft_config=self.peft_config,
(Diff view truncated: the remaining changed files of the 59 were not loaded.)