From 0f94e3e152dcc172dbd681aa0d0dc527d9338b8d Mon Sep 17 00:00:00 2001
From: Anton Vlasjuk <73884904+vasqu@users.noreply.github.com>
Date: Wed, 10 Apr 2024 16:36:43 +0200
Subject: [PATCH] Fix accelerate kwargs for versions <0.28.0 (#30086)

* fix learning rate display issue in galore optimizer

* fix kwarg in accelerate when using versions < 0.28.0

* this was supposed to be in the other PR whoops
---
 src/transformers/trainer.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
index f6e80ebafe3..292ecad3838 100755
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -4374,8 +4374,9 @@ def create_accelerator_and_postprocess(self):
                 even_batches=accelerator_config.pop("even_batches"),
                 use_seedable_sampler=accelerator_config.pop("use_seedable_sampler"),
             )
-            # this would have been updated above, no need for it anymore
-            accelerator_config.pop("gradient_accumulation_kwargs")
+        # this would have been updated above, no need for it anymore
+        accelerator_config.pop("gradient_accumulation_kwargs")
+
         args = {
             "deepspeed_plugin": self.args.deepspeed_plugin,
             "gradient_accumulation_plugin": gradient_accumulation_plugin,