diff --git a/returnn/torch/engine.py b/returnn/torch/engine.py
index 1af2e4a7ab..bc5c35b27c 100644
--- a/returnn/torch/engine.py
+++ b/returnn/torch/engine.py
@@ -346,7 +346,7 @@ def train_epoch(self):
 
             # only update the weights when every gradient accumulation loop ends
             if (step_idx % self._accum_grad_multiple_step) == (self._accum_grad_multiple_step - 1):
-                self._updater.update_params(grad_scaler=self._grad_scaler)
+                self._updater.step(grad_scaler=self._grad_scaler)
 
             elapsed_computation_time += time.time() - step_begin_time
 
diff --git a/returnn/torch/updater.py b/returnn/torch/updater.py
index dcf8e099f3..7bf1f4c442 100644
--- a/returnn/torch/updater.py
+++ b/returnn/torch/updater.py
@@ -177,7 +177,7 @@ def set_current_train_step(self, *, global_train_step: int, epoch: int):
         self._current_epoch = epoch
         self._update_effective_learning_rate()
 
-    def update_params(self, *, grad_scaler: Optional[torch.cuda.amp.GradScaler] = None):
+    def step(self, *, grad_scaler: Optional[torch.cuda.amp.GradScaler] = None):
        """
        Perform one step, i.e. update the parameters using the optimizer given the current calculated gradients.
        """
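
For context on the renamed method: the call site shows that `step` takes an optional `torch.cuda.amp.GradScaler` and is only invoked when a gradient accumulation window closes. Below is a minimal sketch of how such a method is typically wired up; it is an illustrative assumption, not the updater code touched by this diff (the class name `_UpdaterSketch`, the `optimizer` attribute, and the `zero_grad()` placement are invented for the example).

from typing import Optional

import torch


class _UpdaterSketch:
    """Illustrative stand-in for the updater; not the code changed in this diff."""

    def __init__(self, optimizer: torch.optim.Optimizer):
        self.optimizer = optimizer

    def step(self, *, grad_scaler: Optional[torch.cuda.amp.GradScaler] = None):
        """Update the parameters using the optimizer and the current gradients."""
        if grad_scaler is not None:
            # AMP path: the scaler unscales the gradients, skips the optimizer
            # step on inf/NaN, and adjusts the loss scale for the next iteration.
            grad_scaler.step(self.optimizer)
            grad_scaler.update()
        else:
            self.optimizer.step()
        # Assumed here for completeness: clear gradients so the next
        # accumulation window starts from zero.
        self.optimizer.zero_grad()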