From a032ae0d8fb98b002859bb98f301e637a4a39d4d Mon Sep 17 00:00:00 2001
From: "viktor.dobrev"
Date: Thu, 11 May 2023 17:57:24 +0200
Subject: [PATCH 1/2] support for other pytorch schedulers and warm up

---
 neuralpredictors/training/early_stopping.py | 70 ++++++++++++++++++---
 1 file changed, 63 insertions(+), 7 deletions(-)

diff --git a/neuralpredictors/training/early_stopping.py b/neuralpredictors/training/early_stopping.py
index 2ddb4dd7..f78a9cd3 100644
--- a/neuralpredictors/training/early_stopping.py
+++ b/neuralpredictors/training/early_stopping.py
@@ -42,8 +42,8 @@ def early_stopping(
     tracker=None,
     scheduler=None,
     lr_decay_steps=1,
+    number_warmup_epochs=0,
 ):
-
     """
     Early stopping iterator. Keeps track of the best model state during training. Resets the model to its
     best state, when either the number of maximum epochs or the patience [number of epochs without improvement)
@@ -72,10 +72,29 @@
         tracker (Tracker):
             Tracker to be invoked for every epoch. `log_objective` is invoked with the current value of
             `objective`. Note that `finalize` method is NOT invoked.
-        scheduler: scheduler object, which automatically reduces decreases the LR by a specified amount.
-            The scheduler's `step` method is invoked, passing in the current value of `objective`
-        lr_decay_steps: Number of times the learning rate should be reduced before stopping the training.
+        scheduler: scheduler object or tuple of two scheduler objects, which automatically modifies the LR by a specified amount.
+            The scheduler's `step` method is invoked for a the approptiate scheduler if a tuple of two schedulers is provided.
+            The current value of `objective` is passed to the `step` method if the scheduler at hand is `ReduceLROnPlateau`.
+            For example a provided tuple of scheduler can be of the form:
+
+                scheduler = (warmup_scheduler,CosineAnnealingLR(*args,**kwargs))
+
+            or in case no scheduler is desired after the warm up:
+
+                scheduler = (warmup_scheduler,None).
+
+            An example warm up scheduler can be defined as:
+                def warmup_function(current_step: int):
+                    return 1 / (2 ** (float(number_warmup_epochs - current_step - 1)))
+
+                warmup_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=warmup_function)
+
+            Of course, single schedulers can also be provided.
+            If the warm-up is shifted (goes to a too high learning rate or does not reach the desired learning rate),
+            consider adjusting the warm up function accordingly.
+        lr_decay_steps: Number of times the learning rate should be reduced before stopping the training.
+        number_warmup_epochs: Number of warm-up epochs
     """

     training_status = model.training
@@ -107,11 +126,36 @@ def finalize(model, best_state_dict):
     best_objective = current_objective = _objective()
     best_state_dict = copy_state(model)

+    # check if the learning rate scheduler is 'ReduceLROnPlateau' so that we pass the current_objective to step
+    reduce_lr_on_plateau = False
+    if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
+        reduce_lr_on_plateau = True
+    elif isinstance(scheduler, tuple):
+        if isinstance(scheduler[1], torch.optim.lr_scheduler.ReduceLROnPlateau):
+            reduce_lr_on_plateau = True
+
+    # check if warm up is to be performed
+    if isinstance(scheduler, tuple):
+        warmup = True
+        if scheduler[0] is None:
+            logger.warning(
+                f"Provided warm up scheduler is of type None. Warm up epochs set to {number_warmup_epochs}. Setting number of warm up epochs to 0"
+            )
+            number_warmup_epochs = 0
+    else:
+        warmup = False
+
+    if warmup and number_warmup_epochs == 0:
+        logger.warning("Warm up scheduler is provided, but number of warm up steps is set to 0")
+    elif not warmup and number_warmup_epochs > 0:
+        logger.warning(
+            f"Number of warm up steps is set to {number_warmup_epochs}, but no warm up scheduler is provided"
+        )
+
     for repeat in range(lr_decay_steps):
         patience_counter = 0

         while patience_counter < patience and epoch < max_iter:
-
             for _ in range(interval):
                 epoch += 1
                 if tracker is not None:
@@ -124,9 +168,21 @@ def finalize(model, best_state_dict):

             current_objective = _objective()

-            # if a scheduler is defined, a .step with the current objective is all that is needed to reduce the LR
+            # if a scheduler is defined, a .step with or without the current objective is all that is needed to reduce the LR
             if scheduler is not None:
-                scheduler.step(current_objective)
+                if warmup and epoch < number_warmup_epochs:
+                    scheduler[0].step()
+                elif reduce_lr_on_plateau:
+                    if not warmup:
+                        scheduler.step(current_objective)
+                    else:
+                        scheduler[1].step(current_objective)
+                else:
+                    if not warmup:
+                        scheduler.step()
+                    else:
+                        if scheduler[1] is not None:
+                            scheduler[1].step()

             if current_objective * maximize < best_objective * maximize - tolerance:
                 logger.info(f"[{epoch:03d}|{patience_counter:02d}/{patience:02d}] ---> {current_objective}")

From c0a89a44aeefd438bb791528546d5c63571344e5 Mon Sep 17 00:00:00 2001
From: "viktor.dobrev"
Date: Mon, 22 May 2023 14:06:31 +0200
Subject: [PATCH 2/2] better in-line comments for schedulers

---
 neuralpredictors/training/early_stopping.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/neuralpredictors/training/early_stopping.py b/neuralpredictors/training/early_stopping.py
index f78a9cd3..c101c44f 100644
--- a/neuralpredictors/training/early_stopping.py
+++ b/neuralpredictors/training/early_stopping.py
@@ -73,9 +73,10 @@ def early_stopping(
             Tracker to be invoked for every epoch. `log_objective` is invoked with the current value of
             `objective`. Note that `finalize` method is NOT invoked.
         scheduler: scheduler object or tuple of two scheduler objects, which automatically modifies the LR by a specified amount.
-            The scheduler's `step` method is invoked for a the approptiate scheduler if a tuple of two schedulers is provided.
-            The current value of `objective` is passed to the `step` method if the scheduler at hand is `ReduceLROnPlateau`.
-            For example a provided tuple of scheduler can be of the form:
+            If a tuple of schedulers is provided, the 1st scheduler is assumed to be the warm up scheduler. The .step method
+            of the 1st scheduler will be called while epoch is smaller than number_warmup_epochs; afterwards the .step method of
+            the second scheduler is called. The current value of `objective` is passed to the `step` method if the scheduler at hand is `ReduceLROnPlateau`.
+            For example a provided tuple of schedulers can be of the form:

                 scheduler = (warmup_scheduler,CosineAnnealingLR(*args,**kwargs))

@@ -137,6 +138,8 @@ def finalize(model, best_state_dict):
     # check if warm up is to be performed
     if isinstance(scheduler, tuple):
         warmup = True
+
+        # check whether the provided warm-up scheduler is None
         if scheduler[0] is None:
             logger.warning(
                 f"Provided warm up scheduler is of type None. Warm up epochs set to {number_warmup_epochs}. Setting number of warm up epochs to 0"
             )
             number_warmup_epochs = 0
     else:
         warmup = False

+    # check if both a warm up scheduler and a number of warm-up epochs are provided
     if warmup and number_warmup_epochs == 0:
         logger.warning("Warm up scheduler is provided, but number of warm up steps is set to 0")
+
+    # inform user that no warm-up scheduler is provided although warm-up epochs is non-zero
     elif not warmup and number_warmup_epochs > 0:
         logger.warning(
             f"Number of warm up steps is set to {number_warmup_epochs}, but no warm up scheduler is provided"
         )
@@ -171,13 +177,16 @@ def finalize(model, best_state_dict):
             # if a scheduler is defined, a .step with or without the current objective is all that is needed to reduce the LR
             if scheduler is not None:
                 if warmup and epoch < number_warmup_epochs:
+                    # warm-up step
                    scheduler[0].step()
                 elif reduce_lr_on_plateau:
+                    # reduce_lr_on_plateau requires the current objective for the step
                     if not warmup:
                         scheduler.step(current_objective)
                     else:
                         scheduler[1].step(current_objective)
                 else:
+                    # .step() for the rest of the schedulers
                     if not warmup:
                         scheduler.step()
                     else:
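
---

For reference, below is a minimal usage sketch (not part of the patch) of how the new `scheduler` tuple and `number_warmup_epochs` arguments introduced by this series might be passed to `early_stopping`. The toy model, optimizer settings, and the stand-in `objective` closure are illustrative assumptions rather than code from this PR, and the loop assumes the usual generator usage of `early_stopping` (yielding the epoch and the current objective); only the `scheduler=(warmup_scheduler, main_scheduler)` and `number_warmup_epochs` usage follows the patched signature.

```python
import torch
from neuralpredictors.training.early_stopping import early_stopping

model = torch.nn.Linear(10, 1)  # toy placeholder model, not from the PR
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
number_warmup_epochs = 5


def objective(*args, **kwargs):
    # stand-in for a validation-score closure; a real one would evaluate the model on held-out data
    with torch.no_grad():
        return -model(torch.randn(8, 10)).pow(2).mean().item()


def warmup_function(current_step: int):
    # same ramp as the docstring example: the LR doubles each epoch and reaches the optimizer LR
    # at the last warm-up epoch
    return 1 / (2 ** (float(number_warmup_epochs - current_step - 1)))


warmup_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=warmup_function)
main_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="max", factor=0.3, patience=5)

for epoch, current_objective in early_stopping(
    model,
    objective,
    interval=1,
    patience=10,
    max_iter=50,
    maximize=True,
    scheduler=(warmup_scheduler, main_scheduler),  # warm-up scheduler first, main scheduler second
    number_warmup_epochs=number_warmup_epochs,
):
    pass  # one interval of training would normally run here
```

Passing `scheduler=(warmup_scheduler, None)` instead would run only the warm-up for the first `number_warmup_epochs` epochs and step no scheduler afterwards, matching the second form shown in the patched docstring.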