From a032ae0d8fb98b002859bb98f301e637a4a39d4d Mon Sep 17 00:00:00 2001
From: "viktor.dobrev"
Date: Thu, 11 May 2023 17:57:24 +0200
Subject: [PATCH 1/2] support for other pytorch schedulers and warm up

---
 neuralpredictors/training/early_stopping.py | 70 ++++++++++++++++++---
 1 file changed, 63 insertions(+), 7 deletions(-)

diff --git a/neuralpredictors/training/early_stopping.py b/neuralpredictors/training/early_stopping.py
index 2ddb4dd7..f78a9cd3 100644
--- a/neuralpredictors/training/early_stopping.py
+++ b/neuralpredictors/training/early_stopping.py
@@ -42,8 +42,8 @@ def early_stopping(
     tracker=None,
     scheduler=None,
     lr_decay_steps=1,
+    number_warmup_epochs=0,
 ):
-
     """
     Early stopping iterator. Keeps track of the best model state during training. Resets the model to its
     best state, when either the number of maximum epochs or the patience [number of epochs without improvement)
@@ -72,10 +72,29 @@
         tracker (Tracker):
             Tracker to be invoked for every epoch. `log_objective` is invoked with the current value of
             `objective`. Note that `finalize` method is NOT invoked.
-        scheduler: scheduler object, which automatically reduces decreases the LR by a specified amount.
-            The scheduler's `step` method is invoked, passing in the current value of `objective`
-        lr_decay_steps: Number of times the learning rate should be reduced before stopping the training.
+        scheduler: scheduler object or tuple of two scheduler objects, which automatically modifies the LR by a specified amount.
+            The scheduler's `step` method is invoked for a the approptiate scheduler if a tuple of two schedulers is provided.
+            The current value of `objective` is passed to the `step` method if the scheduler at hand is `ReduceLROnPlateau`.
+            For example a provided tuple of scheduler can be of the form:
+
+                scheduler = (warmup_scheduler,CosineAnnealingLR(*args,**kwargs))
+
+            or in case no scheduler is desired after the warm up:
+
+                scheduler = (warmup_scheduler,None).
+
+            An example warm up scheduler can be defined as:
+                def warmup_function(current_step: int):
+                    return 1 / (2 ** (float(number_warmup_epochs - current_step - 1)))
+
+                warmup_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=warmup_function)
+
+            Of course, single schedulers can also be provided.
+            If the warm-up is shifted (goes to a too high learning rate or does not reach the desired learning rate),
+            consider adjusting the warm up function accordingly.
+        lr_decay_steps: Number of times the learning rate should be reduced before stopping the training.
+        number_warmup_epochs: Number of warm-up epochs
     """

     training_status = model.training
@@ -107,11 +126,36 @@ def finalize(model, best_state_dict):
     best_objective = current_objective = _objective()
     best_state_dict = copy_state(model)

+    # check if the learning rate scheduler is 'ReduceLROnPlateau' so that we pass the current_objective to step
+    reduce_lr_on_plateau = False
+    if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
+        reduce_lr_on_plateau = True
+    elif isinstance(scheduler, tuple):
+        if isinstance(scheduler[1], torch.optim.lr_scheduler.ReduceLROnPlateau):
+            reduce_lr_on_plateau = True
+
+    # check if warm up is to be performed
+    if isinstance(scheduler, tuple):
+        warmup = True
+        if scheduler[0] is None:
+            logger.warning(
+                f"Provided warm up scheduler is of type None. Warm up epochs set to {number_warmup_epochs}. Setting number of warm up epochs to 0"
+            )
+            number_warmup_epochs = 0
+    else:
+        warmup = False
+
+    if warmup and number_warmup_epochs == 0:
+        logger.warning("Warm up scheduler is provided, but number of warm up steps is set to 0")
+    elif not warmup and number_warmup_epochs > 0:
+        logger.warning(
+            f"Number of warm up steps is set to {number_warmup_epochs}, but no warm up scheduler is provided"
+        )
+
     for repeat in range(lr_decay_steps):
         patience_counter = 0

         while patience_counter < patience and epoch < max_iter:
-
             for _ in range(interval):
                 epoch += 1
                 if tracker is not None:
@@ -124,9 +168,21 @@ def finalize(model, best_state_dict):

             current_objective = _objective()

-            # if a scheduler is defined, a .step with the current objective is all that is needed to reduce the LR
+            # if a scheduler is defined, a .step with or without the current objective is all that is needed to reduce the LR
             if scheduler is not None:
-                scheduler.step(current_objective)
+                if warmup and epoch < number_warmup_epochs:
+                    scheduler[0].step()
+                elif reduce_lr_on_plateau:
+                    if not warmup:
+                        scheduler.step(current_objective)
+                    else:
+                        scheduler[1].step(current_objective)
+                else:
+                    if not warmup:
+                        scheduler.step()
+                    else:
+                        if scheduler[1] is not None:
+                            scheduler[1].step()

             if current_objective * maximize < best_objective * maximize - tolerance:
                 logger.info(f"[{epoch:03d}|{patience_counter:02d}/{patience:02d}] ---> {current_objective}")

From c0a89a44aeefd438bb791528546d5c63571344e5 Mon Sep 17 00:00:00 2001
From: "viktor.dobrev"
Date: Mon, 22 May 2023 14:06:31 +0200
Subject: [PATCH 2/2] better in-line comments for schedulers

---
 neuralpredictors/training/early_stopping.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/neuralpredictors/training/early_stopping.py b/neuralpredictors/training/early_stopping.py
index f78a9cd3..c101c44f 100644
--- a/neuralpredictors/training/early_stopping.py
+++ b/neuralpredictors/training/early_stopping.py
@@ -73,9 +73,10 @@ def early_stopping(
             Tracker to be invoked for every epoch. `log_objective` is invoked with the current value of
             `objective`. Note that `finalize` method is NOT invoked.
         scheduler: scheduler object or tuple of two scheduler objects, which automatically modifies the LR by a specified amount.
-            The scheduler's `step` method is invoked for a the approptiate scheduler if a tuple of two schedulers is provided.
-            The current value of `objective` is passed to the `step` method if the scheduler at hand is `ReduceLROnPlateau`.
-            For example a provided tuple of scheduler can be of the form:
+            If a tuple of schedulers is provided, the 1st scheduler is assumed to be the warm up scheduler. The .step method
+            of the 1st scheduler will be called while epoch is smaller than number_warmup_epochs; afterwards the .step method of
+            the second scheduler is called. The current value of `objective` is passed to the `step` method if the scheduler at hand is `ReduceLROnPlateau`.
+            For example a provided tuple of schedulers can be of the form:

                 scheduler = (warmup_scheduler,CosineAnnealingLR(*args,**kwargs))

@@ -137,6 +138,8 @@ def finalize(model, best_state_dict):
     # check if warm up is to be performed
     if isinstance(scheduler, tuple):
         warmup = True
+
+        # check whether the provided warm-up scheduler is None
         if scheduler[0] is None:
             logger.warning(
                 f"Provided warm up scheduler is of type None. Warm up epochs set to {number_warmup_epochs}. Setting number of warm up epochs to 0"
             )
             number_warmup_epochs = 0
     else:
         warmup = False

+    # check if both a warm up scheduler and a number of warm-up epochs are provided
     if warmup and number_warmup_epochs == 0:
         logger.warning("Warm up scheduler is provided, but number of warm up steps is set to 0")
+
+    # inform user that no warm-up scheduler is provided although warm-up epochs is non-zero
     elif not warmup and number_warmup_epochs > 0:
         logger.warning(
             f"Number of warm up steps is set to {number_warmup_epochs}, but no warm up scheduler is provided"
         )
@@ -171,13 +177,16 @@ def finalize(model, best_state_dict):
             # if a scheduler is defined, a .step with or without the current objective is all that is needed to reduce the LR
             if scheduler is not None:
                 if warmup and epoch < number_warmup_epochs:
+                    # warm-up step
                    scheduler[0].step()
                 elif reduce_lr_on_plateau:
+                    # reduce_lr_on_plateau requires the current objective for the step
                     if not warmup:
                         scheduler.step(current_objective)
                     else:
                         scheduler[1].step(current_objective)
                 else:
+                    # .step() for the rest of the schedulers
                     if not warmup:
                         scheduler.step()
                     else:
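
---

For reference, below is a minimal usage sketch (not part of the patch) of how the new `scheduler` tuple and `number_warmup_epochs` arguments introduced by this series might be passed to `early_stopping`. The toy model, optimizer settings, and the stand-in `objective` closure are illustrative assumptions rather than code from this PR, and the loop assumes the usual generator usage of `early_stopping` (yielding the epoch and the current objective); only the `scheduler=(warmup_scheduler, main_scheduler)` and `number_warmup_epochs` usage follows the patched signature.

```python
import torch
from neuralpredictors.training.early_stopping import early_stopping

model = torch.nn.Linear(10, 1)  # toy placeholder model, not from the PR
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
number_warmup_epochs = 5


def objective(*args, **kwargs):
    # stand-in for a validation-score closure; a real one would evaluate the model on held-out data
    with torch.no_grad():
        return -model(torch.randn(8, 10)).pow(2).mean().item()


def warmup_function(current_step: int):
    # same ramp as the docstring example: the LR doubles each epoch and reaches the optimizer LR
    # at the last warm-up epoch
    return 1 / (2 ** (float(number_warmup_epochs - current_step - 1)))


warmup_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=warmup_function)
main_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="max", factor=0.3, patience=5)

for epoch, current_objective in early_stopping(
    model,
    objective,
    interval=1,
    patience=10,
    max_iter=50,
    maximize=True,
    scheduler=(warmup_scheduler, main_scheduler),  # warm-up scheduler first, main scheduler second
    number_warmup_epochs=number_warmup_epochs,
):
    pass  # one interval of training would normally run here
```

Passing `scheduler=(warmup_scheduler, None)` instead would run only the warm-up for the first `number_warmup_epochs` epochs and step no scheduler afterwards, matching the second form shown in the patched docstring.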