Added logic to support warm-up and all pytorch schedulers #207

Merged · 3 commits · Mar 7, 2024

Changes from all commits
79 changes: 72 additions & 7 deletions neuralpredictors/training/early_stopping.py
@@ -42,8 +42,8 @@
    tracker=None,
    scheduler=None,
    lr_decay_steps=1,
    number_warmup_epochs=0,
):

    """
    Early stopping iterator. Keeps track of the best model state during training. Resets the model to its
    best state, when either the number of maximum epochs or the patience (number of epochs without improvement)
@@ -72,10 +72,30 @@
        tracker (Tracker):
            Tracker to be invoked for every epoch. `log_objective` is invoked with the current value of `objective`. Note that the `finalize`
            method is NOT invoked.
        scheduler: scheduler object, which automatically reduces the LR by a specified amount.
            The scheduler's `step` method is invoked, passing in the current value of `objective`.
        lr_decay_steps: Number of times the learning rate should be reduced before stopping the training.
        scheduler: scheduler object or tuple of two scheduler objects, which automatically modifies the LR by a specified amount.
            If a tuple of schedulers is provided, the first scheduler is assumed to be the warm-up scheduler. The `step` method
            of the first scheduler is called while the epoch is smaller than `number_warmup_epochs`; afterwards, the `step` method of
            the second scheduler is called. The current value of `objective` is passed to the `step` method if the scheduler at hand is `ReduceLROnPlateau`.
            For example, a provided tuple of schedulers can be of the form:

                scheduler = (warmup_scheduler, CosineAnnealingLR(*args, **kwargs))

            or, in case no scheduler is desired after the warm-up:

                scheduler = (warmup_scheduler, None)

            An example warm-up scheduler can be defined as:

                def warmup_function(current_step: int):
                    return 1 / (2 ** (float(number_warmup_epochs - current_step - 1)))

                warmup_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=warmup_function)

            Of course, single schedulers can also be provided.
            If the warm-up is shifted (reaches a too high learning rate or does not reach the desired learning rate),
            consider adjusting the warm-up function accordingly.
        lr_decay_steps: Number of times the learning rate should be reduced before stopping the training.
        number_warmup_epochs: Number of warm-up epochs
    """
    training_status = model.training
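
As a rough usage sketch of the tuple form documented above (not part of this PR): the optimizer, the base LR, the `CosineAnnealingLR` settings, the placeholder `model` and `objective`, and the exact call and yield pattern of the `early_stopping` generator in this module are illustrative assumptions.

    import torch
    from neuralpredictors.training.early_stopping import early_stopping

    number_warmup_epochs = 5
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)  # `model` and `objective` assumed to be defined by the caller

    def warmup_function(current_step: int):
        # multiplier ramps up to 1.0 over the warm-up epochs, e.g. 1/16, 1/8, 1/4, 1/2, 1 for 5 epochs
        return 1 / (2 ** (float(number_warmup_epochs - current_step - 1)))

    warmup_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=warmup_function)
    main_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100)

    # the first element handles the warm-up epochs, the second takes over afterwards;
    # (warmup_scheduler, None) disables scheduling after the warm-up
    for epoch, current_objective in early_stopping(
        model,
        objective,
        scheduler=(warmup_scheduler, main_scheduler),
        number_warmup_epochs=number_warmup_epochs,
    ):
        ...  # run one training interval here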

@@ -107,11 +127,41 @@
    best_objective = current_objective = _objective()
    best_state_dict = copy_state(model)

    # check if the learning rate scheduler is 'ReduceLROnPlateau' so that we pass the current_objective to step
    reduce_lr_on_plateau = False
    if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
        reduce_lr_on_plateau = True
    elif isinstance(scheduler, tuple):
        if isinstance(scheduler[1], torch.optim.lr_scheduler.ReduceLROnPlateau):
            reduce_lr_on_plateau = True

    # check if warm up is to be performed
    if isinstance(scheduler, tuple):
        warmup = True

        # check if the warm-up scheduler is not of type None
        if scheduler[0] is None:
            logger.warning(
                f"Provided warm up scheduler is of type None. Warm up epochs set to {number_warmup_epochs}. Setting number of warm up epochs to 0"
            )
            number_warmup_epochs = 0

    else:
        warmup = False

    # check if warm up scheduler and number of warm-up epochs is provided
    if warmup and number_warmup_epochs == 0:
        logger.warning("Warm up scheduler is provided, but number of warm up steps is set to 0")

    # inform user that no warm-up scheduler is provided although warm-up epochs is non-zero
    elif not warmup and number_warmup_epochs > 0:
        logger.warning(
            f"Number of warm up steps is set to {number_warmup_epochs}, but no warm up scheduler is provided"
        )

    for repeat in range(lr_decay_steps):
        patience_counter = 0

        while patience_counter < patience and epoch < max_iter:

            for _ in range(interval):
                epoch += 1
                if tracker is not None:
@@ -124,9 +174,24 @@

            current_objective = _objective()

            # if a scheduler is defined, a .step with the current objective is all that is needed to reduce the LR
            # if a scheduler is defined, a .step with or without the current objective is all that is needed to reduce the LR
            if scheduler is not None:
                scheduler.step(current_objective)
                if warmup and epoch < number_warmup_epochs:
                    # warm-up step
                    scheduler[0].step()
                elif reduce_lr_on_plateau:
                    # reduce_lr_on_plateau requires current objective for the step
                    if not warmup:
                        scheduler.step(current_objective)
                    else:
                        scheduler[1].step(current_objective)
                else:
                    # .step() for the rest of the schedulers
                    if not warmup:
                        scheduler.step()
                    else:
                        if scheduler[1] is not None:
                            scheduler[1].step()

            if current_objective * maximize < best_objective * maximize - tolerance:
                logger.info(f"[{epoch:03d}|{patience_counter:02d}/{patience:02d}] ---> {current_objective}")
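
To guard against the "shifted" warm-up mentioned in the docstring, the warm-up multipliers can be inspected in isolation; this is a hypothetical sanity check, not part of the PR:

    number_warmup_epochs = 5

    def warmup_function(current_step: int):
        return 1 / (2 ** (float(number_warmup_epochs - current_step - 1)))

    # multipliers applied to the base LR during the warm-up epochs;
    # the last value should be 1.0, otherwise the ramp over- or undershoots the target LR
    print([warmup_function(step) for step in range(number_warmup_epochs)])
    # [0.0625, 0.125, 0.25, 0.5, 1.0]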