From ce0ebdade285dc208777af74c11117bbae34fa96 Mon Sep 17 00:00:00 2001
From: "B. Shen" <45259695+sbwww@users.noreply.github.com>
Date: Fri, 17 Nov 2023 03:33:35 +0800
Subject: [PATCH] [Bug fix] WarmupCosineLR issues (#4688)

The original code was missing a `self.` before `warmup_num_steps`, so
`warmup_num_steps` might be 0 and cause a math domain error when
evaluating `math.log(0)`:

```py
self.warmup_num_steps = max(2, warmup_num_steps)
self.inverse_log_warm_up = 1.0 / math.log(warmup_num_steps)
```
---
 deepspeed/runtime/lr_schedules.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/deepspeed/runtime/lr_schedules.py b/deepspeed/runtime/lr_schedules.py
index f5a677227f76..d7f7e15a4dbd 100755
--- a/deepspeed/runtime/lr_schedules.py
+++ b/deepspeed/runtime/lr_schedules.py
@@ -812,7 +812,7 @@ def __init__(self,
         self.warmup_type = warmup_type
         self.warmup_min_ratio = warmup_min_ratio
         self.warmup_num_steps = max(2, warmup_num_steps)
-        self.inverse_log_warm_up = 1.0 / math.log(warmup_num_steps)
+        self.inverse_log_warm_up = 1.0 / math.log(self.warmup_num_steps)
 
         if self.total_num_steps < self.warmup_num_steps:
             logger.warning('total_num_steps {} is less than warmup_num_steps {}'.format(
@@ -857,6 +857,12 @@ def get_lr(self):
         lr_ratio = self.get_lr_ratio()
         return [org_lr * lr_ratio for org_lr in self.org_lrs]
 
+    def get_last_lr(self):
+        """ Return last computed learning rate by current scheduler.
+        """
+        assert getattr(self, '_last_lr', None) is not None, "need to call step() first"
+        return self._last_lr
+
     def state_dict(self):
         return {'last_batch_iteration': self.last_batch_iteration}
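
For reviewers, a minimal sketch (not part of the patch) of the failure mode: before the fix, the raw constructor argument reached `math.log`, so `warmup_num_steps=0` raised `ValueError: math domain error`, while the clamp to `max(2, ...)` only protected the stored attribute. The helper names below are hypothetical, written only to isolate the two code paths:

```py
import math

# Before the fix: the unclamped argument reaches math.log.
def inverse_log_warm_up_buggy(warmup_num_steps):
    clamped = max(2, warmup_num_steps)       # stored as self.warmup_num_steps
    return 1.0 / math.log(warmup_num_steps)  # log(0) -> ValueError; log(1) -> division by zero

# After the fix: the clamped value is used, so math.log(...) >= math.log(2) > 0.
def inverse_log_warm_up_fixed(warmup_num_steps):
    clamped = max(2, warmup_num_steps)
    return 1.0 / math.log(clamped)

try:
    inverse_log_warm_up_buggy(0)
except ValueError as e:
    print(e)                          # math domain error

print(inverse_log_warm_up_fixed(0))  # 1.0 / math.log(2) ~= 1.4427
```

Note that the clamp also guards `warmup_num_steps=1`, where `math.log(1) == 0` would otherwise turn the inverse into a division by zero.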
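
The new `get_last_lr` follows the PyTorch scheduler convention: `step()` computes the learning rates and caches them in `_last_lr`, so the method asserts that `step()` has run at least once. A hedged usage sketch, assuming the scheduler is constructed directly (the argument values are illustrative; only `total_num_steps` and `warmup_num_steps` appear in the diff above):

```py
import torch
from deepspeed.runtime.lr_schedules import WarmupCosineLR

model = torch.nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

# Illustrative values; with this patch, warmup_num_steps < 2 is safely clamped to 2.
scheduler = WarmupCosineLR(optimizer, total_num_steps=100, warmup_num_steps=10)

# Calling get_last_lr() before any step() trips the assert by design.
optimizer.step()
scheduler.step()
print(scheduler.get_last_lr())  # last LR list computed by step()
```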