# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import math

import torch
from timm.scheduler.cosine_lr import CosineLRScheduler
from timm.scheduler.tanh_lr import TanhLRScheduler
from timm.scheduler.step_lr import StepLRScheduler
from timm.scheduler.plateau_lr import PlateauLRScheduler


class CosineLRSchedulerwithLayerDecay(CosineLRScheduler):
    """Cosine LR scheduler that applies per-parameter-group ``lr_scale``
    factors (e.g. from layer-wise learning-rate decay) on every update."""

    def __init__(self,
                 optimizer: torch.optim.Optimizer,
                 t_initial: int,
                 lr_min: float = 0.,
                 warmup_t=0,
                 warmup_lr_init=0,
                 warmup_prefix=False,
                 cycle_limit=0,
                 t_in_epochs=True,
                 noise_range_t=None,
                 noise_pct=0.67,
                 noise_std=1.0,
                 noise_seed=42,
                 initialize=True) -> None:
        super().__init__(
            optimizer, t_initial=t_initial, lr_min=lr_min, warmup_t=warmup_t,
            warmup_lr_init=warmup_lr_init, warmup_prefix=warmup_prefix,
            cycle_limit=cycle_limit, t_in_epochs=t_in_epochs,
            noise_range_t=noise_range_t, noise_pct=noise_pct,
            noise_std=noise_std, noise_seed=noise_seed,
            initialize=initialize)

    def update_groups(self, values):
        # Write the scheduled value into every param group, multiplying by the
        # group's "lr_scale" when one is present (layer-wise LR decay).
        if not isinstance(values, (list, tuple)):
            values = [values] * len(self.optimizer.param_groups)
        for param_group, value in zip(self.optimizer.param_groups, values):
            if "lr_scale" in param_group:
                param_group[self.param_group_field] = value * param_group["lr_scale"]
            else:
                param_group[self.param_group_field] = value
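

# A minimal sketch (not part of the original file) of how optimizer param groups
# carrying an "lr_scale" key could be built for layer-wise LR decay, which is what
# update_groups() above consumes. The ViT-style name parsing ("blocks.{i}." prefixes)
# and the helper name are assumptions for illustration only.
def build_param_groups_with_lr_scale(model, num_layers, layer_decay=0.75):
    def layer_id(name):
        # Embeddings -> layer 0, transformer blocks -> 1..num_layers,
        # everything else (e.g. the head) -> num_layers + 1.
        if name.startswith(("cls_token", "pos_embed", "patch_embed")):
            return 0
        if name.startswith("blocks."):
            return int(name.split(".")[1]) + 1
        return num_layers + 1

    # Deeper layers keep a larger fraction of the base LR; the head gets scale 1.0.
    scales = [layer_decay ** (num_layers + 1 - i) for i in range(num_layers + 2)]
    groups = {}
    for name, param in model.named_parameters():
        if not param.requires_grad:
            continue
        lid = layer_id(name)
        group = groups.setdefault(lid, {"params": [], "lr_scale": scales[lid]})
        group["params"].append(param)
    return list(groups.values())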


def create_scheduler(num_epochs, warmup_epochs, warmup_lr, min_lr, args, optimizer, n_iter_per_epoch):
    """Build a timm LR scheduler according to ``args.sched`` and return it
    together with the (possibly updated) total number of epochs."""
    num_steps = int(num_epochs * n_iter_per_epoch)
    warmup_steps = int(warmup_epochs * n_iter_per_epoch)

    # Optional LR noise: args.lr_noise holds fractions of the run at which
    # noise kicks in/out; convert them to epoch counts for the scheduler.
    if getattr(args, 'lr_noise', None) is not None:
        lr_noise = getattr(args, 'lr_noise')
        if isinstance(lr_noise, (list, tuple)):
            noise_range = [n * num_epochs for n in lr_noise]
            if len(noise_range) == 1:
                noise_range = noise_range[0]
        else:
            noise_range = lr_noise * num_epochs
    else:
        noise_range = None
    lr_scheduler = None
    if args.sched == 'cosine':
        # Step-based (per-iteration) cosine schedule with a warmup prefix.
        lr_scheduler = CosineLRSchedulerwithLayerDecay(
            optimizer,
            t_initial=num_steps - warmup_steps,
            # t_mul=getattr(args, 'lr_cycle_mul', 1.),
            lr_min=min_lr,
            # decay_rate=args.decay_rate,
            warmup_lr_init=warmup_lr,
            warmup_t=warmup_steps,
            cycle_limit=getattr(args, 'lr_cycle_limit', 1),
            t_in_epochs=False,
            warmup_prefix=True,
            noise_range_t=noise_range,
            noise_pct=getattr(args, 'lr_noise_pct', 0.67),
            noise_std=getattr(args, 'lr_noise_std', 1.),
            noise_seed=getattr(args, 'seed', 42),
        )
        num_epochs = lr_scheduler.get_cycle_length() + args.cooldown_epochs
    elif args.sched == 'tanh':
        lr_scheduler = TanhLRScheduler(
            optimizer,
            t_initial=num_epochs,
            t_mul=getattr(args, 'lr_cycle_mul', 1.),
            lr_min=min_lr,
            warmup_lr_init=warmup_lr,
            warmup_t=warmup_epochs,
            cycle_limit=getattr(args, 'lr_cycle_limit', 1),
            t_in_epochs=True,
            noise_range_t=noise_range,
            noise_pct=getattr(args, 'lr_noise_pct', 0.67),
            noise_std=getattr(args, 'lr_noise_std', 1.),
            noise_seed=getattr(args, 'seed', 42),
        )
        num_epochs = lr_scheduler.get_cycle_length() + args.cooldown_epochs
    elif args.sched == 'step':
        lr_scheduler = StepLRScheduler(
            optimizer,
            decay_t=args.decay_epochs,
            decay_rate=args.decay_rate,
            warmup_lr_init=args.warmup_lr,
            warmup_t=args.warmup_epochs,
            noise_range_t=noise_range,
            noise_pct=getattr(args, 'lr_noise_pct', 0.67),
            noise_std=getattr(args, 'lr_noise_std', 1.),
            noise_seed=getattr(args, 'seed', 42),
        )
    elif args.sched == 'plateau':
        mode = 'min' if 'loss' in getattr(args, 'eval_metric', '') else 'max'
        lr_scheduler = PlateauLRScheduler(
            optimizer,
            decay_rate=args.decay_rate,
            patience_t=args.patience_epochs,
            lr_min=min_lr,
            mode=mode,
            warmup_lr_init=warmup_lr,
            warmup_t=warmup_epochs,
            cooldown_t=0,
            noise_range_t=noise_range,
            noise_pct=getattr(args, 'lr_noise_pct', 0.67),
            noise_std=getattr(args, 'lr_noise_std', 1.),
            noise_seed=getattr(args, 'seed', 42),
        )

    return lr_scheduler, num_epochs
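

# Hedged usage sketch (not in the original file): how create_scheduler might be
# driven from a training loop. The argparse attribute names mirror what the
# function reads; `model`, `train_loader`, and the concrete values are
# illustrative assumptions.
#
#     from argparse import Namespace
#     args = Namespace(sched='cosine', cooldown_epochs=0, lr_noise=None, seed=0)
#     optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
#     scheduler, total_epochs = create_scheduler(
#         num_epochs=100, warmup_epochs=5, warmup_lr=1e-6, min_lr=1e-5,
#         args=args, optimizer=optimizer, n_iter_per_epoch=len(train_loader))
#     for epoch in range(total_epochs):
#         for it, batch in enumerate(train_loader):
#             ...  # forward / backward / optimizer.step()
#             scheduler.step_update(epoch * len(train_loader) + it)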


def adjust_learning_rate(warmup_epochs, lr, min_lr, optimizer, epoch, total_epochs, args):
    """Decay the learning rate with half-cycle cosine after a linear warmup."""
    if epoch < warmup_epochs:
        # Linear warmup from 0 up to the base lr.
        lr = lr * epoch / warmup_epochs
    else:
        # Half-cycle cosine decay from the base lr down to min_lr.
        lr = min_lr + (lr - min_lr) * 0.5 * \
            (1. + math.cos(math.pi * (epoch - warmup_epochs) / (total_epochs - warmup_epochs)))
    for param_group in optimizer.param_groups:
        if "lr_scale" in param_group:
            param_group["lr"] = lr * param_group["lr_scale"]
        else:
            param_group["lr"] = lr
    return lr
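

# Hedged usage sketch (not in the original file): adjust_learning_rate is typically
# called once per iteration with a fractional epoch so the cosine decay changes
# smoothly within each epoch. `train_loader` and the hyper-parameter values below
# are illustrative assumptions.
#
#     for epoch in range(total_epochs):
#         for it, batch in enumerate(train_loader):
#             frac_epoch = epoch + it / len(train_loader)
#             lr = adjust_learning_rate(warmup_epochs=5, lr=1e-3, min_lr=1e-6,
#                                       optimizer=optimizer, epoch=frac_epoch,
#                                       total_epochs=total_epochs, args=args)
#             ...  # forward / backward / optimizer.step()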