-
Notifications
You must be signed in to change notification settings - Fork 8
/
pretrain.py
69 lines (55 loc) · 2.58 KB
/
pretrain.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
'''Main pretraining script.'''
import hydra
@hydra.main(config_path='conf', config_name='pretrain')
def run(config):
    '''Pretrain the system selected by the config.

    Seeds the run, sets up Weights & Biases logging and periodic checkpointing,
    builds the system named by `config.algorithm`, optionally attaches the online
    evaluator, and then calls `Trainer.fit` on the system.

    Args:
        config: Config object passed in through the `hydra.main` decorator. Fields
            read here: `trainer.*`, `exp.base_dir`, `exp.name`, `dataset.name`,
            `algorithm`, `model` and `gpus`.

    Raises:
        ValueError: If `config.dataset.name` is not in PRETRAINING_DATASETS, or if
            `config.algorithm` is not one of 'emix', 'shed' or 'mae'.
    '''
    # Deferred imports for faster tab completion
    import os

    import flatten_dict
    import pytorch_lightning as pl

    from src import online_evaluator
    from src.datasets.catalog import MULTILABEL_DATASETS, PRETRAINING_DATASETS, UNLABELED_DATASETS
    from src.systems import emix, mae, shed

    pl.seed_everything(config.trainer.seed)

    # Saving checkpoints and logging with wandb.
    flat_config = flatten_dict.flatten(config, reducer='dot')
    save_dir = os.path.join(config.exp.base_dir, config.exp.name)
    wandb_logger = pl.loggers.WandbLogger(project='domain-agnostic', name=config.exp.name)
    wandb_logger.log_hyperparams(flat_config)
    callbacks = [
        pl.callbacks.LearningRateMonitor(logging_interval='epoch'),
        pl.callbacks.ModelCheckpoint(dirpath=save_dir, every_n_train_steps=2000, save_top_k=-1),
    ]

    # Raise explicitly instead of using `assert`, which is stripped under `python -O`.
    if config.dataset.name not in PRETRAINING_DATASETS:
        raise ValueError(f'{config.dataset.name} not one of {PRETRAINING_DATASETS}.')

    # Map each supported algorithm name to the system class that implements it.
    system_classes = {
        'emix': emix.EMixSystem,
        'shed': shed.ShEDSystem,
        'mae': mae.MAESystem,
    }
    system_class = system_classes.get(config.algorithm)
    if system_class is None:
        raise ValueError(f'Unimplemented algorithm config.algorithm={config.algorithm}.')
    system = system_class(config)

    # Online evaluator for labeled datasets using the Domain Agnostic Transformer.
    # `config.model` is a config node (its `kwargs.dim` is read below), so comparing
    # the node itself to a string can never be true; compare its `name` entry instead.
    # `.get` yields None when the key is absent, in which case the evaluator is skipped.
    # NOTE(review): assumes the model config stores its identifier under `name` —
    # confirm against the model config files.
    model_name = config.model.get('name')
    if config.dataset.name not in UNLABELED_DATASETS and model_name == 'transformer':
        ssl_online_evaluator = online_evaluator.SSLOnlineEvaluator(
            dataset=config.dataset.name,
            z_dim=config.model.kwargs.dim,
            num_classes=system.dataset.num_classes(),
            multi_label=(config.dataset.name in MULTILABEL_DATASETS),
        )
        callbacks.append(ssl_online_evaluator)

    # PyTorch Lightning Trainer.
    trainer = pl.Trainer(
        default_root_dir=save_dir,
        logger=wandb_logger,
        gpus=str(config.gpus),  # GPU indices
        max_steps=config.trainer.max_steps,
        # min_steps is deliberately given the same value as max_steps.
        min_steps=config.trainer.max_steps,
        resume_from_checkpoint=config.trainer.resume_from_checkpoint,
        val_check_interval=config.trainer.val_check_interval,
        limit_val_batches=config.trainer.limit_val_batches,
        callbacks=callbacks,
        # weights_summary=config.trainer.weights_summary,
        gradient_clip_val=config.trainer.gradient_clip_val,
        precision=config.trainer.precision,
    )

    trainer.fit(system)
# Script entry point: run() is called with no arguments here; its `config`
# parameter is expected to be filled in by the @hydra.main decorator on run().
if __name__ == '__main__':
    run()