-
Notifications
You must be signed in to change notification settings - Fork 10
/
train_disk_lstm.py
64 lines (51 loc) · 1.98 KB
/
train_disk_lstm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
"""LSTM training script for visual tracking task."""
import pathlib
import fifteen
import tyro
from tqdm.auto import tqdm
from lib import disk, utils, validation_tracker
def main(config: disk.experiment_config.LstmExperimentConfig) -> None:
    """Run LSTM training for the experiment described by `config`.

    Sets up an experiment directory under ``./experiments/``, records the
    config and current git commit hash as metadata, seeds the non-JAX random
    number generators, and then loops over the training dataloader for
    `config.num_epochs` epochs. Validation (with checkpoint-if-best) is
    triggered whenever the step counter is a multiple of 500, before the
    training step for that iteration is taken.

    Args:
        config: Experiment configuration; supplies the experiment identifier
            template, dataset fold, random seed, and number of epochs, and is
            forwarded to the dataloader and train state constructors.
    """
    # Resolve the experiment directory: ./experiments/<identifier>, where the
    # identifier template is filled in with the dataset fold.
    experiments_root = pathlib.Path("./experiments/")
    experiment_name = config.experiment_identifier.format(
        dataset_fold=config.dataset_fold
    )
    # NOTE(review): .clear() presumably wipes any earlier run stored at this
    # path -- confirm against the fifteen documentation.
    experiment = fifteen.experiments.Experiment(
        data_dir=experiments_root / experiment_name
    ).clear()

    # Record what is needed to reproduce this run.
    experiment.write_metadata("experiment_config", config)
    experiment.write_metadata("git_commit_hash", utils.get_git_commit_hash())

    # Seed everything except JAX.
    utils.set_random_seed(config.random_seed)

    # Training data.
    train_dataloader = disk.data_loading.make_subsequence_dataloader(
        config=config, train=True
    )

    # Validation helper that also handles metric-aware checkpointing.
    compute_metrics = disk.validation_lstm.make_compute_metrics(
        dataset_fold=config.dataset_fold
    )
    validation = validation_tracker.ValidationTracker[disk.training_lstm.TrainState](
        name="val",
        experiment=experiment,
        compute_metrics=compute_metrics,
    )

    # Number of training steps between validation passes.
    validation_interval = 500

    train_state = disk.training_lstm.TrainState.initialize(config)
    for _epoch in tqdm(range(config.num_epochs)):
        minibatch: disk.data.DiskStructNormalized
        for minibatch in train_dataloader:
            # Validate (and checkpoint if best) first, so step 0 is evaluated
            # before any update; the tracker is rebound to the returned value.
            if train_state.steps % validation_interval == 0:
                validation = validation.validate_log_and_checkpoint_if_best(
                    train_state
                )

            # One optimization step; yields the new state plus data to log.
            train_state, log_data = train_state.training_step(minibatch)

            # Log to Tensorboard, indexed by the post-update step count.
            current_step = train_state.steps
            experiment.log(
                log_data,
                step=current_step,
                log_scalars_every_n=10,
                log_histograms_every_n=100,
            )
if __name__ == "__main__":
    # Script entry point: only runs when this file is executed directly.
    # NOTE(review): pdb_safety_net presumably opens a debugger on uncaught
    # exceptions -- confirm against the fifteen documentation.
    fifteen.utils.pdb_safety_net()

    # Build the experiment config from command-line arguments, using the
    # module docstring as the CLI description, then start training.
    config_type = disk.experiment_config.LstmExperimentConfig
    main(tyro.cli(config_type, description=__doc__))