Skip to content

Commit

Permalink
New Singularity def
Browse files: browse the repository at this point in the history
  • Loading branch information
ArneNx committed Jun 30, 2022
1 parent a6866d3 commit fdfe47d
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 2 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number | Diff line number | Diff line change
Expand Up @@ -102,3 +102,4 @@ ENV/
#misc
.DS_Store
.history.*
*.sif
2 changes: 1 addition & 1 deletion Singularity.v0.4.def
Original file line number | Diff line number | Diff line change
Expand Up @@ -94,14 +94,14 @@ From: ubuntu:21.10
jupyterlab \
ipykernel \
opencv-python \
ffcv \
datajoint==0.12.7

conda run -n ffcv pip install -e /src/bias_transfer
conda run -n ffcv pip install -e /src/nntransfer
conda run -n ffcv pip install -e /src/nnfabrik
conda run -n ffcv pip install -e /src/neuralpredictors
conda run -n ffcv pip install -e /src/pytorch_warmup
conda run -n ffcv pip install -e /src/ffcv

conda run -n ffcv python -m ipykernel install --user --name=ffcv

Expand Down
10 changes: 9 additions & 1 deletion nntransfer/trainer/trainer.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -57,6 +57,7 @@ def __init__(self, dataloaders, model, seed, uid, cb, **kwargs):
self.seed = seed

self.data_loaders = dataloaders
print(self.data_loaders["train"])
self.task_keys = dataloaders["train"].keys()
self.optimizer, self.stop_closure, self.criterion = self.get_training_controls()
self.lr_scheduler = self.prepare_lr_schedule()
Expand Down Expand Up @@ -234,8 +235,15 @@ def main_loop(
outputs, loss, targets = module.post_forward(
outputs, loss, targets, **shared_memory
)

if outputs.isinf().any():
print(outputs)
raise ValueError()
if outputs.isnan().any():
print(outputs)
raise ValueError()
loss = self.compute_loss(mode, task_key, loss, outputs, targets)
if loss.isnan():
raise ValueError()

if not self.config.show_epoch_progress or not mode not in (
"Validation",
Expand Down

0 comments on commit fdfe47d

Please sign in to comment.