From e3173f83cc6a4f1efbfa26d50e88796b5012679e Mon Sep 17 00:00:00 2001 From: Deepanshu Gupta Date: Wed, 5 Apr 2023 01:03:20 +0530 Subject: [PATCH 1/2] Error in comment of log_weights.py The comment on line 77 contained a wording error: it said "the you" where "the user" was intended. --- common/log_weights.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/log_weights.py b/common/log_weights.py index d2c8e21..6794fc9 100644 --- a/common/log_weights.py +++ b/common/log_weights.py @@ -74,7 +74,7 @@ def log_ebc_norms( : min(sample_size, emb_weight_tensor.shape[0]) ] # WARNING: .cpu() transfer executes malloc that may be the cause of memory leaks - # Change sample_size if the you observe frequent OOM errors or remove weight logging. + # Change sample_size if the user observe frequent OOM errors or remove weight logging. norms = emb_weight_tensor[sample_mask].cpu().norm(dim=1).to(torch.float32) logging.info(f"Norm shape before reduction: {norms.shape}", rank=-1) norms = norms.mean().to(torch.device(f"cuda:{dist.get_rank()}")) From 8fb8f2329e709d050c3146d9785479566589a435 Mon Sep 17 00:00:00 2001 From: Deepanshu Gupta Date: Sun, 9 Apr 2023 11:11:43 +0530 Subject: [PATCH 2/2] Update log_weights.py --- common/log_weights.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/log_weights.py b/common/log_weights.py index 6794fc9..2007891 100644 --- a/common/log_weights.py +++ b/common/log_weights.py @@ -74,7 +74,7 @@ def log_ebc_norms( : min(sample_size, emb_weight_tensor.shape[0]) ] # WARNING: .cpu() transfer executes malloc that may be the cause of memory leaks - # Change sample_size if the user observe frequent OOM errors or remove weight logging. + # Change sample_size if user observe frequent OOM errors or remove weight logging. norms = emb_weight_tensor[sample_mask].cpu().norm(dim=1).to(torch.float32) logging.info(f"Norm shape before reduction: {norms.shape}", rank=-1) norms = norms.mean().to(torch.device(f"cuda:{dist.get_rank()}"))