From 7241f6e5939d774e41f8308dae1285fd33e2544d Mon Sep 17 00:00:00 2001
From: llauraa23 <142199710+llauraa23@users.noreply.github.com>
Date: Sun, 24 Sep 2023 20:59:15 -0700
Subject: [PATCH] Fix parameters being updated to NaN during reward model
 training.

rw_finetuning.py: the language model is loaded in torch.float16, and the Adam
optimizer adds epsilon to its denominator to avoid division by zero. In float16,
an epsilon below the smallest representable positive value (~6e-8) rounds to 0,
so the denominator can become zero and the parameters turn into NaN. Epsilon is
therefore set to 1e-7; do not set it smaller than 6e-8.
---
 pykoi/rlhf/rw_finetuning.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pykoi/rlhf/rw_finetuning.py b/pykoi/rlhf/rw_finetuning.py
index 000a52b..ecaba1e 100644
--- a/pykoi/rlhf/rw_finetuning.py
+++ b/pykoi/rlhf/rw_finetuning.py
@@ -94,7 +94,9 @@ def __init__(self,
             logging_strategy=rlhf_config.logging_strategy,
             logging_steps=rlhf_config.logging_steps,
             # optim=rlhf_config.optim,
-            # lr_scheduler_type=rlhf_config.lr_scheduler_type_rw
+            # lr_scheduler_type=rlhf_config.lr_scheduler_type_rw,
+            adam_epsilon=1e-7,  # The model is loaded in torch.float16; Adam adds epsilon to avoid a zero denominator.
+            # NOTE: torch.float16 rounds values below its smallest positive value (~6e-8) to 0. Do not set epsilon smaller than 6e-8.
         )
         self.torch_dtype = torch.bfloat16 if rlhf_config.bf16 else torch.float16
         # self.torch_dtype = torch.bfloat16 if bf16 else (torch.float16 if fp16 else torch.float32)
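
For context, a minimal sketch of the float16 underflow behind this choice, assuming
only that torch is installed (1e-8 is the usual Adam epsilon default; exact printed
values may vary slightly with rounding):

    import torch

    # The usual Adam epsilon default (1e-8) is below float16's smallest positive
    # subnormal (2**-24, about 6e-8), so it rounds to 0 and the Adam update
    # denominator sqrt(v_hat) + eps can become exactly zero, yielding NaN.
    print(torch.tensor(1e-8, dtype=torch.float16))  # tensor(0., dtype=torch.float16)

    # 1e-7 is still representable as a nonzero float16 subnormal (~1.19e-07),
    # so the denominator stays strictly positive.
    print(torch.tensor(1e-7, dtype=torch.float16))  # ~1.19e-07, nonzero

Using 1e-7 keeps epsilon as close as possible to the default while remaining
representable in float16.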