From 7241f6e5939d774e41f8308dae1285fd33e2544d Mon Sep 17 00:00:00 2001
From: llauraa23 <142199710+llauraa23@users.noreply.github.com>
Date: Sun, 24 Sep 2023 20:59:15 -0700
Subject: [PATCH] Fix parameters being updated to NaN during reward model
 training.

rw_finetuning.py: the language model is loaded in torch.float16, and the Adam
optimizer adds epsilon to its denominator to avoid division by zero. In float16,
an epsilon below the smallest representable positive value (~6e-8) rounds to 0,
so the denominator can become zero and the parameters turn into NaN. Epsilon is
therefore set to 1e-7; do not set it smaller than 6e-8.
---
 pykoi/rlhf/rw_finetuning.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pykoi/rlhf/rw_finetuning.py b/pykoi/rlhf/rw_finetuning.py
index 000a52b..ecaba1e 100644
--- a/pykoi/rlhf/rw_finetuning.py
+++ b/pykoi/rlhf/rw_finetuning.py
@@ -94,7 +94,9 @@ def __init__(self,
             logging_strategy=rlhf_config.logging_strategy,
             logging_steps=rlhf_config.logging_steps,
             # optim=rlhf_config.optim,
-            # lr_scheduler_type=rlhf_config.lr_scheduler_type_rw
+            # lr_scheduler_type=rlhf_config.lr_scheduler_type_rw,
+            adam_epsilon=1e-7,  # The model is loaded in torch.float16; Adam adds epsilon to avoid a zero denominator.
+            # NOTE: torch.float16 rounds values below its smallest positive value (~6e-8) to 0. Do not set epsilon smaller than 6e-8.
         )
         self.torch_dtype = torch.bfloat16 if rlhf_config.bf16 else torch.float16
         # self.torch_dtype = torch.bfloat16 if bf16 else (torch.float16 if fp16 else torch.float32)
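
For context, a minimal sketch of the float16 underflow behind this choice, assuming
only that torch is installed (1e-8 is the usual Adam epsilon default; exact printed
values may vary slightly with rounding):

    import torch

    # The usual Adam epsilon default (1e-8) is below float16's smallest positive
    # subnormal (2**-24, about 6e-8), so it rounds to 0 and the Adam update
    # denominator sqrt(v_hat) + eps can become exactly zero, yielding NaN.
    print(torch.tensor(1e-8, dtype=torch.float16))  # tensor(0., dtype=torch.float16)

    # 1e-7 is still representable as a nonzero float16 subnormal (~1.19e-07),
    # so the denominator stays strictly positive.
    print(torch.tensor(1e-7, dtype=torch.float16))  # ~1.19e-07, nonzero

Using 1e-7 keeps epsilon as close as possible to the default while remaining
representable in float16.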