Skip to content

Commit

Permalink
Merge pull request #69 from llauraa23/main
Browse files Browse the repository at this point in the history
Fix the issue of parameters updated as nan during reward model training.
  • Loading branch information
Cambio ML authored Sep 25, 2023
2 parents 1a8be3a + 7241f6e commit 9b64d82
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion pykoi/rlhf/rw_finetuning.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,9 @@ def __init__(self,
logging_strategy=rlhf_config.logging_strategy,
logging_steps=rlhf_config.logging_steps,
# optim=rlhf_config.optim,
# lr_scheduler_type=rlhf_config.lr_scheduler_type_rw
# lr_scheduler_type=rlhf_config.lr_scheduler_type_rw,
adam_epsilon = 1e-7 # Language model is loaded in torch.float16. Adam optimizer adds epsilon to avoid zero denominator.
# NOTE: torch.float16 will round any number smaller than 6e-8 to 0. Do not change epsilon to smaller than 6e-8.
)
self.torch_dtype = torch.bfloat16 if rlhf_config.bf16 else torch.float16
# self.torch_dtype = torch.bfloat16 if bf16 else (torch.float16 if fp16 else torch.float32)
Expand Down

0 comments on commit 9b64d82

Please sign in to comment.