Skip to content

Commit

Permalink
[Patch] Add a cache_clear_freq option to the command line. (#63)
Browse files Browse the repository at this point in the history
* .

* .
  • Loading branch information
garrett4wade authored Sep 3, 2024
1 parent bcb4c99 commit c68c8d6
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions realhf/experiments/common/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,9 @@ class CommonExperimentConfig(Experiment):
:type nodelist: str or None
:param seed: Random seed.
:type seed: int
:param cache_clear_freq: The data transfer cache will be cleared every ``cache_clear_freq`` steps.
If None, the cache is never cleared. Set to a small number, e.g., 1, if OOM or CUDA OOM occurs.
:type cache_clear_freq: int or None
:param exp_ctrl: The save and evaluation control of the experiment.
:type exp_ctrl: ExperimentSaveEvalControl
"""
Expand All @@ -172,6 +175,7 @@ class CommonExperimentConfig(Experiment):
n_gpus_per_node: int = 8
nodelist: Optional[str] = None
seed: int = 1
cache_clear_freq: Optional[int] = 10
exp_ctrl: ExperimentSaveEvalControl = dataclasses.field(
default_factory=ExperimentSaveEvalControl
)
Expand Down Expand Up @@ -413,8 +417,8 @@ def _get_model_worker_configs(
seed=self.seed,
shards=[],
datasets=self.datasets,
cuda_cache_cleanliness=False,
cuda_cache_clear_freq=10,
cuda_cache_cleanliness=self.cache_clear_freq is not None,
cuda_cache_clear_freq=self.cache_clear_freq,
tokenizer_name_or_path=self.tokenizer_name_or_path,
)

Expand Down

0 comments on commit c68c8d6

Please sign in to comment.