Skip to content

Commit

Permalink
Merge branch 'main' of github.com:garrett4wade/distributed_llm into fix-contiguous-param
Browse files Browse the repository at this point in the history
  • Loading branch information
garrett4wade committed Jul 1, 2024
2 parents 4aaf456 + ccfe95c commit d3bf732
Show file tree
Hide file tree
Showing 6 changed files with 2 additions and 22 deletions.
4 changes: 0 additions & 4 deletions realhf/api/core/data_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ def load_shuffle_split_dataset(
util: DatasetUtility,
dataset_path: str,
dataset_builder: Optional[Callable[[], List[Dict[str, str]]]] = None,
max_num_sequences: Optional[int] = None,
):
if dataset_path is not None:
if dataset_path.endswith(".jsonl"):
Expand All @@ -80,9 +79,6 @@ def load_shuffle_split_dataset(
assert dataset_builder is not None
data = dataset_builder()

if max_num_sequences is not None:
data = data[:max_num_sequences]

datasize_per_rank = len(data) // util.world_size
shuffle_indices = get_shuffle_indices(
util.seed, datasize_per_rank * util.world_size
Expand Down
3 changes: 0 additions & 3 deletions realhf/apps/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,6 @@ def main_start(args, recover_count: int = 0):
"will be saved to temporary directory of the system. "
"To change the fileroot, set the fileroot option of your choice in your CLUSTER_SPEC_PATH."
)
use_cuda_graph = os.environ.get("USE_CUDA_GRAPH", "0") == "1"

BASE_ENVIRONS = {
"PYTHONPATH": "/realhf",
Expand All @@ -152,12 +151,10 @@ def main_start(args, recover_count: int = 0):
"RECOVER_RUN": "1" if is_recover_run else "0",
"SAVE_RECOVER_STATES": "1" if save_recover_states else "0",
"CLUSTER_SPEC_PATH": cluster_spec_path if cluster_spec_path else "",
"USE_CUDA_GRAPH": "1" if use_cuda_graph else "0",
}

os.environ["IS_REMOTE"] = "0" if not force_allocation_use_cache else "1"
os.environ["REAL_PACKAGE_PATH"] = repo_path
os.environ["USE_CUDA_GRAPH"] = "1" if use_cuda_graph else "0"

# setup experiments
if args.allocation_mode == "search":
Expand Down
5 changes: 0 additions & 5 deletions realhf/experiments/common/ppo_exp.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,6 @@ class PPOHyperparameters:
:param value_norm_eps: Epsilon factor in the
denominator of exponential moving average.
:type value_norm_eps: float
:param use_cuda_graph: Whether to use CUDA graph in PPO actor generation.
:type use_cuda_graph: bool
"""

gen: GenerationHyperparameters = dataclasses.field(
Expand All @@ -117,7 +115,6 @@ class PPOHyperparameters:
)
value_norm_beta: float = 0.99995
value_norm_eps: float = 1e-5
use_cuda_graph: bool = False


@dataclasses.dataclass
Expand Down Expand Up @@ -248,8 +245,6 @@ def __post_init__(self):
raise NotImplementedError("SFT LoRA is not supported yet.")
if self.is_rew_lora or self.rew_lora_path is not None:
raise NotImplementedError("Rew LoRA is not supported yet.")
if self.ppo.use_cuda_graph:
os.environ["USE_CUDA_GRAPH"] = "1"

self.ppo_kwargs = dict(
n_minibatches=self.ppo.ppo_n_minibatches,
Expand Down
6 changes: 1 addition & 5 deletions realhf/impl/dataset/prompt_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ def __init__(
dataset_path: Optional[str] = None,
dataset_builder: Optional[Callable[[], List[Dict]]] = None,
pad_to_max_length: bool = False,
max_num_sequences: Optional[int] = None,
):
"""A dataset with prompts. Usually used for PPO.
Expand All @@ -30,14 +29,11 @@ def __init__(
dataset_builder (Optional[Callable[[], List[Dict]]], optional): Alternative to dataset_path.
A callable that returns a list of dictionary. Defaults to None.
pad_to_max_length (bool): Whether to pad the prompts to max_length. Defaults to False.
max_num_sequences (int): Max number of sequences the dataset contains. Only used in tests.
"""
self._util = util
self.max_length = max_length

data = data_api.load_shuffle_split_dataset(
util, dataset_path, dataset_builder, max_num_sequences=max_num_sequences
)
data = data_api.load_shuffle_split_dataset(util, dataset_path, dataset_builder)

prompts_str = [x["prompt"] for x in data]
util.tokenizer.padding_side = "left"
Expand Down
5 changes: 0 additions & 5 deletions realhf/impl/model/backend/pipe_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -1143,11 +1143,6 @@ def terminate_condition():
terminate_condition=terminate_condition,
)

use_cuda_graph = os.environ.get("USE_CUDA_GRAPH", "0") == "1"
if use_cuda_graph:
dist.barrier(group=constants.parallelism_group())
torch.cuda.synchronize()

if not constants.is_last_pipe_stage():
return None

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ def init_custom_ar() -> None:
" capability. "
)
return

_CA_HANDLE = CustomAllreduce(mp_rank, mp_world_size, full_nvlink=full_nvlink)


Expand Down

0 comments on commit d3bf732

Please sign in to comment.