Commit

remove useless comment
wangyiou committed Sep 18, 2024
1 parent 5a44f8c commit 6e1e90c
Showing 1 changed file with 5 additions and 3 deletions.
8 changes: 5 additions & 3 deletions deepspeed/runtime/zero/stage_1_and_2.py
@@ -176,6 +176,7 @@ def __init__(self,
         self.timers = timers
 
         self.reduce_scatter = reduce_scatter
+        print(f"{self.reduce_scatter=}")
 
         self.overlap_comm = overlap_comm
 
@@ -203,6 +204,7 @@ def __init__(self,
 
         # CPU-Offload requires contiguous gradients
         self.contiguous_gradients = contiguous_gradients or self.cpu_offload
+        print(f"{self.contiguous_gradients=}")
 
         self.has_moe_layers = has_moe_layers
         if self.has_moe_layers:
@@ -1042,6 +1044,9 @@ def allreduce_and_scatter(self, bucket, numel_per_bucket=500000000, log=None, di
                                                         bucket_ranks=small_bucket_ranks)
 
     def average_tensor(self, tensor):
+        # import traceback2 as traceback
+        # traceback.print_stack()
+        # exit()
         if self.overlap_comm:
             stream = self.reduction_stream
             if not get_accelerator().resolves_data_dependency():
@@ -1070,9 +1075,6 @@ def average_tensor(self, tensor):
 
                 process_group = self.dp_process_group
 
-                #Averages gradients at parameter level if ipg has a moe param
-                #Otherwise averaging is done at the entire buffer level at the end of the loop
-                # MoE param have different groups
                 if self.ipg_bucket_has_moe_params:
                     process_group = self.expert_dp_process_group[param.group_name] if is_moe_param(
                         param) else self.dp_process_group

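A note on the two prints added in the first two hunks: f"{expr=}" is the self-documenting f-string form introduced in Python 3.8, which renders the expression text together with its value, so no label string is needed. A minimal sketch of what such prints produce (the values below are illustrative placeholders, not taken from a real run):

# Self-documenting f-strings (Python 3.8+): f"{expr=}" prints the expression
# text followed by its repr. Values here are illustrative placeholders.
reduce_scatter = True
contiguous_gradients = False
print(f"{reduce_scatter=}")        # -> reduce_scatter=True
print(f"{contiguous_gradients=}")  # -> contiguous_gradients=False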
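The last hunk removes the comment above the MoE branch in average_tensor; the behaviour it described is unchanged: when the IPG bucket contains MoE parameters, each expert parameter's gradient is averaged over its expert data-parallel group, while non-MoE parameters keep the regular data-parallel group. A minimal, self-contained sketch of that selection logic, using a hypothetical Param stand-in and placeholder group objects (only the names process_group, dp_process_group, expert_dp_process_group, is_moe_param, param.group_name, and ipg_bucket_has_moe_params come from the diff):

# Sketch only, not DeepSpeed's implementation: per-parameter process-group
# selection as shown in the average_tensor hunk above.
from dataclasses import dataclass
from typing import Optional

@dataclass
class Param:                           # hypothetical stand-in for a parameter
    group_name: Optional[str] = None   # set only for MoE expert parameters

def is_moe_param(param: Param) -> bool:
    return param.group_name is not None

def pick_process_group(param, ipg_bucket_has_moe_params, dp_process_group, expert_dp_process_group):
    # Default: average over the regular data-parallel group.
    process_group = dp_process_group
    # If the bucket holds MoE params, an expert param switches to its expert DP group.
    if ipg_bucket_has_moe_params:
        process_group = expert_dp_process_group[param.group_name] if is_moe_param(param) else dp_process_group
    return process_group

# Placeholder usage: strings stand in for torch.distributed process groups.
expert_groups = {"expert_group_0": "expert_dp_group_0"}
print(pick_process_group(Param("expert_group_0"), True, "dp_group", expert_groups))  # expert_dp_group_0
print(pick_process_group(Param(), True, "dp_group", expert_groups))                  # dp_group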