Skip to content

Commit

Permalink
deepspeed-fork content for 1.17.1
Browse files Browse the repository at this point in the history
Signed-off-by: SW publisher <[email protected]>
  • Loading branch information
SW publisher authored and Jenkins committed Aug 25, 2024
1 parent ce78a63 commit e3078cb
Show file tree
Hide file tree
Showing 240 changed files with 13,808 additions and 2,605 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ repos:
name: check-torchdist
entry: ./scripts/check-torchdist.py
language: python
exclude: ^(deepspeed/comm/|docs/|benchmarks/|scripts/check-torchdist.py|deepspeed/moe/sharded_moe.py|deepspeed/runtime/comm/coalesced_collectives.py|deepspeed/elasticity/elastic_agent.py|deepspeed/launcher/launch.py|tests/unit/comm/test_dist.py)
exclude: ^(deepspeed/comm/|docs/|benchmarks/|scripts/check-torchdist.py|deepspeed/moe/sharded_moe.py|deepspeed/runtime/comm/coalesced_collectives.py|deepspeed/elasticity/elastic_agent.py|deepspeed/launcher/launch.py|tests/unit/comm/test_dist.py|deepspeed/runtime/zero/utils.py|deepspeed/tools/pg_sim/ut/base.py|deepspeed/tools/pg_sim/pg.py|.ci/unit_tests/)
# Specific deepspeed/ files are excluded for now until we wrap ProcessGroup in deepspeed.comm

- repo: local
Expand Down
6 changes: 5 additions & 1 deletion accelerator/abstract_accelerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def manual_seed_all(self, seed):
...

@abc.abstractmethod
def initial_seed(self, seed):
def initial_seed(self):
...

@abc.abstractmethod
Expand Down Expand Up @@ -280,6 +280,10 @@ def create_op_builder(self, class_name):
def get_op_builder(self, class_name):
...

@abc.abstractmethod
def get_compile_backend(self):
...

@abc.abstractmethod
def build_extension(self):
...
Expand Down
7 changes: 5 additions & 2 deletions accelerator/cpu_accelerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@ def manual_seed(self, seed):
def manual_seed_all(self, seed):
return torch.manual_seed(seed)

def initial_seed(self, seed):
return torch.initial_seed(seed)
def initial_seed(self):
return torch.initial_seed()

def default_generator(self, device_index):
return torch.default_generator
Expand Down Expand Up @@ -302,3 +302,6 @@ def build_extension(self):

def export_envs(self):
return []

def get_compile_backend(self):
return "inductor"
7 changes: 5 additions & 2 deletions accelerator/cuda_accelerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,8 @@ def manual_seed(self, seed):
def manual_seed_all(self, seed):
return torch.cuda.manual_seed_all(seed)

def initial_seed(self, seed):
return torch.cuda.initial_seed(seed)
def initial_seed(self):
return torch.cuda.initial_seed()

def default_generator(self, device_index):
return torch.cuda.default_generators[device_index]
Expand Down Expand Up @@ -360,3 +360,6 @@ def build_extension(self):

def export_envs(self):
return ['NCCL']

def get_compile_backend(self):
return "inductor"
22 changes: 16 additions & 6 deletions accelerator/hpu_accelerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,8 @@ def handles_memory_backpressure(self):
return True

def device_name(self, device_index=None):
if device_index is None:
return 'hpu'
return 'hpu:{}'.format(device_index)
# ignoring device_index.
return 'hpu'

def device(self, device_index=None):
return torch.device(self.device_name(device_index))
Expand Down Expand Up @@ -73,13 +72,13 @@ def get_rng_state(self, device_index=None):
return self.hpu.random.get_rng_state()

def manual_seed(self, seed):
self.hpu.random.manual_seed(seed)
return self.hpu.random.manual_seed(seed)

def manual_seed_all(self, seed):
self.hpu.random.manual_seed_all(seed)

def initial_seed(self, seed):
self.hpu.random.initial_seed(seed)
def initial_seed(self):
return self.hpu.random.initial_seed()

def default_generator(self, device_index):
return self.hpu.random.default_generators[device_index]
Expand Down Expand Up @@ -288,6 +287,17 @@ def get_op_builder(self, class_name):
else:
return self.class_dict['NotImplementedBuilder'] if 'NotImplementedBuilder' in self.class_dict else None

def get_compile_backend(self):
return "hpu_backend"

#shall be removed once moving to torch.compile
def wrap_in_hpu_graph(self, module):
if self.hpu.is_lazy():
module = self.hpu.wrap_in_hpu_graph(module)
else:
print("Warning: hpu graphs in eager mode is not supported, ignoring")
return module

def build_extension(self):
from torch.utils.cpp_extension import BuildExtension
return BuildExtension
Expand Down
5 changes: 4 additions & 1 deletion accelerator/mps_accelerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def manual_seed_all(self, seed):
def seed(self):
return torch.mps.seed()

def initial_seed(self, seed):
def initial_seed(self):
return

def default_generator(self, device_index):
Expand Down Expand Up @@ -258,3 +258,6 @@ def build_extension(self):

def export_envs(self):
return []

def get_compile_backend(self):
return "inductor"
7 changes: 5 additions & 2 deletions accelerator/npu_accelerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,8 @@ def manual_seed(self, seed):
def manual_seed_all(self, seed):
return torch.npu.manual_seed_all(seed)

def initial_seed(self, seed):
return torch.npu.initial_seed(seed)
def initial_seed(self):
return torch.npu.initial_seed()

def default_generator(self, device_index):
return torch.npu.default_generators[device_index]
Expand Down Expand Up @@ -278,3 +278,6 @@ def build_extension(self):

def export_envs(self):
return ['ASCEND', 'HCCL', 'LD_LIBRARY', 'PATH']

def get_compile_backend(self):
return "inductor"
4 changes: 2 additions & 2 deletions accelerator/xpu_accelerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ def manual_seed(self, seed):
def manual_seed_all(self, seed):
return torch.xpu.manual_seed_all(seed)

def initial_seed(self, seed):
return torch.xpu.initial_seed(seed)
def initial_seed(self):
return torch.xpu.initial_seed()

def default_generator(self, device_index):
return torch.xpu.default_generators[device_index]
Expand Down
1 change: 1 addition & 0 deletions build.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
+hpu.synapse.v1.17.1
Loading

0 comments on commit e3078cb

Please sign in to comment.