Commit
Merge branch 'master' into costineseanu/windows_inference_build
Showing 8 changed files with 151 additions and 32 deletions.
@@ -0,0 +1,53 @@
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team
from .builder import SYCLOpBuilder


class FlashAttentionBuilderObject():

    def __init__(self):
        pass

    # general functions
    def flash_attn_func_v2(self, q, k, v, dropout_p, softmax_scale, is_causal):
        try:
            import torch
            import intel_extension_for_pytorch  # noqa
            return torch.nn.functional.scaled_dot_product_attention(q,
                                                                    k,
                                                                    v,
                                                                    dropout_p=dropout_p,
                                                                    is_causal=is_causal,
                                                                    scale=softmax_scale)
        except ImportError:
            raise ImportError(
                "Please install pytorch and intel_extension_for_pytorch to include scaled dot product attention.")


class FlashAttentionBuilder(SYCLOpBuilder):
    BUILD_VAR = "DS_BUILD_FlashAttention"
    NAME = "flash_attn"

    def __init__(self, name=None):
        name = self.NAME if name is None else name
        super().__init__(name=name)

    def absolute_name(self):
        return f'deepspeed.ops.{self.NAME}_op'

    def sources(self):
        # No SYCL sources to JIT-compile; the kernel is provided by intel_extension_for_pytorch.
        return []

    def include_paths(self):
        return []

    def extra_ldflags(self):
        return []

    def cxx_args(self):
        return []

    def load(self):
        return FlashAttentionBuilderObject()
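For context, a minimal usage sketch of the builder above. The module path, device, and tensor shapes are assumptions (the diff does not show where this file lives), and it presumes an XPU device with intel_extension_for_pytorch installed:

    import torch
    from op_builder.xpu.flash_attn import FlashAttentionBuilder  # hypothetical module path

    # load() skips JIT compilation and returns a thin Python wrapper object.
    flash_attn = FlashAttentionBuilder().load()

    # Batch of 2, 8 heads, sequence length 128, head dim 64 -- arbitrary example shapes.
    q = torch.randn(2, 8, 128, 64, device="xpu", dtype=torch.float16)
    k = torch.randn_like(q)
    v = torch.randn_like(q)

    # Delegates to torch.nn.functional.scaled_dot_product_attention under the hood;
    # softmax_scale=None falls back to the default 1/sqrt(head_dim) scaling.
    out = flash_attn.flash_attn_func_v2(q, k, v, dropout_p=0.0, softmax_scale=None, is_causal=True)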
@@ -0,0 +1,36 @@
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team
from .builder import SYCLOpBuilder


class InferenceBuilder(SYCLOpBuilder):
    BUILD_VAR = "DS_BUILD_TRANSFORMER_INFERENCE"
    NAME = "transformer_inference"

    def __init__(self, name=None):
        name = self.NAME if name is None else name
        super().__init__(name=name)

    def absolute_name(self):
        return f'deepspeed.ops.transformer.inference.{self.NAME}_op'

    def sources(self):
        # No SYCL sources to JIT-compile; kernels ship prebuilt with intel_extension_for_pytorch.
        return []

    def include_paths(self):
        return []

    def extra_ldflags(self):
        return []

    def cxx_args(self):
        return []

    def load(self):
        try:
            import intel_extension_for_pytorch.deepspeed
            return intel_extension_for_pytorch.deepspeed.transformer_inference.transformer_inference
        except ImportError:
            raise ImportError("Please install intel-extension-for-pytorch >= 2.1.30 to include DeepSpeed kernels.")
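A hedged sketch of how this builder is consumed: rather than compiling sources, load() hands back the kernel module that ships with intel-extension-for-pytorch. The import path below is an assumption:

    from op_builder.xpu.inference import InferenceBuilder  # hypothetical module path

    builder = InferenceBuilder()
    try:
        # Resolves to intel_extension_for_pytorch.deepspeed.transformer_inference.transformer_inference
        kernels = builder.load()
    except ImportError as err:
        # Raised when intel-extension-for-pytorch >= 2.1.30 is not installed.
        print(f"XPU inference kernels unavailable: {err}")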
@@ -0,0 +1,33 @@
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

from .builder import SYCLOpBuilder


class NotImplementedBuilder(SYCLOpBuilder):
    BUILD_VAR = "DS_BUILD_NOT_IMPLEMENTED"
    NAME = "deepspeed_not_implemented"

    def __init__(self, name=None):
        name = self.NAME if name is None else name
        super().__init__(name=name)

    def absolute_name(self):
        return f'deepspeed.ops.{self.NAME}_op'

    def load(self, verbose=True):
        raise ValueError("This op has not been implemented on the XPU backend.")

    def sources(self):
        return []

    def cxx_args(self):
        return []

    def extra_ldflags(self):
        return []

    def include_paths(self):
        return []
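To illustrate the fallback contract (a sketch; the module path is an assumption): loading an op that has no XPU implementation fails fast with a ValueError instead of attempting a doomed compile:

    from op_builder.xpu.no_impl import NotImplementedBuilder  # hypothetical module path

    try:
        NotImplementedBuilder().load()
    except ValueError as err:
        print(err)  # -> "This op has not been implemented on the XPU backend."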
@@ -1 +1 @@
-0.14.4
+0.14.5