Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
deepspeed
Commits
5bcc463d
Commit
5bcc463d
authored
May 29, 2023
by
aiss
Browse files
update v0.9.2
parent
ac5fbab4
Changes
554
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
227 additions
and
286 deletions
+227
-286
examples/README.md
examples/README.md
+2
-2
hipify_20230511113250.png
hipify_20230511113250.png
+0
-0
install.sh
install.sh
+2
-2
op_builder/__init__.py
op_builder/__init__.py
+9
-6
op_builder/all_ops.py
op_builder/all_ops.py
+5
-3
op_builder/async_io.py
op_builder/async_io.py
+15
-30
op_builder/builder.py
op_builder/builder.py
+97
-135
op_builder/cpu_adagrad.py
op_builder/cpu_adagrad.py
+8
-11
op_builder/cpu_adam.py
op_builder/cpu_adam.py
+11
-13
op_builder/fused_adam.py
op_builder/fused_adam.py
+8
-8
op_builder/fused_lamb.py
op_builder/fused_lamb.py
+9
-12
op_builder/quantizer.py
op_builder/quantizer.py
+4
-1
op_builder/random_ltd.py
op_builder/random_ltd.py
+8
-11
op_builder/sparse_attn.py
op_builder/sparse_attn.py
+11
-15
op_builder/spatial_inference.py
op_builder/spatial_inference.py
+7
-7
op_builder/stochastic_transformer.py
op_builder/stochastic_transformer.py
+5
-3
op_builder/transformer.py
op_builder/transformer.py
+10
-15
op_builder/transformer_inference.py
op_builder/transformer_inference.py
+7
-8
op_builder/utils.py
op_builder/utils.py
+5
-3
release/bump_patch_version.py
release/bump_patch_version.py
+4
-1
No files found.
Too many changes to show.
To preserve performance only
554 of 554+
files are displayed.
Plain diff
Email patch
examples/README.md
View file @
5bcc463d
...
...
@@ -4,6 +4,6 @@ If you are looking for examples using DeepSpeed please see the following resourc
1.
[
DeepSpeedExamples
](
https://github.com/microsoft/DeepSpeedExamples
)
2.
[
Megatron-DeepSpeed
](
https://github.com/microsoft/Megatron-DeepSpeed
)
3.
[
DeepSpeed + AzureML
](
https://github.com/Azure/azureml-examples/tree/main/python-sdk/workflows/train/deepspeed
)
3.
[
DeepSpeed + AzureML
](
https://github.com/Azure/azureml-examples/tree/main/
v1/
python-sdk/workflows/train/deepspeed
)
4.
[
DeepSpeed + Hugging Face Transformers Integration
](
https://huggingface.co/docs/transformers/main_classes/deepspeed
)
5.
[
DeepSpeed + PyTorch Lightning
](
https://
pytorch-
lightning.
readthedocs.io/en/latest/api/pytorch_
lightning.utilities.deepspeed.html
)
5.
[
DeepSpeed + PyTorch Lightning
](
https://lightning.
ai/docs/pytorch/stable/api/
lightning.
pytorch.
utilities.deepspeed.html
)
hipify_20230511113250.png
deleted
100755 → 0
View file @
ac5fbab4
33.4 KB
install.sh
View file @
5bcc463d
#!/
bin/
bash
#!/
usr/bin/env
bash
set
-e
err_report
()
{
...
...
@@ -121,7 +121,7 @@ rm_if_exist() {
if
[
-f
$1
]
;
then
rm
$VERBOSE
$1
elif
[
-d
$1
]
;
then
rm
-r
$VERBOSE
$1
rm
-r
f
$VERBOSE
$1
fi
}
...
...
op_builder/__init__.py
View file @
5bcc463d
"""
Copyright 2020 The Microsoft DeepSpeed Team
"""
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
import
sys
import
os
import
pkgutil
...
...
@@ -8,6 +10,9 @@ import importlib
from
.builder
import
get_default_compute_capabilities
,
OpBuilder
# Do not remove, required for abstract accelerator to detect if we have a deepspeed or 3p op_builder
__deepspeed__
=
True
# List of all available op builders from deepspeed op_builder
try
:
import
deepspeed.ops.op_builder
# noqa: F401
...
...
@@ -42,9 +47,7 @@ for _, module_name, _ in pkgutil.iter_modules([os.path.dirname(this_module.__fil
if
module_name
!=
'all_ops'
and
module_name
!=
'builder'
:
module
=
importlib
.
import_module
(
f
".
{
module_name
}
"
,
package
=
op_builder_dir
)
for
member_name
in
module
.
__dir__
():
if
member_name
.
endswith
(
'Builder'
)
and
member_name
!=
"OpBuilder"
and
member_name
!=
"CUDAOpBuilder"
:
if
member_name
.
endswith
(
'Builder'
)
and
member_name
!=
"OpBuilder"
and
member_name
!=
"CUDAOpBuilder"
:
# assign builder name to variable with same name
# the following is equivalent to i.e. TransformerBuilder = "TransformerBuilder"
this_module
.
__dict__
[
member_name
]
=
builder_closure
(
member_name
)
op_builder/all_ops.py
View file @
5bcc463d
"""
Copyright 2020 The Microsoft DeepSpeed Team
"""
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
import
os
import
pkgutil
import
importlib
...
...
op_builder/async_io.py
View file @
5bcc463d
"""
Copyright 2020 The Microsoft DeepSpeed Team
"""
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
import
distutils.spawn
import
subprocess
...
...
@@ -19,14 +21,10 @@ class AsyncIOBuilder(OpBuilder):
def
sources
(
self
):
return
[
'csrc/aio/py_lib/deepspeed_py_copy.cpp'
,
'csrc/aio/py_lib/py_ds_aio.cpp'
,
'csrc/aio/py_lib/deepspeed_py_aio.cpp'
,
'csrc/aio/py_lib/deepspeed_py_aio_handle.cpp'
,
'csrc/aio/py_lib/deepspeed_aio_thread.cpp'
,
'csrc/aio/common/deepspeed_aio_utils.cpp'
,
'csrc/aio/common/deepspeed_aio_common.cpp'
,
'csrc/aio/common/deepspeed_aio_types.cpp'
,
'csrc/aio/py_lib/deepspeed_py_copy.cpp'
,
'csrc/aio/py_lib/py_ds_aio.cpp'
,
'csrc/aio/py_lib/deepspeed_py_aio.cpp'
,
'csrc/aio/py_lib/deepspeed_py_aio_handle.cpp'
,
'csrc/aio/py_lib/deepspeed_aio_thread.cpp'
,
'csrc/aio/common/deepspeed_aio_utils.cpp'
,
'csrc/aio/common/deepspeed_aio_common.cpp'
,
'csrc/aio/common/deepspeed_aio_types.cpp'
,
'csrc/aio/py_lib/deepspeed_pin_tensor.cpp'
]
...
...
@@ -52,21 +50,14 @@ class AsyncIOBuilder(OpBuilder):
]
def
extra_ldflags
(
self
):
#aiss
#return ['-laio']
return
[
'-laio'
,
'-liomp5'
]
def
check_for_libaio_pkg
(
self
):
libs
=
dict
(
dpkg
=
[
"-l"
,
"libaio-dev"
,
"apt"
],
pacman
=
[
"-Q"
,
"libaio"
,
"pacman"
],
rpm
=
[
"-q"
,
"libaio-devel"
,
"yum"
],
dpkg
=
[
"-l"
,
"libaio-dev"
,
"apt"
],
pacman
=
[
"-Q"
,
"libaio"
,
"pacman"
],
rpm
=
[
"-q"
,
"libaio-devel"
,
"yum"
],
)
found
=
False
...
...
@@ -75,15 +66,11 @@ class AsyncIOBuilder(OpBuilder):
path
=
distutils
.
spawn
.
find_executable
(
pkgmgr
)
if
path
is
not
None
:
cmd
=
f
"
{
pkgmgr
}
{
flag
}
{
lib
}
"
result
=
subprocess
.
Popen
(
cmd
,
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
,
shell
=
True
)
result
=
subprocess
.
Popen
(
cmd
,
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
,
shell
=
True
)
if
result
.
wait
()
==
0
:
found
=
True
else
:
self
.
warning
(
f
"
{
self
.
NAME
}
: please install the
{
lib
}
package with
{
tool
}
"
)
self
.
warning
(
f
"
{
self
.
NAME
}
: please install the
{
lib
}
package with
{
tool
}
"
)
break
return
found
...
...
@@ -95,9 +82,7 @@ class AsyncIOBuilder(OpBuilder):
# respectively to specify the directories for libaio.h and libaio.so.
aio_compatible
=
self
.
has_function
(
'io_submit'
,
(
'aio'
,
))
if
verbose
and
not
aio_compatible
:
self
.
warning
(
f
"
{
self
.
NAME
}
requires the dev libaio .so object and headers but these were not found."
)
self
.
warning
(
f
"
{
self
.
NAME
}
requires the dev libaio .so object and headers but these were not found."
)
# Check for the libaio package via known package managers
# to print suggestions on which package to install.
...
...
op_builder/builder.py
View file @
5bcc463d
"""
Copyright 2020 The Microsoft DeepSpeed Team
"""
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
import
os
import
sys
import
time
...
...
@@ -27,25 +29,18 @@ DEFAULT_COMPUTE_CAPABILITIES = "6.0;6.1;7.0"
try
:
import
torch
except
ImportError
:
print
(
f
"
{
WARNING
}
unable to import torch, please install it if you want to pre-compile any deepspeed ops."
)
print
(
f
"
{
WARNING
}
unable to import torch, please install it if you want to pre-compile any deepspeed ops."
)
else
:
TORCH_MAJOR
=
int
(
torch
.
__version__
.
split
(
'.'
)[
0
])
TORCH_MINOR
=
int
(
torch
.
__version__
.
split
(
'.'
)[
1
])
def
installed_cuda_version
(
name
=
""
):
import
torch.cuda
if
not
torch
.
cuda
.
is_available
():
return
0
,
0
import
torch.utils.cpp_extension
cuda_home
=
torch
.
utils
.
cpp_extension
.
CUDA_HOME
assert
cuda_home
is
not
None
,
"CUDA_HOME does not exist, unable to compile CUDA op(s)"
# Ensure there is not a cuda version mismatch between torch and nvcc compiler
output
=
subprocess
.
check_output
([
cuda_home
+
"/bin/nvcc"
,
"-V"
],
universal_newlines
=
True
)
output
=
subprocess
.
check_output
([
cuda_home
+
"/bin/nvcc"
,
"-V"
],
universal_newlines
=
True
)
output_split
=
output
.
split
()
release_idx
=
output_split
.
index
(
"release"
)
release
=
output_split
[
release_idx
+
1
].
replace
(
','
,
''
).
split
(
"."
)
...
...
@@ -57,8 +52,7 @@ def installed_cuda_version(name=""):
def
get_default_compute_capabilities
():
compute_caps
=
DEFAULT_COMPUTE_CAPABILITIES
import
torch.utils.cpp_extension
if
torch
.
utils
.
cpp_extension
.
CUDA_HOME
is
not
None
and
installed_cuda_version
(
)[
0
]
>=
11
:
if
torch
.
utils
.
cpp_extension
.
CUDA_HOME
is
not
None
and
installed_cuda_version
()[
0
]
>=
11
:
if
installed_cuda_version
()[
0
]
==
11
and
installed_cuda_version
()[
1
]
==
0
:
# Special treatment of CUDA 11.0 because compute_86 is not supported.
compute_caps
+=
";8.0"
...
...
@@ -75,37 +69,25 @@ cuda_minor_mismatch_ok = {
"10.1"
,
"10.2"
,
],
11
:
[
"11.0"
,
"11.1"
,
"11.2"
,
"11.3"
,
"11.4"
,
"11.5"
,
"11.6"
,
"11.7"
,
"11.8"
],
11
:
[
"11.0"
,
"11.1"
,
"11.2"
,
"11.3"
,
"11.4"
,
"11.5"
,
"11.6"
,
"11.7"
,
"11.8"
],
}
def
assert_no_cuda_mismatch
(
name
=
""
):
cuda_major
,
cuda_minor
=
installed_cuda_version
(
name
)
if
cuda_minor
==
0
and
cuda_major
==
0
:
return
False
sys_cuda_version
=
f
'
{
cuda_major
}
.
{
cuda_minor
}
'
torch_cuda_version
=
"."
.
join
(
torch
.
version
.
cuda
.
split
(
'.'
)[:
2
])
# This is a show-stopping error, should probably not proceed past this
if
sys_cuda_version
!=
torch_cuda_version
:
if
(
cuda_major
in
cuda_minor_mismatch_ok
and
sys_cuda_version
in
cuda_minor_mismatch_ok
[
cuda_major
]
if
(
cuda_major
in
cuda_minor_mismatch_ok
and
sys_cuda_version
in
cuda_minor_mismatch_ok
[
cuda_major
]
and
torch_cuda_version
in
cuda_minor_mismatch_ok
[
cuda_major
]):
print
(
f
"Installed CUDA version
{
sys_cuda_version
}
does not match the "
f
"version torch was compiled with
{
torch
.
version
.
cuda
}
"
"but since the APIs are compatible, accepting this combination"
)
return
True
raise
Exception
(
f
">- DeepSpeed Op Builder: Installed CUDA version
{
sys_cuda_version
}
does not match the "
f
"version torch was compiled with
{
torch
.
version
.
cuda
}
, unable to compile "
"cuda/cpp extensions without a matching cuda version."
)
raise
Exception
(
f
">- DeepSpeed Op Builder: Installed CUDA version
{
sys_cuda_version
}
does not match the "
f
"version torch was compiled with
{
torch
.
version
.
cuda
}
, unable to compile "
"cuda/cpp extensions without a matching cuda version."
)
return
True
...
...
@@ -142,12 +124,11 @@ class OpBuilder(ABC):
install_torch_version
=
torch_info
[
'version'
]
current_torch_version
=
"."
.
join
(
torch
.
__version__
.
split
(
'.'
)[:
2
])
if
install_torch_version
!=
current_torch_version
:
raise
RuntimeError
(
"PyTorch version mismatch! DeepSpeed ops were compiled and installed "
"with a different version than what is being used at runtime. "
f
"Please re-install DeepSpeed or switch torch versions. "
f
"Install torch version=
{
install_torch_version
}
, "
f
"Runtime torch version=
{
current_torch_version
}
"
)
raise
RuntimeError
(
"PyTorch version mismatch! DeepSpeed ops were compiled and installed "
"with a different version than what is being used at runtime. "
f
"Please re-install DeepSpeed or switch torch versions. "
f
"Install torch version=
{
install_torch_version
}
, "
f
"Runtime torch version=
{
current_torch_version
}
"
)
@
staticmethod
def
validate_torch_op_version
(
torch_info
):
...
...
@@ -155,22 +136,20 @@ class OpBuilder(ABC):
current_cuda_version
=
"."
.
join
(
torch
.
version
.
cuda
.
split
(
'.'
)[:
2
])
install_cuda_version
=
torch_info
[
'cuda_version'
]
if
install_cuda_version
!=
current_cuda_version
:
raise
RuntimeError
(
"CUDA version mismatch! DeepSpeed ops were compiled and installed "
"with a different version than what is being used at runtime. "
f
"Please re-install DeepSpeed or switch torch versions. "
f
"Install CUDA version=
{
install_cuda_version
}
, "
f
"Runtime CUDA version=
{
current_cuda_version
}
"
)
raise
RuntimeError
(
"CUDA version mismatch! DeepSpeed ops were compiled and installed "
"with a different version than what is being used at runtime. "
f
"Please re-install DeepSpeed or switch torch versions. "
f
"Install CUDA version=
{
install_cuda_version
}
, "
f
"Runtime CUDA version=
{
current_cuda_version
}
"
)
else
:
current_hip_version
=
"."
.
join
(
torch
.
version
.
hip
.
split
(
'.'
)[:
2
])
install_hip_version
=
torch_info
[
'hip_version'
]
if
install_hip_version
!=
current_hip_version
:
raise
RuntimeError
(
"HIP version mismatch! DeepSpeed ops were compiled and installed "
"with a different version than what is being used at runtime. "
f
"Please re-install DeepSpeed or switch torch versions. "
f
"Install HIP version=
{
install_hip_version
}
, "
f
"Runtime HIP version=
{
current_hip_version
}
"
)
raise
RuntimeError
(
"HIP version mismatch! DeepSpeed ops were compiled and installed "
"with a different version than what is being used at runtime. "
f
"Please re-install DeepSpeed or switch torch versions. "
f
"Install HIP version=
{
install_hip_version
}
, "
f
"Runtime HIP version=
{
current_hip_version
}
"
)
@
staticmethod
def
is_rocm_pytorch
():
...
...
@@ -184,8 +163,7 @@ class OpBuilder(ABC):
pass
else
:
if
TORCH_MAJOR
>
1
or
(
TORCH_MAJOR
==
1
and
TORCH_MINOR
>=
5
):
_is_rocm_pytorch
=
hasattr
(
torch
.
version
,
'hip'
)
and
torch
.
version
.
hip
is
not
None
_is_rocm_pytorch
=
hasattr
(
torch
.
version
,
'hip'
)
and
torch
.
version
.
hip
is
not
None
if
_is_rocm_pytorch
:
from
torch.utils.cpp_extension
import
ROCM_HOME
_is_rocm_pytorch
=
ROCM_HOME
is
not
None
...
...
@@ -240,7 +218,6 @@ class OpBuilder(ABC):
return
True
def
extra_ldflags
(
self
):
#aiss
#return []
return
[
'-liomp5'
]
...
...
@@ -248,10 +225,7 @@ class OpBuilder(ABC):
valid
=
False
check_cmd
=
'dpkg -l'
for
lib
in
libraries
:
result
=
subprocess
.
Popen
(
f
'dpkg -l
{
lib
}
'
,
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
,
shell
=
True
)
result
=
subprocess
.
Popen
(
f
'dpkg -l
{
lib
}
'
,
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
,
shell
=
True
)
valid
=
valid
or
result
.
wait
()
==
0
return
valid
...
...
@@ -282,9 +256,7 @@ class OpBuilder(ABC):
tempdir
=
tempfile
.
mkdtemp
()
# Define a simple C program that calls the function in question
prog
=
"void %s(void); int main(int argc, char** argv) { %s(); return 0; }"
%
(
funcname
,
funcname
)
prog
=
"void %s(void); int main(int argc, char** argv) { %s(); return 0; }"
%
(
funcname
,
funcname
)
# Write the test program to a file.
filename
=
os
.
path
.
join
(
tempdir
,
'test.c'
)
...
...
@@ -305,16 +277,13 @@ class OpBuilder(ABC):
# Attempt to compile the C program into an object file.
cflags
=
shlex
.
split
(
os
.
environ
.
get
(
'CFLAGS'
,
""
))
objs
=
compiler
.
compile
([
filename
],
output_dir
=
output_dir
,
extra_preargs
=
self
.
strip_empty_entries
(
cflags
))
objs
=
compiler
.
compile
([
filename
],
output_dir
=
output_dir
,
extra_preargs
=
self
.
strip_empty_entries
(
cflags
))
# Attempt to link the object file into an executable.
# Be sure to tack on any libraries that have been specified.
ldflags
=
shlex
.
split
(
os
.
environ
.
get
(
'LDFLAGS'
,
""
))
compiler
.
link_executable
(
objs
,
os
.
path
.
join
(
tempdir
,
'a.out'
),
os
.
path
.
join
(
tempdir
,
'a.out'
),
extra_preargs
=
self
.
strip_empty_entries
(
ldflags
),
libraries
=
libraries
)
...
...
@@ -358,9 +327,8 @@ class OpBuilder(ABC):
try
:
cpu_info
=
get_cpu_info
()
except
Exception
as
e
:
self
.
warning
(
f
"
{
self
.
name
}
attempted to use `py-cpuinfo` but failed (exception type:
{
type
(
e
)
}
,
{
e
}
), "
"falling back to `lscpu` to get this information."
)
self
.
warning
(
f
"
{
self
.
name
}
attempted to use `py-cpuinfo` but failed (exception type:
{
type
(
e
)
}
,
{
e
}
), "
"falling back to `lscpu` to get this information."
)
cpu_info
=
self
.
_backup_cpuinfo
()
if
cpu_info
is
None
:
return
"-march=native"
...
...
@@ -372,23 +340,23 @@ class OpBuilder(ABC):
def
is_cuda_enable
(
self
):
try
:
#assert_no_cuda_mismatch(self.name)
#return '-D__ENABLE_CUDA__'
#aiss
if
torch
.
cuda
.
is_available
():
return
'-D__ENABLE_CUDA__'
except
:
print
(
f
"
{
WARNING
}
{
self
.
name
}
torch.cuda is missing, only cpu ops can be compiled!"
)
except
BaseException
:
print
(
f
"
{
WARNING
}
{
self
.
name
}
cuda is missing or is incompatible with installed torch, "
"only cpu ops can be compiled!"
)
return
'-D__DISABLE_CUDA__'
return
'-D__DISABLE_CUDA__'
def
_backup_cpuinfo
(
self
):
# Construct cpu_info dict from lscpu that is similar to what py-cpuinfo provides
if
not
self
.
command_exists
(
'lscpu'
):
self
.
warning
(
f
"
{
self
.
name
}
attempted to query 'lscpu' after failing to use py-cpuinfo "
"to detect the CPU architecture. 'lscpu' does not appear to exist on "
"your system, will fall back to use -march=native and non-vectorized execution."
)
self
.
warning
(
f
"
{
self
.
name
}
attempted to query 'lscpu' after failing to use py-cpuinfo "
"to detect the CPU architecture. 'lscpu' does not appear to exist on "
"your system, will fall back to use -march=native and non-vectorized execution."
)
return
None
result
=
subprocess
.
check_output
(
'lscpu'
,
shell
=
True
)
result
=
result
.
decode
(
'utf-8'
).
strip
().
lower
()
...
...
@@ -420,9 +388,8 @@ class OpBuilder(ABC):
try
:
cpu_info
=
get_cpu_info
()
except
Exception
as
e
:
self
.
warning
(
f
"
{
self
.
name
}
attempted to use `py-cpuinfo` but failed (exception type:
{
type
(
e
)
}
,
{
e
}
), "
"falling back to `lscpu` to get this information."
)
self
.
warning
(
f
"
{
self
.
name
}
attempted to use `py-cpuinfo` but failed (exception type:
{
type
(
e
)
}
,
{
e
}
), "
"falling back to `lscpu` to get this information."
)
cpu_info
=
self
.
_backup_cpuinfo
()
if
cpu_info
is
None
:
return
'-D__SCALAR__'
...
...
@@ -445,13 +412,9 @@ class OpBuilder(ABC):
valid
=
valid
or
result
.
wait
()
==
0
if
not
valid
and
len
(
cmds
)
>
1
:
print
(
f
"
{
WARNING
}
{
self
.
name
}
requires one of the following commands '
{
cmds
}
', but it does not exist!"
)
print
(
f
"
{
WARNING
}
{
self
.
name
}
requires one of the following commands '
{
cmds
}
', but it does not exist!"
)
elif
not
valid
and
len
(
cmds
)
==
1
:
print
(
f
"
{
WARNING
}
{
self
.
name
}
requires the '
{
cmd
}
' command, but it does not exist!"
)
print
(
f
"
{
WARNING
}
{
self
.
name
}
requires the '
{
cmd
}
' command, but it does not exist!"
)
return
valid
def
warning
(
self
,
msg
):
...
...
@@ -466,12 +429,11 @@ class OpBuilder(ABC):
def
builder
(
self
):
from
torch.utils.cpp_extension
import
CppExtension
return
CppExtension
(
name
=
self
.
absolute_name
(),
sources
=
self
.
strip_empty_entries
(
self
.
sources
()),
include_dirs
=
self
.
strip_empty_entries
(
self
.
include_paths
()),
extra_compile_args
=
{
'cxx'
:
self
.
strip_empty_entries
(
self
.
cxx_args
())},
extra_link_args
=
self
.
strip_empty_entries
(
self
.
extra_ldflags
()))
return
CppExtension
(
name
=
self
.
absolute_name
(),
sources
=
self
.
strip_empty_entries
(
self
.
sources
()),
include_dirs
=
self
.
strip_empty_entries
(
self
.
include_paths
()),
extra_compile_args
=
{
'cxx'
:
self
.
strip_empty_entries
(
self
.
cxx_args
())},
extra_link_args
=
self
.
strip_empty_entries
(
self
.
extra_ldflags
()))
def
load
(
self
,
verbose
=
True
):
from
deepspeed.git_version_info
import
installed_ops
,
torch_info
...
...
@@ -480,9 +442,8 @@ class OpBuilder(ABC):
# torch/cuda versions we are currently using at runtime.
self
.
validate_torch_version
(
torch_info
)
if
torch
.
cuda
.
is_available
()
and
isinstance
(
self
,
CUDAOpBuilder
):
#aiss HIP version mismatch error
#self.validate_torch_op_version(torch_info)
pass
self
.
validate_torch_op_version
(
torch_info
)
return
importlib
.
import_module
(
self
.
absolute_name
())
else
:
return
self
.
jit_load
(
verbose
)
...
...
@@ -495,21 +456,21 @@ class OpBuilder(ABC):
try
:
import
ninja
# noqa: F401
except
ImportError
:
raise
RuntimeError
(
f
"Unable to JIT load the
{
self
.
name
}
op due to ninja not being installed."
)
raise
RuntimeError
(
f
"Unable to JIT load the
{
self
.
name
}
op due to ninja not being installed."
)
if
isinstance
(
self
,
CUDAOpBuilder
)
and
not
self
.
is_rocm_pytorch
():
self
.
build_for_cpu
=
not
assert_no_cuda_mismatch
(
self
.
name
)
try
:
assert_no_cuda_mismatch
(
self
.
name
)
self
.
build_for_cpu
=
False
except
BaseException
:
self
.
build_for_cpu
=
True
self
.
jit_mode
=
True
from
torch.utils.cpp_extension
import
load
start_build
=
time
.
time
()
sources
=
[
self
.
deepspeed_src_path
(
path
)
for
path
in
self
.
sources
()]
extra_include_paths
=
[
self
.
deepspeed_src_path
(
path
)
for
path
in
self
.
include_paths
()
]
extra_include_paths
=
[
self
.
deepspeed_src_path
(
path
)
for
path
in
self
.
include_paths
()]
# Torch will try and apply whatever CCs are in the arch list at compile time,
# we have already set the intended targets ourselves we know that will be
...
...
@@ -520,14 +481,13 @@ class OpBuilder(ABC):
torch_arch_list
=
os
.
environ
.
get
(
"TORCH_CUDA_ARCH_LIST"
)
os
.
environ
[
"TORCH_CUDA_ARCH_LIST"
]
=
""
op_module
=
load
(
name
=
self
.
name
,
sources
=
self
.
strip_empty_entries
(
sources
),
extra_include_paths
=
self
.
strip_empty_entries
(
extra_include_paths
),
extra_cflags
=
self
.
strip_empty_entries
(
self
.
cxx_args
()),
extra_cuda_cflags
=
self
.
strip_empty_entries
(
self
.
nvcc_args
()),
extra_ldflags
=
self
.
strip_empty_entries
(
self
.
extra_ldflags
()),
verbose
=
verbose
)
op_module
=
load
(
name
=
self
.
name
,
sources
=
self
.
strip_empty_entries
(
sources
),
extra_include_paths
=
self
.
strip_empty_entries
(
extra_include_paths
),
extra_cflags
=
self
.
strip_empty_entries
(
self
.
cxx_args
()),
extra_cuda_cflags
=
self
.
strip_empty_entries
(
self
.
nvcc_args
()),
extra_ldflags
=
self
.
strip_empty_entries
(
self
.
extra_ldflags
()),
verbose
=
verbose
)
build_duration
=
time
.
time
()
-
start_build
if
verbose
:
...
...
@@ -541,6 +501,7 @@ class OpBuilder(ABC):
class
CUDAOpBuilder
(
OpBuilder
):
def
compute_capability_args
(
self
,
cross_compile_archs
=
None
):
"""
Returns nvcc compute capability compile flags.
...
...
@@ -587,8 +548,7 @@ class CUDAOpBuilder(OpBuilder):
ccs
=
self
.
filter_ccs
(
ccs
)
if
len
(
ccs
)
==
0
:
raise
RuntimeError
(
f
"Unable to load
{
self
.
name
}
op due to no compute capabilities remaining after filtering"
)
f
"Unable to load
{
self
.
name
}
op due to no compute capabilities remaining after filtering"
)
args
=
[]
for
cc
in
ccs
:
...
...
@@ -623,7 +583,16 @@ class CUDAOpBuilder(OpBuilder):
return
super
().
is_compatible
(
verbose
)
def
builder
(
self
):
#self.build_for_cpu = not assert_no_cuda_mismatch(self.name)
#try:
# assert_no_cuda_mismatch(self.name)
# self.build_for_cpu = False
#except BaseException:
# self.build_for_cpu = True
#if self.build_for_cpu:
# from torch.utils.cpp_extension import CppExtension as ExtensionBuilder
#else:
# from torch.utils.cpp_extension import CUDAExtension as ExtensionBuilder
#aiss
if
not
self
.
is_rocm_pytorch
():
self
.
build_for_cpu
=
not
assert_no_cuda_mismatch
(
self
.
name
)
...
...
@@ -632,23 +601,17 @@ class CUDAOpBuilder(OpBuilder):
else
:
from
torch.utils.cpp_extension
import
CUDAExtension
as
ExtensionBuilder
else
:
from
torch.utils.cpp_extension
import
CUDAExtension
as
ExtensionBuilder
#self.build_for_cpu = not assert_no_cuda_mismatch(self.name)
#if self.build_for_cpu:
# from torch.utils.cpp_extension import CppExtension as ExtensionBuilder
#else:
# from torch.utils.cpp_extension import CUDAExtension as ExtensionBuilder
from
torch.utils.cpp_extension
import
CUDAExtension
as
ExtensionBuilder
compile_args
=
{
'cxx'
:
self
.
strip_empty_entries
(
self
.
cxx_args
())}
if
self
.
build_for_cpu
else
\
{
'cxx'
:
self
.
strip_empty_entries
(
self
.
cxx_args
()),
\
'nvcc'
:
self
.
strip_empty_entries
(
self
.
nvcc_args
())}
cuda_ext
=
ExtensionBuilder
(
name
=
self
.
absolute_name
(),
sources
=
self
.
strip_empty_entries
(
self
.
sources
()),
include_dirs
=
self
.
strip_empty_entries
(
self
.
include_paths
()),
libraries
=
self
.
strip_empty_entries
(
self
.
libraries_args
()),
extra_compile_args
=
compile_args
)
cuda_ext
=
ExtensionBuilder
(
name
=
self
.
absolute_name
(),
sources
=
self
.
strip_empty_entries
(
self
.
sources
()),
include_dirs
=
self
.
strip_empty_entries
(
self
.
include_paths
()),
libraries
=
self
.
strip_empty_entries
(
self
.
libraries_args
()),
extra_compile_args
=
compile_args
)
if
self
.
is_rocm_pytorch
():
# hip converts paths to absolute, this converts back to relative
...
...
@@ -656,7 +619,10 @@ class CUDAOpBuilder(OpBuilder):
curr_file
=
Path
(
__file__
).
parent
.
parent
# ds root
for
i
in
range
(
len
(
sources
)):
src
=
Path
(
sources
[
i
])
sources
[
i
]
=
str
(
src
.
relative_to
(
curr_file
))
if
src
.
is_absolute
():
sources
[
i
]
=
str
(
src
.
relative_to
(
curr_file
))
else
:
sources
[
i
]
=
str
(
src
)
cuda_ext
.
sources
=
sources
return
cuda_ext
...
...
@@ -711,9 +677,7 @@ class CUDAOpBuilder(OpBuilder):
if
self
.
is_rocm_pytorch
():
ROCM_MAJOR
,
ROCM_MINOR
=
self
.
installed_rocm_version
()
args
+=
[
'-std=c++14'
,
'-U__HIP_NO_HALF_OPERATORS__'
,
'-U__HIP_NO_HALF_CONVERSIONS__'
,
'-std=c++14'
,
'-U__HIP_NO_HALF_OPERATORS__'
,
'-U__HIP_NO_HALF_CONVERSIONS__'
,
'-U__HIP_NO_HALF2_OPERATORS__'
,
'-DROCM_VERSION_MAJOR=%s'
%
ROCM_MAJOR
,
'-DROCM_VERSION_MINOR=%s'
%
ROCM_MINOR
,
...
...
@@ -722,13 +686,9 @@ class CUDAOpBuilder(OpBuilder):
else
:
cuda_major
,
_
=
installed_cuda_version
()
args
+=
[
'-allow-unsupported-compiler'
if
sys
.
platform
==
"win32"
else
''
,
'--use_fast_math'
,
'-std=c++17'
if
sys
.
platform
==
"win32"
and
cuda_major
>
10
else
'-std=c++14'
,
'-U__CUDA_NO_HALF_OPERATORS__'
,
'-U__CUDA_NO_HALF_CONVERSIONS__'
,
'-U__CUDA_NO_HALF2_OPERATORS__'
'-allow-unsupported-compiler'
if
sys
.
platform
==
"win32"
else
''
,
'--use_fast_math'
,
'-std=c++17'
if
sys
.
platform
==
"win32"
and
cuda_major
>
10
else
'-std=c++14'
,
'-U__CUDA_NO_HALF_OPERATORS__'
,
'-U__CUDA_NO_HALF_CONVERSIONS__'
,
'-U__CUDA_NO_HALF2_OPERATORS__'
]
if
os
.
environ
.
get
(
'DS_DEBUG_CUDA_BUILD'
,
'0'
)
==
'1'
:
args
.
append
(
'--ptxas-options=-v'
)
...
...
@@ -742,10 +702,12 @@ class CUDAOpBuilder(OpBuilder):
if
sys
.
platform
==
"win32"
:
return
[
'cublas'
,
'curand'
]
else
:
#return []
return
[
'iomp5'
]
class
TorchCPUOpBuilder
(
CUDAOpBuilder
):
def
extra_ldflags
(
self
):
if
self
.
build_for_cpu
:
return
[
'-fopenmp'
]
...
...
op_builder/cpu_adagrad.py
View file @
5bcc463d
"""
Copyright 2020 The Microsoft DeepSpeed Team
"""
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
import
os
from
.builder
import
TorchCPUOpBuilder
...
...
@@ -38,13 +40,8 @@ class CPUAdagradBuilder(TorchCPUOpBuilder):
CUDA_INCLUDE
=
[
os
.
path
.
join
(
torch
.
utils
.
cpp_extension
.
CUDA_HOME
,
"include"
)]
else
:
CUDA_INCLUDE
=
[
os
.
path
.
join
(
torch
.
utils
.
cpp_extension
.
ROCM_HOME
,
"include"
),
os
.
path
.
join
(
torch
.
utils
.
cpp_extension
.
ROCM_HOME
,
"include"
,
"rocrand"
),
os
.
path
.
join
(
torch
.
utils
.
cpp_extension
.
ROCM_HOME
,
"include"
,
"hiprand"
),
os
.
path
.
join
(
torch
.
utils
.
cpp_extension
.
ROCM_HOME
,
"include"
),
os
.
path
.
join
(
torch
.
utils
.
cpp_extension
.
ROCM_HOME
,
"include"
,
"rocrand"
),
os
.
path
.
join
(
torch
.
utils
.
cpp_extension
.
ROCM_HOME
,
"include"
,
"hiprand"
),
]
return
[
'csrc/includes'
]
+
CUDA_INCLUDE
op_builder/cpu_adam.py
View file @
5bcc463d
"""
Copyright 2020 The Microsoft DeepSpeed Team
"""
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
import
os
from
.builder
import
TorchCPUOpBuilder
...
...
@@ -8,7 +10,7 @@ from .builder import TorchCPUOpBuilder
class
CPUAdamBuilder
(
TorchCPUOpBuilder
):
BUILD_VAR
=
"DS_BUILD_CPU_ADAM"
NAME
=
"cpu_adam"
def
__init__
(
self
):
super
().
__init__
(
name
=
self
.
NAME
)
...
...
@@ -18,7 +20,7 @@ class CPUAdamBuilder(TorchCPUOpBuilder):
def
sources
(
self
):
if
self
.
build_for_cpu
:
return
[
'csrc/adam/cpu_adam.cpp'
]
return
[
'csrc/adam/cpu_adam.cpp'
,
'csrc/common/custom_cuda_kernel.cu'
]
def
libraries_args
(
self
):
...
...
@@ -28,6 +30,7 @@ class CPUAdamBuilder(TorchCPUOpBuilder):
if
not
self
.
is_rocm_pytorch
():
args
+=
[
'curand'
]
return
args
def
include_paths
(
self
):
...
...
@@ -38,13 +41,8 @@ class CPUAdamBuilder(TorchCPUOpBuilder):
CUDA_INCLUDE
=
[
os
.
path
.
join
(
torch
.
utils
.
cpp_extension
.
CUDA_HOME
,
"include"
)]
else
:
CUDA_INCLUDE
=
[
os
.
path
.
join
(
torch
.
utils
.
cpp_extension
.
ROCM_HOME
,
"include"
),
os
.
path
.
join
(
torch
.
utils
.
cpp_extension
.
ROCM_HOME
,
"include"
,
"rocrand"
),
os
.
path
.
join
(
torch
.
utils
.
cpp_extension
.
ROCM_HOME
,
"include"
,
"hiprand"
),
os
.
path
.
join
(
torch
.
utils
.
cpp_extension
.
ROCM_HOME
,
"include"
),
os
.
path
.
join
(
torch
.
utils
.
cpp_extension
.
ROCM_HOME
,
"include"
,
"rocrand"
),
os
.
path
.
join
(
torch
.
utils
.
cpp_extension
.
ROCM_HOME
,
"include"
,
"hiprand"
),
]
return
[
'csrc/includes'
]
+
CUDA_INCLUDE
op_builder/fused_adam.py
View file @
5bcc463d
"""
Copyright 2020 The Microsoft DeepSpeed Team
"""
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
from
.builder
import
CUDAOpBuilder
import
sys
...
...
@@ -29,9 +31,7 @@ class FusedAdamBuilder(CUDAOpBuilder):
def
nvcc_args
(
self
):
nvcc_flags
=
[
'-O3'
]
+
self
.
version_dependent_macros
()
if
not
self
.
is_rocm_pytorch
():
nvcc_flags
.
extend
([
'-allow-unsupported-compiler'
if
sys
.
platform
==
"win32"
else
''
,
'-lineinfo'
,
'--use_fast_math'
]
+
self
.
compute_capability_args
())
nvcc_flags
.
extend
(
[
'-allow-unsupported-compiler'
if
sys
.
platform
==
"win32"
else
''
,
'-lineinfo'
,
'--use_fast_math'
]
+
self
.
compute_capability_args
())
return
nvcc_flags
op_builder/fused_lamb.py
View file @
5bcc463d
"""
Copyright 2020 The Microsoft DeepSpeed Team
"""
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
from
.builder
import
CUDAOpBuilder
import
sys
...
...
@@ -30,14 +32,9 @@ class FusedLambBuilder(CUDAOpBuilder):
nvcc_flags
=
[
'-O3'
]
+
self
.
version_dependent_macros
()
if
self
.
is_rocm_pytorch
():
ROCM_MAJOR
,
ROCM_MINOR
=
self
.
installed_rocm_version
()
nvcc_flags
+=
[
'-DROCM_VERSION_MAJOR=%s'
%
ROCM_MAJOR
,
'-DROCM_VERSION_MINOR=%s'
%
ROCM_MINOR
]
nvcc_flags
+=
[
'-DROCM_VERSION_MAJOR=%s'
%
ROCM_MAJOR
,
'-DROCM_VERSION_MINOR=%s'
%
ROCM_MINOR
]
else
:
nvcc_flags
.
extend
([
'-allow-unsupported-compiler'
if
sys
.
platform
==
"win32"
else
''
,
'-lineinfo'
,
'--use_fast_math'
]
+
self
.
compute_capability_args
())
nvcc_flags
.
extend
(
[
'-allow-unsupported-compiler'
if
sys
.
platform
==
"win32"
else
''
,
'-lineinfo'
,
'--use_fast_math'
]
+
self
.
compute_capability_args
())
return
nvcc_flags
op_builder/quantizer.py
View file @
5bcc463d
'''Copyright The Microsoft DeepSpeed Team'''
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
from
.builder
import
CUDAOpBuilder
...
...
op_builder/random_ltd.py
View file @
5bcc463d
"""
Copyright 2022 The Microsoft DeepSpeed Team
"""
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
from
.builder
import
CUDAOpBuilder
...
...
@@ -23,18 +25,13 @@ class RandomLTDBuilder(CUDAOpBuilder):
def
sources
(
self
):
return
[
'csrc/random_ltd/pt_binding.cpp'
,
'csrc/random_ltd/gather_scatter.cu'
,
'csrc/random_ltd/slice_attn_masks.cu'
,
'csrc/random_ltd/token_sort.cu'
'csrc/random_ltd/pt_binding.cpp'
,
'csrc/random_ltd/gather_scatter.cu'
,
'csrc/random_ltd/slice_attn_masks.cu'
,
'csrc/random_ltd/token_sort.cu'
]
def
include_paths
(
self
):
includes
=
[
'csrc/includes'
]
if
self
.
is_rocm_pytorch
():
from
torch.utils.cpp_extension
import
ROCM_HOME
includes
+=
[
'{}/hiprand/include'
.
format
(
ROCM_HOME
),
'{}/rocrand/include'
.
format
(
ROCM_HOME
)
]
includes
+=
[
'{}/hiprand/include'
.
format
(
ROCM_HOME
),
'{}/rocrand/include'
.
format
(
ROCM_HOME
)]
return
includes
op_builder/sparse_attn.py
View file @
5bcc463d
"""
Copyright 2020 The Microsoft DeepSpeed Team
"""
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
from
.builder
import
OpBuilder
try
:
...
...
@@ -32,9 +34,8 @@ class SparseAttnBuilder(OpBuilder):
#deps_compatible = all(command_status)
if
self
.
is_rocm_pytorch
():
#aiss debug
#self.warning(f'{self.NAME} is not compatible with ROCM')
#return False
#aiss debug
return
True
try
:
...
...
@@ -49,26 +50,23 @@ class SparseAttnBuilder(OpBuilder):
self
.
warning
(
f
"
{
self
.
NAME
}
cuda is not available from torch"
)
else
:
major
,
minor
=
torch
.
version
.
cuda
.
split
(
'.'
)[:
2
]
cuda_compatible
=
(
int
(
major
)
==
10
and
int
(
minor
)
>=
1
)
or
(
int
(
major
)
>=
11
)
cuda_compatible
=
(
int
(
major
)
==
10
and
int
(
minor
)
>=
1
)
or
(
int
(
major
)
>=
11
)
if
not
cuda_compatible
:
self
.
warning
(
f
"
{
self
.
NAME
}
requires CUDA version 10.1+"
)
TORCH_MAJOR
=
int
(
torch
.
__version__
.
split
(
'.'
)[
0
])
TORCH_MINOR
=
int
(
torch
.
__version__
.
split
(
'.'
)[
1
])
torch_compatible
=
TORCH_MAJOR
==
1
and
TORCH_MINOR
>=
5
torch_compatible
=
(
TORCH_MAJOR
==
1
and
TORCH_MINOR
>=
5
)
if
not
torch_compatible
:
self
.
warning
(
f
'
{
self
.
NAME
}
requires a torch version >= 1.5 but detected
{
TORCH_MAJOR
}
.
{
TORCH_MINOR
}
'
)
f
'
{
self
.
NAME
}
requires a torch version >= 1.5 and < 2.0 but detected
{
TORCH_MAJOR
}
.
{
TORCH_MINOR
}
'
)
try
:
import
triton
except
ImportError
:
# auto-install of triton is broken on some systems, reverting to manual install for now
# see this issue: https://github.com/microsoft/DeepSpeed/issues/1710
self
.
warning
(
f
"please install triton==1.0.0 if you want to use sparse attention"
)
self
.
warning
(
f
"please install triton==1.0.0 if you want to use sparse attention"
)
return
False
if
pkg_version
:
...
...
@@ -79,9 +77,7 @@ class SparseAttnBuilder(OpBuilder):
triton_mismatch
=
installed_triton
!=
"1.0.0"
if
triton_mismatch
:
self
.
warning
(
f
"using untested triton version (
{
installed_triton
}
), only 1.0.0 is known to be compatible"
)
self
.
warning
(
f
"using untested triton version (
{
installed_triton
}
), only 1.0.0 is known to be compatible"
)
return
False
return
super
().
is_compatible
(
verbose
)
and
torch_compatible
and
cuda_compatible
op_builder/spatial_inference.py
View file @
5bcc463d
'''
Copyright 2022 The Microsoft DeepSpeed Team
'''
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
from
.builder
import
CUDAOpBuilder
,
installed_cuda_version
...
...
@@ -19,8 +21,7 @@ class SpatialInferenceBuilder(CUDAOpBuilder):
try
:
import
torch
except
ImportError
:
self
.
warning
(
"Please install torch if trying to pre-compile inference kernels"
)
self
.
warning
(
"Please install torch if trying to pre-compile inference kernels"
)
return
False
cuda_okay
=
True
...
...
@@ -30,8 +31,7 @@ class SpatialInferenceBuilder(CUDAOpBuilder):
cuda_capability
=
torch
.
cuda
.
get_device_properties
(
0
).
major
if
cuda_capability
>=
8
:
if
torch_cuda_major
<
11
or
sys_cuda_major
<
11
:
self
.
warning
(
"On Ampere and higher architectures please use CUDA 11+"
)
self
.
warning
(
"On Ampere and higher architectures please use CUDA 11+"
)
cuda_okay
=
False
return
super
().
is_compatible
(
verbose
)
and
cuda_okay
...
...
op_builder/stochastic_transformer.py
View file @
5bcc463d
"""
Copyright 2020 The Microsoft DeepSpeed Team
"""
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
from
.transformer
import
TransformerBuilder
...
...
op_builder/transformer.py
View file @
5bcc463d
"""
Copyright 2020 The Microsoft DeepSpeed Team
"""
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
from
.builder
import
CUDAOpBuilder
...
...
@@ -23,22 +25,15 @@ class TransformerBuilder(CUDAOpBuilder):
def
sources
(
self
):
return
[
'csrc/transformer/ds_transformer_cuda.cpp'
,
'csrc/transformer/cublas_wrappers.cu'
,
'csrc/transformer/transform_kernels.cu'
,
'csrc/transformer/gelu_kernels.cu'
,
'csrc/transformer/dropout_kernels.cu'
,
'csrc/transformer/normalize_kernels.cu'
,
'csrc/transformer/softmax_kernels.cu'
,
'csrc/transformer/general_kernels.cu'
'csrc/transformer/ds_transformer_cuda.cpp'
,
'csrc/transformer/cublas_wrappers.cu'
,
'csrc/transformer/transform_kernels.cu'
,
'csrc/transformer/gelu_kernels.cu'
,
'csrc/transformer/dropout_kernels.cu'
,
'csrc/transformer/normalize_kernels.cu'
,
'csrc/transformer/softmax_kernels.cu'
,
'csrc/transformer/general_kernels.cu'
]
def
include_paths
(
self
):
includes
=
[
'csrc/includes'
]
if
self
.
is_rocm_pytorch
():
from
torch.utils.cpp_extension
import
ROCM_HOME
includes
+=
[
'{}/hiprand/include'
.
format
(
ROCM_HOME
),
'{}/rocrand/include'
.
format
(
ROCM_HOME
)
]
includes
+=
[
'{}/hiprand/include'
.
format
(
ROCM_HOME
),
'{}/rocrand/include'
.
format
(
ROCM_HOME
)]
return
includes
op_builder/transformer_inference.py
View file @
5bcc463d
'''Copyright The Microsoft DeepSpeed Team'''
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
from
.builder
import
CUDAOpBuilder
,
installed_cuda_version
...
...
@@ -18,8 +21,7 @@ class InferenceBuilder(CUDAOpBuilder):
try
:
import
torch
except
ImportError
:
self
.
warning
(
"Please install torch if trying to pre-compile inference kernels"
)
self
.
warning
(
"Please install torch if trying to pre-compile inference kernels"
)
return
False
cuda_okay
=
True
...
...
@@ -28,14 +30,11 @@ class InferenceBuilder(CUDAOpBuilder):
torch_cuda_major
=
int
(
torch
.
version
.
cuda
.
split
(
'.'
)[
0
])
cuda_capability
=
torch
.
cuda
.
get_device_properties
(
0
).
major
if
cuda_capability
<
6
:
self
.
warning
(
"NVIDIA Inference is only supported on Pascal and newer architectures"
)
self
.
warning
(
"NVIDIA Inference is only supported on Pascal and newer architectures"
)
cuda_okay
=
False
if
cuda_capability
>=
8
:
if
torch_cuda_major
<
11
or
sys_cuda_major
<
11
:
self
.
warning
(
"On Ampere and higher architectures please use CUDA 11+"
)
self
.
warning
(
"On Ampere and higher architectures please use CUDA 11+"
)
cuda_okay
=
False
return
super
().
is_compatible
(
verbose
)
and
cuda_okay
...
...
op_builder/utils.py
View file @
5bcc463d
"""
Copyright 2020 The Microsoft DeepSpeed Team
"""
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
from
.builder
import
OpBuilder
...
...
release/bump_patch_version.py
View file @
5bcc463d
'''Copyright The Microsoft DeepSpeed Team'''
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
from
packaging
import
version
as
pkg_version
...
...
Prev
1
…
22
23
24
25
26
27
28
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment