Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ColossalAI
Commits
db6eea35
Unverified
Commit
db6eea35
authored
Jan 04, 2023
by
Jiarui Fang
Committed by
GitHub
Jan 04, 2023
Browse files
[builder] reconfig op_builder for pypi install (#2314)
parent
a9b27b92
Changes
17
Hide whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
13 additions
and
332 deletions
+13
-332
MANIFEST.in
MANIFEST.in
+1
-0
colossalai/kernel/op_builder
colossalai/kernel/op_builder
+1
-0
colossalai/kernel/op_builder/__init__.py
colossalai/kernel/op_builder/__init__.py
+0
-7
colossalai/kernel/op_builder/builder.py
colossalai/kernel/op_builder/builder.py
+0
-104
colossalai/kernel/op_builder/cpu_adam.py
colossalai/kernel/op_builder/cpu_adam.py
+0
-42
colossalai/kernel/op_builder/fused_optim.py
colossalai/kernel/op_builder/fused_optim.py
+0
-35
colossalai/kernel/op_builder/moe.py
colossalai/kernel/op_builder/moe.py
+0
-33
colossalai/kernel/op_builder/multi_head_attn.py
colossalai/kernel/op_builder/multi_head_attn.py
+0
-41
colossalai/kernel/op_builder/scaled_upper_triang_masked_softmax.py
...i/kernel/op_builder/scaled_upper_triang_masked_softmax.py
+0
-36
colossalai/kernel/op_builder/utils.py
colossalai/kernel/op_builder/utils.py
+0
-20
op_builder/builder.py
op_builder/builder.py
+5
-3
op_builder/cpu_adam.py
op_builder/cpu_adam.py
+1
-1
op_builder/fused_optim.py
op_builder/fused_optim.py
+1
-1
op_builder/moe.py
op_builder/moe.py
+1
-1
op_builder/multi_head_attn.py
op_builder/multi_head_attn.py
+1
-1
op_builder/scaled_upper_triang_masked_softmax.py
op_builder/scaled_upper_triang_masked_softmax.py
+1
-1
tests/test_optimizer/test_cpu_adam.py
tests/test_optimizer/test_cpu_adam.py
+1
-6
No files found.
MANIFEST.in
View file @
db6eea35
include *.txt README.md
include *.txt README.md
recursive-include requirements *.txt
recursive-include requirements *.txt
recursive-include colossalai *.cpp *.h *.cu *.tr *.cuh *.cc *.pyi
recursive-include colossalai *.cpp *.h *.cu *.tr *.cuh *.cc *.pyi
recursive-include op_builder *.py
colossalai/kernel/op_builder
0 → 120000
View file @
db6eea35
../../op_builder
\ No newline at end of file
colossalai/kernel/op_builder/__init__.py
deleted
100644 → 0
View file @
a9b27b92
"""Expose the op builder classes used for runtime JIT compilation of kernels."""

from .cpu_adam import CPUAdamBuilder
from .fused_optim import FusedOptimBuilder
from .moe import MOEBuilder
from .multi_head_attn import MultiHeadAttnBuilder
from .scaled_upper_triang_masked_softmax import ScaledSoftmaxBuilder

__all__ = ['CPUAdamBuilder', 'FusedOptimBuilder', 'MultiHeadAttnBuilder', 'ScaledSoftmaxBuilder', 'MOEBuilder']
colossalai/kernel/op_builder/builder.py
deleted
100644 → 0
View file @
a9b27b92
import
os
import
re
from
pathlib
import
Path
from
typing
import
List
import
torch
def get_cuda_cc_flag() -> List[str]:
    """Build nvcc ``-gencode`` flags for every CUDA arch torch was compiled for.

    Architectures below compute capability 6.0 (sm_60) are skipped, since the
    shipped kernels do not target older GPUs.

    Returns:
        List[str]: alternating ``'-gencode'`` / ``'arch=compute_XX,code=sm_XX'``
        entries, suitable for passing to nvcc. Empty when torch has no CUDA
        arch list (e.g. CPU-only builds).
    """
    cc_flag: List[str] = []
    for arch in torch.cuda.get_arch_list():
        res = re.search(r'sm_(\d+)', arch)
        if res:
            arch_cap = res[1]
            if int(arch_cap) >= 60:
                cc_flag.extend(['-gencode', f'arch=compute_{arch_cap},code={arch}'])
    return cc_flag
class Builder(object):
    """Base class for JIT op builders.

    Subclasses must override :meth:`sources_files`, :meth:`include_dirs`,
    :meth:`cxx_flags` and :meth:`nvcc_flags`, and set ``self.name`` before
    calling :meth:`load` or :meth:`builder`.
    """

    def colossalai_src_path(self, code_path):
        """Resolve *code_path* relative to the colossalai source tree.

        Absolute paths are returned unchanged; relative paths are joined onto
        the parent of this file's directory.
        """
        if os.path.isabs(code_path):
            return code_path
        return os.path.join(Path(__file__).parent.parent.absolute(), code_path)

    def get_cuda_home_include(self):
        """Return the include path inside the CUDA home.

        Raises:
            RuntimeError: if ``CUDA_HOME`` is not set.
        """
        from torch.utils.cpp_extension import CUDA_HOME
        if CUDA_HOME is None:
            raise RuntimeError("CUDA_HOME is None, please set CUDA_HOME to compile C++/CUDA kernels in ColossalAI.")
        return os.path.join(CUDA_HOME, "include")

    # functions must be overrided begin
    def sources_files(self):
        """Return the list of C++/CUDA source files. Must be overridden."""
        raise NotImplementedError

    def include_dirs(self):
        """Return the list of include directories. Must be overridden."""
        raise NotImplementedError

    def cxx_flags(self):
        """Return the C++ compile flags. Must be overridden."""
        raise NotImplementedError

    def nvcc_flags(self):
        """Return the nvcc compile flags. Must be overridden."""
        raise NotImplementedError
    # functions must be overrided over

    def strip_empty_entries(self, args):
        '''
        Drop any empty strings from the list of compile and link flags
        '''
        # truthiness test replaces the previous `len(x) > 0` check
        return [x for x in args if x]

    def load(self, verbose=True):
        """Load and compile this builder's kernel library at runtime.

        Note: the original docstring claimed this loads the cpu_adam lib,
        but this is the generic base implementation used by every op.

        Args:
            verbose (bool, optional): show detailed info. Defaults to True.
        """
        import time
        from torch.utils.cpp_extension import load

        start_build = time.time()

        op_module = load(name=self.name,
                         sources=self.strip_empty_entries(self.sources_files()),
                         extra_include_paths=self.strip_empty_entries(self.include_dirs()),
                         extra_cflags=self.cxx_flags(),
                         extra_cuda_cflags=self.nvcc_flags(),
                         extra_ldflags=[],
                         verbose=verbose)

        build_duration = time.time() - start_build
        if verbose:
            print(f"Time to load {self.name} op: {build_duration} seconds")

        return op_module

    def builder(self, name) -> 'CUDAExtension':
        """
        get a CUDAExtension instance used for setup.py
        """
        from torch.utils.cpp_extension import CUDAExtension
        return CUDAExtension(
            name=name,
            sources=[os.path.join('colossalai/kernel/cuda_native/csrc', path) for path in self.sources_files()],
            include_dirs=self.include_dirs(),
            extra_compile_args={
                'cxx': self.cxx_flags(),
                'nvcc': self.nvcc_flags()
            })
colossalai/kernel/op_builder/cpu_adam.py
deleted
100644 → 0
View file @
a9b27b92
import
os
from
.builder
import
Builder
from
.utils
import
append_nvcc_threads
class CPUAdamBuilder(Builder):
    """Builder for the fused CPU Adam optimizer kernel."""

    NAME = "cpu_adam"
    BASE_DIR = "cuda_native"

    def __init__(self):
        self.name = CPUAdamBuilder.NAME
        super().__init__()
        self.version_dependent_macros = ['-DVERSION_GE_1_1', '-DVERSION_GE_1_3', '-DVERSION_GE_1_5']

    # necessary 4 functions
    def sources_files(self):
        """Source files of the extension, resolved to absolute paths."""
        return [
            self.colossalai_src_path(os.path.join(CPUAdamBuilder.BASE_DIR, "csrc/cpu_adam.cpp")),
        ]

    def include_dirs(self):
        """Include dirs: the kernel's own headers plus the CUDA home headers."""
        return [
            self.colossalai_src_path(os.path.join(CPUAdamBuilder.BASE_DIR, "includes")),
            self.get_cuda_home_include()
        ]

    def cxx_flags(self):
        """C++ compile flags for the extension."""
        extra_cxx_flags = [
            '-std=c++14', '-lcudart', '-lcublas', '-g', '-Wno-reorder', '-fopenmp', '-march=native'
        ]
        return ['-O3'] + self.version_dependent_macros + extra_cxx_flags

    def nvcc_flags(self):
        """nvcc compile flags, with parallel-build threads appended when supported."""
        extra_cuda_flags = [
            '-std=c++14',
            '-U__CUDA_NO_HALF_OPERATORS__',
            '-U__CUDA_NO_HALF_CONVERSIONS__',
            '-U__CUDA_NO_HALF2_OPERATORS__',
            '-DTHRUST_IGNORE_CUB_VERSION_CHECK',
        ]
        return append_nvcc_threads(['-O3', '--use_fast_math'] + self.version_dependent_macros + extra_cuda_flags)
    # necessary 4 functions
colossalai/kernel/op_builder/fused_optim.py
deleted
100644 → 0
View file @
a9b27b92
import
os
from
.builder
import
Builder
,
get_cuda_cc_flag
class FusedOptimBuilder(Builder):
    """Builder for the fused CUDA optimizer kernels (SGD / Adam / LAMB / scale / l2norm)."""

    NAME = 'fused_optim'
    BASE_DIR = "cuda_native/csrc"

    def __init__(self):
        self.name = FusedOptimBuilder.NAME
        super().__init__()
        self.version_dependent_macros = ['-DVERSION_GE_1_1', '-DVERSION_GE_1_3', '-DVERSION_GE_1_5']

    def sources_files(self):
        """Absolute paths of the C++ frontend and the multi-tensor CUDA kernels."""
        fnames = [
            'colossal_C_frontend.cpp',
            'multi_tensor_sgd_kernel.cu',
            'multi_tensor_scale_kernel.cu',
            'multi_tensor_adam.cu',
            'multi_tensor_l2norm_kernel.cu',
            'multi_tensor_lamb.cu',
        ]
        return [self.colossalai_src_path(os.path.join(FusedOptimBuilder.BASE_DIR, fname)) for fname in fnames]

    def include_dirs(self):
        """Include dirs: the kernel headers plus the CUDA home headers."""
        dirs = [os.path.join(FusedOptimBuilder.BASE_DIR, "includes"), self.get_cuda_home_include()]
        # colossalai_src_path leaves the already-absolute CUDA include path unchanged
        return [self.colossalai_src_path(d) for d in dirs]

    def cxx_flags(self):
        """C++ compile flags."""
        return ['-O3'] + self.version_dependent_macros

    def nvcc_flags(self):
        """nvcc compile flags, including per-GPU-arch gencode flags."""
        extra_cuda_flags = ['-lineinfo']
        extra_cuda_flags.extend(get_cuda_cc_flag())
        return ['-O3', '--use_fast_math'] + extra_cuda_flags
colossalai/kernel/op_builder/moe.py
deleted
100644 → 0
View file @
a9b27b92
import
os
from
.builder
import
Builder
,
get_cuda_cc_flag
class MOEBuilder(Builder):
    """Builder for the mixture-of-experts (MoE) CUDA kernel."""

    def __init__(self):
        self.base_dir = "cuda_native/csrc"
        self.name = 'moe'
        super().__init__()

    def include_dirs(self):
        """Include dirs: kernel headers, CUDA home headers and kernels/include.

        The dead ``ret = []`` assignment that was immediately overwritten has
        been removed.
        """
        ret = [
            os.path.join(self.base_dir, "includes"),
            self.get_cuda_home_include(),
            os.path.join(self.base_dir, "kernels", "include"),
        ]
        return [self.colossalai_src_path(path) for path in ret]

    def sources_files(self):
        """Absolute paths of the MoE C++/CUDA sources."""
        ret = [os.path.join(self.base_dir, fname) for fname in ['moe_cuda.cpp', 'moe_cuda_kernel.cu']]
        return [self.colossalai_src_path(path) for path in ret]

    def cxx_flags(self):
        """C++ compile flags."""
        return ['-O3', '-DVERSION_GE_1_1', '-DVERSION_GE_1_3', '-DVERSION_GE_1_5']

    def nvcc_flags(self):
        """nvcc compile flags, including per-GPU-arch gencode flags."""
        extra_cuda_flags = [
            '-U__CUDA_NO_HALF_OPERATORS__',
            '-U__CUDA_NO_HALF_CONVERSIONS__',
            '--expt-relaxed-constexpr',
            '--expt-extended-lambda',
        ]
        extra_cuda_flags.extend(get_cuda_cc_flag())
        return ['-O3', '--use_fast_math'] + extra_cuda_flags
colossalai/kernel/op_builder/multi_head_attn.py
deleted
100644 → 0
View file @
a9b27b92
import
os
from
.builder
import
Builder
,
get_cuda_cc_flag
class MultiHeadAttnBuilder(Builder):
    """Builder for the fused multi-head attention CUDA kernel."""

    def __init__(self):
        self.base_dir = "cuda_native/csrc"
        self.name = 'multihead_attention'
        super().__init__()
        self.version_dependent_macros = ['-DVERSION_GE_1_1', '-DVERSION_GE_1_3', '-DVERSION_GE_1_5']

    def include_dirs(self):
        """Include dirs: kernel headers, CUDA home headers and kernels/include.

        The dead ``ret = []`` assignment that was immediately overwritten has
        been removed.
        """
        ret = [
            os.path.join(self.base_dir, "includes"),
            self.get_cuda_home_include(),
            os.path.join(self.base_dir, "kernels", "include"),
        ]
        return [self.colossalai_src_path(path) for path in ret]

    def sources_files(self):
        """Absolute paths of the attention frontend and its kernel sources."""
        fnames = [
            'multihead_attention_1d.cpp',
            'kernels/cublas_wrappers.cu',
            'kernels/transform_kernels.cu',
            'kernels/dropout_kernels.cu',
            'kernels/normalize_kernels.cu',
            'kernels/softmax_kernels.cu',
            'kernels/general_kernels.cu',
            'kernels/cuda_util.cu',
        ]
        ret = [os.path.join(self.base_dir, fname) for fname in fnames]
        return [self.colossalai_src_path(path) for path in ret]

    def cxx_flags(self):
        """C++ compile flags."""
        return ['-O3'] + self.version_dependent_macros

    def nvcc_flags(self):
        """nvcc compile flags, including per-GPU-arch gencode flags."""
        extra_cuda_flags = [
            '-std=c++14',
            '-U__CUDA_NO_HALF_OPERATORS__',
            '-U__CUDA_NO_HALF_CONVERSIONS__',
            '-U__CUDA_NO_HALF2_OPERATORS__',
            '-DTHRUST_IGNORE_CUB_VERSION_CHECK',
        ]
        extra_cuda_flags.extend(get_cuda_cc_flag())
        return ['-O3', '--use_fast_math'] + extra_cuda_flags
colossalai/kernel/op_builder/scaled_upper_triang_masked_softmax.py
deleted
100644 → 0
View file @
a9b27b92
import
os
from
.builder
import
Builder
,
get_cuda_cc_flag
class ScaledSoftmaxBuilder(Builder):
    """Builder for the scaled upper-triangular masked softmax CUDA kernel."""

    def __init__(self):
        self.base_dir = "cuda_native/csrc"
        self.name = 'scaled_upper_triang_masked_softmax'
        super().__init__()

    def include_dirs(self):
        """Include dirs: kernel headers, CUDA home headers and kernels/include.

        The dead ``ret = []`` assignment that was immediately overwritten has
        been removed.
        """
        ret = [
            os.path.join(self.base_dir, "includes"),
            self.get_cuda_home_include(),
            os.path.join(self.base_dir, "kernels", "include"),
        ]
        return [self.colossalai_src_path(path) for path in ret]

    def sources_files(self):
        """Absolute paths of the softmax C++/CUDA sources."""
        fnames = [
            'scaled_upper_triang_masked_softmax.cpp',
            'scaled_upper_triang_masked_softmax_cuda.cu',
        ]
        ret = [os.path.join(self.base_dir, fname) for fname in fnames]
        return [self.colossalai_src_path(path) for path in ret]

    def cxx_flags(self):
        """C++ compile flags."""
        return ['-O3']

    def nvcc_flags(self):
        """nvcc compile flags, including per-GPU-arch gencode flags."""
        extra_cuda_flags = [
            '-U__CUDA_NO_HALF_OPERATORS__',
            '-U__CUDA_NO_HALF_CONVERSIONS__',
            '--expt-relaxed-constexpr',
            '--expt-extended-lambda',
        ]
        extra_cuda_flags.extend(get_cuda_cc_flag())
        return ['-O3', '--use_fast_math'] + extra_cuda_flags
colossalai/kernel/op_builder/utils.py
deleted
100644 → 0
View file @
a9b27b92
import
subprocess
def get_cuda_bare_metal_version(cuda_dir):
    """Run ``nvcc -V`` under *cuda_dir* and parse the CUDA release version.

    Args:
        cuda_dir: path to the CUDA installation (typically CUDA_HOME).

    Returns:
        tuple: (raw nvcc output, major version string, minor version string).
    """
    raw_output = subprocess.check_output([cuda_dir + "/bin/nvcc", "-V"], universal_newlines=True)
    tokens = raw_output.split()
    # the version follows the word "release", e.g. "... release 11.2, V11.2.67"
    release = tokens[tokens.index("release") + 1].split(".")
    bare_metal_major = release[0]
    # the minor part may carry a trailing comma, so only its first character is kept
    bare_metal_minor = release[1][0]
    return raw_output, bare_metal_major, bare_metal_minor
def append_nvcc_threads(nvcc_extra_args):
    """Append ``--threads 4`` to the nvcc args when the toolkit supports it.

    nvcc gained the ``--threads`` option for parallel compilation in CUDA 11.2.

    Args:
        nvcc_extra_args (list): existing nvcc flags.

    Returns:
        list: the flags, with ``["--threads", "4"]`` appended when nvcc >= 11.2.
    """
    from torch.utils.cpp_extension import CUDA_HOME
    _, bare_metal_major, bare_metal_minor = get_cuda_bare_metal_version(CUDA_HOME)
    # compare (major, minor) as a tuple: the previous check
    # `major >= 11 and minor >= 2` wrongly rejected versions such as 12.0/12.1
    if (int(bare_metal_major), int(bare_metal_minor)) >= (11, 2):
        return nvcc_extra_args + ["--threads", "4"]
    return nvcc_extra_args
op_builder/builder.py
View file @
db6eea35
...
@@ -25,10 +25,12 @@ def get_cuda_cc_flag() -> List:
...
@@ -25,10 +25,12 @@ def get_cuda_cc_flag() -> List:
class
Builder
(
object
):
class
Builder
(
object
):
def
colossalai_src_path
(
self
,
code_path
):
def
colossalai_src_path
(
self
,
code_path
):
if
os
.
path
.
isabs
(
code_path
):
current_file_path
=
Path
(
__file__
)
return
code_path
if
os
.
path
.
islink
(
current_file_path
.
parent
):
# symbolic link
return
os
.
path
.
join
(
current_file_path
.
parent
.
parent
.
absolute
(),
code_path
)
else
:
else
:
return
os
.
path
.
join
(
Path
(
_
_file_
_
)
.
parent
.
parent
.
absolute
(),
code_path
)
return
os
.
path
.
join
(
current
_file_
path
.
parent
.
parent
.
absolute
(),
"colossalai"
,
"kernel"
,
code_path
)
def
get_cuda_home_include
(
self
):
def
get_cuda_home_include
(
self
):
"""
"""
...
...
op_builder/cpu_adam.py
View file @
db6eea35
...
@@ -6,7 +6,7 @@ from .utils import append_nvcc_threads
...
@@ -6,7 +6,7 @@ from .utils import append_nvcc_threads
class
CPUAdamBuilder
(
Builder
):
class
CPUAdamBuilder
(
Builder
):
NAME
=
"cpu_adam"
NAME
=
"cpu_adam"
BASE_DIR
=
"
colossalai/kernel/
cuda_native"
BASE_DIR
=
"cuda_native"
def
__init__
(
self
):
def
__init__
(
self
):
self
.
name
=
CPUAdamBuilder
.
NAME
self
.
name
=
CPUAdamBuilder
.
NAME
...
...
op_builder/fused_optim.py
View file @
db6eea35
...
@@ -5,7 +5,7 @@ from .builder import Builder, get_cuda_cc_flag
...
@@ -5,7 +5,7 @@ from .builder import Builder, get_cuda_cc_flag
class
FusedOptimBuilder
(
Builder
):
class
FusedOptimBuilder
(
Builder
):
NAME
=
'fused_optim'
NAME
=
'fused_optim'
BASE_DIR
=
"
colossalai/kernel/
cuda_native/csrc"
BASE_DIR
=
"cuda_native/csrc"
def
__init__
(
self
):
def
__init__
(
self
):
self
.
name
=
FusedOptimBuilder
.
NAME
self
.
name
=
FusedOptimBuilder
.
NAME
...
...
op_builder/moe.py
View file @
db6eea35
...
@@ -6,7 +6,7 @@ from .builder import Builder, get_cuda_cc_flag
...
@@ -6,7 +6,7 @@ from .builder import Builder, get_cuda_cc_flag
class
MOEBuilder
(
Builder
):
class
MOEBuilder
(
Builder
):
def
__init__
(
self
):
def
__init__
(
self
):
self
.
base_dir
=
"
colossalai/kernel/
cuda_native/csrc"
self
.
base_dir
=
"cuda_native/csrc"
self
.
name
=
'moe'
self
.
name
=
'moe'
super
().
__init__
()
super
().
__init__
()
...
...
op_builder/multi_head_attn.py
View file @
db6eea35
...
@@ -6,7 +6,7 @@ from .builder import Builder, get_cuda_cc_flag
...
@@ -6,7 +6,7 @@ from .builder import Builder, get_cuda_cc_flag
class
MultiHeadAttnBuilder
(
Builder
):
class
MultiHeadAttnBuilder
(
Builder
):
def
__init__
(
self
):
def
__init__
(
self
):
self
.
base_dir
=
"
colossalai/kernel/
cuda_native/csrc"
self
.
base_dir
=
"cuda_native/csrc"
self
.
name
=
'multihead_attention'
self
.
name
=
'multihead_attention'
super
().
__init__
()
super
().
__init__
()
...
...
op_builder/scaled_upper_triang_masked_softmax.py
View file @
db6eea35
...
@@ -6,7 +6,7 @@ from .builder import Builder, get_cuda_cc_flag
...
@@ -6,7 +6,7 @@ from .builder import Builder, get_cuda_cc_flag
class
ScaledSoftmaxBuilder
(
Builder
):
class
ScaledSoftmaxBuilder
(
Builder
):
def
__init__
(
self
):
def
__init__
(
self
):
self
.
base_dir
=
"
colossalai/kernel/
cuda_native/csrc"
self
.
base_dir
=
"cuda_native/csrc"
self
.
name
=
'scaled_upper_triang_masked_softmax'
self
.
name
=
'scaled_upper_triang_masked_softmax'
super
().
__init__
()
super
().
__init__
()
...
...
tests/test_optimizer/test_cpu_adam.py
View file @
db6eea35
...
@@ -66,12 +66,7 @@ def test_cpu_adam(adamw, step, p_dtype, g_dtype):
...
@@ -66,12 +66,7 @@ def test_cpu_adam(adamw, step, p_dtype, g_dtype):
exp_avg_sq
=
torch
.
rand
(
p_data
.
shape
)
exp_avg_sq
=
torch
.
rand
(
p_data
.
shape
)
exp_avg_sq_copy
=
exp_avg_sq
.
clone
()
exp_avg_sq_copy
=
exp_avg_sq
.
clone
()
try
:
from
colossalai.kernel
import
cpu_optim
from
colossalai._C
import
cpu_optim
except
:
from
colossalai.kernel.op_builder
import
CPUAdamBuilder
cpu_optim
=
CPUAdamBuilder
().
load
()
print
(
"build CPUAdamOptimizer at runtime"
)
cpu_adam_op
=
cpu_optim
.
CPUAdamOptimizer
(
lr
,
beta1
,
beta2
,
eps
,
weight_decay
,
adamw
)
cpu_adam_op
=
cpu_optim
.
CPUAdamOptimizer
(
lr
,
beta1
,
beta2
,
eps
,
weight_decay
,
adamw
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment