OpenDAS / ColossalAI · Commits · 76757921

Unverified commit 76757921, authored Dec 28, 2022 by Jiarui Fang, committed via GitHub on Dec 28, 2022.

[builder] raise Error when CUDA_HOME is not set (#2213)

Parent: 78a89d9b
Showing 7 changed files with 19 additions and 14 deletions (+19 -14):

- colossalai/kernel/op_builder/builder.py (+7 -0)
- colossalai/kernel/op_builder/cpu_adam.py (+1 -3)
- colossalai/kernel/op_builder/fused_optim.py (+1 -4)
- colossalai/kernel/op_builder/multi_head_attn.py (+1 -3)
- examples/language/gpt/README.md (+5 -0)
- examples/language/gpt/run.sh (+3 -3)
- examples/language/gpt/train_gpt_demo.py (+1 -1)
colossalai/kernel/op_builder/builder.py
@@ -30,6 +30,13 @@ class Builder(object):
         else:
             return os.path.join(Path(__file__).parent.parent.absolute(), code_path)
 
+    def get_cuda_include(self):
+        from torch.utils.cpp_extension import CUDA_HOME
+        if CUDA_HOME is None:
+            raise RuntimeError("CUDA_HOME is None, please set CUDA_HOME to compile C++/CUDA kernels in ColossalAI.")
+        cuda_include = os.path.join(CUDA_HOME, "include")
+        return cuda_include
+
     def strip_empty_entries(self, args):
         '''
         Drop any empty strings from the list of compile and link flags
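The point of the new helper: previously each op builder joined CUDA_HOME with "include" directly, so a missing CUDA toolkit (CUDA_HOME is None) would surface as a less helpful TypeError from os.path.join; after this commit the build aborts with an explicit RuntimeError. Below is a minimal standalone sketch of the same check, not the ColossalAI source itself, it only mimics get_cuda_include and assumes PyTorch is installed:

import os

from torch.utils.cpp_extension import CUDA_HOME  # None when no CUDA toolkit is detected


def get_cuda_include() -> str:
    # Fail loudly at build time instead of letting os.path.join choke on None.
    if CUDA_HOME is None:
        raise RuntimeError(
            "CUDA_HOME is None, please set CUDA_HOME to compile C++/CUDA kernels in ColossalAI.")
    return os.path.join(CUDA_HOME, "include")


if __name__ == "__main__":
    try:
        print("CUDA headers:", get_cuda_include())
    except RuntimeError as err:
        print("kernel build would abort:", err)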
colossalai/kernel/op_builder/cpu_adam.py
@@ -27,9 +27,7 @@ class CPUAdamBuilder(Builder):
         ]
 
     def include_paths(self):
-        from torch.utils.cpp_extension import CUDA_HOME
-        cuda_include = os.path.join(CUDA_HOME, "include")
-        return [os.path.join(CPUAdamBuilder.BASE_DIR, "includes"), cuda_include]
+        return [os.path.join(CPUAdamBuilder.BASE_DIR, "includes"), self.get_cuda_include()]
 
     def strip_empty_entries(self, args):
         '''
colossalai/kernel/op_builder/fused_optim.py
@@ -31,10 +31,7 @@ class FusedOptimBuilder(Builder):
         ]
 
     def include_paths(self):
-        import torch
-        from torch.utils.cpp_extension import CUDA_HOME
-        cuda_include = os.path.join(CUDA_HOME, "include")
-        return [os.path.join(FusedOptimBuilder.BASE_DIR, "includes"), cuda_include]
+        return [os.path.join(FusedOptimBuilder.BASE_DIR, "includes"), self.get_cuda_include()]
 
     def builder(self, name):
         from torch.utils.cpp_extension import CUDAExtension
colossalai/kernel/op_builder/multi_head_attn.py
@@ -31,10 +31,8 @@ class MultiHeadAttnBuilder(Builder):
         ]
 
     def include_paths(self):
-        from torch.utils.cpp_extension import CUDA_HOME
         ret = []
-        cuda_include = os.path.join(CUDA_HOME, "include")
-        ret = [os.path.join(self.base_dir, "includes"), cuda_include]
+        ret = [os.path.join(self.base_dir, "includes"), self.get_cuda_include()]
         ret.append(os.path.join(self.base_dir, "kernels", "include"))
         print("include_paths", ret)
         return ret
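After this commit all three op builders delegate the CUDA include lookup to the base class instead of repeating the CUDA_HOME handling. As a hedged illustration of the resulting pattern, a hypothetical new op builder would only need the following; the class name and BASE_DIR value are made up for the example and are not part of the commit:

import os

from colossalai.kernel.op_builder.builder import Builder


class MyOpBuilder(Builder):
    # Hypothetical builder, shown only to illustrate the post-commit pattern.
    BASE_DIR = "cuda_native"  # made-up base directory for the example

    def include_paths(self):
        # The shared helper raises RuntimeError when CUDA_HOME is unset,
        # so subclasses no longer duplicate that check.
        return [os.path.join(MyOpBuilder.BASE_DIR, "includes"), self.get_cuda_include()]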
examples/language/gpt/README.md
@@ -106,3 +106,8 @@ Touch the bar of model scale and batch size.
 | gpt2_20b | 8 | auto | 2 | 16 | 99.871 |
 | gpt2_20b | 8 | cpu | 2 | 64 | 125.170 |
 | gpt2_20b | 8 | const | 2 | 32 | 105.415 |
+
+| model | #GPU | policy | TP | batch per DP | Tflops |
+| ---------- | --------- |--------- |--------- |--------- |--------- |
+| gpt2_20b | 8 | cpu | 2 | 8 | 46.895 |
examples/language/gpt/run.sh
@@ -2,12 +2,12 @@
 export DISTPAN="colossalai"
 
 # The following options only valid when DISTPAN="colossalai"
-export TPDEGREE=2
+export TPDEGREE=4
 export GPUNUM=8
 export PLACEMENT='cpu'
 export USE_SHARD_INIT=False
-export BATCH_SIZE=64
-export MODEL_TYPE="gpt2_20b"
+export BATCH_SIZE=32
+# export MODEL_TYPE="gpt2_24b"
 
 mkdir -p logs
 env OMP_NUM_THREADS=16 torchrun --standalone --nproc_per_node=${GPUNUM} train_gpt_demo.py --tp_degree=${TPDEGREE} --model_type=${MODEL_TYPE} --batch_size=${BATCH_SIZE} --placement ${PLACEMENT} --shardinit ${USE_SHARD_INIT} --distplan ${DISTPAN} 2>&1 | tee ./logs/${MODEL_TYPE}_${DISTPAN}_gpu_${GPUNUM}_bs_${BATCH_SIZE}_tp_${TPDEGREE}.log
examples/language/gpt/train_gpt_demo.py
@@ -218,7 +218,7 @@ def main():
         model = gemini_zero_dpp(model, pg, args.placement)
 
         # build highly optimized cpu optimizer
-        optimizer = GeminiAdamOptimizer(model, lr=1e-3, initial_scale=2**5)
+        optimizer = GeminiAdamOptimizer(model, lr=1e-3, initial_scale=2**5, gpu_margin_mem_ratio=0.6)
 
         logger.info(get_mem_info(prefix='After init optim, '), ranks=[0])
     else:
         model = model_builder(args.model_type)(checkpoint=True).cuda()