OpenDAS / ColossalAI / Commits / 355ffb38

Unverified commit 355ffb38, authored Dec 23, 2022 by Jiarui Fang, committed by GitHub on Dec 23, 2022

[builder] unified cpu_optim fused_optim inferface (#2190)

Parent: 9587b080
Showing 9 changed files with 28 additions and 50 deletions (+28, -50)
colossalai/amp/naive_amp/_fp16_optimizer.py                 +1  -8
colossalai/kernel/__init__.py                               +14 -2
colossalai/nn/optimizer/fused_adam.py                       +2  -5
colossalai/nn/optimizer/fused_lamb.py                       +1  -5
colossalai/nn/optimizer/fused_sgd.py                        +2  -5
colossalai/nn/optimizer/hybrid_adam.py                      +1  -6
colossalai/utils/common.py                                  +4  -11
colossalai/utils/multi_tensor_apply/multi_tensor_apply.py   +1  -1
tests/test_optimizer/test_fused_adam_kernel.py              +2  -7
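All nine files follow a single pattern: each consumer module previously inlined its own try/except fallback from the prebuilt colossalai._C extension to a JIT-built kernel, and after this commit it simply imports the extension from colossalai.kernel, which now owns that fallback. In condensed form (taken from the diffs below):

    # Before: repeated in each consumer module.
    try:
        from colossalai._C import fused_optim            # prebuilt extension
    except:
        from colossalai.kernel.op_builder import FusedOptimBuilder
        fused_optim = FusedOptimBuilder().load()          # JIT-build fallback

    # After: the fallback lives once in colossalai/kernel/__init__.py.
    from colossalai.kernel import fused_optim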
colossalai/amp/naive_amp/_fp16_optimizer.py

@@ -3,19 +3,12 @@
 import torch
 import torch.distributed as dist
 
-try:
-    from colossalai._C import fused_optim
-except:
-    print('Colossalai should be built with cuda extension to use the FP16 optimizer')
-    from colossalai.kernel.op_builder.fused_optim import FusedOptimBuilder
-    fused_optim = FusedOptimBuilder().load()
-
 from torch.distributed import ProcessGroup
 from torch.optim import Optimizer
 
 from colossalai.context import ParallelMode
 from colossalai.core import global_context as gpc
+from colossalai.kernel import fused_optim
 from colossalai.logging import get_dist_logger
 from colossalai.utils import clip_grad_norm_fp32, copy_tensor_parallel_attributes, multi_tensor_applier
colossalai/kernel/__init__.py

-from .cuda_native import LayerNorm, FusedScaleMaskSoftmax, MultiHeadAttention
+from .cuda_native import FusedScaleMaskSoftmax, LayerNorm, MultiHeadAttention
 
+try:
+    from colossalai._C import fused_optim
+except:
+    from colossalai.kernel.op_builder.fused_optim import FusedOptimBuilder
+    fused_optim = FusedOptimBuilder().load()
+
+try:
+    from colossalai._C import cpu_optim
+except ImportError:
+    from colossalai.kernel.op_builder import CPUAdamBuilder
+    cpu_optim = CPUAdamBuilder().load()
+
-__all__ = ["LayerNorm", "FusedScaleMaskSoftmax", "MultiHeadAttention"]
+__all__ = ["fused_optim", "cpu_optim", "LayerNorm", "FusedScaleMaskSoftmax", "MultiHeadAttention"]
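Both try/except blocks above perform the same prebuilt-or-JIT loading step. A small helper such as the hypothetical load_extension below sketches how that step generalizes; the helper name and signature are illustrative and are not part of this commit:

    # Hypothetical helper illustrating the prebuilt-or-JIT pattern above;
    # load_extension is an illustrative name, not an API added by this commit.
    import importlib

    def load_extension(prebuilt_name, builder_factory):
        """Return the prebuilt colossalai._C submodule if it exists,
        otherwise JIT-compile the kernel with the given op builder."""
        try:
            # Fast path: the extension was compiled when ColossalAI was installed.
            return importlib.import_module(f"colossalai._C.{prebuilt_name}")
        except ImportError:
            # Slow path: build the kernel on first use via the op builder.
            return builder_factory().load()

    # Usage mirroring the module above:
    # fused_optim = load_extension("fused_optim", FusedOptimBuilder)
    # cpu_optim = load_extension("cpu_optim", CPUAdamBuilder)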
colossalai/nn/optimizer/fused_adam.py

@@ -65,11 +65,8 @@ class FusedAdam(torch.optim.Optimizer):
         self.adamw_mode = 1 if adamw_mode else 0
         self.set_grad_none = set_grad_none
         if multi_tensor_applier.available:
-            try:
-                from colossalai._C import fused_optim
-            except:
-                from colossalai.kernel.op_builder.fused_optim import FusedOptimBuilder
-                fused_optim = FusedOptimBuilder().load()
+            from colossalai.kernel import fused_optim
+
             # Skip buffer
             self._dummy_overflow_buf = torch.cuda.IntTensor([0])
             self.multi_tensor_adam = fused_optim.multi_tensor_adam
colossalai/nn/optimizer/fused_lamb.py

@@ -76,11 +76,7 @@ class FusedLAMB(torch.optim.Optimizer):
                        max_grad_norm=max_grad_norm)
         super(FusedLAMB, self).__init__(params, defaults)
         if multi_tensor_applier.available:
-            try:
-                from colossalai._C import fused_optim
-            except:
-                from colossalai.kernel.op_builder.fused_optim import FusedOptimBuilder
-                fused_optim = FusedOptimBuilder().load()
+            from colossalai.kernel import fused_optim
             self.multi_tensor_l2norm = fused_optim.multi_tensor_l2norm
             # Skip buffer
colossalai/nn/optimizer/fused_sgd.py

@@ -80,11 +80,8 @@ class FusedSGD(Optimizer):
         self.wd_after_momentum = wd_after_momentum
 
         if multi_tensor_applier.available:
-            try:
-                from colossalai._C import fused_optim
-            except:
-                from colossalai.kernel.op_builder import FusedOptimBuilder
-                fused_optim = FusedOptimBuilder().load()
+            from colossalai.kernel import fused_optim
+
             # Skip buffer
             self._dummy_overflow_buf = torch.tensor([0],
                                                     dtype=torch.int,
colossalai/nn/optimizer/hybrid_adam.py

@@ -76,13 +76,8 @@ class HybridAdam(NVMeOptimizer):
         default_args = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay, bias_correction=bias_correction)
         super(HybridAdam, self).__init__(model_params, default_args, nvme_offload_fraction, nvme_offload_dir)
         self.adamw_mode = adamw_mode
-        try:
-            from colossalai._C import cpu_optim, fused_optim
-        except ImportError:
-            from colossalai.kernel.op_builder import CPUAdamBuilder, FusedOptimBuilder
-            fused_optim = FusedOptimBuilder().load()
-            cpu_optim = CPUAdamBuilder().load()
+        from colossalai.kernel import cpu_optim, fused_optim
 
         self.cpu_adam_op = cpu_optim.CPUAdamOptimizer(lr, betas[0], betas[1], eps, weight_decay, adamw_mode)
         self.gpu_adam_op = fused_optim.multi_tensor_adam
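With the loader centralized, constructing the optimizer no longer depends on HybridAdam's own fallback; it only requires that colossalai.kernel can provide cpu_optim and fused_optim. A minimal usage sketch, assuming a CUDA-capable ColossalAI install (illustrative only, not code from this commit):

    # Minimal sketch: assumes ColossalAI is installed with, or can JIT-build,
    # its CUDA extensions, so colossalai.kernel can load cpu_optim/fused_optim.
    import torch
    from colossalai.nn.optimizer import HybridAdam

    model = torch.nn.Linear(16, 16).cuda()
    optimizer = HybridAdam(model.parameters(), lr=1e-3)

    loss = model(torch.randn(4, 16, device="cuda")).sum()
    loss.backward()
    optimizer.step()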
colossalai/utils/common.py

@@ -4,28 +4,21 @@ import functools
 import os
 import random
 import socket
+from collections import defaultdict
+from contextlib import contextmanager
 from pathlib import Path
 from typing import Callable, Dict, List, Optional, Union
 
 import torch
+import torch.distributed as dist
 from torch._six import inf
 from torch.nn.parameter import Parameter
 
-try:
-    from colossalai._C import fused_optim
-except:
-    from colossalai.kernel.op_builder import FusedOptimBuilder
-    fused_optim = FusedOptimBuilder().load()
-
-from collections import defaultdict
-from contextlib import contextmanager
-
-import torch.distributed as dist
 from colossalai.constants import IS_TENSOR_PARALLEL, NUM_PARTITIONS, TENSOR_PARALLEL_ATTRIBUTES
 from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
 from colossalai.global_variables import tensor_parallel_env as env
+from colossalai.kernel import fused_optim
 from colossalai.tensor import ColoParameter, ProcessGroup
 
 from .multi_tensor_apply import multi_tensor_applier
colossalai/utils/multi_tensor_apply/multi_tensor_apply.py

@@ -14,7 +14,7 @@ class MultiTensorApply(object):
     def __init__(self, chunk_size):
         try:
-            import colossalai._C.fused_optim
+            from colossalai.kernel import fused_optim
             MultiTensorApply.available = True
             self.chunk_size = chunk_size
         except ImportError as err:
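Previously the flag was set only when the prebuilt colossalai._C.fused_optim extension could be imported; importing through colossalai.kernel lets the JIT-build fallback satisfy the check as well. The fused optimizers above gate their kernel setup on this flag; a condensed sketch of that consumer-side check (illustrative, not code from this commit):

    # Illustrative gate on multi_tensor_applier.available, as used by the
    # fused optimizers in this commit.
    from colossalai.utils import multi_tensor_applier

    if multi_tensor_applier.available:
        # fused_optim is either prebuilt or JIT-built by colossalai.kernel.
        from colossalai.kernel import fused_optim
        adam_op = fused_optim.multi_tensor_adam
    else:
        # Fall back to a plain PyTorch optimizer path.
        adam_op = None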
tests/test_optimizer/test_fused_adam_kernel.py

@@ -46,13 +46,8 @@ def torch_adam_update(
 @parameterize('p_dtype', [torch.float, torch.half])
 @parameterize('g_dtype', [torch.float, torch.half])
 def test_adam(adamw, step, p_dtype, g_dtype):
-    try:
-        import colossalai._C.fused_optim
-        fused_adam = colossalai._C.fused_optim.multi_tensor_adam
-    except:
-        from colossalai.kernel.op_builder import FusedOptimBuilder
-        fused_optim = FusedOptimBuilder().load()
-        fused_adam = fused_optim.multi_tensor_adam
+    from colossalai.kernel import fused_optim
+    fused_adam = fused_optim.multi_tensor_adam
 
     dummy_overflow_buf = torch.cuda.IntTensor([0])