OpenDAS / ColossalAI · Commits

Commit 653b0a62
Authored Nov 09, 2022 by zbian; committed by アマデウス, Nov 09, 2022

    added skip_bias_add for non-tp linear

Parent: e5b1a0c9

Showing 3 changed files with 493 additions and 440 deletions.
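In Megatron-style linear layers, `skip_bias_add=True` makes the layer return its bias separately instead of adding it to the matmul output, so the add can be fused into a following kernel; this commit extends that option to the non-tensor-parallel path (`VanillaLinear`). A minimal sketch of the calling pattern this enables, where `fused_bias_gelu` is a hypothetical stand-in for a fused kernel, not an API from this commit:

    import torch
    import torch.nn.functional as F

    def fused_bias_gelu(x: torch.Tensor, bias: torch.Tensor) -> torch.Tensor:
        # Stand-in for a fused bias+activation kernel (hypothetical helper).
        return F.gelu(x + bias)

    linear = torch.nn.Linear(16, 32)
    x = torch.randn(4, 16)
    out = F.linear(x, linear.weight)          # matmul only; bias add skipped
    out = fused_bias_gelu(out, linear.bias)   # bias added inside the fused op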
Changed files:

    colossalai/nn/layer/colossalai_layer/linear.py  (+141, -147)
    colossalai/nn/layer/vanilla/__init__.py         (+11, -3)
    colossalai/nn/layer/vanilla/layers.py           (+341, -290)
colossalai/nn/layer/colossalai_layer/linear.py

--- a/colossalai/nn/layer/colossalai_layer/linear.py
+++ b/colossalai/nn/layer/colossalai_layer/linear.py
-import math
 import inspect
+import math
 from typing import Callable

-from colossalai.utils import get_current_device
 from torch import dtype, nn

+from colossalai.utils import get_current_device
+
 from ... import init as init
 from ..parallel_1d import *
 from ..parallel_2d import *
 from ..parallel_2p5d import *
 from ..parallel_3d import *
 from ..utils import get_tensor_parallel_mode
 from ..vanilla import *
 from ._utils import ColossalaiModule

-_parallel_linear = {'1d': Linear1D, '2d': Linear2D, '2.5d': Linear2p5D, '3d': Linear3D}
+_parallel_linear = {None: VanillaLinear, '1d': Linear1D, '2d': Linear2D, '2.5d': Linear2p5D, '3d': Linear3D}

-_parallel_classifier = {None: VanillaClassifier,
-                        '1d': Classifier1D,
-                        '2d': Classifier2D,
-                        '2.5d': Classifier2p5D,
-                        '3d': Classifier3D
-}
+_parallel_classifier = {
+    None: VanillaClassifier,
+    '1d': Classifier1D,
+    '2d': Classifier2D,
+    '2.5d': Classifier2p5D,
+    '3d': Classifier3D
+}

-_vocab_parallel_classifier = {'1d': VocabParallelClassifier1D,
-                              '2d': VocabParallelClassifier2D,
-                              '2.5d': VocabParallelClassifier2p5D,
-                              '3d': VocabParallelClassifier3D
-}
+_vocab_parallel_classifier = {
+    '1d': VocabParallelClassifier1D,
+    '2d': VocabParallelClassifier2D,
+    '2.5d': VocabParallelClassifier2p5D,
+    '3d': VocabParallelClassifier3D
+}


 class Linear(ColossalaiModule):
     """Linear layer of colossalai.

     Args:
         in_features (int): size of each input sample.
         out_features (int): size of each output sample.
         bias (bool, optional): If set to ``False``, the layer will not learn an additive bias, defaults to ``True``.
         dtype (:class:`torch.dtype`, optional): The dtype of parameters, defaults to None.
         weight_initializer (:class:`typing.Callable`, optional):
             The initializer of weight, defaults to kaiming uniform initializer.
         bias_initializer (:class:`typing.Callable`, optional):
             The initializer of bias, defaults to xavier uniform initializer.

     Note: ``kwargs`` would contain different parameters when you use different parallelisms.
     The ``kwargs`` should contain parameters below:
     ::

         Linear1D:
             gather_output: bool (optional, default to be false)
             skip_bias_add: bool (optional, default to be false)
         Linear2D:
             skip_bias_add: bool (optional, default to be false)
         Linear2p5D:
             skip_bias_add: bool (optional, default to be false)
         Linear3D:
             None

     More details about ``initializer`` please refer to
     `init <https://github.com/hpcaitech/ColossalAI/blob/main/colossalai/nn/init.py>`_.
     """

     def __init__(self,
                  in_features: int,
                  out_features: int,
                  bias: bool = True,
                  dtype: dtype = None,
                  weight_initializer: Callable = init.kaiming_uniform_(a=math.sqrt(5)),
                  bias_initializer: Callable = init.xavier_uniform_(a=1, scale=1),
                  **kwargs) -> None:
         tensor_parallel = get_tensor_parallel_mode()
-        if tensor_parallel is None:
-            layer = nn.Linear(in_features, out_features, bias=bias).to(dtype).to(get_current_device())
-            weight_initializer(layer.weight, fan_in=in_features, fan_out=out_features)
-            if layer.bias is not None:
-                bias_initializer(layer.bias, fan_in=in_features)
-        else:
-            linear_cls = _parallel_linear[tensor_parallel]
-            gather_output = kwargs.pop('gather_output', None)
-            if 'gather_output' in inspect.signature(linear_cls.__init__).parameters.keys():    # gather_out arg is available
-                kwargs['gather_output'] = gather_output
-            layer = linear_cls(
-                in_features,
-                out_features,
-                bias=bias,
-                dtype=dtype,
-                weight_initializer=weight_initializer,
-                bias_initializer=bias_initializer,
-                **kwargs,
-            )
+        linear_cls = _parallel_linear[tensor_parallel]
+        gather_output = kwargs.pop('gather_output', None)
+        if 'gather_output' in inspect.signature(linear_cls.__init__).parameters.keys():    # gather_out arg is available
+            kwargs['gather_output'] = gather_output
+        layer = linear_cls(
+            in_features,
+            out_features,
+            bias=bias,
+            dtype=dtype,
+            weight_initializer=weight_initializer,
+            bias_initializer=bias_initializer,
+            **kwargs,
+        )
         super().__init__(layer)


 class Classifier(ColossalaiModule):
     """Classifier layer of colossalai.

     Args:
         in_features (int): size of each input sample.
         num_classes (int): number of classes.
         weight (:class:`torch.nn.Parameter`, optional): weight of the classifier, defaults to None.
         bias (bool, optional): If set to ``False``, the layer will not learn an additive bias, defaults to ``True``.
         dtype (:class:`torch.dtype`, optional): The dtype of parameters, defaults to None.
         weight_initializer (:class:`typing.Callable`, optional):
             The initializer of weight, defaults to kaiming uniform initializer.
         bias_initializer (:class:`typing.Callable`, optional):
             The initializer of bias, defaults to xavier uniform initializer.

     More details about ``initializer`` please refer to
     `init <https://github.com/hpcaitech/ColossalAI/blob/main/colossalai/nn/init.py>`_.
     """

     def __init__(self,
                  in_features: int,
                  num_classes: int,
                  weight: nn.Parameter = None,
                  bias: bool = True,
                  dtype: dtype = None,
                  weight_initializer: Callable = init.kaiming_uniform_(a=math.sqrt(5)),
                  bias_initializer: Callable = init.xavier_uniform_(a=1, scale=1),
                  vocab_parallel_limit: int = 2048) -> None:
         tensor_parallel = get_tensor_parallel_mode()
         if num_classes <= vocab_parallel_limit or tensor_parallel is None:
             layer = _parallel_classifier[tensor_parallel](
                 in_features,
                 num_classes,
                 weight=weight,
                 bias=bias,
                 dtype=dtype,
                 weight_initializer=weight_initializer,
                 bias_initializer=bias_initializer,
             )
         else:
             layer = _vocab_parallel_classifier[tensor_parallel](
                 in_features,
                 num_classes,
                 weight=weight,
                 bias=bias,
                 dtype=dtype,
                 weight_initializer=weight_initializer,
                 bias_initializer=bias_initializer,
             )
         super().__init__(layer)
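The net effect of this file's change: when no tensor-parallel mode is set, `Linear` now dispatches through `_parallel_linear[None]` to `VanillaLinear` instead of wrapping a bare `nn.Linear`, so extra keyword arguments such as `skip_bias_add` reach the non-TP path through the same code as the parallel variants. The `gather_output` handling relies on a small introspection pattern, forwarding the kwarg only to classes whose `__init__` accepts it; a self-contained sketch of that pattern, using made-up classes for illustration:

    import inspect

    def construct(cls, *args, gather_output=None, **kwargs):
        # Forward `gather_output` only if cls.__init__ declares that parameter.
        if 'gather_output' in inspect.signature(cls.__init__).parameters:
            kwargs['gather_output'] = gather_output
        return cls(*args, **kwargs)

    class WithGather:
        def __init__(self, dim, gather_output=False):
            self.dim, self.gather_output = dim, gather_output

    class WithoutGather:
        def __init__(self, dim):
            self.dim = dim

    a = construct(WithGather, 8, gather_output=True)     # kwarg forwarded
    b = construct(WithoutGather, 8, gather_output=True)  # kwarg dropped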
colossalai/nn/layer/vanilla/__init__.py

--- a/colossalai/nn/layer/vanilla/__init__.py
+++ b/colossalai/nn/layer/vanilla/__init__.py
-from .layers import (DropPath, VanillaClassifier, VanillaLayerNorm, VanillaPatchEmbedding, WrappedDropout,
-                     WrappedDropPath)
+from .layers import (
+    DropPath,
+    VanillaClassifier,
+    VanillaLayerNorm,
+    VanillaLinear,
+    VanillaPatchEmbedding,
+    WrappedDropout,
+    WrappedDropPath,
+)

 __all__ = [
-    "VanillaLayerNorm", "VanillaPatchEmbedding", "VanillaClassifier", "DropPath", "WrappedDropout", "WrappedDropPath"
+    "VanillaLayerNorm", "VanillaPatchEmbedding", "VanillaClassifier", "DropPath", "WrappedDropout", "WrappedDropPath",
+    "VanillaLinear"
 ]
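With this re-export, the new layer becomes importable from the subpackage (and, via the wildcard import in linear.py above, visible to the `_parallel_linear` table):

    # Assumes a ColossalAI checkout at this commit.
    from colossalai.nn.layer.vanilla import VanillaLinear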
colossalai/nn/layer/vanilla/layers.py

(Diff collapsed on the page; +341, -290 not shown.)
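Since the layers.py diff is collapsed, the committed implementation is not visible here. Based on the commit message and the `skip_bias_add` convention already used by the parallel linear layers, the new `VanillaLinear` plausibly follows the shape below. This is a sketch under those assumptions, not the committed code; names and initializers are illustrative:

    import math

    import torch
    import torch.nn.functional as F
    from torch import nn

    class VanillaLinearSketch(nn.Module):
        """Sketch of a non-TP linear with skip_bias_add; not the committed code."""

        def __init__(self, in_features: int, out_features: int, bias: bool = True,
                     skip_bias_add: bool = False) -> None:
            super().__init__()
            self.skip_bias_add = skip_bias_add
            self.weight = nn.Parameter(torch.empty(out_features, in_features))
            self.bias = nn.Parameter(torch.empty(out_features)) if bias else None
            nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5))
            if self.bias is not None:
                nn.init.zeros_(self.bias)

        def forward(self, input: torch.Tensor):
            if self.skip_bias_add:
                # Return the bias separately so callers can fuse the add elsewhere.
                return F.linear(input, self.weight), self.bias
            return F.linear(input, self.weight, self.bias)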