Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
ColossalAI
Commits
da3f0934
Commit
da3f0934
authored
Apr 23, 2023
by
zhuwenwen
Browse files
delete unused files
parent
c4dd1fd4
Changes
728
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
0 additions
and
276 deletions
+0
-276
model_zoo/moe/__init__.py
model_zoo/moe/__init__.py
+0
-0
model_zoo/moe/models.py
model_zoo/moe/models.py
+0
-147
model_zoo/moe/util.py
model_zoo/moe/util.py
+0
-41
model_zoo/vit/__init__.py
model_zoo/vit/__init__.py
+0
-1
model_zoo/vit/__pycache__/__init__.cpython-37.pyc
model_zoo/vit/__pycache__/__init__.cpython-37.pyc
+0
-0
model_zoo/vit/__pycache__/vision_transformer_from_config.cpython-37.pyc
...__pycache__/vision_transformer_from_config.cpython-37.pyc
+0
-0
model_zoo/vit/__pycache__/vit.cpython-37.pyc
model_zoo/vit/__pycache__/vit.cpython-37.pyc
+0
-0
model_zoo/vit/vision_transformer_from_config.py
model_zoo/vit/vision_transformer_from_config.py
+0
-87
No files found.
Too many changes to show.
To preserve performance only
728 of 728+
files are displayed.
Plain diff
Email patch
model_zoo/moe/__init__.py
deleted
100644 → 0
View file @
c4dd1fd4
model_zoo/moe/models.py
deleted
100644 → 0
View file @
c4dd1fd4
import
math
import
torch
import
torch.nn
as
nn
from
colossalai.context
import
ParallelMode
from
colossalai.nn.layer
import
VanillaPatchEmbedding
,
VanillaClassifier
,
\
WrappedDropout
as
Dropout
,
WrappedDropPath
as
DropPath
from
colossalai.nn.layer.moe
import
Experts
,
MoeLayer
,
Top2Router
,
NormalNoiseGenerator
from
.util
import
moe_sa_args
,
moe_mlp_args
from
..helper
import
TransformerLayer
from
colossalai.global_variables
import
moe_env
from
colossalai.utils
import
get_current_device
class VanillaSelfAttention(nn.Module):
    """Plain multi-head self attention in the style of ViT.

    One linear layer produces the packed query/key/value projection, scaled
    dot-product attention is evaluated per head, and a second linear layer maps
    the concatenated heads back to ``d_model`` features.

    Args:
        d_model: feature size of the input and of the output.
        n_heads: number of attention heads.
        d_kv: feature size of a single head.
        attention_drop: probability for the dropout applied to the attention
            weights; only used when ``dropout1`` is not supplied.
        drop_rate: probability for the dropout applied to the output; only
            used when ``dropout2`` is not supplied.
        bias: bias flag forwarded to the packed QKV projection.
        dropout1: optional module used on the attention weights instead of a
            freshly built ``nn.Dropout(attention_drop)``.
        dropout2: optional module used on the output instead of a freshly
            built ``nn.Dropout(drop_rate)``.
    """

    def __init__(self,
                 d_model: int,
                 n_heads: int,
                 d_kv: int,
                 attention_drop: float = 0,
                 drop_rate: float = 0,
                 bias: bool = True,
                 dropout1=None,
                 dropout2=None):
        super().__init__()
        inner_dim = n_heads * d_kv

        self.n_heads = n_heads
        self.d_kv = d_kv
        self.scale = 1.0 / math.sqrt(self.d_kv)

        # Packed projection: query, key and value for every head in one matmul.
        self.dense1 = nn.Linear(d_model, 3 * inner_dim, bias, device=get_current_device())
        self.softmax = nn.Softmax(dim=-1)
        if dropout1 is None:
            dropout1 = nn.Dropout(attention_drop)
        self.atten_drop = dropout1

        # Output projection; it always keeps nn.Linear's default bias setting.
        self.dense2 = nn.Linear(inner_dim, d_model, device=get_current_device())
        if dropout2 is None:
            dropout2 = nn.Dropout(drop_rate)
        self.dropout = dropout2

    def forward(self, x):
        """Apply self attention to ``x`` and return the projected result.

        The first two dimensions of ``x`` are kept as they are (presumably
        batch and sequence length -- confirm against callers).
        """
        packed = self.dense1(x)
        # Split the last dimension into (3, n_heads, d_kv).
        packed = packed.view(packed.shape[:2] + (3, self.n_heads, self.d_kv))
        # Move the q/k/v axis to the front and the head axis ahead of dim 1.
        query, key, value = packed.permute(2, 0, 3, 1, 4).unbind(0)

        scores = torch.matmul(query, key.transpose(-2, -1)) * self.scale
        weights = self.atten_drop(self.softmax(scores))

        # Swap the head axis back behind dim 1 before merging the heads.
        context = torch.matmul(weights, value).transpose(1, 2)
        merged = context.reshape(context.shape[:2] + (self.n_heads * self.d_kv,))

        return self.dropout(self.dense2(merged))
class VanillaFFN(nn.Module):
    """Two-layer feed-forward network (MLP).

    The layers are chained as linear -> activation -> dropout -> linear ->
    dropout inside a single ``nn.Sequential`` stored in ``self.ffn``.

    Args:
        d_model: feature size of the input and of the output.
        d_ff: feature size of the hidden layer.
        activation: activation module; ``nn.GELU()`` is used when omitted.
        drop_rate: probability for both default dropout layers.
        bias: bias flag forwarded to both linear layers.
        dropout1: optional module used after the activation instead of a
            freshly built ``nn.Dropout(drop_rate)``.
        dropout2: optional module used after the second linear layer instead
            of a freshly built ``nn.Dropout(drop_rate)``.
    """

    def __init__(self,
                 d_model: int,
                 d_ff: int,
                 activation=None,
                 drop_rate: float = 0,
                 bias: bool = True,
                 dropout1=None,
                 dropout2=None):
        super().__init__()

        fc_in = nn.Linear(d_model, d_ff, bias, device=get_current_device())
        if activation is None:
            activation = nn.GELU()
        fc_out = nn.Linear(d_ff, d_model, bias, device=get_current_device())

        if dropout1 is None:
            dropout1 = nn.Dropout(drop_rate)
        if dropout2 is None:
            dropout2 = nn.Dropout(drop_rate)

        self.ffn = nn.Sequential(fc_in, activation, dropout1, fc_out, dropout2)

    def forward(self, x):
        """Run ``x`` through the sequential feed-forward stack."""
        return self.ffn(x)
class Widenet(nn.Module):
    """Image classifier whose transformer layers reuse one attention module,
    one router and one pool of experts.

    Every layer gets its own pair of layer norms and its own drop-path module,
    while the self attention, the top-2 router and the experts are the same
    objects in all ``depth`` layers.

    Args:
        num_experts: number of experts in the shared expert pool.
        capacity_factor: first positional argument of ``Top2Router``.
        img_size: image size passed to the patch embedding.
        patch_size: patch size passed to the patch embedding.
        in_chans: number of input channels passed to the patch embedding.
        num_classes: number of output classes of the classifier.
        depth: number of transformer layers.
        d_model: embedding / hidden feature size.
        num_heads: number of attention heads.
        d_kv: feature size of each attention head.
        d_ff: hidden size of each expert feed-forward network.
        attention_drop: dropout probability for the attention weights.
        drop_rate: dropout probability used for embeddings, attention output
            and expert feed-forward networks.
        drop_path: drop-path probability of the last layer; earlier layers
            get linearly spaced values starting from 0.
    """

    def __init__(self,
                 num_experts: int,
                 capacity_factor: float,
                 img_size: int = 224,
                 patch_size: int = 16,
                 in_chans: int = 3,
                 num_classes: int = 1000,
                 depth: int = 12,
                 d_model: int = 768,
                 num_heads: int = 12,
                 d_kv: int = 64,
                 d_ff: int = 4096,
                 attention_drop: float = 0.,
                 drop_rate: float = 0.1,
                 drop_path: float = 0.):
        super().__init__()

        patch_embed = VanillaPatchEmbedding(img_size=img_size,
                                            patch_size=patch_size,
                                            in_chans=in_chans,
                                            embed_size=d_model)
        input_dropout = Dropout(p=drop_rate, mode=ParallelMode.TENSOR)

        # Modules below are created once and handed to every layer.
        attention_kwargs = moe_sa_args(d_model=d_model,
                                       n_heads=num_heads,
                                       d_kv=d_kv,
                                       attention_drop=attention_drop,
                                       drop_rate=drop_rate)
        attention = VanillaSelfAttention(**attention_kwargs)

        noise = NormalNoiseGenerator(num_experts)
        router = Top2Router(capacity_factor, noisy_func=noise)

        expert_kwargs = moe_mlp_args(d_model=d_model, d_ff=d_ff, drop_rate=drop_rate)
        experts = Experts(expert=VanillaFFN, num_experts=num_experts, **expert_kwargs)

        # Stochastic depth decay rule: one linearly spaced rate per layer.
        drop_path_rates = torch.linspace(0, drop_path, depth).tolist()

        layers = []
        for rate in drop_path_rates:
            layers.append(
                TransformerLayer(att=attention,
                                 ffn=MoeLayer(dim_model=d_model,
                                              num_experts=num_experts,
                                              router=router,
                                              experts=experts),
                                 norm1=nn.LayerNorm(d_model, eps=1e-6),
                                 norm2=nn.LayerNorm(d_model, eps=1e-6),
                                 droppath=DropPath(p=rate, mode=ParallelMode.TENSOR)))

        final_norm = nn.LayerNorm(d_model, eps=1e-6)

        # The classification head starts from all-zero weight and bias.
        self.linear = VanillaClassifier(in_features=d_model, num_classes=num_classes)
        for head_param in (self.linear.weight, self.linear.bias):
            nn.init.zeros_(head_param)

        self.widenet = nn.Sequential(patch_embed, input_dropout, *layers, final_norm)

    def forward(self, x):
        """Classify ``x``: reset the MoE loss state, run the backbone, average
        over dimension 1 and apply the classifier."""
        moe_env.reset_loss()
        features = self.widenet(x)
        pooled = torch.mean(features, dim=1)
        return self.linear(pooled)
model_zoo/moe/util.py
deleted
100644 → 0
View file @
c4dd1fd4
from
colossalai.context
import
ParallelMode
from
colossalai.nn.layer
import
WrappedDropout
as
Dropout
def moe_sa_args(d_model: int,
                n_heads: int,
                d_kv: int,
                attention_drop: float = 0,
                drop_rate: float = 0,
                bias: bool = True):
    """Build keyword arguments for a self attention module used with MoE.

    This is an example of how arguments can be prepared, since many modules
    have to be adapted before they are put into experts. The two dropout
    probabilities are turned into ``Dropout`` modules created with
    ``ParallelMode.TENSOR`` and returned as ``dropout1`` / ``dropout2``; the
    raw probabilities themselves are not part of the returned dict.
    """
    attention_dropout = Dropout(attention_drop, mode=ParallelMode.TENSOR)
    output_dropout = Dropout(drop_rate, mode=ParallelMode.TENSOR)

    return {
        "d_model": d_model,
        "n_heads": n_heads,
        "d_kv": d_kv,
        "bias": bias,
        "dropout1": attention_dropout,
        "dropout2": output_dropout,
    }
def moe_mlp_args(d_model: int, d_ff: int, drop_rate: float, bias: bool = True):
    """Build keyword arguments for the MLP used inside ``Experts``.

    This is an example of how arguments can be prepared, since many modules
    have to be adapted before they are put into experts. Two separate
    ``Dropout`` modules with the same probability are created with
    ``ParallelMode.TENSOR`` and returned as ``dropout1`` / ``dropout2``; the
    raw probability itself is not part of the returned dict.
    """
    hidden_dropout = Dropout(drop_rate, mode=ParallelMode.TENSOR)
    output_dropout = Dropout(drop_rate, mode=ParallelMode.TENSOR)

    return {
        "d_model": d_model,
        "d_ff": d_ff,
        "bias": bias,
        "dropout1": hidden_dropout,
        "dropout2": output_dropout,
    }
model_zoo/vit/__init__.py
deleted
100644 → 0
View file @
c4dd1fd4
from
.vit
import
*
\ No newline at end of file
model_zoo/vit/__pycache__/__init__.cpython-37.pyc
deleted
100644 → 0
View file @
c4dd1fd4
File deleted
model_zoo/vit/__pycache__/vision_transformer_from_config.cpython-37.pyc
deleted
100644 → 0
View file @
c4dd1fd4
File deleted
model_zoo/vit/__pycache__/vit.cpython-37.pyc
deleted
100644 → 0
View file @
c4dd1fd4
File deleted
model_zoo/vit/vision_transformer_from_config.py
deleted
100644 → 0
View file @
c4dd1fd4
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
import
torch
from
colossalai.registry
import
MODELS
from
colossalai.nn.model.model_from_config
import
ModelFromConfig
@MODELS.register_module
class VisionTransformerFromConfig(ModelFromConfig):
    """Vision Transformer from
    `"An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale" <https://arxiv.org/pdf/2010.11929>`_.

    The constructor only assembles ``self.layers_cfg``, an ordered list of
    layer configs: embedding, optional tensor splitting, optional token
    fusion, ``depth`` block configs, norm and head. Turning those configs into
    ``self.layers`` is not done in this class (presumably handled by
    ``ModelFromConfig`` -- confirm against the base class).

    Args:
        embedding_cfg: config of the embedding layer.
        norm_cfg: config of the final normalisation layer.
        block_cfg: config template for one transformer block; it must contain
            a ``'droppath_cfg'`` dict, whose ``'drop_path'`` entry is set per
            block.
        head_cfg: config of the classification head.
        token_fusion_cfg: optional config of a token fusion layer.
        embed_dim: embedding size, stored as ``self.embed_dim``.
        depth: number of transformer blocks.
        drop_path_rate: drop-path rate of the last block; earlier blocks get
            linearly spaced values starting from 0.
        tensor_splitting_cfg: optional config of an input tensor splitting
            layer.
    """

    def __init__(self,
                 embedding_cfg: dict,
                 norm_cfg: dict,
                 block_cfg: dict,
                 head_cfg: dict,
                 token_fusion_cfg: dict = None,
                 embed_dim=768,
                 depth=12,
                 drop_path_rate=0.,
                 tensor_splitting_cfg: dict = None):
        super().__init__()
        self.embed_dim = embed_dim
        self.num_tokens = 1
        self.tensor_splitting_cfg = tensor_splitting_cfg

        # Stochastic depth decay rule: one linearly spaced rate per block.
        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)]

        # Wrap the optional config in a list so it can be star-expanded below.
        if token_fusion_cfg is None:
            token_fusion_cfg = []
        else:
            token_fusion_cfg = [token_fusion_cfg]

        self.layers_cfg = [
            embedding_cfg,

            # input tensor splitting
            *self._generate_tensor_splitting_cfg(),
            *token_fusion_cfg,

            # blocks
            *self._generate_block_cfg(dpr=dpr, block_cfg=block_cfg, depth=depth),

            # norm
            norm_cfg,

            # head
            head_cfg
        ]

    def _fuse_tokens(self, x):
        """Prepend the class token to ``x`` along dimension 1.

        NOTE(review): ``self.cls_token`` is not assigned anywhere in this
        class; it has to be provided elsewhere before this method is called.
        """
        cls_token = self.cls_token.expand(x.shape[0], -1, -1)
        x = torch.cat((cls_token, x), dim=1)
        return x

    def _generate_block_cfg(self, dpr, depth, block_cfg):
        """Return ``depth`` independent copies of ``block_cfg``, the i-th one
        carrying ``dpr[i]`` as its ``['droppath_cfg']['drop_path']`` value.

        Raises:
            KeyError: if ``block_cfg`` has no ``'droppath_cfg'`` entry.
        """
        # Imported locally so that this block does not depend on a file-level
        # import being present.
        from copy import deepcopy

        blocks_cfg = []
        for i in range(depth):
            # A deep copy is required: with a shallow copy the nested
            # 'droppath_cfg' dict would be shared by every block (and by the
            # caller's block_cfg), so all blocks would end up with the last
            # drop-path rate.
            _cfg = deepcopy(block_cfg)
            _cfg['droppath_cfg']['drop_path'] = dpr[i]
            blocks_cfg.append(_cfg)
        return blocks_cfg

    def _generate_tensor_splitting_cfg(self):
        """Return the tensor splitting config wrapped in a list, or an empty
        list when none (or a falsy one) was given."""
        if self.tensor_splitting_cfg:
            return [self.tensor_splitting_cfg]
        else:
            return []

    def forward(self, x):
        """Feed ``x`` through every entry of ``self.layers`` in order.

        When a layer returns a tuple, it is unpacked into positional
        arguments for the next layer.
        """
        # example input shape noted by the original author: [512, 3, 32, 32]
        for layer in self.layers:
            if isinstance(x, tuple):
                x = layer(*x)
            else:
                x = layer(x)
        # example output shape noted by the original author: [256, 5]
        return x

    def init_weights(self):
        """Placeholder; no weight initialisation is performed yet."""
        # TODO: add init weights
        pass
Prev
1
…
33
34
35
36
37
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment