Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ColossalAI
Commits
2ac24040
"...git@developer.sourcefind.cn:OpenDAS/colossalai.git" did not exist on "2a951955ade14fd067bc5bee34a5ff7e57513ac6"
Unverified
Commit
2ac24040
authored
Jul 04, 2023
by
digger yu
Committed by
GitHub
Jul 04, 2023
Browse files
fix some typo colossalai/shardformer (#4160)
parent
c77b3b19
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
9 additions
and
9 deletions
+9
-9
colossalai/shardformer/README.md
colossalai/shardformer/README.md
+1
-1
colossalai/shardformer/layer/loss.py
colossalai/shardformer/layer/loss.py
+6
-6
colossalai/shardformer/policies/basepolicy.py
colossalai/shardformer/policies/basepolicy.py
+1
-1
colossalai/shardformer/shard/sharder.py
colossalai/shardformer/shard/sharder.py
+1
-1
No files found.
colossalai/shardformer/README.md
View file @
2ac24040
...
@@ -252,7 +252,7 @@ class ModelSharder:
...
@@ -252,7 +252,7 @@ class ModelSharder:
def
shard
(
self
)
->
None
:
def
shard
(
self
)
->
None
:
"""
"""
Shard model with parallel
el
ism with the help of pre-processing, replace_model_class, replace_module, and post-processing.
Shard model with parallelism with the help of pre-processing, replace_model_class, replace_module, and post-processing.
"""
"""
...
...
...
...
colossalai/shardformer/layer/loss.py
View file @
2ac24040
...
@@ -48,13 +48,13 @@ class DistCrossEntropy(Function):
...
@@ -48,13 +48,13 @@ class DistCrossEntropy(Function):
# [down, up) => false, other device and -100 => true
# [down, up) => false, other device and -100 => true
delta
=
(
global_vocab_size
+
world_size
-
1
)
//
world_size
delta
=
(
global_vocab_size
+
world_size
-
1
)
//
world_size
down_
s
hreshold
=
rank
*
delta
down_
t
hreshold
=
rank
*
delta
up_
s
hreshold
=
down_
s
hreshold
+
delta
up_
t
hreshold
=
down_
t
hreshold
+
delta
mask
=
(
target
<
down_
s
hreshold
)
|
(
target
>=
up_
s
hreshold
)
mask
=
(
target
<
down_
t
hreshold
)
|
(
target
>=
up_
t
hreshold
)
masked_target
=
target
.
clone
()
-
down_
s
hreshold
masked_target
=
target
.
clone
()
-
down_
t
hreshold
masked_target
[
mask
]
=
0
masked_target
[
mask
]
=
0
# reshape the logi
s
t and target
# reshape the logit
s
and target
# reshape the vocab_logits to [bath_size * seq_len, vocab_size]
# reshape the vocab_logits to [bath_size * seq_len, vocab_size]
# reshape the labels to [bath_size * seq_len]
# reshape the labels to [bath_size * seq_len]
logits_2d
=
vocab_logits
.
view
(
-
1
,
partition_vocab_size
)
logits_2d
=
vocab_logits
.
view
(
-
1
,
partition_vocab_size
)
...
@@ -79,7 +79,7 @@ class DistCrossEntropy(Function):
...
@@ -79,7 +79,7 @@ class DistCrossEntropy(Function):
loss
=
torch
.
where
(
target
==
ignore_index
,
0.0
,
torch
.
log
(
sum_exp_logits
)
-
pred_logits
)
loss
=
torch
.
where
(
target
==
ignore_index
,
0.0
,
torch
.
log
(
sum_exp_logits
)
-
pred_logits
)
loss
=
torch
.
sum
(
loss
).
div_
(
torch
.
sum
(
loss
!=
0.0
))
loss
=
torch
.
sum
(
loss
).
div_
(
torch
.
sum
(
loss
!=
0.0
))
# caculate the softmax
# ca
l
culate the softmax
exp_logits
.
div_
(
sum_exp_logits
.
unsqueeze
(
dim
=-
1
))
exp_logits
.
div_
(
sum_exp_logits
.
unsqueeze
(
dim
=-
1
))
ctx
.
save_for_backward
(
exp_logits
,
mask
,
masked_target_1d
)
ctx
.
save_for_backward
(
exp_logits
,
mask
,
masked_target_1d
)
...
...
colossalai/shardformer/policies/basepolicy.py
View file @
2ac24040
...
@@ -66,7 +66,7 @@ class Policy(ABC):
...
@@ -66,7 +66,7 @@ class Policy(ABC):
like BertPolicy for Bert Model or OPTPolicy for OPT model.
like BertPolicy for Bert Model or OPTPolicy for OPT model.
Shardformer has provided many built-in sharding policies for the mainstream models. You can use the
Shardformer has provided many built-in sharding policies for the mainstream models. You can use the
built-in policies by setting `policy = None`, which is already the default argu
e
mnt for `Shardformer.optimize`.
built-in policies by setting `policy = None`, which is already the default argum
e
nt for `Shardformer.optimize`.
If you want to define your own policy, you can inherit from this class and overwrite the methods you want to modify.
If you want to define your own policy, you can inherit from this class and overwrite the methods you want to modify.
"""
"""
...
...
colossalai/shardformer/shard/sharder.py
View file @
2ac24040
...
@@ -73,7 +73,7 @@ class ModelSharder(object):
...
@@ -73,7 +73,7 @@ class ModelSharder(object):
layer (torch.nn.Module): The object of layer to shard
layer (torch.nn.Module): The object of layer to shard
origin_cls (Union[str, torch.nn.Module]): The origin layer class or a string of layer class name.
origin_cls (Union[str, torch.nn.Module]): The origin layer class or a string of layer class name.
attr_replacement (Dict): The attribute dict to modify
attr_replacement (Dict): The attribute dict to modify
param_replacement (List[Callable]): The function list to get parameter shard information in polic
param_replacement (List[Callable]): The function list to get parameter shard information in polic
y
sub_module_replacement (List[Callable]): The function list to get sub module shard information in policy
sub_module_replacement (List[Callable]): The function list to get sub module shard information in policy
"""
"""
if
(
isinstance
(
origin_cls
,
str
)
and
origin_cls
==
module
.
__class__
.
__name__
)
or
\
if
(
isinstance
(
origin_cls
,
str
)
and
origin_cls
==
module
.
__class__
.
__name__
)
or
\
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment