OpenDAS / ColossalAI · Commits

Commit 71321a07 (Unverified)
Authored Jan 30, 2024 by digger yu; committed by GitHub on Jan 30, 2024
fix typo change dosen't to doesn't (#5308)
parent 6a3086a5
Showing 8 changed files with 14 additions and 14 deletions.
colossalai/shardformer/policies/falcon.py            +1 −1
colossalai/shardformer/policies/llama.py             +1 −1
colossalai/shardformer/policies/mistral.py           +4 −4
colossalai/shardformer/policies/opt.py               +1 −1
colossalai/shardformer/policies/t5.py                +2 −2
colossalai/shardformer/policies/vit.py               +1 −1
colossalai/shardformer/policies/whisper.py           +3 −3
examples/language/openmoe/model/openmoe_policy.py    +1 −1
colossalai/shardformer/policies/falcon.py

@@ -49,7 +49,7 @@ class FalconPolicy(Policy):
         if not self.model.config.new_decoder_architecture and self.model.config.multi_query:
             warnings.warn(
-                "Falcon dosen't support tensor parallelism when (not new_decoder_architecture and multi_query) is True, will ignore the tensor parallelism flag."
+                "Falcon doesn't support tensor parallelism when (not new_decoder_architecture and multi_query) is True, will ignore the tensor parallelism flag."
             )
             self.shard_config.enable_tensor_parallelism = False
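All eight files in this commit touch the same guard: a shardformer policy detects a shard_config flag it cannot honor, emits a warning, and silently clears the flag rather than failing. A minimal standalone sketch of that pattern follows; ShardConfig and DummyPolicy here are simplified stand-ins for illustration, not the actual ColossalAI classes.

    import warnings
    from dataclasses import dataclass


    @dataclass
    class ShardConfig:  # simplified stand-in, not colossalai.shardformer.ShardConfig
        enable_tensor_parallelism: bool = False
        enable_sequence_parallelism: bool = False


    class DummyPolicy:  # illustrative only
        def __init__(self, shard_config: ShardConfig):
            self.shard_config = shard_config

        def config_sanity_check(self):
            # Warn and fall back instead of failing when a flag is unsupported,
            # mirroring the hunks in this commit.
            if self.shard_config.enable_sequence_parallelism:
                self.shard_config.enable_sequence_parallelism = False
                warnings.warn(
                    "This model doesn't support sequence parallelism now, will ignore the sequence parallelism flag."
                )


    policy = DummyPolicy(ShardConfig(enable_sequence_parallelism=True))
    policy.config_sanity_check()  # emits the warning once and clears the flag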
colossalai/shardformer/policies/llama.py

@@ -46,7 +46,7 @@ class LlamaPolicy(Policy):
         if self.shard_config.enable_sequence_parallelism:
             self.shard_config.enable_sequence_parallelism = False
-            warnings.warn("Llama dosen't support sequence parallelism now, will ignore the sequence parallelism flag.")
+            warnings.warn("Llama doesn't support sequence parallelism now, will ignore the sequence parallelism flag.")
         if self.shard_config.enable_tensor_parallelism:
             decoder_attribute_replacement = {
colossalai/shardformer/policies/mistral.py

@@ -35,7 +35,7 @@ class MistralPolicy(Policy):
         if self.shard_config.enable_sequence_parallelism:
             self.shard_config.enable_sequence_parallelism = False
             warnings.warn(
-                "Mistral dosen't support sequence parallelism now, will ignore the sequence parallelism flag."
+                "Mistral doesn't support sequence parallelism now, will ignore the sequence parallelism flag."
             )
         if self.shard_config.enable_tensor_parallelism:

@@ -136,7 +136,7 @@ class MistralModelPolicy(MistralPolicy):
     def module_policy(self):
         if self.pipeline_stage_manager:
-            warnings.warn("Mistral dosen't support pipeline parallelism now.")
+            warnings.warn("Mistral doesn't support pipeline parallelism now.")
         return super().module_policy()

@@ -160,7 +160,7 @@ class MistralForCausalLMPolicy(MistralPolicy):
             }
             if self.pipeline_stage_manager:
-                warnings.warn("Mistral dosen't support pipeline parallelism now.")
+                warnings.warn("Mistral doesn't support pipeline parallelism now.")
             policy.update(new_item)

@@ -186,7 +186,7 @@ class MistralForSequenceClassificationPolicy(MistralPolicy):
             }
             if self.pipeline_stage_manager:
-                warnings.warn("Mistral dosen't support pipeline parallelism now.")
+                warnings.warn("Mistral doesn't support pipeline parallelism now.")
             policy.update(new_item)
         return policy
colossalai/shardformer/policies/opt.py

@@ -59,7 +59,7 @@ class OPTPolicy(Policy):
         if self.shard_config.enable_sequence_parallelism:
             self.shard_config.enable_sequence_parallelism = False
-            warnings.warn("OPT dosen't support sequence parallelism now, will ignore the sequence parallelism flag.")
+            warnings.warn("OPT doesn't support sequence parallelism now, will ignore the sequence parallelism flag.")
         if self.shard_config.enable_tensor_parallelism:
             policy[OPTDecoder] = ModulePolicyDescription(
colossalai/shardformer/policies/t5.py

@@ -66,7 +66,7 @@ class T5BasePolicy(Policy):
         if self.shard_config.enable_sequence_parallelism:
             self.shard_config.enable_sequence_parallelism = False
-            warnings.warn("T5 dosen't support sequence parallelism now, will ignore the sequence parallelism flag.")
+            warnings.warn("T5 doesn't support sequence parallelism now, will ignore the sequence parallelism flag.")
         if self.shard_config.enable_tensor_parallelism:
             policy[T5Stack] = ModulePolicyDescription(

@@ -263,7 +263,7 @@ class T5BasePolicy(Policy):
         if num_decoder_layers == 0:
             return Policy.distribute_layers(num_encoder_layers, num_stages), num_stages
-        # the number of stages distributed between encoder and decoder is optmized in this way:
+        # the number of stages distributed between encoder and decoder is optimized in this way:
         # num_encoder_stages = argmin(abs(num_encoder_layers / encoder_stages - num_decoder_layers / decoder_stages))
         # s.t. num_encoder_stages + num_decoder_stages = num_stages, num_encoder_stages >= 1, num_decoder_stages >= 1
         def objective(num_encoder_stages):
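The comment fixed above documents how T5BasePolicy splits pipeline stages between encoder and decoder: it picks the encoder stage count that balances layers per stage on both sides. A small sketch of that argmin search as the comment describes it; this is a simplified illustration, not the actual distribute_layers implementation.

    def split_stages(num_encoder_layers: int, num_decoder_layers: int, num_stages: int) -> int:
        # argmin |enc_layers / enc_stages - dec_layers / dec_stages|
        # s.t. enc_stages + dec_stages == num_stages, both >= 1
        def objective(num_encoder_stages: int) -> float:
            num_decoder_stages = num_stages - num_encoder_stages
            return abs(num_encoder_layers / num_encoder_stages - num_decoder_layers / num_decoder_stages)

        return min(range(1, num_stages), key=objective)


    # e.g. 12 encoder layers and 4 decoder layers over 4 stages -> 3 encoder stages
    print(split_stages(12, 4, 4))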
colossalai/shardformer/policies/vit.py

@@ -33,7 +33,7 @@ class ViTPolicy(Policy):
         if self.shard_config.enable_sequence_parallelism:
             self.shard_config.enable_sequence_parallelism = False
-            warnings.warn("Vit dosen't support sequence parallelism now, will ignore the sequence parallelism flag.")
+            warnings.warn("Vit doesn't support sequence parallelism now, will ignore the sequence parallelism flag.")
         if self.shard_config.enable_tensor_parallelism:
             policy[ViTEmbeddings] = ModulePolicyDescription(
colossalai/shardformer/policies/whisper.py

@@ -69,13 +69,13 @@ class WhisperPolicy(Policy):
         if self.shard_config.enable_sequence_parallelism:
             self.shard_config.enable_sequence_parallelism = False
             warnings.warn(
-                "Whisper dosen't support sequence parallelism now, will ignore the sequence parallelism flag."
+                "Whisper doesn't support sequence parallelism now, will ignore the sequence parallelism flag."
             )
         # TODO using the jit fused add_and_dropout affect the accuracy
         if self.shard_config.enable_jit_fused:
             self.shard_config.enable_jit_fused = False
-            warnings.warn("Whisper dosen't support jit fused operator now, will ignore the jit fused operator flag.")
+            warnings.warn("Whisper doesn't support jit fused operator now, will ignore the jit fused operator flag.")
         if self.shard_config.enable_tensor_parallelism:
             policy[WhisperEncoderLayer] = ModulePolicyDescription(

@@ -302,7 +302,7 @@ class WhisperPolicy(Policy):
         if num_decoder_layers == 0:
             return Policy.distribute_layers(num_encoder_layers, num_stages), num_stages
-        # the number of stages distributed between encoder and decoder is optmized in this way:
+        # the number of stages distributed between encoder and decoder is optimized in this way:
         # num_encoder_stages = argmin(abs(num_encoder_layers / encoder_stages - num_decoder_layers / decoder_stages))
         # s.t. num_encoder_stages + num_decoder_stages = num_stages, num_encoder_stages >= 1, num_decoder_stages >= 1
         def objective(num_encoder_stages):
examples/language/openmoe/model/openmoe_policy.py

@@ -43,7 +43,7 @@ class OpenMoePolicy(Policy):
         if self.shard_config.enable_sequence_parallelism:
             self.shard_config.enable_sequence_parallelism = False
             raise NotImplementedError(
-                "openmoe dosen't support sequence parallelism now, will ignore the sequence parallelism flag."
+                "openmoe doesn't support sequence parallelism now, will ignore the sequence parallelism flag."
             )
         if self.shard_config.enable_tensor_parallelism:
             raise NotImplementedError("Tensor parallelism is not supported for openmoe model now.")