OpenDAS / ColossalAI

Commit 40b7d55b (unverified)
[Gemini] add albert in test models. (#2075)
Authored Dec 05, 2022 by Jiarui Fang, committed via GitHub on Dec 05, 2022
Parent: 616ed91e

Showing 5 changed files with 64 additions and 3 deletions (+64 -3)
tests/components_to_test/__init__.py            +2   -0
tests/components_to_test/albert.py              +59  -0
tests/test_gemini/test_runtime_mem_tracer.py    +1   -1
tests/test_gemini/update/test_fwd_bwd.py        +1   -1
tests/test_gemini/update/test_optim.py          +1   -1
tests/components_to_test/__init__.py

@@ -9,3 +9,5 @@ from . import (
     simple_net,
 )
 from .utils import run_fwd_bwd
+
+from . import albert    # isort:skip
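For orientation, here is a minimal, illustrative sketch of the registry pattern that the new albert.py below relies on. This is an assumption about its shape, not the repository's actual tests/components_to_test/registry.py, but it matches how register() and get_callable() are used in this commit.

# Hypothetical reimplementation of the component registry, for illustration only.
class ComponentRegistry:

    def __init__(self):
        self._registry = {}

    def register(self, name):
        # used below as @non_distributed_component_funcs.register(name='albert')
        def wrapper(func):
            self._registry[name] = func
            return func

        return wrapper

    def get_callable(self, name):
        # used in the tests as non_distributed_component_funcs.get_callable(model_name)
        return self._registry[name]


non_distributed_component_funcs = ComponentRegistry()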
tests/components_to_test/albert.py    new file (0 → 100644)

import torch
import transformers
from packaging import version
from transformers import AlbertConfig, AlbertForSequenceClassification

from .bert import get_bert_data_loader
from .registry import non_distributed_component_funcs


@non_distributed_component_funcs.register(name='albert')
def get_training_components():

    hidden_dim = 8
    num_head = 4
    sequence_length = 12
    num_layer = 2
    vocab_size = 32

    def bert_model_builder(checkpoint: bool = False):
        config = AlbertConfig(vocab_size=vocab_size,
                              gradient_checkpointing=checkpoint,
                              hidden_size=hidden_dim,
                              intermediate_size=hidden_dim * 4,
                              num_attention_heads=num_head,
                              max_position_embeddings=sequence_length,
                              num_hidden_layers=num_layer,
                              hidden_dropout_prob=0.,
                              attention_probs_dropout_prob=0.)
        print('building AlbertForSequenceClassification model')

        # adapting huggingface BertForSequenceClassification for single unitest calling interface
        class ModelAaptor(AlbertForSequenceClassification):

            def forward(self, input_ids, labels):
                """
                inputs: data, label
                outputs: loss
                """
                return super().forward(input_ids=input_ids, labels=labels)[0]

        model = ModelAaptor(config)
        # if checkpoint and version.parse(transformers.__version__) >= version.parse("4.11.0"):
        #     model.gradient_checkpointing_enable()

        return model

    is_distrbuted = torch.distributed.is_initialized()
    trainloader = get_bert_data_loader(n_class=vocab_size,
                                       batch_size=2,
                                       total_samples=10000,
                                       sequence_length=sequence_length,
                                       is_distrbuted=is_distrbuted)
    testloader = get_bert_data_loader(n_class=vocab_size,
                                      batch_size=2,
                                      total_samples=10000,
                                      sequence_length=sequence_length,
                                      is_distrbuted=is_distrbuted)

    criterion = None
    return bert_model_builder, trainloader, testloader, torch.optim.Adam, criterion
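As a usage sketch, the component tuple registered above is retrieved by name; the get_callable() call mirrors the test diffs below, while the short training loop is illustrative (not code from this commit) and assumes the loader yields (input_ids, labels) batches, as get_bert_data_loader suggests.

# Illustrative consumption of the registered 'albert' components.
from tests.components_to_test.registry import non_distributed_component_funcs

get_components_func = non_distributed_component_funcs.get_callable('albert')
model_builder, trainloader, testloader, optim_class, criterion = get_components_func()

model = model_builder(checkpoint=False)
optimizer = optim_class(model.parameters(), lr=1e-3)

for step, (input_ids, labels) in enumerate(trainloader):
    if step == 2:    # a couple of steps suffice for a smoke test
        break
    optimizer.zero_grad()
    loss = model(input_ids, labels)    # ModelAaptor.forward returns the loss directly
    loss.backward()
    optimizer.step()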
tests/test_gemini/test_runtime_mem_tracer.py

@@ -22,7 +22,7 @@ def run_fwd_bwd(model, data, label, criterion, enable_autocast=False, dtype=torc
 def test_runtime_mem_tracer():
-    test_models = ['gpt2', 'bert', 'simple_net', 'repeated_computed_layers', 'nested_model']
+    test_models = ['gpt2', 'bert', 'simple_net', 'repeated_computed_layers', 'nested_model', 'albert']
     for model_name in test_models:
         get_components_func = non_distributed_component_funcs.get_callable(model_name)
...
tests/test_gemini/update/test_fwd_bwd.py

@@ -36,7 +36,7 @@ def check_grad(model: ZeroDDP, torch_model: torch.nn.Module):
 @parameterize('placement_policy', ['cuda', 'cpu', 'auto', 'const'])
 @parameterize('keep_gather', [False, True])
-@parameterize('model_name', ['gpt2', 'bert'])
+@parameterize('model_name', ['gpt2', 'bert', 'albert'])
 @parameterize('use_grad_checkpoint', [False, True])
 def exam_gpt_fwd_bwd(placement_policy, keep_gather, model_name: str, use_grad_checkpoint: bool = False):
     set_seed(42)
...
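The stacked @parameterize decorators above run exam_gpt_fwd_bwd over the full Cartesian product of settings: 4 placement policies × 2 keep_gather values × 3 model names × 2 checkpoint flags = 48 runs. Below is a minimal sketch of that decorator pattern, a hedged reimplementation of the idea rather than colossalai.testing's actual code.

import functools

# Sketch of a parameterize decorator: each application iterates one keyword
# argument, so stacking decorators yields the Cartesian product of all values.
def parameterize(arg_name, values):

    def decorator(func):

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for value in values:
                kwargs[arg_name] = value
                func(*args, **kwargs)

        return wrapper

    return decorator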
tests/test_gemini/update/test_optim.py

@@ -27,7 +27,7 @@ from tests.test_tensor.common_utils import debug_print, set_seed
 # this model is large enough to slice to chunks
 TEST_MODELS = ['gpt2']
 # these models are too small, all parameters in these models are compacted into one chunk
-EXAMPLE_MODELS = ['hanging_param_model', 'bert', 'simple_net', 'nested_model', 'repeated_computed_layers']
+EXAMPLE_MODELS = ['albert', 'hanging_param_model', 'bert', 'simple_net', 'nested_model', 'repeated_computed_layers']

 def check_param(model: ZeroDDP, torch_model: torch.nn.Module):
...