OpenDAS / ColossalAI / Commits

Commit fae6c92e (unverified)
Authored Sep 05, 2023 by Hongxin Liu; committed via GitHub on Sep 05, 2023

    Merge branch 'main' into feature/shardformer

Parents: bd186784, ac178ca5
Changes: 113 files in total. Showing 13 changed files with 23 additions and 120 deletions (+23, -120).
- examples/tutorial/sequence_parallel/train.py (+1, -1)
- pytest.ini (+1, -1)
- tests/test_booster/test_plugin/test_gemini_plugin.py (+1, -1)
- tests/test_checkpoint_io/test_low_level_zero_checkpoint_io.py (+12, -0)
- tests/test_data_pipeline_tensor_parallel/test_cifar_with_data_pipeline_tensor.py (+0, -100)
- tests/test_legacy/test_trainer/test_pipeline/test_p2p.py (+0, -0)
- tests/test_legacy/test_trainer/test_pipeline/test_pipeline_schedule.py (+0, -0)
- tests/test_legacy/test_trainer/test_trainer_with_non_pipe_schedule.py (+1, -1)
- tests/test_legacy/test_trainer/test_trainer_with_pipe_schedule.py (+1, -1)
- tests/test_moe/test_grad_handler.py (+1, -1)
- tests/test_moe/test_moe_zero_model.py (+1, -1)
- tests/test_moe/test_moe_zero_optim.py (+1, -1)
- tests/test_pipeline/test_cuda_rpc_performance.py (+3, -12)
examples/tutorial/sequence_parallel/train.py

```diff
@@ -11,8 +11,8 @@ import colossalai
 from colossalai.amp import AMP_TYPE
 from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
-from colossalai.engine.schedule import PipelineSchedule
 from colossalai.kernel import LayerNorm
+from colossalai.legacy.engine.schedule import PipelineSchedule
 from colossalai.logging import get_dist_logger
 from colossalai.nn.optimizer import FusedAdam
 from colossalai.utils import MultiTimer, is_using_pp
```
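The only change here is the import path: `PipelineSchedule` now lives under `colossalai.legacy.engine.schedule`. For downstream code that has to run on both sides of this move, a fallback import is one option; a minimal sketch (not part of this commit), assuming the class itself is unchanged by the relocation:

```python
# Hypothetical compatibility shim: prefer the new legacy namespace,
# fall back to the pre-move path on older ColossalAI releases.
try:
    from colossalai.legacy.engine.schedule import PipelineSchedule
except ImportError:
    from colossalai.engine.schedule import PipelineSchedule
```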
pytest.ini

```diff
@@ -2,4 +2,4 @@
 markers =
     dist: tests which are run in a multi-GPU or multi-machine environment (at least 4 GPUs)
     largedist: tests which are run in a multi-GPU or multi-machine environment (at least 8 GPUs)
-addopts = --ignore=tests/test_analyzer --ignore=tests/test_auto_parallel --ignore=tests/test_autochunk --ignore=tests/test_moe --ignore=tests/test_fx
+addopts = --ignore=tests/test_analyzer --ignore=tests/test_auto_parallel --ignore=tests/test_autochunk --ignore=tests/test_moe --ignore=tests/test_fx --ignore=tests/test_legacy
\ No newline at end of file
```
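The new `--ignore=tests/test_legacy` entry keeps the relocated legacy tests out of the default run, alongside the other ignored suites. As a quick illustration of how these settings compose, here is a sketch of selecting only the `dist`-marked tests programmatically; it assumes pytest is invoked from the repository root so this `pytest.ini` (both the markers and the `addopts` ignores) is picked up automatically:

```python
# Equivalent to running `pytest -m dist tests/` from the repo root.
import sys

import pytest

# The addopts ignores from pytest.ini are appended automatically; -m filters
# to tests marked @pytest.mark.dist (>= 4 GPUs, per the marker description).
sys.exit(pytest.main(["-m", "dist", "tests/"]))
```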
tests/test_booster/test_plugin/test_gemini_plugin.py

```diff
@@ -98,7 +98,7 @@ def check_gemini_plugin(subset: str, init_method: str = 'none', early_stop: bool
         ]:
             continue
         err = run_fn(init_method, model_fn, data_gen_fn, output_transform_fn)
-
+        torch.cuda.empty_cache()
         if err is None:
             passed_models.append(name)
         else:
```
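The inserted `torch.cuda.empty_cache()` releases cached allocator blocks between sub-tests, so memory held over from one model run does not push a later one out of memory. The same pattern in isolation, with a hypothetical `run_case` standing in for `run_fn`:

```python
# Sketch of the pattern: reclaim cached CUDA memory between independent cases.
import torch

def run_all(cases):
    failures = {}
    for name, run_case in cases.items():
        err = run_case()            # returns None on success, else an error
        torch.cuda.empty_cache()    # drop cached blocks before the next case
        if err is not None:
            failures[name] = err
    return failures
```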
tests/test_checkpoint_io/test_low_level_zero_checkpoint_io.py

```diff
@@ -14,6 +14,7 @@ from colossalai.testing import (
     rerun_if_address_is_in_use,
     spawn,
 )
+from colossalai.zero import LowLevelZeroOptimizer

 # stage 1 and 2 process the optimizer/mode the same way
```

```diff
@@ -50,6 +51,17 @@ def check_low_level_zero_checkpointIO(stage: int, shard: bool, offload: bool):
     booster.load_model(new_model, model_ckpt_path)
     check_state_dict_equal(model.state_dict(), new_model.state_dict(), False)

+    # check master weight
+    assert isinstance(new_optimizer, LowLevelZeroOptimizer)
+    working_param_id_set = set(id(p) for p in new_model.parameters())
+    for p_id, master_param in new_optimizer._param_store.working_to_master_param.items():
+        assert p_id in working_param_id_set
+        working_param = new_optimizer._param_store.master_to_working_param[id(master_param)]
+        padding = new_optimizer._param_store.get_param_padding_size(working_param)
+        padded_param = torch.nn.functional.pad(working_param.data.view(-1), (0, padding))
+        working_shard = padded_param.chunk(dist.get_world_size())[dist.get_rank()]
+        assert torch.equal(working_shard,
+                           master_param.data.view(-1).to(dtype=padded_param.dtype, device=padded_param.device))

     booster.load_optimizer(new_optimizer, optimizer_ckpt_path)
     check_state_dict_equal(optimizer.optim.state_dict(), new_optimizer.optim.state_dict(), False)
```
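The new assertions encode the low-level ZeRO optimizer's master-weight layout as exercised by this test: each working parameter is flattened, padded so its length divides the world size, and this rank's chunk must equal the master copy after a dtype/device cast. A self-contained sketch of that expected-shard computation, with plain integers in place of the `torch.distributed` calls so it runs in a single process:

```python
import torch

def expected_master_shard(working_param: torch.Tensor, padding: int,
                          world_size: int, rank: int) -> torch.Tensor:
    """Flatten, pad to a multiple of world_size, and take this rank's chunk."""
    flat = working_param.data.view(-1)
    padded = torch.nn.functional.pad(flat, (0, padding))
    return padded.chunk(world_size)[rank]

# 10 elements padded by 2 -> 12 elements, split across 4 ranks -> 3 each.
p = torch.randn(10)
shard = expected_master_shard(p, padding=2, world_size=4, rank=1)
assert shard.numel() == 3
```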
tests/test_data_pipeline_tensor_parallel/test_cifar_with_data_pipeline_tensor.py (deleted, file mode 100644 → 0)

```diff
-import os
-from pathlib import Path
-
-import pytest
-import torch
-from torchvision import transforms
-from torchvision.datasets import CIFAR10
-
-import colossalai
-from colossalai.amp import AMP_TYPE
-from colossalai.context import ParallelMode
-from colossalai.core import global_context as gpc
-from colossalai.logging import get_dist_logger
-from colossalai.nn import CrossEntropyLoss
-from colossalai.nn.lr_scheduler import CosineAnnealingWarmupLR
-from colossalai.pipeline.pipelinable import PipelinableContext
-from colossalai.testing import rerun_if_address_is_in_use, skip_if_not_enough_gpus, spawn
-from colossalai.trainer import Trainer, hooks
-from colossalai.utils import get_dataloader
-
-BATCH_SIZE = 4
-NUM_EPOCHS = 60
-WARMUP_EPOCHS = 5
-CONFIG = dict(NUM_MICRO_BATCHES=2,
-              parallel=dict(pipeline=2, tensor=dict(size=2, mode='1d')),
-              fp16=dict(mode=AMP_TYPE.NAIVE),
-              gradient_accumulation=2)
-
-
-def run_trainer(rank, world_size, port):
-    colossalai.launch(config=CONFIG, rank=rank, world_size=world_size, host='localhost', port=port, backend='nccl')
-
-    logger = get_dist_logger()
-
-    # get logger
-    logger = get_dist_logger()
-
-    pipelinable = PipelinableContext()
-    try:
-        from titans.model.vit import vit_tiny_patch4_32
-    except ImportError:
-        logger.warning('skip the test_cifar_with_data_pipeline_tensor test because titan is not installed')
-        logger.warning('please install titan from https://github.com/hpcaitech/Titans')
-        return
-    with pipelinable:
-        model = vit_tiny_patch4_32()
-    pipelinable.to_layer_list()
-    pipelinable.policy = "uniform"
-    model = pipelinable.partition(1, gpc.pipeline_parallel_size, gpc.get_local_rank(ParallelMode.PIPELINE))
-
-    # create dataloaders
-    root = Path(os.environ['DATA'])
-    transform_train = transforms.Compose([
-        transforms.RandomCrop(32, padding=4, pad_if_needed=True),
-        transforms.AutoAugment(policy=transforms.AutoAugmentPolicy.CIFAR10),
-        transforms.ToTensor(),
-        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
-    ])
-    train_dataset = CIFAR10(root=root, train=True, download=True, transform=transform_train)
-    train_dataloader = get_dataloader(dataset=train_dataset, shuffle=True, batch_size=BATCH_SIZE, pin_memory=True)
-
-    # create loss function
-    criterion = CrossEntropyLoss(label_smoothing=0.1)
-
-    # create optimizer
-    optimizer = torch.optim.AdamW(model.parameters(), lr=0.001, weight_decay=0)
-
-    # create lr scheduler
-    lr_scheduler = CosineAnnealingWarmupLR(optimizer=optimizer, total_steps=NUM_EPOCHS, warmup_steps=WARMUP_EPOCHS)
-
-    # initialize
-    engine, train_dataloader, *_ = colossalai.initialize(model=model,
-                                                         optimizer=optimizer,
-                                                         criterion=criterion,
-                                                         train_dataloader=train_dataloader)
-
-    logger = get_dist_logger()
-
-    trainer = Trainer(engine=engine, logger=logger)
-
-    hook_list = [
-        hooks.LRSchedulerHook(lr_scheduler=lr_scheduler, by_epoch=False),
-    ]
-
-    trainer.fit(train_dataloader=train_dataloader,
-                epochs=NUM_EPOCHS,
-                max_steps=2,
-                hooks=hook_list,
-                display_progress=True)
-
-
-@pytest.mark.dist
-@skip_if_not_enough_gpus(min_gpus=8)
-@rerun_if_address_is_in_use()
-def test_hybrid_parallel():
-    spawn(run_trainer, 8)
-
-
-if __name__ == '__main__':
-    test_hybrid_parallel()
```
tests/test_trainer/test_pipeline/test_p2p.py → tests/test_legacy/test_trainer/test_pipeline/test_p2p.py

File moved without content changes.
tests/test_trainer/test_pipeline/test_pipeline_schedule.py → tests/test_legacy/test_trainer/test_pipeline/test_pipeline_schedule.py

File moved without content changes.
tests/test_trainer/test_trainer_with_non_pipe_schedule.py → tests/test_legacy/test_trainer/test_trainer_with_non_pipe_schedule.py

```diff
@@ -3,9 +3,9 @@ import torch
 import colossalai
 from colossalai.amp.amp_type import AMP_TYPE
+from colossalai.legacy.trainer import Trainer
 from colossalai.logging import get_dist_logger
 from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
-from colossalai.trainer import Trainer
 from colossalai.utils import MultiTimer
 from tests.components_to_test.registry import non_distributed_component_funcs
```
tests/test_trainer/test_trainer_with_pipe_schedule.py → tests/test_legacy/test_trainer/test_trainer_with_pipe_schedule.py

```diff
@@ -12,9 +12,9 @@ from torchvision.models import resnet18
 import colossalai
 from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
+from colossalai.legacy.trainer import Trainer
 from colossalai.logging import get_dist_logger
 from colossalai.testing import rerun_if_address_is_in_use, spawn
-from colossalai.trainer import Trainer
 from colossalai.utils import MultiTimer, get_dataloader

 BATCH_SIZE = 4
```
tests/test_moe/test_grad_handler.py

```diff
@@ -5,7 +5,7 @@ import torch.nn as nn
 import colossalai
 from colossalai.context.moe_context import MOE_CONTEXT
-from colossalai.engine.gradient_handler import MoeGradientHandler
+from colossalai.legacy.engine.gradient_handler import MoeGradientHandler
 from colossalai.nn.layer.moe import Experts, MoeLayer, Top1Router, UniformNoiseGenerator
 from colossalai.testing import assert_equal_in_group, rerun_if_address_is_in_use, spawn
 from colossalai.utils import get_current_device
```
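This same one-word path change (`colossalai.engine` to `colossalai.legacy.engine`) repeats in the two MoE ZeRO tests below. If external code needs to import `MoeGradientHandler` across both layouts, a lookup over candidate module paths is one option; a hypothetical helper (not part of this commit), assuming the class is identical in either location:

```python
# Resolve MoeGradientHandler from whichever module path the installed
# ColossalAI version provides.
import importlib

def import_moe_gradient_handler():
    for path in ("colossalai.legacy.engine.gradient_handler",
                 "colossalai.engine.gradient_handler"):
        try:
            return getattr(importlib.import_module(path), "MoeGradientHandler")
        except (ImportError, AttributeError):
            continue
    raise ImportError("MoeGradientHandler not found in any known location")
```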
tests/test_moe/test_moe_zero_model.py

```diff
@@ -3,7 +3,7 @@ import torch
 import colossalai
 from colossalai.context import MOE_CONTEXT
-from colossalai.engine.gradient_handler import MoeGradientHandler
+from colossalai.legacy.engine.gradient_handler import MoeGradientHandler
 from colossalai.nn import MoeLoss
 from colossalai.testing import assert_equal_in_group, parameterize, rerun_if_address_is_in_use, spawn
 from colossalai.zero.legacy.init_ctx import ZeroInitContext
```
tests/test_moe/test_moe_zero_optim.py

```diff
@@ -4,7 +4,7 @@ import torch
 import colossalai
 from colossalai.amp import convert_to_apex_amp
 from colossalai.context import MOE_CONTEXT
-from colossalai.engine.gradient_handler import MoeGradientHandler
+from colossalai.legacy.engine.gradient_handler import MoeGradientHandler
 from colossalai.nn import MoeLoss
 from colossalai.nn.optimizer import CPUAdam
 from colossalai.testing import assert_equal_in_group, parameterize, rerun_if_address_is_in_use, spawn
```
tests/test_pipeline/test_cuda_rpc_performance.py

```diff
 import os
-from typing import Callable, List, Optional, Type, Union
 import time

 import pytest
 import torch
 import torch.nn as nn
+from rpc_test_utils import parse_args, rpc_run
 from titans.dataloader.cifar10 import build_cifar
 from torchvision.models import resnet50
-from torchvision.models.resnet import BasicBlock, Bottleneck, conv1x1
 from tqdm import tqdm
-from rpc_test_utils import rpc_run, parse_args

-import colossalai
-import colossalai.nn as col_nn
-from colossalai.logging import disable_existing_loggers, get_dist_logger
-from colossalai.trainer import Trainer, hooks
-from colossalai.utils import MultiTimer, get_dataloader
-from colossalai.context import ParallelMode
-from colossalai.pipeline.pipelinable import PipelinableContext, PipelinableModel
-from colossalai.pipeline.rpc import OneFOneBPipelineEngine, ChimeraPipelineEngine
-from colossalai.pipeline.pipeline_process_group import ppg
+from colossalai.pipeline.pipelinable import PipelinableContext
+from colossalai.pipeline.rpc import OneFOneBPipelineEngine


 def flatten(x):
```