Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ColossalAI
Commits
cd9c28e0
Unverified
Commit
cd9c28e0
authored
Dec 16, 2021
by
Frank Lee
Committed by
GitHub
Dec 16, 2021
Browse files
added CI for unit testing (#69)
parent
45355a62
Changes
68
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
593 additions
and
488 deletions
+593
-488
tests/test_trainer/configs/test_trainer_vit_2d.py
tests/test_trainer/configs/test_trainer_vit_2d.py
+0
-133
tests/test_trainer/test.sh
tests/test_trainer/test.sh
+0
-4
tests/test_trainer/test_pipeline/debug_schedule.py
tests/test_trainer/test_pipeline/debug_schedule.py
+0
-232
tests/test_trainer/test_pipeline/model/__init__.py
tests/test_trainer/test_pipeline/model/__init__.py
+2
-0
tests/test_trainer/test_pipeline/model/layers/__init__.py
tests/test_trainer/test_pipeline/model/layers/__init__.py
+3
-0
tests/test_trainer/test_pipeline/model/layers/basic_block.py
tests/test_trainer/test_pipeline/model/layers/basic_block.py
+64
-0
tests/test_trainer/test_pipeline/model/layers/bottleneck.py
tests/test_trainer/test_pipeline/model/layers/bottleneck.py
+69
-0
tests/test_trainer/test_pipeline/model/layers/conv.py
tests/test_trainer/test_pipeline/model/layers/conv.py
+15
-0
tests/test_trainer/test_pipeline/model/layers/reslayer.py
tests/test_trainer/test_pipeline/model/layers/reslayer.py
+63
-0
tests/test_trainer/test_pipeline/model/resnet.py
tests/test_trainer/test_pipeline/model/resnet.py
+163
-0
tests/test_trainer/test_pipeline/resnet_config.py
tests/test_trainer/test_pipeline/resnet_config.py
+19
-0
tests/test_trainer/test_pipeline/test_p2p.py
tests/test_trainer/test_pipeline/test_p2p.py
+28
-15
tests/test_trainer/test_pipeline/test_partition.py
tests/test_trainer/test_pipeline/test_partition.py
+24
-13
tests/test_trainer/test_pipeline/test_pipeline_schedule.py
tests/test_trainer/test_pipeline/test_pipeline_schedule.py
+94
-0
tests/test_trainer/test_pipeline/test_schedule.py
tests/test_trainer/test_pipeline/test_schedule.py
+0
-51
tests/test_trainer/test_trainer_with_non_pipe_schedule.py
tests/test_trainer/test_trainer_with_non_pipe_schedule.py
+22
-14
tests/test_trainer/test_trainer_with_pipe_schedule.py
tests/test_trainer/test_trainer_with_pipe_schedule.py
+22
-19
tests/test_utils/test_activation_checkpointing.py
tests/test_utils/test_activation_checkpointing.py
+1
-0
tests/test_utils/test_gradient_accumluation.py
tests/test_utils/test_gradient_accumluation.py
+4
-3
tests/test_zero_data_parallel/config.py
tests/test_zero_data_parallel/config.py
+0
-4
No files found.
tests/test_trainer/configs/test_trainer_vit_2d.py
deleted
100644 → 0
View file @
45355a62
import
os
from
pathlib
import
Path
from
colossalai.engine
import
AMP_TYPE
# Scalar hyper-parameters referenced by the dictionaries below.
BATCH_SIZE = 512
IMG_SIZE = 32
PATCH_SIZE = 4
DIM = 512
NUM_ATTENTION_HEADS = 8
SUMMA_DIM = 2
NUM_CLASSES = 10
DEPTH = 6

num_epochs = 60

# Training split: dataset description plus the keyword arguments for its
# data loader.  The dataset root is read from the DATA environment variable,
# so loading this file raises KeyError when DATA is unset.
# num_workers is deliberately not set here.
train_data = {
    'dataset': {
        'type': 'CIFAR10Dataset',
        'root': Path(os.environ['DATA']),
        'transform_pipeline': [
            {'type': 'Resize', 'size': IMG_SIZE},
            {'type': 'RandomCrop', 'size': IMG_SIZE, 'padding': 4},
            {'type': 'RandomHorizontalFlip'},
            {'type': 'ToTensor'},
            {
                'type': 'Normalize',
                'mean': [0.4914, 0.4822, 0.4465],
                'std': [0.2023, 0.1994, 0.2010],
            },
        ],
    },
    'dataloader': {
        'batch_size': BATCH_SIZE,
        'pin_memory': True,
        'shuffle': True,
    },
}

# Evaluation split: no random augmentation and a fixed batch size of 400.
test_data = {
    'dataset': {
        'type': 'CIFAR10Dataset',
        'root': Path(os.environ['DATA']),
        'train': False,
        'transform_pipeline': [
            {'type': 'Resize', 'size': IMG_SIZE},
            {'type': 'ToTensor'},
            {
                'type': 'Normalize',
                'mean': [0.4914, 0.4822, 0.4465],
                'std': [0.2023, 0.1994, 0.2010],
            },
        ],
    },
    'dataloader': {
        'batch_size': 400,
        'pin_memory': True,
    },
}

optimizer = {'type': 'Adam', 'lr': 0.001, 'weight_decay': 0}

loss = {'type': 'CrossEntropyLoss2D'}

# Vision Transformer assembled from per-component configs; every component
# uses a "2D" layer type except the drop-path module.
model = {
    'type': 'VisionTransformerFromConfig',
    'tensor_splitting_cfg': {'type': 'ViTInputSplitter2D'},
    'embedding_cfg': {
        'type': 'ViTPatchEmbedding2D',
        'img_size': IMG_SIZE,
        'patch_size': PATCH_SIZE,
        'embed_dim': DIM,
    },
    'token_fusion_cfg': {
        'type': 'ViTTokenFuser2D',
        'img_size': IMG_SIZE,
        'patch_size': PATCH_SIZE,
        'embed_dim': DIM,
        'drop_rate': 0.1,
    },
    'norm_cfg': {
        'type': 'LayerNorm2D',
        'normalized_shape': DIM,
        'eps': 1e-6,
    },
    'block_cfg': {
        'type': 'ViTBlock',
        'attention_cfg': {
            'type': 'ViTSelfAttention2D',
            'hidden_size': DIM,
            'num_attention_heads': NUM_ATTENTION_HEADS,
            'attention_dropout_prob': 0.,
            'hidden_dropout_prob': 0.1,
        },
        'droppath_cfg': {'type': 'VanillaViTDropPath'},
        'mlp_cfg': {
            'type': 'ViTMLP2D',
            'in_features': DIM,
            'dropout_prob': 0.1,
            'mlp_ratio': 1,
        },
        'norm_cfg': {
            'type': 'LayerNorm2D',
            'normalized_shape': DIM,
            'eps': 1e-6,
        },
    },
    'head_cfg': {
        'type': 'ViTHead2D',
        'hidden_size': DIM,
        'num_classes': NUM_CLASSES,
    },
    'embed_dim': DIM,
    'depth': DEPTH,
    'drop_path_rate': 0.,
}

# Trainer hooks, listed in the order they appear in the hook list.
hooks = [
    {'type': 'LogMetricByEpochHook'},
    {'type': 'LogTimingByEpochHook'},
    {'type': 'Accuracy2DHook'},
    {'type': 'LossHook'},
    {'type': 'TensorboardHook', 'log_dir': './tfb_logs'},
    {
        'type': 'LRSchedulerHook',
        'by_epoch': True,
        'lr_scheduler_cfg': {'type': 'LinearWarmupLR', 'warmup_steps': 5},
    },
    {'type': 'SaveCheckpointHook', 'interval': 5, 'checkpoint_dir': './ckpt'},
]

# One pipeline stage, four-way tensor parallelism in '2d' mode.
parallel = {
    'pipeline': {'size': 1},
    'tensor': {'size': 4, 'mode': '2d'},
}

fp16 = {'mode': AMP_TYPE.PARALLEL, 'initial_scale': 2 ** 8}

engine = {'schedule': {'num_microbatches': 1}}

logging = {'root_path': './logs'}
tests/test_trainer/test.sh
deleted
100644 → 0
View file @
45355a62
#!/usr/bin/env sh
# Run one test file as a single rank of a distributed job.
#
# Usage: test.sh <test_file.py>
#
# Rank, world size and host are taken from the SLURM_PROCID, SLURM_NPROCS and
# HOST environment variables; the port is fixed at 29500.

# Abort with a usage message instead of invoking python without a script.
test_file=${1:?usage: test.sh <test_file.py>}

# Quote every expansion so paths or values containing spaces stay one argument.
python "$test_file" --rank "$SLURM_PROCID" --world_size "$SLURM_NPROCS" --host "$HOST" --port 29500
tests/test_trainer/test_pipeline/debug_schedule.py
deleted
100644 → 0
View file @
45355a62
# adapted from Megatron and used to test communication
import
os.path
as
osp
import
pytest
import
torch
from
torch.utils.data
import
DataLoader
from
colossalai.builder
import
ModelInitializer
,
build_dataset
,
build_optimizer
,
build_loss
from
colossalai.communication
import
p2p
as
p2p_communication
from
colossalai.communication.utils
import
send_tensor_meta
,
recv_tensor_meta
from
colossalai.context.parallel_mode
import
ParallelMode
from
colossalai.core
import
global_context
as
gpc
from
colossalai.initialize
import
initialize
from
colossalai.utils
import
print_rank_0
,
get_current_device
# NOTE(review): NUM_BATCH is not read by any function in this file -- confirm
# whether anything else depends on it.
NUM_BATCH = 128
# Number of micro-batches; the value handed out by get_num_microbatches().
NUM_MICRO = 6


def get_num_microbatches() -> int:
    """Return the module-level micro-batch count, ``NUM_MICRO``."""
    return NUM_MICRO
def to_cuda(data):
    """Move ``data`` to the device reported by ``get_current_device()``.

    When ``data`` is a tuple or list only its first element is moved and
    returned; the remaining elements are dropped.

    Args:
        data: an object with a ``.to(device)`` method, or a tuple/list whose
            first element has one.

    Returns:
        The result of calling ``.to(get_current_device())``.
    """
    payload = data[0] if isinstance(data, (tuple, list)) else data
    return payload.to(get_current_device())
def step_func(loss):
    """Build a forward-step callable that carries ``loss`` along.

    Args:
        loss: object returned unchanged as the second element of every call
            to the produced function.

    Returns:
        A function ``(input_tensor, model) -> (output, loss)``.
    """

    def _step_func(input_tensor, model):
        """Call ``model`` on ``input_tensor`` and unwrap a one-element result.

        Raises:
            NotImplementedError: if the model returns a tuple/list holding
                more than one element.
        """
        result = model(input_tensor)
        # Anything that is not a tuple/list is passed through untouched.
        if not isinstance(result, (tuple, list)):
            return result, loss
        if len(result) > 1:
            raise NotImplementedError("Multiple output!!!")
        return result[0], loss

    return _step_func
def forward_step(forward_step_func, data_iterator, model, input_tensor, losses_reduced):
    """Forward step for passed-in model.

    If ``input_tensor`` is ``None`` a ``(data, label)`` batch is drawn from
    ``data_iterator`` and the data is moved with ``to_cuda``; otherwise the
    passed-in ``input_tensor`` is used.

    On the last pipeline rank the model output is replaced by
    ``loss_func(output, label) / get_num_microbatches()`` and that value is
    appended to ``losses_reduced``.

    Args:
        forward_step_func: callable ``(input_tensor, model)`` returning
            ``(output_tensor, loss_func)``.
        data_iterator: iterator yielding ``(data, label)`` pairs.
        model: forwarded to ``forward_step_func``.
        input_tensor: input for this stage, or ``None`` to read from
            ``data_iterator``.
        losses_reduced: list that receives the scaled loss on the last rank.

    Returns:
        The output tensor (the scaled loss on the last pipeline rank).
    """
    label = None
    if input_tensor is None:
        # next() works on any iterator; the old ``.next()`` method is not part
        # of the Python 3 iterator protocol.
        data, label = next(data_iterator)
        input_tensor = to_cuda(data)

    output_tensor, loss_func = forward_step_func(input_tensor, model)

    if gpc.is_last_rank(ParallelMode.PIPELINE):
        # Draw a batch only if this call has not drawn one already.  When the
        # stage is both first and last, a second draw would pair this batch's
        # data with the next batch's labels.
        if label is None:
            _, label = next(data_iterator)
        label = to_cuda(label)
        output_tensor = loss_func(output_tensor, label) / get_num_microbatches()
        losses_reduced.append(output_tensor)

    return output_tensor
def backward_step(optimizer, input_tensor, output_tensor, output_tensor_grad):
    """Back-propagate through ``output_tensor`` and report the input gradient.

    Args:
        optimizer: accepted for interface compatibility; not used here.
        input_tensor: this stage's input, or ``None``.
        output_tensor: tensor to back-propagate from.
        output_tensor_grad: passed as ``grad_tensors`` to
            ``torch.autograd.backward`` (may be ``None``).

    Returns:
        ``input_tensor.grad`` after the backward pass, or ``None`` when
        ``input_tensor`` is ``None``.
    """
    has_input = input_tensor is not None

    # Ask autograd to keep the gradient on the input so it can be read back.
    if has_input:
        input_tensor.retain_grad()

    torch.autograd.backward(output_tensor, grad_tensors=output_tensor_grad)

    return input_tensor.grad if has_input else None
def forward_backward_pipelining_without_interleaving(forward_step_func, data_iterator,
                                                     model, optimizer, forward_only):
    """Run non-interleaved 1F1B schedule, with communication between pipeline
    stages.

    The schedule runs in three phases: warmup forward passes, a steady state
    doing one forward and one backward pass per iteration, and cooldown
    backward passes for the micro-batches started during warmup.

    Args:
        forward_step_func: forwarded to ``forward_step``.
        data_iterator: forwarded to ``forward_step``.
        model: forwarded to ``forward_step``.
        optimizer: forwarded to ``backward_step``.
        forward_only: when true, no backward passes are run and no
            input/output tensors are stored.

    Returns:
        list: ``losses_reduced``, the list handed to every ``forward_step``
        call (a list, not a dict).
    """
    # NOTE(review): the nesting below was rebuilt from a dump that lost all
    # indentation.  The recv_forward/send_forward calls are placed outside the
    # first-rank/last-rank checks, presumably because those helpers are no-ops
    # on boundary stages -- confirm against the upstream file.

    # Compute number of warmup microbatches.
    num_microbatches = get_num_microbatches()
    num_warmup_microbatches = \
        (gpc.get_world_size(ParallelMode.PIPELINE) -
         gpc.get_local_rank(ParallelMode.PIPELINE) - 1)
    num_warmup_microbatches = min(
        num_warmup_microbatches,
        num_microbatches)
    num_microbatches_remaining = \
        num_microbatches - num_warmup_microbatches

    # Input, output tensors only need to be saved when doing backward passes
    input_tensors = None
    output_tensors = None
    if not forward_only:
        input_tensors = []
        output_tensors = []
    losses_reduced = []

    # Used for tensor meta information communication
    ft_shape = None
    bt_shape = None
    fs_checker = True

    # Run warmup forward passes.
    for _ in range(num_warmup_microbatches):
        if not gpc.is_first_rank(ParallelMode.PIPELINE):
            ft_shape = recv_tensor_meta(ft_shape)
        input_tensor = p2p_communication.recv_forward(ft_shape)
        output_tensor = forward_step(forward_step_func, data_iterator, model,
                                     input_tensor, losses_reduced)
        if not gpc.is_last_rank(ParallelMode.PIPELINE):
            # The gradient received later has the shape of this output.
            bt_shape = output_tensor.shape
            fs_checker = send_tensor_meta(output_tensor, fs_checker)
        p2p_communication.send_forward(output_tensor)

        if not forward_only:
            input_tensors.append(input_tensor)
            output_tensors.append(output_tensor)

    # Before running 1F1B, need to receive first forward tensor.
    # If all microbatches are run in warmup / cooldown phase, then no need to
    # receive this tensor here.
    if num_microbatches_remaining > 0:
        if not gpc.is_first_rank(ParallelMode.PIPELINE):
            ft_shape = recv_tensor_meta(ft_shape)
        input_tensor = p2p_communication.recv_forward(ft_shape)

    # Run 1F1B in steady state.
    for i in range(num_microbatches_remaining):
        last_iteration = (i == (num_microbatches_remaining - 1))

        output_tensor = forward_step(forward_step_func, data_iterator, model,
                                     input_tensor, losses_reduced)
        if forward_only:
            p2p_communication.send_forward(output_tensor)

            if not last_iteration:
                input_tensor = p2p_communication.recv_forward(ft_shape)

        else:
            output_tensor_grad = \
                p2p_communication.send_forward_recv_backward(output_tensor, bt_shape)

            # Add input_tensor and output_tensor to end of list.
            input_tensors.append(input_tensor)
            output_tensors.append(output_tensor)

            # Pop input_tensor and output_tensor from the start of the list for
            # the backward pass.
            input_tensor = input_tensors.pop(0)
            output_tensor = output_tensors.pop(0)

            input_tensor_grad = \
                backward_step(optimizer, input_tensor, output_tensor,
                              output_tensor_grad)

            if last_iteration:
                input_tensor = None
                p2p_communication.send_backward(input_tensor_grad)
            else:
                input_tensor = \
                    p2p_communication.send_backward_recv_forward(
                        input_tensor_grad, ft_shape)

    # Run cooldown backward passes.
    if not forward_only:
        for _ in range(num_warmup_microbatches):
            input_tensor = input_tensors.pop(0)
            output_tensor = output_tensors.pop(0)

            output_tensor_grad = p2p_communication.recv_backward(bt_shape)

            input_tensor_grad = \
                backward_step(optimizer, input_tensor, output_tensor,
                              output_tensor_grad)

            p2p_communication.send_backward(input_tensor_grad)

    return losses_reduced
# Directory of this file and the config path resolved relative to it.
DIR_PATH = osp.dirname(osp.realpath(__file__))
CONFIG_PATH = osp.join(DIR_PATH, '../configs/pipeline_vanilla_vit.py')


@pytest.mark.skip(reason="This is only for debugging purpose, please ignore this test")
@pytest.mark.dist
def test_schedule():
    """Build model, data, optimizer and loss from the config, then run one
    pass of the hand-written 1F1B schedule (with backward passes)."""
    initialize(CONFIG_PATH)
    cfg = gpc.config

    # Model.
    net = ModelInitializer(cfg.model, 1).model_initialize()
    print_rank_0('model is created')

    # Seed before building the data so every process uses the same sampler.
    torch.manual_seed(1331)

    train_set = build_dataset(cfg.data.dataset)
    loader = DataLoader(dataset=train_set, **cfg.data.dataloader)
    print_rank_0('train data is created')

    # Optimizer and loss.
    optim = build_optimizer(cfg.optimizer, net)
    criterion = build_loss(cfg.loss)
    print_rank_0('optim and loss is created')

    forward_backward_pipelining_without_interleaving(
        step_func(criterion),
        iter(loader),
        net,
        optim,
        False,
    )

    gpc.destroy()
    print_rank_0('training finished')


if __name__ == '__main__':
    test_schedule()
tests/test_trainer/test_pipeline/model/__init__.py
0 → 100644
View file @
cd9c28e0
from
.layers
import
*
from
.resnet
import
VanillaResNet
tests/test_trainer/test_pipeline/model/layers/__init__.py
0 → 100644
View file @
cd9c28e0
from
.basic_block
import
ResNetBasicBlock
from
.bottleneck
import
ResNetBottleneck
from
.reslayer
import
ResLayer
\ No newline at end of file
tests/test_trainer/test_pipeline/model/layers/basic_block.py
0 → 100644
View file @
cd9c28e0
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
from
typing
import
Optional
,
Callable
import
torch.nn
as
nn
from
torch
import
Tensor
from
colossalai.registry
import
LAYERS
from
.conv
import
conv3x3
@LAYERS.register_module
class ResNetBasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions.

    Main path: conv3x3 -> norm -> ReLU -> conv3x3 -> norm.  The shortcut (the
    input itself, or ``downsample(input)`` when a downsample module is given)
    is added in place and the sum goes through ReLU.

    Args:
        inplanes: input channels of the first convolution.
        planes: output channels of both convolutions.
        stride: stride of the first convolution.
        downsample: optional module applied to the input for the shortcut.
        groups: must be 1.
        base_width: must be 64.
        dilation: must not exceed 1.
        norm_layer: callable building a norm module from a channel count;
            ``nn.BatchNorm2d`` when ``None``.

    Raises:
        ValueError: if ``groups != 1`` or ``base_width != 64``.
        NotImplementedError: if ``dilation > 1``.
    """

    expansion: int = 1

    def __init__(
            self,
            inplanes: int,
            planes: int,
            stride: int = 1,
            downsample: Optional[nn.Module] = None,
            groups: int = 1,
            base_width: int = 64,
            dilation: int = 1,
            norm_layer: Optional[Callable[..., nn.Module]] = None
    ) -> None:
        super().__init__()
        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
        if not (groups == 1 and base_width == 64):
            raise ValueError(
                'BasicBlock only supports groups=1 and base_width=64')
        if dilation > 1:
            raise NotImplementedError(
                "Dilation > 1 not supported in BasicBlock")

        # conv1 and the downsample module both reduce resolution when
        # stride != 1.
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = make_norm(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = make_norm(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x: Tensor) -> Tensor:
        """Apply the block to ``x`` and return the activated sum."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)
tests/test_trainer/test_pipeline/model/layers/bottleneck.py
0 → 100644
View file @
cd9c28e0
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
from
typing
import
Optional
,
Callable
import
torch.nn
as
nn
from
torch
import
Tensor
from
colossalai.registry
import
LAYERS
from
.conv
import
conv3x3
,
conv1x1
@LAYERS.register_module
class ResNetBottleneck(nn.Module):
    """Bottleneck residual block: 1x1 -> 3x3 -> 1x1 convolutions.

    The stride is applied at the 3x3 convolution (``conv2``), as torchvision
    does, whereas the original implementation in "Deep residual learning for
    image recognition" (https://arxiv.org/abs/1512.03385) strides the first
    1x1 convolution.  This variant is also known as ResNet V1.5 and improves
    accuracy according to
    https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.

    Args:
        inplanes: input channels of the first 1x1 convolution.
        planes: base channel count; the block outputs
            ``planes * expansion`` channels.
        stride: stride of the 3x3 convolution.
        downsample: optional module applied to the input for the shortcut.
        groups: groups of the 3x3 convolution.
        base_width: scales the inner width as ``planes * base_width / 64``.
        dilation: dilation of the 3x3 convolution.
        norm_layer: callable building a norm module from a channel count;
            ``nn.BatchNorm2d`` when ``None``.
    """

    expansion: int = 4

    def __init__(
            self,
            inplanes: int,
            planes: int,
            stride: int = 1,
            downsample: Optional[nn.Module] = None,
            groups: int = 1,
            base_width: int = 64,
            dilation: int = 1,
            norm_layer: Optional[Callable[..., nn.Module]] = None
    ) -> None:
        super().__init__()
        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
        inner = int(planes * (base_width / 64.)) * groups
        outer = planes * self.expansion

        # conv2 and the downsample module both reduce resolution when
        # stride != 1.
        self.conv1 = conv1x1(inplanes, inner)
        self.bn1 = make_norm(inner)
        self.conv2 = conv3x3(inner, inner, stride, groups, dilation)
        self.bn2 = make_norm(inner)
        self.conv3 = conv1x1(inner, outer)
        self.bn3 = make_norm(outer)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x: Tensor) -> Tensor:
        """Apply the block to ``x`` and return the activated sum."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)
tests/test_trainer/test_pipeline/model/layers/conv.py
0 → 100644
View file @
cd9c28e0
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
import
torch.nn
as
nn
def conv3x3(in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1) -> nn.Conv2d:
    """Build a bias-free 3x3 convolution whose padding equals its dilation.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: convolution stride.
        groups: number of channel groups.
        dilation: kernel dilation; also used as the padding.

    Returns:
        The configured ``nn.Conv2d``.
    """
    return nn.Conv2d(
        in_channels=in_planes,
        out_channels=out_planes,
        kernel_size=3,
        stride=stride,
        padding=dilation,
        dilation=dilation,
        groups=groups,
        bias=False,
    )
def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d:
    """Build a bias-free 1x1 convolution.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: convolution stride.

    Returns:
        The configured ``nn.Conv2d``.
    """
    return nn.Conv2d(
        in_channels=in_planes,
        out_channels=out_planes,
        kernel_size=1,
        stride=stride,
        bias=False,
    )
tests/test_trainer/test_pipeline/model/layers/reslayer.py
0 → 100644
View file @
cd9c28e0
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
import
torch.nn
as
nn
from
colossalai.registry
import
LAYERS
from
.conv
import
conv1x1
@LAYERS.register_module
class ResLayer(nn.Module):
    """A stack of residual blocks of one registered block type.

    The block class and the norm-layer class are both looked up by name in
    the ``LAYERS`` registry.  The stack itself is built once in ``__init__``
    and stored as ``self.layer``.

    Args:
        block_type: registry name of the residual block class.
        norm_layer_type: registry name of the norm layer class.
        inplanes: input channels of the first block.
        planes: base channel count handed to every block.
        blocks: number of blocks in the stack.
        groups: forwarded to every block.
        base_width: forwarded to every block.
        stride: stride of the first block (reset to 1 when ``dilate``).
        dilation: starting dilation.
        dilate: when true, the stride is folded into the dilation instead.
    """

    def __init__(self,
                 block_type: str,
                 norm_layer_type: str,
                 inplanes: int,
                 planes: int,
                 blocks: int,
                 groups: int,
                 base_width: int,
                 stride: int = 1,
                 dilation: int = 1,
                 dilate: bool = False,
                 ):
        super().__init__()
        # Classes resolved from the registry (not module instances).
        self.block = LAYERS.get_module(block_type)
        self.norm_layer = LAYERS.get_module(norm_layer_type)

        self.inplanes, self.planes = inplanes, planes
        self.blocks = blocks
        self.groups, self.base_width = groups, base_width
        self.dilation, self.dilate = dilation, dilate
        self.stride = stride

        self.layer = self._make_layer()

    def _make_layer(self):
        """Assemble the block stack as an ``nn.Sequential``.

        Updates ``self.dilation``/``self.stride`` when ``self.dilate`` is set
        and sets ``self.inplanes`` to the stack's output channel count.
        """
        make_norm = self.norm_layer
        first_dilation = self.dilation
        if self.dilate:
            # Trade the stride for an equivalent increase in dilation.
            self.dilation *= self.stride
            self.stride = 1

        out_planes = self.planes * self.block.expansion

        # A projection shortcut is needed whenever the first block changes
        # resolution or channel count.
        shortcut = None
        if self.stride != 1 or self.inplanes != out_planes:
            shortcut = nn.Sequential(
                conv1x1(self.inplanes, out_planes, self.stride),
                make_norm(out_planes),
            )

        stack = [
            self.block(self.inplanes, self.planes, self.stride, shortcut,
                       self.groups, self.base_width, first_dilation,
                       make_norm)
        ]
        self.inplanes = out_planes
        stack.extend(
            self.block(self.inplanes, self.planes, groups=self.groups,
                       base_width=self.base_width, dilation=self.dilation,
                       norm_layer=make_norm)
            for _ in range(1, self.blocks)
        )

        return nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the block stack."""
        return self.layer(x)
tests/test_trainer/test_pipeline/model/resnet.py
0 → 100644
View file @
cd9c28e0
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
from
typing
import
List
,
Optional
import
torch
import
torch.nn
as
nn
from
torch
import
Tensor
from
colossalai.registry
import
LAYERS
from
colossalai.registry
import
MODELS
from
colossalai.nn.model
import
ModelFromConfig
@MODELS.register_module
class VanillaResNet(ModelFromConfig):
    """ResNet from
    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_.

    The constructor only records a list of layer configs in
    ``self.layers_cfg``: stem, four ``ResLayer`` stages, pooling, flatten and
    a linear classifier.  NOTE(review): ``self.layers``, iterated in
    ``forward``, is not assigned in this class -- presumably
    ``ModelFromConfig`` builds it from ``layers_cfg``; confirm.

    Args:
        num_cls: output features of the final linear layer.
        block_type: registry name of the residual block class.
        layers: number of blocks in each of the four stages.
        norm_layer_type: layer type used for the stem norm and the stages.
        in_channels: input channels of the stem convolution.
        groups: forwarded to every stage.
        width_per_group: forwarded to every stage as ``base_width``.
        zero_init_residual: read by ``init_weights``.
        replace_stride_with_dilation: three flags, one each for stages 2-4;
            defaults to all ``False``.
        dilations: dilation for each of the four stages.

    Raises:
        ValueError: if ``replace_stride_with_dilation`` does not have exactly
            three elements.
    """

    def __init__(
            self,
            num_cls: int,
            block_type: str,
            layers: List[int],
            norm_layer_type: str = 'BatchNorm2d',
            in_channels: int = 3,
            groups: int = 1,
            width_per_group: int = 64,
            zero_init_residual: bool = False,
            replace_stride_with_dilation: Optional[List[bool]] = None,
            dilations=(1, 1, 1, 1)
    ) -> None:
        super().__init__()

        self.inplanes = 64
        self.zero_init_residual = zero_init_residual
        self.blocks = layers
        self.block_expansion = LAYERS.get_module(block_type).expansion
        self.dilations = dilations
        # Settings shared by all four stages.
        self.reslayer_common_cfg = dict(
            type='ResLayer',
            block_type=block_type,
            norm_layer_type=norm_layer_type,
            groups=groups,
            base_width=width_per_group
        )

        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]

        if len(replace_stride_with_dilation) != 3:
            raise ValueError(
                f"replace_stride_with_dilation should be None "
                f"or a 3-element tuple, got {replace_stride_with_dilation}")

        expansion = self.block_expansion

        # conv1, bn1, relu, maxpool
        stem_cfg = [
            dict(type='Conv2d',
                 in_channels=in_channels,
                 out_channels=self.inplanes,
                 kernel_size=7,
                 stride=2,
                 padding=3,
                 bias=False),
            dict(type=norm_layer_type, num_features=self.inplanes),
            dict(type='ReLU', inplace=True),
            dict(type='MaxPool2d', kernel_size=3, stride=2, padding=1),
        ]

        # layer 1 keeps the stem resolution: no stride or dilate entry.
        stage_cfgs = [
            dict(inplanes=self.inplanes,
                 planes=64,
                 blocks=self.blocks[0],
                 dilation=self.dilations[0],
                 **self.reslayer_common_cfg)
        ]
        # layers 2-4: each takes the previous stage's output channels (half
        # its own planes, times the block expansion) and uses stride 2.
        for idx, planes in enumerate((128, 256, 512), start=1):
            stage_cfgs.append(
                dict(inplanes=(planes // 2) * expansion,
                     planes=planes,
                     blocks=self.blocks[idx],
                     stride=2,
                     dilate=replace_stride_with_dilation[idx - 1],
                     dilation=self.dilations[idx],
                     **self.reslayer_common_cfg)
            )

        # avg pool, flatten, linear
        head_cfg = [
            dict(type='AdaptiveAvgPool2d', output_size=(1, 1)),
            dict(type='LambdaWrapper',
                 func=lambda mod, x: torch.flatten(x, 1)),
            dict(type='Linear',
                 in_features=512 * expansion,
                 out_features=num_cls),
        ]

        self.layers_cfg = stem_cfg + stage_cfgs + head_cfg

    def forward(self, x: Tensor):
        """Apply every entry of ``self.layers`` in order.

        Returns:
            A one-element tuple holding the final output.
        """
        out = x
        for stage in self.layers:
            out = stage(out)
        return out,

    def init_weights(self):
        """Initialise convolution and normalisation weights.

        Convolutions get Kaiming-normal weights (``fan_out``, ReLU);
        ``BatchNorm2d``/``GroupNorm`` get weight 1 and bias 0.  When
        ``zero_init_residual`` is set, the last norm layer of every residual
        block additionally gets weight 0.
        """
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(
                    module.weight, mode='fan_out', nonlinearity='relu')
                continue
            if isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)

        if not self.zero_init_residual:
            return

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        for module in self.modules():
            if isinstance(module, LAYERS.get_module('ResNetBottleneck')):
                # type: ignore[arg-type]
                nn.init.constant_(module.bn3.weight, 0)
            elif isinstance(module, LAYERS.get_module('ResNetBasicBlock')):
                # type: ignore[arg-type]
                nn.init.constant_(module.bn2.weight, 0)
tests/test_trainer/test_pipeline/resnet_config.py
0 → 100644
View file @
cd9c28e0
import
os
from
pathlib
import
Path
# Scalar settings; none of them is referenced by the dictionaries below.
BATCH_SIZE = 128
IMG_SIZE = 224
DIM = 768
NUM_CLASSES = 10
NUM_ATTN_HEADS = 12

# resnet 18: basic blocks, two per stage
model = {
    'type': 'VanillaResNet',
    'block_type': 'ResNetBasicBlock',
    'layers': [2, 2, 2, 2],
    'num_cls': 10,
}

# Four pipeline stages, no tensor parallelism.
parallel = {
    'pipeline': {'size': 4},
    'tensor': {'size': 1, 'mode': None},
}
tests/test_trainer/test_pipeline/test_p2p.py
View file @
cd9c28e0
...
...
@@ -4,6 +4,7 @@
import
pytest
import
torch
import
torch.distributed
as
dist
import
torch.multiprocessing
as
mp
from
colossalai.communication
import
(
recv_backward
,
recv_forward
,
recv_tensor_meta
,
send_backward
,
...
...
@@ -12,13 +13,14 @@ from colossalai.communication import (recv_backward, recv_forward,
send_tensor_meta
)
from
colossalai.context.parallel_mode
import
ParallelMode
from
colossalai.core
import
global_context
as
gpc
from
colossalai.initialize
import
init_dist
,
parse_args
from
colossalai.initialize
import
launch
from
colossalai.logging
import
get_dist_logger
from
colossalai.utils
import
get_current_device
from
functools
import
partial
BATCH_SIZE
=
32
SEQ_LENGTH
=
128
HIDDEN_SIZE
=
5
12
BATCH_SIZE
=
16
SEQ_LENGTH
=
64
HIDDEN_SIZE
=
12
8
CONFIG
=
dict
(
parallel
=
dict
(
...
...
@@ -106,7 +108,7 @@ def check_op(size, rank, prev_rank, next_rank, up_group, down_group, logger):
rank
,
check_equal
(
tensor
,
out
)))
def
test
_comm
(
size
,
rank
,
prev_rank
,
next_rank
,
up_group
,
down_group
,
logger
):
def
check
_comm
(
size
,
rank
,
prev_rank
,
next_rank
,
up_group
,
down_group
,
logger
):
dtype
=
torch
.
float32
device
=
get_current_device
()
tensor_shape
=
(
BATCH_SIZE
,
SEQ_LENGTH
,
HIDDEN_SIZE
)
...
...
@@ -121,13 +123,15 @@ def test_comm(size, rank, prev_rank, next_rank, up_group, down_group, logger):
check_forward_backward
(
tensor
,
grad
,
rank
,
logger
)
@
pytest
.
mark
.
skip
(
"This test should be invoked using the test.sh provided"
)
@
pytest
.
mark
.
dist
def
test_main
():
args
=
parse_args
()
world_size
=
args
.
world_size
init_dist
(
CONFIG
)
def
run_check
(
rank
,
world_size
):
launch
(
config
=
CONFIG
,
rank
=
rank
,
world_size
=
world_size
,
host
=
'localhost'
,
port
=
29932
,
backend
=
'nccl'
)
logger
=
get_dist_logger
()
rank
=
gpc
.
get_global_rank
()
prev_rank
=
gpc
.
get_prev_global_rank
(
ParallelMode
.
PIPELINE
)
...
...
@@ -141,9 +145,18 @@ def test_main():
rank
,
prev_rank
,
up_ranks
,
next_rank
,
down_ranks
))
logger
.
info
(
'Distributed environment is initialzied.'
)
test_comm
(
world_size
,
rank
,
prev_rank
,
next_rank
,
up_group
,
down_group
,
logger
)
check_comm
(
world_size
,
rank
,
prev_rank
,
next_rank
,
up_group
,
down_group
,
logger
)
gpc
.
destroy
()
torch
.
cuda
.
empty_cache
()
@pytest.mark.dist
def test_p2p():
    """Spawn four processes that each execute ``run_check``.

    ``world_size`` is bound up front with ``partial``; the remaining ``rank``
    argument is presumably supplied by ``mp.spawn`` -- confirm against its
    documentation.
    """
    n_procs = 4
    worker = partial(run_check, world_size=n_procs)
    mp.spawn(worker, nprocs=n_procs)
if
__name__
==
'__main__'
:
test_
main
()
test_
p2p
()
tests/test_trainer/test_pipeline/test_partition.py
View file @
cd9c28e0
...
...
@@ -2,35 +2,46 @@ import os.path as osp
import
pytest
import
torch
import
torch.multiprocessing
as
mp
from
torch.utils.data
import
DataLoader
from
colossalai.builder
import
build_dataset
,
ModelInitializer
from
colossalai.builder
.pipeline
import
Pipeline
ModelInitializer
from
colossalai.core
import
global_context
from
colossalai.initialize
import
init_dist
from
colossalai.initialize
import
launch
from
colossalai.logging
import
get_dist_logger
from
functools
import
partial
import
model
DIR_PATH
=
osp
.
dirname
(
osp
.
realpath
(
__file__
))
CONFIG_PATH
=
osp
.
join
(
DIR_PATH
,
'
../configs/pipeline_vanilla_resnet
.py'
)
CONFIG_PATH
=
osp
.
join
(
DIR_PATH
,
'
resnet_config
.py'
)
@
pytest
.
mark
.
skip
(
"This test should be invoked using the test.sh provided"
)
@
pytest
.
mark
.
dist
def
test_partition
():
init_dist
(
CONFIG_PATH
)
def
run_partition
(
rank
,
world_size
):
launch
(
config
=
CONFIG_PATH
,
rank
=
rank
,
world_size
=
world_size
,
host
=
'localhost'
,
port
=
29933
,
backend
=
'nccl'
)
logger
=
get_dist_logger
()
logger
.
info
(
'finished initialization'
)
# build model
model
=
ModelInitializer
(
global_context
.
config
.
model
,
1
,
verbose
=
True
).
model_initialize
()
model
=
PipelineModelInitializer
(
global_context
.
config
.
model
,
1
,
verbose
=
True
).
initialize
()
assert
isinstance
(
model
,
torch
.
nn
.
Module
)
logger
.
info
(
'model is created'
)
dataset
=
build_dataset
(
global_context
.
config
.
train_data
.
dataset
)
dataloader
=
DataLoader
(
dataset
=
dataset
,
**
global_context
.
config
.
train_data
.
dataloader
)
logger
.
info
(
'train data is created'
)
global_context
.
destroy
()
torch
.
cuda
.
synchronize
()
logger
.
info
(
'training finished'
)
torch
.
cuda
.
empty_cache
()
@pytest.mark.dist
def test_partition():
    """Spawn four processes that each execute ``run_partition``.

    ``world_size`` is bound up front with ``partial``; the remaining ``rank``
    argument is presumably supplied by ``mp.spawn`` -- confirm against its
    documentation.
    """
    n_procs = 4
    worker = partial(run_partition, world_size=n_procs)
    mp.spawn(worker, nprocs=n_procs)
if
__name__
==
'__main__'
:
...
...
tests/test_trainer/test_pipeline/test_pipeline_schedule.py
0 → 100644
View file @
cd9c28e0
# adapted from Megatron and used to test communication
import
colossalai
import
os
import
os.path
as
osp
import
pytest
import
torch
import
torch.multiprocessing
as
mp
import
model
from
colossalai.builder
import
PipelineModelInitializer
from
colossalai.communication
import
p2p
as
p2p_communication
from
colossalai.communication.utils
import
send_tensor_meta
,
recv_tensor_meta
from
colossalai.context.parallel_mode
import
ParallelMode
from
colossalai.core
import
global_context
as
gpc
from
colossalai.initialize
import
launch
from
colossalai.utils
import
print_rank_0
,
get_current_device
,
get_dataloader
from
colossalai.engine.schedule
import
PipelineSchedule
from
torchvision.datasets
import
CIFAR10
from
torchvision
import
transforms
from
pathlib
import
Path
from
functools
import
partial
# Batch size for the data loader and micro-batch count for the schedule.
BATCH_SIZE = 32
NUM_MICRO = 8

# Directory of this file and the config path resolved relative to it.
DIR_PATH = osp.dirname(osp.realpath(__file__))
CONFIG_PATH = osp.join(DIR_PATH, './resnet_config.py')


def run_schedule(rank, world_size):
    """Worker body: run one forward/backward step of ``PipelineSchedule``.

    Launches the distributed context on ``localhost:29934`` with the ``nccl``
    backend, builds the pipeline model from the config, trains on CIFAR10
    read from (and downloaded to) the directory named by the ``DATA``
    environment variable, then tears the context down.

    Args:
        rank: rank of this process, forwarded to ``launch``.
        world_size: total number of processes, forwarded to ``launch``.
    """
    launch(
        config=CONFIG_PATH,
        rank=rank,
        world_size=world_size,
        host='localhost',
        port=29934,
        backend='nccl',
    )

    # Model.
    net = PipelineModelInitializer(gpc.config.model, 1).initialize()
    print_rank_0('model is created')

    # Data: CIFAR10 with crop/flip augmentation and normalisation.
    data_root = Path(os.environ['DATA'])
    augment = transforms.Compose([
        transforms.RandomCrop(size=32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],
                             std=[0.2023, 0.1994, 0.2010]),
    ])
    train_set = CIFAR10(root=data_root, download=True, transform=augment)
    loader = get_dataloader(
        dataset=train_set,
        shuffle=True,
        add_sampler=True,
        batch_size=BATCH_SIZE,
        pin_memory=True,
    )

    # Loss and optimizer.
    loss_fn = torch.nn.CrossEntropyLoss()
    adam = torch.optim.Adam(net.parameters(), lr=0.001, weight_decay=0)

    # colossalai.initialize returns a 4-tuple; only the engine and the
    # training loader are used.
    engine, loader, _, _ = colossalai.initialize(net, adam, loss_fn, loader)

    # One forward/backward step split into NUM_MICRO micro-batches.
    pipe_schedule = PipelineSchedule(num_microbatches=NUM_MICRO)
    pipe_schedule.forward_backward_step(engine, iter(loader))

    gpc.destroy()
    torch.cuda.empty_cache()
@pytest.mark.dist
def test_pipeline_schedule():
    """Spawn four processes that each execute ``run_schedule``.

    ``world_size`` is bound up front with ``partial``; the remaining ``rank``
    argument is presumably supplied by ``mp.spawn`` -- confirm against its
    documentation.
    """
    n_procs = 4
    worker = partial(run_schedule, world_size=n_procs)
    mp.spawn(worker, nprocs=n_procs)


if __name__ == '__main__':
    test_pipeline_schedule()
tests/test_trainer/test_pipeline/test_schedule.py
deleted
100644 → 0
View file @
45355a62
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
import
os.path
as
osp
import
pytest
from
colossalai.context
import
ParallelMode
from
colossalai.core
import
global_context
as
gpc
from
colossalai.initialize
import
initialize
from
colossalai.logging
import
get_dist_logger
NUM_BATCH
=
128
BATCH_SIZE
=
32
SEQ_LENGTH
=
128
HIDDEN_SIZE
=
512
DIR_PATH
=
osp
.
dirname
(
osp
.
realpath
(
__file__
))
CONFIG_PATH
=
osp
.
join
(
DIR_PATH
,
'../configs/pipeline_vanilla_resnet.py'
)
@pytest.mark.skip("This test should be invoked using the test.sh provided")
@pytest.mark.dist
def test_schedule():
    """Run one forward/backward pass and one optimizer step through the engine's schedule.

    The engine and dataloaders are built from ``CONFIG_PATH``. The loss is
    logged only on the last rank of the pipeline group, and the global
    context is destroyed before the final log message.
    """
    engine, train_loader, _ = initialize(CONFIG_PATH)
    log = get_dist_logger()

    # Reuse the schedule object the engine already holds.
    sched = engine._schedule
    net, optim = engine.model, engine.optimizer

    # Only the loss is used below; output and label are discarded.
    _, _, loss = sched.forward_backward_step(
        data_iter=iter(train_loader),
        model=net,
        optimizer=optim,
        criterion=engine.criterion,
        forward_only=False,
    )
    sched.optimizer_step(net, optim)

    on_last_stage = gpc.is_last_rank(ParallelMode.PIPELINE)
    if on_last_stage:
        log.info('losses: {}'.format(loss))

    gpc.destroy()
    log.info('training finished')
# Script entry point: call the test function directly when this file is run
# as a script.
if __name__ == '__main__':
    test_schedule()
tests/test_trainer/test_trainer_with_non_pipe_schedule.py
View file @
cd9c28e0
import
colossalai
import
os
from
colossalai.amp.amp_type
import
AMP_TYPE
import
pytest
import
torch
import
torch.nn
as
nn
import
torch.multiprocessing
as
mp
from
pathlib
import
Path
from
torchvision
import
transforms
from
torch.optim
import
Adam
from
colossalai.
initialize
import
get_default_parser
from
colossalai.
amp.amp_type
import
AMP_TYPE
from
colossalai.core
import
global_context
as
gpc
from
colossalai.logging
import
get_dist_logger
from
colossalai.trainer
import
Trainer
from
colossalai.utils
import
get_dataloader
from
torchvision.models
import
resnet18
from
torchvision.datasets
import
CIFAR10
from
functools
import
partial
# Module-level settings for the trainer test.
# BATCH_SIZE was assigned twice in a row (128, then 16); only the last
# assignment took effect, so the dead first one is dropped.
BATCH_SIZE = 16
IMG_SIZE = 32
NUM_EPOCHS = 200
...
...
@@ -26,16 +29,14 @@ CONFIG = dict(
)
def
test_trainer
():
parser
=
get_default_parser
()
args
=
parser
.
parse_args
()
def
run_trainer_no_pipeline
(
rank
,
world_size
):
colossalai
.
launch
(
config
=
CONFIG
,
rank
=
args
.
rank
,
world_size
=
args
.
world_size
,
host
=
args
.
host
,
port
=
args
.
port
,
backend
=
args
.
backend
rank
=
rank
,
world_size
=
world_size
,
host
=
'local
host
'
,
port
=
29930
,
backend
=
'nccl'
)
# build model
...
...
@@ -70,13 +71,11 @@ def test_trainer():
train_dataloader
=
get_dataloader
(
dataset
=
train_dataset
,
shuffle
=
True
,
batch_size
=
BATCH_SIZE
,
num_workers
=
1
,
pin_memory
=
True
,
drop_last
=
True
)
test_dataloader
=
get_dataloader
(
dataset
=
test_dataset
,
batch_size
=
BATCH_SIZE
,
num_workers
=
1
,
pin_memory
=
True
,
drop_last
=
True
)
...
...
@@ -107,7 +106,16 @@ def test_trainer():
display_progress
=
True
,
test_interval
=
5
)
gpc
.
destroy
()
torch
.
cuda
.
empty_cache
()
@pytest.mark.dist
def test_trainer_no_pipeline():
    """Run ``run_trainer_no_pipeline`` in four spawned worker processes.

    ``world_size`` is pre-bound with ``functools.partial``; ``mp.spawn``
    supplies the remaining leading argument (``rank``) to each worker.
    """
    num_procs = 4
    mp.spawn(
        partial(run_trainer_no_pipeline, world_size=num_procs),
        nprocs=num_procs,
    )
# Script entry point. The stale call to the former ``test_trainer`` entry
# point is removed; only the multi-process launcher is invoked.
if __name__ == '__main__':
    test_trainer_no_pipeline()
tests/test_trainer/test_trainer_with_pipe_schedule.py
View file @
cd9c28e0
import
colossalai
import
os
import
pytest
import
torch
from
colossalai.amp.amp_type
import
AMP_TYPE
from
colossalai.context.parallel_mode
import
ParallelMode
import
torch.nn
as
nn
import
torch.multiprocessing
as
mp
from
pathlib
import
Path
from
torchvision
import
transforms
from
torch.optim
import
Adam
from
colossalai.
initialize
import
get_default_parser
from
colossalai.
context.parallel_mode
import
ParallelMode
from
colossalai.core
import
global_context
as
gpc
from
colossalai.logging
import
get_dist_logger
from
colossalai.trainer
import
Trainer
...
...
@@ -16,8 +16,10 @@ from colossalai.utils import get_dataloader
from
colossalai.engine.schedule
import
PipelineSchedule
from
torchvision.models
import
resnet18
from
torchvision.datasets
import
CIFAR10
from
functools
import
partial
# Module-level settings for the pipeline trainer test.
# BATCH_SIZE was assigned twice in a row (32, then 16); only the last
# assignment took effect, so the dead first one is dropped.
BATCH_SIZE = 16
IMG_SIZE = 32
NUM_EPOCHS = 200
...
...
@@ -25,23 +27,17 @@ CONFIG = dict(
parallel
=
dict
(
pipeline
=
2
,
),
# Config
fp16
=
dict
(
mode
=
AMP_TYPE
.
TORCH
)
)
def
test_trainer
():
parser
=
get_default_parser
()
args
=
parser
.
parse_args
()
def
run_trainer_with_pipeline
(
rank
,
world_size
):
colossalai
.
launch
(
config
=
CONFIG
,
rank
=
args
.
rank
,
world_size
=
args
.
world_size
,
host
=
args
.
host
,
port
=
args
.
port
,
backend
=
args
.
backend
rank
=
rank
,
world_size
=
world_size
,
host
=
'local
host
'
,
port
=
29931
,
backend
=
'nccl'
)
# build model
...
...
@@ -101,13 +97,11 @@ def test_trainer():
train_dataloader
=
get_dataloader
(
dataset
=
train_dataset
,
shuffle
=
True
,
batch_size
=
BATCH_SIZE
,
num_workers
=
1
,
pin_memory
=
True
,
drop_last
=
True
)
test_dataloader
=
get_dataloader
(
dataset
=
test_dataset
,
batch_size
=
BATCH_SIZE
,
num_workers
=
1
,
pin_memory
=
True
,
drop_last
=
True
)
...
...
@@ -140,7 +134,16 @@ def test_trainer():
display_progress
=
True
,
test_interval
=
5
)
gpc
.
destroy
()
torch
.
cuda
.
empty_cache
()
@pytest.mark.dist
def test_trainer_with_pipeline():
    """Run ``run_trainer_with_pipeline`` in four spawned worker processes.

    ``world_size`` is pre-bound with ``functools.partial``; ``mp.spawn``
    supplies the remaining leading argument (``rank``) to each worker.
    """
    num_procs = 4
    mp.spawn(
        partial(run_trainer_with_pipeline, world_size=num_procs),
        nprocs=num_procs,
    )
# Script entry point. The stale call to the former ``test_trainer`` entry
# point is removed; only the multi-process launcher is invoked.
if __name__ == '__main__':
    test_trainer_with_pipeline()
tests/test_utils/test_activation_checkpointing.py
View file @
cd9c28e0
...
...
@@ -54,6 +54,7 @@ def test_activation_checkpointing():
loss
.
backward
()
assert
torch
.
all
(
data
.
grad
==
data_
.
grad
),
'Gradient of the input does not match'
torch
.
cuda
.
empty_cache
()
if
__name__
==
'__main__'
:
...
...
tests/test_utils/test_gradient_accumluation.py
View file @
cd9c28e0
...
...
@@ -104,13 +104,14 @@ def run_no_pipeline(rank, world_size):
'param should be the same in the first few iterations and only changed in the last iteration'
gpc
.
destroy
()
torch
.
cuda
.
empty_cache
()
@pytest.mark.dist
def test_engine():
    """Run ``run_no_pipeline`` in four spawned worker processes.

    The body previously held two launch sequences back to back (one with a
    hard-coded 4, one using ``world_size``), which would spawn the workers
    twice. Only the ``world_size`` form is kept, so the process count is
    defined in one place.
    NOTE(review): the ``skip`` marker that preceded this test is dropped so
    the test runs under the ``dist`` marker; confirm this matches the intended
    CI behaviour.
    """
    world_size = 4
    # Pre-bind world_size; mp.spawn passes the remaining leading argument.
    func = partial(run_no_pipeline, world_size=world_size)
    mp.spawn(func, nprocs=world_size)
if
__name__
==
'__main__'
:
...
...
tests/test_zero_data_parallel/config.py
deleted
100644 → 0
View file @
45355a62
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
import
os
from
pathlib
import
Path
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment