OpenDAS / nni · Commits · f2f58dbb

Commit f2f58dbb (Unverified) — authored Jul 30, 2021 by Zhenhua Han, committed by GitHub, Jul 30, 2021

[Retiarii] cross-graph optimization: device placement and input deduplication (#3202)

Parent: 6645bd33

Changes: 29 · Showing 9 changed files with 782 additions and 471 deletions (+782 −471)
File                                              Additions  Deletions
test/retiarii_test/cgo/test.py                    +54        −0
test/retiarii_test/cgo_mnasnet/base_mnasnet.py    +298       −0
test/retiarii_test/cgo_mnasnet/mutator.py         +64        −0
test/retiarii_test/cgo_mnasnet/test.py            +80        −0
test/retiarii_test/darts/test.py                  +2         −1
test/ut/retiarii/converted_mnist_pytorch.json     +0         −363
test/ut/retiarii/test_cgo_engine.py               +280       −20
test/ut/retiarii/test_dedup_input.py              +0         −86
test/ut/retiarii/test_engine.py                   +4         −1
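The two optimizations named in the commit title are easiest to see in the generated test models further down this page (`_model_cpu` and `_model_gpu` in test_cgo_engine.py). As a self-contained illustration of what input deduplication produces, here is a minimal PyTorch sketch; the class names `NetA`, `NetB`, and `FusedModel` are hypothetical and not part of this commit. Two trial models that consume the same dataset are merged into one module, so each batch is read (and, with device placement, transferred) once and fanned out to both graphs:

import torch
import torch.nn as nn

class NetA(nn.Module):
    # hypothetical stand-in for one candidate model produced by the strategy
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(8, 4)

    def forward(self, x):
        return self.fc(x)

class NetB(nn.Module):
    # hypothetical stand-in for a second candidate that reads the same input
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(8, 2)

    def forward(self, x):
        return self.fc(x)

class FusedModel(nn.Module):
    """What a cross-graph merge conceptually yields: one module, two heads."""
    def __init__(self):
        super().__init__()
        self.a = NetA()   # with device placement, this could be moved to cuda:0
        self.b = NetB()   # ... and this to cuda:1

    def forward(self, *_inputs):
        shared = _inputs[0]          # deduplicated input: loaded once, used twice
        return self.a(shared), self.b(shared)

fused = FusedModel()
out_a, out_b = fused(torch.randn(5, 8))   # one forward pass evaluates both trials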
test/retiarii_test/cgo/test.py — new file (0 → 100644)

import json
import os
import sys
import torch
from pathlib import Path

import nni.retiarii.evaluator.pytorch.cgo.evaluator as cgo
import nni.retiarii.evaluator.pytorch.lightning as pl
import nni.retiarii.strategy as strategy
from nni.retiarii import serialize
from nni.retiarii.experiment.pytorch import RetiariiExperiment, RetiariiExeConfig
from torchvision import transforms
from torchvision.datasets import CIFAR10

from darts_model import CNN

if __name__ == '__main__':
    base_model = CNN(32, 3, 16, 10, 8)
    train_transform = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    valid_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    train_dataset = serialize(CIFAR10, root='data/cifar10', train=True, download=True, transform=train_transform)
    test_dataset = serialize(CIFAR10, root='data/cifar10', train=False, download=True, transform=valid_transform)

    trainer = cgo.Classification(train_dataloader=pl.DataLoader(train_dataset, batch_size=100),
                                 val_dataloaders=pl.DataLoader(test_dataset, batch_size=100),
                                 max_epochs=1, limit_train_batches=0.2)

    simple_strategy = strategy.Random()

    exp = RetiariiExperiment(base_model, trainer, [], simple_strategy)

    exp_config = RetiariiExeConfig('local')
    exp_config.experiment_name = 'darts_search'
    exp_config.execution_engine = 'cgo'
    exp_config.trial_concurrency = 3
    # Since CGO may merge multiple trials into one, RetiariiExperiment may run more trials than max_trial_number.
    # When max_trial_number = 3, it actually runs 9 models, since each merged trial contains 3 trials from the strategy.
    exp_config.max_trial_number = 100
    exp_config.devices = ['cuda:0', 'cuda:1', 'cuda:2']
    exp_config.trial_gpu_number = 1
    exp_config.batch_waiting_time = 100
    exp_config.training_service.use_active_gpu = True
    exp_config.training_service.gpu_indices = [0, 1, 2]

    exp.run(exp_config, 8081)
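Two knobs in this script are CGO-specific: `exp_config.devices` lists the GPUs the engine may place merged models on, and `exp_config.batch_waiting_time`, by its name, presumably controls how long the engine waits to collect trials into a batch before merging (the script does not state the unit). As the in-line comment explains, a merged trial can carry several strategy trials, so the number of models actually trained can exceed `max_trial_number`.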
test/retiarii_test/cgo_mnasnet/base_mnasnet.py — new file (0 → 100644)

from nni.retiarii import basic_unit
import nni.retiarii.nn.pytorch as nn
import warnings
import torch
import torch.nn as torch_nn
from torchvision.models.utils import load_state_dict_from_url
import torch.nn.functional as F

import sys
from pathlib import Path
sys.path.append(str(Path(__file__).resolve().parents[2]))

# Paper suggests 0.9997 momentum, for TensorFlow. Equivalent PyTorch momentum is
# 1.0 - tensorflow.
_BN_MOMENTUM = 1 - 0.9997

_FIRST_DEPTH = 32
_MOBILENET_V2_FILTERS = [16, 24, 32, 64, 96, 160, 320]
_MOBILENET_V2_NUM_LAYERS = [1, 2, 3, 4, 3, 3, 1]


class _ResidualBlock(nn.Module):
    def __init__(self, net):
        super().__init__()
        self.net = net

    def forward(self, x):
        return self.net(x) + x


class _InvertedResidual(nn.Module):
    def __init__(self, in_ch, out_ch, kernel_size, stride, expansion_factor, skip, bn_momentum=0.1):
        super(_InvertedResidual, self).__init__()
        assert stride in [1, 2]
        assert kernel_size in [3, 5]
        mid_ch = in_ch * expansion_factor
        self.apply_residual = skip and in_ch == out_ch and stride == 1
        self.layers = nn.Sequential(
            # Pointwise
            nn.Conv2d(in_ch, mid_ch, 1, bias=False),
            nn.BatchNorm2d(mid_ch, momentum=bn_momentum),
            nn.ReLU(inplace=True),
            # Depthwise
            nn.Conv2d(mid_ch, mid_ch, kernel_size, padding=kernel_size // 2,
                      stride=stride, groups=mid_ch, bias=False),
            nn.BatchNorm2d(mid_ch, momentum=bn_momentum),
            nn.ReLU(inplace=True),
            # Linear pointwise. Note that there's no activation.
            nn.Conv2d(mid_ch, out_ch, 1, bias=False),
            nn.BatchNorm2d(out_ch, momentum=bn_momentum))

    def forward(self, input):
        if self.apply_residual:
            ret = self.layers(input) + input
        else:
            ret = self.layers(input)
        return ret


def _stack_inverted_residual(in_ch, out_ch, kernel_size, skip, stride, exp_factor, repeats, bn_momentum):
    """ Creates a stack of inverted residuals. """
    assert repeats >= 1
    # First one has no skip, because feature map size changes.
    first = _InvertedResidual(in_ch, out_ch, kernel_size, stride, exp_factor, skip, bn_momentum=bn_momentum)
    remaining = []
    for _ in range(1, repeats):
        remaining.append(_InvertedResidual(out_ch, out_ch, kernel_size, 1, exp_factor, skip, bn_momentum=bn_momentum))
    return nn.Sequential(first, *remaining)


def _stack_normal_conv(in_ch, out_ch, kernel_size, skip, dconv, stride, repeats, bn_momentum):
    assert repeats >= 1
    stack = []
    for i in range(repeats):
        s = stride if i == 0 else 1
        if dconv:
            modules = [
                nn.Conv2d(in_ch, in_ch, kernel_size, padding=kernel_size // 2, stride=s,
                          groups=in_ch, bias=False),
                nn.BatchNorm2d(in_ch, momentum=bn_momentum),
                nn.ReLU(inplace=True),
                nn.Conv2d(in_ch, out_ch, 1, padding=0, stride=1, bias=False),
                nn.BatchNorm2d(out_ch, momentum=bn_momentum)
            ]
        else:
            modules = [
                nn.Conv2d(in_ch, out_ch, kernel_size, padding=kernel_size // 2, stride=s, bias=False),
                nn.ReLU(inplace=True),
                nn.BatchNorm2d(out_ch, momentum=bn_momentum)
            ]
        if skip and in_ch == out_ch and s == 1:
            # use different implementation for skip and noskip to align with pytorch
            stack.append(_ResidualBlock(nn.Sequential(*modules)))
        else:
            stack += modules
        in_ch = out_ch
    return stack


def _round_to_multiple_of(val, divisor, round_up_bias=0.9):
    """ Asymmetric rounding to make `val` divisible by `divisor`. With default
    bias, will round up, unless the number is no more than 10% greater than the
    smaller divisible value, i.e. (83, 8) -> 80, but (84, 8) -> 88. """
    assert 0.0 < round_up_bias < 1.0
    new_val = max(divisor, int(val + divisor / 2) // divisor * divisor)
    return new_val if new_val >= round_up_bias * val else new_val + divisor


def _get_depths(depths, alpha):
    """ Scales tensor depths as in reference MobileNet code, prefers rounding up
    rather than down. """
    return [_round_to_multiple_of(depth * alpha, 8) for depth in depths]


class MNASNet(nn.Module):
    """ MNASNet, as described in https://arxiv.org/pdf/1807.11626.pdf. This
    implements the B1 variant of the model.
    >>> model = MNASNet(1000, 1.0)
    >>> x = torch.rand(1, 3, 224, 224)
    >>> y = model(x)
    >>> y.dim()
    1
    >>> y.nelement()
    1000
    """
    # Version 2 adds depth scaling in the initial stages of the network.
    _version = 2

    def __init__(self, alpha, depths, convops, kernel_sizes, num_layers, skips,
                 num_classes=1000, dropout=0.2):
        super().__init__()
        assert alpha > 0.0
        assert len(depths) == len(convops) == len(kernel_sizes) == len(num_layers) == len(skips) == 7
        self.alpha = alpha
        self.num_classes = num_classes
        depths = _get_depths([_FIRST_DEPTH] + depths, alpha)
        base_filter_sizes = [16, 24, 40, 80, 96, 192, 320]
        exp_ratios = [3, 3, 3, 6, 6, 6, 6]
        strides = [1, 2, 2, 2, 1, 2, 1]
        layers = [
            # First layer: regular conv.
            nn.Conv2d(3, depths[0], 3, padding=1, stride=2, bias=False),
            nn.BatchNorm2d(depths[0], momentum=_BN_MOMENTUM),
            nn.ReLU(inplace=True),
        ]
        count = 0
        # for conv, prev_depth, depth, ks, skip, stride, repeat, exp_ratio in \
        #         zip(convops, depths[:-1], depths[1:], kernel_sizes, skips, strides, num_layers, exp_ratios):
        for filter_size, exp_ratio, stride in zip(base_filter_sizes, exp_ratios, strides):
            # TODO: restrict that "choose" can only be used within mutator
            ph = nn.Placeholder(label=f'mutable_{count}', **{
                'kernel_size_options': [1, 3, 5],
                'n_layer_options': [1, 2, 3, 4],
                'op_type_options': ['__mutated__.base_mnasnet.RegularConv',
                                    '__mutated__.base_mnasnet.DepthwiseConv',
                                    '__mutated__.base_mnasnet.MobileConv'],
                # 'se_ratio_options': [0, 0.25],
                'skip_options': ['identity', 'no'],
                'n_filter_options': [int(filter_size * x) for x in [0.75, 1.0, 1.25]],
                'exp_ratio': exp_ratio,
                'stride': stride,
                'in_ch': depths[0] if count == 0 else None
            })
            layers.append(ph)
            '''if conv == "mconv":
                # MNASNet blocks: stacks of inverted residuals.
                layers.append(_stack_inverted_residual(prev_depth, depth, ks, skip,
                                                       stride, exp_ratio, repeat, _BN_MOMENTUM))
            else:
                # Normal conv and depth-separated conv
                layers += _stack_normal_conv(prev_depth, depth, ks, skip, conv == "dconv",
                                             stride, repeat, _BN_MOMENTUM)'''
            count += 1
            if count >= 2:
                break
        layers += [
            # Final mapping to classifier input.
            nn.Conv2d(depths[7], 1280, 1, padding=0, stride=1, bias=False),
            nn.BatchNorm2d(1280, momentum=_BN_MOMENTUM),
            nn.ReLU(inplace=True),
        ]
        self.layers = nn.Sequential(*layers)
        self.classifier = nn.Sequential(nn.Dropout(p=dropout, inplace=True),
                                        nn.Linear(1280, num_classes))
        self._initialize_weights()
        # self.for_test = 10

    def forward(self, x):
        # if self.for_test == 10:
        x = self.layers(x)
        # Equivalent to global avgpool and removing H and W dimensions.
        x = x.mean([2, 3])
        x = F.relu(x)
        return self.classifier(x)

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                torch_nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
                if m.bias is not None:
                    torch_nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                torch_nn.init.ones_(m.weight)
                torch_nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                torch_nn.init.kaiming_uniform_(m.weight, mode="fan_out", nonlinearity="sigmoid")
                torch_nn.init.zeros_(m.bias)


def test_model(model):
    model(torch.randn(2, 3, 224, 224))


# ==================== definition of candidate op classes
BN_MOMENTUM = 1 - 0.9997


class RegularConv(nn.Module):
    def __init__(self, kernel_size, in_ch, out_ch, skip, exp_ratio, stride):
        super().__init__()
        self.kernel_size = kernel_size
        self.in_ch = in_ch
        self.out_ch = out_ch
        self.skip = skip
        self.exp_ratio = exp_ratio
        self.stride = stride

        self.conv = nn.Conv2d(in_ch, out_ch, kernel_size, padding=kernel_size // 2, stride=stride, bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.bn = nn.BatchNorm2d(out_ch, momentum=BN_MOMENTUM)

    def forward(self, x):
        out = self.bn(self.relu(self.conv(x)))
        if self.skip == 'identity':
            out = out + x
        return out


class DepthwiseConv(nn.Module):
    def __init__(self, kernel_size, in_ch, out_ch, skip, exp_ratio, stride):
        super().__init__()
        self.kernel_size = kernel_size
        self.in_ch = in_ch
        self.out_ch = out_ch
        self.skip = skip
        self.exp_ratio = exp_ratio
        self.stride = stride

        self.conv1 = nn.Conv2d(in_ch, in_ch, kernel_size, padding=kernel_size // 2, stride=stride,
                               groups=in_ch, bias=False)
        self.bn1 = nn.BatchNorm2d(in_ch, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(in_ch, out_ch, 1, padding=0, stride=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_ch, momentum=BN_MOMENTUM)

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        if self.skip == 'identity':
            out = out + x
        return out


class MobileConv(nn.Module):
    def __init__(self, kernel_size, in_ch, out_ch, skip, exp_ratio, stride):
        super().__init__()
        self.kernel_size = kernel_size
        self.in_ch = in_ch
        self.out_ch = out_ch
        self.skip = skip
        self.exp_ratio = exp_ratio
        self.stride = stride

        mid_ch = in_ch * exp_ratio
        self.layers = nn.Sequential(
            # Pointwise
            nn.Conv2d(in_ch, mid_ch, 1, bias=False),
            nn.BatchNorm2d(mid_ch, momentum=BN_MOMENTUM),
            nn.ReLU(inplace=True),
            # Depthwise
            nn.Conv2d(mid_ch, mid_ch, kernel_size, padding=(kernel_size - 1) // 2,
                      stride=stride, groups=mid_ch, bias=False),
            nn.BatchNorm2d(mid_ch, momentum=BN_MOMENTUM),
            nn.ReLU(inplace=True),
            # Linear pointwise. Note that there's no activation.
            nn.Conv2d(mid_ch, out_ch, 1, bias=False),
            nn.BatchNorm2d(out_ch, momentum=BN_MOMENTUM))

    def forward(self, x):
        out = self.layers(x)
        if self.skip == 'identity':
            out = out + x
        return out


# mnasnet0_5
ir_module = _InvertedResidual(16, 16, 3, 1, 1, True)
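A quick way to sanity-check the asymmetric rounding described in `_round_to_multiple_of`'s docstring is to restate it standalone and assert the documented examples; this snippet is illustrative and not part of the commit:

def _round_to_multiple_of(val, divisor, round_up_bias=0.9):
    # same logic as above: round to the nearest multiple, then bump up unless
    # the rounded value is at least round_up_bias of the original
    assert 0.0 < round_up_bias < 1.0
    new_val = max(divisor, int(val + divisor / 2) // divisor * divisor)
    return new_val if new_val >= round_up_bias * val else new_val + divisor

assert _round_to_multiple_of(83, 8) == 80   # 80 >= 0.9 * 83, so the lower multiple stands
assert _round_to_multiple_of(84, 8) == 88   # int(84 + 4) // 8 * 8 == 88 already exceeds 0.9 * 84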
test/retiarii_test/cgo_mnasnet/mutator.py — new file (0 → 100644)

import logging
import sys
from pathlib import Path

sys.path.append(str(Path(__file__).resolve().parents[2]))

from nni.retiarii import Mutator

from base_mnasnet import RegularConv, DepthwiseConv, MobileConv

_logger = logging.getLogger(__name__)


class BlockMutator(Mutator):
    def __init__(self, target: str):
        super(BlockMutator, self).__init__()
        self.target = target

    def mutate(self, model):
        nodes = model.get_nodes_by_label(self.target)
        assert len(nodes) == 1
        node = nodes[0]
        graph = node.graph
        related_info = node.operation.parameters

        kernel_size = self.choice(related_info['kernel_size_options'])
        op_type = self.choice(related_info['op_type_options'])
        # self.choice(related_info['se_ratio_options'])
        skip = self.choice(related_info['skip_options'])
        n_filter = self.choice(related_info['n_filter_options'])

        if related_info['in_ch'] is not None:
            in_ch = related_info['in_ch']
        else:
            assert len(node.predecessors) == 1
            the_node = node.predecessors[0]
            _logger.debug(repr(the_node.operation.parameters))
            _logger.debug(the_node.__repr__())
            in_ch = the_node.operation.parameters['out_ch']

        # update the placeholder to be a new operation
        node.update_operation(op_type, {
            'kernel_size': kernel_size,
            'in_ch': in_ch,
            'out_ch': n_filter,
            'skip': 'no',
            'exp_ratio': related_info['exp_ratio'],
            'stride': related_info['stride']
        })

        # insert new nodes after the placeholder
        n_layer = self.choice(related_info['n_layer_options'])
        for i in range(1, n_layer):
            node = graph.insert_node_on_edge(node.outgoing_edges[0],
                                             '{}_{}'.format(self.target, i),
                                             op_type,
                                             {'kernel_size': kernel_size,
                                              'in_ch': n_filter,
                                              'out_ch': n_filter,
                                              'skip': skip,
                                              'exp_ratio': related_info['exp_ratio'],
                                              'stride': 1})

        # fix possible shape mismatch
        # TODO: use formal method function to update parameters
        if len(node.successors) == 1 and 'in_channels' in node.successors[0].operation.parameters:
            node.successors[0].operation.parameters['in_channels'] = n_filter
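`BlockMutator` shows the general Retiarii mutation pattern: locate a labeled node, sample options with `self.choice(...)`, then rewrite the graph. A stripped-down variant, hypothetical and not part of this commit, makes the skeleton clearer:

from nni.retiarii import Mutator

class KernelSizeMutator(Mutator):
    """Hypothetical minimal mutator: only resamples a kernel size."""

    def __init__(self, target: str):
        super().__init__()
        self.target = target

    def mutate(self, model):
        node = model.get_nodes_by_label(self.target)[0]
        kernel_size = self.choice([3, 5, 7])   # one sample per generated model
        # keep the operation type, override a single parameter
        node.update_operation(node.operation.type,
                              {**node.operation.parameters, 'kernel_size': kernel_size})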
test/retiarii_test/cgo_mnasnet/test.py — new file (0 → 100644)

import os
import sys
import torch
from pathlib import Path

import nni.retiarii.evaluator.pytorch.lightning as pl
import nni.retiarii.evaluator.pytorch.cgo.evaluator as cgo
from nni.retiarii import serialize
from base_mnasnet import MNASNet
from nni.experiment import RemoteMachineConfig
from nni.retiarii.experiment.pytorch import RetiariiExperiment, RetiariiExeConfig
from nni.retiarii.strategy import TPEStrategy
from torchvision import transforms
from torchvision.datasets import CIFAR10

from mutator import BlockMutator

if __name__ == '__main__':
    _DEFAULT_DEPTHS = [16, 24, 40, 80, 96, 192, 320]
    _DEFAULT_CONVOPS = ["dconv", "mconv", "mconv", "mconv", "mconv", "mconv", "mconv"]
    _DEFAULT_SKIPS = [False, True, True, True, True, True, True]
    _DEFAULT_KERNEL_SIZES = [3, 3, 5, 5, 3, 5, 3]
    _DEFAULT_NUM_LAYERS = [1, 3, 3, 3, 2, 4, 1]
    base_model = MNASNet(0.5, _DEFAULT_DEPTHS, _DEFAULT_CONVOPS, _DEFAULT_KERNEL_SIZES,
                         _DEFAULT_NUM_LAYERS, _DEFAULT_SKIPS)

    train_transform = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    valid_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    train_dataset = serialize(CIFAR10, root='data/cifar10', train=True, download=True, transform=train_transform)
    test_dataset = serialize(CIFAR10, root='data/cifar10', train=False, download=True, transform=valid_transform)

    # trainer = pl.Classification(train_dataloader=pl.DataLoader(train_dataset, batch_size=100),
    #                             val_dataloaders=pl.DataLoader(test_dataset, batch_size=100),
    #                             max_epochs=1, limit_train_batches=0.2)
    trainer = cgo.Classification(train_dataloader=pl.DataLoader(train_dataset, batch_size=100),
                                 val_dataloaders=pl.DataLoader(test_dataset, batch_size=100),
                                 max_epochs=1, limit_train_batches=0.2)

    applied_mutators = [BlockMutator('mutable_0'), BlockMutator('mutable_1')]

    simple_strategy = TPEStrategy()

    exp = RetiariiExperiment(base_model, trainer, applied_mutators, simple_strategy)

    exp_config = RetiariiExeConfig('remote')
    exp_config.experiment_name = 'darts_search'
    exp_config.trial_concurrency = 3
    exp_config.max_trial_number = 10
    exp_config.trial_gpu_number = 1
    exp_config.training_service.use_active_gpu = True
    exp_config.training_service.reuse_mode = True
    exp_config.training_service.gpu_indices = [0, 1, 2]
    exp_config.max_concurrency_cgo = 1
    exp_config.batch_waiting_time = 0

    rm_conf = RemoteMachineConfig()
    rm_conf.host = '127.0.0.1'
    rm_conf.user = 'xxx'
    rm_conf.password = 'xxx'
    rm_conf.port = 22
    rm_conf.python_path = '/home/xxx/py38/bin'
    rm_conf.gpu_indices = [0, 1, 2]
    rm_conf.use_active_gpu = True
    rm_conf.max_trial_number_per_gpu = 3

    exp_config.training_service.machine_list = [rm_conf]
    exp_config.execution_engine = 'cgo'

    exp.run(exp_config, 8099)
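Compared with the local cgo/test.py above, this script drives CGO through the remote training service: `reuse_mode = True`, a `RemoteMachineConfig` whose `host`, `user`, `password`, and `python_path` are placeholders (`'xxx'`, to be filled in before running), `max_trial_number_per_gpu = 3`, which by its name allows several trials per GPU, and `max_concurrency_cgo = 1`.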
test/retiarii_test/darts/test.py — modified (+2 −1)

...
@@ -31,7 +31,8 @@ if __name__ == '__main__':
     test_dataset = serialize(CIFAR10, root='data/cifar10', train=False, download=True, transform=valid_transform)
     trainer = pl.Classification(train_dataloader=pl.DataLoader(train_dataset, batch_size=100),
                                 val_dataloaders=pl.DataLoader(test_dataset, batch_size=100),
-                                max_epochs=1, limit_train_batches=0.2)
+                                max_epochs=1, limit_train_batches=0.2,
+                                progress_bar_refresh_rate=0)
     simple_strategy = strategy.Random()
...
test/ut/retiarii/converted_mnist_pytorch.json — deleted (100644 → 0)

{
  "_model__stem": {
    "inputs": ["_inputs__1"],
    "outputs": ["pool2__1"],
    "nodes": {
      "_model__stem__conv1": {"operation": {"type": "__torch__.torch.nn.modules.conv.Conv2d", "parameters": {"out_channels": 32, "in_channels": 1, "kernel_size": 5}}},
      "_model__stem__pool1": {"operation": {"type": "__torch__.torch.nn.modules.pooling.MaxPool2d", "parameters": {"kernel_size": 2}}},
      "_model__stem__conv2": {"operation": {"type": "__torch__.torch.nn.modules.conv.Conv2d", "parameters": {"out_channels": 64, "in_channels": 32, "kernel_size": 5}}},
      "_model__stem__pool2": {"operation": {"type": "__torch__.torch.nn.modules.pooling.MaxPool2d", "parameters": {"kernel_size": 2}}}
    },
    "edges": [
      {"head": ["_inputs", 0], "tail": ["_model__stem__conv1", 0]},
      {"head": ["_model__stem__conv1", null], "tail": ["_model__stem__pool1", 0]},
      {"head": ["_model__stem__pool1", null], "tail": ["_model__stem__conv2", 0]},
      {"head": ["_model__stem__conv2", null], "tail": ["_model__stem__pool2", 0]},
      {"head": ["_model__stem__pool2", null], "tail": ["_outputs", null]}
    ]
  },
  "_model": {
    "inputs": ["image__1"],
    "outputs": ["softmax__1"],
    "nodes": {
      "_model__Constant2": {"operation": {"type": "prim::Constant", "parameters": {}}},
      "_model__Constant3": {"operation": {"type": "prim::Constant", "parameters": {"value": 3}}},
      "_model__Constant4": {"operation": {"type": "prim::Constant", "parameters": {"value": -1}}},
      "_model__Constant5": {"operation": {"type": "prim::Constant", "parameters": {"value": 0}}},
      "_model__stem": {"operation": {"type": "_cell", "parameters": {}, "cell_name": "_model__stem"}},
      "_model__Size6": {"operation": {"type": "aten::size", "parameters": {}}},
      "_model__ListConstruct7": {"operation": {"type": "prim::ListConstruct", "parameters": {}}},
      "_model__View8": {"operation": {"type": "aten::view", "parameters": {}}},
      "_model__fc1": {"operation": {"type": "__torch__.torch.nn.modules.linear.Linear", "parameters": {"in_features": 1024, "out_features": 256}}},
      "_model__fc2": {"operation": {"type": "__torch__.torch.nn.modules.linear.Linear", "parameters": {"in_features": 256, "out_features": 10}}},
      "_model__softmax9": {"operation": {"type": "Function.softmax", "parameters": {}}}
    },
    "edges": [
      {"head": ["_inputs", 0], "tail": ["_model__stem", 0]},
      {"head": ["_model__stem", null], "tail": ["_model__Size6", 0]},
      {"head": ["_model__Constant5", null], "tail": ["_model__Size6", 1]},
      {"head": ["_model__Size6", null], "tail": ["_model__ListConstruct7", 0]},
      {"head": ["_model__Constant4", null], "tail": ["_model__ListConstruct7", 1]},
      {"head": ["_model__stem", null], "tail": ["_model__View8", 0]},
      {"head": ["_model__ListConstruct7", null], "tail": ["_model__View8", 1]},
      {"head": ["_model__View8", null], "tail": ["_model__fc1", 0]},
      {"head": ["_model__fc1", null], "tail": ["_model__fc2", 0]},
      {"head": ["_model__fc2", null], "tail": ["_model__softmax9", 0]},
      {"head": ["_model__Constant4", null], "tail": ["_model__softmax9", 1]},
      {"head": ["_model__Constant3", null], "tail": ["_model__softmax9", 2]},
      {"head": ["_model__Constant2", null], "tail": ["_model__softmax9", 3]},
      {"head": ["_model__softmax9", null], "tail": ["_outputs", null]}
    ]
  },
  "_evaluator": {
    "module": "nni.retiarii.trainer.PyTorchImageClassificationTrainer",
    "kwargs": {
      "dataset_cls": "MNIST",
      "dataset_kwargs": {"root": "data/mnist", "download": true},
      "dataloader_kwargs": {"batch_size": 32},
      "optimizer_cls": "SGD",
      "optimizer_kwargs": {"lr": 1e-3},
      "trainer_kwargs": {"max_epochs": 1}
    }
  }
}
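This fixture goes away because the tests no longer need it: as the `_load_mnist` change in test_cgo_engine.py below shows, models are now loaded from mnist_pytorch.json and get a Lightning-based evaluator attached at load time, replacing the baked-in `_evaluator` block (the old `PyTorchImageClassificationTrainer` configuration) above.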
test/ut/retiarii/test_cgo_engine.py — modified (+280 −20); the updated content of the displayed hunks:

import json
import os
import sys
import threading
import unittest
import logging
import time

import torch
import torch.nn as nn
from pathlib import Path

import nni

try:
    from nni.retiarii.codegen import model_to_pytorch_script
    from nni.common.device import GPUDevice
    from nni.retiarii.integration import RetiariiAdvisor
    from nni.retiarii.execution.cgo_engine import CGOExecutionEngine
    from nni.retiarii.evaluator.pytorch import PyTorchImageClassificationTrainer, PyTorchMultiModelTrainer
    from nni.retiarii import Model, submit_models, serialize
    from nni.retiarii.graph import Node
    from nni.retiarii.execution import set_execution_engine
    from nni.retiarii.execution.logical_optimizer.opt_dedup_input import DedupInputOptimizer
    from nni.retiarii.execution.logical_optimizer.logical_plan import LogicalPlan
    from nni.retiarii.utils import import_
    import nni.retiarii.evaluator.pytorch.lightning as pl
    from nni.retiarii.evaluator.pytorch.cgo.evaluator import MultiModelSupervisedLearningModule, \
        _MultiModelSupervisedLearningModule
    import nni.retiarii.evaluator.pytorch.cgo.trainer as cgo_trainer

    module_import_failed = False
except ImportError:
    module_import_failed = True

import pytest
from torchvision.datasets import MNIST
from torchvision import transforms
from torch.utils.data import Dataset
from sklearn.datasets import load_diabetes


class _model_cpu(nn.Module):
    # Hand-written reference of what CGO produces when two MNIST models are
    # merged on CPU: one module containing both graphs, sharing one input.
    def __init__(self):
        super().__init__()
        self.M_1_stem = M_1_stem()
        self.M_2_stem = M_2_stem()
        self.M_1_flatten = torch.nn.Flatten()
        self.M_2_flatten = torch.nn.Flatten()
        self.M_1_fc1 = torch.nn.Linear(out_features=256, in_features=1024)
        self.M_2_fc1 = torch.nn.Linear(out_features=256, in_features=1024)
        self.M_1_fc2 = torch.nn.Linear(out_features=10, in_features=256)
        self.M_2_fc2 = torch.nn.Linear(out_features=10, in_features=256)
        self.M_1_softmax = torch.nn.Softmax()
        self.M_2_softmax = torch.nn.Softmax()

    def forward(self, *_inputs):
        # deduplicated input: both stems read the same tensor
        M_1__inputs_to_M_2_stem = _inputs[0]
        M_1_stem = self.M_1_stem(_inputs[0])
        M_2_stem = self.M_2_stem(M_1__inputs_to_M_2_stem)
        M_1_flatten = self.M_1_flatten(M_1_stem)
        M_2_flatten = self.M_2_flatten(M_2_stem)
        M_1_fc1 = self.M_1_fc1(M_1_flatten)
        M_2_fc1 = self.M_2_fc1(M_2_flatten)
        M_1_fc2 = self.M_1_fc2(M_1_fc1)
        M_2_fc2 = self.M_2_fc2(M_2_fc1)
        M_1_softmax = self.M_1_softmax(M_1_fc2)
        M_2_softmax = self.M_2_softmax(M_2_fc2)
        return M_1_softmax, M_2_softmax


class _model_gpu(nn.Module):
    # Same merged pair, but with device placement: model 1 on cuda:0, model 2 on cuda:1.
    def __init__(self):
        super().__init__()
        self.M_1_stem = M_1_stem().to('cuda:0')
        self.M_2_stem = M_2_stem().to('cuda:1')
        self.M_1_flatten = torch.nn.Flatten().to('cuda:0')
        self.M_2_flatten = torch.nn.Flatten().to('cuda:1')
        self.M_1_fc1 = torch.nn.Linear(out_features=256, in_features=1024).to('cuda:0')
        self.M_2_fc1 = torch.nn.Linear(out_features=256, in_features=1024).to('cuda:1')
        self.M_1_fc2 = torch.nn.Linear(out_features=10, in_features=256).to('cuda:0')
        self.M_2_fc2 = torch.nn.Linear(out_features=10, in_features=256).to('cuda:1')
        self.M_1_softmax = torch.nn.Softmax().to('cuda:0')
        self.M_2_softmax = torch.nn.Softmax().to('cuda:1')

    def forward(self, *_inputs):
        # the shared input is copied once per device
        M_1__inputs_to_M_1_stem = _inputs[0].to("cuda:0")
        M_1__inputs_to_M_2_stem = _inputs[0].to("cuda:1")
        M_1_stem = self.M_1_stem(M_1__inputs_to_M_1_stem)
        M_2_stem = self.M_2_stem(M_1__inputs_to_M_2_stem)
        M_1_flatten = self.M_1_flatten(M_1_stem)
        M_2_flatten = self.M_2_flatten(M_2_stem)
        M_1_fc1 = self.M_1_fc1(M_1_flatten)
        M_2_fc1 = self.M_2_fc1(M_2_flatten)
        M_1_fc2 = self.M_1_fc2(M_1_fc1)
        M_2_fc2 = self.M_2_fc2(M_2_fc1)
        M_1_softmax = self.M_1_softmax(M_1_fc2)
        M_2_softmax = self.M_2_softmax(M_2_fc2)
        return M_1_softmax, M_2_softmax


class M_1_stem(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(out_channels=32, in_channels=1, kernel_size=5)
        self.pool1 = torch.nn.MaxPool2d(kernel_size=2)
        self.conv2 = torch.nn.Conv2d(out_channels=64, in_channels=32, kernel_size=5)
        self.pool2 = torch.nn.MaxPool2d(kernel_size=2)

    def forward(self, *_inputs):
        conv1 = self.conv1(_inputs[0])
        pool1 = self.pool1(conv1)
        conv2 = self.conv2(pool1)
        pool2 = self.pool2(conv2)
        return pool2


class M_2_stem(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(out_channels=32, in_channels=1, kernel_size=5)
        self.pool1 = torch.nn.MaxPool2d(kernel_size=2)
        self.conv2 = torch.nn.Conv2d(out_channels=64, in_channels=32, kernel_size=5)
        self.pool2 = torch.nn.MaxPool2d(kernel_size=2)

    def forward(self, *_inputs):
        conv1 = self.conv1(_inputs[0])
        pool1 = self.pool1(conv1)
        conv2 = self.conv2(pool1)
        pool2 = self.pool2(conv2)
        return pool2


def _reset():
    # this is to not affect other tests in sdk
    nni.trial._intermediate_seq = 0
    nni.trial._params = {'foo': 'bar', 'parameter_id': 0}
    nni.runtime.platform.test._last_metric = None
    nni.retiarii.integration_api._advisor = None
    nni.retiarii.execution.api._execution_engine = None


def _new_trainer():
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
    train_dataset = serialize(MNIST, root='data/mnist', train=True, download=True, transform=transform)
    test_dataset = serialize(MNIST, root='data/mnist', train=False, download=True, transform=transform)

    multi_module = MultiModelSupervisedLearningModule(nn.CrossEntropyLoss, {'acc': pl._AccuracyWithLogits})

    lightning = pl.Lightning(multi_module,
                             cgo_trainer.Trainer(use_cgo=True,
                                                 max_epochs=1,
                                                 limit_train_batches=0.25,
                                                 progress_bar_refresh_rate=0),
                             train_dataloader=pl.DataLoader(train_dataset, batch_size=100),
                             val_dataloaders=pl.DataLoader(test_dataset, batch_size=100))
    return lightning


def _load_mnist(n_models: int = 1):
    path = Path(__file__).parent / 'mnist_pytorch.json'
    with open(path) as f:
        mnist_model = Model._load(json.load(f))
        mnist_model.evaluator = _new_trainer()
    if n_models == 1:
        return mnist_model
    else:
        models = [mnist_model]
        for i in range(n_models - 1):
            forked_model = mnist_model.fork()
            forked_model.evaluator = _new_trainer()
            models.append(forked_model)
        return models


def _get_final_result():
    result = json.loads(nni.runtime.platform.test._last_metric)['value']
    if isinstance(result, list):
        return [float(_) for _ in result]
    else:
        if isinstance(result, str) and '[' in result:
            return json.loads(result)
        return [float(result)]


class CGOEngineTest(unittest.TestCase):

    def setUp(self):
        if module_import_failed:
            self.skipTest('test skip due to failed import of nni.retiarii.evaluator.pytorch.lightning')

    def test_multi_model_trainer_cpu(self):
        _reset()
        transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
        train_dataset = serialize(MNIST, root='data/mnist', train=True, download=True, transform=transform)
        test_dataset = serialize(MNIST, root='data/mnist', train=False, download=True, transform=transform)

        multi_module = _MultiModelSupervisedLearningModule(nn.CrossEntropyLoss, {'acc': pl._AccuracyWithLogits}, n_models=2)

        lightning = pl.Lightning(multi_module,
                                 cgo_trainer.Trainer(use_cgo=True, max_epochs=1, limit_train_batches=0.25),
                                 train_dataloader=pl.DataLoader(train_dataset, batch_size=100),
                                 val_dataloaders=pl.DataLoader(test_dataset, batch_size=100))
        lightning._execute(_model_cpu)

        result = _get_final_result()
        assert len(result) == 2
        for _ in result:
            assert _ > 0.8

    def test_multi_model_trainer_gpu(self):
        _reset()
        if not (torch.cuda.is_available() and torch.cuda.device_count() >= 2):
            pytest.skip('test requires GPU and torch+cuda')
        transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
        train_dataset = serialize(MNIST, root='data/mnist', train=True, download=True, transform=transform)
        test_dataset = serialize(MNIST, root='data/mnist', train=False, download=True, transform=transform)

        multi_module = _MultiModelSupervisedLearningModule(nn.CrossEntropyLoss, {'acc': pl._AccuracyWithLogits}, n_models=2)

        lightning = pl.Lightning(multi_module,
                                 cgo_trainer.Trainer(use_cgo=True, max_epochs=1, limit_train_batches=0.25),
                                 train_dataloader=pl.DataLoader(train_dataset, batch_size=100),
                                 val_dataloaders=pl.DataLoader(test_dataset, batch_size=100))
        lightning._execute(_model_gpu)

        result = _get_final_result()
        assert len(result) == 2
        for _ in result:
            assert _ > 0.8

    def _build_logical_with_mnist(self, n_models: int):
        lp = LogicalPlan()
        models = _load_mnist(n_models=n_models)
        for m in models:
            lp.add_model(m)
        return lp, models

    def test_add_model(self):
        _reset()
        lp, models = self._build_logical_with_mnist(3)
        for node in lp.logical_graph.hidden_nodes:
            old_nodes = [m.root_graph.get_node_by_id(node.id) for m in models]
            self.assertTrue(any([old_nodes[0].__repr__() == Node.__repr__(x) for x in old_nodes]))

    def test_dedup_input_four_devices(self):
        _reset()
        lp, models = self._build_logical_with_mnist(3)
        opt = DedupInputOptimizer()
        opt.convert(lp)
        advisor = RetiariiAdvisor()
        available_devices = [GPUDevice("test", 0), GPUDevice("test", 1),
                             GPUDevice("test", 2), GPUDevice("test", 3)]
        cgo = CGOExecutionEngine(devices=available_devices, batch_waiting_time=0)
        phy_models = cgo._assemble(lp)
        self.assertTrue(len(phy_models) == 1)
        advisor.stopping = True
        advisor.default_worker.join()
        advisor.assessor_worker.join()
        cgo.join()

    def test_dedup_input_two_devices(self):
        _reset()
        lp, models = self._build_logical_with_mnist(3)
        opt = DedupInputOptimizer()
        opt.convert(lp)
        advisor = RetiariiAdvisor()
        available_devices = [GPUDevice("test", 0), GPUDevice("test", 1)]
        cgo = CGOExecutionEngine(devices=available_devices, batch_waiting_time=0)
        phy_models = cgo._assemble(lp)
        self.assertTrue(len(phy_models) == 2)
        advisor.stopping = True
        advisor.default_worker.join()
        advisor.assessor_worker.join()
        cgo.join()

    def test_submit_models(self):
        os.environ['CGO'] = 'true'
        _reset()
        nni.retiarii.debug_configs.framework = 'pytorch'
        os.makedirs('generated', exist_ok=True)
        from nni.runtime import protocol, platform
        import nni.runtime.platform.test as tt
        protocol._out_file = open('generated/debug_protocol_out_file.py', 'wb')
        protocol._in_file = open('generated/debug_protocol_out_file.py', 'rb')

        models = _load_mnist(2)

        advisor = RetiariiAdvisor()
        cgo_engine = CGOExecutionEngine(devices=[GPUDevice("test", 0), GPUDevice("test", 1),
                                                 GPUDevice("test", 2), GPUDevice("test", 3)],
                                        batch_waiting_time=0)
        set_execution_engine(cgo_engine)
        submit_models(*models)
        time.sleep(3)

        if torch.cuda.is_available() and torch.cuda.device_count() >= 2:
            cmd, data = protocol.receive()
            params = json.loads(data)

            tt.init_params(params)

            trial_thread = threading.Thread(target=CGOExecutionEngine.trial_execute_graph)
            trial_thread.start()
            last_metric = None
            while True:
                # ... (lines elided by the diff view; next hunk @@ -66,15 +321,20 @@ class CGOEngineTest(unittest.TestCase)) ...
                metric = tt.get_last_metric()
                if metric == last_metric:
                    continue
                if 'value' in metric:
                    metric['value'] = json.dumps(metric['value'])
                advisor.handle_report_metric_data(metric)
                last_metric = metric
                if not trial_thread.is_alive():
                    trial_thread.join()
                    break

            trial_thread.join()

        advisor.stopping = True
        advisor.default_worker.join()
        advisor.assessor_worker.join()
        cgo_engine.join()


if __name__ == '__main__':
    ...
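The two `_assemble` assertions above encode a simple capacity argument: three single-GPU models fit into one merged physical model when four devices are available, but need two merged models when only two devices are available. A back-of-the-envelope restatement, as a simplification assuming one device per logical model rather than the engine's actual placement code:

import math

def n_physical_models(n_logical: int, n_devices: int) -> int:
    # each logical model occupies one device; a merged physical model can span all devices
    return math.ceil(n_logical / n_devices)

assert n_physical_models(3, 4) == 1   # matches test_dedup_input_four_devices
assert n_physical_models(3, 2) == 2   # matches test_dedup_input_two_devices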
test/ut/retiarii/test_dedup_input.py — deleted (100644 → 0)

import json
import os
import sys
import threading
import unittest
import logging
import time
from pathlib import Path

from nni.retiarii.execution.cgo_engine import CGOExecutionEngine
from nni.retiarii.execution.logical_optimizer.logical_plan import LogicalPlan
from nni.retiarii.execution.logical_optimizer.opt_dedup_input import DedupInputOptimizer
from nni.retiarii.codegen import model_to_pytorch_script
from nni.retiarii import Model, Node, submit_models
from nni.retiarii.integration import RetiariiAdvisor
from nni.retiarii.utils import import_


def _load_mnist(n_models: int = 1):
    path = Path(__file__).parent / 'converted_mnist_pytorch.json'
    with open(path) as f:
        mnist_model = Model._load(json.load(f))
    if n_models == 1:
        return mnist_model
    else:
        models = [mnist_model]
        for i in range(n_models - 1):
            models.append(mnist_model.fork())
        return models


@unittest.skip('Skipped in this version')
class DedupInputTest(unittest.TestCase):
    def _build_logical_with_mnist(self, n_models: int):
        lp = LogicalPlan()
        models = _load_mnist(n_models=n_models)
        for m in models:
            lp.add_model(m)
        return lp, models

    def _test_add_model(self):
        lp, models = self._build_logical_with_mnist(3)
        for node in lp.logical_graph.hidden_nodes:
            old_nodes = [m.root_graph.get_node_by_id(node.id) for m in models]
            self.assertTrue(any([old_nodes[0].__repr__() == Node.__repr__(x) for x in old_nodes]))

    def test_dedup_input(self):
        os.environ['CGO'] = 'true'
        lp, models = self._build_logical_with_mnist(3)
        opt = DedupInputOptimizer()
        opt.convert(lp)
        with open('dedup_logical_graph.json', 'r') as fp:
            correct_dump = fp.readlines()
        lp_dump = lp.logical_graph._dump()
        self.assertTrue(correct_dump[0] == json.dumps(lp_dump))
        advisor = RetiariiAdvisor()
        cgo = CGOExecutionEngine()
        phy_models = cgo._assemble(lp)
        self.assertTrue(len(phy_models) == 1)
        # logging.info(phy_models[0][0]._dump())
        # script = model_to_pytorch_script(phy_models[0][0], placement=phy_models[0][1])
        # logging.info(script)
        # with open('generated/debug_dedup_input.py', 'w') as fp:
        #     fp.write(script)
        # sys.path.insert(0, 'generated')
        # multi_model = import_('debug_dedup_input.logical_0')
        # trainer = PyTorchMultiModelTrainer(
        #     multi_model(), phy_models[0][0].evaluator.kwargs
        # )
        # trainer.fit()
        advisor.stopping = True
        advisor.default_worker.join()
        advisor.assessor_worker.join()


if __name__ == '__main__':
    unittest.main()
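Note that `DedupInputTest` was already decorated with `@unittest.skip`; its helpers and assertions survive in the rewritten test_cgo_engine.py above, where `_build_logical_with_mnist`, `test_add_model`, and the two `test_dedup_input_*` cases now run against an explicit device list instead of comparing against a dumped reference graph.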
test/ut/retiarii/test_engine.py — modified (+4 −1)

...
@@ -22,6 +22,8 @@ class EngineTest(unittest.TestCase):
         self.assertEqual(script.strip(), reference_script.strip())

     def test_base_execution_engine(self):
+        nni.retiarii.integration_api._advisor = None
+        nni.retiarii.execution.api._execution_engine = None
         advisor = RetiariiAdvisor()
         set_execution_engine(BaseExecutionEngine())
         with open(self.enclosing_dir / 'mnist_pytorch.json') as f:
...
@@ -33,7 +35,8 @@ class EngineTest(unittest.TestCase):
         advisor.assessor_worker.join()

     def test_py_execution_engine(self):
+        nni.retiarii.integration_api._advisor = None
+        nni.retiarii.execution.api._execution_engine = None
         advisor = RetiariiAdvisor()
         set_execution_engine(PurePythonExecutionEngine())
         model = Model._load({
...