Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
122b5b89
Unverified
Commit
122b5b89
authored
May 25, 2021
by
Yuge Zhang
Committed by
GitHub
May 25, 2021
Browse files
[Retiarii] Policy-based RL Strategy (#3650)
parent
92f6754e
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
246 additions
and
4 deletions
+246
-4
dependencies/recommended.txt
dependencies/recommended.txt
+2
-0
dependencies/recommended_legacy.txt
dependencies/recommended_legacy.txt
+2
-0
docs/requirements.txt
docs/requirements.txt
+2
-0
nni/retiarii/strategy/__init__.py
nni/retiarii/strategy/__init__.py
+1
-0
nni/retiarii/strategy/_rl_impl.py
nni/retiarii/strategy/_rl_impl.py
+121
-0
nni/retiarii/strategy/rl.py
nni/retiarii/strategy/rl.py
+92
-0
pipelines/full-test-linux.yml
pipelines/full-test-linux.yml
+1
-0
pipelines/full-test-windows.yml
pipelines/full-test-windows.yml
+1
-0
test/ut/retiarii/test_strategy.py
test/ut/retiarii/test_strategy.py
+24
-4
No files found.
dependencies/recommended.txt
View file @
122b5b89
...
...
@@ -10,3 +10,5 @@ pytorch-lightning >= 1.1.1
onnx
peewee
graphviz
gym
tianshou >= 0.4.1
dependencies/recommended_legacy.txt
View file @
122b5b89
...
...
@@ -11,3 +11,5 @@ keras == 2.1.6
onnx
peewee
graphviz
gym
tianshou >= 0.4.1
docs/requirements.txt
View file @
122b5b89
...
...
@@ -22,5 +22,7 @@ prettytable
psutil
ruamel.yaml
ipython
gym
tianshou
https://download.pytorch.org/whl/cpu/torch-1.7.1%2Bcpu-cp37-cp37m-linux_x86_64.whl
https://download.pytorch.org/whl/cpu/torchvision-0.8.2%2Bcpu-cp37-cp37m-linux_x86_64.whl
nni/retiarii/strategy/__init__.py
View file @
122b5b89
...
...
@@ -6,3 +6,4 @@ from .bruteforce import Random, GridSearch
from
.evolution
import
RegularizedEvolution
from
.tpe_strategy
import
TPEStrategy
from
.local_debug_strategy
import
_LocalDebugStrategy
from
.rl
import
PolicyBasedRL
nni/retiarii/strategy/_rl_impl.py
0 → 100644
View file @
122b5b89
# This file might cause import error for those who didn't install RL-related dependencies
import
logging
import
gym
import
numpy
as
np
import
torch
import
torch.nn
as
nn
from
gym
import
spaces
from
tianshou.data
import
to_torch
from
.utils
import
get_targeted_model
from
..graph
import
ModelStatus
from
..execution
import
submit_models
,
wait_models
_logger
=
logging
.
getLogger
(
__name__
)
class
ModelEvaluationEnv
(
gym
.
Env
):
def
__init__
(
self
,
base_model
,
mutators
,
search_space
):
self
.
base_model
=
base_model
self
.
mutators
=
mutators
self
.
search_space
=
search_space
self
.
ss_keys
=
list
(
self
.
search_space
.
keys
())
self
.
action_dim
=
max
(
map
(
lambda
v
:
len
(
v
),
self
.
search_space
.
values
()))
self
.
num_steps
=
len
(
self
.
search_space
)
@
property
def
observation_space
(
self
):
return
spaces
.
Dict
({
'action_history'
:
spaces
.
MultiDiscrete
([
self
.
action_dim
]
*
self
.
num_steps
),
'cur_step'
:
spaces
.
Discrete
(
self
.
num_steps
+
1
),
'action_dim'
:
spaces
.
Discrete
(
self
.
action_dim
+
1
)
})
@
property
def
action_space
(
self
):
return
spaces
.
Discrete
(
self
.
action_dim
)
def
reset
(
self
):
self
.
action_history
=
np
.
zeros
(
self
.
num_steps
,
dtype
=
np
.
int32
)
self
.
cur_step
=
0
self
.
sample
=
{}
return
{
'action_history'
:
self
.
action_history
,
'cur_step'
:
self
.
cur_step
,
'action_dim'
:
len
(
self
.
search_space
[
self
.
ss_keys
[
self
.
cur_step
]])
}
def
step
(
self
,
action
):
cur_key
=
self
.
ss_keys
[
self
.
cur_step
]
assert
action
<
len
(
self
.
search_space
[
cur_key
]),
\
f
'Current action
{
action
}
out of range
{
self
.
search_space
[
cur_key
]
}
.'
self
.
action_history
[
self
.
cur_step
]
=
action
self
.
sample
[
cur_key
]
=
self
.
search_space
[
cur_key
][
action
]
self
.
cur_step
+=
1
obs
=
{
'action_history'
:
self
.
action_history
,
'cur_step'
:
self
.
cur_step
,
'action_dim'
:
len
(
self
.
search_space
[
self
.
ss_keys
[
self
.
cur_step
]])
\
if
self
.
cur_step
<
self
.
num_steps
else
self
.
action_dim
}
if
self
.
cur_step
==
self
.
num_steps
:
model
=
get_targeted_model
(
self
.
base_model
,
self
.
mutators
,
self
.
sample
)
_logger
.
info
(
f
'New model created:
{
self
.
sample
}
'
)
submit_models
(
model
)
wait_models
(
model
)
if
model
.
status
==
ModelStatus
.
Failed
:
return
self
.
reset
(),
0.
,
False
,
{}
rew
=
model
.
metric
_logger
.
info
(
f
'Model metric received as reward:
{
rew
}
'
)
return
obs
,
rew
,
True
,
{}
else
:
return
obs
,
0.
,
False
,
{}
class
Preprocessor
(
nn
.
Module
):
def
__init__
(
self
,
obs_space
,
hidden_dim
=
64
,
num_layers
=
1
):
super
().
__init__
()
self
.
action_dim
=
obs_space
[
'action_history'
].
nvec
[
0
]
self
.
hidden_dim
=
hidden_dim
# first token is [SOS]
self
.
embedding
=
nn
.
Embedding
(
self
.
action_dim
+
1
,
hidden_dim
)
self
.
rnn
=
nn
.
LSTM
(
hidden_dim
,
hidden_dim
,
num_layers
,
batch_first
=
True
)
def
forward
(
self
,
obs
):
seq
=
nn
.
functional
.
pad
(
obs
[
'action_history'
]
+
1
,
(
1
,
1
))
# pad the start token and end token
# end token is used to avoid out-of-range of v_s_. Will not actually affect BP.
seq
=
self
.
embedding
(
seq
.
long
())
feature
,
_
=
self
.
rnn
(
seq
)
return
feature
[
torch
.
arange
(
len
(
feature
),
device
=
feature
.
device
),
obs
[
'cur_step'
].
long
()
+
1
]
class
Actor
(
nn
.
Module
):
def
__init__
(
self
,
action_space
,
preprocess
):
super
().
__init__
()
self
.
preprocess
=
preprocess
self
.
action_dim
=
action_space
.
n
self
.
linear
=
nn
.
Linear
(
self
.
preprocess
.
hidden_dim
,
self
.
action_dim
)
def
forward
(
self
,
obs
,
**
kwargs
):
obs
=
to_torch
(
obs
,
device
=
self
.
linear
.
weight
.
device
)
out
=
self
.
linear
(
self
.
preprocess
(
obs
))
# to take care of choices with different number of options
mask
=
torch
.
arange
(
self
.
action_dim
).
expand
(
len
(
out
),
self
.
action_dim
)
>=
obs
[
'action_dim'
].
unsqueeze
(
1
)
out
[
mask
.
to
(
out
.
device
)]
=
float
(
'-inf'
)
return
nn
.
functional
.
softmax
(
out
),
kwargs
.
get
(
'state'
,
None
)
class
Critic
(
nn
.
Module
):
def
__init__
(
self
,
preprocess
):
super
().
__init__
()
self
.
preprocess
=
preprocess
self
.
linear
=
nn
.
Linear
(
self
.
preprocess
.
hidden_dim
,
1
)
def
forward
(
self
,
obs
,
**
kwargs
):
obs
=
to_torch
(
obs
,
device
=
self
.
linear
.
weight
.
device
)
return
self
.
linear
(
self
.
preprocess
(
obs
)).
squeeze
(
-
1
)
nni/retiarii/strategy/rl.py
0 → 100644
View file @
122b5b89
import
logging
from
typing
import
Optional
,
Callable
from
.base
import
BaseStrategy
from
.utils
import
dry_run_for_search_space
from
..execution
import
query_available_resources
try
:
has_tianshou
=
True
import
torch
from
tianshou.data
import
AsyncCollector
,
Collector
,
VectorReplayBuffer
from
tianshou.env
import
SubprocVectorEnv
from
tianshou.policy
import
BasePolicy
,
PPOPolicy
# pylint: disable=unused-import
from
._rl_impl
import
ModelEvaluationEnv
,
Preprocessor
,
Actor
,
Critic
except
ImportError
:
has_tianshou
=
False
_logger
=
logging
.
getLogger
(
__name__
)
class
PolicyBasedRL
(
BaseStrategy
):
"""
Algorithm for policy-based reinforcement learning.
This is a wrapper of algorithms provided in tianshou (PPO by default),
and can be easily customized with other algorithms that inherit ``BasePolicy`` (e.g., REINFORCE [1]_).
Note that RL algorithms are known to have issues on Windows and MacOS. They will be supported in future.
Parameters
----------
max_collect : int
How many times collector runs to collect trials for RL. Default 100.
trial_per_collect : int
How many trials (trajectories) each time collector collects.
After each collect, trainer will sample batch from replay buffer and do the update. Default: 20.
policy_fn : function
Takes ``ModelEvaluationEnv`` as input and return a policy. See ``_default_policy_fn`` for an example.
asynchronous : bool
If true, in each step, collector won't wait for all the envs to complete.
This should generally not affect the result, but might affect the efficiency. Note that a slightly more trials
than expected might be collected if this is enabled.
If asynchronous is false, collector will wait for all parallel environments to complete in each step.
See ``tianshou.data.AsyncCollector`` for more details.
References
----------
.. [1] Barret Zoph and Quoc V. Le, "Neural Architecture Search with Reinforcement Learning".
https://arxiv.org/abs/1611.01578
"""
def
__init__
(
self
,
max_collect
:
int
=
100
,
trial_per_collect
=
20
,
policy_fn
:
Optional
[
Callable
[[
'ModelEvaluationEnv'
],
'BasePolicy'
]]
=
None
,
asynchronous
:
bool
=
True
):
if
not
has_tianshou
:
raise
ImportError
(
'`tianshou` is required to run RL-based strategy. '
'Please use "pip install tianshou" to install it beforehand.'
)
self
.
policy_fn
=
policy_fn
or
self
.
_default_policy_fn
self
.
max_collect
=
max_collect
self
.
trial_per_collect
=
trial_per_collect
self
.
asynchronous
=
asynchronous
@
staticmethod
def
_default_policy_fn
(
env
):
net
=
Preprocessor
(
env
.
observation_space
)
actor
=
Actor
(
env
.
action_space
,
net
)
critic
=
Critic
(
net
)
optim
=
torch
.
optim
.
Adam
(
set
(
actor
.
parameters
()).
union
(
critic
.
parameters
()),
lr
=
1e-4
)
return
PPOPolicy
(
actor
,
critic
,
optim
,
torch
.
distributions
.
Categorical
,
discount_factor
=
1.
,
action_space
=
env
.
action_space
)
def
run
(
self
,
base_model
,
applied_mutators
):
search_space
=
dry_run_for_search_space
(
base_model
,
applied_mutators
)
concurrency
=
query_available_resources
()
env_fn
=
lambda
:
ModelEvaluationEnv
(
base_model
,
applied_mutators
,
search_space
)
policy
=
self
.
policy_fn
(
env_fn
())
if
self
.
asynchronous
:
# wait for half of the env complete in each step
env
=
SubprocVectorEnv
([
env_fn
for
_
in
range
(
concurrency
)],
wait_num
=
int
(
concurrency
*
0.5
))
collector
=
AsyncCollector
(
policy
,
env
,
VectorReplayBuffer
(
20000
,
len
(
env
)))
else
:
env
=
SubprocVectorEnv
([
env_fn
for
_
in
range
(
concurrency
)])
collector
=
Collector
(
policy
,
env
,
VectorReplayBuffer
(
20000
,
len
(
env
)))
for
cur_collect
in
range
(
1
,
self
.
max_collect
+
1
):
_logger
.
info
(
'Collect [%d] Running...'
,
cur_collect
)
result
=
collector
.
collect
(
n_episode
=
self
.
trial_per_collect
)
_logger
.
info
(
'Collect [%d] Result: %s'
,
cur_collect
,
str
(
result
))
policy
.
update
(
0
,
collector
.
buffer
,
batch_size
=
64
,
repeat
=
5
)
pipelines/full-test-linux.yml
View file @
122b5b89
...
...
@@ -35,6 +35,7 @@ jobs:
python3 -m pip install keras==2.1.6
python3 -m pip install tensorflow==2.3.1 tensorflow-estimator==2.3.0
python3 -m pip install thop
python3 -m pip install tianshou>=0.4.1 gym
sudo apt-get install swig -y
displayName
:
Install extra dependencies
...
...
pipelines/full-test-windows.yml
View file @
122b5b89
...
...
@@ -30,6 +30,7 @@ jobs:
python -m pip install torch==1.6.0 torchvision==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html
python -m pip install 'pytorch-lightning>=1.1.1'
python -m pip install tensorflow==2.3.1 tensorflow-estimator==2.3.0
python -m pip install tianshou>=0.4.1 gym
displayName
:
Install extra dependencies
# Need del later
...
...
test/ut/retiarii/test_strategy.py
View file @
122b5b89
import
random
import
sys
import
time
import
threading
from
typing
import
*
...
...
@@ -6,6 +7,7 @@ from typing import *
import
nni.retiarii.execution.api
import
nni.retiarii.nn.pytorch
as
nn
import
nni.retiarii.strategy
as
strategy
import
pytest
import
torch
import
torch.nn.functional
as
F
from
nni.retiarii
import
Model
...
...
@@ -58,7 +60,7 @@ def _reset_execution_engine(engine=None):
class
Net
(
nn
.
Module
):
def
__init__
(
self
,
hidden_size
=
32
):
def
__init__
(
self
,
hidden_size
=
32
,
diff_size
=
False
):
super
(
Net
,
self
).
__init__
()
self
.
conv1
=
nn
.
Conv2d
(
1
,
20
,
5
,
1
)
self
.
conv2
=
nn
.
Conv2d
(
20
,
50
,
5
,
1
)
...
...
@@ -69,7 +71,7 @@ class Net(nn.Module):
self
.
fc2
=
nn
.
LayerChoice
([
nn
.
Linear
(
hidden_size
,
10
,
bias
=
False
),
nn
.
Linear
(
hidden_size
,
10
,
bias
=
True
)
],
label
=
'fc2'
)
]
+
([]
if
not
diff_size
else
[
nn
.
Linear
(
hidden_size
,
10
,
bias
=
False
)])
,
label
=
'fc2'
)
def
forward
(
self
,
x
):
x
=
F
.
relu
(
self
.
conv1
(
x
))
...
...
@@ -82,8 +84,8 @@ class Net(nn.Module):
return
F
.
log_softmax
(
x
,
dim
=
1
)
def
_get_model_and_mutators
():
base_model
=
Net
()
def
_get_model_and_mutators
(
**
kwargs
):
base_model
=
Net
(
**
kwargs
)
script_module
=
torch
.
jit
.
script
(
base_model
)
base_model_ir
=
convert_to_graph
(
script_module
,
base_model
)
base_model_ir
.
evaluator
=
DebugEvaluator
()
...
...
@@ -139,7 +141,25 @@ def test_evolution():
_reset_execution_engine
()
@
pytest
.
mark
.
skipif
(
sys
.
platform
in
(
'win32'
,
'darwin'
),
reason
=
'Does not run on Windows and MacOS'
)
def
test_rl
():
rl
=
strategy
.
PolicyBasedRL
(
max_collect
=
2
,
trial_per_collect
=
10
)
engine
=
MockExecutionEngine
(
failure_prob
=
0.2
)
_reset_execution_engine
(
engine
)
rl
.
run
(
*
_get_model_and_mutators
(
diff_size
=
True
))
wait_models
(
*
engine
.
models
)
_reset_execution_engine
()
rl
=
strategy
.
PolicyBasedRL
(
max_collect
=
2
,
trial_per_collect
=
10
,
asynchronous
=
False
)
engine
=
MockExecutionEngine
(
failure_prob
=
0.2
)
_reset_execution_engine
(
engine
)
rl
.
run
(
*
_get_model_and_mutators
())
wait_models
(
*
engine
.
models
)
_reset_execution_engine
()
if
__name__
==
'__main__'
:
test_grid_search
()
test_random_search
()
test_evolution
()
test_rl
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment