OpenDAS / nni, commit a95ccb6e (Unverified)

remove old nas examples (#2285)

Authored Apr 08, 2020 by Yuge Zhang; committed via GitHub on Apr 08, 2020.
Parent commit: 4b598dd1
Showing 13 changed files with 0 additions and 1490 deletions.

examples/trials/nas_cifar10/macro_cifar10.sh                  +0 −31
examples/trials/nas_cifar10/macro_cifar10_pai.sh              +0 −31
examples/trials/nas_cifar10/src/__init__.py                   +0 −0
examples/trials/nas_cifar10/src/cifar10/__init__.py           +0 −0
examples/trials/nas_cifar10/src/cifar10/data_utils.py         +0 −74
examples/trials/nas_cifar10/src/cifar10/general_child.py      +0 −423
examples/trials/nas_cifar10/src/cifar10/models.py             +0 −196
examples/trials/nas_cifar10/src/cifar10/nni_child_cifar10.py  +0 −162
examples/trials/nas_cifar10/src/cifar10_flags.py              +0 −45
examples/trials/nas_cifar10/src/common_ops.py                 +0 −255
examples/trials/nas_cifar10/src/utils.py                      +0 −262
examples/tuners/enas_nni/README.md                            +0 −6
examples/tuners/enas_nni/README_zh_CN.md                      +0 −5
examples/trials/nas_cifar10/macro_cifar10.sh (deleted, 100644 → 0)
```bash
#!/bin/bash
set -e
export PYTHONPATH="$(pwd)"

python3 src/cifar10/nni_child_cifar10.py \
  --data_format="NCHW" \
  --search_for="macro" \
  --reset_output_dir \
  --data_path="data/cifar10" \
  --output_dir="outputs" \
  --train_data_size=45000 \
  --batch_size=100 \
  --num_epochs=8 \
  --log_every=50 \
  --eval_every_epochs=1 \
  --child_use_aux_heads \
  --child_num_layers=12 \
  --child_out_filters=36 \
  --child_l2_reg=0.0002 \
  --child_num_branches=6 \
  --child_num_cell_layers=5 \
  --child_keep_prob=0.50 \
  --child_drop_path_keep_prob=0.60 \
  --child_lr_cosine \
  --child_lr_max=0.05 \
  --child_lr_min=0.001 \
  --child_lr_T_0=10 \
  --child_lr_T_mul=2 \
  --child_mode="subgraph" \
  "$@"
```
examples/trials/nas_cifar10/macro_cifar10_pai.sh (deleted, 100644 → 0)
```bash
#!/bin/bash
set -e
export PYTHONPATH="$(pwd)"

python3 src/cifar10/nni_child_cifar10.py \
  --data_format="NCHW" \
  --search_for="macro" \
  --reset_output_dir \
  --data_path="data/cifar10" \
  --output_dir="outputs" \
  --train_data_size=45000 \
  --batch_size=100 \
  --num_epochs=30 \
  --log_every=50 \
  --eval_every_epochs=1 \
  --child_use_aux_heads \
  --child_num_layers=12 \
  --child_out_filters=36 \
  --child_l2_reg=0.0002 \
  --child_num_branches=6 \
  --child_num_cell_layers=5 \
  --child_keep_prob=0.50 \
  --child_drop_path_keep_prob=0.60 \
  --child_lr_cosine \
  --child_lr_max=0.05 \
  --child_lr_min=0.001 \
  --child_lr_T_0=10 \
  --child_lr_T_mul=2 \
  --child_mode="subgraph" \
  "$@"
```
examples/trials/nas_cifar10/src/__init__.py (deleted, 100644 → 0; empty file)
examples/trials/nas_cifar10/src/cifar10/__init__.py (deleted, 100644 → 0; empty file)
examples/trials/nas_cifar10/src/cifar10/data_utils.py (deleted, 100644 → 0)
```python
import os
import sys
import pickle
import numpy as np
import tensorflow as tf


def _read_data(data_path, train_files):
    """Reads CIFAR-10 format data. Always returns NHWC format.

    Returns:
      images: np tensor of size [N, H, W, C]
      labels: np tensor of size [N]
    """
    images, labels = [], []
    for file_name in train_files:
        print(file_name)
        full_name = os.path.join(data_path, file_name)
        with open(full_name, "rb") as finp:
            data = pickle.load(finp, encoding='latin1')
            batch_images = data["data"].astype(np.float32) / 255.0
            batch_labels = np.array(data["labels"], dtype=np.int32)
            images.append(batch_images)
            labels.append(batch_labels)
    images = np.concatenate(images, axis=0)
    labels = np.concatenate(labels, axis=0)
    images = np.reshape(images, [-1, 3, 32, 32])
    images = np.transpose(images, [0, 2, 3, 1])

    return images, labels


def read_data(data_path, num_valids=5000):
    print("-" * 80)
    print("Reading data")

    images, labels = {}, {}

    train_files = [
        "data_batch_1",
        "data_batch_2",
        "data_batch_3",
        "data_batch_4",
        "data_batch_5",
    ]
    test_file = [
        "test_batch",
    ]
    images["train"], labels["train"] = _read_data(data_path, train_files)

    if num_valids:
        images["valid"] = images["train"][-num_valids:]
        labels["valid"] = labels["train"][-num_valids:]
        images["train"] = images["train"][:-num_valids]
        labels["train"] = labels["train"][:-num_valids]
    else:
        images["valid"], labels["valid"] = None, None

    images["test"], labels["test"] = _read_data(data_path, test_file)

    print("Preprocess: [subtract mean], [divide std]")
    mean = np.mean(images["train"], axis=(0, 1, 2), keepdims=True)
    std = np.std(images["train"], axis=(0, 1, 2), keepdims=True)

    print("mean: {}".format(np.reshape(mean * 255.0, [-1])))
    print("std: {}".format(np.reshape(std * 255.0, [-1])))

    images["train"] = (images["train"] - mean) / std
    if num_valids:
        images["valid"] = (images["valid"] - mean) / std
    images["test"] = (images["test"] - mean) / std

    return images, labels
```
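A minimal usage sketch of `read_data` above, for readers skimming the removed example. It assumes the Python-version CIFAR-10 batch files have been extracted under `data/cifar10`; the shapes follow from the code, not from the original diff:

```python
from src.cifar10.data_utils import read_data

# Hypothetical illustration: load and normalize CIFAR-10 as the trial did.
images, labels = read_data("data/cifar10", num_valids=5000)
print(images["train"].shape)  # (45000, 32, 32, 3), NHWC, mean/std normalized
print(images["valid"].shape)  # (5000, 32, 32, 3)
print(images["test"].shape)   # (10000, 32, 32, 3)
```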
examples/trials/nas_cifar10/src/cifar10/general_child.py (deleted, 100644 → 0)
```python
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf

from src.common_ops import create_weight, batch_norm, batch_norm_with_mask, \
    global_avg_pool, conv_op, pool_op
from src.utils import count_model_params, get_train_ops, get_C, get_strides
from src.cifar10.models import Model


class GeneralChild(Model):
    def __init__(self,
                 images,
                 labels,
                 cutout_size=None,
                 fixed_arc=None,
                 out_filters_scale=1,
                 num_layers=2,
                 num_branches=6,
                 out_filters=24,
                 keep_prob=1.0,
                 batch_size=32,
                 clip_mode=None,
                 grad_bound=None,
                 l2_reg=1e-4,
                 lr_init=0.1,
                 lr_dec_start=0,
                 lr_dec_every=10000,
                 lr_dec_rate=0.1,
                 lr_cosine=False,
                 lr_max=None,
                 lr_min=None,
                 lr_T_0=None,
                 lr_T_mul=None,
                 optim_algo=None,
                 sync_replicas=False,
                 num_aggregate=None,
                 num_replicas=None,
                 data_format="NHWC",
                 name="child",
                 mode="subgraph",
                 *args,
                 **kwargs):
        super(self.__class__, self).__init__(
            images,
            labels,
            cutout_size=cutout_size,
            batch_size=batch_size,
            clip_mode=clip_mode,
            grad_bound=grad_bound,
            l2_reg=l2_reg,
            lr_init=lr_init,
            lr_dec_start=lr_dec_start,
            lr_dec_every=lr_dec_every,
            lr_dec_rate=lr_dec_rate,
            keep_prob=keep_prob,
            optim_algo=optim_algo,
            sync_replicas=sync_replicas,
            num_aggregate=num_aggregate,
            num_replicas=num_replicas,
            data_format=data_format,
            name=name)

        self.lr_cosine = lr_cosine
        self.lr_max = lr_max
        self.lr_min = lr_min
        self.lr_T_0 = lr_T_0
        self.lr_T_mul = lr_T_mul
        self.out_filters = out_filters * out_filters_scale
        self.num_layers = num_layers
        self.mode = mode

        self.num_branches = num_branches
        self.fixed_arc = fixed_arc
        self.out_filters_scale = out_filters_scale

        pool_distance = self.num_layers // 3
        self.pool_layers = [pool_distance - 1, 2 * pool_distance - 1]

    def _factorized_reduction(self, x, out_filters, stride, is_training):
        """Reduces the shape of x without information loss due to striding."""
        assert out_filters % 2 == 0, (
            "Need even number of filters when using this factorized reduction.")
        if stride == 1:
            with tf.variable_scope("path_conv"):
                inp_c = get_C(x, self.data_format)
                w = create_weight("w", [1, 1, inp_c, out_filters])
                x = tf.nn.conv2d(x, w, [1, 1, 1, 1], "SAME",
                                 data_format=self.data_format)
                x = batch_norm(x, is_training, data_format=self.data_format)
                return x

        stride_spec = get_strides(stride, self.data_format)
        # Skip path 1
        path1 = tf.nn.avg_pool(x, [1, 1, 1, 1], stride_spec, "VALID",
                               data_format=self.data_format)
        with tf.variable_scope("path1_conv"):
            inp_c = get_C(path1, self.data_format)
            w = create_weight("w", [1, 1, inp_c, out_filters // 2])
            path1 = tf.nn.conv2d(path1, w, [1, 1, 1, 1], "SAME",
                                 data_format=self.data_format)

        # Skip path 2
        # First pad with 0"s on the right and bottom, then shift the filter to
        # include those 0"s that were added.
        if self.data_format == "NHWC":
            pad_arr = [[0, 0], [0, 1], [0, 1], [0, 0]]
            path2 = tf.pad(x, pad_arr)[:, 1:, 1:, :]
            concat_axis = 3
        else:
            pad_arr = [[0, 0], [0, 0], [0, 1], [0, 1]]
            path2 = tf.pad(x, pad_arr)[:, :, 1:, 1:]
            concat_axis = 1

        path2 = tf.nn.avg_pool(path2, [1, 1, 1, 1], stride_spec, "VALID",
                               data_format=self.data_format)
        with tf.variable_scope("path2_conv"):
            inp_c = get_C(path2, self.data_format)
            w = create_weight("w", [1, 1, inp_c, out_filters // 2])
            path2 = tf.nn.conv2d(path2, w, [1, 1, 1, 1], "SAME",
                                 data_format=self.data_format)

        # Concat and apply BN
        final_path = tf.concat(values=[path1, path2], axis=concat_axis)
        final_path = batch_norm(final_path, is_training,
                                data_format=self.data_format)

        return final_path

    def _model(self, images, is_training, reuse=False):
        '''Build model'''
        with tf.variable_scope(self.name, reuse=reuse):
            layers = []
            out_filters = self.out_filters
            with tf.variable_scope("stem_conv"):
                w = create_weight("w", [3, 3, 3, out_filters])
                x = tf.nn.conv2d(images, w, [1, 1, 1, 1], "SAME",
                                 data_format=self.data_format)
                x = batch_norm(x, is_training, data_format=self.data_format)
                layers.append(x)

            def add_fixed_pooling_layer(layer_id, layers, out_filters, is_training):
                '''Add a fixed pooling layer every four layers'''
                out_filters *= 2
                with tf.variable_scope("pool_at_{0}".format(layer_id)):
                    pooled_layers = []
                    for i, layer in enumerate(layers):
                        with tf.variable_scope("from_{0}".format(i)):
                            x = self._factorized_reduction(
                                layer, out_filters, 2, is_training)
                        pooled_layers.append(x)
                return pooled_layers, out_filters

            def post_process_out(out, optional_inputs):
                '''Form skip connection and perform batch norm'''
                with tf.variable_scope("skip"):
                    inputs = layers[-1]
                    if self.data_format == "NHWC":
                        inp_h = inputs.get_shape()[1].value
                        inp_w = inputs.get_shape()[2].value
                        inp_c = inputs.get_shape()[3].value
                        out.set_shape([None, inp_h, inp_w, out_filters])
                    elif self.data_format == "NCHW":
                        inp_c = inputs.get_shape()[1].value
                        inp_h = inputs.get_shape()[2].value
                        inp_w = inputs.get_shape()[3].value
                        out.set_shape([None, out_filters, inp_h, inp_w])
                    optional_inputs.append(out)
                    pout = tf.add_n(optional_inputs)
                    out = batch_norm(pout, is_training,
                                     data_format=self.data_format)
                layers.append(out)
                return out

            global layer_id
            layer_id = -1

            def get_layer_id():
                global layer_id
                layer_id += 1
                return 'layer_' + str(layer_id)

            def conv3(inputs):
                # res_layers is pre_layers that are chosen to form skip connection
                # layers[-1] is always the latest input
                with tf.variable_scope(get_layer_id()):
                    with tf.variable_scope('branch_0'):
                        out = conv_op(inputs[0][0], 3, is_training, out_filters,
                                      out_filters, self.data_format,
                                      start_idx=None)
                    out = post_process_out(out, inputs[1])
                return out

            def conv3_sep(inputs):
                with tf.variable_scope(get_layer_id()):
                    with tf.variable_scope('branch_1'):
                        out = conv_op(inputs[0][0], 3, is_training, out_filters,
                                      out_filters, self.data_format,
                                      start_idx=None, separable=True)
                    out = post_process_out(out, inputs[1])
                return out

            def conv5(inputs):
                with tf.variable_scope(get_layer_id()):
                    with tf.variable_scope('branch_2'):
                        out = conv_op(inputs[0][0], 5, is_training, out_filters,
                                      out_filters, self.data_format,
                                      start_idx=None)
                    out = post_process_out(out, inputs[1])
                return out

            def conv5_sep(inputs):
                with tf.variable_scope(get_layer_id()):
                    with tf.variable_scope('branch_3'):
                        out = conv_op(inputs[0][0], 5, is_training, out_filters,
                                      out_filters, self.data_format,
                                      start_idx=None, separable=True)
                    out = post_process_out(out, inputs[1])
                return out

            def avg_pool(inputs):
                with tf.variable_scope(get_layer_id()):
                    with tf.variable_scope('branch_4'):
                        out = pool_op(inputs[0][0], is_training, out_filters,
                                      out_filters, "avg", self.data_format,
                                      start_idx=None)
                    out = post_process_out(out, inputs[1])
                return out

            def max_pool(inputs):
                with tf.variable_scope(get_layer_id()):
                    with tf.variable_scope('branch_5'):
                        out = pool_op(inputs[0][0], is_training, out_filters,
                                      out_filters, "max", self.data_format,
                                      start_idx=None)
                    out = post_process_out(out, inputs[1])
                return out

            """@nni.mutable_layers(
            {
                layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
                fixed_inputs:[x],
                layer_output: layer_0_out
            },
            {
                layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
                fixed_inputs:[layer_0_out],
                optional_inputs: [layer_0_out],
                optional_input_size: [0, 1],
                layer_output: layer_1_out
            },
            {
                layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
                fixed_inputs:[layer_1_out],
                optional_inputs: [layer_0_out, layer_1_out],
                optional_input_size: [0, 1],
                layer_output: layer_2_out
            },
            {
                layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
                fixed_inputs:[layer_2_out],
                optional_inputs: [layer_0_out, layer_1_out, layer_2_out],
                optional_input_size: [0, 1],
                layer_output: layer_3_out
            }
            )"""

            layers, out_filters = add_fixed_pooling_layer(
                3, layers, out_filters, is_training)
            layer_0_out, layer_1_out, layer_2_out, layer_3_out = layers[-4:]

            """@nni.mutable_layers(
            {
                layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
                fixed_inputs: [layer_3_out],
                optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out],
                optional_input_size: [0, 1],
                layer_output: layer_4_out
            },
            {
                layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
                fixed_inputs: [layer_4_out],
                optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out],
                optional_input_size: [0, 1],
                layer_output: layer_5_out
            },
            {
                layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
                fixed_inputs: [layer_5_out],
                optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out],
                optional_input_size: [0, 1],
                layer_output: layer_6_out
            },
            {
                layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
                fixed_inputs: [layer_6_out],
                optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out],
                optional_input_size: [0, 1],
                layer_output: layer_7_out
            }
            )"""

            layers, out_filters = add_fixed_pooling_layer(
                7, layers, out_filters, is_training)
            layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, \
                layer_5_out, layer_6_out, layer_7_out = layers[-8:]

            """@nni.mutable_layers(
            {
                layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
                fixed_inputs: [layer_7_out],
                optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out],
                optional_input_size: [0, 1],
                layer_output: layer_8_out
            },
            {
                layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
                fixed_inputs: [layer_8_out],
                optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out, layer_8_out],
                optional_input_size: [0, 1],
                layer_output: layer_9_out
            },
            {
                layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
                fixed_inputs: [layer_9_out],
                optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out, layer_8_out, layer_9_out],
                optional_input_size: [0, 1],
                layer_output: layer_10_out
            },
            {
                layer_choice: [conv3(), conv3_sep(), conv5(), conv5_sep(), avg_pool(), max_pool()],
                fixed_inputs:[layer_10_out],
                optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out, layer_8_out, layer_9_out, layer_10_out],
                optional_input_size: [0, 1],
                layer_output: layer_11_out
            }
            )"""

            x = global_avg_pool(layer_11_out, data_format=self.data_format)
            if is_training:
                x = tf.nn.dropout(x, self.keep_prob)
            with tf.variable_scope("fc"):
                if self.data_format == "NHWC":
                    inp_c = x.get_shape()[3].value
                elif self.data_format == "NCHW":
                    inp_c = x.get_shape()[1].value
                else:
                    raise ValueError(
                        "Unknown data_format {0}".format(self.data_format))
                w = create_weight("w", [inp_c, 10])
                x = tf.matmul(x, w)
        return x

    # override
    def _build_train(self):
        print("-" * 80)
        print("Build train graph")
        logits = self._model(self.x_train, is_training=True)
        log_probs = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=self.y_train)
        self.loss = tf.reduce_mean(log_probs)

        self.train_preds = tf.argmax(logits, axis=1)
        self.train_preds = tf.to_int32(self.train_preds)
        self.train_acc = tf.equal(self.train_preds, self.y_train)
        self.train_acc = tf.to_int32(self.train_acc)
        self.train_acc = tf.reduce_sum(self.train_acc)

        tf_variables = [var for var in tf.trainable_variables()
                        if var.name.startswith(self.name)]
        self.num_vars = count_model_params(tf_variables)
        print("Model has {} params".format(self.num_vars))

        self.global_step = tf.Variable(
            0, dtype=tf.int32, trainable=False, name="global_step")
        self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
            self.loss,
            tf_variables,
            self.global_step,
            clip_mode=self.clip_mode,
            grad_bound=self.grad_bound,
            l2_reg=self.l2_reg,
            lr_init=self.lr_init,
            lr_dec_start=self.lr_dec_start,
            lr_dec_every=self.lr_dec_every,
            lr_dec_rate=self.lr_dec_rate,
            lr_cosine=self.lr_cosine,
            lr_max=self.lr_max,
            lr_min=self.lr_min,
            lr_T_0=self.lr_T_0,
            lr_T_mul=self.lr_T_mul,
            num_train_batches=self.num_train_batches,
            optim_algo=self.optim_algo,
            sync_replicas=False,
            num_aggregate=self.num_aggregate,
            num_replicas=self.num_replicas)

    # override
    def _build_valid(self):
        if self.x_valid is not None:
            print("-" * 80)
            print("Build valid graph")
            logits = self._model(self.x_valid, False, reuse=True)
            self.valid_preds = tf.argmax(logits, axis=1)
            self.valid_preds = tf.to_int32(self.valid_preds)
            self.valid_acc = tf.equal(self.valid_preds, self.y_valid)
            self.valid_acc = tf.to_int32(self.valid_acc)
            self.valid_acc = tf.reduce_sum(self.valid_acc)

    # override
    def _build_test(self):
        print("-" * 80)
        print("Build test graph")
        logits = self._model(self.x_test, False, reuse=True)
        self.test_preds = tf.argmax(logits, axis=1)
        self.test_preds = tf.to_int32(self.test_preds)
        self.test_acc = tf.equal(self.test_preds, self.y_test)
        self.test_acc = tf.to_int32(self.test_acc)
        self.test_acc = tf.reduce_sum(self.test_acc)

    def build_model(self):
        self._build_train()
        self._build_valid()
        self._build_test()
```
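The triple-quoted `@nni.mutable_layers` blocks above are NNI annotations that the NNI annotation converter rewrites at run time; `layer_11_out` and friends only exist after that rewriting. As a hedged illustration of the calling convention the branch functions expect (this is not the generated code): `inputs[0]` carries the fixed inputs and `inputs[1]` the tuner-chosen optional skip inputs that `post_process_out()` sums into a skip connection:

```python
# Toy stand-in for a branch such as conv3(); real branches receive TF tensors.
def fake_branch(inputs):
    fixed = inputs[0][0]   # the single fixed input
    optional = inputs[1]   # list of chosen optional skip inputs
    return fixed + sum(optional)

layer_0_out = 1.0          # numeric stand-in for a tensor
layer_1_out = fake_branch([[layer_0_out], [layer_0_out]])
print(layer_1_out)         # 2.0
```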
examples/trials/nas_cifar10/src/cifar10/models.py (deleted, 100644 → 0)
```python
import os
import sys
import numpy as np
import tensorflow as tf


class Model(object):
    def __init__(self,
                 images,
                 labels,
                 cutout_size=None,
                 batch_size=32,
                 eval_batch_size=100,
                 clip_mode=None,
                 grad_bound=None,
                 l2_reg=1e-4,
                 lr_init=0.1,
                 lr_dec_start=0,
                 lr_dec_every=100,
                 lr_dec_rate=0.1,
                 keep_prob=1.0,
                 optim_algo=None,
                 sync_replicas=False,
                 num_aggregate=None,
                 num_replicas=None,
                 data_format="NHWC",
                 name="generic_model",
                 seed=None,
                 ):
        """
        Args:
          lr_dec_every: number of epochs to decay
        """
        print("-" * 80)
        print("Build model {}".format(name))

        self.cutout_size = cutout_size
        self.batch_size = batch_size
        self.eval_batch_size = eval_batch_size
        self.clip_mode = clip_mode
        self.grad_bound = grad_bound
        self.l2_reg = l2_reg
        self.lr_init = lr_init
        self.lr_dec_start = lr_dec_start
        self.lr_dec_rate = lr_dec_rate
        self.keep_prob = keep_prob
        self.optim_algo = optim_algo
        self.sync_replicas = sync_replicas
        self.num_aggregate = num_aggregate
        self.num_replicas = num_replicas
        self.data_format = data_format
        self.name = name
        self.seed = seed

        self.global_step = None
        self.valid_acc = None
        self.test_acc = None
        print("Build data ops")
        with tf.device("/cpu:0"):
            # training data
            self.num_train_examples = np.shape(images["train"])[0]
            self.num_train_batches = (
                self.num_train_examples + self.batch_size - 1) // self.batch_size
            x_train, y_train = tf.train.shuffle_batch(
                [images["train"], labels["train"]],
                batch_size=self.batch_size,
                capacity=50000,
                enqueue_many=True,
                min_after_dequeue=0,
                num_threads=16,
                seed=self.seed,
                allow_smaller_final_batch=True,
            )
            self.lr_dec_every = lr_dec_every * self.num_train_batches

            def _pre_process(x):
                x = tf.pad(x, [[4, 4], [4, 4], [0, 0]])
                x = tf.random_crop(x, [32, 32, 3], seed=self.seed)
                x = tf.image.random_flip_left_right(x, seed=self.seed)
                if self.cutout_size is not None:
                    mask = tf.ones(
                        [self.cutout_size, self.cutout_size], dtype=tf.int32)
                    start = tf.random_uniform(
                        [2], minval=0, maxval=32, dtype=tf.int32)
                    mask = tf.pad(
                        mask,
                        [[self.cutout_size + start[0], 32 - start[0]],
                         [self.cutout_size + start[1], 32 - start[1]]])
                    mask = mask[self.cutout_size: self.cutout_size + 32,
                                self.cutout_size: self.cutout_size + 32]
                    mask = tf.reshape(mask, [32, 32, 1])
                    mask = tf.tile(mask, [1, 1, 3])
                    x = tf.where(tf.equal(mask, 0), x=x, y=tf.zeros_like(x))
                if self.data_format == "NCHW":
                    x = tf.transpose(x, [2, 0, 1])
                return x

            self.x_train = tf.map_fn(_pre_process, x_train, back_prop=False)
            self.y_train = y_train

            # valid data
            self.x_valid, self.y_valid = None, None
            if images["valid"] is not None:
                images["valid_original"] = np.copy(images["valid"])
                labels["valid_original"] = np.copy(labels["valid"])
                if self.data_format == "NCHW":
                    images["valid"] = tf.transpose(
                        images["valid"], [0, 3, 1, 2])
                self.num_valid_examples = np.shape(images["valid"])[0]
                self.num_valid_batches = (
                    (self.num_valid_examples + self.eval_batch_size - 1)
                    // self.eval_batch_size)
                self.x_valid, self.y_valid = tf.train.batch(
                    [images["valid"], labels["valid"]],
                    batch_size=self.eval_batch_size,
                    capacity=5000,
                    enqueue_many=True,
                    num_threads=1,
                    allow_smaller_final_batch=True,
                )

            # test data
            if self.data_format == "NCHW":
                images["test"] = tf.transpose(images["test"], [0, 3, 1, 2])
            self.num_test_examples = np.shape(images["test"])[0]
            self.num_test_batches = (
                (self.num_test_examples + self.eval_batch_size - 1)
                // self.eval_batch_size)
            self.x_test, self.y_test = tf.train.batch(
                [images["test"], labels["test"]],
                batch_size=self.eval_batch_size,
                capacity=10000,
                enqueue_many=True,
                num_threads=1,
                allow_smaller_final_batch=True,
            )

        # cache images and labels
        self.images = images
        self.labels = labels

    def eval_once(self, sess, eval_set, child_model, verbose=False):
        """Expects self.acc and self.global_step to be defined.

        Args:
          sess: tf.Session() or one of its wrap arounds.
          feed_dict: can be used to give more information to sess.run().
          eval_set: "valid" or "test"
        """

        assert self.global_step is not None
        global_step = sess.run(self.global_step)
        print("Eval at {}".format(global_step))

        if eval_set == "valid":
            assert self.x_valid is not None
            assert self.valid_acc is not None
            num_examples = self.num_valid_examples
            num_batches = self.num_valid_batches
            acc_op = self.valid_acc
        elif eval_set == "test":
            assert self.test_acc is not None
            num_examples = self.num_test_examples
            num_batches = self.num_test_batches
            acc_op = self.test_acc
        else:
            raise NotImplementedError(
                "Unknown eval_set '{}'".format(eval_set))

        total_acc = 0
        total_exp = 0
        for batch_id in range(num_batches):
            acc = sess.run(acc_op)
            total_acc += acc
            total_exp += self.eval_batch_size
            if verbose:
                sys.stdout.write(
                    "\r{:<5d}/{:>5d}".format(total_acc, total_exp))
        if verbose:
            print("")
        print("{}_accuracy: {:<6.4f}".format(
            eval_set, float(total_acc) / total_exp))
        return float(total_acc) / total_exp

    def _model(self, images, is_training, reuse=None):
        raise NotImplementedError("Abstract method")

    def _build_train(self):
        raise NotImplementedError("Abstract method")

    def _build_valid(self):
        raise NotImplementedError("Abstract method")

    def _build_test(self):
        raise NotImplementedError("Abstract method")
```
examples/trials/nas_cifar10/src/cifar10/nni_child_cifar10.py (deleted, 100644 → 0)
```python
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import shutil
import logging
import tensorflow as tf
from src.cifar10.data_utils import read_data
from src.cifar10.general_child import GeneralChild
import src.cifar10_flags
from src.cifar10_flags import FLAGS


def build_logger(log_name):
    logger = logging.getLogger(log_name)
    logger.setLevel(logging.DEBUG)
    fh = logging.FileHandler(log_name + '.log')
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)
    return logger


logger = build_logger("nni_child_cifar10")


def build_trial(images, labels, ChildClass):
    '''Build child class'''
    child_model = ChildClass(
        images,
        labels,
        use_aux_heads=FLAGS.child_use_aux_heads,
        cutout_size=FLAGS.child_cutout_size,
        num_layers=FLAGS.child_num_layers,
        num_cells=FLAGS.child_num_cells,
        num_branches=FLAGS.child_num_branches,
        fixed_arc=FLAGS.child_fixed_arc,
        out_filters_scale=FLAGS.child_out_filters_scale,
        out_filters=FLAGS.child_out_filters,
        keep_prob=FLAGS.child_keep_prob,
        drop_path_keep_prob=FLAGS.child_drop_path_keep_prob,
        num_epochs=FLAGS.num_epochs,
        l2_reg=FLAGS.child_l2_reg,
        data_format=FLAGS.data_format,
        batch_size=FLAGS.batch_size,
        clip_mode="norm",
        grad_bound=FLAGS.child_grad_bound,
        lr_init=FLAGS.child_lr,
        lr_dec_every=FLAGS.child_lr_dec_every,
        lr_dec_rate=FLAGS.child_lr_dec_rate,
        lr_cosine=FLAGS.child_lr_cosine,
        lr_max=FLAGS.child_lr_max,
        lr_min=FLAGS.child_lr_min,
        lr_T_0=FLAGS.child_lr_T_0,
        lr_T_mul=FLAGS.child_lr_T_mul,
        optim_algo="momentum",
        sync_replicas=FLAGS.child_sync_replicas,
        num_aggregate=FLAGS.child_num_aggregate,
        num_replicas=FLAGS.child_num_replicas)
    return child_model


def get_child_ops(child_model):
    '''Assemble child ops into a dict'''
    child_ops = {
        "global_step": child_model.global_step,
        "loss": child_model.loss,
        "train_op": child_model.train_op,
        "lr": child_model.lr,
        "grad_norm": child_model.grad_norm,
        "train_acc": child_model.train_acc,
        "optimizer": child_model.optimizer,
        "num_train_batches": child_model.num_train_batches,
        "eval_every": child_model.num_train_batches * FLAGS.eval_every_epochs,
        "eval_func": child_model.eval_once,
    }
    return child_ops


class NASTrial():

    def __init__(self):
        images, labels = read_data(FLAGS.data_path, num_valids=0)

        self.output_dir = os.path.join(os.getenv('NNI_OUTPUT_DIR'), '../..')
        self.file_path = os.path.join(
            self.output_dir, 'trainable_variable.txt')

        self.graph = tf.Graph()
        with self.graph.as_default():
            self.child_model = build_trial(images, labels, GeneralChild)

            self.total_data = {}

            self.child_model.build_model()
            self.child_ops = get_child_ops(self.child_model)
            config = tf.ConfigProto(
                intra_op_parallelism_threads=0,
                inter_op_parallelism_threads=0,
                allow_soft_placement=True)

            self.sess = tf.train.SingularMonitoredSession(config=config)
        logger.debug('initialize NASTrial done.')

    def run_one_step(self):
        '''Run this model on a batch of data'''
        run_ops = [
            self.child_ops["loss"],
            self.child_ops["lr"],
            self.child_ops["grad_norm"],
            self.child_ops["train_acc"],
            self.child_ops["train_op"],
        ]
        loss, lr, gn, tr_acc, _ = self.sess.run(run_ops)
        global_step = self.sess.run(self.child_ops["global_step"])
        log_string = ""
        log_string += "ch_step={:<6d}".format(global_step)
        log_string += " loss={:<8.6f}".format(loss)
        log_string += " lr={:<8.4f}".format(lr)
        log_string += " |g|={:<8.4f}".format(gn)
        log_string += " tr_acc={:<3d}/{:>3d}".format(tr_acc, FLAGS.batch_size)
        if int(global_step) % FLAGS.log_every == 0:
            logger.debug(log_string)
        return loss, global_step

    def run(self):
        '''Run this model for the number of epochs set in FLAGS'''
        max_acc = 0
        while True:
            _, global_step = self.run_one_step()
            if global_step % self.child_ops['num_train_batches'] == 0:
                acc = self.child_ops["eval_func"](
                    self.sess, "test", self.child_model)
                max_acc = max(max_acc, acc)
                '''@nni.report_intermediate_result(acc)'''
            if global_step / self.child_ops['num_train_batches'] >= FLAGS.num_epochs:
                '''@nni.report_final_result(max_acc)'''
                break


def main(_):
    logger.debug("-" * 80)

    if not os.path.isdir(FLAGS.output_dir):
        logger.debug(
            "Path {} does not exist. Creating.".format(FLAGS.output_dir))
        os.makedirs(FLAGS.output_dir)
    elif FLAGS.reset_output_dir:
        logger.debug(
            "Path {} exists. Remove and remake.".format(FLAGS.output_dir))
        shutil.rmtree(FLAGS.output_dir)
        os.makedirs(FLAGS.output_dir)
    logger.debug("-" * 80)
    trial = NASTrial()
    trial.run()


if __name__ == "__main__":
    tf.app.run()
```
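The `'''@nni.report_intermediate_result(acc)'''` and `'''@nni.report_final_result(max_acc)'''` strings above are likewise NNI annotations. As a rough sketch, with annotations disabled they would correspond to the NNI SDK reporting calls below (the loop and accuracy values are stand-ins for illustration):

```python
import nni

max_acc = 0.0
for epoch in range(8):
    acc = 0.1 * epoch                     # stand-in for the per-epoch test accuracy
    max_acc = max(max_acc, acc)
    nni.report_intermediate_result(acc)   # streamed to the tuner each epoch
nni.report_final_result(max_acc)          # reported once when the trial ends
```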
examples/trials/nas_cifar10/src/cifar10_flags.py (deleted, 100644 → 0)
```python
import tensorflow as tf
from src.utils import DEFINE_boolean
from src.utils import DEFINE_float
from src.utils import DEFINE_integer
from src.utils import DEFINE_string

flags = tf.app.flags
FLAGS = flags.FLAGS

DEFINE_boolean("reset_output_dir", False, "Delete output_dir if exists.")
DEFINE_string("data_path", "", "")
DEFINE_string("output_dir", "", "")
DEFINE_string("data_format", "NHWC", "'NHWC' or 'NCHW'")
DEFINE_string("search_for", None, "Must be [macro|micro]")
DEFINE_integer("train_data_size", 45000, "")
DEFINE_integer("batch_size", 32, "")
DEFINE_integer("num_epochs", 300, "")
DEFINE_integer("child_lr_dec_every", 100, "")
DEFINE_integer("child_num_layers", 5, "")
DEFINE_integer("child_num_cells", 5, "")
DEFINE_integer("child_filter_size", 5, "")
DEFINE_integer("child_out_filters", 48, "")
DEFINE_integer("child_out_filters_scale", 1, "")
DEFINE_integer("child_num_branches", 4, "")
DEFINE_integer("child_num_aggregate", None, "")
DEFINE_integer("child_num_replicas", 1, "")
DEFINE_integer("child_block_size", 3, "")
DEFINE_integer("child_lr_T_0", None, "for lr schedule")
DEFINE_integer("child_lr_T_mul", None, "for lr schedule")
DEFINE_integer("child_cutout_size", None, "CutOut size")
DEFINE_float("child_grad_bound", 5.0, "Gradient clipping")
DEFINE_float("child_lr", 0.1, "")
DEFINE_float("child_lr_dec_rate", 0.1, "")
DEFINE_float("child_keep_prob", 0.5, "")
DEFINE_float("child_drop_path_keep_prob", 1.0, "minimum drop_path_keep_prob")
DEFINE_float("child_l2_reg", 1e-4, "")
DEFINE_float("child_lr_max", None, "for lr schedule")
DEFINE_float("child_lr_min", None, "for lr schedule")
DEFINE_string("child_skip_pattern", None, "Must be ['dense', None]")
DEFINE_string("child_fixed_arc", None, "")
DEFINE_boolean("child_use_aux_heads", False, "Should we use an aux head")
DEFINE_boolean("child_sync_replicas", False, "To sync or not to sync.")
DEFINE_boolean("child_lr_cosine", False, "Use cosine lr schedule")
DEFINE_integer("log_every", 50, "How many steps to log")
DEFINE_integer("eval_every_epochs", 1, "How many epochs to eval")
```
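A small sketch of how these wrapped flags are consumed. In TF 1.x, flag parsing happens inside `tf.app.run()`; the entry-point below is hypothetical, defaults apply unless overridden on the command line (e.g. `--batch_size=100` as the shell scripts do):

```python
import tensorflow as tf
from src.cifar10_flags import FLAGS

def main(_):
    # Values come from the DEFINE_* declarations above unless overridden.
    print(FLAGS.batch_size)       # 32
    print(FLAGS.child_keep_prob)  # 0.5

if __name__ == "__main__":
    tf.app.run()  # parses sys.argv, then calls main
```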
examples/trials/nas_cifar10/src/common_ops.py (deleted, 100644 → 0)
```python
import numpy as np
import tensorflow as tf
from tensorflow.python.training import moving_averages


def lstm(x, prev_c, prev_h, w):
    ifog = tf.matmul(tf.concat([x, prev_h], axis=1), w)
    i, f, o, g = tf.split(ifog, 4, axis=1)
    i = tf.sigmoid(i)
    f = tf.sigmoid(f)
    o = tf.sigmoid(o)
    g = tf.tanh(g)
    next_c = i * g + f * prev_c
    next_h = o * tf.tanh(next_c)
    return next_c, next_h


def stack_lstm(x, prev_c, prev_h, w):
    next_c, next_h = [], []
    for layer_id, (_c, _h, _w) in enumerate(zip(prev_c, prev_h, w)):
        inputs = x if layer_id == 0 else next_h[-1]
        curr_c, curr_h = lstm(inputs, _c, _h, _w)
        next_c.append(curr_c)
        next_h.append(curr_h)
    return next_c, next_h


def create_weight(name, shape, initializer=None, trainable=True, seed=None):
    if initializer is None:
        initializer = tf.contrib.keras.initializers.he_normal(seed=seed)
    return tf.get_variable(name, shape, initializer=initializer,
                           trainable=trainable)


def create_bias(name, shape, initializer=None):
    if initializer is None:
        initializer = tf.constant_initializer(0.0, dtype=tf.float32)
    return tf.get_variable(name, shape, initializer=initializer)


def conv_op(inputs, filter_size, is_training, count, out_filters,
            data_format, ch_mul=1, start_idx=None, separable=False):
    """
    Args:
      start_idx: where to start taking the output channels. if None, assuming
        fixed_arc mode
      count: how many output_channels to take.
    """

    if data_format == "NHWC":
        inp_c = inputs.get_shape()[3].value
    elif data_format == "NCHW":
        inp_c = inputs.get_shape()[1].value

    with tf.variable_scope("inp_conv_1"):
        w = create_weight("w", [1, 1, inp_c, out_filters])
        x = tf.nn.conv2d(inputs, w, [1, 1, 1, 1], "SAME",
                         data_format=data_format)
        x = batch_norm(x, is_training, data_format=data_format)
        x = tf.nn.relu(x)

    with tf.variable_scope("out_conv_{}".format(filter_size)):
        if start_idx is None:
            if separable:
                w_depth = create_weight(
                    "w_depth", [filter_size, filter_size, out_filters, ch_mul])
                w_point = create_weight(
                    "w_point", [1, 1, out_filters * ch_mul, count])
                x = tf.nn.separable_conv2d(x, w_depth, w_point,
                                           strides=[1, 1, 1, 1],
                                           padding="SAME",
                                           data_format=data_format)
                x = batch_norm(x, is_training, data_format=data_format)
            else:
                w = create_weight(
                    "w", [filter_size, filter_size, inp_c, count])
                x = tf.nn.conv2d(x, w, [1, 1, 1, 1], "SAME",
                                 data_format=data_format)
                x = batch_norm(x, is_training, data_format=data_format)
        else:
            if separable:
                w_depth = create_weight(
                    "w_depth", [filter_size, filter_size, out_filters, ch_mul])
                # test_depth = w_depth
                w_point = create_weight(
                    "w_point", [out_filters, out_filters * ch_mul])
                w_point = w_point[start_idx:start_idx + count, :]
                w_point = tf.transpose(w_point, [1, 0])
                w_point = tf.reshape(
                    w_point, [1, 1, out_filters * ch_mul, count])

                x = tf.nn.separable_conv2d(x, w_depth, w_point,
                                           strides=[1, 1, 1, 1],
                                           padding="SAME",
                                           data_format=data_format)
                mask = tf.range(0, out_filters, dtype=tf.int32)
                mask = tf.logical_and(
                    start_idx <= mask, mask < start_idx + count)
                x = batch_norm_with_mask(
                    x, is_training, mask, out_filters,
                    data_format=data_format)
            else:
                w = create_weight(
                    "w", [filter_size, filter_size, out_filters, out_filters])
                w = tf.transpose(w, [3, 0, 1, 2])
                w = w[start_idx:start_idx + count, :, :, :]
                w = tf.transpose(w, [1, 2, 3, 0])
                x = tf.nn.conv2d(x, w, [1, 1, 1, 1], "SAME",
                                 data_format=data_format)
                mask = tf.range(0, out_filters, dtype=tf.int32)
                mask = tf.logical_and(
                    start_idx <= mask, mask < start_idx + count)
                x = batch_norm_with_mask(
                    x, is_training, mask, out_filters,
                    data_format=data_format)
        x = tf.nn.relu(x)
    return x


def pool_op(inputs, is_training, count, out_filters, avg_or_max, data_format,
            start_idx=None):
    """
    Args:
      start_idx: where to start taking the output channels. if None, assuming
        fixed_arc mode
      count: how many output_channels to take.
    """

    if data_format == "NHWC":
        inp_c = inputs.get_shape()[3].value
    elif data_format == "NCHW":
        inp_c = inputs.get_shape()[1].value

    with tf.variable_scope("conv_1"):
        w = create_weight("w", [1, 1, inp_c, out_filters])
        x = tf.nn.conv2d(inputs, w, [1, 1, 1, 1], "SAME",
                         data_format=data_format)
        x = batch_norm(x, is_training, data_format=data_format)
        x = tf.nn.relu(x)

    with tf.variable_scope("pool"):
        if data_format == "NHWC":
            actual_data_format = "channels_last"
        elif data_format == "NCHW":
            actual_data_format = "channels_first"

        if avg_or_max == "avg":
            x = tf.layers.average_pooling2d(
                x, [3, 3], [1, 1], "SAME", data_format=actual_data_format)
        elif avg_or_max == "max":
            x = tf.layers.max_pooling2d(
                x, [3, 3], [1, 1], "SAME", data_format=actual_data_format)
        else:
            raise ValueError("Unknown pool {}".format(avg_or_max))

        if start_idx is not None:
            if data_format == "NHWC":
                x = x[:, :, :, start_idx:start_idx + count]
            elif data_format == "NCHW":
                x = x[:, start_idx:start_idx + count, :, :]

    return x


def global_avg_pool(x, data_format="NHWC"):
    if data_format == "NHWC":
        x = tf.reduce_mean(x, [1, 2])
    elif data_format == "NCHW":
        x = tf.reduce_mean(x, [2, 3])
    else:
        raise NotImplementedError(
            "Unknown data_format {}".format(data_format))
    return x


def batch_norm(x, is_training, name="bn", decay=0.9, epsilon=1e-5,
               data_format="NHWC"):
    if data_format == "NHWC":
        shape = [x.get_shape()[3]]
    elif data_format == "NCHW":
        shape = [x.get_shape()[1]]
    else:
        raise NotImplementedError(
            "Unknown data_format {}".format(data_format))

    with tf.variable_scope(name, reuse=None if is_training else True):
        offset = tf.get_variable(
            "offset", shape,
            initializer=tf.constant_initializer(0.0, dtype=tf.float32))
        scale = tf.get_variable(
            "scale", shape,
            initializer=tf.constant_initializer(1.0, dtype=tf.float32))
        moving_mean = tf.get_variable(
            "moving_mean", shape, trainable=False,
            initializer=tf.constant_initializer(0.0, dtype=tf.float32))
        moving_variance = tf.get_variable(
            "moving_variance", shape, trainable=False,
            initializer=tf.constant_initializer(1.0, dtype=tf.float32))

        if is_training:
            x, mean, variance = tf.nn.fused_batch_norm(
                x, scale, offset, epsilon=epsilon, data_format=data_format,
                is_training=True)
            update_mean = moving_averages.assign_moving_average(
                moving_mean, mean, decay)
            update_variance = moving_averages.assign_moving_average(
                moving_variance, variance, decay)
            with tf.control_dependencies([update_mean, update_variance]):
                x = tf.identity(x)
        else:
            x, _, _ = tf.nn.fused_batch_norm(
                x, scale, offset, mean=moving_mean,
                variance=moving_variance, epsilon=epsilon,
                data_format=data_format, is_training=False)
    return x


def batch_norm_with_mask(x, is_training, mask, num_channels, name="bn",
                         decay=0.9, epsilon=1e-3, data_format="NHWC"):

    shape = [num_channels]
    indices = tf.where(mask)
    indices = tf.to_int32(indices)
    indices = tf.reshape(indices, [-1])

    with tf.variable_scope(name, reuse=None if is_training else True):
        offset = tf.get_variable(
            "offset", shape,
            initializer=tf.constant_initializer(0.0, dtype=tf.float32))
        scale = tf.get_variable(
            "scale", shape,
            initializer=tf.constant_initializer(1.0, dtype=tf.float32))
        offset = tf.boolean_mask(offset, mask)
        scale = tf.boolean_mask(scale, mask)

        moving_mean = tf.get_variable(
            "moving_mean", shape, trainable=False,
            initializer=tf.constant_initializer(0.0, dtype=tf.float32))
        moving_variance = tf.get_variable(
            "moving_variance", shape, trainable=False,
            initializer=tf.constant_initializer(1.0, dtype=tf.float32))

        if is_training:
            x, mean, variance = tf.nn.fused_batch_norm(
                x, scale, offset, epsilon=epsilon, data_format=data_format,
                is_training=True)
            mean = (1.0 - decay) * (tf.boolean_mask(moving_mean, mask) - mean)
            variance = (1.0 - decay) * \
                (tf.boolean_mask(moving_variance, mask) - variance)
            update_mean = tf.scatter_sub(
                moving_mean, indices, mean, use_locking=True)
            update_variance = tf.scatter_sub(
                moving_variance, indices, variance, use_locking=True)
            with tf.control_dependencies([update_mean, update_variance]):
                x = tf.identity(x)
        else:
            masked_moving_mean = tf.boolean_mask(moving_mean, mask)
            masked_moving_variance = tf.boolean_mask(moving_variance, mask)
            x, _, _ = tf.nn.fused_batch_norm(
                x, scale, offset, mean=masked_moving_mean,
                variance=masked_moving_variance, epsilon=epsilon,
                data_format=data_format, is_training=False)
    return x
```
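A minimal graph-mode check of the `lstm` helper above. The hidden size of 8 is an assumption for illustration; `w` therefore maps the concatenated `[input + hidden]` vector to the four gate pre-activations:

```python
import tensorflow as tf
from src.common_ops import lstm

x = tf.zeros([1, 8])
prev_c = tf.zeros([1, 8])
prev_h = tf.zeros([1, 8])
w = tf.get_variable("w", [16, 32])  # concat(x, h) is 16 wide; 4 gates of 8

next_c, next_h = lstm(x, prev_c, prev_h, w)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(next_h).shape)  # (1, 8)
```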
examples/trials/nas_cifar10/src/utils.py (deleted, 100644 → 0)
```python
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys
import numpy as np
import tensorflow as tf

user_flags = []


def DEFINE_string(name, default_value, doc_string):
    tf.app.flags.DEFINE_string(name, default_value, doc_string)
    global user_flags
    user_flags.append(name)


def DEFINE_integer(name, default_value, doc_string):
    tf.app.flags.DEFINE_integer(name, default_value, doc_string)
    global user_flags
    user_flags.append(name)


def DEFINE_float(name, default_value, doc_string):
    tf.app.flags.DEFINE_float(name, default_value, doc_string)
    global user_flags
    user_flags.append(name)


def DEFINE_boolean(name, default_value, doc_string):
    tf.app.flags.DEFINE_boolean(name, default_value, doc_string)
    global user_flags
    user_flags.append(name)


def print_user_flags(line_limit=80):
    print("-" * 80)

    global user_flags
    FLAGS = tf.app.flags.FLAGS

    for flag_name in sorted(user_flags):
        value = "{}".format(getattr(FLAGS, flag_name))
        log_string = flag_name
        log_string += "." * (line_limit - len(flag_name) - len(value))
        log_string += value
        print(log_string)


def get_C(x, data_format):
    """
    Args:
      x: tensor of shape [N, H, W, C] or [N, C, H, W]
    """
    if data_format == "NHWC":
        return x.get_shape()[3].value
    elif data_format == "NCHW":
        return x.get_shape()[1].value
    else:
        raise ValueError("Unknown data_format '{0}'".format(data_format))


def get_HW(x, data_format):
    """
    Args:
      x: tensor of shape [N, H, W, C] or [N, C, H, W]
    """
    return x.get_shape()[2].value


def get_strides(stride, data_format):
    """
    Args:
      x: tensor of shape [N, H, W, C] or [N, C, H, W]
    """
    if data_format == "NHWC":
        return [1, stride, stride, 1]
    elif data_format == "NCHW":
        return [1, 1, stride, stride]
    else:
        raise ValueError("Unknown data_format '{0}'".format(data_format))


class TextColors:
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'


class Logger(object):
    def __init__(self, output_file):
        self.terminal = sys.stdout
        self.log = open(output_file, "a")

    def write(self, message):
        self.terminal.write(message)
        self.terminal.flush()
        self.log.write(message)
        self.log.flush()


def count_model_params(tf_variables):
    """
    Args:
      tf_variables: list of all model variables
    """

    num_vars = 0
    for var in tf_variables:
        num_vars += np.prod([dim.value for dim in var.get_shape()])
    return num_vars


def get_train_ops(
        loss,
        tf_variables,
        train_step,
        clip_mode=None,
        grad_bound=None,
        l2_reg=1e-4,
        lr_warmup_val=None,
        lr_warmup_steps=100,
        lr_init=0.1,
        lr_dec_start=0,
        lr_dec_every=10000,
        lr_dec_rate=0.1,
        lr_dec_min=None,
        lr_cosine=False,
        lr_max=None,
        lr_min=None,
        lr_T_0=None,
        lr_T_mul=None,
        num_train_batches=None,
        optim_algo=None,
        sync_replicas=False,
        num_aggregate=None,
        num_replicas=None,
        get_grad_norms=False,
        moving_average=None):
    """
    Args:
      clip_mode: "global", "norm", or None.
      moving_average: store the moving average of parameters
    """

    if l2_reg > 0:
        l2_losses = []
        for var in tf_variables:
            l2_losses.append(tf.reduce_sum(var ** 2))
        l2_loss = tf.add_n(l2_losses)
        loss += l2_reg * l2_loss

    grads = tf.gradients(loss, tf_variables)
    grad_norm = tf.global_norm(grads)

    grad_norms = {}
    for v, g in zip(tf_variables, grads):
        if v is None or g is None:
            continue
        if isinstance(g, tf.IndexedSlices):
            grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g.values ** 2))
        else:
            grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g ** 2))

    if clip_mode is not None:
        assert grad_bound is not None, "Need grad_bound to clip gradients."
        if clip_mode == "global":
            grads, _ = tf.clip_by_global_norm(grads, grad_bound)
        elif clip_mode == "norm":
            clipped = []
            for g in grads:
                if isinstance(g, tf.IndexedSlices):
                    c_g = tf.clip_by_norm(g.values, grad_bound)
                    c_g = tf.IndexedSlices(g.indices, c_g)
                else:
                    c_g = tf.clip_by_norm(g, grad_bound)
                # Original appended the unclipped `g`, discarding c_g; fixed
                # here so the clipped gradient is actually used.
                clipped.append(c_g)
            grads = clipped
        else:
            raise NotImplementedError(
                "Unknown clip_mode {}".format(clip_mode))

    if lr_cosine:
        assert lr_max is not None, "Need lr_max to use lr_cosine"
        assert lr_min is not None, "Need lr_min to use lr_cosine"
        assert lr_T_0 is not None, "Need lr_T_0 to use lr_cosine"
        assert lr_T_mul is not None, "Need lr_T_mul to use lr_cosine"
        assert num_train_batches is not None, ("Need num_train_batches to use"
                                               " lr_cosine")

        curr_epoch = train_step // num_train_batches

        last_reset = tf.Variable(0, dtype=tf.int32, trainable=False,
                                 name="last_reset")
        T_i = tf.Variable(lr_T_0, dtype=tf.int32, trainable=False, name="T_i")
        T_curr = curr_epoch - last_reset

        def _update():
            update_last_reset = tf.assign(
                last_reset, curr_epoch, use_locking=True)
            update_T_i = tf.assign(T_i, T_i * lr_T_mul, use_locking=True)
            with tf.control_dependencies([update_last_reset, update_T_i]):
                rate = tf.to_float(T_curr) / tf.to_float(T_i) * 3.1415926
                lr = lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate))
            return lr

        def _no_update():
            rate = tf.to_float(T_curr) / tf.to_float(T_i) * 3.1415926
            lr = lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate))
            return lr

        learning_rate = tf.cond(
            tf.greater_equal(T_curr, T_i), _update, _no_update)
    else:
        learning_rate = tf.train.exponential_decay(
            lr_init, tf.maximum(train_step - lr_dec_start, 0), lr_dec_every,
            lr_dec_rate, staircase=True)
        if lr_dec_min is not None:
            learning_rate = tf.maximum(learning_rate, lr_dec_min)

    if lr_warmup_val is not None:
        learning_rate = tf.cond(tf.less(train_step, lr_warmup_steps),
                                lambda: lr_warmup_val,
                                lambda: learning_rate)

    if optim_algo == "momentum":
        opt = tf.train.MomentumOptimizer(
            learning_rate, 0.9, use_locking=True, use_nesterov=True)
    elif optim_algo == "sgd":
        opt = tf.train.GradientDescentOptimizer(
            learning_rate, use_locking=True)
    elif optim_algo == "adam":
        opt = tf.train.AdamOptimizer(learning_rate, beta1=0.0, epsilon=1e-3,
                                     use_locking=True)
    else:
        raise ValueError("Unknown optim_algo {}".format(optim_algo))

    if sync_replicas:
        assert num_aggregate is not None, "Need num_aggregate to sync."
        assert num_replicas is not None, "Need num_replicas to sync."

        opt = tf.train.SyncReplicasOptimizer(
            opt,
            replicas_to_aggregate=num_aggregate,
            total_num_replicas=num_replicas,
            use_locking=True)

    if moving_average is not None:
        opt = tf.contrib.opt.MovingAverageOptimizer(
            opt, average_decay=moving_average)

    train_op = opt.apply_gradients(
        zip(grads, tf_variables), global_step=train_step)

    if get_grad_norms:
        return train_op, learning_rate, grad_norm, opt, grad_norms
    else:
        return train_op, learning_rate, grad_norm, opt
```
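A NumPy sketch of the warm-restart cosine schedule that `get_train_ops` builds when `lr_cosine` is set. The values mirror `macro_cifar10.sh` (`lr_max=0.05`, `lr_min=0.001`, `T_0=10`, `T_mul=2`); the eager restart bookkeeping below only approximates the TF variable updates:

```python
import numpy as np

lr_max, lr_min = 0.05, 0.001
T_i, last_reset = 10, 0  # period length in epochs; multiplied by T_mul=2 on restart
for epoch in range(30):
    T_curr = epoch - last_reset
    if T_curr >= T_i:    # warm restart: reset the phase, lengthen the period
        last_reset, T_i, T_curr = epoch, T_i * 2, 0
    lr = lr_min + 0.5 * (lr_max - lr_min) * (1.0 + np.cos(np.pi * T_curr / T_i))
    print(epoch, round(lr, 5))
```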
examples/tuners/enas_nni/README.md (deleted, 100644 → 0)
```markdown
**Run ENAS in NNI**
===

Now we have an ENAS example, [enas-nni](https://github.com/countif/enas_nni), from our contributors that runs in NNI.
Thanks to our lovely contributors!
And welcome more and more people to join us!
```
examples/tuners/enas_nni/README_zh_CN.md (deleted, 100644 → 0; translated from Chinese)
```markdown
**Run ENAS in NNI**
===

The [enas-nni](https://github.com/countif/enas_nni) example from our contributors can run in NNI. Many thanks!
More volunteers are welcome to join us!
```
\ No newline at end of file