Commit 252f36f8, authored Aug 20, 2018 by Deshui Yu

NNI dogfood version 1

Parent: 781cea26
Changes: 214
Showing 20 changed files with 2697 additions and 0 deletions (+2697, -0)
examples/trials/ga_squad/evaluate.py            +158  -0
examples/trials/ga_squad/graph.py               +287  -0
examples/trials/ga_squad/graph_to_tf.py         +338  -0
examples/trials/ga_squad/readme.md              +10   -0
examples/trials/ga_squad/requirements.txt       +1    -0
examples/trials/ga_squad/rnn.py                 +118  -0
examples/trials/ga_squad/train_model.py         +259  -0
examples/trials/ga_squad/trial.py               +455  -0
examples/trials/ga_squad/util.py                +76   -0
examples/trials/mnist-annotation/config.yml     +18   -0
examples/trials/mnist-annotation/mnist.py       +236  -0
examples/trials/mnist-keras/config.yml          +19   -0
examples/trials/mnist-keras/mnist-keras.py      +131  -0
examples/trials/mnist-keras/search_space.json   +4    -0
examples/trials/mnist-smartparam/config.yml     +18   -0
examples/trials/mnist-smartparam/mnist.py       +229  -0
examples/trials/mnist/config.yml                +19   -0
examples/trials/mnist/mnist.py                  +230  -0
examples/trials/mnist/search_space.json         +6    -0
examples/tuners/README.md                       +85   -0
examples/trials/ga_squad/evaluate.py  0 → 100644
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
from __future__ import print_function
from collections import Counter
import string
import re
import argparse
import json
import sys


def normalize_answer(str_input):
    """Lower text and remove punctuation, articles and extra whitespace."""
    def remove_articles(text):
        '''
        Remove "a|an|the"
        '''
        return re.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
        '''
        Remove unnecessary whitespace
        '''
        return ' '.join(text.split())

    def remove_punc(text):
        '''
        Remove punctuation
        '''
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
        '''
        Change string to lower form.
        '''
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(str_input))))


def f1_score(prediction, ground_truth):
    '''
    Calculate the f1 score.
    '''
    prediction_tokens = normalize_answer(prediction).split()
    ground_truth_tokens = normalize_answer(ground_truth).split()
    common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
    num_same = sum(common.values())
    if num_same == 0:
        return 0
    precision = 1.0 * num_same / len(prediction_tokens)
    recall = 1.0 * num_same / len(ground_truth_tokens)
    f1 = (2 * precision * recall) / (precision + recall)
    return f1


def exact_match_score(prediction, ground_truth):
    '''
    Calculate the match score with prediction and ground truth.
    '''
    return normalize_answer(prediction) == normalize_answer(ground_truth)


def metric_max_over_ground_truths(metric_fn, prediction, ground_truths):
    '''
    Metric max over the ground truths.
    '''
    scores_for_ground_truths = []
    for ground_truth in ground_truths:
        score = metric_fn(prediction, ground_truth)
        scores_for_ground_truths.append(score)
    return max(scores_for_ground_truths)


def _evaluate(dataset, predictions):
    '''
    Evaluate function.
    '''
    f1 = exact_match = total = 0
    count = 0
    for article in dataset:
        for paragraph in article['paragraphs']:
            for qa in paragraph['qas']:
                total += 1
                if qa['id'] not in predictions:
                    message = 'Unanswered question ' + qa['id'] + \
                              ' will receive score 0.'
                    #print(message, file=sys.stderr)
                    count += 1
                    continue
                ground_truths = list(map(lambda x: x['text'], qa['answers']))
                prediction = predictions[qa['id']]
                exact_match += metric_max_over_ground_truths(
                    exact_match_score, prediction, ground_truths)
                f1 += metric_max_over_ground_truths(
                    f1_score, prediction, ground_truths)
    print('total', total, 'exact_match', exact_match, 'unanswer_question ', count)
    exact_match = 100.0 * exact_match / total
    f1 = 100.0 * f1 / total
    return {'exact_match': exact_match, 'f1': f1}


def evaluate(data_file, pred_file):
    '''
    Evaluate.
    '''
    expected_version = '1.1'
    with open(data_file) as dataset_file:
        dataset_json = json.load(dataset_file)
        if dataset_json['version'] != expected_version:
            print('Evaluation expects v-' + expected_version +
                  ', but got dataset with v-' + dataset_json['version'],
                  file=sys.stderr)
        dataset = dataset_json['data']
    with open(pred_file) as prediction_file:
        predictions = json.load(prediction_file)
    # print(json.dumps(evaluate(dataset, predictions)))
    result = _evaluate(dataset, predictions)
    # print('em:', result['exact_match'], 'f1:', result['f1'])
    return result['exact_match']


def evaluate_with_predictions(data_file, predictions):
    '''
    Evaluate with predictions.
    '''
    expected_version = '1.1'
    with open(data_file) as dataset_file:
        dataset_json = json.load(dataset_file)
        if dataset_json['version'] != expected_version:
            print('Evaluation expects v-' + expected_version +
                  ', but got dataset with v-' + dataset_json['version'],
                  file=sys.stderr)
        dataset = dataset_json['data']
    result = _evaluate(dataset, predictions)
    return result['exact_match']


if __name__ == '__main__':
    EXPECT_VERSION = '1.1'
    parser = argparse.ArgumentParser(
        description='Evaluation for SQuAD ' + EXPECT_VERSION)
    parser.add_argument('dataset_file', help='Dataset file')
    parser.add_argument('prediction_file', help='Prediction File')
    args = parser.parse_args()
    print(evaluate(args.dataset_file, args.prediction_file))
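
Besides the command-line entry point above, the module can also be called programmatically; a minimal usage sketch (the file paths and the answers dict below are placeholders, not part of this commit):

# Sketch: score predictions against a SQuAD v1.1 dev set with this module.
import evaluate

# From a prediction file on disk (returns the exact-match percentage):
em = evaluate.evaluate('dev-v1.1.json', 'predictions.json')

# Or from an in-memory {question_id: answer_text} dict, as trial.py does:
em = evaluate.evaluate_with_predictions('dev-v1.1.json', {'question-id': 'answer text'})
print(em)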
examples/trials/ga_squad/graph.py  0 → 100644
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
'''
Graph is a custom-defined class; this module contains the related classes and functions about graphs.
'''

import copy
import json
import random
from enum import Enum, unique


@unique
class LayerType(Enum):
    '''
    Layer type
    '''
    attention = 0
    self_attention = 1
    rnn = 2
    input = 3
    output = 4


class Layer(object):
    '''
    Layer class, which contains the information of the graph.
    '''
    def __init__(self, graph_type, input=None, output=None, size=None):
        self.input = input if input is not None else []
        self.output = output if output is not None else []
        self.graph_type = graph_type
        self.is_delete = False
        self.size = size
        if graph_type == LayerType.attention.value:
            self.input_size = 2
            self.output_size = 1
        elif graph_type == LayerType.rnn.value:
            self.input_size = 1
            self.output_size = 1
        elif graph_type == LayerType.self_attention.value:
            self.input_size = 1
            self.output_size = 1
        elif graph_type == LayerType.input.value:
            self.input_size = 0
            self.output_size = 1
        elif graph_type == LayerType.output.value:
            self.input_size = 1
            self.output_size = 0
        else:
            print(graph_type)

    def set_size(self, graph_id, size):
        '''
        Set size.
        '''
        if self.graph_type == LayerType.attention.value:
            if self.input[0] == graph_id:
                self.size = size
        if self.graph_type == LayerType.rnn.value:
            self.size = size
        if self.graph_type == LayerType.self_attention.value:
            self.size = size
        if self.graph_type == LayerType.output.value:
            if self.size != size:
                return False
        return True

    def clear_size(self):
        '''
        Clear size
        '''
        if self.graph_type == LayerType.attention.value or \
                LayerType.rnn.value or LayerType.self_attention.value:
            self.size = None

    def __str__(self):
        return 'input:' + str(self.input) + ' output:' + str(self.output) + \
            ' type:' + str(self.graph_type) + ' is_delete:' + str(self.is_delete) + \
            ' size:' + str(self.size)


def graph_dumps(graph):
    '''
    Dump the graph.
    '''
    return json.dumps(graph, default=lambda obj: obj.__dict__)


def graph_loads(graph_json):
    '''
    Load graph
    '''
    layers = []
    for layer in graph_json['layers']:
        layer_info = Layer(layer['type'], layer['input'], layer['output'], layer['size'])
        layer_info.is_delete = layer['is_delete']
        layers.append(layer_info)
    graph = Graph(graph_json['max_layer_num'], [], [], [])
    graph.layers = layers
    return graph


class Graph(object):
    '''
    Custom Graph class.
    '''
    def __init__(self, max_layer_num, input, output, hide):
        self.layers = []
        self.max_layer_num = max_layer_num

        for layer in input:
            self.layers.append(layer)
        for layer in output:
            self.layers.append(layer)
        if hide is not None:
            for layer in hide:
                self.layers.append(layer)
        assert self.is_legal()

    def is_topology(self, layers=None):
        '''
        Validate the topology.
        '''
        if layers is None:
            layers = self.layers
        layers_nodle = []
        result = []
        for i, layer in enumerate(layers):
            if layer.is_delete is False:
                layers_nodle.append(i)
        while True:
            flag_break = True
            layers_toremove = []
            for layer1 in layers_nodle:
                flag_arrive = True
                for layer2 in layers[layer1].input:
                    if layer2 in layers_nodle:
                        flag_arrive = False
                if flag_arrive is True:
                    for layer2 in layers[layer1].output:
                        # Size is error
                        if layers[layer2].set_size(layer1, layers[layer1].size) is False:
                            return False
                    layers_toremove.append(layer1)
                    result.append(layer1)
                    flag_break = False
            for layer in layers_toremove:
                layers_nodle.remove(layer)
            result.append('|')
            if flag_break:
                break
        # There is a loop in the graph, or some layers cannot be reached
        if layers_nodle:
            return False
        return result

    def layer_num(self, layers=None):
        '''
        Return number of layers.
        '''
        if layers is None:
            layers = self.layers
        layer_num = 0
        for layer in layers:
            if layer.is_delete is False and layer.graph_type != LayerType.input.value \
                    and layer.graph_type != LayerType.output.value:
                layer_num += 1
        return layer_num

    def is_legal(self, layers=None):
        '''
        Judge whether the layers are legal.
        '''
        if layers is None:
            layers = self.layers

        for layer in layers:
            if layer.is_delete is False:
                if len(layer.input) != layer.input_size:
                    return False
                if len(layer.output) < layer.output_size:
                    return False

        # layer_num <= max_layer_num
        if self.layer_num(layers) > self.max_layer_num:
            return False

        # There is a loop in the graph, or some layers cannot be reached
        if self.is_topology(layers) is False:
            return False

        return True

    def mutation(self, only_add=False):
        '''
        Mutation for a graph
        '''
        types = []
        if self.layer_num() < self.max_layer_num:
            types.append(0)
            types.append(1)
        if self.layer_num() > 5 and only_add is False:
            types.append(2)
            types.append(3)
        # 0 : add a layer, delete an edge
        # 1 : add a layer, change an edge
        # 2 : delete a layer, delete an edge
        # 3 : delete a layer, change an edge
        graph_type = random.choice(types)
        layer_type = random.choice([LayerType.attention.value, \
            LayerType.self_attention.value, LayerType.rnn.value])
        layers = copy.deepcopy(self.layers)
        cnt_try = 0
        while True:
            layers_in = []
            layers_out = []
            layers_del = []
            for i, layer in enumerate(layers):
                if layer.is_delete is False:
                    if layer.graph_type != LayerType.output.value:
                        layers_in.append(i)
                    if layer.graph_type != LayerType.input.value:
                        layers_out.append(i)
                    if layer.graph_type != LayerType.output.value \
                            and layer.graph_type != LayerType.input.value:
                        layers_del.append(i)
            if graph_type <= 1:
                new_id = len(layers)
                out = random.choice(layers_out)
                input = []
                output = [out]
                pos = random.randint(0, len(layers[out].input) - 1)
                last_in = layers[out].input[pos]
                layers[out].input[pos] = new_id
                if graph_type == 0:
                    layers[last_in].output.remove(out)
                if graph_type == 1:
                    layers[last_in].output.remove(out)
                    layers[last_in].output.append(new_id)
                    input = [last_in]
                lay = Layer(graph_type=layer_type, input=input, output=output)
                while len(input) < lay.input_size:
                    layer1 = random.choice(layers_in)
                    input.append(layer1)
                    layers[layer1].output.append(new_id)
                lay.input = input
                layers.append(lay)
            else:
                layer1 = random.choice(layers_del)
                for layer2 in layers[layer1].output:
                    layers[layer2].input.remove(layer1)
                    if graph_type == 2:
                        random_in = random.choice(layers_in)
                    else:
                        random_in = random.choice(layers[layer1].input)
                    layers[layer2].input.append(random_in)
                    layers[random_in].output.append(layer2)
                for layer2 in layers[layer1].input:
                    layers[layer2].output.remove(layer1)
                layers[layer1].is_delete = True

            if self.is_legal(layers):
                self.layers = layers
                break
            else:
                layers = copy.deepcopy(self.layers)
                cnt_try += 1

    def __str__(self):
        info = ""
        for l_id, layer in enumerate(self.layers):
            if layer.is_delete is False:
                info += 'id:%d ' % l_id + str(layer) + '\n'
        return info
examples/trials/ga_squad/graph_to_tf.py  0 → 100644
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import tensorflow as tf

from rnn import XGRUCell
from util import dropout
from graph import LayerType


def normalize(inputs,
              epsilon=1e-8,
              scope="ln"):
    '''Applies layer normalization.
    Args:
        inputs: A tensor with 2 or more dimensions, where the first dimension has
            `batch_size`.
        epsilon: A floating number. A very small number for preventing ZeroDivision Error.
        scope: Optional scope for `variable_scope`.
        reuse: Boolean, whether to reuse the weights of a previous layer
            by the same name.
    Returns:
        A tensor with the same shape and data dtype as `inputs`.
    '''
    with tf.variable_scope(scope):
        inputs_shape = inputs.get_shape()
        params_shape = inputs_shape[-1:]

        mean, variance = tf.nn.moments(inputs, [-1], keep_dims=True)
        beta = tf.Variable(tf.zeros(params_shape))
        gamma = tf.Variable(tf.ones(params_shape))
        normalized = (inputs - mean) / ((variance + epsilon) ** (.5))
        outputs = gamma * normalized + beta

    return outputs


def multihead_attention(queries,
                        keys,
                        scope="multihead_attention",
                        num_units=None,
                        num_heads=4,
                        dropout_rate=0,
                        is_training=True,
                        causality=False):
    '''Applies multihead attention.
    Args:
        queries: A 3d tensor with shape of [N, T_q, C_q].
        keys: A 3d tensor with shape of [N, T_k, C_k].
        num_units: A scalar. Attention size.
        dropout_rate: A floating point number.
        is_training: Boolean. Controller of mechanism for dropout.
        causality: Boolean. If true, units that reference the future are masked.
        num_heads: An int. Number of heads.
        scope: Optional scope for `variable_scope`.
        reuse: Boolean, whether to reuse the weights of a previous layer
            by the same name.
    Returns
        A 3d tensor with shape of (N, T_q, C)
    '''
    global look5
    with tf.variable_scope(scope):
        # Set the fall back option for num_units
        if num_units is None:
            num_units = queries.get_shape().as_list()[-1]

        Q_ = []
        K_ = []
        V_ = []
        for _ in range(num_heads):
            Q = tf.layers.dense(queries, num_units / num_heads,
                                activation=tf.nn.relu)  # (N, T_q, C)
            K = tf.layers.dense(keys, num_units / num_heads,
                                activation=tf.nn.relu)  # (N, T_k, C)
            V = tf.layers.dense(keys, num_units / num_heads,
                                activation=tf.nn.relu)  # (N, T_k, C)
            Q_.append(Q)
            K_.append(K)
            V_.append(V)

        # Split and concat
        Q_ = tf.concat(Q_, axis=0)  # (h*N, T_q, C/h)
        K_ = tf.concat(K_, axis=0)  # (h*N, T_k, C/h)
        V_ = tf.concat(V_, axis=0)  # (h*N, T_k, C/h)

        # Multiplication
        outputs = tf.matmul(Q_, tf.transpose(K_, [0, 2, 1]))  # (h*N, T_q, T_k)

        # Scale
        outputs = outputs / (K_.get_shape().as_list()[-1] ** 0.5)

        # Key Masking
        key_masks = tf.sign(tf.abs(tf.reduce_sum(keys, axis=-1)))  # (N, T_k)
        key_masks = tf.tile(key_masks, [num_heads, 1])  # (h*N, T_k)
        key_masks = tf.tile(tf.expand_dims(key_masks, 1),
                            [1, tf.shape(queries)[1], 1])  # (h*N, T_q, T_k)

        paddings = tf.ones_like(outputs) * (-2 ** 32 + 1)
        outputs = tf.where(tf.equal(key_masks, 0), paddings, outputs)  # (h*N, T_q, T_k)

        # Causality = Future blinding
        if causality:
            diag_vals = tf.ones_like(outputs[0, :, :])  # (T_q, T_k)
            tril = tf.contrib.linalg.LinearOperatorTriL(diag_vals).to_dense()  # (T_q, T_k)
            masks = tf.tile(tf.expand_dims(tril, 0),
                            [tf.shape(outputs)[0], 1, 1])  # (h*N, T_q, T_k)

            paddings = tf.ones_like(masks) * (-2 ** 32 + 1)
            outputs = tf.where(tf.equal(masks, 0), paddings, outputs)  # (h*N, T_q, T_k)

        # Activation
        look5 = outputs
        outputs = tf.nn.softmax(outputs)  # (h*N, T_q, T_k)

        # Query Masking
        query_masks = tf.sign(tf.abs(tf.reduce_sum(queries, axis=-1)))  # (N, T_q)
        query_masks = tf.tile(query_masks, [num_heads, 1])  # (h*N, T_q)
        query_masks = tf.tile(tf.expand_dims(query_masks, -1),
                              [1, 1, tf.shape(keys)[1]])  # (h*N, T_q, T_k)
        outputs *= query_masks  # broadcasting. (N, T_q, C)

        # Dropouts
        outputs = dropout(outputs, dropout_rate, is_training)

        # Weighted sum
        outputs = tf.matmul(outputs, V_)  # ( h*N, T_q, C/h)

        # Restore shape
        outputs = tf.concat(tf.split(outputs, num_heads, axis=0), axis=2)  # (N, T_q, C)

        # Residual connection
        if queries.get_shape().as_list()[-1] == num_units:
            outputs += queries

        # Normalize
        outputs = normalize(outputs, scope=scope)  # (N, T_q, C)

    return outputs


def positional_encoding(inputs,
                        num_units=None,
                        zero_pad=True,
                        scale=True,
                        scope="positional_encoding",
                        reuse=None):
    '''
    Return positional embedding.
    '''
    Shape = tf.shape(inputs)
    N = Shape[0]
    T = Shape[1]
    num_units = Shape[2]
    with tf.variable_scope(scope, reuse=reuse):
        position_ind = tf.tile(tf.expand_dims(tf.range(T), 0), [N, 1])

        # First part of the PE function: sin and cos argument
        # Second part, apply the cosine to even columns and sin to odds.
        X = tf.expand_dims(tf.cast(tf.range(T), tf.float32), axis=1)
        Y = tf.expand_dims(
            tf.cast(10000 ** -(2 * tf.range(num_units) / num_units), tf.float32), axis=0)
        h1 = tf.cast((tf.range(num_units) + 1) % 2, tf.float32)
        h2 = tf.cast((tf.range(num_units) % 2), tf.float32)
        position_enc = tf.multiply(X, Y)
        position_enc = tf.sin(position_enc) * tf.multiply(tf.ones_like(X), h1) + \
            tf.cos(position_enc) * tf.multiply(tf.ones_like(X), h2)

        # Convert to a tensor
        lookup_table = position_enc

        if zero_pad:
            lookup_table = tf.concat((tf.zeros(shape=[1, num_units]),
                                      lookup_table[1:, :]), 0)
        outputs = tf.nn.embedding_lookup(lookup_table, position_ind)

        if scale:
            outputs = outputs * tf.sqrt(tf.cast(num_units, tf.float32))

    return outputs


def feedforward(inputs,
                num_units,
                scope="multihead_attention"):
    '''Point-wise feed forward net.
    Args:
        inputs: A 3d tensor with shape of [N, T, C].
        num_units: A list of two integers.
        scope: Optional scope for `variable_scope`.
        reuse: Boolean, whether to reuse the weights of a previous layer
            by the same name.
    Returns:
        A 3d tensor with the same shape and dtype as inputs
    '''
    with tf.variable_scope(scope):
        # Inner layer
        params = {"inputs": inputs, "filters": num_units[0], "kernel_size": 1,
                  "activation": tf.nn.relu, "use_bias": True}
        outputs = tf.layers.conv1d(**params)

        # Readout layer
        params = {"inputs": outputs, "filters": num_units[1], "kernel_size": 1,
                  "activation": None, "use_bias": True}
        outputs = tf.layers.conv1d(**params)

        # Residual connection
        outputs += inputs

        # Normalize
        outputs = normalize(outputs)

    return outputs


def rnn(input_states, sequence_lengths, dropout_rate, is_training, num_units):
    layer_cnt = 1
    states = []
    xs = tf.transpose(input_states, perm=[1, 0, 2])
    for i in range(0, layer_cnt):
        xs = dropout(xs, dropout_rate, is_training)
        with tf.variable_scope('layer_' + str(i)):
            cell_fw = XGRUCell(num_units)
            cell_bw = XGRUCell(num_units)
            outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=cell_fw,
                cell_bw=cell_bw,
                dtype=tf.float32,
                sequence_length=sequence_lengths,
                inputs=xs,
                time_major=True)

        y_lr, y_rl = outputs
        xs = tf.concat([y_lr, y_rl], 2)
        states.append(xs)

    return tf.transpose(dropout(tf.concat(states, axis=2),
                                dropout_rate,
                                is_training), perm=[1, 0, 2])


def graph_to_network(input1,
                     input2,
                     input1_lengths,
                     input2_lengths,
                     graph,
                     dropout_rate,
                     is_training,
                     num_heads=1,
                     rnn_units=256):
    topology = graph.is_topology()
    layers = dict()
    layers_sequence_lengths = dict()
    num_units = input1.get_shape().as_list()[-1]
    layers[0] = input1 * tf.sqrt(tf.cast(num_units, tf.float32)) + \
        positional_encoding(input1, scale=False, zero_pad=False)
    layers[1] = input2 * tf.sqrt(tf.cast(num_units, tf.float32))
    layers[0] = dropout(layers[0], dropout_rate, is_training)
    layers[1] = dropout(layers[1], dropout_rate, is_training)
    layers_sequence_lengths[0] = input1_lengths
    layers_sequence_lengths[1] = input2_lengths
    for _, topo_i in enumerate(topology):
        if topo_i == '|':
            continue
        if graph.layers[topo_i].graph_type == LayerType.input.value:
            continue
        elif graph.layers[topo_i].graph_type == LayerType.attention.value:
            with tf.variable_scope('attation_%d' % topo_i):
                layer = multihead_attention(layers[graph.layers[topo_i].input[0]],
                                            layers[graph.layers[topo_i].input[1]],
                                            scope="multihead_attention%d" % topo_i,
                                            dropout_rate=dropout_rate,
                                            is_training=is_training,
                                            num_heads=num_heads,
                                            num_units=rnn_units * 2)
                layer = feedforward(layer, scope="feedforward%d" % topo_i,
                                    num_units=[rnn_units * 2 * 4, rnn_units * 2])
            layers[topo_i] = layer
            layers_sequence_lengths[topo_i] = layers_sequence_lengths[
                graph.layers[topo_i].input[0]]
        elif graph.layers[topo_i].graph_type == LayerType.self_attention.value:
            with tf.variable_scope('self-attation_%d' % topo_i):
                layer = multihead_attention(layers[graph.layers[topo_i].input[0]],
                                            layers[graph.layers[topo_i].input[0]],
                                            scope="multihead_attention%d" % topo_i,
                                            dropout_rate=dropout_rate,
                                            is_training=is_training,
                                            num_heads=num_heads,
                                            num_units=rnn_units * 2)
                layer = feedforward(layer, scope="feedforward%d" % topo_i,
                                    num_units=[rnn_units * 2 * 4, rnn_units * 2])
            layers[topo_i] = layer
            layers_sequence_lengths[topo_i] = layers_sequence_lengths[
                graph.layers[topo_i].input[0]]
        elif graph.layers[topo_i].graph_type == LayerType.rnn.value:
            with tf.variable_scope('rnn_%d' % topo_i):
                layer = rnn(layers[graph.layers[topo_i].input[0]],
                            layers_sequence_lengths[graph.layers[topo_i].input[0]],
                            dropout_rate,
                            is_training,
                            rnn_units)
            layers[topo_i] = layer
            layers_sequence_lengths[topo_i] = layers_sequence_lengths[
                graph.layers[topo_i].input[0]]
        elif graph.layers[topo_i].graph_type == LayerType.output.value:
            layers[topo_i] = layers[graph.layers[topo_i].input[0]]
            if layers[topo_i].get_shape().as_list()[-1] != rnn_units * 1 * 2:
                with tf.variable_scope('add_dense'):
                    layers[topo_i] = tf.layers.dense(layers[topo_i], units=rnn_units * 2)
    return layers[2], layers[3]
examples/trials/ga_squad/readme.md  0 → 100644
## How to download data
1. Download "dev-v1.1.json" and "train-v1.1.json" from https://rajpurkar.github.io/SQuAD-explorer/
2. Download "glove.840B.300d.txt" from https://nlp.stanford.edu/projects/glove/
## How to submit this job
1. Run "$NNI_ROOT_DIR/auto_run.py" as described in "$NNI_ROOT_DIR/README-AUTO.md".
2. Use the docker image openpai.azurecr.io/nni_v0.0.1, which means it uses a TensorFlow CPU version.
3. This model doesn't need a search_space.json.
\ No newline at end of file
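
For context, the trial added in this commit receives its candidate architecture from NNI and reports accuracy back; a condensed, non-authoritative sketch of that flow (see trial.py below for the full version; the helper name graph_module is renamed here only to avoid shadowing):

# Condensed sketch of the NNI trial flow implemented in trial.py:
import nni
import graph as graph_module

params = nni.get_parameters()                 # JSON graph produced by the tuner
net_graph = graph_module.graph_loads(params)  # rebuild the Graph object
# ... build and train the model for this graph, then per epoch:
#         nni.report_intermediate_result(acc)
# ... and finally:
#         nni.report_final_result(best_acc)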
examples/trials/ga_squad/requirements.txt  0 → 100644
tensorflow==1.4.0
\ No newline at end of file
examples/trials/ga_squad/rnn.py  0 → 100644
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import tensorflow as tf
from tensorflow.python.ops.rnn_cell_impl import RNNCell


class GRU:
    '''
    GRU class.
    '''
    def __init__(self, name, input_dim, hidden_dim):
        self.name = '/'.join([name, 'gru'])
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.w_matrix = None
        self.U = None
        self.bias = None

    def define_params(self):
        '''
        Define parameters.
        '''
        input_dim = self.input_dim
        hidden_dim = self.hidden_dim
        prefix = self.name
        self.w_matrix = tf.Variable(tf.random_normal([input_dim, 3 * hidden_dim], stddev=0.1),
                                    name='/'.join([prefix, 'W']))
        self.U = tf.Variable(tf.random_normal([hidden_dim, 3 * hidden_dim], stddev=0.1),
                             name='/'.join([prefix, 'U']))
        self.bias = tf.Variable(tf.random_normal([1, 3 * hidden_dim], stddev=0.1),
                                name='/'.join([prefix, 'b']))
        return self

    def build(self, x, h, mask=None):
        '''
        Build the GRU cell.
        '''
        xw = tf.split(tf.matmul(x, self.w_matrix) + self.bias, 3, 1)
        hu = tf.split(tf.matmul(h, self.U), 3, 1)
        r = tf.sigmoid(xw[0] + hu[0])
        z = tf.sigmoid(xw[1] + hu[1])
        h1 = tf.tanh(xw[2] + r * hu[2])
        next_h = h1 * (1 - z) + h * z
        if mask is not None:
            next_h = next_h * mask + h * (1 - mask)
        return next_h

    def build_sequence(self, xs, masks, init, is_left_to_right):
        '''
        Build GRU sequence.
        '''
        states = []
        last = init
        if is_left_to_right:
            for i, xs_i in enumerate(xs):
                h = self.build(xs_i, last, masks[i])
                states.append(h)
                last = h
        else:
            for i in range(len(xs) - 1, -1, -1):
                h = self.build(xs[i], last, masks[i])
                states.insert(0, h)
                last = h
        return states


class XGRUCell(RNNCell):

    def __init__(self, hidden_dim, reuse=None):
        super(XGRUCell, self).__init__(self, _reuse=reuse)
        self._num_units = hidden_dim
        self._activation = tf.tanh

    @property
    def state_size(self):
        return self._num_units

    @property
    def output_size(self):
        return self._num_units

    def call(self, inputs, state):
        input_dim = inputs.get_shape()[-1]
        assert input_dim is not None, "input dimension must be defined"
        W = tf.get_variable(
            name="W", shape=[input_dim, 3 * self._num_units], dtype=tf.float32)
        U = tf.get_variable(
            name='U', shape=[self._num_units, 3 * self._num_units], dtype=tf.float32)
        b = tf.get_variable(
            name='b', shape=[1, 3 * self._num_units], dtype=tf.float32)

        xw = tf.split(tf.matmul(inputs, W) + b, 3, 1)
        hu = tf.split(tf.matmul(state, U), 3, 1)
        r = tf.sigmoid(xw[0] + hu[0])
        z = tf.sigmoid(xw[1] + hu[1])
        h1 = self._activation(xw[2] + r * hu[2])
        next_h = h1 * (1 - z) + state * z
        return next_h, next_h
examples/trials/ga_squad/train_model.py  0 → 100644
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
'''
Train the network combined by RNN and attention.
'''

import tensorflow as tf

from attention import DotAttention
from rnn import XGRUCell
from util import dropout
from graph_to_tf import graph_to_network


class GAGConfig:
    def __init__(self):
        self.batch_size = 128

        self.dropout = 0.1

        self.char_vcb_size = 1371
        self.max_char_length = 20
        self.char_embed_dim = 100

        self.max_query_length = 40
        self.max_passage_length = 800

        self.att_is_vanilla = True
        self.att_need_padding = False
        self.att_is_id = False

        self.ptr_dim = 70
        self.learning_rate = 0.1
        self.labelsmoothing = 0.1
        self.num_heads = 1
        self.rnn_units = 256


class GAG:
    def __init__(self, cfg, embed, graph):
        self.cfg = cfg
        self.embed = embed
        self.graph = graph

        self.query_word = None
        self.query_mask = None
        self.query_lengths = None
        self.passage_word = None
        self.passage_mask = None
        self.passage_lengths = None
        self.answer_begin = None
        self.answer_end = None
        self.query_char_ids = None
        self.query_char_lengths = None
        self.passage_char_ids = None
        self.passage_char_lengths = None
        self.passage_states = None
        self.query_states = None
        self.query_init = None
        self.begin_prob = None
        self.end_prob = None
        self.loss = None
        self.train_op = None

    def build_net(self, is_training):
        cfg = self.cfg
        with tf.device('/cpu:0'):
            word_embed = tf.get_variable(
                name='word_embed', initializer=self.embed, dtype=tf.float32, trainable=False)
            char_embed = tf.get_variable(name='char_embed',
                                         shape=[cfg.char_vcb_size, cfg.char_embed_dim],
                                         dtype=tf.float32)

        # [query_length, batch_size]
        self.query_word = tf.placeholder(dtype=tf.int32,
                                         shape=[None, None],
                                         name='query_word')
        self.query_mask = tf.placeholder(dtype=tf.float32,
                                         shape=[None, None],
                                         name='query_mask')
        # [batch_size]
        self.query_lengths = tf.placeholder(dtype=tf.int32, shape=[None], name='query_lengths')

        # [passage_length, batch_size]
        self.passage_word = tf.placeholder(dtype=tf.int32,
                                           shape=[None, None],
                                           name='passage_word')
        self.passage_mask = tf.placeholder(dtype=tf.float32,
                                           shape=[None, None],
                                           name='passage_mask')
        # [batch_size]
        self.passage_lengths = tf.placeholder(dtype=tf.int32, shape=[None], name='passage_lengths')

        if is_training:
            self.answer_begin = tf.placeholder(dtype=tf.int32, shape=[None], name='answer_begin')
            self.answer_end = tf.placeholder(dtype=tf.int32, shape=[None], name='answer_end')

        self.query_char_ids = tf.placeholder(dtype=tf.int32,
                                             shape=[self.cfg.max_char_length, None, None],
                                             name='query_char_ids')
        # sequence_length, batch_size
        self.query_char_lengths = tf.placeholder(dtype=tf.int32,
                                                 shape=[None, None],
                                                 name='query_char_lengths')

        self.passage_char_ids = tf.placeholder(dtype=tf.int32,
                                               shape=[self.cfg.max_char_length, None, None],
                                               name='passage_char_ids')
        # sequence_length, batch_size
        self.passage_char_lengths = tf.placeholder(dtype=tf.int32,
                                                   shape=[None, None],
                                                   name='passage_char_lengths')

        query_char_states = self.build_char_states(char_embed=char_embed,
                                                   is_training=is_training,
                                                   reuse=False,
                                                   char_ids=self.query_char_ids,
                                                   char_lengths=self.query_char_lengths)

        passage_char_states = self.build_char_states(char_embed=char_embed,
                                                     is_training=is_training,
                                                     reuse=True,
                                                     char_ids=self.passage_char_ids,
                                                     char_lengths=self.passage_char_lengths)

        with tf.variable_scope("encoding") as scope:
            query_states = tf.concat([tf.nn.embedding_lookup(
                word_embed, self.query_word), query_char_states], axis=2)
            scope.reuse_variables()
            passage_states = tf.concat([tf.nn.embedding_lookup(
                word_embed, self.passage_word), passage_char_states], axis=2)
        passage_states = tf.transpose(passage_states, perm=[1, 0, 2])
        query_states = tf.transpose(query_states, perm=[1, 0, 2])
        self.passage_states = passage_states
        self.query_states = query_states

        output, output2 = graph_to_network(passage_states, query_states,
                                           self.passage_lengths, self.query_lengths,
                                           self.graph, self.cfg.dropout,
                                           is_training, num_heads=cfg.num_heads,
                                           rnn_units=cfg.rnn_units)

        passage_att_mask = self.passage_mask
        batch_size_x = tf.shape(self.query_lengths)
        answer_h = tf.zeros(
            tf.concat([batch_size_x, tf.constant([cfg.ptr_dim], dtype=tf.int32)], axis=0))

        answer_context = tf.reduce_mean(output2, axis=1)

        query_init_w = tf.get_variable(
            'query_init_w', shape=[output2.get_shape().as_list()[-1], cfg.ptr_dim])
        self.query_init = query_init_w
        answer_context = tf.matmul(answer_context, query_init_w)

        output = tf.transpose(output, perm=[1, 0, 2])

        with tf.variable_scope('answer_ptr_layer'):
            ptr_att = DotAttention('ptr',
                                   hidden_dim=cfg.ptr_dim,
                                   is_vanilla=self.cfg.att_is_vanilla,
                                   is_identity_transform=self.cfg.att_is_id,
                                   need_padding=self.cfg.att_need_padding)
            answer_pre_compute = ptr_att.get_pre_compute(output)
            ptr_gru = XGRUCell(hidden_dim=cfg.ptr_dim)
            begin_prob, begin_logits = ptr_att.get_prob(output, answer_context, passage_att_mask,
                                                        answer_pre_compute, True)
            att_state = ptr_att.get_att(output, begin_prob)
            (_, answer_h) = ptr_gru.call(inputs=att_state, state=answer_h)
            answer_context = answer_h
            end_prob, end_logits = ptr_att.get_prob(output, answer_context,
                                                    passage_att_mask, answer_pre_compute,
                                                    True)

        self.begin_prob = tf.transpose(begin_prob, perm=[1, 0])
        self.end_prob = tf.transpose(end_prob, perm=[1, 0])
        begin_logits = tf.transpose(begin_logits, perm=[1, 0])
        end_logits = tf.transpose(end_logits, perm=[1, 0])

        if is_training:
            def label_smoothing(inputs, masks, epsilon=0.1):
                epsilon = cfg.labelsmoothing
                num_of_channel = tf.shape(inputs)[-1]  # number of channels
                inputs = tf.cast(inputs, tf.float32)
                return (((1 - epsilon) * inputs) + (epsilon /
                                                    tf.cast(num_of_channel, tf.float32))) * masks
            cost1 = tf.reduce_mean(tf.losses.softmax_cross_entropy(label_smoothing(
                tf.one_hot(self.answer_begin,
                           depth=tf.shape(self.passage_word)[0]),
                tf.transpose(self.passage_mask, perm=[1, 0])), begin_logits))
            cost2 = tf.reduce_mean(tf.losses.softmax_cross_entropy(
                label_smoothing(tf.one_hot(self.answer_end,
                                           depth=tf.shape(self.passage_word)[0]),
                                tf.transpose(self.passage_mask, perm=[1, 0])), end_logits))

            reg_ws = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
            l2_loss = tf.reduce_sum(reg_ws)
            loss = cost1 + cost2 + l2_loss
            self.loss = loss

            optimizer = tf.train.AdamOptimizer(learning_rate=cfg.learning_rate)
            self.train_op = optimizer.minimize(self.loss)

        return tf.stack([self.begin_prob, self.end_prob])

    def build_char_states(self, char_embed, is_training, reuse, char_ids, char_lengths):
        max_char_length = self.cfg.max_char_length

        inputs = dropout(tf.nn.embedding_lookup(char_embed, char_ids),
                         self.cfg.dropout, is_training)
        inputs = tf.reshape(
            inputs, shape=[max_char_length, -1, self.cfg.char_embed_dim])
        char_lengths = tf.reshape(char_lengths, shape=[-1])
        with tf.variable_scope('char_encoding', reuse=reuse):
            cell_fw = XGRUCell(hidden_dim=self.cfg.char_embed_dim)
            cell_bw = XGRUCell(hidden_dim=self.cfg.char_embed_dim)
            _, (left_right, right_left) = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=cell_fw,
                cell_bw=cell_bw,
                sequence_length=char_lengths,
                inputs=inputs,
                time_major=True,
                dtype=tf.float32
            )

        left_right = tf.reshape(left_right, shape=[-1, self.cfg.char_embed_dim])
        right_left = tf.reshape(right_left, shape=[-1, self.cfg.char_embed_dim])

        states = tf.concat([left_right, right_left], axis=1)
        out_shape = tf.shape(char_ids)[1:3]
        out_shape = tf.concat([out_shape, tf.constant(
            value=[self.cfg.char_embed_dim * 2], dtype=tf.int32)], axis=0)
        return tf.reshape(states, shape=out_shape)
examples/trials/ga_squad/trial.py  0 → 100644
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import os
import logging

logger = logging.getLogger('ga_squad')

try:
    import argparse
    import heapq
    import json
    import numpy as np
    import pickle

    import graph
    from util import Timer

    import nni
    import data
    import evaluate
    from train_model import *

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
except:
    logger.exception('Catch exception in trial.py.')
    raise


def get_config():
    '''
    Get config from argument parser.
    '''
    parser = argparse.ArgumentParser(
        description='This program is using genetic algorithm to search architecture for SQuAD.')
    parser.add_argument('--input_file', type=str,
                        default='./dev-v1.1.json', help='input file')
    parser.add_argument('--dev_file', type=str,
                        default='./dev-v1.1.json', help='dev file')
    parser.add_argument('--embedding_file', type=str,
                        default='./glove.840B.300d.txt', help='dev file')
    parser.add_argument('--root_path', default='./data/',
                        type=str, help='Root path of models')
    parser.add_argument('--batch_size', type=int, default=2, help='batch size')
    parser.add_argument('--save_path', type=str,
                        default='./save', help='save path dir')
    parser.add_argument('--learning_rate', type=float, default=0.0001,
                        help='set half of original learning rate reload data and train.')
    parser.add_argument('--max_epoch', type=int, default=30)
    parser.add_argument('--dropout_rate', type=float,
                        default=0.1, help='dropout_rate')
    parser.add_argument('--labelsmoothing', type=float,
                        default=0.1, help='labelsmoothing')
    parser.add_argument('--num_heads', type=int, default=1, help='num_heads')
    parser.add_argument('--rnn_units', type=int, default=256, help='rnn_units')

    args = parser.parse_args()

    return args


def get_id(word_dict, word):
    '''
    Return word id.
    '''
    if word in word_dict.keys():
        return word_dict[word]
    return word_dict['<unk>']


def load_embedding(path):
    '''
    Return the embedding for a specific file given its path.
    '''
    embedding_dict = {}
    with open(path, 'r', encoding='utf-8') as file:
        pairs = [line.strip('\r\n').split() for line in file.readlines()]
        for pair in pairs:
            embedding_dict[pair[0]] = [float(x) for x in pair[1:]]
    logger.debug('embedding_dict size: %d', len(embedding_dict))
    return embedding_dict


class MaxQueue:
    '''
    Queue for max value.
    '''
    def __init__(self, capacity):
        assert capacity > 0, 'queue size must be larger than 0'
        self._capacity = capacity
        self._entries = []

    @property
    def entries(self):
        return self._entries

    @property
    def capacity(self):
        return self._capacity

    @property
    def size(self):
        return len(self._entries)

    def clear(self):
        self._entries = []

    def push(self, item):
        if self.size < self.capacity:
            heapq.heappush(self.entries, item)
        else:
            heapq.heappushpop(self.entries, item)


def find_best_answer_span(left_prob, right_prob, passage_length, max_answer_length):
    left = 0
    right = 0
    max_prob = left_prob[0] * right_prob[0]
    for i in range(0, passage_length):
        left_p = left_prob[i]
        for j in range(i, min(i + max_answer_length, passage_length)):
            total_prob = left_p * right_prob[j]
            if max_prob < total_prob:
                left, right, max_prob = i, j, total_prob
    return [(max_prob, left, right)]


def write_prediction(path, position1_result, position2_result):
    import codecs

    with codecs.open(path, 'w', encoding='utf8') as file:
        batch_num = len(position1_result)
        for i in range(batch_num):
            position1_batch = position1_result[i]
            position2_batch = position2_result[i]
            for j in range(position1_batch.shape[0]):
                file.write(str(position1_batch[j]) + '\t' + str(position2_batch[j]) + '\n')


def find_kbest_answer_span(k, left_prob, right_prob, passage_length, max_answer_length):
    if k == 1:
        return find_best_answer_span(left_prob, right_prob, passage_length, max_answer_length)

    queue = MaxQueue(k)
    for i in range(0, passage_length):
        left_p = left_prob[i]
        for j in range(i, min(i + max_answer_length, passage_length)):
            total_prob = left_p * right_prob[j]
            queue.push((total_prob, i, j))
    return list(sorted(queue.entries, key=lambda x: -x[0]))


def run_epoch(batches, answer_net, is_training):
    if not is_training:
        position1_result = []
        position2_result = []
        contexts = []
        ids = []

    loss_sum = 0
    timer = Timer()
    count = 0
    for batch in batches:
        used = timer.get_elapsed(False)
        count += 1
        qps = batch['qp_pairs']
        question_tokens = [qp['question_tokens'] for qp in qps]
        passage_tokens = [qp['passage_tokens'] for qp in qps]
        context = [(qp['passage'], qp['passage_tokens']) for qp in qps]
        sample_id = [qp['id'] for qp in qps]

        _, query, query_mask, query_lengths = data.get_word_input(
            data=question_tokens, word_dict=word_vcb, embed=embed, embed_dim=cfg.word_embed_dim)
        _, passage, passage_mask, passage_lengths = data.get_word_input(
            data=passage_tokens, word_dict=word_vcb, embed=embed, embed_dim=cfg.word_embed_dim)

        query_char, query_char_lengths = data.get_char_input(
            data=question_tokens, char_dict=char_vcb, max_char_length=cfg.max_char_length)

        passage_char, passage_char_lengths = data.get_char_input(
            data=passage_tokens, char_dict=char_vcb, max_char_length=cfg.max_char_length)

        if is_training:
            answer_begin, answer_end = data.get_answer_begin_end(qps)

        if is_training:
            feed_dict = {answer_net.query_word: query,
                         answer_net.query_mask: query_mask,
                         answer_net.query_lengths: query_lengths,
                         answer_net.passage_word: passage,
                         answer_net.passage_mask: passage_mask,
                         answer_net.passage_lengths: passage_lengths,
                         answer_net.query_char_ids: query_char,
                         answer_net.query_char_lengths: query_char_lengths,
                         answer_net.passage_char_ids: passage_char,
                         answer_net.passage_char_lengths: passage_char_lengths,
                         answer_net.answer_begin: answer_begin,
                         answer_net.answer_end: answer_end}
            loss, _, = sess.run(
                [answer_net.loss, answer_net.train_op], feed_dict=feed_dict)
            if count % 100 == 0:
                logger.debug('%d %g except:%g, loss:%g' %
                             (count, used, used / count * len(batches), loss))
            loss_sum += loss
        else:
            feed_dict = {answer_net.query_word: query,
                         answer_net.query_mask: query_mask,
                         answer_net.query_lengths: query_lengths,
                         answer_net.passage_word: passage,
                         answer_net.passage_mask: passage_mask,
                         answer_net.passage_lengths: passage_lengths,
                         answer_net.query_char_ids: query_char,
                         answer_net.query_char_lengths: query_char_lengths,
                         answer_net.passage_char_ids: passage_char,
                         answer_net.passage_char_lengths: passage_char_lengths}
            position1, position2 = sess.run(
                [answer_net.begin_prob, answer_net.end_prob], feed_dict=feed_dict)
            position1_result += position1.tolist()
            position2_result += position2.tolist()
            contexts += context
            ids = np.concatenate((ids, sample_id))
            if count % 100 == 0:
                logger.debug('%d %g except:%g' %
                             (count, used, used / count * len(batches)))
        if count % 100 == 0:
            break
    loss = loss_sum / len(batches)
    if is_training:
        return loss
    return loss, position1_result, position2_result, ids, contexts


def generate_predict_json(position1_result, position2_result, ids, passage_tokens):
    '''
    Generate json by prediction.
    '''
    predict_len = len(position1_result)
    logger.debug('total prediction num is %s', str(predict_len))

    answers = {}
    for i in range(predict_len):
        sample_id = ids[i]
        passage, tokens = passage_tokens[i]
        kbest = find_best_answer_span(
            position1_result[i], position2_result[i], len(tokens), 23)
        _, start, end = kbest[0]
        answer = passage[tokens[start]['char_begin']:tokens[end]['char_end']]
        answers[sample_id] = answer
    logger.debug('generate predict done.')
    return answers


def generate_data(path, tokenizer, char_vcb, word_vcb, is_training=False):
    '''
    Generate data
    '''
    global root_path
    qp_pairs = data.load_from_file(path=path, is_training=is_training)

    tokenized_sent = 0
    # qp_pairs = qp_pairs[:1000]1
    for qp_pair in qp_pairs:
        tokenized_sent += 1
        data.tokenize(qp_pair, tokenizer, is_training)
        for word in qp_pair['question_tokens']:
            word_vcb.add(word['word'])
            for char in word['word']:
                char_vcb.add(char)
        for word in qp_pair['passage_tokens']:
            word_vcb.add(word['word'])
            for char in word['word']:
                char_vcb.add(char)

    max_query_length = max(len(x['question_tokens']) for x in qp_pairs)
    max_passage_length = max(len(x['passage_tokens']) for x in qp_pairs)
    #min_passage_length = min(len(x['passage_tokens']) for x in qp_pairs)
    cfg.max_query_length = max_query_length
    cfg.max_passage_length = max_passage_length

    return qp_pairs


def train_with_graph(graph, qp_pairs, dev_qp_pairs):
    '''
    Train a network from a specific graph.
    '''
    global sess
    with tf.Graph().as_default():
        train_model = GAG(cfg, embed, graph)
        train_model.build_net(is_training=True)
        tf.get_variable_scope().reuse_variables()
        dev_model = GAG(cfg, embed, graph)
        dev_model.build_net(is_training=False)
        with tf.Session() as sess:
            logger.debug('init variables')
            init = tf.global_variables_initializer()
            sess.run(init)
            # writer = tf.summary.FileWriter('%s/graph/'%execution_path, sess.graph)
            logger.debug('assign to graph')

            saver = tf.train.Saver()
            train_loss = None
            bestacc = 0
            patience = 5
            patience_increase = 2
            improvement_threshold = 0.995

            for epoch in range(max_epoch):
                logger.debug('begin to train')
                train_batches = data.get_batches(qp_pairs, cfg.batch_size)
                train_loss = run_epoch(train_batches, train_model, True)
                logger.debug('epoch ' + str(epoch) + ' loss: ' + str(train_loss))
                dev_batches = list(data.get_batches(
                    dev_qp_pairs, cfg.batch_size))
                _, position1, position2, ids, contexts = run_epoch(
                    dev_batches, dev_model, False)

                answers = generate_predict_json(
                    position1, position2, ids, contexts)
                if save_path is not None:
                    with open(save_path + 'epoch%d.prediction' % epoch, 'w') as file:
                        json.dump(answers, file)
                else:
                    answers = json.dumps(answers)
                    answers = json.loads(answers)
                iter = epoch + 1

                acc = evaluate.evaluate_with_predictions(
                    args.dev_file, answers)

                logger.debug('Send intermediate acc: %s', str(acc))
                nni.report_intermediate_result(acc)

                logger.debug('Send intermediate result done.')

                if acc > bestacc:
                    if acc * improvement_threshold > bestacc:
                        patience = max(patience, iter * patience_increase)
                    bestacc = acc

                    if save_path is not None:
                        saver.save(sess, save_path + 'epoch%d.model' % epoch)
                        with open(save_path + 'epoch%d.score' % epoch, 'wb') as file:
                            pickle.dump(
                                (position1, position2, ids, contexts), file)
                logger.debug('epoch %d acc %g bestacc %g' % (epoch, acc, bestacc))
                if patience <= iter:
                    break
            logger.debug('save done.')
    return train_loss, bestacc


embed = None
char_vcb = None
tokenizer = None
word_vcb = None


def load_data():
    global embed, char_vcb, tokenizer, word_vcb
    logger.debug('tokenize data')
    tokenizer = data.WhitespaceTokenizer()

    char_set = set()
    word_set = set()
    logger.debug('generate train data')
    qp_pairs = generate_data(input_file, tokenizer,
                             char_set, word_set, is_training=True)
    logger.debug('generate dev data')
    dev_qp_pairs = generate_data(
        dev_file, tokenizer, char_set, word_set, is_training=False)
    logger.debug('generate data done.')

    char_vcb = {char: sample_id for sample_id, char in enumerate(char_set)}
    word_vcb = {word: sample_id for sample_id, word in enumerate(word_set)}

    timer.start()
    logger.debug('read embedding table')

    cfg.word_embed_dim = 300
    embed = np.zeros((len(word_vcb), cfg.word_embed_dim), dtype=np.float32)

    embedding = load_embedding(args.embedding_file)
    for word, sample_id in enumerate(word_vcb):
        if word in embedding:
            embed[sample_id] = embedding[word]

    # add UNK into dict
    unk = np.zeros((1, cfg.word_embed_dim), dtype=np.float32)
    embed = np.concatenate((unk, embed), axis=0)
    word_vcb = {key: value + 1 for key, value in word_vcb.items()}

    return qp_pairs, dev_qp_pairs


if __name__ == '__main__':
    try:
        args = get_config()

        root_path = os.path.expanduser(args.root_path)
        input_file = os.path.expanduser(args.input_file)
        dev_file = os.path.expanduser(args.dev_file)
        save_path = None
        max_epoch = args.max_epoch

        cfg = GAGConfig()
        cfg.batch_size = args.batch_size
        cfg.learning_rate = float(args.learning_rate)
        cfg.dropout = args.dropout_rate
        cfg.rnn_units = args.rnn_units
        cfg.labelsmoothing = args.labelsmoothing
        cfg.num_heads = args.num_heads
        timer = Timer()

        qp_pairs, dev_qp_pairs = load_data()
        logger.debug('Init finish.')

        original_params = nni.get_parameters()
        '''
        with open('data.json') as f:
            original_params = json.load(f)
        '''
        try:
            graph = graph.graph_loads(original_params)
        except Exception:
            logger.debug('Can\'t load graph.')
        train_loss, best_acc = train_with_graph(graph, qp_pairs, dev_qp_pairs)

        logger.debug('Send best acc: %s', str(best_acc))
        nni.report_final_result(best_acc)
        logger.debug('Send final result done')
    except:
        logger.exception('Catch exception in trial.py.')
        raise
examples/trials/ga_squad/util.py  0 → 100644
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
'''
Util Module
'''
import time

import tensorflow as tf


def shape(tensor):
    '''
    Get the static shape of a tensor.
    Return type is tuple.
    '''
    temp_s = tensor.get_shape()
    return tuple([temp_s[i].value for i in range(0, len(temp_s))])


def get_variable(name, temp_s):
    '''
    Create a zero-initialized variable with the given name and shape.
    '''
    return tf.Variable(tf.zeros(temp_s), name=name)


def dropout(tensor, drop_prob, is_training):
    '''
    Apply dropout during training; return the tensor unchanged otherwise.
    '''
    if not is_training:
        return tensor
    return tf.nn.dropout(tensor, 1.0 - drop_prob)


class Timer:
    '''
    Timer measures elapsed wall-clock time.
    '''
    def __init__(self):
        self.__start = time.time()

    def start(self):
        '''
        Reset the timer.
        '''
        self.__start = time.time()

    def get_elapsed(self, restart=True):
        '''
        Return the elapsed time span, optionally restarting the timer.
        '''
        end = time.time()
        span = end - self.__start
        if restart:
            self.__start = end
        return span
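Note: the helpers above (shape, dropout, Timer) are shared by the GA-SQuAD trial code. A minimal usage sketch, assuming util.py is importable from the trial's working directory and using an illustrative tensor shape (TensorFlow 1.x style, matching the rest of this example):

import tensorflow as tf

from util import Timer, dropout, shape

timer = Timer()
activations = tf.zeros([32, 128])               # illustrative activation tensor
print(shape(activations))                       # static shape as a tuple: (32, 128)
dropped = dropout(activations, drop_prob=0.2, is_training=True)   # keep_prob = 0.8
print('graph construction took %.3fs' % timer.get_elapsed())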
examples/trials/mnist-annotation/config.yml
0 → 100644
View file @
252f36f8
authorName: default
experimentName: example_mnist
trialConcurrency: 1
maxExecDuration: 1h
maxTrialNum: 1
#choice: local, remote
trainingServicePlatform: local
#choice: true, false
useAnnotation: true
tuner:
  #choice: TPE, Random, Anneal, Evolution
  tunerName: TPE
  #choice: Maximize, Minimize
  optimizationMode: Maximize
trial:
  trialCommand: python3 mnist.py
  trialCodeDir: /usr/share/nni/examples/trials/mnist-annotation
  trialGpuNum: 0
\ No newline at end of file
examples/trials/mnist-annotation/mnist.py
0 → 100644
View file @
252f36f8
"""A deep MNIST classifier using convolutional layers."""
import
logging
import
math
import
tempfile
import
tensorflow
as
tf
from
tensorflow.examples.tutorials.mnist
import
input_data
FLAGS
=
None
logger
=
logging
.
getLogger
(
'mnist_AutoML'
)
class
MnistNetwork
(
object
):
'''
MnistNetwork is for initlizing and building basic network for mnist.
'''
def
__init__
(
self
,
channel_1_num
,
channel_2_num
,
conv_size
,
hidden_size
,
pool_size
,
learning_rate
,
x_dim
=
784
,
y_dim
=
10
):
self
.
channel_1_num
=
channel_1_num
self
.
channel_2_num
=
channel_2_num
"""@nni.variable(nni.choice(2, 3, 5, 7),name=self.conv_size)"""
self
.
conv_size
=
conv_size
"""@nni.variable(nni.choice(124, 512, 1024), name=self.hidden_size)"""
self
.
hidden_size
=
hidden_size
self
.
pool_size
=
pool_size
"""@nni.variable(nni.uniform(0.0001, 0.1), name=self.learning_rate)"""
self
.
learning_rate
=
learning_rate
self
.
x_dim
=
x_dim
self
.
y_dim
=
y_dim
self
.
images
=
tf
.
placeholder
(
tf
.
float32
,
[
None
,
self
.
x_dim
],
name
=
'input_x'
)
self
.
labels
=
tf
.
placeholder
(
tf
.
float32
,
[
None
,
self
.
y_dim
],
name
=
'input_y'
)
self
.
keep_prob
=
tf
.
placeholder
(
tf
.
float32
,
name
=
'keep_prob'
)
self
.
train_step
=
None
self
.
accuracy
=
None
def
build_network
(
self
):
'''
Building network for mnist
'''
# Reshape to use within a convolutional neural net.
# Last dimension is for "features" - there is only one here, since images are
# grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc.
with
tf
.
name_scope
(
'reshape'
):
try
:
input_dim
=
int
(
math
.
sqrt
(
self
.
x_dim
))
except
:
print
(
'input dim cannot be sqrt and reshape. input dim: '
+
str
(
self
.
x_dim
))
logger
.
debug
(
'input dim cannot be sqrt and reshape. input dim: %s'
,
str
(
self
.
x_dim
))
raise
x_image
=
tf
.
reshape
(
self
.
images
,
[
-
1
,
input_dim
,
input_dim
,
1
])
# First convolutional layer - maps one grayscale image to 32 feature maps.
with
tf
.
name_scope
(
'conv1'
):
w_conv1
=
weight_variable
(
[
self
.
conv_size
,
self
.
conv_size
,
1
,
self
.
channel_1_num
])
b_conv1
=
bias_variable
([
self
.
channel_1_num
])
"""@nni.function_choice(tf.nn.relu(conv2d(x_image, w_conv1) + b_conv1), tf.nn.sigmoid(conv2d(x_image, w_conv1) + b_conv1), tf.nn.tanh(conv2d(x_image, w_conv1) + b_conv1), name=tf.nn.relu)"""
h_conv1
=
tf
.
nn
.
relu
(
conv2d
(
x_image
,
w_conv1
)
+
b_conv1
)
# Pooling layer - downsamples by 2X.
with
tf
.
name_scope
(
'pool1'
):
"""@nni.function_choice(max_pool(h_conv1, self.pool_size), avg_pool(h_conv1, self.pool_size), name=max_pool)"""
h_pool1
=
max_pool
(
h_conv1
,
self
.
pool_size
)
# Second convolutional layer -- maps 32 feature maps to 64.
with
tf
.
name_scope
(
'conv2'
):
w_conv2
=
weight_variable
([
self
.
conv_size
,
self
.
conv_size
,
self
.
channel_1_num
,
self
.
channel_2_num
])
b_conv2
=
bias_variable
([
self
.
channel_2_num
])
h_conv2
=
tf
.
nn
.
relu
(
conv2d
(
h_pool1
,
w_conv2
)
+
b_conv2
)
# Second pooling layer.
with
tf
.
name_scope
(
'pool2'
):
h_pool2
=
max_pool
(
h_conv2
,
self
.
pool_size
)
# Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image
# is down to 7x7x64 feature maps -- maps this to 1024 features.
last_dim
=
int
(
input_dim
/
(
self
.
pool_size
*
self
.
pool_size
))
with
tf
.
name_scope
(
'fc1'
):
w_fc1
=
weight_variable
(
[
last_dim
*
last_dim
*
self
.
channel_2_num
,
self
.
hidden_size
])
b_fc1
=
bias_variable
([
self
.
hidden_size
])
h_pool2_flat
=
tf
.
reshape
(
h_pool2
,
[
-
1
,
last_dim
*
last_dim
*
self
.
channel_2_num
])
h_fc1
=
tf
.
nn
.
relu
(
tf
.
matmul
(
h_pool2_flat
,
w_fc1
)
+
b_fc1
)
# Dropout - controls the complexity of the model, prevents co-adaptation of features.
with
tf
.
name_scope
(
'dropout'
):
h_fc1_drop
=
tf
.
nn
.
dropout
(
h_fc1
,
self
.
keep_prob
)
# Map the 1024 features to 10 classes, one for each digit
with
tf
.
name_scope
(
'fc2'
):
w_fc2
=
weight_variable
([
self
.
hidden_size
,
self
.
y_dim
])
b_fc2
=
bias_variable
([
self
.
y_dim
])
y_conv
=
tf
.
matmul
(
h_fc1_drop
,
w_fc2
)
+
b_fc2
with
tf
.
name_scope
(
'loss'
):
cross_entropy
=
tf
.
reduce_mean
(
tf
.
nn
.
softmax_cross_entropy_with_logits
(
labels
=
self
.
labels
,
logits
=
y_conv
))
with
tf
.
name_scope
(
'adam_optimizer'
):
self
.
train_step
=
tf
.
train
.
AdamOptimizer
(
self
.
learning_rate
).
minimize
(
cross_entropy
)
with
tf
.
name_scope
(
'accuracy'
):
correct_prediction
=
tf
.
equal
(
tf
.
argmax
(
y_conv
,
1
),
tf
.
argmax
(
self
.
labels
,
1
))
self
.
accuracy
=
tf
.
reduce_mean
(
tf
.
cast
(
correct_prediction
,
tf
.
float32
))
def
conv2d
(
x_input
,
w_matrix
):
"""conv2d returns a 2d convolution layer with full stride."""
return
tf
.
nn
.
conv2d
(
x_input
,
w_matrix
,
strides
=
[
1
,
1
,
1
,
1
],
padding
=
'SAME'
)
def
max_pool
(
x_input
,
pool_size
):
"""max_pool downsamples a feature map by 2X."""
return
tf
.
nn
.
max_pool
(
x_input
,
ksize
=
[
1
,
pool_size
,
pool_size
,
1
],
strides
=
[
1
,
pool_size
,
pool_size
,
1
],
padding
=
'SAME'
)
def
avg_pool
(
x_input
,
pool_size
):
return
tf
.
nn
.
avg_pool
(
x_input
,
ksize
=
[
1
,
pool_size
,
pool_size
,
1
],
strides
=
[
1
,
pool_size
,
pool_size
,
1
],
padding
=
'SAME'
)
def
weight_variable
(
shape
):
"""weight_variable generates a weight variable of a given shape."""
initial
=
tf
.
truncated_normal
(
shape
,
stddev
=
0.1
)
return
tf
.
Variable
(
initial
)
def
bias_variable
(
shape
):
"""bias_variable generates a bias variable of a given shape."""
initial
=
tf
.
constant
(
0.1
,
shape
=
shape
)
return
tf
.
Variable
(
initial
)
def
main
(
params
):
'''
Main function, build mnist network, run and send result to NNI.
'''
# Import data
mnist
=
input_data
.
read_data_sets
(
params
[
'data_dir'
],
one_hot
=
True
)
print
(
'Mnist download data down.'
)
logger
.
debug
(
'Mnist download data down.'
)
# Create the model
# Build the graph for the deep net
mnist_network
=
MnistNetwork
(
channel_1_num
=
params
[
'channel_1_num'
],
channel_2_num
=
params
[
'channel_2_num'
],
conv_size
=
params
[
'conv_size'
],
hidden_size
=
params
[
'hidden_size'
],
pool_size
=
params
[
'pool_size'
],
learning_rate
=
params
[
'learning_rate'
])
mnist_network
.
build_network
()
logger
.
debug
(
'Mnist build network done.'
)
# Write log
graph_location
=
tempfile
.
mkdtemp
()
logger
.
debug
(
'Saving graph to: %s'
,
graph_location
)
train_writer
=
tf
.
summary
.
FileWriter
(
graph_location
)
train_writer
.
add_graph
(
tf
.
get_default_graph
())
test_acc
=
0.0
with
tf
.
Session
()
as
sess
:
sess
.
run
(
tf
.
global_variables_initializer
())
"""@nni.variable(nni.choice(50, 250, 500), name=batch_num)"""
batch_num
=
params
[
'batch_num'
]
for
i
in
range
(
batch_num
):
batch
=
mnist
.
train
.
next_batch
(
batch_num
)
"""@nni.variable(nni.choice(1, 5), name=dropout_rate)"""
dropout_rate
=
params
[
'dropout_rate'
]
mnist_network
.
train_step
.
run
(
feed_dict
=
{
mnist_network
.
images
:
batch
[
0
],
mnist_network
.
labels
:
batch
[
1
],
mnist_network
.
keep_prob
:
dropout_rate
}
)
if
i
%
100
==
0
:
test_acc
=
mnist_network
.
accuracy
.
eval
(
feed_dict
=
{
mnist_network
.
images
:
mnist
.
test
.
images
,
mnist_network
.
labels
:
mnist
.
test
.
labels
,
mnist_network
.
keep_prob
:
1.0
})
"""@nni.report_intermediate_result(test_acc)"""
logger
.
debug
(
'test accuracy %g'
,
test_acc
)
logger
.
debug
(
'Pipe send intermediate result done.'
)
test_acc
=
mnist_network
.
accuracy
.
eval
(
feed_dict
=
{
mnist_network
.
images
:
mnist
.
test
.
images
,
mnist_network
.
labels
:
mnist
.
test
.
labels
,
mnist_network
.
keep_prob
:
1.0
})
"""@nni.report_final_result(test_acc)"""
logger
.
debug
(
'Final result is %g'
,
test_acc
)
logger
.
debug
(
'Send final result done.'
)
def
generate_defualt_params
():
'''
Generate default parameters for mnist network.
'''
params
=
{
'data_dir'
:
'/tmp/tensorflow/mnist/input_data'
,
'dropout_rate'
:
0.5
,
'channel_1_num'
:
32
,
'channel_2_num'
:
64
,
'conv_size'
:
5
,
'pool_size'
:
2
,
'hidden_size'
:
1024
,
'learning_rate'
:
1e-4
,
'batch_num'
:
200
}
return
params
if
__name__
==
'__main__'
:
try
:
main
(
generate_defualt_params
())
except
Exception
as
exception
:
logger
.
exception
(
exception
)
raise
examples/trials/mnist-keras/config.yml
0 → 100644
View file @
252f36f8
authorName: default
experimentName: example_mnist-keras
trialConcurrency: 1
maxExecDuration: 1h
maxTrialNum: 1
#choice: local, remote
trainingServicePlatform: local
searchSpacePath: /usr/share/nni/examples/trials/mnist-keras/search_space.json
#choice: true, false
useAnnotation: false
tuner:
  #choice: TPE, Random, Anneal, Evolution
  tunerName: TPE
  #choice: Maximize, Minimize
  optimizationMode: Maximize
trial:
  trialCommand: python3 mnist-keras.py
  trialCodeDir: /usr/share/nni/examples/trials/mnist-keras
  trialGpuNum: 0
\ No newline at end of file
examples/trials/mnist-keras/mnist-keras.py
0 → 100644
View file @
252f36f8
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import argparse
import logging
import os

import keras
import numpy as np
from keras import backend as K
from keras.callbacks import TensorBoard
from keras.datasets import mnist
from keras.layers import Conv2D, Dense, Flatten, MaxPooling2D
from keras.models import Sequential

import nni

LOG = logging.getLogger('mnist_keras')
K.set_image_data_format('channels_last')
TENSORBOARD_DIR = os.environ['NNI_OUTPUT_DIR']

H, W = 28, 28
NUM_CLASSES = 10


def create_mnist_model(hyper_params, input_shape=(H, W, 1), num_classes=NUM_CLASSES):
    '''
    Create simple convolutional model
    '''
    layers = [
        Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(100, activation='relu'),
        Dense(num_classes, activation='softmax')
    ]

    model = Sequential(layers)

    if hyper_params['optimizer'] == 'Adam':
        optimizer = keras.optimizers.Adam(lr=hyper_params['learning_rate'])
    else:
        optimizer = keras.optimizers.SGD(lr=hyper_params['learning_rate'], momentum=0.9)
    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=optimizer, metrics=['accuracy'])

    return model


def load_mnist_data(args):
    '''
    Load MNIST dataset
    '''
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    x_train = (np.expand_dims(x_train, -1).astype(np.float) / 255.)[:args.num_train]
    x_test = (np.expand_dims(x_test, -1).astype(np.float) / 255.)[:args.num_test]
    y_train = keras.utils.to_categorical(y_train, NUM_CLASSES)[:args.num_train]
    y_test = keras.utils.to_categorical(y_test, NUM_CLASSES)[:args.num_test]

    LOG.debug('x_train shape: %s', (x_train.shape,))
    LOG.debug('x_test shape: %s', (x_test.shape,))

    return x_train, y_train, x_test, y_test


class SendMetrics(keras.callbacks.Callback):
    '''
    Keras callback to send metrics to NNI framework
    '''
    def on_epoch_end(self, epoch, logs={}):
        '''
        Run on end of each epoch
        '''
        LOG.debug(logs)
        nni.report_intermediate_result(logs)


def train(args, params):
    '''
    Train model
    '''
    x_train, y_train, x_test, y_test = load_mnist_data(args)
    model = create_mnist_model(params)

    model.fit(x_train, y_train, batch_size=args.batch_size, epochs=args.epochs,
              verbose=1, validation_data=(x_test, y_test),
              callbacks=[SendMetrics(), TensorBoard(log_dir=TENSORBOARD_DIR)])

    _, acc = model.evaluate(x_test, y_test, verbose=0)
    LOG.debug('Final result is: %g', acc)
    nni.report_final_result(acc)


def generate_default_params():
    '''
    Generate default hyper parameters
    '''
    return {
        'optimizer': 'Adam',
        'learning_rate': 0.001
    }


if __name__ == '__main__':
    PARSER = argparse.ArgumentParser()
    PARSER.add_argument("--batch_size", type=int, default=200, help="batch size", required=False)
    PARSER.add_argument("--epochs", type=int, default=10, help="Train epochs", required=False)
    PARSER.add_argument("--num_train", type=int, default=60000,
                        help="Number of train samples to be used, maximum 60000", required=False)
    PARSER.add_argument("--num_test", type=int, default=10000,
                        help="Number of test samples to be used, maximum 10000", required=False)

    ARGS, UNKNOWN = PARSER.parse_known_args()

    try:
        # get parameters from tuner
        RECEIVED_PARAMS = nni.get_parameters()
        LOG.debug(RECEIVED_PARAMS)
        PARAMS = generate_default_params()
        PARAMS.update(RECEIVED_PARAMS)
        # train
        train(ARGS, PARAMS)
    except Exception as e:
        LOG.exception(e)
        raise
examples/trials/mnist-keras/search_space.json
0 → 100644
View file @
252f36f8
{
    "optimizer": {"_type": "choice", "_value": ["Adam", "SGD"]},
    "learning_rate": {"_type": "choice", "_value": [0.0001, 0.001, 0.002, 0.005, 0.01]}
}
examples/trials/mnist-smartparam/config.yml
0 → 100644
View file @
252f36f8
authorName: default
experimentName: example_mnist-smartparam
trialConcurrency: 1
maxExecDuration: 1h
maxTrialNum: 1
#choice: local, remote
trainingServicePlatform: local
#choice: true, false
useAnnotation: true
tuner:
  #choice: TPE, Random, Anneal, Evolution
  tunerName: TPE
  #choice: Maximize, Minimize
  optimizationMode: Maximize
trial:
  trialCommand: python3 mnist.py
  trialCodeDir: /usr/share/nni/examples/trials/mnist-smartparam
  trialGpuNum: 0
\ No newline at end of file
examples/trials/mnist-smartparam/mnist.py
0 → 100644
View file @
252f36f8
This diff is collapsed.
examples/trials/mnist/config.yml
0 → 100644
View file @
252f36f8
authorName: default
experimentName: example_mnist
trialConcurrency: 1
maxExecDuration: 1h
maxTrialNum: 1
#choice: local, remote
trainingServicePlatform: local
searchSpacePath: /usr/share/nni/examples/trials/mnist/search_space.json
#choice: true, false
useAnnotation: false
tuner:
  #choice: TPE, Random, Anneal, Evolution
  tunerName: TPE
  #choice: Maximize, Minimize
  optimizationMode: Maximize
trial:
  trialCommand: python3 mnist.py
  trialCodeDir: /usr/share/nni/examples/trials/mnist
  trialGpuNum: 0
\ No newline at end of file
examples/trials/mnist/mnist.py
0 → 100644
View file @
252f36f8
This diff is collapsed.
examples/trials/mnist/search_space.json
0 → 100644
View file @
252f36f8
{
    "dropout_rate": {"_type": "uniform", "_value": [0.1, 0.5]},
    "conv_size": {"_type": "choice", "_value": [2, 3, 5, 7]},
    "hidden_size": {"_type": "choice", "_value": [124, 512, 1024]},
    "learning_rate": {"_type": "uniform", "_value": [0.0001, 0.1]}
}
\ No newline at end of file
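With useAnnotation set to false in the corresponding config.yml, the tuner samples a value for each key in this search space and the trial fetches them at run time. A minimal sketch of that handshake, assuming hypothetical defaults and a placeholder metric (the nni calls mirror the mnist-keras trial shown earlier in this commit):

import nni

# Hypothetical defaults for the parameters declared in search_space.json.
params = {
    'dropout_rate': 0.5,
    'conv_size': 5,
    'hidden_size': 1024,
    'learning_rate': 1e-4,
}

# Tuner-chosen values arrive as a dict keyed by the search-space names.
params.update(nni.get_parameters())

# ... build and train the model with params here ...
test_acc = 0.0  # placeholder for the real evaluation result
nni.report_final_result(test_acc)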
examples/tuners/README.md
0 → 100644
View file @
252f36f8
This diff is collapsed.