Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
eb77376e
Commit
eb77376e
authored
Sep 27, 2020
by
huchen
Browse files
Merge branch 'xuan_dev' into 'develop'
DTK-203 See merge request dcutoolkit/deeplearing/NNI!4
parents
602e1842
ba0de4b1
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
209 additions
and
1 deletion
+209
-1
examples/trials/network_morphism/FashionMNIST/FashionMNIST_keras_tf_v2.py
...network_morphism/FashionMNIST/FashionMNIST_keras_tf_v2.py
+208
-0
src/nni_manager/training_service/common/gpuData.ts
src/nni_manager/training_service/common/gpuData.ts
+1
-1
No files found.
examples/trials/network_morphism/FashionMNIST/FashionMNIST_keras_tf_v2.py
0 → 100644
View file @
eb77376e
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import
argparse
import
logging
import
os
import
tensorflow
as
tf
import
keras
from
keras.callbacks
import
EarlyStopping
,
TensorBoard
from
keras.datasets
import
fashion_mnist
from
keras.optimizers
import
SGD
,
Adadelta
,
Adagrad
,
Adam
,
Adamax
,
RMSprop
from
keras.utils
import
multi_gpu_model
,
to_categorical
#import keras.backend.tensorflow_backend as KTF
#import tf.compat.v1.keras.backend as KTF
import
nni
from
nni.networkmorphism_tuner.graph
import
json_to_graph
# Named logger used by every function in this trial script.
logger = logging.getLogger("FashionMNIST-network-morphism-keras")

# Root logging configuration: timestamped records at INFO level and above are
# appended to "networkmorphism.log" (relative to the working directory).
log_format = "%(asctime)s %(message)s"
logging.basicConfig(
    **{
        "filename": "networkmorphism.log",
        "filemode": "a",
        "level": logging.INFO,
        "format": log_format,
        "datefmt": "%m/%d %I:%M:%S %p",
    }
)
# Restrict GPU usage: enable the `allow_growth` GPU option, open a session with
# that configuration and register it as the Keras backend session.
# The tf.compat.v1 entry points stand in for the TF1 names tf.ConfigProto and
# tf.Session.
# pylint: disable=E1101,W0603
config = tf.compat.v1.ConfigProto(
    gpu_options=tf.compat.v1.GPUOptions(allow_growth=True)
)
sess = tf.compat.v1.Session(config=config)
tf.compat.v1.keras.backend.set_session(sess)
def get_args():
    """Parse the trial's hyper-parameters from the command line.

    Returns:
        argparse.Namespace with the attributes ``batch_size``, ``optimizer``,
        ``epochs``, ``learning_rate`` and ``weight_decay``.
    """
    # (flag, type, default, help) for every supported option, in help order.
    option_specs = (
        ("--batch_size", int, 128, "batch size"),
        ("--optimizer", str, "SGD", "optimizer"),
        ("--epochs", int, 200, "epoch limit"),
        ("--learning_rate", float, 0.001, "learning rate"),
        ("--weight_decay", float, 1e-5, "weight decay of the learning rate"),
    )

    parser = argparse.ArgumentParser("fashion_mnist")
    for flag, value_type, default_value, help_text in option_specs:
        parser.add_argument(
            flag, type=value_type, default=default_value, help=help_text
        )
    return parser.parse_args()
# Module-level state: parse_rev_args() fills these in and train_eval() reads them.
trainloader = testloader = net = None

# Command-line hyper-parameters, parsed once when the module is loaded.
args = get_args()

# Directory handed to the TensorBoard callback; a missing NNI_OUTPUT_DIR
# environment variable raises KeyError here.
TENSORBOARD_DIR = os.environ["NNI_OUTPUT_DIR"]
def build_graph_from_json(ir_model_json):
    """Build a Keras model from the JSON representation of a network graph.

    Args:
        ir_model_json: JSON description of the network, handed unchanged to
            ``json_to_graph``.

    Returns:
        The model returned by the graph's ``produce_keras_model()``.
    """
    graph = json_to_graph(ir_model_json)
    # Use the module-level logger, like the rest of this file, instead of
    # calling the root ``logging`` module directly.
    logger.debug(graph.operation_history)
    return graph.produce_keras_model()
def _build_optimizer(name, learning_rate, weight_decay):
    """Create the Keras optimizer selected by ``name``.

    Args:
        name: One of "SGD", "Adadelta", "Adagrad", "Adam", "Adamax", "RMSprop".
        learning_rate: Value passed to the optimizer as ``lr``.
        weight_decay: Value passed to the optimizer as ``decay``.

    Returns:
        The constructed optimizer instance.

    Raises:
        ValueError: If ``name`` is not a supported optimizer.
    """
    # Optimizer class plus any keyword arguments specific to that optimizer.
    optimizer_factories = {
        "SGD": (SGD, {"momentum": 0.9}),
        "Adadelta": (Adadelta, {}),
        "Adagrad": (Adagrad, {}),
        "Adam": (Adam, {}),
        "Adamax": (Adamax, {}),
        "RMSprop": (RMSprop, {}),
    }
    try:
        optimizer_cls, extra_kwargs = optimizer_factories[name]
    except KeyError:
        supported = ", ".join(optimizer_factories)
        raise ValueError(
            "unsupported optimizer %r; expected one of: %s" % (name, supported)
        ) from None
    return optimizer_cls(lr=learning_rate, decay=weight_decay, **extra_kwargs)


def parse_rev_args(receive_msg):
    """Parse the received message into the module-level training state.

    Loads Fashion-MNIST, builds the model described by ``receive_msg``, wraps
    it with ``multi_gpu_model`` when more than one device is listed in
    ``HIP_VISIBLE_DEVICES``, and compiles it with the optimizer chosen on the
    command line. The results are stored in the globals ``trainloader``,
    ``testloader`` and ``net``.

    Args:
        receive_msg: JSON graph representation received from the tuner.

    Returns:
        0 on success.

    Raises:
        ValueError: If ``args.optimizer`` does not name a supported optimizer.
    """
    global trainloader
    global testloader
    global net

    # Loading Data
    logger.debug("Preparing data..")

    (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
    # One-hot encode the labels over the 10 classes.
    y_train = to_categorical(y_train, 10)
    y_test = to_categorical(y_test, 10)
    # Append a trailing axis of size 1, convert to float32 and divide by 255.
    x_train = x_train.reshape(x_train.shape + (1,)).astype("float32")
    x_test = x_test.reshape(x_test.shape + (1,)).astype("float32")
    x_train /= 255.0
    x_test /= 255.0
    trainloader = (x_train, y_train)
    testloader = (x_test, y_test)

    # Model
    logger.debug("Building model..")
    net = build_graph_from_json(receive_msg)

    # parallel model
    # Only the environment lookup decides whether the "not supported" message
    # is logged; errors raised by multi_gpu_model itself now propagate.
    available_devices = os.environ.get("HIP_VISIBLE_DEVICES")
    if available_devices is None:
        logger.debug("parallel model not support in this config settings")
    else:
        gpus = len(available_devices.split(","))
        if gpus > 1:
            net = multi_gpu_model(net, gpus)

    # An unknown optimizer name used to leave `optimizer` unbound and fail
    # inside compile(); it is now rejected with an explicit ValueError.
    optimizer = _build_optimizer(
        args.optimizer, args.learning_rate, args.weight_decay
    )

    # Compile the model
    net.compile(
        loss="categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"]
    )
    return 0
class SendMetrics(keras.callbacks.Callback):
    """Keras callback that reports validation accuracy to the NNI framework."""

    def on_epoch_end(self, epoch, logs=None):
        """Report the epoch's validation accuracy as an NNI intermediate result.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metrics dictionary supplied by Keras; ``None`` is treated as
                an empty dictionary.
        """
        logs = {} if logs is None else logs
        logger.debug(logs)
        # TensorFlow 2.0 API reference claims the key is `val_acc`, but in fact
        # it's `val_accuracy`; prefer `val_acc` when it is present.
        metric_key = 'val_acc' if 'val_acc' in logs else 'val_accuracy'
        nni.report_intermediate_result(logs[metric_key])
# Training
def train_eval():
    """Fit the compiled model, then report its final test accuracy to NNI.

    Reads the module-level ``trainloader``, ``testloader`` and ``net`` that
    ``parse_rev_args`` prepared. Per-epoch validation accuracy is reported by
    the ``SendMetrics`` callback; the accuracy returned by ``net.evaluate`` is
    sent as the final result.
    """
    x_train, y_train = trainloader
    x_test, y_test = testloader

    training_callbacks = [
        SendMetrics(),
        EarlyStopping(min_delta=0.001, patience=10),
        TensorBoard(log_dir=TENSORBOARD_DIR),
    ]

    # train procedure
    net.fit(
        x=x_train,
        y=y_train,
        batch_size=args.batch_size,
        validation_data=(x_test, y_test),
        epochs=args.epochs,
        shuffle=True,
        callbacks=training_callbacks,
    )

    # trial report final acc to tuner
    evaluation = net.evaluate(x_test, y_test)
    _, acc = evaluation
    logger.debug("Final result is: %.3f", acc)
    nni.report_final_result(acc)
if __name__ == "__main__":
    try:
        # Fetch the next architecture proposed by the network morphism tuner.
        RCV_CONFIG = nni.get_next_parameter()
        logger.debug(RCV_CONFIG)
        print(RCV_CONFIG)

        # Build and compile the model, then train it and report the results.
        parse_rev_args(RCV_CONFIG)
        train_eval()
    except Exception as err:
        # Record the traceback in the log file, then re-raise so the failure
        # is not swallowed.
        logger.exception(err)
        raise
src/nni_manager/training_service/common/gpuData.ts
View file @
eb77376e
...
@@ -79,6 +79,6 @@ export function parseGpuIndices(gpuIndices?: string): Set<number> | undefined {
...
@@ -79,6 +79,6 @@ export function parseGpuIndices(gpuIndices?: string): Set<number> | undefined {
export
const
GPU_INFO_COLLECTOR_FORMAT_WINDOWS
:
string
=
export
const
GPU_INFO_COLLECTOR_FORMAT_WINDOWS
:
string
=
`
`
$env:METRIC_OUTPUT_DIR="{0}"
$env:METRIC_OUTPUT_DIR="{0}"
$app = Start-Process "python" -ArgumentList "-m nni_gpu_tool.gpu_metrics_collector" -passthru -NoNewWindow
$app = Start-Process "python
3
" -ArgumentList "-m nni_gpu_tool.gpu_metrics_collector" -passthru -NoNewWindow
Write $app.ID | Out-File {1} -NoNewline -encoding utf8
Write $app.ID | Out-File {1} -NoNewline -encoding utf8
`
;
`
;
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment