"backend/apps/vscode:/vscode.git/clone" did not exist on "4c490132ba77e78433d3b7b2474b69b07bf60eb8"
Commit ee3997b3 authored by qianyj's avatar qianyj
Browse files

new tf branch for dtk21.10.1

parent 2795dc1f
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""DeepSpeech2 model configuration.
References:
https://arxiv.org/abs/1512.02595
Deep Speech 2: End-to-End Speech Recognition in English and Mandarin
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import itertools
import numpy as np
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow.compat.v1 as tf
import constants
from cnn_util import log_fn
from models import model as model_lib
from tensorflow.python.ops import variables # pylint: disable=g-direct-tensorflow-import
class DeepSpeechDecoder(object):
  """Greedy decoder implementation for Deep Speech model."""

  def __init__(self, labels, blank_index=28):
    """Decoder initialization.

    Args:
      labels: a string specifying the speech labels for the decoder to use.
      blank_index: an integer specifying index for the blank character.
        Defaults to 28.
    """
    self.labels = labels
    self.blank_index = blank_index
    # Maps a class index to its character, e.g. {0: ' ', 1: 'a', ...}.
    self.int_to_char = dict(enumerate(labels))

  @staticmethod
  def _edit_distance_module():
    """Imports and returns nltk.metrics.distance with a helpful error.

    Shared by wer() and cer(), which previously duplicated this logic.

    Returns:
      The nltk.metrics.distance module.

    Raises:
      ImportError: if nltk is not installed.
    """
    try:
      from nltk.metrics import distance  # pylint: disable=g-import-not-at-top
      return distance
    except ImportError as e:
      # Bug fix: Python 3 exceptions have no `.message` attribute, so the
      # original `e.message` raised AttributeError here; use str(e) instead.
      if 'nltk.metrics' not in str(e):
        raise
      raise ImportError('To use the experimental deepspeech model, you must '
                        'pip install -U nltk')

  def convert_to_string(self, sequence):
    """Converts a sequence of indexes into the corresponding string."""
    return ''.join([self.int_to_char[i] for i in sequence])

  def wer(self, decode, target):
    """Computes the Word Error Rate (WER).

    WER is defined as the edit distance between the two provided sentences
    after tokenizing to words.

    Args:
      decode: string of the decoded output.
      target: a string for the ground truth label.

    Returns:
      A float number for the WER of the current decode-target pair.
    """
    distance = self._edit_distance_module()
    # Map each distinct word to a single char so the word-level edit distance
    # can be computed with the character-level edit_distance routine.
    words = set(decode.split() + target.split())
    word2char = dict(zip(words, range(len(words))))
    new_decode = [chr(word2char[w]) for w in decode.split()]
    new_target = [chr(word2char[w]) for w in target.split()]
    return distance.edit_distance(''.join(new_decode), ''.join(new_target))

  def cer(self, decode, target):
    """Computes the Character Error Rate (CER).

    CER is defined as the edit distance between the two given strings.

    Args:
      decode: a string of the decoded output.
      target: a string for the ground truth label.

    Returns:
      A float number denoting the CER for the current sentence pair.
    """
    distance = self._edit_distance_module()
    return distance.edit_distance(decode, target)

  def decode(self, char_indexes):
    """Decodes the best guess from char indexes using greedy CTC collapsing."""
    # Merge repeated chars, then drop the blank index (standard CTC decoding).
    merged = [k for k, _ in itertools.groupby(char_indexes)]
    merge_remove_blank = [k for k in merged if k != self.blank_index]
    return self.convert_to_string(merge_remove_blank)

  def decode_logits(self, logits):
    """Decodes the best guess from logits using greedy algorithm."""
    # Choose the class with maximum probability at each time step.
    best = list(np.argmax(logits, axis=1))
    return self.decode(best)
class DeepSpeech2Model(model_lib.Model):
  """Define DeepSpeech2 model."""

  # Supported rnn cells.
  SUPPORTED_RNNS = {
      'lstm': tf.nn.rnn_cell.BasicLSTMCell,
      'rnn': tf.nn.rnn_cell.RNNCell,
      'gru': tf.nn.rnn_cell.GRUCell,
  }

  # Parameters for batch normalization.
  BATCH_NORM_EPSILON = 1e-5
  BATCH_NORM_DECAY = 0.997

  # Filters of convolution layer
  CONV_FILTERS = 32

  def __init__(self,
               num_rnn_layers=5,
               rnn_type='lstm',
               is_bidirectional=True,
               rnn_hidden_size=800,
               use_bias=True,
               params=None):
    """Initialize DeepSpeech2 model.

    Args:
      num_rnn_layers: an integer, the number of rnn layers (default: 5).
      rnn_type: a string, one of the supported rnn cells: gru, rnn or lstm.
      is_bidirectional: a boolean to indicate if the rnn layer is
        bidirectional.
      rnn_hidden_size: an integer for the number of hidden units in the RNN
        cell.
      use_bias: a boolean specifying whether to use a bias in the last fc
        layer.
      params: the params from BenchmarkCNN.
    """
    super(DeepSpeech2Model, self).__init__(
        'deepspeech2',
        batch_size=128,
        learning_rate=0.0005,
        fp16_loss_scale=128,
        params=params)
    self.num_rnn_layers = num_rnn_layers
    self.rnn_type = rnn_type
    self.is_bidirectional = is_bidirectional
    self.rnn_hidden_size = rnn_hidden_size
    self.use_bias = use_bias
    # Fixed padded input geometry; used by get_input_shapes() and by the CTC
    # input-length rescaling in loss_function(). Presumably sized for the
    # LibriSpeech-style dataset this benchmark was written for — TODO confirm.
    self.num_feature_bins = 161
    self.max_time_steps = 3494
    self.max_label_length = 576

  def _batch_norm(self, inputs, training):
    """Batch normalization layer.

    Note that the momentum to use will affect validation accuracy over time.
    Batch norm has different behaviors during training/evaluation. With a large
    momentum, the model takes longer to get a near-accurate estimation of the
    moving mean/variance over the entire training dataset, which means we need
    more iterations to see good evaluation results. If the training data is
    evenly distributed over the feature space, we can also try setting a
    smaller momentum (such as 0.1) to get good evaluation result sooner.

    Args:
      inputs: input data for batch norm layer.
      training: a boolean to indicate if it is in training stage.

    Returns:
      tensor output from batch norm layer.
    """
    return tf.layers.batch_normalization(
        inputs=inputs,
        momentum=DeepSpeech2Model.BATCH_NORM_DECAY,
        epsilon=DeepSpeech2Model.BATCH_NORM_EPSILON,
        fused=True,
        training=training)

  def _conv_bn_layer(self, inputs, padding, filters, kernel_size, strides,
                     layer_id, training):
    """Defines 2D convolutional + batch normalization layer.

    Args:
      inputs: input data for convolution layer.
      padding: padding to be applied before convolution layer.
      filters: an integer, number of output filters in the convolution.
      kernel_size: a tuple specifying the height and width of the 2D
        convolution window.
      strides: a tuple specifying the stride length of the convolution.
      layer_id: an integer specifying the layer index.
      training: a boolean to indicate which stage we are in (training/eval).

    Returns:
      tensor output from the current layer.
    """
    # Perform symmetric padding on the feature dimension of time_step.
    # This step is required to avoid issues when RNN output sequence is
    # shorter than the label length.
    inputs = tf.pad(
        inputs,
        [[0, 0], [padding[0], padding[0]], [padding[1], padding[1]], [0, 0]])
    inputs = tf.layers.conv2d(
        inputs=inputs,
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding='valid',
        use_bias=False,
        activation=tf.nn.relu6,
        name='cnn_{}'.format(layer_id))
    return self._batch_norm(inputs, training)

  def _rnn_layer(self, inputs, rnn_cell, rnn_hidden_size, layer_id,
                 use_batch_norm, is_bidirectional, training):
    """Defines a batch normalization + rnn layer.

    Args:
      inputs: input tensors for the current layer.
      rnn_cell: RNN cell instance to use.
      rnn_hidden_size: an integer for the dimensionality of the rnn output
        space.
      layer_id: an integer for the index of current layer.
      use_batch_norm: a boolean specifying whether to perform batch
        normalization on input states.
      is_bidirectional: a boolean specifying whether the rnn layer is
        bi-directional.
      training: a boolean to indicate which stage we are in (training/eval).

    Returns:
      tensor output for the current layer.
    """
    if use_batch_norm:
      inputs = self._batch_norm(inputs, training)
    # Construct forward/backward RNN cells.
    fw_cell = rnn_cell(
        num_units=rnn_hidden_size, name='rnn_fw_{}'.format(layer_id))
    if is_bidirectional:
      bw_cell = rnn_cell(
          num_units=rnn_hidden_size, name='rnn_bw_{}'.format(layer_id))
      outputs, _ = tf.nn.bidirectional_dynamic_rnn(
          cell_fw=fw_cell,
          cell_bw=bw_cell,
          inputs=inputs,
          dtype=tf.float32,
          swap_memory=True)
      # Concatenate forward and backward outputs along the feature axis.
      rnn_outputs = tf.concat(outputs, -1)
    else:
      # NOTE(review): dynamic_rnn returns (outputs, state); this assigns the
      # whole tuple to rnn_outputs, unlike the bidirectional branch above —
      # looks suspicious, but behavior is preserved here. TODO confirm.
      rnn_outputs = tf.nn.dynamic_rnn(
          fw_cell, inputs, dtype=tf.float32, swap_memory=True)
    return rnn_outputs

  def get_input_data_types(self, subset):
    """Returns the list of data types of the inputs."""
    del subset  # Same data types for both train and validation subsets.
    # [features, labels, input_lengths, label_lengths]
    return [self.data_type, tf.int32, tf.int32, tf.int32]

  def get_input_shapes(self, subset):
    """Returns the list of shapes of the padded inputs."""
    del subset  # Same shapes for both train and validation subsets
    return [
        [self.batch_size, self.max_time_steps, self.num_feature_bins, 1],
        [self.batch_size, self.max_label_length],
        [self.batch_size, 1],
        [self.batch_size, 1],
    ]

  def get_synthetic_inputs(self, input_name, nclass):
    """Returns ops producing random inputs shaped like the real dataset."""
    inputs = tf.random_uniform(self.get_input_shapes('train')[0],
                               dtype=self.get_input_data_types('train')[0])
    # Stored in a local variable so the same synthetic batch is reused.
    inputs = variables.VariableV1(inputs, trainable=False,
                                  collections=[tf.GraphKeys.LOCAL_VARIABLES],
                                  name=input_name)
    labels = tf.convert_to_tensor(
        np.random.randint(28, size=[self.batch_size, self.max_label_length]))
    input_lengths = tf.convert_to_tensor(
        [self.max_time_steps] * self.batch_size)
    label_lengths = tf.convert_to_tensor(
        [self.max_label_length] * self.batch_size)
    return [inputs, labels, input_lengths, label_lengths]

  # TODO(laigd): support fp16.
  # TODO(laigd): support multiple gpus.
  def build_network(self, inputs, phase_train=True, nclass=29):
    """Builds the forward pass of the deepspeech2 model.

    Args:
      inputs: The input list of the model.
      phase_train: True during training. False during evaluation.
      nclass: Number of classes that the input spectrogram can belong to.

    Returns:
      A BuildNetworkResult which contains the logits and model-specific extra
      information.
    """
    inputs = inputs[0]  # Get the spectrogram feature.
    # Two cnn layers.
    inputs = self._conv_bn_layer(
        inputs,
        padding=(20, 5),
        filters=DeepSpeech2Model.CONV_FILTERS,
        kernel_size=(41, 11),
        strides=(2, 2),
        layer_id=1,
        training=phase_train)
    inputs = self._conv_bn_layer(
        inputs,
        padding=(10, 5),
        filters=DeepSpeech2Model.CONV_FILTERS,
        kernel_size=(21, 11),
        strides=(2, 1),
        layer_id=2,
        training=phase_train)
    # output of conv_layer2 with the shape of
    # [batch_size (N), times (T), features (F), channels (C)].
    # Convert the conv output to rnn input: flatten (F, C) into one feature
    # axis, keeping time as the variable middle dimension.
    # batch_size = tf.shape(inputs)[0]
    feat_size = inputs.get_shape().as_list()[2]
    inputs = tf.reshape(
        inputs,
        [self.batch_size, -1, feat_size * DeepSpeech2Model.CONV_FILTERS])
    # RNN layers.
    rnn_cell = DeepSpeech2Model.SUPPORTED_RNNS[self.rnn_type]
    for layer_counter in xrange(self.num_rnn_layers):
      # No batch normalization on the first layer.
      use_batch_norm = (layer_counter != 0)
      inputs = self._rnn_layer(inputs, rnn_cell, self.rnn_hidden_size,
                               layer_counter + 1, use_batch_norm,
                               self.is_bidirectional, phase_train)
    # FC layer with batch norm.
    inputs = self._batch_norm(inputs, phase_train)
    logits = tf.layers.dense(inputs, nclass, use_bias=self.use_bias)
    return model_lib.BuildNetworkResult(logits=logits, extra_info=None)

  def loss_function(self, inputs, build_network_result):
    """Computes the ctc loss for the current batch of predictions.

    Args:
      inputs: the input list of the model.
      build_network_result: a BuildNetworkResult returned by build_network().

    Returns:
      The loss tensor of the model.
    """
    logits = build_network_result.logits
    actual_time_steps = inputs[2]
    probs = tf.nn.softmax(logits)
    ctc_time_steps = tf.shape(probs)[1]
    # Rescale the recorded input lengths from the raw time scale to the
    # conv-strided CTC time scale: actual * ctc_steps // max_steps.
    ctc_input_length = tf.to_float(
        tf.multiply(actual_time_steps, ctc_time_steps))
    ctc_input_length = tf.to_int32(
        tf.floordiv(ctc_input_length, tf.to_float(self.max_time_steps)))
    label_length = inputs[3]
    label_length = tf.to_int32(tf.squeeze(label_length))
    ctc_input_length = tf.to_int32(tf.squeeze(ctc_input_length))
    labels = inputs[1]
    sparse_labels = tf.to_int32(
        tf.keras.backend.ctc_label_dense_to_sparse(labels, label_length))
    # tf.nn.ctc_loss expects time-major log-probabilities; epsilon avoids
    # log(0).
    y_pred = tf.log(
        tf.transpose(probs, perm=[1, 0, 2]) + tf.keras.backend.epsilon())
    losses = tf.expand_dims(
        tf.nn.ctc_loss(
            labels=sparse_labels,
            inputs=y_pred,
            sequence_length=ctc_input_length,
            ignore_longer_outputs_than_inputs=True),
        axis=1)
    loss = tf.reduce_mean(losses)
    return loss

  # Tensor names used to hand the eval outputs to postprocess().
  PROBABILITY_TENSOR = 'deepspeech2_prob'
  LABEL_TENSOR = 'deepspeech2_label'

  def accuracy_function(self, inputs, logits):
    """Returns the ops to evaluate the model performance."""
    # Get probabilities of each predicted class
    probs = tf.nn.softmax(logits)
    assert probs.shape.as_list()[0] == self.batch_size
    return {
        (constants.UNREDUCED_ACCURACY_OP_PREFIX + self.PROBABILITY_TENSOR):
            probs,
        (constants.UNREDUCED_ACCURACY_OP_PREFIX + self.LABEL_TENSOR):
            inputs[1],
    }

  def postprocess(self, results):
    """Postprocess results returned from model in Python."""
    probs = results[self.PROBABILITY_TENSOR]
    total_wer, total_cer = 0, 0
    speech_labels = " abcdefghijklmnopqrstuvwxyz'-"
    greedy_decoder = DeepSpeechDecoder(speech_labels)
    # Evaluate the performance using WER (Word Error Rate) and CER (Character
    # Error Rate) as metrics.
    targets = results[self.LABEL_TENSOR]  # The ground truth transcript
    for i in range(self.batch_size):
      # Decode string.
      predicted_str = greedy_decoder.decode_logits(probs[i])
      expected_str = greedy_decoder.decode(targets[i])
      # Compute CER, normalized by the reference length.
      total_cer += (greedy_decoder.cer(predicted_str, expected_str) /
                    len(expected_str))
      # Compute WER, normalized by the reference word count.
      total_wer += (greedy_decoder.wer(predicted_str, expected_str) /
                    len(expected_str.split()))
    # Get mean value over the batch.
    total_cer /= self.batch_size
    total_wer /= self.batch_size
    log_fn('total CER: {:f}; total WER: {:f}; total example: {:d}.'.format(
        total_cer, total_wer, self.batch_size))
    # TODO(laigd): get rid of top_N_accuracy bindings in benchmark_cnn.py
    return {'top_1_accuracy': 0., 'top_5_accuracy': 0.}
def loss_function(self, inputs, build_network_result):
"""Computes the ctc loss for the current batch of predictions.
Args:
inputs: the input list of the model.
build_network_result: a BuildNetworkResult returned by build_network().
Returns:
The loss tensor of the model.
"""
logits = build_network_result.logits
actual_time_steps = inputs[2]
probs = tf.nn.softmax(logits)
ctc_time_steps = tf.shape(probs)[1]
ctc_input_length = tf.to_float(
tf.multiply(actual_time_steps, ctc_time_steps))
ctc_input_length = tf.to_int32(
tf.floordiv(ctc_input_length, tf.to_float(self.max_time_steps)))
label_length = inputs[3]
label_length = tf.to_int32(tf.squeeze(label_length))
ctc_input_length = tf.to_int32(tf.squeeze(ctc_input_length))
labels = inputs[1]
sparse_labels = tf.to_int32(
tf.keras.backend.ctc_label_dense_to_sparse(labels, label_length))
y_pred = tf.log(
tf.transpose(probs, perm=[1, 0, 2]) + tf.keras.backend.epsilon())
losses = tf.expand_dims(
tf.nn.ctc_loss(
labels=sparse_labels,
inputs=y_pred,
sequence_length=ctc_input_length,
ignore_longer_outputs_than_inputs=True),
axis=1)
loss = tf.reduce_mean(losses)
return loss
PROBABILITY_TENSOR = 'deepspeech2_prob'
LABEL_TENSOR = 'deepspeech2_label'
def accuracy_function(self, inputs, logits):
"""Returns the ops to evaluate the model performance."""
# Get probabilities of each predicted class
probs = tf.nn.softmax(logits)
assert probs.shape.as_list()[0] == self.batch_size
return {
(constants.UNREDUCED_ACCURACY_OP_PREFIX + self.PROBABILITY_TENSOR):
probs,
(constants.UNREDUCED_ACCURACY_OP_PREFIX + self.LABEL_TENSOR):
inputs[1],
}
def postprocess(self, results):
"""Postprocess results returned from model in Python."""
probs = results[self.PROBABILITY_TENSOR]
total_wer, total_cer = 0, 0
speech_labels = " abcdefghijklmnopqrstuvwxyz'-"
greedy_decoder = DeepSpeechDecoder(speech_labels)
# Evaluate the performance using WER (Word Error Rate) and CER (Character
# Error Rate) as metrics.
targets = results[self.LABEL_TENSOR] # The ground truth transcript
for i in range(self.batch_size):
# Decode string.
predicted_str = greedy_decoder.decode_logits(probs[i])
expected_str = greedy_decoder.decode(targets[i])
# Compute CER.
total_cer += (greedy_decoder.cer(predicted_str, expected_str) /
len(expected_str))
# Compute WER.
total_wer += (greedy_decoder.wer(predicted_str, expected_str) /
len(expected_str.split()))
# Get mean value
total_cer /= self.batch_size
total_wer /= self.batch_size
log_fn('total CER: {:f}; total WER: {:f}; total example: {:d}.'.format(
total_cer, total_wer, self.batch_size))
# TODO(laigd): get rid of top_N_accuracy bindings in benchmark_cnn.py
return {'top_1_accuracy': 0., 'top_5_accuracy': 0.}
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Base model configuration for CNN benchmarks."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import namedtuple
import tensorflow.compat.v1 as tf
import convnet_builder
import mlperf
from tensorflow.python.ops import variables as variables_module # pylint: disable=g-direct-tensorflow-import
# BuildNetworkResult encapsulates the result (e.g. logits) of a
# Model.build_network() call.
BuildNetworkResult = namedtuple(
    'BuildNetworkResult',
    [
        'logits',  # logits of the network
        'extra_info',  # Model specific extra information
    ])
class Model(object):
  """Base model config for DNN benchmarks."""

  def __init__(self,
               model_name,
               batch_size,
               learning_rate,
               fp16_loss_scale,
               params=None):
    """Creates a base model configuration.

    Args:
      model_name: string identifying the model.
      batch_size: the default batch size for this model.
      learning_rate: the initial learning rate.
      fp16_loss_scale: loss scale used when training in float16.
      params: optional params from BenchmarkCNN.
    """
    self.model_name = model_name
    self.batch_size = batch_size
    self.default_batch_size = batch_size
    self.learning_rate = learning_rate
    # TODO(reedwm) Set custom loss scales for each model instead of using the
    # default of 128.
    self.fp16_loss_scale = fp16_loss_scale
    # use_tf_layers specifies whether to build the model using tf.layers;
    # fp16_vars specifies whether to create the variables in float16.
    self.use_tf_layers = params.use_tf_layers if params else True
    self.fp16_vars = params.fp16_vars if params else False
    if params and params.use_fp16:
      self.data_type = tf.float16
    else:
      self.data_type = tf.float32

  def get_model_name(self):
    """Returns the name of this model."""
    return self.model_name

  def get_batch_size(self):
    """Returns the current batch size."""
    return self.batch_size

  def set_batch_size(self, batch_size):
    """Overrides the current batch size."""
    self.batch_size = batch_size

  def get_default_batch_size(self):
    """Returns the batch size this model was constructed with."""
    return self.default_batch_size

  def get_fp16_loss_scale(self):
    """Returns the loss scale to use when training in float16."""
    return self.fp16_loss_scale

  def filter_l2_loss_vars(self, variables):
    """Filters out variables that the L2 loss should not be computed for.

    By default, this filters out batch normalization variables and keeps all
    other variables. This behavior can be overridden by subclasses.

    Args:
      variables: A list of the trainable variables.

    Returns:
      A list of variables that the L2 loss should be computed for.
    """
    mlperf.logger.log(key=mlperf.tags.MODEL_EXCLUDE_BN_FROM_L2, value=True)
    return [var for var in variables if 'batchnorm' not in var.name]

  def get_learning_rate(self, global_step, batch_size):
    """Returns the learning rate; the base class ignores both arguments."""
    del global_step, batch_size
    return self.learning_rate

  def get_input_shapes(self, subset):
    """Returns the list of expected shapes of all the inputs to this model."""
    del subset
    raise NotImplementedError('Must be implemented in derived classes')

  def get_input_data_types(self, subset):
    """Returns the list of data types of all the inputs to this model."""
    del subset
    raise NotImplementedError('Must be implemented in derived classes')

  def get_synthetic_inputs(self, input_name, nclass):
    """Returns the ops to generate synthetic inputs."""
    raise NotImplementedError('Must be implemented in derived classes')

  def build_network(self, inputs, phase_train, nclass):
    """Builds the forward pass of the model.

    Args:
      inputs: The list of inputs, including labels
      phase_train: True during training. False during evaluation.
      nclass: Number of classes that the inputs can belong to.

    Returns:
      A BuildNetworkResult which contains the logits and model-specific extra
      information.
    """
    raise NotImplementedError('Must be implemented in derived classes')

  def loss_function(self, inputs, build_network_result):
    """Returns the op to measure the loss of the model.

    Args:
      inputs: the input list of the model.
      build_network_result: a BuildNetworkResult returned by build_network().

    Returns:
      The loss tensor of the model.
    """
    raise NotImplementedError('Must be implemented in derived classes')

  # TODO(laigd): have accuracy_function() take build_network_result instead.
  def accuracy_function(self, inputs, logits):
    """Returns the ops to measure the accuracy of the model."""
    raise NotImplementedError('Must be implemented in derived classes')

  def postprocess(self, results):
    """Postprocess results returned from model in Python."""
    return results

  def reached_target(self):
    """Define custom methods to stop training when model's target is reached."""
    return False
class CNNModel(Model):
  """Base model configuration for CNN benchmarks."""

  # TODO(laigd): reduce the number of parameters and read everything from
  # params.
  def __init__(self,
               model,
               image_size,
               batch_size,
               learning_rate,
               layer_counts=None,
               fp16_loss_scale=128,
               params=None):
    """Initializes the CNN model configuration.

    Args:
      model: string name of the model.
      image_size: height/width of the (square) input images.
      batch_size: the default batch size.
      learning_rate: the initial learning rate.
      layer_counts: optional layer counts for parameterized model families.
      fp16_loss_scale: loss scale used when training in float16.
      params: the params from BenchmarkCNN.
    """
    super(CNNModel, self).__init__(
        model, batch_size, learning_rate, fp16_loss_scale,
        params=params)
    self.image_size = image_size
    self.layer_counts = layer_counts
    # Input images have 3 (RGB) channels.
    self.depth = 3
    self.params = params
    self.data_format = params.data_format if params else 'NCHW'

  def get_layer_counts(self):
    """Returns the layer counts, or None if not applicable."""
    return self.layer_counts

  def skip_final_affine_layer(self):
    """Returns if the caller of this class should skip the final affine layer.

    Normally, this class adds a final affine layer to the model after calling
    self.add_inference(), to generate the logits. If a subclass override this
    method to return True, the caller should not add the final affine layer.

    This is useful for tests.
    """
    return False

  def add_backbone_saver(self):
    """Creates a tf.train.Saver as self.backbone_saver for loading backbone.

    A tf.train.Saver must be created and saved in self.backbone_saver before
    calling load_backbone_model, with correct variable name mapping to load
    variables from checkpoint correctly into the current model.
    """
    # Bug fix: the base class defines get_model_name(), not getName(); the
    # original call raised AttributeError instead of the intended
    # NotImplementedError.
    raise NotImplementedError(
        self.get_model_name() + ' does not have backbone model.')

  def load_backbone_model(self, sess, backbone_model_path):
    """Loads variable values from a pre-trained backbone model.

    This should be used at the beginning of the training process for transfer
    learning models using checkpoints of base models.

    Args:
      sess: session to train the model.
      backbone_model_path: path to backbone model checkpoint file.
    """
    del sess, backbone_model_path
    # Bug fix: same getName() -> get_model_name() correction as above.
    raise NotImplementedError(
        self.get_model_name() + ' does not have backbone model.')

  def add_inference(self, cnn):
    """Adds the core layers of the CNN's forward pass.

    This should build the forward pass layers, except for the initial transpose
    of the images and the final Dense layer producing the logits. The layers
    should be build with the ConvNetBuilder `cnn`, so that when this function
    returns, `cnn.top_layer` and `cnn.top_size` refer to the last layer and the
    number of units of the layer, respectively.

    Args:
      cnn: A ConvNetBuilder to build the forward pass layers with.
    """
    del cnn
    raise NotImplementedError('Must be implemented in derived classes')

  def get_input_data_types(self, subset):
    """Return data types of inputs for the specified subset."""
    del subset  # Same types for both 'train' and 'validation' subsets.
    return [self.data_type, tf.int32]

  def get_input_shapes(self, subset):
    """Return data shapes of inputs for the specified subset."""
    del subset  # Same shapes for both 'train' and 'validation' subsets.
    # Each input is of shape [batch_size, height, width, depth]
    # Each label is of shape [batch_size]
    return [[self.batch_size, self.image_size, self.image_size, self.depth],
            [self.batch_size]]

  def get_synthetic_inputs(self, input_name, nclass):
    """Returns ops producing random image/label inputs."""
    # Synthetic input should be within [0, 255].
    image_shape, label_shape = self.get_input_shapes('train')
    inputs = tf.truncated_normal(
        image_shape,
        dtype=self.data_type,
        mean=127,
        stddev=60,
        name=self.model_name + '_synthetic_inputs')
    inputs = variables_module.VariableV1(
        inputs, trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES],
        name=input_name)
    # NOTE(review): random_uniform's maxval is exclusive, so labels fall in
    # [0, nclass - 2]; presumably intentional for synthetic data — confirm.
    labels = tf.random_uniform(
        label_shape,
        minval=0,
        maxval=nclass - 1,
        dtype=tf.int32,
        name=self.model_name + '_synthetic_labels')
    return (inputs, labels)

  def gpu_preprocess_nhwc(self, images, phase_train=True):
    """Optional on-GPU preprocessing hook; the base class is a no-op."""
    del phase_train
    return images

  def build_network(self,
                    inputs,
                    phase_train=True,
                    nclass=1001):
    """Returns logits from input images.

    Args:
      inputs: The input images and labels
      phase_train: True during training. False during evaluation.
      nclass: Number of classes that the images can belong to.

    Returns:
      A BuildNetworkResult which contains the logits and model-specific extra
      information.
    """
    images = inputs[0]
    images = self.gpu_preprocess_nhwc(images, phase_train)
    if self.data_format == 'NCHW':
      # Convert from NHWC (data layout) to NCHW (compute layout).
      images = tf.transpose(images, [0, 3, 1, 2])
    var_type = tf.float32
    if self.data_type == tf.float16 and self.fp16_vars:
      var_type = tf.float16
    network = convnet_builder.ConvNetBuilder(
        images, self.depth, phase_train, self.use_tf_layers, self.data_format,
        self.data_type, var_type)
    with tf.variable_scope('cg', custom_getter=network.get_custom_getter()):
      self.add_inference(network)
      # Add the final fully-connected class layer
      logits = (
          network.affine(nclass, activation='linear')
          if not self.skip_final_affine_layer() else network.top_layer)
      mlperf.logger.log(key=mlperf.tags.MODEL_HP_FINAL_SHAPE,
                        value=logits.shape.as_list()[1:])
      aux_logits = None
      if network.aux_top_layer is not None:
        with network.switch_to_aux_top_layer():
          aux_logits = network.affine(nclass, activation='linear', stddev=0.001)
    if self.data_type == tf.float16:
      # TODO(reedwm): Determine if we should do this cast here.
      logits = tf.cast(logits, tf.float32)
      if aux_logits is not None:
        aux_logits = tf.cast(aux_logits, tf.float32)
    return BuildNetworkResult(
        logits=logits, extra_info=None if aux_logits is None else aux_logits)

  def loss_function(self, inputs, build_network_result):
    """Returns the op to measure the loss of the model."""
    logits = build_network_result.logits
    _, labels = inputs
    # TODO(laigd): consider putting the aux logit in the Inception model,
    # which could call super.loss_function twice, once with the normal logits
    # and once with the aux logits.
    aux_logits = build_network_result.extra_info
    with tf.name_scope('xentropy'):
      mlperf.logger.log(key=mlperf.tags.MODEL_HP_LOSS_FN, value=mlperf.tags.CCE)
      cross_entropy = tf.losses.sparse_softmax_cross_entropy(
          logits=logits, labels=labels)
      loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
    if aux_logits is not None:
      with tf.name_scope('aux_xentropy'):
        aux_cross_entropy = tf.losses.sparse_softmax_cross_entropy(
            logits=aux_logits, labels=labels)
        # Auxiliary loss is down-weighted by 0.4 before being added.
        aux_loss = 0.4 * tf.reduce_mean(aux_cross_entropy, name='aux_loss')
        loss = tf.add_n([loss, aux_loss])
    return loss

  def accuracy_function(self, inputs, logits):
    """Returns the ops to measure the accuracy of the model."""
    _, labels = inputs
    top_1_op = tf.reduce_sum(
        tf.cast(tf.nn.in_top_k(logits, labels, 1), self.data_type))
    top_5_op = tf.reduce_sum(
        tf.cast(tf.nn.in_top_k(logits, labels, 5), self.data_type))
    return {'top_1_accuracy': top_1_op, 'top_5_accuracy': top_5_op}
......@@ -150,8 +150,7 @@ def register_model(model_name, dataset_name, model_func):
# pylint: disable=g-import-not-at-top
try:
from tensorflow.contrib import slim # pylint: disable=unused-import
#xuan
can_import_contrib = False
can_import_contrib = True
except ImportError:
can_import_contrib = False
......
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Import official resnet models."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow.compat.v1 as tf
import datasets
from models import model as model_lib
class ImagenetResnetModel(model_lib.CNNModel):
  """Official resnet models."""

  def __init__(self, resnet_size, version=2, params=None):
    """These are the parameters that work for Imagenet data.

    Args:
      resnet_size: The number of convolutional layers needed in the model.
      version: 1 or 2 for v1 or v2, respectively.
      params: params passed by BenchmarkCNN.
    """
    # Per-size default batch sizes; any other size falls back to 32.
    default_batch_sizes = {
        50: 128,
        101: 32,
        152: 32
    }
    batch_size = default_batch_sizes.get(resnet_size, 32)
    # Linear learning-rate scaling: 0.0125 per 32 examples of batch size.
    default_learning_rate = 0.0125 * batch_size / 32
    model_name = 'official_resnet_{}_v{}'.format(resnet_size, version)
    super(ImagenetResnetModel, self).__init__(
        model_name, 224, batch_size, default_learning_rate, params=params)
    self.resnet_size = resnet_size
    self.version = version

  def get_learning_rate(self, global_step, batch_size):
    """Returns a step-wise LR schedule decayed 10x at epochs 30/60/80/90."""
    num_batches_per_epoch = (
        float(datasets.IMAGENET_NUM_TRAIN_IMAGES) / batch_size)
    boundaries = [int(num_batches_per_epoch * x) for x in [30, 60, 80, 90]]
    # Decay multipliers applied to the (batch-size-adjusted) base LR.
    values = [1, 0.1, 0.01, 0.001, 0.0001]
    # Rescale the base LR from the default batch size to the actual one.
    adjusted_learning_rate = (
        self.learning_rate / self.default_batch_size * batch_size)
    values = [v * adjusted_learning_rate for v in values]
    return tf.train.piecewise_constant(global_step, boundaries, values)

  def build_network(self, images, phase_train=True, nclass=1001,
                    data_type=tf.float32):
    """Builds the official ResNet model from the tensorflow/models repo.

    Args:
      images: input image tensor.
      phase_train: True during training. False during evaluation.
      nclass: number of output classes (unused here; the official model
        carries its own head configuration).
      data_type: dtype the input images are cast to.

    Returns:
      A BuildNetworkResult with float32 logits and no extra info.
    """
    # pylint: disable=g-import-not-at-top
    try:
      from official.r1.resnet.imagenet_main import ImagenetModel
    except ImportError:
      tf.logging.fatal('Please include tensorflow/models to the PYTHONPATH.')
      raise
    images = tf.cast(images, data_type)
    model_class = ImagenetModel(resnet_size=self.resnet_size,
                                resnet_version=self.version,
                                # The official model dtype seems to be ignored,
                                # as the dtype it uses is the dtype of the input
                                # images. Doesn't hurt to set it though.
                                dtype=data_type)
    logits = model_class(images, phase_train)
    logits = tf.cast(logits, tf.float32)
    return model_lib.BuildNetworkResult(logits=logits, extra_info=None)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment