Commit b2c3a9ba authored by A. Unique TensorFlower, committed by saberkun

Internal change

PiperOrigin-RevId: 404080616
parent ca3d3920
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions of mobilenet_edgetpu_v2 Networks."""
# Import libraries
from absl import logging
import tensorflow as tf
from official.modeling import hyperparams
from official.projects.edgetpu.vision.modeling.mobilenet_edgetpu_v1_model import MobilenetEdgeTPU
from official.projects.edgetpu.vision.modeling.mobilenet_edgetpu_v2_model import MobilenetEdgeTPUV2
from official.vision.beta.modeling.backbones import factory
layers = tf.keras.layers
# MobileNet-EdgeTPU-V2 configs.
MOBILENET_EDGETPU_V2_CONFIGS = frozenset([
'mobilenet_edgetpu_v2_tiny',
'mobilenet_edgetpu_v2_xs',
'mobilenet_edgetpu_v2_s',
'mobilenet_edgetpu_v2_m',
'mobilenet_edgetpu_v2_l',
'autoseg_edgetpu_backbone_xs',
'autoseg_edgetpu_backbone_s',
'autoseg_edgetpu_backbone_m',
])
# MobileNet-EdgeTPU-V1 configs.
MOBILENET_EDGETPU_CONFIGS = frozenset([
'mobilenet_edgetpu',
'mobilenet_edgetpu_dm1p25',
'mobilenet_edgetpu_dm1p5',
'mobilenet_edgetpu_dm1p75',
])
def freeze_large_filters(model: tf.keras.Model, threshold: int):
"""Freezes layer with large number of filters."""
for layer in model.layers:
if isinstance(layer.output_shape, tuple):
filter_size = layer.output_shape[-1]
if filter_size >= threshold:
logging.info('Freezing layer: %s', layer.name)
layer.trainable = False
@factory.register_backbone_builder('mobilenet_edgetpu')
def build_mobilenet_edgetpu(input_specs: tf.keras.layers.InputSpec,
backbone_config: hyperparams.Config,
**unused_kwargs) -> tf.keras.Model:
"""Builds MobileNetEdgeTpu backbone from a config."""
backbone_type = backbone_config.type
backbone_cfg = backbone_config.get()
assert backbone_type == 'mobilenet_edgetpu', (f'Inconsistent backbone type '
f'{backbone_type}')
if backbone_cfg.model_id in MOBILENET_EDGETPU_V2_CONFIGS:
model = MobilenetEdgeTPUV2.from_name(
model_name=backbone_cfg.model_id,
overrides={
'batch_norm': 'tpu',
'rescale_input': False,
'resolution': input_specs.shape[1:3],
'backbone_only': True,
'features_as_dict': True,
'dtype': 'bfloat16'
},
model_weights_path=backbone_cfg.pretrained_checkpoint_path)
if backbone_cfg.freeze_large_filters:
freeze_large_filters(model, backbone_cfg.freeze_large_filters)
return model
elif backbone_cfg.model_id in MOBILENET_EDGETPU_CONFIGS:
model = MobilenetEdgeTPU.from_name(
model_name=backbone_cfg.model_id,
overrides={
'batch_norm': 'tpu',
'rescale_input': False,
'resolution': input_specs.shape[1:3],
'backbone_only': True,
'dtype': 'bfloat16'
},
model_weights_path=backbone_cfg.pretrained_checkpoint_path)
if backbone_cfg.freeze_large_filters:
freeze_large_filters(model, backbone_cfg.freeze_large_filters)
return model
else:
    raise ValueError(f'Unsupported model_id {backbone_cfg.model_id}.')
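# Usage sketch (illustrative, not part of the original change): exercising the
# registered builder directly. The stub config below is an assumption that
# mirrors the TestBackboneConfig in the unit test further down; real pipelines
# pass a hyperparams.Config. Relies on this module's existing imports.
class _StubBackboneConfig:
  """Hypothetical config stand-in, for illustration only."""

  def __init__(self, model_id):
    self.type = 'mobilenet_edgetpu'
    self.model_id = model_id
    self.freeze_large_filters = None
    self.pretrained_checkpoint_path = None

  def get(self):
    return self


backbone = build_mobilenet_edgetpu(
    input_specs=tf.keras.layers.InputSpec(shape=(1, 224, 224, 3)),
    backbone_config=_StubBackboneConfig('mobilenet_edgetpu_v2_s'))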
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Tests for MobileNet."""
# Import libraries
from absl.testing import parameterized
import tensorflow as tf
from official.projects.edgetpu.vision.modeling.backbones import mobilenet_edgetpu
class TestInputSpec:
def __init__(self, shape):
self.shape = shape
class TestBackboneConfig:
def __init__(self, model_id):
self.model_id = model_id
self.freeze_large_filters = 99
self.pretrained_checkpoint_path = None
self.type = 'mobilenet_edgetpu'
def get(self):
return self
class MobileNetEdgeTPUTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
('mobilenet_edgetpu_v2_s', (1, 512, 512, 3)),
('mobilenet_edgetpu_v2_l', (1, None, None, 3)),
('mobilenet_edgetpu', (1, 512, 512, 3)),
('mobilenet_edgetpu_dm1p25', (1, None, None, 3)),
)
def test_mobilenet_creation(self, model_id, input_shape):
"""Test creation of MobileNet family models."""
tf.keras.backend.set_image_data_format('channels_last')
test_model = mobilenet_edgetpu.build_mobilenet_edgetpu(
input_specs=TestInputSpec(input_shape),
backbone_config=TestBackboneConfig(model_id))
self.assertGreater(len(test_model.outputs), 1)
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Common modeling utilities."""
from typing import Optional, Tuple
# Import libraries
import numpy as np
import tensorflow as tf
import tensorflow.compat.v1 as tf1
from tensorflow.python.tpu import tpu_function # pylint: disable=g-direct-tensorflow-import
MEAN_RGB = (0.5 * 255, 0.5 * 255, 0.5 * 255)
STDDEV_RGB = (0.5 * 255, 0.5 * 255, 0.5 * 255)
@tf.keras.utils.register_keras_serializable(package='Vision')
class TpuBatchNormalization(tf.keras.layers.BatchNormalization):
"""Cross replica batch normalization."""
def __init__(self, fused: Optional[bool] = False, **kwargs):
if fused in (True, None):
raise ValueError('TpuBatchNormalization does not support fused=True.')
super(TpuBatchNormalization, self).__init__(fused=fused, **kwargs)
def _cross_replica_average(self, t: tf.Tensor, num_shards_per_group: int):
"""Calculates the average value of input tensor across TPU replicas."""
num_shards = tpu_function.get_tpu_context().number_of_shards
group_assignment = None
if num_shards_per_group > 1:
if num_shards % num_shards_per_group != 0:
raise ValueError(
'num_shards: %d mod shards_per_group: %d, should be 0' %
(num_shards, num_shards_per_group))
num_groups = num_shards // num_shards_per_group
group_assignment = [[
x for x in range(num_shards) if x // num_shards_per_group == y
] for y in range(num_groups)]
return tf1.tpu.cross_replica_sum(t, group_assignment) / tf.cast(
num_shards_per_group, t.dtype)
  def _moments(self, inputs: tf.Tensor, reduction_axes, keep_dims: bool):
    """Computes mean and variance, overriding the parent's `_moments`."""
shard_mean, shard_variance = super(TpuBatchNormalization, self)._moments(
inputs, reduction_axes, keep_dims=keep_dims)
num_shards = tpu_function.get_tpu_context().number_of_shards or 1
if num_shards <= 8: # Skip cross_replica for 2x2 or smaller slices.
num_shards_per_group = 1
else:
num_shards_per_group = max(8, num_shards // 8)
if num_shards_per_group > 1:
# Compute variance using: Var[X]= E[X^2] - E[X]^2.
shard_square_of_mean = tf.math.square(shard_mean)
shard_mean_of_square = shard_variance + shard_square_of_mean
group_mean = self._cross_replica_average(shard_mean, num_shards_per_group)
group_mean_of_square = self._cross_replica_average(
shard_mean_of_square, num_shards_per_group)
group_variance = group_mean_of_square - tf.math.square(group_mean)
return (group_mean, group_variance)
else:
return (shard_mean, shard_variance)
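# Sanity-check sketch (illustrative, not part of the original change): the
# identity Var[X] = E[X^2] - E[X]^2 used above recovers the global moments
# from equally sized per-shard moments. Plain NumPy, made-up numbers.
shards = [np.array([1.0, 2.0, 3.0]), np.array([4.0, 5.0, 6.0])]
shard_means = [s.mean() for s in shards]
shard_mean_of_squares = [s.var() + s.mean()**2 for s in shards]
group_mean = np.mean(shard_means)
group_variance = np.mean(shard_mean_of_squares) - group_mean**2
assert np.isclose(group_mean, np.concatenate(shards).mean())
assert np.isclose(group_variance, np.concatenate(shards).var())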
def get_batch_norm(batch_norm_type: str) -> tf.keras.layers.BatchNormalization:
"""A helper to create a batch normalization getter.
Args:
batch_norm_type: The type of batch normalization layer implementation. `tpu`
will use `TpuBatchNormalization`.
  Returns:
    A `tf.keras.layers.BatchNormalization` subclass (a class, not an
    instance).
"""
if batch_norm_type == 'tpu':
return TpuBatchNormalization
return tf.keras.layers.BatchNormalization # pytype: disable=bad-return-type # typed-keras
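# Sketch (illustrative): 'tpu' selects the cross-replica implementation
# defined above; any other value falls back to the stock Keras layer. Both
# are classes, instantiated like any Keras layer.
bn_cls = get_batch_norm('tpu')
assert bn_cls is TpuBatchNormalization
bn_layer = bn_cls(momentum=0.99, epsilon=1e-3)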
def count_params(model, trainable_only=True):
"""Returns the count of all model parameters, or just trainable ones."""
if not trainable_only:
return model.count_params()
else:
return int(np.sum([tf.keras.backend.count_params(p)
for p in model.trainable_weights]))
def load_weights(model: tf.keras.Model,
model_weights_path: str,
checkpoint_format: str = 'tf_checkpoint'):
"""Load model weights from the given file path.
Args:
model: the model to load weights into
model_weights_path: the path of the model weights
checkpoint_format: The source of checkpoint files. By default, we assume the
checkpoint is saved by tf.train.Checkpoint().save(). For legacy reasons,
      we can also restore a checkpoint saved by Keras model.save_weights() by
setting checkpoint_format = 'keras_checkpoint'.
"""
if checkpoint_format == 'tf_checkpoint':
checkpoint_dict = {'model': model}
checkpoint = tf.train.Checkpoint(**checkpoint_dict)
checkpoint.restore(model_weights_path).assert_existing_objects_matched()
elif checkpoint_format == 'keras_checkpoint':
    # The assert makes sure the restore is successful.
model.load_weights(model_weights_path).assert_existing_objects_matched()
else:
raise ValueError(f'Unsupported checkpoint format {checkpoint_format}.')
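# Usage sketch (paths below are placeholders, not real checkpoints):
#
#   load_weights(model, '/tmp/ckpt/ckpt-123')  # tf.train.Checkpoint().save()
#   load_weights(model, '/tmp/keras_ckpt/weights',
#                checkpoint_format='keras_checkpoint')  # model.save_weights()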
def normalize_images(
features: tf.Tensor,
num_channels: int = 3,
dtype: str = 'float32',
data_format: str = 'channels_last',
mean_rgb: Tuple[float, ...] = MEAN_RGB,
stddev_rgb: Tuple[float, ...] = STDDEV_RGB,
) -> tf.Tensor:
"""Normalizes the input image channels with the given mean and stddev.
Args:
features: `Tensor` representing decoded images in float format.
num_channels: the number of channels in the input image tensor.
dtype: the dtype to convert the images to. Set to `None` to skip conversion.
data_format: the format of the input image tensor ['channels_first',
'channels_last'].
mean_rgb: the mean of the channels to subtract.
stddev_rgb: the stddev of the channels to divide.
Returns:
A normalized image `Tensor`.
"""
if data_format == 'channels_first':
stats_shape = [num_channels, 1, 1]
else:
stats_shape = [1, 1, num_channels]
if dtype is not None:
if dtype == 'bfloat16':
features = tf.image.convert_image_dtype(features, dtype=tf.bfloat16)
if mean_rgb is not None:
mean_rgb = tf.constant(mean_rgb, shape=stats_shape, dtype=features.dtype)
mean_rgb = tf.broadcast_to(mean_rgb, tf.shape(features))
features = features - mean_rgb
if stddev_rgb is not None:
stddev_rgb = tf.constant(
stddev_rgb, shape=stats_shape, dtype=features.dtype)
stddev_rgb = tf.broadcast_to(stddev_rgb, tf.shape(features))
features = features / stddev_rgb
return features
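# Usage sketch (illustrative): with the module defaults
# MEAN_RGB = STDDEV_RGB = (127.5, 127.5, 127.5), pixel values in [0, 255] are
# mapped to roughly [-1, 1].
image = tf.random.uniform((1, 224, 224, 3), maxval=255.0)
normalized = normalize_images(image)
assert float(tf.reduce_max(tf.abs(normalized))) <= 1.0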
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Customized keras layers used in the EdgeTPU models."""
import inspect
from typing import Any, MutableMapping, Optional, Union, Tuple
import tensorflow as tf
class GroupConv2D(tf.keras.layers.Conv2D):
"""2D group convolution as a Keras Layer."""
def __init__(self,
filters: int,
kernel_size: Union[int, Tuple[int, int]],
groups: int,
strides: Tuple[int, int] = (1, 1),
padding: str = 'valid',
data_format: str = 'channels_last',
dilation_rate: Tuple[int, int] = (1, 1),
activation: Any = None,
use_bias: bool = True,
kernel_initializer: Any = 'glorot_uniform',
bias_initializer: Any = 'zeros',
kernel_regularizer: Any = None,
bias_regularizer: Any = None,
activity_regularizer: Any = None,
kernel_constraint: Any = None,
bias_constraint: Any = None,
batch_norm_layer: Optional[tf.keras.layers.Layer] = None,
bn_epsilon: float = 1e-3,
bn_momentum: float = 0.99,
**kwargs: Any) -> tf.keras.layers.Layer:
"""Creates a 2D group convolution keras layer.
Args:
filters: Integer, the dimensionality of the output space (i.e. the number
of output filters in the convolution).
kernel_size: An integer or tuple/list of 2 integers, specifying the height
and width of the 2D convolution window. Can be a single integer to
specify the same value for all spatial dimensions.
groups: The number of input/output channel groups.
strides: An integer or tuple/list of n integers, specifying the stride
length of the convolution. Specifying any stride value != 1 is
incompatible with specifying any `dilation_rate` value != 1.
padding: one of `"valid"` or `"same"` (case-insensitive).
data_format: The ordering of the dimensions in the inputs. `channels_last`
corresponds to inputs with shape `(batch_size, height, width, channels)`
dilation_rate: an integer or tuple/list of 2 integers, specifying the
dilation rate to use for dilated convolution. Can be a single integer to
specify the same value for all spatial dimensions. Currently, specifying
any `dilation_rate` value != 1 is incompatible with specifying any
stride value != 1.
activation: Activation function to use. If you don't specify anything, no
activation is applied ( see `keras.activations`).
use_bias: Boolean, whether the layer uses a bias vector.
kernel_initializer: Initializer for the `kernel` weights matrix ( see
`keras.initializers`).
bias_initializer: Initializer for the bias vector ( see
`keras.initializers`).
kernel_regularizer: Regularizer function applied to the `kernel` weights
matrix (see `keras.regularizers`).
bias_regularizer: Regularizer function applied to the bias vector ( see
`keras.regularizers`).
activity_regularizer: Regularizer function applied to the output of the
layer (its "activation") ( see `keras.regularizers`).
kernel_constraint: Constraint function applied to the kernel matrix ( see
`keras.constraints`).
bias_constraint: Constraint function applied to the bias vector ( see
`keras.constraints`).
batch_norm_layer: The batch normalization layer to use. This is typically
tf.keras.layer.BatchNormalization or a derived class.
bn_epsilon: Batch normalization epsilon.
bn_momentum: Momentum used for moving average in batch normalization.
**kwargs: Additional keyword arguments.
Input shape:
4D tensor with shape: `(batch_size, rows, cols, channels)`
Output shape:
4D tensor with shape: `(batch_size, new_rows, new_cols, filters)` `rows`
and `cols` values might have changed due to padding.
Returns:
A tensor of rank 4 representing
`activation(GroupConv2D(inputs, kernel) + bias)`.
Raises:
      ValueError: if `groups` <= 1 or `groups` >= `filters`.
ValueError: if data_format is not "channels_last".
ValueError: if `padding` is not `same` or `valid`.
ValueError: if `batch_norm_layer` is not a callable when provided.
ValueError: when both `strides` > 1 and `dilation_rate` > 1.
"""
if groups <= 1 or groups >= filters:
raise ValueError('Number of groups should be greater than 1 and less '
'than the output filters.')
self._groups = groups
if data_format != 'channels_last':
raise ValueError(
'GroupConv2D expects input to be in channels_last format.')
if padding.lower() not in ('same', 'valid'):
      raise ValueError('Valid padding options are: same, or valid.')
self.use_batch_norm = False
if batch_norm_layer is not None:
if not inspect.isclass(batch_norm_layer):
raise ValueError('batch_norm_layer is not a class.')
self.use_batch_norm = True
self.bn_epsilon = bn_epsilon
self.bn_momentum = bn_momentum
self.batch_norm_layer = []
if self.use_batch_norm:
self.batch_norm_layer = [
batch_norm_layer(
axis=-1, momentum=self.bn_momentum, epsilon=self.bn_epsilon)
for i in range(self._groups)
]
super().__init__(
filters=filters,
kernel_size=kernel_size,
strides=strides,
padding=padding,
data_format=data_format,
dilation_rate=dilation_rate,
activation=activation,
use_bias=use_bias,
kernel_initializer=kernel_initializer,
bias_initializer=bias_initializer,
kernel_regularizer=kernel_regularizer,
bias_regularizer=bias_regularizer,
activity_regularizer=activity_regularizer,
kernel_constraint=kernel_constraint,
bias_constraint=bias_constraint,
groups=1,
**kwargs) # pytype: disable=bad-return-type # typed-keras
def build(self, input_shape: Tuple[int, ...]) -> None:
"""Builds GroupConv2D layer as a collection of smaller Conv2D layers."""
input_shape = tf.TensorShape(input_shape)
input_channel = self._get_input_channel(input_shape)
if input_channel % self._groups != 0:
raise ValueError(
f'Number of input channels: {input_channel} are not divisible '
f'by number of groups: {self._groups}.')
self.group_input_channel = int(input_channel / self._groups)
self.group_output_channel = int(self.filters / self._groups)
self.group_kernel_shape = self.kernel_size + (self.group_input_channel,
self.group_output_channel)
self.kernel = []
self.bias = []
for g in range(self._groups):
self.kernel.append(
self.add_weight(
name='kernel_{}'.format(g),
shape=self.group_kernel_shape,
initializer=self.kernel_initializer,
regularizer=self.kernel_regularizer,
constraint=self.kernel_constraint,
trainable=True,
dtype=self.dtype))
if self.use_bias:
self.bias.append(
self.add_weight(
name='bias_{}'.format(g),
shape=(self.group_output_channel,),
initializer=self.bias_initializer,
regularizer=self.bias_regularizer,
constraint=self.bias_constraint,
trainable=True,
dtype=self.dtype))
channel_axis = self._get_channel_axis()
self.input_spec = tf.keras.layers.InputSpec(
ndim=self.rank + 2, axes={channel_axis: input_channel})
self._build_conv_op_data_shape = input_shape[-(self.rank + 1):]
self._build_input_channel = input_channel
self._padding_op = self._get_padding_op()
# channels_last corresponds to 'NHWC' data format.
self._conv_op_data_format = 'NHWC'
self.bn_layers = []
if self.use_batch_norm:
for group_index in range(self._groups):
self.bn_layers.append(self.batch_norm_layer[group_index])
self.built = True
def call(self, inputs: Any, training: Optional[bool] = None) -> Any:
"""Performs the GroupConv2D operation on the inputs."""
input_slices = tf.split(inputs, num_or_size_splits=self._groups, axis=-1)
output_slices = []
for i in range(self._groups):
# Apply conv2d to each slice
output_slice = tf.nn.conv2d(
input_slices[i],
self.kernel[i],
strides=self.strides,
padding=self._padding_op,
data_format=self._conv_op_data_format,
dilations=self.dilation_rate)
if self.use_bias:
output_slice = tf.nn.bias_add(
output_slice, self.bias[i], data_format='NHWC')
# Apply batch norm after bias addition.
if self.use_batch_norm:
output_slice = self.bn_layers[i](output_slice, training=training)
if self.activation is not None:
output_slice = self.activation(output_slice)
output_slices.append(output_slice)
# Concat the outputs back along the channel dimension
outputs = tf.concat(output_slices, axis=-1)
return outputs
def get_config(self) -> MutableMapping[str, Any]:
"""Enables serialization for the group convolution layer."""
config = super().get_config()
config['groups'] = self._groups
config['batch_norm_layer'] = self.batch_norm_layer
config['bn_epsilon'] = self.bn_epsilon
config['bn_momentum'] = self.bn_momentum
return config
@classmethod
def from_config(cls, config):
"""Creates a layer from its config.
This method is the reverse of `get_config`, capable of instantiating the
same layer from the config dictionary. It does not handle layer connectivity
(handled by Network), nor weights (handled by `set_weights`).
    Also, since `get_config` returns `batch_norm_layer` as a list, we need to
    convert it back to either None or the batch norm class.
Arguments:
config: A Python dictionary, typically the output of get_config.
Returns:
A layer instance.
"""
if not config['batch_norm_layer']:
config['batch_norm_layer'] = None
else:
config['batch_norm_layer'] = type(config['batch_norm_layer'][0])
return cls(**config)
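# Usage sketch (illustrative): groups=2 splits 8 input channels into two
# groups of 4, runs an independent 3x3 conv (plus batch norm) per group, and
# concatenates the per-group outputs back to the requested 16 filters.
layer = GroupConv2D(
    filters=16,
    kernel_size=3,
    groups=2,
    padding='same',
    batch_norm_layer=tf.keras.layers.BatchNormalization)
outputs = layer(tf.random.uniform((2, 32, 32, 8)))
assert outputs.shape == (2, 32, 32, 16)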
class GroupConv2DKerasModel(tf.keras.Model):
"""2D group convolution as a keras model."""
def __init__(self,
filters: int,
kernel_size: Tuple[int, int],
groups: int,
batch_norm_layer: Optional[tf.keras.layers.Layer] = None,
bn_epsilon: float = 1e-3,
bn_momentum: float = 0.99,
data_format: str = 'channels_last',
padding: str = 'valid',
**kwargs: Any) -> tf.keras.Model:
"""Creates a 2D group convolution layer as a keras model.
Args:
filters: Integer, the dimensionality of the output space (i.e. the number
of output filters in the convolution).
kernel_size: An integer or tuple/list of 2 integers, specifying the height
and width of the 2D convolution window. Can be a single integer to
specify the same value for all spatial dimensions.
groups: The number of input/output channel groups.
batch_norm_layer: The batch normalization layer to use. This is typically
tf.keras.layer.BatchNormalization or a derived class.
bn_epsilon: Batch normalization epsilon.
bn_momentum: Momentum used for moving average in batch normalization.
data_format: The ordering of the dimensions in the inputs. `channels_last`
corresponds to inputs with shape `(batch_size, height, width, channels)`
padding: one of `"valid"` or `"same"` (case-insensitive).
**kwargs: Additional keyword arguments passed to the underlying conv
layers.
Raises:
      ValueError: if `groups` <= 1 or `groups` >= `filters`.
ValueError: if `batch_norm_layer` is not a callable when provided.
ValueError: if `data_format` is not channels_last
ValueError: if `padding` is not `same` or `valid`.
"""
super().__init__()
self.conv_layers = []
self.bn_layers = []
per_conv_filter_size = filters / groups
if groups <= 1 or groups >= filters:
raise ValueError('Number of groups should be greater than 1 and less '
'than the output filters.')
self.batch_norm_layer = batch_norm_layer
self.use_batch_norm = False
if self.batch_norm_layer is not None:
if not inspect.isclass(self.batch_norm_layer):
raise ValueError('batch_norm_layer is not a class.')
self.use_batch_norm = True
if 'activation' in kwargs.keys():
self.activation = tf.keras.activations.get(kwargs['activation'])
kwargs.pop('activation')
else:
self.activation = None
if data_format != 'channels_last':
raise ValueError(
'GroupConv2D expects input to be in channels_last format.')
if padding.lower() not in ('same', 'valid'):
      raise ValueError('Valid padding options are: same, or valid.')
self._groups = groups
for _ in range(self._groups):
# Override the activation so that batchnorm can be applied after the conv.
self.conv_layers.append(
tf.keras.layers.Conv2D(per_conv_filter_size, kernel_size, **kwargs))
if self.use_batch_norm:
for _ in range(self._groups):
self.bn_layers.append(
self.batch_norm_layer(
axis=-1, momentum=bn_momentum, epsilon=bn_epsilon)) # pytype: disable=bad-return-type # typed-keras
def call(self, inputs: Any) -> Any:
"""Applies 2d group convolution on the inputs."""
input_shape = inputs.get_shape().as_list()
if input_shape[-1] % self._groups != 0:
raise ValueError(
f'Number of input channels: {input_shape[-1]} are not divisible '
f'by number of groups: {self._groups}.')
input_slices = tf.split(inputs, num_or_size_splits=self._groups, axis=-1)
output_slices = []
for g in range(self._groups):
output_slice = self.conv_layers[g](input_slices[g])
if self.use_batch_norm:
output_slice = self.bn_layers[g](output_slice)
output_slice = self.activation(output_slice)
output_slices.append(output_slice)
outputs = tf.concat(output_slices, axis=-1)
return outputs
def _nnapi_scalar(value, dtype):
  """Resolves "Scalar operand should be constant" at cost of broadcasting."""
return tf.constant(value, dtype=dtype, shape=(1,))
def _fqop(x, min_val=-128, max_val=127):
"""Wraps an op x with fake quant op and given min/max."""
return tf.quantization.fake_quant_with_min_max_args(
x, min=min_val, max=max_val)
def argmax(input_tensor,
axis=-1,
output_type: tf.DType = tf.dtypes.float32,
name: Optional[str] = None,
keepdims: bool = False,
epsilon: Optional[float] = None):
"""Returns the index with the largest value across axes of a tensor.
Approximately tf.compat.v1.argmax, but not equivalent. If arithmetic allows
value to be anomalously close to the maximum, but not equal to it, the
behavior is undefined.
Args:
input_tensor: A Tensor.
axis: A Value. Must be in the range [-rank(input), rank(input)). Describes
which axis of the input Tensor to reduce across. For vectors, use axis =
0.
output_type: An optional tf.DType. Note that default is different from
tflite (int64) to make default behavior compatible with darwinn.
name: Optional name for operations.
keepdims: If true, retains reduced dimensions with length 1.
epsilon: Optional small number which is intended to be always below
quantization threshold, used to distinguish equal and not equal numbers.
Returns:
A Tensor of type output_type.
"""
fqop = _fqop if output_type.is_floating else tf.identity
safe_axis = axis
if safe_axis < 0:
safe_axis = len(input_tensor.shape) + safe_axis
reduction_size = input_tensor.shape[axis]
axis_max = tf.math.reduce_max(input_tensor, axis=axis, keepdims=True)
zero_if_max = tf.subtract(axis_max, input_tensor)
eps = epsilon if epsilon else 1e-6
if input_tensor.dtype.is_floating:
zero_if_max_else_eps = tf.math.minimum(
_nnapi_scalar(eps, input_tensor.dtype), zero_if_max)
zero_if_max_else_one = zero_if_max_else_eps * _nnapi_scalar(
1 / eps, input_tensor.dtype)
elif input_tensor.dtype.is_integer:
zero_if_max_else_one = tf.math.minimum(
_nnapi_scalar(1, input_tensor.dtype), zero_if_max)
else:
raise ValueError('Please specify epsilon for unknown input data type')
# Input type ends here, output type starts here
zero_if_max_else_one = tf.cast(zero_if_max_else_one, dtype=output_type)
zero_if_max_else_one = fqop(zero_if_max_else_one)
one_if_max_else_zero = fqop(
tf.math.subtract(
fqop(_nnapi_scalar(1, output_type)), zero_if_max_else_one))
rev_index = tf.range(reduction_size, 0, -1, dtype=output_type)
for index in range(safe_axis + 1, len(input_tensor.shape)):
rev_index = tf.expand_dims(rev_index, axis=index - safe_axis)
rev_index = fqop(rev_index)
rev_index_if_max_else_zero = fqop(
tf.math.multiply(one_if_max_else_zero, rev_index))
reverse_argmax = fqop(
tf.math.reduce_max(
rev_index_if_max_else_zero, axis=axis, keepdims=keepdims, name=name))
  # The final operation obtains the name of the argmax layer if provided.
return fqop(
tf.math.subtract(
fqop(_nnapi_scalar(reduction_size, output_type)),
reverse_argmax,
name=name))
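# Worked example (illustrative): for [0.1, 0.9, 0.3] the construction above
# yields one_if_max_else_zero = [0, 1, 0], multiplies it by the reversed index
# range [3, 2, 1] to get [0, 2, 0], and returns 3 - max([0, 2, 0]) = 1.0, the
# index of the maximum, using only ops that lower well to EdgeTPU.
assert float(argmax(tf.constant([0.1, 0.9, 0.3]))) == 1.0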
class ArgmaxKerasLayer(tf.keras.layers.Layer):
"""Implements argmax as a keras model."""
def __init__(self,
axis=-1,
name=None,
output_type=tf.dtypes.int32,
**kwargs: Any) -> tf.keras.Model:
"""Implements argmax as a keras model.
Args:
axis: A Value. Must be in the range [-rank(input), rank(input)). Describes
which axis of the input Tensor to reduce across. For vectors, use axis =
0.
name: Optional name for operations.
output_type: An optional tf.DType.
**kwargs: Other arguments passed to model constructor.
Returns:
A Tensor of type output_type.
"""
super().__init__(name=name, **kwargs)
self.axis = axis
self.output_type = output_type # pytype: disable=bad-return-type # typed-keras
def call(self, inputs: Any) -> Any:
"""Applies argmax on the inputs."""
return argmax(
input_tensor=inputs,
axis=self.axis,
output_type=self.output_type,
name=self.name)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for custom_layers."""
import itertools
from absl.testing import parameterized
import tensorflow as tf
from official.projects.edgetpu.vision.modeling import custom_layers
GROUPS = [2, 4]
INPUT_CHANNEL = [8, 16]
OUTPUT_CHANNEL = [8, 16]
USE_BATCH_NORM = [True, False]
ACTIVATION = ['relu', 'linear']
BATCH_NORM_LAYER = tf.keras.layers.BatchNormalization
# 2 functionally identical group conv implementations.
GROUP_CONV_IMPL = {
'layer': custom_layers.GroupConv2D,
'model': custom_layers.GroupConv2DKerasModel
}
def _get_random_inputs(input_shape):
return tf.random.uniform(shape=input_shape)
class GroupConv2DTest(tf.test.TestCase, parameterized.TestCase):
# Test for combinations of groups, input_channel, output_channel, and
# whether to use batch_norm
@parameterized.parameters(
itertools.product(GROUPS, INPUT_CHANNEL, OUTPUT_CHANNEL, USE_BATCH_NORM))
def test_construction(self, groups, input_channel, output_channel,
use_batch_norm):
batch_norm_layer = BATCH_NORM_LAYER if use_batch_norm else None
l = custom_layers.GroupConv2D(
output_channel,
3,
groups=groups,
use_bias=True,
batch_norm_layer=batch_norm_layer)
    inputs = _get_random_inputs(input_shape=(1, 4, 4, input_channel))
_ = l(inputs)
# kernel and bias for each group. When using batch norm, 2 additional
# trainable weights per group for batchnorm layers: gamma and beta.
expected_num_trainable_weights = groups * (2 + 2 * use_batch_norm)
self.assertLen(l.trainable_weights, expected_num_trainable_weights)
@parameterized.parameters(
itertools.product(GROUPS, INPUT_CHANNEL, OUTPUT_CHANNEL))
def test_kernel_shapes(self, groups, input_channel, output_channel):
l = custom_layers.GroupConv2D(
output_channel, 3, groups=groups, use_bias=False)
_ = l(_get_random_inputs(input_shape=(1, 32, 32, input_channel)))
expected_kernel_shapes = [(3, 3, int(input_channel / groups),
int(output_channel / groups))
for _ in range(groups)]
kernel_shapes = [
l.trainable_weights[i].get_shape()
for i in range(len(l.trainable_weights))
]
self.assertListEqual(kernel_shapes, expected_kernel_shapes)
@parameterized.parameters(
itertools.product(GROUPS, INPUT_CHANNEL, OUTPUT_CHANNEL))
def test_output_shapes(self, groups, input_channel, output_channel):
l = custom_layers.GroupConv2D(
output_channel, 3, groups=groups, use_bias=False, padding='same')
outputs = l(_get_random_inputs(input_shape=[2, 32, 32, input_channel]))
self.assertListEqual(outputs.get_shape().as_list(),
[2, 32, 32, output_channel])
@parameterized.parameters(
itertools.product(GROUPS, USE_BATCH_NORM, ACTIVATION))
def test_serialization_deserialization(self, groups, use_batch_norm,
activation):
batch_norm_layer = BATCH_NORM_LAYER if use_batch_norm else None
l = custom_layers.GroupConv2D(
filters=8,
kernel_size=1,
groups=groups,
use_bias=False,
padding='same',
batch_norm_layer=batch_norm_layer,
activation=activation)
config = l.get_config()
# New layer from config
new_l = custom_layers.GroupConv2D.from_config(config)
# Copy the weights too.
l.build(input_shape=(1, 1, 4))
new_l.build(input_shape=(1, 1, 4))
new_l.set_weights(l.get_weights())
inputs = _get_random_inputs((1, 1, 1, 4))
self.assertNotEqual(l, new_l)
self.assertAllEqual(l(inputs), new_l(inputs))
@parameterized.parameters(
itertools.product(GROUPS, INPUT_CHANNEL, OUTPUT_CHANNEL, USE_BATCH_NORM,
ACTIVATION))
def test_equivalence(self, groups, input_channel, output_channel,
use_batch_norm, activation):
batch_norm_layer = BATCH_NORM_LAYER if use_batch_norm else None
kwargs = dict(
filters=output_channel,
groups=groups,
kernel_size=1,
use_bias=False,
batch_norm_layer=batch_norm_layer,
activation=activation)
gc_layer = tf.keras.Sequential([custom_layers.GroupConv2D(**kwargs)])
gc_model = custom_layers.GroupConv2DKerasModel(**kwargs)
gc_layer.build(input_shape=(None, 3, 3, input_channel))
gc_model.build(input_shape=(None, 3, 3, input_channel))
inputs = _get_random_inputs((2, 3, 3, input_channel))
gc_layer.set_weights(gc_model.get_weights())
self.assertAllEqual(gc_layer(inputs), gc_model(inputs))
@parameterized.parameters(('layer', 1, 4), ('layer', 4, 4), ('model', 1, 4),
('model', 4, 4))
def test_invalid_groups_raises_value_error(self, gc_type, groups,
output_channel):
with self.assertRaisesRegex(ValueError, r'^(Number of groups)'):
_ = GROUP_CONV_IMPL[gc_type](
filters=output_channel, groups=groups, kernel_size=3)
@parameterized.parameters(('layer', 3, 4), ('layer', 4, 6), ('model', 3, 4),
('model', 4, 6))
def test_non_group_divisible_raises_value_error(self, gc_type, groups,
input_channel):
with self.assertRaisesRegex(ValueError, r'^(Number of input channels)'):
l = GROUP_CONV_IMPL[gc_type](
filters=groups * 4, groups=groups, kernel_size=3)
l.build(input_shape=(4, 4, input_channel))
@parameterized.parameters(('layer'), ('model'))
def test_non_supported_data_format_raises_value_error(self, gc_type):
with self.assertRaisesRegex(ValueError, r'^(.*(channels_last).*)'):
_ = GROUP_CONV_IMPL[gc_type](
filters=4, groups=2, kernel_size=1, data_format='channels_first')
@parameterized.parameters(('layer'), ('model'))
def test_invalid_batch_norm_raises_value_error(self, gc_type):
def my_batch_norm(x):
return x**2
with self.assertRaisesRegex(ValueError, r'^(.*(not a class).*)'):
_ = GROUP_CONV_IMPL[gc_type](
filters=4, groups=2, kernel_size=1, batch_norm_layer=my_batch_norm)
@parameterized.parameters(('layer'), ('model'))
def test_invalid_padding_raises_value_error(self, gc_type):
with self.assertRaisesRegex(ValueError, r'^(.*(same, or valid).*)'):
_ = GROUP_CONV_IMPL[gc_type](
filters=4, groups=2, kernel_size=1, padding='causal')
class ArgmaxTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(([16, 32, 64], tf.dtypes.float32, tf.dtypes.int32),
([255, 19], tf.dtypes.int32, tf.dtypes.int64))
def test_reference_match(self, shape, input_type, output_type):
random_inputs = tf.random.uniform(shape=shape, maxval=10, dtype=input_type)
for axis in range(-len(shape) + 1, len(shape)):
control_output = tf.math.argmax(
random_inputs, axis=axis, output_type=output_type)
test_output = custom_layers.argmax(
random_inputs, axis=axis, output_type=output_type)
self.assertAllEqual(control_output, test_output)
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains the definitions of Bi-Directional Feature Pyramid Networks (BiFPN)."""
import functools
import itertools
from typing import Text, Optional
# Import libraries
from absl import logging
import numpy as np
import tensorflow as tf
from official.projects.edgetpu.vision.modeling import common_modules
def activation_fn(features: tf.Tensor, act_type: Text):
"""Customized non-linear activation type."""
if act_type in ('silu', 'swish'):
return tf.nn.swish(features)
elif act_type == 'swish_native':
return features * tf.sigmoid(features)
elif act_type == 'hswish':
return features * tf.nn.relu6(features + 3) / 6
elif act_type == 'relu':
return tf.nn.relu(features)
elif act_type == 'relu6':
return tf.nn.relu6(features)
else:
raise ValueError('Unsupported act_type {}'.format(act_type))
def build_batch_norm(is_training_bn: bool,
beta_initializer: Text = 'zeros',
gamma_initializer: Text = 'ones',
data_format: Text = 'channels_last',
momentum: float = 0.99,
epsilon: float = 1e-3,
strategy: Optional[Text] = None,
name: Text = 'tpu_batch_normalization'):
"""Builds a batch normalization layer.
Args:
is_training_bn: `bool` for whether the model is training.
beta_initializer: `str`, beta initializer.
gamma_initializer: `str`, gamma initializer.
    data_format: `str` either "channels_first" for `[batch, channels, height,
      width]` or "channels_last" for `[batch, height, width, channels]`.
    momentum: `float`, momentum of batch norm.
epsilon: `float`, small value for numerical stability.
strategy: `str`, whether to use tpu, gpus or other version of batch norm.
name: the name of the batch normalization layer
Returns:
A normalized `Tensor` with the same `data_format`.
"""
axis = 1 if data_format == 'channels_first' else -1
if is_training_bn:
batch_norm_class = common_modules.get_batch_norm(strategy)
else:
batch_norm_class = tf.keras.layers.BatchNormalization
bn_layer = batch_norm_class(
axis=axis,
momentum=momentum,
epsilon=epsilon,
center=True,
scale=True,
beta_initializer=beta_initializer,
gamma_initializer=gamma_initializer,
name=name)
return bn_layer
def bifpn_config(min_level, max_level):
"""A dynamic bifpn config that can adapt to different min/max levels."""
p = {}
# Node id starts from the input features and monotonically increase whenever
# a new node is added. Here is an example for level P3 - P7:
# P7 (4) P7" (12)
# P6 (3) P6' (5) P6" (11)
# P5 (2) P5' (6) P5" (10)
# P4 (1) P4' (7) P4" (9)
# P3 (0) P3" (8)
# So output would be like:
# [
# {'feat_level': 6, 'inputs_offsets': [3, 4]}, # for P6'
# {'feat_level': 5, 'inputs_offsets': [2, 5]}, # for P5'
# {'feat_level': 4, 'inputs_offsets': [1, 6]}, # for P4'
# {'feat_level': 3, 'inputs_offsets': [0, 7]}, # for P3"
# {'feat_level': 4, 'inputs_offsets': [1, 7, 8]}, # for P4"
# {'feat_level': 5, 'inputs_offsets': [2, 6, 9]}, # for P5"
# {'feat_level': 6, 'inputs_offsets': [3, 5, 10]}, # for P6"
# {'feat_level': 7, 'inputs_offsets': [4, 11]}, # for P7"
# ]
num_levels = max_level - min_level + 1
node_ids = {min_level + i: [i] for i in range(num_levels)}
level_last_id = lambda level: node_ids[level][-1]
level_all_ids = lambda level: node_ids[level]
id_cnt = itertools.count(num_levels)
p['nodes'] = []
for i in range(max_level - 1, min_level - 1, -1):
# top-down path.
p['nodes'].append({
'feat_level': i,
'inputs_offsets': [level_last_id(i),
level_last_id(i + 1)]
})
node_ids[i].append(next(id_cnt))
for i in range(min_level + 1, max_level + 1):
# bottom-up path.
p['nodes'].append({
'feat_level': i,
'inputs_offsets': level_all_ids(i) + [level_last_id(i - 1)]
})
node_ids[i].append(next(id_cnt))
return p
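# Example (illustrative; matches the P3 - P7 diagram above): for min_level=3
# and max_level=7 the config contains eight fusion nodes, the top-down path
# P6'..P3" followed by the bottom-up path P4"..P7".
example_config = bifpn_config(min_level=3, max_level=7)
assert len(example_config['nodes']) == 8
assert example_config['nodes'][0] == {'feat_level': 6, 'inputs_offsets': [3, 4]}
assert example_config['nodes'][-1] == {'feat_level': 7, 'inputs_offsets': [4, 11]}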
def get_conv_op(conv_type):
"""Gets convlution op."""
kernel_size = int(conv_type.split('_')[-1])
if conv_type.startswith('sep'):
conv_op = functools.partial(
tf.keras.layers.SeparableConv2D,
depth_multiplier=1,
kernel_size=(kernel_size, kernel_size))
elif conv_type.startswith('conv'):
conv_op = functools.partial(
tf.keras.layers.Conv2D, kernel_size=(kernel_size, kernel_size))
else:
raise ValueError('Unknown conv type: {}'.format(conv_type))
return conv_op
def add_n(nodes):
"""A customized add_n to add up a list of tensors."""
# tf.add_n is not supported by EdgeTPU, while tf.reduce_sum is not supported
# by GPU and runs slow on EdgeTPU because of the 5-dimension op.
with tf.name_scope('add_n'):
new_node = nodes[0]
for n in nodes[1:]:
new_node = new_node + n
return new_node
def resize_nearest_neighbor(data, height_scale, width_scale):
"""Nearest neighbor upsampling implementation."""
with tf.name_scope('nearest_upsampling'):
bs, h, w, c = data.get_shape().as_list()
bs = -1 if bs is None else bs
# Use reshape to quickly upsample the input. The nearest pixel is selected
# implicitly via broadcasting.
data = tf.reshape(data, [bs, h, 1, w, 1, c]) * tf.ones(
[1, 1, height_scale, 1, width_scale, 1], dtype=data.dtype)
return tf.reshape(data, [bs, h * height_scale, w * width_scale, c])
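# Mini-example (illustrative): the reshape-and-broadcast trick duplicates each
# pixel into a scale x scale block, so a 1x2x2x1 map upsampled 2x becomes
# 1x4x4x1.
small = tf.reshape(tf.constant([[1.0, 2.0], [3.0, 4.0]]), [1, 2, 2, 1])
big = resize_nearest_neighbor(small, height_scale=2, width_scale=2)
assert big.shape == (1, 4, 4, 1)
assert float(big[0, 0, 1, 0]) == 1.0  # copied from the top-left source pixel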
def resize(feat,
target_height,
target_width,
strategy,
training=False,
method='bilinear'):
"""Resizes the spitial dimensions."""
dtype = feat.dtype
feat_shape = feat.get_shape()
if method == 'bilinear':
if strategy == 'tpu' and training:
if dtype == tf.bfloat16:
feat = tf.cast(feat, tf.float32)
feat = tf.image.resize(feat, [target_height, target_width])
feat = tf.cast(feat, dtype)
elif feat_shape.is_fully_defined():
# Batch dimension is known. Mimic resize[h,w] with
# resize[h,1]+resize[1,w] to reduce HBM padding.
b, h, w, c = feat_shape.as_list()
feat = tf.reshape(feat, [b, h, 1, -1])
feat = tf.image.resize(feat, [target_height, 1])
feat = tf.reshape(feat, [-1, 1, w, c])
feat = tf.image.resize(feat, [1, target_width])
feat = tf.reshape(feat, [b, target_height, target_width, c])
else:
feat = tf.image.resize(feat, [target_height, target_width])
else:
feat = tf.image.resize(feat, [target_height, target_width])
elif method == 'nearest':
_, h, w, _ = feat_shape.as_list()
if training and target_height % h == 0 and target_width % w == 0:
feat = resize_nearest_neighbor(feat, target_height // h,
target_width // w)
else:
feat = tf.cast(feat, tf.float32)
feat = tf.image.resize(feat, [target_height, target_width],
tf.image.ResizeMethod.NEAREST_NEIGHBOR)
else:
raise ValueError('Upsampling type {} is not supported.'.format(method))
return tf.cast(feat, dtype)
class ResampleFeatureMap(tf.keras.layers.Layer):
"""Resamples feature map for downsampling or upsampling."""
def __init__(self,
feat_level,
target_num_channels,
apply_bn=False,
is_training_bn=None,
conv_after_downsample=False,
strategy=None,
data_format=None,
pooling_type=None,
upsampling_type=None,
name='resample_p0'):
super().__init__(name=name)
self.apply_bn = apply_bn
self.is_training_bn = is_training_bn
self.data_format = data_format
self.target_num_channels = target_num_channels
self.feat_level = feat_level
self.strategy = strategy
self.conv_after_downsample = conv_after_downsample
self.pooling_type = pooling_type or 'max'
self.upsampling_type = upsampling_type or 'nearest'
def _pool2d(self, inputs, height, width, target_height, target_width):
"""Pools the inputs to target height and width."""
height_stride_size = int((height - 1) // target_height + 1)
width_stride_size = int((width - 1) // target_width + 1)
if self.pooling_type == 'max':
return tf.keras.layers.MaxPooling2D(
pool_size=[height_stride_size + 1, width_stride_size + 1],
strides=[height_stride_size, width_stride_size],
padding='SAME',
data_format=self.data_format)(
inputs)
if self.pooling_type == 'avg':
return tf.keras.layers.AveragePooling2D(
pool_size=[height_stride_size + 1, width_stride_size + 1],
strides=[height_stride_size, width_stride_size],
padding='SAME',
data_format=self.data_format)(
inputs)
raise ValueError('Unsupported pooling type {}.'.format(self.pooling_type))
def _upsample2d(self, inputs, target_height, target_width, training):
return resize(inputs, target_height, target_width, self.strategy, training,
self.upsampling_type)
def _maybe_apply_1x1(self, feat, training, num_channels):
"""Applies 1x1 conv to change layer width if necessary."""
target_num_channels = self.target_num_channels
if target_num_channels is None or num_channels != target_num_channels:
feat = self.conv2d(feat)
if self.apply_bn:
feat = self.bn(feat, training=training)
return feat
def build(self, feat_shape):
num_channels = self.target_num_channels or feat_shape[-1]
self.conv2d = tf.keras.layers.Conv2D(
num_channels, (1, 1),
padding='same',
data_format=self.data_format,
name='conv2d')
self.bn = build_batch_norm(
is_training_bn=self.is_training_bn,
data_format=self.data_format,
strategy=self.strategy,
name='bn')
self.built = True
super().build(feat_shape)
def call(self, feat, training, all_feats):
hwc_idx = (2, 3, 1) if self.data_format == 'channels_first' else (1, 2, 3)
height, width, num_channels = [feat.shape.as_list()[i] for i in hwc_idx]
if all_feats:
target_feat_shape = all_feats[self.feat_level].shape.as_list()
target_height, target_width, _ = [target_feat_shape[i] for i in hwc_idx]
else:
# Default to downsampling if all_feats is empty.
target_height, target_width = (height + 1) // 2, (width + 1) // 2
# If conv_after_downsample is True, when downsampling, apply 1x1 after
# downsampling for efficiency.
if height > target_height and width > target_width:
if not self.conv_after_downsample:
feat = self._maybe_apply_1x1(feat, training, num_channels)
feat = self._pool2d(feat, height, width, target_height, target_width)
if self.conv_after_downsample:
feat = self._maybe_apply_1x1(feat, training, num_channels)
elif height <= target_height and width <= target_width:
feat = self._maybe_apply_1x1(feat, training, num_channels)
if height < target_height or width < target_width:
feat = self._upsample2d(feat, target_height, target_width, training)
else:
      raise ValueError(
          'Incompatible resampling: feat shape {}x{} target shape: {}x{}'
          .format(height, width, target_height, target_width))
return feat
class FNode(tf.keras.layers.Layer):
"""A Keras Layer implementing BiFPN Node."""
def __init__(self,
feat_level,
inputs_offsets,
fpn_num_filters,
apply_bn_for_resampling,
is_training_bn,
conv_after_downsample,
conv_bn_act_pattern,
conv_type,
act_type,
strategy,
weight_method,
data_format,
pooling_type,
upsampling_type,
name='fnode'):
super().__init__(name=name)
self.feat_level = feat_level
self.inputs_offsets = inputs_offsets
self.fpn_num_filters = fpn_num_filters
self.apply_bn_for_resampling = apply_bn_for_resampling
self.conv_type = conv_type
self.act_type = act_type
self.is_training_bn = is_training_bn
self.conv_after_downsample = conv_after_downsample
self.strategy = strategy
self.data_format = data_format
self.weight_method = weight_method
self.conv_bn_act_pattern = conv_bn_act_pattern
self.pooling_type = pooling_type
self.upsampling_type = upsampling_type
self.resample_layers = []
self.vars = []
def fuse_features(self, nodes):
"""Fuses features from different resolutions and return a weighted sum.
Args:
nodes: a list of tensorflow features at different levels
Returns:
A tensor denoting the fused feature.
"""
dtype = nodes[0].dtype
if self.weight_method == 'attn':
edge_weights = [tf.cast(var, dtype=dtype) for var in self.vars]
normalized_weights = tf.nn.softmax(tf.stack(edge_weights))
nodes = tf.stack(nodes, axis=-1)
new_node = tf.reduce_sum(nodes * normalized_weights, -1)
elif self.weight_method == 'fastattn':
edge_weights = [
tf.nn.relu(tf.cast(var, dtype=dtype)) for var in self.vars
]
weights_sum = add_n(edge_weights)
nodes = [
nodes[i] * edge_weights[i] / (weights_sum + 0.0001)
for i in range(len(nodes))
]
new_node = add_n(nodes)
elif self.weight_method == 'channel_attn':
edge_weights = [tf.cast(var, dtype=dtype) for var in self.vars]
normalized_weights = tf.nn.softmax(tf.stack(edge_weights, -1), axis=-1)
nodes = tf.stack(nodes, axis=-1)
new_node = tf.reduce_sum(nodes * normalized_weights, -1)
elif self.weight_method == 'channel_fastattn':
edge_weights = [
tf.nn.relu(tf.cast(var, dtype=dtype)) for var in self.vars
]
weights_sum = add_n(edge_weights)
nodes = [
nodes[i] * edge_weights[i] / (weights_sum + 0.0001)
for i in range(len(nodes))
]
new_node = add_n(nodes)
elif self.weight_method == 'sum':
new_node = add_n(nodes)
else:
raise ValueError('unknown weight_method %s' % self.weight_method)
return new_node
def _add_wsm(self, initializer, shape=None):
for i, _ in enumerate(self.inputs_offsets):
name = 'WSM' + ('' if i == 0 else '_' + str(i))
self.vars.append(
self.add_weight(initializer=initializer, name=name, shape=shape))
def build(self, feats_shape):
for i, input_offset in enumerate(self.inputs_offsets):
name = 'resample_{}_{}_{}'.format(i, input_offset, len(feats_shape))
self.resample_layers.append(
ResampleFeatureMap(
self.feat_level,
self.fpn_num_filters,
self.apply_bn_for_resampling,
self.is_training_bn,
self.conv_after_downsample,
strategy=self.strategy,
data_format=self.data_format,
pooling_type=self.pooling_type,
upsampling_type=self.upsampling_type,
name=name))
if self.weight_method == 'attn':
self._add_wsm('ones')
elif self.weight_method == 'fastattn':
self._add_wsm('ones')
elif self.weight_method == 'channel_attn':
num_filters = int(self.fpn_num_filters)
self._add_wsm(tf.ones, num_filters)
elif self.weight_method == 'channel_fastattn':
num_filters = int(self.fpn_num_filters)
self._add_wsm(tf.ones, num_filters)
self.op_after_combine = OpAfterCombine(
self.is_training_bn,
self.conv_bn_act_pattern,
self.conv_type,
self.fpn_num_filters,
self.act_type,
self.data_format,
self.strategy,
name='op_after_combine{}'.format(len(feats_shape)))
self.built = True
super().build(feats_shape)
def call(self, feats, training):
nodes = []
for i, input_offset in enumerate(self.inputs_offsets):
input_node = feats[input_offset]
input_node = self.resample_layers[i](input_node, training, feats)
nodes.append(input_node)
new_node = self.fuse_features(nodes)
new_node = self.op_after_combine(new_node)
return feats + [new_node]
class OpAfterCombine(tf.keras.layers.Layer):
"""Operation after combining input features during feature fusiong."""
def __init__(self,
is_training_bn,
conv_bn_act_pattern,
conv_type,
fpn_num_filters,
act_type,
data_format,
strategy,
name='op_after_combine'):
super().__init__(name=name)
self.conv_bn_act_pattern = conv_bn_act_pattern
self.fpn_num_filters = fpn_num_filters
self.act_type = act_type
self.data_format = data_format
self.strategy = strategy
self.is_training_bn = is_training_bn
self.conv_op = get_conv_op(conv_type)(
filters=fpn_num_filters,
padding='same',
use_bias=not self.conv_bn_act_pattern,
data_format=self.data_format,
name='conv')
self.bn = build_batch_norm(
is_training_bn=self.is_training_bn,
data_format=self.data_format,
strategy=self.strategy,
name='bn')
def call(self, new_node, training):
if not self.conv_bn_act_pattern:
new_node = activation_fn(new_node, self.act_type)
new_node = self.conv_op(new_node)
new_node = self.bn(new_node, training=training)
if self.conv_bn_act_pattern:
new_node = activation_fn(new_node, self.act_type)
return new_node
class FPNCells(tf.keras.layers.Layer):
"""FPN cells."""
def __init__(self,
min_level=3,
max_level=8,
fpn_num_filters=96,
apply_bn_for_resampling=True,
is_training_bn=True,
conv_after_downsample=True,
conv_bn_act_pattern=True,
conv_type='sep_3',
act_type='swish',
strategy='tpu',
fpn_weight_method='sum',
data_format='channels_last',
pooling_type='avg',
upsampling_type='bilinear',
fpn_name='bifpn',
fpn_cell_repeats=4,
**kwargs):
super(FPNCells, self).__init__(**kwargs)
self.min_level = min_level
self.max_level = max_level
if fpn_name != 'bifpn':
raise ValueError('Only bifpn config is supported.')
self.fpn_config = bifpn_config(min_level, max_level)
self.cells = [
FPNCell( # pylint: disable=g-complex-comprehension
min_level=min_level,
max_level=max_level,
fpn_num_filters=fpn_num_filters,
apply_bn_for_resampling=apply_bn_for_resampling,
is_training_bn=is_training_bn,
conv_after_downsample=conv_after_downsample,
conv_bn_act_pattern=conv_bn_act_pattern,
conv_type=conv_type,
act_type=act_type,
strategy=strategy,
fpn_weight_method=fpn_weight_method,
data_format=data_format,
pooling_type=pooling_type,
upsampling_type=upsampling_type,
fpn_name=fpn_name,
name='cell_%d' % rep) for rep in range(fpn_cell_repeats)
]
def call(self, feats, training):
"""Model call function."""
for cell in self.cells:
cell_feats = cell(feats, training)
min_level = self.min_level
max_level = self.max_level
feats = []
for level in range(min_level, max_level + 1):
for i, fnode in enumerate(reversed(self.fpn_config['nodes'])):
if fnode['feat_level'] == level:
feats.append(cell_feats[-1 - i])
break
return feats
class FPNCell(tf.keras.layers.Layer):
"""A single FPN cell."""
def __init__(self,
min_level=3,
max_level=7,
fpn_num_filters=80,
apply_bn_for_resampling=True,
is_training_bn=True,
conv_after_downsample=True,
conv_bn_act_pattern=True,
conv_type='sep_3',
act_type='swish',
strategy='tpu',
fpn_weight_method='sum',
data_format='channels_last',
pooling_type='avg',
upsampling_type='bilinear',
fpn_name='bifpn',
name='fpn_cell',
**kwargs):
super(FPNCell, self).__init__(**kwargs)
if fpn_name != 'bifpn':
raise ValueError('Only bifpn config is supported')
self.fpn_config = bifpn_config(min_level, max_level)
self.fnodes = []
for i, fnode_cfg in enumerate(self.fpn_config['nodes']):
logging.info('fnode %d : %s', i, fnode_cfg)
fnode = FNode(
fnode_cfg['feat_level'] - min_level,
fnode_cfg['inputs_offsets'],
fpn_num_filters=fpn_num_filters,
apply_bn_for_resampling=apply_bn_for_resampling,
is_training_bn=is_training_bn,
conv_after_downsample=conv_after_downsample,
conv_bn_act_pattern=conv_bn_act_pattern,
conv_type=conv_type,
act_type=act_type,
strategy=strategy,
weight_method=fpn_weight_method,
data_format=data_format,
pooling_type=pooling_type,
upsampling_type=upsampling_type,
name='fnode%d' % i)
self.fnodes.append(fnode)
def call(self, feats, training):
def _call(feats):
for fnode in self.fnodes:
feats = fnode(feats, training)
return feats
return _call(feats)
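# Usage sketch (illustrative; also runs on CPU, where TpuBatchNormalization
# sees a single shard and skips the cross-replica average). Input features
# must halve in resolution per level, here P3..P8 for a 256x256 input.
fpn = FPNCells(min_level=3, max_level=8, fpn_num_filters=64,
               fpn_cell_repeats=2)
feats = [tf.random.uniform((1, 256 // 2**level, 256 // 2**level, 32))
         for level in range(3, 9)]
outputs = fpn(feats, training=False)
assert len(outputs) == 6
assert outputs[0].shape == (1, 32, 32, 64)  # P3 output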
class SegClassNet(tf.keras.layers.Layer):
"""Segmentation class prediction network."""
def __init__(self,
min_level=3,
max_level=7,
output_filters=256,
apply_bn_for_resampling=True,
is_training_bn=True,
conv_after_downsample=True,
conv_bn_act_pattern=True,
head_conv_type='sep_3',
act_type='swish',
strategy='tpu',
output_weight_method='attn',
data_format='channels_last',
pooling_type='avg',
upsampling_type='bilinear',
fullres_output=False,
fullres_skip_connections=False,
num_classes=32,
name='seg_class_net'):
"""Initialize the SegClassNet.
Args:
min_level: minimum feature level to use in the head.
max_level: maximum feature level to use in the head.
output_filters: output filter size.
    apply_bn_for_resampling: whether to apply batch normalization for
      resampling.
is_training_bn: is training mode.
conv_after_downsample: whether to apply conv after downsample.
conv_bn_act_pattern: conv batch norm activation pattern.
head_conv_type: head convolution type.
act_type: activation type.
    strategy: device strategy, e.g. tpu.
output_weight_method: output weight method.
data_format: data format.
pooling_type: pooling type.
    upsampling_type: upsampling type.
fullres_output: full resolution output.
fullres_skip_connections: full resolution skip connection.
num_classes: number of classes.
name: the name of this layer.
"""
super().__init__(name=name)
conv2d_layer = get_conv_op(head_conv_type)
self.min_level = min_level
self.max_level = max_level
self.fullres_output = fullres_output
    self.fullres_skip_connections = fullres_skip_connections
self.fnode = FNode(
0, # Always use the first level with highest resolution.
list(range(max_level - min_level + 1)),
output_filters,
apply_bn_for_resampling,
is_training_bn,
conv_after_downsample,
conv_bn_act_pattern,
head_conv_type,
act_type,
strategy,
output_weight_method,
data_format,
pooling_type,
upsampling_type,
name='seg_class_fusion')
if fullres_output:
self.fullres_conv_transpose = {}
self.fullres_conv = {}
for i in reversed(range(min_level)):
num_filters = min(num_classes * 2**(i + 1),
output_filters)
self.fullres_conv[str(i)] = conv2d_layer(
filters=num_filters,
data_format=data_format,
kernel_size=3,
strides=1,
padding='same',
activation=act_type,
name='fullres_conv_%d' % i)
self.fullres_conv_transpose[str(i)] = tf.keras.layers.Conv2DTranspose(
filters=num_filters,
data_format=data_format,
kernel_size=3,
strides=2,
padding='same',
activation=act_type,
name='fullres_conv_transpose_%d' % i)
self.classes = conv2d_layer(
num_classes,
bias_initializer=tf.constant_initializer(-np.log((1 - 0.01) / 0.01)),
padding='same',
name='seg-class-predict')
def call(self, inputs, backbone_feats, training):
"""Call SegClassNet."""
seg_output = self.fnode(inputs, training)
net = seg_output[-1]
if self.fullres_output:
for i in reversed(range(self.min_level)):
        if self.fullres_skip_connections:
net = tf.keras.layers.Concatenate()([net, backbone_feats[i + 1]])
net = self.fullres_conv[str(i)](net)
net = self.fullres_conv_transpose[str(i)](net)
class_outputs = self.classes(net)
return class_outputs
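# Shape sketch for the full-resolution path (assuming min_level=3 and
# `channels_last` inputs of size (H, W)): the fused feature enters at stride
# 2**min_level = 8 and each of the min_level stride-2 Conv2DTranspose layers
# doubles the spatial size before `seg-class-predict` runs:
#
#   (H/8, W/8) -> (H/4, W/4) -> (H/2, W/2) -> (H, W)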
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions for MobilenetEdgeTPU image classification models."""
from typing import Any, Dict, Optional, Text
# Import libraries
from absl import logging
import tensorflow as tf
from official.projects.edgetpu.vision.modeling import common_modules
from official.projects.edgetpu.vision.modeling import mobilenet_edgetpu_v1_model_blocks
ModelConfig = mobilenet_edgetpu_v1_model_blocks.ModelConfig
MODEL_CONFIGS = {
# (width, depth, resolution, dropout)
'mobilenet_edgetpu': ModelConfig.from_args(1.0, 1.0, 224, 0.1),
'mobilenet_edgetpu_dm1p25': ModelConfig.from_args(1.25, 1.0, 224, 0.1),
'mobilenet_edgetpu_dm1p5': ModelConfig.from_args(1.5, 1.0, 224, 0.1),
'mobilenet_edgetpu_dm1p75': ModelConfig.from_args(1.75, 1.0, 224, 0.1)
}
@tf.keras.utils.register_keras_serializable(package='Vision')
class MobilenetEdgeTPU(tf.keras.Model):
"""Wrapper class for a MobilenetEdgeTPU Keras model.
Contains helper methods to build, manage, and save metadata about the model.
"""
def __init__(self,
config: Optional[ModelConfig] = None,
overrides: Optional[Dict[Text, Any]] = None):
"""Create a MobilenetEdgeTPU model.
Args:
      config: (optional) the main model parameters used to create the model.
      overrides: (optional) a dict containing keys that can override config.
"""
overrides = overrides or {}
config = config or ModelConfig()
self.config = config.replace(**overrides)
input_channels = self.config.input_channels
model_name = self.config.model_name
if isinstance(self.config.resolution, tuple):
input_shape = (self.config.resolution[0], self.config.resolution[1],
input_channels)
else:
input_shape = (self.config.resolution, self.config.resolution,
input_channels)
image_input = tf.keras.layers.Input(shape=input_shape)
output = mobilenet_edgetpu_v1_model_blocks.mobilenet_edgetpu(
image_input, self.config)
if not isinstance(output, dict):
# Cast to float32 in case we have a different model dtype
output = tf.cast(output, tf.float32)
self._output_specs = output.get_shape()
else:
self._output_specs = {
feature: output[feature].get_shape() for feature in output
}
logging.info('Building model %s with params %s',
model_name,
self.config)
super(MobilenetEdgeTPU, self).__init__(
inputs=image_input, outputs=output, name=model_name)
@classmethod
def from_name(cls,
model_name: str,
model_weights_path: Optional[str] = None,
checkpoint_format: Optional[str] = 'tf_checkpoint',
overrides: Optional[Dict[str, Any]] = None):
"""Construct an MobilenetEdgeTPU model from a predefined model name.
E.g., `MobilenetEdgeTPU.from_name('mobilenet_edgetpu')`.
Args:
model_name: the predefined model name
model_weights_path: the path to the weights (h5 file or saved model dir)
checkpoint_format: the model weights format. One of 'tf_checkpoint' or
'keras_checkpoint'.
overrides: (optional) a dict containing keys that can override config
Returns:
      A constructed MobilenetEdgeTPU instance.
"""
model_configs = dict(MODEL_CONFIGS)
overrides = dict(overrides) if overrides else {}
# One can define their own custom models if necessary
model_configs.update(overrides.pop('model_config', {}))
if model_name not in model_configs:
raise ValueError('Unknown model name {}'.format(model_name))
config = model_configs[model_name]
model = cls(config=config, overrides=overrides)
if model_weights_path:
common_modules.load_weights(model,
model_weights_path,
checkpoint_format=checkpoint_format)
return model
@property
def output_specs(self):
"""A dict of {level: TensorShape} pairs for the model output."""
return self._output_specs
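# Example usage (a sketch, not part of the library API):
#
#   model = MobilenetEdgeTPU.from_name('mobilenet_edgetpu')
#   images = tf.zeros((1, 224, 224, 3), dtype=tf.float32)
#   probs = model(images, training=False)  # (1, 1001) softmax probabilities
#
# Passing overrides={'num_classes': 10} (or any other ModelConfig field)
# rebuilds the graph with the replaced hyperparameters.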
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions for MobilenetEdgeTPU image classification models."""
import dataclasses
import math
from typing import Any, Optional, Tuple, Union
# Import libraries
from absl import logging
import tensorflow as tf
from official.modeling import tf_utils
from official.modeling.hyperparams import base_config
from official.projects.edgetpu.vision.modeling import common_modules
@dataclasses.dataclass
class BlockConfig(base_config.Config):
"""Config for a single MB Conv Block."""
input_filters: int = 0
output_filters: int = 0
kernel_size: int = 3
num_repeat: int = 1
expand_ratio: int = 1
strides: Tuple[int, int] = (1, 1)
se_ratio: Optional[float] = None
id_skip: bool = True
fused_conv: bool = False
conv_type: str = 'depthwise'
@dataclasses.dataclass
class ModelConfig(base_config.Config):
"""Default Config for MobilenetEdgeTPU."""
width_coefficient: float = 1.0
depth_coefficient: float = 1.0
resolution: Union[int, Tuple[int, int]] = 224
dropout_rate: float = 0.1
blocks: Tuple[BlockConfig, ...] = (
# (input_filters, output_filters, kernel_size, num_repeat,
# expand_ratio, strides, se_ratio, id_skip, fused_conv, conv_type)
# pylint: disable=bad-whitespace
BlockConfig.from_args(32, 16, 3, 1, 1, (1, 1), conv_type='no_depthwise'),
BlockConfig.from_args(16, 32, 3, 1, 8, (2, 2), fused_conv=True),
BlockConfig.from_args(32, 32, 3, 3, 4, (1, 1), conv_type='no_depthwise'),
BlockConfig.from_args(32, 48, 3, 1, 8, (2, 2), fused_conv=True),
BlockConfig.from_args(48, 48, 3, 3, 4, (1, 1), conv_type='no_depthwise'),
BlockConfig.from_args(48, 96, 3, 1, 8, (2, 2)),
BlockConfig.from_args(96, 96, 3, 3, 4, (1, 1)),
BlockConfig.from_args(96, 96, 3, 1, 8, (1, 1), id_skip=False),
BlockConfig.from_args(96, 96, 3, 3, 4, (1, 1)),
BlockConfig.from_args(96, 160, 5, 1, 8, (2, 2)),
BlockConfig.from_args(160, 160, 5, 3, 4, (1, 1)),
BlockConfig.from_args(160, 192, 3, 1, 8, (1, 1)),
# pylint: enable=bad-whitespace
)
stem_base_filters: int = 32
top_base_filters: int = 1280
activation: str = 'relu'
batch_norm: str = 'default'
bn_momentum: float = 0.99
bn_epsilon: float = 1e-3
# While the original implementation used a weight decay of 1e-5,
# tf.nn.l2_loss divides it by 2, so we halve this to compensate in Keras
weight_decay: float = 5e-6
drop_connect_rate: float = 0.1
depth_divisor: int = 8
min_depth: Optional[int] = None
# No Squeeze/Excite for MobilenetEdgeTPU
use_se: bool = False
input_channels: int = 3
num_classes: int = 1001
model_name: str = 'mobilenet_edgetpu'
rescale_input: bool = False
data_format: str = 'channels_last'
dtype: str = 'float32'
backbone_only: bool = False
CONV_KERNEL_INITIALIZER = {
'class_name': 'VarianceScaling',
'config': {
'scale': 2.0,
'mode': 'fan_out',
# Note: this is a truncated normal distribution
'distribution': 'normal'
}
}
DENSE_KERNEL_INITIALIZER = {
'class_name': 'VarianceScaling',
'config': {
'scale': 1 / 3.0,
'mode': 'fan_out',
'distribution': 'uniform'
}
}
# TODO(longy): Reuse the utility functions for V1/V2 models.
def round_filters(filters: int,
config: ModelConfig) -> int:
"""Round number of filters based on width coefficient."""
width_coefficient = config.width_coefficient
min_depth = config.min_depth
divisor = config.depth_divisor
orig_filters = filters
if not width_coefficient:
return filters
filters *= width_coefficient
min_depth = min_depth or divisor
new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_filters < 0.9 * filters:
new_filters += divisor
logging.info('round_filter input=%s output=%s', orig_filters, new_filters)
return int(new_filters)
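# Worked example: with the default divisor of 8 and width_coefficient=1.25,
# round_filters(32, config) scales 32 to 40.0, rounds via
# max(8, int(40 + 4) // 8 * 8) = 40, and keeps 40 since it is not more than
# 10% below the scaled value. With the defaults, filter counts always land on
# multiples of the depth divisor.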
def round_repeats(repeats: int, depth_coefficient: float) -> int:
"""Round number of repeats based on depth coefficient."""
return int(math.ceil(depth_coefficient * repeats))
def conv2d_block(inputs: tf.Tensor,
conv_filters: Optional[int],
config: ModelConfig,
kernel_size: Any = (1, 1),
strides: Any = (1, 1),
use_batch_norm: bool = True,
use_bias: bool = False,
activation: Any = None,
depthwise: bool = False,
name: Optional[str] = None):
"""A conv2d followed by batch norm and an activation."""
batch_norm = common_modules.get_batch_norm(config.batch_norm)
bn_momentum = config.bn_momentum
bn_epsilon = config.bn_epsilon
data_format = tf.keras.backend.image_data_format()
weight_decay = config.weight_decay
name = name or ''
# Collect args based on what kind of conv2d block is desired
init_kwargs = {
'kernel_size': kernel_size,
'strides': strides,
'use_bias': use_bias,
'padding': 'same',
'name': name + '_conv2d',
'kernel_regularizer': tf.keras.regularizers.l2(weight_decay),
'bias_regularizer': tf.keras.regularizers.l2(weight_decay),
}
if depthwise:
conv2d = tf.keras.layers.DepthwiseConv2D
init_kwargs.update({'depthwise_initializer': CONV_KERNEL_INITIALIZER})
else:
conv2d = tf.keras.layers.Conv2D
init_kwargs.update({'filters': conv_filters,
'kernel_initializer': CONV_KERNEL_INITIALIZER})
x = conv2d(**init_kwargs)(inputs)
if use_batch_norm:
bn_axis = 1 if data_format == 'channels_first' else -1
x = batch_norm(axis=bn_axis,
momentum=bn_momentum,
epsilon=bn_epsilon,
name=name + '_bn')(x)
if activation is not None:
x = tf.keras.layers.Activation(activation,
name=name + '_activation')(x)
return x
def mb_conv_block(inputs: tf.Tensor,
block: BlockConfig,
config: ModelConfig,
prefix: Optional[str] = None):
"""Mobile Inverted Residual Bottleneck.
Args:
inputs: the Keras input to the block
block: BlockConfig, arguments to create a Block
config: ModelConfig, a set of model parameters
prefix: prefix for naming all layers
Returns:
the output of the block
"""
use_se = config.use_se
activation = tf_utils.get_activation(config.activation)
drop_connect_rate = config.drop_connect_rate
data_format = tf.keras.backend.image_data_format()
use_depthwise = block.conv_type == 'depthwise'
prefix = prefix or ''
filters = block.input_filters * block.expand_ratio
x = inputs
if block.fused_conv:
# If we use fused mbconv, skip expansion and use regular conv.
x = conv2d_block(x,
filters,
config,
kernel_size=block.kernel_size,
strides=block.strides,
activation=activation,
name=prefix + 'fused')
else:
if block.expand_ratio != 1:
# Expansion phase
kernel_size = (1, 1) if use_depthwise else (3, 3)
x = conv2d_block(x,
filters,
config,
kernel_size=kernel_size,
activation=activation,
name=prefix + 'expand')
# Depthwise Convolution
if use_depthwise:
x = conv2d_block(x,
conv_filters=None,
config=config,
kernel_size=block.kernel_size,
strides=block.strides,
activation=activation,
depthwise=True,
name=prefix + 'depthwise')
# Squeeze and Excitation phase
if use_se:
assert block.se_ratio is not None
assert 0 < block.se_ratio <= 1
num_reduced_filters = max(1, int(
block.input_filters * block.se_ratio
))
if data_format == 'channels_first':
se_shape = (filters, 1, 1)
else:
se_shape = (1, 1, filters)
se = tf.keras.layers.GlobalAveragePooling2D(name=prefix + 'se_squeeze')(x)
se = tf.keras.layers.Reshape(se_shape, name=prefix + 'se_reshape')(se)
se = conv2d_block(se,
num_reduced_filters,
config,
use_bias=True,
use_batch_norm=False,
activation=activation,
name=prefix + 'se_reduce')
se = conv2d_block(se,
filters,
config,
use_bias=True,
use_batch_norm=False,
activation='sigmoid',
name=prefix + 'se_expand')
x = tf.keras.layers.multiply([x, se], name=prefix + 'se_excite')
# Output phase
x = conv2d_block(x,
block.output_filters,
config,
activation=None,
name=prefix + 'project')
# Add identity so that quantization-aware training can insert quantization
# ops correctly.
x = tf.keras.layers.Activation('linear', name=prefix + 'id')(x)
if (block.id_skip
and all(s == 1 for s in block.strides)
and block.input_filters == block.output_filters):
if drop_connect_rate and drop_connect_rate > 0:
# Apply dropconnect
# The only difference between dropout and dropconnect in TF is scaling by
# drop_connect_rate during training. See:
# https://github.com/keras-team/keras/pull/9898#issuecomment-380577612
x = tf.keras.layers.Dropout(drop_connect_rate,
noise_shape=(None, 1, 1, 1),
name=prefix + 'drop')(x)
x = tf.keras.layers.add([x, inputs], name=prefix + 'add')
return x
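# A minimal sketch of applying one block functionally (hypothetical values;
# `x` is a feature tensor and `config` a ModelConfig):
#
#   block = BlockConfig.from_args(32, 32, 3, 1, 4, (1, 1))
#   y = mb_conv_block(x, block, config, prefix='stack_0/block_0/')
#
# Since input_filters == output_filters and strides are (1, 1), this block
# takes the dropconnect + residual-add branch above.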
def mobilenet_edgetpu(image_input: tf.keras.layers.Input, config: ModelConfig): # pytype: disable=invalid-annotation # typed-keras
"""Creates a MobilenetEdgeTPU graph given the model parameters.
This function is wrapped by the `MobilenetEdgeTPU` class to make a
tf.keras.Model.
Args:
image_input: the input batch of images
config: the model config
Returns:
    The output of the classification model, or, if only the backbone is
    needed, a dictionary of backbone feature levels.
"""
depth_coefficient = config.depth_coefficient
blocks = config.blocks
stem_base_filters = config.stem_base_filters
top_base_filters = config.top_base_filters
activation = tf_utils.get_activation(config.activation)
dropout_rate = config.dropout_rate
drop_connect_rate = config.drop_connect_rate
num_classes = config.num_classes
input_channels = config.input_channels
rescale_input = config.rescale_input
data_format = tf.keras.backend.image_data_format()
dtype = config.dtype
weight_decay = config.weight_decay
x = image_input
if data_format == 'channels_first':
# Happens on GPU/TPU if available.
x = tf.keras.layers.Permute((3, 1, 2))(x)
if rescale_input:
x = common_modules.normalize_images(
x, num_channels=input_channels, dtype=dtype, data_format=data_format)
# Build stem
x = conv2d_block(x,
round_filters(stem_base_filters, config),
config,
kernel_size=[3, 3],
strides=[2, 2],
activation=activation,
name='stem')
# Build blocks
num_blocks_total = sum(block.num_repeat for block in blocks)
block_num = 0
backbone_levels = {}
for stack_idx, block in enumerate(blocks):
assert block.num_repeat > 0
# Update block input and output filters based on depth multiplier
block = block.replace(
input_filters=round_filters(block.input_filters, config),
output_filters=round_filters(block.output_filters, config),
num_repeat=round_repeats(block.num_repeat, depth_coefficient))
# The first block needs to take care of stride and filter size increase
drop_rate = drop_connect_rate * float(block_num) / num_blocks_total
config = config.replace(drop_connect_rate=drop_rate)
block_prefix = 'stack_{}/block_0/'.format(stack_idx)
x = mb_conv_block(x, block, config, block_prefix)
block_num += 1
if block.num_repeat > 1:
block = block.replace(
input_filters=block.output_filters,
strides=[1, 1]
)
for block_idx in range(block.num_repeat - 1):
drop_rate = drop_connect_rate * float(block_num) / num_blocks_total
config = config.replace(drop_connect_rate=drop_rate)
block_prefix = 'stack_{}/block_{}/'.format(stack_idx, block_idx + 1)
x = mb_conv_block(x, block, config, prefix=block_prefix)
block_num += 1
backbone_levels[str(stack_idx)] = x
if config.backbone_only:
return backbone_levels
# Build top
x = conv2d_block(x,
round_filters(top_base_filters, config),
config,
activation=activation,
name='top')
# Build classifier
pool_size = (x.shape.as_list()[1], x.shape.as_list()[2])
x = tf.keras.layers.AveragePooling2D(pool_size, name='top_pool')(x)
if dropout_rate and dropout_rate > 0:
x = tf.keras.layers.Dropout(dropout_rate, name='top_dropout')(x)
x = tf.keras.layers.Conv2D(
num_classes,
1,
kernel_initializer=DENSE_KERNEL_INITIALIZER,
kernel_regularizer=tf.keras.regularizers.l2(weight_decay),
bias_regularizer=tf.keras.regularizers.l2(weight_decay),
name='logits')(
x)
x = tf.keras.layers.Activation('softmax', name='probs')(x)
x = tf.squeeze(x, axis=[1, 2])
return x
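# Drop-connect schedule sketch: the default block table above has 22 repeats
# in total, so with drop_connect_rate=0.1 block b (0-indexed) is built with
# rate 0.1 * b / 22, i.e. 0.0 for the first block and roughly 0.095 for the
# last, scaling stochastic depth linearly with network depth.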
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mobilenet_edgetpu model."""
import os
import tensorflow as tf
from official.projects.edgetpu.vision.modeling import common_modules
from official.projects.edgetpu.vision.modeling import mobilenet_edgetpu_v1_model
from official.projects.edgetpu.vision.modeling import mobilenet_edgetpu_v1_model_blocks
from official.vision.image_classification import preprocessing
# TODO(b/151324383): Enable once training is supported for mobilenet-edgetpu
EXAMPLE_IMAGE = ('third_party/tensorflow_models/official/vision/'
'image_classification/testdata/panda.jpg')
CKPTS = 'gs://**/efficientnets'
class MobilenetEdgeTPUBlocksTest(tf.test.TestCase):
def setUp(self):
    super().setUp()
# Ensure no model duplicates
tf.keras.backend.clear_session()
def test_bottleneck_block(self):
"""Test for creating a model with bottleneck block arguments."""
images = tf.zeros((4, 224, 224, 3), dtype=tf.float32)
tf.keras.backend.set_image_data_format('channels_last')
blocks = [
mobilenet_edgetpu_v1_model_blocks.BlockConfig.from_args(
input_filters=3,
output_filters=6,
kernel_size=3,
num_repeat=3,
expand_ratio=6,
strides=(2, 2),
fused_conv=False,
)
]
config = mobilenet_edgetpu_v1_model.ModelConfig.from_args(
blocks=blocks,
num_classes=10,
use_se=False,
)
model = mobilenet_edgetpu_v1_model.MobilenetEdgeTPU(config)
outputs = model(images, training=True)
self.assertEqual((4, 10), outputs.shape)
ref_var_names = set([
'stem_conv2d/kernel:0',
'stem_bn/gamma:0',
'stem_bn/beta:0',
'stack_0/block_0/expand_conv2d/kernel:0',
'stack_0/block_0/expand_bn/gamma:0',
'stack_0/block_0/expand_bn/beta:0',
'stack_0/block_0/depthwise_conv2d/depthwise_kernel:0',
'stack_0/block_0/depthwise_bn/gamma:0',
'stack_0/block_0/depthwise_bn/beta:0',
'stack_0/block_0/project_conv2d/kernel:0',
'stack_0/block_0/project_bn/gamma:0',
'stack_0/block_0/project_bn/beta:0',
'stack_0/block_1/expand_conv2d/kernel:0',
'stack_0/block_1/expand_bn/gamma:0',
'stack_0/block_1/expand_bn/beta:0',
'stack_0/block_1/depthwise_conv2d/depthwise_kernel:0',
'stack_0/block_1/depthwise_bn/gamma:0',
'stack_0/block_1/depthwise_bn/beta:0',
'stack_0/block_1/project_conv2d/kernel:0',
'stack_0/block_1/project_bn/gamma:0',
'stack_0/block_1/project_bn/beta:0',
'stack_0/block_2/expand_conv2d/kernel:0',
'stack_0/block_2/expand_bn/gamma:0',
'stack_0/block_2/expand_bn/beta:0',
'stack_0/block_2/depthwise_conv2d/depthwise_kernel:0',
'stack_0/block_2/depthwise_bn/gamma:0',
'stack_0/block_2/depthwise_bn/beta:0',
'stack_0/block_2/project_conv2d/kernel:0',
'stack_0/block_2/project_bn/gamma:0',
'stack_0/block_2/project_bn/beta:0',
'top_conv2d/kernel:0',
'top_bn/gamma:0',
'top_bn/beta:0',
'logits/kernel:0',
'logits/bias:0'
])
var_names = set([var.name for var in model.trainable_variables])
self.assertEqual(var_names, ref_var_names)
def test_fused_bottleneck_block(self):
"""Test for creating a model with fused bottleneck block arguments."""
images = tf.zeros((4, 224, 224, 3), dtype=tf.float32)
tf.keras.backend.set_image_data_format('channels_last')
blocks = [
mobilenet_edgetpu_v1_model_blocks.BlockConfig.from_args(
input_filters=3,
output_filters=6,
kernel_size=3,
num_repeat=3,
expand_ratio=6,
strides=(2, 2),
fused_conv=True,
)
]
config = mobilenet_edgetpu_v1_model.ModelConfig.from_args(
blocks=blocks,
num_classes=10,
use_se=False,
)
model = mobilenet_edgetpu_v1_model.MobilenetEdgeTPU(config)
outputs = model(images, training=True)
self.assertEqual((4, 10), outputs.shape)
var_names = {var.name for var in model.trainable_variables}
ref_var_names = [
'stack_0/block_0/fused_conv2d/kernel:0',
'stack_0/block_1/fused_conv2d/kernel:0',
'stack_0/block_2/fused_conv2d/kernel:0',
]
for ref_var_name in ref_var_names:
self.assertIn(ref_var_name, var_names)
def test_variables(self):
"""Test for variables in blocks to be included in `model.variables`."""
images = tf.zeros((4, 224, 224, 3), dtype=tf.float32)
tf.keras.backend.set_image_data_format('channels_last')
blocks = [
mobilenet_edgetpu_v1_model_blocks.BlockConfig.from_args(
input_filters=3,
output_filters=6,
kernel_size=3,
num_repeat=3,
expand_ratio=6,
id_skip=False,
strides=(2, 2),
se_ratio=0.8,
fused_conv=False,
)
]
config = mobilenet_edgetpu_v1_model.ModelConfig.from_args(
blocks=blocks,
num_classes=10,
use_se=True,
)
model = mobilenet_edgetpu_v1_model.MobilenetEdgeTPU(config)
_ = model(images, training=True)
var_names = {var.name for var in model.variables}
self.assertIn('stack_0/block_0/depthwise_conv2d/depthwise_kernel:0',
var_names)
class MobilenetEdgeTPUBuildTest(tf.test.TestCase):
def setUp(self):
    super().setUp()
# Ensure no model duplicates
tf.keras.backend.clear_session()
def test_create_mobilenet_edgetpu(self):
model = mobilenet_edgetpu_v1_model.MobilenetEdgeTPU()
self.assertEqual(common_modules.count_params(model), 4092713)
class MobilenetEdgeTPUPredictTest(tf.test.TestCase):
def setUp(self):
    super().setUp()
# Ensure no model duplicates
tf.keras.backend.clear_session()
def _copy_saved_model_to_local(self, model_ckpt):
# Copy saved model to local first for speed
tmp_path = '/tmp/saved_model'
tf.io.gfile.RecursivelyCopyDir(model_ckpt, tmp_path, overwrite=True)
return tmp_path
def _test_prediction(self, model_name, image_size):
model = mobilenet_edgetpu_v1_model.MobilenetEdgeTPU.from_name(model_name)
# Predict image filled with zeros
images = tf.zeros((4, image_size, image_size, 3), dtype=tf.float32)
pred = model(images, training=False)
self.assertEqual(pred.shape, (4, 1000))
# Predict image with loaded weights
images = preprocessing.load_eval_image(EXAMPLE_IMAGE, image_size)
images = tf.expand_dims(images, axis=0)
model_ckpt = os.path.join(CKPTS, model_name)
model_ckpt = self._copy_saved_model_to_local(model_ckpt)
model = mobilenet_edgetpu_v1_model.MobilenetEdgeTPU.from_name(
model_name, model_weights_path=model_ckpt)
pred = model(images, training=False)
pred = pred[0].numpy()
pred_idx, pred_prob = pred.argmax(), pred.max()
# 388 is 'giant panda' (see labels_map_file)
self.assertEqual(pred_idx, 388)
self.assertGreater(pred_prob, 0.75)
def test_mobilenet_edgetpu_image_shape(self):
self.skipTest(
'TODO(b/151324383): Enable once training is supported for mobilenet-edgetpu'
)
params = dict(input_channels=5, num_classes=20, rescale_input=False)
model = mobilenet_edgetpu_v1_model.MobilenetEdgeTPU.from_name(
'mobilenet_edgetpu', overrides=params)
images = tf.zeros((6, 100, 38, 5), dtype=tf.float32)
pred = model(images, training=False)
self.assertEqual(pred.shape, (6, 20))
def test_mobilenet_edgetpu_predict(self):
self.skipTest(
'TODO(b/151324383): Enable once training is supported for mobilenet-edgetpu'
)
self._test_prediction('mobilenet_edgetpu', 224)
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions for MobilenetEdgeTPUV2 image classification models."""
from typing import Any, Mapping, Optional
from absl import logging
import tensorflow as tf
from official.projects.edgetpu.vision.modeling import common_modules
from official.projects.edgetpu.vision.modeling import mobilenet_edgetpu_v2_model_blocks
ModelConfig = mobilenet_edgetpu_v2_model_blocks.ModelConfig
MODEL_CONFIGS = {
'mobilenet_edgetpu_v2':
mobilenet_edgetpu_v2_model_blocks.mobilenet_edgetpu_v2_s(),
'mobilenet_edgetpu_v2_tiny':
mobilenet_edgetpu_v2_model_blocks.mobilenet_edgetpu_v2_tiny(),
'mobilenet_edgetpu_v2_xs':
mobilenet_edgetpu_v2_model_blocks.mobilenet_edgetpu_v2_xs(),
'mobilenet_edgetpu_v2_s':
mobilenet_edgetpu_v2_model_blocks.mobilenet_edgetpu_v2_s(),
'mobilenet_edgetpu_v2_m':
mobilenet_edgetpu_v2_model_blocks.mobilenet_edgetpu_v2_m(),
'mobilenet_edgetpu_v2_l':
mobilenet_edgetpu_v2_model_blocks.mobilenet_edgetpu_v2_l(),
'autoseg_edgetpu_backbone_xs':
mobilenet_edgetpu_v2_model_blocks.autoseg_edgetpu_backbone_xs(),
'autoseg_edgetpu_backbone_s':
mobilenet_edgetpu_v2_model_blocks.autoseg_edgetpu_backbone_s(),
'autoseg_edgetpu_backbone_m':
mobilenet_edgetpu_v2_model_blocks.autoseg_edgetpu_backbone_m(),
}
@tf.keras.utils.register_keras_serializable(package='Vision')
class MobilenetEdgeTPUV2(tf.keras.Model):
"""Wrapper class for a MobilenetEdgeTPUV2 Keras model.
Contains helper methods to build, manage, and save metadata about the model.
"""
def __init__(self,
model_config_name: Optional[str] = None,
overrides: Optional[Mapping[str, Any]] = None,
**kwargs):
"""Creates a MobilenetEdgeTPUV2 model.
Args:
      model_config_name: (optional) the name of a predefined model config used
        to create the model.
overrides: (optional) a dict containing keys that can override config.
**kwargs: All the rest model arguments in a dictionary.
"""
self.model_config_name = model_config_name
self._self_setattr_tracking = False
self.overrides = overrides or {}
if model_config_name is None:
model_config = ModelConfig()
else:
if model_config_name not in MODEL_CONFIGS:
supported_model_list = list(MODEL_CONFIGS.keys())
        raise ValueError(f'Unknown model name {model_config_name}. Only '
                         f'supports model configs in {supported_model_list}.')
model_config = MODEL_CONFIGS[model_config_name]
self.config = model_config.replace(**self.overrides)
input_channels = self.config.input_channels
model_name = self.config.model_name
if isinstance(self.config.resolution, tuple):
input_shape = (self.config.resolution[0], self.config.resolution[1],
input_channels)
else:
input_shape = (self.config.resolution, self.config.resolution,
input_channels)
image_input = tf.keras.layers.Input(shape=input_shape)
output = mobilenet_edgetpu_v2_model_blocks.mobilenet_edgetpu_v2(
image_input, self.config)
if not isinstance(output, list):
# Cast to float32 in case we have a different model dtype
output = tf.cast(output, tf.float32)
self._output_specs = output.get_shape()
else:
if self.config.features_as_dict:
# Dict output is required for the decoder ASPP module.
self._output_specs = {
str(i): output[i].get_shape() for i in range(len(output))
}
output = {str(i): output[i] for i in range(len(output))}
else:
# edgetpu/tasks/segmentation assumes features as list.
self._output_specs = [feat.get_shape() for feat in output]
logging.info('Building model %s with params %s',
model_name,
self.config)
super(MobilenetEdgeTPUV2, self).__init__(
inputs=image_input, outputs=output, **kwargs)
self._self_setattr_tracking = True
@classmethod
def from_name(cls,
model_name: str,
model_weights_path: Optional[str] = None,
checkpoint_format: Optional[str] = 'tf_checkpoint',
overrides: Optional[Mapping[str, Any]] = None):
"""Constructs an MobilenetEdgeTPUV2 model from a predefined model name.
E.g., `MobilenetEdgeTPUV2.from_name('mobilenet_edgetpu_v2_s')`.
Args:
model_name: the predefined model name
model_weights_path: the path to the weights (h5 file or saved model dir)
checkpoint_format: the model weights format. One of 'tf_checkpoint' or
'keras_checkpoint'.
overrides: (optional) a dict containing keys that can override config
Returns:
      A constructed MobilenetEdgeTPUV2 instance.
"""
overrides = dict(overrides) if overrides else {}
# One can define their own custom models if necessary
MODEL_CONFIGS.update(overrides.pop('model_config', {}))
model = cls(model_config_name=model_name, overrides=overrides)
if model_weights_path:
common_modules.load_weights(model,
model_weights_path,
checkpoint_format=checkpoint_format)
return model
def get_config(self):
config = {'model_config_name': self.model_config_name,
'overrides': self.overrides}
keras_model_config = super().get_config()
return dict(list(config.items()) + list(keras_model_config.items()))
@classmethod
def from_config(cls, config, custom_objects=None):
return cls(model_config_name=config['model_config_name'],
overrides=config['overrides'])
@property
def output_specs(self):
"""A dict of {level: TensorShape} pairs for the model output."""
return self._output_specs
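# Example usage (a sketch): building the small variant as a segmentation
# backbone that returns intermediate features instead of logits.
#
#   model = MobilenetEdgeTPUV2.from_name(
#       'mobilenet_edgetpu_v2_s',
#       overrides={'backbone_only': True, 'features_as_dict': True})
#   feats = model(tf.zeros((1, 224, 224, 3)), training=False)
#
# Here `feats` is a dict keyed by backbone level ('0', '1', ...), matching
# `output_specs` above.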
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions for MobilenetEdgeTPUV2 model's building blocks."""
import dataclasses
import math
from typing import Any, Dict, List, Optional, Tuple, Union
# Import libraries
from absl import logging
import tensorflow as tf
from official.modeling import tf_utils
from official.modeling.hyperparams import base_config
from official.modeling.hyperparams import oneof
from official.projects.edgetpu.vision.modeling import common_modules
from official.projects.edgetpu.vision.modeling import custom_layers
@dataclasses.dataclass
class BlockType(oneof.OneOfConfig):
"""Block OP types representing IBN version."""
type: str = 'ibn_dw'
skip: str = 'skip'
ibn_dw: str = 'ibn_dw'
ibn_fused: str = 'ibn_fused'
ibn_grouped: str = 'ibn_grouped'
ibn_fused_grouped: str = 'ibn_fused_grouped'
@dataclasses.dataclass
class BlockSearchConfig(base_config.Config):
"""Config for searchable BlockConfig parameters."""
op_type: BlockType = BlockType()
kernel_size: Optional[int] = None
expand_ratio: Optional[int] = None
stride: Optional[int] = None
group_size: Optional[int] = None
@dataclasses.dataclass
class BlockConfig(base_config.Config):
"""Full config for a single MB Conv Block."""
input_filters: int = 0
output_filters: int = 0
kernel_size: int = 3
num_repeat: int = 1
expand_ratio: int = 1
strides: Tuple[int, int] = (1, 1)
se_ratio: Optional[float] = None
id_skip: bool = True
fused_expand: bool = False
fused_project: bool = False
conv_type: str = 'depthwise'
group_size: Optional[int] = None
@classmethod
def from_search_config(cls,
input_filters: int,
output_filters: int,
block_search_config: BlockSearchConfig,
num_repeat: int = 1,
se_ratio: Optional[float] = None,
id_skip: bool = True) -> 'BlockConfig':
"""Creates BlockConfig from the given parameters."""
block_op_type = block_search_config.op_type
if block_op_type.type == BlockType.skip:
raise ValueError('Received skip type within block creation.')
elif block_op_type.type == BlockType.ibn_dw:
fused_expand = False
fused_project = False
conv_type = 'depthwise'
elif block_op_type.type == BlockType.ibn_fused:
fused_expand = True
fused_project = False
conv_type = 'full'
elif block_op_type.type == BlockType.ibn_fused_grouped:
fused_expand = True
fused_project = False
conv_type = 'group'
elif block_op_type.type == BlockType.ibn_grouped:
fused_expand = False
fused_project = False
conv_type = 'group'
else:
raise NotImplementedError(f'Unsupported IBN type {block_op_type.type}.')
return cls.from_args(
input_filters=input_filters,
output_filters=output_filters,
kernel_size=block_search_config.kernel_size,
num_repeat=num_repeat,
expand_ratio=block_search_config.expand_ratio,
strides=(block_search_config.stride, block_search_config.stride),
se_ratio=se_ratio,
id_skip=id_skip,
fused_expand=fused_expand,
fused_project=fused_project,
conv_type=conv_type,
group_size=block_search_config.group_size)
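# Worked example: an 'ibn_fused' search entry with kernel size 3, expand
# ratio 8, and stride 2 maps to a fused-expansion full conv block:
#
#   BlockConfig.from_search_config(
#       input_filters=24, output_filters=48,
#       block_search_config=BlockSearchConfig.from_args(
#           BlockType.from_args('ibn_fused'), 3, 8, 2))
#
# yields fused_expand=True, conv_type='full', and strides=(2, 2).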
@dataclasses.dataclass
class BlockGroupConfig(base_config.Config):
"""Config for group of blocks that share the same filter size."""
blocks: List[BlockSearchConfig] = dataclasses.field(default_factory=list)
filters: int = 64
def _default_mobilenet_edgetpu_v2_topology():
return [
# Block Group 0
BlockGroupConfig(
blocks=[
# BlockSearchConfig: op_type, kernel_size, expand_ratio, stride
BlockSearchConfig.from_args(
BlockType.from_args('ibn_fused'), 3, 1, 1),
],
filters=24),
# Block Group 1
BlockGroupConfig(
blocks=[
BlockSearchConfig.from_args(
BlockType.from_args('ibn_fused'), 3, 8, 2),
BlockSearchConfig.from_args(
BlockType.from_args('ibn_fused_grouped'), 3, 4, 1),
],
filters=48),
# Block Group 2
BlockGroupConfig(
blocks=[
BlockSearchConfig.from_args(
BlockType.from_args('ibn_fused'), 3, 8, 2),
BlockSearchConfig.from_args(
BlockType.from_args('ibn_fused_grouped'), 3, 4, 1),
BlockSearchConfig.from_args(
BlockType.from_args('ibn_fused'), 3, 4, 1),
BlockSearchConfig.from_args(
BlockType.from_args('ibn_fused_grouped'), 3, 4, 1),
],
filters=64),
# Block Group 3
BlockGroupConfig(
blocks=[
BlockSearchConfig.from_args(
BlockType.from_args('ibn_fused'), 3, 8, 2),
BlockSearchConfig.from_args(
BlockType.from_args('ibn_dw'), 3, 4, 1),
BlockSearchConfig.from_args(
BlockType.from_args('ibn_dw'), 3, 4, 1),
BlockSearchConfig.from_args(
BlockType.from_args('ibn_dw'), 3, 4, 1),
],
filters=128),
# Block Group 4
BlockGroupConfig(
blocks=[
BlockSearchConfig.from_args(
BlockType.from_args('ibn_dw'), 3, 8, 1),
BlockSearchConfig.from_args(
BlockType.from_args('ibn_dw'), 3, 4, 1),
BlockSearchConfig.from_args(
BlockType.from_args('ibn_dw'), 3, 4, 1),
BlockSearchConfig.from_args(
BlockType.from_args('ibn_dw'), 3, 4, 1),
],
filters=160),
# Block Group 5
BlockGroupConfig(
blocks=[
BlockSearchConfig.from_args(
BlockType.from_args('ibn_dw'), 3, 8, 2),
BlockSearchConfig.from_args(
BlockType.from_args('ibn_dw'), 3, 4, 1),
BlockSearchConfig.from_args(
BlockType.from_args('ibn_dw'), 3, 4, 1),
BlockSearchConfig.from_args(
BlockType.from_args('ibn_dw'), 3, 4, 1),
],
filters=192),
# Block Group 6
BlockGroupConfig(
blocks=[
BlockSearchConfig.from_args(
BlockType.from_args('ibn_dw'), 3, 8, 1),
],
filters=256),
]
@dataclasses.dataclass
class TopologyConfig(base_config.Config):
"""Config for model topology as a collection of BlockGroupConfigs."""
block_groups: List[BlockGroupConfig] = dataclasses.field(
default_factory=_default_mobilenet_edgetpu_v2_topology)
@dataclasses.dataclass
class ModelConfig(base_config.Config):
"""Default Config for MobilenetEdgeTPUV2."""
width_coefficient: float = 1.0
depth_coefficient: float = 1.0
resolution: Union[int, Tuple[int, int]] = 224
dropout_rate: float = 0.1
stem_base_filters: int = 64
stem_kernel_size: int = 5
top_base_filters: int = 1280
blocks: Tuple[BlockConfig, ...] = (
# (input_filters, output_filters, kernel_size, num_repeat,
      # expand_ratio, strides, se_ratio, id_skip, fused_expand, conv_type)
# pylint: disable=bad-whitespace
BlockConfig.from_args(
stem_base_filters, 24, 3, 1, 1, (1, 1), conv_type='full'),
BlockConfig.from_args(
24, 48, 3, 1, 8, (2, 2), fused_expand=True, conv_type='full'),
BlockConfig.from_args(
48, 48, 3, 1, 4, (1, 1), fused_expand=True, conv_type='group'),
BlockConfig.from_args(
48, 64, 3, 1, 8, (2, 2), fused_expand=True, conv_type='full'),
BlockConfig.from_args(
64, 64, 3, 1, 4, (1, 1), fused_expand=True, conv_type='group'),
BlockConfig.from_args(
64, 64, 3, 1, 4, (1, 1), fused_expand=True, conv_type='full'),
BlockConfig.from_args(
64, 64, 3, 1, 4, (1, 1), fused_expand=True, conv_type='group'),
BlockConfig.from_args(
64, 128, 3, 1, 8, (2, 2), fused_expand=True, conv_type='full'),
BlockConfig.from_args(128, 128, 3, 3, 4, (1, 1)),
BlockConfig.from_args(128, 160, 3, 1, 8, (1, 1)),
BlockConfig.from_args(160, 160, 3, 3, 4, (1, 1)),
BlockConfig.from_args(160, 192, 5, 1, 8, (2, 2)),
BlockConfig.from_args(192, 192, 5, 3, 4, (1, 1)),
BlockConfig.from_args(192, 256, 5, 1, 8, (1, 1)),
# pylint: enable=bad-whitespace
)
activation: str = 'relu'
batch_norm: str = 'default'
bn_momentum: float = 0.99
bn_epsilon: float = 1e-3
# While the original implementation used a weight decay of 1e-5,
# tf.nn.l2_loss divides it by 2, so we halve this to compensate in Keras
weight_decay: float = 5e-6
drop_connect_rate: float = 0.1
depth_divisor: int = 8
min_depth: Optional[int] = None
# No Squeeze/Excite for MobilenetEdgeTPUV2
use_se: bool = False
input_channels: int = 3
num_classes: int = 1001
model_name: str = 'mobilenet_edgetpu_v2'
rescale_input: bool = False
data_format: str = 'channels_last'
dtype: str = 'float32'
# The number of filters in each group. HW arch dependent.
group_base_size: int = 64
backbone_only: bool = False
features_as_dict: bool = False
def mobilenet_edgetpu_v2_base(
width_coefficient: float = 1.0,
depth_coefficient: float = 1.0,
stem_base_filters: int = 64,
stem_kernel_size: int = 5,
top_base_filters: int = 1280,
group_base_size: int = 64,
dropout_rate: float = 0.2,
drop_connect_rate: float = 0.1,
filter_size_overrides: Optional[Dict[int, int]] = None,
block_op_overrides: Optional[Dict[int, Dict[int, Dict[str, Any]]]] = None,
block_group_overrides: Optional[Dict[int, Dict[str, Any]]] = None):
"""Creates MobilenetEdgeTPUV2 ModelConfig based on tuning parameters."""
config = ModelConfig()
param_overrides = {
'width_coefficient': width_coefficient,
'depth_coefficient': depth_coefficient,
'stem_base_filters': stem_base_filters,
'stem_kernel_size': stem_kernel_size,
'top_base_filters': top_base_filters,
'group_base_size': group_base_size,
'dropout_rate': dropout_rate,
'drop_connect_rate': drop_connect_rate
}
config = config.replace(**param_overrides)
topology_config = TopologyConfig()
if filter_size_overrides:
for group_id in filter_size_overrides:
topology_config.block_groups[group_id].filters = filter_size_overrides[
group_id]
if block_op_overrides:
for group_id in block_op_overrides:
for block_id in block_op_overrides[group_id]:
replaced_block = topology_config.block_groups[group_id].blocks[
block_id].replace(**block_op_overrides[group_id][block_id])
topology_config.block_groups[group_id].blocks[block_id] = replaced_block
if block_group_overrides:
for group_id in block_group_overrides:
replaced_group = topology_config.block_groups[group_id].replace(
**block_group_overrides[group_id])
topology_config.block_groups[group_id] = replaced_group
blocks = ()
input_filters = stem_base_filters
for group in topology_config.block_groups:
for block_search in group.blocks:
      if block_search.op_type.type != BlockType.skip:
block = BlockConfig.from_search_config(
input_filters=input_filters,
output_filters=group.filters,
block_search_config=block_search)
blocks += (block,)
# Set input filters for the next block
input_filters = group.filters
config = config.replace(blocks=blocks)
return config
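# Override sketch: the model variants below pass dicts keyed by block-group
# index. For instance, filter_size_overrides={0: 16, 1: 32} shrinks the first
# two groups of the default topology, while a block_op_overrides entry such as
# {2: {0: {'op_type': BlockType.from_args('ibn_fused_grouped')}}} swaps the op
# type of a single block, as mobilenet_edgetpu_v2_tiny does further below.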
def autoseg_edgetpu_backbone_base(
width_coefficient: float = 1.0,
depth_coefficient: float = 1.0,
stem_base_filters: int = 64,
stem_kernel_size: int = 5,
top_base_filters: int = 1280,
group_base_size: int = 64,
dropout_rate: float = 0.2,
drop_connect_rate: float = 0.1,
blocks_overrides: Optional[Tuple[BlockConfig, ...]] = None):
"""Creates a edgetpu ModelConfig based on search on segmentation."""
config = ModelConfig()
config.depth_divisor = 4
param_overrides = {
'width_coefficient': width_coefficient,
'depth_coefficient': depth_coefficient,
'stem_base_filters': stem_base_filters,
'stem_kernel_size': stem_kernel_size,
'top_base_filters': top_base_filters,
'group_base_size': group_base_size,
'dropout_rate': dropout_rate,
'drop_connect_rate': drop_connect_rate,
}
if blocks_overrides:
param_overrides['blocks'] = blocks_overrides
config = config.replace(**param_overrides)
return config
def autoseg_edgetpu_backbone_s() -> ModelConfig:
"""AutoML searched model with 2.5ms target simulated latency."""
stem_base_filters = 32
stem_kernel_size = 3
top_base_filters = 1280
blocks = (
# (input_filters, output_filters, kernel_size, num_repeat,
      # expand_ratio, strides, se_ratio, id_skip, fused_expand, conv_type)
# pylint: disable=bad-whitespace
BlockConfig.from_args(
stem_base_filters,
12,
3,
1,
1, (1, 1),
fused_expand=True,
conv_type='full'),
BlockConfig.from_args(
12, 36, 3, 1, 6, (2, 2), fused_expand=True, conv_type='full'),
BlockConfig.from_args(36, 18, 5, 1, 3, (1, 1)),
BlockConfig.from_args(
18, 60, 5, 1, 6, (2, 2), fused_expand=True, conv_type='full'),
BlockConfig.from_args(60, 60, 3, 1, 3, (1, 1)),
BlockConfig.from_args(60, 120, 5, 1, 6, (2, 2)),
BlockConfig.from_args(120, 120, 3, 1, 3, (1, 1)),
BlockConfig.from_args(120, 120, 5, 1, 6, (1, 1)),
BlockConfig.from_args(120, 112, 3, 1, 6, (1, 1)),
BlockConfig.from_args(112, 112, 5, 2, 6, (1, 1)),
BlockConfig.from_args(112, 112, 5, 1, 1, (2, 2), id_skip=False),
BlockConfig.from_args(
112, 192, 1, 1, 6, (1, 1), fused_expand=True, id_skip=False),
BlockConfig.from_args(192, 192, 5, 1, 1, (1, 1), id_skip=False),
BlockConfig.from_args(
192, 96, 1, 1, 6, (1, 1), fused_expand=True, id_skip=False),
BlockConfig.from_args(96, 96, 5, 1, 3, (1, 1)),
BlockConfig.from_args(96, 96, 5, 1, 1, (1, 1), id_skip=False),
BlockConfig.from_args(
96, 192, 1, 1, 6, (1, 1), fused_expand=True, id_skip=False),
BlockConfig.from_args(192, 192, 5, 1, 1, (1, 1), id_skip=False),
BlockConfig.from_args(
192, 160, 1, 1, 3, (1, 1), fused_expand=True, id_skip=False),
# pylint: enable=bad-whitespace
)
return autoseg_edgetpu_backbone_base(
stem_base_filters=stem_base_filters,
stem_kernel_size=stem_kernel_size,
top_base_filters=top_base_filters,
blocks_overrides=blocks,
dropout_rate=0.2,
drop_connect_rate=0.2)
def autoseg_edgetpu_backbone_xs() -> ModelConfig:
"""AutoML searched model with 2ms target simulated latency."""
stem_base_filters = 32
stem_kernel_size = 3
top_base_filters = 1280
blocks = (
# (input_filters, output_filters, kernel_size, num_repeat,
      # expand_ratio, strides, se_ratio, id_skip, fused_expand, conv_type)
# pylint: disable=bad-whitespace
BlockConfig.from_args(
stem_base_filters,
12,
3,
1,
1, (1, 1),
fused_expand=True,
conv_type='full'),
BlockConfig.from_args(
12, 24, 3, 1, 6, (2, 2), fused_expand=True, conv_type='full'),
BlockConfig.from_args(24, 24, 3, 1, 3, (1, 1)),
BlockConfig.from_args(
24, 60, 3, 1, 3, (2, 2), fused_expand=True, conv_type='full'),
BlockConfig.from_args(60, 40, 3, 1, 6, (1, 1)),
BlockConfig.from_args(40, 40, 5, 1, 3, (2, 2)),
BlockConfig.from_args(40, 40, 3, 1, 6, (1, 1)),
BlockConfig.from_args(
40, 120, 3, 1, 6, (1, 1), fused_expand=True, conv_type='full'),
BlockConfig.from_args(120, 168, 3, 1, 6, (1, 1)),
BlockConfig.from_args(168, 84, 5, 1, 6, (1, 1)),
BlockConfig.from_args(84, 84, 5, 1, 3, (1, 1)),
BlockConfig.from_args(84, 84, 5, 1, 1, (2, 2), id_skip=False),
BlockConfig.from_args(
84, 288, 1, 1, 6, (1, 1), fused_expand=True, id_skip=False),
BlockConfig.from_args(288, 288, 5, 1, 1, (1, 1), id_skip=False),
BlockConfig.from_args(
288, 96, 1, 1, 3, (1, 1), fused_expand=True, id_skip=False),
BlockConfig.from_args(96, 96, 5, 1, 1, (1, 1), id_skip=False),
BlockConfig.from_args(
96, 96, 1, 1, 6, (1, 1), fused_expand=True, id_skip=False),
BlockConfig.from_args(96, 96, 5, 1, 1, (1, 1), id_skip=False),
BlockConfig.from_args(
96, 96, 1, 1, 6, (1, 1), fused_expand=True, id_skip=False),
BlockConfig.from_args(96, 480, 5, 1, 3, (1, 1)),
# pylint: enable=bad-whitespace
)
return autoseg_edgetpu_backbone_base(
stem_base_filters=stem_base_filters,
stem_kernel_size=stem_kernel_size,
top_base_filters=top_base_filters,
blocks_overrides=blocks,
dropout_rate=0.2,
drop_connect_rate=0.2)
def autoseg_edgetpu_backbone_m() -> ModelConfig:
"""AutoML searched model with 3ms target simulated latency."""
stem_base_filters = 32
stem_kernel_size = 3
top_base_filters = 1280
blocks = (
# (input_filters, output_filters, kernel_size, num_repeat,
      # expand_ratio, strides, se_ratio, id_skip, fused_expand, conv_type)
# pylint: disable=bad-whitespace
BlockConfig.from_args(stem_base_filters, 16, 5, 1, 1, (1, 1)),
BlockConfig.from_args(
16, 36, 3, 1, 6, (2, 2), fused_expand=True, conv_type='full'),
BlockConfig.from_args(36, 36, 3, 1, 3, (1, 1)),
BlockConfig.from_args(
36, 60, 3, 1, 6, (2, 2), fused_expand=True, conv_type='full'),
BlockConfig.from_args(60, 60, 3, 1, 6, (1, 1)),
BlockConfig.from_args(
60, 120, 5, 1, 6, (2, 2), fused_expand=True, conv_type='full'),
BlockConfig.from_args(120, 120, 5, 1, 6, (1, 1)),
BlockConfig.from_args(
120, 80, 3, 1, 6, (1, 1), fused_expand=True, conv_type='full'),
BlockConfig.from_args(80, 168, 3, 1, 6, (1, 1)),
BlockConfig.from_args(168, 168, 5, 1, 6, (1, 1)),
BlockConfig.from_args(168, 168, 5, 1, 1, (1, 1), id_skip=False),
BlockConfig.from_args(
168, 168, 1, 1, 6, (1, 1), fused_expand=True, id_skip=False),
BlockConfig.from_args(168, 168, 3, 1, 1, (2, 2), id_skip=False),
BlockConfig.from_args(
168, 192, 1, 1, 3, (1, 1), fused_expand=True, id_skip=False),
BlockConfig.from_args(192, 192, 5, 1, 1, (1, 1), id_skip=False),
BlockConfig.from_args(
192, 288, 1, 1, 6, (1, 1), fused_expand=True, id_skip=False),
BlockConfig.from_args(288, 288, 5, 1, 1, (1, 1), id_skip=False),
BlockConfig.from_args(
288, 96, 1, 1, 6, (1, 1), fused_expand=True, id_skip=False),
BlockConfig.from_args(96, 96, 5, 1, 1, (1, 1), id_skip=False),
BlockConfig.from_args(
96, 192, 1, 1, 3, (1, 1), fused_expand=True, id_skip=False),
BlockConfig.from_args(192, 192, 5, 1, 1, (1, 1), id_skip=False),
BlockConfig.from_args(
192, 320, 1, 1, 3, (1, 1), fused_expand=True, id_skip=False),
# pylint: enable=bad-whitespace
)
return autoseg_edgetpu_backbone_base(
stem_base_filters=stem_base_filters,
stem_kernel_size=stem_kernel_size,
top_base_filters=top_base_filters,
blocks_overrides=blocks,
dropout_rate=0.3,
drop_connect_rate=0.3)
def mobilenet_edgetpu_v2_tiny() -> ModelConfig:
"""MobilenetEdgeTPUV2 tiny model config."""
stem_base_filters = 32
stem_kernel_size = 5
top_base_filters = 1280
filter_sizes = [16, 32, 48, 80, 112, 160, 192]
filter_size_overrides = {
k: v for (k, v) in zip(range(len(filter_sizes)), filter_sizes)
}
block_op_overrides = {
2: {
0: {'op_type': BlockType.from_args('ibn_fused_grouped')},
2: {'op_type': BlockType.from_args('ibn_fused_grouped')},
},
3: {
0: {'op_type': BlockType.from_args('ibn_fused_grouped')},
}
}
return mobilenet_edgetpu_v2_base(
stem_base_filters=stem_base_filters,
stem_kernel_size=stem_kernel_size,
top_base_filters=top_base_filters,
filter_size_overrides=filter_size_overrides,
block_op_overrides=block_op_overrides,
dropout_rate=0.05,
drop_connect_rate=0.05)
def mobilenet_edgetpu_v2_xs() -> ModelConfig:
"""MobilenetEdgeTPUV2 extra small model config."""
stem_base_filters = 32
stem_kernel_size = 5
top_base_filters = 1280
filter_sizes = [16, 32, 48, 96, 144, 160, 192]
filter_size_overrides = {
k: v for (k, v) in zip(range(len(filter_sizes)), filter_sizes)
}
return mobilenet_edgetpu_v2_base(
stem_base_filters=stem_base_filters,
stem_kernel_size=stem_kernel_size,
top_base_filters=top_base_filters,
filter_size_overrides=filter_size_overrides,
dropout_rate=0.05,
drop_connect_rate=0.05)
def mobilenet_edgetpu_v2_s():
"""MobilenetEdgeTPUV2 small model config."""
stem_base_filters = 64
stem_kernel_size = 5
top_base_filters = 1280
filter_sizes = [24, 48, 64, 128, 160, 192, 256]
filter_size_overrides = {
k: v for (k, v) in zip(range(len(filter_sizes)), filter_sizes)
}
return mobilenet_edgetpu_v2_base(
stem_base_filters=stem_base_filters,
stem_kernel_size=stem_kernel_size,
top_base_filters=top_base_filters,
filter_size_overrides=filter_size_overrides)
def mobilenet_edgetpu_v2_m():
"""MobilenetEdgeTPUV2 medium model config."""
stem_base_filters = 64
stem_kernel_size = 5
top_base_filters = 1344
filter_sizes = [32, 64, 80, 160, 192, 240, 320]
filter_size_overrides = {
k: v for (k, v) in zip(range(len(filter_sizes)), filter_sizes)
}
return mobilenet_edgetpu_v2_base(
stem_base_filters=stem_base_filters,
stem_kernel_size=stem_kernel_size,
top_base_filters=top_base_filters,
filter_size_overrides=filter_size_overrides)
def mobilenet_edgetpu_v2_l():
"""MobilenetEdgeTPUV2 large model config."""
stem_base_filters = 64
stem_kernel_size = 7
top_base_filters = 1408
filter_sizes = [32, 64, 96, 192, 240, 256, 384]
filter_size_overrides = {
k: v for (k, v) in zip(range(len(filter_sizes)), filter_sizes)
}
group_base_size = 128
return mobilenet_edgetpu_v2_base(
stem_base_filters=stem_base_filters,
stem_kernel_size=stem_kernel_size,
top_base_filters=top_base_filters,
group_base_size=group_base_size,
filter_size_overrides=filter_size_overrides)
CONV_KERNEL_INITIALIZER = {
'class_name': 'VarianceScaling',
'config': {
'scale': 2.0,
'mode': 'fan_out',
# Note: this is a truncated normal distribution
'distribution': 'normal'
}
}
DENSE_KERNEL_INITIALIZER = {
'class_name': 'VarianceScaling',
'config': {
'scale': 1 / 3.0,
'mode': 'fan_out',
'distribution': 'uniform'
}
}
def round_filters(filters: int,
config: ModelConfig) -> int:
"""Round number of filters based on width coefficient."""
width_coefficient = config.width_coefficient
min_depth = config.min_depth
divisor = config.depth_divisor
orig_filters = filters
if not width_coefficient:
return filters
filters *= width_coefficient
min_depth = min_depth or divisor
new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_filters < 0.9 * filters:
new_filters += divisor
logging.info('round_filter input=%s output=%s', orig_filters, new_filters)
return int(new_filters)
def round_repeats(repeats: int, depth_coefficient: float) -> int:
"""Round number of repeats based on depth coefficient."""
return int(math.ceil(depth_coefficient * repeats))
def groupconv2d_block(conv_filters: Optional[int],
config: ModelConfig,
kernel_size: Any = (1, 1),
strides: Any = (1, 1),
group_size: Optional[int] = None,
use_batch_norm: bool = True,
use_bias: bool = False,
activation: Any = None,
name: Optional[str] = None) -> tf.keras.layers.Layer:
"""2D group convolution with batchnorm and activation."""
batch_norm = common_modules.get_batch_norm(config.batch_norm)
bn_momentum = config.bn_momentum
bn_epsilon = config.bn_epsilon
data_format = tf.keras.backend.image_data_format()
weight_decay = config.weight_decay
if group_size is None:
group_size = config.group_base_size
name = name or ''
  # Compute the number of groups.
  if conv_filters % group_size != 0:
    raise ValueError(f'Number of filters: {conv_filters} is not divisible by '
                     f'size of the groups: {group_size}')
  groups = conv_filters // group_size
# Collect args based on what kind of groupconv2d block is desired
init_kwargs = {
'kernel_size': kernel_size,
'strides': strides,
'use_bias': use_bias,
'padding': 'same',
'name': name + '_groupconv2d',
'kernel_regularizer': tf.keras.regularizers.l2(weight_decay),
'bias_regularizer': tf.keras.regularizers.l2(weight_decay),
'filters': conv_filters,
'groups': groups,
'batch_norm_layer': batch_norm if use_batch_norm else None,
'bn_epsilon': bn_epsilon,
'bn_momentum': bn_momentum,
'activation': activation,
'data_format': data_format,
}
return custom_layers.GroupConv2D(**init_kwargs)
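# Grouping sketch: with the default group_base_size of 64, a 128-filter group
# conv is split into 128 // 64 = 2 groups; conv_filters must divide evenly by
# the group size or the ValueError above is raised.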
def conv2d_block_as_layers(
conv_filters: Optional[int],
config: ModelConfig,
kernel_size: Any = (1, 1),
strides: Any = (1, 1),
use_batch_norm: bool = True,
use_bias: bool = False,
activation: Any = None,
depthwise: bool = False,
name: Optional[str] = None) -> List[tf.keras.layers.Layer]:
"""A conv2d followed by batch norm and an activation."""
batch_norm = common_modules.get_batch_norm(config.batch_norm)
bn_momentum = config.bn_momentum
bn_epsilon = config.bn_epsilon
data_format = tf.keras.backend.image_data_format()
weight_decay = config.weight_decay
name = name or ''
# Collect args based on what kind of conv2d block is desired
init_kwargs = {
'kernel_size': kernel_size,
'strides': strides,
'use_bias': use_bias,
'padding': 'same',
'name': name + '_conv2d',
'kernel_regularizer': tf.keras.regularizers.l2(weight_decay),
'bias_regularizer': tf.keras.regularizers.l2(weight_decay),
}
sequential_layers: List[tf.keras.layers.Layer] = []
if depthwise:
conv2d = tf.keras.layers.DepthwiseConv2D
init_kwargs.update({'depthwise_initializer': CONV_KERNEL_INITIALIZER})
else:
conv2d = tf.keras.layers.Conv2D
init_kwargs.update({'filters': conv_filters,
'kernel_initializer': CONV_KERNEL_INITIALIZER})
sequential_layers.append(conv2d(**init_kwargs))
if use_batch_norm:
bn_axis = 1 if data_format == 'channels_first' else -1
sequential_layers.append(
batch_norm(
axis=bn_axis,
momentum=bn_momentum,
epsilon=bn_epsilon,
name=name + '_bn'))
if activation is not None:
sequential_layers.append(
tf.keras.layers.Activation(activation, name=name + '_activation'))
return sequential_layers
def conv2d_block(inputs: tf.Tensor,
conv_filters: Optional[int],
config: ModelConfig,
kernel_size: Any = (1, 1),
strides: Any = (1, 1),
use_batch_norm: bool = True,
use_bias: bool = False,
activation: Any = None,
depthwise: bool = False,
name: Optional[str] = None) -> tf.Tensor:
"""Compatibility with third_party/car/deep_nets."""
x = inputs
for layer in conv2d_block_as_layers(conv_filters, config, kernel_size,
strides, use_batch_norm, use_bias,
activation, depthwise, name):
x = layer(x)
return x
# Do not inherit from tf.keras.layers.Layer: doing so breaks weights loading.
class _MbConvBlock:
"""Mobile Inverted Residual Bottleneck composite layer."""
def __call__(self, inputs: tf.Tensor, training=False):
x = inputs
for layer in self.expand_block:
x = layer(x)
if self.squeeze_excitation:
se = x
for layer in self.squeeze_excitation:
se = layer(se)
x = tf.keras.layers.multiply([x, se], name=self.name + 'se_excite')
for layer in self.project_block:
x = layer(x)
if self.has_skip_add:
x = tf.keras.layers.add([x, inputs], name=self.name + 'add')
return x
def __init__(self,
block: BlockConfig,
config: ModelConfig,
prefix: Optional[str] = None):
"""Mobile Inverted Residual Bottleneck.
Args:
block: BlockConfig, arguments to create a Block
config: ModelConfig, a set of model parameters
prefix: prefix for naming all layers
"""
use_se = config.use_se
activation = tf_utils.get_activation(config.activation)
drop_connect_rate = config.drop_connect_rate
data_format = tf.keras.backend.image_data_format()
use_depthwise = block.conv_type == 'depthwise'
use_groupconv = block.conv_type == 'group'
prefix = prefix or ''
self.name = prefix
filters = block.input_filters * block.expand_ratio
self.expand_block: List[tf.keras.layers.Layer] = []
self.squeeze_excitation: List[tf.keras.layers.Layer] = []
self.project_block: List[tf.keras.layers.Layer] = []
if block.fused_project:
raise NotImplementedError('Fused projection is not supported.')
if block.fused_expand and block.expand_ratio != 1:
# If we use fused mbconv, fuse expansion with the main kernel.
# If conv_type is depthwise we still fuse it to a full conv.
if use_groupconv:
self.expand_block.append(groupconv2d_block(
filters,
config,
kernel_size=block.kernel_size,
strides=block.strides,
group_size=block.group_size,
activation=activation,
name=prefix + 'fused'))
else:
self.expand_block.extend(conv2d_block_as_layers(
filters,
config,
kernel_size=block.kernel_size,
strides=block.strides,
activation=activation,
name=prefix + 'fused'))
else:
if block.expand_ratio != 1:
# Expansion phase with a pointwise conv
self.expand_block.extend(conv2d_block_as_layers(
filters,
config,
kernel_size=(1, 1),
activation=activation,
name=prefix + 'expand'))
# Main kernel, after the expansion (if applicable, i.e. not fused).
if use_depthwise:
self.expand_block.extend(conv2d_block_as_layers(
conv_filters=filters,
config=config,
kernel_size=block.kernel_size,
strides=block.strides,
activation=activation,
depthwise=True,
name=prefix + 'depthwise'))
elif use_groupconv:
self.expand_block.append(groupconv2d_block(
conv_filters=filters,
config=config,
kernel_size=block.kernel_size,
strides=block.strides,
group_size=block.group_size,
activation=activation,
name=prefix + 'group'))
# Squeeze and Excitation phase
if use_se:
assert block.se_ratio is not None
assert 0 < block.se_ratio <= 1
num_reduced_filters = max(1, int(
block.input_filters * block.se_ratio
))
if data_format == 'channels_first':
se_shape = (filters, 1, 1)
else:
se_shape = (1, 1, filters)
self.squeeze_excitation.append(
tf.keras.layers.GlobalAveragePooling2D(name=prefix + 'se_squeeze'))
self.squeeze_excitation.append(
tf.keras.layers.Reshape(se_shape, name=prefix + 'se_reshape'))
self.squeeze_excitation.extend(
conv2d_block_as_layers(
num_reduced_filters,
config,
use_bias=True,
use_batch_norm=False,
activation=activation,
name=prefix + 'se_reduce'))
self.squeeze_excitation.extend(
conv2d_block_as_layers(
filters,
config,
use_bias=True,
use_batch_norm=False,
activation='sigmoid',
name=prefix + 'se_expand'))
# Output phase
self.project_block.extend(
conv2d_block_as_layers(
block.output_filters,
config,
activation=None,
name=prefix + 'project'))
# Add identity so that quantization-aware training can insert quantization
# ops correctly.
self.project_block.append(
tf.keras.layers.Activation('linear', name=prefix + 'id'))
self.has_skip_add = False
if (block.id_skip
and all(s == 1 for s in block.strides)
and block.input_filters == block.output_filters):
self.has_skip_add = True
if drop_connect_rate and drop_connect_rate > 0:
# Apply dropconnect
# The only difference between dropout and dropconnect in TF is scaling
# by drop_connect_rate during training. See:
# https://github.com/keras-team/keras/pull/9898#issuecomment-380577612
self.project_block.append(
tf.keras.layers.Dropout(
drop_connect_rate,
noise_shape=(None, 1, 1, 1),
name=prefix + 'drop'))
def mb_conv_block(inputs: tf.Tensor,
block: BlockConfig,
config: ModelConfig,
prefix: Optional[str] = None) -> tf.Tensor:
"""Mobile Inverted Residual Bottleneck.
Args:
inputs: the Keras input to the block
block: BlockConfig, arguments to create a Block
config: ModelConfig, a set of model parameters
prefix: prefix for naming all layers
Returns:
the output of the block
"""
return _MbConvBlock(block, config, prefix)(inputs)
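# Usage sketch (illustrative; not part of the original file): apply a single
# MBConv block taken from an existing model configuration. Assumes `cfg` is a
# valid `ModelConfig` whose `blocks` field holds `BlockConfig` entries, as
# consumed by `mobilenet_edgetpu_v2` below:
#
#   block = cfg.blocks[0]
#   inputs = tf.keras.Input(shape=(112, 112, block.input_filters))
#   outputs = mb_conv_block(inputs, block=block, config=cfg,
#                           prefix='stack_0/block_0/')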
def mobilenet_edgetpu_v2(image_input: tf.keras.layers.Input,
config: ModelConfig): # pytype: disable=invalid-annotation # typed-keras
"""Creates a MobilenetEdgeTPUV2 graph given the model parameters.
This function is wrapped by the `MobilenetEdgeTPUV2` class to make a
tf.keras.Model.
Args:
image_input: the input batch of images
config: the model config
Returns:
The output of the classification model, or, if only the backbone is needed,
a list of backbone feature levels.
"""
depth_coefficient = config.depth_coefficient
blocks = config.blocks
stem_base_filters = config.stem_base_filters
stem_kernel_size = config.stem_kernel_size
top_base_filters = config.top_base_filters
activation = tf_utils.get_activation(config.activation)
dropout_rate = config.dropout_rate
drop_connect_rate = config.drop_connect_rate
num_classes = config.num_classes
input_channels = config.input_channels
rescale_input = config.rescale_input
data_format = tf.keras.backend.image_data_format()
dtype = config.dtype
weight_decay = config.weight_decay
x = image_input
if data_format == 'channels_first':
# Happens on GPU/TPU if available.
x = tf.keras.layers.Permute((3, 1, 2))(x)
if rescale_input:
x = common_modules.normalize_images(
x, num_channels=input_channels, dtype=dtype, data_format=data_format)
# Build stem
x = conv2d_block(
x,
round_filters(stem_base_filters, config),
config,
kernel_size=[stem_kernel_size, stem_kernel_size],
strides=[2, 2],
activation=activation,
name='stem')
# Build blocks
num_blocks_total = sum(block.num_repeat for block in blocks)
block_num = 0
backbone_levels = []
for stack_idx, block in enumerate(blocks):
is_reduction = False
assert block.num_repeat > 0
# Update block input and output filters based on depth multiplier
block = block.replace(
input_filters=round_filters(block.input_filters, config),
output_filters=round_filters(block.output_filters, config),
num_repeat=round_repeats(block.num_repeat, depth_coefficient))
if stack_idx == 0:
backbone_levels.append(x)
elif (stack_idx == len(blocks) - 1) or (blocks[stack_idx + 1].strides
== (2, 2)):
is_reduction = True
# The first block needs to take care of stride and filter size increase
drop_rate = drop_connect_rate * float(block_num) / num_blocks_total
config = config.replace(drop_connect_rate=drop_rate)
block_prefix = 'stack_{}/block_0/'.format(stack_idx)
x = _MbConvBlock(block, config, block_prefix)(x)
block_num += 1
if block.num_repeat > 1:
block = block.replace(
input_filters=block.output_filters,
strides=[1, 1]
)
for block_idx in range(block.num_repeat - 1):
drop_rate = drop_connect_rate * float(block_num) / num_blocks_total
config = config.replace(drop_connect_rate=drop_rate)
block_prefix = 'stack_{}/block_{}/'.format(stack_idx, block_idx + 1)
x = _MbConvBlock(block, config, prefix=block_prefix)(x)
block_num += 1
if is_reduction:
backbone_levels.append(x)
if config.backbone_only:
return backbone_levels
# Build top
x = conv2d_block(x,
round_filters(top_base_filters, config),
config,
activation=activation,
name='top')
# Build classifier
pool_size = (x.shape.as_list()[1], x.shape.as_list()[2])
x = tf.keras.layers.AveragePooling2D(pool_size, name='top_pool')(x)
if dropout_rate and dropout_rate > 0:
x = tf.keras.layers.Dropout(dropout_rate, name='top_dropout')(x)
x = tf.keras.layers.Conv2D(
    num_classes,
    1,
    kernel_initializer=DENSE_KERNEL_INITIALIZER,
    kernel_regularizer=tf.keras.regularizers.l2(weight_decay),
    bias_regularizer=tf.keras.regularizers.l2(weight_decay),
    name='logits')(x)
x = tf.keras.layers.Activation('softmax', name='probs')(x)
x = tf.squeeze(x, axis=[1, 2])
return x
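# Usage sketch (illustrative; not part of the original file): this mirrors how
# the `MobilenetEdgeTPUV2` class wraps the graph-building function into a
# tf.keras.Model. Assumes `cfg` is a valid `ModelConfig`:
#
#   image_input = tf.keras.Input(shape=(224, 224, 3))
#   output = mobilenet_edgetpu_v2(image_input, cfg)
#   model = tf.keras.Model(inputs=image_input, outputs=output)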
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mobilenet_edgetpu model."""
import os
from absl.testing import parameterized
import tensorflow as tf
from official.projects.edgetpu.vision.modeling import common_modules
from official.projects.edgetpu.vision.modeling import mobilenet_edgetpu_v2_model
class MobilenetEdgeTPUV2BuildTest(tf.test.TestCase, parameterized.TestCase):
def setUp(self):
super().setUp()
# Ensure no model duplicates
tf.keras.backend.clear_session()
def test_create_mobilenet_edgetpu(self):
model = mobilenet_edgetpu_v2_model.MobilenetEdgeTPUV2()
self.assertEqual(common_modules.count_params(model), 6069657)
def test_export_tflite(self):
model = mobilenet_edgetpu_v2_model.MobilenetEdgeTPUV2()
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tmp_dir = self.create_tempdir()
output_tflite = os.path.join(tmp_dir, 'model_quant.tflite')
tflite_buffer = converter.convert()
tf.io.gfile.GFile(output_tflite, 'wb').write(tflite_buffer)
self.assertTrue(tf.io.gfile.exists(output_tflite))
def test_model_save_load(self):
"""Serializes and de-serializeds the model."""
model_builder = mobilenet_edgetpu_v2_model.MobilenetEdgeTPUV2
model = model_builder.from_name(model_name='mobilenet_edgetpu_v2')
# The model always has a conv2d layer right after the input layer; we
# compare this layer's weight parameters between the original model and
# the saved-then-loaded model.
first_conv_layer = model.get_layer('stem_conv2d')
kernel_tensor = first_conv_layer.trainable_weights[0].numpy()
save_path = os.path.join(self.create_tempdir().full_path, 'test_model')
model.save(save_path)
loaded_model = tf.keras.models.load_model(save_path)
loaded_first_conv_layer = loaded_model.get_layer('stem_conv2d')
loaded_kernel_tensor = loaded_first_conv_layer.trainable_weights[0].numpy()
self.assertAllClose(kernel_tensor, loaded_kernel_tensor)
def test_model_initialization_failure(self):
"""Tests model can only be initialized with predefined model name."""
model_builder = mobilenet_edgetpu_v2_model.MobilenetEdgeTPUV2
with self.assertRaises(ValueError):
_ = model_builder.from_name(model_name='undefined_model_name')
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=line-too-long
r"""Export model (float or quantized tflite, and saved model) from a trained checkpoint.
Example:
To export a dummy quantized model:
export_tflite --model_name=mobilenet_edgetpu_v2_s --output_dir=/tmp --quantize
Using a training checkpoint:
export_tflite --model_name=mobilenet_edgetpu_v2_s \
--ckpt_path=/path/to/training/checkpoint \
--dataset_dir=/path/to/your/dataset --output_dir=/tmp --quantize
Exporting without the final squeeze layer:
export_tflite --model_name=mobilenet_edgetpu_v2_xs \
--output_layer=probs \
--dataset_dir=/path/to/your/dataset --output_dir=/tmp --quantize
"""
# pylint: enable=line-too-long
import os
from absl import app
from absl import flags
from absl import logging
import tensorflow as tf
from official.projects.edgetpu.vision.modeling import common_modules
from official.projects.edgetpu.vision.serving import export_util
flags.DEFINE_string('model_name', None,
'Used to build model using experiment config factory.')
flags.DEFINE_string(
'ckpt_path', None, 'Path to the checkpoint. '
'If not provided, a tflite with random parameters is exported.')
flags.DEFINE_enum(
'ckpt_format', 'tf_checkpoint',
['tf_checkpoint', 'keras_checkpoint'],
'tf_checkpoint is for ckpt files from the tf.train.Checkpoint.save() method; '
'keras_checkpoint is for ckpt files from the keras.Model.save_weights() '
'method.')
flags.DEFINE_string('output_dir', None, 'Directory to output exported files.')
flags.DEFINE_integer(
'image_size', 224,
'Size of the input image. Ideally should be the same as the image_size used '
'in training config.')
flags.DEFINE_string(
'output_layer', None,
'Layer name to take the output from. Can be used to take the output from '
'an intermediate layer. None means use the original model output.')
flags.DEFINE_string(
'finalize_method', 'none',
'Additional layers to be added to customize serving output.\n'
'Supported are (none|(argmax|squeeze|resize<?>)[,...]).\n'
'- none: do not add extra serving layers.\n'
'- argmax: adds argmax.\n'
'- squeeze: removes dimensions of size 1 from the shape of a tensor.\n'
'- resize<?> (for example resize512): adds resize bilinear|nn to <?> size.\n'
'For example: --finalize_method=resize128,argmax,resize512,squeeze\n'
'will do resize bilinear to 128x128, then argmax, then resize nn to 512x512.')
# Quantization related parameters
flags.DEFINE_bool(
'quantize', False,
'Quantize model before exporting tflite. Note that only the exported '
'TFLite is quantized not the SavedModel.')
flags.DEFINE_bool('use_experimental_quantizer', True, 'Enables experimental '
'quantizer of TFLiteConverter 2.0.')
flags.DEFINE_bool(
'quantize_less_restrictive', False,
'Allows non int8 based intermediate types, automatic model output type.')
flags.DEFINE_integer(
'num_calibration_steps', 100,
'Number of post-training quantization calibration steps to run.')
flags.DEFINE_string('dataset_name', 'imagenet2012',
'Name of the dataset to use for quantization calibration.')
flags.DEFINE_string('dataset_dir', None, 'Dataset location.')
flags.DEFINE_string(
'dataset_split', 'train',
'The dataset split (train, validation etc.) to use for calibration.')
FLAGS = flags.FLAGS
def get_export_config_from_flags():
"""Creates ExportConfig from cmd line flags."""
quantization_config = export_util.QuantizationConfig(
quantize=FLAGS.quantize,
quantize_less_restrictive=FLAGS.quantize_less_restrictive,
use_experimental_quantizer=FLAGS.use_experimental_quantizer,
num_calibration_steps=FLAGS.num_calibration_steps,
dataset_name=FLAGS.dataset_name,
dataset_dir=FLAGS.dataset_dir,
dataset_split=FLAGS.dataset_split)
export_config = export_util.ExportConfig(
    model_name=FLAGS.model_name,
    ckpt_path=FLAGS.ckpt_path,
    ckpt_format=FLAGS.ckpt_format,
    output_dir=FLAGS.output_dir,
    image_size=FLAGS.image_size,
    output_layer=FLAGS.output_layer,
    finalize_method=FLAGS.finalize_method.lower().split(','),
    quantization_config=quantization_config)
return export_config
def run_export():
"""Exports TFLite with PTQ."""
export_config = get_export_config_from_flags()
model = export_util.build_experiment_model(
experiment_type=export_config.model_name)
if export_config.ckpt_path:
logging.info('Loading checkpoint from %s', export_config.ckpt_path)
common_modules.load_weights(
model,
export_config.ckpt_path,
checkpoint_format=export_config.ckpt_format)
else:
logging.info('No checkpoint provided. Using randomly initialized weights.')
if export_config.output_layer is not None:
all_layer_names = {l.name for l in model.layers}
if export_config.output_layer not in all_layer_names:
model.summary()
logging.info(
    'Cannot find the layer %s in the model. See the above summary to '
    'choose an output layer.', export_config.output_layer)
return
output_layer = model.get_layer(export_config.output_layer)
model = tf.keras.Model(model.input, output_layer.output)
model_input = tf.keras.Input(
shape=(export_config.image_size, export_config.image_size, 3),
batch_size=1)
model_output = export_util.finalize_serving(model(model_input), export_config)
model_for_inference = tf.keras.Model(model_input, model_output)
# Convert to tflite. Quantize if quantization parameters are specified.
converter = tf.lite.TFLiteConverter.from_keras_model(model_for_inference)
export_util.configure_tflite_converter(export_config, converter)
tflite_buffer = converter.convert()
# Make sure the output directory exists and write tflite.
tf.io.gfile.makedirs(export_config.output_dir)
tflite_path = os.path.join(export_config.output_dir,
f'{export_config.model_name}.tflite')
tf.io.gfile.GFile(tflite_path, 'wb').write(tflite_buffer)
print('TfLite model exported to {}'.format(tflite_path))
# Export saved model.
saved_model_path = os.path.join(export_config.output_dir,
export_config.model_name)
model_for_inference.save(saved_model_path)
print('SavedModel exported to {}'.format(saved_model_path))
def main(_):
run_export()
if __name__ == '__main__':
flags.mark_flag_as_required('model_name')
app.run(main)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for export_tflite."""
import itertools
import os
from absl.testing import parameterized
import tensorflow as tf
from official.projects.edgetpu.vision.serving import export_util
def _build_model(config):
model = export_util.build_experiment_model(config.model_name)
model_input = tf.keras.Input(
shape=(config.image_size, config.image_size, 3), batch_size=1)
model_output = export_util.finalize_serving(model(model_input), config)
model_for_inference = tf.keras.Model(model_input, model_output)
return model_for_inference
def _dump_tflite(model, config):
converter = tf.lite.TFLiteConverter.from_keras_model(model)
export_util.configure_tflite_converter(config, converter)
tflite_buffer = converter.convert()
tf.io.gfile.makedirs(os.path.dirname(config.output_dir))
tflite_path = os.path.join(config.output_dir, f'{config.model_name}.tflite')
tf.io.gfile.GFile(tflite_path, 'wb').write(tflite_buffer)
return tflite_path
SEG_MODELS = [
'autoseg_edgetpu_xs',
]
FINALIZE_METHODS = [
'resize512,argmax,squeeze', 'resize256,argmax,resize512,squeeze',
'resize128,argmax,resize512,squeeze'
]
class ExportTfliteTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.parameters(
('mobilenet_edgetpu_v2_xs', 224),
('autoseg_edgetpu_xs', 512),
('deeplabv3plus_mobilenet_edgetpuv2_xs_ade20k', 512),
('deeplabv3plus_mobilenet_edgetpuv2_xs_ade20k_32', 512),
)
def test_model_build_and_export_tflite(self, model_name, image_size):
tmp_dir = self.create_tempdir().full_path
config = export_util.ExportConfig(
model_name=model_name, image_size=image_size, output_dir=tmp_dir)
config.quantization_config.quantize = False
model = _build_model(config)
tflite_path = _dump_tflite(model, config)
self.assertTrue(tf.io.gfile.exists(tflite_path))
@parameterized.parameters(
('mobilenet_edgetpu_v2_xs', 224),
('autoseg_edgetpu_xs', 512),
('deeplabv3plus_mobilenet_edgetpuv2_xs_ade20k', 512),
('deeplabv3plus_mobilenet_edgetpuv2_xs_ade20k_32', 512),
)
def test_model_build_and_export_saved_model(self, model_name, image_size):
tmp_dir = self.create_tempdir().full_path
config = export_util.ExportConfig(
model_name=model_name, image_size=image_size, output_dir=tmp_dir)
model = _build_model(config)
saved_model_path = os.path.join(config.output_dir, config.model_name)
model.save(saved_model_path)
self.assertTrue(tf.saved_model.contains_saved_model(saved_model_path))
@parameterized.parameters(itertools.product(SEG_MODELS, FINALIZE_METHODS))
def test_segmentation_finalize_methods(self, model_name, finalize_method):
tmp_dir = self.create_tempdir().full_path
config = export_util.ExportConfig(
model_name=model_name,
image_size=512,
output_dir=tmp_dir,
finalize_method=finalize_method.split(','))
config.quantization_config.quantize = False
model = _build_model(config)
model_input = tf.random.normal([1, config.image_size, config.image_size, 3])
self.assertEqual(
model(model_input).get_shape().as_list(),
[1, config.image_size, config.image_size])
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implements serving with custom post processing."""
import dataclasses
from typing import List, Optional
import tensorflow as tf
import tensorflow_datasets as tfds
from official.core import exp_factory
from official.core import task_factory
from official.modeling.hyperparams import base_config
# pylint: disable=unused-import
from official.projects.edgetpu.vision.configs import mobilenet_edgetpu_config
from official.projects.edgetpu.vision.configs import semantic_segmentation_config
from official.projects.edgetpu.vision.configs import semantic_segmentation_searched_config
from official.projects.edgetpu.vision.modeling import custom_layers
from official.projects.edgetpu.vision.modeling.backbones import mobilenet_edgetpu
from official.projects.edgetpu.vision.tasks import image_classification
from official.projects.edgetpu.vision.tasks import semantic_segmentation as edgetpu_semantic_segmentation
from official.vision.beta.tasks import semantic_segmentation
# pylint: enable=unused-import
MEAN_RGB = [127.5, 127.5, 127.5]
STDDEV_RGB = [127.5, 127.5, 127.5]
@dataclasses.dataclass
class QuantizationConfig(base_config.Config):
"""Configuration for post training quantization.
Attributes:
quantize: Whether to quantize model before exporting tflite.
quantize_less_restrictive: Allows non int8 based intermediate types,
automatic model output type.
use_experimental_quantizer: Enables experimental quantizer of
TFLiteConverter 2.0.
num_calibration_steps: Number of post-training quantization calibration
steps to run.
dataset_name: Name of the dataset to use for quantization calibration.
dataset_dir: Dataset location.
dataset_split: The dataset split (train, validation etc.) to use for
calibration.
"""
quantize: bool = False
quantize_less_restrictive: bool = False
use_experimental_quantizer: bool = True
dataset_name: Optional[str] = None
dataset_dir: Optional[str] = None
dataset_split: Optional[str] = None
num_calibration_steps: int = 100
@dataclasses.dataclass
class ExportConfig(base_config.Config):
"""Configuration for exporting models as tflite and saved_models.
Attributes:
model_name: One of the registered model names
ckpt_path: Path of the training checkpoint. If not provided, a tflite with
random parameters is exported.
ckpt_format: Format of the checkpoint. tf_checkpoint is for ckpt files from
the tf.train.Checkpoint.save() method; keras_checkpoint is for ckpt files
from the keras.Model.save_weights() method.
output_dir: Directory to output exported files.
image_size: Size of the input image. Ideally should be the same as the
image_size used in training config
output_layer: Layer name to take the output from. Can be used to take the
output from an intermediate layer. None means use the original model
output.
finalize_method: Additional layers to be added to customize serving output.
Supported are (none|(argmax|squeeze|resize<?>)[,...]).
- none: do not add extra serving layers.
- argmax: adds argmax.
- squeeze: removes dimensions (except batch dim) of size 1 from the shape
of a tensor.
- resize<?> (for example resize512): adds resize bilinear|nn to <?> size.
For example: --finalize_method=resize128,argmax,resize512,squeeze will do
resize bilinear to 128x128, then argmax, then resize nn to 512x512.
"""
quantization_config: QuantizationConfig = QuantizationConfig()
model_name: Optional[str] = None
ckpt_path: Optional[str] = None
ckpt_format: Optional[str] = 'tf_checkpoint'
output_dir: str = '/tmp/'
image_size: int = 224
output_layer: Optional[str] = None
finalize_method: Optional[List[str]] = None
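# Construction sketch (illustrative; not part of the original file): a config
# for a quantized segmentation export whose serving output is resized,
# argmax-ed and squeezed. The dataset paths are placeholders:
#
#   export_config = ExportConfig(
#       model_name='autoseg_edgetpu_xs',
#       image_size=512,
#       output_dir='/tmp/export',
#       finalize_method=['resize128', 'argmax', 'resize512', 'squeeze'],
#       quantization_config=QuantizationConfig(
#           quantize=True,
#           dataset_name='imagenet2012',
#           dataset_dir='/path/to/tfds',
#           dataset_split='train'))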
def finalize_serving(model_output, export_config):
"""Adds extra layers based on the provided configuration."""
finalize_method = export_config.finalize_method
output_layer = model_output
if not finalize_method or finalize_method[0] == 'none':
return output_layer
discrete = False
for i in range(len(finalize_method)):
if finalize_method[i] == 'argmax':
discrete = True
is_argmax_last = (i + 1) == len(finalize_method)
if is_argmax_last:
output_layer = tf.argmax(
output_layer, axis=3, output_type=tf.dtypes.int32)
else:
# TODO(tohaspiridonov): add first_match=False when cl/383951533 is submitted
output_layer = custom_layers.argmax(
output_layer, keepdims=True, epsilon=1e-3)
elif finalize_method[i] == 'squeeze':
output_layer = tf.squeeze(output_layer, axis=3)
else:
resize_params = finalize_method[i].split('resize')
if len(resize_params) != 2 or resize_params[0]:
raise ValueError('Cannot finalize with ' + finalize_method[i] + '.')
resize_to_size = int(resize_params[1])
if discrete:
output_layer = tf.image.resize(
output_layer, [resize_to_size, resize_to_size],
method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
else:
output_layer = tf.image.resize(
output_layer, [resize_to_size, resize_to_size],
method=tf.image.ResizeMethod.BILINEAR)
return output_layer
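# Shape sketch (illustrative; not part of the original file). With
# finalize_method=['resize128', 'argmax', 'resize512', 'squeeze'], a float
# [1, 64, 64, num_classes] output is bilinearly resized to 128x128, reduced to
# per-pixel class ids (argmax is not last here, so the keepdims variant from
# custom_layers is used), resized to 512x512 with nearest neighbor (the tensor
# is now discrete), and finally squeezed to [1, 512, 512]:
#
#   logits = tf.zeros([1, 64, 64, 32])
#   cfg = ExportConfig(finalize_method=['resize128', 'argmax',
#                                       'resize512', 'squeeze'])
#   out = finalize_serving(logits, cfg)  # shape: [1, 512, 512]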
def preprocess_for_quantization(image_data, image_size, crop_padding=32):
"""Crops to center of image with padding then scales, normalizes image_size.
Args:
image_data: A 3D Tensor representing the RGB image data. Image can be of
arbitrary height and width.
image_size: image height/width dimension.
crop_padding: the padding size to use when centering the crop.
Returns:
A decoded and cropped image Tensor. Image is normalized to [-1,1].
"""
shape = tf.shape(image_data)
image_height = shape[0]
image_width = shape[1]
padded_center_crop_size = tf.cast(
(image_size * 1.0 / (image_size + crop_padding)) *
tf.cast(tf.minimum(image_height, image_width), tf.float32), tf.int32)
offset_height = ((image_height - padded_center_crop_size) + 1) // 2
offset_width = ((image_width - padded_center_crop_size) + 1) // 2
image = tf.image.crop_to_bounding_box(
image_data,
offset_height=offset_height,
offset_width=offset_width,
target_height=padded_center_crop_size,
target_width=padded_center_crop_size)
image = tf.image.resize([image], [image_size, image_size],
method=tf.image.ResizeMethod.BILINEAR)[0]
image = tf.cast(image, tf.float32)
image -= tf.constant(MEAN_RGB)
image /= tf.constant(STDDEV_RGB)
return image
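# Usage sketch (illustrative; not part of the original file): preprocess one
# arbitrarily sized RGB image for calibration. Since MEAN_RGB and STDDEV_RGB
# are both 127.5, the result is roughly in [-1, 1]:
#
#   raw = tf.random.uniform([300, 400, 3], maxval=255, dtype=tf.float32)
#   calibrated = preprocess_for_quantization(raw, image_size=224)
#   # calibrated.shape == (224, 224, 3)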
def representative_dataset_gen(export_config):
"""Gets a python generator of numpy arrays for the given dataset."""
quantization_config = export_config.quantization_config
dataset = tfds.builder(
quantization_config.dataset_name,
data_dir=quantization_config.dataset_dir)
dataset.download_and_prepare()
data = dataset.as_dataset()[quantization_config.dataset_split]
iterator = data.as_numpy_iterator()
for _ in range(quantization_config.num_calibration_steps):
features = next(iterator)
image = features['image']
image = preprocess_for_quantization(image, export_config.image_size)
image = tf.reshape(
image, [1, export_config.image_size, export_config.image_size, 3])
yield [image]
def configure_tflite_converter(export_config, converter):
"""Common code for picking up quantization parameters."""
quantization_config = export_config.quantization_config
if quantization_config.quantize:
if quantization_config.dataset_dir is None:
raise ValueError(
'Must provide a representative dataset when quantizing the model.')
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [
tf.lite.OpsSet.TFLITE_BUILTINS_INT8
]
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8
if quantization_config.quantize_less_restrictive:
converter.target_spec.supported_ops += [
tf.lite.OpsSet.TFLITE_BUILTINS
]
converter.inference_output_type = tf.float32
def _representative_dataset_gen():
return representative_dataset_gen(export_config)
converter.representative_dataset = _representative_dataset_gen
def build_experiment_model(experiment_type):
"""Builds model from experiment type configuration."""
params = exp_factory.get_exp_config(experiment_type)
params.validate()
params.lock()
task = task_factory.get_task(params.task)
return task.build_model()
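# Wiring sketch (illustrative; not part of the original file): how the helpers
# above combine for a post-training-quantized export, mirroring run_export()
# in export_tflite.py. Assumes `export_config` was built as in the
# construction sketch above:
#
#   model = build_experiment_model('mobilenet_edgetpu_v2_xs')
#   converter = tf.lite.TFLiteConverter.from_keras_model(model)
#   # Sets optimizations, int8 ops, and the representative dataset when
#   # export_config.quantization_config.quantize is True.
#   configure_tflite_converter(export_config, converter)
#   tflite_bytes = converter.convert()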
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "Klhdy8pnk5J8"
},
"source": [
"**A tool to visualize the segmentation model inference output.**\\\n",
"This tool is used verify that the exported tflite can produce expected segmentation results.\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "-vGHZSPWXbyu"
},
"outputs": [],
"source": [
"MODEL='gs://**/placeholder_for_edgetpu_models/autoseg/segmentation_search_edgetpu_s_not_fused.tflite'#@param\n",
"IMAGE_HOME = 'gs://**/PS_Compare/20190711'#@param\n",
"# Relative image file names separated by comas.\n",
"TEST_IMAGES = 'ADE_val_00001626.jpg,ADE_val_00001471.jpg,ADE_val_00000557.jpg'#@param\n",
"IMAGE_WIDTH = 512 #@param\n",
"IMAGE_HEIGHT = 512 #@param"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "zzhF1ASDkxTU"
},
"outputs": [],
"source": [
"import numpy as np\n",
"import tensorflow as tf\n",
"from PIL import Image as PILImage\n",
"import matplotlib.pyplot as plt\n",
"from scipy import ndimage"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "AXaJgLg1ml16"
},
"outputs": [],
"source": [
"# This block creates local copies of /cns and /x20 files.\n",
"TEST_IMAGES=','.join([IMAGE_HOME+'/'+image for image in TEST_IMAGES.split(',')])\n",
"\n",
"# The tflite interpreter only accepts model in local path.\n",
"def local_copy(awaypath):\n",
" localpath = '/tmp/' + awaypath.split('/')[-1]\n",
" !rm -f {localpath}\n",
" !fileutil cp -f {awaypath} {localpath}\n",
" !ls -lht {localpath}\n",
" %download_file {localpath}\n",
" return localpath\n",
"\n",
"IMAGES = [local_copy(image) for image in TEST_IMAGES.split(',')]\n",
"MODEL_COPY=local_copy(MODEL)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "KhS1lOrxHp5C"
},
"outputs": [],
"source": [
"# Creates a 6px wide boolean edge mask to highlight the segmentation.\n",
"def edge(mydata):\n",
" mydata = mydata.reshape(512, 512)\n",
" mydatat = mydata.transpose([1, 0])\n",
" mydata = np.convolve(mydata.reshape(-1), [-1, 0, 1], mode='same').reshape(512, 512)\n",
" mydatat = np.convolve(mydatat.reshape(-1), [-1, 0, 1], mode='same').reshape(512, 512).transpose([1, 0])\n",
" mydata = np.maximum((mydata != 0).astype(np.int8), (mydatat != 0).astype(np.int8))\n",
" mydata = ndimage.binary_dilation(mydata).astype(np.int8)\n",
" mydata = ndimage.binary_dilation(mydata).astype(np.int8)\n",
" mydata = ndimage.binary_dilation(mydata).astype(np.int8)\n",
" return mydata"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "GdlsbiVqL5JZ"
},
"outputs": [],
"source": [
"def run_model(input_data):\n",
" _input_data = input_data\n",
" _input_data = (_input_data-128).astype(np.int8)\n",
" # Load the tflite model and allocate tensors.\n",
" interpreter_x = tf.lite.Interpreter(model_path=MODEL_COPY)\n",
" interpreter_x.allocate_tensors()\n",
" # Get input and output tensors.\n",
" input_details = interpreter_x.get_input_details()\n",
" output_details = interpreter_x.get_output_details()\n",
" interpreter_x.set_tensor(input_details[0]['index'], _input_data)\n",
" interpreter_x.invoke()\n",
" output_data = interpreter_x.get_tensor(output_details[0]['index'])\n",
" return output_data.reshape((512, 512, 1))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "1mot5M_nl5P7"
},
"outputs": [],
"source": [
"# Set visualization wind sizes.\n",
"fig, ax = plt.subplots(max(len(IMAGES),2), 3)\n",
"fig.set_figwidth(30)\n",
"fig.set_figheight(10*max(len(IMAGES),2))\n",
"\n",
"# Read and test image.\n",
"for r, image in enumerate(IMAGES):\n",
" im = PILImage.open(image).convert('RGB')\n",
" min_dim=min(im.size[0], im.size[1])\n",
" im = im.resize((IMAGE_WIDTH*im.size[0] // min_dim, IMAGE_HEIGHT*im.size[1] // min_dim))\n",
" input_data = np.expand_dims(im, axis=0)\n",
" input_data = input_data[:, :IMAGE_WIDTH,:IMAGE_HEIGHT]\n",
" ax[r, 0].imshow(input_data.reshape([512, 512, 3]).astype(np.uint8))\n",
" ax[r, 0].set_title('Original')\n",
" ax[r, 0].grid(False)\n",
"\n",
" # Test the model on random input data.\n",
" output_data = run_model(input_data)\n",
" ax[r, 1].imshow(output_data, vmin = 0, vmax = 32)\n",
" ax[r, 1].set_title('Segmentation')\n",
" ax[r, 1].grid(False)\n",
"\n",
" output_data = np.reshape(np.minimum(output_data, 32), [512,512])\n",
" output_edge = edge(output_data).reshape(512,512, 1)\n",
" output_data = np.stack([output_data%3, (output_data//3)%3, (output_data//9)%3], axis = -1)\n",
" \n",
" output_data = input_data.reshape([512, 512, 3]).astype(np.float32) * (1-output_edge) + output_data * output_edge * 255\n",
" ax[r, 2].imshow(output_data.astype(np.uint8), vmin = 0, vmax = 256)\n",
" ax[r, 2].set_title('Segmentation \u0026 original')\n",
" ax[r, 2].grid(False)\n"
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"last_runtime": {
"build_target": "//quality/ranklab/experimental/notebook:rl_colab",
"kind": "private"
},
"name": "Inference_visualization_tool.ipynb",
"private_outputs": true
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}