Unverified Commit 0225b135 authored by Srihari Humbarwadi's avatar Srihari Humbarwadi Committed by GitHub
Browse files

Merge branch 'tensorflow:master' into panoptic-deeplab-modeling

parents 7479dbb8 4c571a3c
...@@ -24,7 +24,7 @@ import tensorflow as tf ...@@ -24,7 +24,7 @@ import tensorflow as tf
from official.modeling import tf_utils from official.modeling import tf_utils
from official.projects.detr.modeling import transformer from official.projects.detr.modeling import transformer
from official.vision.beta.modeling.backbones import resnet from official.vision.modeling.backbones import resnet
def position_embedding_sine(attention_mask, def position_embedding_sine(attention_mask,
......
...@@ -22,8 +22,8 @@ from official.projects.detr.configs import detr as detr_cfg ...@@ -22,8 +22,8 @@ from official.projects.detr.configs import detr as detr_cfg
from official.projects.detr.dataloaders import coco from official.projects.detr.dataloaders import coco
from official.projects.detr.modeling import detr from official.projects.detr.modeling import detr
from official.projects.detr.ops import matchers from official.projects.detr.ops import matchers
from official.vision.beta.evaluation import coco_evaluator from official.vision.evaluation import coco_evaluator
from official.vision.beta.ops import box_ops from official.vision.ops import box_ops
@task_factory.register_task_cls(detr_cfg.DetectionConfig) @task_factory.register_task_cls(detr_cfg.DetectionConfig)
......
...@@ -13,7 +13,7 @@ task: ...@@ -13,7 +13,7 @@ task:
num_attention_heads: 4 num_attention_heads: 4
intermediate_size: 512 intermediate_size: 512
hidden_activation: relu hidden_activation: relu
hidden_dropout_prob: 0.0 hidden_dropout_prob: 0.1
attention_probs_dropout_prob: 0.1 attention_probs_dropout_prob: 0.1
intra_bottleneck_size: 128 intra_bottleneck_size: 128
initializer_range: 0.02 initializer_range: 0.02
......
# MobileBERT-EdgeTPU-XXS model.
task:
model:
encoder:
type: mobilebert
mobilebert:
word_vocab_size: 30522
word_embed_size: 128
type_vocab_size: 2
max_sequence_length: 512
num_blocks: 6
hidden_size: 512
num_attention_heads: 4
intermediate_size: 1024
hidden_activation: relu
hidden_dropout_prob: 0.1
attention_probs_dropout_prob: 0.1
intra_bottleneck_size: 128
initializer_range: 0.02
key_query_shared_bottleneck: true
num_feedforward_networks: 2
normalization_type: no_norm
classifier_activation: false
layer_wise_distillation:
num_steps: 30000
warmup_steps: 0
initial_learning_rate: 1.5e-3
end_learning_rate: 1.5e-3
decay_steps: 30000
end_to_end_distillation:
num_steps: 585000
warmup_steps: 20000
initial_learning_rate: 1.5e-3
end_learning_rate: 1.5e-7
decay_steps: 585000
distill_ground_truth_ratio: 0.5
optimizer:
optimizer:
lamb:
beta_1: 0.9
beta_2: 0.999
clipnorm: 1.0
epsilon: 1.0e-06
exclude_from_layer_adaptation: null
exclude_from_weight_decay: ['LayerNorm', 'bias', 'norm']
global_clipnorm: null
name: LAMB
weight_decay_rate: 0.01
type: lamb
orbit_config:
eval_interval: 1000
eval_steps: -1
mode: train
steps_per_loop: 1000
total_steps: 825000
runtime:
distribution_strategy: 'tpu'
student_model:
cls_heads: [{'activation': 'tanh',
'cls_token_idx': 0,
'dropout_rate': 0.0,
'inner_dim': 512,
'name': 'next_sentence',
'num_classes': 2}]
encoder:
mobilebert:
attention_probs_dropout_prob: 0.1
classifier_activation: false
hidden_activation: relu
hidden_dropout_prob: 0.0
hidden_size: 512
initializer_range: 0.02
input_mask_dtype: int32
intermediate_size: 1024
intra_bottleneck_size: 128
key_query_shared_bottleneck: true
max_sequence_length: 512
normalization_type: no_norm
num_attention_heads: 4
num_blocks: 6
num_feedforward_networks: 2
type_vocab_size: 2
use_bottleneck_attention: false
word_embed_size: 128
word_vocab_size: 30522
type: mobilebert
mlm_activation: relu
mlm_initializer_range: 0.02
teacher_model:
cls_heads: []
encoder:
mobilebert:
attention_probs_dropout_prob: 0.1
classifier_activation: false
hidden_activation: gelu
hidden_dropout_prob: 0.1
hidden_size: 512
initializer_range: 0.02
input_mask_dtype: int32
intermediate_size: 4096
intra_bottleneck_size: 1024
key_query_shared_bottleneck: false
max_sequence_length: 512
normalization_type: layer_norm
num_attention_heads: 4
num_blocks: 24
num_feedforward_networks: 1
type_vocab_size: 2
use_bottleneck_attention: false
word_embed_size: 128
word_vocab_size: 30522
type: mobilebert
mlm_activation: gelu
mlm_initializer_range: 0.02
teacher_model_init_checkpoint: gs://**/uncased_L-24_H-1024_B-512_A-4_teacher/tf2_checkpoint/bert_model.ckpt-1
student_model_init_checkpoint: ''
train_dataset:
block_length: 1
cache: false
cycle_length: null
deterministic: null
drop_remainder: true
enable_tf_data_service: false
global_batch_size: 2048
input_path: gs://**/seq_512_mask_20/wikipedia.tfrecord*,gs://**/seq_512_mask_20/books.tfrecord*
is_training: true
max_predictions_per_seq: 20
seq_length: 512
sharding: true
shuffle_buffer_size: 100
tf_data_service_address: null
tf_data_service_job_name: null
tfds_as_supervised: false
tfds_data_dir: ''
tfds_name: ''
tfds_skip_decoding_feature: ''
tfds_split: ''
use_next_sentence_label: true
use_position_id: false
use_v2_feature_names: false
eval_dataset:
block_length: 1
cache: false
cycle_length: null
deterministic: null
drop_remainder: true
enable_tf_data_service: false
global_batch_size: 2048
input_path: gs://**/seq_512_mask_20/wikipedia.tfrecord-00141-of-00500,gs://**/seq_512_mask_20/books.tfrecord-00141-of-00500
is_training: false
max_predictions_per_seq: 20
seq_length: 512
sharding: true
shuffle_buffer_size: 100
tf_data_service_address: null
tf_data_service_job_name: null
tfds_as_supervised: false
tfds_data_dir: ''
tfds_name: ''
tfds_skip_decoding_feature: ''
tfds_split: ''
use_next_sentence_label: true
use_position_id: false
use_v2_feature_names: false
...@@ -26,6 +26,8 @@ from official.modeling.hyperparams import oneof ...@@ -26,6 +26,8 @@ from official.modeling.hyperparams import oneof
from official.projects.edgetpu.vision.modeling import common_modules from official.projects.edgetpu.vision.modeling import common_modules
from official.projects.edgetpu.vision.modeling import custom_layers from official.projects.edgetpu.vision.modeling import custom_layers
InitializerType = Optional[Union[str, tf.keras.initializers.Initializer]]
@dataclasses.dataclass @dataclasses.dataclass
class BlockType(oneof.OneOfConfig): class BlockType(oneof.OneOfConfig):
...@@ -216,6 +218,8 @@ class ModelConfig(base_config.Config): ...@@ -216,6 +218,8 @@ class ModelConfig(base_config.Config):
stem_base_filters: int = 64 stem_base_filters: int = 64
stem_kernel_size: int = 5 stem_kernel_size: int = 5
top_base_filters: int = 1280 top_base_filters: int = 1280
conv_kernel_initializer: InitializerType = None
dense_kernel_initializer: InitializerType = None
blocks: Tuple[BlockConfig, ...] = ( blocks: Tuple[BlockConfig, ...] = (
# (input_filters, output_filters, kernel_size, num_repeat, # (input_filters, output_filters, kernel_size, num_repeat,
# expand_ratio, strides, se_ratio, id_skip, fused_conv, conv_type) # expand_ratio, strides, se_ratio, id_skip, fused_conv, conv_type)
...@@ -279,7 +283,8 @@ def mobilenet_edgetpu_v2_base( ...@@ -279,7 +283,8 @@ def mobilenet_edgetpu_v2_base(
drop_connect_rate: float = 0.1, drop_connect_rate: float = 0.1,
filter_size_overrides: Optional[Dict[int, int]] = None, filter_size_overrides: Optional[Dict[int, int]] = None,
block_op_overrides: Optional[Dict[int, Dict[int, Dict[str, Any]]]] = None, block_op_overrides: Optional[Dict[int, Dict[int, Dict[str, Any]]]] = None,
block_group_overrides: Optional[Dict[int, Dict[str, Any]]] = None): block_group_overrides: Optional[Dict[int, Dict[str, Any]]] = None,
topology: Optional[TopologyConfig] = None):
"""Creates MobilenetEdgeTPUV2 ModelConfig based on tuning parameters.""" """Creates MobilenetEdgeTPUV2 ModelConfig based on tuning parameters."""
config = ModelConfig() config = ModelConfig()
...@@ -295,7 +300,7 @@ def mobilenet_edgetpu_v2_base( ...@@ -295,7 +300,7 @@ def mobilenet_edgetpu_v2_base(
} }
config = config.replace(**param_overrides) config = config.replace(**param_overrides)
topology_config = TopologyConfig() topology_config = TopologyConfig() if topology is None else topology
if filter_size_overrides: if filter_size_overrides:
for group_id in filter_size_overrides: for group_id in filter_size_overrides:
topology_config.block_groups[group_id].filters = filter_size_overrides[ topology_config.block_groups[group_id].filters = filter_size_overrides[
...@@ -724,6 +729,7 @@ def conv2d_block_as_layers( ...@@ -724,6 +729,7 @@ def conv2d_block_as_layers(
use_bias: bool = False, use_bias: bool = False,
activation: Any = None, activation: Any = None,
depthwise: bool = False, depthwise: bool = False,
kernel_initializer: InitializerType = None,
name: Optional[str] = None) -> List[tf.keras.layers.Layer]: name: Optional[str] = None) -> List[tf.keras.layers.Layer]:
"""A conv2d followed by batch norm and an activation.""" """A conv2d followed by batch norm and an activation."""
batch_norm = common_modules.get_batch_norm(config.batch_norm) batch_norm = common_modules.get_batch_norm(config.batch_norm)
...@@ -748,11 +754,13 @@ def conv2d_block_as_layers( ...@@ -748,11 +754,13 @@ def conv2d_block_as_layers(
sequential_layers: List[tf.keras.layers.Layer] = [] sequential_layers: List[tf.keras.layers.Layer] = []
if depthwise: if depthwise:
conv2d = tf.keras.layers.DepthwiseConv2D conv2d = tf.keras.layers.DepthwiseConv2D
init_kwargs.update({'depthwise_initializer': CONV_KERNEL_INITIALIZER}) init_kwargs.update({'depthwise_initializer': kernel_initializer})
else: else:
conv2d = tf.keras.layers.Conv2D conv2d = tf.keras.layers.Conv2D
init_kwargs.update({'filters': conv_filters, init_kwargs.update({
'kernel_initializer': CONV_KERNEL_INITIALIZER}) 'filters': conv_filters,
'kernel_initializer': kernel_initializer
})
sequential_layers.append(conv2d(**init_kwargs)) sequential_layers.append(conv2d(**init_kwargs))
...@@ -780,12 +788,21 @@ def conv2d_block(inputs: tf.Tensor, ...@@ -780,12 +788,21 @@ def conv2d_block(inputs: tf.Tensor,
use_bias: bool = False, use_bias: bool = False,
activation: Any = None, activation: Any = None,
depthwise: bool = False, depthwise: bool = False,
kernel_initializer: Optional[InitializerType] = None,
name: Optional[str] = None) -> tf.Tensor: name: Optional[str] = None) -> tf.Tensor:
"""Compatibility with third_party/car/deep_nets.""" """Compatibility with third_party/car/deep_nets."""
x = inputs x = inputs
for layer in conv2d_block_as_layers(conv_filters, config, kernel_size, for layer in conv2d_block_as_layers(
strides, use_batch_norm, use_bias, conv_filters=conv_filters,
activation, depthwise, name): config=config,
kernel_size=kernel_size,
strides=strides,
use_batch_norm=use_batch_norm,
use_bias=use_bias,
activation=activation,
depthwise=depthwise,
kernel_initializer=kernel_initializer,
name=name):
x = layer(x) x = layer(x)
return x return x
...@@ -828,6 +845,9 @@ class _MbConvBlock: ...@@ -828,6 +845,9 @@ class _MbConvBlock:
use_groupconv = block.conv_type == 'group' use_groupconv = block.conv_type == 'group'
prefix = prefix or '' prefix = prefix or ''
self.name = prefix self.name = prefix
conv_kernel_initializer = (
config.conv_kernel_initializer if config.conv_kernel_initializer
is not None else CONV_KERNEL_INITIALIZER)
filters = block.input_filters * block.expand_ratio filters = block.input_filters * block.expand_ratio
...@@ -851,22 +871,26 @@ class _MbConvBlock: ...@@ -851,22 +871,26 @@ class _MbConvBlock:
activation=activation, activation=activation,
name=prefix + 'fused')) name=prefix + 'fused'))
else: else:
self.expand_block.extend(conv2d_block_as_layers( self.expand_block.extend(
filters, conv2d_block_as_layers(
config, conv_filters=filters,
kernel_size=block.kernel_size, config=config,
strides=block.strides, kernel_size=block.kernel_size,
activation=activation, strides=block.strides,
name=prefix + 'fused')) activation=activation,
kernel_initializer=conv_kernel_initializer,
name=prefix + 'fused'))
else: else:
if block.expand_ratio != 1: if block.expand_ratio != 1:
# Expansion phase with a pointwise conv # Expansion phase with a pointwise conv
self.expand_block.extend(conv2d_block_as_layers( self.expand_block.extend(
filters, conv2d_block_as_layers(
config, conv_filters=filters,
kernel_size=(1, 1), config=config,
activation=activation, kernel_size=(1, 1),
name=prefix + 'expand')) activation=activation,
kernel_initializer=conv_kernel_initializer,
name=prefix + 'expand'))
# Main kernel, after the expansion (if applicable, i.e. not fused). # Main kernel, after the expansion (if applicable, i.e. not fused).
if use_depthwise: if use_depthwise:
...@@ -876,6 +900,7 @@ class _MbConvBlock: ...@@ -876,6 +900,7 @@ class _MbConvBlock:
kernel_size=block.kernel_size, kernel_size=block.kernel_size,
strides=block.strides, strides=block.strides,
activation=activation, activation=activation,
kernel_initializer=conv_kernel_initializer,
depthwise=True, depthwise=True,
name=prefix + 'depthwise')) name=prefix + 'depthwise'))
elif use_groupconv: elif use_groupconv:
...@@ -907,27 +932,30 @@ class _MbConvBlock: ...@@ -907,27 +932,30 @@ class _MbConvBlock:
tf.keras.layers.Reshape(se_shape, name=prefix + 'se_reshape')) tf.keras.layers.Reshape(se_shape, name=prefix + 'se_reshape'))
self.squeeze_excitation.extend( self.squeeze_excitation.extend(
conv2d_block_as_layers( conv2d_block_as_layers(
num_reduced_filters, conv_filters=num_reduced_filters,
config, config=config,
use_bias=True, use_bias=True,
use_batch_norm=False, use_batch_norm=False,
activation=activation, activation=activation,
kernel_initializer=conv_kernel_initializer,
name=prefix + 'se_reduce')) name=prefix + 'se_reduce'))
self.squeeze_excitation.extend( self.squeeze_excitation.extend(
conv2d_block_as_layers( conv2d_block_as_layers(
filters, conv_filters=filters,
config, config=config,
use_bias=True, use_bias=True,
use_batch_norm=False, use_batch_norm=False,
activation='sigmoid', activation='sigmoid',
kernel_initializer=conv_kernel_initializer,
name=prefix + 'se_expand')) name=prefix + 'se_expand'))
# Output phase # Output phase
self.project_block.extend( self.project_block.extend(
conv2d_block_as_layers( conv2d_block_as_layers(
block.output_filters, conv_filters=block.output_filters,
config, config=config,
activation=None, activation=None,
kernel_initializer=conv_kernel_initializer,
name=prefix + 'project')) name=prefix + 'project'))
# Add identity so that quantization-aware training can insert quantization # Add identity so that quantization-aware training can insert quantization
...@@ -993,6 +1021,12 @@ def mobilenet_edgetpu_v2(image_input: tf.keras.layers.Input, ...@@ -993,6 +1021,12 @@ def mobilenet_edgetpu_v2(image_input: tf.keras.layers.Input,
activation = tf_utils.get_activation(config.activation) activation = tf_utils.get_activation(config.activation)
dropout_rate = config.dropout_rate dropout_rate = config.dropout_rate
drop_connect_rate = config.drop_connect_rate drop_connect_rate = config.drop_connect_rate
conv_kernel_initializer = (
config.conv_kernel_initializer if config.conv_kernel_initializer
is not None else CONV_KERNEL_INITIALIZER)
dense_kernel_initializer = (
config.dense_kernel_initializer if config.dense_kernel_initializer
is not None else DENSE_KERNEL_INITIALIZER)
num_classes = config.num_classes num_classes = config.num_classes
input_channels = config.input_channels input_channels = config.input_channels
rescale_input = config.rescale_input rescale_input = config.rescale_input
...@@ -1010,12 +1044,13 @@ def mobilenet_edgetpu_v2(image_input: tf.keras.layers.Input, ...@@ -1010,12 +1044,13 @@ def mobilenet_edgetpu_v2(image_input: tf.keras.layers.Input,
# Build stem # Build stem
x = conv2d_block( x = conv2d_block(
x, inputs=x,
round_filters(stem_base_filters, config), conv_filters=round_filters(stem_base_filters, config),
config, config=config,
kernel_size=[stem_kernel_size, stem_kernel_size], kernel_size=[stem_kernel_size, stem_kernel_size],
strides=[2, 2], strides=[2, 2],
activation=activation, activation=activation,
kernel_initializer=conv_kernel_initializer,
name='stem') name='stem')
# Build blocks # Build blocks
...@@ -1061,11 +1096,13 @@ def mobilenet_edgetpu_v2(image_input: tf.keras.layers.Input, ...@@ -1061,11 +1096,13 @@ def mobilenet_edgetpu_v2(image_input: tf.keras.layers.Input,
if config.backbone_only: if config.backbone_only:
return backbone_levels return backbone_levels
# Build top # Build top
x = conv2d_block(x, x = conv2d_block(
round_filters(top_base_filters, config), inputs=x,
config, conv_filters=round_filters(top_base_filters, config),
activation=activation, config=config,
name='top') activation=activation,
kernel_initializer=conv_kernel_initializer,
name='top')
# Build classifier # Build classifier
pool_size = (x.shape.as_list()[1], x.shape.as_list()[2]) pool_size = (x.shape.as_list()[1], x.shape.as_list()[2])
...@@ -1075,7 +1112,7 @@ def mobilenet_edgetpu_v2(image_input: tf.keras.layers.Input, ...@@ -1075,7 +1112,7 @@ def mobilenet_edgetpu_v2(image_input: tf.keras.layers.Input,
x = tf.keras.layers.Conv2D( x = tf.keras.layers.Conv2D(
num_classes, num_classes,
1, 1,
kernel_initializer=DENSE_KERNEL_INITIALIZER, kernel_initializer=dense_kernel_initializer,
kernel_regularizer=tf.keras.regularizers.l2(weight_decay), kernel_regularizer=tf.keras.regularizers.l2(weight_decay),
bias_regularizer=tf.keras.regularizers.l2(weight_decay), bias_regularizer=tf.keras.regularizers.l2(weight_decay),
name='logits')( name='logits')(
......
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for mobilenet_edgetpu_v2_model_blocks."""
import tensorflow as tf
from official.projects.edgetpu.vision.modeling import custom_layers
from official.projects.edgetpu.vision.modeling import mobilenet_edgetpu_v2_model_blocks
class MobilenetEdgetpuV2ModelBlocksTest(tf.test.TestCase):
  """Tests for building MobilenetEdgeTPUV2 models from model blocks."""

  def setUp(self):
    super().setUp()
    self.model_config = mobilenet_edgetpu_v2_model_blocks.ModelConfig()

  def test_model_creation(self):
    """Builds a model with the default config; checks input/output shapes."""
    # NOTE: method name fixed from `test_model_creatation` (typo).
    model_input = tf.keras.layers.Input(shape=(224, 224, 1))
    model_output = mobilenet_edgetpu_v2_model_blocks.mobilenet_edgetpu_v2(
        image_input=model_input,
        config=self.model_config)
    test_model = tf.keras.Model(inputs=model_input, outputs=model_output)
    self.assertIsInstance(test_model, tf.keras.Model)
    self.assertEqual(test_model.input.shape, (None, 224, 224, 1))
    self.assertEqual(test_model.output.shape, (None, 1001))

  def test_model_with_customized_kernel_initializer(self):
    """Custom conv/dense initializers reach every convolutional layer."""
    self.model_config.conv_kernel_initializer = 'he_uniform'
    self.model_config.dense_kernel_initializer = 'glorot_normal'
    model_input = tf.keras.layers.Input(shape=(224, 224, 1))
    model_output = mobilenet_edgetpu_v2_model_blocks.mobilenet_edgetpu_v2(
        image_input=model_input,
        config=self.model_config)
    test_model = tf.keras.Model(inputs=model_input, outputs=model_output)
    # Collect conv-like layers in model order.
    conv_layer_stack = [
        layer for layer in test_model.layers
        if isinstance(layer, (tf.keras.layers.Conv2D,
                              tf.keras.layers.DepthwiseConv2D,
                              custom_layers.GroupConv2D))
    ]
    self.assertGreater(len(conv_layer_stack), 2)
    # The last Conv layer is used as a Dense layer.
    for layer in conv_layer_stack[:-1]:
      if isinstance(layer, custom_layers.GroupConv2D):
        self.assertIsInstance(layer.kernel_initializer,
                              tf.keras.initializers.GlorotUniform)
      elif isinstance(layer, tf.keras.layers.Conv2D):
        self.assertIsInstance(layer.kernel_initializer,
                              tf.keras.initializers.HeUniform)
      elif isinstance(layer, tf.keras.layers.DepthwiseConv2D):
        self.assertIsInstance(layer.depthwise_initializer,
                              tf.keras.initializers.HeUniform)
    self.assertIsInstance(conv_layer_stack[-1].kernel_initializer,
                          tf.keras.initializers.GlorotNormal)
# Run the test suite when this file is executed directly.
if __name__ == '__main__':
  tf.test.main()
...@@ -28,9 +28,9 @@ import dataclasses ...@@ -28,9 +28,9 @@ import dataclasses
from official.core import config_definitions as cfg from official.core import config_definitions as cfg
from official.core import exp_factory from official.core import exp_factory
from official.modeling import hyperparams from official.modeling import hyperparams
from official.vision.beta.configs import backbones_3d from official.vision.configs import backbones_3d
from official.vision.beta.configs import common from official.vision.configs import common
from official.vision.beta.configs import video_classification from official.vision.configs import video_classification
@dataclasses.dataclass @dataclasses.dataclass
......
...@@ -20,7 +20,7 @@ import tensorflow as tf ...@@ -20,7 +20,7 @@ import tensorflow as tf
from official.core import config_definitions as cfg from official.core import config_definitions as cfg
from official.core import exp_factory from official.core import exp_factory
from official.projects.movinet.configs import movinet from official.projects.movinet.configs import movinet
from official.vision.beta.configs import video_classification as exp_cfg from official.vision.configs import video_classification as exp_cfg
class MovinetConfigTest(tf.test.TestCase, parameterized.TestCase): class MovinetConfigTest(tf.test.TestCase, parameterized.TestCase):
......
...@@ -25,7 +25,7 @@ import tensorflow as tf ...@@ -25,7 +25,7 @@ import tensorflow as tf
from official.modeling import hyperparams from official.modeling import hyperparams
from official.projects.movinet.modeling import movinet_layers from official.projects.movinet.modeling import movinet_layers
from official.vision.beta.modeling.backbones import factory from official.vision.modeling.backbones import factory
# Defines a set of kernel sizes and stride sizes to simplify and shorten # Defines a set of kernel sizes and stride sizes to simplify and shorten
# architecture definitions for configs below. # architecture definitions for configs below.
......
...@@ -23,7 +23,7 @@ from typing import Any, Mapping, Optional, Sequence, Tuple, Union ...@@ -23,7 +23,7 @@ from typing import Any, Mapping, Optional, Sequence, Tuple, Union
import tensorflow as tf import tensorflow as tf
from official.modeling import tf_utils from official.modeling import tf_utils
from official.vision.beta.modeling.layers import nn_layers from official.vision.modeling.layers import nn_layers
# Default kernel weight decay that may be overridden # Default kernel weight decay that may be overridden
KERNEL_WEIGHT_DECAY = 1.5e-5 KERNEL_WEIGHT_DECAY = 1.5e-5
......
...@@ -19,7 +19,7 @@ from absl.testing import parameterized ...@@ -19,7 +19,7 @@ from absl.testing import parameterized
import tensorflow as tf import tensorflow as tf
from official.projects.movinet.modeling import movinet_layers from official.projects.movinet.modeling import movinet_layers
from official.vision.beta.modeling.layers import nn_layers from official.vision.modeling.layers import nn_layers
class MovinetLayersTest(parameterized.TestCase, tf.test.TestCase): class MovinetLayersTest(parameterized.TestCase, tf.test.TestCase):
......
...@@ -23,8 +23,8 @@ import tensorflow as tf ...@@ -23,8 +23,8 @@ import tensorflow as tf
from official.projects.movinet.configs import movinet as cfg from official.projects.movinet.configs import movinet as cfg
from official.projects.movinet.modeling import movinet_layers from official.projects.movinet.modeling import movinet_layers
from official.vision.beta.modeling import backbones from official.vision.modeling import backbones
from official.vision.beta.modeling import factory_3d as model_factory from official.vision.modeling import factory_3d as model_factory
@tf.keras.utils.register_keras_serializable(package='Vision') @tf.keras.utils.register_keras_serializable(package='Vision')
......
...@@ -38,8 +38,8 @@ import numpy as np ...@@ -38,8 +38,8 @@ import numpy as np
import tensorflow.compat.v2 as tf import tensorflow.compat.v2 as tf
import tensorflow_hub as hub import tensorflow_hub as hub
from official.vision.beta.configs import video_classification as video_classification_configs from official.vision.configs import video_classification as video_classification_configs
from official.vision.beta.tasks import video_classification from official.vision.tasks import video_classification
tf.enable_v2_behavior() tf.enable_v2_behavior()
......
...@@ -34,9 +34,6 @@ from absl import app ...@@ -34,9 +34,6 @@ from absl import app
from absl import flags from absl import flags
import gin import gin
# pylint: disable=unused-import
from official.common import registry_imports
# pylint: enable=unused-import
from official.common import distribute_utils from official.common import distribute_utils
from official.common import flags as tfm_flags from official.common import flags as tfm_flags
from official.core import task_factory from official.core import task_factory
...@@ -48,6 +45,7 @@ from official.modeling import performance ...@@ -48,6 +45,7 @@ from official.modeling import performance
# pylint: disable=unused-import # pylint: disable=unused-import
from official.projects.movinet.modeling import movinet from official.projects.movinet.modeling import movinet
from official.projects.movinet.modeling import movinet_model from official.projects.movinet.modeling import movinet_model
from official.vision import registry_imports
# pylint: enable=unused-import # pylint: enable=unused-import
FLAGS = flags.FLAGS FLAGS = flags.FLAGS
......
...@@ -25,7 +25,7 @@ from absl.testing import flagsaver ...@@ -25,7 +25,7 @@ from absl.testing import flagsaver
import tensorflow as tf import tensorflow as tf
from official.projects.movinet import train as train_lib from official.projects.movinet import train as train_lib
from official.vision.beta.dataloaders import tfexample_utils from official.vision.dataloaders import tfexample_utils
FLAGS = flags.FLAGS FLAGS = flags.FLAGS
......
# Quantization Aware Training Project for Computer Vision Models # Quantization Aware Training Project for Computer Vision Models
[TOC]
⚠️ Disclaimer: All datasets hyperlinked from this page are not owned or ⚠️ Disclaimer: All datasets hyperlinked from this page are not owned or
distributed by Google. The dataset is made available by third parties. distributed by Google. The dataset is made available by third parties.
Please review the terms and conditions made available by the third parties Please review the terms and conditions made available by the third parties
......
...@@ -435,7 +435,7 @@ class Conv2DBNBlockQuantized(tf.keras.layers.Layer): ...@@ -435,7 +435,7 @@ class Conv2DBNBlockQuantized(tf.keras.layers.Layer):
conv2d_quantized = _quantize_wrapped_layer( conv2d_quantized = _quantize_wrapped_layer(
tf.keras.layers.Conv2D, tf.keras.layers.Conv2D,
configs.Default8BitConvQuantizeConfig( configs.Default8BitConvQuantizeConfig(
['kernel'], ['activation'], False)) ['kernel'], ['activation'], not self._use_normalization))
self._conv0 = conv2d_quantized( self._conv0 = conv2d_quantized(
filters=self._filters, filters=self._filters,
kernel_size=self._kernel_size, kernel_size=self._kernel_size,
......
...@@ -21,6 +21,7 @@ import tensorflow as tf ...@@ -21,6 +21,7 @@ import tensorflow as tf
import tensorflow_model_optimization as tfmot import tensorflow_model_optimization as tfmot
from official.modeling import tf_utils from official.modeling import tf_utils
from official.projects.qat.vision.quantization import configs from official.projects.qat.vision.quantization import configs
from official.projects.qat.vision.quantization import helper
from official.vision.beta.modeling.decoders import aspp from official.vision.beta.modeling.decoders import aspp
from official.vision.beta.modeling.layers import nn_layers from official.vision.beta.modeling.layers import nn_layers
...@@ -61,7 +62,9 @@ def _quantize_wrapped_layer(cls, quantize_config): ...@@ -61,7 +62,9 @@ def _quantize_wrapped_layer(cls, quantize_config):
@tf.keras.utils.register_keras_serializable(package='Vision') @tf.keras.utils.register_keras_serializable(package='Vision')
class SqueezeExcitationQuantized(tf.keras.layers.Layer): class SqueezeExcitationQuantized(
helper.LayerQuantizerHelper,
tf.keras.layers.Layer):
"""Creates a squeeze and excitation layer.""" """Creates a squeeze and excitation layer."""
def __init__(self, def __init__(self,
...@@ -129,9 +132,8 @@ class SqueezeExcitationQuantized(tf.keras.layers.Layer): ...@@ -129,9 +132,8 @@ class SqueezeExcitationQuantized(tf.keras.layers.Layer):
# Convert hard_sigmoid activation to quantizable keras layers so each op # Convert hard_sigmoid activation to quantizable keras layers so each op
# can be properly quantized. # can be properly quantized.
# Formula is hard_sigmoid(x) = relu6(x + 3) * 0.16667. # Formula is hard_sigmoid(x) = relu6(x + 3) * 0.16667.
self._add = tfmot.quantization.keras.QuantizeWrapperV2( self._add_quantizer('add_three')
tf.keras.layers.Add(), configs.Default8BitQuantizeConfig([], [], self._add_quantizer('divide_six')
True))
self._relu6 = tfmot.quantization.keras.QuantizeWrapperV2( self._relu6 = tfmot.quantization.keras.QuantizeWrapperV2(
tf_utils.get_activation('relu6', use_keras_layer=True), tf_utils.get_activation('relu6', use_keras_layer=True),
configs.Default8BitActivationQuantizeConfig()) configs.Default8BitActivationQuantizeConfig())
...@@ -141,11 +143,12 @@ class SqueezeExcitationQuantized(tf.keras.layers.Layer): ...@@ -141,11 +143,12 @@ class SqueezeExcitationQuantized(tf.keras.layers.Layer):
self._gating_activation, use_keras_layer=True), self._gating_activation, use_keras_layer=True),
configs.Default8BitActivationQuantizeConfig()) configs.Default8BitActivationQuantizeConfig())
def _apply_gating_activation_layer(self, x: tf.Tensor) -> tf.Tensor: def _apply_gating_activation_layer(
self, x: tf.Tensor, training: bool) -> tf.Tensor:
if self._gating_activation == 'hard_sigmoid': if self._gating_activation == 'hard_sigmoid':
x = self._add([x, 3.0 * tf.ones_like(x)]) x = self._apply_quantizer('add_three', x + 3.0, training)
x = self._relu6(x) x = self._relu6(x)
x = self._multiply([x, 0.16667 * tf.ones_like(x)]) x = self._apply_quantizer('divide_six', x * 1.6667, training)
else: else:
x = self._gating_activation_layer(x) x = self._gating_activation_layer(x)
return x return x
...@@ -200,6 +203,7 @@ class SqueezeExcitationQuantized(tf.keras.layers.Layer): ...@@ -200,6 +203,7 @@ class SqueezeExcitationQuantized(tf.keras.layers.Layer):
configs.Default8BitActivationQuantizeConfig()) configs.Default8BitActivationQuantizeConfig())
self._create_gating_activation_layer() self._create_gating_activation_layer()
self._build_quantizer_vars()
super().build(input_shape) super().build(input_shape)
def get_config(self): def get_config(self):
...@@ -224,7 +228,7 @@ class SqueezeExcitationQuantized(tf.keras.layers.Layer): ...@@ -224,7 +228,7 @@ class SqueezeExcitationQuantized(tf.keras.layers.Layer):
x = self._reduce_mean_quantizer( x = self._reduce_mean_quantizer(
x, training, self._reduce_mean_quantizer_vars) x, training, self._reduce_mean_quantizer_vars)
x = self._activation_layer(self._se_reduce(x)) x = self._activation_layer(self._se_reduce(x))
x = self._apply_gating_activation_layer(self._se_expand(x)) x = self._apply_gating_activation_layer(self._se_expand(x), training)
x = self._multiply([x, inputs]) x = self._multiply([x, inputs])
return x return x
......
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Quantization helpers."""
import tensorflow_model_optimization as tfmot
class LayerQuantizerHelper:
  """Mixin that manages named tfmot quantizers for a Keras layer.

  Subclasses register quantizers by name with `_add_quantizer` (typically in
  `__init__` or `build`), create the quantizer variables once via
  `_build_quantizer_vars` (from `build`), and quantize tensors in `call`
  with `_apply_quantizer`.
  """

  def __init__(self, *args, **kwargs):
    # Maps quantizer name -> quantizer object / quantizer variables.
    self._quantizers = {}
    self._quantizer_vars = {}
    # Cooperative init so this mixin composes with tf.keras.layers.Layer.
    super().__init__(*args, **kwargs)

  def _all_value_quantizer(self):
    """Returns an asymmetric 8-bit quantizer tracking the full value range."""
    return tfmot.quantization.keras.quantizers.AllValuesQuantizer(
        num_bits=8, per_axis=False, symmetric=False, narrow_range=False)

  def _moving_average_quantizer(self):
    """Returns an asymmetric 8-bit moving-average range quantizer."""
    return tfmot.quantization.keras.quantizers.MovingAverageQuantizer(
        num_bits=8, per_axis=False, symmetric=False, narrow_range=False)

  def _add_quantizer(self, name, all_value_quantizer=False):
    """Registers a quantizer under `name`; moving-average unless requested."""
    make_quantizer = (
        self._all_value_quantizer
        if all_value_quantizer else self._moving_average_quantizer)
    self._quantizers[name] = make_quantizer()

  def _apply_quantizer(self, name, inputs, training, **kwargs):
    """Quantizes `inputs` using the registered quantizer `name`."""
    return self._quantizers[name](
        inputs, training, self._quantizer_vars[name], **kwargs)

  def _build_quantizer_vars(self):
    """Creates variables for every registered quantizer; call from build()."""
    for name in self._quantizers:
      self._quantizer_vars[name] = self._quantizers[name].build(
          tensor_shape=None, name=name, layer=self)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment