Commit c57e975a authored by saberkun

Merge pull request #10338 from srihari-humbarwadi:readme

PiperOrigin-RevId: 413033276
parents 7fb4f3cd acf4156e
......@@ -76,8 +76,8 @@ class EmbeddingSharedWeights(tf.keras.layers.Layer):
with tf.name_scope("embedding"):
# Create binary mask of size [batch_size, length]
embeddings = tf.gather(self.shared_weights, inputs)
mask = tf.cast(tf.not_equal(inputs, 0), embeddings.dtype)
embeddings *= tf.expand_dims(mask, -1)
# mask = tf.cast(tf.not_equal(inputs, 0), embeddings.dtype)
# embeddings *= tf.expand_dims(mask, -1)
# Scale embedding by the sqrt of the hidden size
embeddings *= self.hidden_size**0.5
......
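For context, a minimal sketch of the masking step that this hunk comments out, using made-up shapes (`vocab=6`, `hidden=4`) rather than the layer's real configuration: padded positions (token id 0) are zeroed out before the embeddings are scaled by the square root of the hidden size.
```python
import tensorflow as tf

# Hypothetical shared embedding table and a batch with two padded positions.
shared_weights = tf.random.normal([6, 4])        # [vocab, hidden]
inputs = tf.constant([[3, 5, 0, 0]])             # [batch, length], 0 = padding

embeddings = tf.gather(shared_weights, inputs)   # [batch, length, hidden]
mask = tf.cast(tf.not_equal(inputs, 0), embeddings.dtype)
embeddings *= tf.expand_dims(mask, -1)           # zero out padded positions
embeddings *= 4 ** 0.5                           # scale by sqrt(hidden_size)
```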
......@@ -196,13 +196,12 @@ class Transformer(tf.keras.Model):
with tf.name_scope("decode"):
# Prepare inputs to decoder layers by shifting targets, adding positional
# encoding and applying dropout.
with tf.name_scope("shift_targets"):
# Shift targets to the right, and remove the last element
targets = tf.pad(targets, [[0, 0], [1, 0]])[:, :-1]
decoder_inputs = self.embedding_softmax_layer(targets)
decoder_inputs = tf.cast(decoder_inputs, self.params["dtype"])
attention_bias = tf.cast(attention_bias, self.params["dtype"])
with tf.name_scope("shift_targets"):
# Shift targets to the right, and remove the last element
decoder_inputs = tf.pad(decoder_inputs,
[[0, 0], [1, 0], [0, 0]])[:, :-1, :]
with tf.name_scope("add_pos_encoding"):
length = tf.shape(decoder_inputs)[1]
pos_encoding = self.position_embedding(decoder_inputs)
......
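A minimal sketch of the shift that this hunk moves to after the embedding lookup, using a made-up `[1, 3, 2]` tensor: the embedded targets are shifted one step to the right along the length axis and the last position is dropped.
```python
import tensorflow as tf

# Hypothetical embedded decoder targets: [batch=1, length=3, hidden=2].
decoder_inputs = tf.constant([[[1., 1.], [2., 2.], [3., 3.]]])

# Pad one zero vector at the front of the length axis, then drop the last step.
shifted = tf.pad(decoder_inputs, [[0, 0], [1, 0], [0, 0]])[:, :-1, :]
# shifted == [[[0., 0.], [1., 1.], [2., 2.]]]
```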
......@@ -440,7 +440,6 @@ class TransformerTask(object):
opt = performance.configure_optimizer(
opt,
use_float16=params["dtype"] == tf.float16,
use_graph_rewrite=self.flags_obj.fp16_implementation == "graph_rewrite",
loss_scale=flags_core.get_loss_scale(
self.flags_obj, default_for_fp16="dynamic"))
......
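For reference, a rough sketch of what the remaining float16 path amounts to, using the public Keras mixed-precision API rather than the Model Garden's `configure_optimizer` helper (the flag handling here is illustrative only):
```python
import tensorflow as tf

opt = tf.keras.optimizers.Adam()

use_float16 = True  # would come from params["dtype"] == tf.float16
if use_float16:
  # Dynamic loss scaling keeps float16 gradients from underflowing.
  opt = tf.keras.mixed_precision.LossScaleOptimizer(opt, dynamic=True)
```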
......@@ -53,9 +53,9 @@ class TransformerTaskTest(tf.test.TestCase):
FLAGS.param_set = 'tiny'
FLAGS.use_synthetic_data = True
FLAGS.steps_between_evals = 1
FLAGS.train_steps = 2
FLAGS.train_steps = 1
FLAGS.validation_steps = 1
FLAGS.batch_size = 8
FLAGS.batch_size = 4
FLAGS.max_length = 1
FLAGS.num_gpus = 1
FLAGS.distribution_strategy = 'off'
......
......@@ -16,7 +16,6 @@
"""Utilities used in SQUAD task."""
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
from __future__ import print_function
import collections
......
......@@ -20,7 +20,7 @@ import sys
from setuptools import find_packages
from setuptools import setup
version = '2.5.0'
version = '2.7.0'
project_name = 'tf-models-official'
......@@ -61,8 +61,8 @@ if project_name == 'tf-models-nightly':
install_requires.append('tf-nightly')
install_requires.append('tensorflow-text-nightly')
else:
install_requires.append('tensorflow>=2.4.0')
install_requires.append('tensorflow-text>=2.4.0')
install_requires.append('tensorflow>=2.7.0')
install_requires.append('tensorflow-text>=2.7.0')
print('install_requires: ', install_requires)
print('dependency_links: ', dependency_links)
......@@ -88,5 +88,5 @@ setup(
},
install_requires=install_requires,
dependency_links=dependency_links,
python_requires='>=3.6',
python_requires='>=3.7',
)
# TensorFlow Model Garden Modeling Projects
This directory contains projects using TensorFlow Model Garden Modeling
libraries.
## Projects
* [NHNet](nhnet):
[Generating Representative Headlines for News Stories](https://arxiv.org/abs/2001.09386)
by Gu et al., 2020
......@@ -16,11 +16,10 @@
import dataclasses
import os
from typing import List, Optional, Union
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.modeling import hyperparams
from official.modeling import optimization
from official.modeling.hyperparams import config_definitions as cfg
from official.vision.beta.configs import common
......
......@@ -18,8 +18,8 @@
from absl.testing import parameterized
import tensorflow as tf
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.modeling.hyperparams import config_definitions as cfg
from official.projects.basnet.configs import basnet as exp_cfg
......
# Machine Learning Models Optimized for Google Tensor's Edge TPU
## Requirements
[![TensorFlow 2.4](https://img.shields.io/badge/TensorFlow-2.4-FF6F00?logo=tensorflow)](https://github.com/tensorflow/tensorflow/releases/tag/v2.4.0)
[![Python 3.7](https://img.shields.io/badge/Python-3.7-3776AB)](https://www.python.org/downloads/release/python-379/)
## Overview
<figure align="center">
<img width=70% src=https://storage.googleapis.com/tf_model_garden/models/edgetpu/images/neural%20architecture%20search.gif>
<figcaption><i>An illustration of NAS to find Edge TPU optimized models. Each column represents a stage in the neural network, with dots indicating different options and each color representing a different type of building block. A path from inputs (e.g., an image) to outputs (e.g., per-pixel label predictions) through the matrix represents a candidate neural network. In each iteration of the search, a neural network is formed using the blocks chosen at every stage, and the search algorithm aims to find neural networks that jointly minimize TPU latency and/or energy and maximize accuracy.
</i></figcaption>
</figure>
This repository contains machine learning models optimized for the Edge TPU in
Pixel 6's SoC,
[Google Tensor](https://blog.google/products/pixel/google-tensor-debuts-new-pixel-6-fall/).
We use Neural Architecture Search (NAS) to automate the process of designing ML
models and incentivize the search algorithms to discover models that achieve
higher quality as well as better latency and computing efficiency. This
automation also allows us to scale the development of ML models for a variety of
on-device tasks. We’re making these ML models publicly available through the
TensorFlow Model Garden and [TensorFlow Hub](https://tfhub.dev/s?q=edgetpu) to
enable researchers and developers to bootstrap further use case development on
Pixel 6.
### [Image Classification](https://github.com/tensorflow/models/tree/master/official/projects/edgetpu/vision#edgetpu-optimized-vision-models)
### [Object Detection](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf1_detection_zoo.md#pixel-6-edge-tpu-models)
### [Semantic Segmentation](https://github.com/tensorflow/models/tree/master/official/projects/edgetpu/vision#edgetpu-optimized-vision-models)
### [Natural Language Understanding](https://github.com/tensorflow/models/tree/master/official/projects/edgetpu/nlp#mobilebert-edgetpu)
# EdgeTPU-optimized Vision Models
## Overview
This project includes computer vision models optimized for the Edge TPU featured
in Pixel phones, Coral products, and more. These models significantly improve the
latency- and energy-vs-accuracy pareto-frontier compared to existing SOTA models
when running on Edge TPU devices.
## Image classification task
### Introduction
We present a family of computer vision models, MobileNetEdgeTPUV2, optimized for
the next-generation Edge TPU ML accelerator in the Google Tensor SoC that powers
the Pixel 6 phones. These models improve the latency-accuracy pareto-frontier
compared to existing SOTA on-device models, including their predecessor,
MobileNetEdgeTPU. MobileNetEdgeTPUV2 can be used as a standalone image
classification model or as a backbone for other computer vision tasks such as
object detection or semantic segmentation.
### Search space design
During the design of MobileNetEdgeTPUV2 we crafted a neural-network search space
that includes building blocks which run efficiently on the Edge TPU accelerator
while providing better algorithmic quality, and we leveraged AutoML to find the
optimal architectures. As one of the key optimizations, we introduce Group
Convolution based Inverted Bottleneck (IBN) blocks, which provide great
flexibility in trading off latency against accuracy.
The Inverted Bottleneck (IBN) is a widely used building block in neural networks
for mobile vision tasks. A conventional IBN uses pointwise convolutions for
expansion/projection before/after a depthwise convolution. It has previously been
shown that replacing the pointwise expansion and depthwise convolution with a
single full convolution can provide more trainable parameters while being faster.
However, one big limitation is that such full-convolution IBNs can become very
expensive in terms of latency and memory, especially for the narrow/deep tensors
seen in the later stages of vision models. This limits the use of “fused”
full-convolution IBNs throughout the model and leaves the depthwise IBN as the
only alternative.
<figure align="center">
<img width=70% src=https://storage.googleapis.com/tf_model_garden/models/edgetpu/images/readme-ibn-intro.png>
<figcaption><i>Inverted bottleneck block (IBN) variants: (a) Conventional with depthwise, (b) Fused-IBN, (c) GC-IBN with group convolutions in the expansion phase</i></figcaption>
</figure>
In this work we utilize Group Convolution (GC) as part of the fused expansion
when constructing IBNs (Figure 1). The GC-based IBN becomes a versatile block
that opens up a large design space between conventional depthwise IBNs and fused
full-convolution IBNs, controlled by the group-size parameter. Figure 2
demonstrates the search space enabled by GC-based IBNs, which allows a flexible
tradeoff between latency and the number of trainable parameters. GC-based IBNs
allow the number of trainable parameters to be increased gradually without
incurring the latency cost of full-convolution IBNs. Moreover, they can also be
faster than conventional IBNs with depthwise convolutions while providing more
trainable parameters.
<figure align="center">
<img width=60% src=https://storage.googleapis.com/tf_model_garden/models/edgetpu/images/readme-gc-comparison.png>
</figure>
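As a rough illustration (not the exact searched block), a GC-IBN can be sketched with standard Keras layers by passing a `groups` argument to the expansion convolution; `expansion`, `groups`, and `stride` below are made-up hyperparameters:
```python
import tensorflow as tf

def gc_ibn(x, out_filters, expansion=4, groups=4, stride=1, kernel_size=3):
  """Sketch of a group-convolution inverted bottleneck (GC-IBN) block."""
  in_filters = x.shape[-1]
  # Fused expansion: one grouped KxK convolution replaces the pointwise
  # expansion + depthwise convolution of a conventional IBN.
  y = tf.keras.layers.Conv2D(in_filters * expansion, kernel_size,
                             strides=stride, padding='same',
                             groups=groups, use_bias=False)(x)
  y = tf.keras.layers.BatchNormalization()(y)
  y = tf.keras.layers.ReLU()(y)
  # Pointwise projection back to the output width.
  y = tf.keras.layers.Conv2D(out_filters, 1, use_bias=False)(y)
  y = tf.keras.layers.BatchNormalization()(y)
  if stride == 1 and in_filters == out_filters:
    y = y + x  # residual connection
  return y
```
Setting `groups=1` recovers the fused full-convolution IBN, while larger group counts move the block toward the cost profile of a depthwise IBN.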
### Model performance on Edge TPU
The tradeoffs discussed above and exemplified in Figure 2 are highly dependent on
the tensor shapes and cannot be generalized across the whole neural network.
Hence, we turn to AutoML techniques to find the optimal block decisions and craft
a family of network architectures at different latency targets. Figure 3
demonstrates that the resulting MobileNetEdgeTPUV2 model family improves the
pareto-frontier compared to the existing on-device SOTA models when run on the
Edge TPU.
<figure align="center">
<img width=70% src=https://storage.googleapis.com/tf_model_garden/models/edgetpu/images/readme-edgetpu-classification-plot.png>
<figcaption><i>Comparison of Imagenet top-1 accuracy and Pixel 6 Edge TPU latency of MobileNetEdgeTPUV2 models with other on-device classification models</i></figcaption>
</figure>
#### On-device benchmarking of classification models
Results from on-device benchmarking of various int8-quantized image
classification models at 224x224 input resolution:
Model (Checkpoint) | Accuracy (int8) | Pixel 6 Edge TPU Latency (ms) | tflite
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :-------------: | :---------------------------: | :----:
[MobileNetEdgeTPUv2-Tiny](https://storage.cloud.google.com/tf_model_garden/models/edgetpu/checkpoint_and_tflite/vision/mobilenet-edgetpu-v2/tiny/mobilenet-edgetpu-v2-tiny.tar.gz) | 74.66% | 0.78 | [link](https://storage.cloud.google.com/tf_model_garden/models/edgetpu/checkpoint_and_tflite/vision/mobilenet-edgetpu-v2/tiny/mobilenet_edgetpu_v2_tiny.tflite)
[MobileNetEdgeTPUv2-XS](https://storage.cloud.google.com/tf_model_garden/models/edgetpu/checkpoint_and_tflite/vision/mobilenet-edgetpu-v2/tiny/mobilenet-edgetpu-v2-xs.tar.gz) | 75.79% | 0.82 | [link](https://storage.cloud.google.com/tf_model_garden/models/edgetpu/checkpoint_and_tflite/vision/mobilenet-edgetpu-v2/tiny/mobilenet_edgetpu_v2_xs.tflite)
[MobileNetEdgeTPUv2-S](https://storage.cloud.google.com/tf_model_garden/models/edgetpu/checkpoint_and_tflite/vision/mobilenet-edgetpu-v2/tiny/mobilenet-edgetpu-v2-s.tar.gz) | 77.36% | 1.03 | [link](https://storage.cloud.google.com/tf_model_garden/models/edgetpu/checkpoint_and_tflite/vision/mobilenet-edgetpu-v2/tiny/mobilenet_edgetpu_v2_s.tflite)
[MobileNetEdgeTPUv2-M](https://storage.cloud.google.com/tf_model_garden/models/edgetpu/checkpoint_and_tflite/vision/mobilenet-edgetpu-v2/tiny/mobilenet-edgetpu-v2-m.tar.gz) | 78.43% | 1.35 | [link](https://storage.cloud.google.com/tf_model_garden/models/edgetpu/checkpoint_and_tflite/vision/mobilenet-edgetpu-v2/tiny/mobilenet_edgetpu_v2_m.tflite)
[MobileNetEdgeTPUv2-L](https://storage.cloud.google.com/tf_model_garden/models/edgetpu/checkpoint_and_tflite/vision/mobilenet-edgetpu-v2/tiny/mobilenet-edgetpu-v2-l.tar.gz) | 79.00% | 1.64 | [link](https://storage.cloud.google.com/tf_model_garden/models/edgetpu/checkpoint_and_tflite/vision/mobilenet-edgetpu-v2/tiny/mobilenet_edgetpu_v2_l.tflite)
[MobileNetEdgeTPU dm1.0](https://storage.cloud.google.com/tf_model_garden/models/edgetpu/checkpoint_and_tflite/vision/mobilenet-edgetpu-v1/dm1p0/mobilenet-edgetpu-dm1p0.tar.gz) | 75.6% | 0.92 | [link](https://storage.cloud.google.com/tf_model_garden/models/edgetpu/checkpoint_and_tflite/vision/mobilenet-edgetpu-v1/dm1p0/mobilenet_edgetpu.tflite)
[MobileNetEdgeTPU dm1.25](https://storage.cloud.google.com/tf_model_garden/models/edgetpu/checkpoint_and_tflite/vision/mobilenet-edgetpu-v1/dm1p25/mobilenet-edgetpu-dm1p25.tar.gz) | 77.06% | 1.20 | [link](https://storage.cloud.google.com/tf_model_garden/models/edgetpu/checkpoint_and_tflite/vision/mobilenet-edgetpu-v1/dm1p25/mobilenet_edgetpu_dm1p25.tflite)
[MobileNetEdgeTPU dm1.5](https://storage.cloud.google.com/tf_model_garden/models/edgetpu/checkpoint_and_tflite/vision/mobilenet-edgetpu-v1/dm1p5/mobilenet-edgetpu-dm1p5.tar.gz) | 75.9% | 1.42 | [link](https://storage.cloud.google.com/tf_model_garden/models/edgetpu/checkpoint_and_tflite/vision/mobilenet-edgetpu-v1/dm1p5/mobilenet_edgetpu_dm1p5.tflite)
[MobileNetEdgeTPU dm1.75](https://storage.cloud.google.com/tf_model_garden/models/edgetpu/checkpoint_and_tflite/vision/mobilenet-edgetpu-v1/dm1p75/mobilenet-edgetpu-dm1p75.tar.gz) | 78.6% | 1.93 | [link](https://storage.cloud.google.com/tf_model_garden/models/edgetpu/checkpoint_and_tflite/vision/mobilenet-edgetpu-v1/dm1p75/mobilenet_edgetpu_dm1p75.tflite)
### Model performance on Pixel 6 CPU
Our primary optimization target is the Edge TPU accelerator; however, our search
space also includes operations that run well on the Pixel 6 CPU, so that the
models can reach a wide range of platforms. Moreover, we implement GC using a
functionally equivalent series of commonly used ML primitives (channelwise slice,
full convolution, concatenation), as shown in Figure 2, since a native GC
operation may not be supported on all target platforms. As a result, the
performance of MobileNetEdgeTPUV2 is also superior to other on-device models when
run on the Pixel 6 CPU, as shown in Figure 4.
<figure align="center">
<img width=70% src=https://storage.googleapis.com/tf_model_garden/models/edgetpu/images/readme-cpu-classification-plot.png>
<figcaption><i>Comparison of Imagenet top-1 accuracy and Pixel 6 latency of MobileNetEdgeTPUV2 models with other on-device classification models</i></figcaption>
</figure>
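The portability decomposition described above can be sketched with standard ops, splitting the channels, running an ordinary convolution per group, and concatenating the results (a sketch of the idea, not the production implementation):
```python
import tensorflow as tf

def grouped_conv_via_primitives(x, out_filters, groups, kernel_size=3):
  """Grouped convolution expressed as channelwise slice + full conv + concat."""
  slices = tf.split(x, num_or_size_splits=groups, axis=-1)  # channelwise slice
  outputs = [
      tf.keras.layers.Conv2D(out_filters // groups, kernel_size,
                             padding='same')(s)             # full convolution
      for s in slices
  ]
  return tf.concat(outputs, axis=-1)                        # concatenation
```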
## Semantic segmentation task
### Using classification models as backbone
We also present segmentation models based on a MobileNetEdgeTPUV2 backbone and
the DeepLabV3+ decoder and head (first used
[here](https://arxiv.org/pdf/1802.02611.pdf)). These models are optimized for the
next-generation Edge TPU accelerator featured in Pixel 6 phones and improve the
latency-accuracy pareto-frontier compared to their predecessors based on
MobileNetV2 and DeepLabV3+.
The segmentation model is built using the pretrained MobileNetEdgeTPUV2 as a
feature encoder and an ASPP decoder in conjunction with a DeepLabV3+ head.
Separable convolutions are used to reduce the size of the model, as sketched
below.
<figure align="center">
<img width=60% src=https://storage.googleapis.com/tf_model_garden/models/edgetpu/images/readme-seg-flow.png>
<figcaption></figcaption>
</figure>
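As a small illustration of that choice (with made-up filter counts), a depthwise-separable 3x3 convolution replaces the `k*k*C_in*C_out` weights of a standard convolution with roughly `k*k*C_in + C_in*C_out`:
```python
import tensorflow as tf

# A standard 3x3 conv mapping 256 -> 256 channels has 3*3*256*256 ≈ 590k weights;
# the separable version below has about 3*3*256 + 256*256 ≈ 68k.
separable = tf.keras.layers.SeparableConv2D(256, 3, padding='same', use_bias=False)
```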
#### Using architecture search to find high-quality, low-latency segmentation models
To further improve the quality of on-device segmentation models, we invoke
architecture search to jointly search for the model's feature extractor and the
segmentation head. Autoseg-EdgeTPU is a set of searched segmentation models
customized for the Edge TPU in Pixel 6. The feature extractor is derived from the
Edge TPU search space, where a mixture of IBN and fused-IBN blocks is used. We
automatically find the optimal kernel size, channel multiplier, expansion ratio,
and group count on a per-layer basis using a reinforcement learning algorithm.
The segmentation head is an optimized version of the
[Bi-FPN](https://arxiv.org/abs/1911.09070) head, with a customized number of
repeats and feature selection.
#### Argmax fusion to improve segmentation model latency
The last two levels of the model (bilinear resizing and argmax) contribute
significantly to the on-device latency. This is due to the large activation size
between these layers (512 x 512 x number of classes). These layers can be merged
without significantly impacting quality by computing the argmax at a smaller
resolution and scaling the resulting class map to the desired size with
nearest-neighbor upsampling, as sketched below.
<figure align="center">
<img width=60% src=https://storage.googleapis.com/tf_model_garden/models/edgetpu/images/readme-seg-fused-argmax.png>
</figure>
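A minimal sketch of the fusion (assuming a 512x512 output and logits at a lower resolution such as 128x128): take the argmax over classes at the low resolution, then upsample the class map with nearest-neighbor interpolation instead of bilinearly resizing the full logits first.
```python
import tensorflow as tf

def fused_argmax(logits, output_size=512):
  """Argmax at low resolution, then nearest-neighbor upsampling of the labels."""
  labels = tf.argmax(logits, axis=-1, output_type=tf.int32)  # [B, h, w]
  labels = tf.image.resize(labels[..., tf.newaxis],
                           [output_size, output_size],
                           method='nearest')                 # [B, 512, 512, 1]
  return tf.squeeze(labels, axis=-1)
```
This mirrors the `resize128,argmax,resize512,squeeze` option listed in the export section below.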
### On-device benchmarking of segmentation models
<figure align="center">
<img src=https://storage.googleapis.com/tf_model_garden/models/edgetpu/images/readme-seg-plot.png width=60%>
<figcaption><i>Performance of AutosegEdgeTPU and MobileNetEdgeTPUV2+DeeplabV3+ models on the 32-class ADE20K semantic segmentation task.</i></figcaption>
</figure>
Model Name (Checkpoint) | Backbone | Segmentation Head| #Parameters (million)| ADE20K 32-class mIOU| Pixel 6 EdgeTPU latency (ms)| Tflite |
|:---|:-----------------------:|:----------------:|:--------------------:|:-------------------:|:---------------------------:|:------:|
deeplabv3plus_mobilenet_edgetpuv2_baseline| MobileNet V2 (baseline)| DeeplabV3+ | 2.34 | 54.06% | 7.5 | link |
[deeplabv3plus_mobilenet_edgetpuv2_xs](https://storage.cloud.google.com/tf_model_garden/models/edgetpu/checkpoint_and_tflite/vision/segmentation-edgetpu/checkpoints/deeplabv3plus_mobilenet_edgetpuv2_xs_ade20k_32/deeplabv3plus_mobilenet_edgetpuv2_xs_ade20k_32.tar.gz)| MobileNetEdgeTPUV2-XS | DeeplabV3+ | 3.6 | 56.02% | 5.2 | [link](https://storage.cloud.google.com/tf_model_garden/models/edgetpu/checkpoint_and_tflite/vision/segmentation-edgetpu/tflite/default_argmax/deeplabv3plus_mobilenet_edgetpuv2_xs_ade20k_32.tflite) |
[deeplabv3plus_mobilenet_edgetpuv2_s](https://storage.cloud.google.com/tf_model_garden/models/edgetpu/checkpoint_and_tflite/vision/segmentation-edgetpu/checkpoints/deeplabv3plus_mobilenet_edgetpuv2_s_ade20k_32/deeplabv3plus_mobilenet_edgetpuv2_s_ade20k_32.tar.gz)| MobileNetEdgeTPUV2-S | DeeplabV3+ | 5.2 | 59.43% | 5.9 | [link](https://storage.cloud.google.com/tf_model_garden/models/edgetpu/checkpoint_and_tflite/vision/segmentation-edgetpu/tflite/default_argmax/deeplabv3plus_mobilenet_edgetpuv2_s_ade20k_32.tflite) |
[deeplabv3plus_mobilenet_edgetpuv2_m](https://storage.cloud.google.com/tf_model_garden/models/edgetpu/checkpoint_and_tflite/vision/segmentation-edgetpu/checkpoints/deeplabv3plus_mobilenet_edgetpuv2_m_ade20k_32/deeplabv3plus_mobilenet_edgetpuv2_m_ade20k_32.tar.gz)| MobileNetEdgeTPUV2-M | DeeplabV3+ | 7.7 | 59.81% | 7.2 | [link](https://storage.cloud.google.com/tf_model_garden/models/edgetpu/checkpoint_and_tflite/vision/segmentation-edgetpu/tflite/default_argmax/deeplabv3plus_mobilenet_edgetpuv2_m_ade20k_32.tflite) |
[autoseg_edgetpu_xs](https://storage.cloud.google.com/tf_model_garden/models/edgetpu/checkpoint_and_tflite/vision/segmentation-edgetpu/checkpoints/autoseg_edgetpu_xs/autoseg_edgetpu_xs.tar.gz)| AutosegEdgeTPU-XS | BiFPN | 2.9 | 59.64% | 5.4 | [link](https://storage.cloud.google.com/tf_model_garden/models/edgetpu/checkpoint_and_tflite/vision/segmentation-edgetpu/tflite/default_argmax/autoseg_edgetpu_xs.tflite) |
[autoseg_edgetpu_s](https://storage.cloud.google.com/tf_model_garden/models/edgetpu/checkpoint_and_tflite/vision/segmentation-edgetpu/checkpoints/autoseg_edgetpu_s/autoseg_edgetpu_s.tar.gz)| AutosegEdgeTPU-S | BiFPN | 3.1 | 61.31% | 5.7 | [link](https://storage.cloud.google.com/tf_model_garden/models/edgetpu/checkpoint_and_tflite/vision/segmentation-edgetpu/tflite/default_argmax/autoseg_edgetpu_s.tflite) |
By fusing the argmax with the resize operator as shown above, it is possible to
further improve the on-device latency of the segmentation models without
significantly impacting the quality:
Note: Models with default argmax and fused argmax use the same checkpoint, since
there is no parameter change.
| Model Name | ADE20K 32-class mIOU| Pixel 6 EdgeTPU latency (ms)| tflite |
|----------------------|:------------------:|:----------------------:|:---------------------:|
| deeplabv3plus_mobilenet_edgetpuv2_xs | 56% | 3.4 | [link](https://storage.cloud.google.com/tf_model_garden/models/edgetpu/checkpoint_and_tflite/vision/segmentation-edgetpu/tflite/fused_argmax/deeplabv3plus_mobilenet_edgetpuv2_xs_ade20k_32.tflite) |
| deeplabv3plus_mobilenet_edgetpuv2_s | 59.41% | 4.2 | [link](https://storage.cloud.google.com/tf_model_garden/models/edgetpu/checkpoint_and_tflite/vision/segmentation-edgetpu/tflite/fused_argmax/deeplabv3plus_mobilenet_edgetpuv2_s_ade20k_32.tflite) |
| deeplabv3plus_mobilenet_edgetpuv2_m | 59.79% | 5.5 | [link](https://storage.cloud.google.com/tf_model_garden/models/edgetpu/checkpoint_and_tflite/vision/segmentation-edgetpu/tflite/fused_argmax/deeplabv3plus_mobilenet_edgetpuv2_m_ade20k_32.tflite) |
| autoseg_edgetpu_xs | 59.62% | 3.6 | [link](https://storage.cloud.google.com/tf_model_garden/models/edgetpu/checkpoint_and_tflite/vision/segmentation-edgetpu/tflite/fused_argmax/autoseg_edgetpu_xs.tflite) |
| autoseg_edgetpu_s | 61.28% | 3.9 | [link](https://storage.cloud.google.com/tf_model_garden/models/edgetpu/checkpoint_and_tflite/vision/segmentation-edgetpu/tflite/fused_argmax/autoseg_edgetpu_s.tflite) |
## Object detection task
EdgeTPU-optimized models for object detection are hosted in the [TensorFlow object
detection API](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf1_detection_zoo.md#pixel-6-edge-tpu-models).
### Training the models
Note that `EXPERIMENT_TYPE` has to be one of the preregistered classification
configs, such as `mobilenet_edgetpu_xs`, for classification models. If you train
a segmentation model, `EXPERIMENT_TYPE` has to be one of the preregistered
segmentation configs, such as `seg_deeplabv3plus_mobilenet_edgetpuv2_s_ade20k` or
`autoseg_edgetpu_xs`.
```
EXPERIMENT_NAME=xxx # Change this for your run, for example, 'mobilenet-edgetpu-test-run'
EXPERIMENT_TYPE=xxx # Change this for your run, for example, 'mobilenet_edgetpu_v2_xs'
$ python3 train.py \
--experiment_name=${EXPERIMENT_NAME} \
--experiment_type=${EXPERIMENT_TYPE} \
--mode=train_and_eval
```
### From training to quantized inference deployment
To export quantized tflite models using tensorflow post-training quantization:
**For classification models**:
```
$ python3 serving/export_tflite.py \
--model_name=${EXPERIMENT_TYPE} \
--ckpt_path=${CHECKPOINT} \
--dataset_dir=/path/to/calibration/dataset \
--output_dir=/tmp \
--quantize \
--image_size=224
```
Note that `EXPERIMENT_TYPE` has to be one of the preregistered classification
configs, such as `mobilenet_edgetpu_xs`.
**For segmentation models**:
```
$ python3 serving/export_tflite.py \
--model_name=${EXPERIMENT_TYPE} \
--ckpt_path=${CHECKPOINT} \
--dataset_dir=/path/to/calibration/dataset \
--output_dir=/tmp \
--quantize \
--quantize_less_restrictive \
--image_size=512 \
--finalize_method=${ARGMAX_FUSION}
```
`EXPERIMENT_TYPE` has to be one of the preregistered segmentation configs,
such as `deeplabv3plus_mobilenet_edgetpuv2_s_ade20k_32`.
`ARGMAX_FUSION` has to be one of the following:
- `resize512,argmax`: Argmax applied after scaling the output to 512x512.
- `resize256,argmax,resize512,squeeze`: Scale the output to 256x256, apply
argmax, then scale to 512x512 using nearest-neighbor upsampling.
- `resize128,argmax,resize512,squeeze`: Scale the output to 128x128, apply
argmax, then scale to 512x512 using nearest-neighbor upsampling.
### On-device benchmarking
The models in this repository are compatible with NNAPI and can be benchmarked
on Pixel 6 devices using the
[tflite benchmark tool](https://www.tensorflow.org/lite/performance/measurement).
When using the benchmark tool, enable NNAPI by setting the `use_nnapi`
command-line argument to `true` and specifying the `nnapi_accelerator_name` as
`google-edgetpu`:
```shell
$ bazel build -c opt --config=android_arm64 tensorflow/lite/tools/benchmark:benchmark_model
# Push binary to device
$ adb push bazel-bin/tensorflow/lite/tools/benchmark/benchmark_model /data/local/tmp
# Push model to device
$ adb push /path/to/model.tflite /data/local/tmp/
# Run on-device benchmarking
$ adb shell /data/local/tmp/benchmark_model --graph=/data/local/tmp/model.tflite \
    --use_nnapi=true --nnapi_accelerator_name=google-edgetpu
```
......@@ -21,11 +21,10 @@ deeplab v3 segmentation head.
import dataclasses
import os
from typing import Optional
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.modeling import hyperparams
from official.modeling import optimization
from official.modeling.hyperparams import config_definitions as cfg
from official.vision.beta.configs import backbones
from official.vision.beta.configs import common
from official.vision.beta.configs import decoders
......
......@@ -17,11 +17,10 @@
import os
import tensorflow as tf
from official.legacy.image_classification import preprocessing
from official.projects.edgetpu.vision.modeling import common_modules
from official.projects.edgetpu.vision.modeling import mobilenet_edgetpu_v1_model
from official.projects.edgetpu.vision.modeling import mobilenet_edgetpu_v1_model_blocks
from official.vision.image_classification import preprocessing
# TODO(b/151324383): Enable once training is supported for mobilenet-edgetpu
EXAMPLE_IMAGE = ('third_party/tensorflow_models/official/vision/'
......
......@@ -110,6 +110,7 @@ def get_export_config_from_flags():
dataset_split=FLAGS.dataset_split)
export_config = export_util.ExportConfig(
model_name=FLAGS.model_name,
output_layer=FLAGS.output_layer,
ckpt_path=FLAGS.ckpt_path,
ckpt_format=FLAGS.ckpt_format,
output_dir=FLAGS.output_dir,
......
......@@ -69,7 +69,9 @@ class ExportConfig(base_config.Config):
"""Configuration for exporting models as tflite and saved_models.
Attributes:
model_name: One of the registered model names
model_name: One of the registered model names.
output_layer: Layer name to take the output from. Can be used to take the
output from an intermediate layer.
ckpt_path: Path of the training checkpoint. If not provided tflite with
random parameters is exported.
ckpt_format: Format of the checkpoint. tf_checkpoint is for ckpt files from
......@@ -92,7 +94,8 @@ class ExportConfig(base_config.Config):
resize bilinear to 128x128, then argmax then resize nn to 512x512
"""
quantization_config: QuantizationConfig = QuantizationConfig()
model_name: str = None
model_name: Optional[str] = None
output_layer: Optional[str] = None
ckpt_path: Optional[str] = None
ckpt_format: Optional[str] = 'tf_checkpoint'
output_dir: str = '/tmp/'
......
......@@ -112,7 +112,6 @@ class EdgeTPUTask(base_task.Task):
else:
raise ValueError('Model has to be mobilenet-edgetpu model or searched'
'model with given saved model path.')
model.summary()
return model
......
......@@ -19,8 +19,8 @@ from absl import logging
import tensorflow as tf
from official.common import dataset_fn
from official.core import config_definitions as cfg
from official.core import task_factory
from official.modeling.hyperparams import config_definitions as cfg
from official.projects.edgetpu.vision.configs import semantic_segmentation_config as exp_cfg
from official.projects.edgetpu.vision.configs import semantic_segmentation_searched_config as searched_cfg
from official.projects.edgetpu.vision.modeling import mobilenet_edgetpu_v1_model
......
......@@ -12,6 +12,3 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Keras-CV layers package definition."""
from official.vision.keras_cv.losses.focal_loss import FocalLoss
from official.vision.keras_cv.losses.loss_utils import multi_level_flatten