".github/git@developer.sourcefind.cn:wangsen/mineru.git" did not exist on "4c450aa192a895d5a714e7c38ea1e22d029dccf7"
Unverified Commit f91b59c6 authored by thunderfyc's avatar thunderfyc Committed by GitHub
Browse files

Initial checkin of sequence_projection (#9153)



* Initial checkin of sequence_projection

* Fix the path

* Fix paths and deps

* Fix path and deps
Co-authored-by: default avatarLearn2Compress <expander-robot@google.com>
parent 67efd3ab
# gRPC using libcares in opensource has some issues.
build --define=grpc_no_ares=true
# Suppress all warning messages.
build:short_logs --output_filter=DONT_MATCH_ANYTHING
# Force python3
build --action_env=PYTHON_BIN_PATH=/usr/bin/python3
build --repo_env=PYTHON_BIN_PATH=/usr/bin/python3
build --python_path=/usr/bin/python3
common --experimental_repo_remote_exec
build:manylinux2010 --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010:toolchain
build -c opt
build --cxxopt="-std=c++14"
build --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0"
build --auto_output_filter=subpackages
build --copt="-Wall" --copt="-Wno-sign-compare"
build --linkopt="-lrt -lm"
# TF isn't built in dbg mode, so our dbg builds will segfault due to inconsistency
# of defines when using tf's headers. In particular in refcount.h.
build --cxxopt="-DNDEBUG"
# Options from ./configure
try-import %workspace%/.reverb.bazelrc
licenses(["notice"])
package(
default_visibility = [
"//:__subpackages__",
],
)
# How to Contribute
We'd love to accept your patches and contributions to this project. There are
just a few small guidelines you need to follow.
## Contributor License Agreement
Contributions to this project must be accompanied by a Contributor License
Agreement. You (or your employer) retain the copyright to your contribution;
this simply gives us permission to use and redistribute your contributions as
part of the project. Head over to <https://cla.developers.google.com/> to see
your current agreements on file or to sign a new one.
You generally only need to submit a CLA once, so if you've already submitted one
(even if it was for a different project), you probably don't need to do it
again.
## Code reviews
All submissions, including submissions by project members, require review. We
use GitHub pull requests for this purpose. Consult
[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
information on using pull requests.
## Community Guidelines
This project follows
[Google's Open Source Community Guidelines](https://opensource.google/conduct/).
# Sequence Projection Models
This repository contains implementations of the following papers:
* [*PRADO: Projection Attention Networks for Document Classification On-Device*](https://www.aclweb.org/anthology/D19-1506/)
* [*Self-Governing Neural Networks for On-Device Short Text Classification*](https://www.aclweb.org/anthology/D18-1105/)
## Description
We provide a family of models that project a sequence to a fixed-size feature
vector. The idea is to build embedding-free models that minimize the model
size. Instead of using an embedding table to look up embeddings, sequence
projection models compute them on the fly.
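For intuition, here is a minimal sketch of the idea; this is not the library's
actual op (the real `sequence_string_projection` op is a C++ kernel with
ternary features, distortion, and EOS handling), and all names in it are
hypothetical:

```python
import hashlib

def project_token(token, feature_size=8):
    """Hash a token into a fixed-size ternary feature vector."""
    digest = hashlib.md5(token.encode("utf-8")).digest()
    # Map each byte to one of {-1.0, 0.0, 1.0}; no embedding table is stored.
    return [float(digest[i % len(digest)] % 3) - 1.0
            for i in range(feature_size)]

def project_sequence(text, feature_size=8):
    """Project a whitespace-tokenized string to per-token features."""
    return [project_token(token, feature_size) for token in text.split()]

print(project_sequence("hello world"))
```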
## History
### August 24, 2020
* Add PRADO and SGNN implementations.
## Authors or Maintainers
* Prabhu Kaliamoorthi
* Yicheng Fan ([@thunderfyc](https://github.com/thunderfyc))
## Requirements
[![TensorFlow 2.3](https://img.shields.io/badge/TensorFlow-2.3-FF6F00?logo=tensorflow)](https://github.com/tensorflow/tensorflow/releases/tag/v2.3.0)
[![Python 3.6](https://img.shields.io/badge/Python-3.6-3776AB)](https://www.python.org/downloads/release/python-360/)
## Training
Train a PRADO model on the civil comments dataset:
```shell
bazel run -c opt prado:runner -- \
--config_path=$(pwd)/prado/civil_comments_prado.txt \
--runner_mode=train --logtostderr --output_dir=/tmp/prado
```
Train an SGNN model to detect languages:
```shell
bazel run -c opt sgnn:train -- --logtostderr --output_dir=/tmp/sgnn
```
## Evaluation
Evaluate the PRADO model:
```shell
bazel run -c opt prado:runner -- \
--config_path=$(pwd)/prado/civil_comments_prado.txt \
--runner_mode=eval --output_dir=/tmp/prado --logtostderr
```
Evaluate the SGNN model:
```shell
bazel run -c opt sgnn:run_tflite -- --model=/tmp/sgnn/model.tflite "Hello world"
```
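## Export
`runner.py` also accepts `--runner_mode=export`, which freezes the trained
model and writes a TFLite model (`model.tflite`) to the output directory. A
plausible invocation, assuming a trained checkpoint in `/tmp/prado`:
```shell
bazel run -c opt prado:runner -- \
  --config_path=$(pwd)/prado/civil_comments_prado.txt \
  --runner_mode=export --output_dir=/tmp/prado --logtostderr
```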
## References
1. **Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift**<br />
Sergey Ioffe, Christian Szegedy <br />
[[link]](https://arxiv.org/abs/1502.03167). In ICML, 2015.
2. **Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference**<br />
Benoit Jacob, Skirmantas Kligys, Bo Chen, Menglong Zhu, Matthew Tang, Andrew Howard, Hartwig Adam, Dmitry Kalenichenko <br />
[[link]](https://arxiv.org/abs/1712.05877). In CVPR, 2018.
3. **PRADO: Projection Attention Networks for Document Classification On-Device**<br/>
Prabhu Kaliamoorthi, Sujith Ravi, Zornitsa Kozareva <br />
[[link]](https://www.aclweb.org/anthology/D19-1506/). In EMNLP-IJCNLP, 2019.
4. **Self-Governing Neural Networks for On-Device Short Text Classification**<br />
Sujith Ravi, Zornitsa Kozareva <br />
[[link]](https://www.aclweb.org/anthology/D18-1105). In EMNLP, 2018.
## License
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
This project is licensed under the terms of the **Apache License 2.0**.
workspace(name = "tensorflow_models_sequence_projection")
load("@bazel_tools//tools/build_defs/repo:git.bzl", "new_git_repository")
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
load("@//third_party/py:python_configure.bzl", "python_configure")
http_archive(
name = "io_bazel_rules_closure",
sha256 = "5b00383d08dd71f28503736db0500b6fb4dda47489ff5fc6bed42557c07c6ba9",
strip_prefix = "rules_closure-308b05b2419edb5c8ee0471b67a40403df940149",
urls = [
"https://storage.googleapis.com/mirror.tensorflow.org/github.com/bazelbuild/rules_closure/archive/308b05b2419edb5c8ee0471b67a40403df940149.tar.gz",
"https://github.com/bazelbuild/rules_closure/archive/308b05b2419edb5c8ee0471b67a40403df940149.tar.gz", # 2019-06-13
],
)
http_archive(
name = "org_tensorflow",
sha256 = "fc6d7c57cd9427e695a38ad00fb6ecc3f623bac792dd44ad73a3f85b338b68be",
strip_prefix = "tensorflow-8a4ffe2e1ae722cff5306778df0cfca8b7f503fe",
urls = [
"https://github.com/tensorflow/tensorflow/archive/8a4ffe2e1ae722cff5306778df0cfca8b7f503fe.tar.gz",
],
)
http_archive(
name = "org_tflite_support",
strip_prefix = "tflite-support-9bc45390fc99e627348578ad1f3365f0d555db9a",
sha256 = "06df23dbe89f18eb198643c34bc8aa6a0db3e921242e4042452f1fc0a76f4085",
urls = ["https://github.com/tensorflow/tflite-support/archive/9bc45390fc99e627348578ad1f3365f0d555db9a.zip"],
)
http_archive(
name = "org_tensorflow_text",
sha256 = "f64647276f7288d1b1fe4c89581d51404d0ce4ae97f2bcc4c19bd667549adca8",
strip_prefix = "text-2.2.0",
urls = [
"https://github.com/tensorflow/text/archive/v2.2.0.zip",
],
patches = ["@//third_party:tensorflow_text_fix_local_config_tf.patch"],
patch_args = ["-p1"],
repo_mapping = {"@com_google_re2": "@com_googlesource_code_re2"},
)
load("//tf_ops:repo.bzl", "cc_tf_configure", "reverb_protoc_deps")
cc_tf_configure()
PROTOC_VERSION = "3.9.0"
PROTOC_SHA256 = "15e395b648a1a6dda8fd66868824a396e9d3e89bc2c8648e3b9ab9801bea5d55"
reverb_protoc_deps(version = PROTOC_VERSION, sha256 = PROTOC_SHA256)
# ABSL cpp library.
http_archive(
name = "com_google_absl",
sha256 = "f368a8476f4e2e0eccf8a7318b98dafbe30b2600f4e3cf52636e5eb145aba06a", # SHARED_ABSL_SHA
strip_prefix = "abseil-cpp-df3ea785d8c30a9503321a3d35ee7d35808f190d",
urls = [
"https://storage.googleapis.com/mirror.tensorflow.org/github.com/abseil/abseil-cpp/archive/df3ea785d8c30a9503321a3d35ee7d35808f190d.tar.gz",
"https://github.com/abseil/abseil-cpp/archive/df3ea785d8c30a9503321a3d35ee7d35808f190d.tar.gz",
],
)
http_archive(
name = "rules_cc",
strip_prefix = "rules_cc-master",
urls = ["https://github.com/bazelbuild/rules_cc/archive/master.zip"],
)
# GoogleTest/GoogleMock framework. Used by most unit-tests.
http_archive(
name = "com_google_googletest",
urls = ["https://github.com/google/googletest/archive/master.zip"],
strip_prefix = "googletest-master",
)
# gflags needed by glog
http_archive(
name = "com_github_gflags_gflags",
sha256 = "6e16c8bc91b1310a44f3965e616383dbda48f83e8c1eaa2370a215057b00cabe",
strip_prefix = "gflags-77592648e3f3be87d6c7123eb81cbad75f9aef5a",
urls = [
"https://mirror.bazel.build/github.com/gflags/gflags/archive/77592648e3f3be87d6c7123eb81cbad75f9aef5a.tar.gz",
"https://github.com/gflags/gflags/archive/77592648e3f3be87d6c7123eb81cbad75f9aef5a.tar.gz",
],
)
# glog
http_archive(
name = "com_google_glog",
sha256 = "f28359aeba12f30d73d9e4711ef356dc842886968112162bc73002645139c39c",
strip_prefix = "glog-0.4.0",
urls = ["https://github.com/google/glog/archive/v0.4.0.tar.gz"],
)
http_archive(
name = "absl_py",
sha256 = "603febc9b95a8f2979a7bdb77d2f5e4d9b30d4e0d59579f88eba67d4e4cc5462",
strip_prefix = "abseil-py-pypi-v0.9.0",
urls = [
"https://storage.googleapis.com/mirror.tensorflow.org/github.com/abseil/abseil-py/archive/pypi-v0.9.0.tar.gz",
"https://github.com/abseil/abseil-py/archive/pypi-v0.9.0.tar.gz",
],
)
http_archive(
name = "utf_archive",
build_file = "@//third_party:utf.BUILD",
sha256 = "262a902f622dcd28e05b8a4be10da0aa3899050d0be8f4a71780eed6b2ea65ca",
urls = [
"https://mirror.bazel.build/9fans.github.io/plan9port/unix/libutf.tgz",
"https://9fans.github.io/plan9port/unix/libutf.tgz",
],
)
#-----------------------------------------------------------------------------
# proto
#-----------------------------------------------------------------------------
# proto_library, cc_proto_library and java_proto_library rules implicitly depend
# on @com_google_protobuf//:proto, @com_google_protobuf//:cc_toolchain and
# @com_google_protobuf//:java_toolchain, respectively.
# This statement defines the @com_google_protobuf repo.
http_archive(
name = "com_google_protobuf",
strip_prefix = "protobuf-3.8.0",
urls = ["https://github.com/google/protobuf/archive/v3.8.0.zip"],
sha256 = "1e622ce4b84b88b6d2cdf1db38d1a634fe2392d74f0b7b74ff98f3a51838ee53",
)
load("//third_party/flatbuffers:workspace.bzl", flatbuffers = "repo")
flatbuffers()
load("@org_tensorflow//tensorflow:workspace.bzl", "tf_workspace")
tf_workspace(tf_repo_name = "org_tensorflow")
# TF submodule compilation doesn't take care of grpc deps. Do it manually here.
load("@com_github_grpc_grpc//bazel:grpc_deps.bzl", "grpc_deps")
grpc_deps()
load(
"@build_bazel_rules_apple//apple:repositories.bzl",
"apple_rules_dependencies",
)
apple_rules_dependencies()
load(
"@build_bazel_apple_support//lib:repositories.bzl",
"apple_support_dependencies",
)
apple_support_dependencies()
load("@upb//bazel:repository_defs.bzl", "bazel_version_repository")
bazel_version_repository(name = "bazel_version")
# Set up Android.
load("//third_party/android:android_configure.bzl", "android_configure")
android_configure(name="local_config_android")
load("@local_config_android//:android.bzl", "android_workspace")
android_workspace()
python_configure(name = "local_config_python")
new_git_repository(
name = "icu4c",
tag = "release-66-1",
remote = "https://github.com/unicode-org/icu",
build_file = "@//third_party:icu.BUILD",
patch_cmds = [
"find . -type f -exec sed -i 's/#\s*include \"unicode/#include \"icu4c\/source\/common\/unicode/g' {} \;",
],
)
http_archive(
name = "farmhash_archive",
build_file = "//third_party:farmhash.BUILD",
sha256 = "6560547c63e4af82b0f202cb710ceabb3f21347a4b996db565a411da5b17aba0", # SHARED_FARMHASH_SHA
strip_prefix = "farmhash-816a4ae622e964763ca0862d9dbd19324a1eaf45",
urls = [
"https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz",
"https://github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz",
],
)
# Demo app for sequence projection model
licenses(["notice"])
package(
default_visibility = [
"//:__subpackages__",
],
)
py_library(
name = "common_layer",
srcs = ["common_layer.py"],
srcs_version = "PY3",
)
py_library(
name = "prado_model",
srcs = ["prado_model.py"],
srcs_version = "PY3",
deps = [
":common_layer",
"//tf_ops:sequence_string_projection_op_py",
],
)
py_library(
name = "metric_functions",
srcs = ["metric_functions.py"],
srcs_version = "PY3",
)
py_library(
name = "input_fn_reader",
srcs = ["input_fn_reader.py"],
srcs_version = "PY3",
)
py_binary(
name = "runner",
srcs = ["runner.py"],
python_version = "PY3",
srcs_version = "PY3",
deps = [
":input_fn_reader",
":metric_functions",
":prado_model",
],
)
{
"model_config" : {
"labels": ["identity_attack", "insult", "obscene", "severe_toxicity", "sexual_explicit", "threat", "toxicity"],
"multilabel": true,
"quantize": true,
"max_seq_len": 128,
"max_seq_len_inference": 128,
"exclude_nonalphaspace_unicodes": false,
"split_on_space": true,
"embedding_regularizer_scale": 35e-3,
"embedding_size": 64,
"heads": [0, 64, 64, 0, 0],
"feature_size": 512,
"network_regularizer_scale": 1e-4,
"keep_prob": 0.5,
"word_novelty_bits": 0,
"doc_size_levels": 0,
"add_eos_tag": false,
"pre_logits_fc_layers": [],
"text_distortion_probability": 0.25
},
"batch_size": 1024,
"save_checkpoints_steps": 100,
"train_steps": 100000,
"learning_rate": 1e-3,
"learning_rate_decay_steps": 42000,
"learning_rate_decay_rate": 0.7,
"iterations_per_loop": 100,
"dataset": "civil_comments"
}
# Copyright 2020 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# python3
"""Common layer creator."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow.compat.v1 as tf
from tensorflow.python.training import moving_averages # pylint: disable=g-direct-tensorflow-import
class CommonLayers(object):
"""A base class that defines TfLite compatible NN layers."""
def __init__(self,
mode,
regularizer_scale=0.0,
weights_initializer=tf.keras.initializers.glorot_uniform(),
quantization_enabled=True):
"""PoDLayers constructor.
Args:
mode: Graph creation mode.
regularizer_scale: Optional regularizer for the weights.
weights_initializer: Optional initializer for the weights.
quantization_enabled: Enables quantization of weights and activation in
the DNN.
"""
self._mode = mode
self._regularizer_scale = regularizer_scale
self._weights_initializer = weights_initializer
self._quantization_enabled = quantization_enabled
# Batch normalization is the default normalization scheme.
self._normalizer = self.batch_normalization
self._moment_fn = None
def qrange_sigmoid(self, tensor):
"""Quantize the tensor in sigmoid range (0.0, 1.0)."""
return tf.fake_quant_with_min_max_args(
tensor, 0.0, 1.0) if self._quantization_enabled else tensor
def qrange_tanh(self, tensor):
"""Quantize the tensor in tanh range (-1.0, 1.0)."""
return tf.fake_quant_with_min_max_args(
tensor, -1.0, 1.0) if self._quantization_enabled else tensor
def _quantized_tanh(self, tensor):
"""Apply tanh op and quantize in the range (-1.0, 1.0)."""
return self.qrange_tanh(tf.tanh(tensor))
def _quantized_sigmoid(self, tensor):
"""Apply sigmoid op and quantize in the range (0.0, 1.0)."""
return self.qrange_sigmoid(tf.sigmoid(tensor))
def set_moment_fn(self, moment_fn):
"""Set a moment function that will be used by batch norm."""
self._moment_fn = moment_fn
def set_regularizer_scale(self, regularizer_scale):
"""Override / set a new weights regularizer scale."""
self._regularizer_scale = regularizer_scale
def set_variable_length_moment_fn(self, sequence_length, max_sequence_length):
"""Set variable length moment function for use in batch norm.
Args:
sequence_length: A vector of sequence lengths.
max_sequence_length: Padding length for the batch.
Returns:
The sequence mask.
"""
mask = tf.sequence_mask(
sequence_length, maxlen=max_sequence_length, dtype=tf.float32)
mask = tf.expand_dims(mask, 2)
mask_r4 = tf.expand_dims(mask, 3)
mask_r2 = tf.reshape(mask, [-1, 1])
inverse_numsteps = tf.math.reciprocal(tf.reduce_sum(mask))
def _varlen_moment_fn(input_tensor, axes):
"""Moment function to use with batch normalization."""
input_tensor_shape = input_tensor.get_shape().as_list()
input_tensor_rank = len(input_tensor_shape)
if input_tensor_rank == 2:
input_tensor = mask_r2 * input_tensor
elif input_tensor_rank == 4:
assert input_tensor_shape[2] == 1
input_tensor = mask_r4 * input_tensor
else:
assert False, "Supports rank2 and rank4 tensors."
ex = tf.reduce_sum(input_tensor, axis=axes) * inverse_numsteps
exx = tf.reduce_sum(
input_tensor * input_tensor, axis=axes) * inverse_numsteps
return ex, (exx - ex * ex)
self._moment_fn = _varlen_moment_fn
return mask
def batch_normalization(self, input_tensor, decay=0.999):
"""Add batch normalization network structure after input_tensor.
It performs batch normalization of the input tensor. This routine is
verified to work for rank-2 and rank-4 tensors.
Args:
input_tensor: Input tensor that needs to be normalized.
decay: Moving average decay
Returns:
A tensor that is normalized.
"""
input_tensor_shape = input_tensor.get_shape().as_list()
nstat = input_tensor_shape[-1]
reduce_dims = list(range(len(input_tensor_shape) - 1))
with tf.variable_scope(name_or_scope=None, default_name="batch_norm"):
offset = tf.get_variable(
"offset",
shape=[nstat],
initializer=tf.zeros_initializer,
trainable=True)
scale = tf.get_variable(
"scale",
shape=[nstat],
initializer=tf.ones_initializer,
trainable=True)
moving_mean = tf.get_variable(
"moving_mean",
shape=[nstat],
initializer=tf.zeros_initializer,
trainable=False)
moving_var = tf.get_variable(
"moving_variance",
shape=[nstat],
initializer=tf.ones_initializer,
trainable=False)
if self._mode == tf.estimator.ModeKeys.TRAIN:
# During training, compute summary stats, update the moving average
# variables, and use the summary stats for batch normalization.
moment_fn = self._moment_fn or tf.nn.moments
mean_mom, var_mom = moment_fn(input_tensor, reduce_dims)
with tf.control_dependencies([
moving_averages.assign_moving_average(
moving_mean, mean_mom, decay, name="mean_op"),
moving_averages.assign_moving_average(
moving_var, var_mom, decay, name="variance_op")
]):
tensor = tf.nn.batch_normalization(
input_tensor,
mean_mom,
var_mom,
offset,
scale,
1e-9,
name="batch_norm_core")
else:
# During eval/inference, use the moving average variables for batch
# normalization. The variables are frozen to constants before the graph
# is saved.
tensor = tf.nn.batch_normalization(
input_tensor,
moving_mean,
moving_var,
offset,
scale,
1e-9,
name="batch_norm_core")
return tensor
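  # A hedged usage sketch (tensor names are hypothetical): in TRAIN mode the
  # moving averages above are updated on every step, while EVAL/PREDICT reads
  # the stored averages:
  #   layers = CommonLayers(tf.estimator.ModeKeys.TRAIN)
  #   normalized = layers.batch_normalization(features, decay=0.999)
  # where `features` is a rank-2 or rank-4 float tensor.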
def get_quantization_ranges(self, tensor, ema_decay=0.99):
"""Perform fake quantization of the tensor.
The method computes ranges for quantization by first computing the
batch min/max and then computing a moving average of the min/max across
batches. The moving average of min/max is used for quantization during
inference. During training the batch min/maxs are used directly.
Args:
tensor: Input tensor that needs to be quantized.
ema_decay: Moving average decay
Returns:
Min/Max for fake quantization.
"""
# If quantization is not enabled there are no ranges to compute, so this
# method is a no-op.
if not self._quantization_enabled:
return None, None
# Calculate min/max for the tensor.
min_var = tf.get_variable("min", initializer=0.0, trainable=False)
max_var = tf.get_variable("max", initializer=1.0, trainable=False)
if self._mode == tf.estimator.ModeKeys.TRAIN:
# During training estimate moving average for min/max. Use the min/max
# values directly for quantization.
ops = []
batch_min = tf.reduce_min(tensor, name="BatchMin")
# Toco expects 0.0 to be part of the quantization range.
batch_min = tf.minimum(batch_min, 0.0)
ops.append(
moving_averages.assign_moving_average(min_var, batch_min, ema_decay))
batch_max = tf.reduce_max(tensor, name="BatchMax")
# Toco expects 0.0 to be part of the quantization range.
batch_max = tf.maximum(batch_max, 0.0)
ops.append(
moving_averages.assign_moving_average(max_var, batch_max, ema_decay))
with tf.control_dependencies(ops):
return tf.identity(batch_min), tf.identity(batch_max)
else:
# During inference/eval use the moving average min/maxs for
# quantization.
return min_var, max_var
def quantization(self, tensor, ema_decay=0.99, num_bits=8):
"""Perform fake quantization of the tensor.
The method performs fake quantization of the tensor by first computing the
batch min/max and then computing a moving average of the min/max across
batches. The moving average of min/max is used for quantization during
inference. During training the batch min/maxs are used directly.
Args:
tensor: Input tensor that needs to be quantized.
ema_decay: Moving average decay
num_bits: Number of bits used for quantization
Returns:
Quantized tensor.
"""
with tf.variable_scope(
name_or_scope=None, default_name="MovingAvgQuantize"):
min_tensor, max_tensor = self.get_quantization_ranges(tensor, ema_decay)
if min_tensor is None or max_tensor is None:
return tensor
else:
return tf.fake_quant_with_min_max_vars(
tensor, min_tensor, max_tensor, num_bits=num_bits)
def _weight_quantization(self, tensor, num_bits=8):
"""Quantize weights when enabled."""
if not self._quantization_enabled:
return tensor
# For inference mode, toco computes the min/max from the weights offline
# to quantize them. During train/eval, the min/max is computed by the
# graph itself from the current weight values in the session.
modes = set([tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL])
if self._mode in modes:
batch_min = tf.reduce_min(tensor, name="BatchMin")
# Toco expects 0.0 to be part of the quantization range.
batch_min = tf.minimum(batch_min, 0.0)
batch_max = tf.reduce_max(tensor, name="BatchMax")
# Toco expects 0.0 to be part of the quantization range.
batch_max = tf.maximum(batch_max, 0.0)
return tf.fake_quant_with_min_max_vars(
tensor, batch_min, batch_max, num_bits=num_bits)
else:
return tensor
def _get_weight(self, shape, num_bits=8):
"""Return a weight variable for the given shape.
The method adds an L2 regularization loss for the weight when a
regularizer scale is set, and fake-quantizes the weight when quantization
is enabled.
Args:
shape: Shape of the weight tensor.
num_bits: Number of bits to use for the variable.
Returns:
The weight tensor, fake-quantized when quantization is enabled.
"""
weight = tf.get_variable(
"weight", shape, initializer=self._weights_initializer)
if self._regularizer_scale > 0.0:
reg_loss = tf.nn.l2_loss(weight) * tf.convert_to_tensor(
self._regularizer_scale)
tf.losses.add_loss(
reg_loss, loss_collection=tf.GraphKeys.REGULARIZATION_LOSSES)
return self._weight_quantization(weight, num_bits=num_bits)
def _get_bias(self, shape):
weight = tf.get_variable("bias", shape, initializer=tf.zeros_initializer())
if self._regularizer_scale > 0.0:
reg_loss = tf.nn.l2_loss(weight) * tf.convert_to_tensor(
self._regularizer_scale)
tf.losses.add_loss(
reg_loss, loss_collection=tf.GraphKeys.REGULARIZATION_LOSSES)
return weight
def zero_beyond_sequence_length(self, sequence_length, gate):
"""Generate a binary mask for the sequence based on the timestep's validity.
Args:
sequence_length: The sequence length tensor of [batch size] elements.
gate: A gate tensor whose time dimension is used to infer the mask length.
Returns:
Mask tensor with ones for valid timesteps and zeros for invalid timesteps.
"""
mask = tf.sequence_mask(
sequence_length, maxlen=tf.shape(gate)[1], dtype=tf.float32)
return tf.expand_dims(mask, 2)
def _convolution2d(self,
inputs,
kernel_size,
filters,
stride,
padding,
dilations=None,
weight_mask=None,
scope="convolution2d"):
"""Linear part of the convolution layer."""
if isinstance(stride, int):
strides = [1, stride, stride, 1]
else:
if not isinstance(stride, list) or len(stride) != 2:
raise ValueError("`stride` should be an integer or a list of length 2")
strides = [1, stride[0], stride[1], 1]
if dilations is not None:
if not isinstance(dilations, list) or len(dilations) != 2:
raise ValueError("`dilations` should be an integer list of length 2")
dilations = [1, dilations[0], dilations[1], 1]
else:
dilations = [1, 1, 1, 1]
with tf.variable_scope(name_or_scope=None, default_name=scope):
input_channels = inputs.get_shape().as_list()[-1]
kernel_shape = kernel_size + [input_channels, filters]
weight = self._get_weight(kernel_shape)
if weight_mask is not None:
# Tensor multiply for disabling backprop
weight = weight * weight_mask
bias = self._get_bias([filters])
features = tf.nn.conv2d(
inputs, weight, strides, padding, dilations=dilations)
return tf.nn.bias_add(features, bias)
def convolution2d(self,
inputs,
kernel_size,
filters,
scope="convolution2d",
stride=1,
padding="SAME",
dilations=None,
weight_mask=None,
activation=tf.nn.relu,
normalization=True):
"""Creates a 2d convolution layer.
Performs batch normalization to the tensor pre activation and fake
quantization post activation.
Args:
inputs: Input tensor, that is expected to be a rank 4 tensor.
kernel_size: 2D convolution kernel size (2 tuple).
filters: Number of output channels (integer).
scope: A string that would be used as variable scope for the layer.
stride: Convolution stride, can be a constant or a 2 tuple.
padding: Padding to use for the convolution.
dilations: tuple of size 2 specifying the dilation rates for input height
and width respectively. Refer to tf.nn.conv2d API for more details.
weight_mask: A floating point numpy array or constant tensor mask to turn
off weights in the convolution kernel.
activation: Activation function to use; ReLU is used by default.
normalization: A boolean flag indicating if batchnorm should be performed.
Returns:
Tensor result of the convolution layer.
Raises:
ValueError: If inputs is not a rank 4 tensor
ValueError: If kernel_size is not a list or tuple of length 2
"""
if len(inputs.get_shape().as_list()) != 4:
raise ValueError("`inputs` should be a rank 4 tensor. "
"Was: {}.".format(len(inputs.get_shape().as_list())))
kernel_size = list(kernel_size)
if len(kernel_size) != 2:
raise ValueError("`kernel_size` should be a tuple or list of length 2. "
"Was: {}.".format(kernel_size))
features_rank4 = self._convolution2d(
inputs,
kernel_size,
filters,
stride,
padding,
dilations,
weight_mask=weight_mask,
scope=scope)
if normalization and self._normalizer:
features_rank4 = self._normalizer(features_rank4)
if activation is not None:
features_rank4 = activation(features_rank4)
return self.quantization(features_rank4)
def _fully_connected(self,
features,
output_size,
scope="fully_connected",
use_bias=True):
"""Performs fully connected operation."""
with tf.variable_scope(name_or_scope=None, default_name=scope):
weight = self._get_weight(
[features.get_shape().as_list()[-1], output_size])
bias = self._get_bias([output_size])
features = tf.matmul(features, weight)
return tf.nn.bias_add(features, bias) if use_bias else features
def fully_connected(self,
features,
output_size,
scope="fully_connected",
activation=tf.nn.relu,
normalization=True,
use_bias=True):
"""Creates a fully connected layer.
Performs batch normalization to the tensor pre activation and fake
quantization post activation.
Args:
features: Input features to the fully connected layer.
output_size: Number of output features.
scope: A variable scope for the connected layer.
activation: Activation function to use; ReLU is used by default.
normalization: A flag indicating if batchnorm should be performed.
use_bias: If True, bias is added to the result
Returns:
Tensor result of the fully connected layer.
Raises:
ValueError: If last dimension of features is dynamic (shape = None).
"""
input_shape = features.get_shape().as_list()
if not input_shape[-1]:
raise ValueError("Last dimension of features should be static")
need_reshape = len(input_shape) > 2
input_tensor = features
if need_reshape:
features = tf.reshape(features, [-1, input_shape[-1]])
features = self._fully_connected(
features, output_size, scope=scope, use_bias=use_bias)
if normalization and self._normalizer:
features = self._normalizer(features)
if activation:
# Batch normalization is done pre activation as suggested in the original
# paper. Quantization is done post activation because the range will
# change after applying the squashing function.
features = activation(features)
features = self.quantization(features)
if not need_reshape:
return features
else:
# The fully connected layer changes the last dimension to output_size.
# If a reshape was done before applying the fully connected layer, change
# it back to the right rank. If the input dimensions are known use the
# static shape otherwise use the shape tensor.
if sum([val is None for val in input_shape]) <= 1:
# Just one dynamic shape, we can reshape with -1
output_shape = [-1 if val is None else val for val in input_shape]
else:
input_shape_tensor = tf.shape(input_tensor)
output_shape = [
shape or input_shape_tensor[index]
for index, shape in enumerate(input_shape)
]
output_shape[-1] = output_size
return tf.reshape(features, output_shape)
{
"model_config" : {
"labels": ["admiration", "amusement", "anger", "annoyance", "approval", "caring", "confusion", "curiosity", "desire", "disappointment", "disapproval", "disgust", "embarrassment", "excitement", "fear", "gratitude", "grief", "joy", "love", "nervousness", "optimism", "pride", "realization", "relief", "remorse", "sadness", "surprise", "neutral"],
"multilabel": true,
"quantize": true,
"max_seq_len": 128,
"max_seq_len_inference": 128,
"exclude_nonalphaspace_unicodes": false,
"split_on_space": true,
"embedding_regularizer_scale": 35e-3,
"embedding_size": 64,
"heads": [0, 64, 64, 0, 0],
"feature_size": 512,
"network_regularizer_scale": 1e-4,
"keep_prob": 0.5,
"word_novelty_bits": 0,
"doc_size_levels": 0,
"add_eos_tag": false,
"pre_logits_fc_layers": [],
"text_distortion_probability": 0.0
},
"batch_size": 1024,
"save_checkpoints_steps": 100,
"train_steps": 100000,
"learning_rate": 0.0006,
"learning_rate_decay_steps": 340,
"learning_rate_decay_rate": 0.7,
"iterations_per_loop": 100,
"dataset": "goemotions"
}
# Copyright 2020 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Lint as: python3
"""Methods related to input datasets and readers."""
import functools
import sys
from typing import Any, Callable, Mapping, Optional, Tuple, Dict
from absl import logging
import tensorflow.compat.v1 as tf
import tensorflow_datasets as tfds
def imdb_reviews(features, _):
return features["text"], features["label"]
def civil_comments(features, runner_config):
labels = runner_config["model_config"]["labels"]
label_tensor = tf.stack([features[label] for label in labels], axis=1)
label_tensor = tf.floor(label_tensor + 0.5)
return features["text"], label_tensor
def goemotions(features, runner_config):
labels = runner_config["model_config"]["labels"]
label_tensor = tf.stack([features[label] for label in labels], axis=1)
return features["comment_text"], tf.cast(label_tensor, tf.float32)
def random_substr(str_tensor, max_words):
"""Select random substring if the input has more than max_words."""
word_batch_r = tf.strings.split(str_tensor, result_type="RaggedTensor")
row_splits = word_batch_r.row_splits
words = word_batch_r.values
start_idx = row_splits[:-1]
end_idx = row_splits[1:]
words_per_example = end_idx - start_idx
ones = tf.ones_like(end_idx)
max_val = tf.maximum(ones, words_per_example - max_words)
max_words_batch = tf.reduce_max(words_per_example)
rnd = tf.random.uniform(
tf.shape(start_idx), minval=0, maxval=max_words_batch, dtype=tf.int64)
off_start_idx = tf.math.floormod(rnd, max_val)
new_words_per_example = tf.where(
tf.equal(max_val, 1), words_per_example, ones * max_words)
new_start_idx = start_idx + off_start_idx
new_end_idx = new_start_idx + new_words_per_example
indices = tf.expand_dims(tf.range(tf.size(words), dtype=tf.int64), axis=0)
within_limit = tf.logical_and(
tf.greater_equal(indices, tf.expand_dims(new_start_idx, axis=1)),
tf.less(indices, tf.expand_dims(new_end_idx, axis=1)))
keep_indices = tf.reduce_any(within_limit, axis=0)
keep_indices = tf.cast(keep_indices, dtype=tf.int32)
_, selected_words = tf.dynamic_partition(words, keep_indices, 2)
row_splits = tf.math.cumsum(new_words_per_example)
row_splits = tf.concat([[0], row_splits], axis=0)
new_tensor = tf.RaggedTensor.from_row_splits(
values=selected_words, row_splits=row_splits)
return tf.strings.reduce_join(new_tensor, axis=1, separator=" ")
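# A hedged example (hypothetical values): for the batch ["a b c d e", "x y"]
# with max_words=3, random_substr keeps a random 3-word window of the first
# string (e.g. "b c d") and returns "x y" unchanged, since it is already
# within the limit.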
def _post_processor(features, runner_config, mode, create_projection,
batch_size):
"""Post process the data to a form expected by model_fn."""
data_processor = getattr(sys.modules[__name__], runner_config["dataset"])
text, label = data_processor(features, runner_config)
if "max_seq_len" in runner_config["model_config"]:
max_seq_len = runner_config["model_config"]["max_seq_len"]
logging.info("Truncating text to have at most %d tokens", max_seq_len)
text = random_substr(text, max_seq_len)
text = tf.reshape(text, [batch_size])
num_classes = len(runner_config["model_config"]["labels"])
label = tf.reshape(label, [batch_size, num_classes])
projection, seq_length = create_projection(runner_config["model_config"],
mode, text)
return {"projection": projection, "seq_length": seq_length, "label": label}
def create_input_fn(runner_config: Dict[str, Any], create_projection: Callable,
mode: tf.estimator.ModeKeys, drop_remainder: bool):
"""Returns an input function to use in the instantiation of tf.estimator.*."""
def _input_fn(
params: Mapping[str, Any]
) -> Tuple[Mapping[str, tf.Tensor], Optional[Mapping[str, tf.Tensor]]]:
"""Method to be used for reading the data."""
assert mode != tf.estimator.ModeKeys.PREDICT
split = "train" if mode == tf.estimator.ModeKeys.TRAIN else "test"
ds = tfds.load(runner_config["dataset"], split=split)
ds = ds.batch(params["batch_size"], drop_remainder=drop_remainder)
ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
ds = ds.shuffle(buffer_size=100)
ds = ds.repeat(count=1 if mode == tf.estimator.ModeKeys.EVAL else None)
ds = ds.map(
functools.partial(
_post_processor,
runner_config=runner_config,
mode=mode,
create_projection=create_projection,
batch_size=params["batch_size"]),
num_parallel_calls=tf.data.experimental.AUTOTUNE,
deterministic=False)
return ds
return _input_fn
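# A hedged usage sketch mirroring runner.py (config values are assumptions):
#   input_fn = create_input_fn(runner_config, model.create_projection,
#                              tf.estimator.ModeKeys.TRAIN,
#                              drop_remainder=True)
#   estimator.train(input_fn=input_fn, max_steps=runner_config["train_steps"])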
# Copyright 2020 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Lint as: python3
"""Metric functions."""
import tensorflow.compat.v1 as tf
def classification_metric(per_example_loss, label_ids, logits):
"""Compute eval metrics."""
return {
"accuracy":
tf.metrics.accuracy(label_ids, tf.math.argmax(logits, axis=-1)),
"eval_loss":
tf.metrics.mean(per_example_loss)
}
def labeling_metric(per_example_loss, label_ids, logits):
"""Compute eval metrics."""
num_classes = label_ids.get_shape().as_list()[-1]
return_dict = {"eval_loss": tf.metrics.mean(per_example_loss)}
for idx in range(num_classes):
return_dict["auc/" + str(idx)] = tf.metrics.auc(
label_ids[:, idx], tf.math.sigmoid(logits[:, idx]))
return return_dict
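# A hedged usage sketch (shapes are assumptions): for a multilabel batch with
# 3 labels,
#   metrics = labeling_metric(per_example_loss, label_ids, logits)
# yields "eval_loss" plus one streaming "auc/<i>" metric per label column.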
# Copyright 2020 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tensorflow graph creator for PRADO model."""
import collections
import functools
from typing import Mapping, Dict, Any
from absl import logging
import tensorflow.compat.v1 as tf
from prado import common_layer # import sequence_projection module
from tf_ops import sequence_string_projection_op as ssp # import sequence_projection module
_NGRAM_INFO = [
{
"name": "unigram",
"padding": 0,
"kernel_size": [1, 1],
"mask": None
},
{
"name": "bigram",
"padding": 1,
"kernel_size": [2, 1],
"mask": None
},
{
"name": "trigram",
"padding": 2,
"kernel_size": [3, 1],
"mask": None
},
{
"name": "bigramskip1",
"padding": 2,
"kernel_size": [3, 1],
"mask": [[[[1]]], [[[0]]], [[[1]]]]
},
{
"name": "bigramskip2",
"padding": 3,
"kernel_size": [4, 1],
"mask": [[[[1]]], [[[0]]], [[[0]]], [[[1]]]]
},
{
"name": "fourgram",
"padding": 3,
"kernel_size": [4, 1],
"mask": None
},
{
"name": "fivegram",
"padding": 4,
"kernel_size": [5, 1],
"mask": None
},
]
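# Each "mask" zeroes out convolution kernel taps to implement skip-grams:
# "bigramskip1" uses a [3, 1] kernel with the middle tap masked to 0, so it
# combines tokens t and t+2 while skipping t+1; "bigramskip2" skips two
# tokens with a [4, 1] kernel.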
def _get_params(model_config, varname, default_value=None):
value = model_config[varname] if varname in model_config else default_value
logging.info("%s = %s", varname, value)
return value
def create_projection(model_config, mode, inputs):
"""Create projection."""
feature_size = _get_params(model_config, "feature_size")
text_distortion_probability = _get_params(model_config,
"text_distortion_probability", 0.0)
max_seq_len = _get_params(model_config, "max_seq_len", 0)
add_eos_tag = _get_params(model_config, "add_eos_tag")
is_training = mode == tf.estimator.ModeKeys.TRAIN
distortion_probability = text_distortion_probability if is_training else 0.0
raw_string = tf.identity(inputs, "Input")
features, _, seq_length = ssp.sequence_string_projection(
input=raw_string,
feature_size=feature_size,
max_splits=max_seq_len - 1,
distortion_probability=distortion_probability,
split_on_space=True,
add_eos_tag=add_eos_tag,
vocabulary="")
if mode != tf.estimator.ModeKeys.PREDICT and max_seq_len > 0:
pad_value = [[0, 0], [0, max_seq_len - tf.shape(features)[1]], [0, 0]]
features = tf.pad(features, pad_value)
batch_size = inputs.get_shape().as_list()[0]
features = tf.reshape(features,
[batch_size, max_seq_len, feature_size])
return features, seq_length
def _fully_connected(pod_layers, tensor, num_features, mode, bsz, keep_prob):
"""Fully connected layer."""
tensor_out = pod_layers.fully_connected(tensor, num_features)
if mode == tf.estimator.ModeKeys.TRAIN:
tensor_out = tf.nn.dropout(tensor_out, rate=(1 - keep_prob))
return tf.reshape(tensor_out, [bsz, -1, 1, num_features])
def _get_convolutional_layer(pod_layers, head_type, channels, valid_step_mask,
tensor, invalid_value):
"""Get convolutional layer."""
info = _NGRAM_INFO[head_type]
pad = info["padding"]
weight_mask = info["mask"]
kernel_size = info["kernel_size"]
paddings = [[0, 0], [0, pad], [0, 0], [0, 0]]
# Padding before convolution and using 'valid' instead of 'same' padding
# structure ensures that the convolution output is identical between
# train/eval and inference models. It also ensures that they line up
# correctly with the valid_step_mask.
tensor = tf.pad(tensor, paddings) if pad != 0 else tensor
# Not using an activation allows a bigram feature to de-emphasize a
# feature that triggers positive for a unigram, for example. The output
# weights must be allowed to be positive or negative for this to happen.
tensor = pod_layers.convolution2d(
tensor,
kernel_size,
channels,
padding="VALID",
weight_mask=weight_mask,
activation=None)
if valid_step_mask is not None:
tensor = tensor * valid_step_mask + (1 - valid_step_mask) * invalid_value
return tensor
def _get_predictions(pod_layers, head_type, keys, values, channels,
valid_step_mask):
"""Get predictions using one ngram head."""
conv_layer = functools.partial(_get_convolutional_layer, pod_layers,
head_type, channels, valid_step_mask)
return conv_layer(keys, -100), conv_layer(values, 0)
def reduce_tensors(pod_layers, bsz, attention_logits, values):
"""Reduce information using attention."""
channels = attention_logits.get_shape().as_list()[-1]
attention_logits = tf.reshape(attention_logits, [bsz, -1, channels])
values = tf.reshape(values, [bsz, -1, channels])
with tf.variable_scope("attention_expected_value"):
attention_logits = tf.identity(attention_logits, "attention_logits_in")
values = tf.identity(values, "values_in")
attention_logits = tf.transpose(attention_logits, [0, 2, 1])
values = tf.transpose(values, [0, 2, 1])
attention = tf.nn.softmax(attention_logits, axis=2)
evalue = tf.reduce_sum(attention * values, axis=[2])
evalue = tf.identity(evalue, "expected_value_out")
return pod_layers.quantization(evalue)
def ngram_attention_args_v2(projection, seq_length, mode, num_classes,
model_args):
"""Implements an ngram attention network.
Args:
projection: Projection features from text.
seq_length: Sequence length.
mode: Model creation mode (train, eval or predict).
num_classes: Number of classes to be predicted.
model_args: A namedtuple containing all model arguments.
Returns:
A tensor corresponding to the logits of the graph.
"""
pod_layers = common_layer.CommonLayers(
mode, quantization_enabled=model_args.quantize)
features = pod_layers.qrange_tanh(projection)
bsz = features.get_shape().as_list()[0] or tf.shape(features)[0]
# Regularizer just for the word embedding.
pod_layers.set_regularizer_scale(model_args.embedding_regularizer_scale)
values = _fully_connected(pod_layers, features, model_args.embedding_size,
mode, bsz, model_args.keep_prob)
keys = _fully_connected(pod_layers, features, model_args.embedding_size, mode,
bsz, model_args.keep_prob)
# Regularizer for the rest of the network.
pod_layers.set_regularizer_scale(model_args.network_regularizer_scale)
valid_step_mask = None
if mode != tf.estimator.ModeKeys.PREDICT:
valid_step_mask = pod_layers.zero_beyond_sequence_length(
seq_length, features)
valid_step_mask = tf.expand_dims(valid_step_mask, 3)
# Mask out the sentence beyond the valid sequence length for the training
# graph. This ensures that these values are all zeroed out. Without
# masking, the preceding fully connected layer would give them an
# arbitrary constant value during training/eval minibatches, values that
# won't be present during inference, since inference is not batched.
keys = valid_step_mask * keys
values = valid_step_mask * values
pod_layers.set_variable_length_moment_fn(seq_length, tf.shape(features)[1])
multi_head_predictions = []
for head_type, head in zip(model_args.head_types, model_args.heads):
if not head:
continue
att_logits, att_values = _get_predictions(pod_layers, head_type, keys,
values, head, valid_step_mask)
multi_head_predictions.append(
reduce_tensors(pod_layers, bsz, att_logits, att_values))
multi_head_predictions = tf.concat(multi_head_predictions, axis=1)
multi_head_predictions = pod_layers.quantization(multi_head_predictions)
# Sequence dimension has been summed out, so we don't need special moment
# function.
pod_layers.set_moment_fn(None)
output = multi_head_predictions
# Add FC layers before the logits.
for fc_layer_size in model_args.pre_logits_fc_layers:
output = pod_layers.fully_connected(
output, fc_layer_size, activation=tf.nn.relu)
return pod_layers.fully_connected(output, num_classes, activation=None)
def create_encoder(model_config: Dict[str, Any], projection: tf.Tensor,
seq_length: tf.Tensor,
mode: tf.estimator.ModeKeys) -> Mapping[str, tf.Tensor]:
"""Implements a simple attention network for brand safety."""
args = {}
def _get_params(varname, default_value=None):
value = model_config[varname] if varname in model_config else default_value
logging.info("%s = %s", varname, value)
args[varname] = value
_get_params("labels")
_get_params("quantize", True)
_get_params("max_seq_len", 0)
_get_params("max_seq_len_inference", 0)
_get_params("split_on_space", True)
_get_params("exclude_nonalphaspace_unicodes", False)
_get_params("embedding_regularizer_scale", 35e-3)
_get_params("embedding_size", 64)
_get_params("heads", [0, 64, 64, 0, 0])
_get_params("feature_size", 512)
_get_params("network_regularizer_scale", 1e-4)
_get_params("keep_prob", 0.5)
_get_params("word_novelty_bits", 0)
_get_params("doc_size_levels", 0)
_get_params("pre_logits_fc_layers", [])
args["head_types"] = list(range(len(args["heads"])))
args["text_distortion_probability"] = 0.0
if mode == tf.estimator.ModeKeys.TRAIN:
_get_params("text_distortion_probability", 0.25)
model_args = collections.namedtuple("ModelArgs", sorted(args))(**args)
num_classes = len(model_args.labels)
logits = ngram_attention_args_v2(
projection=projection,
seq_length=seq_length,
mode=mode,
num_classes=num_classes,
model_args=model_args)
outputs = {
"logits":
tf.identity(logits, "Logits"),
"label_map":
tf.constant(list(model_args.labels), tf.string, name="LabelMap")
}
return outputs
# Copyright 2020 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Lint as: python3
"""A utility for PRADO model to do train, eval, inference and model export."""
import json
import os
from typing import Any, Mapping, Optional, Sequence, Tuple, Dict
from absl import logging
import tensorflow.compat.v1 as tf
from tensorflow.core.framework import types_pb2 as tf_types
from tensorflow.python.tools import optimize_for_inference_lib # pylint: disable=g-direct-tensorflow-import
from prado import input_fn_reader # import sequence_projection module
from prado import metric_functions # import sequence_projection module
from prado import prado_model as model # import sequence_projection module
tf.disable_v2_behavior()
FLAGS = tf.flags.FLAGS
tf.flags.DEFINE_string("config_path", None, "Path to a RunnerConfig.")
tf.flags.DEFINE_enum("runner_mode", None,
["train", "train_and_eval", "eval", "export"],
"Runner mode.")
tf.flags.DEFINE_string("master", None, "TensorFlow master URL.")
tf.flags.DEFINE_string(
"output_dir", None,
"The output directory where the model checkpoints will be written.")
tf.flags.DEFINE_bool("use_tpu", False, "Whether to use TPU or GPU/CPU.")
tf.flags.DEFINE_integer(
"num_tpu_cores", 8,
"Only used if `use_tpu` is True. Total number of TPU cores to use.")
def load_runner_config() -> Dict[str, Any]:
with tf.gfile.GFile(FLAGS.config_path, "r") as f:
return json.loads(f.read())
def create_model(
model_config: Dict[str, Any], projection: tf.Tensor, seq_length: tf.Tensor,
mode: tf.estimator.ModeKeys, label_ids: tf.Tensor
) -> Tuple[tf.Tensor, tf.Tensor, Mapping[str, tf.Tensor]]:
"""Creates a sequence labeling model."""
outputs = model.create_encoder(model_config, projection, seq_length, mode)
with tf.variable_scope("loss"):
loss = None
per_example_loss = None
if mode != tf.estimator.ModeKeys.PREDICT:
if not model_config["multilabel"]:
per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
labels=label_ids, logits=outputs["logits"])
else:
per_label_loss = tf.nn.sigmoid_cross_entropy_with_logits(
labels=label_ids, logits=outputs["logits"])
per_example_loss = tf.reduce_mean(per_label_loss, axis=1)
loss = tf.reduce_mean(per_example_loss)
loss += tf.add_n(tf.compat.v1.losses.get_regularization_losses())
return (loss, per_example_loss, outputs)
def create_optimizer(loss: tf.Tensor, runner_config: Dict[str,
Any]) -> tf.Operation:
"""Returns a train_op using Adam optimizer."""
learning_rate = tf.train.exponential_decay(
learning_rate=runner_config["learning_rate"],
global_step=tf.train.get_global_step(),
decay_steps=runner_config["learning_rate_decay_steps"],
decay_rate=runner_config["learning_rate_decay_rate"],
staircase=True)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
if FLAGS.use_tpu:
optimizer = tf.tpu.CrossShardOptimizer(optimizer)
else:
tf.compat.v1.summary.scalar("learning_rate", learning_rate)
tvars = tf.trainable_variables()
grads = tf.gradients(loss, tvars)
train_op = optimizer.apply_gradients(
zip(grads, tvars), global_step=tf.train.get_global_step())
return train_op
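# With staircase=True the schedule above is
#   lr(step) = learning_rate * decay_rate ** (step // decay_steps)
# e.g. with the civil_comments config (1e-3, decay 0.7 every 42000 steps),
# the rate first drops to 7e-4 at step 42000.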
def model_fn_builder(runner_config: Dict[str, Any]):
"""Returns `model_fn` closure for TPUEstimator."""
def model_fn(
features: Mapping[str, tf.Tensor],
mode: tf.estimator.ModeKeys,
params: Optional[Mapping[str, Any]] # pylint: disable=unused-argument
) -> tf.compat.v1.estimator.tpu.TPUEstimatorSpec:
"""The `model_fn` for TPUEstimator."""
projection = features["projection"]
seq_length = features["seq_length"]
label_ids = None
if mode != tf.estimator.ModeKeys.PREDICT:
label_ids = features["label"]
(total_loss, per_example_loss,
model_outputs) = create_model(runner_config["model_config"], projection,
seq_length, mode, label_ids)
if mode == tf.estimator.ModeKeys.TRAIN:
train_op = create_optimizer(total_loss, runner_config)
return tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
mode=mode, loss=total_loss, train_op=train_op)
if mode == tf.estimator.ModeKeys.EVAL:
if not runner_config["model_config"]["multilabel"]:
metric_fn = metric_functions.classification_metric
else:
metric_fn = metric_functions.labeling_metric
eval_metrics = (metric_fn,
[per_example_loss, label_ids, model_outputs["logits"]])
return tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
mode=mode, loss=total_loss, eval_metrics=eval_metrics)
# Prediction mode
return tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
mode=mode, predictions=model_outputs)
return model_fn
def set_output_types_and_quantized(graph_def, quantize):
"""Set _output_types and _output_quantized for custom ops."""
for node in graph_def.node:
if node.op == "SequenceStringProjection":
node.attr["_output_quantized"].b = quantize
node.attr["_output_types"].list.type[:] = [tf_types.DT_FLOAT]
node.op = "SEQUENCE_STRING_PROJECTION"
elif node.op == "SequenceStringProjectionV2":
node.attr["_output_quantized"].b = quantize
node.attr["_output_types"].list.type[:] = [tf_types.DT_FLOAT]
node.op = "SEQUENCE_STRING_PROJECTION_V2"
def export_frozen_graph_def(
session: tf.compat.v1.Session, model_config: Dict[str, Any],
input_tensors: Sequence[tf.Tensor],
output_tensors: Sequence[tf.Tensor]) -> tf.compat.v1.GraphDef:
"""Returns a GraphDef object holding a processed network ready for exporting.
Args:
session: Active TensorFlow session containing the variables.
model_config: `ModelConfig` of the exported model.
input_tensors: A list of input tensors.
output_tensors: A list of output tensors.
Returns:
A frozen GraphDef object holding a processed network ready for exporting.
"""
graph_def = session.graph_def
input_node_names = [tensor.op.name for tensor in input_tensors]
output_node_names = [tensor.op.name for tensor in output_tensors]
input_node_types = [tensor.dtype.as_datatype_enum for tensor in input_tensors]
graph_def = tf.compat.v1.graph_util.convert_variables_to_constants(
session, graph_def, output_node_names)
set_output_types_and_quantized(
graph_def, quantize=model_config["quantize"])
# Optimize the graph for inference by removing unused nodes. Also removes
# nodes related to training, which are not going to be used for inference.
graph_def = optimize_for_inference_lib.optimize_for_inference(
graph_def, input_node_names, output_node_names, input_node_types)
return graph_def
def convert_frozen_graph_def_to_tflite(
graph_def: tf.compat.v1.GraphDef, model_config: Dict[str, Any],
input_tensors: Sequence[tf.Tensor],
output_tensors: Sequence[tf.Tensor]) -> bytes:
"""Converts a TensorFlow GraphDef into a serialized TFLite Flatbuffer."""
converter = tf.lite.TFLiteConverter(graph_def, input_tensors, output_tensors)
if model_config["quantize"]:
converter.inference_type = tf.uint8
converter.inference_input_type = tf.uint8
converter.default_ranges_stats = (0., 1.)
converter.quantized_input_stats = {
tensor.op.name: (0., 1.) for tensor in input_tensors
}
# Custom ops 'PoolingOp' and 'SequenceStringProjection' are used.
converter.allow_custom_ops = True
converter.experimental_new_converter = False
return converter.convert()
def export_tflite_model(model_config: Dict[str, Any], saved_model_dir: str,
export_dir: str) -> None:
"""Exports a saved_model into a tflite format."""
graph = tf.Graph()
with graph.as_default():
with tf.Session(graph=graph) as session:
metagraph_def = tf.compat.v1.saved_model.loader.load(
session, [tf.saved_model.tag_constants.SERVING], saved_model_dir)
serving_signature_key = tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY
signature_def = metagraph_def.signature_def[serving_signature_key]
def _get_tensors(tensor_infos):
tensor_names = [tensor_info.name for tensor_info in tensor_infos]
# Always use reverse lexicographic order for consistency and
# compatibility with PoD inference libraries.
tensor_names.sort(reverse=True)
return [graph.get_tensor_by_name(name) for name in tensor_names]
input_tensors = _get_tensors(signature_def.inputs.values())
output_tensors = _get_tensors(signature_def.outputs.values())
graph_def = export_frozen_graph_def(session, model_config, input_tensors,
output_tensors)
tflite_model = convert_frozen_graph_def_to_tflite(graph_def, model_config,
input_tensors,
output_tensors)
export_path = os.path.join(export_dir, "model.tflite")
with tf.gfile.GFile(export_path, "wb") as handle:
handle.write(tflite_model)
logging.info("TFLite model written to: %s", export_path)
def main(_):
runner_config = load_runner_config()
if FLAGS.output_dir:
tf.gfile.MakeDirs(FLAGS.output_dir)
is_per_host = tf.estimator.tpu.InputPipelineConfig.PER_HOST_V2
run_config = tf.estimator.tpu.RunConfig(
master=FLAGS.master,
model_dir=FLAGS.output_dir,
save_checkpoints_steps=runner_config["save_checkpoints_steps"],
keep_checkpoint_max=20,
tpu_config=tf.estimator.tpu.TPUConfig(
iterations_per_loop=runner_config["iterations_per_loop"],
num_shards=FLAGS.num_tpu_cores,
per_host_input_for_training=is_per_host))
model_fn = model_fn_builder(runner_config=runner_config)
# If TPU is not available, this will fall back to normal Estimator on CPU
# or GPU.
estimator = tf.estimator.tpu.TPUEstimator(
use_tpu=FLAGS.use_tpu,
model_fn=model_fn,
config=run_config,
train_batch_size=runner_config["batch_size"],
eval_batch_size=runner_config["batch_size"],
predict_batch_size=runner_config["batch_size"])
if FLAGS.runner_mode == "train":
train_input_fn = input_fn_reader.create_input_fn(
runner_config=runner_config,
create_projection=model.create_projection,
mode=tf.estimator.ModeKeys.TRAIN,
drop_remainder=True)
estimator.train(
input_fn=train_input_fn, max_steps=runner_config["train_steps"])
if FLAGS.runner_mode == "eval":
# TPU needs fixed shapes, so if the last batch is smaller, we drop it.
eval_input_fn = input_fn_reader.create_input_fn(
runner_config=runner_config,
create_projection=model.create_projection,
mode=tf.estimator.ModeKeys.EVAL,
drop_remainder=True)
for _ in tf.train.checkpoints_iterator(FLAGS.output_dir):
result = estimator.evaluate(input_fn=eval_input_fn)
for key in sorted(result):
logging.info(" %s = %s", key, str(result[key]))
if FLAGS.runner_mode == "export":
logging.info("Exporting the model...")
def serving_input_fn():
"""Input function of the exported model."""
def _input_fn():
text = tf.placeholder(tf.string, shape=[1], name="Input")
projection, seq_length = model.create_projection(
model_config=runner_config["model_config"],
mode=tf.estimator.ModeKeys.PREDICT,
inputs=text)
features = {"projection": projection, "seq_length": seq_length}
return tf.estimator.export.ServingInputReceiver(
features=features, receiver_tensors=features)
return _input_fn
saved_model_dir = estimator.export_saved_model(FLAGS.output_dir,
serving_input_fn())
export_tflite_model(runner_config["model_config"], saved_model_dir,
FLAGS.output_dir)
if __name__ == "__main__":
tf.app.run()
numpy
tensorflow
tensorflow-text
licenses(["notice"])
package(
default_visibility = [
"//visibility:public",
],
)
cc_library(
name = "sgnn_projection",
srcs = ["sgnn_projection.cc"],
hdrs = ["sgnn_projection.h"],
deps = [
"@org_tensorflow//tensorflow/lite:context",
"@org_tensorflow//tensorflow/lite:string_util",
"@org_tensorflow//tensorflow/lite/kernels:kernel_util",
"@org_tensorflow//tensorflow/lite/kernels/internal:tensor",
"@farmhash_archive//:farmhash",
"@flatbuffers",
],
)
cc_library(
name = "sgnn_projection_op_resolver",
srcs = ["sgnn_projection_op_resolver.cc"],
hdrs = ["sgnn_projection_op_resolver.h"],
visibility = ["//visibility:public"],
deps = [
":sgnn_projection",
"@org_tensorflow//tensorflow/lite:framework",
],
alwayslink = 1,
)
cc_test(
name = "sgnn_projection_test",
srcs = ["sgnn_projection_test.cc"],
deps = [
":sgnn_projection",
"@org_tensorflow//tensorflow/lite:string_util",
"@org_tensorflow//tensorflow/lite/kernels:test_util",
"@org_tensorflow//tensorflow/lite/schema:schema_fbs",
"@com_google_googletest//:gtest_main",
"@flatbuffers",
],
)
py_library(
name = "sgnn",
srcs = [
"sgnn.py",
],
srcs_version = "PY3",
deps = [
# package tensorflow
"@org_tflite_support//tensorflow_lite_support/custom_ops/python:tflite_text_api",
# Expect tensorflow text installed
],
)
py_test(
name = "sgnn_test",
srcs = [
"sgnn_test.py",
],
deps = [
":sgnn",
# package tensorflow
# Expect tensorflow text installed
],
)
py_binary(
name = "train",
srcs = [
"train.py",
],
main = "train.py",
python_version = "PY3",
deps = [
":sgnn",
# package tensorflow
# package tensorflow_datasets
],
)
py_binary(
name = "run_tflite",
srcs = ["run_tflite.py"],
main = "run_tflite.py",
python_version = "PY3",
deps = [
# Expect numpy installed
# package TFLite flex delegate
# package TFLite interpreter
"@org_tflite_support//tensorflow_lite_support/custom_ops/kernel:ngrams_op_resolver",
"@org_tflite_support//tensorflow_lite_support/custom_ops/kernel:whitespace_tokenizer_op_resolver",
# Expect tensorflow text installed
],
)
# pip install numpy
py_library(
name = "expect_numpy_installed",
)
# pip install tensorflow_text
py_library(
name = "expect_tensorflow_text_installed",
)
# Copyright 2020 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Script to run a langid TFLite model."""
from absl import app
from absl import flags
import numpy as np
from tensorflow.lite.python import interpreter as interpreter_wrapper # pylint: disable=g-direct-tensorflow-import
FLAGS = flags.FLAGS
flags.DEFINE_string('model', '/tmp/langid/model.tflite',
'Path to LangID TFLite model.')
LANGIDS = ['ar', 'en', 'es', 'fr', 'ru', 'zh', 'unk']
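# Example invocation (hypothetical paths; the model is produced by the train
# script in this directory):
#   python run_tflite.py --model=/tmp/langid/model.tflite c'est la vie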
def main(argv):
with open(FLAGS.model, 'rb') as file:
model = file.read()
interpreter = interpreter_wrapper.InterpreterWithCustomOps(
model_content=model,
custom_op_registerers=[
'AddWhitespaceTokenizerCustomOp', 'AddNgramsCustomOp',
'AddSgnnProjectionCustomOp',
])
interpreter.resize_tensor_input(0, [1, 1])
interpreter.allocate_tensors()
input_string = ' '.join(argv[1:])
print('Input: "{}"'.format(input_string))
input_array = np.array([[input_string]], dtype=np.str_)
interpreter.set_tensor(interpreter.get_input_details()[0]['index'],
input_array)
interpreter.invoke()
output = interpreter.get_tensor(interpreter.get_output_details()[0]['index'])
for x in range(output.shape[0]):
for y in range(output.shape[1]):
print('{:>3s}: {:.4f}'.format(LANGIDS[y], output[x][y]))
if __name__ == '__main__':
app.run(main)
# Copyright 2020 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Builds SGNN model.
[1] Sujith Ravi and Zornitsa Kozareva. 2018. "Self-governing neural networks for
on-device short text
classification." In Proceedings of the 2018 Conference on Empirical Methods in
Natural Language
Processing, pages 887-893. Association for Computational Linguistics
The model will be constructed in this way:
* Projects text to float features, the size is defined by projection_size
* Fully connected layer predicts the class of predictions.
"""
import collections
import tensorflow.compat.v2 as tf
import tensorflow_text as tf_text
from tensorflow_lite_support.custom_ops.python import tflite_text_api
# Hyperparameter collection used to tune the model.
Hparams = collections.namedtuple(
'Hparams',
[
# Learning rate for the optimizer.
'learning_rate'
])
def preprocess(text):
"""Normalize the text, and return tokens."""
assert len(text.get_shape().as_list()) == 2
assert text.get_shape().as_list()[-1] == 1
text = tf.reshape(text, [-1])
text = tf_text.case_fold_utf8(text)
tokenizer = tflite_text_api.WhitespaceTokenizer()
return tokenizer.tokenize(text)
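# A minimal sketch of preprocess() (illustrative values only):
#   preprocess(tf.constant([['Hello World']]))
#     -> <tf.RaggedTensor [[b'hello', b'world']]>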
def get_ngrams(tokens, n):
"""Generates character ngrams from tokens.
Args:
tokens: A string ragged tensor for tokens, in shape of [batch_size,
num_token].
n: ngram size for char ngrams.
Returns:
A string ragged tensor for ngrams, in shape of [batch_size, num_token,
ngrams].
"""
chars_split = tf.strings.unicode_split('^' + tokens + '$', 'UTF-8')
chars_joined = tflite_text_api.ngrams(
chars_split,
width=n,
axis=-1,
reduction_type=tf_text.Reduction.STRING_JOIN,
string_separator='')
flat_row_splits = tf.nn.embedding_lookup(chars_joined.values.row_splits,
chars_joined.row_splits)
return tf.RaggedTensor.from_row_splits(chars_joined.values.values,
flat_row_splits)
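# Worked example (illustrative): for the token 'hi' with n=2, the token is
# framed as '^hi$', split into ['^', 'h', 'i', '$'], and joined into
# character bigrams ['^h', 'hi', 'i$'].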
def project(ngrams, hash_seed, buckets):
"""Projects a ngram RaggedTensor to float tensor.
Args:
ngrams: A string ragged tensor, in shape of [batch_size, num_token, ngrams].
hash_seed: A python int list, in shape of [num_hash].
buckets: An int for the max value of projected integers.
Returns:
A float tensor that projects ngrams to the space represented by hash_seed,
in shape of [batch_size, num_hash].
"""
num_hash = len(hash_seed)
# Hash ngrams string tensor to hash signatures.
signatures = tf.ragged.map_flat_values(tf.strings.to_hash_bucket_fast, ngrams,
buckets)
# Each ngram signature is multiplied by a different hash seed, reduced modulo
# the bucket count, and then mapped linearly into a signed range:
# value = abs(signature * seed) % buckets
# if value > buckets / 2: value -= buckets
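# Worked example (illustrative numbers): signature=7, seed=3, buckets=8
# gives abs(7 * 3) % 8 = 5; since 5 > 8 / 2, the value wraps to 5 - 8 = -3.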
hash_tensor = tf.constant(hash_seed, dtype=tf.int64)
value = tf.math.floormod(
tf.abs(signatures.values * tf.reshape(hash_tensor, [-1, 1])), buckets)
value = value - tf.cast(tf.greater(value, buckets >> 1), tf.int64) * buckets
# Wrap values into a ragged tensor, and compute
# output_i,j = mean(value_i,j,k) for the k-th ngram in the i-th text
# computed with the j-th hash seed.
row_lengths = tf.repeat(
tf.reshape(signatures.row_lengths(), [1, -1]), num_hash, axis=0)
row_lengths = tf.cast(tf.reshape(row_lengths, [-1]), tf.int32)
result = tf.RaggedTensor.from_row_lengths(
tf.RaggedTensor.from_row_lengths(tf.reshape(value, [-1]), row_lengths),
tf.repeat(tf.shape(signatures.row_lengths()), num_hash))
result = tf.reduce_mean(result, 2) / (buckets >> 1)
return tf.transpose(tf.reshape(result.values, [num_hash, -1]))
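# A minimal usage sketch of project() (illustrative values; the seeds are
# hypothetical):
#   ngrams = tf.ragged.constant([['^a$', '^b$']])  # one text, two ngrams
#   features = project(ngrams, hash_seed=[3, 5], buckets=0x7FFFFFFF)
#   # features has shape [1, 2]: one row per text, one column per hash seed.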
def fused_project(ngrams, hash_seed, buckets):
"""A wrapper to fuse project method when converting to TFLite model.
Args:
ngrams: A string ragged tensor, in shape of [batch_size, num_token, ngrams].
hash_seed: A python int list, in shape of [num_hash].
buckets: An int for the max value of projected integers.
Returns:
A float tensor that projects ngrams to the space represented by hash_seed,
in shape of [batch_size, num_hash].
"""
hash_seed_attr = ' '.join(['i: %d' % seed for seed in hash_seed])
experimental_implements = [
'name: "tftext:custom:SgnnProjection"',
'attr { key: "hash_seed" value { list {%s} } }' % hash_seed_attr,
'attr { key: "buckets" value { i: %d } }' % buckets,
]
experimental_implements = ' '.join(experimental_implements)
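# For example (illustrative), hash_seed=[1, 2] and buckets=5 produce:
#   name: "tftext:custom:SgnnProjection"
#   attr { key: "hash_seed" value { list {i: 1 i: 2} } }
#   attr { key: "buckets" value { i: 5 } }
# which lets the TFLite converter replace this function with the custom op.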
@tf.function(experimental_implements=experimental_implements)
def func(ngrams_values, *ngrams_row_splits):
ngrams = tf.RaggedTensor.from_nested_row_splits(
flat_values=ngrams_values, nested_row_splits=ngrams_row_splits)
return project(ngrams, hash_seed, buckets)
return func(ngrams.flat_values, *ngrams.nested_row_splits)
def sgnn(texts, hash_seed, ngram_size):
"""Projects the string text to float features.
It first generasts N ngrams of the tokens from given text,
then projects each ngram tensor with a partion of the seeds.
Args:
texts: a string tensor, in shape of [batch_size].
hash_seed: a list of integers, in shape of [projection_size].
ngram_size: max size of ngram to generate features.
Returns:
A float tensor that projects ngrams to the space represented by hash_seed,
in shape of [batch_size, projection_size].
"""
projection_size = len(hash_seed)
partition_size = int(projection_size / ((ngram_size + 1) * ngram_size / 2))
if partition_size == 0:
raise ValueError(
'projection size %d is not enough for %d ngram partitions' %
(projection_size, ngram_size))
indices = [int(i * (i + 1) / 2) * partition_size for i in range(ngram_size)]
indices.append(projection_size)
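# Worked example (illustrative): with projection_size=12 and ngram_size=3,
# partition_size = int(12 / 6) = 2 and indices = [0, 2, 6, 12], so 1-grams
# get seeds [0:2], 2-grams seeds [2:6], and 3-grams seeds [6:12].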
projection_layer = []
tokens = preprocess(texts)
for i in range(ngram_size):
ngram = get_ngrams(tokens, i + 1)
projection = fused_project(ngram, hash_seed[indices[i]:indices[i + 1]],
0x7FFFFFFF)
projection_layer.append(projection)
return tf.cast(tf.concat(projection_layer, -1), tf.float32)
class ProjectLayer(tf.keras.layers.Layer):
"""Projects the texts to a fixed sized features."""
def __init__(self, seed, ngram_size, **kwargs):
self.seed = seed
self.ngram_size = ngram_size
super(ProjectLayer, self).__init__(**kwargs)
def get_config(self):
return {
'seed': self.seed,
'ngram_size': self.ngram_size,
}
def call(self, x):
return sgnn(x, self.seed, self.ngram_size)
def compute_output_shape(self, input_shape):
return (input_shape[0], len(self.seed))
def keras_model(hash_seed, ngram_size, fc_size_list, hparams):
"""Compiles a keras model from projected features to labels.
Args:
hash_seed: a list of int used to project the feature.
ngram_size: maximum size of ngram to generate features from texts.
fc_size_list: a list of int, sizes of each fully connected layer.
hparams: hyper parameters for the model.
Returns:
A keras model that predicts the language id.
"""
if not fc_size_list:
raise ValueError(
'Must specify one or more fully connected layers via fc_size_list')
model = tf.keras.Sequential()
model.add(ProjectLayer(hash_seed, ngram_size))
for size in fc_size_list[:-1]:
model.add(tf.keras.layers.Dense(size))
model.add(tf.keras.layers.Dense(fc_size_list[-1], activation='softmax'))
model.compile(
optimizer=tf.keras.optimizers.Adam(learning_rate=hparams.learning_rate),
loss=tf.keras.losses.SparseCategoricalCrossentropy(),
metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
return model
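# A minimal usage sketch (hypothetical seeds and layer sizes, not part of the
# original file):
#   hparams = Hparams(learning_rate=2e-4)
#   model = keras_model(hash_seed=list(range(1, 13)), ngram_size=3,
#                       fc_size_list=[64, 7], hparams=hparams)
#   model.fit(train_dataset, epochs=10)  # dataset of (text, label) batches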
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "sgnn/sgnn_projection.h" // sequence_projection
#include <cstdlib>
#include <iostream>
#include "flatbuffers/flexbuffers.h" // flatbuffer
#include "farmhash.h"
#include "tensorflow/lite/context.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/string_util.h"
namespace tflite {
namespace ops {
namespace custom {
namespace sgnn {
// This TFLite op implements the SGNN Projection
//
// Input:
// * data: A ragged string tensor of rank 2 (a 1D string value tensor and
// a 1D int64 row_split tensor).
//
// Attributes:
// * hash_seed: list of integers
// Hash seeds to project features
// * buckets: scalar integer
// Bucketize computed hash signatures.
//
// Output:
// * output: A 2D float tensor, 1st dimension is the batch of `data`,
// 2nd dimension is the size of `hash_seed`.
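//
// Example (illustrative): values = ["^h", "hi", "i$"], row_splits = [0, 3],
// and a hash_seed of size 4 yield a [1, 4] float output: one row for the
// single input text, one column per hash seed.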
constexpr int kValues = 0;
constexpr int kRowSplits = 1;
struct SgnnProjectionAttributes {
int buckets;
std::vector<int32_t> hash_seed;
explicit SgnnProjectionAttributes(const flexbuffers::Map& m)
: buckets(m["buckets"].AsInt32()) {
auto hash_seed_attr = m["hash_seed"].AsTypedVector();
hash_seed = std::vector<int32_t>(hash_seed_attr.size());
for (int i = 0; i < hash_seed_attr.size(); ++i) {
hash_seed[i] = hash_seed_attr[i].AsInt32();
}
}
};
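// The attributes arrive as a serialized flexbuffer map; conceptually
// (illustrative values): {"buckets": 0x7FFFFFFF, "hash_seed": [3, 5, 7, 11]}.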
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
const uint8_t* buffer_t = reinterpret_cast<const uint8_t*>(buffer);
return new SgnnProjectionAttributes(
flexbuffers::GetRoot(buffer_t, length).AsMap());
}
void Free(TfLiteContext* context, void* buffer) {
delete reinterpret_cast<SgnnProjectionAttributes*>(buffer);
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
const auto& attributes =
*reinterpret_cast<SgnnProjectionAttributes*>(node->user_data);
const TfLiteTensor* input_row_splits = GetInput(context, node, kRowSplits);
TfLiteTensor* output = GetOutput(context, node, 0);
TfLiteIntArray* output_shape = TfLiteIntArrayCreate(2);
output_shape->data[0] = SizeOfDimension(input_row_splits, 0) - 1;
output_shape->data[1] = attributes.hash_seed.size();
TF_LITE_ENSURE_OK(context,
context->ResizeTensor(context, output, output_shape));
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
const auto& attributes =
*reinterpret_cast<SgnnProjectionAttributes*>(node->user_data);
const TfLiteTensor* ngrams = GetInput(context, node, kValues);
const TfLiteTensor* row_splits = GetInput(context, node, kRowSplits);
auto row_splits_values = GetTensorData<int64_t>(row_splits);
auto output_values = GetTensorData<float>(GetOutput(context, node, 0));
int output_idx = 0;
for (int i = 1; i < SizeOfDimension(row_splits, 0); ++i) {
int len = row_splits_values[i] - row_splits_values[i - 1];
std::vector<int64_t> hash_signature(len);
// Follow the implementation from
// tensorflow/core/kernels/string_to_hash_bucket_op.h
for (int j = 0; j < len; ++j) {
int index = row_splits_values[i - 1] + j;
StringRef str = GetString(ngrams, index);
hash_signature[j] =
util::Fingerprint64(str.str, str.len) % attributes.buckets;
}
for (int k = 0; k < attributes.hash_seed.size(); ++k) {
double result = 0;
for (int j = 0; j < len; ++j) {
int64_t tmp = hash_signature[j] * attributes.hash_seed[k];
int64_t value = std::abs(tmp) % attributes.buckets;
if (value > attributes.buckets / 2) {
value -= attributes.buckets;
}
result += value;
}
output_values[output_idx] =
static_cast<float>(result) / (attributes.buckets / 2) / len;
output_idx++;
}
}
return kTfLiteOk;
}
} // namespace sgnn
TfLiteRegistration* Register_tftext_SGNN_PROJECTION() {
static TfLiteRegistration r = {sgnn::Init, sgnn::Free, sgnn::Prepare,
sgnn::Eval};
return &r;
}
} // namespace custom
} // namespace ops
} // namespace tflite
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_MODELS_SEQUENCE_PROJECTION_SGNN_SGNN_PROJECTION_H_
#define TENSORFLOW_MODELS_SEQUENCE_PROJECTION_SGNN_SGNN_PROJECTION_H_
#include "tensorflow/lite/context.h"
namespace tflite {
namespace ops {
namespace custom {
TfLiteRegistration* Register_tftext_SGNN_PROJECTION();
} // namespace custom
} // namespace ops
} // namespace tflite
#endif // TENSORFLOW_MODELS_SEQUENCE_PROJECTION_SGNN_SGNN_PROJECTION_H_