Unverified Commit 11dc461f authored by thunderfyc's avatar thunderfyc Committed by GitHub
Browse files

Rename sequence_projection to seq_flow_lite (#9448)

* Rename sequence_projection to seq_flow_lite

* Rename sequence_projection to seq_flow_lite
parent 63665121
......@@ -29,9 +29,10 @@ def classification_metric(per_example_loss, label_ids, logits):
def labeling_metric(per_example_loss, label_ids, logits):
  """Compute eval metrics for multi-label classification.

  Args:
    per_example_loss: Per-example loss tensor of shape [batch].
    label_ids: Binary label tensor of shape [batch, num_classes].
    logits: Unnormalized model outputs of shape [batch, num_classes].

  Returns:
    Dict of tf.metrics results: "eval_loss" plus one "auc/<idx>" entry
    per class.
  """
  # Compute sigmoid scores once, instead of re-applying sigmoid for every
  # class inside the loop. (A bad merge previously left two assignments to
  # the same "auc/<idx>" key — one using the redundant per-class sigmoid;
  # only the assignment using the precomputed scores is kept.)
  scores = tf.math.sigmoid(logits)
  num_classes = label_ids.get_shape().as_list()[-1]
  return_dict = {"eval_loss": tf.metrics.mean(per_example_loss)}
  for idx in range(num_classes):
    return_dict["auc/" + str(idx)] = tf.metrics.auc(label_ids[:, idx],
                                                    scores[:, idx])
  return return_dict
licenses(["notice"])

package(
    default_visibility = ["//:friends"],  # sequence projection
)

# PRADO model implementation (prado.py); depends on the quantized layer
# library and the custom TF ops used at inference time.
py_library(
    name = "prado",
    srcs = ["prado.py"],
    srcs_version = "PY3",
    deps = [
        # package absl/logging
        # package tensorflow
        "//layers:base_layers",  # sequence projection
        "//layers:conv_layers",  # sequence projection
        "//layers:dense_layers",  # sequence projection
        "//layers:projection_layers",  # sequence projection
        "//layers:quantization_layers",  # sequence projection
        # "//tf_ops:tf_custom_ops" # sequence projection
        "//tf_ops:tf_custom_ops_py",  # sequence projection
    ],
)
# Copyright 2020 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Lint as: python3
"""Implementation of PRADO model."""
import copy
from absl import logging
import numpy as np
import tensorflow as tf
from layers import base_layers # import seq_flow_lite module
from layers import conv_layers # import seq_flow_lite module
from layers import dense_layers # import seq_flow_lite module
from layers import projection_layers # import seq_flow_lite module
from layers import quantization_layers # import seq_flow_lite module
from tf_ops import tf_custom_ops_py # import seq_flow_lite module
class PaddedMaskedVarLenConv(conv_layers.EncoderQConvolutionVarLen):
  """A layer that performs padded masked convolution.

  Supports plain n-gram convolutions (kernel width == ngram) and skip
  bigrams, where interior kernel taps are zeroed so only two tokens at
  the configured skip distance contribute.
  """

  def __init__(self, invalid_value, ngram=2, skip_bigram=None, **kwargs):
    # Value written at masked (padding) positions; see call().
    self.invalid_value = invalid_value
    # Exactly one of `ngram` (1..5) or `skip_bigram` (1 or 2) must be set.
    assert ngram is None or (ngram >= 1 and ngram <= 5)
    assert skip_bigram is None or skip_bigram == 1 or skip_bigram == 2
    assert bool(ngram is None) != bool(skip_bigram is None)
    # Kernel width: the n-gram size, or skip distance + 2 for skip bigrams.
    self.kwidth = ngram if ngram is not None else (skip_bigram + 2)
    mask = [1] * self.kwidth
    if skip_bigram is not None:
      # Zero the interior taps so only the first and last token of the
      # window are used ([1,0,1] for skip1, [1,0,0,1] for skip2).
      mask[1], mask[skip_bigram] = 0, 0
    self.mask = np.array(mask, dtype="float32").reshape((1, self.kwidth, 1, 1))
    # Right-pad by kwidth-1 so the VALID convolution produces one output
    # per input timestep.
    self.zero_pad = tf.keras.layers.ZeroPadding1D(padding=[0, self.kwidth - 1])
    super(PaddedMaskedVarLenConv, self).__init__(
        ksize=self.kwidth, rank=3, padding="VALID", activation=None, **kwargs)

  def call(self, inputs, mask, inverse_normalizer):
    # inputs: rank-3 [batch, time, features]; mask: rank-3 validity mask.
    self._assert_rank_and_type(inputs, 3)
    self._assert_rank_and_type(mask, 3)
    # Base-class conv expects a rank-4 mask.
    maskr4 = tf.expand_dims(mask, axis=1)
    inputs_padded = self.zero_pad(inputs)
    result = super(PaddedMaskedVarLenConv, self).call(inputs_padded, maskr4,
                                                      inverse_normalizer)
    if self.parameters.mode not in [base_layers.PREDICT, base_layers.TFLITE]:
      # Outside inference modes, force masked positions to invalid_value
      # (e.g. a large-negative logit) so downstream softmax ignores them.
      return result * mask + (1 - mask) * self.invalid_value
    return result

  def add_qweight(self, shape, num_bits=8):
    # Multiply the (quantized) kernel by the tap mask so skipped positions
    # stay exactly zero.
    weight = super(PaddedMaskedVarLenConv, self).add_qweight(
        shape=shape, num_bits=num_bits)
    return weight * tf.convert_to_tensor(self.mask)
class AttentionPoolReduce(base_layers.BaseLayer):
  """Attention pooling and reduce.

  Runs two parallel masked convolutions — one producing values, one
  producing attention logits — then collapses the time dimension via a
  softmax-weighted sum (or the equivalent custom op in TFLITE mode).
  """

  def __init__(self, filters, ngram=2, skip_bigram=None, **kwargs):
    super(AttentionPoolReduce, self).__init__(**kwargs)
    self.filters = filters
    conv_args = dict(filters=filters, ngram=ngram, skip_bigram=skip_bigram)
    # Values branch pads masked positions with 0.
    self.value = PaddedMaskedVarLenConv(0, **conv_args, **kwargs)
    # Attention branch pads masked positions with an invalid logit so the
    # softmax assigns them ~zero weight.
    self.attention_logits = PaddedMaskedVarLenConv(
        self.parameters.invalid_logit, **conv_args, **kwargs)

  def call(self, values_in, attention_in, mask, inverse_normalizer):
    for tensor in (values_in, attention_in, mask):
      self._assert_rank_and_type(tensor, 3)
    values = self.value(values_in, mask, inverse_normalizer)
    logits = self.attention_logits(attention_in, mask, inverse_normalizer)
    if self.parameters.mode == base_layers.TFLITE:
      # Delegate the softmax-weighted reduction to the custom op.
      return tf_custom_ops_py.expected_value_op(logits, values)
    # [batch, time, filters] -> [batch, filters, time], then reduce time.
    logits_t = tf.transpose(logits, [0, 2, 1])
    values_t = tf.transpose(values, [0, 2, 1])
    weights = tf.nn.softmax(logits_t)
    return tf.reduce_sum(weights * values_t, axis=2)
class Encoder(tf.keras.layers.Layer):
  """A PRADO keras model.

  Builds per-n-gram attention-pooling branches over a projected input
  sequence and maps the concatenated pooled features to class logits.
  """

  def __init__(self, config, mode):
    super(Encoder, self).__init__()

    def _get_params(varname, default_value=None):
      # Read `varname` from config (falling back to default_value), log
      # the resolved value, and store it as an instance attribute.
      value = config[varname] if varname in config else default_value
      default = "" if varname in config else " (default)"
      logging.info("%s = %s%s", varname, value, default)
      setattr(self, varname, value)

    _get_params("labels")
    _get_params("quantize", True)
    _get_params("embedding_regularizer_scale", 35e-3)
    _get_params("embedding_size", 64)
    # Channel counts per n-gram / skip-gram branch; 0 disables a branch.
    _get_params("unigram_channels", 0)
    _get_params("bigram_channels", 0)
    _get_params("trigram_channels", 0)
    _get_params("fourgram_channels", 0)
    _get_params("fivegram_channels", 0)
    _get_params("skip1bigram_channels", 0)
    _get_params("skip2bigram_channels", 0)
    _get_params("network_regularizer_scale", 1e-4)
    _get_params("keep_prob", 0.5)
    self.num_classes = len(self.labels)
    # The embedding-side FC layers use the embedding regularizer scale.
    self.parameters = base_layers.Parameters(
        mode,
        quantize=self.quantize,
        regularizer_scale=self.embedding_regularizer_scale)
    self.values_fc = dense_layers.BaseQDenseVarLen(
        units=self.embedding_size, rank=3, parameters=self.parameters)
    self.attention_fc = dense_layers.BaseQDenseVarLen(
        units=self.embedding_size, rank=3, parameters=self.parameters)
    self.dropout = tf.keras.layers.Dropout(rate=(1 - self.keep_prob))
    # Copy the parameters object so layers created below get the network
    # regularizer scale without mutating the FC layers created above.
    self.parameters = copy.copy(self.parameters)
    self.parameters.regularizer_scale = self.network_regularizer_scale
    self.attention_pool_layers = []
    self._add_attention_pool_layer(self.unigram_channels, 1)
    self._add_attention_pool_layer(self.bigram_channels, 2)
    self._add_attention_pool_layer(self.trigram_channels, 3)
    self._add_attention_pool_layer(self.fourgram_channels, 4)
    self._add_attention_pool_layer(self.fivegram_channels, 5)
    self._add_attention_pool_layer(self.skip1bigram_channels, None, 1)
    self._add_attention_pool_layer(self.skip2bigram_channels, None, 2)
    self.concat_quantizer = quantization_layers.ConcatQuantization(
        axis=1, parameters=self.parameters)
    self.final_fc = dense_layers.BaseQDense(
        units=self.num_classes,
        rank=2,
        parameters=self.parameters,
        activation=None)

  def _add_attention_pool_layer(self, channels, ngram, skip_bigram=None):
    # Create a pooling branch only when the config requests channels for it.
    if channels > 0:
      self.attention_pool_layers.append(
          AttentionPoolReduce(
              filters=channels,
              skip_bigram=skip_bigram,
              ngram=ngram,
              parameters=self.parameters))

  def _apply_fc_dropout(self, layer, inputs, mask, inverse_normalizer):
    # Apply a variable-length FC layer; dropout only in TRAIN mode.
    outputs = layer(inputs, mask, inverse_normalizer)
    if self.parameters.mode == base_layers.TRAIN:
      return self.dropout(outputs)
    return outputs

  def call(self, projection, seq_length):
    # Float validity mask derived from per-example sequence lengths.
    mask = tf.sequence_mask(
        seq_length, tf.shape(projection)[1], dtype=tf.float32)
    inverse_normalizer = tf.math.reciprocal(tf.reduce_sum(mask))
    maskr3 = tf.expand_dims(mask, axis=2)
    values_in = self._apply_fc_dropout(self.values_fc, projection, mask,
                                       inverse_normalizer)
    attention_in = self._apply_fc_dropout(self.attention_fc, projection, mask,
                                          inverse_normalizer)
    # One pooled tensor per configured n-gram / skip-gram branch.
    tensors = [
        layer(values_in, attention_in, maskr3, inverse_normalizer)
        for layer in self.attention_pool_layers
    ]
    pre_logits = self.concat_quantizer(tensors)
    return self.final_fc(pre_logits)
class Model(Encoder):
  """PRADO encoder fed by a text projection front-end."""

  def __init__(self, config, mode):
    super(Model, self).__init__(config, mode)
    # Converts raw inputs into projected features plus sequence lengths.
    self.projection = projection_layers.ProjectionLayer(config, mode)

  def call(self, inputs):
    features, lengths = self.projection(inputs)
    return super(Model, self).call(features, lengths)
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "sgnn/sgnn_projection.h" // sequence_projection
#include "models/sgnn/sgnn_projection.h" // seq_flow_lite
#include <cstdlib>
#include <iostream>
......@@ -106,7 +106,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
int index = row_splits->data.i64[i - 1] + j;
StringRef str = GetString(ngrams, index);
hash_signature[j] =
util::Fingerprint64(str.str, str.len) % attributes.buckets;
util::Fingerprint64(str.str, str.len) % attributes.buckets;
}
for (int k = 0; k < attributes.hash_seed.size(); ++k) {
double result = 0;
......
......@@ -13,10 +13,10 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "sgnn/sgnn_projection_op_resolver.h" // sequence_projection
#include "models/sgnn/sgnn_projection_op_resolver.h" // seq_flow_lite
#include "tensorflow/lite/mutable_op_resolver.h"
#include "sgnn/sgnn_projection.h" // sequence_projection
#include "models/sgnn/sgnn_projection.h" // seq_flow_lite
namespace tflite {
namespace ops {
......
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "sgnn/sgnn_projection.h" // sequence_projection
#include "models/sgnn/sgnn_projection.h" // seq_flow_lite
#include <string>
#include <vector>
......
......@@ -14,11 +14,11 @@
# ==============================================================================
# Lint as: python3
"""Tests for sequence_projection.sgnn."""
"""Tests for seq_flow_lite.sgnn."""
import tensorflow as tf
from tensorflow.python.framework import test_util # pylint: disable=g-direct-tensorflow-import
import sgnn # import sequence_projection module
from models import sgnn # import seq_flow_lite module
@test_util.run_all_in_graph_and_eager_modes
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Script to train langid model.
The script builds language detection from wikipedia dataset,
......@@ -27,7 +26,7 @@ import numpy as np
import tensorflow.compat.v2 as tf
import tensorflow_datasets as tfds
import sgnn # import sequence_projection module
from models import sgnn # import seq_flow_lite module
FLAGS = flags.FLAGS
flags.DEFINE_string('output_dir', '/tmp/langid',
......
......@@ -10,6 +10,15 @@ package(
],
)
# Python wrapper library for the sequence string projection op.
py_library(
    name = "text_projection",
    srcs = ["text_projection.py"],
    srcs_version = "PY3",
    deps = [
        ":sequence_string_projection_op_py",
    ],
)
cc_library(
name = "sequence_string_projection_op",
srcs = [
......@@ -118,3 +127,19 @@ gen_op_wrapper_py(
out = "sequence_string_projection_op.py",
kernel_lib = ":sequence_string_projection_op",
)
# Custom TF op kernels; alwayslink keeps the op registrations alive even
# though nothing references their symbols directly.
cc_library(
    name = "tf_custom_ops",
    srcs = ["tf_custom_ops.cc"],
    deps = [
        "@tensorflow_includes//:includes",
        "@tensorflow_solib//:framework_lib",
    ],
    alwayslink = 1,
)

# Generated Python bindings for the custom ops above.
gen_op_wrapper_py(
    name = "tf_custom_ops_py",
    out = "tf_custom_ops_py.py",
    kernel_lib = ":tf_custom_ops",
)
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tf_ops/projection_normalizer_util.h" // sequence_projection
#include "tf_ops/projection_normalizer_util.h" // seq_flow_lite
#include <algorithm>
#include <cstddef>
......@@ -20,7 +20,7 @@ limitations under the License.
#include <sstream>
#include <utility>
#include "tf_ops/projection_util.h" // sequence_projection
#include "tf_ops/projection_util.h" // seq_flow_lite
// Returns true if the given text contains a number.
bool IsDigit(const std::string& text) {
......
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tf_ops/projection_tokenizer_util.h" // sequence_projection
#include "tf_ops/projection_tokenizer_util.h" // seq_flow_lite
#include <cstddef>
#include <iostream>
......@@ -20,7 +20,7 @@ limitations under the License.
#include <sstream>
#include <utility>
#include "tf_ops/projection_util.h" // sequence_projection
#include "tf_ops/projection_util.h" // seq_flow_lite
namespace {
......
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tf_ops/projection_util.h" // sequence_projection
#include "tf_ops/projection_util.h" // seq_flow_lite
#include <cstddef>
#include <iostream>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment