Unverified Commit ca552843 authored by Srihari Humbarwadi's avatar Srihari Humbarwadi Committed by GitHub
Browse files

Merge branch 'panoptic-segmentation' into panoptic-segmentation

parents 7e2f7a35 6b90e134
...@@ -111,6 +111,10 @@ message CenterNet { ...@@ -111,6 +111,10 @@ message CenterNet {
// Parameters to determine the architecture of the object center prediction // Parameters to determine the architecture of the object center prediction
// head. // head.
optional PredictionHeadParams center_head_params = 8; optional PredictionHeadParams center_head_params = 8;
// Max pool kernel size to use to pull off peak score locations in a
// neighborhood for the object detection heatmap.
optional int32 peak_max_pool_kernel_size = 9 [default = 3];
} }
optional ObjectCenterParams object_center_params = 5; optional ObjectCenterParams object_center_params = 5;
...@@ -266,6 +270,16 @@ message CenterNet { ...@@ -266,6 +270,16 @@ message CenterNet {
// with scores higher than the threshold. // with scores higher than the threshold.
optional float rescoring_threshold = 30 [default = 0.0]; optional float rescoring_threshold = 30 [default = 0.0];
// The ratio used to multiply the output feature map size to determine the
// denominator in the Gaussian formula. Only applicable when the
// candidate_ranking_mode is set to be 'gaussian_weighted_const'.
optional float gaussian_denom_ratio = 31 [default = 0.1];
// Whether to use the keypoint postprocessing logic that replaces topk op
// with argmax. Usually used when exporting the model for predicting
// keypoints of multiple instances in the browser.
optional bool argmax_postprocessing = 32 [default = false];
// Parameters to determine the architecture of the keypoint heatmap // Parameters to determine the architecture of the keypoint heatmap
// prediction head. // prediction head.
optional PredictionHeadParams heatmap_head_params = 25; optional PredictionHeadParams heatmap_head_params = 25;
......
...@@ -231,6 +231,10 @@ message WeightedDiceClassificationLoss { ...@@ -231,6 +231,10 @@ message WeightedDiceClassificationLoss {
// If set, we square the probabilities in the denominator term used for // If set, we square the probabilities in the denominator term used for
// normalization. // normalization.
optional bool squared_normalization = 1 [default=false]; optional bool squared_normalization = 1 [default=false];
// Whether or not the input prediction to the loss function is a
// probability. If not, the input is to be interpreted as logit
optional bool is_prediction_probability = 2 [default=false];
} }
...@@ -948,7 +948,8 @@ def merge_boxes_with_multiple_labels(boxes, ...@@ -948,7 +948,8 @@ def merge_boxes_with_multiple_labels(boxes,
def nearest_neighbor_upsampling(input_tensor, scale=None, height_scale=None, def nearest_neighbor_upsampling(input_tensor, scale=None, height_scale=None,
width_scale=None): width_scale=None,
name='nearest_neighbor_upsampling'):
"""Nearest neighbor upsampling implementation. """Nearest neighbor upsampling implementation.
Nearest neighbor upsampling function that maps input tensor with shape Nearest neighbor upsampling function that maps input tensor with shape
...@@ -965,6 +966,7 @@ def nearest_neighbor_upsampling(input_tensor, scale=None, height_scale=None, ...@@ -965,6 +966,7 @@ def nearest_neighbor_upsampling(input_tensor, scale=None, height_scale=None,
option when provided overrides `scale` option. option when provided overrides `scale` option.
width_scale: An integer multiple to scale the width of input image. This width_scale: An integer multiple to scale the width of input image. This
option when provided overrides `scale` option. option when provided overrides `scale` option.
name: A name for the operation (optional).
Returns: Returns:
data_up: A float32 tensor of size data_up: A float32 tensor of size
[batch, height_in*scale, width_in*scale, channels]. [batch, height_in*scale, width_in*scale, channels].
...@@ -976,13 +978,13 @@ def nearest_neighbor_upsampling(input_tensor, scale=None, height_scale=None, ...@@ -976,13 +978,13 @@ def nearest_neighbor_upsampling(input_tensor, scale=None, height_scale=None,
if not scale and (height_scale is None or width_scale is None): if not scale and (height_scale is None or width_scale is None):
raise ValueError('Provide either `scale` or `height_scale` and' raise ValueError('Provide either `scale` or `height_scale` and'
' `width_scale`.') ' `width_scale`.')
with tf.name_scope('nearest_neighbor_upsampling'): with tf.name_scope(name):
h_scale = scale if height_scale is None else height_scale h_scale = scale if height_scale is None else height_scale
w_scale = scale if width_scale is None else width_scale w_scale = scale if width_scale is None else width_scale
(batch_size, height, width, (batch_size, height, width,
channels) = shape_utils.combined_static_and_dynamic_shape(input_tensor) channels) = shape_utils.combined_static_and_dynamic_shape(input_tensor)
output_tensor = tf.stack([input_tensor] * w_scale, axis=3) output_tensor = tf.stack([input_tensor] * w_scale, axis=3, name='w_stack')
output_tensor = tf.stack([output_tensor] * h_scale, axis=2) output_tensor = tf.stack([output_tensor] * h_scale, axis=2, name='h_stack')
return tf.reshape(output_tensor, return tf.reshape(output_tensor,
[batch_size, height * h_scale, width * w_scale, channels]) [batch_size, height * h_scale, width * w_scale, channels])
......
...@@ -16,10 +16,10 @@ http_archive( ...@@ -16,10 +16,10 @@ http_archive(
http_archive( http_archive(
name = "org_tensorflow", name = "org_tensorflow",
sha256 = "fc6d7c57cd9427e695a38ad00fb6ecc3f623bac792dd44ad73a3f85b338b68be", sha256 = "40d3203ab5f246d83bae328288a24209a2b85794f1b3e2cd0329458d8e7c1985",
strip_prefix = "tensorflow-8a4ffe2e1ae722cff5306778df0cfca8b7f503fe", strip_prefix = "tensorflow-2.6.0",
urls = [ urls = [
"https://github.com/tensorflow/tensorflow/archive/8a4ffe2e1ae722cff5306778df0cfca8b7f503fe.tar.gz", "https://github.com/tensorflow/tensorflow/archive/v2.6.0.zip",
], ],
) )
...@@ -49,41 +49,6 @@ PROTOC_VERSION = "3.9.0" ...@@ -49,41 +49,6 @@ PROTOC_VERSION = "3.9.0"
PROTOC_SHA256 = "15e395b648a1a6dda8fd66868824a396e9d3e89bc2c8648e3b9ab9801bea5d55" PROTOC_SHA256 = "15e395b648a1a6dda8fd66868824a396e9d3e89bc2c8648e3b9ab9801bea5d55"
reverb_protoc_deps(version = PROTOC_VERSION, sha256 = PROTOC_SHA256) reverb_protoc_deps(version = PROTOC_VERSION, sha256 = PROTOC_SHA256)
# ABSL cpp library.
http_archive(
name = "com_google_absl",
sha256 = "f368a8476f4e2e0eccf8a7318b98dafbe30b2600f4e3cf52636e5eb145aba06a", # SHARED_ABSL_SHA
strip_prefix = "abseil-cpp-df3ea785d8c30a9503321a3d35ee7d35808f190d",
urls = [
"https://storage.googleapis.com/mirror.tensorflow.org/github.com/abseil/abseil-cpp/archive/df3ea785d8c30a9503321a3d35ee7d35808f190d.tar.gz",
"https://github.com/abseil/abseil-cpp/archive/df3ea785d8c30a9503321a3d35ee7d35808f190d.tar.gz",
],
)
http_archive(
name = "rules_cc",
strip_prefix = "rules_cc-master",
urls = ["https://github.com/bazelbuild/rules_cc/archive/master.zip"],
)
# GoogleTest/GoogleMock framework. Used by most unit-tests.
http_archive(
name = "com_google_googletest",
urls = ["https://github.com/google/googletest/archive/master.zip"],
strip_prefix = "googletest-master",
)
# gflags needed by glog
http_archive(
name = "com_github_gflags_gflags",
sha256 = "6e16c8bc91b1310a44f3965e616383dbda48f83e8c1eaa2370a215057b00cabe",
strip_prefix = "gflags-77592648e3f3be87d6c7123eb81cbad75f9aef5a",
urls = [
"https://mirror.bazel.build/github.com/gflags/gflags/archive/77592648e3f3be87d6c7123eb81cbad75f9aef5a.tar.gz",
"https://github.com/gflags/gflags/archive/77592648e3f3be87d6c7123eb81cbad75f9aef5a.tar.gz",
],
)
# glog # glog
http_archive( http_archive(
name = "com_google_glog", name = "com_google_glog",
...@@ -92,16 +57,6 @@ http_archive( ...@@ -92,16 +57,6 @@ http_archive(
urls = ["https://github.com/google/glog/archive/v0.4.0.tar.gz"], urls = ["https://github.com/google/glog/archive/v0.4.0.tar.gz"],
) )
http_archive(
name = "absl_py",
sha256 = "603febc9b95a8f2979a7bdb77d2f5e4d9b30d4e0d59579f88eba67d4e4cc5462",
strip_prefix = "abseil-py-pypi-v0.9.0",
urls = [
"https://storage.googleapis.com/mirror.tensorflow.org/github.com/abseil/abseil-py/archive/pypi-v0.9.0.tar.gz",
"https://github.com/abseil/abseil-py/archive/pypi-v0.9.0.tar.gz",
],
)
http_archive( http_archive(
name = "utf_archive", name = "utf_archive",
build_file = "@//third_party:utf.BUILD", build_file = "@//third_party:utf.BUILD",
...@@ -113,25 +68,17 @@ http_archive( ...@@ -113,25 +68,17 @@ http_archive(
) )
#----------------------------------------------------------------------------- load("@org_tensorflow//tensorflow:workspace3.bzl", "tf_workspace3")
# proto tf_workspace3()
#-----------------------------------------------------------------------------
# proto_library, cc_proto_library and java_proto_library rules implicitly depend load("@org_tensorflow//tensorflow:workspace2.bzl", "tf_workspace2")
# on @com_google_protobuf//:proto, @com_google_protobuf//:cc_toolchain and tf_workspace2()
# @com_google_protobuf//:java_toolchain, respectively.
# This statement defines the @com_google_protobuf repo.
http_archive(
name = "com_google_protobuf",
strip_prefix = "protobuf-3.8.0",
urls = ["https://github.com/google/protobuf/archive/v3.8.0.zip"],
sha256 = "1e622ce4b84b88b6d2cdf1db38d1a634fe2392d74f0b7b74ff98f3a51838ee53",
)
load("//third_party/flatbuffers:workspace.bzl", flatbuffers = "repo") load("@org_tensorflow//tensorflow:workspace1.bzl", "tf_workspace1")
flatbuffers() tf_workspace1()
load("@org_tensorflow//tensorflow:workspace.bzl", "tf_workspace") load("@org_tensorflow//tensorflow:workspace0.bzl", "tf_workspace0")
tf_workspace(tf_repo_name = "org_tensorflow") tf_workspace0()
# TF submodule compilation doesn't take care of grpc deps. Do it manually here. # TF submodule compilation doesn't take care of grpc deps. Do it manually here.
...@@ -168,7 +115,7 @@ new_git_repository( ...@@ -168,7 +115,7 @@ new_git_repository(
remote = "https://github.com/unicode-org/icu", remote = "https://github.com/unicode-org/icu",
build_file = "@//third_party:icu.BUILD", build_file = "@//third_party:icu.BUILD",
patch_cmds = [ patch_cmds = [
"find . -type f -exec sed -i 's/#\s*include \"unicode/#include \"icu4c\/source\/common\/unicode/g' {} \;", "find . -type f -exec sed -i 's/#\\s*include \"unicode/#include \"icu4c\\/source\\/common\\/unicode/g' {} \\;",
], ],
) )
......
...@@ -5,5 +5,6 @@ sh_binary( ...@@ -5,5 +5,6 @@ sh_binary(
"//tf_ops:sequence_string_projection_op_py", "//tf_ops:sequence_string_projection_op_py",
"//tf_ops:sequence_string_projection_op_v2_py", "//tf_ops:sequence_string_projection_op_v2_py",
"//tf_ops:tf_custom_ops_py", "//tf_ops:tf_custom_ops_py",
"//tflite_ops:registerer",
], ],
) )
...@@ -30,3 +30,5 @@ cp -f "${RUNFILES_DIR}/tf_ops/libtf_custom_ops_py_gen_op.so" \ ...@@ -30,3 +30,5 @@ cp -f "${RUNFILES_DIR}/tf_ops/libtf_custom_ops_py_gen_op.so" \
cp -f "${RUNFILES_DIR}/tf_ops/tf_custom_ops_py.py" \ cp -f "${RUNFILES_DIR}/tf_ops/tf_custom_ops_py.py" \
"${BUILD_WORKSPACE_DIRECTORY}/tf_ops" "${BUILD_WORKSPACE_DIRECTORY}/tf_ops"
cp -f "${RUNFILES_DIR}/tflite_ops/registerer.so" \
"${BUILD_WORKSPACE_DIRECTORY}/tflite_ops"
...@@ -44,7 +44,7 @@ class _BazelBuildCommand(setuptools.Command): ...@@ -44,7 +44,7 @@ class _BazelBuildCommand(setuptools.Command):
setuptools.setup( setuptools.setup(
name='seq_flow_lite', name='seq_flow_lite',
version='0.1', version='0.1',
packages=['tf_ops'], packages=['tf_ops', 'tflite_ops'],
package_data={'': ['*.so']}, package_data={'': ['*.so']},
cmdclass={ cmdclass={
'build': _BuildCommand, 'build': _BuildCommand,
......
...@@ -48,9 +48,9 @@ std::unique_ptr<tflite::Interpreter> CreateInterpreter( ...@@ -48,9 +48,9 @@ std::unique_ptr<tflite::Interpreter> CreateInterpreter(
tflite::ops::builtin::BuiltinOpResolver resolver; tflite::ops::builtin::BuiltinOpResolver resolver;
resolver.AddCustom( resolver.AddCustom(
"SEQUENCE_STRING_PROJECTION", "SEQUENCE_STRING_PROJECTION",
tflite::ops::custom::Register_SEQUENCE_STRING_PROJECTION()); ::seq_flow_lite::ops::custom::Register_SEQUENCE_STRING_PROJECTION());
resolver.AddCustom("ExpectedValueOp", resolver.AddCustom("ExpectedValueOp",
tflite::ops::custom::Register_EXPECTED_VALUE()); ::seq_flow_lite::ops::custom::Register_EXPECTED_VALUE());
tflite::InterpreterBuilder(model, resolver, tflite::InterpreterBuilder(model, resolver,
/*error_reporter=*/nullptr)(&interpreter); /*error_reporter=*/nullptr)(&interpreter);
if (!interpreter) { if (!interpreter) {
...@@ -105,7 +105,7 @@ std::vector<float> InvokeModel( ...@@ -105,7 +105,7 @@ std::vector<float> InvokeModel(
const size_t num_classes = output_dims[kClassOutputClassIndex]; const size_t num_classes = output_dims[kClassOutputClassIndex];
for (int i = 0; i < num_classes; ++i) { for (int i = 0; i < num_classes; ++i) {
// Find class probability or log probability for the class index // Find class probability or log probability for the class index
classes.push_back(tflite::PodDequantize(*class_output, i)); classes.push_back(::seq_flow_lite::PodDequantize(*class_output, i));
} }
return classes; return classes;
} }
......
...@@ -30,6 +30,8 @@ from utils import tflite_utils # import seq_flow_lite module ...@@ -30,6 +30,8 @@ from utils import tflite_utils # import seq_flow_lite module
FLAGS = flags.FLAGS FLAGS = flags.FLAGS
flags.DEFINE_string("output_dir", None, "The output or model directory.") flags.DEFINE_string("output_dir", None, "The output or model directory.")
flags.DEFINE_enum("output", "sigmoid", ["logits", "sigmoid", "softmax"],
"Specification of the output tensor.")
def load_runner_config(): def load_runner_config():
...@@ -51,12 +53,20 @@ def main(_): ...@@ -51,12 +53,20 @@ def main(_):
encoder = model.Encoder(model_config, base_layers.TFLITE) encoder = model.Encoder(model_config, base_layers.TFLITE)
projection, seq_lengh = prxlayer(text) projection, seq_lengh = prxlayer(text)
logits = encoder(projection, seq_lengh) logits = encoder(projection, seq_lengh)
if FLAGS.output == "logits":
outputs = logits
elif FLAGS.output == "sigmoid":
outputs = tf.math.sigmoid(logits)
else:
assert FLAGS.output == "softmax", "Unexpected output"
outputs = tf.nn.softmax(logits)
session.run(tf.global_variables_initializer()) session.run(tf.global_variables_initializer())
session.run(tf.local_variables_initializer()) session.run(tf.local_variables_initializer())
saver = tf.train.Saver() saver = tf.train.Saver()
saver.restore(session, tf.train.latest_checkpoint(FLAGS.output_dir)) saver.restore(session, tf.train.latest_checkpoint(FLAGS.output_dir))
tflite_fb = tflite_utils.generate_tflite(session, graph, [text], [logits]) tflite_fb = tflite_utils.generate_tflite(session, graph, [text],
[outputs])
output_file_name = os.path.join(FLAGS.output_dir, "tflite.fb") output_file_name = os.path.join(FLAGS.output_dir, "tflite.fb")
with tf.gfile.Open(output_file_name, "wb") as f: with tf.gfile.Open(output_file_name, "wb") as f:
f.write(tflite_fb) f.write(tflite_fb)
......
...@@ -54,19 +54,23 @@ class BaseLayer(tf.keras.layers.Layer): ...@@ -54,19 +54,23 @@ class BaseLayer(tf.keras.layers.Layer):
assert len(tensor.get_shape().as_list()) == rank assert len(tensor.get_shape().as_list()) == rank
assert tensor.dtype == dtype assert tensor.dtype == dtype
def add_qweight(self, shape, num_bits=8): def add_weight_wrapper(self, shape):
"""Return a quantized weight variable for the given shape.""" """Return a weight variable for the given shape."""
if self.parameters.initializer is not None: if self.parameters.initializer is not None:
initializer = self.parameters.initializer initializer = self.parameters.initializer
else: else:
initializer = tf.keras.initializers.GlorotUniform() initializer = tf.keras.initializers.GlorotUniform()
weight = self.add_weight( weight = self.add_weight(
"weight", shape, initializer=initializer, trainable=True) "weight",
shape,
initializer=initializer,
trainable=True,
dtype=tf.float32)
self.add_reg_loss(weight) self.add_reg_loss(weight)
return self._weight_quantization(weight, num_bits=num_bits) return weight
def _weight_quantization(self, tensor, num_bits=8): def quantize_parameter(self, tensor, num_bits=8):
"""Quantize weights when enabled.""" """Quantize parameters when enabled."""
# For infer mode, toco computes the min/max from the weights offline to # For infer mode, toco computes the min/max from the weights offline to
# quantize it. During train/eval this is computed from the current value # quantize it. During train/eval this is computed from the current value
# in the session by the graph itself. # in the session by the graph itself.
...@@ -98,21 +102,37 @@ class BaseLayer(tf.keras.layers.Layer): ...@@ -98,21 +102,37 @@ class BaseLayer(tf.keras.layers.Layer):
def assign_moving_average(self, var, update, ema_decay): def assign_moving_average(self, var, update, ema_decay):
return var.assign(var.read_value() * (1 - ema_decay) + (ema_decay) * update) return var.assign(var.read_value() * (1 - ema_decay) + (ema_decay) * update)
def qrange_sigmoid(self, tensor): def quantize_tensor(self, tf_only):
if self.parameters.quantize: if tf_only and self.parameters.mode == TFLITE:
return False
return self.parameters.quantize
def qrange_sigmoid(self, tensor, tf_only=False):
if self.quantize_tensor(tf_only):
return tf.quantization.fake_quant_with_min_max_args(tensor, 0.0, 1.0) return tf.quantization.fake_quant_with_min_max_args(tensor, 0.0, 1.0)
return tensor return tensor
def qrange_tanh(self, tensor): def qrange_tanh(self, tensor, tf_only=False):
if self.parameters.quantize: if self.quantize_tensor(tf_only):
return tf.quantization.fake_quant_with_min_max_args(tensor, -1.0, 1.0) return tf.quantization.fake_quant_with_min_max_args(tensor, -1.0, 1.0)
return tensor return tensor
def quantized_tanh(self, tensor): def quantized_tanh(self, tensor, tf_only=False):
return self.qrange_tanh(tf.tanh(tensor)) return self.qrange_tanh(tf.tanh(tensor), tf_only)
def quantized_sigmoid(self, tensor): def quantized_sigmoid(self, tensor, tf_only=False):
return self.qrange_sigmoid(tf.sigmoid(tensor)) return self.qrange_sigmoid(tf.sigmoid(tensor), tf_only)
def get_batch_dimension(self, tensor): def get_batch_dimension(self, tensor):
return tensor.get_shape().as_list()[0] or tf.shape(tensor)[0] return tensor.get_shape().as_list()[0] or tf.shape(tensor)[0]
def inverse_normalizer(self, mask):
return tf.math.reciprocal(tf.reduce_sum(mask))
def random_drop_to_zero(self, tensor, zero_probability):
rnd = tf.random.uniform(
shape=tf.shape(tensor),
minval=-zero_probability,
maxval=(1.0 - zero_probability),
dtype=tensor.dtype)
return tf.math.ceil(rnd)
...@@ -60,7 +60,7 @@ class EncoderQConvolution(base_layers.BaseLayer): ...@@ -60,7 +60,7 @@ class EncoderQConvolution(base_layers.BaseLayer):
assert len(input_shapes) == self.rank assert len(input_shapes) == self.rank
self.in_filters = input_shapes[-1] self.in_filters = input_shapes[-1]
shape = self.ksize + [self.in_filters, self.out_filters] shape = self.ksize + [self.in_filters, self.out_filters]
self.filters = self.add_qweight(shape=shape) self.filters = self.add_weight_wrapper(shape=shape)
if self.bias: if self.bias:
self.b = self.add_bias(shape=[self.out_filters]) self.b = self.add_bias(shape=[self.out_filters])
...@@ -70,7 +70,7 @@ class EncoderQConvolution(base_layers.BaseLayer): ...@@ -70,7 +70,7 @@ class EncoderQConvolution(base_layers.BaseLayer):
def _conv_r4(self, inputs, normalize_method): def _conv_r4(self, inputs, normalize_method):
outputs = tf.nn.conv2d( outputs = tf.nn.conv2d(
inputs, inputs,
self.filters, self.quantize_parameter(self.filters),
strides=self.strides, strides=self.strides,
padding=self.padding, padding=self.padding,
dilations=self.dilations) dilations=self.dilations)
......
...@@ -47,7 +47,7 @@ class BaseQDense(base_layers.BaseLayer): ...@@ -47,7 +47,7 @@ class BaseQDense(base_layers.BaseLayer):
assert input_shapes[1] == 1 or input_shapes[2] == 1 assert input_shapes[1] == 1 or input_shapes[2] == 1
self.in_units = input_shapes[-1] self.in_units = input_shapes[-1]
shape = [self.in_units, self.units] shape = [self.in_units, self.units]
self.w = self.add_qweight(shape=shape) self.w = self.add_weight_wrapper(shape=shape)
if self.bias: if self.bias:
self.b = self.add_bias(shape=[self.units]) self.b = self.add_bias(shape=[self.units])
...@@ -55,7 +55,7 @@ class BaseQDense(base_layers.BaseLayer): ...@@ -55,7 +55,7 @@ class BaseQDense(base_layers.BaseLayer):
self.normalization = normalization_layers.BatchNormalization(**kwargs) self.normalization = normalization_layers.BatchNormalization(**kwargs)
def _dense_r2(self, inputs, normalize_method): def _dense_r2(self, inputs, normalize_method):
outputs = tf.matmul(inputs, self.w) outputs = tf.matmul(inputs, self.quantize_parameter(self.w))
if self.bias: if self.bias:
outputs = tf.nn.bias_add(outputs, self.b) outputs = tf.nn.bias_add(outputs, self.b)
if self.normalize: if self.normalize:
...@@ -98,7 +98,9 @@ class BaseQDenseVarLen(BaseQDense): ...@@ -98,7 +98,9 @@ class BaseQDenseVarLen(BaseQDense):
self.normalization = normalization_layers.VarLenBatchNormalization( self.normalization = normalization_layers.VarLenBatchNormalization(
rank=2, **kwargs) rank=2, **kwargs)
def call(self, inputs, mask, inverse_normalizer): def call(self, inputs, mask, inverse_normalizer=None):
if inverse_normalizer is None:
inverse_normalizer = self.inverse_normalizer(mask)
def normalize_method(tensor): def normalize_method(tensor):
maskr2 = tf.reshape(mask, [-1, 1]) maskr2 = tf.reshape(mask, [-1, 1])
......
...@@ -25,7 +25,7 @@ from tf_ops import sequence_string_projection_op_v2 as sspv2 # import seq_flow_l ...@@ -25,7 +25,7 @@ from tf_ops import sequence_string_projection_op_v2 as sspv2 # import seq_flow_l
class ProjectionLayer(base_layers.BaseLayer): class ProjectionLayer(base_layers.BaseLayer):
"""Base class for encoders.""" """Base class for encoders."""
def __init__(self, model_config, mode): def __init__(self, model_config, mode, **kwargs):
"""Create projection.""" """Create projection."""
def _get_params(varname, default_value=None): def _get_params(varname, default_value=None):
...@@ -50,7 +50,7 @@ class ProjectionLayer(base_layers.BaseLayer): ...@@ -50,7 +50,7 @@ class ProjectionLayer(base_layers.BaseLayer):
if mode == base_layers.TRAIN: if mode == base_layers.TRAIN:
_get_params("distortion_probability", 0.0) _get_params("distortion_probability", 0.0)
parameters = base_layers.Parameters(mode, self.quantize) parameters = base_layers.Parameters(mode, self.quantize)
super(ProjectionLayer, self).__init__(parameters=parameters) super(ProjectionLayer, self).__init__(parameters=parameters, **kwargs)
def call(self, inputs): def call(self, inputs):
projection, _, seq_length = ssp.sequence_string_projection( projection, _, seq_length = ssp.sequence_string_projection(
...@@ -74,15 +74,14 @@ class ProjectionLayer(base_layers.BaseLayer): ...@@ -74,15 +74,14 @@ class ProjectionLayer(base_layers.BaseLayer):
batch_size = self.get_batch_dimension(inputs) batch_size = self.get_batch_dimension(inputs)
projection = tf.reshape(projection, projection = tf.reshape(projection,
[batch_size, self.max_seq_len, self.feature_size]) [batch_size, self.max_seq_len, self.feature_size])
if self.mode in modes: projection = self.qrange_tanh(projection)
projection = self.qrange_tanh(projection)
return projection, seq_length return projection, seq_length
class ProjectionLayerPreSegmented(base_layers.BaseLayer): class ProjectionLayerPreSegmented(base_layers.BaseLayer):
"""Base class for encoders.""" """Base class for encoders."""
def __init__(self, model_config, mode): def __init__(self, model_config, mode, **kwargs):
"""Create projection.""" """Create projection."""
def _get_params(varname, default_value=None): def _get_params(varname, default_value=None):
...@@ -101,11 +100,13 @@ class ProjectionLayerPreSegmented(base_layers.BaseLayer): ...@@ -101,11 +100,13 @@ class ProjectionLayerPreSegmented(base_layers.BaseLayer):
if mode == base_layers.TRAIN: if mode == base_layers.TRAIN:
_get_params("distortion_probability", 0.0) _get_params("distortion_probability", 0.0)
parameters = base_layers.Parameters(mode, self.quantize) parameters = base_layers.Parameters(mode, self.quantize)
super(ProjectionLayerPreSegmented, self).__init__(parameters=parameters) super(ProjectionLayerPreSegmented, self).__init__(
parameters=parameters, **kwargs)
def call(self, inputs, sequence_length): def call(self, inputs):
tokens, sequence_length = inputs
projection = sspv2.sequence_string_projection_v2( projection = sspv2.sequence_string_projection_v2(
input=inputs, input=tokens,
sequence_length=sequence_length, sequence_length=sequence_length,
feature_size=self.feature_size, feature_size=self.feature_size,
distortion_probability=self.distortion_probability, distortion_probability=self.distortion_probability,
......
...@@ -27,6 +27,8 @@ class ActivationQuantization(base_layers.BaseLayer): ...@@ -27,6 +27,8 @@ class ActivationQuantization(base_layers.BaseLayer):
self.ema_decay = ema_decay self.ema_decay = ema_decay
self.num_bits = num_bits self.num_bits = num_bits
super(ActivationQuantization, self).__init__(**kwargs) super(ActivationQuantization, self).__init__(**kwargs)
def build(self, input_shapes):
if self.parameters.quantize: if self.parameters.quantize:
self.min_var = self.add_weight( self.min_var = self.add_weight(
"min", initializer=tf.keras.initializers.Zeros(), trainable=False) "min", initializer=tf.keras.initializers.Zeros(), trainable=False)
...@@ -53,6 +55,7 @@ class ActivationQuantization(base_layers.BaseLayer): ...@@ -53,6 +55,7 @@ class ActivationQuantization(base_layers.BaseLayer):
return inputs return inputs
def quantize_using_range(self, inputs): def quantize_using_range(self, inputs):
# This method can only be called after a call to "call" method in this class
if self.parameters.quantize: if self.parameters.quantize:
return tf.quantization.fake_quant_with_min_max_vars( return tf.quantization.fake_quant_with_min_max_vars(
inputs, self.min_var, self.max_var, num_bits=self.num_bits) inputs, self.min_var, self.max_var, num_bits=self.num_bits)
...@@ -66,21 +69,24 @@ class ConcatQuantization(ActivationQuantization): ...@@ -66,21 +69,24 @@ class ConcatQuantization(ActivationQuantization):
self.axis = axis self.axis = axis
super(ConcatQuantization, self).__init__(**kwargs) super(ConcatQuantization, self).__init__(**kwargs)
def reduce_list(self, tensor_list, functor): def _reduce_list(self, tensor_list, functor):
reduce_result = [functor(tensor) for tensor in tensor_list] reduce_result = [functor(tensor) for tensor in tensor_list]
# Toco expects 0.0 to be part of the quantization range. # Toco expects 0.0 to be part of the quantization range.
reduce_result.append(tf.constant(0.0)) reduce_result.append(tf.constant(0.0))
return functor(tf.stack(reduce_result)) return functor(tf.stack(reduce_result))
def call(self, tensors): def call(self, tensors):
# Ignore empty invocations done to build the keras layer.
if tensors is None:
return
if self.parameters.quantize: if self.parameters.quantize:
if self.parameters.mode == base_layers.TRAIN: if self.parameters.mode == base_layers.TRAIN:
# Toco expects 0.0 to be part of the quantization range. # Toco expects 0.0 to be part of the quantization range.
batch_min = self.reduce_list(tensors, tf.reduce_min) batch_min = self._reduce_list(tensors, tf.reduce_min)
min_var = self.assign_moving_average(self.min_var, batch_min, min_var = self.assign_moving_average(self.min_var, batch_min,
self.ema_decay) self.ema_decay)
batch_max = self.reduce_list(tensors, tf.reduce_max) batch_max = self._reduce_list(tensors, tf.reduce_max)
max_var = self.assign_moving_average(self.max_var, batch_max, max_var = self.assign_moving_average(self.max_var, batch_max,
self.ema_decay) self.ema_decay)
else: else:
......
...@@ -27,21 +27,17 @@ def classification_metric(per_example_loss, label_ids, logits): ...@@ -27,21 +27,17 @@ def classification_metric(per_example_loss, label_ids, logits):
} }
THRESHOLDS = [0.5]
def labeling_metric(per_example_loss, label_ids, logits): def labeling_metric(per_example_loss, label_ids, logits):
"""Compute eval metrics.""" """Compute eval metrics."""
scores = tf.math.sigmoid(logits) scores = tf.math.sigmoid(logits)
binary_prediction = tf.math.greater_equal(scores, 0.5)
num_classes = label_ids.get_shape().as_list()[-1] num_classes = label_ids.get_shape().as_list()[-1]
return_dict = {"eval_loss": tf.metrics.mean(per_example_loss)} return_dict = {"eval_loss": tf.metrics.mean(per_example_loss)}
for idx in range(num_classes): for idx in range(num_classes):
return_dict["auc/" + str(idx)] = tf.metrics.auc(label_ids[:, idx], return_dict["auc/" + str(idx)] = tf.metrics.auc(label_ids[:, idx],
scores[:, idx]) scores[:, idx])
return_dict["precision@" + str(THRESHOLDS) + "/" + return_dict["precision/" + str(idx)] = tf.metrics.precision(
str(idx)] = tf.metrics.precision_at_thresholds( label_ids[:, idx], binary_prediction[:, idx])
label_ids[:, idx], scores[:, idx], thresholds=THRESHOLDS) return_dict["recall/" + str(idx)] = tf.metrics.recall(
return_dict["recall@" + str(THRESHOLDS) + "/" + label_ids[:, idx], binary_prediction[:, idx])
str(idx)] = tf.metrics.recall_at_thresholds(
label_ids[:, idx], scores[:, idx], thresholds=THRESHOLDS)
return return_dict return return_dict
...@@ -38,6 +38,7 @@ class PaddedMaskedVarLenConv(conv_layers.EncoderQConvolutionVarLen): ...@@ -38,6 +38,7 @@ class PaddedMaskedVarLenConv(conv_layers.EncoderQConvolutionVarLen):
assert bool(ngram is None) != bool(skip_bigram is None) assert bool(ngram is None) != bool(skip_bigram is None)
self.kwidth = ngram if ngram is not None else (skip_bigram + 2) self.kwidth = ngram if ngram is not None else (skip_bigram + 2)
mask = [1] * self.kwidth mask = [1] * self.kwidth
self.skipgram = skip_bigram is not None
if skip_bigram is not None: if skip_bigram is not None:
mask[1], mask[skip_bigram] = 0, 0 mask[1], mask[skip_bigram] = 0, 0
self.mask = np.array(mask, dtype="float32").reshape((1, self.kwidth, 1, 1)) self.mask = np.array(mask, dtype="float32").reshape((1, self.kwidth, 1, 1))
...@@ -56,10 +57,10 @@ class PaddedMaskedVarLenConv(conv_layers.EncoderQConvolutionVarLen): ...@@ -56,10 +57,10 @@ class PaddedMaskedVarLenConv(conv_layers.EncoderQConvolutionVarLen):
return result * mask + (1 - mask) * self.invalid_value return result * mask + (1 - mask) * self.invalid_value
return result return result
def add_qweight(self, shape, num_bits=8): def quantize_parameter(self, weight, num_bits=8):
weight = super(PaddedMaskedVarLenConv, self).add_qweight( weight = super(PaddedMaskedVarLenConv, self).quantize_parameter(
shape=shape, num_bits=num_bits) weight, num_bits=num_bits)
return weight * tf.convert_to_tensor(self.mask) return weight * tf.convert_to_tensor(self.mask) if self.skipgram else weight
class AttentionPoolReduce(base_layers.BaseLayer): class AttentionPoolReduce(base_layers.BaseLayer):
...@@ -97,8 +98,8 @@ class AttentionPoolReduce(base_layers.BaseLayer): ...@@ -97,8 +98,8 @@ class AttentionPoolReduce(base_layers.BaseLayer):
class Encoder(tf.keras.layers.Layer): class Encoder(tf.keras.layers.Layer):
"""A PRADO keras model.""" """A PRADO keras model."""
def __init__(self, config, mode): def __init__(self, config, mode, **kwargs):
super(Encoder, self).__init__() super(Encoder, self).__init__(**kwargs)
def _get_params(varname, default_value=None): def _get_params(varname, default_value=None):
value = config[varname] if varname in config else default_value value = config[varname] if varname in config else default_value
...@@ -118,7 +119,7 @@ class Encoder(tf.keras.layers.Layer): ...@@ -118,7 +119,7 @@ class Encoder(tf.keras.layers.Layer):
_get_params("skip1bigram_channels", 0) _get_params("skip1bigram_channels", 0)
_get_params("skip2bigram_channels", 0) _get_params("skip2bigram_channels", 0)
_get_params("network_regularizer_scale", 1e-4) _get_params("network_regularizer_scale", 1e-4)
_get_params("keep_prob", 0.5) _get_params("keep_prob", 1.0)
self.num_classes = len(self.labels) self.num_classes = len(self.labels)
self.parameters = base_layers.Parameters( self.parameters = base_layers.Parameters(
...@@ -129,7 +130,6 @@ class Encoder(tf.keras.layers.Layer): ...@@ -129,7 +130,6 @@ class Encoder(tf.keras.layers.Layer):
units=self.embedding_size, rank=3, parameters=self.parameters) units=self.embedding_size, rank=3, parameters=self.parameters)
self.attention_fc = dense_layers.BaseQDenseVarLen( self.attention_fc = dense_layers.BaseQDenseVarLen(
units=self.embedding_size, rank=3, parameters=self.parameters) units=self.embedding_size, rank=3, parameters=self.parameters)
self.dropout = tf.keras.layers.Dropout(rate=(1 - self.keep_prob))
self.parameters = copy.copy(self.parameters) self.parameters = copy.copy(self.parameters)
self.parameters.regularizer_scale = self.network_regularizer_scale self.parameters.regularizer_scale = self.network_regularizer_scale
...@@ -161,8 +161,8 @@ class Encoder(tf.keras.layers.Layer): ...@@ -161,8 +161,8 @@ class Encoder(tf.keras.layers.Layer):
def _apply_fc_dropout(self, layer, inputs, mask, inverse_normalizer): def _apply_fc_dropout(self, layer, inputs, mask, inverse_normalizer):
outputs = layer(inputs, mask, inverse_normalizer) outputs = layer(inputs, mask, inverse_normalizer)
if self.parameters.mode == base_layers.TRAIN: if self.parameters.mode == base_layers.TRAIN and self.keep_prob < 1.0:
return self.dropout(outputs) return tf.nn.dropout(outputs, rate=(1 - self.keep_prob))
return outputs return outputs
def call(self, projection, seq_length): def call(self, projection, seq_length):
...@@ -178,14 +178,17 @@ class Encoder(tf.keras.layers.Layer): ...@@ -178,14 +178,17 @@ class Encoder(tf.keras.layers.Layer):
layer(values_in, attention_in, maskr3, inverse_normalizer) layer(values_in, attention_in, maskr3, inverse_normalizer)
for layer in self.attention_pool_layers for layer in self.attention_pool_layers
] ]
assert tensors, "no ngram channels have been configured"
pre_logits = self.concat_quantizer(tensors) pre_logits = self.concat_quantizer(tensors)
return self.final_fc(pre_logits) return self.final_fc(pre_logits)
class Model(Encoder): class Model(Encoder):
def __init__(self, config, mode): def __init__(self, config, mode, **kwargs):
super(Model, self).__init__(config, mode) super(Model, self).__init__(config, mode, **kwargs)
self.projection = projection_layers.ProjectionLayer(config, mode) self.projection = projection_layers.ProjectionLayer(config, mode)
def call(self, inputs): def call(self, inputs):
......
...@@ -93,8 +93,8 @@ py_binary( ...@@ -93,8 +93,8 @@ py_binary(
# Expect numpy installed # Expect numpy installed
# package TFLite flex delegate # package TFLite flex delegate
# package TFLite interpreter # package TFLite interpreter
"@org_tflite_support//tensorflow_lite_support/custom_ops/kernel:ngrams_op_resolver", "@org_tflite_support//tensorflow_lite_support/custom_ops/kernel:_pywrap_ngrams_op_resolver",
"@org_tflite_support//tensorflow_lite_support/custom_ops/kernel:whitespace_tokenizer_op_resolver", "@org_tflite_support//tensorflow_lite_support/custom_ops/kernel:_pywrap_whitespace_tokenizer_op_resolver",
# Expect tensorflow text installed # Expect tensorflow text installed
], ],
) )
......
...@@ -10,15 +10,6 @@ package( ...@@ -10,15 +10,6 @@ package(
], ],
) )
py_library(
name = "text_projection",
srcs = ["text_projection.py"],
srcs_version = "PY3",
deps = [
":sequence_string_projection_op_py",
],
)
cc_library( cc_library(
name = "sequence_string_projection_op", name = "sequence_string_projection_op",
srcs = [ srcs = [
...@@ -30,7 +21,6 @@ cc_library( ...@@ -30,7 +21,6 @@ cc_library(
":projection_util", ":projection_util",
":text_distorter", ":text_distorter",
"@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/random",
"@tensorflow_includes//:includes", "@tensorflow_includes//:includes",
"@tensorflow_solib//:framework_lib", "@tensorflow_solib//:framework_lib",
], ],
...@@ -71,11 +61,9 @@ cc_library( ...@@ -71,11 +61,9 @@ cc_library(
srcs = ["text_distorter.cc"], srcs = ["text_distorter.cc"],
hdrs = ["text_distorter.h"], hdrs = ["text_distorter.h"],
deps = [ deps = [
"@com_google_absl//absl/strings",
"@icu4c", "@icu4c",
"@tensorflow_includes//:includes", "@tensorflow_includes//:includes",
"@tensorflow_solib//:framework_lib", "@tensorflow_solib//:framework_lib",
"@utf_archive//:utf",
], ],
) )
...@@ -102,7 +90,6 @@ cc_library( ...@@ -102,7 +90,6 @@ cc_library(
"@tensorflow_includes//:includes", "@tensorflow_includes//:includes",
"@tensorflow_solib//:framework_lib", "@tensorflow_solib//:framework_lib",
"@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/random",
], ],
alwayslink = 1, alwayslink = 1,
) )
......
...@@ -79,7 +79,7 @@ std::string ContractToken(const char* input_ptr, size_t len, size_t num_chars) { ...@@ -79,7 +79,7 @@ std::string ContractToken(const char* input_ptr, size_t len, size_t num_chars) {
// Count how many times this pattern appeared. // Count how many times this pattern appeared.
int num_cur_patterns = 0; int num_cur_patterns = 0;
if (cur_pattern.find(" ") == std::string::npos && !IsDigit(cur_pattern)) { if (cur_pattern.find(' ') == std::string::npos && !IsDigit(cur_pattern)) {
num_cur_patterns = num_cur_patterns =
GetNumPattern(char_tokens, i + num_chars, num_chars, cur_pattern); GetNumPattern(char_tokens, i + num_chars, num_chars, cur_pattern);
} }
......
...@@ -25,25 +25,28 @@ limitations under the License. ...@@ -25,25 +25,28 @@ limitations under the License.
namespace { namespace {
constexpr int kInvalid = -1; constexpr int kInvalid = -1;
constexpr char kSpace = ' '; constexpr char kSpace = ' ';
} // namespace
// A HashEngine that uses MurmurHash to convert text to hashcodes.
class MurmurHash : public HashEngine { class MurmurHash : public HashEngine {
public: public:
void GetHashCodes(const std::string& word, std::vector<uint64_t>* hash_codes, std::vector<uint64_t> GetHashCodes(const std::string& word,
int feature_size) override { int feature_size) override {
std::vector<uint64_t> hash_codes;
hash_codes.reserve(2 * (feature_size / 64 + 1));
uint64_t hash_low = 0; uint64_t hash_low = 0;
uint64_t hash_high = 0; uint64_t hash_high = 0;
for (int i = 0; i < feature_size; i += 64) { for (int i = 0; i < feature_size; i += 64) {
if (i == 0) { if (i == 0) {
auto hash = MurmurHash128(word.c_str(), word.size()); auto hash = MurmurHash128(word.data(), word.size());
hash_low = hash.first; hash_low = hash.first;
hash_high = hash.second; hash_high = hash.second;
} else { } else {
GetMoreBits(hash_low, hash_high, &hash_low, &hash_high); GetMoreBits(hash_low, hash_high, &hash_low, &hash_high);
} }
hash_codes->push_back(hash_low); hash_codes.push_back(hash_low);
hash_codes->push_back(hash_high); hash_codes.push_back(hash_high);
} }
return hash_codes;
} }
private: private:
...@@ -78,7 +81,7 @@ class MurmurHash : public HashEngine { ...@@ -78,7 +81,7 @@ class MurmurHash : public HashEngine {
std::pair<uint64_t, uint64_t> MurmurHash128(const char* buf, std::pair<uint64_t, uint64_t> MurmurHash128(const char* buf,
const size_t len) { const size_t len) {
// Initialize the hashing value. // Initialize the hashing value.
uint64_t hash = len * kMul; uint64_t hash1 = len * kMul;
// hash2 will be xored by hash during the hash computation iterations. // hash2 will be xored by hash during the hash computation iterations.
// In the end we use an alternative mixture multiplier for mixing // In the end we use an alternative mixture multiplier for mixing
// the bits in hash2. // the bits in hash2.
...@@ -90,34 +93,38 @@ class MurmurHash : public HashEngine { ...@@ -90,34 +93,38 @@ class MurmurHash : public HashEngine {
for (const char* p = buf; p != end; p += 8) { for (const char* p = buf; p != end; p += 8) {
// Manually unrolling this loop 2x did not help on Intel Core 2. // Manually unrolling this loop 2x did not help on Intel Core 2.
hash = MurmurStep(hash, Load64VariableLength(p, 8)); hash1 = MurmurStep(hash1, Load64VariableLength(p, 8));
hash2 ^= hash; hash2 ^= hash1;
} }
if ((len & 0x7) != 0) { if ((len & 0x7) != 0) {
const uint64_t data = Load64VariableLength(end, len & 0x7); const uint64_t data = Load64VariableLength(end, len & 0x7);
hash ^= data; hash1 ^= data;
hash *= kMul; hash1 *= kMul;
hash2 ^= hash; hash2 ^= hash1;
} }
hash = ShiftMix(hash) * kMul; hash1 = ShiftMix(hash1) * kMul;
hash2 ^= hash; hash2 ^= hash1;
hash = ShiftMix(hash); hash1 = ShiftMix(hash1);
// mul2 is a prime just above golden ratio. mul2 is used to ensure that the // mul2 is a prime just above golden ratio. mul2 is used to ensure that the
// impact of the last few bytes is different to the upper and lower 64 bits. // impact of the last few bytes is different to the upper and lower 64 bits.
hash2 = ShiftMix(hash2 * kMul2) * kMul2; hash2 = ShiftMix(hash2 * kMul2) * kMul2;
return std::make_pair(hash, hash2); return {hash1, hash2};
} }
}; };
// A HashEngine that uses a prefix and suffix preserving hash to convert text
// to hashcodes.
class XFixHash : public HashEngine { class XFixHash : public HashEngine {
public: public:
explicit XFixHash(int bits_per_char) explicit XFixHash(int bits_per_char)
: bits_per_char_(bits_per_char), bit_mask_((1ULL << bits_per_char) - 1) {} : bits_per_char_(bits_per_char), bit_mask_((1ULL << bits_per_char) - 1) {}
void GetHashCodes(const std::string& word, std::vector<uint64_t>* hash_codes, std::vector<uint64_t> GetHashCodes(const std::string& word,
int feature_size) override { int feature_size) override {
std::vector<uint64_t> hash_codes;
hash_codes.reserve(2 * (feature_size / 64 + 1));
auto token_ptr = reinterpret_cast<const uint8_t*>(word.c_str()); auto token_ptr = reinterpret_cast<const uint8_t*>(word.c_str());
size_t token_size = word.size(); size_t token_size = word.size();
int token_idx = 0; int token_idx = 0;
...@@ -134,9 +141,10 @@ class XFixHash : public HashEngine { ...@@ -134,9 +141,10 @@ class XFixHash : public HashEngine {
hash_low = (hash_low << bits_per_char_) | (frhash & bit_mask_); hash_low = (hash_low << bits_per_char_) | (frhash & bit_mask_);
hash_high = (hash_high << bits_per_char_) | (brhash & bit_mask_); hash_high = (hash_high << bits_per_char_) | (brhash & bit_mask_);
} }
hash_codes->push_back(hash_low); hash_codes.push_back(hash_low);
hash_codes->push_back(hash_high); hash_codes.push_back(hash_high);
} }
return hash_codes;
} }
private: private:
...@@ -146,6 +154,8 @@ class XFixHash : public HashEngine { ...@@ -146,6 +154,8 @@ class XFixHash : public HashEngine {
const uint64_t bit_mask_; const uint64_t bit_mask_;
}; };
// A HashEngine that performs a position preserving unicode level hashing to
// convert text to hashcodes.
class UnicodeHash : public HashEngine { class UnicodeHash : public HashEngine {
public: public:
// bits_per_unicode should be a divisor of 64. // bits_per_unicode should be a divisor of 64.
...@@ -154,8 +164,10 @@ class UnicodeHash : public HashEngine { ...@@ -154,8 +164,10 @@ class UnicodeHash : public HashEngine {
bit_mask_(((1ULL << bits_per_unicode) - 1) << (64 - bits_per_unicode)) { bit_mask_(((1ULL << bits_per_unicode) - 1) << (64 - bits_per_unicode)) {
} }
void GetHashCodes(const std::string& word, std::vector<uint64_t>* hash_codes, std::vector<uint64_t> GetHashCodes(const std::string& word,
int feature_size) override { int feature_size) override {
std::vector<uint64_t> hash_codes;
hash_codes.reserve(2 * (feature_size / 64 + 1));
auto word_ptr = word.c_str(); auto word_ptr = word.c_str();
int utflength = utflen(const_cast<char*>(word_ptr)); int utflength = utflen(const_cast<char*>(word_ptr));
// Both `feature_size` and `bits_per_unicode` are bit lengths. // Both `feature_size` and `bits_per_unicode` are bit lengths.
...@@ -187,8 +199,9 @@ class UnicodeHash : public HashEngine { ...@@ -187,8 +199,9 @@ class UnicodeHash : public HashEngine {
hash = hash >> bits_per_unicode_; hash = hash >> bits_per_unicode_;
} }
} }
hash_codes->push_back(hash); hash_codes.push_back(hash);
} }
return hash_codes;
} }
private: private:
...@@ -197,6 +210,8 @@ class UnicodeHash : public HashEngine { ...@@ -197,6 +210,8 @@ class UnicodeHash : public HashEngine {
const uint64_t bit_mask_; const uint64_t bit_mask_;
}; };
} // namespace
bool Hasher::SupportedHashType(const std::string& hash_type) { bool Hasher::SupportedHashType(const std::string& hash_type) {
std::unordered_set<std::string> supported({kMurmurHash, kUnicodeHash8, std::unordered_set<std::string> supported({kMurmurHash, kUnicodeHash8,
kUnicodeHash16, kXfixHash8, kUnicodeHash16, kXfixHash8,
...@@ -225,7 +240,7 @@ Hasher* Hasher::CreateHasher(int feature_size, const std::string& hash_type) { ...@@ -225,7 +240,7 @@ Hasher* Hasher::CreateHasher(int feature_size, const std::string& hash_type) {
Hasher::Hasher(int feature_size, HashEngine* hash_engine) Hasher::Hasher(int feature_size, HashEngine* hash_engine)
: feature_size_(feature_size), hash_engine_(hash_engine) { : feature_size_(feature_size), hash_engine_(hash_engine) {
hash_engine_->GetHashCodes(empty_string_, &null_hash_codes_, feature_size_); null_hash_codes_ = hash_engine_->GetHashCodes(empty_string_, feature_size_);
} }
std::string ProjectionUnicodeHandler::LowerCaseUTF8WithSupportedUnicodes( std::string ProjectionUnicodeHandler::LowerCaseUTF8WithSupportedUnicodes(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment