Commit b2c3fee4 authored by A. Unique TensorFlower

Comment/message updates: b/181866850 tracks next steps after b/149576200.

PiperOrigin-RevId: 360932205
parent 00633c22
@@ -330,8 +330,7 @@ class SentencepieceTokenizer(tf.keras.layers.Layer):
if bool(model_file_path) == bool(model_serialized_proto):
raise ValueError("Exact one of `model_file_path` and "
"`model_serialized_proto` can be specified.")
# TODO(chendouble): After b/149576200 is resolved, support
# tokenize_with_offsets when strip_diacritics is True,
# TODO(b/181866850): Support tokenize_with_offsets for strip_diacritics=True
if tokenize_with_offsets and strip_diacritics:
raise ValueError("`tokenize_with_offsets` is not supported when "
"`strip_diacritics` is set to True.")
@@ -378,8 +377,8 @@ class SentencepieceTokenizer(tf.keras.layers.Layer):
"""
if self._strip_diacritics:
if self.tokenize_with_offsets:
raise ValueError("`tokenize_with_offsets` is not supported yet due to "
"b/149576200, when `strip_diacritics` is set to True.")
raise ValueError("`tokenize_with_offsets` is not supported yet when "
"`strip_diacritics` is set to True (b/181866850).")
inputs = text.normalize_utf8(inputs, "NFD")
inputs = tf.strings.regex_replace(inputs, r"\p{Mn}", "")
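The `strip_diacritics` branch above amounts to NFD normalization followed by removal of Unicode combining marks; a standalone sketch with tensorflow_text (example strings are illustrative):

```python
# Standalone sketch of the strip_diacritics transformation shown above.
import tensorflow as tf
import tensorflow_text as text

inputs = tf.constant(["café", "Ångström"])
nfd = text.normalize_utf8(inputs, "NFD")                 # decompose accented characters
stripped = tf.strings.regex_replace(nfd, r"\p{Mn}", "")  # drop combining marks
print(stripped.numpy())  # expected: [b'cafe', b'Angstrom']
```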
@@ -145,7 +145,7 @@ flags.DEFINE_integer(
"sequence length for the bert_pack_inputs subobject."
"Needed for --export_type preprocessing.")
flags.DEFINE_bool(
"tokenize_with_offsets", False, # Broken by b/149576200.
"tokenize_with_offsets", False, # TODO(b/181866850)
"Whether to export a .tokenize_with_offsets subobject for "
"--export_type preprocessing.")
flags.DEFINE_multi_string(
@@ -564,7 +564,7 @@ class ExportPreprocessingTest(tf.test.TestCase, parameterized.TestCase):
def test_exported_callables(self, use_sp_model):
preprocess = tf.saved_model.load(self._do_export(
["d", "ef", "abc", "xy"], do_lower_case=True,
tokenize_with_offsets=not use_sp_model, # TODO(b/149576200): drop this.
tokenize_with_offsets=not use_sp_model, # TODO(b/181866850): drop this.
experimental_disable_assert=True, # TODO(b/175369555): drop this.
use_sp_model=use_sp_model))
@@ -590,7 +590,7 @@ class ExportPreprocessingTest(tf.test.TestCase, parameterized.TestCase):
# .tokenize_with_offsets()
if use_sp_model:
# TODO(b/149576200): Enable tokenize_with_offsets when it works and test.
# TODO(b/181866850): Enable tokenize_with_offsets when it works and test.
self.assertFalse(hasattr(preprocess, "tokenize_with_offsets"))
else:
token_ids, start_offsets, limit_offsets = (
@@ -691,7 +691,7 @@ class ExportPreprocessingTest(tf.test.TestCase, parameterized.TestCase):
def test_shapes(self, use_sp_model):
preprocess = tf.saved_model.load(self._do_export(
["abc", "def"], do_lower_case=True,
tokenize_with_offsets=not use_sp_model, # TODO(b/149576200): drop this.
tokenize_with_offsets=not use_sp_model, # TODO(b/181866850): drop this.
experimental_disable_assert=True, # TODO(b/175369555): drop this.
use_sp_model=use_sp_model))
@@ -711,7 +711,7 @@ class ExportPreprocessingTest(tf.test.TestCase, parameterized.TestCase):
tf.TensorSpec([batch_size], tf.string)),
token_out_shape,
"with batch_size=%s" % batch_size)
# TODO(b/149576200): Enable tokenize_with_offsets when it works and test.
# TODO(b/181866850): Enable tokenize_with_offsets when it works and test.
if use_sp_model:
self.assertFalse(hasattr(preprocess, "tokenize_with_offsets"))
else:
@@ -771,7 +771,7 @@ class ExportPreprocessingTest(tf.test.TestCase, parameterized.TestCase):
"quick", "fox", "lazy", "dog"]
preprocess = tf.saved_model.load(self._do_export(
non_special_tokens, do_lower_case=True,
tokenize_with_offsets=use_bert, # TODO(b/149576200): drop this.
tokenize_with_offsets=use_bert, # TODO(b/181866850): drop this.
experimental_disable_assert=True, # TODO(b/175369555): drop this.
add_mask_token=True, use_sp_model=not use_bert))
vocab_size = len(non_special_tokens) + (5 if use_bert else 7)
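The tests above probe the exported preprocessing SavedModel for an optional `tokenize_with_offsets` subobject; a sketch of the consumer-side pattern (the export path and example strings are placeholders):

```python
# Sketch of the consumer-side check exercised by the tests above.
# "/tmp/preprocessing_export" is a placeholder path to an exported model.
import tensorflow as tf

preprocess = tf.saved_model.load("/tmp/preprocessing_export")
sentences = tf.constant(["abc def", "xy"])

token_ids = preprocess.tokenize(sentences)
if hasattr(preprocess, "tokenize_with_offsets"):
  # Only present when offsets are supported for this tokenizer (b/181866850).
  token_ids, start_offsets, limit_offsets = (
      preprocess.tokenize_with_offsets(sentences))
```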