ModelZoo / ResNet50_tensorflow · Commits

Commit b2c3fee4, authored Mar 04, 2021 by A. Unique TensorFlower

Comment/message updates: b/181866850 tracks next steps after b/149576200.

PiperOrigin-RevId: 360932205
Parent: 00633c22
Showing 3 changed files with 9 additions and 10 deletions (+9 −10)
official/nlp/modeling/layers/text_layers.py     +3 −4
official/nlp/tools/export_tfhub.py              +1 −1
official/nlp/tools/export_tfhub_lib_test.py     +5 −5
official/nlp/modeling/layers/text_layers.py
@@ -330,8 +330,7 @@ class SentencepieceTokenizer(tf.keras.layers.Layer):
     if bool(model_file_path) == bool(model_serialized_proto):
       raise ValueError("Exact one of `model_file_path` and "
                        "`model_serialized_proto` can be specified.")
-    # TODO(chendouble): After b/149576200 is resolved, support
-    # tokenize_with_offsets when strip_diacritics is True.
+    # TODO(b/181866850): Support tokenize_with_offsets for strip_diacritics=True
     if tokenize_with_offsets and strip_diacritics:
       raise ValueError("`tokenize_with_offsets` is not supported when "
                        "`strip_diacritics` is set to True.")
@@ -378,8 +377,8 @@ class SentencepieceTokenizer(tf.keras.layers.Layer):
     """
     if self._strip_diacritics:
       if self.tokenize_with_offsets:
-        raise ValueError("`tokenize_with_offsets` is not supported yet due to "
-                         "b/149576200, when `strip_diacritics` is set to True.")
+        raise ValueError("`tokenize_with_offsets` is not supported yet when "
+                         "`strip_diacritics` is set to True (b/181866850).")
       inputs = text.normalize_utf8(inputs, "NFD")
       inputs = tf.strings.regex_replace(inputs, r"\p{Mn}", "")
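Reviewer note on the guard above: offsets from tokenize_with_offsets index into the string the tokenizer actually sees, and the strip_diacritics path rewrites that string first (NFD-decompose, then delete nonspacing marks), so returned offsets would no longer line up with the caller's original text. A minimal standalone sketch of the same two-step stripping pattern, assuming tensorflow and tensorflow_text are installed; the sample strings are illustrative only:

import tensorflow as tf
import tensorflow_text as text

# Decompose precomposed characters (e.g. "é" -> "e" + combining acute) ...
inputs = tf.constant(["café", "Ångström"])
decomposed = text.normalize_utf8(inputs, "NFD")
# ... then delete Unicode nonspacing marks (\p{Mn}) to drop the diacritics.
stripped = tf.strings.regex_replace(decomposed, r"\p{Mn}", "")
print(stripped.numpy())  # [b'cafe' b'Angstrom']

Each deleted mark shortens the byte string, which is why offset bookkeeping and diacritic stripping do not compose yet.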
official/nlp/tools/export_tfhub.py
@@ -145,7 +145,7 @@ flags.DEFINE_integer(
     "sequence length for the bert_pack_inputs subobject."
     "Needed for --export_type preprocessing.")
 flags.DEFINE_bool(
-    "tokenize_with_offsets", False,  # Broken by b/149576200.
+    "tokenize_with_offsets", False,  # TODO(b/181866850)
     "Whether to export a .tokenize_with_offsets subobject for "
     "--export_type preprocessing.")
 flags.DEFINE_multi_string(
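For context on this one-line change: only the comment moves to the new bug; the flag stays defined and keeps its False default while the issue is open. A minimal sketch of how an absl.flags boolean like this is defined and consumed; the main() wiring is hypothetical glue for illustration, and only the DEFINE_bool text comes from the file above:

from absl import app, flags

# Same definition as in export_tfhub.py (default stays False while
# tokenize_with_offsets support is blocked on the tracked bug).
flags.DEFINE_bool(
    "tokenize_with_offsets", False,
    "Whether to export a .tokenize_with_offsets subobject for "
    "--export_type preprocessing.")
FLAGS = flags.FLAGS

def main(argv):
  del argv  # Unused.
  # Hypothetical: the real tool forwards this value into its export library.
  print("tokenize_with_offsets =", FLAGS.tokenize_with_offsets)

if __name__ == "__main__":
  app.run(main)

absl boolean flags flip on with --tokenize_with_offsets and off with --notokenize_with_offsets on the command line.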
official/nlp/tools/export_tfhub_lib_test.py
@@ -564,7 +564,7 @@ class ExportPreprocessingTest(tf.test.TestCase, parameterized.TestCase):
   def test_exported_callables(self, use_sp_model):
     preprocess = tf.saved_model.load(self._do_export(
         ["d", "ef", "abc", "xy"], do_lower_case=True,
-        tokenize_with_offsets=not use_sp_model,  # TODO(b/149576200): drop this.
+        tokenize_with_offsets=not use_sp_model,  # TODO(b/181866850): drop this.
         experimental_disable_assert=True,  # TODO(b/175369555): drop this.
         use_sp_model=use_sp_model))
@@ -590,7 +590,7 @@ class ExportPreprocessingTest(tf.test.TestCase, parameterized.TestCase):
     # .tokenize_with_offsets()
     if use_sp_model:
-      # TODO(b/149576200): Enable tokenize_with_offsets when it works and test.
+      # TODO(b/181866850): Enable tokenize_with_offsets when it works and test.
       self.assertFalse(hasattr(preprocess, "tokenize_with_offsets"))
     else:
       token_ids, start_offsets, limit_offsets = (
@@ -691,7 +691,7 @@ class ExportPreprocessingTest(tf.test.TestCase, parameterized.TestCase):
   def test_shapes(self, use_sp_model):
     preprocess = tf.saved_model.load(self._do_export(
         ["abc", "def"], do_lower_case=True,
-        tokenize_with_offsets=not use_sp_model,  # TODO(b/149576200): drop this.
+        tokenize_with_offsets=not use_sp_model,  # TODO(b/181866850): drop this.
         experimental_disable_assert=True,  # TODO(b/175369555): drop this.
         use_sp_model=use_sp_model))
@@ -711,7 +711,7 @@ class ExportPreprocessingTest(tf.test.TestCase, parameterized.TestCase):
           tf.TensorSpec([batch_size], tf.string)),
           token_out_shape,
           "with batch_size=%s" % batch_size)
-    # TODO(b/149576200): Enable tokenize_with_offsets when it works and test.
+    # TODO(b/181866850): Enable tokenize_with_offsets when it works and test.
     if use_sp_model:
       self.assertFalse(hasattr(preprocess, "tokenize_with_offsets"))
     else:
@@ -771,7 +771,7 @@ class ExportPreprocessingTest(tf.test.TestCase, parameterized.TestCase):
         "quick", "fox", "lazy", "dog"]
     preprocess = tf.saved_model.load(self._do_export(
         non_special_tokens, do_lower_case=True,
-        tokenize_with_offsets=use_bert,  # TODO(b/149576200): drop this.
+        tokenize_with_offsets=use_bert,  # TODO(b/181866850): drop this.
         experimental_disable_assert=True,  # TODO(b/175369555): drop this.
         add_mask_token=True, use_sp_model=not use_bert))
     vocab_size = len(non_special_tokens) + (5 if use_bert else 7)
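The repeated assertions pin down the exported contract: when the preprocessing SavedModel comes from a SentencePiece model (use_sp_model), it must not expose a tokenize_with_offsets subobject until the tracked bug is fixed, while the BERT-vocab path returns token ids plus start/limit byte offsets. A minimal sketch of that offsets contract, using tensorflow_text's WhitespaceTokenizer as a stand-in for the exported subobject (assumes tensorflow_text is installed):

import tensorflow as tf
import tensorflow_text as text

tokenizer = text.WhitespaceTokenizer()
# tokenize_with_offsets returns (tokens, start_offsets, limit_offsets):
# byte positions delimiting each token within the original input string.
tokens, starts, limits = tokenizer.tokenize_with_offsets(
    tf.constant(["quick fox"]))
print(tokens.to_list())  # [[b'quick', b'fox']]
print(starts.to_list())  # [[0, 6]]
print(limits.to_list())  # [[5, 9]]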