Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
e55c1f42
Commit
e55c1f42
authored
Feb 07, 2020
by
Chen Chen
Committed by
A. Unique TensorFlower
Feb 07, 2020
Browse files
Split export_tfhub.py and add export_albert_tfhub to albert folder.
PiperOrigin-RevId: 293883773
parent
cfb2553d
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
189 additions
and
84 deletions
+189
-84
official/nlp/albert/export_albert_tfhub.py
official/nlp/albert/export_albert_tfhub.py
+89
-0
official/nlp/albert/export_albert_tfhub_test.py
official/nlp/albert/export_albert_tfhub_test.py
+90
-0
official/nlp/bert/export_tfhub.py
official/nlp/bert/export_tfhub.py
+10
-33
official/nlp/bert/export_tfhub_test.py
official/nlp/bert/export_tfhub_test.py
+0
-51
No files found.
official/nlp/albert/export_albert_tfhub.py
0 → 100644
View file @
e55c1f42
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A script to export the ALBERT core model as a TF-Hub SavedModel."""
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
from __future__ import print_function

from typing import Text, Tuple

from absl import app
from absl import flags
import tensorflow as tf

from official.nlp import bert_modeling
from official.nlp.bert import bert_models
FLAGS
=
flags
.
FLAGS
flags
.
DEFINE_string
(
"albert_config_file"
,
None
,
"Albert configuration file to define core albert layers."
)
flags
.
DEFINE_string
(
"model_checkpoint_path"
,
None
,
"File path to TF model checkpoint."
)
flags
.
DEFINE_string
(
"export_path"
,
None
,
"TF-Hub SavedModel destination path."
)
flags
.
DEFINE_string
(
"sp_model_file"
,
None
,
"The sentence piece model file that the ALBERT model was trained on."
)
def
create_albert_model
(
albert_config
:
bert_modeling
.
AlbertConfig
)
->
tf
.
keras
.
Model
:
"""Creates an ALBERT keras core model from ALBERT configuration.
Args:
albert_config: An `AlbertConfig` to create the core model.
Returns:
A keras model.
"""
# Adds input layers just as placeholders.
input_word_ids
=
tf
.
keras
.
layers
.
Input
(
shape
=
(
None
,),
dtype
=
tf
.
int32
,
name
=
"input_word_ids"
)
input_mask
=
tf
.
keras
.
layers
.
Input
(
shape
=
(
None
,),
dtype
=
tf
.
int32
,
name
=
"input_mask"
)
input_type_ids
=
tf
.
keras
.
layers
.
Input
(
shape
=
(
None
,),
dtype
=
tf
.
int32
,
name
=
"input_type_ids"
)
transformer_encoder
=
bert_models
.
get_transformer_encoder
(
albert_config
,
sequence_length
=
None
,
float_dtype
=
tf
.
float32
)
sequence_output
,
pooled_output
=
transformer_encoder
(
[
input_word_ids
,
input_mask
,
input_type_ids
])
# To keep consistent with legacy hub modules, the outputs are
# "pooled_output" and "sequence_output".
return
tf
.
keras
.
Model
(
inputs
=
[
input_word_ids
,
input_mask
,
input_type_ids
],
outputs
=
[
pooled_output
,
sequence_output
]),
transformer_encoder
def
export_albert_tfhub
(
albert_config
:
bert_modeling
.
AlbertConfig
,
model_checkpoint_path
:
Text
,
hub_destination
:
Text
,
sp_model_file
:
Text
):
"""Restores a tf.keras.Model and saves for TF-Hub."""
core_model
,
encoder
=
create_albert_model
(
albert_config
)
checkpoint
=
tf
.
train
.
Checkpoint
(
model
=
encoder
)
checkpoint
.
restore
(
model_checkpoint_path
).
assert_consumed
()
core_model
.
sp_model_file
=
tf
.
saved_model
.
Asset
(
sp_model_file
)
core_model
.
save
(
hub_destination
,
include_optimizer
=
False
,
save_format
=
"tf"
)
def
main
(
_
):
assert
tf
.
version
.
VERSION
.
startswith
(
'2.'
)
albert_config
=
bert_modeling
.
AlbertConfig
.
from_json_file
(
FLAGS
.
albert_config_file
)
export_albert_tfhub
(
albert_config
,
FLAGS
.
model_checkpoint_path
,
FLAGS
.
export_path
,
FLAGS
.
sp_model_file
)
if
__name__
==
"__main__"
:
app
.
run
(
main
)
official/nlp/albert/export_albert_tfhub_test.py
0 → 100644
View file @
e55c1f42
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests official.nlp.albert.export_albert_tfhub."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
os
import
numpy
as
np
import
tensorflow
as
tf
import
tensorflow_hub
as
hub
from
official.nlp
import
bert_modeling
from
official.nlp.albert
import
export_albert_tfhub
class
ExportAlbertTfhubTest
(
tf
.
test
.
TestCase
):
def
test_export_albert_tfhub
(
self
):
# Exports a savedmodel for TF-Hub
albert_config
=
bert_modeling
.
AlbertConfig
(
vocab_size
=
100
,
embedding_size
=
8
,
hidden_size
=
16
,
intermediate_size
=
32
,
max_position_embeddings
=
128
,
num_attention_heads
=
2
,
num_hidden_layers
=
1
)
bert_model
,
encoder
=
export_albert_tfhub
.
create_albert_model
(
albert_config
)
model_checkpoint_dir
=
os
.
path
.
join
(
self
.
get_temp_dir
(),
"checkpoint"
)
checkpoint
=
tf
.
train
.
Checkpoint
(
model
=
encoder
)
checkpoint
.
save
(
os
.
path
.
join
(
model_checkpoint_dir
,
"test"
))
model_checkpoint_path
=
tf
.
train
.
latest_checkpoint
(
model_checkpoint_dir
)
sp_model_file
=
os
.
path
.
join
(
self
.
get_temp_dir
(),
"sp_tokenizer.model"
)
with
tf
.
io
.
gfile
.
GFile
(
sp_model_file
,
"w"
)
as
f
:
f
.
write
(
"dummy content"
)
hub_destination
=
os
.
path
.
join
(
self
.
get_temp_dir
(),
"hub"
)
export_albert_tfhub
.
export_albert_tfhub
(
albert_config
,
model_checkpoint_path
,
hub_destination
,
sp_model_file
=
sp_model_file
)
# Restores a hub KerasLayer.
hub_layer
=
hub
.
KerasLayer
(
hub_destination
,
trainable
=
True
)
if
hasattr
(
hub_layer
,
"resolved_object"
):
with
tf
.
io
.
gfile
.
GFile
(
hub_layer
.
resolved_object
.
sp_model_file
.
asset_path
.
numpy
())
as
f
:
self
.
assertEqual
(
"dummy content"
,
f
.
read
())
# Checks the hub KerasLayer.
for
source_weight
,
hub_weight
in
zip
(
bert_model
.
trainable_weights
,
hub_layer
.
trainable_weights
):
self
.
assertAllClose
(
source_weight
.
numpy
(),
hub_weight
.
numpy
())
dummy_ids
=
np
.
zeros
((
2
,
10
),
dtype
=
np
.
int32
)
hub_outputs
=
hub_layer
([
dummy_ids
,
dummy_ids
,
dummy_ids
])
source_outputs
=
bert_model
([
dummy_ids
,
dummy_ids
,
dummy_ids
])
# The outputs of hub module are "pooled_output" and "sequence_output",
# while the outputs of encoder is in reversed order, i.e.,
# "sequence_output" and "pooled_output".
encoder_outputs
=
reversed
(
encoder
([
dummy_ids
,
dummy_ids
,
dummy_ids
]))
self
.
assertEqual
(
hub_outputs
[
0
].
shape
,
(
2
,
16
))
self
.
assertEqual
(
hub_outputs
[
1
].
shape
,
(
2
,
10
,
16
))
for
source_output
,
hub_output
,
encoder_output
in
zip
(
source_outputs
,
hub_outputs
,
encoder_outputs
):
self
.
assertAllClose
(
source_output
.
numpy
(),
hub_output
.
numpy
())
self
.
assertAllClose
(
source_output
.
numpy
(),
encoder_output
.
numpy
())
if
__name__
==
"__main__"
:
assert
tf
.
version
.
VERSION
.
startswith
(
'2.'
)
tf
.
test
.
main
()
official/nlp/bert/export_tfhub.py
View file @
e55c1f42
...
...
@@ -21,7 +21,7 @@ from __future__ import print_function
from
absl
import
app
from
absl
import
flags
import
tensorflow
as
tf
from
typing
import
Optional
,
Text
from
typing
import
Text
from
official.nlp
import
bert_modeling
from
official.nlp.bert
import
bert_models
...
...
@@ -35,20 +35,13 @@ flags.DEFINE_string("model_checkpoint_path", None,
flags
.
DEFINE_string
(
"export_path"
,
None
,
"TF-Hub SavedModel destination path."
)
flags
.
DEFINE_string
(
"vocab_file"
,
None
,
"The vocabulary file that the BERT model was trained on."
)
flags
.
DEFINE_string
(
"sp_model_file"
,
None
,
"The sentence piece model file that the ALBERT model was "
"trained on."
)
flags
.
DEFINE_enum
(
"model_type"
,
"bert"
,
[
"bert"
,
"albert"
],
"Specifies the type of the model. "
"If 'bert', will use canonical BERT; if 'albert', will use ALBERT model."
)
def
create_bert_model
(
bert_config
:
bert_modeling
.
BertConfig
):
def
create_bert_model
(
bert_config
:
bert_modeling
.
BertConfig
)
->
tf
.
keras
.
Model
:
"""Creates a BERT keras core model from BERT configuration.
Args:
bert_config: A BertConfig` to create the core model.
bert_config: A
`
BertConfig` to create the core model.
Returns:
A keras model.
...
...
@@ -72,23 +65,12 @@ def create_bert_model(bert_config: bert_modeling.BertConfig):
def
export_bert_tfhub
(
bert_config
:
bert_modeling
.
BertConfig
,
model_checkpoint_path
:
Text
,
hub_destination
:
Text
,
vocab_file
:
Optional
[
Text
]
=
None
,
sp_model_file
:
Optional
[
Text
]
=
None
):
model_checkpoint_path
:
Text
,
hub_destination
:
Text
,
vocab_file
:
Text
):
"""Restores a tf.keras.Model and saves for TF-Hub."""
core_model
,
encoder
=
create_bert_model
(
bert_config
)
checkpoint
=
tf
.
train
.
Checkpoint
(
model
=
encoder
)
checkpoint
.
restore
(
model_checkpoint_path
).
assert_consumed
()
if
isinstance
(
bert_config
,
bert_modeling
.
AlbertConfig
):
if
not
sp_model_file
:
raise
ValueError
(
"sp_model_file is required."
)
core_model
.
sp_model_file
=
tf
.
saved_model
.
Asset
(
sp_model_file
)
else
:
assert
isinstance
(
bert_config
,
bert_modeling
.
BertConfig
)
if
not
vocab_file
:
raise
ValueError
(
"vocab_file is required."
)
core_model
.
vocab_file
=
tf
.
saved_model
.
Asset
(
vocab_file
)
core_model
.
do_lower_case
=
tf
.
Variable
(
"uncased"
in
vocab_file
,
trainable
=
False
)
...
...
@@ -97,14 +79,9 @@ def export_bert_tfhub(bert_config: bert_modeling.BertConfig,
def
main
(
_
):
assert
tf
.
version
.
VERSION
.
startswith
(
'2.'
)
config_cls
=
{
"bert"
:
bert_modeling
.
BertConfig
,
"albert"
:
bert_modeling
.
AlbertConfig
,
}
bert_config
=
config_cls
[
FLAGS
.
model_type
].
from_json_file
(
FLAGS
.
bert_config_file
)
bert_config
=
bert_modeling
.
BertConfig
.
from_json_file
(
FLAGS
.
bert_config_file
)
export_bert_tfhub
(
bert_config
,
FLAGS
.
model_checkpoint_path
,
FLAGS
.
export_path
,
FLAGS
.
vocab_file
,
FLAGS
.
sp_model_file
)
FLAGS
.
vocab_file
)
if
__name__
==
"__main__"
:
...
...
official/nlp/bert/export_tfhub_test.py
View file @
e55c1f42
...
...
@@ -82,57 +82,6 @@ class ExportTfhubTest(tf.test.TestCase):
self
.
assertAllClose
(
source_output
.
numpy
(),
hub_output
.
numpy
())
self
.
assertAllClose
(
source_output
.
numpy
(),
encoder_output
.
numpy
())
def
test_export_albert_tfhub
(
self
):
# Exports a savedmodel for TF-Hub
bert_config
=
bert_modeling
.
AlbertConfig
(
vocab_size
=
100
,
embedding_size
=
8
,
hidden_size
=
16
,
intermediate_size
=
32
,
max_position_embeddings
=
128
,
num_attention_heads
=
2
,
num_hidden_layers
=
1
)
bert_model
,
encoder
=
export_tfhub
.
create_bert_model
(
bert_config
)
model_checkpoint_dir
=
os
.
path
.
join
(
self
.
get_temp_dir
(),
"checkpoint"
)
checkpoint
=
tf
.
train
.
Checkpoint
(
model
=
encoder
)
checkpoint
.
save
(
os
.
path
.
join
(
model_checkpoint_dir
,
"test"
))
model_checkpoint_path
=
tf
.
train
.
latest_checkpoint
(
model_checkpoint_dir
)
sp_model_file
=
os
.
path
.
join
(
self
.
get_temp_dir
(),
"sp_tokenizer.model"
)
with
tf
.
io
.
gfile
.
GFile
(
sp_model_file
,
"w"
)
as
f
:
f
.
write
(
"dummy content"
)
hub_destination
=
os
.
path
.
join
(
self
.
get_temp_dir
(),
"hub"
)
export_tfhub
.
export_bert_tfhub
(
bert_config
,
model_checkpoint_path
,
hub_destination
,
sp_model_file
=
sp_model_file
)
# Restores a hub KerasLayer.
hub_layer
=
hub
.
KerasLayer
(
hub_destination
,
trainable
=
True
)
if
hasattr
(
hub_layer
,
"resolved_object"
):
with
tf
.
io
.
gfile
.
GFile
(
hub_layer
.
resolved_object
.
sp_model_file
.
asset_path
.
numpy
())
as
f
:
self
.
assertEqual
(
"dummy content"
,
f
.
read
())
# Checks the hub KerasLayer.
for
source_weight
,
hub_weight
in
zip
(
bert_model
.
trainable_weights
,
hub_layer
.
trainable_weights
):
self
.
assertAllClose
(
source_weight
.
numpy
(),
hub_weight
.
numpy
())
dummy_ids
=
np
.
zeros
((
2
,
10
),
dtype
=
np
.
int32
)
hub_outputs
=
hub_layer
([
dummy_ids
,
dummy_ids
,
dummy_ids
])
source_outputs
=
bert_model
([
dummy_ids
,
dummy_ids
,
dummy_ids
])
# The outputs of hub module are "pooled_output" and "sequence_output",
# while the outputs of encoder is in reversed order, i.e.,
# "sequence_output" and "pooled_output".
encoder_outputs
=
reversed
(
encoder
([
dummy_ids
,
dummy_ids
,
dummy_ids
]))
self
.
assertEqual
(
hub_outputs
[
0
].
shape
,
(
2
,
16
))
self
.
assertEqual
(
hub_outputs
[
1
].
shape
,
(
2
,
10
,
16
))
for
source_output
,
hub_output
,
encoder_output
in
zip
(
source_outputs
,
hub_outputs
,
encoder_outputs
):
self
.
assertAllClose
(
source_output
.
numpy
(),
hub_output
.
numpy
())
self
.
assertAllClose
(
source_output
.
numpy
(),
encoder_output
.
numpy
())
if
__name__
==
"__main__"
:
assert
tf
.
version
.
VERSION
.
startswith
(
'2.'
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment