Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
e689246f
Commit
e689246f
authored
Jan 20, 2022
by
Hongkun Yu
Committed by
A. Unique TensorFlower
Jan 20, 2022
Browse files
Internal change
PiperOrigin-RevId: 423199224
parent
c41d6565
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
16 additions
and
4 deletions
+16
-4
official/nlp/data/pretrain_dynamic_dataloader.py
official/nlp/data/pretrain_dynamic_dataloader.py
+16
-4
No files found.
official/nlp/data/pretrain_dynamic_dataloader.py
View file @
e689246f
...
@@ -79,17 +79,29 @@ class PretrainingDynamicDataLoader(pretrain_dataloader.BertPretrainDataLoader):
...
@@ -79,17 +79,29 @@ class PretrainingDynamicDataLoader(pretrain_dataloader.BertPretrainDataLoader):
def
_decode
(
self
,
record
:
tf
.
Tensor
):
def
_decode
(
self
,
record
:
tf
.
Tensor
):
"""Decodes a serialized tf.Example."""
"""Decodes a serialized tf.Example."""
name_to_features
=
{
name_to_features
=
{
'input_ids'
:
tf
.
io
.
VarLenFeature
(
tf
.
int64
),
'input_mask'
:
tf
.
io
.
VarLenFeature
(
tf
.
int64
),
'input_mask'
:
tf
.
io
.
VarLenFeature
(
tf
.
int64
),
'segment_ids'
:
tf
.
io
.
VarLenFeature
(
tf
.
int64
),
'masked_lm_positions'
:
tf
.
io
.
VarLenFeature
(
tf
.
int64
),
'masked_lm_positions'
:
tf
.
io
.
VarLenFeature
(
tf
.
int64
),
'masked_lm_ids'
:
tf
.
io
.
VarLenFeature
(
tf
.
int64
),
'masked_lm_ids'
:
tf
.
io
.
VarLenFeature
(
tf
.
int64
),
'masked_lm_weights'
:
tf
.
io
.
VarLenFeature
(
tf
.
float32
),
'masked_lm_weights'
:
tf
.
io
.
VarLenFeature
(
tf
.
float32
),
}
}
if
self
.
_params
.
use_v2_feature_names
:
input_ids_key
=
'input_word_ids'
segment_key
=
'input_type_ids'
name_to_features
.
update
({
input_ids_key
:
tf
.
io
.
VarLenFeature
(
tf
.
int64
),
segment_key
:
tf
.
io
.
VarLenFeature
(
tf
.
int64
),
})
else
:
input_ids_key
=
'input_ids'
segment_key
=
'segment_ids'
name_to_features
.
update
({
input_ids_key
:
tf
.
io
.
VarLenFeature
(
tf
.
int64
),
segment_key
:
tf
.
io
.
VarLenFeature
(
tf
.
int64
),
})
if
self
.
_use_next_sentence_label
:
if
self
.
_use_next_sentence_label
:
name_to_features
[
'next_sentence_labels'
]
=
tf
.
io
.
FixedLenFeature
([
1
],
name_to_features
[
'next_sentence_labels'
]
=
tf
.
io
.
FixedLenFeature
([
1
],
tf
.
int64
)
tf
.
int64
)
dynamic_keys
=
[
'
input_ids
'
,
'input_mask'
,
'
segment_
ids'
]
dynamic_keys
=
[
input_ids
_key
,
'input_mask'
,
segment_
key
]
if
self
.
_use_position_id
:
if
self
.
_use_position_id
:
name_to_features
[
'position_ids'
]
=
tf
.
io
.
VarLenFeature
(
tf
.
int64
)
name_to_features
[
'position_ids'
]
=
tf
.
io
.
VarLenFeature
(
tf
.
int64
)
dynamic_keys
.
append
(
'position_ids'
)
dynamic_keys
.
append
(
'position_ids'
)
...
@@ -102,7 +114,7 @@ class PretrainingDynamicDataLoader(pretrain_dataloader.BertPretrainDataLoader):
...
@@ -102,7 +114,7 @@ class PretrainingDynamicDataLoader(pretrain_dataloader.BertPretrainDataLoader):
# sequence length dimension.
# sequence length dimension.
# Pad before the first non pad from the back should not be removed.
# Pad before the first non pad from the back should not be removed.
mask
=
tf
.
math
.
greater
(
mask
=
tf
.
math
.
greater
(
tf
.
math
.
cumsum
(
example
[
'
input_ids
'
],
reverse
=
True
),
0
)
tf
.
math
.
cumsum
(
example
[
input_ids
_key
],
reverse
=
True
),
0
)
for
key
in
dynamic_keys
:
for
key
in
dynamic_keys
:
example
[
key
]
=
tf
.
boolean_mask
(
example
[
key
],
mask
)
example
[
key
]
=
tf
.
boolean_mask
(
example
[
key
],
mask
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment