ModelZoo / ResNet50_tensorflow · Commits

Commit a982ddeb, authored Aug 24, 2020 by Chen Chen, committed by A. Unique TensorFlower on Aug 24, 2020.
Parent: 00488c79

Internal change

PiperOrigin-RevId: 328206696
Changes: 6 changed files with 43 additions and 23 deletions (+43, -23)
official/nlp/bert/input_pipeline.py                  +2  -2
official/nlp/data/classifier_data_lib.py             +12 -10
official/nlp/data/sentence_prediction_dataloader.py  +9  -0
official/nlp/data/sentence_retrieval_lib.py          +3  -3
official/nlp/tasks/sentence_prediction.py            +13 -6
official/nlp/tasks/sentence_prediction_test.py       +4  -2
official/nlp/bert/input_pipeline.py (+2 -2)

@@ -262,7 +262,7 @@ def create_retrieval_dataset(file_path,
       'input_ids': tf.io.FixedLenFeature([seq_length], tf.int64),
       'input_mask': tf.io.FixedLenFeature([seq_length], tf.int64),
       'segment_ids': tf.io.FixedLenFeature([seq_length], tf.int64),
-      'int_iden': tf.io.FixedLenFeature([1], tf.int64),
+      'example_id': tf.io.FixedLenFeature([1], tf.int64),
   }
   dataset = single_file_dataset(file_path, name_to_features)
@@ -278,7 +278,7 @@ def create_retrieval_dataset(file_path,
         'input_mask': record['input_mask'],
         'input_type_ids': record['segment_ids']
     }
-    y = record['int_iden']
+    y = record['example_id']
     return (x, y)

   dataset = dataset.map(
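Note (not part of the commit): a minimal, self-contained sketch of how a record carrying the renamed example_id feature round-trips through the parsing spec above; the feature values are made up for illustration.

import tensorflow as tf

seq_length = 4  # illustrative; the real pipeline passes this in

# Hypothetical serialized record using the renamed 'example_id' feature.
record = tf.train.Example(features=tf.train.Features(feature={
    'input_ids': tf.train.Feature(int64_list=tf.train.Int64List(value=[5, 6, 7, 8])),
    'input_mask': tf.train.Feature(int64_list=tf.train.Int64List(value=[1, 1, 1, 1])),
    'segment_ids': tf.train.Feature(int64_list=tf.train.Int64List(value=[0, 0, 0, 0])),
    'example_id': tf.train.Feature(int64_list=tf.train.Int64List(value=[42])),
})).SerializeToString()

name_to_features = {
    'input_ids': tf.io.FixedLenFeature([seq_length], tf.int64),
    'input_mask': tf.io.FixedLenFeature([seq_length], tf.int64),
    'segment_ids': tf.io.FixedLenFeature([seq_length], tf.int64),
    'example_id': tf.io.FixedLenFeature([1], tf.int64),
}
parsed = tf.io.parse_single_example(record, name_to_features)
print(parsed['example_id'].numpy())  # [42]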
official/nlp/data/classifier_data_lib.py (+12 -10)

@@ -39,7 +39,7 @@ class InputExample(object):
                text_b=None,
                label=None,
                weight=None,
-               int_iden=None):
+               example_id=None):
     """Constructs a InputExample.

     Args:
@@ -53,15 +53,15 @@ class InputExample(object):
         examples, but not for test examples.
       weight: (Optional) float. The weight of the example to be used during
         training.
-      int_iden: (Optional) int. The int identification number of example in
-        the corpus.
+      example_id: (Optional) int. The int identification number of example in
+        the corpus.
     """
     self.guid = guid
     self.text_a = text_a
     self.text_b = text_b
     self.label = label
     self.weight = weight
-    self.int_iden = int_iden
+    self.example_id = example_id


 class InputFeatures(object):
@@ -74,14 +74,14 @@ class InputFeatures(object):
                label_id,
                is_real_example=True,
                weight=None,
-               int_iden=None):
+               example_id=None):
     self.input_ids = input_ids
     self.input_mask = input_mask
     self.segment_ids = segment_ids
     self.label_id = label_id
     self.is_real_example = is_real_example
     self.weight = weight
-    self.int_iden = int_iden
+    self.example_id = example_id


 class DataProcessor(object):
@@ -1050,7 +1050,7 @@ def convert_single_example(ex_index, example, label_list, max_seq_length,
     logging.info("segment_ids: %s", " ".join([str(x) for x in segment_ids]))
     logging.info("label: %s (id = %s)", example.label, str(label_id))
     logging.info("weight: %s", example.weight)
-    logging.info("int_iden: %s", str(example.int_iden))
+    logging.info("example_id: %s", example.example_id)

   feature = InputFeatures(
       input_ids=input_ids,
@@ -1059,7 +1059,7 @@ def convert_single_example(ex_index, example, label_list, max_seq_length,
       label_id=label_id,
       is_real_example=True,
       weight=example.weight,
-      int_iden=example.int_iden)
+      example_id=example.example_id)

   return feature

@@ -1102,8 +1102,10 @@ def file_based_convert_examples_to_features(examples,
         [int(feature.is_real_example)])
     if feature.weight is not None:
       features["weight"] = create_float_feature([feature.weight])
-    if feature.int_iden is not None:
-      features["int_iden"] = create_int_feature([feature.int_iden])
+    if feature.example_id is not None:
+      features["example_id"] = create_int_feature([feature.example_id])
+    else:
+      features["example_id"] = create_int_feature([ex_index])

     tf_example = tf.train.Example(features=tf.train.Features(feature=features))
     writer.write(tf_example.SerializeToString())
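Note (not part of the commit): the new else branch means every serialized example now carries an example_id, falling back to its enumeration index when the processor did not assign one. A small sketch of that logic in isolation; pick_example_id is a hypothetical wrapper, but create_int_feature mirrors the helper's shape in this file.

import tensorflow as tf

def create_int_feature(values):
  return tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))

def pick_example_id(example_id, ex_index):
  # Explicit id wins; otherwise fall back to the running index,
  # mirroring the if/else added in file_based_convert_examples_to_features.
  if example_id is not None:
    return create_int_feature([example_id])
  return create_int_feature([ex_index])

print(pick_example_id(123, 7))   # int64_list { value: 123 }
print(pick_example_id(None, 7))  # int64_list { value: 7 }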
official/nlp/data/sentence_prediction_dataloader.py (+9 -0)

@@ -35,6 +35,8 @@ class SentencePredictionDataConfig(cfg.DataConfig):
   is_training: bool = True
   seq_length: int = 128
   label_type: str = 'int'
+  # Whether to include the example id number.
+  include_example_id: bool = False


 @data_loader_factory.register_data_loader_cls(SentencePredictionDataConfig)
@@ -44,6 +46,7 @@ class SentencePredictionDataLoader(data_loader.DataLoader):
   def __init__(self, params):
     self._params = params
     self._seq_length = params.seq_length
+    self._include_example_id = params.include_example_id

   def _decode(self, record: tf.Tensor):
     """Decodes a serialized tf.Example."""
@@ -54,6 +57,9 @@ class SentencePredictionDataLoader(data_loader.DataLoader):
         'segment_ids': tf.io.FixedLenFeature([self._seq_length], tf.int64),
         'label_ids': tf.io.FixedLenFeature([], label_type),
     }
+    if self._include_example_id:
+      name_to_features['example_id'] = tf.io.FixedLenFeature([], tf.int64)
+
     example = tf.io.parse_single_example(record, name_to_features)

     # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
@@ -73,6 +79,9 @@ class SentencePredictionDataLoader(data_loader.DataLoader):
         'input_mask': record['input_mask'],
         'input_type_ids': record['segment_ids']
     }
+    if self._include_example_id:
+      x['example_id'] = record['example_id']
+
     y = record['label_ids']
     return (x, y)
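Note (not part of the commit): a hedged usage sketch of the new flag. With include_example_id=True, each decoded feature dict x gains an 'example_id' entry next to the usual BERT inputs. The input path and batch size here are illustrative, and load() is assumed to be the standard entry point inherited from data_loader.DataLoader.

from official.nlp.data import sentence_prediction_dataloader as loader

config = loader.SentencePredictionDataConfig(
    input_path='/tmp/eval.tf_record',  # illustrative path
    is_training=False,
    seq_length=128,
    global_batch_size=16,
    include_example_id=True)
dataset = loader.SentencePredictionDataLoader(config).load()
# Each element is (x, y); x now also contains x['example_id'].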
official/nlp/data/sentence_retrieval_lib.py (+3 -3)

@@ -49,11 +49,11 @@ class BuccProcessor(classifier_data_lib.DataProcessor):
     examples = []
     for (i, line) in enumerate(lines):
       guid = "%s-%s" % (set_type, i)
-      int_iden = int(line[0].split("-")[1])
+      example_id = int(line[0].split("-")[1])
       text_a = self.process_text_fn(line[1])
       examples.append(
           classifier_data_lib.InputExample(
-              guid=guid, text_a=text_a, int_iden=int_iden))
+              guid=guid, text_a=text_a, example_id=example_id))
     return examples
@@ -86,7 +86,7 @@ class TatoebaProcessor(classifier_data_lib.DataProcessor):
       text_a = self.process_text_fn(line[0])
       examples.append(
           classifier_data_lib.InputExample(
-              guid=guid, text_a=text_a, int_iden=i))
+              guid=guid, text_a=text_a, example_id=i))
     return examples
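Note (not part of the commit): BUCC lines key each sentence with an id of the form corpus-number, and the renamed example_id keeps only the numeric part, exactly as the line above computes. A tiny illustration with a made-up id:

line_id = 'de-000042'  # hypothetical BUCC sentence id, i.e. line[0]
example_id = int(line_id.split('-')[1])
print(example_id)  # 42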
official/nlp/tasks/sentence_prediction.py (+13 -6)

@@ -246,22 +246,29 @@ def predict(task: SentencePredictionTask, params: cfg.DataConfig,
   def predict_step(inputs):
     """Replicated prediction calculation."""
     x, _ = inputs
+    example_id = x.pop('example_id')
     outputs = task.inference_step(x, model)
     if is_regression:
-      return outputs
+      return dict(example_id=example_id, predictions=outputs)
     else:
-      return tf.argmax(outputs, axis=-1)
+      return dict(
+          example_id=example_id, predictions=tf.argmax(outputs, axis=-1))

   def aggregate_fn(state, outputs):
     """Concatenates model's outputs."""
     if state is None:
-      state = {'predictions': []}
+      state = []

-    for per_replica_batch_predictions in outputs:
-      state['predictions'].extend(per_replica_batch_predictions)
+    for per_replica_example_id, per_replica_batch_predictions in zip(
+        outputs['example_id'], outputs['predictions']):
+      state.extend(zip(per_replica_example_id, per_replica_batch_predictions))
     return state

   dataset = orbit.utils.make_distributed_dataset(tf.distribute.get_strategy(),
                                                  task.build_inputs, params)
   outputs = utils.predict(predict_step, aggregate_fn, dataset)
-  return outputs['predictions']
+
+  # When running on TPU POD, the order of output cannot be maintained,
+  # so we need to sort by example_id.
+  outputs = sorted(outputs, key=lambda x: x[0])
+  return [x[1] for x in outputs]
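Note (not part of the commit): the point of carrying example_id through predict_step is that on a TPU pod the per-replica outputs can arrive out of input order; aggregate_fn now collects (example_id, prediction) pairs so the final sort can restore it. A minimal sketch with made-up values:

# Pairs as aggregate_fn might collect them, out of input order on a pod.
outputs = [(2, 0.31), (0, 0.87), (1, 0.54)]
outputs = sorted(outputs, key=lambda x: x[0])
predictions = [x[1] for x in outputs]
print(predictions)  # [0.87, 0.54, 0.31], back in example order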
official/nlp/tasks/sentence_prediction_test.py (+4 -2)

@@ -40,13 +40,14 @@ def _create_fake_dataset(output_path, seq_length, num_classes, num_examples):
   def create_float_feature(values):
     return tf.train.Feature(float_list=tf.train.FloatList(value=list(values)))

-  for _ in range(num_examples):
+  for i in range(num_examples):
     features = {}
     input_ids = np.random.randint(100, size=(seq_length))
     features["input_ids"] = create_int_feature(input_ids)
     features["input_mask"] = create_int_feature(np.ones_like(input_ids))
     features["segment_ids"] = create_int_feature(np.ones_like(input_ids))
     features["segment_ids"] = create_int_feature(np.ones_like(input_ids))
+    features["example_id"] = create_int_feature([i])
     if num_classes == 1:
       features["label_ids"] = create_float_feature([np.random.random()])
@@ -250,7 +251,8 @@ class SentencePredictionTaskTest(tf.test.TestCase, parameterized.TestCase):
         is_training=False,
         label_type="int" if num_classes > 1 else "float",
         global_batch_size=16,
-        drop_remainder=False))
+        drop_remainder=False,
+        include_example_id=True))
     predictions = sentence_prediction.predict(task, test_data_config, model)
     self.assertLen(predictions, num_examples)