ModelZoo / ResNet50_tensorflow · Commits

Commit 32e4ca51
Authored Nov 28, 2023 by qianyj

    Update code to v2.11.0

Parents: 9485aa1d, 71060f67
Changes: 775

Showing 20 changed files with 66 additions and 31 deletions (+66 / -31)
official/nlp/data/README.md                                +4   -0
official/nlp/data/__init__.py                              +1   -1
official/nlp/data/classifier_data_lib.py                   +22  -2
official/nlp/data/classifier_data_lib_test.py              +2   -2
official/nlp/data/create_finetuning_data.py                +2   -3
official/nlp/data/create_pretraining_data.py               +2   -2
official/nlp/data/create_pretraining_data_test.py          +1   -1
official/nlp/data/create_xlnet_pretraining_data.py         +3   -3
official/nlp/data/create_xlnet_pretraining_data_test.py    +1   -1
official/nlp/data/data_loader.py                           +1   -1
official/nlp/data/data_loader_factory.py                   +1   -1
official/nlp/data/data_loader_factory_test.py              +1   -1
official/nlp/data/dual_encoder_dataloader.py               +2   -2
official/nlp/data/dual_encoder_dataloader_test.py          +1   -1
official/nlp/data/pretrain_dataloader.py                   +1   -1
official/nlp/data/pretrain_dataloader_test.py              +1   -1
official/nlp/data/pretrain_dynamic_dataloader.py           +17  -5
official/nlp/data/pretrain_dynamic_dataloader_test.py      +1   -1
official/nlp/data/question_answering_dataloader.py         +1   -1
official/nlp/data/question_answering_dataloader_test.py    +1   -1
Too many changes to show: to preserve performance, only 775 of 775+ changed files are displayed.
official/nlp/data/README.md (new file, mode 100644)

This directory contains binaries and utils required for input preprocessing,
tokenization, etc. that can be used with the model building blocks available in
the NLP modeling library
[nlp/modeling](https://github.com/tensorflow/models/tree/master/official/nlp/modeling)
to train custom models and validate new research ideas.
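As a brief, hedged illustration of how these utilities pair with the modeling library, the sketch below builds a WordPiece tokenizer from official.nlp.tools.tokenization, the module this commit switches the data pipeline to; the vocabulary path is a placeholder, not something taken from this commit.

    # Minimal sketch (not part of the commit): tokenize a sentence with the
    # WordPiece tokenizer that official/nlp/data now imports from
    # official.nlp.tools rather than official.nlp.bert.
    from official.nlp.tools import tokenization

    # Hypothetical path; any BERT-style vocab.txt works here.
    tokenizer = tokenization.FullTokenizer(
        vocab_file="/path/to/vocab.txt", do_lower_case=True)

    tokens = tokenizer.tokenize("Validate new research ideas.")
    ids = tokenizer.convert_tokens_to_ids(tokens)
    print(tokens, ids)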
official/nlp/data/__init__.py

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
official/nlp/data/classifier_data_lib.py

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...

@@ -24,7 +24,7 @@ from absl import logging
 import tensorflow as tf
 import tensorflow_datasets as tfds
-from official.nlp.bert import tokenization
+from official.nlp.tools import tokenization


 class InputExample(object):
 ...
@@ -187,6 +187,8 @@ class AxProcessor(DataProcessor):
   def _create_examples_tfds(self, dataset, set_type):
     """Creates examples for the training/dev/test sets."""
+    dataset = list(dataset)
+    dataset.sort(key=lambda x: x["idx"])
     examples = []
     for i, example in enumerate(dataset):
       guid = "%s-%s" % (set_type, i)
 ...

@@ -218,6 +220,8 @@ class ColaProcessor(DefaultGLUEDataProcessor):
     """Creates examples for the training/dev/test sets."""
     dataset = tfds.load(
         "glue/cola", split=set_type, try_gcs=True).as_numpy_iterator()
+    dataset = list(dataset)
+    dataset.sort(key=lambda x: x["idx"])
     examples = []
     for i, example in enumerate(dataset):
       guid = "%s-%s" % (set_type, i)
 ...

@@ -312,6 +316,8 @@ class MnliProcessor(DataProcessor):
     """Creates examples for the training/dev/test sets."""
     dataset = tfds.load(
         "glue/mnli", split=set_type, try_gcs=True).as_numpy_iterator()
+    dataset = list(dataset)
+    dataset.sort(key=lambda x: x["idx"])
     examples = []
     for i, example in enumerate(dataset):
       guid = "%s-%s" % (set_type, i)
 ...

@@ -343,6 +349,8 @@ class MrpcProcessor(DefaultGLUEDataProcessor):
     """Creates examples for the training/dev/test sets."""
     dataset = tfds.load(
         "glue/mrpc", split=set_type, try_gcs=True).as_numpy_iterator()
+    dataset = list(dataset)
+    dataset.sort(key=lambda x: x["idx"])
     examples = []
     for i, example in enumerate(dataset):
       guid = "%s-%s" % (set_type, i)
 ...

@@ -453,6 +461,8 @@ class QnliProcessor(DefaultGLUEDataProcessor):
     """Creates examples for the training/dev/test sets."""
     dataset = tfds.load(
         "glue/qnli", split=set_type, try_gcs=True).as_numpy_iterator()
+    dataset = list(dataset)
+    dataset.sort(key=lambda x: x["idx"])
     examples = []
     for i, example in enumerate(dataset):
       guid = "%s-%s" % (set_type, i)
 ...

@@ -484,6 +494,8 @@ class QqpProcessor(DefaultGLUEDataProcessor):
     """Creates examples for the training/dev/test sets."""
     dataset = tfds.load(
         "glue/qqp", split=set_type, try_gcs=True).as_numpy_iterator()
+    dataset = list(dataset)
+    dataset.sort(key=lambda x: x["idx"])
     examples = []
     for i, example in enumerate(dataset):
       guid = "%s-%s" % (set_type, i)
 ...

@@ -517,6 +529,8 @@ class RteProcessor(DefaultGLUEDataProcessor):
     """Creates examples for the training/dev/test sets."""
     dataset = tfds.load(
         "glue/rte", split=set_type, try_gcs=True).as_numpy_iterator()
+    dataset = list(dataset)
+    dataset.sort(key=lambda x: x["idx"])
     examples = []
     for i, example in enumerate(dataset):
       guid = "%s-%s" % (set_type, i)
 ...

@@ -548,6 +562,8 @@ class SstProcessor(DefaultGLUEDataProcessor):
     """Creates examples for the training/dev/test sets."""
     dataset = tfds.load(
         "glue/sst2", split=set_type, try_gcs=True).as_numpy_iterator()
+    dataset = list(dataset)
+    dataset.sort(key=lambda x: x["idx"])
     examples = []
     for i, example in enumerate(dataset):
       guid = "%s-%s" % (set_type, i)
 ...

@@ -574,6 +590,8 @@ class StsBProcessor(DefaultGLUEDataProcessor):
     """Creates examples for the training/dev/test sets."""
     dataset = tfds.load(
         "glue/stsb", split=set_type, try_gcs=True).as_numpy_iterator()
+    dataset = list(dataset)
+    dataset.sort(key=lambda x: x["idx"])
     examples = []
     for i, example in enumerate(dataset):
       guid = "%s-%s" % (set_type, i)
 ...

@@ -742,6 +760,8 @@ class WnliProcessor(DefaultGLUEDataProcessor):
     """Creates examples for the training/dev/test sets."""
     dataset = tfds.load(
         "glue/wnli", split=set_type, try_gcs=True).as_numpy_iterator()
+    dataset = list(dataset)
+    dataset.sort(key=lambda x: x["idx"])
     examples = []
     for i, example in enumerate(dataset):
       guid = "%s-%s" % (set_type, i)
 ...
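Every hunk above makes the same two-line addition: the TFDS iterator is materialized with list() and sorted by its integer "idx" feature before examples are enumerated, so the guid assigned to each example no longer depends on iteration order. A minimal sketch of that behavior, using toy records in place of tfds.load(...).as_numpy_iterator():

    # Self-contained sketch (not from the commit) of the deterministic-ordering
    # change applied in each GLUE processor above.
    records = iter([
        {"idx": 2, "sentence": b"c"},
        {"idx": 0, "sentence": b"a"},
        {"idx": 1, "sentence": b"b"},
    ])

    dataset = list(records)               # first added line in each processor
    dataset.sort(key=lambda x: x["idx"])  # second added line in each processor

    guids = ["%s-%s" % ("train", i) for i, _ in enumerate(dataset)]
    print(guids)  # ['train-0', 'train-1', 'train-2'] -- stable across runs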
official/nlp/data/classifier_data_lib_test.py

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...

@@ -21,8 +21,8 @@ from absl.testing import parameterized
 import tensorflow as tf
 import tensorflow_datasets as tfds
-from official.nlp.bert import tokenization
 from official.nlp.data import classifier_data_lib
+from official.nlp.tools import tokenization


 def decode_record(record, name_to_features):
 ...
official/nlp/data/create_finetuning_data.py

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...

@@ -22,7 +22,6 @@ import os
 from absl import app
 from absl import flags
 import tensorflow as tf
-from official.nlp.bert import tokenization
 from official.nlp.data import classifier_data_lib
 from official.nlp.data import sentence_retrieval_lib
 # word-piece tokenizer based squad_lib
 ...

@@ -30,10 +29,10 @@ from official.nlp.data import squad_lib as squad_lib_wp
 # sentence-piece tokenizer based squad_lib
 from official.nlp.data import squad_lib_sp
 from official.nlp.data import tagging_data_lib
+from official.nlp.tools import tokenization

 FLAGS = flags.FLAGS

 # TODO(chendouble): consider moving each task to its own binary.
 flags.DEFINE_enum(
     "fine_tuning_task_type", "classification",
     ["classification", "regression", "squad", "retrieval", "tagging"],
 ...
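For context on the fine_tuning_task_type flag shown above, here is a small, self-contained sketch of the absl enum-flag pattern it uses; this is not the script itself, and the help text is paraphrased.

    # Standalone sketch of an absl enum flag like the one in
    # create_finetuning_data.py; values outside the list abort with a flag error.
    from absl import app
    from absl import flags

    FLAGS = flags.FLAGS
    flags.DEFINE_enum(
        "fine_tuning_task_type", "classification",
        ["classification", "regression", "squad", "retrieval", "tagging"],
        "Which fine-tuning task to generate data for (paraphrased help text).")

    def main(_):
      print("Generating data for:", FLAGS.fine_tuning_task_type)

    if __name__ == "__main__":
      app.run(main)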
official/nlp/data/create_pretraining_data.py

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...

@@ -24,7 +24,7 @@ from absl import flags
 from absl import logging
 import tensorflow as tf
-from official.nlp.bert import tokenization
+from official.nlp.tools import tokenization

 FLAGS = flags.FLAGS
 ...
official/nlp/data/create_pretraining_data_test.py

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
official/nlp/data/create_xlnet_pretraining_data.py

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...

@@ -14,6 +14,7 @@
 """Create LM TF examples for XLNet."""
+import dataclasses
 import json
 import math
 import os
 ...

@@ -28,11 +29,10 @@ from absl import app
 from absl import flags
 from absl import logging
-import dataclasses
 import numpy as np
 import tensorflow as tf
-from official.nlp.bert import tokenization
+from official.nlp.tools import tokenization

 special_symbols = {
     "<unk>": 0,
 ...
official/nlp/data/create_xlnet_pretraining_data_test.py

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
official/nlp/data/data_loader.py

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
official/nlp/data/data_loader_factory.py

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
official/nlp/data/data_loader_factory_test.py

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
official/nlp/data/dual_encoder_dataloader.py

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...

@@ -124,7 +124,7 @@ class DualEncoderDataLoader(data_loader.DataLoader):
       raise ValueError('Expected {} to start with {}'.format(string, old))

     def _switch_key_prefix(d, old, new):
-      return {_switch_prefix(key, old, new): value for key, value in d.items()}
+      return {_switch_prefix(key, old, new): value for key, value in d.items()}  # pytype: disable=attribute-error  # trace-all-classes

     model_inputs = _switch_key_prefix(
         self._bert_tokenize(record, self._left_text_fields),
 ...
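The only code change in this file appends a pytype suppression to the dict comprehension inside _switch_key_prefix. For context, a hedged standalone sketch of what that helper does; the startswith handling and the prefix values below are illustrative fillers, since only the ValueError line appears in the hunk.

    # Standalone sketch (not from the commit): rename every key prefix in a
    # feature dict while leaving the values untouched.
    def _switch_prefix(string, old, new):
      if string.startswith(old):
        return new + string[len(old):]
      raise ValueError('Expected {} to start with {}'.format(string, old))

    def _switch_key_prefix(d, old, new):
      return {_switch_prefix(key, old, new): value for key, value in d.items()}

    features = {"input_word_ids": [1, 2, 3], "input_mask": [1, 1, 1]}
    print(_switch_key_prefix(features, "input_", "left_"))
    # {'left_word_ids': [1, 2, 3], 'left_mask': [1, 1, 1]}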
official/nlp/data/dual_encoder_dataloader_test.py

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
official/nlp/data/pretrain_dataloader.py

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
official/nlp/data/pretrain_dataloader_test.py

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
official/nlp/data/pretrain_dynamic_dataloader.py

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...

@@ -79,17 +79,29 @@ class PretrainingDynamicDataLoader(pretrain_dataloader.BertPretrainDataLoader):
   def _decode(self, record: tf.Tensor):
     """Decodes a serialized tf.Example."""
     name_to_features = {
-        'input_ids': tf.io.VarLenFeature(tf.int64),
         'input_mask': tf.io.VarLenFeature(tf.int64),
-        'segment_ids': tf.io.VarLenFeature(tf.int64),
         'masked_lm_positions': tf.io.VarLenFeature(tf.int64),
         'masked_lm_ids': tf.io.VarLenFeature(tf.int64),
         'masked_lm_weights': tf.io.VarLenFeature(tf.float32),
     }
+    if self._params.use_v2_feature_names:
+      input_ids_key = 'input_word_ids'
+      segment_key = 'input_type_ids'
+      name_to_features.update({
+          input_ids_key: tf.io.VarLenFeature(tf.int64),
+          segment_key: tf.io.VarLenFeature(tf.int64),
+      })
+    else:
+      input_ids_key = 'input_ids'
+      segment_key = 'segment_ids'
+      name_to_features.update({
+          input_ids_key: tf.io.VarLenFeature(tf.int64),
+          segment_key: tf.io.VarLenFeature(tf.int64),
+      })
     if self._use_next_sentence_label:
       name_to_features['next_sentence_labels'] = tf.io.FixedLenFeature(
           [1], tf.int64)
-    dynamic_keys = ['input_ids', 'input_mask', 'segment_ids']
+    dynamic_keys = [input_ids_key, 'input_mask', segment_key]
     if self._use_position_id:
       name_to_features['position_ids'] = tf.io.VarLenFeature(tf.int64)
       dynamic_keys.append('position_ids')
 ...

@@ -102,7 +114,7 @@ class PretrainingDynamicDataLoader(pretrain_dataloader.BertPretrainDataLoader):
     # sequence length dimension.
     # Pad before the first non pad from the back should not be removed.
     mask = tf.math.greater(
-        tf.math.cumsum(example['input_ids'], reverse=True), 0)
+        tf.math.cumsum(example[input_ids_key], reverse=True), 0)
     for key in dynamic_keys:
       example[key] = tf.boolean_mask(example[key], mask)
 ...
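The second hunk keeps the dynamic-length trimming logic but reads the ids through input_ids_key, so it works with both the v1 ('input_ids') and v2 ('input_word_ids') feature names introduced above. A minimal sketch of the cumsum/boolean_mask trick itself, with made-up token ids:

    # Minimal sketch (not from the commit) of the padding-trim step: a reversed
    # cumulative sum stays positive up to and including the last non-zero id,
    # so boolean_mask drops only trailing pads, never pads before real tokens.
    import tensorflow as tf

    input_word_ids = tf.constant([101, 7592, 0, 2088, 102, 0, 0, 0], tf.int64)

    mask = tf.math.greater(tf.math.cumsum(input_word_ids, reverse=True), 0)
    trimmed = tf.boolean_mask(input_word_ids, mask)

    print(trimmed.numpy())  # [ 101 7592    0 2088  102] -- trailing pads removed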
official/nlp/data/pretrain_dynamic_dataloader_test.py

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
official/nlp/data/question_answering_dataloader.py

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...
official/nlp/data/question_answering_dataloader_test.py

-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 ...