ModelZoo / ResNet50_tensorflow / Commits / 19e8d0a0

Commit 19e8d0a0
Authored Aug 22, 2020 by Chen Chen; committed by A. Unique TensorFlower, Aug 22, 2020

Internal change

PiperOrigin-RevId: 327983381
Parent: 56186d78

Changes: 2 files, with 19 additions and 19 deletions (+19 / -19)

* official/nlp/albert/README.md (+2 / -2)
* official/nlp/data/create_finetuning_data.py (+17 / -17)

official/nlp/albert/README.md @ 19e8d0a0

@@ -148,7 +148,7 @@ python ../data/create_finetuning_data.py \
   --meta_data_file_path=${OUTPUT_DIR}/${TASK_NAME}_meta_data \
   --fine_tuning_task_type=classification --max_seq_length=128 \
   --classification_task_name=${TASK_NAME} \
-  --tokenizer_impl=sentence_piece
+  --tokenization=SentencePiece
 ```

 * SQUAD

@@ -177,7 +177,7 @@ python ../data/create_finetuning_data.py \
   --train_data_output_path=${OUTPUT_DIR}/squad_${SQUAD_VERSION}_train.tf_record \
   --meta_data_file_path=${OUTPUT_DIR}/squad_${SQUAD_VERSION}_meta_data \
   --fine_tuning_task_type=squad --max_seq_length=384 \
-  --tokenizer_impl=sentence_piece
+  --tokenization=SentencePiece
 ```

 ## Fine-tuning with ALBERT

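For orientation (not part of this commit): after the rename, the README's classification example would be invoked roughly as follows. The environment variables are the README's placeholders, the flags not visible in the hunks above (`--input_data_dir`, `--sp_model_file`, `--train_data_output_path`) are assumed from FLAGS referenced elsewhere in this diff, and all paths and file names are illustrative.

```shell
# Placeholder locations; adjust to your checkout / data layout.
export ALBERT_DIR=/path/to/albert/pretrained_model
export GLUE_DIR=/path/to/glue_data
export OUTPUT_DIR=/tmp/albert_finetune_data
export TASK_NAME=MNLI

python ../data/create_finetuning_data.py \
  --input_data_dir=${GLUE_DIR}/${TASK_NAME} \
  --sp_model_file=${ALBERT_DIR}/30k-clean.model \
  --train_data_output_path=${OUTPUT_DIR}/${TASK_NAME}_train.tf_record \
  --meta_data_file_path=${OUTPUT_DIR}/${TASK_NAME}_meta_data \
  --fine_tuning_task_type=classification --max_seq_length=128 \
  --classification_task_name=${TASK_NAME} \
  --tokenization=SentencePiece
```

The only change relative to the pre-commit README is the last flag: `--tokenizer_impl=sentence_piece` becomes `--tokenization=SentencePiece`.
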
official/nlp/data/create_finetuning_data.py @ 19e8d0a0

@@ -142,10 +142,10 @@ flags.DEFINE_string("sp_model_file", "",
                     "The path to the model used by sentence piece tokenizer.")
-flags.DEFINE_enum("tokenizer_impl", "word_piece", ["word_piece", "sentence_piece"],
-                  "Specifies the tokenizer implementation, i.e., whehter to use word_piece "
-                  "or sentence_piece tokenizer. Canonical BERT uses word_piece tokenizer, "
-                  "while ALBERT uses sentence_piece tokenizer.")
+flags.DEFINE_enum("tokenization", "WordPiece", ["WordPiece", "SentencePiece"],
+                  "Specifies the tokenizer implementation, i.e., whether to use WordPiece "
+                  "or SentencePiece tokenizer. Canonical BERT uses WordPiece tokenizer, "
+                  "while ALBERT uses SentencePiece tokenizer.")
 flags.DEFINE_string("tfds_params", "",
                     "Comma-separated list of TFDS parameter assigments for "

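Because the renamed flag is an absl enum flag, only the two spellings listed above are accepted; scripts still passing the old `--tokenizer_impl=sentence_piece` (or any of the old lower_snake_case values) are now rejected at flag-parsing time. A minimal, self-contained sketch of the same definition; the `__main__` harness and file name are illustrative and not part of create_finetuning_data.py:

```python
from absl import app
from absl import flags

FLAGS = flags.FLAGS

# Same definition as in the hunk above, minus the long help text.
flags.DEFINE_enum("tokenization", "WordPiece", ["WordPiece", "SentencePiece"],
                  "Which tokenizer implementation to use.")


def main(_):
  print("tokenization =", FLAGS.tokenization)


if __name__ == "__main__":
  # `python demo.py --tokenization=SentencePiece` prints the chosen value;
  # `--tokenization=sentence_piece` (the pre-commit spelling) is rejected by
  # the flag parser because it is not one of the enum values above.
  app.run(main)
```
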
@@ -158,12 +158,12 @@ def generate_classifier_dataset():
   assert (FLAGS.input_data_dir and FLAGS.classification_task_name
           or FLAGS.tfds_params)
-  if FLAGS.tokenizer_impl == "word_piece":
+  if FLAGS.tokenization == "WordPiece":
     tokenizer = tokenization.FullTokenizer(
         vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)
     processor_text_fn = tokenization.convert_to_unicode
   else:
-    assert FLAGS.tokenizer_impl == "sentence_piece"
+    assert FLAGS.tokenization == "SentencePiece"
     tokenizer = tokenization.FullSentencePieceTokenizer(FLAGS.sp_model_file)
     processor_text_fn = functools.partial(
         tokenization.preprocess_text, lower=FLAGS.do_lower_case)

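The branch above is the pattern this commit touches in every remaining hunk: WordPiece pairs `tokenization.FullTokenizer` with `convert_to_unicode`, while SentencePiece pairs `FullSentencePieceTokenizer` with `preprocess_text`. A standalone sketch of that selection, with the module path (`official.nlp.bert.tokenization`) and all file paths assumed rather than taken from this diff:

```python
# Sketch only: mirrors the branch in generate_classifier_dataset() above.
import functools

from official.nlp.bert import tokenization  # assumed Model Garden layout


def build_tokenizer(tokenization_flag, vocab_file=None, sp_model_file=None,
                    do_lower_case=True):
  """Returns (tokenizer, processor_text_fn) for the given tokenization mode."""
  if tokenization_flag == "WordPiece":
    # Canonical BERT: vocabulary-file-driven WordPiece tokenizer.
    tokenizer = tokenization.FullTokenizer(
        vocab_file=vocab_file, do_lower_case=do_lower_case)
    processor_text_fn = tokenization.convert_to_unicode
  else:
    # ALBERT: SentencePiece model file instead of a vocabulary file.
    assert tokenization_flag == "SentencePiece"
    tokenizer = tokenization.FullSentencePieceTokenizer(sp_model_file)
    processor_text_fn = functools.partial(
        tokenization.preprocess_text, lower=do_lower_case)
  return tokenizer, processor_text_fn
```
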
@@ -226,12 +226,12 @@ def generate_classifier_dataset():
 def generate_regression_dataset():
   """Generates regression dataset and returns input meta data."""
-  if FLAGS.tokenizer_impl == "word_piece":
+  if FLAGS.tokenization == "WordPiece":
     tokenizer = tokenization.FullTokenizer(
         vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)
     processor_text_fn = tokenization.convert_to_unicode
   else:
-    assert FLAGS.tokenizer_impl == "sentence_piece"
+    assert FLAGS.tokenization == "SentencePiece"
     tokenizer = tokenization.FullSentencePieceTokenizer(FLAGS.sp_model_file)
     processor_text_fn = functools.partial(
         tokenization.preprocess_text, lower=FLAGS.do_lower_case)

@@ -255,13 +255,13 @@ def generate_regression_dataset():
 def generate_squad_dataset():
   """Generates squad training dataset and returns input meta data."""
   assert FLAGS.squad_data_file
-  if FLAGS.tokenizer_impl == "word_piece":
+  if FLAGS.tokenization == "WordPiece":
     return squad_lib_wp.generate_tf_record_from_json_file(
         FLAGS.squad_data_file, FLAGS.vocab_file, FLAGS.train_data_output_path,
         FLAGS.max_seq_length, FLAGS.do_lower_case, FLAGS.max_query_length,
         FLAGS.doc_stride, FLAGS.version_2_with_negative)
   else:
-    assert FLAGS.tokenizer_impl == "sentence_piece"
+    assert FLAGS.tokenization == "SentencePiece"
     return squad_lib_sp.generate_tf_record_from_json_file(
         FLAGS.squad_data_file, FLAGS.sp_model_file,
         FLAGS.train_data_output_path, FLAGS.max_seq_length, FLAGS.do_lower_case,

@@ -271,12 +271,12 @@ def generate_squad_dataset():
 def generate_retrieval_dataset():
   """Generate retrieval test and dev dataset and returns input meta data."""
   assert (FLAGS.input_data_dir and FLAGS.retrieval_task_name)
-  if FLAGS.tokenizer_impl == "word_piece":
+  if FLAGS.tokenization == "WordPiece":
     tokenizer = tokenization.FullTokenizer(
         vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)
     processor_text_fn = tokenization.convert_to_unicode
   else:
-    assert FLAGS.tokenizer_impl == "sentence_piece"
+    assert FLAGS.tokenization == "SentencePiece"
     tokenizer = tokenization.FullSentencePieceTokenizer(FLAGS.sp_model_file)
     processor_text_fn = functools.partial(
         tokenization.preprocess_text, lower=FLAGS.do_lower_case)

@@ -311,16 +311,16 @@ def generate_tagging_dataset():
   if task_name not in processors:
     raise ValueError("Task not found: %s" % task_name)
-  if FLAGS.tokenizer_impl == "word_piece":
+  if FLAGS.tokenization == "WordPiece":
     tokenizer = tokenization.FullTokenizer(
         vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)
     processor_text_fn = tokenization.convert_to_unicode
-  elif FLAGS.tokenizer_impl == "sentence_piece":
+  elif FLAGS.tokenization == "SentencePiece":
     tokenizer = tokenization.FullSentencePieceTokenizer(FLAGS.sp_model_file)
     processor_text_fn = functools.partial(
         tokenization.preprocess_text, lower=FLAGS.do_lower_case)
   else:
-    raise ValueError("Unsupported tokenizer_impl: %s" % FLAGS.tokenizer_impl)
+    raise ValueError("Unsupported tokenization: %s" % FLAGS.tokenization)
   processor = processors[task_name]()
   return tagging_data_lib.generate_tf_record_from_data_file(

@@ -330,12 +330,12 @@ def generate_tagging_dataset():
 def main(_):
-  if FLAGS.tokenizer_impl == "word_piece":
+  if FLAGS.tokenization == "WordPiece":
     if not FLAGS.vocab_file:
       raise ValueError(
           "FLAG vocab_file for word-piece tokenizer is not specified.")
   else:
-    assert FLAGS.tokenizer_impl == "sentence_piece"
+    assert FLAGS.tokenization == "SentencePiece"
     if not FLAGS.sp_model_file:
       raise ValueError(
           "FLAG sp_model_file for sentence-piece tokenizer is not specified.")

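One practical consequence of the rename is that wrapper scripts calling create_finetuning_data.py must update both the flag name and the value spelling. A hypothetical helper, not part of this commit or repository, that rewrites legacy argument lists:

```python
# Hypothetical migration helper; the mapping follows the old and new enum
# values shown in the diff above.
_OLD_TO_NEW_VALUE = {"word_piece": "WordPiece", "sentence_piece": "SentencePiece"}


def migrate_tokenizer_args(argv):
  """Rewrites legacy --tokenizer_impl=<value> arguments to --tokenization=<value>."""
  migrated = []
  for arg in argv:
    if arg.startswith("--tokenizer_impl="):
      old_value = arg.split("=", 1)[1]
      migrated.append("--tokenization=" + _OLD_TO_NEW_VALUE.get(old_value, old_value))
    else:
      migrated.append(arg)
  return migrated


# Example: ["--tokenizer_impl=sentence_piece"] -> ["--tokenization=SentencePiece"]
```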