Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
252e6384
"vscode:/vscode.git/clone" did not exist on "47296a7ec30d23701e83fe8a65a3558865b811bf"
Commit
252e6384
authored
Nov 19, 2019
by
Hongkun Yu
Committed by
A. Unique TensorFlower
Nov 19, 2019
Browse files
Use flags utils and distribution_utils
PiperOrigin-RevId: 281337671
parent
1b8c0ee8
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
33 additions
and
47 deletions
+33
-47
official/nlp/bert/README.md
official/nlp/bert/README.md
+5
-5
official/nlp/bert/common_flags.py
official/nlp/bert/common_flags.py
+13
-11
official/nlp/bert/run_classifier.py
official/nlp/bert/run_classifier.py
+5
-11
official/nlp/bert/run_pretraining.py
official/nlp/bert/run_pretraining.py
+5
-9
official/nlp/bert/run_squad.py
official/nlp/bert/run_squad.py
+5
-11
No files found.
official/nlp/bert/README.md
View file @
252e6384
...
@@ -209,7 +209,7 @@ script should run with `tf-nightly`.
...
@@ -209,7 +209,7 @@ script should run with `tf-nightly`.
Just add the following flags to
`run_classifier.py`
or
`run_squad.py`
:
Just add the following flags to
`run_classifier.py`
or
`run_squad.py`
:
```
shell
```
shell
--strategy
_type
=
tpu
--
distribution_
strategy
=
tpu
--tpu
=
grpc://
${
TPU_IP_ADDRESS
}
:8470
--tpu
=
grpc://
${
TPU_IP_ADDRESS
}
:8470
```
```
...
@@ -243,7 +243,7 @@ python run_classifier.py \
...
@@ -243,7 +243,7 @@ python run_classifier.py \
--learning_rate
=
2e-5
\
--learning_rate
=
2e-5
\
--num_train_epochs
=
3
\
--num_train_epochs
=
3
\
--model_dir
=
${
MODEL_DIR
}
\
--model_dir
=
${
MODEL_DIR
}
\
--strategy
_type
=
mirror
--
distribution_
strategy
=
mirror
```
```
To use TPU, you only need to switch distribution strategy type to
`tpu`
with TPU
To use TPU, you only need to switch distribution strategy type to
`tpu`
with TPU
...
@@ -267,7 +267,7 @@ python run_classifier.py \
...
@@ -267,7 +267,7 @@ python run_classifier.py \
--learning_rate
=
2e-5
\
--learning_rate
=
2e-5
\
--num_train_epochs
=
3
\
--num_train_epochs
=
3
\
--model_dir
=
${
MODEL_DIR
}
\
--model_dir
=
${
MODEL_DIR
}
\
--strategy
_type
=
tpu
\
--
distribution_
strategy
=
tpu
\
--tpu
=
grpc://
${
TPU_IP_ADDRESS
}
:8470
--tpu
=
grpc://
${
TPU_IP_ADDRESS
}
:8470
```
```
...
@@ -299,7 +299,7 @@ python run_squad.py \
...
@@ -299,7 +299,7 @@ python run_squad.py \
--learning_rate
=
8e-5
\
--learning_rate
=
8e-5
\
--num_train_epochs
=
2
\
--num_train_epochs
=
2
\
--model_dir
=
${
MODEL_DIR
}
\
--model_dir
=
${
MODEL_DIR
}
\
--strategy
_type
=
mirror
--
distribution_
strategy
=
mirror
```
```
To use TPU, you need to switch distribution strategy type to
`tpu`
with TPU
To use TPU, you need to switch distribution strategy type to
`tpu`
with TPU
...
@@ -323,7 +323,7 @@ python run_squad.py \
...
@@ -323,7 +323,7 @@ python run_squad.py \
--learning_rate
=
8e-5
\
--learning_rate
=
8e-5
\
--num_train_epochs
=
2
\
--num_train_epochs
=
2
\
--model_dir
=
${
MODEL_DIR
}
\
--model_dir
=
${
MODEL_DIR
}
\
--strategy
_type
=
tpu
\
--
distribution_
strategy
=
tpu
\
--tpu
=
grpc://
${
TPU_IP_ADDRESS
}
:8470
--tpu
=
grpc://
${
TPU_IP_ADDRESS
}
:8470
```
```
...
...
official/nlp/bert/common_flags.py
View file @
252e6384
...
@@ -22,11 +22,21 @@ from official.utils.flags import core as flags_core
...
@@ -22,11 +22,21 @@ from official.utils.flags import core as flags_core
def
define_common_bert_flags
():
def
define_common_bert_flags
():
"""Define common flags for BERT tasks."""
"""Define common flags for BERT tasks."""
flags_core
.
define_base
(
data_dir
=
False
,
model_dir
=
True
,
clean
=
False
,
train_epochs
=
False
,
epochs_between_evals
=
False
,
stop_threshold
=
False
,
batch_size
=
False
,
num_gpu
=
True
,
hooks
=
False
,
export_dir
=
False
,
distribution_strategy
=
True
,
run_eagerly
=
True
)
flags
.
DEFINE_string
(
'bert_config_file'
,
None
,
flags
.
DEFINE_string
(
'bert_config_file'
,
None
,
'Bert configuration file to define core bert layers.'
)
'Bert configuration file to define core bert layers.'
)
flags
.
DEFINE_string
(
'model_dir'
,
None
,
(
'The directory where the model weights and training/evaluation summaries '
'are stored. If not specified, save to /tmp/bert20/.'
))
flags
.
DEFINE_string
(
flags
.
DEFINE_string
(
'model_export_path'
,
None
,
'model_export_path'
,
None
,
'Path to the directory, where trainined model will be '
'Path to the directory, where trainined model will be '
...
@@ -35,11 +45,6 @@ def define_common_bert_flags():
...
@@ -35,11 +45,6 @@ def define_common_bert_flags():
flags
.
DEFINE_string
(
flags
.
DEFINE_string
(
'init_checkpoint'
,
None
,
'init_checkpoint'
,
None
,
'Initial checkpoint (usually from a pre-trained BERT model).'
)
'Initial checkpoint (usually from a pre-trained BERT model).'
)
flags
.
DEFINE_enum
(
'strategy_type'
,
'mirror'
,
[
'tpu'
,
'mirror'
,
'multi_worker_mirror'
],
'Distribution Strategy type to use for training. `tpu` uses '
'TPUStrategy for running on TPUs, `mirror` uses GPUs with single host, '
'`multi_worker_mirror` uses CPUs or GPUs with multiple hosts.'
)
flags
.
DEFINE_integer
(
'num_train_epochs'
,
3
,
flags
.
DEFINE_integer
(
'num_train_epochs'
,
3
,
'Total number of training epochs to perform.'
)
'Total number of training epochs to perform.'
)
flags
.
DEFINE_integer
(
flags
.
DEFINE_integer
(
...
@@ -49,9 +54,6 @@ def define_common_bert_flags():
...
@@ -49,9 +54,6 @@ def define_common_bert_flags():
'inside.'
)
'inside.'
)
flags
.
DEFINE_float
(
'learning_rate'
,
5e-5
,
flags
.
DEFINE_float
(
'learning_rate'
,
5e-5
,
'The initial learning rate for Adam.'
)
'The initial learning rate for Adam.'
)
flags
.
DEFINE_boolean
(
'run_eagerly'
,
False
,
'Run the model op by op without building a model function.'
)
flags
.
DEFINE_boolean
(
flags
.
DEFINE_boolean
(
'scale_loss'
,
False
,
'scale_loss'
,
False
,
'Whether to divide the loss by number of replica inside the per-replica '
'Whether to divide the loss by number of replica inside the per-replica '
...
...
official/nlp/bert/run_classifier.py
View file @
252e6384
...
@@ -35,8 +35,8 @@ from official.nlp import optimization
...
@@ -35,8 +35,8 @@ from official.nlp import optimization
from
official.nlp.bert
import
common_flags
from
official.nlp.bert
import
common_flags
from
official.nlp.bert
import
input_pipeline
from
official.nlp.bert
import
input_pipeline
from
official.nlp.bert
import
model_saving_utils
from
official.nlp.bert
import
model_saving_utils
from
official.utils.misc
import
distribution_utils
from
official.utils.misc
import
keras_utils
from
official.utils.misc
import
keras_utils
from
official.utils.misc
import
tpu_lib
flags
.
DEFINE_enum
(
flags
.
DEFINE_enum
(
'mode'
,
'train_and_eval'
,
[
'train_and_eval'
,
'export_only'
],
'mode'
,
'train_and_eval'
,
[
'train_and_eval'
,
'export_only'
],
...
@@ -350,16 +350,10 @@ def main(_):
...
@@ -350,16 +350,10 @@ def main(_):
if
not
FLAGS
.
model_dir
:
if
not
FLAGS
.
model_dir
:
FLAGS
.
model_dir
=
'/tmp/bert20/'
FLAGS
.
model_dir
=
'/tmp/bert20/'
strategy
=
None
strategy
=
distribution_utils
.
get_distribution_strategy
(
if
FLAGS
.
strategy_type
==
'mirror'
:
distribution_strategy
=
FLAGS
.
distribution_strategy
,
strategy
=
tf
.
distribute
.
MirroredStrategy
()
num_gpus
=
FLAGS
.
num_gpus
,
elif
FLAGS
.
strategy_type
==
'tpu'
:
tpu_address
=
FLAGS
.
tpu
)
cluster_resolver
=
tpu_lib
.
tpu_initialize
(
FLAGS
.
tpu
)
strategy
=
tf
.
distribute
.
experimental
.
TPUStrategy
(
cluster_resolver
)
else
:
raise
ValueError
(
'The distribution strategy type is not supported: %s'
%
FLAGS
.
strategy_type
)
max_seq_length
=
input_meta_data
[
'max_seq_length'
]
max_seq_length
=
input_meta_data
[
'max_seq_length'
]
train_input_fn
=
get_dataset_fn
(
train_input_fn
=
get_dataset_fn
(
FLAGS
.
train_data_path
,
FLAGS
.
train_data_path
,
...
...
official/nlp/bert/run_pretraining.py
View file @
252e6384
...
@@ -30,6 +30,7 @@ from official.nlp import optimization
...
@@ -30,6 +30,7 @@ from official.nlp import optimization
from
official.nlp.bert
import
common_flags
from
official.nlp.bert
import
common_flags
from
official.nlp.bert
import
input_pipeline
from
official.nlp.bert
import
input_pipeline
from
official.nlp.bert
import
model_saving_utils
from
official.nlp.bert
import
model_saving_utils
from
official.utils.misc
import
distribution_utils
from
official.utils.misc
import
tpu_lib
from
official.utils.misc
import
tpu_lib
flags
.
DEFINE_string
(
'input_files'
,
None
,
flags
.
DEFINE_string
(
'input_files'
,
None
,
...
@@ -172,15 +173,10 @@ def main(_):
...
@@ -172,15 +173,10 @@ def main(_):
if
not
FLAGS
.
model_dir
:
if
not
FLAGS
.
model_dir
:
FLAGS
.
model_dir
=
'/tmp/bert20/'
FLAGS
.
model_dir
=
'/tmp/bert20/'
strategy
=
None
strategy
=
distribution_utils
.
get_distribution_strategy
(
if
FLAGS
.
strategy_type
==
'mirror'
:
distribution_strategy
=
FLAGS
.
distribution_strategy
,
strategy
=
tf
.
distribute
.
MirroredStrategy
()
num_gpus
=
FLAGS
.
num_gpus
,
elif
FLAGS
.
strategy_type
==
'tpu'
:
tpu_address
=
FLAGS
.
tpu
)
cluster_resolver
=
tpu_lib
.
tpu_initialize
(
FLAGS
.
tpu
)
strategy
=
tf
.
distribute
.
experimental
.
TPUStrategy
(
cluster_resolver
)
else
:
raise
ValueError
(
'The distribution strategy type is not supported: %s'
%
FLAGS
.
strategy_type
)
if
strategy
:
if
strategy
:
print
(
'***** Number of cores used : '
,
strategy
.
num_replicas_in_sync
)
print
(
'***** Number of cores used : '
,
strategy
.
num_replicas_in_sync
)
...
...
official/nlp/bert/run_squad.py
View file @
252e6384
...
@@ -36,6 +36,7 @@ from official.nlp.bert import input_pipeline
...
@@ -36,6 +36,7 @@ from official.nlp.bert import input_pipeline
from
official.nlp.bert
import
model_saving_utils
from
official.nlp.bert
import
model_saving_utils
from
official.nlp.bert
import
squad_lib
from
official.nlp.bert
import
squad_lib
from
official.nlp.bert
import
tokenization
from
official.nlp.bert
import
tokenization
from
official.utils.misc
import
distribution_utils
from
official.utils.misc
import
keras_utils
from
official.utils.misc
import
keras_utils
from
official.utils.misc
import
tpu_lib
from
official.utils.misc
import
tpu_lib
...
@@ -386,17 +387,10 @@ def main(_):
...
@@ -386,17 +387,10 @@ def main(_):
export_squad
(
FLAGS
.
model_export_path
,
input_meta_data
)
export_squad
(
FLAGS
.
model_export_path
,
input_meta_data
)
return
return
strategy
=
None
strategy
=
distribution_utils
.
get_distribution_strategy
(
if
FLAGS
.
strategy_type
==
'mirror'
:
distribution_strategy
=
FLAGS
.
distribution_strategy
,
strategy
=
tf
.
distribute
.
MirroredStrategy
()
num_gpus
=
FLAGS
.
num_gpus
,
elif
FLAGS
.
strategy_type
==
'multi_worker_mirror'
:
tpu_address
=
FLAGS
.
tpu
)
strategy
=
tf
.
distribute
.
experimental
.
MultiWorkerMirroredStrategy
()
elif
FLAGS
.
strategy_type
==
'tpu'
:
cluster_resolver
=
tpu_lib
.
tpu_initialize
(
FLAGS
.
tpu
)
strategy
=
tf
.
distribute
.
experimental
.
TPUStrategy
(
cluster_resolver
)
else
:
raise
ValueError
(
'The distribution strategy type is not supported: %s'
%
FLAGS
.
strategy_type
)
if
FLAGS
.
mode
in
(
'train'
,
'train_and_predict'
):
if
FLAGS
.
mode
in
(
'train'
,
'train_and_predict'
):
train_squad
(
strategy
,
input_meta_data
)
train_squad
(
strategy
,
input_meta_data
)
if
FLAGS
.
mode
in
(
'predict'
,
'train_and_predict'
):
if
FLAGS
.
mode
in
(
'predict'
,
'train_and_predict'
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment