Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
2741cc5f
Commit
2741cc5f
authored
Jan 30, 2020
by
A. Unique TensorFlower
Browse files
BERT enable gpu thread mode as "gpu_private" for 8-GPU runs.
PiperOrigin-RevId: 292369407
parent
2c48c0dd
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
22 additions
and
0 deletions
+22
-0
official/benchmark/bert_squad_benchmark.py
official/benchmark/bert_squad_benchmark.py
+20
-0
official/nlp/bert/common_flags.py
official/nlp/bert/common_flags.py
+2
-0
No files found.
official/benchmark/bert_squad_benchmark.py
View file @
2741cc5f
...
@@ -32,6 +32,7 @@ from official.benchmark import bert_benchmark_utils as benchmark_utils
...
@@ -32,6 +32,7 @@ from official.benchmark import bert_benchmark_utils as benchmark_utils
from
official.benchmark
import
squad_evaluate_v1_1
from
official.benchmark
import
squad_evaluate_v1_1
from
official.nlp.bert
import
run_squad
from
official.nlp.bert
import
run_squad
from
official.utils.misc
import
distribution_utils
from
official.utils.misc
import
distribution_utils
from
official.utils.misc
import
keras_utils
from
official.utils.testing
import
benchmark_wrappers
from
official.utils.testing
import
benchmark_wrappers
...
@@ -90,10 +91,22 @@ class BertSquadBenchmarkBase(benchmark_utils.BertBenchmarkBase):
...
@@ -90,10 +91,22 @@ class BertSquadBenchmarkBase(benchmark_utils.BertBenchmarkBase):
distribution_strategy
=
'mirrored'
if
use_ds
else
'off'
,
distribution_strategy
=
'mirrored'
if
use_ds
else
'off'
,
num_gpus
=
self
.
num_gpus
)
num_gpus
=
self
.
num_gpus
)
def
_init_gpu_and_data_threads
(
self
):
"""Set env variables before any TF calls."""
if
FLAGS
.
tf_gpu_thread_mode
:
keras_utils
.
set_gpu_thread_mode_and_count
(
per_gpu_thread_count
=
FLAGS
.
per_gpu_thread_count
,
gpu_thread_mode
=
FLAGS
.
tf_gpu_thread_mode
,
num_gpus
=
self
.
num_gpus
,
datasets_num_private_threads
=
FLAGS
.
datasets_num_private_threads
)
@
flagsaver
.
flagsaver
@
flagsaver
.
flagsaver
def
_train_squad
(
self
,
use_ds
=
True
,
run_eagerly
=
False
):
def
_train_squad
(
self
,
use_ds
=
True
,
run_eagerly
=
False
):
"""Runs BERT SQuAD training."""
"""Runs BERT SQuAD training."""
assert
tf
.
version
.
VERSION
.
startswith
(
'2.'
)
assert
tf
.
version
.
VERSION
.
startswith
(
'2.'
)
self
.
_init_gpu_and_data_threads
()
input_meta_data
=
self
.
_read_input_meta_data_from_file
()
input_meta_data
=
self
.
_read_input_meta_data_from_file
()
strategy
=
self
.
_get_distribution_strategy
(
use_ds
)
strategy
=
self
.
_get_distribution_strategy
(
use_ds
)
...
@@ -107,6 +120,7 @@ class BertSquadBenchmarkBase(benchmark_utils.BertBenchmarkBase):
...
@@ -107,6 +120,7 @@ class BertSquadBenchmarkBase(benchmark_utils.BertBenchmarkBase):
def
_evaluate_squad
(
self
,
use_ds
=
True
):
def
_evaluate_squad
(
self
,
use_ds
=
True
):
"""Runs BERT SQuAD evaluation."""
"""Runs BERT SQuAD evaluation."""
assert
tf
.
version
.
VERSION
.
startswith
(
'2.'
)
assert
tf
.
version
.
VERSION
.
startswith
(
'2.'
)
self
.
_init_gpu_and_data_threads
()
input_meta_data
=
self
.
_read_input_meta_data_from_file
()
input_meta_data
=
self
.
_read_input_meta_data_from_file
()
strategy
=
self
.
_get_distribution_strategy
(
use_ds
)
strategy
=
self
.
_get_distribution_strategy
(
use_ds
)
...
@@ -231,6 +245,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
...
@@ -231,6 +245,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
self
.
num_gpus
=
8
self
.
num_gpus
=
8
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_8_gpu_squad'
)
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_8_gpu_squad'
)
FLAGS
.
train_batch_size
=
32
FLAGS
.
train_batch_size
=
32
FLAGS
.
tf_gpu_thread_mode
=
'gpu_private'
self
.
_run_and_report_benchmark
()
self
.
_run_and_report_benchmark
()
...
@@ -292,6 +307,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
...
@@ -292,6 +307,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
FLAGS
.
train_batch_size
=
32
FLAGS
.
train_batch_size
=
32
FLAGS
.
dtype
=
'fp16'
FLAGS
.
dtype
=
'fp16'
FLAGS
.
loss_scale
=
'dynamic'
FLAGS
.
loss_scale
=
'dynamic'
FLAGS
.
tf_gpu_thread_mode
=
'gpu_private'
self
.
_run_and_report_benchmark
()
self
.
_run_and_report_benchmark
()
...
@@ -328,6 +344,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
...
@@ -328,6 +344,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
FLAGS
.
train_batch_size
=
32
FLAGS
.
train_batch_size
=
32
FLAGS
.
dtype
=
'fp16'
FLAGS
.
dtype
=
'fp16'
FLAGS
.
fp16_implementation
=
'graph_rewrite'
FLAGS
.
fp16_implementation
=
'graph_rewrite'
FLAGS
.
tf_gpu_thread_mode
=
'gpu_private'
self
.
_run_and_report_benchmark
()
self
.
_run_and_report_benchmark
()
...
@@ -400,6 +417,7 @@ class BertSquadAccuracy(BertSquadBenchmarkBase):
...
@@ -400,6 +417,7 @@ class BertSquadAccuracy(BertSquadBenchmarkBase):
self
.
num_gpus
=
8
self
.
num_gpus
=
8
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_8_gpu_squad'
)
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_8_gpu_squad'
)
FLAGS
.
train_batch_size
=
24
FLAGS
.
train_batch_size
=
24
FLAGS
.
tf_gpu_thread_mode
=
'gpu_private'
self
.
_run_and_report_benchmark
()
self
.
_run_and_report_benchmark
()
...
@@ -412,6 +430,7 @@ class BertSquadAccuracy(BertSquadBenchmarkBase):
...
@@ -412,6 +430,7 @@ class BertSquadAccuracy(BertSquadBenchmarkBase):
FLAGS
.
train_batch_size
=
32
FLAGS
.
train_batch_size
=
32
FLAGS
.
dtype
=
'fp16'
FLAGS
.
dtype
=
'fp16'
FLAGS
.
loss_scale
=
'dynamic'
FLAGS
.
loss_scale
=
'dynamic'
FLAGS
.
tf_gpu_thread_mode
=
'gpu_private'
self
.
_run_and_report_benchmark
()
self
.
_run_and_report_benchmark
()
...
@@ -423,6 +442,7 @@ class BertSquadAccuracy(BertSquadBenchmarkBase):
...
@@ -423,6 +442,7 @@ class BertSquadAccuracy(BertSquadBenchmarkBase):
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_8_gpu_squad_xla'
)
FLAGS
.
model_dir
=
self
.
_get_model_dir
(
'benchmark_8_gpu_squad_xla'
)
FLAGS
.
train_batch_size
=
32
FLAGS
.
train_batch_size
=
32
FLAGS
.
enable_xla
=
True
FLAGS
.
enable_xla
=
True
FLAGS
.
tf_gpu_thread_mode
=
'gpu_private'
self
.
_run_and_report_benchmark
()
self
.
_run_and_report_benchmark
()
...
...
official/nlp/bert/common_flags.py
View file @
2741cc5f
...
@@ -83,6 +83,8 @@ def define_common_bert_flags():
...
@@ -83,6 +83,8 @@ def define_common_bert_flags():
loss_scale
=
True
,
loss_scale
=
True
,
all_reduce_alg
=
True
,
all_reduce_alg
=
True
,
num_packs
=
False
,
num_packs
=
False
,
tf_gpu_thread_mode
=
True
,
datasets_num_private_threads
=
True
,
enable_xla
=
True
,
enable_xla
=
True
,
fp16_implementation
=
True
,
fp16_implementation
=
True
,
)
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment