ModelZoo / ResNet50_tensorflow · Commits

Commit f276d472, authored Apr 01, 2020 by Allen Wang; committed by A. Unique TensorFlower on Apr 01, 2020.

    Internal change

    PiperOrigin-RevId: 304242451

Parent: a3be7365

Showing 9 changed files with 692 additions and 59 deletions (+692, −59).
Files changed:

    official/benchmark/keras_imagenet_benchmark.py                    +548  −14
    official/modeling/hyperparams/base_config.py                        +9   −2
    official/vision/image_classification/callbacks.py                  +18   −7
    official/vision/image_classification/classifier_trainer.py         +60  −27
    official/vision/image_classification/classifier_trainer_test.py    +11   −4
    official/vision/image_classification/configs/base_configs.py       +15   −1
    official/vision/image_classification/configs/configs.py             +2   −0
    official/vision/image_classification/dataset_factory.py            +27   −3
    official/vision/image_classification/resnet/common.py               +2   −1
official/benchmark/keras_imagenet_benchmark.py

```python
# Lint as: python3
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
...
```

@@ -15,14 +16,18 @@

```python
"""Executes Keras benchmarks and accuracy tests."""
from __future__ import print_function

import json
import os
import time

from typing import Any, MutableMapping, Optional

from absl import flags
import tensorflow as tf

# pylint: disable=g-bad-import-order
from official.benchmark import keras_benchmark
from official.utils.testing import benchmark_wrappers
from official.vision.image_classification import classifier_trainer
from official.vision.image_classification.resnet import resnet_imagenet_main

MIN_TOP_1_ACCURACY = 0.76
...
```
@@ -41,6 +46,62 @@ MODEL_OPTIMIZATION_TOP_1_ACCURACY = {

```python
FLAGS = flags.FLAGS


def _get_classifier_parameters(
    num_gpus: int = 0,
    builder: str = 'records',
    skip_eval: bool = False,
    distribution_strategy: str = 'mirrored',
    per_replica_batch_size: int = 128,
    epochs: int = 90,
    steps: int = 0,
    epochs_between_evals: int = 1,
    dtype: str = 'float32',
    enable_xla: bool = False,
    run_eagerly: bool = False,
    gpu_thread_mode: Optional[str] = None,
    dataset_num_private_threads: Optional[int] = None,
    loss_scale: Optional[str] = None) -> MutableMapping[str, Any]:
  """Gets classifier trainer's ResNet parameters."""
  return {
      'runtime': {
          'num_gpus': num_gpus,
          'distribution_strategy': distribution_strategy,
          'run_eagerly': run_eagerly,
          'enable_xla': enable_xla,
          'dataset_num_private_threads': dataset_num_private_threads,
          'gpu_thread_mode': gpu_thread_mode,
          'loss_scale': loss_scale,
      },
      'train_dataset': {
          'builder': builder,
          'use_per_replica_batch_size': True,
          'batch_size': per_replica_batch_size,
          'image_size': 224,
          'dtype': dtype,
      },
      'validation_dataset': {
          'builder': builder,
          'batch_size': per_replica_batch_size,
          'use_per_replica_batch_size': True,
          'image_size': 224,
          'dtype': dtype,
      },
      'train': {
          'epochs': epochs,
          'steps': steps,
          'callbacks': {
              'enable_tensorboard': False,
              'enable_checkpoint_and_export': False,
              'enable_time_history': True,
          },
      },
      'evaluation': {
          'epochs_between_evals': epochs_between_evals,
          'skip_eval': skip_eval,
      },
  }
```
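The mapping above is consumed by serializing it into the trainer's `params_override` flag (see `_run_and_report_benchmark` below). A minimal sketch of that round trip, using only the standard library; the concrete values are illustrative:

```python
# Sketch: the nested dict is serialized to JSON and handed to the
# classifier trainer, which parses it back as a hierarchical override.
import json

params = {
    'runtime': {'num_gpus': 8, 'loss_scale': 'dynamic'},
    'train': {'epochs': 90, 'steps': 0},
}
serialized = json.dumps(params)  # value assigned to FLAGS.params_override
assert json.loads(serialized)['runtime']['num_gpus'] == 8
```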
```python
class Resnet50KerasAccuracy(keras_benchmark.KerasBenchmark):
  """Benchmark accuracy tests for ResNet50 in Keras."""
  ...
```

@@ -160,6 +221,136 @@ class Resnet50KerasAccuracy(keras_benchmark.KerasBenchmark):

```python
    return os.path.join(self.output_dir, folder_name)


class Resnet50KerasClassifierAccuracy(keras_benchmark.KerasBenchmark):
  """Benchmark accuracy tests for ResNet50 in Keras."""

  # TODO(allencwang) - Create the benchmark for eager, dist_strat, 8 GPUs and
  # automatic mixed precision once Graph rewrite is supported.

  def __init__(self,
               output_dir: Optional[str] = None,
               root_data_dir: Optional[str] = None,
               **kwargs):
    """A benchmark class.

    Args:
      output_dir: directory where to output e.g. log files
      root_data_dir: directory under which to look for dataset
      **kwargs: arbitrary named arguments. This is needed to make the
        constructor forward compatible in case PerfZero provides more
        named arguments before updating the constructor.
    """
    flag_methods = [classifier_trainer.define_classifier_flags]

    self.data_dir = os.path.join(root_data_dir, 'imagenet')
    super(Resnet50KerasClassifierAccuracy, self).__init__(
        output_dir=output_dir, flag_methods=flag_methods)

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(
      self,
      experiment_name: str,
      top_1_min: float = MIN_TOP_1_ACCURACY,
      top_1_max: float = MAX_TOP_1_ACCURACY,
      num_gpus: int = 0,
      distribution_strategy: str = 'mirrored',
      per_replica_batch_size: int = 128,
      epochs: int = 90,
      steps: int = 0,
      epochs_between_evals: int = 1,
      dtype: str = 'float32',
      enable_xla: bool = False,
      run_eagerly: bool = False,
      gpu_thread_mode: Optional[str] = None,
      dataset_num_private_threads: Optional[int] = None,
      loss_scale: Optional[str] = None):
    """Runs and reports the benchmark given the provided configuration."""
    self._setup()
    FLAGS.model_type = 'resnet'
    FLAGS.dataset = 'imagenet'
    FLAGS.mode = 'train_and_eval'
    FLAGS.data_dir = self.data_dir
    FLAGS.model_dir = self._get_model_dir(experiment_name)
    parameters = _get_classifier_parameters(
        num_gpus=num_gpus,
        distribution_strategy=distribution_strategy,
        per_replica_batch_size=per_replica_batch_size,
        epochs=epochs,
        steps=steps,
        epochs_between_evals=epochs_between_evals,
        dtype=dtype,
        enable_xla=enable_xla,
        run_eagerly=run_eagerly,
        gpu_thread_mode=gpu_thread_mode,
        dataset_num_private_threads=dataset_num_private_threads,
        loss_scale=loss_scale)
    FLAGS.params_override = json.dumps(parameters)
    total_batch_size = num_gpus * per_replica_batch_size

    start_time_sec = time.time()
    stats = classifier_trainer.run(flags.FLAGS)
    wall_time_sec = time.time() - start_time_sec

    super(Resnet50KerasClassifierAccuracy, self)._report_benchmark(
        stats,
        wall_time_sec,
        top_1_min=top_1_min,
        top_1_max=top_1_max,
        total_batch_size=total_batch_size,
        log_steps=100)

  def benchmark_8_gpu(self):
    """Tests Keras model with eager, dist_strat and 8 GPUs."""
    self._run_benchmark(
        experiment_name='benchmark_8_gpu',
        num_gpus=8,
        per_replica_batch_size=128,
        epochs=90,
        epochs_between_evals=10,
        dtype='float32',
        dataset_num_private_threads=14)

  def benchmark_8_gpu_fp16(self):
    """Tests Keras model with eager, dist_strat, 8 GPUs, and fp16."""
    self._run_benchmark(
        experiment_name='benchmark_8_gpu_fp16',
        num_gpus=8,
        per_replica_batch_size=256,
        epochs=90,
        epochs_between_evals=10,
        dtype='float16',
        gpu_thread_mode='gpu_private')

  def benchmark_xla_8_gpu_fp16(self):
    """Tests Keras model with XLA, eager, dist_strat, 8 GPUs and fp16."""
    self._run_benchmark(
        experiment_name='benchmark_xla_8_gpu_fp16',
        num_gpus=8,
        per_replica_batch_size=256,
        epochs=90,
        epochs_between_evals=10,
        dtype='float16',
        enable_xla=True,
        gpu_thread_mode='gpu_private')

  def benchmark_xla_8_gpu_fp16_dynamic(self):
    """Tests Keras model with XLA, eager, dist_strat, 8 GPUs, dynamic fp16."""
    self._run_benchmark(
        experiment_name='benchmark_xla_8_gpu_fp16_dynamic',
        top_1_min=0.736,
        num_gpus=8,
        per_replica_batch_size=256,
        epochs=90,
        epochs_between_evals=10,
        dtype='float16',
        loss_scale='dynamic',
        gpu_thread_mode='gpu_private')

  def _get_model_dir(self, folder_name):
    return os.path.join(self.output_dir, folder_name)


class MobilenetV1KerasAccuracy(keras_benchmark.KerasBenchmark):
  """Benchmark accuracy tests for MobilenetV1 in Keras."""
  ...
```

@@ -221,6 +412,352 @@ class MobilenetV1KerasAccuracy(keras_benchmark.KerasBenchmark):
```python
    return os.path.join(self.output_dir, folder_name)


class Resnet50KerasClassifierBenchmarkBase(keras_benchmark.KerasBenchmark):
  """Resnet50 (classifier_trainer) benchmarks."""

  def __init__(self,
               output_dir=None,
               default_flags=None,
               tpu=None,
               dataset_builder='records',
               train_epochs=1,
               train_steps=110,
               data_dir=None):
    flag_methods = [classifier_trainer.define_classifier_flags]

    self.dataset_builder = dataset_builder
    self.train_epochs = train_epochs
    self.train_steps = train_steps
    self.data_dir = data_dir

    super(Resnet50KerasClassifierBenchmarkBase, self).__init__(
        output_dir=output_dir,
        flag_methods=flag_methods,
        default_flags=default_flags,
        tpu=tpu)

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(
      self,
      experiment_name: str,
      skip_steps: Optional[int] = None,
      top_1_min: float = MIN_TOP_1_ACCURACY,
      top_1_max: float = MAX_TOP_1_ACCURACY,
      num_gpus: int = 0,
      distribution_strategy: str = 'mirrored',
      per_replica_batch_size: int = 128,
      epochs_between_evals: int = 1,
      dtype: str = 'float32',
      enable_xla: bool = False,
      run_eagerly: bool = False,
      gpu_thread_mode: Optional[str] = None,
      dataset_num_private_threads: Optional[int] = None,
      loss_scale: Optional[str] = None):
    """Runs and reports the benchmark given the provided configuration."""
    self._setup()
    FLAGS.model_type = 'resnet'
    FLAGS.dataset = 'imagenet'
    FLAGS.mode = 'train_and_eval'
    FLAGS.data_dir = self.data_dir
    FLAGS.model_dir = self._get_model_dir(experiment_name)
    parameters = _get_classifier_parameters(
        builder=self.dataset_builder,
        skip_eval=True,
        num_gpus=num_gpus,
        distribution_strategy=distribution_strategy,
        per_replica_batch_size=per_replica_batch_size,
        epochs=self.train_epochs,
        steps=self.train_steps,
        epochs_between_evals=epochs_between_evals,
        dtype=dtype,
        enable_xla=enable_xla,
        gpu_thread_mode=gpu_thread_mode,
        dataset_num_private_threads=dataset_num_private_threads,
        loss_scale=loss_scale)
    FLAGS.params_override = json.dumps(parameters)
    total_batch_size = num_gpus * per_replica_batch_size

    start_time_sec = time.time()
    stats = classifier_trainer.run(flags.FLAGS)
    wall_time_sec = time.time() - start_time_sec

    # Number of logged step time entries that are excluded in performance
    # report. We keep results from last 100 batches, or skip the steps based on
    # input skip_steps.
    warmup = (skip_steps or (self.train_steps - 100)) // FLAGS.log_steps

    super(Resnet50KerasClassifierBenchmarkBase, self)._report_benchmark(
        stats,
        wall_time_sec,
        total_batch_size=total_batch_size,
        log_steps=FLAGS.log_steps,
        warmup=warmup,
        start_time_sec=start_time_sec)
```
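The `warmup` computation above converts a step budget into a number of logged step-time entries to drop before reporting. A worked example with the defaults visible in this file (`train_steps=110` from the constructor, `log_steps=10` as the subclasses below set it):

```python
# Worked example of the warmup calculation (values taken from the
# defaults in this file; skip_steps left unset).
train_steps = 110
log_steps = 10
skip_steps = None

# Keep the last 100 batches unless skip_steps says otherwise; divide by
# log_steps to convert a batch count into logged TimeHistory entries.
warmup = (skip_steps or (train_steps - 100)) // log_steps
assert warmup == 1  # the first logged step-time entry is excluded
```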
```python
  def benchmark_1_gpu_no_dist_strat(self):
    """Tests Keras model with 1 GPU, no distribution strategy."""
    self._run_and_report_benchmark(
        experiment_name='benchmark_1_gpu_no_dist_strat',
        num_gpus=1,
        distribution_strategy='off',
        per_replica_batch_size=128)

  def benchmark_1_gpu_no_dist_strat_run_eagerly(self):
    """Tests Keras model with 1 GPU, no distribution strategy, run eagerly."""
    self._run_and_report_benchmark(
        experiment_name='benchmark_1_gpu_no_dist_strat_run_eagerly',
        num_gpus=1,
        run_eagerly=True,
        distribution_strategy='off',
        per_replica_batch_size=64)

  def benchmark_1_gpu_no_dist_strat_run_eagerly_fp16(self):
    """Tests with 1 GPU, no distribution strategy, fp16, run eagerly."""
    self._run_and_report_benchmark(
        experiment_name='benchmark_1_gpu_no_dist_strat_run_eagerly_fp16',
        num_gpus=1,
        run_eagerly=True,
        distribution_strategy='off',
        dtype='float16',
        per_replica_batch_size=128)

  def benchmark_1_gpu(self):
    """Tests Keras model with 1 GPU."""
    self._run_and_report_benchmark(
        experiment_name='benchmark_1_gpu',
        num_gpus=1,
        distribution_strategy='one_device',
        per_replica_batch_size=128)

  def benchmark_xla_1_gpu(self):
    """Tests Keras model with XLA and 1 GPU."""
    self._run_and_report_benchmark(
        experiment_name='benchmark_xla_1_gpu',
        num_gpus=1,
        enable_xla=True,
        distribution_strategy='one_device',
        per_replica_batch_size=128)

  def benchmark_1_gpu_fp16(self):
    """Tests Keras model with 1 GPU and fp16."""
    self._run_and_report_benchmark(
        experiment_name='benchmark_1_gpu_fp16',
        num_gpus=1,
        distribution_strategy='one_device',
        dtype='float16',
        per_replica_batch_size=256)

  def benchmark_1_gpu_fp16_dynamic(self):
    """Tests Keras model with 1 GPU, fp16, and dynamic loss scaling."""
    self._run_and_report_benchmark(
        experiment_name='benchmark_1_gpu_fp16_dynamic',
        num_gpus=1,
        distribution_strategy='one_device',
        dtype='float16',
        per_replica_batch_size=256,
        loss_scale='dynamic')

  def benchmark_xla_1_gpu_fp16(self):
    """Tests Keras model with XLA, 1 GPU and fp16."""
    self._run_and_report_benchmark(
        experiment_name='benchmark_xla_1_gpu_fp16',
        num_gpus=1,
        enable_xla=True,
        distribution_strategy='one_device',
        dtype='float16',
        per_replica_batch_size=256)

  def benchmark_xla_1_gpu_fp16_tweaked(self):
    """Tests Keras model with XLA, 1 GPU, fp16, and manual config tuning."""
    self._run_and_report_benchmark(
        experiment_name='benchmark_xla_1_gpu_fp16_tweaked',
        num_gpus=1,
        enable_xla=True,
        distribution_strategy='one_device',
        dtype='float16',
        per_replica_batch_size=256,
        gpu_thread_mode='gpu_private')

  def benchmark_xla_1_gpu_fp16_dynamic(self):
    """Tests Keras model with XLA, 1 GPU, fp16, and dynamic loss scaling."""
    self._run_and_report_benchmark(
        experiment_name='benchmark_xla_1_gpu_fp16_dynamic',
        num_gpus=1,
        enable_xla=True,
        distribution_strategy='one_device',
        dtype='float16',
        per_replica_batch_size=256,
        loss_scale='dynamic')

  def benchmark_graph_1_gpu(self):
    """Tests Keras model in legacy graph mode with 1 GPU."""
    self._run_and_report_benchmark(
        experiment_name='benchmark_graph_1_gpu',
        num_gpus=1,
        distribution_strategy='one_device',
        per_replica_batch_size=128)

  def benchmark_graph_xla_1_gpu(self):
    """Tests Keras model in legacy graph mode with XLA and 1 GPU."""
    self._run_and_report_benchmark(
        experiment_name='benchmark_graph_xla_1_gpu',
        num_gpus=1,
        enable_xla=True,
        distribution_strategy='one_device',
        per_replica_batch_size=128)

  def benchmark_8_gpu(self):
    """Tests Keras model with 8 GPUs."""
    self._run_and_report_benchmark(
        experiment_name='benchmark_8_gpu',
        num_gpus=8,
        distribution_strategy='mirrored',
        per_replica_batch_size=128)

  def benchmark_8_gpu_tweaked(self):
    """Tests Keras model with manual config tuning and 8 GPUs."""
    self._run_and_report_benchmark(
        experiment_name='benchmark_8_gpu_tweaked',
        num_gpus=8,
        distribution_strategy='mirrored',
        per_replica_batch_size=128,
        dataset_num_private_threads=14)

  def benchmark_xla_8_gpu(self):
    """Tests Keras model with XLA and 8 GPUs."""
    self._run_and_report_benchmark(
        experiment_name='benchmark_xla_8_gpu',
        num_gpus=8,
        enable_xla=True,
        distribution_strategy='mirrored',
        per_replica_batch_size=128)

  def benchmark_xla_8_gpu_tweaked(self):
    """Tests Keras model with manual config tuning, 8 GPUs, and XLA."""
    self._run_and_report_benchmark(
        experiment_name='benchmark_xla_8_gpu_tweaked',
        num_gpus=8,
        enable_xla=True,
        distribution_strategy='mirrored',
        per_replica_batch_size=128,
        gpu_thread_mode='gpu_private',
        dataset_num_private_threads=24)

  def benchmark_8_gpu_fp16(self):
    """Tests Keras model with 8 GPUs and fp16."""
    self._run_and_report_benchmark(
        experiment_name='benchmark_8_gpu_fp16',
        num_gpus=8,
        dtype='float16',
        distribution_strategy='mirrored',
        per_replica_batch_size=256)

  def benchmark_8_gpu_fp16_tweaked(self):
    """Tests Keras model with 8 GPUs, fp16, and manual config tuning."""
    self._run_and_report_benchmark(
        experiment_name='benchmark_8_gpu_fp16_tweaked',
        num_gpus=8,
        dtype='float16',
        distribution_strategy='mirrored',
        per_replica_batch_size=256,
        gpu_thread_mode='gpu_private')

  def benchmark_8_gpu_fp16_dynamic_tweaked(self):
    """Tests Keras model with 8 GPUs, fp16, dynamic loss scaling, and tuned."""
    self._run_and_report_benchmark(
        experiment_name='benchmark_8_gpu_fp16_dynamic_tweaked',
        num_gpus=8,
        dtype='float16',
        distribution_strategy='mirrored',
        per_replica_batch_size=256,
        loss_scale='dynamic',
        gpu_thread_mode='gpu_private')

  def benchmark_xla_8_gpu_fp16(self):
    """Tests Keras model with XLA, 8 GPUs and fp16."""
    self._run_and_report_benchmark(
        experiment_name='benchmark_xla_8_gpu_fp16',
        dtype='float16',
        num_gpus=8,
        enable_xla=True,
        distribution_strategy='mirrored',
        per_replica_batch_size=256)

  def benchmark_xla_8_gpu_fp16_tweaked(self):
    """Test Keras model with manual config tuning, XLA, 8 GPUs and fp16."""
    self._run_and_report_benchmark(
        experiment_name='benchmark_xla_8_gpu_fp16_tweaked',
        dtype='float16',
        num_gpus=8,
        enable_xla=True,
        distribution_strategy='mirrored',
        per_replica_batch_size=256,
        gpu_thread_mode='gpu_private',
        dataset_num_private_threads=48)

  def benchmark_xla_8_gpu_fp16_tweaked_delay_measure(self):
    """Tests with manual config tuning, XLA, 8 GPUs and fp16.

    Delay performance measurement for stable performance on 96 vCPU platforms.
    """
    self._run_and_report_benchmark(
        experiment_name='benchmark_xla_8_gpu_fp16_tweaked_delay_measure',
        dtype='float16',
        num_gpus=8,
        enable_xla=True,
        distribution_strategy='mirrored',
        per_replica_batch_size=256,
        gpu_thread_mode='gpu_private',
        steps=310)

  def benchmark_xla_8_gpu_fp16_dynamic_tweaked(self):
    """Tests Keras model with config tuning, XLA, 8 GPUs and dynamic fp16."""
    self._run_and_report_benchmark(
        experiment_name='benchmark_xla_8_gpu_fp16_dynamic_tweaked',
        dtype='float16',
        num_gpus=8,
        enable_xla=True,
        distribution_strategy='mirrored',
        per_replica_batch_size=256,
        gpu_thread_mode='gpu_private',
        loss_scale='dynamic',
        dataset_num_private_threads=48)

  def benchmark_graph_8_gpu(self):
    """Tests Keras model in legacy graph mode with 8 GPUs."""
    self._run_and_report_benchmark(
        experiment_name='benchmark_graph_8_gpu',
        num_gpus=8,
        distribution_strategy='mirrored',
        per_replica_batch_size=128)

  def benchmark_graph_xla_8_gpu(self):
    """Tests Keras model in legacy graph mode with XLA and 8 GPUs."""
    self._run_and_report_benchmark(
        experiment_name='benchmark_graph_xla_8_gpu',
        num_gpus=8,
        enable_xla=True,
        distribution_strategy='mirrored',
        per_replica_batch_size=128)

  def benchmark_2x2_tpu_fp16(self):
    """Test Keras model with 2x2 TPU, fp16."""
    self._run_and_report_benchmark(
        experiment_name='benchmark_2x2_tpu_fp16',
        dtype='bfloat16',
        distribution_strategy='tpu',
        per_replica_batch_size=128)

  def benchmark_4x4_tpu_fp16(self):
    """Test Keras model with 4x4 TPU, fp16."""
    self._run_and_report_benchmark(
        experiment_name='benchmark_4x4_tpu_fp16',
        dtype='bfloat16',
        distribution_strategy='tpu',
        per_replica_batch_size=128)

  def fill_report_object(self, stats):
    super(Resnet50KerasClassifierBenchmarkBase, self).fill_report_object(
        stats,
        total_batch_size=FLAGS.batch_size,
        log_steps=FLAGS.log_steps)
```
```python
class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
  """Resnet50 benchmarks."""
  ...
```

@@ -842,34 +1379,31 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):

```diff
         log_steps=FLAGS.log_steps)
 
 
-class Resnet50KerasBenchmarkSynth(Resnet50KerasBenchmarkBase):
+class Resnet50KerasBenchmarkSynth(Resnet50KerasClassifierBenchmarkBase):
   """Resnet50 synthetic benchmark tests."""
 
   def __init__(self, output_dir=None, root_data_dir=None, tpu=None, **kwargs):
     def_flags = {}
     def_flags['skip_eval'] = True
     def_flags['report_accuracy_metrics'] = False
     def_flags['use_synthetic_data'] = True
     def_flags['train_steps'] = 110
     def_flags['log_steps'] = 10
 
     super(Resnet50KerasBenchmarkSynth, self).__init__(
-        output_dir=output_dir, default_flags=def_flags, tpu=tpu)
+        output_dir=output_dir,
+        default_flags=def_flags,
+        tpu=tpu,
+        dataset_builder='synthetic',
+        train_epochs=1,
+        train_steps=110)
 
 
-class Resnet50KerasBenchmarkReal(Resnet50KerasBenchmarkBase):
+class Resnet50KerasBenchmarkReal(Resnet50KerasClassifierBenchmarkBase):
   """Resnet50 real data benchmark tests."""
 
   def __init__(self, output_dir=None, root_data_dir=None, tpu=None, **kwargs):
+    data_dir = ('/readahead/200M/placer/prod/home/distbelief/'
+                'imagenet-tensorflow/imagenet-2012-tfrecord')
     def_flags = {}
     def_flags['skip_eval'] = True
     def_flags['report_accuracy_metrics'] = False
     def_flags['data_dir'] = os.path.join(root_data_dir, 'imagenet')
     def_flags['train_steps'] = 110
     def_flags['log_steps'] = 10
 
     super(Resnet50KerasBenchmarkReal, self).__init__(
-        output_dir=output_dir, default_flags=def_flags, tpu=tpu)
+        output_dir=output_dir,
+        default_flags=def_flags,
+        tpu=tpu,
+        dataset_builder='records',
+        train_epochs=1,
+        train_steps=110,
+        data_dir=data_dir)
```
```python
class Resnet50KerasBenchmarkRemoteData(Resnet50KerasBenchmarkBase):
  ...
```
@@ -1245,7 +1779,7 @@ class Resnet50MultiWorkerKerasAccuracy(keras_benchmark.KerasBenchmark):

```diff
   """Resnet50 distributed accuracy tests with multiple workers."""
 
   def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
-    flag_methods = [resnet_imagenet_main.define_imagenet_keras_flags]
+    flag_methods = [classifier_trainer.define_imagenet_keras_flags]
     self.data_dir = os.path.join(root_data_dir, 'imagenet')
     super(Resnet50MultiWorkerKerasAccuracy, self).__init__(
         output_dir=output_dir, flag_methods=flag_methods)
 ...
```

@@ -1278,7 +1812,7 @@ class Resnet50MultiWorkerKerasAccuracy(keras_benchmark.KerasBenchmark):

```diff
       top_1_min=MIN_TOP_1_ACCURACY,
       top_1_max=MAX_TOP_1_ACCURACY):
     start_time_sec = time.time()
-    stats = resnet_imagenet_main.run(flags.FLAGS)
+    stats = classifier_trainer.run(flags.FLAGS)
     wall_time_sec = time.time() - start_time_sec
 
     super(Resnet50MultiWorkerKerasAccuracy, self)._report_benchmark(
 ...
```
official/modeling/hyperparams/base_config.py

@@ -257,7 +257,6 @@ class RuntimeConfig(Config):

```python
  Attributes:
    distribution_strategy: e.g. 'mirrored', 'tpu', etc.
    enable_eager: Whether or not to enable eager mode.
    enable_xla: Whether or not to enable XLA.
    per_gpu_thread_count: thread count per GPU.
    gpu_threads_enabled: Whether or not GPU threads are enabled.
    ...
```

@@ -272,9 +271,12 @@ class RuntimeConfig(Config):

```diff
     all_reduce_alg: Defines the algorithm for performing all-reduce.
     num_packs: Sets `num_packs` in the cross device ops used in
       MirroredStrategy. For details, see tf.distribute.NcclAllReduce.
+    loss_scale: The type of loss scale. This is used when setting the mixed
+      precision policy.
+    run_eagerly: Whether or not to run the experiment eagerly.
   """
   distribution_strategy: str = 'mirrored'
   enable_eager: bool = False
   enable_xla: bool = False
   gpu_threads_enabled: bool = False
   gpu_thread_mode: Optional[str] = None
 ...
```

@@ -286,6 +288,8 @@ class RuntimeConfig(Config):

```diff
   task_index: int = -1
   all_reduce_alg: Optional[str] = None
   num_packs: int = 1
+  loss_scale: Optional[str] = None
+  run_eagerly: bool = False
 
 
 @dataclasses.dataclass
 ...
```
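The two new `RuntimeConfig` fields line up with the `runtime` section that the benchmarks inject through `params_override`. A hedged sketch of such an override (the key nesting is taken from the benchmark code in this commit; the concrete values are illustrative):

```python
# Illustrative runtime override exercising the new fields.
override = {
    'runtime': {
        'loss_scale': 'dynamic',  # or a fixed scale such as 128
        'run_eagerly': False,
    },
}
```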
@@ -312,7 +316,10 @@ class CallbacksConfig(Config):

```diff
       Callback. Defaults to True.
     enable_tensorboard: Whether or not to enable Tensorboard as a Callback.
       Defaults to True.
+    enable_time_history: Whether or not to enable TimeHistory Callbacks.
+      Defaults to True.
   """
   enable_checkpoint_and_export: bool = True
   enable_tensorboard: bool = True
+  enable_time_history: bool = True
```
official/vision/image_classification/callbacks.py

```python
# Lint as: python3
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
...
```

@@ -22,15 +23,20 @@ import os

```diff
 from absl import logging
 import tensorflow as tf
-from typing import Any, List, MutableMapping, Text
+from typing import Any, List, MutableMapping
 
 from official.utils.misc import keras_utils
 
 
 def get_callbacks(model_checkpoint: bool = True,
                   include_tensorboard: bool = True,
                   time_history: bool = True,
                   track_lr: bool = True,
                   write_model_weights: bool = True,
                   initial_step: int = 0,
-                  model_dir: Text = None) -> List[tf.keras.callbacks.Callback]:
+                  batch_size: int = 0,
+                  log_steps: int = 0,
+                  model_dir: str = None) -> List[tf.keras.callbacks.Callback]:
   """Get all callbacks."""
   model_dir = model_dir or ''
   callbacks = []
 ...
```

@@ -44,6 +50,11 @@ def get_callbacks(model_checkpoint: bool = True,

```diff
         track_lr=track_lr,
         initial_step=initial_step,
         write_images=write_model_weights))
+  if time_history:
+    callbacks.append(keras_utils.TimeHistory(
+        batch_size,
+        log_steps,
+        logdir=model_dir if include_tensorboard else None))
   return callbacks
 ...
```
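A usage sketch of the widened signature (argument values are illustrative, not from the commit): the new `batch_size` and `log_steps` arguments exist so the `TimeHistory` callback can turn raw step timings into examples-per-second figures.

```python
# Illustrative call; a real caller passes the global batch size and the
# configured logging interval (see classifier_trainer.py below).
callbacks = get_callbacks(
    model_checkpoint=False,
    include_tensorboard=False,
    time_history=True,
    batch_size=1024,
    log_steps=100,
    model_dir='/tmp/model_dir')
```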
@@ -74,7 +85,7 @@ class CustomTensorBoard(tf.keras.callbacks.TensorBoard):

```diff
   # classification loss
 
   def __init__(self,
-               log_dir: Text,
+               log_dir: str,
                track_lr: bool = False,
                initial_step: int = 0,
                **kwargs):
 ...
```

@@ -84,7 +95,7 @@ class CustomTensorBoard(tf.keras.callbacks.TensorBoard):

```diff
   def on_batch_begin(self,
                      epoch: int,
-                     logs: MutableMapping[Text, Any] = None) -> None:
+                     logs: MutableMapping[str, Any] = None) -> None:
     self.step += 1
     if logs is None:
       logs = {}
 ...
```

@@ -93,7 +104,7 @@ class CustomTensorBoard(tf.keras.callbacks.TensorBoard):

```diff
   def on_epoch_begin(self,
                      epoch: int,
-                     logs: MutableMapping[Text, Any] = None) -> None:
+                     logs: MutableMapping[str, Any] = None) -> None:
     if logs is None:
       logs = {}
     metrics = self._calculate_metrics()
 ...
```

@@ -104,14 +115,14 @@ class CustomTensorBoard(tf.keras.callbacks.TensorBoard):

```diff
   def on_epoch_end(self,
                    epoch: int,
-                   logs: MutableMapping[Text, Any] = None) -> None:
+                   logs: MutableMapping[str, Any] = None) -> None:
     if logs is None:
       logs = {}
     metrics = self._calculate_metrics()
     logs.update(metrics)
     super(CustomTensorBoard, self).on_epoch_end(epoch, logs)
 
-  def _calculate_metrics(self) -> MutableMapping[Text, Any]:
+  def _calculate_metrics(self) -> MutableMapping[str, Any]:
     logs = {}
     if self._track_lr:
       logs['learning_rate'] = self._calculate_lr()
 ...
```
official/vision/image_classification/classifier_trainer.py

@@ -44,10 +44,24 @@ from official.vision.image_classification.efficientnet import efficientnet_model

```diff
 from official.vision.image_classification.resnet import common
 from official.vision.image_classification.resnet import resnet_model
 
-MODELS = {
-    'efficientnet': efficientnet_model.EfficientNet.from_name,
-    'resnet': resnet_model.resnet50,
-}
+def get_models() -> Mapping[str, tf.keras.Model]:
+  """Returns the mapping from model type name to Keras model."""
+  return {
+      'efficientnet': efficientnet_model.EfficientNet.from_name,
+      'resnet': resnet_model.resnet50,
+  }
+
+
+def get_dtype_map() -> Mapping[str, tf.dtypes.DType]:
+  """Returns the mapping from dtype string representations to TF dtypes."""
+  return {
+      'float32': tf.float32,
+      'bfloat16': tf.bfloat16,
+      'float16': tf.float16,
+      'fp32': tf.float32,
+      'bf16': tf.bfloat16,
+  }
 
 
 def _get_metrics(one_hot: bool) -> Mapping[Text, Any]:
 ...
```
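Turning the module-level `MODELS` dict into `get_models()` defers building the mapping to call time, which keeps import side effects down and makes the lookup easy to stub in tests. Usage stays a one-liner; the keyword argument below is illustrative:

```python
# Resolve a constructor by name at call time, then build it from config.
model_fn = get_models()['resnet']   # resnet_model.resnet50
model = model_fn(num_classes=1000)  # illustrative kwargs
```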
@@ -120,7 +134,7 @@ def _get_dataset_builders(params: base_configs.ExperimentConfig,

```diff
 def get_loss_scale(params: base_configs.ExperimentConfig,
                    fp16_default: float = 128.) -> float:
   """Returns the loss scale for initializations."""
-  loss_scale = params.model.loss.loss_scale
+  loss_scale = params.runtime.loss_scale
   if loss_scale == 'dynamic':
     return loss_scale
   elif loss_scale is not None:
 ...
```
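The tail of `get_loss_scale` is elided in this view. Based on the visible branches and the parameterized test further down, its behavior is roughly the following sketch (an assumed reconstruction, not the commit's exact code):

```python
# Assumed behavior of get_loss_scale: 'dynamic' passes through, an
# explicit number wins, and fp16 runs fall back to fp16_default.
def loss_scale_sketch(loss_scale, dtype, fp16_default=128.0):
  if loss_scale == 'dynamic':
    return loss_scale            # dynamic scaling passes through as-is
  elif loss_scale is not None:
    return float(loss_scale)     # an explicitly configured scale wins
  elif dtype == 'float16':
    return fp16_default          # fp16 defaults to the provided scale
  return 1.0                     # full precision needs no scaling
```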
@@ -145,7 +159,7 @@ def _get_params_from_flags(flags_obj: flags.FlagValues):

```diff
           'name': model,
       },
       'runtime': {
-          'enable_eager': flags_obj.enable_eager,
+          'run_eagerly': flags_obj.run_eagerly,
           'tpu': flags_obj.tpu,
       },
       'train_dataset': {
 ...
```

@@ -154,6 +168,11 @@ def _get_params_from_flags(flags_obj: flags.FlagValues):

```diff
       'validation_dataset': {
           'data_dir': flags_obj.data_dir,
       },
+      'train': {
+          'time_history': {
+              'log_steps': flags_obj.log_steps,
+          },
+      },
   }
   overriding_configs = (flags_obj.config_file,
 ...
```
@@ -209,10 +228,11 @@ def resume_from_checkpoint(model: tf.keras.Model,

```diff
   return int(initial_epoch)
 
 
-def initialize(params: base_configs.ExperimentConfig):
+def initialize(params: base_configs.ExperimentConfig,
+               dataset_builder: dataset_factory.DatasetBuilder):
   """Initializes backend related initializations."""
   keras_utils.set_session_config(
-      enable_eager=params.runtime.enable_eager,
+      enable_eager=params.runtime.run_eagerly,
       enable_xla=params.runtime.enable_xla)
   if params.runtime.gpu_threads_enabled:
     keras_utils.set_gpu_thread_mode_and_count(
 ...
```

@@ -221,12 +241,11 @@ def initialize(params: base_configs.ExperimentConfig):

```diff
         num_gpus=params.runtime.num_gpus,
         datasets_num_private_threads=params.runtime.dataset_num_private_threads)
-  dataset = params.train_dataset or params.validation_dataset
-  performance.set_mixed_precision_policy(dataset.dtype)
-  if dataset.data_format:
-    data_format = dataset.data_format
-  elif tf.config.list_physical_devices('GPU'):
+  performance.set_mixed_precision_policy(dataset_builder.dtype)
+  if dataset_builder.config.data_format:
+    data_format = dataset_builder.config.data_format
+  elif tf.config.list_physical_devices('GPU'):
     data_format = 'channels_first'
   else:
     data_format = 'channels_last'
 ...
```

@@ -234,7 +253,7 @@ def initialize(params: base_configs.ExperimentConfig):

```diff
   distribution_utils.configure_cluster(params.runtime.worker_hosts,
                                        params.runtime.task_index)
-  if params.runtime.enable_eager:
+  if params.runtime.run_eagerly:
     # Enable eager execution to allow step-by-step debugging
     tf.config.experimental_run_functions_eagerly(True)
 ...
```
@@ -251,7 +270,7 @@ def define_classifier_flags():

```diff
       default=None,
       help='Mode to run: `train`, `eval`, `train_and_eval` or `export`.')
   flags.DEFINE_bool(
-      'enable_eager',
+      'run_eagerly',
       default=None,
       help='Use eager execution and disable autograph for debugging.')
   flags.DEFINE_string(
 ...
```

@@ -262,6 +281,10 @@ def define_classifier_flags():

```diff
       'dataset',
       default=None,
       help='The name of the dataset, e.g. ImageNet, etc.')
+  flags.DEFINE_integer(
+      'log_steps',
+      default=100,
+      help='The interval of steps between logging of batch level stats.')
 
 
 def serialize_config(params: base_configs.ExperimentConfig,
 ...
```
@@ -304,11 +327,13 @@ def train_and_eval(

```diff
   train_steps = params.train.steps or train_builder.num_steps
   validation_steps = params.evaluation.steps or validation_builder.num_steps
 
+  initialize(params, train_builder)
+
   logging.info('Global batch size: %d', train_builder.global_batch_size)
 
   with strategy_scope:
     model_params = params.model.model_params.as_dict()
-    model = MODELS[params.model.name](**model_params)
+    model = get_models()[params.model.name](**model_params)
     learning_rate = optimizer_factory.build_learning_rate(
         params=params.model.learning_rate,
         batch_size=train_builder.global_batch_size,
 ...
```

@@ -328,8 +353,7 @@ def train_and_eval(

```diff
     loss_obj = tf.keras.losses.SparseCategoricalCrossentropy()
     model.compile(optimizer=optimizer,
                   loss=loss_obj,
-                  metrics=metrics,
-                  run_eagerly=params.runtime.enable_eager)
+                  metrics=metrics)
 
     initial_epoch = 0
     if params.train.resume_checkpoint:
 ...
```

@@ -342,26 +366,37 @@ def train_and_eval(

```diff
   callbacks = custom_callbacks.get_callbacks(
       model_checkpoint=params.train.callbacks.enable_checkpoint_and_export,
       include_tensorboard=params.train.callbacks.enable_tensorboard,
+      time_history=params.train.callbacks.enable_time_history,
       track_lr=params.train.tensorboard.track_lr,
       write_model_weights=params.train.tensorboard.write_model_weights,
       initial_step=initial_epoch * train_steps,
+      batch_size=train_builder.global_batch_size,
+      log_steps=params.train.time_history.log_steps,
       model_dir=params.model_dir)
 
+  if params.evaluation.skip_eval:
+    validation_kwargs = {}
+  else:
+    validation_kwargs = {
+        'validation_data': validation_dataset,
+        'validation_steps': validation_steps,
+        'validation_freq': params.evaluation.epochs_between_evals,
+    }
+
   history = model.fit(
       train_dataset,
       epochs=train_epochs,
       steps_per_epoch=train_steps,
       initial_epoch=initial_epoch,
       callbacks=callbacks,
-      validation_data=validation_dataset,
-      validation_steps=validation_steps,
-      validation_freq=params.evaluation.epochs_between_evals)
+      **validation_kwargs)
 
-  validation_output = model.evaluate(
-      validation_dataset, steps=validation_steps, verbose=2)
+  validation_output = None
+  if not params.evaluation.skip_eval:
+    validation_output = model.evaluate(
+        validation_dataset, steps=validation_steps, verbose=2)
 
   # TODO(dankondratyuk): eval and save final test accuracy
   stats = common.build_stats(history,
                              validation_output,
                              callbacks)
 ...
```
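The `skip_eval` plumbing above works because Keras' `fit()` simply omits validation when the corresponding kwargs are absent, so an empty dict toggles evaluation off without duplicating the `fit()` call path. A minimal standalone sketch of the pattern (names are placeholders):

```python
# Empty kwargs means "no validation"; otherwise the three validation
# arguments are forwarded to model.fit via ** expansion.
skip_eval = True
validation_kwargs = {} if skip_eval else {
    'validation_data': validation_dataset,  # placeholder names
    'validation_steps': validation_steps,
    'validation_freq': 1,
}
# history = model.fit(train_dataset, epochs=90, **validation_kwargs)
```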
@@ -372,7 +407,7 @@ def export(params: base_configs.ExperimentConfig):

```diff
   """Runs the model export functionality."""
   logging.info('Exporting model.')
   model_params = params.model.model_params.as_dict()
-  model = MODELS[params.model.name](**model_params)
+  model = get_models()[params.model.name](**model_params)
   checkpoint = params.export.checkpoint
   if checkpoint is None:
     logging.info('No export checkpoint was provided. Using the latest '
 ...
```

@@ -395,8 +430,6 @@ def run(flags_obj: flags.FlagValues,

```diff
     Dictionary of training/eval stats
   """
   params = _get_params_from_flags(flags_obj)
-  initialize(params)
-
   if params.mode == 'train_and_eval':
     return train_and_eval(params, strategy_override)
   elif params.mode == 'export_only':
 ...
```
official/vision/image_classification/classifier_trainer_test.py

@@ -233,8 +233,8 @@ class UtilTests(parameterized.TestCase, tf.test.TestCase):

```diff
   )
   def test_get_loss_scale(self, loss_scale, dtype, expected):
     config = base_configs.ExperimentConfig(
-        model=base_configs.ModelConfig(
-            loss=base_configs.LossConfig(loss_scale=loss_scale)),
+        runtime=base_configs.RuntimeConfig(loss_scale=loss_scale),
         train_dataset=dataset_factory.DatasetConfig(dtype=dtype))
     ls = classifier_trainer.get_loss_scale(config, fp16_default=128)
     self.assertEqual(ls, expected)
 ...
```

@@ -246,7 +246,7 @@ class UtilTests(parameterized.TestCase, tf.test.TestCase):

```diff
   def test_initialize(self, dtype):
     config = base_configs.ExperimentConfig(
         runtime=base_configs.RuntimeConfig(
-            enable_eager=False,
+            run_eagerly=False,
             enable_xla=False,
             gpu_threads_enabled=True,
             per_gpu_thread_count=1,
 ...
```

@@ -258,7 +258,14 @@ class UtilTests(parameterized.TestCase, tf.test.TestCase):

```diff
         model=base_configs.ModelConfig(
             loss=base_configs.LossConfig(loss_scale='dynamic')),
     )
-    classifier_trainer.initialize(config)
+
+    class EmptyClass:
+      pass
+    fake_ds_builder = EmptyClass()
+    fake_ds_builder.dtype = dtype
+    fake_ds_builder.config = EmptyClass()
+    fake_ds_builder.config.data_format = None
+    classifier_trainer.initialize(config, fake_ds_builder)
```
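The `EmptyClass` stub fakes only the two attributes `initialize()` reads: `dtype` and `config.data_format`. An equivalent, more compact stub (an aside, not part of the commit) uses the standard library:

```python
import types

# Duck-typed stand-in for dataset_factory.DatasetBuilder in tests.
fake_ds_builder = types.SimpleNamespace(
    dtype='float32',
    config=types.SimpleNamespace(data_format=None))
```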
```python
  def test_resume_from_checkpoint(self):
    """Tests functionality for resuming from checkpoint."""
    ...
```
official/vision/image_classification/configs/base_configs.py

@@ -58,6 +58,17 @@ class MetricsConfig(base_config.Config):

```diff
   top_5: bool = None
 
 
+@dataclasses.dataclass
+class TimeHistoryConfig(base_config.Config):
+  """Configuration for the TimeHistory callback.
+
+  Attributes:
+    log_steps: Interval of steps between logging of batch level stats.
+  """
+  log_steps: int = None
+
+
 @dataclasses.dataclass
 class TrainConfig(base_config.Config):
   """Configuration for training.
 ...
```

@@ -77,8 +88,9 @@ class TrainConfig(base_config.Config):

```diff
   epochs: int = None
   steps: int = None
   callbacks: CallbacksConfig = CallbacksConfig()
-  metrics: List[str] = None
+  metrics: MetricsConfig = None
   tensorboard: TensorboardConfig = TensorboardConfig()
+  time_history: TimeHistoryConfig = TimeHistoryConfig()
 
 
 @dataclasses.dataclass
 ...
```

@@ -91,10 +103,12 @@ class EvalConfig(base_config.Config):

```diff
     steps: The number of eval steps to run during evaluation. If None, this will
       be inferred based on the number of images and batch size. Defaults to
       None.
+    skip_eval: Whether or not to skip evaluation.
   """
   epochs_between_evals: int = None
   steps: int = None
+  skip_eval: bool = False
 
 
 @dataclasses.dataclass
 ...
```
official/vision/image_classification/configs/configs.py

@@ -52,6 +52,7 @@ class EfficientNetImageNetConfig(base_configs.ExperimentConfig):

```diff
       callbacks=base_configs.CallbacksConfig(enable_checkpoint_and_export=True,
                                              enable_tensorboard=True),
       metrics=['accuracy', 'top_5'],
+      time_history=base_configs.TimeHistoryConfig(log_steps=100),
       tensorboard=base_configs.TensorboardConfig(track_lr=True,
                                                  write_model_weights=False))
   evaluation: base_configs.EvalConfig = base_configs.EvalConfig(
 ...
```

@@ -83,6 +84,7 @@ class ResNetImagenetConfig(base_configs.ExperimentConfig):

```diff
       callbacks=base_configs.CallbacksConfig(enable_checkpoint_and_export=True,
                                              enable_tensorboard=True),
       metrics=['accuracy', 'top_5'],
+      time_history=base_configs.TimeHistoryConfig(log_steps=100),
       tensorboard=base_configs.TensorboardConfig(track_lr=True,
                                                  write_model_weights=False))
   evaluation: base_configs.EvalConfig = base_configs.EvalConfig(
 ...
```
official/vision/image_classification/dataset_factory.py

@@ -203,6 +203,30 @@ class DatasetBuilder:

```diff
     # Always divide by the global batch size to get the correct # of steps
     return self.num_examples // self.global_batch_size
 
+  @property
+  def dtype(self) -> tf.dtypes.DType:
+    """Converts the config's dtype string to a tf dtype.
+
+    Returns:
+      A mapping from string representation of a dtype to the `tf.dtypes.DType`.
+
+    Raises:
+      ValueError if the config's dtype is not supported.
+    """
+    dtype_map = {
+        'float32': tf.float32,
+        'bfloat16': tf.bfloat16,
+        'float16': tf.float16,
+        'fp32': tf.float32,
+        'bf16': tf.bfloat16,
+    }
+    try:
+      return dtype_map[self.config.dtype]
+    except:
+      raise ValueError('Invalid DType provided. Supported types: {}'.format(
+          dtype_map.keys()))
+
   @property
   def image_size(self) -> int:
     """The size of each image (can be inferred from the dataset)."""
 ...
```
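One nit worth noting: the bare `except:` above catches everything, including `KeyboardInterrupt`. A narrower variant (a sketch, not what the commit ships) would catch only the failed lookup and chain it:

```python
import tensorflow as tf

def dtype_from_string(dtype_str: str) -> tf.dtypes.DType:
  """Sketch: same mapping as above, but only KeyError becomes ValueError."""
  dtype_map = {
      'float32': tf.float32,
      'bfloat16': tf.bfloat16,
      'float16': tf.float16,
  }
  try:
    return dtype_map[dtype_str]
  except KeyError as e:
    raise ValueError('Invalid DType provided. Supported types: {}'.format(
        list(dtype_map.keys()))) from e
```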
@@ -326,7 +350,7 @@ class DatasetBuilder:

```diff
     def generate_data(_):
       image = tf.zeros([self.image_size, self.image_size, self.num_channels],
-                       dtype=self.config.dtype)
+                       dtype=self.dtype)
       label = tf.zeros([1], dtype=tf.int32)
       return image, label
 ...
```

@@ -451,7 +475,7 @@ class DatasetBuilder:

```diff
           image_size=self.image_size,
           mean_subtract=self.config.mean_subtract,
           standardize=self.config.standardize,
-          dtype=self.config.dtype,
+          dtype=self.dtype,
           augmenter=self.augmenter)
     else:
       image = preprocessing.preprocess_for_eval(
 ...
```

@@ -460,7 +484,7 @@ class DatasetBuilder:

```diff
           num_channels=self.num_channels,
           mean_subtract=self.config.mean_subtract,
           standardize=self.config.standardize,
-          dtype=self.config.dtype)
+          dtype=self.dtype)
 
       label = tf.cast(label, tf.int32)
 
       if self.config.one_hot:
 ...
```
official/vision/image_classification/resnet/common.py

@@ -166,7 +166,6 @@ def build_stats(history, eval_output, callbacks):

```python
  if eval_output:
    stats['accuracy_top_1'] = float(eval_output[1])
    stats['eval_loss'] = float(eval_output[0])
  if history and history.history:
    train_hist = history.history
    # Gets final loss from training.
    ...
```

@@ -176,6 +175,8 @@ def build_stats(history, eval_output, callbacks):

```diff
       stats[TRAIN_TOP_1] = float(train_hist['categorical_accuracy'][-1])
     elif 'sparse_categorical_accuracy' in train_hist:
       stats[TRAIN_TOP_1] = float(train_hist['sparse_categorical_accuracy'][-1])
+    elif 'accuracy' in train_hist:
+      stats[TRAIN_TOP_1] = float(train_hist['accuracy'][-1])
 
   if not callbacks:
     return stats
 ...
```
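The new `'accuracy'` branch covers models compiled with the plain string metric name. A self-contained example of the fallback chain (the dictionary contents and the `TRAIN_TOP_1` key name are illustrative):

```python
# The last-epoch value of whichever accuracy key is present wins.
TRAIN_TOP_1 = 'training_accuracy_top_1'  # illustrative constant name
train_hist = {'loss': [2.3, 1.1], 'accuracy': [0.41, 0.67]}
stats = {}
if 'categorical_accuracy' in train_hist:
  stats[TRAIN_TOP_1] = float(train_hist['categorical_accuracy'][-1])
elif 'sparse_categorical_accuracy' in train_hist:
  stats[TRAIN_TOP_1] = float(train_hist['sparse_categorical_accuracy'][-1])
elif 'accuracy' in train_hist:
  stats[TRAIN_TOP_1] = float(train_hist['accuracy'][-1])
assert stats[TRAIN_TOP_1] == 0.67
```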