"mmdet3d/vscode:/vscode.git/clone" did not exist on "fc301b98fa634ff83dcc5ce31cc743bf4ee41eda"
Commit a87bb185 authored by Ruomei Yan's avatar Ruomei Yan
Browse files

Address comments from Aug 7

parent 20101930
@@ -905,7 +905,7 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.enable_eager = True
FLAGS.distribution_strategy = 'mirrored'
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu')
-FLAGS.batch_size = 128 * 8
+FLAGS.batch_size = 128 * 8  # 8 GPUs
self._run_and_report_benchmark()
def benchmark_8_gpu_amp(self):
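These benchmark methods compute the global batch size as the per-GPU batch size times the replica count under MirroredStrategy; the new `# 8 GPUs` comments make the hard-coded multiplier explicit. A minimal sketch of the same convention (the 128 per-GPU figure is from this file; everything else is illustrative):

  import tensorflow as tf

  strategy = tf.distribute.MirroredStrategy()  # one replica per visible GPU
  per_gpu_batch_size = 128
  # Equivalent to the hard-coded `128 * 8` above when 8 GPUs are visible.
  global_batch_size = per_gpu_batch_size * strategy.num_replicas_in_sync
  print('global batch size:', global_batch_size)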
@@ -918,7 +918,7 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.fp16_implementation = 'graph_rewrite'
FLAGS.distribution_strategy = 'mirrored'
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_amp')
-FLAGS.batch_size = 256 * 8
+FLAGS.batch_size = 256 * 8  # 8 GPUs
self._run_and_report_benchmark()
def benchmark_8_gpu_tweaked(self):
@@ -929,7 +929,7 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.enable_eager = True
FLAGS.distribution_strategy = 'mirrored'
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_tweaked')
-FLAGS.batch_size = 128 * 8
+FLAGS.batch_size = 128 * 8  # 8 GPUs
FLAGS.datasets_num_private_threads = 14
self._run_and_report_benchmark()
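`datasets_num_private_threads` gives the tf.data input pipeline its own thread pool so host-side preprocessing does not contend with the training loop for threads. A sketch of the underlying tf.data option (the option name is from TF 2.x-era `tf.data.Options`; the dataset itself is illustrative):

  import tensorflow as tf

  options = tf.data.Options()
  # Reserve a private pool of 14 threads for this input pipeline,
  # mirroring FLAGS.datasets_num_private_threads = 14 above.
  options.experimental_threading.private_threadpool_size = 14
  dataset = tf.data.Dataset.range(1024).batch(32).with_options(options)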
@@ -942,7 +942,7 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.enable_xla = True
FLAGS.distribution_strategy = 'mirrored'
FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu')
-FLAGS.batch_size = 128 * 8
+FLAGS.batch_size = 128 * 8  # 8 GPUs
self._run_and_report_benchmark()
def benchmark_xla_8_gpu_amp(self):
@@ -956,7 +956,7 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.enable_xla = True
FLAGS.distribution_strategy = 'mirrored'
FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_amp')
-FLAGS.batch_size = 256 * 8
+FLAGS.batch_size = 256 * 8  # 8 GPUs
self._run_and_report_benchmark()
def benchmark_xla_8_gpu_tweaked(self):
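`enable_xla` turns on XLA JIT compilation, and the `_amp` variants use the fp16 graph-rewrite path set by `fp16_implementation = 'graph_rewrite'`. A sketch of the TF 2.x-era APIs these flags roughly map to (the optimizer is illustrative; `enable_mixed_precision_graph_rewrite` was later deprecated in favor of Keras mixed-precision policies):

  import tensorflow as tf

  tf.config.optimizer.set_jit(True)  # rough equivalent of FLAGS.enable_xla

  optimizer = tf.keras.optimizers.SGD(0.1, momentum=0.9)
  # fp16 "graph_rewrite" implementation: wraps the optimizer and rewrites
  # the graph to compute in float16 with automatic loss scaling.
  optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(
      optimizer)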
@@ -982,7 +982,7 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.enable_eager = True
FLAGS.distribution_strategy = 'mirrored'
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16')
-FLAGS.batch_size = 256 * 8
+FLAGS.batch_size = 256 * 8  # 8 GPUs
self._run_and_report_benchmark()
def benchmark_8_gpu_fp16_tweaked(self):
@@ -994,7 +994,7 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.enable_eager = True
FLAGS.distribution_strategy = 'mirrored'
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16_tweaked')
-FLAGS.batch_size = 256 * 8
+FLAGS.batch_size = 256 * 8  # 8 GPUs
FLAGS.tf_gpu_thread_mode = 'gpu_private'
FLAGS.datasets_num_private_threads = 40
self._run_and_report_benchmark()
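`tf_gpu_thread_mode = 'gpu_private'` corresponds to the TF_GPU_THREAD_MODE environment variable, which gives each GPU its own dedicated host threads instead of sharing a global pool. A sketch (the thread count is illustrative; both variables must be set before TensorFlow initializes its devices):

  import os

  # Must be set before TensorFlow creates its GPU devices.
  os.environ['TF_GPU_THREAD_MODE'] = 'gpu_private'
  os.environ['TF_GPU_THREAD_COUNT'] = '2'  # dedicated threads per GPU

  import tensorflow as tf  # imported only after the environment is configured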
@@ -1009,7 +1009,7 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.distribution_strategy = 'mirrored'
FLAGS.model_dir = self._get_model_dir(
'benchmark_8_gpu_fp16_dynamic_tweaked')
-FLAGS.batch_size = 256 * 8
+FLAGS.batch_size = 256 * 8  # 8 GPUs
FLAGS.loss_scale = 'dynamic'
FLAGS.tf_gpu_thread_mode = 'gpu_private'
FLAGS.datasets_num_private_threads = 40
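`loss_scale = 'dynamic'` lets the loss scale grow and shrink during fp16 training instead of staying at a fixed power of two. A sketch using the TF 2.3-era experimental wrapper (newer TF exposes this as `tf.keras.mixed_precision.LossScaleOptimizer`; the optimizer is illustrative):

  import tensorflow as tf

  optimizer = tf.keras.optimizers.SGD(0.1, momentum=0.9)
  # Dynamic loss scaling: the scale is raised after a run of finite steps
  # and halved whenever gradients overflow to inf/NaN.
  optimizer = tf.keras.mixed_precision.experimental.LossScaleOptimizer(
      optimizer, loss_scale='dynamic')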
@@ -1025,7 +1025,7 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.enable_xla = True
FLAGS.distribution_strategy = 'mirrored'
FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16')
-FLAGS.batch_size = 256 * 8
+FLAGS.batch_size = 256 * 8  # 8 GPUs
self._run_and_report_benchmark()
def benchmark_xla_8_gpu_fp16_tweaked(self):
@@ -1038,7 +1038,7 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.enable_xla = True
FLAGS.distribution_strategy = 'mirrored'
FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16_tweaked')
-FLAGS.batch_size = 256 * 8
+FLAGS.batch_size = 256 * 8  # 8 GPUs
FLAGS.tf_gpu_thread_mode = 'gpu_private'
FLAGS.datasets_num_private_threads = 48
self._run_and_report_benchmark()
@@ -1074,7 +1074,7 @@ class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
FLAGS.distribution_strategy = 'mirrored'
FLAGS.model_dir = self._get_model_dir(
'benchmark_xla_8_gpu_fp16_dynamic_tweaked')
-FLAGS.batch_size = 256 * 8
+FLAGS.batch_size = 256 * 8  # 8 GPUs
FLAGS.loss_scale = 'dynamic'
FLAGS.tf_gpu_thread_mode = 'gpu_private'
FLAGS.datasets_num_private_threads = 48
......
@@ -26,8 +26,6 @@ from absl import flags
from absl import logging
import tensorflow as tf
-import tensorflow_model_optimization as tfmot
from official.modeling import performance
from official.utils.flags import core as flags_core
from official.utils.misc import distribution_utils
@@ -40,34 +38,28 @@ from official.vision.image_classification.resnet import resnet_model
def cluster_last_three_conv2d_layers(model):
+  import tensorflow_model_optimization as tfmot
  last_three_conv2d_layers = [
      layer for layer in model.layers
-      if isinstance(layer, tf.keras.layers.Conv2D) and
-      not isinstance(layer, tf.keras.layers.DepthwiseConv2D)
-  ]
-  last_three_conv2d_layers = last_three_conv2d_layers[-3:]
+      if isinstance(layer, tf.keras.layers.Conv2D)
+  ][-3:]
  cluster_weights = tfmot.clustering.keras.cluster_weights
  CentroidInitialization = tfmot.clustering.keras.CentroidInitialization
-  clustering_params1 = {
-      'number_of_clusters': 256,
-      'cluster_centroids_init': CentroidInitialization.LINEAR
-  }
-  clustering_params2 = {
-      'number_of_clusters': 32,
-      'cluster_centroids_init': CentroidInitialization.LINEAR
-  }

  def cluster_fn(layer):
    if layer not in last_three_conv2d_layers:
      return layer
-    if layer == last_three_conv2d_layers[0] or layer == last_three_conv2d_layers[1]:
-      clustered = cluster_weights(layer, **clustering_params1)
-      print("Clustered {} with {} clusters".format(layer.name, clustering_params1['number_of_clusters']))
+    if layer == last_three_conv2d_layers[0] or \
+        layer == last_three_conv2d_layers[1]:
+      clustered = cluster_weights(layer, number_of_clusters=256,
+                                  cluster_centroids_init=CentroidInitialization.LINEAR)
+      print("Clustered {} with 256 clusters".format(layer.name))
    else:
-      clustered = cluster_weights(layer, **clustering_params2)
-      print("Clustered {} with {} clusters".format(layer.name, clustering_params2['number_of_clusters']))
+      clustered = cluster_weights(layer, number_of_clusters=32,
+                                  cluster_centroids_init=CentroidInitialization.LINEAR)
+      print("Clustered {} with 32 clusters".format(layer.name))
    return clustered

  return tf.keras.models.clone_model(model, clone_function=cluster_fn)
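Usage sketch for the helper above: `clone_model` returns a new model whose last three Conv2D layers are wrapped in tfmot clustering wrappers (256 clusters for the two earlier layers, 32 for the final one), and the clustered clone must be recompiled before fine-tuning. The MobileNet instantiation and compile arguments here are illustrative, not from this commit:

  import tensorflow as tf

  model = tf.keras.applications.mobilenet.MobileNet(weights=None)
  clustered_model = cluster_last_three_conv2d_layers(model)
  # Clustering wrappers add bookkeeping variables, so the clone must be
  # recompiled before calling fit().
  clustered_model.compile(
      loss='sparse_categorical_crossentropy',
      optimizer=tf.keras.optimizers.SGD(learning_rate=1e-5, momentum=0.9),
      metrics=['sparse_categorical_accuracy'])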
@@ -151,7 +143,8 @@ def run(flags_obj):
# This use_keras_image_data_format flag indicates whether the image
# preprocessor output format should be the same as the Keras backend image
# data format, or just channels-last.
-use_keras_image_data_format = (flags_obj.model == 'mobilenet' or 'mobilenet_pretrained')
+use_keras_image_data_format = (
+    flags_obj.model in ('mobilenet', 'mobilenet_pretrained'))
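Note the membership test: the superficially similar `flags_obj.model == 'mobilenet' or 'mobilenet_pretrained'` is always truthy, because a non-empty string literal is true on its own and short-circuits the `or`. A minimal, self-contained demonstration of the pitfall:

  model_name = 'resnet50_v1.5'
  buggy = (model_name == 'mobilenet' or 'mobilenet_pretrained')
  fixed = model_name in ('mobilenet', 'mobilenet_pretrained')
  print(bool(buggy))  # True -- the bare string makes the `or` truthy
  print(fixed)        # False -- what the comparison actually intended

The same pattern appears in the optimizer and model-selection branches below and is corrected the same way.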
train_input_dataset = input_fn(
is_training=True,
data_dir=flags_obj.data_dir,
@@ -183,15 +176,17 @@ def run(flags_obj):
boundaries=list(p[1] for p in common.LR_SCHEDULE[1:]),
multipliers=list(p[0] for p in common.LR_SCHEDULE),
compute_lr_on_cpu=True)
-steps_per_epoch = (imagenet_preprocessing.NUM_IMAGES['train'] //
-                   flags_obj.batch_size)
+steps_per_epoch = (
+    imagenet_preprocessing.NUM_IMAGES['train'] // flags_obj.batch_size)
with strategy_scope:
if flags_obj.optimizer == 'resnet50_default':
optimizer = common.get_optimizer(lr_schedule)
-elif flags_obj.optimizer == 'mobilenet_default':
+elif flags_obj.optimizer in ('mobilenet_default', 'mobilenet_fine_tune'):
initial_learning_rate = \
flags_obj.initial_learning_rate_per_sample * flags_obj.batch_size
+if flags_obj.optimizer == 'mobilenet_fine_tune':
+  initial_learning_rate = 1e-5
optimizer = tf.keras.optimizers.SGD(
learning_rate=tf.keras.optimizers.schedules.ExponentialDecay(
initial_learning_rate,
@@ -199,8 +194,6 @@ def run(flags_obj):
decay_rate=flags_obj.lr_decay_factor,
staircase=True),
momentum=0.9)
-elif flags_obj.optimizer == 'mobilenet_fine_tune':
-  optimizer = tf.keras.optimizers.SGD(learning_rate=1e-5, momentum=0.9)
if flags_obj.fp16_implementation == 'graph_rewrite':
# Note: when flags_obj.fp16_implementation == "graph_rewrite", dtype as
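Both MobileNet optimizer branches now share one SGD optimizer whose learning rate follows a staircase exponential decay; fine-tuning only overrides the initial rate to 1e-5. A self-contained sketch of the schedule being built (all constants illustrative):

  import tensorflow as tf

  initial_learning_rate = 0.045  # e.g. per-sample rate * global batch size
  lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
      initial_learning_rate,
      decay_steps=2500,   # decay epochs * steps_per_epoch in the script
      decay_rate=0.94,    # FLAGS.lr_decay_factor
      staircase=True)     # step-wise rather than continuous decay
  optimizer = tf.keras.optimizers.SGD(learning_rate=lr_schedule, momentum=0.9)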
@@ -217,24 +210,25 @@ def run(flags_obj):
elif flags_obj.model == 'resnet50_v1.5':
model = resnet_model.resnet50(
num_classes=imagenet_preprocessing.NUM_CLASSES)
-elif flags_obj.model == 'mobilenet':
+elif flags_obj.model in ('mobilenet', 'mobilenet_pretrained'):
# TODO(kimjaehong): Remove layers attribute when minimum TF version
# support 2.0 layers by default.
+if flags_obj.model == 'mobilenet_pretrained':
+  classes_labels = 1000
+  initial_weights = 'imagenet'
+else:
+  classes_labels = imagenet_preprocessing.NUM_CLASSES
+  initial_weights = None
model = tf.keras.applications.mobilenet.MobileNet(
-    weights=None,
-    classes=imagenet_preprocessing.NUM_CLASSES,
-    layers=tf.keras.layers)
-elif flags_obj.model == 'mobilenet_pretrained':
-  model = tf.keras.applications.mobilenet.MobileNet(
-      dropout=1e-7,
-      weights='imagenet',
-      classes=1000,
+    weights=initial_weights,
+    classes=classes_labels,
layers=tf.keras.layers)
if flags_obj.pretrained_filepath:
model.load_weights(flags_obj.pretrained_filepath)
if flags_obj.pruning_method == 'polynomial_decay':
+import tensorflow_model_optimization as tfmot
if dtype != tf.float32:
raise NotImplementedError(
'Pruning is currently only supported on dtype=tf.float32.')
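For reference, `polynomial_decay` pruning with tfmot looks roughly like this; the layer sizes and schedule constants are illustrative, not this script's:

  import numpy as np
  import tensorflow as tf
  import tensorflow_model_optimization as tfmot

  pruning_params = {
      'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(
          initial_sparsity=0.0, final_sparsity=0.5,
          begin_step=0, end_step=1000),
  }
  model = tf.keras.Sequential([tf.keras.layers.Dense(10, input_shape=(20,))])
  pruned_model = tfmot.sparsity.keras.prune_low_magnitude(
      model, **pruning_params)
  pruned_model.compile(optimizer='sgd', loss='mse')
  # The pruning step counter is advanced by a mandatory callback.
  pruned_model.fit(np.random.rand(64, 20), np.random.rand(64, 10),
                   callbacks=[tfmot.sparsity.keras.UpdatePruningStep()],
                   verbose=0)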
@@ -252,7 +246,9 @@ def run(flags_obj):
raise NotImplementedError('Only polynomial_decay is currently supported.')
if flags_obj.clustering_method == 'selective_clustering':
-if dtype != tf.float32 or flags_obj.fp16_implementation == 'graph_rewrite':
+import tensorflow_model_optimization as tfmot
+if dtype != tf.float32 or \
+    flags_obj.fp16_implementation == 'graph_rewrite':
raise NotImplementedError(
'Clustering is currently only supported on dtype=tf.float32.')
model = cluster_last_three_conv2d_layers(model)
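One step this diff does not show: before exporting, the clustering wrappers are normally removed so the saved model contains plain layers whose weights keep their clustered values. A sketch, assuming `clustered_model` was produced by the helper above (the filename is illustrative):

  import tensorflow_model_optimization as tfmot

  final_model = tfmot.clustering.keras.strip_clustering(clustered_model)
  final_model.save('clustered_mobilenet.h5')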
@@ -260,11 +256,12 @@ def run(flags_obj):
raise NotImplementedError(
'Only selective_clustering is implemented.')
-model.compile(loss='sparse_categorical_crossentropy',
-              optimizer=optimizer,
-              metrics=(['sparse_categorical_accuracy']
-                       if flags_obj.report_accuracy_metrics else None),
-              run_eagerly=flags_obj.run_eagerly)
+model.compile(
+    loss='sparse_categorical_crossentropy',
+    optimizer=optimizer,
+    metrics=(['sparse_categorical_accuracy']
+             if flags_obj.report_accuracy_metrics else None),
+    run_eagerly=flags_obj.run_eagerly)
train_epochs = flags_obj.train_epochs
@@ -278,8 +275,8 @@ def run(flags_obj):
steps_per_epoch = min(flags_obj.train_steps, steps_per_epoch)
train_epochs = 1
-num_eval_steps = (imagenet_preprocessing.NUM_IMAGES['validation'] //
-                  flags_obj.batch_size)
+num_eval_steps = (
+    imagenet_preprocessing.NUM_IMAGES['validation'] // flags_obj.batch_size)
validation_data = eval_input_dataset
if flags_obj.skip_eval:
@@ -336,9 +333,10 @@ def run(flags_obj):
def define_imagenet_keras_flags():
-common.define_keras_flags(model=True,
-                          optimizer=True,
-                          pretrained_filepath=True)
+common.define_keras_flags(
+    model=True,
+    optimizer=True,
+    pretrained_filepath=True)
common.define_pruning_flags()
common.define_clustering_flags()
flags_core.set_defaults()
@@ -354,4 +352,4 @@ def main(_):
if __name__ == '__main__':
logging.set_verbosity(logging.INFO)
define_imagenet_keras_flags()
-app.run(main)
\ No newline at end of file
+app.run(main)
@@ -250,14 +250,14 @@ def define_keras_flags(dynamic_loss_scale=True,
default=False,
help='Whether to enable Tensorboard callback.')
flags.DEFINE_string(
-    name='profile_steps',
-    default=None,
-    help='Save profiling data to model dir at given range of global steps. The '
-    'value must be a comma separated pair of positive integers, specifying '
-    'the first and last step to profile. For example, "--profile_steps=2,4" '
-    'triggers the profiler to process 3 steps, starting from the 2nd step. '
-    'Note that profiler has a non-trivial performance overhead, and the '
-    'output file can be gigantic if profiling many steps.')
+    name='profile_steps', default=None,
+    help='Save profiling data to model dir at given range of global steps. '
+    'The value must be a comma separated pair of positive integers, '
+    'specifying the first and last step to profile. For example, '
+    '"--profile_steps=2,4" triggers the profiler to process 3 steps, '
+    'starting from the 2nd step. Note that the profiler has a non-trivial '
+    'performance overhead, and the output file can be gigantic if '
+    'profiling many steps.')
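The help text above defines the `start,stop` format, but the parsing happens elsewhere in the codebase. A sketch of a validator consistent with the documented contract (`parse_profile_steps` is a hypothetical helper, not this module's):

  def parse_profile_steps(value):
    """Parses '2,4' into (start, stop); both must be positive and ordered."""
    parts = value.split(',')
    if len(parts) != 2:
      raise ValueError('profile_steps must be two comma separated integers')
    start, stop = int(parts[0]), int(parts[1])
    if start < 1 or stop < start:
      raise ValueError('profile_steps requires 1 <= start <= stop')
    return start, stop

  assert parse_profile_steps('2,4') == (2, 4)  # profiles steps 2, 3 and 4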
flags.DEFINE_integer(
name='train_steps',
default=None,
......