# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""SSD300 Model Configuration.
References:
Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
Cheng-Yang Fu, Alexander C. Berg
SSD: Single Shot MultiBox Detector
arXiv:1512.02325
Ported from MLPerf reference implementation:
https://github.com/mlperf/reference/tree/ssd/single_stage_detector/ssd
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing
import os
import re
import threading
import tensorflow.compat.v1 as tf
# pylint: disable=g-direct-tensorflow-import
import constants
import mlperf
import ssd_constants
from cnn_util import log_fn
from models import model as model_lib
from models import resnet_model
from tensorflow.contrib import layers as contrib_layers
from tensorflow.python.ops import variables
BACKBONE_MODEL_SCOPE_NAME = 'resnet34_backbone'
class SSD300Model(model_lib.CNNModel):
"""Single Shot Multibox Detection (SSD) model for 300x300 image datasets."""
def __init__(self, label_num=ssd_constants.NUM_CLASSES, batch_size=32,
learning_rate=1e-3, backbone='resnet34', params=None):
super(SSD300Model, self).__init__('ssd300', 300, batch_size, learning_rate,
params=params)
# For COCO dataset, 80 categories + 1 background = 81 labels
self.label_num = label_num
# Currently only support ResNet-34 as backbone model
if backbone != 'resnet34':
raise ValueError('Invalid backbone model %s for SSD.' % backbone)
mlperf.logger.log(key=mlperf.tags.BACKBONE, value=backbone)
# Number of channels and default boxes associated with the following layers:
# ResNet34 layer, Conv7, Conv8_2, Conv9_2, Conv10_2, Conv11_2
self.out_chan = [256, 512, 512, 256, 256, 256]
mlperf.logger.log(key=mlperf.tags.LOC_CONF_OUT_CHANNELS,
value=self.out_chan)
# Number of default boxes from layers of different scales
# 38x38x4, 19x19x6, 10x10x6, 5x5x6, 3x3x4, 1x1x4
self.num_dboxes = [4, 6, 6, 6, 4, 4]
mlperf.logger.log(key=mlperf.tags.NUM_DEFAULTS_PER_CELL,
value=self.num_dboxes)
# TODO(haoyuzhang): in order to correctly restore in replicated mode, need
# to create a saver for each tower before graph is finalized. Use variable
# manager for better efficiency.
self.backbone_savers = []
# Collected predictions for eval stage. It maps each image id in eval
# dataset to a dict containing the following information:
# source_id: raw ID of image
# raw_shape: raw shape of image
# pred_box: encoded box coordinates of prediction
# pred_scores: scores of classes in prediction
self.predictions = {}
# Global step when predictions are collected.
self.eval_global_step = 0
# Average precision. In asynchronous eval mode, this is the latest AP we
# have so far, which may not be the result at the current eval step.
self.eval_coco_ap = 0
# Process, queues, and thread for asynchronous evaluation. When enabled,
# we create a separate process (async_eval_process) that continuously pulls
# intermediate results from the predictions queue (a multiprocessing queue),
# processes them, and pushes final results into the results queue (another
# multiprocessing queue). The main thread is responsible for pushing
# messages into the predictions queue, and starts a separate thread to
# continuously pull messages from the results queue to update final results.
# Messages in the predictions queue are tuples of two elements:
# (evaluation step, predictions)
# Messages in the results queue are tuples of two elements:
# (evaluation step, final results)
self.async_eval_process = None
self.async_eval_predictions_queue = None
self.async_eval_results_queue = None
self.async_eval_results_getter_thread = None
# The MLPerf reference uses a starting lr of 1e-3 at bs=32.
self.base_lr_batch_size = 32
def skip_final_affine_layer(self):
return True
def gpu_preprocess_nhwc(self, images, phase_train=True):
try:
import ssd_dataloader # pylint: disable=g-import-not-at-top
except ImportError:
raise ImportError('To use the COCO dataset, you must clone the '
'repo https://github.com/tensorflow/models and add '
'tensorflow/models and tensorflow/models/research to '
'the PYTHONPATH, and compile the protobufs by '
'following https://github.com/tensorflow/models/blob/'
'master/research/object_detection/g3doc/installation.md'
'#protobuf-compilation ; To evaluate using the COCO '
'metric, download and install Python COCO API from '
'https://github.com/cocodataset/cocoapi')
if phase_train:
images = ssd_dataloader.color_jitter(
images, brightness=0.125, contrast=0.5, saturation=0.5, hue=0.05)
images = ssd_dataloader.normalize_image(images)
return images
def add_backbone_model(self, cnn):
# --------------------------------------------------------------------------
# Resnet-34 backbone model -- modified for SSD
# --------------------------------------------------------------------------
# Input 300x300, output 150x150
cnn.conv(64, 7, 7, 2, 2, mode='SAME_RESNET', use_batch_norm=True)
cnn.mpool(3, 3, 2, 2, mode='SAME')
resnet34_layers = [3, 4, 6, 3]
version = 'v1'
# ResNet-34 block group 1
# Input 150x150, output 75x75
for i in range(resnet34_layers[0]):
# Last argument forces residual_block to use a projection shortcut, even
# though the numbers of input and output channels are equal
resnet_model.residual_block(cnn, 64, 1, version, i == 0)
# ResNet-34 block group 2
# Input 75x75, output 38x38
for i in range(resnet34_layers[1]):
stride = 2 if i == 0 else 1
resnet_model.residual_block(cnn, 128, stride, version, i == 0)
# ResNet-34 block group 3
# This block group is modified: the first layer uses stride=1 so that the
# feature map size does not change within this group of layers
# Input 38x38, output 38x38
for i in range(resnet34_layers[2]):
# The following line is intentionally commented out to differentiate from
# the original ResNet-34 model
# stride = 2 if i == 0 else 1
resnet_model.residual_block(cnn, 256, stride, version, i == 0)
# ResNet-34 block group 4: removed final block group
# The following 3 lines are intentionally commented out to differentiate
# from the original ResNet-34 model
# for i in range(resnet34_layers[3]):
# stride = 2 if i == 0 else 1
# resnet_model.residual_block(cnn, 512, stride, version, i == 0)
def add_inference(self, cnn):
cnn.use_batch_norm = True
cnn.batch_norm_config = {'decay': ssd_constants.BATCH_NORM_DECAY,
'epsilon': ssd_constants.BATCH_NORM_EPSILON,
'scale': True}
with tf.variable_scope(BACKBONE_MODEL_SCOPE_NAME):
self.add_backbone_model(cnn)
# --------------------------------------------------------------------------
# SSD additional layers
# --------------------------------------------------------------------------
def add_ssd_layer(cnn, depth, k_size, stride, mode):
return cnn.conv(
depth,
k_size,
k_size,
stride,
stride,
mode=mode,
use_batch_norm=False,
kernel_initializer=contrib_layers.xavier_initializer())
# Activations for feature maps of different layers
self.activations = [cnn.top_layer]
# Conv7_1, Conv7_2
# Input 38x38, output 19x19
add_ssd_layer(cnn, 256, 1, 1, 'valid')
self.activations.append(add_ssd_layer(cnn, 512, 3, 2, 'same'))
# Conv8_1, Conv8_2
# Input 19x19, output 10x10
add_ssd_layer(cnn, 256, 1, 1, 'valid')
self.activations.append(add_ssd_layer(cnn, 512, 3, 2, 'same'))
# Conv9_1, Conv9_2
# Input 10x10, output 5x5
add_ssd_layer(cnn, 128, 1, 1, 'valid')
self.activations.append(add_ssd_layer(cnn, 256, 3, 2, 'same'))
# Conv10_1, Conv10_2
# Input 5x5, output 3x3
add_ssd_layer(cnn, 128, 1, 1, 'valid')
self.activations.append(add_ssd_layer(cnn, 256, 3, 1, 'valid'))
# Conv11_1, Conv11_2
# Input 3x3, output 1x1
add_ssd_layer(cnn, 128, 1, 1, 'valid')
self.activations.append(add_ssd_layer(cnn, 256, 3, 1, 'valid'))
self.loc = []
self.conf = []
for nd, ac, oc in zip(self.num_dboxes, self.activations, self.out_chan):
l = cnn.conv(
nd * 4,
3,
3,
1,
1,
input_layer=ac,
num_channels_in=oc,
activation=None,
use_batch_norm=False,
kernel_initializer=contrib_layers.xavier_initializer())
scale = l.get_shape()[-1]
# shape = [batch_size, nd * 4, scale, scale]
l = tf.reshape(l, [self.batch_size, nd, 4, scale, scale])
# shape = [batch_size, nd, 4, scale, scale]
l = tf.transpose(l, [0, 1, 3, 4, 2])
# shape = [batch_size, nd, scale, scale, 4]
self.loc.append(tf.reshape(l, [self.batch_size, -1, 4]))
# shape = [batch_size, nd * scale * scale, 4]
c = cnn.conv(
nd * self.label_num,
3,
3,
1,
1,
input_layer=ac,
num_channels_in=oc,
activation=None,
use_batch_norm=False,
kernel_initializer=contrib_layers.xavier_initializer())
# shape = [batch_size, nd * label_num, scale, scale]
c = tf.reshape(c, [self.batch_size, nd, self.label_num, scale, scale])
# shape = [batch_size, nd, label_num, scale, scale]
c = tf.transpose(c, [0, 1, 3, 4, 2])
# shape = [batch_size, nd, scale, scale, label_num]
self.conf.append(tf.reshape(c, [self.batch_size, -1, self.label_num]))
# shape = [batch_size, nd * scale * scale, label_num]
# Shape of locs: [batch_size, NUM_SSD_BOXES, 4]
# Shape of confs: [batch_size, NUM_SSD_BOXES, label_num]
locs, confs = tf.concat(self.loc, 1), tf.concat(self.conf, 1)
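# Sanity check (illustrative): the feature maps above contribute
# 38*38*4 + 19*19*6 + 10*10*6 + 5*5*6 + 3*3*4 + 1*1*4
# = 5776 + 2166 + 600 + 150 + 36 + 4 = 8732 default boxes in total,
# which should match ssd_constants.NUM_SSD_BOXES.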
# Pack location and confidence outputs into a single output layer
# Shape of logits: [batch_size, NUM_SSD_BOXES, 4+label_num]
logits = tf.concat([locs, confs], 2)
cnn.top_layer = logits
cnn.top_size = 4 + self.label_num
return cnn.top_layer
def get_learning_rate(self, global_step, batch_size):
rescaled_lr = self.get_scaled_base_learning_rate(batch_size)
# Defined in MLPerf reference model
boundaries = [160000, 200000]
boundaries = [b * self.base_lr_batch_size // batch_size for b in boundaries]
decays = [1, 0.1, 0.01]
learning_rates = [rescaled_lr * d for d in decays]
lr = tf.train.piecewise_constant(global_step, boundaries, learning_rates)
warmup_steps = int(118287 / batch_size * 5)
warmup_lr = (
rescaled_lr * tf.cast(global_step, tf.float32) / tf.cast(
warmup_steps, tf.float32))
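# Worked example (illustrative): at batch_size=64 the decay boundaries
# [160000, 200000] rescale to [80000, 100000], and
# warmup_steps = int(118287 / 64 * 5) = 9241, i.e. roughly 5 epochs of
# the 118287-image COCO train set.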
return tf.cond(global_step < warmup_steps, lambda: warmup_lr, lambda: lr)
def get_scaled_base_learning_rate(self, batch_size):
"""Calculates base learning rate for creating lr schedule.
In replicated mode, gradients are summed rather than averaged which, with
the sgd and momentum optimizers, increases the effective learning rate by
lr * num_gpus. Dividing the base lr by num_gpus negates the increase.
Args:
batch_size: Total batch-size.
Returns:
Base learning rate to use to create lr schedule.
"""
base_lr = self.learning_rate
if self.params.variable_update == 'replicated':
base_lr = self.learning_rate / self.params.num_gpus
scaled_lr = base_lr * (batch_size / self.base_lr_batch_size)
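# Worked example (illustrative): with the default base lr of 1e-3 and a
# total batch size of 128, scaled_lr = 1e-3 * (128 / 32) = 4e-3. In
# replicated mode on 4 GPUs, base_lr is first divided by 4, giving
# scaled_lr = 1e-3 again.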
return scaled_lr
def _collect_backbone_vars(self):
backbone_vars = tf.get_collection(
tf.GraphKeys.GLOBAL_VARIABLES, scope='.*'+ BACKBONE_MODEL_SCOPE_NAME)
var_list = {}
# Assume variables in the checkpoint follow the naming convention of a
# model checkpoint trained with the TF official models.
# TODO(haoyuzhang): the following variable name parsing is hacky and easy
# to break if the naming convention of either the benchmarks or the
# official models changes.
for v in backbone_vars:
# conv2d variable example (model <-- checkpoint):
# v/cg/conv24/conv2d/kernel:0 <-- conv2d_24/kernel
if 'conv2d' in v.name:
re_match = re.search(r'conv(\d+)/conv2d/(.+):', v.name)
if re_match:
layer_id = int(re_match.group(1))
param_name = re_match.group(2)
vname_in_ckpt = self._var_name_in_official_model_ckpt(
'conv2d', layer_id, param_name)
var_list[vname_in_ckpt] = v
# batchnorm variable example:
# v/cg/conv24/batchnorm25/gamma:0 <-- batch_normalization_25/gamma
elif 'batchnorm' in v.name:
re_match = re.search(r'batchnorm(\d+)/(.+):', v.name)
if re_match:
layer_id = int(re_match.group(1))
param_name = re_match.group(2)
vname_in_ckpt = self._var_name_in_official_model_ckpt(
'batch_normalization', layer_id, param_name)
var_list[vname_in_ckpt] = v
return var_list
def _var_name_in_official_model_ckpt(self, layer_name, layer_id, param_name):
"""Return variable names according to convention in TF official models."""
vname_in_ckpt = layer_name
if layer_id > 0:
vname_in_ckpt += '_' + str(layer_id)
vname_in_ckpt += '/' + param_name
return vname_in_ckpt
def loss_function(self, inputs, build_network_result):
logits = build_network_result.logits
# Unpack model output back to locations and confidence scores of predictions
# Shape of pred_loc: [batch_size, NUM_SSD_BOXES, 4]
# Shape of pred_label: [batch_size, NUM_SSD_BOXES, label_num]
pred_loc, pred_label = tf.split(logits, [4, self.label_num], 2)
# Shape of gt_loc: [batch_size, NUM_SSD_BOXES, 4]
# Shape of gt_label: [batch_size, NUM_SSD_BOXES, 1]
# Shape of num_gt: [batch_size]
_, gt_loc, gt_label, num_gt = inputs
gt_label = tf.cast(gt_label, tf.int32)
box_loss = self._localization_loss(pred_loc, gt_loc, gt_label, num_gt)
class_loss = self._classification_loss(pred_label, gt_label, num_gt)
tf.summary.scalar('box_loss', tf.reduce_mean(box_loss))
tf.summary.scalar('class_loss', tf.reduce_mean(class_loss))
return class_loss + box_loss
def _localization_loss(self, pred_loc, gt_loc, gt_label, num_matched_boxes):
"""Computes the localization loss.
Computes the localization loss using smooth l1 loss.
Args:
pred_loc: a flattened tensor containing all predicted locations. The
shape is [batch_size, num_anchors, 4].
gt_loc: a tensor representing box regression targets in
[batch_size, num_anchors, 4].
gt_label: a tensor that represents the classification groundtruth targets.
The shape is [batch_size, num_anchors, 1].
num_matched_boxes: the number of anchors matched to groundtruth
targets, used as the loss normalizer. The shape is [batch_size].
Returns:
box_loss: a float32 representing total box regression loss.
"""
mask = tf.greater(tf.squeeze(gt_label), 0)
float_mask = tf.cast(mask, tf.float32)
smooth_l1 = tf.reduce_sum(tf.losses.huber_loss(
gt_loc, pred_loc,
reduction=tf.losses.Reduction.NONE
), axis=2)
smooth_l1 = tf.multiply(smooth_l1, float_mask)
box_loss = tf.reduce_sum(smooth_l1, axis=1)
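# Only anchors matched to a groundtruth box (gt_label > 0) contribute; for
# example (sketch), an image with 10 matched anchors gets the sum of their
# smooth-L1 terms divided by 10.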
return tf.reduce_mean(box_loss / num_matched_boxes)
def _classification_loss(self, pred_label, gt_label, num_matched_boxes):
"""Computes the classification loss.
Computes the classification loss with hard negative mining.
Args:
pred_label: a flattened tensor of predicted class scores. The shape
is [batch_size, num_anchors, num_classes].
gt_label: a tensor that represents the classification groundtruth targets.
The shape is [batch_size, num_anchors, 1].
num_matched_boxes: the number of anchors matched to groundtruth targets.
This is used as the loss normalizer.
Returns:
class_loss: a float32 representing the total classification loss.
"""
cross_entropy = tf.losses.sparse_softmax_cross_entropy(
gt_label, pred_label, reduction=tf.losses.Reduction.NONE)
mask = tf.greater(tf.squeeze(gt_label), 0)
float_mask = tf.cast(mask, tf.float32)
# Hard example mining
neg_masked_cross_entropy = cross_entropy * (1 - float_mask)
relative_position = tf.argsort(
tf.argsort(
neg_masked_cross_entropy, direction='DESCENDING'))
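# The double argsort is a rank trick: argsort(argsort(x, DESCENDING))
# yields each element's descending rank. For example (sketch), a row
# [0.1, 0.9, 0.5] has ranks [2, 0, 1], so tf.less(rank, k) selects the k
# largest negative losses as hard negatives.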
num_neg_boxes = tf.minimum(
tf.to_int32(num_matched_boxes) * ssd_constants.NEGS_PER_POSITIVE,
ssd_constants.NUM_SSD_BOXES)
top_k_neg_mask = tf.cast(tf.less(
relative_position,
tf.tile(num_neg_boxes[:, tf.newaxis], (1, ssd_constants.NUM_SSD_BOXES))
), tf.float32)
class_loss = tf.reduce_sum(
tf.multiply(cross_entropy, float_mask + top_k_neg_mask), axis=1)
return tf.reduce_mean(class_loss / num_matched_boxes)
def add_backbone_saver(self):
# Create saver with mapping from variable names in checkpoint of backbone
# model to variables in SSD model
backbone_var_list = self._collect_backbone_vars()
self.backbone_savers.append(tf.train.Saver(backbone_var_list))
def load_backbone_model(self, sess, backbone_model_path):
for saver in self.backbone_savers:
saver.restore(sess, backbone_model_path)
def get_input_data_types(self, subset):
if subset == 'validation':
return [self.data_type, tf.float32, tf.float32, tf.float32, tf.int32]
return [self.data_type, tf.float32, tf.float32, tf.float32]
def get_input_shapes(self, subset):
"""Return encoded tensor shapes for train and eval data respectively."""
if subset == 'validation':
# Validation data shapes:
# 1. images
# 2. ground truth locations of boxes
# 3. ground truth classes of objects in boxes
# 4. source image IDs
# 5. raw image shapes
return [
[self.batch_size, self.image_size, self.image_size, self.depth],
[self.batch_size, ssd_constants.MAX_NUM_EVAL_BOXES, 4],
[self.batch_size, ssd_constants.MAX_NUM_EVAL_BOXES, 1],
[self.batch_size],
[self.batch_size, 3],
]
# Training data shapes:
# 1. images
# 2. ground truth locations of boxes
# 3. ground truth classes of objects in boxes
# 4. numbers of objects in images
return [
[self.batch_size, self.image_size, self.image_size, self.depth],
[self.batch_size, ssd_constants.NUM_SSD_BOXES, 4],
[self.batch_size, ssd_constants.NUM_SSD_BOXES, 1],
[self.batch_size]
]
def accuracy_function(self, inputs, logits):
"""Returns the ops to measure the mean precision of the model."""
try:
import ssd_dataloader # pylint: disable=g-import-not-at-top
from object_detection.box_coders import faster_rcnn_box_coder # pylint: disable=g-import-not-at-top
from object_detection.core import box_coder # pylint: disable=g-import-not-at-top
from object_detection.core import box_list # pylint: disable=g-import-not-at-top
except ImportError:
raise ImportError('To use the COCO dataset, you must clone the '
'repo https://github.com/tensorflow/models and add '
'tensorflow/models and tensorflow/models/research to '
'the PYTHONPATH, and compile the protobufs by '
'following https://github.com/tensorflow/models/blob/'
'master/research/object_detection/g3doc/installation.md'
'#protobuf-compilation ; To evaluate using the COCO '
'metric, download and install Python COCO API from '
'https://github.com/cocodataset/cocoapi')
# Unpack model output back to locations and confidence scores of predictions
# pred_locs: relative locations (coordinates) of objects in all SSD boxes
# shape: [batch_size, NUM_SSD_BOXES, 4]
# pred_labels: confidence scores of objects being of all categories
# shape: [batch_size, NUM_SSD_BOXES, label_num]
pred_locs, pred_labels = tf.split(logits, [4, self.label_num], 2)
ssd_box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
scale_factors=ssd_constants.BOX_CODER_SCALES)
anchors = box_list.BoxList(
tf.convert_to_tensor(ssd_dataloader.DefaultBoxes()('ltrb')))
pred_boxes = box_coder.batch_decode(
encoded_boxes=pred_locs, box_coder=ssd_box_coder, anchors=anchors)
pred_scores = tf.nn.softmax(pred_labels, axis=2)
# TODO(haoyuzhang): maybe use `gt_boxes` and `gt_classes` for visualization.
_, gt_boxes, gt_classes, source_id, raw_shape = inputs # pylint: disable=unused-variable
return {
(constants.UNREDUCED_ACCURACY_OP_PREFIX +
ssd_constants.PRED_BOXES): pred_boxes,
(constants.UNREDUCED_ACCURACY_OP_PREFIX +
ssd_constants.PRED_SCORES): pred_scores,
# TODO(haoyuzhang): maybe use these values for visualization.
# constants.UNREDUCED_ACCURACY_OP_PREFIX+'gt_boxes': gt_boxes,
# constants.UNREDUCED_ACCURACY_OP_PREFIX+'gt_classes': gt_classes,
(constants.UNREDUCED_ACCURACY_OP_PREFIX +
ssd_constants.SOURCE_ID): source_id,
(constants.UNREDUCED_ACCURACY_OP_PREFIX +
ssd_constants.RAW_SHAPE): raw_shape
}
def postprocess(self, results):
"""Postprocess results returned from model."""
try:
import coco_metric # pylint: disable=g-import-not-at-top
except ImportError:
raise ImportError('To use the COCO dataset, you must clone the '
'repo https://github.com/tensorflow/models and add '
'tensorflow/models and tensorflow/models/research to '
'the PYTHONPATH, and compile the protobufs by '
'following https://github.com/tensorflow/models/blob/'
'master/research/object_detection/g3doc/installation.md'
'#protobuf-compilation ; To evaluate using the COCO '
'metric, download and install Python COCO API from '
'https://github.com/cocodataset/cocoapi')
pred_boxes = results[ssd_constants.PRED_BOXES]
pred_scores = results[ssd_constants.PRED_SCORES]
# TODO(haoyuzhang): maybe use these values for visualization.
# gt_boxes = results['gt_boxes']
# gt_classes = results['gt_classes']
source_id = results[ssd_constants.SOURCE_ID]
raw_shape = results[ssd_constants.RAW_SHAPE]
# COCO evaluation requires processing COCO_NUM_VAL_IMAGES exactly once.
# Because COCO_NUM_VAL_IMAGES % batch_size != 0 in general, setting
# `num_eval_epochs` to 1 is not enough and will often miss some images. We
# expect the user to set `num_eval_epochs` to >1, which will leave some unused
# images from previous steps in `predictions`. Here we check whether we are
# doing eval at a new global step.
if results['global_step'] > self.eval_global_step:
self.eval_global_step = results['global_step']
self.predictions.clear()
for i, sid in enumerate(source_id):
self.predictions[int(sid)] = {
ssd_constants.PRED_BOXES: pred_boxes[i],
ssd_constants.PRED_SCORES: pred_scores[i],
ssd_constants.SOURCE_ID: source_id[i],
ssd_constants.RAW_SHAPE: raw_shape[i]
}
# The COCO metric calculates mAP only after a full epoch of evaluation.
# Return dummy results for top_N_accuracy to be compatible with
# benchmark_cnn.py.
if len(self.predictions) >= ssd_constants.COCO_NUM_VAL_IMAGES:
log_fn('Got results for all {:d} eval examples. Calculate mAP...'.format(
ssd_constants.COCO_NUM_VAL_IMAGES))
annotation_file = os.path.join(self.params.data_dir,
ssd_constants.ANNOTATION_FILE)
# The size of the predictions before decoding is about 15--30 GB, while the
# size after decoding is 100--200 MB. When using async eval mode, decoding
# takes 20--30 seconds of main-thread time but is necessary to avoid OOM
# during inter-process communication.
decoded_preds = coco_metric.decode_predictions(self.predictions.values())
self.predictions.clear()
if self.params.collect_eval_results_async:
def _eval_results_getter():
"""Iteratively get eval results from async eval process."""
while True:
step, eval_results = self.async_eval_results_queue.get()
self.eval_coco_ap = eval_results['COCO/AP']
mlperf.logger.log_eval_accuracy(
self.eval_coco_ap, step, self.batch_size * self.params.num_gpus,
ssd_constants.COCO_NUM_TRAIN_IMAGES)
if self.reached_target():
# Reached target, clear all pending messages in predictions queue
# and insert poison pill to stop the async eval process.
while not self.async_eval_predictions_queue.empty():
self.async_eval_predictions_queue.get()
self.async_eval_predictions_queue.put('STOP')
break
if not self.async_eval_process:
# Limit the number of messages in the predictions queue to prevent OOM.
# Each message (predictions data) can potentially consume a lot of
# memory, and normally there should only be a few messages in the queue.
# If this often blocks, consider reducing the eval frequency.
self.async_eval_predictions_queue = multiprocessing.Queue(2)
self.async_eval_results_queue = multiprocessing.Queue()
# The reason to use a Process rather than a Thread is mainly the
# computationally intensive eval runner: Python threads do not truly run
# in parallel, so a runner thread would either be significantly delayed
# or would delay the main thread.
self.async_eval_process = multiprocessing.Process(
target=coco_metric.async_eval_runner,
args=(self.async_eval_predictions_queue,
self.async_eval_results_queue,
annotation_file))
self.async_eval_process.daemon = True
self.async_eval_process.start()
self.async_eval_results_getter_thread = threading.Thread(
target=_eval_results_getter, args=())
self.async_eval_results_getter_thread.daemon = True
self.async_eval_results_getter_thread.start()
self.async_eval_predictions_queue.put(
(self.eval_global_step, decoded_preds))
return {'top_1_accuracy': 0, 'top_5_accuracy': 0.}
eval_results = coco_metric.compute_map(decoded_preds, annotation_file)
self.eval_coco_ap = eval_results['COCO/AP']
ret = {'top_1_accuracy': self.eval_coco_ap, 'top_5_accuracy': 0.}
for metric_key, metric_value in eval_results.items():
ret[constants.SIMPLE_VALUE_RESULT_PREFIX + metric_key] = metric_value
mlperf.logger.log_eval_accuracy(self.eval_coco_ap, self.eval_global_step,
self.batch_size * self.params.num_gpus,
ssd_constants.COCO_NUM_TRAIN_IMAGES)
return ret
log_fn('Got {:d} out of {:d} eval examples.'
' Waiting for the remaining to calculate mAP...'.format(
len(self.predictions), ssd_constants.COCO_NUM_VAL_IMAGES))
return {'top_1_accuracy': self.eval_coco_ap, 'top_5_accuracy': 0.}
def get_synthetic_inputs(self, input_name, nclass):
"""Generating synthetic data matching real data shape and type."""
inputs = tf.random_uniform(
self.get_input_shapes('train')[0], dtype=self.data_type)
inputs = variables.VariableV1(inputs, trainable=False,
collections=[tf.GraphKeys.LOCAL_VARIABLES],
name=input_name)
boxes = tf.random_uniform(
[self.batch_size, ssd_constants.NUM_SSD_BOXES, 4], dtype=tf.float32)
classes = tf.random_uniform(
[self.batch_size, ssd_constants.NUM_SSD_BOXES, 1], dtype=tf.float32)
nboxes = tf.random_uniform(
[self.batch_size], minval=1, maxval=10, dtype=tf.float32)
return (inputs, boxes, classes, nboxes)
def reached_target(self):
return (self.params.stop_at_top_1_accuracy and
self.eval_coco_ap >= self.params.stop_at_top_1_accuracy)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Image pre-processing utilities.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow.compat.v1 as tf
# pylint: disable=g-direct-tensorflow-import
import cnn_util
from tensorflow.python.data.experimental.ops import threadpool
from tensorflow.python.data.ops import multi_device_iterator_ops
from tensorflow.python.framework import function
from tensorflow.python.layers import utils
from tensorflow.python.ops import data_flow_ops
from tensorflow.python.platform import gfile
import mlperf
def parse_example_proto(example_serialized):
"""Parses an Example proto containing a training example of an image.
The output of the build_image_data.py image preprocessing script is a dataset
containing serialized Example protocol buffers. Each Example proto contains
the following fields:
image/height: 462
image/width: 581
image/colorspace: 'RGB'
image/channels: 3
image/class/label: 615
image/class/synset: 'n03623198'
image/class/text: 'knee pad'
image/object/bbox/xmin: 0.1
image/object/bbox/xmax: 0.9
image/object/bbox/ymin: 0.2
image/object/bbox/ymax: 0.6
image/object/bbox/label: 615
image/format: 'JPEG'
image/filename: 'ILSVRC2012_val_00041207.JPEG'
image/encoded: <JPEG encoded string>
Args:
example_serialized: scalar Tensor tf.string containing a serialized
Example protocol buffer.
Returns:
image_buffer: Tensor tf.string containing the contents of a JPEG file.
label: Tensor tf.int32 containing the label.
bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
where each coordinate is [0, 1) and the coordinates are arranged as
[ymin, xmin, ymax, xmax].
text: Tensor tf.string containing the human-readable label.
"""
# Dense features in Example proto.
feature_map = {
'image/encoded': tf.FixedLenFeature([], dtype=tf.string,
default_value=''),
'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64,
default_value=-1),
'image/class/text': tf.FixedLenFeature([], dtype=tf.string,
default_value=''),
}
sparse_float32 = tf.VarLenFeature(dtype=tf.float32)
# Sparse features in Example proto.
feature_map.update(
{k: sparse_float32 for k in ['image/object/bbox/xmin',
'image/object/bbox/ymin',
'image/object/bbox/xmax',
'image/object/bbox/ymax']})
features = tf.parse_single_example(example_serialized, feature_map)
label = tf.cast(features['image/class/label'], dtype=tf.int32)
xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0)
ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0)
xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0)
ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0)
# Note that we impose an ordering of (y, x) just to make life difficult.
bbox = tf.concat([ymin, xmin, ymax, xmax], 0)
# Force the variable number of bounding boxes into the shape
# [1, num_boxes, coords].
bbox = tf.expand_dims(bbox, 0)
bbox = tf.transpose(bbox, [0, 2, 1])
return features['image/encoded'], label, bbox, features['image/class/text']
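# Usage sketch for parse_example_proto (assuming `filenames` lists TFRecord
# files produced by build_image_data.py):
#   ds = tf.data.TFRecordDataset(filenames)
#   ds = ds.map(parse_example_proto)  # -> (image_buffer, label, bbox, text)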
_RESIZE_METHOD_MAP = {
'nearest': tf.image.ResizeMethod.NEAREST_NEIGHBOR,
'bilinear': tf.image.ResizeMethod.BILINEAR,
'bicubic': tf.image.ResizeMethod.BICUBIC,
'area': tf.image.ResizeMethod.AREA
}
def get_image_resize_method(resize_method, batch_position=0):
"""Get tensorflow resize method.
If resize_method is 'round_robin', return different methods based on batch
position in a round-robin fashion. NOTE: If the batch size is not a multiple
of the number of methods, then the distribution of methods will not be
uniform.
Args:
resize_method: (string) nearest, bilinear, bicubic, area, or round_robin.
batch_position: position of the image in a batch. NOTE: this argument can
be an integer or a tensor
Returns:
one of the resize types defined in tf.image.ResizeMethod.
"""
if resize_method != 'round_robin':
return _RESIZE_METHOD_MAP[resize_method]
# return a resize method based on batch position in a round-robin fashion.
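# For example (illustrative), integer batch positions 0, 1, 2, 3, 4, ...
# map to nearest, bilinear, bicubic, area, nearest, ...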
resize_methods = list(_RESIZE_METHOD_MAP.values())
def lookup(index):
return resize_methods[index]
def resize_method_0():
return utils.smart_cond(batch_position % len(resize_methods) == 0,
lambda: lookup(0), resize_method_1)
def resize_method_1():
return utils.smart_cond(batch_position % len(resize_methods) == 1,
lambda: lookup(1), resize_method_2)
def resize_method_2():
return utils.smart_cond(batch_position % len(resize_methods) == 2,
lambda: lookup(2), lambda: lookup(3))
# NOTE(jsimsa): Unfortunately, we cannot use a single recursive function here
# because TF would not be able to construct a finite graph.
return resize_method_0()
def decode_jpeg(image_buffer, scope=None): # , dtype=tf.float32):
"""Decode a JPEG string into one 3-D float image Tensor.
Args:
image_buffer: scalar string Tensor.
scope: Optional scope for op_scope.
Returns:
3-D float Tensor with values ranging from [0, 1).
"""
# with tf.op_scope([image_buffer], scope, 'decode_jpeg'):
# with tf.name_scope(scope, 'decode_jpeg', [image_buffer]):
with tf.name_scope(scope or 'decode_jpeg'):
# Decode the string as an RGB JPEG.
# Note that the resulting image contains an unknown height and width
# that is set dynamically by decode_jpeg. In other words, the height
# and width of the image are unknown at compile time.
image = tf.image.decode_jpeg(image_buffer, channels=3,
fancy_upscaling=False,
dct_method='INTEGER_FAST')
# image = tf.Print(image, [tf.shape(image)], 'Image shape: ')
return image
_R_MEAN = 123.68
_G_MEAN = 116.78
_B_MEAN = 103.94
_CHANNEL_MEANS = [_R_MEAN, _G_MEAN, _B_MEAN]
def normalized_image(images):
# Rescale from [0, 255] to [0, 2]
images = tf.multiply(images, 1. / 127.5)
# Rescale to [-1, 1]
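# Worked example: a pixel value of 255 maps to 255 / 127.5 - 1 = 1.0,
# 127.5 maps to 0.0, and 0 maps to -1.0.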
mlperf.logger.log(key=mlperf.tags.INPUT_MEAN_SUBTRACTION, value=[1.0] * 3)
return tf.subtract(images, 1.0)
def eval_image(image,
height,
width,
batch_position,
resize_method,
summary_verbosity=0):
"""Get the image for model evaluation.
We preprocess the image similarly to Slim, see
https://github.com/tensorflow/models/blob/master/research/slim/preprocessing/vgg_preprocessing.py
Validation images do not have bounding boxes, so to crop the image, we first
resize the image such that the aspect ratio is maintained and the resized
height and width are both at least 1.145 times `height` and `width`
respectively. Then, we do a central crop to size (`height`, `width`).
Args:
image: 3-D float Tensor representing the image.
height: The height of the image that will be returned.
width: The width of the image that will be returned.
batch_position: position of the image in a batch, which affects how images
are distorted and resized. NOTE: this argument can be an integer or a
tensor
resize_method: one of the strings 'round_robin', 'nearest', 'bilinear',
'bicubic', or 'area'.
summary_verbosity: Verbosity level for summary ops. Pass 0 to disable both
summaries and checkpoints.
Returns:
An image of size (output_height, output_width, 3) that is resized and
cropped as described above.
"""
# TODO(reedwm): Currently we resize then crop. Investigate if it's faster to
# crop then resize.
with tf.name_scope('eval_image'):
if summary_verbosity >= 3:
tf.summary.image(
'original_image', tf.expand_dims(image, 0))
shape = tf.shape(image)
image_height = shape[0]
image_width = shape[1]
image_height_float = tf.cast(image_height, tf.float32)
image_width_float = tf.cast(image_width, tf.float32)
# This value is chosen so that in resnet, images are cropped to a size of
# 256 x 256, which matches what other implementations do. The final image
# size for resnet is 224 x 224, and floor(224 * 1.145) = 256.
scale_factor = 1.145
# Compute resize_height and resize_width to be the minimum values such that
# 1. The aspect ratio is maintained (i.e. resize_height / resize_width is
# image_height / image_width), and
# 2. resize_height >= height * `scale_factor`, and
# 3. resize_width >= width * `scale_factor`
max_ratio = tf.maximum(height / image_height_float,
width / image_width_float)
resize_height = tf.cast(image_height_float * max_ratio * scale_factor,
tf.int32)
resize_width = tf.cast(image_width_float * max_ratio * scale_factor,
tf.int32)
mlperf.logger.log_input_resize_aspect_preserving(height, width,
scale_factor)
# Resize the image to shape (`resize_height`, `resize_width`)
image_resize_method = get_image_resize_method(resize_method, batch_position)
distorted_image = tf.image.resize_images(image,
[resize_height, resize_width],
image_resize_method,
align_corners=False)
# Do a central crop of the image to size (height, width).
# MLPerf requires us to log (height, width) with two different keys.
mlperf.logger.log(key=mlperf.tags.INPUT_CENTRAL_CROP, value=[height, width])
mlperf.logger.log(key=mlperf.tags.INPUT_RESIZE, value=[height, width])
total_crop_height = (resize_height - height)
crop_top = total_crop_height // 2
total_crop_width = (resize_width - width)
crop_left = total_crop_width // 2
distorted_image = tf.slice(distorted_image, [crop_top, crop_left, 0],
[height, width, 3])
distorted_image.set_shape([height, width, 3])
if summary_verbosity >= 3:
tf.summary.image(
'cropped_resized_image', tf.expand_dims(distorted_image, 0))
image = distorted_image
return image
def train_image(image_buffer,
height,
width,
bbox,
batch_position,
resize_method,
distortions,
scope=None,
summary_verbosity=0,
distort_color_in_yiq=False,
fuse_decode_and_crop=False):
"""Distort one image for training a network.
Distorting images provides a useful technique for augmenting the data
set during training in order to make the network invariant to aspects
of the image that do not affect the label.
Args:
image_buffer: scalar string Tensor representing the raw JPEG image buffer.
height: integer
width: integer
bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
where each coordinate is [0, 1) and the coordinates are arranged
as [ymin, xmin, ymax, xmax].
batch_position: position of the image in a batch, which affects how images
are distorted and resized. NOTE: this argument can be an integer or a
tensor
resize_method: round_robin, nearest, bilinear, bicubic, or area.
distortions: If true, apply full distortions for image colors.
scope: Optional scope for op_scope.
summary_verbosity: Verbosity level for summary ops. Pass 0 to disable both
summaries and checkpoints.
distort_color_in_yiq: distort color of input images in YIQ space.
fuse_decode_and_crop: fuse the decode/crop operation.
Returns:
3-D float Tensor of distorted image used for training.
"""
# with tf.op_scope([image, height, width, bbox], scope, 'distort_image'):
# with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]):
with tf.name_scope(scope or 'distort_image'):
# A large fraction of image datasets contain a human-annotated bounding box
# delineating the region of the image containing the object of interest. We
# choose to create a new bounding box for the object which is a randomly
# distorted version of the human-annotated bounding box that obeys an
# allowed range of aspect ratios, sizes and overlap with the human-annotated
# bounding box. If no box is supplied, then we assume the bounding box is
# the entire image.
min_object_covered = 0.1
aspect_ratio_range = [0.75, 1.33]
area_range = [0.05, 1.0]
max_attempts = 100
mlperf.logger.log(key=mlperf.tags.INPUT_DISTORTED_CROP_MIN_OBJ_COV,
value=min_object_covered)
mlperf.logger.log(key=mlperf.tags.INPUT_DISTORTED_CROP_RATIO_RANGE,
value=aspect_ratio_range)
mlperf.logger.log(key=mlperf.tags.INPUT_DISTORTED_CROP_AREA_RANGE,
value=area_range)
mlperf.logger.log(key=mlperf.tags.INPUT_DISTORTED_CROP_MAX_ATTEMPTS,
value=max_attempts)
sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(
tf.image.extract_jpeg_shape(image_buffer),
bounding_boxes=bbox,
min_object_covered=min_object_covered,
aspect_ratio_range=aspect_ratio_range,
area_range=area_range,
max_attempts=max_attempts,
use_image_if_no_bounding_boxes=True)
bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box
if summary_verbosity >= 3:
image = tf.image.decode_jpeg(image_buffer, channels=3,
dct_method='INTEGER_FAST')
image = tf.image.convert_image_dtype(image, dtype=tf.float32)
image_with_distorted_box = tf.image.draw_bounding_boxes(
tf.expand_dims(image, 0), distort_bbox)
tf.summary.image(
'images_with_distorted_bounding_box',
image_with_distorted_box)
# Crop the image to the specified bounding box.
if fuse_decode_and_crop:
offset_y, offset_x, _ = tf.unstack(bbox_begin)
target_height, target_width, _ = tf.unstack(bbox_size)
crop_window = tf.stack([offset_y, offset_x, target_height, target_width])
image = tf.image.decode_and_crop_jpeg(
image_buffer, crop_window, channels=3)
else:
image = tf.image.decode_jpeg(image_buffer, channels=3,
dct_method='INTEGER_FAST')
image = tf.slice(image, bbox_begin, bbox_size)
mlperf.logger.log(key=mlperf.tags.INPUT_RANDOM_FLIP)
distorted_image = tf.image.random_flip_left_right(image)
# This resizing operation may distort the images because the aspect
# ratio is not respected.
mlperf.logger.log(key=mlperf.tags.INPUT_RESIZE, value=[height, width])
image_resize_method = get_image_resize_method(resize_method, batch_position)
distorted_image = tf.image.resize_images(
distorted_image, [height, width],
image_resize_method,
align_corners=False)
# Restore the shape since the dynamic slice based upon the bbox_size loses
# the third dimension.
distorted_image.set_shape([height, width, 3])
if summary_verbosity >= 3:
tf.summary.image('cropped_resized_maybe_flipped_image',
tf.expand_dims(distorted_image, 0))
if distortions:
distorted_image = tf.cast(distorted_image, dtype=tf.float32)
# Image values are expected to be in [0, 1] for color distortion.
distorted_image /= 255.
# Randomly distort the colors.
distorted_image = distort_color(distorted_image, batch_position,
distort_color_in_yiq=distort_color_in_yiq)
# Note: This ensures the scaling matches the output of eval_image
distorted_image *= 255
if summary_verbosity >= 3:
tf.summary.image(
'final_distorted_image',
tf.expand_dims(distorted_image, 0))
return distorted_image
def distort_color(image, batch_position=0, distort_color_in_yiq=False,
scope=None):
"""Distort the color of the image.
Each color distortion is non-commutative and thus ordering of the color ops
matters. Ideally we would randomly permute the ordering of the color ops.
Rather than adding that level of complication, we select a distinct ordering
of color ops based on the position of the image in a batch.
Args:
image: float32 Tensor containing single image. Tensor values should be in
range [0, 1].
batch_position: the position of the image in a batch. NOTE: this argument
can be an integer or a tensor
distort_color_in_yiq: distort color of input images in YIQ space.
scope: Optional scope for op_scope.
Returns:
color-distorted image
"""
if distort_color_in_yiq:
try:
from tensorflow.contrib.image.python.ops import distort_image_ops # pylint: disable=g-import-not-at-top
except ImportError:
raise ValueError(
'In TF2, you cannot pass --distortions unless you also pass '
'--nodistort_color_in_yiq. This is because the random_hsv_in_yiq was '
'removed in TF2. --distortions does not improve accuracy on resnet '
'so it is not recommended. --nodistort_color_in_yiq also has no '
'impact on accuracy, but may hurt performance.')
with tf.name_scope(scope or 'distort_color'):
def distort_fn_0(image=image):
"""Variant 0 of distort function."""
image = tf.image.random_brightness(image, max_delta=32. / 255.)
if distort_color_in_yiq:
image = distort_image_ops.random_hsv_in_yiq(
image, lower_saturation=0.5, upper_saturation=1.5,
max_delta_hue=0.2 * math.pi)
else:
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
image = tf.image.random_hue(image, max_delta=0.2)
image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
return image
def distort_fn_1(image=image):
"""Variant 1 of distort function."""
image = tf.image.random_brightness(image, max_delta=32. / 255.)
image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
if distort_color_in_yiq:
image = distort_image_ops.random_hsv_in_yiq(
image, lower_saturation=0.5, upper_saturation=1.5,
max_delta_hue=0.2 * math.pi)
else:
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
image = tf.image.random_hue(image, max_delta=0.2)
return image
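# For example, images at even batch positions get the order
# brightness -> saturation/hue -> contrast (distort_fn_0), while images at
# odd positions get brightness -> contrast -> saturation/hue
# (distort_fn_1).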
image = utils.smart_cond(batch_position % 2 == 0, distort_fn_0,
distort_fn_1)
# The random_* ops do not necessarily clamp.
image = tf.clip_by_value(image, 0.0, 1.0)
return image
class InputPreprocessor(object):
"""Base class for all model preprocessors."""
def __init__(self, batch_size, output_shapes):
self.batch_size = batch_size
self.output_shapes = output_shapes
def supports_datasets(self):
"""Whether this preprocessor supports dataset."""
return False
def minibatch(self, dataset, subset, params, shift_ratio=-1):
"""Returns tensors representing a minibatch of all the input."""
raise NotImplementedError('Must be implemented by subclass.')
# The methods added below are only supported/used if supports_datasets()
# returns True.
# TODO(laigd): refactor benchmark_cnn.py and put the logic of
# _build_input_processing() into InputPreprocessor.
def parse_and_preprocess(self, value, batch_position):
"""Function to parse and preprocess an Example proto in input pipeline."""
raise NotImplementedError('Must be implemented by subclass.')
# TODO(laigd): figure out how to remove these parameters, since the
# preprocessor itself has self.batch_size, self.num_splits, etc defined.
def build_multi_device_iterator(self, batch_size, num_splits, cpu_device,
params, gpu_devices, dataset, doing_eval):
"""Creates a MultiDeviceIterator."""
assert self.supports_datasets()
assert num_splits == len(gpu_devices)
with tf.name_scope('batch_processing'):
if doing_eval:
subset = 'validation'
else:
subset = 'train'
batch_size_per_split = batch_size // num_splits
ds = self.create_dataset(
batch_size,
num_splits,
batch_size_per_split,
dataset,
subset,
train=(not doing_eval),
datasets_repeat_cached_sample=params.datasets_repeat_cached_sample,
num_threads=params.datasets_num_private_threads,
datasets_use_caching=params.datasets_use_caching,
datasets_parallel_interleave_cycle_length=(
params.datasets_parallel_interleave_cycle_length),
datasets_sloppy_parallel_interleave=(
params.datasets_sloppy_parallel_interleave),
datasets_parallel_interleave_prefetch=(
params.datasets_parallel_interleave_prefetch))
multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
ds,
gpu_devices,
source_device=cpu_device,
max_buffer_size=params.multi_device_iterator_max_buffer_size)
tf.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS,
multi_device_iterator.initializer)
return multi_device_iterator
def create_dataset(self,
batch_size,
num_splits,
batch_size_per_split,
dataset,
subset,
train,
datasets_repeat_cached_sample,
num_threads=None,
datasets_use_caching=False,
datasets_parallel_interleave_cycle_length=None,
datasets_sloppy_parallel_interleave=False,
datasets_parallel_interleave_prefetch=None):
"""Creates a dataset for the benchmark."""
raise NotImplementedError('Must be implemented by subclass.')
def create_iterator(self, ds):
ds_iterator = tf.data.make_initializable_iterator(ds)
tf.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS,
ds_iterator.initializer)
return ds_iterator
def minibatch_fn(self, batch_size, model_input_shapes, num_splits,
dataset, subset, train, datasets_repeat_cached_sample,
num_threads, datasets_use_caching,
datasets_parallel_interleave_cycle_length,
datasets_sloppy_parallel_interleave,
datasets_parallel_interleave_prefetch):
"""Returns a function and list of args for the fn to create a minibatch."""
assert self.supports_datasets()
batch_size_per_split = batch_size // num_splits
assert batch_size_per_split == model_input_shapes[0][0]
with tf.name_scope('batch_processing'):
ds = self.create_dataset(batch_size, num_splits, batch_size_per_split,
dataset, subset, train,
datasets_repeat_cached_sample, num_threads,
datasets_use_caching,
datasets_parallel_interleave_cycle_length,
datasets_sloppy_parallel_interleave,
datasets_parallel_interleave_prefetch)
ds_iterator = self.create_iterator(ds)
ds_iterator_string_handle = ds_iterator.string_handle()
@function.Defun(tf.string)
def _fn(h):
remote_iterator = tf.data.Iterator.from_string_handle(
h, ds_iterator.output_types, ds_iterator.output_shapes)
input_list = remote_iterator.get_next()
reshaped_input_list = [
tf.reshape(input_list[i], shape=model_input_shapes[i])
for i in range(len(input_list))
]
return reshaped_input_list
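# Usage sketch (how the caller is expected to use the return value): the
# args list holds the iterator's string-handle tensor; invoking
# _fn(handle) inside the graph rebuilds the iterator from that handle and
# reshapes each component to the static model input shapes.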
return _fn, [ds_iterator_string_handle]
class BaseImagePreprocessor(InputPreprocessor):
"""Base class for all image model preprocessors."""
def __init__(self,
batch_size,
output_shapes,
num_splits,
dtype,
train,
distortions,
resize_method,
shift_ratio=-1,
summary_verbosity=0,
distort_color_in_yiq=True,
fuse_decode_and_crop=True,
match_mlperf=False):
super(BaseImagePreprocessor, self).__init__(batch_size, output_shapes)
image_shape = output_shapes[0]
# image_shape is in form (batch_size, height, width, depth)
self.height = image_shape[1]
self.width = image_shape[2]
self.depth = image_shape[3]
self.num_splits = num_splits
self.dtype = dtype
self.train = train
self.resize_method = resize_method
self.shift_ratio = shift_ratio
self.distortions = distortions
self.distort_color_in_yiq = distort_color_in_yiq
self.fuse_decode_and_crop = fuse_decode_and_crop
if self.batch_size % self.num_splits != 0:
raise ValueError(
('batch_size must be a multiple of num_splits: '
'batch_size %d, num_splits: %d') %
(self.batch_size, self.num_splits))
self.batch_size_per_split = self.batch_size // self.num_splits
self.summary_verbosity = summary_verbosity
self.match_mlperf = match_mlperf
def parse_and_preprocess(self, value, batch_position):
assert self.supports_datasets()
image_buffer, label_index, bbox, _ = parse_example_proto(value)
if self.match_mlperf:
bbox = tf.zeros((1, 0, 4), dtype=bbox.dtype)
mlperf.logger.log(key=mlperf.tags.INPUT_CROP_USES_BBOXES, value=False)
else:
mlperf.logger.log(key=mlperf.tags.INPUT_CROP_USES_BBOXES, value=True)
image = self.preprocess(image_buffer, bbox, batch_position)
return (image, label_index)
def preprocess(self, image_buffer, bbox, batch_position):
raise NotImplementedError('Must be implemented by subclass.')
def create_dataset(self,
batch_size,
num_splits,
batch_size_per_split,
dataset,
subset,
train,
datasets_repeat_cached_sample,
num_threads=None,
datasets_use_caching=False,
datasets_parallel_interleave_cycle_length=None,
datasets_sloppy_parallel_interleave=False,
datasets_parallel_interleave_prefetch=None):
"""Creates a dataset for the benchmark."""
assert self.supports_datasets()
glob_pattern = dataset.tf_record_pattern(subset)
file_names = gfile.Glob(glob_pattern)
if not file_names:
raise ValueError('Found no files in --data_dir matching: {}'
.format(glob_pattern))
ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=train)
ds = ds.apply(
tf.data.experimental.parallel_interleave(
tf.data.TFRecordDataset,
cycle_length=datasets_parallel_interleave_cycle_length or 10,
sloppy=datasets_sloppy_parallel_interleave,
prefetch_input_elements=datasets_parallel_interleave_prefetch))
if datasets_repeat_cached_sample:
# Repeat a single sample element indefinitely to emulate memory-speed IO.
ds = ds.take(1).cache().repeat()
counter = tf.data.Dataset.range(batch_size)
counter = counter.repeat()
ds = tf.data.Dataset.zip((ds, counter))
ds = ds.prefetch(buffer_size=batch_size)
if datasets_use_caching:
ds = ds.cache()
if train:
buffer_size = 10000
mlperf.logger.log(key=mlperf.tags.INPUT_SHARD, value=buffer_size)
ds = ds.apply(
tf.data.experimental.shuffle_and_repeat(buffer_size=buffer_size))
else:
ds = ds.repeat()
ds = ds.apply(
tf.data.experimental.map_and_batch(
map_func=self.parse_and_preprocess,
batch_size=batch_size_per_split,
num_parallel_batches=num_splits))
ds = ds.prefetch(buffer_size=num_splits)
if num_threads:
ds = threadpool.override_threadpool(
ds,
threadpool.PrivateThreadPool(
num_threads, display_name='input_pipeline_thread_pool'))
return ds
class RecordInputImagePreprocessor(BaseImagePreprocessor):
"""Preprocessor for images with RecordInput format."""
def preprocess(self, image_buffer, bbox, batch_position):
"""Preprocessing image_buffer as a function of its batch position."""
if self.train:
image = train_image(image_buffer, self.height, self.width, bbox,
batch_position, self.resize_method, self.distortions,
None, summary_verbosity=self.summary_verbosity,
distort_color_in_yiq=self.distort_color_in_yiq,
fuse_decode_and_crop=self.fuse_decode_and_crop)
else:
image = tf.image.decode_jpeg(
image_buffer, channels=3, dct_method='INTEGER_FAST')
image = eval_image(image, self.height, self.width, batch_position,
self.resize_method,
summary_verbosity=self.summary_verbosity)
# Note: image is now float32 [height,width,3] with range [0, 255]
# image = tf.cast(image, tf.uint8) # HACK TESTING
if self.match_mlperf:
mlperf.logger.log(key=mlperf.tags.INPUT_MEAN_SUBTRACTION,
value=_CHANNEL_MEANS)
normalized = image - _CHANNEL_MEANS
else:
normalized = normalized_image(image)
return tf.cast(normalized, self.dtype)
def minibatch(self,
dataset,
subset,
params,
shift_ratio=-1):
if shift_ratio < 0:
shift_ratio = self.shift_ratio
with tf.name_scope('batch_processing'):
# Build final results per split.
images = [[] for _ in range(self.num_splits)]
labels = [[] for _ in range(self.num_splits)]
if params.use_datasets:
ds = self.create_dataset(
self.batch_size, self.num_splits, self.batch_size_per_split,
dataset, subset, self.train,
datasets_repeat_cached_sample=params.datasets_repeat_cached_sample,
num_threads=params.datasets_num_private_threads,
datasets_use_caching=params.datasets_use_caching,
datasets_parallel_interleave_cycle_length=(
params.datasets_parallel_interleave_cycle_length),
datasets_sloppy_parallel_interleave=(
params.datasets_sloppy_parallel_interleave),
datasets_parallel_interleave_prefetch=(
params.datasets_parallel_interleave_prefetch))
ds_iterator = self.create_iterator(ds)
for d in xrange(self.num_splits):
images[d], labels[d] = ds_iterator.get_next()
# TODO(laigd): consider removing the --use_datasets option, it should
# always use datasets.
else:
record_input = data_flow_ops.RecordInput(
file_pattern=dataset.tf_record_pattern(subset),
seed=301,
parallelism=64,
buffer_size=10000,
batch_size=self.batch_size,
shift_ratio=shift_ratio,
name='record_input')
records = record_input.get_yield_op()
records = tf.split(records, self.batch_size, 0)
records = [tf.reshape(record, []) for record in records]
for idx in xrange(self.batch_size):
value = records[idx]
(image, label) = self.parse_and_preprocess(value, idx)
split_index = idx % self.num_splits
labels[split_index].append(label)
images[split_index].append(image)
for split_index in xrange(self.num_splits):
if not params.use_datasets:
images[split_index] = tf.parallel_stack(images[split_index])
labels[split_index] = tf.concat(labels[split_index], 0)
images[split_index] = tf.reshape(
images[split_index],
shape=[self.batch_size_per_split, self.height, self.width,
self.depth])
labels[split_index] = tf.reshape(labels[split_index],
[self.batch_size_per_split])
return images, labels
def supports_datasets(self):
return True
class ImagenetPreprocessor(RecordInputImagePreprocessor):
def preprocess(self, image_buffer, bbox, batch_position):
# pylint: disable=g-import-not-at-top
try:
from official.r1.resnet.imagenet_preprocessing import preprocess_image
except ImportError:
tf.logging.fatal('Please include tensorflow/models to the PYTHONPATH.')
raise
if self.train:
image = preprocess_image(
image_buffer, bbox, self.height, self.width, self.depth,
is_training=True)
else:
image = preprocess_image(
image_buffer, bbox, self.height, self.width, self.depth,
is_training=False)
return tf.cast(image, self.dtype)
class Cifar10ImagePreprocessor(BaseImagePreprocessor):
"""Preprocessor for Cifar10 input images."""
def _distort_image(self, image):
"""Distort one image for training a network.
We adopt the standard data augmentation scheme that is widely used for
this dataset: the images are first zero-padded with 4 pixels on each side,
then randomly cropped back to their original size; half of the images
are then horizontally mirrored.
Args:
image: input image.
Returns:
distorted image.
"""
image = tf.image.resize_image_with_crop_or_pad(
image, self.height + 8, self.width + 8)
distorted_image = tf.random_crop(image,
[self.height, self.width, self.depth])
# Randomly flip the image horizontally.
distorted_image = tf.image.random_flip_left_right(distorted_image)
if self.summary_verbosity >= 3:
tf.summary.image('distorted_image', tf.expand_dims(distorted_image, 0))
return distorted_image
def _eval_image(self, image):
"""Get the image for model evaluation."""
distorted_image = tf.image.resize_image_with_crop_or_pad(
image, self.height, self.width)
if self.summary_verbosity >= 3:
tf.summary.image('cropped.image', tf.expand_dims(distorted_image, 0))
return distorted_image
def preprocess(self, raw_image):
"""Preprocessing raw image."""
if self.summary_verbosity >= 3:
tf.summary.image('raw.image', tf.expand_dims(raw_image, 0))
if self.train and self.distortions:
image = self._distort_image(raw_image)
else:
image = self._eval_image(raw_image)
normalized = normalized_image(image)
return tf.cast(normalized, self.dtype)
def minibatch(self,
dataset,
subset,
params,
shift_ratio=-1):
# TODO(jsimsa): Implement datasets code path
del shift_ratio, params
with tf.name_scope('batch_processing'):
all_images, all_labels = dataset.read_data_files(subset)
all_images = tf.constant(all_images)
all_labels = tf.constant(all_labels)
input_image, input_label = tf.train.slice_input_producer(
[all_images, all_labels])
input_image = tf.cast(input_image, self.dtype)
input_label = tf.cast(input_label, tf.int32)
# Ensure that the random shuffling has good mixing properties.
min_fraction_of_examples_in_queue = 0.4
min_queue_examples = int(dataset.num_examples_per_epoch(subset) *
min_fraction_of_examples_in_queue)
raw_images, raw_labels = tf.train.shuffle_batch(
[input_image, input_label], batch_size=self.batch_size,
capacity=min_queue_examples + 3 * self.batch_size,
min_after_dequeue=min_queue_examples)
images = [[] for i in range(self.num_splits)]
labels = [[] for i in range(self.num_splits)]
# Create a list of size batch_size, each containing one image of the
# batch. Without the unstack call, raw_images[i] would still access the
# same image via a strided_slice op, but would be slower.
raw_images = tf.unstack(raw_images, axis=0)
raw_labels = tf.unstack(raw_labels, axis=0)
for i in xrange(self.batch_size):
split_index = i % self.num_splits
# The raw image read from data has the format [depth, height, width]
# reshape to the format returned by minibatch.
raw_image = tf.reshape(raw_images[i],
[dataset.depth, dataset.height, dataset.width])
raw_image = tf.transpose(raw_image, [1, 2, 0])
image = self.preprocess(raw_image)
images[split_index].append(image)
labels[split_index].append(raw_labels[i])
for split_index in xrange(self.num_splits):
images[split_index] = tf.parallel_stack(images[split_index])
labels[split_index] = tf.parallel_stack(labels[split_index])
return images, labels
class COCOPreprocessor(BaseImagePreprocessor):
"""Preprocessor for COCO dataset input images, boxes, and labels."""
def minibatch(self,
dataset,
subset,
params,
shift_ratio=-1):
del shift_ratio # Not used when using datasets instead of data_flow_ops
with tf.name_scope('batch_processing'):
ds = self.create_dataset(
self.batch_size, self.num_splits, self.batch_size_per_split,
dataset, subset, self.train, params.datasets_repeat_cached_sample)
ds_iterator = self.create_iterator(ds)
# Training data: 4 tuple
# Validation data: 5 tuple
# See get_input_shapes in models/ssd_model.py for details.
input_len = 4 if subset == 'train' else 5
input_lists = [[None for _ in range(self.num_splits)]
for _ in range(input_len)]
for d in xrange(self.num_splits):
input_list = ds_iterator.get_next()
for i in range(input_len):
input_lists[i][d] = input_list[i]
return input_lists
def preprocess(self, data):
try:
import ssd_dataloader # pylint: disable=g-import-not-at-top
import ssd_constants # pylint: disable=g-import-not-at-top
from object_detection.core import preprocessor # pylint: disable=g-import-not-at-top
except ImportError:
raise ImportError('To use the COCO dataset, you must clone the '
'repo https://github.com/tensorflow/models and add '
'tensorflow/models and tensorflow/models/research to '
'the PYTHONPATH, and compile the protobufs by '
'following https://github.com/tensorflow/models/blob/'
'master/research/object_detection/g3doc/installation.md'
'#protobuf-compilation')
image_buffer = data['image_buffer']
boxes = data['groundtruth_boxes']
classes = tf.reshape(data['groundtruth_classes'], [-1, 1])
source_id = tf.string_to_number(data['source_id'])
raw_shape = data['raw_shape']
ssd_encoder = ssd_dataloader.Encoder()
# Only 80 of the 90 COCO classes are used.
class_map = tf.convert_to_tensor(ssd_constants.CLASS_MAP)
classes = tf.gather(class_map, classes)
classes = tf.cast(classes, dtype=tf.float32)
if self.train:
image, boxes, classes = ssd_dataloader.ssd_decode_and_crop(
image_buffer, boxes, classes, raw_shape)
# ssd_crop resizes and returns image of dtype float32 and does not change
# its range (i.e., value in between 0--255). Divide by 255. converts it
# to [0, 1] range. Not doing this before cropping to avoid dtype cast
# (which incurs additional memory copy).
image /= 255.
image, boxes = preprocessor.random_horizontal_flip(
image=image, boxes=boxes)
# Random horizontal flip probability is 50%
# See https://github.com/tensorflow/models/blob/master/research/object_detection/core/preprocessor.py # pylint: disable=line-too-long
mlperf.logger.log(key=mlperf.tags.RANDOM_FLIP_PROBABILITY, value=0.5)
image = tf.cast(image, self.dtype)
encoded_returns = ssd_encoder.encode_labels(boxes, classes)
encoded_classes, encoded_boxes, num_matched_boxes = encoded_returns
# Shape of image: [width, height, channel]
# Shape of encoded_boxes: [NUM_SSD_BOXES, 4]
# Shape of encoded_classes: [NUM_SSD_BOXES, 1]
# Shape of num_matched_boxes: [1]
return (image, encoded_boxes, encoded_classes, num_matched_boxes)
else:
image = tf.image.decode_jpeg(image_buffer)
image = tf.image.resize_images(
image, size=(ssd_constants.IMAGE_SIZE, ssd_constants.IMAGE_SIZE))
# resize_image returns image of dtype float32 and does not change its
# range. Divide by 255 to convert image to [0, 1] range.
image /= 255.
image = ssd_dataloader.normalize_image(image)
image = tf.cast(image, self.dtype)
def trim_and_pad(inp_tensor):
"""Limit the number of boxes, and pad if necessary."""
inp_tensor = inp_tensor[:ssd_constants.MAX_NUM_EVAL_BOXES]
num_pad = ssd_constants.MAX_NUM_EVAL_BOXES - tf.shape(inp_tensor)[0]
inp_tensor = tf.pad(inp_tensor, [[0, num_pad], [0, 0]])
return tf.reshape(inp_tensor, [ssd_constants.MAX_NUM_EVAL_BOXES,
inp_tensor.get_shape()[1]])
boxes, classes = trim_and_pad(boxes), trim_and_pad(classes)
# Shape of boxes: [MAX_NUM_EVAL_BOXES, 4]
# Shape of classes: [MAX_NUM_EVAL_BOXES, 1]
# Shape of source_id: [] (scalar tensor)
# Shape of raw_shape: [3]
return (image, boxes, classes, source_id, raw_shape)
def create_dataset(self,
batch_size,
num_splits,
batch_size_per_split,
dataset,
subset,
train,
datasets_repeat_cached_sample,
num_threads=None,
datasets_use_caching=False,
datasets_parallel_interleave_cycle_length=None,
datasets_sloppy_parallel_interleave=False,
datasets_parallel_interleave_prefetch=None):
"""Creates a dataset for the benchmark."""
try:
import ssd_dataloader # pylint: disable=g-import-not-at-top
except ImportError:
raise ImportError('To use the COCO dataset, you must clone the '
'repo https://github.com/tensorflow/models and add '
'tensorflow/models and tensorflow/models/research to '
'the PYTHONPATH, and compile the protobufs by '
'following https://github.com/tensorflow/models/blob/'
'master/research/object_detection/g3doc/installation.md'
'#protobuf-compilation')
assert self.supports_datasets()
glob_pattern = dataset.tf_record_pattern(subset)
ds = tf.data.TFRecordDataset.list_files(glob_pattern, shuffle=train)
# TODO(haoyuzhang): Enable map+filter fusion after cl/218399112 in release
# options = tf.data.Options()
# options.experimental_optimization = tf.data.experimental.OptimizationOptions() # pylint: disable=line-too-long
# options.experimental_optimization.map_and_filter_fusion = True
# ds = ds.with_options(options)
ds = ds.apply(
tf.data.experimental.parallel_interleave(
tf.data.TFRecordDataset,
cycle_length=datasets_parallel_interleave_cycle_length or 10,
sloppy=datasets_sloppy_parallel_interleave))
mlperf.logger.log(key=mlperf.tags.INPUT_ORDER)
if datasets_repeat_cached_sample:
# Repeat a single sample element indefinitely to emulate memory-speed IO.
ds = ds.take(1).cache().repeat()
ds = ds.prefetch(buffer_size=batch_size)
if datasets_use_caching:
ds = ds.cache()
if train:
ds = ds.apply(tf.data.experimental.shuffle_and_repeat(buffer_size=10000))
mlperf.logger.log(key=mlperf.tags.INPUT_SHARD, value=10000)
mlperf.logger.log(key=mlperf.tags.INPUT_ORDER)
else:
ds = ds.repeat()
ds = ds.map(ssd_dataloader.ssd_parse_example_proto, num_parallel_calls=64)
ds = ds.filter(
lambda data: tf.greater(tf.shape(data['groundtruth_boxes'])[0], 0))
ds = ds.apply(
tf.data.experimental.map_and_batch(
map_func=self.preprocess,
batch_size=batch_size_per_split,
num_parallel_batches=num_splits,
drop_remainder=train))
ds = ds.prefetch(buffer_size=num_splits)
if num_threads:
ds = threadpool.override_threadpool(
ds,
threadpool.PrivateThreadPool(
num_threads, display_name='input_pipeline_thread_pool'))
return ds
def supports_datasets(self):
return True
class TestImagePreprocessor(BaseImagePreprocessor):
"""Preprocessor used for testing.
set_fake_data() sets which images and labels will be output by minibatch(),
and must be called before minibatch(). This allows tests to easily specify
a set of images to use for training, without having to create any files.
Queue runners must be started for this preprocessor to work.
"""
def __init__(self,
batch_size,
output_shapes,
num_splits,
dtype,
train=None,
distortions=None,
resize_method=None,
shift_ratio=0,
summary_verbosity=0,
distort_color_in_yiq=False,
fuse_decode_and_crop=False,
match_mlperf=False):
super(TestImagePreprocessor, self).__init__(
batch_size, output_shapes, num_splits, dtype, train, distortions,
resize_method, shift_ratio, summary_verbosity=summary_verbosity,
distort_color_in_yiq=distort_color_in_yiq,
fuse_decode_and_crop=fuse_decode_and_crop, match_mlperf=match_mlperf)
self.expected_subset = None
def set_fake_data(self, fake_images, fake_labels):
assert len(fake_images.shape) == 4
assert len(fake_labels.shape) == 1
num_images = fake_images.shape[0]
assert num_images == fake_labels.shape[0]
assert num_images % self.batch_size == 0
self.fake_images = fake_images
self.fake_labels = fake_labels
def minibatch(self,
dataset,
subset,
params,
shift_ratio=0):
"""Get test image batches."""
del dataset, params
if (not hasattr(self, 'fake_images') or
not hasattr(self, 'fake_labels')):
raise ValueError('Must call set_fake_data() before calling minibatch '
'on TestImagePreprocessor')
if self.expected_subset is not None:
assert subset == self.expected_subset
shift_ratio = shift_ratio or self.shift_ratio
fake_images = cnn_util.roll_numpy_batches(self.fake_images, self.batch_size,
shift_ratio)
fake_labels = cnn_util.roll_numpy_batches(self.fake_labels, self.batch_size,
shift_ratio)
with tf.name_scope('batch_processing'):
image_slice, label_slice = tf.train.slice_input_producer(
[fake_images, fake_labels],
shuffle=False,
name='image_slice')
raw_images, raw_labels = tf.train.batch(
[image_slice, label_slice], batch_size=self.batch_size,
name='image_batch')
images = [[] for _ in range(self.num_splits)]
labels = [[] for _ in range(self.num_splits)]
for i in xrange(self.batch_size):
split_index = i % self.num_splits
raw_image = tf.cast(raw_images[i], self.dtype)
images[split_index].append(raw_image)
labels[split_index].append(raw_labels[i])
for split_index in xrange(self.num_splits):
images[split_index] = tf.parallel_stack(images[split_index])
labels[split_index] = tf.parallel_stack(labels[split_index])
normalized = [normalized_image(part) for part in images]
return [[tf.cast(part, self.dtype) for part in normalized], labels]
class LibrispeechPreprocessor(InputPreprocessor):
"""Preprocessor for librispeech class for all image model preprocessors."""
def __init__(self, batch_size, output_shapes, num_splits, dtype, train,
**kwargs):
del kwargs
super(LibrispeechPreprocessor, self).__init__(batch_size, output_shapes)
self.num_splits = num_splits
self.dtype = dtype
self.is_train = train
if self.batch_size % self.num_splits != 0:
raise ValueError(('batch_size must be a multiple of num_splits: '
'batch_size %d, num_splits: %d') % (self.batch_size,
self.num_splits))
self.batch_size_per_split = self.batch_size // self.num_splits
def create_dataset(self,
batch_size,
num_splits,
batch_size_per_split,
dataset,
subset,
train,
datasets_repeat_cached_sample,
num_threads=None,
datasets_use_caching=False,
datasets_parallel_interleave_cycle_length=None,
datasets_sloppy_parallel_interleave=False,
datasets_parallel_interleave_prefetch=None):
"""Creates a dataset for the benchmark."""
# TODO(laigd): currently the only difference between this and the one in
# BaseImagePreprocessor is, this uses map() and padded_batch() while the
# latter uses tf.data.experimental.map_and_batch(). Try to merge them.
assert self.supports_datasets()
glob_pattern = dataset.tf_record_pattern(subset)
file_names = gfile.Glob(glob_pattern)
if not file_names:
raise ValueError('Found no files in --data_dir matching: {}'
.format(glob_pattern))
ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=train)
ds = ds.apply(
tf.data.experimental.parallel_interleave(
tf.data.TFRecordDataset,
cycle_length=datasets_parallel_interleave_cycle_length or 10,
sloppy=datasets_sloppy_parallel_interleave,
prefetch_input_elements=datasets_parallel_interleave_prefetch))
if datasets_repeat_cached_sample:
# Repeat a single sample element indefinitely to emulate memory-speed IO.
ds = ds.take(1).cache().repeat()
counter = tf.data.Dataset.range(batch_size)
counter = counter.repeat()
ds = tf.data.Dataset.zip((ds, counter))
ds = ds.prefetch(buffer_size=batch_size)
if datasets_use_caching:
ds = ds.cache()
if train:
ds = ds.apply(tf.data.experimental.shuffle_and_repeat(buffer_size=10000))
else:
ds = ds.repeat()
ds = ds.map(map_func=self.parse_and_preprocess,
num_parallel_calls=batch_size_per_split*num_splits)
ds = ds.padded_batch(
batch_size=batch_size_per_split,
padded_shapes=tuple([
tf.TensorShape(output_shape[1:])
for output_shape in self.output_shapes
]),
drop_remainder=True)
ds = ds.prefetch(buffer_size=num_splits)
if num_threads:
ds = threadpool.override_threadpool(
ds,
threadpool.PrivateThreadPool(
num_threads, display_name='input_pipeline_thread_pool'))
return ds
def minibatch(self, dataset, subset, params, shift_ratio=-1):
assert params.use_datasets
# TODO(laigd): unify this with CNNModel's minibatch()
# TODO(laigd): in distributed mode we use shift_ratio so different workers
# won't work on same inputs, so we should respect that.
del shift_ratio
with tf.name_scope('batch_processing'):
ds = self.create_dataset(
self.batch_size,
self.num_splits,
self.batch_size_per_split,
dataset,
subset,
self.is_train,
datasets_repeat_cached_sample=params.datasets_repeat_cached_sample,
num_threads=params.datasets_num_private_threads,
datasets_use_caching=params.datasets_use_caching,
datasets_parallel_interleave_cycle_length=(
params.datasets_parallel_interleave_cycle_length),
datasets_sloppy_parallel_interleave=(
params.datasets_sloppy_parallel_interleave),
datasets_parallel_interleave_prefetch=(
params.datasets_parallel_interleave_prefetch))
ds_iterator = self.create_iterator(ds)
# The four lists are: input spectrogram feature, labels, input lengths,
# label lengths
input_lists = [[None for _ in range(self.num_splits)] for _ in range(4)]
for d in xrange(self.num_splits):
input_list = ds_iterator.get_next()
for i in range(4):
input_lists[i][d] = input_list[i]
assert self.output_shapes == [
input_lists[i][0].shape.as_list() for i in range(4)
]
return tuple(input_lists)
def supports_datasets(self):
return True
def parse_and_preprocess(self, value, batch_position):
"""Parse an TFRecord."""
del batch_position
assert self.supports_datasets()
context_features = {
'labels': tf.VarLenFeature(dtype=tf.int64),
'input_length': tf.FixedLenFeature([], dtype=tf.int64),
'label_length': tf.FixedLenFeature([], dtype=tf.int64),
}
sequence_features = {
'features': tf.FixedLenSequenceFeature([161], dtype=tf.float32)
}
context_parsed, sequence_parsed = tf.parse_single_sequence_example(
serialized=value,
context_features=context_features,
sequence_features=sequence_features,
)
return [
# Input
tf.expand_dims(sequence_parsed['features'], axis=2),
# Label
tf.cast(
tf.reshape(
tf.sparse_tensor_to_dense(context_parsed['labels']), [-1]),
dtype=tf.int32),
# Input length
tf.cast(
tf.reshape(context_parsed['input_length'], [1]),
dtype=tf.int32),
# Label length
tf.cast(
tf.reshape(context_parsed['label_length'], [1]),
dtype=tf.int32),
]
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
*.sw[op]
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# IPython Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# dotenv
.env
# virtualenv
venv/
ENV/
# Spyder project settings
.spyderproject
# Rope project settings
.ropeproject
# PyCharm
.idea/
# For mac
.DS_Store
Table of Contents
=================
* [Table of Contents](#table-of-contents)
* [Introduction](#introduction)
* [Executing tests](#executing-tests)
* [PerfZero on private GCE instance.](#perfzero-on-private-gce-instance)
* [Step one: Create GCE Instance](#step-one-create-gce-instance)
* [Step two: Build docker on GCE instance](#step-two-build-docker-on-gce-instance)
* [Step three: Start and "enter" the docker instance](#step-three-start-and-enter-the-docker-instance)
* [Step four: Run tests](#step-four-run-tests)
* [Step five: Delete the instance when done](#step-five-delete-the-instance-when-done)
* [PerfZero on local workstation or any server](#perfzero-on-local-workstation-or-any-server)
* [PerfZero without docker](#perfzero-without-docker)
* [Creating tests](#creating-tests)
* [Deep dive into individual tools](#deep-dive-into-individual-tools)
* [Build docker image](#build-docker-image)
* [Run benchmark](#run-benchmark)
* [Instructions for managing Google Cloud Platform computing instance](#instructions-for-managing-google-cloud-platform-computing-instance)
* [Understand the benchmark execution output](#understand-the-benchmark-execution-output)
* [Json formatted benchmark summary](#json-formatted-benchmark-summary)
* [Profiling](#profiling)
* [Visualize in TensorBoard](#visualize-in-tensorboard)
* [Visualize system metric values over time](#visualize-system-metric-values-over-time)
* [PerfZero development](#perfzero-development)
# Introduction
PerfZero is a benchmark framework for TensorFlow. It intends to address the
following use cases:
1) For users who want to execute a TensorFlow test to debug a performance
regression.
PerfZero makes it easy to execute a pre-defined test by consolidating the
docker image build, GPU driver installation, TensorFlow installation, benchmark
library checkout, data download, system statistics collection, benchmark metrics
collection, profiler data collection and so on into 2 to 3 commands. This allows
developers to focus on investigating the issue rather than setting up the test
environment.
2) For users who want to track TensorFlow performance changes across a variety
of setups (e.g. GPU model, cuDNN version, TensorFlow version).
Developers can set up periodic jobs to execute these benchmark methods using
PerfZero. PerfZero will collect the information needed to identify the
benchmark (e.g. GPU model, TensorFlow version, dependent library git hash), get
the benchmark execution result (e.g. wall time, accuracy, succeeded or not),
summarize the result in an easy-to-read JSON string, and upload the result to a
BigQuery table. Using the data in the BigQuery table, users can then visualize
performance changes in a dashboard, compare performance between different
setups in a table, and trigger alerts when there is a performance regression.
# Executing tests
There are multiple ways to use PerfZero to execute a test, listed from highest
to lowest level of abstraction:
* [PerfZero on private GCE instance](#perfzero-on-private-gce-instance)
* [PerfZero on local workstation or any server](#perfzero-on-local-workstation-or-any-server)
* [PerfZero without docker](#perfzero-without-docker)
## PerfZero on private GCE instance.
There are many variations on this approach; to get you started quickly, the
steps below detail setting up an 8xV100 instance with local NVMe drives on
which the training data is stored. The only downside of this setup is that,
due to the local NVMe drives, the instance cannot be stopped; it can only be
deleted.
### Step one: Create GCE Instance
The command below creates an 8xV100 instance with 4 NVMe drives. Its output
provides the command to run to SSH to the machine. To set the project, zone,
and other options, read the
[cloud_manager tool details](https://github.com/tensorflow/benchmarks/tree/master/perfzero#instructions-for-managing-google-cloud-platform-computing-instance).
```bash
python perfzero/lib/cloud_manager.py create --accelerator_count 8 --nvme_count 4
```
### Step two: Build docker on GCE instance
After logging into the instance, run the following command to build a docker
image with the latest nightly TF 2.0 build. For more options, read the
[build docker image section](https://github.com/tensorflow/benchmarks/tree/master/perfzero#build-docker-image).
```bash
python3 perfzero/lib/setup.py --dockerfile_path=docker/Dockerfile_ubuntu_1804_tf_v2
```
For all options for building the docker image, including controlling the version
of TensorFlow installed, check out the public
[README for PerfZero](https://github.com/tensorflow/benchmarks/tree/master/perfzero#build-docker-image).
### Step three: Start and "enter" the docker instance
```bash
nvidia-docker run -it --rm -v $(pwd):/workspace -v /data:/data perfzero/tensorflow bash
```
### Step four: Run tests
The command below pulls GitHub official/models, downloads the cifar-10 dataset
from our internal Google Cloud storage bucket, and executes a ResNet56 benchmark
with the TensorFlow 2.0 nightly build. For info on the args, read the
[run benchmark section](https://github.com/tensorflow/benchmarks/tree/master/perfzero#run-benchmark).
```bash
python3 /workspace/perfzero/lib/benchmark.py \
--git_repos="https://github.com/tensorflow/models.git;benchmark" \
--python_path=models \
--data_downloads="gs://tf-perf-imagenet-uswest1/tensorflow/cifar10_data/cifar-10-batches-bin" \
--benchmark_methods=official.benchmark.keras_cifar_benchmark.Resnet56KerasBenchmarkReal.benchmark_1_gpu_no_dist_strat
```
For all options that can be used when executing a test, check out the public
[README for PerfZero](https://github.com/tensorflow/benchmarks/tree/master/perfzero#run-benchmark).
### Step five: Delete the instance when done
```bash
python perfzero/lib/cloud_manager.py delete
```
## PerfZero on local workstation or any server
This approach is the same as running PerfZero on a private GCE instance; simply
jump to Step two: Build docker on GCE instance.
If the workstation does not have access to the PerfZero GCS bucket and does not
need access (e.g. the data has already been copied locally by another method),
passing `--gcloud_key_file_url=""` will skip attempting to download the key.
A quick test that does not require accessing GCS for data is:
```bash
python3 /workspace/perfzero/lib/benchmark.py \
--git_repos="https://github.com/tensorflow/models.git;benchmark" \
--python_path=models \
--gcloud_key_file_url="" \
--benchmark_methods=official.benchmark.keras_cifar_benchmark.Resnet56KerasBenchmarkSynth.benchmark_1_gpu_no_dist_strat
```
## PerfZero without docker
PerfZero does not depend on Docker; Docker is used to handle dependencies and
create a consistent environment. Most tests do not require much beyond
TensorFlow, and PerfZero itself mostly depends on Google Cloud, and only for
downloading data and uploading results if desired. While this approach works,
we do not maintain a clear list of the required libraries; the Docker build
files are a good starting point.
Once the requirements are met, the command below can be executed. It pulls
GitHub official/models, downloads the cifar-10 dataset from our internal Google
Cloud storage bucket, and executes a ResNet benchmark with the TensorFlow 2.0
nightly build.
```bash
python3 /workspace/perfzero/lib/benchmark.py \
--git_repos="https://github.com/tensorflow/models.git;benchmark" \
--python_path=models \
--data_downloads="gs://tf-perf-imagenet-uswest1/tensorflow/cifar10_data/cifar-10-batches-bin" \
--benchmark_methods=official.r1.resnet.estimator_benchmark.Resnet50EstimatorBenchmarkReal.benchmark_graph_1_gpu
```
# Creating tests
Here are the instructions that developers of a benchmark method need to follow
in order to run the benchmark method in PerfZero. See
[estimator_benchmark.py](https://github.com/tensorflow/models/blob/master/official/r1/resnet/estimator_benchmark.py)
for example test code that supports PerfZero.
1) The benchmark class should extend the TensorFlow python class
[tensorflow.test.Benchmark](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/platform/benchmark.py) and should have a
constructor with signature `__init__(self, output_dir, root_data_dir, **kwargs)`.
Below is the usage of each argument:
- The benchmark method should put all generated files (e.g. logs) in `output_dir` so that PerfZero can
upload these files to Google Cloud Storage when `--output_gcs_url` is specified.
- The benchmark method should read data from `root_data_dir`, e.g. from `${root_data_dir}/cifar-10-binary`.
- `**kwargs` keeps the benchmark constructor forward compatible when PerfZero provides more named arguments to the benchmark constructor before
the benchmark class is updated.
2) At the end of the benchmark method execution, the method should call [report_benchmark()](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/platform/benchmark.py)
with the following parameters:
```
tf.test.Benchmark.report_benchmark(
iters=num_iteration, # The number of iterations of the benchmark.
wall_time=wall_time_sec, # Total wall time in sec for all iterations.
metrics = [ # List of metric entries
{
'name': 'accuracy_top_5', # Metric name
'value': 80, # Metric value
'min_value': 90, # Optional. Minimum acceptable metric value for the benchmark to succeed.
'max_value': 99 # Optional. Maximum acceptable metric value for the benchmark to succeed.
},
{
'name': 'accuracy_top_1',
'value': 99.5
}
]
)
```
This format allows PerfZero to indicate in its summary whether the benchmark
has succeeded (e.g. for a convergence test), based on the logic determined by
the benchmark developer. A minimal sketch of a benchmark class following these
conventions is shown below.
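Putting the pieces together, here is a minimal sketch of a PerfZero-compatible
benchmark class. The class name, method name, placeholder accuracy, and
threshold are hypothetical; only the constructor signature and the
`report_benchmark()` parameters follow the conventions above.
```
import time

import tensorflow as tf


class SampleBenchmark(tf.test.Benchmark):
  """Hypothetical PerfZero-compatible benchmark."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    # PerfZero passes output_dir (for generated files) and root_data_dir
    # (for input data); **kwargs keeps the constructor forward compatible.
    self.output_dir = output_dir
    self.root_data_dir = root_data_dir

  def benchmark_sample(self):
    start_time = time.time()
    accuracy = 0.95  # Placeholder; a real benchmark runs training/eval here.
    wall_time = time.time() - start_time
    self.report_benchmark(
        iters=1,              # Number of iterations of the benchmark.
        wall_time=wall_time,  # Total wall time in seconds.
        metrics=[{
            'name': 'accuracy_top_1',
            'value': accuracy,
            'min_value': 0.9,  # Optional success threshold.
        }])
```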
3) Include dependent libraries in `--git_repos` and `--python_path`. These
libraries will be checked out in the directory
`path_to_perfzero/workspace/site-packages` by default. Developers can edit these
libraries directly and execute the benchmark with local changes.
# Deep dive into individual tools
The sections below go into details about the individual components of PerfZero.
## Build docker image
The command below builds the docker image named `perfzero/tensorflow`, which
contains the libraries (e.g. TensorFlow) needed for the benchmark.
```
python3 benchmarks/perfzero/lib/setup.py
```
Here are a few selected optional flags. Run `python3 setup.py -h` to see
detailed documentation for all supported flags.
1) Use `--dockerfile_path=docker/Dockerfile_ubuntu_1804_tf_v2` to build the docker image for TensorFlow v2.
2) Use `--tensorflow_pip_spec` to specify the tensorflow pip package name (and optionally version) to be
installed in the docker image, e.g. `--tensorflow_pip_spec=tensorflow==1.12.0`.
## Run benchmark
The command below executes the benchmark method specified by `--benchmark_methods`.
```
export ROOT_DATA_DIR=/data
nvidia-docker run -it --rm -v $(pwd):/workspace -v $ROOT_DATA_DIR:$ROOT_DATA_DIR perfzero/tensorflow \
python3 /workspace/benchmarks/perfzero/lib/benchmark.py \
--gcloud_key_file_url="" \
--git_repos="https://github.com/tensorflow/models.git;benchmark" \
--python_path=models \
--benchmark_methods=official.r1.resnet.estimator_benchmark.Resnet50EstimatorBenchmarkSynth.benchmark_graph_1_gpu \
--root_data_dir=$ROOT_DATA_DIR
```
`${ROOT_DATA_DIR}` should be the directory which contains the dataset files
required by the benchmark method. If the flag `--data_downloads` is specified,
PerfZero will download files from the specified url to the directory specified
by the flag `--root_data_dir`. Otherwise, the user needs to manually download
and move the dataset files into the directory specified by `--root_data_dir`.
The default `root_data_dir` is `/data`. Some benchmark methods, like the one run
in the sample command above, do not require any dataset files.
Here are a few selected optional flags. Run `python3 benchmark.py -h` to see
detailed documentation for all supported flags.
1) Use `--workspace=unique_workspace_name` if you need to run multiple benchmarks
using different workspace setups. One example use case is that you may want to
test a branch from a pull request without changing your existing workspace.
2) Use `--debug` if you need to see debug-level logging.
3) Use `--git_repos="git_url;git_branch;git_hash"` to check out a git repo with
the specified git_branch at the specified git_hash into the local checkout
directory (`path_to_perfzero/workspace/site-packages` by default). **Note that**
the value of the flag `--git_repos` is wrapped in quotation marks `"` so that
`;` will not be interpreted by bash as the end of the command. Specify the flag
once for each repository you want to check out, e.g.
`--git_repos="https://github.com/tensorflow/models.git;benchmark"`.
4) Use `--profiler_enabled_time=start_time:end_time` to collect profiler data
during the period `[start_time, end_time)` after the benchmark method execution
starts. Skip `end_time` in the flag value to collect data until the end of the
benchmark method execution. See [here](#visualize-in-tensorboard)
for instructions on how to use the generated profiler data.
## Instructions for managing Google Cloud Platform computing instance
PerfZero aims to make it easy to run and debug TensorFlow, which is usually run
with GPUs. However, most users do not have a dedicated machine with expensive
hardware. One cost-effective solution is for users to create a machine with the
desired hardware on demand in a public cloud when they need to debug TensorFlow.
We provide a script in PerfZero to make it easy to manage computing instances in
Google Cloud Platform. This assumes that you have access to an existing project
in GCP.
Run `python perfzero/lib/cloud_manager.py -h` for the list of commands supported
by the script. Run `cloud_manager.py <command> -h` to see detailed documentation
for all supported flags for the specified `command`.
In most cases, the user only needs to run the following commands:
```
# Create a new instance that is unique to your username
python perfzero/lib/cloud_manager.py create --project=project_name
# Query the status and IP address of the existing instance created by you
python perfzero/lib/cloud_manager.py status --project=project_name
# Stop the instance
python perfzero/lib/cloud_manager.py stop --project=project_name
# Start the instance
python perfzero/lib/cloud_manager.py start --project=project_name
# Delete the instance
python perfzero/lib/cloud_manager.py delete --project=project_name
```
## Understand the benchmark execution output
### Json formatted benchmark summary
PerfZero outputs a json-formatted summary that provides the information needed
to understand the benchmark result. The summary is printed to stdout and
written to the file `path_to_perfzero/${workspace}/output/${execution_id}/perfzero.log`.
Additionally, PerfZero outputs a pure json file containing the summary at
`path_to_perfzero/${workspace}/output/${execution_id}/perfzero_summary.json`.
Here is an example output from PerfZero. An explanation is provided inline for
each key whose name is not sufficiently self-explanatory.
```
{
"ml_framework_info": { # Summary of the machine learning framework
"version": "1.13.0-dev20190206", # Short version. It is tf.__version__ for TensorFlow
"name": "tensorflow", # Machine learning framework name such as PyTorch
"build_label": "ml_framework_build_label", # Specified by the flag --ml_framework_build_label
"build_version": "v1.12.0-7504-g9b32b5742b" # Long version. It is tf.__git_version__ for TensorFlow
},
"execution_timestamp": 1550040322.8991697, # Timestamp when the benchmark is executed
"execution_id": "2019-02-13-06-45-22-899128", # A string that uniquely identify this benchmark execution
"benchmark_info": { # Summary of the benchmark framework setup
"output_url": "gs://tf-performance/test-results/2019-02-13-06-45-22-899128/", # Google storage url that contains the log file from this benchmark execution
"has_exception": false,
"site_package_info": {
"models": {
"branch": "benchmark",
"url": "https://github.com/tensorflow/models.git",
"hash": "f788046ca876a8820e05b0b48c1fc2e16b0955bc"
},
"benchmarks": {
"branch": "master",
"url": "https://github.com/tensorflow/benchmarks.git",
"hash": "af9e0ef36fc6867d9b63ebccc11f229375cd6a31"
}
},
"harness_name": "perfzero",
"harness_info": {
"url": "https://github.com/tensorflow/benchmarks.git",
"branch": "master",
"hash": "75d2991b88630dde10ef65aad8082a6d5cd8b5fc"
},
"execution_label": "execution_label" # Specified by the flag --execution_label
},
"system_info": { # Summary of the resources in the system that is used to execute the benchmark
"system_name": "system_name", # Specified by the flag --system_name
"accelerator_count": 2, # Number of GPUs in the system
"physical_cpu_count": 8, # Number of physical cpu cores in the system. Hyper thread CPUs are excluded.
"logical_cpu_count": 16, # Number of logical cpu cores in the system. Hyper thread CPUs are included.
"cpu_socket_count": 1, # Number of cpu socket in the system.
"platform_name": "platform_name", # Specified by the flag --platform_name
"accelerator_model": "Tesla V100-SXM2-16GB",
"accelerator_driver_version": "410.48",
"cpu_model": "Intel(R) Xeon(R) CPU @ 2.20GHz"
},
"process_info": { # Summary of the resources used by the process to execute the benchmark
"max_rss": 4269047808, # maximum physical memory in bytes used by the process
"max_vms": 39894450176, # maximum virtual memory in bytes used by the process
"max_cpu_percent": 771.1 # CPU utilization as a percentage. See psutil.Process.cpu_percent() for more information
},
"benchmark_result": { # Summary of the benchmark execution results. This is pretty much the same data structure defined in test_log.proto.
# Most values are read from test_log.proto which is written by tf.test.Benchmark.report_benchmark() defined in TensorFlow library.
"metrics": [ # This is derived from `extras` [test_log.proto](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/util/test_log.proto)
# which is written by report_benchmark().
# If the EntryValue is double, then name is the extra's key and value is extra's double value.
# If the EntryValue is string, then name is the extra's key. The string value will be a JSON-formatted string whose keys
# include `value`, `succeeded` and `description`. Benchmark methods can provide arbitrary metric key/value pairs here.
{
"name": "accuracy_top_5",
"value": 0.7558000087738037
},
{
"name": "accuracy_top_1",
"value": 0.2639999985694885
}
],
"name": "official.resnet.estimator_cifar_benchmark.EstimatorCifar10BenchmarkTests.unit_test", # Full path to the benchmark method, i.e. module_path.class_name.method_name
"succeeded": true, # True iff benchmark method execution finishes without exception and no metric in metrics show succeeded = false
"wall_time": 14.552583694458008 # The value is determined by tf.test.Benchmark.report_benchmark() called by the benchmark method. It is -1 if report_benchmark() is not called.
}
}
```
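Because `perfzero_summary.json` contains the same structure as pure json,
downstream tooling can consume it directly. Below is a minimal sketch that
reads the summary and prints the benchmark result; the path is an example and
should be replaced with your own workspace and execution id.
```
import json

summary_path = ('perfzero/workspace/output/'
                '2019-02-13-06-45-22-899128/perfzero_summary.json')
with open(summary_path) as f:
  summary = json.load(f)

result = summary['benchmark_result']
print(result['name'], 'succeeded:', result['succeeded'],
      'wall_time:', result['wall_time'])
for metric in result['metrics']:
  print(metric['name'], '=', metric['value'])
```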
### Profiling
When the flag `--profiler_enabled_time=start_time:end_time` is specified, the
profiler data will be collected and stored in
`path_to_perfzero/${workspace}/output/${execution_id}/profiler_data`.
#### Visualize in TensorBoard
First, install the profile plugin for TensorBoard.
```
pip install -U tensorboard-plugin-profile
```
Run `tensorboard --logdir=path_to_perfzero/workspace/output/${execution_id}/profiler_data` or
`python3 -m tensorboard.main --logdir=path_to_perfzero/workspace/output/${execution_id}/profiler_data` to open
TensorBoard server.
If PerfZero is executed on a remote machine, run `ssh -L
6006:127.0.0.1:6006 remote_ip` before opening `http://localhost:6006` in your
browser to access the TensorBoard UI.
You can also run TensorBoard inside the docker container, e.g.
`tensorboard --logdir=/workspace/perfzero/workspace/output/${execution_id}/profiler_data --bind_all`
In this case, you have to start docker with port mapping, i.e. with the "-p 6006:6006" flag, e.g.
```
nvidia-docker run -it --rm -v $(pwd):/workspace -p 6006:6006 perfzero/tensorflow
```
Normally, the pages you see will look like:
![Screenshot](screenshots/profiling_overview.png "Profiling Overview")
![Screenshot](screenshots/profiling_trace_view.png "Profiling Trace View")
### Visualize system metric values over time
PerfZero also records a few useful system metrics (e.g. rss, vms) over time in
the file `path_to_perfzero/${workspace}/output/${execution_id}/process_info.log`.
Run `python perfzero/scripts/plot_process_info.py process_info.log` to generate a
pdf showing the value of these metrics over time.
# PerfZero development
Avoid importing the `tensorflow` package in any place that requires the
`logging` package, because the tensorflow package appears to prevent logging
from working properly. Import the `tensorflow` package only in the method that
requires it, as in the sketch below.
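A minimal sketch of this lazy-import pattern (the function name is
illustrative):
```
import logging


def run_tensorflow_step():
  # Import tensorflow inside the method so that merely importing this
  # module does not let TensorFlow interfere with logging configuration.
  import tensorflow as tf
  logging.info('TensorFlow version: %s', tf.__version__)
```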
Here are the commands to run unit tests and check code style.
```
# Run all unit tests
# This must be executed in directory perfzero/lib
python3 -B -m unittest discover -p "*_test.py"
# Format python code in place
find perfzero/lib -name "*.py" -exec pyformat --in_place {} \;
# Check python code format and report warnings and errors
find perfzero/lib -name "*.py" -exec gpylint3 {} \;
```
Here is the command to generate the table of contents for this README. Run this
command and copy/paste the output into README.md.
```
./perfzero/scripts/generate-readme-header.sh perfzero/README.md
```
# Ubuntu 18.04 Python3 with CUDA 10 and the following:
# - Installs tf-nightly-gpu-2.0-preview
# - Installs requirements.txt for tensorflow/models
# - Installs bazel for building TF from source
FROM nvidia/cuda:10.0-base-ubuntu18.04 as base
ARG tensorflow_pip_spec="tf-nightly-gpu-2.0-preview"
ARG extra_pip_specs=""
ARG local_tensorflow_pip_spec=""
COPY ${local_tensorflow_pip_spec} /${local_tensorflow_pip_spec}
# Pick up some TF dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
cuda-command-line-tools-10-0 \
cuda-cublas-dev-10-0 \
cuda-cufft-dev-10-0 \
cuda-curand-dev-10-0 \
cuda-cusolver-dev-10-0 \
cuda-cusparse-dev-10-0 \
libcudnn7=7.6.2.24-1+cuda10.0 \
libcudnn7-dev=7.6.2.24-1+cuda10.0 \
libfreetype6-dev \
libhdf5-serial-dev \
libzmq3-dev \
libpng-dev \
pkg-config \
software-properties-common \
unzip \
lsb-core \
curl \
&& \
find /usr/local/cuda-10.0/lib64/ -type f -name 'lib*_static.a' -not -name 'libcudart_static.a' -delete && \
rm /usr/lib/x86_64-linux-gnu/libcudnn_static_v7.a
RUN apt-get update && \
apt-get install -y --no-install-recommends libnvinfer5=5.1.5-1+cuda10.0 \
libnvinfer-dev=5.1.5-1+cuda10.0 \
&& apt-get clean
# For CUDA profiling, TensorFlow requires CUPTI.
ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
# See http://bugs.python.org/issue19846
ENV LANG C.UTF-8
# Add google-cloud-sdk to the source list
RUN echo "deb http://packages.cloud.google.com/apt cloud-sdk-$(lsb_release -c -s) main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list
RUN curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
# Install extras needed by most models
RUN apt-get update && apt-get install -y --no-install-recommends \
git \
build-essential \
ca-certificates \
wget \
htop \
zip \
google-cloud-sdk
# Install / update Python
# (building TF needs py2 even if building for Python3 as of 06-AUG-2019)
RUN apt-get install -y --no-install-recommends \
python3 \
python3-dev \
python3-pip \
python3-setuptools \
python3-venv \
python
# Upgrade pip, need to use pip3 and then pip after this or an error
# is thrown for no main found.
RUN pip3 install --upgrade pip
# setuptools upgraded to fix install requirements from model garden.
RUN pip install wheel
RUN pip install --upgrade setuptools google-api-python-client pyyaml google-cloud google-cloud-bigquery google-cloud-datastore mock
RUN pip install absl-py
RUN pip install --upgrade --force-reinstall ${tensorflow_pip_spec} ${extra_pip_specs}
RUN pip install tfds-nightly
RUN pip install -U scikit-learn
RUN curl https://raw.githubusercontent.com/tensorflow/models/master/official/requirements.txt > /tmp/requirements.txt
RUN pip3 install -r /tmp/requirements.txt
RUN pip3 freeze
# Install bazel
ARG BAZEL_VERSION=0.24.1
RUN mkdir /bazel && \
wget -O /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \
wget -O /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \
chmod +x /bazel/installer.sh && \
/bazel/installer.sh && \
rm -f /bazel/installer.sh
RUN git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src
# Ubuntu 18.04 Python3 with CUDA 10 and the following:
# - Installs tf-nightly-gpu (this is TF 2.0)
# - Installs requirements.txt for tensorflow/models
# Additionally also installs:
# - Latest S4TF development snapshot for cuda 10.0
FROM nvidia/cuda:10.0-base-ubuntu18.04 as base
ARG tensorflow_pip_spec="tf-nightly-gpu"
ARG local_tensorflow_pip_spec=""
ARG extra_pip_specs=""
ARG swift_tf_url=https://storage.googleapis.com/swift-tensorflow-artifacts/nightlies/latest/swift-tensorflow-DEVELOPMENT-cuda10.0-cudnn7-ubuntu18.04.tar.gz
# setup.py passes the base path of the local .whl file if one is chosen for the docker image.
# Otherwise it passes an empty existing file from the context.
COPY ${local_tensorflow_pip_spec} /${local_tensorflow_pip_spec}
# Pick up some TF dependencies
# cublas-dev and libcudnn7-dev only needed because of libnvinfer-dev which may not
# really be needed.
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
cuda-command-line-tools-10-0 \
cuda-cublas-10-0 \
cuda-cublas-dev-10-0 \
cuda-cufft-10-0 \
cuda-curand-10-0 \
cuda-cusolver-10-0 \
cuda-cusparse-10-0 \
libcudnn7=7.6.2.24-1+cuda10.0 \
libcudnn7-dev=7.6.2.24-1+cuda10.0 \
libfreetype6-dev \
libhdf5-serial-dev \
libzmq3-dev \
libpng-dev \
pkg-config \
software-properties-common \
unzip \
lsb-core \
curl
RUN apt-get update && \
apt-get install -y --no-install-recommends libnvinfer5=5.1.5-1+cuda10.0 \
libnvinfer-dev=5.1.5-1+cuda10.0 \
&& apt-get clean
# For CUDA profiling, TensorFlow requires CUPTI.
ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
# See http://bugs.python.org/issue19846
ENV LANG C.UTF-8
# Add google-cloud-sdk to the source list
RUN echo "deb http://packages.cloud.google.com/apt cloud-sdk-$(lsb_release -c -s) main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list
RUN curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
# Install extras needed by most models
RUN apt-get update && apt-get install -y --no-install-recommends \
git \
ca-certificates \
wget \
htop \
zip \
google-cloud-sdk
# Install / update Python and Python3
RUN apt-get install -y --no-install-recommends \
python3 \
python3-dev \
python3-pip \
python3-setuptools \
python3-venv
# Upgrade pip, need to use pip3 and then pip after this or an error
# is thrown for no main found.
RUN pip3 install --upgrade pip
# setuptools upgraded to fix install requirements from model garden.
RUN pip install wheel
RUN pip install --upgrade setuptools google-api-python-client pyyaml google-cloud google-cloud-bigquery mock
RUN pip install absl-py
RUN pip install --upgrade --force-reinstall ${tensorflow_pip_spec} ${extra_pip_specs}
RUN pip install tfds-nightly
RUN pip install -U scikit-learn
RUN curl https://raw.githubusercontent.com/tensorflow/models/master/official/requirements.txt > /tmp/requirements.txt
RUN pip install -r /tmp/requirements.txt
RUN pip freeze
### Install Swift deps.
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
ca-certificates \
curl \
git \
python \
python-dev \
python-pip \
python-setuptools \
python-tk \
python3 \
python3-pip \
python3-setuptools \
clang \
libcurl4-openssl-dev \
libicu-dev \
libpython-dev \
libpython3-dev \
libncurses5-dev \
libxml2 \
libblocksruntime-dev
# Download and extract S4TF
WORKDIR /swift-tensorflow-toolchain
RUN if ! curl -fSsL --retry 5 $swift_tf_url -o swift.tar.gz; \
then sleep 30 && curl -fSsL --retry 5 $swift_tf_url -o swift.tar.gz; \
fi;
RUN mkdir usr \
&& tar -xzf swift.tar.gz --directory=usr --strip-components=1 \
&& rm swift.tar.gz
ENV PATH="/swift-tensorflow-toolchain/usr/bin:${PATH}"
ENV LD_LIBRARY_PATH="/swift-tensorflow-toolchain/usr/lib/swift/linux/:${LD_LIBRARY_PATH}"
# Ubuntu 18.04 Python3 with CUDA 10.1 and the following:
# - Installs tf-nightly-gpu (this is TF 2.1)
# - Installs requirements.txt for tensorflow/models
# - TF 2.0 tested with cuda 10.0, but we need to test tf 2.1 with cuda 10.1.
# Additionally also installs
# - Latest S4TF development snapshot for cuda 10.1
FROM nvidia/cuda:10.1-base-ubuntu18.04 as base
ARG tensorflow_pip_spec="tf-nightly-gpu"
ARG local_tensorflow_pip_spec=""
ARG extra_pip_specs=""
ARG swift_tf_url=https://storage.googleapis.com/swift-tensorflow-artifacts/nightlies/latest/swift-tensorflow-DEVELOPMENT-cuda10.1-cudnn7-stock-ubuntu18.04.tar.gz
# setup.py passes the base path of the local .whl file if one is chosen for the docker image.
# Otherwise it passes an empty existing file from the context.
COPY ${local_tensorflow_pip_spec} /${local_tensorflow_pip_spec}
# Pick up some TF dependencies
# cublas-dev and libcudnn7-dev only needed because of libnvinfer-dev which may not
# really be needed.
# In the future, add the following lines in a shell script running on the
# benchmark vm to get the available dependent versions when updating cuda
# version (e.g. to 10.2 or something later):
# sudo apt-cache search cuda-command-line-tool
# sudo apt-cache search cuda-cublas
# sudo apt-cache search cuda-cufft
# sudo apt-cache search cuda-curand
# sudo apt-cache search cuda-cusolver
# sudo apt-cache search cuda-cusparse
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
cuda-command-line-tools-10-1 \
cuda-cufft-10-1 \
cuda-curand-10-1 \
cuda-cusolver-10-1 \
cuda-cusparse-10-1 \
libcudnn7=7.6.4.38-1+cuda10.1 \
libcudnn7-dev=7.6.4.38-1+cuda10.1 \
libfreetype6-dev \
libhdf5-serial-dev \
libzmq3-dev \
libpng-dev \
pkg-config \
software-properties-common \
unzip \
lsb-core \
curl
RUN apt-get update && \
apt-get install -y --no-install-recommends libnvinfer5=5.1.5-1+cuda10.1 \
libnvinfer-dev=5.1.5-1+cuda10.1 \
libnvinfer6=6.0.1-1+cuda10.1 \
&& apt-get clean
# For CUDA profiling, TensorFlow requires CUPTI.
ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
# See http://bugs.python.org/issue19846
ENV LANG C.UTF-8
# Add google-cloud-sdk to the source list
RUN echo "deb http://packages.cloud.google.com/apt cloud-sdk-$(lsb_release -c -s) main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list
RUN curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
# Install extras needed by most models
RUN apt-get update && apt-get install -y --no-install-recommends \
git \
ca-certificates \
wget \
htop \
zip \
google-cloud-sdk
# Install / update Python and Python3
RUN apt-get install -y --no-install-recommends \
python3 \
python3-dev \
python3-pip \
python3-setuptools \
python3-venv
# Upgrade pip, need to use pip3 and then pip after this or an error
# is thrown for no main found.
RUN pip3 install --upgrade pip
# setuptools upgraded to fix install requirements from model garden.
RUN pip install wheel
RUN pip install --upgrade setuptools google-api-python-client pyyaml google-cloud google-cloud-bigquery google-cloud-datastore mock
RUN pip install absl-py
RUN pip install --upgrade --force-reinstall ${tensorflow_pip_spec} ${extra_pip_specs}
RUN pip install tfds-nightly
RUN pip install -U scikit-learn
RUN curl https://raw.githubusercontent.com/tensorflow/models/master/official/requirements.txt > /tmp/requirements.txt
RUN pip install -r /tmp/requirements.txt
RUN pip freeze
### Install Swift deps.
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
ca-certificates \
curl \
git \
python \
python-dev \
python-pip \
python-setuptools \
python-tk \
python3 \
python3-pip \
python3-setuptools \
clang \
libcurl4-openssl-dev \
libicu-dev \
libpython-dev \
libpython3-dev \
libncurses5-dev \
libxml2 \
libblocksruntime-dev
# Download and extract S4TF
WORKDIR /swift-tensorflow-toolchain
RUN if ! curl -fSsL --retry 5 $swift_tf_url -o swift.tar.gz; \
then sleep 30 && curl -fSsL --retry 5 $swift_tf_url -o swift.tar.gz; \
fi;
RUN mkdir usr \
&& tar -xzf swift.tar.gz --directory=usr --strip-components=1 \
&& rm swift.tar.gz
ENV PATH="/swift-tensorflow-toolchain/usr/bin:${PATH}"
ENV LD_LIBRARY_PATH="/swift-tensorflow-toolchain/usr/lib/swift/linux/:${LD_LIBRARY_PATH}"
# Ubuntu 18.04 Python3 with CUDA 11.0 and the following:
# - Installs tf-nightly-gpu (this is TF 2.4)
# - Installs requirements.txt for tensorflow/models
# Additionally also installs
# - Latest S4TF development snapshot for cuda 11.0
FROM nvidia/cuda:11.0-base-ubuntu18.04 as base
ARG tensorflow_pip_spec="tf-nightly-gpu"
ARG local_tensorflow_pip_spec=""
ARG extra_pip_specs=""
ARG swift_tf_url=https://storage.googleapis.com/swift-tensorflow-artifacts/nightlies/latest/swift-tensorflow-DEVELOPMENT-cuda11.0-cudnn8-stock-ubuntu18.04.tar.gz
# setup.py passes the base path of the local .whl file if one is chosen for the docker image.
# Otherwise it passes an empty existing file from the context.
COPY ${local_tensorflow_pip_spec} /${local_tensorflow_pip_spec}
# Pick up some TF dependencies
# cublas-dev and libcudnn8-dev only needed because of libnvinfer-dev which may not
# really be needed.
# In the future, add the following lines in a shell script running on the
# benchmark vm to get the available dependent versions when updating cuda
# version (e.g. to 10.2 or something later):
# sudo apt-cache search cuda-command-line-tool
# sudo apt-cache search cuda-cublas
# sudo apt-cache search cuda-cufft
# sudo apt-cache search cuda-curand
# sudo apt-cache search cuda-cusolver
# sudo apt-cache search cuda-cusparse
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
cuda-tools-11-0 \
cuda-toolkit-11-0 \
libcudnn8=8.0.4.30-1+cuda11.0 \
libcudnn8-dev=8.0.4.30-1+cuda11.0 \
libfreetype6-dev \
libhdf5-serial-dev \
libzmq3-dev \
libpng-dev \
pkg-config \
software-properties-common \
unzip \
lsb-core \
curl
RUN apt-get update && \
apt-get install -y --no-install-recommends libnvinfer7=7.2.0-1+cuda11.0 \
libnvinfer-dev=7.2.0-1+cuda11.0 \
&& apt-get clean
# For CUDA profiling, TensorFlow requires CUPTI.
ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
# See http://bugs.python.org/issue19846
ENV LANG C.UTF-8
# Add google-cloud-sdk to the source list
RUN echo "deb http://packages.cloud.google.com/apt cloud-sdk-$(lsb_release -c -s) main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list
RUN curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
# Install extras needed by most models
RUN apt-get update && apt-get install -y --no-install-recommends \
git \
ca-certificates \
wget \
htop \
zip \
google-cloud-sdk
# Install / update Python and Python3
RUN apt-get install -y --no-install-recommends \
python3 \
python3-dev \
python3-pip \
python3-setuptools \
python3-venv
# Upgrade pip, need to use pip3 and then pip after this or an error
# is thrown for no main found.
RUN pip3 install --upgrade pip
# setuptools upgraded to fix install requirements from model garden.
RUN pip install wheel
RUN pip install --upgrade setuptools google-api-python-client pyyaml google-cloud google-cloud-bigquery google-cloud-datastore mock
RUN pip install absl-py
RUN pip install --upgrade --force-reinstall ${tensorflow_pip_spec} ${extra_pip_specs}
RUN pip install tfds-nightly
RUN pip install -U scikit-learn
RUN curl https://raw.githubusercontent.com/tensorflow/models/master/official/requirements.txt > /tmp/requirements.txt
RUN pip install -r /tmp/requirements.txt
RUN pip freeze
### Install Swift deps.
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
ca-certificates \
curl \
git \
python \
python-dev \
python-pip \
python-setuptools \
python-tk \
python3 \
python3-pip \
python3-setuptools \
clang \
libcurl4-openssl-dev \
libicu-dev \
libpython-dev \
libpython3-dev \
libncurses5-dev \
libxml2 \
libblocksruntime-dev
# Download and extract S4TF
WORKDIR /swift-tensorflow-toolchain
RUN if ! curl -fSsL --retry 5 $swift_tf_url -o swift.tar.gz; \
then sleep 30 && curl -fSsL --retry 5 $swift_tf_url -o swift.tar.gz; \
fi;
RUN mkdir usr \
&& tar -xzf swift.tar.gz --directory=usr --strip-components=1 \
&& rm swift.tar.gz
ENV PATH="/swift-tensorflow-toolchain/usr/bin:${PATH}"
ENV LD_LIBRARY_PATH="/swift-tensorflow-toolchain/usr/lib/swift/linux/:${LD_LIBRARY_PATH}"
# Ubuntu 18.04 Python3 with CUDA 10.1 and the following:
# - Installs tf-nightly-gpu (this is TF 2.1)
# - Installs requirements.txt for tensorflow/models
# - TF 2.0 tested with cuda 10.0, but we need to test tf 2.1 with cuda 10.1.
FROM nvidia/cuda:10.1-base-ubuntu18.04 as base
ARG tensorflow_pip_spec="tf-nightly-gpu"
ARG local_tensorflow_pip_spec=""
ARG extra_pip_specs=""
# setup.py passes the base path of the local .whl file if one is chosen for the docker image.
# Otherwise it passes an empty existing file from the context.
COPY ${local_tensorflow_pip_spec} /${local_tensorflow_pip_spec}
# Pick up some TF dependencies
# cublas-dev and libcudnn7-dev only needed because of libnvinfer-dev which may not
# really be needed.
# In the future, add the following lines in a shell script running on the
# benchmark vm to get the available dependent versions when updating cuda
# version (e.g. to 10.2 or something later):
# sudo apt-cache search cuda-command-line-tool
# sudo apt-cache search cuda-cublas
# sudo apt-cache search cuda-cufft
# sudo apt-cache search cuda-curand
# sudo apt-cache search cuda-cusolver
# sudo apt-cache search cuda-cusparse
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
cuda-command-line-tools-10-1 \
cuda-cufft-10-1 \
cuda-curand-10-1 \
cuda-cusolver-10-1 \
cuda-cusparse-10-1 \
libcudnn7=7.6.4.38-1+cuda10.1 \
libcudnn7-dev=7.6.4.38-1+cuda10.1 \
libfreetype6-dev \
libhdf5-serial-dev \
libzmq3-dev \
libpng-dev \
pkg-config \
software-properties-common \
unzip \
lsb-core \
curl
RUN apt-get update && \
apt-get install -y --no-install-recommends libnvinfer5=5.1.5-1+cuda10.1 \
libnvinfer-dev=5.1.5-1+cuda10.1 \
libnvinfer6=6.0.1-1+cuda10.1 \
&& apt-get clean
# For CUDA profiling, TensorFlow requires CUPTI.
ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
# See http://bugs.python.org/issue19846
ENV LANG C.UTF-8
# Add google-cloud-sdk to the source list
RUN echo "deb http://packages.cloud.google.com/apt cloud-sdk-$(lsb_release -c -s) main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list
RUN curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
# Install extras needed by most models
RUN apt-get update && apt-get install -y --no-install-recommends \
git \
ca-certificates \
wget \
htop \
zip \
google-cloud-sdk
# Install / update Python and Python3
RUN apt-get install -y --no-install-recommends \
python3 \
python3-dev \
python3-pip \
python3-setuptools \
python3-venv
# Upgrade pip, need to use pip3 and then pip after this or an error
# is thrown for no main found.
RUN pip3 install --upgrade pip
# setuptools upgraded to fix install requirements from model garden.
RUN pip install wheel
RUN pip install --upgrade setuptools google-api-python-client==1.8.0 pyyaml google-cloud google-cloud-bigquery mock
RUN pip install absl-py
RUN pip install --upgrade --force-reinstall ${tensorflow_pip_spec} ${extra_pip_specs}
RUN pip install tfds-nightly
RUN pip install -U scikit-learn
RUN curl https://raw.githubusercontent.com/tensorflow/models/master/official/requirements.txt > /tmp/requirements.txt
RUN pip install -r /tmp/requirements.txt
RUN pip freeze
# Ubuntu 18.04 Python3 with CUDA 11 and the following:
# - Installs tf-nightly-gpu (this is TF 2.3)
# - Installs requirements.txt for tensorflow/models
FROM nvidia/cuda:11.2.1-cudnn8-devel-ubuntu18.04 as base
ARG tensorflow_pip_spec="tf-nightly-gpu"
ARG local_tensorflow_pip_spec=""
ARG extra_pip_specs=""
ENV PIP_CMD="python3.9 -m pip"
# setup.py passes the base path of the local .whl file if one is chosen for the docker image.
# Otherwise it passes an empty existing file from the context.
COPY ${local_tensorflow_pip_spec} /${local_tensorflow_pip_spec}
# Pick up some TF dependencies
# cublas-dev and libcudnn7-dev only needed because of libnvinfer-dev which may not
# really be needed.
# In the future, add the following lines in a shell script running on the
# benchmark vm to get the available dependent versions when updating cuda
# version (e.g. to 10.2 or something later):
# sudo apt-cache search cuda-command-line-tool
# sudo apt-cache search cuda-cublas
# sudo apt-cache search cuda-cufft
# sudo apt-cache search cuda-curand
# sudo apt-cache search cuda-cusolver
# sudo apt-cache search cuda-cusparse
# Needed to disable prompts during installation.
ENV DEBIAN_FRONTEND noninteractive
RUN apt-get update && apt-get install -y --no-install-recommends \
libfreetype6-dev \
libhdf5-serial-dev \
libzmq3-dev \
libpng-dev \
pkg-config \
software-properties-common \
unzip \
lsb-core \
curl
# Python 3.9 related deps in this ppa.
RUN add-apt-repository ppa:deadsnakes/ppa
# Install / update Python 3
RUN apt-get install -y --no-install-recommends \
python3.9 \
python3-pip \
python3.9-dev \
python3-setuptools \
python3.9-venv \
python3.9-distutils \
python3.9-lib2to3
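# Note: python3.9 from the deadsnakes PPA does not replace the default python3,
# which is why PIP_CMD pins the interpreter explicitly. Repointing python3 via
# update-alternatives would be an alternative, but it can break system tooling
# on 18.04 (illustrative, not used here):
# RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1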
# Upgrade pip. After this, pip has to be invoked as pip3 first and then pip,
# otherwise an error about a missing main module is thrown.
RUN ${PIP_CMD} install --upgrade pip
RUN ${PIP_CMD} install --upgrade distlib
# setuptools is upgraded to fix installing the requirements from the model garden.
RUN ${PIP_CMD} install --upgrade setuptools
# For CUDA profiling, TensorFlow requires CUPTI.
ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda-11.2/lib64:$LD_LIBRARY_PATH
# See http://bugs.python.org/issue19846
ENV LANG C.UTF-8
# Add google-cloud-sdk to the source list
RUN echo "deb http://packages.cloud.google.com/apt cloud-sdk-$(lsb_release -c -s) main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list
RUN curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
# Install extras needed by most models
RUN apt-get update && apt-get install -y --no-install-recommends \
git \
ca-certificates \
wget \
htop \
zip \
google-cloud-sdk
RUN ${PIP_CMD} install --upgrade pyyaml
RUN ${PIP_CMD} install --upgrade google-api-python-client==1.8.0
RUN ${PIP_CMD} install --upgrade google-cloud google-cloud-bigquery google-cloud-datastore mock
RUN ${PIP_CMD} install wheel
RUN ${PIP_CMD} install absl-py
RUN ${PIP_CMD} install --upgrade --force-reinstall ${tensorflow_pip_spec} ${extra_pip_specs}
RUN ${PIP_CMD} install tfds-nightly
RUN ${PIP_CMD} install -U scikit-learn
# Install dependencies needed for tf.distribute test utils
RUN ${PIP_CMD} install dill tblib portpicker
RUN curl https://raw.githubusercontent.com/tensorflow/models/master/official/requirements.txt > /tmp/requirements.txt
RUN ${PIP_CMD} install -r /tmp/requirements.txt
RUN ${PIP_CMD} install tf-estimator-nightly
RUN ${PIP_CMD} install tensorflow-text-nightly
# RUN nvidia-smi
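# nvcc --version is a build-time sanity check that the CUDA toolkit is on the
# PATH; nvidia-smi stays commented out, presumably because GPUs are not visible
# during docker build.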
RUN nvcc --version
RUN ${PIP_CMD} freeze
# Ubuntu 18.04 Python3 with CUDA 11 and the following:
# - Installs tf-nightly-gpu (the current TF nightly build)
# - Installs requirements.txt for tensorflow/models
FROM nvidia/cuda:11.0-cudnn8-devel-ubuntu18.04 as base
ARG tensorflow_pip_spec="tf-nightly-gpu"
ARG local_tensorflow_pip_spec=""
ARG extra_pip_specs=""
# setup.py passes the base path of the local .whl file when one is chosen for
# the docker image; otherwise it passes an empty placeholder file from the
# context.
COPY ${local_tensorflow_pip_spec} /${local_tensorflow_pip_spec}
# Pick up some TF dependencies
# cublas-dev and libcudnn7-dev are only needed because of libnvinfer-dev, which
# itself may not really be needed.
# In the future, add the following lines to a shell script running on the
# benchmark vm to get the available dependent versions when updating the cuda
# version (e.g. to 10.2 or later):
# sudo apt-cache search cuda-command-line-tool
# sudo apt-cache search cuda-cublas
# sudo apt-cache search cuda-cufft
# sudo apt-cache search cuda-curand
# sudo apt-cache search cuda-cusolver
# sudo apt-cache search cuda-cusparse
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
cuda-tools-11-0 \
cuda-toolkit-11-0 \
libcudnn8=8.0.4.30-1+cuda11.0 \
libcudnn8-dev=8.0.4.30-1+cuda11.0 \
libfreetype6-dev \
libhdf5-serial-dev \
libzmq3-dev \
libpng-dev \
pkg-config \
software-properties-common \
unzip \
lsb-core \
curl
# For CUDA profiling, TensorFlow requires CUPTI.
ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda-11.0/lib64:$LD_LIBRARY_PATH
# See http://bugs.python.org/issue19846
ENV LANG C.UTF-8
# Add google-cloud-sdk to the source list
RUN echo "deb http://packages.cloud.google.com/apt cloud-sdk-$(lsb_release -c -s) main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list
RUN curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
# Install extras needed by most models
RUN apt-get update && apt-get install -y --no-install-recommends \
git \
ca-certificates \
wget \
htop \
zip \
google-cloud-sdk
# Install / update Python 3
RUN apt-get install -y --no-install-recommends \
python3 \
python3-dev \
python3-pip \
python3-setuptools \
python3-venv
# Upgrade pip. After this, pip has to be invoked as pip3 first and then pip,
# otherwise an error about a missing main module is thrown.
RUN pip3 install --upgrade pip
# setuptools is upgraded to fix installing the requirements from the model garden.
RUN pip install wheel
RUN pip install --upgrade setuptools google-api-python-client==1.8.0 pyyaml google-cloud google-cloud-bigquery google-cloud-datastore mock
RUN pip install absl-py
RUN pip install --upgrade --force-reinstall ${tensorflow_pip_spec} ${extra_pip_specs}
RUN pip install tfds-nightly
RUN pip install -U scikit-learn
# Install dependencies needed for tf.distribute test utils
RUN pip install dill tblib portpicker
RUN curl https://raw.githubusercontent.com/tensorflow/models/master/official/requirements.txt > /tmp/requirements.txt
RUN pip install -r /tmp/requirements.txt
RUN pip install tf-estimator-nightly
RUN pip install tensorflow-text-nightly
# RUN nvidia-smi
RUN nvcc --version
RUN pip freeze
# Ubuntu 18.04 Python3 with CUDA 11 and the following:
# - Installs tf-nightly-gpu (the current TF nightly build)
# - Installs requirements.txt for tensorflow/models
FROM nvidia/cuda:11.2.1-cudnn8-devel-ubuntu18.04 as base
ARG tensorflow_pip_spec="tf-nightly-gpu"
ARG local_tensorflow_pip_spec=""
ARG extra_pip_specs=""
# setup.py passes the base path of the local .whl file when one is chosen for
# the docker image; otherwise it passes an empty placeholder file from the
# context.
COPY ${local_tensorflow_pip_spec} /${local_tensorflow_pip_spec}
# Pick up some TF dependencies
# cublas-dev and libcudnn7-dev are only needed because of libnvinfer-dev, which
# itself may not really be needed.
# In the future, add the following lines to a shell script running on the
# benchmark vm to get the available dependent versions when updating the cuda
# version (e.g. to 10.2 or later):
# sudo apt-cache search cuda-command-line-tool
# sudo apt-cache search cuda-cublas
# sudo apt-cache search cuda-cufft
# sudo apt-cache search cuda-curand
# sudo apt-cache search cuda-cusolver
# sudo apt-cache search cuda-cusparse
RUN apt-get update && apt-get install -y --no-install-recommends \
libfreetype6-dev \
libhdf5-serial-dev \
libzmq3-dev \
libpng-dev \
pkg-config \
software-properties-common \
unzip \
lsb-core \
curl
# For CUDA profiling, TensorFlow requires CUPTI.
ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda-11.2/lib64:$LD_LIBRARY_PATH
# See http://bugs.python.org/issue19846
ENV LANG C.UTF-8
# Add google-cloud-sdk to the source list
RUN echo "deb http://packages.cloud.google.com/apt cloud-sdk-$(lsb_release -c -s) main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list
RUN curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
# Install extras needed by most models
RUN apt-get update && apt-get install -y --no-install-recommends \
git \
ca-certificates \
wget \
htop \
zip \
google-cloud-sdk
# Install / update Python 3
RUN apt-get install -y --no-install-recommends \
python3 \
python3-dev \
python3-pip \
python3-setuptools \
python3-venv
# Upgrade pip. After this, pip has to be invoked as pip3 first and then pip,
# otherwise an error about a missing main module is thrown.
RUN pip3 install --upgrade pip
# setuptools is upgraded to fix installing the requirements from the model garden.
RUN pip install wheel
RUN pip install --upgrade setuptools google-api-python-client==1.8.0 pyyaml google-cloud google-cloud-bigquery google-cloud-datastore mock
RUN pip install absl-py
RUN pip install --upgrade --force-reinstall ${tensorflow_pip_spec} ${extra_pip_specs}
RUN pip install tfds-nightly
RUN pip install -U scikit-learn
# Install dependencies needed for tf.distribute test utils
RUN pip install dill tblib portpicker
RUN curl https://raw.githubusercontent.com/tensorflow/models/master/official/requirements.txt > /tmp/requirements.txt
RUN pip install -r /tmp/requirements.txt
RUN pip install tf-estimator-nightly
RUN pip install tensorflow-text-nightly
RUN pip install psutil
# RUN nvidia-smi
RUN nvcc --version
RUN pip freeze
# Ubuntu 18.04 Python3 with CUDA 11 and the following:
# - Installs tf-nightly-gpu (the current TF nightly build)
# - Installs requirements.txt for tensorflow/models
FROM nvidia/cuda:11.2.1-cudnn8-devel-ubuntu18.04 as base
ARG tensorflow_pip_spec="tf-nightly-gpu"
ARG local_tensorflow_pip_spec=""
ARG extra_pip_specs=""
ENV PIP_CMD="python3.9 -m pip"
# setup.py passes the base path of the local .whl file when one is chosen for
# the docker image; otherwise it passes an empty placeholder file from the
# context.
COPY ${local_tensorflow_pip_spec} /${local_tensorflow_pip_spec}
# Pick up some TF dependencies
# cublas-dev and libcudnn7-dev are only needed because of libnvinfer-dev, which
# itself may not really be needed.
# In the future, add the following lines to a shell script running on the
# benchmark vm to get the available dependent versions when updating the cuda
# version (e.g. to 10.2 or later):
# sudo apt-cache search cuda-command-line-tool
# sudo apt-cache search cuda-cublas
# sudo apt-cache search cuda-cufft
# sudo apt-cache search cuda-curand
# sudo apt-cache search cuda-cusolver
# sudo apt-cache search cuda-cusparse
# Needed to disable prompts during installation.
ENV DEBIAN_FRONTEND noninteractive
RUN apt-get update && apt-get install -y --no-install-recommends \
libfreetype6-dev \
libhdf5-serial-dev \
libzmq3-dev \
libpng-dev \
pkg-config \
software-properties-common \
unzip \
lsb-core \
curl
# Python 3.9 related deps come from this PPA.
RUN add-apt-repository ppa:deadsnakes/ppa
# Install / update Python 3
RUN apt-get install -y --no-install-recommends \
python3.9 \
python3-pip \
python3.9-dev \
python3-setuptools \
python3.9-venv \
python3.9-distutils \
python3.9-lib2to3
# Upgrade pip. After this, pip has to be invoked as pip3 first and then pip,
# otherwise an error about a missing main module is thrown.
RUN ${PIP_CMD} install --upgrade pip
RUN ${PIP_CMD} install --upgrade distlib
# setuptools is upgraded to fix installing the requirements from the model garden.
RUN ${PIP_CMD} install --upgrade setuptools
# For CUDA profiling, TensorFlow requires CUPTI.
ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda-11.2/lib64:$LD_LIBRARY_PATH
# See http://bugs.python.org/issue19846
ENV LANG C.UTF-8
# Add google-cloud-sdk to the source list
RUN echo "deb http://packages.cloud.google.com/apt cloud-sdk-$(lsb_release -c -s) main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list
RUN curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
# Install extras needed by most models
RUN apt-get update && apt-get install -y --no-install-recommends \
git \
ca-certificates \
wget \
htop \
zip \
google-cloud-sdk
RUN ${PIP_CMD} install --upgrade pyyaml
RUN ${PIP_CMD} install --upgrade google-api-python-client==1.8.0
RUN ${PIP_CMD} install --upgrade google-cloud google-cloud-bigquery google-cloud-datastore mock
RUN ${PIP_CMD} install wheel
RUN ${PIP_CMD} install absl-py
RUN ${PIP_CMD} install --upgrade --force-reinstall ${tensorflow_pip_spec} ${extra_pip_specs}
RUN ${PIP_CMD} install tfds-nightly
RUN ${PIP_CMD} install -U scikit-learn
# Install dependencies needed for tf.distribute test utils
RUN ${PIP_CMD} install dill tblib portpicker
RUN curl https://raw.githubusercontent.com/tensorflow/models/master/official/requirements.txt > /tmp/requirements.txt
RUN ${PIP_CMD} install -r /tmp/requirements.txt
RUN ${PIP_CMD} install tf-estimator-nightly
RUN ${PIP_CMD} install tensorflow-text-nightly
RUN ${PIP_CMD} install keras-nightly==2.7.0.dev2021082607
# RUN nvidia-smi
RUN nvcc --version
RUN ${PIP_CMD} freeze
# Ubuntu 18.04 Python3 with CUDA 11 and the following:
# - Installs tf-nightly-gpu (the current TF nightly build)
# - Installs requirements.txt for tensorflow/models
FROM nvidia/cuda:11.2.1-cudnn8-devel-ubuntu18.04 as base
ARG tensorflow_pip_spec="tf-nightly-gpu"
ARG local_tensorflow_pip_spec=""
ARG extra_pip_specs=""
ENV PIP_CMD="python3.9 -m pip"
# setup.py passes the base path of the local .whl file when one is chosen for
# the docker image; otherwise it passes an empty placeholder file from the
# context.
COPY ${local_tensorflow_pip_spec} /${local_tensorflow_pip_spec}
# Pick up some TF dependencies
# cublas-dev and libcudnn7-dev are only needed because of libnvinfer-dev, which
# itself may not really be needed.
# In the future, add the following lines to a shell script running on the
# benchmark vm to get the available dependent versions when updating the cuda
# version (e.g. to 10.2 or later):
# sudo apt-cache search cuda-command-line-tool
# sudo apt-cache search cuda-cublas
# sudo apt-cache search cuda-cufft
# sudo apt-cache search cuda-curand
# sudo apt-cache search cuda-cusolver
# sudo apt-cache search cuda-cusparse
# Needed to disable prompts during installation.
ENV DEBIAN_FRONTEND noninteractive
RUN apt-get update && apt-get install -y --no-install-recommends \
libfreetype6-dev \
libhdf5-serial-dev \
libzmq3-dev \
libpng-dev \
pkg-config \
software-properties-common \
unzip \
lsb-core \
curl
# Python 3.9 related deps come from this PPA.
RUN add-apt-repository ppa:deadsnakes/ppa
# Install / update Python 3
RUN apt-get install -y --no-install-recommends \
python3.9 \
python3-pip \
python3.9-dev \
python3-setuptools \
python3.9-venv \
python3.9-distutils \
python3.9-lib2to3
# Upgrade pip. After this, pip has to be invoked as pip3 first and then pip,
# otherwise an error about a missing main module is thrown.
RUN ${PIP_CMD} install --upgrade pip
RUN ${PIP_CMD} install --upgrade distlib
# setuptools is upgraded to fix installing the requirements from the model garden.
RUN ${PIP_CMD} install --upgrade setuptools
# For CUDA profiling, TensorFlow requires CUPTI.
ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda-11.2/lib64:$LD_LIBRARY_PATH
# See http://bugs.python.org/issue19846
ENV LANG C.UTF-8
# Add google-cloud-sdk to the source list
RUN echo "deb http://packages.cloud.google.com/apt cloud-sdk-$(lsb_release -c -s) main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list
RUN curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
# Install extras needed by most models
RUN apt-get update && apt-get install -y --no-install-recommends \
git \
ca-certificates \
wget \
htop \
zip \
google-cloud-sdk
RUN ${PIP_CMD} install --upgrade pyyaml
RUN ${PIP_CMD} install --upgrade google-api-python-client==1.8.0
RUN ${PIP_CMD} install --upgrade google-cloud google-cloud-bigquery google-cloud-datastore mock
RUN ${PIP_CMD} install wheel
RUN ${PIP_CMD} install absl-py
RUN ${PIP_CMD} install --upgrade --force-reinstall ${tensorflow_pip_spec} ${extra_pip_specs}
RUN ${PIP_CMD} install tfds-nightly
RUN ${PIP_CMD} install -U scikit-learn
# Install dependencies needed for tf.distribute test utils
RUN ${PIP_CMD} install dill tblib portpicker
RUN curl https://raw.githubusercontent.com/tensorflow/models/master/official/requirements.txt > /tmp/requirements.txt
RUN ${PIP_CMD} install -r /tmp/requirements.txt
RUN ${PIP_CMD} install tf-estimator-nightly
RUN ${PIP_CMD} install tensorflow-text-nightly
RUN ${PIP_CMD} install keras-nightly==2.7.0.dev2021070900
# RUN nvidia-smi
RUN nvcc --version
RUN ${PIP_CMD} freeze
# Ubuntu 18.04 Python3 with CUDA 11 and the following:
# - Installs tf-nightly-gpu (the current TF nightly build)
# - Installs requirements.txt for tensorflow/models
FROM nvidia/cuda:11.0-cudnn8-devel-ubuntu18.04 as base
ARG tensorflow_pip_spec="tf-nightly-gpu"
ARG local_tensorflow_pip_spec=""
ARG extra_pip_specs=""
# setup.py passes the base path of the local .whl file when one is chosen for
# the docker image; otherwise it passes an empty placeholder file from the
# context.
COPY ${local_tensorflow_pip_spec} /${local_tensorflow_pip_spec}
# Pick up some TF dependencies
# cublas-dev and libcudnn7-dev are only needed because of libnvinfer-dev, which
# itself may not really be needed.
# In the future, add the following lines to a shell script running on the
# benchmark vm to get the available dependent versions when updating the cuda
# version (e.g. to 10.2 or later):
# sudo apt-cache search cuda-command-line-tool
# sudo apt-cache search cuda-cublas
# sudo apt-cache search cuda-cufft
# sudo apt-cache search cuda-curand
# sudo apt-cache search cuda-cusolver
# sudo apt-cache search cuda-cusparse
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
cuda-tools-11-0 \
cuda-toolkit-11-0 \
libcudnn8=8.0.4.30-1+cuda11.0 \
libcudnn8-dev=8.0.4.30-1+cuda11.0 \
libfreetype6-dev \
libhdf5-serial-dev \
libzmq3-dev \
libpng-dev \
pkg-config \
software-properties-common \
unzip \
lsb-core \
curl
# For CUDA profiling, TensorFlow requires CUPTI.
ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda-11.0/lib64:$LD_LIBRARY_PATH
# See http://bugs.python.org/issue19846
ENV LANG C.UTF-8
# Add google-cloud-sdk to the source list
RUN echo "deb http://packages.cloud.google.com/apt cloud-sdk-$(lsb_release -c -s) main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list
RUN curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
# Install extras needed by most models
RUN apt-get update && apt-get install -y --no-install-recommends \
git \
ca-certificates \
wget \
htop \
zip \
google-cloud-sdk
# Install / update Python 3
RUN apt-get install -y --no-install-recommends \
python3 \
python3-dev \
python3-pip \
python3-setuptools \
python3-venv
# Upgrade pip. After this, pip has to be invoked as pip3 first and then pip,
# otherwise an error about a missing main module is thrown.
RUN pip3 install --upgrade pip
# setuptools is upgraded to fix installing the requirements from the model garden.
RUN pip install wheel
RUN pip install --upgrade setuptools google-api-python-client==1.8.0 pyyaml google-cloud google-cloud-bigquery google-cloud-datastore mock
RUN pip install absl-py
RUN pip install --upgrade --force-reinstall ${tensorflow_pip_spec} ${extra_pip_specs}
RUN pip install tfds-nightly
RUN pip install -U scikit-learn
# Install dependencies needed for tf.distribute test utils
RUN pip install dill tblib portpicker
RUN curl https://raw.githubusercontent.com/tensorflow/models/master/official/requirements.txt > /tmp/requirements.txt
RUN pip install -r /tmp/requirements.txt
RUN pip install tf-estimator-nightly
RUN pip install tensorflow-text-nightly
# RUN nvidia-smi
RUN nvcc --version
RUN pip freeze