Delete imagenet.py, which is no longer needed (#2486)

* Delete imagenet_dataset_utils.py as well

Delete imagenet.py, which is no longer needed (#2486)
* Delete imagenet_dataset_utils.py as well
385e669e · Neal Wu · GitHub · d43c736e · d43c736e · d43c736e
Commit 385e669e authored Oct 02, 2017 by Neal Wu Committed by GitHub Oct 02, 2017
Hide whitespace changes
Inline Side-by-side

Showing with 0 additions and 335 deletions

official/resnet/imagenet.py official/resnet/imagenet.py +0 -198

official/resnet/imagenet_dataset_utils.py official/resnet/imagenet_dataset_utils.py +0 -137

No files found.
--- a/official/resnet/imagenet.py
+++ b/official/resnet/imagenet.py
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Provides data for the ImageNet ILSVRC 2012 Dataset plus some bounding boxes.
-Some images have one or more bounding boxes associated with the label of the
-image. See details here: http://image-net.org/download-bboxes
-ImageNet is based upon WordNet 3.0. To uniquely identify a synset, we use
-"WordNet ID" (wnid), which is a concatenation of POS ( i.e. part of speech )
-and SYNSET OFFSET of WordNet. For more information, please refer to the
-WordNet documentation[http://wordnet.princeton.edu/wordnet/documentation/].
-"There are bounding boxes for over 3000 popular synsets available.
-For each synset, there are on average 150 images with bounding boxes."
-WARNING: Don't use for object detection, in this case all the bounding boxes
-of the image belong to just one class.
-To read about optimizations that can be applied to the input preprocessing
-stage, see: https://www.tensorflow.org/performance/performance_guide#input_pipeline_optimization.
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-import os
-from six.moves import urllib
-import tensorflow as tf
-import imagenet_dataset_utils
-slim = tf.contrib.slim
-# TODO(nsilberman): Add tfrecord file type once the script is updated.
-_FILE_PATTERN = '%s-*'
-_SPLITS_TO_SIZES = {
-    'train': 1281167,
-    'validation': 50000,
-}
-_ITEMS_TO_DESCRIPTIONS = {
-    'image': 'A color image of varying height and width.',
-    'label': 'The label id of the image, integer between 0 and 999',
-    'label_text': 'The text of the label.',
-    'object/bbox': 'A list of bounding boxes.',
-    'object/label': 'A list of labels, one per each object.',
-}
-_NUM_CLASSES = 1001
-def create_readable_names_for_imagenet_labels():
-  """Create a dict mapping label id to human readable string.
-  Returns:
-      labels_to_names: dictionary where keys are integers from to 1000
-      and values are human-readable names.
-  We retrieve a synset file, which contains a list of valid synset labels used
-  by ILSVRC competition. There is one synset one per line, eg.
-          #   n01440764
-          #   n01443537
-  We also retrieve a synset_to_human_file, which contains a mapping from synsets
-  to human-readable names for every synset in Imagenet. These are stored in a
-  tsv format, as follows:
-          #   n02119247    black fox
-          #   n02119359    silver fox
-  We assign each synset (in alphabetical order) an integer, starting from 1
-  (since 0 is reserved for the background class).
-  Code is based on
-  https://github.com/tensorflow/models/blob/master/research/slim/datasets/build_imagenet_data.py
-  """
-  # pylint: disable=g-line-too-long
-  base_url = 'https://raw.githubusercontent.com/tensorflow/models/master/research/slim/datasets/'
-  # pylint: enable=g-line-too-long
-  synset_url = '{}/imagenet_lsvrc_2015_synsets.txt'.format(base_url)
-  synset_to_human_url = '{}/imagenet_metadata.txt'.format(base_url)
-  filename, _ = urllib.request.urlretrieve(synset_url)
-  synset_list = [s.strip() for s in open(filename).readlines()]
-  num_synsets_in_ilsvrc = len(synset_list)
-  assert num_synsets_in_ilsvrc == 1000
-  filename, _ = urllib.request.urlretrieve(synset_to_human_url)
-  synset_to_human_list = open(filename).readlines()
-  num_synsets_in_all_imagenet = len(synset_to_human_list)
-  assert num_synsets_in_all_imagenet == 21842
-  synset_to_human = {}
-  for s in synset_to_human_list:
-    parts = s.strip().split('\t')
-    assert len(parts) == 2
-    synset = parts[0]
-    human = parts[1]
-    synset_to_human[synset] = human
-  label_index = 1
-  labels_to_names = {0: 'background'}
-  for synset in synset_list:
-    name = synset_to_human[synset]
-    labels_to_names[label_index] = name
-    label_index += 1
-  return labels_to_names
-def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
-  """Gets a dataset tuple with instructions for reading ImageNet.
-  Args:
-    split_name: A train/test split name.
-    dataset_dir: The base directory of the dataset sources.
-    file_pattern: The file pattern to use when matching the dataset sources.
-      It is assumed that the pattern contains a '%s' string so that the split
-      name can be inserted.
-    reader: The TensorFlow reader type.
-  Returns:
-    A `Dataset` namedtuple.
-  Raises:
-    ValueError: if `split_name` is not a valid train/test split.
-  """
-  if split_name not in _SPLITS_TO_SIZES:
-    raise ValueError('split name %s was not recognized.' % split_name)
-  if not file_pattern:
-    file_pattern = _FILE_PATTERN
-  file_pattern = os.path.join(dataset_dir, file_pattern % split_name)
-  # Allowing None in the signature so that dataset_factory can use the default.
-  if reader is None:
-    reader = tf.TFRecordReader
-  keys_to_features = {
-      'image/encoded': tf.FixedLenFeature(
-          (), tf.string, default_value=''),
-      'image/format': tf.FixedLenFeature(
-          (), tf.string, default_value='jpeg'),
-      'image/class/label': tf.FixedLenFeature(
-          [], dtype=tf.int64, default_value=-1),
-      'image/class/text': tf.FixedLenFeature(
-          [], dtype=tf.string, default_value=''),
-      'image/object/bbox/xmin': tf.VarLenFeature(
-          dtype=tf.float32),
-      'image/object/bbox/ymin': tf.VarLenFeature(
-          dtype=tf.float32),
-      'image/object/bbox/xmax': tf.VarLenFeature(
-          dtype=tf.float32),
-      'image/object/bbox/ymax': tf.VarLenFeature(
-          dtype=tf.float32),
-      'image/object/class/label': tf.VarLenFeature(
-          dtype=tf.int64),
-  }
-  items_to_handlers = {
-      'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'),
-      'label': slim.tfexample_decoder.Tensor('image/class/label'),
-      'label_text': slim.tfexample_decoder.Tensor('image/class/text'),
-      'object/bbox': slim.tfexample_decoder.BoundingBox(
-          ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'),
-      'object/label': slim.tfexample_decoder.Tensor('image/object/class/label'),
-  }
-  decoder = slim.tfexample_decoder.TFExampleDecoder(
-      keys_to_features, items_to_handlers)
-  labels_to_names = None
-  if imagenet_dataset_utils.has_labels(dataset_dir):
-    labels_to_names = imagenet_dataset_utils.read_label_file(dataset_dir)
-  else:
-    labels_to_names = create_readable_names_for_imagenet_labels()
-    imagenet_dataset_utils.write_label_file(labels_to_names, dataset_dir)
-  return slim.dataset.Dataset(
-      data_sources=file_pattern,
-      reader=reader,
-      decoder=decoder,
-      num_samples=_SPLITS_TO_SIZES[split_name],
-      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
-      num_classes=_NUM_CLASSES,
-      labels_to_names=labels_to_names)
--- a/official/resnet/imagenet_dataset_utils.py
+++ b/official/resnet/imagenet_dataset_utils.py
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Contains utilities for downloading and converting datasets."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-import os
-import sys
-import tarfile
-from six.moves import urllib
-import tensorflow as tf
-LABELS_FILENAME = 'labels.txt'
-def int64_feature(values):
-  """Returns a TF-Feature of int64s.
-  Args:
-    values: A scalar or list of values.
-  Returns:
-    a TF-Feature.
-  """
-  if not isinstance(values, (tuple, list)):
-    values = [values]
-  return tf.train.Feature(int64_list=tf.train.Int64List(value=values))
-def bytes_feature(values):
-  """Returns a TF-Feature of bytes.
-  Args:
-    values: A string.
-  Returns:
-    a TF-Feature.
-  """
-  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[values]))
-def image_to_tfexample(image_data, image_format, height, width, class_id):
-  return tf.train.Example(features=tf.train.Features(feature={
-      'image/encoded': bytes_feature(image_data),
-      'image/format': bytes_feature(image_format),
-      'image/class/label': int64_feature(class_id),
-      'image/height': int64_feature(height),
-      'image/width': int64_feature(width),
-  }))
-def download_and_uncompress_tarball(tarball_url, dataset_dir):
-  """Downloads the `tarball_url` and uncompresses it locally.
-  Args:
-    tarball_url: The URL of a tarball file.
-    dataset_dir: The directory where the temporary files are stored.
-  """
-  filename = tarball_url.split('/')[-1]
-  filepath = os.path.join(dataset_dir, filename)
-  def _progress(count, block_size, total_size):
-    sys.stdout.write('\r>> Downloading %s %.1f%%' % (
-        filename, float(count * block_size) / float(total_size) * 100.0))
-    sys.stdout.flush()
-  filepath, _ = urllib.request.urlretrieve(tarball_url, filepath, _progress)
-  print()
-  statinfo = os.stat(filepath)
-  print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
-  tarfile.open(filepath, 'r:gz').extractall(dataset_dir)
-def write_label_file(labels_to_class_names, dataset_dir,
-                     filename=LABELS_FILENAME):
-  """Writes a file with the list of class names.
-  Args:
-    labels_to_class_names: A map of (integer) labels to class names.
-    dataset_dir: The directory in which the labels file should be written.
-    filename: The filename where the class names are written.
-  """
-  labels_filename = os.path.join(dataset_dir, filename)
-  with tf.gfile.Open(labels_filename, 'w') as f:
-    for label in labels_to_class_names:
-      class_name = labels_to_class_names[label]
-      f.write('%d:%s\n' % (label, class_name))
-def has_labels(dataset_dir, filename=LABELS_FILENAME):
-  """Specifies whether or not the dataset directory contains a label map file.
-  Args:
-    dataset_dir: The directory in which the labels file is found.
-    filename: The filename where the class names are written.
-  Returns:
-    `True` if the labels file exists and `False` otherwise.
-  """
-  return tf.gfile.Exists(os.path.join(dataset_dir, filename))
-def read_label_file(dataset_dir, filename=LABELS_FILENAME):
-  """Reads the labels file and returns a mapping from ID to class name.
-  Args:
-    dataset_dir: The directory in which the labels file is found.
-    filename: The filename where the class names are written.
-  Returns:
-    A map from a label (integer) to class name.
-  """
-  labels_filename = os.path.join(dataset_dir, filename)
-  with tf.gfile.Open(labels_filename, 'r') as f:
-    lines = f.read().decode()
-  lines = lines.split('\n')
-  lines = filter(None, lines)
-  labels_to_class_names = {}
-  for line in lines:
-    index = line.index(':')
-    labels_to_class_names[int(line[:index])] = line[index+1:]
-  return labels_to_class_names