"scripts/deprecated/test_httpserver_decode.py" did not exist on "3842eba5fa305edfc2c66f82e8389d72784d5911"
Commit eeb371d4 authored by vishnubanna

Image classification decoder for TFDS

parent ded0e258
import os
-from typing import List
+from typing import List, Optional
import dataclasses
from official.core import config_definitions as cfg
from official.core import exp_factory
......@@ -28,7 +28,6 @@ class Losses(hyperparams.Config):
  label_smoothing: float = 0.0
  l2_weight_decay: float = 0.0

@dataclasses.dataclass
class ImageClassificationTask(cfg.TaskConfig):
  """The model config."""
......
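The fields above are `hyperparams.Config` dataclasses, so the YAML files below override them field by field. A minimal sketch of that mechanism, assuming the standard `official.modeling.hyperparams` API; the `one_hot` default is taken from the YAML below, not from this hunk:

import dataclasses
from official.modeling import hyperparams

@dataclasses.dataclass
class Losses(hyperparams.Config):
  # Defaults mirror the snippet above; one_hot is assumed from the YAML below.
  one_hot: bool = True
  label_smoothing: float = 0.0
  l2_weight_decay: float = 0.0

losses = Losses()
losses.override({'l2_weight_decay': 0.0005, 'label_smoothing': 0.1})
print(losses.as_dict())  # {'one_hot': True, 'label_smoothing': 0.1, 'l2_weight_decay': 0.0005}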
runtime:
  distribution_strategy: 'mirrored'
  mixed_precision_dtype: 'float32'
task:
  model:
    num_classes: 1001
    input_size: [256, 256, 3]
    backbone:
      type: 'darknet'
      darknet:
        model_id: 'cspdarknet53'
  losses:
    l2_weight_decay: 0.0005
    one_hot: True
    label_smoothing: 0.1
  train_data:
    tfds_name: 'imagenet2012'
    tfds_split: 'train'
    tfds_data_dir: '~/tensorflow_datasets'
    is_training: true
    global_batch_size: 128
    dtype: 'float16'
  validation_data:
    tfds_name: 'imagenet2012'
    tfds_split: 'validation'
    tfds_data_dir: '~/tensorflow_datasets'
    is_training: true
    global_batch_size: 128
    dtype: 'float16'
    drop_remainder: false
trainer:
  train_steps: 1200000  # epochs: 120
  validation_steps: 400  # size of validation data
  validation_interval: 10000
  steps_per_loop: 10000
  summary_interval: 10000
  checkpoint_interval: 10000
  optimizer_config:
    optimizer:
      type: 'sgd'
      sgd:
        momentum: 0.9
    learning_rate:
      type: 'polynomial'
      polynomial:
        initial_learning_rate: 0.1
        end_learning_rate: 0.0001
        power: 4.0
        decay_steps: 1190000
    warmup:
      type: 'linear'
      linear:
        warmup_steps: 1000  # learning rate rises from 0 to 0.1 over 1000 steps
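For reference, the trainer block above is consistent with its own comments: at global_batch_size 128, each 10000-step interval covers about 1.28M images, roughly one pass over the ImageNet-1k train split, so 1200000 steps is about 120 epochs, and 400 validation steps cover about 51200 images, roughly the validation split. A minimal sketch of the configured schedule, assuming a linear warmup followed by the standard polynomial-decay formula (the exact composition inside the Model Garden optimizer factory is not shown in this commit):

def lr_at(step, initial=0.1, end=0.0001, power=4.0,
          decay_steps=1190000, warmup_steps=1000):
  # Linear warmup: 0 -> initial over the first warmup_steps.
  if step < warmup_steps:
    return initial * step / warmup_steps
  # Polynomial decay: initial -> end over decay_steps.
  frac = 1.0 - min(step, decay_steps) / decay_steps
  return (initial - end) * frac ** power + end

lr_at(500)        # 0.05, halfway through warmup
lr_at(1_190_000)  # 0.0001, fully decayed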
runtime:
  distribution_strategy: 'mirrored'
  mixed_precision_dtype: 'float32'
task:
  model:
    num_classes: 1001
    input_size: [256, 256, 3]
    backbone:
      type: 'darknet'
      darknet:
        model_id: 'darknet53'
  losses:
    l2_weight_decay: 0.0005
    one_hot: True
  train_data:
    tfds_name: 'imagenet2012'
    tfds_split: 'train'
    tfds_data_dir: '~/tensorflow_datasets'
    is_training: true
    global_batch_size: 128
    dtype: 'float16'
  validation_data:
    tfds_name: 'imagenet2012'
    tfds_split: 'validation'
    tfds_data_dir: '~/tensorflow_datasets'
    is_training: true
    global_batch_size: 128
    dtype: 'float16'
    drop_remainder: false
trainer:
  train_steps: 800000  # epochs: 80
  validation_steps: 400  # size of validation data
  validation_interval: 10000
  steps_per_loop: 10000
  summary_interval: 10000
  checkpoint_interval: 10000
  optimizer_config:
    optimizer:
      type: 'sgd'
      sgd:
        momentum: 0.9
    learning_rate:
      type: 'polynomial'
      polynomial:
        initial_learning_rate: 0.1
        end_learning_rate: 0.0001
        power: 4.0
        decay_steps: 799000
    warmup:
      type: 'linear'
      linear:
        warmup_steps: 1000  # learning rate rises from 0 to 0.1 over 1000 steps
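Both YAML files are override configs rather than standalone configs; they get merged onto a registered experiment at startup. A sketch of the usual flow, assuming the standard exp_factory/hyperparams API; the experiment name and file path are hypothetical placeholders, not taken from this commit:

from official.core import exp_factory
from official.modeling import hyperparams

# 'darknet_classification' and the YAML path are placeholders.
params = exp_factory.get_exp_config('darknet_classification')
params = hyperparams.override_params_dict(
    params, 'configs/experiments/darknet53_imagenet.yaml', is_strict=True)
params.validate()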
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Classification decoder and parser."""
# Import libraries
import tensorflow as tf
from official.vision.beta.dataloaders import decoder
from official.vision.beta.dataloaders import parser
from official.vision.beta.ops import preprocess_ops
MEAN_RGB = (0.485 * 255, 0.456 * 255, 0.406 * 255)
STDDEV_RGB = (0.229 * 255, 0.224 * 255, 0.225 * 255)
class Decoder(decoder.Decoder):
  """A tf.Example-style decoder for TFDS classification samples."""

  def decode(self, serialized_example):
    # TFDS yields already-decoded image tensors, so re-encode to JPEG bytes
    # under the keys the downstream classification Parser expects.
    sample_dict = {
        'image/encoded':
            tf.io.encode_jpeg(serialized_example['image'], quality=100),
        'image/class/label': serialized_example['label'],
    }
    # Debug: print the shape recovered from the re-encoded JPEG.
    tf.print(tf.image.extract_jpeg_shape(sample_dict['image/encoded']))
    return sample_dict
# class Parser(parser.Parser):
#   """Parser to parse an image and its annotations into a dictionary of tensors."""
#
#   def __init__(self,
#                output_size,
#                num_classes,
#                aug_rand_hflip=True,
#                dtype='float32'):
#     """Initializes parameters for parsing annotations in the dataset.
#
#     Args:
#       output_size: `Tensor` or `list` for [height, width] of output image. The
#         output_size should be divided by the largest feature stride 2^max_level.
#       num_classes: `float`, number of classes.
#       aug_rand_hflip: `bool`, if True, augment training with random
#         horizontal flip.
#       dtype: `str`, cast output image in dtype. It can be 'float32', 'float16',
#         or 'bfloat16'.
#     """
#     self._output_size = output_size
#     self._aug_rand_hflip = aug_rand_hflip
#     self._num_classes = num_classes
#     if dtype == 'float32':
#       self._dtype = tf.float32
#     elif dtype == 'float16':
#       self._dtype = tf.float16
#     elif dtype == 'bfloat16':
#       self._dtype = tf.bfloat16
#     else:
#       raise ValueError('dtype {!r} is not supported!'.format(dtype))
#
#   def _parse_train_data(self, decoded_tensors):
#     """Parses data for training."""
#     label = tf.cast(decoded_tensors['image/class/label'], dtype=tf.int32)
#     image_bytes = decoded_tensors['image/encoded']
#     image_shape = tf.image.extract_jpeg_shape(image_bytes)
#
#     # Crops image.
#     # TODO(pengchong): support image format other than JPEG.
#     cropped_image = preprocess_ops.random_crop_image_v2(
#         image_bytes, image_shape)
#     image = tf.cond(
#         tf.reduce_all(tf.equal(tf.shape(cropped_image), image_shape)),
#         lambda: preprocess_ops.center_crop_image_v2(image_bytes, image_shape),
#         lambda: cropped_image)
#
#     if self._aug_rand_hflip:
#       image = tf.image.random_flip_left_right(image)
#
#     # Resizes image.
#     image = tf.image.resize(
#         image, self._output_size, method=tf.image.ResizeMethod.BILINEAR)
#
#     # Normalizes image with mean and std pixel values.
#     image = preprocess_ops.normalize_image(image,
#                                            offset=MEAN_RGB,
#                                            scale=STDDEV_RGB)
#
#     # Convert image to self._dtype.
#     image = tf.image.convert_image_dtype(image, self._dtype)
#     return image, label
#
#   def _parse_eval_data(self, decoded_tensors):
#     """Parses data for evaluation."""
#     label = tf.cast(decoded_tensors['image/class/label'], dtype=tf.int32)
#     image_bytes = decoded_tensors['image/encoded']
#     image_shape = tf.image.extract_jpeg_shape(image_bytes)
#
#     # Center crops and resizes image.
#     image = preprocess_ops.center_crop_image_v2(image_bytes, image_shape)
#     image = tf.image.resize(
#         image, self._output_size, method=tf.image.ResizeMethod.BILINEAR)
#     image = tf.reshape(image, [self._output_size[0], self._output_size[1], 3])
#
#     # Normalizes image with mean and std pixel values.
#     image = preprocess_ops.normalize_image(image,
#                                            offset=MEAN_RGB,
#                                            scale=STDDEV_RGB)
#
#     # Convert image to self._dtype.
#     image = tf.image.convert_image_dtype(image, self._dtype)
#     return image, label
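A hypothetical usage sketch for the Decoder above (not part of this commit): TFDS already decodes images, so the decoder re-encodes them to JPEG bytes under the keys the classification Parser expects. Note that imagenet2012 must be prepared manually under the given data_dir.

import tensorflow_datasets as tfds

dataset = tfds.load('imagenet2012', split='validation',
                    data_dir='~/tensorflow_datasets')
decoder = Decoder()
for features in dataset.take(1):
  decoded = decoder.decode(features)
  # decoded['image/encoded'] holds re-encoded JPEG bytes;
  # decoded['image/class/label'] holds the integer class id.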
......@@ -20,6 +20,7 @@ from official.core import input_reader
from official.core import task_factory
from official.modeling import tf_utils
from official.vision.beta.configs import image_classification as exp_cfg
+from official.vision.beta.projects.yolo.dataloaders import classification_input as cli
from official.vision.beta.dataloaders import classification_input
from official.vision.beta.modeling import factory
......@@ -52,7 +53,10 @@ class ImageClassificationTask(base_task.Task):
    num_classes = self.task_config.model.num_classes
    input_size = self.task_config.model.input_size
-    decoder = classification_input.Decoder()
+    if params.tfds_name != None:
+      decoder = cli.Decoder()
+    else:
+      decoder = classification_input.Decoder()
    parser = classification_input.Parser(
        output_size=input_size[:2],
        num_classes=num_classes,
......
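When tfds_name is set, the input reader pulls samples directly from TFDS (dicts with decoded 'image' tensors) rather than from TFRecord files, which is why the TFDS-aware cli.Decoder is selected above. The rest of build_inputs is not shown in this hunk; it presumably continues along the standard Model Garden pattern, roughly:

    # Sketch only: params and input_context are the build_inputs arguments.
    reader = input_reader.InputReader(
        params,
        dataset_fn=tf.data.TFRecordDataset,
        decoder_fn=decoder.decode,
        parser_fn=parser.parse_fn(params.is_training))
    dataset = reader.read(input_context=input_context)
    return dataset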