darknet training on imagenet2012 with a new tfds decoder that works with the existing classification example parser

darknet training on imagenet2012 with a new tfds decoder that works with the existing classification example parser

darknet training on imagenet2012 with a new tfds decoder that works with the existing classification example parser
darknet training on imagenet2012 with a new tfds decoder that works with the existing classification example parser
6d3cfef4 · Vishnu Banna · 50dceb71 · 6d3cfef4 · 6d3cfef4 · 6d3cfef4
Commit 6d3cfef4 authored Nov 01, 2020 by Vishnu Banna
7 changed files
--- a/official/vision/beta/projects/yolo/configs/experiments/csp_darknet53_tfds.yaml
+++ b/official/vision/beta/projects/yolo/configs/experiments/csp_darknet53_tfds.yaml
 runtime:
  distribution_strategy: 'mirrored'
-  mixed_precision_dtype: 'float32'
+  mixed_precision_dtype: 'float16'
 task:
  model:
    num_classes: 1001
@@ -33,7 +33,7 @@ task:
 trainer:
  train_steps: 1200000 # epochs: 120
  validation_steps: 400 # size of validation data
-  validation_interval: 10000
+  validation_interval: 500 # 10000
  steps_per_loop: 10000
  summary_interval: 10000
  checkpoint_interval: 10000

--- a/official/vision/beta/projects/yolo/configs/experiments/darknet53_tfds.yaml
+++ b/official/vision/beta/projects/yolo/configs/experiments/darknet53_tfds.yaml
 runtime:
  distribution_strategy: 'mirrored'
-  mixed_precision_dtype: 'float32'
+  mixed_precision_dtype: 'float16'
 task:
  model:
    num_classes: 1001
@@ -32,7 +32,7 @@ task:
 trainer:
  train_steps: 800000 # epochs: 80
  validation_steps: 400 # size of validation data
-  validation_interval: 10000
+  validation_interval: 500 #10000
  steps_per_loop: 10000
  summary_interval: 10000
  checkpoint_interval: 10000

--- a/official/vision/beta/projects/yolo/dataloaders/classification_input.py
+++ b/official/vision/beta/projects/yolo/dataloaders/classification_input.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Classification decoder and parser."""
+"""TFDS Classification decoder and parser."""
 # Import libraries
 import tensorflow as tf
@@ -27,101 +27,13 @@ STDDEV_RGB = (0.229 * 255, 0.224 * 255, 0.225 * 255)
 class Decoder(decoder.Decoder):
  """A tf.Example decoder for classification task."""
  def __init__(self):
-    tf.print("decoder ahs been init")
+      return 
-    return
  def decode(self, serialized_example):
    sample_dict = {
                'image/encoded': tf.io.encode_jpeg(serialized_example['image'], quality=100), 
                'image/class/label': serialized_example['label'], 
                }
-    tf.print(tf.image.extract_jpeg_shape(sample_dict['image/encoded']))
    return sample_dict
-# class Parser(parser.Parser):
-#   """Parser to parse an image and its annotations into a dictionary of tensors."""
-#   def __init__(self,
-#                output_size,
-#                num_classes,
-#                aug_rand_hflip=True,
-#                dtype='float32'):
-#     """Initializes parameters for parsing annotations in the dataset.
-#     Args:
-#       output_size: `Tenssor` or `list` for [height, width] of output image. The
-#         output_size should be divided by the largest feature stride 2^max_level.
-#       num_classes: `float`, number of classes.
-#       aug_rand_hflip: `bool`, if True, augment training with random
-#         horizontal flip.
-#       dtype: `str`, cast output image in dtype. It can be 'float32', 'float16',
-#         or 'bfloat16'.
-#     """
-#     self._output_size = output_size
-#     self._aug_rand_hflip = aug_rand_hflip
-#     self._num_classes = num_classes
-#     if dtype == 'float32':
-#       self._dtype = tf.float32
-#     elif dtype == 'float16':
-#       self._dtype = tf.float16
-#     elif dtype == 'bfloat16':
-#       self._dtype = tf.bfloat16
-#     else:
-#       raise ValueError('dtype {!r} is not supported!'.format(dtype))
-#   def _parse_train_data(self, decoded_tensors):
-#     """Parses data for training."""
-#     label = tf.cast(decoded_tensors['image/class/label'], dtype=tf.int32)
-#     image_bytes = decoded_tensors['image/encoded']
-#     image_shape = tf.image.extract_jpeg_shape(image_bytes)
-#     # Crops image.
-#     # TODO(pengchong): support image format other than JPEG.
-#     cropped_image = preprocess_ops.random_crop_image_v2(
-#         image_bytes, image_shape)
-#     image = tf.cond(
-#         tf.reduce_all(tf.equal(tf.shape(cropped_image), image_shape)),
-#         lambda: preprocess_ops.center_crop_image_v2(image_bytes, image_shape),
-#         lambda: cropped_image)
-#     if self._aug_rand_hflip:
-#       image = tf.image.random_flip_left_right(image)
-#     # Resizes image.
-#     image = tf.image.resize(
-#         image, self._output_size, method=tf.image.ResizeMethod.BILINEAR)
-#     # Normalizes image with mean and std pixel values.
-#     image = preprocess_ops.normalize_image(image,
-#                                            offset=MEAN_RGB,
-#                                            scale=STDDEV_RGB)
-#     # Convert image to self._dtype.
-#     image = tf.image.convert_image_dtype(image, self._dtype)
-#     return image, label
-#   def _parse_eval_data(self, decoded_tensors):
-#     """Parses data for evaluation."""
-#     label = tf.cast(decoded_tensors['image/class/label'], dtype=tf.int32)
-#     image_bytes = decoded_tensors['image/encoded']
-#     image_shape = tf.image.extract_jpeg_shape(image_bytes)
-#     # Center crops and resizes image.
-#     image = preprocess_ops.center_crop_image_v2(image_bytes, image_shape)
-#     image = tf.image.resize(
-#         image, self._output_size, method=tf.image.ResizeMethod.BILINEAR)
-#     image = tf.reshape(image, [self._output_size[0], self._output_size[1], 3])
-#     # Normalizes image with mean and std pixel values.
-#     image = preprocess_ops.normalize_image(image,
-#                                            offset=MEAN_RGB,
-#                                            scale=STDDEV_RGB)
-#     # Convert image to self._dtype.
-#     image = tf.image.convert_image_dtype(image, self._dtype)
-#     return image, label
--- a/official/vision/beta/projects/yolo/tasks/image_classification.py
+++ b/official/vision/beta/projects/yolo/tasks/image_classification.py
@@ -54,10 +54,8 @@ class ImageClassificationTask(base_task.Task):
    input_size = self.task_config.model.input_size
    if params.tfds_name != None: 
-      tf.print("i am here for training using tfds")
      decoder = cli.Decoder()
    else:
-      tf.print("i am here for regular input")
      decoder = classification_input.Decoder()
    parser = classification_input.Parser(

--- a/training_dir/params.yaml
+++ b/training_dir/params.yaml
@@ -7,7 +7,7 @@ runtime:
  enable_xla: false
  gpu_thread_mode: null
  loss_scale: null
-  mixed_precision_dtype: float32
+  mixed_precision_dtype: float16
  num_cores_per_replica: 1
  num_gpus: 0
  num_packs: 1
@@ -120,5 +120,5 @@ trainer:
  train_steps: 800000
  train_tf_function: true
  train_tf_while_loop: true
-  validation_interval: 10000
+  validation_interval: 500
  validation_steps: 400
--- a/training_dir/train/events.out.tfevents.1604254799.jaeyounkim-purdue-1-vm.5641.12239.v2
+++ b/training_dir/train/events.out.tfevents.1604254799.jaeyounkim-purdue-1-vm.5641.12239.v2
--- a/training_dir/train/events.out.tfevents.1604255833.jaeyounkim-purdue-1-vm.5996.12339.v2
+++ b/training_dir/train/events.out.tfevents.1604255833.jaeyounkim-purdue-1-vm.5996.12339.v2