Commit 454e12b8 authored by TF Object Detection Team

Merge pull request #8768 from kmindspark:newavarecords

PiperOrigin-RevId: 328778119
parents 8f28cb91 1f579e0e
#!/bin/bash
# This script downloads the videos for the AVA dataset. It takes no arguments.
# Copy this script into the desired parent directory of the ava_vids_raw/
# directory it creates to hold the raw videos.
mkdir ava_vids_raw
cd ava_vids_raw
curl -O https://s3.amazonaws.com/ava-dataset/annotations/ava_file_names_trainval_v2.1.txt
echo "Downloading all videos."
cat "ava_file_names_trainval_v2.1.txt" | while read line
do
curl -O s3.amazonaws.com/ava-dataset/trainval/$line
echo "Downloaded " $line
done
rm "ava_file_names_trainval_v2.1.txt"
cd ..
# Trimming causes issues with frame seeking in the python script, so it is best left out.
# If included, the python script would need to subtract 900 seconds when seeking.
# echo "Trimming all videos."
# mkdir ava_vids_trimmed
# for filename in ava_vids_raw/*; do
# ffmpeg -ss 900 -to 1800 -i $filename -c copy ava_vids_trimmed/${filename##*/}
# done
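If trimmed videos were used anyway, the record-generation python script (not included in this excerpt) would have to subtract the 900-second offset whenever it seeks to an annotation timestamp. A minimal sketch of that adjustment, assuming an OpenCV-based decoder; the function name and arguments are hypothetical:

import cv2

def read_frame_at(video_path, timestamp_sec, trimmed=False):
  # Hypothetical helper, not part of this commit: AVA timestamps refer to the
  # untrimmed video, so trimmed clips need the 900 s head offset removed
  # before seeking.
  offset_sec = 900.0 if trimmed else 0.0
  cap = cv2.VideoCapture(video_path)
  cap.set(cv2.CAP_PROP_POS_MSEC, (timestamp_sec - offset_sec) * 1000.0)
  ok, frame = cap.read()
  cap.release()
  return frame if ok else None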
# Lint as: python2, python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -123,6 +122,15 @@ def sequence_bytes_feature(ndarray):
   return feature_list
 
 
+def sequence_strings_feature(strings):
+  new_str_arr = []
+  for single_str in strings:
+    new_str_arr.append(tf.train.Feature(
+        bytes_list=tf.train.BytesList(
+            value=[single_str.encode('utf8')])))
+  return tf.train.FeatureList(feature=new_str_arr)
+
+
 def boxes_to_box_components(bboxes):
   """Converts a list of numpy arrays (boxes) to box components.
 
@@ -137,8 +145,11 @@ def boxes_to_box_components(bboxes):
   ymax_list = []
   xmax_list = []
   for bbox in bboxes:
-    bbox = np.array(bbox).astype(np.float32)
-    ymin, xmin, ymax, xmax = np.split(bbox, 4, axis=1)
+    if bbox != []:  # pylint: disable=g-explicit-bool-comparison
+      bbox = np.array(bbox).astype(np.float32)
+      ymin, xmin, ymax, xmax = np.split(bbox, 4, axis=1)
+    else:
+      ymin, xmin, ymax, xmax = [], [], [], []
     ymin_list.append(np.reshape(ymin, [-1]))
     xmin_list.append(np.reshape(xmin, [-1]))
     ymax_list.append(np.reshape(ymax, [-1]))
@@ -159,7 +170,8 @@ def make_sequence_example(dataset_name,
                           label_strings=None,
                           detection_bboxes=None,
                           detection_classes=None,
-                          detection_scores=None):
+                          detection_scores=None,
+                          use_strs_for_source_id=False):
   """Constructs tf.SequenceExamples.
 
   Args:
@@ -189,6 +201,8 @@ def make_sequence_example(dataset_name,
     detection_scores: (Optional) A list (with num_frames_elements) of
       [num_boxes_i] numpy float32 arrays holding predicted object scores for
       each frame.
+    use_strs_for_source_id: (Optional) Whether to write the source IDs as
+      strings rather than byte lists of characters.
 
   Returns:
     A tf.train.SequenceExample.
@@ -221,7 +235,11 @@ def make_sequence_example(dataset_name,
   if image_format is not None:
     context_dict['image/format'] = context_bytes_feature([image_format])
   if image_source_ids is not None:
-    feature_list['image/source_id'] = sequence_bytes_feature(image_source_ids)
+    if use_strs_for_source_id:
+      feature_list['image/source_id'] = sequence_strings_feature(
+          image_source_ids)
+    else:
+      feature_list['image/source_id'] = sequence_bytes_feature(image_source_ids)
   if bboxes is not None:
     bbox_ymin, bbox_xmin, bbox_ymax, bbox_xmax = boxes_to_box_components(bboxes)
     feature_list['region/bbox/xmin'] = sequence_float_feature(bbox_xmin)
...
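For reference, the new sequence_strings_feature helper wraps each whole string as a single bytes value per frame, which is what the use_strs_for_source_id flag selects for image/source_id (per the docstring above, the default path writes source IDs as byte lists of characters). A small standalone check; the helper below is copied from the diff so the snippet runs on its own:

import tensorflow as tf

def sequence_strings_feature(strings):
  # Copied from the diff above: one tf.train.Feature per string, each holding
  # the full UTF-8 encoded string as a single bytes value.
  new_str_arr = []
  for single_str in strings:
    new_str_arr.append(tf.train.Feature(
        bytes_list=tf.train.BytesList(
            value=[single_str.encode('utf8')])))
  return tf.train.FeatureList(feature=new_str_arr)

source_ids = sequence_strings_feature(['video_000_frame_0', 'video_000_frame_1'])
print(source_ids.feature[0].bytes_list.value)  # [b'video_000_frame_0']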
@@ -104,12 +104,12 @@ class SeqExampleUtilTest(tf.test.TestCase):
                         source_ids)
 
   def test_make_labeled_example(self):
-    num_frames = 2
+    num_frames = 3
     image_height = 100
     image_width = 200
     dataset_name = b'unlabeled_dataset'
     video_id = b'video_000'
-    labels = [b'dog', b'cat']
+    labels = [b'dog', b'cat', b'wolf']
     images = tf.cast(tf.random.uniform(
         [num_frames, image_height, image_width, 3],
         maxval=256,
@@ -117,15 +117,17 @@ class SeqExampleUtilTest(tf.test.TestCase):
     images_list = tf.unstack(images, axis=0)
     encoded_images_list = [tf.io.encode_jpeg(image) for image in images_list]
     encoded_images = self.materialize_tensors(encoded_images_list)
-    timestamps = [100000, 110000]
-    is_annotated = [1, 0]
+    timestamps = [100000, 110000, 120000]
+    is_annotated = [1, 0, 1]
     bboxes = [
         np.array([[0., 0., 0., 0.],
                   [0., 0., 1., 1.]], dtype=np.float32),
-        np.zeros([0, 4], dtype=np.float32)
+        np.zeros([0, 4], dtype=np.float32),
+        np.array([], dtype=np.float32)
     ]
     label_strings = [
         np.array(labels),
+        np.array([]),
         np.array([])
     ]
...
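The extra test frame with np.array([], dtype=np.float32) exercises the new empty-box branch in boxes_to_box_components. A self-contained illustration of that behavior (a simplified re-implementation for demonstration, not the module's exact code; it checks np.size(bbox) in place of the diff's explicit bbox != [] comparison):

import numpy as np

def split_boxes(bboxes):
  # Mirrors the guard added above: frames with no boxes produce empty
  # per-coordinate components instead of np.split failing on an empty array.
  ymin_list, xmin_list, ymax_list, xmax_list = [], [], [], []
  for bbox in bboxes:
    if np.size(bbox) > 0:
      bbox = np.array(bbox).astype(np.float32)
      ymin, xmin, ymax, xmax = np.split(bbox, 4, axis=1)
    else:
      ymin, xmin, ymax, xmax = [], [], [], []
    ymin_list.append(np.reshape(ymin, [-1]))
    xmin_list.append(np.reshape(xmin, [-1]))
    ymax_list.append(np.reshape(ymax, [-1]))
    xmax_list.append(np.reshape(xmax, [-1]))
  return ymin_list, xmin_list, ymax_list, xmax_list

ymins, _, _, _ = split_boxes([np.zeros([2, 4], dtype=np.float32),
                              np.array([], dtype=np.float32)])
print([y.shape for y in ymins])  # [(2,), (0,)]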