Commit d9d47174 authored by Kaushik Shivakumar's avatar Kaushik Shivakumar
Browse files

add new script, clean up both

parent 54c6d319
...@@ -31,7 +31,7 @@ Running this code as a module generates the data set on disk. First, the ...@@ -31,7 +31,7 @@ Running this code as a module generates the data set on disk. First, the
required files are downloaded (_download_data) which enables constructing the required files are downloaded (_download_data) which enables constructing the
label map. Then (in generate_examples), for each split in the data set, the label map. Then (in generate_examples), for each split in the data set, the
metadata and image frames are generated from the annotations for each sequence metadata and image frames are generated from the annotations for each sequence
example (_generate_metadata). The data set is written to disk as a set of example (_generate_examples). The data set is written to disk as a set of
numbered TFRecord files. numbered TFRecord files.
Generating the data on disk can take considerable time and disk space. Generating the data on disk can take considerable time and disk space.
...@@ -96,8 +96,8 @@ SPLITS = { ...@@ -96,8 +96,8 @@ SPLITS = {
"csv": '', "csv": '',
"excluded-csv": '' "excluded-csv": ''
} }
} }
NUM_CLASSES = 80 NUM_CLASSES = 80
def feature_list_feature(value): def feature_list_feature(value):
...@@ -188,7 +188,7 @@ class Ava(object): ...@@ -188,7 +188,7 @@ class Ava(object):
reader = csv.DictReader(annotations, fieldnames) reader = csv.DictReader(annotations, fieldnames)
frame_annotations = collections.defaultdict(list) frame_annotations = collections.defaultdict(list)
ids = set() ids = set()
# aggregate by video and timestamp: # aggregate by video and timestamp:
for row in reader: for row in reader:
ids.add(row["id"]) ids.add(row["id"])
key = (row["id"], int(float(row["timestamp_seconds"]))) key = (row["id"], int(float(row["timestamp_seconds"])))
...@@ -197,8 +197,6 @@ class Ava(object): ...@@ -197,8 +197,6 @@ class Ava(object):
logging.info("Generating metadata...") logging.info("Generating metadata...")
media_num = 1 media_num = 1
for media_id in ids: for media_id in ids:
if media_num > 2:
continue
logging.info("%d/%d, ignore warnings.\n" % (media_num, len(ids))) logging.info("%d/%d, ignore warnings.\n" % (media_num, len(ids)))
media_num += 1 media_num += 1
...@@ -261,7 +259,6 @@ class Ava(object): ...@@ -261,7 +259,6 @@ class Ava(object):
windowed_timestamp += 1 windowed_timestamp += 1
if len(total_boxes) > 0: if len(total_boxes) > 0:
print(total_boxes)
yield seq_example_util.make_sequence_example("AVA", media_id, total_images, yield seq_example_util.make_sequence_example("AVA", media_id, total_images,
int(height), int(width), 'jpeg', total_source_ids, None, total_is_annotated, int(height), int(width), 'jpeg', total_source_ids, None, total_is_annotated,
total_boxes, total_label_strings, use_strs_for_source_id=True) total_boxes, total_label_strings, use_strs_for_source_id=True)
......
# Copyright 2019 The MediaPipe Authors. # Copyright 2020 The TensorFlow Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -12,55 +12,32 @@ ...@@ -12,55 +12,32 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# Modified by Kaushik Shivakumar for the AVA Actions Dataset r"""Code to download and parse the AVA Actions dataset for TensorFlow models.
# to work without MediaPipe, code started by Bryan Seybold.
r"""Code to download and parse the AVA dataset for TensorFlow models. The [AVA Actions data set](
The [AVA data set](
https://research.google.com/ava/index.html) https://research.google.com/ava/index.html)
is a data set for human action recognition. is a dataset for human action recognition.
This script downloads the annotations and prepares data from similar annotations This script downloads the annotations and prepares data from similar annotations
if local video files are available. The video files can be downloaded if local video files are available. The video files can be downloaded
from the following website: from the following website:
https://github.com/cvdfoundation/ava-datset https://github.com/cvdfoundation/ava-dataset
Prior to running this script, please run download_and_preprocess_ava.sh to Prior to running this script, please run download_and_preprocess_ava.sh to
download and trim input videos. download input videos.
Running this code as a module generates the data set on disk. First, the Running this code as a module generates the data set on disk. First, the
required files are downloaded (_download_data) which enables constructing the required files are downloaded (_download_data) which enables constructing the
label map. Then (in generate_examples), for each split in the data set, the label map. Then (in generate_examples), for each split in the data set, the
metadata is generated from the annotations for each example metadata and image frames are generated from the annotations for each sequence
(_generate_metadata), and MediaPipe is used to fill in the video frames example (_generate_examples). The data set is written to disk as a set of
(_run_mediapipe). This script processes local video files defined in a custom
CSV in a comparable manner to the Kinetics data set for evaluating and
predicting values on your own data. The data set is written to disk as a set of
numbered TFRecord files. numbered TFRecord files.
The custom CSV format must match the Kinetics data set format, with columns
corresponding to [[label_name], video, start, end, split] followed by lines with
those fields. (Label_name is optional.) These field names can be used to
construct the paths to the video files using the Python string formatting
specification and the video_path_format_string flag:
--video_path_format_string="/path/to/video/{video}.mp4"
Generating the data on disk can take considerable time and disk space. Generating the data on disk can take considerable time and disk space.
(Image compression quality is the primary determiner of disk usage. TVL1 flow (Image compression quality is the primary determiner of disk usage.)
determines runtime.)
Once the data is on disk, reading the data as a tf.data.Dataset is accomplished
with the following lines:
kinetics = Kinetics("kinetics_data_path") If using the Tensorflow Object Detection API, set the input_type field
dataset = kinetics.as_dataset("custom") in the input_reader to TF_SEQUENCE_EXAMPLE.
# implement additional processing and batching here
images_and_labels = dataset.make_one_shot_iterator().get_next()
images = images_and_labels["images"]
labels = image_and_labels["labels"]
IF using TFOD API, use the sequence example configuration in the config.proto.
This data is structured for per-clip action classification where images is This data is structured for per-clip action classification where images is
the sequence of images and labels are a one-hot encoded value. See the sequence of images and labels are a one-hot encoded value. See
...@@ -68,24 +45,20 @@ as_dataset() for more details. ...@@ -68,24 +45,20 @@ as_dataset() for more details.
Note that the number of videos changes in the data set over time, so it will Note that the number of videos changes in the data set over time, so it will
likely be necessary to change the expected number of examples. likely be necessary to change the expected number of examples.
"""
from __future__ import absolute_import The argument video_path_format_string expects a value as such:
from __future__ import division "/path/to/videos/{0}"
from __future__ import print_function
"""
import contextlib import contextlib
import csv import csv
import os import os
import random import random
import subprocess
import sys import sys
import tarfile
import zipfile import zipfile
import tempfile
import collections import collections
import glob import glob
import hashlib
from absl import app from absl import app
from absl import flags from absl import flags
...@@ -94,23 +67,23 @@ from six.moves import range ...@@ -94,23 +67,23 @@ from six.moves import range
from six.moves import urllib from six.moves import urllib
import tensorflow.compat.v1 as tf import tensorflow.compat.v1 as tf
import cv2 import cv2
import hashlib
from object_detection.utils import dataset_util from object_detection.utils import dataset_util
GLOBAL_SOURCE_ID = 0
POSSIBLE_TIMESTAMPS = range(902, 1798) POSSIBLE_TIMESTAMPS = range(902, 1798)
ANNOTATION_URL = "https://research.google.com/ava/download/ava_v2.2.zip" ANNOTATION_URL = "https://research.google.com/ava/download/ava_v2.2.zip"
SECONDS_TO_MILLI = 1000 SECONDS_TO_MILLI = 1000
FILEPATTERN = "ava_actions_%s_1fps_rgb" FILEPATTERN = "ava_actions_%s_1fps_rgb"
SPLITS = { SPLITS = {
"train": { "train": {
"shards": 100, "shards": 1000,
"examples": 862663, "examples": 862663,
"csv": '', "csv": '',
"excluded-csv": '' "excluded-csv": ''
}, },
"val": { "val": {
"shards": 50, "shards": 100,
"examples": 243029, "examples": 243029,
"csv": '', "csv": '',
"excluded-csv": '' "excluded-csv": ''
...@@ -122,15 +95,15 @@ SPLITS = { ...@@ -122,15 +95,15 @@ SPLITS = {
"csv": '', "csv": '',
"excluded-csv": '' "excluded-csv": ''
} }
} }
NUM_CLASSES = 80 NUM_CLASSES = 80
def feature_list_feature(value): def feature_list_feature(value):
return tf.train.FeatureList(feature=value) return tf.train.FeatureList(feature=value)
class Ava(object): class Ava(object):
"""Generates and loads the Kinetics data set.""" """Generates and loads the AVA Actions 2.2 data set."""
def __init__(self, path_to_output_dir, path_to_data_download): def __init__(self, path_to_output_dir, path_to_data_download):
if not path_to_output_dir: if not path_to_output_dir:
...@@ -138,10 +111,9 @@ class Ava(object): ...@@ -138,10 +111,9 @@ class Ava(object):
self.path_to_data_download = path_to_data_download self.path_to_data_download = path_to_data_download
self.path_to_output_dir = path_to_output_dir self.path_to_output_dir = path_to_output_dir
def generate_examples(self, def generate_and_write_records(self,
splits_to_process="train,val,test", splits_to_process="train,val,test",
video_path_format_string=None, video_path_format_string=None,
download_labels_for_map=True,
seconds_per_sequence=10, seconds_per_sequence=10,
hop_between_sequences=10): hop_between_sequences=10):
"""Downloads data and generates sharded TFRecords. """Downloads data and generates sharded TFRecords.
...@@ -156,17 +128,15 @@ class Ava(object): ...@@ -156,17 +128,15 @@ class Ava(object):
a custom CSV with the CSV flag. The original data is still downloaded a custom CSV with the CSV flag. The original data is still downloaded
to generate the label_map. to generate the label_map.
video_path_format_string: The format string for the path to local files. video_path_format_string: The format string for the path to local files.
download_labels_for_map: If true, download the annotations to create the
label map.
seconds_per_sequence: The length of each sequence, in seconds. seconds_per_sequence: The length of each sequence, in seconds.
hop_between_sequences: The gap between the centers of hop_between_sequences: The gap between the centers of
successive sequences. successive sequences.
""" """
logging.info("Downloading data.") logging.info("Downloading data.")
download_output = self._download_data(download_labels_for_map) download_output = self._download_data()
for key in splits_to_process.split(","): for key in splits_to_process.split(","):
logging.info("Generating metadata for split: %s", key) logging.info("Generating examples for split: %s", key)
all_metadata = list(self._generate_metadata( all_metadata = list(self._generate_examples(
download_output[0][key][0], download_output[0][key][1], download_output[0][key][0], download_output[0][key][1],
download_output[1], seconds_per_sequence, hop_between_sequences, download_output[1], seconds_per_sequence, hop_between_sequences,
video_path_format_string)) video_path_format_string))
...@@ -184,10 +154,14 @@ class Ava(object): ...@@ -184,10 +154,14 @@ class Ava(object):
writers[i % len(writers)].write(seq_ex.SerializeToString()) writers[i % len(writers)].write(seq_ex.SerializeToString())
logging.info("Data extraction complete.") logging.info("Data extraction complete.")
def _generate_metadata(self, annotation_file, excluded_file, label_map, def _generate_examples(self, annotation_file, excluded_file, label_map,
seconds_per_sequence, hop_between_sequences, seconds_per_sequence, hop_between_sequences,
video_path_format_string): video_path_format_string):
"""For each row in the annotation CSV, generates the corresponding metadata. """For each row in the annotation CSV, generates the corresponding
examples. When iterating through frames for a single example, skips
over excluded frames. Generates equal-length sequence examples, each with
length seconds_per_sequence (1 fps) and gaps of hop_between_sequences
frames (and seconds) between them, possibly greater due to excluded frames.
Args: Args:
annotation_file: path to the file of AVA CSV annotations. annotation_file: path to the file of AVA CSV annotations.
...@@ -197,9 +171,8 @@ class Ava(object): ...@@ -197,9 +171,8 @@ class Ava(object):
hop_between_sequences: The hop between sequences. If less than hop_between_sequences: The hop between sequences. If less than
seconds_per_sequence, will overlap. seconds_per_sequence, will overlap.
Yields: Yields:
Each tf.SequenceExample of metadata, ready to pass to MediaPipe. Each prepared tf.SequenceExample of metadata, also containing video frames.
""" """
global GLOBAL_SOURCE_ID
fieldnames = ["id", "timestamp_seconds", "xmin", "ymin", "xmax", "ymax", fieldnames = ["id", "timestamp_seconds", "xmin", "ymin", "xmax", "ymax",
"action_label"] "action_label"]
frame_excluded = {} frame_excluded = {}
...@@ -217,7 +190,7 @@ class Ava(object): ...@@ -217,7 +190,7 @@ class Ava(object):
ids.add(row["id"]) ids.add(row["id"])
key = (row["id"], int(float(row["timestamp_seconds"]))) key = (row["id"], int(float(row["timestamp_seconds"])))
frame_annotations[key].append(row) frame_annotations[key].append(row)
# for each video, find aggregates near each sampled frame.: # for each video, find aggregates near each sampled frame.:
logging.info("Generating metadata...") logging.info("Generating metadata...")
media_num = 1 media_num = 1
for media_id in ids: for media_id in ids:
...@@ -240,8 +213,6 @@ class Ava(object): ...@@ -240,8 +213,6 @@ class Ava(object):
middle_frame_time = POSSIBLE_TIMESTAMPS[0] middle_frame_time = POSSIBLE_TIMESTAMPS[0]
cur_frame_num = 0 cur_frame_num = 0
while middle_frame_time < POSSIBLE_TIMESTAMPS[-1]: while middle_frame_time < POSSIBLE_TIMESTAMPS[-1]:
GLOBAL_SOURCE_ID += 1
cur_vid.set(cv2.CAP_PROP_POS_MSEC, cur_vid.set(cv2.CAP_PROP_POS_MSEC,
(middle_frame_time) * SECONDS_TO_MILLI) (middle_frame_time) * SECONDS_TO_MILLI)
success, image = cur_vid.read() success, image = cur_vid.read()
...@@ -255,8 +226,7 @@ class Ava(object): ...@@ -255,8 +226,7 @@ class Ava(object):
continue continue
cur_frame_num += 1 cur_frame_num += 1
source_id = str(GLOBAL_SOURCE_ID) + "_" + media_id source_id = str(middle_frame_time) + "_" + media_id
GLOBAL_SOURCE_ID += 1
xmins = [] xmins = []
xmaxs = [] xmaxs = []
...@@ -280,23 +250,7 @@ class Ava(object): ...@@ -280,23 +250,7 @@ class Ava(object):
else: else:
logging.warning("Unknown label: %s", row["action_label"]) logging.warning("Unknown label: %s", row["action_label"])
#Display the image and bounding boxes being middle_frame_time += 1
#processed (for debugging purposes)
"""
for i in range(len(xmins)):
cv2.rectangle(image, (int(xmins[i] * width),
int(ymaxs[i] * height)),
(int(xmaxs[i] * width),
int(ymins[i] * height)), (255, 0, 0), 2)
cv2.imshow("mywindow", image)
cv2.waitKey(1000)
"""
middle_frame_time += 1/3
if abs(middle_frame_time - round(middle_frame_time) < 0.000001):
middle_frame_time = round(middle_frame_time)
num_frames_in_adjusted = (middle_time_frame - 900) * 3 * 2
key = hashlib.sha256(bufstring).hexdigest() key = hashlib.sha256(bufstring).hexdigest()
date_captured_feature = ("2020-06-17 00:%02d:%02d" % ((middle_frame_time - 900) // 60, (middle_frame_time - 900) % 60)) date_captured_feature = ("2020-06-17 00:%02d:%02d" % ((middle_frame_time - 900) // 60, (middle_frame_time - 900) % 60))
context_feature_dict = { context_feature_dict = {
...@@ -345,7 +299,7 @@ class Ava(object): ...@@ -345,7 +299,7 @@ class Ava(object):
cur_vid.release() cur_vid.release()
def _download_data(self, download_labels_for_map): def _download_data(self):
"""Downloads and extracts data if not already available.""" """Downloads and extracts data if not already available."""
if sys.version_info >= (3, 0): if sys.version_info >= (3, 0):
urlretrieve = urllib.request.urlretrieve urlretrieve = urllib.request.urlretrieve
...@@ -355,7 +309,6 @@ class Ava(object): ...@@ -355,7 +309,6 @@ class Ava(object):
tf.io.gfile.makedirs(self.path_to_data_download) tf.io.gfile.makedirs(self.path_to_data_download)
logging.info("Downloading annotations.") logging.info("Downloading annotations.")
paths = {} paths = {}
if download_labels_for_map:
zip_path = os.path.join(self.path_to_data_download, zip_path = os.path.join(self.path_to_data_download,
ANNOTATION_URL.split("/")[-1]) ANNOTATION_URL.split("/")[-1])
urlretrieve(ANNOTATION_URL, zip_path) urlretrieve(ANNOTATION_URL, zip_path)
...@@ -400,7 +353,6 @@ def bytes23(string): ...@@ -400,7 +353,6 @@ def bytes23(string):
"""Creates a bytes string in either Python 2 or 3.""" """Creates a bytes string in either Python 2 or 3."""
if sys.version_info >= (3, 0): if sys.version_info >= (3, 0):
return bytes(string, "utf8") return bytes(string, "utf8")
else:
return bytes(string) return bytes(string)
@contextlib.contextmanager @contextlib.contextmanager
...@@ -416,10 +368,9 @@ def main(argv): ...@@ -416,10 +368,9 @@ def main(argv):
if len(argv) > 1: if len(argv) > 1:
raise app.UsageError("Too many command-line arguments.") raise app.UsageError("Too many command-line arguments.")
Ava(flags.FLAGS.path_to_output_dir, Ava(flags.FLAGS.path_to_output_dir,
flags.FLAGS.path_to_download_data).generate_examples( flags.FLAGS.path_to_download_data).generate_and_write_records(
flags.FLAGS.splits_to_process, flags.FLAGS.splits_to_process,
flags.FLAGS.video_path_format_string, flags.FLAGS.video_path_format_string,
flags.FLAGS.download_labels_for_map,
flags.FLAGS.seconds_per_sequence, flags.FLAGS.seconds_per_sequence,
flags.FLAGS.hop_between_sequences) flags.FLAGS.hop_between_sequences)
...@@ -430,10 +381,6 @@ if __name__ == "__main__": ...@@ -430,10 +381,6 @@ if __name__ == "__main__":
flags.DEFINE_string("path_to_output_dir", flags.DEFINE_string("path_to_output_dir",
"", "",
"Path to directory to write data to.") "Path to directory to write data to.")
flags.DEFINE_boolean("download_labels_for_map",
True,
"If true, download the annotations to construct the "
"label map.")
flags.DEFINE_string("splits_to_process", flags.DEFINE_string("splits_to_process",
"train,val", "train,val",
"Process these splits. Useful for custom data splits.") "Process these splits. Useful for custom data splits.")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment