Commit d9d47174 authored by Kaushik Shivakumar

add new script, clean up both

parent 54c6d319
@@ -31,7 +31,7 @@ Running this code as a module generates the data set on disk. First, the
required files are downloaded (_download_data) which enables constructing the
label map. Then (in generate_examples), for each split in the data set, the
metadata and image frames are generated from the annotations for each sequence
example (_generate_metadata). The data set is written to disk as a set of
example (_generate_examples). The data set is written to disk as a set of
numbered TFRecord files.
Generating the data on disk can take considerable time and disk space.
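For orientation, the whole pipeline described above can be driven from Python in a few lines. A minimal sketch, assuming this module is importable as `ava` and using placeholder paths (module name and paths are assumptions, not values from this commit):

```python
# Minimal driver sketch: download annotations, window the local videos,
# and write sharded TFRecords. Module name and paths are hypothetical.
from ava import Ava

Ava("/tmp/ava_records", "/tmp/ava_downloads").generate_and_write_records(
    splits_to_process="train,val",
    video_path_format_string="/path/to/videos/{0}",
    seconds_per_sequence=10,
    hop_between_sequences=10)
```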
@@ -96,8 +96,8 @@ SPLITS = {
"csv": '',
"excluded-csv": ''
}
}
NUM_CLASSES = 80
def feature_list_feature(value):
@@ -188,7 +188,7 @@ class Ava(object):
reader = csv.DictReader(annotations, fieldnames)
frame_annotations = collections.defaultdict(list)
ids = set()
# aggregate by video and timestamp:
for row in reader:
ids.add(row["id"])
key = (row["id"], int(float(row["timestamp_seconds"])))
@@ -197,8 +197,6 @@ class Ava(object):
logging.info("Generating metadata...")
media_num = 1
for media_id in ids:
if media_num > 2:
continue
logging.info("%d/%d, ignore warnings.\n" % (media_num, len(ids)))
media_num += 1
@@ -261,7 +259,6 @@ class Ava(object):
windowed_timestamp += 1
if len(total_boxes) > 0:
print(total_boxes)
yield seq_example_util.make_sequence_example("AVA", media_id, total_images,
int(height), int(width), 'jpeg', total_source_ids, None, total_is_annotated,
total_boxes, total_label_strings, use_strs_for_source_id=True)
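The yielded records are tf.train.SequenceExamples. As a rough guide to consuming them, a decoding sketch follows; the feature keys are assumptions based on common TFOD sequence-example conventions and should be checked against seq_example_util:

```python
# Decoding sketch for one serialized SequenceExample.
# Key names are assumed, not taken from seq_example_util.
import tensorflow.compat.v1 as tf

CONTEXT_FEATURES = {
    "image/height": tf.io.FixedLenFeature([], tf.int64),
    "image/width": tf.io.FixedLenFeature([], tf.int64),
}
SEQUENCE_FEATURES = {
    "image/encoded": tf.io.FixedLenSequenceFeature([], tf.string),
    "region/label/string": tf.io.VarLenFeature(tf.string),
}

def decode(serialized):
  # Returns (context, feature_lists) dicts of tensors.
  return tf.io.parse_single_sequence_example(
      serialized, CONTEXT_FEATURES, SEQUENCE_FEATURES)
```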
......
# Copyright 2019 The MediaPipe Authors.
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -12,55 +12,32 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified by Kaushik Shivakumar for the AVA Actions Dataset
# to work without MediaPipe, code started by Bryan Seybold.
r"""Code to download and parse the AVA Actions dataset for TensorFlow models.
r"""Code to download and parse the AVA dataset for TensorFlow models.
The [AVA data set](
The [AVA Actions data set](
https://research.google.com/ava/index.html)
is a data set for human action recognition.
is a dataset for human action recognition.
This script downloads the annotations and prepares data from similar annotations
if local video files are available. The video files can be downloaded
from the following website:
https://github.com/cvdfoundation/ava-datset
https://github.com/cvdfoundation/ava-dataset
Prior to running this script, please run download_and_preprocess_ava.sh to
download and trim input videos.
download input videos.
Running this code as a module generates the data set on disk. First, the
required files are downloaded (_download_data) which enables constructing the
label map. Then (in generate_examples), for each split in the data set, the
metadata is generated from the annotations for each example
(_generate_metadata), and MediaPipe is used to fill in the video frames
(_run_mediapipe). This script processes local video files defined in a custom
CSV in a comparable manner to the Kinetics data set for evaluating and
predicting values on your own data. The data set is written to disk as a set of
metadata and image frames are generated from the annotations for each sequence
example (_generate_examples). The data set is written to disk as a set of
numbered TFRecord files.
The custom CSV format must match the Kinetics data set format, with columns
corresponding to [[label_name], video, start, end, split] followed by lines with
those fields. (Label_name is optional.) These field names can be used to
construct the paths to the video files using the Python string formatting
specification and the video_path_format_string flag:
--video_path_format_string="/path/to/video/{video}.mp4"
Generating the data on disk can take considerable time and disk space.
(Image compression quality is the primary determiner of disk usage. TVL1 flow
determines runtime.)
Once the data is on disk, reading the data as a tf.data.Dataset is accomplished
with the following lines:
(Image compression quality is the primary determiner of disk usage.)
kinetics = Kinetics("kinetics_data_path")
dataset = kinetics.as_dataset("custom")
# implement additional processing and batching here
images_and_labels = dataset.make_one_shot_iterator().get_next()
images = images_and_labels["images"]
labels = images_and_labels["labels"]
IF using TFOD API, use the sequence example configuration in the config.proto.
If using the TensorFlow Object Detection API, set the input_type field
in the input_reader to TF_SEQUENCE_EXAMPLE.
This data is structured for per-clip action classification where images is
the sequence of images and labels are a one-hot encoded value. See
@@ -68,24 +45,20 @@ as_dataset() for more details.
Note that the number of videos changes in the data set over time, so it will
likely be necessary to change the expected number of examples.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
The video_path_format_string argument expects a value of the form:
"/path/to/videos/{0}"
"""
import contextlib
import csv
import os
import random
import subprocess
import sys
import tarfile
import zipfile
import tempfile
import collections
import glob
import hashlib
from absl import app
from absl import flags
@@ -94,23 +67,23 @@ from six.moves import range
from six.moves import urllib
import tensorflow.compat.v1 as tf
import cv2
import hashlib
from object_detection.utils import dataset_util
GLOBAL_SOURCE_ID = 0
POSSIBLE_TIMESTAMPS = range(902, 1798)
ANNOTATION_URL = "https://research.google.com/ava/download/ava_v2.2.zip"
SECONDS_TO_MILLI = 1000
FILEPATTERN = "ava_actions_%s_1fps_rgb"
SPLITS = {
"train": {
"shards": 100,
"shards": 1000,
"examples": 862663,
"csv": '',
"excluded-csv": ''
},
"val": {
"shards": 50,
"shards": 100,
"examples": 243029,
"csv": '',
"excluded-csv": ''
@@ -122,15 +95,15 @@ SPLITS = {
"csv": '',
"excluded-csv": ''
}
}
NUM_CLASSES = 80
def feature_list_feature(value):
return tf.train.FeatureList(feature=value)
class Ava(object):
"""Generates and loads the Kinetics data set."""
"""Generates and loads the AVA Actions 2.2 data set."""
def __init__(self, path_to_output_dir, path_to_data_download):
if not path_to_output_dir:
@@ -138,10 +111,9 @@ class Ava(object):
self.path_to_data_download = path_to_data_download
self.path_to_output_dir = path_to_output_dir
def generate_examples(self,
def generate_and_write_records(self,
splits_to_process="train,val,test",
video_path_format_string=None,
download_labels_for_map=True,
seconds_per_sequence=10,
hop_between_sequences=10):
"""Downloads data and generates sharded TFRecords.
@@ -156,17 +128,15 @@ class Ava(object):
a custom CSV with the CSV flag. The original data is still downloaded
to generate the label_map.
video_path_format_string: The format string for the path to local files.
download_labels_for_map: If true, download the annotations to create the
label map.
seconds_per_sequence: The length of each sequence, in seconds.
hop_between_sequences: The gap between the centers of
successive sequences.
"""
logging.info("Downloading data.")
download_output = self._download_data(download_labels_for_map)
download_output = self._download_data()
for key in splits_to_process.split(","):
logging.info("Generating metadata for split: %s", key)
all_metadata = list(self._generate_metadata(
logging.info("Generating examples for split: %s", key)
all_metadata = list(self._generate_examples(
download_output[0][key][0], download_output[0][key][1],
download_output[1], seconds_per_sequence, hop_between_sequences,
video_path_format_string))
@@ -184,10 +154,14 @@ class Ava(object):
writers[i % len(writers)].write(seq_ex.SerializeToString())
logging.info("Data extraction complete.")
def _generate_metadata(self, annotation_file, excluded_file, label_map,
def _generate_examples(self, annotation_file, excluded_file, label_map,
seconds_per_sequence, hop_between_sequences,
video_path_format_string):
"""For each row in the annotation CSV, generates the corresponding metadata.
"""For each row in the annotation CSV, generates the corresponding
examples. When iterating through frames for a single example, skips
over excluded frames. Generates equal-length sequence examples, each with
length seconds_per_sequence (1 fps) and gaps of hop_between_sequences
frames (and seconds) between them, possible greater due to excluded frames.
Args:
annotation_file: path to the file of AVA CSV annotations.
@@ -197,9 +171,8 @@ class Ava(object):
hop_between_sequences: The hop between sequences. If less than
seconds_per_sequence, sequences will overlap.
Yields:
Each tf.SequenceExample of metadata, ready to pass to MediaPipe.
Each prepared tf.SequenceExample containing both metadata and video frames.
"""
global GLOBAL_SOURCE_ID
fieldnames = ["id", "timestamp_seconds", "xmin", "ymin", "xmax", "ymax",
"action_label"]
frame_excluded = {}
@@ -217,7 +190,7 @@ class Ava(object):
ids.add(row["id"])
key = (row["id"], int(float(row["timestamp_seconds"])))
frame_annotations[key].append(row)
# for each video, find aggregates near each sampled frame:
logging.info("Generating metadata...")
media_num = 1
for media_id in ids:
@@ -240,8 +213,6 @@ class Ava(object):
middle_frame_time = POSSIBLE_TIMESTAMPS[0]
cur_frame_num = 0
while middle_frame_time < POSSIBLE_TIMESTAMPS[-1]:
GLOBAL_SOURCE_ID += 1
cur_vid.set(cv2.CAP_PROP_POS_MSEC,
(middle_frame_time) * SECONDS_TO_MILLI)
success, image = cur_vid.read()
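The seek-and-read above grabs one frame per annotated second. The `bufstring` hashed further down is presumably the JPEG-encoded frame; a sketch of that pairing (the helper name is hypothetical):

```python
# Frame-grab sketch: seek by milliseconds, read, JPEG-encode with OpenCV.
import cv2

SECONDS_TO_MILLI = 1000

def jpeg_frame_at(cur_vid, middle_frame_time):
  cur_vid.set(cv2.CAP_PROP_POS_MSEC, middle_frame_time * SECONDS_TO_MILLI)
  success, image = cur_vid.read()
  if not success:
    return None
  success, buf = cv2.imencode(".jpg", image)
  # Bytes suitable for hashing and for the sequence example.
  return buf.tobytes() if success else None
```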
@@ -255,8 +226,7 @@ class Ava(object):
continue
cur_frame_num += 1
source_id = str(GLOBAL_SOURCE_ID) + "_" + media_id
GLOBAL_SOURCE_ID += 1
source_id = str(middle_frame_time) + "_" + media_id
xmins = []
xmaxs = []
@@ -280,23 +250,7 @@ class Ava(object):
else:
logging.warning("Unknown label: %s", row["action_label"])
# Display the image and bounding boxes being
# processed (for debugging purposes)
"""
for i in range(len(xmins)):
cv2.rectangle(image, (int(xmins[i] * width),
int(ymaxs[i] * height)),
(int(xmaxs[i] * width),
int(ymins[i] * height)), (255, 0, 0), 2)
cv2.imshow("mywindow", image)
cv2.waitKey(1000)
"""
middle_frame_time += 1/3
if abs(middle_frame_time - round(middle_frame_time)) < 0.000001:
middle_frame_time = round(middle_frame_time)
num_frames_in_adjusted = (middle_frame_time - 900) * 3 * 2
middle_frame_time += 1
key = hashlib.sha256(bufstring).hexdigest()
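# Synthesize a capture-time string from the clip-relative offset (AVA segments start at 900 s).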
date_captured_feature = ("2020-06-17 00:%02d:%02d" % ((middle_frame_time - 900) // 60, (middle_frame_time - 900) % 60))
context_feature_dict = {
@@ -345,7 +299,7 @@ class Ava(object):
cur_vid.release()
def _download_data(self, download_labels_for_map):
def _download_data(self):
"""Downloads and extracts data if not already available."""
if sys.version_info >= (3, 0):
urlretrieve = urllib.request.urlretrieve
@@ -355,7 +309,6 @@ class Ava(object):
tf.io.gfile.makedirs(self.path_to_data_download)
logging.info("Downloading annotations.")
paths = {}
if download_labels_for_map:
zip_path = os.path.join(self.path_to_data_download,
ANNOTATION_URL.split("/")[-1])
urlretrieve(ANNOTATION_URL, zip_path)
@@ -400,7 +353,6 @@ def bytes23(string):
"""Creates a bytes string in either Python 2 or 3."""
if sys.version_info >= (3, 0):
return bytes(string, "utf8")
else:
return bytes(string)
@contextlib.contextmanager
@@ -416,10 +368,9 @@ def main(argv):
if len(argv) > 1:
raise app.UsageError("Too many command-line arguments.")
Ava(flags.FLAGS.path_to_output_dir,
flags.FLAGS.path_to_download_data).generate_examples(
flags.FLAGS.path_to_download_data).generate_and_write_records(
flags.FLAGS.splits_to_process,
flags.FLAGS.video_path_format_string,
flags.FLAGS.download_labels_for_map,
flags.FLAGS.seconds_per_sequence,
flags.FLAGS.hop_between_sequences)
@@ -430,10 +381,6 @@ if __name__ == "__main__":
flags.DEFINE_string("path_to_output_dir",
"",
"Path to directory to write data to.")
flags.DEFINE_boolean("download_labels_for_map",
True,
"If true, download the annotations to construct the "
"label map.")
flags.DEFINE_string("splits_to_process",
"train,val",
"Process these splits. Useful for custom data splits.")