Commit f44507ec authored by Kaushik Shivakumar

make fixes

parent dd564bb9
@@ -52,10 +52,6 @@ The argument video_path_format_string expects a value as such:
 """
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
 import contextlib
 import csv
 import os
@@ -115,10 +111,9 @@ class Ava(object):
     self.path_to_data_download = path_to_data_download
     self.path_to_output_dir = path_to_output_dir

-  def generate_examples(self,
+  def generate_and_write_records(self,
                         splits_to_process="train,val,test",
                         video_path_format_string=None,
-                        download_labels_for_map=True,
                         seconds_per_sequence=10,
                         hop_between_sequences=10):
     """Downloads data and generates sharded TFRecords.
@@ -133,18 +128,16 @@ class Ava(object):
         a custom CSV with the CSV flag. The original data is still downloaded
         to generate the label_map.
       video_path_format_string: The format string for the path to local files.
-      download_labels_for_map: If true, download the annotations to create the
-        label map.
       seconds_per_sequence: The length of each sequence, in seconds.
       hop_between_sequences: The gap between the centers of
         successive sequences.
     """
     global_source_id = 0
     logging.info("Downloading data.")
-    download_output = self._download_data(download_labels_for_map)
+    download_output = self._download_data()
     for key in splits_to_process.split(","):
-      logging.info("Generating metadata for split: %s", key)
-      all_metadata = list(self._generate_metadata(
+      logging.info("Generating examples for split: %s", key)
+      all_metadata = list(self._generate_examples(
           download_output[0][key][0], download_output[0][key][1],
           download_output[1], seconds_per_sequence, hop_between_sequences,
           video_path_format_string, global_source_id))
@@ -162,10 +155,16 @@ class Ava(object):
       writers[i % len(writers)].write(seq_ex.SerializeToString())
     logging.info("Data extraction complete.")
-  def _generate_metadata(self, annotation_file, excluded_file, label_map,
+  def _generate_examples(self, annotation_file, excluded_file, label_map,
                          seconds_per_sequence, hop_between_sequences,
                          video_path_format_string):
-    """For each row in the annotation CSV, generates the corresponding metadata
+    """For each row in the annotation CSV, generates the corresponding examples.
+
+    When iterating through frames for a single sequence example, skips over
+    excluded frames. When moving to the next sequence example, also skips over
+    excluded frames as if they don't exist. Generates equal-length sequence
+    examples, each with length seconds_per_sequence (1 fps) and gaps of
+    hop_between_sequences frames (and seconds) between them, possibly greater
+    due to excluded frames.

     Args:
       annotation_file: path to the file of AVA CSV annotations.
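The new docstring above specifies equal-length windows at 1 fps with a hop between sequences and excluded frames skipped entirely. A small illustrative sketch of that windowing logic follows; the function name, the plain-list inputs, and hopping by window start rather than center are simplifications for illustration, not the file's actual code.

# Illustrative only: equal-length windows over per-second timestamps (1 fps),
# with excluded timestamps skipped as if they do not exist.
def build_windows(timestamps, excluded, seconds_per_sequence,
                  hop_between_sequences):
  """Yields equal-length lists of timestamps, skipping excluded ones."""
  kept = [t for t in timestamps if t not in excluded]  # excluded frames vanish
  start = 0
  while start + seconds_per_sequence <= len(kept):
    yield kept[start:start + seconds_per_sequence]
    # Hop in kept-frame counts; the wall-clock gap can be larger than this
    # whenever excluded frames were skipped over.
    start += hop_between_sequences

# Example: 1 fps timestamps 902..915 with seconds 905 and 906 excluded.
windows = list(build_windows(range(902, 916), {905, 906}, 5, 5))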
@@ -325,7 +324,7 @@ class Ava(object):
         cur_vid.release()

-  def _download_data(self, download_labels_for_map):
+  def _download_data(self):
     """Downloads and extracts data if not already available."""
     if sys.version_info >= (3, 0):
       urlretrieve = urllib.request.urlretrieve
@@ -335,7 +334,7 @@ class Ava(object):
     tf.io.gfile.makedirs(self.path_to_data_download)
     logging.info("Downloading annotations.")
     paths = {}
-    if download_labels_for_map:
-      zip_path = os.path.join(self.path_to_data_download,
-                              ANNOTATION_URL.split("/")[-1])
-      urlretrieve(ANNOTATION_URL, zip_path)
+    zip_path = os.path.join(self.path_to_data_download,
+                            ANNOTATION_URL.split("/")[-1])
+    urlretrieve(ANNOTATION_URL, zip_path)
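The hunk above drops the `download_labels_for_map` branch, so the annotation zip is now always fetched. As a self-contained reference for that download-and-extract pattern, here is a sketch; the URL is a placeholder and the helper name is not from this file.

# Sketch of the download-and-extract pattern used by _download_data.
# The URL below is a placeholder, not the ANNOTATION_URL defined in the file.
import os
import zipfile
from urllib import request

ANNOTATION_URL = "https://example.com/ava_annotations.zip"  # placeholder URL

def download_and_extract(url, download_dir):
  """Downloads a zip into download_dir and unpacks it there."""
  os.makedirs(download_dir, exist_ok=True)
  zip_path = os.path.join(download_dir, url.split("/")[-1])
  request.urlretrieve(url, zip_path)      # download the archive
  with zipfile.ZipFile(zip_path) as zf:   # extract alongside it
    zf.extractall(download_dir)
  return zip_path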
@@ -396,10 +395,9 @@ def main(argv):
   if len(argv) > 1:
     raise app.UsageError("Too many command-line arguments.")

   Ava(flags.FLAGS.path_to_output_dir,
-      flags.FLAGS.path_to_download_data).generate_examples(
+      flags.FLAGS.path_to_download_data).generate_and_write_records(
           flags.FLAGS.splits_to_process,
           flags.FLAGS.video_path_format_string,
-          flags.FLAGS.download_labels_for_map,
           flags.FLAGS.seconds_per_sequence,
           flags.FLAGS.hop_between_sequences)
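The renamed entry point can also be called without absl flags. A usage sketch with placeholder values follows; the module name, directories, and format pattern below are made up for illustration, and the expected video_path_format_string value is documented in the module docstring.

# Direct (flag-free) usage of the class shown in this diff. Paths are placeholders.
from ava_dataset import Ava  # hypothetical module name; adjust to the real file

Ava("/tmp/ava_tfrecords",            # path_to_output_dir (placeholder)
    "/tmp/ava_download").generate_and_write_records(
        splits_to_process="train,val",
        video_path_format_string="/tmp/ava_vids_raw/{0}",  # placeholder pattern
        seconds_per_sequence=10,
        hop_between_sequences=10)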
@@ -410,10 +408,6 @@ if __name__ == "__main__":
   flags.DEFINE_string("path_to_output_dir",
                       "",
                       "Path to directory to write data to.")
-  flags.DEFINE_boolean("download_labels_for_map",
-                       True,
-                       "If true, download the annotations to construct the "
-                       "label map.")
   flags.DEFINE_string("splits_to_process",
                       "train,val",
                       "Process these splits. Useful for custom data splits.")
...
 #!/bin/bash
+# This script downloads the videos for the AVA dataset. There are no arguments.
+# Copy this script into the desired parent directory of the ava_vids_raw/
+# directory created in this script to store the raw videos.
 mkdir ava_vids_raw
 cd ava_vids_raw
@@ -19,9 +22,9 @@ cd ..
 # Trimming causes issues with frame seeking in the python script, so it is best left out.
 # If included, need to modify the python script to subtract 900 seconds when seeking.
-#echo "Trimming all videos."
-#mkdir ava_vids_trimmed
-#for filename in ava_vids_raw/*; do
+# echo "Trimming all videos."
+# mkdir ava_vids_trimmed
+# for filename in ava_vids_raw/*; do
 #  ffmpeg -ss 900 -to 1800 -i $filename -c copy ava_vids_trimmed/${filename##*/}
-#done
+# done
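On the trimming caveat above: if the optional ffmpeg trimming were enabled, each clip would start at the 900-second mark of the original video, so any seek in the Python script would need a 900-second offset. A hedged OpenCV sketch of that adjustment follows; the function name, arguments, and structure are assumptions, not the script's actual code.

# Sketch only: seeking to an AVA timestamp in a *trimmed* video, which begins
# at t=900s of the original recording.
import cv2

def seek_to_timestamp(video_path, ava_timestamp_sec, trimmed=True):
  """Returns the frame at the requested AVA second, or None on failure."""
  cap = cv2.VideoCapture(video_path)
  offset = 900 if trimmed else 0            # trimmed clips start at 900 s
  cap.set(cv2.CAP_PROP_POS_MSEC, (ava_timestamp_sec - offset) * 1000.0)
  ok, frame = cap.read()
  cap.release()
  return frame if ok else None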