"git@developer.sourcefind.cn:modelzoo/resnet50_tensorflow.git" did not exist on "c9bb58f2c86375dce38a80853f55fcf282a8b6cd"
Commit ac82ad67 authored by Kaushik Shivakumar's avatar Kaushik Shivakumar
Browse files

remove redundant file

parent c987d27c
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
r"""Code to download and parse the AVA Actions dataset for TensorFlow models.
The [AVA Actions data set](
https://research.google.com/ava/index.html)
is a dataset for human action recognition.
This script downloads the annotations and prepares training data from them
if local video files are available. The video files can be downloaded
from the following website:
https://github.com/cvdfoundation/ava-dataset
Prior to running this script, please run download_and_preprocess_ava.sh to
download input videos.
Running this code as a module generates the data set on disk. First, the
required annotation files are downloaded (_download_data), which enables
constructing the label map. Then (in generate_and_write_records), for each
split in the data set, metadata and image frames are generated from the
annotations for each example (_generate_examples). The data set is written to
disk as a set of numbered TFRecord files.
Generating the data on disk can take considerable time and disk space.
(Image compression quality is the primary determiner of disk usage.)
Each record is a standard tf.train.Example carrying an encoded JPEG frame
together with its bounding boxes and per-box action labels, so the output can
be consumed by the TensorFlow Object Detection API.
Note that the number of videos in the data set changes over time, so it will
likely be necessary to update the expected number of examples.
The video_path_format_string argument expects a value such as:
"/path/to/videos/{0}"
"""
import collections
import contextlib
import csv
import glob
import hashlib
import os
import random
import sys
import zipfile

from absl import app
from absl import flags
from absl import logging
import cv2
from six.moves import range
from six.moves import urllib
import tensorflow.compat.v1 as tf

from object_detection.utils import dataset_util
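# AVA annotates one keyframe per second within roughly minutes 15-30 of each
# movie, i.e. timestamps 902-1798 seconds.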
POSSIBLE_TIMESTAMPS = range(902, 1798)
ANNOTATION_URL = "https://research.google.com/ava/download/ava_v2.2.zip"
SECONDS_TO_MILLI = 1000
FILEPATTERN = "ava_actions_%s_1fps_rgb"
SPLITS = {
"train": {
"shards": 1000,
"examples": 862663,
"csv": '',
"excluded-csv": ''
},
"val": {
"shards": 100,
"examples": 243029,
"csv": '',
"excluded-csv": ''
},
# The test split has no ground truth, so TFRecords cannot be created for it.
"test": {
"shards": 100,
"examples": 0,
"csv": '',
"excluded-csv": ''
}
}
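# The example counts above are approximate; source videos are occasionally
# removed from the data set, so the totals drift over time.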
NUM_CLASSES = 80
def feature_list_feature(value):
return tf.train.FeatureList(feature=value)
class Ava(object):
"""Generates and loads the AVA Actions 2.2 data set."""
def __init__(self, path_to_output_dir, path_to_data_download):
if not path_to_output_dir:
raise ValueError("You must supply the path to the data directory.")
self.path_to_data_download = path_to_data_download
self.path_to_output_dir = path_to_output_dir
def generate_and_write_records(self,
splits_to_process="train,val,test",
video_path_format_string=None,
seconds_per_sequence=10,
hop_between_sequences=10):
"""Downloads data and generates sharded TFRecords.
Downloads the data files, generates metadata, and processes the metadata
with MediaPipe to produce tf.SequenceExamples for training. The resulting
files can be read with as_dataset(). After running this function the
original data files can be deleted.
Args:
splits_to_process: csv string of which splits to process. Allows providing
a custom CSV with the CSV flag. The original data is still downloaded
to generate the label_map.
video_path_format_string: The format string for the path to local files.
seconds_per_sequence: The length of each sequence, in seconds.
hop_between_sequences: The gap between the centers of
successive sequences.
"""
logging.info("Downloading data.")
download_output = self._download_data()
for key in splits_to_process.split(","):
logging.info("Generating examples for split: %s", key)
all_metadata = list(self._generate_examples(
download_output[0][key][0], download_output[0][key][1],
download_output[1], seconds_per_sequence, hop_between_sequences,
video_path_format_string))
logging.info("An example of the metadata: ")
logging.info(all_metadata[0])
random.seed(47)
random.shuffle(all_metadata)
shards = SPLITS[key]["shards"]
shard_names = [os.path.join(
self.path_to_output_dir, FILEPATTERN % key + "-%05d-of-%05d" % (
i, shards)) for i in range(shards)]
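# Shuffled examples are written round-robin across the shard writers so each
# shard receives a similar mix of videos.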
writers = [tf.io.TFRecordWriter(shard_name) for shard_name in shard_names]
with _close_on_exit(writers) as writers:
for i, seq_ex in enumerate(all_metadata):
writers[i % len(writers)].write(seq_ex.SerializeToString())
logging.info("Data extraction complete.")
def _generate_examples(self, annotation_file, excluded_file, label_map,
seconds_per_sequence, hop_between_sequences,
video_path_format_string):
"""For each row in the annotation CSV, generates the corresponding
examples. When iterating through frames for a single example, skips
over excluded frames. Generates equal-length sequence examples, each with
length seconds_per_sequence (1 fps) and gaps of hop_between_sequences
frames (and seconds) between them, possible greater due to excluded frames.
Args:
annotation_file: path to the file of AVA CSV annotations.
excluded_path: path to a CSV file of excluded timestamps for each video.
label_map: an {int: string} label map.
seconds_per_sequence: The number of seconds per example in each example.
hop_between_sequences: The hop between sequences. If less than
seconds_per_sequence, will overlap.
Yields:
Each prepared tf.Example of metadata also containing video frames
"""
fieldnames = ["id", "timestamp_seconds", "xmin", "ymin", "xmax", "ymax",
"action_label"]
frame_excluded = {}
# create a sparse, nested map of videos and frame indices.
with open(excluded_file, "r") as excluded:
reader = csv.reader(excluded)
for row in reader:
frame_excluded[(row[0], int(float(row[1])))] = True
with open(annotation_file, "r") as annotations:
reader = csv.DictReader(annotations, fieldnames)
frame_annotations = collections.defaultdict(list)
ids = set()
# aggregate by video and timestamp:
for row in reader:
ids.add(row["id"])
key = (row["id"], int(float(row["timestamp_seconds"])))
frame_annotations[key].append(row)
# for each video, find aggregates near each sampled frame:
logging.info("Generating metadata...")
media_num = 1
for media_id in ids:
logging.info("%d/%d, ignore warnings.\n" % (media_num, len(ids)))
media_num += 1
filepath = glob.glob(
video_path_format_string.format(media_id) + "*")[0]
filename = filepath.split("/")[-1]
cur_vid = cv2.VideoCapture(filepath)
width = cur_vid.get(cv2.CAP_PROP_FRAME_WIDTH)
height = cur_vid.get(cv2.CAP_PROP_FRAME_HEIGHT)
middle_frame_time = POSSIBLE_TIMESTAMPS[0]
total_non_excluded = 0
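# First pass: count the non-excluded timestamps so every frame's example can
# record the full sequence length (image/seq_num_frames).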
while middle_frame_time < POSSIBLE_TIMESTAMPS[-1]:
if (media_id, middle_frame_time) not in frame_excluded:
total_non_excluded += 1
middle_frame_time += 1
middle_frame_time = POSSIBLE_TIMESTAMPS[0]
cur_frame_num = 0
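# Second pass: seek to each sampled timestamp, decode the frame, and emit one
# tf.train.Example per non-excluded frame.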
while middle_frame_time < POSSIBLE_TIMESTAMPS[-1]:
# Skip excluded frames before seeking and decoding to avoid wasted work.
if (media_id, middle_frame_time) in frame_excluded:
middle_frame_time += 1
logging.info("Ignoring and skipping excluded frame.")
continue
cur_vid.set(cv2.CAP_PROP_POS_MSEC,
middle_frame_time * SECONDS_TO_MILLI)
success, image = cur_vid.read()
success, buffer = cv2.imencode('.jpg', image)
bufstring = buffer.tobytes()
cur_frame_num += 1
source_id = str(middle_frame_time) + "_" + media_id
xmins = []
xmaxs = []
ymins = []
ymaxs = []
areas = []
labels = []
label_strings = []
confidences = []
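# Collect every box annotation for this video and timestamp whose action
# label appears in the label map.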
for row in frame_annotations[(media_id, middle_frame_time)]:
if len(row) > 2 and int(row["action_label"]) in label_map:
xmins.append(float(row["xmin"]))
xmaxs.append(float(row["xmax"]))
ymins.append(float(row["ymin"]))
ymaxs.append(float(row["ymax"]))
areas.append(float((xmaxs[-1] - xmins[-1]) *
(ymaxs[-1] - ymins[-1])) / 2)
labels.append(int(row["action_label"]))
label_strings.append(label_map[int(row["action_label"])])
confidences.append(1)
else:
logging.warning("Unknown label: %s", row["action_label"])
middle_frame_time += 1.0 / 3
if abs(middle_frame_time - round(middle_frame_time)) < 0.0001:
middle_frame_time = round(middle_frame_time)
key = hashlib.sha256(bufstring).hexdigest()
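# Synthesize a nominal capture time from the frame's offset into the
# annotated segment (three sampled frames per second); the date is fixed.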
date_captured_feature = ("2020-06-17 00:%02d:%02d" % (
(middle_frame_time - 900) * 3 // 60, (middle_frame_time - 900) * 3 % 60))
context_feature_dict = {
'image/height':
dataset_util.int64_feature(int(height)),
'image/width':
dataset_util.int64_feature(int(width)),
'image/format':
dataset_util.bytes_feature('jpeg'.encode('utf8')),
'image/source_id':
dataset_util.bytes_feature(source_id.encode("utf8")),
'image/filename':
dataset_util.bytes_feature(source_id.encode("utf8")),
'image/encoded':
dataset_util.bytes_feature(bufstring),
'image/key/sha256':
dataset_util.bytes_feature(key.encode('utf8')),
'image/object/bbox/xmin':
dataset_util.float_list_feature(xmins),
'image/object/bbox/xmax':
dataset_util.float_list_feature(xmaxs),
'image/object/bbox/ymin':
dataset_util.float_list_feature(ymins),
'image/object/bbox/ymax':
dataset_util.float_list_feature(ymaxs),
'image/object/area':
dataset_util.float_list_feature(areas),
'image/object/class/label':
dataset_util.int64_list_feature(labels),
'image/object/class/text':
dataset_util.bytes_list_feature(label_strings),
'image/location':
dataset_util.bytes_feature(media_id.encode('utf8')),
'image/date_captured':
dataset_util.bytes_feature(date_captured_feature.encode('utf8')),
'image/seq_num_frames':
dataset_util.int64_feature(total_non_excluded),
'image/seq_frame_num':
dataset_util.int64_feature(cur_frame_num),
'image/seq_id':
dataset_util.bytes_feature(media_id.encode('utf8')),
}
yield tf.train.Example(
features=tf.train.Features(feature=context_feature_dict))
cur_vid.release()
def _download_data(self):
"""Downloads and extracts data if not already available."""
# six.moves.urllib resolves to the correct module on both Python 2 and 3.
urlretrieve = urllib.request.urlretrieve
logging.info("Creating data directory.")
tf.io.gfile.makedirs(self.path_to_data_download)
logging.info("Downloading annotations.")
paths = {}
zip_path = os.path.join(self.path_to_data_download,
ANNOTATION_URL.split("/")[-1])
urlretrieve(ANNOTATION_URL, zip_path)
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
zip_ref.extractall(self.path_to_data_download)
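# Record the extracted per-split annotation and excluded-timestamps CSV paths.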
for split in ["train", "test", "val"]:
csv_path = os.path.join(self.path_to_data_download, "ava_%s_v2.2.csv" % split)
excl_name = "ava_%s_excluded_timestamps_v2.2.csv" % split
excluded_csv_path = os.path.join(self.path_to_data_download, excl_name)
SPLITS[split]["csv"] = csv_path
SPLITS[split]["excluded-csv"] = excluded_csv_path
paths[split] = (csv_path, excluded_csv_path)
label_map = self.get_label_map(os.path.join(self.path_to_data_download, "ava_action_list_v2.2.pbtxt"))
return paths, label_map
def get_label_map(self, path):
"""Parsess a label map into {integer:string} format."""
label_map = {}
with open(path, "r") as f:
current_id = -1
current_label = ""
for line in f:
if "item {" in line:
current_id = -1
current_label = ""
if "name:" in line:
first_quote = line.find('"') + 1
second_quote = line.find('"', first_quote)
assert second_quote > -1
current_label = line[first_quote:second_quote]
if "id:" in line:
current_id = int(line.split()[1])
if "}" in line:
label_map[current_id] = bytes23(current_label)
logging.info(label_map)
assert len(label_map) == NUM_CLASSES
return label_map
def bytes23(string):
"""Creates a bytes string in either Python 2 or 3."""
if sys.version_info >= (3, 0):
return bytes(string, "utf8")
return bytes(string)
@contextlib.contextmanager
def _close_on_exit(writers):
"""Call close on all writers on exit."""
try:
yield writers
finally:
for writer in writers:
writer.close()
def main(argv):
if len(argv) > 1:
raise app.UsageError("Too many command-line arguments.")
Ava(flags.FLAGS.path_to_output_dir,
flags.FLAGS.path_to_download_data).generate_and_write_records(
flags.FLAGS.splits_to_process,
flags.FLAGS.video_path_format_string,
flags.FLAGS.seconds_per_sequence,
flags.FLAGS.hop_between_sequences)
if __name__ == "__main__":
flags.DEFINE_string("path_to_download_data",
"",
"Path to directory to download data to.")
flags.DEFINE_string("path_to_output_dir",
"",
"Path to directory to write data to.")
flags.DEFINE_string("splits_to_process",
"train,val",
"Process these splits. Useful for custom data splits.")
flags.DEFINE_string("video_path_format_string",
None,
"The format string for the path to local video files. "
"Uses the Python string.format() syntax with possible "
"arguments of {video}, {start}, {end}, {label_name}, and "
"{split}, corresponding to columns of the data csvs.")
flags.DEFINE_integer("seconds_per_sequence",
10,
"The number of seconds per example in each example.")
flags.DEFINE_integer("hop_between_sequences",
10,
"The hop between sequences. If less than "
"seconds_per_sequence, will overlap.")
app.run(main)