ModelZoo / ResNet50_tensorflow / Commits

Commit ac82ad67, authored Aug 18, 2020 by Kaushik Shivakumar (parent c987d27c)

    remove redundant file
1 changed file with 0 additions and 403 deletions.

research/object_detection/dataset_tools/create_ava_tf_record_for_context.py (deleted, mode 100644, +0 −403)

# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
r"""Code to download and parse the AVA Actions dataset for TensorFlow models.

The [AVA Actions data set](https://research.google.com/ava/index.html)
is a dataset for human action recognition.

This script downloads the annotations and prepares data from similar
annotations if local video files are available. The video files can be
downloaded from the following website:
https://github.com/cvdfoundation/ava-dataset

Prior to running this script, please run download_and_preprocess_ava.sh to
download input videos.

Running this code as a module generates the data set on disk. First, the
required files are downloaded (_download_data), which enables constructing
the label map. Then (in generate_and_write_records), for each split in the
data set, the metadata and image frames are generated from the annotations
for each example (_generate_examples). The data set is written to disk as a
set of numbered TFRecord files.

Generating the data on disk can take considerable time and disk space.
(Image compression quality is the primary determiner of disk usage.)

If using the TensorFlow Object Detection API, set the input_type field
in the input_reader to TF_SEQUENCE_EXAMPLE.

This data is structured for per-clip action classification, where images is
the sequence of images and labels are a one-hot encoded value. See
as_dataset() for more details.

Note that the number of videos in the data set changes over time, so it will
likely be necessary to change the expected number of examples.

The argument video_path_format_string expects a value such as:
  "/path/to/videos/{0}"
"""

import collections
import contextlib
import csv
import glob
import hashlib
import os
import random
import sys
import zipfile

from absl import app
from absl import flags
from absl import logging
import cv2
from six.moves import range
from six.moves import urllib
import tensorflow.compat.v1 as tf

from object_detection.utils import dataset_util

POSSIBLE_TIMESTAMPS = range(902, 1798)
ANNOTATION_URL = "https://research.google.com/ava/download/ava_v2.2.zip"
SECONDS_TO_MILLI = 1000
FILEPATTERN = "ava_actions_%s_1fps_rgb"
SPLITS = {
    "train": {
        "shards": 1000,
        "examples": 862663,
        "csv": "",
        "excluded-csv": ""
    },
    "val": {
        "shards": 100,
        "examples": 243029,
        "csv": "",
        "excluded-csv": ""
    },
    # Test doesn't have ground truth, so TFRecords can't be created.
    "test": {
        "shards": 100,
        "examples": 0,
        "csv": "",
        "excluded-csv": ""
    }
}
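
# For example, with FILEPATTERN and the shard counts above, the train split
# is written to files named
#   ava_actions_train_1fps_rgb-00000-of-01000
# through
#   ava_actions_train_1fps_rgb-00999-of-01000
# under path_to_output_dir (see generate_and_write_records below).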

NUM_CLASSES = 80


def feature_list_feature(value):
  return tf.train.FeatureList(feature=value)


class Ava(object):
  """Generates and loads the AVA Actions 2.2 data set."""

  def __init__(self, path_to_output_dir, path_to_data_download):
    if not path_to_output_dir:
      raise ValueError("You must supply the path to the data directory.")
    self.path_to_data_download = path_to_data_download
    self.path_to_output_dir = path_to_output_dir

  def generate_and_write_records(self,
                                 splits_to_process="train,val,test",
                                 video_path_format_string=None,
                                 seconds_per_sequence=10,
                                 hop_between_sequences=10):
    """Downloads data and generates sharded TFRecords.

    Downloads the data files, generates metadata, and processes the metadata
    with OpenCV to produce tf.Examples for training. The resulting files can
    be read with as_dataset(). After running this function the original data
    files can be deleted.

    Args:
      splits_to_process: csv string of which splits to process. Allows
        providing a custom CSV with the CSV flag. The original data is still
        downloaded to generate the label_map.
      video_path_format_string: The format string for the path to local files.
      seconds_per_sequence: The length of each sequence, in seconds.
      hop_between_sequences: The gap between the centers of
        successive sequences.
    """
    logging.info("Downloading data.")
    download_output = self._download_data()
    for key in splits_to_process.split(","):
      logging.info("Generating examples for split: %s", key)
      all_metadata = list(self._generate_examples(
          download_output[0][key][0], download_output[0][key][1],
          download_output[1], seconds_per_sequence, hop_between_sequences,
          video_path_format_string))
      logging.info("An example of the metadata: ")
      logging.info(all_metadata[0])
      random.seed(47)
      random.shuffle(all_metadata)
      shards = SPLITS[key]["shards"]
      shard_names = [os.path.join(
          self.path_to_output_dir,
          FILEPATTERN % key + "-%05d-of-%05d" % (i, shards))
                     for i in range(shards)]
      writers = [tf.io.TFRecordWriter(shard_name)
                 for shard_name in shard_names]
      with _close_on_exit(writers) as writers:
        # Round-robin the shuffled examples across the shard writers.
        for i, seq_ex in enumerate(all_metadata):
          writers[i % len(writers)].write(seq_ex.SerializeToString())
    logging.info("Data extraction complete.")

  def _generate_examples(self, annotation_file, excluded_file, label_map,
                         seconds_per_sequence, hop_between_sequences,
                         video_path_format_string):
    """Generates the corresponding examples for each row in the annotation CSV.

    When iterating through frames for a single example, skips over excluded
    frames. Generates equal-length sequence examples, each with length
    seconds_per_sequence (1 fps) and gaps of hop_between_sequences frames
    (and seconds) between them, possibly greater due to excluded frames.

    Args:
      annotation_file: path to the file of AVA CSV annotations.
      excluded_file: path to a CSV file of excluded timestamps for each video.
      label_map: an {int: string} label map.
      seconds_per_sequence: The number of seconds per example in each example.
      hop_between_sequences: The hop between sequences. If less than
        seconds_per_sequence, will overlap.
      video_path_format_string: The format string for the path to local files.

    Yields:
      Each prepared tf.Example of metadata, also containing video frames.
    """
    fieldnames = ["id", "timestamp_seconds", "xmin", "ymin", "xmax", "ymax",
                  "action_label"]
    frame_excluded = {}
    # Create a sparse, nested map of videos and excluded frame indices.
    with open(excluded_file, "r") as excluded:
      reader = csv.reader(excluded)
      for row in reader:
        frame_excluded[(row[0], int(float(row[1])))] = True
    with open(annotation_file, "r") as annotations:
      reader = csv.DictReader(annotations, fieldnames)
      frame_annotations = collections.defaultdict(list)
      ids = set()
      # Aggregate annotations by video and timestamp.
      for row in reader:
        ids.add(row["id"])
        key = (row["id"], int(float(row["timestamp_seconds"])))
        frame_annotations[key].append(row)
      # For each video, find the aggregates near each sampled frame.
      logging.info("Generating metadata...")
      media_num = 1
      for media_id in ids:
        logging.info("%d/%d, ignore warnings.\n" % (media_num, len(ids)))
        media_num += 1

        filepath = glob.glob(
            video_path_format_string.format(media_id) + "*")[0]
        filename = filepath.split("/")[-1]
        cur_vid = cv2.VideoCapture(filepath)
        width = cur_vid.get(cv2.CAP_PROP_FRAME_WIDTH)
        height = cur_vid.get(cv2.CAP_PROP_FRAME_HEIGHT)

        # Count the non-excluded timestamps, used for image/seq_num_frames.
        middle_frame_time = POSSIBLE_TIMESTAMPS[0]
        total_non_excluded = 0
        while middle_frame_time < POSSIBLE_TIMESTAMPS[-1]:
          if (media_id, middle_frame_time) not in frame_excluded:
            total_non_excluded += 1
          middle_frame_time += 1

        middle_frame_time = POSSIBLE_TIMESTAMPS[0]
        cur_frame_num = 0
        while middle_frame_time < POSSIBLE_TIMESTAMPS[-1]:
          cur_vid.set(cv2.CAP_PROP_POS_MSEC,
                      middle_frame_time * SECONDS_TO_MILLI)
          success, image = cur_vid.read()
          success, buffer = cv2.imencode(".jpg", image)
          bufstring = buffer.tostring()

          if (media_id, middle_frame_time) in frame_excluded:
            middle_frame_time += 1
            logging.info("Ignoring and skipping excluded frame.")
            continue

          cur_frame_num += 1
          source_id = str(middle_frame_time) + "_" + media_id
          xmins = []
          xmaxs = []
          ymins = []
          ymaxs = []
          areas = []
          labels = []
          label_strings = []
          confidences = []
          for row in frame_annotations[(media_id, middle_frame_time)]:
            if len(row) > 2 and int(row["action_label"]) in label_map:
              xmins.append(float(row["xmin"]))
              xmaxs.append(float(row["xmax"]))
              ymins.append(float(row["ymin"]))
              ymaxs.append(float(row["ymax"]))
              areas.append(float((xmaxs[-1] - xmins[-1]) *
                                 (ymaxs[-1] - ymins[-1])) / 2)
              labels.append(int(row["action_label"]))
              label_strings.append(label_map[int(row["action_label"])])
              confidences.append(1)
            else:
              logging.warning("Unknown label: %s", row["action_label"])

          # Advance by a third of a second, snapping to the nearest integer
          # timestamp when within floating-point tolerance.
          middle_frame_time += 1 / 3
          if abs(middle_frame_time - round(middle_frame_time)) < 0.0001:
            middle_frame_time = round(middle_frame_time)

          key = hashlib.sha256(bufstring).hexdigest()
          date_captured_feature = (
              "2020-06-17 00:%02d:%02d" % ((middle_frame_time - 900) * 3 // 60,
                                           (middle_frame_time - 900) * 3 % 60))
          context_feature_dict = {
              "image/height":
                  dataset_util.int64_feature(int(height)),
              "image/width":
                  dataset_util.int64_feature(int(width)),
              "image/format":
                  dataset_util.bytes_feature("jpeg".encode("utf8")),
              "image/source_id":
                  dataset_util.bytes_feature(source_id.encode("utf8")),
              "image/filename":
                  dataset_util.bytes_feature(source_id.encode("utf8")),
              "image/encoded":
                  dataset_util.bytes_feature(bufstring),
              "image/key/sha256":
                  dataset_util.bytes_feature(key.encode("utf8")),
              "image/object/bbox/xmin":
                  dataset_util.float_list_feature(xmins),
              "image/object/bbox/xmax":
                  dataset_util.float_list_feature(xmaxs),
              "image/object/bbox/ymin":
                  dataset_util.float_list_feature(ymins),
              "image/object/bbox/ymax":
                  dataset_util.float_list_feature(ymaxs),
              "image/object/area":
                  dataset_util.float_list_feature(areas),
              "image/object/class/label":
                  dataset_util.int64_list_feature(labels),
              "image/object/class/text":
                  dataset_util.bytes_list_feature(label_strings),
              "image/location":
                  dataset_util.bytes_feature(media_id.encode("utf8")),
              "image/date_captured":
                  dataset_util.bytes_feature(
                      date_captured_feature.encode("utf8")),
              "image/seq_num_frames":
                  dataset_util.int64_feature(total_non_excluded),
              "image/seq_frame_num":
                  dataset_util.int64_feature(cur_frame_num),
              "image/seq_id":
                  dataset_util.bytes_feature(media_id.encode("utf8")),
          }
          yield tf.train.Example(
              features=tf.train.Features(feature=context_feature_dict))
        cur_vid.release()

  def _download_data(self):
    """Downloads and extracts data if not already available."""
    # six.moves.urllib provides urlretrieve under both Python 2 and 3, so a
    # single assignment covers both.
    urlretrieve = urllib.request.urlretrieve
    logging.info("Creating data directory.")
    tf.io.gfile.makedirs(self.path_to_data_download)
    logging.info("Downloading annotations.")
    paths = {}

    zip_path = os.path.join(self.path_to_data_download,
                            ANNOTATION_URL.split("/")[-1])
    urlretrieve(ANNOTATION_URL, zip_path)
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
      zip_ref.extractall(self.path_to_data_download)
    for split in ["train", "test", "val"]:
      csv_path = os.path.join(self.path_to_data_download,
                              "ava_%s_v2.2.csv" % split)
      excl_name = "ava_%s_excluded_timestamps_v2.2.csv" % split
      excluded_csv_path = os.path.join(self.path_to_data_download, excl_name)
      SPLITS[split]["csv"] = csv_path
      SPLITS[split]["excluded-csv"] = excluded_csv_path
      paths[split] = (csv_path, excluded_csv_path)

    label_map = self.get_label_map(os.path.join(
        self.path_to_data_download, "ava_action_list_v2.2.pbtxt"))
    return paths, label_map

  def get_label_map(self, path):
    """Parses a label map into {integer: string} format."""
    label_map = {}
    with open(path, "r") as f:
      current_id = -1
      current_label = ""
      for line in f:
        if "item {" in line:
          current_id = -1
          current_label = ""
        if "name:" in line:
          first_quote = line.find('"') + 1
          second_quote = line.find('"', first_quote)
          assert second_quote > -1
          current_label = line[first_quote:second_quote]
        if "id:" in line:
          current_id = int(line.split()[1])
        if "}" in line:
          label_map[current_id] = bytes23(current_label)
    logging.info(label_map)
    assert len(label_map) == NUM_CLASSES
    return label_map


def bytes23(string):
  """Creates a bytes string in either Python 2 or 3."""
  if sys.version_info >= (3, 0):
    return bytes(string, "utf8")
  return bytes(string)


@contextlib.contextmanager
def _close_on_exit(writers):
  """Call close on all writers on exit."""
  try:
    yield writers
  finally:
    for writer in writers:
      writer.close()


def main(argv):
  if len(argv) > 1:
    raise app.UsageError("Too many command-line arguments.")
  Ava(flags.FLAGS.path_to_output_dir,
      flags.FLAGS.path_to_download_data).generate_and_write_records(
          flags.FLAGS.splits_to_process,
          flags.FLAGS.video_path_format_string,
          flags.FLAGS.seconds_per_sequence,
          flags.FLAGS.hop_between_sequences)


if __name__ == "__main__":
  flags.DEFINE_string("path_to_download_data", "",
                      "Path to directory to download data to.")
  flags.DEFINE_string("path_to_output_dir", "",
                      "Path to directory to write data to.")
  flags.DEFINE_string("splits_to_process", "train,val",
                      "Process these splits. Useful for custom data splits.")
  flags.DEFINE_string("video_path_format_string", None,
                      "The format string for the path to local video files. "
                      "Uses the Python string.format() syntax with possible "
                      "arguments of {video}, {start}, {end}, {label_name}, "
                      "and {split}, corresponding to columns of the data "
                      "csvs.")
  flags.DEFINE_integer("seconds_per_sequence", 10,
                       "The number of seconds per example in each example.")
  flags.DEFINE_integer("hop_between_sequences", 10,
                       "The hop between sequences. If less than "
                       "seconds_per_sequence, will overlap.")
  app.run(main)
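
For reference, the tf.Examples written above can be parsed back with a feature spec mirroring the keys of context_feature_dict. A minimal sketch, assuming records were written to /tmp/ava/records with the default train settings (the paths and the subset of keys parsed are illustrative):

import tensorflow.compat.v1 as tf

# Feature spec mirroring a subset of the keys in context_feature_dict.
feature_spec = {
    "image/encoded": tf.io.FixedLenFeature([], tf.string),
    "image/source_id": tf.io.FixedLenFeature([], tf.string),
    "image/height": tf.io.FixedLenFeature([], tf.int64),
    "image/width": tf.io.FixedLenFeature([], tf.int64),
    "image/object/bbox/xmin": tf.io.VarLenFeature(tf.float32),
    "image/object/bbox/ymin": tf.io.VarLenFeature(tf.float32),
    "image/object/bbox/xmax": tf.io.VarLenFeature(tf.float32),
    "image/object/bbox/ymax": tf.io.VarLenFeature(tf.float32),
    "image/object/class/label": tf.io.VarLenFeature(tf.int64),
}

def parse_record(serialized):
  # Decode one serialized tf.Example into a dict of tensors.
  return tf.io.parse_single_example(serialized, feature_spec)

# Illustrative path; the glob matches FILEPATTERN for the train split.
files = tf.io.gfile.glob("/tmp/ava/records/ava_actions_train_1fps_rgb-*")
dataset = tf.data.TFRecordDataset(files).map(parse_record)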